Support MKV embedded SubRip captions.

2026-04-27 15:07:40 +00:00 · 2015-09-01 14:04:47 +01:00 · 2015-09-01 14:04:47 +01:00 · 83568ca52f
commit 83568ca52f
parent 009d4d0c2c
5 changed files with 161 additions and 21 deletions
--- a/library/src/androidTest/assets/subrip/no_end_timecodes
+++ b/library/src/androidTest/assets/subrip/no_end_timecodes
@ -0,0 +1,11 @@
+1
+00:00:00,000 -->
+SubRip doesn't technically allow missing end timecodes.
+
+2
+00:00:02,345 -->
+We interpret it to mean that a subtitle extends to the start of the next one.
+
+3
+00:00:03,456 -->
+Or to the end of the media.
--- a/library/src/androidTest/java/com/google/android/exoplayer/text/subrip/SubripParserTest.java
+++ b/library/src/androidTest/java/com/google/android/exoplayer/text/subrip/SubripParserTest.java
@ -25,13 +25,14 @@ import java.io.InputStream;
 */
 public final class SubripParserTest extends InstrumentationTestCase {

-  private static final String TYPICAL_SUBRIP_FILE = "subrip/typical";
-  private static final String EMPTY_SUBRIP_FILE = "subrip/empty";
+  private static final String EMPTY_FILE = "subrip/empty";
+  private static final String TYPICAL_FILE = "subrip/typical";
+  private static final String NO_END_TIMECODES_FILE = "subrip/no_end_timecodes";

-  public void testParseNullSubripFile() throws IOException {
+  public void testParseEmptySubripFile() throws IOException {
    SubripParser parser = new SubripParser();
    InputStream inputStream =
-        getInstrumentation().getContext().getResources().getAssets().open(EMPTY_SUBRIP_FILE);
+        getInstrumentation().getContext().getResources().getAssets().open(EMPTY_FILE);
    SubripSubtitle subtitle = parser.parse(inputStream);
    // Assert that the subtitle is empty.
    assertEquals(0, subtitle.getEventTimeCount());
@ -41,7 +42,7 @@ public final class SubripParserTest extends InstrumentationTestCase {
  public void testParseTypicalSubripFile() throws IOException {
    SubripParser parser = new SubripParser();
    InputStream inputStream =
-        getInstrumentation().getContext().getResources().getAssets().open(TYPICAL_SUBRIP_FILE);
+        getInstrumentation().getContext().getResources().getAssets().open(TYPICAL_FILE);
    SubripSubtitle subtitle = parser.parse(inputStream);

    // Test event count.
@ -60,4 +61,29 @@ public final class SubripParserTest extends InstrumentationTestCase {
    assertEquals(3456000, subtitle.getEventTime(3));
  }

+  /**
+   * Parses a file in which every timing line omits its end timecode, and checks that each cue
+   * contributes a single event time at which its own text is the active cue.
+   */
+  public void testParseNoEndTimecodes() throws IOException {
+    SubripParser parser = new SubripParser();
+    InputStream inputStream = getInstrumentation().getContext().getResources().getAssets()
+        .open(NO_END_TIMECODES_FILE);
+    SubripSubtitle subtitle = parser.parse(inputStream);
+
+    // Expected start time and text of each cue, in file order.
+    long[] expectedTimesUs = {0, 2345000, 3456000};
+    String[] expectedTexts = {
+        "SubRip doesn't technically allow missing end timecodes.",
+        "We interpret it to mean that a subtitle extends to the start of the next one.",
+        "Or to the end of the media."
+    };
+
+    // Without end timecodes there is exactly one event per cue.
+    assertEquals(expectedTimesUs.length, subtitle.getEventTimeCount());
+
+    for (int i = 0; i < expectedTimesUs.length; i++) {
+      assertEquals(expectedTimesUs[i], subtitle.getEventTime(i));
+      assertEquals(expectedTexts[i],
+          subtitle.getCues(subtitle.getEventTime(i)).get(0).text.toString());
+    }
+  }
+
 }
--- a/library/src/main/java/com/google/android/exoplayer/extractor/webm/WebmExtractor.java
+++ b/library/src/main/java/com/google/android/exoplayer/extractor/webm/WebmExtractor.java
@ -73,6 +73,8 @@ public final class WebmExtractor implements Extractor {
  private static final String CODEC_ID_AAC = "A_AAC";
  private static final String CODEC_ID_MP3 = "A_MPEG/L3";
  private static final String CODEC_ID_AC3 = "A_AC3";
+  private static final String CODEC_ID_SUBRIP = "S_TEXT/UTF8";
+
  private static final int VORBIS_MAX_INPUT_SIZE = 8192;
  private static final int OPUS_MAX_INPUT_SIZE = 5760;
  private static final int MP3_MAX_INPUT_SIZE = 4096;
@ -98,6 +100,7 @@ public final class WebmExtractor implements Extractor {
  private static final int ID_SIMPLE_BLOCK = 0xA3;
  private static final int ID_BLOCK_GROUP = 0xA0;
  private static final int ID_BLOCK = 0xA1;
+  private static final int ID_BLOCK_DURATION = 0x9B;
  private static final int ID_REFERENCE_BLOCK = 0xFB;
  private static final int ID_TRACKS = 0x1654AE6B;
  private static final int ID_TRACK_ENTRY = 0xAE;
@ -131,12 +134,39 @@ public final class WebmExtractor implements Extractor {
  private static final int ID_CUE_TIME = 0xB3;
  private static final int ID_CUE_TRACK_POSITIONS = 0xB7;
  private static final int ID_CUE_CLUSTER_POSITION = 0xF1;
+  private static final int ID_LANGUAGE = 0x22B59C;

  private static final int LACING_NONE = 0;
  private static final int LACING_XIPH = 1;
  private static final int LACING_FIXED_SIZE = 2;
  private static final int LACING_EBML = 3;

+  /**
+   * A template for the prefix that must be added to each subrip sample. The 12 byte end timecode
+   * starting at {@link #SUBRIP_PREFIX_END_TIMECODE_OFFSET} is set to a dummy value, and must be
+   * replaced with the duration of the subtitle.
+   * <p>
+   * Equivalent to the UTF-8 string: "1\n00:00:00,000 --> 00:00:00,000\n".
+   */
+  private static final byte[] SUBRIP_PREFIX = new byte[] {49, 10, 48, 48, 58, 48, 48, 58, 48, 48,
+      44, 48, 48, 48, 32, 45, 45, 62, 32, 48, 48, 58, 48, 48, 58, 48, 48, 44, 48, 48, 48, 10};
+  /**
+   * A special end timecode indicating that a subtitle should be displayed until the next subtitle,
+   * or until the end of the media in the case of the last subtitle.
+   * <p>
+   * Equivalent to the UTF-8 string: "            ".
+   */
+  private static final byte[] SUBRIP_TIMECODE_EMPTY =
+      new byte[] {32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32};
+  /**
+   * The byte offset of the end timecode in {@link #SUBRIP_PREFIX}.
+   */
+  private static final int SUBRIP_PREFIX_END_TIMECODE_OFFSET = 19;
+  /**
+   * The length in bytes of a timecode in a subrip prefix.
+   */
+  private static final int SUBRIP_TIMECODE_LENGTH = 12;
+
  private final EbmlReader reader;
  private final VarintReader varintReader;
  private final SparseArray<Track> tracks;
@ -148,6 +178,7 @@ public final class WebmExtractor implements Extractor {
  private final ParsableByteArray vorbisNumPageSamples;
  private final ParsableByteArray seekEntryIdBytes;
  private final ParsableByteArray sampleStrippedBytes;
+  private final ParsableByteArray subripSample;

  private long segmentContentPosition = UNKNOWN;
  private long segmentContentSize = UNKNOWN;
@ -178,13 +209,13 @@ public final class WebmExtractor implements Extractor {
  // Block reading state.
  private int blockState;
  private long blockTimeUs;
+  private long blockDurationUs;
  private int blockLacingSampleIndex;
  private int blockLacingSampleCount;
  private int[] blockLacingSampleSizes;
  private int blockTrackNumber;
  private int blockTrackNumberLength;
  private int blockFlags;
-  private byte[] blockEncryptionKeyId;

  // Sample reading state.
  private int sampleBytesRead;
@ -212,6 +243,7 @@ public final class WebmExtractor implements Extractor {
    nalStartCode = new ParsableByteArray(NalUnitUtil.NAL_START_CODE);
    nalLength = new ParsableByteArray(4);
    sampleStrippedBytes = new ParsableByteArray();
+    subripSample = new ParsableByteArray();
  }

  @Override
@ -274,6 +306,7 @@ public final class WebmExtractor implements Extractor {
      case ID_SEEK_POSITION:
      case ID_TIMECODE_SCALE:
      case ID_TIME_CODE:
+      case ID_BLOCK_DURATION:
      case ID_PIXEL_WIDTH:
      case ID_PIXEL_HEIGHT:
      case ID_TRACK_NUMBER:
@ -293,6 +326,7 @@ public final class WebmExtractor implements Extractor {
        return EbmlReader.TYPE_UNSIGNED_INT;
      case ID_DOC_TYPE:
      case ID_CODEC_ID:
+      case ID_LANGUAGE:
        return EbmlReader.TYPE_STRING;
      case ID_SEEK_ID:
      case ID_CONTENT_COMPRESSION_SETTINGS:
@ -397,7 +431,7 @@ public final class WebmExtractor implements Extractor {
        if (!sampleSeenReferenceBlock) {
          blockFlags |= C.SAMPLE_FLAG_SYNC;
        }
-        outputSampleMetadata(tracks.get(blockTrackNumber), blockTimeUs);
+        commitSampleToOutput(tracks.get(blockTrackNumber), blockTimeUs);
        blockState = BLOCK_STATE_START;
        return;
      case ID_CONTENT_ENCODING:
@ -531,6 +565,9 @@ public final class WebmExtractor implements Extractor {
      case ID_TIME_CODE:
        clusterTimecodeUs = scaleTimecodeToUs(value);
        return;
+      case ID_BLOCK_DURATION:
+        blockDurationUs = scaleTimecodeToUs(value);
+        return;
      default:
        return;
    }
@ -560,6 +597,9 @@ public final class WebmExtractor implements Extractor {
      case ID_CODEC_ID:
        currentTrack.codecId = value;
        return;
+      case ID_LANGUAGE:
+        currentTrack.language = value;
+        return;
      default:
        return;
    }
@ -597,6 +637,7 @@ public final class WebmExtractor implements Extractor {
        if (blockState == BLOCK_STATE_START) {
          blockTrackNumber = (int) varintReader.readUnsignedVarint(input, false, true);
          blockTrackNumberLength = varintReader.getLastLength();
+          blockDurationUs = UNKNOWN;
          blockState = BLOCK_STATE_HEADER;
          scratch.reset();
        }
@ -698,7 +739,6 @@ public final class WebmExtractor implements Extractor {
              || (id == ID_SIMPLE_BLOCK && (scratch.data[2] & 0x80) == 0x80);
          blockFlags = (isKeyframe ? C.SAMPLE_FLAG_SYNC : 0)
              | (isInvisible ? C.SAMPLE_FLAG_DECODE_ONLY : 0);
-          blockEncryptionKeyId = track.encryptionKeyId;
          blockState = BLOCK_STATE_DATA;
          blockLacingSampleIndex = 0;
        }
@ -709,7 +749,7 @@ public final class WebmExtractor implements Extractor {
            writeSampleData(input, track, blockLacingSampleSizes[blockLacingSampleIndex]);
            long sampleTimeUs = this.blockTimeUs
                + (blockLacingSampleIndex * track.defaultSampleDurationNs) / 1000;
-            outputSampleMetadata(track, sampleTimeUs);
+            commitSampleToOutput(track, sampleTimeUs);
            blockLacingSampleIndex++;
          }
          blockState = BLOCK_STATE_START;
@ -725,8 +765,11 @@ public final class WebmExtractor implements Extractor {
    }
  }

-  private void outputSampleMetadata(Track track, long timeUs) {
-    track.output.sampleMetadata(timeUs, blockFlags, sampleBytesWritten, 0, blockEncryptionKeyId);
+  /**
+   * Commits the sample that has been read for {@code track} to the track's output, then resets
+   * the sample reading state.
+   *
+   * @param track The track to which the sample belongs.
+   * @param timeUs The sample time in microseconds, passed through to the output.
+   */
+  private void commitSampleToOutput(Track track, long timeUs) {
+    if (CODEC_ID_SUBRIP.equals(track.codecId)) {
+      // Subrip sample data is buffered rather than written when it is read, because its end
+      // timecode has to be filled in first. Write it now, ahead of the sample metadata.
+      writeSubripSample(track);
+    }
+    track.output.sampleMetadata(timeUs, blockFlags, sampleBytesWritten, 0, track.encryptionKeyId);
    sampleRead = true;
    resetSample();
  }
@ -758,6 +801,21 @@ public final class WebmExtractor implements Extractor {

  private void writeSampleData(ExtractorInput input, Track track, int size)
      throws IOException, InterruptedException {
+    if (CODEC_ID_SUBRIP.equals(track.codecId)) {
+      int sizeWithPrefix = SUBRIP_PREFIX.length + size;
+      if (subripSample.capacity() < sizeWithPrefix) {
+        // Initialize subripSample to contain the required prefix and have space to hold a subtitle
+        // twice as long as this one.
+        subripSample.data = Arrays.copyOf(SUBRIP_PREFIX, sizeWithPrefix + size);
+      }
+      input.readFully(subripSample.data, SUBRIP_PREFIX.length, size);
+      subripSample.setPosition(0);
+      subripSample.setLimit(sizeWithPrefix);
+      // Defer writing the data to the track output. We need to modify the sample data by setting
+      // the correct end timecode, which we might not have yet.
+      return;
+    }
+
    TrackOutput output = track.output;
    if (!sampleEncodingHandled) {
      if (track.hasContentEncryption) {
@ -834,6 +892,33 @@ public final class WebmExtractor implements Extractor {
    }
  }

+  /**
+   * Writes the buffered subrip sample to the output of {@code track}.
+   * <p>
+   * The end timecode in the sample's prefix is first set from the current block duration. The
+   * number of bytes written is added to the running sample size.
+   *
+   * @param track The track whose output receives the sample data.
+   */
+  private void writeSubripSample(Track track) {
+    byte[] sampleData = subripSample.data;
+    setSubripSampleEndTimecode(sampleData, blockDurationUs);
+    // Note: If we ever want to support DRM protected subtitles then we'll need to output the
+    // appropriate encryption data here.
+    TrackOutput output = track.output;
+    output.sampleData(subripSample, subripSample.limit());
+    sampleBytesWritten += subripSample.limit();
+  }
+
+  /**
+   * Overwrites the end timecode in the prefix of a subrip sample.
+   * <p>
+   * Exactly {@link #SUBRIP_TIMECODE_LENGTH} bytes are written at
+   * {@link #SUBRIP_PREFIX_END_TIMECODE_OFFSET}, in the form "hh:mm:ss,SSS". The bytes are produced
+   * directly as ASCII rather than via {@link String#format}, so the output does not depend on the
+   * default locale (which may use non-ASCII digits) or on the platform's default charset.
+   *
+   * @param subripSampleData The sample data, which must start with the subrip prefix.
+   * @param timeUs The end time to write in microseconds, or {@code UNKNOWN} to write
+   *     {@link #SUBRIP_TIMECODE_EMPTY}. Other values are clamped to the range 00:00:00,000 to
+   *     99:59:59,999, which is all that the fixed width field can represent.
+   */
+  private static void setSubripSampleEndTimecode(byte[] subripSampleData, long timeUs) {
+    if (timeUs == UNKNOWN) {
+      System.arraycopy(SUBRIP_TIMECODE_EMPTY, 0, subripSampleData,
+          SUBRIP_PREFIX_END_TIMECODE_OFFSET, SUBRIP_TIMECODE_LENGTH);
+      return;
+    }
+    // All arithmetic is done on longs. With int operands, minutes * 60000000 exceeds
+    // Integer.MAX_VALUE as soon as the minutes component reaches 36.
+    long maxTimeMs = 100L * 60 * 60 * 1000 - 1;
+    long timeMs = Math.max(0, Math.min(timeUs / 1000, maxTimeMs));
+    // The separators are always rewritten, because the sample buffer is reused and a previous
+    // sample may have blanked the whole field.
+    int offset = SUBRIP_PREFIX_END_TIMECODE_OFFSET;
+    offset = writeAsciiNumber(subripSampleData, offset, timeMs / 3600000, 2);
+    subripSampleData[offset++] = ':';
+    offset = writeAsciiNumber(subripSampleData, offset, (timeMs / 60000) % 60, 2);
+    subripSampleData[offset++] = ':';
+    offset = writeAsciiNumber(subripSampleData, offset, (timeMs / 1000) % 60, 2);
+    subripSampleData[offset++] = ',';
+    writeAsciiNumber(subripSampleData, offset, timeMs % 1000, 3);
+  }
+
+  /**
+   * Writes the {@code digitCount} least significant decimal digits of a non-negative
+   * {@code value} into {@code target} as zero padded ASCII.
+   *
+   * @param target The array to write into.
+   * @param offset The index at which the most significant digit is written.
+   * @param value The non-negative value to write.
+   * @param digitCount The number of digits to write.
+   * @return The index immediately following the last digit written.
+   */
+  private static int writeAsciiNumber(byte[] target, int offset, long value, int digitCount) {
+    for (int i = offset + digitCount - 1; i >= offset; i--) {
+      target[i] = (byte) ('0' + (value % 10));
+      value /= 10;
+    }
+    return offset + digitCount;
+  }
+
  /**
   * Writes {@code length} bytes of sample data into {@code target} at {@code offset}, consisting of
   * pending {@link #sampleStrippedBytes} and any remaining data read from {@code input}.
@ -948,7 +1033,8 @@ public final class WebmExtractor implements Extractor {
        || CODEC_ID_VORBIS.equals(codecId)
        || CODEC_ID_AAC.equals(codecId)
        || CODEC_ID_MP3.equals(codecId)
-        || CODEC_ID_AC3.equals(codecId);
+        || CODEC_ID_AC3.equals(codecId)
+        || CODEC_ID_SUBRIP.equals(codecId);
  }

  /**
@ -1032,6 +1118,9 @@ public final class WebmExtractor implements Extractor {
    public long codecDelayNs = 0;
    public long seekPreRollNs = 0;

+    // Text elements.
+    private String language = "eng";
+
    // Set when the output is initialized. nalUnitLengthFieldLength is only set for H264/H265.
    public TrackOutput output;
    public int nalUnitLengthFieldLength;
@ -1097,6 +1186,9 @@ public final class WebmExtractor implements Extractor {
        case CODEC_ID_AC3:
          mimeType = MimeTypes.AUDIO_AC3;
          break;
+        case CODEC_ID_SUBRIP:
+          mimeType = MimeTypes.APPLICATION_SUBRIP;
+          break;
        default:
          throw new ParserException("Unrecognized codec identifier.");
      }
@ -1108,6 +1200,8 @@ public final class WebmExtractor implements Extractor {
      } else if (MimeTypes.isVideo(mimeType)) {
        format = MediaFormat.createVideoFormat(mimeType, MediaFormat.NO_VALUE, maxInputSize,
            durationUs, width, height, 0, initializationData);
+      } else if (MimeTypes.APPLICATION_SUBRIP.equals(mimeType)) {
+        format = MediaFormat.createTextFormat(mimeType, MediaFormat.NO_VALUE, language, durationUs);
      } else {
        throw new ParserException("Unexpected MIME type.");
      }
--- a/library/src/main/java/com/google/android/exoplayer/text/subrip/SubripParser.java
+++ b/library/src/main/java/com/google/android/exoplayer/text/subrip/SubripParser.java
@ -39,7 +39,7 @@ import java.util.regex.Pattern;
 */
 public final class SubripParser implements SubtitleParser {

-  private static final Pattern SUBRIP_TIMING_LINE = Pattern.compile("(.*)\\s+-->\\s+(.*)");
+  private static final Pattern SUBRIP_TIMING_LINE = Pattern.compile("(\\S*)\\s*-->\\s*(\\S*)");
  private static final Pattern SUBRIP_TIMESTAMP =
      Pattern.compile("(?:(\\d+):)?(\\d+):(\\d+),(\\d+)");

@ -54,6 +54,7 @@ public final class SubripParser implements SubtitleParser {
    ArrayList<Cue> cues = new ArrayList<>();
    LongArray cueTimesUs = new LongArray();
    BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, C.UTF8_NAME));
+    boolean haveEndTimecode;
    String currentLine;

    while ((currentLine = reader.readLine()) != null) {
@ -65,11 +66,16 @@ public final class SubripParser implements SubtitleParser {
      }

      // Read and parse the timing line.
+      haveEndTimecode = false;
      currentLine = reader.readLine();
      Matcher matcher = SUBRIP_TIMING_LINE.matcher(currentLine);
      if (matcher.find()) {
-        cueTimesUs.add(parseTimestampUs(matcher.group(1)));
-        cueTimesUs.add(parseTimestampUs(matcher.group(2)));
+        cueTimesUs.add(parseTimecode(matcher.group(1)));
+        String endTimecode = matcher.group(2);
+        if (!TextUtils.isEmpty(endTimecode)) {
+          haveEndTimecode = true;
+          cueTimesUs.add(parseTimecode(matcher.group(2)));
+        }
      } else {
        throw new ParserException("Expected timing line: " + currentLine);
      }
@ -85,6 +91,9 @@ public final class SubripParser implements SubtitleParser {

      Spanned text = Html.fromHtml(textBuilder.toString());
      cues.add(new Cue(text));
+      if (haveEndTimecode) {
+        cues.add(null);
+      }
    }

    Cue[] cuesArray = new Cue[cues.size()];
@ -98,7 +107,7 @@ public final class SubripParser implements SubtitleParser {
    return MimeTypes.APPLICATION_SUBRIP.equals(mimeType);
  }

-  private static long parseTimestampUs(String s) throws NumberFormatException {
+  private static long parseTimecode(String s) throws NumberFormatException {
    Matcher matcher = SUBRIP_TIMESTAMP.matcher(s);
    if (!matcher.matches()) {
      throw new NumberFormatException("has invalid format");
--- a/library/src/main/java/com/google/android/exoplayer/text/subrip/SubripSubtitle.java
+++ b/library/src/main/java/com/google/android/exoplayer/text/subrip/SubripSubtitle.java
@ -32,8 +32,8 @@ import java.util.List;
  private final long[] cueTimesUs;

  /**
-   * @param cues The cues in the subtitle.
-   * @param cueTimesUs Interleaved cue start and end times, in microseconds.
+   * @param cues The cues in the subtitle. Null entries may be used to represent empty cues.
+   * @param cueTimesUs The cue times, in microseconds.
   */
  public SubripSubtitle(Cue[] cues, long[] cueTimesUs) {
    this.cues = cues;
@ -69,11 +69,11 @@ import java.util.List;
  @Override
  public List<Cue> getCues(long timeUs) {
    int index = Util.binarySearchFloor(cueTimesUs, timeUs, true, false);
-    if (index == -1 || index % 2 == 1) {
-      // timeUs is earlier than the start of the first cue, or corresponds to a gap between cues.
+    // Each entry in cueTimesUs has a corresponding entry at the same index in cues, so the index
+    // found above is used to look up the cue directly.
+    if (index == -1 || cues[index] == null) {
+      // timeUs is earlier than the start of the first cue, or we have an empty cue.
      return Collections.<Cue>emptyList();
    } else {
-      return Collections.singletonList(cues[index / 2]);
+      return Collections.singletonList(cues[index]);
    }
  }