Add non-strict SubRip parsing (enabled by default).

This commit is contained in:
Oliver Woodman 2015-09-10 18:29:36 +01:00
parent ede7a69d95
commit 1f3bd97a2d
7 changed files with 160 additions and 28 deletions

View file

@ -5,4 +5,8 @@ This is the first subtitle.
2
00:00:02,345 --> 00:00:03,456
This is the second subtitle.
Second subtitle with second line.
Second subtitle with second line.
3
00:00:04,567 --> 00:00:08,901
This is the third subtitle.

View file

@ -0,0 +1,13 @@
1
00:00:00,000 --> 00:00:01,234
This is the first subtitle.
2
00:00:02,345 --> 00:00:03,456
This is the second subtitle.
Second subtitle with second line.
3
00:00:04,567 --> 00:00:08,901
This is the third subtitle.

View file

@ -0,0 +1,11 @@
1
00:00:00,000 --> 00:00:01,234
This is the first subtitle.
00:00:02,345 --> 00:00:03,456
This is the second subtitle.
Second subtitle with second line.
3
00:00:04,567 --> 00:00:08,901
This is the third subtitle.

View file

@ -0,0 +1,11 @@
1
00:00:00,000 --> 00:00:01,234
This is the first subtitle.
2
This is the second subtitle.
Second subtitle with second line.
3
00:00:04,567 --> 00:00:08,901
This is the third subtitle.

View file

@ -15,6 +15,8 @@
*/
package com.google.android.exoplayer.text.subrip;
import com.google.android.exoplayer.ParserException;
import android.test.InstrumentationTestCase;
import java.io.IOException;
@ -27,44 +29,83 @@ public final class SubripParserTest extends InstrumentationTestCase {
private static final String EMPTY_FILE = "subrip/empty";
private static final String TYPICAL_FILE = "subrip/typical";
private static final String TYPICAL_EXTRA_BLANK_LINE = "subrip/typical_extra_blank_line";
private static final String TYPICAL_MISSING_TIMECODE = "subrip/typical_missing_timecode";
private static final String TYPICAL_MISSING_SEQUENCE = "subrip/typical_missing_sequence";
private static final String NO_END_TIMECODES_FILE = "subrip/no_end_timecodes";
public void testParseEmptySubripFile() throws IOException {
SubripParser parser = new SubripParser();
InputStream inputStream =
getInstrumentation().getContext().getResources().getAssets().open(EMPTY_FILE);
public void testParseEmpty() throws IOException {
SubripParser parser = new SubripParser(true);
InputStream inputStream = getInputStream(EMPTY_FILE);
SubripSubtitle subtitle = parser.parse(inputStream);
// Assert that the subtitle is empty.
assertEquals(0, subtitle.getEventTimeCount());
assertTrue(subtitle.getCues(0).isEmpty());
}
public void testParseTypicalSubripFile() throws IOException {
SubripParser parser = new SubripParser();
InputStream inputStream =
getInstrumentation().getContext().getResources().getAssets().open(TYPICAL_FILE);
public void testParseTypical() throws IOException {
SubripParser parser = new SubripParser(true);
InputStream inputStream = getInputStream(TYPICAL_FILE);
SubripSubtitle subtitle = parser.parse(inputStream);
assertEquals(6, subtitle.getEventTimeCount());
assertTypicalCue1(subtitle, 0);
assertTypicalCue2(subtitle, 2);
assertTypicalCue3(subtitle, 4);
}
// Test event count.
/**
 * Parses the {@code subrip/typical_extra_blank_line} asset with a strict parser and checks that
 * it yields six event times carrying the three typical cues.
 */
public void testParseTypicalExtraBlankLine() throws IOException {
  SubripSubtitle parsed = new SubripParser(true).parse(getInputStream(TYPICAL_EXTRA_BLANK_LINE));
  // Each cue contributes a start and an end event, so three cues give six event times.
  assertEquals(6, parsed.getEventTimeCount());
  assertTypicalCue1(parsed, 0);
  assertTypicalCue2(parsed, 2);
  assertTypicalCue3(parsed, 4);
}
/**
 * Checks handling of the {@code subrip/typical_missing_timecode} asset: a strict parser must
 * reject it with a {@link ParserException}, while a non-strict parser must skip the broken cue
 * and still return the first and third cues.
 */
public void testParseTypicalMissingTimecode() throws IOException {
  // Strict mode: the malformed cue must surface as a ParserException.
  SubripParser strictParser = new SubripParser(true);
  InputStream strictInput = getInputStream(TYPICAL_MISSING_TIMECODE);
  try {
    strictParser.parse(strictInput);
    fail();
  } catch (ParserException expected) {
    // This is the outcome the test is looking for.
  }

  // Non-strict mode: only the first and third cues survive, giving four event times.
  SubripParser lenientParser = new SubripParser(false);
  InputStream lenientInput = getInputStream(TYPICAL_MISSING_TIMECODE);
  SubripSubtitle parsed = lenientParser.parse(lenientInput);
  assertEquals(4, parsed.getEventTimeCount());
  assertTypicalCue1(parsed, 0);
  assertTypicalCue3(parsed, 2);
}
// Test first cue.
assertEquals(0, subtitle.getEventTime(0));
assertEquals("This is the first subtitle.",
subtitle.getCues(subtitle.getEventTime(0)).get(0).text.toString());
assertEquals(1234000, subtitle.getEventTime(1));
public void testParseTypicalMissingSequence() throws IOException {
// Strict parsing should fail.
SubripParser parser = new SubripParser(true);
InputStream inputStream = getInputStream(TYPICAL_MISSING_SEQUENCE);
try {
parser.parse(inputStream);
fail();
} catch (ParserException e) {
// Expected.
}
// Test second cue.
assertEquals(2345000, subtitle.getEventTime(2));
assertEquals("This is the second subtitle.\nSecond subtitle with second line.",
subtitle.getCues(subtitle.getEventTime(2)).get(0).text.toString());
assertEquals(3456000, subtitle.getEventTime(3));
// Non-strict parsing should succeed, parsing the first and third cues only.
parser = new SubripParser(false);
inputStream = getInputStream(TYPICAL_MISSING_SEQUENCE);
SubripSubtitle subtitle = parser.parse(inputStream);
assertEquals(4, subtitle.getEventTimeCount());
assertTypicalCue1(subtitle, 0);
assertTypicalCue3(subtitle, 2);
}
public void testParseNoEndTimecodes() throws IOException {
SubripParser parser = new SubripParser();
InputStream inputStream = getInstrumentation().getContext().getResources().getAssets()
.open(NO_END_TIMECODES_FILE);
SubripParser parser = new SubripParser(true);
InputStream inputStream = getInputStream(NO_END_TIMECODES_FILE);
SubripSubtitle subtitle = parser.parse(inputStream);
// Test event count.
@ -86,4 +127,29 @@ public final class SubripParserTest extends InstrumentationTestCase {
subtitle.getCues(subtitle.getEventTime(2)).get(0).text.toString());
}
/**
 * Opens the named asset through the instrumentation context's resources.
 *
 * @param fileName Path of the asset to open, e.g. {@code subrip/typical}.
 * @return A stream over the asset's contents.
 * @throws IOException If opening the asset fails.
 */
private InputStream getInputStream(String fileName) throws IOException {
  return getInstrumentation()
      .getContext()
      .getResources()
      .getAssets()
      .open(fileName);
}
/**
 * Asserts that the first typical cue starts at the given event index and ends at the next one.
 *
 * @param subtitle The parsed subtitle to inspect.
 * @param eventIndex Index of the event time at which the cue is expected to start.
 */
private static void assertTypicalCue1(SubripSubtitle subtitle, int eventIndex) {
  int endEventIndex = eventIndex + 1;
  assertEquals(0, subtitle.getEventTime(eventIndex));
  // Look the cue up by its own start time and compare the rendered text.
  String cueText = subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).text.toString();
  assertEquals("This is the first subtitle.", cueText);
  assertEquals(1234000, subtitle.getEventTime(endEventIndex));
}
/**
 * Asserts that the second typical cue (a two-line cue) starts at the given event index and ends
 * at the next one.
 *
 * @param subtitle The parsed subtitle to inspect.
 * @param eventIndex Index of the event time at which the cue is expected to start.
 */
private static void assertTypicalCue2(SubripSubtitle subtitle, int eventIndex) {
  int endEventIndex = eventIndex + 1;
  assertEquals(2345000, subtitle.getEventTime(eventIndex));
  // The two text lines of the cue are expected to be joined by a newline.
  String cueText = subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).text.toString();
  assertEquals("This is the second subtitle.\nSecond subtitle with second line.", cueText);
  assertEquals(3456000, subtitle.getEventTime(endEventIndex));
}
/**
 * Asserts that the third typical cue starts at the given event index and ends at the next one.
 *
 * @param subtitle The parsed subtitle to inspect.
 * @param eventIndex Index of the event time at which the cue is expected to start.
 */
private static void assertTypicalCue3(SubripSubtitle subtitle, int eventIndex) {
  int endEventIndex = eventIndex + 1;
  assertEquals(4567000, subtitle.getEventTime(eventIndex));
  // Look the cue up by its own start time and compare the rendered text.
  String cueText = subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).text.toString();
  assertEquals("This is the third subtitle.", cueText);
  assertEquals(8901000, subtitle.getEventTime(endEventIndex));
}
}

View file

@ -25,6 +25,7 @@ import com.google.android.exoplayer.util.MimeTypes;
import android.text.Html;
import android.text.Spanned;
import android.text.TextUtils;
import android.util.Log;
import java.io.BufferedReader;
import java.io.IOException;
@ -39,13 +40,30 @@ import java.util.regex.Pattern;
*/
public final class SubripParser implements SubtitleParser {
// Tag attached to the warnings this parser logs when it skips invalid input.
private static final String TAG = "SubripParser";
// Matches a timing line: two runs of non-whitespace characters separated by "-->", with optional
// whitespace around the arrow. Group 1 is the text before the arrow and group 2 the text after
// it (parsed as the end timecode). Either group may be empty because \S* accepts zero characters.
private static final Pattern SUBRIP_TIMING_LINE = Pattern.compile("(\\S*)\\s*-->\\s*(\\S*)");
// Matches a single timestamp: an optional leading numeric field followed by ':', then two
// ':'-separated numeric fields, a ',' and a final numeric field (e.g. "00:00:02,345").
// NOTE(review): presumably hours, minutes, seconds and milliseconds — confirm against the
// timecode parsing code, which is outside this view.
private static final Pattern SUBRIP_TIMESTAMP =
Pattern.compile("(?:(\\d+):)?(\\d+):(\\d+),(\\d+)");
// Created once in the constructor. NOTE(review): presumably reused to accumulate cue text while
// parsing — its usage is outside this view.
private final StringBuilder textBuilder;
// When true, an invalid index or timing line causes a ParserException; when false, the offending
// line is logged and skipped.
private final boolean strictParsing;
/**
* Creates a parser with strict parsing disabled, so minor errors in the stream are skipped on a
* best-effort basis. Equivalent to {@code SubripParser(false)}.
*/
public SubripParser() {
this(false);
}
/**
 * Creates a parser with the requested error-handling mode.
 *
 * @param strictParsing Whether invalid data is fatal. When true, {@link #parse(InputStream)}
 *     throws a {@link ParserException} if the stream contains invalid data. When false, the
 *     parser makes a best effort to ignore minor errors, although a {@link ParserException} is
 *     still thrown when that is not possible.
 */
public SubripParser(boolean strictParsing) {
  this.textBuilder = new StringBuilder();
  this.strictParsing = strictParsing;
}
@ -58,15 +76,21 @@ public final class SubripParser implements SubtitleParser {
String currentLine;
while ((currentLine = reader.readLine()) != null) {
// Skip blank lines.
if (currentLine.length() == 0)
if (currentLine.length() == 0) {
// Skip blank lines.
continue;
}
// Parse the numeric counter as a sanity check.
// Parse the index line as a sanity check.
try {
Integer.parseInt(currentLine);
} catch (NumberFormatException e) {
throw new ParserException("Expected numeric counter: " + currentLine);
if (!strictParsing) {
Log.w(TAG, "Skipping invalid index: " + currentLine);
continue;
} else {
throw new ParserException("Expected numeric counter: " + currentLine);
}
}
// Read and parse the timing line.
@ -80,6 +104,9 @@ public final class SubripParser implements SubtitleParser {
haveEndTimecode = true;
cueTimesUs.add(parseTimecode(matcher.group(2)));
}
} else if (!strictParsing) {
Log.w(TAG, "Skipping invalid timing: " + currentLine);
continue;
} else {
throw new ParserException("Expected timing line: " + currentLine);
}

View file

@ -135,7 +135,7 @@ public final class TtmlParser implements SubtitleParser {
if (strictParsing) {
throw e;
} else {
Log.e(TAG, "Suppressing parser error", e);
Log.w(TAG, "Suppressing parser error", e);
// Treat the node (and by extension, all of its children) as unsupported.
unsupportedNodeDepth++;
}