mirror of
https://github.com/samsonjs/media.git
synced 2026-04-27 15:07:40 +00:00
Add non-strict SubRip parsing (enabled by default).
This commit is contained in:
parent
ede7a69d95
commit
1f3bd97a2d
7 changed files with 160 additions and 28 deletions
|
|
@ -6,3 +6,7 @@ This is the first subtitle.
|
||||||
00:00:02,345 --> 00:00:03,456
|
00:00:02,345 --> 00:00:03,456
|
||||||
This is the second subtitle.
|
This is the second subtitle.
|
||||||
Second subtitle with second line.
|
Second subtitle with second line.
|
||||||
|
|
||||||
|
3
|
||||||
|
00:00:04,567 --> 00:00:08,901
|
||||||
|
This is the third subtitle.
|
||||||
|
|
@ -0,0 +1,13 @@
|
||||||
|
1
|
||||||
|
00:00:00,000 --> 00:00:01,234
|
||||||
|
This is the first subtitle.
|
||||||
|
|
||||||
|
|
||||||
|
2
|
||||||
|
00:00:02,345 --> 00:00:03,456
|
||||||
|
This is the second subtitle.
|
||||||
|
Second subtitle with second line.
|
||||||
|
|
||||||
|
3
|
||||||
|
00:00:04,567 --> 00:00:08,901
|
||||||
|
This is the third subtitle.
|
||||||
|
|
@ -0,0 +1,11 @@
|
||||||
|
1
|
||||||
|
00:00:00,000 --> 00:00:01,234
|
||||||
|
This is the first subtitle.
|
||||||
|
|
||||||
|
00:00:02,345 --> 00:00:03,456
|
||||||
|
This is the second subtitle.
|
||||||
|
Second subtitle with second line.
|
||||||
|
|
||||||
|
3
|
||||||
|
00:00:04,567 --> 00:00:08,901
|
||||||
|
This is the third subtitle.
|
||||||
|
|
@ -0,0 +1,11 @@
|
||||||
|
1
|
||||||
|
00:00:00,000 --> 00:00:01,234
|
||||||
|
This is the first subtitle.
|
||||||
|
|
||||||
|
2
|
||||||
|
This is the second subtitle.
|
||||||
|
Second subtitle with second line.
|
||||||
|
|
||||||
|
3
|
||||||
|
00:00:04,567 --> 00:00:08,901
|
||||||
|
This is the third subtitle.
|
||||||
|
|
@ -15,6 +15,8 @@
|
||||||
*/
|
*/
|
||||||
package com.google.android.exoplayer.text.subrip;
|
package com.google.android.exoplayer.text.subrip;
|
||||||
|
|
||||||
|
import com.google.android.exoplayer.ParserException;
|
||||||
|
|
||||||
import android.test.InstrumentationTestCase;
|
import android.test.InstrumentationTestCase;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
@ -27,44 +29,83 @@ public final class SubripParserTest extends InstrumentationTestCase {
|
||||||
|
|
||||||
private static final String EMPTY_FILE = "subrip/empty";
|
private static final String EMPTY_FILE = "subrip/empty";
|
||||||
private static final String TYPICAL_FILE = "subrip/typical";
|
private static final String TYPICAL_FILE = "subrip/typical";
|
||||||
|
private static final String TYPICAL_EXTRA_BLANK_LINE = "subrip/typical_extra_blank_line";
|
||||||
|
private static final String TYPICAL_MISSING_TIMECODE = "subrip/typical_missing_timecode";
|
||||||
|
private static final String TYPICAL_MISSING_SEQUENCE = "subrip/typical_missing_sequence";
|
||||||
private static final String NO_END_TIMECODES_FILE = "subrip/no_end_timecodes";
|
private static final String NO_END_TIMECODES_FILE = "subrip/no_end_timecodes";
|
||||||
|
|
||||||
public void testParseEmptySubripFile() throws IOException {
|
public void testParseEmpty() throws IOException {
|
||||||
SubripParser parser = new SubripParser();
|
SubripParser parser = new SubripParser(true);
|
||||||
InputStream inputStream =
|
InputStream inputStream = getInputStream(EMPTY_FILE);
|
||||||
getInstrumentation().getContext().getResources().getAssets().open(EMPTY_FILE);
|
|
||||||
SubripSubtitle subtitle = parser.parse(inputStream);
|
SubripSubtitle subtitle = parser.parse(inputStream);
|
||||||
// Assert that the subtitle is empty.
|
// Assert that the subtitle is empty.
|
||||||
assertEquals(0, subtitle.getEventTimeCount());
|
assertEquals(0, subtitle.getEventTimeCount());
|
||||||
assertTrue(subtitle.getCues(0).isEmpty());
|
assertTrue(subtitle.getCues(0).isEmpty());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testParseTypicalSubripFile() throws IOException {
|
public void testParseTypical() throws IOException {
|
||||||
SubripParser parser = new SubripParser();
|
SubripParser parser = new SubripParser(true);
|
||||||
InputStream inputStream =
|
InputStream inputStream = getInputStream(TYPICAL_FILE);
|
||||||
getInstrumentation().getContext().getResources().getAssets().open(TYPICAL_FILE);
|
|
||||||
SubripSubtitle subtitle = parser.parse(inputStream);
|
SubripSubtitle subtitle = parser.parse(inputStream);
|
||||||
|
assertEquals(6, subtitle.getEventTimeCount());
|
||||||
|
assertTypicalCue1(subtitle, 0);
|
||||||
|
assertTypicalCue2(subtitle, 2);
|
||||||
|
assertTypicalCue3(subtitle, 4);
|
||||||
|
}
|
||||||
|
|
||||||
// Test event count.
|
public void testParseTypicalExtraBlankLine() throws IOException {
|
||||||
|
SubripParser parser = new SubripParser(true);
|
||||||
|
InputStream inputStream = getInputStream(TYPICAL_EXTRA_BLANK_LINE);
|
||||||
|
SubripSubtitle subtitle = parser.parse(inputStream);
|
||||||
|
assertEquals(6, subtitle.getEventTimeCount());
|
||||||
|
assertTypicalCue1(subtitle, 0);
|
||||||
|
assertTypicalCue2(subtitle, 2);
|
||||||
|
assertTypicalCue3(subtitle, 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testParseTypicalMissingTimecode() throws IOException {
|
||||||
|
// Strict parsing should fail.
|
||||||
|
SubripParser parser = new SubripParser(true);
|
||||||
|
InputStream inputStream = getInputStream(TYPICAL_MISSING_TIMECODE);
|
||||||
|
try {
|
||||||
|
parser.parse(inputStream);
|
||||||
|
fail();
|
||||||
|
} catch (ParserException e) {
|
||||||
|
// Expected.
|
||||||
|
}
|
||||||
|
|
||||||
|
// Non-strict parsing should succeed, parsing the first and third cues only.
|
||||||
|
parser = new SubripParser(false);
|
||||||
|
inputStream = getInputStream(TYPICAL_MISSING_TIMECODE);
|
||||||
|
SubripSubtitle subtitle = parser.parse(inputStream);
|
||||||
assertEquals(4, subtitle.getEventTimeCount());
|
assertEquals(4, subtitle.getEventTimeCount());
|
||||||
|
assertTypicalCue1(subtitle, 0);
|
||||||
|
assertTypicalCue3(subtitle, 2);
|
||||||
|
}
|
||||||
|
|
||||||
// Test first cue.
|
public void testParseTypicalMissingSequence() throws IOException {
|
||||||
assertEquals(0, subtitle.getEventTime(0));
|
// Strict parsing should fail.
|
||||||
assertEquals("This is the first subtitle.",
|
SubripParser parser = new SubripParser(true);
|
||||||
subtitle.getCues(subtitle.getEventTime(0)).get(0).text.toString());
|
InputStream inputStream = getInputStream(TYPICAL_MISSING_SEQUENCE);
|
||||||
assertEquals(1234000, subtitle.getEventTime(1));
|
try {
|
||||||
|
parser.parse(inputStream);
|
||||||
|
fail();
|
||||||
|
} catch (ParserException e) {
|
||||||
|
// Expected.
|
||||||
|
}
|
||||||
|
|
||||||
// Test second cue.
|
// Non-strict parsing should succeed, parsing the first and third cues only.
|
||||||
assertEquals(2345000, subtitle.getEventTime(2));
|
parser = new SubripParser(false);
|
||||||
assertEquals("This is the second subtitle.\nSecond subtitle with second line.",
|
inputStream = getInputStream(TYPICAL_MISSING_SEQUENCE);
|
||||||
subtitle.getCues(subtitle.getEventTime(2)).get(0).text.toString());
|
SubripSubtitle subtitle = parser.parse(inputStream);
|
||||||
assertEquals(3456000, subtitle.getEventTime(3));
|
assertEquals(4, subtitle.getEventTimeCount());
|
||||||
|
assertTypicalCue1(subtitle, 0);
|
||||||
|
assertTypicalCue3(subtitle, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testParseNoEndTimecodes() throws IOException {
|
public void testParseNoEndTimecodes() throws IOException {
|
||||||
SubripParser parser = new SubripParser();
|
SubripParser parser = new SubripParser(true);
|
||||||
InputStream inputStream = getInstrumentation().getContext().getResources().getAssets()
|
InputStream inputStream = getInputStream(NO_END_TIMECODES_FILE);
|
||||||
.open(NO_END_TIMECODES_FILE);
|
|
||||||
SubripSubtitle subtitle = parser.parse(inputStream);
|
SubripSubtitle subtitle = parser.parse(inputStream);
|
||||||
|
|
||||||
// Test event count.
|
// Test event count.
|
||||||
|
|
@ -86,4 +127,29 @@ public final class SubripParserTest extends InstrumentationTestCase {
|
||||||
subtitle.getCues(subtitle.getEventTime(2)).get(0).text.toString());
|
subtitle.getCues(subtitle.getEventTime(2)).get(0).text.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private InputStream getInputStream(String fileName) throws IOException {
|
||||||
|
return getInstrumentation().getContext().getResources().getAssets().open(fileName);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void assertTypicalCue1(SubripSubtitle subtitle, int eventIndex) {
|
||||||
|
assertEquals(0, subtitle.getEventTime(eventIndex));
|
||||||
|
assertEquals("This is the first subtitle.",
|
||||||
|
subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).text.toString());
|
||||||
|
assertEquals(1234000, subtitle.getEventTime(eventIndex + 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void assertTypicalCue2(SubripSubtitle subtitle, int eventIndex) {
|
||||||
|
assertEquals(2345000, subtitle.getEventTime(eventIndex));
|
||||||
|
assertEquals("This is the second subtitle.\nSecond subtitle with second line.",
|
||||||
|
subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).text.toString());
|
||||||
|
assertEquals(3456000, subtitle.getEventTime(eventIndex + 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void assertTypicalCue3(SubripSubtitle subtitle, int eventIndex) {
|
||||||
|
assertEquals(4567000, subtitle.getEventTime(eventIndex));
|
||||||
|
assertEquals("This is the third subtitle.",
|
||||||
|
subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).text.toString());
|
||||||
|
assertEquals(8901000, subtitle.getEventTime(eventIndex + 1));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@ import com.google.android.exoplayer.util.MimeTypes;
|
||||||
import android.text.Html;
|
import android.text.Html;
|
||||||
import android.text.Spanned;
|
import android.text.Spanned;
|
||||||
import android.text.TextUtils;
|
import android.text.TextUtils;
|
||||||
|
import android.util.Log;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
@ -39,13 +40,30 @@ import java.util.regex.Pattern;
|
||||||
*/
|
*/
|
||||||
public final class SubripParser implements SubtitleParser {
|
public final class SubripParser implements SubtitleParser {
|
||||||
|
|
||||||
|
private static final String TAG = "SubripParser";
|
||||||
|
|
||||||
private static final Pattern SUBRIP_TIMING_LINE = Pattern.compile("(\\S*)\\s*-->\\s*(\\S*)");
|
private static final Pattern SUBRIP_TIMING_LINE = Pattern.compile("(\\S*)\\s*-->\\s*(\\S*)");
|
||||||
private static final Pattern SUBRIP_TIMESTAMP =
|
private static final Pattern SUBRIP_TIMESTAMP =
|
||||||
Pattern.compile("(?:(\\d+):)?(\\d+):(\\d+),(\\d+)");
|
Pattern.compile("(?:(\\d+):)?(\\d+):(\\d+),(\\d+)");
|
||||||
|
|
||||||
private final StringBuilder textBuilder;
|
private final StringBuilder textBuilder;
|
||||||
|
private final boolean strictParsing;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Equivalent to {@code SubripParser(false)}.
|
||||||
|
*/
|
||||||
public SubripParser() {
|
public SubripParser() {
|
||||||
|
this(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param strictParsing If true, {@link #parse(InputStream)} will throw a {@link ParserException}
|
||||||
|
* if the stream contains invalid data. If false, the parser will make a best effort to ignore
|
||||||
|
* minor errors in the stream. Note however that a {@link ParserException} will still be
|
||||||
|
* thrown when this is not possible.
|
||||||
|
*/
|
||||||
|
public SubripParser(boolean strictParsing) {
|
||||||
|
this.strictParsing = strictParsing;
|
||||||
textBuilder = new StringBuilder();
|
textBuilder = new StringBuilder();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -58,15 +76,21 @@ public final class SubripParser implements SubtitleParser {
|
||||||
String currentLine;
|
String currentLine;
|
||||||
|
|
||||||
while ((currentLine = reader.readLine()) != null) {
|
while ((currentLine = reader.readLine()) != null) {
|
||||||
// Skip blank lines.
|
if (currentLine.length() == 0) {
|
||||||
if (currentLine.length() == 0)
|
// Skip blank lines.
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Parse the numeric counter as a sanity check.
|
// Parse the index line as a sanity check.
|
||||||
try {
|
try {
|
||||||
Integer.parseInt(currentLine);
|
Integer.parseInt(currentLine);
|
||||||
} catch (NumberFormatException e) {
|
} catch (NumberFormatException e) {
|
||||||
throw new ParserException("Expected numeric counter: " + currentLine);
|
if (!strictParsing) {
|
||||||
|
Log.w(TAG, "Skipping invalid index: " + currentLine);
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
throw new ParserException("Expected numeric counter: " + currentLine);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read and parse the timing line.
|
// Read and parse the timing line.
|
||||||
|
|
@ -80,6 +104,9 @@ public final class SubripParser implements SubtitleParser {
|
||||||
haveEndTimecode = true;
|
haveEndTimecode = true;
|
||||||
cueTimesUs.add(parseTimecode(matcher.group(2)));
|
cueTimesUs.add(parseTimecode(matcher.group(2)));
|
||||||
}
|
}
|
||||||
|
} else if (!strictParsing) {
|
||||||
|
Log.w(TAG, "Skipping invalid timing: " + currentLine);
|
||||||
|
continue;
|
||||||
} else {
|
} else {
|
||||||
throw new ParserException("Expected timing line: " + currentLine);
|
throw new ParserException("Expected timing line: " + currentLine);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -135,7 +135,7 @@ public final class TtmlParser implements SubtitleParser {
|
||||||
if (strictParsing) {
|
if (strictParsing) {
|
||||||
throw e;
|
throw e;
|
||||||
} else {
|
} else {
|
||||||
Log.e(TAG, "Suppressing parser error", e);
|
Log.w(TAG, "Suppressing parser error", e);
|
||||||
// Treat the node (and by extension, all of its children) as unsupported.
|
// Treat the node (and by extension, all of its children) as unsupported.
|
||||||
unsupportedNodeDepth++;
|
unsupportedNodeDepth++;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue