mirror of
https://github.com/samsonjs/media.git
synced 2026-04-02 10:45:51 +00:00
Refactored the Webvtt parsing classes
Moved the behaviors related to cues to the WebvttCueParser class. This way, the parsing methods will be more easily accessible to other classes, such as the MP4 WebVTT parser. This class also has some methods that require state in order to avoid repeated, avoidable allocations. The method visibility is subject to change in further CLs. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=111616824
This commit is contained in:
parent
0aa4d3d209
commit
651996983b
5 changed files with 281 additions and 248 deletions
|
|
@ -27,7 +27,7 @@ import android.text.style.UnderlineSpan;
|
|||
public final class WebvttCueParserTest extends InstrumentationTestCase {
|
||||
|
||||
public void testParseStrictValidClassesAndTrailingTokens() throws Exception {
|
||||
Spanned text = WebvttCueParser.parse("<v.first.loud Esme>"
|
||||
Spanned text = WebvttCueParser.parseCueText("<v.first.loud Esme>"
|
||||
+ "This <u.style1.style2 some stuff>is</u> text with <b.foo><i.bar>html</i></b> tags");
|
||||
|
||||
assertEquals("This is text with html tags", text.toString());
|
||||
|
|
@ -48,7 +48,7 @@ public final class WebvttCueParserTest extends InstrumentationTestCase {
|
|||
}
|
||||
|
||||
public void testParseStrictValidUnsupportedTagsStrippedOut() throws Exception {
|
||||
Spanned text = WebvttCueParser.parse(
|
||||
Spanned text = WebvttCueParser.parseCueText(
|
||||
"<v.first.loud Esme>This <unsupported>is</unsupported> text with "
|
||||
+ "<notsupp><invalid>html</invalid></notsupp> tags");
|
||||
|
||||
|
|
@ -58,7 +58,7 @@ public final class WebvttCueParserTest extends InstrumentationTestCase {
|
|||
}
|
||||
|
||||
public void testParseWellFormedUnclosedEndAtCueEnd() throws Exception {
|
||||
Spanned text = WebvttCueParser.parse(
|
||||
Spanned text = WebvttCueParser.parseCueText(
|
||||
"An <u some trailing stuff>unclosed u tag with <i>italic</i> inside");
|
||||
|
||||
assertEquals("An unclosed u tag with italic inside", text.toString());
|
||||
|
|
@ -76,7 +76,7 @@ public final class WebvttCueParserTest extends InstrumentationTestCase {
|
|||
}
|
||||
|
||||
public void testParseWellFormedUnclosedEndAtParent() throws Exception {
|
||||
Spanned text = WebvttCueParser.parse(
|
||||
Spanned text = WebvttCueParser.parseCueText(
|
||||
"An unclosed u tag with <i><u>underline and italic</i> inside");
|
||||
|
||||
assertEquals("An unclosed u tag with underline and italic inside", text.toString());
|
||||
|
|
@ -95,7 +95,7 @@ public final class WebvttCueParserTest extends InstrumentationTestCase {
|
|||
}
|
||||
|
||||
public void testParseMalformedNestedElements() throws Exception {
|
||||
Spanned text = WebvttCueParser.parse(
|
||||
Spanned text = WebvttCueParser.parseCueText(
|
||||
"<b><u>An unclosed u tag with <i>italic</u> inside</i></b>");
|
||||
assertEquals("An unclosed u tag with italic inside", text.toString());
|
||||
|
||||
|
|
@ -121,7 +121,7 @@ public final class WebvttCueParserTest extends InstrumentationTestCase {
|
|||
}
|
||||
|
||||
public void testParseCloseNonExistingTag() throws Exception {
|
||||
Spanned text = WebvttCueParser.parse("blah<b>blah</i>blah</b>blah");
|
||||
Spanned text = WebvttCueParser.parseCueText("blah<b>blah</i>blah</b>blah");
|
||||
assertEquals("blahblahblahblah", text.toString());
|
||||
|
||||
StyleSpan[] spans = getSpans(text, StyleSpan.class);
|
||||
|
|
@ -132,42 +132,42 @@ public final class WebvttCueParserTest extends InstrumentationTestCase {
|
|||
}
|
||||
|
||||
public void testParseEmptyTagName() throws Exception {
|
||||
Spanned text = WebvttCueParser.parse("An unclosed u tag with <>italic inside");
|
||||
Spanned text = WebvttCueParser.parseCueText("An unclosed u tag with <>italic inside");
|
||||
assertEquals("An unclosed u tag with italic inside", text.toString());
|
||||
}
|
||||
|
||||
public void testParseEntities() throws Exception {
|
||||
Spanned text = WebvttCueParser.parse("& > < ");
|
||||
Spanned text = WebvttCueParser.parseCueText("& > < ");
|
||||
assertEquals("& > < ", text.toString());
|
||||
}
|
||||
|
||||
public void testParseEntitiesUnsupported() throws Exception {
|
||||
Spanned text = WebvttCueParser.parse("&noway; &sure;");
|
||||
Spanned text = WebvttCueParser.parseCueText("&noway; &sure;");
|
||||
assertEquals(" ", text.toString());
|
||||
}
|
||||
|
||||
public void testParseEntitiesNotTerminated() throws Exception {
|
||||
Spanned text = WebvttCueParser.parse("& here comes text");
|
||||
Spanned text = WebvttCueParser.parseCueText("& here comes text");
|
||||
assertEquals("& here comes text", text.toString());
|
||||
}
|
||||
|
||||
public void testParseEntitiesNotTerminatedUnsupported() throws Exception {
|
||||
Spanned text = WebvttCueParser.parse("&surenot here comes text");
|
||||
Spanned text = WebvttCueParser.parseCueText("&surenot here comes text");
|
||||
assertEquals(" here comes text", text.toString());
|
||||
}
|
||||
|
||||
public void testParseEntitiesNotTerminatedNoSpace() throws Exception {
|
||||
Spanned text = WebvttCueParser.parse("&surenot");
|
||||
Spanned text = WebvttCueParser.parseCueText("&surenot");
|
||||
assertEquals("&surenot", text.toString());
|
||||
}
|
||||
|
||||
public void testParseVoidTag() throws Exception {
|
||||
Spanned text = WebvttCueParser.parse("here comes<br/> text<br/>");
|
||||
Spanned text = WebvttCueParser.parseCueText("here comes<br/> text<br/>");
|
||||
assertEquals("here comes text", text.toString());
|
||||
}
|
||||
|
||||
public void testParseMultipleTagsOfSameKind() {
|
||||
Spanned text = WebvttCueParser.parse("blah <b>blah</b> blah <b>foo</b>");
|
||||
Spanned text = WebvttCueParser.parseCueText("blah <b>blah</b> blah <b>foo</b>");
|
||||
|
||||
assertEquals("blah blah blah foo", text.toString());
|
||||
StyleSpan[] spans = getSpans(text, StyleSpan.class);
|
||||
|
|
@ -181,7 +181,7 @@ public final class WebvttCueParserTest extends InstrumentationTestCase {
|
|||
}
|
||||
|
||||
public void testParseInvalidVoidSlash() {
|
||||
Spanned text = WebvttCueParser.parse("blah <b/.st1.st2 trailing stuff> blah");
|
||||
Spanned text = WebvttCueParser.parseCueText("blah <b/.st1.st2 trailing stuff> blah");
|
||||
|
||||
assertEquals("blah blah", text.toString());
|
||||
StyleSpan[] spans = getSpans(text, StyleSpan.class);
|
||||
|
|
@ -189,37 +189,37 @@ public final class WebvttCueParserTest extends InstrumentationTestCase {
|
|||
}
|
||||
|
||||
public void testParseMonkey() throws Exception {
|
||||
Spanned text = WebvttCueParser.parse(
|
||||
Spanned text = WebvttCueParser.parseCueText(
|
||||
"< u>An unclosed u tag with <<<<< i>italic</u></u></u></u ></i><u><u> inside");
|
||||
assertEquals("An unclosed u tag with italic inside", text.toString());
|
||||
text = WebvttCueParser.parse(">>>>>>>>>An unclosed u tag with <<<<< italic</u></u></u></u >"
|
||||
+ "</i><u><u> inside");
|
||||
text = WebvttCueParser.parseCueText(">>>>>>>>>An unclosed u tag with <<<<< italic</u></u></u>"
|
||||
+ "</u ></i><u><u> inside");
|
||||
assertEquals(">>>>>>>>>An unclosed u tag with inside", text.toString());
|
||||
}
|
||||
|
||||
public void testParseCornerCases() throws Exception {
|
||||
Spanned text = WebvttCueParser.parse(">");
|
||||
Spanned text = WebvttCueParser.parseCueText(">");
|
||||
assertEquals(">", text.toString());
|
||||
|
||||
text = WebvttCueParser.parse("<");
|
||||
text = WebvttCueParser.parseCueText("<");
|
||||
assertEquals("", text.toString());
|
||||
|
||||
text = WebvttCueParser.parse("<b.st1.st2 annotation");
|
||||
text = WebvttCueParser.parseCueText("<b.st1.st2 annotation");
|
||||
assertEquals("", text.toString());
|
||||
|
||||
text = WebvttCueParser.parse("<<<<<<<<<<<<<<<<");
|
||||
text = WebvttCueParser.parseCueText("<<<<<<<<<<<<<<<<");
|
||||
assertEquals("", text.toString());
|
||||
|
||||
text = WebvttCueParser.parse("<<<<<<>><<<<<<<<<<");
|
||||
text = WebvttCueParser.parseCueText("<<<<<<>><<<<<<<<<<");
|
||||
assertEquals(">", text.toString());
|
||||
|
||||
text = WebvttCueParser.parse("<>");
|
||||
text = WebvttCueParser.parseCueText("<>");
|
||||
assertEquals("", text.toString());
|
||||
|
||||
text = WebvttCueParser.parse("&");
|
||||
text = WebvttCueParser.parseCueText("&");
|
||||
assertEquals("&", text.toString());
|
||||
|
||||
text = WebvttCueParser.parse("&&&&&&&");
|
||||
text = WebvttCueParser.parseCueText("&&&&&&&");
|
||||
assertEquals("&&&&&&&", text.toString());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ import com.google.android.exoplayer.extractor.PositionHolder;
|
|||
import com.google.android.exoplayer.extractor.SeekMap;
|
||||
import com.google.android.exoplayer.extractor.TrackOutput;
|
||||
import com.google.android.exoplayer.extractor.ts.PtsTimestampAdjuster;
|
||||
import com.google.android.exoplayer.text.webvtt.WebvttCueParser;
|
||||
import com.google.android.exoplayer.text.webvtt.WebvttParserUtil;
|
||||
import com.google.android.exoplayer.util.MimeTypes;
|
||||
import com.google.android.exoplayer.util.ParsableByteArray;
|
||||
|
|
@ -137,7 +138,7 @@ import java.util.regex.Pattern;
|
|||
}
|
||||
|
||||
// Find the first cue header and parse the start time.
|
||||
Matcher cueHeaderMatcher = WebvttParserUtil.findNextCueHeader(webvttData);
|
||||
Matcher cueHeaderMatcher = WebvttCueParser.findNextCueHeader(webvttData);
|
||||
if (cueHeaderMatcher == null) {
|
||||
// No cues found. Don't output a sample, but still output a corresponding track.
|
||||
buildTrackOutput(0);
|
||||
|
|
|
|||
|
|
@ -15,7 +15,11 @@
|
|||
*/
|
||||
package com.google.android.exoplayer.text.webvtt;
|
||||
|
||||
import com.google.android.exoplayer.text.Cue;
|
||||
import com.google.android.exoplayer.util.ParsableByteArray;
|
||||
|
||||
import android.graphics.Typeface;
|
||||
import android.text.Layout.Alignment;
|
||||
import android.text.SpannableStringBuilder;
|
||||
import android.text.Spanned;
|
||||
import android.text.style.StyleSpan;
|
||||
|
|
@ -23,11 +27,19 @@ import android.text.style.UnderlineSpan;
|
|||
import android.util.Log;
|
||||
|
||||
import java.util.Stack;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Parser for webvtt cue text. (https://w3c.github.io/webvtt/#cue-text)
|
||||
*/
|
||||
/* package */ final class WebvttCueParser {
|
||||
public final class WebvttCueParser {
|
||||
|
||||
public static final Pattern CUE_HEADER_PATTERN = Pattern
|
||||
.compile("^(\\S+)\\s+-->\\s+(\\S+)(.*)?$");
|
||||
|
||||
private static final Pattern COMMENT = Pattern.compile("^NOTE((\u0020|\u0009).*)?$");
|
||||
private static final Pattern CUE_SETTING_PATTERN = Pattern.compile("(\\S+?):(\\S+)");
|
||||
|
||||
private static final char CHAR_LESS_THAN = '<';
|
||||
private static final char CHAR_GREATER_THAN = '>';
|
||||
|
|
@ -54,9 +66,102 @@ import java.util.Stack;
|
|||
|
||||
private static final String TAG = "WebvttCueParser";
|
||||
|
||||
private WebvttCueParser() {}
|
||||
private StringBuilder textBuilder;
|
||||
private PositionHolder positionHolder;
|
||||
|
||||
public static Spanned parse(String markup) {
|
||||
public WebvttCueParser() {
|
||||
positionHolder = new PositionHolder();
|
||||
textBuilder = new StringBuilder();
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the next valid Webvtt cue in a parsable array, including timestamps, settings and text.
|
||||
*
|
||||
* @param webvttData parsable Webvtt file data.
|
||||
* @return a {@link WebvttCue} instance if cue content is found. {@code null} otherwise.
|
||||
*/
|
||||
public WebvttCue parseNextValidCue(ParsableByteArray webvttData) {
|
||||
Matcher cueHeaderMatcher;
|
||||
while ((cueHeaderMatcher = findNextCueHeader(webvttData)) != null) {
|
||||
WebvttCue currentCue = parseCue(cueHeaderMatcher, webvttData);
|
||||
if (currentCue != null) {
|
||||
return currentCue;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private WebvttCue parseCue(Matcher cueHeaderMatcher, ParsableByteArray webvttData) {
|
||||
long cueStartTime;
|
||||
long cueEndTime;
|
||||
try {
|
||||
// Parse the cue start and end times.
|
||||
cueStartTime = WebvttParserUtil.parseTimestampUs(cueHeaderMatcher.group(1));
|
||||
cueEndTime = WebvttParserUtil.parseTimestampUs(cueHeaderMatcher.group(2));
|
||||
} catch (NumberFormatException e) {
|
||||
Log.w(TAG, "Skipping cue with bad header: " + cueHeaderMatcher.group());
|
||||
return null;
|
||||
}
|
||||
|
||||
// Default cue settings.
|
||||
Alignment cueTextAlignment = null;
|
||||
float cueLine = Cue.DIMEN_UNSET;
|
||||
int cueLineType = Cue.TYPE_UNSET;
|
||||
int cueLineAnchor = Cue.TYPE_UNSET;
|
||||
float cuePosition = Cue.DIMEN_UNSET;
|
||||
int cuePositionAnchor = Cue.TYPE_UNSET;
|
||||
float cueWidth = Cue.DIMEN_UNSET;
|
||||
|
||||
// Parse the cue settings list.
|
||||
Matcher cueSettingMatcher = CUE_SETTING_PATTERN.matcher(cueHeaderMatcher.group(3));
|
||||
while (cueSettingMatcher.find()) {
|
||||
String name = cueSettingMatcher.group(1);
|
||||
String value = cueSettingMatcher.group(2);
|
||||
try {
|
||||
if ("line".equals(name)) {
|
||||
parseLineAttribute(value, positionHolder);
|
||||
cueLine = positionHolder.position;
|
||||
cueLineType = positionHolder.lineType;
|
||||
cueLineAnchor = positionHolder.positionAnchor;
|
||||
} else if ("align".equals(name)) {
|
||||
cueTextAlignment = parseTextAlignment(value);
|
||||
} else if ("position".equals(name)) {
|
||||
parsePositionAttribute(value, positionHolder);
|
||||
cuePosition = positionHolder.position;
|
||||
cuePositionAnchor = positionHolder.positionAnchor;
|
||||
} else if ("size".equals(name)) {
|
||||
cueWidth = WebvttParserUtil.parsePercentage(value);
|
||||
} else {
|
||||
Log.w(TAG, "Unknown cue setting " + name + ":" + value);
|
||||
}
|
||||
} catch (NumberFormatException e) {
|
||||
Log.w(TAG, "Skipping bad cue setting: " + cueSettingMatcher.group());
|
||||
}
|
||||
}
|
||||
|
||||
if (cuePosition != Cue.DIMEN_UNSET && cuePositionAnchor == Cue.TYPE_UNSET) {
|
||||
// Computed position alignment should be derived from the text alignment if it has not been
|
||||
// set explicitly.
|
||||
cuePositionAnchor = alignmentToAnchor(cueTextAlignment);
|
||||
}
|
||||
|
||||
// Parse the cue text.
|
||||
textBuilder.setLength(0);
|
||||
String line;
|
||||
while ((line = webvttData.readLine()) != null && !line.isEmpty()) {
|
||||
if (textBuilder.length() > 0) {
|
||||
textBuilder.append("\n");
|
||||
}
|
||||
textBuilder.append(line.trim());
|
||||
}
|
||||
|
||||
CharSequence cueText = parseCueText(textBuilder.toString());
|
||||
|
||||
return new WebvttCue(cueStartTime, cueEndTime, cueText, cueTextAlignment, cueLine,
|
||||
cueLineType, cueLineAnchor, cuePosition, cuePositionAnchor, cueWidth);
|
||||
}
|
||||
|
||||
/* package */ static Spanned parseCueText(String markup) {
|
||||
SpannableStringBuilder spannedText = new SpannableStringBuilder();
|
||||
Stack<StartTag> startTagStack = new Stack<>();
|
||||
String[] tagTokens;
|
||||
|
|
@ -121,6 +226,126 @@ import java.util.Stack;
|
|||
return spannedText;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads lines up to and including the next WebVTT cue header.
|
||||
*
|
||||
* @param input The input from which lines should be read.
|
||||
* @return A {@link Matcher} for the WebVTT cue header, or null if the end of the input was
|
||||
* reached without a cue header being found. In the case that a cue header is found, groups 1,
|
||||
* 2 and 3 of the returned matcher contain the start time, end time and settings list.
|
||||
*/
|
||||
public static Matcher findNextCueHeader(ParsableByteArray input) {
|
||||
String line;
|
||||
while ((line = input.readLine()) != null) {
|
||||
if (COMMENT.matcher(line).matches()) {
|
||||
// Skip until the end of the comment block.
|
||||
while ((line = input.readLine()) != null && !line.isEmpty()) {}
|
||||
} else {
|
||||
Matcher cueHeaderMatcher = WebvttCueParser.CUE_HEADER_PATTERN.matcher(line);
|
||||
if (cueHeaderMatcher.matches()) {
|
||||
return cueHeaderMatcher;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private static final class PositionHolder {
|
||||
|
||||
public float position;
|
||||
public int positionAnchor;
|
||||
public int lineType;
|
||||
|
||||
}
|
||||
|
||||
// Internal methods
|
||||
|
||||
private static void parseLineAttribute(String s, PositionHolder out)
|
||||
throws NumberFormatException {
|
||||
int lineAnchor;
|
||||
int commaPosition = s.indexOf(',');
|
||||
if (commaPosition != -1) {
|
||||
lineAnchor = parsePositionAnchor(s.substring(commaPosition + 1));
|
||||
s = s.substring(0, commaPosition);
|
||||
} else {
|
||||
lineAnchor = Cue.TYPE_UNSET;
|
||||
}
|
||||
float line;
|
||||
int lineType;
|
||||
if (s.endsWith("%")) {
|
||||
line = WebvttParserUtil.parsePercentage(s);
|
||||
lineType = Cue.LINE_TYPE_FRACTION;
|
||||
} else {
|
||||
line = Integer.parseInt(s);
|
||||
lineType = Cue.LINE_TYPE_NUMBER;
|
||||
}
|
||||
out.position = line;
|
||||
out.positionAnchor = lineAnchor;
|
||||
out.lineType = lineType;
|
||||
}
|
||||
|
||||
private static void parsePositionAttribute(String s, PositionHolder out)
|
||||
throws NumberFormatException {
|
||||
int positionAnchor;
|
||||
int commaPosition = s.indexOf(',');
|
||||
if (commaPosition != -1) {
|
||||
positionAnchor = parsePositionAnchor(s.substring(commaPosition + 1));
|
||||
s = s.substring(0, commaPosition);
|
||||
} else {
|
||||
positionAnchor = Cue.TYPE_UNSET;
|
||||
}
|
||||
out.position = WebvttParserUtil.parsePercentage(s);
|
||||
out.positionAnchor = positionAnchor;
|
||||
out.lineType = Cue.TYPE_UNSET;
|
||||
}
|
||||
|
||||
private static int parsePositionAnchor(String s) {
|
||||
switch (s) {
|
||||
case "start":
|
||||
return Cue.ANCHOR_TYPE_START;
|
||||
case "middle":
|
||||
return Cue.ANCHOR_TYPE_MIDDLE;
|
||||
case "end":
|
||||
return Cue.ANCHOR_TYPE_END;
|
||||
default:
|
||||
Log.w(TAG, "Invalid anchor value: " + s);
|
||||
return Cue.TYPE_UNSET;
|
||||
}
|
||||
}
|
||||
|
||||
private static Alignment parseTextAlignment(String s) {
|
||||
switch (s) {
|
||||
case "start":
|
||||
case "left":
|
||||
return Alignment.ALIGN_NORMAL;
|
||||
case "middle":
|
||||
return Alignment.ALIGN_CENTER;
|
||||
case "end":
|
||||
case "right":
|
||||
return Alignment.ALIGN_OPPOSITE;
|
||||
default:
|
||||
Log.w(TAG, "Invalid alignment value: " + s);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static int alignmentToAnchor(Alignment alignment) {
|
||||
if (alignment == null) {
|
||||
return Cue.TYPE_UNSET;
|
||||
}
|
||||
switch (alignment) {
|
||||
case ALIGN_NORMAL:
|
||||
return Cue.ANCHOR_TYPE_START;
|
||||
case ALIGN_CENTER:
|
||||
return Cue.ANCHOR_TYPE_MIDDLE;
|
||||
case ALIGN_OPPOSITE:
|
||||
return Cue.ANCHOR_TYPE_END;
|
||||
default:
|
||||
Log.w(TAG, "Unrecognized alignment: " + alignment);
|
||||
return Cue.ANCHOR_TYPE_START;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find end of tag (>). The position returned is the position of the > plus one (exclusive).
|
||||
*
|
||||
|
|
|
|||
|
|
@ -16,18 +16,13 @@
|
|||
package com.google.android.exoplayer.text.webvtt;
|
||||
|
||||
import com.google.android.exoplayer.ParserException;
|
||||
import com.google.android.exoplayer.text.Cue;
|
||||
import com.google.android.exoplayer.text.SubtitleParser;
|
||||
import com.google.android.exoplayer.util.MimeTypes;
|
||||
import com.google.android.exoplayer.util.ParsableByteArray;
|
||||
|
||||
import android.text.Layout.Alignment;
|
||||
import android.text.TextUtils;
|
||||
import android.util.Log;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* A simple WebVTT parser.
|
||||
|
|
@ -36,16 +31,12 @@ import java.util.regex.Pattern;
|
|||
*/
|
||||
public final class WebvttParser implements SubtitleParser {
|
||||
|
||||
private static final String TAG = "WebvttParser";
|
||||
|
||||
private static final Pattern CUE_SETTING = Pattern.compile("(\\S+?):(\\S+)");
|
||||
|
||||
private final PositionHolder positionHolder;
|
||||
private final StringBuilder textBuilder;
|
||||
private final WebvttCueParser cueParser;
|
||||
private final ParsableByteArray parsableWebvttData;
|
||||
|
||||
public WebvttParser() {
|
||||
positionHolder = new PositionHolder();
|
||||
textBuilder = new StringBuilder();
|
||||
cueParser = new WebvttCueParser();
|
||||
parsableWebvttData = new ParsableByteArray();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -55,190 +46,20 @@ public final class WebvttParser implements SubtitleParser {
|
|||
|
||||
@Override
|
||||
public final WebvttSubtitle parse(byte[] bytes, int offset, int length) throws ParserException {
|
||||
ParsableByteArray webvttData = new ParsableByteArray(bytes, offset + length);
|
||||
webvttData.setPosition(offset);
|
||||
parsableWebvttData.reset(bytes, offset + length);
|
||||
parsableWebvttData.setPosition(offset);
|
||||
|
||||
// Validate the first line of the header, and skip the remainder.
|
||||
WebvttParserUtil.validateWebvttHeaderLine(webvttData);
|
||||
while (!TextUtils.isEmpty(webvttData.readLine())) {}
|
||||
WebvttParserUtil.validateWebvttHeaderLine(parsableWebvttData);
|
||||
while (!TextUtils.isEmpty(parsableWebvttData.readLine())) {}
|
||||
|
||||
// Process the cues and text.
|
||||
// Extract Cues
|
||||
ArrayList<WebvttCue> subtitles = new ArrayList<>();
|
||||
Matcher cueHeaderMatcher;
|
||||
while ((cueHeaderMatcher = WebvttParserUtil.findNextCueHeader(webvttData)) != null) {
|
||||
long cueStartTime;
|
||||
long cueEndTime;
|
||||
try {
|
||||
// Parse the cue start and end times.
|
||||
cueStartTime = WebvttParserUtil.parseTimestampUs(cueHeaderMatcher.group(1));
|
||||
cueEndTime = WebvttParserUtil.parseTimestampUs(cueHeaderMatcher.group(2));
|
||||
} catch (NumberFormatException e) {
|
||||
Log.w(TAG, "Skipping cue with bad header: " + cueHeaderMatcher.group());
|
||||
continue;
|
||||
}
|
||||
|
||||
// Default cue settings.
|
||||
Alignment cueTextAlignment = null;
|
||||
float cueLine = Cue.DIMEN_UNSET;
|
||||
int cueLineType = Cue.TYPE_UNSET;
|
||||
int cueLineAnchor = Cue.TYPE_UNSET;
|
||||
float cuePosition = Cue.DIMEN_UNSET;
|
||||
int cuePositionAnchor = Cue.TYPE_UNSET;
|
||||
float cueWidth = Cue.DIMEN_UNSET;
|
||||
|
||||
// Parse the cue settings list.
|
||||
Matcher cueSettingMatcher = CUE_SETTING.matcher(cueHeaderMatcher.group(3));
|
||||
while (cueSettingMatcher.find()) {
|
||||
String name = cueSettingMatcher.group(1);
|
||||
String value = cueSettingMatcher.group(2);
|
||||
try {
|
||||
if ("line".equals(name)) {
|
||||
parseLineAttribute(value, positionHolder);
|
||||
cueLine = positionHolder.position;
|
||||
cueLineType = positionHolder.lineType;
|
||||
cueLineAnchor = positionHolder.positionAnchor;
|
||||
} else if ("align".equals(name)) {
|
||||
cueTextAlignment = parseTextAlignment(value);
|
||||
} else if ("position".equals(name)) {
|
||||
parsePositionAttribute(value, positionHolder);
|
||||
cuePosition = positionHolder.position;
|
||||
cuePositionAnchor = positionHolder.positionAnchor;
|
||||
} else if ("size".equals(name)) {
|
||||
cueWidth = parsePercentage(value);
|
||||
} else {
|
||||
Log.w(TAG, "Unknown cue setting " + name + ":" + value);
|
||||
}
|
||||
} catch (NumberFormatException e) {
|
||||
Log.w(TAG, "Skipping bad cue setting: " + cueSettingMatcher.group());
|
||||
}
|
||||
}
|
||||
|
||||
if (cuePosition != Cue.DIMEN_UNSET && cuePositionAnchor == Cue.TYPE_UNSET) {
|
||||
// Computed position alignment should be derived from the text alignment if it has not been
|
||||
// set explicitly.
|
||||
cuePositionAnchor = alignmentToAnchor(cueTextAlignment);
|
||||
}
|
||||
|
||||
// Parse the cue text.
|
||||
textBuilder.setLength(0);
|
||||
String line;
|
||||
while ((line = webvttData.readLine()) != null && !line.isEmpty()) {
|
||||
if (textBuilder.length() > 0) {
|
||||
textBuilder.append("\n");
|
||||
}
|
||||
textBuilder.append(line.trim());
|
||||
}
|
||||
|
||||
CharSequence cueText = WebvttCueParser.parse(textBuilder.toString());
|
||||
|
||||
WebvttCue cue = new WebvttCue(cueStartTime, cueEndTime, cueText, cueTextAlignment, cueLine,
|
||||
cueLineType, cueLineAnchor, cuePosition, cuePositionAnchor, cueWidth);
|
||||
subtitles.add(cue);
|
||||
WebvttCue currentWebvttCue;
|
||||
while ((currentWebvttCue = cueParser.parseNextValidCue(parsableWebvttData)) != null) {
|
||||
subtitles.add(currentWebvttCue);
|
||||
}
|
||||
|
||||
return new WebvttSubtitle(subtitles);
|
||||
}
|
||||
|
||||
private static void parseLineAttribute(String s, PositionHolder out)
|
||||
throws NumberFormatException {
|
||||
int lineAnchor;
|
||||
int commaPosition = s.indexOf(",");
|
||||
if (commaPosition != -1) {
|
||||
lineAnchor = parsePositionAnchor(s.substring(commaPosition + 1));
|
||||
s = s.substring(0, commaPosition);
|
||||
} else {
|
||||
lineAnchor = Cue.TYPE_UNSET;
|
||||
}
|
||||
float line;
|
||||
int lineType;
|
||||
if (s.endsWith("%")) {
|
||||
line = parsePercentage(s);
|
||||
lineType = Cue.LINE_TYPE_FRACTION;
|
||||
} else {
|
||||
line = Integer.parseInt(s);
|
||||
lineType = Cue.LINE_TYPE_NUMBER;
|
||||
}
|
||||
out.position = line;
|
||||
out.positionAnchor = lineAnchor;
|
||||
out.lineType = lineType;
|
||||
}
|
||||
|
||||
private static void parsePositionAttribute(String s, PositionHolder out)
|
||||
throws NumberFormatException {
|
||||
int positionAnchor;
|
||||
int commaPosition = s.indexOf(",");
|
||||
if (commaPosition != -1) {
|
||||
positionAnchor = parsePositionAnchor(s.substring(commaPosition + 1));
|
||||
s = s.substring(0, commaPosition);
|
||||
} else {
|
||||
positionAnchor = Cue.TYPE_UNSET;
|
||||
}
|
||||
out.position = parsePercentage(s);
|
||||
out.positionAnchor = positionAnchor;
|
||||
out.lineType = Cue.TYPE_UNSET;
|
||||
}
|
||||
|
||||
private static float parsePercentage(String s) throws NumberFormatException {
|
||||
if (!s.endsWith("%")) {
|
||||
throw new NumberFormatException("Percentages must end with %");
|
||||
}
|
||||
s = s.substring(0, s.length() - 1);
|
||||
return Float.parseFloat(s) / 100;
|
||||
}
|
||||
|
||||
private static int parsePositionAnchor(String s) {
|
||||
switch (s) {
|
||||
case "start":
|
||||
return Cue.ANCHOR_TYPE_START;
|
||||
case "middle":
|
||||
return Cue.ANCHOR_TYPE_MIDDLE;
|
||||
case "end":
|
||||
return Cue.ANCHOR_TYPE_END;
|
||||
default:
|
||||
Log.w(TAG, "Invalid anchor value: " + s);
|
||||
return Cue.TYPE_UNSET;
|
||||
}
|
||||
}
|
||||
|
||||
private static Alignment parseTextAlignment(String s) {
|
||||
switch (s) {
|
||||
case "start":
|
||||
case "left":
|
||||
return Alignment.ALIGN_NORMAL;
|
||||
case "middle":
|
||||
return Alignment.ALIGN_CENTER;
|
||||
case "end":
|
||||
case "right":
|
||||
return Alignment.ALIGN_OPPOSITE;
|
||||
default:
|
||||
Log.w(TAG, "Invalid alignment value: " + s);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static int alignmentToAnchor(Alignment alignment) {
|
||||
if (alignment == null) {
|
||||
return Cue.TYPE_UNSET;
|
||||
}
|
||||
switch (alignment) {
|
||||
case ALIGN_NORMAL:
|
||||
return Cue.ANCHOR_TYPE_START;
|
||||
case ALIGN_CENTER:
|
||||
return Cue.ANCHOR_TYPE_MIDDLE;
|
||||
case ALIGN_OPPOSITE:
|
||||
return Cue.ANCHOR_TYPE_END;
|
||||
default:
|
||||
Log.w(TAG, "Unrecognized alignment: " + alignment);
|
||||
return Cue.ANCHOR_TYPE_START;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class PositionHolder {
|
||||
|
||||
public float position;
|
||||
public int positionAnchor;
|
||||
public int lineType;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@ package com.google.android.exoplayer.text.webvtt;
|
|||
import com.google.android.exoplayer.ParserException;
|
||||
import com.google.android.exoplayer.util.ParsableByteArray;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
|
|
@ -27,8 +26,6 @@ import java.util.regex.Pattern;
|
|||
public final class WebvttParserUtil {
|
||||
|
||||
private static final Pattern HEADER = Pattern.compile("^\uFEFF?WEBVTT((\u0020|\u0009).*)?$");
|
||||
private static final Pattern COMMENT = Pattern.compile("^NOTE((\u0020|\u0009).*)?$");
|
||||
private static final Pattern CUE_HEADER = Pattern.compile("^(\\S+)\\s+-->\\s+(\\S+)(.*)?$");
|
||||
|
||||
private WebvttParserUtil() {}
|
||||
|
||||
|
|
@ -45,30 +42,6 @@ public final class WebvttParserUtil {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads lines up to and including the next WebVTT cue header.
|
||||
*
|
||||
* @param input The input from which lines should be read.
|
||||
* @return A {@link Matcher} for the WebVTT cue header, or null if the end of the input was
|
||||
* reached without a cue header being found. In the case that a cue header is found, groups 1,
|
||||
* 2 and 3 of the returned matcher contain the start time, end time and settings list.
|
||||
*/
|
||||
public static Matcher findNextCueHeader(ParsableByteArray input) {
|
||||
String line;
|
||||
while ((line = input.readLine()) != null) {
|
||||
if (COMMENT.matcher(line).matches()) {
|
||||
// Skip until the end of the comment block.
|
||||
while ((line = input.readLine()) != null && !line.isEmpty()) {}
|
||||
} else {
|
||||
Matcher cueHeaderMatcher = CUE_HEADER.matcher(line);
|
||||
if (cueHeaderMatcher.matches()) {
|
||||
return cueHeaderMatcher;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a WebVTT timestamp.
|
||||
*
|
||||
|
|
@ -86,4 +59,17 @@ public final class WebvttParserUtil {
|
|||
return (value * 1000 + Long.parseLong(parts[1])) * 1000;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a percentage and returns a scaled float.
|
||||
* @param s contains the number to parse.
|
||||
* @return a float scaled number. 1.0 represents 100%.
|
||||
* @throws NumberFormatException if the number format is invalid or does not end with '%'.
|
||||
*/
|
||||
public static float parsePercentage(String s) throws NumberFormatException {
|
||||
if (!s.endsWith("%")) {
|
||||
throw new NumberFormatException("Percentages must end with %");
|
||||
}
|
||||
return Float.parseFloat(s.substring(0, s.length() - 1)) / 100;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue