diff --git a/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java b/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java
index 79913d2aa9..0eaeddd74a 100644
--- a/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java
+++ b/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java
@@ -531,6 +531,54 @@ public final class ParsableByteArray {
return line;
}
+ /**
+ * Reads a line of text.
+ *
+ * <p>A line is considered to be terminated by any one of a carriage return ('\r'), a line feed
+ * ('\n'), or a carriage return followed immediately by a line feed ('\r\n'). The UTF-16 charset
+ * is used. This method discards leading UTF-16 byte order marks (BOM), if present.
+ *
+ * @param isLittleEndian Whether the data is encoded as UTF-16 (LE) ({@code true}) or UTF-16 (BE) ({@code false}).
+ * @return The line not including any line-termination characters, or null if the end of the data
+ * has already been reached.
+ */
+  @Nullable
+  public String readLineUtf16(boolean isLittleEndian) {
+    if (bytesLeft() < 2) {
+      // Fewer than two bytes cannot hold a UTF-16 code unit. Consume any stray trailing byte so
+      // that callers looping until null (or until bytesLeft() == 0) are guaranteed to terminate
+      // instead of repeatedly receiving an empty line.
+      position = limit;
+      return null;
+    }
+
+    // Index of the first byte of the line terminator, or of the point where whole code units ran
+    // out if no terminator was found.
+    int lineLimit = calculateLineLimitForUtf16(isLittleEndian);
+
+    if (lineLimit - position >= 2 && isUtf16BOM(data[position], data[position + 1])) {
+      // There's a UTF-16 byte order mark at the start of the line. Discard it.
+      position += 2;
+    }
+
+    int lineLength = lineLimit - position;
+    String line =
+        isLittleEndian
+            ? Util.fromUtf16LEBytes(data, position, lineLength)
+            : Util.fromUtf16BEBytes(data, position, lineLength);
+    position = lineLimit;
+
+    // Skip exactly one terminator: '\r', '\n' or '\r\n'. Each peek reads two bytes, so it is only
+    // attempted when two bytes remain before the limit; this avoids reading past the limit when
+    // the data ends with an odd trailing byte.
+    if (limit - position >= 2
+        && isEqualsInUtf16(data[position], data[position + 1], '\r', isLittleEndian)) {
+      position += 2;
+    }
+    if (limit - position >= 2
+        && isEqualsInUtf16(data[position], data[position + 1], '\n', isLittleEndian)) {
+      position += 2;
+    }
+    return line;
+  }
+
/**
* Reads a long value encoded by UTF-8 encoding
*
@@ -565,4 +613,29 @@ public final class ParsableByteArray {
position += length;
return value;
}
+
+  /**
+   * Returns whether the UTF-16 code unit formed by two consecutive bytes equals {@code value}.
+   *
+   * @param first The byte that appears first in the data.
+   * @param second The byte that appears second in the data.
+   * @param value The character to compare the code unit against.
+   * @param isLittleEndian Whether {@code first} is the low-order byte ({@code true}) or the
+   *     high-order byte ({@code false}) of the code unit.
+   * @return Whether the assembled code unit equals {@code value}.
+   */
+  private boolean isEqualsInUtf16(byte first, byte second, char value, boolean isLittleEndian) {
+    // Java bytes are signed: without masking, any byte >= 0x80 is sign-extended when promoted to
+    // int and sets the upper bits of the combined value, so the comparison could never succeed.
+    int firstUnsigned = first & 0xFF;
+    int secondUnsigned = second & 0xFF;
+    int codeUnit =
+        isLittleEndian
+            ? (secondUnsigned << 8) | firstUnsigned
+            : (firstUnsigned << 8) | secondUnsigned;
+    return codeUnit == value;
+  }
+
+  /**
+   * Returns whether the given pair of bytes is a UTF-16 byte order mark, in either byte order.
+   *
+   * @param first The byte that appears first in the data.
+   * @param second The byte that appears second in the data.
+   * @return Whether the pair is {@code FE FF} or {@code FF FE}.
+   */
+  private boolean isUtf16BOM(byte first, byte second) {
+    byte bomFe = (byte) 0xFE;
+    byte bomFf = (byte) 0xFF;
+    if (first == bomFf) {
+      // FF FE
+      return second == bomFe;
+    }
+    // FE FF
+    return first == bomFe && second == bomFf;
+  }
+
+  /**
+   * Scans forward from the current position, two bytes at a time, for the next UTF-16 code unit
+   * that {@link Util#isLinebreak} reports as a line break.
+   *
+   * @param isLittleEndian Whether each code unit stores its low-order byte first.
+   * @return The index of the first byte of the line break, or the index at which fewer than two
+   *     bytes remained before the limit if no line break was found.
+   */
+  private int calculateLineLimitForUtf16(boolean isLittleEndian) {
+    int lineLimit = position;
+    // A code unit needs two bytes, so stop as soon as fewer than two remain before the limit.
+    while (lineLimit < limit - 1) {
+      // Mask so the bytes are treated as unsigned; otherwise bytes >= 0x80 sign-extend and the
+      // value handed to isLinebreak is not the real code unit.
+      int firstByte = data[lineLimit] & 0xFF;
+      int secondByte = data[lineLimit + 1] & 0xFF;
+      int codeUnit =
+          isLittleEndian ? (secondByte << 8) | firstByte : (firstByte << 8) | secondByte;
+      if (Util.isLinebreak(codeUnit)) {
+        break;
+      }
+      lineLimit += 2;
+    }
+    return lineLimit;
+  }
}
diff --git a/library/common/src/main/java/com/google/android/exoplayer2/util/Util.java b/library/common/src/main/java/com/google/android/exoplayer2/util/Util.java
index b0479023d2..41dd6ce193 100644
--- a/library/common/src/main/java/com/google/android/exoplayer2/util/Util.java
+++ b/library/common/src/main/java/com/google/android/exoplayer2/util/Util.java
@@ -682,6 +682,30 @@ public final class Util {
return new String(bytes, offset, length, Charsets.UTF_8);
}
+ /**
+ * Returns a new {@link String} constructed by decoding UTF-16 (LE) encoded bytes in a subarray.
+ *
+ * @param bytes The UTF-16 encoded bytes to decode.
+ * @param offset The index of the first byte to decode.
+ * @param length The number of bytes to decode.
+ * @return The string.
+ */
+ public static String fromUtf16LEBytes(byte[] bytes, int offset, int length) {
+ return new String(bytes, offset, length, Charsets.UTF_16LE);
+ }
+
+ /**
+ * Returns a new {@link String} constructed by decoding UTF-16 (BE) encoded bytes in a subarray.
+ *
+ * @param bytes The UTF-16 encoded bytes to decode.
+ * @param offset The index of the first byte to decode.
+ * @param length The number of bytes to decode.
+ * @return The string.
+ */
+ public static String fromUtf16BEBytes(byte[] bytes, int offset, int length) {
+ return new String(bytes, offset, length, Charsets.UTF_16BE);
+ }
+
/**
* Returns a new byte array containing the code points of a {@link String} encoded using UTF-8.
*
diff --git a/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java b/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java
index 2ae22bacd2..df5c2a7bb4 100644
--- a/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java
+++ b/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java
@@ -26,6 +26,8 @@ import com.google.android.exoplayer2.util.Assertions;
import com.google.android.exoplayer2.util.Log;
import com.google.android.exoplayer2.util.LongArray;
import com.google.android.exoplayer2.util.ParsableByteArray;
+import com.google.common.base.Charsets;
+import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -75,8 +77,25 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
LongArray cueTimesUs = new LongArray();
ParsableByteArray subripData = new ParsableByteArray(bytes, length);
+ @Nullable Charset utf16Charset;
+ if (bytes.length >= 2) {
+ utf16Charset = getUtf16Charset(bytes[0], bytes[1]);
+ } else {
+ utf16Charset = null;
+ }
+
@Nullable String currentLine;
- while ((currentLine = subripData.readLine()) != null) {
+ while (true) {
+ if (utf16Charset != null) {
+ currentLine = subripData.readLineUtf16(utf16Charset.equals(Charsets.UTF_16LE));
+ } else {
+ currentLine = subripData.readLine();
+ }
+
+ if (currentLine == null) {
+ break;
+ }
+
if (currentLine.length() == 0) {
// Skip blank lines.
continue;
@@ -91,7 +110,11 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
}
// Read and parse the timing line.
- currentLine = subripData.readLine();
+ if (utf16Charset != null) {
+ currentLine = subripData.readLineUtf16(utf16Charset.equals(Charsets.UTF_16LE));
+ } else {
+ currentLine = subripData.readLine();
+ }
if (currentLine == null) {
Log.w(TAG, "Unexpected end");
break;
@@ -109,13 +132,21 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
// Read and parse the text and tags.
textBuilder.setLength(0);
tags.clear();
- currentLine = subripData.readLine();
+ if (utf16Charset != null) {
+ currentLine = subripData.readLineUtf16(utf16Charset.equals(Charsets.UTF_16LE));
+ } else {
+ currentLine = subripData.readLine();
+ }
while (!TextUtils.isEmpty(currentLine)) {
if (textBuilder.length() > 0) {
textBuilder.append("
");
}
textBuilder.append(processLine(currentLine, tags));
- currentLine = subripData.readLine();
+ if (utf16Charset != null) {
+ currentLine = subripData.readLineUtf16(utf16Charset.equals(Charsets.UTF_16LE));
+ } else {
+ currentLine = subripData.readLine();
+ }
}
Spanned text = Html.fromHtml(textBuilder.toString());
@@ -138,6 +169,21 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
return new SubripSubtitle(cuesArray, cueTimesUsArray);
}
+  /**
+   * Maps a pair of leading bytes to the UTF-16 charset whose byte order mark they form.
+   *
+   * @param first The first byte of the data.
+   * @param second The second byte of the data.
+   * @return {@link Charsets#UTF_16BE} for {@code FE FF}, {@link Charsets#UTF_16LE} for {@code FF
+   *     FE}, or null if the bytes are not a UTF-16 byte order mark.
+   */
+  @Nullable
+  private Charset getUtf16Charset(byte first, byte second) {
+    // Combine the two bytes (as unsigned values) so both marks can be matched in one switch.
+    int leadingBytes = ((first & 0xFF) << 8) | (second & 0xFF);
+    switch (leadingBytes) {
+      case 0xFEFF:
+        // UTF-16 (BE)
+        return Charsets.UTF_16BE;
+      case 0xFFFE:
+        // UTF-16 (LE)
+        return Charsets.UTF_16LE;
+      default:
+        return null;
+    }
+  }
+
/**
* Trims and removes tags from the given line. The removed tags are added to {@code tags}.
*
diff --git a/library/extractor/src/test/java/com/google/android/exoplayer2/text/subrip/SubripDecoderTest.java b/library/extractor/src/test/java/com/google/android/exoplayer2/text/subrip/SubripDecoderTest.java
index c868cc9a70..f7175b0b6b 100644
--- a/library/extractor/src/test/java/com/google/android/exoplayer2/text/subrip/SubripDecoderTest.java
+++ b/library/extractor/src/test/java/com/google/android/exoplayer2/text/subrip/SubripDecoderTest.java
@@ -40,6 +40,8 @@ public final class SubripDecoderTest {
private static final String TYPICAL_NEGATIVE_TIMESTAMPS =
"media/subrip/typical_negative_timestamps";
private static final String TYPICAL_UNEXPECTED_END = "media/subrip/typical_unexpected_end";
+ private static final String TYPICAL_UTF16BE = "media/subrip/typical_utf16be";
+ private static final String TYPICAL_UTF16LE = "media/subrip/typical_utf16le";
private static final String TYPICAL_WITH_TAGS = "media/subrip/typical_with_tags";
private static final String TYPICAL_NO_HOURS_AND_MILLIS =
"media/subrip/typical_no_hours_and_millis";
@@ -80,6 +82,34 @@ public final class SubripDecoderTest {
assertTypicalCue3(subtitle, 4);
}
+  /** Decodes the UTF-16 (LE) fixture and checks it yields the same cues as the typical file. */
+  @Test
+  public void decodeTypicalUtf16LE() throws IOException {
+    byte[] subripBytes =
+        TestUtil.getByteArray(ApplicationProvider.getApplicationContext(), TYPICAL_UTF16LE);
+    Subtitle decoded = new SubripDecoder().decode(subripBytes, subripBytes.length, false);
+
+    // The cue assertions below use event time indices 0, 2 and 4 out of six event times.
+    assertThat(decoded.getEventTimeCount()).isEqualTo(6);
+    assertTypicalCue1(decoded, 0);
+    assertTypicalCue2(decoded, 2);
+    assertTypicalCue3(decoded, 4);
+  }
+
+  /** Decodes the UTF-16 (BE) fixture and checks it yields the same cues as the typical file. */
+  @Test
+  public void decodeTypicalUtf16BE() throws IOException {
+    byte[] subripBytes =
+        TestUtil.getByteArray(ApplicationProvider.getApplicationContext(), TYPICAL_UTF16BE);
+    Subtitle decoded = new SubripDecoder().decode(subripBytes, subripBytes.length, false);
+
+    // The cue assertions below use event time indices 0, 2 and 4 out of six event times.
+    assertThat(decoded.getEventTimeCount()).isEqualTo(6);
+    assertTypicalCue1(decoded, 0);
+    assertTypicalCue2(decoded, 2);
+    assertTypicalCue3(decoded, 4);
+  }
+
@Test
public void decodeTypicalExtraBlankLine() throws IOException {
SubripDecoder decoder = new SubripDecoder();
diff --git a/testdata/src/test/assets/media/subrip/typical_utf16be b/testdata/src/test/assets/media/subrip/typical_utf16be
new file mode 100644
index 0000000000..9531c26808
Binary files /dev/null and b/testdata/src/test/assets/media/subrip/typical_utf16be differ
diff --git a/testdata/src/test/assets/media/subrip/typical_utf16le b/testdata/src/test/assets/media/subrip/typical_utf16le
new file mode 100644
index 0000000000..f73574d72a
Binary files /dev/null and b/testdata/src/test/assets/media/subrip/typical_utf16le differ