diff --git a/RELEASENOTES.md b/RELEASENOTES.md index 872ab9349e..711b3c4be0 100644 --- a/RELEASENOTES.md +++ b/RELEASENOTES.md @@ -67,6 +67,9 @@ to remove a previously set `Surface` if the codec supports this (`MediaCodecInfo.detachedSurfaceSupported`). * Text: + * Add a custom `VoiceSpan` and populate it for + [WebVTT voice spans](https://www.w3.org/TR/webvtt1/#webvtt-cue-voice-span) + ([#1632](https://github.com/androidx/media/issues/1632)). * Metadata: * Image: * Add `ExternallyLoadedImageDecoder` for simplified integration with diff --git a/libraries/common/src/main/java/androidx/media3/common/text/CustomSpanBundler.java b/libraries/common/src/main/java/androidx/media3/common/text/CustomSpanBundler.java index edcda586d2..73a64fc9fa 100644 --- a/libraries/common/src/main/java/androidx/media3/common/text/CustomSpanBundler.java +++ b/libraries/common/src/main/java/androidx/media3/common/text/CustomSpanBundler.java @@ -45,20 +45,11 @@ import java.util.ArrayList; */ /* package */ final class CustomSpanBundler { - /** - * Media3 custom span implementations. One of the following: - * - * - */ + /** Media3 custom span implementations. 
*/ @Documented @Retention(RetentionPolicy.SOURCE) @Target({TYPE_USE}) - @IntDef({UNKNOWN, RUBY, TEXT_EMPHASIS, HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT}) + @IntDef({UNKNOWN, RUBY, TEXT_EMPHASIS, HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT, VOICE}) private @interface CustomSpanType {} private static final int UNKNOWN = -1; @@ -69,6 +60,8 @@ import java.util.ArrayList; private static final int HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT = 3; + private static final int VOICE = 4; + private static final String FIELD_START_INDEX = Util.intToStringMaxRadix(0); private static final String FIELD_END_INDEX = Util.intToStringMaxRadix(1); private static final String FIELD_FLAGS = Util.intToStringMaxRadix(2); @@ -94,6 +87,11 @@ import java.util.ArrayList; text, span, /* spanType= */ HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT, /* params= */ null); bundledCustomSpans.add(bundle); } + for (VoiceSpan span : text.getSpans(0, text.length(), VoiceSpan.class)) { + Bundle bundle = + spanToBundle(text, span, /* spanType= */ VOICE, /* params= */ span.toBundle()); + bundledCustomSpans.add(bundle); + } return bundledCustomSpans; } @@ -113,6 +111,9 @@ import java.util.ArrayList; case HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT: text.setSpan(new HorizontalTextInVerticalContextSpan(), start, end, flags); break; + case VOICE: + text.setSpan(VoiceSpan.fromBundle(checkNotNull(span)), start, end, flags); + break; default: break; } diff --git a/libraries/common/src/main/java/androidx/media3/common/text/VoiceSpan.java b/libraries/common/src/main/java/androidx/media3/common/text/VoiceSpan.java new file mode 100644 index 0000000000..fc73efb14e --- /dev/null +++ b/libraries/common/src/main/java/androidx/media3/common/text/VoiceSpan.java @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2024 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package androidx.media3.common.text; + +import static androidx.media3.common.util.Assertions.checkNotNull; + +import android.os.Bundle; +import androidx.media3.common.util.UnstableApi; +import androidx.media3.common.util.Util; + +/** + * A span representing the speaker of the spanned text. + * + *

For example a WebVTT voice + * span. + */ +@UnstableApi +public final class VoiceSpan { + + /** The voice name. */ + public final String name; + + private static final String FIELD_NAME = Util.intToStringMaxRadix(0); + + public VoiceSpan(String name) { + this.name = name; + } + + public Bundle toBundle() { + Bundle bundle = new Bundle(); + bundle.putString(FIELD_NAME, name); + return bundle; + } + + public static VoiceSpan fromBundle(Bundle bundle) { + return new VoiceSpan(checkNotNull(bundle.getString(FIELD_NAME))); + } +} diff --git a/libraries/common/src/test/java/androidx/media3/common/text/CustomCueBundlerTest.java b/libraries/common/src/test/java/androidx/media3/common/text/CustomCueBundlerTest.java index 45a8d2ab1e..5f7a1bd08a 100644 --- a/libraries/common/src/test/java/androidx/media3/common/text/CustomCueBundlerTest.java +++ b/libraries/common/src/test/java/androidx/media3/common/text/CustomCueBundlerTest.java @@ -42,6 +42,7 @@ import org.junit.runner.RunWith; @RunWith(AndroidJUnit4.class) public class CustomCueBundlerTest { + private static final VoiceSpan VOICE_SPAN = new VoiceSpan("name"); private static final RubySpan RUBY_SPAN = new RubySpan("ruby text", TextAnnotation.POSITION_AFTER); private static final TextEmphasisSpan TEXT_EMPHASIS_SPAN = @@ -55,7 +56,8 @@ public class CustomCueBundlerTest { ImmutableMap.of( RUBY_SPAN, new Pair<>(1, 2), TEXT_EMPHASIS_SPAN, new Pair<>(2, 3), - HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN, new Pair<>(5, 7)); + HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN, new Pair<>(5, 7), + VOICE_SPAN, new Pair<>(8, 10)); @Test public void serializingSpannableWithAllCustomSpans() { @@ -92,6 +94,11 @@ public class CustomCueBundlerTest { .hasHorizontalTextInVerticalContextSpanBetween( ALL_SPANS_TO_START_END_INDEX.get(HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN).first, ALL_SPANS_TO_START_END_INDEX.get(HORIZONTAL_TEXT_IN_VERTICAL_CONTEXT_SPAN).second); + SpannedSubject.assertThat(result) + .hasVoiceSpanBetween( + 
ALL_SPANS_TO_START_END_INDEX.get(VOICE_SPAN).first, + ALL_SPANS_TO_START_END_INDEX.get(VOICE_SPAN).second) + .withName(VOICE_SPAN.name); } @Test diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/text/webvtt/WebvttCueParser.java b/libraries/extractor/src/main/java/androidx/media3/extractor/text/webvtt/WebvttCueParser.java index cbf44a94b6..3b9d048b1f 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/text/webvtt/WebvttCueParser.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/text/webvtt/WebvttCueParser.java @@ -43,6 +43,7 @@ import androidx.media3.common.text.HorizontalTextInVerticalContextSpan; import androidx.media3.common.text.RubySpan; import androidx.media3.common.text.SpanUtil; import androidx.media3.common.text.TextAnnotation; +import androidx.media3.common.text.VoiceSpan; import androidx.media3.common.util.Assertions; import androidx.media3.common.util.Log; import androidx.media3.common.util.ParsableByteArray; @@ -555,8 +556,10 @@ public final class WebvttCueParser { case TAG_CLASS: applyDefaultColors(text, startTag.classes, start, end); break; - case TAG_LANG: case TAG_VOICE: + applyVoiceSpan(text, startTag.voice, start, end); + break; + case TAG_LANG: case "": // Case of the "whole cue" virtual tag. 
break; default: @@ -658,6 +661,11 @@ public final class WebvttCueParser { } } + private static void applyVoiceSpan( + SpannableStringBuilder text, String voice, int start, int end) { + text.setSpan(new VoiceSpan(voice), start, end, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE); + } + private static void applyStyleToText( SpannableStringBuilder spannedText, WebvttCssStyle style, int start, int end) { if (style == null) { diff --git a/libraries/extractor/src/test/java/androidx/media3/extractor/text/webvtt/WebvttCueParserTest.java b/libraries/extractor/src/test/java/androidx/media3/extractor/text/webvtt/WebvttCueParserTest.java index e49bc8604b..14cf6e55dd 100644 --- a/libraries/extractor/src/test/java/androidx/media3/extractor/text/webvtt/WebvttCueParserTest.java +++ b/libraries/extractor/src/test/java/androidx/media3/extractor/text/webvtt/WebvttCueParserTest.java @@ -46,7 +46,7 @@ public final class WebvttCueParserTest { public void parseStrictValidUnsupportedTagsStrippedOut() throws Exception { Spanned text = parseCueText( - "This is text with " + "This is text with " + "html tags"); assertThat(text.toString()).isEqualTo("This is text with html tags"); @@ -242,6 +242,45 @@ public final class WebvttCueParserTest { assertThat(text.toString()).isEqualTo("&&&&&&&"); } + @Test + public void parseEmptyVoiceSpan() throws Exception { + Spanned text = parseCueText("Text with a single voice span"); + + assertThat(text.toString()).isEqualTo("Text with a single voice span"); + assertThat(text).hasVoiceSpanBetween(0, "Text with a single voice span".length()).withName(""); + } + + @Test + public void parseVoiceSpanWithName() throws Exception { + Spanned text = parseCueText("Text with a single voice span"); + + assertThat(text.toString()).isEqualTo("Text with a single voice span"); + assertThat(text) + .hasVoiceSpanBetween(0, "Text with a single voice span".length()) + .withName("Esme"); + } + + @Test + public void ignoreVoiceSpanClasses() throws Exception { + Spanned text = 
parseCueText("Text with a single voice span"); + + assertThat(text.toString()).isEqualTo("Text with a single voice span"); + assertThat(text) + .hasVoiceSpanBetween(0, "Text with a single voice span".length()) + .withName("Esme"); + } + + @Test + public void parseMultipleVoiceSpans() throws Exception { + Spanned text = parseCueText("Text with multiple voice spans"); + + assertThat(text.toString()).isEqualTo("Text with multiple voice spans"); + assertThat(text).hasVoiceSpanBetween(0, "Text with ".length()).withName("Esme"); + assertThat(text) + .hasVoiceSpanBetween("Text with ".length(), "Text with multiple voice spans".length()) + .withName("Mary"); + } + private static Spanned parseCueText(String string) { return WebvttCueParser.parseCueText( /* id= */ null, string, /* styles= */ Collections.emptyList()); diff --git a/libraries/test_utils/src/main/java/androidx/media3/test/utils/truth/SpannedSubject.java b/libraries/test_utils/src/main/java/androidx/media3/test/utils/truth/SpannedSubject.java index 43d7b9533e..3c3a5c1c0c 100644 --- a/libraries/test_utils/src/main/java/androidx/media3/test/utils/truth/SpannedSubject.java +++ b/libraries/test_utils/src/main/java/androidx/media3/test/utils/truth/SpannedSubject.java @@ -42,6 +42,7 @@ import androidx.media3.common.text.HorizontalTextInVerticalContextSpan; import androidx.media3.common.text.RubySpan; import androidx.media3.common.text.TextAnnotation; import androidx.media3.common.text.TextEmphasisSpan; +import androidx.media3.common.text.VoiceSpan; import androidx.media3.common.util.NullableType; import androidx.media3.common.util.UnstableApi; import androidx.media3.common.util.Util; @@ -634,6 +635,42 @@ public final class SpannedSubject extends Subject { hasNoSpansOfTypeBetween(HorizontalTextInVerticalContextSpan.class, start, end); } + /** + * Checks that the subject has an {@link VoiceSpan} from {@code start} to {@code end}. + * + * @param start The start of the expected span. 
+ * @param end The end of the expected span. + * @return A {@link VoiceText} object for optional additional assertions on the voice name. + */ + public VoiceText hasVoiceSpanBetween(int start, int end) { + if (actual == null) { + failWithoutActual(simpleFact("Spanned must not be null")); + return ALREADY_FAILED_WITH_NAME_AND_CLASSES; + } + + List voiceSpans = findMatchingSpans(start, end, VoiceSpan.class); + if (voiceSpans.size() == 1) { + return check("VoiceSpan (start=%s,end=%s)", start, end) + .about(voiceSpanSubjects(actual)) + .that(voiceSpans); + } + failWithExpectedSpan(start, end, VoiceSpan.class, actual.toString().substring(start, end)); + return ALREADY_FAILED_WITH_NAME_AND_CLASSES; + } + + /** + * Checks that the subject has no {@link VoiceSpan}s on any of the text between {@code start} and + * {@code end}. + * + *

This fails even if the start and end indexes don't exactly match. + * + * @param start The start index to start searching for spans. + * @param end The end index to stop searching for spans. + */ + public void hasNoVoiceSpanBetween(int start, int end) { + hasNoSpansOfTypeBetween(VoiceSpan.class, start, end); + } + /** * Checks that the subject has no spans of type {@code spanClazz} on any of the text between * {@code start} and {@code end}. @@ -1272,4 +1309,83 @@ public final class SpannedSubject extends Subject { } } } + + /** Allows assertions about a span's voice and its position. */ + public interface VoiceText { + /** + * Checks that at least one of the matched spans has the expected {@code name}. + * + * @param name The expected name of the voice. + * @return A {@link AndSpanFlags} object for optional additional assertions on the flags. + */ + AndSpanFlags withName(String name); + } + + private static final VoiceText ALREADY_FAILED_WITH_NAME_AND_CLASSES = + (name) -> ALREADY_FAILED_AND_FLAGS; + + private static Factory> voiceSpanSubjects( + Spanned actualSpanned) { + return (FailureMetadata metadata, @Nullable List spans) -> + new VoiceSpanSubject(metadata, spans, actualSpanned); + } + + private static final class VoiceSpanSubject extends Subject implements VoiceText { + + @Nullable private final List actualSpans; + private final Spanned actualSpanned; + + private VoiceSpanSubject( + FailureMetadata metadata, @Nullable List actualSpans, Spanned actualSpanned) { + super(metadata, actualSpans); + this.actualSpans = actualSpans; + this.actualSpanned = actualSpanned; + } + + @Override + public AndSpanFlags withName(String name) { + List matchingSpanFlags = new ArrayList<>(); + List voiceName = new ArrayList<>(); + for (VoiceSpan span : checkNotNull(actualSpans)) { + voiceName.add(new Name(span.name)); + if (span.name.equals(name)) { + matchingSpanFlags.add(actualSpanned.getSpanFlags(span)); + } + } + check("voiceName").that(voiceName).containsExactly(new Name(name)); 
+ return check("flags").about(spanFlags()).that(matchingSpanFlags); + } + + private static final class Name { + + private final String name; + + private Name(String name) { + this.name = name; + } + + @Override + public boolean equals(@Nullable Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + Name that = (Name) o; + return name.equals(that.name); + } + + @Override + public int hashCode() { + return name.hashCode(); + } + + @Override + public String toString() { + return String.format("{name=%s}", name); + } + } + } } diff --git a/libraries/test_utils/src/test/java/androidx/media3/test/utils/truth/SpannedSubjectTest.java b/libraries/test_utils/src/test/java/androidx/media3/test/utils/truth/SpannedSubjectTest.java index 57ef16713b..dc7b2fca39 100644 --- a/libraries/test_utils/src/test/java/androidx/media3/test/utils/truth/SpannedSubjectTest.java +++ b/libraries/test_utils/src/test/java/androidx/media3/test/utils/truth/SpannedSubjectTest.java @@ -40,6 +40,7 @@ import androidx.media3.common.text.HorizontalTextInVerticalContextSpan; import androidx.media3.common.text.RubySpan; import androidx.media3.common.text.TextAnnotation; import androidx.media3.common.text.TextEmphasisSpan; +import androidx.media3.common.text.VoiceSpan; import androidx.media3.common.util.Util; import androidx.media3.test.utils.truth.SpannedSubject.AndSpanFlags; import androidx.media3.test.utils.truth.SpannedSubject.WithSpanFlags; @@ -902,6 +903,59 @@ public class SpannedSubjectTest { SpannedSubject::hasNoHorizontalTextInVerticalContextSpanBetween); } + @Test + public void voiceSpan_success() { + SpannableString spannable = + createSpannable(new VoiceSpan("speaker"), Spanned.SPAN_INCLUSIVE_EXCLUSIVE); + + assertThat(spannable) + .hasVoiceSpanBetween(SPAN_START, SPAN_END) + .withName("speaker") + .andFlags(Spanned.SPAN_INCLUSIVE_EXCLUSIVE); + } + + @Test + public void voiceSpan_wrongEndIndex() { + 
checkHasSpanFailsDueToIndexMismatch( + new VoiceSpan("speaker"), SpannedSubject::hasVoiceSpanBetween); + } + + @Test + public void voiceSpan_wrongName() { + SpannableString spannable = createSpannable(new VoiceSpan("speaker")); + + AssertionError expected = + expectFailure( + whenTesting -> + whenTesting + .that(spannable) + .hasVoiceSpanBetween(SPAN_START, SPAN_END) + .withName("different speaker")); + + assertThat(expected).factValue("value of").contains("voiceName"); + assertThat(expected).factValue("expected").contains("name=different speaker"); + assertThat(expected).factValue("but was").contains("name=speaker"); + } + + @Test + public void voiceSpan_wrongFlags() { + checkHasSpanFailsDueToFlagMismatch( + new VoiceSpan("speaker"), + (subject, start, end) -> subject.hasVoiceSpanBetween(start, end).withName("speaker")); + } + + @Test + public void noVoiceSpan_success() { + SpannableString spannable = createSpannableWithUnrelatedSpanAnd(new VoiceSpan("speaker")); + + assertThat(spannable).hasNoVoiceSpanBetween(UNRELATED_SPAN_START, UNRELATED_SPAN_END); + } + + @Test + public void noVoiceSpan_failure() { + checkHasNoSpanFails(new VoiceSpan("speaker"), SpannedSubject::hasNoVoiceSpanBetween); + } + private interface HasSpanFunction { T call(SpannedSubject s, int start, int end); }