diff --git a/RELEASENOTES.md b/RELEASENOTES.md index 4d2baf5adf..b88458f9c5 100644 --- a/RELEASENOTES.md +++ b/RELEASENOTES.md @@ -76,6 +76,8 @@ This release corresponds to the ([#63](https://github.com/androidx/media/pull/63)). * Add VP8 fragmented packet handling ([#110](https://github.com/androidx/media/pull/110)). + * Support frames/fragments in VP9 + ([#115](https://github.com/androidx/media/pull/115)). * Leanback extension: * Listen to `playWhenReady` changes in `LeanbackAdapter` ([10420](https://github.com/google/ExoPlayer/issues/10420)). diff --git a/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/reader/RtpVp9Reader.java b/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/reader/RtpVp9Reader.java index e6558c3891..3d3d108acd 100644 --- a/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/reader/RtpVp9Reader.java +++ b/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/reader/RtpVp9Reader.java @@ -16,6 +16,8 @@ package androidx.media3.exoplayer.rtsp.reader; import static androidx.media3.common.util.Assertions.checkArgument; +import static androidx.media3.common.util.Assertions.checkNotNull; +import static androidx.media3.common.util.Assertions.checkState; import static androidx.media3.common.util.Assertions.checkStateNotNull; import androidx.media3.common.C; @@ -55,27 +57,34 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; /** The combined size of a sample that is fragmented into multiple RTP packets. */ private int fragmentedSampleSizeBytes; + private long fragmentedSampleTimeUs; + private int width; private int height; /** * Whether the first packet of a VP9 frame is received, it mark the start of a VP9 partition. A * VP9 frame can be split into multiple RTP packets. */ - private boolean gotFirstPacketOfVP9Frame; + private boolean gotFirstPacketOfVp9Frame; private boolean reportedOutputFormat; + private boolean isKeyFrame; /** Creates an instance. 
*/ public RtpVp9Reader(RtpPayloadFormat payloadFormat) { this.payloadFormat = payloadFormat; firstReceivedTimestamp = C.TIME_UNSET; + fragmentedSampleSizeBytes = C.LENGTH_UNSET; + fragmentedSampleTimeUs = C.TIME_UNSET; // The start time offset must be 0 until the first seek. startTimeOffsetUs = 0; previousSequenceNumber = C.INDEX_UNSET; width = C.LENGTH_UNSET; height = C.LENGTH_UNSET; - gotFirstPacketOfVP9Frame = false; + // TODO(b/240318842) Clean up VP8/VP9 reader. + gotFirstPacketOfVp9Frame = false; reportedOutputFormat = false; + isKeyFrame = false; } @Override @@ -85,7 +94,10 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; } @Override - public void onReceivingFirstPacket(long timestamp, int sequenceNumber) {} + public void onReceivingFirstPacket(long timestamp, int sequenceNumber) { + checkState(firstReceivedTimestamp == C.TIME_UNSET); + firstReceivedTimestamp = timestamp; + } @Override public void consume( @@ -93,11 +105,10 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; checkStateNotNull(trackOutput); if (validateVp9Descriptor(data, sequenceNumber)) { - @C.BufferFlags int bufferFlags = 0; - if (fragmentedSampleSizeBytes == 0 - && gotFirstPacketOfVP9Frame - && (data.peekUnsignedByte() & 0x04) == 0) { - bufferFlags = C.BUFFER_FLAG_KEY_FRAME; + if (fragmentedSampleSizeBytes == C.LENGTH_UNSET && gotFirstPacketOfVp9Frame) { + // Parsing the frame_type in VP9 uncompressed header, 0 - key frame, 1 - inter frame. + // Refer to VP9 Bitstream superframe and uncompressed header, Section 4.1. + isKeyFrame = (data.peekUnsignedByte() & 0x04) == 0; } if (!reportedOutputFormat && width != C.LENGTH_UNSET && height != C.LENGTH_UNSET) { @@ -111,21 +122,15 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; int currentFragmentSizeBytes = data.bytesLeft(); // Write the video sample. 
trackOutput.sampleData(data, currentFragmentSizeBytes); - fragmentedSampleSizeBytes += currentFragmentSizeBytes; + if (fragmentedSampleSizeBytes == C.LENGTH_UNSET) { + fragmentedSampleSizeBytes = currentFragmentSizeBytes; + } else { + fragmentedSampleSizeBytes += currentFragmentSizeBytes; + } + fragmentedSampleTimeUs = toSampleUs(startTimeOffsetUs, timestamp, firstReceivedTimestamp); if (rtpMarker) { - if (firstReceivedTimestamp == C.TIME_UNSET) { - firstReceivedTimestamp = timestamp; - } - long timeUs = toSampleUs(startTimeOffsetUs, timestamp, firstReceivedTimestamp); - trackOutput.sampleMetadata( - timeUs, - bufferFlags, - fragmentedSampleSizeBytes, - /* offset= */ 0, - /* cryptoData= */ null); - fragmentedSampleSizeBytes = 0; - gotFirstPacketOfVP9Frame = false; + outputSampleMetadataForFragmentedPackets(); } previousSequenceNumber = sequenceNumber; } @@ -134,7 +139,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; @Override public void seek(long nextRtpTimestamp, long timeUs) { firstReceivedTimestamp = nextRtpTimestamp; - fragmentedSampleSizeBytes = 0; + fragmentedSampleSizeBytes = C.LENGTH_UNSET; startTimeOffsetUs = timeUs; } @@ -162,19 +167,16 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; // +-+-+-+-+-+-+-+-+ int header = payload.readUnsignedByte(); - if (!gotFirstPacketOfVP9Frame) { - if ((header & 0x08) == 0) { - Log.w( - TAG, - "First payload octet of the RTP packet is not the beginning of a new VP9 partition," - + " Dropping current packet."); - return false; + if ((header & 0x08) == 0x08) { + if (gotFirstPacketOfVp9Frame && fragmentedSampleSizeBytes > 0) { + // Received new VP9 fragment, output data of previous fragment to decoder. + outputSampleMetadataForFragmentedPackets(); } - gotFirstPacketOfVP9Frame = true; - } else { + gotFirstPacketOfVp9Frame = true; + } else if (gotFirstPacketOfVp9Frame) { // Check that this packet is in the sequence of the previous packet. 
int expectedSequenceNumber = RtpPacket.getNextSequenceNumber(previousSequenceNumber); - if (packetSequenceNumber != expectedSequenceNumber) { + if (packetSequenceNumber < expectedSequenceNumber) { Log.w( TAG, Util.formatInvariant( @@ -183,6 +185,12 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; expectedSequenceNumber, packetSequenceNumber)); return false; } + } else { + Log.w( + TAG, + "First payload octet of the RTP packet is not the beginning of a new VP9 partition," + + " Dropping current packet."); + return false; } // Check if optional I header is present. @@ -250,6 +258,24 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; return true; } + /** + * Outputs sample metadata of the received fragmented packets. + * + *

<p>Call this method only after receiving an end of a VP9 partition. + */ + private void outputSampleMetadataForFragmentedPackets() { + checkNotNull(trackOutput) + .sampleMetadata( + fragmentedSampleTimeUs, + isKeyFrame ? C.BUFFER_FLAG_KEY_FRAME : 0, + fragmentedSampleSizeBytes, + /* offset= */ 0, + /* cryptoData= */ null); + fragmentedSampleSizeBytes = C.LENGTH_UNSET; + fragmentedSampleTimeUs = C.TIME_UNSET; + gotFirstPacketOfVp9Frame = false; + } + private static long toSampleUs( long startTimeOffsetUs, long rtpTimestamp, long firstReceivedRtpTimestamp) { return startTimeOffsetUs diff --git a/libraries/exoplayer_rtsp/src/test/java/androidx/media3/exoplayer/rtsp/reader/RtpVp9ReaderTest.java b/libraries/exoplayer_rtsp/src/test/java/androidx/media3/exoplayer/rtsp/reader/RtpVp9ReaderTest.java new file mode 100644 index 0000000000..22f87ff702 --- /dev/null +++ b/libraries/exoplayer_rtsp/src/test/java/androidx/media3/exoplayer/rtsp/reader/RtpVp9ReaderTest.java @@ -0,0 +1,198 @@ +/* + * Copyright 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package androidx.media3.exoplayer.rtsp.reader; + +import static androidx.media3.common.util.Util.getBytesFromHexString; +import static com.google.common.truth.Truth.assertThat; + +import androidx.media3.common.C; +import androidx.media3.common.Format; +import androidx.media3.common.MimeTypes; +import androidx.media3.common.util.ParsableByteArray; +import androidx.media3.common.util.Util; +import androidx.media3.exoplayer.rtsp.RtpPacket; +import androidx.media3.exoplayer.rtsp.RtpPayloadFormat; +import androidx.media3.test.utils.FakeExtractorOutput; +import androidx.media3.test.utils.FakeTrackOutput; +import androidx.test.ext.junit.runners.AndroidJUnit4; +import com.google.common.collect.ImmutableMap; +import com.google.common.primitives.Bytes; +import java.util.Arrays; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; + +/** Unit test for {@link RtpVp9Reader}. */ +@RunWith(AndroidJUnit4.class) +public final class RtpVp9ReaderTest { + + /** VP9 uses a 90 kHz media clock (RFC 9628 Section 4.1).
*/ + private static final long MEDIA_CLOCK_FREQUENCY = 90_000; + + private static final byte[] PARTITION_1 = getBytesFromHexString("000102030405060708090A0B0C0D0E"); + // 000102030405060708090A + private static final byte[] PARTITION_1_FRAGMENT_1 = + Arrays.copyOf(PARTITION_1, /* newLength= */ 11); + // 0B0C0D0E + private static final byte[] PARTITION_1_FRAGMENT_2 = + Arrays.copyOfRange(PARTITION_1, /* from= */ 11, /* to= */ 15); + private static final long PARTITION_1_RTP_TIMESTAMP = 2599168056L; + private static final RtpPacket PACKET_PARTITION_1_FRAGMENT_1 = + new RtpPacket.Builder() + .setTimestamp(PARTITION_1_RTP_TIMESTAMP) + .setSequenceNumber(40289) + .setMarker(false) + .setPayloadData(Bytes.concat(getBytesFromHexString("08"), PARTITION_1_FRAGMENT_1)) + .build(); + private static final RtpPacket PACKET_PARTITION_1_FRAGMENT_2 = + new RtpPacket.Builder() + .setTimestamp(PARTITION_1_RTP_TIMESTAMP) + .setSequenceNumber(40290) + .setMarker(true) + .setPayloadData(Bytes.concat(getBytesFromHexString("00"), PARTITION_1_FRAGMENT_2)) + .build(); + + private static final byte[] PARTITION_2 = getBytesFromHexString("0D0C0B0A09080706050403020100"); + // 0D0C0B0A090807060504 + private static final byte[] PARTITION_2_FRAGMENT_1 = + Arrays.copyOf(PARTITION_2, /* newLength= */ 10); + // 03020100 + private static final byte[] PARTITION_2_FRAGMENT_2 = + Arrays.copyOfRange(PARTITION_2, /* from= */ 10, /* to= */ 14); + private static final long PARTITION_2_RTP_TIMESTAMP = 2599168344L; + private static final RtpPacket PACKET_PARTITION_2_FRAGMENT_1 = + new RtpPacket.Builder() + .setTimestamp(PARTITION_2_RTP_TIMESTAMP) + .setSequenceNumber(40291) + .setMarker(false) + .setPayloadData(Bytes.concat(getBytesFromHexString("08"), PARTITION_2_FRAGMENT_1)) + .build(); + private static final RtpPacket PACKET_PARTITION_2_FRAGMENT_2 = + new RtpPacket.Builder() + .setTimestamp(PARTITION_2_RTP_TIMESTAMP) + .setSequenceNumber(40292) + .setMarker(true) + 
.setPayloadData(Bytes.concat(getBytesFromHexString("00"), PARTITION_2_FRAGMENT_2)) + .build(); + private static final long PARTITION_2_PRESENTATION_TIMESTAMP_US = + Util.scaleLargeTimestamp( + (PARTITION_2_RTP_TIMESTAMP - PARTITION_1_RTP_TIMESTAMP), + /* multiplier= */ C.MICROS_PER_SECOND, + /* divisor= */ MEDIA_CLOCK_FREQUENCY); + + private FakeExtractorOutput extractorOutput; + + @Before + public void setUp() { + extractorOutput = + new FakeExtractorOutput( + (id, type) -> new FakeTrackOutput(/* deduplicateConsecutiveFormats= */ true)); + } + + @Test + public void consume_validPackets() { + RtpVp9Reader vp9Reader = createVp9Reader(); + + vp9Reader.createTracks(extractorOutput, /* trackId= */ 0); + vp9Reader.onReceivingFirstPacket( + PACKET_PARTITION_1_FRAGMENT_1.timestamp, PACKET_PARTITION_1_FRAGMENT_1.sequenceNumber); + consume(vp9Reader, PACKET_PARTITION_1_FRAGMENT_1); + consume(vp9Reader, PACKET_PARTITION_1_FRAGMENT_2); + consume(vp9Reader, PACKET_PARTITION_2_FRAGMENT_1); + consume(vp9Reader, PACKET_PARTITION_2_FRAGMENT_2); + + FakeTrackOutput trackOutput = extractorOutput.trackOutputs.get(0); + assertThat(trackOutput.getSampleCount()).isEqualTo(2); + assertThat(trackOutput.getSampleData(0)).isEqualTo(PARTITION_1); + assertThat(trackOutput.getSampleTimeUs(0)).isEqualTo(0); + assertThat(trackOutput.getSampleData(1)).isEqualTo(PARTITION_2); + assertThat(trackOutput.getSampleTimeUs(1)).isEqualTo(PARTITION_2_PRESENTATION_TIMESTAMP_US); + } + + @Test + public void consume_fragmentedFrameMissingFirstFragment() { + RtpVp9Reader vp9Reader = createVp9Reader(); + + vp9Reader.createTracks(extractorOutput, /* trackId= */ 0); + // First packet timing information is transmitted over RTSP, not RTP. 
+ vp9Reader.onReceivingFirstPacket( + PACKET_PARTITION_1_FRAGMENT_1.timestamp, PACKET_PARTITION_1_FRAGMENT_1.sequenceNumber); + consume(vp9Reader, PACKET_PARTITION_1_FRAGMENT_2); + consume(vp9Reader, PACKET_PARTITION_2_FRAGMENT_1); + consume(vp9Reader, PACKET_PARTITION_2_FRAGMENT_2); + + FakeTrackOutput trackOutput = extractorOutput.trackOutputs.get(0); + assertThat(trackOutput.getSampleCount()).isEqualTo(1); + assertThat(trackOutput.getSampleData(0)).isEqualTo(PARTITION_2); + assertThat(trackOutput.getSampleTimeUs(0)).isEqualTo(PARTITION_2_PRESENTATION_TIMESTAMP_US); + } + + @Test + public void consume_fragmentedFrameMissingBoundaryFragment() { + RtpVp9Reader vp9Reader = createVp9Reader(); + + vp9Reader.createTracks(extractorOutput, /* trackId= */ 0); + vp9Reader.onReceivingFirstPacket( + PACKET_PARTITION_1_FRAGMENT_1.timestamp, PACKET_PARTITION_1_FRAGMENT_1.sequenceNumber); + consume(vp9Reader, PACKET_PARTITION_1_FRAGMENT_1); + consume(vp9Reader, PACKET_PARTITION_2_FRAGMENT_1); + consume(vp9Reader, PACKET_PARTITION_2_FRAGMENT_2); + + FakeTrackOutput trackOutput = extractorOutput.trackOutputs.get(0); + assertThat(trackOutput.getSampleCount()).isEqualTo(2); + assertThat(trackOutput.getSampleData(0)).isEqualTo(PARTITION_1_FRAGMENT_1); + assertThat(trackOutput.getSampleTimeUs(0)).isEqualTo(0); + assertThat(trackOutput.getSampleData(1)).isEqualTo(PARTITION_2); + assertThat(trackOutput.getSampleTimeUs(1)).isEqualTo(PARTITION_2_PRESENTATION_TIMESTAMP_US); + } + + @Test + public void consume_outOfOrderFragmentedFrame() { + RtpVp9Reader vp9Reader = createVp9Reader(); + + vp9Reader.createTracks(extractorOutput, /* trackId= */ 0); + vp9Reader.onReceivingFirstPacket( + PACKET_PARTITION_1_FRAGMENT_1.timestamp, PACKET_PARTITION_1_FRAGMENT_1.sequenceNumber); + consume(vp9Reader, PACKET_PARTITION_1_FRAGMENT_1); + consume(vp9Reader, PACKET_PARTITION_2_FRAGMENT_1); + consume(vp9Reader, PACKET_PARTITION_1_FRAGMENT_2); + consume(vp9Reader, PACKET_PARTITION_2_FRAGMENT_2); + + 
FakeTrackOutput trackOutput = extractorOutput.trackOutputs.get(0); + assertThat(trackOutput.getSampleCount()).isEqualTo(2); + assertThat(trackOutput.getSampleData(0)).isEqualTo(PARTITION_1_FRAGMENT_1); + assertThat(trackOutput.getSampleTimeUs(0)).isEqualTo(0); + assertThat(trackOutput.getSampleData(1)).isEqualTo(PARTITION_2); + assertThat(trackOutput.getSampleTimeUs(1)).isEqualTo(PARTITION_2_PRESENTATION_TIMESTAMP_US); + } + + private static RtpVp9Reader createVp9Reader() { + return new RtpVp9Reader( + new RtpPayloadFormat( + new Format.Builder().setSampleMimeType(MimeTypes.VIDEO_VP9).build(), + /* rtpPayloadType= */ 96, + /* clockRate= */ (int) MEDIA_CLOCK_FREQUENCY, + /* fmtpParameters= */ ImmutableMap.of())); + } + + private static void consume(RtpVp9Reader vp9Reader, RtpPacket rtpPacket) { + vp9Reader.consume( + new ParsableByteArray(rtpPacket.payloadData), + rtpPacket.timestamp, + rtpPacket.sequenceNumber, + rtpPacket.marker); + } +}