diff --git a/libraries/common/src/main/java/androidx/media3/common/util/CodecSpecificDataUtil.java b/libraries/common/src/main/java/androidx/media3/common/util/CodecSpecificDataUtil.java index b4b872a6da..821a7a2ba6 100644 --- a/libraries/common/src/main/java/androidx/media3/common/util/CodecSpecificDataUtil.java +++ b/libraries/common/src/main/java/androidx/media3/common/util/CodecSpecificDataUtil.java @@ -15,6 +15,8 @@ */ package androidx.media3.common.util; +import static androidx.media3.common.util.Assertions.checkArgument; + import android.util.Pair; import androidx.annotation.Nullable; import androidx.media3.common.C; @@ -85,14 +87,15 @@ public final class CodecSpecificDataUtil { * to parse. * @return A pair consisting of the width and the height. */ - public static Pair parseMpeg4VideoSpecificConfig(byte[] videoSpecificConfig) { + public static Pair getVideoResolutionFromMpeg4VideoConfig( + byte[] videoSpecificConfig) { int offset = 0; boolean foundVOL = false; - ParsableByteArray scdScratchBytes = new ParsableByteArray(videoSpecificConfig); + ParsableByteArray scratchBytes = new ParsableByteArray(videoSpecificConfig); while (offset + 3 < videoSpecificConfig.length) { - if (scdScratchBytes.readUnsignedInt24() != VISUAL_OBJECT_LAYER + if (scratchBytes.readUnsignedInt24() != VISUAL_OBJECT_LAYER || (videoSpecificConfig[offset + 3] & 0xf0) != VISUAL_OBJECT_LAYER_START) { - scdScratchBytes.setPosition(scdScratchBytes.getPosition() - 2); + scratchBytes.setPosition(scratchBytes.getPosition() - 2); offset++; continue; } @@ -100,57 +103,59 @@ public final class CodecSpecificDataUtil { break; } - Assertions.checkArgument(foundVOL, "Invalid input. 
VOL not found"); + checkArgument(foundVOL, "Invalid input: VOL not found."); - ParsableBitArray scdScratchBits = new ParsableBitArray(videoSpecificConfig); - scdScratchBits.skipBits((offset + 4) * 8); - scdScratchBits.skipBits(1); // random_accessible_vol - scdScratchBits.skipBits(8); // video_object_type_indication + ParsableBitArray scratchBits = new ParsableBitArray(videoSpecificConfig); + // Skip the start codes from the bitstream + scratchBits.skipBits((offset + 4) * 8); + scratchBits.skipBits(1); // random_accessible_vol + scratchBits.skipBits(8); // video_object_type_indication - if (scdScratchBits.readBit()) { // object_layer_identifier - scdScratchBits.skipBits(4); // video_object_layer_verid - scdScratchBits.skipBits(3); // video_object_layer_priority + if (scratchBits.readBit()) { // object_layer_identifier + scratchBits.skipBits(4); // video_object_layer_verid + scratchBits.skipBits(3); // video_object_layer_priority } - int aspectRatioInfo = scdScratchBits.readBits(4); + int aspectRatioInfo = scratchBits.readBits(4); if (aspectRatioInfo == EXTENDED_PAR) { - scdScratchBits.skipBits(8); // par_width - scdScratchBits.skipBits(8); // par_height + scratchBits.skipBits(8); // par_width + scratchBits.skipBits(8); // par_height } - if (scdScratchBits.readBit()) { // vol_control_parameters - scdScratchBits.skipBits(2); // chroma_format - scdScratchBits.skipBits(1); // low_delay - if (scdScratchBits.readBit()) { // vbv_parameters - scdScratchBits.skipBits(79); + if (scratchBits.readBit()) { // vol_control_parameters + scratchBits.skipBits(2); // chroma_format + scratchBits.skipBits(1); // low_delay + if (scratchBits.readBit()) { // vbv_parameters + scratchBits.skipBits(79); } } - int videoObjectLayerShape = scdScratchBits.readBits(2); - Assertions.checkArgument(videoObjectLayerShape == RECTANGULAR, "Unsupported feature"); + int videoObjectLayerShape = scratchBits.readBits(2); + checkArgument( + videoObjectLayerShape == RECTANGULAR, "Only supports rectangular 
video object layer shape"); - Assertions.checkArgument(scdScratchBits.readBit(), "Invalid input"); // marker_bit - int vopTimeIncrementResolution = scdScratchBits.readBits(16); - Assertions.checkArgument(scdScratchBits.readBit(), "Invalid input"); // marker_bit + checkArgument(scratchBits.readBit()); // marker_bit + int vopTimeIncrementResolution = scratchBits.readBits(16); + checkArgument(scratchBits.readBit()); // marker_bit - if (scdScratchBits.readBit()) { // fixed_vop_rate - Assertions.checkArgument(vopTimeIncrementResolution > 0, "Invalid input"); - --vopTimeIncrementResolution; - int numBits = 0; + if (scratchBits.readBit()) { // fixed_vop_rate + checkArgument(vopTimeIncrementResolution > 0); + vopTimeIncrementResolution--; + int numBitsToSkip = 0; while (vopTimeIncrementResolution > 0) { - ++numBits; + numBitsToSkip++; vopTimeIncrementResolution >>= 1; } - scdScratchBits.skipBits(numBits); // fixed_vop_time_increment + scratchBits.skipBits(numBitsToSkip); // fixed_vop_time_increment } - Assertions.checkArgument(scdScratchBits.readBit(), "Invalid input"); // marker_bit - int videoObjectLayerWidth = scdScratchBits.readBits(13); - Assertions.checkArgument(scdScratchBits.readBit(), "Invalid input"); // marker_bit - int videoObjectLayerHeight = scdScratchBits.readBits(13); - Assertions.checkArgument(scdScratchBits.readBit(), "Invalid input"); // marker_bit + checkArgument(scratchBits.readBit()); // marker_bit + int videoObjectLayerWidth = scratchBits.readBits(13); + checkArgument(scratchBits.readBit()); // marker_bit + int videoObjectLayerHeight = scratchBits.readBits(13); + checkArgument(scratchBits.readBit()); // marker_bit - scdScratchBits.skipBits(1); // interlaced + scratchBits.skipBits(1); // interlaced return Pair.create(videoObjectLayerWidth, videoObjectLayerHeight); } diff --git a/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/RtspMediaTrack.java 
b/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/RtspMediaTrack.java index d0cc763720..f8edb33311 100644 --- a/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/RtspMediaTrack.java +++ b/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/RtspMediaTrack.java @@ -174,17 +174,14 @@ import com.google.common.collect.ImmutableMap; @Nullable String configInput = fmtpAttributes.get(PARAMETER_MP4V_CONFIG); if (configInput != null) { byte[] csd = Util.getBytesFromHexString(configInput); - ImmutableList initializationData = ImmutableList.of(csd); - formatBuilder.setInitializationData(initializationData); - Pair dimensions = CodecSpecificDataUtil.parseMpeg4VideoSpecificConfig(csd); - formatBuilder.setWidth(dimensions.first); - formatBuilder.setHeight(dimensions.second); + formatBuilder.setInitializationData(ImmutableList.of(csd)); + Pair resolution = + CodecSpecificDataUtil.getVideoResolutionFromMpeg4VideoConfig(csd); + formatBuilder.setWidth(resolution.first); + formatBuilder.setHeight(resolution.second); } @Nullable String profileLevel = fmtpAttributes.get(PARAMETER_PROFILE_LEVEL_ID); - if (profileLevel == null) { - profileLevel = "1"; // default - } - formatBuilder.setCodecs(MPEG4_CODECS_PREFIX + profileLevel); + formatBuilder.setCodecs(MPEG4_CODECS_PREFIX + (profileLevel == null ? 
"1" : profileLevel)); } private static void processH264FmtpAttribute( diff --git a/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/reader/RtpMPEG4Reader.java b/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/reader/RtpMPEG4Reader.java index a3dacd0f89..8154b9379b 100644 --- a/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/reader/RtpMPEG4Reader.java +++ b/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/reader/RtpMPEG4Reader.java @@ -23,6 +23,7 @@ import androidx.media3.common.ParserException; import androidx.media3.common.util.Log; import androidx.media3.common.util.ParsableByteArray; import androidx.media3.common.util.Util; +import androidx.media3.exoplayer.rtsp.RtpPacket; import androidx.media3.exoplayer.rtsp.RtpPayloadFormat; import androidx.media3.extractor.ExtractorOutput; import androidx.media3.extractor.TrackOutput; @@ -38,9 +39,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; private static final long MEDIA_CLOCK_FREQUENCY = 90_000; - /** - * VOP unit type. - */ + /** VOP unit type. */ private static final int I_VOP = 0; private final RtpPayloadFormat payloadFormat; @@ -66,22 +65,31 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; } @Override - public void onReceivingFirstPacket(long timestamp, int sequenceNumber) { - Log.i(TAG, "RtpMPEG4Reader onReceivingFirstPacket"); - } + public void onReceivingFirstPacket(long timestamp, int sequenceNumber) {} @Override public void consume(ParsableByteArray data, long timestamp, int sequenceNumber, boolean rtpMarker) throws ParserException { - if (previousSequenceNumber != C.INDEX_UNSET && sequenceNumber != (previousSequenceNumber + 1)) { - Log.e(TAG, "Packet loss"); - } checkStateNotNull(trackOutput); + // Check that this packet is in the sequence of the previous packet. 
+ if (previousSequenceNumber != C.INDEX_UNSET) { + int expectedSequenceNumber = RtpPacket.getNextSequenceNumber(previousSequenceNumber); + if (sequenceNumber != expectedSequenceNumber) { + Log.w( + TAG, + Util.formatInvariant( + "Received RTP packet with unexpected sequence number. Expected: %d; received: %d." + + " Dropping packet.", + expectedSequenceNumber, sequenceNumber)); + return; + } + } + // Parse VOP Type and get the buffer flags int limit = data.bytesLeft(); trackOutput.sampleData(data, limit); + if (sampleLength == 0) bufferFlags = getBufferFlagsFromVop(data); sampleLength += limit; - parseVopType(data); // Marker (M) bit: The marker bit is set to 1 to indicate the last RTP // packet(or only RTP packet) of a VOP. When multiple VOPs are carried @@ -95,7 +103,6 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; trackOutput.sampleMetadata(timeUs, bufferFlags, sampleLength, 0, null); sampleLength = 0; } - previousSequenceNumber = sequenceNumber; } @@ -109,20 +116,23 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; // Internal methods. /** - * Parses VOP Coding type + * Parses VOP Coding type. * * Sets {@link #bufferFlags} according to the VOP Coding type. */ - private void parseVopType(ParsableByteArray data) { + @C.BufferFlags + private static int getBufferFlagsFromVop(ParsableByteArray data) { + int flags = 0; // search for VOP_START_CODE (00 00 01 B6) byte[] inputData = data.getData(); - byte[] startCode = {0x0, 0x0, 0x01, (byte) 0xB6}; + byte[] startCode = new byte[] {0x0, 0x0, 0x1, (byte) 0xB6}; int vopStartCodePos = Bytes.indexOf(inputData, startCode); if (vopStartCodePos != -1) { data.setPosition(vopStartCodePos + 4); int vopType = data.peekUnsignedByte() >> 6; - bufferFlags = getBufferFlagsFromVopType(vopType); + flags = vopType == I_VOP ? 
C.BUFFER_FLAG_KEY_FRAME : 0; } + return flags; } private static long toSampleUs( @@ -133,9 +143,4 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; /* multiplier= */ C.MICROS_PER_SECOND, /* divisor= */ MEDIA_CLOCK_FREQUENCY); } - - @C.BufferFlags - private static int getBufferFlagsFromVopType(int vopType) { - return vopType == I_VOP ? C.BUFFER_FLAG_KEY_FRAME : 0; - } }