mirror of
https://github.com/samsonjs/media.git
synced 2026-03-25 09:25:53 +00:00
improve PES packet parsing for MPEG-H
This commit is contained in:
parent
5df45f7e64
commit
b6990c3c5b
2 changed files with 314 additions and 420 deletions
|
|
@ -23,274 +23,31 @@ import androidx.annotation.Nullable;
|
|||
import androidx.media3.common.C;
|
||||
import androidx.media3.common.ParserException;
|
||||
import androidx.media3.common.util.ParsableBitArray;
|
||||
import androidx.media3.common.util.ParsableByteArray;
|
||||
import androidx.media3.common.util.UnstableApi;
|
||||
import java.lang.annotation.Documented;
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
import java.lang.annotation.Target;
|
||||
import java.util.Arrays;
|
||||
|
||||
/** Utility methods for parsing MPEG-H frames, which are access units in MPEG-H bitstreams. */
|
||||
@UnstableApi
|
||||
public final class MpeghUtil {
|
||||
|
||||
/** Holds information contained in the parsed MPEG-H frame. */
|
||||
public static final class FrameInfo {
|
||||
|
||||
/** Signals if the MPEG-H frame contains a mpegh3daConfig packet. */
|
||||
public final boolean containsConfig;
|
||||
|
||||
/** Signals if the mpegh3daConfig packet in the MPEG-H frame has changed. */
|
||||
public final boolean configChanged;
|
||||
|
||||
/** The default number of audio samples in the frame. */
|
||||
public final int standardFrameSamples;
|
||||
|
||||
/** The audio sampling rate in Hz. */
|
||||
public final int samplingRate;
|
||||
|
||||
/** The actual number of audio samples in the frame. */
|
||||
public final int frameSamples;
|
||||
|
||||
/** The number of bytes building the frame. */
|
||||
public final int frameBytes;
|
||||
|
||||
/** The label of the main stream in the frame. */
|
||||
public final long mainStreamLabel;
|
||||
|
||||
/** The profile level indication of the audio in the frame. */
|
||||
public final int mpegh3daProfileLevelIndication;
|
||||
|
||||
/** An array of compatible profile level indications of the audio in the frame. */
|
||||
@Nullable public final byte[] compatibleSetIndication;
|
||||
|
||||
/**
|
||||
* Initializes the {@link FrameInfo} with fields containing certain values.
|
||||
*
|
||||
* @param containsConfig See {@link #containsConfig}.
|
||||
* @param configChanged See {@link #configChanged}.
|
||||
* @param standardFrameSamples See {@link #standardFrameSamples}.
|
||||
* @param samplingRate See {@link #samplingRate}.
|
||||
* @param frameSamples See {@link #frameSamples}.
|
||||
* @param frameBytes See {@link #frameBytes}.
|
||||
* @param mainStreamLabel See {@link #mainStreamLabel}.
|
||||
* @param mpegh3daProfileLevelIndication See {@link #mpegh3daProfileLevelIndication}.
|
||||
* @param compatibleSetIndication See {@link #compatibleSetIndication}.
|
||||
*/
|
||||
public FrameInfo(
|
||||
boolean containsConfig,
|
||||
boolean configChanged,
|
||||
int standardFrameSamples,
|
||||
int samplingRate,
|
||||
int frameSamples,
|
||||
int frameBytes,
|
||||
long mainStreamLabel,
|
||||
int mpegh3daProfileLevelIndication,
|
||||
@Nullable byte[] compatibleSetIndication) {
|
||||
this.containsConfig = containsConfig;
|
||||
this.configChanged = configChanged;
|
||||
this.standardFrameSamples = standardFrameSamples;
|
||||
this.samplingRate = samplingRate;
|
||||
this.frameSamples = frameSamples;
|
||||
this.frameBytes = frameBytes;
|
||||
this.mainStreamLabel = mainStreamLabel;
|
||||
this.mpegh3daProfileLevelIndication = mpegh3daProfileLevelIndication;
|
||||
this.compatibleSetIndication =
|
||||
compatibleSetIndication != null
|
||||
? Arrays.copyOf(compatibleSetIndication, compatibleSetIndication.length)
|
||||
: null;
|
||||
}
|
||||
}
|
||||
|
||||
/** See ISO_IEC_23008-3;2022, 14.4.4. */
|
||||
private static final byte[] MHAS_SYNC_PACKET = new byte[] {(byte) 0xC0, (byte) 0x01, (byte) 0xA5};
|
||||
private static final int MHAS_SYNC_WORD = 0xC001A5;
|
||||
|
||||
private static final int MHAS_SYNC_PACKET_LENGTH = 3;
|
||||
public static final int MHAS_SYNC_WORD_LENGTH = 3;
|
||||
|
||||
public static final int MAX_MHAS_PACKET_HEADER_SIZE = 15;
|
||||
|
||||
/**
|
||||
* Locates the next MHAS sync packet, advancing the position to the start of the sync packet. If a
|
||||
* sync packet was not located, the position is advanced to the limit. See ISO_IEC_23008-3;2022,
|
||||
* 14.4.4.
|
||||
* Returns whether a given integer matches an MHAS sync word. See ISO_IEC_23008-3;2022, 14.4.4.
|
||||
*
|
||||
* @param data The byte array whose position should be advanced.
|
||||
* @return Whether a sync packet position was found.
|
||||
* @param word An integer.
|
||||
* @return Whether a given integer matches an MHAS sync word.
|
||||
*/
|
||||
public static boolean findSyncPacket(ParsableByteArray data) {
|
||||
int syncIndex = 0;
|
||||
while (syncIndex < MHAS_SYNC_PACKET_LENGTH
|
||||
&& data.bytesLeft() >= MHAS_SYNC_PACKET_LENGTH - syncIndex) {
|
||||
if (data.readUnsignedByte() == MHAS_SYNC_PACKET[syncIndex]) {
|
||||
syncIndex++;
|
||||
} else {
|
||||
syncIndex = 0; // Restart comparison from the beginning
|
||||
}
|
||||
}
|
||||
|
||||
if (syncIndex == MHAS_SYNC_PACKET_LENGTH) {
|
||||
data.setPosition(data.getPosition() - MHAS_SYNC_PACKET_LENGTH - 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
data.setPosition(data.limit());
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a complete MHAS frame could be parsed by calculating if enough data is available in
|
||||
* the provided {@link ParsableBitArray}.
|
||||
*
|
||||
* @param data The bit array to parse.
|
||||
* @return Whether a complete MHAS frame could be parsed.
|
||||
*/
|
||||
public static boolean canParseFrame(ParsableBitArray data) {
|
||||
boolean result = false;
|
||||
int originalPosition = data.getPosition();
|
||||
while (true) {
|
||||
MhasPacketHeader header;
|
||||
try {
|
||||
header = parseMhasPacketHeader(data);
|
||||
} catch (Exception e) {
|
||||
// There is not enough data available to parse the MHAS packet header.
|
||||
break;
|
||||
}
|
||||
if (data.bitsLeft() < header.packetLength * C.BITS_PER_BYTE) {
|
||||
// There is not enough data available to parse the current MHAS packet.
|
||||
break;
|
||||
}
|
||||
data.skipBytes(header.packetLength);
|
||||
|
||||
if (header.packetType == MhasPacketHeader.PACTYP_MPEGH3DAFRAME) {
|
||||
// An mpegh3daFrame packet has been found which signals the end of the MHAS frame.
|
||||
result = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
data.setPosition(originalPosition);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the necessary info of an MPEG-H frame into the FrameInfo structure.
|
||||
*
|
||||
* @param data The bit array to parse, positioned at the start of the MHAS frame.
|
||||
* @param prevFrameInfo A {@link FrameInfo} derived from the previous frame in the stream or
|
||||
* {@code null} when there is no previous frame.
|
||||
* @return {@link FrameInfo} of the current frame.
|
||||
* @throws ParserException if a valid {@link FrameInfo} cannot be parsed.
|
||||
*/
|
||||
public static FrameInfo parseFrame(ParsableBitArray data, @Nullable FrameInfo prevFrameInfo)
|
||||
throws ParserException {
|
||||
int standardFrameSamples;
|
||||
int samplingFrequency;
|
||||
long mainStreamLabel;
|
||||
boolean frameFound = false;
|
||||
boolean configFound = false;
|
||||
boolean configChanged = false;
|
||||
int truncationSamples = C.LENGTH_UNSET;
|
||||
int mpegh3daProfileLevelIndication = -1;
|
||||
@Nullable byte[] compatibleSetIndication = null;
|
||||
|
||||
if (prevFrameInfo != null) {
|
||||
standardFrameSamples = prevFrameInfo.standardFrameSamples;
|
||||
samplingFrequency = prevFrameInfo.samplingRate;
|
||||
mainStreamLabel = prevFrameInfo.mainStreamLabel;
|
||||
} else {
|
||||
standardFrameSamples = C.LENGTH_UNSET;
|
||||
samplingFrequency = C.RATE_UNSET_INT;
|
||||
mainStreamLabel = -1;
|
||||
}
|
||||
|
||||
int initialBitsLeft = data.bitsLeft();
|
||||
|
||||
if (!data.isByteAligned()) {
|
||||
throw ParserException.createForMalformedContainer(
|
||||
"Input data buffer is not Byte aligned", /* cause= */ null);
|
||||
}
|
||||
|
||||
do {
|
||||
// parse MHAS packet header
|
||||
MhasPacketHeader packetHeader = parseMhasPacketHeader(data);
|
||||
int originalPosition = data.getPosition();
|
||||
|
||||
switch (packetHeader.packetType) {
|
||||
case MhasPacketHeader.PACTYP_MPEGH3DACFG:
|
||||
// we already found a mpegh3daConfig
|
||||
if (configFound) {
|
||||
throw ParserException.createForMalformedContainer(
|
||||
"Found a second mpegh3daConfig packet", /* cause= */ null);
|
||||
}
|
||||
configFound = true;
|
||||
|
||||
// check for config change
|
||||
if (packetHeader.packetLabel != mainStreamLabel) {
|
||||
configChanged = true;
|
||||
}
|
||||
// save new packet label
|
||||
mainStreamLabel = packetHeader.packetLabel;
|
||||
|
||||
// parse the mpegh3daConfig
|
||||
Mpegh3daConfig mpegh3daConfig = parseMpegh3daConfig(data);
|
||||
|
||||
// get the necessary data from mpegh3daConfig
|
||||
samplingFrequency = mpegh3daConfig.samplingFrequency;
|
||||
standardFrameSamples = mpegh3daConfig.standardFrameSamples;
|
||||
mpegh3daProfileLevelIndication = mpegh3daConfig.profileLevelIndication;
|
||||
if (mpegh3daConfig.compatibleProfileLevelSet != null) {
|
||||
compatibleSetIndication = mpegh3daConfig.compatibleProfileLevelSet;
|
||||
}
|
||||
|
||||
data.setPosition(originalPosition);
|
||||
data.skipBytes(packetHeader.packetLength);
|
||||
break;
|
||||
|
||||
case MhasPacketHeader.PACTYP_AUDIOTRUNCATION:
|
||||
truncationSamples = parseAudioTruncationInfo(data);
|
||||
if (truncationSamples > standardFrameSamples) {
|
||||
throw ParserException.createForMalformedContainer(
|
||||
"Truncation size is too big", /* cause= */ null);
|
||||
}
|
||||
|
||||
data.setPosition(originalPosition);
|
||||
data.skipBytes(packetHeader.packetLength);
|
||||
break;
|
||||
|
||||
case MhasPacketHeader.PACTYP_MPEGH3DAFRAME:
|
||||
// check packet label
|
||||
if (packetHeader.packetLabel != mainStreamLabel) {
|
||||
throw ParserException.createForMalformedContainer(
|
||||
"Mpegh3daFrame packet does not belong to main stream", /* cause= */ null);
|
||||
}
|
||||
frameFound = true;
|
||||
data.skipBytes(packetHeader.packetLength);
|
||||
break;
|
||||
|
||||
default:
|
||||
data.skipBytes(packetHeader.packetLength);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!data.isByteAligned()) {
|
||||
throw ParserException.createForMalformedContainer(
|
||||
"Data buffer is not Byte aligned after parsing", /* cause= */ null);
|
||||
}
|
||||
|
||||
} while (!frameFound);
|
||||
|
||||
int parsedBytes = (initialBitsLeft - data.bitsLeft()) / C.BITS_PER_BYTE;
|
||||
|
||||
return new FrameInfo(
|
||||
configFound,
|
||||
configChanged,
|
||||
standardFrameSamples,
|
||||
/* samplingRate= */ samplingFrequency,
|
||||
/* frameSamples= */ (truncationSamples == C.LENGTH_UNSET
|
||||
? standardFrameSamples
|
||||
: standardFrameSamples - truncationSamples),
|
||||
/* frameBytes= */ parsedBytes,
|
||||
mainStreamLabel,
|
||||
mpegh3daProfileLevelIndication,
|
||||
compatibleSetIndication);
|
||||
public static boolean isSyncWord(int word) {
|
||||
return (word & 0xFFFFFF) == MHAS_SYNC_WORD;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -300,8 +57,9 @@ public final class MpeghUtil {
|
|||
* @return The {@link MhasPacketHeader} info.
|
||||
* @throws ParserException if a valid {@link MhasPacketHeader} cannot be parsed.
|
||||
*/
|
||||
private static MhasPacketHeader parseMhasPacketHeader(ParsableBitArray data)
|
||||
public static MhasPacketHeader parseMhasPacketHeader(ParsableBitArray data)
|
||||
throws ParserException {
|
||||
int dataStartPos = data.getPosition();
|
||||
@MhasPacketHeader.Type int packetType = checkedCast(readEscapedValue(data, 3, 8, 8));
|
||||
long packetLabel = readEscapedValue(data, 2, 8, 32);
|
||||
|
||||
|
|
@ -314,10 +72,10 @@ public final class MpeghUtil {
|
|||
switch (packetType) {
|
||||
case MhasPacketHeader.PACTYP_MPEGH3DACFG:
|
||||
throw ParserException.createForMalformedContainer(
|
||||
"Mpegh3daConfig packet with invalid packet label 0", /* cause= */ null);
|
||||
"Mpegh3daConfig packet with invalid packet label 0", /* cause= */ null);
|
||||
case MhasPacketHeader.PACTYP_AUDIOTRUNCATION:
|
||||
throw ParserException.createForMalformedContainer(
|
||||
"AudioTruncation packet with invalid packet label 0", /* cause= */ null);
|
||||
"AudioTruncation packet with invalid packet label 0", /* cause= */ null);
|
||||
case MhasPacketHeader.PACTYP_MPEGH3DAFRAME:
|
||||
throw ParserException.createForMalformedContainer(
|
||||
"Mpegh3daFrame packet with invalid packet label 0", /* cause= */ null);
|
||||
|
|
@ -327,7 +85,10 @@ public final class MpeghUtil {
|
|||
}
|
||||
|
||||
int packetLength = checkedCast(readEscapedValue(data, 11, 24, 24));
|
||||
return new MhasPacketHeader(packetType, packetLabel, packetLength);
|
||||
|
||||
int headerLength = (data.getPosition() - dataStartPos) / C.BITS_PER_BYTE;
|
||||
|
||||
return new MhasPacketHeader(packetType, packetLabel, packetLength, headerLength);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -518,7 +279,7 @@ public final class MpeghUtil {
|
|||
* @return The {@link Mpegh3daConfig}.
|
||||
* @throws ParserException if a valid {@link Mpegh3daConfig} cannot be parsed.
|
||||
*/
|
||||
private static Mpegh3daConfig parseMpegh3daConfig(ParsableBitArray data) throws ParserException {
|
||||
public static Mpegh3daConfig parseMpegh3daConfig(ParsableBitArray data) throws ParserException {
|
||||
@Nullable byte[] compatibleProfileLevelSet = null;
|
||||
int profileLevelIndication = data.readBits(8);
|
||||
|
||||
|
|
@ -573,15 +334,14 @@ public final class MpeghUtil {
|
|||
* See ISO_IEC_23008-3;2022, 14.2.2, Table 225.
|
||||
*
|
||||
* @param data The bit array to be parsed.
|
||||
* @return The number of truncated samples or {@link C#LENGTH_UNSET} if decoder should ignore the
|
||||
* info.
|
||||
* @return The number of truncated samples.
|
||||
*/
|
||||
private static int parseAudioTruncationInfo(ParsableBitArray data) {
|
||||
public static int parseAudioTruncationInfo(ParsableBitArray data) {
|
||||
if (data.readBit()) { // isActive
|
||||
data.skipBits(2); // reserved(1), truncFromBegin(1)
|
||||
return data.readBits(13);
|
||||
}
|
||||
return C.LENGTH_UNSET;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -794,7 +554,7 @@ public final class MpeghUtil {
|
|||
|
||||
private MpeghUtil() {}
|
||||
|
||||
private static class MhasPacketHeader {
|
||||
public static class MhasPacketHeader {
|
||||
|
||||
/** MHAS packet types. See ISO_IEC_23008-3;2022, 14.4. */
|
||||
@Documented
|
||||
|
|
@ -823,47 +583,49 @@ public final class MpeghUtil {
|
|||
PACTYPE_PCMDATA,
|
||||
PACTYP_LOUDNESS
|
||||
})
|
||||
private @interface Type {}
|
||||
public @interface Type {}
|
||||
|
||||
private static final int PACTYP_FILLDATA = 0;
|
||||
private static final int PACTYP_MPEGH3DACFG = 1;
|
||||
private static final int PACTYP_MPEGH3DAFRAME = 2;
|
||||
private static final int PACTYP_AUDIOSCENEINFO = 3;
|
||||
private static final int PACTYP_SYNC = 6;
|
||||
private static final int PACTYP_SYNCGAP = 7;
|
||||
private static final int PACTYP_MARKER = 8;
|
||||
private static final int PACTYP_CRC16 = 9;
|
||||
private static final int PACTYP_CRC32 = 10;
|
||||
private static final int PACTYP_DESCRIPTOR = 11;
|
||||
private static final int PACTYP_USERINTERACTION = 12;
|
||||
private static final int PACTYP_LOUDNESS_DRC = 13;
|
||||
private static final int PACTYP_BUFFERINFO = 14;
|
||||
private static final int PACTYP_GLOBAL_CRC16 = 15;
|
||||
private static final int PACTYP_GLOBAL_CRC32 = 16;
|
||||
private static final int PACTYP_AUDIOTRUNCATION = 17;
|
||||
private static final int PACTYP_GENDATA = 18;
|
||||
private static final int PACTYPE_EARCON = 19;
|
||||
private static final int PACTYPE_PCMCONFIG = 20;
|
||||
private static final int PACTYPE_PCMDATA = 21;
|
||||
private static final int PACTYP_LOUDNESS = 22;
|
||||
public static final int PACTYP_FILLDATA = 0;
|
||||
public static final int PACTYP_MPEGH3DACFG = 1;
|
||||
public static final int PACTYP_MPEGH3DAFRAME = 2;
|
||||
public static final int PACTYP_AUDIOSCENEINFO = 3;
|
||||
public static final int PACTYP_SYNC = 6;
|
||||
public static final int PACTYP_SYNCGAP = 7;
|
||||
public static final int PACTYP_MARKER = 8;
|
||||
public static final int PACTYP_CRC16 = 9;
|
||||
public static final int PACTYP_CRC32 = 10;
|
||||
public static final int PACTYP_DESCRIPTOR = 11;
|
||||
public static final int PACTYP_USERINTERACTION = 12;
|
||||
public static final int PACTYP_LOUDNESS_DRC = 13;
|
||||
public static final int PACTYP_BUFFERINFO = 14;
|
||||
public static final int PACTYP_GLOBAL_CRC16 = 15;
|
||||
public static final int PACTYP_GLOBAL_CRC32 = 16;
|
||||
public static final int PACTYP_AUDIOTRUNCATION = 17;
|
||||
public static final int PACTYP_GENDATA = 18;
|
||||
public static final int PACTYPE_EARCON = 19;
|
||||
public static final int PACTYPE_PCMCONFIG = 20;
|
||||
public static final int PACTYPE_PCMDATA = 21;
|
||||
public static final int PACTYP_LOUDNESS = 22;
|
||||
|
||||
private @Type int packetType;
|
||||
private long packetLabel;
|
||||
private int packetLength;
|
||||
public final @Type int packetType;
|
||||
public final long packetLabel;
|
||||
public final int packetLength;
|
||||
public final int headerLength;
|
||||
|
||||
public MhasPacketHeader(@Type int type, long label, int length) {
|
||||
packetType = type;
|
||||
packetLabel = label;
|
||||
packetLength = length;
|
||||
public MhasPacketHeader(@Type int packetType, long packetLabel, int packetLength, int headerLength) {
|
||||
this.packetType = packetType;
|
||||
this.packetLabel = packetLabel;
|
||||
this.packetLength = packetLength;
|
||||
this.headerLength = headerLength;
|
||||
}
|
||||
}
|
||||
|
||||
private static class Mpegh3daConfig {
|
||||
public static class Mpegh3daConfig {
|
||||
|
||||
private final int profileLevelIndication;
|
||||
private final int samplingFrequency;
|
||||
private final int standardFrameSamples;
|
||||
@Nullable private final byte[] compatibleProfileLevelSet;
|
||||
public final int profileLevelIndication;
|
||||
public final int samplingFrequency;
|
||||
public final int standardFrameSamples;
|
||||
@Nullable public final byte[] compatibleProfileLevelSet;
|
||||
|
||||
private Mpegh3daConfig(
|
||||
int profileLevelIndication,
|
||||
|
|
|
|||
|
|
@ -15,16 +15,18 @@
|
|||
*/
|
||||
package androidx.media3.extractor.ts;
|
||||
|
||||
import static androidx.media3.common.util.Assertions.checkStateNotNull;
|
||||
import static androidx.media3.extractor.ts.TsPayloadReader.FLAG_DATA_ALIGNMENT_INDICATOR;
|
||||
import static androidx.media3.extractor.ts.TsPayloadReader.FLAG_RANDOM_ACCESS_INDICATOR;
|
||||
import static java.lang.Math.min;
|
||||
import static java.lang.annotation.ElementType.TYPE_USE;
|
||||
|
||||
import android.util.Log;
|
||||
import androidx.annotation.IntDef;
|
||||
import androidx.annotation.Nullable;
|
||||
import androidx.media3.common.C;
|
||||
import androidx.media3.common.Format;
|
||||
import androidx.media3.common.MimeTypes;
|
||||
import androidx.media3.common.ParserException;
|
||||
import androidx.media3.common.util.Assertions;
|
||||
import androidx.media3.common.util.ParsableBitArray;
|
||||
import androidx.media3.common.util.ParsableByteArray;
|
||||
import androidx.media3.common.util.UnstableApi;
|
||||
|
|
@ -33,6 +35,10 @@ import androidx.media3.extractor.ExtractorOutput;
|
|||
import androidx.media3.extractor.MpeghUtil;
|
||||
import androidx.media3.extractor.TrackOutput;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import java.lang.annotation.Documented;
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
import java.lang.annotation.Target;
|
||||
import java.util.List;
|
||||
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
|
||||
|
||||
|
|
@ -40,15 +46,21 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
|
|||
@UnstableApi
|
||||
public final class MpeghReader implements ElementaryStreamReader {
|
||||
|
||||
private static final String TAG = "MpeghReader";
|
||||
@Documented
|
||||
@Retention(RetentionPolicy.SOURCE)
|
||||
@Target(TYPE_USE)
|
||||
@IntDef({STATE_FINDING_SYNC, STATE_READING_PACKET_HEADER, STATE_READING_PACKET_PAYLOAD})
|
||||
private @interface State {}
|
||||
|
||||
private final ParsableByteArray dataBuffer;
|
||||
private static final int STATE_FINDING_SYNC = 0;
|
||||
private static final int STATE_READING_PACKET_HEADER = 1;
|
||||
private static final int STATE_READING_PACKET_PAYLOAD = 2;
|
||||
|
||||
private @State int state;
|
||||
|
||||
private @MonotonicNonNull String formatId;
|
||||
private @MonotonicNonNull TrackOutput output;
|
||||
private int dataInBuffer;
|
||||
|
||||
@Nullable private MpeghUtil.FrameInfo prevFrameInfo;
|
||||
|
||||
// The timestamp to attach to the next sample in the current packet.
|
||||
private double timeUs;
|
||||
|
|
@ -57,8 +69,43 @@ public final class MpeghReader implements ElementaryStreamReader {
|
|||
private boolean rapPending;
|
||||
private @TsPayloadReader.Flags int flags;
|
||||
|
||||
private int syncBytes;
|
||||
|
||||
private final ParsableByteArray headerScratchBytes;
|
||||
private boolean headerDataFinished;
|
||||
|
||||
private final ParsableByteArray dataScratchBytes;
|
||||
|
||||
private int payloadBytesRead;
|
||||
private int frameBytes;
|
||||
|
||||
@Nullable
|
||||
private MpeghUtil.MhasPacketHeader header;
|
||||
private int samplingRate;
|
||||
private int standardFrameLength;
|
||||
private int truncationSamples;
|
||||
private long mainStreamLabel;
|
||||
private boolean configFound;
|
||||
|
||||
|
||||
/**
|
||||
* Constructs a new reader for MPEG-H elementary streams.
|
||||
*/
|
||||
public MpeghReader() {
|
||||
dataBuffer = new ParsableByteArray();
|
||||
state = STATE_FINDING_SYNC;
|
||||
syncBytes = 0;
|
||||
headerScratchBytes = new ParsableByteArray(new byte[MpeghUtil.MAX_MHAS_PACKET_HEADER_SIZE]);
|
||||
dataScratchBytes = new ParsableByteArray();
|
||||
header = null;
|
||||
headerDataFinished = false;
|
||||
payloadBytesRead = 0;
|
||||
frameBytes = 0;
|
||||
samplingRate = C.RATE_UNSET_INT;
|
||||
standardFrameLength = C.LENGTH_UNSET;
|
||||
truncationSamples = 0;
|
||||
mainStreamLabel = C.INDEX_UNSET;
|
||||
configFound = false;
|
||||
dataPending = false;
|
||||
rapPending = true;
|
||||
timeUs = C.TIME_UNSET;
|
||||
timeUsPending = C.TIME_UNSET;
|
||||
|
|
@ -66,8 +113,24 @@ public final class MpeghReader implements ElementaryStreamReader {
|
|||
|
||||
@Override
|
||||
public void seek() {
|
||||
clearDataBuffer();
|
||||
state = STATE_FINDING_SYNC;
|
||||
syncBytes = 0;
|
||||
headerScratchBytes.setPosition(0);
|
||||
dataScratchBytes.setPosition(0);
|
||||
dataScratchBytes.setLimit(0);
|
||||
header = null;
|
||||
headerDataFinished = false;
|
||||
payloadBytesRead = 0;
|
||||
frameBytes = 0;
|
||||
samplingRate = C.RATE_UNSET_INT;
|
||||
standardFrameLength = C.LENGTH_UNSET;
|
||||
truncationSamples = 0;
|
||||
mainStreamLabel = C.INDEX_UNSET;
|
||||
configFound = false;
|
||||
dataPending = false;
|
||||
rapPending = true;
|
||||
timeUs = C.TIME_UNSET;
|
||||
timeUsPending = C.TIME_UNSET;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -82,12 +145,7 @@ public final class MpeghReader implements ElementaryStreamReader {
|
|||
public void packetStarted(long pesTimeUs, @TsPayloadReader.Flags int flags) {
|
||||
this.flags = flags;
|
||||
|
||||
if ((this.flags & FLAG_DATA_ALIGNMENT_INDICATOR) != 0 && dataInBuffer != 0) {
|
||||
Log.w(TAG, "Internal byte buffer was unexpectedly not empty at data aligned PES");
|
||||
clearDataBuffer();
|
||||
}
|
||||
|
||||
if (dataInBuffer > 0) {
|
||||
if (!rapPending && (frameBytes != 0 || !headerDataFinished)) {
|
||||
dataPending = true;
|
||||
}
|
||||
|
||||
|
|
@ -101,132 +159,206 @@ public final class MpeghReader implements ElementaryStreamReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void consume(ParsableByteArray data) {
|
||||
// write the PES payload to a data buffer until the packet is complete
|
||||
appendToDataBuffer(data);
|
||||
public void consume(ParsableByteArray data) throws ParserException {
|
||||
Assertions.checkStateNotNull(output); // Asserts that createTracks has been called.
|
||||
|
||||
int headerDataPos;
|
||||
ParsableBitArray bitArray = new ParsableBitArray();
|
||||
while (data.bytesLeft() > 0) {
|
||||
switch (state) {
|
||||
case STATE_FINDING_SYNC:
|
||||
if (skipToNextSync(data)) {
|
||||
state = STATE_READING_PACKET_HEADER;
|
||||
}
|
||||
break;
|
||||
case STATE_READING_PACKET_HEADER:
|
||||
// check if the gathering of data in header scratch buffer was finished and adjust remaining bytes
|
||||
if (headerDataFinished && headerScratchBytes.getPosition() > 0) {
|
||||
System.arraycopy(headerScratchBytes.getData(), headerScratchBytes.getPosition(),
|
||||
headerScratchBytes.getData(), 0, headerScratchBytes.bytesLeft());
|
||||
headerScratchBytes.setPosition(headerScratchBytes.bytesLeft());
|
||||
headerDataFinished = false;
|
||||
}
|
||||
|
||||
// read into header scratch buffer
|
||||
if (continueRead(data, headerScratchBytes, MpeghUtil.MAX_MHAS_PACKET_HEADER_SIZE)) {
|
||||
// make the scratch bytes available for parsing
|
||||
headerScratchBytes.setPosition(0);
|
||||
bitArray.reset(headerScratchBytes);
|
||||
|
||||
// parse the MHAS packet header
|
||||
header = MpeghUtil.parseMhasPacketHeader(bitArray);
|
||||
|
||||
// write the packet header to output
|
||||
output.sampleData(headerScratchBytes, header.headerLength);
|
||||
|
||||
payloadBytesRead = 0;
|
||||
frameBytes += header.packetLength + header.headerLength;
|
||||
|
||||
if (header.packetType == MpeghUtil.MhasPacketHeader.PACTYP_AUDIOTRUNCATION ||
|
||||
header.packetType == MpeghUtil.MhasPacketHeader.PACTYP_MPEGH3DACFG) {
|
||||
dataScratchBytes.ensureCapacity(header.packetLength);
|
||||
dataScratchBytes.setPosition(0);
|
||||
dataScratchBytes.setLimit(header.packetLength);
|
||||
}
|
||||
// MHAS packet header finished -> obtain the packet payload
|
||||
state = STATE_READING_PACKET_PAYLOAD;
|
||||
headerDataFinished = true;
|
||||
}
|
||||
break;
|
||||
case STATE_READING_PACKET_PAYLOAD:
|
||||
if (header == null) {
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
if (header.packetType == MpeghUtil.MhasPacketHeader.PACTYP_MPEGH3DACFG ||
|
||||
header.packetType == MpeghUtil.MhasPacketHeader.PACTYP_AUDIOTRUNCATION) {
|
||||
// read bytes from header scratch buffer into the data scratch buffer
|
||||
headerDataPos = headerScratchBytes.getPosition();
|
||||
if (headerDataPos != MpeghUtil.MAX_MHAS_PACKET_HEADER_SIZE) {
|
||||
continueRead(headerScratchBytes, dataScratchBytes, header.packetLength);
|
||||
}
|
||||
headerScratchBytes.setPosition(headerDataPos);
|
||||
// read bytes from input data into the data scratch buffer
|
||||
int dataStartPos = data.getPosition();
|
||||
continueRead(data, dataScratchBytes, header.packetLength);
|
||||
data.setPosition(dataStartPos);
|
||||
}
|
||||
|
||||
int bytesToRead;
|
||||
// read bytes from header scratch buffer and write them into the output
|
||||
headerDataPos = headerScratchBytes.getPosition();
|
||||
if (headerDataPos != MpeghUtil.MAX_MHAS_PACKET_HEADER_SIZE) {
|
||||
bytesToRead = min(headerScratchBytes.bytesLeft(),
|
||||
header.packetLength - payloadBytesRead);
|
||||
output.sampleData(headerScratchBytes, bytesToRead);
|
||||
payloadBytesRead += bytesToRead;
|
||||
}
|
||||
// read bytes from input data and write them into the output
|
||||
bytesToRead = min(data.bytesLeft(), header.packetLength - payloadBytesRead);
|
||||
output.sampleData(data, bytesToRead);
|
||||
payloadBytesRead += bytesToRead;
|
||||
|
||||
if (payloadBytesRead == header.packetLength) {
|
||||
dataScratchBytes.setPosition(0);
|
||||
bitArray.reset(dataScratchBytes);
|
||||
if (header.packetType == MpeghUtil.MhasPacketHeader.PACTYP_MPEGH3DACFG) {
|
||||
MpeghUtil.Mpegh3daConfig config = MpeghUtil.parseMpegh3daConfig(bitArray);
|
||||
samplingRate = config.samplingFrequency;
|
||||
standardFrameLength = config.standardFrameSamples;
|
||||
if (mainStreamLabel != header.packetLabel) {
|
||||
mainStreamLabel = header.packetLabel;
|
||||
// set the output format
|
||||
String codecs = "mhm1";
|
||||
if (config.profileLevelIndication != C.INDEX_UNSET) {
|
||||
codecs += String.format(".%02X", config.profileLevelIndication);
|
||||
}
|
||||
@Nullable List<byte[]> initializationData = null;
|
||||
if (config.compatibleProfileLevelSet != null
|
||||
&& config.compatibleProfileLevelSet.length > 0) {
|
||||
// The first entry in initializationData is reserved for the audio specific config.
|
||||
initializationData = ImmutableList.of(Util.EMPTY_BYTE_ARRAY,
|
||||
config.compatibleProfileLevelSet);
|
||||
}
|
||||
Format format =
|
||||
new Format.Builder()
|
||||
.setId(formatId)
|
||||
.setSampleMimeType(MimeTypes.AUDIO_MPEGH_MHM1)
|
||||
.setSampleRate(samplingRate)
|
||||
.setCodecs(codecs)
|
||||
.setInitializationData(initializationData)
|
||||
.build();
|
||||
output.format(format);
|
||||
}
|
||||
configFound = true;
|
||||
} else if (header.packetType == MpeghUtil.MhasPacketHeader.PACTYP_AUDIOTRUNCATION) {
|
||||
truncationSamples = MpeghUtil.parseAudioTruncationInfo(bitArray);
|
||||
} else if (header.packetType == MpeghUtil.MhasPacketHeader.PACTYP_MPEGH3DAFRAME) {
|
||||
@C.BufferFlags int flag = 0;
|
||||
// if we have a frame with an mpegh3daConfig, set the first obtained AU to a key frame
|
||||
if (configFound) {
|
||||
flag = C.BUFFER_FLAG_KEY_FRAME;
|
||||
rapPending = false;
|
||||
}
|
||||
double sampleDurationUs =
|
||||
(double) C.MICROS_PER_SECOND * (standardFrameLength - truncationSamples)
|
||||
/ samplingRate;
|
||||
long pts = Math.round(timeUs);
|
||||
if (dataPending) {
|
||||
dataPending = false;
|
||||
timeUs = timeUsPending;
|
||||
} else {
|
||||
timeUs += sampleDurationUs;
|
||||
}
|
||||
output.sampleMetadata(pts, flag, frameBytes, 0, null);
|
||||
configFound = false;
|
||||
truncationSamples = 0;
|
||||
frameBytes = 0;
|
||||
}
|
||||
header = null;
|
||||
// MHAS packet payload finished -> obtain a new packet header
|
||||
state = STATE_READING_PACKET_HEADER;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Drains the internal data buffer: repeatedly parses one complete MPEG-H frame (access unit) from
 * {@code dataBuffer}, publishes a new output format when the frame carries a changed config, and
 * writes the frame's bytes and metadata to {@code output}. The loop ends as soon as the buffered
 * data no longer holds a complete frame.
 *
 * <p>NOTE(review): the visible body never reads {@code isEndOfInput}.
 *
 * @param isEndOfInput Not used by the visible body.
 */
@Override
|
||||
public void packetFinished(boolean isEndOfInput) {
|
||||
checkStateNotNull(output); // Asserts that createTracks has been called.
|
||||
// try to find the sync packet and adjust the data buffer if necessary
|
||||
maybeFindSync();
|
||||
|
||||
// A single bit reader is allocated once and re-pointed at dataBuffer on every iteration.
ParsableBitArray dataBitBuffer = new ParsableBitArray();
|
||||
// get as many MPEG-H AUs as possible from the data buffer
|
||||
while (true) {
|
||||
dataBitBuffer.reset(dataBuffer);
|
||||
|
||||
// check if a complete MPEG-H frame could be parsed
|
||||
if (!MpeghUtil.canParseFrame(dataBitBuffer)) {
|
||||
// parsing could not be completed because of not enough data
|
||||
break;
|
||||
}
|
||||
|
||||
MpeghUtil.FrameInfo frameInfo;
|
||||
try {
|
||||
// The previous frame's info is passed in; presumably it is what configChanged is derived
// from -- TODO confirm against MpeghUtil.parseFrame.
frameInfo = MpeghUtil.parseFrame(dataBitBuffer, prevFrameInfo);
|
||||
} catch (ParserException e) {
|
||||
// an error occurred --> maybe try to find sync and proceed with processing
|
||||
// NOTE(review): byteAlign() acts on dataBitBuffer, while removeUsedFromDataBuffer() reads the
// position of dataBuffer; confirm that the two positions are kept in sync, otherwise nothing
// is discarded here and the loop may retry the same bytes.
dataBitBuffer.byteAlign();
|
||||
removeUsedFromDataBuffer();
|
||||
// After a parse error, wait for the next random access point before emitting again.
rapPending = true;
|
||||
maybeFindSync();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (frameInfo.configChanged && frameInfo.containsConfig) {
|
||||
// set the output format
|
||||
// Codecs string is "mhm1", optionally followed by "." and the profile level indication as
// two upper-case hex digits.
String codecs = "mhm1";
|
||||
if (frameInfo.mpegh3daProfileLevelIndication != C.INDEX_UNSET) {
|
||||
codecs += String.format(".%02X", frameInfo.mpegh3daProfileLevelIndication);
|
||||
}
|
||||
@Nullable List<byte[]> initializationData = null;
|
||||
if (frameInfo.compatibleSetIndication != null
|
||||
&& frameInfo.compatibleSetIndication.length > 0) {
|
||||
// The first entry in initializationData is reserved for the audio specific config.
|
||||
initializationData =
|
||||
ImmutableList.of(Util.EMPTY_BYTE_ARRAY, frameInfo.compatibleSetIndication);
|
||||
}
|
||||
Format format =
|
||||
new Format.Builder()
|
||||
.setId(formatId)
|
||||
.setSampleMimeType(MimeTypes.AUDIO_MPEGH_MHM1)
|
||||
.setSampleRate(frameInfo.samplingRate)
|
||||
.setCodecs(codecs)
|
||||
.setInitializationData(initializationData)
|
||||
.build();
|
||||
output.format(format);
|
||||
}
|
||||
|
||||
// write AU to output
|
||||
// The frame is written starting from the beginning of dataBuffer.
dataBuffer.setPosition(0);
|
||||
output.sampleData(dataBuffer, frameInfo.frameBytes);
|
||||
|
||||
@C.BufferFlags int flag = 0;
|
||||
// if we have a frame with an mpegh3daConfig, set the first obtained AU to a key frame
|
||||
if (frameInfo.containsConfig) {
|
||||
flag = C.BUFFER_FLAG_KEY_FRAME;
|
||||
rapPending = false;
|
||||
}
|
||||
// Frame duration in microseconds, kept as a double so fractional durations accumulate in timeUs.
double sampleDurationUs =
|
||||
(double) C.MICROS_PER_SECOND * frameInfo.frameSamples / frameInfo.samplingRate;
|
||||
// The presentation time of this frame is taken before timeUs is moved on to the next frame.
long pts = Math.round(timeUs);
|
||||
if (dataPending) {
|
||||
// A pending timestamp replaces the running time instead of adding this frame's duration.
dataPending = false;
|
||||
timeUs = timeUsPending;
|
||||
} else {
|
||||
timeUs += sampleDurationUs;
|
||||
}
|
||||
output.sampleMetadata(pts, flag, frameInfo.frameBytes, 0, null);
|
||||
|
||||
// Presumably sampleData() advanced dataBuffer past the frame, so this drops the emitted bytes
// -- TODO confirm.
removeUsedFromDataBuffer();
|
||||
prevFrameInfo = frameInfo;
|
||||
}
|
||||
// NOTE(review): the comment below contradicts the body above; it looks like a line from another
// revision of this method that the diff rendering merged in -- confirm which revision is live.
// Do nothing.
|
||||
}
|
||||
|
||||
private void maybeFindSync() {
|
||||
|
||||
/**
|
||||
* Continues a read from the provided {@code source} into a given {@code target}.
|
||||
*
* <p>Bytes are copied into the backing array of {@code target} starting at its current position,
* and the position of {@code target} is then advanced by the number of bytes copied, so it
* doubles as the count of bytes collected so far. At most {@code source.bytesLeft()} bytes are
* taken in one call.
*
* <p>NOTE(review): assumes {@code target.getPosition() <= targetLength} and that the backing
* array of {@code target} holds at least {@code targetLength} bytes -- confirm at the call sites.
*
|
||||
* @param source The source from which to read.
|
||||
* @param target The target into which data is to be read.
|
||||
* @param targetLength The target length of the read.
|
||||
* @return Whether the target length was reached.
|
||||
*/
|
||||
private boolean continueRead(ParsableByteArray source, ParsableByteArray target,
|
||||
int targetLength) {
|
||||
// Never read more than the source still holds, nor more than the target is still missing.
int bytesToRead = min(source.bytesLeft(), targetLength - target.getPosition());
|
||||
source.readBytes(target.getData(), target.getPosition(), bytesToRead);
|
||||
// Track progress in the target's own position.
target.setPosition(target.getPosition() + bytesToRead);
|
||||
return target.getPosition() == targetLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Locates the next SYNC value in the buffer. On a match the position is set back by
|
||||
* MHAS_SYNC_WORD_LENGTH from the byte after SYNC; without a match it is advanced to the limit.
|
||||
*
|
||||
* @param pesBuffer The buffer whose position should be advanced.
|
||||
* @return Whether SYNC was found.
|
||||
*/
|
||||
private boolean skipToNextSync(ParsableByteArray pesBuffer) {
|
||||
// we are still waiting for a RAP frame
|
||||
if (rapPending) {
|
||||
if ((flags & FLAG_RANDOM_ACCESS_INDICATOR) == 0) {
|
||||
// RAI is not signalled -> drop the PES data
|
||||
clearDataBuffer();
|
||||
pesBuffer.setPosition(pesBuffer.limit());
|
||||
} else {
|
||||
if ((flags & FLAG_DATA_ALIGNMENT_INDICATOR) == 0) {
|
||||
// if RAI is signalled but the data is not aligned we need to find the sync packet
|
||||
if (!MpeghUtil.findSyncPacket(dataBuffer)) {
|
||||
// sync packet could not be found -> drop the PES data
|
||||
clearDataBuffer();
|
||||
return;
|
||||
while (pesBuffer.bytesLeft() > 0) {
|
||||
syncBytes <<= C.BITS_PER_BYTE;
|
||||
syncBytes |= pesBuffer.readUnsignedByte();
|
||||
if (MpeghUtil.isSyncWord(syncBytes)) {
|
||||
pesBuffer.setPosition(pesBuffer.getPosition() - MpeghUtil.MHAS_SYNC_WORD_LENGTH);
|
||||
syncBytes = 0;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// sync packet was found -> remove PES data before the sync packet
|
||||
removeUsedFromDataBuffer();
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
pesBuffer.setPosition(pesBuffer.limit());
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Discards all buffered data: clears {@code dataPending}, sets {@code rapPending}, and resets
 * {@code dataBuffer} with a byte count of zero.
 */
private void clearDataBuffer() {
|
||||
dataPending = false;
|
||||
// Setting rapPending here means dropping the buffer always re-enters the "waiting for RAP" state.
rapPending = true;
|
||||
dataInBuffer = 0;
|
||||
dataBuffer.reset(dataInBuffer);
|
||||
}
|
||||
|
||||
/**
 * Appends all bytes remaining in {@code data} to the end of {@code dataBuffer} and consumes them
 * from {@code data}.
 *
 * @param data The buffer whose remaining bytes are copied; its position is moved past them.
 */
private void appendToDataBuffer(ParsableByteArray data) {
|
||||
int bytesToRead = data.bytesLeft();
|
||||
// Make room for the new bytes behind the dataInBuffer bytes already held.
dataBuffer.ensureCapacity(dataInBuffer + bytesToRead);
|
||||
System.arraycopy(
|
||||
data.getData(), data.getPosition(), dataBuffer.getData(), dataInBuffer, bytesToRead);
|
||||
// The copy above does not move data's position, so mark the bytes as read explicitly.
data.skipBytes(bytesToRead);
|
||||
dataInBuffer += bytesToRead;
|
||||
// Presumably reset(int) rewinds the position to 0 and sets the limit to the new byte count
// -- TODO confirm.
dataBuffer.reset(dataInBuffer);
|
||||
}
|
||||
|
||||
/**
 * Drops the bytes in front of the current position of {@code dataBuffer}: the bytes from the
 * position onwards are shifted to index 0 and {@code dataInBuffer} is reduced accordingly.
 */
private void removeUsedFromDataBuffer() {
|
||||
// Bytes that remain after removing everything before the current position.
dataInBuffer -= dataBuffer.getPosition();
|
||||
// Source and destination are the same array; System.arraycopy handles overlapping ranges.
System.arraycopy(
|
||||
dataBuffer.getData(), dataBuffer.getPosition(), dataBuffer.getData(), 0, dataInBuffer);
|
||||
dataBuffer.reset(dataInBuffer);
|
||||
// NOTE(review): `return false;` inside a void method -- this looks like the tail of a different,
// boolean-returning method merged in by the diff rendering; confirm against the real file.
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue