improve PES packet parsing for MPEG-H

2026-04-27 15:07:40 +00:00 · 2024-01-18 10:01:50 +01:00 · 2024-01-18 10:01:50 +01:00 · b6990c3c5b
commit b6990c3c5b
parent 5df45f7e64
2 changed files with 314 additions and 420 deletions
--- a/libraries/extractor/src/main/java/androidx/media3/extractor/MpeghUtil.java
+++ b/libraries/extractor/src/main/java/androidx/media3/extractor/MpeghUtil.java
@ -23,274 +23,31 @@ import androidx.annotation.Nullable;
 import androidx.media3.common.C;
 import androidx.media3.common.ParserException;
 import androidx.media3.common.util.ParsableBitArray;
-import androidx.media3.common.util.ParsableByteArray;
 import androidx.media3.common.util.UnstableApi;
 import java.lang.annotation.Documented;
 import java.lang.annotation.Retention;
 import java.lang.annotation.RetentionPolicy;
 import java.lang.annotation.Target;
-import java.util.Arrays;

 /** Utility methods for parsing MPEG-H frames, which are access units in MPEG-H bitstreams. */
@UnstableApi
 public final class MpeghUtil {

-  /** Holds information contained in the parsed MPEG-H frame. */
-  public static final class FrameInfo {
-
-    /** Signals if the MPEG-H frame contains a mpegh3daConfig packet. */
-    public final boolean containsConfig;
-
-    /** Signals if the mpegh3daConfig packet in the MPEG-H frame has changed. */
-    public final boolean configChanged;
-
-    /** The default number of audio samples in the frame. */
-    public final int standardFrameSamples;
-
-    /** The audio sampling rate in Hz. */
-    public final int samplingRate;
-
-    /** The actual number of audio samples in the frame. */
-    public final int frameSamples;
-
-    /** The number of bytes building the frame. */
-    public final int frameBytes;
-
-    /** The label of the main stream in the frame. */
-    public final long mainStreamLabel;
-
-    /** The profile level indication of the audio in the frame. */
-    public final int mpegh3daProfileLevelIndication;
-
-    /** An array of compatible profile level indications of the audio in the frame. */
-    @Nullable public final byte[] compatibleSetIndication;
-
-    /**
-     * Initializes the {@link FrameInfo} with fields containing certain values.
-     *
-     * @param containsConfig See {@link #containsConfig}.
-     * @param configChanged See {@link #configChanged}.
-     * @param standardFrameSamples See {@link #standardFrameSamples}.
-     * @param samplingRate See {@link #samplingRate}.
-     * @param frameSamples See {@link #frameSamples}.
-     * @param frameBytes See {@link #frameBytes}.
-     * @param mainStreamLabel See {@link #mainStreamLabel}.
-     * @param mpegh3daProfileLevelIndication See {@link #mpegh3daProfileLevelIndication}.
-     * @param compatibleSetIndication See {@link #compatibleSetIndication}.
-     */
-    public FrameInfo(
-        boolean containsConfig,
-        boolean configChanged,
-        int standardFrameSamples,
-        int samplingRate,
-        int frameSamples,
-        int frameBytes,
-        long mainStreamLabel,
-        int mpegh3daProfileLevelIndication,
-        @Nullable byte[] compatibleSetIndication) {
-      this.containsConfig = containsConfig;
-      this.configChanged = configChanged;
-      this.standardFrameSamples = standardFrameSamples;
-      this.samplingRate = samplingRate;
-      this.frameSamples = frameSamples;
-      this.frameBytes = frameBytes;
-      this.mainStreamLabel = mainStreamLabel;
-      this.mpegh3daProfileLevelIndication = mpegh3daProfileLevelIndication;
-      this.compatibleSetIndication =
-          compatibleSetIndication != null
-              ? Arrays.copyOf(compatibleSetIndication, compatibleSetIndication.length)
-              : null;
-    }
-  }
-
  /** See ISO_IEC_23003-8;2022, 14.4.4. */
-  private static final byte[] MHAS_SYNC_PACKET = new byte[] {(byte) 0xC0, (byte) 0x01, (byte) 0xA5};
+  private static final int MHAS_SYNC_WORD = 0xC001A5;

-  private static final int MHAS_SYNC_PACKET_LENGTH = 3;
+  public static final int MHAS_SYNC_WORD_LENGTH = 3;
+
+  public static final int MAX_MHAS_PACKET_HEADER_SIZE = 15;

  /**
-   * Locates the next MHAS sync packet, advancing the position to the start of the sync packet. If a
-   * sync packet was not located, the position is advanced to the limit. See ISO_IEC_23008-3;2022,
-   * 14.4.4.
+   * Returns whether a given integer matches an MHAS sync word. See ISO_IEC_23008-3;2022, 14.4.4.
   *
-   * @param data The byte array whose position should be advanced.
-   * @return Whether a sync packet position was found.
+   * @param word An integer; only its lower 24 bits are compared against the sync word.
+   * @return Whether a given integer matches an MHAS sync word.
   */
-  public static boolean findSyncPacket(ParsableByteArray data) {
-    int syncIndex = 0;
-    while (syncIndex < MHAS_SYNC_PACKET_LENGTH
-        && data.bytesLeft() >= MHAS_SYNC_PACKET_LENGTH - syncIndex) {
-      if (data.readUnsignedByte() == MHAS_SYNC_PACKET[syncIndex]) {
-        syncIndex++;
-      } else {
-        syncIndex = 0; // Restart comparison from the beginning
-      }
-    }
-
-    if (syncIndex == MHAS_SYNC_PACKET_LENGTH) {
-      data.setPosition(data.getPosition() - MHAS_SYNC_PACKET_LENGTH - 1);
-      return true;
-    }
-
-    data.setPosition(data.limit());
-    return false;
-  }
-
-  /**
-   * Checks if a complete MHAS frame could be parsed by calculating if enough data is available in
-   * the provided {@link ParsableBitArray}.
-   *
-   * @param data The bit array to parse.
-   * @return Whether a complete MHAS frame could be parsed.
-   */
-  public static boolean canParseFrame(ParsableBitArray data) {
-    boolean result = false;
-    int originalPosition = data.getPosition();
-    while (true) {
-      MhasPacketHeader header;
-      try {
-        header = parseMhasPacketHeader(data);
-      } catch (Exception e) {
-        // There is not enough data available to parse the MHAS packet header.
-        break;
-      }
-      if (data.bitsLeft() < header.packetLength * C.BITS_PER_BYTE) {
-        // There is not enough data available to parse the current MHAS packet.
-        break;
-      }
-      data.skipBytes(header.packetLength);
-
-      if (header.packetType == MhasPacketHeader.PACTYP_MPEGH3DAFRAME) {
-        // An mpegh3daFrame packet has been found which signals the end of the MHAS frame.
-        result = true;
-        break;
-      }
-    }
-    data.setPosition(originalPosition);
-    return result;
-  }
-
-  /**
-   * Parses the necessary info of an MPEG-H frame into the FrameInfo structure.
-   *
-   * @param data The bit array to parse, positioned at the start of the MHAS frame.
-   * @param prevFrameInfo A {@link FrameInfo} derived from the previous frame in the stream or
-   *     {@code null} when there is no previous frame.
-   * @return {@link FrameInfo} of the current frame.
-   * @throws ParserException if a valid {@link FrameInfo} cannot be parsed.
-   */
-  public static FrameInfo parseFrame(ParsableBitArray data, @Nullable FrameInfo prevFrameInfo)
-      throws ParserException {
-    int standardFrameSamples;
-    int samplingFrequency;
-    long mainStreamLabel;
-    boolean frameFound = false;
-    boolean configFound = false;
-    boolean configChanged = false;
-    int truncationSamples = C.LENGTH_UNSET;
-    int mpegh3daProfileLevelIndication = -1;
-    @Nullable byte[] compatibleSetIndication = null;
-
-    if (prevFrameInfo != null) {
-      standardFrameSamples = prevFrameInfo.standardFrameSamples;
-      samplingFrequency = prevFrameInfo.samplingRate;
-      mainStreamLabel = prevFrameInfo.mainStreamLabel;
-    } else {
-      standardFrameSamples = C.LENGTH_UNSET;
-      samplingFrequency = C.RATE_UNSET_INT;
-      mainStreamLabel = -1;
-    }
-
-    int initialBitsLeft = data.bitsLeft();
-
-    if (!data.isByteAligned()) {
-      throw ParserException.createForMalformedContainer(
-          "Input data buffer is not Byte aligned", /* cause= */ null);
-    }
-
-    do {
-      // parse MHAS packet header
-      MhasPacketHeader packetHeader = parseMhasPacketHeader(data);
-      int originalPosition = data.getPosition();
-
-      switch (packetHeader.packetType) {
-        case MhasPacketHeader.PACTYP_MPEGH3DACFG:
-          // we already found a mpegh3daConfig
-          if (configFound) {
-            throw ParserException.createForMalformedContainer(
-                "Found a second mpegh3daConfig packet", /* cause= */ null);
-          }
-          configFound = true;
-
-          // check for config change
-          if (packetHeader.packetLabel != mainStreamLabel) {
-            configChanged = true;
-          }
-          // save new packet label
-          mainStreamLabel = packetHeader.packetLabel;
-
-          // parse the mpegh3daConfig
-          Mpegh3daConfig mpegh3daConfig = parseMpegh3daConfig(data);
-
-          // get the necessary data from mpegh3daConfig
-          samplingFrequency = mpegh3daConfig.samplingFrequency;
-          standardFrameSamples = mpegh3daConfig.standardFrameSamples;
-          mpegh3daProfileLevelIndication = mpegh3daConfig.profileLevelIndication;
-          if (mpegh3daConfig.compatibleProfileLevelSet != null) {
-            compatibleSetIndication = mpegh3daConfig.compatibleProfileLevelSet;
-          }
-
-          data.setPosition(originalPosition);
-          data.skipBytes(packetHeader.packetLength);
-          break;
-
-        case MhasPacketHeader.PACTYP_AUDIOTRUNCATION:
-          truncationSamples = parseAudioTruncationInfo(data);
-          if (truncationSamples > standardFrameSamples) {
-            throw ParserException.createForMalformedContainer(
-                "Truncation size is too big", /* cause= */ null);
-          }
-
-          data.setPosition(originalPosition);
-          data.skipBytes(packetHeader.packetLength);
-          break;
-
-        case MhasPacketHeader.PACTYP_MPEGH3DAFRAME:
-          // check packet label
-          if (packetHeader.packetLabel != mainStreamLabel) {
-            throw ParserException.createForMalformedContainer(
-                "Mpegh3daFrame packet does not belong to main stream", /* cause= */ null);
-          }
-          frameFound = true;
-          data.skipBytes(packetHeader.packetLength);
-          break;
-
-        default:
-          data.skipBytes(packetHeader.packetLength);
-          break;
-      }
-
-      if (!data.isByteAligned()) {
-        throw ParserException.createForMalformedContainer(
-            "Data buffer is not Byte aligned after parsing", /* cause= */ null);
-      }
-
-    } while (!frameFound);
-
-    int parsedBytes = (initialBitsLeft - data.bitsLeft()) / C.BITS_PER_BYTE;
-
-    return new FrameInfo(
-        configFound,
-        configChanged,
-        standardFrameSamples,
-        /* samplingRate= */ samplingFrequency,
-        /* frameSamples= */ (truncationSamples == C.LENGTH_UNSET
-            ? standardFrameSamples
-            : standardFrameSamples - truncationSamples),
-        /* frameBytes= */ parsedBytes,
-        mainStreamLabel,
-        mpegh3daProfileLevelIndication,
-        compatibleSetIndication);
+  public static boolean isSyncWord(int word) {
+    return (word & 0xFFFFFF) == MHAS_SYNC_WORD;
  }

  /**
@ -300,8 +57,9 @@ public final class MpeghUtil {
   * @return The {@link MhasPacketHeader} info.
   * @throws ParserException if a valid {@link MhasPacketHeader} cannot be parsed.
   */
-  private static MhasPacketHeader parseMhasPacketHeader(ParsableBitArray data)
+  public static MhasPacketHeader parseMhasPacketHeader(ParsableBitArray data)
      throws ParserException {
+    int dataStartPos = data.getPosition();
    @MhasPacketHeader.Type int packetType = checkedCast(readEscapedValue(data, 3, 8, 8));
    long packetLabel = readEscapedValue(data, 2, 8, 32);

@ -314,10 +72,10 @@ public final class MpeghUtil {
      switch (packetType) {
        case MhasPacketHeader.PACTYP_MPEGH3DACFG:
          throw ParserException.createForMalformedContainer(
-              "Mpegh3daConfig packet with invalid packet label 0", /* cause= */ null);
+              "Mpegh3daConfig packet with invalid packet label 0", /* cause= */  null);
        case MhasPacketHeader.PACTYP_AUDIOTRUNCATION:
          throw ParserException.createForMalformedContainer(
-              "AudioTruncation packet with invalid packet label 0", /* cause= */ null);
+              "AudioTruncation packet with invalid packet label 0", /* cause= */  null);
        case MhasPacketHeader.PACTYP_MPEGH3DAFRAME:
          throw ParserException.createForMalformedContainer(
              "Mpegh3daFrame packet with invalid packet label 0", /* cause= */ null);
@ -327,7 +85,10 @@ public final class MpeghUtil {
    }

    int packetLength = checkedCast(readEscapedValue(data, 11, 24, 24));
-    return new MhasPacketHeader(packetType, packetLabel, packetLength);
+
+    int headerLength = (data.getPosition() - dataStartPos) / C.BITS_PER_BYTE;
+
+    return new MhasPacketHeader(packetType, packetLabel, packetLength, headerLength);
  }

  /**
@ -518,7 +279,7 @@ public final class MpeghUtil {
   * @return The {@link Mpegh3daConfig}.
   * @throws ParserException if a valid {@link Mpegh3daConfig} cannot be parsed.
   */
-  private static Mpegh3daConfig parseMpegh3daConfig(ParsableBitArray data) throws ParserException {
+  public static Mpegh3daConfig parseMpegh3daConfig(ParsableBitArray data) throws ParserException {
    @Nullable byte[] compatibleProfileLevelSet = null;
    int profileLevelIndication = data.readBits(8);

@ -573,15 +334,14 @@ public final class MpeghUtil {
   * See ISO_IEC_23008-3;2022, 14.2.2, Table 225.
   *
   * @param data The bit array to be parsed.
-   * @return The number of truncated samples or {@link C#LENGTH_UNSET} if decoder should ignore the
-   *     info.
+   * @return The number of truncated samples, or 0 if the truncation info is not active.
   */
-  private static int parseAudioTruncationInfo(ParsableBitArray data) {
+  public static int parseAudioTruncationInfo(ParsableBitArray data) {
    if (data.readBit()) { // isActive
      data.skipBits(2); // reserved(1), truncFromBegin(1)
      return data.readBits(13);
    }
-    return C.LENGTH_UNSET;
+    return 0;
  }

  /**
@ -794,7 +554,7 @@ public final class MpeghUtil {

  private MpeghUtil() {}

-  private static class MhasPacketHeader {
+  public static class MhasPacketHeader {

    /** MHAS packet types. See ISO_IEC_23008-3;2022, 14.4. */
    @Documented
@ -823,47 +583,49 @@ public final class MpeghUtil {
      PACTYPE_PCMDATA,
      PACTYP_LOUDNESS
    })
-    private @interface Type {}
+    public @interface Type {}

-    private static final int PACTYP_FILLDATA = 0;
-    private static final int PACTYP_MPEGH3DACFG = 1;
-    private static final int PACTYP_MPEGH3DAFRAME = 2;
-    private static final int PACTYP_AUDIOSCENEINFO = 3;
-    private static final int PACTYP_SYNC = 6;
-    private static final int PACTYP_SYNCGAP = 7;
-    private static final int PACTYP_MARKER = 8;
-    private static final int PACTYP_CRC16 = 9;
-    private static final int PACTYP_CRC32 = 10;
-    private static final int PACTYP_DESCRIPTOR = 11;
-    private static final int PACTYP_USERINTERACTION = 12;
-    private static final int PACTYP_LOUDNESS_DRC = 13;
-    private static final int PACTYP_BUFFERINFO = 14;
-    private static final int PACTYP_GLOBAL_CRC16 = 15;
-    private static final int PACTYP_GLOBAL_CRC32 = 16;
-    private static final int PACTYP_AUDIOTRUNCATION = 17;
-    private static final int PACTYP_GENDATA = 18;
-    private static final int PACTYPE_EARCON = 19;
-    private static final int PACTYPE_PCMCONFIG = 20;
-    private static final int PACTYPE_PCMDATA = 21;
-    private static final int PACTYP_LOUDNESS = 22;
+    public static final int PACTYP_FILLDATA = 0;
+    public static final int PACTYP_MPEGH3DACFG = 1;
+    public static final int PACTYP_MPEGH3DAFRAME = 2;
+    public static final int PACTYP_AUDIOSCENEINFO = 3;
+    public static final int PACTYP_SYNC = 6;
+    public static final int PACTYP_SYNCGAP = 7;
+    public static final int PACTYP_MARKER = 8;
+    public static final int PACTYP_CRC16 = 9;
+    public static final int PACTYP_CRC32 = 10;
+    public static final int PACTYP_DESCRIPTOR = 11;
+    public static final int PACTYP_USERINTERACTION = 12;
+    public static final int PACTYP_LOUDNESS_DRC = 13;
+    public static final int PACTYP_BUFFERINFO = 14;
+    public static final int PACTYP_GLOBAL_CRC16 = 15;
+    public static final int PACTYP_GLOBAL_CRC32 = 16;
+    public static final int PACTYP_AUDIOTRUNCATION = 17;
+    public static final int PACTYP_GENDATA = 18;
+    public static final int PACTYPE_EARCON = 19;
+    public static final int PACTYPE_PCMCONFIG = 20;
+    public static final int PACTYPE_PCMDATA = 21;
+    public static final int PACTYP_LOUDNESS = 22;

-    private @Type int packetType;
-    private long packetLabel;
-    private int packetLength;
+    public final @Type int packetType;
+    public final long packetLabel;
+    public final int packetLength;
+    public final int headerLength;

-    public MhasPacketHeader(@Type int type, long label, int length) {
-      packetType = type;
-      packetLabel = label;
-      packetLength = length;
+    public MhasPacketHeader(@Type int packetType, long packetLabel, int packetLength, int headerLength) {
+      this.packetType = packetType;
+      this.packetLabel = packetLabel;
+      this.packetLength = packetLength;
+      this.headerLength = headerLength;
    }
  }

-  private static class Mpegh3daConfig {
+  public static class Mpegh3daConfig {

-    private final int profileLevelIndication;
-    private final int samplingFrequency;
-    private final int standardFrameSamples;
-    @Nullable private final byte[] compatibleProfileLevelSet;
+    public final int profileLevelIndication;
+    public final int samplingFrequency;
+    public final int standardFrameSamples;
+    @Nullable public final byte[] compatibleProfileLevelSet;

    private Mpegh3daConfig(
        int profileLevelIndication,
--- a/libraries/extractor/src/main/java/androidx/media3/extractor/ts/MpeghReader.java
+++ b/libraries/extractor/src/main/java/androidx/media3/extractor/ts/MpeghReader.java
@ -15,16 +15,18 @@
 */
 package androidx.media3.extractor.ts;

-import static androidx.media3.common.util.Assertions.checkStateNotNull;
 import static androidx.media3.extractor.ts.TsPayloadReader.FLAG_DATA_ALIGNMENT_INDICATOR;
 import static androidx.media3.extractor.ts.TsPayloadReader.FLAG_RANDOM_ACCESS_INDICATOR;
+import static java.lang.Math.min;
+import static java.lang.annotation.ElementType.TYPE_USE;

-import android.util.Log;
+import androidx.annotation.IntDef;
 import androidx.annotation.Nullable;
 import androidx.media3.common.C;
 import androidx.media3.common.Format;
 import androidx.media3.common.MimeTypes;
 import androidx.media3.common.ParserException;
+import androidx.media3.common.util.Assertions;
 import androidx.media3.common.util.ParsableBitArray;
 import androidx.media3.common.util.ParsableByteArray;
 import androidx.media3.common.util.UnstableApi;
@ -33,6 +35,10 @@ import androidx.media3.extractor.ExtractorOutput;
 import androidx.media3.extractor.MpeghUtil;
 import androidx.media3.extractor.TrackOutput;
 import com.google.common.collect.ImmutableList;
+import java.lang.annotation.Documented;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
 import java.util.List;
 import org.checkerframework.checker.nullness.qual.MonotonicNonNull;

@ -40,15 +46,21 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
@UnstableApi
 public final class MpeghReader implements ElementaryStreamReader {

-  private static final String TAG = "MpeghReader";
+  @Documented
+  @Retention(RetentionPolicy.SOURCE)
+  @Target(TYPE_USE)
+  @IntDef({STATE_FINDING_SYNC, STATE_READING_PACKET_HEADER, STATE_READING_PACKET_PAYLOAD})
+  private @interface State {}

-  private final ParsableByteArray dataBuffer;
+  private static final int STATE_FINDING_SYNC = 0;
+  private static final int STATE_READING_PACKET_HEADER = 1;
+  private static final int STATE_READING_PACKET_PAYLOAD = 2;
+
+  private @State int state;

  private @MonotonicNonNull String formatId;
  private @MonotonicNonNull TrackOutput output;
-  private int dataInBuffer;

-  @Nullable private MpeghUtil.FrameInfo prevFrameInfo;

  // The timestamp to attach to the next sample in the current packet.
  private double timeUs;
@ -57,8 +69,43 @@ public final class MpeghReader implements ElementaryStreamReader {
  private boolean rapPending;
  private @TsPayloadReader.Flags int flags;

+  private int syncBytes;
+
+  private final ParsableByteArray headerScratchBytes;
+  private boolean headerDataFinished;
+
+  private final ParsableByteArray dataScratchBytes;
+
+  private int payloadBytesRead;
+  private int frameBytes;
+
+  @Nullable
+  private MpeghUtil.MhasPacketHeader header;
+  private int samplingRate;
+  private int standardFrameLength;
+  private int truncationSamples;
+  private long mainStreamLabel;
+  private boolean configFound;
+
+
+  /**
+   * Constructs a new reader for MPEG-H elementary streams.
+   */
  public MpeghReader() {
-    dataBuffer = new ParsableByteArray();
+    state = STATE_FINDING_SYNC;
+    syncBytes = 0;
+    headerScratchBytes = new ParsableByteArray(new byte[MpeghUtil.MAX_MHAS_PACKET_HEADER_SIZE]);
+    dataScratchBytes = new ParsableByteArray();
+    header = null;
+    headerDataFinished = false;
+    payloadBytesRead = 0;
+    frameBytes = 0;
+    samplingRate = C.RATE_UNSET_INT;
+    standardFrameLength = C.LENGTH_UNSET;
+    truncationSamples = 0;
+    mainStreamLabel = C.INDEX_UNSET;
+    configFound = false;
+    dataPending = false;
    rapPending = true;
    timeUs = C.TIME_UNSET;
    timeUsPending = C.TIME_UNSET;
@ -66,8 +113,24 @@ public final class MpeghReader implements ElementaryStreamReader {

  @Override
  public void seek() {
-    clearDataBuffer();
+    state = STATE_FINDING_SYNC;
+    syncBytes = 0;
+    headerScratchBytes.setPosition(0);
+    dataScratchBytes.setPosition(0);
+    dataScratchBytes.setLimit(0);
+    header = null;
+    headerDataFinished = false;
+    payloadBytesRead = 0;
+    frameBytes = 0;
+    samplingRate = C.RATE_UNSET_INT;
+    standardFrameLength = C.LENGTH_UNSET;
+    truncationSamples = 0;
+    mainStreamLabel = C.INDEX_UNSET;
+    configFound = false;
+    dataPending = false;
+    rapPending = true;
    timeUs = C.TIME_UNSET;
+    timeUsPending = C.TIME_UNSET;
  }

  @Override
@ -82,12 +145,7 @@ public final class MpeghReader implements ElementaryStreamReader {
  public void packetStarted(long pesTimeUs, @TsPayloadReader.Flags int flags) {
    this.flags = flags;

-    if ((this.flags & FLAG_DATA_ALIGNMENT_INDICATOR) != 0 && dataInBuffer != 0) {
-      Log.w(TAG, "Internal byte buffer was unexpectedly not empty at data aligned PES");
-      clearDataBuffer();
-    }
-
-    if (dataInBuffer > 0) {
+    if (!rapPending && (frameBytes != 0 || !headerDataFinished)) {
      dataPending = true;
    }

@ -101,132 +159,206 @@ public final class MpeghReader implements ElementaryStreamReader {
  }

  @Override
-  public void consume(ParsableByteArray data) {
-    // write the PES payload to a data buffer until the packet is complete
-    appendToDataBuffer(data);
+  public void consume(ParsableByteArray data) throws ParserException {
+    Assertions.checkStateNotNull(output); // Asserts that createTracks has been called.
+
+    int headerDataPos;
+    ParsableBitArray bitArray = new ParsableBitArray();
+    while (data.bytesLeft() > 0) {
+      switch (state) {
+        case STATE_FINDING_SYNC:
+          if (skipToNextSync(data)) {
+            state = STATE_READING_PACKET_HEADER;
+          }
+          break;
+        case STATE_READING_PACKET_HEADER:
+          // If the previous header read finished, move the bytes left after the read position to the front of the header scratch buffer so that reading continues after them.
+          if (headerDataFinished && headerScratchBytes.getPosition() > 0) {
+            System.arraycopy(headerScratchBytes.getData(), headerScratchBytes.getPosition(),
+                headerScratchBytes.getData(), 0, headerScratchBytes.bytesLeft());
+            headerScratchBytes.setPosition(headerScratchBytes.bytesLeft());
+            headerDataFinished = false;
+          }
+
+          // read into header scratch buffer
+          if (continueRead(data, headerScratchBytes, MpeghUtil.MAX_MHAS_PACKET_HEADER_SIZE)) {
+            // make the scratch bytes available for parsing
+            headerScratchBytes.setPosition(0);
+            bitArray.reset(headerScratchBytes);
+
+            // parse the MHAS packet header
+            header = MpeghUtil.parseMhasPacketHeader(bitArray);
+
+            // write the packet header to output
+            output.sampleData(headerScratchBytes, header.headerLength);
+
+            payloadBytesRead = 0;
+            frameBytes += header.packetLength + header.headerLength;
+
+            if (header.packetType == MpeghUtil.MhasPacketHeader.PACTYP_AUDIOTRUNCATION ||
+                header.packetType == MpeghUtil.MhasPacketHeader.PACTYP_MPEGH3DACFG) {
+              dataScratchBytes.ensureCapacity(header.packetLength);
+              dataScratchBytes.setPosition(0);
+              dataScratchBytes.setLimit(header.packetLength);
+            }
+            // MHAS packet header finished -> obtain the packet payload
+            state = STATE_READING_PACKET_PAYLOAD;
+            headerDataFinished = true;
+          }
+          break;
+        case STATE_READING_PACKET_PAYLOAD:
+          if (header == null) {
+            throw new IllegalStateException();
+          }
+          if (header.packetType == MpeghUtil.MhasPacketHeader.PACTYP_MPEGH3DACFG ||
+              header.packetType == MpeghUtil.MhasPacketHeader.PACTYP_AUDIOTRUNCATION) {
+            // read bytes from header scratch buffer into the data scratch buffer
+            headerDataPos = headerScratchBytes.getPosition();
+            if (headerDataPos != MpeghUtil.MAX_MHAS_PACKET_HEADER_SIZE) {
+              continueRead(headerScratchBytes, dataScratchBytes, header.packetLength);
+            }
+            headerScratchBytes.setPosition(headerDataPos);
+            // read bytes from input data into the data scratch buffer
+            int dataStartPos = data.getPosition();
+            continueRead(data, dataScratchBytes, header.packetLength);
+            data.setPosition(dataStartPos);
+          }
+
+          int bytesToRead;
+          // read bytes from header scratch buffer and write them into the output
+          headerDataPos = headerScratchBytes.getPosition();
+          if (headerDataPos != MpeghUtil.MAX_MHAS_PACKET_HEADER_SIZE) {
+            bytesToRead = min(headerScratchBytes.bytesLeft(),
+                header.packetLength - payloadBytesRead);
+            output.sampleData(headerScratchBytes, bytesToRead);
+            payloadBytesRead += bytesToRead;
+          }
+          // read bytes from input data and write them into the output
+          bytesToRead = min(data.bytesLeft(), header.packetLength - payloadBytesRead);
+          output.sampleData(data, bytesToRead);
+          payloadBytesRead += bytesToRead;
+
+          if (payloadBytesRead == header.packetLength) {
+            dataScratchBytes.setPosition(0);
+            bitArray.reset(dataScratchBytes);
+            if (header.packetType == MpeghUtil.MhasPacketHeader.PACTYP_MPEGH3DACFG) {
+              MpeghUtil.Mpegh3daConfig config = MpeghUtil.parseMpegh3daConfig(bitArray);
+              samplingRate = config.samplingFrequency;
+              standardFrameLength = config.standardFrameSamples;
+              if (mainStreamLabel != header.packetLabel) {
+                mainStreamLabel = header.packetLabel;
+                // set the output format
+                String codecs = "mhm1";
+                if (config.profileLevelIndication != C.INDEX_UNSET) {
+                  codecs += String.format(".%02X", config.profileLevelIndication);
+                }
+                @Nullable List<byte[]> initializationData = null;
+                if (config.compatibleProfileLevelSet != null
+                    && config.compatibleProfileLevelSet.length > 0) {
+                  // The first entry in initializationData is reserved for the audio specific config.
+                  initializationData = ImmutableList.of(Util.EMPTY_BYTE_ARRAY,
+                      config.compatibleProfileLevelSet);
+                }
+                Format format =
+                    new Format.Builder()
+                        .setId(formatId)
+                        .setSampleMimeType(MimeTypes.AUDIO_MPEGH_MHM1)
+                        .setSampleRate(samplingRate)
+                        .setCodecs(codecs)
+                        .setInitializationData(initializationData)
+                        .build();
+                output.format(format);
+              }
+              configFound = true;
+            } else if (header.packetType == MpeghUtil.MhasPacketHeader.PACTYP_AUDIOTRUNCATION) {
+              truncationSamples = MpeghUtil.parseAudioTruncationInfo(bitArray);
+            } else if (header.packetType == MpeghUtil.MhasPacketHeader.PACTYP_MPEGH3DAFRAME) {
+              @C.BufferFlags int flag = 0;
+              // if we have a frame with an mpegh3daConfig, set the first obtained AU to a key frame
+              if (configFound) {
+                flag = C.BUFFER_FLAG_KEY_FRAME;
+                rapPending = false;
+              }
+              double sampleDurationUs =
+                  (double) C.MICROS_PER_SECOND * (standardFrameLength - truncationSamples)
+                      / samplingRate;
+              long pts = Math.round(timeUs);
+              if (dataPending) {
+                dataPending = false;
+                timeUs = timeUsPending;
+              } else {
+                timeUs += sampleDurationUs;
+              }
+              output.sampleMetadata(pts, flag, frameBytes, 0, null);
+              configFound = false;
+              truncationSamples = 0;
+              frameBytes = 0;
+            }
+            header = null;
+            // MHAS packet payload finished -> obtain a new packet header
+            state = STATE_READING_PACKET_HEADER;
+          }
+          break;
+        default:
+          throw new IllegalStateException();
+      }
+    }
  }

  @Override
  public void packetFinished(boolean isEndOfInput) {
-    checkStateNotNull(output); // Asserts that createTracks has been called.
-    // try to find the sync packet and adjust the data buffer if necessary
-    maybeFindSync();
-
-    ParsableBitArray dataBitBuffer = new ParsableBitArray();
-    // get as many MPEG-H AUs as possible from the data buffer
-    while (true) {
-      dataBitBuffer.reset(dataBuffer);
-
-      // check if a complete MPEG-H frame could be parsed
-      if (!MpeghUtil.canParseFrame(dataBitBuffer)) {
-        // parsing could not be completed because of not enough data
-        break;
-      }
-
-      MpeghUtil.FrameInfo frameInfo;
-      try {
-        frameInfo = MpeghUtil.parseFrame(dataBitBuffer, prevFrameInfo);
-      } catch (ParserException e) {
-        // an error occurred --> maybe try to find sync and proceed with processing
-        dataBitBuffer.byteAlign();
-        removeUsedFromDataBuffer();
-        rapPending = true;
-        maybeFindSync();
-        continue;
-      }
-
-      if (frameInfo.configChanged && frameInfo.containsConfig) {
-        // set the output format
-        String codecs = "mhm1";
-        if (frameInfo.mpegh3daProfileLevelIndication != C.INDEX_UNSET) {
-          codecs += String.format(".%02X", frameInfo.mpegh3daProfileLevelIndication);
-        }
-        @Nullable List<byte[]> initializationData = null;
-        if (frameInfo.compatibleSetIndication != null
-            && frameInfo.compatibleSetIndication.length > 0) {
-          // The first entry in initializationData is reserved for the audio specific config.
-          initializationData =
-              ImmutableList.of(Util.EMPTY_BYTE_ARRAY, frameInfo.compatibleSetIndication);
-        }
-        Format format =
-            new Format.Builder()
-                .setId(formatId)
-                .setSampleMimeType(MimeTypes.AUDIO_MPEGH_MHM1)
-                .setSampleRate(frameInfo.samplingRate)
-                .setCodecs(codecs)
-                .setInitializationData(initializationData)
-                .build();
-        output.format(format);
-      }
-
-      // write AU to output
-      dataBuffer.setPosition(0);
-      output.sampleData(dataBuffer, frameInfo.frameBytes);
-
-      @C.BufferFlags int flag = 0;
-      // if we have a frame with an mpegh3daConfig, set the first obtained AU to a key frame
-      if (frameInfo.containsConfig) {
-        flag = C.BUFFER_FLAG_KEY_FRAME;
-        rapPending = false;
-      }
-      double sampleDurationUs =
-          (double) C.MICROS_PER_SECOND * frameInfo.frameSamples / frameInfo.samplingRate;
-      long pts = Math.round(timeUs);
-      if (dataPending) {
-        dataPending = false;
-        timeUs = timeUsPending;
-      } else {
-        timeUs += sampleDurationUs;
-      }
-      output.sampleMetadata(pts, flag, frameInfo.frameBytes, 0, null);
-
-      removeUsedFromDataBuffer();
-      prevFrameInfo = frameInfo;
-    }
+    // Do nothing. NOTE(review): output presumably happens in the state machine above; confirm.
  }

-  private void maybeFindSync() {
+
+  /**
+   * Resumes copying bytes from {@code source} into {@code target}, writing at the current
+   * position of {@code target} and moving that position past the bytes copied.
+   *
+   * @param source The buffer that supplies the bytes.
+   * @param target The buffer being filled.
+   * @param targetLength The position {@code target} has to reach for the read to be complete.
+   * @return Whether the position of {@code target} now equals {@code targetLength}.
+   */
+  private boolean continueRead(
+      ParsableByteArray source, ParsableByteArray target, int targetLength) {
+    int writeOffset = target.getPosition();
+    // Never copy more than the source still holds or than the target still needs.
+    int chunkLength = min(source.bytesLeft(), targetLength - writeOffset);
+    source.readBytes(target.getData(), writeOffset, chunkLength);
+    target.setPosition(writeOffset + chunkLength);
+    return target.getPosition() == targetLength;
+  }
+
+  /**
+   * Looks for the next SYNC value in the buffer while a random access point (RAP) is pending.
+   *
+   * <p>The outcome depends on {@code rapPending} and on {@code flags}:
+   *
+   * <ul>
+   *   <li>No RAP is pending, or the random access indicator is not set: the position is advanced
+   *       to the limit and {@code false} is returned.
+   *   <li>The random access and data alignment indicators are both set: {@code true} is returned
+   *       and the position is left unchanged.
+   *   <li>Only the random access indicator is set: bytes are read one at a time and shifted into
+   *       {@code syncBytes} until {@code MpeghUtil.isSyncWord} matches. On a match the position is
+   *       moved back by {@code MpeghUtil.MHAS_SYNC_WORD_LENGTH} (presumably onto the first byte of
+   *       the SYNC word, not the byte after it), {@code syncBytes} is cleared and {@code true} is
+   *       returned. If the buffer is exhausted first, the position ends at the limit and
+   *       {@code false} is returned.
+   * </ul>
+   *
+   * <p>NOTE(review): {@code syncBytes} is not cleared on entry. If it still holds bytes from an
+   * earlier buffer, a SYNC word straddling two buffers could match fewer than
+   * {@code MHAS_SYNC_WORD_LENGTH} bytes into this buffer, making the rewind target negative.
+   * Confirm whether the callers rule this out.
+   *
+   * @param pesBuffer The buffer whose position should be advanced.
+   * @return Whether SYNC was found (reported without scanning when data alignment is signalled).
+   */
+  private boolean skipToNextSync(ParsableByteArray pesBuffer) {
    // we are still waiting for a RAP frame
    if (rapPending) {
      if ((flags & FLAG_RANDOM_ACCESS_INDICATOR) == 0) {
        // RAI is not signalled -> drop the PES data
-        clearDataBuffer();
+        pesBuffer.setPosition(pesBuffer.limit());
      } else {
        if ((flags & FLAG_DATA_ALIGNMENT_INDICATOR) == 0) {
          // if RAI is signalled but the data is not aligned we need to find the sync packet
-          if (!MpeghUtil.findSyncPacket(dataBuffer)) {
-            // sync packet could not be found -> drop the PES data
-            clearDataBuffer();
-            return;
+          while (pesBuffer.bytesLeft() > 0) {
+            syncBytes <<= C.BITS_PER_BYTE;
+            syncBytes |= pesBuffer.readUnsignedByte();
+            if (MpeghUtil.isSyncWord(syncBytes)) {
+              pesBuffer.setPosition(pesBuffer.getPosition() - MpeghUtil.MHAS_SYNC_WORD_LENGTH);
+              syncBytes = 0;
+              return true;
+            }
          }
-          // sync packet was found -> remove PES data before the sync packet
-          removeUsedFromDataBuffer();
+        } else {
+          return true; // Data alignment is signalled: report SYNC without scanning.
        }
      }
+    } else {
+      pesBuffer.setPosition(pesBuffer.limit()); // No RAP is pending: skip the rest of the buffer.
    }
-  }
-
-  private void clearDataBuffer() {
-    dataPending = false;
-    rapPending = true;
-    dataInBuffer = 0;
-    dataBuffer.reset(dataInBuffer);
-  }
-
-  private void appendToDataBuffer(ParsableByteArray data) {
-    int bytesToRead = data.bytesLeft();
-    dataBuffer.ensureCapacity(dataInBuffer + bytesToRead);
-    System.arraycopy(
-        data.getData(), data.getPosition(), dataBuffer.getData(), dataInBuffer, bytesToRead);
-    data.skipBytes(bytesToRead);
-    dataInBuffer += bytesToRead;
-    dataBuffer.reset(dataInBuffer);
-  }
-
-  private void removeUsedFromDataBuffer() {
-    dataInBuffer -= dataBuffer.getPosition();
-    System.arraycopy(
-        dataBuffer.getData(), dataBuffer.getPosition(), dataBuffer.getData(), 0, dataInBuffer);
-    dataBuffer.reset(dataInBuffer);
+    return false;
  }
 }