Write sample-data-offset-related fields in the tfhd and trun boxes

This fix makes the output playable in VLC media player.
The output does not yet play in QuickTime Player; that is being fixed in
a separate CL.

#minor-release

PiperOrigin-RevId: 601118813
(cherry picked from commit 806f90922b)
This commit is contained in:
sheenachhabra 2024-01-24 07:49:08 -08:00 committed by SheenaChhabra
parent db0262efdb
commit 539a8f9a24
7 changed files with 120 additions and 45 deletions

View file

@ -20,6 +20,7 @@ import static androidx.media3.common.util.Assertions.checkNotNull;
import static androidx.media3.common.util.Assertions.checkState; import static androidx.media3.common.util.Assertions.checkState;
import static androidx.media3.muxer.ColorUtils.MEDIAFORMAT_STANDARD_TO_PRIMARIES_AND_MATRIX; import static androidx.media3.muxer.ColorUtils.MEDIAFORMAT_STANDARD_TO_PRIMARIES_AND_MATRIX;
import static androidx.media3.muxer.ColorUtils.MEDIAFORMAT_TRANSFER_TO_MP4_TRANSFER; import static androidx.media3.muxer.ColorUtils.MEDIAFORMAT_TRANSFER_TO_MP4_TRANSFER;
import static androidx.media3.muxer.Mp4Utils.BYTES_PER_INTEGER;
import static androidx.media3.muxer.Mp4Utils.MVHD_TIMEBASE; import static androidx.media3.muxer.Mp4Utils.MVHD_TIMEBASE;
import android.media.MediaCodec; import android.media.MediaCodec;
@ -49,7 +50,13 @@ import java.util.Locale;
* buffers}. * buffers}.
*/ */
/* package */ final class Boxes { /* package */ final class Boxes {
private static final int BYTES_PER_INTEGER = 4; // Box size (4 bytes) + Box name (4 bytes)
public static final int BOX_HEADER_SIZE = 2 * BYTES_PER_INTEGER;
public static final int MFHD_BOX_CONTENT_SIZE = 2 * BYTES_PER_INTEGER;
public static final int TFHD_BOX_CONTENT_SIZE = 4 * BYTES_PER_INTEGER;
// unsigned int(2) sample_depends_on = 2 (bit index 25 and 24) // unsigned int(2) sample_depends_on = 2 (bit index 25 and 24)
private static final int TRUN_BOX_SYNC_SAMPLE_FLAGS = 0b00000010_00000000_00000000_00000000; private static final int TRUN_BOX_SYNC_SAMPLE_FLAGS = 0b00000010_00000000_00000000_00000000;
// unsigned int(2) sample_depends_on = 1 (bit index 25 and 24) // unsigned int(2) sample_depends_on = 1 (bit index 25 and 24)
@ -822,7 +829,7 @@ import java.util.Locale;
/** Returns the movie fragment header (mfhd) box. */ /** Returns the movie fragment header (mfhd) box. */
public static ByteBuffer mfhd(int sequenceNumber) { public static ByteBuffer mfhd(int sequenceNumber) {
ByteBuffer contents = ByteBuffer.allocate(2 * BYTES_PER_INTEGER); ByteBuffer contents = ByteBuffer.allocate(MFHD_BOX_CONTENT_SIZE);
contents.putInt(0x0); // version and flags contents.putInt(0x0); // version and flags
contents.putInt(sequenceNumber); // An unsigned int(32) contents.putInt(sequenceNumber); // An unsigned int(32)
contents.flip(); contents.flip();
@ -835,20 +842,21 @@ import java.util.Locale;
} }
/** Returns a track fragment header (tfhd) box. */ /** Returns a track fragment header (tfhd) box. */
public static ByteBuffer tfhd(int trackId) { public static ByteBuffer tfhd(int trackId, long baseDataOffset) {
ByteBuffer contents = ByteBuffer.allocate(2 * BYTES_PER_INTEGER); ByteBuffer contents = ByteBuffer.allocate(TFHD_BOX_CONTENT_SIZE);
contents.putInt(0x0); // version and flags // 0x000001 base-data-offset-present: indicates the presence of the base-data-offset field.
contents.putInt(0x0 | 0x000001); // version and flags
contents.putInt(trackId); contents.putInt(trackId);
contents.putLong(baseDataOffset);
contents.flip(); contents.flip();
return BoxUtils.wrapIntoBox("tfhd", contents); return BoxUtils.wrapIntoBox("tfhd", contents);
} }
/** Returns a track fragment run (trun) box. */ /** Returns a track fragment run (trun) box. */
public static ByteBuffer trun(List<SampleMetadata> samplesMetadata) { public static ByteBuffer trun(List<SampleMetadata> samplesMetadata, int dataOffset) {
// 3 integers are required for each sample's metadata. ByteBuffer contents = ByteBuffer.allocate(getTrunBoxContentSize(samplesMetadata.size()));
ByteBuffer contents =
ByteBuffer.allocate(2 * BYTES_PER_INTEGER + 3 * samplesMetadata.size() * BYTES_PER_INTEGER);
// 0x000001 data-offset-present.
// 0x000100 sample-duration-present: indicates that each sample has its own duration, otherwise // 0x000100 sample-duration-present: indicates that each sample has its own duration, otherwise
// the default is used. // the default is used.
// 0x000200 sample-size-present: indicates that each sample has its own size, otherwise the // 0x000200 sample-size-present: indicates that each sample has its own size, otherwise the
@ -856,9 +864,10 @@ import java.util.Locale;
// 0x000400 sample-flags-present: indicates that each sample has its own flags, otherwise the // 0x000400 sample-flags-present: indicates that each sample has its own flags, otherwise the
// default is used. // default is used.
// Version is 0x0. // Version is 0x0.
int versionAndFlags = 0x0 | 0x000100 | 0x000200 | 0x000400; int versionAndFlags = 0x0 | 0x000001 | 0x000100 | 0x000200 | 0x000400;
contents.putInt(versionAndFlags); contents.putInt(versionAndFlags);
contents.putInt(samplesMetadata.size()); // An unsigned int(32) contents.putInt(samplesMetadata.size()); // An unsigned int(32)
contents.putInt(dataOffset); // A signed int(32)
for (int i = 0; i < samplesMetadata.size(); i++) { for (int i = 0; i < samplesMetadata.size(); i++) {
SampleMetadata currentSampleMetadata = samplesMetadata.get(i); SampleMetadata currentSampleMetadata = samplesMetadata.get(i);
contents.putInt((int) currentSampleMetadata.durationVu); // An unsigned int(32) contents.putInt((int) currentSampleMetadata.durationVu); // An unsigned int(32)
@ -872,6 +881,13 @@ import java.util.Locale;
return BoxUtils.wrapIntoBox("trun", contents); return BoxUtils.wrapIntoBox("trun", contents);
} }
/**
 * Returns the number of bytes needed for the content of a {@link #trun(List, int)} box that
 * describes {@code sampleCount} samples.
 */
public static int getTrunBoxContentSize(int sampleCount) {
  // Fixed part: version/flags, sample count and data offset, one 32-bit field each.
  int fixedFieldsSize = 3 * BYTES_PER_INTEGER;
  // Variable part: three 32-bit values are written for every sample.
  int perSampleSize = 3 * BYTES_PER_INTEGER;
  return fixedFieldsSize + sampleCount * perSampleSize;
}
/** Returns a movie extends (mvex) box. */ /** Returns a movie extends (mvex) box. */
public static ByteBuffer mvex(List<ByteBuffer> trexBoxes) { public static ByteBuffer mvex(List<ByteBuffer> trexBoxes) {
return BoxUtils.wrapBoxesIntoBox("mvex", trexBoxes); return BoxUtils.wrapBoxesIntoBox("mvex", trexBoxes);

View file

@ -17,6 +17,10 @@ package androidx.media3.muxer;
import static androidx.media3.common.util.Assertions.checkArgument; import static androidx.media3.common.util.Assertions.checkArgument;
import static androidx.media3.common.util.Assertions.checkNotNull; import static androidx.media3.common.util.Assertions.checkNotNull;
import static androidx.media3.muxer.Boxes.BOX_HEADER_SIZE;
import static androidx.media3.muxer.Boxes.MFHD_BOX_CONTENT_SIZE;
import static androidx.media3.muxer.Boxes.TFHD_BOX_CONTENT_SIZE;
import static androidx.media3.muxer.Boxes.getTrunBoxContentSize;
import static androidx.media3.muxer.Mp4Utils.UNSIGNED_INT_MAX_VALUE; import static androidx.media3.muxer.Mp4Utils.UNSIGNED_INT_MAX_VALUE;
import static java.lang.Math.max; import static java.lang.Math.max;
import static java.lang.Math.min; import static java.lang.Math.min;
@ -116,16 +120,52 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
} }
} }
private static ImmutableList<ByteBuffer> createTrafBoxes(List<ProcessedTrackInfo> trackInfos) { private static ImmutableList<ByteBuffer> createTrafBoxes(
List<ProcessedTrackInfo> trackInfos, long moofBoxStartPosition) {
ImmutableList.Builder<ByteBuffer> trafBoxes = new ImmutableList.Builder<>(); ImmutableList.Builder<ByteBuffer> trafBoxes = new ImmutableList.Builder<>();
int moofBoxSize = calculateMoofBoxSize(trackInfos);
int mdatBoxHeaderSize = BOX_HEADER_SIZE;
// dataOffset denotes the relative position of the first sample of the track from the
// moofBoxStartPosition.
int dataOffset = moofBoxSize + mdatBoxHeaderSize;
for (int i = 0; i < trackInfos.size(); i++) { for (int i = 0; i < trackInfos.size(); i++) {
ProcessedTrackInfo currentTrackInfo = trackInfos.get(i); ProcessedTrackInfo currentTrackInfo = trackInfos.get(i);
ByteBuffer trun = Boxes.trun(currentTrackInfo.pendingSamplesMetadata); trafBoxes.add(
trafBoxes.add(Boxes.traf(Boxes.tfhd(currentTrackInfo.trackId), trun)); Boxes.traf(
Boxes.tfhd(currentTrackInfo.trackId, /* baseDataOffset= */ moofBoxStartPosition),
Boxes.trun(currentTrackInfo.pendingSamplesMetadata, dataOffset)));
dataOffset += currentTrackInfo.totalSamplesSize;
} }
return trafBoxes.build(); return trafBoxes.build();
} }
/**
 * Computes the total size in bytes of a moof box that holds one mfhd box followed by one traf
 * box (tfhd + trun) for every entry in {@code trackInfos}.
 */
private static int calculateMoofBoxSize(List<ProcessedTrackInfo> trackInfos) {
  /* Layout being measured:
     moof
       mfhd
       traf
         tfhd
         trun
       traf
         tfhd
         trun
  */
  // Part of every traf box whose size does not depend on the number of samples:
  // traf header + complete tfhd box + trun header.
  int perTrackFixedSize =
      BOX_HEADER_SIZE + (BOX_HEADER_SIZE + TFHD_BOX_CONTENT_SIZE) + BOX_HEADER_SIZE;

  // Start with the moof header and the complete mfhd box, then add each traf box.
  int totalSize = BOX_HEADER_SIZE + (BOX_HEADER_SIZE + MFHD_BOX_CONTENT_SIZE);
  for (int trackIndex = 0; trackIndex < trackInfos.size(); trackIndex++) {
    int sampleCount = trackInfos.get(trackIndex).pendingSamplesMetadata.size();
    totalSize += perTrackFixedSize + getTrunBoxContentSize(sampleCount);
  }
  return totalSize;
}
private void createHeader() throws IOException { private void createHeader() throws IOException {
output.position(0L); output.position(0L);
output.write(Boxes.ftyp()); output.write(Boxes.ftyp());
@ -158,9 +198,20 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
} }
private void createFragment() throws IOException { private void createFragment() throws IOException {
/* Each fragment looks like:
moof
mfhd
traf
tfhd
trun
traf
tfhd
trun
mdat
*/
ImmutableList<ProcessedTrackInfo> trackInfos = processAllTracks(); ImmutableList<ProcessedTrackInfo> trackInfos = processAllTracks();
// Write moof box. ImmutableList<ByteBuffer> trafBoxes =
ImmutableList<ByteBuffer> trafBoxes = createTrafBoxes(trackInfos); createTrafBoxes(trackInfos, /* moofBoxStartPosition= */ output.position());
if (trafBoxes.isEmpty()) { if (trafBoxes.isEmpty()) {
return; return;
} }
@ -232,7 +283,9 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
Mp4Muxer.LAST_FRAME_DURATION_BEHAVIOR_DUPLICATE_PREV_DURATION); Mp4Muxer.LAST_FRAME_DURATION_BEHAVIOR_DUPLICATE_PREV_DURATION);
ImmutableList.Builder<SampleMetadata> pendingSamplesMetadata = new ImmutableList.Builder<>(); ImmutableList.Builder<SampleMetadata> pendingSamplesMetadata = new ImmutableList.Builder<>();
int totalSamplesSize = 0;
for (int i = 0; i < sampleBufferInfos.size(); i++) { for (int i = 0; i < sampleBufferInfos.size(); i++) {
totalSamplesSize += sampleBufferInfos.get(i).size;
pendingSamplesMetadata.add( pendingSamplesMetadata.add(
new SampleMetadata( new SampleMetadata(
sampleDurations.get(i), sampleDurations.get(i),
@ -242,15 +295,18 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
// Clear the queue. // Clear the queue.
track.pendingSamplesBufferInfo.clear(); track.pendingSamplesBufferInfo.clear();
return new ProcessedTrackInfo(trackId, pendingSamplesMetadata.build()); return new ProcessedTrackInfo(trackId, totalSamplesSize, pendingSamplesMetadata.build());
} }
private static class ProcessedTrackInfo { private static class ProcessedTrackInfo {
public final int trackId; public final int trackId;
public final int totalSamplesSize;
public final ImmutableList<SampleMetadata> pendingSamplesMetadata; public final ImmutableList<SampleMetadata> pendingSamplesMetadata;
public ProcessedTrackInfo(int trackId, ImmutableList<SampleMetadata> pendingSamplesMetadata) { public ProcessedTrackInfo(
int trackId, int totalSamplesSize, ImmutableList<SampleMetadata> pendingSamplesMetadata) {
this.trackId = trackId; this.trackId = trackId;
this.totalSamplesSize = totalSamplesSize;
this.pendingSamplesMetadata = pendingSamplesMetadata; this.pendingSamplesMetadata = pendingSamplesMetadata;
} }
} }

View file

@ -17,6 +17,9 @@ package androidx.media3.muxer;
/** Utilities for MP4 files. */ /** Utilities for MP4 files. */
/* package */ final class Mp4Utils { /* package */ final class Mp4Utils {
/** Number of bytes in a 32-bit integer. */
public static final int BYTES_PER_INTEGER = 4;
/** /**
* The maximum length of boxes which have fixed sizes. * The maximum length of boxes which have fixed sizes.
* *

View file

@ -539,7 +539,7 @@ public class BoxesTest {
@Test @Test
public void createTfhdBox_matchesExpected() throws IOException { public void createTfhdBox_matchesExpected() throws IOException {
ByteBuffer tfhdBox = Boxes.tfhd(/* trackId= */ 1); ByteBuffer tfhdBox = Boxes.tfhd(/* trackId= */ 1, /* baseDataOffset= */ 1_000L);
DumpableMp4Box dumpableBox = new DumpableMp4Box(tfhdBox); DumpableMp4Box dumpableBox = new DumpableMp4Box(tfhdBox);
DumpFileAsserts.assertOutput( DumpFileAsserts.assertOutput(
@ -558,7 +558,7 @@ public class BoxesTest {
/* flags= */ i == 0 ? MediaCodec.BUFFER_FLAG_KEY_FRAME : 0)); /* flags= */ i == 0 ? MediaCodec.BUFFER_FLAG_KEY_FRAME : 0));
} }
ByteBuffer trunBox = Boxes.trun(samplesMetadata); ByteBuffer trunBox = Boxes.trun(samplesMetadata, /* dataOffset= */ 1_000);
DumpableMp4Box dumpableBox = new DumpableMp4Box(trunBox); DumpableMp4Box dumpableBox = new DumpableMp4Box(trunBox);
DumpFileAsserts.assertOutput( DumpFileAsserts.assertOutput(

View file

@ -58,33 +58,33 @@ moov (1209 bytes):
Data = length 24, hash C35D3183 Data = length 24, hash C35D3183
trex (32 bytes): trex (32 bytes):
Data = length 24, hash 14070F84 Data = length 24, hash 14070F84
moof (2852 bytes): moof (2876 bytes):
mfhd (16 bytes): mfhd (16 bytes):
Data = length 8, hash 94446F02 Data = length 8, hash 94446F02
traf (1120 bytes): traf (1132 bytes):
tfhd (16 bytes): tfhd (24 bytes):
Data = length 8, hash 94446F02 Data = length 16, hash D37153D4
trun (1096 bytes): trun (1100 bytes):
Data = length 1088, hash 1F7B824F Data = length 1092, hash BA1962E9
traf (1708 bytes): traf (1720 bytes):
tfhd (16 bytes): tfhd (24 bytes):
Data = length 8, hash 94446F03 Data = length 16, hash 67B5C2D5
trun (1684 bytes): trun (1688 bytes):
Data = length 1676, hash 46E974DC Data = length 1680, hash 2EDF9B97
mdat (5712387 bytes): mdat (5712387 bytes):
Data = length 5712379, hash 86B2819D Data = length 5712379, hash 86B2819D
moof (1220 bytes): moof (1244 bytes):
mfhd (16 bytes): mfhd (16 bytes):
Data = length 8, hash 94446F03 Data = length 8, hash 94446F03
traf (484 bytes): traf (496 bytes):
tfhd (16 bytes): tfhd (24 bytes):
Data = length 8, hash 94446F02 Data = length 16, hash D372A134
trun (460 bytes): trun (464 bytes):
Data = length 452, hash 36E6F796 Data = length 456, hash E01BEFF7
traf (712 bytes): traf (724 bytes):
tfhd (16 bytes): tfhd (24 bytes):
Data = length 8, hash 94446F03 Data = length 16, hash 67B71035
trun (688 bytes): trun (692 bytes):
Data = length 680, hash 4E3D2F16 Data = length 684, hash 73BBFD29
mdat (2364921 bytes): mdat (2364921 bytes):
Data = length 2364913, hash D363A845 Data = length 2364913, hash D363A845

View file

@ -1,2 +1,2 @@
tfhd (16 bytes): tfhd (24 bytes):
Data = length 8, hash 94446F02 Data = length 16, hash D37153C8

View file

@ -1,2 +1,2 @@
trun (76 bytes): trun (80 bytes):
Data = length 68, hash 750F9113 Data = length 72, hash 516DBD9