Write sample data offset related fields in tfhd and trun box

This fix makes the output playable in VLC player.
The output still does not play in QuickTime Player; that is being fixed
in a separate CL.

#minor-release

PiperOrigin-RevId: 601118813
(cherry picked from commit 806f90922b)
This commit is contained in:
sheenachhabra 2024-01-24 07:49:08 -08:00 committed by SheenaChhabra
parent db0262efdb
commit 539a8f9a24
7 changed files with 120 additions and 45 deletions

View file

@ -20,6 +20,7 @@ import static androidx.media3.common.util.Assertions.checkNotNull;
import static androidx.media3.common.util.Assertions.checkState;
import static androidx.media3.muxer.ColorUtils.MEDIAFORMAT_STANDARD_TO_PRIMARIES_AND_MATRIX;
import static androidx.media3.muxer.ColorUtils.MEDIAFORMAT_TRANSFER_TO_MP4_TRANSFER;
import static androidx.media3.muxer.Mp4Utils.BYTES_PER_INTEGER;
import static androidx.media3.muxer.Mp4Utils.MVHD_TIMEBASE;
import android.media.MediaCodec;
@ -49,7 +50,13 @@ import java.util.Locale;
* buffers}.
*/
/* package */ final class Boxes {
private static final int BYTES_PER_INTEGER = 4;
// Box size (4 bytes) + Box name (4 bytes)
public static final int BOX_HEADER_SIZE = 2 * BYTES_PER_INTEGER;
public static final int MFHD_BOX_CONTENT_SIZE = 2 * BYTES_PER_INTEGER;
public static final int TFHD_BOX_CONTENT_SIZE = 4 * BYTES_PER_INTEGER;
// unsigned int(2) sample_depends_on = 2 (bit index 25 and 24)
private static final int TRUN_BOX_SYNC_SAMPLE_FLAGS = 0b00000010_00000000_00000000_00000000;
// unsigned int(2) sample_depends_on = 1 (bit index 25 and 24)
@ -822,7 +829,7 @@ import java.util.Locale;
/** Returns the movie fragment header (mfhd) box. */
public static ByteBuffer mfhd(int sequenceNumber) {
ByteBuffer contents = ByteBuffer.allocate(2 * BYTES_PER_INTEGER);
ByteBuffer contents = ByteBuffer.allocate(MFHD_BOX_CONTENT_SIZE);
contents.putInt(0x0); // version and flags
contents.putInt(sequenceNumber); // An unsigned int(32)
contents.flip();
@ -835,20 +842,21 @@ import java.util.Locale;
}
/** Returns a track fragment header (tfhd) box. */
public static ByteBuffer tfhd(int trackId) {
ByteBuffer contents = ByteBuffer.allocate(2 * BYTES_PER_INTEGER);
contents.putInt(0x0); // version and flags
public static ByteBuffer tfhd(int trackId, long baseDataOffset) {
ByteBuffer contents = ByteBuffer.allocate(TFHD_BOX_CONTENT_SIZE);
// 0x000001 base-data-offset-present: indicates the presence of the base-data-offset field.
contents.putInt(0x0 | 0x000001); // version and flags
contents.putInt(trackId);
contents.putLong(baseDataOffset);
contents.flip();
return BoxUtils.wrapIntoBox("tfhd", contents);
}
/** Returns a track fragment run (trun) box. */
public static ByteBuffer trun(List<SampleMetadata> samplesMetadata) {
// 3 integers are required for each sample's metadata.
ByteBuffer contents =
ByteBuffer.allocate(2 * BYTES_PER_INTEGER + 3 * samplesMetadata.size() * BYTES_PER_INTEGER);
public static ByteBuffer trun(List<SampleMetadata> samplesMetadata, int dataOffset) {
ByteBuffer contents = ByteBuffer.allocate(getTrunBoxContentSize(samplesMetadata.size()));
// 0x000001 data-offset-present.
// 0x000100 sample-duration-present: indicates that each sample has its own duration, otherwise
// the default is used.
// 0x000200 sample-size-present: indicates that each sample has its own size, otherwise the
@ -856,9 +864,10 @@ import java.util.Locale;
// 0x000400 sample-flags-present: indicates that each sample has its own flags, otherwise the
// default is used.
// Version is 0x0.
int versionAndFlags = 0x0 | 0x000100 | 0x000200 | 0x000400;
int versionAndFlags = 0x0 | 0x000001 | 0x000100 | 0x000200 | 0x000400;
contents.putInt(versionAndFlags);
contents.putInt(samplesMetadata.size()); // An unsigned int(32)
contents.putInt(dataOffset); // A signed int(32)
for (int i = 0; i < samplesMetadata.size(); i++) {
SampleMetadata currentSampleMetadata = samplesMetadata.get(i);
contents.putInt((int) currentSampleMetadata.durationVu); // An unsigned int(32)
@ -872,6 +881,13 @@ import java.util.Locale;
return BoxUtils.wrapIntoBox("trun", contents);
}
/**
 * Returns the number of bytes occupied by the content of a {@link #trun(List, int)} box.
 *
 * @param sampleCount The number of samples described by the trun box.
 */
public static int getTrunBoxContentSize(int sampleCount) {
  // Written once per box: version and flags, sample count, data offset.
  int fixedFieldCount = 3;
  // Written once per sample; each field is a 32-bit integer.
  int perSampleFieldCount = 3;
  return (fixedFieldCount + perSampleFieldCount * sampleCount) * BYTES_PER_INTEGER;
}
/** Returns a movie extends (mvex) box. */
public static ByteBuffer mvex(List<ByteBuffer> trexBoxes) {
return BoxUtils.wrapBoxesIntoBox("mvex", trexBoxes);

View file

@ -17,6 +17,10 @@ package androidx.media3.muxer;
import static androidx.media3.common.util.Assertions.checkArgument;
import static androidx.media3.common.util.Assertions.checkNotNull;
import static androidx.media3.muxer.Boxes.BOX_HEADER_SIZE;
import static androidx.media3.muxer.Boxes.MFHD_BOX_CONTENT_SIZE;
import static androidx.media3.muxer.Boxes.TFHD_BOX_CONTENT_SIZE;
import static androidx.media3.muxer.Boxes.getTrunBoxContentSize;
import static androidx.media3.muxer.Mp4Utils.UNSIGNED_INT_MAX_VALUE;
import static java.lang.Math.max;
import static java.lang.Math.min;
@ -116,16 +120,52 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
}
}
private static ImmutableList<ByteBuffer> createTrafBoxes(List<ProcessedTrackInfo> trackInfos) {
private static ImmutableList<ByteBuffer> createTrafBoxes(
List<ProcessedTrackInfo> trackInfos, long moofBoxStartPosition) {
ImmutableList.Builder<ByteBuffer> trafBoxes = new ImmutableList.Builder<>();
int moofBoxSize = calculateMoofBoxSize(trackInfos);
int mdatBoxHeaderSize = BOX_HEADER_SIZE;
// dataOffset denotes the relative position of the first sample of the track from the
// moofBoxStartPosition.
int dataOffset = moofBoxSize + mdatBoxHeaderSize;
for (int i = 0; i < trackInfos.size(); i++) {
ProcessedTrackInfo currentTrackInfo = trackInfos.get(i);
ByteBuffer trun = Boxes.trun(currentTrackInfo.pendingSamplesMetadata);
trafBoxes.add(Boxes.traf(Boxes.tfhd(currentTrackInfo.trackId), trun));
trafBoxes.add(
Boxes.traf(
Boxes.tfhd(currentTrackInfo.trackId, /* baseDataOffset= */ moofBoxStartPosition),
Boxes.trun(currentTrackInfo.pendingSamplesMetadata, dataOffset)));
dataOffset += currentTrackInfo.totalSamplesSize;
}
return trafBoxes.build();
}
/**
 * Returns the total size in bytes of the moof box holding one traf box per entry of {@code
 * trackInfos}.
 *
 * <p>The measured layout is:
 *
 * <pre>
 * moof
 *   mfhd
 *   traf
 *     tfhd
 *     trun
 *   traf
 *     tfhd
 *     trun
 * </pre>
 */
private static int calculateMoofBoxSize(List<ProcessedTrackInfo> trackInfos) {
  // Start with the moof header followed by the complete mfhd box.
  int totalSize = BOX_HEADER_SIZE + (BOX_HEADER_SIZE + MFHD_BOX_CONTENT_SIZE);
  // Part of every traf box that does not depend on the sample count: the traf header plus the
  // complete tfhd box.
  int trafSizeWithoutTrun = BOX_HEADER_SIZE + (BOX_HEADER_SIZE + TFHD_BOX_CONTENT_SIZE);
  for (int trackIndex = 0; trackIndex < trackInfos.size(); trackIndex++) {
    int sampleCount = trackInfos.get(trackIndex).pendingSamplesMetadata.size();
    // The trun box is its header plus content that grows with the number of samples.
    int trunSize = BOX_HEADER_SIZE + getTrunBoxContentSize(sampleCount);
    totalSize += trafSizeWithoutTrun + trunSize;
  }
  return totalSize;
}
private void createHeader() throws IOException {
output.position(0L);
output.write(Boxes.ftyp());
@ -158,9 +198,20 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
}
private void createFragment() throws IOException {
/* Each fragment looks like:
moof
mfhd
traf
tfhd
trun
traf
tfhd
trun
mdat
*/
ImmutableList<ProcessedTrackInfo> trackInfos = processAllTracks();
// Write moof box.
ImmutableList<ByteBuffer> trafBoxes = createTrafBoxes(trackInfos);
ImmutableList<ByteBuffer> trafBoxes =
createTrafBoxes(trackInfos, /* moofBoxStartPosition= */ output.position());
if (trafBoxes.isEmpty()) {
return;
}
@ -232,7 +283,9 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
Mp4Muxer.LAST_FRAME_DURATION_BEHAVIOR_DUPLICATE_PREV_DURATION);
ImmutableList.Builder<SampleMetadata> pendingSamplesMetadata = new ImmutableList.Builder<>();
int totalSamplesSize = 0;
for (int i = 0; i < sampleBufferInfos.size(); i++) {
totalSamplesSize += sampleBufferInfos.get(i).size;
pendingSamplesMetadata.add(
new SampleMetadata(
sampleDurations.get(i),
@ -242,15 +295,18 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
// Clear the queue.
track.pendingSamplesBufferInfo.clear();
return new ProcessedTrackInfo(trackId, pendingSamplesMetadata.build());
return new ProcessedTrackInfo(trackId, totalSamplesSize, pendingSamplesMetadata.build());
}
private static class ProcessedTrackInfo {
public final int trackId;
public final int totalSamplesSize;
public final ImmutableList<SampleMetadata> pendingSamplesMetadata;
public ProcessedTrackInfo(int trackId, ImmutableList<SampleMetadata> pendingSamplesMetadata) {
public ProcessedTrackInfo(
int trackId, int totalSamplesSize, ImmutableList<SampleMetadata> pendingSamplesMetadata) {
this.trackId = trackId;
this.totalSamplesSize = totalSamplesSize;
this.pendingSamplesMetadata = pendingSamplesMetadata;
}
}

View file

@ -17,6 +17,9 @@ package androidx.media3.muxer;
/** Utilities for MP4 files. */
/* package */ final class Mp4Utils {
/* Total number of bytes in an integer. */
public static final int BYTES_PER_INTEGER = 4;
/**
* The maximum length of boxes which have fixed sizes.
*

View file

@ -539,7 +539,7 @@ public class BoxesTest {
@Test
public void createTfhdBox_matchesExpected() throws IOException {
ByteBuffer tfhdBox = Boxes.tfhd(/* trackId= */ 1);
ByteBuffer tfhdBox = Boxes.tfhd(/* trackId= */ 1, /* baseDataOffset= */ 1_000L);
DumpableMp4Box dumpableBox = new DumpableMp4Box(tfhdBox);
DumpFileAsserts.assertOutput(
@ -558,7 +558,7 @@ public class BoxesTest {
/* flags= */ i == 0 ? MediaCodec.BUFFER_FLAG_KEY_FRAME : 0));
}
ByteBuffer trunBox = Boxes.trun(samplesMetadata);
ByteBuffer trunBox = Boxes.trun(samplesMetadata, /* dataOffset= */ 1_000);
DumpableMp4Box dumpableBox = new DumpableMp4Box(trunBox);
DumpFileAsserts.assertOutput(

View file

@ -58,33 +58,33 @@ moov (1209 bytes):
Data = length 24, hash C35D3183
trex (32 bytes):
Data = length 24, hash 14070F84
moof (2852 bytes):
moof (2876 bytes):
mfhd (16 bytes):
Data = length 8, hash 94446F02
traf (1120 bytes):
tfhd (16 bytes):
Data = length 8, hash 94446F02
trun (1096 bytes):
Data = length 1088, hash 1F7B824F
traf (1708 bytes):
tfhd (16 bytes):
Data = length 8, hash 94446F03
trun (1684 bytes):
Data = length 1676, hash 46E974DC
traf (1132 bytes):
tfhd (24 bytes):
Data = length 16, hash D37153D4
trun (1100 bytes):
Data = length 1092, hash BA1962E9
traf (1720 bytes):
tfhd (24 bytes):
Data = length 16, hash 67B5C2D5
trun (1688 bytes):
Data = length 1680, hash 2EDF9B97
mdat (5712387 bytes):
Data = length 5712379, hash 86B2819D
moof (1220 bytes):
moof (1244 bytes):
mfhd (16 bytes):
Data = length 8, hash 94446F03
traf (484 bytes):
tfhd (16 bytes):
Data = length 8, hash 94446F02
trun (460 bytes):
Data = length 452, hash 36E6F796
traf (712 bytes):
tfhd (16 bytes):
Data = length 8, hash 94446F03
trun (688 bytes):
Data = length 680, hash 4E3D2F16
traf (496 bytes):
tfhd (24 bytes):
Data = length 16, hash D372A134
trun (464 bytes):
Data = length 456, hash E01BEFF7
traf (724 bytes):
tfhd (24 bytes):
Data = length 16, hash 67B71035
trun (692 bytes):
Data = length 684, hash 73BBFD29
mdat (2364921 bytes):
Data = length 2364913, hash D363A845

View file

@ -1,2 +1,2 @@
tfhd (16 bytes):
Data = length 8, hash 94446F02
tfhd (24 bytes):
Data = length 16, hash D37153C8

View file

@ -1,2 +1,2 @@
trun (76 bytes):
Data = length 68, hash 750F9113
trun (80 bytes):
Data = length 72, hash 516DBD9