From bb3948aa983668ee8ebf0255e20b0e904515c36f Mon Sep 17 00:00:00 2001 From: sheenachhabra Date: Tue, 20 Aug 2024 05:25:48 -0700 Subject: [PATCH] Implement interleaving of editable video tracks The CL adds another way of writing editable video tracks where the samples will be interleaved with the primary track samples in the "mdat" box. PiperOrigin-RevId: 665313751 --- .../java/androidx/media3/muxer/Mp4Muxer.java | 20 ++- .../java/androidx/media3/muxer/Mp4Writer.java | 81 +++++++++++- .../media3/muxer/Mp4MuxerEndToEndTest.java | 117 +++++++++++++++++- ...ditable_track_samples_interleaved.mp4.dump | 91 ++++++++++++++ ...ditable_track_samples_interleaved.mp4.dump | 48 +++++++ 5 files changed, 345 insertions(+), 12 deletions(-) create mode 100644 libraries/test_data/src/test/assets/muxerdumps/mp4_with_editable_video_tracks_when_editable_track_samples_interleaved.mp4.dump create mode 100644 libraries/test_data/src/test/assets/muxerdumps/mp4_with_primary_tracks_when_editable_track_samples_interleaved.mp4.dump diff --git a/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Muxer.java b/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Muxer.java index 45e1607c91..e5680edbc6 100644 --- a/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Muxer.java +++ b/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Muxer.java @@ -121,14 +121,21 @@ public final class Mp4Muxer implements Muxer { String getCacheFilePath(); } - public final CacheFileProvider cacheFileProvider; + public final boolean shouldInterleaveSamples; + @Nullable public final CacheFileProvider cacheFileProvider; /** * Creates an instance. * - * @param cacheFileProvider A {@link CacheFileProvider}. + * @param shouldInterleaveSamples Whether to interleave editable video track samples with + * primary track samples. + * @param cacheFileProvider A {@link CacheFileProvider}. Required only when {@code + * shouldInterleaveSamples} is set to {@code false}, can be {@code null} otherwise. 
*/ - public EditableVideoParameters(CacheFileProvider cacheFileProvider) { + public EditableVideoParameters( + boolean shouldInterleaveSamples, @Nullable CacheFileProvider cacheFileProvider) { + checkArgument(shouldInterleaveSamples || cacheFileProvider != null); + this.shouldInterleaveSamples = shouldInterleaveSamples; this.cacheFileProvider = cacheFileProvider; } } @@ -373,6 +380,10 @@ public final class Mp4Muxer implements Muxer { */ public TrackToken addTrack(int sortKey, Format format) throws MuxerException { if (outputFileFormat == FILE_FORMAT_EDITABLE_VIDEO && isEditableVideoTrack(format)) { + if (checkNotNull(editableVideoParameters).shouldInterleaveSamples) { + // Editable video tracks are handled by the primary Mp4Writer. + return mp4Writer.addEditableVideoTrack(sortKey, format); + } try { ensureSetupForEditableVideoTracks(); } catch (FileNotFoundException e) { @@ -484,7 +495,8 @@ public final class Mp4Muxer implements Muxer { @EnsuresNonNull({"editableVideoMp4Writer"}) private void ensureSetupForEditableVideoTracks() throws FileNotFoundException { if (editableVideoMp4Writer == null) { - cacheFilePath = checkNotNull(editableVideoParameters).cacheFileProvider.getCacheFilePath(); + cacheFilePath = + checkNotNull(checkNotNull(editableVideoParameters).cacheFileProvider).getCacheFilePath(); cacheFileOutputStream = new FileOutputStream(cacheFilePath); editableVideoMetadataCollector = new MetadataCollector(); editableVideoMp4Writer = diff --git a/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Writer.java b/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Writer.java index 62f461bed5..4245e9acb4 100644 --- a/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Writer.java +++ b/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Writer.java @@ -20,12 +20,17 @@ import static androidx.media3.common.util.Assertions.checkState; import static androidx.media3.muxer.AnnexBUtils.doesSampleContainAnnexBNalUnits; import static 
androidx.media3.muxer.Boxes.BOX_HEADER_SIZE; import static androidx.media3.muxer.Boxes.LARGE_SIZE_BOX_HEADER_SIZE; +import static androidx.media3.muxer.Boxes.getEdvdBoxHeader; +import static androidx.media3.muxer.MuxerUtil.getEditableTracksLengthMetadata; +import static androidx.media3.muxer.MuxerUtil.getEditableTracksOffsetMetadata; +import static androidx.media3.muxer.MuxerUtil.populateEditableVideoTracksMetadata; import static java.lang.Math.max; import static java.lang.Math.min; import android.media.MediaCodec.BufferInfo; import androidx.media3.common.Format; import androidx.media3.common.util.Util; +import androidx.media3.container.MdtaMetadataEntry; import com.google.common.collect.Range; import java.io.IOException; import java.nio.ByteBuffer; @@ -47,6 +52,7 @@ import java.util.concurrent.atomic.AtomicBoolean; private final @Mp4Muxer.LastFrameDurationBehavior int lastFrameDurationBehavior; private final boolean sampleCopyEnabled; private final List<Track> tracks; + private final List<Track> editableVideoTracks; private final AtomicBoolean hasWrittenSamples; // Stores location of the space reserved for the moov box at the beginning of the file (after ftyp @@ -89,6 +95,7 @@ import java.util.concurrent.atomic.AtomicBoolean; this.lastFrameDurationBehavior = lastFrameDurationBehavior; this.sampleCopyEnabled = sampleCopyEnabled; tracks = new ArrayList<>(); + editableVideoTracks = new ArrayList<>(); hasWrittenSamples = new AtomicBoolean(false); canWriteMoovAtStart = attemptStreamableOutputEnabled; lastMoovWritten = Range.closed(0L, 0L); @@ -108,6 +115,22 @@ import java.util.concurrent.atomic.AtomicBoolean; return track; } + /** + * Adds an editable video track of the given {@link Format}. + * + *
<p>
See {@link MuxerUtil#isEditableVideoTrack(Format)} for editable video tracks. + * + * @param sortKey The key used for sorting the track list. + * @param format The {@link Format} for the track. + * @return A unique {@link Track}. It should be used in {@link #writeSampleData}. + */ + public Track addEditableVideoTrack(int sortKey, Format format) { + Track track = new Track(format, sortKey, sampleCopyEnabled); + editableVideoTracks.add(track); + Collections.sort(editableVideoTracks, (a, b) -> Integer.compare(a.sortKey, b.sortKey)); + return track; + } + /** * Writes encoded sample data. * @@ -132,11 +155,60 @@ import java.util.concurrent.atomic.AtomicBoolean; for (int i = 0; i < tracks.size(); i++) { writePendingTrackSamples(tracks.get(i)); } + for (int i = 0; i < editableVideoTracks.size(); i++) { + writePendingTrackSamples(editableVideoTracks.get(i)); + } // Leave the file empty if no samples are written. - if (hasWrittenSamples.get()) { - finalizeMoovBox(); + if (!hasWrittenSamples.get()) { + return; } + + finalizeMoovBox(); + + if (!editableVideoTracks.isEmpty()) { + writeEdvdBox(); + } + } + + private void writeEdvdBox() throws IOException { + // The exact offset is known after writing primary track data. + MdtaMetadataEntry placeholderEditableTrackOffset = + getEditableTracksOffsetMetadata(/* offset= */ 0L); + metadataCollector.addMetadata(placeholderEditableTrackOffset); + ByteBuffer edvdBox = getEdvdBox(); + metadataCollector.addMetadata(getEditableTracksLengthMetadata(edvdBox.remaining())); + finalizeMoovBox(); + // Once final moov is written, update the actual offset. + metadataCollector.removeMdtaMetadataEntry(placeholderEditableTrackOffset); + metadataCollector.addMetadata(getEditableTracksOffsetMetadata(outputFileChannel.size())); + long fileSizeBefore = outputFileChannel.size(); + finalizeMoovBox(); + checkState(fileSizeBefore == outputFileChannel.size()); + // After writing primary track data, write the edvd box. 
+ outputFileChannel.position(outputFileChannel.size()); + outputFileChannel.write(edvdBox); + } + + private ByteBuffer getEdvdBox() { + // The edvd box will have one ftyp and one moov box. + ByteBuffer ftypBox = Boxes.ftyp(); + MetadataCollector editableVideoMetadataCollector = new MetadataCollector(); + populateEditableVideoTracksMetadata( + editableVideoMetadataCollector, + metadataCollector.timestampData, + /* samplesInterleaved= */ true, + editableVideoTracks); + ByteBuffer moovBox = + Mp4MoovStructure.moov( + editableVideoTracks, + editableVideoMetadataCollector, + findMinimumPresentationTimestampUsAcrossTracks(editableVideoTracks), + /* isFragmentedMp4= */ false, + lastFrameDurationBehavior); + ByteBuffer edvdBoxHeader = + getEdvdBoxHeader(/* payloadSize= */ ftypBox.remaining() + moovBox.remaining()); + return BoxUtils.concatenateBuffers(edvdBoxHeader, ftypBox, moovBox); } /** @@ -435,9 +507,10 @@ import java.util.concurrent.atomic.AtomicBoolean; } private void doInterleave() throws IOException { - boolean newSamplesWritten = maybeWritePendingTrackSamples(tracks); + boolean primaryTrackSampleWritten = maybeWritePendingTrackSamples(tracks); + maybeWritePendingTrackSamples(editableVideoTracks); - if (newSamplesWritten && canWriteMoovAtStart) { + if (primaryTrackSampleWritten && canWriteMoovAtStart) { maybeWriteMoovAtStart(); } } diff --git a/libraries/muxer/src/test/java/androidx/media3/muxer/Mp4MuxerEndToEndTest.java b/libraries/muxer/src/test/java/androidx/media3/muxer/Mp4MuxerEndToEndTest.java index a0b9a7149a..d62b4af52e 100644 --- a/libraries/muxer/src/test/java/androidx/media3/muxer/Mp4MuxerEndToEndTest.java +++ b/libraries/muxer/src/test/java/androidx/media3/muxer/Mp4MuxerEndToEndTest.java @@ -384,7 +384,9 @@ public class Mp4MuxerEndToEndTest { Mp4Muxer muxer = new Mp4Muxer.Builder(new FileOutputStream(outputFilePath)) .setOutputFileFormat(Mp4Muxer.FILE_FORMAT_EDITABLE_VIDEO) - .setEditableVideoParameters(new Mp4Muxer.EditableVideoParameters(() -> 
cacheFilePath)) + .setEditableVideoParameters( + new Mp4Muxer.EditableVideoParameters( + /* shouldInterleaveSamples= */ false, () -> cacheFilePath)) .build(); try { @@ -416,7 +418,7 @@ public class Mp4MuxerEndToEndTest { DumpableMp4Box outputFileDumpableBox = new DumpableMp4Box(ByteBuffer.wrap(TestUtil.getByteArrayFromFilePath(outputFilePath))); - // 1 track is written in the outer moov box and 2 tracks are writtin in the edvd.moov box. + // 1 track is written in the outer moov box and 2 tracks are written in the edvd.moov box. DumpFileAsserts.assertOutput( context, outputFileDumpableBox, @@ -474,7 +476,9 @@ public class Mp4MuxerEndToEndTest { Mp4Muxer muxer = new Mp4Muxer.Builder(new FileOutputStream(outputFilePath)) .setOutputFileFormat(Mp4Muxer.FILE_FORMAT_EDITABLE_VIDEO) - .setEditableVideoParameters(new Mp4Muxer.EditableVideoParameters(() -> cacheFilePath)) + .setEditableVideoParameters( + new Mp4Muxer.EditableVideoParameters( + /* shouldInterleaveSamples= */ false, () -> cacheFilePath)) .build(); try { @@ -523,7 +527,9 @@ public class Mp4MuxerEndToEndTest { Mp4Muxer muxer = new Mp4Muxer.Builder(new FileOutputStream(outputFilePath)) .setOutputFileFormat(Mp4Muxer.FILE_FORMAT_EDITABLE_VIDEO) - .setEditableVideoParameters(new Mp4Muxer.EditableVideoParameters(() -> cacheFilePath)) + .setEditableVideoParameters( + new Mp4Muxer.EditableVideoParameters( + /* shouldInterleaveSamples= */ false, () -> cacheFilePath)) .build(); try { @@ -564,6 +570,109 @@ public class Mp4MuxerEndToEndTest { MuxerTestUtil.getExpectedDumpFilePath("mp4_with_editable_video_tracks.mp4")); } + @Test + public void + writeMp4File_withFileFormatEditableVideoAndEditableVideoTracksAndShouldInterleaveSamples_primaryVideoTracksMatchesExpected() + throws Exception { + String outputFilePath = temporaryFolder.newFile().getPath(); + Mp4Muxer muxer = + new Mp4Muxer.Builder(new FileOutputStream(outputFilePath)) + .setOutputFileFormat(Mp4Muxer.FILE_FORMAT_EDITABLE_VIDEO) + .setEditableVideoParameters( + new 
Mp4Muxer.EditableVideoParameters( + /* shouldInterleaveSamples= */ true, /* cacheFileProvider= */ null)) + .build(); + + try { + muxer.addMetadataEntry( + new Mp4TimestampData( + /* creationTimestampSeconds= */ 1_000_000L, + /* modificationTimestampSeconds= */ 5_000_000L)); + TrackToken primaryVideoTrackToken = muxer.addTrack(FAKE_VIDEO_FORMAT); + TrackToken sharpVideoTrackToken = + muxer.addTrack( + FAKE_VIDEO_FORMAT + .buildUpon() + .setRoleFlags(C.ROLE_FLAG_AUXILIARY) + .setAuxiliaryTrackType(C.AUXILIARY_TRACK_TYPE_ORIGINAL) + .build()); + TrackToken depthLinearVideoTrackToken = + muxer.addTrack( + FAKE_VIDEO_FORMAT + .buildUpon() + .setRoleFlags(C.ROLE_FLAG_AUXILIARY) + .setAuxiliaryTrackType(C.AUXILIARY_TRACK_TYPE_DEPTH_LINEAR) + .build()); + writeFakeSamples(muxer, primaryVideoTrackToken, /* sampleCount= */ 5); + writeFakeSamples(muxer, sharpVideoTrackToken, /* sampleCount= */ 5); + writeFakeSamples(muxer, depthLinearVideoTrackToken, /* sampleCount= */ 5); + } finally { + muxer.close(); + } + + FakeExtractorOutput primaryTracksOutput = + TestUtil.extractAllSamplesFromFilePath( + new Mp4Extractor(new DefaultSubtitleParserFactory()), outputFilePath); + // The Mp4Extractor extracts primary tracks by default. 
+ DumpFileAsserts.assertOutput( + context, + primaryTracksOutput, + MuxerTestUtil.getExpectedDumpFilePath( + "mp4_with_primary_tracks_when_editable_track_samples_interleaved.mp4")); + } + + @Test + public void + writeMp4File_withFileFormatEditableVideoAndEditableVideoTracksAndShouldInterleaveSamples_editableVideoTracksMatchesExpected() + throws Exception { + String outputFilePath = temporaryFolder.newFile().getPath(); + Mp4Muxer muxer = + new Mp4Muxer.Builder(new FileOutputStream(outputFilePath)) + .setOutputFileFormat(Mp4Muxer.FILE_FORMAT_EDITABLE_VIDEO) + .setEditableVideoParameters( + new Mp4Muxer.EditableVideoParameters( + /* shouldInterleaveSamples= */ true, /* cacheFileProvider= */ null)) + .build(); + + try { + muxer.addMetadataEntry( + new Mp4TimestampData( + /* creationTimestampSeconds= */ 1_000_000L, + /* modificationTimestampSeconds= */ 5_000_000L)); + TrackToken primaryVideoTrackToken = muxer.addTrack(FAKE_VIDEO_FORMAT); + TrackToken sharpVideoTrackToken = + muxer.addTrack( + FAKE_VIDEO_FORMAT + .buildUpon() + .setRoleFlags(C.ROLE_FLAG_AUXILIARY) + .setAuxiliaryTrackType(C.AUXILIARY_TRACK_TYPE_ORIGINAL) + .build()); + TrackToken depthLinearVideoTrackToken = + muxer.addTrack( + FAKE_VIDEO_FORMAT + .buildUpon() + .setRoleFlags(C.ROLE_FLAG_AUXILIARY) + .setAuxiliaryTrackType(C.AUXILIARY_TRACK_TYPE_DEPTH_LINEAR) + .build()); + writeFakeSamples(muxer, primaryVideoTrackToken, /* sampleCount= */ 5); + writeFakeSamples(muxer, sharpVideoTrackToken, /* sampleCount= */ 5); + writeFakeSamples(muxer, depthLinearVideoTrackToken, /* sampleCount= */ 5); + } finally { + muxer.close(); + } + + FakeExtractorOutput editableTracksOutput = + TestUtil.extractAllSamplesFromFilePath( + new Mp4Extractor( + new DefaultSubtitleParserFactory(), Mp4Extractor.FLAG_READ_EDITABLE_VIDEO_TRACKS), + outputFilePath); + DumpFileAsserts.assertOutput( + context, + editableTracksOutput, + MuxerTestUtil.getExpectedDumpFilePath( + 
"mp4_with_editable_video_tracks_when_editable_track_samples_interleaved.mp4")); + } + private static void writeFakeSamples(Mp4Muxer muxer, TrackToken trackToken, int sampleCount) throws Muxer.MuxerException { for (int i = 0; i < sampleCount; i++) { diff --git a/libraries/test_data/src/test/assets/muxerdumps/mp4_with_editable_video_tracks_when_editable_track_samples_interleaved.mp4.dump b/libraries/test_data/src/test/assets/muxerdumps/mp4_with_editable_video_tracks_when_editable_track_samples_interleaved.mp4.dump new file mode 100644 index 0000000000..396722fb5d --- /dev/null +++ b/libraries/test_data/src/test/assets/muxerdumps/mp4_with_editable_video_tracks_when_editable_track_samples_interleaved.mp4.dump @@ -0,0 +1,91 @@ +seekMap: + isSeekable = true + duration = 0 + getPosition(0) = [[timeUs=0, position=400332]] + getPosition(1) = [[timeUs=0, position=400556]] + getPosition(0) = [[timeUs=0, position=400332]] + getPosition(0) = [[timeUs=0, position=400332]] +numberOfTracks = 2 +track 0: + total output bytes = 280 + sample count = 5 + format 0: + id = 1 + sampleMimeType = video/avc + codecs = avc1.F4000A + maxInputSize = 86 + maxNumReorderSamples = 2 + width = 12 + height = 10 + colorInfo: + colorRange = 1 + lumaBitdepth = 8 + chromaBitdepth = 8 + roleFlags = [auxiliary] + auxiliaryTrackType = original + metadata = entries=[mdta: key=editable.tracks.map, value=track types = 0,1, mdta: key=editable.tracks.samples.location, value=1, Mp4Timestamp: creation time=1000000, modification time=5000000, timescale=10000] + initializationData: + data = length 28, hash 410B510 + data = length 9, hash FBADD682 + sample 0: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 1: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 2: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 3: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 4: + time = 0 + flags = 536870913 + data = length 56, hash C4551A2E +track 1: + total 
output bytes = 280 + sample count = 5 + format 0: + id = 2 + sampleMimeType = video/avc + codecs = avc1.F4000A + maxInputSize = 86 + maxNumReorderSamples = 2 + width = 12 + height = 10 + colorInfo: + colorRange = 1 + lumaBitdepth = 8 + chromaBitdepth = 8 + roleFlags = [auxiliary] + auxiliaryTrackType = depth-linear + metadata = entries=[mdta: key=editable.tracks.map, value=track types = 0,1, mdta: key=editable.tracks.samples.location, value=1, Mp4Timestamp: creation time=1000000, modification time=5000000, timescale=10000] + initializationData: + data = length 28, hash 410B510 + data = length 9, hash FBADD682 + sample 0: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 1: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 2: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 3: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 4: + time = 0 + flags = 536870913 + data = length 56, hash C4551A2E +tracksEnded = true diff --git a/libraries/test_data/src/test/assets/muxerdumps/mp4_with_primary_tracks_when_editable_track_samples_interleaved.mp4.dump b/libraries/test_data/src/test/assets/muxerdumps/mp4_with_primary_tracks_when_editable_track_samples_interleaved.mp4.dump new file mode 100644 index 0000000000..87f986254c --- /dev/null +++ b/libraries/test_data/src/test/assets/muxerdumps/mp4_with_primary_tracks_when_editable_track_samples_interleaved.mp4.dump @@ -0,0 +1,48 @@ +seekMap: + isSeekable = true + duration = 0 + getPosition(0) = [[timeUs=0, position=400052]] + getPosition(1) = [[timeUs=0, position=400276]] + getPosition(0) = [[timeUs=0, position=400052]] + getPosition(0) = [[timeUs=0, position=400052]] +numberOfTracks = 1 +track 0: + total output bytes = 280 + sample count = 5 + format 0: + id = 1 + sampleMimeType = video/avc + codecs = avc1.F4000A + maxInputSize = 86 + maxNumReorderSamples = 2 + width = 12 + height = 10 + colorInfo: + colorRange = 1 + lumaBitdepth = 8 + chromaBitdepth = 8 + metadata = 
entries=[mdta: key=editable.tracks.length, value=1493, mdta: key=editable.tracks.offset, value=400892, Mp4Timestamp: creation time=1000000, modification time=5000000, timescale=10000] + initializationData: + data = length 28, hash 410B510 + data = length 9, hash FBADD682 + sample 0: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 1: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 2: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 3: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 4: + time = 0 + flags = 536870913 + data = length 56, hash C4551A2E +tracksEnded = true