diff --git a/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Muxer.java b/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Muxer.java index 45e1607c91..e5680edbc6 100644 --- a/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Muxer.java +++ b/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Muxer.java @@ -121,14 +121,21 @@ public final class Mp4Muxer implements Muxer { String getCacheFilePath(); } - public final CacheFileProvider cacheFileProvider; + public final boolean shouldInterleaveSamples; + @Nullable public final CacheFileProvider cacheFileProvider; /** * Creates an instance. * - * @param cacheFileProvider A {@link CacheFileProvider}. + * @param shouldInterleaveSamples Whether to interleave editable video track samples with + * primary track samples. + * @param cacheFileProvider A {@link CacheFileProvider}. Required only when {@code + * shouldInterleaveSamples} is set to {@code false}, can be {@code null} otherwise. */ - public EditableVideoParameters(CacheFileProvider cacheFileProvider) { + public EditableVideoParameters( + boolean shouldInterleaveSamples, @Nullable CacheFileProvider cacheFileProvider) { + checkArgument(shouldInterleaveSamples || cacheFileProvider != null); + this.shouldInterleaveSamples = shouldInterleaveSamples; this.cacheFileProvider = cacheFileProvider; } } @@ -373,6 +380,10 @@ public final class Mp4Muxer implements Muxer { */ public TrackToken addTrack(int sortKey, Format format) throws MuxerException { if (outputFileFormat == FILE_FORMAT_EDITABLE_VIDEO && isEditableVideoTrack(format)) { + if (checkNotNull(editableVideoParameters).shouldInterleaveSamples) { + // Editable video tracks are handled by the primary Mp4Writer. 
+ return mp4Writer.addEditableVideoTrack(sortKey, format); + } try { ensureSetupForEditableVideoTracks(); } catch (FileNotFoundException e) { @@ -484,7 +495,8 @@ public final class Mp4Muxer implements Muxer { @EnsuresNonNull({"editableVideoMp4Writer"}) private void ensureSetupForEditableVideoTracks() throws FileNotFoundException { if (editableVideoMp4Writer == null) { - cacheFilePath = checkNotNull(editableVideoParameters).cacheFileProvider.getCacheFilePath(); + cacheFilePath = + checkNotNull(checkNotNull(editableVideoParameters).cacheFileProvider).getCacheFilePath(); cacheFileOutputStream = new FileOutputStream(cacheFilePath); editableVideoMetadataCollector = new MetadataCollector(); editableVideoMp4Writer = diff --git a/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Writer.java b/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Writer.java index 62f461bed5..4245e9acb4 100644 --- a/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Writer.java +++ b/libraries/muxer/src/main/java/androidx/media3/muxer/Mp4Writer.java @@ -20,12 +20,17 @@ import static androidx.media3.common.util.Assertions.checkState; import static androidx.media3.muxer.AnnexBUtils.doesSampleContainAnnexBNalUnits; import static androidx.media3.muxer.Boxes.BOX_HEADER_SIZE; import static androidx.media3.muxer.Boxes.LARGE_SIZE_BOX_HEADER_SIZE; +import static androidx.media3.muxer.Boxes.getEdvdBoxHeader; +import static androidx.media3.muxer.MuxerUtil.getEditableTracksLengthMetadata; +import static androidx.media3.muxer.MuxerUtil.getEditableTracksOffsetMetadata; +import static androidx.media3.muxer.MuxerUtil.populateEditableVideoTracksMetadata; import static java.lang.Math.max; import static java.lang.Math.min; import android.media.MediaCodec.BufferInfo; import androidx.media3.common.Format; import androidx.media3.common.util.Util; +import androidx.media3.container.MdtaMetadataEntry; import com.google.common.collect.Range; import java.io.IOException; import java.nio.ByteBuffer; @@ -47,6 
+52,7 @@ import java.util.concurrent.atomic.AtomicBoolean; private final @Mp4Muxer.LastFrameDurationBehavior int lastFrameDurationBehavior; private final boolean sampleCopyEnabled; private final List<Track> tracks; + private final List<Track> editableVideoTracks; private final AtomicBoolean hasWrittenSamples; // Stores location of the space reserved for the moov box at the beginning of the file (after ftyp @@ -89,6 +95,7 @@ import java.util.concurrent.atomic.AtomicBoolean; this.lastFrameDurationBehavior = lastFrameDurationBehavior; this.sampleCopyEnabled = sampleCopyEnabled; tracks = new ArrayList<>(); + editableVideoTracks = new ArrayList<>(); hasWrittenSamples = new AtomicBoolean(false); canWriteMoovAtStart = attemptStreamableOutputEnabled; lastMoovWritten = Range.closed(0L, 0L); @@ -108,6 +115,22 @@ import java.util.concurrent.atomic.AtomicBoolean; return track; } + /** + * Adds an editable video track of the given {@link Format}. + * + * <p>
See {@link MuxerUtil#isEditableVideoTrack(Format)} for editable video tracks. + * + * @param sortKey The key used for sorting the track list. + * @param format The {@link Format} for the track. + * @return A unique {@link Track}. It should be used in {@link #writeSampleData}. + */ + public Track addEditableVideoTrack(int sortKey, Format format) { + Track track = new Track(format, sortKey, sampleCopyEnabled); + editableVideoTracks.add(track); + Collections.sort(editableVideoTracks, (a, b) -> Integer.compare(a.sortKey, b.sortKey)); + return track; + } + /** * Writes encoded sample data. * @@ -132,11 +155,60 @@ import java.util.concurrent.atomic.AtomicBoolean; for (int i = 0; i < tracks.size(); i++) { writePendingTrackSamples(tracks.get(i)); } + for (int i = 0; i < editableVideoTracks.size(); i++) { + writePendingTrackSamples(editableVideoTracks.get(i)); + } // Leave the file empty if no samples are written. - if (hasWrittenSamples.get()) { - finalizeMoovBox(); + if (!hasWrittenSamples.get()) { + return; } + + finalizeMoovBox(); + + if (!editableVideoTracks.isEmpty()) { + writeEdvdBox(); + } + } + + private void writeEdvdBox() throws IOException { + // The exact offset is known after writing primary track data. + MdtaMetadataEntry placeholderEditableTrackOffset = + getEditableTracksOffsetMetadata(/* offset= */ 0L); + metadataCollector.addMetadata(placeholderEditableTrackOffset); + ByteBuffer edvdBox = getEdvdBox(); + metadataCollector.addMetadata(getEditableTracksLengthMetadata(edvdBox.remaining())); + finalizeMoovBox(); + // Once final moov is written, update the actual offset. + metadataCollector.removeMdtaMetadataEntry(placeholderEditableTrackOffset); + metadataCollector.addMetadata(getEditableTracksOffsetMetadata(outputFileChannel.size())); + long fileSizeBefore = outputFileChannel.size(); + finalizeMoovBox(); + checkState(fileSizeBefore == outputFileChannel.size()); + // After writing primary track data, write the edvd box. 
+ outputFileChannel.position(outputFileChannel.size()); + outputFileChannel.write(edvdBox); + } + + private ByteBuffer getEdvdBox() { + // The edvd box will have one ftyp and one moov box. + ByteBuffer ftypBox = Boxes.ftyp(); + MetadataCollector editableVideoMetadataCollector = new MetadataCollector(); + populateEditableVideoTracksMetadata( + editableVideoMetadataCollector, + metadataCollector.timestampData, + /* samplesInterleaved= */ true, + editableVideoTracks); + ByteBuffer moovBox = + Mp4MoovStructure.moov( + editableVideoTracks, + editableVideoMetadataCollector, + findMinimumPresentationTimestampUsAcrossTracks(editableVideoTracks), + /* isFragmentedMp4= */ false, + lastFrameDurationBehavior); + ByteBuffer edvdBoxHeader = + getEdvdBoxHeader(/* payloadSize= */ ftypBox.remaining() + moovBox.remaining()); + return BoxUtils.concatenateBuffers(edvdBoxHeader, ftypBox, moovBox); } /** @@ -435,9 +507,10 @@ import java.util.concurrent.atomic.AtomicBoolean; } private void doInterleave() throws IOException { - boolean newSamplesWritten = maybeWritePendingTrackSamples(tracks); + boolean primaryTrackSampleWritten = maybeWritePendingTrackSamples(tracks); + maybeWritePendingTrackSamples(editableVideoTracks); - if (newSamplesWritten && canWriteMoovAtStart) { + if (primaryTrackSampleWritten && canWriteMoovAtStart) { maybeWriteMoovAtStart(); } } diff --git a/libraries/muxer/src/test/java/androidx/media3/muxer/Mp4MuxerEndToEndTest.java b/libraries/muxer/src/test/java/androidx/media3/muxer/Mp4MuxerEndToEndTest.java index a0b9a7149a..d62b4af52e 100644 --- a/libraries/muxer/src/test/java/androidx/media3/muxer/Mp4MuxerEndToEndTest.java +++ b/libraries/muxer/src/test/java/androidx/media3/muxer/Mp4MuxerEndToEndTest.java @@ -384,7 +384,9 @@ public class Mp4MuxerEndToEndTest { Mp4Muxer muxer = new Mp4Muxer.Builder(new FileOutputStream(outputFilePath)) .setOutputFileFormat(Mp4Muxer.FILE_FORMAT_EDITABLE_VIDEO) - .setEditableVideoParameters(new Mp4Muxer.EditableVideoParameters(() -> 
cacheFilePath)) + .setEditableVideoParameters( + new Mp4Muxer.EditableVideoParameters( + /* shouldInterleaveSamples= */ false, () -> cacheFilePath)) .build(); try { @@ -416,7 +418,7 @@ public class Mp4MuxerEndToEndTest { DumpableMp4Box outputFileDumpableBox = new DumpableMp4Box(ByteBuffer.wrap(TestUtil.getByteArrayFromFilePath(outputFilePath))); - // 1 track is written in the outer moov box and 2 tracks are writtin in the edvd.moov box. + // 1 track is written in the outer moov box and 2 tracks are written in the edvd.moov box. DumpFileAsserts.assertOutput( context, outputFileDumpableBox, @@ -474,7 +476,9 @@ public class Mp4MuxerEndToEndTest { Mp4Muxer muxer = new Mp4Muxer.Builder(new FileOutputStream(outputFilePath)) .setOutputFileFormat(Mp4Muxer.FILE_FORMAT_EDITABLE_VIDEO) - .setEditableVideoParameters(new Mp4Muxer.EditableVideoParameters(() -> cacheFilePath)) + .setEditableVideoParameters( + new Mp4Muxer.EditableVideoParameters( + /* shouldInterleaveSamples= */ false, () -> cacheFilePath)) .build(); try { @@ -523,7 +527,9 @@ public class Mp4MuxerEndToEndTest { Mp4Muxer muxer = new Mp4Muxer.Builder(new FileOutputStream(outputFilePath)) .setOutputFileFormat(Mp4Muxer.FILE_FORMAT_EDITABLE_VIDEO) - .setEditableVideoParameters(new Mp4Muxer.EditableVideoParameters(() -> cacheFilePath)) + .setEditableVideoParameters( + new Mp4Muxer.EditableVideoParameters( + /* shouldInterleaveSamples= */ false, () -> cacheFilePath)) .build(); try { @@ -564,6 +570,109 @@ public class Mp4MuxerEndToEndTest { MuxerTestUtil.getExpectedDumpFilePath("mp4_with_editable_video_tracks.mp4")); } + @Test + public void + writeMp4File_withFileFormatEditableVideoAndEditableVideoTracksAndShouldInterleaveSamples_primaryVideoTracksMatchesExpected() + throws Exception { + String outputFilePath = temporaryFolder.newFile().getPath(); + Mp4Muxer muxer = + new Mp4Muxer.Builder(new FileOutputStream(outputFilePath)) + .setOutputFileFormat(Mp4Muxer.FILE_FORMAT_EDITABLE_VIDEO) + .setEditableVideoParameters( + new 
Mp4Muxer.EditableVideoParameters( + /* shouldInterleaveSamples= */ true, /* cacheFileProvider= */ null)) + .build(); + + try { + muxer.addMetadataEntry( + new Mp4TimestampData( + /* creationTimestampSeconds= */ 1_000_000L, + /* modificationTimestampSeconds= */ 5_000_000L)); + TrackToken primaryVideoTrackToken = muxer.addTrack(FAKE_VIDEO_FORMAT); + TrackToken sharpVideoTrackToken = + muxer.addTrack( + FAKE_VIDEO_FORMAT + .buildUpon() + .setRoleFlags(C.ROLE_FLAG_AUXILIARY) + .setAuxiliaryTrackType(C.AUXILIARY_TRACK_TYPE_ORIGINAL) + .build()); + TrackToken depthLinearVideoTrackToken = + muxer.addTrack( + FAKE_VIDEO_FORMAT + .buildUpon() + .setRoleFlags(C.ROLE_FLAG_AUXILIARY) + .setAuxiliaryTrackType(C.AUXILIARY_TRACK_TYPE_DEPTH_LINEAR) + .build()); + writeFakeSamples(muxer, primaryVideoTrackToken, /* sampleCount= */ 5); + writeFakeSamples(muxer, sharpVideoTrackToken, /* sampleCount= */ 5); + writeFakeSamples(muxer, depthLinearVideoTrackToken, /* sampleCount= */ 5); + } finally { + muxer.close(); + } + + FakeExtractorOutput primaryTracksOutput = + TestUtil.extractAllSamplesFromFilePath( + new Mp4Extractor(new DefaultSubtitleParserFactory()), outputFilePath); + // The Mp4Extractor extracts primary tracks by default. 
+ DumpFileAsserts.assertOutput( + context, + primaryTracksOutput, + MuxerTestUtil.getExpectedDumpFilePath( + "mp4_with_primary_tracks_when_editable_track_samples_interleaved.mp4")); + } + + @Test + public void + writeMp4File_withFileFormatEditableVideoAndEditableVideoTracksAndShouldInterleaveSamples_editableVideoTracksMatchesExpected() + throws Exception { + String outputFilePath = temporaryFolder.newFile().getPath(); + Mp4Muxer muxer = + new Mp4Muxer.Builder(new FileOutputStream(outputFilePath)) + .setOutputFileFormat(Mp4Muxer.FILE_FORMAT_EDITABLE_VIDEO) + .setEditableVideoParameters( + new Mp4Muxer.EditableVideoParameters( + /* shouldInterleaveSamples= */ true, /* cacheFileProvider= */ null)) + .build(); + + try { + muxer.addMetadataEntry( + new Mp4TimestampData( + /* creationTimestampSeconds= */ 1_000_000L, + /* modificationTimestampSeconds= */ 5_000_000L)); + TrackToken primaryVideoTrackToken = muxer.addTrack(FAKE_VIDEO_FORMAT); + TrackToken sharpVideoTrackToken = + muxer.addTrack( + FAKE_VIDEO_FORMAT + .buildUpon() + .setRoleFlags(C.ROLE_FLAG_AUXILIARY) + .setAuxiliaryTrackType(C.AUXILIARY_TRACK_TYPE_ORIGINAL) + .build()); + TrackToken depthLinearVideoTrackToken = + muxer.addTrack( + FAKE_VIDEO_FORMAT + .buildUpon() + .setRoleFlags(C.ROLE_FLAG_AUXILIARY) + .setAuxiliaryTrackType(C.AUXILIARY_TRACK_TYPE_DEPTH_LINEAR) + .build()); + writeFakeSamples(muxer, primaryVideoTrackToken, /* sampleCount= */ 5); + writeFakeSamples(muxer, sharpVideoTrackToken, /* sampleCount= */ 5); + writeFakeSamples(muxer, depthLinearVideoTrackToken, /* sampleCount= */ 5); + } finally { + muxer.close(); + } + + FakeExtractorOutput editableTracksOutput = + TestUtil.extractAllSamplesFromFilePath( + new Mp4Extractor( + new DefaultSubtitleParserFactory(), Mp4Extractor.FLAG_READ_EDITABLE_VIDEO_TRACKS), + outputFilePath); + DumpFileAsserts.assertOutput( + context, + editableTracksOutput, + MuxerTestUtil.getExpectedDumpFilePath( + 
"mp4_with_editable_video_tracks_when_editable_track_samples_interleaved.mp4")); + } + private static void writeFakeSamples(Mp4Muxer muxer, TrackToken trackToken, int sampleCount) throws Muxer.MuxerException { for (int i = 0; i < sampleCount; i++) { diff --git a/libraries/test_data/src/test/assets/muxerdumps/mp4_with_editable_video_tracks_when_editable_track_samples_interleaved.mp4.dump b/libraries/test_data/src/test/assets/muxerdumps/mp4_with_editable_video_tracks_when_editable_track_samples_interleaved.mp4.dump new file mode 100644 index 0000000000..396722fb5d --- /dev/null +++ b/libraries/test_data/src/test/assets/muxerdumps/mp4_with_editable_video_tracks_when_editable_track_samples_interleaved.mp4.dump @@ -0,0 +1,91 @@ +seekMap: + isSeekable = true + duration = 0 + getPosition(0) = [[timeUs=0, position=400332]] + getPosition(1) = [[timeUs=0, position=400556]] + getPosition(0) = [[timeUs=0, position=400332]] + getPosition(0) = [[timeUs=0, position=400332]] +numberOfTracks = 2 +track 0: + total output bytes = 280 + sample count = 5 + format 0: + id = 1 + sampleMimeType = video/avc + codecs = avc1.F4000A + maxInputSize = 86 + maxNumReorderSamples = 2 + width = 12 + height = 10 + colorInfo: + colorRange = 1 + lumaBitdepth = 8 + chromaBitdepth = 8 + roleFlags = [auxiliary] + auxiliaryTrackType = original + metadata = entries=[mdta: key=editable.tracks.map, value=track types = 0,1, mdta: key=editable.tracks.samples.location, value=1, Mp4Timestamp: creation time=1000000, modification time=5000000, timescale=10000] + initializationData: + data = length 28, hash 410B510 + data = length 9, hash FBADD682 + sample 0: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 1: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 2: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 3: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 4: + time = 0 + flags = 536870913 + data = length 56, hash C4551A2E +track 1: + total 
output bytes = 280 + sample count = 5 + format 0: + id = 2 + sampleMimeType = video/avc + codecs = avc1.F4000A + maxInputSize = 86 + maxNumReorderSamples = 2 + width = 12 + height = 10 + colorInfo: + colorRange = 1 + lumaBitdepth = 8 + chromaBitdepth = 8 + roleFlags = [auxiliary] + auxiliaryTrackType = depth-linear + metadata = entries=[mdta: key=editable.tracks.map, value=track types = 0,1, mdta: key=editable.tracks.samples.location, value=1, Mp4Timestamp: creation time=1000000, modification time=5000000, timescale=10000] + initializationData: + data = length 28, hash 410B510 + data = length 9, hash FBADD682 + sample 0: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 1: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 2: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 3: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 4: + time = 0 + flags = 536870913 + data = length 56, hash C4551A2E +tracksEnded = true diff --git a/libraries/test_data/src/test/assets/muxerdumps/mp4_with_primary_tracks_when_editable_track_samples_interleaved.mp4.dump b/libraries/test_data/src/test/assets/muxerdumps/mp4_with_primary_tracks_when_editable_track_samples_interleaved.mp4.dump new file mode 100644 index 0000000000..87f986254c --- /dev/null +++ b/libraries/test_data/src/test/assets/muxerdumps/mp4_with_primary_tracks_when_editable_track_samples_interleaved.mp4.dump @@ -0,0 +1,48 @@ +seekMap: + isSeekable = true + duration = 0 + getPosition(0) = [[timeUs=0, position=400052]] + getPosition(1) = [[timeUs=0, position=400276]] + getPosition(0) = [[timeUs=0, position=400052]] + getPosition(0) = [[timeUs=0, position=400052]] +numberOfTracks = 1 +track 0: + total output bytes = 280 + sample count = 5 + format 0: + id = 1 + sampleMimeType = video/avc + codecs = avc1.F4000A + maxInputSize = 86 + maxNumReorderSamples = 2 + width = 12 + height = 10 + colorInfo: + colorRange = 1 + lumaBitdepth = 8 + chromaBitdepth = 8 + metadata = 
entries=[mdta: key=editable.tracks.length, value=1493, mdta: key=editable.tracks.offset, value=400892, Mp4Timestamp: creation time=1000000, modification time=5000000, timescale=10000] + initializationData: + data = length 28, hash 410B510 + data = length 9, hash FBADD682 + sample 0: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 1: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 2: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 3: + time = 0 + flags = 1 + data = length 56, hash C4551A2E + sample 4: + time = 0 + flags = 536870913 + data = length 56, hash C4551A2E +tracksEnded = true