Transformer: Add API to drop audio samples before the first video frame

fix for Issue: androidx/media#829

Manual testing: Viewed the Transformer output file for the previously problematic case in ExoPlayer, Chrome, VLC, QuickTime and Safari; the issue no longer occurs in any of them. The newly produced output file can be found at https://github.com/androidx/media/assets/42352357/fdf105c1-9550-422f-b088-7900f655ac78

PiperOrigin-RevId: 593104752
This commit is contained in:
tofunmi 2023-12-22 06:15:32 -08:00 committed by Copybara-Service
parent a496bbd777
commit 1632f37d70
6 changed files with 212 additions and 21 deletions

View file

@ -49,6 +49,11 @@
* Increase transmuxing speed, especially for 'remove video' edits.
* Work around an issue where the encoder would throw at configuration time
due to setting a high operating rate.
* Add API to ensure that the output file starts on a video frame. This can
make the output of trimming operations more compatible with player
implementations that don't show the first video frame until its
presentation timestamp
([#829](https://github.com/androidx/media/issues/829)).
* Track Selection:
* Add `DefaultTrackSelector.selectImageTrack` to enable image track
selection.

View file

@ -0,0 +1,45 @@
format audio:
sampleMimeType = audio/mp4a-latm
channelCount = 2
sampleRate = 40000
format video:
sampleMimeType = video/avc
width = 1080
height = 720
colorInfo:
colorSpace = 1
colorRange = 2
colorTransfer = 3
initializationData:
data = length 4, hash E93C3
sample:
trackType = audio
dataHashCode = 955331
size = 4
isKeyFrame = true
presentationTimeUs = 10
sample:
trackType = audio
dataHashCode = 955331
size = 4
isKeyFrame = true
presentationTimeUs = 12
sample:
trackType = audio
dataHashCode = 955331
size = 4
isKeyFrame = true
presentationTimeUs = 17
sample:
trackType = video
dataHashCode = 955331
size = 4
isKeyFrame = true
presentationTimeUs = 10
sample:
trackType = video
dataHashCode = 955331
size = 4
isKeyFrame = true
presentationTimeUs = 15
released = false

View file

@ -19,6 +19,7 @@ package androidx.media3.transformer;
import static androidx.media3.common.util.Assertions.checkArgument;
import static androidx.media3.common.util.Assertions.checkNotNull;
import static androidx.media3.common.util.Assertions.checkState;
import static androidx.media3.common.util.Assertions.checkStateNotNull;
import static androidx.media3.common.util.Util.areEqual;
import static androidx.media3.common.util.Util.contains;
import static androidx.media3.common.util.Util.usToMs;
@ -47,7 +48,6 @@ import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import org.checkerframework.checker.nullness.qual.EnsuresNonNull;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
import org.checkerframework.checker.nullness.qual.RequiresNonNull;
/**
* A wrapper around a media muxer.
@ -99,6 +99,7 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
private final String outputPath;
private final Muxer.Factory muxerFactory;
private final Listener listener;
private final boolean dropSamplesBeforeFirstVideoSample;
private final SparseArray<TrackInfo> trackTypeToInfo;
private final ScheduledExecutorService abortScheduledExecutorService;
@ -113,6 +114,7 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
private @MuxerMode int muxerMode;
private boolean muxedPartialVideo;
private boolean muxedPartialAudio;
private long firstVideoPresentationTimeUs;
private volatile int additionalRotationDegrees;
private volatile int trackCount;
@ -125,16 +127,24 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
* @param listener A {@link MuxerWrapper.Listener}.
* @param muxerMode The {@link MuxerMode}. The initial mode must be {@link #MUXER_MODE_DEFAULT} or
* {@link #MUXER_MODE_MUX_PARTIAL}.
* @param dropSamplesBeforeFirstVideoSample Whether to drop any non-video samples with
* presentation timestamps before the first video sample.
*/
public MuxerWrapper(
String outputPath, Muxer.Factory muxerFactory, Listener listener, @MuxerMode int muxerMode) {
String outputPath,
Muxer.Factory muxerFactory,
Listener listener,
@MuxerMode int muxerMode,
boolean dropSamplesBeforeFirstVideoSample) {
this.outputPath = outputPath;
this.muxerFactory = muxerFactory;
this.listener = listener;
checkArgument(muxerMode == MUXER_MODE_DEFAULT || muxerMode == MUXER_MODE_MUX_PARTIAL);
this.muxerMode = muxerMode;
this.dropSamplesBeforeFirstVideoSample = dropSamplesBeforeFirstVideoSample;
trackTypeToInfo = new SparseArray<>();
previousTrackType = C.TRACK_TYPE_NONE;
firstVideoPresentationTimeUs = C.TIME_UNSET;
abortScheduledExecutorService = Util.newSingleThreadScheduledExecutor(TIMER_THREAD_NAME);
}
@ -256,7 +266,6 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
checkArgument(existingFormat.sampleRate == format.sampleRate);
checkArgument(existingFormat.initializationDataEquals(format));
}
checkNotNull(muxer);
resetAbortTimer();
return;
}
@ -308,7 +317,8 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
* @param data The sample to write.
* @param isKeyFrame Whether the sample is a key frame.
* @param presentationTimeUs The presentation time of the sample in microseconds.
* @return Whether the sample was successfully written. {@code false} if samples of other
* @return Whether the sample was successfully written, or dropped if configured to drop the
* sample via {@code dropSamplesBeforeFirstVideoSample}. {@code false} if samples of other
* {@linkplain C.TrackType track types} should be written first to ensure the file's track
* interleaving is balanced, or if the muxer hasn't {@linkplain #addTrackFormat(Format)
* received a format} for every {@linkplain #setTrackCount(int) track}.
@ -328,12 +338,22 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
presentationTimeUs,
/* extraFormat= */ "%s",
/* extraArgs...= */ canWriteSample);
if (firstVideoPresentationTimeUs == C.TIME_UNSET) {
firstVideoPresentationTimeUs = presentationTimeUs;
}
} else if (trackType == C.TRACK_TYPE_AUDIO) {
DebugTraceUtil.logEvent(
DebugTraceUtil.EVENT_MUXER_CAN_WRITE_SAMPLE_AUDIO,
presentationTimeUs,
/* extraFormat= */ "%s",
/* extraArgs...= */ canWriteSample);
if (dropSamplesBeforeFirstVideoSample
&& firstVideoPresentationTimeUs != C.TIME_UNSET
&& presentationTimeUs < firstVideoPresentationTimeUs) {
// Drop the buffer.
resetAbortTimer();
return true;
}
}
if (!canWriteSample) {
return false;
@ -343,8 +363,8 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
trackInfo.bytesWritten += data.remaining();
trackInfo.timeUs = max(trackInfo.timeUs, presentationTimeUs);
checkNotNull(muxer);
resetAbortTimer();
checkStateNotNull(muxer);
muxer.writeSampleData(
trackInfo.index, data, presentationTimeUs, isKeyFrame ? C.BUFFER_FLAG_KEY_FRAME : 0);
if (trackType == C.TRACK_TYPE_VIDEO) {
@ -442,6 +462,12 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
}
private boolean canWriteSample(@C.TrackType int trackType, long presentationTimeUs) {
if (dropSamplesBeforeFirstVideoSample
&& trackType != C.TRACK_TYPE_VIDEO
&& firstVideoPresentationTimeUs == C.TIME_UNSET) {
// Haven't received the first video sample yet, so can't write any audio.
return false;
}
if (!isReady) {
return false;
}
@ -462,8 +488,8 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull;
return presentationTimeUs - minTrackTimeUs <= MAX_TRACK_WRITE_AHEAD_US;
}
@RequiresNonNull("muxer")
private void resetAbortTimer() {
checkStateNotNull(muxer);
long maxDelayBetweenSamplesMs = muxer.getMaxDelayBetweenSamplesMs();
if (maxDelayBetweenSamplesMs == C.TIME_UNSET) {
return;

View file

@ -108,6 +108,7 @@ public final class Transformer {
private boolean removeVideo;
private boolean flattenForSlowMotion;
private boolean trimOptimizationEnabled;
private boolean fileStartsOnVideoFrameEnabled;
private ListenerSet<Transformer.Listener> listeners;
private AssetLoader.@MonotonicNonNull Factory assetLoaderFactory;
private AudioMixer.Factory audioMixerFactory;
@ -148,6 +149,7 @@ public final class Transformer {
this.removeAudio = transformer.removeAudio;
this.removeVideo = transformer.removeVideo;
this.trimOptimizationEnabled = transformer.trimOptimizationEnabled;
this.fileStartsOnVideoFrameEnabled = transformer.fileStartsOnVideoFrameEnabled;
this.listeners = transformer.listeners;
this.assetLoaderFactory = transformer.assetLoaderFactory;
this.audioMixerFactory = transformer.audioMixerFactory;
@ -323,6 +325,25 @@ public final class Transformer {
return this;
}
/**
 * Sets whether the output file must begin on a video frame.
 *
 * <p>When enabled, audio samples timed earlier than the first video frame are dropped. This can
 * make the output of trimming operations more compatible with player implementations that do
 * not show the first video frame until its presentation timestamp.
 *
 * <p>This setting is ignored when {@linkplain #experimentalSetTrimOptimizationEnabled trim
 * optimization} is set.
 *
 * @param enabled Whether to ensure that the file starts on a video frame.
 * @return This builder.
 */
@CanIgnoreReturnValue
public Builder setEnsureFileStartsOnVideoFrameEnabled(boolean enabled) {
  this.fileStartsOnVideoFrameEnabled = enabled;
  return this;
}
/**
* @deprecated Use {@link #addListener(Listener)}, {@link #removeListener(Listener)} or {@link
* #removeAllListeners()} instead.
@ -539,6 +560,7 @@ public final class Transformer {
removeVideo,
flattenForSlowMotion,
trimOptimizationEnabled,
fileStartsOnVideoFrameEnabled,
listeners,
assetLoaderFactory,
audioMixerFactory,
@ -734,6 +756,7 @@ public final class Transformer {
private final boolean removeVideo;
private final boolean flattenForSlowMotion;
private final boolean trimOptimizationEnabled;
private final boolean fileStartsOnVideoFrameEnabled;
private final ListenerSet<Transformer.Listener> listeners;
@Nullable private final AssetLoader.Factory assetLoaderFactory;
private final AudioMixer.Factory audioMixerFactory;
@ -769,6 +792,7 @@ public final class Transformer {
boolean removeVideo,
boolean flattenForSlowMotion,
boolean trimOptimizationEnabled,
boolean fileStartsOnVideoFrameEnabled,
ListenerSet<Listener> listeners,
@Nullable AssetLoader.Factory assetLoaderFactory,
AudioMixer.Factory audioMixerFactory,
@ -787,6 +811,7 @@ public final class Transformer {
this.removeVideo = removeVideo;
this.flattenForSlowMotion = flattenForSlowMotion;
this.trimOptimizationEnabled = trimOptimizationEnabled;
this.fileStartsOnVideoFrameEnabled = fileStartsOnVideoFrameEnabled;
this.listeners = listeners;
this.assetLoaderFactory = assetLoaderFactory;
this.audioMixerFactory = audioMixerFactory;
@ -926,7 +951,12 @@ public final class Transformer {
if (!trimOptimizationEnabled || isMultiAsset()) {
startInternal(
composition,
new MuxerWrapper(path, muxerFactory, componentListener, MuxerWrapper.MUXER_MODE_DEFAULT),
new MuxerWrapper(
path,
muxerFactory,
componentListener,
MuxerWrapper.MUXER_MODE_DEFAULT,
/* dropSamplesBeforeFirstVideoSample= */ fileStartsOnVideoFrameEnabled),
componentListener,
/* initialTimestampOffsetUs= */ 0);
} else {
@ -1114,7 +1144,8 @@ public final class Transformer {
checkNotNull(outputFilePath),
muxerFactory,
componentListener,
MuxerWrapper.MUXER_MODE_DEFAULT),
MuxerWrapper.MUXER_MODE_DEFAULT,
/* dropSamplesBeforeFirstVideoSample= */ false),
componentListener,
/* initialTimestampOffsetUs= */ 0);
}
@ -1144,7 +1175,8 @@ public final class Transformer {
checkNotNull(outputFilePath),
muxerFactory,
componentListener,
MuxerWrapper.MUXER_MODE_MUX_PARTIAL);
MuxerWrapper.MUXER_MODE_MUX_PARTIAL,
/* dropSamplesBeforeFirstVideoSample= */ false);
startInternal(
TransmuxTranscodeHelper.createVideoOnlyComposition(
@ -1193,7 +1225,8 @@ public final class Transformer {
checkNotNull(oldFilePath),
muxerFactory,
componentListener,
MuxerWrapper.MUXER_MODE_DEFAULT),
MuxerWrapper.MUXER_MODE_DEFAULT,
/* dropSamplesBeforeFirstVideoSample= */ false),
componentListener,
/* initialTimestampOffsetUs= */ 0);
}
@ -1273,7 +1306,8 @@ public final class Transformer {
checkNotNull(outputFilePath),
muxerFactory,
componentListener,
MuxerWrapper.MUXER_MODE_MUX_PARTIAL);
MuxerWrapper.MUXER_MODE_MUX_PARTIAL,
/* dropSamplesBeforeFirstVideoSample= */ false);
if (shouldTranscodeVideo(
checkNotNull(mp4MetadataInfo.videoFormat),
composition,

View file

@ -67,7 +67,8 @@ public final class EncodedSampleExporterTest {
/* outputPath= */ "unused",
new InAppMuxer.Factory(),
mock(MuxerWrapper.Listener.class),
MuxerWrapper.MUXER_MODE_DEFAULT),
MuxerWrapper.MUXER_MODE_DEFAULT,
/* dropSamplesBeforeFirstVideoSample= */ false),
fallbackListener,
/* initialTimestampOffsetUs= */ 0);
}

View file

@ -17,14 +17,19 @@ package androidx.media3.transformer;
import static androidx.media3.common.MimeTypes.AUDIO_AAC;
import static androidx.media3.common.MimeTypes.VIDEO_H264;
import static androidx.media3.transformer.MuxerWrapper.MUXER_MODE_DEFAULT;
import static androidx.media3.transformer.MuxerWrapper.MUXER_MODE_MUX_PARTIAL;
import static androidx.media3.transformer.TestUtil.getDumpFileName;
import static com.google.common.truth.Truth.assertThat;
import static org.junit.Assert.assertThrows;
import android.content.Context;
import androidx.annotation.Nullable;
import androidx.media3.common.C;
import androidx.media3.common.ColorInfo;
import androidx.media3.common.Format;
import androidx.media3.test.utils.DumpFileAsserts;
import androidx.test.core.app.ApplicationProvider;
import androidx.test.ext.junit.runners.AndroidJUnit4;
import com.google.common.collect.ImmutableList;
import java.nio.ByteBuffer;
@ -32,6 +37,7 @@ import org.junit.After;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.rules.TestName;
import org.junit.runner.RunWith;
/** Unit tests for {@link MuxerWrapper}. */
@ -55,13 +61,14 @@ public class MuxerWrapperTest {
private static final ByteBuffer FAKE_SAMPLE = ByteBuffer.wrap(new byte[] {1, 2, 3, 4});
@Rule public final TemporaryFolder temporaryFolder = new TemporaryFolder();
@Rule public final TestName testName = new TestName();
@Nullable private MuxerWrapper muxerWrapper;
@After
public void tearDown() throws Muxer.MuxerException {
if (muxerWrapper != null) {
muxerWrapper.release(false);
muxerWrapper.release(/* forCancellation= */ false);
}
}
@ -72,7 +79,8 @@ public class MuxerWrapperTest {
temporaryFolder.newFile().getPath(),
new DefaultMuxer.Factory(),
new NoOpMuxerListenerImpl(),
MuxerWrapper.MUXER_MODE_DEFAULT);
MUXER_MODE_DEFAULT,
/* dropSamplesBeforeFirstVideoSample= */ false);
assertThrows(IllegalStateException.class, muxerWrapper::changeToAppendMode);
}
@ -84,7 +92,8 @@ public class MuxerWrapperTest {
temporaryFolder.newFile().getPath(),
new DefaultMuxer.Factory(),
new NoOpMuxerListenerImpl(),
MUXER_MODE_MUX_PARTIAL);
MUXER_MODE_MUX_PARTIAL,
/* dropSamplesBeforeFirstVideoSample= */ false);
muxerWrapper.setTrackCount(1);
muxerWrapper.addTrackFormat(FAKE_VIDEO_TRACK_FORMAT);
@ -104,7 +113,8 @@ public class MuxerWrapperTest {
temporaryFolder.newFile().getPath(),
new DefaultMuxer.Factory(),
new NoOpMuxerListenerImpl(),
MUXER_MODE_MUX_PARTIAL);
MUXER_MODE_MUX_PARTIAL,
/* dropSamplesBeforeFirstVideoSample= */ false);
muxerWrapper.setTrackCount(1);
muxerWrapper.addTrackFormat(FAKE_AUDIO_TRACK_FORMAT);
muxerWrapper.writeSample(
@ -123,7 +133,8 @@ public class MuxerWrapperTest {
temporaryFolder.newFile().getPath(),
new DefaultMuxer.Factory(),
new NoOpMuxerListenerImpl(),
MUXER_MODE_MUX_PARTIAL);
MUXER_MODE_MUX_PARTIAL,
/* dropSamplesBeforeFirstVideoSample= */ false);
muxerWrapper.setTrackCount(1);
muxerWrapper.addTrackFormat(FAKE_VIDEO_TRACK_FORMAT);
muxerWrapper.writeSample(
@ -144,7 +155,8 @@ public class MuxerWrapperTest {
temporaryFolder.newFile().getPath(),
new DefaultMuxer.Factory(),
new NoOpMuxerListenerImpl(),
MUXER_MODE_MUX_PARTIAL);
MUXER_MODE_MUX_PARTIAL,
/* dropSamplesBeforeFirstVideoSample= */ false);
muxerWrapper.setTrackCount(1);
muxerWrapper.addTrackFormat(FAKE_AUDIO_TRACK_FORMAT);
muxerWrapper.writeSample(
@ -158,6 +170,71 @@ public class MuxerWrapperTest {
IllegalArgumentException.class, () -> muxerWrapper.addTrackFormat(differentAudioFormat));
}
@Test
public void
    writeSample_dropSamplesBeforeFirstVideoSampleEnabled_rejectsAudioSamplesReceivedBeforeFirstVideoSample()
        throws Exception {
  // Arrange: a two-track wrapper configured to drop samples that precede the first video sample.
  String outputPath = temporaryFolder.newFile().getPath();
  MuxerWrapper wrapper =
      new MuxerWrapper(
          outputPath,
          new DefaultMuxer.Factory(),
          new NoOpMuxerListenerImpl(),
          MUXER_MODE_DEFAULT,
          /* dropSamplesBeforeFirstVideoSample= */ true);
  // Store in the field so that tearDown() releases the wrapper.
  muxerWrapper = wrapper;
  wrapper.setTrackCount(2);
  wrapper.addTrackFormat(FAKE_AUDIO_TRACK_FORMAT);
  wrapper.addTrackFormat(FAKE_VIDEO_TRACK_FORMAT);

  // Act: offer an audio sample although no video sample has been seen yet.
  boolean audioSampleAccepted =
      wrapper.writeSample(
          C.TRACK_TYPE_AUDIO, FAKE_SAMPLE, /* isKeyFrame= */ true, /* presentationTimeUs= */ 0);

  // Assert: the sample is rejected (not written and not reported as handled).
  assertThat(audioSampleAccepted).isFalse();
}
@Test
public void
    writeSample_dropSamplesBeforeFirstVideoSampleEnabled_dropsAudioSamplesTimedBeforeFirstVideoSample()
        throws Exception {
  Context context = ApplicationProvider.getApplicationContext();
  // A capturing muxer records what is actually muxed so it can be compared against a dump file.
  CapturingMuxer.Factory capturingMuxerFactory = new CapturingMuxer.Factory();
  String outputPath = temporaryFolder.newFile().getPath();
  MuxerWrapper wrapper =
      new MuxerWrapper(
          outputPath,
          capturingMuxerFactory,
          new NoOpMuxerListenerImpl(),
          MUXER_MODE_DEFAULT,
          /* dropSamplesBeforeFirstVideoSample= */ true);
  // Store in the field so that tearDown() releases the wrapper.
  muxerWrapper = wrapper;
  wrapper.setTrackCount(2);
  wrapper.addTrackFormat(FAKE_AUDIO_TRACK_FORMAT);
  wrapper.addTrackFormat(FAKE_VIDEO_TRACK_FORMAT);

  // Audio offered before any video sample has been received.
  wrapper.writeSample(
      C.TRACK_TYPE_AUDIO, FAKE_SAMPLE, /* isKeyFrame= */ true, /* presentationTimeUs= */ 0);
  // First video sample, at 10us.
  wrapper.writeSample(
      C.TRACK_TYPE_VIDEO, FAKE_SAMPLE, /* isKeyFrame= */ true, /* presentationTimeUs= */ 10);
  // Audio timed before the first video sample.
  wrapper.writeSample(
      C.TRACK_TYPE_AUDIO, FAKE_SAMPLE, /* isKeyFrame= */ true, /* presentationTimeUs= */ 5);
  // Audio timed at or after the first video sample, interleaved with a second video sample.
  wrapper.writeSample(
      C.TRACK_TYPE_AUDIO, FAKE_SAMPLE, /* isKeyFrame= */ true, /* presentationTimeUs= */ 10);
  wrapper.writeSample(
      C.TRACK_TYPE_AUDIO, FAKE_SAMPLE, /* isKeyFrame= */ true, /* presentationTimeUs= */ 12);
  wrapper.writeSample(
      C.TRACK_TYPE_VIDEO, FAKE_SAMPLE, /* isKeyFrame= */ true, /* presentationTimeUs= */ 15);
  wrapper.writeSample(
      C.TRACK_TYPE_AUDIO, FAKE_SAMPLE, /* isKeyFrame= */ true, /* presentationTimeUs= */ 17);
  wrapper.endTrack(C.TRACK_TYPE_AUDIO);
  wrapper.endTrack(C.TRACK_TYPE_VIDEO);

  // The dump file is looked up by test method name.
  DumpFileAsserts.assertOutput(
      context,
      capturingMuxerFactory.getCreatedMuxer(),
      getDumpFileName(/* originalFileName= */ "testspecificdumps/" + testName.getMethodName()));
}
@Test
public void isEnded_afterPartialVideoMuxed_returnsTrue() throws Exception {
muxerWrapper =
@ -165,7 +242,8 @@ public class MuxerWrapperTest {
temporaryFolder.newFile().getPath(),
new DefaultMuxer.Factory(),
new NoOpMuxerListenerImpl(),
MUXER_MODE_MUX_PARTIAL);
MUXER_MODE_MUX_PARTIAL,
/* dropSamplesBeforeFirstVideoSample= */ false);
muxerWrapper.setTrackCount(1);
muxerWrapper.addTrackFormat(FAKE_VIDEO_TRACK_FORMAT);
muxerWrapper.writeSample(
@ -182,7 +260,8 @@ public class MuxerWrapperTest {
temporaryFolder.newFile().getPath(),
new DefaultMuxer.Factory(),
new NoOpMuxerListenerImpl(),
MUXER_MODE_MUX_PARTIAL);
MUXER_MODE_MUX_PARTIAL,
/* dropSamplesBeforeFirstVideoSample= */ false);
muxerWrapper.setTrackCount(2);
muxerWrapper.addTrackFormat(FAKE_VIDEO_TRACK_FORMAT);
@ -207,7 +286,8 @@ public class MuxerWrapperTest {
temporaryFolder.newFile().getPath(),
new DefaultMuxer.Factory(),
new NoOpMuxerListenerImpl(),
MUXER_MODE_MUX_PARTIAL);
MUXER_MODE_MUX_PARTIAL,
/* dropSamplesBeforeFirstVideoSample= */ false);
muxerWrapper.setTrackCount(1);
muxerWrapper.addTrackFormat(FAKE_VIDEO_TRACK_FORMAT);
muxerWrapper.writeSample(