From b25d00a7950802a01ed7f42e48445d6ead0a0735 Mon Sep 17 00:00:00 2001 From: andrewlewis Date: Mon, 30 May 2022 11:04:24 +0000 Subject: [PATCH] Retain stream time offsets through codecs ExoPlayer applies a large time offset to buffers so that, if the input has negative timestamps, generally buffers seen by the decoders should have positive timestamps. Modify how the offset is handled in `Transformer` so that decoders and encoders generally see positive timestamps, by leaving the offset on samples when reading them in the base renderer (remove the code that removed the offset), and then removing the offset when muxing. Also update the frame processor chain and slow motion flattening code to retain the existing behavior after this change (these both need original media presentation timestamps). Tested via existing end-to-end tests and manually verified that the overlay frame processor shows the expected original media timestamps. Aside: we don't need the same logic as ExoPlayer to track stream offsets across the decoder yet, because we don't try to handle stream changes during playback in single asset editing. (There is an edge case of multi-period DASH that may not work but I doubt anyone will use that as input to `Transformer` before we change the code to handle multi-asset properly.) In future we should try to refactor interaction with the decoder to use the same code for Transformer and ExoPlayer. 
PiperOrigin-RevId: 451846055 --- .../FrameProcessorChainPixelTest.java | 1 + .../transformer/FrameProcessorChainTest.java | 1 + .../AudioTranscodingSamplePipeline.java | 4 +++ .../transformer/FrameProcessorChain.java | 18 ++++++++-- .../PassthroughSamplePipeline.java | 4 --- .../transformer/SefSlowMotionFlattener.java | 36 +++++++++++++------ .../transformer/TransformerAudioRenderer.java | 4 +-- .../transformer/TransformerBaseRenderer.java | 10 +++--- .../transformer/TransformerVideoRenderer.java | 16 ++++++--- .../VideoTranscodingSamplePipeline.java | 7 ++-- 10 files changed, 69 insertions(+), 32 deletions(-) diff --git a/libraries/transformer/src/androidTest/java/androidx/media3/transformer/FrameProcessorChainPixelTest.java b/libraries/transformer/src/androidTest/java/androidx/media3/transformer/FrameProcessorChainPixelTest.java index 912d4d7f8b..7786a7d1a1 100644 --- a/libraries/transformer/src/androidTest/java/androidx/media3/transformer/FrameProcessorChainPixelTest.java +++ b/libraries/transformer/src/androidTest/java/androidx/media3/transformer/FrameProcessorChainPixelTest.java @@ -309,6 +309,7 @@ public final class FrameProcessorChainPixelTest { pixelWidthHeightRatio, inputWidth, inputHeight, + /* streamOffsetUs= */ 0L, effects, /* enableExperimentalHdrEditing= */ false); Size outputSize = frameProcessorChain.getOutputSize(); diff --git a/libraries/transformer/src/androidTest/java/androidx/media3/transformer/FrameProcessorChainTest.java b/libraries/transformer/src/androidTest/java/androidx/media3/transformer/FrameProcessorChainTest.java index 511593ea39..05920f2058 100644 --- a/libraries/transformer/src/androidTest/java/androidx/media3/transformer/FrameProcessorChainTest.java +++ b/libraries/transformer/src/androidTest/java/androidx/media3/transformer/FrameProcessorChainTest.java @@ -130,6 +130,7 @@ public final class FrameProcessorChainTest { pixelWidthHeightRatio, inputSize.getWidth(), inputSize.getHeight(), + /* streamOffsetUs= */ 0L, effects.build(), /* 
enableExperimentalHdrEditing= */ false); } diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/AudioTranscodingSamplePipeline.java b/libraries/transformer/src/main/java/androidx/media3/transformer/AudioTranscodingSamplePipeline.java index 7ca38a3aed..deaa247f72 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/AudioTranscodingSamplePipeline.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/AudioTranscodingSamplePipeline.java @@ -56,6 +56,7 @@ import org.checkerframework.dataflow.qual.Pure; public AudioTranscodingSamplePipeline( Format inputFormat, + long streamOffsetUs, TransformationRequest transformationRequest, Codec.DecoderFactory decoderFactory, Codec.EncoderFactory encoderFactory, @@ -108,6 +109,9 @@ import org.checkerframework.dataflow.qual.Pure; fallbackListener.onTransformationRequestFinalized( createFallbackTransformationRequest( transformationRequest, requestedOutputFormat, encoder.getConfigurationFormat())); + + // Use the same stream offset as the input stream for encoder input buffers. 
+ nextEncoderInputBufferTimeUs = streamOffsetUs; } @Override diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/FrameProcessorChain.java b/libraries/transformer/src/main/java/androidx/media3/transformer/FrameProcessorChain.java index b2e13171b8..a1688077c7 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/FrameProcessorChain.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/FrameProcessorChain.java @@ -101,6 +101,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; float pixelWidthHeightRatio, int inputWidth, int inputHeight, + long streamOffsetUs, List effects, boolean enableExperimentalHdrEditing) throws FrameProcessingException { @@ -119,6 +120,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; pixelWidthHeightRatio, inputWidth, inputHeight, + streamOffsetUs, effects, enableExperimentalHdrEditing, singleThreadExecutorService)) @@ -145,6 +147,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; float pixelWidthHeightRatio, int inputWidth, int inputHeight, + long streamOffsetUs, List effects, boolean enableExperimentalHdrEditing, ExecutorService singleThreadExecutorService) @@ -190,6 +193,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; eglContext, singleThreadExecutorService, inputExternalTexId, + streamOffsetUs, framebuffers, textureProcessors, listener, @@ -252,6 +256,11 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; private final EGLContext eglContext; /** Some OpenGL commands may block, so all OpenGL commands are run on a background thread. */ private final ExecutorService singleThreadExecutorService; + /** + * Offset compared to original media presentation time that has been added to incoming frame + * timestamps, in microseconds. + */ + private final long streamOffsetUs; /** Futures corresponding to the executor service's pending tasks. 
*/ private final ConcurrentLinkedQueue> futures; /** Number of frames {@linkplain #registerInputFrame() registered} but not fully processed. */ @@ -308,6 +317,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; EGLContext eglContext, ExecutorService singleThreadExecutorService, int inputExternalTexId, + long streamOffsetUs, int[] framebuffers, ImmutableList textureProcessors, Listener listener, @@ -317,6 +327,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; this.eglDisplay = eglDisplay; this.eglContext = eglContext; this.singleThreadExecutorService = singleThreadExecutorService; + this.streamOffsetUs = streamOffsetUs; this.framebuffers = framebuffers; this.textureProcessors = textureProcessors; this.listener = listener; @@ -476,8 +487,9 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; } inputSurfaceTexture.updateTexImage(); - long presentationTimeNs = inputSurfaceTexture.getTimestamp(); - presentationTimeUs = presentationTimeNs / 1000; + long inputFrameTimeNs = inputSurfaceTexture.getTimestamp(); + // Correct for the stream offset so processors see original media presentation timestamps. 
+ presentationTimeUs = inputFrameTimeNs / 1000 - streamOffsetUs; inputSurfaceTexture.getTransformMatrix(textureTransformMatrix); ((ExternalTextureProcessor) textureProcessors.get(0)) .setTextureTransformMatrix(textureTransformMatrix); @@ -502,7 +514,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull; clearOutputFrame(); getLast(textureProcessors).drawFrame(presentationTimeUs); - EGLExt.eglPresentationTimeANDROID(eglDisplay, outputEglSurface, presentationTimeNs); + EGLExt.eglPresentationTimeANDROID(eglDisplay, outputEglSurface, inputFrameTimeNs); EGL14.eglSwapBuffers(eglDisplay, outputEglSurface); if (debugSurfaceViewWrapper != null) { diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/PassthroughSamplePipeline.java b/libraries/transformer/src/main/java/androidx/media3/transformer/PassthroughSamplePipeline.java index a8bbc8f577..f2387ace36 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/PassthroughSamplePipeline.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/PassthroughSamplePipeline.java @@ -25,17 +25,14 @@ import androidx.media3.decoder.DecoderInputBuffer; private final DecoderInputBuffer buffer; private final Format format; - private final long outputPresentationTimeOffsetUs; private boolean hasPendingBuffer; public PassthroughSamplePipeline( Format format, - long outputPresentationTimeOffsetUs, TransformationRequest transformationRequest, FallbackListener fallbackListener) { this.format = format; - this.outputPresentationTimeOffsetUs = outputPresentationTimeOffsetUs; buffer = new DecoderInputBuffer(DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DIRECT); hasPendingBuffer = false; fallbackListener.onTransformationRequestFinalized(transformationRequest); @@ -49,7 +46,6 @@ import androidx.media3.decoder.DecoderInputBuffer; @Override public void queueInputBuffer() { - buffer.timeUs -= outputPresentationTimeOffsetUs; hasPendingBuffer = true; } diff --git 
a/libraries/transformer/src/main/java/androidx/media3/transformer/SefSlowMotionFlattener.java b/libraries/transformer/src/main/java/androidx/media3/transformer/SefSlowMotionFlattener.java index 8602733a93..5e99932323 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/SefSlowMotionFlattener.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/SefSlowMotionFlattener.java @@ -28,7 +28,6 @@ import androidx.media3.common.Format; import androidx.media3.common.Metadata; import androidx.media3.common.MimeTypes; import androidx.media3.common.util.Util; -import androidx.media3.decoder.DecoderInputBuffer; import androidx.media3.extractor.metadata.mp4.SlowMotionData; import androidx.media3.extractor.metadata.mp4.SmtaMetadataEntry; import com.google.common.collect.ImmutableList; @@ -106,9 +105,15 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull; * segments into account, in microseconds. */ private long frameTimeDeltaUs; + /** + * The presentation time for the last {@linkplain #dropOrTransformSample(ByteBuffer, long) + * processed sample}. + */ + private long lastSamplePresentationTimeUs; public SefSlowMotionFlattener(Format format) { scratch = new byte[NAL_START_CODE_LENGTH]; + lastSamplePresentationTimeUs = C.TIME_UNSET; MetadataInfo metadataInfo = getMetadataInfo(format.metadata); slowMotionData = metadataInfo.slowMotionData; List segments = @@ -132,36 +137,47 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull; * Applies slow motion flattening by either indicating that the buffer's data should be dropped or * transforming it in place. * + *

After calling this method, call {@link #getSamplePresentationTimeUs()} to get the new + * presentation time for the sample (whether it was dropped or not). + * * @return Whether the buffer should be dropped. */ - @RequiresNonNull("#1.data") - public boolean dropOrTransformSample(DecoderInputBuffer buffer) { + public boolean dropOrTransformSample(ByteBuffer buffer, long bufferTimeUs) { if (slowMotionData == null) { // The input is not an SEF slow motion video. + lastSamplePresentationTimeUs = bufferTimeUs; return false; } - ByteBuffer data = buffer.data; - int originalPosition = data.position(); - data.position(originalPosition + NAL_START_CODE_LENGTH); - data.get(scratch, 0, 4); // Read nal_unit_header_svc_extension. + int originalPosition = buffer.position(); + buffer.position(originalPosition + NAL_START_CODE_LENGTH); + buffer.get(scratch, 0, 4); // Read nal_unit_header_svc_extension. int nalUnitType = scratch[0] & 0x1F; boolean svcExtensionFlag = ((scratch[1] & 0xFF) >> 7) == 1; checkState( nalUnitType == NAL_UNIT_TYPE_PREFIX && svcExtensionFlag, "Missing SVC extension prefix NAL unit."); int layer = (scratch[3] & 0xFF) >> 5; - boolean shouldKeepFrame = processCurrentFrame(layer, buffer.timeUs); + boolean shouldKeepFrame = processCurrentFrame(layer, bufferTimeUs); // Update the timestamp regardless of whether the buffer is dropped as the timestamp may be // reused for the empty end-of-stream buffer. - buffer.timeUs = getCurrentFrameOutputTimeUs(/* inputTimeUs= */ buffer.timeUs); + lastSamplePresentationTimeUs = getCurrentFrameOutputTimeUs(bufferTimeUs); if (shouldKeepFrame) { - data.position(originalPosition); + buffer.position(originalPosition); return false; } return true; } + /** + * Returns the new presentation time for the last sample handled via {@link + * #dropOrTransformSample(ByteBuffer, long)}. 
+ */ + public long getSamplePresentationTimeUs() { + checkState(lastSamplePresentationTimeUs != C.TIME_UNSET); + return lastSamplePresentationTimeUs; + } + /** * Processes the current frame and returns whether it should be kept. * diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerAudioRenderer.java b/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerAudioRenderer.java index 271fc81ccd..8180b199cd 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerAudioRenderer.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerAudioRenderer.java @@ -70,12 +70,12 @@ import androidx.media3.extractor.metadata.mp4.SlowMotionData; Format inputFormat = checkNotNull(formatHolder.format); if (shouldPassthrough(inputFormat)) { samplePipeline = - new PassthroughSamplePipeline( - inputFormat, startPositionOffsetUs, transformationRequest, fallbackListener); + new PassthroughSamplePipeline(inputFormat, transformationRequest, fallbackListener); } else { samplePipeline = new AudioTranscodingSamplePipeline( inputFormat, + streamOffsetUs, transformationRequest, decoderFactory, encoderFactory, diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerBaseRenderer.java b/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerBaseRenderer.java index 2bc4cafac1..b1da9c5ca3 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerBaseRenderer.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerBaseRenderer.java @@ -45,7 +45,7 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull; protected boolean muxerWrapperTrackAdded; protected boolean muxerWrapperTrackEnded; protected long streamOffsetUs; - protected long startPositionOffsetUs; + protected long streamStartPositionUs; protected @MonotonicNonNull SamplePipeline samplePipeline; public 
TransformerBaseRenderer( @@ -110,7 +110,7 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull; @Override protected final void onStreamChanged(Format[] formats, long startPositionUs, long offsetUs) { this.streamOffsetUs = offsetUs; - this.startPositionOffsetUs = startPositionUs - offsetUs; + this.streamStartPositionUs = startPositionUs; } @Override @@ -178,11 +178,14 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull; return false; } + long samplePresentationTimeUs = samplePipelineOutputBuffer.timeUs - streamStartPositionUs; + // TODO(b/204892224): Consider subtracting the first sample timestamp from the sample pipeline + // buffer from all samples so that they are guaranteed to start from zero in the output file. if (!muxerWrapper.writeSample( getTrackType(), checkStateNotNull(samplePipelineOutputBuffer.data), samplePipelineOutputBuffer.isKeyFrame(), - samplePipelineOutputBuffer.timeUs)) { + samplePresentationTimeUs)) { return false; } samplePipeline.releaseOutputBuffer(); @@ -212,7 +215,6 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull; return false; } mediaClock.updateTimeForTrackType(getTrackType(), samplePipelineInputBuffer.timeUs); - samplePipelineInputBuffer.timeUs -= streamOffsetUs; checkStateNotNull(samplePipelineInputBuffer.data); maybeQueueSampleToPipeline(samplePipelineInputBuffer); return true; diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerVideoRenderer.java b/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerVideoRenderer.java index 0321934a56..086f0bfc17 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerVideoRenderer.java +++ b/libraries/transformer/src/main/java/androidx/media3/transformer/TransformerVideoRenderer.java @@ -89,14 +89,13 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull; Format inputFormat = checkNotNull(formatHolder.format); if 
(shouldPassthrough(inputFormat)) { samplePipeline = - new PassthroughSamplePipeline( - inputFormat, startPositionOffsetUs, transformationRequest, fallbackListener); + new PassthroughSamplePipeline(inputFormat, transformationRequest, fallbackListener); } else { samplePipeline = new VideoTranscodingSamplePipeline( context, inputFormat, - startPositionOffsetUs, + streamOffsetUs, transformationRequest, effects, decoderFactory, @@ -113,7 +112,7 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull; } private boolean shouldPassthrough(Format inputFormat) { - if (startPositionOffsetUs != 0 && !clippingStartsAtKeyFrame) { + if ((streamStartPositionUs - streamOffsetUs) != 0 && !clippingStartsAtKeyFrame) { return false; } if (encoderFactory.videoNeedsEncoding()) { @@ -166,9 +165,16 @@ import org.checkerframework.checker.nullness.qual.RequiresNonNull; @RequiresNonNull({"samplePipeline", "#1.data"}) protected void maybeQueueSampleToPipeline(DecoderInputBuffer inputBuffer) throws TransformationException { + if (sefSlowMotionFlattener == null) { + samplePipeline.queueInputBuffer(); + return; + } + ByteBuffer data = inputBuffer.data; + long presentationTimeUs = inputBuffer.timeUs - streamOffsetUs; boolean shouldDropSample = - sefSlowMotionFlattener != null && sefSlowMotionFlattener.dropOrTransformSample(inputBuffer); + sefSlowMotionFlattener.dropOrTransformSample(data, presentationTimeUs); + inputBuffer.timeUs = streamOffsetUs + sefSlowMotionFlattener.getSamplePresentationTimeUs(); if (shouldDropSample) { data.clear(); } else { diff --git a/libraries/transformer/src/main/java/androidx/media3/transformer/VideoTranscodingSamplePipeline.java b/libraries/transformer/src/main/java/androidx/media3/transformer/VideoTranscodingSamplePipeline.java index fa0ce4cdc4..83ea088026 100644 --- a/libraries/transformer/src/main/java/androidx/media3/transformer/VideoTranscodingSamplePipeline.java +++ 
b/libraries/transformer/src/main/java/androidx/media3/transformer/VideoTranscodingSamplePipeline.java @@ -36,7 +36,6 @@ import org.checkerframework.dataflow.qual.Pure; */ /* package */ final class VideoTranscodingSamplePipeline implements SamplePipeline { private final int outputRotationDegrees; - private final long outputPresentationTimeOffsetUs; private final int maxPendingFrameCount; private final DecoderInputBuffer decoderInputBuffer; @@ -53,7 +52,7 @@ import org.checkerframework.dataflow.qual.Pure; public VideoTranscodingSamplePipeline( Context context, Format inputFormat, - long outputPresentationTimeOffsetUs, + long streamOffsetUs, TransformationRequest transformationRequest, ImmutableList effects, Codec.DecoderFactory decoderFactory, @@ -63,7 +62,6 @@ import org.checkerframework.dataflow.qual.Pure; FrameProcessorChain.Listener frameProcessorChainListener, Transformer.DebugViewProvider debugViewProvider) throws TransformationException { - this.outputPresentationTimeOffsetUs = outputPresentationTimeOffsetUs; decoderInputBuffer = new DecoderInputBuffer(DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DISABLED); encoderOutputBuffer = @@ -102,6 +100,7 @@ import org.checkerframework.dataflow.qual.Pure; inputFormat.pixelWidthHeightRatio, /* inputWidth= */ decodedWidth, /* inputHeight= */ decodedHeight, + streamOffsetUs, effectsListBuilder.build(), transformationRequest.enableHdrEditing); } catch (FrameProcessingException e) { @@ -202,7 +201,7 @@ import org.checkerframework.dataflow.qual.Pure; return null; } MediaCodec.BufferInfo bufferInfo = checkNotNull(encoder.getOutputBufferInfo()); - encoderOutputBuffer.timeUs = bufferInfo.presentationTimeUs - outputPresentationTimeOffsetUs; + encoderOutputBuffer.timeUs = bufferInfo.presentationTimeUs; encoderOutputBuffer.setFlags(bufferInfo.flags); return encoderOutputBuffer; }