Fix encoding stalling by interleaving audio and video

Thanks to the AVFoundation team I learned that audio and video samples
are supposed to be interleaved: whenever media data is ready on either
input, the callback should encode the ready samples for both. Doing so
fixes encoding this video, which was encoded with x264 and ffmpeg.
This commit is contained in:
Sami Samhuri 2024-10-22 17:40:12 -07:00
parent e7fbbacd30
commit 9297a14920
No known key found for this signature in database
4 changed files with 68 additions and 62 deletions

View file

@ -24,6 +24,7 @@ let package = Package(
.process("Resources/test-no-audio.mp4"), .process("Resources/test-no-audio.mp4"),
.process("Resources/test-no-video.m4a"), .process("Resources/test-no-video.m4a"),
.process("Resources/test-spatial-audio.mov"), .process("Resources/test-spatial-audio.mov"),
.process("Resources/test-x264-1080p-h264-60fps.mp4"),
] ]
), ),
] ]

View file

@ -116,11 +116,18 @@ actor SampleWriter {
reader.startReading() reader.startReading()
try Task.checkCancellation() try Task.checkCancellation()
await encodeAudioTracks() startEncodingAudioTracks()
try Task.checkCancellation() startEncodingVideoTracks()
await encodeVideoTracks() while reader.status == .reading, writer.status == .writing {
try Task.checkCancellation() guard !Task.isCancelled else {
// Flag so that we stop writing samples
isCancelled = true
throw CancellationError()
}
try await Task.sleep(for: .milliseconds(10))
}
guard reader.status != .cancelled && writer.status != .cancelled else { guard reader.status != .cancelled && writer.status != .cancelled else {
throw CancellationError() throw CancellationError()
@ -204,74 +211,44 @@ actor SampleWriter {
// MARK: - Encoding // MARK: - Encoding
private func encodeAudioTracks() async { private func startEncodingAudioTracks() {
// Don't do anything when we have no audio to encode. // Don't do anything when we have no audio to encode.
guard audioInput != nil, audioOutput != nil else { guard let audioInput, audioOutput != nil else {
return return
} }
await withTaskCancellationHandler { audioInput.requestMediaDataWhenReady(on: queue) {
await withCheckedContinuation { continuation in
self.audioInput!.requestMediaDataWhenReady(on: queue) {
self.assumeIsolated { _self in
guard !_self.isCancelled else {
log.debug("Cancelled while encoding audio")
_self.reader.cancelReading()
_self.writer.cancelWriting()
continuation.resume()
return
}
let hasMoreSamples = _self.writeReadySamples(
output: _self.audioOutput!,
input: _self.audioInput!
)
if !hasMoreSamples {
log.debug("Finished encoding audio")
continuation.resume()
}
}
}
}
} onCancel: {
log.debug("Task cancelled while encoding audio")
Task {
await self.cancel()
}
}
}
private func encodeVideoTracks() async {
await withTaskCancellationHandler {
await withCheckedContinuation { continuation in
self.videoInput!.requestMediaDataWhenReady(on: queue) {
// NOTE: assumeIsolated crashes on macOS at the moment // NOTE: assumeIsolated crashes on macOS at the moment
self.assumeIsolated { _self in self.assumeIsolated { _self in
guard !_self.isCancelled else { _self.writeAllReadySamples()
log.debug("Cancelled while encoding video") }
_self.reader.cancelReading() }
_self.writer.cancelWriting() }
continuation.resume()
/// Starts video encoding by registering a media-data-ready callback on the video input.
///
/// Every time the callback runs on `queue` it calls `writeAllReadySamples()`, which is the same
/// routine the audio input's callback invokes, so samples for both media types are written from
/// whichever input becomes ready.
///
/// This method only registers the callback and returns; it does not wait for encoding to finish.
private func startEncodingVideoTracks() {
// Force-unwrapped: this traps at runtime if `videoInput` is nil.
videoInput!.requestMediaDataWhenReady(on: queue) {
// NOTE: assumeIsolated crashes on macOS at the moment
// NOTE(review): assumeIsolated requires the callback to already be running on this actor's
// executor — presumably `queue` backs that executor; confirm against the actor's declaration.
self.assumeIsolated { _self in
_self.writeAllReadySamples()
}
}
}
private func writeAllReadySamples() {
guard !isCancelled else {
log.debug("Cancelled while writing samples")
reader.cancelReading()
writer.cancelWriting()
return return
} }
let hasMoreSamples = _self.writeReadySamples( if let audioInput, let audioOutput {
output: _self.videoOutput!, let hasMoreAudio = writeReadySamples(output: audioOutput, input: audioInput)
input: _self.videoInput! if !hasMoreAudio { log.debug("Finished encoding audio") }
)
if !hasMoreSamples {
log.debug("Finished encoding video")
continuation.resume()
}
}
}
}
} onCancel: {
log.debug("Task cancelled while encoding video")
Task {
await self.cancel()
}
} }
let hasMoreVideo = writeReadySamples(output: videoOutput!, input: videoInput!)
if !hasMoreVideo { log.debug("Finished encoding video") }
} }
private func writeReadySamples(output: AVAssetReaderOutput, input: AVAssetWriterInput) -> Bool { private func writeReadySamples(output: AVAssetReaderOutput, input: AVAssetWriterInput) -> Bool {

View file

@ -192,6 +192,34 @@ final class ExportSessionTests {
#expect(try await exportedTrack.load(.naturalSize) == CGSize(width: 1280, height: 720)) #expect(try await exportedTrack.load(.naturalSize) == CGSize(width: 1280, height: 720))
} }
/// Exports the `test-x264-1080p-h264-60fps.mp4` resource as 1920x1080 H.264 at 30 fps with a
/// 2.5 Mbps target bitrate, then checks the exported video track's size, frame rate, data rate,
/// and format description.
@Test func test_export_x264_60fps() async throws {
let sourceURL = resourceURL(named: "test-x264-1080p-h264-60fps.mp4")
let destinationURL = makeTemporaryURL()
let subject = ExportSession()
try await subject.export(
asset: makeAsset(url: sourceURL),
video: .codec(.h264, width: 1920, height: 1080)
.bitrate(2_500_000)
.fps(30),
to: destinationURL.url,
as: .mp4
)
// Re-open the exported file and inspect its first video track; #require fails the test if
// there is none.
let exportedAsset = AVURLAsset(url: destinationURL.url)
let videoTrack = try #require(await exportedAsset.sendTracks(withMediaType: .video).first)
#expect(try await videoTrack.load(.naturalSize) == CGSize(width: 1920, height: 1080))
#expect(try await videoTrack.load(.nominalFrameRate) == 30.0)
// The measured data rate is accepted within a window around the requested 2_500_000 bitrate
// rather than compared for exact equality.
let dataRate = try await videoTrack.load(.estimatedDataRate)
#expect((2_400_000 ... 2_700_000).contains(dataRate))
let videoFormat = try #require(await videoTrack.load(.formatDescriptions).first)
#expect(videoFormat.mediaType == .video)
#expect(videoFormat.mediaSubType == .h264)
// Colour primaries, transfer function, and YCbCr matrix are all expected to be ITU-R 709-2.
#expect(videoFormat.extensions[.colorPrimaries] == .colorPrimaries(.itu_R_709_2))
#expect(videoFormat.extensions[.transferFunction] == .transferFunction(.itu_R_709_2))
#expect(videoFormat.extensions[.yCbCrMatrix] == .yCbCrMatrix(.itu_R_709_2))
}
@Test func test_export_progress() async throws { @Test func test_export_progress() async throws {
let sourceURL = resourceURL(named: "test-720p-h264-24fps.mov") let sourceURL = resourceURL(named: "test-720p-h264-24fps.mov")
let progressValues = SendableWrapper<[Float]>([]) let progressValues = SendableWrapper<[Float]>([])