Fix encoding stalling by interleaving audio and video

Thanks to the AVFoundation team I learned that both audio and video
samples are supposed to be interleaved whenever media data is ready
from either call to encode ready samples, and that fixes stalling when
encoding video produced with x264 and ffmpeg.
This commit is contained in:
Sami Samhuri 2024-10-22 17:40:12 -07:00
parent e7fbbacd30
commit 9297a14920
No known key found for this signature in database
4 changed files with 68 additions and 62 deletions

View file

@ -24,6 +24,7 @@ let package = Package(
.process("Resources/test-no-audio.mp4"),
.process("Resources/test-no-video.m4a"),
.process("Resources/test-spatial-audio.mov"),
.process("Resources/test-x264-1080p-h264-60fps.mp4"),
]
),
]

View file

@ -116,11 +116,18 @@ actor SampleWriter {
reader.startReading()
try Task.checkCancellation()
await encodeAudioTracks()
try Task.checkCancellation()
startEncodingAudioTracks()
startEncodingVideoTracks()
await encodeVideoTracks()
try Task.checkCancellation()
while reader.status == .reading, writer.status == .writing {
guard !Task.isCancelled else {
// Flag so that we stop writing samples
isCancelled = true
throw CancellationError()
}
try await Task.sleep(for: .milliseconds(10))
}
guard reader.status != .cancelled && writer.status != .cancelled else {
throw CancellationError()
@ -204,76 +211,46 @@ actor SampleWriter {
// MARK: - Encoding
private func encodeAudioTracks() async {
private func startEncodingAudioTracks() {
// Don't do anything when we have no audio to encode.
guard audioInput != nil, audioOutput != nil else {
guard let audioInput, audioOutput != nil else {
return
}
await withTaskCancellationHandler {
await withCheckedContinuation { continuation in
self.audioInput!.requestMediaDataWhenReady(on: queue) {
self.assumeIsolated { _self in
guard !_self.isCancelled else {
log.debug("Cancelled while encoding audio")
_self.reader.cancelReading()
_self.writer.cancelWriting()
continuation.resume()
return
}
let hasMoreSamples = _self.writeReadySamples(
output: _self.audioOutput!,
input: _self.audioInput!
)
if !hasMoreSamples {
log.debug("Finished encoding audio")
continuation.resume()
}
}
}
}
} onCancel: {
log.debug("Task cancelled while encoding audio")
Task {
await self.cancel()
audioInput.requestMediaDataWhenReady(on: queue) {
// NOTE: assumeIsolated crashes on macOS at the moment
self.assumeIsolated { _self in
_self.writeAllReadySamples()
}
}
}
private func encodeVideoTracks() async {
await withTaskCancellationHandler {
await withCheckedContinuation { continuation in
self.videoInput!.requestMediaDataWhenReady(on: queue) {
// NOTE: assumeIsolated crashes on macOS at the moment
self.assumeIsolated { _self in
guard !_self.isCancelled else {
log.debug("Cancelled while encoding video")
_self.reader.cancelReading()
_self.writer.cancelWriting()
continuation.resume()
return
}
let hasMoreSamples = _self.writeReadySamples(
output: _self.videoOutput!,
input: _self.videoInput!
)
if !hasMoreSamples {
log.debug("Finished encoding video")
continuation.resume()
}
}
}
}
} onCancel: {
log.debug("Task cancelled while encoding video")
Task {
await self.cancel()
/// Installs the video input's ready-for-data callback on the writer queue.
/// The callback drains ready samples for BOTH tracks via `writeAllReadySamples()`,
/// interleaving audio and video instead of encoding one track to completion first.
// NOTE(review): `videoInput` is force-unwrapped here while the audio path guards
// with `guard let audioInput` — confirm video is guaranteed present, or mirror
// the guard to avoid a crash on video-less assets.
private func startEncodingVideoTracks() {
videoInput!.requestMediaDataWhenReady(on: queue) {
// NOTE: assumeIsolated crashes on macOS at the moment
self.assumeIsolated { _self in
_self.writeAllReadySamples()
}
}
}
/// Drains as many ready samples as the writer inputs will accept, servicing the
/// audio track (when present) and then the video track in the same pass so the
/// two streams stay interleaved in the output file.
/// Called from either track's `requestMediaDataWhenReady` callback on `queue`.
private func writeAllReadySamples() {
// Cooperative cancellation: tear down the reader/writer and stop producing
// samples once `isCancelled` has been flagged.
guard !isCancelled else {
log.debug("Cancelled while writing samples")
reader.cancelReading()
writer.cancelWriting()
return
}
// Audio is optional — assets without an audio track skip this branch.
if let audioInput, let audioOutput {
let hasMoreAudio = writeReadySamples(output: audioOutput, input: audioInput)
if !hasMoreAudio { log.debug("Finished encoding audio") }
}
// NOTE(review): videoOutput/videoInput are force-unwrapped — assumes every
// export has a video track; verify against the no-video code path.
let hasMoreVideo = writeReadySamples(output: videoOutput!, input: videoInput!)
if !hasMoreVideo { log.debug("Finished encoding video") }
}
private func writeReadySamples(output: AVAssetReaderOutput, input: AVAssetWriterInput) -> Bool {
while input.isReadyForMoreMediaData {
guard reader.status == .reading && writer.status == .writing,

View file

@ -192,6 +192,34 @@ final class ExportSessionTests {
#expect(try await exportedTrack.load(.naturalSize) == CGSize(width: 1280, height: 720))
}
/// Exports an x264-produced 1080p/60fps H.264 source down to 30fps at 2.5 Mbps
/// and verifies the resulting track's size, frame rate, data rate, and
/// BT.709 color metadata. Regression test for encoding stalls with x264 input.
@Test func test_export_x264_60fps() async throws {
    let sourceURL = resourceURL(named: "test-x264-1080p-h264-60fps.mp4")
    let destinationURL = makeTemporaryURL()
    let subject = ExportSession()
    try await subject.export(
        asset: makeAsset(url: sourceURL),
        video: .codec(.h264, width: 1920, height: 1080)
            .bitrate(2_500_000)
            .fps(30),
        to: destinationURL.url,
        as: .mp4
    )
    let exportedAsset = AVURLAsset(url: destinationURL.url)
    // Fix: `sendTracks(withMediaType:)` is not an AVFoundation API — the async
    // track-loading API is `loadTracks(withMediaType:)`.
    let videoTrack = try #require(await exportedAsset.loadTracks(withMediaType: .video).first)
    #expect(try await videoTrack.load(.naturalSize) == CGSize(width: 1920, height: 1080))
    #expect(try await videoTrack.load(.nominalFrameRate) == 30.0)
    // Encoder rate control is approximate, so accept a band around the target.
    let dataRate = try await videoTrack.load(.estimatedDataRate)
    #expect((2_400_000 ... 2_700_000).contains(dataRate))
    let videoFormat = try #require(await videoTrack.load(.formatDescriptions).first)
    #expect(videoFormat.mediaType == .video)
    #expect(videoFormat.mediaSubType == .h264)
    // Source is SDR 1080p, so the exported track should carry BT.709 tags.
    #expect(videoFormat.extensions[.colorPrimaries] == .colorPrimaries(.itu_R_709_2))
    #expect(videoFormat.extensions[.transferFunction] == .transferFunction(.itu_R_709_2))
    #expect(videoFormat.extensions[.yCbCrMatrix] == .yCbCrMatrix(.itu_R_709_2))
}
@Test func test_export_progress() async throws {
let sourceURL = resourceURL(named: "test-720p-h264-24fps.mov")
let progressValues = SendableWrapper<[Float]>([])