From ca5886781653b2532c8f3345d6ae15e886b78d61 Mon Sep 17 00:00:00 2001 From: rohks Date: Wed, 18 Dec 2024 11:00:21 -0800 Subject: [PATCH] Hard-code channel count and sample rate for AMR-NB and AMR-WB AMR-NB and AMR-WB are inherently mono, so channel count is set to 1. Sample rate is also hard-coded to adhere to codec standards. Also removed unused parameter `hasAdditionalViews` in `StriData`. #cherrypick PiperOrigin-RevId: 707606245 --- .../media3/extractor/mp4/BoxParser.java | 28 +++++++++++-------- .../bbb_mono_16kHz_23.05kbps_amrwb.3gp.dump | 2 +- .../bbb_mono_8kHz_12.2kbps_amrnb.3gp.dump | 2 +- .../transmuxed_with_inappmuxer.dump | 2 +- .../transmuxed_with_inappmuxer.dump | 2 +- 5 files changed, 20 insertions(+), 16 deletions(-) diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/mp4/BoxParser.java b/libraries/extractor/src/main/java/androidx/media3/extractor/mp4/BoxParser.java index 2ef5faa443..9c05e188de 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/mp4/BoxParser.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/mp4/BoxParser.java @@ -98,6 +98,9 @@ public final class BoxParser { @SuppressWarnings("ConstantCaseForConstants") private static final int TYPE_vide = 0x76696465; + private static final int SAMPLE_RATE_AMR_NB = 8_000; + private static final int SAMPLE_RATE_AMR_WB = 16_000; + /** * The threshold number of samples to trim from the start/end of an audio track when applying an * edit below which gapless info can be used (rather than removing samples from the sample table). @@ -1859,12 +1862,20 @@ public final class BoxParser { return; } - // As per the IAMF spec (https://aomediacodec.github.io/iamf/#iasampleentry-section), - // channelCount and sampleRate SHALL be set to 0 and ignored. We ignore it by using - // Format.NO_VALUE instead of 0. if (atomType == Mp4Box.TYPE_iamf) { + // As per the IAMF spec (https://aomediacodec.github.io/iamf/#iasampleentry-section), + // channelCount and sampleRate SHALL be set to 0 and ignored. We ignore it by using + // Format.NO_VALUE instead of 0. channelCount = Format.NO_VALUE; sampleRate = Format.NO_VALUE; + } else if (atomType == Mp4Box.TYPE_samr) { + // AMR NB audio is always mono, 8kHz + channelCount = 1; + sampleRate = SAMPLE_RATE_AMR_NB; + } else if (atomType == Mp4Box.TYPE_sawb) { + // AMR WB audio is always mono, 16kHz + channelCount = 1; + sampleRate = SAMPLE_RATE_AMR_WB; } int childPosition = parent.getPosition(); @@ -2235,8 +2246,7 @@ public final class BoxParser { new StriData( ((striInfo & 0x01) == 0x01), ((striInfo & 0x02) == 0x02), - ((striInfo & 0x08) == 0x08), - ((striInfo & 0x04) == 0x04))); + ((striInfo & 0x08) == 0x08))); } childPosition += childAtomSize; } @@ -2506,17 +2516,11 @@ public final class BoxParser { private final boolean hasLeftEyeView; private final boolean hasRightEyeView; private final boolean eyeViewsReversed; - private final boolean hasAdditionalViews; - public StriData( - boolean hasLeftEyeView, - boolean hasRightEyeView, - boolean eyeViewsReversed, - boolean hasAdditionalViews) { + public StriData(boolean hasLeftEyeView, boolean hasRightEyeView, boolean eyeViewsReversed) { this.hasLeftEyeView = hasLeftEyeView; this.hasRightEyeView = hasRightEyeView; this.eyeViewsReversed = eyeViewsReversed; - this.hasAdditionalViews = hasAdditionalViews; } } diff --git a/libraries/test_data/src/test/assets/muxerdumps/bbb_mono_16kHz_23.05kbps_amrwb.3gp.dump b/libraries/test_data/src/test/assets/muxerdumps/bbb_mono_16kHz_23.05kbps_amrwb.3gp.dump index d26225099e..a331017a64 100644 --- a/libraries/test_data/src/test/assets/muxerdumps/bbb_mono_16kHz_23.05kbps_amrwb.3gp.dump +++ b/libraries/test_data/src/test/assets/muxerdumps/bbb_mono_16kHz_23.05kbps_amrwb.3gp.dump @@ -15,7 +15,7 @@ track 0: containerMimeType = audio/mp4 sampleMimeType = audio/amr-wb maxInputSize = 89 - channelCount = 2 + channelCount = 1 sampleRate = 16000 language = und metadata = entries=[Mp4Timestamp: creation time=100000000, modification time=500000000, timescale=10000] diff --git a/libraries/test_data/src/test/assets/muxerdumps/bbb_mono_8kHz_12.2kbps_amrnb.3gp.dump b/libraries/test_data/src/test/assets/muxerdumps/bbb_mono_8kHz_12.2kbps_amrnb.3gp.dump index a3b50db8a4..7ba70f3568 100644 --- a/libraries/test_data/src/test/assets/muxerdumps/bbb_mono_8kHz_12.2kbps_amrnb.3gp.dump +++ b/libraries/test_data/src/test/assets/muxerdumps/bbb_mono_8kHz_12.2kbps_amrnb.3gp.dump @@ -15,7 +15,7 @@ track 0: containerMimeType = audio/mp4 sampleMimeType = audio/3gpp maxInputSize = 62 - channelCount = 2 + channelCount = 1 sampleRate = 8000 language = und metadata = entries=[Mp4Timestamp: creation time=100000000, modification time=500000000, timescale=10000] diff --git a/libraries/test_data/src/test/assets/transformerdumps/mp4/bbb_mono_16kHz_23.05kbps_amrwb.3gp/transmuxed_with_inappmuxer.dump b/libraries/test_data/src/test/assets/transformerdumps/mp4/bbb_mono_16kHz_23.05kbps_amrwb.3gp/transmuxed_with_inappmuxer.dump index 1d2f43d2da..df1e7bb0e9 100644 --- a/libraries/test_data/src/test/assets/transformerdumps/mp4/bbb_mono_16kHz_23.05kbps_amrwb.3gp/transmuxed_with_inappmuxer.dump +++ b/libraries/test_data/src/test/assets/transformerdumps/mp4/bbb_mono_16kHz_23.05kbps_amrwb.3gp/transmuxed_with_inappmuxer.dump @@ -15,7 +15,7 @@ track 0: containerMimeType = audio/mp4 sampleMimeType = audio/amr-wb maxInputSize = 89 - channelCount = 2 + channelCount = 1 sampleRate = 16000 language = und metadata = entries=[Mp4Timestamp: creation time=3000000000, modification time=4000000000, timescale=10000] diff --git a/libraries/test_data/src/test/assets/transformerdumps/mp4/bbb_mono_8kHz_12.2kbps_amrnb.3gp/transmuxed_with_inappmuxer.dump b/libraries/test_data/src/test/assets/transformerdumps/mp4/bbb_mono_8kHz_12.2kbps_amrnb.3gp/transmuxed_with_inappmuxer.dump index fe68598338..4a02c40030 100644 --- a/libraries/test_data/src/test/assets/transformerdumps/mp4/bbb_mono_8kHz_12.2kbps_amrnb.3gp/transmuxed_with_inappmuxer.dump +++ b/libraries/test_data/src/test/assets/transformerdumps/mp4/bbb_mono_8kHz_12.2kbps_amrnb.3gp/transmuxed_with_inappmuxer.dump @@ -15,7 +15,7 @@ track 0: containerMimeType = audio/mp4 sampleMimeType = audio/3gpp maxInputSize = 62 - channelCount = 2 + channelCount = 1 sampleRate = 8000 language = und metadata = entries=[Mp4Timestamp: creation time=3000000000, modification time=4000000000, timescale=10000]