Add AC-4 channel configuration support in DASH

This commit is contained in:
ybai001 2024-09-25 09:17:50 +08:00
parent e7254b809a
commit 1381c92d76
8 changed files with 479 additions and 3 deletions

View file

@ -77,6 +77,17 @@ public class DashManifestParser extends DefaultHandler
private static final Pattern CEA_708_ACCESSIBILITY_PATTERN =
Pattern.compile("([1-9]|[1-5][0-9]|6[0-3])=.*");
/**
* Speaker counts used to decode the channel mask carried in the value attribute of an
* AudioChannelConfiguration with schemeIdUri
* "tag:dolby.com,2015:dash:audio_channel_configuration:2015", as defined by ETSI TS 103 190-2
* v1.2.1 clause G.3. The entry at index {@code i} is the number of speakers in speaker group
* index {@code i}, as listed in Table A.27 of ETSI TS 103 190-2 v1.2.1. A group's speaker count
* is added to the channel count only when bit {@code i} of the mask is set.
*/
private static final int[] DOLBY_AC4_CHANNEL_CONFIGURATION_MAPPING =
new int[] {
2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 2, 2, 2
};
/**
* Maps the value attribute of an AudioChannelConfiguration with schemeIdUri
* "urn:mpeg:mpegB:cicp:ChannelConfiguration", as defined by ISO 23001-8 clause 8.1, to a channel
@ -441,7 +452,7 @@ public class DashManifestParser extends DefaultHandler
} else if (XmlPullParserUtil.isStartTag(xpp, "Role")) {
roleDescriptors.add(parseDescriptor(xpp, "Role"));
} else if (XmlPullParserUtil.isStartTag(xpp, "AudioChannelConfiguration")) {
audioChannels = parseAudioChannelConfiguration(xpp);
audioChannels = parseAudioChannelConfiguration(xpp, codecs);
} else if (XmlPullParserUtil.isStartTag(xpp, "Accessibility")) {
accessibilityDescriptors.add(parseDescriptor(xpp, "Accessibility"));
} else if (XmlPullParserUtil.isStartTag(xpp, "EssentialProperty")) {
@ -720,7 +731,7 @@ public class DashManifestParser extends DefaultHandler
}
baseUrls.addAll(parseBaseUrl(xpp, parentBaseUrls, dvbProfileDeclared));
} else if (XmlPullParserUtil.isStartTag(xpp, "AudioChannelConfiguration")) {
audioChannels = parseAudioChannelConfiguration(xpp);
audioChannels = parseAudioChannelConfiguration(xpp, codecs);
} else if (XmlPullParserUtil.isStartTag(xpp, "SegmentBase")) {
segmentBase = parseSegmentBase(xpp, (SingleSegmentBase) segmentBase);
} else if (XmlPullParserUtil.isStartTag(xpp, "SegmentList")) {
@ -1487,7 +1498,7 @@ public class DashManifestParser extends DefaultHandler
// AudioChannelConfiguration parsing.
protected int parseAudioChannelConfiguration(XmlPullParser xpp)
protected int parseAudioChannelConfiguration(XmlPullParser xpp, String codecs)
throws XmlPullParserException, IOException {
String schemeIdUri = parseString(xpp, "schemeIdUri", null);
int audioChannels;
@ -1509,6 +1520,9 @@ public class DashManifestParser extends DefaultHandler
case "urn:dolby:dash:audio_channel_configuration:2011":
audioChannels = parseDolbyChannelConfiguration(xpp);
break;
case "tag:dolby.com,2015:dash:audio_channel_configuration:2015":
audioChannels = parseDolbyAC4ChannelConfiguration(xpp, codecs);
break;
default:
audioChannels = Format.NO_VALUE;
break;
@ -2043,6 +2057,49 @@ public class DashManifestParser extends DefaultHandler
}
}
/**
 * Parses the number of channels from the value attribute of an AudioChannelConfiguration with
 * schemeIdUri "tag:dolby.com,2015:dash:audio_channel_configuration:2015" as defined by table A.27
 * in ETSI TS 103 190-2 v1.2.1.
 *
 * <p>The value is read as a six-digit hexadecimal channel mask. If bit 23 (0x800000) is set, the
 * content is treated as object-based and the channel count is derived from the presentation
 * level found in the first entry of {@code codecs}. Otherwise the speaker counts of all speaker
 * groups whose bits are set in the mask are summed.
 *
 * @param xpp The parser from which to read.
 * @param codecs The codecs string associated with the stream, or {@code null} if none is known.
 *     Only consulted for object-based content.
 * @return The parsed number of channels, or {@link Format#NO_VALUE} if the channel count could
 *     not be parsed.
 */
protected static int parseDolbyAC4ChannelConfiguration(
    XmlPullParser xpp, @Nullable String codecs) {
  @Nullable String value = xpp.getAttributeValue(null, "value");
  // The value attribute must be set to a six-digit hexadecimal number.
  if (value == null || value.length() != 6) {
    return Format.NO_VALUE;
  }
  // Decode digit by digit so that a malformed manifest (non-hex characters, or a leading sign
  // that Integer.parseInt would accept) yields NO_VALUE instead of an exception or a bogus mask.
  int ac4ChannelMask = 0;
  for (int i = 0; i < value.length(); i++) {
    int digit = Character.digit(value.charAt(i), /* radix= */ 16);
    if (digit == -1) {
      return Format.NO_VALUE;
    }
    ac4ChannelMask = (ac4ChannelMask << 4) | digit;
  }

  final int objectBasedAudioStreamFlag = 0x800000;
  if ((ac4ChannelMask & objectBasedAudioStreamFlag) != 0) {
    // Object-based audio content: the channel count depends on the presentation level, which is
    // only available from the codecs string.
    if (codecs == null) {
      return Format.NO_VALUE;
    }
    String[] codecList = Util.splitCodecs(codecs);
    if (codecList.length == 0) {
      return Format.NO_VALUE;
    }
    // The AC-4 codec string format is "ac-4.xx.yy.zz", where zz is the presentation level.
    String[] parts = Ascii.toLowerCase(codecList[0].trim()).split("\\.");
    if (parts.length == 4 && parts[0].equals("ac-4")) {
      if (parts[3].equals("03")) {
        return 18; // AC-4 Level 3 object-based content is mapped to 17.1 channels.
      } else if (parts[3].equals("04")) {
        return 21; // AC-4 Level 4 object-based content is mapped to 20.1 channels.
      }
    }
    return Format.NO_VALUE;
  }

  // Channel-based audio content: bits 0...18 indicate the presence of individual speaker groups.
  int channelCount = 0;
  for (int i = 0; i < DOLBY_AC4_CHANNEL_CONFIGURATION_MAPPING.length; i++) {
    if (((ac4ChannelMask >> i) & 0x1) != 0) {
      channelCount += DOLBY_AC4_CHANNEL_CONFIGURATION_MAPPING[i];
    }
  }
  return channelCount == 0 ? Format.NO_VALUE : channelCount;
}
protected static long parseLastSegmentNumberSupplementalProperty(
List<Descriptor> supplementalProperties) {
for (int i = 0; i < supplementalProperties.size(); i++) {

View file

@ -110,6 +110,15 @@ public final class FragmentedMp4ExtractorParameterizedTest {
/* closedCaptionFormats= */ ImmutableList.of(), "media/mp4/sample_ac4_fragmented.mp4");
}
// Checks extractor behavior for the fragmented MP4 sample
// "media/mp4/sample_ac4_level4_fragmented.mp4", with no closed caption formats configured.
// NOTE(review): the neighbouring AC-4 tests go through assertExtractorBehavior(...) instead of
// calling ExtractorAsserts.assertBehavior directly; confirm the difference is intentional.
@Test
public void sampleWithAc4Level4Track() throws Exception {
ExtractorAsserts.assertBehavior(
getExtractorFactory(
/* closedCaptionFormats= */ ImmutableList.of(), subtitlesParsedDuringExtraction),
"media/mp4/sample_ac4_level4_fragmented.mp4",
simulationConfig);
}
@Test
public void sampleWithProtectedAc4Track() throws Exception {
assertExtractorBehavior(

View file

@ -0,0 +1,98 @@
seekMap:
isSeekable = true
duration = 853333
getPosition(0) = [[timeUs=0, position=669]]
getPosition(1) = [[timeUs=0, position=669]]
getPosition(426666) = [[timeUs=0, position=669]]
getPosition(853333) = [[timeUs=0, position=669]]
numberOfTracks = 1
track 0:
total output bytes = 162700
sample count = 20
format 0:
id = 1
sampleMimeType = audio/ac4
channelCount = 21
sampleRate = 48000
language = und
sample 0:
time = 0
flags = 1
data = length 8135, hash B524F88E
sample 1:
time = 42666
flags = 0
data = length 8135, hash FB80C2FB
sample 2:
time = 85333
flags = 0
data = length 8135, hash 907C0C31
sample 3:
time = 128000
flags = 0
data = length 8135, hash FDFBD32B
sample 4:
time = 170666
flags = 0
data = length 8135, hash 6CAF0549
sample 5:
time = 213333
flags = 0
data = length 8135, hash F5CA1C9A
sample 6:
time = 256000
flags = 0
data = length 8135, hash B1B5160D
sample 7:
time = 298666
flags = 0
data = length 8135, hash 9E923B3F
sample 8:
time = 341333
flags = 0
data = length 8135, hash B1C0BB1F
sample 9:
time = 384000
flags = 0
data = length 8135, hash 56F65A03
sample 10:
time = 426666
flags = 1
data = length 8135, hash D07FA9A1
sample 11:
time = 469333
flags = 0
data = length 8135, hash EF26FDDE
sample 12:
time = 512000
flags = 0
data = length 8135, hash 8946EEEB
sample 13:
time = 554666
flags = 0
data = length 8135, hash AC2E4C99
sample 14:
time = 597333
flags = 0
data = length 8135, hash B63A1D8
sample 15:
time = 640000
flags = 0
data = length 8135, hash 23119F0F
sample 16:
time = 682666
flags = 0
data = length 8135, hash 507972CA
sample 17:
time = 725333
flags = 0
data = length 8135, hash E574BC00
sample 18:
time = 768000
flags = 0
data = length 8135, hash 52F482FA
sample 19:
time = 810666
flags = 0
data = length 8135, hash C1A7B518
tracksEnded = true

View file

@ -0,0 +1,98 @@
seekMap:
isSeekable = true
duration = 853333
getPosition(0) = [[timeUs=0, position=669]]
getPosition(1) = [[timeUs=0, position=669]]
getPosition(426666) = [[timeUs=0, position=669]]
getPosition(853333) = [[timeUs=0, position=669]]
numberOfTracks = 1
track 0:
total output bytes = 162700
sample count = 20
format 0:
id = 1
sampleMimeType = audio/ac4
channelCount = 21
sampleRate = 48000
language = und
sample 0:
time = 0
flags = 1
data = length 8135, hash B524F88E
sample 1:
time = 42666
flags = 0
data = length 8135, hash FB80C2FB
sample 2:
time = 85333
flags = 0
data = length 8135, hash 907C0C31
sample 3:
time = 128000
flags = 0
data = length 8135, hash FDFBD32B
sample 4:
time = 170666
flags = 0
data = length 8135, hash 6CAF0549
sample 5:
time = 213333
flags = 0
data = length 8135, hash F5CA1C9A
sample 6:
time = 256000
flags = 0
data = length 8135, hash B1B5160D
sample 7:
time = 298666
flags = 0
data = length 8135, hash 9E923B3F
sample 8:
time = 341333
flags = 0
data = length 8135, hash B1C0BB1F
sample 9:
time = 384000
flags = 0
data = length 8135, hash 56F65A03
sample 10:
time = 426666
flags = 1
data = length 8135, hash D07FA9A1
sample 11:
time = 469333
flags = 0
data = length 8135, hash EF26FDDE
sample 12:
time = 512000
flags = 0
data = length 8135, hash 8946EEEB
sample 13:
time = 554666
flags = 0
data = length 8135, hash AC2E4C99
sample 14:
time = 597333
flags = 0
data = length 8135, hash B63A1D8
sample 15:
time = 640000
flags = 0
data = length 8135, hash 23119F0F
sample 16:
time = 682666
flags = 0
data = length 8135, hash 507972CA
sample 17:
time = 725333
flags = 0
data = length 8135, hash E574BC00
sample 18:
time = 768000
flags = 0
data = length 8135, hash 52F482FA
sample 19:
time = 810666
flags = 0
data = length 8135, hash C1A7B518
tracksEnded = true

View file

@ -0,0 +1,58 @@
seekMap:
isSeekable = true
duration = 853333
getPosition(0) = [[timeUs=0, position=669]]
getPosition(1) = [[timeUs=0, position=669]]
getPosition(426666) = [[timeUs=0, position=669]]
getPosition(853333) = [[timeUs=0, position=669]]
numberOfTracks = 1
track 0:
total output bytes = 81350
sample count = 10
format 0:
id = 1
sampleMimeType = audio/ac4
channelCount = 21
sampleRate = 48000
language = und
sample 0:
time = 426666
flags = 1
data = length 8135, hash D07FA9A1
sample 1:
time = 469333
flags = 0
data = length 8135, hash EF26FDDE
sample 2:
time = 512000
flags = 0
data = length 8135, hash 8946EEEB
sample 3:
time = 554666
flags = 0
data = length 8135, hash AC2E4C99
sample 4:
time = 597333
flags = 0
data = length 8135, hash B63A1D8
sample 5:
time = 640000
flags = 0
data = length 8135, hash 23119F0F
sample 6:
time = 682666
flags = 0
data = length 8135, hash 507972CA
sample 7:
time = 725333
flags = 0
data = length 8135, hash E574BC00
sample 8:
time = 768000
flags = 0
data = length 8135, hash 52F482FA
sample 9:
time = 810666
flags = 0
data = length 8135, hash C1A7B518
tracksEnded = true

View file

@ -0,0 +1,58 @@
seekMap:
isSeekable = true
duration = 853333
getPosition(0) = [[timeUs=0, position=669]]
getPosition(1) = [[timeUs=0, position=669]]
getPosition(426666) = [[timeUs=0, position=669]]
getPosition(853333) = [[timeUs=0, position=669]]
numberOfTracks = 1
track 0:
total output bytes = 81350
sample count = 10
format 0:
id = 1
sampleMimeType = audio/ac4
channelCount = 21
sampleRate = 48000
language = und
sample 0:
time = 426666
flags = 1
data = length 8135, hash D07FA9A1
sample 1:
time = 469333
flags = 0
data = length 8135, hash EF26FDDE
sample 2:
time = 512000
flags = 0
data = length 8135, hash 8946EEEB
sample 3:
time = 554666
flags = 0
data = length 8135, hash AC2E4C99
sample 4:
time = 597333
flags = 0
data = length 8135, hash B63A1D8
sample 5:
time = 640000
flags = 0
data = length 8135, hash 23119F0F
sample 6:
time = 682666
flags = 0
data = length 8135, hash 507972CA
sample 7:
time = 725333
flags = 0
data = length 8135, hash E574BC00
sample 8:
time = 768000
flags = 0
data = length 8135, hash 52F482FA
sample 9:
time = 810666
flags = 0
data = length 8135, hash C1A7B518
tracksEnded = true

View file

@ -0,0 +1,98 @@
seekMap:
isSeekable = true
duration = 853333
getPosition(0) = [[timeUs=0, position=669]]
getPosition(1) = [[timeUs=0, position=669]]
getPosition(426666) = [[timeUs=0, position=669]]
getPosition(853333) = [[timeUs=0, position=669]]
numberOfTracks = 1
track 0:
total output bytes = 162700
sample count = 20
format 0:
id = 1
sampleMimeType = audio/ac4
channelCount = 21
sampleRate = 48000
language = und
sample 0:
time = 0
flags = 1
data = length 8135, hash B524F88E
sample 1:
time = 42666
flags = 0
data = length 8135, hash FB80C2FB
sample 2:
time = 85333
flags = 0
data = length 8135, hash 907C0C31
sample 3:
time = 128000
flags = 0
data = length 8135, hash FDFBD32B
sample 4:
time = 170666
flags = 0
data = length 8135, hash 6CAF0549
sample 5:
time = 213333
flags = 0
data = length 8135, hash F5CA1C9A
sample 6:
time = 256000
flags = 0
data = length 8135, hash B1B5160D
sample 7:
time = 298666
flags = 0
data = length 8135, hash 9E923B3F
sample 8:
time = 341333
flags = 0
data = length 8135, hash B1C0BB1F
sample 9:
time = 384000
flags = 0
data = length 8135, hash 56F65A03
sample 10:
time = 426666
flags = 1
data = length 8135, hash D07FA9A1
sample 11:
time = 469333
flags = 0
data = length 8135, hash EF26FDDE
sample 12:
time = 512000
flags = 0
data = length 8135, hash 8946EEEB
sample 13:
time = 554666
flags = 0
data = length 8135, hash AC2E4C99
sample 14:
time = 597333
flags = 0
data = length 8135, hash B63A1D8
sample 15:
time = 640000
flags = 0
data = length 8135, hash 23119F0F
sample 16:
time = 682666
flags = 0
data = length 8135, hash 507972CA
sample 17:
time = 725333
flags = 0
data = length 8135, hash E574BC00
sample 18:
time = 768000
flags = 0
data = length 8135, hash 52F482FA
sample 19:
time = 810666
flags = 0
data = length 8135, hash C1A7B518
tracksEnded = true