mirror of
https://github.com/samsonjs/media.git
synced 2026-03-29 10:05:48 +00:00
Switch language normalization to 2-letter language codes.
2-letter codes (ISO 639-1) are the standard Android normalization and thus we
should prefer them to 3-letter codes (although both are technically allowed
according to BCP 47).
This helps in three ways:
1. It simplifies app interaction with our normalized language codes as the
Locale class makes it easy to convert a 2-letter to a 3-letter code but
not the other way round.
2. It better normalizes codes on API<21 where we previously had issues with
language+country codes (see tests).
3. It allows us to normalize both ISO 639-2/T and ISO 639-2/B codes to the same
language.
PiperOrigin-RevId: 258729728
This commit is contained in:
parent
e181d4bd35
commit
f82920926d
5 changed files with 114 additions and 26 deletions
|
|
@ -8,6 +8,8 @@
|
|||
* Fix issue where initial seek positions get ignored when playing a preroll ad.
|
||||
* Fix `DataSchemeDataSource` re-opening and range requests
|
||||
([#6192](https://github.com/google/ExoPlayer/issues/6192)).
|
||||
* Switch normalized BCP-47 language codes to use 2-letter ISO 639-1 language
|
||||
tags instead of 3-letter ISO 639-2 language tags.
|
||||
|
||||
### 2.10.3 ###
|
||||
|
||||
|
|
|
|||
|
|
@ -2318,14 +2318,14 @@ public class DefaultTrackSelector extends MappingTrackSelector {
|
|||
if (TextUtils.equals(format.language, language)) {
|
||||
return 3;
|
||||
}
|
||||
// Partial match where one language is a subset of the other (e.g. "zho-hans" and "zho-hans-hk")
|
||||
// Partial match where one language is a subset of the other (e.g. "zh-hans" and "zh-hans-hk")
|
||||
if (format.language.startsWith(language) || language.startsWith(format.language)) {
|
||||
return 2;
|
||||
}
|
||||
// Partial match where only the main language tag is the same (e.g. "fra-fr" and "fra-ca")
|
||||
if (format.language.length() >= 3
|
||||
&& language.length() >= 3
|
||||
&& format.language.substring(0, 3).equals(language.substring(0, 3))) {
|
||||
// Partial match where only the main language tag is the same (e.g. "fr-fr" and "fr-ca")
|
||||
String formatMainLanguage = Util.splitAtFirst(format.language, "-")[0];
|
||||
String queryMainLanguage = Util.splitAtFirst(language, "-")[0];
|
||||
if (formatMainLanguage.equals(queryMainLanguage)) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -71,6 +71,7 @@ import java.util.Calendar;
|
|||
import java.util.Collections;
|
||||
import java.util.Formatter;
|
||||
import java.util.GregorianCalendar;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.MissingResourceException;
|
||||
|
|
@ -135,6 +136,10 @@ public final class Util {
|
|||
+ "(T(([0-9]*)H)?(([0-9]*)M)?(([0-9.]*)S)?)?$");
|
||||
private static final Pattern ESCAPED_CHARACTER_PATTERN = Pattern.compile("%([A-Fa-f0-9]{2})");
|
||||
|
||||
// Android standardizes to ISO 639-1 2-letter codes and provides no way to map a 3-letter
|
||||
// ISO 639-2 code back to the corresponding 2-letter code.
|
||||
@Nullable private static HashMap<String, String> languageTagIso3ToIso2;
|
||||
|
||||
private Util() {}
|
||||
|
||||
/**
|
||||
|
|
@ -450,18 +455,25 @@ public final class Util {
|
|||
if (language == null) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
Locale locale = getLocaleForLanguageTag(language);
|
||||
int localeLanguageLength = locale.getLanguage().length();
|
||||
String normLanguage = locale.getISO3Language();
|
||||
if (normLanguage.isEmpty()) {
|
||||
return toLowerInvariant(language);
|
||||
}
|
||||
String normTag = getLocaleLanguageTag(locale);
|
||||
return toLowerInvariant(normLanguage + normTag.substring(localeLanguageLength));
|
||||
} catch (MissingResourceException e) {
|
||||
Locale locale = getLocaleForLanguageTag(language);
|
||||
String localeLanguage = locale.getLanguage();
|
||||
int localeLanguageLength = localeLanguage.length();
|
||||
if (localeLanguageLength == 0) {
|
||||
// Return original language for invalid language tags.
|
||||
return toLowerInvariant(language);
|
||||
} else if (localeLanguageLength == 3) {
|
||||
// Locale.toLanguageTag will ensure a normalized well-formed output. However, 3-letter
|
||||
// ISO 639-2 language codes will not be converted to 2-letter ISO 639-1 codes automatically.
|
||||
if (languageTagIso3ToIso2 == null) {
|
||||
languageTagIso3ToIso2 = createIso3ToIso2Map();
|
||||
}
|
||||
String iso2Language = languageTagIso3ToIso2.get(localeLanguage);
|
||||
if (iso2Language != null) {
|
||||
localeLanguage = iso2Language;
|
||||
}
|
||||
}
|
||||
String normTag = getLocaleLanguageTag(locale);
|
||||
return toLowerInvariant(localeLanguage + normTag.substring(localeLanguageLength));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -2013,6 +2025,54 @@ public final class Util {
|
|||
}
|
||||
}
|
||||
|
||||
private static HashMap<String, String> createIso3ToIso2Map() {
|
||||
String[] iso2Languages = Locale.getISOLanguages();
|
||||
HashMap<String, String> iso3ToIso2 =
|
||||
new HashMap<>(
|
||||
/* initialCapacity= */ iso2Languages.length + iso3BibliographicalToIso2.length);
|
||||
for (String iso2 : iso2Languages) {
|
||||
try {
|
||||
// This returns the ISO 639-2/T code for the language.
|
||||
String iso3 = new Locale(iso2).getISO3Language();
|
||||
if (!TextUtils.isEmpty(iso3)) {
|
||||
iso3ToIso2.put(iso3, iso2);
|
||||
}
|
||||
} catch (MissingResourceException e) {
|
||||
// Shouldn't happen for list of known languages, but we don't want to throw either.
|
||||
}
|
||||
}
|
||||
// Add additional ISO 639-2/B codes to mapping.
|
||||
for (int i = 0; i < iso3BibliographicalToIso2.length; i += 2) {
|
||||
iso3ToIso2.put(iso3BibliographicalToIso2[i], iso3BibliographicalToIso2[i + 1]);
|
||||
}
|
||||
return iso3ToIso2;
|
||||
}
|
||||
|
||||
// See https://en.wikipedia.org/wiki/List_of_ISO_639-2_codes.
|
||||
private static final String[] iso3BibliographicalToIso2 =
|
||||
new String[] {
|
||||
"alb", "sq",
|
||||
"arm", "hy",
|
||||
"baq", "eu",
|
||||
"bur", "my",
|
||||
"tib", "bo",
|
||||
"chi", "zh",
|
||||
"cze", "cs",
|
||||
"dut", "nl",
|
||||
"ger", "de",
|
||||
"gre", "el",
|
||||
"fre", "fr",
|
||||
"geo", "ka",
|
||||
"ice", "is",
|
||||
"mac", "mk",
|
||||
"mao", "mi",
|
||||
"may", "ms",
|
||||
"per", "fa",
|
||||
"rum", "ro",
|
||||
"slo", "sk",
|
||||
"wel", "cy"
|
||||
};
|
||||
|
||||
/**
|
||||
* Allows the CRC calculation to be done byte by byte instead of bit per bit being the order
|
||||
* "most significant bit first".
|
||||
|
|
|
|||
|
|
@ -268,14 +268,15 @@ public class UtilTest {
|
|||
@Test
|
||||
@Config(sdk = 21)
|
||||
public void testNormalizeLanguageCodeV21() {
|
||||
assertThat(Util.normalizeLanguageCode("es")).isEqualTo("spa");
|
||||
assertThat(Util.normalizeLanguageCode("spa")).isEqualTo("spa");
|
||||
assertThat(Util.normalizeLanguageCode("es-AR")).isEqualTo("spa-ar");
|
||||
assertThat(Util.normalizeLanguageCode("SpA-ar")).isEqualTo("spa-ar");
|
||||
assertThat(Util.normalizeLanguageCode("es-AR-dialect")).isEqualTo("spa-ar-dialect");
|
||||
assertThat(Util.normalizeLanguageCode("es-419")).isEqualTo("spa-419");
|
||||
assertThat(Util.normalizeLanguageCode("zh-hans-tw")).isEqualTo("zho-hans-tw");
|
||||
assertThat(Util.normalizeLanguageCode("zh-tw-hans")).isEqualTo("zho-tw");
|
||||
assertThat(Util.normalizeLanguageCode("es")).isEqualTo("es");
|
||||
assertThat(Util.normalizeLanguageCode("spa")).isEqualTo("es");
|
||||
assertThat(Util.normalizeLanguageCode("es-AR")).isEqualTo("es-ar");
|
||||
assertThat(Util.normalizeLanguageCode("SpA-ar")).isEqualTo("es-ar");
|
||||
assertThat(Util.normalizeLanguageCode("es-AR-dialect")).isEqualTo("es-ar-dialect");
|
||||
assertThat(Util.normalizeLanguageCode("ES-419")).isEqualTo("es-419");
|
||||
assertThat(Util.normalizeLanguageCode("zh-hans-tw")).isEqualTo("zh-hans-tw");
|
||||
assertThat(Util.normalizeLanguageCode("zh-tw-hans")).isEqualTo("zh-tw");
|
||||
assertThat(Util.normalizeLanguageCode("zho-hans-tw")).isEqualTo("zh-hans-tw");
|
||||
assertThat(Util.normalizeLanguageCode("und")).isEqualTo("und");
|
||||
assertThat(Util.normalizeLanguageCode("DoesNotExist")).isEqualTo("doesnotexist");
|
||||
}
|
||||
|
|
@ -283,13 +284,38 @@ public class UtilTest {
|
|||
@Test
|
||||
@Config(sdk = 16)
|
||||
public void testNormalizeLanguageCode() {
|
||||
assertThat(Util.normalizeLanguageCode("es")).isEqualTo("spa");
|
||||
assertThat(Util.normalizeLanguageCode("spa")).isEqualTo("spa");
|
||||
assertThat(Util.normalizeLanguageCode("es")).isEqualTo("es");
|
||||
assertThat(Util.normalizeLanguageCode("spa")).isEqualTo("es");
|
||||
assertThat(Util.normalizeLanguageCode("es-AR")).isEqualTo("es-ar");
|
||||
assertThat(Util.normalizeLanguageCode("und")).isEqualTo("und");
|
||||
assertThat(Util.normalizeLanguageCode("DoesNotExist")).isEqualTo("doesnotexist");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNormalizeIso6392BibliographicalAndTextualCodes() {
|
||||
// See https://en.wikipedia.org/wiki/List_of_ISO_639-2_codes.
|
||||
assertThat(Util.normalizeLanguageCode("alb")).isEqualTo(Util.normalizeLanguageCode("sqi"));
|
||||
assertThat(Util.normalizeLanguageCode("arm")).isEqualTo(Util.normalizeLanguageCode("hye"));
|
||||
assertThat(Util.normalizeLanguageCode("baq")).isEqualTo(Util.normalizeLanguageCode("eus"));
|
||||
assertThat(Util.normalizeLanguageCode("bur")).isEqualTo(Util.normalizeLanguageCode("mya"));
|
||||
assertThat(Util.normalizeLanguageCode("chi")).isEqualTo(Util.normalizeLanguageCode("zho"));
|
||||
assertThat(Util.normalizeLanguageCode("cze")).isEqualTo(Util.normalizeLanguageCode("ces"));
|
||||
assertThat(Util.normalizeLanguageCode("dut")).isEqualTo(Util.normalizeLanguageCode("nld"));
|
||||
assertThat(Util.normalizeLanguageCode("fre")).isEqualTo(Util.normalizeLanguageCode("fra"));
|
||||
assertThat(Util.normalizeLanguageCode("geo")).isEqualTo(Util.normalizeLanguageCode("kat"));
|
||||
assertThat(Util.normalizeLanguageCode("ger")).isEqualTo(Util.normalizeLanguageCode("deu"));
|
||||
assertThat(Util.normalizeLanguageCode("gre")).isEqualTo(Util.normalizeLanguageCode("ell"));
|
||||
assertThat(Util.normalizeLanguageCode("ice")).isEqualTo(Util.normalizeLanguageCode("isl"));
|
||||
assertThat(Util.normalizeLanguageCode("mac")).isEqualTo(Util.normalizeLanguageCode("mkd"));
|
||||
assertThat(Util.normalizeLanguageCode("mao")).isEqualTo(Util.normalizeLanguageCode("mri"));
|
||||
assertThat(Util.normalizeLanguageCode("may")).isEqualTo(Util.normalizeLanguageCode("msa"));
|
||||
assertThat(Util.normalizeLanguageCode("per")).isEqualTo(Util.normalizeLanguageCode("fas"));
|
||||
assertThat(Util.normalizeLanguageCode("rum")).isEqualTo(Util.normalizeLanguageCode("ron"));
|
||||
assertThat(Util.normalizeLanguageCode("slo")).isEqualTo(Util.normalizeLanguageCode("slk"));
|
||||
assertThat(Util.normalizeLanguageCode("tib")).isEqualTo(Util.normalizeLanguageCode("bod"));
|
||||
assertThat(Util.normalizeLanguageCode("wel")).isEqualTo(Util.normalizeLanguageCode("cym"));
|
||||
}
|
||||
|
||||
private static void assertEscapeUnescapeFileName(String fileName, String escapedFileName) {
|
||||
assertThat(escapeFileName(fileName)).isEqualTo(escapedFileName);
|
||||
assertThat(unescapeFileName(escapedFileName)).isEqualTo(fileName);
|
||||
|
|
|
|||
|
|
@ -263,7 +263,7 @@ public class HlsMasterPlaylistParserTest {
|
|||
Format closedCaptionFormat = playlist.muxedCaptionFormats.get(0);
|
||||
assertThat(closedCaptionFormat.sampleMimeType).isEqualTo(MimeTypes.APPLICATION_CEA708);
|
||||
assertThat(closedCaptionFormat.accessibilityChannel).isEqualTo(4);
|
||||
assertThat(closedCaptionFormat.language).isEqualTo("spa");
|
||||
assertThat(closedCaptionFormat.language).isEqualTo("es");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
Loading…
Reference in a new issue