diff --git a/RELEASENOTES.md b/RELEASENOTES.md index 17190cfc0d..f0813034e0 100644 --- a/RELEASENOTES.md +++ b/RELEASENOTES.md @@ -8,6 +8,8 @@ * Fix issue where initial seek positions get ignored when playing a preroll ad. * Fix `DataSchemeDataSource` re-opening and range requests ([#6192](https://github.com/google/ExoPlayer/issues/6192)). +* Switch normalized BCP-47 language codes to use 2-letter ISO 639-1 language + tags instead of 3-letter ISO 639-2 language tags. ### 2.10.3 ### diff --git a/library/core/src/main/java/com/google/android/exoplayer2/trackselection/DefaultTrackSelector.java b/library/core/src/main/java/com/google/android/exoplayer2/trackselection/DefaultTrackSelector.java index 949bd178ea..b8dd40f8bd 100644 --- a/library/core/src/main/java/com/google/android/exoplayer2/trackselection/DefaultTrackSelector.java +++ b/library/core/src/main/java/com/google/android/exoplayer2/trackselection/DefaultTrackSelector.java @@ -2318,14 +2318,14 @@ public class DefaultTrackSelector extends MappingTrackSelector { if (TextUtils.equals(format.language, language)) { return 3; } - // Partial match where one language is a subset of the other (e.g. "zho-hans" and "zho-hans-hk") + // Partial match where one language is a subset of the other (e.g. "zh-hans" and "zh-hans-hk") if (format.language.startsWith(language) || language.startsWith(format.language)) { return 2; } - // Partial match where only the main language tag is the same (e.g. "fra-fr" and "fra-ca") - if (format.language.length() >= 3 - && language.length() >= 3 - && format.language.substring(0, 3).equals(language.substring(0, 3))) { + // Partial match where only the main language tag is the same (e.g. 
"fr-fr" and "fr-ca") + String formatMainLanguage = Util.splitAtFirst(format.language, "-")[0]; + String queryMainLanguage = Util.splitAtFirst(language, "-")[0]; + if (formatMainLanguage.equals(queryMainLanguage)) { return 1; } return 0; diff --git a/library/core/src/main/java/com/google/android/exoplayer2/util/Util.java b/library/core/src/main/java/com/google/android/exoplayer2/util/Util.java index 86ad6fd6b3..919cda76c1 100644 --- a/library/core/src/main/java/com/google/android/exoplayer2/util/Util.java +++ b/library/core/src/main/java/com/google/android/exoplayer2/util/Util.java @@ -71,6 +71,7 @@ import java.util.Calendar; import java.util.Collections; import java.util.Formatter; import java.util.GregorianCalendar; +import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.MissingResourceException; @@ -135,6 +136,10 @@ public final class Util { + "(T(([0-9]*)H)?(([0-9]*)M)?(([0-9.]*)S)?)?$"); private static final Pattern ESCAPED_CHARACTER_PATTERN = Pattern.compile("%([A-Fa-f0-9]{2})"); + // Android standardizes to ISO 639-1 2-letter codes and provides no way to map a 3-letter + // ISO 639-2 code back to the corresponding 2-letter code. 
+ @Nullable private static HashMap<String, String> languageTagIso3ToIso2; + private Util() {} /** @@ -450,18 +455,25 @@ public final class Util { if (language == null) { return null; } - try { - Locale locale = getLocaleForLanguageTag(language); - int localeLanguageLength = locale.getLanguage().length(); - String normLanguage = locale.getISO3Language(); - if (normLanguage.isEmpty()) { - return toLowerInvariant(language); - } - String normTag = getLocaleLanguageTag(locale); - return toLowerInvariant(normLanguage + normTag.substring(localeLanguageLength)); - } catch (MissingResourceException e) { + Locale locale = getLocaleForLanguageTag(language); + String localeLanguage = locale.getLanguage(); + int localeLanguageLength = localeLanguage.length(); + if (localeLanguageLength == 0) { + // Return original language for invalid language tags. return toLowerInvariant(language); + } else if (localeLanguageLength == 3) { + // Locale.toLanguageTag will ensure a normalized well-formed output. However, 3-letter + // ISO 639-2 language codes will not be converted to 2-letter ISO 639-1 codes automatically. + if (languageTagIso3ToIso2 == null) { + languageTagIso3ToIso2 = createIso3ToIso2Map(); + } + String iso2Language = languageTagIso3ToIso2.get(localeLanguage); + if (iso2Language != null) { + localeLanguage = iso2Language; + } } + String normTag = getLocaleLanguageTag(locale); + return toLowerInvariant(localeLanguage + normTag.substring(localeLanguageLength)); } /** @@ -2013,6 +2025,54 @@ public final class Util { } } + private static HashMap<String, String> createIso3ToIso2Map() { + String[] iso2Languages = Locale.getISOLanguages(); + HashMap<String, String> iso3ToIso2 = + new HashMap<>( + /* initialCapacity= */ iso2Languages.length + iso3BibliographicalToIso2.length); + for (String iso2 : iso2Languages) { + try { + // This returns the ISO 639-2/T code for the language. 
+ String iso3 = new Locale(iso2).getISO3Language(); + if (!TextUtils.isEmpty(iso3)) { + iso3ToIso2.put(iso3, iso2); + } + } catch (MissingResourceException e) { + // Shouldn't happen for list of known languages, but we don't want to throw either. + } + } + // Add additional ISO 639-2/B codes to mapping. + for (int i = 0; i < iso3BibliographicalToIso2.length; i += 2) { + iso3ToIso2.put(iso3BibliographicalToIso2[i], iso3BibliographicalToIso2[i + 1]); + } + return iso3ToIso2; + } + + // See https://en.wikipedia.org/wiki/List_of_ISO_639-2_codes. + private static final String[] iso3BibliographicalToIso2 = + new String[] { + "alb", "sq", + "arm", "hy", + "baq", "eu", + "bur", "my", + "tib", "bo", + "chi", "zh", + "cze", "cs", + "dut", "nl", + "ger", "de", + "gre", "el", + "fre", "fr", + "geo", "ka", + "ice", "is", + "mac", "mk", + "mao", "mi", + "may", "ms", + "per", "fa", + "rum", "ro", + "slo", "sk", + "wel", "cy" + }; + /** * Allows the CRC calculation to be done byte by byte instead of bit per bit being the order * "most significant bit first". 
diff --git a/library/core/src/test/java/com/google/android/exoplayer2/util/UtilTest.java b/library/core/src/test/java/com/google/android/exoplayer2/util/UtilTest.java index 9abec0cd8f..f85ee37c07 100644 --- a/library/core/src/test/java/com/google/android/exoplayer2/util/UtilTest.java +++ b/library/core/src/test/java/com/google/android/exoplayer2/util/UtilTest.java @@ -268,14 +268,15 @@ public class UtilTest { @Test @Config(sdk = 21) public void testNormalizeLanguageCodeV21() { - assertThat(Util.normalizeLanguageCode("es")).isEqualTo("spa"); - assertThat(Util.normalizeLanguageCode("spa")).isEqualTo("spa"); - assertThat(Util.normalizeLanguageCode("es-AR")).isEqualTo("spa-ar"); - assertThat(Util.normalizeLanguageCode("SpA-ar")).isEqualTo("spa-ar"); - assertThat(Util.normalizeLanguageCode("es-AR-dialect")).isEqualTo("spa-ar-dialect"); - assertThat(Util.normalizeLanguageCode("es-419")).isEqualTo("spa-419"); - assertThat(Util.normalizeLanguageCode("zh-hans-tw")).isEqualTo("zho-hans-tw"); - assertThat(Util.normalizeLanguageCode("zh-tw-hans")).isEqualTo("zho-tw"); + assertThat(Util.normalizeLanguageCode("es")).isEqualTo("es"); + assertThat(Util.normalizeLanguageCode("spa")).isEqualTo("es"); + assertThat(Util.normalizeLanguageCode("es-AR")).isEqualTo("es-ar"); + assertThat(Util.normalizeLanguageCode("SpA-ar")).isEqualTo("es-ar"); + assertThat(Util.normalizeLanguageCode("es-AR-dialect")).isEqualTo("es-ar-dialect"); + assertThat(Util.normalizeLanguageCode("ES-419")).isEqualTo("es-419"); + assertThat(Util.normalizeLanguageCode("zh-hans-tw")).isEqualTo("zh-hans-tw"); + assertThat(Util.normalizeLanguageCode("zh-tw-hans")).isEqualTo("zh-tw"); + assertThat(Util.normalizeLanguageCode("zho-hans-tw")).isEqualTo("zh-hans-tw"); assertThat(Util.normalizeLanguageCode("und")).isEqualTo("und"); assertThat(Util.normalizeLanguageCode("DoesNotExist")).isEqualTo("doesnotexist"); } @@ -283,13 +284,38 @@ public class UtilTest { @Test @Config(sdk = 16) public void testNormalizeLanguageCode() { - 
assertThat(Util.normalizeLanguageCode("es")).isEqualTo("spa"); - assertThat(Util.normalizeLanguageCode("spa")).isEqualTo("spa"); + assertThat(Util.normalizeLanguageCode("es")).isEqualTo("es"); + assertThat(Util.normalizeLanguageCode("spa")).isEqualTo("es"); assertThat(Util.normalizeLanguageCode("es-AR")).isEqualTo("es-ar"); assertThat(Util.normalizeLanguageCode("und")).isEqualTo("und"); assertThat(Util.normalizeLanguageCode("DoesNotExist")).isEqualTo("doesnotexist"); } + @Test + public void testNormalizeIso6392BibliographicalAndTextualCodes() { + // See https://en.wikipedia.org/wiki/List_of_ISO_639-2_codes. + assertThat(Util.normalizeLanguageCode("alb")).isEqualTo(Util.normalizeLanguageCode("sqi")); + assertThat(Util.normalizeLanguageCode("arm")).isEqualTo(Util.normalizeLanguageCode("hye")); + assertThat(Util.normalizeLanguageCode("baq")).isEqualTo(Util.normalizeLanguageCode("eus")); + assertThat(Util.normalizeLanguageCode("bur")).isEqualTo(Util.normalizeLanguageCode("mya")); + assertThat(Util.normalizeLanguageCode("chi")).isEqualTo(Util.normalizeLanguageCode("zho")); + assertThat(Util.normalizeLanguageCode("cze")).isEqualTo(Util.normalizeLanguageCode("ces")); + assertThat(Util.normalizeLanguageCode("dut")).isEqualTo(Util.normalizeLanguageCode("nld")); + assertThat(Util.normalizeLanguageCode("fre")).isEqualTo(Util.normalizeLanguageCode("fra")); + assertThat(Util.normalizeLanguageCode("geo")).isEqualTo(Util.normalizeLanguageCode("kat")); + assertThat(Util.normalizeLanguageCode("ger")).isEqualTo(Util.normalizeLanguageCode("deu")); + assertThat(Util.normalizeLanguageCode("gre")).isEqualTo(Util.normalizeLanguageCode("ell")); + assertThat(Util.normalizeLanguageCode("ice")).isEqualTo(Util.normalizeLanguageCode("isl")); + assertThat(Util.normalizeLanguageCode("mac")).isEqualTo(Util.normalizeLanguageCode("mkd")); + assertThat(Util.normalizeLanguageCode("mao")).isEqualTo(Util.normalizeLanguageCode("mri")); + 
assertThat(Util.normalizeLanguageCode("may")).isEqualTo(Util.normalizeLanguageCode("msa")); + assertThat(Util.normalizeLanguageCode("per")).isEqualTo(Util.normalizeLanguageCode("fas")); + assertThat(Util.normalizeLanguageCode("rum")).isEqualTo(Util.normalizeLanguageCode("ron")); + assertThat(Util.normalizeLanguageCode("slo")).isEqualTo(Util.normalizeLanguageCode("slk")); + assertThat(Util.normalizeLanguageCode("tib")).isEqualTo(Util.normalizeLanguageCode("bod")); + assertThat(Util.normalizeLanguageCode("wel")).isEqualTo(Util.normalizeLanguageCode("cym")); + } + private static void assertEscapeUnescapeFileName(String fileName, String escapedFileName) { assertThat(escapeFileName(fileName)).isEqualTo(escapedFileName); assertThat(unescapeFileName(escapedFileName)).isEqualTo(fileName); diff --git a/library/hls/src/test/java/com/google/android/exoplayer2/source/hls/playlist/HlsMasterPlaylistParserTest.java b/library/hls/src/test/java/com/google/android/exoplayer2/source/hls/playlist/HlsMasterPlaylistParserTest.java index 095739271e..254a2b2bd1 100644 --- a/library/hls/src/test/java/com/google/android/exoplayer2/source/hls/playlist/HlsMasterPlaylistParserTest.java +++ b/library/hls/src/test/java/com/google/android/exoplayer2/source/hls/playlist/HlsMasterPlaylistParserTest.java @@ -263,7 +263,7 @@ public class HlsMasterPlaylistParserTest { Format closedCaptionFormat = playlist.muxedCaptionFormats.get(0); assertThat(closedCaptionFormat.sampleMimeType).isEqualTo(MimeTypes.APPLICATION_CEA708); assertThat(closedCaptionFormat.accessibilityChannel).isEqualTo(4); - assertThat(closedCaptionFormat.language).isEqualTo("spa"); + assertThat(closedCaptionFormat.language).isEqualTo("es"); } @Test