Switch language normalization to 2-letter language codes.

2-letter codes (ISO 639-1) are the standard Android normalization and thus we should prefer them to 3-letter codes (although both are technically allowed according to BCP 47). This helps in three ways: 1. It simplifies app interaction with our normalized language codes, as the Locale class makes it easy to convert a 2-letter code to a 3-letter code but not the other way round. 2. It better normalizes codes on API<21 where we previously had issues with language+country codes (see tests). 3. It allows us to normalize both ISO 639-2/T and ISO 639-2/B codes to the same language. PiperOrigin-RevId: 258729728
2026-04-27 15:07:40 +00:00 · 2019-07-18 10:08:19 +01:00 · 2019-07-18 10:08:19 +01:00 · f82920926d
commit f82920926d
parent e181d4bd35
5 changed files with 114 additions and 26 deletions
--- a/RELEASENOTES.md
+++ b/RELEASENOTES.md
@ -8,6 +8,8 @@
 * Fix issue where initial seek positions get ignored when playing a preroll ad.
 * Fix `DataSchemeDataSource` re-opening and range requests
  ([#6192](https://github.com/google/ExoPlayer/issues/6192)).
+* Switch normalized BCP-47 language codes to use 2-letter ISO 639-1 language
+  tags instead of 3-letter ISO 639-2 language tags.

 ### 2.10.3 ###

--- a/library/core/src/main/java/com/google/android/exoplayer2/trackselection/DefaultTrackSelector.java
+++ b/library/core/src/main/java/com/google/android/exoplayer2/trackselection/DefaultTrackSelector.java
@ -2318,14 +2318,14 @@ public class DefaultTrackSelector extends MappingTrackSelector {
    if (TextUtils.equals(format.language, language)) {
      return 3;
    }
-    // Partial match where one language is a subset of the other (e.g. "zho-hans" and "zho-hans-hk")
+    // Partial match where one language is a subset of the other (e.g. "zh-hans" and "zh-hans-hk")
    if (format.language.startsWith(language) || language.startsWith(format.language)) {
      return 2;
    }
-    // Partial match where only the main language tag is the same (e.g. "fra-fr" and "fra-ca")
-    if (format.language.length() >= 3
-        && language.length() >= 3
-        && format.language.substring(0, 3).equals(language.substring(0, 3))) {
+    // Partial match where only the main language tag is the same (e.g. "fr-fr" and "fr-ca")
+    String formatMainLanguage = Util.splitAtFirst(format.language, "-")[0];
+    String queryMainLanguage = Util.splitAtFirst(language, "-")[0];
+    if (formatMainLanguage.equals(queryMainLanguage)) {
      return 1;
    }
    return 0;
--- a/library/core/src/main/java/com/google/android/exoplayer2/util/Util.java
+++ b/library/core/src/main/java/com/google/android/exoplayer2/util/Util.java
@ -71,6 +71,7 @@ import java.util.Calendar;
 import java.util.Collections;
 import java.util.Formatter;
 import java.util.GregorianCalendar;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.MissingResourceException;
@ -135,6 +136,10 @@ public final class Util {
          + "(T(([0-9]*)H)?(([0-9]*)M)?(([0-9.]*)S)?)?$");
  private static final Pattern ESCAPED_CHARACTER_PATTERN = Pattern.compile("%([A-Fa-f0-9]{2})");

+  // Android standardizes to ISO 639-1 2-letter codes and provides no way to map a 3-letter
+  // ISO 639-2 code back to the corresponding 2-letter code.
+  @Nullable private static HashMap<String, String> languageTagIso3ToIso2;
+
  private Util() {}

  /**
@ -450,18 +455,25 @@ public final class Util {
    if (language == null) {
      return null;
    }
-    try {
-      Locale locale = getLocaleForLanguageTag(language);
-      int localeLanguageLength = locale.getLanguage().length();
-      String normLanguage = locale.getISO3Language();
-      if (normLanguage.isEmpty()) {
-        return toLowerInvariant(language);
-      }
-      String normTag = getLocaleLanguageTag(locale);
-      return toLowerInvariant(normLanguage + normTag.substring(localeLanguageLength));
-    } catch (MissingResourceException e) {
+    Locale locale = getLocaleForLanguageTag(language);
+    String localeLanguage = locale.getLanguage();
+    int localeLanguageLength = localeLanguage.length();
+    if (localeLanguageLength == 0) {
+      // Return original language for invalid language tags.
      return toLowerInvariant(language);
+    } else if (localeLanguageLength == 3) {
+      // Locale.toLanguageTag will ensure a normalized well-formed output. However, 3-letter
+      // ISO 639-2 language codes will not be converted to 2-letter ISO 639-1 codes automatically.
+      if (languageTagIso3ToIso2 == null) {
+        languageTagIso3ToIso2 = createIso3ToIso2Map();
+      }
+      String iso2Language = languageTagIso3ToIso2.get(localeLanguage);
+      if (iso2Language != null) {
+        localeLanguage = iso2Language;
+      }
    }
+    String normTag = getLocaleLanguageTag(locale);
+    return toLowerInvariant(localeLanguage + normTag.substring(localeLanguageLength));
  }

  /**
@ -2013,6 +2025,54 @@ public final class Util {
    }
  }

+  private static HashMap<String, String> createIso3ToIso2Map() {
+    String[] iso2Languages = Locale.getISOLanguages();
+    HashMap<String, String> iso3ToIso2 =
+        new HashMap<>(
+            /* initialCapacity= */ iso2Languages.length + iso3BibliographicalToIso2.length);
+    for (String iso2 : iso2Languages) {
+      try {
+        // This returns the ISO 639-2/T code for the language.
+        String iso3 = new Locale(iso2).getISO3Language();
+        if (!TextUtils.isEmpty(iso3)) {
+          iso3ToIso2.put(iso3, iso2);
+        }
+      } catch (MissingResourceException e) {
+        // Shouldn't happen for list of known languages, but we don't want to throw either.
+      }
+    }
+    // Add additional ISO 639-2/B codes to mapping.
+    for (int i = 0; i < iso3BibliographicalToIso2.length; i += 2) {
+      iso3ToIso2.put(iso3BibliographicalToIso2[i], iso3BibliographicalToIso2[i + 1]);
+    }
+    return iso3ToIso2;
+  }
+
+  // See https://en.wikipedia.org/wiki/List_of_ISO_639-2_codes.
+  private static final String[] iso3BibliographicalToIso2 =
+      new String[] {
+        "alb", "sq",
+        "arm", "hy",
+        "baq", "eu",
+        "bur", "my",
+        "tib", "bo",
+        "chi", "zh",
+        "cze", "cs",
+        "dut", "nl",
+        "ger", "de",
+        "gre", "el",
+        "fre", "fr",
+        "geo", "ka",
+        "ice", "is",
+        "mac", "mk",
+        "mao", "mi",
+        "may", "ms",
+        "per", "fa",
+        "rum", "ro",
+        "slo", "sk",
+        "wel", "cy"
+      };
+
  /**
   * Allows the CRC calculation to be done byte by byte instead of bit per bit being the order
   * "most significant bit first".
--- a/library/core/src/test/java/com/google/android/exoplayer2/util/UtilTest.java
+++ b/library/core/src/test/java/com/google/android/exoplayer2/util/UtilTest.java
@ -268,14 +268,15 @@ public class UtilTest {
  @Test
  @Config(sdk = 21)
  public void testNormalizeLanguageCodeV21() {
-    assertThat(Util.normalizeLanguageCode("es")).isEqualTo("spa");
-    assertThat(Util.normalizeLanguageCode("spa")).isEqualTo("spa");
-    assertThat(Util.normalizeLanguageCode("es-AR")).isEqualTo("spa-ar");
-    assertThat(Util.normalizeLanguageCode("SpA-ar")).isEqualTo("spa-ar");
-    assertThat(Util.normalizeLanguageCode("es-AR-dialect")).isEqualTo("spa-ar-dialect");
-    assertThat(Util.normalizeLanguageCode("es-419")).isEqualTo("spa-419");
-    assertThat(Util.normalizeLanguageCode("zh-hans-tw")).isEqualTo("zho-hans-tw");
-    assertThat(Util.normalizeLanguageCode("zh-tw-hans")).isEqualTo("zho-tw");
+    assertThat(Util.normalizeLanguageCode("es")).isEqualTo("es");
+    assertThat(Util.normalizeLanguageCode("spa")).isEqualTo("es");
+    assertThat(Util.normalizeLanguageCode("es-AR")).isEqualTo("es-ar");
+    assertThat(Util.normalizeLanguageCode("SpA-ar")).isEqualTo("es-ar");
+    assertThat(Util.normalizeLanguageCode("es-AR-dialect")).isEqualTo("es-ar-dialect");
+    assertThat(Util.normalizeLanguageCode("ES-419")).isEqualTo("es-419");
+    assertThat(Util.normalizeLanguageCode("zh-hans-tw")).isEqualTo("zh-hans-tw");
+    assertThat(Util.normalizeLanguageCode("zh-tw-hans")).isEqualTo("zh-tw");
+    assertThat(Util.normalizeLanguageCode("zho-hans-tw")).isEqualTo("zh-hans-tw");
    assertThat(Util.normalizeLanguageCode("und")).isEqualTo("und");
    assertThat(Util.normalizeLanguageCode("DoesNotExist")).isEqualTo("doesnotexist");
  }
@ -283,13 +284,38 @@ public class UtilTest {
  @Test
  @Config(sdk = 16)
  public void testNormalizeLanguageCode() {
-    assertThat(Util.normalizeLanguageCode("es")).isEqualTo("spa");
-    assertThat(Util.normalizeLanguageCode("spa")).isEqualTo("spa");
+    assertThat(Util.normalizeLanguageCode("es")).isEqualTo("es");
+    assertThat(Util.normalizeLanguageCode("spa")).isEqualTo("es");
    assertThat(Util.normalizeLanguageCode("es-AR")).isEqualTo("es-ar");
    assertThat(Util.normalizeLanguageCode("und")).isEqualTo("und");
    assertThat(Util.normalizeLanguageCode("DoesNotExist")).isEqualTo("doesnotexist");
  }

+  @Test
+  public void testNormalizeIso6392BibliographicalAndTextualCodes() {
+    // See https://en.wikipedia.org/wiki/List_of_ISO_639-2_codes.
+    assertThat(Util.normalizeLanguageCode("alb")).isEqualTo(Util.normalizeLanguageCode("sqi"));
+    assertThat(Util.normalizeLanguageCode("arm")).isEqualTo(Util.normalizeLanguageCode("hye"));
+    assertThat(Util.normalizeLanguageCode("baq")).isEqualTo(Util.normalizeLanguageCode("eus"));
+    assertThat(Util.normalizeLanguageCode("bur")).isEqualTo(Util.normalizeLanguageCode("mya"));
+    assertThat(Util.normalizeLanguageCode("chi")).isEqualTo(Util.normalizeLanguageCode("zho"));
+    assertThat(Util.normalizeLanguageCode("cze")).isEqualTo(Util.normalizeLanguageCode("ces"));
+    assertThat(Util.normalizeLanguageCode("dut")).isEqualTo(Util.normalizeLanguageCode("nld"));
+    assertThat(Util.normalizeLanguageCode("fre")).isEqualTo(Util.normalizeLanguageCode("fra"));
+    assertThat(Util.normalizeLanguageCode("geo")).isEqualTo(Util.normalizeLanguageCode("kat"));
+    assertThat(Util.normalizeLanguageCode("ger")).isEqualTo(Util.normalizeLanguageCode("deu"));
+    assertThat(Util.normalizeLanguageCode("gre")).isEqualTo(Util.normalizeLanguageCode("ell"));
+    assertThat(Util.normalizeLanguageCode("ice")).isEqualTo(Util.normalizeLanguageCode("isl"));
+    assertThat(Util.normalizeLanguageCode("mac")).isEqualTo(Util.normalizeLanguageCode("mkd"));
+    assertThat(Util.normalizeLanguageCode("mao")).isEqualTo(Util.normalizeLanguageCode("mri"));
+    assertThat(Util.normalizeLanguageCode("may")).isEqualTo(Util.normalizeLanguageCode("msa"));
+    assertThat(Util.normalizeLanguageCode("per")).isEqualTo(Util.normalizeLanguageCode("fas"));
+    assertThat(Util.normalizeLanguageCode("rum")).isEqualTo(Util.normalizeLanguageCode("ron"));
+    assertThat(Util.normalizeLanguageCode("slo")).isEqualTo(Util.normalizeLanguageCode("slk"));
+    assertThat(Util.normalizeLanguageCode("tib")).isEqualTo(Util.normalizeLanguageCode("bod"));
+    assertThat(Util.normalizeLanguageCode("wel")).isEqualTo(Util.normalizeLanguageCode("cym"));
+  }
+
  private static void assertEscapeUnescapeFileName(String fileName, String escapedFileName) {
    assertThat(escapeFileName(fileName)).isEqualTo(escapedFileName);
    assertThat(unescapeFileName(escapedFileName)).isEqualTo(fileName);
--- a/library/hls/src/test/java/com/google/android/exoplayer2/source/hls/playlist/HlsMasterPlaylistParserTest.java
+++ b/library/hls/src/test/java/com/google/android/exoplayer2/source/hls/playlist/HlsMasterPlaylistParserTest.java
@ -263,7 +263,7 @@ public class HlsMasterPlaylistParserTest {
    Format closedCaptionFormat = playlist.muxedCaptionFormats.get(0);
    assertThat(closedCaptionFormat.sampleMimeType).isEqualTo(MimeTypes.APPLICATION_CEA708);
    assertThat(closedCaptionFormat.accessibilityChannel).isEqualTo(4);
-    assertThat(closedCaptionFormat.language).isEqualTo("spa");
+    assertThat(closedCaptionFormat.language).isEqualTo("es");
  }

  @Test