ChromeVox: load all available phonetic dictionaries at build time.

This change adds a build action that decompresses all localized message files and extracts their phonetic dictionaries. These are then written to a generated .js file and loaded into ChromeVox. The getPhoneticDisambiguation API checks the generated contents when doing lookups and a TtsBackground test has been added to confirm this behavior.

Tangible effect: phonetic hints are given when moving by character when language switching is active.

Bug: 923068
Relnotes: N/A
Change-Id: Ia10d2f2344fe543f65279bbbfb54f4c72f5ad8da
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2144405
Commit-Queue: Akihiro Ota <akihiroota@chromium.org>
Reviewed-by: Dominic Mazzoni <dmazzoni@chromium.org>
Cr-Commit-Position: refs/heads/master@{#759849}

ChromeVox: load all available phonetic dictionaries at build time.
This change adds a build action that decompresses all localized message files and extracts their phonetic dictionaries. These are then written to a generated .js file and loaded into ChromeVox. The getPhoneticDisambiguation API checks the generated contents when doing lookups and a TtsBackground test has been added to confirm this behavior.

Tangible effect: phonetic hints are given when moving by character when language switching is active.

Bug: 923068
Relnotes: N/A
Change-Id: Ia10d2f2344fe543f65279bbbfb54f4c72f5ad8da
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2144405
Commit-Queue: Akihiro Ota <akihiroota@chromium.org>
Reviewed-by: Dominic Mazzoni <dmazzoni@chromium.org>
Cr-Commit-Position: refs/heads/master@{#759849}
f5f18cd4 · Akihiro Ota · Commit Bot · 0fbb4553 · f5f18cd4 · f5f18cd4
Commit f5f18cd4 authored Apr 16, 2020 by Akihiro Ota Committed by Commit Bot Apr 16, 2020
6 changed files
--- a/chrome/browser/resources/chromeos/accessibility/chromevox/BUILD.gn
+++ b/chrome/browser/resources/chromeos/accessibility/chromevox/BUILD.gn
@@ -140,6 +140,7 @@ chromevox_out_dir = "$root_out_dir/resources/chromeos/accessibility/chromevox"
 group("build") {
  deps = [
    ":chromevox_copied_files",
+    ":chromevox_phonetic_dictionaries_js",
    "//chrome/browser/resources/chromeos/accessibility/braille_ime:braille_ime_manifest",
    "//third_party/chromevox:chromevox_third_party_resources",
    "//third_party/liblouis",
@@ -252,6 +253,20 @@ template("generate_deps_js") {
  }
 }
+# Runs tools/phonetic_dictionaries.py over the accessibility _locales directory
+# in the build output and writes phonetic_dictionaries.js into the ChromeVox
+# output directory.
+action("chromevox_phonetic_dictionaries_js") {
+  script = "tools/phonetic_dictionaries.py"
+  src_dir = [ "$root_out_dir/resources/chromeos/accessibility/_locales" ]
+  output_file = "$chromevox_out_dir/phonetic_dictionaries.js"
+  outputs = [ output_file ]
+  # NOTE(review): presumably this target is what produces the message files
+  # under _locales that the script reads - confirm.
+  deps = [
+    "//chrome/browser/resources/chromeos/accessibility:accessibility_strings",
+  ]
+  # Command line is "-o <output file> <input directory>"; both paths are
+  # rebased against root_build_dir.
+  args = [
+           "-o",
+           rebase_path(output_file, root_build_dir),
+         ] + rebase_path(src_dir, root_build_dir)
+}
 if (chromevox_compress_js) {
  template("compress_js") {
    run_jsbundler(target_name) {

--- a/chrome/browser/resources/chromeos/accessibility/chromevox/background/background.html
+++ b/chrome/browser/resources/chromeos/accessibility/chromevox/background/background.html
@@ -8,6 +8,9 @@
 <!-- <script src="loader.js"></script> -->
 <script src="../chromeVoxChromeBackgroundScript.js" charset="utf-8">
 </script>
+<!-- Generated at build time -->
+<script type="text/javascript" src="../phonetic_dictionaries.js"
+  charset="utf-8"></script>
 <!-- Third party -->
 <!-- Speech Rule Engine -->

--- a/chrome/browser/resources/chromeos/accessibility/chromevox/background/externs.js
+++ b/chrome/browser/resources/chromeos/accessibility/chromevox/background/externs.js
@@ -6,6 +6,7 @@
 var localStorage = {};
 var sessionStorage = {};
+var PhoneticDictionaries = {};
 /** Speech Rule Engine. See sre_browser.js. */
 var SRE = {};

--- a/chrome/browser/resources/chromeos/accessibility/chromevox/background/phonetic_data.js
+++ b/chrome/browser/resources/chromeos/accessibility/chromevox/background/phonetic_data.js
@@ -12,58 +12,43 @@ goog.provide('PhoneticData');
 goog.require('JaPhoneticData');
-/**
- * Maps languages to their phonetic maps.
- * @type {Object<string,Object<string,string>>}
- * @private
- */
-PhoneticData.phoneticMap_ = {};
 /**
 * Initialization function for PhoneticData.
 */
 PhoneticData.init = function() {
  JaPhoneticData.init();
-  try {
-    // The UI language of the browser. This corresponds to the system language
-    // set by the user. Behind the scenes, the getUIlanguage() API retrieves the
-    // locale that was passed from the browser to the renderer via the --lang
-    // command line flag.
-    const browserUILanguage = chrome.i18n.getUILanguage().toLowerCase();
-    // Phonetic disambiguation data for the browserUI language.
-    // This is loaded from a chromevox_strings_*.xtb file, where * is a variable
-    // language code that corresponds to the system language.
-    const browserUILanguagePhoneticMap = /** @type {Object<string,string>} */
-        (JSON.parse(Msgs.getMsg('phonetic_map')));
-    PhoneticData.phoneticMap_[browserUILanguage] = browserUILanguagePhoneticMap;
-  } catch (e) {
-    console.log('Error: unable to parse phonetic map message.');
-  }
-  PhoneticData.phoneticMap_['ja'] = JaPhoneticData.phoneticMap_;
 };
 /**
- * Returns the phonetic disambiguation for the provided character in the
+ * Returns the phonetic disambiguation for |character| in |locale|.
- * provided language. Returns empty string if disambiguation can't be found.
+ * Returns empty string if disambiguation can't be found.
- * @param {string} language
+ * @param {string} locale
 * @param {string} character
 * @return {string}
 */
-PhoneticData.getPhoneticDisambiguation = function(language, character) {
+PhoneticData.getPhoneticDisambiguation = function(locale, character) {
-  if (!language || !character) {
+  const phoneticDictionaries =
+      chrome.extension.getBackgroundPage().PhoneticDictionaries;
+  if (!locale || !character || !phoneticDictionaries ||
+      !phoneticDictionaries.phoneticMap_) {
    return '';
  }
-  language = language.toLowerCase();
+  locale = locale.toLowerCase();
  character = character.toLowerCase();
-  // If language isn't in the map, try stripping extra information, such as the
+  let map = null;
-  // country and/or script codes (e.g. "en-us" or "zh-hant-hk") and use only the
+  if (locale === 'ja') {
-  // language code to do a lookup.
+    map = JaPhoneticData.phoneticMap_;
-  if (!PhoneticData.phoneticMap_[language]) {
+  } else {
-    language = language.split('-')[0];
+    // Try a lookup using |locale|, but use only the language component if the
+    // lookup fails, e.g. "en-us" -> "en" or "zh-hant-hk" -> "zh".
+    map = phoneticDictionaries.phoneticMap_[locale] ||
+        phoneticDictionaries.phoneticMap_[locale.split('-')[0]];
  }
-  // If language still isn't in the map, return empty string.
-  if (!PhoneticData.phoneticMap_[language]) {
+  if (!map) {
    return '';
  }
-  return PhoneticData.phoneticMap_[language][character] || '';
+  return map[character] || '';
 };
--- a/chrome/browser/resources/chromeos/accessibility/chromevox/common/tts_background_test.js
+++ b/chrome/browser/resources/chromeos/accessibility/chromevox/common/tts_background_test.js
@@ -248,3 +248,46 @@ SYNC_TEST_F('ChromeVoxTtsBackgroundTest', 'SplitUntilSmall', function() {
  assertEqualsJSON(['a'], split('a', 'a'));
  assertEqualsJSON(['a'], split('a', 'b'));
 });
+// Checks that speaking a single character with phoneticCharacters enabled
+// produces both the character and its phonetic hint for several language
+// codes, and only the character for an unrecognized language code.
+SYNC_TEST_F('ChromeVoxTtsBackgroundTest', 'Phonetics', function() {
+  const tts = new TtsBackground(false);
+  let spokenStrings = [];
+  // Replace speakUsingQueue_ with a stub that records each utterance's
+  // textString so the assertions below can inspect what would be spoken.
+  tts.speakUsingQueue_ = (utterance, ...rest) => {
+    spokenStrings.push(utterance.textString);
+  };
+  // Note: spokenStrings is only cleared between languages, so the assertions
+  // for the second character of a language also see the first character's
+  // strings.
+  // English.
+  tts.speak('t', QueueMode.QUEUE, {lang: 'en-us', phoneticCharacters: true});
+  assertTrue(spokenStrings.includes('T'));
+  assertTrue(spokenStrings.includes('tango'));
+  tts.speak('a', QueueMode.QUEUE, {lang: 'en-us', phoneticCharacters: true});
+  assertTrue(spokenStrings.includes('A'));
+  assertTrue(spokenStrings.includes('alpha'));
+  spokenStrings = [];
+  // German.
+  tts.speak('t', QueueMode.QUEUE, {lang: 'de', phoneticCharacters: true});
+  assertTrue(spokenStrings.includes('T'));
+  assertTrue(spokenStrings.includes('Theodor'));
+  tts.speak('a', QueueMode.QUEUE, {lang: 'de', phoneticCharacters: true});
+  assertTrue(spokenStrings.includes('A'));
+  assertTrue(spokenStrings.includes('Anton'));
+  spokenStrings = [];
+  // Japanese, covering both Latin letters and a kanji character.
+  tts.speak('t', QueueMode.QUEUE, {lang: 'ja', phoneticCharacters: true});
+  assertTrue(spokenStrings.includes('T'));
+  assertTrue(spokenStrings.includes('ティー タイム'));
+  tts.speak('a', QueueMode.QUEUE, {lang: 'ja', phoneticCharacters: true});
+  assertTrue(spokenStrings.includes('A'));
+  assertTrue(spokenStrings.includes('エイ アニマル'));
+  tts.speak('人', QueueMode.QUEUE, {lang: 'ja', phoneticCharacters: true});
+  assertTrue(spokenStrings.includes('人'));
+  assertTrue(spokenStrings.includes('ヒト，ニンゲン ノ ニン'));
+  spokenStrings = [];
+  // Error handling: for an unrecognized language code exactly one string, the
+  // character itself, is expected.
+  tts.speak('t', QueueMode.QUEUE, {lang: 'qwerty', phoneticCharacters: true});
+  assertTrue(spokenStrings.includes('T'));
+  assertEquals(1, spokenStrings.length);
+});
--- a/chrome/browser/resources/chromeos/accessibility/chromevox/tools/phonetic_dictionaries.py
+++ b/chrome/browser/resources/chromeos/accessibility/chromevox/tools/phonetic_dictionaries.py
+#!/usr/bin/env python
+# Copyright 2020 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+'''Generates phonetic_dictionaries.js'''
+import gzip
+import json
+import optparse
+import os
+import sys
+HEADER = '''goog.provide('PhoneticDictionaries');
+PhoneticDictionaries.phoneticMap_ = {
+'''
+CONTENT_TEMPLATE = '''"%(locale)s": %(data)s,
+'''
+FOOTER = '''};
+'''
+def quit(message):
+  '''Writes |message| to stderr and exits the program with status 1.'''
+  # NOTE(review): this definition shadows the interpreter's `quit` builtin
+  # within this module.
+  sys.stderr.write(message + '\n')
+  sys.exit(1)
+def open_file(filename):
+    '''Opens |filename| for reading; names ending in '.gz' go through gzip.
+    Both calls use their default mode, so gzip.open yields bytes while the
+    built-in open yields text.
+    '''
+    if filename.endswith('.gz'):
+        return gzip.open(filename)
+    return open(filename)
+def main():
+    '''Builds the generated JS file from per-locale message files.
+    Expects "-o <output file>" plus exactly one positional input directory.
+    Each subdirectory of the input directory that contains exactly one file is
+    treated as a locale; that file is parsed as JSON and its
+    CHROMEVOX_PHONETIC_MAP message is emitted as the value for that locale.
+    '''
+    # Parse input.
+    parser = optparse.OptionParser(description=__doc__)
+    parser.add_option(
+      '-o',
+      '--output_file',
+      action='store',
+      metavar='SPEC',
+      help=('Where to output the generated file.'))
+    options, args = parser.parse_args()
+    if options.output_file is None:
+        quit('Output file not specified')
+    if len(args) != 1:
+        quit('Exactly one input directory must be specified')
+    dir_name = args[0]
+    out_file = options.output_file
+    output = HEADER
+    # Extract phonetic dictionaries from all compressed message files and write
+    # them to a .js file.
+    # NOTE(review): os.listdir returns entries in arbitrary order, so the order
+    # of locales in the generated file may differ between builds.
+    for locale in os.listdir(dir_name):
+        locale_dir = os.path.join(dir_name, locale)
+        if not os.path.isdir(locale_dir):
+            continue
+        # Locale directories that do not hold exactly one file are skipped
+        # silently.
+        files = os.listdir(locale_dir)
+        if not len(files) == 1:
+            continue
+        # NOTE(review): `file` shadows a Python 2 builtin name.
+        file = files[0]
+        file_path = os.path.join(locale_dir,file)
+        with open_file(file_path) as in_file:
+            # This parse happens outside the try block, so a message file that
+            # is not valid JSON raises instead of being skipped.
+            contents = json.loads(in_file.read().strip())
+            try:
+                # Parsing the message is a validity check only; the result in
+                # `test` is never read and the raw message string is emitted.
+                test = json.loads(contents['CHROMEVOX_PHONETIC_MAP']['message'])
+                if sys.version_info >= (3,0):
+                    data = contents['CHROMEVOX_PHONETIC_MAP']['message']
+                else:
+                    # Need to encode utf8 if running python 2.
+                    data = (contents['CHROMEVOX_PHONETIC_MAP']['message']
+                    .encode('utf-8'))
+                # Normalize the directory name into a key, e.g. "en_US" becomes
+                # "en-us".
+                locale = locale.replace('_', '-').lower()
+                output += CONTENT_TEMPLATE % {'locale': locale, 'data': data}
+            # NOTE(review): only ValueError is handled, so a locale whose
+            # message is not valid JSON is skipped, but a missing
+            # 'CHROMEVOX_PHONETIC_MAP' key would raise an uncaught KeyError.
+            except ValueError as e:
+                continue
+    # Write to file.
+    with open(out_file, 'w') as dest_file:
+        dest_file.write(output + FOOTER)
+if __name__ == '__main__':
+    main()