tools: add tests for AffReader

This class has been around since the initial commit. This change makes it not exit(1) on errors, then adds test coverage for most of it.

Bug: None
Change-Id: I5c366415c34abaa6bb90ce4657f2f94c3a45a1ad
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2040211
Reviewed-by: Nico Weber <thakis@chromium.org>
Commit-Queue: Elly Fong-Jones <ellyjones@chromium.org>
Cr-Commit-Position: refs/heads/master@{#739015}

tools: add tests for AffReader
This class has been around since the initial commit. This change makes it not exit(1) on errors, then adds test coverage for most of it.

Bug: None
Change-Id: I5c366415c34abaa6bb90ce4657f2f94c3a45a1ad
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2040211
Reviewed-by: Nico Weber <thakis@chromium.org>
Commit-Queue: Elly Fong-Jones <ellyjones@chromium.org>
Cr-Commit-Position: refs/heads/master@{#739015}
d704d1f2 · Elly Fong-Jones · Commit Bot · a0ecc777 · d704d1f2 · d704d1f2
Commit d704d1f2 authored Feb 06, 2020 by Elly Fong-Jones Committed by Commit Bot Feb 06, 2020
11 changed files
--- a/chrome/test/BUILD.gn
+++ b/chrome/test/BUILD.gn
@@ -4479,6 +4479,20 @@ test("unit_tests") {
      "../tools/convert_dict/convert_dict_unittest.cc",
    ]
+    if (!is_mac) {
+      sources += [ "../tools/convert_dict/aff_reader_unittest.cc" ]
+    }
+    data += [
+      "data/convert_dict/affix-flags.aff",
+      "data/convert_dict/empty.aff",
+      "data/convert_dict/indexed-affix.aff",
+      "data/convert_dict/leading-comment.aff",
+      "data/convert_dict/other-commands.aff",
+      "data/convert_dict/prefix-suffix.aff",
+      "data/convert_dict/rep.aff",
+    ]
    if (use_renderer_spellchecker) {
      sources +=
          [ "../browser/spellchecker/spell_check_host_chrome_impl_unittest.cc" ]

--- a/chrome/test/data/convert_dict/affix-flags.aff
+++ b/chrome/test/data/convert_dict/affix-flags.aff
+# This file defines a group of affixes using the AF keyword.
+AF 3
+AF Foo
+AF Bar
+AF Foobar
--- a/chrome/test/data/convert_dict/empty.aff
+++ b/chrome/test/data/convert_dict/empty.aff
--- a/chrome/test/data/convert_dict/indexed-affix.aff
+++ b/chrome/test/data/convert_dict/indexed-affix.aff
+# This file exercises "continuation classes", whereby an affix can declare that
+# it may be followed by another affix. This file declares two affixes:
+#   "s", which may be applied to any ADJECTIVE (e.g. "wearable" -> "wearables")
+#   "able", which may be applied to any VERB (e.g. "wear" -> "wearable")
+SFX ADJECTIVE Y 1
+SFX ADJECTIVE 0 s .
+SFX VERB Y 1
+SFX VERB 0 able/ADJECTIVE .
--- a/chrome/test/data/convert_dict/leading-comment.aff
+++ b/chrome/test/data/convert_dict/leading-comment.aff
+# This file has a leading comment, containing the word 'Foobar'.
--- a/chrome/test/data/convert_dict/other-commands.aff
+++ b/chrome/test/data/convert_dict/other-commands.aff
+# This file uses commands the parser doesn't know about at all.
+FOOBAR foo bar
--- a/chrome/test/data/convert_dict/prefix-suffix.aff
+++ b/chrome/test/data/convert_dict/prefix-suffix.aff
+# This file describes two rules for an affix class named VERB:
+# * Words in affix class VERB can be prefixed with "re"
+# * Words in affix class VERB can be suffixed with:
+#    * "ed" if they do not end in "y"
+#    * "ied", deleting the trailing y, if they do end in "y"
+# Define the affix class we're going to use:
+AF 1
+AF VERB
+# The fields of PFX and SFX are either:
+#    ?FX <class> <cross> <count>
+# or ?FX <class> <remove> <add> <match> [...]
+# where the ? may be either P (for prefix) or S (for suffix).
+# The first type of line is a header, which describes how many of this type of
+# ?FX there are for this class. The second type of line describes how to
+# construct a new word using this affix:
+#   For words in <class>, if the end being considered matches <match>, you can
+#   remove <remove> from that end, then add <add> at that end.
+# Here "the end being considered" is the beginning for PFX and the end for SFX.
+# The meaning of <cross> and of the trailing stuff that is allowed in the second
+# type of line (the morphology) is obscure to me (ellyjones@).
+PFX VERB Y  1
+PFX VERB 0 re .
+SFX VERB Y   2
+SFX VERB 0  ed [^y]
+SFX VERB y ied y
--- a/chrome/test/data/convert_dict/rep.aff
+++ b/chrome/test/data/convert_dict/rep.aff
+# This file exercises the REP syntax for replacements.
+REP 3
+REP f ph
+REP ph f
+REP ^alot$ a_lot
--- a/chrome/tools/convert_dict/aff_reader.cc
+++ b/chrome/tools/convert_dict/aff_reader.cc
@@ -10,6 +10,7 @@
 #include "base/files/file_util.h"
 #include "base/i18n/icu_string_conversions.h"
+#include "base/logging.h"
 #include "base/strings/string_split.h"
 #include "base/strings/stringprintf.h"
 #include "base/strings/utf_string_conversions.h"
@@ -47,17 +48,6 @@ void CollapseDuplicateSpaces(std::string* str) {
  }
 }
-// Print an error message and terminate execution
-void Panic(const char* fmt, ...) {
-  va_list ap;
-  printf("ERROR: ");
-  va_start(ap, fmt);
-  vprintf(fmt, ap);
-  va_end(ap);
-  printf("\n");
-  exit(1);
-}
 }  // namespace
 AffReader::AffReader(const base::FilePath& path)
@@ -130,11 +120,13 @@ bool AffReader::Read() {
               StringBeginsWith(line, "MAP ")) {
      HandleEncodedCommand(line);
    } else if (StringBeginsWith(line, "IGNORE ")) {
-      Panic("We don't support the IGNORE command yet. This would change how "
+      LOG(FATAL)
-        "we would insert things in our lookup table.");
+          << "We don't support the IGNORE command yet. This would change how "
+             "we would insert things in our lookup table.";
    } else if (StringBeginsWith(line, "COMPLEXPREFIXES ")) {
-      Panic("We don't support the COMPLEXPREFIXES command yet. This would "
+      LOG(FATAL)
-        "mean we have to insert words backwards as well (I think)");
+          << "We don't support the COMPLEXPREFIXES command yet. This would "
+             "mean we have to insert words backwards as well (I think)";
    } else {
      // All other commands get stored in the other commands list.
      HandleRawCommand(line);
@@ -261,14 +253,12 @@ void AffReader::AddAffix(std::string* rule) {
              part.substr(slash_index + 1), " ",
              base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
          if (after_slash.size() == 0) {
-            Panic("Found 0 terms after slash in affix rule '%s', "
+            LOG(FATAL) << "Found 0 terms after slash in affix rule '" << part
-                      "but need at least 2.",
+                       << "' but need at least 2.";
-                   part.c_str());
          }
          if (after_slash.size() == 1) {
-            printf("WARNING: Found 1 term after slash in affix rule '%s', "
+            LOG(WARNING) << "Found 1 term after slash in affix rule '" << part
-                      "but expected at least 2. Adding '.'.\n",
+                         << "', but expected at least 2. Adding '.'.";
-                   part.c_str());
            after_slash.push_back(".");
          }
          // Note that we may get a third term here which is the morphological
@@ -281,10 +271,12 @@ void AffReader::AddAffix(std::string* rule) {
                                    after_slash[1].c_str());
        }
-        // Reencode from here
+        // Re-encode from here
        std::string reencoded;
-        if (!EncodingToUTF8(part, &reencoded))
+        if (!EncodingToUTF8(part, &reencoded)) {
-          Panic("Cannot encode affix rule part '%s' to utf8.", part.c_str());
+          LOG(FATAL) << "Cannot encode affix rule part '" << part
+                     << "' to utf8.";
+        }
        *rule = rule->substr(0, part_start) + reencoded;
        break;
@@ -303,13 +295,15 @@ void AffReader::AddReplacement(std::string* rule) {
  CollapseDuplicateSpaces(rule);
  std::string utf8rule;
-  if (!EncodingToUTF8(*rule, &utf8rule))
+  if (!EncodingToUTF8(*rule, &utf8rule)) {
-    Panic("Cannot encode replacement rule '%s' to utf8.", rule->c_str());
+    LOG(FATAL) << "Cannot encode replacement rule '" << *rule << "' to utf8.";
+  }
  // The first space separates key and value.
  size_t space_index = utf8rule.find(' ');
-  if (space_index == std::string::npos)
+  if (space_index == std::string::npos) {
-    Panic("Did not find a space in '%s'.", utf8rule.c_str());
+    LOG(FATAL) << "Did not find a space in '" << utf8rule << "'.";
+  }
  std::vector<std::string> split;
  split.push_back(utf8rule.substr(0, space_index));
@@ -329,8 +323,9 @@ void AffReader::HandleRawCommand(const std::string& line) {
 void AffReader::HandleEncodedCommand(const std::string& line) {
  std::string utf8;
-  if (!EncodingToUTF8(line, &utf8))
+  if (!EncodingToUTF8(line, &utf8)) {
-    Panic("Cannot encode command '%s' to utf8.", line.c_str());
+    LOG(FATAL) << "Cannot encode command '" << line << "' to utf8.";
+  }
  other_commands_.push_back(utf8);
 }

--- a/chrome/tools/convert_dict/aff_reader.h
+++ b/chrome/tools/convert_dict/aff_reader.h
@@ -59,10 +59,10 @@ class AffReader {
  std::vector<std::string> GetAffixGroups() const;
 private:
-  // Command-specific handlers. These are given the string folling the
+  // Command-specific handlers. These are given the string following the
  // command. The input rule may be modified arbitrarily by the function.
  int AddAffixGroup(std::string* rule);  // Returns the new affix group ID.
-  void AddAffix(std::string* rule);  // SFX/PFX
+  void AddAffix(std::string* rule);      // SFX/PFX
  void AddReplacement(std::string* rule);
  // void HandleFlag(std::string* rule);

--- a/chrome/tools/convert_dict/aff_reader_unittest.cc
+++ b/chrome/tools/convert_dict/aff_reader_unittest.cc
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// When understanding these test cases, the nuspell docs are a useful reference:
+//   https://github.com/nuspell/nuspell/wiki/Affix-File-Format
+#include "testing/gtest/include/gtest/gtest.h"
+#include "base/files/file_path.h"
+#include "base/path_service.h"
+#include "chrome/common/chrome_paths.h"
+#include "chrome/tools/convert_dict/aff_reader.h"
+namespace {
+
+// Returns the path of the file |name| inside the "convert_dict" directory
+// under chrome::DIR_TEST_DATA. Kept in an anonymous namespace so this helper
+// cannot collide with a same-named function in another test file linked into
+// the same binary.
+base::FilePath TestFilePath(const std::string& name) {
+  base::FilePath db_path;
+  // Report a failed directory lookup here rather than letting each test fail
+  // later with a confusing path that is relative to an empty root.
+  EXPECT_TRUE(base::PathService::Get(chrome::DIR_TEST_DATA, &db_path));
+  return db_path.AppendASCII("convert_dict").AppendASCII(name);
+}
+
+}  // namespace
+using convert_dict::AffReader;
+TEST(AffReaderTest, EmptyFile) {
+  // Reading empty.aff is expected to succeed.
+  AffReader empty_aff(TestFilePath("empty.aff"));
+  const bool read_ok = empty_aff.Read();
+  EXPECT_TRUE(read_ok);
+}
+TEST(AffReaderTest, LeadingComment) {
+  // leading-comment.aff consists of a single comment line that mentions
+  // 'Foobar'; that word must show up in the comments the reader collected.
+  AffReader aff(TestFilePath("leading-comment.aff"));
+  EXPECT_TRUE(aff.Read());
+  const auto foobar_pos = aff.comments().find("Foobar");
+  EXPECT_NE(std::string::npos, foobar_pos);
+}
+TEST(AffReaderTest, AffixFlags) {
+  AffReader reader(TestFilePath("affix-flags.aff"));
+  // Abort on a failed read so the indexed checks below never run against a
+  // reader that did not load affix-flags.aff.
+  ASSERT_TRUE(reader.Read());
+  // GetAffixGroups() returns its vector by value, so fetch it once instead of
+  // building a fresh copy for every expectation.
+  const auto groups = reader.GetAffixGroups();
+  // affix-flags.aff declares three groups; make sure all three indices used
+  // below are in range before dereferencing them.
+  ASSERT_GE(groups.size(), 3u);
+  EXPECT_EQ(groups[0], "AF Foo");
+  EXPECT_EQ(groups[1], "AF Bar");
+  EXPECT_EQ(groups[2], "AF Foobar");
+}
+TEST(AffReaderTest, PrefixSuffix) {
+  AffReader reader(TestFilePath("prefix-suffix.aff"));
+  // Abort on a failed read so the indexed checks below never run against a
+  // reader that did not load prefix-suffix.aff.
+  ASSERT_TRUE(reader.Read());
+  const auto& rules = reader.affix_rules();
+  // Only the two PFX lines (header and rule) are inspected; guard the
+  // indices so a short rule list fails cleanly instead of reading out of
+  // bounds.
+  ASSERT_GE(rules.size(), 2u);
+  EXPECT_EQ(rules[0], "PFX VERB Y 1");
+  EXPECT_EQ(rules[1], "PFX VERB 0 re .");
+}
+TEST(AffReaderTest, IndexedAffix) {
+  AffReader reader(TestFilePath("indexed-affix.aff"));
+  // Abort on a failed read so the indexed check below never runs against a
+  // reader that did not load indexed-affix.aff.
+  ASSERT_TRUE(reader.Read());
+  const auto& rules = reader.affix_rules();
+  // indexed-affix.aff contains four SFX lines; the last one (index 3) is the
+  // rule carrying the continuation class.
+  ASSERT_GE(rules.size(), 4u);
+  // The class name ("ADJECTIVE" in the input) should have been converted into
+  // an index in the AffReader's internal class table.
+  EXPECT_EQ(rules[3], "SFX VERB 0 able/1 .");
+}
+TEST(AffReaderTest, Rep) {
+  AffReader reader(TestFilePath("rep.aff"));
+  // Abort on a failed read so the indexed checks below never run against a
+  // reader that did not load rep.aff.
+  ASSERT_TRUE(reader.Read());
+  const auto& replacements = reader.replacements();
+  // rep.aff declares three REP entries; indices 0 and 2 are inspected below.
+  ASSERT_GE(replacements.size(), 3u);
+  EXPECT_EQ(replacements[0].first, "f");
+  EXPECT_EQ(replacements[0].second, "ph");
+  EXPECT_EQ(replacements[2].first, "^alot$");
+  // The "_" in the input should have been converted to a space - this is how
+  // multi-word suggestions are represented in AFF files.
+  EXPECT_EQ(replacements[2].second, "a lot");
+}
+TEST(AffReaderTest, OtherCommands) {
+  AffReader reader(TestFilePath("other-commands.aff"));
+  // Abort on a failed read so the indexed check below never runs against a
+  // reader that did not load other-commands.aff.
+  ASSERT_TRUE(reader.Read());
+  const auto& commands = reader.other_commands();
+  // The unrecognized "FOOBAR" line is expected to be stored verbatim as the
+  // first entry; guard the index so an empty list fails cleanly.
+  ASSERT_FALSE(commands.empty());
+  EXPECT_EQ(commands[0], "FOOBAR foo bar");
+}