Split hunspell_fuzzer into separate spell and suggest fuzzers

Hunspell::suggest() is much slower than Hunspell::spell(), which results in the fuzzer running at ~1 case/sec on clusterfuzz. Splitting the fuzzer into two to test the functions separately allows spell() to be fuzzed much faster and more effectively. BUG=1009087 Change-Id: I733754bcf78a84bd4448ab2e753899fea947b309 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1847596 Reviewed-by: Rouslan Solomakhin <rouslan@chromium.org> Commit-Queue: Anand Mistry <amistry@chromium.org> Cr-Commit-Position: refs/heads/master@{#705447}

Split hunspell_fuzzer into separate spell and suggest fuzzers
Hunspell::suggest() is much slower than Hunspell::spell(), which results in the fuzzer running at ~1 case/sec on clusterfuzz. Splitting the fuzzer into two to test the functions separately allows spell() to be fuzzed much faster and more effectively. BUG=1009087 Change-Id: I733754bcf78a84bd4448ab2e753899fea947b309 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1847596 Reviewed-by: Rouslan Solomakhin <rouslan@chromium.org> Commit-Queue: Anand Mistry <amistry@chromium.org> Cr-Commit-Position: refs/heads/master@{#705447}
9e3901bb · Anand K. Mistry · Commit Bot · 1918f036 · 9e3901bb · 9e3901bb
Commit 9e3901bb authored Oct 14, 2019 by Anand K. Mistry Committed by Commit Bot Oct 14, 2019
3 changed files
--- a/third_party/hunspell/BUILD.gn
+++ b/third_party/hunspell/BUILD.gn
@@ -109,10 +109,24 @@ static_library("hunspell") {
  }
 }
-fuzzer_test("hunspell_fuzzer") {
+fuzzer_test("hunspell_spell_fuzzer") {
  sources = [
-    "fuzz/hunspell_fuzzer.cc",
    "fuzz/hunspell_fuzzer_hunspell_dictionary.h",
+    "fuzz/hunspell_spell_fuzzer.cc",
+  ]
+  deps = [
+    ":hunspell",
+    "//base:base",
+  ]
+  # This is a dictionary for the fuzzer, not a spellcheck dictionary.
+  dict = "fuzz/hunspell.dict"
+}
+fuzzer_test("hunspell_suggest_fuzzer") {
+  sources = [
+    "fuzz/hunspell_fuzzer_hunspell_dictionary.h",
+    "fuzz/hunspell_suggest_fuzzer.cc",
  ]
  deps = [
    ":hunspell",

--- a/third_party/hunspell/fuzz/hunspell_fuzzer.cc
+++ b/third_party/hunspell/fuzz/hunspell_fuzzer.cc
@@ -8,16 +8,16 @@
 #include "base/strings/string16.h"
 #include "base/strings/utf_string_conversions.h"
-#include "third_party/hunspell/src/hunspell/hunspell.hxx"
 #include "third_party/hunspell/fuzz/hunspell_fuzzer_hunspell_dictionary.h"
+#include "third_party/hunspell/src/hunspell/hunspell.hxx"
 // Entry point for LibFuzzer.
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  if (!size)
    return 0;
-  static Hunspell* hunspell = new Hunspell(kHunspellDictionary,
+  static Hunspell* hunspell =
-                                           sizeof(kHunspellDictionary));
+      new Hunspell(kHunspellDictionary, sizeof(kHunspellDictionary));
  std::string data_string(reinterpret_cast<const char*>(data), size);
@@ -28,8 +28,5 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  hunspell->spell(data_string);
-  std::vector<std::string> suggestions =
-      hunspell->suggest(data_string);
  return 0;
 }
--- a/third_party/hunspell/fuzz/hunspell_suggest_fuzzer.cc
+++ b/third_party/hunspell/fuzz/hunspell_suggest_fuzzer.cc
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+#include <stddef.h>
+#include <stdint.h>
+#include <string>
+#include "base/strings/string16.h"
+#include "base/strings/utf_string_conversions.h"
+#include "third_party/hunspell/fuzz/hunspell_fuzzer_hunspell_dictionary.h"
+#include "third_party/hunspell/src/hunspell/hunspell.hxx"
+// Entry point for LibFuzzer.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {  // Feeds one fuzz input to Hunspell::suggest(); always returns 0.
+  if (!size)  // Empty input: return without touching Hunspell.
+    return 0;
+  static Hunspell* hunspell =  // Function-local static: created on the first non-empty call, reused afterwards, never deleted here.
+      new Hunspell(kHunspellDictionary, sizeof(kHunspellDictionary));
+  std::string data_string(reinterpret_cast<const char*>(data), size);  // Copy the raw fuzz bytes into a std::string.
+  // hunspell is not handling invalid UTF8. To avoid that, do the same thing
+  // Chromium does - convert to UTF16, and back to UTF8. Valid UTF8 guaranteed.
+  base::string16 utf16_string = base::UTF8ToUTF16(data_string);
+  data_string = base::UTF16ToUTF8(utf16_string);
+  std::vector<std::string> suggestions = hunspell->suggest(data_string);  // Result is never read; only the call is exercised. NOTE(review): <vector> is not among this file's direct includes - confirm it arrives transitively.
+  return 0;
+}