Commit 5d6411a1 authored by groby, committed by Commit bot

[Hunspell Fuzzer] Restrict fuzzer data to valid UTF8

Hunspell cannot handle invalid UTF8. Chrome makes sure that all
data is in valid UTF8 - this CL enforces the same for the fuzzer.

BUG=none
R=mmoroz@chromium.org

Review-Url: https://codereview.chromium.org/2223603002
Cr-Commit-Position: refs/heads/master@{#414878}
parent 1333f1b9
......@@ -119,6 +119,7 @@ fuzzer_test("hunspell_fuzzer") {
]
deps = [
":hunspell",
"//base:base",
]
# This is a dictionary for the fuzzer, not a spellcheck dictionary.
......
include_rules = [
"+base",
]
......@@ -6,6 +6,8 @@
#include <stdint.h>
#include <string>
#include "base/strings/string16.h"
#include "base/strings/utf_string_conversions.h"
#include "third_party/hunspell/src/hunspell/hunspell.hxx"
#include "third_party/hunspell/fuzz/hunspell_fuzzer_hunspell_dictionary.h"
......@@ -18,11 +20,17 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
sizeof(kHunspellDictionary));
std::string data_string(reinterpret_cast<const char*>(data), size);
// Hunspell cannot handle invalid UTF-8. To avoid passing it any, do the same
// thing Chromium does: convert to UTF-16 and back to UTF-8, which guarantees valid UTF-8.
base::string16 utf16_string = base::UTF8ToUTF16(data_string);
data_string = base::UTF16ToUTF8(utf16_string);
hunspell->spell(data_string.c_str());
char** suggestions = nullptr;
int suggetion_length = hunspell->suggest(&suggestions, data_string.c_str());
hunspell->free_list(&suggestions, suggetion_length);
int suggestion_length = hunspell->suggest(&suggestions, data_string.c_str());
hunspell->free_list(&suggestions, suggestion_length);
return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment