Commit 48f74546, authored by Clemens Backes, committed by Chromium LUCI CQ

[v8][inspector][fuzzer] Use script to generate corpus

We currently use the existing v8 inspector tests as seed for the
inspector fuzzer. They don't work well for a couple of reasons though:

1) The inspector tests use utilities defined in the 'protocol-test.js'
   file, which is preloaded automatically by the inspector test runner.
   That file is part of the corpus, so the fuzzer might get lucky and
   combine 'protocol-test.js' with another corpus file, but in most
   cases it won't.
2) The inspector fuzzer does not provide all utils that the
   inspector-test executable provides. In particular, file access is
   prohibited, i.e. `utils.read` and `utils.load` are not available.
3) Some tests load more files as prerequisite, e.g. all wasm tests load
   the 'wasm-inspector-test.js' file which defines wasm-specific
   utilities.
4) The current corpus also includes the '*-expected.txt' files, which is
   not a big problem, but adds files to the corpus which are not
   interesting for the fuzzer.

This CL fixes all these issues by generating the corpus via a small
python script. The script scans the v8/test/inspector directory for
'*.js' files, and generates a respective fuzzer corpus file in the
output directory. This file includes a copy of the 'protocol-test.js'
file, and all `utils.load` calls are resolved as well.

R=mbarbella@chromium.org
CC=machenbach@chromium.org, szuend@chromium.org

Bug: chromium:1142437
Change-Id: I1f15182b7afcc7741c9daf96af8e734af3325f54
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2563552
Reviewed-by: Martin Barbella <mbarbella@chromium.org>
Commit-Queue: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#831926}
parent c961a1a1
......@@ -137,6 +137,18 @@ fuzzer_test("v8_script_parser_fuzzer") {
environment_variables = [ "AFL_DRIVER_DONT_DEFER=1" ]
}
# Directory below this target's gen dir that receives the generated seed
# corpus for the v8 inspector fuzzer.
v8_inspector_fuzzer_corpus_dir = "$target_gen_dir/v8_inspector_fuzzer_corpus"
# Runs generate_v8_inspector_fuzzer_corpus.py with two arguments: the v8
# inspector test directory (input) and the corpus directory (output), both
# rebased so they are relative to root_build_dir.
action("generate_v8_inspector_fuzzer_corpus") {
  script = "generate_v8_inspector_fuzzer_corpus.py"
  # NOTE(review): only the script itself is listed as a source, so edits to the
  # inspector tests presumably do not re-trigger this action -- confirm.
  sources = [ "generate_v8_inspector_fuzzer_corpus.py" ]
  args = [
    rebase_path("//v8/test/inspector/", root_build_dir),
    rebase_path(v8_inspector_fuzzer_corpus_dir, root_build_dir),
  ]
  # The declared output is the corpus directory as a whole, not the individual
  # files written into it.
  outputs = [ v8_inspector_fuzzer_corpus_dir ]
}
fuzzer_test("v8_inspector_fuzzer") {
sources = []
deps = [ "//v8:inspector_fuzzer" ]
......@@ -150,7 +162,8 @@ fuzzer_test("v8_inspector_fuzzer") {
"handle_segv=1",
]
dict = "dicts/generated/javascript.dict"
seed_corpus = "//v8/test/inspector/"
seed_corpus = v8_inspector_fuzzer_corpus_dir
seed_corpus_deps = [ ":generate_v8_inspector_fuzzer_corpus" ]
# The fuzzer is able to handle any input, but since the input is interpreted
# as JS code, restricting to ascii only will increase fuzzing efficiency.
......
#!/usr/bin/env python
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
import re
import shutil
import sys
# Matches a line that consists solely of a `utils.load('<path>');` call:
# optional surrounding whitespace, single or double quotes around the path and
# a mandatory trailing semicolon. Group 1 captures the path.
load_regexp = re.compile(r'^\s*utils\.load\([\'"]([^\'"]+)[\'"]\);\s*$')
# Directory against which loaded paths are resolved. It is None until main()
# assigns it.
load_root = None
def resolve_loads(output_file, input_lines, loaded_files):
    """Copy `input_lines` to `output_file`, inlining `utils.load` calls.

    Every line matching `load_regexp` is replaced by the (recursively
    resolved) content of the file it names; all other lines are written
    through unchanged.

    Args:
      output_file: Writable file-like object receiving the result.
      input_lines: Iterable of source lines, including their line endings.
      loaded_files: Set of paths loaded so far; passed on to load_file().
    """
    for source_line in input_lines:
        match = load_regexp.match(source_line)
        if match:
            # Replace the load call by the content of the referenced file.
            load_file(output_file, match.group(1), loaded_files)
        else:
            output_file.write(source_line)
def load_file(output_file, input_file, loaded_files):
    """Append the content of `input_file` to `output_file`.

    A '// Loaded from ...' marker comment is written first, then the file's
    lines are passed through resolve_loads() so nested loads are inlined too.

    Args:
      output_file: Writable file-like object receiving the content.
      input_file: Path of the file to inline, relative to `load_root`.
      loaded_files: Set of paths already inlined; `input_file` is added to it.

    Exits the process with a 'Recursive load' message if `input_file` is
    already in `loaded_files`. Entries are never removed from the set, so any
    repeated load sharing that set triggers this, not only cyclic ones.
    """
    if input_file in loaded_files:
        sys.exit('Recursive load of \'{}\''.format(input_file))
    loaded_files.add(input_file)

    marker = '\n// Loaded from \'{}\':\n'.format(input_file)
    output_file.write(marker)

    source_path = os.path.join(load_root, input_file)
    with open(source_path) as source:
        source_lines = source.readlines()
        resolve_loads(output_file, source_lines, loaded_files)
def generate_content(output_file, input_file):
    """Write one self-contained fuzzer corpus entry to `output_file`.

    The entry consists of, in order: a JS prelude wrapping `utils` in a Proxy,
    the inlined 'protocol-test.js', and the inlined `input_file`.

    Args:
      output_file: Writable file-like object for the corpus entry.
      input_file: Path of the test to inline, relative to `load_root`.
    """
    # The fuzzer's 'utils' object lacks some of the methods that the
    # inspector-test executable provides. The proxy answers every unknown
    # property with an identity function so such calls do not throw.
    utils_proxy_prelude = """
utils = new Proxy(utils, {
get: function(target, prop) {
if (prop in target) return target[prop];
return i=>i;
}
});
""".lstrip()
    output_file.write(utils_proxy_prelude)

    # The test runner always loads 'protocol-test.js' before an inspector
    # test, so it is inlined first, followed by the actual test. Each of the
    # two gets its own fresh set of already-loaded files.
    protocol_test_file = os.path.join('test', 'inspector', 'protocol-test.js')
    for script in (protocol_test_file, input_file):
        load_file(output_file, script, set())
def _unique_output_path(output_root, input_root, abs_input_file):
    """Return a not-yet-existing flat path in `output_root` for a test file.

    The plain basename is used when it is still free. Otherwise the path
    relative to `input_root`, with separators replaced by '_', is used (plus a
    numeric suffix should even that be taken).
    """
    candidate = os.path.join(output_root, os.path.basename(abs_input_file))
    if not os.path.exists(candidate):
        return candidate
    rel_path = os.path.relpath(abs_input_file, input_root)
    stem, ext = os.path.splitext(rel_path.replace(os.sep, '_'))
    candidate = os.path.join(output_root, stem + ext)
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(
            output_root, '{}_{}{}'.format(stem, counter, ext))
        counter += 1
    return candidate


def main():
    """Generate the inspector fuzzer corpus from the inspector tests.

    Expects exactly two command line arguments: the input directory holding
    the inspector tests and the output directory for the corpus. The output
    directory is deleted and recreated, then one corpus file is written for
    every '*.js' file found below the input directory.

    The corpus is flat. Tests in different subdirectories that share a
    basename get distinct output names instead of overwriting each other, and
    the walk is sorted so the result does not depend on directory listing
    order.

    Always terminates via sys.exit(): status 1 (usage on stderr) for a wrong
    argument count, status 0 on success.
    """
    global load_root

    if len(sys.argv) != 3:
        sys.stderr.write(
            'Usage: {} <path to input directory> <path to output directory>\n'
            .format(sys.argv[0]))
        sys.exit(1)
    input_root = sys.argv[1]
    output_root = sys.argv[2]

    # Start with a clean output directory.
    if os.path.exists(output_root):
        shutil.rmtree(output_root)
    os.makedirs(output_root)

    # Loaded files are relative to the v8 root, which is two levels above the
    # inspector test directory.
    load_root = os.path.dirname(os.path.dirname(os.path.normpath(input_root)))

    for parent, dirs, files in os.walk(input_root):
        # Sorting in place fixes the traversal order of the subdirectories.
        dirs.sort()
        for filename in sorted(files):
            if not filename.endswith('.js'):
                continue
            abs_input_file = os.path.join(parent, filename)
            rel_input_file = os.path.relpath(abs_input_file, load_root)
            output_path = _unique_output_path(
                output_root, input_root, abs_input_file)
            with open(output_path, 'w') as output_file:
                generate_content(output_file, rel_input_file)

    # Done.
    sys.exit(0)
# Run the generator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment