Commit d3fdbbf1 authored by erikchen, committed by Commit Bot

Restart content shell before test retries when running all tests.

If we're retrying a test, then it's because we think it might be flaky and
rerunning it might provide a different result. Restarting content shell prevents
state from leaking from previous tests.
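
In sketch form, the retry handling amounts to overriding the configured batch
size, since a batch size of 1 gives every test a fresh content shell. A
minimal, self-contained illustration (the helper name is mine, not from this
CL):

    def effective_batch_size(configured_batch_size, retry_attempt):
        # On a retry pass, force a batch size of 1 so that no renderer
        # state survives into the retried test.
        if retry_attempt >= 1:
            return 1
        return configured_batch_size or 0

    assert effective_batch_size(5, retry_attempt=0) == 5
    assert effective_batch_size(5, retry_attempt=1) == 1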

This CL does not apply to '--gtest_repeat'. That will be modified to use this
same restart mechanism in a future CL.

This CL slightly changes the implementation of "batch-size". Previously, the
implementation relied on the assumption that "batch-size" never changed, and
restarted content shell after a test completed. This CL makes "batch-size" a
variable. To handle the case where "batch-size" is reduced mid-run [e.g. from
5 to 1, when 2 tests have already been run in the current batch], this CL
moves the restart of content shell to before the test is run, rather than
after.
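
A minimal sketch of the new ordering (names simplified from the diff below;
kill_drivers stands in for the real driver teardown):

    class BatchTracker(object):
        def __init__(self, kill_drivers):
            self._batch_count = 0
            self._kill_drivers = kill_drivers

        def before_test(self, batch_size):
            # Checking before the test means that a batch size which shrank
            # mid-run [e.g. from 5 to 1, with 2 tests already in the batch]
            # triggers an immediate restart; a check after the test would let
            # the stale content shell serve one more test.
            if batch_size > 0 and self._batch_count >= batch_size:
                self._kill_drivers()
                self._batch_count = 0
            self._batch_count += 1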

Bug: 889036
Change-Id: Ibe28474033ea1fb1e67b9c4b8320bdefe95331ad
Reviewed-on: https://chromium-review.googlesource.com/c/1288974
Reviewed-by: Dirk Pranke <dpranke@chromium.org>
Reviewed-by: Ned Nguyen <nednguyen@google.com>
Commit-Queue: Erik Chen <erikchen@chromium.org>
Cr-Commit-Position: refs/heads/master@{#601989}
parent 80659f16
@@ -74,6 +74,15 @@ class LayoutTestRunner(object):
         self._current_run_results = None
 
     def run_tests(self, expectations, test_inputs, tests_to_skip, num_workers, retry_attempt):
+        # If we're retrying a test, then it's because we think it might be flaky
+        # and rerunning it might provide a different result. We must restart
+        # content shell to get a valid result, as otherwise state can leak
+        # from previous tests. To do so, we set a batch size of 1, as that
+        # prevents content shell reuse.
+        batch_size = self._options.batch_size or 0
+        if retry_attempt >= 1:
+            batch_size = 1
+
         self._expectations = expectations
         self._test_inputs = test_inputs
         self._retry_attempt = retry_attempt
@@ -96,7 +105,7 @@ class LayoutTestRunner(object):
             test_inputs,
             int(self._options.child_processes),
             self._options.fully_parallel,
-            self._options.batch_size == 1)
+            batch_size == 1)
 
         self._reorder_tests_by_args(locked_shards)
         self._reorder_tests_by_args(unlocked_shards)
@@ -118,13 +127,13 @@ class LayoutTestRunner(object):
         start_time = time.time()
         try:
             with message_pool.get(self, self._worker_factory, num_workers, self._port.host) as pool:
-                pool.run(('test_list', shard.name, shard.test_inputs) for shard in all_shards)
+                pool.run(('test_list', shard.name, shard.test_inputs, batch_size) for shard in all_shards)
 
             if self._shards_to_redo:
                 num_workers -= len(self._shards_to_redo)
                 if num_workers > 0:
                     with message_pool.get(self, self._worker_factory, num_workers, self._port.host) as pool:
-                        pool.run(('test_list', shard.name, shard.test_inputs) for shard in self._shards_to_redo)
+                        pool.run(('test_list', shard.name, shard.test_inputs, batch_size) for shard in self._shards_to_redo)
         except TestRunInterruptedException as error:
             _log.warning(error.reason)
             test_run_results.interrupted = True
@@ -240,7 +249,6 @@ class Worker(object):
         # The remaining fields are initialized in start()
         self._host = None
         self._port = None
-        self._batch_size = None
         self._batch_count = None
         self._filesystem = None
         self._primary_driver = None
@@ -264,12 +272,11 @@ class Worker(object):
         self._secondary_driver = self._port.create_driver(self._worker_number)
 
         self._batch_count = 0
-        self._batch_size = self._options.batch_size or 0
 
-    def handle(self, name, source, test_list_name, test_inputs):
+    def handle(self, name, source, test_list_name, test_inputs, batch_size):
         assert name == 'test_list'
         for i, test_input in enumerate(test_inputs):
-            device_failed = self._run_test(test_input, test_list_name)
+            device_failed = self._run_test(test_input, test_list_name, batch_size)
             if device_failed:
                 self._caller.post('device_failed', test_list_name, test_inputs[i:])
                 self._caller.stop_running()
@@ -292,13 +299,14 @@ class Worker(object):
         test_input.should_run_pixel_test_first = (
             self._port.should_run_pixel_test_first(test_input.test_name))
 
-    def _run_test(self, test_input, shard_name):
-        self._batch_count += 1
-
-        stop_when_done = False
-        if self._batch_size > 0 and self._batch_count >= self._batch_size:
+    def _run_test(self, test_input, shard_name, batch_size):
+        # If the batch size has been exceeded, kill the drivers.
+        if batch_size > 0 and self._batch_count >= batch_size:
             self._kill_driver(self._primary_driver, 'primary')
             self._kill_driver(self._secondary_driver, 'secondary')
             self._batch_count = 0
-            stop_when_done = True
+        self._batch_count += 1
 
         self._update_test_input(test_input)
         start = time.time()
@@ -307,8 +315,7 @@ class Worker(object):
         self._caller.post('started_test', test_input)
         result = single_test_runner.run_single_test(
             self._port, self._options, self._results_directory, self._name,
-            self._primary_driver, self._secondary_driver, test_input,
-            stop_when_done)
+            self._primary_driver, self._secondary_driver, test_input)
         result.shard_name = shard_name
         result.worker_name = self._name
@@ -45,10 +45,10 @@ _log = logging.getLogger(__name__)
 
 def run_single_test(
         port, options, results_directory, worker_name, primary_driver,
-        secondary_driver, test_input, stop_when_done):
+        secondary_driver, test_input):
     runner = SingleTestRunner(
         port, options, results_directory, worker_name, primary_driver,
-        secondary_driver, test_input, stop_when_done)
+        secondary_driver, test_input)
     try:
         return runner.run()
     except DeviceFailure as error:
@@ -58,7 +58,7 @@ def run_single_test(
 
 class SingleTestRunner(object):
     def __init__(self, port, options, results_directory, worker_name,
-                 primary_driver, secondary_driver, test_input, stop_when_done):
+                 primary_driver, secondary_driver, test_input):
         self._port = port
         self._filesystem = port.host.filesystem
         self._options = options
@@ -71,7 +71,6 @@ class SingleTestRunner(object):
         self._should_run_pixel_test = test_input.should_run_pixel_test
         self._should_run_pixel_test_first = test_input.should_run_pixel_test_first
         self._reference_files = test_input.reference_files
-        self._stop_when_done = stop_when_done
 
         # If this is a virtual test that uses the default flags instead of the
         # virtual flags for its references, run it on the secondary driver so
@@ -139,7 +138,7 @@ class SingleTestRunner(object):
         # for timeouts and crashes (real or forced by the driver). Most crashes should
         # indicate problems found by a sanitizer (ASAN, LSAN, etc.), but we will report
         # on other crashes and timeouts as well in order to detect at least *some* basic failures.
-        driver_output = self._driver.run_test(self._driver_input(), self._stop_when_done)
+        driver_output = self._driver.run_test(self._driver_input())
         expected_driver_output = self._expected_driver_output()
         failures = self._handle_error(driver_output)
         test_result = TestResult(self._test_name, failures, driver_output.test_time, driver_output.has_stderr(),
@@ -150,7 +149,7 @@ class SingleTestRunner(object):
 
     def _run_compare_test(self):
         """Runs the single test and returns test result."""
-        driver_output = self._driver.run_test(self._driver_input(), self._stop_when_done)
+        driver_output = self._driver.run_test(self._driver_input())
         expected_driver_output = self._expected_driver_output()
 
         test_result = self._compare_output(expected_driver_output, driver_output)
@@ -162,7 +161,7 @@ class SingleTestRunner(object):
         """Similar to _run_compare_test(), but has the side effect of updating or adding baselines.
         This is called when --reset-results and/or --copy-baselines are specified in the command line.
         If --reset-results, in the returned result we treat baseline mismatch as success."""
-        driver_output = self._driver.run_test(self._driver_input(), self._stop_when_done)
+        driver_output = self._driver.run_test(self._driver_input())
         expected_driver_output = self._expected_driver_output()
         actual_failures = self._compare_output(expected_driver_output, driver_output).failures
         failures = self._handle_error(driver_output) if self._options.reset_results else actual_failures
@@ -450,7 +449,7 @@ class SingleTestRunner(object):
         return failures
 
     def _run_reftest(self):
-        test_output = self._driver.run_test(self._driver_input(), self._stop_when_done)
+        test_output = self._driver.run_test(self._driver_input())
         total_test_time = test_output.test_time
         expected_output = None
         test_result = None
@@ -485,7 +484,7 @@ class SingleTestRunner(object):
             reference_test_names.append(reference_test_name)
             driver_input = DriverInput(reference_test_name, self._timeout_ms,
                                        image_hash=test_output.image_hash, should_run_pixel_test=True, args=args)
-            expected_output = self._reference_driver.run_test(driver_input, self._stop_when_done)
+            expected_output = self._reference_driver.run_test(driver_input)
             total_test_time += expected_output.test_time
             test_result = self._compare_output_with_reference(
                 expected_output, test_output, reference_filename, expectation == '!=')
@@ -144,7 +144,7 @@ class Driver(object):
     def __del__(self):
         self.stop()
 
-    def run_test(self, driver_input, stop_when_done):
+    def run_test(self, driver_input):
         """Run a single test and return the results.
 
         Note that it is okay if a test times out or crashes. content_shell
@@ -182,10 +182,10 @@ class Driver(object):
             self._crashed_process_name = 'unknown process name'
             self._crashed_pid = 0
 
-        if stop_when_done or crashed or timed_out or leaked:
+        if crashed or timed_out or leaked:
             # We call stop() even if we crashed or timed out in order to get any remaining stdout/stderr output.
             # In the timeout case, we kill the hung process as well.
-            out, err = self._server_process.stop(self._port.driver_stop_timeout() if stop_when_done else 0.0)
+            out, err = self._server_process.stop(0.0)
             if out:
                 text += out
             if err:
@@ -564,7 +564,7 @@ class TestDriver(Driver):
         return [self._port._path_to_driver()] + \
             self._port.get_option('additional_driver_flag', []) + per_test_args
 
-    def run_test(self, driver_input, stop_when_done):
+    def run_test(self, driver_input):
         if not self.started:
             self.started = True
             self.pid = TestDriver.next_pid
@@ -633,9 +633,6 @@ class TestDriver(Driver):
             crash = True
             crash_log = 'reftest crash log'
 
-        if stop_when_done:
-            self.stop()
-
         if test.actual_checksum == driver_input.image_hash:
             image = None
         else:
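
Taken together, batch_size now travels with each 'test_list' message instead
of living in per-worker state, so a retry pass can shrink it without
rebuilding the worker pool. A simplified, runnable sketch of that flow (not
the real message_pool API; names and test paths are illustrative):

    def handle_test_list(shard_name, test_inputs, batch_size, run_test):
        # Worker side: the per-message batch_size replaces the old
        # per-worker self._batch_size field that start() initialized.
        for test_input in test_inputs:
            run_test(test_input, shard_name, batch_size)

    # Runner side: a retry pass resends the shard with batch_size forced to 1.
    handle_test_list('shard_0', ['fast/dom/a.html', 'fast/dom/b.html'], 1,
                     lambda test, shard, size: None)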