Commit fbec8fd5 authored by Robert Ma, committed by Chromium LUCI CQ

[wptrunner/Chromium] Split out test messages into stderr

This only affects the Chromium log formatter of wptrunner.

We want to produce a valid metadata and full logs separately, the former
as the "-expected.txt" artifact and the latter as "-stderr.txt".

Bug: 1154085
Cq-Include-Trybots: luci.chromium.try:linux-wpt-identity-fyi-rel,linux-wpt-input-fyi-rel
Change-Id: I059e05005d2b5f72708c783c5c81deaa86f8b417
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2567094
Commit-Queue: Robert Ma <robertma@chromium.org>
Reviewed-by: Luke Z <lpz@chromium.org>
Cr-Commit-Position: refs/heads/master@{#832486}
parent 5fde5d40
......@@ -96,8 +96,9 @@ class BaseWptScriptAdapter(common.BaseIsolatedScriptArgsAdapter):
self.fs.write_text_file(self.wpt_output,
json.dumps(output_json))
def _handle_log_artifact(self, log_artifact, root_node, results_dir,
def _handle_text_outputs(self, actual_metadata, root_node, results_dir,
path_so_far):
"""Produces actual, expected and diff text outputs."""
# If the test passed, there are no artifacts to output. Note that if a
# baseline file (*.ini file) exists, an actual of PASS means that the
# test matched the baseline, not that the test itself passed. As such,
......@@ -106,9 +107,9 @@ class BaseWptScriptAdapter(common.BaseIsolatedScriptArgsAdapter):
if root_node["actual"] == "PASS":
return
# Note that the log_artifact is a list of strings, so we join
# Note that the actual_metadata is a list of strings, so we join
# them on new lines when writing to file.
actual_text = "\n".join(log_artifact)
actual_text = "\n".join(actual_metadata)
actual_subpath = self._write_text_artifact(
test_failures.FILENAME_SUFFIX_ACTUAL,
results_dir, path_so_far, actual_text)
......@@ -157,21 +158,33 @@ class BaseWptScriptAdapter(common.BaseIsolatedScriptArgsAdapter):
# Found a leaf, process it
if "artifacts" not in root_node:
return
log_artifact = root_node["artifacts"].pop("log", None)
if log_artifact:
self._handle_log_artifact(log_artifact, root_node, results_dir,
path_so_far)
screenshot_artifact = root_node["artifacts"].pop("screenshots",
None)
actual_metadata = root_node["artifacts"].pop(
"wpt_actual_metadata", None)
if actual_metadata:
self._handle_text_outputs(
actual_metadata, root_node, results_dir, path_so_far)
screenshot_artifact = root_node["artifacts"].pop(
"screenshots", None)
if screenshot_artifact:
screenshot_paths_dict = self._write_screenshot_artifact(
results_dir, path_so_far, screenshot_artifact)
for screenshot_key, path in screenshot_paths_dict.items():
root_node["artifacts"][screenshot_key] = [path]
crashlog_artifact = root_node["artifacts"].pop("wpt_crash_log",
None)
log_artifact = root_node["artifacts"].pop("wpt_log", None)
if log_artifact:
artifact_subpath = self._write_text_artifact(
test_failures.FILENAME_SUFFIX_STDERR,
results_dir, path_so_far, "\n".join(log_artifact))
if artifact_subpath:
# Required by fast/harness/results.html to show stderr.
root_node["has_stderr"] = True
root_node["artifacts"]["stderr"] = [artifact_subpath]
crashlog_artifact = root_node["artifacts"].pop(
"wpt_crash_log", None)
if crashlog_artifact:
# Note that the crashlog_artifact is a list of strings, so we
# join them on new lines when writing to file.
......
......@@ -81,8 +81,8 @@ class BaseWptScriptAdapterTest(unittest.TestCase):
return json.loads(self.host.filesystem.read_text_file(
OUTPUT_JSON_FILENAME))
def test_write_log_artifact(self):
# Ensure that log artifacts are written to the correct location.
def test_write_text_outputs(self):
# Ensure that text outputs are written to the correct location.
# We only generate an actual.txt if our actual wasn't PASS, so in this
# case we shouldn't write anything.
......@@ -93,7 +93,7 @@ class BaseWptScriptAdapterTest(unittest.TestCase):
'actual': 'PASS',
'artifacts': {
'wpt_actual_status': ['OK'],
'log': ['test.html actual text'],
'wpt_actual_metadata': ['test.html actual text'],
},
},
},
......@@ -121,8 +121,8 @@ class BaseWptScriptAdapterTest(unittest.TestCase):
# Ensure the artifact in the json was replaced with the location of
# the newly-created file.
updated_json = self._load_json_output()
self.assertFalse(
"log" in updated_json["tests"]["test.html"]["artifacts"])
self.assertFalse("wpt_actual_metadata" in
updated_json["tests"]["test.html"]["artifacts"])
self.assertEqual(
[actual_path],
updated_json["tests"]["test.html"]["artifacts"]["actual_text"])
......@@ -142,6 +142,36 @@ class BaseWptScriptAdapterTest(unittest.TestCase):
[pretty_diff_path],
updated_json["tests"]["test.html"]["artifacts"]["pretty_text_diff"])
def test_write_log_artifact(self):
        # Ensure that wpt log artifacts are written to the stderr location.
json_dict = {
'tests': {
'test.html': {
'expected': 'PASS',
'actual': 'FAIL',
'artifacts': {
'wpt_actual_status': ['ERROR'],
'wpt_log': ['test.html exceptions'],
},
},
},
'path_delimiter': '/',
}
self._create_json_output(json_dict)
self.wpt_adapter.do_post_test_run_tasks()
written_files = self.wpt_adapter.fs.written_files
stderr_path = os.path.join("layout-test-results", "test-stderr.txt")
self.assertEqual("test.html exceptions", written_files[stderr_path])
# Ensure the artifact in the json was replaced with the location of
# the newly-created file.
updated_json = self._load_json_output()
self.assertFalse(
"wpt_log" in updated_json["tests"]["test.html"]["artifacts"])
self.assertEqual(
[stderr_path],
updated_json["tests"]["test.html"]["artifacts"]["stderr"])
self.assertTrue(updated_json["tests"]["test.html"]["has_stderr"])
def test_write_crashlog_artifact(self):
# Ensure that crash log artifacts are written to the correct location.
json_dict = {
......@@ -225,7 +255,7 @@ class BaseWptScriptAdapterTest(unittest.TestCase):
'actual': 'FAIL',
'artifacts': {
'wpt_actual_status': ['OK'],
'log': ['test.html actual text'],
'wpt_actual_metadata': ['test.html actual text'],
},
},
},
......@@ -247,8 +277,8 @@ class BaseWptScriptAdapterTest(unittest.TestCase):
# Ensure the artifacts in the json were replaced with the locations of
# the newly-created files.
updated_json = self._load_json_output()
self.assertFalse(
"log" in updated_json["tests"]["test.html"]["artifacts"])
self.assertFalse("wpt_actual_metadata" in
updated_json["tests"]["test.html"]["artifacts"])
self.assertEqual(
[actual_path],
updated_json["tests"]["test.html"]["artifacts"]["actual_text"])
......@@ -289,7 +319,7 @@ class BaseWptScriptAdapterTest(unittest.TestCase):
'actual': 'FAIL',
'artifacts': {
'wpt_actual_status': ['OK'],
'log': ['variant bar/abc actual text'],
'wpt_actual_metadata': ['variant bar/abc actual text'],
},
},
},
......@@ -316,7 +346,7 @@ class BaseWptScriptAdapterTest(unittest.TestCase):
# Ensure the artifacts in the json were replaced with the locations of
# the newly-created files.
updated_json = self._load_json_output()
self.assertFalse("log" in updated_json["tests"]
self.assertFalse("wpt_actual_metadata" in updated_json["tests"]
["variant.html?foo=bar/abc"]["artifacts"])
self.assertEqual(
[actual_path],
......@@ -343,7 +373,8 @@ class BaseWptScriptAdapterTest(unittest.TestCase):
'actual': 'FAIL',
'artifacts': {
'wpt_actual_status': ['OK'],
'log': ['dir/multiglob worker actual text'],
'wpt_actual_metadata':
['dir/multiglob worker actual text'],
},
},
},
......@@ -372,8 +403,8 @@ class BaseWptScriptAdapterTest(unittest.TestCase):
# Ensure the artifacts in the json were replaced with the locations of
# the newly-created files.
updated_json = self._load_json_output()
self.assertFalse("log" in updated_json["tests"]
["dir/multiglob.https.any.worker.html"]["artifacts"])
self.assertFalse("wpt_actual_metadata" in updated_json["tests"]
["dir/multiglob.https.any.worker.html"]["artifacts"])
self.assertEqual(
[actual_path],
updated_json["tests"]["dir/multiglob.https.any.worker.html"]
......
......@@ -31,48 +31,55 @@ class ChromiumFormatter(base.BaseFormatter):
# the trie and the leaf contains the dict of per-test data.
self.tests = {}
# Message dictionary, keyed by test name. Value is a list of strings:
# see _append_test_message for the format.
# Two dictionaries keyed by test name. Values are lists of strings:
# actual metadata content and other messages, respectively.
# See _append_test_message for examples.
self.actual_metadata = defaultdict(list)
self.messages = defaultdict(list)
# List of tests that have failing subtests.
self.tests_with_subtest_fails = set()
# Browser log for the current test under execution.
self.test_log = []
# These logs are from ChromeDriver's stdout/err, so we cannot say for
# sure which test a message is from, but instead we correlate them based
# on timing.
self.browser_log = []
def _append_test_message(self, test, subtest, wpt_actual_status, message):
"""
r"""
Appends the message data for a test or subtest.
:param str test: the name of the test
:param str subtest: the name of the subtest with the message. Will be
None if this is called for a test.
:param str wpt_actual_status: the test status as reported by WPT
:param str message: the string to append to the message for this test
Example:
[test_or_subtest_name]
expected: FAIL
message: some test message eg assert failure
Example actual_metadata of a test with a subtest:
"[test_name]\n expected: OK\n"
" [subtest_name]\n expected: FAIL\n"
NOTE: throughout this function we output a key called "expected" but
fill it in with the actual status. This is by design. The goal of this
output is to look exactly like WPT's expectation metadata so that it
can be easily diff-ed.
Messages are appended verbatim to self.messages[test].
"""
# NOTE: throughout this function we output a key called "expected" but
# fill it in with the actual status. This is by design. The goal of this
# output is to look exactly like WPT's expectation metadata so that it
# can be easily diff-ed.
if subtest:
result = " [%s]\n expected: %s\n" % (subtest, wpt_actual_status)
self.actual_metadata[test].append(result)
if message:
result += " message: %s\n" % message
self.messages[test].append(result)
self.messages[test].append("%s: %s\n" % (subtest, message))
else:
# No subtest, so this is the top-level test. The result must be
# prepended to the list of messages, so that the message for the
# test comes before any subtest messages.
# prepended to the list, so that it comes before any subtest.
test_name_last_part = test.split("/")[-1]
result = "[%s]\n expected: %s\n" % (test_name_last_part, wpt_actual_status)
self.actual_metadata[test].insert(0, result)
if message:
result += " message: %s\n" % message
self.messages[test].insert(0, result)
self.messages[test].insert(0, "Harness: %s\n" % message)
def _append_artifact(self, cur_dict, artifact_name, artifact_value):
"""
......@@ -86,13 +93,16 @@ class ChromiumFormatter(base.BaseFormatter):
cur_dict["artifacts"] = defaultdict(list)
cur_dict["artifacts"][artifact_name].append(artifact_value)
def _store_test_result(self, name, actual, expected, messages, wpt_actual,
subtest_failure, reftest_screenshots):
def _store_test_result(self, name, actual, expected, actual_metadata,
messages, wpt_actual, subtest_failure,
reftest_screenshots=None):
"""
Stores the result of a single test in |self.tests|
:param str name: name of the test.
:param str actual: actual status of the test.
:param str expected: expected statuses of the test.
:param list actual_metadata: a list of metadata items.
:param list messages: a list of test messages.
:param str wpt_actual: actual status reported by wpt, may differ from |actual|.
:param bool subtest_failure: whether this test failed because of subtests.
......@@ -112,10 +122,12 @@ class ChromiumFormatter(base.BaseFormatter):
if wpt_actual != actual:
self._append_artifact(cur_dict, "wpt_actual_status", wpt_actual)
if wpt_actual == 'CRASH':
for line in self.test_log:
for line in self.browser_log:
self._append_artifact(cur_dict, "wpt_crash_log", line)
for metadata in actual_metadata:
self._append_artifact(cur_dict, "wpt_actual_metadata", metadata)
for message in messages:
self._append_artifact(cur_dict, "log", message)
self._append_artifact(cur_dict, "wpt_log", message)
# Store screenshots (if any).
for item in reftest_screenshots or []:
......@@ -230,19 +242,21 @@ class ChromiumFormatter(base.BaseFormatter):
self._store_test_result(test_name,
actual_status,
expected_statuses,
self.actual_metadata[test_name],
self.messages[test_name],
wpt_actual_status,
subtest_failure,
data.get("extra", {}).get("reftest_screenshots"))
# Remove the test from messages dict to avoid accumulating too many.
# Remove the test from dicts to avoid accumulating too many.
self.actual_metadata.pop(test_name)
self.messages.pop(test_name)
# Update the count of how many tests ran with each status.
self.num_failures_by_status[actual_status] += 1
# New test, new browser logs.
self.test_log = []
self.browser_log = []
def suite_end(self, data):
# Create the final result dictionary
......@@ -259,4 +273,4 @@ class ChromiumFormatter(base.BaseFormatter):
def process_output(self, data):
if 'command' in data and 'chromedriver' in data['command']:
self.test_log.append(data['data'])
self.browser_log.append(data['data'])
......@@ -157,17 +157,24 @@ def test_subtest_messages(capfd):
output_json = json.load(output)
t1_artifacts = output_json["tests"]["t1"]["artifacts"]
assert t1_artifacts["log"] == [
assert t1_artifacts["wpt_actual_metadata"] == [
"[t1]\n expected: PASS\n",
" [t1_a]\n expected: FAIL\n message: t1_a_message\n",
" [t1_b]\n expected: PASS\n message: t1_b_message\n",
" [t1_a]\n expected: FAIL\n",
" [t1_b]\n expected: PASS\n",
]
assert t1_artifacts["wpt_log"] == [
"t1_a: t1_a_message\n",
"t1_b: t1_b_message\n",
]
assert t1_artifacts["wpt_subtest_failure"] == ["true"]
t2_artifacts = output_json["tests"]["t2"]["artifacts"]
assert t2_artifacts["log"] == [
"[t2]\n expected: TIMEOUT\n message: t2_message\n",
assert t2_artifacts["wpt_actual_metadata"] == [
"[t2]\n expected: TIMEOUT\n",
" [t2_a]\n expected: PASS\n",
]
assert t2_artifacts["wpt_log"] == [
"Harness: t2_message\n"
]
assert "wpt_subtest_failure" not in t2_artifacts.keys()
......@@ -210,11 +217,17 @@ def test_subtest_failure(capfd):
test_obj = output_json["tests"]["t1"]
t1_artifacts = test_obj["artifacts"]
assert t1_artifacts["log"] == [
"[t1]\n expected: PASS\n message: top_message\n",
" [t1_a]\n expected: FAIL\n message: t1_a_message\n",
" [t1_b]\n expected: PASS\n message: t1_b_message\n",
" [t1_c]\n expected: TIMEOUT\n message: t1_c_message\n",
assert t1_artifacts["wpt_actual_metadata"] == [
"[t1]\n expected: PASS\n",
" [t1_a]\n expected: FAIL\n",
" [t1_b]\n expected: PASS\n",
" [t1_c]\n expected: TIMEOUT\n",
]
assert t1_artifacts["wpt_log"] == [
"Harness: top_message\n",
"t1_a: t1_a_message\n",
"t1_b: t1_b_message\n",
"t1_c: t1_c_message\n",
]
assert t1_artifacts["wpt_subtest_failure"] == ["true"]
# The status of the test in the output is a failure because subtests failed,
......@@ -266,13 +279,16 @@ def test_expected_subtest_failure(capfd):
output_json = json.load(output)
test_obj = output_json["tests"]["t1"]
t1_log = test_obj["artifacts"]["log"]
print("Lpz t1log=%s" % t1_log)
assert t1_log == [
assert test_obj["artifacts"]["wpt_actual_metadata"] == [
"[t1]\n expected: OK\n",
" [t1_a]\n expected: FAIL\n message: t1_a_message\n",
" [t1_b]\n expected: PASS\n message: t1_b_message\n",
" [t1_c]\n expected: TIMEOUT\n message: t1_c_message\n",
" [t1_a]\n expected: FAIL\n",
" [t1_b]\n expected: PASS\n",
" [t1_c]\n expected: TIMEOUT\n",
]
assert test_obj["artifacts"]["wpt_log"] == [
"t1_a: t1_a_message\n",
"t1_b: t1_b_message\n",
"t1_c: t1_c_message\n",
]
# The status of the test in the output is a pass because the subtest
# failures were all expected.
......@@ -316,9 +332,12 @@ def test_unexpected_subtest_pass(capfd):
test_obj = output_json["tests"]["t1"]
t1_artifacts = test_obj["artifacts"]
assert t1_artifacts["log"] == [
assert t1_artifacts["wpt_actual_metadata"] == [
"[t1]\n expected: PASS\n",
" [t1_a]\n expected: PASS\n message: t1_a_message\n",
" [t1_a]\n expected: PASS\n",
]
assert t1_artifacts["wpt_log"] == [
"t1_a: t1_a_message\n",
]
assert t1_artifacts["wpt_subtest_failure"] == ["true"]
# Since the subtest status is unexpected, we fail the test. But we report
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment