Commit 914494ce authored by Sven Zheng, committed by Commit Bot

Pixel test framework: do not make Gerrit comments for flaky tests

Currently, if a pixel test is flaky (failing -> passing) on CQ, Skia Gold
will send out a Gerrit comment about the test failure. We don't want that,
since the test eventually passed. The comment should only be sent out when
the test fails deterministically.
On the Skia Gold side, however, there is no way to know how many retries
are left. To resolve this, the test needs to know whether it is on the last
retry. If it is and the test is still failing, the test can tell Gold to
send out the Gerrit comment.

Bug: 1139468
Change-Id: I68f9f47afc444227b285d6eae14c96854c79b62c
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2486391
Reviewed-by: Nico Weber <thakis@chromium.org>
Reviewed-by: Brian Sheedy <bsheedy@chromium.org>
Commit-Queue: Sven Zheng <svenzheng@chromium.org>
Cr-Commit-Position: refs/heads/master@{#821868}
parent 7c83db4f
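To make the mechanism concrete, here is a minimal sketch of the test-side decision, simplified from the actual change in the diff below. The switch name "gerrit-issue" is a hypothetical stand-in for the issue-key switch the Gold harness checks (kIssueKey in the diff); switches::kTestLauncherRetriesLeft is the flag this CL introduces.

// Minimal sketch (not the exact code below): report the failure to Gerrit
// only when no retries are left, or when there is no CL attached at all.
#include "base/command_line.h"
#include "base/strings/string_number_conversions.h"
#include "base/test/test_switches.h"

bool ShouldCommentOnGerrit() {
  const base::CommandLine* cmd = base::CommandLine::ForCurrentProcess();
  // No CL attached means this is a CI job; always report.
  if (!cmd->HasSwitch("gerrit-issue"))  // hypothetical stand-in switch name
    return true;
  int retries_left = 0;
  if (cmd->HasSwitch(switches::kTestLauncherRetriesLeft) &&
      base::StringToInt(
          cmd->GetSwitchValueASCII(switches::kTestLauncherRetriesLeft),
          &retries_left) &&
      retries_left > 0) {
    // More attempts remain; a flaky test may still pass, so stay quiet.
    return false;
  }
  // Last attempt (or flag missing/unparseable): let Gold comment on the CL.
  return true;
}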
@@ -252,7 +252,8 @@ bool BotModeEnabled(const CommandLine* command_line) {
 // Returns command line command line after gtest-specific processing
 // and applying |wrapper|.
 CommandLine PrepareCommandLineForGTest(const CommandLine& command_line,
-                                       const std::string& wrapper) {
+                                       const std::string& wrapper,
+                                       const size_t retries_left) {
   CommandLine new_command_line(command_line.GetProgram());
   CommandLine::SwitchMap switches = command_line.GetSwitches();
@@ -263,6 +264,16 @@ CommandLine PrepareCommandLineForGTest(const CommandLine& command_line,
   // Don't try to write the final XML report in child processes.
   switches.erase(kGTestOutputFlag);
 
+  if (switches.find(switches::kTestLauncherRetriesLeft) == switches.end()) {
+    switches[switches::kTestLauncherRetriesLeft] =
+#if defined(OS_WIN)
+        base::NumberToWString(
+#else
+        base::NumberToString(
+#endif
+            retries_left);
+  }
+
   for (CommandLine::SwitchMap::const_iterator iter = switches.begin();
        iter != switches.end(); ++iter) {
     new_command_line.AppendSwitchNative((*iter).first, (*iter).second);
@@ -885,6 +896,7 @@ TestLauncher::TestLauncher(TestLauncherDelegate* launcher_delegate,
       test_finished_count_(0),
       test_success_count_(0),
       test_broken_count_(0),
+      retries_left_(0),
       retry_limit_(retry_limit),
       force_run_broken_tests_(false),
       watchdog_timer_(FROM_HERE,
@@ -978,8 +990,8 @@ void TestLauncher::LaunchChildGTestProcess(
       test_names, task_temp_dir, &result_file);
 
   // Record the exact command line used to launch the child.
-  CommandLine new_command_line(
-      PrepareCommandLineForGTest(cmd_line, launcher_delegate_->GetWrapper()));
+  CommandLine new_command_line(PrepareCommandLineForGTest(
+      cmd_line, launcher_delegate_->GetWrapper(), retries_left_));
   LaunchOptions options;
   options.flags = launcher_delegate_->GetLaunchOptions();
@@ -1368,6 +1380,7 @@ bool TestLauncher::Init(CommandLine* command_line) {
     retry_limit_ = 0U;
   }
 
+  retries_left_ = retry_limit_;
   force_run_broken_tests_ =
       command_line->HasSwitch(switches::kTestLauncherForceRunBrokenTests);
@@ -1810,9 +1823,7 @@ void TestLauncher::PrintFuzzyMatchingTestNames() {
 }
 
 bool TestLauncher::RunRetryTests() {
-  // Number of retries in this iteration.
-  size_t retry_count = 0;
-  while (!tests_to_retry_.empty() && retry_count < retry_limit_) {
+  while (!tests_to_retry_.empty() && retries_left_ > 0) {
     // Retry all tests that depend on a failing test.
     std::vector<std::string> test_names;
     for (const TestInfo& test_info : tests_) {
@@ -1829,12 +1840,12 @@ bool TestLauncher::RunRetryTests() {
       return false;
 
     fprintf(stdout, "Retrying %zu test%s (retry #%zu)\n", retry_started_count,
-            retry_started_count > 1 ? "s" : "", retry_count);
+            retry_started_count > 1 ? "s" : "", retry_limit_ - retries_left_);
     fflush(stdout);
+    --retries_left_;
 
     TestRunner test_runner(this);
     test_runner.Run(test_names);
-    retry_count++;
   }
   return tests_to_retry_.empty();
 }
...
@@ -277,6 +277,9 @@ class TestLauncher {
   // likely indicating a more systemic problem if widespread.
   size_t test_broken_count_;
 
+  // How many retries are left.
+  size_t retries_left_;
+
   // Maximum number of retries per iteration.
   size_t retry_limit_;
...
@@ -68,6 +68,11 @@ const char switches::kTestLauncherPrintTestStdio[] =
 const char switches::kTestLauncherPrintWritablePath[] =
     "test-launcher-print-writable-path";
 
+// Indicates how many retries are left. Tests in general should not pass this
+// flag themselves; the launcher uses it to pass retries-left information
+// to the runner process.
+const char switches::kTestLauncherRetriesLeft[] = "test-launcher-retries-left";
+
 // These two flags has the same effect, but don't use them at the same time.
 // And isolated-script-test-launcher-retry-limit is preferred in the future.
 // Maximum number of times to retry a test after failure.
...
@@ -27,6 +27,7 @@ extern const char kTestLauncherOutput[];
 extern const char kTestLauncherPrintTempLeaks[];
 extern const char kTestLauncherPrintTestStdio[];
 extern const char kTestLauncherPrintWritablePath[];
+extern const char kTestLauncherRetriesLeft[];
 extern const char kTestLauncherRetryLimit[];
 extern const char kTestLauncherShardIndex[];
 extern const char kTestLauncherSummaryOutput[];
...
@@ -640,6 +640,7 @@ if (is_win || is_mac || (is_linux && !is_chromeos)) {
     ]
     deps = [
       "//base",
+      "//base/test:test_config",
      "//testing/gtest",
      "//ui/gfx",
      "//ui/snapshot",
...
@@ -20,7 +20,10 @@
 #include "base/path_service.h"
 #include "base/process/launch.h"
 #include "base/process/process.h"
+#include "base/strings/strcat.h"
+#include "base/strings/string_number_conversions.h"
 #include "base/strings/string_util.h"
+#include "base/test/test_switches.h"
 #include "base/threading/thread_restrictions.h"
 #include "base/values.h"
 #include "testing/gtest/include/gtest/gtest.h"
@@ -86,6 +89,38 @@ void FillInSystemEnvironment(base::Value::DictStorage& ds) {
   ds["processor"] = std::make_unique<base::Value>(processor);
 }
 
+// Returns whether an image comparison failure should result in Gerrit
+// comments. In general, when a pixel test fails on CQ, Gold will make a
+// Gerrit comment indicating that the CL breaks some pixel tests. However,
+// if the test is flaky and has a failure->passing pattern, we don't want
+// Gold to make Gerrit comments on the first failure.
+// This function returns true iff:
+//   * it's a tryjob and no retries are left, or
+//   * it's a CI job.
+bool ShouldMakeGerritCommentsOnFailures() {
+  base::CommandLine* cmd = base::CommandLine::ForCurrentProcess();
+  if (!cmd->HasSwitch(kIssueKey))
+    return true;
+  if (cmd->HasSwitch(switches::kTestLauncherRetriesLeft)) {
+    int retries_left = 0;
+    bool succeed = base::StringToInt(
+        cmd->GetSwitchValueASCII(switches::kTestLauncherRetriesLeft),
+        &retries_left);
+    if (!succeed) {
+      LOG(ERROR) << switches::kTestLauncherRetriesLeft << " = "
+                 << cmd->GetSwitchValueASCII(switches::kTestLauncherRetriesLeft)
+                 << " cannot be converted to an integer.";
+      return true;
+    }
+    if (retries_left > 0) {
+      LOG(INFO) << "Test failure will not result in a Gerrit comment because"
+                   " there are more retries.";
+      return false;
+    }
+  }
+  return true;
+}
+
 // Fill in test environment to the keys_file. The format is json.
 // We need the system information to determine whether a new screenshot
 // is good or not. All the information that can affect the output of pixels
@@ -242,6 +277,15 @@ bool SkiaGoldPixelDiff::UploadToSkiaGoldServer(
   cmd.AppendSwitchPath("png-file", local_file_path);
   cmd.AppendSwitchPath("work-dir", working_dir_);
 
+  std::map<std::string, std::string> optional_keys;
+  if (!ShouldMakeGerritCommentsOnFailures()) {
+    optional_keys["ignore"] = "1";
+  }
+  for (auto key : optional_keys) {
+    cmd.AppendSwitchASCII("add-test-optional-key",
+                          base::StrCat({key.first, ":", key.second}));
+  }
+
   if (algorithm)
     algorithm->AppendAlgorithmToCmdline(cmd);
...
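Taken together with the launcher changes above, the effect is roughly this: while a tryjob still has retries left, the goldctl command assembled in UploadToSkiaGoldServer carries an extra "add-test-optional-key" argument with the value "ignore:1", which, per the description above, tells Gold not to make a Gerrit comment for that attempt; on the last attempt the key is omitted, so a deterministic failure is still surfaced on the CL.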