Revert "[base] Fail test batches if they leak processes, under POSIX."

This reverts commit 2ea83bd2. Reason for revert: Multiple failing tests on Mac ASAN, see for example https://ci.chromium.org/p/chromium/builders/ci/Mac%20ASan%2064%20Tests%20%281%29/64426 Original change's description: > [base] Fail test batches if they leak processes, under POSIX. > > Under Windows and Fuchsia the TestLauncher runs test batch processes in > their own jobs, allowing any leaked sub-processes leaked by tests to be > cleaned up. > > Under POSIX platforms each batch is run in its own process group, but the > group was only being kill()ed if the test batch failed or crashed. > > TestLauncher now proactively kill()s each test batch's process group. The > result of the call is also checked, and the test batch marked as failed > unless kill() reports that the process-group was already gone. > > Note that this will still miss processes launched into new process groups > by tests. > > Bug: 1094369 > Change-Id: If379d200953a823b2020766cda73f8cf27bfdb7f > Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2346388 > Commit-Queue: Wez <wez@chromium.org> > Auto-Submit: Wez <wez@chromium.org> > Reviewed-by: Takuto Ikuta <tikuta@chromium.org> > Cr-Commit-Position: refs/heads/master@{#797277} TBR=wez@chromium.org,tikuta@chromium.org Change-Id: I340068b6f84583af89fa7c75d43566acc2b61537 No-Presubmit: true No-Tree-Checks: true No-Try: true Bug: 1094369 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2352811Reviewed-by: Bret Sepulveda <bsep@chromium.org> Commit-Queue: Bret Sepulveda <bsep@chromium.org> Cr-Commit-Position: refs/heads/master@{#797596}

Revert "[base] Fail test batches if they leak processes, under POSIX."
This reverts commit 2ea83bd2. Reason for revert: Multiple failing tests on Mac ASAN, see for example https://ci.chromium.org/p/chromium/builders/ci/Mac%20ASan%2064%20Tests%20%281%29/64426 Original change's description: > [base] Fail test batches if they leak processes, under POSIX. > > Under Windows and Fuchsia the TestLauncher runs test batch processes in > their own jobs, allowing any leaked sub-processes leaked by tests to be > cleaned up. > > Under POSIX platforms each batch is run in its own process group, but the > group was only being kill()ed if the test batch failed or crashed. > > TestLauncher now proactively kill()s each test batch's process group. The > result of the call is also checked, and the test batch marked as failed > unless kill() reports that the process-group was already gone. > > Note that this will still miss processes launched into new process groups > by tests. > > Bug: 1094369 > Change-Id: If379d200953a823b2020766cda73f8cf27bfdb7f > Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2346388 > Commit-Queue: Wez <wez@chromium.org> > Auto-Submit: Wez <wez@chromium.org> > Reviewed-by: Takuto Ikuta <tikuta@chromium.org> > Cr-Commit-Position: refs/heads/master@{#797277} TBR=wez@chromium.org,tikuta@chromium.org Change-Id: I340068b6f84583af89fa7c75d43566acc2b61537 No-Presubmit: true No-Tree-Checks: true No-Try: true Bug: 1094369 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2352811Reviewed-by: Bret Sepulveda <bsep@chromium.org> Commit-Queue: Bret Sepulveda <bsep@chromium.org> Cr-Commit-Position: refs/heads/master@{#797596}
84a0a6bf · Bret Sepulveda · Commit Bot · b2789a9f · 84a0a6bf · 84a0a6bf
Commit 84a0a6bf authored Aug 13, 2020 by Bret Sepulveda Committed by Commit Bot Aug 13, 2020
5 changed files
--- a/base/process/kill.h
+++ b/base/process/kill.h
@@ -84,6 +84,12 @@ BASE_EXPORT bool KillProcesses(const FilePath::StringType& executable_name,
                               int exit_code,
                               const ProcessFilter* filter);

+#if defined(OS_POSIX)
+// Attempts to kill the process group identified by |process_group_id|. Returns
+// true on success.
+BASE_EXPORT bool KillProcessGroup(ProcessHandle process_group_id);
+#endif  // defined(OS_POSIX)
+
 // Get the termination status of the process by interpreting the
 // circumstances of the child process' death. |exit_code| is set to
 // the status returned by waitpid() on POSIX, and from GetExitCodeProcess() on

--- a/base/process/kill_fuchsia.cc
+++ b/base/process/kill_fuchsia.cc
@@ -13,6 +13,14 @@

 namespace base {

+bool KillProcessGroup(ProcessHandle process_group_id) {
+  // |process_group_id| is really a job on Fuchsia.
+  zx_status_t status = zx_task_kill(process_group_id);
+  DLOG_IF(ERROR, status != ZX_OK)
+      << "unable to terminate job " << process_group_id;
+  return status == ZX_OK;
+}
+
 TerminationStatus GetTerminationStatus(ProcessHandle handle, int* exit_code) {
  DCHECK(exit_code);


--- a/base/process/kill_posix.cc
+++ b/base/process/kill_posix.cc
@@ -78,6 +78,15 @@ TerminationStatus GetTerminationStatusImpl(ProcessHandle handle,

 }  // namespace

+#if !defined(OS_NACL_NONSFI)
+bool KillProcessGroup(ProcessHandle process_group_id) {
+  bool result = kill(-1 * process_group_id, SIGKILL) == 0;
+  if (!result)
+    DPLOG(ERROR) << "Unable to terminate process group " << process_group_id;
+  return result;
+}
+#endif  // !defined(OS_NACL_NONSFI)
+
 TerminationStatus GetTerminationStatus(ProcessHandle handle, int* exit_code) {
  return GetTerminationStatusImpl(handle, false /* can_block */, exit_code);
 }

--- a/base/test/launcher/test_launcher.cc
+++ b/base/test/launcher/test_launcher.cc
@@ -438,21 +438,38 @@ int LaunchChildTestProcessWithOptions(const CommandLine& command_line,
    // Cleanup the data directory.
    CHECK(DeletePathRecursively(nested_data_path));
 #elif defined(OS_POSIX)
-    // It is not possible to waitpid() on any leaked sub-processes of the test
-    // batch process, since those are not direct children of this process.
-    // kill()ing the process-group will return a result indicating whether the
-    // group was found (i.e. processes were still running in it) or not (i.e.
-    // sub-processes had exited already). Unfortunately many tests (e.g. browser
-    // tests) have processes exit asynchronously, so checking the kill() result
-    // will report false failures.
-    // Unconditionally kill the process group, regardless of the batch exit-code
-    // until a better solution is available.
-    int kill_result = kill(-1 * process.Handle(), SIGKILL);
-    if (kill_result < 0 && errno != ESRCH) {
-      PLOG(ERROR) << "kill(-" << process.Handle() << ") failed";
-      exit_code = -1;
+
+#if BUILDFLAG(CLANG_PROFILING)
+    // TODO(crbug.com/1094369): Remove this condition once the child process
+    // leaking bug is fixed.
+    //
+    // TODO(crbug.com/1095075): Make test launcher treat lingering child
+    // processes hard failures so that they can be detected and surfaced
+    // gracefully.
+    //
+    // When profiling is enabled, browser child processes take extra time to
+    // dump profiles, which means that lingering processes are much more likely
+    // to happen than non-profiling build. Therefore, on POSIX, in order to
+    // avoid polluting the machine state, ensure any child processes that the
+    // test might have created are cleaned up to avoid potential leaking even
+    // when tests have passed. On Windows, child processes are automatically
+    // cleaned up using JobObjects.
+    //
+    // On non-profiling build, when tests have passed, we don't clean up the
+    // lingering processes even when there are any, and the reason is that they
+    // usually indicate prod issues, letting them slip to the following test
+    // tasks and cause failures increses the chance of them being surfaced.
+    kill(-1 * process.Handle(), SIGKILL);
+#else
+    if (exit_code != 0) {
+      // On POSIX, in case the test does not exit cleanly, either due to a crash
+      // or due to it timing out, we need to clean up any child processes that
+      // it might have created. On Windows, child processes are automatically
+      // cleaned up using JobObjects.
+      KillProcessGroup(process.Handle());
    }
-#endif  // defined(OS_POSIX)
+#endif
+#endif

    GetLiveProcesses()->erase(process.Handle());
  }

--- a/base/test/launcher/test_launcher_unittest.cc
+++ b/base/test/launcher/test_launcher_unittest.cc
@@ -15,14 +15,12 @@
 #include "base/test/launcher/test_launcher.h"
 #include "base/test/launcher/test_launcher_test_utils.h"
 #include "base/test/launcher/unit_test_launcher.h"
-#include "base/test/multiprocess_test.h"
 #include "base/test/task_environment.h"
 #include "base/test/test_timeouts.h"
 #include "base/threading/thread_task_runner_handle.h"
 #include "build/build_config.h"
 #include "testing/gmock/include/gmock/gmock.h"
 #include "testing/gtest/include/gtest/gtest.h"
-#include "testing/multiprocess_func_list.h"

 #if defined(OS_WIN)
 #include "base/win/windows_version.h"
@@ -710,12 +708,12 @@ TEST(MockUnitTests, DISABLED_FailTest) {
 TEST(MockUnitTests, DISABLED_CrashTest) {
  IMMEDIATE_CRASH();
 }
-// Basic test will not be reached, due to the preceding crash in the same batch.
+// Basic test will not be reached with default batch size.
 TEST(MockUnitTests, DISABLED_NoRunTest) {
  ASSERT_TRUE(true);
 }

-// Using TestLauncher to launch 3 basic unitests
+// Using TestLauncher to launch 3 simple unitests
 // and validate the resulting json file.
 TEST_F(UnitTestLauncherDelegateTester, RunMockTests) {
  CommandLine command_line(CommandLine::ForCurrentProcess()->GetProgram());