Commit 1b3220d5 authored by Maksym Onufriienko's avatar Maksym Onufriienko Committed by Commit Bot

[iOS] Add handling to prevent potential timeouts in tear_down.

Add handling to prevent potential timeouts in suprocess.check_call
We're observing some timeouts in some of the bot runs,
so we're trying to add some preventative guardrails in test_runner
in places that have blocking calls to prevent potential timeouts.
Handling freezes in blocking calls will allow for more graceful recoveries
and/or even just better data collections in the case of time outs
if they happen.

This CL specifically is adding some handling around suprocess.check_call in tear_down.
Since this is a blocking call, if it should freeze,
it would cause the entire tear_down to timeout instead of gracefully recovering
and completing the rest of teardown, which would include data collection.e.g.
https://logs.chromium.org/logs/chrome/buildbucket/cr-buildbucket.appspot.com/8900250401157621888/+/steps/base_unittests__iPhone_X_iOS_13.1___with_patch__on_iOS-13.1/0/stdout

Bug: 1011851
Change-Id: Ib5a955551e209648b9982593b9e2c1ee4a71f7a2
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1845774
Commit-Queue: Maksym Onufriienko <monufriienko@chromium.org>
Reviewed-by: default avatarJustin Cohen <justincohen@chromium.org>
Reviewed-by: default avatarAaron Gable <agable@chromium.org>
Cr-Commit-Position: refs/heads/master@{#705303}
parent 18fa16bb
...@@ -198,13 +198,14 @@ def defaults_delete(d, key): ...@@ -198,13 +198,14 @@ def defaults_delete(d, key):
subprocess.call(['defaults', 'delete', d, key]) subprocess.call(['defaults', 'delete', d, key])
def terminate_process(proc): def terminate_process(proc, proc_name):
"""Terminates the process. """Terminates the process.
If an error occurs ignore it, just print out a message. If an error occurs ignore it, just print out a message.
Args: Args:
proc: A subprocess to terminate. proc: A subprocess to terminate.
proc_name: A name of process.
""" """
try: try:
LOGGER.info('Killing hung process %s' % proc.pid) LOGGER.info('Killing hung process %s' % proc.pid)
...@@ -215,20 +216,21 @@ def terminate_process(proc): ...@@ -215,20 +216,21 @@ def terminate_process(proc):
# Check whether proc.pid process is still alive. # Check whether proc.pid process is still alive.
if ps.is_running(): if ps.is_running():
LOGGER.info( LOGGER.info(
'Process iossim is still alive! Xcodebuild process might block it.') 'Process %s is still alive! %s process might block it.',
xcodebuild_processes = [ proc.name, proc_name)
running_processes = [
p for p in psutil.process_iter() p for p in psutil.process_iter()
# Use as_dict() to avoid API changes across versions of psutil. # Use as_dict() to avoid API changes across versions of psutil.
if 'xcodebuild' == p.as_dict(attrs=['name'])['name']] if proc_name == p.as_dict(attrs=['name'])['name']]
if not xcodebuild_processes: if not running_processes:
LOGGER.debug('There are no running xcodebuild processes.') LOGGER.debug('There are no running %s processes.', proc_name)
break break
LOGGER.debug('List of running xcodebuild processes: %s' LOGGER.debug('List of running %s processes: %s'
% xcodebuild_processes) % (proc_name, running_processes))
# Killing xcodebuild processes # Killing running processes with proc_name
for p in xcodebuild_processes: for p in running_processes:
p.send_signal(signal.SIGKILL) p.send_signal(signal.SIGKILL)
psutil.wait_procs(xcodebuild_processes) psutil.wait_procs(running_processes)
else: else:
LOGGER.info('Process was killed!') LOGGER.info('Process was killed!')
break break
...@@ -236,7 +238,10 @@ def terminate_process(proc): ...@@ -236,7 +238,10 @@ def terminate_process(proc):
LOGGER.info('Error while killing a process: %s' % ex) LOGGER.info('Error while killing a process: %s' % ex)
def print_process_output(proc, parser, timeout=READLINE_TIMEOUT): def print_process_output(proc,
proc_name=None,
parser=None,
timeout=READLINE_TIMEOUT):
"""Logs process messages in console and waits until process is done. """Logs process messages in console and waits until process is done.
Method waits until no output message and if no message for timeout seconds, Method waits until no output message and if no message for timeout seconds,
...@@ -244,10 +249,17 @@ def print_process_output(proc, parser, timeout=READLINE_TIMEOUT): ...@@ -244,10 +249,17 @@ def print_process_output(proc, parser, timeout=READLINE_TIMEOUT):
Args: Args:
proc: A running process. proc: A running process.
proc_name: (str) A process name that has to be killed
if no output occurs in specified timeout. Sometimes proc generates
child process that may block its parent and for such cases
proc_name refers to the name of child process.
If proc_name is not specified, proc.name will be used to kill process.
Parser: A parser. Parser: A parser.
timeout: Timeout(in seconds) to subprocess.stdout.readline method. timeout: A timeout(in seconds) to subprocess.stdout.readline method.
""" """
out = [] out = []
if not proc_name:
proc_name = psutil.Process(proc.pid).name()
while True: while True:
# subprocess.stdout.readline() might be stuck from time to time # subprocess.stdout.readline() might be stuck from time to time
# and tests fail because of TIMEOUT. # and tests fail because of TIMEOUT.
...@@ -255,7 +267,7 @@ def print_process_output(proc, parser, timeout=READLINE_TIMEOUT): ...@@ -255,7 +267,7 @@ def print_process_output(proc, parser, timeout=READLINE_TIMEOUT):
# that will kill `frozen` running process if no new line is read # that will kill `frozen` running process if no new line is read
# and will finish test attempt. # and will finish test attempt.
# If new line appears in timeout, just cancel timer. # If new line appears in timeout, just cancel timer.
timer = threading.Timer(timeout, terminate_process, [proc]) timer = threading.Timer(timeout, terminate_process, [proc, proc_name])
timer.start() timer.start()
line = proc.stdout.readline() line = proc.stdout.readline()
timer.cancel() timer.cancel()
...@@ -263,7 +275,8 @@ def print_process_output(proc, parser, timeout=READLINE_TIMEOUT): ...@@ -263,7 +275,8 @@ def print_process_output(proc, parser, timeout=READLINE_TIMEOUT):
break break
line = line.rstrip() line = line.rstrip()
out.append(line) out.append(line)
parser.ProcessLine(line) if parser:
parser.ProcessLine(line)
LOGGER.info(line) LOGGER.info(line)
sys.stdout.flush() sys.stdout.flush()
LOGGER.debug('Finished print_process_output.') LOGGER.debug('Finished print_process_output.')
...@@ -567,6 +580,19 @@ class TestRunner(object): ...@@ -567,6 +580,19 @@ class TestRunner(object):
""" """
return os.environ.copy() return os.environ.copy()
def start_proc(self, cmd):
"""Starts a process with cmd command and os.environ.
Returns:
An instance of process.
"""
return subprocess.Popen(
cmd,
env=self.get_launch_env(),
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
)
def shutdown_and_restart(self): def shutdown_and_restart(self):
"""Restart a device or relaunch a simulator.""" """Restart a device or relaunch a simulator."""
pass pass
...@@ -598,6 +624,7 @@ class TestRunner(object): ...@@ -598,6 +624,7 @@ class TestRunner(object):
os.mkdir(os.path.join(self.out_dir, 'DerivedData')) os.mkdir(os.path.join(self.out_dir, 'DerivedData'))
derived_data = os.path.join(self.out_dir, 'DerivedData') derived_data = os.path.join(self.out_dir, 'DerivedData')
for directory in os.listdir(DERIVED_DATA): for directory in os.listdir(DERIVED_DATA):
LOGGER.info('Copying %s directory', directory)
shutil.move(os.path.join(DERIVED_DATA, directory), derived_data) shutil.move(os.path.join(DERIVED_DATA, directory), derived_data)
def wipe_derived_data(self): def wipe_derived_data(self):
...@@ -680,15 +707,10 @@ class TestRunner(object): ...@@ -680,15 +707,10 @@ class TestRunner(object):
else: else:
# TODO(crbug.com/812705): Implement test sharding for unit tests. # TODO(crbug.com/812705): Implement test sharding for unit tests.
# TODO(crbug.com/812712): Use thread pool for DeviceTestRunner as well. # TODO(crbug.com/812712): Use thread pool for DeviceTestRunner as well.
proc = subprocess.Popen( proc = self.start_proc(cmd)
cmd,
env=self.get_launch_env(),
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
)
old_handler = self.set_sigterm_handler( old_handler = self.set_sigterm_handler(
lambda _signum, _frame: self.handle_sigterm(proc)) lambda _signum, _frame: self.handle_sigterm(proc))
print_process_output(proc, parser) print_process_output(proc, 'xcodebuild', parser)
LOGGER.info('Waiting for test process to terminate.') LOGGER.info('Waiting for test process to terminate.')
proc.wait() proc.wait()
...@@ -1028,14 +1050,9 @@ class SimulatorTestRunner(TestRunner): ...@@ -1028,14 +1050,9 @@ class SimulatorTestRunner(TestRunner):
if self.xctest_path: if self.xctest_path:
cmd.append(self.xctest_path) cmd.append(self.xctest_path)
proc = subprocess.Popen( proc = self.start_proc(cmd)
cmd, out = print_process_output(proc, 'xcodebuild',
env=self.get_launch_env(), xctest_utils.XCTestLogParser())
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
)
out = print_process_output(proc, xctest_utils.XCTestLogParser())
self.deleteSimulator(udid) self.deleteSimulator(udid)
return (out, udid, proc.returncode) return (out, udid, proc.returncode)
...@@ -1306,12 +1323,7 @@ class WprProxySimulatorTestRunner(SimulatorTestRunner): ...@@ -1306,12 +1323,7 @@ class WprProxySimulatorTestRunner(SimulatorTestRunner):
if self.xctest_path: if self.xctest_path:
recipe_cmd.append(self.xctest_path) recipe_cmd.append(self.xctest_path)
proc = subprocess.Popen( proc = self.start_proc(recipe_cmd)
recipe_cmd,
env=self.get_launch_env(),
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
)
old_handler = self.set_sigterm_handler( old_handler = self.set_sigterm_handler(
lambda _signum, _frame: self.handle_sigterm(proc)) lambda _signum, _frame: self.handle_sigterm(proc))
...@@ -1320,7 +1332,7 @@ class WprProxySimulatorTestRunner(SimulatorTestRunner): ...@@ -1320,7 +1332,7 @@ class WprProxySimulatorTestRunner(SimulatorTestRunner):
else: else:
parser = gtest_utils.GTestLogParser() parser = gtest_utils.GTestLogParser()
print_process_output(proc, parser) print_process_output(proc, 'xcodebuild', parser)
proc.wait() proc.wait()
self.set_sigterm_handler(old_handler) self.set_sigterm_handler(old_handler)
...@@ -1626,15 +1638,23 @@ class DeviceTestRunner(TestRunner): ...@@ -1626,15 +1638,23 @@ class DeviceTestRunner(TestRunner):
def uninstall_apps(self): def uninstall_apps(self):
"""Uninstalls all apps found on the device.""" """Uninstalls all apps found on the device."""
for app in subprocess.check_output( for app in self.get_installed_packages():
['idevicefs', '--udid', self.udid, 'ls', '@']).splitlines(): cmd = ['ideviceinstaller', '--udid', self.udid, '--uninstall', app]
subprocess.check_call( print_process_output(self.start_proc(cmd))
['ideviceinstaller', '--udid', self.udid, '--uninstall', app])
def install_app(self): def install_app(self):
"""Installs the app.""" """Installs the app."""
subprocess.check_call( cmd = ['ideviceinstaller', '--udid', self.udid, '--install', self.app_path]
['ideviceinstaller', '--udid', self.udid, '--install', self.app_path]) print_process_output(self.start_proc(cmd))
def get_installed_packages(self):
"""Gets a list of installed packages on a device.
Returns:
A list of installed packages on a device.
"""
cmd = ['idevicefs', '--udid', self.udid, 'ls', '@']
return print_process_output(self.start_proc(cmd))
def set_up(self): def set_up(self):
"""Performs setup actions which must occur prior to every test launch.""" """Performs setup actions which must occur prior to every test launch."""
...@@ -1644,14 +1664,15 @@ class DeviceTestRunner(TestRunner): ...@@ -1644,14 +1664,15 @@ class DeviceTestRunner(TestRunner):
def extract_test_data(self): def extract_test_data(self):
"""Extracts data emitted by the test.""" """Extracts data emitted by the test."""
try: cmd = [
subprocess.check_call([
'idevicefs', 'idevicefs',
'--udid', self.udid, '--udid', self.udid,
'pull', 'pull',
'@%s/Documents' % self.cfbundleid, '@%s/Documents' % self.cfbundleid,
os.path.join(self.out_dir, 'Documents'), os.path.join(self.out_dir, 'Documents'),
]) ]
try:
print_process_output(self.start_proc(cmd))
except subprocess.CalledProcessError: except subprocess.CalledProcessError:
raise TestDataExtractionError() raise TestDataExtractionError()
...@@ -1673,13 +1694,14 @@ class DeviceTestRunner(TestRunner): ...@@ -1673,13 +1694,14 @@ class DeviceTestRunner(TestRunner):
"""Retrieves crash reports produced by the test.""" """Retrieves crash reports produced by the test."""
logs_dir = os.path.join(self.out_dir, 'Logs') logs_dir = os.path.join(self.out_dir, 'Logs')
os.mkdir(logs_dir) os.mkdir(logs_dir)
try: cmd = [
subprocess.check_call([
'idevicecrashreport', 'idevicecrashreport',
'--extract', '--extract',
'--udid', self.udid, '--udid', self.udid,
logs_dir, logs_dir,
]) ]
try:
print_process_output(self.start_proc(cmd))
except subprocess.CalledProcessError: except subprocess.CalledProcessError:
# TODO(crbug.com/828951): Raise the exception when the bug is fixed. # TODO(crbug.com/828951): Raise the exception when the bug is fixed.
LOGGER.warning('Failed to retrieve crash reports from device.') LOGGER.warning('Failed to retrieve crash reports from device.')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment