Commit 058a951c authored by Sergey Ulanov's avatar Sergey Ulanov Committed by Commit Bot

[Fuchsia] Capture system logs from serial port when booting Qemu

When booting Fuchsia under qemu on test bots the system often hangs
before SSH can be connected. To debug this issue this CL adds
kernel.serial=legacy flag to Zircon and captures qemu output.

Bug: 869753
Change-Id: Ibe5e979612abc740ef5081bdc5fdb583b0c57722
Reviewed-on: https://chromium-review.googlesource.com/1157920
Commit-Queue: Sergey Ulanov <sergeyu@chromium.org>
Reviewed-by: default avatarKevin Marshall <kmarshall@chromium.org>
Cr-Commit-Position: refs/heads/master@{#580342}
parent 848864f6
......@@ -5,9 +5,11 @@
"""Implements commands for running and interacting with Fuchsia on devices."""
import boot_data
import log_reader
import logging
import os
import subprocess
import sys
import target
import time
import uuid
......@@ -17,6 +19,9 @@ from common import SDK_ROOT, EnsurePathExists
CONNECT_RETRY_COUNT = 20
CONNECT_RETRY_WAIT_SECS = 1
# Number of failed connection attempts before redirecting system logs to stdout.
CONNECT_RETRY_COUNT_BEFORE_LOGGING = 10
class DeviceTarget(target.Target):
def __init__(self, output_dir, target_cpu, host=None, port=None,
ssh_config=None):
......@@ -93,8 +98,20 @@ class DeviceTarget(target.Target):
logging.debug(' '.join(bootserver_command))
subprocess.check_call(bootserver_command)
# Setup loglistener. Logs will be redirected to stdout if the device takes
# longer than expected to boot.
loglistener_path = os.path.join(SDK_ROOT, 'tools', 'loglistener')
loglistener = subprocess.Popen([loglistener_path, node_name],
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
stdin=open(os.devnull))
self._SetSystemLogsReader(
log_reader.LogReader(loglistener, loglistener.stdout))
logging.debug('Waiting for device to join network.')
for _ in xrange(CONNECT_RETRY_COUNT):
for retry in xrange(CONNECT_RETRY_COUNT):
if retry == CONNECT_RETRY_COUNT_BEFORE_LOGGING:
self._system_logs_reader.RedirectTo(sys.stdout);
self._host = self.__Discover(node_name)
if self._host:
break
......
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import multiprocessing
import os
import Queue
import select
import threading
class LogReader(object):
"""Helper class used to read a debug log stream on a background thread.
By default all log messages are stored in an internal buffer. After
RedirectTo() is called all logs are redirected to the specified stream.
Optionally the caller can give onwnership of the log process to the reader.
That process will be killed automatically when the log reader is closed.
"""
def __init__(self, process, stream):
self._process = process
self._stream = stream
self._buffer = Queue.Queue()
# Start a background thread that redirects the stream to self._buffer.
self._StartThread(lambda line: self._buffer.put(line))
def _StartThread(self, on_message_received):
self._quit_pipe, thread_quit_pipe = multiprocessing.Pipe()
self._thread = threading.Thread(target=self._ReadThread,
args=(thread_quit_pipe,
on_message_received))
self._thread.daemon = True
self._thread.start()
def __enter__(self):
return self
def __exit__(self, exception_type, exception_value, traceback):
self.Close()
def __del__(self):
self.Close()
def _ReadThread(self, quit_pipe, on_message_received):
"""Main function for the background thread that reads |self._stream| and
calls |on_message_received| for each line."""
poll = select.poll()
poll.register(self._stream.fileno(), select.POLLIN)
poll.register(quit_pipe.fileno(), select.POLLIN)
quit = False
while not quit:
events = poll.poll()
for fileno, event in events:
if fileno == quit_pipe.fileno():
quit = True
break
assert fileno == self._stream.fileno()
if event & select.POLLIN:
on_message_received(self._stream.readline())
elif event & select.POLLHUP:
quit = True
def _StopThread(self):
if self._thread:
try:
self._quit_pipe.send("quit")
except IOError:
# The thread has already quit and closed the other end of the pipe.
pass
self._thread.join();
self._thread = None
def Close(self):
self._StopThread()
if self._process:
self._process.kill()
self._process = None
def RedirectTo(self, stream):
self._StopThread()
# Drain and reset the buffer.
while True:
try:
line = self._buffer.get_nowait()
stream.write(line)
except Queue.Empty:
break
# Restart the thread to write stream to stdout.
self._StartThread(lambda line: stream.write(line))
......@@ -5,6 +5,7 @@
"""Implements commands for running and interacting with Fuchsia on QEMU."""
import boot_data
import log_reader
import logging
import target
import os
......@@ -61,6 +62,10 @@ class QemuTarget(target.Target):
# noisy ANSI spew from the user's terminal emulator.
kernel_args.append('TERM=dumb')
# Enable logging to the serial port. This is a temporary fix to investigate
# the root cause for https://crbug.com/869753 .
kernel_args.append('kernel.serial=legacy')
qemu_command = [qemu_path,
'-m', str(self._ram_size_mb),
'-nographic',
......@@ -133,10 +138,13 @@ class QemuTarget(target.Target):
logging.debug('Launching QEMU.')
logging.debug(' '.join(qemu_command))
stdio_flags = {'stdin': open(os.devnull),
'stdout': open(os.devnull),
'stderr': open(os.devnull)}
self._qemu_process = subprocess.Popen(qemu_command, **stdio_flags)
# QEMU stderr/stdout are redirected to LogReader to debug
# https://crbug.com/86975 .
self._qemu_process = subprocess.Popen(qemu_command, stdin=open(os.devnull),
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
self._SetSystemLogsReader(
log_reader.LogReader(None, self._qemu_process.stdout))
self._WaitUntilReady();
def Shutdown(self):
......
......@@ -4,59 +4,18 @@
import boot_data
import common
import log_reader
import logging
import os
import remote_cmd
import subprocess
import sys
import tempfile
import time
import threading
import Queue
_SHUTDOWN_CMD = ['dm', 'poweroff']
_ATTACH_MAX_RETRIES = 10
_ATTACH_RETRY_INTERVAL = 1
class LoglistenerReader(object):
"""Helper class used to read loglistener output."""
def __init__(self, process):
self._process = process
self._queue = Queue.Queue()
self._thread = threading.Thread(target=self._read_thread)
self._thread.daemon = True
self._thread.start()
def __enter__(self):
return self
def __exit__(self, exception_type, exception_value, traceback):
self.close()
def _read_thread(self):
try:
while True:
line = self._process.stdout.readline()
if not line:
return
self._queue.put(line)
finally:
self._process.stdout.close()
def close(self):
self._process.kill()
def dump_logs(self):
while True:
try:
line = self._queue.get(block=False)
logging.info(line.strip())
except Queue.Empty:
return
class FuchsiaTargetException(Exception):
def __init__(self, message):
super(FuchsiaTargetException, self).__init__(message)
......@@ -70,6 +29,7 @@ class Target(object):
self._started = False
self._dry_run = False
self._target_cpu = target_cpu
self._system_logs_reader = None
# Functions used by the Python context manager for teardown.
def __enter__(self):
......@@ -185,19 +145,19 @@ class Target(object):
def _AssertIsStarted(self):
assert self.IsStarted()
def _SetSystemLogsReader(self, reader):
if self._system_logs_reader:
_system_logs_reader.Close()
self._system_logs_reader = reader
def _WaitUntilReady(self, retries=_ATTACH_MAX_RETRIES):
logging.info('Connecting to Fuchsia using SSH.')
loglistener_path = os.path.join(common.SDK_ROOT, 'tools', 'loglistener')
instance_id = boot_data.GetNodeName(self._output_dir)
process = subprocess.Popen([loglistener_path, instance_id],
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
stdin=open(os.devnull))
with LoglistenerReader(process) as loglistener_reader:
try:
for retry in xrange(retries + 1):
if retry > 2:
# Log loglistener output after 2 failed SSH connection attempt.
loglistener_reader.dump_logs();
if retry == 2 and self._system_logs_reader:
# Log system logs after 2 failed SSH connection attempt.
self._system_logs_reader.RedirectTo(sys.stdout);
host, port = self._GetEndpoint()
if remote_cmd.RunSsh(self._GetSshConfigPath(), host, port, ['true'],
......@@ -206,7 +166,12 @@ class Target(object):
self._started = True
return True
time.sleep(_ATTACH_RETRY_INTERVAL)
logging.error('Timeout limit reached.')
finally:
if self._system_logs_reader:
self._system_logs_reader.Close()
self._system_logs_reader = None
logging.error('Timeout limit reached.')
raise FuchsiaTargetException('Couldn\'t connect using SSH.')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment