Commit 12a87df1 authored by jeun@chromium.org

[Findit] Plain objects to represent and parse stack trace.

NOTRY=true

Review URL: https://codereview.chromium.org/430943003

Cr-Commit-Position: refs/heads/master@{#288270}
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@288270 0039d316-1c4b-4281-b951-d872f2087c98
parent 42a54f01
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
class FileDictionary(object):
    """Crash information for the files seen in a stacktrace.

    The mapping has two levels: file name -> file path -> a record with three
    parallel lists stored under the keys 'line_numbers',
    'stack_frame_indices' and 'function'.
    """

    def __init__(self):
        """Creates an empty file dictionary."""
        self.file_dict = {}

    def AddFile(self, file_name, file_path, crashed_line_number,
                stack_frame_index, function):
        """Records one crash occurrence for a file.

        Args:
          file_name: The name of the crashed file.
          file_path: The path of the crashed file.
          crashed_line_number: The crashed line of the file.
          stack_frame_index: The file's position in the callstack.
          function: The name of the crashed function.
        """
        # Create the per-name and per-path entries on first sight.
        paths = self.file_dict.setdefault(file_name, {})
        if file_path not in paths:
            paths[file_path] = {
                'line_numbers': [],
                'stack_frame_indices': [],
                'function': [],
            }

        # Append this occurrence to each of the parallel lists.
        record = paths[file_path]
        record['line_numbers'].append(crashed_line_number)
        record['stack_frame_indices'].append(stack_frame_index)
        record['function'].append(function)

    def _CrashInfo(self, file_path):
        """Returns the record for file_path, keyed by the path's basename."""
        return self.file_dict[os.path.basename(file_path)][file_path]

    def GetPathDic(self, file_name):
        """Returns the path -> crash information mapping of a file name."""
        return self.file_dict[file_name]

    def GetCrashedLineNumbers(self, file_path):
        """Returns the crashed line numbers recorded for a file path."""
        return self._CrashInfo(file_path)['line_numbers']

    def GetCrashStackFrameindex(self, file_path):
        """Returns the stack frame indices recorded for a file path."""
        return self._CrashInfo(file_path)['stack_frame_indices']

    def GetCrashFunction(self, file_path):
        """Returns the crashed function names recorded for a file path."""
        return self._CrashInfo(file_path)['function']

    def __iter__(self):
        """Iterates over the file names that have been added."""
        return iter(self.file_dict)
class ComponentDictionary(object):
    """Maps each component (blink, chrome, etc) to its own file dictionary."""

    def __init__(self, components):
        """Creates one empty FileDictionary per given component."""
        self.component_dict = {
            component: FileDictionary() for component in components
        }

    def __iter__(self):
        """Iterates over the component names."""
        return iter(self.component_dict)

    def GetFileDict(self, component):
        """Returns the file dictionary of the given component."""
        return self.component_dict[component]

    def GenerateFileDict(self, stack_frame_list):
        """Fills the file dictionaries from a list of StackFrame objects.

        Frames whose component is not tracked by this dictionary are skipped.
        """
        for frame in stack_frame_list:
            # Only components given at construction time are of interest.
            if frame.component not in self.component_dict:
                continue

            # Record the frame under its component's file dictionary.
            self.component_dict[frame.component].AddFile(
                frame.file_name,
                frame.file_path,
                frame.crashed_line_number,
                frame.index,
                frame.function)
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import cgi
import json
import os
import time
import urllib
# Positive infinity, used in this module as the "no value available" result
# of the minimum-distance and minimum-stack-frame helpers.
INFINITY = float('inf')
def NormalizePathLinux(path):
    """Normalizes a linux path and determines the component it belongs to.

    Args:
      path: A string representing a path.

    Returns:
      A tuple (component, normalized_path): the component this path is in
      ('v8', 'blink' or 'chromium') and the path within that component's
      repository.
    """
    normalized_path = os.path.abspath(path)

    # TODO(jeun): Integrate with parsing DEPS file.
    # The three cases are mutually exclusive. They must be a single
    # if/elif/else chain, otherwise the 'v8' result is overwritten by the
    # fallback 'chromium' branch.
    if 'src/v8/' in normalized_path:
        component = 'v8'
        normalized_path = normalized_path.split('src/v8/', 1)[1]
    elif 'WebKit/' in normalized_path:
        component = 'blink'
        # Keep everything after the first 'WebKit/' intact, including any
        # later 'WebKit/' directory names.
        normalized_path = normalized_path.split('WebKit/', 1)[1]
    else:
        component = 'chromium'

    # Drop the build-bot checkout prefix, if there is one.
    if '/build/' in normalized_path:
        normalized_path = normalized_path.split('/build/')[-1]

    # Repository paths start with 'src/' (or 'Source/' for blink).
    if not normalized_path.startswith(('src/', 'Source/')):
        normalized_path = 'src/' + normalized_path

    return (component, normalized_path)
def SplitRange(regression):
    """Splits a revision range as retrieved from clusterfuzz.

    Args:
      regression: A string in format 'r1234:r5678'.

    Returns:
      A list with the two revisions as strings, without the leading 'r'
      (for example ['1234', '5678']), or None if the input does not consist
      of exactly two ':'-separated parts.
    """
    endpoints = regression.split(':')

    if len(endpoints) == 2:
        # Strip 'r' from both the start and the end of the range.
        return [endpoint.lstrip('r') for endpoint in endpoints]

    # Regression information is not available.
    return None
def LoadJSON(json_string):
    """Loads a JSON object from a string, or returns None.

    Args:
      json_string: A string to get the object from. None (or any other
        non-string value) is accepted and yields None.

    Returns:
      The decoded JSON value if the string is valid JSON, None otherwise.
    """
    try:
        return json.loads(json_string)
    except (ValueError, TypeError):
        # ValueError: malformed JSON. TypeError: input is not a string at
        # all, e.g. the None returned by a failed download.
        return None
def GetDataFromURL(url, retries=10, sleep_time=0.1):
    """Retrieves raw data from a URL, retrying on I/O failure.

    Args:
      url: URL to get data from.
      retries: Maximum number of connection attempts.
      sleep_time: Time in seconds to wait after a failed attempt.

    Returns:
      The raw data read from the URL, or None if every attempt failed.
    """
    # urllib.urlopen only exists on Python 2; Python 3 moved it to
    # urllib.request. Resolve it locally so the function works on both.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    for _ in range(retries):
        try:
            response = urlopen(url)
            # If retrieval is successful, return the data.
            if response:
                try:
                    return response.read()
                finally:
                    # Always release the connection, even if read() fails.
                    response.close()
        except IOError:
            # If retrieval fails, try again after sleep_time seconds.
            time.sleep(sleep_time)

    # Every attempt failed.
    return None
def FindMinLineDistance(crashed_line_list, changed_line_numbers):
    """Calculates how close the changed lines are to the crashed lines.

    Returns the smallest absolute difference between any crashed line and
    any changed line. For example, if the file crashed on line 200 and the
    CL changes lines 203, 204 and 205, the function returns 3.

    Args:
      crashed_line_list: A list of lines that the file crashed on.
      changed_line_numbers: A list of lines that the file changed.

    Returns:
      The minimum distance. If either of the input lists is empty,
      it returns inf.
    """
    closest = INFINITY

    for crashed_line in crashed_line_list:
        for changed_line in changed_line_numbers:
            # Keep the smallest gap seen so far.
            closest = min(closest, abs(crashed_line - changed_line))

    return closest
def GuessIfSameSubPath(path1, path2):
    """Guesses if two paths represent the same path.

    Splits both paths on '/' and counts the distinct path parts they have
    in common. The paths are considered the same when that count reaches
    3, or the number of parts of the shorter path if that is less than 3.

    Args:
      path1: First path.
      path2: Second path to compare.

    Returns:
      True if they are thought to be the same path, False otherwise.
    """
    parts1 = path1.split('/')
    parts2 = path2.split('/')

    shared_parts = set(parts1) & set(parts2)
    required_matches = min(3, len(parts1), len(parts2))

    return len(shared_parts) >= required_matches
def FindMinStackFrameNumber(stack_frame_indices, priorities):
    """Finds the minimum stack number among the highest-priority files.

    Args:
      stack_frame_indices: A list of lists containing stack positions, one
        inner list per file.
      priorities: A list of priorities, one per file, parallel to
        stack_frame_indices. A lower number means a higher priority.

    Returns:
      Inf if there are no priorities or no stack frame information for the
      highest-priority files, the minimum stack number otherwise.
    """
    # min() raises ValueError on an empty sequence, so handle "no files"
    # explicitly.
    if not priorities:
        return INFINITY

    # The highest priority is the lowest priority number.
    highest_priority = min(priorities)

    # Gather the stack frame numbers of every file that has that priority.
    candidates = []
    for position, priority in enumerate(priorities):
        if priority == highest_priority:
            candidates.extend(stack_frame_indices[position])

    # If no stack frame information is available, return inf.
    return min(candidates) if candidates else INFINITY
def AddHyperlink(text, link):
    """Returns the text wrapped in an HTML link tag.

    Both arguments are HTML-escaped; double quotes are additionally escaped
    in the link because it is placed inside a double-quoted attribute.

    Args:
      text: A string to add the link to.
      link: A link to add to the string.

    Returns:
      A string with the hyperlink added.
    """
    # cgi.escape() was removed in Python 3.8. Use html.escape() where it
    # exists and fall back to cgi.escape() on Python 2.
    try:
        from html import escape  # Python 3
    except ImportError:
        from cgi import escape  # Python 2

    sanitized_link = escape(link, quote=True)
    # quote=False matches the default of the original cgi.escape(text) call.
    sanitized_text = escape(text, quote=False)
    return '<a href="%s">%s</a>' % (sanitized_link, sanitized_text)
def PrettifyList(l):
    """Returns a string representation of a list without the brackets.

    The elements stay separated by commas, exactly as in str(l).

    Args:
      l: A list to prettify.

    Returns:
      A string representation of the list.
    """
    bracketed = str(l)
    # Drop the first and the last character, i.e. the '[' and the ']'.
    return bracketed[1:-1]
def PrettifyFiles(file_list):
    """Returns a string representation of a list of file names.

    The result starts with a newline, followed by one hyperlinked line per
    file.

    Args:
      file_list: A list of tuples, (file_name, file_url).

    Returns:
      A string representation of the file names with their urls.
    """
    linked_lines = [
        ' %s\n' % AddHyperlink(file_name, file_url)
        for file_name, file_url in file_list
    ]
    return '\n' + ''.join(linked_lines)
def Intersection(crashed_line_list, stack_frame_index, changed_line_numbers,
                 line_range=3):
    """Finds the overlap between changed lines and crashed lines.

    For every crashed line, the first changed line that falls on the crashed
    line or within 'line_range' lines before it counts as an intersection.

    Args:
      crashed_line_list: A list of lines that the file crashed on.
      stack_frame_index: A list of positions in stack for each of the lines.
      changed_line_numbers: A list of lines that the file changed.
      line_range: Number of lines to look backwards from crashed lines.

    Returns:
      line_intersection: Intersection between crashed_line_list and
                         changed_line_numbers.
      stack_frame_index_intersection: Stack number for each of the
                                      intersections.
    """
    line_intersection = []
    stack_frame_index_intersection = []

    # Walk the crashed lines together with their position in the stack.
    for crashed_line, frame_index in zip(crashed_line_list,
                                         stack_frame_index):
        # The crashed line itself plus the 'line_range' lines before it.
        nearby_lines = range(crashed_line - line_range, crashed_line + 1)

        for changed_line in changed_line_numbers:
            if changed_line in nearby_lines:
                # Prefer the crashed line itself when the CL changes it;
                # otherwise report the nearby changed line.
                if crashed_line in changed_line_numbers:
                    intersected_line = crashed_line
                else:
                    intersected_line = changed_line

                # Avoid adding the same line twice.
                if intersected_line not in line_intersection:
                    line_intersection.append(intersected_line)
                    stack_frame_index_intersection.append(frame_index)

                # Only the first changed line in range is considered.
                break

    return (line_intersection, stack_frame_index_intersection)
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
import re
import crash_utils
class StackFrame(object):
    """A single frame of a stacktrace.

    Attributes:
      index: An index of the stack frame.
      component: A component this line represents, such as blink, chrome,
        etc.
      file_name: The name of the file that crashed.
      function: The function that caused the crash.
      file_path: The path of the crashed file.
      crashed_line_number: The line of the file that caused the crash.
    """

    def __init__(self, stack_frame_index, component, file_name,
                 function, file_path, crashed_line_number):
        """Stores the given values; stack_frame_index is kept as 'index'."""
        # Position of the frame.
        self.index = stack_frame_index
        self.component = component

        # Location of the crash.
        self.file_name = file_name
        self.file_path = file_path
        self.crashed_line_number = crashed_line_number
        self.function = function
class CallStack(object):
    """A call stack within a stacktrace.

    Holds an ordered list of StackFrame objects together with the priority
    of the stack, which tells the crash stack apart from other stacks
    (freed, previously-allocated, ...).
    """

    def __init__(self, stack_priority):
        """Creates an empty call stack with the given priority."""
        self.priority = stack_priority
        self.frame_list = []

    def Add(self, stacktrace_line):
        """Appends a frame to the end of the stack."""
        self.frame_list.append(stacktrace_line)

    def GetTopNFrames(self, n):
        """Returns a new list with the first n frames of the stack."""
        return self.frame_list[0:n]
class Stacktrace(object):
    """Represents a stacktrace.

    Contains a list of callstacks, because one stacktrace might have more
    than one callstack.

    Attributes:
      stack_list: A list of CallStack objects in the order they were parsed,
        or None if the stacktrace given to ParseStacktrace was empty.
    """

    # Stack frame lines start with '#<index>'.
    _FRAME_INDEX_PATTERN = re.compile(r'#(\d+)')

    # In syzyasan builds a new stack starts with '<type> stack:'.
    _SYZYASAN_START_PATTERN = re.compile(r'^(.*) stack:$')

    # Callstack start patterns of tsan builds.
    _TSAN_CRASH_PATTERN = re.compile(r'^(Read|Write) of size \d+')
    _TSAN_OTHER_PATTERNS = (
        re.compile(r'^Previous (write|read) of size \d+'),
        re.compile(r'^Location is heap block of size \d+'),
    )

    # Callstack start patterns of asan and all remaining builds. The crash
    # stack can start in two different ways.
    _ASAN_CRASH_PATTERNS = (
        re.compile(r'^==\d+== ?ERROR:'),
        re.compile(r'^(READ|WRITE) of size \d+ at'),
    )
    _ASAN_OTHER_PATTERNS = (
        re.compile(r'^freed by thread T\d+ (.* )?here:'),
        re.compile(r'^previously allocated by thread T\d+ (.* )?here:'),
        re.compile(r'^Thread T\d+ (.* )?created by'),
    )

    def __init__(self, stacktrace, build_type):
        """Parses the given stacktrace for the given build type."""
        self.stack_list = []
        self.ParseStacktrace(stacktrace, build_type)

    def ParseStacktrace(self, stacktrace, build_type):
        """Parses stacktrace and normalizes it.

        If there are multiple callstacks within the stacktrace, each of them
        is parsed separately and stored in the stack_list attribute. Lines
        that appear before the first recognized callstack start are ignored.

        Args:
          stacktrace: The stacktrace, either as a single string or as an
            iterable of lines.
          build_type: A string containing the build type of the crash.
        """
        # If the stacktrace is empty, the object does not represent anything.
        if not stacktrace:
            self.stack_list = None
            return

        # Iterating a string directly would yield single characters, so
        # split it into lines first.
        if hasattr(stacktrace, 'splitlines'):
            stacktrace = stacktrace.splitlines()

        # Reset the stack list.
        self.stack_list = []

        # Note that we do not need the exact stack frame index, we only need
        # the relative position of a frame within a callstack. The reason for
        # not extracting the index from a line is that some stack frames do
        # not have an index.
        stack_frame_index = 0
        current_stack = None

        for line in stacktrace:
            (is_new_callstack, stack_priority) = self.__IsStartOfNewCallStack(
                line, build_type)

            if is_new_callstack:
                # Store the callstack collected so far, if it has frames,
                # and start a new one.
                if current_stack and current_stack.frame_list:
                    self.stack_list.append(current_stack)
                current_stack = CallStack(stack_priority)
                stack_frame_index = 0

            # Without a callstack start there is no stack (and no priority)
            # to attach a frame to, so skip the line instead of failing on
            # current_stack being None.
            if current_stack is None:
                continue

            # Generate a stack frame object from the line.
            parsed_stack_frame = self.__GenerateStackFrame(
                stack_frame_index, line, build_type)

            # If the line does not represent a stack frame, ignore it.
            if not parsed_stack_frame:
                continue

            # Add the parsed stack frame object to the current stack.
            current_stack.Add(parsed_stack_frame)
            stack_frame_index += 1

        # Add the last callstack only if there are frames in it.
        if current_stack and current_stack.frame_list:
            self.stack_list.append(current_stack)

    def __IsStartOfNewCallStack(self, line, build_type):
        """Checks if this line is the start of a new callstack.

        Since each build has a different stacktrace format, the logic for
        checking the line for all builds is handled in here.

        Args:
          line: Line to check for.
          build_type: The name of the build.

        Returns:
          A tuple (is_start, priority). is_start is True if the line is the
          start of a new callstack; priority is then 0 for the crash stack
          and 1 for any other stack. If the line is not a start, the tuple is
          (False, -1), where -1 is a dummy priority.
        """
        # Currently not supported.
        if 'android' in build_type:
            pass

        elif 'syzyasan' in build_type:
            match = self._SYZYASAN_START_PATTERN.match(line)
            if match:
                # Crash stack gets priority 0, all other callstacks get 1.
                if match.group(1) == 'Crash':
                    return (True, 0)
                return (True, 1)

        elif 'tsan' in build_type:
            # Crash stack gets priority 0.
            if self._TSAN_CRASH_PATTERN.match(line):
                return (True, 0)
            # All other stacks get priority 1.
            if any(p.match(line) for p in self._TSAN_OTHER_PATTERNS):
                return (True, 1)

        else:
            # Crash stack gets priority 0.
            if any(p.match(line) for p in self._ASAN_CRASH_PATTERNS):
                return (True, 0)
            # All other callstacks get priority 1.
            if any(p.match(line) for p in self._ASAN_OTHER_PATTERNS):
                return (True, 1)

        # The line does not match any pattern: return False and a dummy
        # stack priority.
        return (False, -1)

    def __GenerateStackFrame(self, stack_frame_index, line, build_type):
        """Extracts information from a line in a stacktrace.

        Args:
          stack_frame_index: A stack frame index of this line.
          line: A stacktrace string to extract data from.
          build_type: A string containing the build type
                      of this crash (e.g. linux_asan_chrome_mp).

        Returns:
          A StackFrame object for the line, or None if the line is not a
          stack frame, is malformed, or belongs to a component other than
          blink or chromium.
        """
        line_parts = line.split()

        try:
            # Filter out lines that are not stack frames.
            if not self._FRAME_INDEX_PATTERN.match(line_parts[0]):
                return None

            # Tsan has a different stack frame style from other builds.
            if build_type.startswith('linux_tsan'):
                file_path_and_line = line_parts[-2]
                function = ' '.join(line_parts[1:-2])
            else:
                file_path_and_line = line_parts[-1]
                function = ' '.join(line_parts[3:-1])

            # Get file path and line info from the line.
            file_path_and_line = file_path_and_line.split(':')
            file_path = file_path_and_line[0]
            crashed_line_number = int(file_path_and_line[1])

        # Return None if the line is malformed: too few parts (IndexError,
        # which also covers blank lines) or a non-numeric line number
        # (ValueError).
        except (IndexError, ValueError):
            return None

        # Normalize the file path so that it can be compared to the
        # repository path.
        file_name = os.path.basename(file_path)
        (component, file_path) = crash_utils.NormalizePathLinux(file_path)

        # FIXME(jeun): Add other components.
        if component not in ('blink', 'chromium'):
            return None

        # Return a new stack frame object with the parsed information.
        return StackFrame(stack_frame_index, component, file_name, function,
                          file_path, crashed_line_number)

    def __getitem__(self, index):
        """Returns the callstack at the given position of stack_list."""
        return self.stack_list[index]

    def GetCrashStack(self):
        """Returns the first callstack with priority 0, or None.

        None is returned when there is no crash stack, including the case
        where the parsed stacktrace was empty and stack_list is None.
        """
        for callstack in self.stack_list or []:
            # Only the crash stack has the priority 0.
            if callstack.priority == 0:
                return callstack
        return None
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment