Commit 91fff9ef authored by jeun's avatar jeun Committed by Commit bot

[Findit] Fixed bugs and implemented some feature requests.

Fixed various bugs, including exception on stacktrace and unnamed variable in git parser.
Changed the algorithm to use file_path only instead of file_name.
Removed URL links from the result.
Added commit message to the final result.
Updated OWNERS.

NOTRY=true

Review URL: https://codereview.chromium.org/478763003

Cr-Commit-Position: refs/heads/master@{#291698}
parent e33d7ceb
stgao@chromium.org
jeun@chromium.org
\ No newline at end of file
......@@ -5,6 +5,7 @@
from threading import Lock, Thread
from common import utils
import crash_utils
class Blame(object):
......@@ -28,7 +29,7 @@ class Blame(object):
"""
def __init__(self, line_content, component_name, stack_frame_index,
file_name, line_number, author, revision,
file_name, line_number, author, revision, message,
url, range_start, range_end):
# Set all the variables from the arguments.
self.line_content = line_content
......@@ -38,6 +39,7 @@ class Blame(object):
self.line_number = line_number
self.author = author
self.revision = revision
self.message = message
self.url = url
self.range_start = range_start
self.range_end = range_end
......@@ -56,7 +58,8 @@ class BlameList(object):
def __getitem__(self, index):
  """Returns the blame entry at the given position in the blame list."""
  return self.blame_list[index]
def FindBlame(self, callstack, crash_revision_dict, regression_dict, parsers,
def FindBlame(self, callstack, component_to_crash_revision_dict,
component_to_regression_dict, parsers,
top_n_frames=10):
"""Given a stack within a stacktrace, retrieves blame information.
......@@ -65,10 +68,10 @@ class BlameList(object):
Args:
callstack: The list of stack frames.
crash_revision_dict: A dictionary that maps component to its crash
revision.
regression_dict: A dictionary that maps component to its revision
range.
component_to_crash_revision_dict: A dictionary that maps component to its
crash revision.
component_to_regression_dict: A dictionary that maps component to its
revision range.
parsers: A list of two parsers, svn_parser and git_parser
top_n_frames: A number of stack frames to show the blame result for.
"""
......@@ -80,23 +83,22 @@ class BlameList(object):
# If the component this line is from does not have a crash revision,
# it is not possible to get blame information, so ignore this line.
component_path = stack_frame.component_path
if component_path not in crash_revision_dict:
if component_path not in component_to_crash_revision_dict:
continue
crash_revision = crash_revision_dict[component_path]['revision']
crash_revision = component_to_crash_revision_dict[
component_path]['revision']
range_start = None
range_end = None
is_git = utils.IsGitHash(crash_revision)
if is_git:
repository_parser = parsers['git']
else:
repository_parser = parsers['svn']
repository_type = crash_utils.GetRepositoryType(crash_revision)
repository_parser = parsers[repository_type]
# If the revision is in SVN, and if regression information is available,
# get it. For Git, we cannot know the ordering between hash numbers.
if not is_git:
if regression_dict and component_path in regression_dict:
component_object = regression_dict[component_path]
if repository_type == 'svn':
if component_to_regression_dict and \
component_path in component_to_regression_dict:
component_object = component_to_regression_dict[component_path]
range_start = int(component_object['old_revision'])
range_end = int(component_object['new_revision'])
......@@ -120,20 +122,20 @@ class BlameList(object):
component_name = stack_frame.component_name
file_name = stack_frame.file_name
file_path = stack_frame.file_path
crashed_line_number = stack_frame.crashed_line_number
crashed_line_number = stack_frame.crashed_line_range[0]
# Parse blame information.
parsed_blame_info = repository_parser.ParseBlameInfo(
component_path, file_path, crashed_line_number, crash_revision)
# If it fails to retrieve information, do not do anything.
if not parsed_blame_info or len(parsed_blame_info) != 4:
if not parsed_blame_info:
return
# Create blame object from the parsed info and add it to the list.
(line_content, revision, author, url) = parsed_blame_info
(line_content, revision, author, url, message) = parsed_blame_info
blame = Blame(line_content, component_name, stack_frame_index, file_name,
crashed_line_number, author, revision, url,
crashed_line_number, author, revision, message, url,
range_start, range_end)
with self.blame_list_lock:
......
......@@ -5,6 +5,8 @@
import base64
import json
import os
import time
import urllib2
from common import utils
......@@ -64,9 +66,22 @@ def _GetComponentName(path, host_dirs):
return '_'.join(path.split('/'))
def _GetContentOfDEPS(url):
_, content = utils.GetHttpClient().Get(url, timeout=60)
return content
def _GetContentOfDEPS(url, retries=5, sleep_time=0.1):
  """Downloads the content of a DEPS file, retrying on HTTP errors.

  Args:
    url: The URL of the DEPS file to fetch.
    retries: Maximum number of fetch attempts before giving up.
    sleep_time: Seconds to sleep between consecutive attempts.

  Returns:
    The content returned by the HTTP client, or '' if every attempt fails.
  """
  attempt = 0
  while True:
    attempt += 1
    try:
      _, content = utils.GetHttpClient().Get(url, timeout=60)
      return content
    # TODO(jeun): Handle HTTP Errors, such as 404.
    except urllib2.HTTPError:
      # Retry after a short pause; give up once the attempt budget is spent.
      if attempt < retries:
        time.sleep(sleep_time)
      else:
        break
  return ''
def GetChromiumComponents(chromium_revision,
......
......@@ -2,8 +2,6 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
class FileDictionary(object):
"""Maps file in a stacktrace to its crash information.
......@@ -16,52 +14,41 @@ class FileDictionary(object):
"""Initializes the file dictionary."""
self.file_dict = {}
def AddFile(self, file_name, file_path, crashed_line_number,
stack_frame_index, function):
def AddFile(self, file_path, crashed_line_range, stack_frame_index,
function):
"""Adds file and its crash information to the map.
Args:
file_name: The name of the crashed file.
file_path: The path of the crashed file.
crashed_line_number: The crashed line of the file.
crashed_line_range: The crashed line of the file.
stack_frame_index: The file's position in the callstack.
function: The name of the crashed function.
"""
# Populate the dictionary if this file/path has not been added before.
if file_name not in self.file_dict:
self.file_dict[file_name] = {}
if file_path not in self.file_dict[file_name]:
self.file_dict[file_name][file_path] = {}
self.file_dict[file_name][file_path]['line_numbers'] = []
self.file_dict[file_name][file_path]['stack_frame_indices'] = []
self.file_dict[file_name][file_path]['function'] = []
# Populate the dictionary if this file path has not been added before.
if file_path not in self.file_dict:
self.file_dict[file_path] = {}
self.file_dict[file_path]['line_numbers'] = []
self.file_dict[file_path]['stack_frame_indices'] = []
self.file_dict[file_path]['function'] = []
# Add the crashed line, frame index and function name.
self.file_dict[file_name][file_path]['line_numbers'].append(
crashed_line_number)
self.file_dict[file_name][file_path]['stack_frame_indices'].append(
self.file_dict[file_path]['line_numbers'].append(
crashed_line_range)
self.file_dict[file_path]['stack_frame_indices'].append(
stack_frame_index)
self.file_dict[file_name][file_path]['function'].append(function)
def GetPathDic(self, file_name):
"""Returns file's path and crash information."""
return self.file_dict[file_name]
self.file_dict[file_path]['function'].append(function)
def GetCrashedLineNumbers(self, file_path):
"""Returns crashed line numbers given a file path."""
file_name = os.path.basename(file_path)
return self.file_dict[file_name][file_path]['line_numbers']
return self.file_dict[file_path]['line_numbers']
def GetCrashStackFrameindex(self, file_path):
def GetCrashStackFrameIndices(self, file_path):
"""Returns stack frame indices given a file path."""
file_name = os.path.basename(file_path)
return self.file_dict[file_name][file_path]['stack_frame_indices']
return self.file_dict[file_path]['stack_frame_indices']
def GetCrashFunction(self, file_path):
def GetCrashFunctions(self, file_path):
"""Returns list of crashed functions given a file path."""
file_name = os.path.basename(file_path)
return self.file_dict[file_name][file_path]['function']
return self.file_dict[file_path]['function']
def __iter__(self):
  """Iterates over the keys (file paths) of the underlying dictionary."""
  return iter(self.file_dict)
......@@ -99,18 +86,17 @@ class ComponentDictionary(object):
continue
# Get values of the variables
file_name = stack_frame.file_name
file_path = stack_frame.file_path
crashed_line_number = stack_frame.crashed_line_number
crashed_line_range = stack_frame.crashed_line_range
stack_frame_index = stack_frame.index
function = stack_frame.function
# Add the file to this component's dictionary of files.
file_dict = self.component_dict[component_path]
file_dict.AddFile(file_name, file_path, crashed_line_number,
stack_frame_index, function)
file_dict.AddFile(file_path, crashed_line_range, stack_frame_index,
function)
def __CreateFileDictFromCallstack(self, callstack, top_n_frames=15):
def __CreateFileDictFromCallstack(self, callstack, top_n_frames=10):
"""Creates a file dict that maps a file to the occurrence in the stack.
Args:
......
[svn:src/]
changelog_url: http://build.chromium.org/cgi-bin/svn-log?url=http://src.chromium.org/svn/trunk/src/&range=%s
revision_url: http://src.chromium.org/viewvc/chrome?revision=%d&view=revision
diff_url: http://src.chromium.org/viewvc/chrome%s?r1=%d&r2=%d&pathrev=%d
diff_url: http://src.chromium.org/viewvc/chrome/trunk/%s?r1=%d&r2=%d&pathrev=%d
blame_url: http://src.chromium.org/viewvc/chrome/trunk/%s?annotate=%s&pathrev=%s
[svn:src/third_party/WebKit/]
changelog_url: http://build.chromium.org/cgi-bin/svn-log?url=http://src.chromium.org/blink/trunk/&range=%s
revision_url: http://src.chromium.org/viewvc/blink?revision=%d&view=revision
diff_url: http://src.chromium.org/viewvc/blink%s?r1=%d&r2=%d&pathrev=%d
diff_url: http://src.chromium.org/viewvc/blink/trunk/%s?r1=%d&r2=%d&pathrev=%d
blame_url: http://src.chromium.org/viewvc/blink/trunk/%s?annotate=%s&pathrev=%s
[svn:src/third_party/WebKit/Source/]
changelog_url: http://build.chromium.org/cgi-bin/svn-log?url=http://src.chromium.org/blink/trunk/&range=%s
revision_url: http://src.chromium.org/viewvc/blink?revision=%d&view=revision
diff_url: http://src.chromium.org/viewvc/blink%s?r1=%d&r2=%d&pathrev=%d
diff_url: http://src.chromium.org/viewvc/blink/trunk/%s?r1=%d&r2=%d&pathrev=%d
blame_url: http://src.chromium.org/viewvc/blink/trunk/%s?annotate=%s&pathrev=%s
[git]
changelog_url: /+log/%s..%s
revision_url: /+/%s
diff_url: /%s^!/%s
diff_url: /+/%s^!/%s
blame_url: /+blame/%s/%s?format=json
[codereview]
......
This diff is collapsed.
# Copyright 2014 The Chromium Authors. All rights reserved.
# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import logging
import chromium_deps
from common import utils
import crash_utils
import findit_for_crash as findit
import stacktrace
......@@ -33,7 +32,7 @@ def SplitStacktrace(stacktrace_string):
line = line.strip()
# If the line starts with +, it signifies the start of new stacktrace.
if line.startswith('+'):
if line.startswith('+-') and line.endswith('-+'):
if 'Release Build Stacktrace' in line:
in_release_or_debug_stacktrace = True
current_stacktrace_lines = []
......@@ -63,9 +62,15 @@ def FindCulpritCLs(stacktrace_string,
component_regression=None,
chrome_crash_revision=None,
component_crash_revision=None,
crashing_component=None):
crashing_component_path=None,
crashing_component_name=None,
crashing_component_repo_url=None):
"""Returns the result, a list of result.Result objects and message.
If either or both of component_regression and component_crash_revision is not
None, it is assumed that crashing_component_path and
crashing_component_repo_url are not None.
Args:
stacktrace_string: A string representing stacktrace.
build_type: The type of the job.
......@@ -75,27 +80,23 @@ def FindCulpritCLs(stacktrace_string,
chrome_crash_revision: A crash revision of chrome, in string.
component_crash_revision: A crash revision of the component,
if component build.
crashing_component: Yet to be decided.
crashing_component_path: A relative path of the crashing component, as in
DEPS file. For example, it would be 'src/v8' for
v8 and 'src/third_party/WebKit' for blink.
crashing_component_name: A name of the crashing component, such as v8.
crashing_component_repo_url: The URL of the crashing component's repo, as
shown in DEPS file. For example,
'https://chromium.googlesource.com/skia.git'
for skia.
Returns:
A list of result objects, along with the short description on where the
result is from.
"""
build_type = build_type.lower()
if 'syzyasan' in build_type:
return ('This build type is currently not supported.', [])
logging.basicConfig(filename='errors.log', level=logging.WARNING,
filemode='w')
component_to_crash_revision_dict = {}
component_to_regression_dict = {}
# TODO(jeun): Come up with a good way to connect crashing component name to
# its path.
if component_regression or component_crash_revision:
return ('Component builds are not supported yet.', [])
# If chrome regression is available, parse DEPS file.
chrome_regression = crash_utils.SplitRange(chrome_regression)
if chrome_regression:
......@@ -113,6 +114,65 @@ def FindCulpritCLs(stacktrace_string,
component_to_crash_revision_dict = chromium_deps.GetChromiumComponents(
chrome_crash_revision)
# Check if component regression information is available.
component_regression = crash_utils.SplitRange(component_regression)
if component_regression:
component_regression_start = component_regression[0]
component_regression_end = component_regression[1]
# If this component already has an entry in parsed DEPS file, overwrite
# regression range and url.
if crashing_component_path in component_to_regression_dict:
component_regression_info = \
component_to_regression_dict[crashing_component_path]
component_regression_info['old_revision'] = component_regression_start
component_regression_info['new_revision'] = component_regression_end
component_regression_info['repository'] = crashing_component_repo_url
# if this component does not have an entry, add the entry to the parsed
# DEPS file.
else:
repository_type = crash_utils.GetRepositoryType(
component_regression_start)
component_regression_info = {
'path': crashing_component_path,
'rolled': True,
'name': crashing_component_name,
'old_revision': component_regression_start,
'new_revision': component_regression_end,
'repository': crashing_component_repo_url,
'repository_type': repository_type
}
component_to_regression_dict[crashing_component_path] = \
component_regression_info
# If component crash revision is available, add it to the parsed crash
# revisions.
if component_crash_revision:
# If this component has already a crash revision info, overwrite it.
if crashing_component_path in component_to_crash_revision_dict:
component_crash_revision_info = \
component_to_crash_revision_dict[crashing_component_path]
component_crash_revision_info['revision'] = component_crash_revision
component_crash_revision_info['repository'] = crashing_component_repo_url
# If not, add it to the parsed DEPS.
else:
if utils.IsGitHash(component_crash_revision):
repository_type = 'git'
else:
repository_type = 'svn'
component_crash_revision_info = {
'path': crashing_component_path,
'name': crashing_component_name,
'repository': crashing_component_repo_url,
'repository_type': repository_type,
'revision': component_crash_revision
}
component_to_crash_revision_dict[crashing_component_path] = \
component_crash_revision_info
# Parsed DEPS is used to normalize the stacktrace. Since parsed regression
# and parsed crash state essentially contain same information, use either.
if component_to_regression_dict:
......@@ -126,8 +186,13 @@ def FindCulpritCLs(stacktrace_string,
# Split stacktrace into release build/debug build and parse them.
(release_build_stacktrace, debug_build_stacktrace) = SplitStacktrace(
stacktrace_string)
parsed_release_build_stacktrace = stacktrace.Stacktrace(
release_build_stacktrace, build_type, parsed_deps)
if not (release_build_stacktrace or debug_build_stacktrace):
parsed_release_build_stacktrace = stacktrace.Stacktrace(
stacktrace_string.splitlines(), build_type, parsed_deps)
else:
parsed_release_build_stacktrace = stacktrace.Stacktrace(
release_build_stacktrace, build_type, parsed_deps)
parsed_debug_build_stacktrace = stacktrace.Stacktrace(
debug_build_stacktrace, build_type, parsed_deps)
......@@ -139,6 +204,9 @@ def FindCulpritCLs(stacktrace_string,
elif parsed_debug_build_stacktrace.stack_list:
main_stack = parsed_debug_build_stacktrace.GetCrashStack()
else:
if 'mac_' in build_type:
return ('No line information available in stacktrace.', [])
return ('Stacktrace is malformed.', [])
# Run the algorithm on the parsed stacktrace, and return the result.
......
This diff is collapsed.
# Copyright 2014 The Chromium Authors. All rights reserved.
# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import base64
import logging
import os
import xml.dom.minidom as minidom
from xml.parsers.expat import ExpatError
import crash_utils
from repository_parser_interface import ParserInterface
# Maps googlesource file-change types to the single-letter codes used by the
# SVN parser ('A' = added, 'D' = deleted, 'M' = modified).
FILE_CHANGE_TYPE_MAP = {'add': 'A', 'delete': 'D', 'modify': 'M'}
class GitParser(ParserInterface):
"""Parser for Git repository in googlesource.
......@@ -39,14 +43,16 @@ class GitParser(ParserInterface):
html_url = url + '?pretty=fuller'
response = crash_utils.GetDataFromURL(html_url)
if not response:
logging.error('Failed to retrieve changelog from %s', html_url)
return (revision_map, file_to_revision_map)
# Parse xml out of the returned string. If it fails, return empty map.
# Parse xml out of the returned string. If it fails, try parsing
# from JSON objects.
try:
dom = minidom.parseString(response)
except ExpatError:
logging.error('Failed to parse changelog from %s', url)
self.ParseChangelogFromJSON(range_start, range_end, changelog_url,
revision_url, revision_map,
file_to_revision_map)
return (revision_map, file_to_revision_map)
# The revisions information are in from the third divs to the second
......@@ -93,22 +99,16 @@ class GitParser(ParserInterface):
for li in lis:
# Retrieve path and action of the changed file
file_path = li.getElementsByTagName('a')[0].firstChild.nodeValue
file_action = li.getElementsByTagName('span')[0].getAttribute('class')
file_change_type = li.getElementsByTagName('span')[
0].getAttribute('class')
# Normalize the file action so that it is the same as the SVN parser's.
if file_action == 'add':
file_action = 'A'
elif file_action == 'delete':
file_action = 'D'
elif file_action == 'modify':
file_action = 'M'
file_change_type = FILE_CHANGE_TYPE_MAP[file_change_type]
# Add the changed file to the map.
changed_file = os.path.basename(file_path)
if changed_file not in file_to_revision_map:
file_to_revision_map[changed_file] = []
file_to_revision_map[changed_file].append((githash, file_action,
file_path))
if file_path not in file_to_revision_map:
file_to_revision_map[file_path] = []
file_to_revision_map[file_path].append((githash, file_change_type))
# Add this revision object to the map.
revision_map[githash] = revision
......@@ -137,14 +137,12 @@ class GitParser(ParserInterface):
json_url = changelog_url + '?format=json'
response = crash_utils.GetDataFromURL(json_url)
if not response:
logging.error('Failed to retrieve changelog from %s.', json_url)
return
# Parse changelog from the returned object. The returned string should
# start with ")}]'\n", so start from the 6th character.
revisions = crash_utils.LoadJSON(response[5:])
if not revisions:
logging.error('Failed to parse changelog from %s.', json_url)
return
# Parse individual revision in the log.
......@@ -165,13 +163,11 @@ class GitParser(ParserInterface):
url = revision_url % githash
response = crash_utils.GetDataFromURL(url + '?format=json')
if not response:
logging.warning('Failed to retrieve revision from %s.', url)
return
# Load JSON object from the string. If it fails, terminate the function.
json_revision = crash_utils.LoadJSON(response[5:])
if not json_revision:
logging.warning('Failed to parse revision from %s.', url)
return
# Create a map representing object and get githash from the JSON object.
......@@ -186,43 +182,35 @@ class GitParser(ParserInterface):
# Iterate through the changed files.
for diff in json_revision['tree_diff']:
file_path = diff['new_path']
file_action = diff['type']
file_change_type = diff['type']
# Normalize file action so that it fits with svn_repository_parser.
if file_action == 'add':
file_action = 'A'
elif file_action == 'delete':
file_action = 'D'
elif file_action == 'modify':
file_action = 'M'
file_change_type = FILE_CHANGE_TYPE_MAP[file_change_type]
# Add the file to the map.
changed_file = os.path.basename(file_path)
if changed_file not in file_to_revision_map:
file_to_revision_map[changed_file] = []
file_to_revision_map[changed_file].append(
(githash, file_action, file_path))
if file_path not in file_to_revision_map:
file_to_revision_map[file_path] = []
file_to_revision_map[file_path].append((githash, file_change_type))
# Add this CL to the map.
revision_map[githash] = revision
return
def ParseLineDiff(self, path, component, file_action, githash):
def ParseLineDiff(self, path, component, file_change_type, githash):
changed_line_numbers = []
changed_line_contents = []
base_url = self.component_to_url_map[component]['repository']
backup_url = (base_url + self.url_parts_map['revision_url']) % githash
# If the file is added (not modified), treat it as if it is not changed.
if file_action == 'A':
if file_change_type == 'A':
return (backup_url, changed_line_numbers, changed_line_contents)
# Retrieves the diff data from URL, and if it fails, returns empty lines.
url = (base_url + self.url_parts_map['diff_url']) % (githash, path)
data = crash_utils.GetDataFromURL(url + '?format=text')
if not data:
logging.error('Failed to get diff from %s.', url)
return (backup_url, changed_line_numbers, changed_line_contents)
# Decode the returned object to line diff info
......@@ -260,16 +248,12 @@ class GitParser(ParserInterface):
blame_url = base_url + url_part
json_string = crash_utils.GetDataFromURL(blame_url)
if not json_string:
logging.error('Failed to retrieve annotation information from %s.',
blame_url)
return
# Parse JSON object from the string. The returned string should
# start with ")}]'\n", so start from the 6th character.
annotation = crash_utils.LoadJSON(json_string[5:])
if not annotation:
logging.error('Failed to parse annotation information from %s.',
blame_url)
return
# Go through the regions, which is a list of consecutive lines with same
......@@ -289,7 +273,9 @@ class GitParser(ParserInterface):
# TODO(jeun): Add a way to get content from JSON object.
content = None
return (content, revision, author, revision_url)
(revision_info, _) = self.ParseChangelog(component, revision, revision)
message = revision_info[revision]['message']
return (content, revision, author, revision_url, message)
# Return none if the region does not exist.
return None
......@@ -2,7 +2,6 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import logging
import re
from threading import Lock
......@@ -10,7 +9,7 @@ from threading import Lock
import crash_utils
REVIEW_URL_PATTERN = re.compile(r'Review URL:( *)(.*)')
REVIEW_URL_PATTERN = re.compile(r'Review URL:( *)(.*?)/(\d+)')
class Match(object):
......@@ -20,7 +19,7 @@ class Match(object):
contains information about files it changes, their authors, etc.
Attributes:
is_reverted: True if this CL is reverted by other CL.
is_revert: True if this CL is reverted by other CL.
revert_of: If this CL is a revert of some other CL, a revision number/
git hash of that CL.
crashed_line_numbers: The list of lines that caused crash for this CL.
......@@ -33,10 +32,9 @@ class Match(object):
component_name: The name of the component that this CL belongs to.
stack_frame_indices: For files that caused crash, list of where in the
stackframe they occur.
rank: The highest priority among the files the CL changes. Priority = 1
if it changes the crashed line, and priority = 2 if it is a simple
file change.
priorities: A list of priorities for each of the changed file.
priorities: A list of priorities for each of the changed file. A priority
is 1 if the file changes a crashed line, and 2 if it changes
the file but not the crashed line.
revision_url: The revision URL of the CL.
review_url: The codereview URL that reviews this CL.
reviewers: The list of people that reviewed this CL.
......@@ -45,17 +43,18 @@ class Match(object):
REVERT_PATTERN = re.compile(r'(revert\w*) r?(\d+)', re.I)
def __init__(self, revision, component_name):
self.is_reverted = False
self.is_revert = False
self.revert_of = None
self.message = None
self.crashed_line_numbers = []
self.function_list = []
self.min_distance = crash_utils.INFINITY
self.min_distance_info = None
self.changed_files = []
self.changed_file_urls = []
self.author = revision['author']
self.component_name = component_name
self.stack_frame_indices = []
self.rank = crash_utils.INFINITY
self.priorities = []
self.revision_url = revision['url']
self.review_url = ''
......@@ -72,6 +71,7 @@ class Match(object):
message: The message to parse.
codereview_api_url: URL to retrieve codereview data from.
"""
self.message = message
for line in message.splitlines():
line = line.strip()
review_url_line_match = REVIEW_URL_PATTERN.match(line)
......@@ -80,14 +80,12 @@ class Match(object):
if review_url_line_match:
# Get review number for the code review site from the line.
issue_number = review_url_line_match.group(2)
issue_number = review_url_line_match.group(3)
# Get JSON from the code review site, ignore the line if it fails.
url = codereview_api_url % issue_number
json_string = crash_utils.GetDataFromURL(url)
if not json_string:
logging.warning('Failed to retrieve code review information from %s',
url)
continue
# Load the JSON from the string, and get the list of reviewers.
......@@ -97,7 +95,7 @@ class Match(object):
# Check if this CL is a revert of other CL.
if line.lower().startswith('revert'):
self.is_reverted = True
self.is_revert = True
# Check if the line says what CL this CL is a revert of.
revert = self.REVERT_PATTERN.match(line)
......
# Copyright 2014 The Chromium Authors. All rights reserved.
# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
......
......@@ -6,7 +6,7 @@
class Result(object):
def __init__(self, suspected_cl, revision_url, component_name, author,
reason, review_url, reviewers, line_content):
reason, review_url, reviewers, line_content, message):
self.suspected_cl = suspected_cl
self.revision_url = revision_url
self.component_name = component_name
......@@ -15,3 +15,4 @@ class Result(object):
self.review_url = review_url
self.reviewers = reviewers
self.line_content = line_content
self.commit_message = message
......@@ -2,12 +2,16 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
import re
import crash_utils
SYZYASAN_STACK_FRAME_PATTERN = re.compile(
r'(CF: )?(.*?)( \(FPO: .*\) )?( \(CONV: .*\) )?\[(.*) @ (\d+)\]')
FILE_PATH_AND_LINE_PATTERN = re.compile(r'(.*?):(\d+)(:\d+)?')
class StackFrame(object):
"""Represents a frame in stacktrace.
......@@ -18,18 +22,18 @@ class StackFrame(object):
file_name: The name of the file that crashed.
function: The function that caused the crash.
file_path: The path of the crashed file.
crashed_line_number: The line of the file that caused the crash.
crashed_line_range: The line of the file that caused the crash.
"""
def __init__(self, stack_frame_index, component_path, component_name,
file_name, function, file_path, crashed_line_number):
file_name, function, file_path, crashed_line_range):
self.index = stack_frame_index
self.component_path = component_path
self.component_name = component_name
self.file_name = file_name
self.function = function
self.file_path = file_path
self.crashed_line_number = crashed_line_number
self.crashed_line_range = crashed_line_range
class CallStack(object):
......@@ -59,10 +63,9 @@ class Stacktrace(object):
def __init__(self, stacktrace, build_type, parsed_deps):
self.stack_list = None
self.parsed_deps = parsed_deps
self.ParseStacktrace(stacktrace, build_type)
self.ParseStacktrace(stacktrace, build_type, parsed_deps)
def ParseStacktrace(self, stacktrace, build_type):
def ParseStacktrace(self, stacktrace, build_type, parsed_deps):
"""Parses stacktrace and normalizes it.
If there are multiple callstacks within the stacktrace,
......@@ -72,11 +75,11 @@ class Stacktrace(object):
Args:
stacktrace: A string containing stacktrace.
build_type: A string containing the build type of the crash.
parsed_deps: A parsed DEPS file to normalize path with.
"""
# If the passed in string is empty, the object does not represent anything.
if not stacktrace:
return
# Reset the stack list.
self.stack_list = []
reached_new_callstack = False
......@@ -84,14 +87,13 @@ class Stacktrace(object):
# position of a frame within a callstack. The reason for not extracting
# index from a line is that some stack frames do not have index.
stack_frame_index = 0
current_stack = None
current_stack = CallStack(-1)
for line in stacktrace:
line = line.strip()
(is_new_callstack, stack_priority) = self.__IsStartOfNewCallStack(
line, build_type)
if is_new_callstack:
# If this callstack is crash stack, update the boolean.
if not reached_new_callstack:
reached_new_callstack = True
......@@ -107,7 +109,7 @@ class Stacktrace(object):
# Generate stack frame object from the line.
parsed_stack_frame = self.__GenerateStackFrame(
stack_frame_index, line, build_type)
stack_frame_index, line, build_type, parsed_deps)
# If the line does not represent the stack frame, ignore this line.
if not parsed_stack_frame:
......@@ -135,11 +137,7 @@ class Stacktrace(object):
True if the line is the start of new callstack, False otherwise. If True,
it also returns the priority of the line.
"""
# Currently not supported.
if 'android' in build_type:
pass
elif 'syzyasan' in build_type:
if 'syzyasan' in build_type:
# In syzyasan build, new stack starts with 'crash stack:',
# 'freed stack:', etc.
callstack_start_pattern = re.compile(r'^(.*) stack:$')
......@@ -160,9 +158,12 @@ class Stacktrace(object):
elif 'tsan' in build_type:
# Create patterns for each callstack type.
crash_callstack_start_pattern = re.compile(
crash_callstack_start_pattern1 = re.compile(
r'^(Read|Write) of size \d+')
crash_callstack_start_pattern2 = re.compile(
r'^[A-Z]+: ThreadSanitizer')
allocation_callstack_start_pattern = re.compile(
r'^Previous (write|read) of size \d+')
......@@ -170,7 +171,8 @@ class Stacktrace(object):
r'^Location is heap block of size \d+')
# Crash stack gets priority 0.
if crash_callstack_start_pattern.match(line):
if (crash_callstack_start_pattern1.match(line) or
crash_callstack_start_pattern2.match(line)):
return (True, 0)
# All other stacks get priority 1.
......@@ -183,9 +185,10 @@ class Stacktrace(object):
else:
# In asan and other build types, crash stack can start
# in two different ways.
crash_callstack_start_pattern1 = re.compile(r'^==\d+== ?ERROR:')
crash_callstack_start_pattern1 = re.compile(r'^==\d+== ?[A-Z]+:')
crash_callstack_start_pattern2 = re.compile(
r'^(READ|WRITE) of size \d+ at')
crash_callstack_start_pattern3 = re.compile(r'^backtrace:')
freed_callstack_start_pattern = re.compile(
r'^freed by thread T\d+ (.* )?here:')
......@@ -198,7 +201,8 @@ class Stacktrace(object):
# Crash stack gets priority 0.
if (crash_callstack_start_pattern1.match(line) or
crash_callstack_start_pattern2.match(line)):
crash_callstack_start_pattern2.match(line) or
crash_callstack_start_pattern3.match(line)):
return (True, 0)
# All other callstack gets priority 1.
......@@ -215,7 +219,8 @@ class Stacktrace(object):
# stack priority.
return (False, -1)
def __GenerateStackFrame(self, stack_frame_index, line, build_type):
def __GenerateStackFrame(self, stack_frame_index, line, build_type,
parsed_deps):
"""Extracts information from a line in stacktrace.
Args:
......@@ -223,32 +228,59 @@ class Stacktrace(object):
line: A stacktrace string to extract data from.
build_type: A string containing the build type
of this crash (e.g. linux_asan_chrome_mp).
parsed_deps: A parsed DEPS file to normalize path with.
Returns:
A triple containing the name of the function, the path of the file and
the crashed line number.
"""
line_parts = line.split()
try:
# Filter out lines that are not stack frame.
stack_frame_index_pattern = re.compile(r'#(\d+)')
if not stack_frame_index_pattern.match(line_parts[0]):
return None
# Tsan has different stack frame style from other builds.
if build_type.startswith('linux_tsan'):
file_path_and_line = line_parts[-2]
function = ' '.join(line_parts[1:-2])
if 'syzyasan' in build_type:
stack_frame_match = SYZYASAN_STACK_FRAME_PATTERN.match(line)
if not stack_frame_match:
return None
file_path = stack_frame_match.group(5)
crashed_line_range = [int(stack_frame_match.group(6))]
function = stack_frame_match.group(2)
else:
file_path_and_line = line_parts[-1]
function = ' '.join(line_parts[3:-1])
if not line_parts[0].startswith('#'):
return None
if 'tsan' in build_type:
file_path_and_line = line_parts[-2]
function = ' '.join(line_parts[1:-2])
else:
file_path_and_line = line_parts[-1]
function = ' '.join(line_parts[3:-1])
# Get file path and line info from the line.
file_path_and_line_match = FILE_PATH_AND_LINE_PATTERN.match(
file_path_and_line)
# Return None if the file path information is not available
if not file_path_and_line_match:
return None
file_path = file_path_and_line_match.group(1)
# Get file path and line info from the line.
file_path_and_line = file_path_and_line.split(':')
file_path = file_path_and_line[0]
crashed_line_number = int(file_path_and_line[1])
# Get the crashed line range. For example, file_path:line_number:range.
crashed_line_range_num = file_path_and_line_match.group(3)
if crashed_line_range_num:
# Strip ':' prefix.
crashed_line_range_num = int(crashed_line_range_num[1:])
else:
crashed_line_range_num = 0
crashed_line_number = int(file_path_and_line_match.group(2))
# For example, 655:1 has crashed lines 655 and 656.
crashed_line_range = \
range(crashed_line_number,
crashed_line_number + crashed_line_range_num + 1)
# Return None if the line is malformed.
except IndexError:
......@@ -257,17 +289,13 @@ class Stacktrace(object):
return None
# Normalize the file path so that it can be compared to repository path.
file_name = os.path.basename(file_path)
(component_path, component_name, file_path) = (
crash_utils.NormalizePathLinux(file_path, self.parsed_deps))
# If this component is not supported, ignore this line.
if not component_path:
return None
crash_utils.NormalizePath(file_path, parsed_deps))
# Return a new stack frame object with the parsed information.
file_name = file_path.split('/')[-1]
return StackFrame(stack_frame_index, component_path, component_name,
file_name, function, file_path, crashed_line_number)
file_name, function, file_path, crashed_line_range)
def __getitem__(self, index):
return self.stack_list[index]
......
# Copyright 2014 The Chromium Authors. All rights reserved.
# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import logging
import os
import xml.dom.minidom as minidom
from xml.parsers.expat import ExpatError
......@@ -41,7 +39,6 @@ class SVNParser(ParserInterface):
url_map = self.component_to_urls_map.get(component)
if not url_map:
logging.error('Component %s is not currently supported.', component)
return (revision_map, file_to_revision_map)
# Retrieve data from the url, return empty map if fails.
......@@ -49,16 +46,12 @@ class SVNParser(ParserInterface):
url = url_map['changelog_url'] % revision_range_str
response = crash_utils.GetDataFromURL(url)
if not response:
logging.error('Failed to retrieve changelog from %s, range %s.',
url, revision_range_str)
return (revision_map, file_to_revision_map)
# Parse xml out of the returned string. If it fails, return empty map.
try:
xml_revisions = minidom.parseString(response)
except ExpatError:
logging.error('Failed to parse changelog from %s, range %s.',
url, revision_range_str)
return (revision_map, file_to_revision_map)
# Iterate through the returned XML object.
......@@ -78,16 +71,18 @@ class SVNParser(ParserInterface):
paths = revision.getElementsByTagName('paths')
if paths:
for changed_path in paths[0].getElementsByTagName('path'):
# Get path, file action and file name from the xml.
# Get path and file change type from the xml.
file_path = changed_path.firstChild.nodeValue
file_action = changed_path.getAttribute('action')
changed_file = os.path.basename(file_path)
file_change_type = changed_path.getAttribute('action')
if file_path.startswith('/trunk/'):
file_path = file_path[len('/trunk/'):]
# Add file to the map.
if changed_file not in file_to_revision_map:
file_to_revision_map[changed_file] = []
file_to_revision_map[changed_file].append(
(revision_number, file_action, file_path))
if file_path not in file_to_revision_map:
file_to_revision_map[file_path] = []
file_to_revision_map[file_path].append(
(revision_number, file_change_type))
# Set commit message of the CL.
revision_object['message'] = revision.getElementsByTagName('msg')[
......@@ -102,18 +97,17 @@ class SVNParser(ParserInterface):
return (revision_map, file_to_revision_map)
def ParseLineDiff(self, path, component, file_action, revision_number):
def ParseLineDiff(self, path, component, file_change_type, revision_number):
changed_line_numbers = []
changed_line_contents = []
url_map = self.component_to_urls_map.get(component)
if not url_map:
logging.error('Component %s is not currently supported.', component)
return (None, None, None)
# If the file is added (not modified), treat it as if it is not changed.
backup_url = url_map['revision_url'] % revision_number
if file_action == 'A':
if file_change_type == 'A':
return (backup_url, changed_line_numbers, changed_line_contents)
# Retrieve data from the url. If no data is retrieved, return empty lists.
......@@ -121,7 +115,6 @@ class SVNParser(ParserInterface):
revision_number, revision_number)
data = crash_utils.GetDataFromURL(url)
if not data:
logging.error('Failed to get line changes from %s.', url)
return (backup_url, changed_line_numbers, changed_line_contents)
line_diff_html = minidom.parseString(data)
......@@ -129,8 +122,6 @@ class SVNParser(ParserInterface):
# If there are not NUM_TABLES tables in the html page, there should be an
# error in the html page.
if len(tables) != NUM_TABLES_IN_LINEDIFF_PAGE:
logging.error('Failed to retrieve the diff of revision %d from %s.',
revision_number, url)
return (backup_url, changed_line_numbers, changed_line_contents)
# Diff content is in the second table. Each line of the diff content
......@@ -163,8 +154,6 @@ class SVNParser(ParserInterface):
# If there aren't 3 tds, this line does should not contain line diff.
if len(tds) != NUM_TDS_IN_LINEDIFF_PAGE:
logging.warning('Failed to get a line of new file in revision %d.',
revision_number)
continue
# If line number information is not in hyperlink, ignore this line.
......@@ -173,8 +162,6 @@ class SVNParser(ParserInterface):
left_diff_type = tds[1].getAttribute('class')[prefix_len:]
right_diff_type = tds[2].getAttribute('class')[prefix_len:]
except IndexError:
logging.warning('Failed to get a line of file in revision %d.',
revision_number)
continue
# Treat the line as modified only if both left and right diff has type
......@@ -198,15 +185,12 @@ class SVNParser(ParserInterface):
def ParseBlameInfo(self, component, file_path, line, revision):
url_map = self.component_to_urls_map.get(component)
if not url_map:
logging.error('Component %s is not currently supported.', component)
return None
# Retrieve blame data from url, return None if fails.
url = url_map['blame_url'] % (file_path, revision, revision)
data = crash_utils.GetDataFromURL(url)
if not data:
logging.error('Failed to retrieve annotation information from %s.',
url)
return None
blame_html = minidom.parseString(data)
......@@ -214,17 +198,18 @@ class SVNParser(ParserInterface):
title = blame_html.getElementsByTagName('title')
# If the returned html page is an exception page, return None.
if title[0].firstChild.nodeValue == 'ViewVC Exception':
logging.error('Failed to retrieve blame information from %s.', url)
return None
# Each of the blame result is in <tr>.
blame_results = blame_html.getElementsByTagName('tr')
blame_result = blame_results[line]
try:
blame_result = blame_results[line]
except IndexError:
return None
# There must be 4 <td> for each <tr>. If not, this page is wrong.
tds = blame_result.getElementsByTagName('td')
if len(tds) != 4:
logging.error('Failed to retrieve blame information from %s.', url)
return None
# The third <td> has the line content, separated by <span>s. Combine
......@@ -257,6 +242,9 @@ class SVNParser(ParserInterface):
except IndexError:
revision = tds[2].firstChild.nodeValue
(revision_info, _) = self.ParseChangelog(component, revision, revision)
message = revision_info[int(revision)]['message']
# Return the parsed information.
revision_url = url_map['revision_url'] % int(revision)
return (line_content, revision, author, revision_url)
return (line_content, revision, author, revision_url, message)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment