Commit 91fff9ef authored by jeun's avatar jeun Committed by Commit bot

[Findit]Bug fixing and implemented some feature requests.

Fixed various bugs, including exception on stacktrace and unnamed variable in git parser.
Changed the algorithm to use file_path only instead of file_name.
Removed URL links from the result.
Added commit message to the final result.
Updated OWNERS.

NOTRY=true

Review URL: https://codereview.chromium.org/478763003

Cr-Commit-Position: refs/heads/master@{#291698}
parent e33d7ceb
stgao@chromium.org stgao@chromium.org
jeun@chromium.org
\ No newline at end of file
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
from threading import Lock, Thread from threading import Lock, Thread
from common import utils from common import utils
import crash_utils
class Blame(object): class Blame(object):
...@@ -28,7 +29,7 @@ class Blame(object): ...@@ -28,7 +29,7 @@ class Blame(object):
""" """
def __init__(self, line_content, component_name, stack_frame_index, def __init__(self, line_content, component_name, stack_frame_index,
file_name, line_number, author, revision, file_name, line_number, author, revision, message,
url, range_start, range_end): url, range_start, range_end):
# Set all the variables from the arguments. # Set all the variables from the arguments.
self.line_content = line_content self.line_content = line_content
...@@ -38,6 +39,7 @@ class Blame(object): ...@@ -38,6 +39,7 @@ class Blame(object):
self.line_number = line_number self.line_number = line_number
self.author = author self.author = author
self.revision = revision self.revision = revision
self.message = message
self.url = url self.url = url
self.range_start = range_start self.range_start = range_start
self.range_end = range_end self.range_end = range_end
...@@ -56,7 +58,8 @@ class BlameList(object): ...@@ -56,7 +58,8 @@ class BlameList(object):
def __getitem__(self, index): def __getitem__(self, index):
return self.blame_list[index] return self.blame_list[index]
def FindBlame(self, callstack, crash_revision_dict, regression_dict, parsers, def FindBlame(self, callstack, component_to_crash_revision_dict,
component_to_regression_dict, parsers,
top_n_frames=10): top_n_frames=10):
"""Given a stack within a stacktrace, retrieves blame information. """Given a stack within a stacktrace, retrieves blame information.
...@@ -65,10 +68,10 @@ class BlameList(object): ...@@ -65,10 +68,10 @@ class BlameList(object):
Args: Args:
callstack: The list of stack frames. callstack: The list of stack frames.
crash_revision_dict: A dictionary that maps component to its crash component_to_crash_revision_dict: A dictionary that maps component to its
revision. crash revision.
regression_dict: A dictionary that maps component to its revision component_to_regression_dict: A dictionary that maps component to its
range. revision range.
parsers: A list of two parsers, svn_parser and git_parser parsers: A list of two parsers, svn_parser and git_parser
top_n_frames: A number of stack frames to show the blame result for. top_n_frames: A number of stack frames to show the blame result for.
""" """
...@@ -80,23 +83,22 @@ class BlameList(object): ...@@ -80,23 +83,22 @@ class BlameList(object):
# If the component this line is from does not have a crash revision, # If the component this line is from does not have a crash revision,
# it is not possible to get blame information, so ignore this line. # it is not possible to get blame information, so ignore this line.
component_path = stack_frame.component_path component_path = stack_frame.component_path
if component_path not in crash_revision_dict: if component_path not in component_to_crash_revision_dict:
continue continue
crash_revision = crash_revision_dict[component_path]['revision'] crash_revision = component_to_crash_revision_dict[
component_path]['revision']
range_start = None range_start = None
range_end = None range_end = None
is_git = utils.IsGitHash(crash_revision) repository_type = crash_utils.GetRepositoryType(crash_revision)
if is_git: repository_parser = parsers[repository_type]
repository_parser = parsers['git']
else:
repository_parser = parsers['svn']
# If the revision is in SVN, and if regression information is available, # If the revision is in SVN, and if regression information is available,
# get it. For Git, we cannot know the ordering between hash numbers. # get it. For Git, we cannot know the ordering between hash numbers.
if not is_git: if repository_type == 'svn':
if regression_dict and component_path in regression_dict: if component_to_regression_dict and \
component_object = regression_dict[component_path] component_path in component_to_regression_dict:
component_object = component_to_regression_dict[component_path]
range_start = int(component_object['old_revision']) range_start = int(component_object['old_revision'])
range_end = int(component_object['new_revision']) range_end = int(component_object['new_revision'])
...@@ -120,20 +122,20 @@ class BlameList(object): ...@@ -120,20 +122,20 @@ class BlameList(object):
component_name = stack_frame.component_name component_name = stack_frame.component_name
file_name = stack_frame.file_name file_name = stack_frame.file_name
file_path = stack_frame.file_path file_path = stack_frame.file_path
crashed_line_number = stack_frame.crashed_line_number crashed_line_number = stack_frame.crashed_line_range[0]
# Parse blame information. # Parse blame information.
parsed_blame_info = repository_parser.ParseBlameInfo( parsed_blame_info = repository_parser.ParseBlameInfo(
component_path, file_path, crashed_line_number, crash_revision) component_path, file_path, crashed_line_number, crash_revision)
# If it fails to retrieve information, do not do anything. # If it fails to retrieve information, do not do anything.
if not parsed_blame_info or len(parsed_blame_info) != 4: if not parsed_blame_info:
return return
# Create blame object from the parsed info and add it to the list. # Create blame object from the parsed info and add it to the list.
(line_content, revision, author, url) = parsed_blame_info (line_content, revision, author, url, message) = parsed_blame_info
blame = Blame(line_content, component_name, stack_frame_index, file_name, blame = Blame(line_content, component_name, stack_frame_index, file_name,
crashed_line_number, author, revision, url, crashed_line_number, author, revision, message, url,
range_start, range_end) range_start, range_end)
with self.blame_list_lock: with self.blame_list_lock:
......
...@@ -5,6 +5,8 @@ ...@@ -5,6 +5,8 @@
import base64 import base64
import json import json
import os import os
import time
import urllib2
from common import utils from common import utils
...@@ -64,10 +66,23 @@ def _GetComponentName(path, host_dirs): ...@@ -64,10 +66,23 @@ def _GetComponentName(path, host_dirs):
return '_'.join(path.split('/')) return '_'.join(path.split('/'))
def _GetContentOfDEPS(url): def _GetContentOfDEPS(url, retries=5, sleep_time=0.1):
count = 0
while True:
count += 1
try:
_, content = utils.GetHttpClient().Get(url, timeout=60) _, content = utils.GetHttpClient().Get(url, timeout=60)
return content return content
# TODO(jeun): Handle HTTP Errors, such as 404.
except urllib2.HTTPError:
if count < retries:
time.sleep(sleep_time)
else:
break
return ''
def GetChromiumComponents(chromium_revision, def GetChromiumComponents(chromium_revision,
os_platform='unix', os_platform='unix',
......
...@@ -2,8 +2,6 @@ ...@@ -2,8 +2,6 @@
# Use of this source code is governed by a BSD-style license that can be # Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file. # found in the LICENSE file.
import os
class FileDictionary(object): class FileDictionary(object):
"""Maps file in a stacktrace to its crash information. """Maps file in a stacktrace to its crash information.
...@@ -16,52 +14,41 @@ class FileDictionary(object): ...@@ -16,52 +14,41 @@ class FileDictionary(object):
"""Initializes the file dictionary.""" """Initializes the file dictionary."""
self.file_dict = {} self.file_dict = {}
def AddFile(self, file_name, file_path, crashed_line_number, def AddFile(self, file_path, crashed_line_range, stack_frame_index,
stack_frame_index, function): function):
"""Adds file and its crash information to the map. """Adds file and its crash information to the map.
Args: Args:
file_name: The name of the crashed file.
file_path: The path of the crashed file. file_path: The path of the crashed file.
crashed_line_number: The crashed line of the file. crashed_line_range: The crashed line of the file.
stack_frame_index: The file's position in the callstack. stack_frame_index: The file's position in the callstack.
function: The name of the crashed function. function: The name of the crashed function.
""" """
# Populate the dictionary if this file/path has not been added before. # Populate the dictionary if this file path has not been added before.
if file_name not in self.file_dict: if file_path not in self.file_dict:
self.file_dict[file_name] = {} self.file_dict[file_path] = {}
self.file_dict[file_path]['line_numbers'] = []
if file_path not in self.file_dict[file_name]: self.file_dict[file_path]['stack_frame_indices'] = []
self.file_dict[file_name][file_path] = {} self.file_dict[file_path]['function'] = []
self.file_dict[file_name][file_path]['line_numbers'] = []
self.file_dict[file_name][file_path]['stack_frame_indices'] = []
self.file_dict[file_name][file_path]['function'] = []
# Add the crashed line, frame index and function name. # Add the crashed line, frame index and function name.
self.file_dict[file_name][file_path]['line_numbers'].append( self.file_dict[file_path]['line_numbers'].append(
crashed_line_number) crashed_line_range)
self.file_dict[file_name][file_path]['stack_frame_indices'].append( self.file_dict[file_path]['stack_frame_indices'].append(
stack_frame_index) stack_frame_index)
self.file_dict[file_name][file_path]['function'].append(function) self.file_dict[file_path]['function'].append(function)
def GetPathDic(self, file_name):
"""Returns file's path and crash information."""
return self.file_dict[file_name]
def GetCrashedLineNumbers(self, file_path): def GetCrashedLineNumbers(self, file_path):
"""Returns crashed line numbers given a file path.""" """Returns crashed line numbers given a file path."""
file_name = os.path.basename(file_path) return self.file_dict[file_path]['line_numbers']
return self.file_dict[file_name][file_path]['line_numbers']
def GetCrashStackFrameindex(self, file_path): def GetCrashStackFrameIndices(self, file_path):
"""Returns stack frame indices given a file path.""" """Returns stack frame indices given a file path."""
file_name = os.path.basename(file_path) return self.file_dict[file_path]['stack_frame_indices']
return self.file_dict[file_name][file_path]['stack_frame_indices']
def GetCrashFunction(self, file_path): def GetCrashFunctions(self, file_path):
"""Returns list of crashed functions given a file path.""" """Returns list of crashed functions given a file path."""
file_name = os.path.basename(file_path) return self.file_dict[file_path]['function']
return self.file_dict[file_name][file_path]['function']
def __iter__(self): def __iter__(self):
return iter(self.file_dict) return iter(self.file_dict)
...@@ -99,18 +86,17 @@ class ComponentDictionary(object): ...@@ -99,18 +86,17 @@ class ComponentDictionary(object):
continue continue
# Get values of the variables # Get values of the variables
file_name = stack_frame.file_name
file_path = stack_frame.file_path file_path = stack_frame.file_path
crashed_line_number = stack_frame.crashed_line_number crashed_line_range = stack_frame.crashed_line_range
stack_frame_index = stack_frame.index stack_frame_index = stack_frame.index
function = stack_frame.function function = stack_frame.function
# Add the file to this component's dictionary of files. # Add the file to this component's dictionary of files.
file_dict = self.component_dict[component_path] file_dict = self.component_dict[component_path]
file_dict.AddFile(file_name, file_path, crashed_line_number, file_dict.AddFile(file_path, crashed_line_range, stack_frame_index,
stack_frame_index, function) function)
def __CreateFileDictFromCallstack(self, callstack, top_n_frames=15): def __CreateFileDictFromCallstack(self, callstack, top_n_frames=10):
"""Creates a file dict that maps a file to the occurrence in the stack. """Creates a file dict that maps a file to the occurrence in the stack.
Args: Args:
......
[svn:src/] [svn:src/]
changelog_url: http://build.chromium.org/cgi-bin/svn-log?url=http://src.chromium.org/svn/trunk/src/&range=%s changelog_url: http://build.chromium.org/cgi-bin/svn-log?url=http://src.chromium.org/svn/trunk/src/&range=%s
revision_url: http://src.chromium.org/viewvc/chrome?revision=%d&view=revision revision_url: http://src.chromium.org/viewvc/chrome?revision=%d&view=revision
diff_url: http://src.chromium.org/viewvc/chrome%s?r1=%d&r2=%d&pathrev=%d diff_url: http://src.chromium.org/viewvc/chrome/trunk/%s?r1=%d&r2=%d&pathrev=%d
blame_url: http://src.chromium.org/viewvc/chrome/trunk/%s?annotate=%s&pathrev=%s blame_url: http://src.chromium.org/viewvc/chrome/trunk/%s?annotate=%s&pathrev=%s
[svn:src/third_party/WebKit/] [svn:src/third_party/WebKit/]
changelog_url: http://build.chromium.org/cgi-bin/svn-log?url=http://src.chromium.org/blink/trunk/&range=%s changelog_url: http://build.chromium.org/cgi-bin/svn-log?url=http://src.chromium.org/blink/trunk/&range=%s
revision_url: http://src.chromium.org/viewvc/blink?revision=%d&view=revision revision_url: http://src.chromium.org/viewvc/blink?revision=%d&view=revision
diff_url: http://src.chromium.org/viewvc/blink%s?r1=%d&r2=%d&pathrev=%d diff_url: http://src.chromium.org/viewvc/blink/trunk/%s?r1=%d&r2=%d&pathrev=%d
blame_url: http://src.chromium.org/viewvc/blink/trunk/%s?annotate=%s&pathrev=%s blame_url: http://src.chromium.org/viewvc/blink/trunk/%s?annotate=%s&pathrev=%s
[svn:src/third_party/WebKit/Source/] [svn:src/third_party/WebKit/Source/]
changelog_url: http://build.chromium.org/cgi-bin/svn-log?url=http://src.chromium.org/blink/trunk/&range=%s changelog_url: http://build.chromium.org/cgi-bin/svn-log?url=http://src.chromium.org/blink/trunk/&range=%s
revision_url: http://src.chromium.org/viewvc/blink?revision=%d&view=revision revision_url: http://src.chromium.org/viewvc/blink?revision=%d&view=revision
diff_url: http://src.chromium.org/viewvc/blink%s?r1=%d&r2=%d&pathrev=%d diff_url: http://src.chromium.org/viewvc/blink/trunk/%s?r1=%d&r2=%d&pathrev=%d
blame_url: http://src.chromium.org/viewvc/blink/trunk/%s?annotate=%s&pathrev=%s blame_url: http://src.chromium.org/viewvc/blink/trunk/%s?annotate=%s&pathrev=%s
[git] [git]
changelog_url: /+log/%s..%s changelog_url: /+log/%s..%s
revision_url: /+/%s revision_url: /+/%s
diff_url: /%s^!/%s diff_url: /+/%s^!/%s
blame_url: /+blame/%s/%s?format=json blame_url: /+blame/%s/%s?format=json
[codereview] [codereview]
......
This diff is collapsed.
# Copyright 2014 The Chromium Authors. All rights reserved. # Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be # Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file. # found in the LICENSE file.
import logging
import chromium_deps import chromium_deps
from common import utils
import crash_utils import crash_utils
import findit_for_crash as findit import findit_for_crash as findit
import stacktrace import stacktrace
...@@ -33,7 +32,7 @@ def SplitStacktrace(stacktrace_string): ...@@ -33,7 +32,7 @@ def SplitStacktrace(stacktrace_string):
line = line.strip() line = line.strip()
# If the line starts with +, it signifies the start of new stacktrace. # If the line starts with +, it signifies the start of new stacktrace.
if line.startswith('+'): if line.startswith('+-') and line.endswith('-+'):
if 'Release Build Stacktrace' in line: if 'Release Build Stacktrace' in line:
in_release_or_debug_stacktrace = True in_release_or_debug_stacktrace = True
current_stacktrace_lines = [] current_stacktrace_lines = []
...@@ -63,9 +62,15 @@ def FindCulpritCLs(stacktrace_string, ...@@ -63,9 +62,15 @@ def FindCulpritCLs(stacktrace_string,
component_regression=None, component_regression=None,
chrome_crash_revision=None, chrome_crash_revision=None,
component_crash_revision=None, component_crash_revision=None,
crashing_component=None): crashing_component_path=None,
crashing_component_name=None,
crashing_component_repo_url=None):
"""Returns the result, a list of result.Result objects and message. """Returns the result, a list of result.Result objects and message.
If either or both of component_regression and component_crash_revision is not
None, it is assumed that crashing_component_path and
crashing_component_repo_url are not None.
Args: Args:
stacktrace_string: A string representing stacktrace. stacktrace_string: A string representing stacktrace.
build_type: The type of the job. build_type: The type of the job.
...@@ -75,27 +80,23 @@ def FindCulpritCLs(stacktrace_string, ...@@ -75,27 +80,23 @@ def FindCulpritCLs(stacktrace_string,
chrome_crash_revision: A crash revision of chrome, in string. chrome_crash_revision: A crash revision of chrome, in string.
component_crash_revision: A crash revision of the component, component_crash_revision: A crash revision of the component,
if component build. if component build.
crashing_component: Yet to be decided. crashing_component_path: A relative path of the crashing component, as in
DEPS file. For example, it would be 'src/v8' for
v8 and 'src/third_party/WebKit' for blink.
crashing_component_name: A name of the crashing component, such as v8.
crashing_component_repo_url: The URL of the crashing component's repo, as
shown in DEPS file. For example,
'https://chromium.googlesource.com/skia.git'
for skia.
Returns: Returns:
A list of result objects, along with the short description on where the A list of result objects, along with the short description on where the
result is from. result is from.
""" """
build_type = build_type.lower() build_type = build_type.lower()
if 'syzyasan' in build_type:
return ('This build type is currently not supported.', [])
logging.basicConfig(filename='errors.log', level=logging.WARNING,
filemode='w')
component_to_crash_revision_dict = {} component_to_crash_revision_dict = {}
component_to_regression_dict = {} component_to_regression_dict = {}
# TODO(jeun): Come up with a good way to connect crashing component name to
# its path.
if component_regression or component_crash_revision:
return ('Component builds are not supported yet.', [])
# If chrome regression is available, parse DEPS file. # If chrome regression is available, parse DEPS file.
chrome_regression = crash_utils.SplitRange(chrome_regression) chrome_regression = crash_utils.SplitRange(chrome_regression)
if chrome_regression: if chrome_regression:
...@@ -113,6 +114,65 @@ def FindCulpritCLs(stacktrace_string, ...@@ -113,6 +114,65 @@ def FindCulpritCLs(stacktrace_string,
component_to_crash_revision_dict = chromium_deps.GetChromiumComponents( component_to_crash_revision_dict = chromium_deps.GetChromiumComponents(
chrome_crash_revision) chrome_crash_revision)
# Check if component regression information is available.
component_regression = crash_utils.SplitRange(component_regression)
if component_regression:
component_regression_start = component_regression[0]
component_regression_end = component_regression[1]
# If this component already has an entry in parsed DEPS file, overwrite
# regression range and url.
if crashing_component_path in component_to_regression_dict:
component_regression_info = \
component_to_regression_dict[crashing_component_path]
component_regression_info['old_revision'] = component_regression_start
component_regression_info['new_revision'] = component_regression_end
component_regression_info['repository'] = crashing_component_repo_url
# if this component does not have an entry, add the entry to the parsed
# DEPS file.
else:
repository_type = crash_utils.GetRepositoryType(
component_regression_start)
component_regression_info = {
'path': crashing_component_path,
'rolled': True,
'name': crashing_component_name,
'old_revision': component_regression_start,
'new_revision': component_regression_end,
'repository': crashing_component_repo_url,
'repository_type': repository_type
}
component_to_regression_dict[crashing_component_path] = \
component_regression_info
# If component crash revision is available, add it to the parsed crash
# revisions.
if component_crash_revision:
# If this component has already a crash revision info, overwrite it.
if crashing_component_path in component_to_crash_revision_dict:
component_crash_revision_info = \
component_to_crash_revision_dict[crashing_component_path]
component_crash_revision_info['revision'] = component_crash_revision
component_crash_revision_info['repository'] = crashing_component_repo_url
# If not, add it to the parsed DEPS.
else:
if utils.IsGitHash(component_crash_revision):
repository_type = 'git'
else:
repository_type = 'svn'
component_crash_revision_info = {
'path': crashing_component_path,
'name': crashing_component_name,
'repository': crashing_component_repo_url,
'repository_type': repository_type,
'revision': component_crash_revision
}
component_to_crash_revision_dict[crashing_component_path] = \
component_crash_revision_info
# Parsed DEPS is used to normalize the stacktrace. Since parsed regression # Parsed DEPS is used to normalize the stacktrace. Since parsed regression
# and parsed crash state essentially contain same information, use either. # and parsed crash state essentially contain same information, use either.
if component_to_regression_dict: if component_to_regression_dict:
...@@ -126,8 +186,13 @@ def FindCulpritCLs(stacktrace_string, ...@@ -126,8 +186,13 @@ def FindCulpritCLs(stacktrace_string,
# Split stacktrace into release build/debug build and parse them. # Split stacktrace into release build/debug build and parse them.
(release_build_stacktrace, debug_build_stacktrace) = SplitStacktrace( (release_build_stacktrace, debug_build_stacktrace) = SplitStacktrace(
stacktrace_string) stacktrace_string)
if not (release_build_stacktrace or debug_build_stacktrace):
parsed_release_build_stacktrace = stacktrace.Stacktrace(
stacktrace_string.splitlines(), build_type, parsed_deps)
else:
parsed_release_build_stacktrace = stacktrace.Stacktrace( parsed_release_build_stacktrace = stacktrace.Stacktrace(
release_build_stacktrace, build_type, parsed_deps) release_build_stacktrace, build_type, parsed_deps)
parsed_debug_build_stacktrace = stacktrace.Stacktrace( parsed_debug_build_stacktrace = stacktrace.Stacktrace(
debug_build_stacktrace, build_type, parsed_deps) debug_build_stacktrace, build_type, parsed_deps)
...@@ -139,6 +204,9 @@ def FindCulpritCLs(stacktrace_string, ...@@ -139,6 +204,9 @@ def FindCulpritCLs(stacktrace_string,
elif parsed_debug_build_stacktrace.stack_list: elif parsed_debug_build_stacktrace.stack_list:
main_stack = parsed_debug_build_stacktrace.GetCrashStack() main_stack = parsed_debug_build_stacktrace.GetCrashStack()
else: else:
if 'mac_' in build_type:
return ('No line information available in stacktrace.', [])
return ('Stacktrace is malformed.', []) return ('Stacktrace is malformed.', [])
# Run the algorithm on the parsed stacktrace, and return the result. # Run the algorithm on the parsed stacktrace, and return the result.
......
This diff is collapsed.
# Copyright 2014 The Chromium Authors. All rights reserved. # Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be # Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file. # found in the LICENSE file.
import base64 import base64
import logging
import os
import xml.dom.minidom as minidom import xml.dom.minidom as minidom
from xml.parsers.expat import ExpatError from xml.parsers.expat import ExpatError
import crash_utils import crash_utils
from repository_parser_interface import ParserInterface from repository_parser_interface import ParserInterface
FILE_CHANGE_TYPE_MAP = {
'add': 'A',
'delete': 'D',
'modify': 'M'
}
class GitParser(ParserInterface): class GitParser(ParserInterface):
"""Parser for Git repository in googlesource. """Parser for Git repository in googlesource.
...@@ -39,14 +43,16 @@ class GitParser(ParserInterface): ...@@ -39,14 +43,16 @@ class GitParser(ParserInterface):
html_url = url + '?pretty=fuller' html_url = url + '?pretty=fuller'
response = crash_utils.GetDataFromURL(html_url) response = crash_utils.GetDataFromURL(html_url)
if not response: if not response:
logging.error('Failed to retrieve changelog from %s', html_url)
return (revision_map, file_to_revision_map) return (revision_map, file_to_revision_map)
# Parse xml out of the returned string. If it fails, return empty map. # Parse xml out of the returned string. If it fails, try parsing
# from JSON objects.
try: try:
dom = minidom.parseString(response) dom = minidom.parseString(response)
except ExpatError: except ExpatError:
logging.error('Failed to parse changelog from %s', url) self.ParseChangelogFromJSON(range_start, range_end, changelog_url,
revision_url, revision_map,
file_to_revision_map)
return (revision_map, file_to_revision_map) return (revision_map, file_to_revision_map)
# The revisions information are in from the third divs to the second # The revisions information are in from the third divs to the second
...@@ -93,22 +99,16 @@ class GitParser(ParserInterface): ...@@ -93,22 +99,16 @@ class GitParser(ParserInterface):
for li in lis: for li in lis:
# Retrieve path and action of the changed file # Retrieve path and action of the changed file
file_path = li.getElementsByTagName('a')[0].firstChild.nodeValue file_path = li.getElementsByTagName('a')[0].firstChild.nodeValue
file_action = li.getElementsByTagName('span')[0].getAttribute('class') file_change_type = li.getElementsByTagName('span')[
0].getAttribute('class')
# Normalize file action so that it is same as SVN parser. # Normalize file action so that it is same as SVN parser.
if file_action == 'add': file_change_type = FILE_CHANGE_TYPE_MAP[file_change_type]
file_action = 'A'
elif file_action == 'delete':
file_action = 'D'
elif file_action == 'modify':
file_action = 'M'
# Add the changed file to the map. # Add the changed file to the map.
changed_file = os.path.basename(file_path) if file_path not in file_to_revision_map:
if changed_file not in file_to_revision_map: file_to_revision_map[file_path] = []
file_to_revision_map[changed_file] = [] file_to_revision_map[file_path].append((githash, file_change_type))
file_to_revision_map[changed_file].append((githash, file_action,
file_path))
# Add this revision object to the map. # Add this revision object to the map.
revision_map[githash] = revision revision_map[githash] = revision
...@@ -137,14 +137,12 @@ class GitParser(ParserInterface): ...@@ -137,14 +137,12 @@ class GitParser(ParserInterface):
json_url = changelog_url + '?format=json' json_url = changelog_url + '?format=json'
response = crash_utils.GetDataFromURL(json_url) response = crash_utils.GetDataFromURL(json_url)
if not response: if not response:
logging.error('Failed to retrieve changelog from %s.', json_url)
return return
# Parse changelog from the returned object. The returned string should # Parse changelog from the returned object. The returned string should
# start with ")}]'\n", so start from the 6th character. # start with ")}]'\n", so start from the 6th character.
revisions = crash_utils.LoadJSON(response[5:]) revisions = crash_utils.LoadJSON(response[5:])
if not revisions: if not revisions:
logging.error('Failed to parse changelog from %s.', json_url)
return return
# Parse individual revision in the log. # Parse individual revision in the log.
...@@ -165,13 +163,11 @@ class GitParser(ParserInterface): ...@@ -165,13 +163,11 @@ class GitParser(ParserInterface):
url = revision_url % githash url = revision_url % githash
response = crash_utils.GetDataFromURL(url + '?format=json') response = crash_utils.GetDataFromURL(url + '?format=json')
if not response: if not response:
logging.warning('Failed to retrieve revision from %s.', url)
return return
# Load JSON object from the string. If it fails, terminate the function. # Load JSON object from the string. If it fails, terminate the function.
json_revision = crash_utils.LoadJSON(response[5:]) json_revision = crash_utils.LoadJSON(response[5:])
if not json_revision: if not json_revision:
logging.warning('Failed to parse revision from %s.', url)
return return
# Create a map representing object and get githash from the JSON object. # Create a map representing object and get githash from the JSON object.
...@@ -186,43 +182,35 @@ class GitParser(ParserInterface): ...@@ -186,43 +182,35 @@ class GitParser(ParserInterface):
# Iterate through the changed files. # Iterate through the changed files.
for diff in json_revision['tree_diff']: for diff in json_revision['tree_diff']:
file_path = diff['new_path'] file_path = diff['new_path']
file_action = diff['type'] file_change_type = diff['type']
# Normalize file action so that it fits with svn_repository_parser. # Normalize file action so that it fits with svn_repository_parser.
if file_action == 'add': file_change_type = FILE_CHANGE_TYPE_MAP[file_change_type]
file_action = 'A'
elif file_action == 'delete':
file_action = 'D'
elif file_action == 'modify':
file_action = 'M'
# Add the file to the map. # Add the file to the map.
changed_file = os.path.basename(file_path) if file_path not in file_to_revision_map:
if changed_file not in file_to_revision_map: file_to_revision_map[file_path] = []
file_to_revision_map[changed_file] = [] file_to_revision_map[file_path].append((githash, file_change_type))
file_to_revision_map[changed_file].append(
(githash, file_action, file_path))
# Add this CL to the map. # Add this CL to the map.
revision_map[githash] = revision revision_map[githash] = revision
return return
def ParseLineDiff(self, path, component, file_action, githash): def ParseLineDiff(self, path, component, file_change_type, githash):
changed_line_numbers = [] changed_line_numbers = []
changed_line_contents = [] changed_line_contents = []
base_url = self.component_to_url_map[component]['repository'] base_url = self.component_to_url_map[component]['repository']
backup_url = (base_url + self.url_parts_map['revision_url']) % githash backup_url = (base_url + self.url_parts_map['revision_url']) % githash
# If the file is added (not modified), treat it as if it is not changed. # If the file is added (not modified), treat it as if it is not changed.
if file_action == 'A': if file_change_type == 'A':
return (backup_url, changed_line_numbers, changed_line_contents) return (backup_url, changed_line_numbers, changed_line_contents)
# Retrieves the diff data from URL, and if it fails, return empty lines. # Retrieves the diff data from URL, and if it fails, return empty lines.
url = (base_url + self.url_parts_map['diff_url']) % (githash, path) url = (base_url + self.url_parts_map['diff_url']) % (githash, path)
data = crash_utils.GetDataFromURL(url + '?format=text') data = crash_utils.GetDataFromURL(url + '?format=text')
if not data: if not data:
logging.error('Failed to get diff from %s.', url)
return (backup_url, changed_line_numbers, changed_line_contents) return (backup_url, changed_line_numbers, changed_line_contents)
# Decode the returned object to line diff info # Decode the returned object to line diff info
...@@ -260,16 +248,12 @@ class GitParser(ParserInterface): ...@@ -260,16 +248,12 @@ class GitParser(ParserInterface):
blame_url = base_url + url_part blame_url = base_url + url_part
json_string = crash_utils.GetDataFromURL(blame_url) json_string = crash_utils.GetDataFromURL(blame_url)
if not json_string: if not json_string:
logging.error('Failed to retrieve annotation information from %s.',
blame_url)
return return
# Parse JSON object from the string. The returned string should # Parse JSON object from the string. The returned string should
# start with ")}]'\n", so start from the 6th character. # start with ")}]'\n", so start from the 6th character.
annotation = crash_utils.LoadJSON(json_string[5:]) annotation = crash_utils.LoadJSON(json_string[5:])
if not annotation: if not annotation:
logging.error('Failed to parse annotation information from %s.',
blame_url)
return return
# Go through the regions, which is a list of consecutive lines with same # Go through the regions, which is a list of consecutive lines with same
...@@ -289,7 +273,9 @@ class GitParser(ParserInterface): ...@@ -289,7 +273,9 @@ class GitParser(ParserInterface):
# TODO(jeun): Add a way to get content from JSON object. # TODO(jeun): Add a way to get content from JSON object.
content = None content = None
return (content, revision, author, revision_url) (revision_info, _) = self.ParseChangelog(component, revision, revision)
message = revision_info[revision]['message']
return (content, revision, author, revision_url, message)
# Return none if the region does not exist. # Return none if the region does not exist.
return None return None
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
# Use of this source code is governed by a BSD-style license that can be # Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file. # found in the LICENSE file.
import logging
import re import re
from threading import Lock from threading import Lock
...@@ -10,7 +9,7 @@ from threading import Lock ...@@ -10,7 +9,7 @@ from threading import Lock
import crash_utils import crash_utils
REVIEW_URL_PATTERN = re.compile(r'Review URL:( *)(.*)') REVIEW_URL_PATTERN = re.compile(r'Review URL:( *)(.*?)/(\d+)')
class Match(object): class Match(object):
...@@ -20,7 +19,7 @@ class Match(object): ...@@ -20,7 +19,7 @@ class Match(object):
contains information about files it changes, their authors, etc. contains information about files it changes, their authors, etc.
Attributes: Attributes:
is_reverted: True if this CL is reverted by other CL. is_revert: True if this CL is reverted by other CL.
revert_of: If this CL is a revert of some other CL, a revision number/ revert_of: If this CL is a revert of some other CL, a revision number/
git hash of that CL. git hash of that CL.
crashed_line_numbers: The list of lines that caused crash for this CL. crashed_line_numbers: The list of lines that caused crash for this CL.
...@@ -33,10 +32,9 @@ class Match(object): ...@@ -33,10 +32,9 @@ class Match(object):
component_name: The name of the component that this CL belongs to. component_name: The name of the component that this CL belongs to.
stack_frame_indices: For files that caused crash, list of where in the stack_frame_indices: For files that caused crash, list of where in the
stackframe they occur. stackframe they occur.
rank: The highest priority among the files the CL changes. Priority = 1 priorities: A list of priorities for each of the changed file. A priority
if it changes the crashed line, and priority = 2 if it is a simple is 1 if the file changes a crashed line, and 2 if it changes
file change. the file but not the crashed line.
priorities: A list of priorities for each of the changed file.
reivision_url: The revision URL of the CL. reivision_url: The revision URL of the CL.
review_url: The codereview URL that reviews this CL. review_url: The codereview URL that reviews this CL.
reviewers: The list of people that reviewed this CL. reviewers: The list of people that reviewed this CL.
...@@ -45,17 +43,18 @@ class Match(object): ...@@ -45,17 +43,18 @@ class Match(object):
REVERT_PATTERN = re.compile(r'(revert\w*) r?(\d+)', re.I) REVERT_PATTERN = re.compile(r'(revert\w*) r?(\d+)', re.I)
def __init__(self, revision, component_name): def __init__(self, revision, component_name):
self.is_reverted = False self.is_revert = False
self.revert_of = None self.revert_of = None
self.message = None
self.crashed_line_numbers = [] self.crashed_line_numbers = []
self.function_list = [] self.function_list = []
self.min_distance = crash_utils.INFINITY self.min_distance = crash_utils.INFINITY
self.min_distance_info = None
self.changed_files = [] self.changed_files = []
self.changed_file_urls = [] self.changed_file_urls = []
self.author = revision['author'] self.author = revision['author']
self.component_name = component_name self.component_name = component_name
self.stack_frame_indices = [] self.stack_frame_indices = []
self.rank = crash_utils.INFINITY
self.priorities = [] self.priorities = []
self.revision_url = revision['url'] self.revision_url = revision['url']
self.review_url = '' self.review_url = ''
...@@ -72,6 +71,7 @@ class Match(object): ...@@ -72,6 +71,7 @@ class Match(object):
message: The message to parse. message: The message to parse.
codereview_api_url: URL to retrieve codereview data from. codereview_api_url: URL to retrieve codereview data from.
""" """
self.message = message
for line in message.splitlines(): for line in message.splitlines():
line = line.strip() line = line.strip()
review_url_line_match = REVIEW_URL_PATTERN.match(line) review_url_line_match = REVIEW_URL_PATTERN.match(line)
...@@ -80,14 +80,12 @@ class Match(object): ...@@ -80,14 +80,12 @@ class Match(object):
if review_url_line_match: if review_url_line_match:
# Get review number for the code review site from the line. # Get review number for the code review site from the line.
issue_number = review_url_line_match.group(2) issue_number = review_url_line_match.group(3)
# Get JSON from the code review site, ignore the line if it fails. # Get JSON from the code review site, ignore the line if it fails.
url = codereview_api_url % issue_number url = codereview_api_url % issue_number
json_string = crash_utils.GetDataFromURL(url) json_string = crash_utils.GetDataFromURL(url)
if not json_string: if not json_string:
logging.warning('Failed to retrieve code review information from %s',
url)
continue continue
# Load the JSON from the string, and get the list of reviewers. # Load the JSON from the string, and get the list of reviewers.
...@@ -97,7 +95,7 @@ class Match(object): ...@@ -97,7 +95,7 @@ class Match(object):
# Check if this CL is a revert of other CL. # Check if this CL is a revert of other CL.
if line.lower().startswith('revert'): if line.lower().startswith('revert'):
self.is_reverted = True self.is_revert = True
# Check if the line says what CL this CL is a revert of. # Check if the line says what CL this CL is a revert of.
revert = self.REVERT_PATTERN.match(line) revert = self.REVERT_PATTERN.match(line)
......
# Copyright 2014 The Chromium Authors. All rights reserved. # Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be # Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file. # found in the LICENSE file.
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
class Result(object): class Result(object):
def __init__(self, suspected_cl, revision_url, component_name, author, def __init__(self, suspected_cl, revision_url, component_name, author,
reason, review_url, reviewers, line_content): reason, review_url, reviewers, line_content, message):
self.suspected_cl = suspected_cl self.suspected_cl = suspected_cl
self.revision_url = revision_url self.revision_url = revision_url
self.component_name = component_name self.component_name = component_name
...@@ -15,3 +15,4 @@ class Result(object): ...@@ -15,3 +15,4 @@ class Result(object):
self.review_url = review_url self.review_url = review_url
self.reviewers = reviewers self.reviewers = reviewers
self.line_content = line_content self.line_content = line_content
self.commit_message = message
...@@ -2,12 +2,16 @@ ...@@ -2,12 +2,16 @@
# Use of this source code is governed by a BSD-style license that can be # Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file. # found in the LICENSE file.
import os
import re import re
import crash_utils import crash_utils
SYZYASAN_STACK_FRAME_PATTERN = re.compile(
r'(CF: )?(.*?)( \(FPO: .*\) )?( \(CONV: .*\) )?\[(.*) @ (\d+)\]')
FILE_PATH_AND_LINE_PATTERN = re.compile(r'(.*?):(\d+)(:\d+)?')
class StackFrame(object): class StackFrame(object):
"""Represents a frame in stacktrace. """Represents a frame in stacktrace.
...@@ -18,18 +22,18 @@ class StackFrame(object): ...@@ -18,18 +22,18 @@ class StackFrame(object):
file_name: The name of the file that crashed. file_name: The name of the file that crashed.
function: The function that caused the crash. function: The function that caused the crash.
file_path: The path of the crashed file. file_path: The path of the crashed file.
crashed_line_number: The line of the file that caused the crash. crashed_line_range: The line of the file that caused the crash.
""" """
def __init__(self, stack_frame_index, component_path, component_name, def __init__(self, stack_frame_index, component_path, component_name,
file_name, function, file_path, crashed_line_number): file_name, function, file_path, crashed_line_range):
self.index = stack_frame_index self.index = stack_frame_index
self.component_path = component_path self.component_path = component_path
self.component_name = component_name self.component_name = component_name
self.file_name = file_name self.file_name = file_name
self.function = function self.function = function
self.file_path = file_path self.file_path = file_path
self.crashed_line_number = crashed_line_number self.crashed_line_range = crashed_line_range
class CallStack(object): class CallStack(object):
...@@ -59,10 +63,9 @@ class Stacktrace(object): ...@@ -59,10 +63,9 @@ class Stacktrace(object):
def __init__(self, stacktrace, build_type, parsed_deps): def __init__(self, stacktrace, build_type, parsed_deps):
self.stack_list = None self.stack_list = None
self.parsed_deps = parsed_deps self.ParseStacktrace(stacktrace, build_type, parsed_deps)
self.ParseStacktrace(stacktrace, build_type)
def ParseStacktrace(self, stacktrace, build_type): def ParseStacktrace(self, stacktrace, build_type, parsed_deps):
"""Parses stacktrace and normalizes it. """Parses stacktrace and normalizes it.
If there are multiple callstacks within the stacktrace, If there are multiple callstacks within the stacktrace,
...@@ -72,11 +75,11 @@ class Stacktrace(object): ...@@ -72,11 +75,11 @@ class Stacktrace(object):
Args: Args:
stacktrace: A string containing stacktrace. stacktrace: A string containing stacktrace.
build_type: A string containing the build type of the crash. build_type: A string containing the build type of the crash.
parsed_deps: A parsed DEPS file to normalize path with.
""" """
# If the passed in string is empty, the object does not represent anything. # If the passed in string is empty, the object does not represent anything.
if not stacktrace: if not stacktrace:
return return
# Reset the stack list. # Reset the stack list.
self.stack_list = [] self.stack_list = []
reached_new_callstack = False reached_new_callstack = False
...@@ -84,14 +87,13 @@ class Stacktrace(object): ...@@ -84,14 +87,13 @@ class Stacktrace(object):
# position of a frame within a callstack. The reason for not extracting # position of a frame within a callstack. The reason for not extracting
# index from a line is that some stack frames do not have index. # index from a line is that some stack frames do not have index.
stack_frame_index = 0 stack_frame_index = 0
current_stack = None current_stack = CallStack(-1)
for line in stacktrace: for line in stacktrace:
line = line.strip()
(is_new_callstack, stack_priority) = self.__IsStartOfNewCallStack( (is_new_callstack, stack_priority) = self.__IsStartOfNewCallStack(
line, build_type) line, build_type)
if is_new_callstack: if is_new_callstack:
# If this callstack is crash stack, update the boolean. # If this callstack is crash stack, update the boolean.
if not reached_new_callstack: if not reached_new_callstack:
reached_new_callstack = True reached_new_callstack = True
...@@ -107,7 +109,7 @@ class Stacktrace(object): ...@@ -107,7 +109,7 @@ class Stacktrace(object):
# Generate stack frame object from the line. # Generate stack frame object from the line.
parsed_stack_frame = self.__GenerateStackFrame( parsed_stack_frame = self.__GenerateStackFrame(
stack_frame_index, line, build_type) stack_frame_index, line, build_type, parsed_deps)
# If the line does not represent the stack frame, ignore this line. # If the line does not represent the stack frame, ignore this line.
if not parsed_stack_frame: if not parsed_stack_frame:
...@@ -135,11 +137,7 @@ class Stacktrace(object): ...@@ -135,11 +137,7 @@ class Stacktrace(object):
True if the line is the start of new callstack, False otherwise. If True, True if the line is the start of new callstack, False otherwise. If True,
it also returns the priority of the line. it also returns the priority of the line.
""" """
# Currently not supported. if 'syzyasan' in build_type:
if 'android' in build_type:
pass
elif 'syzyasan' in build_type:
# In syzyasan build, new stack starts with 'crash stack:', # In syzyasan build, new stack starts with 'crash stack:',
# 'freed stack:', etc. # 'freed stack:', etc.
callstack_start_pattern = re.compile(r'^(.*) stack:$') callstack_start_pattern = re.compile(r'^(.*) stack:$')
...@@ -160,9 +158,12 @@ class Stacktrace(object): ...@@ -160,9 +158,12 @@ class Stacktrace(object):
elif 'tsan' in build_type: elif 'tsan' in build_type:
# Create patterns for each callstack type. # Create patterns for each callstack type.
crash_callstack_start_pattern = re.compile( crash_callstack_start_pattern1 = re.compile(
r'^(Read|Write) of size \d+') r'^(Read|Write) of size \d+')
crash_callstack_start_pattern2 = re.compile(
r'^[A-Z]+: ThreadSanitizer')
allocation_callstack_start_pattern = re.compile( allocation_callstack_start_pattern = re.compile(
r'^Previous (write|read) of size \d+') r'^Previous (write|read) of size \d+')
...@@ -170,7 +171,8 @@ class Stacktrace(object): ...@@ -170,7 +171,8 @@ class Stacktrace(object):
r'^Location is heap block of size \d+') r'^Location is heap block of size \d+')
# Crash stack gets priority 0. # Crash stack gets priority 0.
if crash_callstack_start_pattern.match(line): if (crash_callstack_start_pattern1.match(line) or
crash_callstack_start_pattern2.match(line)):
return (True, 0) return (True, 0)
# All other stacks get priority 1. # All other stacks get priority 1.
...@@ -183,9 +185,10 @@ class Stacktrace(object): ...@@ -183,9 +185,10 @@ class Stacktrace(object):
else: else:
# In asan and other build types, crash stack can start # In asan and other build types, crash stack can start
# in two different ways. # in two different ways.
crash_callstack_start_pattern1 = re.compile(r'^==\d+== ?ERROR:') crash_callstack_start_pattern1 = re.compile(r'^==\d+== ?[A-Z]+:')
crash_callstack_start_pattern2 = re.compile( crash_callstack_start_pattern2 = re.compile(
r'^(READ|WRITE) of size \d+ at') r'^(READ|WRITE) of size \d+ at')
crash_callstack_start_pattern3 = re.compile(r'^backtrace:')
freed_callstack_start_pattern = re.compile( freed_callstack_start_pattern = re.compile(
r'^freed by thread T\d+ (.* )?here:') r'^freed by thread T\d+ (.* )?here:')
...@@ -198,7 +201,8 @@ class Stacktrace(object): ...@@ -198,7 +201,8 @@ class Stacktrace(object):
# Crash stack gets priority 0. # Crash stack gets priority 0.
if (crash_callstack_start_pattern1.match(line) or if (crash_callstack_start_pattern1.match(line) or
crash_callstack_start_pattern2.match(line)): crash_callstack_start_pattern2.match(line) or
crash_callstack_start_pattern3.match(line)):
return (True, 0) return (True, 0)
# All other callstack gets priority 1. # All other callstack gets priority 1.
...@@ -215,7 +219,8 @@ class Stacktrace(object): ...@@ -215,7 +219,8 @@ class Stacktrace(object):
# stack priority. # stack priority.
return (False, -1) return (False, -1)
def __GenerateStackFrame(self, stack_frame_index, line, build_type): def __GenerateStackFrame(self, stack_frame_index, line, build_type,
parsed_deps):
"""Extracts information from a line in stacktrace. """Extracts information from a line in stacktrace.
Args: Args:
...@@ -223,32 +228,59 @@ class Stacktrace(object): ...@@ -223,32 +228,59 @@ class Stacktrace(object):
line: A stacktrace string to extract data from. line: A stacktrace string to extract data from.
build_type: A string containing the build type build_type: A string containing the build type
of this crash (e.g. linux_asan_chrome_mp). of this crash (e.g. linux_asan_chrome_mp).
parsed_deps: A parsed DEPS file to normalize path with.
Returns: Returns:
A triple containing the name of the function, the path of the file and A triple containing the name of the function, the path of the file and
the crashed line number. the crashed line number.
""" """
line_parts = line.split() line_parts = line.split()
try: try:
# Filter out lines that are not stack frame.
stack_frame_index_pattern = re.compile(r'#(\d+)') if 'syzyasan' in build_type:
if not stack_frame_index_pattern.match(line_parts[0]): stack_frame_match = SYZYASAN_STACK_FRAME_PATTERN.match(line)
if not stack_frame_match:
return None return None
file_path = stack_frame_match.group(5)
crashed_line_range = [int(stack_frame_match.group(6))]
function = stack_frame_match.group(2)
# Tsan has different stack frame style from other builds. else:
if build_type.startswith('linux_tsan'): if not line_parts[0].startswith('#'):
return None
if 'tsan' in build_type:
file_path_and_line = line_parts[-2] file_path_and_line = line_parts[-2]
function = ' '.join(line_parts[1:-2]) function = ' '.join(line_parts[1:-2])
else: else:
file_path_and_line = line_parts[-1] file_path_and_line = line_parts[-1]
function = ' '.join(line_parts[3:-1]) function = ' '.join(line_parts[3:-1])
# Get file path and line info from the line. # Get file path and line info from the line.
file_path_and_line = file_path_and_line.split(':') file_path_and_line_match = FILE_PATH_AND_LINE_PATTERN.match(
file_path = file_path_and_line[0] file_path_and_line)
crashed_line_number = int(file_path_and_line[1])
# Return None if the file path information is not available
if not file_path_and_line_match:
return None
file_path = file_path_and_line_match.group(1)
# Get the crashed line range. For example, file_path:line_number:range.
crashed_line_range_num = file_path_and_line_match.group(3)
if crashed_line_range_num:
# Strip ':' prefix.
crashed_line_range_num = int(crashed_line_range_num[1:])
else:
crashed_line_range_num = 0
crashed_line_number = int(file_path_and_line_match.group(2))
# For example, 655:1 has crashed lines 655 and 656.
crashed_line_range = \
range(crashed_line_number,
crashed_line_number + crashed_line_range_num + 1)
# Return None if the line is malformed. # Return None if the line is malformed.
except IndexError: except IndexError:
...@@ -257,17 +289,13 @@ class Stacktrace(object): ...@@ -257,17 +289,13 @@ class Stacktrace(object):
return None return None
# Normalize the file path so that it can be compared to repository path. # Normalize the file path so that it can be compared to repository path.
file_name = os.path.basename(file_path)
(component_path, component_name, file_path) = ( (component_path, component_name, file_path) = (
crash_utils.NormalizePathLinux(file_path, self.parsed_deps)) crash_utils.NormalizePath(file_path, parsed_deps))
# If this component is not supported, ignore this line.
if not component_path:
return None
# Return a new stack frame object with the parsed information. # Return a new stack frame object with the parsed information.
file_name = file_path.split('/')[-1]
return StackFrame(stack_frame_index, component_path, component_name, return StackFrame(stack_frame_index, component_path, component_name,
file_name, function, file_path, crashed_line_number) file_name, function, file_path, crashed_line_range)
def __getitem__(self, index): def __getitem__(self, index):
return self.stack_list[index] return self.stack_list[index]
......
# Copyright 2014 The Chromium Authors. All rights reserved. # Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be # Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file. # found in the LICENSE file.
import logging
import os
import xml.dom.minidom as minidom import xml.dom.minidom as minidom
from xml.parsers.expat import ExpatError from xml.parsers.expat import ExpatError
...@@ -41,7 +39,6 @@ class SVNParser(ParserInterface): ...@@ -41,7 +39,6 @@ class SVNParser(ParserInterface):
url_map = self.component_to_urls_map.get(component) url_map = self.component_to_urls_map.get(component)
if not url_map: if not url_map:
logging.error('Component %s is not currently supported.', component)
return (revision_map, file_to_revision_map) return (revision_map, file_to_revision_map)
# Retrieve data from the url, return empty map if fails. # Retrieve data from the url, return empty map if fails.
...@@ -49,16 +46,12 @@ class SVNParser(ParserInterface): ...@@ -49,16 +46,12 @@ class SVNParser(ParserInterface):
url = url_map['changelog_url'] % revision_range_str url = url_map['changelog_url'] % revision_range_str
response = crash_utils.GetDataFromURL(url) response = crash_utils.GetDataFromURL(url)
if not response: if not response:
logging.error('Failed to retrieve changelog from %s, range %s.',
url, revision_range_str)
return (revision_map, file_to_revision_map) return (revision_map, file_to_revision_map)
# Parse xml out of the returned string. If it fails, return empty map. # Parse xml out of the returned string. If it fails, return empty map.
try: try:
xml_revisions = minidom.parseString(response) xml_revisions = minidom.parseString(response)
except ExpatError: except ExpatError:
logging.error('Failed to parse changelog from %s, range %s.',
url, revision_range_str)
return (revision_map, file_to_revision_map) return (revision_map, file_to_revision_map)
# Iterate through the returned XML object. # Iterate through the returned XML object.
...@@ -78,16 +71,18 @@ class SVNParser(ParserInterface): ...@@ -78,16 +71,18 @@ class SVNParser(ParserInterface):
paths = revision.getElementsByTagName('paths') paths = revision.getElementsByTagName('paths')
if paths: if paths:
for changed_path in paths[0].getElementsByTagName('path'): for changed_path in paths[0].getElementsByTagName('path'):
# Get path, file action and file name from the xml. # Get path and file change type from the xml.
file_path = changed_path.firstChild.nodeValue file_path = changed_path.firstChild.nodeValue
file_action = changed_path.getAttribute('action') file_change_type = changed_path.getAttribute('action')
changed_file = os.path.basename(file_path)
if file_path.startswith('/trunk/'):
file_path = file_path[len('/trunk/'):]
# Add file to the map. # Add file to the map.
if changed_file not in file_to_revision_map: if file_path not in file_to_revision_map:
file_to_revision_map[changed_file] = [] file_to_revision_map[file_path] = []
file_to_revision_map[changed_file].append( file_to_revision_map[file_path].append(
(revision_number, file_action, file_path)) (revision_number, file_change_type))
# Set commit message of the CL. # Set commit message of the CL.
revision_object['message'] = revision.getElementsByTagName('msg')[ revision_object['message'] = revision.getElementsByTagName('msg')[
...@@ -102,18 +97,17 @@ class SVNParser(ParserInterface): ...@@ -102,18 +97,17 @@ class SVNParser(ParserInterface):
return (revision_map, file_to_revision_map) return (revision_map, file_to_revision_map)
def ParseLineDiff(self, path, component, file_action, revision_number): def ParseLineDiff(self, path, component, file_change_type, revision_number):
changed_line_numbers = [] changed_line_numbers = []
changed_line_contents = [] changed_line_contents = []
url_map = self.component_to_urls_map.get(component) url_map = self.component_to_urls_map.get(component)
if not url_map: if not url_map:
logging.error('Component %s is not currently supported.', component)
return (None, None, None) return (None, None, None)
# If the file is added (not modified), treat it as if it is not changed. # If the file is added (not modified), treat it as if it is not changed.
backup_url = url_map['revision_url'] % revision_number backup_url = url_map['revision_url'] % revision_number
if file_action == 'A': if file_change_type == 'A':
return (backup_url, changed_line_numbers, changed_line_contents) return (backup_url, changed_line_numbers, changed_line_contents)
# Retrieve data from the url. If no data is retrieved, return empty lists. # Retrieve data from the url. If no data is retrieved, return empty lists.
...@@ -121,7 +115,6 @@ class SVNParser(ParserInterface): ...@@ -121,7 +115,6 @@ class SVNParser(ParserInterface):
revision_number, revision_number) revision_number, revision_number)
data = crash_utils.GetDataFromURL(url) data = crash_utils.GetDataFromURL(url)
if not data: if not data:
logging.error('Failed to get line changes from %s.', url)
return (backup_url, changed_line_numbers, changed_line_contents) return (backup_url, changed_line_numbers, changed_line_contents)
line_diff_html = minidom.parseString(data) line_diff_html = minidom.parseString(data)
...@@ -129,8 +122,6 @@ class SVNParser(ParserInterface): ...@@ -129,8 +122,6 @@ class SVNParser(ParserInterface):
# If there are not NUM_TABLES tables in the html page, there should be an # If there are not NUM_TABLES tables in the html page, there should be an
# error in the html page. # error in the html page.
if len(tables) != NUM_TABLES_IN_LINEDIFF_PAGE: if len(tables) != NUM_TABLES_IN_LINEDIFF_PAGE:
logging.error('Failed to retrieve the diff of revision %d from %s.',
revision_number, url)
return (backup_url, changed_line_numbers, changed_line_contents) return (backup_url, changed_line_numbers, changed_line_contents)
# Diff content is in the second table. Each line of the diff content # Diff content is in the second table. Each line of the diff content
...@@ -163,8 +154,6 @@ class SVNParser(ParserInterface): ...@@ -163,8 +154,6 @@ class SVNParser(ParserInterface):
# If there aren't 3 tds, this line does should not contain line diff. # If there aren't 3 tds, this line does should not contain line diff.
if len(tds) != NUM_TDS_IN_LINEDIFF_PAGE: if len(tds) != NUM_TDS_IN_LINEDIFF_PAGE:
logging.warning('Failed to get a line of new file in revision %d.',
revision_number)
continue continue
# If line number information is not in hyperlink, ignore this line. # If line number information is not in hyperlink, ignore this line.
...@@ -173,8 +162,6 @@ class SVNParser(ParserInterface): ...@@ -173,8 +162,6 @@ class SVNParser(ParserInterface):
left_diff_type = tds[1].getAttribute('class')[prefix_len:] left_diff_type = tds[1].getAttribute('class')[prefix_len:]
right_diff_type = tds[2].getAttribute('class')[prefix_len:] right_diff_type = tds[2].getAttribute('class')[prefix_len:]
except IndexError: except IndexError:
logging.warning('Failed to get a line of file in revision %d.',
revision_number)
continue continue
# Treat the line as modified only if both left and right diff has type # Treat the line as modified only if both left and right diff has type
...@@ -198,15 +185,12 @@ class SVNParser(ParserInterface): ...@@ -198,15 +185,12 @@ class SVNParser(ParserInterface):
def ParseBlameInfo(self, component, file_path, line, revision): def ParseBlameInfo(self, component, file_path, line, revision):
url_map = self.component_to_urls_map.get(component) url_map = self.component_to_urls_map.get(component)
if not url_map: if not url_map:
logging.error('Component %s is not currently supported.', component)
return None return None
# Retrieve blame data from url, return None if fails. # Retrieve blame data from url, return None if fails.
url = url_map['blame_url'] % (file_path, revision, revision) url = url_map['blame_url'] % (file_path, revision, revision)
data = crash_utils.GetDataFromURL(url) data = crash_utils.GetDataFromURL(url)
if not data: if not data:
logging.error('Failed to retrieve annotation information from %s.',
url)
return None return None
blame_html = minidom.parseString(data) blame_html = minidom.parseString(data)
...@@ -214,17 +198,18 @@ class SVNParser(ParserInterface): ...@@ -214,17 +198,18 @@ class SVNParser(ParserInterface):
title = blame_html.getElementsByTagName('title') title = blame_html.getElementsByTagName('title')
# If the returned html page is an exception page, return None. # If the returned html page is an exception page, return None.
if title[0].firstChild.nodeValue == 'ViewVC Exception': if title[0].firstChild.nodeValue == 'ViewVC Exception':
logging.error('Failed to retrieve blame information from %s.', url)
return None return None
# Each of the blame result is in <tr>. # Each of the blame result is in <tr>.
blame_results = blame_html.getElementsByTagName('tr') blame_results = blame_html.getElementsByTagName('tr')
try:
blame_result = blame_results[line] blame_result = blame_results[line]
except IndexError:
return None
# There must be 4 <td> for each <tr>. If not, this page is wrong. # There must be 4 <td> for each <tr>. If not, this page is wrong.
tds = blame_result.getElementsByTagName('td') tds = blame_result.getElementsByTagName('td')
if len(tds) != 4: if len(tds) != 4:
logging.error('Failed to retrieve blame information from %s.', url)
return None return None
# The third <td> has the line content, separated by <span>s. Combine # The third <td> has the line content, separated by <span>s. Combine
...@@ -257,6 +242,9 @@ class SVNParser(ParserInterface): ...@@ -257,6 +242,9 @@ class SVNParser(ParserInterface):
except IndexError: except IndexError:
revision = tds[2].firstChild.nodeValue revision = tds[2].firstChild.nodeValue
(revision_info, _) = self.ParseChangelog(component, revision, revision)
message = revision_info[int(revision)]['message']
# Return the parsed information. # Return the parsed information.
revision_url = url_map['revision_url'] % int(revision) revision_url = url_map['revision_url'] % int(revision)
return (line_content, revision, author, revision_url) return (line_content, revision, author, revision_url, message)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment