Commit 5095c022, authored by Henrique Nakashima and committed by Commit Bot

Script to gather OWNERS, LoC, and git activity data about modules

tools/android/modularization/getowners.py traverses the codebase
reading OWNERS files and uses git commands to read a number of
metrics at the granularity of modules.

This script will be run periodically in a builder to feed information
into a dashboard.

Bug: 1135347
Change-Id: I8ef4050396c796cc96aff757d99ad15cb14b9483
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2451326
Commit-Queue: Henrique Nakashima <hnakashima@chromium.org>
Reviewed-by: Peter Wen <wnwen@chromium.org>
Cr-Commit-Position: refs/heads/master@{#828373}
parent e3c649ed
fredmello@chromium.org
hnakashima@chromium.org
wnwen@chromium.org
#!/usr/bin/env python3
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
r'''Get chromium OWNERS information for android directories.
tools/android/modularization/getowners.py -- \
--git-dir ~/chromium/src \
-o ~/owners.json
'''
import argparse
import collections
import dataclasses
import datetime
import functools
import multiprocessing
import os
import re
import time
from typing import Dict, List, Optional, Tuple
import owners_data
import owners_exporter
import owners_git
import owners_input
def main():
    '''Command-line entry point.

    Parses the arguments, collects the Android folders under the given git
    checkout, processes each folder in a multiprocessing pool and writes the
    combined result to the requested output file. Prints the output location
    and the elapsed wall-clock time.
    '''
    parser = argparse.ArgumentParser(
        description='Traverses the chromium codebase gathering OWNERS data.')
    required = parser.add_argument_group('required arguments')
    required.add_argument('--git-dir',
                          required=True,
                          help='Root directory to search for owners.')
    required.add_argument('-o',
                          '--output',
                          required=True,
                          help='File to write the result json to.')
    parser.add_argument(
        '--limit-to-dir',
        help='Limit to a single directory. Used to restrict a smaller scope for '
        'debugging.')
    args = parser.parse_args()

    began_at = time.time()

    # normpath also guarantees the root does not end with '/'.
    chromium_root = os.path.normpath(os.path.expanduser(args.git_dir))

    paths_to_search = owners_input.get_android_folders(chromium_root,
                                                       args.limit_to_dir)

    # Bind the root once so the pool only has to map over the paths.
    worker = functools.partial(_process_requested_path, chromium_root)
    with multiprocessing.Pool() as pool:
        data = pool.map(worker, paths_to_search)

    owners_exporter.to_json_file(data, args.output)
    print(f'Exported to {args.output}')

    elapsed_time = time.time() - began_at
    print(f'--- Took {elapsed_time} seconds ---')
def _process_requested_path(
    chromium_root: str, requested_path: owners_data.RequestedPath
) -> Tuple[owners_data.RequestedPath, owners_data.PathData]:
    '''Collects OWNERS and git information for one requested path.

    Returns the requested path paired with a PathData holding the owners
    representation and the git data gathered for it.
    '''
    owners = _build_owners_info(
        chromium_root, _find_owners_file(chromium_root, requested_path.path))
    git_data = _fetch_git_data(chromium_root, requested_path)
    return (requested_path, owners_data.PathData(owners, git_data))
def _fetch_git_data(chromium_root: str,
                    requested_path: owners_data.RequestedPath
                    ) -> owners_data.GitData:
    '''Fetches git data for a given directory for the last 182 days.

    Includes # of commits, reverts, relands, authors, and reviewers, plus the
    line/file totals for the directory and the repository HEAD hash and time.

    Commits whose author email contains one of the ignored tokens (rollers)
    are skipped entirely: they are not counted as CLs and contribute no
    reviewers.
    '''
    line_delimiter = '\ncommit '
    flags = re.IGNORECASE | re.MULTILINE
    author_search = re.compile(r'^Author: (.*) <(.*)>', flags)
    date_search = re.compile(r'Date: (.*)', flags)
    # `git log` indents the commit message, so tolerate any leading blanks
    # instead of requiring exactly one space before the trailer/title text.
    reviewer_search = re.compile(r'^[ \t]*Reviewed-by: (.*) <(.*)>', flags)
    revert_token = re.compile(r'^[ \t]*(\[?)Revert(\]?) \"', flags)
    reland_token = re.compile(r'^[ \t]*(\[?)Reland(\]?) \"', flags)
    ignored_authors = ('autoroll', 'roller')

    git_log = owners_git.get_log(chromium_root, requested_path.path, 182)
    git_data = owners_data.GitData()
    for commit_msg in git_log.split(line_delimiter):
        # An empty log splits into [''], which must not count as a CL.
        if not commit_msg.strip():
            continue

        author_re = author_search.search(commit_msg)
        if author_re:
            author = author_re.group(2)
            if any(ignored in author for ignored in ignored_authors):
                continue  # ignore flagged authors
            git_data.authors[author] += 1

        for _, reviewer in reviewer_search.findall(commit_msg):
            git_data.reviewers[reviewer] += 1

        # Only the first counted commit sets the date.
        date_re = date_search.search(commit_msg)
        if date_re and not git_data.latest_cl_date:
            # Parse the UTC offset too; dropping it would make timestamp()
            # interpret the commit time in this machine's local timezone.
            commit_time = datetime.datetime.strptime(
                date_re.group(1).strip(), '%a %b %d %H:%M:%S %Y %z')
            git_data.latest_cl_date = int(commit_time.timestamp())

        git_data.cls += 1

        # The title is the first line after the blank line that ends the
        # header block (line index 4 for a regular three-line header).
        _, _, body = commit_msg.partition('\n\n')
        title = body.split('\n', 1)[0]
        if revert_token.search(title):
            git_data.reverted_cls += 1
        if reland_token.search(title):
            git_data.relanded_cls += 1

    git_data.lines_of_code = owners_git.get_total_lines_of_code(
        chromium_root, requested_path.path)
    git_data.number_of_files = owners_git.get_total_files(
        chromium_root, requested_path.path)
    git_data.git_head = owners_git.get_head_hash(chromium_root)
    git_data.git_head_time = owners_git.get_last_commit_date(chromium_root)
    return git_data
def _find_owners_file(chromium_root: str, filepath: str) -> str:
    '''Returns the path to the OWNERS file for the given path (or up the tree).

    Args:
      chromium_root: Root directory of the checkout; the search never leaves it.
      filepath: A directory, a file, or an OWNERS file path, either absolute
        or relative to chromium_root.

    Returns:
      The path of the closest existing OWNERS file at or above filepath, or an
      empty string when none exists inside chromium_root.
    '''
    root_prefix = os.path.join(chromium_root, '')
    if not filepath.startswith(root_prefix):
        filepath = os.path.join(chromium_root, filepath)

    while True:
        if os.path.isdir(filepath):
            ofile = os.path.join(filepath, 'OWNERS')
        elif 'OWNERS' in os.path.basename(filepath):
            ofile = filepath
        else:
            filepath = os.path.dirname(filepath)
            ofile = os.path.join(filepath, 'OWNERS')

        if os.path.exists(ofile):
            return ofile

        parent = os.path.dirname(filepath)
        # Stop at the checkout root (or the filesystem root) instead of
        # climbing forever or picking up an unrelated OWNERS file above it.
        inside_root = os.path.join(parent, '').startswith(root_prefix)
        if parent == filepath or not inside_root:
            return ''
        filepath = parent
# Cache of already parsed OWNERS files, keyed by path relative to the root.
owners_map: Dict[str, owners_data.Owners] = {}


def _build_owners_info(chromium_root: str,
                       owners_filepath: str) -> Optional[owners_data.Owners]:
    '''Creates a synthetic representation of an OWNERS file.

    Args:
      chromium_root: Root directory of the checkout.
      owners_filepath: Path of the OWNERS file; must be inside chromium_root.

    Returns:
      The parsed (and cached) Owners, with values missing from the file filled
      in from parent OWNERS files, or None when owners_filepath is empty.
    '''
    if not owners_filepath:
        return None

    assert owners_filepath.startswith(os.path.join(chromium_root, ''))
    owners_file = owners_filepath[len(chromium_root) + 1:]
    if owners_file in owners_map:
        return owners_map[owners_file]

    owners = owners_data.Owners(owners_file)
    with open(owners_filepath, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            elif line.startswith('file://'):
                owners.file_inherited = line[len('file://'):].strip()
            elif line.startswith('# COMPONENT:'):
                owners.component = line[len('# COMPONENT:'):].strip()
            elif line.startswith('# TEAM:'):
                owners.team = line[len('# TEAM:'):].strip()
            elif line.startswith('# OS:'):
                owners.os = line[len('# OS:'):].strip()
            elif line.startswith('#'):
                continue
            elif line.startswith('per-file'):
                continue
            elif '@' in line:
                # Keep only the email, dropping any comment that follows it.
                owner_email = line.split(None, 1)[0]
                owners.owners.append(owner_email)

    # Register before propagating so lookups of this same file made while
    # walking up the tree hit the cache.
    owners_map[owners.owners_file] = owners

    _propagate_down_owner_variables(chromium_root, owners)

    return owners
def _propagate_down_owner_variables(chromium_root: str,
                                    owners: owners_data.Owners) -> None:
    '''Fills values missing from `owners` using OWNERS files further up.

    Starting from `owners` itself, follows either the referenced `file://`
    OWNERS file or the OWNERS file of the parent directory, copying owners,
    component, team and os into `owners` wherever they are still unset.
    Stops once everything is set, an OWNERS file repeats, or no further
    OWNERS information is available.
    '''
    seen = set()
    ancestor = owners
    while ancestor:
        if ancestor.owners_file in seen:
            return

        if ancestor.owners and not owners.owners:
            owners.owners.extend(ancestor.owners)
        for field_name in ('component', 'team', 'os'):
            inherited = getattr(ancestor, field_name)
            if inherited and not getattr(owners, field_name):
                setattr(owners, field_name, inherited)

        if owners.owners and owners.component and owners.team and owners.os:
            return

        seen.add(ancestor.owners_file)

        # An explicit file:// reference wins over the directory hierarchy.
        next_location = ancestor.file_inherited or os.path.dirname(
            os.path.dirname(ancestor.owners_file))
        ancestor = _build_owners_info(
            chromium_root, _find_owners_file(chromium_root, next_location))
# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
# Lint as: python3
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import collections
import dataclasses
from typing import List, Optional
@dataclasses.dataclass
class Owners:
    '''In-memory summary of the contents of a single OWNERS file.'''
    # Path to the OWNERS file.
    owners_file: str
    # OWNERS file referenced from this one, if any.
    file_inherited: Optional[str] = None
    # Emails of the owners.
    owners: List[str] = dataclasses.field(default_factory=list)
    component: Optional[str] = None
    team: Optional[str] = None
    os: Optional[str] = None
@dataclasses.dataclass
class GitData:
    '''Git-derived metrics for a given hash/repo/folder.'''
    cls: int = 0
    reverted_cls: int = 0
    relanded_cls: int = 0
    lines_of_code: int = 0
    number_of_files: int = 0

    # Both counters map an ldap to the number of CLs attributed to it.
    authors: collections.Counter = dataclasses.field(
        default_factory=collections.Counter)
    reviewers: collections.Counter = dataclasses.field(
        default_factory=collections.Counter)

    latest_cl_date: Optional[int] = None
    git_head: Optional[str] = None
    git_head_time: Optional[str] = None

    def get_top_authors(self, n):
        '''Returns the n most frequent authors as (author, count) pairs.'''
        top_authors = self.authors.most_common(n)
        return top_authors

    def get_top_reviewers(self, n):
        '''Returns the n most frequent reviewers as (reviewer, count) pairs.'''
        top_reviewers = self.reviewers.most_common(n)
        return top_reviewers
@dataclasses.dataclass(frozen=True)
class RequestedPath:
    '''An immutable path to gather data for, with its feature name.'''
    # Path to be searched for.
    path: str
    # Feature name associated with the path.
    feature: str
@dataclasses.dataclass(frozen=True)
class PathData:
    '''Immutable pairing of the OWNERS and git data gathered for a path.'''
    # Synthetic OWNERS representation for the path.
    owner: Owners
    # Git metrics for the path.
    git_data: GitData
# Lint as: python3
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import json
from typing import Dict, List, Tuple
import owners_data
def to_json_file(paths_with_data: List[
    Tuple[owners_data.RequestedPath, owners_data.PathData]],
                 output_path: str) -> None:
    '''Writes the data to output_path, one JSON object per line.'''
    with open(output_path, 'w') as out:
        for requested_path, path_data in paths_with_data:
            record: Dict = _to_data_dict(requested_path, path_data)
            out.write(json.dumps(record))
            out.write('\n')
def _to_data_dict(requested_path: owners_data.RequestedPath,
                  path_data: owners_data.PathData) -> Dict:
    '''Transforms the RequestPath into a flat dictionary to be converted to json.

    Counts are exported as strings, unset owner attributes as empty strings,
    and the top three authors/reviewers as comma separated names. A path with
    no OWNERS information (owner is None) is exported with empty owner fields.
    '''

    def _joinppl(ppl, include_count=False):
        '''Returns the names from (name, count) pairs, optionally with counts.'''
        if include_count:
            return ['{} ({})'.format(p[0], p[1]) for p in ppl]
        return [p[0] for p in ppl]

    owners = path_data.owner
    if owners is None:
        # No OWNERS file was found for this path; export blank owner fields
        # rather than failing on attribute access.
        owners = owners_data.Owners(owners_file='')
    git_data = path_data.git_data

    return {
        'path': requested_path.path,
        'feature': requested_path.feature,
        'owners_file': owners.owners_file,
        'owners_email': ', '.join(owners.owners),
        'team': owners.team if owners.team else '',
        'component': owners.component if owners.component else '',
        'os': owners.os if owners.os else '',
        'lines_of_code': str(git_data.lines_of_code),
        'number_of_files': str(git_data.number_of_files),
        'latest_cl_date': git_data.latest_cl_date,
        'cl_count': str(git_data.cls),
        'reverted_cl_count': str(git_data.reverted_cls),
        'relanded_cl_count': str(git_data.relanded_cls),
        'top_authors': ', '.join(_joinppl(git_data.get_top_authors(3))),
        'top_reviewers': ', '.join(_joinppl(git_data.get_top_reviewers(3))),
        'git_head': git_data.git_head,
        'git_head_time': git_data.git_head_time,
    }
# Lint as: python3
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''Git utility functions.'''
import os
import subprocess
import sys
from typing import List, Optional
def get_head_hash(git_src: str) -> str:
    '''Returns the output of `git rev-parse HEAD` run inside git_src.'''
    rev_parse = ['git', 'rev-parse', 'HEAD']
    return run_command(rev_parse, cwd=git_src)
def get_last_commit_date(git_src: str) -> str:
    '''Returns the committer date (`%ct`) of the last commit in git_src.'''
    last_commit_time = ['git', 'log', '-1', '--format=%ct']
    return run_command(last_commit_time, cwd=git_src)
def get_total_lines_of_code(git_src: str, subdirectory: str) -> int:
    '''Gets the number of lines contained in the git directory.

    Args:
      git_src: Root of the git checkout.
      subdirectory: Directory (as passed to `git ls-files`) to restrict to.

    Returns:
      The sum of the line counts of every file listed by git ls-files.
    '''
    total_loc = 0
    for filepath in _run_ls_files_command(subdirectory, git_src):
        # ls-files runs with cwd=git_src, so its paths are relative to
        # git_src, not to this process' working directory.
        with open(os.path.join(git_src, filepath), 'rb') as f:
            total_loc += sum(1 for _ in f)
    return total_loc
def get_total_files(git_src: str, subdirectory: str) -> int:
    '''Returns how many files git ls-files lists for the subdirectory.'''
    return len(_run_ls_files_command(subdirectory, git_src))
def _run_ls_files_command(subdirectory: Optional[str],
                          git_src: str) -> List[str]:
    '''Runs git ls-files in git_src and returns the non-empty output lines.'''
    command = _build_ls_files_command(subdirectory)
    filepath_str = run_command(command, cwd=git_src)
    return [filepath for filepath in filepath_str.split('\n') if filepath]
def _build_ls_files_command(subdirectory: Optional[str]) -> List[str]:
    '''Builds the git ls-files argv, restricted to subdirectory when given.'''
    command = ['git', 'ls-files']
    if subdirectory:
        command += ['--', subdirectory]
    return command
def get_log(git_src: str, subdirectory: str, trailing_days: int) -> str:
    '''Returns the `git log --follow` output for subdirectory.

    The log is limited to commits from the last trailing_days days.
    '''
    since_arg = f'--since=\"{trailing_days} days ago\"'
    log_command = ['git', 'log', '--follow', since_arg, '--', subdirectory]
    return run_command(log_command, cwd=git_src)
def run_command(command: List[str], cwd: str) -> str:
    '''Executes command in cwd and returns its stripped standard output.

    On a non-zero exit status the command, its exit code, stderr and stdout
    are printed to stderr and the CalledProcessError is re-raised.
    '''
    try:
        completed = subprocess.run(command,
                                   cwd=cwd,
                                   check=True,
                                   text=True,
                                   capture_output=True)
    except subprocess.CalledProcessError as e:
        for report in (f'{command} failed with code {e.returncode}.',
                       f'\nSTDERR:\n{e.stderr}',
                       f'\nSTDOUT:\n{e.stdout}'):
            print(report, file=sys.stderr)
        raise
    else:
        return completed.stdout.strip()
# Lint as: python3
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
import re
from typing import List
import owners_data
# Path prefixes (relative to the chromium root) that get_android_folders
# skips: any walked directory whose relative path starts with one of these
# is not searched.
_IGNORED_FOLDERS = ('out', 'third_party', 'clank', 'build/linux',
'native_client')
# Regexes matched (case-insensitively) against root-relative directory paths.
# Group 1 is used as the feature name. Patterns ending in '(.*)$' are only
# accepted when the captured feature contains no '/'; the first pattern that
# is accepted wins.
_KNOWN_FOLDERS = [
r'^chrome\/browser\/(.*)\/android$', r'^chrome\/browser\/android\/(.*)$',
r'^chrome\/android\/(.*)$',
r'^chrome\/android\/java\/src\/org\/chromium\/chrome\/browser\/(.*)$',
r'^chrome\/android\/features\/(.*)$',
r'^chrome\/android\/javatests\/src\/org\/chromium\/chrome\/browser\/(.*)$',
r'^chrome\/android\/native_java_unittests\/src\/org\/chromium\/chrome\/browser\/(.*)$',
r'^chrome\/android\/junit\/src\/org\/chromium\/chrome\/browser\/(.*)$',
r'^components\/(.*)\/android$',
r'^content\/public\/android\/java\/src\/org\/chromium\/content\/browser\/(.*)$'
]
def get_android_folders(chromium_root: str,
                        limit_to_dir: str) -> List[owners_data.RequestedPath]:
    '''Get all directories containing `android/` in their path.

    Use _IGNORED_FOLDERS to exclude commonly returned folders that
    need to be excluded from the resultset. Use _KNOWN_FOLDERS to propose
    feature names to the folders based on their patterns.

    If limit_to_dir is non-empty, only traverse that dir and its subdirectories.

    Args:
      chromium_root: Root of the checkout to walk.
      limit_to_dir: Optional root-relative prefix restricting the search.

    Returns:
      One RequestedPath (root-relative path plus feature name) per directory
      that matches a known pattern or ends with '/android'.
    '''
    android_folders = []
    android_folders_found = set()
    for full_root, dirs, _ in os.walk(chromium_root):
        assert full_root.startswith(chromium_root)
        root = full_root[len(chromium_root) + 1:]
        if root.startswith(_IGNORED_FOLDERS):
            # Everything below an ignored root is ignored too (its relative
            # path has the same prefix), so stop os.walk from descending.
            dirs[:] = []
            continue
        if limit_to_dir and not root.startswith(limit_to_dir):
            continue

        for name in dirs:
            fullpath = os.path.join(root, name)

            # The first known pattern that accepts the path provides the
            # feature name.
            for folder_token in _KNOWN_FOLDERS:
                re_search = re.match(folder_token, fullpath, re.IGNORECASE)
                if not re_search:
                    continue
                feature = re_search.group(1)
                if folder_token.endswith('(.*)$'):
                    # Open-ended patterns only accept a direct child.
                    if '/' in feature:
                        continue
                else:
                    feature = feature.split('/')[0]
                android_folders.append(
                    owners_data.RequestedPath(fullpath, feature))
                android_folders_found.add(fullpath)
                break

            # Fallback for any other '.../android' directory.
            if (fullpath.endswith('/android')
                    and fullpath not in android_folders_found):
                if '/' in fullpath and not fullpath.startswith('chrome/'):
                    feature = fullpath.split('/')[0]
                else:
                    feature = fullpath
                android_folders.append(
                    owners_data.RequestedPath(fullpath, feature))
                android_folders_found.add(fullpath)

    return android_folders
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment