Output modularization LOC stat to json file

This CL adds an argument option to modularization LOC stat generator script so it can output the result to a specified file in json format. Also relocated the script under //tools/android. Bug: 1146478 Change-Id: I6d24b467e7ac64116461aec7780f6fb9a20da03e Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2521982 Reviewed-by: Yaron Friedman <yfriedman@chromium.org> Reviewed-by: Henrique Nakashima <hnakashima@chromium.org> Commit-Queue: Jinsuk Kim <jinsukkim@chromium.org> Cr-Commit-Position: refs/heads/master@{#825507}

Output modularization LOC stat to json file
This CL adds an argument option to modularization LOC stat generator script so it can output the result to a specified file in json format. Also relocated the script under //tools/android. Bug: 1146478 Change-Id: I6d24b467e7ac64116461aec7780f6fb9a20da03e Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2521982 Reviewed-by: Yaron Friedman <yfriedman@chromium.org> Reviewed-by: Henrique Nakashima <hnakashima@chromium.org> Commit-Queue: Jinsuk Kim <jinsukkim@chromium.org> Cr-Commit-Position: refs/heads/master@{#825507}
ed30ea6e · Jinsuk Kim · Commit Bot · 1f2b3e26 · 1f2b3e26 · ed30ea6e
Commit ed30ea6e authored Nov 09, 2020 by Jinsuk Kim Committed by Commit Bot Nov 09, 2020
Showing with 184 additions and 137 deletions

chrome/tools/modularization-loc-stat.py chrome/tools/modularization-loc-stat.py +0 -137

tools/android/modularization-loc-stat.py tools/android/modularization-loc-stat.py +184 -0

No files found.
--- a/chrome/tools/modularization-loc-stat.py
+++ b/chrome/tools/modularization-loc-stat.py
-#!/usr/bin/env python3
-# Copyright 2020 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-"""Generates stats on modularization efforts. Stats include:
- Percentage of added lines in modularized files over legacy ones.
- The top 50 contributors to the modularized files.
-"""
-import argparse
-import subprocess
-import sys
-from collections import OrderedDict
-from collections import defaultdict
-_M12N_DIRS = [
-    'chrome/browser',
-    'components',
-]
-_LEGACY_DIR = 'chrome/android'
-def _gen_stat(dates):
-    # Each CL is output in the following format:
-    #
-    # #:thanhdng:2020-08-17:Use vector icons for zero state file results
-    #
-    # 118     98      chrome/browser/ui/app_list/file_icon_util.cc
-    # 2       1       chrome/browser/ui/app_list/file_icon_util.h
-    # 0       20      chrome/browser/ui/app_list/file_icon_util_unittest.cc
-    #
-    # i.e.:
-    #
-    # #:author:commit-date:subject
-    #
-    # added-lines    deleted-lines  file-path1
-    # added-lines    deleted-lines  file-path2
-    # ...
-    command = ['git', 'log', '--numstat', '--no-renames',
-               '--format=#:%al:%cs:%s', '--after='+dates[0],
-               '--before='+dates[1], 'chrome', 'components']
-    try:
-        proc = subprocess.Popen(command,
-                                bufsize=1,  # buffered mode
-                                stdout=subprocess.PIPE,
-                                universal_newlines=True)
-    except subprocess.SubprocessError as e:
-        print(f'{command} failed with code {e.returncode}.', file=sys.stderr)
-        print(f'\nSTDERR: {e.stderr}', file=sys.stderr)
-        print(f'\nSTDOUT: {e.stdout}', file=sys.stderr)
-        raise
-    author_stat = defaultdict(int)
-    total_m12n = 0
-    total_legacy = 0
-    prev_msg_len = 0
-    revert_cl = False
-    for raw_line in proc.stdout:
-        if raw_line.isspace():
-            continue
-        line = raw_line.strip()
-        if line.startswith('#'):  # patch summary line
-            _, author, commit_date, *subject = line.split(':', 4)
-            revert_cl = (subject[0].startswith('Revert') or
-                           subject[0].startswith('Reland'))
-        else:
-            if revert_cl or not line.endswith('.java'):
-                continue
-            # Do not take into account the number of deleted lines, which can
-            # turn the overall changes to negative. If a class was renamed,
-            # for instance, what's deleted is added somewhere else, so counting
-            # only for addition works. Other kinds of deletion will be ignored.
-            added, _deleted, path = line.split()
-            diff = int(added)
-            if _is_m12n_path(path):
-                total_m12n += diff
-                author_stat[author] += diff
-            elif _is_legacy_path(path):
-                total_legacy += diff
-        msg = f'\rProcessing {commit_date} by {author}'
-        _print_progress(msg, prev_msg_len)
-        prev_msg_len = len(msg)
-    _print_progress('Processing complete', prev_msg_len)
-    print('\n')
-    percentage = 100.0 * total_m12n / (total_m12n + total_legacy)
-    print(f'# of lines added in modularized files: {total_m12n}')
-    print(f'# of lines added in legacy files: {total_legacy}')
-    print(f'% of lines landing in modularized files: {percentage:2.2f}%')
-    # Shows the top 50 contributors to modularized files.
-    print('\nTop contributors:')
-    print('No  lines    %    author')
-    ranks = OrderedDict(
-        sorted(author_stat.items(), key=lambda x:x[1], reverse=True))
-    for rank, author in enumerate(list(ranks.keys())[:50], 1):
-        lines = ranks[author]
-        if lines == 0:
-            break
-        ratio = 100 * lines / total_m12n
-        print(f'{rank:2d} {lines:6d} {ratio:5.1f}  {author}')
-def _is_m12n_path(path):
-    for prefix in _M12N_DIRS:
-        if path.startswith(prefix):
-            return True
-    return False
-def _is_legacy_path(path):
-    return path.startswith(_LEGACY_DIR)
-def _print_progress(msg, prev_msg_len):
-    msg_len = len(msg)
-    # Add spaces to remove the previous progress output completely.
-    if msg_len < prev_msg_len:
-        msg += ' ' * (prev_msg_len - msg_len)
-    print(msg, end='\r')
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Generates LOC stats for modularization effort.")
-    parser.add_argument(
-        '--date',
-        required=True,
-        type=str,
-        metavar=('<date-from>', '<date-to>'),
-        nargs=2,
-        help='date range (YYYY-MM-DD)~(YYYY-MM-DD)')
-    args = parser.parse_args()
-    _gen_stat(args.date)
--- a/tools/android/modularization-loc-stat.py
+++ b/tools/android/modularization-loc-stat.py
+#!/usr/bin/env python3
+# Copyright 2020 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Generates stats on modularization efforts. Stats include:
+- Percentage of added lines in modularized files over legacy ones.
+- The top 50 contributors to the modularized files.
+"""
+import argparse
+import json
+import subprocess
+import sys
+from collections import OrderedDict
+from collections import defaultdict
+_M12N_DIRS = [
+    'chrome/browser',
+    'components',
+]
+_LEGACY_DIR = 'chrome/android'
+def GenerateLOCStats(dates, quiet, json_format):
+  """Generate modulazation LOC stats.
+  Args:
+    dates: A tuple containing the range of dates for git patches to process.
+    quiet: True if no message is output during the processing.
+    json_format: True if the output should be in json format. Otherwise
+        a plain, human-readable table is generated.
+  Return:
+    Text string containing the stat in a specified format.
+  """
+  #  Each CL is output in the following format:
+  #
+  #   :thanhdng:2020-08-17:Use vector icons for zero state file results
+  #
+  #  118     98      chrome/browser/ui/app_list/file_icon_util.cc
+  #  2       1       chrome/browser/ui/app_list/file_icon_util.h
+  #  0       20      chrome/browser/ui/app_list/file_icon_util_unittest.cc
+  #
+  #  i.e.:
+  #
+  #   :author:commit-date:subject
+  #
+  #  added-lines    deleted-lines  file-path1
+  #  added-lines    deleted-lines  file-path2
+  #  ...
+  command = [
+      'git', 'log', '--numstat', '--no-renames', '--format=#:%al:%cs:%s',
+      '--after=' + dates[0], '--before=' + dates[1], 'chrome', 'components'
+  ]
+  try:
+    proc = subprocess.Popen(
+        command,
+        bufsize=1,  # buffered mode
+        stdout=subprocess.PIPE,
+        universal_newlines=True)
+  except subprocess.SubprocessError as e:
+    print(f'{command} failed with code {e.returncode}.', file=sys.stderr)
+    print(f'\nSTDERR: {e.stderr}', file=sys.stderr)
+    print(f'\nSTDOUT: {e.stdout}', file=sys.stderr)
+    raise
+  author_stat = defaultdict(int)
+  total_m12n = 0
+  total_legacy = 0
+  prev_msg_len = 0
+  revert_cl = False
+  for raw_line in proc.stdout:
+    if raw_line.isspace():
+      continue
+    line = raw_line.strip()
+    if line.startswith('#'):  # patch summary line
+      _, author, commit_date, *subject = line.split(':', 4)
+      revert_cl = (subject[0].startswith('Revert')
+                   or subject[0].startswith('Reland'))
+    else:
+      if revert_cl or not line.endswith('.java'):
+        continue
+      # Do not take into account the number of deleted lines, which can
+      # turn the overall changes to negative. If a class was renamed,
+      # for instance, what's deleted is added somewhere else, so counting
+      # only for addition works. Other kinds of deletion will be ignored.
+      added, _deleted, path = line.split()
+      diff = int(added)
+      if _is_m12n_path(path):
+        total_m12n += diff
+        author_stat[author] += diff
+      elif _is_legacy_path(path):
+        total_legacy += diff
+    msg = f'\rProcessing {commit_date} by {author}'
+    if not quiet: _print_progress(msg, prev_msg_len)
+    prev_msg_len = len(msg)
+  if not quiet:
+    _print_progress('Processing complete', prev_msg_len)
+    print('\n')
+  rankings = OrderedDict(
+      sorted(author_stat.items(), key=lambda x: x[1], reverse=True))
+  if json_format:
+    return json.dumps({
+        'loc_modularized': total_m12n,
+        'loc_legacy': total_legacy,
+        'rankings': rankings,
+    })
+  else:
+    output = []
+    percentage = 100.0 * total_m12n / (total_m12n + total_legacy)
+    output.append(f'# of lines added in modularized files: {total_m12n}')
+    output.append(f'# of lines added in legacy files: {total_legacy}')
+    output.append(f'% of lines landing in modularized files: {percentage:2.2f}')
+    # Shows the top 50 contributors to modularized files.
+    output.append('\nTop contributors:')
+    output.append('No  lines    %    author')
+    for rank, author in enumerate(list(rankings.keys())[:50], 1):
+      lines = rankings[author]
+      if lines == 0:
+        break
+      ratio = 100 * lines / total_m12n
+      output.append(f'{rank:2d} {lines:6d} {ratio:5.1f}  {author}')
+    return '\n'.join(output)
+def _is_m12n_path(path):
+  for prefix in _M12N_DIRS:
+    if path.startswith(prefix):
+      return True
+  return False
+def _is_legacy_path(path):
+  return path.startswith(_LEGACY_DIR)
+def _print_progress(msg, prev_msg_len):
+  msg_len = len(msg)
+  # Add spaces to remove the previous progress output completely.
+  if msg_len < prev_msg_len:
+    msg += ' ' * (prev_msg_len - msg_len)
+  print(msg, end='\r')
+if __name__ == "__main__":
+  parser = argparse.ArgumentParser(
+      description="Generates LOC stats for modularization effort.")
+  parser.add_argument('--date',
+                      required=True,
+                      type=str,
+                      metavar=('<date-from>', '<date-to>'),
+                      nargs=2,
+                      help='date range (YYYY-MM-DD)~(YYYY-MM-DD)')
+  parser.add_argument('-q',
+                      '--quiet',
+                      action='store_true',
+                      help='Do not output any message while processing')
+  parser.add_argument('-j',
+                      '--json',
+                      action='store_true',
+                      help='Output result in json format. '
+                      'If not specified, output in more human-readable table.')
+  parser.add_argument('-o',
+                      '--output',
+                      type=str,
+                      help='File to write the result to in json format. '
+                      'If not specified, outputs to console.')
+  args = parser.parse_args()
+  result = GenerateLOCStats(args.date, args.quiet, args.json)
+  if args.output:
+    with open(args.output, 'w') as f:
+      f.write(result)
+  else:
+    print(result)