[Android WebView] Rewrite copyrights scanner in Python

This is to prepare for running the scanner as a presubmit check. I have added some manual tests to make sure that the new scanner is compatible with the old one. I plan to replace them with automatic unittests once I start using presubmit-style input API objects that are easily mockable. BUG=343104 Review URL: https://codereview.chromium.org/622493004 Cr-Commit-Position: refs/heads/master@{#297828}

[Android WebView] Rewrite copyrights scanner in Python
This is to prepare for running the scanner as a presubmit check. I have added some manual tests to make sure that the new scanner is compatible with the old one. I plan to replace them with automatic unittests once I start using presubmit-style input API objects that are easily mockable. BUG=343104 Review URL: https://codereview.chromium.org/622493004 Cr-Commit-Position: refs/heads/master@{#297828}
f8af6334 · mnaganov · Commit bot · dc445f21 · f8af6334 · dc445f21
Commit f8af6334 authored Oct 02, 2014 by mnaganov Committed by Commit bot Oct 02, 2014
29 changed files
--- a/android_webview/tools/copyright_scanner.py
+++ b/android_webview/tools/copyright_scanner.py
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Utilities for scanning source files to determine code authorship.
+"""
+import itertools
+import os
+import re
+def FindFiles(root_dir, start_paths_list, excluded_dirs_list):
+  """Similar to UNIX utility find(1), searches for files in the directories.
+  Only files whose extension marks them as source code are returned.
+  Args:
+    root_dir: The root directory, to which all other paths are relative.
+    start_paths_list: The list of paths to start search from. Each path can
+      be a file or a directory. A start path that is a file is only checked
+      against the extensions whitelist, not against |excluded_dirs_list|.
+    excluded_dirs_list: The list of directories to skip. An entry matches
+      when it appears as a whole run of '/'-separated components anywhere in
+      a path relative to |root_dir|.
+  Returns:
+    The list of source code files found, relative to |root_dir|.
+  """
+  dirs_blacklist = ['/' + d + '/' for d in excluded_dirs_list]
+  def IsBlacklistedDir(d):
+    # Delimit the candidate the same way as the blacklist entries, so that an
+    # excluded directory also matches at the very start and at the very end
+    # of the relative path.
+    delimited = '/' + d + '/'
+    return any(item in delimited for item in dirs_blacklist)
+  def IsInBlacklistedDir(f):
+    # Only a leading delimiter is added for files: the file name itself must
+    # not be able to match as a directory.
+    delimited = '/' + f
+    return any(item in delimited for item in dirs_blacklist)
+  files_whitelist_re = re.compile(
+    r'\.(asm|c(c|pp|xx)?|h(h|pp|xx)?|p(l|m)|xs|sh|php|py(|x)'
+    r'|rb|idl|java|el|sc(i|e)|cs|pas|inc|js|pac|html|dtd|xsl|mod|mm?'
+    r'|tex|mli?)$')
+  files = []
+  # Length of the |root_dir| prefix together with the separator that follows
+  # it; correct whether or not |root_dir| already ends with a separator.
+  base_path_len = len(os.path.join(root_dir, ''))
+  for path in start_paths_list:
+    full_path = os.path.join(root_dir, path)
+    if os.path.isfile(full_path):
+      if files_whitelist_re.search(path):
+        files.append(path)
+    else:
+      for dirpath, dirnames, filenames in os.walk(full_path):
+        # Prune excluded subdirs in place, so that os.walk does not descend
+        # into them.
+        dirnames[:] = [
+          item for item in dirnames
+          if not IsBlacklistedDir(
+            os.path.join(dirpath, item)[base_path_len:])]
+        for filename in filenames:
+          filepath = os.path.join(dirpath, filename)[base_path_len:]
+          if files_whitelist_re.search(filepath) and \
+              not IsInBlacklistedDir(filepath):
+            files.append(filepath)
+  return files
+# Python triple-quoted strings. _IsGeneratedFile() cuts them out of a file
+# header before looking for "generated file" notices. An unterminated string
+# is matched up to the end of a line.
+python_multiline_string_double_re = re.compile(
+  r'"""[^"]*(?:"""|$)', flags=re.MULTILINE)
+python_multiline_string_single_re = re.compile(
+  r"'''[^']*(?:'''|$)", flags=re.MULTILINE)
+# Phrases that mark a file as generated. Every fragment is a raw string, so
+# that regexp escapes like \W are never treated as string escapes.
+automatically_generated_re = re.compile(
+  r'(All changes made in this file will be lost'
+  r'|DO NOT (EDIT|delete this file)'
+  r'|Generated (at|automatically|data)'
+  r'|Automatically generated'
+  r'|\Wgenerated\s+(?:\w+\s+)*file\W)', flags=re.IGNORECASE)
+def _IsGeneratedFile(header):
+  """Tells whether a file header marks the file as a generated one.
+  Args:
+    header: The text of the beginning of a file.
+  Returns:
+    True if the header contains a "generated file" notice outside of Python
+    triple-quoted strings, False otherwise.
+  """
+  header = header.upper()
+  # Drop Python multiline strings, so that a docstring that merely mentions
+  # generated data is not taken for a notice.
+  if '"""' in header:
+    header = python_multiline_string_double_re.sub('', header)
+  if "'''" in header:
+    header = python_multiline_string_single_re.sub('', header)
+  # First do simple strings lookup to save time.
+  if 'ALL CHANGES MADE IN THIS FILE WILL BE LOST' in header:
+    return True
+  if 'DO NOT EDIT' in header or 'DO NOT DELETE' in header or \
+      'GENERATED' in header:
+    # Convert the match object, so that callers always receive a boolean.
+    return automatically_generated_re.search(header) is not None
+  return False
+# Marker entries that FindCopyrights() puts into its result in place of
+# authorship strings: the first one for a file recognized as generated, the
+# second one for a file where no copyright line was found.
+GENERATED_FILE = 'GENERATED FILE'
+NO_COPYRIGHT = '*No copyright*'
+class _CopyrightsScanner(object):
+  """Extracts authorship strings from the lines of a single file.
+  The scanner remembers the numbers of the last lines where '(a)' and '(b)'
+  were seen inside C++ comments, and uses them to tell a '(c)' list item from
+  a '(c)' copyright sign. Use one scanner instance per file.
+  """
+  # Despite the name, matches double-quoted C / C++ string literals; they
+  # are removed from a line before matching to avoid false positives.
+  _c_comment_re = re.compile(r'''"[^"\\]*(?:\\.[^"\\]*)*"''')
+  _copyright_indicator = r'(?:copyright|copr\.|\xc2\xa9|\(c\))'
+  # Group 1 captures the text following the indicator up to the end of line.
+  _full_copyright_indicator_re = \
+    re.compile(r'(?:\W|^)' + _copyright_indicator + r'(?::\s*|\s+)(\w.*)$', \
+                 re.IGNORECASE)
+  # Words that follow 'copyright' in phrases that are not authorship
+  # statements, e.g. 'copyright notice'.
+  _copyright_disindicator_re = \
+    re.compile(r'\s*\b(?:info(?:rmation)?|notice|and|or)\b', re.IGNORECASE)
+  def __init__(self):
+    self.max_line_numbers_proximity = 3
+    # Set up the defaults the way that proximity checks will not succeed.
+    self.last_a_item_line_number = -200
+    self.last_b_item_line_number = -100
+  def _CloseLineNumbers(self, a, b):
+    """Tells whether line |a| follows line |b| within the proximity limit."""
+    return 0 <= a - b <= self.max_line_numbers_proximity
+  def MatchLine(self, line_number, line):
+    """Looks for an authorship statement in the given line.
+    Args:
+      line_number: The number of the line in the file.
+      line: The text of the line.
+    Returns:
+      The prettified authorship string, or None if the line has none.
+    """
+    if '"' in line:
+      line = _CopyrightsScanner._c_comment_re.sub('', line)
+    upcase_line = line.upper()
+    # Record '(a)' and '(b)' last occurrences in C++ comments.
+    # This is to filter out '(c)' used as a list item inside C++ comments.
+    # E.g. "// blah-blah (a) blah\n// blah-blah (b) and (c) blah"
+    cpp_comment_idx = upcase_line.find('//')
+    if cpp_comment_idx != -1:
+      if upcase_line.find('(A)') > cpp_comment_idx:
+        self.last_a_item_line_number = line_number
+      if upcase_line.find('(B)') > cpp_comment_idx:
+        self.last_b_item_line_number = line_number
+    # Fast bailout, uses the same patterns as _copyright_indicator regexp.
+    if not 'COPYRIGHT' in upcase_line and not 'COPR.' in upcase_line \
+        and not '\xc2\xa9' in upcase_line:
+      c_item_index = upcase_line.find('(C)')
+      if c_item_index == -1:
+        return None
+      if c_item_index > cpp_comment_idx and \
+          self._CloseLineNumbers(line_number,
+                                 self.last_b_item_line_number) and \
+          self._CloseLineNumbers(self.last_b_item_line_number,
+                                 self.last_a_item_line_number):
+        return None
+    m = _CopyrightsScanner._full_copyright_indicator_re.search(line)
+    if not m:
+      return None
+    # Take only the text after the indicator. The whole match would also
+    # include the character consumed by the leading (?:\W|^), e.g. '#'.
+    copyr = m.group(1)
+    if _CopyrightsScanner._copyright_disindicator_re.match(copyr):
+      return None
+    # Prettify the authorship string: strip the trailing punctuation, remove
+    # any remaining indicators, tidy up whitespace, and unescape '\@'.
+    copyr = re.sub(r'[,.]?\s*$', '', copyr)
+    copyr = re.sub(self._copyright_indicator, '', copyr, flags=re.IGNORECASE)
+    copyr = re.sub(r'^\s+', '', copyr)
+    copyr = re.sub(r'\s{2,}', ' ', copyr)
+    copyr = re.sub(r'\\@', '@', copyr)
+    return copyr
+def FindCopyrights(root_dir, files_to_scan):
+  """Determines code authorship, and finds generated files.
+  Args:
+    root_dir: The root directory, to which all other paths are relative.
+    files_to_scan: The list of file names to scan.
+  Returns:
+    A list with one entry per scanned file, in the order of |files_to_scan|.
+    Each entry is a list of the authorship strings found in the file. For a
+    generated file the entry is [GENERATED_FILE]; for a file without any
+    copyright info it is [NO_COPYRIGHT].
+  """
+  results = []
+  for file_name in files_to_scan:
+    scanner = _CopyrightsScanner()
+    header_lines = []
+    found = []
+    with open(os.path.join(root_dir, file_name), 'r') as f:
+      for number, text in enumerate(f.readlines(), 1):
+        # Only the first 25 lines are inspected for "generated file" notices.
+        if number <= 25:
+          header_lines.append(text)
+        authorship = scanner.MatchLine(number, text)
+        if authorship:
+          found.append(authorship)
+    if _IsGeneratedFile(''.join(header_lines)):
+      entry = [GENERATED_FILE]
+    elif found:
+      entry = found
+    else:
+      entry = [NO_COPYRIGHT]
+    results.append(entry)
+  return results
+def FindCopyrightViolations(root_dir, files_to_scan):
+  """Looks for files that do not belong exclusively to the Chromium Authors.
+  Generated files and files without any copyright info are not reported.
+  Args:
+    root_dir: The root directory, to which all other paths are relative.
+    files_to_scan: The list of file names to scan.
+  Returns:
+    The list of normalized names of the files that contain non-Chromium
+    copyrights.
+  """
+  copyrights = FindCopyrights(root_dir, files_to_scan)
+  offending_files = []
+  allowed_copyrights_re = re.compile(
+    r'^(?:20[0-9][0-9](?:-20[0-9][0-9])? The Chromium Authors\. '
+    'All rights reserved.*)$')
+  # The builtin zip is used instead of itertools.izip, as the latter is not
+  # available on Python 3.
+  for f, cs in zip(files_to_scan, copyrights):
+    if cs[0] == GENERATED_FILE or cs[0] == NO_COPYRIGHT:
+      continue
+    # A single foreign copyright is enough to report the file.
+    if not all(allowed_copyrights_re.match(c) for c in cs):
+      offending_files.append(os.path.normpath(f))
+  return offending_files
--- a/android_webview/tools/find_copyrights.pl
+++ b/android_webview/tools/find_copyrights.pl
-#!/usr/bin/perl -w
-# Copyright 2013 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-# Use: echo filename1.cc ... | find_copyrights.pl
-#  or: find_copyrights.pl list_file
-#  or: find_files.pl ... | find_copyrights.pl
-use strict;
-use warnings;
-use File::Basename;
-sub check_is_generated_file($);
-sub start_copyright_parsing();
-my $progname = basename($0);
-my $generated_file_scan_boundary = 25;
-while (<>) {
-    chomp;
-    my $file = $_;
-    my $file_header = '';
-    my %copyrights;
-    open (F, "<$file") or die "$progname: Unable to access $file\n";
-    my $parse_copyright = start_copyright_parsing();
-    while (<F>) {
-        $file_header .= $_ unless $. > $generated_file_scan_boundary;
-        my $copyright_match = $parse_copyright->($_, $.);
-        if ($copyright_match) {
-            $copyrights{lc("$copyright_match")} = "$copyright_match";
-        }
-    }
-    close(F);
-    my $copyright = join(" / ", sort values %copyrights);
-    print "$file\t";
-    if (check_is_generated_file($file_header)) {
-        print "GENERATED FILE";
-    } else {
-        print ($copyright or "*No copyright*");
-    }
-    print "\n";
-}
-sub check_is_generated_file($) {
-    my $license = uc($_[0]);
-    # Remove Python multiline comments to avoid false positives
-    if (index($license, '"""') != -1) {
-        $license =~ s/"""[^"]*(?:"""|$)//mg;
-    }
-    if (index($license, "'''") != -1) {
-        $license =~ s/'''[^']*(?:'''|$)//mg;
-    }
-    # Quick checks using index.
-    if (index($license, 'ALL CHANGES MADE IN THIS FILE WILL BE LOST') != -1) {
-        return 1;
-    }
-    if (index($license, 'DO NOT EDIT') != -1 ||
-        index($license, 'DO NOT DELETE') != -1 ||
-        index($license, 'GENERATED') != -1) {
-        return ($license =~ /(All changes made in this file will be lost' .
-            'DO NOT (EDIT|delete this file)|Generated (at|automatically|data)' .
-            '|Automatically generated|\Wgenerated\s+(?:\w+\s+)*file\W)/i);
-    }
-    return 0;
-}
-sub are_within_increasing_progression($$$) {
-    my $delta = $_[0] - $_[1];
-    return $delta >= 0 && $delta <= $_[2];
-}
-sub start_copyright_parsing() {
-    my $max_line_numbers_proximity = 3;
-    # Set up the defaults the way that proximity checks will not succeed.
-    my $last_a_item_line_number = -200;
-    my $last_b_item_line_number = -100;
-    return sub {
-        my $line = $_[0];
-        my $line_number = $_[1];
-        # Remove C / C++ strings to avoid false positives.
-        if (index($line, '"') != -1) {
-            $line =~ s/"[^"\\]*(?:\\.[^"\\]*)*"//g;
-        }
-        my $uc_line = uc($line);
-        # Record '(a)' and '(b)' last occurences in C++ comments.
-        my $cpp_comment_idx = index($uc_line, '//');
-        if ($cpp_comment_idx != -1) {
-            if (index($uc_line, '(A)') > $cpp_comment_idx) {
-                $last_a_item_line_number = $line_number;
-            }
-            if (index($uc_line, '(B)') > $cpp_comment_idx) {
-                $last_b_item_line_number = $line_number;
-            }
-        }
-        # Fast bailout, uses the same patterns as the regexp.
-        if (index($uc_line, 'COPYRIGHT') == -1 &&
-            index($uc_line, 'COPR.') == -1 &&
-            index($uc_line, '\x{00a9}') == -1 &&
-            index($uc_line, '\xc2\xa9') == -1) {
-            my $c_item_index = index($uc_line, '(C)');
-            return '' if ($c_item_index == -1);
-            # Filter out 'c' used as a list item inside C++ comments.
-            # E.g. "// blah-blah (a) blah\n// blah-blah (b) and (c) blah"
-            if ($c_item_index > $cpp_comment_idx &&
-                are_within_increasing_progression(
-                    $line_number,
-                    $last_b_item_line_number,
-                    $max_line_numbers_proximity) &&
-                are_within_increasing_progression(
-                    $last_b_item_line_number,
-                    $last_a_item_line_number,
-                    $max_line_numbers_proximity)) {
-                return '';
-            }
-        }
-        my $copyright_indicator_regex =
-            '(?:copyright|copr\.|\x{00a9}|\xc2\xa9|\(c\))';
-        my $full_copyright_indicator_regex =
-            sprintf '(?:\W|^)%s(?::\s*|\s+)(\w.*)$', $copyright_indicator_regex;
-        my $copyright_disindicator_regex =
-            '\b(?:info(?:rmation)?|notice|and|or)\b';
-        my $copyright = '';
-        if ($line =~ m%$full_copyright_indicator_regex%i) {
-            my $match = $1;
-            if ($match !~ m%^\s*$copyright_disindicator_regex%i) {
-                $match =~ s/([,.])?\s*$//;
-                $match =~ s/$copyright_indicator_regex//ig;
-                $match =~ s/^\s+//;
-                $match =~ s/\s{2,}/ /g;
-                $match =~ s/\\@/@/g;
-                $copyright = $match;
-            }
-        }
-        return $copyright;
-    }
-}
--- a/android_webview/tools/find_files.pl
+++ b/android_webview/tools/find_files.pl
-#!/usr/bin/perl -w
-# Copyright 2013 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-# Use: find_files.pl <start-from> [exclude-dir ...]
-use strict;
-use warnings;
-use File::Basename;
-my $progname = basename($0);
-my $root_dir = shift @ARGV;
-my @find_args = ();
-while (@ARGV) {
-    my $path = shift @ARGV;
-    push @find_args, qw'-not ( -path', "*/$path/*", qw'-prune )'
-}
-push @find_args, qw(-follow -type f -print);
-open FIND, '-|', 'find', $root_dir, @find_args
-            or die "$progname: Couldn't exec find: $!\n";
-my $check_regex = '\.(asm|c(c|pp|xx)?|h(h|pp|xx)?|p(l|m)|xs|sh|php|py(|x)' .
-    '|rb|idl|java|el|sc(i|e)|cs|pas|inc|js|pac|html|dtd|xsl|mod|mm?' .
-    '|tex|mli?)$';
-my @files = ();
-while (<FIND>) {
-    chomp;
-    print "$_\n" unless (-z $_ || !m%$check_regex%);
-}
-close FIND;
--- a/android_webview/tools/run_find_copyrights_manual_tests.sh
+++ b/android_webview/tools/run_find_copyrights_manual_tests.sh
+#!/bin/sh
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+# Feeds the sorted list of the files under android_webview/tools/tests to
+# "webview_licenses.py display_copyrights" via stdin.
+# NOTE(review): all paths are relative, so this presumably has to be run
+# from the directory that contains android_webview/ -- confirm.
+find android_webview/tools/tests -type f | sort \
+    | android_webview/tools/webview_licenses.py display_copyrights
--- a/android_webview/tools/tests/copyright-01
+++ b/android_webview/tools/tests/copyright-01
+// (c) 2014 Google Inc.
+//
+//  (a) One
+//
+//  (b) Two
+//
--- a/android_webview/tools/tests/copyright-02
+++ b/android_webview/tools/tests/copyright-02
+Copyright 2014 Google Inc.
--- a/android_webview/tools/tests/copyright-03
+++ b/android_webview/tools/tests/copyright-03
+Copr. 2014 Google Inc.
--- a/android_webview/tools/tests/copyright-04
+++ b/android_webview/tools/tests/copyright-04
+© 2014 Google Inc.
--- a/android_webview/tools/tests/copyright-05
+++ b/android_webview/tools/tests/copyright-05
+Copyright 2014    Google  Inc.
--- a/android_webview/tools/tests/generated-01
+++ b/android_webview/tools/tests/generated-01
+ALL CHANGES MADE IN THIS FILE WILL BE LOST
+Copyright 2014 Google
--- a/android_webview/tools/tests/generated-02
+++ b/android_webview/tools/tests/generated-02
+GENERATED FILE. DO NOT EDIT
+Copyright 2014 Google
--- a/android_webview/tools/tests/generated-03
+++ b/android_webview/tools/tests/generated-03
+GENERATED. DO NOT DELETE THIS FILE.
+Copyright 2014 Google
--- a/android_webview/tools/tests/generated-04
+++ b/android_webview/tools/tests/generated-04
+DO NOT EDIT
+Copyright 2014 Google
--- a/android_webview/tools/tests/generated-05
+++ b/android_webview/tools/tests/generated-05
+DO NOT DELETE THIS FILE
+Copyright 2014 Google
--- a/android_webview/tools/tests/generated-06
+++ b/android_webview/tools/tests/generated-06
+All changes made in this file will be lost
+Copyright 2014 Google
--- a/android_webview/tools/tests/generated-07
+++ b/android_webview/tools/tests/generated-07
+Automatically generated file
+Copyright 2014 Google
--- a/android_webview/tools/tests/generated-08
+++ b/android_webview/tools/tests/generated-08
+Synthetically generated dummy file
+Copyright 2014 Google
--- a/android_webview/tools/tests/generated-09
+++ b/android_webview/tools/tests/generated-09
+Generated data (by gnugnu)
+Copyright 2014 Google
--- a/android_webview/tools/tests/no-copyright-01
+++ b/android_webview/tools/tests/no-copyright-01
+std::cout << "Copyright 2014 Google"
--- a/android_webview/tools/tests/no-copyright-02
+++ b/android_webview/tools/tests/no-copyright-02
+// Several points can be made:
+//
+//  (a) One
+//
+//  (b) Two
+//
+//  (c) Three
+//
--- a/android_webview/tools/tests/no-copyright-03
+++ b/android_webview/tools/tests/no-copyright-03
+See 'foo' for copyright information.
--- a/android_webview/tools/tests/no-copyright-04
+++ b/android_webview/tools/tests/no-copyright-04
+See 'foo' for the copyright notice.
--- a/android_webview/tools/tests/no-copyright-05
+++ b/android_webview/tools/tests/no-copyright-05
+See 'foo' for the copyright and other things.
--- a/android_webview/tools/tests/non-generated-01
+++ b/android_webview/tools/tests/non-generated-01
+This file was prohibited from being generated.
--- a/android_webview/tools/tests/non-generated-02
+++ b/android_webview/tools/tests/non-generated-02
+Please do not delete our files! They are valuable to us.
--- a/android_webview/tools/tests/non-generated-03
+++ b/android_webview/tools/tests/non-generated-03
+Manually generated from dice rolls.
--- a/android_webview/tools/tests/non-generated-04
+++ b/android_webview/tools/tests/non-generated-04
+"""This Python script produces generated data
+"""
--- a/android_webview/tools/tests/non-generated-05
+++ b/android_webview/tools/tests/non-generated-05
+'''This Python script produces generated data
+'''
--- a/android_webview/tools/webview_licenses.py
+++ b/android_webview/tools/webview_licenses.py
 #!/usr/bin/python
-# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Copyright 2014 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
@@ -22,7 +22,6 @@ import multiprocessing
 import optparse
 import os
 import re
-import subprocess
 import sys
 import textwrap
@@ -40,6 +39,7 @@ third_party = \
 sys.path.append(os.path.join(REPOSITORY_ROOT, 'tools'))
 import licenses
+import copyright_scanner
 import known_issues
 class InputApi(object):
@@ -97,41 +97,12 @@ class ScanResult(object):
  Ok, Warnings, Errors = range(3)
 # Needs to be a top-level function for multiprocessing
-def _FindCopyrights(files_to_scan):
+def _FindCopyrightViolations(files_to_scan_as_string):
-  args = [os.path.join('android_webview', 'tools', 'find_copyrights.pl')]
+  return copyright_scanner.FindCopyrightViolations(
-  p = subprocess.Popen(
+    REPOSITORY_ROOT, files_to_scan_as_string)
-    args=args, cwd=REPOSITORY_ROOT,
-    stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+def _ShardList(l, shard_len):
-  lines = p.communicate(files_to_scan)[0].splitlines()
+  return [l[i:i + shard_len] for i in range(0, len(l), shard_len)]
-  offending_files = []
-  allowed_copyrights = '^(?:\*No copyright\*' \
-      '|20[0-9][0-9](?:-20[0-9][0-9])? The Chromium Authors\. ' \
-      'All rights reserved.*)$'
-  allowed_copyrights_re = re.compile(allowed_copyrights)
-  for l in lines:
-    entries = l.split('\t')
-    if entries[1] == "GENERATED FILE":
-      continue
-    copyrights = entries[1].split(' / ')
-    for c in copyrights:
-      if c and not allowed_copyrights_re.match(c):
-        offending_files.append(os.path.normpath(entries[0]))
-        break
-  return offending_files
-def _ShardString(s, delimiter, shard_len):
-  result = []
-  index = 0
-  last_pos = 0
-  for m in re.finditer(delimiter, s):
-    index += 1
-    if index % shard_len == 0:
-      result.append(s[last_pos:m.end()])
-      last_pos = m.end()
-  if not index % shard_len == 0:
-    result.append(s[last_pos:])
-  return result
 def _CheckLicenseHeaders(excluded_dirs_list, whitelisted_files):
  """Checks that all files which are not in a listed third-party directory,
@@ -185,16 +156,12 @@ def _CheckLicenseHeaders(excluded_dirs_list, whitelisted_files):
  # This is not part of open source chromium, but are included on some bots.
  excluded_dirs_list.append('skia/tools/clusterfuzz-data')
-  args = [os.path.join('android_webview', 'tools', 'find_files.pl'),
+  files_to_scan = copyright_scanner.FindFiles(
-          '.'
+    REPOSITORY_ROOT, ['.'], excluded_dirs_list)
-          ] + excluded_dirs_list
+  sharded_files_to_scan = _ShardList(files_to_scan, 2000)
-  p = subprocess.Popen(args=args, cwd=REPOSITORY_ROOT, stdout=subprocess.PIPE)
-  files_to_scan = p.communicate()[0]
-  sharded_files_to_scan = _ShardString(files_to_scan, '\n', 2000)
  pool = multiprocessing.Pool()
  offending_files_chunks = pool.map_async(
-      _FindCopyrights, sharded_files_to_scan).get(999999)
+      _FindCopyrightViolations, sharded_files_to_scan).get(999999)
  pool.close()
  pool.join()
  # Flatten out the result
@@ -234,7 +201,8 @@ def _ReadFile(path):
    The contents of the file as a string.
  """
-  return open(os.path.join(REPOSITORY_ROOT, path), 'rb').read()
+  with open(os.path.join(REPOSITORY_ROOT, path), 'rb') as f:
+    return f.read()
 def _FindThirdPartyDirs():
@@ -347,14 +315,16 @@ def main():
  parser = optparse.OptionParser(formatter=FormatterWithNewLines(),
                                 usage='%prog [options]')
  parser.description = (__doc__ +
-                       '\nCommands:\n' \
+                        '\nCommands:\n'
-                       '  scan Check licenses.\n' \
+                        '  scan Check licenses.\n'
-                       '  notice Generate Android NOTICE file on stdout.\n' \
+                        '  notice Generate Android NOTICE file on stdout.\n'
-                       '  incompatible_directories Scan for incompatibly'
+                        '  incompatible_directories Scan for incompatibly'
-                       ' licensed directories.\n'
+                        ' licensed directories.\n'
-                       '  all_incompatible_directories Scan for incompatibly'
+                        '  all_incompatible_directories Scan for incompatibly'
-                       ' licensed directories (even those in'
+                        ' licensed directories (even those in'
-                       ' known_issues.py).\n')
+                        ' known_issues.py).\n'
+                        '  display_copyrights Display autorship on the files'
+                        ' using names provided via stdin.\n')
  (_, args) = parser.parse_args()
  if len(args) != 1:
    parser.print_help()
@@ -372,6 +342,11 @@ def main():
    return _ProcessIncompatibleResult(GetUnknownIncompatibleDirectories())
  elif args[0] == 'all_incompatible_directories':
    return _ProcessIncompatibleResult(GetIncompatibleDirectories())
+  elif args[0] == 'display_copyrights':
+    files = sys.stdin.read().splitlines()
+    for f, c in zip(files, copyright_scanner.FindCopyrights('.', files)):
+      print f, '\t', ' / '.join(sorted(c))
+    return ScanResult.Ok
  parser.print_help()
  return ScanResult.Errors