Commit c7b282ab authored by mnaganov, committed by Commit bot

[Android WebView] Run license scanning in parallel

Split the Perl script into 2 and shard the actual checking, so we can
run it in parallel. On bots, this reduces the time needed to complete
"check licenses" step from 3-4 mins to 40 seconds!

I'm planning to rewrite the scanner in Python soon, but I see that this
change can already help running trybots faster.

BUG=416496

Review URL: https://codereview.chromium.org/589143002

Cr-Commit-Position: refs/heads/master@{#296207}
parent a85c48c2
#!/usr/bin/perl -w
# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# Use: find_copyrights.pl <start-from> [exclude-dir ...]
# Use: echo filename1.cc ... | find_copyrights.pl
# or: find_copyrights.pl list_file
# or: find_files.pl ... | find_copyrights.pl
use strict;
use warnings;
......@@ -14,29 +16,10 @@ sub start_copyright_parsing();
my $progname = basename($0);
my $root_dir = shift @ARGV;
my @find_args = ();
while (@ARGV) {
my $path = shift @ARGV;
push @find_args, qw'-not ( -path', "*/$path/*", qw'-prune )'
}
push @find_args, qw(-follow -type f -print);
open FIND, '-|', 'find', $root_dir, @find_args
or die "$progname: Couldn't exec find: $!\n";
my $check_regex = '\.(asm|c(c|pp|xx)?|h(h|pp|xx)?|p(l|m)|xs|sh|php|py(|x)' .
'|rb|idl|java|el|sc(i|e)|cs|pas|inc|js|pac|html|dtd|xsl|mod|mm?' .
'|tex|mli?)$';
my @files = ();
while (<FIND>) {
chomp;
push @files, $_ unless (-z $_ || !m%$check_regex%);
}
close FIND;
my $generated_file_scan_boundary = 25;
while (@files) {
my $file = shift @files;
while (<>) {
chomp;
my $file = $_;
my $file_header = '';
my %copyrights;
open (F, "<$file") or die "$progname: Unable to access $file\n";
......
#!/usr/bin/perl -w
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# Use: find_files.pl <start-from> [exclude-dir ...]
use strict;
use warnings;
use File::Basename;

my $progname = basename($0);

# The first argument is the directory handed to find(1) as its starting
# point. Fail early with a usage message instead of running find on undef.
my $root_dir = shift @ARGV;
defined $root_dir
    or die "Use: $progname <start-from> [exclude-dir ...]\n";

# Every remaining argument becomes a "-not ( -path */<arg>/* -prune )"
# clause on the find command line.
my @find_args = ();
for my $path (@ARGV) {
    push @find_args, qw'-not ( -path', "*/$path/*", qw'-prune )';
}
push @find_args, qw(-follow -type f -print);

# List-form pipe open: find is exec'ed directly with these arguments, no shell.
open my $find_fh, '-|', 'find', $root_dir, @find_args
    or die "$progname: Couldn't exec find: $!\n";

# File name extensions that are passed on to the output; compiled once.
my $check_pattern = '\.(asm|c(c|pp|xx)?|h(h|pp|xx)?|p(l|m)|xs|sh|php|py(|x)' .
    '|rb|idl|java|el|sc(i|e)|cs|pas|inc|js|pac|html|dtd|xsl|mod|mm?' .
    '|tex|mli?)$';
my $check_regex = qr/$check_pattern/;

# Print one path per line for every file that has a matching extension and
# is not zero-sized. The name is tested first so that files with other
# extensions are never stat'ed.
while (my $file = <$find_fh>) {
    chomp $file;
    next unless $file =~ $check_regex;
    next if -z $file;
    print "$file\n";
}

# For a piped open, close reports the child's exit status. Only warn, so the
# script's own exit code stays what it was before this check existed.
close $find_fh
    or warn "$progname: find did not exit cleanly (status $?)\n";
......@@ -18,6 +18,7 @@ aren't in a third-party directory with a README.chromium file.
import glob
import imp
import multiprocessing
import optparse
import os
import re
......@@ -95,6 +96,43 @@ def GetUnknownIncompatibleDirectories():
class ScanResult(object):
  # Possible scan outcomes, as three consecutive integer codes.
  Ok = 0
  Warnings = 1
  Errors = 2
# Needs to be a top-level function for multiprocessing
def _FindCopyrights(files_to_scan):
  """Runs find_copyrights.pl on one shard of files and collects offenders.

  Args:
    files_to_scan: String that is written to the scanner's stdin (the caller
      passes newline-separated file paths).
  Returns:
    A list of normalized paths of the files for which the scanner reported at
    least one copyright that is neither '*No copyright*' nor a Chromium
    Authors copyright.
  Raises:
    RuntimeError: If the scanner exits with a non-zero status.
  """
  args = [os.path.join('android_webview', 'tools', 'find_copyrights.pl')]
  p = subprocess.Popen(
      args=args, cwd=REPOSITORY_ROOT,
      stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  lines = p.communicate(files_to_scan)[0].splitlines()
  # If the scanner died part-way through, its output is truncated and the
  # remaining files of this shard were never checked. Fail loudly rather than
  # report those files as clean.
  if p.returncode != 0:
    raise RuntimeError(
        '%s exited with status %d' % (args[0], p.returncode))

  allowed_copyrights = r'^(?:\*No copyright\*' \
      r'|20[0-9][0-9](?:-20[0-9][0-9])? The Chromium Authors\. ' \
      r'All rights reserved.*)$'
  allowed_copyrights_re = re.compile(allowed_copyrights)

  offending_files = []
  for line in lines:
    # Each output line is '<file>\t<copyrights>'.
    entries = line.split('\t')
    if len(entries) < 2:
      # Blank or malformed line: there is no copyright field to inspect.
      continue
    if entries[1] == "GENERATED FILE":
      continue
    # Multiple copyrights of one file are joined with ' / '.
    copyrights = entries[1].split(' / ')
    for c in copyrights:
      if c and not allowed_copyrights_re.match(c):
        offending_files.append(os.path.normpath(entries[0]))
        break
  return offending_files
def _ShardString(s, delimiter, shard_len):
result = []
index = 0
last_pos = 0
for m in re.finditer(delimiter, s):
index += 1
if index % shard_len == 0:
result.append(s[last_pos:m.end()])
last_pos = m.end()
if not index % shard_len == 0:
result.append(s[last_pos:])
return result
def _CheckLicenseHeaders(excluded_dirs_list, whitelisted_files):
"""Checks that all files which are not in a listed third-party directory,
and which do not use the standard Chromium license, are whitelisted.
......@@ -147,26 +185,21 @@ def _CheckLicenseHeaders(excluded_dirs_list, whitelisted_files):
# This is not part of open source chromium, but are included on some bots.
excluded_dirs_list.append('skia/tools/clusterfuzz-data')
args = ['android_webview/tools/find_copyrights.pl',
args = [os.path.join('android_webview', 'tools', 'find_files.pl'),
'.'
] + excluded_dirs_list
p = subprocess.Popen(args=args, cwd=REPOSITORY_ROOT, stdout=subprocess.PIPE)
lines = p.communicate()[0].splitlines()
offending_files = []
allowed_copyrights = '^(?:\*No copyright\*' \
'|20[0-9][0-9](?:-20[0-9][0-9])? The Chromium Authors\. ' \
'All rights reserved.*)$'
allowed_copyrights_re = re.compile(allowed_copyrights)
for l in lines:
entries = l.split('\t')
if entries[1] == "GENERATED FILE":
continue
copyrights = entries[1].split(' / ')
for c in copyrights:
if c and not allowed_copyrights_re.match(c):
offending_files.append(os.path.normpath(entries[0]))
break
files_to_scan = p.communicate()[0]
sharded_files_to_scan = _ShardString(files_to_scan, '\n', 2000)
pool = multiprocessing.Pool()
offending_files_chunks = pool.map_async(
_FindCopyrights, sharded_files_to_scan).get(999999)
pool.close()
pool.join()
# Flatten out the result
offending_files = \
[item for sublist in offending_files_chunks for item in sublist]
unknown = set(offending_files) - set(whitelisted_files)
if unknown:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment