Commit f4b59e75 authored by maruel@chromium.org

Make isolate_driver.py process build.ninja and extract dependencies.

This makes a few assumptions:
- This basically breaks non-ninja builds for component builds. That never
  worked anyway.
- This assumes the file format of .ninja files. It will likely be quite
  obvious when this breaks.
- It makes some assumptions about the build steps, for example '.so.TOC' ->
  '.so' (see the sketch below).

On the other hand, it creates a deterministic dependency tree, which is
awesome. Technically it would work as well for non-component builds, but I
don't want to go that far yet. The end goal is that nobody has to enter the
binary dependencies in the .isolate files by hand.
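
To illustrate the '.so.TOC' -> '.so' rule, here is a minimal sketch of the
suffix mapping (not the actual implementation; the real logic lives in
post_process_deps() in the diff below, and the sample path is borrowed from
the example used there):

  def strip_toc(path):
    # A linked library is tracked through its .TOC (table of contents) file;
    # strip only the '.TOC' suffix to recover the actual binary.
    for suffix in ('.so.TOC', '.dylib.TOC'):
      if path.endswith(suffix):
        return path[:-len('.TOC')]
    return path

  # strip_toc('lib/libuser_prefs.so.TOC') -> 'lib/libuser_prefs.so'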

R=csharp@chromium.org
BUG=360223,333473

Review URL: https://codereview.chromium.org/228463003

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@263058 0039d316-1c4b-4281-b951-d872f2087c98
parent e39d622a
@@ -8,16 +8,16 @@
 Creates a wrapping .isolate which 'includes' the original one, that can be
 consumed by tools/swarming_client/isolate.py. Path variables are determined
 based on the current working directory. The relative_cwd in the .isolated file
-is determined based on *the .isolate file that declare the 'command' variable to
-be used* so the wrapping .isolate doesn't affect this value.
+is determined based on the .isolate file that declares the 'command' variable
+to be used, so the wrapping .isolate doesn't affect this value.
 
-It packages all the dynamic libraries found in this wrapping .isolate. This is
-inefficient and non-deterministic. In the very near future, it will parse
-build.ninja, find back the root target and find all the dynamic libraries that
-are marked as a dependency to this target.
+This script loads build.ninja and processes it to determine all the executables
+referenced by the isolated target. It adds them in the wrapping .isolate file.
 """
-import glob
+import StringIO
 import logging
 import os
 import posixpath
 import subprocess
@@ -33,54 +33,149 @@ sys.path.insert(0, SWARMING_CLIENT_DIR)
 import isolate_format
 
 
-# Location to grab binaries based on the OS.
-DYNAMIC_LIBRARIES = {
-  'darwin': '*.dylib',
-  'linux2': 'lib/*.so',
-  'win32': '*.dll',
-}
-
-
-def get_dynamic_libs(build_dir):
-  """Finds all the dynamic libs to map.
-
-  Returns:
-    list of relative path, e.g. [../out/Debug/lib/libuser_prefs.so].
-  """
-  items = set()
-  root = os.path.join(build_dir, DYNAMIC_LIBRARIES[sys.platform])
-  for i in glob.iglob(root):
-    try:
-      # Will throw on Windows if another process is writing to this file.
-      open(i).close()
-      items.add((i, os.stat(i).st_size))
-    except IOError:
-      continue
-
-  # The following sleep value was carefully selected via random guessing. The
-  # goal is to detect files that are being linked by checking their file size
-  # after a small delay.
-  #
-  # This happens as other build targets can be built simultaneously. For
-  # example, base_unittests.isolated is being processed but dynamic libraries
-  # for chrome are currently being linked.
-  #
-  # TODO(maruel): Obviously, this must go away and be replaced with a proper
-  # ninja parser but we need something now. http://crbug.com/333473
-  time.sleep(10)
-
-  for item in sorted(items):
-    file_name, file_size = item
-    try:
-      open(file_name).close()
-      if os.stat(file_name).st_size != file_size:
-        items.remove(item)
-    except IOError:
-      items.remove(item)
-      continue
-  return [i[0].replace(os.path.sep, '/') for i in items]
+def load_ninja_recursively(build_dir, ninja_path, build_steps):
+  """Crudely extracts all the subninja and build referenced in ninja_path.
+
+  In particular, it ignores rule and variable declarations. The goal is to be
+  performant (well, as much as python can be performant) which is currently in
+  the <200ms range for a complete chromium tree. As such the code is laid out
+  for performance instead of readability.
+  """
+  logging.debug('Loading %s', ninja_path)
+  try:
+    with open(os.path.join(build_dir, ninja_path), 'rb') as f:
+      line = None
+      merge_line = ''
+      subninja = []
+      for line in f:
+        line = line.rstrip()
+        if not line:
+          continue
+
+        if line[-1] == '$':
+          # The next line needs to be merged in.
+          merge_line += line[:-1]
+          continue
+
+        if merge_line:
+          line = merge_line + line
+          merge_line = ''
+
+        statement = line[:line.find(' ')]
+        if statement == 'build':
+          # Save the dependency list as a raw string. Only the lines needed will
+          # be processed with raw_build_to_deps(). This saves a good 70ms of
+          # processing time.
+          build_target, dependencies = line[6:].split(': ', 1)
+          # Interestingly, trying to be smart and only saving the build steps
+          # with the intended extensions ('', '.stamp', '.so') slows down
+          # parsing even if 90% of the build rules can be skipped.
+          # On Windows, a single step may generate two targets, so split items
+          # accordingly. It has only been seen for .exe/.exe.pdb combos.
+          for i in build_target.strip().split():
+            build_steps[i] = dependencies
+        elif statement == 'subninja':
+          subninja.append(line[9:])
+  except IOError:
+    print >> sys.stderr, 'Failed to open %s' % ninja_path
+    raise
+
+  total = 1
+  for rel_path in subninja:
+    try:
+      # Load each of the files referenced.
+      # TODO(maruel): Skip the files known to not be needed. It saves an awful
+      # lot of processing time.
+      total += load_ninja_recursively(build_dir, rel_path, build_steps)
+    except IOError:
+      print >> sys.stderr, '... as referenced by %s' % ninja_path
+      raise
+  return total
+
+
+def load_ninja(build_dir):
+  """Loads the tree of .ninja files in build_dir."""
+  build_steps = {}
+  total = load_ninja_recursively(build_dir, 'build.ninja', build_steps)
+  logging.info('Loaded %d ninja files, %d build steps', total, len(build_steps))
+  return build_steps
+
+
+def using_blacklist(item):
+  """Returns True if an item should be analyzed.
+
+  Ignores many rules that are assumed to not depend on a dynamic library. If
+  the assumption doesn't hold true anymore for a file format, remove it from
+  this list. This is simply an optimization.
+  """
+  IGNORED = (
+      '.a', '.cc', '.css', '.def', '.h', '.html', '.js', '.json', '.manifest',
+      '.o', '.obj', '.pak', '.png', '.pdb', '.strings', '.txt',
+  )
+  # ninja files use native path format.
+  ext = os.path.splitext(item)[1]
+  if ext in IGNORED:
+    return False
+  # Special case Windows, keep .dll.lib but discard .lib.
+  if item.endswith('.dll.lib'):
+    return True
+  if ext == '.lib':
+    return False
+  return item not in ('', '|', '||')
+
+
+def raw_build_to_deps(item):
+  """Converts a raw ninja build statement into the list of interesting
+  dependencies.
+  """
+  # TODO(maruel): Use a whitelist instead? .stamp, .so.TOC, .dylib.TOC,
+  # .dll.lib, .exe and empty.
+  # The first item is the build rule, e.g. 'link', 'cxx', 'phony', etc.
+  return filter(using_blacklist, item.split(' ')[1:])
+
+
+def recurse(target, build_steps, rules_seen):
+  """Recursively returns all the interesting dependencies for target."""
+  out = []
+  if rules_seen is None:
+    rules_seen = set()
+  if target in rules_seen:
+    # TODO(maruel): Figure out how it happens.
+    logging.warning('Circular dependency for %s!', target)
+    return []
+  rules_seen.add(target)
+  try:
+    dependencies = raw_build_to_deps(build_steps[target])
+  except KeyError:
+    logging.info('Failed to find a build step to generate: %s', target)
+    return []
+  logging.debug('recurse(%s) -> %s', target, dependencies)
+  for dependency in dependencies:
+    out.append(dependency)
+    dependency_raw_dependencies = build_steps.get(dependency)
+    if dependency_raw_dependencies:
+      for i in raw_build_to_deps(dependency_raw_dependencies):
+        out.extend(recurse(i, build_steps, rules_seen))
+    else:
+      logging.info('Failed to find a build step to generate: %s', dependency)
+  return out
+
+
+def post_process_deps(dependencies):
+  """Processes the dependency list with OS specific rules."""
+  def filter_item(i):
+    if i.endswith('.so.TOC'):
+      # Remove only the suffix .TOC, not the .so!
+      return i[:-4]
+    if i.endswith('.dylib.TOC'):
+      # Remove only the suffix .TOC, not the .dylib!
+      return i[:-4]
+    if i.endswith('.dll.lib'):
+      # Remove only the suffix .lib, not the .dll!
+      return i[:-4]
+    return i
+  return map(filter_item, dependencies)
@@ -111,19 +206,24 @@ def create_wrapper(args, isolate_index, isolated_index):
   isolate_relpath = os.path.relpath(
       '.', temp_isolate_dir).replace(os.path.sep, '/')
-  # Will look like ['<(PRODUCT_DIR)/lib/flibuser_prefs.so'].
-  rebased_libs = [
-    '<(PRODUCT_DIR)/%s' % i[len(build_dir)+1:]
-    for i in get_dynamic_libs(build_dir)
-  ]
+  # It's a big assumption here that the name of the isolate file matches the
+  # primary target. Fix accordingly if this doesn't hold true.
+  target = isolate[:-len('.isolate')]
+  build_steps = load_ninja(build_dir)
+  binary_deps = post_process_deps(recurse(target, build_steps, None))
+  logging.debug(
+      'Binary dependencies:%s', ''.join('\n  ' + i for i in binary_deps))
 
   # Now do actual wrapping .isolate.
-  out = {
+  isolate_dict = {
     'includes': [
       posixpath.join(isolate_relpath, isolate),
     ],
     'variables': {
-      isolate_format.KEY_TRACKED: rebased_libs,
+      # Will look like ['<(PRODUCT_DIR)/lib/flibuser_prefs.so'].
+      isolate_format.KEY_TRACKED: sorted(
+          '<(PRODUCT_DIR)/%s' % i.replace(os.path.sep, '/')
+          for i in binary_deps),
     },
   }
   if not os.path.isdir(temp_isolate_dir):
@@ -131,14 +231,18 @@ def create_wrapper(args, isolate_index, isolated_index):
   comment = (
       '# Warning: this file was AUTOGENERATED.\n'
       '# DO NO EDIT.\n')
+  out = StringIO.StringIO()
+  isolate_format.print_all(comment, isolate_dict, out)
+  isolate_content = out.getvalue()
   with open(temp_isolate, 'wb') as f:
-    isolate_format.print_all(comment, out, f)
-  if '--verbose' in args:
-    print('Added %d dynamic libs' % len(dynamic_libs))
+    f.write(isolate_content)
+  logging.info('Added %d dynamic libs', len(binary_deps))
+  logging.debug('%s', isolate_content)
   args[isolate_index] = temp_isolate
 
 
 def main():
+  logging.basicConfig(level=logging.ERROR, format='%(levelname)7s %(message)s')
   args = sys.argv[1:]
   isolate = None
   isolated = None
@@ -154,9 +258,7 @@ def main():
     print >> sys.stderr, 'Internal failure'
     return 1
-  # Implement a ninja parser.
-  # http://crbug.com/360223
-  if is_component and False:
+  if is_component:
     create_wrapper(args, isolate, isolated)
 
   swarming_client = os.path.join(SRC_DIR, 'tools', 'swarming_client')
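
A quick sketch of how the new pieces compose (the target name here is
hypothetical; this mirrors what create_wrapper() does in the diff above):

  build_steps = load_ninja('out/Release')
  deps = post_process_deps(recurse('base_unittests', build_steps, None))
  # deps now holds paths relative to the build dir, e.g. 'lib/libbase.so',
  # ready to be rebased onto <(PRODUCT_DIR) in the wrapping .isolate.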