Commit 9a8e5a0c authored by Nico Weber

build: Run more than one thinlto link in parallel again.

https://chromium-review.googlesource.com/c/chromium/src/+/2464913 made
it so that thinlto links can use all available cores, and reduced the
number of parallel LTO links to 1.

This made individual links 30% to 60% faster, but the reduced parallelism
caused bots to still get much slower in aggregate.

Based on some handwaving, only a third of a thinlto link seems to be
CPU-bound (https://bugs.chromium.org/p/chromium/issues/detail?id=1132930#c14),
so try running up to 3 thinlto links in parallel.

I expect we'll have to iterate on this heuristic some more.

(And we might want to limit ourselves to 1 parallel thinlto link on
Windows eventually, see first bug below.)

Bug: 1132930,1137812
Change-Id: I77d9446195be4fb1171b8beba45205e91c89a875
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2467360
Commit-Queue: Nico Weber <thakis@chromium.org>
Auto-Submit: Nico Weber <thakis@chromium.org>
Reviewed-by: Hans Wennborg <hans@chromium.org>
Cr-Commit-Position: refs/heads/master@{#816565}
parent ee23e932
...@@ -23,7 +23,10 @@ declare_args() { ...@@ -23,7 +23,10 @@ declare_args() {
if (concurrent_links == -1) { if (concurrent_links == -1) {
if (use_thin_lto) { if (use_thin_lto) {
_args = [ "--reserve_mem_gb=10" ] _args = [
"--thin-lto",
"--reserve_mem_gb=10",
]
if (is_win) { if (is_win) {
# Based on measurements of linking chrome.dll and chrome_child.dll, plus # Based on measurements of linking chrome.dll and chrome_child.dll, plus
# a little padding to account for future growth. # a little padding to account for future growth.
...@@ -76,21 +79,15 @@ if (concurrent_links == -1) { ...@@ -76,21 +79,15 @@ if (concurrent_links == -1) {
# so that we can compute better values. # so that we can compute better values.
_command_dict = exec_script("get_concurrent_links.py", _args, "scope") _command_dict = exec_script("get_concurrent_links.py", _args, "scope")
if (use_thin_lto) { concurrent_links = _command_dict.primary_pool_size
concurrent_links = 1 concurrent_links_logs = _command_dict.explanation
concurrent_links_logs =
[ "thinlto -- only one link at once but every link can use all cores" ]
} else {
concurrent_links = _command_dict.primary_pool_size
concurrent_links_logs = _command_dict.explanation
if (_command_dict.secondary_pool_size >= concurrent_links) { if (_command_dict.secondary_pool_size >= concurrent_links) {
# Have R8 / Lint share the link pool unless we would safely get more # Have R8 / Lint share the link pool unless we would safely get more
# concurrency out of using a separate one. # concurrency out of using a separate one.
# On low-RAM machines, this allows an apk's native library to link at the # On low-RAM machines, this allows an apk's native library to link at the
# same time as its java is optimized with R8. # same time as its java is optimized with R8.
java_cmd_pool_size = _command_dict.secondary_pool_size java_cmd_pool_size = _command_dict.secondary_pool_size
}
} }
} else { } else {
assert(!use_thin_lto || concurrent_links == 1, assert(!use_thin_lto || concurrent_links == 1,
......
...@@ -57,7 +57,8 @@ def _GetTotalMemoryInBytes(): ...@@ -57,7 +57,8 @@ def _GetTotalMemoryInBytes():
return 0 return 0
def _GetDefaultConcurrentLinks(per_link_gb, reserve_gb, secondary_per_link_gb): def _GetDefaultConcurrentLinks(per_link_gb, reserve_gb, is_thin_lto,
secondary_per_link_gb):
explanation = [] explanation = []
explanation.append( explanation.append(
'per_link_gb={} reserve_gb={} secondary_per_link_gb={}'.format( 'per_link_gb={} reserve_gb={} secondary_per_link_gb={}'.format(
...@@ -67,16 +68,24 @@ def _GetDefaultConcurrentLinks(per_link_gb, reserve_gb, secondary_per_link_gb): ...@@ -67,16 +68,24 @@ def _GetDefaultConcurrentLinks(per_link_gb, reserve_gb, secondary_per_link_gb):
mem_cap = int(max(1, mem_total_gb / per_link_gb)) mem_cap = int(max(1, mem_total_gb / per_link_gb))
try: try:
cpu_cap = multiprocessing.cpu_count() cpu_count = multiprocessing.cpu_count()
except: except:
cpu_cap = 1 cpu_count = 1
explanation.append('cpu_count={} mem_total_gb={:.1f}GiB'.format( # LTO links saturate all cores, but only for about a third of a link.
cpu_cap, mem_total_gb)) cpu_cap = cpu_count
if is_thin_lto:
cpu_cap = min(cpu_count, 3)
explanation.append('cpu_count={} cpu_cap={} mem_total_gb={:.1f}GiB'.format(
cpu_count, cpu_cap, mem_total_gb))
num_links = min(mem_cap, cpu_cap) num_links = min(mem_cap, cpu_cap)
if num_links == cpu_cap: if num_links == cpu_cap:
reason = 'cpu_count' if cpu_cap == cpu_count:
reason = 'cpu_count'
else:
reason = 'cpu_cap (thinlto)'
else: else:
reason = 'RAM' reason = 'RAM'
...@@ -100,11 +109,12 @@ def main(): ...@@ -100,11 +109,12 @@ def main():
parser.add_argument('--mem_per_link_gb', type=int, default=8) parser.add_argument('--mem_per_link_gb', type=int, default=8)
parser.add_argument('--reserve_mem_gb', type=int, default=0) parser.add_argument('--reserve_mem_gb', type=int, default=0)
parser.add_argument('--secondary_mem_per_link', type=int, default=0) parser.add_argument('--secondary_mem_per_link', type=int, default=0)
parser.add_argument('--thin-lto', action='store_true')
options = parser.parse_args() options = parser.parse_args()
primary_pool_size, secondary_pool_size, explanation = ( primary_pool_size, secondary_pool_size, explanation = (
_GetDefaultConcurrentLinks(options.mem_per_link_gb, _GetDefaultConcurrentLinks(options.mem_per_link_gb,
options.reserve_mem_gb, options.reserve_mem_gb, options.thin_lto,
options.secondary_mem_per_link)) options.secondary_mem_per_link))
sys.stdout.write( sys.stdout.write(
gn_helpers.ToGNString({ gn_helpers.ToGNString({
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment