Commit 9a8e5a0c authored by Nico Weber

build: Run more than one thinlto link in parallel again.

https://chromium-review.googlesource.com/c/chromium/src/+/2464913 made
it so that thinlto links can use all available cores, and reduced the
number of parallel LTO links to 1.

This made individual links 30% to 60% faster, but the reduced parallelism
caused bots to still get much slower in aggregate.

Based on some handwaving, only a third of a thinlto link seems to be
CPU-bound (https://bugs.chromium.org/p/chromium/issues/detail?id=1132930#c14),
so try running up to 3 thinlto links in parallel.

I expect we'll have to iterate on this heuristic some more.

(And we might want to limit ourselves to 1 parallel thinlto link on
Windows eventually, see first bug below.)

Bug: 1132930,1137812
Change-Id: I77d9446195be4fb1171b8beba45205e91c89a875
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2467360
Commit-Queue: Nico Weber <thakis@chromium.org>
Auto-Submit: Nico Weber <thakis@chromium.org>
Reviewed-by: Hans Wennborg <hans@chromium.org>
Cr-Commit-Position: refs/heads/master@{#816565}
parent ee23e932
...@@ -23,7 +23,10 @@ declare_args() { ...@@ -23,7 +23,10 @@ declare_args() {
if (concurrent_links == -1) { if (concurrent_links == -1) {
if (use_thin_lto) { if (use_thin_lto) {
_args = [ "--reserve_mem_gb=10" ] _args = [
"--thin-lto",
"--reserve_mem_gb=10",
]
if (is_win) { if (is_win) {
# Based on measurements of linking chrome.dll and chrome_child.dll, plus # Based on measurements of linking chrome.dll and chrome_child.dll, plus
# a little padding to account for future growth. # a little padding to account for future growth.
...@@ -76,21 +79,15 @@ if (concurrent_links == -1) { ...@@ -76,21 +79,15 @@ if (concurrent_links == -1) {
# so that we can compute better values. # so that we can compute better values.
_command_dict = exec_script("get_concurrent_links.py", _args, "scope") _command_dict = exec_script("get_concurrent_links.py", _args, "scope")
if (use_thin_lto) { concurrent_links = _command_dict.primary_pool_size
concurrent_links = 1 concurrent_links_logs = _command_dict.explanation
concurrent_links_logs =
[ "thinlto -- only one link at once but every link can use all cores" ]
} else {
concurrent_links = _command_dict.primary_pool_size
concurrent_links_logs = _command_dict.explanation
if (_command_dict.secondary_pool_size >= concurrent_links) { if (_command_dict.secondary_pool_size >= concurrent_links) {
# Have R8 / Lint share the link pool unless we would safely get more # Have R8 / Lint share the link pool unless we would safely get more
# concurrency out of using a separate one. # concurrency out of using a separate one.
# On low-RAM machines, this allows an apk's native library to link at the # On low-RAM machines, this allows an apk's native library to link at the
# same time as its java is optimized with R8. # same time as its java is optimized with R8.
java_cmd_pool_size = _command_dict.secondary_pool_size java_cmd_pool_size = _command_dict.secondary_pool_size
}
} }
} else { } else {
assert(!use_thin_lto || concurrent_links == 1, assert(!use_thin_lto || concurrent_links == 1,
......
...@@ -57,7 +57,8 @@ def _GetTotalMemoryInBytes(): ...@@ -57,7 +57,8 @@ def _GetTotalMemoryInBytes():
return 0 return 0
def _GetDefaultConcurrentLinks(per_link_gb, reserve_gb, secondary_per_link_gb): def _GetDefaultConcurrentLinks(per_link_gb, reserve_gb, is_thin_lto,
secondary_per_link_gb):
explanation = [] explanation = []
explanation.append( explanation.append(
'per_link_gb={} reserve_gb={} secondary_per_link_gb={}'.format( 'per_link_gb={} reserve_gb={} secondary_per_link_gb={}'.format(
...@@ -67,16 +68,24 @@ def _GetDefaultConcurrentLinks(per_link_gb, reserve_gb, secondary_per_link_gb): ...@@ -67,16 +68,24 @@ def _GetDefaultConcurrentLinks(per_link_gb, reserve_gb, secondary_per_link_gb):
mem_cap = int(max(1, mem_total_gb / per_link_gb)) mem_cap = int(max(1, mem_total_gb / per_link_gb))
try: try:
cpu_cap = multiprocessing.cpu_count() cpu_count = multiprocessing.cpu_count()
except: except:
cpu_cap = 1 cpu_count = 1
explanation.append('cpu_count={} mem_total_gb={:.1f}GiB'.format( # LTO links saturate all cores, but only for about a third of a link.
cpu_cap, mem_total_gb)) cpu_cap = cpu_count
if is_thin_lto:
cpu_cap = min(cpu_count, 3)
explanation.append('cpu_count={} cpu_cap={} mem_total_gb={:.1f}GiB'.format(
cpu_count, cpu_cap, mem_total_gb))
num_links = min(mem_cap, cpu_cap) num_links = min(mem_cap, cpu_cap)
if num_links == cpu_cap: if num_links == cpu_cap:
reason = 'cpu_count' if cpu_cap == cpu_count:
reason = 'cpu_count'
else:
reason = 'cpu_cap (thinlto)'
else: else:
reason = 'RAM' reason = 'RAM'
...@@ -100,11 +109,12 @@ def main(): ...@@ -100,11 +109,12 @@ def main():
parser.add_argument('--mem_per_link_gb', type=int, default=8) parser.add_argument('--mem_per_link_gb', type=int, default=8)
parser.add_argument('--reserve_mem_gb', type=int, default=0) parser.add_argument('--reserve_mem_gb', type=int, default=0)
parser.add_argument('--secondary_mem_per_link', type=int, default=0) parser.add_argument('--secondary_mem_per_link', type=int, default=0)
parser.add_argument('--thin-lto', action='store_true')
options = parser.parse_args() options = parser.parse_args()
primary_pool_size, secondary_pool_size, explanation = ( primary_pool_size, secondary_pool_size, explanation = (
_GetDefaultConcurrentLinks(options.mem_per_link_gb, _GetDefaultConcurrentLinks(options.mem_per_link_gb,
options.reserve_mem_gb, options.reserve_mem_gb, options.thin_lto,
options.secondary_mem_per_link)) options.secondary_mem_per_link))
sys.stdout.write( sys.stdout.write(
gn_helpers.ToGNString({ gn_helpers.ToGNString({
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment