Commit c51effeb authored by Matthew Cary's avatar Matthew Cary Committed by Commit Bot

Orderfile: phased_orderfile updates

This performs the per-process phased offset processing used for the orderfile.

Bug: 758566
Change-Id: I5cbdb69c4834d95a52f2e43ad3f72e4845413883
Reviewed-on: https://chromium-review.googlesource.com/1144935
Commit-Queue: Matthew Cary <mattcary@chromium.org>
Reviewed-by: default avatarBenoit L <lizeb@chromium.org>
Reviewed-by: default avatarEgor Pasko <pasko@chromium.org>
Cr-Commit-Position: refs/heads/master@{#577537}
parent a14a69a7
......@@ -13,6 +13,13 @@ labeled 1, is called "interaction". These two phases are used to create an
orderfile with three parts: the code touched only in startup, the code
touched only during interaction, and code common to the two phases. We refer to
these parts as the orderfile phases.
Example invocation, with PROFILE_DIR the location of the profile data pulled
from a device and LIBTYPE either monochrome or chrome as appropriate.
./tools/cygprofile/phased_orderfile.py \
--profile-directory=PROFILE_DIR \
--instrumented-build-dir=out-android/Orderfile/ \
--library-name=libLIBTYPE.so --offset-output-base=PROFILE_DIR/offset
"""
import argparse
......@@ -26,7 +33,7 @@ import process_profiles
# Files matched when using this script to analyze directly (see main()).
PROFILE_GLOB = 'cygprofile-*.txt_*'
PROFILE_GLOB = 'profile-hitmap-*.txt_*'
OrderfilePhaseOffsets = collections.namedtuple(
......@@ -45,6 +52,9 @@ class PhasedAnalyzer(object):
COMMON_STABILITY_THRESHOLD = 1.75
INTERACTION_STABILITY_THRESHOLD = 2.5
# The process name of the browser as used in the profile dumps.
BROWSER = 'browser'
def __init__(self, profiles, processor):
"""Intialize.
......@@ -54,7 +64,11 @@ class PhasedAnalyzer(object):
"""
self._profiles = profiles
self._processor = processor
# These members cache various computed values.
self._phase_offsets = None
self._annotated_offsets = None
self._process_list = None
def IsStableProfile(self):
"""Verify that the profiling has been stable.
......@@ -65,7 +79,7 @@ class PhasedAnalyzer(object):
True if the profile was stable as described above.
"""
(startup_stability, common_stability,
interaction_stability) = self.ComputeStability()
interaction_stability) = [s[0] for s in self.ComputeStability()]
stable = True
if startup_stability > self.STARTUP_STABILITY_THRESHOLD:
......@@ -89,11 +103,46 @@ class PhasedAnalyzer(object):
they cover.
Returns:
(float, float, float) A heuristic stability metric for startup, common and
interaction orderfile phases, respectively.
((float, int), (float, int), (float, int)) A heuristic stability metric
for startup, common and interaction orderfile phases,
respectively. Each metric is a pair of the ratio of symbol sizes as
described above, and the size of the intersection.
"""
(startup_intersection, startup_union,
common_intersection, common_union,
interaction_intersection, interaction_union,
_, _) = self.GetCombinedOffsets()
startup_intersection_size = self._processor.OffsetsPrimarySize(
startup_intersection)
common_intersection_size = self._processor.OffsetsPrimarySize(
common_intersection)
interaction_intersection_size = self._processor.OffsetsPrimarySize(
interaction_intersection)
startup_stability = self._SafeDiv(
self._processor.OffsetsPrimarySize(startup_union),
startup_intersection_size)
common_stability = self._SafeDiv(
self._processor.OffsetsPrimarySize(common_union),
common_intersection_size)
interaction_stability = self._SafeDiv(
self._processor.OffsetsPrimarySize(interaction_union),
interaction_intersection_size)
return ((startup_stability, startup_intersection_size),
(common_stability, common_intersection_size),
(interaction_stability, interaction_intersection_size))
def GetCombinedOffsets(self):
"""Get offsets for the union and intersection of orderfile phases.
Returns:
([int] * 8) For each of startup, common, interaction and all, respectively
the intersection and union offsets, in that order.
"""
phase_offsets = self._GetOrderfilePhaseOffsets()
assert len(phase_offsets) > 1 # Otherwise the analysis is silly.
assert phase_offsets
if len(phase_offsets) == 1:
logging.error('Only one run set, the combined offset files will all be '
'identical')
startup_union = set(phase_offsets[0].startup)
startup_intersection = set(phase_offsets[0].startup)
......@@ -108,16 +157,172 @@ class PhasedAnalyzer(object):
common_intersection &= set(offsets.common)
interaction_union |= set(offsets.interaction)
interaction_intersection &= set(offsets.interaction)
startup_stability = self._SafeDiv(
self._processor.OffsetsPrimarySize(startup_union),
self._processor.OffsetsPrimarySize(startup_intersection))
common_stability = self._SafeDiv(
self._processor.OffsetsPrimarySize(common_union),
self._processor.OffsetsPrimarySize(common_intersection))
interaction_stability = self._SafeDiv(
self._processor.OffsetsPrimarySize(interaction_union),
self._processor.OffsetsPrimarySize(interaction_intersection))
return (startup_stability, common_stability, interaction_stability)
return (startup_intersection, startup_union,
common_intersection, common_union,
interaction_intersection, interaction_union,
(startup_union & common_union & interaction_union),
(startup_union | common_union | interaction_union))
def GetOffsetsForMemoryFootprint(self):
"""Get offsets organized to minimize the memory footprint.
The startup, common and interaction offsets are computed for each
process. Any symbols used by one process in startup or interaction that are
used in a different phase by another process are moved to the common
section. This should minimize the memory footprint by keeping startup- or
interaction-only pages clean, at the possible expense of startup time, as
more of the common section will need to be loaded. To mitigate that effect,
symbols moved from startup are placed at the beginning of the common
section, and those moved from interaction are placed at the end.
Browser startup symbols are placed at the beginning of the startup section
in the hope of working out with native library prefetching to minimize
startup time.
Returns:
OrderfilePhaseOffsets as described above.
"""
startup = []
# Symbols demoted from startup go at the head of the common section and
# symbols demoted from interaction at its tail, per the docstring above.
common_head = []
common = []
common_tail = []
interaction = []
process_offsets = {p: self._GetCombinedProcessOffsets(p)
for p in self._GetProcessList()}
# The browser gets special placement below, so it must be present.
assert self.BROWSER in process_offsets.keys()
# Union, over all processes, of the offsets seen in each phase.
any_startup = set()
any_interaction = set()
any_common = set()
for offsets in process_offsets.itervalues():
any_startup |= set(offsets.startup)
any_interaction |= set(offsets.interaction)
any_common |= set(offsets.common)
already_added = set()
# This helper function splits |offsets|, adding to |alternate| all offsets
# that are in |interfering| or are already known to be common, and otherwise
# adding to |target|.
def add_process_offsets(offsets, interfering, target, alternate):
for o in offsets:
if o in already_added:
continue
if o in interfering or o in any_common:
alternate.append(o)
else:
target.append(o)
already_added.add(o)
# This helper updates |common| with new members of |offsets|.
def add_common_offsets(offsets):
for o in offsets:
if o not in already_added:
common.append(o)
already_added.add(o)
# The browser is processed first so that its startup symbols lead the
# startup section.
add_process_offsets(process_offsets[self.BROWSER].startup,
any_interaction, startup, common_head)
add_process_offsets(process_offsets[self.BROWSER].interaction,
any_startup, interaction, common_tail)
add_common_offsets(process_offsets[self.BROWSER].common)
for p in process_offsets:
if p == self.BROWSER:
continue
add_process_offsets(process_offsets[p].startup,
any_interaction, startup, common_head)
add_process_offsets(process_offsets[p].interaction,
any_startup, interaction, common_tail)
add_common_offsets(process_offsets[p].common)
return OrderfilePhaseOffsets(
startup=startup,
common=(common_head + common + common_tail),
interaction=interaction)
def GetOffsetsForStartup(self):
  """Get offsets organized to minimize startup time.

  The startup, common and interaction offsets are computed for each
  process. Any symbol used by one process in interaction that appears in a
  different phase in another process is moved to common, but any symbol
  that appears in startup for *any* process stays in startup.

  This should maximize startup performance at the expense of increasing the
  memory footprint, as some startup symbols will not be able to page out.

  The startup symbols in the browser process appear first in the hope of
  working out with native library prefetching to minimize startup time.

  Returns:
    OrderfilePhaseOffsets as described above.
  """
  process_offsets = {p: self._GetCombinedProcessOffsets(p)
                     for p in self._GetProcessList()}
  # The browser's offsets are taken wholesale and come first in each phase.
  browser = process_offsets[self.BROWSER]
  startup = list(browser.startup)
  common = list(browser.common)
  interaction = list(browser.interaction)
  already_added = set(startup) | set(common) | set(interaction)
  for process, offsets in process_offsets.iteritems():
    if process == self.BROWSER:
      continue
    # For each phase, append offsets not placed yet, then mark the whole
    # phase list as seen before moving to the next phase.
    for source, destination in ((offsets.startup, startup),
                                (offsets.common, common),
                                (offsets.interaction, interaction)):
      destination.extend(o for o in source if o not in already_added)
      already_added |= set(source)
  return OrderfilePhaseOffsets(
      startup=startup, common=common, interaction=interaction)
def _GetCombinedProcessOffsets(self, process):
  """Combine offsets across runs for a particular process.

  Args:
    process (str) The process to combine.

  Returns:
    OrderfilePhaseOffsets with the startup, common and interaction offsets
    for |process|. The offsets are sorted arbitrarily.
  """
  assert self._profiles.GetPhases() == set([0,1]), 'Unexpected phases'
  startup, common, interaction = [], [], []
  for annotated in self._GetAnnotatedOffsets():
    seen_in_startup = bool(annotated.Count(0, process))
    seen_in_interaction = bool(annotated.Count(1, process))
    if not (seen_in_startup or seen_in_interaction):
      continue
    # An offset seen in both phases is common to them.
    if seen_in_startup and seen_in_interaction:
      bucket = common
    elif seen_in_startup:
      bucket = startup
    else:
      bucket = interaction
    bucket.append(annotated.Offset())
  return OrderfilePhaseOffsets(
      startup=startup, common=common, interaction=interaction)
def _GetAnnotatedOffsets(self):
  # Computes, translates to symbol offsets, and caches the annotated
  # offsets on first use.
  if self._annotated_offsets is None:
    offsets = self._profiles.GetAnnotatedOffsets()
    self._processor.TranslateAnnotatedSymbolOffsets(offsets)
    self._annotated_offsets = offsets
  return self._annotated_offsets
def _GetProcessList(self):
  # Lazily collects and caches the set of process names appearing anywhere
  # in the annotated offsets.
  if self._process_list is None:
    processes = set()
    for annotated in self._GetAnnotatedOffsets():
      processes |= annotated.Processes()
    self._process_list = processes
  return self._process_list
def _GetOrderfilePhaseOffsets(self):
"""Compute the phase offsets for each run.
......@@ -129,7 +334,8 @@ class PhasedAnalyzer(object):
if self._phase_offsets is not None:
return self._phase_offsets
assert self._profiles.GetPhases() == set([0, 1]), 'Unexpected phases'
assert self._profiles.GetPhases() == set([0, 1]), (
'Unexpected phases {}'.format(self._profiles.GetPhases()))
self._phase_offsets = []
for first, second in zip(self._profiles.GetRunGroupOffsets(phase=0),
self._profiles.GetRunGroupOffsets(phase=1)):
......@@ -175,10 +381,17 @@ def _CreateArgumentParser():
help=('Directory containing profile runs. Files '
'matching {} are used.'.format(PROFILE_GLOB)))
parser.add_argument('--instrumented-build-dir', type=str,
help='Path to the instrumented build', required=True)
help='Path to the instrumented build (eg, out/Orderfile)',
required=True)
parser.add_argument('--library-name', default='libchrome.so',
help=('Chrome shared library name (usually libchrome.so '
'or libmonochrome.so'))
parser.add_argument('--offset-output-base', default=None, type=str,
help=('If present, a base name to output offsets to. '
'No offsets are output if this is missing. The '
'base name is suffixed with _for_memory and '
'_for_startup, corresponding to the two sets of '
'offsets produced.'))
return parser
......@@ -186,12 +399,25 @@ def main():
logging.basicConfig(level=logging.INFO)
parser = _CreateArgumentParser()
args = parser.parse_args()
profiles = process_profiles.ProfileManager(
glob.glob(os.path.join(args.profile_directory, PROFILE_GLOB)))
profiles = process_profiles.ProfileManager(itertools.chain.from_iterable(
glob.glob(os.path.join(d, PROFILE_GLOB))
for d in args.profile_directory.split(',')))
processor = process_profiles.SymbolOffsetProcessor(os.path.join(
args.instrumented_build_dir, 'lib.unstripped', args.library_name))
phaser = PhasedAnalyzer(profiles, processor)
print 'Stability: {:.2f} {:.2f} {:.2f}'.format(*phaser.ComputeStability())
stability = phaser.ComputeStability()
print 'Stability: {:.2} {:.2} {:.2}'.format(*[s[0] for s in stability])
print 'Sizes: {} {} {}'.format(*[s[1] for s in stability])
if args.offset_output_base is not None:
for name, offsets in zip(
['_for_memory', '_for_startup'],
[phaser.GetOffsetsForMemoryFootprint(),
phaser.GetOffsetsForStartup()]):
with file(args.offset_output_base + name, 'w') as output:
output.write('\n'.join(
str(i) for i in (offsets.startup + offsets.common +
offsets.interaction)))
output.write('\n')
if __name__ == '__main__':
......
......@@ -11,20 +11,47 @@ import unittest
import phased_orderfile
import process_profiles
from test_utils import (SimpleTestSymbol,
from test_utils import (ProfileFile,
SimpleTestSymbol,
TestSymbolOffsetProcessor,
TestProfileManager)
class Mod10Processor(object):
class Mod10Processor(process_profiles.SymbolOffsetProcessor):
"""A restricted mock for a SymbolOffsetProcessor.
This only implements GetReachedOffsetsFromDump, and works by mapping a dump
offset to offset - (offset % 10). If the dump offset is negative, it is marked
as not found.
This only implements {Translate,Get}ReachedOffsetsFromDump, and works by
mapping a dump offset to offset - (offset % 10). If the dump offset is
negative, it is marked as not found.
"""
def GetReachedOffsetsFromDump(self, dump):
return [x - (x % 10) for x in dump if x >= 0]
def __init__(self):
super(Mod10Processor, self).__init__(None)
def _TranslateReachedOffsetsFromDump(self, items, get, update):
for i in items:
x = get(i)
if x >= 0:
update(i, x - (x % 10))
else:
update(i, None)
class IdentityProcessor(process_profiles.SymbolOffsetProcessor):
  """A restricted mock for a SymbolOffsetProcessor.

  This only implements {Translate,Get}ReachedOffsetsFromDump; every dump
  offset maps to itself, and negative dump offsets are treated as not found.
  """
  def __init__(self):
    super(IdentityProcessor, self).__init__(None)

  def _TranslateReachedOffsetsFromDump(self, items, get, update):
    for item in items:
      raw = get(item)
      update(item, raw if raw >= 0 else None)
class PhasedOrderfileTestCase(unittest.TestCase):
......@@ -32,11 +59,6 @@ class PhasedOrderfileTestCase(unittest.TestCase):
def setUp(self):
self._file_counter = 0
def File(self, timestamp_sec, phase):
self._file_counter += 1
return 'file-{}-{}.txt_{}'.format(
self._file_counter, timestamp_sec * 1000 * 1000 * 1000, phase)
def testProfileStability(self):
symbols = [SimpleTestSymbol(str(i), i, 10)
for i in xrange(20)]
......@@ -46,7 +68,8 @@ class PhasedOrderfileTestCase(unittest.TestCase):
startup=s, common=c, interaction=i)
phaser._phase_offsets = [opo(range(5), range(6, 10), range(11,15)),
opo(range(4), range(6, 10), range(18, 20))]
self.assertEquals((1.25, 1, None), phaser.ComputeStability())
self.assertEquals((1.25, 1, None),
tuple(s[0] for s in phaser.ComputeStability()))
def testIsStable(self):
symbols = [SimpleTestSymbol(str(i), i, 10)
......@@ -64,12 +87,12 @@ class PhasedOrderfileTestCase(unittest.TestCase):
def testGetOrderfilePhaseOffsets(self):
mgr = TestProfileManager({
self.File(0, 0): [12, 21, -1, 33],
self.File(0, 1): [31, 49, 52],
self.File(100, 0): [113, 128],
self.File(200, 1): [132, 146],
self.File(300, 0): [19, 20, 32],
self.File(300, 1): [24, 39]})
ProfileFile(0, 0): [12, 21, -1, 33],
ProfileFile(0, 1): [31, 49, 52],
ProfileFile(100, 0): [113, 128],
ProfileFile(200, 1): [132, 146],
ProfileFile(300, 0): [19, 20, 32],
ProfileFile(300, 1): [24, 39]})
phaser = phased_orderfile.PhasedAnalyzer(mgr, Mod10Processor())
opo = lambda s, c, i: phased_orderfile.OrderfilePhaseOffsets(
startup=s, common=c, interaction=i)
......@@ -79,6 +102,47 @@ class PhasedOrderfileTestCase(unittest.TestCase):
opo([10], [20, 30], [])],
phaser._GetOrderfilePhaseOffsets())
def testGetCombinedProcessOffsets(self):
# An empty process name denotes the browser; IdentityProcessor maps each
# dump offset to itself, so the raw values below survive translation.
mgr = TestProfileManager({
ProfileFile(40, 0, ''): [1, 2, 3],
ProfileFile(50, 1, ''): [3, 4, 5],
ProfileFile(51, 0, 'renderer'): [2, 3, 6],
ProfileFile(51, 1, 'gpu-process'): [6, 7],
ProfileFile(70, 0, ''): [2, 8, 9],
ProfileFile(70, 1, ''): [9]})
phaser = phased_orderfile.PhasedAnalyzer(mgr, IdentityProcessor())
offsets = phaser._GetCombinedProcessOffsets('browser')
# 3 and 9 appear in both phases for the browser, so they are common.
self.assertListEqual([1, 2, 8], sorted(offsets.startup))
self.assertListEqual([4, 5], sorted(offsets.interaction))
self.assertListEqual([3, 9], sorted(offsets.common))
offsets = phaser._GetCombinedProcessOffsets('gpu-process')
self.assertListEqual([], sorted(offsets.startup))
self.assertListEqual([6, 7], sorted(offsets.interaction))
self.assertListEqual([], sorted(offsets.common))
self.assertListEqual(['browser', 'gpu-process', 'renderer'],
sorted(phaser._GetProcessList()))
def testGetOffsetVariations(self):
# Offset 6 is startup in the browser but interaction in the gpu-process:
# the memory-footprint layout demotes it to the head of common, while the
# startup layout keeps it in startup.
mgr = TestProfileManager({
ProfileFile(40, 0, ''): [1, 2, 3],
ProfileFile(50, 1, ''): [3, 4, 5],
ProfileFile(51, 0, 'renderer'): [2, 3, 6],
ProfileFile(51, 1, 'gpu-process'): [6, 7],
ProfileFile(70, 0, ''): [2, 6, 8, 9],
ProfileFile(70, 1, ''): [9]})
phaser = phased_orderfile.PhasedAnalyzer(mgr, IdentityProcessor())
offsets = phaser.GetOffsetsForMemoryFootprint()
self.assertListEqual([1, 2, 8], offsets.startup)
self.assertListEqual([6, 3, 9], offsets.common)
self.assertListEqual([4, 5, 7], offsets.interaction)
offsets = phaser.GetOffsetsForStartup()
self.assertListEqual([1, 2, 6, 8], offsets.startup)
self.assertListEqual([3, 9], offsets.common)
self.assertListEqual([4, 5, 7], offsets.interaction)
if __name__ == "__main__":
unittest.main()
......@@ -56,6 +56,7 @@ class SymbolOffsetProcessor(object):
self._name_to_symbol = None
self._offset_to_primary = None
self._offset_to_symbols = None
self._offset_to_symbol_info = None
def SymbolInfos(self):
"""The symbols associated with this processor's binary.
......@@ -152,24 +153,14 @@ class SymbolOffsetProcessor(object):
Returns:
[int] Reached symbol offsets.
"""
dump_offset_to_symbol_info = self._GetDumpOffsetToSymbolInfo()
logging.info('Offset to Symbol size = %d', len(dump_offset_to_symbol_info))
assert max(dump) / 4 <= len(dump_offset_to_symbol_info)
already_seen = set()
reached_offsets = []
reached_return_addresses_not_found = 0
for dump_offset in dump:
symbol_info = dump_offset_to_symbol_info[dump_offset / 4]
if symbol_info is None:
reached_return_addresses_not_found += 1
continue
if symbol_info.offset in already_seen:
continue
reached_offsets.append(symbol_info.offset)
already_seen.add(symbol_info.offset)
if reached_return_addresses_not_found:
logging.warning('%d return addresses don\'t map to any symbol',
reached_return_addresses_not_found)
already_seen = set()
def update(_, symbol_offset):
if symbol_offset is None or symbol_offset in already_seen:
return
reached_offsets.append(symbol_offset)
already_seen.add(symbol_offset)
self._TranslateReachedOffsetsFromDump(dump, lambda x: x, update)
return reached_offsets
def MatchSymbolNames(self, symbol_names):
......@@ -185,6 +176,52 @@ class SymbolOffsetProcessor(object):
matched_names = our_symbol_names.intersection(set(symbol_names))
return [self.NameToSymbolMap()[n] for n in matched_names]
def TranslateAnnotatedSymbolOffsets(self, annotated_offsets):
  """Rewrites each AnnotatedOffset's raw offset as a symbol offset.

  Like GetReachedOffsetsFromDump, but works with AnnotatedOffsets.

  Args:
    annotated_offsets (AnnotatedOffset iterable) List of annotated offsets,
      eg from ProfileManager.GetAnnotatedOffsets(). Mutated in place: each
      raw offset is replaced by its symbol offset (None when unmapped).
  """
  read_offset = lambda annotated: annotated.Offset()
  write_offset = lambda annotated, symbol_offset: annotated.SetOffset(
      symbol_offset)
  self._TranslateReachedOffsetsFromDump(
      annotated_offsets, read_offset, write_offset)
def _TranslateReachedOffsetsFromDump(self, items, get, update):
  """Translate raw binary offsets to symbol offsets.

  See GetReachedOffsetsFromDump for details. For each element |i| of
  |items|, |get(i)| supplies the raw dump offset, and
  |update(i, symbol_offset)| receives the translated symbol offset — or
  None when the raw offset maps to no symbol.

  Args:
    items: (iterable) Items containing offsets.
    get: (lambda item) Extracts the raw dump offset from an item.
    update: (lambda item, int) Stores the translated offset on an item.
  """
  symbol_table = self._GetDumpOffsetToSymbolInfo()
  logging.info('Offset to Symbol size = %d', len(symbol_table))
  not_found = 0
  for item in items:
    # The table has one entry per 4-byte word of .text.
    word_index = get(item) / 4
    assert word_index < len(symbol_table), (
        'Dump offset out of binary range')
    symbol_info = symbol_table[word_index]
    if symbol_info is None:
      not_found += 1
      update(item, None)
    else:
      update(item, symbol_info.offset)
  if not_found:
    logging.warning('%d return addresses don\'t map to any symbol',
                    not_found)
def _GetDumpOffsetToSymbolInfo(self):
"""Computes an array mapping each word in .text to a symbol.
......@@ -192,15 +229,16 @@ class SymbolOffsetProcessor(object):
[symbol_extractor.SymbolInfo or None] For every 4 bytes of the .text
section, maps it to a symbol, or None.
"""
min_offset = min(s.offset for s in self.SymbolInfos())
max_offset = max(s.offset + s.size for s in self.SymbolInfos())
text_length_words = (max_offset - min_offset) / 4
offset_to_symbol_info = [None for _ in xrange(text_length_words)]
for s in self.SymbolInfos():
offset = s.offset - min_offset
for i in range(offset / 4, (offset + s.size) / 4):
offset_to_symbol_info[i] = s
return offset_to_symbol_info
if self._offset_to_symbol_info is None:
min_offset = min(s.offset for s in self.SymbolInfos())
max_offset = max(s.offset + s.size for s in self.SymbolInfos())
text_length_words = (max_offset - min_offset) / 4
self._offset_to_symbol_info = [None for _ in xrange(text_length_words)]
for s in self.SymbolInfos():
offset = s.offset - min_offset
for i in range(offset / 4, (offset + s.size) / 4):
self._offset_to_symbol_info[i] = s
return self._offset_to_symbol_info
class ProfileManager(object):
......@@ -222,11 +260,11 @@ class ProfileManager(object):
example the dump for the startup could be phase 0 and then the steady-state
would be labeled phase 1.
We assume the files are named like *-TIMESTAMP.SUFFIX_PHASE, where TIMESTAMP
is in nanoseconds, SUFFIX is string without dashes, PHASE is an integer
numbering the phases as 0, 1, 2..., and the only dot is the one between
TIMESTAMP and SUFFIX. Note that the current dump filename also includes a
process id which is currently unused.
We assume the files are named like
profile-hitmap-PROCESS-PID-TIMESTAMP.SUFFIX_PHASE, where PROCESS is a possibly
empty string, PID is the process id, TIMESTAMP is in nanoseconds, SUFFIX is a
string without dashes, PHASE is an integer numbering the phases as 0, 1, 2...,
and the only dot is the one between TIMESTAMP and SUFFIX.
This manager supports several configurations of dumps.
......@@ -242,6 +280,44 @@ class ProfileManager(object):
time. These files can be grouped into run sets that are within 30 seconds of
each other. Each run set is then grouped into phases as before.
"""
class AnnotatedOffset(object):
  """Describes an offset with how it appeared in a profile set.

  Each offset is annotated with the phase and process that it appeared in,
  and can report how often it occurred in a specific phase and process.
  """
  def __init__(self, offset):
    # The offset may be rewritten later via SetOffset (for example when raw
    # dump offsets are translated to symbol offsets).
    self._offset = offset
    # Maps (phase, process) -> number of occurrences.
    self._count = {}

  def __str__(self):
    return '{}: {}'.format(self._offset, self._count)

  def __eq__(self, other):
    if other is None:
      return False
    return (self._offset == other._offset and
            self._count == other._count)

  def __ne__(self, other):
    # Required under Python 2, where != does not fall back to __eq__ and
    # would otherwise compare identity.
    return not self.__eq__(other)

  def Increment(self, phase, process):
    """Records one occurrence of this offset in |phase| of |process|."""
    key = (phase, process)
    self._count[key] = self._count.setdefault(key, 0) + 1

  def Count(self, phase, process):
    """Returns the occurrence count for (phase, process); 0 if never seen."""
    return self._count.get((phase, process), 0)

  def Processes(self):
    """Returns the set of process names this offset was seen in."""
    # Iterate the dict directly rather than the Python-2-only iterkeys().
    return set(process for _, process in self._count)

  def Phases(self):
    """Returns the set of phases this offset was seen in."""
    return set(phase for phase, _ in self._count)

  def Offset(self):
    return self._offset

  def SetOffset(self, o):
    self._offset = o
class _RunGroup(object):
RUN_GROUP_THRESHOLD_NS = 30e9
......@@ -295,6 +371,22 @@ class ProfileManager(object):
return self._GetOffsetsForGroup(f for f in self._filenames
if self._Phase(f) == phase)
def GetAnnotatedOffsets(self):
  """Merges offsets across run groups and annotates each one.

  Returns:
    [AnnotatedOffset]
  """
  annotations = {}  # raw offset (int) -> AnnotatedOffset
  for group in self._GetRunGroups():
    for filename in group:
      phase = self._Phase(filename)
      process = self._ProcessName(filename)
      for raw_offset in self._ReadOffsets(filename):
        if raw_offset not in annotations:
          annotations[raw_offset] = self.AnnotatedOffset(raw_offset)
        annotations[raw_offset].Increment(phase, process)
  return annotations.values()
def GetRunGroupOffsets(self, phase=None):
"""Merges files from each run group and returns offset list for each.
......@@ -322,11 +414,21 @@ class ProfileManager(object):
self._ComputeRunGroups()
return [g.Filenames(phase) for g in self._run_groups]
@classmethod
def _ProcessName(cls, filename):
"""Returns the process name encoded in a profile dump filename."""
# The filename starts with 'profile-hitmap-' and ends with
# '-PID-TIMESTAMP.text_X'. Anything in between is the process name. The
# browser has an empty process name, which is inserted here.
process_name_parts = os.path.basename(filename).split('-')[2:-2]
if not process_name_parts:
return 'browser'
return '-'.join(process_name_parts)
@classmethod
def _Timestamp(cls, filename):
  """Returns the timestamp (in ns) embedded in a profile dump filename.

  The timestamp is the text between the last '-' and the last '.' in
  |filename|.
  """
  dash_index = filename.rindex('-')
  dot_index = filename.rindex('.')
  return int(filename[dash_index+1:dot_index])
@classmethod
def _Phase(cls, filename):
......@@ -347,6 +449,19 @@ class ProfileManager(object):
g.Add(f)
self._run_groups.append(g)
# Some sanity checks on the run groups.
assert self._run_groups
if len(self._run_groups) < 5:
return # Small runs have too much variance for testing.
sizes = map(lambda g: len(g.Filenames()), self._run_groups)
avg_size = sum(sizes) / len(self._run_groups)
num_outliers = len([s for s in sizes
if s > 1.5 * avg_size or s < 0.75 * avg_size])
expected_outliers = 0.1 * len(self._run_groups)
assert num_outliers < expected_outliers, (
'Saw {} outliers instead of at most {} for average of {}'.format(
num_outliers, expected_outliers, avg_size))
def GetReachedOffsetsFromDumpFiles(dump_filenames, library_filename):
"""Produces a list of symbol offsets reached by the dumps.
......
......@@ -10,7 +10,8 @@ import unittest
import process_profiles
from test_utils import (SimpleTestSymbol,
from test_utils import (ProfileFile,
SimpleTestSymbol,
TestSymbolOffsetProcessor,
TestProfileManager)
......@@ -28,10 +29,10 @@ class ProcessProfilesTestCase(unittest.TestCase):
self.symbol_2, self.symbol_3]
self._file_counter = 0
def File(self, timestamp_sec, phase):
self._file_counter += 1
return 'file-{}-{}.txt_{}'.format(
self._file_counter, timestamp_sec * 1000 * 1000 * 1000, phase)
def MakeAnnotatedOffset(self, offset, counts):
# Test helper: builds an AnnotatedOffset with |counts| (a dict mapping
# (phase, process) -> occurrences) injected directly, bypassing Increment().
ao = process_profiles.ProfileManager.AnnotatedOffset(offset)
ao._count = counts
return ao
def testGetOffsetToSymbolInfo(self):
processor = TestSymbolOffsetProcessor(self.symbol_infos)
......@@ -103,8 +104,9 @@ class ProcessProfilesTestCase(unittest.TestCase):
self.assertEquals(5, process_profiles._Median([1, 4, 5, 6, 100]))
def testRunGroups(self):
files = [self.File(40, 0), self.File(100, 0), self.File(200, 1),
self.File(35, 1), self.File(42, 0), self.File(95, 0)]
files = [ProfileFile(40, 0), ProfileFile(100, 0),
ProfileFile(200, 1), ProfileFile(35, 1),
ProfileFile(42, 0), ProfileFile(95, 0)]
mgr = process_profiles.ProfileManager(files)
mgr._ComputeRunGroups()
self.assertEquals(3, len(mgr._run_groups))
......@@ -118,11 +120,34 @@ class ProcessProfilesTestCase(unittest.TestCase):
self.assertTrue(files[5] in mgr._run_groups[1].Filenames())
self.assertTrue(files[2] in mgr._run_groups[2].Filenames())
def testRunGroupSanity(self):
"""Checks the outlier detection in run-group computation."""
files = []
# Generate 20 sets of files in groups separated by 60s.
for ts_base in xrange(0, 20):
ts = ts_base * 60
files.extend([ProfileFile(ts, 0, 'browser'),
ProfileFile(ts + 1, 0, 'renderer'),
ProfileFile(ts + 2, 1, 'browser'),
ProfileFile(ts + 3, 0, 'gpu'),
ProfileFile(ts + 2, 1, 'renderer'),
ProfileFile(ts + 5, 1, 'gpu')])
# The following call should not assert.
process_profiles.ProfileManager(files)._ComputeRunGroups()
# Adding unevenly-sized groups (3, 1 and 10 files) should trip the
# outlier assertion.
files.extend([ProfileFile(20 * 60, 0, 'browser'),
ProfileFile(20 * 60 + 2, 1, 'renderer'),
ProfileFile(21 * 60, 0, 'browser')] +
[ProfileFile(22 * 60, 0, 'renderer')
for _ in xrange(0, 10)])
self.assertRaises(AssertionError,
process_profiles.ProfileManager(files)._ComputeRunGroups)
def testReadOffsets(self):
mgr = TestProfileManager({
self.File(30, 0): [1, 3, 5, 7],
self.File(40, 1): [8, 10],
self.File(50, 0): [13, 15]})
ProfileFile(30, 0): [1, 3, 5, 7],
ProfileFile(40, 1): [8, 10],
ProfileFile(50, 0): [13, 15]})
self.assertListEqual([1, 3, 5, 7, 8, 10, 13, 15],
mgr.GetMergedOffsets())
self.assertListEqual([8, 10], mgr.GetMergedOffsets(1))
......@@ -130,9 +155,9 @@ class ProcessProfilesTestCase(unittest.TestCase):
def testRunGroupOffsets(self):
mgr = TestProfileManager({
self.File(30, 0): [1, 2, 3, 4],
self.File(150, 0): [9, 11, 13],
self.File(40, 1): [5, 6, 7]})
ProfileFile(30, 0): [1, 2, 3, 4],
ProfileFile(150, 0): [9, 11, 13],
ProfileFile(40, 1): [5, 6, 7]})
offsets_list = mgr.GetRunGroupOffsets()
self.assertEquals(2, len(offsets_list))
self.assertListEqual([1, 2, 3, 4, 5, 6, 7], offsets_list[0])
......@@ -150,22 +175,54 @@ class ProcessProfilesTestCase(unittest.TestCase):
# The fact that the ProfileManager sorts by filename is implicit in the
# other tests. It is tested explicitly here.
mgr = TestProfileManager({
self.File(40, 0): [1, 2, 3, 4],
self.File(150, 0): [9, 11, 13],
self.File(30, 1): [5, 6, 7]})
ProfileFile(40, 0): [1, 2, 3, 4],
ProfileFile(150, 0): [9, 11, 13],
ProfileFile(30, 1): [5, 6, 7]})
offsets_list = mgr.GetRunGroupOffsets()
self.assertEquals(2, len(offsets_list))
self.assertListEqual([5, 6, 7, 1, 2, 3, 4], offsets_list[0])
def testPhases(self):
mgr = TestProfileManager({
self.File(40, 0): [],
self.File(150, 0): [],
self.File(30, 1): [],
self.File(30, 2): [],
self.File(30, 0): []})
ProfileFile(40, 0): [],
ProfileFile(150, 0): [],
ProfileFile(30, 1): [],
ProfileFile(30, 2): [],
ProfileFile(30, 0): []})
self.assertEquals(set([0,1,2]), mgr.GetPhases())
def testGetAnnotatedOffsets(self):
# An empty process name denotes the browser process. Each expected
# AnnotatedOffset below maps (phase, process) to an occurrence count.
mgr = TestProfileManager({
ProfileFile(40, 0, ''): [1, 2, 3],
ProfileFile(50, 1, ''): [3, 4, 5],
ProfileFile(51, 0, 'renderer'): [2, 3, 6],
ProfileFile(51, 1, 'gpu-process'): [6, 7],
ProfileFile(70, 0, ''): [2, 8, 9],
ProfileFile(70, 1, ''): [9]})
offsets = mgr.GetAnnotatedOffsets()
self.assertListEqual([
self.MakeAnnotatedOffset(1, {(0, 'browser'): 1}),
self.MakeAnnotatedOffset(2, {(0, 'browser'): 2,
(0, 'renderer'): 1}),
self.MakeAnnotatedOffset(3, {(0, 'browser'): 1,
(1, 'browser'): 1,
(0, 'renderer'): 1}),
self.MakeAnnotatedOffset(4, {(1, 'browser'): 1}),
self.MakeAnnotatedOffset(5, {(1, 'browser'): 1}),
self.MakeAnnotatedOffset(6, {(0, 'renderer'): 1,
(1, 'gpu-process'): 1}),
self.MakeAnnotatedOffset(7, {(1, 'gpu-process'): 1}),
self.MakeAnnotatedOffset(8, {(0, 'browser'): 1}),
self.MakeAnnotatedOffset(9, {(0, 'browser'): 1,
(1, 'browser'): 1})],
offsets)
self.assertListEqual(['browser', 'renderer'],
sorted(offsets[1].Processes()))
self.assertListEqual(['browser'], list(offsets[0].Processes()))
self.assertListEqual([0], list(offsets[1].Phases()))
self.assertListEqual([0, 1], sorted(offsets[2].Phases()))
self.assertListEqual([0, 1], sorted(mgr.GetPhases()))
if __name__ == '__main__':
unittest.main()
......@@ -8,6 +8,9 @@ import collections
import process_profiles
# Used by ProfileFile to generate unique file names.
_FILE_COUNTER = 0
SimpleTestSymbol = collections.namedtuple(
'SimpleTestSymbol', ['name', 'offset', 'size'])
......@@ -25,3 +28,14 @@ class TestProfileManager(process_profiles.ProfileManager):
def _ReadOffsets(self, filename):
return self._filecontents_mapping[filename]
def ProfileFile(timestamp_sec, phase, process_name=None):
  """Returns a unique fake profile dump filename for tests.

  Args:
    timestamp_sec: (int) Dump timestamp in seconds; encoded as nanoseconds
      in the filename.
    phase: (int) Profiling phase, appended as the filename suffix.
    process_name: (str or None) Process name to embed; None or empty means
      no name component (the browser process).

  Returns:
    (str) A filename following the profile-hitmap naming scheme, made
    unique by a module-global counter.
  """
  global _FILE_COUNTER
  _FILE_COUNTER += 1
  process_part = process_name + '-' if process_name else ''
  timestamp_ns = timestamp_sec * 1000 * 1000 * 1000
  return 'test-directory/profile-hitmap-{}{}-{}.txt_{}'.format(
      process_part, _FILE_COUNTER, timestamp_ns, phase)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment