Commit 90407d9b authored by Charlie Harrison, committed by Commit Bot

Add histograms.xml diffing tool

This CL adds a new mode to print_histogram_names.py to print the added
and removed histograms relative to a git revision e.g.:

./print_histogram_names.py --diff origin/master
./print_histogram_names.py --diff HEAD~

This is especially useful for CLs which add lots of histogram suffixes,
so that it is easy to learn precisely what histograms are generated by
the change.

Documentation is also updated to encourage using this tool when using
histogram suffixes.

Bug: None
Change-Id: Ibc1a8cde4216ad49f19a795bbfaa31dc61451705
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2205330
Commit-Queue: Charlie Harrison <csharrison@chromium.org>
Reviewed-by: Robert Kaplow <rkaplow@chromium.org>
Cr-Commit-Position: refs/heads/master@{#770364}
parent d8c30db0
......@@ -531,6 +531,14 @@ obsolete. You can also mark individual histograms within the suffix as
obsolete, indicating the expansion for that histogram is obsolete yet the
expansions for other histograms with the same suffix are not.
Histogram suffixes can be difficult to use, especially if they are applied
recursively. Consider using the `print_histogram_names.py --diff` tool to
enumerate all the histogram names that are generated by a particular CL, e.g.
(from the repo root):
```
./tools/metrics/histograms/print_histogram_names.py --diff origin/master
```
### Enum labels
_All_ histograms, including boolean and sparse histograms, may have enum labels
......
......@@ -10,10 +10,9 @@ import sys
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))
import path_util
HISTOGRAMS_XML = path_util.GetInputFile(
'tools/metrics/histograms/histograms.xml')
ENUMS_XML = path_util.GetInputFile(
'tools/metrics/histograms/enums.xml')
ALL_XMLS = [HISTOGRAMS_XML, ENUMS_XML]
# Paths of the XML inputs, in the form passed to path_util.GetInputFile below.
# The order matters: the tuple unpacking at the end binds ENUMS_XML to the
# first entry and HISTOGRAMS_XML to the second.
ALL_XMLS_RELATIVE = [
'tools/metrics/histograms/enums.xml',
'tools/metrics/histograms/histograms.xml'
]
# The same files, each resolved through path_util.GetInputFile.
ALL_XMLS = [path_util.GetInputFile(f) for f in ALL_XMLS_RELATIVE]
ENUMS_XML, HISTOGRAMS_XML = ALL_XMLS
......@@ -61,15 +61,17 @@ def MergeTrees(trees):
return doc
def MergeFiles(filenames=None, files=None):
    """Merges a list of histograms.xml files.

    The inputs may be given as paths, as already-open file-like objects, or
    both. Objects in |files| are parsed first, followed by the files named in
    |filenames|.

    Args:
      filenames: A list of histograms.xml filenames. These are opened here and
        closed again once parsing has finished.
      files: A list of histograms.xml file-like objects. These remain owned by
        the caller and are not closed.

    Returns:
      A merged DOM tree.
    """
    # None defaults instead of shared mutable list defaults.
    opened = []
    try:
        for name in filenames or ():
            opened.append(open(name))
        sources = list(files or ()) + opened
        trees = [xml.dom.minidom.parse(source) for source in sources]
    finally:
        # Only close what was opened here; caller-supplied objects are left
        # untouched.
        for handle in opened:
            handle.close()
    return MergeTrees(trees)
......
......@@ -7,8 +7,16 @@
from __future__ import print_function
import argparse
import os
import subprocess
import sys
import tempfile
try:
from StringIO import StringIO # for Python 2
except ImportError:
from io import StringIO # for Python 3
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))
import path_util
......@@ -17,15 +25,73 @@ import extract_histograms
import histogram_paths
import merge_xml
def main():
doc = merge_xml.MergeFiles(histogram_paths.ALL_XMLS)
def get_names(xml_files):
    """Returns the histogram names defined by the given XML files.

    Args:
      xml_files: A list of histograms.xml file-like objects, handed to
        merge_xml.MergeFiles() as its |files| argument.

    Returns:
      The result of extract_histograms.ExtractNames() for the merged inputs.

    Raises:
      ValueError: if extract_histograms reports errors while extracting the
        histograms from the merged document.
    """
    doc = merge_xml.MergeFiles(files=xml_files)
    histograms, had_errors = extract_histograms.ExtractHistogramsFromDom(doc)
    if had_errors:
        # Use a built-in exception: no `Error` name is defined or imported in
        # the visible module, so `raise Error(...)` would itself fail with a
        # NameError and hide the real message.
        raise ValueError("Error parsing inputs.")
    return extract_histograms.ExtractNames(histograms)
def histogram_xml_files():
    """Opens every file listed in histogram_paths.ALL_XMLS.

    Returns:
      A list of open file objects, in the same order as ALL_XMLS. The caller
      is responsible for closing them.
    """
    handles = []
    for xml_path in histogram_paths.ALL_XMLS:
        handles.append(open(xml_path))
    return handles
def get_diff(revision):
    """Returns the histogram names added and removed relative to a git revision.

    The current names come from the XML files on disk; the previous names come
    from the same paths as they exist at |revision|.

    Args:
      revision: A git revision as described in
        https://git-scm.com/docs/gitrevisions

    Returns:
      A tuple of (added names, removed names), where each entry is a list
      sorted in ascending order.

    Raises:
      subprocess.CalledProcessError: if `git cat-file` exits with a non-zero
        status for one of the files at |revision|.
    """

    def get_file_at_revision(path):
        """Returns a file-like object containing |path|'s content at |revision|."""
        obj = "%s:%s" % (revision, path)
        contents = subprocess.check_output(
            ("git", "cat-file", "--textconv", obj)).decode()
        # Just store the contents in memory. histograms.xml is big, but it
        # isn't _that_ big.
        return StringIO(contents)

    # Close the on-disk files once they have been parsed instead of leaking
    # the handles.
    current_files = histogram_xml_files()
    try:
        current_histogram_names = set(get_names(current_files))
    finally:
        for xml_file in current_files:
            xml_file.close()

    prev_histogram_names = set(
        get_names([
            get_file_at_revision(p) for p in histogram_paths.ALL_XMLS_RELATIVE
        ]))

    # sorted() accepts any iterable, so no intermediate list is needed.
    added_names = sorted(current_histogram_names - prev_histogram_names)
    removed_names = sorted(prev_histogram_names - current_histogram_names)
    return (added_names, removed_names)
def print_diff_names(revision):
    """Prints the histogram names added and removed relative to |revision|.

    Each section starts with a count header, followed by one name per line.
    The added names are printed before the removed names.

    Args:
      revision: A git revision, passed through to get_diff().
    """
    added, removed = get_diff(revision)
    sections = (
        ("%d histograms added:", added),
        ("%d histograms removed:", removed),
    )
    for header, names in sections:
        print(header % len(names))
        for histogram_name in names:
            print(histogram_name)
def main(argv):
    """Command-line entry point.

    Without --diff, prints every histogram name from the current XML files.
    With --diff REVISION, prints the names added and removed relative to that
    git revision.

    Args:
      argv: The full argument vector; argv[0] (the program name) is skipped.
    """
    arg_parser = argparse.ArgumentParser(description='Print histogram names.')
    arg_parser.add_argument(
        '--diff',
        type=str,
        help='Git revision to diff against (e.g. HEAD~)')
    options = arg_parser.parse_args(argv[1:])

    revision = options.diff
    if revision is None:
        # No revision given: list all current names.
        for histogram_name in get_names(histogram_xml_files()):
            print(histogram_name)
        return
    print_diff_names(revision)
if __name__ == '__main__':
main()
main(sys.argv)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment