Commit f2d92284 authored by Pawel Pluciennik's avatar Pawel Pluciennik Committed by Commit Bot

Improve performance of generating histograms.xml

This CL implements IterElementsWithTag to avoid not needed recursion.

On local machine it saved around 1s (from 7.6s to 6.6s).

Bug: 1016281
Change-Id: I0fea65892bc52574fd4e6fd3fd2455ae3ec646e0
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1872052
Commit-Queue: Alexei Svitkine <asvitkine@chromium.org>
Reviewed-by: default avatarAlexei Svitkine <asvitkine@chromium.org>
Cr-Commit-Position: refs/heads/master@{#709564}
parent 4ac8b448
......@@ -303,9 +303,10 @@ def ExpandHistogramsOWNERS(histograms):
Error: Raised if the OWNERS file with the given path does not exist.
"""
email_pattern = re.compile(_EMAIL_PATTERN)
iter_matches = extract_histograms.IterElementsWithTag
for histogram in histograms.getElementsByTagName('histogram'):
owners = histogram.getElementsByTagName('owner')
for histogram in iter_matches(histograms, 'histogram'):
owners = [owner for owner in iter_matches(histogram, 'owner', 1)]
# owner is a DOM Element with a single child, which is a DOM Text Node.
emails_with_dom_elements = set([
......@@ -316,12 +317,10 @@ def ExpandHistogramsOWNERS(histograms):
# component is a DOM Element with a single child, which is a DOM Text Node.
components_with_dom_elements = set([
extract_histograms.NormalizeString(component.childNodes[0].data)
for component in histogram.getElementsByTagName('component')])
for component in iter_matches(histogram, 'component', 1)])
for index in range(len(owners)):
owner = owners[index]
for index, owner in enumerate(owners):
owner_text = owner.childNodes[0].data
name = histogram.getAttribute('name')
if _IsEmailOrPlaceholder(index == 0, owner_text, name):
continue
......
......@@ -55,6 +55,7 @@ XML below will generate the following five histograms:
</histogram-configuration>
"""
import bisect
import copy
import datetime
......@@ -82,10 +83,59 @@ EXPIRY_DATE_PATTERN = "%Y-%m-%d"
EXPIRY_MILESTONE_RE = re.compile(r'M[0-9]{2,3}\Z')
_ELEMENT_NODE = xml.dom.minidom.Node.ELEMENT_NODE
# List of fields that need to copied by copy.copy to suffixed histogram.
# Other fields may be copied by reference.
_HISTOGRAM_COPY_FIELDS = [
'fieldtrial_groups', 'fieldtrial_names', 'fieldtrial_labels']
class Error(Exception):
pass
def IterElementsWithTag(root, tag, depth=-1):
"""Iterates over DOM tree and yields elements matching tag name.
It's meant to be replacement for `getElementsByTagName`,
(which does recursive search) but without recursive search
(nested tags are not supported in histograms files).
Note: This generator stops going deeper in the tree when it detects
that there are elements with given tag.
Args:
root: XML dom tree.
tag: Element's tag name.
depth: Defines how deep in the tree function should search for a match.
Yields:
xml.dom.minidom.Node: Element matching criteria.
"""
if depth == 0 and root.nodeType == _ELEMENT_NODE and root.tagName == tag:
yield root
return
had_tag = False
skipped = 0
for child in root.childNodes:
if child.nodeType == _ELEMENT_NODE and child.tagName == tag:
had_tag = True
yield child
else:
skipped += 1
depth -= 1
if not had_tag and depth != 0:
for child in root.childNodes:
for match in IterElementsWithTag(child, tag, depth):
yield match
def _GetTextFromChildNodes(node):
"""Returns a string concatenation of the text of the given node's children.
......@@ -160,7 +210,7 @@ def _NormalizeAllAttributeValues(node):
Returns:
The normalized minidom node.
"""
if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
if node.nodeType == _ELEMENT_NODE:
for a in node.attributes.keys():
node.attributes[a].value = NormalizeString(node.attributes[a].value)
......@@ -234,7 +284,7 @@ def ExtractEnumsFromXmlTree(tree):
have_errors = False
last_name = None
for enum in tree.getElementsByTagName('enum'):
for enum in IterElementsWithTag(tree, 'enum'):
name = enum.getAttribute('name')
if last_name is not None and name.lower() < last_name.lower():
logging.error('Enums %s and %s are not in alphabetical order', last_name,
......@@ -251,7 +301,9 @@ def ExtractEnumsFromXmlTree(tree):
enum_dict['name'] = name
enum_dict['values'] = {}
for int_tag in enum.getElementsByTagName('int'):
nodes = list(IterElementsWithTag(enum, 'int'))
for int_tag in nodes:
value_dict = {}
int_value = int(int_tag.getAttribute('value'))
if int_value in enum_dict['values']:
......@@ -265,7 +317,7 @@ def ExtractEnumsFromXmlTree(tree):
enum_int_values = sorted(enum_dict['values'].keys())
last_int_value = None
for int_tag in enum.getElementsByTagName('int'):
for int_tag in nodes:
int_value = int(int_tag.getAttribute('value'))
if last_int_value is not None and int_value < last_int_value:
logging.error('Enum %s int values %d and %d are not in numerical order',
......@@ -282,9 +334,9 @@ def ExtractEnumsFromXmlTree(tree):
else:
last_int_value = int_value
summary_nodes = enum.getElementsByTagName('summary')
if summary_nodes:
enum_dict['summary'] = _GetTextFromChildNodes(summary_nodes[0])
for summary in IterElementsWithTag(enum, 'summary'):
enum_dict['summary'] = _GetTextFromChildNodes(summary)
break
enums[name] = enum_dict
......@@ -309,8 +361,9 @@ def _ExtractOwners(histogram):
owners = []
has_owner = False
for owner_node in histogram.getElementsByTagName('owner'):
owner_text = _GetTextFromChildNodes(owner_node)
for owner_node in IterElementsWithTag(histogram, 'owner', 1):
child = owner_node.firstChild
owner_text = (child and child.nodeValue) or ''
is_email = email_pattern.match(owner_text)
if owner_text and (is_email or OWNER_PLACEHOLDER in owner_text):
......@@ -355,7 +408,7 @@ def _ExtractHistogramsFromXmlTree(tree, enums):
histograms = {}
have_errors = False
last_name = None
for histogram in tree.getElementsByTagName('histogram'):
for histogram in IterElementsWithTag(tree, 'histogram'):
name = histogram.getAttribute('name')
if last_name is not None and name.lower() < last_name.lower():
logging.error('Histograms %s and %s are not in alphabetical order',
......@@ -382,12 +435,12 @@ def _ExtractHistogramsFromXmlTree(tree, enums):
have_errors = True
# Find <owner> tag.
owners, hasOwner = _ExtractOwners(histogram)
owners, has_owner = _ExtractOwners(histogram)
if owners:
histogram_entry['owners'] = owners
# Find <summary> tag.
summary_nodes = histogram.getElementsByTagName('summary')
summary_nodes = list(IterElementsWithTag(histogram, 'summary'))
if summary_nodes:
histogram_entry['summary'] = _GetTextFromChildNodes(summary_nodes[0])
......@@ -395,7 +448,7 @@ def _ExtractHistogramsFromXmlTree(tree, enums):
histogram_entry['summary'] = 'TBD'
# Find <obsolete> tag.
obsolete_nodes = histogram.getElementsByTagName('obsolete')
obsolete_nodes = list(IterElementsWithTag(histogram, 'obsolete', 1))
if obsolete_nodes:
reason = _GetTextFromChildNodes(obsolete_nodes[0])
histogram_entry['obsolete'] = reason
......@@ -406,7 +459,7 @@ def _ExtractHistogramsFromXmlTree(tree, enums):
have_errors = True
# Non-obsolete histograms should specify <owner>s.
if not obsolete_nodes and not hasOwner:
if not obsolete_nodes and not has_owner:
logging.error('histogram %s should specify <owner>s', name)
have_errors = True
......@@ -421,9 +474,9 @@ def _ExtractHistogramsFromXmlTree(tree, enums):
histogram_entry['units'] = histogram.getAttribute('units')
# Find <details> tag.
details_nodes = histogram.getElementsByTagName('details')
if details_nodes:
histogram_entry['details'] = _GetTextFromChildNodes(details_nodes[0])
for node in IterElementsWithTag(histogram, 'details'):
histogram_entry['details'] = _GetTextFromChildNodes(node)
break
# Handle enum types.
if histogram.hasAttribute('enum'):
......@@ -476,7 +529,9 @@ def _UpdateHistogramsWithSuffixes(tree, histograms):
# Verify order of histogram_suffixes fields first.
last_name = None
for histogram_suffixes in tree.getElementsByTagName(histogram_suffix_tag):
for histogram_suffixes in IterElementsWithTag(
tree, histogram_suffix_tag, depth=1):
name = histogram_suffixes.getAttribute('name')
if last_name is not None and name.lower() < last_name.lower():
logging.error('histogram_suffixes %s and %s are not in alphabetical '
......@@ -491,7 +546,7 @@ def _UpdateHistogramsWithSuffixes(tree, histograms):
reprocess_queue = []
def GenerateHistogramSuffixes():
for f in tree.getElementsByTagName(histogram_suffix_tag):
for f in IterElementsWithTag(tree, histogram_suffix_tag):
yield 0, f
for r, f in reprocess_queue:
yield r, f
......@@ -499,8 +554,8 @@ def _UpdateHistogramsWithSuffixes(tree, histograms):
for reprocess_count, histogram_suffixes in GenerateHistogramSuffixes():
# Check dependencies first
dependencies_valid = True
affected_histograms = histogram_suffixes.getElementsByTagName(
'affected-histogram')
affected_histograms = list(IterElementsWithTag(
histogram_suffixes, 'affected-histogram', 1))
for affected_histogram in affected_histograms:
histogram_name = affected_histogram.getAttribute('name')
if histogram_name not in histograms:
......@@ -524,7 +579,7 @@ def _UpdateHistogramsWithSuffixes(tree, histograms):
group_obsolete_reason = _GetObsoleteReason(histogram_suffixes)
name = histogram_suffixes.getAttribute('name')
suffix_nodes = histogram_suffixes.getElementsByTagName(suffix_tag)
suffix_nodes = list(IterElementsWithTag(histogram_suffixes, suffix_tag, 1))
suffix_labels = {}
for suffix in suffix_nodes:
suffix_name = suffix.getAttribute('name')
......@@ -546,7 +601,7 @@ def _UpdateHistogramsWithSuffixes(tree, histograms):
histogram_name, name)
have_errors = True
last_histogram_name = histogram_name
with_suffixes = affected_histogram.getElementsByTagName(with_tag)
with_suffixes = list(IterElementsWithTag(affected_histogram, with_tag, 1))
if with_suffixes:
suffixes_to_add = with_suffixes
else:
......@@ -571,26 +626,20 @@ def _UpdateHistogramsWithSuffixes(tree, histograms):
suffix_label = suffix_labels.get(suffix_name, '')
histogram_entry = histograms[new_histogram_name]
for field_name in _HISTOGRAM_COPY_FIELDS:
histogram_entry.setdefault(field_name, [])
# TODO(yiyaoliu): Rename these to be consistent with the new naming.
# It is kept unchanged for now to be it's used by dashboards.
if 'fieldtrial_groups' not in histograms[new_histogram_name]:
histograms[new_histogram_name]['fieldtrial_groups'] = []
histograms[new_histogram_name]['fieldtrial_groups'].append(
suffix_name)
if 'fieldtrial_names' not in histograms[new_histogram_name]:
histograms[new_histogram_name]['fieldtrial_names'] = []
histograms[new_histogram_name]['fieldtrial_names'].append(name)
if 'fieldtrial_labels' not in histograms[new_histogram_name]:
histograms[new_histogram_name]['fieldtrial_labels'] = []
histograms[new_histogram_name]['fieldtrial_labels'].append(
suffix_label)
histogram_entry['fieldtrial_groups'].append(suffix_name)
histogram_entry['fieldtrial_names'].append(name)
histogram_entry['fieldtrial_labels'].append(suffix_label)
# If no owners are added for this histogram-suffixes, it inherits the
# owners of its parents.
if owners:
histograms[new_histogram_name]['owners'] = owners
histogram_entry['owners'] = owners
# If a suffix has an obsolete node, it's marked as obsolete for the
# specified reason, overwriting its group's obsoletion reason if the
......@@ -602,9 +651,9 @@ def _UpdateHistogramsWithSuffixes(tree, histograms):
# If the suffix has an obsolete tag, all histograms it generates
# inherit it.
if obsolete_reason:
histograms[new_histogram_name]['obsolete'] = obsolete_reason
histogram_entry['obsolete'] = obsolete_reason
_ProcessBaseHistogramAttribute(suffix, histograms[new_histogram_name])
_ProcessBaseHistogramAttribute(suffix, histogram_entry)
except Error:
have_errors = True
......@@ -612,6 +661,26 @@ def _UpdateHistogramsWithSuffixes(tree, histograms):
return have_errors
def _GetTagSubTree(tree, tag, depth):
"""Returns sub tree with tag element as a root.
When no element with tag name is found or there are many of them
original tree is returned.
Args:
tree: XML dom tree.
tag: Element's tag name.
depth: Defines how deep in the tree function should search for a match.
Returns:
xml.dom.minidom.Node: Sub tree (matching criteria) or original one.
"""
entries = list(IterElementsWithTag(tree, tag, depth))
if len(entries) == 1:
tree = entries[0]
return tree
def ExtractHistogramsFromDom(tree):
"""Computes the histogram names and descriptions from the XML representation.
......@@ -621,13 +690,18 @@ def ExtractHistogramsFromDom(tree):
Returns:
a tuple of (histograms, status) where histograms is a dictionary mapping
histogram names to dictionaries containing histogram descriptions and status
is a boolean indicating if errros were encoutered in processing.
is a boolean indicating if errros were encountered in processing.
"""
_NormalizeAllAttributeValues(tree)
enums, enum_errors = ExtractEnumsFromXmlTree(tree)
histograms, histogram_errors = _ExtractHistogramsFromXmlTree(tree, enums)
update_errors = _UpdateHistogramsWithSuffixes(tree, histograms)
enums_tree = _GetTagSubTree(tree, 'enums', 2)
histograms_tree = _GetTagSubTree(tree, 'histograms', 2)
histogram_suffixes_tree = _GetTagSubTree(tree, 'histogram_suffixes_list', 2)
enums, enum_errors = ExtractEnumsFromXmlTree(enums_tree)
histograms, histogram_errors = _ExtractHistogramsFromXmlTree(
histograms_tree, enums)
update_errors = _UpdateHistogramsWithSuffixes(
histogram_suffixes_tree, histograms)
return histograms, enum_errors or histogram_errors or update_errors
......
......@@ -6,9 +6,11 @@
"""A script to merge multiple source xml files into a single histograms.xml."""
import argparse
import expand_owners
import xml.dom.minidom
import expand_owners
import extract_histograms
def GetElementsByTagName(trees, tag):
"""Gets all elements with the specified tag from a set of DOM trees.
......@@ -19,7 +21,8 @@ def GetElementsByTagName(trees, tag):
Returns:
A list of DOM nodes with the specified tag.
"""
return [e for t in trees for e in t.getElementsByTagName(tag)]
iterator = extract_histograms.IterElementsWithTag
return list(e for t in trees for e in iterator(t, tag, 2))
def MakeNodeWithChildren(doc, tag, children):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment