Commit 6a14e6fe authored by mattcary, committed by Commit bot

Upgrade loading_model to use the new request_track and loading_trace.

Review URL: https://codereview.chromium.org/1610273002

Cr-Commit-Position: refs/heads/master@{#370673}
parent 33c282f0
@@ -6,11 +6,11 @@
 (Redirect the following to the general model module once we have one)
 A model is an object with the following methods.
-CostMs(): return the cost of the cost in milliseconds.
-Set(): set model-specifical parameters.
+CostMs(): return the cost of the model in milliseconds.
+Set(): set model-specific parameters.
 ResourceGraph
-This creates a DAG of resource dependancies from loading.log_requests to model
+This creates a DAG of resource dependencies from loading.log_requests to model
 loading time. The model may be parameterized by changing the loading time of
 a particular or all resources.
 """
@@ -21,7 +21,7 @@ import urlparse
 import sys
 import dag
-import log_parser
+import request_dependencies_lens
 class ResourceGraph(object):
   """A model of loading by a DAG (tree?) of resource dependencies.
@@ -29,14 +29,13 @@ class ResourceGraph(object):
   Set parameters:
     cache_all: if true, assume zero loading time for all resources.
   """
-  def __init__(self, requests):
-    """Create from a parsed request set.
+  def __init__(self, trace):
+    """Create from a LoadingTrace.
     Args:
-      requests: [RequestData, ...] filtered RequestData from loading.log_parser.
+      trace: (LoadingTrace) Loading trace.
     """
-    self._BuildDag(requests)
+    self._BuildDag(trace)
     self._global_start = min([n.StartTime() for n in self._node_info])
     # Sort before splitting children so that we can correctly detect if a
     # reparented child is actually a dependency for a child of its new parent.
@@ -182,7 +181,7 @@ class ResourceGraph(object):
     while n.Predecessors():
       n = reduce(lambda costliest, next:
                  next if (self._node_filter(next) and
-                          cost[next.Index()] > cost[costliest.Index()])
+                          costs[next.Index()] > costs[costliest.Index()])
                  else costliest,
                  n.Predecessors())
       path_list.insert(0, self._node_info[n.Index()])
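
The reduce above picks, among a node's predecessors that pass the node filter,
the one with the greatest accumulated cost; repeating this from the final node
walks the critical path backwards. The same idea as a self-contained sketch,
with a plain cost dict and no filtering (hypothetical helper, not part of this
change):

  def CostliestPath(sink, predecessors, costs):
    # Walk back from the sink, always stepping to the costliest predecessor,
    # then return the path in start-to-sink order.
    path = [sink]
    node = sink
    while predecessors(node):
      node = max(predecessors(node), key=lambda p: costs[p])
      path.insert(0, node)
    return path
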
@@ -322,10 +321,10 @@ class ResourceGraph(object):
       self._node = node
       self._edge_costs = {}
       self._edge_annotations = {}
-      # All fields in timing are millis relative to requestTime, which is epoch
+      # All fields in timing are millis relative to request_time, which is epoch
       # seconds.
       self._node_cost = max([t for f, t in request.timing._asdict().iteritems()
-                             if f != 'requestTime'])
+                             if f != 'request_time'])
     def __str__(self):
       return self.ShortName()
@@ -346,20 +345,37 @@ class ResourceGraph(object):
       return self._edge_costs[s]
     def StartTime(self):
-      return self._request.timing.requestTime * 1000
+      return self._request.timing.request_time * 1000
     def EndTime(self):
-      return self._request.timing.requestTime * 1000 + self._node_cost
+      return self._request.timing.request_time * 1000 + self._node_cost
     def EdgeAnnotation(self, s):
       assert s.Node() in self.Node().Successors()
       return self._edge_annotations.get(s, [])
     def ContentType(self):
-      return log_parser.Resource.FromRequest(self._request).GetContentType()
+      return self._request.GetContentType()
     def ShortName(self):
-      return log_parser.Resource.FromRequest(self._request).GetShortName()
+      """Returns either the hostname of the resource, or the filename,
+      or the end of the path. Tries to include the domain as much as possible.
+      """
+      parsed = urlparse.urlparse(self._request.url)
+      path = parsed.path
+      if path != '' and path != '/':
+        last_path = parsed.path.split('/')[-1]
+        if len(last_path) < 10:
+          if len(path) < 10:
+            return parsed.hostname + '/' + path
+          else:
+            return parsed.hostname + '/..' + parsed.path[-10:]
+        elif len(last_path) > 10:
+          return parsed.hostname + '/..' + last_path[:5]
+        else:
+          return parsed.hostname + '/..' + last_path
+      else:
+        return parsed.hostname
     def Url(self):
       return self._request.url
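
The inlined ShortName() above abbreviates a URL to roughly ten characters of
path while keeping the hostname. A standalone copy of the heuristic for
experimenting with inputs (the free-function form is ours; the branching
mirrors the method above):

  import urlparse

  def ShortNameForUrl(url):
    parsed = urlparse.urlparse(url)
    path = parsed.path
    if path != '' and path != '/':
      last_path = path.split('/')[-1]
      if len(last_path) < 10:
        if len(path) < 10:
          return parsed.hostname + '/' + path             # Short path: keep it.
        else:
          return parsed.hostname + '/..' + path[-10:]     # Keep the path tail.
      elif len(last_path) > 10:
        return parsed.hostname + '/..' + last_path[:5]    # Long file: truncate.
      else:
        return parsed.hostname + '/..' + last_path
    return parsed.hostname                                # No path: host only.

  # ShortNameForUrl('http://example.com/') == 'example.com'
  # ShortNameForUrl('http://example.com/scripts/analytics_loader.js')
  #     == 'example.com/..analy'
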
@@ -422,7 +438,7 @@ class ResourceGraph(object):
     return self._node_info[parent.Index()].EdgeAnnotation(
         self._node_info[child.Index()])
-  def _BuildDag(self, requests):
+  def _BuildDag(self, trace):
     """Build DAG of resources.
     Build a DAG from our requests and augment with _NodeInfo (see above) in a
@@ -431,112 +447,36 @@ class ResourceGraph(object):
     Creates self._nodes and self._node_info.
     Args:
-      requests: [Request, ...] Requests from loading.log_parser.
+      trace: A LoadingTrace.
     """
     self._nodes = []
     self._node_info = []
-    indicies_by_url = {}
-    requests_by_completion = log_parser.SortedByCompletion(requests)
-    for request in requests:
+    index_by_request = {}
+    for request in trace.request_track.GetEvents():
       next_index = len(self._nodes)
-      indicies_by_url.setdefault(request.url, []).append(next_index)
+      assert request not in index_by_request
+      index_by_request[request] = next_index
       node = dag.Node(next_index)
       node_info = self._NodeInfo(node, request)
       self._nodes.append(node)
       self._node_info.append(node_info)
-    for url, indicies in indicies_by_url.iteritems():
-      if len(indicies) > 1:
-        logging.warning('Multiple loads (%d) for url: %s' %
-                        (len(indicies), url))
-    for i in xrange(len(requests)):
-      request = requests[i]
-      current_node_info = self._node_info[i]
-      resource = log_parser.Resource.FromRequest(current_node_info.Request())
-      initiator = request.initiator
-      initiator_type = initiator['type']
-      predecessor_url = None
-      predecessor_type = None
-      # Classify & infer the predecessor. If a candidate url we identify as the
-      # predecessor is not in index_by_url, then we haven't seen it in our
-      # requests and we will try to find a better predecessor.
-      if initiator_type == 'parser':
-        url = initiator['url']
-        if url in indicies_by_url:
-          predecessor_url = url
-          predecessor_type = 'parser'
-      elif initiator_type == 'script' and 'stackTrace' in initiator:
-        for frame in initiator['stackTrace']:
-          url = frame['url']
-          if url in indicies_by_url:
-            predecessor_url = url
-            predecessor_type = 'stack'
-            break
-      elif initiator_type == 'script':
-        # When the initiator is a script without a stackTrace, infer that it
-        # comes from the most recent script from the same hostname. TLD+1 might
-        # be better, but finding what is a TLD requires a database.
-        request_hostname = urlparse.urlparse(request.url).hostname
-        sorted_script_requests_from_hostname = [
-            r for r in requests_by_completion
-            if (resource.GetContentType() in ('script', 'html', 'json')
-                and urlparse.urlparse(r.url).hostname == request_hostname)]
-        most_recent = None
-        # Linear search is bad, but this shouldn't matter here.
-        for r in sorted_script_requests_from_hostname:
-          if r.timestamp < request.timing.requestTime:
-            most_recent = r
-          else:
-            break
-        if most_recent is not None:
-          url = most_recent.url
-          if url in indicies_by_url:
-            predecessor_url = url
-            predecessor_type = 'script_inferred'
-      # TODO(mattcary): we skip initiator type other, is that correct?
-      if predecessor_url is not None:
-        predecessor = self._FindBestPredecessor(
-            current_node_info, indicies_by_url[predecessor_url])
-        edge_cost = current_node_info.StartTime() - predecessor.EndTime()
-        if edge_cost < 0:
-          edge_cost = 0
-        if current_node_info.StartTime() < predecessor.StartTime():
-          logging.error('Inverted dependency: %s->%s',
-                        predecessor.ShortName(), current_node_info.ShortName())
-        # Note that current.StartTime() < predecessor.EndTime() appears to
-        # happen a fair amount in practice.
-        predecessor.Node().AddSuccessor(current_node_info.Node())
-        predecessor.SetEdgeCost(current_node_info, edge_cost)
-        predecessor.AddEdgeAnnotation(current_node_info, predecessor_type)
-  def _FindBestPredecessor(self, node_info, candidate_indicies):
-    """Find best predecessor for node_info
-    If there is only one candidate, we use it regardless of timings. We will
-    later warn about inverted dependencies. If there are more than one, we use
-    the latest whose end time is before node_info's start time. If there is no
-    such candidate, we throw up our hands and return an arbitrary one.
-    Args:
-      node_info: _NodeInfo of interest.
-      candidate_indicies: indicies of candidate predecessors.
-    Returns:
-      _NodeInfo of best predecessor.
-    """
-    assert candidate_indicies
-    if len(candidate_indicies) == 1:
-      return self._node_info[candidate_indicies[0]]
-    candidate = self._node_info[candidate_indicies[0]]
-    for i in xrange(1, len(candidate_indicies)):
-      next_candidate = self._node_info[candidate_indicies[i]]
-      if (next_candidate.EndTime() < node_info.StartTime() and
-          next_candidate.StartTime() > candidate.StartTime()):
-        candidate = next_candidate
-    if candidate.EndTime() > node_info.StartTime():
-      logging.warning('Multiple candidates but all inverted for ' +
-                      node_info.ShortName())
-    return candidate
+    dependencies = request_dependencies_lens.RequestDependencyLens(
+        trace).GetRequestDependencies()
+    for child_rq, parent_rq, reason in dependencies:
+      parent = self._node_info[index_by_request[parent_rq]]
+      child = self._node_info[index_by_request[child_rq]]
+      edge_cost = child.StartTime() - parent.EndTime()
+      if edge_cost < 0:
+        edge_cost = 0
+      if child.StartTime() < parent.StartTime():
+        logging.error('Inverted dependency: %s->%s',
+                      parent.ShortName(), child.ShortName())
+      # Note that child.StartTime() < parent.EndTime() appears to happen a
+      # fair amount in practice.
+      parent.Node().AddSuccessor(child.Node())
+      parent.SetEdgeCost(child, edge_cost)
+      parent.AddEdgeAnnotation(child, reason)
   def _SplitChildrenByTime(self, parent):
     """Split children of a node by request times.
@@ -624,7 +564,7 @@ class ResourceGraph(object):
     """
     node_info = self._node_info[index]
     color = self._CONTENT_TYPE_TO_COLOR[node_info.ContentType()]
-    max_age = log_parser.MaxAge(node_info.Request())
+    max_age = node_info.Request().MaxAge()
     shape = 'polygon' if max_age > 300 else 'oval'
     styles = ['filled']
     if highlight:
...
@@ -8,22 +8,62 @@ import unittest
 import dag
 import loading_model
-import log_parser
+import loading_trace
+import request_track
+import request_dependencies_lens
+
+class SimpleLens(object):
+  def __init__(self, trace):
+    self._trace = trace
+
+  def GetRequestDependencies(self):
+    url_to_rq = {}
+    deps = []
+    for rq in self._trace.request_track.GetEvents():
+      assert rq.url not in url_to_rq
+      url_to_rq[rq.url] = rq
+    for rq in self._trace.request_track.GetEvents():
+      if rq.initiator in url_to_rq:
+        deps.append((rq, url_to_rq[rq.initiator], ''))
+    return deps
+
+class MockRequestTrack(object):
+  def __init__(self, requests):
+    self._requests = requests
+
+  def GetEvents(self):
+    return self._requests
+
 class LoadingModelTestCase(unittest.TestCase):
+  def setUp(self):
+    request_dependencies_lens.RequestDependencyLens = SimpleLens
+    self._next_request_id = 0
+
   def MakeParserRequest(self, url, source_url, start_time, end_time,
                         magic_content_type=False):
-    timing_data = {f: -1 for f in log_parser.Timing._fields}
-    # We should ignore connectEnd.
-    timing_data['connectEnd'] = (end_time - start_time) / 2
-    timing_data['receiveHeadersEnd'] = end_time - start_time
-    timing_data['requestTime'] = start_time / 1000.0
-    return log_parser.RequestData(
-        None, {'Content-Type': 'null' if not magic_content_type
-               else 'magic-debug-content' },
-        None, start_time, timing_data, 'http://' + str(url), False,
-        {'type': 'parser', 'url': 'http://' + str(source_url)})
+    rq = request_track.Request.FromJsonDict({
+        'request_id': self._next_request_id,
+        'url': 'http://' + str(url),
+        'initiator': 'http://' + str(source_url),
+        'response_headers': {'Content-Type':
+                             'null' if not magic_content_type
+                             else 'magic-debug-content' },
+        'timing': request_track.TimingFromDict({
+            # connectEnd should be ignored.
+            'connectEnd': (end_time - start_time) / 2,
+            'receiveHeadersEnd': end_time - start_time,
+            'requestTime': start_time / 1000.0})
+        })
+    self._next_request_id += 1
+    return rq
+
+  def MakeGraph(self, requests):
+    return loading_model.ResourceGraph(loading_trace.LoadingTrace(
+        None, None, None, MockRequestTrack(requests), None))
+
   def SortedIndicies(self, graph):
     return [n.Index() for n in dag.TopologicalSort(graph._nodes)]
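
The test scaffolding works by rebinding request_dependencies_lens.
RequestDependencyLens to SimpleLens in setUp(), so the ResourceGraph built by
MakeGraph() picks up the stub when it constructs its DAG. The pattern in
isolation (a minimal sketch; the no-op lens below is ours, not part of this
change):

  import request_dependencies_lens

  class NoOpLens(object):
    # Same interface as SimpleLens above, but reports no dependencies.
    def __init__(self, trace):
      self._trace = trace

    def GetRequestDependencies(self):
      return []

  # loading_model resolves the class through the module at call time, so
  # rebinding the module attribute is enough to swap implementations.
  request_dependencies_lens.RequestDependencyLens = NoOpLens
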
@@ -39,7 +79,7 @@ class LoadingModelTestCase(unittest.TestCase):
         self.MakeParserRequest(4, 3, 127, 128),
         self.MakeParserRequest(5, 'null', 100, 105),
         self.MakeParserRequest(6, 5, 105, 110)]
-    graph = loading_model.ResourceGraph(requests)
+    graph = self.MakeGraph(requests)
     self.assertEqual(self.SuccessorIndicies(graph._nodes[0]), [1, 2])
     self.assertEqual(self.SuccessorIndicies(graph._nodes[1]), [3])
     self.assertEqual(self.SuccessorIndicies(graph._nodes[2]), [])
@@ -60,7 +100,7 @@ class LoadingModelTestCase(unittest.TestCase):
         self.MakeParserRequest(4, 3, 127, 128),
         self.MakeParserRequest(5, 'null', 100, 105),
         self.MakeParserRequest(6, 5, 105, 110)]
-    graph = loading_model.ResourceGraph(requests)
+    graph = self.MakeGraph(requests)
     path_list = []
     self.assertEqual(28, graph.Cost(path_list))
     self.assertEqual([0, 1, 3, 4], [n.Index() for n in path_list])
@@ -76,10 +116,11 @@ class LoadingModelTestCase(unittest.TestCase):
                               magic_content_type=True),
        self.MakeParserRequest(2, 0, 121, 122,
                               magic_content_type=True),
-        self.MakeParserRequest(3, 0, 112, 119),
+        self.MakeParserRequest(3, 0, 112, 119,
+                               magic_content_type=True),
         self.MakeParserRequest(4, 2, 122, 126),
         self.MakeParserRequest(5, 2, 122, 126)]
-    graph = loading_model.ResourceGraph(requests)
+    graph = self.MakeGraph(requests)
     self.assertEqual(self.SuccessorIndicies(graph._nodes[0]), [1, 3])
     self.assertEqual(self.SuccessorIndicies(graph._nodes[1]), [2])
     self.assertEqual(self.SuccessorIndicies(graph._nodes[2]), [4, 5])
@@ -88,10 +129,10 @@ class LoadingModelTestCase(unittest.TestCase):
     self.assertEqual(self.SuccessorIndicies(graph._nodes[5]), [])
     self.assertEqual(self.SortedIndicies(graph), [0, 1, 3, 2, 4, 5])
-    # Change node 1 so it is a parent of 3, which become parent of 2.
+    # Change node 1 so it is a parent of 3, which becomes the parent of 2.
     requests[1] = self.MakeParserRequest(1, 0, 110, 111,
                                          magic_content_type=True)
-    graph = loading_model.ResourceGraph(requests)
+    graph = self.MakeGraph(requests)
     self.assertEqual(self.SuccessorIndicies(graph._nodes[0]), [1])
     self.assertEqual(self.SuccessorIndicies(graph._nodes[1]), [3])
     self.assertEqual(self.SuccessorIndicies(graph._nodes[2]), [4, 5])
@@ -101,14 +142,15 @@ class LoadingModelTestCase(unittest.TestCase):
     self.assertEqual(self.SortedIndicies(graph), [0, 1, 3, 2, 4, 5])
     # Add an initiator dependence to 1 that will become the parent of 3.
-    requests[1] = self.MakeParserRequest(1, 0, 110, 111)
-    requests.append(self.MakeParserRequest(6, 1, 111, 112))
-    graph = loading_model.ResourceGraph(requests)
-    # Check it doesn't change until we change the content type of 1.
-    self.assertEqual(self.SuccessorIndicies(graph._nodes[1]), [3, 6])
     requests[1] = self.MakeParserRequest(1, 0, 110, 111,
                                          magic_content_type=True)
-    graph = loading_model.ResourceGraph(requests)
+    requests.append(self.MakeParserRequest(6, 1, 111, 112))
+    graph = self.MakeGraph(requests)
+    # Check it doesn't change until we change the content type of 6.
+    self.assertEqual(self.SuccessorIndicies(graph._nodes[6]), [])
+    requests[6] = self.MakeParserRequest(6, 1, 111, 112,
+                                         magic_content_type=True)
+    graph = self.MakeGraph(requests)
     self.assertEqual(self.SuccessorIndicies(graph._nodes[0]), [1])
     self.assertEqual(self.SuccessorIndicies(graph._nodes[1]), [6])
     self.assertEqual(self.SuccessorIndicies(graph._nodes[2]), [4, 5])
@@ -127,8 +169,8 @@ class LoadingModelTestCase(unittest.TestCase):
         self.MakeParserRequest(4, 2, 122, 126),
         self.MakeParserRequest(5, 2, 122, 126)]
     for r in requests:
-      r.headers['Content-Type'] = 'image/gif'
-    graph = loading_model.ResourceGraph(requests)
+      r.response_headers['Content-Type'] = 'image/gif'
+    graph = self.MakeGraph(requests)
     self.assertEqual(self.SuccessorIndicies(graph._nodes[0]), [1, 2, 3])
     self.assertEqual(self.SuccessorIndicies(graph._nodes[1]), [])
     self.assertEqual(self.SuccessorIndicies(graph._nodes[2]), [4, 5])
...
@@ -117,10 +117,35 @@ class Request(object):
   def IsDataRequest(self):
     return self.protocol == 'data'
-  # For testing.
+  def MaxAge(self):
+    """Returns the max-age of a resource, or -1."""
+    # TODO(lizeb): Handle the "Expires" header as well.
+    cache_control = {}
+    if not self.response_headers:
+      return -1
+    cache_control_str = self.response_headers.get('Cache-Control', None)
+    if cache_control_str is not None:
+      directives = [s.strip() for s in cache_control_str.split(',')]
+      for directive in directives:
+        parts = [s.strip() for s in directive.split('=')]
+        if len(parts) == 1:
+          cache_control[parts[0]] = True
+        else:
+          cache_control[parts[0]] = parts[1]
+    if (u'no-store' in cache_control
+        or u'no-cache' in cache_control
+        or len(cache_control) == 0):
+      return -1
+    if 'max-age' in cache_control:
+      return int(cache_control['max-age'])
+    return -1
   def __eq__(self, o):
     return self.__dict__ == o.__dict__
+  def __hash__(self):
+    return hash(self.request_id)
 class RequestTrack(devtools_monitor.Track):
   """Aggregates request data."""
...
@@ -268,6 +268,21 @@ class RequestTrackTestCase(unittest.TestCase):
     request_track = RequestTrack.FromJsonDict(json_dict)
     self.assertEquals(self.request_track, request_track)
+  def testMaxAge(self):
+    rq = Request()
+    self.assertEqual(-1, rq.MaxAge())
+    rq.response_headers = {}
+    self.assertEqual(-1, rq.MaxAge())
+    rq.response_headers[
+        'Cache-Control'] = 'private,s-maxage=0,max-age=0,must-revalidate'
+    self.assertEqual(0, rq.MaxAge())
+    rq.response_headers[
+        'Cache-Control'] = 'private,s-maxage=0,no-store,max-age=100'
+    self.assertEqual(-1, rq.MaxAge())
+    rq.response_headers[
+        'Cache-Control'] = 'private,s-maxage=0'
+    self.assertEqual(-1, rq.MaxAge())
   @classmethod
   def _ValidSequence(cls, request_track):
     request_track.Handle(
...