Commit 334ab13c authored by kalman@chromium.org's avatar kalman@chromium.org

Docserver: Fix 3 issues relating to finding API references in sample files,

contributing to too-long database keys. Most importantly, match whole words
when doing variable name replacement, not just character sequences. Minified
files with variable names like "a" were exploding. Secondly, don't cache the
title of links in reference resolver, it's pointless and leading to even
longer database keys. Thirdly, truncate database keys to avoid crashing the
server from this ever again; log error instead.

BUG=314102
R=jyasskin@chromium.org
NOTRY=true

Review URL: https://codereview.chromium.org/55913003

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@232856 0039d316-1c4b-4281-b951-d872f2087c98
parent 50fa4d5f
application: chrome-apps-doc application: chrome-apps-doc
version: 2-34-2 version: 2-35-0
runtime: python27 runtime: python27
api_version: 1 api_version: 1
threadsafe: false threadsafe: false
......
...@@ -2,4 +2,4 @@ cron: ...@@ -2,4 +2,4 @@ cron:
- description: Repopulates all cached data. - description: Repopulates all cached data.
url: /_cron url: /_cron
schedule: every 5 minutes schedule: every 5 minutes
target: 2-34-2 target: 2-35-0
...@@ -2,11 +2,14 @@ ...@@ -2,11 +2,14 @@
# Use of this source code is governed by a BSD-style license that can be # Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file. # found in the LICENSE file.
from file_system import FileNotFoundError from copy import deepcopy
import logging import logging
import re import re
import string import string
from file_system import FileNotFoundError
def _ClassifySchemaNode(node_name, api): def _ClassifySchemaNode(node_name, api):
"""Attempt to classify |node_name| in an API, determining whether |node_name| """Attempt to classify |node_name| in an API, determining whether |node_name|
refers to a type, function, event, or property in |api|. refers to a type, function, event, or property in |api|.
...@@ -29,8 +32,18 @@ def _ClassifySchemaNode(node_name, api): ...@@ -29,8 +32,18 @@ def _ClassifySchemaNode(node_name, api):
return group, node_name return group, node_name
return None return None
def _MakeKey(namespace, ref, title):
return '%s.%s.%s' % (namespace, ref, title) def _MakeKey(namespace, ref):
key = '%s/%s' % (namespace, ref)
# AppEngine doesn't like keys > 500, but there will be some other stuff
# that goes into this key, so truncate it earlier. This shoudn't be
# happening anyway unless there's a bug, such as http://crbug.com/314102.
max_size = 256
if len(key) > max_size:
logging.error('Key was >%s characters: %s' % (max_size, key))
key = key[:max_size]
return key
class ReferenceResolver(object): class ReferenceResolver(object):
"""Resolves references to $ref's by searching through the APIs to find the """Resolves references to $ref's by searching through the APIs to find the
...@@ -68,7 +81,7 @@ class ReferenceResolver(object): ...@@ -68,7 +81,7 @@ class ReferenceResolver(object):
self._api_models = api_models self._api_models = api_models
self._object_store = object_store self._object_store = object_store
def _GetRefLink(self, ref, api_list, namespace, title): def _GetRefLink(self, ref, api_list, namespace):
# Check nodes within each API the ref might refer to. # Check nodes within each API the ref might refer to.
parts = ref.split('.') parts = ref.split('.')
for i, part in enumerate(parts): for i, part in enumerate(parts):
...@@ -104,7 +117,7 @@ class ReferenceResolver(object): ...@@ -104,7 +117,7 @@ class ReferenceResolver(object):
text = text[len('%s.' % namespace):] text = text[len('%s.' % namespace):]
return { return {
'href': '%s.html#%s-%s' % (api_name, category, name.replace('.', '-')), 'href': '%s.html#%s-%s' % (api_name, category, name.replace('.', '-')),
'text': title if title else text, 'text': text,
'name': node_name 'name': node_name
} }
...@@ -114,7 +127,7 @@ class ReferenceResolver(object): ...@@ -114,7 +127,7 @@ class ReferenceResolver(object):
if ref in api_list: if ref in api_list:
return { return {
'href': '%s.html' % ref, 'href': '%s.html' % ref,
'text': title if title else ref, 'text': ref,
'name': ref 'name': ref
} }
...@@ -124,22 +137,21 @@ class ReferenceResolver(object): ...@@ -124,22 +137,21 @@ class ReferenceResolver(object):
"""Resolve $ref |ref| in namespace |namespace| if not None, returning None """Resolve $ref |ref| in namespace |namespace| if not None, returning None
if it cannot be resolved. if it cannot be resolved.
""" """
link = self._object_store.Get(_MakeKey(namespace, ref, title)).Get() db_key = _MakeKey(namespace, ref)
if link is not None: link = self._object_store.Get(db_key).Get()
return link if link is None:
api_list = self._api_models.GetNames()
api_list = self._api_models.GetNames() link = self._GetRefLink(ref, api_list, namespace)
link = self._GetRefLink(ref, api_list, namespace, title) if link is None and namespace is not None:
# Try to resolve the ref in the current namespace if there is one.
if link is None and namespace is not None: link = self._GetRefLink('%s.%s' % (namespace, ref), api_list, namespace)
# Try to resolve the ref in the current namespace if there is one. if link is None:
link = self._GetRefLink('%s.%s' % (namespace, ref), return None
api_list, self._object_store.Set(db_key, link)
namespace, else:
title) link = deepcopy(link)
if title is not None:
if link is not None: link['text'] = title
self._object_store.Set(_MakeKey(namespace, ref, title), link)
return link return link
def SafeGetLink(self, ref, namespace=None, title=None): def SafeGetLink(self, ref, namespace=None, title=None):
...@@ -154,7 +166,7 @@ class ReferenceResolver(object): ...@@ -154,7 +166,7 @@ class ReferenceResolver(object):
type_name = ref.rsplit('.', 1)[-1] type_name = ref.rsplit('.', 1)[-1]
return { return {
'href': '#type-%s' % type_name, 'href': '#type-%s' % type_name,
'text': title if title else ref, 'text': title or ref,
'name': ref 'name': ref
} }
......
...@@ -56,15 +56,20 @@ class SamplesDataSource(object): ...@@ -56,15 +56,20 @@ class SamplesDataSource(object):
request) request)
def _GetAPIItems(self, js_file): def _GetAPIItems(self, js_file):
chrome_regex = '(chrome\.[a-zA-Z0-9\.]+)' chrome_pattern = r'chrome[\w.]+'
calls = set(re.findall(chrome_regex, js_file)) # Add API calls that appear normally, like "chrome.runtime.connect".
# Find APIs that have been assigned into variables. calls = set(re.findall(chrome_pattern, js_file))
assigned_vars = dict(re.findall('var\s*([^\s]+)\s*=\s*%s;' % chrome_regex, # Add API calls that have been assigned into variables, like
js_file)) # "var storageArea = chrome.storage.sync; storageArea.get", which should
# Replace the variable name with the full API name. # be expanded like "chrome.storage.sync.get".
for var_name, value in assigned_vars.iteritems(): for match in re.finditer(r'var\s+(\w+)\s*=\s*(%s);' % chrome_pattern,
js_file = js_file.replace(var_name, value) js_file):
return calls.union(re.findall(chrome_regex, js_file)) var_name, api_prefix = match.groups()
for var_match in re.finditer(r'\b%s\.([\w.]+)\b' % re.escape(var_name),
js_file):
api_suffix, = var_match.groups()
calls.add('%s.%s' % (api_prefix, api_suffix))
return calls
def _GetDataFromManifest(self, path, file_system): def _GetDataFromManifest(self, path, file_system):
manifest = file_system.ReadSingle(path + '/manifest.json').Get() manifest = file_system.ReadSingle(path + '/manifest.json').Get()
...@@ -130,6 +135,12 @@ class SamplesDataSource(object): ...@@ -130,6 +135,12 @@ class SamplesDataSource(object):
if item.startswith('chrome.'): if item.startswith('chrome.'):
item = item[len('chrome.'):] item = item[len('chrome.'):]
ref_data = self._ref_resolver.GetLink(item) ref_data = self._ref_resolver.GetLink(item)
# TODO(kalman): What about references like chrome.storage.sync.get?
# That should link to either chrome.storage.sync or
# chrome.storage.StorageArea.get (or probably both).
# TODO(kalman): Filter out API-only references? This can happen when
# the API namespace is assigned to a variable, but it's very hard to
# to disambiguate.
if ref_data is None: if ref_data is None:
continue continue
api_calls.append({ api_calls.append({
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment