Commit 95c3874d authored by Kent Tamura, committed by Commit Bot

name_style_converter: Ignore characters not matched by _TOKEN_RE.

The new behavior matches that of name_utilities.py.
This CL causes no behavior change in practice because the current users
of name_style_converter never pass names containing such characters.

Bug: 843927
Change-Id: I8eeafb71ec85bb1a0f92eff317fb07eb2dc373ff
Reviewed-on: https://chromium-review.googlesource.com/1065959
Commit-Queue: Kent Tamura <tkent@chromium.org>
Reviewed-by: Hitoshi Yoshida <peria@chromium.org>
Cr-Commit-Position: refs/heads/master@{#560224}
parent 0c2262ad
...@@ -72,26 +72,20 @@ _TOKEN_RE = re.compile(r'(' + '|'.join(SPECIAL_TOKENS + _TOKEN_PATTERNS) + r')') ...@@ -72,26 +72,20 @@ _TOKEN_RE = re.compile(r'(' + '|'.join(SPECIAL_TOKENS + _TOKEN_PATTERNS) + r')')
def tokenize_name(name): def tokenize_name(name):
"""Tokenize the specified name. """Tokenize the specified name.
Detects special cases that are not easily discernible without additional A token consists of A-Z, a-z, and 0-9 characters. Other characters work as
knowledge, such as recognizing that in SVGSVGElement, the first two SVGs token delimiters, and the resultant list won't contain such characters.
are separate tokens, but WebGL is one token. Capital letters also work as delimiters. E.g. 'FooBar-baz' is tokenized to
['Foo', 'Bar', 'baz']. See _TOKEN_PATTERNS for more details.
This function detects special cases that are not easily discernible without
additional knowledge, such as recognizing that in SVGSVGElement, the first
two SVGs are separate tokens, but WebGL is one token.
Returns: Returns:
A list of token strings. A list of token strings.
""" """
tokens = [] return _TOKEN_RE.findall(name)
while len(name) > 0:
matched_token = None
match = _TOKEN_RE.search(name)
if not match:
matched_token = name
elif match.start(0) != 0:
matched_token = name[:match.start(0)]
else:
matched_token = match.group(0)
tokens.append(matched_token)
name = name[len(matched_token):]
return tokens
class NameStyleConverter(object): class NameStyleConverter(object):
......
...@@ -108,8 +108,10 @@ class SmartTokenizerTest(unittest.TestCase): ...@@ -108,8 +108,10 @@ class SmartTokenizerTest(unittest.TestCase):
self.assertEqual(tokenize_name('IsXHTMLDocument'), ['Is', 'XHTML', 'Document']) self.assertEqual(tokenize_name('IsXHTMLDocument'), ['Is', 'XHTML', 'Document'])
self.assertEqual(tokenize_name('isHTMLDocument'), ['is', 'HTML', 'Document']) self.assertEqual(tokenize_name('isHTMLDocument'), ['is', 'HTML', 'Document'])
self.assertEqual(tokenize_name('Animation.idl'), ['Animation', '.', 'idl']) def test_ignoring_characters(self):
self.assertEqual(tokenize_name('-webkit-appearance'), ['-', 'webkit', '-', 'appearance']) self.assertEqual(tokenize_name('Animation.idl'), ['Animation', 'idl'])
self.assertEqual(tokenize_name('-webkit-appearance'), ['webkit', 'appearance'])
self.assertEqual(tokenize_name(' foo_bar!#"$'), ['foo', 'bar'])
class NameStyleConverterTest(unittest.TestCase): class NameStyleConverterTest(unittest.TestCase):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment