grit: Allow inlining srcset attributes without descriptors

According to the spec, the srcset attribute does not need each URL to have an
associated descriptor; a bare URL is valid. srcset is also a valid attribute on
a <source> tag, so the data should be inlined there as well.

Bug: 1006216
Change-Id: I0f782035dfdbf514423cdd430536ed962340c9a9
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1829711
Commit-Queue: Robbie Gibson <rkgibson@google.com>
Reviewed-by: Robert Flack <flackr@chromium.org>
Reviewed-by: Dan Beam <dbeam@chromium.org>
Cr-Commit-Position: refs/heads/master@{#704220}

grit: Allow inlining srcset attributes without descriptors
According to the spec, the srcset attribute does not need each URL to have an
associated descriptor; a bare URL is valid. srcset is also a valid attribute on
a <source> tag, so the data should be inlined there as well.

Bug: 1006216
Change-Id: I0f782035dfdbf514423cdd430536ed962340c9a9
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1829711
Commit-Queue: Robbie Gibson <rkgibson@google.com>
Reviewed-by: Robert Flack <flackr@chromium.org>
Reviewed-by: Dan Beam <dbeam@chromium.org>
Cr-Commit-Position: refs/heads/master@{#704220}
48797b95 · Robbie Gibson · Commit Bot · db10c595 · 48797b95 · 48797b95
Commit 48797b95 authored Oct 09, 2019 by Robbie Gibson Committed by Commit Bot Oct 09, 2019
Showing with 94 additions and 27 deletions

tools/grit/grit/format/html_inline.py tools/grit/grit/format/html_inline.py +28 -26

tools/grit/grit/format/html_inline_unittest.py tools/grit/grit/format/html_inline_unittest.py +66 -1

No files found.
--- a/tools/grit/grit/format/html_inline.py
+++ b/tools/grit/grit/format/html_inline.py
@@ -55,13 +55,14 @@ _INCLUDE_RE = lazy_re.compile(
 _SRC_RE = lazy_re.compile(
    r'<(?!script)(?:[^>]+?\s)src="(?!\[\[|{{)(?P<filename>[^"\']*)"',
    re.MULTILINE)
-# This re matches '<img srcset="..."'
+# This re matches '<img srcset="..."' or '<source srcset="..."'
 _SRCSET_RE = lazy_re.compile(
-    r'<img\b(?:[^>]*?\s)srcset="(?!\[\[|{{|\$i18n{)(?P<srcset>[^"\']*)"',
+    r'<(img|source)\b(?:[^>]*?\s)srcset="(?!\[\[|{{|\$i18n{)'
+    r'(?P<srcset>[^"\']*)"',
    re.MULTILINE)
 # This re is for splitting srcset value string into "image candidate strings".
 # Notes:
-# - HTML 5.2 states that URL cannot start with comma.
+# - HTML 5.2 states that URL cannot start or end with comma.
 # - the "descriptor" is either "width descriptor" or "pixel density descriptor".
 #   The first one consists of "valid non-negative integer + letter 'x'",
 #   the second one is formed of "positive valid floating-point number +
@@ -69,7 +70,9 @@ _SRCSET_RE = lazy_re.compile(
 #   that form both of them.
 # Matches for example "img2.png 2x" or "img9.png 11E-2w".
 _SRCSET_ENTRY_RE = lazy_re.compile(
-    r'\s*(?P<url>[^,]\S+)\s+(?P<descriptor>[\deE.-]+[wx])\s*',
+    r'\s*(?P<url>[^,\s]\S+[^,\s])'
+    r'(?:\s+(?P<descriptor>[\deE.-]+[wx]))?\s*'
+    r'(?P<separator>,|$)',
    re.MULTILINE)
 _ICON_RE = lazy_re.compile(
    r'<link rel="icon"\s(?:[^>]+?\s)?'
@@ -194,6 +197,7 @@ def SrcsetInlineAsDataURL(
  # Each of them consists of URL and descriptor.
  # _SRCSET_ENTRY_RE splits srcset into a list of URLs, descriptors and
  # commas.
+  # The descriptor is None when the optional descriptor group does not match.
  parts = _SRCSET_ENTRY_RE.split(srcset)
  if not parts:
@@ -206,37 +210,35 @@ def SrcsetInlineAsDataURL(
  # candidate string: [url, descriptor]
  candidate = [];
-  for part in parts:
+  # Each entry should consist of some text before the entry, the url,
-    if not part:
+  # the descriptor or None if the entry has no descriptor, a comma separator or
-      continue
+  # the end of the line, and finally some text after the entry (which is the
+  # same as the text before the next entry).
+  for i in range(0, len(parts) - 1, 4):
+    before, url, descriptor, separator, after = parts[i:i+5]
-    if part == ',':
+    # There must be a comma-separated next entry or this must be the last entry.
-      # There must be no URL without a descriptor.
+    assert separator == "," or (separator == "" and i == len(parts) - 5), (
-      assert not candidate, "Bad srcset format in '%s'" % srcset_match.group(0)
+           "Bad srcset format in {}".format(srcset_match.group(0)))
-      continue
+    # Both before and after the entry must be empty
+    assert before == after == "", (
-    if candidate:
+           "Bad srcset format in {}".format(srcset_match.group(0)))
-      # descriptor found
-      if candidate[0]:
-        # This is not "names_only" mode.
-        candidate.append(part)
-        new_candidates.append(" ".join(candidate))
-      candidate = []
-      continue
    if filename_expansion_function:
-      filename = filename_expansion_function(part)
+      filename = filename_expansion_function(url)
    else:
-      filename = part
+      filename = url
    data_url = ConvertFileToDataURL(filename, base_path, distribution,
                                    inlined_files, names_only)
-    candidate.append(data_url)
+    # A non-empty data_url means this is not "names_only" mode.
+    if data_url:
+      candidate = [data_url]
+      if descriptor:
+        candidate.append(descriptor)
-  # There must be no URL without a descriptor
+      new_candidates.append(" ".join(candidate))
-  assert not candidate, "Bad srcset ending in '%s' " % srcset_match.group(0)
  prefix = srcset_match.string[srcset_match.start():
      srcset_match.start('srcset')]

--- a/tools/grit/grit/format/html_inline_unittest.py
+++ b/tools/grit/grit/format/html_inline_unittest.py
@@ -597,7 +597,8 @@ class HtmlInlineUnittest(unittest.TestCase):
  def testImgSrcset(self):
    '''Tests that img srcset="" attributes are converted.'''
-    # Note that there is no space before "img10.png"
+    # Note that there is no space before "img10.png" and that
+    # "img11.png" has no descriptor.
    files = {
      'index.html': '''
      <html>
@@ -606,6 +607,9 @@ class HtmlInlineUnittest(unittest.TestCase):
      <img src="chrome://theme/img11.png" srcset="img7.png 1x, '''\
          '''chrome://theme/img13.png 2x">
      <img srcset="img8.png 300w, img9.png 11E-2w,img10.png -1e2w">
+      <img srcset="img11.png">
+      <img srcset="img11.png, img2.png 1x">
+      <img srcset="img2.png 1x, img11.png">
      </html>
      ''',
      'img1.png': '''a1''',
@@ -618,6 +622,7 @@ class HtmlInlineUnittest(unittest.TestCase):
      'img8.png': '''a8''',
      'img9.png': '''a9''',
      'img10.png': '''a10''',
+      'img11.png': '''a11''',
    }
    expected_inlined = '''
@@ -630,6 +635,9 @@ class HtmlInlineUnittest(unittest.TestCase):
          '''YTc= 1x,chrome://theme/img13.png 2x">
      <img srcset="data:image/png;base64,YTg= 300w,data:image/png;base64,'''\
          '''YTk= 11E-2w,data:image/png;base64,YTEw -1e2w">
+      <img srcset="data:image/png;base64,YTEx">
+      <img srcset="data:image/png;base64,YTEx,data:image/png;base64,YTI= 1x">
+      <img srcset="data:image/png;base64,YTI= 1x,data:image/png;base64,YTEx">
      </html>
      '''
@@ -681,6 +689,63 @@ class HtmlInlineUnittest(unittest.TestCase):
                         util.FixLineEnd(result.inlined_data, '\n'))
    tmp_dir.CleanUp()
+  def testSourceSrcset(self):
+    '''Tests that source srcset="" attributes are converted.'''
+    # Note that there is no space before "img10.png" and that
+    # "img11.png" has no descriptor.
+    files = {
+      'index.html': '''
+      <html>
+      <source src="img1.png" srcset="img2.png 1x, img3.png 2x">
+      <source src="img4.png" srcset=" img5.png   1x , img6.png 2x ">
+      <source src="chrome://theme/img11.png" srcset="img7.png 1x, '''\
+          '''chrome://theme/img13.png 2x">
+      <source srcset="img8.png 300w, img9.png 11E-2w,img10.png -1e2w">
+      <source srcset="img11.png">
+      </html>
+      ''',
+      'img1.png': '''a1''',
+      'img2.png': '''a2''',
+      'img3.png': '''a3''',
+      'img4.png': '''a4''',
+      'img5.png': '''a5''',
+      'img6.png': '''a6''',
+      'img7.png': '''a7''',
+      'img8.png': '''a8''',
+      'img9.png': '''a9''',
+      'img10.png': '''a10''',
+      'img11.png': '''a11''',
+    }
+    expected_inlined = '''
+      <html>
+      <source src="data:image/png;base64,YTE=" srcset="data:image/png;'''\
+          '''base64,YTI= 1x,data:image/png;base64,YTM= 2x">
+      <source src="data:image/png;base64,YTQ=" srcset="data:image/png;'''\
+          '''base64,YTU= 1x,data:image/png;base64,YTY= 2x">
+      <source src="chrome://theme/img11.png" srcset="data:image/png;'''\
+          '''base64,YTc= 1x,chrome://theme/img13.png 2x">
+      <source srcset="data:image/png;base64,YTg= 300w,data:image/png;'''\
+          '''base64,YTk= 11E-2w,data:image/png;base64,YTEw -1e2w">
+      <source srcset="data:image/png;base64,YTEx">
+      </html>
+      '''
+    source_resources = set()
+    tmp_dir = util.TempDir(files)
+    for filename in files:
+      source_resources.add(tmp_dir.GetPath(filename))
+    # Test normal inlining.
+    result = html_inline.DoInline(tmp_dir.GetPath('index.html'), None)
+    resources = result.inlined_files
+    resources.add(tmp_dir.GetPath('index.html'))
+    self.failUnlessEqual(resources, source_resources)
+    self.failUnlessEqual(expected_inlined,
+                         util.FixLineEnd(result.inlined_data, '\n'))
+    tmp_dir.CleanUp()
  def testConditionalInclude(self):
    '''Tests that output and dependency generation includes only files not'''\
        ''' blocked by  <if> macros.'''