Make XSSAuditor more suspicious about start of path

BUG=613123 Review-Url: https://codereview.chromium.org/2554403004 Cr-Commit-Position: refs/heads/master@{#437574}

Make XSSAuditor more suspicious about start of path
BUG=613123 Review-Url: https://codereview.chromium.org/2554403004 Cr-Commit-Position: refs/heads/master@{#437574}
fe1346fd · tsepez · Commit bot · 2600e8e0 · fe1346fd · fe1346fd
Commit fe1346fd authored Dec 09, 2016 by tsepez Committed by Commit bot Dec 09, 2016
3 changed files
--- a/third_party/WebKit/LayoutTests/http/tests/security/xssAuditor/script-tag-with-source-implied-host-expected.txt
+++ b/third_party/WebKit/LayoutTests/http/tests/security/xssAuditor/script-tag-with-source-implied-host-expected.txt
+CONSOLE ERROR: line 4: The XSS Auditor refused to execute a script in 'http://localhost:8000/security/xssAuditor/resources/echo-intertag.pl?clutter=/xssAuditor/resources/xss.js%3e%3c/script%3e&q=%3cscript%20src=https:127.0.0.1:8443/security' because its source code was found within the request. The server sent an 'X-XSS-Protection' header requesting this behavior.
--- a/third_party/WebKit/LayoutTests/http/tests/security/xssAuditor/script-tag-with-source-implied-host.html
+++ b/third_party/WebKit/LayoutTests/http/tests/security/xssAuditor/script-tag-with-source-implied-host.html
+<!DOCTYPE html>
+<html>
+<head>
+<script>
+if (window.testRunner) {
+  testRunner.dumpAsText();
+  testRunner.setXSSAuditorEnabled(true);
+}
+</script>
+</head>
+<body>
+<iframe src="http://localhost:8000/security/xssAuditor/resources/echo-intertag.pl?clutter=/xssAuditor/resources/xss.js%3e%3c/script%3e&q=%3cscript%20src=https:127.0.0.1:8443/security">
+</iframe>
+</body>
+</html>
--- a/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp
+++ b/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp
@@ -96,6 +96,10 @@ static bool isTerminatingCharacter(UChar c) {
          c == '>' || c == ',');
 }
+static bool isSlash(UChar c) {
+  return (c == '/' || c == '\\');
+}
 static bool isHTMLQuote(UChar c) {
  return (c == '"' || c == '\'');
 }
@@ -203,30 +207,54 @@ static String fullyDecodeString(const String& string,
  return workingString;
 }
+// XSSAuditor's task is to determine how much of any given content came
+// from a reflection vs. what occurs normally on the page. It must do
+// this in the face of an attacker avoiding detection by splicing on page
+// content in such a way as to remain syntactically valid. The next two
+// functions apply heuristics to get the longest possible fragment in
+// the face of such trickery.
 static void truncateForSrcLikeAttribute(String& decodedSnippet) {
-  // In HTTP URLs, characters following the first ?, #, or third slash may come
+  // In HTTP URLs, characters in the query string (following the first ?),
-  // from the page itself and can be merely ignored by an attacker's server when
+  // in the fragment (following the first #), or even in the path (typically
-  // a remote script or script-like resource is requested. In DATA URLS, the
+  // following the third slash but subject to generous interpretation of a
-  // payload starts at the first comma, and the the first /*, //, or <!-- may
+  // lack of leading slashes) may be merely ignored by an attacker's server
-  // introduce a comment.
+  // when a remote script or script-like resource is requested. Hence these
+  // are places where organic page content may be spliced.
+  //
+  // In DATA URLS, the payload starts at the first comma, and the first
+  //  "/*", "//", or "<!--" may introduce a comment, which can then be used
+  // to splice page data harmlessly onto the end of the payload.
  //
  // Also, DATA URLs may use the same string literal tricks as with script
  // content itself. In either case, content following this may come from the
  // page and may be ignored when the script is executed. Also, any of these
  // characters may now be represented by the (enlarged) set of html5 entities.
  //
-  // For simplicity, we don't differentiate based on URL scheme, and stop at the
+  // For simplicity, we don't differentiate based on URL scheme, and stop at
-  // first & (since it might be part of an entity for any of the subsequent
+  // any of the following:
-  // punctuation), the first # or ?, the third slash, or the first slash, <, ',
+  //   - the first &, since it might be part of an entity for any of the
-  // or " once a comma is seen.
+  //     subsequent punctuation.
+  //   - the first # or ?, since the query and fragment can be ignored.
+  //   - the third slash, since this typically starts the path, but account
+  //     for a possible lack of leading slashes following the scheme.
+  //   - the first slash, <, ', or " once a comma is seen, since we
+  //     may now be in a data URL payload.
  int slashCount = 0;
  bool commaSeen = false;
-  for (size_t currentLength = 0; currentLength < decodedSnippet.length();
+  bool colonSeen = false;
-       ++currentLength) {
+  for (size_t currentLength = 0, remainingLength = decodedSnippet.length();
+       remainingLength; ++currentLength, --remainingLength) {
    UChar currentChar = decodedSnippet[currentLength];
+    if (currentChar == ':' && !colonSeen) {
+      if (remainingLength > 1 && !isSlash(decodedSnippet[currentLength + 1]))
+        ++slashCount;
+      if (remainingLength > 2 && !isSlash(decodedSnippet[currentLength + 2]))
+        ++slashCount;
+      colonSeen = true;
+    }
    if (currentChar == '&' || currentChar == '?' || currentChar == '#' ||
-        ((currentChar == '/' || currentChar == '\\') &&
+        (isSlash(currentChar) && (commaSeen || ++slashCount > 2)) ||
-         (commaSeen || ++slashCount > 2)) ||
        (currentChar == '<' && commaSeen) ||
        (currentChar == '\'' && commaSeen) ||
        (currentChar == '"' && commaSeen)) {