Commit c73ae935 authored by Lukasz Anforowicz's avatar Lukasz Anforowicz Committed by Commit Bot

Handle some html/js polyglots in CORB confirmation sniffing.

Cross-Origin Read Blocking (CORB) tries to protect certain resource
types (e.g. text/html).  To be resilient against HTTP responses
mislabeled with an incorrect Content-Type, CORB sniffs the response body
to confirm if it truly is the protected type.

Before this CL the confirmation sniffing logic decided to block
resources that are both a valid html and a valid javascript.
Blocking of such resources is undesirable, because it is disruptive to
existing websites that use such polyglot responses in <script> tags.

After this CL, a HTML comment that contains a Javascript comment will
cause the confirmation sniffing to decide that the response is not
really a HTML document (this will prevent CORB blocking).

Bug: 839425
Cq-Include-Trybots: master.tryserver.chromium.linux:linux_mojo
Change-Id: Ie790a81c2742513aed9fda45edd0bb2976bd0fc6
Reviewed-on: https://chromium-review.googlesource.com/1042820
Commit-Queue: Łukasz Anforowicz <lukasza@chromium.org>
Reviewed-by: default avatarCharlie Reis <creis@chromium.org>
Cr-Commit-Position: refs/heads/master@{#555951}
parent e701f5a0
...@@ -417,11 +417,23 @@ IN_PROC_BROWSER_TEST_F(CrossSiteDocumentBlockingTest, BlockDocuments) { ...@@ -417,11 +417,23 @@ IN_PROC_BROWSER_TEST_F(CrossSiteDocumentBlockingTest, BlockDocuments) {
// jsonp.* - JSONP (i.e., script) mislabeled as a document. // jsonp.* - JSONP (i.e., script) mislabeled as a document.
// img.* - Contents that won't match the document label. // img.* - Contents that won't match the document label.
// valid.* - Correctly labeled responses of non-document types. // valid.* - Correctly labeled responses of non-document types.
const char* sniff_allowed_resources[] = { const char* sniff_allowed_resources[] = {"js.html",
"js.html", "comment_js.html", "js.xml", "js.json", "comment_js.html",
"js.txt", "jsonp.html", "jsonp.xml", "jsonp.json", "js.xml",
"jsonp.txt", "img.html", "img.xml", "img.json", "js.json",
"img.txt", "valid.js", "json-list.js", "nosniff.json-list.js"}; "js.txt",
"jsonp.html",
"jsonp.xml",
"jsonp.json",
"jsonp.txt",
"img.html",
"img.xml",
"img.json",
"img.txt",
"valid.js",
"json-list.js",
"nosniff.json-list.js",
"js-html-polyglot.html"};
for (const char* resource : sniff_allowed_resources) { for (const char* resource : sniff_allowed_resources) {
SCOPED_TRACE(base::StringPrintf("... while testing page: %s", resource)); SCOPED_TRACE(base::StringPrintf("... while testing page: %s", resource));
base::HistogramTester histograms; base::HistogramTester histograms;
......
<!--/*--><html><body><script type="text/javascript"><!--//*/
// This is a regression test for https://crbug.com/839425
// which found out that some script resources are served
// with text/html content-type and with a body that is
// both a valid html and a valid javascript.
var blah = 123;
//--></script></body></html>
...@@ -85,6 +85,88 @@ SniffingResult MatchesSignature(StringPiece* data, ...@@ -85,6 +85,88 @@ SniffingResult MatchesSignature(StringPiece* data,
return CrossOriginReadBlocking::kNo; return CrossOriginReadBlocking::kNo;
} }
// Checks if |data| starts with an HTML comment (i.e. with "<!-- ... -->").
// - If there is a valid, terminated comment then returns kYes.
// - If there is a start of a comment, but the comment is not completed (e.g.
// |data| == "<!-" or |data| == "<!-- not terminated yet") then returns
// kMaybe.
// - Returns kNo otherwise.
//
// Mutates |data| to advance past the comment when returning kYes.
//
// Additionally, if the body of the HTML comment (e.g. normally the kYes and
// kMaybe cases) contains javascript-like comments (e.g. |data| == "<!--/*-->"),
// then kNo will be returned (and |data| won't be mutated). This helps avoid
// CORB blocking of the html/js polyglots identified in
// https://crbug.com/839425.
SniffingResult MaybeSkipHtmlComment(StringPiece* data) {
static const StringPiece kBeginCommentSignature = "<!--";
if (!data->starts_with(kBeginCommentSignature)) {
if (kBeginCommentSignature.starts_with(*data))
return CrossOriginReadBlocking::kMaybe;
return CrossOriginReadBlocking::kNo;
}
enum State {
kNothingSpecial,
kAfterDash,
kAfterDashDash,
kAfterSlash,
} state = kNothingSpecial;
for (size_t i = kBeginCommentSignature.length(); i < data->length(); i++) {
char c = (*data)[i];
switch (state) {
case kNothingSpecial:
if (c == '-')
state = kAfterDash;
else if (c == '/')
state = kAfterSlash;
else
DCHECK_EQ(kNothingSpecial, state);
break;
case kAfterDash:
if (c == '-')
state = kAfterDashDash;
else if (c == '/')
state = kAfterSlash;
else
state = kNothingSpecial;
break;
case kAfterDashDash:
if (c == '>') {
// Found the end of the HTML comment.
data->remove_prefix(i + 1); // Advance past the comment.
return CrossOriginReadBlocking::kYes;
} else if (c == '-') {
state = kAfterDashDash;
} else if (c == '/') {
state = kAfterSlash;
} else {
state = kNothingSpecial;
}
break;
case kAfterSlash:
if (c == '/' || c == '*') {
// html/js polyglot - let's report that this is not HTML, to avoid
// blocking what may be a valid Javascript.
return CrossOriginReadBlocking::kNo;
}
if (c == '/')
state = kAfterSlash;
else if (c == '-')
state = kAfterDash;
else
state = kNothingSpecial;
break;
}
}
return CrossOriginReadBlocking::kMaybe;
}
// Headers from // Headers from
// https://fetch.spec.whatwg.org/#cors-safelisted-response-header-name. // https://fetch.spec.whatwg.org/#cors-safelisted-response-header-name.
// //
...@@ -238,21 +320,9 @@ SniffingResult CrossOriginReadBlocking::SniffForHTML(StringPiece data) { ...@@ -238,21 +320,9 @@ SniffingResult CrossOriginReadBlocking::SniffForHTML(StringPiece data) {
if (signature_match != kNo) if (signature_match != kNo)
return signature_match; return signature_match;
// "<!--" (the HTML comment syntax) is a special case, since it's valid JS SniffingResult comment_match = MaybeSkipHtmlComment(&data);
// as well. Skip over them.
static const StringPiece kBeginCommentSignature[] = {"<!--"};
SniffingResult comment_match = MatchesSignature(
&data, kBeginCommentSignature, arraysize(kBeginCommentSignature),
base::CompareCase::SENSITIVE);
if (comment_match != kYes) if (comment_match != kYes)
return comment_match; return comment_match;
// Look for an end comment.
static const StringPiece kEndComment = "-->";
size_t comment_end = data.find(kEndComment);
if (comment_end == base::StringPiece::npos)
return kMaybe; // Hit end of data with open comment.
data.remove_prefix(comment_end + kEndComment.length());
} }
// All of |data| was consumed, without a clear determination. // All of |data| was consumed, without a clear determination.
......
...@@ -252,10 +252,26 @@ HTML can be embedded cross-origin via `<iframe>` (as noted above), ...@@ -252,10 +252,26 @@ HTML can be embedded cross-origin via `<iframe>` (as noted above),
but otherwise HTML documents can but otherwise HTML documents can
only be loaded by fetch() and XHR, both of which require CORS. HTML sniffing is only be loaded by fetch() and XHR, both of which require CORS. HTML sniffing is
already well-understood, so (unlike JSON) it is relatively easy to identify HTML already well-understood, so (unlike JSON) it is relatively easy to identify HTML
resources with high confidence. Only one ambiguous polyglot case has been resources with high confidence.
One ambiguous polyglot case has been
identified that CORB needs to handle conservatively: HTML-style comments, which identified that CORB needs to handle conservatively: HTML-style comments, which
are part of the JavaScript syntax. CORB handles these by skipping over HTML are [part of the JavaScript syntax](https://www.ecma-international.org/ecma-262/8.0/index.html#sec-html-like-comments).
comment blocks when sniffing to confirm a HTML content type. * CORB skips over HTML comment blocks when sniffing to
confirm a HTML content type. This means that (unlike in
[normal HTML sniffing](https://mimesniff.spec.whatwg.org/#identifying-a-resource-with-an-unknown-mime-type))
presence of "`<!--`" string doesn't immediately confirm that the sniffed resource is a
HTML document - the HTML comment still has to be followed by a valid HTML tag.
* Additionally if the HTML-style comment contains Javascript-style comments
(i.e. either "`/*`" or "`//`" substrings),
then CORB conservatively assumes that the resource is not a HTML document.
This helps avoid blocking html/javascript polyglots which have been observed
in use on real websites - see the example below:
```js
<!--/*--><html><body><script type="text/javascript"><!--//*/
var x = "This is both valid html and valid javascript";
//--></script></body></html>
```
### Protecting XML ### Protecting XML
......
...@@ -50,34 +50,58 @@ TEST(CrossOriginReadBlockingTest, IsValidCorsHeaderSet) { ...@@ -50,34 +50,58 @@ TEST(CrossOriginReadBlockingTest, IsValidCorsHeaderSet) {
} }
TEST(CrossOriginReadBlockingTest, SniffForHTML) { TEST(CrossOriginReadBlockingTest, SniffForHTML) {
StringPiece html_data(" \t\r\n <HtMladfokadfkado"); using CORB = CrossOriginReadBlocking;
StringPiece comment_html_data(" <!-- this is comment --> <html><body>");
StringPiece two_comments_html_data(
"<!-- this is comment -->\n<!-- this is comment --><html><body>");
StringPiece commented_out_html_tag_data("<!-- <html> <?xml> \n<html>--><b");
StringPiece mixed_comments_html_data(
"<!-- this is comment <!-- --> <script></script>");
StringPiece non_html_data(" var name=window.location;\nadfadf");
StringPiece comment_js_data(
" <!-- this is comment\n document.write(1);\n// -->window.open()");
StringPiece empty_data("");
// Something that technically matches the start of a valid HTML tag.
EXPECT_EQ(SniffingResult::kYes, EXPECT_EQ(SniffingResult::kYes,
CrossOriginReadBlocking::SniffForHTML(html_data)); CORB::SniffForHTML(" \t\r\n <HtMladfokadfkado"));
EXPECT_EQ(SniffingResult::kYes,
CrossOriginReadBlocking::SniffForHTML(comment_html_data)); // HTML comment followed by whitespace and valid HTML tags.
EXPECT_EQ(SniffingResult::kYes,
CrossOriginReadBlocking::SniffForHTML(two_comments_html_data));
EXPECT_EQ(SniffingResult::kYes,
CrossOriginReadBlocking::SniffForHTML(commented_out_html_tag_data));
EXPECT_EQ(SniffingResult::kYes, EXPECT_EQ(SniffingResult::kYes,
CrossOriginReadBlocking::SniffForHTML(mixed_comments_html_data)); CORB::SniffForHTML(" <!-- this is comment --> <html><body>"));
// HTML comment, whitespace, more HTML comments, HTML tags.
EXPECT_EQ(
SniffingResult::kYes,
CORB::SniffForHTML(
"<!-- this is comment -->\n<!-- this is comment --><html><body>"));
// HTML comment followed by valid HTML tag.
EXPECT_EQ(
SniffingResult::kYes,
CORB::SniffForHTML("<!-- this is comment <!-- --><script></script>"));
// Whitespace followed by valid Javascript.
EXPECT_EQ(SniffingResult::kNo, EXPECT_EQ(SniffingResult::kNo,
CrossOriginReadBlocking::SniffForHTML(non_html_data)); CORB::SniffForHTML(" var name=window.location;\nadfadf"));
// HTML comment followed by valid Javascript.
EXPECT_EQ(
SniffingResult::kNo,
CORB::SniffForHTML(
" <!-- this is comment\n document.write(1);\n// -->window.open()"));
// HTML/Javascript polyglot should return kNo.
EXPECT_EQ(SniffingResult::kNo,
CORB::SniffForHTML(
"<!--/*--><html><body><script type='text/javascript'><!--//*/\n"
"var blah = 123;\n"
"//--></script></body></html>"));
// Tests to cover more of the state machine inside MaybeSkipHtmlComment.
EXPECT_EQ(SniffingResult::kNo, CORB::SniffForHTML("<!-- -/* --><html>"));
EXPECT_EQ(SniffingResult::kNo, CORB::SniffForHTML("<!-- --/* --><html>"));
EXPECT_EQ(SniffingResult::kYes, CORB::SniffForHTML("<!----><html>"));
EXPECT_EQ(SniffingResult::kYes,
CORB::SniffForHTML("<!-- ---/--> <html><body>"));
// Commented out html tag followed by non-html (" x").
StringPiece commented_out_html_tag_data("<!-- <html> <?xml> \n<html>--> x");
EXPECT_EQ(SniffingResult::kNo, EXPECT_EQ(SniffingResult::kNo,
CrossOriginReadBlocking::SniffForHTML(comment_js_data)); CrossOriginReadBlocking::SniffForHTML(commented_out_html_tag_data));
// Prefixes of |commented_out_html_tag_data| should be indeterminate. // Prefixes of |commented_out_html_tag_data| should be indeterminate.
// This covers testing "<!-" as well as "<!-- not terminated yet...".
StringPiece almost_html = commented_out_html_tag_data; StringPiece almost_html = commented_out_html_tag_data;
while (!almost_html.empty()) { while (!almost_html.empty()) {
almost_html.remove_suffix(1); almost_html.remove_suffix(1);
...@@ -85,6 +109,13 @@ TEST(CrossOriginReadBlockingTest, SniffForHTML) { ...@@ -85,6 +109,13 @@ TEST(CrossOriginReadBlockingTest, SniffForHTML) {
CrossOriginReadBlocking::SniffForHTML(almost_html)) CrossOriginReadBlocking::SniffForHTML(almost_html))
<< almost_html; << almost_html;
} }
// Explicit tests for an unfinished comment (some also covered by the prefix
// tests above).
EXPECT_EQ(SniffingResult::kMaybe, CORB::SniffForHTML(""));
EXPECT_EQ(SniffingResult::kMaybe, CORB::SniffForHTML("<!"));
EXPECT_EQ(SniffingResult::kMaybe, CORB::SniffForHTML("<!-- unterminated..."));
EXPECT_EQ(SniffingResult::kNo, CORB::SniffForHTML("<!-- /* js "));
} }
TEST(CrossOriginReadBlockingTest, SniffForXML) { TEST(CrossOriginReadBlockingTest, SniffForXML) {
......
<!--/*--><html><body><script type="text/javascript"><!--//*/
// This is a regression test for https://crbug.com/839425
// which found out that some script resources are served
// with text/html content-type and with a body that is
// both a valid html and a valid javascript.
window.polyglot = 123;
//--></script></body></html>
<!DOCTYPE html>
<!-- Test verifies that CORB won't block a polyglot script that is
both a valid HTML document and also valid Javascript.
-->
<meta charset="utf-8">
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<div id=log></div>
<script>
async_test(function(t) {
var script = document.createElement("script")
script.onload = t.step_func_done(function(){
// Verify that html-js-polyglot.js wasn't blocked - that script
// should have set window.polyglot to 123.
assert_equals(window.polyglot, 123);
})
addEventListener("error",function(e) {
t.step(function() {
assert_unreached("No errors are expected with or without CORB.");
t.done();
})
});
// www1 is cross-origin, so the HTTP response is CORB-eligible.
script.src = "http://{{domains[www1]}}:{{ports[http][0]}}/fetch/corb/resources/html-js-polyglot.js"
document.body.appendChild(script)
});
</script>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment