Commit 3d1e2e08 authored by Joshua Bell, committed by Commit Bot

Loader: use default encoding not parent encoding for XML/JSON

If unspecified (in headers or content), resources like HTML in iframes
fall back to the parent's encoding. This should not be the case for
XML/JSON which have well-defined defaults.

Bug: 904017
Change-Id: I42f8950c5c2ef63c98bcec58a1f4b0bd0b190c33
Reviewed-on: https://chromium-review.googlesource.com/c/1331953
Reviewed-by: Nate Chapin <japhet@chromium.org>
Reviewed-by: Jinsuk Kim <jinsukkim@chromium.org>
Commit-Queue: Joshua Bell <jsbell@chromium.org>
Cr-Commit-Position: refs/heads/master@{#608483}
parent d503ca09
This is a testharness.js-based test.
PASS Expect resources/utf-32-big-endian-bom.html to parse as windows-1252
FAIL Expect resources/utf-32-big-endian-bom.xml to parse as UTF-8 assert_equals: expected "UTF-8" but got "windows-1252"
PASS Expect resources/utf-32-big-endian-nobom.html to parse as windows-1252
FAIL Expect resources/utf-32-big-endian-nobom.xml to parse as UTF-8 assert_equals: expected "UTF-8" but got "windows-1252"
PASS Expect resources/utf-32-little-endian-bom.html to parse as UTF-16LE
PASS Expect resources/utf-32-little-endian-bom.xml to parse as UTF-16LE
PASS Expect resources/utf-32-little-endian-nobom.html to parse as windows-1252
FAIL Expect resources/utf-32-little-endian-nobom.xml to parse as UTF-8 assert_equals: expected "UTF-8" but got "windows-1252"
Harness: the test ran to completion.
<!DOCTYPE html>
<meta charset="windows-1252">
<title>Default encodings of XML/JSON resources with Windows-1252 page</title>
<script src="../../resources/testharness.js"></script>
<script src="../../resources/testharnessreport.js"></script>
<body>
<script>
// Loads an XHTML resource in an iframe and checks, once it has loaded, that
// the resulting document reports the XHTML content type and UTF-8 as its
// character set.
async_test(test => {
  const frame = document.createElement('iframe');

  // Runs when the frame fires `load`; inspects the document it produced.
  const checkLoadedDocument = () => {
    const loaded = frame.contentDocument;
    assert_equals(loaded.contentType, 'application/xhtml+xml',
                  'Resource should be parsed as XHTML');
    assert_equals(loaded.characterSet, 'UTF-8',
                  'Resource should use default encoding for XML');
  };

  frame.src = 'resources/undeclared-encoding.xhtml';
  // step_func_done completes the async test after the checks have run.
  frame.onload = test.step_func_done(checkLoadedDocument);
  document.body.appendChild(frame);
  // Detach the frame when the test finishes, whatever the outcome.
  test.add_cleanup(() => frame.remove());
}, 'Default encoding of XML resource');
// Loads a JSON resource in an iframe and checks, once it has loaded, that the
// resulting document reports the JSON content type and UTF-8 as its character
// set, and that its text round-trips through JSON.parse with the expected
// non-ASCII character intact.
async_test(t => {
  const iframe = document.createElement('iframe');
  iframe.src = 'resources/undeclared-encoding.json';
  iframe.onload = t.step_func_done(() => {
    const doc = iframe.contentDocument;
    assert_equals(doc.contentType, 'application/json',
                  'Resource should be parsed as JSON');
    assert_equals(doc.characterSet, 'UTF-8',
                  'Resource should use default encoding for JSON');

    // For good measure, parse as JSON and verify the content.
    const data = JSON.parse(doc.body.innerText);
    assert_true(data.parsed, 'Resource text should parse as JSON');

    // The file contains U+00C0 (LATIN CAPITAL LETTER A WITH GRAVE), encoded
    // in UTF-8 as the bytes 0xC3 0x80. If the resource were misparsed as
    // windows-1252, those bytes would decode to the two characters
    // U+00C3 U+20AC (A WITH TILDE followed by EURO SIGN) instead.
    // This comment is deliberately ASCII-only so that it cannot itself be
    // corrupted by an encoding mismatch.
    assert_equals(data.text, '\u00C0',
                  'Non-ASCII text should be decoded as UTF-8');
  });
  document.body.appendChild(iframe);
  // Detach the iframe when the test finishes, whatever the outcome.
  t.add_cleanup(() => iframe.remove());
}, 'Default encoding of JSON resource');
</script>
</body>
<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>XHTML Document with no encoding declared</title>
</head>
<body>
<p>Test default XHTML encoding (in the absence of an XML declaration).</p>
</body>
</html>
...@@ -119,7 +119,7 @@ std::unique_ptr<TextResourceDecoder> BuildTextResourceDecoderFor( ...@@ -119,7 +119,7 @@ std::unique_ptr<TextResourceDecoder> BuildTextResourceDecoderFor(
// could be an attack vector. // could be an attack vector.
// FIXME: This might be too cautious for non-7bit-encodings and we may // FIXME: This might be too cautious for non-7bit-encodings and we may
// consider relaxing this later after testing. // consider relaxing this later after testing.
const bool use_hint_encoding = bool use_hint_encoding =
frame && CanReferToParentFrameEncoding(frame, parent_frame); frame && CanReferToParentFrameEncoding(frame, parent_frame);
std::unique_ptr<TextResourceDecoder> decoder; std::unique_ptr<TextResourceDecoder> decoder;
...@@ -134,9 +134,11 @@ std::unique_ptr<TextResourceDecoder> BuildTextResourceDecoderFor( ...@@ -134,9 +134,11 @@ std::unique_ptr<TextResourceDecoder> BuildTextResourceDecoderFor(
if (DOMImplementation::IsXMLMIMEType(mime_type)) { if (DOMImplementation::IsXMLMIMEType(mime_type)) {
decoder = TextResourceDecoder::Create(TextResourceDecoderOptions( decoder = TextResourceDecoder::Create(TextResourceDecoderOptions(
TextResourceDecoderOptions::kXMLContent, default_encoding)); TextResourceDecoderOptions::kXMLContent, default_encoding));
use_hint_encoding = false;
} else if (DOMImplementation::IsJSONMIMEType(mime_type)) { } else if (DOMImplementation::IsJSONMIMEType(mime_type)) {
decoder = TextResourceDecoder::Create(TextResourceDecoderOptions( decoder = TextResourceDecoder::Create(TextResourceDecoderOptions(
TextResourceDecoderOptions::kJSONContent, default_encoding)); TextResourceDecoderOptions::kJSONContent, default_encoding));
use_hint_encoding = false;
} else { } else {
WTF::TextEncoding hint_encoding; WTF::TextEncoding hint_encoding;
if (use_hint_encoding && if (use_hint_encoding &&
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment