Commit c3abb8a9 authored by Hiroshige Hayashizaki, committed by Commit Bot

XHR: Fallback to UTF-8 encoding if specified encoding is not valid

Previously, if the encoding returned by
XMLHttpRequest::FinalResponseCharset() is invalid, Latin-1 encoding
was used due to the logic in TextResourceDecoder::DefaultEncoding().

After this CL, the encoding determined by the latter half of
XMLHttpRequest::CreateDecoder() is used in such cases,
which is UTF-8 for text.

Bug: 905968
Change-Id: I04e6ae524b8e1d3abd21e4a7a10279eb5638a58c
Reviewed-on: https://chromium-review.googlesource.com/c/1342813
Commit-Queue: Hiroshige Hayashizaki <hiroshige@chromium.org>
Reviewed-by: Hiroshige Hayashizaki <hiroshige@chromium.org>
Reviewed-by: Kouhei Ueno <kouhei@chromium.org>
Reviewed-by: Adam Rice <ricea@chromium.org>
Reviewed-by: Yutaka Hirano <yhirano@chromium.org>
Cr-Commit-Position: refs/heads/master@{#632816}
parent 01d0468d
......@@ -1557,12 +1557,33 @@ AtomicString XMLHttpRequest::FinalResponseMIMETypeWithFallback() const {
return AtomicString("text/xml");
}
String XMLHttpRequest::FinalResponseCharset() const {
// https://xhr.spec.whatwg.org/#final-charset
WTF::TextEncoding XMLHttpRequest::FinalResponseCharset() const {
// 1. Let label be null. [spec text]
//
// 2. If response MIME type's parameters["charset"] exists, then set label to
// it. [spec text]
String label = response_.TextEncodingName();
// 3. If override MIME type's parameters["charset"] exists, then set label to
// it. [spec text]
String override_response_charset =
ExtractCharsetFromMediaType(mime_type_override_);
if (!override_response_charset.IsEmpty())
return override_response_charset;
return response_.TextEncodingName();
label = override_response_charset;
// 4. If label is null, then return null. [spec text]
//
// 5. Let encoding be the result of getting an encoding from label. [spec
// text]
//
// 6. If encoding is failure, then return null. [spec text]
//
// 7. Return encoding. [spec text]
//
// We rely on WTF::TextEncoding() to return invalid TextEncoding for
// null, empty, or invalid/unsupported |label|.
return WTF::TextEncoding(label);
}
void XMLHttpRequest::UpdateContentTypeAndCharset(
......@@ -1812,15 +1833,12 @@ std::unique_ptr<TextResourceDecoder> XMLHttpRequest::CreateDecoder() const {
if (response_type_code_ == kResponseTypeJSON)
return TextResourceDecoder::Create(decoder_options_for_utf8_plain_text);
String final_response_charset = FinalResponseCharset();
if (!final_response_charset.IsEmpty()) {
// If the final charset is given, use the charset without sniffing the
// content.
// TODO(crbug/905968): If WTF::TextEncoding::IsValid() is false, this
// currently falls back to Latin1Encoding(). Fallback to UTF-8 instead.
WTF::TextEncoding final_response_charset = FinalResponseCharset();
if (final_response_charset.IsValid()) {
// If the final charset is given and valid, use the charset without
// sniffing the content.
return TextResourceDecoder::Create(TextResourceDecoderOptions(
TextResourceDecoderOptions::kPlainTextContent,
WTF::TextEncoding(final_response_charset)));
TextResourceDecoderOptions::kPlainTextContent, final_response_charset));
}
TextResourceDecoderOptions decoder_options_for_xml(
......
......@@ -217,7 +217,7 @@ class XMLHttpRequest final : public XMLHttpRequestEventTarget,
AtomicString FinalResponseMIMETypeWithFallback() const;
// Returns the "final charset" defined in
// https://xhr.spec.whatwg.org/#final-charset.
String FinalResponseCharset() const;
WTF::TextEncoding FinalResponseCharset() const;
bool ResponseIsXML() const;
bool ResponseIsHTML() const;
......
This is a testharness.js-based test.
PASS UTF-7 should not be supported
PASS utf-7 should not be supported
PASS UTF-32 with BOM should decode as UTF-16LE
FAIL UTF-32 with no BOM should decode as UTF-8 assert_equals: Decoding with UTF-32 expected "U+0041/U+0000/U+0000/U+0000/U+0042/U+0000/U+0000/U+0080" but got "U+0041/U+0000/U+0000/U+0000/U+0042/U+0000/U+0000/U+00C2/U+20AC"
PASS utf-32 with BOM should decode as UTF-16LE
FAIL utf-32 with no BOM should decode as UTF-8 assert_equals: Decoding with utf-32 expected "U+0041/U+0000/U+0000/U+0000/U+0042/U+0000/U+0000/U+0080" but got "U+0041/U+0000/U+0000/U+0000/U+0042/U+0000/U+0000/U+00C2/U+20AC"
PASS UTF-32LE with BOM should decode as UTF-16LE
FAIL UTF-32LE with no BOM should decode as UTF-8 assert_equals: Decoding with UTF-32LE expected "U+0041/U+0000/U+0000/U+0000/U+0042/U+0000/U+0000/U+0080" but got "U+0041/U+0000/U+0000/U+0000/U+0042/U+0000/U+0000/U+00C2/U+20AC"
PASS utf-32le with BOM should decode as UTF-16LE
FAIL utf-32le with no BOM should decode as UTF-8 assert_equals: Decoding with utf-32le expected "U+0041/U+0000/U+0000/U+0000/U+0042/U+0000/U+0000/U+0080" but got "U+0041/U+0000/U+0000/U+0000/U+0042/U+0000/U+0000/U+00C2/U+20AC"
FAIL UTF-32be with no BOM should decode as UTF-8 assert_equals: Decoding with UTF-32be expected "U+0000/U+0000/U+0000/U+0041/U+0000/U+0000/U+0000/U+0042/U+0080" but got "U+0000/U+0000/U+0000/U+0041/U+0000/U+0000/U+0000/U+0042/U+00C2/U+20AC"
FAIL UTF-32be with BOM should decode as UTF-8 assert_equals: Decoding with UTF-32be expected "U+0000/U+0000/U+FFFD/U+FFFD/U+0000/U+0000/U+0000/U+0041/U+0000/U+0080/U+0042" but got "U+0000/U+0000/U+00FE/U+00FF/U+0000/U+0000/U+0000/U+0041/U+0000/U+00C2/U+20AC/U+0042"
FAIL utf-32be with no BOM should decode as UTF-8 assert_equals: Decoding with utf-32be expected "U+0000/U+0000/U+0000/U+0041/U+0000/U+0000/U+0000/U+0042/U+0080" but got "U+0000/U+0000/U+0000/U+0041/U+0000/U+0000/U+0000/U+0042/U+00C2/U+20AC"
FAIL utf-32be with BOM should decode as UTF-8 assert_equals: Decoding with utf-32be expected "U+0000/U+0000/U+FFFD/U+FFFD/U+0000/U+0000/U+0000/U+0041/U+0000/U+0080/U+0042" but got "U+0000/U+0000/U+00FE/U+00FF/U+0000/U+0000/U+0000/U+0041/U+0000/U+00C2/U+20AC/U+0042"
Harness: the test ran to completion.
This is a testharness.js-based test.
PASS UTF-7 should not be supported
PASS utf-7 should not be supported
PASS UTF-32 with BOM should decode as UTF-16LE
FAIL UTF-32 with no BOM should decode as UTF-8 assert_equals: Decoding with UTF-32 expected "U+0041/U+0000/U+0000/U+0000/U+0042/U+0000/U+0000/U+0080" but got "U+0041/U+0000/U+0000/U+0000/U+0042/U+0000/U+0000/U+00C2/U+20AC"
PASS utf-32 with BOM should decode as UTF-16LE
FAIL utf-32 with no BOM should decode as UTF-8 assert_equals: Decoding with utf-32 expected "U+0041/U+0000/U+0000/U+0000/U+0042/U+0000/U+0000/U+0080" but got "U+0041/U+0000/U+0000/U+0000/U+0042/U+0000/U+0000/U+00C2/U+20AC"
PASS UTF-32LE with BOM should decode as UTF-16LE
FAIL UTF-32LE with no BOM should decode as UTF-8 assert_equals: Decoding with UTF-32LE expected "U+0041/U+0000/U+0000/U+0000/U+0042/U+0000/U+0000/U+0080" but got "U+0041/U+0000/U+0000/U+0000/U+0042/U+0000/U+0000/U+00C2/U+20AC"
PASS utf-32le with BOM should decode as UTF-16LE
FAIL utf-32le with no BOM should decode as UTF-8 assert_equals: Decoding with utf-32le expected "U+0041/U+0000/U+0000/U+0000/U+0042/U+0000/U+0000/U+0080" but got "U+0041/U+0000/U+0000/U+0000/U+0042/U+0000/U+0000/U+00C2/U+20AC"
FAIL UTF-32be with no BOM should decode as UTF-8 assert_equals: Decoding with UTF-32be expected "U+0000/U+0000/U+0000/U+0041/U+0000/U+0000/U+0000/U+0042/U+0080" but got "U+0000/U+0000/U+0000/U+0041/U+0000/U+0000/U+0000/U+0042/U+00C2/U+20AC"
FAIL UTF-32be with BOM should decode as UTF-8 assert_equals: Decoding with UTF-32be expected "U+0000/U+0000/U+FFFD/U+FFFD/U+0000/U+0000/U+0000/U+0041/U+0000/U+0080/U+0042" but got "U+0000/U+0000/U+00FE/U+00FF/U+0000/U+0000/U+0000/U+0041/U+0000/U+00C2/U+20AC/U+0042"
FAIL utf-32be with no BOM should decode as UTF-8 assert_equals: Decoding with utf-32be expected "U+0000/U+0000/U+0000/U+0041/U+0000/U+0000/U+0000/U+0042/U+0080" but got "U+0000/U+0000/U+0000/U+0041/U+0000/U+0000/U+0000/U+0042/U+00C2/U+20AC"
FAIL utf-32be with BOM should decode as UTF-8 assert_equals: Decoding with utf-32be expected "U+0000/U+0000/U+FFFD/U+FFFD/U+0000/U+0000/U+0000/U+0041/U+0000/U+0080/U+0042" but got "U+0000/U+0000/U+00FE/U+00FF/U+0000/U+0000/U+0000/U+0041/U+0000/U+00C2/U+20AC/U+0042"
Harness: the test ran to completion.
This is a testharness.js-based test.
PASS overrideMimeType() is not reset by open(), basic
PASS overrideMimeType() is not reset by open()
PASS If charset is not overridden by overrideMimeType() the original continues to be used
FAIL Charset can be overridden by overrideMimeType() with a bogus charset assert_equals: expected "\ufffd\ufffd" but got "Âð"
Harness: the test ran to completion.
This is a testharness.js-based test.
PASS XMLHttpRequest: responseText decoding (application/xml %3C%3Fxml%20version%3D'1.0'%20encoding%3D'windows-1252'%3F%3E%3Cx%3E%e6%a9%9f%3C%2Fx%3E empty)
PASS XMLHttpRequest: responseText decoding (text/html %3C!doctype%20html%3E%3Cmeta%20charset%3Dwindows-1252%3E%3Cx%3E%e6%a9%9f%3C%2Fx%3E empty)
PASS XMLHttpRequest: responseText decoding (application/xml;charset=utf-8 %3C%3Fxml%20version%3D'1.0'%20encoding%3D'windows-1252'%3F%3E%3Cx%3E%e6%a9%9f%3C%2Fx%3E empty)
PASS XMLHttpRequest: responseText decoding (application/xml;charset=windows-1252 %3C%3Fxml%20version%3D'1.0'%20encoding%3D'windows-1252'%3F%3E%3Cx%3E%e6%a9%9f%3C%2Fx%3E empty)
PASS XMLHttpRequest: responseText decoding (text/html;charset=utf-8 %3C!doctype%20html%3E%3Cmeta%20charset%3Dwindows-1252%3E%3Cx%3E%e6%a9%9f%3C%2Fx%3E empty)
PASS XMLHttpRequest: responseText decoding (text/html;charset=windows-1252 %3C!doctype%20html%3E%3Cmeta%20charset%3Dwindows-1252%3E%3Cx%3E%e6%a9%9f%3C%2Fx%3E empty)
PASS XMLHttpRequest: responseText decoding (text/plain;charset=windows-1252 %FF empty)
PASS XMLHttpRequest: responseText decoding (text/plain %FF empty)
PASS XMLHttpRequest: responseText decoding (text/plain %FE%FF empty)
PASS XMLHttpRequest: responseText decoding (text/plain %FE%FF%FE%FF empty)
PASS XMLHttpRequest: responseText decoding (text/plain %EF%BB%BF empty)
PASS XMLHttpRequest: responseText decoding (text/plain %EF%BB%BF%EF%BB%BF empty)
PASS XMLHttpRequest: responseText decoding (text/plain %C2 empty)
PASS XMLHttpRequest: responseText decoding (text/xml %FE%FF empty)
PASS XMLHttpRequest: responseText decoding (text/xml %FE%FF%FE%FF empty)
PASS XMLHttpRequest: responseText decoding (text/xml %EF%BB%BF empty)
PASS XMLHttpRequest: responseText decoding (text/xml %EF%BB%BF%EF%BB%BF empty)
PASS XMLHttpRequest: responseText decoding (text/plain %E3%81%B2 empty)
PASS XMLHttpRequest: responseText decoding (application/xml %3C%3Fxml%20version%3D'1.0'%20encoding%3D'windows-1252'%3F%3E%3Cx%3E%e6%a9%9f%3C%2Fx%3E text)
PASS XMLHttpRequest: responseText decoding (text/html %3C!doctype%20html%3E%3Cmeta%20charset%3Dwindows-1252%3E%3Cx%3E%e6%a9%9f%3C%2Fx%3E text)
PASS XMLHttpRequest: responseText decoding (application/xml;charset=utf-8 %3C%3Fxml%20version%3D'1.0'%20encoding%3D'windows-1252'%3F%3E%3Cx%3E%e6%a9%9f%3C%2Fx%3E text)
PASS XMLHttpRequest: responseText decoding (application/xml;charset=windows-1252 %3C%3Fxml%20version%3D'1.0'%20encoding%3D'windows-1252'%3F%3E%3Cx%3E%e6%a9%9f%3C%2Fx%3E text)
PASS XMLHttpRequest: responseText decoding (text/html;charset=utf-8 %3C!doctype%20html%3E%3Cmeta%20charset%3Dwindows-1252%3E%3Cx%3E%e6%a9%9f%3C%2Fx%3E text)
PASS XMLHttpRequest: responseText decoding (text/html;charset=windows-1252 %3C!doctype%20html%3E%3Cmeta%20charset%3Dwindows-1252%3E%3Cx%3E%e6%a9%9f%3C%2Fx%3E text)
PASS XMLHttpRequest: responseText decoding (text/plain;charset=windows-1252 %FF text)
PASS XMLHttpRequest: responseText decoding (text/plain %FF text)
PASS XMLHttpRequest: responseText decoding (text/plain %FE%FF text)
PASS XMLHttpRequest: responseText decoding (text/plain %FE%FF%FE%FF text)
PASS XMLHttpRequest: responseText decoding (text/plain %EF%BB%BF text)
PASS XMLHttpRequest: responseText decoding (text/plain %EF%BB%BF%EF%BB%BF text)
PASS XMLHttpRequest: responseText decoding (text/plain %C2 text)
FAIL XMLHttpRequest: responseText decoding (text/plain;charset=bogus %C2 text) assert_equals: expected "\ufffd" but got "Â"
PASS XMLHttpRequest: responseText decoding (text/xml %FE%FF text)
PASS XMLHttpRequest: responseText decoding (text/xml %FE%FF%FE%FF text)
PASS XMLHttpRequest: responseText decoding (text/xml %EF%BB%BF text)
PASS XMLHttpRequest: responseText decoding (text/xml %EF%BB%BF%EF%BB%BF text)
PASS XMLHttpRequest: responseText decoding (text/plain %E3%81%B2 text)
Harness: the test ran to completion.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment