Commit e141b413 authored by Eero Häkkinen, committed by Commit Bot

Implement UTF-8 decode without BOM in TextResourceDecoder

This adds a "no BOM decoding" option to TextResourceDecoderOptions, which
disables BOM checking in the decoder.

Bug: 796192
Change-Id: Id0eb00dca451c4898d9ae00f11bf08e7fbb33a1c
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1796366
Reviewed-by: Yutaka Hirano <yhirano@chromium.org>
Reviewed-by: Hiroshige Hayashizaki <hiroshige@chromium.org>
Commit-Queue: Eero Häkkinen <eero.hakkinen@intel.com>
Cr-Commit-Position: refs/heads/master@{#711196}
parent 43ca4361
......@@ -211,6 +211,11 @@ wtf_size_t TextResourceDecoder::CheckForBOM(const char* data, wtf_size_t len) {
// respectively.
DCHECK(!checked_for_bom_);
if (options_.GetNoBOMDecoding()) {
checked_for_bom_ = true;
return 0;
}
wtf_size_t length_of_bom = 0;
const wtf_size_t max_bom_length = 3;
......
......@@ -8,6 +8,33 @@
namespace blink {
// With the "always use UTF-8" options, input that starts with the UTF-8 BOM
// is expected to decode to just the text that follows it: the reported
// encoding is UTF-8 and the decoded string is "foo".
TEST(TextResourceDecoderTest, AlwaysUseUTF8) {
  // EF BB BF is the UTF-8 byte order mark; 66 6F 6F is ASCII "foo".
  const unsigned char kBomThenFoo[] = {0xef, 0xbb, 0xbf, 0x66, 0x6f, 0x6f};

  auto decoder = std::make_unique<TextResourceDecoder>(
      TextResourceDecoderOptions::CreateAlwaysUseUTF8ForText());

  // Decode the whole buffer in one call, then append whatever Flush() yields.
  const WTF::String body = decoder->Decode(
      reinterpret_cast<const char*>(kBomThenFoo), sizeof(kBomThenFoo));
  const WTF::String result = body + decoder->Flush();

  EXPECT_EQ(WTF::UTF8Encoding(), decoder->Encoding());
  EXPECT_EQ("foo", result);
}
// With the "UTF-8 without BOM" options, the same BOM-prefixed input is
// expected to keep its leading BOM: re-encoding the decoded string as UTF-8
// must give back the three BOM bytes followed by "foo".
TEST(TextResourceDecoderTest, AlwaysUseUTF8WithoutBOM) {
  // EF BB BF is the UTF-8 byte order mark; 66 6F 6F is ASCII "foo".
  const unsigned char kBomThenFoo[] = {0xef, 0xbb, 0xbf, 0x66, 0x6f, 0x6f};

  auto decoder = std::make_unique<TextResourceDecoder>(
      TextResourceDecoderOptions::CreateAlwaysUseUTF8WithoutBOMForText());

  // Decode the whole buffer in one call, then append whatever Flush() yields.
  const WTF::String body = decoder->Decode(
      reinterpret_cast<const char*>(kBomThenFoo), sizeof(kBomThenFoo));
  const WTF::String result = body + decoder->Flush();

  EXPECT_EQ(WTF::UTF8Encoding(), decoder->Encoding());
  // The literal is split so that the 'f' of "foo" is not parsed as part of
  // the preceding \xbf hex escape.
  EXPECT_EQ(
      "\xef\xbb\xbf"
      "foo",
      result.Utf8());
}
TEST(TextResourceDecoderTest, BasicUTF16) {
std::unique_ptr<TextResourceDecoder> decoder =
std::make_unique<TextResourceDecoder>(TextResourceDecoderOptions(
......
......@@ -22,6 +22,13 @@ TextResourceDecoderOptions::CreateAlwaysUseUTF8ForText() {
UTF8Encoding(), nullptr, NullURL());
}
// Returns the same options as CreateAlwaysUseUTF8ForText(), except that
// no_bom_decoding_ is switched on.
TextResourceDecoderOptions
TextResourceDecoderOptions::CreateAlwaysUseUTF8WithoutBOMForText() {
  TextResourceDecoderOptions utf8_without_bom = CreateAlwaysUseUTF8ForText();
  utf8_without_bom.no_bom_decoding_ = true;
  return utf8_without_bom;
}
TextResourceDecoderOptions TextResourceDecoderOptions::CreateWithAutoDetection(
ContentType content_type,
const WTF::TextEncoding& default_encoding,
......@@ -41,6 +48,7 @@ TextResourceDecoderOptions::TextResourceDecoderOptions(
: encoding_detection_option_(encoding_detection_option),
content_type_(content_type),
default_encoding_(default_encoding),
no_bom_decoding_(false),
use_lenient_xml_decoding_(false),
hint_encoding_(hint_encoding),
hint_url_(hint_url) {
......
......@@ -40,6 +40,10 @@ class PLATFORM_EXPORT TextResourceDecoderOptions final {
// https://encoding.spec.whatwg.org/#utf-8-decode.
static TextResourceDecoderOptions CreateAlwaysUseUTF8ForText();
// Corresponds to "UTF-8 decode without BOM" in the Encoding spec:
// https://encoding.spec.whatwg.org/#utf-8-decode-without-bom.
// Same as CreateAlwaysUseUTF8ForText(), except that the returned options
// report GetNoBOMDecoding() == true.
static TextResourceDecoderOptions CreateAlwaysUseUTF8WithoutBOMForText();
static TextResourceDecoderOptions CreateWithAutoDetection(
ContentType,
const WTF::TextEncoding& default_encoding,
......@@ -79,6 +83,7 @@ class PLATFORM_EXPORT TextResourceDecoderOptions final {
}
// Read-only accessors for the stored option values.
ContentType GetContentType() const { return content_type_; }
const WTF::TextEncoding& DefaultEncoding() const { return default_encoding_; }
// True when BOM checking is disabled; TextResourceDecoder::CheckForBOM()
// consults this and returns 0 without inspecting the data when it is set.
bool GetNoBOMDecoding() const { return no_bom_decoding_; }
// True when decoding should not stop on XML decoding errors.
bool GetUseLenientXMLDecoding() const { return use_lenient_xml_decoding_; }
// NOTE(review): presumably one of the hints for DetectTextEncoding() -
// confirm against the member declarations.
const char* HintEncoding() const { return hint_encoding_; }
......@@ -95,6 +100,7 @@ class PLATFORM_EXPORT TextResourceDecoderOptions final {
EncodingDetectionOption encoding_detection_option_;
ContentType content_type_;
WTF::TextEncoding default_encoding_;
// Disables BOM checking in the decoder. Initialized to false by the
// constructor; set to true by CreateAlwaysUseUTF8WithoutBOMForText().
bool no_bom_decoding_;
bool use_lenient_xml_decoding_; // Don't stop on XML decoding errors.
// Hints for DetectTextEncoding().
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment