Commit e141b413 authored by Eero Häkkinen's avatar Eero Häkkinen Committed by Commit Bot

Implement UTF-8 decode without BOM in TextResourceDecoder

This adds a no-BOM decoding option to TextResourceDecoderOptions, which
disables BOM checking in the decoder.

Bug: 796192
Change-Id: Id0eb00dca451c4898d9ae00f11bf08e7fbb33a1c
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1796366
Reviewed-by: Yutaka Hirano <yhirano@chromium.org>
Reviewed-by: Hiroshige Hayashizaki <hiroshige@chromium.org>
Commit-Queue: Eero Häkkinen <eero.hakkinen@intel.com>
Cr-Commit-Position: refs/heads/master@{#711196}
parent 43ca4361
...@@ -211,6 +211,11 @@ wtf_size_t TextResourceDecoder::CheckForBOM(const char* data, wtf_size_t len) { ...@@ -211,6 +211,11 @@ wtf_size_t TextResourceDecoder::CheckForBOM(const char* data, wtf_size_t len) {
// respectively. // respectively.
DCHECK(!checked_for_bom_); DCHECK(!checked_for_bom_);
if (options_.GetNoBOMDecoding()) {
checked_for_bom_ = true;
return 0;
}
wtf_size_t length_of_bom = 0; wtf_size_t length_of_bom = 0;
const wtf_size_t max_bom_length = 3; const wtf_size_t max_bom_length = 3;
......
...@@ -8,6 +8,33 @@ ...@@ -8,6 +8,33 @@
namespace blink { namespace blink {
// Decodes BOM-prefixed UTF-8 bytes with the always-UTF-8 options and expects
// the reported encoding to be UTF-8 and the decoded text to be exactly "foo",
// i.e. without the leading BOM.
TEST(TextResourceDecoderTest, AlwaysUseUTF8) {
  auto decoder = std::make_unique<TextResourceDecoder>(
      TextResourceDecoderOptions::CreateAlwaysUseUTF8ForText());

  // The bytes EF BB BF (UTF-8 BOM) followed by "foo".
  const unsigned char kBomThenFoo[] = {0xef, 0xbb, 0xbf, 0x66, 0x6f, 0x6f};
  const char* const input = reinterpret_cast<const char*>(kBomThenFoo);

  // Decode the whole buffer, then append whatever Flush() still returns.
  const WTF::String head = decoder->Decode(input, sizeof(kBomThenFoo));
  const WTF::String text = head + decoder->Flush();

  EXPECT_EQ(WTF::UTF8Encoding(), decoder->Encoding());
  EXPECT_EQ("foo", text);
}
// Decodes BOM-prefixed UTF-8 bytes with the "without BOM" options and expects
// the reported encoding to be UTF-8 and the BOM bytes to survive in the
// decoded output (compared after re-encoding the result to UTF-8).
TEST(TextResourceDecoderTest, AlwaysUseUTF8WithoutBOM) {
  auto decoder = std::make_unique<TextResourceDecoder>(
      TextResourceDecoderOptions::CreateAlwaysUseUTF8WithoutBOMForText());

  // The bytes EF BB BF (UTF-8 BOM) followed by "foo".
  const unsigned char kBomThenFoo[] = {0xef, 0xbb, 0xbf, 0x66, 0x6f, 0x6f};
  const char* const input = reinterpret_cast<const char*>(kBomThenFoo);

  // Decode the whole buffer, then append whatever Flush() still returns.
  const WTF::String head = decoder->Decode(input, sizeof(kBomThenFoo));
  const WTF::String text = head + decoder->Flush();

  EXPECT_EQ(WTF::UTF8Encoding(), decoder->Encoding());
  // The literal is split so that 'f' is not parsed as part of the \xbf hex
  // escape.
  EXPECT_EQ(
      "\xef\xbb\xbf"
      "foo",
      text.Utf8());
}
TEST(TextResourceDecoderTest, BasicUTF16) { TEST(TextResourceDecoderTest, BasicUTF16) {
std::unique_ptr<TextResourceDecoder> decoder = std::unique_ptr<TextResourceDecoder> decoder =
std::make_unique<TextResourceDecoder>(TextResourceDecoderOptions( std::make_unique<TextResourceDecoder>(TextResourceDecoderOptions(
......
...@@ -22,6 +22,13 @@ TextResourceDecoderOptions::CreateAlwaysUseUTF8ForText() { ...@@ -22,6 +22,13 @@ TextResourceDecoderOptions::CreateAlwaysUseUTF8ForText() {
UTF8Encoding(), nullptr, NullURL()); UTF8Encoding(), nullptr, NullURL());
} }
// Builds the same options as CreateAlwaysUseUTF8ForText() and then turns on
// the no-BOM-decoding flag, which TextResourceDecoder::CheckForBOM() consults
// to skip BOM checking.
TextResourceDecoderOptions
TextResourceDecoderOptions::CreateAlwaysUseUTF8WithoutBOMForText() {
  auto utf8_options = CreateAlwaysUseUTF8ForText();
  utf8_options.no_bom_decoding_ = true;
  return utf8_options;
}
TextResourceDecoderOptions TextResourceDecoderOptions::CreateWithAutoDetection( TextResourceDecoderOptions TextResourceDecoderOptions::CreateWithAutoDetection(
ContentType content_type, ContentType content_type,
const WTF::TextEncoding& default_encoding, const WTF::TextEncoding& default_encoding,
...@@ -41,6 +48,7 @@ TextResourceDecoderOptions::TextResourceDecoderOptions( ...@@ -41,6 +48,7 @@ TextResourceDecoderOptions::TextResourceDecoderOptions(
: encoding_detection_option_(encoding_detection_option), : encoding_detection_option_(encoding_detection_option),
content_type_(content_type), content_type_(content_type),
default_encoding_(default_encoding), default_encoding_(default_encoding),
no_bom_decoding_(false),
use_lenient_xml_decoding_(false), use_lenient_xml_decoding_(false),
hint_encoding_(hint_encoding), hint_encoding_(hint_encoding),
hint_url_(hint_url) { hint_url_(hint_url) {
......
...@@ -40,6 +40,10 @@ class PLATFORM_EXPORT TextResourceDecoderOptions final { ...@@ -40,6 +40,10 @@ class PLATFORM_EXPORT TextResourceDecoderOptions final {
// https://encoding.spec.whatwg.org/#utf-8-decode. // https://encoding.spec.whatwg.org/#utf-8-decode.
static TextResourceDecoderOptions CreateAlwaysUseUTF8ForText(); static TextResourceDecoderOptions CreateAlwaysUseUTF8ForText();
// Corresponds to utf-8 decode without BOM in Encoding spec:
// https://encoding.spec.whatwg.org/#utf-8-decode-without-bom.
static TextResourceDecoderOptions CreateAlwaysUseUTF8WithoutBOMForText();
static TextResourceDecoderOptions CreateWithAutoDetection( static TextResourceDecoderOptions CreateWithAutoDetection(
ContentType, ContentType,
const WTF::TextEncoding& default_encoding, const WTF::TextEncoding& default_encoding,
...@@ -79,6 +83,7 @@ class PLATFORM_EXPORT TextResourceDecoderOptions final { ...@@ -79,6 +83,7 @@ class PLATFORM_EXPORT TextResourceDecoderOptions final {
} }
ContentType GetContentType() const { return content_type_; } ContentType GetContentType() const { return content_type_; }
const WTF::TextEncoding& DefaultEncoding() const { return default_encoding_; } const WTF::TextEncoding& DefaultEncoding() const { return default_encoding_; }
bool GetNoBOMDecoding() const { return no_bom_decoding_; }
bool GetUseLenientXMLDecoding() const { return use_lenient_xml_decoding_; } bool GetUseLenientXMLDecoding() const { return use_lenient_xml_decoding_; }
const char* HintEncoding() const { return hint_encoding_; } const char* HintEncoding() const { return hint_encoding_; }
...@@ -95,6 +100,7 @@ class PLATFORM_EXPORT TextResourceDecoderOptions final { ...@@ -95,6 +100,7 @@ class PLATFORM_EXPORT TextResourceDecoderOptions final {
EncodingDetectionOption encoding_detection_option_; EncodingDetectionOption encoding_detection_option_;
ContentType content_type_; ContentType content_type_;
WTF::TextEncoding default_encoding_; WTF::TextEncoding default_encoding_;
bool no_bom_decoding_;
bool use_lenient_xml_decoding_; // Don't stop on XML decoding errors. bool use_lenient_xml_decoding_; // Don't stop on XML decoding errors.
// Hints for DetectTextEncoding(). // Hints for DetectTextEncoding().
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment