Commit b4edab24 authored by Adam Rice, committed by Commit Bot

Add UnencodableHandling::kNoUnencodables

There are no unencodable characters in UTF-8; however, previously callers
to blink::TextCoder::Encode had to pass a dummy UnencodableHandling
argument. Add a kNoUnencodables value to the UnencodableHandling enum, which
is an explicit no-op. Also add DCHECKs that it is only used for UTF-8 or
UTF-16.

Convert callers to use kNoUnencodables where the encoding is statically
known to be UTF-8.

It's still valid to use other values for the UnencodableHandling
argument with UTF-8 or UTF-16; kNoUnencodables simply documents that it
makes no difference.

Change-Id: I5eebe4c76ae4006ee91f3884f94ec3b5e410685b
Reviewed-on: https://chromium-review.googlesource.com/1206055
Reviewed-by: Yutaka Hirano <yhirano@chromium.org>
Reviewed-by: Kentaro Hara <haraken@chromium.org>
Reviewed-by: Joshua Bell <jsbell@chromium.org>
Commit-Queue: Adam Rice <ricea@chromium.org>
Cr-Commit-Position: refs/heads/master@{#589451}
parent c39dc317
...@@ -475,7 +475,7 @@ class ComplexFormDataBytesConsumer final : public BytesConsumer { ...@@ -475,7 +475,7 @@ class ComplexFormDataBytesConsumer final : public BytesConsumer {
FormDataBytesConsumer::FormDataBytesConsumer(const String& string) FormDataBytesConsumer::FormDataBytesConsumer(const String& string)
: impl_(new SimpleFormDataBytesConsumer(EncodedFormData::Create( : impl_(new SimpleFormDataBytesConsumer(EncodedFormData::Create(
UTF8Encoding().Encode(string, WTF::kEntitiesForUnencodables)))) {} UTF8Encoding().Encode(string, WTF::kNoUnencodables)))) {}
FormDataBytesConsumer::FormDataBytesConsumer(DOMArrayBuffer* buffer) FormDataBytesConsumer::FormDataBytesConsumer(DOMArrayBuffer* buffer)
: FormDataBytesConsumer(buffer->Data(), buffer->ByteLength()) {} : FormDataBytesConsumer(buffer->Data(), buffer->ByteLength()) {}
......
...@@ -836,7 +836,7 @@ void XMLHttpRequest::send(Document* document, ExceptionState& exception_state) { ...@@ -836,7 +836,7 @@ void XMLHttpRequest::send(Document* document, ExceptionState& exception_state) {
String body = CreateMarkup(document); String body = CreateMarkup(document);
http_body = EncodedFormData::Create( http_body = EncodedFormData::Create(
UTF8Encoding().Encode(body, WTF::kEntitiesForUnencodables)); UTF8Encoding().Encode(body, WTF::kNoUnencodables));
} }
CreateRequest(std::move(http_body), exception_state); CreateRequest(std::move(http_body), exception_state);
...@@ -852,7 +852,7 @@ void XMLHttpRequest::send(const String& body, ExceptionState& exception_state) { ...@@ -852,7 +852,7 @@ void XMLHttpRequest::send(const String& body, ExceptionState& exception_state) {
if (!body.IsNull() && AreMethodAndURLValidForSend()) { if (!body.IsNull() && AreMethodAndURLValidForSend()) {
http_body = EncodedFormData::Create( http_body = EncodedFormData::Create(
UTF8Encoding().Encode(body, WTF::kEntitiesForUnencodables)); UTF8Encoding().Encode(body, WTF::kNoUnencodables));
UpdateContentTypeAndCharset("text/plain;charset=UTF-8", "UTF-8"); UpdateContentTypeAndCharset("text/plain;charset=UTF-8", "UTF-8");
} }
......
...@@ -67,10 +67,10 @@ NotShared<DOMUint8Array> TextEncoder::encode(const String& input) { ...@@ -67,10 +67,10 @@ NotShared<DOMUint8Array> TextEncoder::encode(const String& input) {
// are present in the input. // are present in the input.
if (input.Is8Bit()) { if (input.Is8Bit()) {
result = codec_->Encode(input.Characters8(), input.length(), result = codec_->Encode(input.Characters8(), input.length(),
WTF::kEntitiesForUnencodables); WTF::kNoUnencodables);
} else { } else {
result = codec_->Encode(input.Characters16(), input.length(), result = codec_->Encode(input.Characters16(), input.length(),
WTF::kEntitiesForUnencodables); WTF::kNoUnencodables);
} }
const char* buffer = result.data(); const char* buffer = result.data();
......
...@@ -60,14 +60,8 @@ class TextEncoderStream::Transformer final : public TransformStreamTransformer { ...@@ -60,14 +60,8 @@ class TextEncoderStream::Transformer final : public TransformStreamTransformer {
// check is needed. // check is needed.
prefix = ReplacementCharacterInUtf8(); prefix = ReplacementCharacterInUtf8();
} }
// Note that the third argument here is ignored since the encoding is
// UTF-8, which will use U+FFFD-replacement rather than ASCII fallback
// substitution when unencodable sequences (for instance, unpaired UTF-16
// surrogates) are present in the input.
// TODO(ricea): Add WTF::kNoUnencodables enum value to make this
// behaviour explicit for UTF-N encodings.
result = encoder_->Encode(input.Characters8(), input.length(), result = encoder_->Encode(input.Characters8(), input.length(),
WTF::kEntitiesForUnencodables); WTF::kNoUnencodables);
} else { } else {
bool have_output = bool have_output =
Encode16BitString(input, high_surrogate, &prefix, &result); Encode16BitString(input, high_surrogate, &prefix, &result);
...@@ -134,7 +128,7 @@ class TextEncoderStream::Transformer final : public TransformStreamTransformer { ...@@ -134,7 +128,7 @@ class TextEncoderStream::Transformer final : public TransformStreamTransformer {
// Third argument is ignored, as above. // Third argument is ignored, as above.
*prefix = *prefix =
encoder_->Encode(astral_character, base::size(astral_character), encoder_->Encode(astral_character, base::size(astral_character),
WTF::kEntitiesForUnencodables); WTF::kNoUnencodables);
++begin; ++begin;
if (begin == end) if (begin == end)
return true; return true;
......
...@@ -43,7 +43,7 @@ PushMessageData* PushMessageData::Create( ...@@ -43,7 +43,7 @@ PushMessageData* PushMessageData::Create(
if (message_data.IsUSVString()) { if (message_data.IsUSVString()) {
CString encoded_string = UTF8Encoding().Encode( CString encoded_string = UTF8Encoding().Encode(
message_data.GetAsUSVString(), WTF::kEntitiesForUnencodables); message_data.GetAsUSVString(), WTF::kNoUnencodables);
return new PushMessageData(encoded_string.data(), encoded_string.length()); return new PushMessageData(encoded_string.data(), encoded_string.length());
} }
......
...@@ -215,8 +215,7 @@ void BlobData::AppendText(const String& text, ...@@ -215,8 +215,7 @@ void BlobData::AppendText(const String& text,
bool do_normalize_line_endings_to_native) { bool do_normalize_line_endings_to_native) {
DCHECK_EQ(file_composition_, FileCompositionStatus::NO_UNKNOWN_SIZE_FILES) DCHECK_EQ(file_composition_, FileCompositionStatus::NO_UNKNOWN_SIZE_FILES)
<< "Blobs with a unknown-size file cannot have other items."; << "Blobs with a unknown-size file cannot have other items.";
CString utf8_text = CString utf8_text = UTF8Encoding().Encode(text, WTF::kNoUnencodables);
UTF8Encoding().Encode(text, WTF::kEntitiesForUnencodables);
if (do_normalize_line_endings_to_native) { if (do_normalize_line_endings_to_native) {
if (utf8_text.length() > if (utf8_text.length() >
......
...@@ -632,8 +632,8 @@ String DecodeURLEscapeSequences(const String& string, ...@@ -632,8 +632,8 @@ String DecodeURLEscapeSequences(const String& string,
} }
String EncodeWithURLEscapeSequences(const String& not_encoded_string) { String EncodeWithURLEscapeSequences(const String& not_encoded_string) {
CString utf8 = UTF8Encoding().Encode(not_encoded_string, CString utf8 =
WTF::kURLEncodedEntitiesForUnencodables); UTF8Encoding().Encode(not_encoded_string, WTF::kNoUnencodables);
url::RawCanonOutputT<char> buffer; url::RawCanonOutputT<char> buffer;
int input_length = utf8.length(); int input_length = utf8.length();
......
...@@ -48,6 +48,9 @@ uint32_t TextCodec::GetUnencodableReplacement( ...@@ -48,6 +48,9 @@ uint32_t TextCodec::GetUnencodableReplacement(
snprintf(replacement, sizeof(UnencodableReplacementArray), "\\%x ", snprintf(replacement, sizeof(UnencodableReplacementArray), "\\%x ",
code_point); code_point);
return static_cast<uint32_t>(strlen(replacement)); return static_cast<uint32_t>(strlen(replacement));
case kNoUnencodables:
break;
} }
NOTREACHED(); NOTREACHED();
replacement[0] = 0; replacement[0] = 0;
......
...@@ -52,6 +52,10 @@ enum UnencodableHandling { ...@@ -52,6 +52,10 @@ enum UnencodableHandling {
// Encodes the character as a CSS entity. For example U+06DE // Encodes the character as a CSS entity. For example U+06DE
// would be \06de. See: https://www.w3.org/TR/css-syntax-3/#escaping // would be \06de. See: https://www.w3.org/TR/css-syntax-3/#escaping
kCSSEncodedEntitiesForUnencodables, kCSSEncodedEntitiesForUnencodables,
// Used when all characters can be encoded in the character set. Only
// applicable to UTF-N encodings.
kNoUnencodables,
}; };
typedef char UnencodableReplacementArray[32]; typedef char UnencodableReplacementArray[32];
......
...@@ -616,6 +616,16 @@ static void GbkCallbackSubstitute(const void* context, ...@@ -616,6 +616,16 @@ static void GbkCallbackSubstitute(const void* context,
} }
#endif // USING_SYSTEM_ICU #endif // USING_SYSTEM_ICU
// ICU from-Unicode callback installed by TextCodecICU::EncodeInternal when the
// UnencodableHandling is kNoUnencodables. That mode is DCHECKed to be used
// only with UTF-8 or UTF-16 (big- or little-endian), so the converter is not
// expected to report an unencodable character; the body is therefore just
// NOTREACHED(). All parameters exist only to match the callback signature
// passed to ucnv_setFromUCallBack and are unused.
static void NotReachedEntityCallback(const void* context,
UConverterFromUnicodeArgs* from_u_args,
const UChar* code_units,
int32_t length,
UChar32 code_point,
UConverterCallbackReason reason,
UErrorCode* err) {
// Reaching this point means kNoUnencodables was paired with an encoding that
// could not represent some input.
NOTREACHED();
}
class TextCodecInput final { class TextCodecInput final {
STACK_ALLOCATED(); STACK_ALLOCATED();
...@@ -685,6 +695,13 @@ CString TextCodecICU::EncodeInternal(const TextCodecInput& input, ...@@ -685,6 +695,13 @@ CString TextCodecICU::EncodeInternal(const TextCodecInput& input,
0, 0, 0, &err); 0, 0, 0, &err);
#endif #endif
break; break;
case kNoUnencodables:
DCHECK(encoding_ == UTF16BigEndianEncoding() ||
encoding_ == UTF16LittleEndianEncoding() ||
encoding_ == UTF8Encoding());
ucnv_setFromUCallBack(converter_icu_, NotReachedEntityCallback, nullptr,
nullptr, nullptr, &err);
break;
} }
DCHECK(U_SUCCESS(err)); DCHECK(U_SUCCESS(err));
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "third_party/blink/renderer/platform/wtf/text/text_codec_latin1.h" #include "third_party/blink/renderer/platform/wtf/text/text_codec_latin1.h"
#include <memory> #include <memory>
#include "third_party/blink/renderer/platform/wtf/assertions.h"
#include "third_party/blink/renderer/platform/wtf/text/cstring.h" #include "third_party/blink/renderer/platform/wtf/text/cstring.h"
#include "third_party/blink/renderer/platform/wtf/text/string_buffer.h" #include "third_party/blink/renderer/platform/wtf/text/string_buffer.h"
#include "third_party/blink/renderer/platform/wtf/text/text_codec_ascii_fast_path.h" #include "third_party/blink/renderer/platform/wtf/text/text_codec_ascii_fast_path.h"
...@@ -205,6 +206,7 @@ template <typename CharType> ...@@ -205,6 +206,7 @@ template <typename CharType>
static CString EncodeComplexWindowsLatin1(const CharType* characters, static CString EncodeComplexWindowsLatin1(const CharType* characters,
size_t length, size_t length,
UnencodableHandling handling) { UnencodableHandling handling) {
DCHECK_NE(handling, kNoUnencodables);
size_t target_length = length; size_t target_length = length;
Vector<char> result(target_length); Vector<char> result(target_length);
char* bytes = result.data(); char* bytes = result.data();
......
...@@ -26,8 +26,7 @@ ...@@ -26,8 +26,7 @@
#include "third_party/blink/renderer/platform/wtf/text/text_codec_user_defined.h" #include "third_party/blink/renderer/platform/wtf/text/text_codec_user_defined.h"
#include <memory> #include <memory>
#include "third_party/blink/renderer/platform/wtf/assertions.h"
#include <memory>
#include "third_party/blink/renderer/platform/wtf/text/cstring.h" #include "third_party/blink/renderer/platform/wtf/text/cstring.h"
#include "third_party/blink/renderer/platform/wtf/text/string_buffer.h" #include "third_party/blink/renderer/platform/wtf/text/string_buffer.h"
#include "third_party/blink/renderer/platform/wtf/text/string_builder.h" #include "third_party/blink/renderer/platform/wtf/text/string_builder.h"
...@@ -70,6 +69,7 @@ template <typename CharType> ...@@ -70,6 +69,7 @@ template <typename CharType>
static CString EncodeComplexUserDefined(const CharType* characters, static CString EncodeComplexUserDefined(const CharType* characters,
size_t length, size_t length,
UnencodableHandling handling) { UnencodableHandling handling) {
DCHECK_NE(handling, kNoUnencodables);
size_t target_length = length; size_t target_length = length;
Vector<char> result(target_length); Vector<char> result(target_length);
char* bytes = result.data(); char* bytes = result.data();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment