Commit b4edab24 authored by Adam Rice, committed by Commit Bot

Add UnencodableHandling::kNoUnencodables

There are no unencodable characters in UTF-8; however, previously callers
to blink::TextCoder::Encode had to pass a dummy UnencodableHandling
argument. Add a kNoUnencodables value to the UnencodableHandling enum which is
an explicit no-op. Also add DCHECKs that it is only used for UTF-8 or
UTF-16.

Convert callers to use kNoUnencodables where the encoding is statically
known to be UTF-8.

It's still valid to use other values for the UnencodableHandling
argument with UTF-8 or UTF-16; kNoUnencodables simply documents that it
makes no difference.

Change-Id: I5eebe4c76ae4006ee91f3884f94ec3b5e410685b
Reviewed-on: https://chromium-review.googlesource.com/1206055
Reviewed-by: Yutaka Hirano <yhirano@chromium.org>
Reviewed-by: Kentaro Hara <haraken@chromium.org>
Reviewed-by: Joshua Bell <jsbell@chromium.org>
Commit-Queue: Adam Rice <ricea@chromium.org>
Cr-Commit-Position: refs/heads/master@{#589451}
parent c39dc317
......@@ -475,7 +475,7 @@ class ComplexFormDataBytesConsumer final : public BytesConsumer {
FormDataBytesConsumer::FormDataBytesConsumer(const String& string)
: impl_(new SimpleFormDataBytesConsumer(EncodedFormData::Create(
UTF8Encoding().Encode(string, WTF::kEntitiesForUnencodables)))) {}
UTF8Encoding().Encode(string, WTF::kNoUnencodables)))) {}
FormDataBytesConsumer::FormDataBytesConsumer(DOMArrayBuffer* buffer)
: FormDataBytesConsumer(buffer->Data(), buffer->ByteLength()) {}
......
......@@ -836,7 +836,7 @@ void XMLHttpRequest::send(Document* document, ExceptionState& exception_state) {
String body = CreateMarkup(document);
http_body = EncodedFormData::Create(
UTF8Encoding().Encode(body, WTF::kEntitiesForUnencodables));
UTF8Encoding().Encode(body, WTF::kNoUnencodables));
}
CreateRequest(std::move(http_body), exception_state);
......@@ -852,7 +852,7 @@ void XMLHttpRequest::send(const String& body, ExceptionState& exception_state) {
if (!body.IsNull() && AreMethodAndURLValidForSend()) {
http_body = EncodedFormData::Create(
UTF8Encoding().Encode(body, WTF::kEntitiesForUnencodables));
UTF8Encoding().Encode(body, WTF::kNoUnencodables));
UpdateContentTypeAndCharset("text/plain;charset=UTF-8", "UTF-8");
}
......
......@@ -67,10 +67,10 @@ NotShared<DOMUint8Array> TextEncoder::encode(const String& input) {
// are present in the input.
if (input.Is8Bit()) {
result = codec_->Encode(input.Characters8(), input.length(),
WTF::kEntitiesForUnencodables);
WTF::kNoUnencodables);
} else {
result = codec_->Encode(input.Characters16(), input.length(),
WTF::kEntitiesForUnencodables);
WTF::kNoUnencodables);
}
const char* buffer = result.data();
......
......@@ -60,14 +60,8 @@ class TextEncoderStream::Transformer final : public TransformStreamTransformer {
// check is needed.
prefix = ReplacementCharacterInUtf8();
}
// Note that the third argument here is ignored since the encoding is
// UTF-8, which will use U+FFFD-replacement rather than ASCII fallback
// substitution when unencodable sequences (for instance, unpaired UTF-16
// surrogates) are present in the input.
// TODO(ricea): Add WTF::kNoUnencodables enum value to make this
// behaviour explicit for UTF-N encodings.
result = encoder_->Encode(input.Characters8(), input.length(),
WTF::kEntitiesForUnencodables);
WTF::kNoUnencodables);
} else {
bool have_output =
Encode16BitString(input, high_surrogate, &prefix, &result);
......@@ -134,7 +128,7 @@ class TextEncoderStream::Transformer final : public TransformStreamTransformer {
// Third argument is ignored, as above.
*prefix =
encoder_->Encode(astral_character, base::size(astral_character),
WTF::kEntitiesForUnencodables);
WTF::kNoUnencodables);
++begin;
if (begin == end)
return true;
......
......@@ -43,7 +43,7 @@ PushMessageData* PushMessageData::Create(
if (message_data.IsUSVString()) {
CString encoded_string = UTF8Encoding().Encode(
message_data.GetAsUSVString(), WTF::kEntitiesForUnencodables);
message_data.GetAsUSVString(), WTF::kNoUnencodables);
return new PushMessageData(encoded_string.data(), encoded_string.length());
}
......
......@@ -215,8 +215,7 @@ void BlobData::AppendText(const String& text,
bool do_normalize_line_endings_to_native) {
DCHECK_EQ(file_composition_, FileCompositionStatus::NO_UNKNOWN_SIZE_FILES)
<< "Blobs with a unknown-size file cannot have other items.";
CString utf8_text =
UTF8Encoding().Encode(text, WTF::kEntitiesForUnencodables);
CString utf8_text = UTF8Encoding().Encode(text, WTF::kNoUnencodables);
if (do_normalize_line_endings_to_native) {
if (utf8_text.length() >
......
......@@ -632,8 +632,8 @@ String DecodeURLEscapeSequences(const String& string,
}
String EncodeWithURLEscapeSequences(const String& not_encoded_string) {
CString utf8 = UTF8Encoding().Encode(not_encoded_string,
WTF::kURLEncodedEntitiesForUnencodables);
CString utf8 =
UTF8Encoding().Encode(not_encoded_string, WTF::kNoUnencodables);
url::RawCanonOutputT<char> buffer;
int input_length = utf8.length();
......
......@@ -48,6 +48,9 @@ uint32_t TextCodec::GetUnencodableReplacement(
snprintf(replacement, sizeof(UnencodableReplacementArray), "\\%x ",
code_point);
return static_cast<uint32_t>(strlen(replacement));
case kNoUnencodables:
break;
}
NOTREACHED();
replacement[0] = 0;
......
......@@ -52,6 +52,10 @@ enum UnencodableHandling {
// Encodes the character as a CSS entity. For example U+06DE
// would be \06de. See: https://www.w3.org/TR/css-syntax-3/#escaping
kCSSEncodedEntitiesForUnencodables,
// Used when all characters can be encoded in the character set. Only
// applicable to UTF-N encodings.
kNoUnencodables,
};
typedef char UnencodableReplacementArray[32];
......
......@@ -616,6 +616,16 @@ static void GbkCallbackSubstitute(const void* context,
}
#endif // USING_SYSTEM_ICU
static void NotReachedEntityCallback(const void* context,
UConverterFromUnicodeArgs* from_u_args,
const UChar* code_units,
int32_t length,
UChar32 code_point,
UConverterCallbackReason reason,
UErrorCode* err) {
NOTREACHED();
}
class TextCodecInput final {
STACK_ALLOCATED();
......@@ -685,6 +695,13 @@ CString TextCodecICU::EncodeInternal(const TextCodecInput& input,
0, 0, 0, &err);
#endif
break;
case kNoUnencodables:
DCHECK(encoding_ == UTF16BigEndianEncoding() ||
encoding_ == UTF16LittleEndianEncoding() ||
encoding_ == UTF8Encoding());
ucnv_setFromUCallBack(converter_icu_, NotReachedEntityCallback, nullptr,
nullptr, nullptr, &err);
break;
}
DCHECK(U_SUCCESS(err));
......
......@@ -26,6 +26,7 @@
#include "third_party/blink/renderer/platform/wtf/text/text_codec_latin1.h"
#include <memory>
#include "third_party/blink/renderer/platform/wtf/assertions.h"
#include "third_party/blink/renderer/platform/wtf/text/cstring.h"
#include "third_party/blink/renderer/platform/wtf/text/string_buffer.h"
#include "third_party/blink/renderer/platform/wtf/text/text_codec_ascii_fast_path.h"
......@@ -205,6 +206,7 @@ template <typename CharType>
static CString EncodeComplexWindowsLatin1(const CharType* characters,
size_t length,
UnencodableHandling handling) {
DCHECK_NE(handling, kNoUnencodables);
size_t target_length = length;
Vector<char> result(target_length);
char* bytes = result.data();
......
......@@ -26,8 +26,7 @@
#include "third_party/blink/renderer/platform/wtf/text/text_codec_user_defined.h"
#include <memory>
#include <memory>
#include "third_party/blink/renderer/platform/wtf/assertions.h"
#include "third_party/blink/renderer/platform/wtf/text/cstring.h"
#include "third_party/blink/renderer/platform/wtf/text/string_buffer.h"
#include "third_party/blink/renderer/platform/wtf/text/string_builder.h"
......@@ -70,6 +69,7 @@ template <typename CharType>
static CString EncodeComplexUserDefined(const CharType* characters,
size_t length,
UnencodableHandling handling) {
DCHECK_NE(handling, kNoUnencodables);
size_t target_length = length;
Vector<char> result(target_length);
char* bytes = result.data();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment