Add UnencodableHandling::kNoUnencodables

There are no unencodable characters in UTF-8; however, previously callers of blink::TextCodec::Encode had to pass a dummy UnencodableHandling argument. Add a kNoUnencodables value to the UnencodableHandling enum, which is an explicit no-op. Also add DCHECKs that it is only used for UTF-8 or UTF-16. Convert callers to use kNoUnencodables where the encoding is statically known to be UTF-8. It's still valid to use other values for the UnencodableHandling argument with UTF-8 or UTF-16; kNoUnencodables simply documents that it makes no difference. Change-Id: I5eebe4c76ae4006ee91f3884f94ec3b5e410685b Reviewed-on: https://chromium-review.googlesource.com/1206055 Reviewed-by: Yutaka Hirano <yhirano@chromium.org> Reviewed-by: Kentaro Hara <haraken@chromium.org> Reviewed-by: Joshua Bell <jsbell@chromium.org> Commit-Queue: Adam Rice <ricea@chromium.org> Cr-Commit-Position: refs/heads/master@{#589451}

Add UnencodableHandling::kNoUnencodables
There are no unencodable characters in UTF-8; however, previously callers of blink::TextCodec::Encode had to pass a dummy UnencodableHandling argument. Add a kNoUnencodables value to the UnencodableHandling enum, which is an explicit no-op. Also add DCHECKs that it is only used for UTF-8 or UTF-16. Convert callers to use kNoUnencodables where the encoding is statically known to be UTF-8. It's still valid to use other values for the UnencodableHandling argument with UTF-8 or UTF-16; kNoUnencodables simply documents that it makes no difference. Change-Id: I5eebe4c76ae4006ee91f3884f94ec3b5e410685b Reviewed-on: https://chromium-review.googlesource.com/1206055 Reviewed-by: Yutaka Hirano <yhirano@chromium.org> Reviewed-by: Kentaro Hara <haraken@chromium.org> Reviewed-by: Joshua Bell <jsbell@chromium.org> Commit-Queue: Adam Rice <ricea@chromium.org> Cr-Commit-Position: refs/heads/master@{#589451}
b4edab24 · Adam Rice · Commit Bot · c39dc317 · b4edab24 · b4edab24
Commit b4edab24 authored Sep 07, 2018 by Adam Rice Committed by Commit Bot Sep 07, 2018
12 changed files
--- a/third_party/blink/renderer/core/fetch/form_data_bytes_consumer.cc
+++ b/third_party/blink/renderer/core/fetch/form_data_bytes_consumer.cc
@@ -475,7 +475,7 @@ class ComplexFormDataBytesConsumer final : public BytesConsumer {

 FormDataBytesConsumer::FormDataBytesConsumer(const String& string)
    : impl_(new SimpleFormDataBytesConsumer(EncodedFormData::Create(
-          UTF8Encoding().Encode(string, WTF::kEntitiesForUnencodables)))) {}
+          UTF8Encoding().Encode(string, WTF::kNoUnencodables)))) {}

 FormDataBytesConsumer::FormDataBytesConsumer(DOMArrayBuffer* buffer)
    : FormDataBytesConsumer(buffer->Data(), buffer->ByteLength()) {}

--- a/third_party/blink/renderer/core/xmlhttprequest/xml_http_request.cc
+++ b/third_party/blink/renderer/core/xmlhttprequest/xml_http_request.cc
@@ -836,7 +836,7 @@ void XMLHttpRequest::send(Document* document, ExceptionState& exception_state) {
    String body = CreateMarkup(document);

    http_body = EncodedFormData::Create(
-        UTF8Encoding().Encode(body, WTF::kEntitiesForUnencodables));
+        UTF8Encoding().Encode(body, WTF::kNoUnencodables));
  }

  CreateRequest(std::move(http_body), exception_state);
@@ -852,7 +852,7 @@ void XMLHttpRequest::send(const String& body, ExceptionState& exception_state) {

  if (!body.IsNull() && AreMethodAndURLValidForSend()) {
    http_body = EncodedFormData::Create(
-        UTF8Encoding().Encode(body, WTF::kEntitiesForUnencodables));
+        UTF8Encoding().Encode(body, WTF::kNoUnencodables));
    UpdateContentTypeAndCharset("text/plain;charset=UTF-8", "UTF-8");
  }


--- a/third_party/blink/renderer/modules/encoding/text_encoder.cc
+++ b/third_party/blink/renderer/modules/encoding/text_encoder.cc
@@ -67,10 +67,10 @@ NotShared<DOMUint8Array> TextEncoder::encode(const String& input) {
  // are present in the input.
  if (input.Is8Bit()) {
    result = codec_->Encode(input.Characters8(), input.length(),
-                            WTF::kEntitiesForUnencodables);
+                            WTF::kNoUnencodables);
  } else {
    result = codec_->Encode(input.Characters16(), input.length(),
-                            WTF::kEntitiesForUnencodables);
+                            WTF::kNoUnencodables);
  }

  const char* buffer = result.data();

--- a/third_party/blink/renderer/modules/encoding/text_encoder_stream.cc
+++ b/third_party/blink/renderer/modules/encoding/text_encoder_stream.cc
@@ -60,14 +60,8 @@ class TextEncoderStream::Transformer final : public TransformStreamTransformer {
        // check is needed.
        prefix = ReplacementCharacterInUtf8();
      }
-      // Note that the third argument here is ignored since the encoding is
-      // UTF-8, which will use U+FFFD-replacement rather than ASCII fallback
-      // substitution when unencodable sequences (for instance, unpaired UTF-16
-      // surrogates) are present in the input.
-      // TODO(ricea): Add WTF::kNoUnencodables enum value to make this
-      // behaviour explicit for UTF-N encodings.
      result = encoder_->Encode(input.Characters8(), input.length(),
-                                WTF::kEntitiesForUnencodables);
+                                WTF::kNoUnencodables);
    } else {
      bool have_output =
          Encode16BitString(input, high_surrogate, &prefix, &result);
@@ -134,7 +128,7 @@ class TextEncoderStream::Transformer final : public TransformStreamTransformer {
        // Third argument is ignored, as above.
        *prefix =
            encoder_->Encode(astral_character, base::size(astral_character),
-                             WTF::kEntitiesForUnencodables);
+                             WTF::kNoUnencodables);
        ++begin;
        if (begin == end)
          return true;

--- a/third_party/blink/renderer/modules/push_messaging/push_message_data.cc
+++ b/third_party/blink/renderer/modules/push_messaging/push_message_data.cc
@@ -43,7 +43,7 @@ PushMessageData* PushMessageData::Create(

  if (message_data.IsUSVString()) {
    CString encoded_string = UTF8Encoding().Encode(
-        message_data.GetAsUSVString(), WTF::kEntitiesForUnencodables);
+        message_data.GetAsUSVString(), WTF::kNoUnencodables);
    return new PushMessageData(encoded_string.data(), encoded_string.length());
  }


--- a/third_party/blink/renderer/platform/blob/blob_data.cc
+++ b/third_party/blink/renderer/platform/blob/blob_data.cc
@@ -215,8 +215,7 @@ void BlobData::AppendText(const String& text,
                          bool do_normalize_line_endings_to_native) {
  DCHECK_EQ(file_composition_, FileCompositionStatus::NO_UNKNOWN_SIZE_FILES)
      << "Blobs with a unknown-size file cannot have other items.";
-  CString utf8_text =
-      UTF8Encoding().Encode(text, WTF::kEntitiesForUnencodables);
+  CString utf8_text = UTF8Encoding().Encode(text, WTF::kNoUnencodables);

  if (do_normalize_line_endings_to_native) {
    if (utf8_text.length() >

--- a/third_party/blink/renderer/platform/weborigin/kurl.cc
+++ b/third_party/blink/renderer/platform/weborigin/kurl.cc
@@ -632,8 +632,8 @@ String DecodeURLEscapeSequences(const String& string,
 }

 String EncodeWithURLEscapeSequences(const String& not_encoded_string) {
-  CString utf8 = UTF8Encoding().Encode(not_encoded_string,
-                                       WTF::kURLEncodedEntitiesForUnencodables);
+  CString utf8 =
+      UTF8Encoding().Encode(not_encoded_string, WTF::kNoUnencodables);

  url::RawCanonOutputT<char> buffer;
  int input_length = utf8.length();

--- a/third_party/blink/renderer/platform/wtf/text/text_codec.cc
+++ b/third_party/blink/renderer/platform/wtf/text/text_codec.cc
@@ -48,6 +48,9 @@ uint32_t TextCodec::GetUnencodableReplacement(
      snprintf(replacement, sizeof(UnencodableReplacementArray), "\\%x ",
               code_point);
      return static_cast<uint32_t>(strlen(replacement));
+
+    case kNoUnencodables:
+      break;
  }
  NOTREACHED();
  replacement[0] = 0;

--- a/third_party/blink/renderer/platform/wtf/text/text_codec.h
+++ b/third_party/blink/renderer/platform/wtf/text/text_codec.h
@@ -52,6 +52,10 @@ enum UnencodableHandling {
  // Encodes the character as a CSS entity.  For example U+06DE
  // would be \06de.  See: https://www.w3.org/TR/css-syntax-3/#escaping
  kCSSEncodedEntitiesForUnencodables,
+
+  // Used when all characters can be encoded in the character set. Only
+  // applicable to UTF-N encodings.
+  kNoUnencodables,
 };

 typedef char UnencodableReplacementArray[32];

--- a/third_party/blink/renderer/platform/wtf/text/text_codec_icu.cc
+++ b/third_party/blink/renderer/platform/wtf/text/text_codec_icu.cc
@@ -616,6 +616,16 @@ static void GbkCallbackSubstitute(const void* context,
 }
 #endif  // USING_SYSTEM_ICU

+static void NotReachedEntityCallback(const void* context,
+                                     UConverterFromUnicodeArgs* from_u_args,
+                                     const UChar* code_units,
+                                     int32_t length,
+                                     UChar32 code_point,
+                                     UConverterCallbackReason reason,
+                                     UErrorCode* err) {
+  NOTREACHED();
+}
+
 class TextCodecInput final {
  STACK_ALLOCATED();

@@ -685,6 +695,13 @@ CString TextCodecICU::EncodeInternal(const TextCodecInput& input,
                            0, 0, 0, &err);
 #endif
      break;
+    case kNoUnencodables:
+      DCHECK(encoding_ == UTF16BigEndianEncoding() ||
+             encoding_ == UTF16LittleEndianEncoding() ||
+             encoding_ == UTF8Encoding());
+      ucnv_setFromUCallBack(converter_icu_, NotReachedEntityCallback, nullptr,
+                            nullptr, nullptr, &err);
+      break;
  }

  DCHECK(U_SUCCESS(err));

--- a/third_party/blink/renderer/platform/wtf/text/text_codec_latin1.cc
+++ b/third_party/blink/renderer/platform/wtf/text/text_codec_latin1.cc
@@ -26,6 +26,7 @@
 #include "third_party/blink/renderer/platform/wtf/text/text_codec_latin1.h"

 #include <memory>
+#include "third_party/blink/renderer/platform/wtf/assertions.h"
 #include "third_party/blink/renderer/platform/wtf/text/cstring.h"
 #include "third_party/blink/renderer/platform/wtf/text/string_buffer.h"
 #include "third_party/blink/renderer/platform/wtf/text/text_codec_ascii_fast_path.h"
@@ -205,6 +206,7 @@ template <typename CharType>
 static CString EncodeComplexWindowsLatin1(const CharType* characters,
                                          size_t length,
                                          UnencodableHandling handling) {
+  DCHECK_NE(handling, kNoUnencodables);
  size_t target_length = length;
  Vector<char> result(target_length);
  char* bytes = result.data();

--- a/third_party/blink/renderer/platform/wtf/text/text_codec_user_defined.cc
+++ b/third_party/blink/renderer/platform/wtf/text/text_codec_user_defined.cc
@@ -26,8 +26,7 @@
 #include "third_party/blink/renderer/platform/wtf/text/text_codec_user_defined.h"

 #include <memory>
-
-#include <memory>
+#include "third_party/blink/renderer/platform/wtf/assertions.h"
 #include "third_party/blink/renderer/platform/wtf/text/cstring.h"
 #include "third_party/blink/renderer/platform/wtf/text/string_buffer.h"
 #include "third_party/blink/renderer/platform/wtf/text/string_builder.h"
@@ -70,6 +69,7 @@ template <typename CharType>
 static CString EncodeComplexUserDefined(const CharType* characters,
                                        size_t length,
                                        UnencodableHandling handling) {
+  DCHECK_NE(handling, kNoUnencodables);
  size_t target_length = length;
  Vector<char> result(target_length);
  char* bytes = result.data();