[base] Clean-up base::string16

This change removes the deprecated base::c16memcmp, base::c16len and base::c16memcpy, updates the comment on top of string16.h to make clear it is only a type alias nowadays, and removes obsolete tests or moves them to more appropriate locations. Bug: 911896 Change-Id: I5f66b8db5d81c962e040c79eed844136fd570ee2 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2642270 Reviewed-by: Daniel Cheng <dcheng@chromium.org> Commit-Queue: Jan Wilken Dörrie <jdoerrie@chromium.org> Cr-Commit-Position: refs/heads/master@{#846008}

[base] Clean-up base::string16
This change removes the deprecated base::c16memcmp, base::c16len and base::c16memcpy, updates the comment on top of string16.h to make clear it is only a type alias nowadays, and removes obsolete tests or moves them to more appropriate locations. Bug: 911896 Change-Id: I5f66b8db5d81c962e040c79eed844136fd570ee2 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2642270 Reviewed-by: Daniel Cheng <dcheng@chromium.org> Commit-Queue: Jan Wilken Dörrie <jdoerrie@chromium.org> Cr-Commit-Position: refs/heads/master@{#846008}
3c1c8fdf · Jan Wilken Dörrie · Chromium LUCI CQ · 1456af65 · 3c1c8fdf · 3c1c8fdf
Commit 3c1c8fdf authored Jan 22, 2021 by Jan Wilken Dörrie Committed by Chromium LUCI CQ Jan 22, 2021
6 changed files
--- a/base/BUILD.gn
+++ b/base/BUILD.gn
@@ -542,7 +542,6 @@ component("base") {
    "strings/strcat.cc",
    "strings/strcat.h",
    "strings/strcat_internal.h",
-    "strings/string16.cc",
    "strings/string16.h",
    "strings/string_number_conversions.cc",
    "strings/string_number_conversions.h",
@@ -1861,7 +1860,6 @@ component("base") {
    sources -= [
      "file_descriptor_store.cc",
      "file_descriptor_store.h",
-      "strings/string16.cc",
    ]
    deps += [ "//base/win:base_win_buildflags" ]
@@ -3539,7 +3537,6 @@ if (build_base_unittests) {
        "observer_list_unittest.nc",
        "optional_unittest.nc",
        "sequence_checker_unittest.nc",
-        "strings/string16_unittest.nc",
        "task/task_traits_extension_unittest.nc",
        "task/task_traits_unittest.nc",
        "thread_annotations_unittest.nc",

--- a/base/logging_unittest.cc
+++ b/base/logging_unittest.cc
@@ -14,7 +14,9 @@
 #include "base/no_destructor.h"
 #include "base/run_loop.h"
 #include "base/sanitizer_buildflags.h"
+#include "base/strings/string16.h"
 #include "base/strings/string_piece.h"
+#include "base/strings/utf_string_conversions.h"
 #include "base/test/bind.h"
 #include "base/test/scoped_logging_settings.h"
 #include "base/test/task_environment.h"
@@ -863,6 +865,45 @@ TEST_F(LoggingTest, LogCrosSyslogFormat) {
 }
 #endif  // BUILDFLAG(IS_CHROMEOS_ASH)
+// We define a custom operator<< for string16 so we can use it with logging.
+// This tests that conversion.
+TEST_F(LoggingTest, String16) {
+  // Basic stream test.
+  {
+    std::ostringstream stream;
+    stream << "Empty '" << base::string16() << "' standard '"
+           << base::string16(base::ASCIIToUTF16("Hello, world")) << "'";
+    EXPECT_STREQ("Empty '' standard 'Hello, world'", stream.str().c_str());
+  }
+  // Interesting edge cases.
+  {
+    // These should each get converted to the invalid character: EF BF BD.
+    base::string16 initial_surrogate;
+    initial_surrogate.push_back(0xd800);
+    base::string16 final_surrogate;
+    final_surrogate.push_back(0xdc00);
+    // Old italic A = U+10300, will get converted to: F0 90 8C 80 'z'.
+    base::string16 surrogate_pair;
+    surrogate_pair.push_back(0xd800);
+    surrogate_pair.push_back(0xdf00);
+    surrogate_pair.push_back('z');
+    // Will get converted to the invalid char + 's': EF BF BD 's'.
+    base::string16 unterminated_surrogate;
+    unterminated_surrogate.push_back(0xd800);
+    unterminated_surrogate.push_back('s');
+    std::ostringstream stream;
+    stream << initial_surrogate << "," << final_surrogate << ","
+           << surrogate_pair << "," << unterminated_surrogate;
+    EXPECT_STREQ("\xef\xbf\xbd,\xef\xbf\xbd,\xf0\x90\x8c\x80z,\xef\xbf\xbds",
+                 stream.str().c_str());
+  }
+}
 }  // namespace
 }  // namespace logging
--- a/base/strings/string16.cc
+++ b/base/strings/string16.cc
-// Copyright 2013 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-#include "base/strings/string16.h"
-#include <string>
-#include "base/strings/utf_string_conversions.h"
-namespace base {
-int c16memcmp(const char16* s1, const char16* s2, size_t n) {
-  return std::char_traits<char16>::compare(s1, s2, n);
-}
-size_t c16len(const char16* s) {
-  return std::char_traits<char16>::length(s);
-}
-char16* c16memcpy(char16* s1, const char16* s2, size_t n) {
-  return std::char_traits<char16>::copy(s1, s2, n);
-}
-}  // namespace base
--- a/base/strings/string16.h
+++ b/base/strings/string16.h
@@ -6,35 +6,18 @@
 #define BASE_STRINGS_STRING16_H_
 // WHAT:
-// A version of std::basic_string that provides 2-byte characters even when
+// Type aliases for string and character types supporting UTF-16 data. Prior to
-// wchar_t is not implemented as a 2-byte type. You can access this class as
+// C++11 there was no standard library solution for this, which is why wstring
-// string16. We also define char16, which string16 is based upon.
+// was used where possible (i.e. where wchar_t holds UTF-16 encoded data).
 //
-// WHY:
+// In C++11 we gained std::u16string, which is a cross-platform solution for
-// On Windows, wchar_t is 2 bytes, and it can conveniently handle UTF-16/UCS-2
+// UTF-16 strings. This is now the string16 type where ever wchar_t does not
-// data. Plenty of existing code operates on strings encoded as UTF-16.
+// hold UTF16 data (i.e. commonly non-Windows platforms). Eventually this should
-//
+// be used everywhere, at which point this type alias and this file should be
-// On many other platforms, sizeof(wchar_t) is 4 bytes by default. We can make
+// removed. https://crbug.com/911896 tracks the migration effort.
-// it 2 bytes by using the GCC flag -fshort-wchar. But then std::wstring fails
-// at run time, because it calls some functions (like wcslen) that come from
-// the system's native C library -- which was built with a 4-byte wchar_t!
-// It's wasteful to use 4-byte wchar_t strings to carry UTF-16 data, and it's
-// entirely improper on those systems where the encoding of wchar_t is defined
-// as UTF-32.
-//
-// Here, we define string16, which is similar to std::wstring but replaces all
-// libc functions with custom, 2-byte-char compatible routines. It is capable
-// of carrying UTF-16-encoded data.
-#include <stddef.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <functional>
-#include <ostream>
 #include <string>
-#include "base/base_export.h"
 #include "build/build_config.h"
 #if defined(WCHAR_T_IS_UTF16)
@@ -48,6 +31,7 @@
 #define STRING16_LITERAL(x) L##x
 namespace base {
+using char16 = wchar_t;
 using string16 = std::wstring;
 }  // namespace base
@@ -56,21 +40,10 @@ using string16 = std::wstring;
 #define STRING16_LITERAL(x) u##x
 namespace base {
+using char16 = char16_t;
 using string16 = std::u16string;
 }  // namespace base
 #endif  // WCHAR_T_IS_UTF16
-namespace base {
-using char16 = ::base::string16::value_type;
-// TODO(crbug.com/911896): Remove these functions in favor of using
-// std::char_traits<base::char16> directly.
-BASE_EXPORT int c16memcmp(const char16* s1, const char16* s2, size_t n);
-BASE_EXPORT size_t c16len(const char16* s);
-BASE_EXPORT char16* c16memcpy(char16* s1, const char16* s2, size_t n);
-}  // namespace base
 #endif  // BASE_STRINGS_STRING16_H_
--- a/base/strings/string16_unittest.cc
+++ b/base/strings/string16_unittest.cc
@@ -2,76 +2,22 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
-#include <sstream>
-#include <unordered_set>
 #include "base/strings/string16.h"
-#include "base/strings/utf_string_conversions.h"
-#include "build/build_config.h"
 #include "testing/gtest/include/gtest/gtest.h"
 namespace base {
+// Ensure that STRING16_LITERAL can be used to instantiate constants of type
+// char16 and char16[], respectively.
 TEST(String16Test, String16Literal) {
-  static constexpr char16 kHelloWorld[] = STRING16_LITERAL("Hello, World");
+  static constexpr char16 kHelloChars[] = {
-  constexpr StringPiece16 kPiece = kHelloWorld;
+      STRING16_LITERAL('H'), STRING16_LITERAL('e'), STRING16_LITERAL('l'),
-  static_assert(kHelloWorld == kPiece, "");
+      STRING16_LITERAL('l'), STRING16_LITERAL('o'), STRING16_LITERAL('\0'),
-  static_assert(kHelloWorld == kPiece.data(), "");
+  };
-  string16 hello_world = kHelloWorld;
-  EXPECT_EQ(kHelloWorld, hello_world);
-}
-// We define a custom operator<< for string16 so we can use it with logging.
-// This tests that conversion.
-TEST(String16Test, OutputStream) {
-  // Basic stream test.
-  {
-    std::ostringstream stream;
-    stream << "Empty '" << string16() << "' standard '"
-           << string16(ASCIIToUTF16("Hello, world")) << "'";
-    EXPECT_STREQ("Empty '' standard 'Hello, world'",
-                 stream.str().c_str());
-  }
-  // Interesting edge cases.
-  {
-    // These should each get converted to the invalid character: EF BF BD.
-    string16 initial_surrogate;
-    initial_surrogate.push_back(0xd800);
-    string16 final_surrogate;
-    final_surrogate.push_back(0xdc00);
-    // Old italic A = U+10300, will get converted to: F0 90 8C 80 'z'.
-    string16 surrogate_pair;
-    surrogate_pair.push_back(0xd800);
-    surrogate_pair.push_back(0xdf00);
-    surrogate_pair.push_back('z');
-    // Will get converted to the invalid char + 's': EF BF BD 's'.
-    string16 unterminated_surrogate;
-    unterminated_surrogate.push_back(0xd800);
-    unterminated_surrogate.push_back('s');
-    std::ostringstream stream;
-    stream << initial_surrogate << "," << final_surrogate << ","
-           << surrogate_pair << "," << unterminated_surrogate;
-    EXPECT_STREQ("\xef\xbf\xbd,\xef\xbf\xbd,\xf0\x90\x8c\x80z,\xef\xbf\xbds",
-                 stream.str().c_str());
-  }
-}
-TEST(String16Test, Hash) {
-  string16 str1 = ASCIIToUTF16("hello");
-  string16 str2 = ASCIIToUTF16("world");
-  std::unordered_set<string16> set;
-  set.insert(str1);
+  static constexpr char16 kHelloStr[] = STRING16_LITERAL("Hello");
-  EXPECT_EQ(1u, set.count(str1));
+  EXPECT_EQ(std::char_traits<char16>::compare(kHelloChars, kHelloStr, 6), 0);
-  EXPECT_EQ(0u, set.count(str2));
 }
 }  // namespace base
--- a/base/strings/string16_unittest.nc
+++ b/base/strings/string16_unittest.nc
-// Copyright 2017 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-// This is a "No Compile Test".
-// http://dev.chromium.org/developers/testing/no-compile-tests
-#include "base/strings/string16.h"
-#if defined(NCTEST_NO_KOENIG_LOOKUP_FOR_STRING16)  // [r"use of undeclared identifier 'ShouldNotBeFound'"]
-// base::string16 is declared as a typedef. It should not cause other functions
-// in base to be found via Argument-dependent lookup.
-namespace base {
-void ShouldNotBeFound(const base::string16& arg) {}
-}
-// Intentionally not in base:: namespace.
-void WontCompile() {
-  base::string16 s;
-  ShouldNotBeFound(s);
-}
-#endif