Commit 3c1c8fdf authored by Jan Wilken Dörrie, committed by Chromium LUCI CQ

[base] Clean-up base::string16

This change removes the deprecated base::c16memcmp, base::c16len and
base::c16memcpy, updates the comment on top of string16.h to make clear
it is only a type alias nowadays, and removes obsolete tests or moves
them to more appropriate locations.

Bug: 911896
Change-Id: I5f66b8db5d81c962e040c79eed844136fd570ee2
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2642270
Reviewed-by: Daniel Cheng <dcheng@chromium.org>
Commit-Queue: Jan Wilken Dörrie <jdoerrie@chromium.org>
Cr-Commit-Position: refs/heads/master@{#846008}
parent 1456af65
...@@ -542,7 +542,6 @@ component("base") { ...@@ -542,7 +542,6 @@ component("base") {
"strings/strcat.cc", "strings/strcat.cc",
"strings/strcat.h", "strings/strcat.h",
"strings/strcat_internal.h", "strings/strcat_internal.h",
"strings/string16.cc",
"strings/string16.h", "strings/string16.h",
"strings/string_number_conversions.cc", "strings/string_number_conversions.cc",
"strings/string_number_conversions.h", "strings/string_number_conversions.h",
...@@ -1861,7 +1860,6 @@ component("base") { ...@@ -1861,7 +1860,6 @@ component("base") {
sources -= [ sources -= [
"file_descriptor_store.cc", "file_descriptor_store.cc",
"file_descriptor_store.h", "file_descriptor_store.h",
"strings/string16.cc",
] ]
deps += [ "//base/win:base_win_buildflags" ] deps += [ "//base/win:base_win_buildflags" ]
...@@ -3539,7 +3537,6 @@ if (build_base_unittests) { ...@@ -3539,7 +3537,6 @@ if (build_base_unittests) {
"observer_list_unittest.nc", "observer_list_unittest.nc",
"optional_unittest.nc", "optional_unittest.nc",
"sequence_checker_unittest.nc", "sequence_checker_unittest.nc",
"strings/string16_unittest.nc",
"task/task_traits_extension_unittest.nc", "task/task_traits_extension_unittest.nc",
"task/task_traits_unittest.nc", "task/task_traits_unittest.nc",
"thread_annotations_unittest.nc", "thread_annotations_unittest.nc",
......
...@@ -14,7 +14,9 @@ ...@@ -14,7 +14,9 @@
#include "base/no_destructor.h" #include "base/no_destructor.h"
#include "base/run_loop.h" #include "base/run_loop.h"
#include "base/sanitizer_buildflags.h" #include "base/sanitizer_buildflags.h"
#include "base/strings/string16.h"
#include "base/strings/string_piece.h" #include "base/strings/string_piece.h"
#include "base/strings/utf_string_conversions.h"
#include "base/test/bind.h" #include "base/test/bind.h"
#include "base/test/scoped_logging_settings.h" #include "base/test/scoped_logging_settings.h"
#include "base/test/task_environment.h" #include "base/test/task_environment.h"
...@@ -863,6 +865,45 @@ TEST_F(LoggingTest, LogCrosSyslogFormat) { ...@@ -863,6 +865,45 @@ TEST_F(LoggingTest, LogCrosSyslogFormat) {
} }
#endif // BUILDFLAG(IS_CHROMEOS_ASH) #endif // BUILDFLAG(IS_CHROMEOS_ASH)
// Verifies that base::string16 values can be written to a narrow
// std::ostream through the custom operator<< used by logging, and checks the
// exact bytes produced for both well-formed and malformed UTF-16 input.
TEST_F(LoggingTest, String16) {
  // Empty and plain-ASCII strings.
  {
    const base::string16 empty_text;
    const base::string16 greeting = base::ASCIIToUTF16("Hello, world");

    std::ostringstream out;
    out << "Empty '" << empty_text << "' standard '" << greeting << "'";
    EXPECT_STREQ("Empty '' standard 'Hello, world'", out.str().c_str());
  }

  // Surrogate handling.
  {
    // A lone lead surrogate; expected output is the replacement character
    // EF BF BD.
    base::string16 lone_lead;
    lone_lead.push_back(0xd800);

    // A lone trail surrogate; also expected to become EF BF BD.
    base::string16 lone_trail;
    lone_trail.push_back(0xdc00);

    // A well-formed pair encoding Old Italic A (U+10300), followed by 'z'.
    // Expected output: F0 90 8C 80 'z'.
    base::string16 valid_pair;
    valid_pair.push_back(0xd800);
    valid_pair.push_back(0xdf00);
    valid_pair.push_back('z');

    // A lead surrogate followed by a non-surrogate. Expected output is the
    // replacement character followed by 's': EF BF BD 's'.
    base::string16 dangling_lead;
    dangling_lead.push_back(0xd800);
    dangling_lead.push_back('s');

    std::ostringstream out;
    out << lone_lead << "," << lone_trail << "," << valid_pair << ","
        << dangling_lead;
    EXPECT_STREQ("\xef\xbf\xbd,\xef\xbf\xbd,\xf0\x90\x8c\x80z,\xef\xbf\xbds",
                 out.str().c_str());
  }
}
} // namespace } // namespace
} // namespace logging } // namespace logging
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/string16.h"
#include <string>
#include "base/strings/utf_string_conversions.h"
namespace base {
int c16memcmp(const char16* s1, const char16* s2, size_t n) {
return std::char_traits<char16>::compare(s1, s2, n);
}
// Returns the length of the char16 string |s| as reported by
// std::char_traits<char16>::length.
size_t c16len(const char16* s) {
  const size_t unit_count = std::char_traits<char16>::length(s);
  return unit_count;
}
// Copies |n| char16 code units from |s2| into |s1| by forwarding to
// std::char_traits<char16>::copy and returns the pointer that call yields.
char16* c16memcpy(char16* s1, const char16* s2, size_t n) {
  using Traits = std::char_traits<char16>;
  char16* const destination = Traits::copy(s1, s2, n);
  return destination;
}
} // namespace base
...@@ -6,35 +6,18 @@ ...@@ -6,35 +6,18 @@
#define BASE_STRINGS_STRING16_H_ #define BASE_STRINGS_STRING16_H_
// WHAT: // WHAT:
// A version of std::basic_string that provides 2-byte characters even when // Type aliases for string and character types supporting UTF-16 data. Prior to
// wchar_t is not implemented as a 2-byte type. You can access this class as // C++11 there was no standard library solution for this, which is why wstring
// string16. We also define char16, which string16 is based upon. // was used where possible (i.e. where wchar_t holds UTF-16 encoded data).
// //
// WHY: // In C++11 we gained std::u16string, which is a cross-platform solution for
// On Windows, wchar_t is 2 bytes, and it can conveniently handle UTF-16/UCS-2 // UTF-16 strings. This is now the string16 type where ever wchar_t does not
// data. Plenty of existing code operates on strings encoded as UTF-16. // hold UTF16 data (i.e. commonly non-Windows platforms). Eventually this should
// // be used everywhere, at which point this type alias and this file should be
// On many other platforms, sizeof(wchar_t) is 4 bytes by default. We can make // removed. https://crbug.com/911896 tracks the migration effort.
// it 2 bytes by using the GCC flag -fshort-wchar. But then std::wstring fails
// at run time, because it calls some functions (like wcslen) that come from
// the system's native C library -- which was built with a 4-byte wchar_t!
// It's wasteful to use 4-byte wchar_t strings to carry UTF-16 data, and it's
// entirely improper on those systems where the encoding of wchar_t is defined
// as UTF-32.
//
// Here, we define string16, which is similar to std::wstring but replaces all
// libc functions with custom, 2-byte-char compatible routines. It is capable
// of carrying UTF-16-encoded data.
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <functional>
#include <ostream>
#include <string> #include <string>
#include "base/base_export.h"
#include "build/build_config.h" #include "build/build_config.h"
#if defined(WCHAR_T_IS_UTF16) #if defined(WCHAR_T_IS_UTF16)
...@@ -48,6 +31,7 @@ ...@@ -48,6 +31,7 @@
#define STRING16_LITERAL(x) L##x #define STRING16_LITERAL(x) L##x
namespace base { namespace base {
using char16 = wchar_t;
using string16 = std::wstring; using string16 = std::wstring;
} // namespace base } // namespace base
...@@ -56,21 +40,10 @@ using string16 = std::wstring; ...@@ -56,21 +40,10 @@ using string16 = std::wstring;
#define STRING16_LITERAL(x) u##x #define STRING16_LITERAL(x) u##x
namespace base { namespace base {
using char16 = char16_t;
using string16 = std::u16string; using string16 = std::u16string;
} // namespace base } // namespace base
#endif // WCHAR_T_IS_UTF16 #endif // WCHAR_T_IS_UTF16
namespace base {
// Code-unit type of string16, taken from the alias itself: wchar_t where
// string16 is std::wstring, char16_t where it is std::u16string.
using char16 = ::base::string16::value_type;
// TODO(crbug.com/911896): Remove these functions in favor of using
// std::char_traits<base::char16> directly.
// Compares the first |n| code units of |s1| and |s2|; implemented as
// std::char_traits<char16>::compare.
BASE_EXPORT int c16memcmp(const char16* s1, const char16* s2, size_t n);
// Returns the length of |s|; implemented as std::char_traits<char16>::length.
BASE_EXPORT size_t c16len(const char16* s);
// Copies |n| code units from |s2| to |s1|; implemented as
// std::char_traits<char16>::copy.
BASE_EXPORT char16* c16memcpy(char16* s1, const char16* s2, size_t n);
} // namespace base
#endif // BASE_STRINGS_STRING16_H_ #endif // BASE_STRINGS_STRING16_H_
...@@ -2,76 +2,22 @@ ...@@ -2,76 +2,22 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#include <sstream>
#include <unordered_set>
#include "base/strings/string16.h" #include "base/strings/string16.h"
#include "base/strings/utf_string_conversions.h"
#include "build/build_config.h"
#include "testing/gtest/include/gtest/gtest.h" #include "testing/gtest/include/gtest/gtest.h"
namespace base { namespace base {
// Ensure that STRING16_LITERAL can be used to instantiate constants of type
// char16 and char16[], respectively.
TEST(String16Test, String16Literal) { TEST(String16Test, String16Literal) {
static constexpr char16 kHelloWorld[] = STRING16_LITERAL("Hello, World"); static constexpr char16 kHelloChars[] = {
constexpr StringPiece16 kPiece = kHelloWorld; STRING16_LITERAL('H'), STRING16_LITERAL('e'), STRING16_LITERAL('l'),
static_assert(kHelloWorld == kPiece, ""); STRING16_LITERAL('l'), STRING16_LITERAL('o'), STRING16_LITERAL('\0'),
static_assert(kHelloWorld == kPiece.data(), ""); };
string16 hello_world = kHelloWorld;
EXPECT_EQ(kHelloWorld, hello_world);
}
// We define a custom operator<< for string16 so we can use it with logging.
// This tests that conversion.
TEST(String16Test, OutputStream) {
// Basic stream test.
{
std::ostringstream stream;
stream << "Empty '" << string16() << "' standard '"
<< string16(ASCIIToUTF16("Hello, world")) << "'";
EXPECT_STREQ("Empty '' standard 'Hello, world'",
stream.str().c_str());
}
// Interesting edge cases.
{
// These should each get converted to the invalid character: EF BF BD.
string16 initial_surrogate;
initial_surrogate.push_back(0xd800);
string16 final_surrogate;
final_surrogate.push_back(0xdc00);
// Old italic A = U+10300, will get converted to: F0 90 8C 80 'z'.
string16 surrogate_pair;
surrogate_pair.push_back(0xd800);
surrogate_pair.push_back(0xdf00);
surrogate_pair.push_back('z');
// Will get converted to the invalid char + 's': EF BF BD 's'.
string16 unterminated_surrogate;
unterminated_surrogate.push_back(0xd800);
unterminated_surrogate.push_back('s');
std::ostringstream stream;
stream << initial_surrogate << "," << final_surrogate << ","
<< surrogate_pair << "," << unterminated_surrogate;
EXPECT_STREQ("\xef\xbf\xbd,\xef\xbf\xbd,\xf0\x90\x8c\x80z,\xef\xbf\xbds",
stream.str().c_str());
}
}
TEST(String16Test, Hash) {
string16 str1 = ASCIIToUTF16("hello");
string16 str2 = ASCIIToUTF16("world");
std::unordered_set<string16> set;
set.insert(str1); static constexpr char16 kHelloStr[] = STRING16_LITERAL("Hello");
EXPECT_EQ(1u, set.count(str1)); EXPECT_EQ(std::char_traits<char16>::compare(kHelloChars, kHelloStr, 6), 0);
EXPECT_EQ(0u, set.count(str2));
} }
} // namespace base } // namespace base
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// This is a "No Compile Test".
// http://dev.chromium.org/developers/testing/no-compile-tests
#include "base/strings/string16.h"
#if defined(NCTEST_NO_KOENIG_LOOKUP_FOR_STRING16) // [r"use of undeclared identifier 'ShouldNotBeFound'"]
// base::string16 is declared as a typedef. It should not cause other functions
// in base to be found via Argument-dependent lookup.
namespace base {
// Deliberately declared inside base:: and given an empty body: the test only
// checks whether an unqualified call from outside base:: can find this name.
void ShouldNotBeFound(const base::string16& arg) {}
}
// Intentionally not in base:: namespace.
// The unqualified call below is expected to be rejected by the compiler with
// "use of undeclared identifier 'ShouldNotBeFound'", which is the diagnostic
// this no-compile test case is annotated with.
void WontCompile() {
base::string16 s;
ShouldNotBeFound(s);
}
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment