Make UnescapeBinaryURLComponent work in-place on strings

One of the top crashers in net/ is an out-of-memory failure with data URLs, which occurs when the string is needlessly duplicated during unescaping. It looks like data URLs are being used to load fonts. I'm unescaping in-place to avoid requiring two copies of the string. Cq-Include-Trybots: luci.chromium.try:ios-simulator-full-configs;master.tryserver.chromium.mac:ios-simulator-cronet Change-Id: I8724c62b254623e5025478b27388e42d5c4a1473 Reviewed-on: https://chromium-review.googlesource.com/1169635 Reviewed-by: Maksim Ivanov <emaxx@chromium.org> Reviewed-by: Alex Moshchuk <alexmos@chromium.org> Reviewed-by: Tatsuhisa Yamaguchi <yamaguchi@chromium.org> Reviewed-by: Ben Wells <benwells@chromium.org> Reviewed-by: Taiju Tsuiki <tzik@chromium.org> Reviewed-by: Varun Khaneja <vakh@chromium.org> Reviewed-by: Mark Cogan <marq@chromium.org> Reviewed-by: Matt Menke <mmenke@chromium.org> Commit-Queue: Paul Jensen <pauljensen@chromium.org> Cr-Commit-Position: refs/heads/master@{#584404}

Make UnescapeBinaryURLComponent work in-place on strings
One of the top crashers in net/ is an out-of-memory failure with data URLs, which occurs when the string is needlessly duplicated during unescaping. It looks like data URLs are being used to load fonts. I'm unescaping in-place to avoid requiring two copies of the string. Cq-Include-Trybots: luci.chromium.try:ios-simulator-full-configs;master.tryserver.chromium.mac:ios-simulator-cronet Change-Id: I8724c62b254623e5025478b27388e42d5c4a1473 Reviewed-on: https://chromium-review.googlesource.com/1169635 Reviewed-by: Maksim Ivanov <emaxx@chromium.org> Reviewed-by: Alex Moshchuk <alexmos@chromium.org> Reviewed-by: Tatsuhisa Yamaguchi <yamaguchi@chromium.org> Reviewed-by: Ben Wells <benwells@chromium.org> Reviewed-by: Taiju Tsuiki <tzik@chromium.org> Reviewed-by: Varun Khaneja <vakh@chromium.org> Reviewed-by: Mark Cogan <marq@chromium.org> Reviewed-by: Matt Menke <mmenke@chromium.org> Commit-Queue: Paul Jensen <pauljensen@chromium.org> Cr-Commit-Position: refs/heads/master@{#584404}
c5ff412b · Paul Jensen · Commit Bot · 8a7ecd9c · c5ff412b · c5ff412b
Commit c5ff412b authored Aug 20, 2018 by Paul Jensen Committed by Commit Bot Aug 20, 2018
14 changed files
--- a/chrome/browser/chromeos/fileapi/external_file_url_util.cc
+++ b/chrome/browser/chromeos/fileapi/external_file_url_util.cc
@@ -44,8 +44,8 @@ GURL FileSystemURLToExternalFileURL(
 base::FilePath ExternalFileURLToVirtualPath(const GURL& url) {
  if (!url.is_valid() || url.scheme() != content::kExternalFileScheme)
    return base::FilePath();
-  const std::string path_string =
+  std::string path_string;
-      net::UnescapeBinaryURLComponent(url.path(), net::UnescapeRule::NORMAL);
+  net::UnescapeBinaryURLComponent(url.path(), &path_string);
  return base::FilePath::FromUTF8Unsafe(path_string);
 }

--- a/components/policy/core/common/cloud/device_management_service_unittest.cc
+++ b/components/policy/core/common/cloud/device_management_service_unittest.cc
@@ -446,14 +446,18 @@ class QueryParams {
  bool Check(const std::string& name, const std::string& expected_value) {
    bool found = false;
    for (ParamMap::const_iterator i(params_.begin()); i != params_.end(); ++i) {
-      std::string unescaped_name(net::UnescapeBinaryURLComponent(
+      std::string unescaped_name;
-          i->first, net::UnescapeRule::REPLACE_PLUS_WITH_SPACE));
+      net::UnescapeBinaryURLComponent(
+          i->first, net::UnescapeRule::REPLACE_PLUS_WITH_SPACE,
+          &unescaped_name);
      if (unescaped_name == name) {
        if (found)
          return false;
        found = true;
-        std::string unescaped_value(net::UnescapeBinaryURLComponent(
+        std::string unescaped_value;
-            i->second, net::UnescapeRule::REPLACE_PLUS_WITH_SPACE));
+        net::UnescapeBinaryURLComponent(
+            i->second, net::UnescapeRule::REPLACE_PLUS_WITH_SPACE,
+            &unescaped_value);
        if (unescaped_value != expected_value)
          return false;
      }

--- a/components/safe_browsing/db/v4_protocol_manager_util.cc
+++ b/components/safe_browsing/db/v4_protocol_manager_util.cc
@@ -45,7 +45,7 @@ std::string Unescape(const std::string& url) {
  int loop_var = 0;
  do {
    old_size = unescaped_str.size();
-    unescaped_str = net::UnescapeBinaryURLComponent(unescaped_str);
+    net::UnescapeBinaryURLComponent(unescaped_str, &unescaped_str);
  } while (old_size != unescaped_str.size() &&
           ++loop_var <= kMaxLoopIterations);

--- a/content/browser/web_contents/web_drag_source_mac.mm
+++ b/content/browser/web_contents/web_drag_source_mac.mm
@@ -145,8 +145,9 @@ void PromiseWriterHelper(const DropData& drop_data,
    // If NSURL creation failed, check for a badly-escaped JavaScript URL.
    // Strip out any existing escapes and then re-escape uniformly.
    if (!url && dropData_->url.SchemeIs(url::kJavaScriptScheme)) {
-      std::string unescapedUrlString =
+      std::string unescapedUrlString;
-          net::UnescapeBinaryURLComponent(dropData_->url.spec());
+      net::UnescapeBinaryURLComponent(dropData_->url.spec(),
+                                      &unescapedUrlString);
      std::string escapedUrlString =
          net::EscapeUrlEncodedData(unescapedUrlString, false);
      url = [NSURL URLWithString:SysUTF8ToNSString(escapedUrlString)];

--- a/extensions/browser/api/web_request/form_data_parser.cc
+++ b/extensions/browser/api/web_request/form_data_parser.cc
@@ -395,9 +395,11 @@ bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) {
    const net::UnescapeRule::Type kUnescapeRules =
        net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
-    result->set_name(net::UnescapeBinaryURLComponent(name_, kUnescapeRules));
+    std::string unescaped_name;
-    const std::string unescaped_value =
+    net::UnescapeBinaryURLComponent(name_, kUnescapeRules, &unescaped_name);
-        net::UnescapeBinaryURLComponent(value_, kUnescapeRules);
+    result->set_name(unescaped_name);
+    std::string unescaped_value;
+    net::UnescapeBinaryURLComponent(value_, kUnescapeRules, &unescaped_value);
    const base::StringPiece unescaped_data(unescaped_value.data(),
                                           unescaped_value.length());
    if (base::IsStringUTF8(unescaped_data)) {
@@ -546,7 +548,8 @@ bool FormDataParserMultipart::GetNextNameValue(Result* result) {
    return_value = FinishReadingPart(value_assigned ? nullptr : &value);
  }
-  std::string unescaped_name = net::UnescapeBinaryURLComponent(name);
+  std::string unescaped_name;
+  net::UnescapeBinaryURLComponent(name.as_string(), &unescaped_name);
  result->set_name(unescaped_name);
  if (value_assigned) {
    // Hold filename as value.

--- a/ios/testing/embedded_test_server_handlers.cc
+++ b/ios/testing/embedded_test_server_handlers.cc
@@ -26,7 +26,8 @@ namespace {
 std::string ExtractUlrSpecFromQuery(
    const net::test_server::HttpRequest& request) {
  GURL request_url = request.GetURL();
-  std::string spec = net::UnescapeBinaryURLComponent(request_url.query());
+  std::string spec;
+  net::UnescapeBinaryURLComponent(request_url.query(), &spec);
  // Escape the URL spec.
  GURL url(spec);

--- a/net/base/data_url.cc
+++ b/net/base/data_url.cc
@@ -101,9 +101,8 @@ bool DataURL::Parse(const GURL& url,
  // For base64, we may have url-escaped whitespace which is not part
  // of the data, and should be stripped. Otherwise, the escaped whitespace
  // could be part of the payload, so don't strip it.
-  if (base64_encoded) {
+  if (base64_encoded)
-    temp_data = UnescapeBinaryURLComponent(temp_data);
+    UnescapeBinaryURLComponent(temp_data, &temp_data);
-  }
  // Strip whitespace.
  if (base64_encoded || !(mime_type->compare(0, 5, "text/") == 0 ||
@@ -111,9 +110,8 @@ bool DataURL::Parse(const GURL& url,
    base::EraseIf(temp_data, base::IsAsciiWhitespace<wchar_t>);
  }
-  if (!base64_encoded) {
+  if (!base64_encoded)
-    temp_data = UnescapeBinaryURLComponent(temp_data);
+    UnescapeBinaryURLComponent(temp_data, &temp_data);
-  }
  if (base64_encoded) {
    size_t length = temp_data.length();

--- a/net/base/escape.cc
+++ b/net/base/escape.cc
@@ -469,8 +469,9 @@ base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments(
  return base::UTF8ToUTF16WithAdjustments(text, adjustments);
 }
-std::string UnescapeBinaryURLComponent(base::StringPiece escaped_text,
+void UnescapeBinaryURLComponent(const std::string& escaped_text,
-                                       UnescapeRule::Type rules) {
+                                UnescapeRule::Type rules,
+                                std::string* unescaped_text) {
  // Only NORMAL and REPLACE_PLUS_WITH_SPACE are supported.
  DCHECK(rules != UnescapeRule::NONE);
  DCHECK(!(rules &
@@ -479,31 +480,39 @@ std::string UnescapeBinaryURLComponent(base::StringPiece escaped_text,
  // The output of the unescaping is always smaller than the input, so we can
  // reserve the input size to make sure we have enough buffer and don't have
  // to allocate in the loop below.
-  std::string result;
+  // Increase capacity before size, as just resizing can grow capacity
-  result.reserve(escaped_text.length());
+  // needlessly beyond our requested size.
+  if (unescaped_text->capacity() < escaped_text.size())
+    unescaped_text->reserve(escaped_text.size());
+  if (unescaped_text->size() < escaped_text.size())
+    unescaped_text->resize(escaped_text.size());
-  for (size_t i = 0, max = escaped_text.size(); i < max;) {
+  size_t output_index = 0;
+  // Bound the scan by the input length: |unescaped_text| is only ever grown
+  // above, so it may be longer than |escaped_text| when they are distinct.
+  for (size_t i = 0, max = escaped_text.size(); i < max;) {
    unsigned char byte;
    // UnescapeUnsignedByteAtIndex does bounds checking, so this is always safe
    // to call.
    if (UnescapeUnsignedByteAtIndex(escaped_text, i, &byte)) {
-      result.push_back(byte);
+      (*unescaped_text)[output_index++] = byte;
      i += 3;
      continue;
    }
    if ((rules & UnescapeRule::REPLACE_PLUS_WITH_SPACE) &&
        escaped_text[i] == '+') {
-      result.push_back(' ');
+      (*unescaped_text)[output_index++] = ' ';
      ++i;
      continue;
    }
-    result.push_back(escaped_text[i]);
+    (*unescaped_text)[output_index++] = escaped_text[i++];
-    ++i;
  }
-  return result;
+  DCHECK_LE(output_index, unescaped_text->size());
+  unescaped_text->resize(output_index);
 }
 base::string16 UnescapeForHTML(base::StringPiece16 input) {

--- a/net/base/escape.h
+++ b/net/base/escape.h
@@ -140,9 +140,16 @@ NET_EXPORT base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments(
 // be used when displaying the decoded data to the user.
 //
 // Only the NORMAL and REPLACE_PLUS_WITH_SPACE rules are allowed.
-NET_EXPORT std::string UnescapeBinaryURLComponent(
+// |escaped_text| and |unescaped_text| can be the same string.
-    base::StringPiece escaped_text,
+NET_EXPORT void UnescapeBinaryURLComponent(const std::string& escaped_text,
-    UnescapeRule::Type rules = UnescapeRule::NORMAL);
+                                           UnescapeRule::Type rules,
+                                           std::string* unescaped_text);
+NET_EXPORT inline void UnescapeBinaryURLComponent(
+    const std::string& escaped_text,
+    std::string* unescaped_text) {
+  UnescapeBinaryURLComponent(escaped_text, UnescapeRule::NORMAL,
+                             unescaped_text);
+}
 // Unescapes the following ampersand character codes from |text|:
 // &lt; &gt; &amp; &quot; &#39;

--- a/net/base/escape_unittest.cc
+++ b/net/base/escape_unittest.cc
@@ -411,8 +411,13 @@ TEST(EscapeTest, UnescapeBinaryURLComponent) {
  };
  for (const auto& test_case : kTestCases) {
-    EXPECT_EQ(std::string(test_case.output),
+    std::string output;
-              UnescapeBinaryURLComponent(test_case.input, test_case.rules));
+    UnescapeBinaryURLComponent(test_case.input, test_case.rules, &output);
+    EXPECT_EQ(std::string(test_case.output), output);
+    // Also test in-place unescaping.
+    output = test_case.input;
+    UnescapeBinaryURLComponent(output, test_case.rules, &output);
+    EXPECT_EQ(std::string(test_case.output), output);
  }
  // Test NULL character unescaping, which can't be tested above since those are
@@ -425,7 +430,13 @@ TEST(EscapeTest, UnescapeBinaryURLComponent) {
  expected.push_back(0);
  expected.push_back(0);
  expected.append("9Test");
-  EXPECT_EQ(expected, UnescapeBinaryURLComponent(input));
+  std::string output;
+  UnescapeBinaryURLComponent(input, &output);
+  EXPECT_EQ(expected, output);
+  // Also test in-place unescaping.
+  output = input;
+  UnescapeBinaryURLComponent(output, &output);
+  EXPECT_EQ(expected, output);
 }
 TEST(EscapeTest, EscapeForHTML) {

--- a/net/base/filename_util.cc
+++ b/net/base/filename_util.cc
@@ -120,7 +120,7 @@ bool FileURLToFilePath(const GURL& url, base::FilePath* file_path) {
  // Unescape all percent-encoded sequences, including blacklisted-for-display
  // characters, control characters and invalid UTF-8 byte sequences.
  // Percent-encoded bytes are not meaningful in a file system.
-  path = UnescapeBinaryURLComponent(path);
+  UnescapeBinaryURLComponent(path, &path);
 #if defined(OS_WIN)
  if (base::IsStringUTF8(path)) {

--- a/net/test/embedded_test_server/default_handlers.cc
+++ b/net/test/embedded_test_server/default_handlers.cc
@@ -247,9 +247,10 @@ std::unique_ptr<HttpResponse> HandleExpectAndSetCookie(
  http_response->set_content_type("text/html");
  if (got_all_expected) {
    for (const auto& cookie : query_list.at("set")) {
-      http_response->AddCustomHeader(
+      std::string unescaped_cookie;
-          "Set-Cookie", UnescapeBinaryURLComponent(
+      UnescapeBinaryURLComponent(cookie, UnescapeRule::REPLACE_PLUS_WITH_SPACE,
-                            cookie, UnescapeRule::REPLACE_PLUS_WITH_SPACE));
+                                 &unescaped_cookie);
+      http_response->AddCustomHeader("Set-Cookie", unescaped_cookie);
    }
  }
@@ -498,7 +499,8 @@ std::unique_ptr<HttpResponse> HandleAuthDigest(const HttpRequest& request) {
 std::unique_ptr<HttpResponse> HandleServerRedirect(HttpStatusCode redirect_code,
                                                   const HttpRequest& request) {
  GURL request_url = request.GetURL();
-  std::string dest = UnescapeBinaryURLComponent(request_url.query());
+  std::string dest;
+  UnescapeBinaryURLComponent(request_url.query(), &dest);
  std::unique_ptr<BasicHttpResponse> http_response(new BasicHttpResponse);
  http_response->set_code(redirect_code);
@@ -518,8 +520,11 @@ std::unique_ptr<HttpResponse> HandleCrossSiteRedirect(
  if (!ShouldHandle(request, "/cross-site"))
    return nullptr;
-  std::string dest_all = UnescapeBinaryURLComponent(
+  std::string dest_all;
-      request.relative_url.substr(std::string("/cross-site").size() + 1));
+  UnescapeBinaryURLComponent(
+      request.relative_url.substr(std::string("/cross-site").size() + 1),
+      &dest_all);
  std::string dest;
  size_t delimiter = dest_all.find("/");
@@ -543,7 +548,8 @@ std::unique_ptr<HttpResponse> HandleCrossSiteRedirect(
 // Returns a meta redirect to URL.
 std::unique_ptr<HttpResponse> HandleClientRedirect(const HttpRequest& request) {
  GURL request_url = request.GetURL();
-  std::string dest = UnescapeBinaryURLComponent(request_url.query());
+  std::string dest;
+  UnescapeBinaryURLComponent(request_url.query(), &dest);
  std::unique_ptr<BasicHttpResponse> http_response(new BasicHttpResponse);
  http_response->set_content_type("text/html");

--- a/net/test/embedded_test_server/request_handler_util.cc
+++ b/net/test/embedded_test_server/request_handler_util.cc
@@ -86,9 +86,10 @@ std::unique_ptr<HttpResponse> HandlePrefixedRequest(
 RequestQuery ParseQuery(const GURL& url) {
  RequestQuery queries;
  for (QueryIterator it(url); !it.IsAtEnd(); it.Advance()) {
-    queries[UnescapeBinaryURLComponent(it.GetKey(),
+    std::string unescaped_query;
-                                       UnescapeRule::REPLACE_PLUS_WITH_SPACE)]
+    UnescapeBinaryURLComponent(
-        .push_back(it.GetUnescapedValue());
+        it.GetKey(), UnescapeRule::REPLACE_PLUS_WITH_SPACE, &unescaped_query);
+    queries[unescaped_query].push_back(it.GetUnescapedValue());
  }
  return queries;
 }

--- a/storage/common/fileapi/file_system_util.cc
+++ b/storage/common/fileapi/file_system_util.cc
@@ -175,7 +175,8 @@ bool ParseFileSystemSchemeURL(const GURL& url,
  if (file_system_type == kFileSystemTypeUnknown)
    return false;
-  std::string path = net::UnescapeBinaryURLComponent(url.path());
+  std::string path;
+  net::UnescapeBinaryURLComponent(url.path(), &path);
  // Ensure the path is relative.
  while (!path.empty() && path[0] == '/')