Commit c5ff412b authored by Paul Jensen's avatar Paul Jensen Committed by Commit Bot

Make UnescapeBinaryURLComponent work in-place on strings

One of the top crashers in net/ is out-of-memory with data URLs
that comes when the string is needlessly duplicated during
unescaping.  Looks like data URLs are being used to load fonts.
I'm unescaping in-place to avoid requiring two copies of the string.

Cq-Include-Trybots: luci.chromium.try:ios-simulator-full-configs;master.tryserver.chromium.mac:ios-simulator-cronet
Change-Id: I8724c62b254623e5025478b27388e42d5c4a1473
Reviewed-on: https://chromium-review.googlesource.com/1169635Reviewed-by: default avatarMaksim Ivanov <emaxx@chromium.org>
Reviewed-by: default avatarAlex Moshchuk <alexmos@chromium.org>
Reviewed-by: default avatarTatsuhisa Yamaguchi <yamaguchi@chromium.org>
Reviewed-by: default avatarBen Wells <benwells@chromium.org>
Reviewed-by: default avatarTaiju Tsuiki <tzik@chromium.org>
Reviewed-by: default avatarVarun Khaneja <vakh@chromium.org>
Reviewed-by: default avatarMark Cogan <marq@chromium.org>
Reviewed-by: default avatarMatt Menke <mmenke@chromium.org>
Commit-Queue: Paul Jensen <pauljensen@chromium.org>
Cr-Commit-Position: refs/heads/master@{#584404}
parent 8a7ecd9c
...@@ -44,8 +44,8 @@ GURL FileSystemURLToExternalFileURL( ...@@ -44,8 +44,8 @@ GURL FileSystemURLToExternalFileURL(
base::FilePath ExternalFileURLToVirtualPath(const GURL& url) { base::FilePath ExternalFileURLToVirtualPath(const GURL& url) {
if (!url.is_valid() || url.scheme() != content::kExternalFileScheme) if (!url.is_valid() || url.scheme() != content::kExternalFileScheme)
return base::FilePath(); return base::FilePath();
const std::string path_string = std::string path_string;
net::UnescapeBinaryURLComponent(url.path(), net::UnescapeRule::NORMAL); net::UnescapeBinaryURLComponent(url.path(), &path_string);
return base::FilePath::FromUTF8Unsafe(path_string); return base::FilePath::FromUTF8Unsafe(path_string);
} }
......
...@@ -446,14 +446,18 @@ class QueryParams { ...@@ -446,14 +446,18 @@ class QueryParams {
bool Check(const std::string& name, const std::string& expected_value) { bool Check(const std::string& name, const std::string& expected_value) {
bool found = false; bool found = false;
for (ParamMap::const_iterator i(params_.begin()); i != params_.end(); ++i) { for (ParamMap::const_iterator i(params_.begin()); i != params_.end(); ++i) {
std::string unescaped_name(net::UnescapeBinaryURLComponent( std::string unescaped_name;
i->first, net::UnescapeRule::REPLACE_PLUS_WITH_SPACE)); net::UnescapeBinaryURLComponent(
i->first, net::UnescapeRule::REPLACE_PLUS_WITH_SPACE,
&unescaped_name);
if (unescaped_name == name) { if (unescaped_name == name) {
if (found) if (found)
return false; return false;
found = true; found = true;
std::string unescaped_value(net::UnescapeBinaryURLComponent( std::string unescaped_value;
i->second, net::UnescapeRule::REPLACE_PLUS_WITH_SPACE)); net::UnescapeBinaryURLComponent(
i->second, net::UnescapeRule::REPLACE_PLUS_WITH_SPACE,
&unescaped_value);
if (unescaped_value != expected_value) if (unescaped_value != expected_value)
return false; return false;
} }
......
...@@ -45,7 +45,7 @@ std::string Unescape(const std::string& url) { ...@@ -45,7 +45,7 @@ std::string Unescape(const std::string& url) {
int loop_var = 0; int loop_var = 0;
do { do {
old_size = unescaped_str.size(); old_size = unescaped_str.size();
unescaped_str = net::UnescapeBinaryURLComponent(unescaped_str); net::UnescapeBinaryURLComponent(unescaped_str, &unescaped_str);
} while (old_size != unescaped_str.size() && } while (old_size != unescaped_str.size() &&
++loop_var <= kMaxLoopIterations); ++loop_var <= kMaxLoopIterations);
......
...@@ -145,8 +145,9 @@ void PromiseWriterHelper(const DropData& drop_data, ...@@ -145,8 +145,9 @@ void PromiseWriterHelper(const DropData& drop_data,
// If NSURL creation failed, check for a badly-escaped JavaScript URL. // If NSURL creation failed, check for a badly-escaped JavaScript URL.
// Strip out any existing escapes and then re-escape uniformly. // Strip out any existing escapes and then re-escape uniformly.
if (!url && dropData_->url.SchemeIs(url::kJavaScriptScheme)) { if (!url && dropData_->url.SchemeIs(url::kJavaScriptScheme)) {
std::string unescapedUrlString = std::string unescapedUrlString;
net::UnescapeBinaryURLComponent(dropData_->url.spec()); net::UnescapeBinaryURLComponent(dropData_->url.spec(),
&unescapedUrlString);
std::string escapedUrlString = std::string escapedUrlString =
net::EscapeUrlEncodedData(unescapedUrlString, false); net::EscapeUrlEncodedData(unescapedUrlString, false);
url = [NSURL URLWithString:SysUTF8ToNSString(escapedUrlString)]; url = [NSURL URLWithString:SysUTF8ToNSString(escapedUrlString)];
......
...@@ -395,9 +395,11 @@ bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) { ...@@ -395,9 +395,11 @@ bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) {
const net::UnescapeRule::Type kUnescapeRules = const net::UnescapeRule::Type kUnescapeRules =
net::UnescapeRule::REPLACE_PLUS_WITH_SPACE; net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
result->set_name(net::UnescapeBinaryURLComponent(name_, kUnescapeRules)); std::string unescaped_name;
const std::string unescaped_value = net::UnescapeBinaryURLComponent(name_, kUnescapeRules, &unescaped_name);
net::UnescapeBinaryURLComponent(value_, kUnescapeRules); result->set_name(unescaped_name);
std::string unescaped_value;
net::UnescapeBinaryURLComponent(value_, kUnescapeRules, &unescaped_value);
const base::StringPiece unescaped_data(unescaped_value.data(), const base::StringPiece unescaped_data(unescaped_value.data(),
unescaped_value.length()); unescaped_value.length());
if (base::IsStringUTF8(unescaped_data)) { if (base::IsStringUTF8(unescaped_data)) {
...@@ -546,7 +548,8 @@ bool FormDataParserMultipart::GetNextNameValue(Result* result) { ...@@ -546,7 +548,8 @@ bool FormDataParserMultipart::GetNextNameValue(Result* result) {
return_value = FinishReadingPart(value_assigned ? nullptr : &value); return_value = FinishReadingPart(value_assigned ? nullptr : &value);
} }
std::string unescaped_name = net::UnescapeBinaryURLComponent(name); std::string unescaped_name;
net::UnescapeBinaryURLComponent(name.as_string(), &unescaped_name);
result->set_name(unescaped_name); result->set_name(unescaped_name);
if (value_assigned) { if (value_assigned) {
// Hold filename as value. // Hold filename as value.
......
...@@ -26,7 +26,8 @@ namespace { ...@@ -26,7 +26,8 @@ namespace {
std::string ExtractUlrSpecFromQuery( std::string ExtractUlrSpecFromQuery(
const net::test_server::HttpRequest& request) { const net::test_server::HttpRequest& request) {
GURL request_url = request.GetURL(); GURL request_url = request.GetURL();
std::string spec = net::UnescapeBinaryURLComponent(request_url.query()); std::string spec;
net::UnescapeBinaryURLComponent(request_url.query(), &spec);
// Escape the URL spec. // Escape the URL spec.
GURL url(spec); GURL url(spec);
......
...@@ -101,9 +101,8 @@ bool DataURL::Parse(const GURL& url, ...@@ -101,9 +101,8 @@ bool DataURL::Parse(const GURL& url,
// For base64, we may have url-escaped whitespace which is not part // For base64, we may have url-escaped whitespace which is not part
// of the data, and should be stripped. Otherwise, the escaped whitespace // of the data, and should be stripped. Otherwise, the escaped whitespace
// could be part of the payload, so don't strip it. // could be part of the payload, so don't strip it.
if (base64_encoded) { if (base64_encoded)
temp_data = UnescapeBinaryURLComponent(temp_data); UnescapeBinaryURLComponent(temp_data, &temp_data);
}
// Strip whitespace. // Strip whitespace.
if (base64_encoded || !(mime_type->compare(0, 5, "text/") == 0 || if (base64_encoded || !(mime_type->compare(0, 5, "text/") == 0 ||
...@@ -111,9 +110,8 @@ bool DataURL::Parse(const GURL& url, ...@@ -111,9 +110,8 @@ bool DataURL::Parse(const GURL& url,
base::EraseIf(temp_data, base::IsAsciiWhitespace<wchar_t>); base::EraseIf(temp_data, base::IsAsciiWhitespace<wchar_t>);
} }
if (!base64_encoded) { if (!base64_encoded)
temp_data = UnescapeBinaryURLComponent(temp_data); UnescapeBinaryURLComponent(temp_data, &temp_data);
}
if (base64_encoded) { if (base64_encoded) {
size_t length = temp_data.length(); size_t length = temp_data.length();
......
...@@ -469,8 +469,9 @@ base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments( ...@@ -469,8 +469,9 @@ base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments(
return base::UTF8ToUTF16WithAdjustments(text, adjustments); return base::UTF8ToUTF16WithAdjustments(text, adjustments);
} }
std::string UnescapeBinaryURLComponent(base::StringPiece escaped_text, void UnescapeBinaryURLComponent(const std::string& escaped_text,
UnescapeRule::Type rules) { UnescapeRule::Type rules,
std::string* unescaped_text) {
// Only NORMAL and REPLACE_PLUS_WITH_SPACE are supported. // Only NORMAL and REPLACE_PLUS_WITH_SPACE are supported.
DCHECK(rules != UnescapeRule::NONE); DCHECK(rules != UnescapeRule::NONE);
DCHECK(!(rules & DCHECK(!(rules &
...@@ -479,31 +480,37 @@ std::string UnescapeBinaryURLComponent(base::StringPiece escaped_text, ...@@ -479,31 +480,37 @@ std::string UnescapeBinaryURLComponent(base::StringPiece escaped_text,
// The output of the unescaping is always smaller than the input, so we can // The output of the unescaping is always smaller than the input, so we can
// reserve the input size to make sure we have enough buffer and don't have // reserve the input size to make sure we have enough buffer and don't have
// to allocate in the loop below. // to allocate in the loop below.
std::string result; // Increase capacity before size, as just resizing can grow capacity
result.reserve(escaped_text.length()); // needlessly beyond our requested size.
if (unescaped_text->capacity() < escaped_text.size())
unescaped_text->reserve(escaped_text.size());
if (unescaped_text->size() < escaped_text.size())
unescaped_text->resize(escaped_text.size());
for (size_t i = 0, max = escaped_text.size(); i < max;) { size_t output_index = 0;
for (size_t i = 0, max = unescaped_text->size(); i < max;) {
unsigned char byte; unsigned char byte;
// UnescapeUnsignedByteAtIndex does bounds checking, so this is always safe // UnescapeUnsignedByteAtIndex does bounds checking, so this is always safe
// to call. // to call.
if (UnescapeUnsignedByteAtIndex(escaped_text, i, &byte)) { if (UnescapeUnsignedByteAtIndex(escaped_text, i, &byte)) {
result.push_back(byte); (*unescaped_text)[output_index++] = byte;
i += 3; i += 3;
continue; continue;
} }
if ((rules & UnescapeRule::REPLACE_PLUS_WITH_SPACE) && if ((rules & UnescapeRule::REPLACE_PLUS_WITH_SPACE) &&
escaped_text[i] == '+') { escaped_text[i] == '+') {
result.push_back(' '); (*unescaped_text)[output_index++] = ' ';
++i; ++i;
continue; continue;
} }
result.push_back(escaped_text[i]); (*unescaped_text)[output_index++] = escaped_text[i++];
++i;
} }
return result; DCHECK_LE(output_index, unescaped_text->size());
unescaped_text->resize(output_index);
} }
base::string16 UnescapeForHTML(base::StringPiece16 input) { base::string16 UnescapeForHTML(base::StringPiece16 input) {
......
...@@ -140,9 +140,16 @@ NET_EXPORT base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments( ...@@ -140,9 +140,16 @@ NET_EXPORT base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments(
// be used when displaying the decoded data to the user. // be used when displaying the decoded data to the user.
// //
// Only the NORMAL and REPLACE_PLUS_WITH_SPACE rules are allowed. // Only the NORMAL and REPLACE_PLUS_WITH_SPACE rules are allowed.
NET_EXPORT std::string UnescapeBinaryURLComponent( // |escaped_text| and |unescaped_text| can be the same string.
base::StringPiece escaped_text, NET_EXPORT void UnescapeBinaryURLComponent(const std::string& escaped_text,
UnescapeRule::Type rules = UnescapeRule::NORMAL); UnescapeRule::Type rules,
std::string* unescaped_text);
NET_EXPORT inline void UnescapeBinaryURLComponent(
const std::string& escaped_text,
std::string* unescaped_text) {
UnescapeBinaryURLComponent(escaped_text, UnescapeRule::NORMAL,
unescaped_text);
}
// Unescapes the following ampersand character codes from |text|: // Unescapes the following ampersand character codes from |text|:
// &lt; &gt; &amp; &quot; &#39; // &lt; &gt; &amp; &quot; &#39;
......
...@@ -411,8 +411,13 @@ TEST(EscapeTest, UnescapeBinaryURLComponent) { ...@@ -411,8 +411,13 @@ TEST(EscapeTest, UnescapeBinaryURLComponent) {
}; };
for (const auto& test_case : kTestCases) { for (const auto& test_case : kTestCases) {
EXPECT_EQ(std::string(test_case.output), std::string output;
UnescapeBinaryURLComponent(test_case.input, test_case.rules)); UnescapeBinaryURLComponent(test_case.input, test_case.rules, &output);
EXPECT_EQ(std::string(test_case.output), output);
// Also test in-place unescaping.
output = test_case.input;
UnescapeBinaryURLComponent(output, test_case.rules, &output);
EXPECT_EQ(std::string(test_case.output), output);
} }
// Test NULL character unescaping, which can't be tested above since those are // Test NULL character unescaping, which can't be tested above since those are
...@@ -425,7 +430,13 @@ TEST(EscapeTest, UnescapeBinaryURLComponent) { ...@@ -425,7 +430,13 @@ TEST(EscapeTest, UnescapeBinaryURLComponent) {
expected.push_back(0); expected.push_back(0);
expected.push_back(0); expected.push_back(0);
expected.append("9Test"); expected.append("9Test");
EXPECT_EQ(expected, UnescapeBinaryURLComponent(input)); std::string output;
UnescapeBinaryURLComponent(input, &output);
EXPECT_EQ(expected, output);
// Also test in-place unescaping.
output = input;
UnescapeBinaryURLComponent(output, &output);
EXPECT_EQ(expected, output);
} }
TEST(EscapeTest, EscapeForHTML) { TEST(EscapeTest, EscapeForHTML) {
......
...@@ -120,7 +120,7 @@ bool FileURLToFilePath(const GURL& url, base::FilePath* file_path) { ...@@ -120,7 +120,7 @@ bool FileURLToFilePath(const GURL& url, base::FilePath* file_path) {
// Unescape all percent-encoded sequences, including blacklisted-for-display // Unescape all percent-encoded sequences, including blacklisted-for-display
// characters, control characters and invalid UTF-8 byte sequences. // characters, control characters and invalid UTF-8 byte sequences.
// Percent-encoded bytes are not meaningful in a file system. // Percent-encoded bytes are not meaningful in a file system.
path = UnescapeBinaryURLComponent(path); UnescapeBinaryURLComponent(path, &path);
#if defined(OS_WIN) #if defined(OS_WIN)
if (base::IsStringUTF8(path)) { if (base::IsStringUTF8(path)) {
......
...@@ -247,9 +247,10 @@ std::unique_ptr<HttpResponse> HandleExpectAndSetCookie( ...@@ -247,9 +247,10 @@ std::unique_ptr<HttpResponse> HandleExpectAndSetCookie(
http_response->set_content_type("text/html"); http_response->set_content_type("text/html");
if (got_all_expected) { if (got_all_expected) {
for (const auto& cookie : query_list.at("set")) { for (const auto& cookie : query_list.at("set")) {
http_response->AddCustomHeader( std::string unescaped_cookie;
"Set-Cookie", UnescapeBinaryURLComponent( UnescapeBinaryURLComponent(cookie, UnescapeRule::REPLACE_PLUS_WITH_SPACE,
cookie, UnescapeRule::REPLACE_PLUS_WITH_SPACE)); &unescaped_cookie);
http_response->AddCustomHeader("Set-Cookie", unescaped_cookie);
} }
} }
...@@ -498,7 +499,8 @@ std::unique_ptr<HttpResponse> HandleAuthDigest(const HttpRequest& request) { ...@@ -498,7 +499,8 @@ std::unique_ptr<HttpResponse> HandleAuthDigest(const HttpRequest& request) {
std::unique_ptr<HttpResponse> HandleServerRedirect(HttpStatusCode redirect_code, std::unique_ptr<HttpResponse> HandleServerRedirect(HttpStatusCode redirect_code,
const HttpRequest& request) { const HttpRequest& request) {
GURL request_url = request.GetURL(); GURL request_url = request.GetURL();
std::string dest = UnescapeBinaryURLComponent(request_url.query()); std::string dest;
UnescapeBinaryURLComponent(request_url.query(), &dest);
std::unique_ptr<BasicHttpResponse> http_response(new BasicHttpResponse); std::unique_ptr<BasicHttpResponse> http_response(new BasicHttpResponse);
http_response->set_code(redirect_code); http_response->set_code(redirect_code);
...@@ -518,8 +520,11 @@ std::unique_ptr<HttpResponse> HandleCrossSiteRedirect( ...@@ -518,8 +520,11 @@ std::unique_ptr<HttpResponse> HandleCrossSiteRedirect(
if (!ShouldHandle(request, "/cross-site")) if (!ShouldHandle(request, "/cross-site"))
return nullptr; return nullptr;
std::string dest_all = UnescapeBinaryURLComponent( std::string dest_all;
request.relative_url.substr(std::string("/cross-site").size() + 1)); UnescapeBinaryURLComponent(
request.relative_url.substr(std::string("/cross-site").size() + 1),
&dest_all);
std::string dest; std::string dest;
size_t delimiter = dest_all.find("/"); size_t delimiter = dest_all.find("/");
...@@ -543,7 +548,8 @@ std::unique_ptr<HttpResponse> HandleCrossSiteRedirect( ...@@ -543,7 +548,8 @@ std::unique_ptr<HttpResponse> HandleCrossSiteRedirect(
// Returns a meta redirect to URL. // Returns a meta redirect to URL.
std::unique_ptr<HttpResponse> HandleClientRedirect(const HttpRequest& request) { std::unique_ptr<HttpResponse> HandleClientRedirect(const HttpRequest& request) {
GURL request_url = request.GetURL(); GURL request_url = request.GetURL();
std::string dest = UnescapeBinaryURLComponent(request_url.query()); std::string dest;
UnescapeBinaryURLComponent(request_url.query(), &dest);
std::unique_ptr<BasicHttpResponse> http_response(new BasicHttpResponse); std::unique_ptr<BasicHttpResponse> http_response(new BasicHttpResponse);
http_response->set_content_type("text/html"); http_response->set_content_type("text/html");
......
...@@ -86,9 +86,10 @@ std::unique_ptr<HttpResponse> HandlePrefixedRequest( ...@@ -86,9 +86,10 @@ std::unique_ptr<HttpResponse> HandlePrefixedRequest(
RequestQuery ParseQuery(const GURL& url) { RequestQuery ParseQuery(const GURL& url) {
RequestQuery queries; RequestQuery queries;
for (QueryIterator it(url); !it.IsAtEnd(); it.Advance()) { for (QueryIterator it(url); !it.IsAtEnd(); it.Advance()) {
queries[UnescapeBinaryURLComponent(it.GetKey(), std::string unescaped_query;
UnescapeRule::REPLACE_PLUS_WITH_SPACE)] UnescapeBinaryURLComponent(
.push_back(it.GetUnescapedValue()); it.GetKey(), UnescapeRule::REPLACE_PLUS_WITH_SPACE, &unescaped_query);
queries[unescaped_query].push_back(it.GetUnescapedValue());
} }
return queries; return queries;
} }
......
...@@ -175,7 +175,8 @@ bool ParseFileSystemSchemeURL(const GURL& url, ...@@ -175,7 +175,8 @@ bool ParseFileSystemSchemeURL(const GURL& url,
if (file_system_type == kFileSystemTypeUnknown) if (file_system_type == kFileSystemTypeUnknown)
return false; return false;
std::string path = net::UnescapeBinaryURLComponent(url.path()); std::string path;
net::UnescapeBinaryURLComponent(url.path(), &path);
// Ensure the path is relative. // Ensure the path is relative.
while (!path.empty() && path[0] == '/') while (!path.empty() && path[0] == '/')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment