Commit 585cd042 authored by Daniel Cheng's avatar Daniel Cheng Committed by Commit Bot

Treat overly long URLs as invalid during canonicalization.

Bug: 571784
Cq-Include-Trybots: master.tryserver.chromium.linux:linux_site_isolation
Change-Id: Ied18e9d76e13028b7077a4ed3631f0a64ce8bdf6
Reviewed-on: https://chromium-review.googlesource.com/678058
Commit-Queue: Daniel Cheng <dcheng@chromium.org>
Reviewed-by: default avatarAsanka Herath <asanka@chromium.org>
Reviewed-by: default avatarCharlie Reis <creis@chromium.org>
Reviewed-by: default avatarMike West <mkwst@chromium.org>
Cr-Commit-Position: refs/heads/master@{#506553}
parent 5f7d27b4
...@@ -218,11 +218,18 @@ void NavigationControllerAndroid::LoadUrl( ...@@ -218,11 +218,18 @@ void NavigationControllerAndroid::LoadUrl(
} }
if (data_url_as_string) { if (data_url_as_string) {
// Treat |data_url_as_string| as if we were intending to put it into a GURL // Note that this doesn't use GURL, since //url enforces a max length limit.
// field. Note that kMaxURLChars is only enforced when serializing URLs std::string data_url = ConvertJavaStringToUTF8(env, data_url_as_string);
// for IPC. // Sanity check that Java handed off a valid-looking data: URL.
GURL data_url = GURL(ConvertJavaStringToUTF8(env, data_url_as_string)); DCHECK(base::StartsWith(data_url, "data:", base::CompareCase::SENSITIVE));
DCHECK(data_url.SchemeIs(url::kDataScheme)); #if DCHECK_IS_ON()
{
std::string mime_type, charset, data;
DCHECK(net::DataURL::ParseCanonicalized(data_url, &mime_type, &charset,
&data));
}
#endif // DCHECK_IS_ON()
// params.url should be the empty data URL as well.
DCHECK(params.url.SchemeIs(url::kDataScheme)); DCHECK(params.url.SchemeIs(url::kDataScheme));
#if DCHECK_IS_ON() #if DCHECK_IS_ON()
{ {
...@@ -230,9 +237,8 @@ void NavigationControllerAndroid::LoadUrl( ...@@ -230,9 +237,8 @@ void NavigationControllerAndroid::LoadUrl(
DCHECK(net::DataURL::Parse(params.url, &mime_type, &charset, &data)); DCHECK(net::DataURL::Parse(params.url, &mime_type, &charset, &data));
DCHECK(data.empty()); DCHECK(data.empty());
} }
#endif #endif // DCHECK_IS_ON()
std::string s = data_url.spec(); params.data_url_as_string = base::RefCountedString::TakeString(&data_url);
params.data_url_as_string = base::RefCountedString::TakeString(&s);
} }
if (j_referrer_url) { if (j_referrer_url) {
......
...@@ -321,24 +321,24 @@ bool NavigatorImpl::NavigateToEntry( ...@@ -321,24 +321,24 @@ bool NavigatorImpl::NavigateToEntry(
if (frame_tree_node->IsMainFrame()) { if (frame_tree_node->IsMainFrame()) {
const GURL& virtual_url = entry.GetVirtualURL(); const GURL& virtual_url = entry.GetVirtualURL();
if (!virtual_url.is_valid() && !virtual_url.is_empty()) { if (!virtual_url.is_valid() && !virtual_url.is_empty()) {
LOG(WARNING) << "Refusing to load for invalid virtual URL: " DLOG(WARNING) << "Refusing to load for invalid virtual URL: "
<< virtual_url.possibly_invalid_spec(); << virtual_url.possibly_invalid_spec().substr(0, 1024);
return false; return false;
} }
} }
// Don't attempt to navigate to non-empty invalid URLs. // Don't attempt to navigate to non-empty invalid URLs.
if (!dest_url.is_valid() && !dest_url.is_empty()) { if (!dest_url.is_valid() && !dest_url.is_empty()) {
LOG(WARNING) << "Refusing to load invalid URL: " DLOG(WARNING) << "Refusing to load invalid URL: "
<< dest_url.possibly_invalid_spec(); << dest_url.possibly_invalid_spec().substr(0, 1024);
return false; return false;
} }
// The renderer will reject IPC messages with URLs longer than // The renderer will reject IPC messages with URLs longer than
// this limit, so don't attempt to navigate with a longer URL. // this limit, so don't attempt to navigate with a longer URL.
if (dest_url.spec().size() > url::kMaxURLChars) { if (dest_url.spec().size() > url::kMaxURLChars) {
LOG(WARNING) << "Refusing to load URL as it exceeds " << url::kMaxURLChars DLOG(WARNING) << "Refusing to load URL as it exceeds " << url::kMaxURLChars
<< " characters."; << " characters.";
return false; return false;
} }
......
...@@ -6544,24 +6544,31 @@ void RenderFrameImpl::LoadDataURL( ...@@ -6544,24 +6544,31 @@ void RenderFrameImpl::LoadDataURL(
blink::WebHistoryLoadType history_load_type, blink::WebHistoryLoadType history_load_type,
bool is_client_redirect) { bool is_client_redirect) {
// A loadData request with a specified base URL. // A loadData request with a specified base URL.
GURL data_url = params.url; DCHECK(params.url.is_valid());
#if defined(OS_ANDROID) #if defined(OS_ANDROID)
base::StringPiece data_url = params.url.spec();
if (!request_params.data_url_as_string.empty()) { if (!request_params.data_url_as_string.empty()) {
#if DCHECK_IS_ON() #if DCHECK_IS_ON()
{ {
std::string mime_type, charset, data; std::string mime_type, charset, data;
DCHECK(net::DataURL::Parse(data_url, &mime_type, &charset, &data)); DCHECK(net::DataURL::Parse(params.url, &mime_type, &charset, &data));
DCHECK(data.empty()); DCHECK(data.empty());
} }
#endif #endif // DCHECK_IS_ON()
data_url = GURL(request_params.data_url_as_string); if (base::StartsWith(request_params.data_url_as_string,
if (!data_url.is_valid() || !data_url.SchemeIs(url::kDataScheme)) { "data:", base::CompareCase::SENSITIVE)) {
data_url = params.url; data_url = request_params.data_url_as_string;
} }
} }
#endif #else // defined(OS_ANDROID)
const GURL& data_url = params.url;
#endif // defined(OS_ANDROID)
std::string mime_type, charset, data; std::string mime_type, charset, data;
#if defined(OS_ANDROID)
if (net::DataURL::ParseCanonicalized(data_url, &mime_type, &charset, &data)) {
#else
if (net::DataURL::Parse(data_url, &mime_type, &charset, &data)) { if (net::DataURL::Parse(data_url, &mime_type, &charset, &data)) {
#endif
const GURL base_url = params.base_url_for_data_url.is_empty() ? const GURL base_url = params.base_url_for_data_url.is_empty() ?
params.url : params.base_url_for_data_url; params.url : params.base_url_for_data_url;
bool replace = load_type == WebFrameLoadType::kReloadBypassingCache || bool replace = load_type == WebFrameLoadType::kReloadBypassingCache ||
......
...@@ -20,28 +20,26 @@ ...@@ -20,28 +20,26 @@
namespace net { namespace net {
// static namespace {
bool DataURL::Parse(const GURL& url, std::string* mime_type,
std::string* charset, std::string* data) {
if (!url.is_valid())
return false;
bool ParseInternal(base::StringPiece url,
std::string* mime_type,
std::string* charset,
std::string* data) {
DCHECK(mime_type->empty()); DCHECK(mime_type->empty());
DCHECK(charset->empty()); DCHECK(charset->empty());
std::string::const_iterator begin = url.spec().begin();
std::string::const_iterator end = url.spec().end();
std::string::const_iterator after_colon = std::find(begin, end, ':'); size_t after_colon = url.find(':');
if (after_colon == end) if (after_colon == base::StringPiece::npos)
return false; return false;
++after_colon; ++after_colon;
std::string::const_iterator comma = std::find(after_colon, end, ','); size_t comma = url.find(',', after_colon);
if (comma == end) if (comma == base::StringPiece::npos)
return false; return false;
std::vector<base::StringPiece> meta_data = std::vector<base::StringPiece> meta_data =
base::SplitStringPiece(base::StringPiece(after_colon, comma), ";", base::SplitStringPiece(url.substr(after_colon, comma - after_colon), ";",
base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
auto iter = meta_data.cbegin(); auto iter = meta_data.cbegin();
...@@ -95,7 +93,7 @@ bool DataURL::Parse(const GURL& url, std::string* mime_type, ...@@ -95,7 +93,7 @@ bool DataURL::Parse(const GURL& url, std::string* mime_type,
// (Spaces in a data URL should be escaped, which is handled below, so any // (Spaces in a data URL should be escaped, which is handled below, so any
// spaces now are wrong. People expect to be able to enter them in the URL // spaces now are wrong. People expect to be able to enter them in the URL
// bar for text, and it can't hurt, so we allow it.) // bar for text, and it can't hurt, so we allow it.)
std::string temp_data = std::string(comma + 1, end); std::string temp_data = url.substr(comma + 1).as_string();
// For base64, we may have url-escaped whitespace which is not part // For base64, we may have url-escaped whitespace which is not part
// of the data, and should be stripped. Otherwise, the escaped whitespace // of the data, and should be stripped. Otherwise, the escaped whitespace
...@@ -139,4 +137,27 @@ bool DataURL::Parse(const GURL& url, std::string* mime_type, ...@@ -139,4 +137,27 @@ bool DataURL::Parse(const GURL& url, std::string* mime_type,
return true; return true;
} }
} // namespace
bool DataURL::Parse(const GURL& url,
std::string* mime_type,
std::string* charset,
std::string* data) {
if (!url.is_valid())
return false;
return ParseInternal(url.spec(), mime_type, charset, data);
}
bool DataURL::ParseCanonicalized(base::StringPiece url,
std::string* mime_type,
std::string* charset,
std::string* data) {
// The canonicalized scheme should be lowercase. While this string doesn't
// actually go through URL canonicalization, assume that Android WebView built
// an already canonical URL.
if (!url.starts_with("data:"))
return false;
return ParseInternal(url, mime_type, charset, data);
}
} // namespace net } // namespace net
...@@ -7,6 +7,8 @@ ...@@ -7,6 +7,8 @@
#include <string> #include <string>
#include "base/strings/string_piece.h"
#include "build/build_config.h"
#include "net/base/net_export.h" #include "net/base/net_export.h"
class GURL; class GURL;
...@@ -60,6 +62,14 @@ class NET_EXPORT DataURL { ...@@ -60,6 +62,14 @@ class NET_EXPORT DataURL {
std::string* mime_type, std::string* mime_type,
std::string* charset, std::string* charset,
std::string* data); std::string* data);
// Similar to Parse(), but accepts the data URL as a StringPiece which must
// already be canonicalized. This is mainly useful as an escape hatch for data
// URLs which exceed the size limit for GURLs.
static bool ParseCanonicalized(base::StringPiece data_url,
std::string* mime_type,
std::string* charset,
std::string* data);
}; };
} // namespace net } // namespace net
......
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
// found in the LICENSE file. // found in the LICENSE file.
#include "net/base/data_url.h" #include "net/base/data_url.h"
#include "build/build_config.h"
#include "testing/gtest/include/gtest/gtest.h" #include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h" #include "url/gurl.h"
...@@ -234,15 +236,31 @@ TEST(DataURLTest, Parse) { ...@@ -234,15 +236,31 @@ TEST(DataURLTest, Parse) {
}; };
for (size_t i = 0; i < arraysize(tests); ++i) { for (size_t i = 0; i < arraysize(tests); ++i) {
std::string mime_type; {
std::string charset; std::string mime_type;
std::string data; std::string charset;
bool ok = DataURL::Parse(GURL(tests[i].url), &mime_type, &charset, &data); std::string data;
EXPECT_EQ(ok, tests[i].is_valid); bool ok = DataURL::Parse(GURL(tests[i].url), &mime_type, &charset, &data);
if (tests[i].is_valid) { EXPECT_EQ(ok, tests[i].is_valid);
EXPECT_EQ(tests[i].mime_type, mime_type); if (tests[i].is_valid) {
EXPECT_EQ(tests[i].charset, charset); EXPECT_EQ(tests[i].mime_type, mime_type);
EXPECT_EQ(tests[i].data, data); EXPECT_EQ(tests[i].charset, charset);
EXPECT_EQ(tests[i].data, data);
}
}
{
std::string mime_type;
std::string charset;
std::string data;
bool ok = DataURL::ParseCanonicalized(tests[i].url, &mime_type, &charset,
&data);
EXPECT_EQ(ok, tests[i].is_valid);
if (tests[i].is_valid) {
EXPECT_EQ(tests[i].mime_type, mime_type);
EXPECT_EQ(tests[i].charset, charset);
EXPECT_EQ(tests[i].data, data);
}
} }
} }
} }
......
<!DOCTYPE html>
<script src="../../resources/testharness.js"></script>
<script src="../../resources/testharnessreport.js"></script>
<script>
test(() => {
var original_location = location.toString();
var long_hash = "0123456789abcdef".repeat(1048576);
location.hash = long_hash;
assert_equals(location.toString(), original_location);
}, "test that attempts to assign an overly long hash are ignored");
</script>
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include "testing/gtest/include/gtest/gtest.h" #include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h" #include "url/gurl.h"
#include "url/url_canon.h" #include "url/url_canon.h"
#include "url/url_constants.h"
#include "url/url_test_utils.h" #include "url/url_test_utils.h"
namespace url { namespace url {
...@@ -881,4 +882,133 @@ TEST(GURLTest, EqualsIgnoringRef) { ...@@ -881,4 +882,133 @@ TEST(GURLTest, EqualsIgnoringRef) {
} }
} }
TEST(GURLTest, LongURLCanonicalize) {
const std::string kSchemeAndHost = "https://example.com";
// Reserve space for the scheme, host, and the initial '/'.
const size_t kPathLength = kMaxURLChars - kSchemeAndHost.size() - 1;
// URLs that canonicalize to the same size.
{
// A canonicalized GURL with a spec that's exactly kMaxURLChars should be
// valid.
GURL url(kSchemeAndHost + "/" + std::string(kPathLength, 'a'));
EXPECT_TRUE(url.is_valid());
}
{
// One that exceeds it in length should not be considered valid though.
GURL url(kSchemeAndHost + "/" + std::string(kPathLength + 1, 'a'));
EXPECT_FALSE(url.is_valid());
}
// URLs that canonicalize to a smaller size.
{
// The canonical URL won't include the port number, since it matches the
// default port for https. Eventhough the input spec to canonicalize is
// longer than kMaxURLChars, the canonicalized result is under the limit, so
// the url should be valid.
GURL url(kSchemeAndHost + ":443/" + std::string(kPathLength, 'a'));
EXPECT_TRUE(url.is_valid());
}
// URLs that canonicalize to a bigger size.
{
// Raw spaces should be percent encoded, resulting in a longer canonical
// spec.
GURL url(kSchemeAndHost + "/a" + std::string(kPathLength - 2, ' ') + "a");
EXPECT_FALSE(url.is_valid());
}
}
TEST(GURLTest, LongURLReplaceComponents) {
const GURL base_url("https://example.com/");
const size_t kPathLength = kMaxURLChars - base_url.spec().size();
// Replacements with a boring path (canonicalization will not alter the
// characters of the path).
{
url::Replacements<char> replacements;
std::string path(kPathLength, 'a');
replacements.SetPath(path.data(), url::Component(0, path.size()));
// The resulting url spec should be exactly kMaxURLChars in size.
GURL url = base_url.ReplaceComponents(replacements);
EXPECT_TRUE(url.is_valid());
}
{
url::Replacements<char> replacements;
std::string path(kPathLength + 1, 'a');
replacements.SetPath(path.data(), url::Component(0, path.size()));
GURL url = base_url.ReplaceComponents(replacements);
// The resulting url spec should be exceed kMaxURLChars in size.
EXPECT_FALSE(url.is_valid());
}
// Replacements where the input string to canonicalize exceeds kMaxURLChars,
// but the canonicalization will bring it under.
{
url::Replacements<char> replacements;
std::string port("443");
replacements.SetPort(port.data(), url::Component(0, port.size()));
std::string path(kPathLength, 'a');
replacements.SetPath(path.data(), url::Component(0, path.size()));
GURL url = base_url.ReplaceComponents(replacements);
// The resulting url spec should not exceed kMaxURLChars in size: the
// canonicalized form won't include the port.
EXPECT_TRUE(url.is_valid());
}
// Replacements where the input string to canonicalize does not exceed
// kMaxURLChars, but the canonicalization will bring it over.
{
url::Replacements<char> replacements;
std::string path = 'a' + std::string(kPathLength - 2, ' ') + 'a';
replacements.SetPath(path.data(), url::Component(0, path.size()));
GURL url = base_url.ReplaceComponents(replacements);
// The resulting url spec should exceed kMaxURLChars in size, since raw
// spaces will be percent encoded.
EXPECT_FALSE(url.is_valid());
}
}
TEST(GURLTest, LongURLResolve) {
const GURL base_url("https://example.com/");
const size_t kPathLength = kMaxURLChars - base_url.spec().size();
// Resolving with a boring path (canonicalization will not alter the
// characters of the path).
{
std::string path(kPathLength, 'a');
// The resulting url spec should be exactly kMaxURLChars in size.
GURL url = base_url.Resolve(path);
EXPECT_TRUE(url.is_valid());
}
{
std::string path(kPathLength + 1, 'a');
// The resulting url spec should be exceed kMaxURLChars in size.
GURL url = base_url.Resolve(path);
EXPECT_FALSE(url.is_valid());
}
// Resolving with a path where the input spec would exceed kMaxURLChars,
// but the canonicalization will bring it under.
{
std::string path = 'a' + std::string(kPathLength * 2, '\n') + 'a';
GURL url = base_url.Resolve(path);
// The resulting url spec should not exceed kMaxURLChars in size, since the
// newlines should be stripped.
EXPECT_TRUE(url.is_valid());
}
// Resolving with a path where the input spec would not exceed kMaxURLChars,
// but the canonicalization will bring it over.
{
std::string path = 'a' + std::string(kPathLength - 2, ' ') + 'a';
GURL url = base_url.Resolve(path);
// The resulting url spec should exceed kMaxURLChars in size, since raw
// spaces will be percent encoded.
EXPECT_FALSE(url.is_valid());
}
}
} // namespace url } // namespace url
...@@ -32,6 +32,4 @@ const char kHttpsSuboriginScheme[] = "https-so"; ...@@ -32,6 +32,4 @@ const char kHttpsSuboriginScheme[] = "https-so";
const char kStandardSchemeSeparator[] = "://"; const char kStandardSchemeSeparator[] = "://";
const size_t kMaxURLChars = 2 * 1024 * 1024;
} // namespace url } // namespace url
...@@ -41,7 +41,7 @@ URL_EXPORT extern const char kHttpsSuboriginScheme[]; ...@@ -41,7 +41,7 @@ URL_EXPORT extern const char kHttpsSuboriginScheme[];
// Used to separate a standard scheme and the hostname: "://". // Used to separate a standard scheme and the hostname: "://".
URL_EXPORT extern const char kStandardSchemeSeparator[]; URL_EXPORT extern const char kStandardSchemeSeparator[];
URL_EXPORT extern const size_t kMaxURLChars; constexpr int kMaxURLChars = 2 * 1024 * 1024;
} // namespace url } // namespace url
......
...@@ -723,7 +723,8 @@ bool Canonicalize(const char* spec, ...@@ -723,7 +723,8 @@ bool Canonicalize(const char* spec,
CanonOutput* output, CanonOutput* output,
Parsed* output_parsed) { Parsed* output_parsed) {
return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE, return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE,
charset_converter, output, output_parsed); charset_converter, output, output_parsed) &&
output->length() <= kMaxURLChars;
} }
bool Canonicalize(const base::char16* spec, bool Canonicalize(const base::char16* spec,
...@@ -733,7 +734,8 @@ bool Canonicalize(const base::char16* spec, ...@@ -733,7 +734,8 @@ bool Canonicalize(const base::char16* spec,
CanonOutput* output, CanonOutput* output,
Parsed* output_parsed) { Parsed* output_parsed) {
return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE, return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE,
charset_converter, output, output_parsed); charset_converter, output, output_parsed) &&
output->length() <= kMaxURLChars;
} }
bool ResolveRelative(const char* base_spec, bool ResolveRelative(const char* base_spec,
...@@ -744,9 +746,10 @@ bool ResolveRelative(const char* base_spec, ...@@ -744,9 +746,10 @@ bool ResolveRelative(const char* base_spec,
CharsetConverter* charset_converter, CharsetConverter* charset_converter,
CanonOutput* output, CanonOutput* output,
Parsed* output_parsed) { Parsed* output_parsed) {
return DoResolveRelative(base_spec, base_spec_len, base_parsed, return DoResolveRelative(base_spec, base_spec_len, base_parsed, relative,
relative, relative_length, relative_length, charset_converter, output,
charset_converter, output, output_parsed); output_parsed) &&
output->length() <= kMaxURLChars;
} }
bool ResolveRelative(const char* base_spec, bool ResolveRelative(const char* base_spec,
...@@ -757,9 +760,10 @@ bool ResolveRelative(const char* base_spec, ...@@ -757,9 +760,10 @@ bool ResolveRelative(const char* base_spec,
CharsetConverter* charset_converter, CharsetConverter* charset_converter,
CanonOutput* output, CanonOutput* output,
Parsed* output_parsed) { Parsed* output_parsed) {
return DoResolveRelative(base_spec, base_spec_len, base_parsed, return DoResolveRelative(base_spec, base_spec_len, base_parsed, relative,
relative, relative_length, relative_length, charset_converter, output,
charset_converter, output, output_parsed); output_parsed) &&
output->length() <= kMaxURLChars;
} }
bool ReplaceComponents(const char* spec, bool ReplaceComponents(const char* spec,
...@@ -770,7 +774,8 @@ bool ReplaceComponents(const char* spec, ...@@ -770,7 +774,8 @@ bool ReplaceComponents(const char* spec,
CanonOutput* output, CanonOutput* output,
Parsed* out_parsed) { Parsed* out_parsed) {
return DoReplaceComponents(spec, spec_len, parsed, replacements, return DoReplaceComponents(spec, spec_len, parsed, replacements,
charset_converter, output, out_parsed); charset_converter, output, out_parsed) &&
output->length() <= kMaxURLChars;
} }
bool ReplaceComponents(const char* spec, bool ReplaceComponents(const char* spec,
...@@ -781,7 +786,8 @@ bool ReplaceComponents(const char* spec, ...@@ -781,7 +786,8 @@ bool ReplaceComponents(const char* spec,
CanonOutput* output, CanonOutput* output,
Parsed* out_parsed) { Parsed* out_parsed) {
return DoReplaceComponents(spec, spec_len, parsed, replacements, return DoReplaceComponents(spec, spec_len, parsed, replacements,
charset_converter, output, out_parsed); charset_converter, output, out_parsed) &&
output->length() <= kMaxURLChars;
} }
void DecodeURLEscapeSequences(const char* input, void DecodeURLEscapeSequences(const char* input,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment