Commit ff69a10a authored by Nick Carter, committed by Commit Bot

[GURL] (2 of 2) Strip username/password/port when canonicalizing, if the...

[GURL] (2 of 2) Strip username/password/port when canonicalizing, if the scheme doesn't support them

The goal of this CL is to inhibit port numbers and usernames in internal schemes
like "chrome-extension" and "chrome". Currently, navigations to chrome-extension:// URLs
with ports actually get surprisingly far; it seems like no good can possibly come from
that.

A new SchemeType is added: SCHEME_WITH_HOST_AND_PORT (no user information).
This is only used when canonicalizing the inner URL of filesystem: -- e.g.,
filesystem:http://user@host:20/temp/foo now canonicalizes to
filesystem:http://host:20/temp/foo; whereas filesystem:chrome://user@host:20/temp/foo
canonicalizes to filesystem:chrome://host/temp/foo

Bug: 606001,809062
Cq-Include-Trybots: master.tryserver.chromium.mac:ios-simulator-cronet;master.tryserver.chromium.mac:ios-simulator-full-configs
Change-Id: I77c5ba3d2fe964deb8aadae95a06519ce038c472
Reviewed-on: https://chromium-review.googlesource.com/974380
Reviewed-by: Vasilii Sukhanov <vasilii@chromium.org>
Reviewed-by: Tommy Li <tommycli@chromium.org>
Reviewed-by: Mike West <mkwst@chromium.org>
Commit-Queue: Nick Carter <nick@chromium.org>
Cr-Commit-Position: refs/heads/master@{#547882}
parent 3e0db462
...@@ -53,7 +53,8 @@ bool CanonicalizeWebFacetURI(const std::string& input_uri, ...@@ -53,7 +53,8 @@ bool CanonicalizeWebFacetURI(const std::string& input_uri,
url::StdStringCanonOutput canonical_output(canonical_uri); url::StdStringCanonOutput canonical_output(canonical_uri);
bool canonicalization_succeeded = url::CanonicalizeStandardURL( bool canonicalization_succeeded = url::CanonicalizeStandardURL(
input_uri.c_str(), input_uri.size(), input_parsed, nullptr, input_uri.c_str(), input_uri.size(), input_parsed,
url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr,
&canonical_output, &canonical_parsed); &canonical_output, &canonical_parsed);
canonical_output.Complete(); canonical_output.Complete();
......
...@@ -260,10 +260,10 @@ struct FixupCase { ...@@ -260,10 +260,10 @@ struct FixupCase {
{"about:foo", "chrome://foo/"}, {"about:foo", "chrome://foo/"},
{"about:version", "chrome://version/"}, {"about:version", "chrome://version/"},
{"about:blank", "about:blank"}, {"about:blank", "about:blank"},
{"about:usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"}, {"about:usr:pwd@hst:20/pth?qry#ref", "chrome://hst/pth?qry#ref"},
{"about://usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"}, {"about://usr:pwd@hst/pth?qry#ref", "chrome://hst/pth?qry#ref"},
{"chrome:usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"}, {"chrome:usr:pwd@hst/pth?qry#ref", "chrome://hst/pth?qry#ref"},
{"chrome://usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"}, {"chrome://usr:pwd@hst/pth?qry#ref", "chrome://hst/pth?qry#ref"},
{"www:123", "http://www:123/"}, {"www:123", "http://www:123/"},
{" www:123", "http://www:123/"}, {" www:123", "http://www:123/"},
{"www.google.com?foo", "http://www.google.com/?foo"}, {"www.google.com?foo", "http://www.google.com/?foo"},
......
...@@ -197,7 +197,7 @@ TEST(GURLTest, CopyFileSystem) { ...@@ -197,7 +197,7 @@ TEST(GURLTest, CopyFileSystem) {
GURL url2(url); GURL url2(url);
EXPECT_TRUE(url2.is_valid()); EXPECT_TRUE(url2.is_valid());
EXPECT_EQ("filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref", url2.spec()); EXPECT_EQ("filesystem:https://google.com:99/t/foo;bar?q=a#ref", url2.spec());
EXPECT_EQ("filesystem", url2.scheme()); EXPECT_EQ("filesystem", url2.scheme());
EXPECT_EQ("", url2.username()); EXPECT_EQ("", url2.username());
EXPECT_EQ("", url2.password()); EXPECT_EQ("", url2.password());
...@@ -211,8 +211,8 @@ TEST(GURLTest, CopyFileSystem) { ...@@ -211,8 +211,8 @@ TEST(GURLTest, CopyFileSystem) {
const GURL* inner = url2.inner_url(); const GURL* inner = url2.inner_url();
ASSERT_TRUE(inner); ASSERT_TRUE(inner);
EXPECT_EQ("https", inner->scheme()); EXPECT_EQ("https", inner->scheme());
EXPECT_EQ("user", inner->username()); EXPECT_EQ("", inner->username());
EXPECT_EQ("pass", inner->password()); EXPECT_EQ("", inner->password());
EXPECT_EQ("google.com", inner->host()); EXPECT_EQ("google.com", inner->host());
EXPECT_EQ("99", inner->port()); EXPECT_EQ("99", inner->port());
EXPECT_EQ(99, inner->IntPort()); EXPECT_EQ(99, inner->IntPort());
......
...@@ -60,6 +60,7 @@ bool IsValidInput(const base::StringPiece& scheme, ...@@ -60,6 +60,7 @@ bool IsValidInput(const base::StringPiece& scheme,
return false; return false;
switch (scheme_type) { switch (scheme_type) {
case SCHEME_WITH_HOST_AND_PORT:
case SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION: case SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION:
// A URL with |scheme| is required to have the host and port (may be // A URL with |scheme| is required to have the host and port (may be
// omitted in a serialization if it's the same as the default value). // omitted in a serialization if it's the same as the default value).
......
...@@ -220,6 +220,31 @@ class URL_EXPORT CharsetConverter { ...@@ -220,6 +220,31 @@ class URL_EXPORT CharsetConverter {
CanonOutput* output) = 0; CanonOutput* output) = 0;
}; };
// Schemes --------------------------------------------------------------------
// Types of a scheme representing the requirements on the data represented by
// the authority component of a URL with the scheme.
//
// CanonicalizeStandardURL() and ReplaceStandardURL() take one of these values
// as |scheme_type|. When canonicalizing, user information is only emitted for
// SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, and a port is only emitted for
// SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION and SCHEME_WITH_HOST_AND_PORT;
// components the type does not allow are dropped from the output.
enum SchemeType {
// The authority component of a URL with the scheme has the form
// "username:password@host:port". The username and password entries are
// optional; the host may not be empty. The default value of the port can be
// omitted in serialization. This type occurs with network schemes like http,
// https, and ftp.
SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION,
// The authority component of a URL with the scheme has the form "host:port",
// and does not include username or password. The default value of the port
// can be omitted in serialization. Used by inner URLs of filesystem URLs of
// origins with network hosts, from which the username and password are
// stripped.
SCHEME_WITH_HOST_AND_PORT,
// The authority component of a URL with the scheme has the form "host", and
// does not include port, username, or password. Used when the hosts are not
// network addresses; for example, schemes used internally by the browser.
SCHEME_WITH_HOST,
// A URL with the scheme doesn't have the authority component.
SCHEME_WITHOUT_AUTHORITY,
};
// Whitespace ----------------------------------------------------------------- // Whitespace -----------------------------------------------------------------
// Searches for whitespace that should be removed from the middle of URLs, and // Searches for whitespace that should be removed from the middle of URLs, and
...@@ -549,12 +574,14 @@ URL_EXPORT void CanonicalizeRef(const base::char16* spec, ...@@ -549,12 +574,14 @@ URL_EXPORT void CanonicalizeRef(const base::char16* spec,
URL_EXPORT bool CanonicalizeStandardURL(const char* spec, URL_EXPORT bool CanonicalizeStandardURL(const char* spec,
int spec_len, int spec_len,
const Parsed& parsed, const Parsed& parsed,
SchemeType scheme_type,
CharsetConverter* query_converter, CharsetConverter* query_converter,
CanonOutput* output, CanonOutput* output,
Parsed* new_parsed); Parsed* new_parsed);
URL_EXPORT bool CanonicalizeStandardURL(const base::char16* spec, URL_EXPORT bool CanonicalizeStandardURL(const base::char16* spec,
int spec_len, int spec_len,
const Parsed& parsed, const Parsed& parsed,
SchemeType scheme_type,
CharsetConverter* query_converter, CharsetConverter* query_converter,
CanonOutput* output, CanonOutput* output,
Parsed* new_parsed); Parsed* new_parsed);
...@@ -802,6 +829,7 @@ class Replacements { ...@@ -802,6 +829,7 @@ class Replacements {
URL_EXPORT bool ReplaceStandardURL(const char* base, URL_EXPORT bool ReplaceStandardURL(const char* base,
const Parsed& base_parsed, const Parsed& base_parsed,
const Replacements<char>& replacements, const Replacements<char>& replacements,
SchemeType scheme_type,
CharsetConverter* query_converter, CharsetConverter* query_converter,
CanonOutput* output, CanonOutput* output,
Parsed* new_parsed); Parsed* new_parsed);
...@@ -809,6 +837,7 @@ URL_EXPORT bool ReplaceStandardURL( ...@@ -809,6 +837,7 @@ URL_EXPORT bool ReplaceStandardURL(
const char* base, const char* base,
const Parsed& base_parsed, const Parsed& base_parsed,
const Replacements<base::char16>& replacements, const Replacements<base::char16>& replacements,
SchemeType scheme_type,
CharsetConverter* query_converter, CharsetConverter* query_converter,
CanonOutput* output, CanonOutput* output,
Parsed* new_parsed); Parsed* new_parsed);
......
...@@ -43,16 +43,22 @@ bool DoCanonicalizeFileSystemURL(const CHAR* spec, ...@@ -43,16 +43,22 @@ bool DoCanonicalizeFileSystemURL(const CHAR* spec,
return false; return false;
bool success = true; bool success = true;
SchemeType inner_scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
if (CompareSchemeComponent(spec, inner_parsed->scheme, url::kFileScheme)) { if (CompareSchemeComponent(spec, inner_parsed->scheme, url::kFileScheme)) {
new_inner_parsed.scheme.begin = output->length(); new_inner_parsed.scheme.begin = output->length();
output->Append("file://", 7); output->Append("file://", 7);
new_inner_parsed.scheme.len = 4; new_inner_parsed.scheme.len = 4;
success &= CanonicalizePath(spec, inner_parsed->path, output, success &= CanonicalizePath(spec, inner_parsed->path, output,
&new_inner_parsed.path); &new_inner_parsed.path);
} else if (IsStandard(spec, inner_parsed->scheme)) { } else if (GetStandardSchemeType(spec, inner_parsed->scheme,
success = CanonicalizeStandardURL(spec, parsed.inner_parsed()->Length(), &inner_scheme_type)) {
*parsed.inner_parsed(), charset_converter, if (inner_scheme_type == SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION) {
output, &new_inner_parsed); // Strip out the user information from the inner URL, if any.
inner_scheme_type = SCHEME_WITH_HOST_AND_PORT;
}
success = CanonicalizeStandardURL(
spec, parsed.inner_parsed()->Length(), *parsed.inner_parsed(),
inner_scheme_type, charset_converter, output, &new_inner_parsed);
} else { } else {
// TODO(ericu): The URL is wrong, but should we try to output more of what // TODO(ericu): The URL is wrong, but should we try to output more of what
// we were given? Echoing back filesystem:mailto etc. doesn't seem all that // we were given? Echoing back filesystem:mailto etc. doesn't seem all that
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include "url/url_constants.h" #include "url/url_constants.h"
#include "url/url_file.h" #include "url/url_file.h"
#include "url/url_parse_internal.h" #include "url/url_parse_internal.h"
#include "url/url_util.h"
#include "url/url_util_internal.h" #include "url/url_util_internal.h"
namespace url { namespace url {
...@@ -407,7 +408,13 @@ bool DoResolveRelativeHost(const char* base_url, ...@@ -407,7 +408,13 @@ bool DoResolveRelativeHost(const char* base_url,
output->ReserveSizeIfNeeded( output->ReserveSizeIfNeeded(
replacements.components().Length() + replacements.components().Length() +
base_parsed.CountCharactersBefore(Parsed::USERNAME, false)); base_parsed.CountCharactersBefore(Parsed::USERNAME, false));
return ReplaceStandardURL(base_url, base_parsed, replacements, SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
if (!GetStandardSchemeType(base_url, base_parsed.scheme, &scheme_type)) {
// A path with an authority section gets canonicalized under standard URL
// rules, even though the base was not known to be standard.
scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
}
return ReplaceStandardURL(base_url, base_parsed, replacements, scheme_type,
query_converter, output, out_parsed); query_converter, output, out_parsed);
} }
......
...@@ -13,9 +13,10 @@ namespace url { ...@@ -13,9 +13,10 @@ namespace url {
namespace { namespace {
template<typename CHAR, typename UCHAR> template <typename CHAR, typename UCHAR>
bool DoCanonicalizeStandardURL(const URLComponentSource<CHAR>& source, bool DoCanonicalizeStandardURL(const URLComponentSource<CHAR>& source,
const Parsed& parsed, const Parsed& parsed,
SchemeType scheme_type,
CharsetConverter* query_converter, CharsetConverter* query_converter,
CanonOutput* output, CanonOutput* output,
Parsed* new_parsed) { Parsed* new_parsed) {
...@@ -23,10 +24,18 @@ bool DoCanonicalizeStandardURL(const URLComponentSource<CHAR>& source, ...@@ -23,10 +24,18 @@ bool DoCanonicalizeStandardURL(const URLComponentSource<CHAR>& source,
bool success = CanonicalizeScheme(source.scheme, parsed.scheme, bool success = CanonicalizeScheme(source.scheme, parsed.scheme,
output, &new_parsed->scheme); output, &new_parsed->scheme);
bool scheme_supports_user_info =
(scheme_type == SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION);
bool scheme_supports_ports =
(scheme_type == SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION ||
scheme_type == SCHEME_WITH_HOST_AND_PORT);
// Authority (username, password, host, port) // Authority (username, password, host, port)
bool have_authority; bool have_authority;
if (parsed.username.is_valid() || parsed.password.is_valid() || if ((scheme_supports_user_info &&
parsed.host.is_nonempty() || parsed.port.is_valid()) { (parsed.username.is_valid() || parsed.password.is_valid())) ||
parsed.host.is_nonempty() ||
(scheme_supports_ports && parsed.port.is_valid())) {
have_authority = true; have_authority = true;
// Only write the authority separators when we have a scheme. // Only write the authority separators when we have a scheme.
...@@ -36,11 +45,14 @@ bool DoCanonicalizeStandardURL(const URLComponentSource<CHAR>& source, ...@@ -36,11 +45,14 @@ bool DoCanonicalizeStandardURL(const URLComponentSource<CHAR>& source,
} }
// User info: the canonicalizer will handle the : and @. // User info: the canonicalizer will handle the : and @.
success &= CanonicalizeUserInfo(source.username, parsed.username, if (scheme_supports_user_info) {
source.password, parsed.password, success &= CanonicalizeUserInfo(
output, source.username, parsed.username, source.password, parsed.password,
&new_parsed->username, output, &new_parsed->username, &new_parsed->password);
&new_parsed->password); } else {
new_parsed->username.reset();
new_parsed->password.reset();
}
success &= CanonicalizeHost(source.host, parsed.host, success &= CanonicalizeHost(source.host, parsed.host,
output, &new_parsed->host); output, &new_parsed->host);
...@@ -50,10 +62,14 @@ bool DoCanonicalizeStandardURL(const URLComponentSource<CHAR>& source, ...@@ -50,10 +62,14 @@ bool DoCanonicalizeStandardURL(const URLComponentSource<CHAR>& source,
success = false; success = false;
// Port: the port canonicalizer will handle the colon. // Port: the port canonicalizer will handle the colon.
if (scheme_supports_ports) {
int default_port = DefaultPortForScheme( int default_port = DefaultPortForScheme(
&output->data()[new_parsed->scheme.begin], new_parsed->scheme.len); &output->data()[new_parsed->scheme.begin], new_parsed->scheme.len);
success &= CanonicalizePort(source.port, parsed.port, default_port, success &= CanonicalizePort(source.port, parsed.port, default_port,
output, &new_parsed->port); output, &new_parsed->port);
} else {
new_parsed->port.reset();
}
} else { } else {
// No authority, clear the components. // No authority, clear the components.
have_authority = false; have_authority = false;
...@@ -127,23 +143,25 @@ int DefaultPortForScheme(const char* scheme, int scheme_len) { ...@@ -127,23 +143,25 @@ int DefaultPortForScheme(const char* scheme, int scheme_len) {
bool CanonicalizeStandardURL(const char* spec, bool CanonicalizeStandardURL(const char* spec,
int spec_len, int spec_len,
const Parsed& parsed, const Parsed& parsed,
SchemeType scheme_type,
CharsetConverter* query_converter, CharsetConverter* query_converter,
CanonOutput* output, CanonOutput* output,
Parsed* new_parsed) { Parsed* new_parsed) {
return DoCanonicalizeStandardURL<char, unsigned char>( return DoCanonicalizeStandardURL<char, unsigned char>(
URLComponentSource<char>(spec), parsed, query_converter, URLComponentSource<char>(spec), parsed, scheme_type, query_converter,
output, new_parsed); output, new_parsed);
} }
bool CanonicalizeStandardURL(const base::char16* spec, bool CanonicalizeStandardURL(const base::char16* spec,
int spec_len, int spec_len,
const Parsed& parsed, const Parsed& parsed,
SchemeType scheme_type,
CharsetConverter* query_converter, CharsetConverter* query_converter,
CanonOutput* output, CanonOutput* output,
Parsed* new_parsed) { Parsed* new_parsed) {
return DoCanonicalizeStandardURL<base::char16, base::char16>( return DoCanonicalizeStandardURL<base::char16, base::char16>(
URLComponentSource<base::char16>(spec), parsed, query_converter, URLComponentSource<base::char16>(spec), parsed, scheme_type,
output, new_parsed); query_converter, output, new_parsed);
} }
// It might be nice in the future to optimize this so unchanged components don't // It might be nice in the future to optimize this so unchanged components don't
...@@ -158,6 +176,7 @@ bool CanonicalizeStandardURL(const base::char16* spec, ...@@ -158,6 +176,7 @@ bool CanonicalizeStandardURL(const base::char16* spec,
bool ReplaceStandardURL(const char* base, bool ReplaceStandardURL(const char* base,
const Parsed& base_parsed, const Parsed& base_parsed,
const Replacements<char>& replacements, const Replacements<char>& replacements,
SchemeType scheme_type,
CharsetConverter* query_converter, CharsetConverter* query_converter,
CanonOutput* output, CanonOutput* output,
Parsed* new_parsed) { Parsed* new_parsed) {
...@@ -165,7 +184,7 @@ bool ReplaceStandardURL(const char* base, ...@@ -165,7 +184,7 @@ bool ReplaceStandardURL(const char* base,
Parsed parsed(base_parsed); Parsed parsed(base_parsed);
SetupOverrideComponents(base, replacements, &source, &parsed); SetupOverrideComponents(base, replacements, &source, &parsed);
return DoCanonicalizeStandardURL<char, unsigned char>( return DoCanonicalizeStandardURL<char, unsigned char>(
source, parsed, query_converter, output, new_parsed); source, parsed, scheme_type, query_converter, output, new_parsed);
} }
// For 16-bit replacements, we turn all the replacements into UTF-8 so the // For 16-bit replacements, we turn all the replacements into UTF-8 so the
...@@ -173,6 +192,7 @@ bool ReplaceStandardURL(const char* base, ...@@ -173,6 +192,7 @@ bool ReplaceStandardURL(const char* base,
bool ReplaceStandardURL(const char* base, bool ReplaceStandardURL(const char* base,
const Parsed& base_parsed, const Parsed& base_parsed,
const Replacements<base::char16>& replacements, const Replacements<base::char16>& replacements,
SchemeType scheme_type,
CharsetConverter* query_converter, CharsetConverter* query_converter,
CanonOutput* output, CanonOutput* output,
Parsed* new_parsed) { Parsed* new_parsed) {
...@@ -181,7 +201,7 @@ bool ReplaceStandardURL(const char* base, ...@@ -181,7 +201,7 @@ bool ReplaceStandardURL(const char* base,
Parsed parsed(base_parsed); Parsed parsed(base_parsed);
SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
return DoCanonicalizeStandardURL<char, unsigned char>( return DoCanonicalizeStandardURL<char, unsigned char>(
source, parsed, query_converter, output, new_parsed); source, parsed, scheme_type, query_converter, output, new_parsed);
} }
} // namespace url } // namespace url
...@@ -1433,7 +1433,8 @@ TEST(URLCanonTest, CanonicalizeStandardURL) { ...@@ -1433,7 +1433,8 @@ TEST(URLCanonTest, CanonicalizeStandardURL) {
std::string out_str; std::string out_str;
StdStringCanonOutput output(&out_str); StdStringCanonOutput output(&out_str);
bool success = CanonicalizeStandardURL( bool success = CanonicalizeStandardURL(
cases[i].input, url_len, parsed, NULL, &output, &out_parsed); cases[i].input, url_len, parsed,
SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL, &output, &out_parsed);
output.Complete(); output.Complete();
EXPECT_EQ(cases[i].expected_success, success); EXPECT_EQ(cases[i].expected_success, success);
...@@ -1479,8 +1480,9 @@ TEST(URLCanonTest, ReplaceStandardURL) { ...@@ -1479,8 +1480,9 @@ TEST(URLCanonTest, ReplaceStandardURL) {
std::string out_str; std::string out_str;
StdStringCanonOutput output(&out_str); StdStringCanonOutput output(&out_str);
Parsed out_parsed; Parsed out_parsed;
ReplaceStandardURL(replace_cases[i].base, parsed, r, NULL, &output, ReplaceStandardURL(replace_cases[i].base, parsed, r,
&out_parsed); SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL,
&output, &out_parsed);
output.Complete(); output.Complete();
EXPECT_EQ(replace_cases[i].expected, out_str); EXPECT_EQ(replace_cases[i].expected, out_str);
...@@ -1501,7 +1503,9 @@ TEST(URLCanonTest, ReplaceStandardURL) { ...@@ -1501,7 +1503,9 @@ TEST(URLCanonTest, ReplaceStandardURL) {
std::string out_str1; std::string out_str1;
StdStringCanonOutput output1(&out_str1); StdStringCanonOutput output1(&out_str1);
Parsed new_parsed; Parsed new_parsed;
ReplaceStandardURL(src, parsed, r, NULL, &output1, &new_parsed); ReplaceStandardURL(src, parsed, r,
SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL,
&output1, &new_parsed);
output1.Complete(); output1.Complete();
EXPECT_STREQ("http://www.google.com/", out_str1.c_str()); EXPECT_STREQ("http://www.google.com/", out_str1.c_str());
...@@ -1509,7 +1513,9 @@ TEST(URLCanonTest, ReplaceStandardURL) { ...@@ -1509,7 +1513,9 @@ TEST(URLCanonTest, ReplaceStandardURL) {
r.SetPath(reinterpret_cast<char*>(0x00000001), Component()); r.SetPath(reinterpret_cast<char*>(0x00000001), Component());
std::string out_str2; std::string out_str2;
StdStringCanonOutput output2(&out_str2); StdStringCanonOutput output2(&out_str2);
ReplaceStandardURL(src, parsed, r, NULL, &output2, &new_parsed); ReplaceStandardURL(src, parsed, r,
SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL,
&output2, &new_parsed);
output2.Complete(); output2.Complete();
EXPECT_STREQ("http://www.google.com/", out_str2.c_str()); EXPECT_STREQ("http://www.google.com/", out_str2.c_str());
} }
...@@ -1564,24 +1570,39 @@ TEST(URLCanonTest, ReplaceFileURL) { ...@@ -1564,24 +1570,39 @@ TEST(URLCanonTest, ReplaceFileURL) {
TEST(URLCanonTest, ReplaceFileSystemURL) { TEST(URLCanonTest, ReplaceFileSystemURL) {
ReplaceCase replace_cases[] = { ReplaceCase replace_cases[] = {
// Replace everything in the outer URL. // Replace everything in the outer URL.
{"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, "/foo", "b", "c", "filesystem:file:///temporary/foo?b#c"}, {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL,
NULL, "/foo", "b", "c", "filesystem:file:///temporary/foo?b#c"},
// Replace nothing // Replace nothing
{"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "filesystem:file:///temporary/gaba?query#ref"}, {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, "filesystem:file:///temporary/gaba?query#ref"},
// Clear non-path components (common) // Clear non-path components (common)
{"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, kDeleteComp, kDeleteComp, "filesystem:file:///temporary/gaba"}, {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL,
NULL, NULL, kDeleteComp, kDeleteComp,
"filesystem:file:///temporary/gaba"},
// Replace path with something that doesn't begin with a slash and make // Replace path with something that doesn't begin with a slash and make
// sure it gets added properly. // sure it gets added properly.
{"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, "interesting/", NULL, NULL, "filesystem:file:///temporary/interesting/?query#ref"}, {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL,
// Replace scheme -- shouldn't do anything. NULL, "interesting/", NULL, NULL,
{"filesystem:http://u:p@bar.com/t/gaba?query#ref", "http", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "filesystem:http://u:p@bar.com/t/gaba?query#ref"}, "filesystem:file:///temporary/interesting/?query#ref"},
// Replace username -- shouldn't do anything. // Replace scheme -- shouldn't do anything except canonicalize.
{"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, "u2", NULL, NULL, NULL, NULL, NULL, NULL, "filesystem:http://u:p@bar.com/t/gaba?query#ref"}, {"filesystem:http://u:p@bar.com/t/gaba?query#ref", "http", NULL, NULL,
// Replace password -- shouldn't do anything. NULL, NULL, NULL, NULL, NULL,
{"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, NULL, "pw2", NULL, NULL, NULL, NULL, NULL, "filesystem:http://u:p@bar.com/t/gaba?query#ref"}, "filesystem:http://bar.com/t/gaba?query#ref"},
// Replace host -- shouldn't do anything. // Replace username -- shouldn't do anything except canonicalize.
{"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, NULL, NULL, "foo.com", NULL, NULL, NULL, NULL, "filesystem:http://u:p@bar.com/t/gaba?query#ref"}, {"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, "u2", NULL, NULL,
// Replace port -- shouldn't do anything. NULL, NULL, NULL, NULL, "filesystem:http://bar.com/t/gaba?query#ref"},
{"filesystem:http://u:p@bar.com:40/t/gaba?query#ref", NULL, NULL, NULL, NULL, "41", NULL, NULL, NULL, "filesystem:http://u:p@bar.com:40/t/gaba?query#ref"}, // Replace password -- shouldn't do anything except canonicalize.
{"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, NULL, "pw2",
NULL, NULL, NULL, NULL, NULL,
"filesystem:http://bar.com/t/gaba?query#ref"},
// Replace host -- shouldn't do anything except canonicalize.
{"filesystem:http://u:p@bar.com:80/t/gaba?query#ref", NULL, NULL, NULL,
"foo.com", NULL, NULL, NULL, NULL,
"filesystem:http://bar.com/t/gaba?query#ref"},
// Replace port -- shouldn't do anything except canonicalize.
{"filesystem:http://u:p@bar.com:40/t/gaba?query#ref", NULL, NULL, NULL,
NULL, "41", NULL, NULL, NULL,
"filesystem:http://bar.com:40/t/gaba?query#ref"},
}; };
for (size_t i = 0; i < arraysize(replace_cases); i++) { for (size_t i = 0; i < arraysize(replace_cases); i++) {
......
...@@ -244,7 +244,7 @@ bool DoCanonicalize(const CHAR* spec, ...@@ -244,7 +244,7 @@ bool DoCanonicalize(const CHAR* spec,
// This is the parsed version of the input URL, we have to canonicalize it // This is the parsed version of the input URL, we have to canonicalize it
// before storing it in our object. // before storing it in our object.
bool success; bool success;
SchemeType unused_scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION; SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
if (DoCompareSchemeComponent(spec, scheme, url::kFileScheme)) { if (DoCompareSchemeComponent(spec, scheme, url::kFileScheme)) {
// File URLs are special. // File URLs are special.
ParseFileURL(spec, spec_len, &parsed_input); ParseFileURL(spec, spec_len, &parsed_input);
...@@ -257,10 +257,10 @@ bool DoCanonicalize(const CHAR* spec, ...@@ -257,10 +257,10 @@ bool DoCanonicalize(const CHAR* spec,
charset_converter, output, charset_converter, output,
output_parsed); output_parsed);
} else if (DoIsStandard(spec, scheme, &unused_scheme_type)) { } else if (DoIsStandard(spec, scheme, &scheme_type)) {
// All "normal" URLs. // All "normal" URLs.
ParseStandardURL(spec, spec_len, &parsed_input); ParseStandardURL(spec, spec_len, &parsed_input);
success = CanonicalizeStandardURL(spec, spec_len, parsed_input, success = CanonicalizeStandardURL(spec, spec_len, parsed_input, scheme_type,
charset_converter, output, output_parsed); charset_converter, output, output_parsed);
} else if (DoCompareSchemeComponent(spec, scheme, url::kMailToScheme)) { } else if (DoCompareSchemeComponent(spec, scheme, url::kMailToScheme)) {
...@@ -442,10 +442,10 @@ bool DoReplaceComponents(const char* spec, ...@@ -442,10 +442,10 @@ bool DoReplaceComponents(const char* spec,
return ReplaceFileSystemURL(spec, parsed, replacements, charset_converter, return ReplaceFileSystemURL(spec, parsed, replacements, charset_converter,
output, out_parsed); output, out_parsed);
} }
SchemeType unused_scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION; SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
if (DoIsStandard(spec, parsed.scheme, &unused_scheme_type)) { if (DoIsStandard(spec, parsed.scheme, &scheme_type)) {
return ReplaceStandardURL(spec, parsed, replacements, charset_converter, return ReplaceStandardURL(spec, parsed, replacements, scheme_type,
output, out_parsed); charset_converter, output, out_parsed);
} }
if (DoCompareSchemeComponent(spec, parsed.scheme, url::kMailToScheme)) { if (DoCompareSchemeComponent(spec, parsed.scheme, url::kMailToScheme)) {
return ReplaceMailtoURL(spec, parsed, replacements, output, out_parsed); return ReplaceMailtoURL(spec, parsed, replacements, output, out_parsed);
...@@ -646,6 +646,12 @@ bool GetStandardSchemeType(const char* spec, ...@@ -646,6 +646,12 @@ bool GetStandardSchemeType(const char* spec,
return DoIsStandard(spec, scheme, type); return DoIsStandard(spec, scheme, type);
} }
// base::char16 overload of GetStandardSchemeType(). Forwards |spec|, |scheme|
// and the |type| out-parameter to DoIsStandard() and returns its result
// unchanged.
// NOTE(review): presumably returns true, and fills |*type|, only when |scheme|
// is a known standard scheme -- confirm against DoIsStandard().
bool GetStandardSchemeType(const base::char16* spec,
const Component& scheme,
SchemeType* type) {
return DoIsStandard(spec, scheme, type);
}
bool IsStandard(const base::char16* spec, const Component& scheme) { bool IsStandard(const base::char16* spec, const Component& scheme) {
SchemeType unused_scheme_type; SchemeType unused_scheme_type;
return DoIsStandard(spec, scheme, &unused_scheme_type); return DoIsStandard(spec, scheme, &unused_scheme_type);
......
...@@ -39,24 +39,6 @@ URL_EXPORT void Shutdown(); ...@@ -39,24 +39,6 @@ URL_EXPORT void Shutdown();
// Schemes --------------------------------------------------------------------- // Schemes ---------------------------------------------------------------------
// Types of a scheme representing the requirements on the data represented by
// the authority component of a URL with the scheme.
enum SchemeType {
// The authority component of a URL with the scheme, if any, has the form
// "username:password@host:port". The username and password entries are
// optional; the host may not be empty. The default value of the port
// can be omitted in serialization. This type occurs with network schemes
// like http, https, and ftp.
SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION,
// The authority component of a URL with this scheme, if any, consists only
// of a host. It does not contain port, username, or password. Schemes used
// internally by browser features usually work this way, as hostnames do not
// correspond to network hosts.
SCHEME_WITH_HOST,
// A URL with the scheme doesn't have the authority component.
SCHEME_WITHOUT_AUTHORITY,
};
// A pair for representing a standard scheme name and the SchemeType for it. // A pair for representing a standard scheme name and the SchemeType for it.
struct URL_EXPORT SchemeWithType { struct URL_EXPORT SchemeWithType {
const char* scheme; const char* scheme;
...@@ -175,6 +157,9 @@ URL_EXPORT bool IsReferrerScheme(const char* spec, const Component& scheme); ...@@ -175,6 +157,9 @@ URL_EXPORT bool IsReferrerScheme(const char* spec, const Component& scheme);
URL_EXPORT bool GetStandardSchemeType(const char* spec, URL_EXPORT bool GetStandardSchemeType(const char* spec,
const Component& scheme, const Component& scheme,
SchemeType* type); SchemeType* type);
// Overload of GetStandardSchemeType() that takes a base::char16 |spec| instead
// of a const char* one; |scheme| and the |type| out-parameter are the same.
URL_EXPORT bool GetStandardSchemeType(const base::char16* spec,
const Component& scheme,
SchemeType* type);
// Hosts ---------------------------------------------------------------------- // Hosts ----------------------------------------------------------------------
......
...@@ -318,6 +318,8 @@ TEST_F(URLUtilTest, TestResolveRelativeWithNonStandardBase) { ...@@ -318,6 +318,8 @@ TEST_F(URLUtilTest, TestResolveRelativeWithNonStandardBase) {
{"data:/Blah:Blah/", "file.html", true, "data:/Blah:Blah/file.html"}, {"data:/Blah:Blah/", "file.html", true, "data:/Blah:Blah/file.html"},
{"data:/Path/../part/part2", "file.html", true, {"data:/Path/../part/part2", "file.html", true,
"data:/Path/../part/file.html"}, "data:/Path/../part/file.html"},
{"data://text/html,payload", "//user:pass@host:33////payload22", true,
"data://user:pass@host:33////payload22"},
// Path URL canonicalization rules also apply to non-standard authority- // Path URL canonicalization rules also apply to non-standard authority-
// based URLs. // based URLs.
{"custom://Authority/", "file.html", true, {"custom://Authority/", "file.html", true,
...@@ -335,24 +337,26 @@ TEST_F(URLUtilTest, TestResolveRelativeWithNonStandardBase) { ...@@ -335,24 +337,26 @@ TEST_F(URLUtilTest, TestResolveRelativeWithNonStandardBase) {
// rules, even though the base was non-standard. // rules, even though the base was non-standard.
{"content://content.Provider/", "//other.Provider", true, {"content://content.Provider/", "//other.Provider", true,
"content://other.provider/"}, "content://other.provider/"},
// Resolving an absolute URL doesn't cause canonicalization of the // Resolving an absolute URL doesn't cause canonicalization of the
// result. // result.
{"about:blank", "custom://Authority", true, "custom://Authority"}, {"about:blank", "custom://Authority", true, "custom://Authority"},
// Fragment URLs can be resolved against a non-standard base. // Fragment URLs can be resolved against a non-standard base.
{"scheme://Authority/path", "#fragment", true, {"scheme://Authority/path", "#fragment", true,
"scheme://Authority/path#fragment"}, "scheme://Authority/path#fragment"},
{"scheme://Authority/", "#fragment", true, "scheme://Authority/#fragment"}, {"scheme://Authority/", "#fragment", true,
"scheme://Authority/#fragment"},
// Resolving should fail if the base URL is authority-based but is // Resolving should fail if the base URL is authority-based but is
// missing a path component (the '/' at the end). // missing a path component (the '/' at the end).
{"scheme://Authority", "path", false, ""}, {"scheme://Authority", "path", false, ""},
// Test resolving a fragment (only) against any kind of base-URL. // Test resolving a fragment (only) against any kind of base-URL.
{"about:blank", "#id42", true, "about:blank#id42" }, {"about:blank", "#id42", true, "about:blank#id42"},
{"about:blank", " #id42", true, "about:blank#id42" }, {"about:blank", " #id42", true, "about:blank#id42"},
{"about:blank#oldfrag", "#newfrag", true, "about:blank#newfrag" }, {"about:blank#oldfrag", "#newfrag", true, "about:blank#newfrag"},
// A surprising side effect of allowing fragments to resolve against // A surprising side effect of allowing fragments to resolve against
// any URL scheme is we might break javascript: URLs by doing so... // any URL scheme is we might break javascript: URLs by doing so...
{"javascript:alert('foo#bar')", "#badfrag", true, {"javascript:alert('foo#bar')", "#badfrag", true,
"javascript:alert('foo#badfrag" }, "javascript:alert('foo#badfrag"},
// In this case, the backslashes will not be canonicalized because it's a // In this case, the backslashes will not be canonicalized because it's a
// non-standard URL, but they will be treated as path separators, // non-standard URL, but they will be treated as path separators,
// giving the base URL here a path of "\". // giving the base URL here a path of "\".
...@@ -361,8 +365,7 @@ TEST_F(URLUtilTest, TestResolveRelativeWithNonStandardBase) { ...@@ -361,8 +365,7 @@ TEST_F(URLUtilTest, TestResolveRelativeWithNonStandardBase) {
// either "aaa://a\" or "aaa://a/" since the path is being replaced with // either "aaa://a\" or "aaa://a/" since the path is being replaced with
// the "current directory". But in the context of resolving on data URLs, // the "current directory". But in the context of resolving on data URLs,
// adding the requested dot doesn't seem wrong either. // adding the requested dot doesn't seem wrong either.
{"aaa://a\\", "aaa:.", true, "aaa://a\\." } {"aaa://a\\", "aaa:.", true, "aaa://a\\."}};
};
for (size_t i = 0; i < arraysize(resolve_non_standard_cases); i++) { for (size_t i = 0; i < arraysize(resolve_non_standard_cases); i++) {
const ResolveRelativeCase& test_data = resolve_non_standard_cases[i]; const ResolveRelativeCase& test_data = resolve_non_standard_cases[i];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment