[GURL] (2 of 2) Strip username/password/port when canonicalizing, if the...

[GURL] (2 of 2) Strip username/password/port when canonicalizing, if the scheme doesn't support them The goal of this CL is to inhibit port numbers and usernames in internal schemes like "chrome-extension" and "chrome". Currently, navigations to chrome-extension:// URLs with ports actually get surprisingly far; it seems like no good can possibly come from that. A new SchemeType is added: SCHEME_WITH_HOST_AND_PORT (no user information). This is only used when canonicalizing the inner URL of filesystem: -- e.g., filesystem:http://user@host:20/temp/foo now canonicalizes to filesystem:http://host:20/temp/foo; whereas filesystem:chrome://user@host:20/temp/foo canonicalizes to filesystem:chrome://host/temp/foo Bug: 606001,809062 Cq-Include-Trybots: master.tryserver.chromium.mac:ios-simulator-cronet;master.tryserver.chromium.mac:ios-simulator-full-configs Change-Id: I77c5ba3d2fe964deb8aadae95a06519ce038c472 Reviewed-on: https://chromium-review.googlesource.com/974380 Reviewed-by: Vasilii Sukhanov <vasilii@chromium.org> Reviewed-by: Tommy Li <tommycli@chromium.org> Reviewed-by: Mike West <mkwst@chromium.org> Commit-Queue: Nick Carter <nick@chromium.org> Cr-Commit-Position: refs/heads/master@{#547882}

[GURL] (2 of 2) Strip username/password/port when canonicalizing, if the...
[GURL] (2 of 2) Strip username/password/port when canonicalizing, if the scheme doesn't support them The goal of this CL is to inhibit port numbers and usernames in internal schemes like "chrome-extension" and "chrome". Currently, navigations to chrome-extension:// URLs with ports actually get surprisingly far; it seems like no good can possibly come from that. A new SchemeType is added: SCHEME_WITH_HOST_AND_PORT (no user information). This is only used when canonicalizing the inner URL of filesystem: -- e.g., filesystem:http://user@host:20/temp/foo now canonicalizes to filesystem:http://host:20/temp/foo; whereas filesystem:chrome://user@host:20/temp/foo canonicalizes to filesystem:chrome://host/temp/foo Bug: 606001,809062 Cq-Include-Trybots: master.tryserver.chromium.mac:ios-simulator-cronet;master.tryserver.chromium.mac:ios-simulator-full-configs Change-Id: I77c5ba3d2fe964deb8aadae95a06519ce038c472 Reviewed-on: https://chromium-review.googlesource.com/974380 Reviewed-by: Vasilii Sukhanov <vasilii@chromium.org> Reviewed-by: Tommy Li <tommycli@chromium.org> Reviewed-by: Mike West <mkwst@chromium.org> Commit-Queue: Nick Carter <nick@chromium.org> Cr-Commit-Position: refs/heads/master@{#547882}
ff69a10a · Nick Carter · Commit Bot · 3e0db462 · ff69a10a · ff69a10a
Commit ff69a10a authored Apr 04, 2018 by Nick Carter Committed by Commit Bot Apr 04, 2018
12 changed files
--- a/components/password_manager/core/browser/android_affiliation/affiliation_utils.cc
+++ b/components/password_manager/core/browser/android_affiliation/affiliation_utils.cc
@@ -53,7 +53,8 @@ bool CanonicalizeWebFacetURI(const std::string& input_uri,
  url::StdStringCanonOutput canonical_output(canonical_uri);
  bool canonicalization_succeeded = url::CanonicalizeStandardURL(
-      input_uri.c_str(), input_uri.size(), input_parsed, nullptr,
+      input_uri.c_str(), input_uri.size(), input_parsed,
+      url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr,
      &canonical_output, &canonical_parsed);
  canonical_output.Complete();

--- a/components/url_formatter/url_fixer_unittest.cc
+++ b/components/url_formatter/url_fixer_unittest.cc
@@ -260,10 +260,10 @@ struct FixupCase {
    {"about:foo", "chrome://foo/"},
    {"about:version", "chrome://version/"},
    {"about:blank", "about:blank"},
-  {"about:usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"},
+    {"about:usr:pwd@hst:20/pth?qry#ref", "chrome://hst/pth?qry#ref"},
-  {"about://usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"},
+    {"about://usr:pwd@hst/pth?qry#ref", "chrome://hst/pth?qry#ref"},
-  {"chrome:usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"},
+    {"chrome:usr:pwd@hst/pth?qry#ref", "chrome://hst/pth?qry#ref"},
-  {"chrome://usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"},
+    {"chrome://usr:pwd@hst/pth?qry#ref", "chrome://hst/pth?qry#ref"},
    {"www:123", "http://www:123/"},
    {"   www:123", "http://www:123/"},
    {"www.google.com?foo", "http://www.google.com/?foo"},

--- a/url/gurl_unittest.cc
+++ b/url/gurl_unittest.cc
@@ -197,7 +197,7 @@ TEST(GURLTest, CopyFileSystem) {
  GURL url2(url);
  EXPECT_TRUE(url2.is_valid());
-  EXPECT_EQ("filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref", url2.spec());
+  EXPECT_EQ("filesystem:https://google.com:99/t/foo;bar?q=a#ref", url2.spec());
  EXPECT_EQ("filesystem", url2.scheme());
  EXPECT_EQ("", url2.username());
  EXPECT_EQ("", url2.password());
@@ -211,8 +211,8 @@ TEST(GURLTest, CopyFileSystem) {
  const GURL* inner = url2.inner_url();
  ASSERT_TRUE(inner);
  EXPECT_EQ("https", inner->scheme());
-  EXPECT_EQ("user", inner->username());
+  EXPECT_EQ("", inner->username());
-  EXPECT_EQ("pass", inner->password());
+  EXPECT_EQ("", inner->password());
  EXPECT_EQ("google.com", inner->host());
  EXPECT_EQ("99", inner->port());
  EXPECT_EQ(99, inner->IntPort());

--- a/url/scheme_host_port.cc
+++ b/url/scheme_host_port.cc
@@ -60,6 +60,7 @@ bool IsValidInput(const base::StringPiece& scheme,
    return false;
  switch (scheme_type) {
+    case SCHEME_WITH_HOST_AND_PORT:
    case SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION:
      // A URL with |scheme| is required to have the host and port (may be
      // omitted in a serialization if it's the same as the default value).

--- a/url/url_canon.h
+++ b/url/url_canon.h
@@ -220,6 +220,31 @@ class URL_EXPORT CharsetConverter {
                                CanonOutput* output) = 0;
 };
+// Schemes --------------------------------------------------------------------
+// Types of a scheme representing the requirements on the data represented by
+// the authority component of a URL with the scheme.
+enum SchemeType {
+  // The authority component of a URL with the scheme has the form
+  // "username:password@host:port". The username and password entries are
+  // optional; the host may not be empty. The default value of the port can be
+  // omitted in serialization. This type occurs with network schemes like http,
+  // https, and ftp.
+  SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION,
+  // The authority component of a URL with the scheme has the form "host:port",
+  // and does not include username or password. The default value of the port
+  // can be omitted in serialization. Used by inner URLs of filesystem URLs of
+  // origins with network hosts, from which the username and password are
+  // stripped.
+  SCHEME_WITH_HOST_AND_PORT,
+  // The authority component of a URL with the scheme has the form "host", and
+  // does not include port, username, or password. Used when the hosts are not
+  // network addresses; for example, schemes used internally by the browser.
+  SCHEME_WITH_HOST,
+  // A URL with the scheme doesn't have the authority component.
+  SCHEME_WITHOUT_AUTHORITY,
+};
 // Whitespace -----------------------------------------------------------------
 // Searches for whitespace that should be removed from the middle of URLs, and
@@ -549,12 +574,14 @@ URL_EXPORT void CanonicalizeRef(const base::char16* spec,
 URL_EXPORT bool CanonicalizeStandardURL(const char* spec,
                                        int spec_len,
                                        const Parsed& parsed,
+                                        SchemeType scheme_type,
                                        CharsetConverter* query_converter,
                                        CanonOutput* output,
                                        Parsed* new_parsed);
 URL_EXPORT bool CanonicalizeStandardURL(const base::char16* spec,
                                        int spec_len,
                                        const Parsed& parsed,
+                                        SchemeType scheme_type,
                                        CharsetConverter* query_converter,
                                        CanonOutput* output,
                                        Parsed* new_parsed);
@@ -802,6 +829,7 @@ class Replacements {
 URL_EXPORT bool ReplaceStandardURL(const char* base,
                                   const Parsed& base_parsed,
                                   const Replacements<char>& replacements,
+                                   SchemeType scheme_type,
                                   CharsetConverter* query_converter,
                                   CanonOutput* output,
                                   Parsed* new_parsed);
@@ -809,6 +837,7 @@ URL_EXPORT bool ReplaceStandardURL(
    const char* base,
    const Parsed& base_parsed,
    const Replacements<base::char16>& replacements,
+    SchemeType scheme_type,
    CharsetConverter* query_converter,
    CanonOutput* output,
    Parsed* new_parsed);

--- a/url/url_canon_filesystemurl.cc
+++ b/url/url_canon_filesystemurl.cc
@@ -43,16 +43,22 @@ bool DoCanonicalizeFileSystemURL(const CHAR* spec,
    return false;
  bool success = true;
+  SchemeType inner_scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
  if (CompareSchemeComponent(spec, inner_parsed->scheme, url::kFileScheme)) {
    new_inner_parsed.scheme.begin = output->length();
    output->Append("file://", 7);
    new_inner_parsed.scheme.len = 4;
    success &= CanonicalizePath(spec, inner_parsed->path, output,
                                &new_inner_parsed.path);
-  } else if (IsStandard(spec, inner_parsed->scheme)) {
+  } else if (GetStandardSchemeType(spec, inner_parsed->scheme,
-    success = CanonicalizeStandardURL(spec, parsed.inner_parsed()->Length(),
+                                   &inner_scheme_type)) {
-                                      *parsed.inner_parsed(), charset_converter,
+    if (inner_scheme_type == SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION) {
-                                      output, &new_inner_parsed);
+      // Strip out the user information from the inner URL, if any.
+      inner_scheme_type = SCHEME_WITH_HOST_AND_PORT;
+    }
+    success = CanonicalizeStandardURL(
+        spec, parsed.inner_parsed()->Length(), *parsed.inner_parsed(),
+        inner_scheme_type, charset_converter, output, &new_inner_parsed);
  } else {
    // TODO(ericu): The URL is wrong, but should we try to output more of what
    // we were given?  Echoing back filesystem:mailto etc. doesn't seem all that

--- a/url/url_canon_relative.cc
+++ b/url/url_canon_relative.cc
@@ -12,6 +12,7 @@
 #include "url/url_constants.h"
 #include "url/url_file.h"
 #include "url/url_parse_internal.h"
+#include "url/url_util.h"
 #include "url/url_util_internal.h"
 namespace url {
@@ -407,7 +408,13 @@ bool DoResolveRelativeHost(const char* base_url,
  output->ReserveSizeIfNeeded(
      replacements.components().Length() +
      base_parsed.CountCharactersBefore(Parsed::USERNAME, false));
-  return ReplaceStandardURL(base_url, base_parsed, replacements,
+  SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
+  if (!GetStandardSchemeType(base_url, base_parsed.scheme, &scheme_type)) {
+    // A path with an authority section gets canonicalized under standard URL
+    // rules, even though the base was not known to be standard.
+    scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
+  }
+  return ReplaceStandardURL(base_url, base_parsed, replacements, scheme_type,
                            query_converter, output, out_parsed);
 }

--- a/url/url_canon_stdurl.cc
+++ b/url/url_canon_stdurl.cc
@@ -13,9 +13,10 @@ namespace url {
 namespace {
-template<typename CHAR, typename UCHAR>
+template <typename CHAR, typename UCHAR>
 bool DoCanonicalizeStandardURL(const URLComponentSource<CHAR>& source,
                               const Parsed& parsed,
+                               SchemeType scheme_type,
                               CharsetConverter* query_converter,
                               CanonOutput* output,
                               Parsed* new_parsed) {
@@ -23,10 +24,18 @@ bool DoCanonicalizeStandardURL(const URLComponentSource<CHAR>& source,
  bool success = CanonicalizeScheme(source.scheme, parsed.scheme,
                                    output, &new_parsed->scheme);
+  bool scheme_supports_user_info =
+      (scheme_type == SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION);
+  bool scheme_supports_ports =
+      (scheme_type == SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION ||
+       scheme_type == SCHEME_WITH_HOST_AND_PORT);
  // Authority (username, password, host, port)
  bool have_authority;
-  if (parsed.username.is_valid() || parsed.password.is_valid() ||
+  if ((scheme_supports_user_info &&
-      parsed.host.is_nonempty() || parsed.port.is_valid()) {
+       (parsed.username.is_valid() || parsed.password.is_valid())) ||
+      parsed.host.is_nonempty() ||
+      (scheme_supports_ports && parsed.port.is_valid())) {
    have_authority = true;
    // Only write the authority separators when we have a scheme.
@@ -36,11 +45,14 @@ bool DoCanonicalizeStandardURL(const URLComponentSource<CHAR>& source,
    }
    // User info: the canonicalizer will handle the : and @.
-    success &= CanonicalizeUserInfo(source.username, parsed.username,
+    if (scheme_supports_user_info) {
-                                    source.password, parsed.password,
+      success &= CanonicalizeUserInfo(
-                                    output,
+          source.username, parsed.username, source.password, parsed.password,
-                                    &new_parsed->username,
+          output, &new_parsed->username, &new_parsed->password);
-                                    &new_parsed->password);
+    } else {
+      new_parsed->username.reset();
+      new_parsed->password.reset();
+    }
    success &= CanonicalizeHost(source.host, parsed.host,
                                output, &new_parsed->host);
@@ -50,10 +62,14 @@ bool DoCanonicalizeStandardURL(const URLComponentSource<CHAR>& source,
      success = false;
    // Port: the port canonicalizer will handle the colon.
+    if (scheme_supports_ports) {
      int default_port = DefaultPortForScheme(
          &output->data()[new_parsed->scheme.begin], new_parsed->scheme.len);
      success &= CanonicalizePort(source.port, parsed.port, default_port,
                                  output, &new_parsed->port);
+    } else {
+      new_parsed->port.reset();
+    }
  } else {
    // No authority, clear the components.
    have_authority = false;
@@ -127,23 +143,25 @@ int DefaultPortForScheme(const char* scheme, int scheme_len) {
 bool CanonicalizeStandardURL(const char* spec,
                             int spec_len,
                             const Parsed& parsed,
+                             SchemeType scheme_type,
                             CharsetConverter* query_converter,
                             CanonOutput* output,
                             Parsed* new_parsed) {
  return DoCanonicalizeStandardURL<char, unsigned char>(
-      URLComponentSource<char>(spec), parsed, query_converter,
+      URLComponentSource<char>(spec), parsed, scheme_type, query_converter,
      output, new_parsed);
 }
 bool CanonicalizeStandardURL(const base::char16* spec,
                             int spec_len,
                             const Parsed& parsed,
+                             SchemeType scheme_type,
                             CharsetConverter* query_converter,
                             CanonOutput* output,
                             Parsed* new_parsed) {
  return DoCanonicalizeStandardURL<base::char16, base::char16>(
-      URLComponentSource<base::char16>(spec), parsed, query_converter,
+      URLComponentSource<base::char16>(spec), parsed, scheme_type,
-      output, new_parsed);
+      query_converter, output, new_parsed);
 }
 // It might be nice in the future to optimize this so unchanged components don't
@@ -158,6 +176,7 @@ bool CanonicalizeStandardURL(const base::char16* spec,
 bool ReplaceStandardURL(const char* base,
                        const Parsed& base_parsed,
                        const Replacements<char>& replacements,
+                        SchemeType scheme_type,
                        CharsetConverter* query_converter,
                        CanonOutput* output,
                        Parsed* new_parsed) {
@@ -165,7 +184,7 @@ bool ReplaceStandardURL(const char* base,
  Parsed parsed(base_parsed);
  SetupOverrideComponents(base, replacements, &source, &parsed);
  return DoCanonicalizeStandardURL<char, unsigned char>(
-      source, parsed, query_converter, output, new_parsed);
+      source, parsed, scheme_type, query_converter, output, new_parsed);
 }
 // For 16-bit replacements, we turn all the replacements into UTF-8 so the
@@ -173,6 +192,7 @@ bool ReplaceStandardURL(const char* base,
 bool ReplaceStandardURL(const char* base,
                        const Parsed& base_parsed,
                        const Replacements<base::char16>& replacements,
+                        SchemeType scheme_type,
                        CharsetConverter* query_converter,
                        CanonOutput* output,
                        Parsed* new_parsed) {
@@ -181,7 +201,7 @@ bool ReplaceStandardURL(const char* base,
  Parsed parsed(base_parsed);
  SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
  return DoCanonicalizeStandardURL<char, unsigned char>(
-      source, parsed, query_converter, output, new_parsed);
+      source, parsed, scheme_type, query_converter, output, new_parsed);
 }
 }  // namespace url
--- a/url/url_canon_unittest.cc
+++ b/url/url_canon_unittest.cc
@@ -1433,7 +1433,8 @@ TEST(URLCanonTest, CanonicalizeStandardURL) {
    std::string out_str;
    StdStringCanonOutput output(&out_str);
    bool success = CanonicalizeStandardURL(
-        cases[i].input, url_len, parsed, NULL, &output, &out_parsed);
+        cases[i].input, url_len, parsed,
+        SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL, &output, &out_parsed);
    output.Complete();
    EXPECT_EQ(cases[i].expected_success, success);
@@ -1479,8 +1480,9 @@ TEST(URLCanonTest, ReplaceStandardURL) {
    std::string out_str;
    StdStringCanonOutput output(&out_str);
    Parsed out_parsed;
-    ReplaceStandardURL(replace_cases[i].base, parsed, r, NULL, &output,
+    ReplaceStandardURL(replace_cases[i].base, parsed, r,
-                       &out_parsed);
+                       SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL,
+                       &output, &out_parsed);
    output.Complete();
    EXPECT_EQ(replace_cases[i].expected, out_str);
@@ -1501,7 +1503,9 @@ TEST(URLCanonTest, ReplaceStandardURL) {
    std::string out_str1;
    StdStringCanonOutput output1(&out_str1);
    Parsed new_parsed;
-    ReplaceStandardURL(src, parsed, r, NULL, &output1, &new_parsed);
+    ReplaceStandardURL(src, parsed, r,
+                       SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL,
+                       &output1, &new_parsed);
    output1.Complete();
    EXPECT_STREQ("http://www.google.com/", out_str1.c_str());
@@ -1509,7 +1513,9 @@ TEST(URLCanonTest, ReplaceStandardURL) {
    r.SetPath(reinterpret_cast<char*>(0x00000001), Component());
    std::string out_str2;
    StdStringCanonOutput output2(&out_str2);
-    ReplaceStandardURL(src, parsed, r, NULL, &output2, &new_parsed);
+    ReplaceStandardURL(src, parsed, r,
+                       SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL,
+                       &output2, &new_parsed);
    output2.Complete();
    EXPECT_STREQ("http://www.google.com/", out_str2.c_str());
  }
@@ -1564,24 +1570,39 @@ TEST(URLCanonTest, ReplaceFileURL) {
 TEST(URLCanonTest, ReplaceFileSystemURL) {
  ReplaceCase replace_cases[] = {
      // Replace everything in the outer URL.
-    {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, "/foo", "b", "c", "filesystem:file:///temporary/foo?b#c"},
+      {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL,
+       NULL, "/foo", "b", "c", "filesystem:file:///temporary/foo?b#c"},
      // Replace nothing
-    {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "filesystem:file:///temporary/gaba?query#ref"},
+      {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL,
+       NULL, NULL, NULL, NULL, "filesystem:file:///temporary/gaba?query#ref"},
      // Clear non-path components (common)
-    {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, kDeleteComp, kDeleteComp, "filesystem:file:///temporary/gaba"},
+      {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL,
+       NULL, NULL, kDeleteComp, kDeleteComp,
+       "filesystem:file:///temporary/gaba"},
      // Replace path with something that doesn't begin with a slash and make
      // sure it gets added properly.
-    {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, "interesting/", NULL, NULL, "filesystem:file:///temporary/interesting/?query#ref"},
+      {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL,
-      // Replace scheme -- shouldn't do anything.
+       NULL, "interesting/", NULL, NULL,
-    {"filesystem:http://u:p@bar.com/t/gaba?query#ref", "http", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "filesystem:http://u:p@bar.com/t/gaba?query#ref"},
+       "filesystem:file:///temporary/interesting/?query#ref"},
-      // Replace username -- shouldn't do anything.
+      // Replace scheme -- shouldn't do anything except canonicalize.
-    {"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, "u2", NULL, NULL, NULL, NULL, NULL, NULL, "filesystem:http://u:p@bar.com/t/gaba?query#ref"},
+      {"filesystem:http://u:p@bar.com/t/gaba?query#ref", "http", NULL, NULL,
-      // Replace password -- shouldn't do anything.
+       NULL, NULL, NULL, NULL, NULL,
-    {"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, NULL, "pw2", NULL, NULL, NULL, NULL, NULL, "filesystem:http://u:p@bar.com/t/gaba?query#ref"},
+       "filesystem:http://bar.com/t/gaba?query#ref"},
-      // Replace host -- shouldn't do anything.
+      // Replace username -- shouldn't do anything except canonicalize.
-    {"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, NULL, NULL, "foo.com", NULL, NULL, NULL, NULL, "filesystem:http://u:p@bar.com/t/gaba?query#ref"},
+      {"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, "u2", NULL, NULL,
-      // Replace port -- shouldn't do anything.
+       NULL, NULL, NULL, NULL, "filesystem:http://bar.com/t/gaba?query#ref"},
-    {"filesystem:http://u:p@bar.com:40/t/gaba?query#ref", NULL, NULL, NULL, NULL, "41", NULL, NULL, NULL, "filesystem:http://u:p@bar.com:40/t/gaba?query#ref"},
+      // Replace password -- shouldn't do anything except canonicalize.
+      {"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, NULL, "pw2",
+       NULL, NULL, NULL, NULL, NULL,
+       "filesystem:http://bar.com/t/gaba?query#ref"},
+      // Replace host -- shouldn't do anything except canonicalize.
+      {"filesystem:http://u:p@bar.com:80/t/gaba?query#ref", NULL, NULL, NULL,
+       "foo.com", NULL, NULL, NULL, NULL,
+       "filesystem:http://bar.com/t/gaba?query#ref"},
+      // Replace port -- shouldn't do anything except canonicalize.
+      {"filesystem:http://u:p@bar.com:40/t/gaba?query#ref", NULL, NULL, NULL,
+       NULL, "41", NULL, NULL, NULL,
+       "filesystem:http://bar.com:40/t/gaba?query#ref"},
  };
  for (size_t i = 0; i < arraysize(replace_cases); i++) {

--- a/url/url_util.cc
+++ b/url/url_util.cc
@@ -244,7 +244,7 @@ bool DoCanonicalize(const CHAR* spec,
  // This is the parsed version of the input URL, we have to canonicalize it
  // before storing it in our object.
  bool success;
-  SchemeType unused_scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
+  SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
  if (DoCompareSchemeComponent(spec, scheme, url::kFileScheme)) {
    // File URLs are special.
    ParseFileURL(spec, spec_len, &parsed_input);
@@ -257,10 +257,10 @@ bool DoCanonicalize(const CHAR* spec,
                                        charset_converter, output,
                                        output_parsed);
-  } else if (DoIsStandard(spec, scheme, &unused_scheme_type)) {
+  } else if (DoIsStandard(spec, scheme, &scheme_type)) {
    // All "normal" URLs.
    ParseStandardURL(spec, spec_len, &parsed_input);
-    success = CanonicalizeStandardURL(spec, spec_len, parsed_input,
+    success = CanonicalizeStandardURL(spec, spec_len, parsed_input, scheme_type,
                                      charset_converter, output, output_parsed);
  } else if (DoCompareSchemeComponent(spec, scheme, url::kMailToScheme)) {
@@ -442,10 +442,10 @@ bool DoReplaceComponents(const char* spec,
    return ReplaceFileSystemURL(spec, parsed, replacements, charset_converter,
                                output, out_parsed);
  }
-  SchemeType unused_scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
+  SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
-  if (DoIsStandard(spec, parsed.scheme, &unused_scheme_type)) {
+  if (DoIsStandard(spec, parsed.scheme, &scheme_type)) {
-    return ReplaceStandardURL(spec, parsed, replacements, charset_converter,
+    return ReplaceStandardURL(spec, parsed, replacements, scheme_type,
-                              output, out_parsed);
+                              charset_converter, output, out_parsed);
  }
  if (DoCompareSchemeComponent(spec, parsed.scheme, url::kMailToScheme)) {
    return ReplaceMailtoURL(spec, parsed, replacements, output, out_parsed);
@@ -646,6 +646,12 @@ bool GetStandardSchemeType(const char* spec,
  return DoIsStandard(spec, scheme, type);
 }
+bool GetStandardSchemeType(const base::char16* spec,
+                           const Component& scheme,
+                           SchemeType* type) {
+  return DoIsStandard(spec, scheme, type);
+}
 bool IsStandard(const base::char16* spec, const Component& scheme) {
  SchemeType unused_scheme_type;
  return DoIsStandard(spec, scheme, &unused_scheme_type);

--- a/url/url_util.h
+++ b/url/url_util.h
@@ -39,24 +39,6 @@ URL_EXPORT void Shutdown();
 // Schemes ---------------------------------------------------------------------
-// Types of a scheme representing the requirements on the data represented by
-// the authority component of a URL with the scheme.
-enum SchemeType {
-  // The authority component of a URL with the scheme, if any, has the form
-  // "username:password@host:port". The username and password entries are
-  // optional; the host may not be empty. The default value of the port
-  // can be omitted in serialization. This type occurs with network schemes
-  // like http, https, and ftp.
-  SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION,
-  // The authority component of a URL with this scheme, if any, consists only
-  // of a host. It does not contain port, username, or password. Schemes used
-  // internally by browser features usually work this way, as hostnames do not
-  // correspond to network hosts.
-  SCHEME_WITH_HOST,
-  // A URL with the scheme doesn't have the authority component.
-  SCHEME_WITHOUT_AUTHORITY,
-};
 // A pair for representing a standard scheme name and the SchemeType for it.
 struct URL_EXPORT SchemeWithType {
  const char* scheme;
@@ -175,6 +157,9 @@ URL_EXPORT bool IsReferrerScheme(const char* spec, const Component& scheme);
 URL_EXPORT bool GetStandardSchemeType(const char* spec,
                                      const Component& scheme,
                                      SchemeType* type);
+URL_EXPORT bool GetStandardSchemeType(const base::char16* spec,
+                                      const Component& scheme,
+                                      SchemeType* type);
 // Hosts  ----------------------------------------------------------------------

--- a/url/url_util_unittest.cc
+++ b/url/url_util_unittest.cc
@@ -318,6 +318,8 @@ TEST_F(URLUtilTest, TestResolveRelativeWithNonStandardBase) {
      {"data:/Blah:Blah/", "file.html", true, "data:/Blah:Blah/file.html"},
      {"data:/Path/../part/part2", "file.html", true,
       "data:/Path/../part/file.html"},
+      {"data://text/html,payload", "//user:pass@host:33////payload22", true,
+       "data://user:pass@host:33////payload22"},
      // Path URL canonicalization rules also apply to non-standard authority-
      // based URLs.
      {"custom://Authority/", "file.html", true,
@@ -335,24 +337,26 @@ TEST_F(URLUtilTest, TestResolveRelativeWithNonStandardBase) {
      // rules, even though the base was non-standard.
      {"content://content.Provider/", "//other.Provider", true,
       "content://other.provider/"},
      // Resolving an absolute URL doesn't cause canonicalization of the
      // result.
      {"about:blank", "custom://Authority", true, "custom://Authority"},
      // Fragment URLs can be resolved against a non-standard base.
      {"scheme://Authority/path", "#fragment", true,
       "scheme://Authority/path#fragment"},
-    {"scheme://Authority/", "#fragment", true, "scheme://Authority/#fragment"},
+      {"scheme://Authority/", "#fragment", true,
+       "scheme://Authority/#fragment"},
      // Resolving should fail if the base URL is authority-based but is
      // missing a path component (the '/' at the end).
      {"scheme://Authority", "path", false, ""},
      // Test resolving a fragment (only) against any kind of base-URL.
-    {"about:blank", "#id42", true, "about:blank#id42" },
+      {"about:blank", "#id42", true, "about:blank#id42"},
-    {"about:blank", " #id42", true, "about:blank#id42" },
+      {"about:blank", " #id42", true, "about:blank#id42"},
-    {"about:blank#oldfrag", "#newfrag", true, "about:blank#newfrag" },
+      {"about:blank#oldfrag", "#newfrag", true, "about:blank#newfrag"},
      // A surprising side effect of allowing fragments to resolve against
      // any URL scheme is we might break javascript: URLs by doing so...
      {"javascript:alert('foo#bar')", "#badfrag", true,
-      "javascript:alert('foo#badfrag" },
+       "javascript:alert('foo#badfrag"},
      // In this case, the backslashes will not be canonicalized because it's a
      // non-standard URL, but they will be treated as a path separators,
      // giving the base URL here a path of "\".
@@ -361,8 +365,7 @@ TEST_F(URLUtilTest, TestResolveRelativeWithNonStandardBase) {
      // either "aaa://a\" or "aaa://a/" since the path is being replaced with
      // the "current directory". But in the context of resolving on data URLs,
      // adding the requested dot doesn't seem wrong either.
-    {"aaa://a\\", "aaa:.", true, "aaa://a\\." }
+      {"aaa://a\\", "aaa:.", true, "aaa://a\\."}};
-  };
  for (size_t i = 0; i < arraysize(resolve_non_standard_cases); i++) {
    const ResolveRelativeCase& test_data = resolve_non_standard_cases[i];