Commit 054f4033, authored by pkalinnikov and committed by Commit bot

Provide the equivalent of GURL::DomainIs for url::Origin.

Move the implementation of GURL::DomainIs into a shared function in url_util/,
to be used both by GURL and url::Origin.

The reason for this refactoring is to reap significant performance gains in
scenarios where previously url::Origins had to be converted to GURLs for
DomainIs checks. This involved string copying, allocations, and parsing the
entire URL once again.

BUG=517560

Review-Url: https://codereview.chromium.org/2287483002
Cr-Commit-Position: refs/heads/master@{#415606}
parent 6c92845a
......@@ -491,47 +491,13 @@ const GURL& GURL::EmptyGURL() {
#endif // WIN32
bool GURL::DomainIs(base::StringPiece lower_ascii_domain) const {
if (!is_valid_ || lower_ascii_domain.empty())
if (!is_valid_)
return false;
// FileSystem URLs have empty parsed_.host, so check this first.
// FileSystem URLs have empty host_piece, so check this first.
if (SchemeIsFileSystem() && inner_url_)
return inner_url_->DomainIs(lower_ascii_domain);
if (!parsed_.host.is_nonempty())
return false;
// If the host name ends with a dot but the input domain doesn't,
// then we ignore the dot in the host name.
const char* host_last_pos = spec_.data() + parsed_.host.end() - 1;
int host_len = parsed_.host.len;
int domain_len = lower_ascii_domain.length();
if ('.' == *host_last_pos && '.' != lower_ascii_domain[domain_len - 1]) {
host_last_pos--;
host_len--;
}
if (host_len < domain_len)
return false;
// |host_first_pos| is the start of the compared part of the host name, not
// start of the whole host name.
const char* host_first_pos = spec_.data() + parsed_.host.begin +
host_len - domain_len;
if (!base::LowerCaseEqualsASCII(
base::StringPiece(host_first_pos, domain_len), lower_ascii_domain))
return false;
// Make sure there aren't extra characters in host before the compared part;
// if the host name is longer than the input domain name, then the character
// immediately before the compared part should be a dot. For example,
// www.google.com has domain "google.com", but www.iamnotgoogle.com does not.
if ('.' != lower_ascii_domain[0] && host_len > domain_len &&
'.' != *(host_first_pos - 1))
return false;
return true;
return url::DomainIs(host_piece(), lower_ascii_domain);
}
void GURL::Swap(GURL* other) {
......
......@@ -72,6 +72,10 @@ bool Origin::IsSameOriginWith(const Origin& other) const {
return tuple_.Equals(other.tuple_);
}
// Reports whether this origin's host matches, or is in the domain of,
// |lower_ascii_domain|. The comparison itself is delegated to url::DomainIs()
// on the host stored in |tuple_|. A unique origin never matches any domain.
bool Origin::DomainIs(base::StringPiece lower_ascii_domain) const {
  if (unique_)
    return false;
  return url::DomainIs(tuple_.host(), lower_ascii_domain);
}
bool Origin::operator<(const Origin& other) const {
return tuple_ < other.tuple_;
}
......
......@@ -122,6 +122,9 @@ class URL_EXPORT Origin {
return IsSameOriginWith(other);
}
// Same as GURL::DomainIs. If |this| origin is unique, then returns false.
bool DomainIs(base::StringPiece lower_ascii_domain) const;
// Allows Origin to be used as a key in STL (for example, a std::set or
// std::map).
bool operator<(const Origin& other) const;
......
......@@ -252,4 +252,61 @@ TEST(OriginTest, UnsafelyCreateUniqueViaEmbeddedNulls) {
}
}
// Table-driven check of Origin::DomainIs(). Each case parses |url| into a GURL
// (which must be valid), builds a url::Origin from it, and compares
// DomainIs(|lower_ascii_domain|) against |expected_domain_is|. After the table,
// an origin built from an invalid GURL and default-constructed (unique) origins
// are checked to never match.
TEST(OriginTest, DomainIs) {
const struct {
// URL the origin under test is created from.
const char* url;
// Domain passed to Origin::DomainIs().
const char* lower_ascii_domain;
// Expected return value of Origin::DomainIs().
bool expected_domain_is;
} kTestCases[] = {
// Exact host match, then a subdomain of the domain (with a port in the URL).
{"http://google.com/foo", "google.com", true},
{"http://www.google.com:99/foo", "google.com", true},
// The domain appears in the host but not as its suffix.
{"http://www.google.com.cn/foo", "google.com", false},
{"http://www.google.comm", "google.com", false},
// The host ends with the domain text, but not on a dot boundary.
{"http://www.iamnotgoogle.com/foo", "google.com", false},
// The domain argument is not lower-cased, so it does not match.
{"http://www.google.com/foo", "Google.com", false},
// If the host ends with a dot, it matches domains with or without a dot.
{"http://www.google.com./foo", "google.com", true},
{"http://www.google.com./foo", "google.com.", true},
{"http://www.google.com./foo", ".com", true},
{"http://www.google.com./foo", ".com.", true},
// But, if the host doesn't end with a dot and the input domain does, then
// it's considered to not match.
{"http://google.com/foo", "google.com.", false},
// If the host ends with two dots, it doesn't match.
{"http://www.google.com../foo", "google.com", false},
// Filesystem scheme.
{"filesystem:http://www.google.com:99/foo/", "google.com", true},
{"filesystem:http://www.iamnotgoogle.com/foo/", "google.com", false},
// File scheme.
{"file:///home/user/text.txt", "", false},
{"file:///home/user/text.txt", "txt", false},
};
for (const auto& test_case : kTestCases) {
// Attach the (url, domain) pair to any failure reported in this iteration.
SCOPED_TRACE(testing::Message() << "(url, domain): (" << test_case.url
<< ", " << test_case.lower_ascii_domain
<< ")");
GURL url(test_case.url);
// Every table entry is expected to parse; abort the test otherwise.
ASSERT_TRUE(url.is_valid());
url::Origin origin(url);
EXPECT_EQ(test_case.expected_domain_is,
origin.DomainIs(test_case.lower_ascii_domain));
}
// If the URL is invalid, DomainIs returns false.
GURL invalid_url("google.com");
ASSERT_FALSE(invalid_url.is_valid());
EXPECT_FALSE(url::Origin(invalid_url).DomainIs("google.com"));
// Unique origins.
EXPECT_FALSE(url::Origin().DomainIs(""));
EXPECT_FALSE(url::Origin().DomainIs("com"));
}
} // namespace url
......@@ -488,6 +488,43 @@ bool FindAndCompareScheme(const base::char16* str,
return DoFindAndCompareScheme(str, str_len, compare, found_scheme);
}
// Reports whether |canonicalized_host| is |lower_ascii_domain| itself or lies
// inside that domain. The domain must match the tail of the host, and the
// match must begin on a label boundary (or the domain must itself start with
// a dot). Empty inputs never match.
bool DomainIs(base::StringPiece canonicalized_host,
              base::StringPiece lower_ascii_domain) {
  // Neither an empty host nor an empty domain can match anything.
  if (canonicalized_host.empty() || lower_ascii_domain.empty())
    return false;

  const size_t domain_len = lower_ascii_domain.length();

  // A trailing dot on the host is left out of the comparison, but only when
  // the domain was not written with a trailing dot as well.
  const bool ignore_trailing_dot =
      canonicalized_host.back() == '.' && lower_ascii_domain.back() != '.';
  const size_t effective_host_len =
      canonicalized_host.length() - (ignore_trailing_dot ? 1 : 0);

  // The host has to be at least as long as the domain it is tested against.
  if (effective_host_len < domain_len)
    return false;

  // |suffix_offset| is where the compared tail of the host begins; it is not
  // the beginning of the host unless both lengths are equal.
  const size_t suffix_offset = effective_host_len - domain_len;
  const base::StringPiece host_suffix(
      canonicalized_host.data() + suffix_offset, domain_len);
  if (!base::LowerCaseEqualsASCII(host_suffix, lower_ascii_domain))
    return false;

  // The tail matched. It is a real domain match if it covers the whole host
  // or if the domain carries its own leading dot.
  if (suffix_offset == 0 || lower_ascii_domain[0] == '.')
    return true;

  // Otherwise the character right before the tail must be a dot, so that
  // "www.google.com" is in "google.com" but "www.iamnotgoogle.com" is not.
  return canonicalized_host[suffix_offset - 1] == '.';
}
bool Canonicalize(const char* spec,
int spec_len,
bool trim_path_end,
......
......@@ -8,6 +8,7 @@
#include <string>
#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
#include "url/url_constants.h"
......@@ -35,7 +36,7 @@ URL_EXPORT void Initialize();
// library.
URL_EXPORT void Shutdown();
// Schemes --------------------------------------------------------------------
// Schemes ---------------------------------------------------------------------
// Types of a scheme representing the requirements on the data represented by
// the authority component of a URL with the scheme.
......@@ -132,7 +133,20 @@ URL_EXPORT bool GetStandardSchemeType(const char* spec,
const Component& scheme,
SchemeType* type);
// URL library wrappers -------------------------------------------------------
// Domains ---------------------------------------------------------------------
// Returns true if the |canonicalized_host| matches or is in the same domain as
// the given |lower_ascii_domain| string. For example, if the canonicalized
// hostname is "www.google.com", this will return true for "com", "google.com",
// and "www.google.com" domains.
//
// The match must fall on a label boundary: unless |lower_ascii_domain| itself
// starts with a dot, the host character immediately before the matched suffix
// must be a dot. So "www.iamnotgoogle.com" is not in the "google.com" domain.
//
// A single trailing dot on the host is ignored when the domain has none
// ("www.google.com." is in "google.com"). A domain written with a trailing dot
// only matches a host that also ends with a dot.
//
// If either of the input StringPieces is empty, the return value is false. The
// input domain should be a lower-case ASCII string in order to match the
// canonicalized host.
URL_EXPORT bool DomainIs(base::StringPiece canonicalized_host,
base::StringPiece lower_ascii_domain);
// URL library wrappers --------------------------------------------------------
// Parses the given spec according to the extracted scheme type. Normal users
// should use the URL object, although this may be useful if performance is
......@@ -204,7 +218,7 @@ URL_EXPORT bool ReplaceComponents(
CanonOutput* output,
Parsed* out_parsed);
// String helper functions ----------------------------------------------------
// String helper functions -----------------------------------------------------
// Unescapes the given string using URL escaping rules.
URL_EXPORT void DecodeURLEscapeSequences(const char* input,
......
......@@ -374,4 +374,47 @@ TEST(URLUtilTest, TestNoRefComponent) {
EXPECT_FALSE(resolved_parsed.ref.is_valid());
}
// Table-driven check of url::DomainIs(). Each case passes a host string and a
// domain string directly to DomainIs() and compares the result against
// |expected_domain_is|.
TEST(URLUtilTest, TestDomainIs) {
const struct {
// Host string passed as the first argument of DomainIs().
const char* canonicalized_host;
// Domain string passed as the second argument of DomainIs().
const char* lower_ascii_domain;
// Expected return value of DomainIs().
bool expected_domain_is;
} kTestCases[] = {
{"google.com", "google.com", true},
{"www.google.com", "google.com", true}, // Subdomain is ignored.
{"www.google.com.cn", "google.com", false}, // Different TLD.
{"www.google.comm", "google.com", false},
{"www.iamnotgoogle.com", "google.com", false}, // Different hostname.
{"www.google.com", "Google.com", false}, // The input is not lower-cased.
// If the host ends with a dot, it matches domains with or without a dot.
{"www.google.com.", "google.com", true},
{"www.google.com.", "google.com.", true},
{"www.google.com.", ".com", true},
{"www.google.com.", ".com.", true},
// But, if the host doesn't end with a dot and the input domain does, then
// it's considered to not match.
{"www.google.com", "google.com.", false},
// If the host ends with two dots, it doesn't match.
{"www.google.com..", "google.com", false},
// Empty parameters.
{"www.google.com", "", false},
{"", "www.google.com", false},
{"", "", false},
};
for (const auto& test_case : kTestCases) {
// Attach the (host, domain) pair to any failure reported in this iteration.
SCOPED_TRACE(testing::Message() << "(host, domain): ("
<< test_case.canonicalized_host << ", "
<< test_case.lower_ascii_domain << ")");
EXPECT_EQ(
test_case.expected_domain_is,
DomainIs(test_case.canonicalized_host, test_case.lower_ascii_domain));
}
}
} // namespace url
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment