Commit 907d7969 authored by Devlin Cronin, committed by Commit Bot

[Extensions] Add URLPattern::CreateIntersection()

Add a URLPattern::CreateIntersection() method to return the
intersection of two URLPatterns, if any. This can construct a new
URLPattern in many cases.

For instance, the intersection returned between http://*.google.com/*
and *://google.com/maps is http://google.com/maps.

This CL does not add any (non-test) usages of the new method.

Bug: 867549

Change-Id: I590081fa6d57bcc88c831095b50d21e20e9ec049
Reviewed-on: https://chromium-review.googlesource.com/1150413
Reviewed-by: Karan Bhatia <karandeepb@chromium.org>
Commit-Queue: Devlin <rdevlin.cronin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#578669}
parent ee437463
......@@ -10,6 +10,7 @@
#include "base/macros.h"
#include "base/strings/pattern.h"
#include "base/strings/strcat.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
......@@ -637,6 +638,110 @@ bool URLPattern::Contains(const URLPattern& other) const {
MatchesPath(StripTrailingWildcard(other.path()));
}
// Builds the URLPattern matched by both |this| and |other|, or returns
// base::nullopt when the two patterns are found not to overlap. Both patterns
// are expected to have match_effective_tld_ set (DCHECKed below).
base::Optional<URLPattern> URLPattern::CreateIntersection(
    const URLPattern& other) const {
  DCHECK(match_effective_tld_);
  DCHECK(other.match_effective_tld_);

  // Step 1: intersect the valid-scheme masks. A mask of SCHEME_ALL defers to
  // the other pattern's mask; otherwise the masks are AND-ed together. An
  // empty result means the patterns cannot overlap at all.
  const int merged_schemes =
      valid_schemes_ == URLPattern::SCHEME_ALL
          ? other.valid_schemes_
          : (other.valid_schemes_ == URLPattern::SCHEME_ALL
                 ? valid_schemes_
                 : (valid_schemes_ & other.valid_schemes_));
  if (merged_schemes == URLPattern::SCHEME_NONE)
    return base::nullopt;

  // Step 2: shortcuts where the answer is (mostly) a copy of one input:
  //  - the two URLPatterns compare equal (possibly excluding valid_schemes_);
  //  - one of the patterns has match_all_urls() equal to true, in which case
  //    the other pattern is the intersection.
  // NOTE(devlin): Theoretically, we could use Contains() instead of
  // match_all_urls() here. However, Contains() strips the trailing wildcard
  // from the path, which could yield the incorrect result.
  const URLPattern* base_pattern = nullptr;
  if (*this == other || other.match_all_urls()) {
    base_pattern = this;
  } else if (match_all_urls()) {
    base_pattern = &other;
  }
  if (base_pattern != nullptr) {
    // Equality ignores valid_schemes_, and the masks may also differ in the
    // match_all_urls() case, so a plain copy is only correct when the chosen
    // pattern already carries the merged mask. Otherwise re-parse its string
    // form under the merged mask.
    if (base_pattern->valid_schemes_ == merged_schemes)
      return *base_pattern;
    URLPattern reparsed(merged_schemes);
    const ParseResult reparse_status =
        reparsed.Parse(base_pattern->GetAsString());
    CHECK_EQ(PARSE_SUCCESS, reparse_status);
    return reparsed;
  }

  // Step 3: no shortcut applies, so intersect component by component. For
  // each component, whichever side's string is accepted by the other side's
  // matcher is taken as the narrower one (|this| is tried first).
  // NOTE(review): components are compared as literal strings against the
  // other pattern's matcher, so two wildcard components (e.g. two "*"
  // schemes) only intersect if the matcher accepts the wildcard text itself -
  // confirm against the Matches*() helpers.
  //
  // The member-function type is spelled out (rather than deduced) because
  // MatchesHost() is overloaded.
  using Matcher = bool (URLPattern::*)(base::StringPiece) const;
  const auto pick_narrower = [this, &other](Matcher matches,
                                            base::StringPiece mine,
                                            base::StringPiece theirs,
                                            base::StringPiece* narrower) {
    if ((this->*matches)(theirs)) {
      *narrower = theirs;
      return true;
    }
    if ((other.*matches)(mine)) {
      *narrower = mine;
      return true;
    }
    return false;
  };

  // Any component that fails to overlap means there is no intersection.
  base::StringPiece scheme;
  if (!pick_narrower(&URLPattern::MatchesScheme, scheme_, other.scheme_,
                     &scheme)) {
    return base::nullopt;
  }
  base::StringPiece host;
  if (!pick_narrower(&URLPattern::MatchesHost, host_, other.host_, &host))
    return base::nullopt;
  base::StringPiece port;
  if (!pick_narrower(&URLPattern::MatchesPortPattern, port_, other.port_,
                     &port)) {
    return base::nullopt;
  }
  base::StringPiece path;
  if (!pick_narrower(&URLPattern::MatchesPath, path_, other.path_, &path))
    return base::nullopt;

  // The intersection matches subdomains only if both inputs do.
  base::StringPiece subdomain_prefix;
  if (match_subdomains_ && other.match_subdomains_) {
    // The host may be empty (e.g., in the case of *://*/*); in that case,
    // only prepend '*' instead of '*.'.
    subdomain_prefix = host.empty() ? "*" : "*.";
  }

  base::StringPiece scheme_separator =
      IsStandardScheme(scheme) ? url::kStandardSchemeSeparator : ":";

  // Step 4: reassemble the components into a pattern string and parse it
  // under the merged scheme mask.
  const std::string combined_spec = base::StrCat(
      {scheme, scheme_separator, subdomain_prefix, host, ":", port, path});

  URLPattern intersection(merged_schemes);
  const ParseResult parse_status = intersection.Parse(combined_spec);
  // TODO(devlin): I don't think there's any way this should ever fail, but
  // use a CHECK() to flush any cases out. If nothing crops up, downgrade this
  // to a DCHECK in M72.
  CHECK_EQ(PARSE_SUCCESS, parse_status);

  return intersection;
}
bool URLPattern::MatchesAnyScheme(
const std::vector<std::string>& schemes) const {
for (std::vector<std::string>::const_iterator i = schemes.begin();
......
......@@ -220,6 +220,21 @@ class URLPattern {
// match. For example, http://*.google.com encompasses http://www.google.com.
bool Contains(const URLPattern& other) const;
// Creates a new URLPattern that represents the intersection of this
// URLPattern with the |other|, or base::nullopt if no intersection exists.
// For instance, given the patterns http://*.google.com/* and
// *://maps.google.com/*, the intersection is http://maps.google.com/*.
// NOTES:
// - This will DCHECK if either pattern has match_effective_tld_ set to false.
// - The returned pattern's valid schemes are the intersection of the two
//   patterns' valid schemes; if those share no scheme, base::nullopt is
//   returned.
// - The returned pattern matches subdomains only if both patterns match
//   subdomains.
// - Though scheme intersections are supported, the serialization of
// URLPatternSet does not record them. Be sure that this is safe for your
// use cases.
// - Path intersection is done on a best-effort basis. If one path clearly
// contains another, it will be handled correctly, but this method does not
// deal with cases like /*a* and /*b* (where technically the intersection
// is /*a*b*|/*b*a*); the intersection returned for that case will be empty.
base::Optional<URLPattern> CreateIntersection(const URLPattern& other) const;
// Converts this URLPattern into an equivalent set of URLPatterns that don't
// use a wildcard in the scheme component. If this URLPattern doesn't use a
// wildcard scheme, then the returned set will contain one element that is
......
......@@ -1092,4 +1092,151 @@ TEST(ExtensionURLPatternTest, UncanonicalizedUrl) {
}
}
// Tests URLPattern::CreateIntersection().
//
// Table-driven: each case holds two pattern strings and the string form of
// the expected intersection; an empty expectation means "no intersection".
// Every case is checked in both directions, since the result must not depend
// on which pattern is the receiver.
TEST(ExtensionURLPatternTest, Intersection) {
  struct {
    std::string pattern1;
    std::string pattern2;
    std::string expected_intersection;
  } test_cases[] = {
      // Identical.
      {"<all_urls>", "<all_urls>", "<all_urls>"},
      {"https://google.com/*", "https://google.com/*", "https://google.com/*"},
      // <all_urls> always returns the other pattern.
      {"<all_urls>", "https://*.google.com/*", "https://*.google.com/*"},
      {"<all_urls>", "*://*/*", "*://*/*"},
      // Scheme intersection.
      {"https://google.com/*", "*://google.com/*", "https://google.com/*"},
      // Host intersection.
      {"https://*.google.com/*", "https://google.com/*",
       "https://google.com/*"},
      {"https://*.maps.google.com/*", "https://*.google.com/*",
       "https://*.maps.google.com/*"},
      // Path intersection.
      {"https://google.com/*", "https://google.com/foo*",
       "https://google.com/foo*"},
      {"https://google.com/foo*", "https://google.com/foo",
       "https://google.com/foo"},
      // Paths can be interesting, and we support intersections on a best-effort
      // basis.
      {"https://google.com/*a*", "https://google.com/*",
       "https://google.com/*a*"},
      {"https://google.com/foo*", "https://google.com/fo*",
       "https://google.com/foo*"},
      {"https://google.com/*a*", "https://google.com/*ab*",
       "https://google.com/*ab*"},
      // Technically, these do intersect - e.g., https://google.com/ab. However,
      // we don't support that level of path intersection.
      {"https://google.com/*a*", "https://google.com/*b*", ""},
      // Port intersection.
      {"https://google.com/*", "https://google.com:80/*",
       "https://google.com:80/*"},
      {"https://google.com:*/*", "https://google.com:*/*",
       "https://google.com/*"},
      // Multi-component intersection (the fun ones).
      {"https://*.google.com/maps", "https://google.com/*",
       "https://google.com/maps"},
      {"*://google.com/*", "https://*/*", "https://google.com/*"},
      {"*://*.com/foo", "https://google.com/*", "https://google.com/foo"},
      // No intersection.
      {"*://*/foo", "*://*/bar", ""},
      {"http://*/*", "https://*/*", ""},
      {"*://*.com/*", "https://chromium.org/*", ""},
      // File URLs.
      {"file:///usr/me", "file:///*", "file:///usr/me"},
      {"file:///usr/*", "file:///*", "file:///usr/*"},
      {"file:///etc/passwd", "file:///usr/*", ""},
  };

  constexpr int kValidSchemes = URLPattern::SCHEME_ALL;
  constexpr char kTestCaseDescriptionTemplate[] =
      "Running Test Case:\n"
      "    Pattern1:        %s\n"
      "    Pattern2:        %s\n"
      "    Expected Result: %s";

  // Iterate by reference: each case owns three std::strings, so iterating by
  // value would copy all of them on every pass.
  for (const auto& test_case : test_cases) {
    SCOPED_TRACE(base::StringPrintf(
        kTestCaseDescriptionTemplate, test_case.pattern1.c_str(),
        test_case.pattern2.c_str(), test_case.expected_intersection.c_str()));

    URLPattern pattern1(kValidSchemes);
    ASSERT_EQ(URLPattern::PARSE_SUCCESS, pattern1.Parse(test_case.pattern1))
        << "Pattern failed to parse: " << test_case.pattern1;
    URLPattern pattern2(kValidSchemes);
    ASSERT_EQ(URLPattern::PARSE_SUCCESS, pattern2.Parse(test_case.pattern2))
        << "Pattern failed to parse: " << test_case.pattern2;

    // Intersection of two URLPatterns should be identical regardless of which
    // is the "first".
    base::Optional<URLPattern> intersection1 =
        pattern1.CreateIntersection(pattern2);
    base::Optional<URLPattern> intersection2 =
        pattern2.CreateIntersection(pattern1);

    if (test_case.expected_intersection.empty()) {
      // The streamed message is only evaluated when the expectation fails,
      // i.e. when the optional actually holds a value.
      EXPECT_EQ(base::nullopt, intersection1) << intersection1->GetAsString();
      EXPECT_EQ(base::nullopt, intersection2) << intersection2->GetAsString();
    } else {
      ASSERT_TRUE(intersection1);
      EXPECT_EQ(test_case.expected_intersection, intersection1->GetAsString());
      ASSERT_TRUE(intersection2);
      EXPECT_EQ(test_case.expected_intersection, intersection2->GetAsString());
    }
  }
}
// Tests how URLPattern::CreateIntersection() combines the valid-scheme masks
// of two patterns. Both patterns are always parsed from kAllUrlsPattern, so
// only the masks differ between cases.
TEST(ExtensionURLPatternTest, ValidSchemeIntersection) {
  // Each row: the two input masks and the mask expected on the intersection.
  // An expected mask of SCHEME_NONE means no intersection should be returned.
  struct {
    int scheme1;
    int scheme2;
    int expected_scheme;
  } mask_cases[] = {
      {URLPattern::SCHEME_ALL, URLPattern::SCHEME_ALL, URLPattern::SCHEME_ALL},
      {URLPattern::SCHEME_ALL, URLPattern::SCHEME_HTTP,
       URLPattern::SCHEME_HTTP},
      {URLPattern::SCHEME_HTTPS | URLPattern::SCHEME_HTTP,
       URLPattern::SCHEME_HTTP, URLPattern::SCHEME_HTTP},
      {URLPattern::SCHEME_HTTP, URLPattern::SCHEME_HTTPS,
       URLPattern::SCHEME_NONE},
  };

  for (const auto& mask_case : mask_cases) {
    SCOPED_TRACE(base::StringPrintf("Test Case: %d, %d, %d", mask_case.scheme1,
                                    mask_case.scheme2,
                                    mask_case.expected_scheme));

    URLPattern first(mask_case.scheme1);
    ASSERT_EQ(URLPattern::PARSE_SUCCESS,
              first.Parse(URLPattern::kAllUrlsPattern));
    URLPattern second(mask_case.scheme2);
    ASSERT_EQ(URLPattern::PARSE_SUCCESS,
              second.Parse(URLPattern::kAllUrlsPattern));

    // Check both directions; the result should not depend on the receiver.
    base::Optional<URLPattern> forward = first.CreateIntersection(second);
    base::Optional<URLPattern> backward = second.CreateIntersection(first);

    const bool expect_no_overlap =
        mask_case.expected_scheme == URLPattern::SCHEME_NONE;
    if (expect_no_overlap) {
      EXPECT_EQ(base::nullopt, forward) << forward->GetAsString();
      EXPECT_EQ(base::nullopt, backward) << backward->GetAsString();
      continue;
    }

    ASSERT_TRUE(forward);
    EXPECT_EQ(mask_case.expected_scheme, forward->valid_schemes());
    ASSERT_TRUE(backward);
    EXPECT_EQ(mask_case.expected_scheme, backward->valid_schemes());
  }
}
} // namespace
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment