Commit 6d16f50c authored by Devlin Cronin's avatar Devlin Cronin Committed by Commit Bot

[Extensions] Improve semantic intersection of URLPatternSet

Improve the semantic intersection logic in URLPatternSet to utilize
URLPattern::CreateIntersection(). This allows callers to create an
intersection between two sets that will include patterns that were
not explicitly in either set, such as constructing the pattern
`http://chromium.org/*` from `http://*/*` and `*://chromium.org/*`.

Since not all callers may want this behavior, and in order to
introduce three separately named Create*Intersection methods in
URLPatternSet, add an enum, IntersectionBehavior, for callers to
specify the type of intersection behavior to use. This includes
string comparison (STL-set-style intersection), patterns explicitly
contained by both sets (the old semantic intersection), and detailed
(the new and improved semantic intersection).

Add unit tests for the same.

Bug: 867549

Change-Id: Ic80bec8c8c8bbac188efcf2862d92582379c5a7c
Reviewed-on: https://chromium-review.googlesource.com/1150919
Commit-Queue: Devlin <rdevlin.cronin@chromium.org>
Reviewed-by: default avatarKaran Bhatia <karandeepb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#578731}
parent cca5bb85
......@@ -149,7 +149,8 @@ ManifestPermission* AutomationManifestPermission::Intersect(
bool interact =
automation_info_->interact && other->automation_info_->interact;
URLPatternSet matches = URLPatternSet::CreateIntersection(
automation_info_->matches, other->automation_info_->matches);
automation_info_->matches, other->automation_info_->matches,
URLPatternSet::IntersectionBehavior::kStringComparison);
return new AutomationManifestPermission(
base::WrapUnique(new const AutomationInfo(desktop, matches, interact)));
}
......
......@@ -82,10 +82,15 @@ std::unique_ptr<const PermissionSet> PermissionSet::CreateIntersection(
set2.manifest_permissions(),
&manifest_permissions);
URLPatternSet explicit_hosts = URLPatternSet::CreateSemanticIntersection(
set1.explicit_hosts(), set2.explicit_hosts());
URLPatternSet scriptable_hosts = URLPatternSet::CreateSemanticIntersection(
set1.scriptable_hosts(), set2.scriptable_hosts());
// TODO(https://crbug.com/867549): Audit callers of CreateIntersection() and
// determine what the proper intersection behavior is. Likely, we'll want to
// introduce an argument to specify it.
constexpr auto kIntersectionBehavior =
URLPatternSet::IntersectionBehavior::kPatternsContainedByBoth;
URLPatternSet explicit_hosts = URLPatternSet::CreateIntersection(
set1.explicit_hosts(), set2.explicit_hosts(), kIntersectionBehavior);
URLPatternSet scriptable_hosts = URLPatternSet::CreateIntersection(
set1.scriptable_hosts(), set2.scriptable_hosts(), kIntersectionBehavior);
return base::WrapUnique(new PermissionSet(apis, manifest_permissions,
explicit_hosts, scriptable_hosts));
......
......@@ -32,24 +32,61 @@ URLPatternSet URLPatternSet::CreateDifference(const URLPatternSet& set1,
}
// static
URLPatternSet URLPatternSet::CreateIntersection(const URLPatternSet& set1,
const URLPatternSet& set2) {
return URLPatternSet(base::STLSetIntersection<std::set<URLPattern>>(
set1.patterns_, set2.patterns_));
}
URLPatternSet URLPatternSet::CreateSemanticIntersection(
URLPatternSet URLPatternSet::CreateIntersection(
const URLPatternSet& set1,
const URLPatternSet& set2) {
const URLPatternSet& set2,
IntersectionBehavior intersection_behavior) {
// Note: leverage return value optimization; always return the same object.
URLPatternSet result;
if (intersection_behavior == IntersectionBehavior::kStringComparison) {
// String comparison just relies on STL set behavior, which looks at the
// string representation.
result = URLPatternSet(base::STLSetIntersection<std::set<URLPattern>>(
set1.patterns_, set2.patterns_));
return result;
}
// Look for a semantic intersection.
// Step 1: Iterate over each set. Find any patterns that are completely
// contained by the other (thus being necessarily present in any intersection)
// and add them, collecting the others in a set of unique items.
// Note: Use a collection of pointers for the uniques to avoid excessive
// copies. Since these are owned by the URLPatternSet passed in, which is
// const, this should be safe.
std::vector<const URLPattern*> unique_set1;
for (const URLPattern& pattern : set1) {
if (set2.ContainsPattern(pattern))
result.patterns_.insert(pattern);
else
unique_set1.push_back(&pattern);
}
std::vector<const URLPattern*> unique_set2;
for (const URLPattern& pattern : set2) {
if (set1.ContainsPattern(pattern))
result.patterns_.insert(pattern);
else
unique_set2.push_back(&pattern);
}
// If we're just looking for patterns contained by both, we're done.
if (intersection_behavior == IntersectionBehavior::kPatternsContainedByBoth)
return result;
DCHECK_EQ(IntersectionBehavior::kDetailed, intersection_behavior);
// Step 2: Iterate over all the unique patterns and find the intersections
// they have with the other patterns.
for (const auto* pattern : unique_set1) {
for (const auto* pattern2 : unique_set2) {
base::Optional<URLPattern> intersection =
pattern->CreateIntersection(*pattern2);
if (intersection)
result.patterns_.insert(std::move(*intersection));
}
}
return result;
}
......
......@@ -32,20 +32,48 @@ class URLPatternSet {
static URLPatternSet CreateDifference(const URLPatternSet& set1,
const URLPatternSet& set2);
// Returns the intersection of |set1| and |set2|.
static URLPatternSet CreateIntersection(const URLPatternSet& set1,
const URLPatternSet& set2);
// Creates an intersection result where result has every element that is
// contained by both |set1| and |set2|. This is different than
// CreateIntersection(), which does string comparisons. For example, the
// semantic intersection of ("http://*.google.com/*") and
// ("http://google.com/*") is ("http://google.com/*"), but the result from
// CreateIntersection() would be ().
// TODO(devlin): This is weird. We probably want to use mostly
// CreateSemanticIntersection().
static URLPatternSet CreateSemanticIntersection(const URLPatternSet& set1,
const URLPatternSet& set2);
enum class IntersectionBehavior {
// For the following descriptions, consider the two URLPatternSets:
// Set 1: {"https://example.com/*", "https://*.google.com/*", "http://*/*"}
// Set 2: {"https://example.com/*", "https://google.com/maps",
// "*://chromium.org/*"}
// Only includes patterns that are exactly in both sets. The intersection of
// the two sets above is {"https://example.com/*"}, since that is the only
// pattern that appears exactly in each.
kStringComparison,
// Includes patterns that are effectively contained by both sets. The
// intersection of the two sets above is
// {
// "https://example.com/*" (contained exactly by each set)
// "https://google.com/maps" (contained exactly by set 2 and a strict
// subset of https://*.google.com/* in set 1)
// }
kPatternsContainedByBoth,
// Includes patterns that are contained by both sets and creates new
// patterns to represent the intersection of any others. The intersection of
// the two sets above is
// {
// "https://example.com/*" (contained exactly by each set)
// "https://google.com/maps" (contained exactly by set 2 and a strict
// subset of https://*.google.com/* in set 1)
// "http://chromium.org/*" (the overlap between "http://*/*" in set 1 and
// *://chromium.org/*" in set 2).
// }
// Note that this is the most computationally expensive - potentially
// O(n^2) - since it can require comparing each pattern in one set to every
// pattern in the other set.
kDetailed,
};
// Returns the intersection of |set1| and |set2| according to
// |intersection_behavior|.
static URLPatternSet CreateIntersection(
const URLPatternSet& set1,
const URLPatternSet& set2,
IntersectionBehavior intersection_behavior);
// Returns the union of |set1| and |set2|.
static URLPatternSet CreateUnion(const URLPatternSet& set1,
......
......@@ -9,7 +9,9 @@
#include <sstream>
#include "base/stl_util.h"
#include "base/strings/stringprintf.h"
#include "base/values.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
......@@ -138,11 +140,12 @@ TEST(URLPatternSetTest, CreateDifference) {
EXPECT_FALSE(result.Contains(set2));
EXPECT_FALSE(set2.Contains(result));
URLPatternSet intersection = URLPatternSet::CreateIntersection(result, set2);
URLPatternSet intersection = URLPatternSet::CreateIntersection(
result, set2, URLPatternSet::IntersectionBehavior::kStringComparison);
EXPECT_TRUE(intersection.is_empty());
}
TEST(URLPatternSetTest, CreateIntersection) {
TEST(URLPatternSetTest, CreateIntersection_StringComparison) {
URLPatternSet empty_set;
URLPatternSet expected;
URLPatternSet set1;
......@@ -150,7 +153,8 @@ TEST(URLPatternSetTest, CreateIntersection) {
AddPattern(&set1, "http://www.yahoo.com/b*");
// Intersection with an empty set.
URLPatternSet result = URLPatternSet::CreateIntersection(set1, empty_set);
URLPatternSet result = URLPatternSet::CreateIntersection(
set1, empty_set, URLPatternSet::IntersectionBehavior::kStringComparison);
EXPECT_EQ(expected, result);
EXPECT_TRUE(result.is_empty());
EXPECT_TRUE(empty_set.Contains(result));
......@@ -165,14 +169,15 @@ TEST(URLPatternSetTest, CreateIntersection) {
AddPattern(&expected, "http://www.google.com/f*");
result = URLPatternSet::CreateIntersection(set1, set2);
result = URLPatternSet::CreateIntersection(
set1, set2, URLPatternSet::IntersectionBehavior::kStringComparison);
EXPECT_EQ(expected, result);
EXPECT_FALSE(result.is_empty());
EXPECT_TRUE(set1.Contains(result));
EXPECT_TRUE(set2.Contains(result));
}
TEST(URLPatternSetTest, CreateSemanticIntersection) {
TEST(URLPatternSetTest, CreateIntersection_PatternsContainedByBoth) {
{
URLPatternSet set1;
AddPattern(&set1, "http://*.google.com/*");
......@@ -183,22 +188,101 @@ TEST(URLPatternSetTest, CreateSemanticIntersection) {
// The semantic intersection should contain only those patterns that are in
// both set 1 and set 2, or "http://google.com/*".
URLPatternSet intersection =
URLPatternSet::CreateSemanticIntersection(set1, set2);
URLPatternSet intersection = URLPatternSet::CreateIntersection(
set1, set2,
URLPatternSet::IntersectionBehavior::kPatternsContainedByBoth);
ASSERT_EQ(1u, intersection.size());
EXPECT_EQ("http://google.com/*", intersection.begin()->GetAsString());
}
{
// We don't handle funny intersections, where the resultant pattern is
// neither of the two compared patterns. So the intersection of these two
// is not http://www.google.com/*, but rather nothing.
// IntersectionBehavior::kPatternsContainedByBoth doesn't handle funny
// intersections, where the resultant pattern is neither of the two
// compared patterns. So the intersection of these two is not
// http://www.google.com/*, but rather nothing.
URLPatternSet set1;
AddPattern(&set1, "http://*/*");
URLPatternSet set2;
AddPattern(&set2, "*://www.google.com/*");
EXPECT_TRUE(
URLPatternSet::CreateSemanticIntersection(set1, set2).is_empty());
URLPatternSet::CreateIntersection(
set1, set2,
URLPatternSet::IntersectionBehavior::kPatternsContainedByBoth)
.is_empty());
}
}
TEST(URLPatternSetTest, CreateIntersection_Detailed) {
struct {
std::vector<std::string> set1;
std::vector<std::string> set2;
std::vector<std::string> expected_intersection;
} test_cases[] = {
{{"https://*.google.com/*", "https://chromium.org/*"},
{"*://maps.google.com/*", "*://chromium.org/foo"},
{"https://maps.google.com/*", "https://chromium.org/foo"}},
{{"https://*/*", "http://*/*"},
{"*://google.com/*", "*://chromium.org/*"},
{"https://google.com/*", "http://google.com/*", "https://chromium.org/*",
"http://chromium.org/*"}},
{{"<all_urls>"},
{"https://chromium.org/*", "*://google.com/*"},
{"https://chromium.org/*", "*://google.com/*"}},
{{"*://*/maps", "*://*.example.com/*"},
{"https://*.google.com/*", "https://www.example.com/*"},
{"https://*.google.com/maps", "https://www.example.com/*"}},
{{"https://*/maps", "https://*.google.com/*"},
{"https://*.google.com/*"},
{"https://*.google.com/*"}},
{{"http://*/*"}, {"https://*/*"}, {}},
{{"https://*.google.com/*", "https://maps.google.com/*"},
{"https://*.google.com/*", "https://*/*"},
// NOTE: We don't currently do any additional "collapsing" step after
// finding the intersection. We potentially could, to reduce the number
// of patterns we need to store.
{"https://*.google.com/*", "https://maps.google.com/*"}},
};
constexpr int kValidSchemes = URLPattern::SCHEME_ALL;
constexpr char kTestCaseDescriptionTemplate[] =
"Running Test Case:\n"
" Set1: %s\n"
" Set2: %s\n"
" Expected Result: %s";
for (const auto& test_case : test_cases) {
SCOPED_TRACE(base::StringPrintf(
kTestCaseDescriptionTemplate,
base::JoinString(test_case.set1, ", ").c_str(),
base::JoinString(test_case.set2, ", ").c_str(),
base::JoinString(test_case.expected_intersection, ", ").c_str()));
URLPatternSet set1;
for (const auto& pattern_str : test_case.set1) {
URLPattern pattern(kValidSchemes);
ASSERT_EQ(URLPattern::PARSE_SUCCESS, pattern.Parse(pattern_str))
<< "Failed to parse: " << pattern_str;
set1.AddPattern(pattern);
}
URLPatternSet set2;
for (const auto& pattern_str : test_case.set2) {
URLPattern pattern(kValidSchemes);
ASSERT_EQ(URLPattern::PARSE_SUCCESS, pattern.Parse(pattern_str))
<< "Failed to parse: " << pattern_str;
set2.AddPattern(pattern);
}
URLPatternSet intersection1 = URLPatternSet::CreateIntersection(
set1, set2, URLPatternSet::IntersectionBehavior::kDetailed);
URLPatternSet intersection2 = URLPatternSet::CreateIntersection(
set2, set1, URLPatternSet::IntersectionBehavior::kDetailed);
EXPECT_THAT(
*intersection1.ToStringVector(),
testing::UnorderedElementsAreArray(test_case.expected_intersection));
EXPECT_THAT(
*intersection2.ToStringVector(),
testing::UnorderedElementsAreArray(test_case.expected_intersection));
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment