Commit ee556059 authored by Tarun Bansal, committed by Commit Bot

Add wildcard string matcher to previews

Add wildcard string matcher to previews. This would be used for
matching URL (aka page) patterns provided by the hints optimization
server.

Also, fix some existing errors in gn files.

Change-Id: Id24365b8f475b5740b0d897d9514dd1d6d3281d4
Bug: 870039
Reviewed-on: https://chromium-review.googlesource.com/1172159
Commit-Queue: Tarun Bansal <tbansal@chromium.org>
Reviewed-by: Ryan Sturm <ryansturm@chromium.org>
Cr-Commit-Position: refs/heads/master@{#582683}
parent 8fae07dc
......@@ -144,6 +144,8 @@ static_library("test_support") {
"//components/data_reduction_proxy/core/common",
"//components/data_reduction_proxy/core/common:test_support",
"//components/prefs:test_support",
"//components/previews/core:core",
"//components/previews/core:test_support",
"//net",
"//net:test_support",
"//testing/gmock",
......@@ -208,6 +210,7 @@ source_set("unit_tests") {
"//components/data_use_measurement/core:ascriber",
"//components/prefs:test_support",
"//components/previews/core",
"//components/previews/core:test_support",
"//components/variations",
"//net:test_support",
"//testing/gmock",
......
......@@ -9,6 +9,8 @@ static_library("optimization_guide") {
"optimization_guide_service.cc",
"optimization_guide_service.h",
"optimization_guide_service_observer.h",
"url_pattern_with_wildcards.cc",
"url_pattern_with_wildcards.h",
]
deps = [
......@@ -35,6 +37,7 @@ source_set("unit_tests") {
testonly = true
sources = [
"optimization_guide_service_unittest.cc",
"url_pattern_with_wildcards_unittest.cc",
]
deps = [
......
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/optimization_guide/url_pattern_with_wildcards.h"
#include "base/logging.h"
#include "base/macros.h"
namespace {
// Breaks |url_pattern| apart at every '*' and returns the literal pieces in
// the order they appear.
//
// Empty pieces that would arise from a leading wildcard or from consecutive
// wildcards are dropped. The text after the last wildcard is always appended,
// even when it is empty, so the result is never empty: a pattern ending in '*'
// (or an empty pattern) yields a trailing "" element.
std::vector<std::string> SplitURLPattern(const std::string& url_pattern) {
  std::vector<std::string> pieces;
  size_t piece_begin = 0;

  // Walk from wildcard to wildcard; |piece_begin| always points just past the
  // previously consumed wildcard (or at 0 before the first one).
  for (size_t star = url_pattern.find('*', piece_begin);
       star != std::string::npos;
       star = url_pattern.find('*', piece_begin)) {
    // Only keep non-empty text between |piece_begin| (inclusive) and |star|
    // (exclusive).
    if (star > piece_begin)
      pieces.push_back(url_pattern.substr(piece_begin, star - piece_begin));
    piece_begin = star + 1;
  }

  // No wildcard remains: whatever is left is the final piece.
  pieces.push_back(url_pattern.substr(piece_begin));
  return pieces;
}
} // namespace
namespace optimization_guide {
// Builds the matcher by splitting |url_pattern| at its '*' characters and
// storing the resulting pieces in |split_subpatterns_|.
URLPatternWithWildcards::URLPatternWithWildcards(const std::string& url_pattern)
: split_subpatterns_(SplitURLPattern(url_pattern)) {
// Callers are expected to pass a non-empty pattern.
DCHECK(!url_pattern.empty());
// SplitURLPattern() always appends the text after the last wildcard, so at
// least one subpattern is expected here.
DCHECK(!split_subpatterns_.empty());
}
// No custom teardown is needed; the compiler-generated destructor is used.
URLPatternWithWildcards::~URLPatternWithWildcards() = default;
bool URLPatternWithWildcards::Matches(const std::string& url_string) const {
  // |url_string| matches when every entry of |split_subpatterns_| can be
  // located in it, in order, with each entry found at or after the position
  // where the previous entry ended. The first occurrence at or after that
  // position is always the one taken.
  //
  // Example: with |split_subpatterns_| = {"example.com", "foo"} and
  // |url_string| = "example.com/pages/foo.jpg", "example.com" is looked up
  // starting at index 0 and found there; "foo" is then looked up starting at
  // index 11, the first character after "example.com".
  size_t cursor = 0;
  for (const std::string& piece : split_subpatterns_) {
    DCHECK_GE(url_string.length(), cursor);

    const size_t found_at = url_string.find(piece, cursor);
    if (found_at == std::string::npos) {
      // This piece does not occur in the remainder of |url_string|.
      return false;
    }

    // The next piece must start after the end of this one.
    cursor = found_at + piece.length();
  }
  return true;
}
} // namespace optimization_guide
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_OPTIMIZATION_GUIDE_URL_PATTERN_WITH_WILDCARDS_H_
#define COMPONENTS_OPTIMIZATION_GUIDE_URL_PATTERN_WITH_WILDCARDS_H_
#include <stddef.h>
#include <string>
#include <vector>
#include "base/logging.h"
#include "base/macros.h"
namespace optimization_guide {
// URLPatternWithWildcards parses and stores one URL pattern. A URL pattern is a
// string that is matched against a URL as a substring: it does not have to
// cover the whole URL. A URL pattern may contain multiple wildcard characters
// ('*'). Each wildcard matches any run of zero or more characters, because the
// literal text on either side of it only has to appear in that order. An
// implicit wildcard character ('*') is assumed to be present at the beginning
// and end of a pattern.
class URLPatternWithWildcards {
public:
// |url_pattern| is split at its '*' characters once, at construction time.
// The constructor DCHECKs that |url_pattern| is not empty.
explicit URLPatternWithWildcards(const std::string& url_pattern);
~URLPatternWithWildcards();
// Returns true if |url_string| matches |this| pattern, i.e. all literal
// pieces of the pattern occur in |url_string| in order and without
// overlapping.
bool Matches(const std::string& url_string) const;
private:
// A single pattern string is split into multiple strings (each separated by
// '*'), and stored in |split_subpatterns_|. Fixed after construction.
const std::vector<std::string> split_subpatterns_;
DISALLOW_COPY_AND_ASSIGN(URLPatternWithWildcards);
};
} // namespace optimization_guide
#endif // COMPONENTS_OPTIMIZATION_GUIDE_URL_PATTERN_WITH_WILDCARDS_H_
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/optimization_guide/url_pattern_with_wildcards.h"
#include <string>
#include "testing/gtest/include/gtest/gtest.h"
namespace optimization_guide {
namespace {
// A pattern without any explicit wildcard matches every URL that contains the
// pattern text anywhere (scheme, host, path, query or fragment).
TEST(URLPatternWithWildcardsTest, OnePattern) {
  URLPatternWithWildcards url_pattern_with_wildcards("foo.jpg");

  const struct {
    std::string url;
    bool expect_matches;
  } tests[] = {
      {"https://www.example.com/", false},
      {"https://www.example.com/foo.js", false},
      {"https://www.example.com/foo.jpg", true},
      {"https://www.example.com/pages/foo.jpg", true},
      {"https://www.example.com/foobar.jpg", false},
      {"https://www.example.com/barfoo.jpg", true},
      {"http://www.example.com/foo.jpg", true},
      {"http://www.example.com/foo.jpg?q=alpha", true},
      {"http://www.example.com/bar.jpg?q=foo.jpg", true},
      {"http://www.example.com/bar.jpg?q=foo.jpg#foo.jpg", true},
  };

  for (const auto& test : tests) {
    // Include the URL in the failure output so the failing table row can be
    // identified.
    EXPECT_EQ(test.expect_matches, url_pattern_with_wildcards.Matches(test.url))
        << " url=" << test.url;
  }
}
// A single wildcard in the middle of the pattern: both literal pieces
// ("example.com/bar/" and "/foo.jpg") must occur, in that order and without
// overlapping.
TEST(URLPatternWithWildcardsTest, OnePatternWithOneWildcard) {
  URLPatternWithWildcards url_pattern_with_wildcards(
      "example.com/bar/*/foo.jpg");

  const struct {
    std::string url;
    bool expect_matches;
  } tests[] = {
      {"https://www.example.com/", false},
      {"https://www.example.com/foo.js", false},
      {"https://www.example.com/foo.jpg", false},
      {"https://www.example.com/pages/foo.jpg", false},
      {"https://www.example.com/foobar.jpg", false},
      {"https://www.example.com/barfoo.jpg", false},
      {"http://www.example.com/foo.jpg", false},
      {"http://www.example.com/foo.jpg?q=alpha", false},
      {"http://www.example.com/bar.jpg?q=foo.jpg", false},
      {"http://www.example.com/bar.jpg?q=foo.jpg#foo.jpg", false},
      {"https://www.example.com/bar/foo.jpg", false},
      {"https://www.example.com/bar/pages/foo.jpg", true},
      {"https://www.example.com/bar/main_page/foo.jpg", true},
      {"https://www.example.com/bar/pages/subpages/foo.jpg", true},
      // Try different prefixes.
      {"https://m.example.com/bar/main_page/foo.jpg", true},
      {"https://in.example.com/bar/main_page/foo.jpg", true},
  };

  for (const auto& test : tests) {
    // Include the URL in the failure output so the failing table row can be
    // identified.
    EXPECT_EQ(test.expect_matches, url_pattern_with_wildcards.Matches(test.url))
        << " url=" << test.url;
  }
}
// Explicit wildcards at the very beginning and end of the pattern; the only
// literal text that has to be found is "example.com/bar/".
TEST(URLPatternWithWildcardsTest, OnePatternWithOneWildcardAtEnds) {
  URLPatternWithWildcards url_pattern_with_wildcards("*example.com/bar/*");

  const struct {
    std::string url;
    bool expect_matches;
  } tests[] = {
      {"https://www.example.com/", false},
      {"https://www.example.com/foo.js", false},
      {"https://www.example.com/foo.jpg", false},
      {"https://www.example.com/pages/foo.jpg", false},
      {"https://www.example.com/foobar.jpg", false},
      {"https://www.example.com/barfoo.jpg", false},
      {"http://www.example.com/foo.jpg", false},
      {"http://www.example.com/foo.jpg?q=alpha", false},
      {"http://www.example.com/bar.jpg?q=foo.jpg", false},
      {"http://www.example.com/bar.jpg?q=foo.jpg#foo.jpg", false},
      {"https://www.example.com/bar/foo.jpg", true},
      {"https://www.example.com/bar/pages/foo.jpg", true},
      {"https://www.example.com/bar/main_page/foo.jpg", true},
      {"https://www.example.com/bar/pages/subpages/foo.jpg", true},
      // Try different prefixes.
      {"https://m.example.com/bar/main_page/foo.jpg", true},
      {"https://in.example.com/bar/main_page/foo.jpg", true},
  };

  for (const auto& test : tests) {
    // Include the URL in the failure output so the failing table row can be
    // identified.
    EXPECT_EQ(test.expect_matches, url_pattern_with_wildcards.Matches(test.url))
        << " url=" << test.url;
  }
}
// The pattern spells out the scheme and full host, so URLs that differ in
// scheme must not match even when the rest of the URL does.
TEST(URLPatternWithWildcardsTest, OnePatternWithOneWildcardAndScheme) {
  URLPatternWithWildcards url_pattern_with_wildcards(
      "https://www.example.com/bar/*/foo.jpg");

  const struct {
    std::string url;
    bool expect_matches;
  } tests[] = {
      {"https://www.example.com/", false},
      {"https://www.example.com/foo.js", false},
      {"https://www.example.com/foo.jpg", false},
      {"https://www.example.com/pages/foo.jpg", false},
      {"https://www.example.com/foobar.jpg", false},
      {"https://www.example.com/barfoo.jpg", false},
      {"http://www.example.com/foo.jpg", false},
      {"http://www.example.com/foo.jpg?q=alpha", false},
      {"http://www.example.com/bar.jpg?q=foo.jpg", false},
      {"http://www.example.com/bar.jpg?q=foo.jpg#foo.jpg", false},
      {"https://www.example.com/bar/foo.jpg", false},
      {"https://www.example.com/bar/pages/foo.jpg", true},
      {"https://www.example.com/bar/main_page/foo.jpg", true},
      {"https://www.example.com/bar/pages/subpages/foo.jpg", true},
      // Different scheme.
      {"http://www.example.com/bar/pages/foo.jpg", false},
  };

  for (const auto& test : tests) {
    // Include the URL in the failure output so the failing table row can be
    // identified.
    EXPECT_EQ(test.expect_matches, url_pattern_with_wildcards.Matches(test.url))
        << " url=" << test.url;
  }
}
// Several wildcards: the literal pieces "example.com/bar/", "/pages/", "/" and
// ".jpg" must all be found, in that order.
TEST(URLPatternWithWildcardsTest, OnePatternWithMultipleWildcards) {
  URLPatternWithWildcards url_pattern_with_wildcards(
      "example.com/bar/*/pages/*/*.jpg");

  const struct {
    std::string url;
    bool expect_matches;
  } tests[] = {
      {"https://www.example.com/", false},
      {"https://www.example.com/foo.js", false},
      {"https://www.example.com/foo.jpg", false},
      {"https://www.example.com/pages/foo.jpg", false},
      {"https://www.example.com/foobar.jpg", false},
      {"https://www.example.com/barfoo.jpg", false},
      {"http://www.example.com/foo.jpg", false},
      {"http://www.example.com/foo.jpg?q=alpha", false},
      {"http://www.example.com/bar.jpg?q=foo.jpg", false},
      {"http://www.example.com/bar.jpg?q=foo.jpg#foo.jpg", false},
      {"https://www.example.com/bar/foo.jpg", false},
      {"https://www.example.com/bar/pages/foo.jpg", false},
      {"https://www.example.com/bar/main_page/foo.jpg", false},
      {"https://www.example.com/bar/pages/subpages/foo.jpg", false},
      {"https://www.example.com/bar/main/pages/document/foo.jpg", true},
      {"https://www.example.com/bar/main/pages/document/foo1.jpg", true},
      {"https://www.example.com/bar/main/pages/document/foo1.js", false},
      // Out-of-order subpatterns.
      {"https://cdn.com/pages/www.example.com/bar/document/foo.jpg", false},
  };

  for (const auto& test : tests) {
    // Include the URL in the failure output so the failing table row can be
    // identified.
    EXPECT_EQ(test.expect_matches, url_pattern_with_wildcards.Matches(test.url))
        << " url=" << test.url;
  }
}
// The pattern ends in literal text ("/pages/") with no explicit trailing
// wildcard; anything may still follow it in the URL because a trailing
// wildcard is implied.
TEST(URLPatternWithWildcardsTest,
     OnePatternWithMultipleWildcardsImplicitSuffix) {
  struct TestCase {
    std::string url;
    bool expect_matches;
  };

  const TestCase kCases[] = {
      {"https://www.example.com/", false},
      {"https://www.example.com/foo.js", false},
      {"https://www.example.com/foo.jpg", false},
      {"https://www.example.com/pages/foo.jpg", false},
      {"https://www.example.com/foobar.jpg", false},
      {"https://www.example.com/barfoo.jpg", false},
      {"http://www.example.com/foo.jpg", false},
      {"http://www.example.com/foo.jpg?q=alpha", false},
      {"http://www.example.com/bar.jpg?q=foo.jpg", false},
      {"http://www.example.com/bar.jpg?q=foo.jpg#foo.jpg", false},
      {"https://www.example.com/bar/foo.jpg", false},
      // "/pages/" cannot be found after "example.com/bar/" because the two
      // would have to share the '/' that follows "bar".
      {"https://www.example.com/bar/pages/foo.jpg", false},
      {"https://www.example.com/bar/main_page/foo.jpg", false},
      // Same as above: no room between "bar/" and "pages/".
      {"https://www.example.com/bar/pages/subpages/foo.jpg", false},
      {"https://www.example.com/bar/main/pages/document/foo.jpg", true},
      {"https://www.example.com/bar/main/pages/document/foo1.jpg", true},
      {"https://www.example.com/bar/main/pages/document/foo1.js", true},
      // The literal pieces appear, but in the wrong order.
      {"https://cdn.com/pages/www.example.com/bar/document/foo.jpg", false},
  };

  URLPatternWithWildcards pattern("example.com/bar/*/pages/");
  for (const TestCase& test_case : kCases) {
    const bool matched = pattern.Matches(test_case.url);
    EXPECT_EQ(test_case.expect_matches, matched) << " url=" << test_case.url;
  }
}
} // namespace
} // namespace optimization_guide
......@@ -18,8 +18,6 @@ static_library("core") {
"previews_switches.h",
"previews_user_data.cc",
"previews_user_data.h",
"test_previews_decider.cc",
"test_previews_decider.h",
]
deps = [
......@@ -32,6 +30,24 @@ static_library("core") {
]
}
# Test-only library holding the TestPreviewsDecider helper, declared as its own
# target separate from the ":core" library.
static_library("test_support") {
# Restricts this target to being used from other test-only targets.
testonly = true
sources = [
"test_previews_decider.cc",
"test_previews_decider.h",
]
deps = [
":core",
"//base",
"//base/test:test_support",
"//net",
"//net:test_support",
"//testing/gmock",
"//testing/gtest",
]
}
source_set("unit_tests") {
testonly = true
sources = [
......@@ -43,6 +59,7 @@ source_set("unit_tests") {
deps = [
":core",
":test_support",
"//base",
"//base/test:test_support",
"//components/blacklist/opt_out_blacklist",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment