Commit d429ec69 authored by Josh Simmons, committed by Commit Bot

Add RewriteHandler to extract URLs from clicktracking redirect URLs.

Bug: 1087123
Change-Id: I7265a9c10261881c196e6e7969ef9937097e1272
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2223109
Commit-Queue: Josh Simmons <jds@google.com>
Reviewed-by: Megan Jablonski <megjablon@chromium.org>
Reviewed-by: Sophie Chang <sophiechang@chromium.org>
Cr-Commit-Position: refs/heads/master@{#773910}
parent 6d3a727f
......@@ -1140,6 +1140,8 @@ static_library("browser") {
"payments/ssl_validity_checker.cc",
"performance_hints/performance_hints_observer.cc",
"performance_hints/performance_hints_observer.h",
"performance_hints/performance_hints_rewrite_handler.cc",
"performance_hints/performance_hints_rewrite_handler.h",
"performance_manager/browser_child_process_watcher.cc",
"performance_manager/browser_child_process_watcher.h",
"performance_manager/chrome_browser_main_extra_parts_performance_manager.cc",
......
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/performance_hints/performance_hints_rewrite_handler.h"
#include <utility>
#include "base/strings/string_split.h"
#include "net/base/escape.h"
#include "url/gurl.h"
#include "url/third_party/mozilla/url_parse.h"
// Special member functions: default constructor, copy constructor, and
// destructor. All three are compiler-generated (`= default`) and are defined
// here, out of line, rather than in the header.
PerformanceHintsRewriteHandler::PerformanceHintsRewriteHandler() = default;
PerformanceHintsRewriteHandler::PerformanceHintsRewriteHandler(
const PerformanceHintsRewriteHandler&) = default;
PerformanceHintsRewriteHandler::~PerformanceHintsRewriteHandler() = default;
// Returns the inner URL carried in the query string of |url| when |url|'s host
// and path are exactly equal to those of a configured rule and its query
// contains that rule's parameter. The parameter value is unescaped before
// being turned into a GURL; the resulting GURL is not checked for validity
// here.
//
// Returns base::nullopt when |url| is invalid, has no query string, or no rule
// produces a match.
base::Optional<GURL> PerformanceHintsRewriteHandler::HandleRewriteIfNecessary(
    const GURL& url) const {
  // Without a query string there is no parameter to extract.
  if (!url.is_valid() || !url.has_query()) {
    return base::nullopt;
  }

  const base::StringPiece host = url.host_piece();
  const base::StringPiece path = url.path_piece();

  for (const UrlRule& url_rule : url_rules_) {
    const bool endpoint_matches =
        host == url_rule.host && path == url_rule.path;
    if (!endpoint_matches) {
      continue;
    }

    // |query| is consumed by ExtractQueryKeyValue as it walks the pairs, so it
    // is rebuilt for every rule that gets this far.
    const std::string query_str = url.query();
    url::Component query(0, query_str.length());
    url::Component key, value;
    while (
        url::ExtractQueryKeyValue(query_str.c_str(), &query, &key, &value)) {
      // Compare in place instead of materializing a temporary key string.
      if (query_str.compare(key.begin, key.len, url_rule.query_param) != 0) {
        continue;
      }
      // Unescape the inner URL since it was escaped to be made a query
      // param.
      const std::string unescaped = net::UnescapeURLComponent(
          query_str.substr(value.begin, value.len),
          net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);
      return GURL(unescaped);
    }

    // This rule's parameter is absent. Keep scanning rather than giving up:
    // another rule may name a different parameter for the same host and path.
  }

  return base::nullopt;
}
// Builds a handler from |config|, a comma-separated list of forms, each
// written as "host/path?param".
//
// If |config| cannot be split into '?'-delimited pairs, the returned handler
// has no rules and therefore matches no URLs. Individual forms that are
// missing the host, the path (no '/' present), or the param are skipped; the
// remaining well-formed forms are still used.
PerformanceHintsRewriteHandler PerformanceHintsRewriteHandler::FromConfigString(
    const std::string& config) {
  PerformanceHintsRewriteHandler handler;

  base::StringPairs pairs;
  if (!base::SplitStringIntoKeyValuePairs(config, '?', ',', &pairs)) {
    // Empty, will match no URLs.
    return handler;
  }

  for (const auto& pair : pairs) {
    const std::string& host_path = pair.first;
    const std::string& query_param = pair.second;
    if (host_path.empty() || query_param.empty()) {
      continue;
    }

    const size_t path_start = host_path.find('/');
    if (path_start == std::string::npos) {
      // A path must be specified, even if that path is the root ("/").
      continue;
    }
    if (path_start == 0) {
      // The form starts with '/', so there is no host. All three parts are
      // required, and an empty-host rule would only ever match host-less URLs.
      continue;
    }

    UrlRule url_rule;
    url_rule.host = host_path.substr(0, path_start);
    url_rule.path = host_path.substr(path_start);
    url_rule.query_param = query_param;
    handler.url_rules_.push_back(std::move(url_rule));
  }

  return handler;
}
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROME_BROWSER_PERFORMANCE_HINTS_PERFORMANCE_HINTS_REWRITE_HANDLER_H_
#define CHROME_BROWSER_PERFORMANCE_HINTS_PERFORMANCE_HINTS_REWRITE_HANDLER_H_
#include <memory>
#include <string>
#include <vector>
#include "base/optional.h"
class GURL;
// PerformanceHintsRewriteHandler checks URLs to see if they match one of the
// preconfigured rewrite patterns. If so, returns the original (non-rewritten)
// URL that is embedded in the matched URL's query string.
//
// This is the case for many redirectors and click-tracking URLs such as
// https://www.google.com/url?url=https://actualurl.com.
//
// Instances are normally obtained from FromConfigString().
class PerformanceHintsRewriteHandler {
public:
// Creates a handler with no rules; it matches no URLs.
PerformanceHintsRewriteHandler();
// Copies the configured rules from another handler.
PerformanceHintsRewriteHandler(const PerformanceHintsRewriteHandler&);
~PerformanceHintsRewriteHandler();
// If |url| matches one of the configured URLs, return the inner URL included
// in the query params. If the URL is invalid or doesn't match one of the
// configured URLs, return nullopt.
//
// A match requires |url|'s host and path to be exactly equal to a rule's host
// and path, and |url|'s query to contain that rule's param. The param's value
// is unescaped before being converted to the returned GURL; the returned GURL
// itself is not checked for validity.
base::Optional<GURL> HandleRewriteIfNecessary(const GURL& url) const;
// Creates a PerformanceHintsRewriteHandler that handles URLs of the forms
// provided by the config. If a syntax error prevents the config from being
// parsed, this will return a PerformanceHintsRewriteHandler that matches no
// URLs (always returns nullopt).
//
// The config string is of the form "host/path?param,host/path?param,...".
// All three values must be included for each form. Other components (port,
// scheme, etc) must be omitted. Only one query param should be specified per
// form, namely the param that contains the inner URL.
//
// A path of "/" is accepted, e.g. "www.google.com/?url".
//
// An empty config ("") is valid, and indicates no URLs should be matched.
static PerformanceHintsRewriteHandler FromConfigString(
const std::string& config);
private:
// One parsed "host/path?param" form from the config string.
struct UrlRule {
// The host to match. No scheme, port, etc is included.
std::string host;
// The path to match. Includes the starting "/".
std::string path;
// The query param that contains the inner URL.
std::string query_param;
};
// The URL forms that this PerformanceHintsRewriteHandler can process.
// Empty for a default-constructed handler.
std::vector<UrlRule> url_rules_;
};
#endif // CHROME_BROWSER_PERFORMANCE_HINTS_PERFORMANCE_HINTS_REWRITE_HANDLER_H_
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/performance_hints/performance_hints_rewrite_handler.h"
#include <memory>
#include <vector>
#include "testing/gmock/include/gmock/gmock.h"
#include "url/gurl.h"
// Only the configured param is extracted; other params on the outer URL,
// whether before or after it, do not end up in the result.
TEST(PerformanceHintsRewriteHandlerTest, ExtraQueryParams) {
  const PerformanceHintsRewriteHandler rewriter =
      PerformanceHintsRewriteHandler::FromConfigString(
          "www.google.com/url?url");
  const GURL outer_url(
      "https://www.google.com/url?not=used&url=https://theactualurl.com/"
      "testpath?testquerytoo=true&unusedparamfromouterurl");

  const base::Optional<GURL> inner_url =
      rewriter.HandleRewriteIfNecessary(outer_url);

  ASSERT_TRUE(inner_url.has_value());
  EXPECT_EQ("https://theactualurl.com/testpath?testquerytoo=true",
            inner_url->spec());
}
// An escaped '&' (%26) inside the param value belongs to the inner URL and is
// unescaped in the result.
TEST(PerformanceHintsRewriteHandlerTest, EscapedCharacters) {
  const PerformanceHintsRewriteHandler rewriter =
      PerformanceHintsRewriteHandler::FromConfigString(
          "www.google.com/url?url");
  const GURL outer_url(
      "https://www.google.com/url?url=https://theactualurl.com/"
      "testpath?first=param%26second=param&unusedparamfromouterurl");

  const base::Optional<GURL> inner_url =
      rewriter.HandleRewriteIfNecessary(outer_url);

  ASSERT_TRUE(inner_url.has_value());
  EXPECT_EQ("https://theactualurl.com/testpath?first=param&second=param",
            inner_url->spec());
}
// Host and path match the rule, but the query lacks the configured param, so
// nothing is returned.
TEST(PerformanceHintsRewriteHandlerTest, NoMatchingParam) {
  const PerformanceHintsRewriteHandler rewriter =
      PerformanceHintsRewriteHandler::FromConfigString(
          "www.google.com/url?url");
  const GURL outer_url(
      "https://www.google.com/url?notactuallyurl=https://theactualurl.com");

  const base::Optional<GURL> inner_url =
      rewriter.HandleRewriteIfNecessary(outer_url);

  ASSERT_FALSE(inner_url.has_value());
}
// An input that is not a valid URL never yields an inner URL.
TEST(PerformanceHintsRewriteHandlerTest, InvalidUrl) {
  const PerformanceHintsRewriteHandler rewriter =
      PerformanceHintsRewriteHandler::FromConfigString(
          "www.google.com/url?url");
  const GURL outer_url("invalid");

  const base::Optional<GURL> inner_url =
      rewriter.HandleRewriteIfNecessary(outer_url);

  ASSERT_FALSE(inner_url.has_value());
}
// A handler built from an empty config matches nothing.
TEST(PerformanceHintsRewriteHandlerTest, EmptyConfig) {
  const PerformanceHintsRewriteHandler rewriter =
      PerformanceHintsRewriteHandler::FromConfigString("");
  const GURL outer_url(
      "https://www.google.com/url?url=https://theactualurl.com/testpath");

  const base::Optional<GURL> inner_url =
      rewriter.HandleRewriteIfNecessary(outer_url);

  ASSERT_FALSE(inner_url.has_value());
}
// A config form with no "?param" part produces a handler that matches nothing.
TEST(PerformanceHintsRewriteHandlerTest, NoQueryParam) {
  const PerformanceHintsRewriteHandler rewriter =
      PerformanceHintsRewriteHandler::FromConfigString("www.google.com/url");
  const GURL outer_url(
      "https://www.google.com/url?url=https://theactualurl.com/testpath");

  const base::Optional<GURL> inner_url =
      rewriter.HandleRewriteIfNecessary(outer_url);

  ASSERT_FALSE(inner_url.has_value());
}
// A config form with nothing before the '?' produces a handler that matches
// nothing.
TEST(PerformanceHintsRewriteHandlerTest, NoHostPath) {
  const PerformanceHintsRewriteHandler rewriter =
      PerformanceHintsRewriteHandler::FromConfigString("?url");
  const GURL outer_url(
      "https://www.google.com/url?url=https://theactualurl.com/testpath");

  const base::Optional<GURL> inner_url =
      rewriter.HandleRewriteIfNecessary(outer_url);

  ASSERT_FALSE(inner_url.has_value());
}
// A rule whose path is the root ("/") matches an outer URL written without an
// explicit path.
TEST(PerformanceHintsRewriteHandlerTest, HostOnly) {
  const PerformanceHintsRewriteHandler rewriter =
      PerformanceHintsRewriteHandler::FromConfigString("www.google.com/?url");
  const GURL outer_url(
      "https://www.google.com?url=https://theactualurl.com/testpath");

  const base::Optional<GURL> inner_url =
      rewriter.HandleRewriteIfNecessary(outer_url);

  ASSERT_TRUE(inner_url.has_value());
  EXPECT_EQ("https://theactualurl.com/testpath", inner_url->spec());
}
// With two forms configured, URLs matching either form are rewritten using
// that form's own param name.
TEST(PerformanceHintsRewriteHandlerTest, MultipleMatchers) {
  const PerformanceHintsRewriteHandler rewriter =
      PerformanceHintsRewriteHandler::FromConfigString(
          "www.google.com/url?url,www.googleadservices.com/pagead/aclk?adurl");

  // First configured form.
  const GURL search_redirect(
      "https://www.google.com/url?url=https://theactualurl.com/testpath");
  const base::Optional<GURL> from_search =
      rewriter.HandleRewriteIfNecessary(search_redirect);
  ASSERT_TRUE(from_search.has_value());
  EXPECT_EQ("https://theactualurl.com/testpath", from_search->spec());

  // Second configured form.
  const GURL ad_redirect(
      "https://www.googleadservices.com/pagead/aclk?adurl=https://"
      "theactualurl.com/testpath");
  const base::Optional<GURL> from_ad =
      rewriter.HandleRewriteIfNecessary(ad_redirect);
  ASSERT_TRUE(from_ad.has_value());
  EXPECT_EQ("https://theactualurl.com/testpath", from_ad->spec());
}
......@@ -3254,6 +3254,7 @@ test("unit_tests") {
"../browser/password_manager/chrome_password_manager_client_unittest.cc",
"../browser/password_manager/password_store_x_unittest.cc",
"../browser/performance_hints/performance_hints_observer_unittest.cc",
"../browser/performance_hints/performance_hints_rewrite_handler_unittest.cc",
"../browser/performance_manager/decorators/frame_priority_decorator_unittest.cc",
"../browser/performance_manager/decorators/frozen_frame_aggregator_unittest.cc",
"../browser/performance_manager/decorators/helpers/page_live_state_decorator_helper_unittest.cc",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment