Commit a0cbf17d authored by rajendrant's avatar rajendrant Committed by Commit Bot

Include query string in robots rules checking

Robots rules should consider the path and query parameters of the URL
to decide whether the URL is allowed.

Bug: 1144836
Change-Id: I15caeedfdc498cb361491ba66fe12956065daf34
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2533418
Auto-Submit: rajendrant <rajendrant@chromium.org>
Commit-Queue: Robert Ogden <robertogden@chromium.org>
Reviewed-by: default avatarRobert Ogden <robertogden@chromium.org>
Cr-Commit-Position: refs/heads/master@{#826794}
parent f40910b4
......@@ -7,6 +7,7 @@
#include "base/callback.h"
#include "base/logging.h"
#include "base/metrics/histogram_macros.h"
#include "base/strings/strcat.h"
#include "base/strings/string_util.h"
#include "base/time/time.h"
#include "base/timer/elapsed_timer.h"
......@@ -132,16 +133,20 @@ void RobotsRulesDecider::UpdateRobotsRules(const std::string& rules) {
pending_check_requests_.clear();
}
void RobotsRulesDecider::CheckRobotsRules(const std::string& url_path,
void RobotsRulesDecider::CheckRobotsRules(const GURL& url,
CheckResultCallback callback) {
std::string path_with_query = url.path();
if (url.has_query())
base::StrAppend(&path_with_query, {"?", url.query()});
if (rules_receive_timeout_timer_.IsRunning()) {
// Rules have not been received yet.
pending_check_requests_.emplace_back(
std::make_pair(std::move(callback), url_path));
std::make_pair(std::move(callback), path_with_query));
return;
}
std::move(callback).Run(IsAllowed(url_path) ? CheckResult::kAllowed
: CheckResult::kDisallowed);
std::move(callback).Run(IsAllowed(path_with_query)
? CheckResult::kAllowed
: CheckResult::kDisallowed);
}
bool RobotsRulesDecider::IsAllowed(const std::string& url_path) const {
......
......@@ -51,13 +51,14 @@ class RobotsRulesDecider {
// processed immediately and called with th result.
void UpdateRobotsRules(const std::string& rules);
// Check whether the URL path is allowed or disallowed by robots rules.
// |callback| will be called with the result. The callback could be immediate
// if rules are available. Otherwise the callback will be added to
// Check whether the URL is allowed or disallowed by robots rules. |callback|
// will be called with the result. The callback could be immediate if rules
// are available. Otherwise the callback will be added to
// |pending_check_requests_| and called when a decision can be made like when
// rules are retrieved, or rule fetch timeout, etc.
void CheckRobotsRules(const std::string& url_path,
CheckResultCallback callback);
// The robots rules check will make use of the |url| path and query
// parameters. The |url| origin, ref fragment, etc are immaterial.
void CheckRobotsRules(const GURL& url, CheckResultCallback callback);
private:
// Contains one robots.txt rule.
......
......@@ -18,6 +18,8 @@ namespace subresource_redirect {
namespace {
constexpr char kTestOrigin[] = "https://test.com";
const bool kRuleTypeAllow = true;
const bool kRuleTypeDisallow = false;
......@@ -76,20 +78,21 @@ class SubresourceRedirectRobotsRulesDeciderTest : public testing::Test {
GetRobotsRulesProtoString(patterns));
}
void CheckRobotsRules(const std::string& url_path,
void CheckRobotsRules(const std::string& url_path_with_query,
RobotsRulesDecider::CheckResult expected_result) {
CheckResultReceiver result_receiver;
robots_rules_decider_.CheckRobotsRules(url_path,
result_receiver.GetCallback());
robots_rules_decider_.CheckRobotsRules(
GURL(kTestOrigin + url_path_with_query), result_receiver.GetCallback());
EXPECT_TRUE(result_receiver.did_receive_result());
EXPECT_EQ(expected_result, result_receiver.check_result());
}
std::unique_ptr<CheckResultReceiver> CheckRobotsRulesAsync(
const std::string& url_path) {
const std::string& url_path_with_query) {
auto result_receiver = std::make_unique<CheckResultReceiver>();
robots_rules_decider_.CheckRobotsRules(url_path,
result_receiver->GetCallback());
robots_rules_decider_.CheckRobotsRules(
GURL(kTestOrigin + url_path_with_query),
result_receiver->GetCallback());
return result_receiver;
}
......@@ -204,8 +207,10 @@ TEST_F(SubresourceRedirectRobotsRulesDeciderTest,
auto receiver1 = std::make_unique<CheckResultReceiver>();
auto receiver2 = std::make_unique<CheckResultReceiver>();
robots_rules_decider->CheckRobotsRules("/foo.jpg", receiver1->GetCallback());
robots_rules_decider->CheckRobotsRules("/bar", receiver2->GetCallback());
robots_rules_decider->CheckRobotsRules(GURL("https://test.com/foo.jpg"),
receiver1->GetCallback());
robots_rules_decider->CheckRobotsRules(GURL("https://test.com/bar"),
receiver2->GetCallback());
EXPECT_FALSE(receiver1->did_receive_result());
EXPECT_FALSE(receiver2->did_receive_result());
VerifyTotalRobotsRulesApplyHistograms(0);
......@@ -334,6 +339,58 @@ TEST_F(SubresourceRedirectRobotsRulesDeciderTest,
VerifyTotalRobotsRulesApplyHistograms(6);
}
TEST_F(SubresourceRedirectRobotsRulesDeciderTest, TestURLWithArguments) {
SetUpRobotsRules({{kRuleTypeAllow, "/*.jpg$"},
{kRuleTypeDisallow, "/*.png?*arg_disallowed"},
{kRuleTypeAllow, "/*.png"},
{kRuleTypeDisallow, "/*.gif?*arg_disallowed"},
{kRuleTypeAllow, "/*.gif"},
{kRuleTypeDisallow, "/"}});
VerifyRobotsRulesReceiveResultHistogram(
RobotsRulesDecider::SubresourceRedirectRobotsRulesReceiveResult::
kSuccess);
VerifyReceivedRobotsRulesCountHistogram(6);
CheckRobotsRules("/allowed.jpg", RobotsRulesDecider::CheckResult::kAllowed);
CheckRobotsRules("/allowed.png", RobotsRulesDecider::CheckResult::kAllowed);
CheckRobotsRules("/allowed.gif", RobotsRulesDecider::CheckResult::kAllowed);
CheckRobotsRules("/disallowed.jpg?arg",
RobotsRulesDecider::CheckResult::kDisallowed);
CheckRobotsRules("/allowed.png?arg",
RobotsRulesDecider::CheckResult::kAllowed);
CheckRobotsRules("/allowed.png?arg_allowed",
RobotsRulesDecider::CheckResult::kAllowed);
CheckRobotsRules("/allowed.png?arg_allowed&arg_allowed2",
RobotsRulesDecider::CheckResult::kAllowed);
CheckRobotsRules("/allowed.png?arg_disallowed",
RobotsRulesDecider::CheckResult::kDisallowed);
CheckRobotsRules("/allowed.png?arg_disallowed&arg_disallowed2",
RobotsRulesDecider::CheckResult::kDisallowed);
VerifyTotalRobotsRulesApplyHistograms(9);
}
TEST_F(SubresourceRedirectRobotsRulesDeciderTest, TestRulesAreCaseSensitive) {
SetUpRobotsRules({{kRuleTypeAllow, "/allowed"},
{kRuleTypeAllow, "/CamelCase"},
{kRuleTypeAllow, "/CAPITALIZE"},
{kRuleTypeDisallow, "/"}});
VerifyReceivedRobotsRulesCountHistogram(4);
CheckRobotsRules("/allowed.jpg", RobotsRulesDecider::CheckResult::kAllowed);
CheckRobotsRules("/CamelCase.jpg", RobotsRulesDecider::CheckResult::kAllowed);
CheckRobotsRules("/CAPITALIZE.jpg",
RobotsRulesDecider::CheckResult::kAllowed);
CheckRobotsRules("/Allowed.jpg",
RobotsRulesDecider::CheckResult::kDisallowed);
CheckRobotsRules("/camelcase.jpg",
RobotsRulesDecider::CheckResult::kDisallowed);
CheckRobotsRules("/capitalize.jpg",
RobotsRulesDecider::CheckResult::kDisallowed);
VerifyTotalRobotsRulesApplyHistograms(6);
}
} // namespace
} // namespace subresource_redirect
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment