Commit c5fc6eff authored by rajendrant's avatar rajendrant Committed by Chromium LUCI CQ

Change robots rules parser checking

This CL changes how resource URL check for robots rules is returned. If
results are immediately available they are returned, otherwise nullopt
is returned and callback is invoked with the result when available.

This CL also introduces an enum to differentiate whether resource was
disallowed by robots rules due to rule fetch already timedout or due to
new timeout.

Bug: 1144836
Change-Id: I55fa7c57b749dd53907c9deb3e045beafd74463d
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2583846
Commit-Queue: rajendrant <rajendrant@chromium.org>
Reviewed-by: default avatarMichael Crouse <mcrouse@chromium.org>
Cr-Commit-Position: refs/heads/master@{#835774}
parent c57e3e6b
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/renderer/subresource_redirect/login_robots_decider_test_util.h"
#include "components/data_reduction_proxy/proto/robots_rules.pb.h"
namespace subresource_redirect {
std::string GetRobotsRulesProtoString(const std::vector<Rule>& patterns) {
proto::RobotsRules robots_rules;
for (const auto& pattern : patterns) {
auto* new_rule = robots_rules.add_image_ordered_rules();
if (pattern.rule_type == kRuleTypeAllow) {
new_rule->set_allowed_pattern(pattern.pattern);
} else {
new_rule->set_disallowed_pattern(pattern.pattern);
}
}
return robots_rules.SerializeAsString();
}
} // namespace subresource_redirect
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROME_RENDERER_SUBRESOURCE_REDIRECT_LOGIN_ROBOTS_DECIDER_TEST_UTIL_H_
#define CHROME_RENDERER_SUBRESOURCE_REDIRECT_LOGIN_ROBOTS_DECIDER_TEST_UTIL_H_
#include <string>
namespace subresource_redirect {
const bool kRuleTypeAllow = true;
const bool kRuleTypeDisallow = false;
struct Rule {
Rule(bool rule_type, std::string pattern)
: rule_type(rule_type), pattern(pattern) {}
bool rule_type;
std::string pattern;
};
std::string GetRobotsRulesProtoString(const std::vector<Rule>& patterns);
} // namespace subresource_redirect
#endif // CHROME_RENDERER_SUBRESOURCE_REDIRECT_LOGIN_ROBOTS_DECIDER_TEST_UTIL_H_
......@@ -90,6 +90,7 @@ RobotsRulesParser::RobotsRulesParser() {
FROM_HERE, GetRobotsRulesReceiveTimeout(),
base::BindOnce(&RobotsRulesParser::OnRulesReceiveTimeout,
base::Unretained(this)));
rules_receive_state_ = RulesReceiveState::kTimerRunning;
}
RobotsRulesParser::~RobotsRulesParser() {
......@@ -99,7 +100,7 @@ RobotsRulesParser::~RobotsRulesParser() {
}
void RobotsRulesParser::UpdateRobotsRules(const std::string& rules) {
robots_rules_.reset();
robots_rules_.clear();
rules_receive_timeout_timer_.Stop();
proto::RobotsRules robots_rules;
......@@ -108,67 +109,68 @@ void RobotsRulesParser::UpdateRobotsRules(const std::string& rules) {
is_parse_success
? SubresourceRedirectRobotsRulesReceiveResult::kSuccess
: SubresourceRedirectRobotsRulesReceiveResult::kParseError);
rules_receive_state_ = is_parse_success ? RulesReceiveState::kSuccess
: RulesReceiveState::kParseFailed;
if (is_parse_success) {
robots_rules_ = std::vector<RobotsRule>();
robots_rules_->reserve(robots_rules.image_ordered_rules_size());
robots_rules_.reserve(robots_rules.image_ordered_rules_size());
for (const auto& rule : robots_rules.image_ordered_rules()) {
if (rule.has_allowed_pattern()) {
robots_rules_->emplace_back(true, rule.allowed_pattern());
robots_rules_.emplace_back(true, rule.allowed_pattern());
} else if (rule.has_disallowed_pattern()) {
robots_rules_->emplace_back(false, rule.disallowed_pattern());
robots_rules_.emplace_back(false, rule.disallowed_pattern());
}
}
}
if (robots_rules_) {
UMA_HISTOGRAM_COUNTS_1000("SubresourceRedirect.RobotRulesDecider.Count",
robots_rules_->size());
robots_rules_.size());
}
// Respond to the pending requests, even if robots proto parse failed.
for (auto& request : pending_check_requests_) {
std::move(request.first)
.Run(IsAllowed(request.second) ? CheckResult::kAllowed
: CheckResult::kDisallowed);
std::move(request.first).Run(CheckRobotsRulesImmediate(request.second));
}
pending_check_requests_.clear();
}
void RobotsRulesParser::CheckRobotsRules(const GURL& url,
CheckResultCallback callback) {
base::Optional<RobotsRulesParser::CheckResult>
RobotsRulesParser::CheckRobotsRules(const GURL& url,
CheckResultCallback callback) {
std::string path_with_query = url.path();
if (url.has_query())
base::StrAppend(&path_with_query, {"?", url.query()});
if (rules_receive_timeout_timer_.IsRunning()) {
// Rules have not been received yet.
if (rules_receive_state_ == RulesReceiveState::kTimerRunning) {
DCHECK(rules_receive_timeout_timer_.IsRunning());
pending_check_requests_.emplace_back(
std::make_pair(std::move(callback), path_with_query));
return;
return base::nullopt;
}
std::move(callback).Run(IsAllowed(path_with_query)
? CheckResult::kAllowed
: CheckResult::kDisallowed);
return CheckRobotsRulesImmediate(path_with_query);
}
bool RobotsRulesParser::IsAllowed(const std::string& url_path) const {
// Rules not received. Could be rule parse error or timeout.
if (!robots_rules_)
return false;
RobotsRulesParser::CheckResult RobotsRulesParser::CheckRobotsRulesImmediate(
const std::string& url_path) const {
if (rules_receive_state_ == RulesReceiveState::kParseFailed)
return CheckResult::kDisallowed;
if (rules_receive_state_ == RulesReceiveState::kTimeout)
return CheckResult::kDisallowedAfterTimeout;
DCHECK_EQ(rules_receive_state_, RulesReceiveState::kSuccess);
base::ElapsedTimer rules_apply_timer;
for (const auto& rule : *robots_rules_) {
for (const auto& rule : robots_rules_) {
if (rule.Match(url_path)) {
RecordRobotsRulesApplyDurationHistogram(rules_apply_timer.Elapsed());
return rule.is_allow_rule_;
return rule.is_allow_rule_ ? CheckResult::kAllowed
: CheckResult::kDisallowed;
}
}
RecordRobotsRulesApplyDurationHistogram(rules_apply_timer.Elapsed());
// Treat as allowed when none of the allow/disallow rules match.
return true;
return CheckResult::kAllowed;
}
void RobotsRulesParser::OnRulesReceiveTimeout() {
DCHECK(!rules_receive_timeout_timer_.IsRunning());
rules_receive_state_ = RulesReceiveState::kTimeout;
for (auto& request : pending_check_requests_)
std::move(request.first).Run(CheckResult::kTimedout);
pending_check_requests_.clear();
......
......@@ -10,7 +10,6 @@
#include "base/callback_forward.h"
#include "base/macros.h"
#include "base/optional.h"
#include "base/timer/timer.h"
#include "url/gurl.h"
......@@ -33,9 +32,26 @@ class RobotsRulesParser {
};
enum CheckResult {
kAllowed, // The resource URL passed the robots rules check
kDisallowed, // The resource URL failed the robots rules check
kTimedout, // Timeout in retrieving the robots rules
kAllowed, // The resource URL passed the robots rules check
kDisallowed, // The resource URL failed the robots rules check
kTimedout, // Timeout in retrieving the robots rules
kDisallowedAfterTimeout, // Timeout got triggered already, and the resource
// was disallowed
};
enum class RulesReceiveState {
// Rules are not received yet, and rules receive timer is still running.
// This is the default startup state, and is a non-terminal state.
kTimerRunning,
// Rules are not received, and rules retrieval timeout happened.
kTimeout,
// Rules were received but parsing failed.
kParseFailed,
// Rules were received and are parsed successfully.
kSuccess,
};
// Callback to notify the check robot rules result.
......@@ -51,16 +67,19 @@ class RobotsRulesParser {
// processed immediately and called with th result.
void UpdateRobotsRules(const std::string& rules);
// Check whether the URL is allowed or disallowed by robots rules. |callback|
// will be called with the result. The callback could be immediate if rules
// are available. Otherwise the callback will be added to
// |pending_check_requests_| and called when a decision can be made like when
// rules are retrieved, or rule fetch timeout, etc.
// Check whether the URL is allowed or disallowed by robots rules. When the
// determination can be made immediately, the decision should be returned.
// Otherwise base::nullopt should be returned and the |callback| will be
// added to |pending_check_requests_| and called when a decision can be made
// like when rules are retrieved, or rule fetch timeout, etc.
// The robots rules check will make use of the |url| path and query
// parameters. The |url| origin, ref fragment, etc are immaterial.
void CheckRobotsRules(const GURL& url, CheckResultCallback callback);
// parameters.The |url| origin, ref fragment, etc are immaterial.
base::Optional<CheckResult> CheckRobotsRules(const GURL& url,
CheckResultCallback callback);
private:
friend class SubresourceRedirectRobotsRulesParserTest;
// Contains one robots.txt rule.
struct RobotsRule {
RobotsRule(bool is_allow_rule, const std::string& pattern)
......@@ -72,17 +91,21 @@ class RobotsRulesParser {
const std::string pattern_;
};
// Returns if allowed or disallowed by robots rules.
bool IsAllowed(const std::string& url_path) const;
// Returns the immediate result of whether the URL path is allowed or
// disallowed by robots rules. Should be called only when rules retrieval
// state is in a terminal state, i.e., rules receive timer is not running.
CheckResult CheckRobotsRulesImmediate(const std::string& url_path) const;
// Called on rules receive timeout. All pending checks for robots rules are
// notified that the timeout expired and the requests known to |this| are
// cleared.
void OnRulesReceiveTimeout();
// The list of robots rules. When this is empty, it could mean either the
// rules were not received yet, or rules parsing failed.
base::Optional<std::vector<RobotsRule>> robots_rules_;
// Current state of the rules retrieval.
RulesReceiveState rules_receive_state_;
// Ordered list of robots rules from longest to shortest.
std::vector<RobotsRule> robots_rules_;
// Contains the requests that are pending for robots rules to be received.
// Holds the URL path and the callback.
......
......@@ -9,6 +9,7 @@
#include "base/test/scoped_feature_list.h"
#include "base/test/task_environment.h"
#include "base/time/time.h"
#include "chrome/renderer/subresource_redirect/login_robots_decider_test_util.h"
#include "chrome/renderer/subresource_redirect/robots_rules_parser.h"
#include "components/data_reduction_proxy/proto/robots_rules.pb.h"
#include "testing/gtest/include/gtest/gtest.h"
......@@ -16,21 +17,8 @@
namespace subresource_redirect {
namespace {
constexpr char kTestOrigin[] = "https://test.com";
const bool kRuleTypeAllow = true;
const bool kRuleTypeDisallow = false;
struct Rule {
Rule(bool rule_type, std::string pattern)
: rule_type(rule_type), pattern(pattern) {}
bool rule_type;
std::string pattern;
};
class CheckResultReceiver {
public:
void OnCheckRobotsRulesResult(RobotsRulesParser::CheckResult check_result) {
......@@ -51,19 +39,6 @@ class CheckResultReceiver {
base::WeakPtrFactory<CheckResultReceiver> weak_ptr_factory_{this};
};
std::string GetRobotsRulesProtoString(const std::vector<Rule>& patterns) {
proto::RobotsRules robots_rules;
for (const auto& pattern : patterns) {
auto* new_rule = robots_rules.add_image_ordered_rules();
if (pattern.rule_type == kRuleTypeAllow) {
new_rule->set_allowed_pattern(pattern.pattern);
} else {
new_rule->set_disallowed_pattern(pattern.pattern);
}
}
return robots_rules.SerializeAsString();
}
class SubresourceRedirectRobotsRulesParserTest : public testing::Test {
public:
SubresourceRedirectRobotsRulesParserTest()
......@@ -75,26 +50,37 @@ class SubresourceRedirectRobotsRulesParserTest : public testing::Test {
void SetUpRobotsRules(const std::vector<Rule>& patterns) {
robots_rules_parser_.UpdateRobotsRules(GetRobotsRulesProtoString(patterns));
VerifyRulesReceiveState(RobotsRulesParser::RulesReceiveState::kSuccess);
}
// Verify robots rules check result is received synchronously with the
// expected result.
void CheckRobotsRules(const std::string& url_path_with_query,
RobotsRulesParser::CheckResult expected_result) {
CheckResultReceiver result_receiver;
robots_rules_parser_.CheckRobotsRules(
auto result = robots_rules_parser_.CheckRobotsRules(
GURL(kTestOrigin + url_path_with_query), result_receiver.GetCallback());
EXPECT_TRUE(result_receiver.did_receive_result());
EXPECT_EQ(expected_result, result_receiver.check_result());
EXPECT_FALSE(result_receiver.did_receive_result());
EXPECT_EQ(expected_result, result);
}
// Verify robots rules check result is received asynchronously, and returns
// the receiver that can be used to check the result.
std::unique_ptr<CheckResultReceiver> CheckRobotsRulesAsync(
const std::string& url_path_with_query) {
auto result_receiver = std::make_unique<CheckResultReceiver>();
robots_rules_parser_.CheckRobotsRules(
EXPECT_FALSE(robots_rules_parser_.CheckRobotsRules(
GURL(kTestOrigin + url_path_with_query),
result_receiver->GetCallback());
result_receiver->GetCallback()));
return result_receiver;
}
void VerifyRulesReceiveState(
RobotsRulesParser::RulesReceiveState expected_rules_receive_state) {
EXPECT_EQ(robots_rules_parser_.rules_receive_state_,
expected_rules_receive_state);
}
void VerifyRobotsRulesReceiveResultHistogram(
RobotsRulesParser::SubresourceRedirectRobotsRulesReceiveResult result) {
histogram_tester().ExpectUniqueSample(
......@@ -122,7 +108,9 @@ class SubresourceRedirectRobotsRulesParserTest : public testing::Test {
TEST_F(SubresourceRedirectRobotsRulesParserTest,
InvalidProtoParseErrorDisallowsAllPaths) {
VerifyRulesReceiveState(RobotsRulesParser::RulesReceiveState::kTimerRunning);
robots_rules_parser_.UpdateRobotsRules("INVALID PROTO");
VerifyRulesReceiveState(RobotsRulesParser::RulesReceiveState::kParseFailed);
VerifyRobotsRulesReceiveResultHistogram(
RobotsRulesParser::SubresourceRedirectRobotsRulesReceiveResult::
kParseError);
......@@ -138,6 +126,7 @@ TEST_F(SubresourceRedirectRobotsRulesParserTest,
}
TEST_F(SubresourceRedirectRobotsRulesParserTest, EmptyRulesAllowsAllPaths) {
VerifyRulesReceiveState(RobotsRulesParser::RulesReceiveState::kTimerRunning);
SetUpRobotsRules({});
VerifyRobotsRulesReceiveResultHistogram(
RobotsRulesParser::SubresourceRedirectRobotsRulesReceiveResult::kSuccess);
......@@ -154,15 +143,20 @@ TEST_F(SubresourceRedirectRobotsRulesParserTest, EmptyRulesAllowsAllPaths) {
TEST_F(SubresourceRedirectRobotsRulesParserTest,
RulesReceiveTimeoutDisallowsAllPaths) {
// Let the rule fetch timeout.
VerifyRulesReceiveState(RobotsRulesParser::RulesReceiveState::kTimerRunning);
task_environment_.FastForwardBy(base::TimeDelta::FromSeconds(10));
VerifyRobotsRulesReceiveResultHistogram(
RobotsRulesParser::SubresourceRedirectRobotsRulesReceiveResult::kTimeout);
// All url paths should be disallowed.
CheckRobotsRules("", RobotsRulesParser::CheckResult::kDisallowed);
CheckRobotsRules("/", RobotsRulesParser::CheckResult::kDisallowed);
CheckRobotsRules("/foo.jpg", RobotsRulesParser::CheckResult::kDisallowed);
CheckRobotsRules("/foo/bar.jpg", RobotsRulesParser::CheckResult::kDisallowed);
VerifyRulesReceiveState(RobotsRulesParser::RulesReceiveState::kTimeout);
// All url paths should be disallowed due to timeout.
CheckRobotsRules("", RobotsRulesParser::CheckResult::kDisallowedAfterTimeout);
CheckRobotsRules("/",
RobotsRulesParser::CheckResult::kDisallowedAfterTimeout);
CheckRobotsRules("/foo.jpg",
RobotsRulesParser::CheckResult::kDisallowedAfterTimeout);
CheckRobotsRules("/foo/bar.jpg",
RobotsRulesParser::CheckResult::kDisallowedAfterTimeout);
VerifyTotalRobotsRulesApplyHistograms(0);
}
......@@ -175,6 +169,7 @@ TEST_F(SubresourceRedirectRobotsRulesParserTest,
VerifyTotalRobotsRulesApplyHistograms(0);
// Once the rules are received the callback should get called with the result.
VerifyRulesReceiveState(RobotsRulesParser::RulesReceiveState::kTimerRunning);
SetUpRobotsRules({{kRuleTypeAllow, "/foo"}, {kRuleTypeDisallow, "/"}});
VerifyRobotsRulesReceiveResultHistogram(
RobotsRulesParser::SubresourceRedirectRobotsRulesReceiveResult::kSuccess);
......@@ -237,6 +232,7 @@ TEST_F(SubresourceRedirectRobotsRulesParserTest,
task_environment_.FastForwardBy(base::TimeDelta::FromSeconds(10));
VerifyRobotsRulesReceiveResultHistogram(
RobotsRulesParser::SubresourceRedirectRobotsRulesReceiveResult::kTimeout);
VerifyRulesReceiveState(RobotsRulesParser::RulesReceiveState::kTimeout);
EXPECT_TRUE(receiver1->did_receive_result());
EXPECT_TRUE(receiver2->did_receive_result());
......@@ -376,6 +372,4 @@ TEST_F(SubresourceRedirectRobotsRulesParserTest, TestRulesAreCaseSensitive) {
VerifyTotalRobotsRulesApplyHistograms(6);
}
} // namespace
} // namespace subresource_redirect
......@@ -3824,6 +3824,8 @@ test("unit_tests") {
"../renderer/media/flash_embed_rewrite_unittest.cc",
"../renderer/net/net_error_helper_core_unittest.cc",
"../renderer/plugins/plugin_uma_unittest.cc",
"../renderer/subresource_redirect/login_robots_decider_test_util.cc",
"../renderer/subresource_redirect/login_robots_decider_test_util.h",
"../renderer/subresource_redirect/robots_rules_parser_unittest.cc",
"../renderer/subresource_redirect/subresource_redirect_util_unittest.cc",
"../renderer/v8_unwinder_unittest.cc",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment