Commit 599af13d authored by rajendrant's avatar rajendrant Committed by Commit Bot

Introduce robots rules from LitePages

This CL adds a fetcher that fetches robots rules from LitePages.

TBR=dullweber@chromium.org

Bug: 1147565
Change-Id: If7a00bf83d387d035f890e6aacd1da430672f80c
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2536755
Commit-Queue: rajendrant <rajendrant@chromium.org>
Reviewed-by: default avatarChristian Dullweber <dullweber@chromium.org>
Reviewed-by: default avatarTarun Bansal <tbansal@chromium.org>
Reviewed-by: default avatarRobert Ogden <robertogden@chromium.org>
Cr-Commit-Position: refs/heads/master@{#828168}
parent a9380dcd
......@@ -1753,6 +1753,8 @@ static_library("browser") {
"subresource_redirect/https_image_compression_bypass_decider.h",
"subresource_redirect/https_image_compression_infobar_decider.cc",
"subresource_redirect/https_image_compression_infobar_decider.h",
"subresource_redirect/origin_robots_rules.cc",
"subresource_redirect/origin_robots_rules.h",
"subresource_redirect/subresource_redirect_observer.cc",
"subresource_redirect/subresource_redirect_observer.h",
"subresource_redirect/subresource_redirect_util.cc",
......
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/subresource_redirect/origin_robots_rules.h"

#include "base/metrics/histogram_functions.h"
#include "base/metrics/histogram_macros.h"
#include "chrome/browser/subresource_redirect/subresource_redirect_util.h"
#include "components/variations/net/variations_http_headers.h"
#include "net/http/http_request_headers.h"
#include "net/http/http_response_headers.h"
#include "net/http/http_status_code.h"
#include "net/http/http_util.h"
#include "net/traffic_annotation/network_traffic_annotation.h"
#include "services/network/public/cpp/shared_url_loader_factory.h"
#include "services/network/public/cpp/simple_url_loader.h"
namespace subresource_redirect {
// Bundles the state of an in-flight robots rules fetch: the URL loader, the
// callbacks queued while the fetch is in progress, and the callback used to
// report 4xx/5xx responses. Takes ownership of both arguments.
OriginRobotsRules::FetcherInfo::FetcherInfo(
    std::unique_ptr<network::SimpleURLLoader> url_loader,
    OriginRobotsRules::NotifyResponseErrorCallback response_error_callback)
    : url_loader(std::move(url_loader)),
      response_error_callback(std::move(response_error_callback)) {}

OriginRobotsRules::FetcherInfo::~FetcherInfo() = default;
// Kicks off the robots rules fetch for |origin| immediately on construction.
// The loader and |response_error_callback| are stashed in |fetcher_info_|
// until OnURLLoadComplete() runs.
OriginRobotsRules::OriginRobotsRules(
    scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory,
    const url::SchemeHostPort& origin,
    NotifyResponseErrorCallback response_error_callback) {
  net::NetworkTrafficAnnotationTag traffic_annotation =
      net::DefineNetworkTrafficAnnotation("litepages_robots_rules",
                                          R"(
        semantics {
          sender: "LitePages"
          description:
            "Requests robots.txt rules from the LitePages robots.txt Service "
            "to use in providing data saving optimizations for Chrome."
          trigger:
            "Requested for each unique origin for the images contained in the "
            "page, and cached for certain period."
          data: "A list of allowed and disallowed robots.txt path patterns"
          destination: GOOGLE_OWNED_SERVICE
        }
        policy {
          cookies_allowed: NO
          setting:
            "Users can control Lite mode on Android via 'Lite mode' setting."
          chrome_policy {
            DataCompressionProxyEnabled {
              DataCompressionProxyEnabled: false
            }
          }
        })");
  auto resource_request = std::make_unique<network::ResourceRequest>();
  resource_request->url = GetRobotsServerURL(origin);
  resource_request->method = "GET";
  // No cookies or other credentials are sent, matching the annotation above.
  resource_request->credentials_mode = network::mojom::CredentialsMode::kOmit;
  auto url_loader = variations::CreateSimpleURLLoaderWithVariationsHeader(
      std::move(resource_request), variations::InIncognito::kNo,
      variations::SignedIn::kNo, traffic_annotation);
  // url_loader should retry on network changes, but no retries on other
  // failures such as 5xx errors.
  url_loader->SetRetryOptions(
      1 /* max_retries */, network::SimpleURLLoader::RETRY_ON_NETWORK_CHANGE);
  // base::Unretained(this) relies on |this| owning the loader (via
  // |fetcher_info_| below): destroying the loader drops the pending callback,
  // per the SimpleURLLoader contract.
  url_loader->DownloadToStringOfUnboundedSizeUntilCrashAndDie(
      url_loader_factory.get(),
      base::BindOnce(&OriginRobotsRules::OnURLLoadComplete,
                     base::Unretained(this)));
  fetcher_info_ = std::make_unique<FetcherInfo>(
      std::move(url_loader), std::move(response_error_callback));
}
// |fetcher_info_| (and with it the URL loader) is destroyed with the object,
// which abandons any fetch still in flight.
OriginRobotsRules::~OriginRobotsRules() = default;
// Answers |callback| synchronously when the fetch has already finished;
// otherwise queues it to be serviced by OnURLLoadComplete().
void OriginRobotsRules::GetRobotsRules(RobotsRulesReceivedCallback callback) {
  if (!fetcher_info_) {
    // Fetch already completed (successfully or not); reply right away.
    // |robots_rules_| is base::nullopt when the fetch failed.
    std::move(callback).Run(robots_rules_);
    return;
  }
  // Robots rules fetch is still in progress; park the callback.
  fetcher_info_->pending_callbacks.push_back(std::move(callback));
}
void OriginRobotsRules::OnURLLoadComplete(
std::unique_ptr<std::string> response_body) {
const auto response_headers =
fetcher_info_->url_loader->ResponseInfo()
? fetcher_info_->url_loader->ResponseInfo()->headers
: nullptr;
int response_code =
response_headers ? response_code = response_headers->response_code() : -1;
int net_error = fetcher_info_->url_loader->NetError();
UMA_HISTOGRAM_BOOLEAN(
"SubresourceRedirect.RobotsRulesFetcher.CacheHit",
fetcher_info_->url_loader->ResponseInfo()
? fetcher_info_->url_loader->ResponseInfo()->was_fetched_via_cache
: false);
base::UmaHistogramSparse(
"SubresourceRedirect.RobotsRulesFetcher.NetErrorCode", -net_error);
if (response_code != -1) {
UMA_HISTOGRAM_ENUMERATION(
"SubresourceRedirect.RobotsRulesFetcher.ResponseCode",
static_cast<net::HttpStatusCode>(response_code),
net::HTTP_VERSION_NOT_SUPPORTED);
}
// Treat 4xx, 5xx as failures
if (response_code >= 400 && response_code <= 599) {
std::string retry_after_string;
base::TimeDelta retry_after;
if (response_headers &&
response_headers->EnumerateHeader(nullptr, "Retry-After",
&retry_after_string) &&
net::HttpUtil::ParseRetryAfterHeader(retry_after_string,
base::Time::Now(), &retry_after)) {
std::move(fetcher_info_->response_error_callback)
.Run(response_code, retry_after);
} else {
std::move(fetcher_info_->response_error_callback)
.Run(response_code, base::TimeDelta());
}
}
if (response_body && net_error == net::OK &&
(response_code == net::HTTP_OK ||
response_code == net::HTTP_NOT_MODIFIED)) {
robots_rules_ = *response_body;
}
for (auto& callback : fetcher_info_->pending_callbacks)
std::move(callback).Run(robots_rules_);
fetcher_info_.reset();
}
} // namespace subresource_redirect
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROME_BROWSER_SUBRESOURCE_REDIRECT_ORIGIN_ROBOTS_RULES_H_
#define CHROME_BROWSER_SUBRESOURCE_REDIRECT_ORIGIN_ROBOTS_RULES_H_
#include <memory>
#include <string>
#include <vector>
#include "base/callback.h"
#include "base/macros.h"
#include "base/memory/scoped_refptr.h"
#include "base/optional.h"
#include "url/scheme_host_port.h"
namespace network {
class SharedURLLoaderFactory;
class SimpleURLLoader;
} // namespace network
namespace subresource_redirect {
// Holds the robots rules for one origin. Fetches the robots rules on creation.
// Callers that arrive while the fetch is in flight are queued and answered
// when it completes; later callers are answered synchronously.
class OriginRobotsRules {
 public:
  // The callback to send the received robots rules. base::nullopt will be sent
  // when rule fetch fails.
  using RobotsRulesReceivedCallback =
      base::OnceCallback<void(base::Optional<std::string>)>;
  // The callback to notify 4xx, 5xx response codes. Sends the response code and
  // retry-after response header (zero TimeDelta when absent/unparsable).
  // NOTE(review): base::TimeDelta is used here but "base/time/time.h" is not
  // among the direct includes above — confirm it arrives transitively (IWYU).
  using NotifyResponseErrorCallback =
      base::OnceCallback<void(int, base::TimeDelta)>;
  // Starts fetching robots rules for |origin| immediately via
  // |url_loader_factory|. |response_error_callback| fires if the fetch ends
  // in a 4xx/5xx response.
  OriginRobotsRules(
      scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory,
      const url::SchemeHostPort& origin,
      NotifyResponseErrorCallback response_error_callback);
  ~OriginRobotsRules();
  // Get the robots rules for this origin. The callback is called immediately if
  // rules have been fetched. When rules fetch is in progress, the callback will
  // happen after it is complete.
  void GetRobotsRules(RobotsRulesReceivedCallback callback);

 private:
  // Holds the info pertaining to when robots rules are fetched.
  struct FetcherInfo {
    FetcherInfo(std::unique_ptr<network::SimpleURLLoader> url_loader,
                NotifyResponseErrorCallback response_error_callback);
    ~FetcherInfo();
    // Holds the URLLoader when robots rules are fetched.
    std::unique_ptr<network::SimpleURLLoader> url_loader;
    // Contains the requests that are pending for robots rules to be received.
    std::vector<RobotsRulesReceivedCallback> pending_callbacks;
    // Callback to notify response errors.
    NotifyResponseErrorCallback response_error_callback;
  };
  // URL loader completion callback.
  void OnURLLoadComplete(std::unique_ptr<std::string> response_body);
  // The received robots rules. Set when rules fetch completes successfully.
  base::Optional<std::string> robots_rules_;
  // Holds the robots rules fetcher state. Exists only when fetch is in
  // progress.
  std::unique_ptr<FetcherInfo> fetcher_info_;
};
} // namespace subresource_redirect
#endif // CHROME_BROWSER_SUBRESOURCE_REDIRECT_ORIGIN_ROBOTS_RULES_H_
......@@ -11,6 +11,7 @@
#include "chrome/browser/subresource_redirect/https_image_compression_infobar_decider.h"
#include "components/data_reduction_proxy/core/browser/data_reduction_proxy_settings.h"
#include "content/public/browser/web_contents.h"
#include "net/base/escape.h"
#include "third_party/blink/public/common/features.h"
#if defined(OS_ANDROID)
......@@ -121,4 +122,27 @@ void NotifyCompressedImageFetchFailed(content::WebContents* web_contents,
->NotifyCompressedImageFetchFailed(retry_after);
}
// Builds the LitePages robots rules endpoint URL for |origin|, honoring the
// "lite_page_robots_origin" field-trial override of the server origin when
// one is configured.
GURL GetRobotsServerURL(const url::SchemeHostPort& origin) {
  DCHECK(ShouldEnableLoginRobotsCheckedCompression());
  DCHECK(origin.IsValid());
  const std::string server_origin = base::GetFieldTrialParamValueByFeature(
      blink::features::kSubresourceRedirect, "lite_page_robots_origin");
  GURL robots_url(server_origin.empty() ? "https://litepages.googlezip.net/"
                                        : server_origin);
  // The requested origin travels as an escaped "u=" query parameter.
  const std::string query =
      "u=" +
      net::EscapeQueryParamValue(origin.GetURL().spec(), true /* use_plus */);
  GURL::Replacements replacements;
  replacements.SetPathStr("/robots");
  replacements.SetQueryStr(query);
  robots_url = robots_url.ReplaceComponents(replacements);
  DCHECK(robots_url.is_valid());
  return robots_url;
}
} // namespace subresource_redirect
......@@ -7,6 +7,8 @@
#include "base/macros.h"
#include "base/time/time.h"
#include "url/gurl.h"
#include "url/scheme_host_port.h"
namespace content {
class NavigationHandle;
......@@ -42,6 +44,10 @@ bool ShowInfoBarAndGetImageCompressionState(
void NotifyCompressedImageFetchFailed(content::WebContents* web_contents,
base::TimeDelta retry_after);
// Returns the LitePages robots rules server endpoint URL to fetch for the given
// |origin|.
GURL GetRobotsServerURL(const url::SchemeHostPort& origin);
} // namespace subresource_redirect
#endif // CHROME_BROWSER_SUBRESOURCE_REDIRECT_SUBRESOURCE_REDIRECT_UTIL_H_
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "base/test/scoped_feature_list.h"
#include "chrome/browser/subresource_redirect/subresource_redirect_util.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/blink/public/common/features.h"
namespace subresource_redirect {
namespace {
// Naive function that escapes :, / characters in URL. Useful for simple tests.
// Naive function that escapes :, / characters in URL. Useful for simple tests.
// Produces the same uppercase-hex escapes ("%3A", "%2F") as the production
// net::EscapeQueryParamValue for these two characters. The previous version
// rebuilt these constant escape strings via StringPrintf on every call; this
// one is self-contained and allocation-light.
std::string EscapeURLForQueryParam(std::string url) {
  std::string escaped;
  escaped.reserve(url.size());
  for (char c : url) {
    if (c == ':') {
      escaped += "%3A";
    } else if (c == '/') {
      escaped += "%2F";
    } else {
      escaped += c;
    }
  }
  return escaped;
}
} // namespace
// Verifies the default LitePages endpoint is used and the origin is escaped
// into the "u=" query parameter.
TEST(SubresourceRedirectUtilTest, GetRobotsServerURL) {
  base::test::ScopedFeatureList scoped_feature_list;
  scoped_feature_list.InitWithFeaturesAndParameters(
      {{blink::features::kSubresourceRedirect,
        {{"enable_login_robots_based_compression", "true"},
         {"enable_public_image_hints_based_compression", "false"}}}},
      {});
  const char* kOrigins[] = {"https://foo.com/", "https://www.foo.com/",
                            "http://foo.com/"};
  for (const char* test_origin : kOrigins) {
    const GURL expected("https://litepages.googlezip.net/robots?u=" +
                        EscapeURLForQueryParam(test_origin));
    EXPECT_EQ(expected,
              GetRobotsServerURL(url::SchemeHostPort(GURL(test_origin))));
  }
}
// Verifies the "lite_page_robots_origin" field-trial param overrides the
// default LitePages server origin.
TEST(SubresourceRedirectUtilTest, GetRobotsServerURL_ModifiedLitePagesOrigin) {
  base::test::ScopedFeatureList scoped_feature_list;
  scoped_feature_list.InitWithFeaturesAndParameters(
      {{blink::features::kSubresourceRedirect,
        {{"enable_login_robots_based_compression", "true"},
         {"enable_public_image_hints_based_compression", "false"},
         {"lite_page_robots_origin", "https://modified.litepages.com/"}}}},
      {});
  const char* kOrigins[] = {"https://foo.com/", "https://www.foo.com/",
                            "http://foo.com/"};
  for (const char* test_origin : kOrigins) {
    const GURL expected("https://modified.litepages.com/robots?u=" +
                        EscapeURLForQueryParam(test_origin));
    EXPECT_EQ(expected,
              GetRobotsServerURL(url::SchemeHostPort(GURL(test_origin))));
  }
}
} // namespace subresource_redirect
......@@ -3665,6 +3665,8 @@ test("unit_tests") {
"../browser/signin/e2e_tests/test_accounts_util_unittest.cc",
"../browser/subresource_redirect/https_image_compression_bypass_decider_unittest.cc",
"../browser/subresource_redirect/https_image_compression_infobar_decider_unittest.cc",
"../browser/subresource_redirect/origin_robots_rules_unittest.cc",
"../browser/subresource_redirect/subresource_redirect_util_unit_test.cc",
# TODO(hashimoto): those tests should be componentized and moved to
# //components:components_unittests, http://crbug.com/527882.
......
......@@ -606,6 +606,36 @@ reviews. Googlers can read more about this at go/gwsq-gerrit.
</summary>
</histogram>
<histogram name="SubresourceRedirect.RobotsRulesFetcher.CacheHit"
    enum="Boolean" expires_after="M92">
<owner>rajendrant@chromium.org</owner>
<owner>mcrouse@chromium.org</owner>
<summary>
Records whether LitePages robots rules was fetched from Chrome network
cache. Recorded for every fetch.
</summary>
</histogram>
<histogram name="SubresourceRedirect.RobotsRulesFetcher.NetErrorCode"
enum="NetErrorCodes" expires_after="M92">
<owner>rajendrant@chromium.org</owner>
<owner>mcrouse@chromium.org</owner>
<summary>
Net error codes for the LitePages robots rules fetcher. Recorded on every
fetch after completion or failure.
</summary>
</histogram>
<histogram name="SubresourceRedirect.RobotsRulesFetcher.ResponseCode"
    enum="HttpResponseCode" expires_after="M92">
<owner>rajendrant@chromium.org</owner>
<owner>mcrouse@chromium.org</owner>
<summary>
HTTP response code for the LitePages robots rules fetcher. Recorded on every
fetch after completion or failure.
</summary>
</histogram>
</histograms>
</histogram-configuration>
......@@ -167,6 +167,7 @@ Refer to README.md for content description and update process.
<item id="kaleidoscope_service" added_in_milestone="87" hash_code="49759694" type="0" content_hash_code="14307563" os_list="linux,windows" file_path="chrome/browser/media/kaleidoscope/kaleidoscope_service.cc"/>
<item id="kids_chrome_management_client_classify_url" added_in_milestone="77" hash_code="109987793" type="0" deprecated="2019-07-30" content_hash_code="112740597" file_path=""/>
<item id="lib_address_input" added_in_milestone="62" hash_code="50816767" type="0" content_hash_code="57977576" os_list="linux,windows" file_path="third_party/libaddressinput/chromium/chrome_metadata_source.cc"/>
<item id="litepages_robots_rules" added_in_milestone="89" hash_code="50910588" type="0" content_hash_code="72567080" os_list="linux,windows" file_path="chrome/browser/subresource_redirect/origin_robots_rules.cc"/>
<item id="load_autofill_gstatic_data" added_in_milestone="78" hash_code="119416099" type="0" content_hash_code="8433621" os_list="linux,windows" file_path="chrome/browser/autofill/autofill_gstatic_reader.cc"/>
<item id="logo_service" added_in_milestone="73" hash_code="35473769" type="0" content_hash_code="20271299" os_list="linux,windows" file_path="components/search_provider_logos/logo_service_impl.cc"/>
<item id="logo_tracker" added_in_milestone="62" hash_code="36859107" type="0" deprecated="2018-12-07" content_hash_code="67588075" file_path=""/>
......
......@@ -297,6 +297,7 @@ hidden="true" so that these annotations don't show up in the document.
<traffic_annotation unique_id="optimization_guide_model"/>
<traffic_annotation unique_id="optimization_guide_model_download"/>
<traffic_annotation unique_id="previews_litepage_prober"/>
<traffic_annotation unique_id="litepages_robots_rules"/>
</sender>
<sender name="Network">
<traffic_annotation unique_id="network_time_component"/>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment