Commit 4ea1eb42 authored by Sam Bowen, committed by Commit Bot

Add a fetcher for schema.org media feeds

Bug: 1056762
Change-Id: I81b58b6408712b8eb0293448a20b1f9f7164a629
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2095443
Commit-Queue: Sam Bowen <sgbowen@google.com>
Reviewed-by: Martin Šrámek <msramek@chromium.org>
Reviewed-by: Tommy Nyquist <nyquist@chromium.org>
Reviewed-by: Becca Hughes <beccahughes@chromium.org>
Cr-Commit-Position: refs/heads/master@{#751503}
parent 03474222
......@@ -686,6 +686,8 @@ jumbo_static_library("browser") {
"media/cast_remoting_connector.h",
"media/feeds/media_feeds_contents_observer.cc",
"media/feeds/media_feeds_contents_observer.h",
"media/feeds/media_feeds_fetcher.cc",
"media/feeds/media_feeds_fetcher.h",
"media/feeds/media_feeds_service.cc",
"media/feeds/media_feeds_service.h",
"media/feeds/media_feeds_service_factory.cc",
......@@ -2130,6 +2132,8 @@ jumbo_static_library("browser") {
"//components/safe_browsing/core:public",
"//components/safe_search_api",
"//components/safe_search_api:safe_search_client",
"//components/schema_org:extractor",
"//components/schema_org/common:improved_mojom",
"//components/search",
"//components/search_engines",
"//components/search_provider_logos",
......
......@@ -208,6 +208,7 @@ include_rules = [
"+components/rlz",
"+components/safe_browsing",
"+components/safe_search_api",
"+components/schema_org",
"+components/search",
"+components/search_engines",
"+components/search_provider_logos",
......
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/media/feeds/media_feeds_fetcher.h"
#include "components/schema_org/common/metadata.mojom.h"
#include "components/schema_org/extractor.h"
#include "net/base/net_errors.h"
#include "net/http/http_request_headers.h"
#include "net/http/http_status_code.h"
#include "services/network/public/cpp/shared_url_loader_factory.h"
#include "services/network/public/cpp/simple_url_loader.h"
namespace media_feeds {
// Stores |url_loader_factory| for use by later FetchFeed() calls. The
// parameter is taken by value, so it is moved into the member rather than
// copied to avoid an extra reference-count increment/decrement.
MediaFeedsFetcher::MediaFeedsFetcher(
    scoped_refptr<::network::SharedURLLoaderFactory> url_loader_factory)
    : url_loader_factory_(std::move(url_loader_factory)) {}
// Defaulted here rather than in the header because the header only
// forward-declares network::SimpleURLLoader, which |pending_request_| owns.
MediaFeedsFetcher::~MediaFeedsFetcher() = default;
// Starts a GET request for the media feed at |url| and reports the outcome
// through |callback| once OnURLFetchComplete() has run.
//
// The request asks for "application/ld+json", treats any redirect as an error
// and attaches same-site cookies with |url| as the site for cookies. Only one
// request may be in flight at a time.
void MediaFeedsFetcher::FetchFeed(const GURL& url, MediaFeedCallback callback) {
  DCHECK(thread_checker_.CalledOnValidThread());
  // Checked up front so that no work is done for an overlapping request.
  DCHECK(!pending_request_);

  net::NetworkTrafficAnnotationTag traffic_annotation =
      net::DefineNetworkTrafficAnnotation("media_feeds", R"(
semantics {
sender: "Media Feeds Service"
description:
"Media Feeds service fetches a schema.org DataFeed object "
"containing Media Feed items used to provide recommendations to "
"the signed-in user. Feed data will be stored in the Media History "
"database."
trigger:
"Having a discovered feed that has not been fetched recently. "
"Feeds are discovered when the browser visits a page with a feed "
"link element in the header."
data: "User cookies."
destination: OTHER
destination_other: "Media providers which provide media feed data."
}
policy {
cookies_allowed: YES
cookies_store: "user"
setting:
"The feature is enabled by default. The user can disable "
"individual media feeds. The feature does not operate in "
"incognito mode."
chrome_policy {
SavingBrowserHistoryDisabled {
policy_options {mode: MANDATORY}
SavingBrowserHistoryDisabled: false
}
}
})");

  auto resource_request = std::make_unique<::network::ResourceRequest>();
  resource_request->url = url;
  resource_request->method = net::HttpRequestHeaders::kGetMethod;
  resource_request->headers.SetHeader(net::HttpRequestHeaders::kAccept,
                                      "application/ld+json");
  resource_request->redirect_mode = ::network::mojom::RedirectMode::kError;
  resource_request->attach_same_site_cookies = true;
  resource_request->site_for_cookies = net::SiteForCookies::FromUrl(url);

  pending_request_ = network::SimpleURLLoader::Create(
      std::move(resource_request), traffic_annotation);
  // HTTP error responses are delivered to OnURLFetchComplete() so that it can
  // inspect the response code itself.
  pending_request_->SetAllowHttpErrorResults(true);

  // base::Unretained is used because |pending_request_| is owned by |this|:
  // the loader, and the callback it holds, cannot outlive this object.
  auto fetcher_callback =
      base::BindOnce(&MediaFeedsFetcher::OnURLFetchComplete,
                     base::Unretained(this), url, std::move(callback));

  // The feed comes from an arbitrary third-party site, so the body is buffered
  // with the bounded DownloadToString() instead of the unbounded variant. A
  // body larger than the cap makes the request fail, which
  // OnURLFetchComplete() reports as a failed request.
  pending_request_->DownloadToString(
      url_loader_factory_.get(), std::move(fetcher_callback),
      network::SimpleURLLoader::kMaxBoundedStringDownloadSize);
}
void MediaFeedsFetcher::OnURLFetchComplete(
const GURL& original_url,
MediaFeedCallback callback,
std::unique_ptr<std::string> feed_data) {
DCHECK(thread_checker_.CalledOnValidThread());
// The SimpleURLLoader will be deleted when the request is handled.
std::unique_ptr<const ::network::SimpleURLLoader> request =
std::move(pending_request_);
DCHECK(request);
if (request->NetError() != net::OK) {
std::move(callback).Run(nullptr, Status::kRequestFailed);
return;
}
int response_code = 0;
if (request->ResponseInfo() && request->ResponseInfo()->headers)
response_code = request->ResponseInfo()->headers->response_code();
if (response_code != net::HTTP_OK) {
std::move(callback).Run(nullptr, Status::kRequestFailed);
return;
}
if (!feed_data || feed_data->empty()) {
std::move(callback).Run(nullptr, Status::kNotFound);
return;
}
// Parse the received data.
schema_org::improved::mojom::EntityPtr parsed_entity =
schema_org::Extractor::Extract(*feed_data);
if (!parsed_entity) {
std::move(callback).Run(nullptr, Status::kInvalidFeedData);
return;
}
std::move(callback).Run(std::move(parsed_entity), Status::kOk);
}
} // namespace media_feeds
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROME_BROWSER_MEDIA_FEEDS_MEDIA_FEEDS_FETCHER_H_
#define CHROME_BROWSER_MEDIA_FEEDS_MEDIA_FEEDS_FETCHER_H_
#include "base/threading/thread_checker.h"
#include "components/schema_org/common/improved_metadata.mojom.h"
#include "url/gurl.h"
// Forward declarations: this header only names these types as smart-pointer
// template arguments, so it declares them instead of including their headers.
namespace network {
class SharedURLLoaderFactory;
class SimpleURLLoader;
} // namespace network
namespace media_feeds {
// Fetcher object to retrieve a Media Feed schema.org object from a URL.
//
// A fetcher handles at most one request at a time and must be used on a single
// thread. The request is owned by the fetcher.
class MediaFeedsFetcher {
 public:
  // Outcome of a fetch, passed to MediaFeedCallback.
  enum class Status {
    // The feed was downloaded and extracted into an entity.
    kOk,
    // The request ended with a network error or a non-200 HTTP response code.
    kRequestFailed,
    // The request succeeded but the response body was missing or empty.
    kNotFound,
    // The response body could not be extracted into a schema.org entity.
    kInvalidFeedData,
  };

  // Run when a fetch completes. The entity is null unless the status is kOk.
  using MediaFeedCallback =
      base::OnceCallback<void(const schema_org::improved::mojom::EntityPtr&,
                              Status)>;

  explicit MediaFeedsFetcher(
      scoped_refptr<::network::SharedURLLoaderFactory> url_loader_factory);
  ~MediaFeedsFetcher();

  // Starts fetching the feed at |url| and runs |callback| with the result.
  // Must not be called while another fetch is still pending.
  void FetchFeed(const GURL& url, MediaFeedCallback callback);

 private:
  // Called when the fetch request completes. Parses the received media feed
  // data and runs |callback| with the parsed entity and a status.
  void OnURLFetchComplete(const GURL& original_url,
                          MediaFeedCallback callback,
                          std::unique_ptr<std::string> feed_data);

  const scoped_refptr<::network::SharedURLLoaderFactory> url_loader_factory_;

  // Contains the current fetch request. Will only have a value while a request
  // is pending, and is reset by |OnURLFetchComplete|.
  std::unique_ptr<::network::SimpleURLLoader> pending_request_;

  base::ThreadChecker thread_checker_;
};
} // namespace media_feeds
#endif // CHROME_BROWSER_MEDIA_FEEDS_MEDIA_FEEDS_FETCHER_H_
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/media/feeds/media_feeds_fetcher.h"
#include <memory>
#include "base/bind.h"
#include "base/test/bind_test_util.h"
#include "base/test/mock_callback.h"
#include "base/test/task_environment.h"
#include "chrome/test/base/chrome_render_view_host_test_harness.h"
#include "components/schema_org/common/metadata.mojom.h"
#include "content/public/browser/storage_partition.h"
#include "services/network/public/cpp/weak_wrapper_shared_url_loader_factory.h"
#include "services/network/test/test_url_loader_factory.h"
#include "services/network/test/test_utils.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
namespace media_feeds {
using testing::_;
// URL that the fixture's RespondToFetch() answers and that WaitForRequest()
// expects as the request's site for cookies.
const char kTestUrl[] = "https://www.google.com";
class MediaFeedsFetcherTest : public ChromeRenderViewHostTestHarness {
public:
MediaFeedsFetcherTest() = default;
~MediaFeedsFetcherTest() override = default;
MediaFeedsFetcherTest(const MediaFeedsFetcherTest& t) = delete;
MediaFeedsFetcherTest& operator=(const MediaFeedsFetcherTest&) = delete;
void SetUp() override {
ChromeRenderViewHostTestHarness::SetUp();
fetcher_ = std::make_unique<MediaFeedsFetcher>(
base::MakeRefCounted<::network::WeakWrapperSharedURLLoaderFactory>(
url_loader_factory()));
}
MediaFeedsFetcher* fetcher() { return fetcher_.get(); }
::network::TestURLLoaderFactory* url_loader_factory() {
return &url_loader_factory_;
}
bool RespondToFetch(
const std::string& response_body,
net::HttpStatusCode response_code = net::HttpStatusCode::HTTP_OK,
int net_error = net::OK) {
bool rv = url_loader_factory()->SimulateResponseForPendingRequest(
GURL(kTestUrl), ::network::URLLoaderCompletionStatus(net_error),
::network::CreateURLResponseHead(response_code), response_body);
task_environment()->RunUntilIdle();
return rv;
}
void WaitForRequest() {
task_environment()->RunUntilIdle();
ASSERT_TRUE(GetCurrentRequest().url.is_valid());
EXPECT_TRUE(GetCurrentRequest().attach_same_site_cookies);
EXPECT_TRUE(GetCurrentRequest().site_for_cookies.IsEquivalent(
net::SiteForCookies::FromUrl(GURL(kTestUrl))));
EXPECT_EQ(GetCurrentlyQueriedHeaderValue(net::HttpRequestHeaders::kAccept),
"application/ld+json");
EXPECT_EQ(GetCurrentRequest().redirect_mode,
::network::mojom::RedirectMode::kError);
EXPECT_EQ(net::HttpRequestHeaders::kGetMethod, GetCurrentRequest().method);
}
bool SetCookie(content::BrowserContext* browser_context,
const GURL& url,
const std::string& value) {
bool result = false;
base::RunLoop run_loop;
mojo::Remote<network::mojom::CookieManager> cookie_manager;
content::BrowserContext::GetDefaultStoragePartition(browser_context)
->GetNetworkContext()
->GetCookieManager(cookie_manager.BindNewPipeAndPassReceiver());
std::unique_ptr<net::CanonicalCookie> cc(net::CanonicalCookie::Create(
url, value, base::Time::Now(), base::nullopt /* server_time */));
EXPECT_TRUE(cc.get());
net::CookieOptions options;
options.set_include_httponly();
options.set_same_site_cookie_context(
net::CookieOptions::SameSiteCookieContext::SAME_SITE_STRICT);
cookie_manager->SetCanonicalCookie(
*cc.get(), url.scheme(), options,
base::BindOnce(
[](bool* result, base::RunLoop* run_loop,
net::CanonicalCookie::CookieInclusionStatus set_cookie_status) {
*result = set_cookie_status.IsInclude();
run_loop->Quit();
},
&result, &run_loop));
run_loop.Run();
return result;
}
private:
std::string GetCurrentlyQueriedHeaderValue(const base::StringPiece& key) {
std::string out;
GetCurrentRequest().headers.GetHeader(key, &out);
return out;
}
const ::network::ResourceRequest& GetCurrentRequest() {
return url_loader_factory()->pending_requests()->front().request;
}
::network::TestURLLoaderFactory url_loader_factory_;
std::unique_ptr<MediaFeedsFetcher> fetcher_;
};
// A 200 response carrying a valid DataFeed document is reported as kOk along
// with the extracted entity.
TEST_F(MediaFeedsFetcherTest, SucceedsOnBasicFetch) {
  GURL site_with_cookies(kTestUrl);
  ASSERT_TRUE(SetCookie(profile(), site_with_cookies, "testing"));

  schema_org::improved::mojom::EntityPtr expected =
      schema_org::improved::mojom::Entity::New();
  // TODO(sgbowen): Update this to CompleteDataFeed when this new type is added
  // to the checked in schema.org file in
  // //third_party/schema_org/schema.jsonld.
  expected->type = "DataFeed";
  schema_org::improved::mojom::PropertyPtr property =
      schema_org::improved::mojom::Property::New();
  property->name = "name";
  property->values = schema_org::improved::mojom::Values::New();
  property->values->string_values.push_back("Media Site");
  expected->properties.push_back(std::move(property));

  bool callback_called = false;
  schema_org::improved::mojom::EntityPtr out;
  fetcher()->FetchFeed(
      GURL(kTestUrl),
      base::BindLambdaForTesting(
          [&](const schema_org::improved::mojom::EntityPtr& response,
              MediaFeedsFetcher::Status status) {
            callback_called = true;
            EXPECT_EQ(status, MediaFeedsFetcher::Status::kOk);
            out = response.Clone();
          }));

  WaitForRequest();
  ASSERT_TRUE(
      RespondToFetch("{\"@type\":\"DataFeed\",\"name\":\"Media Site\"}"));

  EXPECT_TRUE(callback_called);
  EXPECT_EQ(out, expected);
}
// A non-200 HTTP response is reported as kRequestFailed with a null entity.
TEST_F(MediaFeedsFetcherTest, ReturnsFailedResponseCode) {
  // Tracks that the callback ran; otherwise the expectations inside it would
  // be skipped silently and the test would pass without checking anything.
  bool callback_called = false;
  fetcher()->FetchFeed(
      GURL(kTestUrl),
      base::BindLambdaForTesting(
          [&](const schema_org::improved::mojom::EntityPtr& response,
              MediaFeedsFetcher::Status status) {
            callback_called = true;
            EXPECT_EQ(status, MediaFeedsFetcher::Status::kRequestFailed);
            EXPECT_FALSE(response);
          }));

  WaitForRequest();
  ASSERT_TRUE(RespondToFetch("", net::HTTP_BAD_REQUEST));
  EXPECT_TRUE(callback_called);
}
// A network-level error is reported as kRequestFailed with a null entity, even
// when the response head carries a 200 status.
TEST_F(MediaFeedsFetcherTest, ReturnsNetError) {
  // Tracks that the callback ran; otherwise the expectations inside it would
  // be skipped silently and the test would pass without checking anything.
  bool callback_called = false;
  fetcher()->FetchFeed(
      GURL(kTestUrl),
      base::BindLambdaForTesting(
          [&](const schema_org::improved::mojom::EntityPtr& response,
              MediaFeedsFetcher::Status status) {
            callback_called = true;
            EXPECT_EQ(status, MediaFeedsFetcher::Status::kRequestFailed);
            EXPECT_FALSE(response);
          }));

  WaitForRequest();
  ASSERT_TRUE(RespondToFetch("", net::HTTP_OK, net::ERR_UNEXPECTED));
  EXPECT_TRUE(callback_called);
}
// A successful response with an empty body is reported as kNotFound with a
// null entity.
TEST_F(MediaFeedsFetcherTest, ReturnsErrFileNotFoundForEmptyFeedData) {
  // Tracks that the callback ran; otherwise the expectations inside it would
  // be skipped silently and the test would pass without checking anything.
  bool callback_called = false;
  fetcher()->FetchFeed(
      GURL(kTestUrl),
      base::BindLambdaForTesting(
          [&](const schema_org::improved::mojom::EntityPtr& response,
              MediaFeedsFetcher::Status status) {
            callback_called = true;
            EXPECT_EQ(status, MediaFeedsFetcher::Status::kNotFound);
            EXPECT_FALSE(response);
          }));

  WaitForRequest();
  ASSERT_TRUE(RespondToFetch(""));
  EXPECT_TRUE(callback_called);
}
// A successful response whose body is malformed JSON is reported as
// kInvalidFeedData with a null entity.
TEST_F(MediaFeedsFetcherTest, ReturnsErrFailedForBadEntityData) {
  // Tracks that the callback ran; otherwise the expectations inside it would
  // be skipped silently and the test would pass without checking anything.
  bool callback_called = false;
  fetcher()->FetchFeed(
      GURL(kTestUrl),
      base::BindLambdaForTesting(
          [&](const schema_org::improved::mojom::EntityPtr& response,
              MediaFeedsFetcher::Status status) {
            callback_called = true;
            EXPECT_EQ(status, MediaFeedsFetcher::Status::kInvalidFeedData);
            EXPECT_FALSE(response);
          }));

  WaitForRequest();
  ASSERT_TRUE(RespondToFetch(
      "{\"@type\":\"DataFeed\"\"name\":\"Bad json missing a comma\"}"));
  EXPECT_TRUE(callback_called);
}
} // namespace media_feeds
......@@ -3172,6 +3172,7 @@ test("unit_tests") {
"../browser/mac/keystone_glue_unittest.mm",
"../browser/media/android/router/media_router_android_unittest.cc",
"../browser/media/cast_mirroring_service_host_unittest.cc",
"../browser/media/feeds/media_feeds_fetcher_unittest.cc",
"../browser/media/feeds/media_feeds_service_unittest.cc",
"../browser/media/history/media_history_keyed_service_unittest.cc",
"../browser/media/history/media_history_store_unittest.cc",
......@@ -3687,6 +3688,7 @@ test("unit_tests") {
"//components/safe_browsing/core:features",
"//components/safe_browsing/core/db",
"//components/safe_browsing/core/db:test_database_manager",
"//components/schema_org/common:improved_mojom",
"//components/services/patch/content",
"//components/services/unzip/content",
"//components/spellcheck:buildflags",
......
......@@ -73,6 +73,7 @@ static_library("extractor") {
]
deps = [
"//base",
"//components/schema_org:generate_schema_org_code",
"//components/schema_org:schema_org",
"//components/schema_org:schema_org_properties",
......
......@@ -10,7 +10,9 @@
#include <utility>
#include <vector>
#include "base/containers/flat_set.h"
#include "base/json/json_parser.h"
#include "base/no_destructor.h"
#include "base/strings/string_number_conversions.h"
#include "base/values.h"
#include "components/schema_org/common/improved_metadata.mojom.h"
......@@ -49,18 +51,19 @@ using improved::mojom::PropertyPtr;
using improved::mojom::Values;
using improved::mojom::ValuesPtr;
const std::unordered_set<std::string> kSupportedTypes{
entity::kVideoObject, entity::kMovie, entity::kTVEpisode, entity::kTVSeason,
entity::kTVSeries};
bool IsSupportedType(const std::string& type) {
return kSupportedTypes.find(type) != kSupportedTypes.end();
static const base::NoDestructor<base::flat_set<base::StringPiece>>
kSupportedTypes(base::flat_set<base::StringPiece>(
{entity::kVideoObject, entity::kMovie, entity::kTVEpisode,
entity::kTVSeason, entity::kTVSeries, entity::kDataFeed}));
return kSupportedTypes->find(type) != kSupportedTypes->end();
}
void ExtractEntity(const base::DictionaryValue&, Entity*, int recursion_level);
// Parses a string into a property value. The string may be parsed as a double,
// date, or time, depending on the types that the property supports. If the
// property supports text, uses the string itself.
// Parses a string into a property value. The string may be parsed as a
// double, date, or time, depending on the types that the property supports.
// If the property supports text, uses the string itself.
bool ParseStringValue(const std::string& property_type,
base::StringPiece value,
Values* values) {
......@@ -238,7 +241,8 @@ void ExtractEntity(const base::DictionaryValue& val,
}
}
// Extract a JSONObject which corresponds to a single (possibly nested) entity.
// Extract a JSONObject which corresponds to a single (possibly nested)
// entity.
EntityPtr ExtractTopLevelEntity(const base::DictionaryValue& val) {
EntityPtr entity = Entity::New();
std::string type;
......
......@@ -154,6 +154,7 @@ Refer to README.md for content description and update process.
<item id="logo_service" hash_code="35473769" type="0" content_hash_code="20271299" os_list="linux,windows" file_path="components/search_provider_logos/logo_service_impl.cc"/>
<item id="logo_tracker" hash_code="36859107" type="0" deprecated="2018-12-07" content_hash_code="67588075" file_path=""/>
<item id="lookup_single_password_leak" hash_code="16927377" type="0" content_hash_code="12158296" os_list="linux,windows" file_path="components/password_manager/core/browser/leak_detection/leak_detection_request.cc"/>
<item id="media_feeds" hash_code="116778918" type="0" content_hash_code="48909601" os_list="linux,windows" file_path="chrome/browser/media/feeds/media_feeds_fetcher.cc"/>
<item id="media_router_global_media_controls_image" hash_code="95983790" type="0" content_hash_code="48851217" os_list="linux,windows" file_path="chrome/browser/ui/global_media_controls/cast_media_notification_item.cc"/>
<item id="metrics_report_ukm" hash_code="727478" type="0" content_hash_code="8746987" os_list="linux,windows" file_path="components/metrics/net/net_metrics_log_uploader.cc"/>
<item id="metrics_report_uma" hash_code="727528" type="0" content_hash_code="10176197" os_list="linux,windows" file_path="components/metrics/net/net_metrics_log_uploader.cc"/>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment