Commit 07e92a56 authored by cjhopman@chromium.org

Pull DomDistillerOptions up to the DistillerFactory

This allows the creator of the DistillerFactory to set the
DomDistillerOptions for all pages distilled by Distillers from that
factory.

Add an option to the content_extractor to extract just the text from the
page.

DEPENDSON= https://codereview.chromium.org/270663005/ https://codereview.chromium.org/286453002/

Review URL: https://codereview.chromium.org/286583002

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@272431 0039d316-1c4b-4281-b951-d872f2087c98
parent 634869e4
...@@ -63,7 +63,8 @@ KeyedService* DomDistillerServiceFactory::BuildServiceInstanceFor( ...@@ -63,7 +63,8 @@ KeyedService* DomDistillerServiceFactory::BuildServiceInstanceFor(
scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory( scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory(
new DistillerURLFetcherFactory(profile->GetRequestContext())); new DistillerURLFetcherFactory(profile->GetRequestContext()));
scoped_ptr<DistillerFactory> distiller_factory( scoped_ptr<DistillerFactory> distiller_factory(
new DistillerFactoryImpl(distiller_url_fetcher_factory.Pass())); new DistillerFactoryImpl(distiller_url_fetcher_factory.Pass(),
dom_distiller::proto::DomDistillerOptions()));
DomDistillerContextKeyedService* service = DomDistillerContextKeyedService* service =
new DomDistillerContextKeyedService( new DomDistillerContextKeyedService(
......
...@@ -48,6 +48,7 @@ ...@@ -48,6 +48,7 @@
], ],
'export_dependent_settings': [ 'export_dependent_settings': [
'distilled_page_proto', 'distilled_page_proto',
'../third_party/dom_distiller_js/dom_distiller_js.gyp:dom_distiller_js_proto',
], ],
'sources': [ 'sources': [
'dom_distiller/android/component_jni_registrar.cc', 'dom_distiller/android/component_jni_registrar.cc',
......
...@@ -36,6 +36,7 @@ class DistillerPageWebContentsTest : public ContentBrowserTest { ...@@ -36,6 +36,7 @@ class DistillerPageWebContentsTest : public ContentBrowserTest {
quit_closure_ = quit_closure; quit_closure_ = quit_closure;
distiller_page_->DistillPage( distiller_page_->DistillPage(
embedded_test_server()->GetURL(url), embedded_test_server()->GetURL(url),
dom_distiller::proto::DomDistillerOptions(),
base::Bind(&DistillerPageWebContentsTest::OnPageDistillationFinished, base::Bind(&DistillerPageWebContentsTest::OnPageDistillationFinished,
this)); this));
} }
......
...@@ -29,15 +29,17 @@ const size_t kMaxPagesInArticle = 32; ...@@ -29,15 +29,17 @@ const size_t kMaxPagesInArticle = 32;
namespace dom_distiller { namespace dom_distiller {
DistillerFactoryImpl::DistillerFactoryImpl( DistillerFactoryImpl::DistillerFactoryImpl(
scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory) scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory,
: distiller_url_fetcher_factory_(distiller_url_fetcher_factory.Pass()) { const dom_distiller::proto::DomDistillerOptions& dom_distiller_options)
: distiller_url_fetcher_factory_(distiller_url_fetcher_factory.Pass()),
dom_distiller_options_(dom_distiller_options) {
} }
DistillerFactoryImpl::~DistillerFactoryImpl() {} DistillerFactoryImpl::~DistillerFactoryImpl() {}
scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() { scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() {
scoped_ptr<DistillerImpl> distiller( scoped_ptr<DistillerImpl> distiller(new DistillerImpl(
new DistillerImpl(*distiller_url_fetcher_factory_)); *distiller_url_fetcher_factory_, dom_distiller_options_));
return distiller.PassAs<Distiller>(); return distiller.PassAs<Distiller>();
} }
...@@ -46,8 +48,10 @@ DistillerImpl::DistilledPageData::DistilledPageData() {} ...@@ -46,8 +48,10 @@ DistillerImpl::DistilledPageData::DistilledPageData() {}
DistillerImpl::DistilledPageData::~DistilledPageData() {} DistillerImpl::DistilledPageData::~DistilledPageData() {}
DistillerImpl::DistillerImpl( DistillerImpl::DistillerImpl(
const DistillerURLFetcherFactory& distiller_url_fetcher_factory) const DistillerURLFetcherFactory& distiller_url_fetcher_factory,
const dom_distiller::proto::DomDistillerOptions& dom_distiller_options)
: distiller_url_fetcher_factory_(distiller_url_fetcher_factory), : distiller_url_fetcher_factory_(distiller_url_fetcher_factory),
dom_distiller_options_(dom_distiller_options),
max_pages_in_article_(kMaxPagesInArticle), max_pages_in_article_(kMaxPagesInArticle),
destruction_allowed_(true), destruction_allowed_(true),
weak_factory_(this) { weak_factory_(this) {
...@@ -120,6 +124,7 @@ void DistillerImpl::DistillNextPage() { ...@@ -120,6 +124,7 @@ void DistillerImpl::DistillNextPage() {
started_pages_index_[page_num] = pages_.size() - 1; started_pages_index_[page_num] = pages_.size() - 1;
distiller_page_->DistillPage( distiller_page_->DistillPage(
url, url,
dom_distiller_options_,
base::Bind(&DistillerImpl::OnPageDistillationFinished, base::Bind(&DistillerImpl::OnPageDistillationFinished,
weak_factory_.GetWeakPtr(), weak_factory_.GetWeakPtr(),
page_num, page_num,
......
...@@ -56,19 +56,22 @@ class DistillerFactory { ...@@ -56,19 +56,22 @@ class DistillerFactory {
class DistillerFactoryImpl : public DistillerFactory { class DistillerFactoryImpl : public DistillerFactory {
public: public:
DistillerFactoryImpl( DistillerFactoryImpl(
scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory); scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory,
const dom_distiller::proto::DomDistillerOptions& dom_distiller_options);
virtual ~DistillerFactoryImpl(); virtual ~DistillerFactoryImpl();
virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE; virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE;
private: private:
scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_; scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_;
dom_distiller::proto::DomDistillerOptions dom_distiller_options_;
}; };
// Distills an article from a page and associated pages. // Distills an article from a page and associated pages.
class DistillerImpl : public Distiller { class DistillerImpl : public Distiller {
public: public:
DistillerImpl( DistillerImpl(
const DistillerURLFetcherFactory& distiller_url_fetcher_factory); const DistillerURLFetcherFactory& distiller_url_fetcher_factory,
const dom_distiller::proto::DomDistillerOptions& dom_distiller_options);
virtual ~DistillerImpl(); virtual ~DistillerImpl();
virtual void DistillPage( virtual void DistillPage(
...@@ -147,6 +150,8 @@ class DistillerImpl : public Distiller { ...@@ -147,6 +150,8 @@ class DistillerImpl : public Distiller {
const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; const DistillerURLFetcherFactory& distiller_url_fetcher_factory_;
scoped_ptr<DistillerPage> distiller_page_; scoped_ptr<DistillerPage> distiller_page_;
dom_distiller::proto::DomDistillerOptions dom_distiller_options_;
DistillationFinishedCallback finished_cb_; DistillationFinishedCallback finished_cb_;
DistillationUpdateCallback update_cb_; DistillationUpdateCallback update_cb_;
......
...@@ -58,14 +58,15 @@ DistillerPage::DistillerPage() : ready_(true) {} ...@@ -58,14 +58,15 @@ DistillerPage::DistillerPage() : ready_(true) {}
DistillerPage::~DistillerPage() {} DistillerPage::~DistillerPage() {}
void DistillerPage::DistillPage(const GURL& gurl, void DistillerPage::DistillPage(
const GURL& gurl,
const dom_distiller::proto::DomDistillerOptions options,
const DistillerPageCallback& callback) { const DistillerPageCallback& callback) {
DCHECK(ready_); DCHECK(ready_);
// It is only possible to distill one page at a time. |ready_| is reset when // It is only possible to distill one page at a time. |ready_| is reset when
// the callback to OnDistillationDone happens. // the callback to OnDistillationDone happens.
ready_ = false; ready_ = false;
distiller_page_callback_ = callback; distiller_page_callback_ = callback;
dom_distiller::proto::DomDistillerOptions options;
DistillPageImpl(gurl, GetDistillerScriptWithOptions(options)); DistillPageImpl(gurl, GetDistillerScriptWithOptions(options));
} }
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "base/memory/scoped_ptr.h" #include "base/memory/scoped_ptr.h"
#include "base/memory/weak_ptr.h" #include "base/memory/weak_ptr.h"
#include "base/values.h" #include "base/values.h"
#include "third_party/dom_distiller_js/dom_distiller.pb.h"
#include "url/gurl.h" #include "url/gurl.h"
namespace dom_distiller { namespace dom_distiller {
...@@ -44,9 +45,11 @@ class DistillerPage { ...@@ -44,9 +45,11 @@ class DistillerPage {
// Loads a URL. |OnDistillationDone| is called when the load completes or // Loads a URL. |OnDistillationDone| is called when the load completes or
// fails. May be called when the distiller is idle. Callers can assume that, // fails. May be called when the distiller is idle. Callers can assume that,
// for a given |url|, any DistillerPage implementation will extract the same // for a given |url| and |options|, any DistillerPage implementation will
// content. // extract the same content.
void DistillPage(const GURL& url, const DistillerPageCallback& callback); void DistillPage(const GURL& url,
const dom_distiller::proto::DomDistillerOptions options,
const DistillerPageCallback& callback);
// Called when the JavaScript execution completes. |page_url| is the url of // Called when the JavaScript execution completes. |page_url| is the url of
// the distilled page. |value| contains data returned by the script. // the distilled page. |value| contains data returned by the script.
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "content/public/test/content_browser_test.h" #include "content/public/test/content_browser_test.h"
#include "content/shell/browser/shell.h" #include "content/shell/browser/shell.h"
#include "net/dns/mock_host_resolver.h" #include "net/dns/mock_host_resolver.h"
#include "third_party/dom_distiller_js/dom_distiller.pb.h"
#include "ui/base/resource/resource_bundle.h" #include "ui/base/resource/resource_bundle.h"
using content::ContentBrowserTest; using content::ContentBrowserTest;
...@@ -43,6 +44,8 @@ const char* kOutputFile = "output-file"; ...@@ -43,6 +44,8 @@ const char* kOutputFile = "output-file";
// output. // output.
const char* kShouldOutputBinary = "output-binary"; const char* kShouldOutputBinary = "output-binary";
const char* kExtractTextOnly = "extract-text-only";
scoped_ptr<DomDistillerService> CreateDomDistillerService( scoped_ptr<DomDistillerService> CreateDomDistillerService(
content::BrowserContext* context, content::BrowserContext* context,
const base::FilePath& db_path) { const base::FilePath& db_path) {
...@@ -61,8 +64,13 @@ scoped_ptr<DomDistillerService> CreateDomDistillerService( ...@@ -61,8 +64,13 @@ scoped_ptr<DomDistillerService> CreateDomDistillerService(
new DistillerPageWebContentsFactory(context)); new DistillerPageWebContentsFactory(context));
scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory( scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory(
new DistillerURLFetcherFactory(context->GetRequestContext())); new DistillerURLFetcherFactory(context->GetRequestContext()));
dom_distiller::proto::DomDistillerOptions options;
if (base::CommandLine::ForCurrentProcess()->HasSwitch(kExtractTextOnly)) {
options.set_extract_text_only(true);
}
scoped_ptr<DistillerFactory> distiller_factory( scoped_ptr<DistillerFactory> distiller_factory(
new DistillerFactoryImpl(distiller_url_fetcher_factory.Pass())); new DistillerFactoryImpl(distiller_url_fetcher_factory.Pass(), options));
return scoped_ptr<DomDistillerService>(new DomDistillerService( return scoped_ptr<DomDistillerService>(new DomDistillerService(
dom_distiller_store.PassAs<DomDistillerStoreInterface>(), dom_distiller_store.PassAs<DomDistillerStoreInterface>(),
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
'proto_in_dir': 'package/proto', 'proto_in_dir': 'package/proto',
'proto_out_dir': 'third_party/dom_distiller_js', 'proto_out_dir': 'third_party/dom_distiller_js',
}, },
'all_dependent_settings': { 'direct_dependent_settings': {
'include_dirs': ['package/proto_gen'], 'include_dirs': ['package/proto_gen'],
}, },
'includes': [ '../../build/protoc.gypi', ], 'includes': [ '../../build/protoc.gypi', ],
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment