Commit 07e92a56 authored by cjhopman@chromium.org

Pull DomDistillerOptions up to the DistillerFactory

This allows the creator of the DistillerFactory to set the
DomDistillerOptions for all pages distilled by Distillers from that
factory.

Add an option to the content_extractor to extract just the text from the
page.

DEPENDSON= https://codereview.chromium.org/270663005/ https://codereview.chromium.org/286453002/

Review URL: https://codereview.chromium.org/286583002

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@272431 0039d316-1c4b-4281-b951-d872f2087c98
parent 634869e4
......@@ -63,7 +63,8 @@ KeyedService* DomDistillerServiceFactory::BuildServiceInstanceFor(
scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory(
new DistillerURLFetcherFactory(profile->GetRequestContext()));
scoped_ptr<DistillerFactory> distiller_factory(
new DistillerFactoryImpl(distiller_url_fetcher_factory.Pass()));
new DistillerFactoryImpl(distiller_url_fetcher_factory.Pass(),
dom_distiller::proto::DomDistillerOptions()));
DomDistillerContextKeyedService* service =
new DomDistillerContextKeyedService(
......
......@@ -48,6 +48,7 @@
],
'export_dependent_settings': [
'distilled_page_proto',
'../third_party/dom_distiller_js/dom_distiller_js.gyp:dom_distiller_js_proto',
],
'sources': [
'dom_distiller/android/component_jni_registrar.cc',
......
......@@ -36,6 +36,7 @@ class DistillerPageWebContentsTest : public ContentBrowserTest {
quit_closure_ = quit_closure;
distiller_page_->DistillPage(
embedded_test_server()->GetURL(url),
dom_distiller::proto::DomDistillerOptions(),
base::Bind(&DistillerPageWebContentsTest::OnPageDistillationFinished,
this));
}
......
......@@ -29,15 +29,17 @@ const size_t kMaxPagesInArticle = 32;
namespace dom_distiller {
// Constructs the factory. Ownership of |distiller_url_fetcher_factory| is
// transferred into the factory via Pass(), and |dom_distiller_options| is
// copied into the |dom_distiller_options_| member.
//
// NOTE(review): this listing appears to be a diff rendering with its +/-
// markers stripped. The one-argument parameter line and the single-entry
// initializer list that come first look like the pre-change lines; the
// two-argument form that follows is what the commit introduces.
DistillerFactoryImpl::DistillerFactoryImpl(
scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory)
: distiller_url_fetcher_factory_(distiller_url_fetcher_factory.Pass()) {
scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory,
const dom_distiller::proto::DomDistillerOptions& dom_distiller_options)
: distiller_url_fetcher_factory_(distiller_url_fetcher_factory.Pass()),
dom_distiller_options_(dom_distiller_options) {
}
DistillerFactoryImpl::~DistillerFactoryImpl() {}
// Creates a new DistillerImpl and returns it as a scoped_ptr<Distiller>.
// The distiller is handed the factory's URL fetcher factory (dereferenced,
// so it is passed by reference rather than transferred) together with the
// factory's |dom_distiller_options_|.
//
// NOTE(review): the first |distiller| declaration (one-argument DistillerImpl
// constructor) looks like the pre-change line from the diff; the second
// declaration is its replacement and additionally forwards the options.
// NOTE(review): DistillerImpl takes the fetcher factory by const reference,
// so presumably the factory must outlive the distillers it creates — confirm
// against callers.
scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() {
scoped_ptr<DistillerImpl> distiller(
new DistillerImpl(*distiller_url_fetcher_factory_));
scoped_ptr<DistillerImpl> distiller(new DistillerImpl(
*distiller_url_fetcher_factory_, dom_distiller_options_));
return distiller.PassAs<Distiller>();
}
......@@ -46,8 +48,10 @@ DistillerImpl::DistilledPageData::DistilledPageData() {}
DistillerImpl::DistilledPageData::~DistilledPageData() {}
DistillerImpl::DistillerImpl(
const DistillerURLFetcherFactory& distiller_url_fetcher_factory)
const DistillerURLFetcherFactory& distiller_url_fetcher_factory,
const dom_distiller::proto::DomDistillerOptions& dom_distiller_options)
: distiller_url_fetcher_factory_(distiller_url_fetcher_factory),
dom_distiller_options_(dom_distiller_options),
max_pages_in_article_(kMaxPagesInArticle),
destruction_allowed_(true),
weak_factory_(this) {
......@@ -120,6 +124,7 @@ void DistillerImpl::DistillNextPage() {
started_pages_index_[page_num] = pages_.size() - 1;
distiller_page_->DistillPage(
url,
dom_distiller_options_,
base::Bind(&DistillerImpl::OnPageDistillationFinished,
weak_factory_.GetWeakPtr(),
page_num,
......
......@@ -56,19 +56,22 @@ class DistillerFactory {
// Concrete DistillerFactory. It owns a DistillerURLFetcherFactory and a copy
// of the DomDistillerOptions supplied at construction time.
//
// NOTE(review): this listing appears to be a diff rendering with its +/-
// markers stripped. The one-argument constructor declaration looks like the
// pre-change line; the two-argument declaration that follows replaces it.
class DistillerFactoryImpl : public DistillerFactory {
public:
DistillerFactoryImpl(
scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory);
scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory,
const dom_distiller::proto::DomDistillerOptions& dom_distiller_options);
virtual ~DistillerFactoryImpl();
// Returns a newly created Distiller owned by the caller.
virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE;
private:
// URL fetcher factory owned by this object.
scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_;
// Options stored by value; set once from the constructor argument.
dom_distiller::proto::DomDistillerOptions dom_distiller_options_;
};
// Distills an article from a page and associated pages.
class DistillerImpl : public Distiller {
public:
DistillerImpl(
const DistillerURLFetcherFactory& distiller_url_fetcher_factory);
const DistillerURLFetcherFactory& distiller_url_fetcher_factory,
const dom_distiller::proto::DomDistillerOptions& dom_distiller_options);
virtual ~DistillerImpl();
virtual void DistillPage(
......@@ -147,6 +150,8 @@ class DistillerImpl : public Distiller {
const DistillerURLFetcherFactory& distiller_url_fetcher_factory_;
scoped_ptr<DistillerPage> distiller_page_;
dom_distiller::proto::DomDistillerOptions dom_distiller_options_;
DistillationFinishedCallback finished_cb_;
DistillationUpdateCallback update_cb_;
......
......@@ -58,14 +58,15 @@ DistillerPage::DistillerPage() : ready_(true) {}
DistillerPage::~DistillerPage() {}
// Starts distillation of |gurl|. Asserts (DCHECK) that the page is ready,
// marks it busy, stores |callback| in |distiller_page_callback_|, and then
// calls DistillPageImpl() with the script returned by
// GetDistillerScriptWithOptions(options).
//
// NOTE(review): this listing appears to be a diff rendering with its +/-
// markers stripped. The two-parameter signature shown first looks like the
// pre-change form; the three-parameter signature that follows replaces it.
// NOTE(review): |options| is taken by const value, which copies the proto
// message on every call. A const reference would avoid the copy, but the
// declaration in the header must be changed in the same edit.
void DistillerPage::DistillPage(const GURL& gurl,
const DistillerPageCallback& callback) {
void DistillerPage::DistillPage(
const GURL& gurl,
const dom_distiller::proto::DomDistillerOptions options,
const DistillerPageCallback& callback) {
DCHECK(ready_);
// It is only possible to distill one page at a time. |ready_| is reset when
// the callback to OnDistillationDone happens.
ready_ = false;
distiller_page_callback_ = callback;
// NOTE(review): the default-constructed local below looks like the
// pre-change line; with the new |options| parameter it would be a
// redeclaration, so presumably the commit removes it.
dom_distiller::proto::DomDistillerOptions options;
DistillPageImpl(gurl, GetDistillerScriptWithOptions(options));
}
......
......@@ -11,6 +11,7 @@
#include "base/memory/scoped_ptr.h"
#include "base/memory/weak_ptr.h"
#include "base/values.h"
#include "third_party/dom_distiller_js/dom_distiller.pb.h"
#include "url/gurl.h"
namespace dom_distiller {
......@@ -44,9 +45,11 @@ class DistillerPage {
// Loads a URL. |OnDistillationDone| is called when the load completes or
// fails. May be called when the distiller is idle. Callers can assume that,
// for a given |url|, any DistillerPage implementation will extract the same
// content.
void DistillPage(const GURL& url, const DistillerPageCallback& callback);
// for a given |url| and |options|, any DistillerPage implementation will
// extract the same content.
void DistillPage(const GURL& url,
const dom_distiller::proto::DomDistillerOptions options,
const DistillerPageCallback& callback);
// Called when the JavaScript execution completes. |page_url| is the url of
// the distilled page. |value| contains data returned by the script.
......
......@@ -22,6 +22,7 @@
#include "content/public/test/content_browser_test.h"
#include "content/shell/browser/shell.h"
#include "net/dns/mock_host_resolver.h"
#include "third_party/dom_distiller_js/dom_distiller.pb.h"
#include "ui/base/resource/resource_bundle.h"
using content::ContentBrowserTest;
......@@ -43,6 +44,8 @@ const char* kOutputFile = "output-file";
// output.
const char* kShouldOutputBinary = "output-binary";
const char* kExtractTextOnly = "extract-text-only";
scoped_ptr<DomDistillerService> CreateDomDistillerService(
content::BrowserContext* context,
const base::FilePath& db_path) {
......@@ -61,8 +64,13 @@ scoped_ptr<DomDistillerService> CreateDomDistillerService(
new DistillerPageWebContentsFactory(context));
scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory(
new DistillerURLFetcherFactory(context->GetRequestContext()));
dom_distiller::proto::DomDistillerOptions options;
if (base::CommandLine::ForCurrentProcess()->HasSwitch(kExtractTextOnly)) {
options.set_extract_text_only(true);
}
scoped_ptr<DistillerFactory> distiller_factory(
new DistillerFactoryImpl(distiller_url_fetcher_factory.Pass()));
new DistillerFactoryImpl(distiller_url_fetcher_factory.Pass(), options));
return scoped_ptr<DomDistillerService>(new DomDistillerService(
dom_distiller_store.PassAs<DomDistillerStoreInterface>(),
......
......@@ -11,7 +11,7 @@
'proto_in_dir': 'package/proto',
'proto_out_dir': 'third_party/dom_distiller_js',
},
'all_dependent_settings': {
'direct_dependent_settings': {
'include_dirs': ['package/proto_gen'],
},
'includes': [ '../../build/protoc.gypi', ],
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment