Commit 3d9a2469 authored by nyquist's avatar nyquist Committed by Commit bot

Remove DistilledPageInfo and related structs.

The DistilledPageInfo and related structs are unnecessary, and it is possible
to pass the DomDistillerResult proto around instead. Currently they contain the
same data, and it is all just copied over.

Removing this intermediate step simplifies the code and makes it easier to add
more data to the DomDistillerResult proto and passing it through to
DistilledPageProto.

BUG=409274

Review URL: https://codereview.chromium.org/493853006

Cr-Commit-Position: refs/heads/master@{#293008}
parent 9ce271f8
......@@ -17,6 +17,7 @@
#include "content/shell/browser/shell.h"
#include "net/test/embedded_test_server/embedded_test_server.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "third_party/dom_distiller_js/dom_distiller.pb.h"
#include "ui/base/resource/resource_bundle.h"
using content::ContentBrowserTest;
......@@ -47,9 +48,10 @@ class DistillerPageWebContentsTest : public ContentBrowserTest {
this));
}
void OnPageDistillationFinished(scoped_ptr<DistilledPageInfo> distilled_page,
bool distillation_successful) {
page_info_ = distilled_page.Pass();
void OnPageDistillationFinished(
scoped_ptr<proto::DomDistillerResult> distiller_result,
bool distillation_successful) {
distiller_result_ = distiller_result.Pass();
quit_closure_.Run();
}
......@@ -79,7 +81,7 @@ class DistillerPageWebContentsTest : public ContentBrowserTest {
DistillerPageWebContents* distiller_page_;
base::Closure quit_closure_;
scoped_ptr<DistilledPageInfo> page_info_;
scoped_ptr<proto::DomDistillerResult> distiller_result_;
};
// Use this class to be able to leak the WebContents, which is needed for when
......@@ -169,11 +171,13 @@ IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, BasicDistillationWorks) {
DistillPage(run_loop.QuitClosure(), kSimpleArticlePath);
run_loop.Run();
EXPECT_EQ("Test Page Title", page_info_.get()->title);
EXPECT_THAT(page_info_.get()->html, HasSubstr("Lorem ipsum"));
EXPECT_THAT(page_info_.get()->html, Not(HasSubstr("questionable content")));
EXPECT_EQ("", page_info_.get()->next_page_url);
EXPECT_EQ("", page_info_.get()->prev_page_url);
EXPECT_EQ("Test Page Title", distiller_result_->title());
EXPECT_THAT(distiller_result_->distilled_content().html(),
HasSubstr("Lorem ipsum"));
EXPECT_THAT(distiller_result_->distilled_content().html(),
Not(HasSubstr("questionable content")));
EXPECT_EQ("", distiller_result_->pagination_info().next_page());
EXPECT_EQ("", distiller_result_->pagination_info().prev_page());
}
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeLinks) {
......@@ -188,9 +192,9 @@ IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeLinks) {
run_loop.Run();
// A relative link should've been updated.
EXPECT_THAT(page_info_.get()->html,
EXPECT_THAT(distiller_result_->distilled_content().html(),
ContainsRegex("href=\"http://127.0.0.1:.*/relativelink.html\""));
EXPECT_THAT(page_info_.get()->html,
EXPECT_THAT(distiller_result_->distilled_content().html(),
HasSubstr("href=\"http://www.google.com/absolutelink.html\""));
}
......@@ -206,9 +210,9 @@ IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeImages) {
run_loop.Run();
// A relative link should've been updated.
EXPECT_THAT(page_info_.get()->html,
EXPECT_THAT(distiller_result_->distilled_content().html(),
ContainsRegex("src=\"http://127.0.0.1:.*/relativeimage.png\""));
EXPECT_THAT(page_info_.get()->html,
EXPECT_THAT(distiller_result_->distilled_content().html(),
HasSubstr("src=\"http://www.google.com/absoluteimage.png\""));
}
......@@ -225,18 +229,15 @@ IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeVideos) {
run_loop.Run();
// A relative source/track should've been updated.
EXPECT_THAT(distiller_result_->distilled_content().html(),
ContainsRegex("src=\"http://127.0.0.1:.*/relative_video.mp4\""));
EXPECT_THAT(
page_info_.get()->html,
ContainsRegex("src=\"http://127.0.0.1:.*/relative_video.mp4\""));
EXPECT_THAT(
page_info_.get()->html,
distiller_result_->distilled_content().html(),
ContainsRegex("src=\"http://127.0.0.1:.*/relative_track_en.vtt\""));
EXPECT_THAT(
page_info_.get()->html,
HasSubstr("src=\"http://www.google.com/absolute_video.ogg\""));
EXPECT_THAT(
page_info_.get()->html,
HasSubstr("src=\"http://www.google.com/absolute_track_fr.vtt\""));
EXPECT_THAT(distiller_result_->distilled_content().html(),
HasSubstr("src=\"http://www.google.com/absolute_video.ogg\""));
EXPECT_THAT(distiller_result_->distilled_content().html(),
HasSubstr("src=\"http://www.google.com/absolute_track_fr.vtt\""));
}
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, VisibilityDetection) {
......@@ -253,14 +254,16 @@ IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, VisibilityDetection) {
base::RunLoop run_loop;
DistillPage(run_loop.QuitClosure(), "/visible_style.html");
run_loop.Run();
EXPECT_THAT(page_info_.get()->html, HasSubstr("Lorem ipsum"));
EXPECT_THAT(distiller_result_->distilled_content().html(),
HasSubstr("Lorem ipsum"));
}
{
base::RunLoop run_loop;
DistillPage(run_loop.QuitClosure(), "/invisible_style.html");
run_loop.Run();
EXPECT_THAT(page_info_.get()->html, Not(HasSubstr("Lorem ipsum")));
EXPECT_THAT(distiller_result_->distilled_content().html(),
Not(HasSubstr("Lorem ipsum")));
}
}
......@@ -350,7 +353,7 @@ void DistillerPageWebContentsTest::RunUseCurrentWebContentsTest(
// Sanity check of distillation process.
EXPECT_EQ(expect_new_web_contents, distiller_page.new_web_contents_created());
EXPECT_EQ("Test Page Title", page_info_.get()->title);
EXPECT_EQ("Test Page Title", distiller_result_->title());
}
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, MarkupInfo) {
......@@ -364,43 +367,44 @@ IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, MarkupInfo) {
DistillPage(run_loop.QuitClosure(), "/markup_article.html");
run_loop.Run();
EXPECT_THAT(page_info_.get()->html, HasSubstr("Lorem ipsum"));
EXPECT_EQ("Marked-up Markup Test Page Title", page_info_.get()->title);
const DistilledPageInfo::MarkupInfo& markup_info = page_info_->markup_info;
EXPECT_EQ("Marked-up Markup Test Page Title", markup_info.title);
EXPECT_EQ("Article", markup_info.type);
EXPECT_EQ("http://test/markup.html", markup_info.url);
EXPECT_EQ("This page tests Markup Info.", markup_info.description);
EXPECT_EQ("Whoever Published", markup_info.publisher);
EXPECT_EQ("Copyright 2000-2014 Whoever Copyrighted", markup_info.copyright);
EXPECT_EQ("Whoever Authored", markup_info.author);
const DistilledPageInfo::MarkupArticle& markup_article = markup_info.article;
EXPECT_EQ("Whatever Section", markup_article.section);
EXPECT_EQ("July 23, 2014", markup_article.published_time);
EXPECT_EQ("2014-07-23T23:59", markup_article.modified_time);
EXPECT_EQ("", markup_article.expiration_time);
ASSERT_EQ(1U, markup_article.authors.size());
EXPECT_EQ("Whoever Authored", markup_article.authors[0]);
ASSERT_EQ(2U, markup_info.images.size());
const DistilledPageInfo::MarkupImage& markup_image1 = markup_info.images[0];
EXPECT_EQ("http://test/markup1.jpeg", markup_image1.url);
EXPECT_EQ("https://test/markup1.jpeg", markup_image1.secure_url);
EXPECT_EQ("jpeg", markup_image1.type);
EXPECT_EQ("", markup_image1.caption);
EXPECT_EQ(600, markup_image1.width);
EXPECT_EQ(400, markup_image1.height);
const DistilledPageInfo::MarkupImage& markup_image2 = markup_info.images[1];
EXPECT_EQ("http://test/markup2.gif", markup_image2.url);
EXPECT_EQ("https://test/markup2.gif", markup_image2.secure_url);
EXPECT_EQ("gif", markup_image2.type);
EXPECT_EQ("", markup_image2.caption);
EXPECT_EQ(1000, markup_image2.width);
EXPECT_EQ(600, markup_image2.height);
EXPECT_THAT(distiller_result_->distilled_content().html(),
HasSubstr("Lorem ipsum"));
EXPECT_EQ("Marked-up Markup Test Page Title", distiller_result_->title());
const proto::MarkupInfo markup_info = distiller_result_->markup_info();
EXPECT_EQ("Marked-up Markup Test Page Title", markup_info.title());
EXPECT_EQ("Article", markup_info.type());
EXPECT_EQ("http://test/markup.html", markup_info.url());
EXPECT_EQ("This page tests Markup Info.", markup_info.description());
EXPECT_EQ("Whoever Published", markup_info.publisher());
EXPECT_EQ("Copyright 2000-2014 Whoever Copyrighted", markup_info.copyright());
EXPECT_EQ("Whoever Authored", markup_info.author());
const proto::MarkupArticle markup_article = markup_info.article();
EXPECT_EQ("Whatever Section", markup_article.section());
EXPECT_EQ("July 23, 2014", markup_article.published_time());
EXPECT_EQ("2014-07-23T23:59", markup_article.modified_time());
EXPECT_EQ("", markup_article.expiration_time());
ASSERT_EQ(1, markup_article.authors_size());
EXPECT_EQ("Whoever Authored", markup_article.authors(0));
ASSERT_EQ(2, markup_info.images_size());
const proto::MarkupImage markup_image1 = markup_info.images(0);
EXPECT_EQ("http://test/markup1.jpeg", markup_image1.url());
EXPECT_EQ("https://test/markup1.jpeg", markup_image1.secure_url());
EXPECT_EQ("jpeg", markup_image1.type());
EXPECT_EQ("", markup_image1.caption());
EXPECT_EQ(600, markup_image1.width());
EXPECT_EQ(400, markup_image1.height());
const proto::MarkupImage markup_image2 = markup_info.images(1);
EXPECT_EQ("http://test/markup2.gif", markup_image2.url());
EXPECT_EQ("https://test/markup2.gif", markup_image2.secure_url());
EXPECT_EQ("gif", markup_image2.type());
EXPECT_EQ("", markup_image2.caption());
EXPECT_EQ(1000, markup_image2.width());
EXPECT_EQ(600, markup_image2.height());
}
} // namespace dom_distiller
......@@ -135,38 +135,53 @@ void DistillerImpl::DistillNextPage() {
void DistillerImpl::OnPageDistillationFinished(
int page_num,
const GURL& page_url,
scoped_ptr<DistilledPageInfo> distilled_page,
scoped_ptr<proto::DomDistillerResult> distiller_result,
bool distillation_successful) {
DCHECK(distilled_page.get());
DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end());
if (distillation_successful) {
DCHECK(distiller_result.get());
DistilledPageData* page_data =
GetPageAtIndex(started_pages_index_[page_num]);
page_data->distilled_page_proto =
new base::RefCountedData<DistilledPageProto>();
page_data->page_num = page_num;
page_data->distilled_page_proto->data.set_title(distilled_page->title);
if (distiller_result->has_title()) {
page_data->distilled_page_proto->data.set_title(
distiller_result->title());
}
page_data->distilled_page_proto->data.set_url(page_url.spec());
page_data->distilled_page_proto->data.set_html(distilled_page->html);
GURL next_page_url(distilled_page->next_page_url);
if (next_page_url.is_valid()) {
// The pages should be in same origin.
DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin());
AddToDistillationQueue(page_num + 1, next_page_url);
if (distiller_result->has_distilled_content() &&
distiller_result->distilled_content().has_html()) {
page_data->distilled_page_proto->data.set_html(
distiller_result->distilled_content().html());
}
GURL prev_page_url(distilled_page->prev_page_url);
if (prev_page_url.is_valid()) {
DCHECK_EQ(prev_page_url.GetOrigin(), page_url.GetOrigin());
AddToDistillationQueue(page_num - 1, prev_page_url);
if (distiller_result->has_pagination_info()) {
proto::PaginationInfo pagination_info =
distiller_result->pagination_info();
if (pagination_info.has_next_page()) {
GURL next_page_url(pagination_info.next_page());
if (next_page_url.is_valid()) {
// The pages should be in same origin.
DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin());
AddToDistillationQueue(page_num + 1, next_page_url);
}
}
if (pagination_info.has_prev_page()) {
GURL prev_page_url(pagination_info.prev_page());
if (prev_page_url.is_valid()) {
DCHECK_EQ(prev_page_url.GetOrigin(), page_url.GetOrigin());
AddToDistillationQueue(page_num - 1, prev_page_url);
}
}
}
for (size_t img_num = 0; img_num < distilled_page->image_urls.size();
for (int img_num = 0; img_num < distiller_result->image_urls_size();
++img_num) {
std::string image_id =
base::IntToString(page_num + 1) + "_" + base::IntToString(img_num);
FetchImage(page_num, image_id, distilled_page->image_urls[img_num]);
FetchImage(page_num, image_id, distiller_result->image_urls(img_num));
}
AddPageIfDone(page_num);
......
......@@ -107,10 +107,11 @@ class DistillerImpl : public Distiller {
const std::string& id,
const std::string& response);
void OnPageDistillationFinished(int page_num,
const GURL& page_url,
scoped_ptr<DistilledPageInfo> distilled_page,
bool distillation_successful);
void OnPageDistillationFinished(
int page_num,
const GURL& page_url,
scoped_ptr<proto::DomDistillerResult> distilled_page,
bool distillation_successful);
virtual void FetchImage(int page_num,
const std::string& image_id,
......
......@@ -50,22 +50,6 @@ std::string GetDistillerScriptWithOptions(
}
DistilledPageInfo::DistilledPageInfo() {}
DistilledPageInfo::~DistilledPageInfo() {}
DistilledPageInfo::MarkupArticle::MarkupArticle() {}
DistilledPageInfo::MarkupArticle::~MarkupArticle() {}
DistilledPageInfo::MarkupImage::MarkupImage() {}
DistilledPageInfo::MarkupImage::~MarkupImage() {}
DistilledPageInfo::MarkupInfo::MarkupInfo() {}
DistilledPageInfo::MarkupInfo::~MarkupInfo() {}
DistillerPageFactory::~DistillerPageFactory() {}
DistillerPage::DistillerPage() : ready_(true) {}
......@@ -89,59 +73,18 @@ void DistillerPage::OnDistillationDone(const GURL& page_url,
DCHECK(!ready_);
ready_ = true;
scoped_ptr<DistilledPageInfo> page_info(new DistilledPageInfo());
bool found_content = !value->IsType(base::Value::TYPE_NULL);
if (found_content) {
dom_distiller::proto::DomDistillerResult distiller_result =
dom_distiller::proto::json::DomDistillerResult::ReadFromValue(value);
page_info->title = distiller_result.title();
page_info->html = distiller_result.distilled_content().html();
page_info->next_page_url = distiller_result.pagination_info().next_page();
page_info->prev_page_url = distiller_result.pagination_info().prev_page();
for (int i = 0; i < distiller_result.image_urls_size(); ++i) {
const std::string image_url = distiller_result.image_urls(i);
if (GURL(image_url).is_valid()) {
page_info->image_urls.push_back(image_url);
}
}
const dom_distiller::proto::MarkupInfo& src_markup_info =
distiller_result.markup_info();
DistilledPageInfo::MarkupInfo& dst_markup_info = page_info->markup_info;
dst_markup_info.title = src_markup_info.title();
dst_markup_info.type = src_markup_info.type();
dst_markup_info.url = src_markup_info.url();
dst_markup_info.description = src_markup_info.description();
dst_markup_info.publisher = src_markup_info.publisher();
dst_markup_info.copyright = src_markup_info.copyright();
dst_markup_info.author = src_markup_info.author();
const dom_distiller::proto::MarkupArticle& src_article =
src_markup_info.article();
DistilledPageInfo::MarkupArticle& dst_article = dst_markup_info.article;
dst_article.published_time = src_article.published_time();
dst_article.modified_time = src_article.modified_time();
dst_article.expiration_time = src_article.expiration_time();
dst_article.section = src_article.section();
for (int i = 0; i < src_article.authors_size(); ++i) {
dst_article.authors.push_back(src_article.authors(i));
}
scoped_ptr<dom_distiller::proto::DomDistillerResult> distiller_result;
for (int i = 0; i < src_markup_info.images_size(); ++i) {
const dom_distiller::proto::MarkupImage& src_image =
src_markup_info.images(i);
DistilledPageInfo::MarkupImage dst_image;
dst_image.url = src_image.url();
dst_image.secure_url = src_image.secure_url();
dst_image.type = src_image.type();
dst_image.caption = src_image.caption();
dst_image.width = src_image.width();
dst_image.height = src_image.height();
dst_markup_info.images.push_back(dst_image);
}
if (distiller_result.has_timing_info()) {
bool found_content;
if (value->IsType(base::Value::TYPE_NULL)) {
found_content = false;
} else {
found_content = true;
distiller_result.reset(new dom_distiller::proto::DomDistillerResult(
dom_distiller::proto::json::DomDistillerResult::ReadFromValue(value)));
if (distiller_result->has_timing_info()) {
const dom_distiller::proto::TimingInfo& timing =
distiller_result.timing_info();
distiller_result->timing_info();
if (timing.has_markup_parsing_time()) {
UMA_HISTOGRAM_TIMES(
"DomDistiller.Time.MarkupParsing",
......@@ -174,8 +117,9 @@ void DistillerPage::OnDistillationDone(const GURL& page_url,
base::MessageLoop::current()->PostTask(
FROM_HERE,
base::Bind(
distiller_page_callback_, base::Passed(&page_info), found_content));
base::Bind(distiller_page_callback_,
base::Passed(&distiller_result),
found_content));
}
} // namespace dom_distiller
......@@ -17,59 +17,6 @@
namespace dom_distiller {
struct DistilledPageInfo {
struct MarkupArticle {
std::string published_time;
std::string modified_time;
std::string expiration_time;
std::string section;
std::vector<std::string> authors;
MarkupArticle();
~MarkupArticle();
};
struct MarkupImage {
std::string url;
std::string secure_url;
std::string type;
std::string caption;
int width;
int height;
MarkupImage();
~MarkupImage();
};
struct MarkupInfo {
std::string title;
std::string type;
std::string url;
std::string description;
std::string publisher;
std::string copyright;
std::string author;
MarkupArticle article;
std::vector<MarkupImage> images;
MarkupInfo();
~MarkupInfo();
};
std::string title;
std::string html;
std::string next_page_url;
std::string prev_page_url;
std::vector<std::string> image_urls;
MarkupInfo markup_info;
DistilledPageInfo();
~DistilledPageInfo();
private:
DISALLOW_COPY_AND_ASSIGN(DistilledPageInfo);
};
class SourcePageHandle {
public:
virtual ~SourcePageHandle() {}
......@@ -82,9 +29,9 @@ class SourcePageHandle {
// thrown away without ever being used.
class DistillerPage {
public:
typedef base::Callback<void(scoped_ptr<DistilledPageInfo> distilled_page,
bool distillation_successful)>
DistillerPageCallback;
typedef base::Callback<
void(scoped_ptr<proto::DomDistillerResult> distilled_page,
bool distillation_successful)> DistillerPageCallback;
DistillerPage();
virtual ~DistillerPage();
......@@ -94,7 +41,7 @@ class DistillerPage {
// for a given |url| and |options|, any DistillerPage implementation will
// extract the same content.
void DistillPage(const GURL& url,
const dom_distiller::proto::DomDistillerOptions options,
const proto::DomDistillerOptions options,
const DistillerPageCallback& callback);
// Called when the JavaScript execution completes. |page_url| is the url of
......
......@@ -167,7 +167,7 @@ void VerifyArticleProtoMatchesMultipageData(
const dom_distiller::DistilledArticleProto* article_proto,
const MultipageDistillerData* distiller_data,
size_t pages_size) {
EXPECT_EQ(pages_size, static_cast<size_t>(article_proto->pages_size()));
ASSERT_EQ(pages_size, static_cast<size_t>(article_proto->pages_size()));
EXPECT_EQ(kTitle, article_proto->title());
for (size_t page_num = 0; page_num < pages_size; ++page_num) {
const dom_distiller::DistilledPageProto& page =
......@@ -335,7 +335,7 @@ TEST_F(DistillerTest, DistillPage) {
DistillPage(kURL, CreateMockDistillerPage(result.get(), GURL(kURL)).Pass());
base::MessageLoop::current()->RunUntilIdle();
EXPECT_EQ(kTitle, article_proto_->title());
EXPECT_EQ(article_proto_->pages_size(), 1);
ASSERT_EQ(article_proto_->pages_size(), 1);
const DistilledPageProto& first_page = article_proto_->pages(0);
EXPECT_EQ(kContent, first_page.html());
EXPECT_EQ(kURL, first_page.url());
......@@ -353,11 +353,11 @@ TEST_F(DistillerTest, DistillPageWithImages) {
DistillPage(kURL, CreateMockDistillerPage(result.get(), GURL(kURL)).Pass());
base::MessageLoop::current()->RunUntilIdle();
EXPECT_EQ(kTitle, article_proto_->title());
EXPECT_EQ(article_proto_->pages_size(), 1);
ASSERT_EQ(article_proto_->pages_size(), 1);
const DistilledPageProto& first_page = article_proto_->pages(0);
EXPECT_EQ(kContent, first_page.html());
EXPECT_EQ(kURL, first_page.url());
EXPECT_EQ(2, first_page.image_size());
ASSERT_EQ(2, first_page.image_size());
EXPECT_EQ(kImageData[0], first_page.image(0).data());
EXPECT_EQ(GetImageName(1, 0), first_page.image(0).name());
EXPECT_EQ(kImageData[1], first_page.image(1).data());
......@@ -536,7 +536,7 @@ TEST_F(DistillerTest, IncrementalUpdates) {
distiller_data.get(), kNumPages, start_page_num).Pass());
base::MessageLoop::current()->RunUntilIdle();
EXPECT_EQ(kTitle, article_proto_->title());
EXPECT_EQ(kNumPages, static_cast<size_t>(article_proto_->pages_size()));
ASSERT_EQ(kNumPages, static_cast<size_t>(article_proto_->pages_size()));
EXPECT_EQ(kNumPages, in_sequence_updates_.size());
VerifyIncrementalUpdatesMatch(
......@@ -581,7 +581,7 @@ TEST_F(DistillerTest, DeletingArticleDoesNotInterfereWithUpdates) {
base::MessageLoop::current()->RunUntilIdle();
EXPECT_EQ(kNumPages, in_sequence_updates_.size());
EXPECT_EQ(kTitle, article_proto_->title());
EXPECT_EQ(kNumPages, static_cast<size_t>(article_proto_->pages_size()));
ASSERT_EQ(kNumPages, static_cast<size_t>(article_proto_->pages_size()));
// Delete the article.
article_proto_.reset();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment