Commit 63a97f9b authored by kuan@chromium.org

dom distiller: extract markup properties from protobuf

- store the properties into DistilledPageInfo
- add test using OpenGraphProtocol and Schema.Org markup.

BUG=396124
TBR=nyquist

Review URL: https://codereview.chromium.org/411253008

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@285533 0039d316-1c4b-4281-b951-d872f2087c98
parent e9aa71ad
...@@ -354,4 +354,54 @@ void DistillerPageWebContentsTest::RunUseCurrentWebContentsTest( ...@@ -354,4 +354,54 @@ void DistillerPageWebContentsTest::RunUseCurrentWebContentsTest(
EXPECT_EQ("Test Page Title", page_info_.get()->title); EXPECT_EQ("Test Page Title", page_info_.get()->title);
} }
// Distills "/markup_article.html" and verifies that the markup properties
// reported with the result are exposed through |page_info_->markup_info|.
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, MarkupInfo) {
  DistillerPageWebContents page(
      shell()->web_contents()->GetBrowserContext(),
      shell()->web_contents()->GetContainerBounds().size(),
      scoped_ptr<SourcePageHandleWebContents>());
  distiller_page_ = &page;

  // Start distillation and spin the loop until the quit closure handed to
  // DistillPage() is run.
  base::RunLoop loop;
  DistillPage(loop.QuitClosure(), "/markup_article.html");
  loop.Run();

  // Distilled content and title.
  EXPECT_THAT(page_info_.get()->html, HasSubstr("Lorem ipsum"));
  EXPECT_EQ("Marked-up Markup Test Page Title", page_info_.get()->title);

  // Page-level markup properties.
  const DistilledPageInfo::MarkupInfo& info = page_info_->markup_info;
  EXPECT_EQ("Marked-up Markup Test Page Title", info.title);
  EXPECT_EQ("Article", info.type);
  EXPECT_EQ("http://test/markup.html", info.url);
  EXPECT_EQ("This page tests Markup Info.", info.description);
  EXPECT_EQ("Whoever Published", info.publisher);
  EXPECT_EQ("Copyright 2000-2014 Whoever Copyrighted", info.copyright);
  EXPECT_EQ("Whoever Authored", info.author);

  // Article-level markup properties.
  const DistilledPageInfo::MarkupArticle& article = info.article;
  EXPECT_EQ("Whatever Section", article.section);
  EXPECT_EQ("July 23, 2014", article.published_time);
  EXPECT_EQ("2014-07-23T23:59", article.modified_time);
  EXPECT_EQ("", article.expiration_time);
  ASSERT_EQ(1U, article.authors.size());
  EXPECT_EQ("Whoever Authored", article.authors[0]);

  // Images: exactly two are expected; bail out before indexing otherwise.
  ASSERT_EQ(2U, info.images.size());

  const DistilledPageInfo::MarkupImage& first_image = info.images[0];
  EXPECT_EQ("http://test/markup1.jpeg", first_image.url);
  EXPECT_EQ("https://test/markup1.jpeg", first_image.secure_url);
  EXPECT_EQ("jpeg", first_image.type);
  EXPECT_EQ("", first_image.caption);
  EXPECT_EQ(600, first_image.width);
  EXPECT_EQ(400, first_image.height);

  const DistilledPageInfo::MarkupImage& second_image = info.images[1];
  EXPECT_EQ("http://test/markup2.gif", second_image.url);
  EXPECT_EQ("https://test/markup2.gif", second_image.secure_url);
  EXPECT_EQ("gif", second_image.type);
  EXPECT_EQ("", second_image.caption);
  EXPECT_EQ(1000, second_image.width);
  EXPECT_EQ(600, second_image.height);
}
} // namespace dom_distiller } // namespace dom_distiller
...@@ -52,6 +52,18 @@ DistilledPageInfo::DistilledPageInfo() {} ...@@ -52,6 +52,18 @@ DistilledPageInfo::DistilledPageInfo() {}
DistilledPageInfo::~DistilledPageInfo() {} DistilledPageInfo::~DistilledPageInfo() {}
// Out-of-line constructors and destructors for the markup structs nested in
// DistilledPageInfo.
DistilledPageInfo::MarkupArticle::MarkupArticle() {}

DistilledPageInfo::MarkupArticle::~MarkupArticle() {}

// |width| and |height| are plain ints, so they are zeroed explicitly here;
// otherwise a default-constructed MarkupImage would carry indeterminate
// values in those two members.
DistilledPageInfo::MarkupImage::MarkupImage() : width(0), height(0) {}

DistilledPageInfo::MarkupImage::~MarkupImage() {}

DistilledPageInfo::MarkupInfo::MarkupInfo() {}

DistilledPageInfo::MarkupInfo::~MarkupInfo() {}
DistillerPageFactory::~DistillerPageFactory() {} DistillerPageFactory::~DistillerPageFactory() {}
DistillerPage::DistillerPage() : ready_(true) {} DistillerPage::DistillerPage() : ready_(true) {}
...@@ -91,6 +103,40 @@ void DistillerPage::OnDistillationDone(const GURL& page_url, ...@@ -91,6 +103,40 @@ void DistillerPage::OnDistillationDone(const GURL& page_url,
page_info->image_urls.push_back(image_url); page_info->image_urls.push_back(image_url);
} }
} }
// Copy the markup properties carried by |distiller_result| into
// |page_info->markup_info|, field by field.
// NOTE(review): every field is copied unconditionally; presumably an unset
// proto field yields its default (empty string / 0) -- confirm against the
// .proto definition.
const dom_distiller::proto::MarkupInfo& src_markup_info =
distiller_result.markup_info();
DistilledPageInfo::MarkupInfo& dst_markup_info = page_info->markup_info;
// Page-level properties.
dst_markup_info.title = src_markup_info.title();
dst_markup_info.type = src_markup_info.type();
dst_markup_info.url = src_markup_info.url();
dst_markup_info.description = src_markup_info.description();
dst_markup_info.publisher = src_markup_info.publisher();
dst_markup_info.copyright = src_markup_info.copyright();
dst_markup_info.author = src_markup_info.author();
// Article-level properties, including the repeated list of authors.
const dom_distiller::proto::MarkupArticle& src_article =
src_markup_info.article();
DistilledPageInfo::MarkupArticle& dst_article = dst_markup_info.article;
dst_article.published_time = src_article.published_time();
dst_article.modified_time = src_article.modified_time();
dst_article.expiration_time = src_article.expiration_time();
dst_article.section = src_article.section();
for (int i = 0; i < src_article.authors_size(); ++i) {
dst_article.authors.push_back(src_article.authors(i));
}
// Images: build one MarkupImage per source image and append it, keeping the
// source order.
for (int i = 0; i < src_markup_info.images_size(); ++i) {
const dom_distiller::proto::MarkupImage& src_image =
src_markup_info.images(i);
DistilledPageInfo::MarkupImage dst_image;
dst_image.url = src_image.url();
dst_image.secure_url = src_image.secure_url();
dst_image.type = src_image.type();
dst_image.caption = src_image.caption();
dst_image.width = src_image.width();
dst_image.height = src_image.height();
dst_markup_info.images.push_back(dst_image);
}
} }
base::MessageLoop::current()->PostTask( base::MessageLoop::current()->PostTask(
......
...@@ -18,11 +18,51 @@ ...@@ -18,11 +18,51 @@
namespace dom_distiller { namespace dom_distiller {
struct DistilledPageInfo { struct DistilledPageInfo {
// Article-specific markup properties of a distilled page. The values are
// filled from the distiller's MarkupArticle proto message by plain
// assignment, so the timestamps stay free-form strings and are not parsed.
struct MarkupArticle {
std::string published_time;
std::string modified_time;
std::string expiration_time;
std::string section;
// One entry per author reported for the article.
std::vector<std::string> authors;
// Constructor and destructor are defined out of line.
MarkupArticle();
~MarkupArticle();
};
// Markup properties of a single image associated with a distilled page. The
// values are filled from the distiller's MarkupImage proto message.
struct MarkupImage {
std::string url;
std::string secure_url;
std::string type;
std::string caption;
// Image dimensions as reported by the markup (presumably in pixels --
// TODO confirm).
int width;
int height;
// Constructor and destructor are defined out of line.
MarkupImage();
~MarkupImage();
};
// Page-level markup properties of a distilled page, together with the
// article-specific properties and the list of associated images. The values
// are filled from the distiller's MarkupInfo proto message.
struct MarkupInfo {
std::string title;
std::string type;
std::string url;
std::string description;
std::string publisher;
std::string copyright;
std::string author;
// Article-specific properties.
MarkupArticle article;
// Images in the order they were reported.
std::vector<MarkupImage> images;
// Constructor and destructor are defined out of line.
MarkupInfo();
~MarkupInfo();
};
std::string title; std::string title;
std::string html; std::string html;
std::string next_page_url; std::string next_page_url;
std::string prev_page_url; std::string prev_page_url;
std::vector<std::string> image_urls; std::vector<std::string> image_urls;
MarkupInfo markup_info;
DistilledPageInfo(); DistilledPageInfo();
~DistilledPageInfo(); ~DistilledPageInfo();
......
<html>
<head>
<title>Markup Test Page Title</title>
<!-- OpenGraphProtocol Markup Info -->
<meta property="og:title" content="Marked-up Markup Test Page Title">
<meta property="og:type" content="Article">
<meta property="og:url" content="http://test/markup.html">
<meta property="og:image" content="http://test/markup1.jpeg">
<meta property="og:image:url" content="http://test/markup1.jpeg">
<meta property="og:image:secure_url" content="https://test/markup1.jpeg">
<meta property="og:image:type" content="jpeg">
<meta property="og:image:width" content="600">
<meta property="og:image:height" content="400">
<meta property="og:image" content="http://test/markup2.gif">
<meta property="og:image:url" content="http://test/markup2.gif">
<meta property="og:image:secure_url" content="https://test/markup2.gif">
<meta property="og:image:type" content="gif">
<meta property="og:image:width" content="1000">
<meta property="og:image:height" content="600">
</head>
<body>
<div>
<p>Lorem ipsum dolor sit amet, at alia aliquip vel. Quas inani labore an vel. Sed an nemore minimum accusata. Sint inermis tacimates est ex, ad movet iracundia mei, delicata iracundia laboramus ei eos. Illud principes complectitur te nec, ius alienum insolens ea, cu quo oratio omnesque.
<p>Lorem ipsum dolor sit amet, at alia aliquip vel. Quas inani labore an vel. Sed an nemore minimum accusata. Sint inermis tacimates est ex, ad movet iracundia mei, delicata iracundia laboramus ei eos. Illud principes complectitur te nec, ius alienum insolens ea, cu quo oratio omnesque.
</div>
<br>
<!-- Schema.Org Markup Info -->
<div itemscope itemtype="http://schema.org/Article">
<span itemprop="description">This page tests Markup Info.</span>
<div itemscope itemtype="http://schema.org/Person" itemprop="author">Author:
<span itemprop="name">Whoever Authored</span>
</div>
<div itemscope itemtype="http://schema.org/Organization" itemprop="publisher">Publisher:
<span itemprop="name">Whoever Published</span>
</div>
<span itemprop="datePublished">July 23, 2014</span> +
<time itemprop="dateModified" datetime="2014-07-23T23:59">July 23, 2014 11:59pm</time>
<span itemprop="copyrightYear">2000-2014</span>
<span itemprop="copyrightHolder">Whoever Copyrighted</span>
<span itemprop="articleSection">Whatever Section</span>
</div>
</body>
</html>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment