Make DistillerPageWebContentsTest a real test that uses the distiller.

Previously, the test used a hardcoded distiller template which didn't
help test the distiller. Updates the test to use the real distiller and
adds tests for relativization of images and anchors.

In the process, attempts to handle a few failure modes of the distiller.

BUG=353349,354737
TBR=blundell

Review URL: https://codereview.chromium.org/211493008

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@261463 0039d316-1c4b-4281-b951-d872f2087c98
parent b102c081
...@@ -548,6 +548,7 @@ ...@@ -548,6 +548,7 @@
'../skia/skia.gyp:skia', '../skia/skia.gyp:skia',
'../testing/gmock.gyp:gmock', '../testing/gmock.gyp:gmock',
'../testing/gtest.gyp:gtest', '../testing/gtest.gyp:gtest',
'../testing/gmock.gyp:gmock',
], ],
'include_dirs': [ 'include_dirs': [
'..', '..',
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
// found in the LICENSE file. // found in the LICENSE file.
#include "base/memory/weak_ptr.h" #include "base/memory/weak_ptr.h"
#include "base/path_service.h"
#include "base/run_loop.h" #include "base/run_loop.h"
#include "base/values.h" #include "base/values.h"
#include "components/dom_distiller/content/distiller_page_web_contents.h" #include "components/dom_distiller/content/distiller_page_web_contents.h"
...@@ -10,28 +11,15 @@ ...@@ -10,28 +11,15 @@
#include "content/public/browser/browser_context.h" #include "content/public/browser/browser_context.h"
#include "content/public/test/content_browser_test.h" #include "content/public/test/content_browser_test.h"
#include "content/shell/browser/shell.h" #include "content/shell/browser/shell.h"
#include "grit/component_resources.h"
#include "net/test/embedded_test_server/embedded_test_server.h" #include "net/test/embedded_test_server/embedded_test_server.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "ui/base/resource/resource_bundle.h"
using content::ContentBrowserTest; using content::ContentBrowserTest;
using testing::ContainsRegex;
namespace { using testing::HasSubstr;
// TODO(bengr): Once JavaScript has landed to extract article content from using testing::Not;
// a loaded page, test the interaction of that script with
// DistillerPageWebContents.
static const char kTitle[] = "Test Page Title";
static const char kHtml[] =
"<body>T<img src='http://t.com/t.jpg' id='0'></body>";
static const char kImageUrl[] = "http://t.com/t.jpg";
static const char kScript[] =
" (function () {"
" var result = new Array(3);"
" result[0] = \"Test Page Title\";"
" result[1] = \"<body>T<img src='http://t.com/t.jpg' id='0'></body>\";"
" result[2] = \"http://t.com/t.jpg\";"
" return result;"
" }())";
} // namespace
namespace dom_distiller { namespace dom_distiller {
...@@ -39,14 +27,22 @@ class DistillerPageWebContentsTest ...@@ -39,14 +27,22 @@ class DistillerPageWebContentsTest
: public ContentBrowserTest, : public ContentBrowserTest,
public DistillerPage::Delegate { public DistillerPage::Delegate {
public: public:
// ContentBrowserTest:
virtual void SetUpOnMainThread() OVERRIDE {
AddComponentsResources();
SetUpTestServer();
ContentBrowserTest::SetUpOnMainThread();
}
void DistillPage(const base::Closure& quit_closure, const std::string& url) { void DistillPage(const base::Closure& quit_closure, const std::string& url) {
quit_closure_ = quit_closure; quit_closure_ = quit_closure;
distiller_page_->LoadURL( distiller_page_->LoadURL(embedded_test_server()->GetURL(url));
embedded_test_server()->GetURL(url));
} }
virtual void OnLoadURLDone() OVERRIDE { virtual void OnLoadURLDone() OVERRIDE {
distiller_page_->ExecuteJavaScript(kScript); std::string script = ResourceBundle::GetSharedInstance().
GetRawDataResource(IDR_DISTILLER_JS).as_string();
distiller_page_->ExecuteJavaScript(script);
} }
virtual void OnExecuteJavaScriptDone(const GURL& page_url, virtual void OnExecuteJavaScriptDone(const GURL& page_url,
...@@ -55,35 +51,101 @@ class DistillerPageWebContentsTest ...@@ -55,35 +51,101 @@ class DistillerPageWebContentsTest
quit_closure_.Run(); quit_closure_.Run();
} }
private:
void AddComponentsResources() {
base::FilePath pak_file;
base::FilePath pak_dir;
PathService::Get(base::DIR_MODULE, &pak_dir);
pak_file = pak_dir.Append(FILE_PATH_LITERAL("components_resources.pak"));
ui::ResourceBundle::GetSharedInstance().AddDataPackFromPath(
pak_file, ui::SCALE_FACTOR_NONE);
}
void SetUpTestServer() {
base::FilePath path;
PathService::Get(base::DIR_SOURCE_ROOT, &path);
path = path.AppendASCII("components/test/data/dom_distiller");
embedded_test_server()->ServeFilesFromDirectory(path);
ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
}
protected: protected:
DistillerPageWebContents* distiller_page_; DistillerPageWebContents* distiller_page_;
base::Closure quit_closure_; base::Closure quit_closure_;
const base::Value* value_; const base::Value* value_;
}; };
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, LoadPage) { IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, BasicDistillationWorks) {
ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
base::WeakPtrFactory<DistillerPage::Delegate> weak_factory(this); base::WeakPtrFactory<DistillerPage::Delegate> weak_factory(this);
DistillerPageWebContents distiller_page( DistillerPageWebContents distiller_page(
weak_factory.GetWeakPtr(), shell()->web_contents()->GetBrowserContext()); weak_factory.GetWeakPtr(), shell()->web_contents()->GetBrowserContext());
distiller_page_ = &distiller_page; distiller_page_ = &distiller_page;
distiller_page.Init(); distiller_page.Init();
base::RunLoop run_loop; base::RunLoop run_loop;
DistillPage(run_loop.QuitClosure(), "/simple_page.html"); DistillPage(run_loop.QuitClosure(), "/simple_article.html");
run_loop.Run(); run_loop.Run();
const base::ListValue* result_list = NULL; const base::ListValue* result_list = NULL;
ASSERT_TRUE(value_->GetAsList(&result_list)); ASSERT_TRUE(value_->GetAsList(&result_list));
ASSERT_EQ(3u, result_list->GetSize()); ASSERT_EQ(4u, result_list->GetSize());
std::string title; std::string title;
result_list->GetString(0, &title); result_list->GetString(0, &title);
ASSERT_EQ(kTitle, title); EXPECT_EQ("Test Page Title", title);
std::string html;
result_list->GetString(1, &html);
EXPECT_THAT(html, HasSubstr("Lorem ipsum"));
EXPECT_THAT(html, Not(HasSubstr("questionable content")));
std::string next_page_url;
result_list->GetString(2, &next_page_url);
EXPECT_EQ("", next_page_url);
std::string unused;
result_list->GetString(3, &unused);
EXPECT_EQ("", unused);
}
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeLinks) {
base::WeakPtrFactory<DistillerPage::Delegate> weak_factory(this);
DistillerPageWebContents distiller_page(
weak_factory.GetWeakPtr(), shell()->web_contents()->GetBrowserContext());
distiller_page_ = &distiller_page;
distiller_page.Init();
base::RunLoop run_loop;
DistillPage(run_loop.QuitClosure(), "/simple_article.html");
run_loop.Run();
const base::ListValue* result_list = NULL;
ASSERT_TRUE(value_->GetAsList(&result_list));
std::string html;
result_list->GetString(1, &html);
// A relative link should've been updated.
EXPECT_THAT(html,
ContainsRegex("href=\"http://127.0.0.1:[0-9]*/relativelink.html\""));
EXPECT_THAT(html,
HasSubstr("href=\"http://www.google.com/absolutelink.html\""));
}
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeImages) {
base::WeakPtrFactory<DistillerPage::Delegate> weak_factory(this);
DistillerPageWebContents distiller_page(
weak_factory.GetWeakPtr(), shell()->web_contents()->GetBrowserContext());
distiller_page_ = &distiller_page;
distiller_page.Init();
base::RunLoop run_loop;
DistillPage(run_loop.QuitClosure(), "/simple_article.html");
run_loop.Run();
const base::ListValue* result_list = NULL;
ASSERT_TRUE(value_->GetAsList(&result_list));
std::string html; std::string html;
result_list->GetString(1, &html); result_list->GetString(1, &html);
ASSERT_EQ(kHtml, html); // A relative link should've been updated.
std::string image_url; EXPECT_THAT(html,
result_list->GetString(2, &image_url); ContainsRegex("src=\"http://127.0.0.1:[0-9]*/relativeimage.png\""));
ASSERT_EQ(kImageUrl, image_url); EXPECT_THAT(html,
HasSubstr("src=\"http://www.google.com/absoluteimage.png\""));
} }
} // namespace dom_distiller } // namespace dom_distiller
...@@ -2,8 +2,6 @@ ...@@ -2,8 +2,6 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
// TODO(cjhopman): remove dependency on readability.
// These includes will be processed at build time by grit. // These includes will be processed at build time by grit.
<include src="../../../../third_party/dom_distiller_js/js/domdistiller.js"/> <include src="../../../../third_party/dom_distiller_js/js/domdistiller.js"/>
...@@ -13,12 +11,16 @@ ...@@ -13,12 +11,16 @@
// element is the previous page link. // element is the previous page link.
(function() { (function() {
var result = new Array(4); var result = new Array(4);
result[0] = com.dom_distiller.DocumentTitleGetter.getDocumentTitle( try {
document.title, document.documentElement); result[0] = com.dom_distiller.DocumentTitleGetter.getDocumentTitle(
result[1] = com.dom_distiller.ContentExtractor.extractContent(); document.title, document.documentElement);
result[2] = com.dom_distiller.PagingLinksFinder.findNext( result[1] = com.dom_distiller.ContentExtractor.extractContent();
document.documentElement); result[2] = com.dom_distiller.PagingLinksFinder.findNext(
// TODO(shashishekhar): Add actual previous page link here. document.documentElement);
result[3] = ''; // TODO(shashishekhar): Add actual previous page link here.
result[3] = '';
} catch (e) {
window.console.log("Error during distillation: " + e);
}
return result; return result;
})() })()
...@@ -63,6 +63,7 @@ void PageDistiller::OnExecuteJavaScriptDone(const GURL& page_url, ...@@ -63,6 +63,7 @@ void PageDistiller::OnExecuteJavaScriptDone(const GURL& page_url,
scoped_ptr<DistilledPageInfo> page_info(new DistilledPageInfo()); scoped_ptr<DistilledPageInfo> page_info(new DistilledPageInfo());
std::string result; std::string result;
const base::ListValue* result_list = NULL; const base::ListValue* result_list = NULL;
bool found_content = false;
if (!value->GetAsList(&result_list)) { if (!value->GetAsList(&result_list)) {
base::MessageLoop::current()->PostTask( base::MessageLoop::current()->PostTask(
FROM_HERE, FROM_HERE,
...@@ -83,6 +84,7 @@ void PageDistiller::OnExecuteJavaScriptDone(const GURL& page_url, ...@@ -83,6 +84,7 @@ void PageDistiller::OnExecuteJavaScriptDone(const GURL& page_url,
break; break;
case 1: case 1:
page_info->html = item; page_info->html = item;
found_content = true;
break; break;
case 2: case 2:
page_info->next_page_url = item; page_info->next_page_url = item;
...@@ -91,12 +93,16 @@ void PageDistiller::OnExecuteJavaScriptDone(const GURL& page_url, ...@@ -91,12 +93,16 @@ void PageDistiller::OnExecuteJavaScriptDone(const GURL& page_url,
page_info->prev_page_url = item; page_info->prev_page_url = item;
break; break;
default: default:
page_info->image_urls.push_back(item); if (GURL(item).is_valid()) {
page_info->image_urls.push_back(item);
}
} }
} }
base::MessageLoop::current()->PostTask( base::MessageLoop::current()->PostTask(
FROM_HERE, FROM_HERE,
base::Bind(page_distiller_callback_, base::Passed(&page_info), true)); base::Bind(page_distiller_callback_,
base::Passed(&page_info),
found_content));
} }
} }
......
<html>
<head><title>Test Page Title</title></head>
<body>
<div>
<p>Lorem ipsum dolor sit amet, at alia aliquip vel. Quas inani labore an vel. Sed an nemore minimum accusata. Sint inermis tacimates est ex, ad movet iracundia mei, delicata iracundia laboramus ei eos. Illud principes complectitur te nec, ius alienum insolens ea, cu quo oratio omnesque.
<img src="/relativeimage.png">
<img src="http://www.google.com/absoluteimage.png">
<a href="/relativelink.html">Relative link should be made absolute</a>
<a href="http://www.google.com/absolutelink.html">Absolute link should be unchanged.</a>
<p>Lorem ipsum dolor sit amet, at alia aliquip vel. Quas inani labore an vel. Sed an nemore minimum accusata. Sint inermis tacimates est ex, ad movet iracundia mei, delicata iracundia laboramus ei eos. Illud principes complectitur te nec, ius alienum insolens ea, cu quo oratio omnesque.
<p>Lorem ipsum dolor sit amet, at alia aliquip vel. Quas inani labore an vel. Sed an nemore minimum accusata. Sint inermis tacimates est ex, ad movet iracundia mei, delicata iracundia laboramus ei eos. Illud principes complectitur te nec, ius alienum insolens ea, cu quo oratio omnesque.
</div>
<br>
<div>
I am questiontable content. <a href="http://some.questionable.content">Go here</a>
</div>
</body>
</html>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment