Make DistillerPageWebContentsTest a real test that uses the distiller.

Previously, the test used a hardcoded distiller template which didn't
help test the distiller. Updates the test to use the real distiller and
adds tests for relativization of images and anchors.

In the process, attempts to handle a few failure modes of the distiller.

BUG=353349,354737
TBR=blundell

Review URL: https://codereview.chromium.org/211493008

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@261463 0039d316-1c4b-4281-b951-d872f2087c98
parent b102c081
......@@ -548,6 +548,7 @@
'../skia/skia.gyp:skia',
'../testing/gmock.gyp:gmock',
'../testing/gtest.gyp:gtest',
'../testing/gmock.gyp:gmock',
],
'include_dirs': [
'..',
......
......@@ -3,6 +3,7 @@
// found in the LICENSE file.
#include "base/memory/weak_ptr.h"
#include "base/path_service.h"
#include "base/run_loop.h"
#include "base/values.h"
#include "components/dom_distiller/content/distiller_page_web_contents.h"
......@@ -10,28 +11,15 @@
#include "content/public/browser/browser_context.h"
#include "content/public/test/content_browser_test.h"
#include "content/shell/browser/shell.h"
#include "grit/component_resources.h"
#include "net/test/embedded_test_server/embedded_test_server.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "ui/base/resource/resource_bundle.h"
using content::ContentBrowserTest;
namespace {
// TODO(bengr): Once JavaScript has landed to extract article content from
// a loaded page, test the interaction of that script with
// DistillerPageWebContents.
static const char kTitle[] = "Test Page Title";
static const char kHtml[] =
"<body>T<img src='http://t.com/t.jpg' id='0'></body>";
static const char kImageUrl[] = "http://t.com/t.jpg";
static const char kScript[] =
" (function () {"
" var result = new Array(3);"
" result[0] = \"Test Page Title\";"
" result[1] = \"<body>T<img src='http://t.com/t.jpg' id='0'></body>\";"
" result[2] = \"http://t.com/t.jpg\";"
" return result;"
" }())";
} // namespace
using testing::ContainsRegex;
using testing::HasSubstr;
using testing::Not;
namespace dom_distiller {
......@@ -39,14 +27,22 @@ class DistillerPageWebContentsTest
: public ContentBrowserTest,
public DistillerPage::Delegate {
public:
// ContentBrowserTest:
virtual void SetUpOnMainThread() OVERRIDE {
AddComponentsResources();
SetUpTestServer();
ContentBrowserTest::SetUpOnMainThread();
}
void DistillPage(const base::Closure& quit_closure, const std::string& url) {
quit_closure_ = quit_closure;
distiller_page_->LoadURL(
embedded_test_server()->GetURL(url));
distiller_page_->LoadURL(embedded_test_server()->GetURL(url));
}
virtual void OnLoadURLDone() OVERRIDE {
distiller_page_->ExecuteJavaScript(kScript);
std::string script = ResourceBundle::GetSharedInstance().
GetRawDataResource(IDR_DISTILLER_JS).as_string();
distiller_page_->ExecuteJavaScript(script);
}
virtual void OnExecuteJavaScriptDone(const GURL& page_url,
......@@ -55,35 +51,101 @@ class DistillerPageWebContentsTest
quit_closure_.Run();
}
private:
void AddComponentsResources() {
base::FilePath pak_file;
base::FilePath pak_dir;
PathService::Get(base::DIR_MODULE, &pak_dir);
pak_file = pak_dir.Append(FILE_PATH_LITERAL("components_resources.pak"));
ui::ResourceBundle::GetSharedInstance().AddDataPackFromPath(
pak_file, ui::SCALE_FACTOR_NONE);
}
void SetUpTestServer() {
base::FilePath path;
PathService::Get(base::DIR_SOURCE_ROOT, &path);
path = path.AppendASCII("components/test/data/dom_distiller");
embedded_test_server()->ServeFilesFromDirectory(path);
ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
}
protected:
DistillerPageWebContents* distiller_page_;
base::Closure quit_closure_;
const base::Value* value_;
};
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, LoadPage) {
ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, BasicDistillationWorks) {
base::WeakPtrFactory<DistillerPage::Delegate> weak_factory(this);
DistillerPageWebContents distiller_page(
weak_factory.GetWeakPtr(), shell()->web_contents()->GetBrowserContext());
distiller_page_ = &distiller_page;
distiller_page.Init();
base::RunLoop run_loop;
DistillPage(run_loop.QuitClosure(), "/simple_page.html");
DistillPage(run_loop.QuitClosure(), "/simple_article.html");
run_loop.Run();
const base::ListValue* result_list = NULL;
ASSERT_TRUE(value_->GetAsList(&result_list));
ASSERT_EQ(3u, result_list->GetSize());
ASSERT_EQ(4u, result_list->GetSize());
std::string title;
result_list->GetString(0, &title);
ASSERT_EQ(kTitle, title);
EXPECT_EQ("Test Page Title", title);
std::string html;
result_list->GetString(1, &html);
EXPECT_THAT(html, HasSubstr("Lorem ipsum"));
EXPECT_THAT(html, Not(HasSubstr("questionable content")));
std::string next_page_url;
result_list->GetString(2, &next_page_url);
EXPECT_EQ("", next_page_url);
std::string unused;
result_list->GetString(3, &unused);
EXPECT_EQ("", unused);
}
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeLinks) {
base::WeakPtrFactory<DistillerPage::Delegate> weak_factory(this);
DistillerPageWebContents distiller_page(
weak_factory.GetWeakPtr(), shell()->web_contents()->GetBrowserContext());
distiller_page_ = &distiller_page;
distiller_page.Init();
base::RunLoop run_loop;
DistillPage(run_loop.QuitClosure(), "/simple_article.html");
run_loop.Run();
const base::ListValue* result_list = NULL;
ASSERT_TRUE(value_->GetAsList(&result_list));
std::string html;
result_list->GetString(1, &html);
// A relative link should've been updated.
EXPECT_THAT(html,
ContainsRegex("href=\"http://127.0.0.1:[0-9]*/relativelink.html\""));
EXPECT_THAT(html,
HasSubstr("href=\"http://www.google.com/absolutelink.html\""));
}
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeImages) {
base::WeakPtrFactory<DistillerPage::Delegate> weak_factory(this);
DistillerPageWebContents distiller_page(
weak_factory.GetWeakPtr(), shell()->web_contents()->GetBrowserContext());
distiller_page_ = &distiller_page;
distiller_page.Init();
base::RunLoop run_loop;
DistillPage(run_loop.QuitClosure(), "/simple_article.html");
run_loop.Run();
const base::ListValue* result_list = NULL;
ASSERT_TRUE(value_->GetAsList(&result_list));
std::string html;
result_list->GetString(1, &html);
ASSERT_EQ(kHtml, html);
std::string image_url;
result_list->GetString(2, &image_url);
ASSERT_EQ(kImageUrl, image_url);
// A relative link should've been updated.
EXPECT_THAT(html,
ContainsRegex("src=\"http://127.0.0.1:[0-9]*/relativeimage.png\""));
EXPECT_THAT(html,
HasSubstr("src=\"http://www.google.com/absoluteimage.png\""));
}
} // namespace dom_distiller
......@@ -2,8 +2,6 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// TODO(cjhopman): remove dependency on readability.
// These includes will be processed at build time by grit.
<include src="../../../../third_party/dom_distiller_js/js/domdistiller.js"/>
......@@ -13,12 +11,16 @@
// element is the previous page link.
(function() {
var result = new Array(4);
result[0] = com.dom_distiller.DocumentTitleGetter.getDocumentTitle(
document.title, document.documentElement);
result[1] = com.dom_distiller.ContentExtractor.extractContent();
result[2] = com.dom_distiller.PagingLinksFinder.findNext(
document.documentElement);
// TODO(shashishekhar): Add actual previous page link here.
result[3] = '';
try {
result[0] = com.dom_distiller.DocumentTitleGetter.getDocumentTitle(
document.title, document.documentElement);
result[1] = com.dom_distiller.ContentExtractor.extractContent();
result[2] = com.dom_distiller.PagingLinksFinder.findNext(
document.documentElement);
// TODO(shashishekhar): Add actual previous page link here.
result[3] = '';
} catch (e) {
window.console.log("Error during distillation: " + e);
}
return result;
})()
......@@ -63,6 +63,7 @@ void PageDistiller::OnExecuteJavaScriptDone(const GURL& page_url,
scoped_ptr<DistilledPageInfo> page_info(new DistilledPageInfo());
std::string result;
const base::ListValue* result_list = NULL;
bool found_content = false;
if (!value->GetAsList(&result_list)) {
base::MessageLoop::current()->PostTask(
FROM_HERE,
......@@ -83,6 +84,7 @@ void PageDistiller::OnExecuteJavaScriptDone(const GURL& page_url,
break;
case 1:
page_info->html = item;
found_content = true;
break;
case 2:
page_info->next_page_url = item;
......@@ -91,12 +93,16 @@ void PageDistiller::OnExecuteJavaScriptDone(const GURL& page_url,
page_info->prev_page_url = item;
break;
default:
page_info->image_urls.push_back(item);
if (GURL(item).is_valid()) {
page_info->image_urls.push_back(item);
}
}
}
base::MessageLoop::current()->PostTask(
FROM_HERE,
base::Bind(page_distiller_callback_, base::Passed(&page_info), true));
base::Bind(page_distiller_callback_,
base::Passed(&page_info),
found_content));
}
}
......
<html>
<head><title>Test Page Title</title></head>
<body>
<div>
<p>Lorem ipsum dolor sit amet, at alia aliquip vel. Quas inani labore an vel. Sed an nemore minimum accusata. Sint inermis tacimates est ex, ad movet iracundia mei, delicata iracundia laboramus ei eos. Illud principes complectitur te nec, ius alienum insolens ea, cu quo oratio omnesque.
<img src="/relativeimage.png">
<img src="http://www.google.com/absoluteimage.png">
<a href="/relativelink.html">Relative link should be made absolute</a>
<a href="http://www.google.com/absolutelink.html">Absolute link should be unchanged.</a>
<p>Lorem ipsum dolor sit amet, at alia aliquip vel. Quas inani labore an vel. Sed an nemore minimum accusata. Sint inermis tacimates est ex, ad movet iracundia mei, delicata iracundia laboramus ei eos. Illud principes complectitur te nec, ius alienum insolens ea, cu quo oratio omnesque.
<p>Lorem ipsum dolor sit amet, at alia aliquip vel. Quas inani labore an vel. Sed an nemore minimum accusata. Sint inermis tacimates est ex, ad movet iracundia mei, delicata iracundia laboramus ei eos. Illud principes complectitur te nec, ius alienum insolens ea, cu quo oratio omnesque.
</div>
<br>
<div>
I am questiontable content. <a href="http://some.questionable.content">Go here</a>
</div>
</body>
</html>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment