Store page numbers for pages undergoing distillation.

In order to support distillation of previous pages and enable
meaningful incremental updates for viewers, distiller needs
to maintain page number information for pages under distillation.
This information will be used to add support for incremental updates
and distilling previous pages of an article.

BUG=288015
TEST=Covered by existing tests + added tests for failure and
     page limit for distiller.

Review URL: https://codereview.chromium.org/130543003

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@251546 0039d316-1c4b-4281-b951-d872f2087c98
parent 77a3bfab
This diff is collapsed.
......@@ -68,36 +68,97 @@ class DistillerImpl : public Distiller {
virtual void DistillPage(const GURL& url,
const DistillerCallback& callback) OVERRIDE;
void SetMaxNumPagesInArticle(size_t max_num_pages);
private:
void OnFetchImageDone(DistilledPageProto* distilled_page_proto,
// In case of multiple pages, the Distiller maintains the state of each page
// keyed by its page number relative to the page where distillation started.
// E.g. if distillation starts at page 2 of a 3 page article, the relative
// page numbers assigned to the pages will be [-1, 0, 1].
// State of a single page under distillation. One instance is kept per page;
// instances are not copyable (see DISALLOW_COPY_AND_ASSIGN below).
struct DistilledPageData {
DistilledPageData();
// NOTE(review): the destructor is virtual although no other virtual member
// is declared here — confirm whether this type is meant to be subclassed.
virtual ~DistilledPageData();
// Relative page number of the page.
int page_num;
// Title string stored for this page — presumably the title produced by
// distillation; confirm against the implementation file.
std::string title;
// URL fetchers owned by this page — presumably one per in-flight image
// fetch; confirm against the implementation file. NOTE(review): the trailing
// underscore is inconsistent with the other public members of this struct.
ScopedVector<DistillerURLFetcher> image_fetchers_;
// Owned proto object for this page.
scoped_ptr<DistilledPageProto> proto;
private:
DISALLOW_COPY_AND_ASSIGN(DistilledPageData);
};
void OnFetchImageDone(int page_num,
DistillerURLFetcher* url_fetcher,
const std::string& id,
const std::string& response);
void OnPageDistillationFinished(const GURL& page_url,
void OnPageDistillationFinished(int page_num,
const GURL& page_url,
scoped_ptr<DistilledPageInfo> distilled_page,
bool distillation_successful);
virtual void FetchImage(DistilledPageProto* distilled_page_proto,
virtual void FetchImage(int page_num,
const std::string& image_id,
const std::string& item);
// Distills the page and adds the new page to |article_proto|.
void DistillPage(const GURL& url);
// Distills the next page.
void DistillNextPage();
// Adds the |url| to |waiting_pages_| if |page_num| is a valid relative
// page number and |url| is valid. Ignores duplicate pages and urls.
void AddToDistillationQueue(int page_num, const GURL& url);
// Check if |page_num| is a valid relative page number, i.e. page with
// |page_num| is either under distillation or has already completed
// distillation.
bool IsPageNumberInUse(int page_num) const;
bool AreAllPagesFinished() const;
// Total number of pages in the article that the distiller knows of, this
// includes pages that are pending distillation.
size_t TotalPageCount() const;
// Runs |distillation_cb_| if all distillation callbacks and image fetches are
// complete.
void RunDistillerCallbackIfDone();
// Checks if the page with relative page number |page_num| has finished
// distillation, including all image fetches.
void AddPageIfDone(int page_num);
DistilledPageData* GetPageAtIndex(size_t index) const;
const DistillerURLFetcherFactory& distiller_url_fetcher_factory_;
scoped_ptr<PageDistiller> page_distiller_;
DistillerCallback distillation_cb_;
ScopedVector<DistillerURLFetcher> image_fetchers_;
scoped_ptr<DistilledArticleProto> article_proto_;
bool distillation_in_progress_;
// Set to keep track of which urls are already seen by the distiller.
base::hash_set<std::string> processed_urls_;
// Pages that are under distillation or have finished distillation.
// |started_pages_index_| and |finished_pages_index_| maintain the mapping
// from page number to the indices in |pages_|.
ScopedVector<DistilledPageData> pages_;
// Maps page numbers of finished pages to the indices in |pages_|.
std::map<int, size_t> finished_pages_index_;
// Maps page numbers of pages under distillation to the indices in |pages_|.
// If a page is in |started_pages_index_|, it is still waiting for an action
// (distillation or image fetch) to finish.
base::hash_map<int, size_t> started_pages_index_;
// The list of pages that are still waiting for distillation to start.
// This is a map, to make distiller prefer distilling lower page numbers
// first.
std::map<int, GURL> waiting_pages_;
// Set to keep track of which urls are already seen by the distiller. Used to
// prevent distiller from distilling the same url twice.
base::hash_set<std::string> seen_urls_;
size_t max_pages_in_article_;
DISALLOW_COPY_AND_ASSIGN(DistillerImpl);
};
......
......@@ -117,7 +117,6 @@ class MockDistillerPageFactory : public DistillerPageFactory {
}
};
class DistillerTest : public testing::Test {
public:
virtual ~DistillerTest() {}
......@@ -290,4 +289,117 @@ TEST_F(DistillerTest, DistillLinkLoop) {
EXPECT_EQ(article_proto_->pages_size(), 1);
}
// Checks that the article produced by the distiller holds exactly the
// configured maximum number of pages, both when the article keeps advertising
// further pages and when it ends exactly at the limit.
TEST_F(DistillerTest, CheckMaxPageLimit) {
  base::MessageLoopForUI loop;
  const size_t kMaxPagesInArticle = 10;
  scoped_ptr<base::ListValue> list[kMaxPagesInArticle];
  string page_urls[kMaxPagesInArticle];
  string url_prefix = "http://a.com/";

  // Every page, including the last one, carries a next page url. Without a
  // working page limit the distiller would therefore attempt
  // kMaxPagesInArticle + 1 distillations.
  for (size_t index = 0; index < kMaxPagesInArticle; ++index) {
    string next_page_url = url_prefix + base::IntToString(index + 2);
    string content = "Content for page:" + base::IntToString(index);
    page_urls[index] = url_prefix + base::IntToString(index + 1);
    list[index] = CreateDistilledValueReturnedFromJS(
        kTitle, content, vector<int>(), next_page_url);
  }

  // First pass: the article advertises more pages than the limit allows.
  EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
      .WillOnce(CreateMockDistillerPages(
          list, page_urls, static_cast<int>(kMaxPagesInArticle)));
  distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
  distiller_->SetMaxNumPagesInArticle(kMaxPagesInArticle);
  distiller_->Init();
  distiller_->DistillPage(
      GURL(page_urls[0]),
      base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
  base::MessageLoop::current()->RunUntilIdle();

  EXPECT_EQ(kTitle, article_proto_->title());
  EXPECT_EQ(kMaxPagesInArticle,
            static_cast<size_t>(article_proto_->pages_size()));

  // Second pass: clear the next page url of the last page so that the article
  // is exactly as long as the limit, then distill it again from scratch.
  list[kMaxPagesInArticle - 1] =
      CreateDistilledValueReturnedFromJS(kTitle, "Content", vector<int>(), "");

  EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
      .WillOnce(CreateMockDistillerPages(
          list, page_urls, static_cast<int>(kMaxPagesInArticle)));
  distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
  distiller_->SetMaxNumPagesInArticle(kMaxPagesInArticle);
  distiller_->Init();
  distiller_->DistillPage(
      GURL(page_urls[0]),
      base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
  base::MessageLoop::current()->RunUntilIdle();

  EXPECT_EQ(kTitle, article_proto_->title());
  EXPECT_EQ(kMaxPagesInArticle,
            static_cast<size_t>(article_proto_->pages_size()));
}
// Checks that when the only page fails to distill, the resulting article has
// an empty title and contains no pages.
TEST_F(DistillerTest, SinglePageDistillationFailure) {
  base::MessageLoopForUI loop;

  // A null value returned from the page simulates a distillation failure.
  scoped_ptr<base::Value> null_result(base::Value::CreateNullValue());
  EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
      .WillOnce(CreateMockDistillerPage(null_result.get(), GURL(kURL)));

  distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
  distiller_->Init();
  distiller_->DistillPage(
      GURL(kURL),
      base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
  base::MessageLoop::current()->RunUntilIdle();

  EXPECT_EQ("", article_proto_->title());
  EXPECT_EQ(0, article_proto_->pages_size());
}
// Checks that when one page in the middle of a multi-page article fails to
// distill, the resulting article contains exactly the pages that precede the
// failed page, each with its expected content and url.
TEST_F(DistillerTest, MultiplePagesDistillationFailure) {
  base::MessageLoopForUI loop;
  const int kNumPages = 8;
  // The page number of the failed page. It never changes, so it is a constant.
  const int kFailedPageNum = 3;
  string content[kNumPages];
  string page_urls[kNumPages];
  scoped_ptr<base::Value> distilled_values[kNumPages];

  string url_prefix = "http://a.com/";
  for (int page_num = 0; page_num < kNumPages; ++page_num) {
    page_urls[page_num] = url_prefix + base::IntToString(page_num);
    content[page_num] = "Content for page:" + base::IntToString(page_num);
    string next_page_url = url_prefix + base::IntToString(page_num + 1);
    if (page_num != kFailedPageNum) {
      distilled_values[page_num] = CreateDistilledValueReturnedFromJS(
          kTitle, content[page_num], vector<int>(), next_page_url);
    } else {
      // A null value simulates a distillation failure for this page.
      distilled_values[page_num].reset(base::Value::CreateNullValue());
    }
  }

  // Expect only calls till the failed page number.
  EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
      .WillOnce(CreateMockDistillerPages(
          distilled_values, page_urls, kFailedPageNum + 1));

  distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
  distiller_->Init();
  distiller_->DistillPage(
      GURL(page_urls[0]),
      base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
  base::MessageLoop::current()->RunUntilIdle();

  EXPECT_EQ(kTitle, article_proto_->title());
  // Expected value first, matching the other assertions in these tests, so
  // that a failure message labels the expected and actual values correctly.
  EXPECT_EQ(kFailedPageNum, article_proto_->pages_size());
  for (int page_num = 0; page_num < kFailedPageNum; ++page_num) {
    const DistilledPageProto& page = article_proto_->pages(page_num);
    EXPECT_EQ(content[page_num], page.html());
    EXPECT_EQ(page_urls[page_num], page.url());
  }
}
} // namespace dom_distiller
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment