Commit b2662554 authored by nyquist@chromium.org's avatar nyquist@chromium.org

[dom_distiller] Add support for lookup by URL for InMemoryContentStore.

The InMemoryContentStore currently only supports lookup by entry ID, but this
CL adds support for also looking up articles by URLs. This will make the class
more usable for when users manually distill web pages.

BUG=379642

Review URL: https://codereview.chromium.org/314573005

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@275618 0039d316-1c4b-4281-b951-d872f2087c98
parent 5fb04d84
...@@ -9,10 +9,13 @@ ...@@ -9,10 +9,13 @@
namespace dom_distiller { namespace dom_distiller {
InMemoryContentStore::InMemoryContentStore(const int max_num_entries) InMemoryContentStore::InMemoryContentStore(const int max_num_entries)
: cache_(max_num_entries) { : cache_(max_num_entries, CacheDeletor(this)) {
} }
InMemoryContentStore::~InMemoryContentStore() { InMemoryContentStore::~InMemoryContentStore() {
// Clear the cache before destruction to ensure the CacheDeletor is not called
// after InMemoryContentStore has been destroyed.
cache_.Clear();
} }
void InMemoryContentStore::SaveContent( void InMemoryContentStore::SaveContent(
...@@ -34,6 +37,19 @@ void InMemoryContentStore::LoadContent( ...@@ -34,6 +37,19 @@ void InMemoryContentStore::LoadContent(
ContentMap::const_iterator it = cache_.Get(entry.entry_id()); ContentMap::const_iterator it = cache_.Get(entry.entry_id());
bool success = it != cache_.end(); bool success = it != cache_.end();
if (!success) {
// Could not find article by entry ID, so try looking it up by URL.
for (int i = 0; i < entry.pages_size(); ++i) {
UrlMap::const_iterator url_it = url_to_id_.find(entry.pages(i).url());
if (url_it != url_to_id_.end()) {
it = cache_.Get(url_it->second);
success = it != cache_.end();
if (success) {
break;
}
}
}
}
scoped_ptr<DistilledArticleProto> distilled_article; scoped_ptr<DistilledArticleProto> distilled_article;
if (success) { if (success) {
distilled_article.reset(new DistilledArticleProto(it->second)); distilled_article.reset(new DistilledArticleProto(it->second));
...@@ -48,6 +64,43 @@ void InMemoryContentStore::LoadContent( ...@@ -48,6 +64,43 @@ void InMemoryContentStore::LoadContent(
void InMemoryContentStore::InjectContent(const ArticleEntry& entry, void InMemoryContentStore::InjectContent(const ArticleEntry& entry,
const DistilledArticleProto& proto) { const DistilledArticleProto& proto) {
cache_.Put(entry.entry_id(), proto); cache_.Put(entry.entry_id(), proto);
AddUrlToIdMapping(entry, proto);
}
void InMemoryContentStore::AddUrlToIdMapping(
const ArticleEntry& entry,
const DistilledArticleProto& proto) {
for (int i = 0; i < proto.pages_size(); i++) {
const DistilledPageProto& page = proto.pages(i);
if (page.has_url()) {
url_to_id_[page.url()] = entry.entry_id();
}
}
}
void InMemoryContentStore::EraseUrlToIdMapping(
const DistilledArticleProto& proto) {
for (int i = 0; i < proto.pages_size(); i++) {
const DistilledPageProto& page = proto.pages(i);
if (page.has_url()) {
url_to_id_.erase(page.url());
}
}
}
InMemoryContentStore::CacheDeletor::CacheDeletor(InMemoryContentStore* store)
: store_(store) {
}
InMemoryContentStore::CacheDeletor::~CacheDeletor() {
}
void InMemoryContentStore::CacheDeletor::operator()(
const DistilledArticleProto& proto) {
// When InMemoryContentStore is deleted, the |store_| pointer becomes invalid,
// but since the ContentMap is cleared in the InMemoryContentStore destructor,
// this should never be called after the destructor.
store_->EraseUrlToIdMapping(proto);
} }
} // namespace dom_distiller } // namespace dom_distiller
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <string> #include <string>
#include "base/bind.h" #include "base/bind.h"
#include "base/containers/hash_tables.h"
#include "base/containers/mru_cache.h" #include "base/containers/mru_cache.h"
#include "components/dom_distiller/core/article_entry.h" #include "components/dom_distiller/core/article_entry.h"
#include "components/dom_distiller/core/proto/distilled_article.pb.h" #include "components/dom_distiller/core/proto/distilled_article.pb.h"
...@@ -40,6 +41,7 @@ class DistilledContentStore { ...@@ -40,6 +41,7 @@ class DistilledContentStore {
// This content store keeps up to |max_num_entries| of the last accessed items // This content store keeps up to |max_num_entries| of the last accessed items
// in its cache. Both loading and saving content is counted as access. // in its cache. Both loading and saving content is counted as access.
// Lookup can be done based on entry ID or URL.
class InMemoryContentStore : public DistilledContentStore { class InMemoryContentStore : public DistilledContentStore {
public: public:
explicit InMemoryContentStore(const int max_num_entries); explicit InMemoryContentStore(const int max_num_entries);
...@@ -57,9 +59,31 @@ class InMemoryContentStore : public DistilledContentStore { ...@@ -57,9 +59,31 @@ class InMemoryContentStore : public DistilledContentStore {
const DistilledArticleProto& proto); const DistilledArticleProto& proto);
private: private:
typedef base::MRUCache<std::string, DistilledArticleProto> ContentMap; // The CacheDeletor gets called when anything is removed from the ContentMap.
class CacheDeletor {
public:
explicit CacheDeletor(InMemoryContentStore* store);
~CacheDeletor();
void operator()(const DistilledArticleProto& proto);
private:
InMemoryContentStore* store_;
};
void AddUrlToIdMapping(const ArticleEntry& entry,
const DistilledArticleProto& proto);
void EraseUrlToIdMapping(const DistilledArticleProto& proto);
typedef base::MRUCacheBase<std::string,
DistilledArticleProto,
InMemoryContentStore::CacheDeletor> ContentMap;
typedef base::hash_map<std::string, std::string> UrlMap;
ContentMap cache_; ContentMap cache_;
UrlMap url_to_id_;
}; };
} // dom_distiller } // dom_distiller
#endif // COMPONENTS_DOM_DISTILLER_CORE_DOM_DISTILLER_CONTENT_CACHE_H_ #endif // COMPONENTS_DOM_DISTILLER_CORE_DOM_DISTILLER_CONTENT_CACHE_H_
...@@ -126,7 +126,7 @@ TEST_F(InMemoryContentStoreTest, SaveAndLoadMultipleArticles) { ...@@ -126,7 +126,7 @@ TEST_F(InMemoryContentStoreTest, SaveAndLoadMultipleArticles) {
// Store second article. // Store second article.
const ArticleEntry second_entry = const ArticleEntry second_entry =
CreateEntry("second", "url1", "url2", "url3"); CreateEntry("second", "url4", "url5", "url6");
const DistilledArticleProto second_stored_proto = const DistilledArticleProto second_stored_proto =
CreateDistilledArticleForEntry(second_entry); CreateDistilledArticleForEntry(second_entry);
store_->SaveContent(second_entry, store_->SaveContent(second_entry,
...@@ -181,7 +181,7 @@ TEST_F(InMemoryContentStoreTest, SaveAndLoadMoreThanMaxArticles) { ...@@ -181,7 +181,7 @@ TEST_F(InMemoryContentStoreTest, SaveAndLoadMoreThanMaxArticles) {
// Store second article. // Store second article.
const ArticleEntry second_entry = const ArticleEntry second_entry =
CreateEntry("second", "url1", "url2", "url3"); CreateEntry("second", "url4", "url5", "url6");
const DistilledArticleProto second_stored_proto = const DistilledArticleProto second_stored_proto =
CreateDistilledArticleForEntry(second_entry); CreateDistilledArticleForEntry(second_entry);
store_->SaveContent(second_entry, store_->SaveContent(second_entry,
...@@ -193,7 +193,7 @@ TEST_F(InMemoryContentStoreTest, SaveAndLoadMoreThanMaxArticles) { ...@@ -193,7 +193,7 @@ TEST_F(InMemoryContentStoreTest, SaveAndLoadMoreThanMaxArticles) {
save_success_ = false; save_success_ = false;
// Store third article. // Store third article.
const ArticleEntry third_entry = CreateEntry("third", "url1", "url2", "url3"); const ArticleEntry third_entry = CreateEntry("third", "url7", "url8", "url9");
const DistilledArticleProto third_stored_proto = const DistilledArticleProto third_stored_proto =
CreateDistilledArticleForEntry(third_entry); CreateDistilledArticleForEntry(third_entry);
store_->SaveContent(third_entry, store_->SaveContent(third_entry,
...@@ -218,7 +218,7 @@ TEST_F(InMemoryContentStoreTest, SaveAndLoadMoreThanMaxArticles) { ...@@ -218,7 +218,7 @@ TEST_F(InMemoryContentStoreTest, SaveAndLoadMoreThanMaxArticles) {
// Store fourth article. // Store fourth article.
const ArticleEntry fourth_entry = const ArticleEntry fourth_entry =
CreateEntry("fourth", "url1", "url2", "url3"); CreateEntry("fourth", "url10", "url11", "url12");
const DistilledArticleProto fourth_stored_proto = const DistilledArticleProto fourth_stored_proto =
CreateDistilledArticleForEntry(fourth_entry); CreateDistilledArticleForEntry(fourth_entry);
store_->SaveContent(fourth_entry, store_->SaveContent(fourth_entry,
...@@ -240,4 +240,94 @@ TEST_F(InMemoryContentStoreTest, SaveAndLoadMoreThanMaxArticles) { ...@@ -240,4 +240,94 @@ TEST_F(InMemoryContentStoreTest, SaveAndLoadMoreThanMaxArticles) {
EXPECT_FALSE(load_success_); EXPECT_FALSE(load_success_);
} }
// Tests whether saving and then loading a single article works as expected.
TEST_F(InMemoryContentStoreTest, LookupArticleByURL) {
base::MessageLoop loop;
const ArticleEntry entry = CreateEntry("test-id", "url1", "url2", "url3");
const DistilledArticleProto stored_proto =
CreateDistilledArticleForEntry(entry);
store_->SaveContent(entry,
stored_proto,
base::Bind(&InMemoryContentStoreTest::OnSaveCallback,
base::Unretained(this)));
base::MessageLoop::current()->RunUntilIdle();
EXPECT_TRUE(save_success_);
save_success_ = false;
// Create an entry where the entry ID does not match, but the first URL does.
const ArticleEntry lookup_entry1 = CreateEntry("lookup-id", "url1", "", "");
store_->LoadContent(lookup_entry1,
base::Bind(&InMemoryContentStoreTest::OnLoadCallback,
base::Unretained(this)));
base::MessageLoop::current()->RunUntilIdle();
EXPECT_TRUE(load_success_);
EXPECT_EQ(stored_proto.SerializeAsString(),
loaded_proto_->SerializeAsString());
// Create an entry where the entry ID does not match, but the second URL does.
const ArticleEntry lookup_entry2 =
CreateEntry("lookup-id", "bogus", "url2", "");
store_->LoadContent(lookup_entry2,
base::Bind(&InMemoryContentStoreTest::OnLoadCallback,
base::Unretained(this)));
base::MessageLoop::current()->RunUntilIdle();
EXPECT_TRUE(load_success_);
EXPECT_EQ(stored_proto.SerializeAsString(),
loaded_proto_->SerializeAsString());
}
// Verifies that the content store does not store unlimited number of articles,
// but expires the oldest ones when the limit for number of articles is reached.
TEST_F(InMemoryContentStoreTest, LoadArticleByURLAfterExpungedFromCache) {
base::MessageLoop loop;
// Create a new store with only |kMaxNumArticles| articles as the limit.
const int kMaxNumArticles = 1;
store_.reset(new InMemoryContentStore(kMaxNumArticles));
// Store an article.
const ArticleEntry first_entry = CreateEntry("first", "url1", "url2", "url3");
const DistilledArticleProto first_stored_proto =
CreateDistilledArticleForEntry(first_entry);
store_->SaveContent(first_entry,
first_stored_proto,
base::Bind(&InMemoryContentStoreTest::OnSaveCallback,
base::Unretained(this)));
base::MessageLoop::current()->RunUntilIdle();
EXPECT_TRUE(save_success_);
save_success_ = false;
// Looking up the first entry by URL should succeed when it is still in the
// cache.
const ArticleEntry first_entry_lookup =
CreateEntry("lookup-id", "url1", "", "");
store_->LoadContent(first_entry_lookup,
base::Bind(&InMemoryContentStoreTest::OnLoadCallback,
base::Unretained(this)));
base::MessageLoop::current()->RunUntilIdle();
EXPECT_TRUE(load_success_);
EXPECT_EQ(first_stored_proto.SerializeAsString(),
loaded_proto_->SerializeAsString());
// Store second article. This will remove the first article from the cache.
const ArticleEntry second_entry =
CreateEntry("second", "url4", "url5", "url6");
const DistilledArticleProto second_stored_proto =
CreateDistilledArticleForEntry(second_entry);
store_->SaveContent(second_entry,
second_stored_proto,
base::Bind(&InMemoryContentStoreTest::OnSaveCallback,
base::Unretained(this)));
base::MessageLoop::current()->RunUntilIdle();
EXPECT_TRUE(save_success_);
save_success_ = false;
// Looking up the first entry by URL should fail when it is not in the cache.
store_->LoadContent(first_entry_lookup,
base::Bind(&InMemoryContentStoreTest::OnLoadCallback,
base::Unretained(this)));
base::MessageLoop::current()->RunUntilIdle();
EXPECT_FALSE(load_success_);
}
} // namespace dom_distiller } // namespace dom_distiller
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment