Commit 5d592f0f authored by rmcilroy@chromium.org's avatar rmcilroy@chromium.org

Replace FTS in the history_service with a brute force text search.

The only remaining user of the text_database full-text-search (FTS) is the
chrome://history page.  The FTS database's index files are large, causing
storage and overhead problems.  This change switches the history page's
search to a brute-force search over URLs and titles in the History DB (which
is small enough that this gives acceptable performance).

A follow up CL will remove all of the unused FTS and text_database code.

BUG=247417
TEST=Tested searches in history page using a number of different character sets.

Review URL: https://chromiumcodereview.appspot.com/16776004

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@208068 0039d316-1c4b-4281-b951-d872f2087c98
parent 07cb9bd4
...@@ -1375,8 +1375,14 @@ void HistoryBackend::QueryHistory(scoped_refptr<QueryHistoryRequest> request, ...@@ -1375,8 +1375,14 @@ void HistoryBackend::QueryHistory(scoped_refptr<QueryHistoryRequest> request,
// if (archived_db_.get() && // if (archived_db_.get() &&
// expirer_.GetCurrentArchiveTime() - TimeDelta::FromDays(7)) { // expirer_.GetCurrentArchiveTime() - TimeDelta::FromDays(7)) {
} else { } else {
// Full text history query. // Text history query.
QueryHistoryFTS(text_query, options, &request->value); QueryHistoryText(db_.get(), db_.get(), text_query, options,
&request->value);
if (archived_db_.get() &&
expirer_.GetCurrentArchiveTime() >= options.begin_time) {
QueryHistoryText(archived_db_.get(), archived_db_.get(), text_query,
options, &request->value);
}
} }
} }
...@@ -1441,6 +1447,43 @@ void HistoryBackend::QueryHistoryBasic(URLDatabase* url_db, ...@@ -1441,6 +1447,43 @@ void HistoryBackend::QueryHistoryBasic(URLDatabase* url_db,
result->set_reached_beginning(true); result->set_reached_beginning(true);
} }
// Text-based querying of history: brute-force matches |text_query| against
// the URLs and titles in |url_db|, then expands each matching URL into its
// individual visits from |visit_db| and appends them to |result|, most
// recent visit first, up to |options.max_count| entries.
void HistoryBackend::QueryHistoryText(URLDatabase* url_db,
                                      VisitDatabase* visit_db,
                                      const string16& text_query,
                                      const QueryOptions& options,
                                      QueryResults* result) {
  URLRows text_matches;
  url_db->GetTextMatches(text_query, &text_matches);

  std::vector<URLResult> matching_visits;
  VisitVector visits;  // Declared outside loop to prevent re-construction.
  for (size_t i = 0; i < text_matches.size(); i++) {
    const URLRow& text_match = text_matches[i];
    // Get all visits for given URL match.
    visit_db->GetVisitsForURLWithOptions(text_match.id(), options, &visits);
    for (size_t j = 0; j < visits.size(); j++) {
      URLResult url_result(text_match);
      url_result.set_visit_time(visits[j].visit_time);
      matching_visits.push_back(url_result);
    }
  }

  std::sort(matching_visits.begin(), matching_visits.end(),
            URLResult::CompareVisitTime);

  // A max_count of 0 means "no limit".  Fixed: the count must be cast to
  // size_t — the original static_cast<int> was a no-op on an int field that
  // was then implicitly converted, which is misleading and would wrap to a
  // huge size_t for a negative max_count.
  size_t max_results = options.max_count == 0 ?
      std::numeric_limits<size_t>::max() :
      static_cast<size_t>(options.max_count);

  for (std::vector<URLResult>::iterator it = matching_visits.begin();
       it != matching_visits.end() && result->size() < max_results; ++it) {
    result->AppendURLBySwapping(&(*it));
  }

  // We reached the beginning of history only if every matching visit fit in
  // the result set and the query window covers the first recorded visit.
  if (matching_visits.size() == result->size() &&
      options.begin_time <= first_recorded_time_)
    result->set_reached_beginning(true);
}
void HistoryBackend::QueryHistoryFTS(const string16& text_query, void HistoryBackend::QueryHistoryFTS(const string16& text_query,
const QueryOptions& options, const QueryOptions& options,
QueryResults* result) { QueryResults* result) {
......
...@@ -641,12 +641,18 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, ...@@ -641,12 +641,18 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>,
// Querying ------------------------------------------------------------------ // Querying ------------------------------------------------------------------
// Backends for QueryHistory. *Basic() handles queries that are not FTS (full // Backends for QueryHistory. *Basic() handles queries that are not
// text search) queries and can just be given directly to the history DB). // text search queries and can just be given directly to the history DB.
// The FTS version queries the text_database, then merges with the history DB. // The *Text() version performs a brute force query of the history DB to
// search for results which match the given text query.
// Both functions assume QueryHistory already checked the DB for validity. // Both functions assume QueryHistory already checked the DB for validity.
void QueryHistoryBasic(URLDatabase* url_db, VisitDatabase* visit_db, void QueryHistoryBasic(URLDatabase* url_db, VisitDatabase* visit_db,
const QueryOptions& options, QueryResults* result); const QueryOptions& options, QueryResults* result);
void QueryHistoryText(URLDatabase* url_db,
VisitDatabase* visit_db,
const string16& text_query,
const QueryOptions& options,
QueryResults* result);
void QueryHistoryFTS(const string16& text_query, void QueryHistoryFTS(const string16& text_query,
const QueryOptions& options, const QueryOptions& options,
QueryResults* result); QueryResults* result);
......
...@@ -26,29 +26,25 @@ struct TestEntry { ...@@ -26,29 +26,25 @@ struct TestEntry {
const char* url; const char* url;
const char* title; const char* title;
const int days_ago; const int days_ago;
const char* body;
Time time; // Filled by SetUp. Time time; // Filled by SetUp.
} test_entries[] = { } test_entries[] = {
// This one is visited super long ago so it will be in a different database // This one is visited super long ago so it will be in a different database
// from the next appearance of it at the end. // from the next appearance of it at the end.
{"http://example.com/", "Other", 180, "Other"}, {"http://example.com/", "Other", 180},
// These are deliberately added out of chronological order. The history // These are deliberately added out of chronological order. The history
// service should sort them by visit time when returning query results. // service should sort them by visit time when returning query results.
// The correct index sort order is 4 2 3 1 7 6 5 0. // The correct index sort order is 4 2 3 1 7 6 5 0.
{"http://www.google.com/1", "Title 1", 10, {"http://www.google.com/1", "Title PAGEONE FOO some text", 10},
"PAGEONE FOO some body text"}, {"http://www.google.com/3", "Title PAGETHREE BAR some hello world", 8},
{"http://www.google.com/3", "Title 3", 8, {"http://www.google.com/2", "Title PAGETWO FOO some more blah blah blah", 9},
"PAGETHREE BAR some hello world for you"},
{"http://www.google.com/2", "Title 2", 9,
"PAGETWO FOO some more blah blah blah Title"},
// A more recent visit of the first one. // A more recent visit of the first one.
{"http://example.com/", "Other", 6, "Other"}, {"http://example.com/", "Other", 6},
{"http://www.google.com/6", "Title 6", 13, "I'm the second oldest"}, {"http://www.google.com/6", "Title I'm the second oldest", 13},
{"http://www.google.com/4", "Title 4", 12, "four"}, {"http://www.google.com/4", "Title four", 12},
{"http://www.google.com/5", "Title 5", 11, "five"}, {"http://www.google.com/5", "Title five", 11},
}; };
// Returns true if the nth result in the given results set matches. It will // Returns true if the nth result in the given results set matches. It will
...@@ -91,8 +87,8 @@ class HistoryQueryTest : public testing::Test { ...@@ -91,8 +87,8 @@ class HistoryQueryTest : public testing::Test {
} }
// Test paging through results, with a fixed number of results per page. // Test paging through results, with a fixed number of results per page.
// Defined here so code can be shared for the FTS version and the non-FTS // Defined here so code can be shared for the text search and the non-text
// version. // search versions.
void TestPaging(const std::string& query_text, void TestPaging(const std::string& query_text,
const int* expected_results, const int* expected_results,
int results_length) { int results_length) {
...@@ -125,10 +121,10 @@ class HistoryQueryTest : public testing::Test { ...@@ -125,10 +121,10 @@ class HistoryQueryTest : public testing::Test {
} }
// Add a couple of entries with duplicate timestamps. Use |query_text| as // Add a couple of entries with duplicate timestamps. Use |query_text| as
// the body of both entries so that they match a full-text query. // the title of both entries so that they match a text query.
TestEntry duplicates[] = { TestEntry duplicates[] = {
{ "http://www.google.com/x", "", 1, query_text.c_str() }, { "http://www.google.com/x", query_text.c_str(), 1, },
{ "http://www.google.com/y", "", 1, query_text.c_str() } { "http://www.google.com/y", query_text.c_str(), 1, }
}; };
AddEntryToHistory(duplicates[0]); AddEntryToHistory(duplicates[0]);
AddEntryToHistory(duplicates[1]); AddEntryToHistory(duplicates[1]);
...@@ -157,7 +153,6 @@ class HistoryQueryTest : public testing::Test { ...@@ -157,7 +153,6 @@ class HistoryQueryTest : public testing::Test {
history::RedirectList(), content::PAGE_TRANSITION_LINK, history::RedirectList(), content::PAGE_TRANSITION_LINK,
history::SOURCE_BROWSED, false); history::SOURCE_BROWSED, false);
history_->SetPageTitle(url, UTF8ToUTF16(entry.title)); history_->SetPageTitle(url, UTF8ToUTF16(entry.title));
history_->SetPageContents(url, UTF8ToUTF16(entry.body));
} }
private: private:
...@@ -313,14 +308,13 @@ TEST_F(HistoryQueryTest, ReachedBeginning) { ...@@ -313,14 +308,13 @@ TEST_F(HistoryQueryTest, ReachedBeginning) {
EXPECT_TRUE(results.reached_beginning()); EXPECT_TRUE(results.reached_beginning());
options.max_count = results.size(); options.max_count = results.size();
QueryHistory("some", options, &results); QueryHistory("some", options, &results);
// Since the query didn't cover the oldest visit in the database, we EXPECT_TRUE(results.reached_beginning());
// expect false here.
EXPECT_FALSE(results.reached_beginning());
} }
// This does most of the same tests above, but searches for a FTS string that // This does most of the same tests above, but performs a text searches for a
// will match the pages in question. This will trigger a different code path. // string that will match the pages in question. This will trigger a
TEST_F(HistoryQueryTest, FTS) { // different code path.
TEST_F(HistoryQueryTest, TextSearch) {
ASSERT_TRUE(history_.get()); ASSERT_TRUE(history_.get());
QueryOptions options; QueryOptions options;
...@@ -349,33 +343,8 @@ TEST_F(HistoryQueryTest, FTS) { ...@@ -349,33 +343,8 @@ TEST_F(HistoryQueryTest, FTS) {
EXPECT_TRUE(NthResultIs(results, 0, 1)); EXPECT_TRUE(NthResultIs(results, 0, 1));
} }
// Searches titles. // Tests prefix searching for text search queries.
TEST_F(HistoryQueryTest, FTSTitle) { TEST_F(HistoryQueryTest, TextSearchPrefix) {
ASSERT_TRUE(history_.get());
QueryOptions options;
QueryResults results;
// First execute a body-only query, to ensure that it works and that that
// version of the statement is not cached for the next query.
options.body_only = true;
QueryHistory("Title", options, &results);
EXPECT_EQ(1U, results.size());
EXPECT_TRUE(NthResultIs(results, 0, 3));
options.body_only = false;
// Query all time but with a limit on the number of entries. We should
// get the N most recent entries.
options.max_count = 3;
QueryHistory("title", options, &results);
EXPECT_EQ(3U, results.size());
EXPECT_TRUE(NthResultIs(results, 0, 2));
EXPECT_TRUE(NthResultIs(results, 1, 3));
EXPECT_TRUE(NthResultIs(results, 2, 1));
}
// Tests prefix searching for Full Text Search queries.
TEST_F(HistoryQueryTest, FTSPrefix) {
ASSERT_TRUE(history_.get()); ASSERT_TRUE(history_.get());
QueryOptions options; QueryOptions options;
...@@ -389,8 +358,8 @@ TEST_F(HistoryQueryTest, FTSPrefix) { ...@@ -389,8 +358,8 @@ TEST_F(HistoryQueryTest, FTSPrefix) {
EXPECT_TRUE(NthResultIs(results, 1, 3)); EXPECT_TRUE(NthResultIs(results, 1, 3));
} }
// Tests max_count feature for Full Text Search queries. // Tests max_count feature for text search queries.
TEST_F(HistoryQueryTest, FTSCount) { TEST_F(HistoryQueryTest, TextSearchCount) {
ASSERT_TRUE(history_.get()); ASSERT_TRUE(history_.get());
QueryOptions options; QueryOptions options;
...@@ -413,21 +382,21 @@ TEST_F(HistoryQueryTest, FTSCount) { ...@@ -413,21 +382,21 @@ TEST_F(HistoryQueryTest, FTSCount) {
EXPECT_TRUE(NthResultIs(results, 0, 3)); EXPECT_TRUE(NthResultIs(results, 0, 3));
} }
// Tests that FTS queries can find URLs when they exist only in the archived // Tests that text search queries can find URLs when they exist only in the
// database. This also tests that imported URLs can be found, since we use // archived database. This also tests that imported URLs can be found, since
// AddPageWithDetails just like the importer. // we use AddPageWithDetails just like the importer.
TEST_F(HistoryQueryTest, FTSArchived) { TEST_F(HistoryQueryTest, TextSearchArchived) {
ASSERT_TRUE(history_.get()); ASSERT_TRUE(history_.get());
URLRows urls_to_add; URLRows urls_to_add;
URLRow row1(GURL("http://foo.bar/")); URLRow row1(GURL("http://foo.bar/"));
row1.set_title(UTF8ToUTF16("archived title")); row1.set_title(UTF8ToUTF16("archived title same"));
row1.set_last_visit(Time::Now() - TimeDelta::FromDays(365)); row1.set_last_visit(Time::Now() - TimeDelta::FromDays(365));
urls_to_add.push_back(row1); urls_to_add.push_back(row1);
URLRow row2(GURL("http://foo.bar/")); URLRow row2(GURL("http://foo.bar/"));
row2.set_title(UTF8ToUTF16("nonarchived title")); row2.set_title(UTF8ToUTF16("nonarchived title same"));
row2.set_last_visit(Time::Now()); row2.set_last_visit(Time::Now());
urls_to_add.push_back(row2); urls_to_add.push_back(row2);
...@@ -436,13 +405,22 @@ TEST_F(HistoryQueryTest, FTSArchived) { ...@@ -436,13 +405,22 @@ TEST_F(HistoryQueryTest, FTSArchived) {
QueryOptions options; QueryOptions options;
QueryResults results; QueryResults results;
// Query all time. The title we get should be the one in the full text // Query all time. The title we get should be the one in the archived and
// database and not the most current title (since otherwise highlighting in // not the most current title (since otherwise highlighting in
// the title might be wrong). // the title might be wrong).
QueryHistory("archived", options, &results); QueryHistory("archived", options, &results);
ASSERT_EQ(1U, results.size()); ASSERT_EQ(1U, results.size());
EXPECT_TRUE(row1.url() == results[0].url()); EXPECT_TRUE(row1.url() == results[0].url());
EXPECT_TRUE(row1.title() == results[0].title()); EXPECT_TRUE(row1.title() == results[0].title());
// Check query is ordered correctly when split between archived and
// non-archived database.
QueryHistory("same", options, &results);
ASSERT_EQ(2U, results.size());
EXPECT_TRUE(row2.url() == results[0].url());
EXPECT_TRUE(row2.title() == results[0].title());
EXPECT_TRUE(row1.url() == results[1].url());
EXPECT_TRUE(row1.title() == results[1].title());
} }
/* TODO(brettw) re-enable this. It is commented out because the current history /* TODO(brettw) re-enable this. It is commented out because the current history
...@@ -451,15 +429,15 @@ TEST_F(HistoryQueryTest, FTSArchived) { ...@@ -451,15 +429,15 @@ TEST_F(HistoryQueryTest, FTSArchived) {
won't get picked up by the deletor and it can happen again. When this is the won't get picked up by the deletor and it can happen again. When this is the
case, we should fix this test to duplicate that situation. case, we should fix this test to duplicate that situation.
// Tests duplicate collapsing and not in Full Text Search situations. // Tests duplicate collapsing and not in text search situations.
TEST_F(HistoryQueryTest, FTSDupes) { TEST_F(HistoryQueryTest, TextSearchDupes) {
ASSERT_TRUE(history_.get()); ASSERT_TRUE(history_.get());
QueryOptions options; QueryOptions options;
QueryResults results; QueryResults results;
QueryHistory("Other", options, &results); QueryHistory("Other", options, &results);
EXPECT_EQ(1, results.urls().size()); EXPECT_EQ(1U, results.size());
EXPECT_TRUE(NthResultIs(results, 0, 4)); EXPECT_TRUE(NthResultIs(results, 0, 4));
} }
*/ */
...@@ -472,7 +450,7 @@ TEST_F(HistoryQueryTest, Paging) { ...@@ -472,7 +450,7 @@ TEST_F(HistoryQueryTest, Paging) {
TestPaging(std::string(), expected_results, arraysize(expected_results)); TestPaging(std::string(), expected_results, arraysize(expected_results));
} }
TEST_F(HistoryQueryTest, FTSPaging) { TEST_F(HistoryQueryTest, TextSearchPaging) {
// Since results are fetched 1 and 2 at a time, entry #0 and #6 will not // Since results are fetched 1 and 2 at a time, entry #0 and #6 will not
// be de-duplicated. Entry #4 does not contain the text "title", so it // be de-duplicated. Entry #4 does not contain the text "title", so it
// shouldn't appear. // shouldn't appear.
......
...@@ -107,6 +107,10 @@ URLResult::URLResult(const GURL& url, ...@@ -107,6 +107,10 @@ URLResult::URLResult(const GURL& url,
: URLRow(url) { : URLRow(url) {
title_match_positions_ = title_matches; title_match_positions_ = title_matches;
} }
// Builds a URLResult from a plain URLRow.  visit_time_ is left at its
// default; blocked_visit_ is explicitly initialized so the flag is never
// read uninitialized by SwapResult().
URLResult::URLResult(const URLRow& url_row)
    : URLRow(url_row),
      blocked_visit_(false) {
}
URLResult::~URLResult() { URLResult::~URLResult() {
} }
...@@ -119,6 +123,11 @@ void URLResult::SwapResult(URLResult* other) { ...@@ -119,6 +123,11 @@ void URLResult::SwapResult(URLResult* other) {
std::swap(blocked_visit_, other->blocked_visit_); std::swap(blocked_visit_, other->blocked_visit_);
} }
// static
// Strict-weak-ordering comparator for std::sort: orders results from the
// most recent visit to the least recent one.
bool URLResult::CompareVisitTime(const URLResult& lhs, const URLResult& rhs) {
  return rhs.visit_time() < lhs.visit_time();
}
// QueryResults ---------------------------------------------------------------- // QueryResults ----------------------------------------------------------------
QueryResults::QueryResults() : reached_beginning_(false) { QueryResults::QueryResults() : reached_beginning_(false) {
......
...@@ -294,6 +294,7 @@ class URLResult : public URLRow { ...@@ -294,6 +294,7 @@ class URLResult : public URLRow {
// Constructor that create a URLResult from the specified URL and title match // Constructor that create a URLResult from the specified URL and title match
// positions from title_matches. // positions from title_matches.
URLResult(const GURL& url, const Snippet::MatchPositions& title_matches); URLResult(const GURL& url, const Snippet::MatchPositions& title_matches);
explicit URLResult(const URLRow& url_row);
virtual ~URLResult(); virtual ~URLResult();
base::Time visit_time() const { return visit_time_; } base::Time visit_time() const { return visit_time_; }
...@@ -315,6 +316,8 @@ class URLResult : public URLRow { ...@@ -315,6 +316,8 @@ class URLResult : public URLRow {
void SwapResult(URLResult* other); void SwapResult(URLResult* other);
static bool CompareVisitTime(const URLResult& lhs, const URLResult& rhs);
private: private:
friend class HistoryBackend; friend class HistoryBackend;
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "base/i18n/case_conversion.h" #include "base/i18n/case_conversion.h"
#include "base/logging.h" #include "base/logging.h"
#include "base/stl_util.h" #include "base/stl_util.h"
#include "base/strings/utf_string_conversions.h"
namespace { namespace {
...@@ -88,6 +89,8 @@ class QueryNodeWord : public QueryNode { ...@@ -88,6 +89,8 @@ class QueryNodeWord : public QueryNode {
virtual bool HasMatchIn( virtual bool HasMatchIn(
const std::vector<QueryWord>& words, const std::vector<QueryWord>& words,
Snippet::MatchPositions* match_positions) const OVERRIDE; Snippet::MatchPositions* match_positions) const OVERRIDE;
virtual bool HasMatchIn(
const std::vector<QueryWord>& words) const OVERRIDE;
virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; virtual void AppendWords(std::vector<string16>* words) const OVERRIDE;
private: private:
...@@ -138,6 +141,14 @@ bool QueryNodeWord::HasMatchIn(const std::vector<QueryWord>& words, ...@@ -138,6 +141,14 @@ bool QueryNodeWord::HasMatchIn(const std::vector<QueryWord>& words,
return matched; return matched;
} }
bool QueryNodeWord::HasMatchIn(const std::vector<QueryWord>& words) const {
for (size_t i = 0; i < words.size(); ++i) {
if (Matches(words[i].word, false))
return true;
}
return false;
}
void QueryNodeWord::AppendWords(std::vector<string16>* words) const { void QueryNodeWord::AppendWords(std::vector<string16>* words) const {
words->push_back(word_); words->push_back(word_);
} }
...@@ -164,6 +175,8 @@ class QueryNodeList : public QueryNode { ...@@ -164,6 +175,8 @@ class QueryNodeList : public QueryNode {
virtual bool HasMatchIn( virtual bool HasMatchIn(
const std::vector<QueryWord>& words, const std::vector<QueryWord>& words,
Snippet::MatchPositions* match_positions) const OVERRIDE; Snippet::MatchPositions* match_positions) const OVERRIDE;
virtual bool HasMatchIn(
const std::vector<QueryWord>& words) const OVERRIDE;
virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; virtual void AppendWords(std::vector<string16>* words) const OVERRIDE;
protected: protected:
...@@ -219,6 +232,11 @@ bool QueryNodeList::HasMatchIn(const std::vector<QueryWord>& words, ...@@ -219,6 +232,11 @@ bool QueryNodeList::HasMatchIn(const std::vector<QueryWord>& words,
return false; return false;
} }
// Matching is performed against a QueryNodeList's children, never against the
// list node itself, so this overload is not expected to be reached.
bool QueryNodeList::HasMatchIn(const std::vector<QueryWord>& words) const {
  NOTREACHED();
  return false;
}
void QueryNodeList::AppendWords(std::vector<string16>* words) const { void QueryNodeList::AppendWords(std::vector<string16>* words) const {
for (size_t i = 0; i < children_.size(); ++i) for (size_t i = 0; i < children_.size(); ++i)
children_[i]->AppendWords(words); children_[i]->AppendWords(words);
...@@ -246,8 +264,13 @@ class QueryNodePhrase : public QueryNodeList { ...@@ -246,8 +264,13 @@ class QueryNodePhrase : public QueryNodeList {
virtual bool HasMatchIn( virtual bool HasMatchIn(
const std::vector<QueryWord>& words, const std::vector<QueryWord>& words,
Snippet::MatchPositions* match_positions) const OVERRIDE; Snippet::MatchPositions* match_positions) const OVERRIDE;
virtual bool HasMatchIn(
const std::vector<QueryWord>& words) const OVERRIDE;
private: private:
bool MatchesAll(const std::vector<QueryWord>& words,
const QueryWord** first_word,
const QueryWord** last_word) const;
DISALLOW_COPY_AND_ASSIGN(QueryNodePhrase); DISALLOW_COPY_AND_ASSIGN(QueryNodePhrase);
}; };
...@@ -262,9 +285,9 @@ int QueryNodePhrase::AppendToSQLiteQuery(string16* query) const { ...@@ -262,9 +285,9 @@ int QueryNodePhrase::AppendToSQLiteQuery(string16* query) const {
return num_words; return num_words;
} }
bool QueryNodePhrase::HasMatchIn( bool QueryNodePhrase::MatchesAll(const std::vector<QueryWord>& words,
const std::vector<QueryWord>& words, const QueryWord** first_word,
Snippet::MatchPositions* match_positions) const { const QueryWord** last_word) const {
if (words.size() < children_.size()) if (words.size() < children_.size())
return false; return false;
...@@ -277,16 +300,35 @@ bool QueryNodePhrase::HasMatchIn( ...@@ -277,16 +300,35 @@ bool QueryNodePhrase::HasMatchIn(
} }
} }
if (matched_all) { if (matched_all) {
const QueryWord& last_word = words[i + children_.size() - 1]; *first_word = &words[i];
match_positions->push_back( *last_word = &words[i + children_.size() - 1];
Snippet::MatchPosition(words[i].position,
last_word.position + last_word.word.length()));
return true; return true;
} }
} }
return false; return false;
} }
// Phrase match with snippet positions: if the full phrase occurs in |words|,
// records a match position spanning from the start of the first matched word
// to the end of the last one.
bool QueryNodePhrase::HasMatchIn(
    const std::vector<QueryWord>& words,
    Snippet::MatchPositions* match_positions) const {
  const QueryWord* first_word = NULL;
  const QueryWord* last_word = NULL;
  if (!MatchesAll(words, &first_word, &last_word))
    return false;

  const size_t match_end = last_word->position + last_word->word.length();
  match_positions->push_back(
      Snippet::MatchPosition(first_word->position, match_end));
  return true;
}
bool QueryNodePhrase::HasMatchIn(const std::vector<QueryWord>& words) const {
const QueryWord* first_word;
const QueryWord* last_word;
return MatchesAll(words, &first_word, &last_word);
}
QueryParser::QueryParser() {} QueryParser::QueryParser() {}
// static // static
...@@ -354,6 +396,18 @@ bool QueryParser::DoesQueryMatch(const string16& text, ...@@ -354,6 +396,18 @@ bool QueryParser::DoesQueryMatch(const string16& text,
return true; return true;
} }
bool QueryParser::DoesQueryMatch(const std::vector<QueryWord>& query_words,
const std::vector<QueryNode*>& query_nodes) {
if (query_nodes.empty() || query_words.empty())
return false;
for (size_t i = 0; i < query_nodes.size(); ++i) {
if (!query_nodes[i]->HasMatchIn(query_words))
return false;
}
return true;
}
bool QueryParser::ParseQueryImpl(const string16& query, QueryNodeList* root) { bool QueryParser::ParseQueryImpl(const string16& query, QueryNodeList* root) {
base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD); base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD);
// TODO(evanm): support a locale here // TODO(evanm): support a locale here
......
...@@ -46,6 +46,9 @@ class QueryNode { ...@@ -46,6 +46,9 @@ class QueryNode {
virtual bool HasMatchIn(const std::vector<QueryWord>& words, virtual bool HasMatchIn(const std::vector<QueryWord>& words,
Snippet::MatchPositions* match_positions) const = 0; Snippet::MatchPositions* match_positions) const = 0;
// Returns true if this node matches at least one of the words in |words|.
virtual bool HasMatchIn(const std::vector<QueryWord>& words) const = 0;
// Appends the words that make up this node in |words|. // Appends the words that make up this node in |words|.
virtual void AppendWords(std::vector<string16>* words) const = 0; virtual void AppendWords(std::vector<string16>* words) const = 0;
}; };
...@@ -87,14 +90,19 @@ class QueryParser { ...@@ -87,14 +90,19 @@ class QueryParser {
const std::vector<QueryNode*>& nodes, const std::vector<QueryNode*>& nodes,
Snippet::MatchPositions* match_positions); Snippet::MatchPositions* match_positions);
// Returns true if all of the |words| match the query |nodes| created by a
// call to ParseQuery.
bool DoesQueryMatch(const std::vector<QueryWord>& words,
const std::vector<QueryNode*>& nodes);
// Extracts the words from |text|, placing each word into |words|.
void ExtractQueryWords(const string16& text, std::vector<QueryWord>* words);
private: private:
// Does the work of parsing |query|; creates nodes in |root| as appropriate. // Does the work of parsing |query|; creates nodes in |root| as appropriate.
// This is invoked from both of the ParseQuery methods. // This is invoked from both of the ParseQuery methods.
bool ParseQueryImpl(const string16& query, QueryNodeList* root); bool ParseQueryImpl(const string16& query, QueryNodeList* root);
// Extracts the words from |text|, placing each word into |words|.
void ExtractQueryWords(const string16& text, std::vector<QueryWord>* words);
DISALLOW_COPY_AND_ASSIGN(QueryParser); DISALLOW_COPY_AND_ASSIGN(QueryParser);
}; };
......
...@@ -358,6 +358,32 @@ bool URLDatabase::FindShortestURLFromBase(const std::string& base, ...@@ -358,6 +358,32 @@ bool URLDatabase::FindShortestURLFromBase(const std::string& base,
return true; return true;
} }
bool URLDatabase::GetTextMatches(const string16& query,
URLRows* results) {
ScopedVector<QueryNode> query_nodes;
query_parser_.ParseQueryNodes(query, &query_nodes.get());
results->clear();
sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
"SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE hidden = 0"));
while (statement.Step()) {
std::vector<QueryWord> query_words;
string16 url = base::i18n::ToLower(statement.ColumnString16(1));
query_parser_.ExtractQueryWords(url, &query_words);
string16 title = base::i18n::ToLower(statement.ColumnString16(2));
query_parser_.ExtractQueryWords(title, &query_words);
if (query_parser_.DoesQueryMatch(query_words, query_nodes.get())) {
history::URLResult info;
FillURLRow(statement, &info);
if (info.url().is_valid())
results->push_back(info);
}
}
return !results->empty();
}
bool URLDatabase::InitKeywordSearchTermsTable() { bool URLDatabase::InitKeywordSearchTermsTable() {
has_keyword_search_terms_ = true; has_keyword_search_terms_ = true;
if (!GetDB().DoesTableExist("keyword_search_terms")) { if (!GetDB().DoesTableExist("keyword_search_terms")) {
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include "base/basictypes.h" #include "base/basictypes.h"
#include "chrome/browser/history/history_types.h" #include "chrome/browser/history/history_types.h"
#include "chrome/browser/history/query_parser.h"
#include "chrome/browser/search_engines/template_url_id.h" #include "chrome/browser/search_engines/template_url_id.h"
#include "sql/statement.h" #include "sql/statement.h"
...@@ -189,6 +190,12 @@ class URLDatabase { ...@@ -189,6 +190,12 @@ class URLDatabase {
bool allow_base, bool allow_base,
history::URLRow* info); history::URLRow* info);
// History search ------------------------------------------------------------
// Performs a brute force search over the database to find any URLs or titles
// which match the |query| string. Returns any matches in |results|.
bool GetTextMatches(const string16& query, URLRows* results);
// Keyword Search Terms ------------------------------------------------------ // Keyword Search Terms ------------------------------------------------------
// Sets the search terms for the specified url/keyword pair. // Sets the search terms for the specified url/keyword pair.
...@@ -289,6 +296,8 @@ class URLDatabase { ...@@ -289,6 +296,8 @@ class URLDatabase {
// have keyword search terms. // have keyword search terms.
bool has_keyword_search_terms_; bool has_keyword_search_terms_;
QueryParser query_parser_;
DISALLOW_COPY_AND_ASSIGN(URLDatabase); DISALLOW_COPY_AND_ASSIGN(URLDatabase);
}; };
...@@ -303,6 +312,6 @@ class URLDatabase { ...@@ -303,6 +312,6 @@ class URLDatabase {
" urls.id, urls.url, urls.title, urls.visit_count, urls.typed_count, " \ " urls.id, urls.url, urls.title, urls.visit_count, urls.typed_count, " \
"urls.last_visit_time, urls.hidden " "urls.last_visit_time, urls.hidden "
} // history } // namespace history
#endif // CHROME_BROWSER_HISTORY_URL_DATABASE_H_ #endif // CHROME_BROWSER_HISTORY_URL_DATABASE_H_
...@@ -118,6 +118,39 @@ bool VisitDatabase::FillVisitVector(sql::Statement& statement, ...@@ -118,6 +118,39 @@ bool VisitDatabase::FillVisitVector(sql::Statement& statement,
return statement.Succeeded(); return statement.Succeeded();
} }
// static
// Copies the rows of |statement| into |visits|, honoring the duplicate
// policy and max count in |options|.  Returns true when a row was found
// beyond the max-count limit (i.e. the result set was truncated and more
// results exist); returns false once the statement is exhausted.
bool VisitDatabase::FillVisitVectorWithOptions(sql::Statement& statement,
                                               const QueryOptions& options,
                                               VisitVector* visits) {
  std::set<URLID> found_urls;

  // Keeps track of the day that |found_urls| is holding the URLs for, in order
  // to handle removing per-day duplicates.
  base::Time found_urls_midnight;

  while (statement.Step()) {
    VisitRow visit;
    FillVisitRow(statement, &visit);

    if (options.duplicate_policy != QueryOptions::KEEP_ALL_DUPLICATES) {
      // For per-day dedup, reset the seen-URL set whenever the visit crosses
      // into a different (local) day.
      if (options.duplicate_policy == QueryOptions::REMOVE_DUPLICATES_PER_DAY &&
          found_urls_midnight != visit.visit_time.LocalMidnight()) {
        found_urls.clear();
        found_urls_midnight = visit.visit_time.LocalMidnight();
      }
      // Make sure the URL this visit corresponds to is unique.
      if (found_urls.find(visit.url_id) != found_urls.end())
        continue;
      found_urls.insert(visit.url_id);
    }

    // The duplicate check above runs before this count check, so dropped
    // duplicates never consume a slot of the max count.
    if (static_cast<int>(visits->size()) >= options.EffectiveMaxCount())
      return true;
    visits->push_back(visit);
  }
  return false;
}
VisitID VisitDatabase::AddVisit(VisitRow* visit, VisitSource source) { VisitID VisitDatabase::AddVisit(VisitRow* visit, VisitSource source) {
sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
"INSERT INTO visits " "INSERT INTO visits "
...@@ -245,6 +278,31 @@ bool VisitDatabase::GetIndexedVisitsForURL(URLID url_id, VisitVector* visits) { ...@@ -245,6 +278,31 @@ bool VisitDatabase::GetIndexedVisitsForURL(URLID url_id, VisitVector* visits) {
return FillVisitVector(statement, visits); return FillVisitVector(statement, visits);
} }
// Fetches the visits for |url_id| restricted by |options| (time range,
// duplicate policy, max count).  Return value follows the
// FillVisitVectorWithOptions() contract: true means the result set was
// truncated by the max count, false means all matching visits were returned.
bool VisitDatabase::GetVisitsForURLWithOptions(URLID url_id,
                                               const QueryOptions& options,
                                               VisitVector* visits) {
  visits->clear();

  // BUG FIX: the original tested "options.REMOVE_ALL_DUPLICATES", which
  // merely names the (nonzero) enumerator through the object and is
  // therefore a constant true — the SQL branch below was dead code.  The
  // intended test is a comparison against the duplicate_policy field.
  if (options.duplicate_policy == QueryOptions::REMOVE_ALL_DUPLICATES) {
    // With full dedup only the single most recent visit can be returned.
    // NOTE(review): this path ignores options' begin/end time range —
    // presumably acceptable for the callers, but worth confirming.
    VisitRow visit_row;
    VisitID visit_id = GetMostRecentVisitForURL(url_id, &visit_row);
    if (visit_id && options.EffectiveMaxCount() != 0) {
      visits->push_back(visit_row);
    }
    // True only when a visit existed but a zero max count dropped it,
    // mirroring the "more results remain" meaning of the return value.
    return options.EffectiveMaxCount() == 0 && visit_id;
  } else {
    sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
        "SELECT" HISTORY_VISIT_ROW_FIELDS
        "FROM visits "
        "WHERE url=? AND visit_time >= ? AND visit_time < ? "
        "ORDER BY visit_time DESC"));
    statement.BindInt64(0, url_id);
    statement.BindInt64(1, options.EffectiveBeginTime());
    statement.BindInt64(2, options.EffectiveEndTime());

    return FillVisitVectorWithOptions(statement, options, visits);
  }
}
bool VisitDatabase::GetVisitsForTimes(const std::vector<base::Time>& times, bool VisitDatabase::GetVisitsForTimes(const std::vector<base::Time>& times,
VisitVector* visits) { VisitVector* visits) {
...@@ -333,33 +391,7 @@ bool VisitDatabase::GetVisibleVisitsInRange(const QueryOptions& options, ...@@ -333,33 +391,7 @@ bool VisitDatabase::GetVisibleVisitsInRange(const QueryOptions& options,
statement.BindInt(5, content::PAGE_TRANSITION_MANUAL_SUBFRAME); statement.BindInt(5, content::PAGE_TRANSITION_MANUAL_SUBFRAME);
statement.BindInt(6, content::PAGE_TRANSITION_KEYWORD_GENERATED); statement.BindInt(6, content::PAGE_TRANSITION_KEYWORD_GENERATED);
std::set<URLID> found_urls; return FillVisitVectorWithOptions(statement, options, visits);
// Keeps track of the day that |found_urls| is holding the URLs for, in order
// to handle removing per-day duplicates.
base::Time found_urls_midnight;
while (statement.Step()) {
VisitRow visit;
FillVisitRow(statement, &visit);
if (options.duplicate_policy != QueryOptions::KEEP_ALL_DUPLICATES) {
if (options.duplicate_policy == QueryOptions::REMOVE_DUPLICATES_PER_DAY &&
found_urls_midnight != visit.visit_time.LocalMidnight()) {
found_urls.clear();
found_urls_midnight = visit.visit_time.LocalMidnight();
}
// Make sure the URL this visit corresponds to is unique.
if (found_urls.find(visit.url_id) != found_urls.end())
continue;
found_urls.insert(visit.url_id);
}
if (static_cast<int>(visits->size()) >= options.EffectiveMaxCount())
return true;
visits->push_back(visit);
}
return false;
} }
void VisitDatabase::GetDirectVisitsDuringTimes(const VisitFilter& time_filter, void VisitDatabase::GetDirectVisitsDuringTimes(const VisitFilter& time_filter,
......
...@@ -63,6 +63,15 @@ class VisitDatabase { ...@@ -63,6 +63,15 @@ class VisitDatabase {
// Returns true on success (although there may still be no matches). // Returns true on success (although there may still be no matches).
bool GetIndexedVisitsForURL(URLID url_id, VisitVector* visits); bool GetIndexedVisitsForURL(URLID url_id, VisitVector* visits);
// Fills in the given vector with the visits for the given page ID which
// match the set of options passed, sorted in descending order of date.
//
// Returns true if there are more results available, i.e. if the number of
// results was restricted by |options.max_count|.
bool GetVisitsForURLWithOptions(URLID url_id,
const QueryOptions& options,
VisitVector* visits);
// Fills the vector with all visits with times in the given list. // Fills the vector with all visits with times in the given list.
// //
// The results will be in no particular order. Also, no duplicate // The results will be in no particular order. Also, no duplicate
...@@ -202,6 +211,14 @@ class VisitDatabase { ...@@ -202,6 +211,14 @@ class VisitDatabase {
// hasn't happened yet. // hasn't happened yet.
static bool FillVisitVector(sql::Statement& statement, VisitVector* visits); static bool FillVisitVector(sql::Statement& statement, VisitVector* visits);
// Convenience to fill a VisitVector while respecting the set of options.
// |statement| should order the query descending by visit_time to ensure
// correct duplicate management behavior. Assumes that statement.Step()
// hasn't happened yet.
static bool FillVisitVectorWithOptions(sql::Statement& statement,
const QueryOptions& options,
VisitVector* visits);
// Called by the derived classes to migrate the older visits table which // Called by the derived classes to migrate the older visits table which
// don't have visit_duration column yet. // don't have visit_duration column yet.
bool MigrateVisitsWithoutDuration(); bool MigrateVisitsWithoutDuration();
...@@ -216,6 +233,6 @@ class VisitDatabase { ...@@ -216,6 +233,6 @@ class VisitDatabase {
" id,url,visit_time,from_visit,transition,segment_id,is_indexed," \ " id,url,visit_time,from_visit,transition,segment_id,is_indexed," \
"visit_duration " "visit_duration "
} // history } // namespace history
#endif // CHROME_BROWSER_HISTORY_VISIT_DATABASE_H_ #endif // CHROME_BROWSER_HISTORY_VISIT_DATABASE_H_
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment