Commit 685e93d8 authored by Ce Chen's avatar Ce Chen Committed by Commit Bot

Add on device head serving component in omnibox, which will be used by on...

Add on device head serving component in omnibox, which will be used by on device head suggest provider later.

Tested on iPhone (https://photos.app.goo.gl/6T49WV6fxMcbP4Gy7; using https://crrev.com/c/1388125)

Bug: 925072
Change-Id: If5b0a3a3402ebd35158c3d319f406db720e42f26
Reviewed-on: https://chromium-review.googlesource.com/c/1452367
Commit-Queue: Ce Chen <cch@chromium.org>
Reviewed-by: Justin Donnelly <jdonnelly@chromium.org>
Cr-Commit-Position: refs/heads/master@{#631994}
parent 5f034182
......@@ -161,6 +161,8 @@ jumbo_static_library("browser") {
"omnibox_pref_names.h",
"omnibox_view.cc",
"omnibox_view.h",
"on_device_head_serving.cc",
"on_device_head_serving.h",
"scored_history_match.cc",
"scored_history_match.h",
"search_provider.cc",
......@@ -350,6 +352,7 @@ bundle_data("unit_tests_bundle_data") {
"//components/test/data/omnibox/Shortcuts.v0.sql",
"//components/test/data/omnibox/in_memory_url_index_test.sql",
"//components/test/data/omnibox/in_memory_url_index_test_limited.sql",
"//components/test/data/omnibox/on_device_head_test_model.bin",
]
outputs = [
"{{bundle_resources_dir}}/" +
......@@ -388,6 +391,7 @@ source_set("unit_tests") {
"omnibox_pedal_unittest.cc",
"omnibox_popup_model_unittest.cc",
"omnibox_view_unittest.cc",
"on_device_head_serving_unittest.cc",
"scored_history_match_unittest.cc",
"search_suggestion_parser_unittest.cc",
"shortcuts_backend_unittest.cc",
......
This diff is collapsed.
// Copyright (c) 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_OMNIBOX_BROWSER_ON_DEVICE_HEAD_SERVING_H_
#define COMPONENTS_OMNIBOX_BROWSER_ON_DEVICE_HEAD_SERVING_H_
#include <fstream>
#include <list>
#include <memory>
#include <string>
#include <utility>
#include <vector>
// On device head serving feature uses an on device model which encodes some
// top queries into a radix tree (https://en.wikipedia.org/wiki/Radix_tree), to
// help users quickly get head suggestions when they are under poor network
// conditions. When serving, it performs a search in the tree similar to BFS
// but only keeps children with high scores, to find the top N queries which
// match the given prefix.
//
// Each node in the tree is encoded using following format to optimize storage
// (see on_device_head_serving_unittest.cc for an example tree model):
// ------------------------------------------------------------------------
// | max_score_as_root | child_0 | child_1 | ... | child_n-1 | 0 (1 byte) |
// ------------------------------------------------------------------------
//
// Usage of each block in the node:
// 1) Block max_score_as_root at the beginning of each node contains the
// maximum leaf score that can be found in its subtree, which is used for
// pruning during tree traversal to improve the search performance. For
// example, imagine we have already visited some nodes, sorted them based on
// their scores and saved some of them in a structure; now we meet a node with
// a higher max_score_as_root. Since we know we should only show users the top
// N suggestions with the highest scores, we can quickly determine whether we
// can discard some node with a lower max_score_as_root without physically
// visiting any of its children, as none of the children has a score higher
// than this low max_score_as_root.
// This block has following format:
// --------------------------------------
// | 1 (1 bit) | score_max | leaf_score |
// --------------------------------------
// OR
// -------------------------
// | 0 (1 bit) | score_max |
// -------------------------
// 1-bit indicator: whether there is a leaf_score at the end of this block.
// score_max: the maximum leaf_score that can be found when using the current
// node as root.
// leaf_score: only exists when the indicator is 1; it is the score of a
// complete suggestion that ends at the current node.
//
// 2) Block child_i (0 <= i <= n-1) has following format:
// -------------------------------------------------------------
// | length of text (1 byte) | text | 1 | address of next node |
// -------------------------------------------------------------
// OR
// ---------------------------------------------------
// | length of text (1 byte) | text | 0 | leaf_score |
// ---------------------------------------------------
// We use 1 bit after text field as an indicator to determine whether this child
// is an intermediate node or leaf node. If it is a leaf node, the sequence of
// texts visited so far from the start node to here can be returned as a valid
// suggestion to users with leaf_score.
//
// The size of score and address will be given in the first two bytes of the
// model file.
// Serves head suggestions for a typed prefix out of an on device model file.
// Instances are obtained through Create(); the constructor is private.
//
// NOTE(review): this header uses uint32_t but its visible include list does
// not contain <stdint.h>/<cstdint> -- confirm it is not relying on a
// transitive include.
class OnDeviceHeadServing {
public:
// Creates and returns an instance for serving on device head model.
// |model_filename| names the model file and |max_num_matches_to_return| is
// the initial cap on the number of suggestions returned per lookup.
// Presumably returns nullptr when the model cannot be used -- confirm against
// the implementation.
// NOTE(review): |max_num_matches_to_return| is an int here but a uint32_t in
// the constructor and the setter below; confirm negative values are handled.
static std::unique_ptr<OnDeviceHeadServing> Create(
const std::string& model_filename,
int max_num_matches_to_return);
// Overrides the cap on the number of suggestions returned per lookup.
void set_max_num_matches_to_return(uint32_t max_num_matches_to_return) {
max_num_matches_to_return_ = max_num_matches_to_return;
}
// Returns the current cap on the number of suggestions returned per lookup.
uint32_t max_num_matches_to_return() const {
return max_num_matches_to_return_;
}
// Return the stored sizes of a score and of a node address; per the file
// format description these sizes are given in the first two bytes of the model
// file.
uint32_t num_bytes_of_score() const { return score_size_; }
uint32_t num_bytes_of_address() const { return address_size_; }
// Gets top "max_num_matches_to_return" suggestions and their scores which
// match the given prefix. Each returned pair is (suggestion text, score).
std::vector<std::pair<std::string, uint32_t>> GetSuggestionsForPrefix(
const std::string& prefix);
~OnDeviceHeadServing();
private:
// Private: use Create() to obtain an instance.
OnDeviceHeadServing(const std::string& model_filename,
uint32_t max_num_matches_to_return);
// A data structure to keep track of the tree nodes which should be visited or
// have been visited during tree traversal.
struct MatchCandidate {
// The sequences of characters from the start node to current node.
std::string text;
// Whether the text above can be returned as a suggestion; if false it is
// the prefix of some other complete suggestion.
bool is_complete_suggestion;
// If is_complete_suggestion is true, this is the score for the suggestion;
// Otherwise it will be set as max_score_as_root of the node.
uint32_t score;
// The address of the node in the model file. It is not required if
// is_complete_suggestion is true.
uint32_t address;
};
// Doubly linked list structure, which will be sorted based on candidates'
// scores (from low to high), to track nodes during tree search. We use two of
// these lists to keep max_num_matches_to_return_ nodes in total with the
// highest scores during the search, and prune children and branches with low
// scores.
// In theory, using RBTree might give a better search performance
// (i.e. log(n)) compared with linear from linked list here when inserting
// new candidates with high score into the struct, but since n is usually
// small, using linked list shall be okay.
using CandidateQueue = std::list<MatchCandidate>;
// Adds |candidate| to the pair of queues used during the search.
// Presumably complete suggestions go to |leaf_queue| and intermediate nodes to
// |non_leaf_queue|, keeping each sorted by score -- confirm against the
// implementation.
void InsertCandidateToQueue(const MatchCandidate& candidate,
CandidateQueue* leaf_queue,
CandidateQueue* non_leaf_queue);
// Presumably returns the lowest score held by either queue -- confirm the
// behavior for empty queues against the implementation.
uint32_t GetMinScoreFromQueues(const CandidateQueue& queue_1,
const CandidateQueue& queue_2);
// Finds the start node which matches the given prefix; returns true if found
// and hands back the start node through param match_candidate.
bool FindStartNode(const std::string& prefix,
MatchCandidate* match_candidate);
// Reads the tree node referenced by the given match candidate and converts
// all possible suggestions and children of this node into MatchCandidate
// structures.
std::vector<MatchCandidate> ReadTreeNode(const MatchCandidate& current);
// Reads block max_score_as_root at the beginning of the node from the given
// address. If there is a leaf score at the end of the block, returns the leaf
// score using param leaf_candidate. |is_successful| reports whether the read
// worked (presumably set to false on a stream error -- confirm).
uint32_t ReadMaxScoreAsRoot(uint32_t address,
MatchCandidate* leaf_candidate,
bool* is_successful);
// Reads a child block and moves the ifstream cursor to the next child; returns
// false when reaching the end of the node or when an ifstream read error
// happens.
bool ReadNextChild(MatchCandidate* candidate);
// Performs a search starting from the address specified by start_match and
// returns max_num_matches_to_return_ number of complete suggestions with
// highest scores.
std::vector<std::pair<std::string, uint32_t>> DoSearch(
const MatchCandidate& start_match);
// Reads the next num_bytes from the file stream into |buf|; the bool result
// presumably signals whether the read succeeded -- confirm.
bool ReadNextNumBytes(uint32_t num_bytes, char* buf);
// Reads the next num_bytes from the file stream and returns them as an
// integer; success is reported through |is_successful|.
// NOTE(review): byte order is not visible from this header -- confirm.
uint32_t ReadNextNumBytesAsInt(uint32_t num_bytes, bool* is_successful);
// Checks if size of score and size of address read from the model file are
// valid.
// For score, we use size of 2 bytes (15 bits), 3 bytes (23 bits) or 4 bytes
// (31 bits); For address, we use size of 3 bytes (23 bits) or 4 bytes
// (31 bits).
bool AreSizesValid();
// Presumably opens model_filename_ and positions the stream at
// |start_address|, returning false on failure -- confirm against the
// implementation.
bool OpenModelFileStream(const uint32_t start_address);
// Presumably closes model_filestream_ if it is currently open -- confirm.
void MaybeCloseModelFileStream();
// Name of the model file passed in at construction.
std::string model_filename_;
// Stream used for reading the model file.
std::ifstream model_filestream_;
// Size of a score, exposed through num_bytes_of_score().
uint32_t score_size_;
// Size of a node address, exposed through num_bytes_of_address().
uint32_t address_size_;
// Cap on the number of suggestions returned by GetSuggestionsForPrefix().
uint32_t max_num_matches_to_return_;
};
#endif // COMPONENTS_OMNIBOX_BROWSER_ON_DEVICE_HEAD_SERVING_H_
// Copyright (c) 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/omnibox/browser/on_device_head_serving.h"
#include "base/files/file_path.h"
#include "base/files/file_util.h"
#include "base/path_service.h"
#include "base/strings/utf_string_conversions.h"
#include "build/build_config.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
using testing::ElementsAre;
using testing::Pair;
namespace {
// The test head model used for unittests contains 14 queries and their scores
// shown below; the test model uses 3-byte addresses and 2-byte scores, so the
// highest score is 32767:
// ----------------------
// Query Score
// ----------------------
// g 32767
// gmail 32766
// google maps 32765
// google 32764
// get out 32763
// googler 32762
// gamestop 32761
// maps 32761
// mail 32760
// map 32759
// 谷歌 32759
// ガツガツしてる人 32759
// 비데 두꺼비 32759
// переводчик 32759
// ----------------------
// The tree structure for the queries above is similar to this:
// [ g | ma | 谷歌 | ガツガツしてる人| 비데 두꺼비 | переводчик ]
// | |
// | [ p | il ]
// | |
// | [ # | s ]
// |
// [ # | oogle | mail | et out | amestop ]
// |
// [ # | _maps | er ]
// Returns the location of the binary test model, built by appending its
// repository-relative path to the directory reported for DIR_SOURCE_ROOT.
base::FilePath GetTestModelPath() {
  base::FilePath source_root;
  base::PathService::Get(base::DIR_SOURCE_ROOT, &source_root);
  return source_root.AppendASCII(
      "components/test/data/omnibox/on_device_head_test_model.bin");
}
} // namespace
// Fixture which loads the test head model before each test and releases it
// afterwards. The serving instance is created with a limit of 4 matches.
class OnDeviceHeadServingTest : public testing::Test {
 protected:
  void SetUp() override {
    const base::FilePath model_path = GetTestModelPath();
    ASSERT_TRUE(base::PathExists(model_path));

    // Create() takes a std::string, so the path value is converted from wide
    // characters on Windows and used directly elsewhere.
#if defined(OS_WIN)
    const std::string model_filename = base::WideToUTF8(model_path.value());
#else
    const std::string model_filename = model_path.value();
#endif

    serving_ = OnDeviceHeadServing::Create(model_filename, 4);
    ASSERT_TRUE(serving_);
  }

  void TearDown() override { serving_.reset(); }

  // The instance under test; valid between SetUp() and TearDown().
  std::unique_ptr<OnDeviceHeadServing> serving_;
};
// The test model is expected to report 2-byte scores and 3-byte addresses.
TEST_F(OnDeviceHeadServingTest, SizeOfScoreAndAddress) {
  EXPECT_EQ(serving_->num_bytes_of_score(), 2u);
  EXPECT_EQ(serving_->num_bytes_of_address(), 3u);
}
// Prefixes matching one or several queries yield those queries, ordered from
// the highest score to the lowest.
TEST_F(OnDeviceHeadServingTest, GetSuggestions) {
  EXPECT_THAT(serving_->GetSuggestionsForPrefix("go"),
              ElementsAre(Pair("google maps", 32765), Pair("google", 32764),
                          Pair("googler", 32762)));

  EXPECT_THAT(serving_->GetSuggestionsForPrefix("ge"),
              ElementsAre(Pair("get out", 32763)));

  EXPECT_THAT(serving_->GetSuggestionsForPrefix("ga"),
              ElementsAre(Pair("gamestop", 32761)));
}
// A prefix which starts none of the queries in the model returns nothing.
TEST_F(OnDeviceHeadServingTest, NoMatch) {
  EXPECT_TRUE(serving_->GetSuggestionsForPrefix("x").empty());
}
// "ap" is the tail of the query "map" but no query starts with it, so nothing
// should be returned.
TEST_F(OnDeviceHeadServingTest, MatchTheEndOfSuggestion) {
  EXPECT_TRUE(serving_->GetSuggestionsForPrefix("ap").empty());
}
// "st" only occurs in the middle of the query "gamestop"; it is not a prefix
// of any query, so nothing should be returned.
TEST_F(OnDeviceHeadServingTest, MatchAtTheMiddleOfSuggestion) {
  EXPECT_TRUE(serving_->GetSuggestionsForPrefix("st").empty());
}
// An empty prefix is expected to produce no suggestions at all.
TEST_F(OnDeviceHeadServingTest, EmptyInput) {
  EXPECT_TRUE(serving_->GetSuggestionsForPrefix("").empty());
}
// Changing the match limit changes how many suggestions a lookup returns.
TEST_F(OnDeviceHeadServingTest, SetMaxSuggestionsToReturn) {
  // Raise the limit above the fixture's value of 4.
  serving_->set_max_num_matches_to_return(5);
  EXPECT_THAT(serving_->GetSuggestionsForPrefix("g"),
              ElementsAre(Pair("g", 32767), Pair("gmail", 32766),
                          Pair("google maps", 32765), Pair("google", 32764),
                          Pair("get out", 32763)));

  // Lower the limit: only the two best matches for "ma" remain.
  serving_->set_max_num_matches_to_return(2);
  EXPECT_THAT(serving_->GetSuggestionsForPrefix("ma"),
              ElementsAre(Pair("maps", 32761), Pair("mail", 32760)));
}
// Queries written in non-Latin scripts can be looked up by prefix as well.
// Every such query in the test model has the score 32759.
TEST_F(OnDeviceHeadServingTest, NonEnglishLanguage) {
  struct PrefixCase {
    const char* prefix;
    const char* expected_suggestion;
  };
  const PrefixCase kCases[] = {
      {"谷", "谷歌"},                      // Chinese.
      {"ガツガツ", "ガツガツしてる人"},    // Japanese.
      {"비데 ", "비데 두꺼비"},            // Korean.
      {"пере", "переводчик"},              // Russian.
  };

  for (const PrefixCase& test_case : kCases) {
    EXPECT_THAT(serving_->GetSuggestionsForPrefix(test_case.prefix),
                ElementsAre(Pair(test_case.expected_suggestion, 32759)));
  }
}
This diff was suppressed by a .gitattributes entry.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment