Commit 7f6c7602 authored by tby, committed by Commit Bot

[Suggested files] Change ranking algorithm.

During test review we've found a significant issue with the current
re-ranking algorithm for suggested files, which makes it easy for the
ranking to get into a permanently bad state and not show file results.

For now, I'm changing the feature back to a simpler version of the
ranking algorithm, entirely based on group scores.

I've had to delete quite a few unit tests for this change, which I'll
re-implement in a follow-up CL. I've tested the behaviour manually and
want to submit the core algorithm on its own to unblock the test
reviewer.

Bug: 1034842
Change-Id: I0f59241af6e5977f899f2be867d7ae0e4b9606ee
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2128032
Commit-Queue: Tony Yeoman <tby@chromium.org>
Reviewed-by: Thanh Nguyen <thanhdng@chromium.org>
Cr-Commit-Position: refs/heads/master@{#755235}
parent 9aca48ed
......@@ -21,61 +21,59 @@
namespace app_list {
namespace {
// Apps have a boost of 8.0 + app ranker score in range [0, 1],
// hence the range of scores is [8.0, 9.0].
constexpr double kScoreHi = 9.0;
constexpr double kScoreLo = 8.0;
// Returns whether the model should be trained on this type of data.
bool ShouldTrain(RankingItemType type) {
switch (type) {
case RankingItemType::kApp:
case RankingItemType::kChip:
case RankingItemType::kZeroStateFile:
case RankingItemType::kDriveQuickAccess:
return true;
default:
return false;
}
constexpr int kNumChips = 5;
constexpr char kApp[] = "app";
constexpr char kFile[] = "file";
// A small number that we expect to be smaller than the difference between the
// scores of any two results. This means it can be used to insert a result A
// between results B and C by setting A's score to B's score + kScoreEpsilon.
constexpr float kScoreEpsilon = 1e-5f;
// Reorders |results| in place so that entries with larger scores come first.
// Entries with equal scores end up in an unspecified relative order, since the
// sort is not stable.
void SortHighToLow(std::vector<Mixer::SortData*>* results) {
  const auto by_descending_score = [](const Mixer::SortData* const lhs,
                                      const Mixer::SortData* const rhs) {
    return lhs->score > rhs->score;
  };
  std::sort(results->begin(), results->end(), by_descending_score);
}
double FetchScore(const std::map<std::string, float> ranks,
ChromeSearchResult* r) {
const auto it = ranks.find(NormalizeAppId(r->id()));
if (it != ranks.end())
// Looks up the score stored under |key| in |scores|.
//
// Returns the mapped value when |key| is present, and 1.0f otherwise.
float GetScore(const std::map<std::string, float>& scores,
               const std::string& key) {
  // We expect to always find a score for |key| in |scores|, because the ranker
  // is initialized with some default scores. However, a state without scores
  // is possible, e.g. if the recurrence ranker file is corrupted. In that case
  // fall back to a default score of 1.
  constexpr float kDefaultScore = 1.0f;
  const auto it = scores.find(key);
  return it == scores.end() ? kDefaultScore : it->second;
}
int GetNextMatchingIndex(
Mixer::SortedResults* results,
const base::RepeatingCallback<bool(const ChromeSearchResult*)>&
result_filter,
int from_index) {
int i = from_index + 1;
while (i < static_cast<int>(results->size())) {
if (result_filter.Run((*results)[i].result)) {
return i;
}
++i;
}
return -1;
// Seeds |ranker| with a fixed sequence of records: kFile twice, followed by
// kApp three times.
void InitializeRanker(RecurrenceRanker* ranker) {
  // This initialization puts two files and three apps in the chips.
  constexpr int kInitialFileRecords = 2;
  constexpr int kInitialAppRecords = 3;
  for (int i = 0; i < kInitialFileRecords; ++i)
    ranker->Record(kFile);
  for (int i = 0; i < kInitialAppRecords; ++i)
    ranker->Record(kApp);
}
} // namespace
ChipRanker::ChipRanker(Profile* profile) : profile_(profile) {
DCHECK(profile);
// Set up ranker model.
RecurrenceRankerConfigProto config;
config.set_min_seconds_between_saves(240u);
config.set_condition_limit(1u);
config.set_condition_decay(0.6f);
config.set_target_limit(200);
config.set_target_decay(0.9f);
config.set_condition_decay(0.5f);
config.set_target_limit(5u);
config.set_target_decay(0.95f);
config.mutable_predictor()->mutable_default_predictor();
ranker_ = std::make_unique<RecurrenceRanker>(
type_ranker_ = std::make_unique<RecurrenceRanker>(
"", profile_->GetPath().AppendASCII("suggested_files_ranker.pb"), config,
chromeos::ProfileHelper::IsEphemeralUserProfile(profile_));
}
......@@ -83,89 +81,73 @@ ChipRanker::ChipRanker(Profile* profile) : profile_(profile) {
ChipRanker::~ChipRanker() = default;
void ChipRanker::Train(const AppLaunchData& app_launch_data) {
// ID normalisation will ensure that a file launched from the zero-state
// result list is counted as the same item as the same file launched from
// the suggestion chips.
if (ShouldTrain(app_launch_data.ranking_item_type)) {
ranker_->Record(NormalizeAppId(app_launch_data.id));
const auto type = app_launch_data.ranking_item_type;
if (type == RankingItemType::kApp) {
type_ranker_->Record(kApp);
} else if (type == RankingItemType::kChip ||
type == RankingItemType::kZeroStateFile ||
type == RankingItemType::kDriveQuickAccess) {
type_ranker_->Record(kFile);
}
}
void ChipRanker::Rank(Mixer::SortedResults* results) {
std::sort(results->begin(), results->end());
const auto app_chip_filter =
base::BindRepeating([](const ChromeSearchResult* r) -> bool {
return (r->display_type() == ash::SearchResultDisplayType::kTile ||
r->display_type() == ash::SearchResultDisplayType::kChip) &&
r->is_recommendation();
});
const auto file_chip_filter =
base::BindRepeating([](const ChromeSearchResult* r) -> bool {
return r->result_type() == ash::AppListSearchResultType::kFileChip ||
r->result_type() ==
ash::AppListSearchResultType::kDriveQuickAccessChip;
});
// Use filters to find first two app chips and first file chip
int app1 = GetNextMatchingIndex(results, app_chip_filter, -1);
int app2 = GetNextMatchingIndex(results, app_chip_filter, app1);
int file = GetNextMatchingIndex(results, file_chip_filter, -1);
int prev_file = -1;
// If we couldn't find any files or couldn't find two or more apps.
if (file < 0 || app1 < 0 || app2 < 0) {
return;
// Construct two lists of pointers, containing file chip and app results
// respectively, sorted in decreasing order of score.
std::vector<Mixer::SortData*> app_results;
std::vector<Mixer::SortData*> file_results;
for (auto& result : *results) {
if (RankingItemTypeFromSearchResult(*result.result) ==
RankingItemType::kApp) {
app_results.emplace_back(&result);
} else if (RankingItemTypeFromSearchResult(*result.result) ==
RankingItemType::kChip) {
file_results.emplace_back(&result);
}
// Fetch rankings from |ranker_|.
std::map<std::string, float> ranks = ranker_->Rank();
// Refer to class comment.
double app1_rescore = FetchScore(ranks, (*results)[app1].result);
double app2_rescore = FetchScore(ranks, (*results)[app2].result);
double file_rescore = 0.0;
double prev_file_rescore = kScoreHi;
double hi = 0.0;
double lo = 0.0;
while (file >= 0 && app1 >= 0) {
file_rescore = FetchScore(ranks, (*results)[file].result);
// File should sit above lowest of two app scores.
if (file_rescore > app2_rescore) {
// Find upper and lower bounds on score.
hi = prev_file > 0 ? (*results)[prev_file].score : kScoreHi;
lo = app2 > 0 ? (*results)[app2].score : kScoreLo;
if (prev_file_rescore > app1_rescore) {
if (file_rescore < app1_rescore)
hi = (*results)[app1].score;
else if (file_rescore > app1_rescore)
lo = (*results)[app1].score;
}
SortHighToLow(&app_results);
SortHighToLow(&file_results);
// Place new score at midpoint between hi and lo.
(*results)[file].score = lo + ((hi - lo) / 2);
// The chip ranker only has work to do if both apps and files are present.
if (app_results.empty() || file_results.empty())
return;
prev_file = file;
file = GetNextMatchingIndex(results, file_chip_filter, file);
prev_file_rescore = file_rescore;
// If this is the first initialization of the ranker, warm it up with some
// default scores for apps and files.
// TODO(crbug.com/921444): Getting the ranks here just to check if they're
// empty is inefficient. We should add a size() method to RecurrenceRanker and
// update this.
if (type_ranker_->Rank().empty()) {
InitializeRanker(type_ranker_.get());
}
} else {
// File should sit below both current app scores.
app1 = app2;
app1_rescore = app2_rescore;
app2 = GetNextMatchingIndex(results, app_chip_filter, app1);
app2_rescore =
app2 < 0 ? kScoreLo : FetchScore(ranks, (*results)[app2].result);
// Get the two type scores from the ranker.
const auto ranks = type_ranker_->Rank();
float app_score = GetScore(ranks, kApp);
float file_score = GetScore(ranks, kFile);
const float score_delta = (file_score + app_score) / kNumChips;
// Tweak file result scores to fit in with app scores. See header comment for
// ChipRanker::Rank for more details.
const int num_apps = static_cast<int>(app_results.size());
const int num_files = static_cast<int>(file_results.size());
int current_app = 0;
int current_file = 0;
for (int i = 0; i < kNumChips; ++i) {
if (app_score > file_score) {
app_score -= score_delta;
++current_app;
} else if (current_file < num_files && current_app < num_apps) {
file_results[current_file]->score =
app_results[current_app]->score + kScoreEpsilon;
++current_file;
file_score -= score_delta;
}
}
}
void ChipRanker::SetForTest(std::unique_ptr<RecurrenceRanker> ranker) {
ranker_ = std::move(ranker);
// Test-only accessor: returns the raw, non-owning pointer held by
// |type_ranker_|.
RecurrenceRanker* ChipRanker::GetRankerForTest() {
  RecurrenceRanker* const ranker = type_ranker_.get();
  return ranker;
}
} // namespace app_list
......@@ -8,6 +8,7 @@
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "chrome/browser/history/history_service_factory.h"
#include "chrome/browser/profiles/profile.h"
......@@ -21,22 +22,24 @@ namespace app_list {
// A ChipRanker provides a method for ranking suggestion chips in the Chrome OS
// Launcher. Given a list of SortedResults from the Mixer, the ChipRanker will
// rescore the chip items so that they are appropriately ranked, while
// preserving the original ordering of all groups of results.
// preserving the original ordering of all types of results.
//
// The ranking algorithm works as follows:
// - Start with sorting the results already scored from the Mixer
// - Take the top two app items, app1 and app2
// - For each chip in the SortedResults list:
// 1. Rank app1, app2 and chip using a Dolphin model
// 2. Adjust chip score to sit in the correct position
// relative to the two apps:
// - If chip should be first
// set chip score > app1 score
// - If chip should sit between
// set chip score > app2 score, < app1 score
// - If chip is ranked last
// take app2 and the next app item, app3, and continue
// with same file.
// To combine app and file items, a type score is stored for each of the two
// categories 'apps' and 'files', tracking the user's overall usage of that
// category. These scores are updated when results are launched. To produce a
// combined list of apps and files, we do the following:
//
// - Make a copy of the type scores: app_score and file_score.
// - Calculate delta = (app_score + file_score) / number_of_chips
// - Until we have number_of_chips results:
// - Select the highest scoring unchosen app or file, depending on whether
// app_score > file_score.
// - Decrease the score of the selected type by delta.
//
// The types of the shown results reflect the proportion of the type scores and,
// as a type's score increases, its results appear closer to the front of the
// list. Note the implementation also handles the case of one type not having
// enough results.
class ChipRanker {
public:
explicit ChipRanker(Profile* profile);
......@@ -52,12 +55,11 @@ class ChipRanker {
// ranking algorithm detailed above.
void Rank(Mixer::SortedResults* results);
// Set a fake ranker for tests.
void SetForTest(std::unique_ptr<RecurrenceRanker> ranker);
// Get a pointer to the ranker for testing.
RecurrenceRanker* GetRankerForTest();
// Ranker generates scores used for re-arranging items, not
// raw result scores.
std::unique_ptr<RecurrenceRanker> ranker_;
// Stores scores tracking a user's overall usage of apps or files.
std::unique_ptr<RecurrenceRanker> type_ranker_;
private:
Profile* profile_;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment