Commit 5e86f89f authored by Charles Zhao, committed by Commit Bot

TabRanker: Change to a new model trained on discarded data only.

Changes made in this CL:

(1) new preprocessor_config.pb and new native_inference weights.

(2) the native_inference.cc logic is also changed slightly because the new
    quantized model no longer has a dequantization step.

(3) only the 'score tests' are kept, to confirm that this model returns the
    same result as the model trained in google3. The other 'expectation
    tests' are all removed, because we would have to fix them for every new
    model. Instead, we do a full feature analysis before submitting the
    model to verify its behavior.

Bug: 902196

Change-Id: I3dd4382f750b9216369c7de14edd9c692cf6db71
Reviewed-on: https://chromium-review.googlesource.com/c/1319209
Commit-Queue: Charles . <charleszhao@chromium.org>
Reviewed-by: Michael Giuffrida <michaelpg@chromium.org>
Cr-Commit-Position: refs/heads/master@{#608243}
parent c91e8fca
...@@ -14,13 +14,13 @@ ...@@ -14,13 +14,13 @@
namespace tab_ranker { namespace tab_ranker {
namespace tfnative_model { namespace tfnative_model {
constexpr int DNN_WEIGHTS_SIZE = 29160;
constexpr int DNN_RANK = 2; constexpr int DNN_RANK = 2;
constexpr int FEATURES_SIZE = 729; constexpr int FEATURES_SIZE = 281;
constexpr int DNN_BIASES_SIZE = 40; constexpr int DNN_BIASES_SIZE = 40;
constexpr int DNN_WEIGHTS_SIZE = FEATURES_SIZE * DNN_BIASES_SIZE;
struct alignas(16) FixedAllocations { struct alignas(16) FixedAllocations {
float alloc0[DNN_WEIGHTS_SIZE]; float alloc0[DNN_BIASES_SIZE];
int32_t alloc0_shape[DNN_RANK]; int32_t alloc0_shape[DNN_RANK];
float alloc1[DNN_BIASES_SIZE]; float alloc1[DNN_BIASES_SIZE];
int32_t alloc1_shape[DNN_RANK]; int32_t alloc1_shape[DNN_RANK];
......
...@@ -56,6 +56,7 @@ void PopulateRankerExample(assist_ranker::RankerExample* example, ...@@ -56,6 +56,7 @@ void PopulateRankerExample(assist_ranker::RankerExample* example,
features["HasBeforeUnloadHandler"].set_bool_value( features["HasBeforeUnloadHandler"].set_bool_value(
tab.has_before_unload_handler); tab.has_before_unload_handler);
features["HasFormEntry"].set_bool_value(tab.has_form_entry); features["HasFormEntry"].set_bool_value(tab.has_form_entry);
features["IsActive"].set_bool_value(window.is_active);
features["IsPinned"].set_bool_value(tab.is_pinned); features["IsPinned"].set_bool_value(tab.is_pinned);
features["KeyEventCount"].set_int32_value(tab.key_event_count); features["KeyEventCount"].set_int32_value(tab.key_event_count);
features["MRUIndex"].set_int32_value(mru.index); features["MRUIndex"].set_int32_value(mru.index);
...@@ -79,6 +80,7 @@ void PopulateRankerExample(assist_ranker::RankerExample* example, ...@@ -79,6 +80,7 @@ void PopulateRankerExample(assist_ranker::RankerExample* example,
features["SiteEngagementScore"].set_int32_value( features["SiteEngagementScore"].set_int32_value(
tab.site_engagement_score.value()); tab.site_engagement_score.value());
} }
features["ShowState"].set_int32_value(window.show_state);
features["TabCount"].set_int32_value(window.tab_count); features["TabCount"].set_int32_value(window.tab_count);
features["TimeFromBackgrounded"].set_int32_value(tab.time_from_backgrounded); features["TimeFromBackgrounded"].set_int32_value(tab.time_from_backgrounded);
features["TopDomain"].set_string_value( features["TopDomain"].set_string_value(
......
...@@ -6,7 +6,6 @@ ...@@ -6,7 +6,6 @@
#include <memory> #include <memory>
#include "base/time/time.h"
#include "chrome/browser/resource_coordinator/tab_ranker/mru_features.h" #include "chrome/browser/resource_coordinator/tab_ranker/mru_features.h"
#include "chrome/browser/resource_coordinator/tab_ranker/tab_features.h" #include "chrome/browser/resource_coordinator/tab_ranker/tab_features.h"
#include "chrome/browser/resource_coordinator/tab_ranker/window_features.h" #include "chrome/browser/resource_coordinator/tab_ranker/window_features.h"
...@@ -17,72 +16,18 @@ ...@@ -17,72 +16,18 @@
namespace tab_ranker { namespace tab_ranker {
namespace { namespace {
// A host that is not one of the top sites in the model.
// Declared as a constexpr array so that neither the characters nor the
// binding can be modified; it still decays to const char* at call sites.
constexpr char kUnseenHost[] = "www.example.com";
// Builds a baseline TabFeatures for a fairly typical background tab on |host|.
// When |user_activity| is true, the input-event, navigation, reactivation and
// site-engagement fields are raised to reflect some amount of user activity.
// The result is meant as a starting point for tests that tweak one feature and
// compare scores; that method is imperfect because the model's features are
// inter-dependent.
TabFeatures GetTabFeatures(std::string host, bool user_activity = false) {
  TabFeatures features;
  features.host = host;

  // Fields that do not depend on |user_activity|.
  features.has_before_unload_handler = true;
  features.has_form_entry = false;
  features.is_pinned = false;
  features.page_transition_core_type = ui::PAGE_TRANSITION_LINK;
  features.page_transition_from_address_bar = true;
  features.page_transition_is_redirect = false;
  features.time_from_backgrounded =
      base::TimeDelta::FromMinutes(10).InMilliseconds();
  // Touch events stay at 0 because even tabs with activity usually have none.
  features.touch_event_count = 0;
  features.was_recently_audible = false;

  // Start from the no-activity values, then override them if requested.
  features.key_event_count = 0;
  features.mouse_event_count = 0;
  features.navigation_entry_count = 1;
  features.num_reactivations = 0;
  features.site_engagement_score = 0;
  if (user_activity) {
    features.key_event_count = 20;
    features.mouse_event_count = 20;
    features.navigation_entry_count = 4;
    features.num_reactivations = 1;
    features.site_engagement_score = 20;
  }
  return features;
}
// Returns a fairly typical set of window features. // Returns a fairly typical set of window features.
WindowFeatures GetWindowFeatures() { WindowFeatures GetWindowFeatures() {
WindowFeatures window(SessionID::NewUnique(), WindowFeatures window(SessionID::NewUnique(),
metrics::WindowMetricsEvent::TYPE_TABBED); metrics::WindowMetricsEvent::TYPE_TABBED);
window.tab_count = 3; window.tab_count = 3;
window.is_active = true;
window.show_state = metrics::WindowMetricsEvent::SHOW_STATE_NORMAL;
return window; return window;
} }
// Returns some MRU features. // These tests verify that the example_preprocessor_config.pb and
// // TODO: find actual nums // native_inference.h code together generates correct scores.
// Builds placeholder MRU features with index = 3 and total = 6.
MRUFeatures GetMRUFeatures() {
  MRUFeatures features;
  features.total = 6;
  features.index = 3;
  return features;
}
// These tests try to sanity-check the model by seeing how changing one feature
// impacts the score while everything else remains constant.
// For example, it seems reasonable to expect that a pinned tab would have a
// higher score than an unpinned tab, all else being equal.
//
// This won't always be the case, so these tests will be fragile, but *only*
// when updating the model; if the tests fail without changing the model,
// then the input to the model must have changed, which is probably a
// regression.
//
// If several tests fail locally after updating the model, check whether the
// features are being logged and set correctly, and think about why the model
// might be making counter-intuitive predictions. If everything checks out, try
// to find more realistic tab examples where the feature behaves as expected.
// If that fails, just remove the test -- the point is to avoid accidental model
// regressions, not to force fit a model to these expectations.
class TabScorePredictorTest : public testing::Test { class TabScorePredictorTest : public testing::Test {
public: public:
TabScorePredictorTest() = default; TabScorePredictorTest() = default;
...@@ -91,8 +36,8 @@ class TabScorePredictorTest : public testing::Test { ...@@ -91,8 +36,8 @@ class TabScorePredictorTest : public testing::Test {
protected: protected:
// Returns a prediction for the tab example. // Returns a prediction for the tab example.
float ScoreTab(const TabFeatures& tab, float ScoreTab(const TabFeatures& tab,
const WindowFeatures& window = GetWindowFeatures(), const WindowFeatures& window,
const MRUFeatures& mru = GetMRUFeatures()) { const MRUFeatures& mru) {
float score = 0; float score = 0;
EXPECT_EQ(TabRankerResult::kSuccess, EXPECT_EQ(TabRankerResult::kSuccess,
tab_score_predictor_.ScoreTab(tab, window, mru, &score)); tab_score_predictor_.ScoreTab(tab, window, mru, &score));
...@@ -135,7 +80,7 @@ TEST_F(TabScorePredictorTest, KnownScore) { ...@@ -135,7 +80,7 @@ TEST_F(TabScorePredictorTest, KnownScore) {
window.tab_count = 27; window.tab_count = 27;
// Pre-calculated score using the generated model outside of Chrome. // Pre-calculated score using the generated model outside of Chrome.
EXPECT_FLOAT_EQ(8.0287771, ScoreTab(tab, window, mru)); EXPECT_FLOAT_EQ(1.8338085, ScoreTab(tab, window, mru));
} }
// Checks the score for a different example that we have calculated a known // Checks the score for a different example that we have calculated a known
...@@ -164,107 +109,7 @@ TEST_F(TabScorePredictorTest, KnownScoreMissingOptionalFeatures) { ...@@ -164,107 +109,7 @@ TEST_F(TabScorePredictorTest, KnownScoreMissingOptionalFeatures) {
window.tab_count = 127; window.tab_count = 127;
// Pre-calculated score using the generated model outside of Chrome. // Pre-calculated score using the generated model outside of Chrome.
EXPECT_FLOAT_EQ(10.577342, ScoreTab(tab, window, mru)); EXPECT_FLOAT_EQ(8.7163248, ScoreTab(tab, window, mru));
}
TEST_F(TabScorePredictorTest, InactiveDuration) {
  // Expectation: the less time a tab has spent in the background, the more
  // likely it is to be reactivated, so it should score higher.
  TabFeatures recent_tab = GetTabFeatures(kUnseenHost);
  recent_tab.time_from_backgrounded =
      base::TimeDelta::FromMinutes(1).InMilliseconds();

  TabFeatures stale_tab = GetTabFeatures(kUnseenHost);
  stale_tab.time_from_backgrounded =
      base::TimeDelta::FromMinutes(1000).InMilliseconds();

  ASSERT_GT(ScoreTab(recent_tab), ScoreTab(stale_tab));
}
TEST_F(TabScorePredictorTest, NavigationEntryCount) {
  // Expectation: raising the navigation entry count of an otherwise unchanged
  // active tab raises its score.
  TabFeatures tab = GetTabFeatures(kUnseenHost, /*user_activity=*/true);
  const float baseline_score = ScoreTab(tab);

  tab.navigation_entry_count = 10;
  const float more_navigations_score = ScoreTab(tab);

  ASSERT_GT(more_navigations_score, baseline_score);
}
TEST_F(TabScorePredictorTest, NumReactivationBefore) {
  // Expectation: the score grows strictly as the number of previous
  // reactivations goes 0 -> 10 -> 20, everything else held constant.
  TabFeatures tab = GetTabFeatures(kUnseenHost, /*user_activity=*/true);

  tab.num_reactivations = 0;
  const float score_without_reactivations = ScoreTab(tab);

  tab.num_reactivations = 10;
  const float score_with_10_reactivations = ScoreTab(tab);
  ASSERT_GT(score_with_10_reactivations, score_without_reactivations);

  tab.num_reactivations = 20;
  const float score_with_20_reactivations = ScoreTab(tab);
  ASSERT_GT(score_with_20_reactivations, score_with_10_reactivations);
}
TEST_F(TabScorePredictorTest, PageTransitionTypes) {
  // Expectation: a tab the user manually reloaded is more likely to be
  // reactivated than one reached through a link.
  TabFeatures tab = GetTabFeatures(kUnseenHost, /*user_activity=*/true);

  tab.page_transition_core_type = ui::PAGE_TRANSITION_LINK;
  const float score_for_link = ScoreTab(tab);

  tab.page_transition_core_type = ui::PAGE_TRANSITION_RELOAD;
  const float score_for_reload = ScoreTab(tab);

  ASSERT_GT(score_for_reload, score_for_link);
}
TEST_F(TabScorePredictorTest, SiteEngagementScore) {
  // Expectation: the score grows strictly as site engagement goes from
  // unset -> 10 -> 50 -> 80, everything else held constant.
  TabFeatures tab = GetTabFeatures(kUnseenHost, /*user_activity=*/true);

  // Clear the optional value so that the tab carries no engagement score.
  tab.site_engagement_score.reset();
  const float score_unset = ScoreTab(tab);

  tab.site_engagement_score = 10;
  const float score_at_10 = ScoreTab(tab);
  ASSERT_GT(score_at_10, score_unset);

  tab.site_engagement_score = 50;
  const float score_at_50 = ScoreTab(tab);
  ASSERT_GT(score_at_50, score_at_10);

  tab.site_engagement_score = 80;
  const float score_at_80 = ScoreTab(tab);
  ASSERT_GT(score_at_80, score_at_50);
}
TEST_F(TabScorePredictorTest, TopURLHigherScore) {
  // Expectation: a mail.google.com tab is more likely to be reactivated than
  // an ordinary tab, i.e. the user is more likely to return to a mail tab.
  const TabFeatures unseen_tab =
      GetTabFeatures(kUnseenHost, /*user_activity=*/true);
  const TabFeatures mail_tab =
      GetTabFeatures("mail.google.com", /*user_activity=*/true);

  ASSERT_GT(ScoreTab(mail_tab), ScoreTab(unseen_tab));
}
TEST_F(TabScorePredictorTest, TopURLLowerScore) {
  // Expectation: a www.google.com tab is less likely to be reactivated than a
  // tab on a host the model has not seen.
  const TabFeatures unseen_tab =
      GetTabFeatures(kUnseenHost, /*user_activity=*/true);
  const TabFeatures search_tab =
      GetTabFeatures("www.google.com", /*user_activity=*/true);

  ASSERT_LT(ScoreTab(search_tab), ScoreTab(unseen_tab));
}
TEST_F(TabScorePredictorTest, WasRecentlyAudible) {
TabFeatures example = GetTabFeatures(kUnseenHost, true /*user_activity*/);
float default_score = ScoreTab(example);
// A recently audible tab is more likely to be reactivated.
example.was_recently_audible = true;
float recently_audible_score = ScoreTab(example);
ASSERT_GT(recently_audible_score, default_score);
} }
} // namespace tab_ranker } // namespace tab_ranker
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment