tab_ranker: add ScoreTabsWithPairwiseScorer

(1) If ScoreTabs is called with type_ == kPairwiseScorer, then the more sophisticated ScoreTabsWithPairwiseScorer is used for scoring. (2) ScoreTabsWithPairwiseScorer puts all tabs without TabFeatures first and sorts the remaining tabs by MRU. After that, the best candidate for each position j is picked one by one. (3) In unit tests, we directly call ScoreTabsWithPairwiseScorer with type_ = kFrecencyScorer, so that the tests can be independent of the ML model. (This won't happen from the public interface in real applications.) Bug: 999908 Change-Id: I85302cda11fc0fc754f5402fdacd1e3796a41b9e Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1788774 Reviewed-by: Charles . <charleszhao@chromium.org> Reviewed-by: Tony Yeoman <tby@chromium.org> Commit-Queue: Charles . <charleszhao@chromium.org> Auto-Submit: Charles . <charleszhao@chromium.org> Cr-Commit-Position: refs/heads/master@{#695051}

tab_ranker: add ScoreTabsWithPairwiseScorer
(1) If ScoreTabs is called with type_ == kPairwiseScorer, then the more sophisticated ScoreTabsWithPairwiseScorer is used for scoring. (2) ScoreTabsWithPairwiseScorer puts all tabs without TabFeatures first and sorts the remaining tabs by MRU. After that, the best candidate for each position j is picked one by one. (3) In unit tests, we directly call ScoreTabsWithPairwiseScorer with type_ = kFrecencyScorer, so that the tests can be independent of the ML model. (This won't happen from the public interface in real applications.) Bug: 999908 Change-Id: I85302cda11fc0fc754f5402fdacd1e3796a41b9e Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1788774 Reviewed-by: Charles . <charleszhao@chromium.org> Reviewed-by: Tony Yeoman <tby@chromium.org> Commit-Queue: Charles . <charleszhao@chromium.org> Auto-Submit: Charles . <charleszhao@chromium.org> Cr-Commit-Position: refs/heads/master@{#695051}
a398fb15 · Charles Zhao · Commit Bot · bea4e814 · a398fb15 · a398fb15
Commit a398fb15 authored Sep 10, 2019 by Charles Zhao Committed by Commit Bot Sep 10, 2019
3 changed files
--- a/chrome/browser/resource_coordinator/tab_ranker/tab_score_predictor.cc
+++ b/chrome/browser/resource_coordinator/tab_ranker/tab_score_predictor.cc
@@ -88,8 +88,8 @@ TabRankerResult TabScorePredictor::ScoreTab(const TabFeatures& tab,
    result = ScoreTabWithMRUScorer(tab, score);
  } else if (type_ == kMLScorer) {
    result = ScoreTabWithMLScorer(tab, score);
-  } else if (type_ == KPairwiseScorer) {
-    result = ScoreTabsWithPairwiseScorer(tab, TabFeatures(), score);
+  } else if (type_ == kPairwiseScorer) {
+    result = ScoreTabsPairs(tab, TabFeatures(), score);
  } else if (type_ == kFrecencyScorer) {
    result = ScoreTabWithFrecencyScorer(tab, score);
  } else {
@@ -108,17 +108,21 @@ TabRankerResult TabScorePredictor::ScoreTab(const TabFeatures& tab,

 std::map<int32_t, float> TabScorePredictor::ScoreTabs(
    const std::map<int32_t, base::Optional<TabFeatures>>& tabs) {
-  std::map<int32_t, float> reactivation_scores;
-  for (const auto& pair : tabs) {
-    float score = 0.0f;
-    if (pair.second &&
-        (ScoreTab(pair.second.value(), &score) == TabRankerResult::kSuccess)) {
-      reactivation_scores[pair.first] = score;
-    } else {
-      reactivation_scores[pair.first] = std::numeric_limits<float>::max();
+  if (type_ != kPairwiseScorer) {
+    std::map<int32_t, float> reactivation_scores;
+    for (const auto& pair : tabs) {
+      float score = 0.0f;
+      if (pair.second && (ScoreTab(pair.second.value(), &score) ==
+                          TabRankerResult::kSuccess)) {
+        reactivation_scores[pair.first] = score;
+      } else {
+        reactivation_scores[pair.first] = std::numeric_limits<float>::max();
+      }
    }
+    return reactivation_scores;
+  } else {
+    return ScoreTabsWithPairwiseScorer(tabs);
  }
-  return reactivation_scores;
 }

 TabRankerResult TabScorePredictor::ScoreTabWithMLScorer(const TabFeatures& tab,
@@ -159,7 +163,7 @@ TabRankerResult TabScorePredictor::PredictWithPreprocess(
  if (type_ == kMLScorer)
    tfnative_model::Inference(vectorized_features.data(), score,
                              tfnative_alloc_.get());
-  if (type_ == KPairwiseScorer)
+  if (type_ == kPairwiseScorer)
    pairwise_model::Inference(vectorized_features.data(), score,
                              pairwise_alloc_.get());

@@ -179,32 +183,121 @@ TabRankerResult TabScorePredictor::ScoreTabWithMRUScorer(const TabFeatures& tab,
  return TabRankerResult::kSuccess;
 }

-TabRankerResult TabScorePredictor::ScoreTabsWithPairwiseScorer(
-    const TabFeatures& tab1,
-    const TabFeatures& tab2,
-    float* score) {
-  // Lazy-load the preprocessor config.
-  LazyInitialize();
-  if (!preprocessor_config_ || !pairwise_alloc_) {
-    return TabRankerResult::kPreprocessorInitializationFailed;
+TabRankerResult TabScorePredictor::ScoreTabsPairs(const TabFeatures& tab1,
+                                                  const TabFeatures& tab2,
+                                                  float* score) {
+  if (type_ == kPairwiseScorer) {
+    // Lazy-load the preprocessor config.
+    LazyInitialize();
+    if (!preprocessor_config_ || !pairwise_alloc_) {
+      return TabRankerResult::kPreprocessorInitializationFailed;
+    }
+
+    // Build the RankerExamples using the tab's features.
+    assist_ranker::RankerExample example1, example2;
+    PopulateTabFeaturesToRankerExample(tab1, &example1);
+    PopulateTabFeaturesToRankerExample(tab2, &example2);
+
+    // Merge features from example2 to example1.
+    auto& features = *example1.mutable_features();
+    for (const auto& feature : example2.features()) {
+      const std::string new_name = base::StrCat({feature.first, "_1"});
+      features[new_name] = feature.second;
+    }
+
+    // Inference on example1.
+    return PredictWithPreprocess(&example1, score);
+  } else {
+    // For non-pairwise scorer, we simply calculate the score of each tab and
+    // return the difference.
+    float score1, score2;
+    const TabRankerResult result1 = ScoreTab(tab1, &score1);
+    const TabRankerResult result2 = ScoreTab(tab2, &score2);
+    *score = score1 - score2;
+    return std::max(result1, result2);
  }
+}

-  // Build the RankerExamples using the tab's features.
-  assist_ranker::RankerExample example1, example2;
-  PopulateTabFeaturesToRankerExample(tab1, &example1);
-  PopulateTabFeaturesToRankerExample(tab2, &example2);
+std::map<int32_t, float> TabScorePredictor::ScoreTabsWithPairwiseScorer(
+    const std::map<int32_t, base::Optional<TabFeatures>>& tabs) {
+  const int N = tabs.size();

-  // Merge features from example2 to example1.
-  auto& features = *example1.mutable_features();
-  for (const auto& feature : example2.features()) {
-    const std::string new_name = base::StrCat({feature.first, "_1"});
-    features[new_name] = feature.second;
+  std::vector<int32_t> ids;
+  for (const auto& pair : tabs) {
+    ids.push_back(pair.first);
  }

-  // Inference on example1.
-  return PredictWithPreprocess(&example1, score);
-}
+  // Sort ids by MRU first.
+  // Put the tabs without TabFeatures in front so that they won't be discarded
+  // mistakenly (including current Foregrounded tab).
+  std::sort(ids.begin(), ids.end(),
+            [&tabs](const int32_t id1, const int32_t id2) {
+              const auto& tab1 = tabs.at(id1);
+              const auto& tab2 = tabs.at(id2);
+              if (!tab1)
+                return true;
+              if (!tab2)
+                return false;
+              return tab1->mru_index < tab2->mru_index;
+            });
+
+  std::map<int32_t, float> reactivation_scores;
+
+  // start_index is the first one that has tab_features.
+  int start_index = 0;
+  for (int i = 0; i < N; ++i) {
+    if (!tabs.at(ids[i])) {
+      reactivation_scores[ids[i]] = N - i;
+      start_index = i + 1;
+    } else {
+      break;
+    }
+  }

+  // winning_indices records what's the best tab to be put at pos i.
+  std::vector<int> winning_indices;
+  for (int i = 0; i < N; ++i)
+    winning_indices.push_back(i);
+
+  int winning_index = N - 1;
+  int swapped_index = N - 1;
+  for (int j = start_index; j < N; ++j) {
+    // Find the best candidate at j.
+
+    // swapped_index < N - 1 means that one element has
+    // just been swapped to swapped_index, we should re-calculate
+    // winning_indices from swapped_index to j;
+    if (swapped_index < N - 1) {
+      // Set winning_index as the winning_indices at swapped_index + 1, since
+      // ids from ids.back() to ids[swapped_index + 1] are not
+      // changed.
+      winning_index = winning_indices[swapped_index + 1];
+    }
+
+    for (int i = swapped_index; i >= j; --i) {
+      // Compare ids[i] with ids[winning_index]; inference score > 0 means
+      // that ids[i] is more likely to be reactivated, so we should prefer
+      // ids[i] as new winning_index.
+      float score = 0.0f;
+      const TabRankerResult result = ScoreTabsPairs(
+          tabs.at(ids[i]).value(), tabs.at(ids[winning_index]).value(), &score);
+      if (result == TabRankerResult::kSuccess && score > 0.0f) {
+        winning_index = i;
+      }
+
+      // Always update winning_indices.
+      winning_indices[i] = winning_index;
+    }
+
+    // swap winning_index with j;
+    std::swap(ids[winning_index], ids[j]);
+    swapped_index = winning_index;
+
+    // Find the best candidate for position j, set the score for ids[j].
+    reactivation_scores[ids[j]] = N - j;
+  }
+  return reactivation_scores;
+}
 void TabScorePredictor::LazyInitialize() {
  // Load correct config and alloc based on type_.
  if (type_ == kMLScorer) {
@@ -218,7 +311,7 @@ void TabScorePredictor::LazyInitialize() {
              static_cast<std::size_t>(tfnative_model::FEATURES_SIZE));
  }

-  if (type_ == KPairwiseScorer) {
+  if (type_ == kPairwiseScorer) {
    if (!preprocessor_config_)
      preprocessor_config_ = LoadExamplePreprocessorConfig(
          IDR_TAB_RANKER_PAIRWISE_EXAMPLE_PREPROCESSOR_CONFIG_PB);

--- a/chrome/browser/resource_coordinator/tab_ranker/tab_score_predictor.h
+++ b/chrome/browser/resource_coordinator/tab_ranker/tab_score_predictor.h
@@ -46,7 +46,7 @@ class TabScorePredictor {
  enum ScorerType {
    kMRUScorer = 0,
    kMLScorer = 1,
-    KPairwiseScorer = 2,
+    kPairwiseScorer = 2,
    kFrecencyScorer = 3,
    kMaxValue = kFrecencyScorer
  };
@@ -69,16 +69,27 @@ class TabScorePredictor {
      const std::map<int32_t, base::Optional<TabFeatures>>& tabs);

 private:
+  friend class ScoreTabsWithPairwiseScorerTest;
+
  // Loads the preprocessor config if not already loaded.
  void LazyInitialize();

+  // Calculates reactivation score of a single tab with mru feature.
  TabRankerResult ScoreTabWithMRUScorer(const TabFeatures& tab, float* score);
+  // Calculates reactivation score of a single tab with ml model.
  TabRankerResult ScoreTabWithMLScorer(const TabFeatures& tab, float* score);
+  // Preprocesses |example| and runs inference on it.
  TabRankerResult PredictWithPreprocess(assist_ranker::RankerExample* example,
                                        float* score);
-  TabRankerResult ScoreTabsWithPairwiseScorer(const TabFeatures& tab1,
-                                              const TabFeatures& tab2,
-                                              float* score);
+  // Calculates the relative reactivation score between tab1 and tab2.
+  // For pairwise model, the ml model is applied to the pair(tab1, tab2).
+  // For non-pairwise model, the score is the difference of reactivation
+  // scores on these two tabs.
+  TabRankerResult ScoreTabsPairs(const TabFeatures& tab1,
+                                 const TabFeatures& tab2,
+                                 float* score);
+  std::map<int32_t, float> ScoreTabsWithPairwiseScorer(
+      const std::map<int32_t, base::Optional<TabFeatures>>& tabs);
  TabRankerResult ScoreTabWithFrecencyScorer(const TabFeatures& tab,
                                             float* score);


--- a/chrome/browser/resource_coordinator/tab_ranker/tab_score_predictor_unittest.cc
+++ b/chrome/browser/resource_coordinator/tab_ranker/tab_score_predictor_unittest.cc
@@ -6,6 +6,7 @@

 #include <memory>

+#include "base/rand_util.h"
 #include "base/test/scoped_feature_list.h"
 #include "chrome/browser/resource_coordinator/tab_manager_features.h"
 #include "chrome/browser/resource_coordinator/tab_ranker/tab_features.h"
@@ -109,4 +110,86 @@ TEST_F(TabScorePredictorTest, ScoreTabWithFrecencyScorer) {
  EXPECT_FLOAT_EQ(ScoreTab(tab), 0.25874191);
 }

+class ScoreTabsWithPairwiseScorerTest : public testing::Test {
+ protected:
+  std::map<int32_t, float> ScoreTabsWithPairwiseScorer(
+      const std::map<int32_t, base::Optional<TabFeatures>>& tabs) {
+    return TabScorePredictor().ScoreTabsWithPairwiseScorer(tabs);
+  }
+};
+
+TEST_F(ScoreTabsWithPairwiseScorerTest, EmptyTabFeaturesFirst) {
+  base::test::ScopedFeatureList scoped_feature_list;
+  scoped_feature_list.InitAndEnableFeatureWithParameters(
+      features::kTabRanker, {{"scorer_type", "3"}});
+
+  for (int length = 1; length < 30; ++length) {
+    // Generates random order of ids.
+    std::vector<int32_t> ids;
+    for (int i = 0; i < length; ++i) {
+      ids.push_back(i + 100);
+    }
+    base::RandomShuffle(ids.begin(), ids.end());
+
+    std::map<int32_t, base::Optional<TabFeatures>> tabs;
+    for (int i = 0; i < length; ++i) {
+      TabFeatures tab;
+      tab.mru_index = base::RandInt(0, 3000);
+      // Set the frecency score in reverse order.
+      tab.frecency_score = -i * 5;
+
+      // Set half of the TabFeatures to be null.
+      if (i < length / 2) {
+        tabs[ids[i]] = base::nullopt;
+      } else {
+        tabs[ids[i]] = tab;
+      }
+    }
+
+    const std::map<int32_t, float> scores = ScoreTabsWithPairwiseScorer(tabs);
+    for (int i = 0; i < length; ++i) {
+      if (i < length / 2) {
+        // First half should be all null which have scores > (length+1) / 2.0f;
+        EXPECT_GT(scores.at(ids[i]), (length + 1) / 2.0f);
+      } else {
+        // The second half should be non-empty tab features with descending
+        // scores.
+        EXPECT_FLOAT_EQ(scores.at(ids[i]), length - i);
+      }
+    }
+  }
+}
+
+TEST_F(ScoreTabsWithPairwiseScorerTest, SortByScore) {
+  base::test::ScopedFeatureList scoped_feature_list;
+  scoped_feature_list.InitAndEnableFeatureWithParameters(
+      features::kTabRanker, {{"scorer_type", "3"}});
+
+  // Test all cases with length from 1 to 29.
+  for (int length = 1; length < 30; ++length) {
+    // Generates random order of ids.
+    std::vector<int32_t> ids;
+    for (int i = 0; i < length; ++i) {
+      ids.push_back(i);
+    }
+    base::RandomShuffle(ids.begin(), ids.end());
+
+    // set ids[i] to have frecency_score i*5;
+    std::map<int32_t, base::Optional<TabFeatures>> tabs;
+    for (int i = 0; i < length; ++i) {
+      TabFeatures tab;
+      tab.mru_index = base::RandInt(0, 3000);
+      tab.frecency_score = i * 5;
+      tabs[ids[i]] = tab;
+    }
+
+    const std::map<int32_t, float> scores = ScoreTabsWithPairwiseScorer(tabs);
+
+    // Should return the same order as the shuffled result.
+    for (int i = 0; i < length; ++i) {
+      EXPECT_FLOAT_EQ(scores.at(ids[i]), i + 1);
+    }
+  }
+}
+
 }  // namespace tab_ranker