Commit fedabd26 authored by Aidan Beggs, committed by Commit Bot

Fixes the top 500 keywords generation to split keywords on "-".

Previously, the keywords generation algorithm did not split the input
URLs on "-", which conflicted with the sensitive keywords search
heuristic (which does split on "-"). This CL remedies that by ensuring
that keywords are split on "-" before being put into the keywords list.

Bug: 1015843
Change-Id: Iad3127788f4fd363a9ff7817ef55072c07768a02
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1869119
Reviewed-by: Mustafa Emre Acer <meacer@chromium.org>
Commit-Queue: Aidan Beggs <beggs@google.com>
Cr-Commit-Position: refs/heads/master@{#707476}
parent 1932f4cd
...@@ -130,14 +130,23 @@ int main(int argc, char* argv[]) { ...@@ -130,14 +130,23 @@ int main(int argc, char* argv[]) {
} }
if (keywords.size() < kTopN) { if (keywords.size() < kTopN) {
std::string keyword; std::string keywords_for_current_line;
base::TrimString( base::TrimString(
url_formatter::top_domains::HostnameWithoutRegistry(line), ".", url_formatter::top_domains::HostnameWithoutRegistry(line), ".",
&keyword); &keywords_for_current_line);
CHECK(keyword.find('.') == std::string::npos); CHECK(keywords_for_current_line.find('.') == std::string::npos);
for (const std::string& keyword : base::SplitString(
keywords_for_current_line, "-", base::TRIM_WHITESPACE,
base::SPLIT_WANT_NONEMPTY)) {
if (keywords.find(keyword) == keywords.end()) { if (keywords.find(keyword) == keywords.end()) {
keywords.insert(keyword); keywords.insert(keyword);
} }
if (keywords.size() >= kTopN) {
break;
}
}
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment