Commit fedabd26 authored by Aidan Beggs, committed by Commit Bot

Fixes the top 500 keywords generation to split keywords on "-".

Previously, the keywords generation algorithm did not split the input
URLs on "-", which conflicted with the sensitive keywords search
heuristic (which does split on "-"). This CL fixes that by ensuring
that keywords are split on "-" before being put into the keywords list.

Bug: 1015843
Change-Id: Iad3127788f4fd363a9ff7817ef55072c07768a02
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1869119
Reviewed-by: Mustafa Emre Acer <meacer@chromium.org>
Commit-Queue: Aidan Beggs <beggs@google.com>
Cr-Commit-Position: refs/heads/master@{#707476}
parent 1932f4cd
......@@ -130,13 +130,22 @@ int main(int argc, char* argv[]) {
}
if (keywords.size() < kTopN) {
std::string keyword;
std::string keywords_for_current_line;
base::TrimString(
url_formatter::top_domains::HostnameWithoutRegistry(line), ".",
&keyword);
CHECK(keyword.find('.') == std::string::npos);
if (keywords.find(keyword) == keywords.end()) {
keywords.insert(keyword);
&keywords_for_current_line);
CHECK(keywords_for_current_line.find('.') == std::string::npos);
for (const std::string& keyword : base::SplitString(
keywords_for_current_line, "-", base::TRIM_WHITESPACE,
base::SPLIT_WANT_NONEMPTY)) {
if (keywords.find(keyword) == keywords.end()) {
keywords.insert(keyword);
}
if (keywords.size() >= kTopN) {
break;
}
}
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment