Commit 157b9af0, authored by manukh, committed by Commit Bot

[omnibox] [rich-autocomplete] Add & extract rich AC util methods.

Rich AC uses FindAtWordbreak and split rich AC (future CL) will use 2
additional utility methods regarding searching for terms in strings.
This CL adds the latter 2 methods and moves the 3 methods to
inline_autocompletion_util.cc.

Bug: 1062446
Change-Id: I49a90c9ce8a17c0ff81a18c56c5151ca6c2f8608
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2450700
Reviewed-by: Tommy Li <tommycli@chromium.org>
Commit-Queue: manuk hovanesian <manukh@chromium.org>
Cr-Commit-Position: refs/heads/master@{#813916}
parent 08c24f84
...@@ -125,6 +125,8 @@ static_library("browser") { ...@@ -125,6 +125,8 @@ static_library("browser") {
"in_memory_url_index.h", "in_memory_url_index.h",
"in_memory_url_index_types.cc", "in_memory_url_index_types.cc",
"in_memory_url_index_types.h", "in_memory_url_index_types.h",
"inline_autocompletion_util.cc",
"inline_autocompletion_util.h",
"keyword_extensions_delegate.cc", "keyword_extensions_delegate.cc",
"keyword_extensions_delegate.h", "keyword_extensions_delegate.h",
"keyword_provider.cc", "keyword_provider.cc",
...@@ -487,6 +489,7 @@ source_set("unit_tests") { ...@@ -487,6 +489,7 @@ source_set("unit_tests") {
"history_url_provider_unittest.cc", "history_url_provider_unittest.cc",
"in_memory_url_index_types_unittest.cc", "in_memory_url_index_types_unittest.cc",
"in_memory_url_index_unittest.cc", "in_memory_url_index_unittest.cc",
"inline_autocompletion_util_unittest.cc",
"keyword_provider_unittest.cc", "keyword_provider_unittest.cc",
"local_history_zero_suggest_provider_unittest.cc", "local_history_zero_suggest_provider_unittest.cc",
"location_bar_model_impl_unittest.cc", "location_bar_model_impl_unittest.cc",
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include "components/search_engines/search_engine_utils.h" #include "components/search_engines/search_engine_utils.h"
#include "components/search_engines/template_url.h" #include "components/search_engines/template_url.h"
#include "components/search_engines/template_url_service.h" #include "components/search_engines/template_url_service.h"
#include "inline_autocompletion_util.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h" #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "ui/gfx/vector_icon_types.h" #include "ui/gfx/vector_icon_types.h"
#include "url/third_party/mozilla/url_parse.h" #include "url/third_party/mozilla/url_parse.h"
...@@ -75,24 +76,6 @@ bool WordMatchesURLContent( ...@@ -75,24 +76,6 @@ bool WordMatchesURLContent(
return false; return false;
} }
// Returns the offset of the earliest occurrence of |search| in |text| that
// begins exactly on a word start, or npos when no such occurrence exists.
size_t FindAtWordbreak(const base::string16& text,
                       const base::string16& search) {
  WordStarts starts;
  String16VectorFromString16(text, false, &starts);

  size_t hit = std::string::npos;
  for (auto start : starts) {
    // Word starts that precede the pending hit were already covered by the
    // search that produced it; only the hit position itself (or later) can
    // still line up with a word start.
    const bool already_scanned = hit != std::string::npos && start < hit;
    if (already_scanned)
      continue;

    hit = text.find(search, start);
    // No occurrence remains anywhere at or after this word start.
    if (hit == std::string::npos)
      return std::string::npos;
    if (hit == start)
      return hit;
  }
  return std::string::npos;
}
} // namespace } // namespace
// static // static
...@@ -1134,7 +1117,7 @@ size_t AutocompleteMatch::EstimateMemoryUsage() const { ...@@ -1134,7 +1117,7 @@ size_t AutocompleteMatch::EstimateMemoryUsage() const {
void AutocompleteMatch::UpgradeMatchWithPropertiesFrom( void AutocompleteMatch::UpgradeMatchWithPropertiesFrom(
AutocompleteMatch& duplicate_match) { AutocompleteMatch& duplicate_match) {
// For Entity Matches, absorb the duplicate match's |allowed_to_be_default| // For Entity Matches, absorb the duplicate match's |allowed_to_be_default|
// and |inline_autocomplete| properties. // and |inline_autocompletion| properties.
if (type == AutocompleteMatchType::SEARCH_SUGGEST_ENTITY && if (type == AutocompleteMatchType::SEARCH_SUGGEST_ENTITY &&
fill_into_edit == duplicate_match.fill_into_edit && fill_into_edit == duplicate_match.fill_into_edit &&
duplicate_match.allowed_to_be_default_match) { duplicate_match.allowed_to_be_default_match) {
......
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "inline_autocompletion_util.h"
// Returns the offset of the first occurrence of |search| in |text| that begins
// on a word start located at or after |search_start|, or npos when there is
// none.
size_t FindAtWordbreak(const base::string16& text,
                       const base::string16& search,
                       size_t search_start) {
  std::vector<size_t> boundaries;
  String16VectorFromString16(text, false, &boundaries);

  size_t match = std::string::npos;
  for (const size_t boundary : boundaries) {
    // Ignore word starts the caller asked to skip, as well as those lying
    // before the pending match: the search that found it already covered them.
    const bool before_start = boundary < search_start;
    const bool before_pending_match =
        match != std::string::npos && boundary < match;
    if (before_start || before_pending_match)
      continue;

    match = text.find(search, boundary);
    // Nothing left to find at or after this word start.
    if (match == std::string::npos)
      return std::string::npos;
    if (match == boundary)
      return match;
  }
  return std::string::npos;
}
// Splits |search| into words and locates them, in order, within |text|.
//
// For every word of |search| this records the {start, end} offsets in |text|
// of the word itself (which must begin at a word start, see FindAtWordbreak())
// and, when the word is followed by non-word characters in |search|, of the
// first occurrence of that exact character run in |text| after the word. Each
// lookup resumes where the previous match ended, so matches are sequential.
//
// Returns an empty vector as soon as any word or delimiter cannot be found.
std::vector<std::pair<size_t, size_t>> FindWordsSequentiallyAtWordbreak(
    const base::string16& text,
    const base::string16& search) {
  std::vector<size_t> search_word_starts;
  const auto search_words =
      String16VectorFromString16(search, false, &search_word_starts);

  std::vector<std::pair<size_t, size_t>> occurrences;
  size_t cursor = 0u;
  // NOTE(review): assumes |search_words| has at least as many entries as
  // |search_word_starts| and that they correspond index-by-index - confirm
  // against String16VectorFromString16().
  for (size_t i = 0; i < search_word_starts.size(); ++i) {
    // Bind by reference; the word is only read, so no per-iteration copy.
    const auto& search_word = search_words[i];

    cursor = FindAtWordbreak(text, search_word, cursor);
    if (cursor == std::string::npos)
      return {};
    occurrences.emplace_back(cursor, cursor + search_word.size());
    cursor += search_word.size();

    // The delimiter is the run of non-word characters between the end of this
    // word and the start of the next one (or the end of |search| for the last
    // word). It can be empty and it can span multiple characters.
    const size_t delimiter_begin = search_word_starts[i] + search_word.size();
    const size_t delimiter_end = i + 1 < search_word_starts.size()
                                     ? search_word_starts[i + 1]
                                     : search.size();
    if (delimiter_begin >= delimiter_end)
      continue;
    const base::string16 delimiter =
        search.substr(delimiter_begin, delimiter_end - delimiter_begin);

    cursor = text.find(delimiter, cursor);
    if (cursor == std::string::npos)
      return {};
    occurrences.emplace_back(cursor, cursor + delimiter.size());
    cursor += delimiter.size();
  }
  return occurrences;
}
std::vector<gfx::Range> InvertAndReverseRanges(
size_t length,
std::vector<std::pair<size_t, size_t>> ranges) {
std::vector<gfx::Range> inverted;
size_t cursor = length;
for (size_t i = ranges.size(); i-- != 0;) {
auto range = ranges[i];
// Skip empty ranges.
if (range.first == range.second)
continue;
// Merge adjacent ranges.
if (cursor != range.second)
inverted.emplace_back(cursor, range.second);
cursor = range.first;
}
if (cursor != 0)
inverted.emplace_back(cursor, 0);
return inverted;
}
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_OMNIBOX_BROWSER_INLINE_AUTOCOMPLETION_UTIL_H_
#define COMPONENTS_OMNIBOX_BROWSER_INLINE_AUTOCOMPLETION_UTIL_H_
#include <stddef.h>
#include <vector>
#include "base/strings/string16.h"
#include "components/omnibox/browser/in_memory_url_index_types.h"
#include "ui/gfx/range/range.h"
// Finds the first occurrence of |search| within |text| that begins at a word
// start located at or after |search_start|. Occurrences that begin in the
// middle of a word are ignored. Returns the index of the occurrence, or
// std::string::npos if there is none.
size_t FindAtWordbreak(const base::string16& text,
const base::string16& search,
size_t search_start = 0);
// Splits |search| into words and finds them in |text|, returning a vector of
// {start, end} occurrence pairs: one for each matched word and one for each
// non-empty run of non-word characters that follows a word in |search|.
// - Occurrences must be sequential. E.g. 'a c' can be found in 'a b c' but not
// in 'c b a'.
// - Occurrences must be at word breaks. E.g. 'a c' cannot be found in 'a bc'.
// - The non-word characters following each word (e.g. whitespace) must also
// be found somewhere after that word. E.g. 'a c' cannot be found in 'a-c' but
// can be found in 'a -c' and 'a- c'.
// If any word or delimiter in |search| was not found, returns an empty vector.
std::vector<std::pair<size_t, size_t>> FindWordsSequentiallyAtWordbreak(
const base::string16& text,
const base::string16& search);
// Inverts and reverses |ranges| in a domain of [0, |length|). Ranges are
// interpreted as {start, end}. E.g., if |length| is 10 and |ranges| are
// {{2, 3}, {5, 9}}, then |InvertAndReverseRanges| will return
// {{10, 9}, {5, 3}, {2, 0}}. Empty ranges in |ranges| are skipped, and no
// range is emitted between two adjacent input ranges.
// Assumes |ranges| is in forward order; i.e. |ranges[i+1]| occurs after
// |ranges[i]| and |ranges[i].second| after |ranges[i].first|.
std::vector<gfx::Range> InvertAndReverseRanges(
size_t length,
std::vector<std::pair<size_t, size_t>> ranges);
#endif // COMPONENTS_OMNIBOX_BROWSER_INLINE_AUTOCOMPLETION_UTIL_H_
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/omnibox/browser/inline_autocompletion_util.h"
#include <stddef.h>
#include "base/strings/utf_string_conversions.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace {
TEST(InlineAutocompletionUtilTest, FindAtWordbreak) {
  const base::string16 needle = base::UTF8ToUTF16("match");

  // The mid-word hit inside "prefixmatch" is ignored; the hit that starts a
  // word is reported.
  const base::string16 mixed_text =
      base::UTF8ToUTF16("prefixmatch wordbreak_match");
  EXPECT_EQ(FindAtWordbreak(mixed_text, needle), 22u);

  // With only a mid-word hit available, nothing is found.
  const base::string16 mid_word_only = base::UTF8ToUTF16("prefixmatch");
  EXPECT_EQ(FindAtWordbreak(mid_word_only, needle), std::string::npos);

  // Hits that begin before |search_start| are not reported.
  const base::string16 repeated = base::UTF8ToUTF16("match match");
  EXPECT_EQ(FindAtWordbreak(repeated, needle, 1), 6u);
}
TEST(InlineAutocompletionUtilTest, FindWordsSequentiallyAtWordbreak) {
  using pair = std::pair<size_t, size_t>;

  // Occurrences must be sequential.
  EXPECT_THAT(FindWordsSequentiallyAtWordbreak(base::UTF8ToUTF16("a b c"),
                                               base::UTF8ToUTF16("a c")),
              testing::ElementsAre(pair{0, 1}, pair{1, 2}, pair{4, 5}));
  EXPECT_THAT(FindWordsSequentiallyAtWordbreak(base::UTF8ToUTF16("c b a"),
                                               base::UTF8ToUTF16("a b")),
              testing::ElementsAre());
  EXPECT_THAT(FindWordsSequentiallyAtWordbreak(base::UTF8ToUTF16("b a b"),
                                               base::UTF8ToUTF16("a b")),
              testing::ElementsAre(pair{2, 3}, pair{3, 4}, pair{4, 5}));

  // Occurrences must be at word breaks.
  EXPECT_THAT(FindWordsSequentiallyAtWordbreak(base::UTF8ToUTF16("a b-c"),
                                               base::UTF8ToUTF16("a c")),
              testing::ElementsAre(pair{0, 1}, pair{1, 2}, pair{4, 5}));
  EXPECT_THAT(FindWordsSequentiallyAtWordbreak(base::UTF8ToUTF16("a bc"),
                                               base::UTF8ToUTF16("a c")),
              testing::ElementsAre());
  EXPECT_THAT(FindWordsSequentiallyAtWordbreak(base::UTF8ToUTF16("a bc c"),
                                               base::UTF8ToUTF16("a c")),
              testing::ElementsAre(pair{0, 1}, pair{1, 2}, pair{5, 6}));

  // Whitespaces must also match.
  EXPECT_THAT(FindWordsSequentiallyAtWordbreak(base::UTF8ToUTF16("a-c"),
                                               base::UTF8ToUTF16("a c")),
              testing::ElementsAre());
  // A trailing delimiter in |search| must be present in |text| too.
  EXPECT_THAT(FindWordsSequentiallyAtWordbreak(base::UTF8ToUTF16("a c"),
                                               base::UTF8ToUTF16("a c ")),
              testing::ElementsAre());
  EXPECT_THAT(FindWordsSequentiallyAtWordbreak(base::UTF8ToUTF16("a -c"),
                                               base::UTF8ToUTF16("a c")),
              testing::ElementsAre(pair{0, 1}, pair{1, 2}, pair{3, 4}));
  EXPECT_THAT(FindWordsSequentiallyAtWordbreak(base::UTF8ToUTF16("a- c"),
                                               base::UTF8ToUTF16("a c")),
              testing::ElementsAre(pair{0, 1}, pair{2, 3}, pair{3, 4}));

  // Multi-character delimiters must match in full. The two cases below search
  // for 'a', two spaces, then 'c'. The first text never contains two
  // consecutive spaces, so nothing is found; the second does, at [3, 5).
  EXPECT_THAT(FindWordsSequentiallyAtWordbreak(base::UTF8ToUTF16("a c c"),
                                               base::UTF8ToUTF16("a  c")),
              testing::ElementsAre());
  EXPECT_THAT(FindWordsSequentiallyAtWordbreak(base::UTF8ToUTF16("a c  c"),
                                               base::UTF8ToUTF16("a  c")),
              testing::ElementsAre(pair{0, 1}, pair{3, 5}, pair{5, 6}));
}
TEST(InlineAutocompletionUtilTest, InvertAndReverseRanges) {
  // Empty |ranges| in empty |length|.
  EXPECT_THAT(InvertAndReverseRanges(0, {}), testing::ElementsAre());

  // Empty |ranges| in non-empty |length|. 12345 -> [12345]
  EXPECT_THAT(InvertAndReverseRanges(5, {}),
              testing::ElementsAre(gfx::Range{5, 0}));

  // Single empty range in |ranges|. 12|345 -> [12345]
  EXPECT_THAT(InvertAndReverseRanges(5, {{2, 2}}),
              testing::ElementsAre(gfx::Range{5, 0}));

  // Single range in |ranges|. 12[3]45 -> [12]3[45]
  EXPECT_THAT(InvertAndReverseRanges(5, {{2, 3}}),
              testing::ElementsAre(gfx::Range{5, 3}, gfx::Range{2, 0}));

  // Single range in |ranges| spanning all of |length|. [12345] -> 12345
  EXPECT_THAT(InvertAndReverseRanges(5, {{0, 5}}), testing::ElementsAre());

  // Multiple ranges in |ranges|, including adjacent and empty ranges. The
  // empty range {5, 5} must neither split nor shrink the gap around it.
  // 1[23][45]|6[78]9 -> [1]2345[6]78[9]
  EXPECT_THAT(InvertAndReverseRanges(9, {{1, 3}, {3, 5}, {5, 5}, {6, 8}}),
              testing::ElementsAre(gfx::Range{9, 8}, gfx::Range{6, 5},
                                   gfx::Range{1, 0}));

  // |ranges| ending at |length| and starting at 0.
  // [123][45]6[789] -> 12345[6]789
  EXPECT_THAT(InvertAndReverseRanges(9, {{0, 3}, {3, 5}, {6, 9}}),
              testing::ElementsAre(gfx::Range{6, 5}));
}
} // namespace
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment