Commit 2b2ea3c0 authored by Jia, committed by Commit Bot

[cros search service] Move shared string matching functions to //chrome

We are implementing a local search service using approximate string
matching for query search. Currently the approximate string matching
algorithm is implemented as FuzzyTokenizedStringMatch under

chrome/browser/ui/app_list/search/search_utils/

and we will need to migrate this functionality to a location that can be used
by the local search service (under chrome/services/).

This CL moves FuzzyTokenizedStringMatch, TokenizedString and all other
related string matching functions to //chrome/common/string_matching so
that they can be shared by the current clients and by the future local
search service.

Design doc for local search service: go/cros-search-service
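
For reference, a typical call site changes roughly as follows (sketch put
together from the hunks in this CL; for TokenizedString and
TokenizedStringMatch only the include paths and the namespace change):

  // Before:
  #include "ash/public/cpp/app_list/tokenized_string.h"
  #include "ash/public/cpp/app_list/tokenized_string_match.h"
  ash::TokenizedString tokenized_title(title);
  ash::TokenizedStringMatch match;

  // After:
  #include "chrome/common/string_matching/tokenized_string.h"
  #include "chrome/common/string_matching/tokenized_string_match.h"
  string_matching::TokenizedString tokenized_title(title);
  string_matching::TokenizedStringMatch match;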

Bug: 1018613
Change-Id: If5648bf3046d52dd92ab5479dd94916552a99786
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1939128
Reviewed-by: Thanh Nguyen <thanhdng@chromium.org>
Reviewed-by: Colin Blundell <blundell@chromium.org>
Reviewed-by: Xiyuan Xia <xiyuan@chromium.org>
Commit-Queue: Jia Meng <jiameng@chromium.org>
Cr-Commit-Position: refs/heads/master@{#721300}
parent 9ea5013b
...@@ -7,8 +7,6 @@ ...@@ -7,8 +7,6 @@
#include <map> #include <map>
#include "ash/app_list/model/search/search_result_observer.h" #include "ash/app_list/model/search/search_result_observer.h"
#include "ash/public/cpp/app_list/tokenized_string.h"
#include "ash/public/cpp/app_list/tokenized_string_match.h"
#include "ui/base/models/menu_model.h" #include "ui/base/models/menu_model.h"
namespace ash { namespace ash {
......
...@@ -38,14 +38,6 @@ component("cpp") { ...@@ -38,14 +38,6 @@ component("cpp") {
"app_list/app_list_types.cc", "app_list/app_list_types.cc",
"app_list/app_list_types.h", "app_list/app_list_types.h",
"app_list/internal_app_id_constants.h", "app_list/internal_app_id_constants.h",
"app_list/term_break_iterator.cc",
"app_list/term_break_iterator.h",
"app_list/tokenized_string.cc",
"app_list/tokenized_string.h",
"app_list/tokenized_string_char_iterator.cc",
"app_list/tokenized_string_char_iterator.h",
"app_list/tokenized_string_match.cc",
"app_list/tokenized_string_match.h",
"app_menu_constants.h", "app_menu_constants.h",
"app_types.h", "app_types.h",
"arc_app_id_provider.cc", "arc_app_id_provider.cc",
......
...@@ -82,6 +82,7 @@ source_set("chromeos") { ...@@ -82,6 +82,7 @@ source_set("chromeos") {
"//chrome/browser/web_applications/extensions:extensions", "//chrome/browser/web_applications/extensions:extensions",
"//chrome/common", "//chrome/common",
"//chrome/common/extensions/api", "//chrome/common/extensions/api",
"//chrome/common/string_matching",
"//chrome/services/app_service:lib", "//chrome/services/app_service:lib",
"//chrome/services/app_service/public/cpp:app_update", "//chrome/services/app_service/public/cpp:app_update",
"//chrome/services/file_util/public/cpp", "//chrome/services/file_util/public/cpp",
......
...@@ -8,13 +8,13 @@ ...@@ -8,13 +8,13 @@
#include <utility> #include <utility>
#include "ash/public/cpp/app_list/tokenized_string.h"
#include "ash/public/cpp/app_list/tokenized_string_match.h"
#include "base/numerics/ranges.h" #include "base/numerics/ranges.h"
#include "base/strings/utf_string_conversions.h" #include "base/strings/utf_string_conversions.h"
#include "chrome/browser/chromeos/launcher_search_provider/launcher_search_provider_service_factory.h" #include "chrome/browser/chromeos/launcher_search_provider/launcher_search_provider_service_factory.h"
#include "chrome/browser/ui/app_list/search/launcher_search/launcher_search_provider.h" #include "chrome/browser/ui/app_list/search/launcher_search/launcher_search_provider.h"
#include "chrome/browser/ui/app_list/search/launcher_search/launcher_search_result.h" #include "chrome/browser/ui/app_list/search/launcher_search/launcher_search_result.h"
#include "chrome/common/string_matching/tokenized_string.h"
#include "chrome/common/string_matching/tokenized_string_match.h"
#include "extensions/browser/extension_registry.h" #include "extensions/browser/extension_registry.h"
#include "extensions/common/extension_set.h" #include "extensions/common/extension_set.h"
#include "extensions/common/permissions/permissions_data.h" #include "extensions/common/permissions/permissions_data.h"
...@@ -135,9 +135,9 @@ void Service::SetSearchResults( ...@@ -135,9 +135,9 @@ void Service::SetSearchResults(
// set the title tags (highlighting which parts of the title matched the // set the title tags (highlighting which parts of the title matched the
// search query). // search query).
const base::string16 title = base::UTF8ToUTF16(result.title); const base::string16 title = base::UTF8ToUTF16(result.title);
ash::TokenizedString tokenized_title(title); string_matching::TokenizedString tokenized_title(title);
ash::TokenizedStringMatch match; string_matching::TokenizedStringMatch match;
ash::TokenizedString tokenized_query(base::UTF8ToUTF16(query_)); string_matching::TokenizedString tokenized_query(base::UTF8ToUTF16(query_));
if (!match.Calculate(tokenized_query, tokenized_title)) if (!match.Calculate(tokenized_query, tokenized_title))
continue; continue;
......
...@@ -361,6 +361,7 @@ jumbo_static_library("ui") { ...@@ -361,6 +361,7 @@ jumbo_static_library("ui") {
# have the same dependencies. Once browser_ui is untangled from # have the same dependencies. Once browser_ui is untangled from
# browser, then we can clean up these dependencies. # browser, then we can clean up these dependencies.
public_deps = [ public_deps = [
"//chrome/common/string_matching",
"//components/dom_distiller/core", "//components/dom_distiller/core",
"//components/safe_browsing:buildflags", "//components/safe_browsing:buildflags",
"//components/sync", "//components/sync",
...@@ -3693,10 +3694,6 @@ jumbo_static_library("ui") { ...@@ -3693,10 +3694,6 @@ jumbo_static_library("ui") {
"app_list/search/search_result_ranker/search_ranking_event_logger.h", "app_list/search/search_result_ranker/search_ranking_event_logger.h",
"app_list/search/search_result_ranker/search_result_ranker.cc", "app_list/search/search_result_ranker/search_result_ranker.cc",
"app_list/search/search_result_ranker/search_result_ranker.h", "app_list/search/search_result_ranker/search_result_ranker.h",
"app_list/search/search_utils/fuzzy_tokenized_string_match.cc",
"app_list/search/search_utils/fuzzy_tokenized_string_match.h",
"app_list/search/search_utils/sequence_matcher.cc",
"app_list/search/search_utils/sequence_matcher.h",
"app_list/search/settings_shortcut/settings_shortcut_metadata.cc", "app_list/search/settings_shortcut/settings_shortcut_metadata.cc",
"app_list/search/settings_shortcut/settings_shortcut_metadata.h", "app_list/search/settings_shortcut/settings_shortcut_metadata.h",
"app_list/search/settings_shortcut/settings_shortcut_provider.cc", "app_list/search/settings_shortcut/settings_shortcut_provider.cc",
......
...@@ -16,8 +16,6 @@ ...@@ -16,8 +16,6 @@
#include "ash/public/cpp/app_list/app_list_config.h" #include "ash/public/cpp/app_list/app_list_config.h"
#include "ash/public/cpp/app_list/app_list_features.h" #include "ash/public/cpp/app_list/app_list_features.h"
#include "ash/public/cpp/app_list/internal_app_id_constants.h" #include "ash/public/cpp/app_list/internal_app_id_constants.h"
#include "ash/public/cpp/app_list/tokenized_string.h"
#include "ash/public/cpp/app_list/tokenized_string_match.h"
#include "base/bind.h" #include "base/bind.h"
#include "base/callback_list.h" #include "base/callback_list.h"
#include "base/macros.h" #include "base/macros.h"
...@@ -41,7 +39,9 @@ ...@@ -41,7 +39,9 @@
#include "chrome/browser/ui/app_list/search/app_service_app_result.h" #include "chrome/browser/ui/app_list/search/app_service_app_result.h"
#include "chrome/browser/ui/app_list/search/search_result_ranker/app_search_result_ranker.h" #include "chrome/browser/ui/app_list/search/search_result_ranker/app_search_result_ranker.h"
#include "chrome/browser/ui/app_list/search/search_result_ranker/ranking_item_util.h" #include "chrome/browser/ui/app_list/search/search_result_ranker/ranking_item_util.h"
#include "chrome/browser/ui/app_list/search/search_utils/fuzzy_tokenized_string_match.h" #include "chrome/common/string_matching/fuzzy_tokenized_string_match.h"
#include "chrome/common/string_matching/tokenized_string.h"
#include "chrome/common/string_matching/tokenized_string_match.h"
#include "components/sync/base/model_type.h" #include "components/sync/base/model_type.h"
#include "components/sync_sessions/session_sync_service.h" #include "components/sync_sessions/session_sync_service.h"
...@@ -130,12 +130,13 @@ class AppSearchProvider::App { ...@@ -130,12 +130,13 @@ class AppSearchProvider::App {
} }
}; };
ash::TokenizedString* GetTokenizedIndexedName() { string_matching::TokenizedString* GetTokenizedIndexedName() {
// Tokenizing a string is expensive. Don't pay the price for it at // Tokenizing a string is expensive. Don't pay the price for it at
// construction of every App, but rather, only when needed (i.e. when the // construction of every App, but rather, only when needed (i.e. when the
// query is not empty and cache the result. // query is not empty and cache the result.
if (!tokenized_indexed_name_) if (!tokenized_indexed_name_)
tokenized_indexed_name_ = std::make_unique<ash::TokenizedString>(name_); tokenized_indexed_name_ =
std::make_unique<string_matching::TokenizedString>(name_);
return tokenized_indexed_name_.get(); return tokenized_indexed_name_.get();
} }
...@@ -147,16 +148,16 @@ class AppSearchProvider::App { ...@@ -147,16 +148,16 @@ class AppSearchProvider::App {
return base::Time(); return base::Time();
} }
bool MatchSearchableText(const ash::TokenizedString& query) { bool MatchSearchableText(const string_matching::TokenizedString& query) {
if (searchable_text_.empty()) if (searchable_text_.empty())
return false; return false;
if (tokenized_indexed_searchable_text_.empty()) { if (tokenized_indexed_searchable_text_.empty()) {
for (const base::string16& curr_text : searchable_text_) { for (const base::string16& curr_text : searchable_text_) {
tokenized_indexed_searchable_text_.push_back( tokenized_indexed_searchable_text_.push_back(
std::make_unique<ash::TokenizedString>(curr_text)); std::make_unique<string_matching::TokenizedString>(curr_text));
} }
} }
ash::TokenizedStringMatch match; string_matching::TokenizedStringMatch match;
for (auto& curr_text : tokenized_indexed_searchable_text_) { for (auto& curr_text : tokenized_indexed_searchable_text_) {
match.Calculate(query, *curr_text); match.Calculate(query, *curr_text);
if (match.relevance() > relevance_threshold()) if (match.relevance() > relevance_threshold())
...@@ -196,8 +197,8 @@ class AppSearchProvider::App { ...@@ -196,8 +197,8 @@ class AppSearchProvider::App {
private: private:
AppSearchProvider::DataSource* data_source_; AppSearchProvider::DataSource* data_source_;
std::unique_ptr<ash::TokenizedString> tokenized_indexed_name_; std::unique_ptr<string_matching::TokenizedString> tokenized_indexed_name_;
std::vector<std::unique_ptr<ash::TokenizedString>> std::vector<std::unique_ptr<string_matching::TokenizedString>>
tokenized_indexed_searchable_text_; tokenized_indexed_searchable_text_;
const std::string id_; const std::string id_;
const base::string16 name_; const base::string16 name_;
...@@ -497,14 +498,15 @@ void AppSearchProvider::UpdateQueriedResults() { ...@@ -497,14 +498,15 @@ void AppSearchProvider::UpdateQueriedResults() {
const size_t apps_size = apps_.size(); const size_t apps_size = apps_.size();
new_results.reserve(apps_size); new_results.reserve(apps_size);
const ash::TokenizedString query_terms(query_); const string_matching::TokenizedString query_terms(query_);
for (auto& app : apps_) { for (auto& app : apps_) {
if (!app->searchable()) if (!app->searchable())
continue; continue;
ash::TokenizedString* indexed_name = app->GetTokenizedIndexedName(); string_matching::TokenizedString* indexed_name =
app->GetTokenizedIndexedName();
if (!app_list_features::IsFuzzyAppSearchEnabled()) { if (!app_list_features::IsFuzzyAppSearchEnabled()) {
ash::TokenizedStringMatch match; string_matching::TokenizedStringMatch match;
if (match.Calculate(query_terms, *indexed_name)) { if (match.Calculate(query_terms, *indexed_name)) {
// Exact matches should be shown even if the threshold isn't reached, // Exact matches should be shown even if the threshold isn't reached,
// e.g. due to a localized name being particularly short. // e.g. due to a localized name being particularly short.
...@@ -521,8 +523,28 @@ void AppSearchProvider::UpdateQueriedResults() { ...@@ -521,8 +523,28 @@ void AppSearchProvider::UpdateQueriedResults() {
result->UpdateFromMatch(*indexed_name, match); result->UpdateFromMatch(*indexed_name, match);
MaybeAddResult(&new_results, std::move(result), &seen_or_filtered_apps); MaybeAddResult(&new_results, std::move(result), &seen_or_filtered_apps);
} else { } else {
FuzzyTokenizedStringMatch match; string_matching::FuzzyTokenizedStringMatch match;
if (match.IsRelevant(query_terms, *indexed_name) ||
// TODO(crbug.com/1018613): consolidate finch parameters.
const bool use_prefix_only = base::GetFieldTrialParamByFeatureAsBool(
app_list_features::kEnableFuzzyAppSearch, "use_prefix_only", false);
const bool use_weighted_ratio = base::GetFieldTrialParamByFeatureAsBool(
app_list_features::kEnableFuzzyAppSearch, "use_weighted_ratio", true);
const bool use_edit_distance = base::GetFieldTrialParamByFeatureAsBool(
app_list_features::kEnableFuzzyAppSearch, "use_edit_distance", false);
const double relevance_threshold =
base::GetFieldTrialParamByFeatureAsDouble(
app_list_features::kEnableFuzzyAppSearch, "relevance_threshold",
0.3);
const double partial_match_penalty_rate =
base::GetFieldTrialParamByFeatureAsDouble(
app_list_features::kEnableFuzzyAppSearch,
"partial_match_penalty_rate", 0.9);
if (match.IsRelevant(query_terms, *indexed_name, relevance_threshold,
use_prefix_only, use_weighted_ratio,
use_edit_distance, partial_match_penalty_rate) ||
app->MatchSearchableText(query_terms) || app->MatchSearchableText(query_terms) ||
base::EqualsCaseInsensitiveASCII(query_, app->name())) { base::EqualsCaseInsensitiveASCII(query_, app->name())) {
std::unique_ptr<AppResult> result = app->data_source()->CreateResult( std::unique_ptr<AppResult> result = app->data_source()->CreateResult(
......
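
The app_search_provider.cc hunk above reads the kEnableFuzzyAppSearch
field-trial parameters and passes them straight into the new IsRelevant()
signature. For illustration only (not part of this CL; the query/text strings
are invented), a caller that pins those parameters to the same defaults would
look like this:

  #include "base/strings/utf_string_conversions.h"
  #include "chrome/common/string_matching/fuzzy_tokenized_string_match.h"
  #include "chrome/common/string_matching/tokenized_string.h"

  bool IsAppNameRelevant() {
    const string_matching::TokenizedString query(base::UTF8ToUTF16("calc"));
    const string_matching::TokenizedString text(base::UTF8ToUTF16("Calculator"));
    string_matching::FuzzyTokenizedStringMatch match;
    // Values mirror the finch defaults read in the hunk above.
    return match.IsRelevant(query, text,
                            /*relevance_threshold=*/0.3,
                            /*use_prefix_only=*/false,
                            /*use_weighted_ratio=*/true,
                            /*use_edit_distance=*/false,
                            /*partial_match_penalty_rate=*/0.9);
  }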
...@@ -6,10 +6,10 @@ ...@@ -6,10 +6,10 @@
#include <map> #include <map>
#include "ash/public/cpp/app_list/tokenized_string.h"
#include "ash/public/cpp/app_list/tokenized_string_match.h"
#include "base/containers/adapters.h" #include "base/containers/adapters.h"
#include "chrome/browser/ui/app_list/app_context_menu.h" #include "chrome/browser/ui/app_list/app_context_menu.h"
#include "chrome/common/string_matching/tokenized_string.h"
#include "chrome/common/string_matching/tokenized_string_match.h"
ChromeSearchResult::ChromeSearchResult() ChromeSearchResult::ChromeSearchResult()
: metadata_(std::make_unique<ash::SearchResultMetadata>()) {} : metadata_(std::make_unique<ash::SearchResultMetadata>()) {}
...@@ -160,9 +160,9 @@ void ChromeSearchResult::OnVisibilityChanged(bool visibility) { ...@@ -160,9 +160,9 @@ void ChromeSearchResult::OnVisibilityChanged(bool visibility) {
} }
void ChromeSearchResult::UpdateFromMatch( void ChromeSearchResult::UpdateFromMatch(
const ash::TokenizedString& title, const string_matching::TokenizedString& title,
const ash::TokenizedStringMatch& match) { const string_matching::TokenizedStringMatch& match) {
const ash::TokenizedStringMatch::Hits& hits = match.hits(); const string_matching::TokenizedStringMatch::Hits& hits = match.hits();
Tags tags; Tags tags;
tags.reserve(hits.size()); tags.reserve(hits.size());
......
...@@ -19,10 +19,10 @@ namespace app_list { ...@@ -19,10 +19,10 @@ namespace app_list {
class AppContextMenu; class AppContextMenu;
} // namespace app_list } // namespace app_list
namespace ash { namespace string_matching {
class TokenizedString; class TokenizedString;
class TokenizedStringMatch; class TokenizedStringMatch;
} // namespace ash } // namespace string_matching
// ChromeSearchResult consists of an icon, title text and details text. Title // ChromeSearchResult consists of an icon, title text and details text. Title
// and details text can have tagged ranges that are displayed differently from // and details text can have tagged ranges that are displayed differently from
...@@ -131,8 +131,8 @@ class ChromeSearchResult { ...@@ -131,8 +131,8 @@ class ChromeSearchResult {
// Updates the result's relevance score, and sets its title and title tags, // Updates the result's relevance score, and sets its title and title tags,
// based on a string match result. // based on a string match result.
void UpdateFromMatch(const ash::TokenizedString& title, void UpdateFromMatch(const string_matching::TokenizedString& title,
const ash::TokenizedStringMatch& match); const string_matching::TokenizedStringMatch& match);
// Returns the context menu model for this item, or NULL if there is currently // Returns the context menu model for this item, or NULL if there is currently
// no menu for the item (e.g. during install). |callback| takes the ownership // no menu for the item (e.g. during install). |callback| takes the ownership
......
# Copyright 2019 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import("//build/config/ui.gni")
import("//chrome/common/features.gni")
source_set("string_matching") {
sources = [
"fuzzy_tokenized_string_match.cc",
"fuzzy_tokenized_string_match.h",
"sequence_matcher.cc",
"sequence_matcher.h",
"term_break_iterator.cc",
"term_break_iterator.h",
"tokenized_string.cc",
"tokenized_string.h",
"tokenized_string_char_iterator.cc",
"tokenized_string_char_iterator.h",
"tokenized_string_match.cc",
"tokenized_string_match.h",
]
deps = [
"//base",
"//base:i18n",
"//cc",
]
public_deps = [
"//base",
"//ui/gfx",
]
}
jiameng@chromium.org
thanhdng@chromium.org
...@@ -2,19 +2,19 @@ ...@@ -2,19 +2,19 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#ifndef CHROME_BROWSER_UI_APP_LIST_SEARCH_SEARCH_UTILS_FUZZY_TOKENIZED_STRING_MATCH_H_ #ifndef CHROME_COMMON_STRING_MATCHING_FUZZY_TOKENIZED_STRING_MATCH_H_
#define CHROME_BROWSER_UI_APP_LIST_SEARCH_SEARCH_UTILS_FUZZY_TOKENIZED_STRING_MATCH_H_ #define CHROME_COMMON_STRING_MATCHING_FUZZY_TOKENIZED_STRING_MATCH_H_
#include "ash/public/cpp/app_list/tokenized_string.h"
#include "base/gtest_prod_util.h" #include "base/gtest_prod_util.h"
#include "base/macros.h" #include "base/macros.h"
#include "chrome/common/string_matching/tokenized_string.h"
#include "ui/gfx/range/range.h" #include "ui/gfx/range/range.h"
namespace ash { namespace string_matching {
class TokenizedString; class TokenizedString;
} // namespace ash } // namespace string_matching
namespace app_list { namespace string_matching {
// FuzzyTokenizedStringMatch takes two tokenized strings: one as the text and // FuzzyTokenizedStringMatch takes two tokenized strings: one as the text and
// the other one as the query. It matches the query against the text, // the other one as the query. It matches the query against the text,
...@@ -22,6 +22,8 @@ namespace app_list { ...@@ -22,6 +22,8 @@ namespace app_list {
// of text. A relevance of zero means the two are completely different to each // of text. A relevance of zero means the two are completely different to each
// other. The higher the relevance score, the better the two strings are // other. The higher the relevance score, the better the two strings are
// matched. Matched portions of text are stored as index ranges. // matched. Matched portions of text are stored as index ranges.
// TODO(crbug.com/1018613): each of these functions have too many input params,
// we should revise the structure and remove unnecessary ones.
class FuzzyTokenizedStringMatch { class FuzzyTokenizedStringMatch {
public: public:
typedef std::vector<gfx::Range> Hits; typedef std::vector<gfx::Range> Hits;
...@@ -31,8 +33,13 @@ class FuzzyTokenizedStringMatch { ...@@ -31,8 +33,13 @@ class FuzzyTokenizedStringMatch {
// Calculates the relevance of two strings. Returns true if two strings are // Calculates the relevance of two strings. Returns true if two strings are
// somewhat matched, i.e. relevance score is greater than a threshold. // somewhat matched, i.e. relevance score is greater than a threshold.
bool IsRelevant(const ash::TokenizedString& query, bool IsRelevant(const TokenizedString& query,
const ash::TokenizedString& text); const TokenizedString& text,
double relevance_threshold,
bool use_prefix_only,
bool use_weighted_ratio,
bool use_edit_distance,
double partial_match_penalty_rate);
double relevance() const { return relevance_; } double relevance() const { return relevance_; }
const Hits& hits() const { return hits_; } const Hits& hits() const { return hits_; }
...@@ -44,32 +51,41 @@ class FuzzyTokenizedStringMatch { ...@@ -44,32 +51,41 @@ class FuzzyTokenizedStringMatch {
// Finds the best ratio of shorter text with a part of longer text. // Finds the best ratio of shorter text with a part of longer text.
// This function assumes that TokenizedString is already normalized (converted // This function assumes that TokenizedString is already normalized (converted
// to lower case). The return score is in range of [0, 1]. // to lower case). The return score is in range of [0, 1].
double PartialRatio(const base::string16& query, const base::string16& text); double PartialRatio(const base::string16& query,
const base::string16& text,
double partial_match_penalty_rate,
bool use_edit_distance);
// TokenSetRatio takes two sets of tokens, finds their intersection and // TokenSetRatio takes two sets of tokens, finds their intersection and
// differences. From the intersection and differences, it rewrites the |query| // differences. From the intersection and differences, it rewrites the |query|
// and |text| and find the similarity ratio between them. This function // and |text| and find the similarity ratio between them. This function
// assumes that TokenizedString is already normalized (converted to lower // assumes that TokenizedString is already normalized (converted to lower
// case). Duplicates tokens will be removed for ratio computation. // case). Duplicates tokens will be removed for ratio computation.
double TokenSetRatio(const ash::TokenizedString& query, double TokenSetRatio(const TokenizedString& query,
const ash::TokenizedString& text, const TokenizedString& text,
bool partial); bool partial,
double partial_match_penalty_rate,
bool use_edit_distance);
// TokenSortRatio takes two set of tokens, sorts them and find the similarity // TokenSortRatio takes two set of tokens, sorts them and find the similarity
// between two sorted strings. This function assumes that TokenizedString is // between two sorted strings. This function assumes that TokenizedString is
// already normalized (converted to lower case) // already normalized (converted to lower case)
double TokenSortRatio(const ash::TokenizedString& query, double TokenSortRatio(const TokenizedString& query,
const ash::TokenizedString& text, const TokenizedString& text,
bool partial); bool partial,
double partial_match_penalty_rate,
bool use_edit_distance);
// Combines scores from different ratio functions. This function assumes that // Combines scores from different ratio functions. This function assumes that
// TokenizedString is already normalized (converted to lower cases). // TokenizedString is already normalized (converted to lower cases).
// The return score is in range of [0, 1]. // The return score is in range of [0, 1].
double WeightedRatio(const ash::TokenizedString& query, double WeightedRatio(const TokenizedString& query,
const ash::TokenizedString& text); const TokenizedString& text,
double partial_match_penalty_rate,
bool use_edit_distance);
// Since prefix match should always be favored over other matches, this // Since prefix match should always be favored over other matches, this
// function is dedicated to calculate a prefix match score in range of [0, 1]. // function is dedicated to calculate a prefix match score in range of [0, 1].
// This score has two components: first character match and whole prefix // This score has two components: first character match and whole prefix
// match. // match.
double PrefixMatcher(const ash::TokenizedString& query, double PrefixMatcher(const TokenizedString& query,
const ash::TokenizedString& text); const TokenizedString& text);
// Score in range of [0,1] representing how well the query matches the text. // Score in range of [0,1] representing how well the query matches the text.
double relevance_ = 0; double relevance_ = 0;
Hits hits_; Hits hits_;
...@@ -78,11 +94,10 @@ class FuzzyTokenizedStringMatch { ...@@ -78,11 +94,10 @@ class FuzzyTokenizedStringMatch {
}; };
namespace internal { namespace internal {
double FirstCharacterMatch(const ash::TokenizedString& query, double FirstCharacterMatch(const TokenizedString& query,
const ash::TokenizedString& text); const TokenizedString& text);
double PrefixMatch(const ash::TokenizedString& query, double PrefixMatch(const TokenizedString& query, const TokenizedString& text);
const ash::TokenizedString& text);
} // namespace internal } // namespace internal
} // namespace app_list } // namespace string_matching
#endif // CHROME_BROWSER_UI_APP_LIST_SEARCH_SEARCH_UTILS_FUZZY_TOKENIZED_STRING_MATCH_H_ #endif // CHROME_COMMON_STRING_MATCHING_FUZZY_TOKENIZED_STRING_MATCH_H_
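
The comments above describe TokenSetRatio only at a high level. As a rough,
standalone illustration of that idea (my own sketch in the spirit of a
fuzzywuzzy-style token_set_ratio, not the code being moved; SimpleRatio() is a
stand-in for the real block-matching/edit-distance ratio):

  #include <algorithm>
  #include <iostream>
  #include <iterator>
  #include <set>
  #include <sstream>
  #include <string>

  namespace {

  // Splits on whitespace; std::set drops duplicate tokens, as the comment
  // above says the real code does.
  std::set<std::string> Tokenize(const std::string& s) {
    std::set<std::string> tokens;
    std::istringstream stream(s);
    std::string token;
    while (stream >> token)
      tokens.insert(token);
    return tokens;
  }

  std::string Join(const std::set<std::string>& tokens) {
    std::string out;
    for (const std::string& t : tokens)
      out += (out.empty() ? "" : " ") + t;
    return out;
  }

  // Placeholder similarity in [0, 1].
  double SimpleRatio(const std::string& a, const std::string& b) {
    if (a.empty() && b.empty())
      return 1.0;
    size_t same = 0;
    for (size_t i = 0; i < std::min(a.size(), b.size()); ++i)
      same += (a[i] == b[i]) ? 1 : 0;
    return 2.0 * same / (a.size() + b.size());
  }

  // Intersection and differences of the two token sets, rewritten into three
  // candidate strings; the best pairwise ratio wins, so shared tokens dominate
  // and word order / extra words are forgiven.
  double TokenSetRatioSketch(const std::string& query, const std::string& text) {
    const std::set<std::string> q = Tokenize(query), t = Tokenize(text);
    std::set<std::string> common, q_only, t_only;
    std::set_intersection(q.begin(), q.end(), t.begin(), t.end(),
                          std::inserter(common, common.begin()));
    std::set_difference(q.begin(), q.end(), t.begin(), t.end(),
                        std::inserter(q_only, q_only.begin()));
    std::set_difference(t.begin(), t.end(), q.begin(), q.end(),
                        std::inserter(t_only, t_only.begin()));
    const std::string base = Join(common);
    const std::string rewritten_query = base + " " + Join(q_only);
    const std::string rewritten_text = base + " " + Join(t_only);
    return std::max({SimpleRatio(base, rewritten_query),
                     SimpleRatio(base, rewritten_text),
                     SimpleRatio(rewritten_query, rewritten_text)});
  }

  }  // namespace

  int main() {
    std::cout << TokenSetRatioSketch("chrome web store", "store chrome") << "\n";
    return 0;
  }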
...@@ -2,18 +2,14 @@ ...@@ -2,18 +2,14 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#include "chrome/browser/ui/app_list/search/search_utils/sequence_matcher.h" #include "chrome/common/string_matching/sequence_matcher.h"
#include <algorithm> #include <algorithm>
#include <queue> #include <queue>
#include "ash/public/cpp/app_list/app_list_features.h" namespace string_matching {
#include "base/metrics/field_trial_params.h"
namespace app_list {
namespace { namespace {
constexpr bool kDefaultUseEditDistance = false;
using Match = SequenceMatcher::Match; using Match = SequenceMatcher::Match;
using Matches = std::vector<Match>; using Matches = std::vector<Match>;
...@@ -31,7 +27,8 @@ SequenceMatcher::Match::Match(int pos_first, int pos_second, int len) ...@@ -31,7 +27,8 @@ SequenceMatcher::Match::Match(int pos_first, int pos_second, int len)
} }
SequenceMatcher::SequenceMatcher(const base::string16& first_string, SequenceMatcher::SequenceMatcher(const base::string16& first_string,
const base::string16& second_string) const base::string16& second_string,
bool use_edit_distance)
: first_string_(first_string), : first_string_(first_string),
second_string_(second_string), second_string_(second_string),
dp_common_string_(second_string.size() + 1, 0) { dp_common_string_(second_string.size() + 1, 0) {
...@@ -40,9 +37,7 @@ SequenceMatcher::SequenceMatcher(const base::string16& first_string, ...@@ -40,9 +37,7 @@ SequenceMatcher::SequenceMatcher(const base::string16& first_string,
for (size_t i = 0; i < second_string_.size(); i++) { for (size_t i = 0; i < second_string_.size(); i++) {
char_to_positions_[second_string_[i]].emplace_back(i); char_to_positions_[second_string_[i]].emplace_back(i);
} }
use_edit_distance_ = base::GetFieldTrialParamByFeatureAsBool( use_edit_distance_ = use_edit_distance;
app_list_features::kEnableFuzzyAppSearch, "use_edit_distance",
kDefaultUseEditDistance);
} }
Match SequenceMatcher::FindLongestMatch(int first_start, Match SequenceMatcher::FindLongestMatch(int first_start,
...@@ -202,4 +197,4 @@ double SequenceMatcher::Ratio() { ...@@ -202,4 +197,4 @@ double SequenceMatcher::Ratio() {
return block_matching_ratio_; return block_matching_ratio_;
} }
} // namespace app_list } // namespace string_matching
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#ifndef CHROME_BROWSER_UI_APP_LIST_SEARCH_SEARCH_UTILS_SEQUENCE_MATCHER_H_ #ifndef CHROME_COMMON_STRING_MATCHING_SEQUENCE_MATCHER_H_
#define CHROME_BROWSER_UI_APP_LIST_SEARCH_SEARCH_UTILS_SEQUENCE_MATCHER_H_ #define CHROME_COMMON_STRING_MATCHING_SEQUENCE_MATCHER_H_
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
#include "base/logging.h" #include "base/logging.h"
#include "base/macros.h" #include "base/macros.h"
namespace app_list { namespace string_matching {
// Performs the calculation of similarity level between 2 strings. This class's // Performs the calculation of similarity level between 2 strings. This class's
// functionality is inspired by python's difflib.SequenceMatcher library. // functionality is inspired by python's difflib.SequenceMatcher library.
...@@ -32,7 +32,8 @@ class SequenceMatcher { ...@@ -32,7 +32,8 @@ class SequenceMatcher {
int length; int length;
}; };
SequenceMatcher(const base::string16& first_string, SequenceMatcher(const base::string16& first_string,
const base::string16& second_string); const base::string16& second_string,
bool use_edit_distance);
~SequenceMatcher() = default; ~SequenceMatcher() = default;
...@@ -75,6 +76,6 @@ class SequenceMatcher { ...@@ -75,6 +76,6 @@ class SequenceMatcher {
DISALLOW_COPY_AND_ASSIGN(SequenceMatcher); DISALLOW_COPY_AND_ASSIGN(SequenceMatcher);
}; };
} // namespace app_list } // namespace string_matching
#endif // CHROME_BROWSER_UI_APP_LIST_SEARCH_SEARCH_UTILS_SEQUENCE_MATCHER_H_ #endif // CHROME_COMMON_STRING_MATCHING_SEQUENCE_MATCHER_H_
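
The class comment points at python's difflib. For intuition, difflib's ratio is
2 * M / T, where M is the total length of all matching blocks and T is the
combined length of the two strings; the expectations in the unit test below
(0.75 for "abcd" vs "adbc", 0.5 for "white cats" vs "cats white") are
consistent with that formula. A self-contained sketch of that scoring (my own
illustration, not the moved SequenceMatcher):

  #include <iostream>
  #include <string>
  #include <vector>

  namespace {

  // Longest common substring of |a| and |b| via a rolling dynamic program;
  // writes the 0-based start positions to |pos_a| / |pos_b|.
  int LongestMatch(const std::string& a, const std::string& b,
                   int* pos_a, int* pos_b) {
    int best = 0;
    *pos_a = *pos_b = 0;
    std::vector<int> prev(b.size() + 1, 0), curr(b.size() + 1, 0);
    for (size_t i = 1; i <= a.size(); ++i) {
      for (size_t j = 1; j <= b.size(); ++j) {
        curr[j] = (a[i - 1] == b[j - 1]) ? prev[j - 1] + 1 : 0;
        if (curr[j] > best) {
          best = curr[j];
          *pos_a = static_cast<int>(i) - best;
          *pos_b = static_cast<int>(j) - best;
        }
      }
      prev.swap(curr);
    }
    return best;
  }

  // Total matched characters, difflib style: take the longest block, then
  // recurse on the pieces to its left and to its right.
  int MatchedChars(const std::string& a, const std::string& b) {
    if (a.empty() || b.empty())
      return 0;
    int pa = 0, pb = 0;
    const int len = LongestMatch(a, b, &pa, &pb);
    if (len == 0)
      return 0;
    return len + MatchedChars(a.substr(0, pa), b.substr(0, pb)) +
           MatchedChars(a.substr(pa + len), b.substr(pb + len));
  }

  double Ratio(const std::string& a, const std::string& b) {
    return 2.0 * MatchedChars(a, b) / (a.size() + b.size());
  }

  }  // namespace

  int main() {
    // Blocks "a" + "bc" -> M = 3, T = 8, ratio = 0.75.
    std::cout << Ratio("abcd", "adbc") << "\n";
    // Block "white" -> M = 5, T = 20, ratio = 0.5.
    std::cout << Ratio("white cats", "cats white") << "\n";
    return 0;
  }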
...@@ -2,18 +2,17 @@ ...@@ -2,18 +2,17 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#include "chrome/browser/ui/app_list/search/search_utils/sequence_matcher.h" #include "chrome/common/string_matching/sequence_matcher.h"
#include "ash/public/cpp/app_list/app_list_features.h"
#include "base/macros.h" #include "base/macros.h"
#include "base/strings/utf_string_conversions.h" #include "base/strings/utf_string_conversions.h"
#include "base/test/scoped_feature_list.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h" #include "testing/gtest/include/gtest/gtest.h"
namespace app_list { namespace string_matching {
namespace { namespace {
constexpr bool kDefaultUseEditDistance = false;
using Match = SequenceMatcher::Match; using Match = SequenceMatcher::Match;
bool MatchEqual(const Match& match1, const Match& match2) { bool MatchEqual(const Match& match1, const Match& match2) {
return match1.pos_first_string == match2.pos_first_string && return match1.pos_first_string == match2.pos_first_string &&
...@@ -26,64 +25,72 @@ class SequenceMatcherTest : public testing::Test {}; ...@@ -26,64 +25,72 @@ class SequenceMatcherTest : public testing::Test {};
TEST_F(SequenceMatcherTest, TestEditDistance) { TEST_F(SequenceMatcherTest, TestEditDistance) {
// Transposition // Transposition
ASSERT_EQ( ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("abcd"),
SequenceMatcher(base::UTF8ToUTF16("abcd"), base::UTF8ToUTF16("abdc")) base::UTF8ToUTF16("abdc"), kDefaultUseEditDistance)
.EditDistance(), .EditDistance(),
1); 1);
// Deletion // Deletion
ASSERT_EQ( ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("abcde"),
SequenceMatcher(base::UTF8ToUTF16("abcde"), base::UTF8ToUTF16("abcd")) base::UTF8ToUTF16("abcd"), kDefaultUseEditDistance)
.EditDistance(), .EditDistance(),
1); 1);
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("12"), base::UTF8ToUTF16("")) ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("12"), base::UTF8ToUTF16(""),
kDefaultUseEditDistance)
.EditDistance(), .EditDistance(),
2); 2);
// Insertion // Insertion
ASSERT_EQ( ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("abc"),
SequenceMatcher(base::UTF8ToUTF16("abc"), base::UTF8ToUTF16("abxbc")) base::UTF8ToUTF16("abxbc"), kDefaultUseEditDistance)
.EditDistance(), .EditDistance(),
2); 2);
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16(""), base::UTF8ToUTF16("abxbc")) ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16(""), base::UTF8ToUTF16("abxbc"),
kDefaultUseEditDistance)
.EditDistance(), .EditDistance(),
5); 5);
// Substitution // Substitution
ASSERT_EQ( ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("book"),
SequenceMatcher(base::UTF8ToUTF16("book"), base::UTF8ToUTF16("back")) base::UTF8ToUTF16("back"), kDefaultUseEditDistance)
.EditDistance(), .EditDistance(),
2); 2);
// Combination // Combination
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("caclulation"), ASSERT_EQ(
base::UTF8ToUTF16("calculator")) SequenceMatcher(base::UTF8ToUTF16("caclulation"),
.EditDistance(), base::UTF8ToUTF16("calculator"), kDefaultUseEditDistance)
3); .EditDistance(),
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("sunday"), 3);
base::UTF8ToUTF16("saturday")) ASSERT_EQ(
.EditDistance(), SequenceMatcher(base::UTF8ToUTF16("sunday"),
3); base::UTF8ToUTF16("saturday"), kDefaultUseEditDistance)
.EditDistance(),
3);
} }
TEST_F(SequenceMatcherTest, TestFindLongestMatch) { TEST_F(SequenceMatcherTest, TestFindLongestMatch) {
SequenceMatcher sequence_match(base::UTF8ToUTF16("miscellanious"), SequenceMatcher sequence_match(base::UTF8ToUTF16("miscellanious"),
base::UTF8ToUTF16("miscellaneous")); base::UTF8ToUTF16("miscellaneous"),
kDefaultUseEditDistance);
ASSERT_TRUE(MatchEqual(sequence_match.FindLongestMatch(0, 13, 0, 13), ASSERT_TRUE(MatchEqual(sequence_match.FindLongestMatch(0, 13, 0, 13),
Match(0, 0, 9))); Match(0, 0, 9)));
ASSERT_TRUE(MatchEqual(sequence_match.FindLongestMatch(7, 13, 7, 13), ASSERT_TRUE(MatchEqual(sequence_match.FindLongestMatch(7, 13, 7, 13),
Match(10, 10, 3))); Match(10, 10, 3)));
ASSERT_TRUE(MatchEqual( ASSERT_TRUE(MatchEqual(
SequenceMatcher(base::UTF8ToUTF16(""), base::UTF8ToUTF16("abcd")) SequenceMatcher(base::UTF8ToUTF16(""), base::UTF8ToUTF16("abcd"),
kDefaultUseEditDistance)
.FindLongestMatch(0, 0, 0, 4), .FindLongestMatch(0, 0, 0, 4),
Match(0, 0, 0))); Match(0, 0, 0)));
ASSERT_TRUE(MatchEqual(SequenceMatcher(base::UTF8ToUTF16("abababbababa"),
base::UTF8ToUTF16("ababbaba"))
.FindLongestMatch(0, 12, 0, 8),
Match(2, 0, 8)));
ASSERT_TRUE(MatchEqual( ASSERT_TRUE(MatchEqual(
SequenceMatcher(base::UTF8ToUTF16("aaaaaa"), base::UTF8ToUTF16("aaaaa")) SequenceMatcher(base::UTF8ToUTF16("abababbababa"),
base::UTF8ToUTF16("ababbaba"), kDefaultUseEditDistance)
.FindLongestMatch(0, 12, 0, 8),
Match(2, 0, 8)));
ASSERT_TRUE(MatchEqual(
SequenceMatcher(base::UTF8ToUTF16("aaaaaa"), base::UTF8ToUTF16("aaaaa"),
kDefaultUseEditDistance)
.FindLongestMatch(0, 6, 0, 5), .FindLongestMatch(0, 6, 0, 5),
Match(0, 0, 5))); Match(0, 0, 5)));
} }
...@@ -91,7 +98,8 @@ TEST_F(SequenceMatcherTest, TestFindLongestMatch) { ...@@ -91,7 +98,8 @@ TEST_F(SequenceMatcherTest, TestFindLongestMatch) {
TEST_F(SequenceMatcherTest, TestGetMatchingBlocks) { TEST_F(SequenceMatcherTest, TestGetMatchingBlocks) {
SequenceMatcher sequence_match( SequenceMatcher sequence_match(
base::UTF8ToUTF16("This is a demo sentence!!!"), base::UTF8ToUTF16("This is a demo sentence!!!"),
base::UTF8ToUTF16("This demo sentence is good!!!")); base::UTF8ToUTF16("This demo sentence is good!!!"),
kDefaultUseEditDistance);
const std::vector<Match> true_matches = {Match(0, 0, 4), Match(9, 4, 14), const std::vector<Match> true_matches = {Match(0, 0, 4), Match(9, 4, 14),
Match(23, 26, 3), Match(26, 29, 0)}; Match(23, 26, 3), Match(26, 29, 0)};
const std::vector<Match> matches = sequence_match.GetMatchingBlocks(); const std::vector<Match> matches = sequence_match.GetMatchingBlocks();
...@@ -102,36 +110,25 @@ TEST_F(SequenceMatcherTest, TestGetMatchingBlocks) { ...@@ -102,36 +110,25 @@ TEST_F(SequenceMatcherTest, TestGetMatchingBlocks) {
} }
TEST_F(SequenceMatcherTest, TestSequenceMatcherRatio) { TEST_F(SequenceMatcherTest, TestSequenceMatcherRatio) {
base::test::ScopedFeatureList feature_list; ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("abcd"),
feature_list.InitWithFeaturesAndParameters( base::UTF8ToUTF16("adbc"), kDefaultUseEditDistance)
{{app_list_features::kEnableFuzzyAppSearch, .Ratio(),
{{"use_edit_distance", "false"}}}}, 0.75);
{});
ASSERT_EQ( ASSERT_EQ(
SequenceMatcher(base::UTF8ToUTF16("abcd"), base::UTF8ToUTF16("adbc")) SequenceMatcher(base::UTF8ToUTF16("white cats"),
base::UTF8ToUTF16("cats white"), kDefaultUseEditDistance)
.Ratio(), .Ratio(),
0.75); 0.5);
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("white cats"),
base::UTF8ToUTF16("cats white"))
.Ratio(),
0.5);
} }
TEST_F(SequenceMatcherTest, TestEditDistanceRatio) { TEST_F(SequenceMatcherTest, TestEditDistanceRatio) {
base::test::ScopedFeatureList feature_list; ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("abcd"),
feature_list.InitWithFeaturesAndParameters( base::UTF8ToUTF16("adbc"), true)
{{app_list_features::kEnableFuzzyAppSearch, .Ratio(),
{{"use_edit_distance", "true"}}}}, 0.5);
{});
ASSERT_EQ(
SequenceMatcher(base::UTF8ToUTF16("abcd"), base::UTF8ToUTF16("adbc"))
.Ratio(),
0.5);
EXPECT_NEAR(SequenceMatcher(base::UTF8ToUTF16("white cats"), EXPECT_NEAR(SequenceMatcher(base::UTF8ToUTF16("white cats"),
base::UTF8ToUTF16("cats white")) base::UTF8ToUTF16("cats white"), true)
.Ratio(), .Ratio(),
0.2, 0.01); 0.2, 0.01);
} }
} // namespace app_list } // namespace string_matching
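
The edit-distance expectations in the test above (e.g. a distance of 1 for the
transposition "abcd" vs "abdc") are consistent with an optimal-string-alignment
style Damerau-Levenshtein distance, in which swapping two adjacent characters
costs a single edit. A standalone sketch of that distance (my illustration, not
the implementation under test):

  #include <algorithm>
  #include <iostream>
  #include <string>
  #include <vector>

  int EditDistance(const std::string& a, const std::string& b) {
    const size_t n = a.size(), m = b.size();
    std::vector<std::vector<int>> d(n + 1, std::vector<int>(m + 1, 0));
    for (size_t i = 0; i <= n; ++i)
      d[i][0] = static_cast<int>(i);
    for (size_t j = 0; j <= m; ++j)
      d[0][j] = static_cast<int>(j);
    for (size_t i = 1; i <= n; ++i) {
      for (size_t j = 1; j <= m; ++j) {
        const int cost = (a[i - 1] == b[j - 1]) ? 0 : 1;
        d[i][j] = std::min({d[i - 1][j] + 1,          // deletion
                            d[i][j - 1] + 1,          // insertion
                            d[i - 1][j - 1] + cost}); // substitution
        if (i > 1 && j > 1 && a[i - 1] == b[j - 2] && a[i - 2] == b[j - 1])
          d[i][j] = std::min(d[i][j], d[i - 2][j - 2] + 1);  // transposition
      }
    }
    return d[n][m];
  }

  int main() {
    std::cout << EditDistance("abcd", "abdc") << "\n";        // 1, as expected above.
    std::cout << EditDistance("sunday", "saturday") << "\n";  // 3
    std::cout << EditDistance("book", "back") << "\n";        // 2
    return 0;
  }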
// Copyright 2013 The Chromium Authors. All rights reserved. // Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#include "ash/public/cpp/app_list/term_break_iterator.h" #include "chrome/common/string_matching/term_break_iterator.h"
#include "base/i18n/char_iterator.h" #include "base/i18n/char_iterator.h"
#include "base/logging.h" #include "base/logging.h"
#include "base/strings/string_util.h" #include "base/strings/string_util.h"
#include "third_party/icu/source/common/unicode/uchar.h" #include "third_party/icu/source/common/unicode/uchar.h"
namespace ash { namespace string_matching {
TermBreakIterator::TermBreakIterator(const base::string16& word) TermBreakIterator::TermBreakIterator(const base::string16& word)
: word_(word), : word_(word),
...@@ -72,4 +72,4 @@ TermBreakIterator::State TermBreakIterator::GetNewState(base::char16 ch) { ...@@ -72,4 +72,4 @@ TermBreakIterator::State TermBreakIterator::GetNewState(base::char16 ch) {
return STATE_CHAR; return STATE_CHAR;
} }
} // namespace ash } // namespace string_matching
// Copyright 2013 The Chromium Authors. All rights reserved. // Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#ifndef ASH_PUBLIC_CPP_APP_LIST_TERM_BREAK_ITERATOR_H_ #ifndef CHROME_COMMON_STRING_MATCHING_TERM_BREAK_ITERATOR_H_
#define ASH_PUBLIC_CPP_APP_LIST_TERM_BREAK_ITERATOR_H_ #define CHROME_COMMON_STRING_MATCHING_TERM_BREAK_ITERATOR_H_
#include <stddef.h> #include <stddef.h>
#include <memory> #include <memory>
#include "ash/public/cpp/ash_public_export.h"
#include "base/macros.h" #include "base/macros.h"
#include "base/strings/string16.h" #include "base/strings/string16.h"
...@@ -19,7 +18,7 @@ class UTF16CharIterator; ...@@ -19,7 +18,7 @@ class UTF16CharIterator;
} }
} // namespace base } // namespace base
namespace ash { namespace string_matching {
// TermBreakIterator breaks terms out of a word. Terms are broken on // TermBreakIterator breaks terms out of a word. Terms are broken on
// camel case boundaries and alpha/number boundaries. Numbers are defined // camel case boundaries and alpha/number boundaries. Numbers are defined
...@@ -27,7 +26,7 @@ namespace ash { ...@@ -27,7 +26,7 @@ namespace ash {
// e.g. // e.g.
// CamelCase -> Camel, Case // CamelCase -> Camel, Case
// Python2.7 -> Python, 2.7 // Python2.7 -> Python, 2.7
class ASH_PUBLIC_EXPORT TermBreakIterator { class TermBreakIterator {
public: public:
// Note that |word| must out live this iterator. // Note that |word| must out live this iterator.
explicit TermBreakIterator(const base::string16& word); explicit TermBreakIterator(const base::string16& word);
...@@ -68,6 +67,6 @@ class ASH_PUBLIC_EXPORT TermBreakIterator { ...@@ -68,6 +67,6 @@ class ASH_PUBLIC_EXPORT TermBreakIterator {
DISALLOW_COPY_AND_ASSIGN(TermBreakIterator); DISALLOW_COPY_AND_ASSIGN(TermBreakIterator);
}; };
} // namespace ash } // namespace string_matching
#endif // ASH_PUBLIC_CPP_APP_LIST_TERM_BREAK_ITERATOR_H_ #endif // CHROME_COMMON_STRING_MATCHING_TERM_BREAK_ITERATOR_H_
// Copyright 2013 The Chromium Authors. All rights reserved. // Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#include "ash/public/cpp/app_list/term_break_iterator.h" #include "chrome/common/string_matching/term_break_iterator.h"
#include "base/strings/utf_string_conversions.h" #include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h" #include "testing/gtest/include/gtest/gtest.h"
using base::UTF8ToUTF16; using base::UTF8ToUTF16;
namespace app_list { namespace string_matching {
namespace test { namespace test {
TEST(TermBreakIteratorTest, EmptyWord) { TEST(TermBreakIteratorTest, EmptyWord) {
base::string16 empty; base::string16 empty;
ash::TermBreakIterator iter(empty); TermBreakIterator iter(empty);
EXPECT_FALSE(iter.Advance()); EXPECT_FALSE(iter.Advance());
} }
TEST(TermBreakIteratorTest, Simple) { TEST(TermBreakIteratorTest, Simple) {
base::string16 word(UTF8ToUTF16("simple")); base::string16 word(UTF8ToUTF16("simple"));
ash::TermBreakIterator iter(word); TermBreakIterator iter(word);
EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.Advance());
EXPECT_EQ(UTF8ToUTF16("simple"), iter.GetCurrentTerm()); EXPECT_EQ(UTF8ToUTF16("simple"), iter.GetCurrentTerm());
EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end.
...@@ -28,7 +28,7 @@ TEST(TermBreakIteratorTest, Simple) { ...@@ -28,7 +28,7 @@ TEST(TermBreakIteratorTest, Simple) {
TEST(TermBreakIteratorTest, CamelCase) { TEST(TermBreakIteratorTest, CamelCase) {
base::string16 word(UTF8ToUTF16("CamelCase")); base::string16 word(UTF8ToUTF16("CamelCase"));
ash::TermBreakIterator iter(word); TermBreakIterator iter(word);
EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.Advance());
EXPECT_EQ(UTF8ToUTF16("Camel"), iter.GetCurrentTerm()); EXPECT_EQ(UTF8ToUTF16("Camel"), iter.GetCurrentTerm());
EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.Advance());
...@@ -38,7 +38,7 @@ TEST(TermBreakIteratorTest, CamelCase) { ...@@ -38,7 +38,7 @@ TEST(TermBreakIteratorTest, CamelCase) {
TEST(TermBreakIteratorTest, LowerToUpper) { TEST(TermBreakIteratorTest, LowerToUpper) {
base::string16 word(UTF8ToUTF16("lowerToUpper")); base::string16 word(UTF8ToUTF16("lowerToUpper"));
ash::TermBreakIterator iter(word); TermBreakIterator iter(word);
EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.Advance());
EXPECT_EQ(UTF8ToUTF16("lower"), iter.GetCurrentTerm()); EXPECT_EQ(UTF8ToUTF16("lower"), iter.GetCurrentTerm());
EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.Advance());
...@@ -50,7 +50,7 @@ TEST(TermBreakIteratorTest, LowerToUpper) { ...@@ -50,7 +50,7 @@ TEST(TermBreakIteratorTest, LowerToUpper) {
TEST(TermBreakIteratorTest, AlphaNumber) { TEST(TermBreakIteratorTest, AlphaNumber) {
base::string16 word(UTF8ToUTF16("Chromium26.0.0.0")); base::string16 word(UTF8ToUTF16("Chromium26.0.0.0"));
ash::TermBreakIterator iter(word); TermBreakIterator iter(word);
EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.Advance());
EXPECT_EQ(UTF8ToUTF16("Chromium"), iter.GetCurrentTerm()); EXPECT_EQ(UTF8ToUTF16("Chromium"), iter.GetCurrentTerm());
EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.Advance());
...@@ -60,7 +60,7 @@ TEST(TermBreakIteratorTest, AlphaNumber) { ...@@ -60,7 +60,7 @@ TEST(TermBreakIteratorTest, AlphaNumber) {
TEST(TermBreakIteratorTest, StartsWithNumber) { TEST(TermBreakIteratorTest, StartsWithNumber) {
base::string16 word(UTF8ToUTF16("123startWithNumber")); base::string16 word(UTF8ToUTF16("123startWithNumber"));
ash::TermBreakIterator iter(word); TermBreakIterator iter(word);
EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.Advance());
EXPECT_EQ(UTF8ToUTF16("123"), iter.GetCurrentTerm()); EXPECT_EQ(UTF8ToUTF16("123"), iter.GetCurrentTerm());
EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.Advance());
...@@ -75,7 +75,7 @@ TEST(TermBreakIteratorTest, StartsWithNumber) { ...@@ -75,7 +75,7 @@ TEST(TermBreakIteratorTest, StartsWithNumber) {
TEST(TermBreakIteratorTest, CaseAndNoCase) { TEST(TermBreakIteratorTest, CaseAndNoCase) {
// "English" + two Chinese chars U+4E2D U+6587 + "Word" // "English" + two Chinese chars U+4E2D U+6587 + "Word"
base::string16 word(UTF8ToUTF16("English\xe4\xb8\xad\xe6\x96\x87Word")); base::string16 word(UTF8ToUTF16("English\xe4\xb8\xad\xe6\x96\x87Word"));
ash::TermBreakIterator iter(word); TermBreakIterator iter(word);
EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.Advance());
EXPECT_EQ(UTF8ToUTF16("English"), iter.GetCurrentTerm()); EXPECT_EQ(UTF8ToUTF16("English"), iter.GetCurrentTerm());
EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.Advance());
...@@ -86,4 +86,4 @@ TEST(TermBreakIteratorTest, CaseAndNoCase) { ...@@ -86,4 +86,4 @@ TEST(TermBreakIteratorTest, CaseAndNoCase) {
} }
} // namespace test } // namespace test
} // namespace app_list } // namespace string_matching
// Copyright 2013 The Chromium Authors. All rights reserved. // Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#include "ash/public/cpp/app_list/tokenized_string.h" #include "chrome/common/string_matching/tokenized_string.h"
#include <stddef.h> #include <stddef.h>
#include "ash/public/cpp/app_list/term_break_iterator.h"
#include "base/i18n/break_iterator.h" #include "base/i18n/break_iterator.h"
#include "base/i18n/case_conversion.h" #include "base/i18n/case_conversion.h"
#include "base/logging.h" #include "base/logging.h"
#include "chrome/common/string_matching/term_break_iterator.h"
using base::i18n::BreakIterator; using base::i18n::BreakIterator;
namespace ash { namespace string_matching {
TokenizedString::TokenizedString(const base::string16& text) : text_(text) { TokenizedString::TokenizedString(const base::string16& text) : text_(text) {
Tokenize(); Tokenize();
...@@ -44,4 +44,4 @@ void TokenizedString::Tokenize() { ...@@ -44,4 +44,4 @@ void TokenizedString::Tokenize() {
} }
} }
} // namespace ash } // namespace string_matching
// Copyright 2013 The Chromium Authors. All rights reserved. // Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#ifndef ASH_PUBLIC_CPP_APP_LIST_TOKENIZED_STRING_H_ #ifndef CHROME_COMMON_STRING_MATCHING_TOKENIZED_STRING_H_
#define ASH_PUBLIC_CPP_APP_LIST_TOKENIZED_STRING_H_ #define CHROME_COMMON_STRING_MATCHING_TOKENIZED_STRING_H_
#include <vector> #include <vector>
#include "ash/public/cpp/ash_public_export.h"
#include "base/macros.h" #include "base/macros.h"
#include "base/strings/string16.h" #include "base/strings/string16.h"
#include "ui/gfx/range/range.h" #include "ui/gfx/range/range.h"
namespace ash { namespace string_matching {
// TokenizedString takes a string and breaks it down into token words. It // TokenizedString takes a string and breaks it down into token words. It
// first breaks using BreakIterator to get all the words. Then it breaks // first breaks using BreakIterator to get all the words. Then it breaks
// the words again at camel case boundaries and alpha/number boundaries. // the words again at camel case boundaries and alpha/number boundaries.
class ASH_PUBLIC_EXPORT TokenizedString { class TokenizedString {
public: public:
typedef std::vector<base::string16> Tokens; typedef std::vector<base::string16> Tokens;
typedef std::vector<gfx::Range> Mappings; typedef std::vector<gfx::Range> Mappings;
...@@ -42,6 +41,6 @@ class ASH_PUBLIC_EXPORT TokenizedString { ...@@ -42,6 +41,6 @@ class ASH_PUBLIC_EXPORT TokenizedString {
DISALLOW_COPY_AND_ASSIGN(TokenizedString); DISALLOW_COPY_AND_ASSIGN(TokenizedString);
}; };
} // namespace ash } // namespace string_matching
#endif // ASH_PUBLIC_CPP_APP_LIST_TOKENIZED_STRING_H_ #endif // CHROME_COMMON_STRING_MATCHING_TOKENIZED_STRING_H_
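
A short usage sketch of the relocated class (illustrative only; the tokens()
accessor and the lower-case normalization are assumptions taken from the other
files in this CL):

  #include "base/logging.h"
  #include "base/strings/utf_string_conversions.h"
  #include "chrome/common/string_matching/tokenized_string.h"

  void TokenizeExample() {
    // The text is split into words first, then broken again at camel-case and
    // alpha/number boundaries and lower-cased, so "CamelCase Python2.7" is
    // expected to yield the tokens "camel", "case", "python", "2.7".
    string_matching::TokenizedString tokenized(
        base::UTF8ToUTF16("CamelCase Python2.7"));
    const string_matching::TokenizedString::Tokens& tokens = tokenized.tokens();
    DCHECK(!tokens.empty());
  }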
// Copyright 2013 The Chromium Authors. All rights reserved. // Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#include "ash/public/cpp/app_list/tokenized_string_char_iterator.h" #include "chrome/common/string_matching/tokenized_string_char_iterator.h"
#include "base/i18n/char_iterator.h" #include "base/i18n/char_iterator.h"
#include "base/logging.h" #include "base/logging.h"
#include "third_party/icu/source/common/unicode/utf16.h" #include "third_party/icu/source/common/unicode/utf16.h"
namespace ash { namespace string_matching {
TokenizedStringCharIterator::State::State() : token_index(0u), char_index(0) {} TokenizedStringCharIterator::State::State() : token_index(0u), char_index(0) {}
...@@ -86,4 +86,4 @@ void TokenizedStringCharIterator::CreateTokenCharIterator() { ...@@ -86,4 +86,4 @@ void TokenizedStringCharIterator::CreateTokenCharIterator() {
new base::i18n::UTF16CharIterator(&tokens_[current_token_])); new base::i18n::UTF16CharIterator(&tokens_[current_token_]));
} }
} // namespace ash } // namespace string_matching
// Copyright 2013 The Chromium Authors. All rights reserved. // Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#ifndef ASH_PUBLIC_CPP_APP_LIST_TOKENIZED_STRING_CHAR_ITERATOR_H_ #ifndef CHROME_COMMON_STRING_MATCHING_TOKENIZED_STRING_CHAR_ITERATOR_H_
#define ASH_PUBLIC_CPP_APP_LIST_TOKENIZED_STRING_CHAR_ITERATOR_H_ #define CHROME_COMMON_STRING_MATCHING_TOKENIZED_STRING_CHAR_ITERATOR_H_
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include <memory> #include <memory>
#include "ash/public/cpp/app_list/tokenized_string.h"
#include "ash/public/cpp/ash_public_export.h"
#include "base/macros.h" #include "base/macros.h"
#include "chrome/common/string_matching/tokenized_string.h"
namespace base { namespace base {
namespace i18n { namespace i18n {
...@@ -20,10 +19,10 @@ class UTF16CharIterator; ...@@ -20,10 +19,10 @@ class UTF16CharIterator;
} }
} // namespace base } // namespace base
namespace ash { namespace string_matching {
// An UTF16 char iterator for a TokenizedString. // An UTF16 char iterator for a TokenizedString.
class ASH_PUBLIC_EXPORT TokenizedStringCharIterator { class TokenizedStringCharIterator {
public: public:
struct State { struct State {
State(); State();
...@@ -76,6 +75,6 @@ class ASH_PUBLIC_EXPORT TokenizedStringCharIterator { ...@@ -76,6 +75,6 @@ class ASH_PUBLIC_EXPORT TokenizedStringCharIterator {
DISALLOW_COPY_AND_ASSIGN(TokenizedStringCharIterator); DISALLOW_COPY_AND_ASSIGN(TokenizedStringCharIterator);
}; };
} // namespace ash } // namespace string_matching
#endif // ASH_PUBLIC_CPP_APP_LIST_TOKENIZED_STRING_CHAR_ITERATOR_H_ #endif // CHROME_COMMON_STRING_MATCHING_TOKENIZED_STRING_CHAR_ITERATOR_H_
// Copyright 2013 The Chromium Authors. All rights reserved. // Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#include "ash/public/cpp/app_list/tokenized_string_char_iterator.h" #include "chrome/common/string_matching/tokenized_string_char_iterator.h"
#include <string> #include <string>
#include <vector> #include <vector>
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
#include "base/strings/utf_string_conversions.h" #include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h" #include "testing/gtest/include/gtest/gtest.h"
namespace app_list { namespace string_matching {
namespace test { namespace test {
namespace { namespace {
...@@ -21,13 +21,13 @@ namespace { ...@@ -21,13 +21,13 @@ namespace {
// has three fields. The first is the current char. The second is the offset of // has three fields. The first is the current char. The second is the offset of
// the current char in terms of the original text of the TokenizedString. The // the current char in terms of the original text of the TokenizedString. The
// last one is optional and only shows up when IsFirstCharOfToken returns true. // last one is optional and only shows up when IsFirstCharOfToken returns true.
std::string GetIterateState(const ash::TokenizedStringCharIterator& iter) { std::string GetIterateState(const TokenizedStringCharIterator& iter) {
return base::StringPrintf( return base::StringPrintf(
"%s%d%s", base::UTF16ToUTF8(base::string16(1, iter.Get())).c_str(), "%s%d%s", base::UTF16ToUTF8(base::string16(1, iter.Get())).c_str(),
iter.GetArrayPos(), iter.IsFirstCharOfToken() ? "!" : ""); iter.GetArrayPos(), iter.IsFirstCharOfToken() ? "!" : "");
} }
void TestBeyondTheEnd(ash::TokenizedStringCharIterator* iter) { void TestBeyondTheEnd(TokenizedStringCharIterator* iter) {
ASSERT_TRUE(iter->end()); ASSERT_TRUE(iter->end());
ASSERT_FALSE(iter->NextChar()); ASSERT_FALSE(iter->NextChar());
ASSERT_FALSE(iter->NextToken()); ASSERT_FALSE(iter->NextToken());
...@@ -37,8 +37,8 @@ void TestBeyondTheEnd(ash::TokenizedStringCharIterator* iter) { ...@@ -37,8 +37,8 @@ void TestBeyondTheEnd(ash::TokenizedStringCharIterator* iter) {
} }
void TestEveryChar(const std::string& text, const std::string& expects) { void TestEveryChar(const std::string& text, const std::string& expects) {
ash::TokenizedString tokens(base::UTF8ToUTF16(text)); TokenizedString tokens(base::UTF8ToUTF16(text));
ash::TokenizedStringCharIterator iter(tokens); TokenizedStringCharIterator iter(tokens);
std::vector<std::string> results; std::vector<std::string> results;
while (!iter.end()) { while (!iter.end()) {
...@@ -51,8 +51,8 @@ void TestEveryChar(const std::string& text, const std::string& expects) { ...@@ -51,8 +51,8 @@ void TestEveryChar(const std::string& text, const std::string& expects) {
} }
void TestNextToken(const std::string& text, const std::string& expects) { void TestNextToken(const std::string& text, const std::string& expects) {
ash::TokenizedString tokens(base::UTF8ToUTF16(text)); TokenizedString tokens(base::UTF8ToUTF16(text));
ash::TokenizedStringCharIterator iter(tokens); TokenizedStringCharIterator iter(tokens);
std::vector<std::string> results; std::vector<std::string> results;
while (!iter.end()) { while (!iter.end()) {
...@@ -66,8 +66,8 @@ void TestNextToken(const std::string& text, const std::string& expects) { ...@@ -66,8 +66,8 @@ void TestNextToken(const std::string& text, const std::string& expects) {
void TestFirstTwoCharInEveryToken(const std::string& text, void TestFirstTwoCharInEveryToken(const std::string& text,
const std::string& expects) { const std::string& expects) {
ash::TokenizedString tokens(base::UTF8ToUTF16(text)); TokenizedString tokens(base::UTF8ToUTF16(text));
ash::TokenizedStringCharIterator iter(tokens); TokenizedStringCharIterator iter(tokens);
std::vector<std::string> results; std::vector<std::string> results;
while (!iter.end()) { while (!iter.end()) {
...@@ -148,4 +148,4 @@ TEST(TokenizedStringCharIteratorTest, Basic) { ...@@ -148,4 +148,4 @@ TEST(TokenizedStringCharIteratorTest, Basic) {
} }
} // namespace test } // namespace test
} // namespace app_list } // namespace string_matching
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#include "ash/public/cpp/app_list/tokenized_string.h"
#include "base/strings/string16.h" #include "base/strings/string16.h"
#include "chrome/common/string_matching/tokenized_string.h"
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
if (size < 1 || size % 2 != 0) if (size < 1 || size % 2 != 0)
...@@ -12,6 +12,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { ...@@ -12,6 +12,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
// Test for base::string16 if size is even. // Test for base::string16 if size is even.
base::string16 string_input16(reinterpret_cast<const base::char16*>(data), base::string16 string_input16(reinterpret_cast<const base::char16*>(data),
size / 2); size / 2);
ash::TokenizedString tokenized_string_from_string16(string_input16); string_matching::TokenizedString tokenized_string_from_string16(
string_input16);
return 0; return 0;
} }
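The moved fuzzer only constructs a TokenizedString from the raw input. A hypothetical variant (not part of this CL) that also walks the result with TokenizedStringCharIterator, so the iteration paths get fuzz coverage too, could look like this:

#include <cstddef>
#include <cstdint>

#include "base/strings/string16.h"
#include "chrome/common/string_matching/tokenized_string.h"
#include "chrome/common/string_matching/tokenized_string_char_iterator.h"

extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  // Same guard as above: require an even, non-zero byte count so the input
  // can be reinterpreted as UTF-16 code units.
  if (size < 1 || size % 2 != 0)
    return 0;
  base::string16 input(reinterpret_cast<const base::char16*>(data), size / 2);
  string_matching::TokenizedString tokenized(input);
  // Walk every character; the fuzzer only checks that this does not crash.
  string_matching::TokenizedStringCharIterator iter(tokenized);
  while (!iter.end())
    iter.NextChar();
  return 0;
}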
// Copyright 2013 The Chromium Authors. All rights reserved. // Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#include "ash/public/cpp/app_list/tokenized_string_match.h" #include "chrome/common/string_matching/tokenized_string_match.h"
#include <stddef.h> #include <stddef.h>
#include <cmath> #include <cmath>
#include "ash/public/cpp/app_list/tokenized_string_char_iterator.h"
#include "base/i18n/string_search.h" #include "base/i18n/string_search.h"
#include "base/logging.h" #include "base/logging.h"
#include "base/macros.h" #include "base/macros.h"
#include "chrome/common/string_matching/tokenized_string_char_iterator.h"
namespace ash { namespace string_matching {
namespace { namespace {
...@@ -235,4 +235,4 @@ bool TokenizedStringMatch::Calculate(const base::string16& query, ...@@ -235,4 +235,4 @@ bool TokenizedStringMatch::Calculate(const base::string16& query,
return Calculate(tokenized_query, tokenized_text); return Calculate(tokenized_query, tokenized_text);
} }
} // namespace ash } // namespace string_matching
// Copyright 2013 The Chromium Authors. All rights reserved. // Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#ifndef ASH_PUBLIC_CPP_APP_LIST_TOKENIZED_STRING_MATCH_H_ #ifndef CHROME_COMMON_STRING_MATCHING_TOKENIZED_STRING_MATCH_H_
#define ASH_PUBLIC_CPP_APP_LIST_TOKENIZED_STRING_MATCH_H_ #define CHROME_COMMON_STRING_MATCHING_TOKENIZED_STRING_MATCH_H_
#include <vector> #include <vector>
#include "ash/public/cpp/ash_public_export.h"
#include "base/macros.h" #include "base/macros.h"
#include "base/strings/string16.h" #include "base/strings/string16.h"
#include "ui/gfx/range/range.h" #include "ui/gfx/range/range.h"
namespace ash { namespace string_matching {
class TokenizedString; class TokenizedString;
...@@ -22,7 +21,7 @@ class TokenizedString; ...@@ -22,7 +21,7 @@ class TokenizedString;
// of text. A relevance of zero means the two are completely different to each // of text. A relevance of zero means the two are completely different to each
// other. The higher the relevance score, the better the two strings are // other. The higher the relevance score, the better the two strings are
// matched. Matched portions of text are stored as index ranges. // matched. Matched portions of text are stored as index ranges.
class ASH_PUBLIC_EXPORT TokenizedStringMatch { class TokenizedStringMatch {
public: public:
typedef std::vector<gfx::Range> Hits; typedef std::vector<gfx::Range> Hits;
...@@ -49,6 +48,6 @@ class ASH_PUBLIC_EXPORT TokenizedStringMatch { ...@@ -49,6 +48,6 @@ class ASH_PUBLIC_EXPORT TokenizedStringMatch {
DISALLOW_COPY_AND_ASSIGN(TokenizedStringMatch); DISALLOW_COPY_AND_ASSIGN(TokenizedStringMatch);
}; };
} // namespace ash } // namespace string_matching
#endif // ASH_PUBLIC_CPP_APP_LIST_TOKENIZED_STRING_MATCH_H_ #endif // CHROME_COMMON_STRING_MATCHING_TOKENIZED_STRING_MATCH_H_
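As a usage sketch of the relocated class (illustrative only, not part of this CL): Calculate() tokenizes both strings, reports whether the query matched, and fills hits() with index ranges into the text. The QueryMatchesText helper name is hypothetical.

#include <iostream>
#include <string>

#include "base/strings/utf_string_conversions.h"
#include "chrome/common/string_matching/tokenized_string_match.h"
#include "ui/gfx/range/range.h"

// Hypothetical helper: returns true when |query| matches |text| and prints
// the matched ranges; e.g. query "goog" against "Google Chrome" should yield
// a single hit covering the first four characters.
bool QueryMatchesText(const std::string& query, const std::string& text) {
  string_matching::TokenizedStringMatch match;
  // Calculate() returns false when the query does not match the text at all.
  if (!match.Calculate(base::UTF8ToUTF16(query), base::UTF8ToUTF16(text)))
    return false;
  for (const gfx::Range& hit : match.hits()) {
    // Each hit is a [start, end) index range into |text|.
    std::cout << "hit: [" << hit.start() << ", " << hit.end() << ")\n";
  }
  return true;
}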
// Copyright 2013 The Chromium Authors. All rights reserved. // Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#include "ash/public/cpp/app_list/tokenized_string_match.h" #include "chrome/common/string_matching/tokenized_string_match.h"
#include <stddef.h> #include <stddef.h>
...@@ -12,18 +12,17 @@ ...@@ -12,18 +12,17 @@
#include "base/strings/utf_string_conversions.h" #include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h" #include "testing/gtest/include/gtest/gtest.h"
namespace app_list { namespace string_matching {
namespace test { namespace test {
// Returns a string of |text| with the hits in |match| marked using block brackets. // Returns a string of |text| with the hits in |match| marked using block brackets.
// e.g. text= "Text", match.hits = [{0,1}], returns "[T]ext". // e.g. text= "Text", match.hits = [{0,1}], returns "[T]ext".
std::string MatchHit(const base::string16& text, std::string MatchHit(const base::string16& text,
const ash::TokenizedStringMatch& match) { const TokenizedStringMatch& match) {
base::string16 marked = text; base::string16 marked = text;
const ash::TokenizedStringMatch::Hits& hits = match.hits(); const TokenizedStringMatch::Hits& hits = match.hits();
for (ash::TokenizedStringMatch::Hits::const_reverse_iterator it = for (TokenizedStringMatch::Hits::const_reverse_iterator it = hits.rbegin();
hits.rbegin();
it != hits.rend(); ++it) { it != hits.rend(); ++it) {
const gfx::Range& hit = *it; const gfx::Range& hit = *it;
marked.insert(hit.end(), 1, ']'); marked.insert(hit.end(), 1, ']');
...@@ -43,7 +42,7 @@ TEST(TokenizedStringMatchTest, NotMatch) { ...@@ -43,7 +42,7 @@ TEST(TokenizedStringMatchTest, NotMatch) {
{"abd", "abcd"}, {"cd", "abcd"}, {"abd", "abcd"}, {"cd", "abcd"},
}; };
ash::TokenizedStringMatch match; TokenizedStringMatch match;
for (size_t i = 0; i < base::size(kTestCases); ++i) { for (size_t i = 0; i < base::size(kTestCases); ++i) {
const base::string16 text(base::UTF8ToUTF16(kTestCases[i].text)); const base::string16 text(base::UTF8ToUTF16(kTestCases[i].text));
EXPECT_FALSE(match.Calculate(base::UTF8ToUTF16(kTestCases[i].query), text)) EXPECT_FALSE(match.Calculate(base::UTF8ToUTF16(kTestCases[i].query), text))
...@@ -71,7 +70,7 @@ TEST(TokenizedStringMatchTest, Match) { ...@@ -71,7 +70,7 @@ TEST(TokenizedStringMatchTest, Match) {
{"Netflix", "flix", "Net[flix]"}, {"Netflix", "flix", "Net[flix]"},
}; };
ash::TokenizedStringMatch match; TokenizedStringMatch match;
for (size_t i = 0; i < base::size(kTestCases); ++i) { for (size_t i = 0; i < base::size(kTestCases); ++i) {
const base::string16 text(base::UTF8ToUTF16(kTestCases[i].text)); const base::string16 text(base::UTF8ToUTF16(kTestCases[i].text));
EXPECT_TRUE(match.Calculate(base::UTF8ToUTF16(kTestCases[i].query), text)); EXPECT_TRUE(match.Calculate(base::UTF8ToUTF16(kTestCases[i].query), text));
...@@ -102,8 +101,8 @@ TEST(TokenizedStringMatchTest, Relevance) { ...@@ -102,8 +101,8 @@ TEST(TokenizedStringMatchTest, Relevance) {
{"Google Chrome", "oo", "ch"}, {"Google Chrome", "oo", "ch"},
}; };
ash::TokenizedStringMatch match_low; TokenizedStringMatch match_low;
ash::TokenizedStringMatch match_high; TokenizedStringMatch match_high;
for (size_t i = 0; i < base::size(kTestCases); ++i) { for (size_t i = 0; i < base::size(kTestCases); ++i) {
const base::string16 text(base::UTF8ToUTF16(kTestCases[i].text)); const base::string16 text(base::UTF8ToUTF16(kTestCases[i].text));
EXPECT_TRUE( EXPECT_TRUE(
...@@ -137,7 +136,7 @@ TEST(TokenizedStringMatchTest, AbsoluteRelevance) { ...@@ -137,7 +136,7 @@ TEST(TokenizedStringMatchTest, AbsoluteRelevance) {
{"Google Chrome", "goog", 0.94}, {"Google Chrome", "goog", 0.94},
}; };
ash::TokenizedStringMatch match; TokenizedStringMatch match;
for (size_t i = 0; i < base::size(kTestCases); ++i) { for (size_t i = 0; i < base::size(kTestCases); ++i) {
const base::string16 text(base::UTF8ToUTF16(kTestCases[i].text)); const base::string16 text(base::UTF8ToUTF16(kTestCases[i].text));
EXPECT_TRUE(match.Calculate(base::UTF8ToUTF16(kTestCases[i].query), text)); EXPECT_TRUE(match.Calculate(base::UTF8ToUTF16(kTestCases[i].query), text));
...@@ -149,4 +148,4 @@ TEST(TokenizedStringMatchTest, AbsoluteRelevance) { ...@@ -149,4 +148,4 @@ TEST(TokenizedStringMatchTest, AbsoluteRelevance) {
} }
} // namespace test } // namespace test
} // namespace app_list } // namespace string_matching
// Copyright 2013 The Chromium Authors. All rights reserved. // Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#include "ash/public/cpp/app_list/tokenized_string.h" #include "chrome/common/string_matching/tokenized_string.h"
#include <stddef.h> #include <stddef.h>
#include "base/strings/utf_string_conversions.h" #include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h" #include "testing/gtest/include/gtest/gtest.h"
namespace app_list { namespace string_matching {
namespace test { namespace test {
namespace { namespace {
base::string16 GetContent(const ash::TokenizedString& tokenized) { base::string16 GetContent(const TokenizedString& tokenized) {
const ash::TokenizedString::Tokens& tokens = tokenized.tokens(); const TokenizedString::Tokens& tokens = tokenized.tokens();
const ash::TokenizedString::Mappings& mappings = tokenized.mappings(); const TokenizedString::Mappings& mappings = tokenized.mappings();
base::string16 str; base::string16 str;
for (size_t i = 0; i < tokens.size(); ++i) { for (size_t i = 0; i < tokens.size(); ++i) {
...@@ -32,56 +32,56 @@ base::string16 GetContent(const ash::TokenizedString& tokenized) { ...@@ -32,56 +32,56 @@ base::string16 GetContent(const ash::TokenizedString& tokenized) {
TEST(TokenizedStringTest, Empty) { TEST(TokenizedStringTest, Empty) {
base::string16 empty; base::string16 empty;
ash::TokenizedString tokens(empty); TokenizedString tokens(empty);
EXPECT_EQ(base::string16(), GetContent(tokens)); EXPECT_EQ(base::string16(), GetContent(tokens));
} }
TEST(TokenizedStringTest, Basic) { TEST(TokenizedStringTest, Basic) {
{ {
base::string16 text(base::UTF8ToUTF16("ScratchPad")); base::string16 text(base::UTF8ToUTF16("ScratchPad"));
ash::TokenizedString tokens(text); TokenizedString tokens(text);
EXPECT_EQ(base::UTF8ToUTF16("scratch{0,7} pad{7,10}"), GetContent(tokens)); EXPECT_EQ(base::UTF8ToUTF16("scratch{0,7} pad{7,10}"), GetContent(tokens));
} }
{ {
base::string16 text(base::UTF8ToUTF16("Chess2.0")); base::string16 text(base::UTF8ToUTF16("Chess2.0"));
ash::TokenizedString tokens(text); TokenizedString tokens(text);
EXPECT_EQ(base::UTF8ToUTF16("chess{0,5} 2.0{5,8}"), GetContent(tokens)); EXPECT_EQ(base::UTF8ToUTF16("chess{0,5} 2.0{5,8}"), GetContent(tokens));
} }
{ {
base::string16 text(base::UTF8ToUTF16("Cut the rope")); base::string16 text(base::UTF8ToUTF16("Cut the rope"));
ash::TokenizedString tokens(text); TokenizedString tokens(text);
EXPECT_EQ(base::UTF8ToUTF16("cut{0,3} the{4,7} rope{8,12}"), EXPECT_EQ(base::UTF8ToUTF16("cut{0,3} the{4,7} rope{8,12}"),
GetContent(tokens)); GetContent(tokens));
} }
{ {
base::string16 text(base::UTF8ToUTF16("AutoCAD WS")); base::string16 text(base::UTF8ToUTF16("AutoCAD WS"));
ash::TokenizedString tokens(text); TokenizedString tokens(text);
EXPECT_EQ(base::UTF8ToUTF16("auto{0,4} cad{4,7} ws{8,10}"), EXPECT_EQ(base::UTF8ToUTF16("auto{0,4} cad{4,7} ws{8,10}"),
GetContent(tokens)); GetContent(tokens));
} }
{ {
base::string16 text(base::UTF8ToUTF16("Great TweetDeck")); base::string16 text(base::UTF8ToUTF16("Great TweetDeck"));
ash::TokenizedString tokens(text); TokenizedString tokens(text);
EXPECT_EQ(base::UTF8ToUTF16("great{0,5} tweet{6,11} deck{11,15}"), EXPECT_EQ(base::UTF8ToUTF16("great{0,5} tweet{6,11} deck{11,15}"),
GetContent(tokens)); GetContent(tokens));
} }
{ {
base::string16 text(base::UTF8ToUTF16("Draw-It!")); base::string16 text(base::UTF8ToUTF16("Draw-It!"));
ash::TokenizedString tokens(text); TokenizedString tokens(text);
EXPECT_EQ(base::UTF8ToUTF16("draw{0,4} it{5,7}"), GetContent(tokens)); EXPECT_EQ(base::UTF8ToUTF16("draw{0,4} it{5,7}"), GetContent(tokens));
} }
{ {
base::string16 text(base::UTF8ToUTF16("Faxing & Signing")); base::string16 text(base::UTF8ToUTF16("Faxing & Signing"));
ash::TokenizedString tokens(text); TokenizedString tokens(text);
EXPECT_EQ(base::UTF8ToUTF16("faxing{0,6} signing{9,16}"), EXPECT_EQ(base::UTF8ToUTF16("faxing{0,6} signing{9,16}"),
GetContent(tokens)); GetContent(tokens));
} }
{ {
base::string16 text(base::UTF8ToUTF16("!@#$%^&*()<<<**>>>")); base::string16 text(base::UTF8ToUTF16("!@#$%^&*()<<<**>>>"));
ash::TokenizedString tokens(text); TokenizedString tokens(text);
EXPECT_EQ(base::UTF8ToUTF16(""), GetContent(tokens)); EXPECT_EQ(base::UTF8ToUTF16(""), GetContent(tokens));
} }
} }
} // namespace test } // namespace test
} // namespace app_list } // namespace string_matching
...@@ -3379,6 +3379,12 @@ test("unit_tests") { ...@@ -3379,6 +3379,12 @@ test("unit_tests") {
"../common/origin_trials/chrome_origin_trial_policy_unittest.cc", "../common/origin_trials/chrome_origin_trial_policy_unittest.cc",
"../common/pref_names_util_unittest.cc", "../common/pref_names_util_unittest.cc",
"../common/profiler/thread_profiler_unittest.cc", "../common/profiler/thread_profiler_unittest.cc",
"../common/string_matching/fuzzy_tokenized_string_match_unittest.cc",
"../common/string_matching/sequence_matcher_unittest.cc",
"../common/string_matching/term_break_iterator_unittest.cc",
"../common/string_matching/tokenized_string_char_iterator_unittest.cc",
"../common/string_matching/tokenized_string_match_unittest.cc",
"../common/string_matching/tokenized_string_unittest.cc",
"../renderer/chrome_content_renderer_client_unittest.cc", "../renderer/chrome_content_renderer_client_unittest.cc",
"../renderer/content_settings_agent_impl_unittest.cc", "../renderer/content_settings_agent_impl_unittest.cc",
"../renderer/instant_restricted_id_cache_unittest.cc", "../renderer/instant_restricted_id_cache_unittest.cc",
...@@ -4198,17 +4204,11 @@ test("unit_tests") { ...@@ -4198,17 +4204,11 @@ test("unit_tests") {
"../browser/ui/app_list/search/search_result_ranker/recurrence_ranker_util_unittest.cc", "../browser/ui/app_list/search/search_result_ranker/recurrence_ranker_util_unittest.cc",
"../browser/ui/app_list/search/search_result_ranker/search_ranking_event_logger_unittest.cc", "../browser/ui/app_list/search/search_result_ranker/search_ranking_event_logger_unittest.cc",
"../browser/ui/app_list/search/search_result_ranker/search_result_ranker_unittest.cc", "../browser/ui/app_list/search/search_result_ranker/search_result_ranker_unittest.cc",
"../browser/ui/app_list/search/search_utils/fuzzy_tokenized_string_match_unittest.cc",
"../browser/ui/app_list/search/search_utils/sequence_matcher_unittest.cc",
"../browser/ui/app_list/search/settings_shortcut/settings_shortcut_provider_unittest.cc", "../browser/ui/app_list/search/settings_shortcut/settings_shortcut_provider_unittest.cc",
"../browser/ui/app_list/search/settings_shortcut/settings_shortcut_result_unittest.cc", "../browser/ui/app_list/search/settings_shortcut/settings_shortcut_result_unittest.cc",
"../browser/ui/app_list/search/tests/app_search_provider_unittest.cc", "../browser/ui/app_list/search/tests/app_search_provider_unittest.cc",
"../browser/ui/app_list/search/tests/mixer_unittest.cc", "../browser/ui/app_list/search/tests/mixer_unittest.cc",
"../browser/ui/app_list/search/tests/omnibox_result_unittest.cc", "../browser/ui/app_list/search/tests/omnibox_result_unittest.cc",
"../browser/ui/app_list/search/tests/term_break_iterator_unittest.cc",
"../browser/ui/app_list/search/tests/tokenized_string_char_iterator_unittest.cc",
"../browser/ui/app_list/search/tests/tokenized_string_match_unittest.cc",
"../browser/ui/app_list/search/tests/tokenized_string_unittest.cc",
"../browser/ui/app_list/search/tests/zero_state_file_provider_unittest.cc", "../browser/ui/app_list/search/tests/zero_state_file_provider_unittest.cc",
"../browser/ui/app_list/search/tests/zero_state_file_result_unittest.cc", "../browser/ui/app_list/search/tests/zero_state_file_result_unittest.cc",
"../browser/ui/app_list/test/fake_app_list_model_updater.cc", "../browser/ui/app_list/test/fake_app_list_model_updater.cc",
...@@ -6483,7 +6483,7 @@ if (is_win) { ...@@ -6483,7 +6483,7 @@ if (is_win) {
if (is_chromeos) { if (is_chromeos) {
fuzzer_test("tokenized_string_fuzzer") { fuzzer_test("tokenized_string_fuzzer") {
sources = [ sources = [
"../browser/ui/app_list/search/tests/tokenized_string_fuzzer.cc", "../common/string_matching/tokenized_string_fuzzer.cc",
] ]
deps = [ deps = [
"//ash/public/cpp", "//ash/public/cpp",
......