Commit 02e8ad6a authored by Tom Lukaszewicz, committed by Josip Sokcevic

Tab Search: Exact search update

This CL updates search so that, when an exact-match search is requested,
a dedicated exact-match algorithm is used instead of the fuzzy matcher.

We will rank items for exact match as follows:
 1. Rank all items that match at the beginning of the title or
    hostname first.
 2. Rank all items that match at the beginning of a word next.
 3. All remaining items with an exact match are ranked at the end.

Change-Id: Icd6cd647ce3b0d39eb535998dab5e49973c24b0b
Reviewed-on: https://chrome-internal-review.googlesource.com/c/chrome/browser/resources/tab_search/+/3299647
Reviewed-by: John Lee <johntlee@chromium.org>
Reviewed-by: Yuheng Huang <yuhengh@chromium.org>
Cr-Commit-Position: refs/heads/master@{#819621}
parent 1784cd30
......@@ -36,6 +36,7 @@ js_library("app") {
}
# Build target for the fuzzy_search module.
# NOTE(review): the util.m dep presumably backs the quoteString import from
# chrome://resources/js/util.m.js used by the exact-match search - confirm.
js_library("fuzzy_search") {
deps = [ "//ui/webui/resources/js:util.m" ]
}
js_library("tab_search_api_proxy") {
......
......@@ -2,9 +2,14 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
import {quoteString} from 'chrome://resources/js/util.m.js';
import Fuse from './fuse.js';
/**
* TODO(tluk): Fix the typing for tabSearch.mojom.Tab here given we are updating
* the fields on this object ( https://crbug.com/1133558 ).
* @suppress {checkTypes}
* @param {string} input
* @param {!Array<!tabSearch.mojom.Tab>} records
* @param {!Object} options
......@@ -14,18 +19,31 @@ export function fuzzySearch(input, records, options) {
if (input.length === 0) {
return records;
}
return new Fuse(records, options).search(input).map(result => {
const titleMatch = result.matches.find(e => e.key === 'title');
const hostnameMatch = result.matches.find(e => e.key === 'hostname');
const item = Object.assign({}, result.item);
if (titleMatch) {
item.titleHighlightRanges = convertToRanges(titleMatch.indices);
}
if (hostnameMatch) {
item.hostnameHighlightRanges = convertToRanges(hostnameMatch.indices);
}
return item;
});
// Fuse does not handle exact match searches well. It indiscriminately
// searches for direct matches that appear anywhere in the string. This
// results in a bad search experience as users expect matches at the beginning
// of the title / hostname, or at the beginning of words to receive
// preferential treatment. Matched ranges returned by Fuse also fail to
// highlight only the matching text, but instead match to any character
// present in the input string.
// To address these shortcomings we use the exactSearch implementation below
// if the options indicate an exact matching algorithm should be used.
if (options.threshold === 0.0) {
return exactSearch(input, records);
} else {
return new Fuse(records, options).search(input).map(result => {
const titleMatch = result.matches.find(e => e.key === 'title');
const hostnameMatch = result.matches.find(e => e.key === 'hostname');
const item = Object.assign({}, result.item);
if (titleMatch) {
item.titleHighlightRanges = convertToRanges(titleMatch.indices);
}
if (hostnameMatch) {
item.hostnameHighlightRanges = convertToRanges(hostnameMatch.indices);
}
return item;
});
}
}
/**
......@@ -39,3 +57,109 @@ function convertToRanges(matches) {
return matches.map(
([start, end]) => ({start: start, length: end - start + 1}));
}
////////////////////////////////////////////////////////////////////////////////
// Exact Match Implementation :
/**
 * Exact-match search over |records|. Every record whose |title| or |hostname|
 * contains |searchText| (case-insensitively) is returned as a shallow copy
 * annotated with highlight ranges, grouped into three tiers (best first):
 *   1. The match sits at the very beginning of the title or hostname.
 *   2. The match sits at the beginning of a word in the title or hostname.
 *   3. The match sits anywhere else.
 * Within a tier, records keep the relative order they had in |records|.
 * An empty |searchText| returns |records| itself, untouched.
 * @suppress {checkTypes}
 * @param {string} searchText
 * @param {!Array<!tabSearch.mojom.Tab>} records
 * @return {!Array<!tabSearch.mojom.Tab>}
 */
function exactSearch(searchText, records) {
  if (searchText.length === 0) {
    return records;
  }

  // One bucket per ranking tier; they are concatenated in priority order.
  const stringStartHits = [];
  const wordStartHits = [];
  const remainingHits = [];
  const wordStartRegexp = new RegExp(`\\b${quoteString(searchText)}`, 'i');

  for (const record of records) {
    const titleRanges = getRanges(record.title, searchText);
    const hostnameRanges = getRanges(record.hostname, searchText);
    const matchesTitle = titleRanges.length > 0;
    const matchesHostname = hostnameRanges.length > 0;
    if (!matchesTitle && !matchesHostname) {
      continue;
    }

    // Annotate a copy so the caller's record is never modified. A highlight
    // property is only attached for a field that actually matched.
    const hit = Object.assign({}, record);
    if (matchesTitle) {
      hit.titleHighlightRanges = titleRanges;
    }
    if (matchesHostname) {
      hit.hostnameHighlightRanges = hostnameRanges;
    }

    let bucket = remainingHits;
    if (hasMatchStringStart(hit)) {
      bucket = stringStartHits;
    } else if (hasRegexMatch(hit, wordStartRegexp)) {
      bucket = wordStartHits;
    }
    bucket.push(hit);
  }

  return stringStartHits.concat(wordStartHits, remainingHits);
}
/**
 * Determines whether the given tab has a title or hostname whose first
 * identified highlight range begins at index 0, i.e. the search text matched
 * at the very beginning of the string.
 *
 * Always returns a real boolean: a tab with no highlight ranges (property
 * missing or an empty array) yields false.
 * @suppress {checkTypes}
 * @param {!tabSearch.mojom.Tab} tab
 * @return {boolean}
 */
function hasMatchStringStart(tab) {
  // Only the first range is inspected: if any match starts at index 0 it is
  // necessarily the first one when ranges are listed in match order.
  const startsAtZero = ranges =>
      Array.isArray(ranges) && ranges.length > 0 && ranges[0].start === 0;
  return startsAtZero(tab.titleHighlightRanges) ||
      startsAtZero(tab.hostnameHighlightRanges);
}
/**
 * Determines whether the given tab has a match for the given regexp in its
 * title or hostname. A field is only tested when the tab carries highlight
 * ranges for that field (|titleHighlightRanges| / |hostnameHighlightRanges|).
 *
 * Always returns a real boolean, never undefined or the ranges value itself.
 * @suppress {checkTypes}
 * @param {!tabSearch.mojom.Tab} tab
 * @param {RegExp} regexp
 * @return {boolean}
 */
function hasRegexMatch(tab, regexp) {
  // String.prototype.search ignores the regexp's lastIndex, so the same
  // regexp object can be reused safely across calls.
  const titleMatches =
      Boolean(tab.titleHighlightRanges) && tab.title.search(regexp) !== -1;
  if (titleMatches) {
    return true;
  }
  return Boolean(tab.hostnameHighlightRanges) &&
      tab.hostname.search(regexp) !== -1;
}
/**
 * Returns an array of matches that indicate where in the target string the
 * searchText appears (case-insensitive, non-overlapping, in left-to-right
 * order). If there are no identified matches an empty array is returned.
 *
 * An empty |searchText| or a missing/empty |target| yields an empty array.
 * @param {string} target
 * @param {string} searchText
 * @return {!Array<!{start: number, length: number}>}
 */
function getRanges(target, searchText) {
  // Guard first: an empty pattern produces a zero-width match at index 0 that
  // never advances lastIndex, so the exec loop below would never terminate.
  // A missing target would otherwise be coerced to the text "undefined".
  if (!target || !searchText) {
    return [];
  }

  // quoteString escapes regexp metacharacters so the text is matched
  // literally.
  const re = new RegExp(quoteString(searchText), 'gi');
  const ranges = [];
  let match;
  while ((match = re.exec(target)) !== null) {
    ranges.push({
      start: match.index,
      length: searchText.length,
    });
  }
  return ranges;
}
......@@ -56,4 +56,142 @@ suite('FuzzySearchTest', () => {
assertDeepEquals(records, fuzzySearch('', records, options));
assertDeepEquals([], fuzzySearch('z', records, options));
});
// Verifies both the highlight ranges and the three-tier ordering produced by
// the exact match path (string-start matches, then word-start matches, then
// all remaining matches).
test('Test the exact match ranking order.', () => {
// Set threshold to 0.0 to assert an exact match search.
const options = {
threshold: 0.0,
};
// Initial pre-search item list.
const records = [
{
title: 'Code Search',
hostname: 'search.chromium.search',
},
{title: 'Marching band', hostname: 'en.marching.band.com'},
{
title: 'Chrome Desktop Architecture',
hostname: 'drive.google.com',
},
{
title: 'Arch Linux',
hostname: 'www.archlinux.org',
},
{
title: 'Arches National Park',
hostname: 'www.nps.gov',
},
{
title: 'Search Engine Land - Search Engines',
hostname: 'searchengineland.com'
},
];
// Results for 'arch', in expected ranking order:
//  - 'Arch Linux' and 'Arches National Park' match at the start of the title.
//  - 'Chrome Desktop Architecture' matches at the start of a word.
//  - The remaining records only match mid-word and keep their relative order
//    from |records|.
const archMatchedRecords = [
{
title: 'Arch Linux',
hostname: 'www.archlinux.org',
titleHighlightRanges: [ {start: 0, length: 4} ],
hostnameHighlightRanges: [ {start: 4, length: 4} ],
},
{
title: 'Arches National Park',
hostname: 'www.nps.gov',
titleHighlightRanges: [ {start: 0, length: 4} ],
},
{
title: 'Chrome Desktop Architecture',
hostname: 'drive.google.com',
titleHighlightRanges: [ {start: 15, length: 4} ],
},
{
title: 'Code Search',
hostname: 'search.chromium.search',
titleHighlightRanges: [ {start: 7, length: 4} ],
hostnameHighlightRanges:
[ {start: 2, length: 4}, {start: 18, length: 4} ],
},
{
title: 'Marching band',
hostname: 'en.marching.band.com',
titleHighlightRanges: [ {start: 1, length: 4} ],
hostnameHighlightRanges: [ {start: 4, length: 4} ]
},
{
title: 'Search Engine Land - Search Engines',
hostname: 'searchengineland.com',
titleHighlightRanges:
[ {start: 2, length: 4}, {start: 23, length: 4} ],
hostnameHighlightRanges: [ {start: 2, length: 4} ]
},
];
// Results for 'search'. Both records match at the very start of a field
// (hostname for the first, title and hostname for the second), so they stay
// in their original relative order.
const searchMatchedRecords = [
{
title: 'Code Search',
hostname: 'search.chromium.search',
titleHighlightRanges: [ {start: 5, length: 6} ],
hostnameHighlightRanges:
[ {start: 0, length: 6}, {start: 16, length: 6} ],
},
{
title: 'Search Engine Land - Search Engines',
hostname: 'searchengineland.com',
titleHighlightRanges:
[ {start: 0, length: 6}, {start: 21, length: 6} ],
hostnameHighlightRanges: [ {start: 0, length: 6} ]
},
];
// Empty search should return the full list.
assertDeepEquals(records, fuzzySearch('', records, options));
assertDeepEquals(archMatchedRecords, fuzzySearch('arch', records, options));
assertDeepEquals(searchMatchedRecords,
fuzzySearch('search', records, options));
// No matches should return an empty list.
assertDeepEquals([], fuzzySearch('archh', records, options));
});
// Verifies that search text containing a backslash or a double quote is
// matched literally against the title and hostname.
test('Test exact search with escaped characters.', () => {
// Set threshold to 0.0 to assert an exact match search.
const options = {
threshold: 0.0,
};
// Initial pre-search item list. At runtime the title is the text
// 'beginning\test\end (with a leading single quote) and the hostname is
// beginning\test"end.
const records = [ {
title: '\'beginning\\test\\end',
hostname: 'beginning\\test\"end',
} ];
// Expected results for '\test' (a backslash followed by "test", 5 characters).
const backslashMatchedRecords = [
{
title: '\'beginning\\test\\end',
hostname: 'beginning\\test\"end',
titleHighlightRanges: [ {start: 10, length: 5} ],
hostnameHighlightRanges: [ {start: 9, length: 5} ]
},
];
// Expected results for '"end'. Only the hostname contains a double quote, so
// no title ranges are expected.
const quoteMatchedRecords = [
{
title: '\'beginning\\test\\end',
hostname: 'beginning\\test\"end',
hostnameHighlightRanges: [ {start: 14, length: 4} ],
},
];
assertDeepEquals(backslashMatchedRecords,
fuzzySearch('\\test', records, options));
assertDeepEquals(quoteMatchedRecords,
fuzzySearch('\"end', records, options));
});
});
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment