Commit d714ca2e authored by Yuheng Huang, committed by Josip Sokcevic

Implement exact match scoring

A simple scoring function gives us better search results based on
match index and match count. It also enables us to control title vs
hostname matching weight similar to fuzzy search.

Bug: 1099917
Change-Id: Ifdc9a71f856854b77851a7b133cfb34439441d16
Reviewed-on: https://chrome-internal-review.googlesource.com/c/chrome/browser/resources/tab_search/+/3301747
Reviewed-by: John Lee <johntlee@chromium.org>
Reviewed-by: Tom Lukaszewicz <tluk@chromium.org>
Cr-Commit-Position: refs/heads/master@{#819625}
parent b4ccd392
......@@ -29,7 +29,7 @@ export function fuzzySearch(input, records, options) {
// To address these shortcomings we use the exactSearch implementation below
// if the options indicate an exact matching algorithm should be used.
if (options.threshold === 0.0) {
return exactSearch(input, records);
return exactSearch(input, records, options);
} else {
return new Fuse(records, options).search(input).map(result => {
const titleMatch = result.matches.find(e => e.key === 'title');
......@@ -73,13 +73,24 @@ function convertToRanges(matches) {
* @suppress {checkTypes}
* @param {string} searchText
* @param {!Array<!tabSearch.mojom.Tab>} records
* @param {!Object} options
* @return {!Array<!tabSearch.mojom.Tab>}
*/
function exactSearch(searchText, records) {
function exactSearch(searchText, records, options) {
if (searchText.length === 0) {
return records;
}
// Controls how heavily weighted the tab's title is relative to the hostname
// in the scoring function.
const key =
options.keys ? options.keys.find(e => e.name === 'title') : undefined;
const titleToHostnameWeightRatio = key ? key.weight : 1;
// Default distance to calculate score for title/hostname based on match
// position.
const defaultDistance = 200;
const distance = options.distance || defaultDistance;
// Perform an exact match search with range discovery.
const exactMatches = [];
for (let tab of records) {
......@@ -95,15 +106,21 @@ function exactSearch(searchText, records) {
if (hostnameHighlightRanges.length) {
matchedTab.hostnameHighlightRanges = hostnameHighlightRanges;
}
exactMatches.push(matchedTab);
exactMatches.push({
tab: matchedTab,
score: scoringFunction(matchedTab, distance, titleToHostnameWeightRatio)
});
}
// Sort by score.
exactMatches.sort((a, b) => (b.score - a.score));
// Prioritize items.
const itemsMatchingStringStart = [];
const itemsMatchingWordStart = [];
const others = [];
const wordStartRegexp = new RegExp(`\\b${quoteString(searchText)}`, 'i');
for (let tab of exactMatches) {
for (let {tab} of exactMatches) {
// Find matches that occur at the beginning of the string.
if (hasMatchStringStart(tab)) {
itemsMatchingStringStart.push(tab);
......@@ -163,3 +180,31 @@ function getRanges(target, searchText) {
}
return ranges;
}
/**
 * Computes a relevance score for a matched tab from the start offsets of its
 * highlighted title and hostname ranges. Every highlighted range contributes
 * a value that decays linearly with its start offset, so early matches
 * outrank late ones and several matches outrank a single one. Title
 * contributions are additionally multiplied by |titleToHostnameWeightRatio|.
 * @suppress {checkTypes}
 * @param {!tabSearch.mojom.Tab} tab
 * @param {number} distance
 * @param {number} titleToHostnameWeightRatio
 * @return {number}
 */
function scoringFunction(tab, distance, titleToHostnameWeightRatio) {
  // Linearly maps a match offset in [0, distance] onto [1, 0]; any offset
  // beyond |distance| is clamped to a contribution of 0.
  const positionScore = offset => Math.max((distance - offset) / distance, 0);

  // Pairs of (highlight ranges, weight), visited in title-then-hostname order.
  const weightedRanges = [
    [tab.titleHighlightRanges, titleToHostnameWeightRatio],
    [tab.hostnameHighlightRanges, 1],
  ];

  let total = 0;
  for (const [ranges, weight] of weightedRanges) {
    if (!ranges) {
      continue;
    }
    for (const range of ranges) {
      total += positionScore(range.start) * weight;
    }
  }
  return total;
}
......@@ -6,6 +6,25 @@ import {fuzzySearch} from 'chrome://tab-search/fuzzy_search.js'
import {assertDeepEquals, assertEquals} from '../../chai_assert.js';
/**
 * Runs fuzzySearch() and asserts that the results come back in a specific
 * order. |expectedIndices[i]| is the index into |items| of the record that is
 * expected at position i of the results; records are compared by their title
 * and hostname fields.
 * TODO(tluk): Fix the typing for tabSearch.mojom.Tab here given we are updating
 * the fields on this object ( https://crbug.com/1133558 ).
 * @suppress {checkTypes}
 * @param {string} input
 * @param {!Array<!tabSearch.mojom.Tab>} items
 * @param {!Object} options
 * @param {!Array<number>} expectedIndices
 */
function assertSearchOrders(input, items, options, expectedIndices) {
  const results = fuzzySearch(input, items, options);
  // Pass the expected value first, matching the (expected, actual) order used
  // by the per-item checks below, so a failure reports the values correctly.
  assertEquals(expectedIndices.length, results.length);
  for (let i = 0; i < results.length; ++i) {
    const expectedItem = items[expectedIndices[i]];
    assertEquals(expectedItem.title, results[i].title);
    assertEquals(expectedItem.hostname, results[i].hostname);
  }
}
suite('FuzzySearchTest', () => {
test('fuzzySearch', () => {
const records = [
......@@ -93,38 +112,37 @@ suite('FuzzySearchTest', () => {
{
title: 'Arch Linux',
hostname: 'www.archlinux.org',
titleHighlightRanges: [ {start: 0, length: 4} ],
hostnameHighlightRanges: [ {start: 4, length: 4} ],
titleHighlightRanges: [{start: 0, length: 4}],
hostnameHighlightRanges: [{start: 4, length: 4}],
},
{
title: 'Arches National Park',
hostname: 'www.nps.gov',
titleHighlightRanges: [ {start: 0, length: 4} ],
titleHighlightRanges: [{start: 0, length: 4}],
},
{
title: 'Chrome Desktop Architecture',
hostname: 'drive.google.com',
titleHighlightRanges: [ {start: 15, length: 4} ],
titleHighlightRanges: [{start: 15, length: 4}],
},
{
title: 'Code Search',
hostname: 'search.chromium.search',
titleHighlightRanges: [ {start: 7, length: 4} ],
titleHighlightRanges: [{start: 7, length: 4}],
hostnameHighlightRanges:
[ {start: 2, length: 4}, {start: 18, length: 4} ],
},
{
title: 'Marching band',
hostname: 'en.marching.band.com',
titleHighlightRanges: [ {start: 1, length: 4} ],
hostnameHighlightRanges: [ {start: 4, length: 4} ]
[{start: 2, length: 4}, {start: 18, length: 4}],
},
{
title: 'Search Engine Land - Search Engines',
hostname: 'searchengineland.com',
titleHighlightRanges:
[ {start: 2, length: 4}, {start: 23, length: 4} ],
hostnameHighlightRanges: [ {start: 2, length: 4} ]
titleHighlightRanges: [{start: 2, length: 4}, {start: 23, length: 4}],
hostnameHighlightRanges: [{start: 2, length: 4}]
},
{
title: 'Marching band',
hostname: 'en.marching.band.com',
titleHighlightRanges: [{start: 1, length: 4}],
hostnameHighlightRanges: [{start: 4, length: 4}]
},
];
......@@ -133,16 +151,15 @@ suite('FuzzySearchTest', () => {
{
title: 'Code Search',
hostname: 'search.chromium.search',
titleHighlightRanges: [ {start: 5, length: 6} ],
titleHighlightRanges: [{start: 5, length: 6}],
hostnameHighlightRanges:
[ {start: 0, length: 6}, {start: 16, length: 6} ],
[{start: 0, length: 6}, {start: 16, length: 6}],
},
{
title: 'Search Engine Land - Search Engines',
hostname: 'searchengineland.com',
titleHighlightRanges:
[ {start: 0, length: 6}, {start: 21, length: 6} ],
hostnameHighlightRanges: [ {start: 0, length: 6} ]
titleHighlightRanges: [{start: 0, length: 6}, {start: 21, length: 6}],
hostnameHighlightRanges: [{start: 0, length: 6}]
},
];
......@@ -194,4 +211,57 @@ suite('FuzzySearchTest', () => {
fuzzySearch('\"end', records, options));
});
// Exact-match mode (threshold 0.0): each title contains 'two' exactly once, at
// a different offset, so the expected order depends only on match position.
test('Test exact match result scoring accounts for match position.', () => {
const options = {
threshold: 0.0,
};
// 'two' starts at offset 10, 6 and 4 in items 0, 1 and 2 respectively; the
// earliest match is expected first, hence [2, 1, 0].
assertSearchOrders(
'two',
[
{title: 'three one two'}, {title: 'three two one'},
{title: 'one two three'}
],
options, [2, 1, 0]);
});
// Exact-match mode (threshold 0.0): every title starts with 'one', and the
// titles differ in how many times 'one' occurs.
test(
'Test exact match result scoring takes into account the number of matches per item.',
() => {
const options = {
threshold: 0.0,
};
// Items 0, 1 and 2 contain 'one' once, twice and three times respectively;
// the item with the most matches is expected first, hence [2, 1, 0].
assertSearchOrders(
'one',
[
{title: 'one two three'}, {title: 'one one three'},
{title: 'one one one'}
],
options, [2, 1, 0]);
});
// Exact-match mode (threshold 0.0) with the title key weighted 2 and the
// hostname key weighted 1.
test(
'Test exact match result scoring abides by the titleToHostnameWeightRatio.',
() => {
const options = {
threshold: 0.0,
keys: [
{
name: 'title',
weight: 2,
},
{
name: 'hostname',
weight: 1,
}
]
};
// The same string is searched in every item: item 0 carries it only as the
// hostname, item 1 only as the title, and item 2 as both. The expected order
// puts the title+hostname match first, then title-only, then hostname-only.
assertSearchOrders(
'search',
[
{hostname: 'chrome://tab-search'}, {title: 'chrome://tab-search'},
{title: 'chrome://tab-search', hostname: 'chrome://tab-search'}
],
options, [2, 1, 0]);
});
});
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment