Commit 121f61fa authored by David Bokan's avatar David Bokan Committed by Commit Bot

Make text fragment matching match spec

The spec was recently updated in
https://github.com/WICG/scroll-to-text-fragment/pull/148 to remove
redundant restrictions on word boundaries. This CL makes the
implementation match these updates and refactors the text fragment
matching algorithm to more closely resemble the spec.

This change actually fixes a few edge-cases. Updated tests to provide
comprehensive coverage of the matching algorithm, linking test cases to
individual algorithm steps.

Bug: 1148178
Change-Id: Ia2621bcdf151e22b4029cfde3dd9e319e5fb281e
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2533585
Commit-Queue: David Bokan <bokan@chromium.org>
Reviewed-by: default avatarNick Burris <nburris@chromium.org>
Cr-Commit-Position: refs/heads/master@{#827079}
parent e46f248b
......@@ -16,6 +16,7 @@
#include "third_party/blink/renderer/core/editing/finder/find_options.h"
#include "third_party/blink/renderer/core/editing/iterators/character_iterator.h"
#include "third_party/blink/renderer/core/editing/position.h"
#include "third_party/blink/renderer/core/editing/position_iterator.h"
#include "third_party/blink/renderer/core/html/list_item_ordinal.h"
#include "third_party/blink/renderer/core/page/scrolling/text_fragment_selector.h"
#include "third_party/blink/renderer/platform/text/text_boundaries.h"
......@@ -24,17 +25,18 @@ namespace blink {
namespace {
const char kNoContext[] = "";
// Determines whether the start and end positions of |range| are on word
// boundaries.
// TODO(crbug/924965): Determine how this should check node boundaries. This
// treats node boundaries as word boundaries, for example "o" is a whole word
// match in "f<i>o</i>o".
bool IsWholeWordMatch(EphemeralRangeInFlatTree range) {
// Determines whether the |start| and/or |end| positions of |range| are on a
// word boundaries.
bool IsWordBounded(EphemeralRangeInFlatTree range, bool start, bool end) {
if (!start && !end)
return true;
wtf_size_t start_position = range.StartPosition().OffsetInContainerNode();
if (start_position != 0) {
if (start_position != 0 && start) {
String start_text = range.StartPosition().AnchorNode()->textContent();
start_text.Ensure16Bit();
wtf_size_t word_start = FindWordStartBoundary(
......@@ -46,7 +48,7 @@ bool IsWholeWordMatch(EphemeralRangeInFlatTree range) {
wtf_size_t end_position = range.EndPosition().OffsetInContainerNode();
String end_text = range.EndPosition().AnchorNode()->textContent();
if (end_position != end_text.length()) {
if (end_position != end_text.length() && end) {
end_text.Ensure16Bit();
// We expect end_position to be a word boundary, and FindWordEndBoundary
// finds the next word boundary, so start from end_position - 1.
......@@ -59,15 +61,44 @@ bool IsWholeWordMatch(EphemeralRangeInFlatTree range) {
return true;
}
PositionInFlatTree FirstWordBoundaryAfter(PositionInFlatTree position) {
wtf_size_t offset = position.OffsetInContainerNode();
String text = position.AnchorNode()->textContent();
if (offset == text.length()) {
PositionIteratorInFlatTree itr(position);
if (itr.AtEnd())
return position;
itr.Increment();
return itr.ComputePosition();
}
text.Ensure16Bit();
wtf_size_t word_end =
FindWordEndBoundary(text.Characters16(), text.length(), offset);
PositionInFlatTree end_pos(position.AnchorNode(), word_end);
PositionIteratorInFlatTree itr(end_pos);
if (itr.AtEnd())
return end_pos;
itr.Increment();
return itr.ComputePosition();
}
EphemeralRangeInFlatTree FindMatchInRange(String search_text,
PositionInFlatTree search_start,
PositionInFlatTree search_end) {
PositionInFlatTree search_end,
bool word_start_bounded,
bool word_end_bounded) {
while (search_start < search_end) {
const EphemeralRangeInFlatTree search_range(search_start, search_end);
EphemeralRangeInFlatTree potential_match = FindBuffer::FindMatchInRange(
search_range, search_text, kCaseInsensitive);
if (potential_match.IsNull() || IsWholeWordMatch(potential_match))
if (potential_match.IsNull() ||
IsWordBounded(potential_match, word_start_bounded, word_end_bounded))
return potential_match;
search_start = potential_match.EndPosition();
......@@ -90,39 +121,9 @@ PositionInFlatTree NextTextPosition(PositionInFlatTree position,
return end_position;
}
// Find and return the range of |search_text| if the first text in the search
// range is |search_text|, skipping over whitespace and element boundaries.
EphemeralRangeInFlatTree FindImmediateMatch(String search_text,
PositionInFlatTree search_start,
PositionInFlatTree search_end) {
if (search_text.IsEmpty())
return EphemeralRangeInFlatTree();
search_start = NextTextPosition(search_start, search_end);
if (search_start == search_end)
return EphemeralRangeInFlatTree();
FindBuffer buffer(EphemeralRangeInFlatTree(search_start, search_end));
// TODO(nburris): FindBuffer will search the rest of the document for a match,
// but we only need to check for an immediate match, so we should stop
// searching if there's no immediate match.
FindBuffer::Results match_results =
buffer.FindMatches(search_text, kCaseInsensitive);
if (!match_results.IsEmpty() && match_results.front().start == 0u) {
FindBuffer::BufferMatchResult buffer_match = match_results.front();
EphemeralRangeInFlatTree match = buffer.RangeFromBufferIndex(
buffer_match.start, buffer_match.start + buffer_match.length);
if (IsWholeWordMatch(match))
return match;
}
return EphemeralRangeInFlatTree();
}
EphemeralRangeInFlatTree FindMatchInRangeWithContext(
const String& search_text,
const String& start_text,
const String& end_text,
const String& prefix,
const String& suffix,
PositionInFlatTree search_start,
......@@ -131,43 +132,99 @@ EphemeralRangeInFlatTree FindMatchInRangeWithContext(
EphemeralRangeInFlatTree potential_match;
if (!prefix.IsEmpty()) {
EphemeralRangeInFlatTree prefix_match =
FindMatchInRange(prefix, search_start, search_end);
EphemeralRangeInFlatTree prefix_match = FindMatchInRange(
prefix, search_start, search_end, /*word_start_bounded=*/true,
/*word_end_bounded=*/false);
// No prefix_match in remaining range
if (prefix_match.IsNull())
return EphemeralRangeInFlatTree();
search_start = prefix_match.EndPosition();
potential_match =
FindImmediateMatch(search_text, search_start, search_end);
// No search_text match after current prefix_match
// If we iterate again, start searching from the first boundary after the
// prefix start (since prefix must start at a boundary). Note, we don't
// advance to the prefix end; this is done since, if this prefix isn't
// the one we're looking for, the next occurrence might be overlapping
// with the current one. e.g. If |prefix| is "a a" and our search range
// currently starts with "a a a b...", the next iteration should start at
// the second a which is part of the current |prefix_match|.
search_start = FirstWordBoundaryAfter(prefix_match.StartPosition());
EphemeralRangeInFlatTree match_range(
NextTextPosition(prefix_match.EndPosition(), search_end), search_end);
// The match text need not be bounded at the end. If this is an exact
// match (i.e. no |end_text|) and we have a suffix then the suffix will
// be required to end on the word boundary instead. Since we have a
// prefix, we don't need the match to be word bounded. See
// https://github.com/WICG/scroll-to-text-fragment/issues/137 for
// details.
const bool end_at_word_boundary = !end_text.IsEmpty() || suffix.IsEmpty();
potential_match = FindMatchInRange(
start_text, match_range.StartPosition(), match_range.EndPosition(),
/*word_start_bounded=*/false, end_at_word_boundary);
// No start_text match after current prefix_match
if (potential_match.IsNull())
return EphemeralRangeInFlatTree();
// We found a potential match but it didn't immediately follow the prefix.
if (potential_match.StartPosition() != match_range.StartPosition())
continue;
} else {
potential_match = FindMatchInRange(search_text, search_start, search_end);
const bool end_at_word_boundary = !end_text.IsEmpty() || suffix.IsEmpty();
// No search_text match in remaining range
potential_match =
FindMatchInRange(start_text, search_start, search_end,
/*word_start_bounded=*/true, end_at_word_boundary);
// No start_text match in remaining range
if (potential_match.IsNull())
return EphemeralRangeInFlatTree();
search_start = potential_match.EndPosition();
search_start = FirstWordBoundaryAfter(potential_match.StartPosition());
}
PositionInFlatTree suffix_start = potential_match.EndPosition();
DCHECK(potential_match.IsNotNull());
if (!suffix.IsEmpty()) {
EphemeralRangeInFlatTree suffix_match =
FindImmediateMatch(suffix, suffix_start, search_end);
// If we've gotten here, we've found a |prefix| (if one was specified)
// that's followed by the |start_text|. We'll now try to expand that into a
// range match if |end_text| is specified.
if (!end_text.IsEmpty()) {
EphemeralRangeInFlatTree text_end_range(potential_match.EndPosition(),
search_end);
const bool end_at_word_boundary = suffix.IsEmpty();
// No suffix match after current potential_match
if (suffix_match.IsNull())
continue;
EphemeralRangeInFlatTree text_end_match =
FindMatchInRange(end_text, text_end_range.StartPosition(),
text_end_range.EndPosition(),
/*word_start_bounded=*/true, end_at_word_boundary);
if (text_end_match.IsNull())
return EphemeralRangeInFlatTree();
potential_match = EphemeralRangeInFlatTree(
potential_match.StartPosition(), text_end_match.EndPosition());
}
// If we reach here without a return or continue, we have a full match.
return potential_match;
DCHECK(!potential_match.IsNull());
if (suffix.IsEmpty())
return potential_match;
// Now we just have to ensure the match is followed by the |suffix|.
EphemeralRangeInFlatTree suffix_range(
NextTextPosition(potential_match.EndPosition(), search_end),
search_end);
EphemeralRangeInFlatTree suffix_match = FindMatchInRange(
suffix, suffix_range.StartPosition(), suffix_range.EndPosition(),
/*word_start_bounded=*/false, /*word_end_bounded=*/true);
// If no suffix appears in what follows the match, there's no way we can
// possibly satisfy the constraints so bail.
if (suffix_match.IsNull())
return EphemeralRangeInFlatTree();
if (suffix_match.StartPosition() == suffix_range.StartPosition())
return potential_match;
}
return EphemeralRangeInFlatTree();
......@@ -256,34 +313,9 @@ EphemeralRangeInFlatTree TextFragmentFinder::FindMatchFromPosition(
search_end = PositionInFlatTree::LastPositionInNode(document);
}
// TODO(crbug.com/930156): Make FindMatch work asynchronously.
EphemeralRangeInFlatTree match;
if (selector_.Type() == TextFragmentSelector::kExact) {
match = FindMatchInRangeWithContext(selector_.Start(), selector_.Prefix(),
selector_.Suffix(), search_start,
search_end);
} else {
EphemeralRangeInFlatTree start_match =
FindMatchInRangeWithContext(selector_.Start(), selector_.Prefix(),
kNoContext, search_start, search_end);
if (start_match.IsNull())
return start_match;
// TODO(crbug.com/924964): Determine what we should do if the start text and
// end text are the same (and there are no context terms). This
// implementation continues searching for the next instance of the text,
// from the end of the first instance.
search_start = start_match.EndPosition();
EphemeralRangeInFlatTree end_match = FindMatchInRangeWithContext(
selector_.End(), kNoContext, selector_.Suffix(), search_start,
search_end);
if (end_match.IsNotNull()) {
match = EphemeralRangeInFlatTree(start_match.StartPosition(),
end_match.EndPosition());
}
}
return match;
return FindMatchInRangeWithContext(selector_.Start(), selector_.End(),
selector_.Prefix(), selector_.Suffix(),
search_start, search_end);
}
} // namespace blink
<!doctype html>
<title>Tests find-a-range-from-a-text-directive algorithm</title>
<script src="stash.js"></script>
<script>
window.didScroll = false;
function checkScroll() {
let results = {
didScroll: window.scrollY != 0
};
let key = (new URL(document.location)).searchParams.get("key");
stashResultsThenClose(key, results);
}
// Ensure two animation frames on load to test the fallback to element anchor,
// which gets queued for the next frame if the text fragment is not found.
window.onload = function() {
window.requestAnimationFrame(function() {
window.requestAnimationFrame(checkScroll);
});
}
</script>
<style>
.spacer {
width: 50vw;
height: 200vh;
}
</style>
<body>
<div class="spacer"></div>
<p>
The quick brown fox jumped over the lazy dog.
a a b b b c
</p>
<p>
foo foo foo bar bar bar
</p>
<p>
Lorem
<br> <i> </i>
<div> &nbsp </div>
<!-- This <p> puts lots of non textual stuff between words -->
<div style="display: none">This isn't rendered</div>
<div style="visibility: hidden">This also isn't visible</div>
<iframe srcdoc="Inner Iframe"></iframe>
<img src="">
&nbsp; &nbsp Ipsum
Whitespace
<br> <i> </i>
<div> &nbsp </div>
<!-- This <p> puts lots of non textual stuff between words -->
<div style="display: none">This isn't rendered</div>
<div style="visibility: hidden">This also isn't visible</div>
<iframe srcdoc="Inner Iframe"></iframe>
<img src="">
&nbsp; &nbsp
Dipsum
</p>
<p>
This text appears at the end of the document
</p
</body>
<!doctype html>
<title>Tests find-a-range-from-a-text-directive algorithm</title>
<meta charset=utf-8>
<link rel="help" href="https://wicg.github.io/scroll-to-text-fragment/#find-a-range-from-a-text-directive">
<meta name="timeout" content="long">
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
<script src="/resources/testdriver.js"></script>
<script src="/resources/testdriver-vendor.js"></script>
<script src="/common/utils.js"></script>
<script src="stash.js"></script>
<!--
This test suite performs scroll to text navigations to
find-range-from-text-directive-target.html and then checks the results, which are
communicated back from the target page via the WPT Stash server (see stash.py).
This structure is necessary because scroll to text security restrictions
specifically restrict the navigator from being able to observe the result of
the navigation, e.g. the target page cannot have a window opener.
-->
<script>
// This test suite specifically exercises the "find a range from a text
// directive" algorithm in the spec.
let test_cases = [
{
fragment: '#:~:text=jumped',
expect_to_scroll: true,
description: 'Basic smoke test - full word match'
},
{
// Step 2.2.1
fragment: '#:~:text=u-,mped',
expect_to_scroll: false,
description: 'Prefix must start on a word boundary'
},
{
// Step 2.2.1
fragment: '#:~:text=ju-,mped',
expect_to_scroll: true,
description: 'Prefix need not end on a word boundary'
},
{
// Step 2.2.2
fragment: '#:~:text=null-,The%20quick',
expect_to_scroll: false,
description: 'Prefix doesn\'t exist'
},
{
// Step 2.2.3
fragment: '#:~:text=foo%20foo-,bar',
expect_to_scroll: true,
description: 'Multiple overlapping prefixes'
},
{
// Step 2.2.3
fragment: '#:~:text=a%20a-,b',
expect_to_scroll: true,
description: 'Multiple overlapping one letter prefixes'
},
{
// Step 2.2.4
fragment: '#:~:text=quick%20brown-,brown%20fox',
expect_to_scroll: false,
description: 'Prefix overlaps match text'
},
{
// Step 2.2.4
fragment: '#:~:text=quick%20brown-,fox',
expect_to_scroll: true,
description: 'Match text after prefix'
},
{
// Step 2.2.5
fragment: '#:~:text=Lorem-,Ipsum',
expect_to_scroll: true,
description: 'Search invisible content between prefix and match'
},
{
// Step 2.2.6
fragment: '#:~:text=end%20of%20the%20document-,test',
expect_to_scroll: false,
description: 'Prefix appears at end of document'
},
{
// Step 2.2.8
fragment: '#:~:text=fox-,jum,over',
expect_to_scroll: false,
description: '|end| forces |start| to end on word boundary'
},
{
// Step 2.2.8
fragment: '#:~:text=fox-,jum',
expect_to_scroll: false,
description: 'no |end| or suffix forces |start| to end on word boundary'
},
{
// Step 2.2.8
fragment: '#:~:text=fox-,jum,-ped',
expect_to_scroll: true,
description: 'suffix means |start| need not end on word boundary'
},
{
// Step 2.2.9
fragment: '#:~:text=jum-,ped',
expect_to_scroll: true,
description: '|start| doesn\'t need to start on word boundary'
},
{
// Step 2.2.10
fragment: '#:~:text=jumped-,null',
expect_to_scroll: false,
description: 'prefix with non-existent exact match'
},
{
// Step 2.2.10
fragment: '#:~:text=jumped-,null,lazy',
expect_to_scroll: false,
description: 'prefix with non-existent range match'
},
{
// Step 2.2.11
fragment: '#:~:text=brown-,jumped',
expect_to_scroll: false,
description: 'match doesn\'t immediately follow prefix'
},
{
// Step 2.2.11
fragment: '#:~:text=foo-,bar',
expect_to_scroll: true,
description: 'match doesn\'t immediately follow first prefix instance'
},
{
// Step 2.3.1
fragment: '#:~:text=jum,over',
expect_to_scroll: false,
description: 'no-prefix; |end| forces |start| to end on word boundary'
},
{
// Step 2.3.1
fragment: '#:~:text=jum',
expect_to_scroll: false,
description: 'no-prefix; no |end| or suffix forces |start| to end on word boundary'
},
{
// Step 2.3.1
fragment: '#:~:text=jum,-ped',
expect_to_scroll: true,
description: 'no-prefix; suffix means |start| need not end on word boundary'
},
{
// Step 2.3.2
fragment: '#:~:text=umped',
expect_to_scroll: false,
description: '|start| must start on a word boundary'
},
{
// Step 2.3.3
fragment: '#:~:text=null',
expect_to_scroll: false,
description: 'non-existent exact match'
},
{
// Step 2.3.3
fragment: '#:~:text=null,lazy',
expect_to_scroll: false,
description: 'non-existent range match'
},
{
// Step 2.3.4
fragment: '#:~:text=b%20b,-c',
expect_to_scroll: true,
description: 'overlapping exact matches with suffix'
},
{
// Step 2.3.4
fragment: '#:~:text=foo%20foo,-bar',
expect_to_scroll: true,
description: 'overlapping one letter exact matches with suffix'
},
{
// Step 2.4.1
fragment: '#:~:text=brown,fox',
expect_to_scroll: true,
description: 'matching range search'
},
{
// Step 2.4.1
fragment: '#:~:text=brown,quick',
expect_to_scroll: false,
description: 'inverted range search'
},
{
// Step 2.4.2
fragment: '#:~:text=quick,bro',
expect_to_scroll: false,
description: 'no suffix forces |end| to be end bounded'
},
{
// Step 2.4.2
fragment: '#:~:text=quick,bro,-wn',
expect_to_scroll: true,
description: 'suffix means |end| need not be end bounded'
},
{
// Step 2.4.3
fragment: '#:~:text=quick,ro,-wn',
expect_to_scroll: false,
description: '|end| must be start bounded'
},
{
// Step 2.4.3
fragment: '#:~:text=bro,wn',
expect_to_scroll: false,
description: '|end| must be start bounded even if full range is word bounded'
},
{
// Step 2.4.4
fragment: '#:~:text=quick,null',
expect_to_scroll: false,
description: 'non-existent |end|'
},
{
// Step 2.4.5
fragment: '#:~:text=quick,jumped,-fox',
expect_to_scroll: false,
description: 'Range with preceeding suffix'
},
{
// Step 2.6
fragment: '#:~:text=The-,quick,brown',
expect_to_scroll: true,
description: 'Match with no suffix'
},
{
// Step 2.7
fragment: '#:~:text=The-,quick,fox,-brown',
expect_to_scroll: false,
description: 'Suffix comes before |end|'
},
{
// Step 2.8
fragment: '#:~:text=Lorem-,Ipsum,Whitespace,-Dipsum',
expect_to_scroll: true,
description: 'Search invisible content between |end| and suffix'
},
{
// Step 2.9
fragment: '#:~:text=quick,-bro',
expect_to_scroll: false,
description: 'Suffix must be end bounded'
},
{
// Step 2.9
fragment: '#:~:text=qu,-ick',
expect_to_scroll: true,
description: 'Suffix need not be start bounded'
},
{
// Step 2.10
fragment: '#:~:text=quick,-null',
expect_to_scroll: false,
description: 'Non-existent suffix'
},
{
// Step 2.11
fragment: '#:~:text=quick,-fox',
expect_to_scroll: false,
description: 'Content appears between match and suffix'
}
];
for (const test_case of test_cases) {
promise_test(t => new Promise((resolve, reject) => {
let key = token();
test_driver.bless('Open a URL with a text fragment directive', () => {
window.open(`find-range-from-text-directive-target.html?key=${key}${test_case.fragment}`, '_blank', 'noopener');
});
fetchResults(key, resolve, reject);
}).then(data => {
const expectation_string = test_case.expect_to_scroll ? 'to scroll' : 'not to scroll';
assert_equals(data.didScroll, test_case.expect_to_scroll,
`Expected ${expectation_string}`);
}), `${test_case.description}.`);
}
</script>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment