Commit 55a477b6 authored by Nick Burris's avatar Nick Burris Committed by Commit Bot

Implement word boundary matching

Ensure a match is bounded by word boundaries. This implementation uses a
similar approach to providing FindOptions::kWholeWord to
FindBuffer::FindMatchInRange, however that option requires the search
text to be a single word.

Added tests and a web platform test case.

Bug: 924965
Change-Id: Iaa6785181eb29f0d25f64026c3be05d095ace3ad
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1803723
Commit-Queue: Nick Burris <nburris@chromium.org>
Reviewed-by: default avatarDavid Bokan <bokan@chromium.org>
Cr-Commit-Position: refs/heads/master@{#697244}
parent b2775899
...@@ -1319,6 +1319,102 @@ TEST_F(TextFragmentAnchorTest, NoMatchFoundFallsBackToElementFragment) { ...@@ -1319,6 +1319,102 @@ TEST_F(TextFragmentAnchorTest, NoMatchFoundFallsBackToElementFragment) {
<< LayoutViewport()->GetScrollOffset().ToString(); << LayoutViewport()->GetScrollOffset().ToString();
} }
// Test that we don't match partial words at the beginning or end of the text.
TEST_F(TextFragmentAnchorTest, CheckForWordBoundary) {
SimRequest request(
"https://example.com/"
"test.html#targetText=This%20is%20a%20te&tagetText=st%20page",
"text/html");
LoadURL(
"https://example.com/"
"test.html#targetText=This%20is%20a%20te&tagetText=st%20page");
request.Complete(R"HTML(
<!DOCTYPE html>
<style>
body {
height: 1200px;
}
p {
position: absolute;
top: 1000px;
}
</style>
<p id="text">This is a test page</p>
)HTML");
Compositor().BeginFrame();
RunAsyncMatchingTasks();
EXPECT_EQ(ScrollOffset(), LayoutViewport()->GetScrollOffset());
EXPECT_TRUE(GetDocument().Markers().Markers().IsEmpty());
}
// Test that we don't match partial words with context
TEST_F(TextFragmentAnchorTest, CheckForWordBoundaryWithContext) {
SimRequest request("https://example.com/test.html#targetText=est-,page",
"text/html");
LoadURL("https://example.com/test.html#targetText=est-,page");
request.Complete(R"HTML(
<!DOCTYPE html>
<style>
body {
height: 1200px;
}
p {
position: absolute;
top: 1000px;
}
</style>
<p id="text">This is a test page</p>
)HTML");
Compositor().BeginFrame();
RunAsyncMatchingTasks();
EXPECT_EQ(ScrollOffset(), LayoutViewport()->GetScrollOffset());
EXPECT_TRUE(GetDocument().Markers().Markers().IsEmpty());
}
// Test that we correctly match a whole word when it appears as a partial word
// earlier in the page.
TEST_F(TextFragmentAnchorTest, CheckForWordBoundaryWithPartialWord) {
SimRequest request("https://example.com/test.html#targetText=tes,age",
"text/html");
LoadURL("https://example.com/test.html#targetText=tes,age");
request.Complete(R"HTML(
<!DOCTYPE html>
<style>
body {
height: 1200px;
}
#first {
position: absolute;
top: 1000px;
}
#second {
position: absolute;
top: 2000px;
}
</style>
<p id="first">This is a test page</p>
<p id="second">This is a tes age</p>
)HTML");
Compositor().BeginFrame();
RunAsyncMatchingTasks();
Element& p = *GetDocument().getElementById("second");
EXPECT_TRUE(ViewportRect().Contains(BoundingRectInFrame(p)))
<< "Should have scrolled <p> into view but didn't, scroll offset: "
<< LayoutViewport()->GetScrollOffset().ToString();
// Expect marker on only "tes age"
EXPECT_EQ(1u, GetDocument().Markers().Markers().size());
DocumentMarkerVector markers = GetDocument().Markers().MarkersFor(
*To<Text>(p.firstChild()), DocumentMarker::MarkerTypes::TextMatch());
ASSERT_EQ(1u, markers.size());
EXPECT_EQ(10u, markers.at(0)->StartOffset());
EXPECT_EQ(17u, markers.at(0)->EndOffset());
}
} // namespace } // namespace
} // namespace blink } // namespace blink
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "third_party/blink/renderer/core/editing/iterators/character_iterator.h" #include "third_party/blink/renderer/core/editing/iterators/character_iterator.h"
#include "third_party/blink/renderer/core/editing/position.h" #include "third_party/blink/renderer/core/editing/position.h"
#include "third_party/blink/renderer/core/page/scrolling/text_fragment_selector.h" #include "third_party/blink/renderer/core/page/scrolling/text_fragment_selector.h"
#include "third_party/blink/renderer/platform/text/text_boundaries.h"
namespace blink { namespace blink {
...@@ -22,12 +23,54 @@ namespace { ...@@ -22,12 +23,54 @@ namespace {
const char kNoContext[] = ""; const char kNoContext[] = "";
// Determines whether the start and end positions of |range| are on word
// boundaries.
// TODO(crbug/924965): Determine how this should check node boundaries. This
// treats node boundaries as word boundaries, for example "o" is a whole word
// match in "f<i>o</i>o".
bool IsWholeWordMatch(EphemeralRangeInFlatTree range) {
wtf_size_t start_position = range.StartPosition().OffsetInContainerNode();
if (start_position != 0) {
String start_text = range.StartPosition().AnchorNode()->textContent();
start_text.Ensure16Bit();
wtf_size_t word_start = FindWordStartBoundary(
start_text.Characters16(), start_text.length(), start_position);
if (word_start != start_position)
return false;
}
wtf_size_t end_position = range.EndPosition().OffsetInContainerNode();
String end_text = range.EndPosition().AnchorNode()->textContent();
if (end_position != end_text.length()) {
end_text.Ensure16Bit();
// We expect end_position to be a word boundary, and FindWordEndBoundary
// finds the next word boundary, so start from end_position - 1.
wtf_size_t word_end = FindWordEndBoundary(
end_text.Characters16(), end_text.length(), end_position - 1);
if (word_end != end_position)
return false;
}
return true;
}
EphemeralRangeInFlatTree FindMatchInRange(String search_text, EphemeralRangeInFlatTree FindMatchInRange(String search_text,
PositionInFlatTree search_start, PositionInFlatTree search_start,
PositionInFlatTree search_end) { PositionInFlatTree search_end) {
const EphemeralRangeInFlatTree search_range(search_start, search_end); while (search_start < search_end) {
return FindBuffer::FindMatchInRange(search_range, search_text, const EphemeralRangeInFlatTree search_range(search_start, search_end);
kCaseInsensitive); EphemeralRangeInFlatTree potential_match = FindBuffer::FindMatchInRange(
search_range, search_text, kCaseInsensitive);
if (potential_match.IsNull() || IsWholeWordMatch(potential_match))
return potential_match;
search_start = potential_match.EndPosition();
}
return EphemeralRangeInFlatTree();
} }
PositionInFlatTree NextTextPosition(PositionInFlatTree position, PositionInFlatTree NextTextPosition(PositionInFlatTree position,
...@@ -58,12 +101,18 @@ EphemeralRangeInFlatTree FindImmediateMatch(String search_text, ...@@ -58,12 +101,18 @@ EphemeralRangeInFlatTree FindImmediateMatch(String search_text,
FindBuffer buffer(EphemeralRangeInFlatTree(search_start, search_end)); FindBuffer buffer(EphemeralRangeInFlatTree(search_start, search_end));
// TODO(nburris): FindBuffer will search the rest of the document for a match,
// but we only need to check for an immediate match, so we should stop
// searching if there's no immediate match.
std::unique_ptr<FindBuffer::Results> match_results = std::unique_ptr<FindBuffer::Results> match_results =
buffer.FindMatches(search_text, kCaseInsensitive); buffer.FindMatches(search_text, kCaseInsensitive);
if (!match_results->IsEmpty() && match_results->front().start == 0u) { if (!match_results->IsEmpty() && match_results->front().start == 0u) {
FindBuffer::BufferMatchResult match = match_results->front(); FindBuffer::BufferMatchResult buffer_match = match_results->front();
return buffer.RangeFromBufferIndex(match.start, match.start + match.length); EphemeralRangeInFlatTree match = buffer.RangeFromBufferIndex(
buffer_match.start, buffer_match.start + buffer_match.length);
if (IsWholeWordMatch(match))
return match;
} }
return EphemeralRangeInFlatTree(); return EphemeralRangeInFlatTree();
......
...@@ -16,6 +16,7 @@ let test_cases = [ ...@@ -16,6 +16,7 @@ let test_cases = [
{ fragment: '##targetText=this,test,-page', expect_position: 'text' }, { fragment: '##targetText=this,test,-page', expect_position: 'text' },
{ fragment: '##targetText=this%20is%20a%20test%20page', expect_position: 'text' }, { fragment: '##targetText=this%20is%20a%20test%20page', expect_position: 'text' },
{ fragment: '##targetText=this&targetText=test,page', expect_position: 'text' }, { fragment: '##targetText=this&targetText=test,page', expect_position: 'text' },
{ fragment: '##targetText=tes&targetText=age', expect_position: 'top' },
{ fragment: '#pagestate##targetText=test', expect_position: 'text' }, { fragment: '#pagestate##targetText=test', expect_position: 'text' },
{ fragment: '#pagestate##targetText=nomatch', expect_position: 'top' }, { fragment: '#pagestate##targetText=nomatch', expect_position: 'top' },
{ fragment: '#element##targetText=nomatch', expect_position: 'element' }, { fragment: '#element##targetText=nomatch', expect_position: 'element' },
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment