Commit 47080ffd, authored by Yoshifumi Inoue, committed by Commit Bot

Introduce FindWord{Back,For}ward()

This patch introduces |FindWordBackward()| and |FindWordForward()| in
preparation for the patch [1].

[1] http://crrev.com/c/737981 Simplify word granularity handling

Bug: 778507
Change-Id: I405683cd07a0dc3a5f8cc26c307e1d980dcb2961
Reviewed-on: https://chromium-review.googlesource.com/970191
Commit-Queue: Yoshifumi Inoue <yosin@chromium.org>
Reviewed-by: Dominik Röttsches <drott@chromium.org>
Reviewed-by: Koji Ishii <kojii@chromium.org>
Cr-Commit-Position: refs/heads/master@{#545030}
parent 041865f6
...@@ -1914,6 +1914,7 @@ jumbo_source_set("blink_platform_unittests_sources") { ...@@ -1914,6 +1914,7 @@ jumbo_source_set("blink_platform_unittests_sources") {
"text/PlatformLocaleTest.cpp", "text/PlatformLocaleTest.cpp",
"text/SegmentedStringTest.cpp", "text/SegmentedStringTest.cpp",
"text/SuffixTreeTest.cpp", "text/SuffixTreeTest.cpp",
"text/TextBoundariesTest.cpp",
"text/TextBreakIteratorTest.cpp", "text/TextBreakIteratorTest.cpp",
"text/TextEncodingDetectorTest.cpp", "text/TextEncodingDetectorTest.cpp",
"text/TextRunTest.cpp", "text/TextRunTest.cpp",
......
...@@ -89,6 +89,38 @@ int FindNextWordBackward(const UChar* chars, int len, int position) { ...@@ -89,6 +89,38 @@ int FindNextWordBackward(const UChar* chars, int len, int position) {
return 0; return 0;
} }
std::pair<int, int> FindWordBackward(const UChar* chars,
int len,
int position) {
DCHECK_GE(len, 0);
DCHECK_LE(position, len);
if (len == 0)
return {0, 0};
TextBreakIterator* it = WordBreakIterator(chars, len);
const int start = it->preceding(position);
const int end = it->next();
if (start < 0) {
// There are no words at |position|.
return {0, 0};
}
return {start, end};
}
std::pair<int, int> FindWordForward(const UChar* chars, int len, int position) {
DCHECK_GE(len, 0);
DCHECK_LE(position, len);
if (len == 0)
return {0, 0};
TextBreakIterator* it = WordBreakIterator(chars, len);
const int end = it->following(position);
const int start = it->previous();
if (end < 0) {
// There are no words at |position|.
return {len, len};
}
return {start, end};
}
int FindWordStartBoundary(const UChar* chars, int len, int position) { int FindWordStartBoundary(const UChar* chars, int len, int position) {
TextBreakIterator* it = WordBreakIterator(chars, len); TextBreakIterator* it = WordBreakIterator(chars, len);
it->following(position); it->following(position);
......
...@@ -26,6 +26,8 @@ ...@@ -26,6 +26,8 @@
#ifndef TextBoundaries_h #ifndef TextBoundaries_h
#define TextBoundaries_h #define TextBoundaries_h
#include <utility>
#include "platform/PlatformExport.h" #include "platform/PlatformExport.h"
#include "platform/wtf/text/Unicode.h" #include "platform/wtf/text/Unicode.h"
...@@ -42,6 +44,12 @@ PLATFORM_EXPORT int StartOfLastWordBoundaryContext(const UChar* characters, ...@@ -42,6 +44,12 @@ PLATFORM_EXPORT int StartOfLastWordBoundaryContext(const UChar* characters,
// |UChar*| should be a string in logical order instead of visual order, since // |UChar*| should be a string in logical order instead of visual order, since
// |FindWordBoundary()| uses ICU, which works on logical order strings // |FindWordBoundary()| uses ICU, which works on logical order strings
// Returns a {start, end} pair of offsets: |start| is the word break boundary
// preceding |position| and |end| is the boundary following |start|. Returns
// {0, 0} when |len| is 0 or when no boundary precedes |position|.
// |position| must not exceed |len|.
PLATFORM_EXPORT std::pair<int, int> FindWordBackward(const UChar*,
int len,
int position);
// Returns a {start, end} pair of offsets: |end| is the word break boundary
// following |position| and |start| is the boundary just before |end|. Returns
// {0, 0} when |len| is 0, and {len, len} when no boundary follows |position|.
// |position| must not exceed |len|.
PLATFORM_EXPORT std::pair<int, int> FindWordForward(const UChar*,
int len,
int position);
PLATFORM_EXPORT int FindWordStartBoundary(const UChar*, int len, int position); PLATFORM_EXPORT int FindWordStartBoundary(const UChar*, int len, int position);
PLATFORM_EXPORT int FindWordEndBoundary(const UChar*, int len, int position); PLATFORM_EXPORT int FindWordEndBoundary(const UChar*, int len, int position);
PLATFORM_EXPORT int FindNextWordBackward(const UChar*, int len, int position); PLATFORM_EXPORT int FindNextWordBackward(const UChar*, int len, int position);
......
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <string>
#include "platform/text/TextBoundaries.h"
#include "platform/wtf/text/StringBuilder.h"
#include "platform/wtf/text/WTFString.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace blink {
// Fixture for the FindWordBackward()/FindWordForward() tests below; it adds no
// state or setup of its own.
class TextBoundariesTest : public ::testing::Test {};
namespace {
// Splits the UTF-8 |input8| at the position(|) marker located by
// |String::find()|. Returns the 16-bit text with that marker removed, paired
// with the marker's offset inside the 16-bit string. |input8| is expected to
// contain a marker; this is DCHECKed.
std::pair<String, int> ParsePositionMarker(const std::string& input8) {
  String input16 = String::FromUTF8(input8.data(), input8.size());
  input16.Ensure16Bit();
  const size_t position = input16.find('|');
  DCHECK(position != kNotFound) << input8 << " should have position marker(|).";
  // Stitch together the text on either side of the marker.
  String text16 = input16.Left(position);
  text16.append(input16.Substring(position + 1));
  // The helpers in this file read the text through |Characters16()|.
  text16.Ensure16Bit();
  return {text16, position};
}
// Renders |text| as UTF-8 with a start(^) marker at offset |start| and an
// end(|) marker at offset |end|. A negative offset suppresses the marker pair:
//  - both negative: |text| is returned without markers;
//  - only |start| negative: a single '^' is inserted at |end|;
//  - only |end| negative: a single '|' is inserted at |start|.
std::string MakeResultText(const String& text, int start, int end) {
  StringBuilder builder;
  if (start < 0 && end < 0) {
    builder.Append(text);
  } else if (start < 0) {
    builder.Append(text.Left(end));
    builder.Append('^');
    builder.Append(text.Substring(end));
  } else if (end < 0) {
    builder.Append(text.Left(start));
    builder.Append('|');
    builder.Append(text.Substring(start));
  } else {
    builder.Append(text.Left(start));
    builder.Append('^');
    builder.Append(text.Substring(start, end - start));
    builder.Append('|');
    // Only this branch still owes the tail after |end|; every other branch has
    // already emitted the whole of |text|, so appending it unconditionally
    // would duplicate the tail (or pass a negative |end| to Substring()).
    builder.Append(text.Substring(end));
  }
  const CString result8 = builder.ToString().Utf8();
  return std::string(result8.data(), result8.length());
}
// Returns word boundray with start(^) and end(|) markes from text with
// position(|) marker.
std::string TryFindWordBackward(const std::string input8) {
std::pair<String, int> string_and_offset = ParsePositionMarker(input8);
const String text16 = string_and_offset.first;
const int position = string_and_offset.second;
std::pair<int, int> start_and_end =
FindWordBackward(text16.Characters16(), text16.length(), position);
return MakeResultText(text16, start_and_end.first, start_and_end.second);
}
// Returns word boundray with start(^) and end(|) markes from text with
// position(|) marker.
std::string TryFindWordForward(const std::string input8) {
std::pair<String, int> string_and_offset = ParsePositionMarker(input8);
const String text16 = string_and_offset.first;
const int position = string_and_offset.second;
std::pair<int, int> start_and_end =
FindWordForward(text16.Characters16(), text16.length(), position);
return MakeResultText(text16, start_and_end.first, start_and_end.second);
}
} // namespace
// Probes FindWordBackward() at every offset of two space-separated words.
// Offset 0 expects an empty range at 0; the offset right after the space
// expects the space itself to be reported as the range.
TEST_F(TextBoundariesTest, BackwardBasic) {
EXPECT_EQ("^|abc def", TryFindWordBackward("|abc def"));
EXPECT_EQ("^abc| def", TryFindWordBackward("a|bc def"));
EXPECT_EQ("^abc| def", TryFindWordBackward("ab|c def"));
EXPECT_EQ("^abc| def", TryFindWordBackward("abc| def"));
EXPECT_EQ("abc^ |def", TryFindWordBackward("abc |def"));
EXPECT_EQ("abc ^def|", TryFindWordBackward("abc d|ef"));
EXPECT_EQ("abc ^def|", TryFindWordBackward("abc de|f"));
EXPECT_EQ("abc ^def|", TryFindWordBackward("abc def|"));
}
// Probes FindWordForward() at every offset of two space-separated words.
// The offset right before the space expects the space itself to be reported
// as the range; the end of text expects an empty range at the end.
TEST_F(TextBoundariesTest, ForwardBasic) {
EXPECT_EQ("^abc| def", TryFindWordForward("|abc def"));
EXPECT_EQ("^abc| def", TryFindWordForward("a|bc def"));
EXPECT_EQ("^abc| def", TryFindWordForward("ab|c def"));
EXPECT_EQ("abc^ |def", TryFindWordForward("abc| def"));
EXPECT_EQ("abc ^def|", TryFindWordForward("abc |def"));
EXPECT_EQ("abc ^def|", TryFindWordForward("abc d|ef"));
EXPECT_EQ("abc ^def|", TryFindWordForward("abc de|f"));
EXPECT_EQ("abc def^|", TryFindWordForward("abc def|"));
}
// Same shape as ForwardBasic, but the two space-separated words are made of
// the letters U+0620..U+0622 and U+0623..U+0625. Offsets are logical-order
// offsets into the text.
TEST_F(TextBoundariesTest, ForwardBiDi) {
EXPECT_EQ(u8"^\u0620\u0621\u0622| \u0623\u0624\u0625",
TryFindWordForward(u8"|\u0620\u0621\u0622 \u0623\u0624\u0625"));
EXPECT_EQ(u8"^\u0620\u0621\u0622| \u0623\u0624\u0625",
TryFindWordForward(u8"\u0620|\u0621\u0622 \u0623\u0624\u0625"));
EXPECT_EQ(u8"^\u0620\u0621\u0622| \u0623\u0624\u0625",
TryFindWordForward(u8"\u0620\u0621|\u0622 \u0623\u0624\u0625"));
EXPECT_EQ(u8"\u0620\u0621\u0622^ |\u0623\u0624\u0625",
TryFindWordForward(u8"\u0620\u0621\u0622| \u0623\u0624\u0625"));
EXPECT_EQ(u8"\u0620\u0621\u0622 ^\u0623\u0624\u0625|",
TryFindWordForward(u8"\u0620\u0621\u0622 |\u0623\u0624\u0625"));
EXPECT_EQ(u8"\u0620\u0621\u0622 \u0623\u0624\u0625^|",
TryFindWordForward(u8"\u0620\u0621\u0622 \u0623\u0624\u0625|"));
}
// Latin letters and the letters U+0620..U+0622 placed next to each other with
// no separator, in both orders. Every probed offset, including the one on the
// script boundary, expects the whole mixed run to be reported as one word.
TEST_F(TextBoundariesTest, ForwardBiDiMixed) {
EXPECT_EQ(u8"^abc\u0620\u0621\u0622|",
TryFindWordForward(u8"|abc\u0620\u0621\u0622"));
EXPECT_EQ(u8"^abc\u0620\u0621\u0622|",
TryFindWordForward(u8"ab|c\u0620\u0621\u0622"));
EXPECT_EQ(u8"^abc\u0620\u0621\u0622|",
TryFindWordForward(u8"abc|\u0620\u0621\u0622"))
<< "At L1/L2 boundary";
EXPECT_EQ(u8"^abc\u0620\u0621\u0622|",
TryFindWordForward(u8"abc\u0620|\u0621\u0622"));
EXPECT_EQ(u8"^\u0620\u0621\u0622xyz|",
TryFindWordForward(u8"|\u0620\u0621\u0622xyz"));
EXPECT_EQ(u8"^\u0620\u0621\u0622xyz|",
TryFindWordForward(u8"\u0620|\u0621\u0622xyz"));
EXPECT_EQ(u8"^\u0620\u0621\u0622xyz|",
TryFindWordForward(u8"\u0620\u0621\u0622|xyz"))
<< "At L2/L1 boundary";
EXPECT_EQ(u8"^\u0620\u0621\u0622xyz|",
TryFindWordForward(u8"\u0620\u0621\u0622xy|z"));
}
// Single-character text: from offset 0 the character is the word; from the end
// of text an empty range at the end is expected.
TEST_F(TextBoundariesTest, ForwardOne) {
EXPECT_EQ("^a|", TryFindWordForward("|a"));
EXPECT_EQ("a^|", TryFindWordForward("a|")) << "No word after |";
}
// A word wrapped in parentheses: the opening parenthesis is expected to be a
// range of its own, separate from the enclosed word, and the end of text
// expects an empty range at the end.
TEST_F(TextBoundariesTest, ForwardParenthesis) {
EXPECT_EQ("^(|abc)", TryFindWordForward("|(abc)"));
EXPECT_EQ("(^abc|)", TryFindWordForward("(|abc)"));
EXPECT_EQ("(^abc|)", TryFindWordForward("(a|bc)"));
EXPECT_EQ("(^abc|)", TryFindWordForward("(ab|c)"));
EXPECT_EQ("(abc)^|", TryFindWordForward("(abc)|")) << "No word after |";
}
// A word followed by two commas: the first comma is expected to be reported
// as a one-character range rather than being merged with the second.
TEST_F(TextBoundariesTest, ForwardPunctuations) {
EXPECT_EQ("^abc|,,", TryFindWordForward("|abc,,"));
EXPECT_EQ("abc^,|,", TryFindWordForward("abc|,,"));
}
// Probes FindWordForward() at every offset of "  abc  def  ", i.e. two words
// with two spaces before, between and after them. The expectations treat each
// space as its own one-character range.
//
// Every literal spells out both spaces of each gap: an expectation is only
// meaningful when it differs from its input by markers alone, so the runs of
// spaces must not be collapsed.
TEST_F(TextBoundariesTest, ForwardWhitespaces) {
  EXPECT_EQ("^ | abc  def  ", TryFindWordForward("|  abc  def  "));
  EXPECT_EQ(" ^ |abc  def  ", TryFindWordForward(" | abc  def  "));
  EXPECT_EQ("  ^abc|  def  ", TryFindWordForward("  |abc  def  "));
  EXPECT_EQ("  ^abc|  def  ", TryFindWordForward("  a|bc  def  "));
  EXPECT_EQ("  ^abc|  def  ", TryFindWordForward("  ab|c  def  "));
  EXPECT_EQ("  abc^ | def  ", TryFindWordForward("  abc|  def  "));
  EXPECT_EQ("  abc ^ |def  ", TryFindWordForward("  abc | def  "));
  EXPECT_EQ("  abc  ^def|  ", TryFindWordForward("  abc  |def  "));
  EXPECT_EQ("  abc  ^def|  ", TryFindWordForward("  abc  d|ef  "));
  EXPECT_EQ("  abc  ^def|  ", TryFindWordForward("  abc  de|f  "));
  EXPECT_EQ("  abc  def^ | ", TryFindWordForward("  abc  def|  "));
  EXPECT_EQ("  abc  def ^ |", TryFindWordForward("  abc  def | "));
  EXPECT_EQ("  abc  def  ^|", TryFindWordForward("  abc  def  |"))
      << "No word after |";
}
// Empty text (the input holds only the position marker): expects an empty
// range at offset 0.
TEST_F(TextBoundariesTest, ForwardZero) {
EXPECT_EQ("^|", TryFindWordForward("|"));
}
} // namespace blink
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment