Commit 9851937b authored by Akihiro Ota, committed by Commit Bot

Add functions to get word start/end indices in AXTextUtils.

This change adds new functions to AXTextUtils that return the
start and end indices of the words in a string. These functions will
support a new API in automation and were implemented in AXTextUtils to
maximize the possibility of shared code.

Bug: 948700
Change-Id: I4c414280910feb1cf6f3180d09e5088264959aba
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1616383
Commit-Queue: Akihiro Ota <akihiroota@chromium.org>
Reviewed-by: Nektarios Paisios <nektar@chromium.org>
Cr-Commit-Position: refs/heads/master@{#667367}
parent 6a88b0d8
......@@ -6,6 +6,7 @@
#include "base/i18n/break_iterator.h"
#include "base/logging.h"
#include "base/numerics/safe_conversions.h"
#include "base/optional.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
......@@ -226,4 +227,33 @@ base::string16 ActionVerbToUnlocalizedString(
return base::string16();
}
// Walks |text| with a word-break iterator and gathers, for every segment the
// iterator classifies as a word, the index at which that segment begins.
// Yields an empty vector if the iterator cannot be initialized.
std::vector<int> GetWordStartOffsets(const base::string16& text) {
  std::vector<int> offsets;
  base::i18n::BreakIterator word_iter(text,
                                      base::i18n::BreakIterator::BREAK_WORD);
  if (word_iter.Init()) {
    // Advance() reports false once the iterator has moved past the end of
    // the text, which terminates the scan.
    while (word_iter.Advance()) {
      if (word_iter.IsWord()) {
        // prev() is the boundary preceding the current segment, i.e. the
        // start index of the word that was just stepped over.
        const int start_index = base::checked_cast<int>(word_iter.prev());
        offsets.push_back(start_index);
      }
    }
  }
  return offsets;
}
// Walks |text| with a word-break iterator and gathers, for every segment the
// iterator classifies as a word, the index at which that segment ends.
// Yields an empty vector if the iterator cannot be initialized.
std::vector<int> GetWordEndOffsets(const base::string16& text) {
  std::vector<int> offsets;
  base::i18n::BreakIterator word_iter(text,
                                      base::i18n::BreakIterator::BREAK_WORD);
  if (word_iter.Init()) {
    // Advance() reports false once the iterator has moved past the end of
    // the text, which terminates the scan.
    while (word_iter.Advance()) {
      if (word_iter.IsWord()) {
        // pos() is the boundary the iterator currently rests on, i.e. the
        // end index of the word that was just stepped over.
        const int end_index = base::checked_cast<int>(word_iter.pos());
        offsets.push_back(end_index);
      }
    }
  }
  return offsets;
}
} // namespace ui
......@@ -29,7 +29,7 @@ enum TextBoundaryDirection {
// (depending on |direction|) from the given |start_offset| until the
// given boundary is found, and return the offset of that boundary,
// using the vector of line break character offsets in |line_breaks|.
size_t AX_EXPORT FindAccessibleTextBoundary(const base::string16& text,
AX_EXPORT size_t FindAccessibleTextBoundary(const base::string16& text,
const std::vector<int>& line_breaks,
AXTextBoundary boundary,
size_t start_offset,
......@@ -37,13 +37,18 @@ size_t AX_EXPORT FindAccessibleTextBoundary(const base::string16& text,
ax::mojom::TextAffinity affinity);
// Returns a string ID that corresponds to the name of the given action.
base::string16 AX_EXPORT
ActionVerbToLocalizedString(const ax::mojom::DefaultActionVerb action_verb);
AX_EXPORT base::string16 ActionVerbToLocalizedString(
const ax::mojom::DefaultActionVerb action_verb);
// Returns the non-localized string representation of a supported action.
// Some APIs on Linux and Windows need to return non-localized action names.
base::string16 AX_EXPORT
ActionVerbToUnlocalizedString(const ax::mojom::DefaultActionVerb action_verb);
AX_EXPORT base::string16 ActionVerbToUnlocalizedString(
const ax::mojom::DefaultActionVerb action_verb);
// Returns the indices at which words start in |text|. Segmentation is done
// with a base::i18n::BreakIterator in BREAK_WORD mode, and only segments the
// iterator reports as words contribute an index (whitespace and punctuation
// runs are skipped). The result is empty when |text| contains no words or the
// iterator fails to initialize.
AX_EXPORT std::vector<int> GetWordStartOffsets(const base::string16& text);
// Returns the indices at which words end in |text|; each index is the
// position just past the last character of a word (e.g. 4 for a leading
// "This"). Segmentation is done with a base::i18n::BreakIterator in
// BREAK_WORD mode, and only segments the iterator reports as words contribute
// an index. The result is empty when |text| contains no words or the iterator
// fails to initialize.
AX_EXPORT std::vector<int> GetWordEndOffsets(const base::string16& text);
} // namespace ui
......
......@@ -8,6 +8,7 @@
#include <utility>
#include "base/strings/utf_string_conversions.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "ui/accessibility/ax_enums.mojom.h"
#include "ui/accessibility/ax_text_boundary.h"
......@@ -251,4 +252,29 @@ TEST(AXTextUtils, FindAccessibleTextBoundaryCharacter) {
verify_boundaries_at_offset(18, 18L, 19UL);
}
// An empty string contains no words, so neither start nor end offsets are
// expected.
TEST(AXTextUtils, GetWordOffsetsEmptyTest) {
  const base::string16 empty_text = base::UTF8ToUTF16("");
  const std::vector<int> starts = GetWordStartOffsets(empty_text);
  const std::vector<int> ends = GetWordEndOffsets(empty_text);
  EXPECT_TRUE(starts.empty());
  EXPECT_TRUE(ends.empty());
}
// Space-separated ASCII words: every word's first character index is
// reported, in order.
TEST(AXTextUtils, GetWordStartOffsetsBasicTest) {
  const base::string16 input = base::UTF8ToUTF16("This is very simple input");
  const std::vector<int> actual_starts = GetWordStartOffsets(input);
  EXPECT_THAT(actual_starts, testing::ElementsAre(0, 5, 8, 13, 20));
}
// Space-separated ASCII words: every word's one-past-the-end index is
// reported, in order.
TEST(AXTextUtils, GetWordEndOffsetsBasicTest) {
  const base::string16 input = base::UTF8ToUTF16("This is very simple input");
  const std::vector<int> actual_ends = GetWordEndOffsets(input);
  EXPECT_THAT(actual_ends, testing::ElementsAre(4, 7, 12, 19, 25));
}
// Punctuation and symbol runs must not be reported as words; only the starts
// of "we", "should", "parse", "through", "bad" and "input" are expected.
TEST(AXTextUtils, GetWordStartOffsetsMalformedInputTest) {
  const base::string16 text =
      base::UTF8ToUTF16("..we *## should parse $#@$ through bad ,, input");
  // Offsets are counted against the literal above: "input" begins at index
  // 42 (index 43 would be the 'n' inside it, which cannot be a word start).
  EXPECT_THAT(GetWordStartOffsets(text),
              testing::ElementsAre(2, 9, 16, 27, 35, 42));
}
} // namespace ui
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment