Commit 781d63b1, authored by Ethan Jimenez, committed by Commit Bot

Optimize AXRange::GetText to avoid multiple tree traversals

1. Refactoring `AXPosition::CreateNextLeafTextPosition` to take an
   optional output parameter `crossed_line_breaking_object`, which will
   be set to `true` if any call to `CreateNextAnchorBoundary` made while
   moving to the next leaf anchor crosses a line breaking object.

2. Refactoring `AXRange::GetText` to remove usage of `AtEndOfParagraph`.

   This optimization comes from analyzing how paragraph boundaries are
   computed: in order to determine if the end of an anchor is the end of
   a paragraph, we traverse forward to the next unignored leaf node (if
   it exists), then go back to the previous non-whitespace unignored
   leaf node (if it exists) looking for any line breaking object
   boundary being crossed in our tree traversal.

   The procedure described above is very redundant if we're already
   traversing the leaf nodes of the tree to compute `GetText`. This
   change uses the new parameter in `CreateNextLeafTextPosition` to
   efficiently compute paragraph boundaries without "going back".

   Notice that we still need to call `AtStartOfParagraph` from the first
   non-whitespace leaf node in the range, since there could be whitespace
   or ignored leaf nodes preceding the AXRange's start, but such a
   scenario can only appear once in any given `GetText` call.

3. As a result of the previous changes, `AtEndOfParagraph` disappears
   completely from the `GetText` call stack, the `AtStartOfParagraph`
   call has no noticeable impact, and the weight of `GetText` is now
   entirely reliant on a single traversal over the tree's leaves.

   Taking the total weight of `CreateNextLeafTextPosition` as the
   reference for "linear" complexity, computing `GetText` is, on
   average, 3.41 times faster with the optimization.

Bug: 1029867
Change-Id: I4ec070a6f96d9118ded08af4c93eb181451bb387
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2024910
Commit-Queue: Nektarios Paisios <nektar@chromium.org>
Reviewed-by: Nektarios Paisios <nektar@chromium.org>
Reviewed-by: Kevin Babbitt <kbabbitt@microsoft.com>
Cr-Commit-Position: refs/heads/master@{#741096}
parent 9195b452
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include <stdint.h> #include <stdint.h>
#include <functional>
#include <memory> #include <memory>
#include <ostream> #include <ostream>
#include <string> #include <string>
...@@ -590,10 +591,11 @@ class AXPosition { ...@@ -590,10 +591,11 @@ class AXPosition {
// the start of a paragraph. // the start of a paragraph.
// This will return a null position when an anchor movement would // This will return a null position when an anchor movement would
// cross a paragraph boundary, or the start of document was reached. // cross a paragraph boundary, or the start of document was reached.
bool crossed_potential_boundary_token = false; bool crossed_line_breaking_object_token = false;
const AbortMovePredicate abort_move_predicate = const AbortMovePredicate abort_move_predicate =
base::BindRepeating(&AbortMoveAtParagraphBoundary, base::BindRepeating(&AbortMoveAtParagraphBoundary,
std::ref(crossed_potential_boundary_token)); std::ref(crossed_line_breaking_object_token));
AXPositionInstance previous_text_position = text_position->Clone(); AXPositionInstance previous_text_position = text_position->Clone();
do { do {
previous_text_position = previous_text_position =
...@@ -661,10 +663,10 @@ class AXPosition { ...@@ -661,10 +663,10 @@ class AXPosition {
// of a paragraph, or the end of document was reached. // of a paragraph, or the end of document was reached.
// There are some fringe cases related to whitespace collapse that // There are some fringe cases related to whitespace collapse that
// cannot be handled easily with only |AbortMoveAtParagraphBoundary|. // cannot be handled easily with only |AbortMoveAtParagraphBoundary|.
bool crossed_potential_boundary_token = false; bool crossed_line_breaking_object_token = false;
const AbortMovePredicate abort_move_predicate = const AbortMovePredicate abort_move_predicate =
base::BindRepeating(&AbortMoveAtParagraphBoundary, base::BindRepeating(&AbortMoveAtParagraphBoundary,
std::ref(crossed_potential_boundary_token)); std::ref(crossed_line_breaking_object_token));
AXPositionInstance next_text_position = text_position->Clone(); AXPositionInstance next_text_position = text_position->Clone();
do { do {
...@@ -1716,11 +1718,26 @@ class AXPosition { ...@@ -1716,11 +1718,26 @@ class AXPosition {
base::BindRepeating(&DefaultAbortMovePredicate)); base::BindRepeating(&DefaultAbortMovePredicate));
} }
// Creates a text position using the next text-only node as its anchor. // Creates the next text position anchored at a leaf node of the AXTree.
// Assumes that text-only nodes are leaf nodes. //
AXPositionInstance CreateNextLeafTextPosition() const { // If a pointer |crossed_line_breaking_object| is provided, it'll be set to
return CreateNextTextAnchorPosition( // |true| if any line breaking object boundary was crossed by moving from this
base::BindRepeating(&DefaultAbortMovePredicate)); // leaf text position to the next (if it exists), |false| otherwise.
AXPositionInstance CreateNextLeafTextPosition(
bool* crossed_line_breaking_object = nullptr) const {
if (crossed_line_breaking_object)
*crossed_line_breaking_object = false;
// If this is an ancestor text position, resolve to its leaf text position.
if (IsTextPosition() && AnchorChildCount())
return AsLeafTextPosition();
AbortMovePredicate abort_move_predicate =
crossed_line_breaking_object
? base::BindRepeating(&UpdateCrossedLineBreakingObjectToken,
std::ref(*crossed_line_breaking_object))
: base::BindRepeating(&DefaultAbortMovePredicate);
return CreateNextLeafTreePosition(abort_move_predicate)->AsTextPosition();
} }
// Creates a text position using the previous text-only node as its anchor. // Creates a text position using the previous text-only node as its anchor.
...@@ -3226,16 +3243,10 @@ class AXPosition { ...@@ -3226,16 +3243,10 @@ class AXPosition {
move_to.AsLeafTreePosition()->GetTextStyles(); move_to.AsLeafTreePosition()->GetTextStyles();
} }
// AbortMovePredicate function used to detect paragraph boundaries. static bool MoveCrossesLineBreakingObject(const AXPosition& move_from,
static bool AbortMoveAtParagraphBoundary( const AXPosition& move_to,
bool& crossed_potential_boundary_token, const AXMoveType move_type,
const AXPosition& move_from, const AXMoveDirection direction) {
const AXPosition& move_to,
const AXMoveType move_type,
const AXMoveDirection direction) {
if (move_from.IsNullPosition() || move_to.IsNullPosition())
return true;
const bool move_from_break = move_from.IsInLineBreakingObject(); const bool move_from_break = move_from.IsInLineBreakingObject();
const bool move_to_break = move_to.IsInLineBreakingObject(); const bool move_to_break = move_to.IsInLineBreakingObject();
...@@ -3244,29 +3255,61 @@ class AXPosition { ...@@ -3244,29 +3255,61 @@ class AXPosition {
// For Ancestor moves, only abort when exiting a block descendant. // For Ancestor moves, only abort when exiting a block descendant.
// We don't care if the ancestor is a block or not, since the // We don't care if the ancestor is a block or not, since the
// descendant is contained by it. // descendant is contained by it.
crossed_potential_boundary_token |= move_from_break; return move_from_break;
break;
case AXMoveType::kDescendant: case AXMoveType::kDescendant:
// For Descendant moves, only abort when entering a block descendant. // For Descendant moves, only abort when entering a block descendant.
// We don't care if the ancestor is a block or not, since the // We don't care if the ancestor is a block or not, since the
// descendant is contained by it. // descendant is contained by it.
crossed_potential_boundary_token |= move_to_break; return move_to_break;
break;
case AXMoveType::kSibling: case AXMoveType::kSibling:
// For Sibling moves, abort if at least one of the siblings are a block, // For Sibling moves, abort if at least one of the siblings are a block,
// because that would mean exiting and/or entering a block. // because that would mean exiting and/or entering a block.
crossed_potential_boundary_token |= (move_from_break || move_to_break); return move_from_break || move_to_break;
break;
} }
NOTREACHED();
return false;
}
if (crossed_potential_boundary_token && !move_to.AnchorChildCount()) { // AbortMovePredicate function used to detect paragraph boundaries.
// We don't want to abort immediately after crossing a line breaking object
// boundary if the anchor we're moving to is not a leaf. This is necessary to
// avoid aborting if the next leaf position is whitespace-only: update
// |crossed_line_breaking_object_token| and wait until a leaf anchor is
// reached in order to correctly determine paragraph boundaries.
static bool AbortMoveAtParagraphBoundary(
bool& crossed_line_breaking_object_token,
const AXPosition& move_from,
const AXPosition& move_to,
const AXMoveType move_type,
const AXMoveDirection direction) {
if (move_from.IsNullPosition() || move_to.IsNullPosition())
return true;
if (!crossed_line_breaking_object_token) {
crossed_line_breaking_object_token = MoveCrossesLineBreakingObject(
move_from, move_to, move_type, direction);
}
if (crossed_line_breaking_object_token && !move_to.AnchorChildCount()) {
// If there's a sequence of whitespace-only anchors, collapse so only the // If there's a sequence of whitespace-only anchors, collapse so only the
// last whitespace-only anchor is considered a paragraph boundary. // last whitespace-only anchor is considered a paragraph boundary.
if (direction == AXMoveDirection::kNextInTree && return direction != AXMoveDirection::kNextInTree ||
move_to.IsInWhiteSpace()) { !move_to.IsInWhiteSpace();
return false; }
} return false;
return true; }
// This AbortMovePredicate never aborts, but detects whether a sequence of
// consecutive moves crosses any line breaking object boundary.
static bool UpdateCrossedLineBreakingObjectToken(
bool& crossed_line_breaking_object_token,
const AXPosition& move_from,
const AXPosition& move_to,
const AXMoveType move_type,
const AXMoveDirection direction) {
if (!crossed_line_breaking_object_token) {
crossed_line_breaking_object_token = MoveCrossesLineBreakingObject(
move_from, move_to, move_type, direction);
} }
return false; return false;
} }
......
...@@ -265,60 +265,100 @@ class AXRange { ...@@ -265,60 +265,100 @@ class AXRange {
// Returns the concatenation of the accessible names of all text nodes // Returns the concatenation of the accessible names of all text nodes
// contained between this AXRange's endpoints. // contained between this AXRange's endpoints.
// Pass -1 for max_count to retrieve all text. // Pass a |max_count| of -1 to retrieve all text in the AXRange.
// Note that if this AXRange has its anchor or focus located at an ignored
// position, we shrink the range to the closest unignored positions.
base::string16 GetText(AXTextConcatenationBehavior concatenation_behavior = base::string16 GetText(AXTextConcatenationBehavior concatenation_behavior =
AXTextConcatenationBehavior::kAsTextContent, AXTextConcatenationBehavior::kAsTextContent,
int max_count = -1, int max_count = -1,
bool include_ignored = false, bool include_ignored = false,
size_t* appended_newlines_count = nullptr) const { size_t* appended_newlines_count = nullptr) const {
if (max_count == 0 || IsNull())
return base::string16();
base::Optional<int> endpoint_comparison =
CompareEndpoints(anchor(), focus());
if (!endpoint_comparison)
return base::string16();
AXPositionInstance start = (endpoint_comparison.value() < 0)
? anchor_->AsLeafTextPosition()
: focus_->AsLeafTextPosition();
AXPositionInstance end = (endpoint_comparison.value() < 0)
? focus_->AsLeafTextPosition()
: anchor_->AsLeafTextPosition();
base::string16 range_text; base::string16 range_text;
bool should_append_newline = false;
bool found_trailing_newline = false;
size_t computed_newlines_count = 0; size_t computed_newlines_count = 0;
for (const AXRange& leaf_text_range : *this) { bool is_first_non_whitespace_leaf = true;
DCHECK(leaf_text_range.IsLeafTextRange()); bool crossed_paragraph_boundary = false;
AXPositionType* start = leaf_text_range.anchor(); bool is_first_unignored_leaf = true;
AXPositionType* end = leaf_text_range.focus(); bool found_trailing_newline = false;
while (!start->IsNullPosition()) {
DCHECK(start->IsLeafTextPosition());
DCHECK_GE(start->text_offset(), 0); DCHECK_GE(start->text_offset(), 0);
DCHECK_LE(start->text_offset(), end->text_offset());
if (should_append_newline) { if (include_ignored || !start->IsIgnored()) {
range_text += base::ASCIIToUTF16("\n"); if (concatenation_behavior ==
computed_newlines_count++; AXTextConcatenationBehavior::kAsInnerText &&
} !start->IsInWhiteSpace()) {
if (is_first_non_whitespace_leaf) {
// The first non-whitespace leaf in the range could be preceded by
// whitespace spanning even before the start of this range. We need
// to check such positions in order to correctly determine if this
// is a paragraph's start (see |AXPosition::AtStartOfParagraph|).
crossed_paragraph_boundary =
!is_first_unignored_leaf && start->AtStartOfParagraph();
}
// When preserving layout line breaks, don't append `\n` next if the
// previous leaf position was a <br> (already ending with a newline).
if (crossed_paragraph_boundary && !found_trailing_newline) {
range_text += base::ASCIIToUTF16("\n");
computed_newlines_count++;
}
is_first_non_whitespace_leaf = false;
crossed_paragraph_boundary = false;
}
base::string16 current_anchor_text = start->GetText(); int current_end_offset = (start->GetAnchor() != end->GetAnchor())
int current_leaf_text_length = end->text_offset() - start->text_offset(); ? start->MaxTextOffset()
: end->text_offset();
if (current_leaf_text_length > 0) { if (current_end_offset > start->text_offset()) {
int characters_to_append = int characters_to_append =
(max_count >= 0) ? std::min(max_count - int{range_text.length()}, (max_count > 0)
current_leaf_text_length) ? std::min(max_count - int{range_text.length()},
: current_leaf_text_length; current_end_offset - start->text_offset())
: current_end_offset - start->text_offset();
// Collapse all whitespace following any line break. range_text += start->GetText().substr(start->text_offset(),
found_trailing_newline = characters_to_append);
start->IsInLineBreak() ||
(found_trailing_newline && start->IsInWhiteSpace());
if (!include_ignored && !start->IsIgnored()) { // Collapse all whitespace following any line break.
range_text += current_anchor_text.substr(start->text_offset(), found_trailing_newline =
characters_to_append); start->IsInLineBreak() ||
(found_trailing_newline && start->IsInWhiteSpace());
} }
DCHECK(max_count < 0 || int{range_text.length()} <= max_count);
is_first_unignored_leaf = false;
} }
DCHECK(max_count < 0 || int{range_text.length()} <= max_count); if (start->GetAnchor() == end->GetAnchor() ||
if (int{range_text.length()} == max_count) int{range_text.length()} == max_count) {
break; break;
} else if (concatenation_behavior ==
// When preserving layout line breaks, don't append a newline next if the AXTextConcatenationBehavior::kAsInnerText &&
// current leaf range is a <br> (already ending with a '\n' character) or !crossed_paragraph_boundary && !is_first_non_whitespace_leaf) {
// its respective anchor is invisible to the text representation. start = start->CreateNextLeafTextPosition(&crossed_paragraph_boundary);
if (concatenation_behavior == AXTextConcatenationBehavior::kAsInnerText) } else {
should_append_newline = start = start->CreateNextLeafTextPosition();
!found_trailing_newline && end->AtEndOfParagraph(); }
} }
if (appended_newlines_count) if (appended_newlines_count)
*appended_newlines_count = computed_newlines_count; *appended_newlines_count = computed_newlines_count;
return range_text; return range_text;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment