Commit 41ab8b8c authored by Xiaocheng Hu, committed by Commit Bot

Make WebFrameContentDumper use Element.innerText

This patch changes WebFrameContentDumper to use Element.innerText
instead of its ad-hoc TextDumper, so that its output is more
predictable.

This patch also allows all WebFrameSwapTest tests to pass in LayoutNG.

Bug: 781434
Change-Id: I961fa43c1821e2a0e7eae6b01a6affe884eacc72
Reviewed-on: https://chromium-review.googlesource.com/c/1371145
Reviewed-by: Kent Tamura <tkent@chromium.org>
Commit-Queue: Xiaocheng Hu <xiaochengh@chromium.org>
Cr-Commit-Position: refs/heads/master@{#615560}
parent 7bd65736
...@@ -26,138 +26,6 @@ namespace blink { ...@@ -26,138 +26,6 @@ namespace blink {
namespace { namespace {
// Depth limit for TextDumper::HandleNode: the children of a node are visited
// only while the node's depth is below this value, which bounds the recursion.
const int text_dumper_max_depth = 512;
bool IsRenderedAndVisible(const Node& node) {
if (node.GetLayoutObject() &&
node.GetLayoutObject()->Style()->Visibility() == EVisibility::kVisible)
return true;
if (node.IsElementNode() && ToElement(node).HasDisplayContentsStyle())
return true;
return false;
}
size_t RequiredLineBreaksAround(const Node& node) {
if (!IsRenderedAndVisible(node))
return 0;
if (node.IsTextNode())
return 0;
if (IsHTMLParagraphElement(node))
return 2;
if (LayoutObject* layout_object = node.GetLayoutObject()) {
if (!layout_object->Style()->IsDisplayInlineType())
return 1;
if (layout_object->Style()->Display() == EDisplay::kTableCaption)
return 1;
}
return 0;
}
// This class dumps innerText of a node into a StringBuilder, following the spec
// [*] but with a simplified whitespace handling algorithm when processing text
// nodes: only leading and trailing collapsed whitespaces are removed; all other
// whitespace characters are left as-is, without any collapsing or conversion.
// For example, from HTML <p>\na\n\nb\n</p>, we get text dump "a\n\nb".
// [*] https://developer.mozilla.org/en-US/docs/Web/API/Node/innerText
class TextDumper final {
STACK_ALLOCATED();
public:
TextDumper(StringBuilder& builder, size_t max_length)
: builder_(builder), max_length_(max_length) {}
void DumpTextFrom(const Node& node) {
DCHECK(!has_emitted_);
DCHECK(!required_line_breaks_);
HandleNode(node, 0);
}
private:
void HandleNode(const Node& node, int depth) {
const size_t required_line_breaks_around = RequiredLineBreaksAround(node);
AddRequiredLineBreaks(required_line_breaks_around);
if (depth < text_dumper_max_depth) {
for (const Node& child : NodeTraversal::ChildrenOf(node)) {
HandleNode(child, depth + 1);
if (builder_.length() >= max_length_)
return;
}
}
if (!IsRenderedAndVisible(node))
return;
if (node.IsTextNode())
return HandleTextNode(ToText(node));
if (IsHTMLBRElement(node))
return DumpText("\n");
if (LayoutObject* layout_object = node.GetLayoutObject()) {
if (layout_object->IsTableCell() &&
ToLayoutTableCell(layout_object)->NextCell())
return DumpText("\t");
if (layout_object->IsTableRow() &&
ToLayoutTableRow(layout_object)->NextRow())
return DumpText("\n");
}
AddRequiredLineBreaks(required_line_breaks_around);
}
void HandleTextNode(const Text& node) {
const LayoutText* layout_text = node.GetLayoutObject();
if (!layout_text)
return;
if (layout_text->IsTextFragment() &&
ToLayoutTextFragment(layout_text)->IsRemainingTextLayoutObject()) {
const LayoutText* first_letter =
ToLayoutText(AssociatedLayoutObjectOf(node, 0));
if (first_letter && first_letter != layout_text)
HandleLayoutText(*first_letter);
}
HandleLayoutText(*layout_text);
}
void HandleLayoutText(const LayoutText& text) {
if (!text.HasNonCollapsedText())
return;
size_t text_start = text.CaretMinOffset();
size_t text_end = text.CaretMaxOffset();
String dump = text.GetText().Substring(text_start, text_end - text_start);
DumpText(dump);
}
void AddRequiredLineBreaks(size_t required) {
required_line_breaks_ = std::max(required, required_line_breaks_);
}
void DumpText(String text) {
if (!text.length())
return;
if (has_emitted_ && required_line_breaks_) {
for (size_t i = 0; i < required_line_breaks_; ++i)
builder_.Append('\n');
}
required_line_breaks_ = 0;
builder_.Append(text);
has_emitted_ = true;
if (builder_.length() > max_length_)
builder_.Resize(max_length_);
}
bool has_emitted_ = false;
size_t required_line_breaks_ = 0;
StringBuilder& builder_;
const size_t max_length_;
DISALLOW_COPY_AND_ASSIGN(TextDumper);
};
void FrameContentAsPlainText(size_t max_chars, void FrameContentAsPlainText(size_t max_chars,
LocalFrame* frame, LocalFrame* frame,
StringBuilder& output) { StringBuilder& output) {
...@@ -171,8 +39,11 @@ void FrameContentAsPlainText(size_t max_chars, ...@@ -171,8 +39,11 @@ void FrameContentAsPlainText(size_t max_chars,
DCHECK(!frame->View()->NeedsLayout()); DCHECK(!frame->View()->NeedsLayout());
DCHECK(!document->NeedsLayoutTreeUpdate()); DCHECK(!document->NeedsLayoutTreeUpdate());
if (document->documentElement()) if (document->documentElement()) {
TextDumper(output, max_chars).DumpTextFrom(*document->documentElement()); output.Append(document->documentElement()->innerText());
if (output.length() >= max_chars)
output.Resize(max_chars);
}
// The separator between frames when the frames are converted to plain text. // The separator between frames when the frames are converted to plain text.
const LChar kFrameSeparator[] = {'\n', '\n'}; const LChar kFrameSeparator[] = {'\n', '\n'};
......
...@@ -9219,7 +9219,7 @@ TEST_F(WebFrameSwapTest, SwapFirstChild) { ...@@ -9219,7 +9219,7 @@ TEST_F(WebFrameSwapTest, SwapFirstChild) {
frame_test_helpers::LoadFrame(local_frame, base_url_ + "subframe-hello.html"); frame_test_helpers::LoadFrame(local_frame, base_url_ + "subframe-hello.html");
std::string content = std::string content =
WebFrameContentDumper::DumpWebViewAsText(WebView(), 1024).Utf8(); WebFrameContentDumper::DumpWebViewAsText(WebView(), 1024).Utf8();
EXPECT_EQ("\n\nhello\n\nb\n\n\na\n\nc", content); EXPECT_EQ(" \n\nhello\n\nb \n\na\n\nc", content);
} }
void WebFrameTest::SwapAndVerifyMiddleChildConsistency( void WebFrameTest::SwapAndVerifyMiddleChildConsistency(
...@@ -9255,7 +9255,7 @@ TEST_F(WebFrameSwapTest, SwapMiddleChild) { ...@@ -9255,7 +9255,7 @@ TEST_F(WebFrameSwapTest, SwapMiddleChild) {
frame_test_helpers::LoadFrame(local_frame, base_url_ + "subframe-hello.html"); frame_test_helpers::LoadFrame(local_frame, base_url_ + "subframe-hello.html");
std::string content = std::string content =
WebFrameContentDumper::DumpWebViewAsText(WebView(), 1024).Utf8(); WebFrameContentDumper::DumpWebViewAsText(WebView(), 1024).Utf8();
EXPECT_EQ("\n\na\n\nhello\n\nc", content); EXPECT_EQ(" \n\na\n\nhello\n\nc", content);
} }
void WebFrameTest::SwapAndVerifyLastChildConsistency(const char* const message, void WebFrameTest::SwapAndVerifyLastChildConsistency(const char* const message,
...@@ -9285,7 +9285,7 @@ TEST_F(WebFrameSwapTest, SwapLastChild) { ...@@ -9285,7 +9285,7 @@ TEST_F(WebFrameSwapTest, SwapLastChild) {
frame_test_helpers::LoadFrame(local_frame, base_url_ + "subframe-hello.html"); frame_test_helpers::LoadFrame(local_frame, base_url_ + "subframe-hello.html");
std::string content = std::string content =
WebFrameContentDumper::DumpWebViewAsText(WebView(), 1024).Utf8(); WebFrameContentDumper::DumpWebViewAsText(WebView(), 1024).Utf8();
EXPECT_EQ("\n\na\n\nb\n\n\na\n\nhello", content); EXPECT_EQ(" \n\na\n\nb \n\na\n\nhello", content);
} }
TEST_F(WebFrameSwapTest, DetachProvisionalFrame) { TEST_F(WebFrameSwapTest, DetachProvisionalFrame) {
...@@ -9398,7 +9398,7 @@ TEST_F(WebFrameSwapTest, SwapParentShouldDetachChildren) { ...@@ -9398,7 +9398,7 @@ TEST_F(WebFrameSwapTest, SwapParentShouldDetachChildren) {
frame_test_helpers::LoadFrame(local_frame, base_url_ + "subframe-hello.html"); frame_test_helpers::LoadFrame(local_frame, base_url_ + "subframe-hello.html");
std::string content = std::string content =
WebFrameContentDumper::DumpWebViewAsText(WebView(), 1024).Utf8(); WebFrameContentDumper::DumpWebViewAsText(WebView(), 1024).Utf8();
EXPECT_EQ("\n\na\n\nhello\n\nc", content); EXPECT_EQ(" \n\na\n\nhello\n\nc", content);
} }
TEST_F(WebFrameSwapTest, SwapPreservesGlobalContext) { TEST_F(WebFrameSwapTest, SwapPreservesGlobalContext) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment