Commit f80f4b5f authored by Benjamin Beaudry, committed by Commit Bot

Reland "PDF a11y: kTextDirection attribute support"

This is a reland of [1], which was reverted in [2]. The test failure came
from [3], which was merged a few minutes before [1]. The test expectations
are now updated to work with the changes that [3] made to the paragraph
break heuristics.

[1] Original CL: crrev.com/753f8707
[2] Reverted in: crrev.com/003b20ab
[3] Caused by: crrev.com/f39b7b73

Original change's description:
> PDF a11y: kTextDirection attribute support
>
> The text direction attribute is now properly set for text runs in PDF.
> It is inferred by the position of the first and the last characters of
> a text run.
>
> This change includes a pdf-template file (text-direction.in) with RTL
> text representation used to generate our unit test PDF file.
>
> Bug: 985604
> Change-Id: If5d82296463331e95c4ded4e5244d65be3fbadfa
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1737019
> Commit-Queue: Benjamin Beaudry <benjamin.beaudry@microsoft.com>
> Reviewed-by: Lei Zhang <thestig@chromium.org>
> Reviewed-by: Kevin Babbitt <kbabbitt@microsoft.com>
> Cr-Commit-Position: refs/heads/master@{#690163}

Change-Id: Ic8b7e62d1bce71db39af01473a66ea033aeaf7a1
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1771551
Commit-Queue: Benjamin Beaudry <benjamin.beaudry@microsoft.com>
Commit-Queue: Kevin Babbitt <kbabbitt@microsoft.com>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Kevin Babbitt <kbabbitt@microsoft.com>
Cr-Commit-Position: refs/heads/master@{#690395}
parent a284533d
...@@ -2407,13 +2407,44 @@ class PDFExtensionAccessibilityTreeDumpTest ...@@ -2407,13 +2407,44 @@ class PDFExtensionAccessibilityTreeDumpTest
private: private:
using PropertyFilter = content::AccessibilityTreeFormatter::PropertyFilter; using PropertyFilter = content::AccessibilityTreeFormatter::PropertyFilter;
// Scans |pdf_contents| line by line for PDF comment lines (those starting
// with '%') whose text begins with the allow directive reported by
// |formatter|->GetAllowString(). For each such line, the text following the
// directive is appended to |property_filters| as an ALLOW filter.
// See chrome/test/data/pdf/accessibility/readme.md for more info.
void ParsePdfForExtraDirectives(
    const std::string& pdf_contents,
    content::AccessibilityTreeFormatter* formatter,
    std::vector<PropertyFilter>* property_filters) {
  constexpr char kCommentMark = '%';
  const std::vector<std::string> pdf_lines = base::SplitString(
      pdf_contents, "\n", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
  for (const std::string& pdf_line : pdf_lines) {
    // Only comment lines with at least one character after the mark can
    // carry a directive.
    if (pdf_line.size() <= 1 || pdf_line[0] != kCommentMark) {
      continue;
    }

    // Drop the leading comment mark to expose the directive text.
    const std::string directive = pdf_line.substr(1);
    const std::string& allow_prefix = formatter->GetAllowString();
    if (!base::StartsWith(directive, allow_prefix,
                          base::CompareCase::SENSITIVE)) {
      continue;
    }

    // Everything after the allow prefix is the filter pattern itself.
    property_filters->push_back(PropertyFilter(
        base::UTF8ToUTF16(directive.substr(allow_prefix.size())),
        PropertyFilter::ALLOW));
  }
}
void RunTest(const base::FilePath& test_file_path, const char* file_dir) { void RunTest(const base::FilePath& test_file_path, const char* file_dir) {
// Set up the tree formatter. std::string pdf_contents;
{
base::ScopedAllowBlockingForTesting allow_blocking;
ASSERT_TRUE(base::ReadFileToString(test_file_path, &pdf_contents));
}
// Set up the tree formatter. Parse filters and other directives in the test
// file.
std::unique_ptr<content::AccessibilityTreeFormatter> formatter = std::unique_ptr<content::AccessibilityTreeFormatter> formatter =
test_pass_.create_formatter(); test_pass_.create_formatter();
std::vector<PropertyFilter> property_filters; std::vector<PropertyFilter> property_filters;
formatter->AddDefaultFilters(&property_filters); formatter->AddDefaultFilters(&property_filters);
AddDefaultFilters(&property_filters); AddDefaultFilters(&property_filters);
ParsePdfForExtraDirectives(pdf_contents, formatter.get(),
&property_filters);
formatter->SetPropertyFilters(property_filters); formatter->SetPropertyFilters(property_filters);
// Exit without running the test if we can't find an expectation file or if // Exit without running the test if we can't find an expectation file or if
...@@ -2532,3 +2563,7 @@ IN_PROC_BROWSER_TEST_P(PDFExtensionAccessibilityTreeDumpTest, ...@@ -2532,3 +2563,7 @@ IN_PROC_BROWSER_TEST_P(PDFExtensionAccessibilityTreeDumpTest,
DirectionalTextRuns) { DirectionalTextRuns) {
RunPDFTest(FILE_PATH_LITERAL("directional-text-runs.pdf")); RunPDFTest(FILE_PATH_LITERAL("directional-text-runs.pdf"));
} }
// Accessibility tree dump test for "text-direction.pdf".
// NOTE(review): RunPDFTest is defined outside this view; presumably it loads
// the PDF and compares the dumped tree against the expectation file — confirm.
IN_PROC_BROWSER_TEST_P(PDFExtensionAccessibilityTreeDumpTest, TextDirection) {
RunPDFTest(FILE_PATH_LITERAL("text-direction.pdf"));
}
\ No newline at end of file
# PdfExtensionAccessibilityTreeDumpTest
A `PdfExtensionAccessibilityTreeDumpTest` loads a PDF file, waits for it
to load, then dumps the accessibility tree. More about accessibility tree
dump tests at `content/test/data/accessibility/readme.md`.
## Filters and directives
Please refer to `content/test/data/accessibility/readme.md` to see
how to use filters. In the list below, checked items are currently
supported in PDF test files; unchecked items are not yet supported:
* [x] `@<platform>-ALLOW:`
* [ ] `@<platform>-ALLOW-EMPTY:`
* [ ] `@<platform>-DENY:`
* [ ] `@WAIT-FOR:`
The way `PdfExtensionAccessibilityTreeDumpTest` handles extra
directives/filters is modeled on `DumpAccessibilityTreeTest`. If you
need to support additional directives/filters, see
`content/test/data/accessibility/readme.md` and keep the same
interface as the one used for HTML accessibility tree dump tests.
Implementation details for parsing extra directives in HTML can be
found in `DumpAccessibilityTestBase::ParseHtmlForExtraDirectives()`.
embeddedObject textDirection=ltr
++document restriction=readOnly
++++region name='Page 1' restriction=readOnly isPageBreakingObject=true
++++++paragraph restriction=readOnly
++++++++staticText name='abcdef<newline>' restriction=readOnly
++++++++++inlineTextBox name='abcdef<newline>' restriction=readOnly textDirection=ltr
++++++paragraph restriction=readOnly
++++++++staticText name='شممشا' restriction=readOnly
++++++++++inlineTextBox name='شممشا' restriction=readOnly textDirection=rtl
{{header}}
%@BLINK-ALLOW:textDirection=*
{{object 1 0}} <<
/Type /Catalog
/Pages 2 0 R
>>
endobj
{{object 2 0}} <<
/Type /Pages
/MediaBox [0 0 200 200]
/Count 1
/Kids [3 0 R]
>>
endobj
{{object 3 0}} <<
/Type /Page
/Parent 2 0 R
/Resources <<
/Font <<
/F1 4 0 R
>>
>>
/Contents 8 0 R
>>
endobj
{{object 4 0}} <<
/Type /Font
/Subtype /Type0
/BaseFont /Arial
/Encoding /Identity-H
/DescendantFonts [6 0 R]
/ToUnicode 7 0 R
>>
endobj
{{object 5 0}} <<
/Type /FontDescriptor
/FontName /Arial
>>
endobj
{{object 6 0}} <<
/Type /Font
/FontDescriptor 5 0 R
/BaseFont /Arial
/Subtype /CIDFontType2
/W [0 [778] 68 [444 500 444 500 444 333] 910 [229] 951 952 531 996 [394]]
>>
endobj
{{object 7 0}} <<
{{streamlen}}
>>
stream
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
2 beginbfchar
<038E> <FE8E>
<03E4> <FEE4>
endbfchar
2 beginbfrange
<0044> <0049> <0061>
<03B7> <03B8> <FEB7>
endbfrange
endcmap
CMapName currentdict /CMap defineresource pop
end
end
endstream
endobj
{{object 8 0}} <<
{{streamlen}}
>>
stream
BT
/F1 16 Tf
30 100 Td
<004400450046004700480049> Tj
ET
BT
/F1 16 Tf
30 50 Td
<038E03B803E403E403B7> Tj
ET
endstream
endobj
{{xref}}
{{trailer}}
{{startxref}}
%%EOF
\ No newline at end of file
This diff was suppressed by a .gitattributes entry.
...@@ -574,6 +574,8 @@ ui::AXNodeData* PdfAccessibilityTree::CreateInlineTextBoxNode( ...@@ -574,6 +574,8 @@ ui::AXNodeData* PdfAccessibilityTree::CreateInlineTextBoxNode(
std::string chars_utf8 = GetTextRunCharsAsUTF8(text_run, chars, char_index); std::string chars_utf8 = GetTextRunCharsAsUTF8(text_run, chars, char_index);
inline_text_box_node->AddStringAttribute(ax::mojom::StringAttribute::kName, inline_text_box_node->AddStringAttribute(ax::mojom::StringAttribute::kName,
chars_utf8); chars_utf8);
inline_text_box_node->AddIntAttribute(ax::mojom::IntAttribute::kTextDirection,
text_run.direction);
inline_text_box_node->relative_bounds.bounds = inline_text_box_node->relative_bounds.bounds =
ToGfxRectF(text_run.bounds) + page_bounds.OffsetFromOrigin(); ToGfxRectF(text_run.bounds) + page_bounds.OffsetFromOrigin();
std::vector<int32_t> char_offsets = std::vector<int32_t> char_offsets =
......
...@@ -40,33 +40,51 @@ bool GetAccessibilityInfo( ...@@ -40,33 +40,51 @@ bool GetAccessibilityInfo(
int char_index = 0; int char_index = 0;
while (char_index < char_count) { while (char_index < char_count) {
PP_PrivateAccessibilityTextRunInfo text_run_info; PP_PrivateAccessibilityTextRunInfo text_run_info;
pp::FloatRect bounds; engine->GetTextRunInfo(page_index, char_index, &text_run_info);
engine->GetTextRunInfo(page_index, char_index, &text_run_info.len,
&text_run_info.font_size, &bounds);
DCHECK_LE(char_index + text_run_info.len, DCHECK_LE(char_index + text_run_info.len,
static_cast<uint32_t>(char_count)); static_cast<uint32_t>(char_count));
text_run_info.direction = PP_PRIVATEDIRECTION_LTR;
text_run_info.bounds = bounds;
text_runs->push_back(text_run_info); text_runs->push_back(text_run_info);
// We need to provide enough information to draw a bounding box // We need to provide enough information to draw a bounding box
// around any arbitrary text range, but the bounding boxes of characters // around any arbitrary text range, but the bounding boxes of characters
// we get from PDFium don't necessarily "line up". Walk through the // we get from PDFium don't necessarily "line up".
// Example for LTR text direction: walk through the
// characters in each text run and let the width of each character be // characters in each text run and let the width of each character be
// the difference between the x coordinate of one character and the // the difference between the x coordinate of one character and the
// x coordinate of the next. The rest of the bounds of each character // x coordinate of the next. The rest of the bounds of each character
// can be computed from the bounds of the text run. // can be computed from the bounds of the text run.
// The same idea is used for RTL, TTB and BTT text direction.
pp::FloatRect char_bounds = engine->GetCharBounds(page_index, char_index); pp::FloatRect char_bounds = engine->GetCharBounds(page_index, char_index);
for (uint32_t i = 0; i < text_run_info.len - 1; i++) { for (uint32_t i = 0; i < text_run_info.len - 1; i++) {
DCHECK_LT(char_index + i + 1, static_cast<uint32_t>(char_count)); DCHECK_LT(char_index + i + 1, static_cast<uint32_t>(char_count));
pp::FloatRect next_char_bounds = pp::FloatRect next_char_bounds =
engine->GetCharBounds(page_index, char_index + i + 1); engine->GetCharBounds(page_index, char_index + i + 1);
(*chars)[char_index + i].char_width = double& char_width = (*chars)[char_index + i].char_width;
next_char_bounds.x() - char_bounds.x(); switch (text_run_info.direction) {
case PP_PRIVATEDIRECTION_NONE:
case PP_PRIVATEDIRECTION_LTR:
char_width = next_char_bounds.x() - char_bounds.x();
break;
case PP_PRIVATEDIRECTION_TTB:
char_width = next_char_bounds.y() - char_bounds.y();
break;
case PP_PRIVATEDIRECTION_RTL:
char_width = char_bounds.right() - next_char_bounds.right();
break;
case PP_PRIVATEDIRECTION_BTT:
char_width = char_bounds.bottom() - next_char_bounds.bottom();
break;
}
char_bounds = next_char_bounds; char_bounds = next_char_bounds;
} }
(*chars)[char_index + text_run_info.len - 1].char_width = double& char_width =
char_bounds.width(); (*chars)[char_index + text_run_info.len - 1].char_width;
if (text_run_info.direction == PP_PRIVATEDIRECTION_BTT ||
text_run_info.direction == PP_PRIVATEDIRECTION_TTB) {
char_width = char_bounds.height();
} else {
char_width = char_bounds.width();
}
char_index += text_run_info.len; char_index += text_run_info.len;
} }
......
...@@ -41,6 +41,7 @@ typedef void (*PDFEnsureTypefaceCharactersAccessible)(const LOGFONT* font, ...@@ -41,6 +41,7 @@ typedef void (*PDFEnsureTypefaceCharactersAccessible)(const LOGFONT* font,
struct PP_PdfAccessibilityActionData; struct PP_PdfAccessibilityActionData;
struct PP_PdfPrintSettings_Dev; struct PP_PdfPrintSettings_Dev;
struct PP_PrivateAccessibilityTextRunInfo;
namespace gfx { namespace gfx {
class Rect; class Rect;
...@@ -358,13 +359,13 @@ class PDFEngine { ...@@ -358,13 +359,13 @@ class PDFEngine {
// Get a given unicode character on a given page. // Get a given unicode character on a given page.
virtual uint32_t GetCharUnicode(int page_index, int char_index) = 0; virtual uint32_t GetCharUnicode(int page_index, int char_index) = 0;
// Given a start char index, find the longest continuous run of text that's // Given a start char index, find the longest continuous run of text that's
// in a single direction and with the same style and font size. Return the // in a single direction and with the same style and font size. Fill the
// length of that sequence and its font size and bounding box. // |text_run_info| with the length of that sequence, text direction, bounding
virtual void GetTextRunInfo(int page_index, // box and font size.
int start_char_index, virtual void GetTextRunInfo(
uint32_t* out_len, int page_index,
double* out_font_size, int start_char_index,
pp::FloatRect* out_bounds) = 0; PP_PrivateAccessibilityTextRunInfo* text_run_info) = 0;
// Gets the PDF document's print scaling preference. True if the document can // Gets the PDF document's print scaling preference. True if the document can
// be scaled to fit. // be scaled to fit.
virtual bool GetPrintScaling() = 0; virtual bool GetPrintScaling() = 0;
......
...@@ -2189,14 +2189,12 @@ uint32_t PDFiumEngine::GetCharUnicode(int page_index, int char_index) { ...@@ -2189,14 +2189,12 @@ uint32_t PDFiumEngine::GetCharUnicode(int page_index, int char_index) {
return pages_[page_index]->GetCharUnicode(char_index); return pages_[page_index]->GetCharUnicode(char_index);
} }
void PDFiumEngine::GetTextRunInfo(int page_index, void PDFiumEngine::GetTextRunInfo(
int start_char_index, int page_index,
uint32_t* out_len, int start_char_index,
double* out_font_size, PP_PrivateAccessibilityTextRunInfo* text_run_info) {
pp::FloatRect* out_bounds) {
DCHECK(PageIndexInBounds(page_index)); DCHECK(PageIndexInBounds(page_index));
return pages_[page_index]->GetTextRunInfo(start_char_index, out_len, return pages_[page_index]->GetTextRunInfo(start_char_index, text_run_info);
out_font_size, out_bounds);
} }
bool PDFiumEngine::GetPrintScaling() { bool PDFiumEngine::GetPrintScaling() {
......
...@@ -113,11 +113,10 @@ class PDFiumEngine : public PDFEngine, ...@@ -113,11 +113,10 @@ class PDFiumEngine : public PDFEngine,
int GetCharCount(int page_index) override; int GetCharCount(int page_index) override;
pp::FloatRect GetCharBounds(int page_index, int char_index) override; pp::FloatRect GetCharBounds(int page_index, int char_index) override;
uint32_t GetCharUnicode(int page_index, int char_index) override; uint32_t GetCharUnicode(int page_index, int char_index) override;
void GetTextRunInfo(int page_index, void GetTextRunInfo(
int start_char_index, int page_index,
uint32_t* out_len, int start_char_index,
double* out_font_size, PP_PrivateAccessibilityTextRunInfo* text_run_info) override;
pp::FloatRect* out_bounds) override;
bool GetPrintScaling() override; bool GetPrintScaling() override;
int GetCopiesToPrint() override; int GetCopiesToPrint() override;
int GetDuplexType() override; int GetDuplexType() override;
......
...@@ -251,29 +251,30 @@ FPDF_TEXTPAGE PDFiumPage::GetTextPage() { ...@@ -251,29 +251,30 @@ FPDF_TEXTPAGE PDFiumPage::GetTextPage() {
return text_page(); return text_page();
} }
void PDFiumPage::GetTextRunInfo(int start_char_index, void PDFiumPage::GetTextRunInfo(
uint32_t* out_len, int start_char_index,
double* out_font_size, PP_PrivateAccessibilityTextRunInfo* text_run_info) {
pp::FloatRect* out_bounds) {
if (start_char_index < 0) { if (start_char_index < 0) {
*out_len = 0; text_run_info->len = 0;
*out_font_size = 0; text_run_info->font_size = 0;
*out_bounds = pp::FloatRect(); text_run_info->bounds = pp::FloatRect();
text_run_info->direction = PP_PRIVATEDIRECTION_NONE;
return; return;
} }
FPDF_PAGE page = GetPage(); FPDF_PAGE page = GetPage();
FPDF_TEXTPAGE text_page = GetTextPage(); FPDF_TEXTPAGE text_page = GetTextPage();
int chars_count = FPDFText_CountChars(text_page); int chars_count = FPDFText_CountChars(text_page);
int char_index = GetFirstNonUnicodeWhiteSpaceCharIndex( int actual_start_char_index = GetFirstNonUnicodeWhiteSpaceCharIndex(
text_page, start_char_index, chars_count); text_page, start_char_index, chars_count);
if (char_index >= chars_count) { if (actual_start_char_index >= chars_count) {
*out_len = 0; text_run_info->len = 0;
*out_font_size = 0; text_run_info->font_size = 0;
*out_bounds = pp::FloatRect(); text_run_info->bounds = pp::FloatRect();
text_run_info->direction = PP_PRIVATEDIRECTION_NONE;
return; return;
} }
int char_index = actual_start_char_index;
pp::FloatRect start_char_rect = pp::FloatRect start_char_rect =
GetFloatCharRectInPixels(page, text_page, char_index); GetFloatCharRectInPixels(page, text_page, char_index);
...@@ -376,9 +377,16 @@ void PDFiumPage::GetTextRunInfo(int start_char_index, ...@@ -376,9 +377,16 @@ void PDFiumPage::GetTextRunInfo(int start_char_index,
text_run_font_size = estimated_font_size; text_run_font_size = estimated_font_size;
} }
*out_len = char_index - start_char_index; // Infer text direction from first and last character of the text run. We
*out_font_size = text_run_font_size; // can't base our decision on the character direction, since a character of a
*out_bounds = text_run_bounds; // RTL language will have an angle of 0 when not rotated, just like a
// character in a LTR language.
text_run_info->direction = char_index - actual_start_char_index > 1
? GetDirectionFromAngle(text_run_angle)
: PP_PRIVATEDIRECTION_NONE;
text_run_info->len = char_index - start_char_index;
text_run_info->font_size = text_run_font_size;
text_run_info->bounds = text_run_bounds;
} }
uint32_t PDFiumPage::GetCharUnicode(int char_index) { uint32_t PDFiumPage::GetCharUnicode(int char_index) {
......
...@@ -20,6 +20,8 @@ ...@@ -20,6 +20,8 @@
#include "third_party/pdfium/public/fpdf_text.h" #include "third_party/pdfium/public/fpdf_text.h"
#include "ui/gfx/geometry/point_f.h" #include "ui/gfx/geometry/point_f.h"
struct PP_PrivateAccessibilityTextRunInfo;
namespace chrome_pdf { namespace chrome_pdf {
class PDFiumEngine; class PDFiumEngine;
...@@ -43,12 +45,11 @@ class PDFiumPage { ...@@ -43,12 +45,11 @@ class PDFiumPage {
FPDF_TEXTPAGE GetTextPage(); FPDF_TEXTPAGE GetTextPage();
// Given a start char index, find the longest continuous run of text that's // Given a start char index, find the longest continuous run of text that's
// in a single direction and with the same style and font size. Return the // in a single direction and with the same style and font size. Fill the
// length of that sequence and its font size and bounding box. // |text_run_info| with the length of that sequence, text direction, bounding
// box and font size.
void GetTextRunInfo(int start_char_index, void GetTextRunInfo(int start_char_index,
uint32_t* out_len, PP_PrivateAccessibilityTextRunInfo* text_run_info);
double* out_font_size,
pp::FloatRect* out_bounds);
// Get a unicode character from the page. // Get a unicode character from the page.
uint32_t GetCharUnicode(int char_index); uint32_t GetCharUnicode(int char_index);
// Get the bounds of a character in page pixels. // Get the bounds of a character in page pixels.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment