Commit 36aae1e1 authored by Lei Zhang, committed by Commit Bot

PDF a11y: Properly handle pages with only whitespace.

https://crrev.com/690041 added handling for out-of-bounds indices in
PDFiumPage::GetTextRunInfo(), but failed to consider a valid situation
where all the characters in a text run are whitespace. In this case,
GetTextRunInfo() needs to report the number of characters processed.

Add a test case with just a single whitespace character to test this condition.

Also update hello-world.in to have a {{streamlen}} macro and regenerate
hello-world.pdf.

Bug: 997747
Change-Id: Id9e074080b19e4c642cf75fa6b72c5eb969197a0
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1769842
Reviewed-by: Ian Prest <iapres@microsoft.com>
Commit-Queue: Lei Zhang <thestig@chromium.org>
Cr-Commit-Position: refs/heads/master@{#690811}
parent 23f2521d
...@@ -2373,6 +2373,13 @@ IN_PROC_BROWSER_TEST_F(PDFExtensionAccessibilityTextExtractionTest, ...@@ -2373,6 +2373,13 @@ IN_PROC_BROWSER_TEST_F(PDFExtensionAccessibilityTextExtractionTest,
RunTextExtractionTest(FILE_PATH_LITERAL("font-change.pdf")); RunTextExtractionTest(FILE_PATH_LITERAL("font-change.pdf"));
} }
// Tests one property of pdf_private/accessibility_crash_2.pdf: a page whose
// text consists only of whitespace characters. Uses the same
// RunTextExtractionTest() helper as the other text extraction tests in this
// fixture, pointed at whitespace.pdf.
IN_PROC_BROWSER_TEST_F(PDFExtensionAccessibilityTextExtractionTest,
OnlyWhitespaceText) {
RunTextExtractionTest(FILE_PATH_LITERAL("whitespace.pdf"));
}
class PDFExtensionAccessibilityTreeDumpTest class PDFExtensionAccessibilityTreeDumpTest
: public PDFExtensionTest, : public PDFExtensionTest,
public ::testing::WithParamInterface<size_t> { public ::testing::WithParamInterface<size_t> {
...@@ -2566,4 +2573,4 @@ IN_PROC_BROWSER_TEST_P(PDFExtensionAccessibilityTreeDumpTest, ...@@ -2566,4 +2573,4 @@ IN_PROC_BROWSER_TEST_P(PDFExtensionAccessibilityTreeDumpTest,
IN_PROC_BROWSER_TEST_P(PDFExtensionAccessibilityTreeDumpTest, TextDirection) { IN_PROC_BROWSER_TEST_P(PDFExtensionAccessibilityTreeDumpTest, TextDirection) {
RunPDFTest(FILE_PATH_LITERAL("text-direction.pdf")); RunPDFTest(FILE_PATH_LITERAL("text-direction.pdf"));
} }
\ No newline at end of file
...@@ -29,6 +29,7 @@ endobj ...@@ -29,6 +29,7 @@ endobj
>> >>
endobj endobj
{{object 5 0}} << {{object 5 0}} <<
{{streamlen}}
>> >>
stream stream
BT BT
......
This diff was suppressed by a .gitattributes entry.
{{header}}
{{object 1 0}} <<
/Type /Catalog
/Pages 2 0 R
>>
endobj
{{object 2 0}} <<
/Type /Pages
/MediaBox [ 0 0 200 200 ]
/Count 1
/Kids [ 3 0 R ]
>>
endobj
{{object 3 0}} <<
/Type /Page
/Parent 2 0 R
/Resources <<
/Font <<
/F1 4 0 R
>>
>>
/Contents 5 0 R
>>
endobj
{{object 4 0}} <<
/Type /Font
/Subtype /Type1
/BaseFont /Helvetica
>>
endobj
{{object 5 0}} <<
{{streamlen}}
>>
stream
BT
20 100 Td
/F1 16 Tf
( ) Tj
ET
endstream
endobj
{{xref}}
{{trailer}}
{{startxref}}
%%EOF
This diff was suppressed by a .gitattributes entry.
...@@ -254,21 +254,27 @@ FPDF_TEXTPAGE PDFiumPage::GetTextPage() { ...@@ -254,21 +254,27 @@ FPDF_TEXTPAGE PDFiumPage::GetTextPage() {
void PDFiumPage::GetTextRunInfo( void PDFiumPage::GetTextRunInfo(
int start_char_index, int start_char_index,
PP_PrivateAccessibilityTextRunInfo* text_run_info) { PP_PrivateAccessibilityTextRunInfo* text_run_info) {
if (start_char_index < 0) { FPDF_PAGE page = GetPage();
FPDF_TEXTPAGE text_page = GetTextPage();
int chars_count = FPDFText_CountChars(text_page);
// Check to make sure |start_char_index| is within bounds.
if (start_char_index < 0 || start_char_index >= chars_count) {
text_run_info->len = 0; text_run_info->len = 0;
text_run_info->font_size = 0; text_run_info->font_size = 0;
text_run_info->bounds = pp::FloatRect(); text_run_info->bounds = pp::FloatRect();
text_run_info->direction = PP_PRIVATEDIRECTION_NONE; text_run_info->direction = PP_PRIVATEDIRECTION_NONE;
return; return;
} }
FPDF_PAGE page = GetPage();
FPDF_TEXTPAGE text_page = GetTextPage();
int chars_count = FPDFText_CountChars(text_page);
int actual_start_char_index = GetFirstNonUnicodeWhiteSpaceCharIndex( int actual_start_char_index = GetFirstNonUnicodeWhiteSpaceCharIndex(
text_page, start_char_index, chars_count); text_page, start_char_index, chars_count);
// Check to see if GetFirstNonUnicodeWhiteSpaceCharIndex() iterated through
// all the characters.
if (actual_start_char_index >= chars_count) { if (actual_start_char_index >= chars_count) {
text_run_info->len = 0; // If so, |text_run_info->len| needs to take the number of characters
// iterated into account.
DCHECK_GT(actual_start_char_index, start_char_index);
text_run_info->len = chars_count - start_char_index;
text_run_info->font_size = 0; text_run_info->font_size = 0;
text_run_info->bounds = pp::FloatRect(); text_run_info->bounds = pp::FloatRect();
text_run_info->direction = PP_PRIVATEDIRECTION_NONE; text_run_info->direction = PP_PRIVATEDIRECTION_NONE;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment