Commit 09806a06 authored by Mohit Bansal, committed by Commit Bot

Add tests for enhanced PDF accessibility support

This CL adds a few tests to validate text extraction and the accessibility
tree for the newly enhanced accessibility support for PDFs. The added tests
reuse an already available test file and also add another test file.

Bug: 981448
Change-Id: I821751efc9cf2c586c41ecdf29a071d95a52dd70
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1817857
Commit-Queue: Mohit Bansal <mohitb@microsoft.com>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Kevin Babbitt <kbabbitt@microsoft.com>
Cr-Commit-Position: refs/heads/master@{#702572}
parent 18945ca4
......@@ -2404,6 +2404,21 @@ IN_PROC_BROWSER_TEST_F(PDFExtensionAccessibilityTextExtractionTest, WebLinks) {
RunTextExtractionTest(FILE_PATH_LITERAL("weblinks.pdf"));
}
// Checks the inline text box data extracted from an untagged PDF whose text
// spans multiple lines and uses several font sizes.
IN_PROC_BROWSER_TEST_F(PDFExtensionAccessibilityTextExtractionTest,
                       ParagraphsAndHeadingUntagged) {
  const auto* pdf_file =
      FILE_PATH_LITERAL("paragraphs-and-heading-untagged.pdf");
  RunTextExtractionTest(pdf_file);
}
// Checks the inline text box data extracted from a PDF that mixes plain text,
// weblinks, images and annotation links.
IN_PROC_BROWSER_TEST_F(PDFExtensionAccessibilityTextExtractionTest,
                       LinksImagesAndText) {
  const auto* pdf_file = FILE_PATH_LITERAL("text-image-link.pdf");
  RunTextExtractionTest(pdf_file);
}
class PDFExtensionAccessibilityTreeDumpTest
: public PDFExtensionTest,
public ::testing::WithParamInterface<size_t> {
......@@ -2606,3 +2621,8 @@ IN_PROC_BROWSER_TEST_P(PDFExtensionAccessibilityTreeDumpTest, WebLinks) {
// Runs the parameterized tree dump fixture against image_alt_text.pdf.
IN_PROC_BROWSER_TEST_P(PDFExtensionAccessibilityTreeDumpTest, Images) {
  const auto* pdf_file = FILE_PATH_LITERAL("image_alt_text.pdf");
  RunPDFTest(pdf_file);
}
// Runs the parameterized tree dump fixture against text-image-link.pdf, the
// same PDF used by the text extraction LinksImagesAndText test.
IN_PROC_BROWSER_TEST_P(PDFExtensionAccessibilityTreeDumpTest,
                       LinksImagesAndText) {
  const auto* pdf_file = FILE_PATH_LITERAL("text-image-link.pdf");
  RunPDFTest(pdf_file);
}
Heading
This is a small pdf file:
Lorem Ipsum is simply dummy text of the printing and typesetting industry.
Lorem Ipsum has been the industry's standard dummy text ever since the 1500s.
It has survived not only five centuries, but also the leap into electronic typesetting,
remaining essentially unchanged. It was popularised in the 1960s with the release of
Letraset sheets containing Lorem Ipsum passages, and more recently with desktop
publishing software like Aldus PageMaker including versions of Lorem Ipsum.
Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots
in a piece of classical Latin literature from 45 BC, making it over 2000 years old.
\ No newline at end of file
[embedded component]
++[document frame]
++++[landmark] name='Page 1'
++++++[heading]
++++++++[text] name='Link Annotations - Page 1<newline>'
++++++[heading]
++++++++[link] name='Link with destination to second page<newline>'
++++++++++[text] name='Link with destination to second page<newline>'
++++++[paragraph]
++++++++[link] name='PDF Reference, Version 1.7,'
++++++++++[text] name='PDF Reference, Version 1.7,'
++++++++[text] name=' Section 8.4.5 defines Annotations<newline>3. Hello World<newline>'
++++++++[link] name='https://pdfium.googlesource.com/pdfium'
++++++++++[text] name='https://pdfium.googlesource.com/pdfium'
++++++++[text] name=' is link in plain text, not link annotation. These are referred to<newline>as '
++++++++[link] name='WebLinks in PDFium'
++++++++++[text] name='WebLinks in PDFium'
++++++++[text] name='.'
++++++++[link]
++++++++[image] name='Image 1'
++++++++[image] name='Unlabeled image'
++++++++[image] name='Unlabeled image'
++++[landmark] name='Page 2'
++++++[paragraph]
++++++++[text] name='Second Page'
\ No newline at end of file
AXGroup AXDescription='Page 1 Page 2'
++AXGroup
++++AXGroup AXDescription='Page 1'
++++++AXHeading AXValue='2'
++++++++AXStaticText AXValue='Link Annotations - Page 1<newline>'
++++++AXHeading AXValue='2'
++++++++AXLink AXDescription='Link with destination to second page<newline>'
++++++++++AXStaticText AXValue='Link with destination to second page<newline>'
++++++AXGroup
++++++++AXLink AXDescription='PDF Reference, Version 1.7,'
++++++++++AXStaticText AXValue='PDF Reference, Version 1.7,'
++++++++AXStaticText AXValue=' Section 8.4.5 defines Annotations<newline>3. Hello World<newline>'
++++++++AXLink AXDescription='https://pdfium.googlesource.com/pdfium'
++++++++++AXStaticText AXValue='https://pdfium.googlesource.com/pdfium'
++++++++AXStaticText AXValue=' is link in plain text, not link annotation. These are referred to<newline>as '
++++++++AXLink AXDescription='WebLinks in PDFium'
++++++++++AXStaticText AXValue='WebLinks in PDFium'
++++++++AXStaticText AXValue='.'
++++++++AXLink
++++++++AXImage AXDescription='Image 1'
++++++++AXImage AXDescription='Unlabeled image'
++++++++AXImage AXDescription='Unlabeled image'
++++AXGroup AXDescription='Page 2'
++++++AXGroup
++++++++AXStaticText AXValue='Second Page'
\ No newline at end of file
group
++document
++++region Name='Page 1'
++++++heading
++++++heading
++++++++link Name='Link with destination to second page<newline>'
++++++group
++++++++link Name='PDF Reference, Version 1.7,'
++++++++description Name=' Section 8.4.5 defines Annotations<newline>3. Hello World<newline>'
++++++++link Name='https://pdfium.googlesource.com/pdfium'
++++++++description Name=' is link in plain text, not link annotation. These are referred to<newline>as '
++++++++link Name='WebLinks in PDFium'
++++++++description Name='.'
++++++++link
++++++++img Name='Image 1'
++++++++img Name='Unlabeled graphic'
++++++++img Name='Unlabeled graphic'
++++region Name='Page 2'
++++++group
++++++++description Name='Second Page'
\ No newline at end of file
ROLE_SYSTEM_GROUPING FOCUSABLE
++ROLE_SYSTEM_DOCUMENT READONLY FOCUSABLE
++++IA2_ROLE_LANDMARK name='Page 1' READONLY
++++++IA2_ROLE_HEADING READONLY
++++++++ROLE_SYSTEM_STATICTEXT name='Link Annotations - Page 1<newline>' READONLY
++++++IA2_ROLE_HEADING READONLY
++++++++ROLE_SYSTEM_LINK name='Link with destination to second page<newline>' READONLY
++++++++++ROLE_SYSTEM_STATICTEXT name='Link with destination to second page<newline>' READONLY
++++++IA2_ROLE_PARAGRAPH READONLY
++++++++ROLE_SYSTEM_LINK name='PDF Reference, Version 1.7,' READONLY
++++++++++ROLE_SYSTEM_STATICTEXT name='PDF Reference, Version 1.7,' READONLY
++++++++ROLE_SYSTEM_STATICTEXT name=' Section 8.4.5 defines Annotations<newline>3. Hello World<newline>' READONLY
++++++++ROLE_SYSTEM_LINK name='https://pdfium.googlesource.com/pdfium' READONLY
++++++++++ROLE_SYSTEM_STATICTEXT name='https://pdfium.googlesource.com/pdfium' READONLY
++++++++ROLE_SYSTEM_STATICTEXT name=' is link in plain text, not link annotation. These are referred to<newline>as ' READONLY
++++++++ROLE_SYSTEM_LINK name='WebLinks in PDFium' READONLY
++++++++++ROLE_SYSTEM_STATICTEXT name='WebLinks in PDFium' READONLY
++++++++ROLE_SYSTEM_STATICTEXT name='.' READONLY
++++++++ROLE_SYSTEM_LINK name='' READONLY
++++++++ROLE_SYSTEM_GRAPHIC name='Image 1' READONLY
++++++++ROLE_SYSTEM_GRAPHIC name='Unlabeled graphic' READONLY
++++++++ROLE_SYSTEM_GRAPHIC name='Unlabeled graphic' READONLY
++++IA2_ROLE_LANDMARK name='Page 2' READONLY
++++++IA2_ROLE_PARAGRAPH READONLY
++++++++ROLE_SYSTEM_STATICTEXT name='Second Page' READONLY
\ No newline at end of file
Link Annotations - Page 1
Link with destination to second page
PDF Reference, Version 1.7,
Section 8.4.5 defines Annotations
3. Hello World
https://pdfium.googlesource.com/pdfium
is link in plain text, not link annotation. These are referred to
as
WebLinks in PDFium
.
Second Page
\ No newline at end of file
{{header}}
{{object 1 0}} <<
/Type /Catalog
/Pages 2 0 R
/StructTreeRoot 8 0 R
/Lang (en-US)
/MarkInfo <<
/Marked true
>>
>>
endobj
{{object 2 0}} <<
/Type /Pages
/Count 2
/Kids [3 0 R 4 0 R]
/MediaBox [0 0 612 792]
/CropBox [0 0 612 792]
/Resources <<
/Font <<
/F1 13 0 R
/F2 14 0 R
>>
>>
>>
endobj
{{object 3 0}} <<
/Type /Page
/Parent 2 0 R
/Contents 5 0 R
/Annots [15 0 R 16 0 R 17 0 R 18 0 R]
/Group <<
/CS /DeviceRGB
/I true
/S /Transparency
>>
/Resources <<
/ProcSet [/PDF /ImageC /ImageI /ImageB]
/XObject <<
/Tr8 19 0 R
/Im7 20 0 R
>>
/ExtGState <<
/EGS9 7 0 R
>>
>>
/StructParents 0
>>
endobj
{{object 4 0}} <<
/Type /Page
/Parent 2 0 R
/Contents 6 0 R
>>
endobj
{{object 5 0}} <<
{{streamlen}}
>>
stream
BT
70 700 Td
/F1 18 Tf
(Link Annotations - Page 1) Tj
10 -85 Td
/F2 14 Tf
(Link with destination to second page) Tj
-12 -84 Td
/F2 10 Tf
(PDF Reference, Version 1.7, Section 8.4.5 defines Annotations) Tj
2 -53 Td
(3. Hello World) Tj
0 -18 Td
(https://pdfium.googlesource.com/pdfium is link in plain text, not link annotation. These are referred to) Tj
0 -17 Td
(as WebLinks in PDFium.)Tj
ET
0.1 w
/Artifact
BMC
q
0 0 612 792 re
W* n
EMC
/Figure<</MCID 0>>
BDC
Q
q
281 685.3 50 50 re
W* n
q
49.9 0 0 50 281.1 685.4 cm
/Im7 Do
Q
EMC
/Figure<</MCID 1>>
BDC
Q
q
281 485.3 20 20 re
W* n
q
20 0 0 20 281.1 485.4 cm
/Im7 Do
Q
EMC
/Figure<</MCID 2>>
BDC
Q
q
281 285.3 0 0 re
W* n
q
0 0 0 0 281.1 285.4 cm
/Im7 Do
Q
EMC
Q
q
EGS9 gs /Tr8 Do
Q
endstream
endobj
{{object 6 0}} <<
{{streamlen}}
>>
stream
BT
70 700 Td
/F1 18 Tf
(Second Page) Tj
ET
endstream
endobj
{{object 7 0}} <<
/ca 0.5
/CA 0.5
>>
endobj
{{object 8 0}} <<
/Type /StructTreeRoot
/ParentTree <<
/Nums [0 [10 0 R 11 0 R]]
>>
/K [9 0 R]
/RoleMap <<
/Document /Document
/Standard /P
/Figure /Figure
>>
>>
endobj
{{object 9 0}} <<
/Type /StructElem
/S /Document
/K [12 0 R]
/P 8 0 R
/T (TitleText)
/Pg 3 0 R
>>
endobj
{{object 10 0}} <<
/Type /StructElem
/S /Figure
/K 0
/P 12 0 R
/Alt <feff0049006d00610067006500200031>
/Pg 3 0 R
>>
endobj
{{object 11 0}} <<
/Type /StructElem
/S /Figure
/K 1
/P 12 0 R
/Pg 3 0 R
>>
endobj
{{object 12 0}} <<
/Type /StructElem
/S /Standard
/K [10 0 R 11 0 R]
/P 9 0 R
/T <feff00730079006d0062006f006c003a0020003100300030006b>
/Pg 3 0 R
>>
endobj
{{object 13 0}} <<
/Type /Font
/Subtype /Type1
/BaseFont /Times-Roman
>>
endobj
{{object 14 0}} <<
/Type /Font
/Subtype /Type1
/BaseFont /Helvetica
>>
endobj
{{object 15 0}} <<
/Type /Annot
/Subtype /Link
/BS <<
/W 0
>>
/Rect [80 613 542 633]
/Dest [4 0 R /XYZ 200 725 0]
/F 4
>>
endobj
{{object 16 0}} <<
/Type /Annot
/Subtype /Link
/BS <<
/W 0
>>
/Rect [196 544 66 529]
/A <<
/Type /Action
/S /URI
/URI (https://www.adobe.com)
>>
/F 4
>>
endobj
{{object 17 0}} <<
/Type /Annot
/Subtype /Link
/BS <<
/W 0
>>
/Rect [83 430 178 453]
/QuadPoints [83 453 178 453 83 440 178 440 83 440 178 440 83 430 178 430]
/A <<
/Type /Action
/S /URI
/URI (https://cs.chromium.org)
>>
/F 4
>>
endobj
{{object 18 0}} <<
/Type /Annot
/Subtype /Link
/BS <<
/W 0
>>
/Rect [40 80 60 100]
/A <<
/Type /Action
/S /URI
/URI (https://www.google.com)
>>
/F 4
>>
endobj
{{object 19 0}} <<
/Type /XObject
/Subtype /Form
/BBox [-140 395 753 395.1]
/Group <<
/CS /DeviceRGB
/K true
/S /Transparency
>>
{{streamlen}}
>>
stream
endstream
endobj
{{object 20 0}} <<
/Type /XObject
/Subtype /Image
/Width 50
/Height 50
/BitsPerComponent 8
/ColorSpace /DeviceRGB
/Filter [/ASCIIHexDecode /FlateDecode]
{{streamlen}}
>>
stream
789cedc13101000000c2a0f54fed6f06a00000000000000078031d4c0001
endstream
endobj
{{xref}}
{{trailer}}
{{startxref}}
%%EOF
This diff was suppressed by a .gitattributes entry.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment