Commit 7e49da29 authored by Pratish Kumar, committed by Commit Bot

Populate Images in PDFiumPage

This CL introduces a new method, PDFiumPage::CalculateImages(), which is
responsible for populating the images vector in PDFiumPage.

Added a unit test, TestCalculateImages(), as well as a PDF test file
containing three images to verify the image data.

Bug: 981448
Change-Id: Ifbc8426cb47a3a7a76b2bca309c7a68e1792b3fe
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1738690
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Kevin Babbitt <kbabbitt@microsoft.com>
Commit-Queue: Pratish Kumar <prkum@microsoft.com>
Cr-Commit-Position: refs/heads/master@{#689109}
parent 75b4ff79
......@@ -590,6 +590,31 @@ void PDFiumPage::CalculateLinks() {
FPDFLink_CloseWebLinks(links);
}
void PDFiumPage::CalculateImages() {
if (calculated_images_)
return;
calculated_images_ = true;
FPDF_PAGE page = GetPage();
int page_object_count = FPDFPage_CountObjects(page);
for (int i = 0; i < page_object_count; ++i) {
FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, i);
if (FPDFPageObj_GetType(page_object) != FPDF_PAGEOBJ_IMAGE)
continue;
float left;
float top;
float right;
float bottom;
FPDF_BOOL ret =
FPDFPageObj_GetBounds(page_object, &left, &bottom, &right, &top);
DCHECK(ret);
Image image;
image.bounding_rect = PageToScreen(pp::Point(), 1.0, left, top, right,
bottom, PageOrientation::kOriginal);
images_.push_back(image);
}
}
bool PDFiumPage::GetUnderlyingTextRangeForRect(const pp::FloatRect& rect,
int* start_index,
uint32_t* char_len) {
......@@ -726,6 +751,12 @@ PDFiumPage::Link::Link(const Link& that) = default;
PDFiumPage::Link::~Link() = default;
// Out-of-line definitions of Image's default constructor, copy constructor and
// destructor. Each one uses the compiler-generated behavior.
PDFiumPage::Image::Image() = default;
PDFiumPage::Image::Image(const Image& that) = default;
PDFiumPage::Image::~Image() = default;
int ToPDFiumRotation(PageOrientation orientation) {
// Could static_cast<int>(orientation), but using an exhaustive switch will
// trigger an error if we ever change the definition of PageOrientation.
......
......@@ -136,6 +136,9 @@ class PDFiumPage {
private:
friend class PDFiumPageLinkTest;
friend class PDFiumTestBase;
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageTest, TestCalculateImages);
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, TestLinkGeneration);
// Returns a link index if the given character index is over a link, or -1
......@@ -143,6 +146,8 @@ class PDFiumPage {
int GetLink(int char_index, LinkTarget* target);
// Calculate the locations of any links on the page.
void CalculateLinks();
// Calculates the bounding rectangles of the image objects on the page and
// stores them in |images_|. Only the first call does any work; later calls
// return immediately.
void CalculateImages();
// Returns link type and fills target associated with a link. Returns
// NONSELECTABLE_AREA if link detection failed.
Area GetLinkTarget(FPDF_LINK link, LinkTarget* target);
......@@ -178,6 +183,15 @@ class PDFiumPage {
std::string url;
};
// Represents an image inside the page.
struct Image {
  Image();
  // Parameter name matches the out-of-line definition in the .cc file.
  Image(const Image& that);
  ~Image();

  // Bounding rectangle of the image, as filled in by CalculateImages().
  pp::Rect bounding_rect;
};
PDFiumEngine* engine_;
ScopedFPDFPage page_;
ScopedFPDFTextPage text_page_;
......@@ -186,6 +200,8 @@ class PDFiumPage {
pp::Rect rect_;
bool calculated_links_ = false;
std::vector<Link> links_;
// Set to true the first time CalculateImages() runs, so that the page objects
// are only scanned once.
bool calculated_images_ = false;
// Images found on the page; appended to by CalculateImages().
std::vector<Image> images_;
bool available_;
PDFEngine::PageFeatures page_features_;
......
......@@ -102,4 +102,22 @@ TEST_F(PDFiumPageLinkTest, TestLinkGeneration) {
}
}
} // namespace chrome_pdf
\ No newline at end of file
// Image tests reuse the shared PDFium test fixture.
using PDFiumPageImageTest = PDFiumTestBase;

// Loads the single-page "image_alt_text.pdf" fixture and verifies that
// CalculateImages() records three images with the expected bounding rects.
TEST_F(PDFiumPageImageTest, TestCalculateImages) {
  TestClient test_client;
  std::unique_ptr<PDFiumEngine> pdf_engine =
      InitializeEngine(&test_client, FILE_PATH_LITERAL("image_alt_text.pdf"));
  ASSERT_TRUE(pdf_engine);
  ASSERT_EQ(1, pdf_engine->GetNumberOfPages());

  PDFiumPage* first_page = GetPDFiumPageForTest(pdf_engine.get(), 0);
  ASSERT_TRUE(first_page);

  first_page->CalculateImages();
  const auto& found_images = first_page->images_;
  ASSERT_EQ(3u, found_images.size());

  // Expected rects, in the order the images are stored in |images_|.
  const pp::Rect kExpectedRects[] = {
      {380, 78, 67, 68},
      {380, 385, 27, 28},
      {380, 678, 1, 1},
  };
  CompareRect(kExpectedRects[0], found_images[0].bounding_rect);
  CompareRect(kExpectedRects[1], found_images[1].bounding_rect);
  CompareRect(kExpectedRects[2], found_images[2].bounding_rect);
}
} // namespace chrome_pdf
{{header}}
{{object 1 0}} <<
/Type /Catalog
/Pages 2 0 R
/StructTreeRoot 8 0 R
/Lang (en-US)
/MarkInfo <<
/Marked true
>>
>>
endobj
{{object 2 0}} <<
/Type /Pages
/Count 1
/Kids [3 0 R]
>>
endobj
{{object 3 0}} <<
/Type /Page
/Parent 2 0 R
/Contents 4 0 R
/MediaBox [0 0 612 792]
/Group <<
/CS /DeviceRGB
/I true
/S /Transparency
>>
/Resources <<
/ProcSet [/PDF /ImageC /ImageI /ImageB]
/XObject <<
/Tr8 5 0 R
/Im7 6 0 R
>>
/ExtGState <<
/EGS9 7 0 R
>>
>>
/StructParents 0
>>
endobj
{{object 4 0}} <<
{{streamlen}}
>>
stream
0.1 w
/Artifact
BMC
q
0 0 612 792 re
W* n
EMC
/Figure<</MCID 0>>
BDC
Q
q
281 685.3 50 50 re
W* n
q
49.9 0 0 50 281.1 685.4 cm
/Im7 Do
Q
EMC
/Figure<</MCID 1>>
BDC
Q
q
281 485.3 20 20 re
W* n
q
20 0 0 20 281.1 485.4 cm
/Im7 Do
Q
EMC
/Figure<</MCID 2>>
BDC
Q
q
281 285.3 0 0 re
W* n
q
0 0 0 0 281.1 285.4 cm
/Im7 Do
Q
EMC
Q
q
EGS9 gs /Tr8 Do
Q
endstream
endobj
{{object 5 0}} <<
/Type /XObject
/Subtype /Form
/BBox [-140 395 753 395.1]
/Group <<
/CS /DeviceRGB
/K true
/S /Transparency
>>
{{streamlen}}
>>
stream
endstream
endobj
{{object 6 0}} <<
/Type /XObject
/Subtype /Image
/Width 50
/Height 50
/BitsPerComponent 8
/ColorSpace /DeviceRGB
/Filter [/ASCIIHexDecode /FlateDecode]
{{streamlen}}
>>
stream
789cedc13101000000c2a0f54fed6f06a00000000000000078031d4c0001
endstream
endobj
{{object 7 0}} <<
/ca 0.5
/CA 0.5
>>
endobj
{{object 8 0}} <<
/Type /StructTreeRoot
/ParentTree 9 0 R
/K [11 0 R]
/RoleMap <<
/Document /Document
/Standard /P
/Figure /Figure
>>
>>
endobj
{{object 9 0}} <<
/Nums [0 [10 0 R 13 0 R 14 0 R]]
>>
endobj
{{object 10 0}} <<
/Type /StructElem
/S /Figure
/K 0
/P 12 0 R
/Alt <FEFF0049006d00610067006500200031>
/Pg 3 0 R
>>
endobj
{{object 11 0}} <<
/Type /StructElem
/S /Document
/K [12 0 R]
/P 8 0 R
/T (TitleText)
/Pg 3 0 R
>>
endobj
{{object 12 0}} <<
/Type /StructElem
/S /Standard
/K [10 0 R 13 0 R 14 0 R]
/P 11 0 R
/T <feff00730079006d0062006f006c003a0020003100300030006b>
/Pg 3 0 R
>>
endobj
{{object 13 0}} <<
/Type /StructElem
/S /Figure
/K 1
/P 12 0 R
/Alt <FEFF0049006d00610067006500200032>
/Pg 3 0 R
>>
endobj
{{object 14 0}} <<
/Type /StructElem
/S /Figure
/K 2
/P 12 0 R
/Alt <FEFF0049006d00610067006500200033>
/Pg 3 0 R
>>
endobj
{{xref}}
{{trailer}}
{{startxref}}
%%EOF
This diff was suppressed by a .gitattributes entry.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment