Commit 774ac27c authored by Hui Yingst, committed by Commit Bot

Add a method to get the content of a PDF attachment.

1. Add method PDFiumEngine::GetAttachmentData(). Given an attachment
   index, the method returns the content of that attachment in a
   buffer.

2. Add unit tests for GetAttachmentData() in an existing
   PDFiumEngineTest, and rename the test "GetDocumentAttachments" since
   it not only checks the meta data of attachments, but also checks
   the contents of attachments now.

Bug: 177188
Change-Id: I418173abd77033d8edba33de61bac0d30ae51120
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2229765
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Hui Yingst <nigi@chromium.org>
Cr-Commit-Position: refs/heads/master@{#798781}
parent 5e960b15
...@@ -385,6 +385,14 @@ class PDFEngine { ...@@ -385,6 +385,14 @@ class PDFEngine {
// Gets the list of DocumentAttachmentInfo from the document. // Gets the list of DocumentAttachmentInfo from the document.
virtual const std::vector<DocumentAttachmentInfo>& virtual const std::vector<DocumentAttachmentInfo>&
GetDocumentAttachmentInfoList() const = 0; GetDocumentAttachmentInfoList() const = 0;
// Gets the content of an attachment by the attachment's |index|. |index|
// must be in the range [0, attachment_count), where |attachment_count|
// is the number of attachments embedded in the document.
// The caller of this method is responsible for checking that the
// attachment is readable, that the attachment size is not 0 bytes, and
// that the return value's size matches the corresponding
// DocumentAttachmentInfo's |size_bytes|.
virtual std::vector<uint8_t> GetAttachmentData(size_t index) = 0;
// Gets metadata about the document. // Gets metadata about the document.
virtual const DocumentMetadata& GetDocumentMetadata() const = 0; virtual const DocumentMetadata& GetDocumentMetadata() const = 0;
// Gets the number of pages in the document. // Gets the number of pages in the document.
......
...@@ -789,7 +789,7 @@ void PDFiumEngine::FinishLoadingDocument() { ...@@ -789,7 +789,7 @@ void PDFiumEngine::FinishLoadingDocument() {
if (doc()) { if (doc()) {
DocumentFeatures document_features; DocumentFeatures document_features;
document_features.page_count = pages_.size(); document_features.page_count = pages_.size();
document_features.has_attachments = (FPDFDoc_GetAttachmentCount(doc()) > 0); document_features.has_attachments = !doc_attachment_info_list_.empty();
document_features.is_tagged = FPDFCatalog_IsTagged(doc()); document_features.is_tagged = FPDFCatalog_IsTagged(doc());
document_features.form_type = document_features.form_type =
static_cast<FormType>(FPDF_GetFormType(doc())); static_cast<FormType>(FPDF_GetFormType(doc()));
...@@ -2191,6 +2191,25 @@ PDFiumEngine::GetDocumentAttachmentInfoList() const { ...@@ -2191,6 +2191,25 @@ PDFiumEngine::GetDocumentAttachmentInfoList() const {
return doc_attachment_info_list_; return doc_attachment_info_list_;
} }
std::vector<uint8_t> PDFiumEngine::GetAttachmentData(size_t index) {
DCHECK_LT(index, doc_attachment_info_list_.size());
DCHECK(doc_attachment_info_list_[index].is_readable);
unsigned long length_bytes = doc_attachment_info_list_[index].size_bytes;
DCHECK_NE(length_bytes, 0u);
FPDF_ATTACHMENT attachment = FPDFDoc_GetAttachment(doc(), index);
std::vector<uint8_t> content_buf(length_bytes);
unsigned long data_size_bytes;
bool is_attachment_readable = FPDFAttachment_GetFile(
attachment, content_buf.data(), length_bytes, &data_size_bytes);
if (!is_attachment_readable || length_bytes != data_size_bytes) {
NOTREACHED();
return std::vector<uint8_t>();
}
return content_buf;
}
const DocumentMetadata& PDFiumEngine::GetDocumentMetadata() const { const DocumentMetadata& PDFiumEngine::GetDocumentMetadata() const {
DCHECK(document_loaded_); DCHECK(document_loaded_);
return doc_metadata_; return doc_metadata_;
......
...@@ -124,6 +124,7 @@ class PDFiumEngine : public PDFEngine, ...@@ -124,6 +124,7 @@ class PDFiumEngine : public PDFEngine,
void SelectAll() override; void SelectAll() override;
const std::vector<DocumentAttachmentInfo>& GetDocumentAttachmentInfoList() const std::vector<DocumentAttachmentInfo>& GetDocumentAttachmentInfoList()
const override; const override;
std::vector<uint8_t> GetAttachmentData(size_t index) override;
const DocumentMetadata& GetDocumentMetadata() const override; const DocumentMetadata& GetDocumentMetadata() const override;
int GetNumberOfPages() override; int GetNumberOfPages() override;
pp::VarArray GetBookmarks() override; pp::VarArray GetBookmarks() override;
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include "pdf/pdfium/pdfium_engine.h" #include "pdf/pdfium/pdfium_engine.h"
#include "base/hash/md5.h"
#include "base/strings/utf_string_conversions.h" #include "base/strings/utf_string_conversions.h"
#include "base/test/scoped_feature_list.h" #include "base/test/scoped_feature_list.h"
#include "base/test/task_environment.h" #include "base/test/task_environment.h"
...@@ -215,7 +216,7 @@ TEST_F(PDFiumEngineTest, ApplyDocumentLayoutAvoidsInfiniteLoop) { ...@@ -215,7 +216,7 @@ TEST_F(PDFiumEngineTest, ApplyDocumentLayoutAvoidsInfiniteLoop) {
EXPECT_EQ(gfx::Size(343, 1463), engine->ApplyDocumentLayout(options)); EXPECT_EQ(gfx::Size(343, 1463), engine->ApplyDocumentLayout(options));
} }
TEST_F(PDFiumEngineTest, GetDocumentAttachmentInfo) { TEST_F(PDFiumEngineTest, GetDocumentAttachments) {
NiceMock<MockTestClient> client; NiceMock<MockTestClient> client;
std::unique_ptr<PDFiumEngine> engine = std::unique_ptr<PDFiumEngine> engine =
InitializeEngine(&client, FILE_PATH_LITERAL("embedded_attachments.pdf")); InitializeEngine(&client, FILE_PATH_LITERAL("embedded_attachments.pdf"));
...@@ -233,9 +234,15 @@ TEST_F(PDFiumEngineTest, GetDocumentAttachmentInfo) { ...@@ -233,9 +234,15 @@ TEST_F(PDFiumEngineTest, GetDocumentAttachmentInfo) {
EXPECT_EQ("D:20170712214438-07'00'", EXPECT_EQ("D:20170712214438-07'00'",
base::UTF16ToUTF8(attachment.creation_date)); base::UTF16ToUTF8(attachment.creation_date));
EXPECT_EQ("D:20160115091400", base::UTF16ToUTF8(attachment.modified_date)); EXPECT_EQ("D:20160115091400", base::UTF16ToUTF8(attachment.modified_date));
std::vector<uint8_t> content = engine->GetAttachmentData(0);
ASSERT_EQ(attachment.size_bytes, content.size());
std::string content_str(content.begin(), content.end());
EXPECT_EQ("test", content_str);
} }
{ {
static constexpr char kCheckSum[] = "72afcddedf554dda63c0c88e06f1ce18";
const DocumentAttachmentInfo& attachment = attachments[1]; const DocumentAttachmentInfo& attachment = attachments[1];
EXPECT_EQ("attached.pdf", base::UTF16ToUTF8(attachment.name)); EXPECT_EQ("attached.pdf", base::UTF16ToUTF8(attachment.name));
EXPECT_TRUE(attachment.is_readable); EXPECT_TRUE(attachment.is_readable);
...@@ -243,6 +250,14 @@ TEST_F(PDFiumEngineTest, GetDocumentAttachmentInfo) { ...@@ -243,6 +250,14 @@ TEST_F(PDFiumEngineTest, GetDocumentAttachmentInfo) {
EXPECT_EQ("D:20170712214443-07'00'", EXPECT_EQ("D:20170712214443-07'00'",
base::UTF16ToUTF8(attachment.creation_date)); base::UTF16ToUTF8(attachment.creation_date));
EXPECT_EQ("D:20170712214410", base::UTF16ToUTF8(attachment.modified_date)); EXPECT_EQ("D:20170712214410", base::UTF16ToUTF8(attachment.modified_date));
std::vector<uint8_t> content = engine->GetAttachmentData(1);
ASSERT_EQ(attachment.size_bytes, content.size());
// The whole attachment content is too long to do string comparison.
// Instead, we only verify the checksum value here.
base::MD5Digest hash;
base::MD5Sum(content.data(), content.size(), &hash);
EXPECT_EQ(kCheckSum, base::MD5DigestToBase16(hash));
} }
{ {
...@@ -253,6 +268,11 @@ TEST_F(PDFiumEngineTest, GetDocumentAttachmentInfo) { ...@@ -253,6 +268,11 @@ TEST_F(PDFiumEngineTest, GetDocumentAttachmentInfo) {
EXPECT_EQ(5u, attachment.size_bytes); EXPECT_EQ(5u, attachment.size_bytes);
EXPECT_THAT(attachment.creation_date, IsEmpty()); EXPECT_THAT(attachment.creation_date, IsEmpty());
EXPECT_THAT(attachment.modified_date, IsEmpty()); EXPECT_THAT(attachment.modified_date, IsEmpty());
std::vector<uint8_t> content = engine->GetAttachmentData(2);
ASSERT_EQ(attachment.size_bytes, content.size());
std::string content_str(content.begin(), content.end());
EXPECT_EQ("test\n", content_str);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment