Commit 24b11ad4 authored by Daniel Hosseinian, committed by Chromium LUCI CQ

Add PDF linearization to DocumentMetadata

Meanwhile, consolidate calls to FPDFAvail_IsLinearized() to a private
helper.

Bug: 93619
Change-Id: I0b1fd52e9067e2c05e66034caf3d3ceb641411de
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2622515
Commit-Queue: Daniel Hosseinian <dhoss@chromium.org>
Reviewed-by: Hui Yingst <nigi@chromium.org>
Cr-Commit-Position: refs/heads/master@{#842440}
parent a1e8e6f4
...@@ -30,8 +30,8 @@ enum class PdfVersion { ...@@ -30,8 +30,8 @@ enum class PdfVersion {
// dictionary (see section 14.3.3 "Document Information Dictionary" of the ISO // dictionary (see section 14.3.3 "Document Information Dictionary" of the ISO
// 32000-1 standard), as well as other properties about the file. // 32000-1 standard), as well as other properties about the file.
// TODO(crbug.com/93619): Finish adding information dictionary fields like // TODO(crbug.com/93619): Finish adding information dictionary fields like
// |keywords|, |creation_date|, and |mod_date|. Also add fields like // `keywords`, `creation_date`, and `mod_date`. Also add fields like
// |size_bytes|, |is_encrypted|, and |is_linearized|. // `size_bytes` and `is_encrypted`.
struct DocumentMetadata { struct DocumentMetadata {
DocumentMetadata(); DocumentMetadata();
DocumentMetadata(const DocumentMetadata&) = delete; DocumentMetadata(const DocumentMetadata&) = delete;
...@@ -41,6 +41,9 @@ struct DocumentMetadata { ...@@ -41,6 +41,9 @@ struct DocumentMetadata {
// Version of the document // Version of the document
PdfVersion version = PdfVersion::kUnknown; PdfVersion version = PdfVersion::kUnknown;
// Whether the document is optimized by linearization.
bool linearized = false;
// The document's title. // The document's title.
std::string title; std::string title;
......
...@@ -674,13 +674,13 @@ void PDFiumEngine::OnPendingRequestComplete() { ...@@ -674,13 +674,13 @@ void PDFiumEngine::OnPendingRequestComplete() {
if (!fpdf_availability()) { if (!fpdf_availability()) {
document_->file_access().m_FileLen = doc_loader_->GetDocumentSize(); document_->file_access().m_FileLen = doc_loader_->GetDocumentSize();
document_->CreateFPDFAvailability(); document_->CreateFPDFAvailability();
DCHECK(fpdf_availability());
// Currently engine does not deal efficiently with some non-linearized // Currently engine does not deal efficiently with some non-linearized
// files. // files.
// See http://code.google.com/p/chromium/issues/detail?id=59400 // See http://code.google.com/p/chromium/issues/detail?id=59400
// To improve user experience we download entire file for non-linearized // To improve user experience we download entire file for non-linearized
// PDF. // PDF.
if (FPDFAvail_IsLinearized(fpdf_availability()) != PDF_LINEARIZED) { if (!IsLinearized()) {
// Wait complete document. // Wait complete document.
process_when_pending_request_complete_ = false; process_when_pending_request_complete_ = false;
document_->ResetFPDFAvailability(); document_->ResetFPDFAvailability();
...@@ -2752,9 +2752,8 @@ std::vector<gfx::Size> PDFiumEngine::LoadPageSizes( ...@@ -2752,9 +2752,8 @@ std::vector<gfx::Size> PDFiumEngine::LoadPageSizes(
pending_pages_.clear(); pending_pages_.clear();
size_t new_page_count = FPDF_GetPageCount(doc()); size_t new_page_count = FPDF_GetPageCount(doc());
bool doc_complete = doc_loader_->IsDocumentComplete(); const bool doc_complete = doc_loader_->IsDocumentComplete();
bool is_linear = const bool is_linear = IsLinearized();
FPDFAvail_IsLinearized(fpdf_availability()) == PDF_LINEARIZED;
for (size_t i = 0; i < new_page_count; ++i) { for (size_t i = 0; i < new_page_count; ++i) {
// Get page availability. If |document_loaded_| == true and the page is not // Get page availability. If |document_loaded_| == true and the page is not
// new, then the page has been constructed already. Get page availability // new, then the page has been constructed already. Get page availability
...@@ -2805,12 +2804,10 @@ std::vector<gfx::Size> PDFiumEngine::LoadPageSizes( ...@@ -2805,12 +2804,10 @@ std::vector<gfx::Size> PDFiumEngine::LoadPageSizes(
void PDFiumEngine::LoadBody() { void PDFiumEngine::LoadBody() {
DCHECK(doc()); DCHECK(doc());
DCHECK(fpdf_availability());
if (doc_loader_->IsDocumentComplete()) { if (doc_loader_->IsDocumentComplete()) {
LoadForm(); LoadForm();
} else if (FPDFAvail_IsLinearized(fpdf_availability()) == PDF_LINEARIZED && } else if (IsLinearized() && FPDF_GetPageCount(doc()) == 1) {
FPDF_GetPageCount(doc()) == 1) { // If we have only one page we should load form first, because it may be an
// If we have only one page we should load form first, bacause it is may be
// XFA document. And after loading form the page count and its contents may // XFA document. And after loading form the page count and its contents may
// be changed. // be changed.
LoadForm(); LoadForm();
...@@ -2865,6 +2862,11 @@ void PDFiumEngine::LoadForm() { ...@@ -2865,6 +2862,11 @@ void PDFiumEngine::LoadForm() {
} }
} }
bool PDFiumEngine::IsLinearized() {
DCHECK(fpdf_availability());
return FPDFAvail_IsLinearized(fpdf_availability()) == PDF_LINEARIZED;
}
void PDFiumEngine::CalculateVisiblePages() { void PDFiumEngine::CalculateVisiblePages() {
// Early return if the PDF isn't being loaded or if we don't have the document // Early return if the PDF isn't being loaded or if we don't have the document
// info yet. The latter is important because otherwise as the PDF is being // info yet. The latter is important because otherwise as the PDF is being
...@@ -3948,8 +3950,10 @@ void PDFiumEngine::LoadDocumentAttachmentInfoList() { ...@@ -3948,8 +3950,10 @@ void PDFiumEngine::LoadDocumentAttachmentInfoList() {
void PDFiumEngine::LoadDocumentMetadata() { void PDFiumEngine::LoadDocumentMetadata() {
DCHECK(document_loaded_); DCHECK(document_loaded_);
// Document information dictionary entries
doc_metadata_.version = GetDocumentVersion(); doc_metadata_.version = GetDocumentVersion();
doc_metadata_.linearized = IsLinearized();
// Document information dictionary entries
doc_metadata_.title = GetTrimmedMetadataByField("Title"); doc_metadata_.title = GetTrimmedMetadataByField("Title");
doc_metadata_.author = GetTrimmedMetadataByField("Author"); doc_metadata_.author = GetTrimmedMetadataByField("Author");
doc_metadata_.subject = GetTrimmedMetadataByField("Subject"); doc_metadata_.subject = GetTrimmedMetadataByField("Subject");
......
...@@ -302,6 +302,9 @@ class PDFiumEngine : public PDFEngine, ...@@ -302,6 +302,9 @@ class PDFiumEngine : public PDFEngine,
void LoadForm(); void LoadForm();
// Checks whether the document is optimized by linearization.
bool IsLinearized();
// Calculates which pages should be displayed right now. // Calculates which pages should be displayed right now.
void CalculateVisiblePages(); void CalculateVisiblePages();
......
...@@ -349,6 +349,7 @@ TEST_F(PDFiumEngineTest, GetDocumentMetadata) { ...@@ -349,6 +349,7 @@ TEST_F(PDFiumEngineTest, GetDocumentMetadata) {
const DocumentMetadata& doc_metadata = engine->GetDocumentMetadata(); const DocumentMetadata& doc_metadata = engine->GetDocumentMetadata();
EXPECT_EQ(PdfVersion::k1_7, doc_metadata.version); EXPECT_EQ(PdfVersion::k1_7, doc_metadata.version);
EXPECT_FALSE(doc_metadata.linearized);
EXPECT_EQ("Sample PDF Document Info", doc_metadata.title); EXPECT_EQ("Sample PDF Document Info", doc_metadata.title);
EXPECT_EQ("Chromium Authors", doc_metadata.author); EXPECT_EQ("Chromium Authors", doc_metadata.author);
EXPECT_EQ("Testing", doc_metadata.subject); EXPECT_EQ("Testing", doc_metadata.subject);
...@@ -365,6 +366,7 @@ TEST_F(PDFiumEngineTest, GetEmptyDocumentMetadata) { ...@@ -365,6 +366,7 @@ TEST_F(PDFiumEngineTest, GetEmptyDocumentMetadata) {
const DocumentMetadata& doc_metadata = engine->GetDocumentMetadata(); const DocumentMetadata& doc_metadata = engine->GetDocumentMetadata();
EXPECT_EQ(PdfVersion::k1_7, doc_metadata.version); EXPECT_EQ(PdfVersion::k1_7, doc_metadata.version);
EXPECT_FALSE(doc_metadata.linearized);
EXPECT_THAT(doc_metadata.title, IsEmpty()); EXPECT_THAT(doc_metadata.title, IsEmpty());
EXPECT_THAT(doc_metadata.author, IsEmpty()); EXPECT_THAT(doc_metadata.author, IsEmpty());
EXPECT_THAT(doc_metadata.subject, IsEmpty()); EXPECT_THAT(doc_metadata.subject, IsEmpty());
...@@ -372,6 +374,14 @@ TEST_F(PDFiumEngineTest, GetEmptyDocumentMetadata) { ...@@ -372,6 +374,14 @@ TEST_F(PDFiumEngineTest, GetEmptyDocumentMetadata) {
EXPECT_THAT(doc_metadata.producer, IsEmpty()); EXPECT_THAT(doc_metadata.producer, IsEmpty());
} }
// Loads linearized.pdf and verifies that the engine's document metadata
// reports the file as linearized.
TEST_F(PDFiumEngineTest, GetLinearizedDocumentMetadata) {
  NiceMock<MockTestClient> client;
  std::unique_ptr<PDFiumEngine> engine =
      InitializeEngine(&client, FILE_PATH_LITERAL("linearized.pdf"));
  ASSERT_TRUE(engine);

  const DocumentMetadata& doc_metadata = engine->GetDocumentMetadata();
  EXPECT_TRUE(doc_metadata.linearized);
}
TEST_F(PDFiumEngineTest, GetBadPdfVersion) { TEST_F(PDFiumEngineTest, GetBadPdfVersion) {
NiceMock<MockTestClient> client; NiceMock<MockTestClient> client;
std::unique_ptr<PDFiumEngine> engine = std::unique_ptr<PDFiumEngine> engine =
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment