Commit 24b11ad4 authored by Daniel Hosseinian, committed by Chromium LUCI CQ

Add PDF linearization to DocumentMetadata

Meanwhile, consolidate calls to FPDFAvail_IsLinearized() to a private
helper.

Bug: 93619
Change-Id: I0b1fd52e9067e2c05e66034caf3d3ceb641411de
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2622515
Commit-Queue: Daniel Hosseinian <dhoss@chromium.org>
Reviewed-by: Hui Yingst <nigi@chromium.org>
Cr-Commit-Position: refs/heads/master@{#842440}
parent a1e8e6f4
......@@ -30,8 +30,8 @@ enum class PdfVersion {
// dictionary (see section 14.3.3 "Document Information Dictionary" of the ISO
// 32000-1 standard), as well as other properties about the file.
// TODO(crbug.com/93619): Finish adding information dictionary fields like
// |keywords|, |creation_date|, and |mod_date|. Also add fields like
// |size_bytes|, |is_encrypted|, and |is_linearized|.
// `keywords`, `creation_date`, and `mod_date`. Also add fields like
// `size_bytes` and `is_encrypted`.
struct DocumentMetadata {
DocumentMetadata();
DocumentMetadata(const DocumentMetadata&) = delete;
......@@ -41,6 +41,9 @@ struct DocumentMetadata {
// Version of the document
PdfVersion version = PdfVersion::kUnknown;
// Whether the document is optimized by linearization.
bool linearized = false;
// The document's title.
std::string title;
......
......@@ -674,13 +674,13 @@ void PDFiumEngine::OnPendingRequestComplete() {
if (!fpdf_availability()) {
document_->file_access().m_FileLen = doc_loader_->GetDocumentSize();
document_->CreateFPDFAvailability();
DCHECK(fpdf_availability());
// Currently engine does not deal efficiently with some non-linearized
// files.
// See http://code.google.com/p/chromium/issues/detail?id=59400
// To improve user experience we download entire file for non-linearized
// PDF.
if (FPDFAvail_IsLinearized(fpdf_availability()) != PDF_LINEARIZED) {
if (!IsLinearized()) {
// Wait complete document.
process_when_pending_request_complete_ = false;
document_->ResetFPDFAvailability();
......@@ -2752,9 +2752,8 @@ std::vector<gfx::Size> PDFiumEngine::LoadPageSizes(
pending_pages_.clear();
size_t new_page_count = FPDF_GetPageCount(doc());
bool doc_complete = doc_loader_->IsDocumentComplete();
bool is_linear =
FPDFAvail_IsLinearized(fpdf_availability()) == PDF_LINEARIZED;
const bool doc_complete = doc_loader_->IsDocumentComplete();
const bool is_linear = IsLinearized();
for (size_t i = 0; i < new_page_count; ++i) {
// Get page availability. If |document_loaded_| == true and the page is not
// new, then the page has been constructed already. Get page availability
......@@ -2805,12 +2804,10 @@ std::vector<gfx::Size> PDFiumEngine::LoadPageSizes(
void PDFiumEngine::LoadBody() {
DCHECK(doc());
DCHECK(fpdf_availability());
if (doc_loader_->IsDocumentComplete()) {
LoadForm();
} else if (FPDFAvail_IsLinearized(fpdf_availability()) == PDF_LINEARIZED &&
FPDF_GetPageCount(doc()) == 1) {
// If we have only one page we should load form first, bacause it is may be
} else if (IsLinearized() && FPDF_GetPageCount(doc()) == 1) {
// If we have only one page we should load form first, because it may be an
// XFA document. And after loading form the page count and its contents may
// be changed.
LoadForm();
......@@ -2865,6 +2862,11 @@ void PDFiumEngine::LoadForm() {
}
}
// Returns whether FPDFAvail_IsLinearized() reports PDF_LINEARIZED for the
// handle returned by fpdf_availability(). That handle is DCHECKed to be
// non-null, so callers must only ask after it has been created.
bool PDFiumEngine::IsLinearized() {
  DCHECK(fpdf_availability());
  const bool reports_linearized =
      FPDFAvail_IsLinearized(fpdf_availability()) == PDF_LINEARIZED;
  return reports_linearized;
}
void PDFiumEngine::CalculateVisiblePages() {
// Early return if the PDF isn't being loaded or if we don't have the document
// info yet. The latter is important because otherwise as the PDF is being
......@@ -3948,8 +3950,10 @@ void PDFiumEngine::LoadDocumentAttachmentInfoList() {
void PDFiumEngine::LoadDocumentMetadata() {
DCHECK(document_loaded_);
// Document information dictionary entries
doc_metadata_.version = GetDocumentVersion();
doc_metadata_.linearized = IsLinearized();
// Document information dictionary entries
doc_metadata_.title = GetTrimmedMetadataByField("Title");
doc_metadata_.author = GetTrimmedMetadataByField("Author");
doc_metadata_.subject = GetTrimmedMetadataByField("Subject");
......
......@@ -302,6 +302,9 @@ class PDFiumEngine : public PDFEngine,
void LoadForm();
// Checks whether the document is optimized by linearization.
bool IsLinearized();
// Calculates which pages should be displayed right now.
void CalculateVisiblePages();
......
......@@ -349,6 +349,7 @@ TEST_F(PDFiumEngineTest, GetDocumentMetadata) {
const DocumentMetadata& doc_metadata = engine->GetDocumentMetadata();
EXPECT_EQ(PdfVersion::k1_7, doc_metadata.version);
EXPECT_FALSE(doc_metadata.linearized);
EXPECT_EQ("Sample PDF Document Info", doc_metadata.title);
EXPECT_EQ("Chromium Authors", doc_metadata.author);
EXPECT_EQ("Testing", doc_metadata.subject);
......@@ -365,6 +366,7 @@ TEST_F(PDFiumEngineTest, GetEmptyDocumentMetadata) {
const DocumentMetadata& doc_metadata = engine->GetDocumentMetadata();
EXPECT_EQ(PdfVersion::k1_7, doc_metadata.version);
EXPECT_FALSE(doc_metadata.linearized);
EXPECT_THAT(doc_metadata.title, IsEmpty());
EXPECT_THAT(doc_metadata.author, IsEmpty());
EXPECT_THAT(doc_metadata.subject, IsEmpty());
......@@ -372,6 +374,14 @@ TEST_F(PDFiumEngineTest, GetEmptyDocumentMetadata) {
EXPECT_THAT(doc_metadata.producer, IsEmpty());
}
// Loads linearized.pdf and checks that the engine's document metadata has
// its `linearized` flag set.
TEST_F(PDFiumEngineTest, GetLinearizedDocumentMetadata) {
  NiceMock<MockTestClient> mock_client;
  std::unique_ptr<PDFiumEngine> linearized_engine =
      InitializeEngine(&mock_client, FILE_PATH_LITERAL("linearized.pdf"));
  ASSERT_TRUE(linearized_engine);

  const auto& metadata = linearized_engine->GetDocumentMetadata();
  EXPECT_TRUE(metadata.linearized);
}
TEST_F(PDFiumEngineTest, GetBadPdfVersion) {
NiceMock<MockTestClient> client;
std::unique_ptr<PDFiumEngine> engine =
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment