Commit d0cb5e40 authored by Jay Civelli's avatar Jay Civelli Committed by Commit Bot

Changing the FileAccessor API in zip.h to improve perfs over IPC.

When using zip::Zip() with an IPC based FileAccessor, zipping
directories with large number of files triggers many IPC calls
making the entire operation significantly slower than with direct file
access.
In order to alleviate this performance hit, this patch groups file
reads by modifying the FileAccessor read method so it reads multiple
files at once. zip::Zip() can then group these reads when writing the
ZIP file.
The writing code has been factored out into a new ZipWriter class to
make that code more readable.

Bug: 773310
Change-Id: I8121980bf05d87a174c63164840ec6bf325c7e52
Reviewed-on: https://chromium-review.googlesource.com/719356
Commit-Queue: Jay Civelli <jcivelli@chromium.org>
Reviewed-by: default avatarIlya Sherman <isherman@chromium.org>
Cr-Commit-Position: refs/heads/master@{#509693}
parent d2c5843c
...@@ -13,6 +13,8 @@ if (build_with_chromium) { ...@@ -13,6 +13,8 @@ if (build_with_chromium) {
"zip_internal.h", "zip_internal.h",
"zip_reader.cc", "zip_reader.cc",
"zip_reader.h", "zip_reader.h",
"zip_writer.cc",
"zip_writer.h",
] ]
deps = [ deps = [
"//base", "//base",
......
...@@ -12,81 +12,15 @@ ...@@ -12,81 +12,15 @@
#include "base/files/file.h" #include "base/files/file.h"
#include "base/files/file_enumerator.h" #include "base/files/file_enumerator.h"
#include "base/logging.h" #include "base/logging.h"
#include "base/strings/string16.h"
#include "base/strings/string_util.h" #include "base/strings/string_util.h"
#include "build/build_config.h" #include "build/build_config.h"
#include "third_party/zlib/google/zip_internal.h" #include "third_party/zlib/google/zip_internal.h"
#include "third_party/zlib/google/zip_reader.h" #include "third_party/zlib/google/zip_reader.h"
#include "third_party/zlib/google/zip_writer.h"
#if defined(USE_SYSTEM_MINIZIP)
#include <minizip/unzip.h>
#include <minizip/zip.h>
#else
#include "third_party/zlib/contrib/minizip/unzip.h"
#include "third_party/zlib/contrib/minizip/zip.h"
#endif
namespace zip { namespace zip {
namespace { namespace {
bool AddFileToZip(zipFile zip_file,
const base::FilePath& src_dir,
FileAccessor* file_accessor) {
base::File file = file_accessor->OpenFileForReading(src_dir);
if (!file.IsValid()) {
DLOG(ERROR) << "Could not open file for path " << src_dir.value();
return false;
}
int num_bytes;
char buf[zip::internal::kZipBufSize];
do {
num_bytes = file.ReadAtCurrentPos(buf, zip::internal::kZipBufSize);
if (num_bytes > 0) {
if (ZIP_OK != zipWriteInFileInZip(zip_file, buf, num_bytes)) {
DLOG(ERROR) << "Could not write data to zip for path "
<< src_dir.value();
return false;
}
}
} while (num_bytes > 0);
return true;
}
bool AddEntryToZip(zipFile zip_file,
const base::FilePath& path,
const base::FilePath& root_path,
FileAccessor* file_accessor) {
base::FilePath relative_path;
bool result = root_path.AppendRelativePath(path, &relative_path);
DCHECK(result);
std::string str_path = relative_path.AsUTF8Unsafe();
#if defined(OS_WIN)
base::ReplaceSubstringsAfterOffset(&str_path, 0u, "\\", "/");
#endif
bool is_directory = file_accessor->DirectoryExists(path);
if (is_directory)
str_path += "/";
if (!zip::internal::ZipOpenNewFileInZip(
zip_file, str_path, file_accessor->GetLastModifiedTime(path)))
return false;
bool success = true;
if (!is_directory) {
success = AddFileToZip(zip_file, path, file_accessor);
}
if (ZIP_OK != zipCloseFileInZip(zip_file)) {
DLOG(ERROR) << "Could not close zip file entry " << str_path;
return false;
}
return success;
}
bool IsHiddenFile(const base::FilePath& file_path) { bool IsHiddenFile(const base::FilePath& file_path) {
return file_path.BaseName().value()[0] == '.'; return file_path.BaseName().value()[0] == '.';
} }
...@@ -101,10 +35,20 @@ bool ExcludeHiddenFilesFilter(const base::FilePath& file_path) { ...@@ -101,10 +35,20 @@ bool ExcludeHiddenFilesFilter(const base::FilePath& file_path) {
class DirectFileAccessor : public FileAccessor { class DirectFileAccessor : public FileAccessor {
public: public:
explicit DirectFileAccessor(base::FilePath src_dir) : src_dir_(src_dir) {}
~DirectFileAccessor() override = default; ~DirectFileAccessor() override = default;
base::File OpenFileForReading(const base::FilePath& file) override { std::vector<base::File> OpenFilesForReading(
return base::File(file, base::File::FLAG_OPEN | base::File::FLAG_READ); const std::vector<base::FilePath>& paths) override {
std::vector<base::File> files;
for (const auto& path : paths) {
base::File file;
if (base::PathExists(path) && !base::DirectoryExists(path)) {
file = base::File(path, base::File::FLAG_OPEN | base::File::FLAG_READ);
}
files.push_back(std::move(file));
}
return files;
} }
bool DirectoryExists(const base::FilePath& file) override { bool DirectoryExists(const base::FilePath& file) override {
...@@ -132,6 +76,11 @@ class DirectFileAccessor : public FileAccessor { ...@@ -132,6 +76,11 @@ class DirectFileAccessor : public FileAccessor {
} }
return file_info.last_modified; return file_info.last_modified;
} }
private:
base::FilePath src_dir_;
DISALLOW_COPY_AND_ASSIGN(DirectFileAccessor);
}; };
} // namespace } // namespace
...@@ -140,42 +89,17 @@ ZipParams::ZipParams(const base::FilePath& src_dir, ...@@ -140,42 +89,17 @@ ZipParams::ZipParams(const base::FilePath& src_dir,
const base::FilePath& dest_file) const base::FilePath& dest_file)
: src_dir_(src_dir), : src_dir_(src_dir),
dest_file_(dest_file), dest_file_(dest_file),
file_accessor_(new DirectFileAccessor()) {} file_accessor_(new DirectFileAccessor(src_dir)) {}
#if defined(OS_POSIX) #if defined(OS_POSIX)
// Does not take ownership of |fd|. // Does not take ownership of |fd|.
ZipParams::ZipParams(const base::FilePath& src_dir, int dest_fd) ZipParams::ZipParams(const base::FilePath& src_dir, int dest_fd)
: src_dir_(src_dir), : src_dir_(src_dir),
dest_fd_(dest_fd), dest_fd_(dest_fd),
file_accessor_(new DirectFileAccessor()) {} file_accessor_(new DirectFileAccessor(src_dir)) {}
#endif #endif
bool Zip(const ZipParams& params) { bool Zip(const ZipParams& params) {
DCHECK(params.file_accessor()->DirectoryExists(params.src_dir()));
zipFile zip_file = nullptr;
#if defined(OS_POSIX)
int dest_fd = params.dest_fd();
if (dest_fd != base::kInvalidPlatformFile) {
DCHECK(params.dest_file().empty());
zip_file = internal::OpenFdForZipping(dest_fd, APPEND_STATUS_CREATE);
if (!zip_file) {
DLOG(ERROR) << "Couldn't create ZIP file for FD " << dest_fd;
return false;
}
}
#endif
if (!zip_file) {
const base::FilePath& dest_file = params.dest_file();
DCHECK(!dest_file.empty());
zip_file = internal::OpenForZipping(dest_file.AsUTF8Unsafe(),
APPEND_STATUS_CREATE);
if (!zip_file) {
DLOG(WARNING) << "Couldn't create ZIP file at path " << dest_file;
return false;
}
}
// Using a pointer to avoid copies of a potentially large array. // Using a pointer to avoid copies of a potentially large array.
const std::vector<base::FilePath>* files_to_add = &params.files_to_zip(); const std::vector<base::FilePath>* files_to_add = &params.files_to_zip();
std::vector<base::FilePath> all_files; std::vector<base::FilePath> all_files;
...@@ -214,23 +138,23 @@ bool Zip(const ZipParams& params) { ...@@ -214,23 +138,23 @@ bool Zip(const ZipParams& params) {
} }
} }
bool success = true; std::unique_ptr<internal::ZipWriter> zip_writer;
for (auto iter = files_to_add->begin(); iter != files_to_add->end(); ++iter) { #if defined(OS_POSIX)
const base::FilePath& path = params.src_dir().Append(*iter); if (params.dest_fd() != base::kInvalidPlatformFile) {
if (!AddEntryToZip(zip_file, path, params.src_dir(), DCHECK(params.dest_file().empty());
params.file_accessor())) { zip_writer = internal::ZipWriter::CreateWithFd(
// TODO(hshi): clean up the partial zip file when error occurs. params.dest_fd(), params.src_dir(), params.file_accessor());
success = false; if (!zip_writer)
break; return false;
}
} }
#endif
if (ZIP_OK != zipClose(zip_file, NULL)) { if (!zip_writer) {
DLOG(ERROR) << "Error closing zip file " << params.dest_file().value(); zip_writer = internal::ZipWriter::Create(
return false; params.dest_file(), params.src_dir(), params.file_accessor());
if (!zip_writer)
return false;
} }
return zip_writer->WriteEntries(*files_to_add);
return success;
} }
bool Unzip(const base::FilePath& src_file, const base::FilePath& dest_dir) { bool Unzip(const base::FilePath& src_file, const base::FilePath& dest_dir) {
......
...@@ -23,6 +23,7 @@ namespace zip { ...@@ -23,6 +23,7 @@ namespace zip {
// Can be passed to the ZipParams for providing custom access to the files, // Can be passed to the ZipParams for providing custom access to the files,
// for example over IPC. // for example over IPC.
// If none is provided, the files are accessed directly. // If none is provided, the files are accessed directly.
// All parameters paths are expected to be absolute.
class FileAccessor { class FileAccessor {
public: public:
virtual ~FileAccessor() = default; virtual ~FileAccessor() = default;
...@@ -34,7 +35,11 @@ class FileAccessor { ...@@ -34,7 +35,11 @@ class FileAccessor {
bool is_directory = false; bool is_directory = false;
}; };
virtual base::File OpenFileForReading(const base::FilePath& path) = 0; // Opens files specified in |paths|.
// Directories should be mapped to invalid files.
virtual std::vector<base::File> OpenFilesForReading(
const std::vector<base::FilePath>& paths) = 0;
virtual bool DirectoryExists(const base::FilePath& path) = 0; virtual bool DirectoryExists(const base::FilePath& path) = 0;
virtual std::vector<DirectoryContentEntry> ListDirectoryContent( virtual std::vector<DirectoryContentEntry> ListDirectoryContent(
const base::FilePath& dir_path) = 0; const base::FilePath& dir_path) = 0;
......
...@@ -86,13 +86,15 @@ class VirtualFileSystem : public zip::FileAccessor { ...@@ -86,13 +86,15 @@ class VirtualFileSystem : public zip::FileAccessor {
~VirtualFileSystem() override = default; ~VirtualFileSystem() override = default;
private: private:
base::File OpenFileForReading(const base::FilePath& file) override { std::vector<base::File> OpenFilesForReading(
auto iter = files_.find(file); const std::vector<base::FilePath>& paths) override {
if (iter == files_.end()) { std::vector<base::File> files;
NOTREACHED(); for (const auto& path : paths) {
return base::File(); auto iter = files_.find(path);
files.push_back(iter == files_.end() ? base::File()
: std::move(iter->second));
} }
return std::move(iter->second); return files;
} }
bool DirectoryExists(const base::FilePath& file) override { bool DirectoryExists(const base::FilePath& file) override {
...@@ -345,7 +347,7 @@ TEST_F(ZipTest, Zip) { ...@@ -345,7 +347,7 @@ TEST_F(ZipTest, Zip) {
ASSERT_TRUE(temp_dir.CreateUniqueTempDir()); ASSERT_TRUE(temp_dir.CreateUniqueTempDir());
base::FilePath zip_file = temp_dir.GetPath().AppendASCII("out.zip"); base::FilePath zip_file = temp_dir.GetPath().AppendASCII("out.zip");
EXPECT_TRUE(zip::Zip(src_dir, zip_file, true)); EXPECT_TRUE(zip::Zip(src_dir, zip_file, /*include_hidden_files=*/true));
TestUnzipFile(zip_file, true); TestUnzipFile(zip_file, true);
} }
...@@ -358,7 +360,7 @@ TEST_F(ZipTest, ZipIgnoreHidden) { ...@@ -358,7 +360,7 @@ TEST_F(ZipTest, ZipIgnoreHidden) {
ASSERT_TRUE(temp_dir.CreateUniqueTempDir()); ASSERT_TRUE(temp_dir.CreateUniqueTempDir());
base::FilePath zip_file = temp_dir.GetPath().AppendASCII("out.zip"); base::FilePath zip_file = temp_dir.GetPath().AppendASCII("out.zip");
EXPECT_TRUE(zip::Zip(src_dir, zip_file, false)); EXPECT_TRUE(zip::Zip(src_dir, zip_file, /*include_hidden_files=*/false));
TestUnzipFile(zip_file, false); TestUnzipFile(zip_file, false);
} }
......
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "third_party/zlib/google/zip_writer.h"
#include "base/files/file.h"
#include "base/strings/string_util.h"
#include "third_party/zlib/google/zip_internal.h"
namespace zip {
namespace internal {
namespace {
// Numbers of pending entries that trigger writting them to the ZIP file.
constexpr size_t kMaxPendingEntriesCount = 50;
bool AddFileContentToZip(zipFile zip_file,
base::File file,
const base::FilePath& file_path) {
int num_bytes;
char buf[zip::internal::kZipBufSize];
do {
num_bytes = file.ReadAtCurrentPos(buf, zip::internal::kZipBufSize);
if (num_bytes > 0) {
if (zipWriteInFileInZip(zip_file, buf, num_bytes) != ZIP_OK) {
DLOG(ERROR) << "Could not write data to zip for path "
<< file_path.value();
return false;
}
}
} while (num_bytes > 0);
return true;
}
bool OpenNewFileEntry(zipFile zip_file,
const base::FilePath& path,
bool is_directory,
base::Time last_modified) {
std::string str_path = path.AsUTF8Unsafe();
#if defined(OS_WIN)
base::ReplaceSubstringsAfterOffset(&str_path, 0u, "\\", "/");
#endif
if (is_directory)
str_path += "/";
return zip::internal::ZipOpenNewFileInZip(zip_file, str_path, last_modified);
}
bool CloseNewFileEntry(zipFile zip_file) {
return zipCloseFileInZip(zip_file) == ZIP_OK;
}
bool AddFileEntryToZip(zipFile zip_file,
const base::FilePath& path,
base::File file) {
base::File::Info file_info;
if (!file.GetInfo(&file_info))
return false;
if (!OpenNewFileEntry(zip_file, path, /*is_directory=*/false,
file_info.last_modified))
return false;
bool success = AddFileContentToZip(zip_file, std::move(file), path);
if (!CloseNewFileEntry(zip_file))
return false;
return success;
}
bool AddDirectoryEntryToZip(zipFile zip_file,
const base::FilePath& path,
base::Time last_modified) {
return OpenNewFileEntry(zip_file, path, /*is_directory=*/true,
last_modified) &&
CloseNewFileEntry(zip_file);
}
} // namespace
#if defined(OS_POSIX)
// static
std::unique_ptr<ZipWriter> ZipWriter::CreateWithFd(
int zip_file_fd,
const base::FilePath& root_dir,
FileAccessor* file_accessor) {
DCHECK(zip_file_fd != base::kInvalidPlatformFile);
zipFile zip_file =
internal::OpenFdForZipping(zip_file_fd, APPEND_STATUS_CREATE);
if (!zip_file) {
DLOG(ERROR) << "Couldn't create ZIP file for FD " << zip_file_fd;
return nullptr;
}
return std::unique_ptr<ZipWriter>(
new ZipWriter(zip_file, root_dir, file_accessor));
}
#endif
// static
std::unique_ptr<ZipWriter> ZipWriter::Create(
const base::FilePath& zip_file_path,
const base::FilePath& root_dir,
FileAccessor* file_accessor) {
DCHECK(!zip_file_path.empty());
zipFile zip_file = internal::OpenForZipping(zip_file_path.AsUTF8Unsafe(),
APPEND_STATUS_CREATE);
if (!zip_file) {
DLOG(ERROR) << "Couldn't create ZIP file at path " << zip_file_path;
return nullptr;
}
return std::unique_ptr<ZipWriter>(
new ZipWriter(zip_file, root_dir, file_accessor));
}
ZipWriter::ZipWriter(zipFile zip_file,
const base::FilePath& root_dir,
FileAccessor* file_accessor)
: zip_file_(zip_file), root_dir_(root_dir), file_accessor_(file_accessor) {}
ZipWriter::~ZipWriter() {
DCHECK(pending_entries_.empty());
}
bool ZipWriter::WriteEntries(const std::vector<base::FilePath>& paths) {
return AddEntries(paths) && Close();
}
bool ZipWriter::AddEntries(const std::vector<base::FilePath>& paths) {
DCHECK(zip_file_);
pending_entries_.insert(pending_entries_.end(), paths.begin(), paths.end());
return FlushEntriesIfNeeded(/*force=*/false);
}
bool ZipWriter::Close() {
bool success = FlushEntriesIfNeeded(/*force=*/true) &&
zipClose(zip_file_, nullptr) == ZIP_OK;
zip_file_ = nullptr;
return success;
}
bool ZipWriter::FlushEntriesIfNeeded(bool force) {
if (pending_entries_.size() < kMaxPendingEntriesCount && !force)
return true;
while (pending_entries_.size() >= kMaxPendingEntriesCount ||
(force && !pending_entries_.empty())) {
size_t entry_count =
std::min(pending_entries_.size(), kMaxPendingEntriesCount);
std::vector<base::FilePath> relative_paths;
std::vector<base::FilePath> absolute_paths;
relative_paths.insert(relative_paths.begin(), pending_entries_.begin(),
pending_entries_.begin() + entry_count);
for (auto iter = pending_entries_.begin();
iter != pending_entries_.begin() + entry_count; ++iter) {
// The FileAccessor requires absolute paths.
absolute_paths.push_back(root_dir_.Append(*iter));
}
pending_entries_.erase(pending_entries_.begin(),
pending_entries_.begin() + entry_count);
// We don't know which paths are files and which ones are directories, and
// we want to avoid making a call to file_accessor_ for each entry. Open the
// files instead, invalid files are returned for directories.
std::vector<base::File> files =
file_accessor_->OpenFilesForReading(absolute_paths);
DCHECK_EQ(files.size(), relative_paths.size());
for (size_t i = 0; i < files.size(); i++) {
const base::FilePath& relative_path = relative_paths[i];
const base::FilePath& absolute_path = absolute_paths[i];
base::File file = std::move(files[i]);
if (file.IsValid()) {
if (!AddFileEntryToZip(zip_file_, relative_path, std::move(file))) {
LOG(ERROR) << "Failed to write file " << relative_path.value()
<< " to ZIP file.";
return false;
}
} else {
// Missing file or directory case.
base::Time last_modified =
file_accessor_->GetLastModifiedTime(absolute_path);
if (last_modified.is_null()) {
LOG(ERROR) << "Failed to write entry " << relative_path.value()
<< " to ZIP file.";
return false;
}
DCHECK(file_accessor_->DirectoryExists(absolute_path));
if (!AddDirectoryEntryToZip(zip_file_, relative_path, last_modified)) {
LOG(ERROR) << "Failed to write directory " << relative_path.value()
<< " to ZIP file.";
return false;
}
}
}
}
return true;
}
} // namespace internal
} // namespace zip
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_WRITER_H_
#define THIRD_PARTY_ZLIB_GOOGLE_ZIP_WRITER_H_
#include <memory>
#include <vector>
#include "base/files/file_path.h"
#include "build/build_config.h"
#include "third_party/zlib/google/zip.h"
#if defined(USE_SYSTEM_MINIZIP)
#include <minizip/unzip.h>
#include <minizip/zip.h>
#else
#include "third_party/zlib/contrib/minizip/unzip.h"
#include "third_party/zlib/contrib/minizip/zip.h"
#endif
namespace zip {
namespace internal {
// A class used to write entries to a ZIP file and buffering the reading of
// files to limit the number of calls to the FileAccessor. This is for
// performance reasons as these calls may be expensive when IPC based).
// This class is so far internal and only used by zip.cc, but could be made
// public if needed.
class ZipWriter {
public:
// Creates a writer that will write a ZIP file to |zip_file_fd|/|zip_file|
// and which entries (specifies with AddEntries) are relative to |root_dir|.
// All file reads are performed using |file_accessor|.
#if defined(OS_POSIX)
static std::unique_ptr<ZipWriter> CreateWithFd(int zip_file_fd,
const base::FilePath& root_dir,
FileAccessor* file_accessor);
#endif
static std::unique_ptr<ZipWriter> Create(const base::FilePath& zip_file,
const base::FilePath& root_dir,
FileAccessor* file_accessor);
~ZipWriter();
// Writes the files at |paths| to the ZIP file and closes this Zip file.
// Note that the the FilePaths must be relative to |root_dir| specified in the
// Create method.
// Returns true if all entries were written successfuly.
bool WriteEntries(const std::vector<base::FilePath>& paths);
private:
ZipWriter(zipFile zip_file,
const base::FilePath& root_dir,
FileAccessor* file_accessor);
// Writes the pending entries to the ZIP file if there are at least
// |kMaxPendingEntriesCount| of them. If |force| is true, all pending entries
// are written regardless of how many there are.
// Returns false if writing an entry fails, true if no entry was written or
// there was no error writing entries.
bool FlushEntriesIfNeeded(bool force);
// Adds the files at |paths| to the ZIP file. These FilePaths must be relative
// to |root_dir| specified in the Create method.
bool AddEntries(const std::vector<base::FilePath>& paths);
// Closes the ZIP file.
// Returns true if successful, false otherwise (typically if an entry failed
// to be written).
bool Close();
// The entries that have been added but not yet written to the ZIP file.
std::vector<base::FilePath> pending_entries_;
// The actual zip file.
zipFile zip_file_;
// Path to the directory entry paths are relative to.
base::FilePath root_dir_;
// Abstraction over file access methods used to read files.
FileAccessor* file_accessor_;
DISALLOW_COPY_AND_ASSIGN(ZipWriter);
};
} // namespace internal
} // namespace zip
#endif // THIRD_PARTY_ZLIB_GOOGLE_ZIP_WRITER_H_
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment