Commit 5700ef4d authored by asanka@chromium.org's avatar asanka@chromium.org

[Downloads] Retry renames after transient failures.

If the cause of a rename failure is suspected of being transient
(sharing violations, suspected race conditions in the file system), then
retry the rename with an exponential backoff.

BUG=368455

Review URL: https://codereview.chromium.org/319603003

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@278483 0039d316-1c4b-4281-b951-d872f2087c98
parent cad4d0d4
......@@ -159,15 +159,18 @@ DownloadInterruptReason BaseFile::Rename(const base::FilePath& new_path) {
// permissions / security descriptors that makes sense in the new directory.
rename_result = MoveFileAndAdjustPermissions(new_path);
if (rename_result == DOWNLOAD_INTERRUPT_REASON_NONE) {
if (rename_result == DOWNLOAD_INTERRUPT_REASON_NONE)
full_path_ = new_path;
// Re-open the file if we were still using it.
if (was_in_progress)
rename_result = Open();
}
// Re-open the file if we were still using it regardless of the interrupt
// reason.
DownloadInterruptReason open_result = DOWNLOAD_INTERRUPT_REASON_NONE;
if (was_in_progress)
open_result = Open();
bound_net_log_.EndEvent(net::NetLog::TYPE_DOWNLOAD_FILE_RENAMED);
return rename_result;
return rename_result == DOWNLOAD_INTERRUPT_REASON_NONE ? open_result
: rename_result;
}
void BaseFile::Detach() {
......@@ -326,11 +329,10 @@ DownloadInterruptReason BaseFile::LogSystemError(
const char* operation,
logging::SystemErrorCode os_error) {
// There's no direct conversion from a system error to an interrupt reason.
net::Error net_error = net::MapSystemError(os_error);
base::File::Error file_error = base::File::OSErrorToFileError(os_error);
return LogInterruptReason(
operation, os_error,
ConvertNetErrorToInterruptReason(
net_error, DOWNLOAD_INTERRUPT_FROM_DISK));
ConvertFileErrorToInterruptReason(file_error));
}
DownloadInterruptReason BaseFile::LogInterruptReason(
......
......@@ -56,7 +56,10 @@ class CONTENT_EXPORT BaseFile {
DownloadInterruptReason AppendDataToFile(const char* data, size_t data_len);
// Rename the download file. Returns a DownloadInterruptReason indicating the
// result of the operation.
// result of the operation. A return code of NONE indicates that the rename
// was successful. After a failure, the full_path() and in_progress() can be
// used to determine the last known filename and whether the file is available
// for writing or retrying the rename.
virtual DownloadInterruptReason Rename(const base::FilePath& full_path);
// Detach the file so it is not deleted on destruction.
......@@ -79,8 +82,15 @@ class CONTENT_EXPORT BaseFile {
// Windows to ensure the correct app client ID is available.
DownloadInterruptReason AnnotateWithSourceInformation();
base::FilePath full_path() const { return full_path_; }
// Returns the last known path to the download file. Can be empty if there's
// no file.
const base::FilePath& full_path() const { return full_path_; }
// Returns true if the file is open. If true, the file can be written to or
// renamed.
bool in_progress() const { return file_.IsValid(); }
// Returns the number of bytes in the file pointed to by full_path().
int64 bytes_so_far() const { return bytes_so_far_; }
// Fills |hash| with the hash digest for the file.
......
......@@ -27,8 +27,6 @@ DownloadInterruptReason BaseFile::MoveFileAndAdjustPermissions(
if (!stat_succeeded)
LogSystemError("stat", errno);
// TODO(estade): Move() falls back to copying and deleting when a simple
// rename fails. Copying sucks for large downloads. crbug.com/8737
if (!base::Move(full_path_, new_path))
return LogSystemError("Move", errno);
......
......@@ -476,6 +476,58 @@ TEST_F(BaseFileTest, RenameWithError) {
base_file_->Finish();
}
// Test that if a rename fails for an in-progress BaseFile, it remains writeable
// and renameable.
TEST_F(BaseFileTest, RenameWithErrorInProgress) {
ASSERT_TRUE(InitializeFile());
base::FilePath test_dir(temp_dir_.path().AppendASCII("TestDir"));
ASSERT_TRUE(base::CreateDirectory(test_dir));
base::FilePath new_path(test_dir.AppendASCII("TestFile"));
EXPECT_FALSE(base::PathExists(new_path));
// Write some data to start with.
ASSERT_TRUE(AppendDataToFile(kTestData1));
ASSERT_TRUE(base_file_->in_progress());
base::FilePath old_path = base_file_->full_path();
{
file_util::PermissionRestorer restore_permissions_for(test_dir);
ASSERT_TRUE(file_util::MakeFileUnwritable(test_dir));
EXPECT_EQ(DOWNLOAD_INTERRUPT_REASON_FILE_ACCESS_DENIED,
base_file_->Rename(new_path));
// The file should still be open and we should be able to continue writing
// to it.
ASSERT_TRUE(base_file_->in_progress());
ASSERT_TRUE(AppendDataToFile(kTestData2));
ASSERT_EQ(old_path.value(), base_file_->full_path().value());
// Try to rename again, just for kicks. It should still fail with
// ACCESS_DENIED.
EXPECT_EQ(DOWNLOAD_INTERRUPT_REASON_FILE_ACCESS_DENIED,
base_file_->Rename(new_path));
}
// Now that TestDir is writeable again, we should be able to successfully
// rename the file.
EXPECT_EQ(DOWNLOAD_INTERRUPT_REASON_NONE, base_file_->Rename(new_path));
ASSERT_EQ(new_path.value(), base_file_->full_path().value());
ASSERT_TRUE(AppendDataToFile(kTestData3));
base_file_->Finish();
// The contents of the file should be intact.
std::string file_contents;
std::string expected_contents(kTestData1);
expected_contents += kTestData2;
expected_contents += kTestData3;
ASSERT_TRUE(base::ReadFileToString(new_path, &file_contents));
EXPECT_EQ(expected_contents, file_contents);
}
// Test that a failed write reports an error.
TEST_F(BaseFileTest, WriteWithError) {
base::FilePath path;
......
......@@ -10,6 +10,7 @@
#include <shellapi.h>
#include "base/file_util.h"
#include "base/files/file.h"
#include "base/guid.h"
#include "base/metrics/histogram.h"
#include "base/strings/utf_string_conversions.h"
......@@ -25,6 +26,8 @@ namespace {
const int kAllSpecialShFileOperationCodes[] = {
// Should be kept in sync with the case statement below.
ERROR_ACCESS_DENIED,
ERROR_SHARING_VIOLATION,
ERROR_INVALID_PARAMETER,
0x71,
0x72,
0x73,
......@@ -69,10 +72,25 @@ DownloadInterruptReason MapShFileOperationCodes(int code) {
// above.
switch (code) {
// Not a pre-Win32 error code; here so that this particular
// case shows up in our histograms. This is redundant with the
// mapping function net::MapSystemError used later.
// case shows up in our histograms.
case ERROR_ACCESS_DENIED: // Access is denied.
result = DOWNLOAD_INTERRUPT_REASON_FILE_ACCESS_DENIED;
result = DOWNLOAD_INTERRUPT_REASON_FILE_TRANSIENT_ERROR;
break;
// This isn't documented but returned from SHFileOperation. Sharing
// violations indicate that another process had the file open while we were
// trying to rename. Anti-virus is believed to be the cause of this error in
// the wild. Treated as a transient error on the assumption that the file
// will be made available for renaming at a later time.
case ERROR_SHARING_VIOLATION:
result = DOWNLOAD_INTERRUPT_REASON_FILE_TRANSIENT_ERROR;
break;
// This is also not a documented return value of SHFileOperation, but has
// been observed in the wild. We are treating it as a transient error based
// on the cases we have seen so far. See http://crbug.com/368455.
case ERROR_INVALID_PARAMETER:
result = DOWNLOAD_INTERRUPT_REASON_FILE_TRANSIENT_ERROR;
break;
// The source and destination files are the same file.
......@@ -250,12 +268,20 @@ DownloadInterruptReason MapShFileOperationCodes(int code) {
arraysize(kAllSpecialShFileOperationCodes)));
}
if (result == DOWNLOAD_INTERRUPT_REASON_FILE_TRANSIENT_ERROR) {
UMA_HISTOGRAM_CUSTOM_ENUMERATION(
"Download.MapWinShErrorTransientError", code,
base::CustomHistogram::ArrayToCustomRanges(
kAllSpecialShFileOperationCodes,
arraysize(kAllSpecialShFileOperationCodes)));
}
if (result != DOWNLOAD_INTERRUPT_REASON_NONE)
return result;
// If not one of the above codes, it should be a standard Windows error code.
return ConvertNetErrorToInterruptReason(
net::MapSystemError(code), DOWNLOAD_INTERRUPT_FROM_DISK);
return ConvertFileErrorToInterruptReason(
base::File::OSErrorToFileError(code));
}
// Maps a return code from ScanAndSaveDownloadedFile() to a
......
......@@ -25,6 +25,12 @@ namespace content {
const int kUpdatePeriodMs = 500;
const int kMaxTimeBlockingFileThreadMs = 1000;
// These constants control the default retry behavior for failing renames. Each
// retry is performed after a delay that is twice the previous delay. The
// initial delay is specified by kInitialRenameRetryDelayMs.
const int kMaxRenameRetries = 3;
const int kInitialRenameRetryDelayMs = 200;
int DownloadFile::number_active_objects_ = 0;
DownloadFileImpl::DownloadFileImpl(
......@@ -36,20 +42,20 @@ DownloadFileImpl::DownloadFileImpl(
scoped_ptr<ByteStreamReader> stream,
const net::BoundNetLog& bound_net_log,
base::WeakPtr<DownloadDestinationObserver> observer)
: file_(save_info->file_path,
url,
referrer_url,
save_info->offset,
calculate_hash,
save_info->hash_state,
save_info->file.Pass(),
bound_net_log),
default_download_directory_(default_download_directory),
stream_reader_(stream.Pass()),
bytes_seen_(0),
bound_net_log_(bound_net_log),
observer_(observer),
weak_factory_(this) {
: file_(save_info->file_path,
url,
referrer_url,
save_info->offset,
calculate_hash,
save_info->hash_state,
save_info->file.Pass(),
bound_net_log),
default_download_directory_(default_download_directory),
stream_reader_(stream.Pass()),
bytes_seen_(0),
bound_net_log_(bound_net_log),
observer_(observer),
weak_factory_(this) {
}
DownloadFileImpl::~DownloadFileImpl() {
......@@ -103,47 +109,84 @@ DownloadInterruptReason DownloadFileImpl::AppendDataToFile(
void DownloadFileImpl::RenameAndUniquify(
const base::FilePath& full_path,
const RenameCompletionCallback& callback) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
base::FilePath new_path(full_path);
int uniquifier = base::GetUniquePathNumber(
new_path, base::FilePath::StringType());
if (uniquifier > 0) {
new_path = new_path.InsertBeforeExtensionASCII(
base::StringPrintf(" (%d)", uniquifier));
}
DownloadInterruptReason reason = file_.Rename(new_path);
if (reason != DOWNLOAD_INTERRUPT_REASON_NONE) {
// Make sure our information is updated, since we're about to
// error out.
SendUpdate();
RenameWithRetryInternal(
full_path, UNIQUIFY, kMaxRenameRetries, base::TimeTicks(), callback);
}
// Null out callback so that we don't do any more stream processing.
stream_reader_->RegisterCallback(base::Closure());
void DownloadFileImpl::RenameAndAnnotate(
const base::FilePath& full_path,
const RenameCompletionCallback& callback) {
RenameWithRetryInternal(full_path,
ANNOTATE_WITH_SOURCE_INFORMATION,
kMaxRenameRetries,
base::TimeTicks(),
callback);
}
new_path.clear();
}
base::TimeDelta DownloadFileImpl::GetRetryDelayForFailedRename(
int attempt_number) {
DCHECK_GE(attempt_number, 0);
// |delay| starts at kInitialRenameRetryDelayMs and increases by a factor of
// 2 at each subsequent retry. Assumes that |retries_left| starts at
// kMaxRenameRetries. Also assumes that kMaxRenameRetries is less than the
// number of bits in an int.
return base::TimeDelta::FromMilliseconds(kInitialRenameRetryDelayMs) *
(1 << attempt_number);
}
BrowserThread::PostTask(
BrowserThread::UI, FROM_HERE,
base::Bind(callback, reason, new_path));
bool DownloadFileImpl::ShouldRetryFailedRename(DownloadInterruptReason reason) {
return reason == DOWNLOAD_INTERRUPT_REASON_FILE_TRANSIENT_ERROR;
}
void DownloadFileImpl::RenameAndAnnotate(
void DownloadFileImpl::RenameWithRetryInternal(
const base::FilePath& full_path,
RenameOption option,
int retries_left,
base::TimeTicks time_of_first_failure,
const RenameCompletionCallback& callback) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
base::FilePath new_path(full_path);
DownloadInterruptReason reason = DOWNLOAD_INTERRUPT_REASON_NONE;
// Short circuit null rename.
if (full_path != file_.full_path())
reason = file_.Rename(new_path);
if ((option & UNIQUIFY) && full_path != file_.full_path()) {
int uniquifier =
base::GetUniquePathNumber(new_path, base::FilePath::StringType());
if (uniquifier > 0)
new_path = new_path.InsertBeforeExtensionASCII(
base::StringPrintf(" (%d)", uniquifier));
}
DownloadInterruptReason reason = file_.Rename(new_path);
// Attempt to retry the rename if possible. If the rename failed and the
// subsequent open also failed, then in_progress() would be false. We don't
// try to retry renames if the in_progress() was false to begin with since we
// have less assurance that the file at file_.full_path() was the one we were
// working with.
if (ShouldRetryFailedRename(reason) && file_.in_progress() &&
retries_left > 0) {
int attempt_number = kMaxRenameRetries - retries_left;
BrowserThread::PostDelayedTask(
BrowserThread::FILE,
FROM_HERE,
base::Bind(&DownloadFileImpl::RenameWithRetryInternal,
weak_factory_.GetWeakPtr(),
full_path,
option,
--retries_left,
time_of_first_failure.is_null() ? base::TimeTicks::Now()
: time_of_first_failure,
callback),
GetRetryDelayForFailedRename(attempt_number));
return;
}
if (!time_of_first_failure.is_null())
RecordDownloadFileRenameResultAfterRetry(
base::TimeTicks::Now() - time_of_first_failure, reason);
if (reason == DOWNLOAD_INTERRUPT_REASON_NONE) {
if (reason == DOWNLOAD_INTERRUPT_REASON_NONE &&
(option & ANNOTATE_WITH_SOURCE_INFORMATION)) {
// Doing the annotation after the rename rather than before leaves
// a very small window during which the file has the final name but
// hasn't been marked with the Mark Of The Web. However, it allows
......
......@@ -68,7 +68,36 @@ class CONTENT_EXPORT DownloadFileImpl : virtual public DownloadFile {
virtual DownloadInterruptReason AppendDataToFile(
const char* data, size_t data_len);
virtual base::TimeDelta GetRetryDelayForFailedRename(int attempt_number);
virtual bool ShouldRetryFailedRename(DownloadInterruptReason reason);
private:
friend class DownloadFileTest;
// Options for RenameWithRetryInternal.
enum RenameOption {
UNIQUIFY = 1 << 0, // If there's already a file on disk that conflicts with
// |new_path|, try to create a unique file by appending
// a uniquifier.
ANNOTATE_WITH_SOURCE_INFORMATION = 1 << 1
};
// Rename file_ to |new_path|.
// |option| specifies additional operations to be performed during the rename.
// See RenameOption above.
// |retries_left| indicates how many times to retry the operation if the
// rename fails with a transient error.
// |time_of_first_failure| Set to an empty base::TimeTicks during the first
// call. Once the first failure is seen, subsequent calls of
// RenameWithRetryInternal will have a non-empty value keeping track of
// the time of first observed failure. Used for UMA.
void RenameWithRetryInternal(const base::FilePath& new_path,
RenameOption option,
int retries_left,
base::TimeTicks time_of_first_failure,
const RenameCompletionCallback& callback);
// Send an update on our progress.
void SendUpdate();
......
......@@ -8,6 +8,35 @@
namespace content {
DownloadInterruptReason ConvertFileErrorToInterruptReason(
base::File::Error file_error) {
switch (file_error) {
case base::File::FILE_OK:
return DOWNLOAD_INTERRUPT_REASON_NONE;
case base::File::FILE_ERROR_IN_USE:
return DOWNLOAD_INTERRUPT_REASON_FILE_TRANSIENT_ERROR;
case base::File::FILE_ERROR_ACCESS_DENIED:
return DOWNLOAD_INTERRUPT_REASON_FILE_ACCESS_DENIED;
case base::File::FILE_ERROR_TOO_MANY_OPENED:
return DOWNLOAD_INTERRUPT_REASON_FILE_TRANSIENT_ERROR;
case base::File::FILE_ERROR_NO_MEMORY:
return DOWNLOAD_INTERRUPT_REASON_FILE_TRANSIENT_ERROR;
case base::File::FILE_ERROR_NO_SPACE:
return DOWNLOAD_INTERRUPT_REASON_FILE_NO_SPACE;
case base::File::FILE_ERROR_SECURITY:
return DOWNLOAD_INTERRUPT_REASON_FILE_ACCESS_DENIED;
default:
return DOWNLOAD_INTERRUPT_REASON_FILE_FAILED;
}
}
DownloadInterruptReason ConvertNetErrorToInterruptReason(
net::Error net_error, DownloadInterruptSource source) {
switch (net_error) {
......
......@@ -5,6 +5,7 @@
#ifndef CONTENT_BROWSER_DOWNLOAD_DOWNLOAD_INTERRUPT_REASONS_IMPL_H_
#define CONTENT_BROWSER_DOWNLOAD_DOWNLOAD_INTERRUPT_REASONS_IMPL_H_
#include "base/files/file.h"
#include "content/public/browser/download_interrupt_reasons.h"
#include "net/base/net_errors.h"
......@@ -22,6 +23,10 @@ enum DownloadInterruptSource {
DownloadInterruptReason CONTENT_EXPORT ConvertNetErrorToInterruptReason(
net::Error file_error, DownloadInterruptSource source);
// Safe to call from any thread.
DownloadInterruptReason CONTENT_EXPORT ConvertFileErrorToInterruptReason(
base::File::Error file_error);
} // namespace content
#endif // CONTENT_BROWSER_DOWNLOAD_DOWNLOAD_INTERRUPT_REASONS_IMPL_H_
......@@ -611,6 +611,18 @@ void RecordFileBandwidth(size_t length,
disk_write_time_ms * 100 / elapsed_time_ms);
}
void RecordDownloadFileRenameResultAfterRetry(
base::TimeDelta time_since_first_failure,
DownloadInterruptReason interrupt_reason) {
if (interrupt_reason == DOWNLOAD_INTERRUPT_REASON_NONE) {
UMA_HISTOGRAM_TIMES("Download.TimeToRenameSuccessAfterInitialFailure",
time_since_first_failure);
} else {
UMA_HISTOGRAM_TIMES("Download.TimeToRenameFailureAfterInitialFailure",
time_since_first_failure);
}
}
void RecordSavePackageEvent(SavePackageEvent event) {
UMA_HISTOGRAM_ENUMERATION("Download.SavePackage",
event,
......
......@@ -200,6 +200,11 @@ void RecordFileBandwidth(size_t length,
base::TimeDelta disk_write_time,
base::TimeDelta elapsed_time);
// Record the result of a download file rename.
void RecordDownloadFileRenameResultAfterRetry(
base::TimeDelta time_since_first_failure,
DownloadInterruptReason interrupt_reason);
enum SavePackageEvent {
// The user has started to save a page as a package.
SAVE_PACKAGE_STARTED,
......
......@@ -14,7 +14,6 @@
INTERRUPT_REASON(FILE_FAILED, 1)
// The file cannot be accessed due to security restrictions.
// The file cannot be accessed.
// "Access Denied".
INTERRUPT_REASON(FILE_ACCESS_DENIED, 2)
......
......@@ -4736,6 +4736,15 @@ Therefore, the affected-histogram name has to have at least one dot in it.
</summary>
</histogram>
<histogram name="Download.MapWinShErrorTransientError"
enum="SpecialShFileOperationCodes">
<owner>asanka@chromium.org</owner>
<summary>
Windows error that produced a DOWNLOAD_INTERRUPT_REASON_FILE_TRANSIENT_ERROR
result in MapShFileOperationCodes().
</summary>
</histogram>
<histogram name="Download.OnChanged">
<owner>asanka@chromium.org</owner>
<summary>
......@@ -4869,6 +4878,24 @@ Therefore, the affected-histogram name has to have at least one dot in it.
<summary>Time between the start of a download and its completion.</summary>
</histogram>
<histogram name="Download.TimeToRenameFailureAfterInitialFailure"
units="milliseconds">
<owner>asanka@chromium.org</owner>
<summary>
Time elapsed until a retried download file rename operation failed for the
last time after the initial rename failed.
</summary>
</histogram>
<histogram name="Download.TimeToRenameSuccessAfterInitialFailure"
units="milliseconds">
<owner>asanka@chromium.org</owner>
<summary>
Time elapsed until a retried download file rename operation succeeded after
the initial rename failed.
</summary>
</histogram>
<histogram name="Download.UserDiscard" enum="DownloadItem.DangerType">
<owner>asanka@chromium.org</owner>
<owner>felt@chromium.org</owner>
......@@ -44086,7 +44113,9 @@ Therefore, the affected-histogram name has to have at least one dot in it.
<enum name="SpecialShFileOperationCodes" type="int">
<summary>Legacy error codes still returned by |ShFileOperation()|</summary>
<int value="5" label="Access denied"/>
<int value="5" label="Access denied (Win32)"/>
<int value="32" label="Sharing violation (Win32)"/>
<int value="87" label="Invalid parameter (Win32)"/>
<int value="113" label="Source and Destination are same file"/>
<int value="114" label="Multiple source mapped to single destination"/>
<int value="115" label="Rename to different directory"/>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment