Commit ebf5f001 authored by arthursonzogni's avatar arthursonzogni Committed by Commit Bot

[MHTML] Always succeed commit. [3/3]

[3/3] of:
https://docs.google.com/document/d/13jcj_mATRxKakPEGy72YYHZBTBkkNrAHzaNIIjVsC8I/edit

In theory, the MHTML file can be any arbitrary file. It can be
malformed. In practise, this should be rare, because the majority of
the archives are produced by Chrome for offline pages.

We would like to make blink not to be able to fail to fail to commit the
document. If the browser process initially classified it to be an MHTML
document, it must be considered to be an MHTML document by blink.
This patch do not clear the `archive_` on malformed documents. By doing
it, it will continue to be considered as a MHTML document. For instance,
it will be sandboxed (=> no javascript, => opaque origin)

Along the way some minor improvement:
- The console message has been moved after the commit, fixing
  https://crbug.com/967377
- Several rafactoring and comments have been added to help my own comprehension.

Fixed:967377,1139283
Bug:967377,1139283

Bug: 1139283
Change-Id: I4a2b5f6ce3dfac40f0b36fa067c2965583cacbe2
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2480083
Commit-Queue: Arthur Sonzogni <arthursonzogni@chromium.org>
Reviewed-by: default avatarDaniel Cheng <dcheng@chromium.org>
Cr-Commit-Position: refs/heads/master@{#820183}
parent 2da190d8
...@@ -172,7 +172,7 @@ void WebDocumentLoaderImpl::ResumeParser() { ...@@ -172,7 +172,7 @@ void WebDocumentLoaderImpl::ResumeParser() {
} }
bool WebDocumentLoaderImpl::HasBeenLoadedAsWebArchive() const { bool WebDocumentLoaderImpl::HasBeenLoadedAsWebArchive() const {
return archive_ || (archive_load_result_ != mojom::MHTMLLoadResult::kSuccess); return archive_;
} }
PreviewsState WebDocumentLoaderImpl::GetPreviewsState() const { PreviewsState WebDocumentLoaderImpl::GetPreviewsState() const {
...@@ -180,12 +180,24 @@ PreviewsState WebDocumentLoaderImpl::GetPreviewsState() const { ...@@ -180,12 +180,24 @@ PreviewsState WebDocumentLoaderImpl::GetPreviewsState() const {
} }
WebArchiveInfo WebDocumentLoaderImpl::GetArchiveInfo() const { WebArchiveInfo WebDocumentLoaderImpl::GetArchiveInfo() const {
if (archive_) { if (archive_ &&
DCHECK(archive_->MainResource()); archive_->LoadResult() == mojom::blink::MHTMLLoadResult::kSuccess) {
return {archive_load_result_, archive_->MainResource()->Url(), return {
archive_->Date()}; archive_->LoadResult(),
archive_->MainResource()->Url(),
archive_->Date(),
};
} }
return {archive_load_result_, WebURL(), base::Time()};
// TODO(arthursonzogni): Returning MHTMLLoadResult::kSuccess when there are no
// archive is very misleading. Consider adding a new enum value to
// discriminate success versus no archive.
return {
archive_ ? archive_->LoadResult()
: mojom::blink::MHTMLLoadResult::kSuccess,
WebURL(),
base::Time(),
};
} }
bool WebDocumentLoaderImpl::HadUserGesture() const { bool WebDocumentLoaderImpl::HadUserGesture() const {
......
...@@ -253,10 +253,7 @@ DocumentLoader::DocumentLoader( ...@@ -253,10 +253,7 @@ DocumentLoader::DocumentLoader(
params_->is_cross_browsing_context_group_navigation) { params_->is_cross_browsing_context_group_navigation) {
DCHECK(frame_); DCHECK(frame_);
// TODO(nasko): How should this work with OOPIF? // See `archive_` attribute documentation.
// The MHTMLArchive is parsed as a whole, but can be constructed from frames
// in multiple processes. In that case, which process should parse it and how
// should the output be spread back across multiple processes?
if (!frame_->IsMainFrame()) { if (!frame_->IsMainFrame()) {
if (auto* parent = DynamicTo<LocalFrame>(frame_->Tree().Parent())) if (auto* parent = DynamicTo<LocalFrame>(frame_->Tree().Parent()))
archive_ = parent->Loader().GetDocumentLoader()->archive_; archive_ = parent->Loader().GetDocumentLoader()->archive_;
...@@ -635,7 +632,7 @@ void DocumentLoader::BodyDataReceived(base::span<const char> data) { ...@@ -635,7 +632,7 @@ void DocumentLoader::BodyDataReceived(base::span<const char> data) {
DCHECK(!frame_->GetPage()->Paused()); DCHECK(!frame_->GetPage()->Paused());
time_of_last_data_received_ = clock_->NowTicks(); time_of_last_data_received_ = clock_->NowTicks();
if (listing_ftp_directory_ || loading_mhtml_archive_) { if (listing_ftp_directory_ || loading_main_document_from_mhtml_archive_) {
// 1) Ftp directory listings accumulate data buffer and transform it later // 1) Ftp directory listings accumulate data buffer and transform it later
// to the actual document content. // to the actual document content.
// 2) Mhtml archives accumulate data buffer and parse it as mhtml later // 2) Mhtml archives accumulate data buffer and parse it as mhtml later
...@@ -740,8 +737,12 @@ void DocumentLoader::FinishedLoading(base::TimeTicks finish_time) { ...@@ -740,8 +737,12 @@ void DocumentLoader::FinishedLoading(base::TimeTicks finish_time) {
ProcessDataBuffer(); ProcessDataBuffer();
} }
if (loading_mhtml_archive_ && state_ < kCommitted) { if (loading_main_document_from_mhtml_archive_ && state_ < kCommitted) {
FinalizeMHTMLArchiveLoad(); // The browser process should block any navigation to an MHTML archive
// inside iframes. See NavigationRequest::OnResponseStarted().
CHECK(frame_->IsMainFrame());
archive_ = MHTMLArchive::Create(url_, std::move(data_buffer_));
} }
// We should not call FinishedLoading before committing navigation, // We should not call FinishedLoading before committing navigation,
...@@ -749,7 +750,7 @@ void DocumentLoader::FinishedLoading(base::TimeTicks finish_time) { ...@@ -749,7 +750,7 @@ void DocumentLoader::FinishedLoading(base::TimeTicks finish_time) {
// has to be validated before committing the navigation. The validation // has to be validated before committing the navigation. The validation
// process loads the entire body of the archive, which will move the state to // process loads the entire body of the archive, which will move the state to
// FinishedLoading. // FinishedLoading.
if (!loading_mhtml_archive_) if (!loading_main_document_from_mhtml_archive_)
DCHECK_GE(state_, kCommitted); DCHECK_GE(state_, kCommitted);
base::TimeTicks response_end_time = finish_time; base::TimeTicks response_end_time = finish_time;
...@@ -772,28 +773,6 @@ void DocumentLoader::FinishedLoading(base::TimeTicks finish_time) { ...@@ -772,28 +773,6 @@ void DocumentLoader::FinishedLoading(base::TimeTicks finish_time) {
} }
} }
void DocumentLoader::FinalizeMHTMLArchiveLoad() {
// The browser process is blocking any navigation toward MHTML archive inside
// iframes. See NavigationRequest::OnResponseStarted().
CHECK(frame_->IsMainFrame());
archive_ = MHTMLArchive::Create(url_, data_buffer_);
archive_load_result_ = archive_->LoadResult();
if (archive_load_result_ != mojom::blink::MHTMLLoadResult::kSuccess) {
// TODO(arthursonzogni): Remove this. Once approved by the browser process,
// loading the MHTML archive shouldn't fail. We can serve empty document
// instead.
archive_.Clear();
// Log if attempting to load an invalid archive resource.
frame_->Console().AddMessage(MakeGarbageCollected<ConsoleMessage>(
mojom::ConsoleMessageSource::kJavaScript,
mojom::ConsoleMessageLevel::kError,
"Malformed multipart archive: " + url_.GetString()));
}
data_buffer_ = nullptr;
}
void DocumentLoader::HandleRedirect(const KURL& current_request_url) { void DocumentLoader::HandleRedirect(const KURL& current_request_url) {
// Browser process should have already checked that redirecting url is // Browser process should have already checked that redirecting url is
// allowed to display content from the target origin. // allowed to display content from the target origin.
...@@ -1278,28 +1257,19 @@ void DocumentLoader::StartLoadingInternal() { ...@@ -1278,28 +1257,19 @@ void DocumentLoader::StartLoadingInternal() {
HandleResponse(); HandleResponse();
loading_mhtml_archive_ = loading_main_document_from_mhtml_archive_ =
EqualIgnoringASCIICase("multipart/related", response_.MimeType()) || EqualIgnoringASCIICase("multipart/related", response_.MimeType()) ||
EqualIgnoringASCIICase("message/rfc822", response_.MimeType()); EqualIgnoringASCIICase("message/rfc822", response_.MimeType());
if (loading_mhtml_archive_) { if (loading_main_document_from_mhtml_archive_) {
// The browser process should block any navigation to an MHTML archive
// inside iframes. See NavigationRequest::OnResponseStarted().
CHECK(frame_->IsMainFrame());
// To commit an mhtml archive synchronously we have to load the whole body // To commit an mhtml archive synchronously we have to load the whole body
// synchronously and parse it, and it's already loaded in a buffer usually. // synchronously and parse it, and it's already loaded in a buffer usually.
// This means we should not defer, and we'll finish loading synchronously // This means we should not defer, and we'll finish loading synchronously
// from StartLoadingBody(). // from StartLoadingBody().
body_loader_->StartLoadingBody(this, false /* use_isolated_code_cache */); body_loader_->StartLoadingBody(this, false /* use_isolated_code_cache */);
if (body_loader_) {
// Finalize the load of the MHTML archive. If the load fail (ie. did not
// finish synchronously), |body_loader_| will be null and the load will
// not be finalized. When StartLoadingResponse is called later, an empty
// document will be loaded instead of the MHTML archive.
// TODO(clamy): Simplify this code path.
// TODO(arthursonzogni): Make the load impossible to fail. Once approved
// by the browser process, it shouldn't be possible to fail committing the
// document. We can fallback to empty response. Alternatively, we can make
// MHTML document to be served from a dedicated URLLoader and fail
// earlier.
FinalizeMHTMLArchiveLoad();
}
return; return;
} }
...@@ -1325,17 +1295,33 @@ void DocumentLoader::StartLoadingResponse() { ...@@ -1325,17 +1295,33 @@ void DocumentLoader::StartLoadingResponse() {
CreateParserPostCommit(); CreateParserPostCommit();
// Finish load of MHTML archives and empty documents. // The main document from an MHTML archive is not loaded from its HTTP
ArchiveResource* main_resource = // response, but from the main resource within the archive (in the response).
loading_mhtml_archive_ && archive_ ? archive_->MainResource() : nullptr; if (loading_main_document_from_mhtml_archive_) {
if (main_resource) { // If the `archive_` contains a main resource, load the main document from
data_buffer_ = main_resource->Data(); // the archive, else it will remain empty.
ProcessDataBuffer(); if (ArchiveResource* resource = archive_->MainResource()) {
DCHECK_EQ(archive_->LoadResult(),
mojom::blink::MHTMLLoadResult::kSuccess);
data_buffer_ = resource->Data();
ProcessDataBuffer();
FinishedLoading(base::TimeTicks::Now());
return;
}
// Log attempts loading a malformed archive.
DCHECK_NE(archive_->LoadResult(), mojom::blink::MHTMLLoadResult::kSuccess);
frame_->Console().AddMessage(MakeGarbageCollected<ConsoleMessage>(
mojom::blink::ConsoleMessageSource::kJavaScript,
mojom::blink::ConsoleMessageLevel::kError,
"Malformed multipart archive: " + url_.GetString()));
FinishedLoading(base::TimeTicks::Now());
return;
} }
if (loading_mhtml_archive_ || loading_url_as_empty_document_) { // Empty documents are empty by definition. Nothing to load.
// Finish the load of an empty document if the URL was meant to load as an if (loading_url_as_empty_document_) {
// empty document or the load of the MHTML archive failed.
FinishedLoading(base::TimeTicks::Now()); FinishedLoading(base::TimeTicks::Now());
return; return;
} }
...@@ -1789,9 +1775,14 @@ void DocumentLoader::CommitNavigation() { ...@@ -1789,9 +1775,14 @@ void DocumentLoader::CommitNavigation() {
frame_->Tree().CrossBrowsingContextGroupSetNulledName(); frame_->Tree().CrossBrowsingContextGroupSetNulledName();
} }
if (loading_mhtml_archive_ && archive_ && // MHTML archive's URL is usually a local file. However the main resource
!archive_->MainResource()->Url().IsEmpty()) { // within the archive has a public URL and must be used to resolve all the
document->SetBaseURLOverride(archive_->MainResource()->Url()); // relative links.
if (loading_main_document_from_mhtml_archive_) {
ArchiveResource* main_resource = archive_->MainResource();
KURL main_resource_url = main_resource ? main_resource->Url() : KURL();
if (!main_resource_url.IsEmpty())
document->SetBaseURLOverride(main_resource_url);
} }
if (commit_reason_ == CommitReason::kXSLT) if (commit_reason_ == CommitReason::kXSLT)
...@@ -1960,8 +1951,11 @@ void DocumentLoader::CreateParserPostCommit() { ...@@ -1960,8 +1951,11 @@ void DocumentLoader::CreateParserPostCommit() {
const AtomicString& DocumentLoader::MimeType() const { const AtomicString& DocumentLoader::MimeType() const {
// In the case of mhtml archive, |response_| has an archive mime type, // In the case of mhtml archive, |response_| has an archive mime type,
// while the document has a different mime type. // while the document has a different mime type.
if (archive_ && loading_mhtml_archive_) if (loading_main_document_from_mhtml_archive_) {
return archive_->MainResource()->MimeType(); if (ArchiveResource* main_resource = archive_->MainResource())
return main_resource->MimeType();
}
return response_.MimeType(); return response_.MimeType();
} }
......
...@@ -325,13 +325,31 @@ class CORE_EXPORT DocumentLoader : public GarbageCollected<DocumentLoader>, ...@@ -325,13 +325,31 @@ class CORE_EXPORT DocumentLoader : public GarbageCollected<DocumentLoader>,
protected: protected:
Vector<KURL> redirect_chain_; Vector<KURL> redirect_chain_;
// Archive used to load document and/or subresources. If one of the ancestor // Based on its MIME type, if the main document's response corresponds to an
// frames was loaded as an archive, we'll load the document resource from it. // MHTML archive, then every resources will be loaded from this archive.
// Otherwise, if the document resource is an archive itself (based on mime //
// type), we'll create one and use it for subresources. // This includes:
// - The main document.
// - Every nested document.
// - Every subresource.
//
// This excludes:
// - data-URLs documents and subresources.
// - about:srcdoc documents.
// - Error pages.
//
// Whether about:blank and derivative should be loaded from the archive is
// weird edge case: Please refer to the tests:
// - NavigationMhtmlBrowserTest.IframeAboutBlankNotFound
// - NavigationMhtmlBrowserTest.IframeAboutBlankFound
//
// Nested documents are loaded in the same process and grab a reference to the
// same `archive_` as their parent.
//
// Resources:
// - https://tools.ietf.org/html/rfc822
// - https://tools.ietf.org/html/rfc2387
Member<MHTMLArchive> archive_; Member<MHTMLArchive> archive_;
mojom::MHTMLLoadResult archive_load_result_ =
mojom::MHTMLLoadResult::kSuccess;
private: private:
network::mojom::blink::WebSandboxFlags CalculateSandboxFlags(); network::mojom::blink::WebSandboxFlags CalculateSandboxFlags();
...@@ -374,7 +392,6 @@ class CORE_EXPORT DocumentLoader : public GarbageCollected<DocumentLoader>, ...@@ -374,7 +392,6 @@ class CORE_EXPORT DocumentLoader : public GarbageCollected<DocumentLoader>,
void FinishedLoading(base::TimeTicks finish_time); void FinishedLoading(base::TimeTicks finish_time);
void CancelLoadAfterCSPDenied(const ResourceResponse&); void CancelLoadAfterCSPDenied(const ResourceResponse&);
void FinalizeMHTMLArchiveLoad();
void HandleRedirect(const KURL& current_request_url); void HandleRedirect(const KURL& current_request_url);
void HandleResponse(); void HandleResponse();
...@@ -526,7 +543,11 @@ class CORE_EXPORT DocumentLoader : public GarbageCollected<DocumentLoader>, ...@@ -526,7 +543,11 @@ class CORE_EXPORT DocumentLoader : public GarbageCollected<DocumentLoader>,
const bool was_discarded_ = false; const bool was_discarded_ = false;
bool listing_ftp_directory_ = false; bool listing_ftp_directory_ = false;
bool loading_mhtml_archive_ = false;
// True when loading the main document from the MHTML archive. It implies an
// |archive_| to be created. Nested documents will also inherit from the same
// |archive_|, but won't have |loading_main_document_from_mhtml_archive_| set.
bool loading_main_document_from_mhtml_archive_ = false;
const bool loading_srcdoc_ = false; const bool loading_srcdoc_ = false;
const bool loading_url_as_empty_document_ = false; const bool loading_url_as_empty_document_ = false;
CommitReason commit_reason_ = CommitReason::kRegular; CommitReason commit_reason_ = CommitReason::kRegular;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment