Commit 55815c37 authored by abarth@chromium.org

Add 8bit Content-Transfer-Encoding support for reading MHTML files

R=abarth@chromium.org
BUG=274232

Review URL: https://codereview.chromium.org/22292008

git-svn-id: svn://svn.chromium.org/blink/trunk@156720 bbb929c8-8fbe-4397-9dbb-9b2b20218538
parent e0214bc0
......@@ -138,6 +138,7 @@ http/tests/security/isolatedWorld/resources/iframe.html -crlf
http/tests/security/isolatedWorld/resources/userGestureEvents-second-window.html -crlf
http/tests/security/isolatedWorld/userGestureEvents.html -crlf
http/tests/security/resources/empty-svg.php -crlf
mhtml/*.mht -crlf
platform/win/fast/events/panScroll-event-fired.html -crlf
platform/win/fast/events/panScroll-image-no-scroll.html -crlf
platform/win/fast/events/panScroll-imageMap-href-no-scroll.html -crlf
......
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (c) 2013, Opera Software ASA. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. Neither the name of Opera Software ASA nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.
import email
import email.encoders
import email.generator
import email.mime.image
import email.mime.multipart
import email.mime.nonmultipart
import email.mime.text
import mimetypes
import os.path
import quopri
import sys
class ArgumentError(Exception):
    """Raised when a command-line argument cannot be interpreted."""
def _encode_quopri(msg):
"""Own version of quopri isntead of email.encoders.quopri which seems to
be buggy in python3"""
orig = msg.get_payload()
encdata = quopri.encodestring(orig, quotetabs=True)
encdata.replace(b' ', b'=20')
msg.set_payload(encdata.decode('ascii', 'surrogateescape'))
msg['Content-Transfer-Encoding'] = 'quoted-printable'
def _encode_binary(msg):
email.encoders.encode_noop(msg)
msg['Content-Transfer-Encoding'] = 'binary'
# Maps each transfer-encoding name accepted on the command line to the
# callable that applies that encoding to a message part.
TRANSFER_ENCODINGS = {
    "8bit": email.encoders.encode_7or8bit,
    "7bit": email.encoders.encode_7or8bit,
    "base64": email.encoders.encode_base64,
    "binary": _encode_binary,
    "none": email.encoders.encode_noop,
    "quoted-printable": _encode_quopri,
}

# Prefix prepended to file names to form Content-Location header values.
BASE = "http://test/"
def generate_message(parts):
    """Generate a multipart/related MIME message from the given parts.

    Args:
        parts: non-empty list of dicts, each with the keys "name" (path of
            the file to embed), "mime" (a "type/subtype" string) and
            "transfer_encoding" (a key of TRANSFER_ENCODINGS).

    Returns:
        The email.mime.multipart.MIMEMultipart message, with one attached
        sub-part per entry of ``parts``. Its Content-Location is derived
        from the first part.

    Raises:
        ArgumentError: if ``parts`` is empty or a part has no MIME type.
    """
    if not parts:
        # parts[0] below would otherwise fail with a bare IndexError.
        raise ArgumentError("No input files given")

    message = email.mime.multipart.MIMEMultipart("related")
    message.add_header("Content-Location", BASE + parts[0]["name"])
    for part in parts:
        if not part["mime"]:
            # mimetypes.guess_type() yields None for unknown extensions.
            raise ArgumentError(
                "No MIME type known for '" + part["name"] + "'")
        with open(part["name"], 'rb') as payload:
            data = payload.read()
        # Use the module that defines MIMENonMultipart rather than relying
        # on email.mime.text happening to re-export it.
        sub = email.mime.nonmultipart.MIMENonMultipart(
            *part["mime"].split("/"))
        sub.add_header("Content-Location", BASE + part["name"])
        sub.set_payload(data)
        TRANSFER_ENCODINGS[part["transfer_encoding"]](sub)
        message.attach(sub)
    return message
def parse_arguments(args):
    """Parse arguments to extract file, transfer encoding, mime pairs.

    Each argument naming an existing file starts a new part; transfer
    encoding and MIME type arguments that follow it override that part's
    defaults ("binary" and the type guessed by mimetypes).

    Args:
        args: list of command-line argument strings.

    Returns:
        List of dicts with the keys "name", "transfer_encoding" and "mime",
        in the order the files were given.

    Raises:
        ArgumentError: if an argument is neither an existing file, a known
            transfer encoding nor a MIME type, or if an option is given
            before any file.
    """
    parts = []
    current = None
    for arg in args:
        if os.path.isfile(arg):
            if current is not None:
                parts.append(current)
            current = {
                "name": arg,
                "transfer_encoding": "binary",
                "mime": mimetypes.guess_type(arg)[0],
            }
            continue

        option = arg.lower()
        if option in TRANSFER_ENCODINGS:
            key = "transfer_encoding"
        elif "/" in arg:
            # Anything containing a slash that is not an existing file is
            # taken to be a MIME type.
            key = "mime"
        else:
            raise ArgumentError("Unknown argument '" + arg + "'")

        if current is None:
            # Without this check a part lacking "name" would be emitted.
            raise ArgumentError(
                "Argument '" + arg + "' must follow a file name")
        current[key] = option

    if current is not None:
        parts.append(current)
    return parts
def main():
    """Build the MIME message described by the command-line arguments and
    write it to standard output using CRLF line endings."""
    message = generate_message(parse_arguments(sys.argv[1:]))
    writer = email.generator.Generator(
        sys.stdout,
        mangle_from_=True,
        maxheaderlen=1000,
    )
    writer.flatten(message, linesep="\r\n")


if __name__ == "__main__":
    main()
<!DOCTYPE html>
<html lang='en'>
<head>
<meta charset='utf-8'>
<title>MHTML Tests</title>
<script>
if (window.testRunner)
testRunner.dumpAsText();
</script>
</head>
<body>
<h1>MHTML Content-Transfer-Encoding 7-bit test - ;)</h1>
</body>
</html>
Content-Type: multipart/related; boundary="===============6683215525243774815=="
MIME-Version: 1.0
Content-Location: transfer_encoding_7bit.html_original
--===============6683215525243774815==
Content-Type: text/html
MIME-Version: 1.0
Content-Location: transfer_encoding_7bit.html_original
Content-Transfer-Encoding: 7bit
<!DOCTYPE html>
<html lang='en'>
<head>
<meta charset='utf-8'>
<title>MHTML Tests</title>
<script>
if (window.testRunner)
testRunner.dumpAsText();
</script>
</head>
<body>
<h1>MHTML Content-Transfer-Encoding 7-bit test - ;)</h1>
</body>
</html>
--===============6683215525243774815==--
<!DOCTYPE html>
<html lang='en'>
<head>
<meta charset='utf-8'>
<title>MHTML Tests</title>
<script>
if (window.testRunner)
testRunner.dumpAsText();
</script>
</head>
<body>
<h1>MHTML Content-Transfer-Encoding 8-bit test - 😉</h1>
</body>
</html>
Content-Type: multipart/related; boundary="===============0861565773462052241=="
MIME-Version: 1.0
Content-Location: transfer_encoding_8bit.html_original
--===============0861565773462052241==
Content-Type: text/html
MIME-Version: 1.0
Content-Location: transfer_encoding_8bit.html_original
Content-Transfer-Encoding: 8bit
<!DOCTYPE html>
<html lang='en'>
<head>
<meta charset='utf-8'>
<title>MHTML Tests</title>
<script>
if (window.testRunner)
testRunner.dumpAsText();
</script>
</head>
<body>
<h1>MHTML Content-Transfer-Encoding 8-bit test - 😉</h1>
</body>
</html>
--===============0861565773462052241==--
......@@ -719,7 +719,7 @@ bool DocumentLoader::isLoadingInAPISense() const
void DocumentLoader::createArchive()
{
m_archive = MHTMLArchive::create(m_response.url(), mainResourceData().get());
ASSERT(m_archive);
RELEASE_ASSERT(m_archive);
addAllArchiveResources(m_archive.get());
ArchiveResource* mainResource = m_archive->mainResource();
......
......@@ -202,6 +202,7 @@ PassRefPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHeader& mimeHea
case MIMEHeader::QuotedPrintable:
quotedPrintableDecode(content->data(), content->size(), data);
break;
case MIMEHeader::EightBit:
case MIMEHeader::SevenBit:
case MIMEHeader::Binary:
data.append(content->data(), content->size());
......
......@@ -120,6 +120,8 @@ MIMEHeader::Encoding MIMEHeader::parseContentTransferEncoding(const String& text
return Base64;
if (encoding == "quoted-printable")
return QuotedPrintable;
if (encoding == "8bit")
return EightBit;
if (encoding == "7bit")
return SevenBit;
if (encoding == "binary")
......
......@@ -46,6 +46,7 @@ public:
enum Encoding {
QuotedPrintable,
Base64,
EightBit,
SevenBit,
Binary,
Unknown
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment