Commit 4372a3a9 authored by Dmitry Gozman, committed by Commit Bot

Merge quoted_printable into platform/mhtml

These utility functions have a single client, and abstract away
from it with a delegate. Instead, we can just inline the code
and simplify it a bit.

Bug: none
Change-Id: I0a0e0f13077331a917f430e92b62183e2d98f762
Reviewed-on: https://chromium-review.googlesource.com/c/1283937
Commit-Queue: Dmitry Gozman <dgozman@chromium.org>
Reviewed-by: Daniel Cheng <dcheng@chromium.org>
Reviewed-by: Łukasz Anforowicz <lukasza@chromium.org>
Cr-Commit-Position: refs/heads/master@{#601764}
parent 4eee0be8
......@@ -1294,8 +1294,6 @@ jumbo_component("platform") {
"text/mac/hyphenation_mac.cc",
"text/platform_locale.cc",
"text/platform_locale.h",
"text/quoted_printable.cc",
"text/quoted_printable.h",
"text/segmented_string.cc",
"text/segmented_string.h",
"text/suffix_tree.h",
......
......@@ -30,6 +30,7 @@
#include "third_party/blink/renderer/platform/mhtml/mhtml_archive.h"
#include <stddef.h>
#include "build/build_config.h"
#include "third_party/blink/renderer/platform/date_components.h"
#include "third_party/blink/renderer/platform/mhtml/archive_resource.h"
......@@ -37,13 +38,14 @@
#include "third_party/blink/renderer/platform/network/mime/mime_type_registry.h"
#include "third_party/blink/renderer/platform/serialized_resource.h"
#include "third_party/blink/renderer/platform/shared_buffer.h"
#include "third_party/blink/renderer/platform/text/quoted_printable.h"
#include "third_party/blink/renderer/platform/weborigin/scheme_registry.h"
#include "third_party/blink/renderer/platform/wtf/ascii_ctype.h"
#include "third_party/blink/renderer/platform/wtf/assertions.h"
#include "third_party/blink/renderer/platform/wtf/date_math.h"
#include "third_party/blink/renderer/platform/wtf/text/base64.h"
#include "third_party/blink/renderer/platform/wtf/text/string_builder.h"
#include "third_party/blink/renderer/platform/wtf/time.h"
#include "third_party/blink/renderer/platform/wtf/vector.h"
namespace blink {
......@@ -60,66 +62,115 @@ const char kQuotedPrintable[] = "quoted-printable";
const char kBase64[] = "base64";
const char kBinary[] = "binary";
} // namespace
// Controls quoted-printable encoding characters in body, per RFC 2045.
class QuotedPrintableEncodeBodyDelegate : public QuotedPrintableEncodeDelegate {
public:
QuotedPrintableEncodeBodyDelegate() = default;
~QuotedPrintableEncodeBodyDelegate() override = default;
size_t GetMaxLineLengthForEncodedContent() const override {
return kMaximumLineLength;
// Returns the length of a line-ending if one is present starting at
// |input[index]| or zero if no line-ending is present at the given |index|.
size_t LengthOfLineEndingAtIndex(const char* input,
size_t input_length,
size_t index) {
SECURITY_DCHECK(index < input_length);
if (input[index] == '\n')
return 1; // Single LF.
if (input[index] == '\r') {
if ((index + 1) == input_length || input[index + 1] != '\n')
return 1; // Single CR (Classic Mac OS).
return 2; // CR-LF.
}
bool ShouldEncodeWhiteSpaceCharacters(bool end_of_line) const override {
// They should be encoded only if they appear at the end of a body line.
return end_of_line;
}
return 0;
}
void DidStartLine(Vector<char>& out) override {
// Nothing to add.
}
// Performs quoted-printable encoding of |input|: per RFC 2047 when
// |is_header| is true, per RFC 2045 (body encoding) otherwise.
void QuotedPrintableEncode(const char* input,
wtf_size_t input_length,
bool is_header,
Vector<char>& out) {
out.clear();
out.ReserveCapacity(input_length);
if (is_header)
out.Append(kRFC2047EncodingPrefix, kRFC2047EncodingPrefixLength);
size_t current_line_length = 0;
for (size_t i = 0; i < input_length; ++i) {
bool is_last_character = (i == input_length - 1);
char current_character = input[i];
bool requires_encoding = false;
// All non-printable ASCII characters and = require encoding.
if ((current_character < ' ' || current_character > '~' ||
current_character == '=') &&
current_character != '\t')
requires_encoding = true;
// Decide if space and tab characters need to be encoded.
if (!requires_encoding &&
(current_character == '\t' || current_character == ' ')) {
if (is_header) {
// White space characters should always be encoded if they appear
// anywhere in the header.
requires_encoding = true;
} else {
bool end_of_line = is_last_character || LengthOfLineEndingAtIndex(
input, input_length, i + 1);
requires_encoding = end_of_line;
}
}
void DidFinishLine(bool last_line, Vector<char>& out) override {
if (!last_line) {
out.push_back('=');
out.Append("\r\n", 2);
// End of line should be converted to CR-LF sequences.
if (!is_last_character) {
size_t length_of_line_ending =
LengthOfLineEndingAtIndex(input, input_length, i);
if (length_of_line_ending) {
out.Append("\r\n", 2);
current_line_length = 0;
i += (length_of_line_ending -
1); // -1 because we'll ++ in the for() above.
continue;
}
}
}
};
// Controls quoted-printable encoding characters in headers, per RFC 2047.
class QuotedPrintableEncodeHeaderDelegate
: public QuotedPrintableEncodeDelegate {
public:
QuotedPrintableEncodeHeaderDelegate() = default;
~QuotedPrintableEncodeHeaderDelegate() override = default;
size_t GetMaxLineLengthForEncodedContent() const override {
return kMaximumLineLength - kRFC2047EncodingPrefixLength -
kRFC2047EncodingSuffixLength;
}
bool ShouldEncodeWhiteSpaceCharacters(bool end_of_line) const override {
// They should always be encoded if they appear anywhere in the header.
return true;
}
size_t length_of_encoded_character = 1;
if (requires_encoding)
length_of_encoded_character += 2;
if (!is_last_character)
length_of_encoded_character += 1; // + 1 for the = (soft line break).
// Insert a soft line break if necessary.
size_t max_line_length_for_encoded_content = kMaximumLineLength;
if (is_header) {
max_line_length_for_encoded_content -= kRFC2047EncodingPrefixLength;
max_line_length_for_encoded_content -= kRFC2047EncodingSuffixLength;
}
void DidStartLine(Vector<char>& out) override {
out.Append(kRFC2047EncodingPrefix, kRFC2047EncodingPrefixLength);
}
if (current_line_length + length_of_encoded_character >
max_line_length_for_encoded_content) {
if (is_header) {
out.Append(kRFC2047EncodingSuffix, kRFC2047EncodingSuffixLength);
out.Append("\r\n", 2);
out.push_back(' ');
} else {
out.push_back('=');
out.Append("\r\n", 2);
}
current_line_length = 0;
if (is_header)
out.Append(kRFC2047EncodingPrefix, kRFC2047EncodingPrefixLength);
}
void DidFinishLine(bool last_line, Vector<char>& out) override {
out.Append(kRFC2047EncodingSuffix, kRFC2047EncodingSuffixLength);
if (!last_line) {
out.Append("\r\n", 2);
out.push_back(' ');
// Finally, insert the actual character(s).
if (requires_encoding) {
out.push_back('=');
out.push_back(UpperNibbleToASCIIHexDigit(current_character));
out.push_back(LowerNibbleToASCIIHexDigit(current_character));
current_line_length += 3;
} else {
out.push_back(current_character);
current_line_length++;
}
}
};
if (is_header)
out.Append(kRFC2047EncodingSuffix, kRFC2047EncodingSuffixLength);
}
static String ConvertToPrintableCharacters(const String& text) {
String ConvertToPrintableCharacters(const String& text) {
// If the text contains all printable ASCII characters, no need for encoding.
bool found_non_printable_char = false;
for (wtf_size_t i = 0; i < text.length(); ++i) {
......@@ -137,13 +188,14 @@ static String ConvertToPrintableCharacters(const String& text) {
// where, "utf-8" is the chosen charset to represent the text and "Q" is the
// Quoted-Printable format to convert to 7-bit printable ASCII characters.
CString utf8_text = text.Utf8();
QuotedPrintableEncodeHeaderDelegate header_delegate;
Vector<char> encoded_text;
QuotedPrintableEncode(utf8_text.data(), utf8_text.length(), &header_delegate,
encoded_text);
QuotedPrintableEncode(utf8_text.data(), utf8_text.length(),
true /* is_header */, encoded_text);
return String(encoded_text.data(), encoded_text.size());
}
} // namespace
MHTMLArchive::MHTMLArchive() = default;
MHTMLArchive* MHTMLArchive::Create(const KURL& url,
......@@ -314,9 +366,8 @@ void MHTMLArchive::GenerateMHTMLPart(const String& boundary,
size_t data_length = flat_data.size();
Vector<char> encoded_data;
if (!strcmp(content_encoding, kQuotedPrintable)) {
QuotedPrintableEncodeBodyDelegate body_delegate;
QuotedPrintableEncode(data, SafeCast<wtf_size_t>(data_length),
&body_delegate, encoded_data);
false /* is_header */, encoded_data);
output_buffer.Append(encoded_data.data(), encoded_data.size());
} else {
DCHECK(!strcmp(content_encoding, kBase64));
......
......@@ -30,19 +30,63 @@
#include "third_party/blink/renderer/platform/mhtml/mhtml_parser.h"
#include <stddef.h>
#include "third_party/blink/renderer/platform/mhtml/archive_resource.h"
#include "third_party/blink/renderer/platform/network/http_parsers.h"
#include "third_party/blink/renderer/platform/network/parsed_content_type.h"
#include "third_party/blink/renderer/platform/text/quoted_printable.h"
#include "third_party/blink/renderer/platform/wtf/ascii_ctype.h"
#include "third_party/blink/renderer/platform/wtf/hash_map.h"
#include "third_party/blink/renderer/platform/wtf/text/base64.h"
#include "third_party/blink/renderer/platform/wtf/text/string_builder.h"
#include "third_party/blink/renderer/platform/wtf/text/string_concatenate.h"
#include "third_party/blink/renderer/platform/wtf/text/string_hash.h"
#include "third_party/blink/renderer/platform/wtf/text/wtf_string.h"
#include "third_party/blink/renderer/platform/wtf/vector.h"
namespace blink {
namespace {
// Decodes the quoted-printable bytes in |data| into |out|, which is cleared
// first. Handling of '=':
//   - "=\r\n" is a soft line break and produces no output;
//   - "=XY" with two hex digits produces the single byte 0xXY;
//   - "=XY" with anything else is copied through unchanged;
//   - '=' with fewer than two characters after it is copied through as is.
void QuotedPrintableDecode(const char* data,
                           size_t data_length,
                           Vector<char>& out) {
  out.clear();

  size_t pos = 0;
  while (pos < data_length) {
    const char c = data[pos];

    // An escape needs the '=' itself plus two more characters.
    const bool starts_escape = (c == '=') && (data_length - pos >= 3);
    if (!starts_escape) {
      out.push_back(c);
      ++pos;
      continue;
    }

    const char high = data[pos + 1];
    const char low = data[pos + 2];
    pos += 3;

    // Soft line break: swallow it entirely.
    if (high == '\r' && low == '\n')
      continue;

    if (IsASCIIHexDigit(high) && IsASCIIHexDigit(low)) {
      out.push_back(static_cast<char>(ToASCIIHexValue(high, low)));
    } else {
      // Not a valid escape; keep all three characters verbatim.
      out.push_back('=');
      out.push_back(high);
      out.push_back(low);
    }
  }
}
} // namespace
// This class is a limited MIME parser used to parse the MIME headers of MHTML
// files.
class MIMEHeader : public GarbageCollectedFinalized<MIMEHeader> {
......
/*
* Copyright (C) 2011 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Google Inc. nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "third_party/blink/renderer/platform/text/quoted_printable.h"
#include "third_party/blink/renderer/platform/wtf/ascii_ctype.h"
namespace blink {
// Returns how many characters the line ending starting at |input[index]|
// occupies: 1 for a lone LF or a lone CR, 2 for CR-LF, and 0 when
// |input[index]| does not start a line ending. |index| must be smaller than
// |input_length|.
static size_t LengthOfLineEndingAtIndex(const char* input,
                                        size_t input_length,
                                        size_t index) {
  SECURITY_DCHECK(index < input_length);
  switch (input[index]) {
    case '\n':
      return 1;  // Single LF.
    case '\r': {
      const bool followed_by_lf =
          (index + 1) != input_length && input[index + 1] == '\n';
      // CR-LF when an LF follows, otherwise a single CR (Classic Mac OS).
      return followed_by_lf ? 2 : 1;
    }
    default:
      return 0;
  }
}
// Quoted-printable encodes |input_length| bytes of |input| into |out|, which
// is cleared first. |delegate| decides the maximum encoded line length,
// whether a given space/tab is escaped, and what is written at the start and
// end of every encoded line (see QuotedPrintableEncodeDelegate).
//
// Line endings in the input (LF, CR or CR-LF) that are not the very last
// character are emitted as CR-LF. Every other byte outside ' '..'~', and '='
// itself, is written as "=XX".
void QuotedPrintableEncode(const char* input,
                           wtf_size_t input_length,
                           QuotedPrintableEncodeDelegate* delegate,
                           Vector<char>& out) {
  out.clear();
  out.ReserveCapacity(input_length);
  delegate->DidStartLine(out);

  size_t column = 0;  // Encoded characters written on the current line.
  size_t pos = 0;
  while (pos < input_length) {
    const bool at_end = (pos + 1 == input_length);

    // Hard line breaks are normalized to CR-LF. A line ending that is the
    // final character is not treated as a break; it falls through and is
    // escaped like any other control character.
    if (!at_end) {
      const size_t newline_length =
          LengthOfLineEndingAtIndex(input, input_length, pos);
      if (newline_length) {
        out.Append("\r\n", 2);
        column = 0;
        pos += newline_length;
        continue;
      }
    }

    const char c = input[pos];

    // Work out whether this character has to be written as "=XX".
    bool escape;
    if (c == '\t' || c == ' ') {
      // White space: the delegate decides, given whether it ends a line.
      const bool ends_line =
          at_end || LengthOfLineEndingAtIndex(input, input_length, pos + 1);
      escape = delegate->ShouldEncodeWhiteSpaceCharacters(ends_line);
    } else {
      // All non-printable ASCII characters and '=' are escaped.
      escape = c < ' ' || c > '~' || c == '=';
    }

    // Room needed on the line: the character itself (3 when escaped), plus
    // one more for a soft line break marker unless this is the last one.
    size_t needed = escape ? 3 : 1;
    if (!at_end)
      needed += 1;

    // Wrap onto a fresh encoded line when the character would not fit.
    if (column + needed > delegate->GetMaxLineLengthForEncodedContent()) {
      delegate->DidFinishLine(false /*last_line*/, out);
      column = 0;
      delegate->DidStartLine(out);
    }

    if (escape) {
      out.push_back('=');
      out.push_back(UpperNibbleToASCIIHexDigit(c));
      out.push_back(LowerNibbleToASCIIHexDigit(c));
      column += 3;
    } else {
      out.push_back(c);
      ++column;
    }
    ++pos;
  }

  delegate->DidFinishLine(true /*last_line*/, out);
}
// Convenience overload: decodes the whole contents of |in| into |out| by
// forwarding to the pointer-and-length overload.
void QuotedPrintableDecode(const Vector<char>& in, Vector<char>& out) {
  const char* const bytes = in.data();
  const size_t byte_count = in.size();
  QuotedPrintableDecode(bytes, byte_count, out);
}
// Decodes |data_length| quoted-printable bytes starting at |data| into |out|,
// which is cleared first. "=\r\n" (soft line break) is dropped, "=XY" with
// two hex digits becomes the byte 0xXY, and any other or truncated '='
// sequence is copied to |out| unchanged.
void QuotedPrintableDecode(const char* data,
                           size_t data_length,
                           Vector<char>& out) {
  out.clear();

  const char* cursor = data;
  const char* const end = data + data_length;
  while (cursor != end) {
    // Ordinary characters, and a '=' too close to the end to form a full
    // "=XY" sequence, are passed through one at a time.
    if (*cursor != '=' || end - cursor < 3) {
      out.push_back(*cursor);
      ++cursor;
      continue;
    }

    const char first = cursor[1];
    const char second = cursor[2];
    cursor += 3;

    if (first == '\r' && second == '\n')
      continue;  // Soft line break, nothing to output.

    const bool is_hex_pair = IsASCIIHexDigit(first) && IsASCIIHexDigit(second);
    if (is_hex_pair) {
      out.push_back(static_cast<char>(ToASCIIHexValue(first, second)));
      continue;
    }

    // Invalid sequence, '=' followed by non hex digits: keep it as is.
    out.push_back('=');
    out.push_back(first);
    out.push_back(second);
  }
}
} // namespace blink
/*
* Copyright (C) 2011 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Google Inc. nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_QUOTED_PRINTABLE_H_
#define THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_QUOTED_PRINTABLE_H_
#include "third_party/blink/renderer/platform/platform_export.h"
#include "third_party/blink/renderer/platform/wtf/vector.h"
#include "third_party/blink/renderer/platform/wtf/wtf_size_t.h"
namespace blink {
// Delegate for controlling the behavior of quoted-printable encoding. The
// original characters may be encoded a bit differently depending on where
// they live, header or body. For example, "=CRLF" should be used to break
// a long line in the body while "CRLF+SPACE" should be used to break a long
// line in the header.
//
// QuotedPrintableEncode() drives the delegate: it calls DidStartLine() before
// writing the first character and again after every soft line break, and it
// calls DidFinishLine() at every soft line break and once more when all input
// has been consumed.
class PLATFORM_EXPORT QuotedPrintableEncodeDelegate {
public:
QuotedPrintableEncodeDelegate() = default;
virtual ~QuotedPrintableEncodeDelegate() = default;
// Returns maximum number of characters allowed for an encoded line, excluding
// prefix and soft line break. The encoder wraps to a new encoded line when
// the next character would push the current line past this value.
virtual size_t GetMaxLineLengthForEncodedContent() const = 0;
// Returns true if space and tab characters need to be encoded.
// |end_of_line| is true when the white space character is the last input
// character or is immediately followed by a line ending.
virtual bool ShouldEncodeWhiteSpaceCharacters(bool end_of_line) const = 0;
// Called when an encoded line starts. The delegate can take this chance to
// add any prefix to |out|.
virtual void DidStartLine(Vector<char>& out) = 0;
// Called when an encoded line ends. The delegate can take this chance to add
// any suffix to |out|. If it is not the last line (|last_line| is false), a
// soft line break should also be added after the suffix.
virtual void DidFinishLine(bool last_line, Vector<char>& out) = 0;
};
// Quoted-printable encodes the given bytes (pointer and length) into the
// output vector, which is cleared first. The delegate controls line length,
// white space escaping and the text written at the start and end of each
// encoded line.
PLATFORM_EXPORT void QuotedPrintableEncode(const char*,
wtf_size_t,
QuotedPrintableEncodeDelegate*,
Vector<char>&);
// Decodes the given quoted-printable bytes (pointer and length) into the
// output vector, which is cleared first. Soft line breaks ("=CRLF") are
// removed and "=XY" hex escapes are turned back into single bytes; malformed
// or truncated escapes are copied through unchanged.
PLATFORM_EXPORT void QuotedPrintableDecode(const char*, size_t, Vector<char>&);
} // namespace blink
#endif // THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_QUOTED_PRINTABLE_H_
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment