Commit c0399701 authored by Bence Béky's avatar Bence Béky Committed by Commit Bot

Modify whitespace trimming for MIME parameters.

* Do not trim trailing whitespace from MIME parameter names.
* Do not trim leading whitespace from MIME parameter values.
* Except keep trimming leading whitespace from "charset" value.

Bug: 772834
Cq-Include-Trybots: master.tryserver.chromium.android:android_cronet_tester;master.tryserver.chromium.mac:ios-simulator-cronet
Change-Id: Ie7f5bc493ea0c8fece9a6a8159b27c85bb58ac2b
Reviewed-on: https://chromium-review.googlesource.com/763667Reviewed-by: default avatarMatt Menke <mmenke@chromium.org>
Commit-Queue: Bence Béky <bnc@chromium.org>
Cr-Commit-Position: refs/heads/master@{#516348}
parent e6fdf403
......@@ -98,12 +98,14 @@ void HttpUtil::ParseContentType(const std::string& content_type_str,
std::string::const_iterator param_name_begin = tokenizer.token_begin();
std::string::const_iterator param_name_end = equals_sign;
TrimLWS(&param_name_begin, &param_name_end);
std::string::const_iterator param_value_begin = equals_sign + 1;
std::string::const_iterator param_value_end = tokenizer.token_end();
DCHECK(param_value_begin <= tokenizer.token_end());
TrimLWS(&param_value_begin, &param_value_end);
// From parameter name, only trim leading whitespace.
// From parameter value, only trim trailing whitespace.
// See https://crbug.com/772834.
TrimLWS(&param_name_begin, &param_value_end);
if (base::LowerCaseEqualsASCII(
base::StringPiece(param_name_begin, param_name_end), "charset")) {
......@@ -121,9 +123,7 @@ void HttpUtil::ParseContentType(const std::string& content_type_str,
}
if (type_has_charset) {
// Trim leading and trailing whitespace from charset_val. We include
// '(' in the trailing trim set to catch media-type comments, which are
// not at all standard, but may occur in rare cases.
// Trim leading whitespace from charset_val.
charset_val = content_type_str.find_first_not_of(HTTP_LWS, charset_val);
charset_val = std::min(charset_val, charset_end);
char first_char = content_type_str[charset_val];
......@@ -133,36 +133,40 @@ void HttpUtil::ParseContentType(const std::string& content_type_str,
++charset_val;
DCHECK(charset_end >= charset_val);
} else {
charset_end = std::min(content_type_str.find_first_of(HTTP_LWS ";(",
charset_val),
// Ignore the part after '('. This is not in the standard, but may occur
// in rare cases.
// TODO(bnc): Do not ignore the part after '('.
// See https://crbug.com/772343.
charset_end = std::min(content_type_str.find_first_of("(", charset_val),
charset_end);
}
}
// if the server sent "*/*", it is meaningless, so do not store it.
// also, if type_val is the same as mime_type, then just update the
// charset. however, if charset is empty and mime_type hasn't
// changed, then don't wipe-out an existing charset. We
// also want to reject a mime-type if it does not include a slash.
// some servers give junk after the charset parameter, which may
// If the server sent "*/*", it is meaningless, so do not store it.
// Also, reject a mime-type if it does not include a slash.
// Some servers give junk after the charset parameter, which may
// include a comma, so this check makes us a bit more tolerant.
if (content_type_str.length() != 0 &&
content_type_str != "*/*" &&
content_type_str.find_first_of('/') != std::string::npos) {
// Common case here is that mime_type is empty
bool eq = !mime_type->empty() &&
base::LowerCaseEqualsASCII(
base::StringPiece(begin + type_val, begin + type_end),
mime_type->data());
if (!eq) {
*mime_type = base::ToLowerASCII(
base::StringPiece(begin + type_val, begin + type_end));
}
if ((!eq && *had_charset) || type_has_charset) {
*had_charset = true;
*charset = base::ToLowerASCII(
base::StringPiece(begin + charset_val, begin + charset_end));
}
if (content_type_str.length() == 0 || content_type_str == "*/*" ||
content_type_str.find_first_of('/') == std::string::npos) {
return;
}
// If type_val is the same as mime_type, then just update the charset.
// However, if charset is empty and mime_type hasn't changed, then don't
// wipe-out an existing charset.
// It is common that mime_type is empty.
bool eq = !mime_type->empty() &&
base::LowerCaseEqualsASCII(
base::StringPiece(begin + type_val, begin + type_end),
mime_type->data());
if (!eq) {
*mime_type = base::ToLowerASCII(
base::StringPiece(begin + type_val, begin + type_end));
}
if ((!eq && *had_charset) || type_has_charset) {
*had_charset = true;
*charset = base::ToLowerASCII(
base::StringPiece(begin + charset_val, begin + charset_end));
}
}
......
......@@ -740,10 +740,11 @@ TEST(HttpUtilTest, ParseContentType) {
true,
""
},
// Parameter name is "charset ", not "charset". See https://crbug.com/772834.
{ "text/html; charset =utf-8",
"text/html",
"utf-8",
true,
"",
false,
""
},
{ "text/html; charset= utf-8",
......@@ -764,23 +765,27 @@ TEST(HttpUtilTest, ParseContentType) {
false,
"\"WebKit-ada-df-dsf-adsfadsfs\""
},
// Parameter name is "boundary ", not "boundary".
// See https://crbug.com/772834.
{ "text/html; boundary =\"WebKit-ada-df-dsf-adsfadsfs\"",
"text/html",
"",
false,
"\"WebKit-ada-df-dsf-adsfadsfs\""
""
},
// Parameter value includes leading space. See https://crbug.com/772834.
{ "text/html; boundary= \"WebKit-ada-df-dsf-adsfadsfs\"",
"text/html",
"",
false,
"\"WebKit-ada-df-dsf-adsfadsfs\""
" \"WebKit-ada-df-dsf-adsfadsfs\""
},
// Parameter value includes leading space. See https://crbug.com/772834.
{ "text/html; boundary= \"WebKit-ada-df-dsf-adsfadsfs\" ",
"text/html",
"",
false,
"\"WebKit-ada-df-dsf-adsfadsfs\""
" \"WebKit-ada-df-dsf-adsfadsfs\""
},
{ "text/html; boundary=\"WebKit-ada-df-dsf-adsfadsfs \"",
"text/html",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment