Modify whitespace trimming for MIME parameters.

* Do not trim trailing whitespace from MIME parameter names. * Do not trim leading whitespace from MIME parameter values. * Except keep trimming leading whitespace from "charset" value. Bug: 772834 Cq-Include-Trybots: master.tryserver.chromium.android:android_cronet_tester;master.tryserver.chromium.mac:ios-simulator-cronet Change-Id: Ie7f5bc493ea0c8fece9a6a8159b27c85bb58ac2b Reviewed-on: https://chromium-review.googlesource.com/763667Reviewed-by: Matt Menke <mmenke@chromium.org> Commit-Queue: Bence Béky <bnc@chromium.org> Cr-Commit-Position: refs/heads/master@{#516348}

Modify whitespace trimming for MIME parameters.
* Do not trim trailing whitespace from MIME parameter names. * Do not trim leading whitespace from MIME parameter values. * Except keep trimming leading whitespace from "charset" value. Bug: 772834 Cq-Include-Trybots: master.tryserver.chromium.android:android_cronet_tester;master.tryserver.chromium.mac:ios-simulator-cronet Change-Id: Ie7f5bc493ea0c8fece9a6a8159b27c85bb58ac2b Reviewed-on: https://chromium-review.googlesource.com/763667Reviewed-by: Matt Menke <mmenke@chromium.org> Commit-Queue: Bence Béky <bnc@chromium.org> Cr-Commit-Position: refs/heads/master@{#516348}
c0399701 · Bence Béky · Commit Bot · e6fdf403 · c0399701 · c0399701
Commit c0399701 authored Nov 14, 2017 by Bence Béky Committed by Commit Bot Nov 14, 2017
Hide whitespace changes
Inline Side-by-side

Showing with 45 additions and 36 deletions

net/http/http_util.cc net/http/http_util.cc +35 -31

net/http/http_util_unittest.cc net/http/http_util_unittest.cc +10 -5

No files found.
--- a/net/http/http_util.cc
+++ b/net/http/http_util.cc
@@ -98,12 +98,14 @@ void HttpUtil::ParseContentType(const std::string& content_type_str,
      std::string::const_iterator param_name_begin = tokenizer.token_begin();
      std::string::const_iterator param_name_end = equals_sign;
-      TrimLWS(&param_name_begin, &param_name_end);
      std::string::const_iterator param_value_begin = equals_sign + 1;
      std::string::const_iterator param_value_end = tokenizer.token_end();
      DCHECK(param_value_begin <= tokenizer.token_end());
-      TrimLWS(&param_value_begin, &param_value_end);
+      // From parameter name, only trim leading whitespace.
+      // From parameter value, only trim trailing whitespace.
+      // See https://crbug.com/772834.
+      TrimLWS(&param_name_begin, &param_value_end);
      if (base::LowerCaseEqualsASCII(
              base::StringPiece(param_name_begin, param_name_end), "charset")) {
@@ -121,9 +123,7 @@ void HttpUtil::ParseContentType(const std::string& content_type_str,
  }
  if (type_has_charset) {
-    // Trim leading and trailing whitespace from charset_val.  We include
+    // Trim leading whitespace from charset_val.
-    // '(' in the trailing trim set to catch media-type comments, which are
-    // not at all standard, but may occur in rare cases.
    charset_val = content_type_str.find_first_not_of(HTTP_LWS, charset_val);
    charset_val = std::min(charset_val, charset_end);
    char first_char = content_type_str[charset_val];
@@ -133,36 +133,40 @@ void HttpUtil::ParseContentType(const std::string& content_type_str,
      ++charset_val;
      DCHECK(charset_end >= charset_val);
    } else {
-      charset_end = std::min(content_type_str.find_first_of(HTTP_LWS ";(",
+      // Ignore the part after '('.  This is not in the standard, but may occur
-                                                            charset_val),
+      // in rare cases.
+      // TODO(bnc): Do not ignore the part after '('.
+      // See https://crbug.com/772343.
+      charset_end = std::min(content_type_str.find_first_of("(", charset_val),
                             charset_end);
    }
  }
-  // if the server sent "*/*", it is meaningless, so do not store it.
+  // If the server sent "*/*", it is meaningless, so do not store it.
-  // also, if type_val is the same as mime_type, then just update the
+  // Also, reject a mime-type if it does not include a slash.
-  // charset.  however, if charset is empty and mime_type hasn't
+  // Some servers give junk after the charset parameter, which may
-  // changed, then don't wipe-out an existing charset.  We
-  // also want to reject a mime-type if it does not include a slash.
-  // some servers give junk after the charset parameter, which may
  // include a comma, so this check makes us a bit more tolerant.
-  if (content_type_str.length() != 0 &&
+  if (content_type_str.length() == 0 || content_type_str == "*/*" ||
-      content_type_str != "*/*" &&
+      content_type_str.find_first_of('/') == std::string::npos) {
-      content_type_str.find_first_of('/') != std::string::npos) {
+    return;
-    // Common case here is that mime_type is empty
+  }
-    bool eq = !mime_type->empty() &&
-              base::LowerCaseEqualsASCII(
+  // If type_val is the same as mime_type, then just update the charset.
-                  base::StringPiece(begin + type_val, begin + type_end),
+  // However, if charset is empty and mime_type hasn't changed, then don't
-                  mime_type->data());
+  // wipe-out an existing charset.
-    if (!eq) {
+  // It is common that mime_type is empty.
-      *mime_type = base::ToLowerASCII(
+  bool eq = !mime_type->empty() &&
-          base::StringPiece(begin + type_val, begin + type_end));
+            base::LowerCaseEqualsASCII(
-    }
+                base::StringPiece(begin + type_val, begin + type_end),
-    if ((!eq && *had_charset) || type_has_charset) {
+                mime_type->data());
-      *had_charset = true;
+  if (!eq) {
-      *charset = base::ToLowerASCII(
+    *mime_type = base::ToLowerASCII(
-          base::StringPiece(begin + charset_val, begin + charset_end));
+        base::StringPiece(begin + type_val, begin + type_end));
-    }
+  }
+  if ((!eq && *had_charset) || type_has_charset) {
+    *had_charset = true;
+    *charset = base::ToLowerASCII(
+        base::StringPiece(begin + charset_val, begin + charset_end));
  }
 }

--- a/net/http/http_util_unittest.cc
+++ b/net/http/http_util_unittest.cc
@@ -740,10 +740,11 @@ TEST(HttpUtilTest, ParseContentType) {
      true,
      ""
    },
+    // Parameter name is "charset ", not "charset".  See https://crbug.com/772834.
    { "text/html; charset =utf-8",
      "text/html",
-      "utf-8",
+      "",
-      true,
+      false,
      ""
    },
    { "text/html; charset= utf-8",
@@ -764,23 +765,27 @@ TEST(HttpUtilTest, ParseContentType) {
      false,
      "\"WebKit-ada-df-dsf-adsfadsfs\""
    },
+    // Parameter name is "boundary ", not "boundary".
+    // See https://crbug.com/772834.
    { "text/html; boundary =\"WebKit-ada-df-dsf-adsfadsfs\"",
      "text/html",
      "",
      false,
-      "\"WebKit-ada-df-dsf-adsfadsfs\""
+      ""
    },
+    // Parameter value includes leading space.  See https://crbug.com/772834.
    { "text/html; boundary= \"WebKit-ada-df-dsf-adsfadsfs\"",
      "text/html",
      "",
      false,
-      "\"WebKit-ada-df-dsf-adsfadsfs\""
+      " \"WebKit-ada-df-dsf-adsfadsfs\""
    },
+    // Parameter value includes leading space.  See https://crbug.com/772834.
    { "text/html; boundary= \"WebKit-ada-df-dsf-adsfadsfs\"   ",
      "text/html",
      "",
      false,
-      "\"WebKit-ada-df-dsf-adsfadsfs\""
+      " \"WebKit-ada-df-dsf-adsfadsfs\""
    },
    { "text/html; boundary=\"WebKit-ada-df-dsf-adsfadsfs  \"",
      "text/html",