Commit 9dade7b1 authored by Matt Menke, committed by Commit Bot

Add MIME sniffer overloads that take base::StringPieces, plumbing StringPieces all the way through the MIME sniffing code.

This CL only updates net/ consumers to use the overloads, which are all
test-only code. I'll update consumers and remove the old methods in
another CL.

Bug: 1123179
Change-Id: I04328913fa1c7bfd9049fa7aaae6a8a538cbe8e3
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2382896
Commit-Queue: Matt Menke <mmenke@chromium.org>
Reviewed-by: Eric Roman <eroman@chromium.org>
Cr-Commit-Position: refs/heads/master@{#803339}
parent 4b059f06
...@@ -283,65 +283,53 @@ static const MagicNumber kSniffableTags[] = { ...@@ -283,65 +283,53 @@ static const MagicNumber kSniffableTags[] = {
// Compare content header to a magic number where magic_entry can contain '.' // Compare content header to a magic number where magic_entry can contain '.'
// for single character of anything, allowing some bytes to be skipped. // for single character of anything, allowing some bytes to be skipped.
static bool MagicCmp(const char* magic_entry, const char* content, size_t len) { static bool MagicCmp(base::StringPiece content, base::StringPiece magic_entry) {
while (len) { DCHECK_GE(content.length(), magic_entry.length());
if ((*magic_entry != '.') && (*magic_entry != *content))
for (size_t i = 0; i < magic_entry.length(); ++i) {
if (magic_entry[i] != '.' && magic_entry[i] != content[i])
return false; return false;
++magic_entry;
++content;
--len;
} }
return true; return true;
} }
// Like MagicCmp() except that it ANDs each byte with a mask before // Like MagicCmp() except that it ANDs each byte with a mask before
// the comparison, because there are some bits we don't care about. // the comparison, because there are some bits we don't care about.
static bool MagicMaskCmp(const char* magic_entry, static bool MagicMaskCmp(base::StringPiece content,
const char* content, base::StringPiece magic_entry,
size_t len, base::StringPiece magic_mask) {
const char* mask) { DCHECK_GE(content.length(), magic_entry.length());
while (len) {
if ((*magic_entry != '.') && (*magic_entry != (*mask & *content))) for (size_t i = 0; i < magic_entry.length(); ++i) {
if (magic_entry[i] != '.' && magic_entry[i] != (magic_mask[i] & content[i]))
return false; return false;
++magic_entry;
++content;
++mask;
--len;
} }
return true; return true;
} }
static bool MatchMagicNumber(const char* content, static bool MatchMagicNumber(base::StringPiece content,
size_t size,
const MagicNumber& magic_entry, const MagicNumber& magic_entry,
std::string* result) { std::string* result) {
const size_t len = magic_entry.magic_len; const size_t magic_len = magic_entry.magic_len;
// Keep kBytesRequiredForMagic honest. // Keep kBytesRequiredForMagic honest.
DCHECK_LE(len, kBytesRequiredForMagic); DCHECK_LE(magic_len, kBytesRequiredForMagic);
// To compare with magic strings, we need to compute strlen(content), but
// content might not actually have a null terminator. In that case, we
// pretend the length is content_size.
const char* end = static_cast<const char*>(memchr(content, '\0', size));
const size_t content_strlen =
(end != nullptr) ? static_cast<size_t>(end - content) : size;
bool match = false; bool match = false;
if (magic_entry.is_string) { base::StringPiece magic_string(magic_entry.magic, magic_len);
if (content_strlen >= len) { if (content.length() >= magic_len) {
if (magic_entry.is_string) {
// Consistency check - string entries should have no embedded nulls.
DCHECK_EQ(strlen(magic_entry.magic), magic_len);
// Do a case-insensitive prefix comparison. // Do a case-insensitive prefix comparison.
DCHECK_EQ(strlen(magic_entry.magic), len); match = base::StartsWith(content, magic_string,
match = base::EqualsCaseInsensitiveASCII(magic_entry.magic, base::CompareCase::INSENSITIVE_ASCII);
base::StringPiece(content, len)); } else if (!magic_entry.mask) {
} match = MagicCmp(content, magic_string);
} else { } else {
if (size >= len) { base::StringPiece magic_mask(magic_entry.mask, magic_len);
if (!magic_entry.mask) { match = MagicMaskCmp(content, magic_string, magic_mask);
match = MagicCmp(magic_entry.magic, content, len);
} else {
match = MagicMaskCmp(magic_entry.magic, content, len, magic_entry.mask);
}
} }
} }
...@@ -352,78 +340,69 @@ static bool MatchMagicNumber(const char* content, ...@@ -352,78 +340,69 @@ static bool MatchMagicNumber(const char* content,
return false; return false;
} }
static bool CheckForMagicNumbers(const char* content, static bool CheckForMagicNumbers(base::StringPiece content,
size_t size,
base::span<const MagicNumber> magic_numbers, base::span<const MagicNumber> magic_numbers,
std::string* result) { std::string* result) {
for (const MagicNumber& magic : magic_numbers) { for (const MagicNumber& magic : magic_numbers) {
if (MatchMagicNumber(content, size, magic, result)) if (MatchMagicNumber(content, magic, result))
return true; return true;
} }
return false; return false;
} }
// Truncates |size| to |max_size| and returns true if |size| is at least // Truncates |string_piece| to length |max_size| and returns true if
// |max_size|. // |string_piece| is now exactly |max_size|.
static bool TruncateSize(const size_t max_size, size_t* size) { static bool TruncateStringPiece(const size_t max_size,
base::StringPiece* string_piece) {
// Keep kMaxBytesToSniff honest. // Keep kMaxBytesToSniff honest.
DCHECK_LE(static_cast<int>(max_size), kMaxBytesToSniff); DCHECK_LE(static_cast<int>(max_size), kMaxBytesToSniff);
if (*size >= max_size) { *string_piece = string_piece->substr(0, max_size);
*size = max_size; return string_piece->length() == max_size;
return true;
}
return false;
} }
// Returns true and sets result if the content appears to be HTML. // Returns true and sets result if the content appears to be HTML.
// Clears have_enough_content if more data could possibly change the result. // Clears have_enough_content if more data could possibly change the result.
static bool SniffForHTML(const char* content, static bool SniffForHTML(base::StringPiece content,
size_t size,
bool* have_enough_content, bool* have_enough_content,
std::string* result) { std::string* result) {
// For HTML, we are willing to consider up to 512 bytes. This may be overly // For HTML, we are willing to consider up to 512 bytes. This may be overly
// conservative as IE only considers 256. // conservative as IE only considers 256.
*have_enough_content &= TruncateSize(512, &size); *have_enough_content &= TruncateStringPiece(512, &content);
// We adopt a strategy similar to that used by Mozilla to sniff HTML tags, // We adopt a strategy similar to that used by Mozilla to sniff HTML tags,
// but with some modifications to better match the HTML5 spec. // but with some modifications to better match the HTML5 spec.
const char* const end = content + size; base::StringPiece trimmed =
const char* pos; base::TrimWhitespaceASCII(content, base::TRIM_LEADING);
for (pos = content; pos < end; ++pos) {
if (!base::IsAsciiWhitespace(*pos)) // |trimmed| now starts at first non-whitespace character (or is empty).
break; return CheckForMagicNumbers(trimmed, kSniffableTags, result);
}
// |pos| now points to first non-whitespace character (or at end).
return CheckForMagicNumbers(pos, end - pos, kSniffableTags, result);
} }
// Returns true and sets result if the content matches any of kMagicNumbers. // Returns true and sets result if the content matches any of kMagicNumbers.
// Clears have_enough_content if more data could possibly change the result. // Clears have_enough_content if more data could possibly change the result.
static bool SniffForMagicNumbers(const char* content, static bool SniffForMagicNumbers(base::StringPiece content,
size_t size,
bool* have_enough_content, bool* have_enough_content,
std::string* result) { std::string* result) {
*have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size); *have_enough_content &= TruncateStringPiece(kBytesRequiredForMagic, &content);
// Check our big table of Magic Numbers // Check our big table of Magic Numbers
return CheckForMagicNumbers(content, size, kMagicNumbers, result); return CheckForMagicNumbers(content, kMagicNumbers, result);
} }
// Returns true and sets result if the content matches any of // Returns true and sets result if the content matches any of
// kOfficeMagicNumbers, and the URL has the proper extension. // kOfficeMagicNumbers, and the URL has the proper extension.
// Clears |have_enough_content| if more data could possibly change the result. // Clears |have_enough_content| if more data could possibly change the result.
static bool SniffForOfficeDocs(const char* content, static bool SniffForOfficeDocs(base::StringPiece content,
size_t size,
const GURL& url, const GURL& url,
bool* have_enough_content, bool* have_enough_content,
std::string* result) { std::string* result) {
*have_enough_content &= TruncateSize(kBytesRequiredForOfficeMagic, &size); *have_enough_content &=
TruncateStringPiece(kBytesRequiredForOfficeMagic, &content);
// Check our table of magic numbers for Office file types. // Check our table of magic numbers for Office file types.
std::string office_version; std::string office_version;
if (!CheckForMagicNumbers(content, size, kOfficeMagicNumbers, if (!CheckForMagicNumbers(content, kOfficeMagicNumbers, &office_version))
&office_version))
return false; return false;
OfficeDocType type = DOC_TYPE_NONE; OfficeDocType type = DOC_TYPE_NONE;
...@@ -513,18 +492,16 @@ static bool IsOfficeType(const std::string& type_hint) { ...@@ -513,18 +492,16 @@ static bool IsOfficeType(const std::string& type_hint) {
// //
// Returns false if additional data is required to determine the file type, or // Returns false if additional data is required to determine the file type, or
// true if there is enough data to make a decision. // true if there is enough data to make a decision.
static bool SniffForInvalidOfficeDocs(const char* content, static bool SniffForInvalidOfficeDocs(base::StringPiece content,
size_t size,
const GURL& url, const GURL& url,
std::string* result) { std::string* result) {
if (!TruncateSize(kBytesRequiredForOfficeMagic, &size)) if (!TruncateStringPiece(kBytesRequiredForOfficeMagic, &content))
return false; return false;
// Check our table of magic numbers for Office file types. If it does not // Check our table of magic numbers for Office file types. If it does not
// match one, the MIME type was invalid. Set it instead to a safe value. // match one, the MIME type was invalid. Set it instead to a safe value.
std::string office_version; std::string office_version;
if (!CheckForMagicNumbers(content, size, kOfficeMagicNumbers, if (!CheckForMagicNumbers(content, kOfficeMagicNumbers, &office_version)) {
&office_version)) {
*result = "application/octet-stream"; *result = "application/octet-stream";
} }
...@@ -547,29 +524,27 @@ static const MagicNumber kMagicXML[] = { ...@@ -547,29 +524,27 @@ static const MagicNumber kMagicXML[] = {
// while HTML5 has a different recommendation -- what should we do? // while HTML5 has a different recommendation -- what should we do?
// TODO(evanm): this is incorrect for documents whose encoding isn't a superset // TODO(evanm): this is incorrect for documents whose encoding isn't a superset
// of ASCII -- do we care? // of ASCII -- do we care?
static bool SniffXML(const char* content, static bool SniffXML(base::StringPiece content,
size_t size,
bool* have_enough_content, bool* have_enough_content,
std::string* result) { std::string* result) {
// We allow at most 300 bytes of content before we expect the opening tag. // We allow at most 300 bytes of content before we expect the opening tag.
*have_enough_content &= TruncateSize(300, &size); *have_enough_content &= TruncateStringPiece(300, &content);
const char* pos = content;
const char* const end = content + size;
// This loop iterates through tag-looking offsets in the file. // This loop iterates through tag-looking offsets in the file.
// We want to skip XML processing instructions (of the form "<?xml ...") // We want to skip XML processing instructions (of the form "<?xml ...")
// and stop at the first "plain" tag, then make a decision on the mime-type // and stop at the first "plain" tag, then make a decision on the mime-type
// based on the name (or possibly attributes) of that tag. // based on the name (or possibly attributes) of that tag.
const int kMaxTagIterations = 5; const int kMaxTagIterations = 5;
for (int i = 0; i < kMaxTagIterations && pos < end; ++i) { size_t pos = 0;
pos = reinterpret_cast<const char*>(memchr(pos, '<', end - pos)); for (size_t i = 0; i < kMaxTagIterations && pos < content.length(); ++i) {
if (!pos) pos = content.find('<', pos);
if (pos == base::StringPiece::npos)
return false; return false;
static constexpr base::StringPiece kXmlPrefix("<?xml"); static constexpr base::StringPiece kXmlPrefix("<?xml");
static constexpr base::StringPiece kDocTypePrefix("<!DOCTYPE"); static constexpr base::StringPiece kDocTypePrefix("<!DOCTYPE");
base::StringPiece current(pos, end - pos); base::StringPiece current = content.substr(pos);
if (base::EqualsCaseInsensitiveASCII(current.substr(0, kXmlPrefix.size()), if (base::EqualsCaseInsensitiveASCII(current.substr(0, kXmlPrefix.size()),
kXmlPrefix)) { kXmlPrefix)) {
// Skip XML declarations. // Skip XML declarations.
...@@ -584,7 +559,7 @@ static bool SniffXML(const char* content, ...@@ -584,7 +559,7 @@ static bool SniffXML(const char* content,
continue; continue;
} }
if (CheckForMagicNumbers(pos, end - pos, kMagicXML, result)) if (CheckForMagicNumbers(current, kMagicXML, result))
return true; return true;
// TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult // TODO(evanm): handle RSS 1.0, which is an RDF format and more difficult
...@@ -598,7 +573,7 @@ static bool SniffXML(const char* content, ...@@ -598,7 +573,7 @@ static bool SniffXML(const char* content,
// We iterated too far without finding a start tag. // We iterated too far without finding a start tag.
// If we have more content to look at, we aren't going to change our mind by // If we have more content to look at, we aren't going to change our mind by
// seeing more bytes from the network. // seeing more bytes from the network.
return pos < end; return pos < content.length();
} }
// Byte order marks // Byte order marks
...@@ -611,8 +586,7 @@ static const MagicNumber kByteOrderMark[] = { ...@@ -611,8 +586,7 @@ static const MagicNumber kByteOrderMark[] = {
// Returns true and sets result to "application/octet-stream" if the content // Returns true and sets result to "application/octet-stream" if the content
// appears to be binary data. Otherwise, returns false and sets "text/plain". // appears to be binary data. Otherwise, returns false and sets "text/plain".
// Clears have_enough_content if more data could possibly change the result. // Clears have_enough_content if more data could possibly change the result.
static bool SniffBinary(const char* content, static bool SniffBinary(base::StringPiece content,
size_t size,
bool* have_enough_content, bool* have_enough_content,
std::string* result) { std::string* result) {
// There is no consensus about exactly how to sniff for binary content. // There is no consensus about exactly how to sniff for binary content.
...@@ -622,18 +596,18 @@ static bool SniffBinary(const char* content, ...@@ -622,18 +596,18 @@ static bool SniffBinary(const char* content,
// because it is small enough to comfortably fit into a single packet (after // because it is small enough to comfortably fit into a single packet (after
// allowing for headers) and yet large enough to account for binary formats // allowing for headers) and yet large enough to account for binary formats
// that have a significant amount of ASCII at the beginning (crbug.com/15314). // that have a significant amount of ASCII at the beginning (crbug.com/15314).
const bool is_truncated = TruncateSize(kMaxBytesToSniff, &size); const bool is_truncated = TruncateStringPiece(kMaxBytesToSniff, &content);
// First, we look for a BOM. // First, we look for a BOM.
std::string unused; std::string unused;
if (CheckForMagicNumbers(content, size, kByteOrderMark, &unused)) { if (CheckForMagicNumbers(content, kByteOrderMark, &unused)) {
// If there is BOM, we think the buffer is not binary. // If there is BOM, we think the buffer is not binary.
result->assign("text/plain"); result->assign("text/plain");
return false; return false;
} }
// Next we look to see if any of the bytes "look binary." // Next we look to see if any of the bytes "look binary."
if (LooksLikeBinary(content, size)) { if (LooksLikeBinary(content)) {
result->assign("application/octet-stream"); result->assign("application/octet-stream");
return true; return true;
} }
...@@ -646,7 +620,7 @@ static bool SniffBinary(const char* content, ...@@ -646,7 +620,7 @@ static bool SniffBinary(const char* content,
return false; return false;
} }
static bool IsUnknownMimeType(const std::string& mime_type) { static bool IsUnknownMimeType(base::StringPiece mime_type) {
// TODO(tc): Maybe reuse some code in net/http/http_response_headers.* here. // TODO(tc): Maybe reuse some code in net/http/http_response_headers.* here.
// If we do, please be careful not to alter the semantics at all. // If we do, please be careful not to alter the semantics at all.
static const char* const kUnknownMimeTypes[] = { static const char* const kUnknownMimeTypes[] = {
...@@ -663,7 +637,7 @@ static bool IsUnknownMimeType(const std::string& mime_type) { ...@@ -663,7 +637,7 @@ static bool IsUnknownMimeType(const std::string& mime_type) {
if (mime_type == unknown_mime_type) if (mime_type == unknown_mime_type)
return true; return true;
} }
if (mime_type.find('/') == std::string::npos) { if (mime_type.find('/') == base::StringPiece::npos) {
// Firefox rejects a mime type if it does not contain a slash // Firefox rejects a mime type if it does not contain a slash
return true; return true;
} }
...@@ -673,8 +647,7 @@ static bool IsUnknownMimeType(const std::string& mime_type) { ...@@ -673,8 +647,7 @@ static bool IsUnknownMimeType(const std::string& mime_type) {
// Returns true and sets result if the content appears to be a crx (Chrome // Returns true and sets result if the content appears to be a crx (Chrome
// extension) file. // extension) file.
// Clears have_enough_content if more data could possibly change the result. // Clears have_enough_content if more data could possibly change the result.
static bool SniffCRX(const char* content, static bool SniffCRX(base::StringPiece content,
size_t size,
const GURL& url, const GURL& url,
bool* have_enough_content, bool* have_enough_content,
std::string* result) { std::string* result) {
...@@ -690,11 +663,11 @@ static bool SniffCRX(const char* content, ...@@ -690,11 +663,11 @@ static bool SniffCRX(const char* content,
if (!base::EndsWith(url.path_piece(), ".crx", base::CompareCase::SENSITIVE)) if (!base::EndsWith(url.path_piece(), ".crx", base::CompareCase::SENSITIVE))
return false; return false;
*have_enough_content &= TruncateSize(kBytesRequiredForMagic, &size); *have_enough_content &= TruncateStringPiece(kBytesRequiredForMagic, &content);
return CheckForMagicNumbers(content, size, kCRXMagicNumbers, result); return CheckForMagicNumbers(content, kCRXMagicNumbers, result);
} }
bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) { bool ShouldSniffMimeType(const GURL& url, base::StringPiece mime_type) {
bool sniffable_scheme = url.is_empty() || bool sniffable_scheme = url.is_empty() ||
url.SchemeIsHTTPOrHTTPS() || url.SchemeIsHTTPOrHTTPS() ||
#if defined(OS_ANDROID) #if defined(OS_ANDROID)
...@@ -744,14 +717,13 @@ bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) { ...@@ -744,14 +717,13 @@ bool ShouldSniffMimeType(const GURL& url, const std::string& mime_type) {
return false; return false;
} }
bool SniffMimeType(const char* content, bool SniffMimeType(base::StringPiece content,
size_t content_size,
const GURL& url, const GURL& url,
const std::string& type_hint, const std::string& type_hint,
ForceSniffFileUrlsForHtml force_sniff_file_url_for_html, ForceSniffFileUrlsForHtml force_sniff_file_url_for_html,
std::string* result) { std::string* result) {
DCHECK_LT(content_size, 1000000U); // sanity check // Sanity check.
DCHECK(content); DCHECK_LT(content.length(), 1000000U);
DCHECK(result); DCHECK(result);
// By default, we assume we have enough content. // By default, we assume we have enough content.
...@@ -766,7 +738,7 @@ bool SniffMimeType(const char* content, ...@@ -766,7 +738,7 @@ bool SniffMimeType(const char* content,
// is a valid Office file. Because this is the only reason we sniff files // is a valid Office file. Because this is the only reason we sniff files
// with a Microsoft Office MIME type, we can return early. // with a Microsoft Office MIME type, we can return early.
if (IsOfficeType(type_hint)) if (IsOfficeType(type_hint))
return SniffForInvalidOfficeDocs(content, content_size, url, result); return SniffForInvalidOfficeDocs(content, url, result);
// Cache information about the type_hint // Cache information about the type_hint
bool hint_is_unknown_mime_type = IsUnknownMimeType(type_hint); bool hint_is_unknown_mime_type = IsUnknownMimeType(type_hint);
...@@ -779,7 +751,7 @@ bool SniffMimeType(const char* content, ...@@ -779,7 +751,7 @@ bool SniffMimeType(const char* content,
// We're only willing to sniff HTML if the server has not supplied a mime // We're only willing to sniff HTML if the server has not supplied a mime
// type, or if the type it did supply indicates that it doesn't know what // type, or if the type it did supply indicates that it doesn't know what
// the type should be. // the type should be.
if (SniffForHTML(content, content_size, &have_enough_content, result)) if (SniffForHTML(content, &have_enough_content, result))
return true; // We succeeded in sniffing HTML. No more content needed. return true; // We succeeded in sniffing HTML. No more content needed.
} }
...@@ -791,7 +763,7 @@ bool SniffMimeType(const char* content, ...@@ -791,7 +763,7 @@ bool SniffMimeType(const char* content,
// could be indicative of a mis-configuration that we shield the user from. // could be indicative of a mis-configuration that we shield the user from.
const bool hint_is_text_plain = (type_hint == "text/plain"); const bool hint_is_text_plain = (type_hint == "text/plain");
if (hint_is_unknown_mime_type || hint_is_text_plain) { if (hint_is_unknown_mime_type || hint_is_text_plain) {
if (!SniffBinary(content, content_size, &have_enough_content, result)) { if (!SniffBinary(content, &have_enough_content, result)) {
// If the server said the content was text/plain and it doesn't appear // If the server said the content was text/plain and it doesn't appear
// to be binary, then we trust it. // to be binary, then we trust it.
if (hint_is_text_plain) { if (hint_is_text_plain) {
...@@ -805,22 +777,22 @@ bool SniffMimeType(const char* content, ...@@ -805,22 +777,22 @@ bool SniffMimeType(const char* content,
// We're not interested in sniffing these types for images and the like. // We're not interested in sniffing these types for images and the like.
// Instead, we're looking explicitly for a feed. If we don't find one // Instead, we're looking explicitly for a feed. If we don't find one
// we're done and return early. // we're done and return early.
if (SniffXML(content, content_size, &have_enough_content, result)) if (SniffXML(content, &have_enough_content, result))
return true; return true;
return have_enough_content; return have_enough_content;
} }
// CRX files (Chrome extensions) have a special sniffing algorithm. It is // CRX files (Chrome extensions) have a special sniffing algorithm. It is
// tighter than the others because we don't have to match legacy behavior. // tighter than the others because we don't have to match legacy behavior.
if (SniffCRX(content, content_size, url, &have_enough_content, result)) if (SniffCRX(content, url, &have_enough_content, result))
return true; return true;
// Check the file extension and magic numbers to see if this is an Office // Check the file extension and magic numbers to see if this is an Office
// document. This needs to be checked before the general magic numbers // document. This needs to be checked before the general magic numbers
// because zip files and Office documents (OOXML) have the same magic number. // because zip files and Office documents (OOXML) have the same magic number.
if (SniffForOfficeDocs(content, content_size, url, if (SniffForOfficeDocs(content, url, &have_enough_content, result)) {
&have_enough_content, result))
return true; // We've matched a magic number. No more content needed. return true; // We've matched a magic number. No more content needed.
}
// We're not interested in sniffing for magic numbers when the type_hint // We're not interested in sniffing for magic numbers when the type_hint
// is application/octet-stream. Time to bail out. // is application/octet-stream. Time to bail out.
...@@ -829,24 +801,38 @@ bool SniffMimeType(const char* content, ...@@ -829,24 +801,38 @@ bool SniffMimeType(const char* content,
// Now we look in our large table of magic numbers to see if we can find // Now we look in our large table of magic numbers to see if we can find
// anything that matches the content. // anything that matches the content.
if (SniffForMagicNumbers(content, content_size, if (SniffForMagicNumbers(content, &have_enough_content, result))
&have_enough_content, result))
return true; // We've matched a magic number. No more content needed. return true; // We've matched a magic number. No more content needed.
return have_enough_content; return have_enough_content;
} }
bool SniffMimeTypeFromLocalData(const char* content, bool SniffMimeType(const char* content,
size_t size, size_t content_size,
std::string* result) { const GURL& url,
const std::string& type_hint,
ForceSniffFileUrlsForHtml force_sniff_file_url_for_html,
std::string* result) {
return SniffMimeType(base::StringPiece(content, content_size), url, type_hint,
force_sniff_file_url_for_html, result);
}
NET_EXPORT bool SniffMimeTypeFromLocalData(base::StringPiece content,
std::string* result) {
// First check the extra table. // First check the extra table.
if (CheckForMagicNumbers(content, size, kExtraMagicNumbers, result)) if (CheckForMagicNumbers(content, kExtraMagicNumbers, result))
return true; return true;
// Finally check the original table. // Finally check the original table.
return CheckForMagicNumbers(content, size, kMagicNumbers, result); return CheckForMagicNumbers(content, kMagicNumbers, result);
}
bool SniffMimeTypeFromLocalData(const char* content,
size_t size,
std::string* result) {
return SniffMimeTypeFromLocalData(base::StringPiece(content, size), result);
} }
bool LooksLikeBinary(const char* content, size_t size) { bool LooksLikeBinary(base::StringPiece content) {
// The definition of "binary bytes" is from the spec at // The definition of "binary bytes" is from the spec at
// https://mimesniff.spec.whatwg.org/#binary-data-byte // https://mimesniff.spec.whatwg.org/#binary-data-byte
// //
...@@ -856,7 +842,7 @@ bool LooksLikeBinary(const char* content, size_t size) { ...@@ -856,7 +842,7 @@ bool LooksLikeBinary(const char* content, size_t size) {
// represents byte 0x1F. // represents byte 0x1F.
const uint32_t kBinaryBits = const uint32_t kBinaryBits =
~(1u << '\t' | 1u << '\n' | 1u << '\r' | 1u << '\f' | 1u << '\x1b'); ~(1u << '\t' | 1u << '\n' | 1u << '\r' | 1u << '\f' | 1u << '\x1b');
for (size_t i = 0; i < size; ++i) { for (size_t i = 0; i < content.length(); ++i) {
uint8_t byte = static_cast<uint8_t>(content[i]); uint8_t byte = static_cast<uint8_t>(content[i]);
if (byte < 0x20 && (kBinaryBits & (1u << byte))) if (byte < 0x20 && (kBinaryBits & (1u << byte)))
return true; return true;
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include <string> #include <string>
#include "base/strings/string_piece.h"
#include "net/base/net_export.h" #include "net/base/net_export.h"
class GURL; class GURL;
...@@ -35,13 +36,12 @@ enum class ForceSniffFileUrlsForHtml { ...@@ -35,13 +36,12 @@ enum class ForceSniffFileUrlsForHtml {
// |mime_type| is the current mime type, e.g. from the Content-Type header. // |mime_type| is the current mime type, e.g. from the Content-Type header.
// Returns true if the mime type should be sniffed. // Returns true if the mime type should be sniffed.
NET_EXPORT bool ShouldSniffMimeType(const GURL& url, NET_EXPORT bool ShouldSniffMimeType(const GURL& url,
const std::string& mime_type); base::StringPiece mime_type);
// Guess a mime type from the first few bytes of content and its URL. Always // Guess a mime type from the first few bytes of content and its URL. Always
// assigns |result| with its best guess of a mime type. // assigns |result| with its best guess of a mime type.
// //
// |content| is the buffer containing the bytes to sniff. // |content| contains the bytes to sniff.
// |content_size| is the number of bytes in the |content| buffer.
// |url| is the URL from which the content was obtained. // |url| is the URL from which the content was obtained.
// |type_hint| is the current mime type, e.g. from the Content-Type header. // |type_hint| is the current mime type, e.g. from the Content-Type header.
// |result| is the address at which to place the sniffed mime type. // |result| is the address at which to place the sniffed mime type.
...@@ -52,6 +52,16 @@ NET_EXPORT bool ShouldSniffMimeType(const GURL& url, ...@@ -52,6 +52,16 @@ NET_EXPORT bool ShouldSniffMimeType(const GURL& url,
// Returns true if |content| had enough data to guess the mime type. Otherwise, // Returns true if |content| had enough data to guess the mime type. Otherwise,
// |result| will be populated with a putative MIME type, but the method should // |result| will be populated with a putative MIME type, but the method should
// be called again with more of the content. // be called again with more of the content.
NET_EXPORT bool SniffMimeType(
base::StringPiece content,
const GURL& url,
const std::string& type_hint,
ForceSniffFileUrlsForHtml force_sniff_file_url_for_html,
std::string* result);
// Older deprecated version of the above function.
//
// TODO(https://crbug.com/1123179): Update callers and remove this method.
NET_EXPORT bool SniffMimeType( NET_EXPORT bool SniffMimeType(
const char* content, const char* content,
size_t content_size, size_t content_size,
...@@ -68,20 +78,22 @@ NET_EXPORT bool SniffMimeType( ...@@ -68,20 +78,22 @@ NET_EXPORT bool SniffMimeType(
// The caller should understand the security ramifications of trusting // The caller should understand the security ramifications of trusting
// uncontrolled data before accepting the results of this function. // uncontrolled data before accepting the results of this function.
// //
// @param content A buffer containing the bytes to sniff. // |content| contains the bytes to sniff.
// @param content_size The number of bytes in the |content| buffer. // |result| is the address at which to place the sniffed mime type.
// @param result Address at which to place the sniffed mime type. // Returns true if a MIME type match was found.
// @return Returns true if a MIME type match was found. NET_EXPORT bool SniffMimeTypeFromLocalData(base::StringPiece content,
std::string* result);
// Older deprecated version of the above function.
//
// TODO(https://crbug.com/1123179): Update callers and remove this method.
NET_EXPORT bool SniffMimeTypeFromLocalData(const char* content, NET_EXPORT bool SniffMimeTypeFromLocalData(const char* content,
size_t content_size, size_t content_size,
std::string* result); std::string* result);
// Returns true if |content| contains bytes that are control codes that do // Returns true if |content| contains bytes that are control codes that do
// not usually appear in plain text. // not usually appear in plain text.
// @param content A buffer contains bytes that may be binary. NET_EXPORT_PRIVATE bool LooksLikeBinary(base::StringPiece content);
// @param size The number of bytes in the |content| buffer.
// @return Returns true if |content| looks like binary.
NET_EXPORT_PRIVATE bool LooksLikeBinary(const char* content, size_t size);
} // namespace net } // namespace net
......
...@@ -44,10 +44,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { ...@@ -44,10 +44,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
std::string input = data_provider.ConsumeRemainingBytesAsString(); std::string input = data_provider.ConsumeRemainingBytesAsString();
std::string result; std::string result;
net::SniffMimeType(input.data(), input.length(), GURL(url_string), net::SniffMimeType(input, GURL(url_string), mime_type_hint,
mime_type_hint, force_sniff_file_urls_for_html, &result); force_sniff_file_urls_for_html, &result);
net::SniffMimeTypeFromLocalData(input.data(), input.length(), &result); net::SniffMimeTypeFromLocalData(input, &result);
return 0; return 0;
} }
...@@ -72,7 +72,7 @@ const char kRepresentativePlainText[] = ...@@ -72,7 +72,7 @@ const char kRepresentativePlainText[] =
void RunLooksLikeBinary(const std::string& plaintext, size_t iterations) { void RunLooksLikeBinary(const std::string& plaintext, size_t iterations) {
bool looks_like_binary = false; bool looks_like_binary = false;
for (size_t i = 0; i < iterations; ++i) { for (size_t i = 0; i < iterations; ++i) {
if (LooksLikeBinary(plaintext.data(), plaintext.size())) if (LooksLikeBinary(plaintext))
looks_like_binary = true; looks_like_binary = true;
} }
CHECK(!looks_like_binary); CHECK(!looks_like_binary);
......
...@@ -24,11 +24,11 @@ std::string MakeConstantString(const char (&str)[N]) { ...@@ -24,11 +24,11 @@ std::string MakeConstantString(const char (&str)[N]) {
return std::string(str, N - 1); return std::string(str, N - 1);
} }
static std::string SniffMimeType(const std::string& content, static std::string SniffMimeType(base::StringPiece content,
const std::string& url, const std::string& url,
const std::string& mime_type_hint) { const std::string& mime_type_hint) {
std::string mime_type; std::string mime_type;
SniffMimeType(content.data(), content.size(), GURL(url), mime_type_hint, SniffMimeType(content, GURL(url), mime_type_hint,
ForceSniffFileUrlsForHtml::kDisabled, &mime_type); ForceSniffFileUrlsForHtml::kDisabled, &mime_type);
return mime_type; return mime_type;
} }
...@@ -74,14 +74,14 @@ TEST(MimeSnifferTest, BoundaryConditionsTest) { ...@@ -74,14 +74,14 @@ TEST(MimeSnifferTest, BoundaryConditionsTest) {
GURL url; GURL url;
SniffMimeType(buf, 0, url, type_hint, ForceSniffFileUrlsForHtml::kDisabled, SniffMimeType(base::StringPiece(), url, type_hint,
&mime_type); ForceSniffFileUrlsForHtml::kDisabled, &mime_type);
EXPECT_EQ("text/plain", mime_type); EXPECT_EQ("text/plain", mime_type);
SniffMimeType(buf, 1, url, type_hint, ForceSniffFileUrlsForHtml::kDisabled, SniffMimeType(base::StringPiece(buf, 1), url, type_hint,
&mime_type); ForceSniffFileUrlsForHtml::kDisabled, &mime_type);
EXPECT_EQ("text/plain", mime_type); EXPECT_EQ("text/plain", mime_type);
SniffMimeType(buf, 2, url, type_hint, ForceSniffFileUrlsForHtml::kDisabled, SniffMimeType(base::StringPiece(buf, 2), url, type_hint,
&mime_type); ForceSniffFileUrlsForHtml::kDisabled, &mime_type);
EXPECT_EQ("application/octet-stream", mime_type); EXPECT_EQ("application/octet-stream", mime_type);
} }
...@@ -256,11 +256,11 @@ TEST(MimeSnifferTest, SniffFilesAsHtml) { ...@@ -256,11 +256,11 @@ TEST(MimeSnifferTest, SniffFilesAsHtml) {
const GURL kUrl("file:///C/test.unusualextension"); const GURL kUrl("file:///C/test.unusualextension");
std::string mime_type; std::string mime_type;
SniffMimeType(kContent.c_str(), kContent.length(), kUrl, "" /* type_hint */, SniffMimeType(kContent, kUrl, "" /* type_hint */,
ForceSniffFileUrlsForHtml::kDisabled, &mime_type); ForceSniffFileUrlsForHtml::kDisabled, &mime_type);
EXPECT_EQ("text/plain", mime_type); EXPECT_EQ("text/plain", mime_type);
SniffMimeType(kContent.c_str(), kContent.length(), kUrl, "" /* type_hint */, SniffMimeType(kContent, kUrl, "" /* type_hint */,
ForceSniffFileUrlsForHtml::kEnabled, &mime_type); ForceSniffFileUrlsForHtml::kEnabled, &mime_type);
EXPECT_EQ("text/html", mime_type); EXPECT_EQ("text/html", mime_type);
} }
...@@ -371,7 +371,7 @@ TEST(MimeSnifferTest, XMLTestLargeNoAngledBracket) { ...@@ -371,7 +371,7 @@ TEST(MimeSnifferTest, XMLTestLargeNoAngledBracket) {
// content.size() >= 1024 so the sniff is unambiguous. // content.size() >= 1024 so the sniff is unambiguous.
std::string mime_type; std::string mime_type;
EXPECT_TRUE(SniffMimeType(content.data(), content.size(), GURL(), "text/xml", EXPECT_TRUE(SniffMimeType(content, GURL(), "text/xml",
ForceSniffFileUrlsForHtml::kDisabled, &mime_type)); ForceSniffFileUrlsForHtml::kDisabled, &mime_type));
EXPECT_EQ("text/xml", mime_type); EXPECT_EQ("text/xml", mime_type);
} }
...@@ -387,9 +387,8 @@ TEST(MimeSnifferTest, LooksBinary) { ...@@ -387,9 +387,8 @@ TEST(MimeSnifferTest, LooksBinary) {
// content.size() >= 1024 so the sniff is unambiguous. // content.size() >= 1024 so the sniff is unambiguous.
std::string mime_type; std::string mime_type;
EXPECT_TRUE(SniffMimeType(content.data(), content.size(), GURL(), EXPECT_TRUE(SniffMimeType(content, GURL(), "text/plain",
"text/plain", ForceSniffFileUrlsForHtml::kDisabled, ForceSniffFileUrlsForHtml::kDisabled, &mime_type));
&mime_type));
EXPECT_EQ("application/octet-stream", mime_type); EXPECT_EQ("application/octet-stream", mime_type);
} }
...@@ -450,27 +449,27 @@ TEST(MimeSnifferTest, OfficeTest) { ...@@ -450,27 +449,27 @@ TEST(MimeSnifferTest, OfficeTest) {
TEST(MimeSnifferTest, AudioVideoTest) { TEST(MimeSnifferTest, AudioVideoTest) {
std::string mime_type; std::string mime_type;
const char kOggTestData[] = "OggS\x00"; const char kOggTestData[] = "OggS\x00";
EXPECT_TRUE(SniffMimeTypeFromLocalData(kOggTestData, sizeof(kOggTestData) - 1, EXPECT_TRUE(SniffMimeTypeFromLocalData(
&mime_type)); base::StringPiece(kOggTestData, sizeof(kOggTestData) - 1), &mime_type));
EXPECT_EQ("audio/ogg", mime_type); EXPECT_EQ("audio/ogg", mime_type);
mime_type.clear(); mime_type.clear();
// Check ogg header requires the terminal '\0' to be sniffed. // Check ogg header requires the terminal '\0' to be sniffed.
EXPECT_FALSE(SniffMimeTypeFromLocalData( EXPECT_FALSE(SniffMimeTypeFromLocalData(
kOggTestData, sizeof(kOggTestData) - 2, &mime_type)); base::StringPiece(kOggTestData, sizeof(kOggTestData) - 2), &mime_type));
EXPECT_EQ("", mime_type); EXPECT_EQ("", mime_type);
mime_type.clear(); mime_type.clear();
const char kFlacTestData[] = const char kFlacTestData[] =
"fLaC\x00\x00\x00\x22\x12\x00\x12\x00\x00\x00\x00\x00"; "fLaC\x00\x00\x00\x22\x12\x00\x12\x00\x00\x00\x00\x00";
EXPECT_TRUE(SniffMimeTypeFromLocalData( EXPECT_TRUE(SniffMimeTypeFromLocalData(
kFlacTestData, sizeof(kFlacTestData) - 1, &mime_type)); base::StringPiece(kFlacTestData, sizeof(kFlacTestData) - 1), &mime_type));
EXPECT_EQ("audio/x-flac", mime_type); EXPECT_EQ("audio/x-flac", mime_type);
mime_type.clear(); mime_type.clear();
const char kWMATestData[] = const char kWMATestData[] =
"\x30\x26\xb2\x75\x8e\x66\xcf\x11\xa6\xd9\x00\xaa\x00\x62\xce\x6c"; "\x30\x26\xb2\x75\x8e\x66\xcf\x11\xa6\xd9\x00\xaa\x00\x62\xce\x6c";
EXPECT_TRUE(SniffMimeTypeFromLocalData(kWMATestData, sizeof(kWMATestData) - 1, EXPECT_TRUE(SniffMimeTypeFromLocalData(
&mime_type)); base::StringPiece(kWMATestData, sizeof(kWMATestData) - 1), &mime_type));
EXPECT_EQ("video/x-ms-asf", mime_type); EXPECT_EQ("video/x-ms-asf", mime_type);
mime_type.clear(); mime_type.clear();
...@@ -478,22 +477,22 @@ TEST(MimeSnifferTest, AudioVideoTest) { ...@@ -478,22 +477,22 @@ TEST(MimeSnifferTest, AudioVideoTest) {
// format. // format.
const char kMP4TestData[] = const char kMP4TestData[] =
"\x00\x00\x00\x20\x66\x74\x79\x70\x4d\x34\x41\x20\x00\x00\x00\x00"; "\x00\x00\x00\x20\x66\x74\x79\x70\x4d\x34\x41\x20\x00\x00\x00\x00";
EXPECT_TRUE(SniffMimeTypeFromLocalData(kMP4TestData, sizeof(kMP4TestData) - 1, EXPECT_TRUE(SniffMimeTypeFromLocalData(
&mime_type)); base::StringPiece(kMP4TestData, sizeof(kMP4TestData) - 1), &mime_type));
EXPECT_EQ("video/mp4", mime_type); EXPECT_EQ("video/mp4", mime_type);
mime_type.clear(); mime_type.clear();
const char kAACTestData[] = const char kAACTestData[] =
"\xff\xf1\x50\x80\x02\x20\xb0\x23\x0a\x83\x20\x7d\x61\x90\x3e\xb1"; "\xff\xf1\x50\x80\x02\x20\xb0\x23\x0a\x83\x20\x7d\x61\x90\x3e\xb1";
EXPECT_TRUE(SniffMimeTypeFromLocalData(kAACTestData, sizeof(kAACTestData) - 1, EXPECT_TRUE(SniffMimeTypeFromLocalData(
&mime_type)); base::StringPiece(kAACTestData, sizeof(kAACTestData) - 1), &mime_type));
EXPECT_EQ("audio/mpeg", mime_type); EXPECT_EQ("audio/mpeg", mime_type);
mime_type.clear(); mime_type.clear();
const char kAMRTestData[] = const char kAMRTestData[] =
"\x23\x21\x41\x4d\x52\x0a\x3c\x53\x0a\x7c\xe8\xb8\x41\xa5\x80\xca"; "\x23\x21\x41\x4d\x52\x0a\x3c\x53\x0a\x7c\xe8\xb8\x41\xa5\x80\xca";
EXPECT_TRUE(SniffMimeTypeFromLocalData(kAMRTestData, sizeof(kAMRTestData) - 1, EXPECT_TRUE(SniffMimeTypeFromLocalData(
&mime_type)); base::StringPiece(kAMRTestData, sizeof(kAMRTestData) - 1), &mime_type));
EXPECT_EQ("audio/amr", mime_type); EXPECT_EQ("audio/amr", mime_type);
mime_type.clear(); mime_type.clear();
} }
...@@ -502,19 +501,22 @@ TEST(MimeSnifferTest, ImageTest) { ...@@ -502,19 +501,22 @@ TEST(MimeSnifferTest, ImageTest) {
std::string mime_type; std::string mime_type;
const char kWebPSimpleFormat[] = "RIFF\xee\x81\x00\x00WEBPVP8 "; const char kWebPSimpleFormat[] = "RIFF\xee\x81\x00\x00WEBPVP8 ";
EXPECT_TRUE(SniffMimeTypeFromLocalData( EXPECT_TRUE(SniffMimeTypeFromLocalData(
kWebPSimpleFormat, sizeof(kWebPSimpleFormat) - 1, &mime_type)); base::StringPiece(kWebPSimpleFormat, sizeof(kWebPSimpleFormat) - 1),
&mime_type));
EXPECT_EQ("image/webp", mime_type); EXPECT_EQ("image/webp", mime_type);
mime_type.clear(); mime_type.clear();
const char kWebPLosslessFormat[] = "RIFF\xee\x81\x00\x00WEBPVP8L"; const char kWebPLosslessFormat[] = "RIFF\xee\x81\x00\x00WEBPVP8L";
EXPECT_TRUE(SniffMimeTypeFromLocalData( EXPECT_TRUE(SniffMimeTypeFromLocalData(
kWebPLosslessFormat, sizeof(kWebPLosslessFormat) - 1, &mime_type)); base::StringPiece(kWebPLosslessFormat, sizeof(kWebPLosslessFormat) - 1),
&mime_type));
EXPECT_EQ("image/webp", mime_type); EXPECT_EQ("image/webp", mime_type);
mime_type.clear(); mime_type.clear();
const char kWebPExtendedFormat[] = "RIFF\xee\x81\x00\x00WEBPVP8X"; const char kWebPExtendedFormat[] = "RIFF\xee\x81\x00\x00WEBPVP8X";
EXPECT_TRUE(SniffMimeTypeFromLocalData( EXPECT_TRUE(SniffMimeTypeFromLocalData(
kWebPExtendedFormat, sizeof(kWebPExtendedFormat) - 1, &mime_type)); base::StringPiece(kWebPExtendedFormat, sizeof(kWebPExtendedFormat) - 1),
&mime_type));
EXPECT_EQ("image/webp", mime_type); EXPECT_EQ("image/webp", mime_type);
mime_type.clear(); mime_type.clear();
} }
...@@ -529,8 +531,8 @@ class MimeSnifferBinaryTest : public ::testing::TestWithParam<int> {}; ...@@ -529,8 +531,8 @@ class MimeSnifferBinaryTest : public ::testing::TestWithParam<int> {};
// 0x0B (VT), a byte in the range 0x0E to 0x1A (SO to SUB), or a byte in the // 0x0B (VT), a byte in the range 0x0E to 0x1A (SO to SUB), or a byte in the
// range 0x1C to 0x1F (FS to US). // range 0x1C to 0x1F (FS to US).
TEST_P(MimeSnifferBinaryTest, IsBinaryControlCode) { TEST_P(MimeSnifferBinaryTest, IsBinaryControlCode) {
char param = static_cast<char>(GetParam()); std::string param(1, static_cast<char>(GetParam()));
EXPECT_TRUE(LooksLikeBinary(&param, 1)); EXPECT_TRUE(LooksLikeBinary(param));
} }
// ::testing::Range(a, b) generates a half-open range, i.e. "b" is not included.
...@@ -553,8 +555,8 @@ INSTANTIATE_TEST_SUITE_P(MimeSnifferBinaryTestRange3, ...@@ -553,8 +555,8 @@ INSTANTIATE_TEST_SUITE_P(MimeSnifferBinaryTestRange3,
class MimeSnifferPlainTextTest : public ::testing::TestWithParam<int> {}; class MimeSnifferPlainTextTest : public ::testing::TestWithParam<int> {};
TEST_P(MimeSnifferPlainTextTest, NotBinaryControlCode) { TEST_P(MimeSnifferPlainTextTest, NotBinaryControlCode) {
char param = static_cast<char>(GetParam()); std::string param(1, static_cast<char>(GetParam()));
EXPECT_FALSE(LooksLikeBinary(&param, 1)); EXPECT_FALSE(LooksLikeBinary(param));
} }
INSTANTIATE_TEST_SUITE_P(MimeSnifferPlainTextTestPlainTextControlCodes, INSTANTIATE_TEST_SUITE_P(MimeSnifferPlainTextTestPlainTextControlCodes,
...@@ -569,8 +571,7 @@ class MimeSnifferControlCodesEdgeCaseTest ...@@ -569,8 +571,7 @@ class MimeSnifferControlCodesEdgeCaseTest
: public ::testing::TestWithParam<const char*> {}; : public ::testing::TestWithParam<const char*> {};
TEST_P(MimeSnifferControlCodesEdgeCaseTest, EdgeCase) { TEST_P(MimeSnifferControlCodesEdgeCaseTest, EdgeCase) {
const char* param = GetParam(); EXPECT_TRUE(LooksLikeBinary(GetParam()));
EXPECT_TRUE(LooksLikeBinary(param, strlen(param)));
} }
INSTANTIATE_TEST_SUITE_P(MimeSnifferControlCodesEdgeCaseTest, INSTANTIATE_TEST_SUITE_P(MimeSnifferControlCodesEdgeCaseTest,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment