Commit 4858757b authored by Matt Menke, committed by Commit Bot

Remove UnescapeURLComponent() overload that takes a base::string16.

The method has some safe-for-display checks that assume both the input
and the output are UTF-8.  This change makes it at least a little
harder to avoid those checks, and makes the output no longer vary based
on whether a std::string or a string16 is passed in (by removing the
latter option entirely).

Bug: 831321
Change-Id: Ib39a2cccd71861213341e92932525e8ecafc60cd
Reviewed-on: https://chromium-review.googlesource.com/1004855
Reviewed-by: Matt Giuca <mgiuca@chromium.org>
Reviewed-by: Justin Donnelly <jdonnelly@chromium.org>
Commit-Queue: Matt Menke <mmenke@chromium.org>
Cr-Commit-Position: refs/heads/master@{#550720}
parent f3e10319
......@@ -190,10 +190,13 @@ ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
// the final filtering we need whitespace separated substrings possibly
// containing escaped characters.
base::string16 lower_raw_string(base::i18n::ToLower(search_string));
base::string16 lower_unescaped_string = net::UnescapeURLComponent(
lower_raw_string,
net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |
net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);
// Have to convert to UTF-8 and back, because UnescapeURLComponent doesn't
// support unescaping UTF-8 characters and converting them to UTF-16.
base::string16 lower_unescaped_string =
base::UTF8ToUTF16(net::UnescapeURLComponent(
base::UTF16ToUTF8(lower_raw_string),
net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |
net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS));
// Extract individual 'words' (as opposed to 'terms'; see comment in
// HistoryIdsToScoredMatches()) from the search string. When the user types
......
......@@ -101,18 +101,15 @@ const char kUrlUnescape[128] = {
// Attempts to unescape the sequence at |index| within |escaped_text|. If
// successful, sets |value| to the unescaped value. Returns whether
// unescaping succeeded.
template <typename STR>
bool UnescapeUnsignedCharAtIndex(STR escaped_text,
bool UnescapeUnsignedCharAtIndex(base::StringPiece escaped_text,
size_t index,
unsigned char* value) {
if ((index + 2) >= escaped_text.size())
return false;
if (escaped_text[index] != '%')
return false;
const typename STR::value_type most_sig_digit(
static_cast<typename STR::value_type>(escaped_text[index + 1]));
const typename STR::value_type least_sig_digit(
static_cast<typename STR::value_type>(escaped_text[index + 2]));
char most_sig_digit(escaped_text[index + 1]);
char least_sig_digit(escaped_text[index + 2]);
if (base::IsHexDigit(most_sig_digit) && base::IsHexDigit(least_sig_digit)) {
*value = base::HexDigitToInt(most_sig_digit) * 16 +
base::HexDigitToInt(least_sig_digit);
......@@ -123,8 +120,7 @@ bool UnescapeUnsignedCharAtIndex(STR escaped_text,
// Returns true if there is an Arabic Language Mark at |index|. |first_byte|
// is the byte at |index|.
template <typename STR>
bool HasArabicLanguageMarkAtIndex(STR escaped_text,
bool HasArabicLanguageMarkAtIndex(base::StringPiece escaped_text,
unsigned char first_byte,
size_t index) {
if (first_byte != 0xD8)
......@@ -137,8 +133,7 @@ bool HasArabicLanguageMarkAtIndex(STR escaped_text,
// Returns true if there is a BiDi control char at |index|. |first_byte| is the
// byte at |index|.
template <typename STR>
bool HasThreeByteBidiControlCharAtIndex(STR escaped_text,
bool HasThreeByteBidiControlCharAtIndex(base::StringPiece escaped_text,
unsigned char first_byte,
size_t index) {
if (first_byte != 0xE2)
......@@ -161,8 +156,7 @@ bool HasThreeByteBidiControlCharAtIndex(STR escaped_text,
// Returns true if there is a four-byte banned char at |index|. |first_byte| is
// the byte at |index|.
template <typename STR>
bool HasFourByteBannedCharAtIndex(STR escaped_text,
bool HasFourByteBannedCharAtIndex(base::StringPiece escaped_text,
unsigned char first_byte,
size_t index) {
// The following characters are blacklisted for spoofability concerns.
......@@ -196,9 +190,8 @@ bool HasFourByteBannedCharAtIndex(STR escaped_text,
// the alterations done to the string that are not one-character-to-one-
// character. The resulting |adjustments| will always be sorted by increasing
// offset.
template <typename STR>
STR UnescapeURLWithAdjustmentsImpl(
base::BasicStringPiece<STR> escaped_text,
std::string UnescapeURLWithAdjustmentsImpl(
base::StringPiece escaped_text,
UnescapeRule::Type rules,
base::OffsetAdjuster::Adjustments* adjustments) {
if (adjustments)
......@@ -210,7 +203,7 @@ STR UnescapeURLWithAdjustmentsImpl(
// The output of the unescaping is always smaller than the input, so we can
// reserve the input size to make sure we have enough buffer and don't have
// to allocate in the loop below.
STR result;
std::string result;
result.reserve(escaped_text.length());
// Locations of adjusted text.
......@@ -436,11 +429,6 @@ std::string UnescapeURLComponent(base::StringPiece escaped_text,
return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL);
}
base::string16 UnescapeURLComponent(base::StringPiece16 escaped_text,
UnescapeRule::Type rules) {
return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL);
}
base::string16 UnescapeAndDecodeUTF8URLComponent(base::StringPiece text,
UnescapeRule::Type rules) {
return UnescapeAndDecodeUTF8URLComponentWithAdjustments(text, rules, NULL);
......
......@@ -118,15 +118,13 @@ class UnescapeRule {
// a hex digit, and converting to the character with the numerical value of
// those digits. Thus "i%20=%203%3b" unescapes to "i = 3;".
//
// Watch out: this doesn't necessarily result in the correct final result,
// because the encoding may be unknown. For example, the input might be ASCII,
// which, after unescaping, is supposed to be interpreted as UTF-8, and then
// converted into full UTF-16 chars. This function won't tell you if any
// conversions need to take place, it only unescapes.
// This method does not ensure that the output is a valid string in any
// character encoding. However, unless SPOOFING_AND_CONTROL_CHARS is set, it
// leaves certain byte sequences escaped: those that would be dangerous to
// display to the user because, if interpreted as UTF-8, they could be used to
// mislead the user.
NET_EXPORT std::string UnescapeURLComponent(base::StringPiece escaped_text,
UnescapeRule::Type rules);
NET_EXPORT base::string16 UnescapeURLComponent(base::StringPiece16 escaped_text,
UnescapeRule::Type rules);
// Unescapes the given substring as a URL, and then tries to interpret the
// result as being encoded as UTF-8. If the result is convertible into UTF-8, it
......
This diff is collapsed.
......@@ -18,18 +18,5 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
net::UnescapeURLComponent(path, static_cast<net::UnescapeRule::Type>(i));
}
// When non-empty, align |data| to sizeof(char16).
if ((size > 0) && ((size % 2) == 1)) {
data++;
size--;
}
// Test for StringPiece16.
base::StringPiece16 path16(reinterpret_cast<const base::char16*>(data),
size / 2);
for (int i = 0; i <= kMaxUnescapeRule; i++) {
net::UnescapeURLComponent(path16, static_cast<net::UnescapeRule::Type>(i));
}
return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment