Commit 92a354fc authored by asanka's avatar asanka Committed by Commit bot

Update ReplaceIllegalCharactersInPath to handle quirks in HFS+ and VFAT

This change:

- Re-introduces U+200C and U+200D as illegal characters since these are
  ignored on HFS+ and can interfere with filename sanitization. All code
  points in the Cf general category are now considered illegal in a
  filename.

- Leading and trailing WSpace and '.' characters are now considered
  illegal.

- Due to being confused for short names on VFAT filesystems, the tilde
  ('~') is now considered illegal.

- Prior to this change, only ASCII whitespace were trimmed from
  filenames on Mac OSX. All UTF-8 encoded WSpace characters are now
  handled on Mac OSX.

- Instead of trimming leading and trailing whitespace, they are now
  replaced by the replacement character. Trimming could cause a
  previously hidden extension or basename to be exposed.

BUG=444102
BUG=446538

Review URL: https://codereview.chromium.org/869823003

Cr-Commit-Position: refs/heads/master@{#314041}
parent 745ff1db
...@@ -30,12 +30,19 @@ class IllegalCharacters { ...@@ -30,12 +30,19 @@ class IllegalCharacters {
return Singleton<IllegalCharacters>::get(); return Singleton<IllegalCharacters>::get();
} }
bool contains(UChar32 ucs4) { bool DisallowedEverywhere(UChar32 ucs4) {
return !!set->contains(ucs4); return !!illegal_anywhere_->contains(ucs4);
} }
bool containsNone(const string16 &s) { bool DisallowedLeadingOrTrailing(UChar32 ucs4) {
return !!set->containsNone(icu::UnicodeString(s.c_str(), s.size())); return !!illegal_at_ends_->contains(ucs4);
}
bool IsAllowedName(const string16& s) {
return s.empty() || (!!illegal_anywhere_->containsNone(
icu::UnicodeString(s.c_str(), s.size())) &&
!illegal_at_ends_->contains(*s.begin()) &&
!illegal_at_ends_->contains(*s.rbegin()));
} }
private: private:
...@@ -45,60 +52,61 @@ class IllegalCharacters { ...@@ -45,60 +52,61 @@ class IllegalCharacters {
IllegalCharacters(); IllegalCharacters();
~IllegalCharacters() { } ~IllegalCharacters() { }
scoped_ptr<icu::UnicodeSet> set; // set of characters considered invalid anywhere inside a filename.
scoped_ptr<icu::UnicodeSet> illegal_anywhere_;
// set of characters considered invalid at either end of a filename.
scoped_ptr<icu::UnicodeSet> illegal_at_ends_;
DISALLOW_COPY_AND_ASSIGN(IllegalCharacters); DISALLOW_COPY_AND_ASSIGN(IllegalCharacters);
}; };
IllegalCharacters::IllegalCharacters() { IllegalCharacters::IllegalCharacters() {
UErrorCode status = U_ZERO_ERROR; UErrorCode everywhere_status = U_ZERO_ERROR;
// Control characters, formatting characters, non-characters, and UErrorCode ends_status = U_ZERO_ERROR;
// some printable ASCII characters regarded as dangerous ('"*/:<>?\\'). // Control characters, formatting characters, non-characters, path separators,
// and some printable ASCII characters regarded as dangerous ('"*/:<>?\\').
// See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx
// and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx
// TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they // Note that code points in the "Other, Format" (Cf) category are ignored on
// are legitimate in Arabic and some S/SE Asian scripts. However, when used // HFS+ despite the ZERO_WIDTH_JOINER and ZERO_WIDTH_NON-JOINER being
// elsewhere, they can be confusing/problematic. // legitimate in Arabic and some S/SE Asian scripts. In addition tilde (~) is
// Also, consider wrapping the set with our Singleton class to create and // also excluded due to the possibility of interacting poorly with short
// freeze it only once. Note that there's a trade-off between memory and // filenames on VFAT. (Related to CVE-2014-9390)
// speed. illegal_anywhere_.reset(new icu::UnicodeSet(
#if defined(WCHAR_T_IS_UTF16) UNICODE_STRING_SIMPLE("[[\"~*/:<>?\\\\|][:Cc:][:Cf:]]"),
set.reset(new icu::UnicodeSet(icu::UnicodeString( everywhere_status));
L"[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status)); illegal_at_ends_.reset(new icu::UnicodeSet(
#else UNICODE_STRING_SIMPLE("[[:WSpace:][.]]"), ends_status));
set.reset(new icu::UnicodeSet(UNICODE_STRING_SIMPLE( DCHECK(U_SUCCESS(everywhere_status));
"[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(), DCHECK(U_SUCCESS(ends_status));
status));
#endif
DCHECK(U_SUCCESS(status));
// Add non-characters. If this becomes a performance bottleneck by // Add non-characters. If this becomes a performance bottleneck by
// any chance, do not add these to |set| and change IsFilenameLegal() // any chance, do not add these to |set| and change IsFilenameLegal()
// to check |ucs4 & 0xFFFEu == 0xFFFEu|, in addiition to calling // to check |ucs4 & 0xFFFEu == 0xFFFEu|, in addiition to calling
// containsNone(). // IsAllowedName().
set->add(0xFDD0, 0xFDEF); illegal_anywhere_->add(0xFDD0, 0xFDEF);
for (int i = 0; i <= 0x10; ++i) { for (int i = 0; i <= 0x10; ++i) {
int plane_base = 0x10000 * i; int plane_base = 0x10000 * i;
set->add(plane_base + 0xFFFE, plane_base + 0xFFFF); illegal_anywhere_->add(plane_base + 0xFFFE, plane_base + 0xFFFF);
} }
set->freeze(); illegal_anywhere_->freeze();
illegal_at_ends_->freeze();
} }
} // namespace } // namespace
bool IsFilenameLegal(const string16& file_name) { bool IsFilenameLegal(const string16& file_name) {
return IllegalCharacters::GetInstance()->containsNone(file_name); return IllegalCharacters::GetInstance()->IsAllowedName(file_name);
} }
void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name,
char replace_char) { char replace_char) {
DCHECK(file_name); IllegalCharacters* illegal = IllegalCharacters::GetInstance();
DCHECK(!(IllegalCharacters::GetInstance()->contains(replace_char)));
// Remove leading and trailing whitespace. DCHECK(!(illegal->DisallowedEverywhere(replace_char)));
TrimWhitespace(*file_name, TRIM_ALL, file_name); DCHECK(!(illegal->DisallowedLeadingOrTrailing(replace_char)));
IllegalCharacters* illegal = IllegalCharacters::GetInstance();
int cursor = 0; // The ICU macros expect an int. int cursor = 0; // The ICU macros expect an int.
while (cursor < static_cast<int>(file_name->size())) { while (cursor < static_cast<int>(file_name->size())) {
int char_begin = cursor; int char_begin = cursor;
...@@ -122,7 +130,9 @@ void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, ...@@ -122,7 +130,9 @@ void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name,
NOTREACHED(); NOTREACHED();
#endif #endif
if (illegal->contains(code_point)) { if (illegal->DisallowedEverywhere(code_point) ||
((char_begin == 0 || cursor == static_cast<int>(file_name->length())) &&
illegal->DisallowedLeadingOrTrailing(code_point))) {
file_name->replace(char_begin, cursor - char_begin, 1, replace_char); file_name->replace(char_begin, cursor - char_begin, 1, replace_char);
// We just made the potentially multi-byte/word char into one that only // We just made the potentially multi-byte/word char into one that only
// takes one byte/word, so need to adjust the cursor to point to the next // takes one byte/word, so need to adjust the cursor to point to the next
......
...@@ -18,13 +18,23 @@ namespace i18n { ...@@ -18,13 +18,23 @@ namespace i18n {
// param has the same restriction as that for ReplaceIllegalCharacters. // param has the same restriction as that for ReplaceIllegalCharacters.
BASE_I18N_EXPORT bool IsFilenameLegal(const string16& file_name); BASE_I18N_EXPORT bool IsFilenameLegal(const string16& file_name);
// Replaces characters in 'file_name' that are illegal for file names with // Replaces characters in |file_name| that are illegal for file names with
// 'replace_char'. 'file_name' must not be a full or relative path, but just the // |replace_char|. |file_name| must not be a full or relative path, but just the
// file name component (since slashes are considered illegal). Any leading or // file name component (since slashes are considered illegal). Any leading or
// trailing whitespace in 'file_name' is removed. // trailing whitespace or periods in |file_name| is also replaced with the
// |replace_char|.
//
// Example: // Example:
// file_name == "bad:file*name?.txt", changed to: "bad-file-name-.txt" when // "bad:file*name?.txt" will be turned into "bad_file_name_.txt" when
// 'replace_char' is '-'. // |replace_char| is '_'.
//
// Warning: Do not use this function as the sole means of sanitizing a filename.
// While the resulting filename itself would be legal, it doesn't necessarily
// mean that the file will behave safely. On Windows, certain reserved names
// refer to devices rather than files (E.g. LPT1), and some filenames could be
// interpreted as shell namespace extensions (E.g. Foo.{<GUID>}).
//
// TODO(asanka): Move full filename sanitization logic here.
BASE_I18N_EXPORT void ReplaceIllegalCharactersInPath( BASE_I18N_EXPORT void ReplaceIllegalCharactersInPath(
FilePath::StringType* file_name, FilePath::StringType* file_name,
char replace_char); char replace_char);
......
...@@ -20,24 +20,28 @@ class FileUtilICUTest : public PlatformTest { ...@@ -20,24 +20,28 @@ class FileUtilICUTest : public PlatformTest {
#if defined(OS_POSIX) && !defined(OS_MACOSX) #if defined(OS_POSIX) && !defined(OS_MACOSX)
// Linux disallows some evil ASCII characters, but passes all non-ASCII. // Linux disallows some evil ASCII characters, but passes all non-ASCII.
static const struct goodbad_pair { static const struct GoodBadPairLinux {
const char* bad_name; const char* bad_name;
const char* good_name; const char* good_name;
} kIllegalCharacterCases[] = { } kLinuxIllegalCharacterCases[] = {
{"bad*file:name?.jpg", "bad-file-name-.jpg"}, {"bad*\\/file:name?.jpg", "bad---file-name-.jpg"},
{"**********::::.txt", "--------------.txt"}, {"**********::::.txt", "--------------.txt"},
{"\xe9\xf0zzzz.\xff", "\xe9\xf0zzzz.\xff"}, {"\xe9\xf0zzzz.\xff", "\xe9\xf0zzzz.\xff"},
{" _ ", "-_-"},
{".", "-"},
{" .( ). ", "-.( ).-"},
{" ", "- -"},
}; };
TEST_F(FileUtilICUTest, ReplaceIllegalCharacersInPathLinuxTest) { TEST_F(FileUtilICUTest, ReplaceIllegalCharacersInPathLinuxTest) {
for (size_t i = 0; i < arraysize(kIllegalCharacterCases); ++i) { for (size_t i = 0; i < arraysize(kLinuxIllegalCharacterCases); ++i) {
std::string bad_name(kIllegalCharacterCases[i].bad_name); std::string bad_name(kLinuxIllegalCharacterCases[i].bad_name);
ReplaceIllegalCharactersInPath(&bad_name, '-'); ReplaceIllegalCharactersInPath(&bad_name, '-');
EXPECT_EQ(kIllegalCharacterCases[i].good_name, bad_name); EXPECT_EQ(kLinuxIllegalCharacterCases[i].good_name, bad_name);
} }
} }
#else #endif
// For Mac & Windows, which both do Unicode validation on filenames. These // For Mac & Windows, which both do Unicode validation on filenames. These
// characters are given as wide strings since its more convenient to specify // characters are given as wide strings since its more convenient to specify
...@@ -46,29 +50,36 @@ static const struct goodbad_pair { ...@@ -46,29 +50,36 @@ static const struct goodbad_pair {
const wchar_t* bad_name; const wchar_t* bad_name;
const wchar_t* good_name; const wchar_t* good_name;
} kIllegalCharacterCases[] = { } kIllegalCharacterCases[] = {
{L"bad*file:name?.jpg", L"bad-file-name-.jpg"}, {L"bad*file:name?.jpg", L"bad-file-name-.jpg"},
{L"**********::::.txt", L"--------------.txt"}, {L"**********::::.txt", L"--------------.txt"},
// We can't use UCNs (universal character names) for C0/C1 characters and // We can't use UCNs (universal character names) for C0/C1 characters and
// U+007F, but \x escape is interpreted by MSVC and gcc as we intend. // U+007F, but \x escape is interpreted by MSVC and gcc as we intend.
{L"bad\x0003\x0091 file\u200E\u200Fname.png", L"bad-- file--name.png"}, {L"bad\x0003\x0091 file\u200E\u200Fname.png", L"bad-- file--name.png"},
#if defined(OS_WIN) {L"bad*file\\?name.jpg", L"bad-file--name.jpg"},
{L"bad*file\\name.jpg", L"bad-file-name.jpg"}, {L"\t bad*file\\name/.jpg", L"- bad-file-name-.jpg"},
{L"\t bad*file\\name/.jpg ", L"bad-file-name-.jpg"}, {L"this_file_name is okay!.mp3", L"this_file_name is okay!.mp3"},
#elif defined(OS_MACOSX) {L"\u4E00\uAC00.mp3", L"\u4E00\uAC00.mp3"},
{L"bad*file?name.jpg", L"bad-file-name.jpg"}, {L"\u0635\u200C\u0644.mp3", L"\u0635-\u0644.mp3"},
{L"\t bad*file?name/.jpg ", L"bad-file-name-.jpg"}, {L"\U00010330\U00010331.mp3", L"\U00010330\U00010331.mp3"},
#endif // Unassigned codepoints are ok.
{L"this_file_name is okay!.mp3", L"this_file_name is okay!.mp3"}, {L"\u0378\U00040001.mp3", L"\u0378\U00040001.mp3"},
{L"\u4E00\uAC00.mp3", L"\u4E00\uAC00.mp3"}, // Non-characters are not allowed.
{L"\u0635\u200C\u0644.mp3", L"\u0635\u200C\u0644.mp3"}, {L"bad\uFFFFfile\U0010FFFEname.jpg", L"bad-file-name.jpg"},
{L"\U00010330\U00010331.mp3", L"\U00010330\U00010331.mp3"}, {L"bad\uFDD0file\uFDEFname.jpg", L"bad-file-name.jpg"},
// Unassigned codepoints are ok. // CVE-2014-9390
{L"\u0378\U00040001.mp3", L"\u0378\U00040001.mp3"}, {L"(\u200C.\u200D.\u200E.\u200F.\u202A.\u202B.\u202C.\u202D.\u202E.\u206A."
// Non-characters are not allowed. L"\u206B.\u206C.\u206D.\u206F.\uFEFF)",
{L"bad\uFFFFfile\U0010FFFEname.jpg ", L"bad-file-name.jpg"}, L"(-.-.-.-.-.-.-.-.-.-.-.-.-.-.-)"},
{L"bad\uFDD0file\uFDEFname.jpg ", L"bad-file-name.jpg"}, {L"config~1", L"config-1"},
{L" _ ", L"-_-"},
{L" ", L"-"},
{L"\u2008.(\u2007).\u3000", L"-.(\u2007).-"},
{L" ", L"- -"},
{L". ", L"- -"}
}; };
#if defined(OS_WIN) || defined(OS_MACOSX)
TEST_F(FileUtilICUTest, ReplaceIllegalCharactersInPathTest) { TEST_F(FileUtilICUTest, ReplaceIllegalCharactersInPathTest) {
for (size_t i = 0; i < arraysize(kIllegalCharacterCases); ++i) { for (size_t i = 0; i < arraysize(kIllegalCharacterCases); ++i) {
#if defined(OS_WIN) #if defined(OS_WIN)
...@@ -85,6 +96,19 @@ TEST_F(FileUtilICUTest, ReplaceIllegalCharactersInPathTest) { ...@@ -85,6 +96,19 @@ TEST_F(FileUtilICUTest, ReplaceIllegalCharactersInPathTest) {
#endif #endif
TEST_F(FileUtilICUTest, IsFilenameLegalTest) {
EXPECT_TRUE(IsFilenameLegal(string16()));
for (const auto& test_case : kIllegalCharacterCases) {
string16 bad_name = WideToUTF16(test_case.bad_name);
string16 good_name = WideToUTF16(test_case.good_name);
EXPECT_TRUE(IsFilenameLegal(good_name)) << good_name;
if (good_name != bad_name)
EXPECT_FALSE(IsFilenameLegal(bad_name)) << bad_name;
}
}
#if defined(OS_CHROMEOS) #if defined(OS_CHROMEOS)
static const struct normalize_name_encoding_test_cases { static const struct normalize_name_encoding_test_cases {
const char* original_path; const char* original_path;
......
...@@ -1242,7 +1242,7 @@ base::FilePath SavePackage::GetSuggestedNameForSaveAs( ...@@ -1242,7 +1242,7 @@ base::FilePath SavePackage::GetSuggestedNameForSaveAs(
name_with_proper_ext = EnsureHtmlExtension(name_with_proper_ext); name_with_proper_ext = EnsureHtmlExtension(name_with_proper_ext);
base::FilePath::StringType file_name = name_with_proper_ext.value(); base::FilePath::StringType file_name = name_with_proper_ext.value();
base::i18n::ReplaceIllegalCharactersInPath(&file_name, ' '); base::i18n::ReplaceIllegalCharactersInPath(&file_name, '_');
return base::FilePath(file_name); return base::FilePath(file_name);
} }
......
...@@ -382,7 +382,7 @@ static const struct SuggestedSaveNameTestCase { ...@@ -382,7 +382,7 @@ static const struct SuggestedSaveNameTestCase {
// A URL-like title that does not match the title is respected in full. // A URL-like title that does not match the title is respected in full.
{ "http://foo.com", { "http://foo.com",
base::ASCIIToUTF16("http://www.foo.com/path/title.txt"), base::ASCIIToUTF16("http://www.foo.com/path/title.txt"),
FPL("http www.foo.com path title.txt"), FPL("http___www.foo.com_path_title.txt"),
false false
}, },
}; };
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment