Commit 120a8148 authored by Jesse McKenna's avatar Jesse McKenna Committed by Chromium LUCI CQ

Cleanup in IllegalCharacters class

This cleanup change makes the following changes to IllegalCharacters:
* use base::Optional for delayed initialization instead of unique_ptr
* remove unneeded line `friend class Singleton<IllegalCharacters>;`
* make `code_point` variable descriptive type UChar32 instead of its
  underlying type uint32_t
* rename DisallowedEverywhere() and DisallowedLeadingAndTrailing()
  methods to IsDisallowed*()
* minor comment typo fixes

Change-Id: I410fbbbb063ef191d92d326d1c1073007336ff11
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2586955
Commit-Queue: Jesse McKenna <jessemckenna@google.com>
Reviewed-by: default avatarGabriel Charette <gab@chromium.org>
Cr-Commit-Position: refs/heads/master@{#838469}
parent fa7b15cf
...@@ -8,8 +8,6 @@ ...@@ -8,8 +8,6 @@
#include <stdint.h> #include <stdint.h>
#include <memory>
#include "base/check.h" #include "base/check.h"
#include "base/files/file_path.h" #include "base/files/file_path.h"
#include "base/i18n/icu_string_conversions.h" #include "base/i18n/icu_string_conversions.h"
...@@ -37,33 +35,32 @@ class IllegalCharacters { ...@@ -37,33 +35,32 @@ class IllegalCharacters {
return Singleton<IllegalCharacters>::get(); return Singleton<IllegalCharacters>::get();
} }
bool DisallowedEverywhere(UChar32 ucs4) const { bool IsDisallowedEverywhere(UChar32 ucs4) const {
return !!illegal_anywhere_->contains(ucs4); return !!illegal_anywhere_.contains(ucs4);
} }
bool DisallowedLeadingOrTrailing(UChar32 ucs4) const { bool IsDisallowedLeadingOrTrailing(UChar32 ucs4) const {
return !!illegal_at_ends_->contains(ucs4); return !!illegal_at_ends_.contains(ucs4);
} }
bool IsAllowedName(const string16& s) const { bool IsAllowedName(const string16& s) const {
return s.empty() || (!!illegal_anywhere_->containsNone( return s.empty() || (!!illegal_anywhere_.containsNone(
icu::UnicodeString(s.c_str(), s.size())) && icu::UnicodeString(s.c_str(), s.size())) &&
!illegal_at_ends_->contains(*s.begin()) && !illegal_at_ends_.contains(*s.begin()) &&
!illegal_at_ends_->contains(*s.rbegin())); !illegal_at_ends_.contains(*s.rbegin()));
} }
private: private:
friend class Singleton<IllegalCharacters>;
friend struct DefaultSingletonTraits<IllegalCharacters>; friend struct DefaultSingletonTraits<IllegalCharacters>;
IllegalCharacters(); IllegalCharacters();
~IllegalCharacters() = default; ~IllegalCharacters() = default;
// set of characters considered invalid anywhere inside a filename. // Set of characters considered invalid anywhere inside a filename.
std::unique_ptr<icu::UnicodeSet> illegal_anywhere_; icu::UnicodeSet illegal_anywhere_;
// set of characters considered invalid at either end of a filename. // Set of characters considered invalid at either end of a filename.
std::unique_ptr<icu::UnicodeSet> illegal_at_ends_; icu::UnicodeSet illegal_at_ends_;
}; };
IllegalCharacters::IllegalCharacters() { IllegalCharacters::IllegalCharacters() {
...@@ -71,32 +68,32 @@ IllegalCharacters::IllegalCharacters() { ...@@ -71,32 +68,32 @@ IllegalCharacters::IllegalCharacters() {
UErrorCode ends_status = U_ZERO_ERROR; UErrorCode ends_status = U_ZERO_ERROR;
// Control characters, formatting characters, non-characters, path separators, // Control characters, formatting characters, non-characters, path separators,
// and some printable ASCII characters regarded as dangerous ('"*/:<>?\\'). // and some printable ASCII characters regarded as dangerous ('"*/:<>?\\').
// See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx
// and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx
// Note that code points in the "Other, Format" (Cf) category are ignored on // Note that code points in the "Other, Format" (Cf) category are ignored on
// HFS+ despite the ZERO_WIDTH_JOINER and ZERO_WIDTH_NON-JOINER being // HFS+ despite the ZERO_WIDTH_JOINER and ZERO_WIDTH_NON-JOINER being
// legitimate in Arabic and some S/SE Asian scripts. In addition tilde (~) is // legitimate in Arabic and some S/SE Asian scripts. In addition tilde (~) is
// also excluded due to the possibility of interacting poorly with short // also excluded due to the possibility of interacting poorly with short
// filenames on VFAT. (Related to CVE-2014-9390) // filenames on VFAT. (Related to CVE-2014-9390)
illegal_anywhere_.reset(new icu::UnicodeSet( illegal_anywhere_ =
UNICODE_STRING_SIMPLE("[[\"~*/:<>?\\\\|][:Cc:][:Cf:]]"), icu::UnicodeSet(UNICODE_STRING_SIMPLE("[[\"~*/:<>?\\\\|][:Cc:][:Cf:]]"),
everywhere_status)); everywhere_status);
illegal_at_ends_.reset(new icu::UnicodeSet( illegal_at_ends_ =
UNICODE_STRING_SIMPLE("[[:WSpace:][.]]"), ends_status)); icu::UnicodeSet(UNICODE_STRING_SIMPLE("[[:WSpace:][.]]"), ends_status);
DCHECK(U_SUCCESS(everywhere_status)); DCHECK(U_SUCCESS(everywhere_status));
DCHECK(U_SUCCESS(ends_status)); DCHECK(U_SUCCESS(ends_status));
// Add non-characters. If this becomes a performance bottleneck by // Add non-characters. If this becomes a performance bottleneck by
// any chance, do not add these to |set| and change IsFilenameLegal() // any chance, do not add these to |set| and change IsFilenameLegal()
// to check |ucs4 & 0xFFFEu == 0xFFFEu|, in addiition to calling // to check |ucs4 & 0xFFFEu == 0xFFFEu|, in addition to calling
// IsAllowedName(). // IsAllowedName().
illegal_anywhere_->add(0xFDD0, 0xFDEF); illegal_anywhere_.add(0xFDD0, 0xFDEF);
for (int i = 0; i <= 0x10; ++i) { for (int i = 0; i <= 0x10; ++i) {
int plane_base = 0x10000 * i; int plane_base = 0x10000 * i;
illegal_anywhere_->add(plane_base + 0xFFFE, plane_base + 0xFFFF); illegal_anywhere_.add(plane_base + 0xFFFE, plane_base + 0xFFFF);
} }
illegal_anywhere_->freeze(); illegal_anywhere_.freeze();
illegal_at_ends_->freeze(); illegal_at_ends_.freeze();
} }
} // namespace } // namespace
...@@ -109,13 +106,13 @@ void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, ...@@ -109,13 +106,13 @@ void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name,
char replace_char) { char replace_char) {
IllegalCharacters* illegal = IllegalCharacters::GetInstance(); IllegalCharacters* illegal = IllegalCharacters::GetInstance();
DCHECK(!(illegal->DisallowedEverywhere(replace_char))); DCHECK(!(illegal->IsDisallowedEverywhere(replace_char)));
DCHECK(!(illegal->DisallowedLeadingOrTrailing(replace_char))); DCHECK(!(illegal->IsDisallowedLeadingOrTrailing(replace_char)));
int cursor = 0; // The ICU macros expect an int. int cursor = 0; // The ICU macros expect an int.
while (cursor < static_cast<int>(file_name->size())) { while (cursor < static_cast<int>(file_name->size())) {
int char_begin = cursor; int char_begin = cursor;
uint32_t code_point; UChar32 code_point;
#if defined(OS_WIN) #if defined(OS_WIN)
// Windows uses UTF-16 encoding for filenames. // Windows uses UTF-16 encoding for filenames.
U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()),
...@@ -130,9 +127,9 @@ void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, ...@@ -130,9 +127,9 @@ void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name,
#error Unsupported platform #error Unsupported platform
#endif #endif
if (illegal->DisallowedEverywhere(code_point) || if (illegal->IsDisallowedEverywhere(code_point) ||
((char_begin == 0 || cursor == static_cast<int>(file_name->length())) && ((char_begin == 0 || cursor == static_cast<int>(file_name->length())) &&
illegal->DisallowedLeadingOrTrailing(code_point))) { illegal->IsDisallowedLeadingOrTrailing(code_point))) {
file_name->replace(char_begin, cursor - char_begin, 1, replace_char); file_name->replace(char_begin, cursor - char_begin, 1, replace_char);
// We just made the potentially multi-byte/word char into one that only // We just made the potentially multi-byte/word char into one that only
// takes one byte/word, so need to adjust the cursor to point to the next // takes one byte/word, so need to adjust the cursor to point to the next
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment