Commit c40bf13d authored by Richard Knoll's avatar Richard Knoll Committed by Commit Bot

Fix phone number regex whitespace matching.

We only handled some whitespace characters by using \s in the regex. One
example that is not handled by that is the non breaking space which is
quite common in HTML formatted text. Switching to \p{Z} to consider all
unicode characters for whitespace fixes this.

Bug: 1015189
Change-Id: Id3e6b3fe98a303e84d1b8427a227bc12a42d6664
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1872089
Commit-Queue: Alex Chau <alexchau@chromium.org>
Reviewed-by: default avatarAlex Chau <alexchau@chromium.org>
Cr-Commit-Position: refs/heads/master@{#708142}
parent a0a241c8
...@@ -5,7 +5,6 @@ ...@@ -5,7 +5,6 @@
#include "chrome/browser/sharing/click_to_call/click_to_call_utils.h" #include "chrome/browser/sharing/click_to_call/click_to_call_utils.h"
#include "base/metrics/histogram_functions.h" #include "base/metrics/histogram_functions.h"
#include "base/no_destructor.h"
#include "base/optional.h" #include "base/optional.h"
#include "base/strings/string_util.h" #include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h" #include "base/strings/utf_string_conversions.h"
...@@ -27,14 +26,12 @@ namespace { ...@@ -27,14 +26,12 @@ namespace {
constexpr int kSelectionTextMaxLength = 30; constexpr int kSelectionTextMaxLength = 30;
// Heuristical regex to search for phone number. // Heuristical regex to search for phone number.
// (^|\\s) makes sure the pattern begins with a new word. // (^|\p{Z}) makes sure the pattern begins with a new word.
// (\\(?\\+[0-9]+\\)?)? checks for optional international code in number. // (\(?\+[0-9]+\)?) checks for optional international code in number.
// ([.\\s\\-\\(]?[0-9][\\s\\-\\)]?){8,} checks for at least eight occurrences of // ([.\p{Z}\-\(]?[0-9][\p{Z}\-\)]?){8,} checks for at least eight occurrences of
// pattern denoted to reduce false positives. // digits with optional separators to reduce false positives.
// The first pattern matched is (^|\\s) and the second pattern matched is the
// phone number we are looking for.
const char kPhoneNumberRegexPattern[] = const char kPhoneNumberRegexPattern[] =
"(?:^|\\s)((\\(?\\+[0-9]+\\)?)?([.\\s\\-\\(]?[0-9][\\s\\-\\)]?){8,})"; R"((?:^|\p{Z})((?:\(?\+[0-9]+\)?)?(?:[.\p{Z}\-(]?[0-9][\p{Z}\-)]?){8,}))";
bool IsClickToCallEnabled(content::BrowserContext* browser_context) { bool IsClickToCallEnabled(content::BrowserContext* browser_context) {
SharingService* sharing_service = SharingService* sharing_service =
...@@ -81,16 +78,15 @@ base::Optional<std::string> ExtractPhoneNumberForClickToCall( ...@@ -81,16 +78,15 @@ base::Optional<std::string> ExtractPhoneNumberForClickToCall(
ScopedUmaHistogramMicrosecondsTimer scoped_uma_timer; ScopedUmaHistogramMicrosecondsTimer scoped_uma_timer;
// TODO(crbug.com/992906): Find a better way to parse phone numbers. // TODO(crbug.com/992906): Find a better way to parse phone numbers.
static const base::NoDestructor<re2::RE2> kPhoneNumberRegex( static const re2::LazyRE2 kPhoneNumberRegex = {kPhoneNumberRegexPattern};
kPhoneNumberRegexPattern);
std::string parsed_phone_number; std::string parsed_phone_number;
if (!re2::RE2::PartialMatch(selection_text, *kPhoneNumberRegex, if (!re2::RE2::PartialMatch(selection_text, *kPhoneNumberRegex,
&parsed_phone_number)) { &parsed_phone_number)) {
return base::nullopt; return base::nullopt;
} }
return base::TrimWhitespaceASCII(parsed_phone_number, base::TRIM_ALL) return base::UTF16ToUTF8(base::TrimWhitespace(
.as_string(); base::UTF8ToUTF16(parsed_phone_number), base::TRIM_ALL));
} }
std::string GetUnescapedURLContent(const GURL& url) { std::string GetUnescapedURLContent(const GURL& url) {
......
...@@ -178,6 +178,8 @@ TEST_F(ClickToCallUtilsTest, ...@@ -178,6 +178,8 @@ TEST_F(ClickToCallUtilsTest,
expectations.emplace("9 8 7 6 5 4 3 2 1 0", "9 8 7 6 5 4 3 2 1 0"); expectations.emplace("9 8 7 6 5 4 3 2 1 0", "9 8 7 6 5 4 3 2 1 0");
// Two spaces in between. // Two spaces in between.
expectations.emplace("9 8 7 6 5 4 3 2 1 0", "9 8 7 6 5 4 3 2 1 0"); expectations.emplace("9 8 7 6 5 4 3 2 1 0", "9 8 7 6 5 4 3 2 1 0");
// Non breaking spaces around number.
expectations.emplace("\u00A09876543210\u00A0", "9876543210");
for (auto& expectation : expectations) { for (auto& expectation : expectations) {
base::Optional<std::string> phone_number = base::Optional<std::string> phone_number =
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment