Commit 3b0bebb8, authored by Richard Knoll, committed by Commit Bot

Add new regex to detect phone numbers for Click to Call

This adds a new regex based on Android's TextClassifier to improve the
phone number detection on selected text.

Bug: 1001033
Change-Id: I759de1d14e0d9439a709dbdc8db62b7ca7fc70b3
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1940246
Reviewed-by: Alexei Svitkine <asvitkine@chromium.org>
Reviewed-by: Michael van Ouwerkerk <mvanouwerkerk@chromium.org>
Commit-Queue: Richard Knoll <knollr@chromium.org>
Cr-Commit-Position: refs/heads/master@{#720222}
parent 2b2f31e6
...@@ -74,6 +74,11 @@ base::Optional<std::string> ExtractPhoneNumberForClickToCall( ...@@ -74,6 +74,11 @@ base::Optional<std::string> ExtractPhoneNumberForClickToCall(
if (!IsClickToCallEnabled(browser_context)) if (!IsClickToCallEnabled(browser_context))
return base::nullopt; return base::nullopt;
if (base::FeatureList::IsEnabled(kClickToCallDetectionV2)) {
return ExtractPhoneNumber(selection_text,
PhoneNumberRegexVariant::kLowConfidenceModified);
}
return ExtractPhoneNumber(selection_text, PhoneNumberRegexVariant::kSimple); return ExtractPhoneNumber(selection_text, PhoneNumberRegexVariant::kSimple);
} }
......
...@@ -127,7 +127,8 @@ TEST_F(ClickToCallUtilsTest, NonTelLink_DoNotOfferForLink) { ...@@ -127,7 +127,8 @@ TEST_F(ClickToCallUtilsTest, NonTelLink_DoNotOfferForLink) {
TEST_F(ClickToCallUtilsTest, TEST_F(ClickToCallUtilsTest,
SelectionText_ValidPhoneNumberRegex_OfferForSelection) { SelectionText_ValidPhoneNumberRegex_OfferForSelection) {
scoped_feature_list_.InitAndEnableFeature(kClickToCallUI); scoped_feature_list_.InitWithFeatures({kClickToCallUI},
{kClickToCallDetectionV2});
// Stores a mapping of selected text to expected phone number parsed. // Stores a mapping of selected text to expected phone number parsed.
std::map<std::string, std::string> expectations; std::map<std::string, std::string> expectations;
...@@ -151,6 +152,10 @@ TEST_F(ClickToCallUtilsTest, ...@@ -151,6 +152,10 @@ TEST_F(ClickToCallUtilsTest,
expectations.emplace("9 8 7 6 5 4 3 2 1 0", "9 8 7 6 5 4 3 2 1 0"); expectations.emplace("9 8 7 6 5 4 3 2 1 0", "9 8 7 6 5 4 3 2 1 0");
// Non breaking spaces around number. // Non breaking spaces around number.
expectations.emplace("\u00A09876543210\u00A0", "9876543210"); expectations.emplace("\u00A09876543210\u00A0", "9876543210");
// Example for a credit card
expectations.emplace("4111 1111 1111 1111", "4111 1111 1111 1111");
// Chrome version string
expectations.emplace("78.0.3904.108", "78.0.3904.108");
for (auto& expectation : expectations) { for (auto& expectation : expectations) {
base::Optional<std::string> phone_number = base::Optional<std::string> phone_number =
...@@ -162,27 +167,71 @@ TEST_F(ClickToCallUtilsTest, ...@@ -162,27 +167,71 @@ TEST_F(ClickToCallUtilsTest,
TEST_F(ClickToCallUtilsTest, TEST_F(ClickToCallUtilsTest,
SelectionText_InvalidPhoneNumberRegex_DoNotOfferForSelection) { SelectionText_InvalidPhoneNumberRegex_DoNotOfferForSelection) {
scoped_feature_list_.InitAndEnableFeature(kClickToCallUI); scoped_feature_list_.InitWithFeatures({kClickToCallUI},
{kClickToCallDetectionV2});
std::vector<std::string> invalid_selection_texts; std::vector<std::string> invalid_selection_texts;
// Does not contain any number. // Does not contain any number.
invalid_selection_texts.emplace_back("Call me maybe"); invalid_selection_texts.push_back("Call me maybe");
// We only parse smaller text sizes to avoid performance impact on Chromium. // We only parse smaller text sizes to avoid performance impact on Chromium.
invalid_selection_texts.emplace_back( invalid_selection_texts.push_back(
"This is a huge text. It also contains a phone number 9876543210"); "This is a huge text. It also contains a phone number 9876543210");
// Although this is a valid number, its not caught by the regex. // Although this is a valid number, its not caught by the regex.
invalid_selection_texts.emplace_back("+44 1800-FLOWERS"); invalid_selection_texts.push_back("+44 1800-FLOWERS");
// Number does not start as new word. // Number does not start as new word.
invalid_selection_texts.emplace_back("No space9876543210"); invalid_selection_texts.push_back("No space9876543210");
// Minimum length for regex match not satisfied. // Minimum length for regex match not satisfied.
invalid_selection_texts.emplace_back("Small number 98765"); invalid_selection_texts.push_back("Small number 98765");
// Number does not start as new word. // Number does not start as new word.
invalid_selection_texts.emplace_back("Buy for $9876543210"); invalid_selection_texts.push_back("Buy for $9876543210");
// More than two spaces in between. // More than two spaces in between.
invalid_selection_texts.emplace_back( invalid_selection_texts.push_back("9 8 7 6 5 4 3 2 1 0");
"9 8 7 6 5 4 3 2 1 0");
// Space dash space formatting. // Space dash space formatting.
invalid_selection_texts.emplace_back("999 - 999 - 9999"); invalid_selection_texts.push_back("999 - 999 - 9999");
for (auto& text : invalid_selection_texts)
ExpectClickToCallDisabledForSelectionText(text);
}
// With both kClickToCallUI and kClickToCallDetectionV2 passed in the first
// feature list, checks that ExtractPhoneNumberForClickToCall() returns the
// expected phone number for each selected text below.
TEST_F(ClickToCallUtilsTest, SelectionText_LowConfidenceModifiedRegex_Matches) {
  scoped_feature_list_.InitWithFeatures(
      {kClickToCallUI, kClickToCallDetectionV2}, {});

  // Selected text -> phone number expected to be extracted from it.
  const std::map<std::string, std::string> cases = {
      {"+91 77997 12345", "+91 77997 12345"},
      {"(+0091) 040 12345678", "(+0091) 040 12345678"},
      {"+1 800 444 4444", "+1 800 444 4444"},
      {"754-1234", "754-1234"},
      {"+55-955-1234-1234 (landline)", "+55-955-1234-1234"},
      {"+44(0)20-1234 1234", "+44(0)20-1234 1234"},
      {"07700123123", "07700123123"},
      {"Call +49 231 1234567 now!", "+49 231 1234567"},
      {"tel:+49-89-636-12345", "+49-89-636-12345"},
      {"Number (021) 12345678", "(021) 12345678"},
      {"(private) +90 312 123 12 12", "+90 312 123 12 12"},
      {"+34 913 12 12 12", "+34 913 12 12 12"},
      {"(000)\u00A00000000", "(000)\u00A00000000"},
  };

  for (const auto& entry : cases) {
    const std::string& selected_text = entry.first;
    const std::string& expected_number = entry.second;
    const base::Optional<std::string> extracted =
        ExtractPhoneNumberForClickToCall(&profile_, selected_text);
    // A missing result is compared as the empty string, so an unexpected
    // "no match" fails the expectation instead of dereferencing nullopt.
    EXPECT_EQ(expected_number, extracted.value_or(""));
  }
}
TEST_F(ClickToCallUtilsTest, SelectionText_LowConfidenceModifiedRegex_NoMatch) {
scoped_feature_list_.InitWithFeatures(
{kClickToCallUI, kClickToCallDetectionV2}, {});
std::vector<std::string> invalid_selection_texts = {
// Example for a credit card
"4111 1111 1111 1111",
// Chrome version string
"78.0.3904.108",
// Too many spaces
"9 8 7 6 5 4 3 2 1 0",
};
for (auto& text : invalid_selection_texts) for (auto& text : invalid_selection_texts)
ExpectClickToCallDisabledForSelectionText(text); ExpectClickToCallDisabledForSelectionText(text);
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include "base/task/post_task.h" #include "base/task/post_task.h"
#include "base/time/time.h" #include "base/time/time.h"
#include "chrome/browser/sharing/click_to_call/feature.h" #include "chrome/browser/sharing/click_to_call/feature.h"
#include "third_party/re2/src/re2/re2.h"
namespace { namespace {
...@@ -23,11 +24,117 @@ namespace { ...@@ -23,11 +24,117 @@ namespace {
const char kPhoneNumberRegexPatternSimple[] = const char kPhoneNumberRegexPatternSimple[] =
R"((?:^|\p{Z})((?:\(?\+[0-9]+\)?)?(?:[.\p{Z}\-(]?[0-9][\p{Z}\-)]?){8,}))"; R"((?:^|\p{Z})((?:\(?\+[0-9]+\)?)?(?:[.\p{Z}\-(]?[0-9][\p{Z}\-)]?){8,}))";
// Regex based on the low confidence pattern used in TextClassifier on Android.
// The original low confidence regex has been modified to support some more
// patterns that are common in our user base.
// https://developer.android.com/reference/android/view/textclassifier/TextClassifier
// https://android.googlesource.com/platform/external/libtextclassifier/+/refs/heads/master/models/
const char kPhoneNumberRegexPatternLowConfidenceModified[] =
R"((?:^|[^\/+.\d\p{Pd}:\p{Z})]|(?:^|[^\/+.\d\p{Pd})])[:\p{Z})])(?:^|(?sm:\)"
R"(b)|[\p{Z}:.])((?:(?:(?:\(\d{4}\)[\p{Pd}\p{Z}]\d{2}(?:\p{Pd}\d{2}){2}[\p)"
R"({Pd}\p{Z}]|\+[\p{Pd}\p{Z}]\d(?:\d{2}[\p{Pd}\p{Z}]\(\d{2}\)[\p{Pd}\p{Z}])"
R"(\d|[\p{Pd}\p{Z}]\(\d{4}\)[\p{Pd}\p{Z}])|\d{3}[\p{Pd}\p{Z}]\(\d{2}\)[\p{)"
R"(Pd}\p{Z}]\d)(?:\d{2}\p{Pd}){2}|(?:\d{2}(?:[\p{Pd}\p{Z}](?:\d{2}(?:[\p{P)"
R"(d}\p{Z}]\d{2}(?:[\p{Pd}\p{Z}]\d{4}[\p{Pd}\p{Z}]|\d{2}\p{Pd}|\d\p{Pd}\d))"
R"(|\d{2}\p{Pd}?\d)|\(\d{2}\)[\p{Pd}\p{Z}]\d)\d{2}[\p{Pd}\p{Z}]|(?:\p{Pd}()"
R"(?:\d{2}[\p{Pd}\p{Z}]){2}\d{4}\p{Pd}|\d\p{Pd}\d|\d{2}[\p{Pd}\p{Z}]|\d[\p)"
R"({Pd}\p{Z}])\d{2}[\p{Pd}\p{Z}])|\+[\p{Pd}\p{Z}]\d{2}[\p{Pd}\p{Z}]\(\d{2})"
R"(\)[\p{Pd}\p{Z}]\d{3}[\p{Pd}\p{Z}])\d{2}[\p{Pd}\p{Z}]|\+(?:\d(?:\d(?:[\p)"
R"({Pd}\p{Z}](?:\(\d(?:(?:\)(?:[\p{Pd}\p{Z}]\d{4}\p{Pd}\d{2,3}|\d{4}\p{Pd})"
R"(\d{3})|(?:\)(?:[\p{Pd}\p{Z}]\d{3}\p{Pd}\d|\d[\p{Pd}\p{Z}]\d{2}[\p{Pd}\p)"
R"({Z}]|\d{3}\p{Pd}\d)|\d{3}\)[\p{Pd}\p{Z}]\d)\d{2})[\p{Pd}\p{Z}]\d{2}[\p{)"
R"(Pd}\p{Z}]|\d(?:\d(?:\d\)[\p{Pd}\p{Z}]\d{2}(?:\p{Pd}\d{2}\p{Pd}|\d{2})|\)"
R"()[\p{Pd}\p{Z}]\d{5})|\)[\p{Pd}\p{Z}]\d{3}(?:[\p{Pd}\p{Z}]\d{2}[\p{Pd}\p)"
R"({Z}]|\p{Pd}\d{2}\p{Pd})))|\d{3}(?:[\p{Pd}\p{Z}]\d{5}|\d\p{Pd}\d{3}[\p{P)"
R"(d}\p{Z}]|\d{2}[\p{Pd}\p{Z}]\d{3}))|(?:\d[\p{Pd}\p{Z}]\(\d{2}\)[\p{Pd}\p)"
R"({Z}]\d{3}[\p{Pd}\p{Z}]|[\p{Pd}\p{Z}]\d(?:[\p{Pd}\p{Z}]\d{2}[\p{Pd}\p{Z})"
R"(]|\d{2}(?:\d\p{Pd}?|[\p{Pd}\p{Z}]))\d{2}[\p{Pd}\p{Z}])\d{2}[\p{Pd}\p{Z})"
R"(]|\d(?:(?:[\p{Pd}\p{Z}]\(\d{2}\)[\p{Pd}\p{Z}]\d{3}|\d{4,5})\p{Pd}\d{2}\)"
R"(p{Pd}|[\p{Pd}\p{Z}]\(\d{3}\)[\p{Pd}\p{Z}]\d{3}[\p{Pd}\p{Z}]?\d{2}|\d{8})"
R"([\p{Pd}\p{Z}]\p{Pd}[\p{Pd}\p{Z}]))|(?:\d(?:[\p{Pd}\p{Z}](?:\(\d(?:\d(?:)"
R"(\d\)[\p{Pd}\p{Z}]\d{3}(?:\d\p{Pd}|[\p{Pd}\p{Z}])|\)(?:[\p{Pd}\p{Z}]\d{4)"
R"(}(?:\d?\p{Pd}|[\p{Pd}\p{Z}])|\d{4}\p{Pd}))|(?:\)\d(?:\d\p{Pd}\d{2}|\d[\)"
R"(p{Pd}\p{Z}]\d|\d{2}[\p{Pd}\p{Z}]|[\p{Pd}\p{Z}]\d{2})|\d{2}\)\d)\d{2})|\)"
R"(d{2}(?:[\p{Pd}\p{Z}]\d{4}[\p{Pd}\p{Z}]|\p{Pd}\d{4}))|(?:\(\d\)\d{2}\p{P)"
R"(d}\d{4}|\d[\p{Pd}\p{Z}]\d{4,5})[\p{Pd}\p{Z}]|\p{Pd}\d{2}\p{Pd}\d{3}\p{P)"
R"(d}\d)|(?:\d\p{Pd}\d{3}\p{Pd}\d|[\p{Pd}\p{Z}]\d{3}\p{Pd})\d{3}\p{Pd}|(?:)"
R"(\d\.|[\p{Pd}\p{Z}])\d{3}\.\d{3}\.|[\p{Pd}\p{Z}](?:\d{3}[\p{Pd}\p{Z}]){2)"
R"(}|\.\d{3}(?:\.\d{3}\.|\p{Pd}\d{3}\p{Pd}))\d{2}|[\p{Pd}\p{Z}]\(\d{3}(?:\)"
R"()(?:[\p{Pd}\p{Z}]\d{3}(?:\p{Pd}\d{2}\p{Pd}|[\p{Pd}\p{Z}]?\d{2})|\d{3}\p)"
R"({Pd}?\d{2})|\d(?:(?:\d\)[\p{Pd}\p{Z}]|\)[\p{Pd}\p{Z}]\d)\d\p{Pd}\d{2}\p)"
R"({Pd}|\)[\p{Pd}\p{Z}]\d{3}\p{Pd}?\d)))|[\p{Pd}\p{Z}]\d(?:[\p{Pd}\p{Z}]\()"
R"(\d{3}\)[\p{Pd}\p{Z}]\d{3}(?:[\p{Pd}\p{Z}]\d{2}[\p{Pd}\p{Z}]|\d{2})|\d[\)"
R"(p{Pd}\p{Z}]\(\d{3}\)[\p{Pd}\p{Z}]\d{5}))|(?:\((?:(?:\+\d{2}(?:\d{2}\)[\)"
R"(p{Pd}\p{Z}]\d{3}[\p{Pd}\p{Z}]|\)[\p{Pd}\p{Z}]\d{2}\p{Pd}?)\d|\d\)[\p{Pd)"
R"(}\p{Z}]\(\d{3}\)[\p{Pd}\p{Z}])\d{3}|\d{2}(?:\)(?:[\p{Pd}\p{Z}](?:\(\d{4)"
R"(}\)[\p{Pd}\p{Z}]\d{3}|\d{3}(?:\p{Pd}\d{3}\p{Pd}|\d{2}\p{Pd}|\d\p{Pd}|\d)"
R"([\p{Pd}\p{Z}]))|\d{4}\p{Pd})|\d(?:\)(?:[\p{Pd}\p{Z}]\d{3}(?:[\p{Pd}\p{Z)"
R"(}]\d{3}[\p{Pd}\p{Z}]|\p{Pd}\d{3}\p{Pd}|\d\p{Pd})|\d{3}\p{Pd})|\d\)[\p{P)"
R"(d}\p{Z}]\d{3}\p{Pd}))|\+\d\)[\p{Pd}\p{Z}]\d{3}(?:\.\d{3}\.|\p{Pd}\d{3}\)"
R"(p{Pd}))|\+[\p{Pd}\p{Z}]\d{2}[\p{Pd}\p{Z}]\(\d{2}(?:\)[\p{Pd}\p{Z}]\d{4})"
R"(\p{Pd}|\d\)[\p{Pd}\p{Z}]\d{3}[\p{Pd}\p{Z}])|(?:\((?:\+\d)?\d\)|\d)[\p{P)"
R"(d}\p{Z}](?:\d{3}\p{Pd}){2}|\d(?:\d(?:(?:\d[\p{Pd}\p{Z}]\(\d{3}\)[\p{Pd})"
R"(\p{Z}]\d|\d{2}\/|\p{Pd}\d{2})\d{2}|\d(?:[\p{Pd}\p{Z}]\d{4}[\p{Pd}\p{Z}])"
R"(|\d[\p{Pd}\p{Z}]\d{3}[\p{Pd}\p{Z}]|\.\d{3}\.|\d{2}\p{Pd}\d{2}|\d{3}\p{P)"
R"(d})|[\p{Pd}\p{Z}]\d{4}[\p{Pd}\p{Z}])|\.\d{3}\.\d{3}\.))\d{2}|\((?:\+\d{)"
R"(2}\)(?:\d{2}\.){4}|\d{2}(?:\d{2}\)[\p{Pd}\p{Z}]\d{3}(?:[\p{Pd}\p{Z}](?:)"
R"(\d{2}[\p{Pd}\p{Z}]){2}|\d{2})|\)[\p{Pd}\p{Z}](?:\(\d{2}(?:\)[\p{Pd}\p{Z)"
R"(}]\d|\d\)[\p{Pd}\p{Z}])\d{3}\p{Pd}?\d{2}|\d{3}(?:(?:\p{Pd}\d{2}){2}[\p{)"
R"(Pd}\p{Z}]\d|[\p{Pd}\p{Z}](?:\d{2}[\p{Pd}\p{Z}]){2}|\d{3}))))|\d(?:\d(?:)"
R"(\d(?:[\p{Pd}\p{Z}]\d{4}\p{Pd}(?:\d{3}[\p{Pd}\p{Z}]){2}|\d\p{Pd}\d{3}[\p)"
R"({Pd}\p{Z}](?:\d{2}[\p{Pd}\p{Z}]){2}|\d{3}[\p{Pd}\p{Z}]\d{2}[\p{Pd}\p{Z})"
R"(]|\p{Pd}\d{5}|(?:\d{2}[\p{Pd}\p{Z}]){2}|\d{6}|\p{Pd}\d{2}\p{Pd})|[\p{Pd)"
R"(}\p{Z}](?:\(\d{2}(?:\d\)[\p{Pd}\p{Z}]|\)[\p{Pd}\p{Z}]\d)\d{3}[\p{Pd}\p{)"
R"(Z}]?\d{2}|(?:\d{2}[\p{Pd}\p{Z}]){3})|\.(?:\d{2}\.){3})|\p{Pd}\d{3}\p{Pd)"
R"(}\d{5})|(?:\+[\d\p{Pd}\p{Z}]\d[\p{Pd}\p{Z}]\(\d{3}\)[\p{Pd}\p{Z}]|\d[\p)"
R"({Pd}\p{Z}]\(\d{3}\)[\p{Pd}\p{Z}]|\d\p{Pd}\d{3}\p{Pd})\d{3}\p{Pd}\d{2}\p)"
R"({Pd}?|(?:\+\d{2}[\p{Pd}\p{Z}]\d{2,3}[\p{Pd}\p{Z}]|\d{3}[\d\p{Pd}\p{Z}]))"
R"(\d{3}[\p{Pd}\p{Z}]\d{2}[\p{Pd}\p{Z}]?)\d{2}|\+\d(?:\d(?:[\p{Pd}\p{Z}](?)"
R"(:(?:\(\d(?:\)\d{4}\p{Pd}(?:\d{2}[\p{Pd}\p{Z}]){3}|\d{4}\)[\p{Pd}\p{Z}]\)"
R"(d{5}[\p{Pd}\p{Z}]|\d\)[\p{Pd}\p{Z}]\d{8})|\(\d(?:\)\d{4}\p{Pd}\d{2}(?:[)"
R"(\p{Pd}\p{Z}]\d)?\d[\p{Pd}\p{Z}]|\d(?:\d{3}\)[\p{Pd}\p{Z}]|\)[\p{Pd}\p{Z)"
R"(}]\d{3})\d{3})\d)\d|\d{2}(?:\d{2}(?:[\p{Pd}\p{Z}]?\d{4}[\p{Pd}\p{Z}]\d{)"
R"(3}|[\p{Pd}\p{Z}]\d{5}(?:\d[\p{Pd}\p{Z}]|[\p{Pd}\p{Z}]\d)|\d{6}[\p{Pd}\p)"
R"({Z}])|(?:(?:[\d\p{Pd}\p{Z}](?:\d{2}[\p{Pd}\p{Z}]){2}|[\p{Pd}\p{Z}]\d{5})"
R"()\d{2}[\p{Pd}\p{Z}]|[\p{Pd}\p{Z}]\d{6})\d)\d|\d{2}(?:(?:(?:[\d\p{Pd}\p{)"
R"(Z}](?:\d{2}[\p{Pd}\p{Z}]){2}|[\p{Pd}\p{Z}]\d{5})\d{2}[\p{Pd}\p{Z}]|(?:[)"
R"(\d\p{Pd}\p{Z}](?:\d{2}[\p{Pd}\p{Z}]){2}|[\p{Pd}\p{Z}]\d{5})\d)\d|\d{2}()"
R"(?:[\p{Pd}\p{Z}]?\d{4}[\p{Pd}\p{Z}](?:\d{2}|\d)|[\p{Pd}\p{Z}]\d{4}(?:\d[)"
R"(\p{Pd}\p{Z}]\d|\d{2}|\d)?|\d{4,6})))|\d{3}(?:(?:[\p{Pd}\p{Z}]\d{2}(?:[\)"
R"(p{Pd}\p{Z}](?:\d{2}[\p{Pd}\p{Z}]){2}\d|\d{4}(?:\d[\p{Pd}\p{Z}]|[\p{Pd}\)"
R"(p{Z}]\d))|\d{3}(?:[\p{Pd}\p{Z}](?:\d{2}[\p{Pd}\p{Z}]){2}|\d{3}[\p{Pd}\p)"
R"({Z}]\d|[\p{Pd}\p{Z}]\d{3}|\d{3}))\d|[\p{Pd}\p{Z}]\d{2}(?:(?:[\p{Pd}\p{Z)"
R"(}]\d{2}){2}(?:[\p{Pd}\p{Z}]\d)?|\d{4}(?:[\p{Pd}\p{Z}]?\d)?)|\d{3}(?:(?:)"
R"([\p{Pd}\p{Z}]\d{2}){2}|\d{2}(?:\d[\p{Pd}\p{Z}]\d|\d)?)))|[\p{Pd}\p{Z}]\)"
R"((\d{3}\)[\p{Pd}\p{Z}]\d{3}\p{Pd}\d{3,4})|\(\d{2}(?:\d(?:\)(?:(?:[\p{Pd})"
R"(\p{Z}]\d{3}(?:\p{Pd}\d{2}(?:\d{2}[\p{Pd}\p{Z}]\d{5}|\p{Pd}\d{2}[\p{Pd}\)"
R"(p{Z}])|[\p{Pd}\p{Z}](?:\d{2}[\p{Pd}\p{Z}]){2}\d|[\p{Pd}\p{Z}]\d{3}|\d{4)"
R"(})|\d{7}[\p{Pd}\p{Z}]|\d{6})\d|[\p{Pd}\p{Z}]\d{3}(?:\p{Pd}\d{2}(?:\d{2})"
R"((?:[\p{Pd}\p{Z}]\d{3,4})?|\p{Pd}\d{2})|[\p{Pd}\p{Z}](?:\d{2}[\p{Pd}\p{Z)"
R"(}]){2}\d|(?:[\p{Pd}\p{Z}]\d{2}){2}|\d{4}))|\d\)[\p{Pd}\p{Z}]\d{2}(?:(?:)"
R"(\p{Pd}\d{2}){2}(?:[\p{Pd}\p{Z}]\d)?|\d(?:[\p{Pd}\p{Z}]\d{2}){2}))|\)[\p)"
R"({Pd}\p{Z}]\d{3}(?:\p{Pd}\d{2}\p{Pd}|[\p{Pd}\p{Z}]\d{2}[\p{Pd}\p{Z}])\d{)"
R"(2})|\d(?:[\p{Pd}\p{Z}]\(\d{3}(?:\d\)[\p{Pd}\p{Z}](?:\d{2}\p{Pd}){2}|\)()"
R"(?:[\p{Pd}\p{Z}]\d{3}[\p{Pd}\p{Z}]\d{2}[\p{Pd}\p{Z}]?|[\p{Pd}\p{Z}]?\d{5)"
R"(}))\d{2}|\d(?:\d(?:(?:(?:\p{Pd}\d{3}\p{Pd}\d{4}[\p{Pd}\p{Z}](?:\d{2}|\d)"
R"()|\d\p{Pd}(?:\d{2}[\p{Pd}\p{Z}]){3}|\d{3}[\p{Pd}\p{Z}]\d{3}|\d\p{Pd}\d{)"
R"(5})\d|\d\p{Pd}(?:\d{2}[\p{Pd}\p{Z}]){3}|\d{3}[\p{Pd}\p{Z}]\d{3}|\d\p{Pd)"
R"(}\d{5})\d|[\p{Pd}\p{Z}]\d{4}(?:(?:\p{Pd}\d{3}[\p{Pd}\p{Z}])?\d{4}|\d{1,)"
R"(3})|\d(?:(?:\p{Pd}\d{2}(?:\d[\p{Pd}\p{Z}]\d{2}[\p{Pd}\p{Z}]\d|[\p{Pd}\p)"
R"({Z}]\d{2}[\p{Pd}\p{Z}]\d|\d[\p{Pd}\p{Z}]\d|\d)|(?:[\p{Pd}\p{Z}]\d{2}|\d)"
R"([\p{Pd}\p{Z}]\d|\d\/)\d{5})\d|[\p{Pd}\p{Z}]\d{4,7}|\d(?:[\p{Pd}\p{Z}]\d)"
R"({4,6}|\/\d{4,5}))|\p{Pd}\d{3}(?:\p{Pd}\d{3,4}|\d))|[\p{Pd}\p{Z}]\d{2}(?)"
R"(:\d{2}(?:\p{Pd}?\d)?\d|[\p{Pd}\p{Z}]\d{2}[\p{Pd}\p{Z}])\d{2})))(?i:[\,\)"
R"(;]?(?:\p{Z})?(?:[x\#\~]|ext[\.\:\=]?)(?:\p{Z})?\d{1,6})?)(?:$|(?sm:\b)|)"
R"(\p{Z})(?:$|\p{Z}(?:$|[^\p{Z}\d\/+=#\p{Pd}])|\.(?:$|\D)|[^.\p{Z}\d\/+=#\)"
R"(p{Pd}]))";
void PrecompilePhoneNumberRegexes() { void PrecompilePhoneNumberRegexes() {
SCOPED_UMA_HISTOGRAM_TIMER("Sharing.ClickToCallPhoneNumberPrecompileTime"); SCOPED_UMA_HISTOGRAM_TIMER("Sharing.ClickToCallPhoneNumberPrecompileTime");
static const char kExampleInput[] = "+01(2)34-5678 9012"; static const char kExampleInput[] = "+01(2)34-5678 9012";
std::string parsed; std::string parsed;
for (auto variant : {PhoneNumberRegexVariant::kSimple}) { for (auto variant : {PhoneNumberRegexVariant::kSimple,
PhoneNumberRegexVariant::kLowConfidenceModified}) {
// Run RE2::PartialMatch over some example input to speed up future queries. // Run RE2::PartialMatch over some example input to speed up future queries.
re2::RE2::PartialMatch(kExampleInput, GetPhoneNumberRegex(variant), re2::RE2::PartialMatch(kExampleInput, GetPhoneNumberRegex(variant),
&parsed); &parsed);
...@@ -38,10 +145,14 @@ void PrecompilePhoneNumberRegexes() { ...@@ -38,10 +145,14 @@ void PrecompilePhoneNumberRegexes() {
const re2::RE2& GetPhoneNumberRegex(PhoneNumberRegexVariant variant) { const re2::RE2& GetPhoneNumberRegex(PhoneNumberRegexVariant variant) {
static const re2::LazyRE2 kRegexSimple = {kPhoneNumberRegexPatternSimple}; static const re2::LazyRE2 kRegexSimple = {kPhoneNumberRegexPatternSimple};
static const re2::LazyRE2 kRegexLowConfidenceModified = {
kPhoneNumberRegexPatternLowConfidenceModified};
switch (variant) { switch (variant) {
case PhoneNumberRegexVariant::kSimple: case PhoneNumberRegexVariant::kSimple:
return *kRegexSimple; return *kRegexSimple;
case PhoneNumberRegexVariant::kLowConfidenceModified:
return *kRegexLowConfidenceModified;
} }
} }
......
...@@ -5,8 +5,15 @@ ...@@ -5,8 +5,15 @@
#ifndef CHROME_BROWSER_SHARING_CLICK_TO_CALL_PHONE_NUMBER_REGEX_H_ #ifndef CHROME_BROWSER_SHARING_CLICK_TO_CALL_PHONE_NUMBER_REGEX_H_
#define CHROME_BROWSER_SHARING_CLICK_TO_CALL_PHONE_NUMBER_REGEX_H_ #define CHROME_BROWSER_SHARING_CLICK_TO_CALL_PHONE_NUMBER_REGEX_H_
#include "chrome/browser/sharing/sharing_metrics.h" namespace re2 {
#include "third_party/re2/src/re2/re2.h" class RE2;
} // namespace re2
// Phone number regex to use to detect numbers from text selections.
// Each variant selects one of the regexes returned by GetPhoneNumberRegex().
enum class PhoneNumberRegexVariant {
// The initial, simple phone number pattern.
kSimple = 0,
// Pattern based on the low confidence regex used in TextClassifier on
// Android, modified to support some additional number formats.
kLowConfidenceModified = 1,
};
// Returns an RE2 instance for the given |variant| to detect phone numbers. // Returns an RE2 instance for the given |variant| to detect phone numbers.
const re2::RE2& GetPhoneNumberRegex(PhoneNumberRegexVariant variant); const re2::RE2& GetPhoneNumberRegex(PhoneNumberRegexVariant variant);
......
...@@ -47,11 +47,14 @@ const char* ClickToCallEntryPointToSuffix( ...@@ -47,11 +47,14 @@ const char* ClickToCallEntryPointToSuffix(
} }
} }
// The returned values must match the values of the PhoneNumberRegexVariant
// suffixes defined in histograms.xml.
const char* PhoneNumberRegexVariantToSuffix(PhoneNumberRegexVariant variant) { const char* PhoneNumberRegexVariantToSuffix(PhoneNumberRegexVariant variant) {
switch (variant) { switch (variant) {
case PhoneNumberRegexVariant::kSimple: case PhoneNumberRegexVariant::kSimple:
// Keep the initial regex in the default metric. return "Simple";
return ""; case PhoneNumberRegexVariant::kLowConfidenceModified:
return "LowConfidenceModified";
} }
} }
...@@ -80,11 +83,19 @@ ScopedUmaHistogramMicrosecondsTimer::ScopedUmaHistogramMicrosecondsTimer( ...@@ -80,11 +83,19 @@ ScopedUmaHistogramMicrosecondsTimer::ScopedUmaHistogramMicrosecondsTimer(
: variant_(variant) {} : variant_(variant) {}
ScopedUmaHistogramMicrosecondsTimer::~ScopedUmaHistogramMicrosecondsTimer() { ScopedUmaHistogramMicrosecondsTimer::~ScopedUmaHistogramMicrosecondsTimer() {
constexpr char kPrefix[] =
"Sharing.ClickToCallContextMenuPhoneNumberParsingDelay";
constexpr base::TimeDelta kMinTime = base::TimeDelta::FromMicroseconds(1);
constexpr base::TimeDelta kMaxTime = base::TimeDelta::FromSeconds(1);
constexpr int kBuckets = 50;
base::TimeDelta elapsed = timer_.Elapsed();
// Default bucket for all variants.
base::UmaHistogramCustomMicrosecondsTimes(kPrefix, elapsed, kMinTime,
kMaxTime, kBuckets);
base::UmaHistogramCustomMicrosecondsTimes( base::UmaHistogramCustomMicrosecondsTimes(
base::StrCat({"Sharing.ClickToCallContextMenuPhoneNumberParsingDelay", base::StrCat({kPrefix, ".", PhoneNumberRegexVariantToSuffix(variant_)}),
PhoneNumberRegexVariantToSuffix(variant_)}), elapsed, kMinTime, kMaxTime, kBuckets);
timer_.Elapsed(), base::TimeDelta::FromMicroseconds(1),
base::TimeDelta::FromSeconds(1), 50);
} }
chrome_browser_sharing::MessageType SharingPayloadCaseToMessageType( chrome_browser_sharing::MessageType SharingPayloadCaseToMessageType(
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include "base/macros.h" #include "base/macros.h"
#include "base/time/time.h" #include "base/time/time.h"
#include "base/timer/elapsed_timer.h" #include "base/timer/elapsed_timer.h"
#include "chrome/browser/sharing/click_to_call/phone_number_regex.h"
#include "chrome/browser/sharing/shared_clipboard/remote_copy_handle_message_result.h" #include "chrome/browser/sharing/shared_clipboard/remote_copy_handle_message_result.h"
#include "chrome/browser/sharing/sharing_constants.h" #include "chrome/browser/sharing/sharing_constants.h"
#include "chrome/browser/sharing/sharing_send_message_result.h" #include "chrome/browser/sharing/sharing_send_message_result.h"
...@@ -21,11 +22,6 @@ class WebContents; ...@@ -21,11 +22,6 @@ class WebContents;
enum class SharingDeviceRegistrationResult; enum class SharingDeviceRegistrationResult;
// Phone number regex to use to detect numbers from text selections.
enum class PhoneNumberRegexVariant {
kSimple = 0,
};
// Result of VAPID key creation during Sharing registration. // Result of VAPID key creation during Sharing registration.
// These values are logged to UMA. Entries should not be renumbered and numeric // These values are logged to UMA. Entries should not be renumbered and numeric
// values should never be reused. Please keep in sync with // values should never be reused. Please keep in sync with
......
...@@ -138953,6 +138953,8 @@ should be kept until we remove incident reporting. --> ...@@ -138953,6 +138953,8 @@ should be kept until we remove incident reporting. -->
<histogram name="Sharing.ClickToCallContextMenuPhoneNumberParsingDelay" <histogram name="Sharing.ClickToCallContextMenuPhoneNumberParsingDelay"
units="microseconds" expires_after="M81"> units="microseconds" expires_after="M81">
<!-- Name completed by histogram_suffixes name="PhoneNumberRegexVariant" -->
<owner>himanshujaju@chromium.org</owner> <owner>himanshujaju@chromium.org</owner>
<owner>knollr@chromium.org</owner> <owner>knollr@chromium.org</owner>
<owner>peter@chromium.org</owner> <owner>peter@chromium.org</owner>
...@@ -182060,6 +182062,13 @@ regressions. --> ...@@ -182060,6 +182062,13 @@ regressions. -->
<affected-histogram name="UMA.PersistentAllocator.UsedPct"/> <affected-histogram name="UMA.PersistentAllocator.UsedPct"/>
</histogram_suffixes> </histogram_suffixes>
<histogram_suffixes name="PhoneNumberRegexVariant" separator=".">
<suffix name="LowConfidenceModified" label="Low confidence modified"/>
<suffix name="Simple" label="Simple regex"/>
<affected-histogram
name="Sharing.ClickToCallContextMenuPhoneNumberParsingDelay"/>
</histogram_suffixes>
<histogram_suffixes name="PhysicalWebDebugActions" separator="."> <histogram_suffixes name="PhysicalWebDebugActions" separator=".">
<suffix name="ChromeStart" label="when Chrome first starts up"/> <suffix name="ChromeStart" label="when Chrome first starts up"/>
<suffix name="LaunchFromDiagnostics" <suffix name="LaunchFromDiagnostics"
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment