Commit e999bccc authored by Karan Bhatia's avatar Karan Bhatia Committed by Commit Bot

DNR: Change regexSubstitution semantics.

Instead of using RE2::Extract, use RE2::Replace. This is preferred because:
  - This is the more common variant of substitutions. E.g. Javascript replace
    also does this.
  - With this change, it would be easier to add global replace later if need be
    by adding a single key to the Redirect dictionary.

BUG=974391

Change-Id: Id1b3bf2fe9d21b3aa75e5215b1abaa6450d70979
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2021164
Commit-Queue: Karan Bhatia <karandeepb@chromium.org>
Reviewed-by: default avatarIstiaque Ahmed <lazyboy@chromium.org>
Cr-Commit-Position: refs/heads/master@{#735532}
parent c356bece
...@@ -307,16 +307,15 @@ base::Optional<RequestAction> ...@@ -307,16 +307,15 @@ base::Optional<RequestAction>
RegexRulesMatcher::CreateRegexSubstitutionRedirectAction( RegexRulesMatcher::CreateRegexSubstitutionRedirectAction(
const RequestParams& params, const RequestParams& params,
const RegexRuleInfo& info) const { const RegexRuleInfo& info) const {
std::string redirect_str;
// We could have extracted the captured strings during the matching stage // We could have extracted the captured strings during the matching stage
// and directly used RE2::Rewrite here (which doesn't need to match the // and directly used RE2::Rewrite here (which doesn't need to match the
// regex again). However we prefer to capture the strings only when // regex again). However we prefer to capture the strings only when
// necessary. Not capturing the strings should allow re2 to perform // necessary. Not capturing the strings should allow re2 to perform
// additional optimizations during the matching stage. // additional optimizations during the matching stage.
bool success = RE2::Extract( std::string redirect_str = params.url->spec();
params.url->spec(), *info.regex, bool success =
ToRE2StringPiece(*info.regex_rule->regex_substitution()), &redirect_str); RE2::Replace(&redirect_str, *info.regex,
ToRE2StringPiece(*info.regex_rule->regex_substitution()));
if (!success) { if (!success) {
// This should generally not happen since we had already checked for a // This should generally not happen since we had already checked for a
// match and during indexing, had verified that the substitution pattern // match and during indexing, had verified that the substitution pattern
......
...@@ -1015,10 +1015,13 @@ TEST_F(RulesetMatcherTest, RegexSubstitution) { ...@@ -1015,10 +1015,13 @@ TEST_F(RulesetMatcherTest, RegexSubstitution) {
} rule_info[] = { } rule_info[] = {
// "\0" captures the complete matched string. // "\0" captures the complete matched string.
{1, R"(^.*google\.com.*$)", R"(https://redirect.com?original=\0)"}, {1, R"(^.*google\.com.*$)", R"(https://redirect.com?original=\0)"},
{2, R"(http://((?:abc|def)\.xyz.com.*))", R"(https://www.\1)"}, {2, R"(http://((?:abc|def)\.xyz.com.*)$)", R"(https://www.\1)"},
{3, R"((http|https)://example.com.*(\?|&)redirect=(.*?)(?:&|$))", {3, R"(^(http|https)://example\.com.*(\?|&)redirect=(.*?)(?:&|$).*$)",
R"(\1://\3)"}, R"(\1://\3)"},
{4, R"(reddit\.com)", "https://abc.com"}}; {4, R"(reddit\.com)", "abc.com"},
{5, R"(^http://www\.(pqr|rst)\.xyz\.com)", R"(https://\1.xyz.com)"},
{6, R"(\.in)", ".co.in"},
};
std::vector<TestRule> rules; std::vector<TestRule> rules;
for (const auto& info : rule_info) { for (const auto& info : rule_info) {
...@@ -1057,6 +1060,10 @@ TEST_F(RulesetMatcherTest, RegexSubstitution) { ...@@ -1057,6 +1060,10 @@ TEST_F(RulesetMatcherTest, RegexSubstitution) {
// The redirect url here would have been "http://" which is invalid. // The redirect url here would have been "http://" which is invalid.
{"http://example.com/path?q1=1&redirect=&q2=2", base::nullopt}, {"http://example.com/path?q1=1&redirect=&q2=2", base::nullopt},
{"https://reddit.com", create_redirect_action(4, "https://abc.com")}, {"https://reddit.com", create_redirect_action(4, "https://abc.com")},
{"http://www.rst.xyz.com/path",
create_redirect_action(5, "https://rst.xyz.com/path")},
{"http://yahoo.in/path",
create_redirect_action(6, "http://yahoo.co.in/path")},
// No match. // No match.
{"http://example.com", base::nullopt}}; {"http://example.com", base::nullopt}};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment