Commit e999bccc authored by Karan Bhatia's avatar Karan Bhatia Committed by Commit Bot

DNR: Change regexSubstitution semantics.

Instead of using RE2::Extract, use RE2::Replace. This is preferred because:
  - This is the more common variant of substitutions. E.g. Javascript replace
    also does this.
  - With this change, it would be easier to add global replace later if need be
    by adding a single key to the Redirect dictionary.

BUG=974391

Change-Id: Id1b3bf2fe9d21b3aa75e5215b1abaa6450d70979
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2021164
Commit-Queue: Karan Bhatia <karandeepb@chromium.org>
Reviewed-by: default avatarIstiaque Ahmed <lazyboy@chromium.org>
Cr-Commit-Position: refs/heads/master@{#735532}
parent c356bece
......@@ -307,16 +307,15 @@ base::Optional<RequestAction>
RegexRulesMatcher::CreateRegexSubstitutionRedirectAction(
const RequestParams& params,
const RegexRuleInfo& info) const {
std::string redirect_str;
// We could have extracted the captured strings during the matching stage
// and directly used RE2::Rewrite here (which doesn't need to match the
// regex again). However we prefer to capture the strings only when
// necessary. Not capturing the strings should allow re2 to perform
// additional optimizations during the matching stage.
bool success = RE2::Extract(
params.url->spec(), *info.regex,
ToRE2StringPiece(*info.regex_rule->regex_substitution()), &redirect_str);
std::string redirect_str = params.url->spec();
bool success =
RE2::Replace(&redirect_str, *info.regex,
ToRE2StringPiece(*info.regex_rule->regex_substitution()));
if (!success) {
// This should generally not happen since we had already checked for a
// match and during indexing, had verified that the substitution pattern
......
......@@ -1015,10 +1015,13 @@ TEST_F(RulesetMatcherTest, RegexSubstitution) {
} rule_info[] = {
// "\0" captures the complete matched string.
{1, R"(^.*google\.com.*$)", R"(https://redirect.com?original=\0)"},
{2, R"(http://((?:abc|def)\.xyz.com.*))", R"(https://www.\1)"},
{3, R"((http|https)://example.com.*(\?|&)redirect=(.*?)(?:&|$))",
{2, R"(http://((?:abc|def)\.xyz.com.*)$)", R"(https://www.\1)"},
{3, R"(^(http|https)://example\.com.*(\?|&)redirect=(.*?)(?:&|$).*$)",
R"(\1://\3)"},
{4, R"(reddit\.com)", "https://abc.com"}};
{4, R"(reddit\.com)", "abc.com"},
{5, R"(^http://www\.(pqr|rst)\.xyz\.com)", R"(https://\1.xyz.com)"},
{6, R"(\.in)", ".co.in"},
};
std::vector<TestRule> rules;
for (const auto& info : rule_info) {
......@@ -1057,6 +1060,10 @@ TEST_F(RulesetMatcherTest, RegexSubstitution) {
// The redirect url here would have been "http://" which is invalid.
{"http://example.com/path?q1=1&redirect=&q2=2", base::nullopt},
{"https://reddit.com", create_redirect_action(4, "https://abc.com")},
{"http://www.rst.xyz.com/path",
create_redirect_action(5, "https://rst.xyz.com/path")},
{"http://yahoo.in/path",
create_redirect_action(6, "http://yahoo.co.in/path")},
// No match.
{"http://example.com", base::nullopt}};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment