Commit 72917401, authored by Matthias Körber, committed by Commit Bot

[Autofill][Slimshady] Added tools to build parsing patterns.

Tools to build patterns for parsing autofill types from named capture groups.

Bug: 1099202
Change-Id: I2bf6f1e3ed0e767c897830732adc6b508cbf99db
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2302172
Commit-Queue: Matthias Körber <koerber@google.com>
Reviewed-by: Dominic Battré <battre@chromium.org>
Cr-Commit-Position: refs/heads/master@{#790416}
parent de7cc438
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
#include "components/autofill/core/browser/data_model/autofill_structured_address_utils.h"
......@@ -139,5 +138,48 @@ std::string GetPlaceholderToken(const std::string& value) {
return base::StrCat({"${", value, "}"});
}
std::string CaptureTypeWithPattern(
const ServerFieldType& type,
std::initializer_list<base::StringPiece> pattern_span_initializer_list) {
return CaptureTypeWithPattern(type, pattern_span_initializer_list,
CaptureOptions());
}
// Concatenates the pieces in |pattern_span_initializer_list| into a single
// pattern string and forwards it, together with |options|, to the
// string-based overload.
std::string CaptureTypeWithPattern(
    const ServerFieldType& type,
    std::initializer_list<base::StringPiece> pattern_span_initializer_list,
    const CaptureOptions& options) {
  const std::string joined_pattern =
      base::StrCat(base::make_span(pattern_span_initializer_list));
  return CaptureTypeWithPattern(type, joined_pattern, options);
}
// Builds a regular expression of the form
//   (?:(?P<NAME>PATTERN)(?:SEPARATOR))QUANTIFIER
// where NAME is the string representation of |type|, PATTERN is |pattern|,
// SEPARATOR is |options.separator| and QUANTIFIER is derived from
// |options.quantifier|.
std::string CaptureTypeWithPattern(const ServerFieldType& type,
                                   const std::string& pattern,
                                   const CaptureOptions& options) {
  // Suffix appended after the outer non-capturing group. It stays empty for a
  // required match.
  std::string quantifier_suffix;
  switch (options.quantifier) {
    case MATCH_REQUIRED:
      // A required match needs no quantifier.
      break;
    case MATCH_OPTIONAL:
      // The whole group may be absent.
      quantifier_suffix = "?";
      break;
    case MATCH_LAZY_OPTIONAL:
      // The whole group may be absent and is avoided if possible.
      quantifier_suffix = "??";
      break;
  }

  // The capture group is named after the string representation of |type|.
  const std::string group_name = AutofillType(type).ToString();
  return base::StrCat({"(?:(?P<", group_name, ">", pattern,
                       ")(?:", options.separator, "))", quantifier_suffix});
}
std::string CaptureTypeWithPattern(const ServerFieldType& type,
const std::string& pattern) {
return CaptureTypeWithPattern(type, pattern, CaptureOptions());
}
} // namespace structured_address
} // namespace autofill
......@@ -10,14 +10,40 @@
#include <string>
#include <vector>
#include "base/containers/span.h"
#include "base/macros.h"
#include "base/no_destructor.h"
#include "base/synchronization/lock.h"
#include "components/autofill/core/browser/autofill_type.h"
#include "components/autofill/core/browser/field_types.h"
#include "third_party/re2/src/re2/re2.h"
namespace autofill {
namespace structured_address {
// The few quantifiers needed to parse values. A quantifier is selected via
// |CaptureOptions::quantifier| and is applied to the whole group built by the
// |CaptureTypeWithPattern(...)| functions.
enum MatchQuantifier {
// The capture group is required; no quantifier is appended.
MATCH_REQUIRED,
// The capture group is optional; "?" is appended.
MATCH_OPTIONAL,
// The capture group is lazy optional ("??" is appended), meaning that it is
// avoided if an overall match is possible.
MATCH_LAZY_OPTIONAL,
};
// Options for capturing a named group using the
// |CaptureTypeWithPattern(...)| functions. A default-constructed instance
// describes a required group followed by the default separator.
struct CaptureOptions {
// A separator that must be matched after a capture group. It is emitted as a
// non-capturing group directly behind the named capture group.
// By default, a group must be either followed by a space-like character (\s)
// or it must be the last group in the line. The separator is allowed to be
// empty.
std::string separator = "\\s|$";
// Indicates if the group is required, optional or even lazy optional.
// Defaults to required.
MatchQuantifier quantifier = MATCH_REQUIRED;
};
// A cache for compiled RE2 regular expressions.
class Re2RegExCache {
public:
......@@ -86,6 +112,31 @@ std::vector<std::string> ExtractAllPlaceholders(const std::string& value);
// Returns |value| as a placeholder token: ${value}.
// For example, GetPlaceholderToken("VAR") returns "${VAR}".
std::string GetPlaceholderToken(const std::string& value);
// Returns a regular expression containing a named capture group whose pattern
// is the concatenation of the StringPieces in
// |pattern_span_initializer_list|. The group is named by the string
// representation of |type| and respects |options|: the result has the form
// (?:(?P<NAME>PATTERN)(?:SEPARATOR))QUANTIFIER, with SEPARATOR and QUANTIFIER
// taken from |options|.
std::string CaptureTypeWithPattern(
const ServerFieldType& type,
std::initializer_list<base::StringPiece> pattern_span_initializer_list,
const CaptureOptions& options);
// Same as |CaptureTypeWithPattern(type, pattern_span_initializer_list,
// options)| but uses default options, i.e. a default-constructed
// |CaptureOptions|.
std::string CaptureTypeWithPattern(
const ServerFieldType& type,
std::initializer_list<base::StringPiece> pattern_span_initializer_list);
// Returns a regular expression containing a capture group named by the string
// representation of |type| that matches |pattern|. The named group is followed
// by a non-capturing group for |options.separator|, and the quantifier
// selected by |options.quantifier| is applied to both together:
// (?:(?P<NAME>PATTERN)(?:SEPARATOR))QUANTIFIER.
std::string CaptureTypeWithPattern(const ServerFieldType& type,
const std::string& pattern,
const CaptureOptions& options);
// Same as |CaptureTypeWithPattern(type, pattern, options)| but uses default
// options, i.e. a default-constructed |CaptureOptions|.
std::string CaptureTypeWithPattern(const ServerFieldType& type,
const std::string& pattern);
} // namespace structured_address
} // namespace autofill
......
......@@ -192,5 +192,20 @@ TEST(AutofillStructuredAddressUtils, TestGetPlaceholderToken) {
EXPECT_EQ("${VAR}", GetPlaceholderToken("VAR"));
}
// Verifies the regular expressions produced by the CaptureTypeWithPattern
// overloads for each quantifier, for default options and for a custom
// separator.
TEST(AutofillStructuredAddressUtils, CaptureTypeWithPattern) {
  CaptureOptions optional_options;
  optional_options.quantifier = MATCH_OPTIONAL;

  CaptureOptions lazy_optional_options;
  lazy_optional_options.quantifier = MATCH_LAZY_OPTIONAL;

  CaptureOptions underscore_separator_options;
  underscore_separator_options.separator = "_";

  // Pattern given as a list of pieces, optional group.
  EXPECT_EQ(
      "(?:(?P<NAME_FULL>abs\\w)(?:\\s|$))?",
      CaptureTypeWithPattern(NAME_FULL, {"abs", "\\w"}, optional_options));

  // Pattern given as a list of pieces, default options.
  EXPECT_EQ("(?:(?P<NAME_FULL>abs\\w)(?:\\s|$))",
            CaptureTypeWithPattern(NAME_FULL, {"abs", "\\w"}));

  // Pattern given as a single string, lazy optional group.
  EXPECT_EQ(
      "(?:(?P<NAME_FULL>abs\\w)(?:\\s|$))??",
      CaptureTypeWithPattern(NAME_FULL, "abs\\w", lazy_optional_options));

  // Pattern given as a single string, default options.
  EXPECT_EQ("(?:(?P<NAME_FULL>abs\\w)(?:\\s|$))",
            CaptureTypeWithPattern(NAME_FULL, "abs\\w"));

  // A custom separator replaces the default one.
  EXPECT_EQ("(?:(?P<NAME_FULL>abs\\w)(?:_))",
            CaptureTypeWithPattern(NAME_FULL, "abs\\w",
                                   underscore_separator_options));
}
} // namespace structured_address
} // namespace autofill
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment