Commit f8ee6bd1 authored by Piotr Pawliczek's avatar Piotr Pawliczek Committed by Commit Bot

Simple URI: A class representing simplified URI - part 3

This is a part of the implementation of the class Uri. This CL contains the
implementation of the URI parser along with normalization and validation.
The file with unit tests is also included.
Other CLs:
 - https://crrev.com/c/2103143
 - https://crrev.com/c/2118426

BUG=chromium:821497
TEST=on my workstation

Change-Id: Ieb9f3c105d842d4cb2a8f2678d34130e3b94b374
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2123640
Commit-Queue: Piotr Pawliczek <pawliczek@chromium.org>
Reviewed-by: Sean Kau <skau@chromium.org>
Cr-Commit-Position: refs/heads/master@{#761030}
parent d0e620d5
...@@ -71,8 +71,12 @@ component("chromeos") { ...@@ -71,8 +71,12 @@ component("chromeos") {
"printing/printer_configuration.h", "printing/printer_configuration.h",
"printing/printer_translator.cc", "printing/printer_translator.cc",
"printing/printer_translator.h", "printing/printer_translator.h",
"printing/uri.cc",
"printing/uri.h",
"printing/uri_components.cc", "printing/uri_components.cc",
"printing/uri_components.h", "printing/uri_components.h",
"printing/uri_impl.cc",
"printing/uri_impl.h",
"printing/usb_printer_id.cc", "printing/usb_printer_id.cc",
"printing/usb_printer_id.h", "printing/usb_printer_id.h",
"process_proxy/process_output_watcher.cc", "process_proxy/process_output_watcher.cc",
...@@ -200,6 +204,9 @@ test("chromeos_unittests") { ...@@ -200,6 +204,9 @@ test("chromeos_unittests") {
"printing/ppd_line_reader_unittest.cc", "printing/ppd_line_reader_unittest.cc",
"printing/printer_configuration_unittest.cc", "printing/printer_configuration_unittest.cc",
"printing/printer_translator_unittest.cc", "printing/printer_translator_unittest.cc",
"printing/uri_unittest.cc",
"printing/uri_unittest.h",
"printing/uri_unittest_consistency.cc",
"printing/usb_printer_id_unittest.cc", "printing/usb_printer_id_unittest.cc",
"process_proxy/process_output_watcher_unittest.cc", "process_proxy/process_output_watcher_unittest.cc",
"process_proxy/process_proxy_unittest.cc", "process_proxy/process_proxy_unittest.cc",
......
...@@ -104,6 +104,10 @@ bool HasNonASCII(const std::string& str) { ...@@ -104,6 +104,10 @@ bool HasNonASCII(const std::string& str) {
} // namespace } // namespace
Uri::Pim::Pim() = default;
Uri::Pim::Pim(const Pim&) = default;
Uri::Pim::~Pim() = default;
Uri::Uri() : pim_(std::make_unique<Pim>()) {} Uri::Uri() : pim_(std::make_unique<Pim>()) {}
Uri::Uri(const std::string& uri) : pim_(std::make_unique<Pim>()) { Uri::Uri(const std::string& uri) : pim_(std::make_unique<Pim>()) {
...@@ -178,6 +182,7 @@ std::string Uri::GetNormalized(bool always_print_port) const { ...@@ -178,6 +182,7 @@ std::string Uri::GetNormalized(bool always_print_port) const {
out.push_back('@'); out.push_back('@');
} }
// Host. // Host.
enc.Disallow(":");
enc.EncodeAndAppend(pim_->host(), &out); enc.EncodeAndAppend(pim_->host(), &out);
// Port. // Port.
if (!port.empty()) { if (!port.empty()) {
...@@ -187,7 +192,7 @@ std::string Uri::GetNormalized(bool always_print_port) const { ...@@ -187,7 +192,7 @@ std::string Uri::GetNormalized(bool always_print_port) const {
} }
// Adds Path. // Adds Path.
enc.Allow("@"); enc.Allow(":@");
for (auto& segment : pim_->path()) { for (auto& segment : pim_->path()) {
out.push_back('/'); out.push_back('/');
enc.EncodeAndAppend(segment, &out); enc.EncodeAndAppend(segment, &out);
......
...@@ -10,6 +10,8 @@ ...@@ -10,6 +10,8 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "chromeos/chromeos_export.h"
namespace chromeos { namespace chromeos {
// This is a simple URI builder/parser. // This is a simple URI builder/parser.
...@@ -206,7 +208,7 @@ namespace chromeos { ...@@ -206,7 +208,7 @@ namespace chromeos {
// Case-sensitive : YES // Case-sensitive : YES
// //
class Uri { class CHROMEOS_EXPORT Uri {
public: public:
enum class ParserStatus { enum class ParserStatus {
kNoErrors, kNoErrors,
...@@ -236,6 +238,8 @@ class Uri { ...@@ -236,6 +238,8 @@ class Uri {
struct ParserError { struct ParserError {
ParserStatus status = ParserStatus::kNoErrors; ParserStatus status = ParserStatus::kNoErrors;
// The position in the input string where the parser error occurred. // The position in the input string where the parser error occurred.
// When an error occurred for %-escaped character, it is the position of
// the corresponding '%' sign.
// If |status| == kNoErrors, then this value is undefined. // If |status| == kNoErrors, then this value is undefined.
size_t parsed_chars = 0; size_t parsed_chars = 0;
// This field is relevant only for the methods SetQuery(...), // This field is relevant only for the methods SetQuery(...),
...@@ -361,7 +365,7 @@ class Uri { ...@@ -361,7 +365,7 @@ class Uri {
bool operator!=(const Uri& uri) const { return !(*this == uri); } bool operator!=(const Uri& uri) const { return !(*this == uri); }
private: private:
struct Pim; class Pim;
std::unique_ptr<Pim> pim_; std::unique_ptr<Pim> pim_;
}; };
......
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chromeos/printing/uri_impl.h"
#include <algorithm>
#include <array>
#include <map>
#include <set>
#include "base/i18n/streaming_utf8_validator.h"
#include "base/logging.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversion_utils.h"
#include "chromeos/printing/uri.h"
namespace chromeos {
namespace {
// Sentinel returned by ParsePort() for input that is not a valid port number.
constexpr int kPortInvalid = -2;
// Sentinel meaning "port not specified"; returned by ParsePort() for an empty
// input.
constexpr int kPortUnspecified = -1;
// The largest port number accepted by ParsePort().
constexpr int kPortMaxNumber = 65535;
// Parses a single character from *|current| and interprets it as a hex
// digit ('0'-'9' or 'A'-'F' or 'a'-'f').
// On success, the value in *|out| is shifted left by 4 bits, the parsed value
// is saved on its rightmost 4 bits, the iterator *|current| is increased by
// one, and the function returns true.
// On failure (the character is not a hex digit or *|current| is not less than
// |end|) the function returns false and leaves both *|current| and *|out|
// unchanged.
// |current| and |out| must be not nullptr.
bool ParseHexDigit(const Iter& end, Iter* current, unsigned char* out) {
  Iter& it = *current;
  if (it >= end)
    return false;
  const char ch = *it;
  // Validate the character first, so that *|out| is modified only when the
  // digit is known to be correct.
  unsigned char digit = 0;
  if (base::IsAsciiDigit(ch)) {
    digit = static_cast<unsigned char>(ch - '0');
  } else if (ch >= 'A' && ch <= 'F') {
    digit = static_cast<unsigned char>(ch - 'A' + 10);
  } else if (ch >= 'a' && ch <= 'f') {
    digit = static_cast<unsigned char>(ch - 'a' + 10);
  } else {
    return false;
  }
  *out = static_cast<unsigned char>((*out << 4) | digit);
  ++it;
  return true;
}
// The function parses from *|current|-|end| the first character and saves it
// to |out|. If |encoded| equals true, the % sign is treated as the beginning
// of %-escaped character - in this case the whole escaped character is read
// and decoded. The function fails and returns false when unexpected end of
// string is reached or invalid %-escaped character is spotted. The iterator
// *|current| is shifted accordingly.
// |current| and |out| must be not nullptr and *|current| must be less than
// |end|.
template <bool encoded>
bool ParseCharacter(const Iter& end, Iter* current, char* out) {
Iter& it = *current;
DCHECK(it < end);
*out = *it;
++it;
if (encoded && *out == '%') {
unsigned char c = 0;
if (!ParseHexDigit(end, &it, &c))
return false;
if (!ParseHexDigit(end, &it, &c))
return false;
*out = static_cast<char>(c);
}
return true;
}
// Tries to parse the input string |begin|-|end| as a Port number.
// For a number from range [0,65535] it returns this number.
// For an empty string it returns -1 (not specified).
// For all other inputs it returns -2 (invalid Port number).
// The input requirement: |begin| <= |end|.
int ParsePort(const Iter& begin, const Iter& end) {
if (begin == end)
return kPortUnspecified;
int number = 0;
for (Iter it = begin; it < end; ++it) {
if (!base::IsAsciiDigit(*it))
return kPortInvalid;
number *= 10;
number += *it - '0';
if (number > kPortMaxNumber)
return kPortInvalid;
}
return number;
}
// Helper predicate for the function below: tells whether a character belongs
// to a given set of characters.
class Comparator {
 public:
  // |chars| is stored by reference, so it must stay valid for the whole
  // lifetime of this object.
  explicit Comparator(const std::string& chars) : chars_(chars) {}

  // Returns true <=> |element| occurs in the string given to the constructor.
  bool operator()(std::string::value_type element) const {
    const bool is_absent = chars_.find(element) == std::string::npos;
    return !is_absent;
  }

 private:
  const std::string& chars_;
};
// Scans |begin|-|end| for the first character that also occurs in |chars| and
// returns an iterator pointing to it. When there is no such character, |end|
// is returned.
Iter FindFirstOf(Iter begin, Iter end, const std::string& chars) {
  const Comparator belongs_to_chars(chars);
  return std::find_if(begin, end, belongs_to_chars);
}
} // namespace
// Maps a (lowercase) scheme name to its default port number. It is consulted
// by SavePort() and ParseScheme() when the Port is unspecified.
const std::map<std::string, int> Uri::Pim::kDefaultPorts = {
{"ipp", 631}, {"ipps", 443}, {"http", 80},
{"https", 443}, {"lpd", 515}, {"socket", 9100}};
// Parses, validates and normalizes the string |begin|-|end| and appends the
// result to *|out|.
//  * |encoded|: when true, '%' starts a %-escaped byte that is decoded (see
//    ParseCharacter<encoded>()).
//  * |case_insensitive|: when true, printable ASCII characters are converted
//    to lowercase before being saved.
//  * |plus_to_space|: when true, every literal '+' on the input is saved as
//    ' ' (space).
// Bytes that are not printable ASCII characters must form valid UTF-8
// characters; their bytes may be given directly or %-escaped.
// On failure the function returns false and sets |parser_error_.status|;
// |parser_error_.parsed_chars| holds then the offset (relative to |begin|) of
// the end of the last byte that was processed successfully. *|out| may
// already contain the part parsed before the error.
template <bool encoded, bool case_insensitive>
bool Uri::Pim::ParseString(const Iter& begin,
const Iter& end,
std::string* out,
bool plus_to_space) {
parser_error_.parsed_chars = 0;
out->reserve(end - begin);
for (Iter it = begin; it < end;) {
char c;
// Read and decode a single character or a %-escaped character.
if (plus_to_space && *it == '+') {
c = ' ';
++it;
} else if (!ParseCharacter<encoded>(end, &it, &c)) {
parser_error_.status = ParserStatus::kInvalidPercentEncoding;
return false;
}
// Analyze the character.
if (base::IsAsciiPrintable(c)) { // c >= 0x20(' ') && c <= 0x7E('~')
// Copy the character with normalization.
out->push_back(case_insensitive ? base::ToLowerASCII(c) : c);
parser_error_.parsed_chars = it - begin;
} else {
// Try to parse UTF-8 character.
// A fresh validator is created for every character; |c| is fed as the
// first byte of the sequence.
base::StreamingUtf8Validator utf_parser;
base::StreamingUtf8Validator::State state = utf_parser.AddBytes(&c, 1);
// Anything other than VALID_MIDPOINT means that |c| cannot start a
// multi-byte sequence, so it is reported as a disallowed character.
if (state != base::StreamingUtf8Validator::State::VALID_MIDPOINT) {
parser_error_.status = ParserStatus::kDisallowedASCIICharacter;
return false;
}
std::string utf8_character(1, c);
parser_error_.parsed_chars = it - begin;
// Read the remaining bytes of the UTF-8 character one by one.
do {
// The input ended in the middle of the UTF-8 character.
if (it == end) {
parser_error_.status = ParserStatus::kInvalidUTF8Character;
return false;
}
if (!ParseCharacter<encoded>(end, &it, &c)) {
parser_error_.status = ParserStatus::kInvalidPercentEncoding;
return false;
}
state = utf_parser.AddBytes(&c, 1);
if (state == base::StreamingUtf8Validator::State::INVALID) {
parser_error_.status = ParserStatus::kInvalidUTF8Character;
return false;
}
utf8_character.push_back(c);
parser_error_.parsed_chars = it - begin;
} while (state != base::StreamingUtf8Validator::State::VALID_ENDPOINT);
// Saves the UTF-8 character to the output.
out->append(std::move(utf8_character));
}
}
return true;
}
// Parses |val| with ParseString<encoded>() and, on success, stores the result
// as the Userinfo component. On failure the component is left unchanged and
// false is returned.
template <bool encoded>
bool Uri::Pim::SaveUserinfo(const std::string& val) {
  std::string parsed;
  const bool ok = ParseString<encoded>(val.begin(), val.end(), &parsed);
  if (ok)
    userinfo_ = std::move(parsed);
  return ok;
}
// Parses |val| with ParseString<encoded, true>() (the second template
// argument requests lowercase normalization) and, on success, stores the
// result as the Host component. On failure the component is left unchanged
// and false is returned.
template <bool encoded>
bool Uri::Pim::SaveHost(const std::string& val) {
  std::string parsed;
  const bool ok = ParseString<encoded, true>(val.begin(), val.end(), &parsed);
  if (ok)
    host_ = std::move(parsed);
  return ok;
}
bool Uri::Pim::SavePort(int value) {
if (value == kPortInvalid) {
parser_error_.parsed_chars = 0;
parser_error_.status = ParserStatus::kInvalidPortNumber;
return false;
}
if (value == kPortUnspecified && kDefaultPorts.count(scheme_)) {
value = kDefaultPorts.at(scheme_);
}
port_ = value;
return true;
}
// Parses all segments from |val| with ParseString<encoded>() and, on success,
// stores the resultant list as the Path component. During the process:
//  * segments equal "." are dropped,
//  * a segment equals ".." removes the preceding segment, unless there is no
//    preceding segment or the preceding segment is also "..",
//  * an empty segment is an error (kEmptySegmentInPath).
// |parser_error_.parsed_strings| is increased by one after every segment
// processed successfully, so on failure it identifies the offending segment
// (assuming it was zero on entry - this function does not reset it).
template <bool encoded>
bool Uri::Pim::SavePath(const std::vector<std::string>& val) {
  std::vector<std::string> segments;
  segments.reserve(val.size());
  for (const std::string& raw_segment : val) {
    std::string segment;
    if (!ParseString<encoded>(raw_segment.begin(), raw_segment.end(),
                              &segment)) {
      return false;
    }
    const bool is_current_dir = (segment == ".");
    const bool removes_previous = (segment == ".." && !segments.empty() &&
                                   segments.back() != "..");
    if (removes_previous) {
      segments.pop_back();
    } else if (!is_current_dir) {
      if (segment.empty()) {
        parser_error_.status = ParserStatus::kEmptySegmentInPath;
        return false;
      }
      segments.push_back(std::move(segment));
    }
    ++parser_error_.parsed_strings;
  }
  path_ = std::move(segments);
  return true;
}
// Parses all (name, value) pairs from |val| with ParseString<encoded>() and,
// on success, stores the resultant list as the Query component. Both names
// and values are parsed with |plus_to_space| set, so '+' is read as a space.
// A parameter with an empty name is an error (kEmptyParameterNameInQuery).
// |parser_error_.parsed_strings| is increased by one after every name and
// after every value processed successfully.
template <bool encoded>
bool Uri::Pim::SaveQuery(
    const std::vector<std::pair<std::string, std::string>>& val) {
  std::vector<std::pair<std::string, std::string>> parameters;
  parameters.reserve(val.size());
  for (const auto& raw_parameter : val) {
    // Process parameter name.
    std::string name;
    if (!ParseString<encoded>(raw_parameter.first.begin(),
                              raw_parameter.first.end(), &name, true)) {
      return false;
    }
    if (name.empty()) {
      parser_error_.status = ParserStatus::kEmptyParameterNameInQuery;
      return false;
    }
    ++parser_error_.parsed_strings;
    // Process parameter value.
    std::string value;
    if (!ParseString<encoded>(raw_parameter.second.begin(),
                              raw_parameter.second.end(), &value, true)) {
      return false;
    }
    ++parser_error_.parsed_strings;
    parameters.emplace_back(std::move(name), std::move(value));
  }
  query_ = std::move(parameters);
  return true;
}
// Parses |val| with ParseString<encoded>() and, on success, stores the result
// as the Fragment component. On failure the component is left unchanged and
// false is returned.
template <bool encoded>
bool Uri::Pim::SaveFragment(const std::string& val) {
  std::string parsed;
  const bool ok = ParseString<encoded>(val.begin(), val.end(), &parsed);
  if (ok)
    fragment_ = std::move(parsed);
  return ok;
}
bool Uri::Pim::ParseScheme(const Iter& begin, const Iter& end) {
// Special case for an empty string on the input.
if (begin == end) {
scheme_.clear();
return true;
}
// Temporary output string.
std::string out;
out.reserve(end - begin);
// Checks the first character - must be an ASCII letter.
auto it = begin;
if (base::IsAsciiAlpha(*it)) {
out.push_back(base::ToLowerASCII(*it));
} else {
parser_error_.status = ParserStatus::kInvalidScheme;
return false;
}
// Checks the rest of characters.
for (++it; it < end; ++it) {
if (base::IsAsciiAlpha(*it) || base::IsAsciiDigit(*it) || *it == '+' ||
*it == '-' || *it == '.') {
out.push_back(base::ToLowerASCII(*it));
} else {
parser_error_.status = ParserStatus::kInvalidScheme;
parser_error_.parsed_chars = it - begin;
return false;
}
}
// Success - save the Scheme.
scheme_ = std::move(out);
// If the current Port is unspecified and the new Scheme has default port
// number, set the default port number.
if (port_ == kPortUnspecified && kDefaultPorts.count(scheme_))
port_ = kDefaultPorts.at(scheme_);
return true;
}
// Parses the range |begin|-|end| as the Authority component, i.e.:
//   [ Userinfo "@" ] Host [ ":" [ Port ] ]
// and saves the Userinfo, the Host and the Port. The Userinfo is present only
// when the range contains '@'; the Host ends at the first ':' found after the
// Userinfo. The Port is saved only when at least one character follows ':'.
// On failure the function returns false and |parser_error_.parsed_chars| is
// the offset of the error relative to |begin|.
bool Uri::Pim::ParseAuthority(const Iter& begin, const Iter& end) {
  // Parse and save Userinfo.
  Iter it = std::find(begin, end, '@');
  if (it != end) {
    // The Userinfo starts exactly at |begin|, so the offset reported by
    // SaveUserinfo() is already relative to |begin| and needs no adjustment.
    if (!SaveUserinfo<true>(std::string(begin, it)))
      return false;
    ++it;  // to omit '@' character
  } else {
    it = begin;
  }
  // Parse and save Host.
  Iter it2 = std::find(it, end, ':');
  if (!SaveHost<true>(std::string(it, it2))) {
    // Shift the offset by the position where the Host begins.
    parser_error_.parsed_chars += it - begin;
    return false;
  }
  // Parse and save Port.
  if (it2 != end) {
    ++it2;  // omit the ':' character
    if (it2 < end && !SavePort(ParsePort(it2, end))) {
      // Shift the offset by the position where the Port begins.
      parser_error_.parsed_chars += it2 - begin;
      return false;
    }
  }
  return true;
}
bool Uri::Pim::ParsePath(const Iter& begin, const Iter& end) {
// This holds Path's segments.
std::vector<std::string> path;
// This stores offset from begin of every segment.
std::vector<size_t> strings_positions;
// Parsing...
for (Iter it1 = begin; it1 < end;) {
DCHECK_EQ(*it1, '/');
if (++it1 == end) // omit '/' character
break;
Iter it2 = std::find(it1, end, '/');
path.push_back(std::string(it1, it2));
strings_positions.push_back(it1 - begin);
it1 = it2;
}
// Try to set the new Path and return true if succeed.
if (SavePath<true>(path))
return true;
// An error occurred, adjust parser error fields set by SetPath(...).
parser_error_.parsed_chars += strings_positions[parser_error_.parsed_strings];
parser_error_.parsed_strings = 0;
return false;
}
bool Uri::Pim::ParseQuery(const Iter& begin, const Iter& end) {
// This holds pairs name=value.
std::vector<std::pair<std::string, std::string>> query;
// This stores offset from begin of every name and value.
std::vector<size_t> strings_positions;
// Parsing...
for (Iter it = begin; it < end;) {
Iter it_am = std::find(it, end, '&');
Iter it_eq = std::find(it, it_am, '=');
// Extract name.
std::string name(it, it_eq);
// Extract value.
if (it_eq < it_am) // to omit '=' character
++it_eq;
std::string value(it_eq, it_am);
// Save the pair (name,value).
query.push_back(std::make_pair(std::move(name), std::move(value)));
// Store the offset of the name.
strings_positions.push_back(it - begin);
// Store the offset of the value.
strings_positions.push_back(it_eq - begin);
// Move |it| to the beginning of the next pair.
if (it_am < end)
++it_am; // to omit '&' character
it = it_am;
}
// Try to set the new Query and return true if succeed.
if (SaveQuery<true>(query))
return true;
// An error occurred, adjust the |parser_error| set by SetQuery(...).
parser_error_.parsed_chars += strings_positions[parser_error_.parsed_strings];
parser_error_.parsed_strings = 0;
return false;
}
// Parses the range |begin|-|end| with ParseString<true>() (%-escaped
// characters are decoded) and, on success, saves the result as the Fragment
// component. On failure the component is left unchanged and false is
// returned.
bool Uri::Pim::ParseFragment(const Iter& begin, const Iter& end) {
  std::string decoded;
  const bool ok = ParseString<true>(begin, end, &decoded);
  if (ok)
    fragment_ = std::move(decoded);
  return ok;
}
// Parses the whole URI from the range |begin|-|end| and saves all components
// found. The components are processed in the order: Scheme, Authority, Path,
// Query, Fragment. The Scheme (possibly empty) and the ':' after it are
// mandatory; all other components are optional.
// Returns false when any component fails to parse. In that case every
// component parser reports the error offset relative to its own beginning, so
// the position where the component starts is added to
// |parser_error_.parsed_chars| here to make it relative to |begin|.
bool Uri::Pim::ParseUri(const Iter& begin, const Iter end) {
Iter it1 = begin;
// The Scheme component ends at the first colon (":").
{
auto it2 = std::find(it1, end, ':');
if (it2 == end) {
parser_error_.status = ParserStatus::kInvalidScheme;
return false;
}
// The Scheme starts at |begin|, so the error offset needs no adjustment.
if (!ParseScheme(it1, it2))
return false;
it1 = ++it2;
}
// The authority component is preceded by a double slash ("//") and is
// terminated by the next slash ("/"), question mark ("?"), or number
// sign ("#") character, or by the end of the URI.
if (it1 < end && *it1 == '/') {
++it1;
if (it1 < end && *it1 == '/') {
++it1;
auto it_auth_end = FindFirstOf(it1, end, "/?#");
if (!ParseAuthority(it1, it_auth_end)) {
parser_error_.parsed_chars += it1 - begin;
return false;
}
it1 = it_auth_end;
} else {
// A single slash: there is no Authority and the slash belongs to the
// Path, so step back to it.
--it1;
}
}
// The Path is terminated by the first question mark ("?") or number
// sign ("#") character, or by the end of the URI.
if (it1 < end && *it1 == '/') {
auto it2 = FindFirstOf(it1, end, "?#");
if (!ParsePath(it1, it2)) {
parser_error_.parsed_chars += it1 - begin;
return false;
}
it1 = it2;
}
// The Query component is indicated by the first question mark ("?")
// character and terminated by a number sign ("#") character or by the end
// of the URI.
if (it1 < end && *it1 == '?') {
++it1;
auto it2 = std::find(it1, end, '#');
if (!ParseQuery(it1, it2)) {
parser_error_.parsed_chars += it1 - begin;
return false;
}
it1 = it2;
}
// A Fragment component is indicated by the presence of a number
// sign ("#") character and terminated by the end of the URI.
// NOTE(review): when the text after "scheme:" begins with a character other
// than '/', '?' or '#' (e.g. "scheme:rootless/path"), none of the branches
// above consumes it, so this point is reached with *it1 != '#'. The DCHECK
// below fires then and, where DCHECK is compiled out, that character is
// skipped and the rest is parsed as the Fragment. Confirm whether such inputs
// should be rejected explicitly.
if (it1 < end) {
DCHECK_EQ(*it1, '#');
++it1; // to omit '#' character
if (!ParseFragment(it1, end)) {
parser_error_.parsed_chars += it1 - begin;
return false;
}
}
// Success!
return true;
}
// Explicit instantiations of the member function templates defined in this
// file, for all combinations of their bool template parameters.
// NOTE(review): presumably required because the templates are defined only
// here and are called from other translation units - confirm.
template bool Uri::Pim::ParseString<false, false>(const Iter& begin,
const Iter& end,
std::string* out,
bool plus_to_space);
template bool Uri::Pim::ParseString<false, true>(const Iter& begin,
const Iter& end,
std::string* out,
bool plus_to_space);
template bool Uri::Pim::ParseString<true, false>(const Iter& begin,
const Iter& end,
std::string* out,
bool plus_to_space);
template bool Uri::Pim::ParseString<true, true>(const Iter& begin,
const Iter& end,
std::string* out,
bool plus_to_space);
template bool Uri::Pim::SaveUserinfo<false>(const std::string& val);
template bool Uri::Pim::SaveUserinfo<true>(const std::string& val);
template bool Uri::Pim::SaveHost<false>(const std::string& val);
template bool Uri::Pim::SaveHost<true>(const std::string& val);
template bool Uri::Pim::SavePath<false>(const std::vector<std::string>& val);
template bool Uri::Pim::SavePath<true>(const std::vector<std::string>& val);
template bool Uri::Pim::SaveQuery<false>(
const std::vector<std::pair<std::string, std::string>>& val);
template bool Uri::Pim::SaveQuery<true>(
const std::vector<std::pair<std::string, std::string>>& val);
template bool Uri::Pim::SaveFragment<false>(const std::string& val);
template bool Uri::Pim::SaveFragment<true>(const std::string& val);
} // namespace chromeos
...@@ -25,6 +25,10 @@ class Uri::Pim { ...@@ -25,6 +25,10 @@ class Uri::Pim {
// The map with pairs scheme -> default_port. // The map with pairs scheme -> default_port.
static const std::map<std::string, int> kDefaultPorts; static const std::map<std::string, int> kDefaultPorts;
Pim();
Pim(const Pim&);
~Pim();
// Resets the internal field |parser_error|. // Resets the internal field |parser_error|.
void ResetParserError() { void ResetParserError() {
parser_error_.parsed_chars = 0; parser_error_.parsed_chars = 0;
......
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chromeos/printing/uri_unittest.h"
#include "base/strings/string_number_conversions.h"
#include "chromeos/printing/uri.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace chromeos {
namespace uri_unittest {
// Out-of-line definitions of the special member functions of UriComponents
// declared in uri_unittest.h.
UriComponents::UriComponents() = default;
UriComponents::UriComponents(const UriComponents&) = default;
// Initializes every field with the parameter of the same name.
UriComponents::UriComponents(
const std::string& scheme,
const std::string& userinfo,
const std::string& host,
int port,
const std::vector<std::string>& path,
const std::vector<std::pair<std::string, std::string>>& query,
const std::string& fragment)
: scheme(scheme),
userinfo(userinfo),
host(host),
port(port),
path(path),
query(query),
fragment(fragment) {}
UriComponents::~UriComponents() = default;
} // namespace uri_unittest
namespace {
using UriComponents = uri_unittest::UriComponents;
// Verifies that |components| set by Set*() methods produces given
// |normalized_uri|. The components are set one by one on a default-constructed
// Uri (in an order different from their order in a URI) and every setter is
// required to leave the parser status equal to kNoErrors; the first failure
// aborts the helper (ASSERT_EQ).
void TestBuilder(const UriComponents& components,
const std::string& normalized_uri) {
Uri uri;
uri.SetFragment(components.fragment);
ASSERT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
uri.SetHost(components.host);
ASSERT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
uri.SetPath(components.path);
ASSERT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
uri.SetPort(components.port);
ASSERT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
uri.SetQuery(components.query);
ASSERT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
uri.SetScheme(components.scheme);
ASSERT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
uri.SetUserinfo(components.userinfo);
ASSERT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
// Check URI.
EXPECT_EQ(uri.GetNormalized(), normalized_uri);
}
// Verifies that |input_uri| set as parameter in Uri constructor is parsed
// without errors and that every component returned by the (non-encoded)
// getters equals the corresponding field of |components|.
void TestParser(const std::string& input_uri, const UriComponents& components) {
Uri uri(input_uri);
// Stop here if the parser reported an error; the getters below are checked
// only for a successfully parsed URI.
ASSERT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
// Check components values.
EXPECT_EQ(uri.GetScheme(), components.scheme);
EXPECT_EQ(uri.GetUserinfo(), components.userinfo);
EXPECT_EQ(uri.GetHost(), components.host);
EXPECT_EQ(uri.GetPort(), components.port);
EXPECT_EQ(uri.GetPath(), components.path);
EXPECT_EQ(uri.GetQuery(), components.query);
EXPECT_EQ(uri.GetFragment(), components.fragment);
}
// Verifies that |input_uri| set as parameter in Uri constructor is parsed
// without errors and that GetNormalized() called on the created object
// returns |normalized_uri|.
void TestNormalization(const std::string& input_uri,
const std::string& normalized_uri) {
Uri uri(input_uri);
ASSERT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
EXPECT_EQ(uri.GetNormalized(), normalized_uri);
}
// A default-constructed Uri normalizes to ":", reports no parsing error, has
// all components empty and the Port equal to -1.
TEST(UriTest, DefaultConstructor) {
Uri uri;
EXPECT_EQ(uri.GetNormalized(), ":");
EXPECT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
EXPECT_EQ(uri.GetScheme(), "");
EXPECT_EQ(uri.GetUserinfo(), "");
EXPECT_EQ(uri.GetUserinfoEncoded(), "");
EXPECT_EQ(uri.GetHost(), "");
EXPECT_EQ(uri.GetHostEncoded(), "");
EXPECT_EQ(uri.GetPort(), -1);
EXPECT_TRUE(uri.GetPath().empty());
EXPECT_TRUE(uri.GetPathEncoded().empty());
EXPECT_TRUE(uri.GetQuery().empty());
EXPECT_TRUE(uri.GetQueryEncoded().empty());
EXPECT_EQ(uri.GetFragment(), "");
EXPECT_EQ(uri.GetFragmentEncoded(), "");
}
// A Scheme given with mixed-case letters is accepted and returned lowercased.
TEST(UriTest, SchemeIsCaseInsensitive) {
Uri uri;
uri.SetScheme("ExAmplE+SchemA-X");
EXPECT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
EXPECT_EQ(uri.GetScheme(), "example+schema-x");
}
// A Host given with mixed-case letters is accepted and returned lowercased by
// both GetHost() and GetHostEncoded().
TEST(UriTest, HostIsCaseInsensitive) {
Uri uri;
uri.SetHost("ExAmplE.COM");
EXPECT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
EXPECT_EQ(uri.GetHost(), "example.com");
EXPECT_EQ(uri.GetHostEncoded(), "example.com");
}
// Compares SetHost() with SetHostEncoded(): the former takes '%' literally,
// the latter treats it as the beginning of a %-escaped character.
TEST(UriTest, EncodingInHostComponent) {
Uri uri;
// Non-encoded setter: "%41" is kept as three separate characters.
uri.SetHost("new.EX%41MPLE.COM");
EXPECT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
EXPECT_EQ(uri.GetHost(), "new.ex%41mple.com");
EXPECT_EQ(uri.GetHostEncoded(),
"new.ex%2541mple.com"); // %-character was escaped
// Encoded setter: "%41" is decoded to 'A' (and then lowercased).
uri.SetHostEncoded("new.EX%41MPLE.COM");
EXPECT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
EXPECT_EQ(uri.GetHost(), "new.example.com");
EXPECT_EQ(uri.GetHostEncoded(), "new.example.com");
// Special characters given to the non-encoded setter; some of them are
// %-escaped in the value returned by GetHostEncoded().
uri.SetHost("ExAmPlE._!_@_#_$_%_^_");
EXPECT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
EXPECT_EQ(uri.GetHost(), "example._!_@_#_$_%_^_");
EXPECT_EQ(uri.GetHostEncoded(), "example._!_%40_%23_$_%25_%5E_");
// The same special characters given to the encoded setter, both directly
// and as %-escaped characters.
uri.SetHostEncoded("ExAmPlE._!_@_#_$_%25_^_._%21_%40_%23_%24_%25_%5E_");
EXPECT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
EXPECT_EQ(uri.GetHost(), "example._!_@_#_$_%_^_._!_@_#_$_%_^_");
EXPECT_EQ(uri.GetHostEncoded(),
"example._!_%40_%23_$_%25_%5E_._!_%40_%23_$_%25_%5E_");
}
// Sets a string with all printable ASCII characters as the Userinfo, the
// Host, a Path's segment, a Query's parameter (name and value) and the
// Fragment, and verifies that the non-encoded getters return it back (with
// the normalization described below).
TEST(UriTest, UriWithAllPrintableASCII) {
Uri uri;
std::string host = kPrintableASCII;
const std::vector<std::string> path = {kPrintableASCII};
std::vector<std::pair<std::string, std::string>> query = {
{kPrintableASCII, kPrintableASCII}};
uri.SetUserinfo(kPrintableASCII);
ASSERT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
uri.SetHost(host);
ASSERT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
uri.SetPath(path);
ASSERT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
uri.SetQuery(query);
ASSERT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
uri.SetFragment(kPrintableASCII);
ASSERT_EQ(uri.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
// Host is case-insensitive, uppercase letters are normalized to lowercase.
std::for_each(host.begin(), host.end(), [](char& c) {
if (c >= 'A' && c <= 'Z')
c += 'a' - 'A';
});
// In Query, all occurrences of '+' on the input are treated as ' ' (space).
std::replace(query[0].first.begin(), query[0].first.end(), '+', ' ');
std::replace(query[0].second.begin(), query[0].second.end(), '+', ' ');
// Compare the getters with the (adjusted) expected values.
EXPECT_EQ(uri.GetUserinfo(), kPrintableASCII);
EXPECT_EQ(uri.GetHost(), host);
EXPECT_EQ(uri.GetPath(), path);
EXPECT_EQ(uri.GetQuery(), query);
EXPECT_EQ(uri.GetFragment(), kPrintableASCII);
}
// Builds a URI with a non-default Port and a Query. A parameter with an empty
// value ("par2") is printed without the '=' sign.
TEST(UriTest, BuildingHttpUriWithQuery) {
UriComponents components("http", "", "example.com", 1234);
components.query = {{"par1", "val1"}, {"par2", ""}, {"par3", "val3"}};
TestBuilder(components, "http://example.com:1234?par1=val1&par2&par3=val3");
}
// Builds a URI with all components set. Only the Scheme and the Host are
// lowercased in the normalized URI.
TEST(UriTest, BuildingUriWithAllComponents) {
UriComponents components("A", "B", "C", 1);
components.path = {"D", "E"};
components.query = {{"F", "G"}, {"H", "I"}};
components.fragment = "J";
TestBuilder(components, "a://B@c:1/D/E?F=G&H=I#J");
}
// Builds a URI without the Userinfo, the Host and the Port. In the normalized
// URI, '/' and ' ' inside Path's segments and '#' inside the Fragment are
// %-escaped.
TEST(UriTest, BuildingUriWithoutAuthority) {
UriComponents components("A+1-b.C", "", "", -1);
components.path = {"//", " "};
components.fragment = "?#@/";
TestBuilder(components, "a+1-b.c:/%2F%2F/%20#?%23@/");
}
// Special path segments "." and ".." are reduced when possible.
// Here "very/.." and "long/.." are removed and "." is dropped; the remaining
// ".." has no preceding segment to remove, so it stays in the Path.
TEST(UriTest, ParsingOfUriWithReduciblePath) {
const std::string input_uri =
"hTTp://exAmple.c%4Fm:234"
"/very/../../long/.././pAth?parAm=vAlue#?%3f?";
UriComponents components("http", "", "example.com", 234);
components.path = {"..", "pAth"};
components.query = {{"parAm", "vAlue"}};
components.fragment = "???";
TestParser(input_uri, components);
}
// Checks the value of the Port for URIs given without a port number.
TEST(UriTest, ParsingOfUriWithoutPort) {
// When a Port is not specified and the Scheme has a default port number,
// the default port number is set.
TestParser("hTTp://exAmple.com",
UriComponents("http", "", "example.com", 80));
// When the Scheme does not have a default port number, the value of Port
// remains "unspecified".
TestParser("X-x://exAmple.com", UriComponents("x-x", "", "example.com"));
}
// UTF-8 characters in the Query are parsed to the same values regardless of
// the way their bytes are given on the input.
TEST(UriTest, ParsingOfUriWithUTF8Characters) {
// On the input, bytes defining UTF-8 characters can be %-escaped or
// specified directly.
const std::string uri =
"http://utf8.test?"
"zażółć=za%c5%bc%c3%b3%c5%82%c4%87&"
"gęślą=\x67\xC4\x99\xC5%9B%6C%C4\x85&"
"jaźń=ja%c5%ba%c5%84";
UriComponents components("http", "", "utf8.test", 80);
components.query = {
{"zażółć", "zażółć"}, {"gęślą", "gęślą"}, {"jaźń", "jaźń"}};
TestParser(uri, components);
}
// Leading and trailing whitespaces are ignored.
// The space directly after '#' is not at the beginning of the input, so it
// stays in the Fragment.
TEST(UriTest, ParsingOfUriWithLeadingAndTrailingWhitespaces) {
const std::string uri = " \t\n\r\f\vSC://WITH.whitespaces# END \t\n\r\f\v";
UriComponents components("sc", "", "with.whitespaces");
components.fragment = " END";
TestParser(uri, components);
}
// Empty components are accepted.
// A URI built only from separators normalizes to ":", the same value as for
// a default-constructed Uri.
TEST(UriTest, NormalizationOfEmptyUri) {
TestNormalization("://@:/?#", ":");
}
// The "//" prefix is not printed in the normalized URI when there is nothing
// to print in the Authority component. An empty Query ("?") is skipped too.
TEST(UriTest, NormalizationOfUriWithoutAuthority) {
// When Userinfo, Host and Port are not specified, the "//" prefix is
// skipped.
TestNormalization("xx://@:/my/path?#fragment", "xx:/my/path#fragment");
TestNormalization("xx:///my/path?#fragment", "xx:/my/path#fragment");
// The same happens when the Port number is equal to the default port number
// of the Scheme.
TestNormalization("http://:80/my/path?#fragment", "http:/my/path#fragment");
}
// In the normalized URI, all bytes being part of UTF-8 characters must be
// %-escaped.
// The escaped bytes are printed with uppercase hex digits, even when they
// were given with lowercase hex digits on the input.
TEST(UriTest, NormalizationOfUriWithUTF8Characters) {
const std::string uri =
"http://utf8.test?"
"zażółć=za%c5%bc%c3%b3%c5%82%c4%87&"
"gęślą=\x67\xC4\x99\xC5%9B%6C%C4\x85&"
"jaźń=ja%c5%ba%c5%84";
const std::string uri_normalized =
"http://utf8.test?"
"za%C5%BC%C3%B3%C5%82%C4%87=za%C5%BC%C3%B3%C5%82%C4%87&"
"g%C4%99%C5%9Bl%C4%85=g%C4%99%C5%9Bl%C4%85&"
"ja%C5%BA%C5%84=ja%C5%BA%C5%84";
TestNormalization(uri, uri_normalized);
}
// A byte that is neither a printable ASCII character nor the beginning of a
// UTF-8 character is reported as kDisallowedASCIICharacter.
TEST(UriTest, ParserErrorDisallowedASCIICharacter) {
// Non-printable character (0xFF) inside the Host component.
Uri uri(" \t\n\r\f\vHTTP://BAD.\xff.CHaracter# \t\n\r\f\v");
const Uri::ParserError pe = uri.GetLastParsingError();
EXPECT_EQ(pe.status, Uri::ParserStatus::kDisallowedASCIICharacter);
// The position is counted from the beginning of the input string, so the
// six leading whitespaces are included.
EXPECT_EQ(pe.parsed_chars, 17u);
EXPECT_EQ(pe.parsed_strings, 0u);
}
// A '%' sign that is not followed by two hex digits is reported by the
// encoded setter as kInvalidPercentEncoding.
TEST(UriTest, ParserErrorInvalidPercentEncoding) {
Uri uri;
// The first percent character has no following ASCII code.
uri.SetHostEncoded("ExAmPlE._!_@_#_$_%_^_._%21_%40_%23_%24_%25_%5E_");
EXPECT_EQ(uri.GetLastParsingError().status,
Uri::ParserStatus::kInvalidPercentEncoding);
// 17 is the position of the incorrect '%' sign.
EXPECT_EQ(uri.GetLastParsingError().parsed_chars, 17u);
}
// An incorrect sequence of bytes of a UTF-8 character is reported as
// kInvalidUTF8Character.
TEST(UriTest, ParserErrorInvalidUTF8Character) {
// Broken UTF-8 character in the Path (the byte after 0xC5 is wrong).
Uri uri("http://host/utf8_\xC5\x3C_is_broken");
const Uri::ParserError pe = uri.GetLastParsingError();
EXPECT_EQ(pe.status, Uri::ParserStatus::kInvalidUTF8Character);
// 18 is the position of the wrong byte (0x3C); the byte 0xC5 is at 17.
EXPECT_EQ(pe.parsed_chars, 18u);
EXPECT_EQ(pe.parsed_strings, 0u);
}
// Parameters in Query cannot have empty names.
TEST(UriTest, ParserErrorEmptyParameterNameInQuery) {
Uri uri;
std::vector<std::pair<std::string, std::string>> query;
query = {{"name1", "value1"}, {"", "value2"}};
EXPECT_FALSE(uri.SetQuery(query));
const Uri::ParserError pe1 = uri.GetLastParsingError();
EXPECT_EQ(pe1.status, Uri::ParserStatus::kEmptyParameterNameInQuery);
EXPECT_EQ(pe1.parsed_chars, 0u);
// Two strings ("name1" and "value1") were processed before the error.
EXPECT_EQ(pe1.parsed_strings, 2u);
}
// Path cannot have empty segments.
TEST(UriTest, ParserErrorEmptySegmentInPath) {
Uri uri;
EXPECT_FALSE(uri.SetPathEncoded("/segment1//segment3"));
const Uri::ParserError pe2 = uri.GetLastParsingError();
EXPECT_EQ(pe2.status, Uri::ParserStatus::kEmptySegmentInPath);
// 10 is the position where the empty segment begins (right after the
// second '/').
EXPECT_EQ(pe2.parsed_chars, 10u);
EXPECT_EQ(pe2.parsed_strings, 0u);
}
} // namespace
} // namespace chromeos
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROMEOS_PRINTING_URI_UNITTEST_H_
#define CHROMEOS_PRINTING_URI_UNITTEST_H_
#include <string>
#include <utility>
#include <vector>
#include "chromeos/printing/uri.h"
// This file declares a struct and a constant that are used only in the
// implementation of unit tests for the class Uri declared in uri.h. It is not
// supposed to be included anywhere outside the files uri_unittest*.cc.
namespace chromeos {
// All 95 printable ASCII characters, from 0x20 (space) to 0x7E ('~'), in
// ascending order. Each row below holds 16 consecutive characters (the last
// one holds 15); '"' and '\' have to be escaped inside the literal.
constexpr char kPrintableASCII[] =
    " !\"#$%&'()*+,-./"
    "0123456789:;<=>?"
    "@ABCDEFGHIJKLMNO"
    "PQRSTUVWXYZ[\\]^_"
    "`abcdefghijklmno"
    "pqrstuvwxyz{|}~";
namespace uri_unittest {
// A simple structure with all URI components. It is a plain bundle of values
// used by the Uri unit tests to describe a single URI component by component.
// The special member functions are only declared here; their definitions are
// not part of this header.
struct UriComponents {
// Textual components. No validation or encoding is performed by this struct.
std::string scheme;
std::string userinfo;
std::string host;
int port = -1; // -1 means "unspecified"
// Path stored as a list of segments.
std::vector<std::string> path;
// Query stored as a list of (name, value) pairs.
std::vector<std::pair<std::string, std::string>> query;
std::string fragment;
// Default constructor, used for the "empty URI" case.
UriComponents();
// Copy constructor.
UriComponents(const UriComponents&);
// Constructor taking the components in the same order as the fields above.
// |scheme|, |userinfo| and |host| are mandatory; the remaining parameters
// default to "unspecified" port (-1), empty path, empty query and empty
// fragment.
UriComponents(
const std::string& scheme,
const std::string& userinfo,
const std::string& host,
int port = -1,
const std::vector<std::string>& path = {},
const std::vector<std::pair<std::string, std::string>>& query = {},
const std::string& fragment = "");
~UriComponents();
};
} // namespace uri_unittest
} // namespace chromeos
#endif // CHROMEOS_PRINTING_URI_UNITTEST_H_
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/string_number_conversions.h"
#include "chromeos/printing/uri.h"
#include "chromeos/printing/uri_unittest.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace chromeos {
namespace {
using UriComponents = uri_unittest::UriComponents;
// Tells whether |c| belongs to STD_CHARS, i.e. whether it is an ASCII letter,
// an ASCII digit or one of the characters: - . _ ~ ! $ ' ( ) * , ;
bool IsStdChar(char c) {
  const bool is_upper = ('A' <= c && c <= 'Z');
  const bool is_lower = ('a' <= c && c <= 'z');
  const bool is_digit = ('0' <= c && c <= '9');
  if (is_upper || is_lower || is_digit)
    return true;
  switch (c) {
    case '-':
    case '.':
    case '_':
    case '~':
    case '!':
    case '$':
    case '\'':
    case '(':
    case ')':
    case '*':
    case ',':
    case ';':
      return true;
    default:
      return false;
  }
}
// Builds a %-escaped copy of |input|. Characters that belong to STD_CHARS or
// occur in |allowed_chars| are copied verbatim; every other character is
// replaced by '%' followed by the output of base::HexEncode() for that byte.
std::string Encode(const std::string& input, const std::string& allowed_chars) {
  std::string encoded;
  for (char ch : input) {
    const bool keep_as_is =
        IsStdChar(ch) || allowed_chars.find(ch) != std::string::npos;
    if (!keep_as_is) {
      encoded += '%';
      encoded += base::HexEncode(&ch, 1);
      continue;
    }
    encoded += ch;
  }
  return encoded;
}
// A version of Encode function for a different parameter type.
std::vector<std::string> Encode(const std::vector<std::string>& input,
const std::string& allowed_chars) {
std::vector<std::string> v;
for (auto& s : input)
v.push_back(Encode(s, allowed_chars));
return v;
}
// Overload of Encode() for a list of string pairs: returns a vector in which
// both members of every pair from |input| are %-escaped, in the same order.
std::vector<std::pair<std::string, std::string>> Encode(
    const std::vector<std::pair<std::string, std::string>>& input,
    const std::string& allowed_chars) {
  std::vector<std::pair<std::string, std::string>> encoded;
  for (const std::pair<std::string, std::string>& entry : input) {
    encoded.emplace_back(Encode(entry.first, allowed_chars),
                         Encode(entry.second, allowed_chars));
  }
  return encoded;
}
// Fixture of a value-parameterized test suite. Every test receives a single
// parameter of type UriComponents; SetUp() loads all its fields into |uri_|
// and the tests then cross-check the results returned by different methods of
// the Uri object.
class UriConsistencyTest : public testing::TestWithParam<UriComponents> {
 public:
  // Copies every component of the test parameter into |uri_|. After each
  // setter the last parsing error is checked and the test is aborted if the
  // component was not accepted.
  void SetUp() override {
    const UriComponents& param = GetParam();

    uri_.SetFragment(param.fragment);
    ASSERT_EQ(uri_.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);

    uri_.SetHost(param.host);
    ASSERT_EQ(uri_.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);

    uri_.SetPath(param.path);
    ASSERT_EQ(uri_.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);

    uri_.SetPort(param.port);
    ASSERT_EQ(uri_.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);

    uri_.SetQuery(param.query);
    ASSERT_EQ(uri_.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);

    uri_.SetScheme(param.scheme);
    ASSERT_EQ(uri_.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);

    uri_.SetUserinfo(param.userinfo);
    ASSERT_EQ(uri_.GetLastParsingError().status, Uri::ParserStatus::kNoErrors);
  }

 protected:
  // The object under test, filled in by SetUp().
  Uri uri_;
};
// For every component, the value returned by Get*Encoded() must be equal to
// the %-escaped value returned by the corresponding Get*() method. Each
// component has its own set of extra characters that are left unescaped.
TEST_P(UriConsistencyTest, ComponentsEncoding) {
  const auto userinfo_encoded = Encode(uri_.GetUserinfo(), "+&=:");
  const auto host_encoded = Encode(uri_.GetHost(), "+&=");
  const auto path_encoded = Encode(uri_.GetPath(), "+&=:@");
  const auto query_encoded = Encode(uri_.GetQuery(), ":@/?");
  const auto fragment_encoded = Encode(uri_.GetFragment(), "+&=:@/?");

  EXPECT_EQ(uri_.GetUserinfoEncoded(), userinfo_encoded);
  EXPECT_EQ(uri_.GetHostEncoded(), host_encoded);
  EXPECT_EQ(uri_.GetPathEncoded(), path_encoded);
  EXPECT_EQ(uri_.GetQueryEncoded(), query_encoded);
  EXPECT_EQ(uri_.GetFragmentEncoded(), fragment_encoded);
}
// GetPathEncodedAsString() must return all encoded segments of the Path, each
// of them preceded by a single '/'.
TEST_P(UriConsistencyTest, PathBuilding) {
  const auto segments = uri_.GetPathEncoded();
  std::string joined;
  for (const auto& segment : segments) {
    joined += "/";
    joined += segment;
  }
  EXPECT_EQ(joined, uri_.GetPathEncodedAsString());
}
// GetQueryEncodedAsString() must return all encoded parameters joined with
// '&'. A parameter is written as "name=value", or as just "name" when its
// value is empty.
TEST_P(UriConsistencyTest, QueryBuilding) {
  const auto parameters = uri_.GetQueryEncoded();
  std::string joined;
  for (const auto& parameter : parameters) {
    if (!joined.empty())
      joined += "&";
    joined += parameter.first;
    if (parameter.second.empty())
      continue;
    joined += "=" + parameter.second;
  }
  EXPECT_EQ(joined, uri_.GetQueryEncodedAsString());
}
// Assembles a URI from the encoded components and checks that the result is
// equal to the value returned by GetNormalized().
TEST_P(UriConsistencyTest, UriBuilding) {
  // Authority has the form Userinfo@Host:Port. "Userinfo@" is skipped when
  // Userinfo is empty; ":Port" is skipped when the Port is unspecified (-1)
  // or equal to the default port of the Scheme.
  std::string authority;
  const std::string userinfo = uri_.GetUserinfoEncoded();
  if (!userinfo.empty())
    authority = userinfo + "@";
  authority += uri_.GetHostEncoded();
  const auto port = uri_.GetPort();
  const bool port_is_shown =
      port != -1 && port != Uri::GetDefaultPort(uri_.GetScheme());
  if (port_is_shown)
    authority += ":" + base::NumberToString(port);

  // Scheme comes first and is always followed by ':'.
  std::string expected_uri = uri_.GetScheme() + ":";
  // Authority (if not empty) is introduced by "//".
  if (!authority.empty())
    expected_uri += "//" + authority;
  // Path is always appended, Query only when it is not empty.
  expected_uri += uri_.GetPathEncodedAsString();
  const std::string query = uri_.GetQueryEncodedAsString();
  if (!query.empty())
    expected_uri += "?" + query;
  // Fragment is appended only when it is not empty.
  const std::string fragment = uri_.GetFragmentEncoded();
  if (!fragment.empty())
    expected_uri += "#" + fragment;

  EXPECT_EQ(uri_.GetNormalized(), expected_uri);
}
// Normalization must be idempotent: parsing an already normalized value must
// succeed and must yield exactly the same value.
TEST_P(UriConsistencyTest, Normalization) {
  const Uri::ParserStatus no_errors = Uri::ParserStatus::kNoErrors;

  // Whole URI: parse the normalized form and normalize it again.
  const std::string normalized_uri = uri_.GetNormalized();
  Uri reparsed(normalized_uri);
  EXPECT_EQ(reparsed.GetLastParsingError().status, no_errors);
  EXPECT_EQ(normalized_uri, reparsed.GetNormalized());

  // Path only: feed the normalized Path back and compare decoded segments.
  reparsed.SetPathEncoded(uri_.GetPathEncodedAsString());
  EXPECT_EQ(reparsed.GetLastParsingError().status, no_errors);
  EXPECT_EQ(uri_.GetPath(), reparsed.GetPath());

  // Query only: feed the normalized Query back and compare decoded parameters.
  reparsed.SetQueryEncoded(uri_.GetQueryEncodedAsString());
  EXPECT_EQ(reparsed.GetLastParsingError().status, no_errors);
  EXPECT_EQ(uri_.GetQuery(), reparsed.GetQuery());
}
// Parameter sets for UriConsistencyTest; every test of the suite is run once
// for each UriComponents value listed below. The constructor arguments are,
// in order: scheme, userinfo, host, port, path, query, fragment.
INSTANTIATE_TEST_SUITE_P(
UriConsistencyTestInstantiation,
UriConsistencyTest,
testing::Values(
UriComponents(), // empty URI
// Mixed-case scheme and host, an explicit port, two path segments, two
// query parameters and a fragment; userinfo is empty.
UriComponents("ExAmplE+SchemA-X",
"",
"ExAmplE.COM",
123,
{"D", "E"},
{{"F", "G"}, {"H", "I"}},
"J"),
// Empty scheme and port 0; every other component consists of all
// printable ASCII characters.
UriComponents("",
kPrintableASCII,
kPrintableASCII,
0,
{kPrintableASCII},
{{kPrintableASCII, kPrintableASCII}},
kPrintableASCII),
// Scheme only (empty userinfo and host, unspecified port), with path
// segments "//" and " " and a fragment built from the characters ?#@/.
UriComponents("A+1-b.C", "", "", -1, {"//", " "}, {}, "?#@/"),
// Non-ASCII query parameters. Each name is written as literal text and
// its value as the hex-escaped UTF-8 bytes of the same word (assuming
// this file is stored as UTF-8).
UriComponents("http",
"",
"utf8.test",
-1,
{},
{{"zażółć", "za\xc5\xbc\xc3\xb3\xc5\x82\xc4\x87"},
{"gęślą", "\x67\xC4\x99\xC5\x9B\x6C\xC4\x85"},
{"jaźń", "ja\xc5\xba\xc5\x84"}})));
} // namespace
} // namespace chromeos
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment