Commit 2aeae7fc authored by Sebastien Lalancette, committed by Commit Bot

[SH] Consolidate Text Fragments Utility Functions

Moving the existing utility functions in ios/web to the new common
folder along with the other utility functions.

Bug: 1136043
Change-Id: I6970bce02c72c9794f6574c22816d5c3608364b0
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2494990
Reviewed-by: Tommy Martino <tmartino@chromium.org>
Reviewed-by: Eugene But <eugenebut@chromium.org>
Commit-Queue: Sebastien Lalancette <seblalancette@chromium.org>
Cr-Commit-Position: refs/heads/master@{#820864}
parent 687201f2
......@@ -6,13 +6,15 @@
#include <sstream>
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "components/shared_highlighting/core/common/text_fragments_constants.h"
#include "net/base/escape.h"
namespace {
std::string Escape(std::string str) {
// Escapes any special character such that the fragment can be added to a URL.
std::string Escape(const std::string& str) {
std::string escaped = net::EscapeQueryParamValue(str, /*usePlus=*/false);
// Hyphens must also be escaped since they are used to indicate prefix/suffix
......@@ -22,6 +24,12 @@ std::string Escape(std::string str) {
return final_string;
}
// Reverses URL escaping on |str|, which may be a fragment piece taken from a
// URL, by delegating to base::UnescapeBinaryURLComponent().
std::string Unescape(const std::string& str) {
  std::string unescaped = base::UnescapeBinaryURLComponent(str);
  return unescaped;
}
} // namespace
namespace shared_highlighting {
......@@ -46,7 +54,56 @@ TextFragment::TextFragment(const TextFragment& other)
TextFragment::~TextFragment() = default;
std::string TextFragment::ToString() {
// Parses |escaped_string|, expected in the form
//   [prefix-,]textStart[,textEnd][,-suffix]
// into a TextFragment holding unescaped values. textStart is the only
// mandatory term. Terms are comma-separated, the prefix carries a trailing
// hyphen and the suffix a leading one, so any comma, ampersand or hyphen that
// belongs to a value must arrive URL-encoded. Returns base::nullopt when the
// string does not fit that shape.
base::Optional<TextFragment> TextFragment::FromEscapedString(
    std::string escaped_string) {
  // Peel off the optional prefix: everything before the first "-,". The
  // two-character marker itself is dropped.
  std::string raw_prefix;
  const size_t prefix_marker = escaped_string.find("-,");
  if (prefix_marker != std::string::npos) {
    raw_prefix = escaped_string.substr(0, prefix_marker);
    escaped_string.erase(0, prefix_marker + 2);
  }

  // Peel off the optional suffix: everything after the last ",-" in what
  // remains. Again the marker is not kept.
  std::string raw_suffix;
  const size_t suffix_marker = escaped_string.rfind(",-");
  if (suffix_marker != std::string::npos) {
    raw_suffix = escaped_string.substr(suffix_marker + 2);
    escaped_string.erase(suffix_marker);
  }

  // Whatever is left must be textStart, optionally followed by textEnd.
  const std::vector<std::string> terms = base::SplitString(
      escaped_string, ",", base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
  if (terms.empty() || terms.size() > 2 || terms[0].empty()) {
    // Malformed: no usable textStart, or too many terms.
    return base::nullopt;
  }
  const std::string& raw_text_start = terms[0];
  const std::string raw_text_end =
      terms.size() == 2 ? terms[1] : std::string();

  // Any of these characters still present in a term should have been
  // URL-encoded, so their presence means the input is malformed.
  const auto has_reserved_char = [](const std::string& term) {
    return term.find_first_of("&-,") != std::string::npos;
  };
  if (has_reserved_char(raw_prefix) || has_reserved_char(raw_text_start) ||
      has_reserved_char(raw_text_end) || has_reserved_char(raw_suffix)) {
    return base::nullopt;
  }

  return TextFragment(Unescape(raw_text_start), Unescape(raw_text_end),
                      Unescape(raw_prefix), Unescape(raw_suffix));
}
std::string TextFragment::ToEscapedString() {
if (text_start_.empty()) {
return std::string();
}
......@@ -70,4 +127,21 @@ std::string TextFragment::ToString() {
return ss.str();
}
// Builds a dictionary Value describing this fragment. The text start entry is
// always written; the prefix, text end and suffix entries are written only
// when the corresponding member is non-empty.
base::Value TextFragment::ToValue() {
  base::Value fragment_dict(base::Value::Type::DICTIONARY);

  if (!prefix_.empty()) {
    fragment_dict.SetKey(kFragmentPrefixKey, base::Value(prefix_));
  }

  fragment_dict.SetKey(kFragmentTextStartKey, base::Value(text_start_));

  if (!text_end_.empty()) {
    fragment_dict.SetKey(kFragmentTextEndKey, base::Value(text_end_));
  }

  if (!suffix_.empty()) {
    fragment_dict.SetKey(kFragmentSuffixKey, base::Value(suffix_));
  }

  return fragment_dict;
}
} // namespace shared_highlighting
......@@ -7,11 +7,16 @@
#include <string>
#include "base/optional.h"
#include "base/values.h"
namespace shared_highlighting {
// Class representing a text fragment.
class TextFragment {
public:
// Constructors for TextFragment instances. Special characters in the string
// parameters must not be escaped.
explicit TextFragment(const std::string& text_start);
TextFragment(const std::string& text_start,
const std::string& text_end,
......@@ -20,17 +25,29 @@ class TextFragment {
TextFragment(const TextFragment& other);
~TextFragment();
// Returns a TextFragment instance created from |escaped_string|, a string
// whose special characters have been escaped. The given string is expected to
// have the traditional text fragment format:
// [prefix-,]textStart[,textEnd][,-suffix]
// Returns |base::nullopt| if parsing failed.
static base::Optional<TextFragment> FromEscapedString(
std::string escaped_string);
// Accessors for the individual values of the fragment. Each one returns a
// copy of the corresponding member string.
const std::string text_start() const { return text_start_; }
const std::string text_end() const { return text_end_; }
const std::string prefix() const { return prefix_; }
const std::string suffix() const { return suffix_; }
// Converts the current fragment to its URL parameter format:
// Converts the current fragment to its escaped URL parameter format:
// text=[prefix-,]textStart[,textEnd][,-suffix]
// Returns an empty string if |text_start| does not have a value.
std::string ToString();
std::string ToEscapedString();
// Converts the current fragment to a dictionary Value.
base::Value ToValue();
private:
// Values of a fragment, stored unescaped.
std::string text_start_;
std::string text_end_;
std::string prefix_;
......
......@@ -4,41 +4,138 @@
#include "components/shared_highlighting/core/common/text_fragment.h"
#include "base/values.h"
#include "components/shared_highlighting/core/common/text_fragments_constants.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
namespace shared_highlighting {
namespace {
TEST(TextFragmentTest, FragmentToStringEmpty) {
EXPECT_EQ("", TextFragment("").ToString());
// Test helper: parses |fragment| with TextFragment::FromEscapedString() and
// returns its dictionary form, or a NONE Value when parsing fails.
base::Value TextFragmentToValue(const std::string& fragment) {
  base::Optional<TextFragment> parsed =
      TextFragment::FromEscapedString(fragment);
  if (!parsed) {
    return base::Value(base::Value::Type::NONE);
  }
  return parsed->ToValue();
}
TEST(TextFragmentTest, FragmentToStringEmptyTextStart) {
EXPECT_EQ("", TextFragment("", "a", "b", "c").ToString());
// Exercises parsing of escaped fragment strings through the
// TextFragmentToValue() helper: first every valid combination of the optional
// prefix / textEnd / suffix terms, then a set of malformed inputs that must
// produce a NONE Value.
TEST(TextFragmentTest, FragmentToValueFromEncodedString) {
// Success cases
// textStart only.
std::string fragment = "start";
base::Value result = TextFragmentToValue(fragment);
EXPECT_FALSE(result.FindKey(kFragmentPrefixKey));
EXPECT_EQ("start", result.FindKey(kFragmentTextStartKey)->GetString());
EXPECT_FALSE(result.FindKey(kFragmentTextEndKey));
EXPECT_FALSE(result.FindKey(kFragmentSuffixKey));
// textStart and textEnd.
fragment = "start,end";
result = TextFragmentToValue(fragment);
EXPECT_FALSE(result.FindKey(kFragmentPrefixKey));
EXPECT_EQ("start", result.FindKey(kFragmentTextStartKey)->GetString());
EXPECT_EQ("end", result.FindKey(kFragmentTextEndKey)->GetString());
EXPECT_FALSE(result.FindKey(kFragmentSuffixKey));
// prefix and textStart; the trailing hyphen is not part of the prefix value.
fragment = "prefix-,start";
result = TextFragmentToValue(fragment);
EXPECT_EQ("prefix", result.FindKey(kFragmentPrefixKey)->GetString());
EXPECT_EQ("start", result.FindKey(kFragmentTextStartKey)->GetString());
EXPECT_FALSE(result.FindKey(kFragmentTextEndKey));
EXPECT_FALSE(result.FindKey(kFragmentSuffixKey));
// textStart and suffix; the leading hyphen is not part of the suffix value.
fragment = "start,-suffix";
result = TextFragmentToValue(fragment);
EXPECT_FALSE(result.FindKey(kFragmentPrefixKey));
EXPECT_EQ("start", result.FindKey(kFragmentTextStartKey)->GetString());
EXPECT_FALSE(result.FindKey(kFragmentTextEndKey));
EXPECT_EQ("suffix", result.FindKey(kFragmentSuffixKey)->GetString());
// prefix, textStart and textEnd.
fragment = "prefix-,start,end";
result = TextFragmentToValue(fragment);
EXPECT_EQ("prefix", result.FindKey(kFragmentPrefixKey)->GetString());
EXPECT_EQ("start", result.FindKey(kFragmentTextStartKey)->GetString());
EXPECT_EQ("end", result.FindKey(kFragmentTextEndKey)->GetString());
EXPECT_FALSE(result.FindKey(kFragmentSuffixKey));
// textStart, textEnd and suffix.
fragment = "start,end,-suffix";
result = TextFragmentToValue(fragment);
EXPECT_FALSE(result.FindKey(kFragmentPrefixKey));
EXPECT_EQ("start", result.FindKey(kFragmentTextStartKey)->GetString());
EXPECT_EQ("end", result.FindKey(kFragmentTextEndKey)->GetString());
EXPECT_EQ("suffix", result.FindKey(kFragmentSuffixKey)->GetString());
// All four terms present.
fragment = "prefix-,start,end,-suffix";
result = TextFragmentToValue(fragment);
EXPECT_EQ("prefix", result.FindKey(kFragmentPrefixKey)->GetString());
EXPECT_EQ("start", result.FindKey(kFragmentTextStartKey)->GetString());
EXPECT_EQ("end", result.FindKey(kFragmentTextEndKey)->GetString());
EXPECT_EQ("suffix", result.FindKey(kFragmentSuffixKey)->GetString());
// Trailing comma doesn't break otherwise valid fragment
fragment = "start,";
result = TextFragmentToValue(fragment);
EXPECT_FALSE(result.FindKey(kFragmentPrefixKey));
EXPECT_EQ("start", result.FindKey(kFragmentTextStartKey)->GetString());
EXPECT_FALSE(result.FindKey(kFragmentTextEndKey));
EXPECT_FALSE(result.FindKey(kFragmentSuffixKey));
// Failure Cases
// Empty input: there is no textStart.
fragment = "";
result = TextFragmentToValue(fragment);
EXPECT_EQ(base::Value::Type::NONE, result.type());
// Far more comma-separated terms than the format allows.
fragment = "some,really-,malformed,-thing,with,too,many,commas";
result = TextFragmentToValue(fragment);
EXPECT_EQ(base::Value::Type::NONE, result.type());
// Prefix and suffix markers with no textStart between them.
fragment = "prefix-,-suffix";
result = TextFragmentToValue(fragment);
EXPECT_EQ(base::Value::Type::NONE, result.type());
// Prefix-style term appearing after the first term.
fragment = "start,prefix-,-suffix";
result = TextFragmentToValue(fragment);
EXPECT_EQ(base::Value::Type::NONE, result.type());
// Suffix-style term appearing before the last term.
fragment = "prefix-,-suffix,start";
result = TextFragmentToValue(fragment);
EXPECT_EQ(base::Value::Type::NONE, result.type());
// A single term ending in an unescaped hyphen.
fragment = "prefix-";
result = TextFragmentToValue(fragment);
EXPECT_EQ(base::Value::Type::NONE, result.type());
// A single term starting with an unescaped hyphen.
fragment = "-suffix";
result = TextFragmentToValue(fragment);
EXPECT_EQ(base::Value::Type::NONE, result.type());
}
TEST(TextFragmentTest, FragmentToStringOnlyTextStart) {
EXPECT_EQ("text=only%20start", TextFragment("only start").ToString());
// A fragment built from an empty text start serializes to an empty string.
TEST(TextFragmentTest, FragmentToEscapedStringEmpty) {
EXPECT_EQ("", TextFragment("").ToEscapedString());
}
TEST(TextFragmentTest, FragmentToStringWithTextEnd) {
// An empty text start yields an empty string even when the other three values
// are set.
TEST(TextFragmentTest, FragmentToEscapedStringEmptyTextStart) {
EXPECT_EQ("", TextFragment("", "a", "b", "c").ToEscapedString());
}
// With only a text start, the output is "text=" followed by the escaped value
// (the space becomes %20).
TEST(TextFragmentTest, FragmentToEscapedStringOnlyTextStart) {
EXPECT_EQ("text=only%20start", TextFragment("only start").ToEscapedString());
}
TEST(TextFragmentTest, FragmentToEscapedStringWithTextEnd) {
EXPECT_EQ("text=only%20start,and%20end",
TextFragment("only start", "and end", "", "").ToString());
TextFragment("only start", "and end", "", "").ToEscapedString());
}
TEST(TextFragmentTest, FragmentToStringWithPrefix) {
TEST(TextFragmentTest, FragmentToEscapedStringWithPrefix) {
EXPECT_EQ("text=and%20prefix-,only%20start",
TextFragment("only start", "", "and prefix", "").ToString());
TextFragment("only start", "", "and prefix", "").ToEscapedString());
}
TEST(TextFragmentTest, FragmentToStringWithPrefixAndSuffix) {
EXPECT_EQ(
"text=and%20prefix-,only%20start,-and%20suffix",
TextFragment("only start", "", "and prefix", "and suffix").ToString());
TEST(TextFragmentTest, FragmentToEscapedStringWithPrefixAndSuffix) {
EXPECT_EQ("text=and%20prefix-,only%20start,-and%20suffix",
TextFragment("only start", "", "and prefix", "and suffix")
.ToEscapedString());
}
TEST(TextFragmentTest, FragmentToStringAllWithSpecialCharacters) {
TEST(TextFragmentTest, FragmentToEscapedStringAllWithSpecialCharacters) {
TextFragment test_fragment("text, Start-&", "end of, & Text-", "pre-fix&, !",
"suff,i,x-+&");
EXPECT_EQ(
......@@ -46,7 +143,7 @@ TEST(TextFragmentTest, FragmentToStringAllWithSpecialCharacters) {
"text%2C%20Start%2D%26"
",end%20of%2C%20%26%20Text%2D"
",-suff%2Ci%2Cx%2D%2B%26",
test_fragment.ToString());
test_fragment.ToEscapedString());
}
} // namespace
......
......@@ -10,4 +10,9 @@ const char kFragmentsUrlDelimiter[] = ":~:";
const char kFragmentParameterName[] = "text=";
const char kFragmentPrefixKey[] = "prefix";
const char kFragmentTextStartKey[] = "textStart";
const char kFragmentTextEndKey[] = "textEnd";
const char kFragmentSuffixKey[] = "suffix";
} // namespace shared_highlighting
......@@ -13,6 +13,13 @@ extern const char kFragmentsUrlDelimiter[];
// Parameter name for a single text fragment in a URL.
extern const char kFragmentParameterName[];
// These values correspond to the keys used to store text fragment's values
// in a dictionary Value.
extern const char kFragmentPrefixKey[];
extern const char kFragmentTextStartKey[];
extern const char kFragmentTextEndKey[];
extern const char kFragmentSuffixKey[];
} // namespace shared_highlighting
#endif // COMPONENTS_SHARED_HIGHLIGHTING_CORE_COMMON_TEXT_FRAGMENTS_CONSTANTS_H_
......@@ -6,12 +6,60 @@
#include <sstream>
#include "base/json/json_writer.h"
#include "base/optional.h"
#include "base/strings/escape.h"
#include "base/strings/string_util.h"
#include "components/shared_highlighting/core/common/text_fragment.h"
#include "components/shared_highlighting/core/common/text_fragments_constants.h"
namespace shared_highlighting {
// Looks for text fragments in the ref of |url|. Returns a default-constructed
// Value when |url| has no ref or when the ref yields no text fragment strings;
// otherwise returns a list Value holding one dictionary per fragment string
// that parsed successfully.
base::Value ParseTextFragments(const GURL& url) {
  if (!url.has_ref()) {
    return {};
  }

  const std::vector<std::string> escaped_fragments =
      ExtractTextFragments(url.ref());
  if (escaped_fragments.empty()) {
    return {};
  }

  base::Value fragment_list(base::Value::Type::LIST);
  for (const std::string& escaped_fragment : escaped_fragments) {
    base::Optional<TextFragment> text_fragment =
        TextFragment::FromEscapedString(escaped_fragment);
    if (!text_fragment.has_value()) {
      continue;  // Fragments that fail to parse are left out of the list.
    }
    fragment_list.Append(text_fragment->ToValue());
  }
  return fragment_list;
}
// Extracts the raw (still escaped) text fragment strings from |ref_string|.
//
// Everything up to and including the first kFragmentsUrlDelimiter is
// discarded; the remainder is treated as a list of directives separated by
// '&'. A directive contributes a fragment only when it *starts with*
// kFragmentParameterName and has a non-empty value after it. Checking the
// prefix at the start of each directive (rather than searching for it
// anywhere in the string) keeps an unrelated directive whose name merely
// contains the parameter name, e.g. "context=foo", from being reported as a
// text fragment.
//
// Returns an empty vector when the delimiter is absent or no directive
// qualifies. Empty directives ("&&") and empty values ("text=&") are skipped.
std::vector<std::string> ExtractTextFragments(std::string ref_string) {
  const std::string directive_delimiter(kFragmentsUrlDelimiter);
  const std::string parameter_name(kFragmentParameterName);

  const size_t delimiter_pos = ref_string.find(directive_delimiter);
  if (delimiter_pos == std::string::npos)
    return {};
  ref_string.erase(0, delimiter_pos + directive_delimiter.size());

  std::vector<std::string> fragment_strings;
  size_t token_start = 0;
  while (token_start <= ref_string.size()) {
    // A & marks the end of this directive (and the start of the next one).
    size_t token_end = ref_string.find('&', token_start);
    if (token_end == std::string::npos)
      token_end = ref_string.size();

    // |value_start| < |token_end| guarantees the directive is long enough to
    // hold the parameter name plus at least one character of value.
    const size_t value_start = token_start + parameter_name.size();
    if (value_start < token_end &&
        ref_string.compare(token_start, parameter_name.size(),
                           parameter_name) == 0) {
      fragment_strings.push_back(
          ref_string.substr(value_start, token_end - value_start));
    }

    token_start = token_end + 1;
  }
  return fragment_strings;
}
GURL AppendFragmentDirectives(const GURL& base_url,
std::vector<TextFragment> fragments) {
if (!base_url.is_valid()) {
......@@ -20,7 +68,7 @@ GURL AppendFragmentDirectives(const GURL& base_url,
std::vector<std::string> fragment_strings;
for (auto it = std::begin(fragments); it != std::end(fragments); ++it) {
std::string fragment_string = (*it).ToString();
std::string fragment_string = (*it).ToEscapedString();
if (!fragment_string.empty()) {
fragment_strings.push_back(fragment_string);
}
......
......@@ -7,12 +7,26 @@
#include <vector>
#include "base/values.h"
#include "url/gurl.h"
namespace shared_highlighting {
class TextFragment;
// This file contains helper functions relating to Text Fragments, which are
// appended to the reference fragment in the URL and instruct the user agent
// to highlight a given snippet of text on the page and scroll it into view.
// See also: https://wicg.github.io/scroll-to-text-fragment/
// Checks the fragment portion of the URL for Text Fragments. Returns zero or
// more dictionaries containing the parsed parameters used by the fragment-
// finding algorithm, as defined in the spec.
base::Value ParseTextFragments(const GURL& url);
// Extracts the text fragments, if any, from a ref string.
std::vector<std::string> ExtractTextFragments(std::string ref_string);
// Appends a set of text |fragments| with the correct format to the given
// |base_url|. Returns an empty GURL if |base_url| is invalid.
GURL AppendFragmentDirectives(const GURL& base_url,
......
......@@ -5,12 +5,45 @@
#include "components/shared_highlighting/core/common/text_fragments_utils.h"
#include "components/shared_highlighting/core/common/text_fragment.h"
#include "components/shared_highlighting/core/common/text_fragments_constants.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
namespace shared_highlighting {
namespace {
// Checks that ParseTextFragments() returns one dictionary per text fragment
// found in the URL ref, with unescaped values, and a none Value when the URL
// carries no ref.
TEST(TextFragmentsUtilsTest, ParseTextFragments) {
// Two fragments after the ":~:" delimiter; "%20" is expected to come back as
// a space.
GURL url_with_fragment(
"https://www.example.com/#idFrag:~:text=text%201&text=text%202");
base::Value result = ParseTextFragments(url_with_fragment);
ASSERT_EQ(2u, result.GetList().size());
EXPECT_EQ("text 1",
result.GetList()[0].FindKey(kFragmentTextStartKey)->GetString());
EXPECT_EQ("text 2",
result.GetList()[1].FindKey(kFragmentTextStartKey)->GetString());
// No ref component at all: the result is a none Value rather than a list.
GURL url_no_fragment("www.example.com");
base::Value empty_result = ParseTextFragments(url_no_fragment);
EXPECT_TRUE(empty_result.is_none());
}
// Checks that ExtractTextFragments() returns the raw value of every "text="
// entry that follows the ":~:" delimiter, and nothing when there is none.
TEST(TextFragmentsUtilsTest, ExtractTextFragments) {
std::vector<std::string> expected = {"test1", "test2", "test3"};
// Ensure presence/absence of a trailing & doesn't break anything
EXPECT_EQ(expected,
ExtractTextFragments("#id:~:text=test1&text=test2&text=test3"));
EXPECT_EQ(expected,
ExtractTextFragments("#id:~:text=test1&text=test2&text=test3&"));
// Test that empty tokens (&& or &text=&) are discarded
EXPECT_EQ(expected, ExtractTextFragments(
"#id:~:text=test1&&text=test2&text=&text=test3"));
// Inputs without the ":~:" delimiter (including the empty string) yield an
// empty vector.
expected.clear();
EXPECT_EQ(expected, ExtractTextFragments("#idButNoTextFragmentsHere"));
EXPECT_EQ(expected, ExtractTextFragments(""));
}
TEST(TextFragmentsUtilsTest, AppendFragmentDirectivesOneFragment) {
GURL base_url("https://www.chromium.org");
TextFragment test_fragment("only start");
......
......@@ -273,7 +273,6 @@ source_set("ios_web_navigation_unittests") {
"navigation/navigation_manager_util_unittest.mm",
"navigation/nscoder_util_unittest.mm",
"navigation/session_storage_builder_unittest.mm",
"navigation/text_fragments_utils_unittest.mm",
"navigation/wk_back_forward_list_item_holder_unittest.mm",
"navigation/wk_based_navigation_manager_impl_unittest.mm",
"navigation/wk_navigation_action_policy_util_unittest.mm",
......
......@@ -65,8 +65,6 @@ source_set("navigation") {
"serializable_user_data_manager_impl.mm",
"session_storage_builder.h",
"session_storage_builder.mm",
"text_fragments_utils.h",
"text_fragments_utils.mm",
"time_smoother.cc",
"time_smoother.h",
"url_schemes.mm",
......
......@@ -8,8 +8,8 @@
#import "base/strings/string_util.h"
#import "base/strings/utf_string_conversions.h"
#import "components/shared_highlighting/core/common/shared_highlighting_metrics.h"
#import "components/shared_highlighting/core/common/text_fragments_utils.h"
#import "ios/web/common/features.h"
#import "ios/web/navigation/text_fragments_utils.h"
#import "ios/web/public/js_messaging/web_frame.h"
#import "ios/web/public/navigation/navigation_context.h"
#import "ios/web/public/navigation/referrer.h"
......@@ -73,8 +73,8 @@ const double kMaxSelectorCount = 200.0;
return;
}
base::Value parsedFragments =
web::ParseTextFragments(self.webStateImpl->GetLastCommittedURL());
base::Value parsedFragments = shared_highlighting::ParseTextFragments(
self.webStateImpl->GetLastCommittedURL());
if (parsedFragments.type() == base::Value::Type::NONE) {
return;
......
......@@ -8,7 +8,6 @@
#import "base/test/metrics/histogram_tester.h"
#import "base/test/scoped_feature_list.h"
#import "ios/web/common/features.h"
#import "ios/web/navigation/text_fragments_utils.h"
#import "ios/web/public/navigation/referrer.h"
#import "ios/web/public/test/fakes/fake_navigation_context.h"
#import "ios/web/public/test/fakes/fake_web_frame.h"
......
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef IOS_WEB_NAVIGATION_TEXT_FRAGMENTS_UTILS_H_
#define IOS_WEB_NAVIGATION_TEXT_FRAGMENTS_UTILS_H_
#include "base/values.h"
class GURL;
namespace web {
// This file contains helper functions relating to Text Fragments, which are
// appended to the reference fragment in the URL and instruct the user agent
// to highlight a given snippet of text on the page and scroll it into view.
// See also: https://wicg.github.io/scroll-to-text-fragment/
// Checks the fragment portion of the URL for Text Fragments. Returns zero or
// more dictionaries containing the parsed parameters used by the fragment-
// finding algorithm, as defined in the spec.
base::Value ParseTextFragments(const GURL& url);
// Extracts the text fragments, if any, from a ref string.
std::vector<std::string> ExtractTextFragments(std::string ref_string);
// Breaks a text fragment into its component parts, as needed for the algorithm
// described in the spec. Returns a dictionary Value, or a None Value if the
// fragment is malformed.
base::Value TextFragmentToValue(std::string fragment);
} // namespace web
#endif // IOS_WEB_NAVIGATION_TEXT_FRAGMENTS_UTILS_H_
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#import "ios/web/navigation/text_fragments_utils.h"
#include <cstring.h>
#include "base/json/json_writer.h"
#include "base/strings/escape.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "ios/web/common/features.h"
#import "ios/web/public/navigation/navigation_context.h"
#import "ios/web/public/web_state.h"
#if !defined(__has_feature) || !__has_feature(objc_arc)
#error "This file requires ARC support."
#endif
namespace {
const char kDirectivePrefix[] = ":~:";
const char kTextFragmentPrefix[] = "text=";
// Wraps the result of base::UnescapeBinaryURLComponent(|str|) in a
// base::Value.
base::Value DecodeStringToValue(const std::string& str) {
  base::Value decoded(base::UnescapeBinaryURLComponent(str));
  return decoded;
}
} // namespace
namespace web {
// Looks for text fragments in the ref of |url|. Returns a default-constructed
// Value when |url| has no ref or when the ref yields no text fragment strings;
// otherwise returns a list Value holding one dictionary per fragment string
// that TextFragmentToValue() could parse.
base::Value ParseTextFragments(const GURL& url) {
  if (!url.has_ref()) {
    return {};
  }

  const std::vector<std::string> escaped_fragments =
      ExtractTextFragments(url.ref());
  if (escaped_fragments.empty()) {
    return {};
  }

  base::Value fragment_list(base::Value::Type::LIST);
  for (const std::string& escaped_fragment : escaped_fragments) {
    base::Value fragment_dict = TextFragmentToValue(escaped_fragment);
    // A NONE Value signals a malformed fragment; leave it out of the list.
    if (fragment_dict.type() != base::Value::Type::NONE) {
      fragment_list.Append(std::move(fragment_dict));
    }
  }
  return fragment_list;
}
std::vector<std::string> ExtractTextFragments(std::string ref_string) {
size_t start_pos = ref_string.find(kDirectivePrefix);
if (start_pos == std::string::npos)
return {};
ref_string.erase(0, start_pos + strlen(kDirectivePrefix));
std::vector<std::string> fragment_strings;
while (ref_string.size()) {
// Consume everything up to and including the text= prefix
size_t prefix_pos = ref_string.find(kTextFragmentPrefix);
if (prefix_pos == std::string::npos)
break;
ref_string.erase(0, prefix_pos + strlen(kTextFragmentPrefix));
// A & indicates the end of the fragment (and the start of the next).
// Save everything up to this point, and then consume it (including the &).
size_t ampersand_pos = ref_string.find("&");
if (ampersand_pos != 0)
fragment_strings.push_back(ref_string.substr(0, ampersand_pos));
if (ampersand_pos == std::string::npos)
break;
ref_string.erase(0, ampersand_pos + 1);
}
return fragment_strings;
}
// Breaks an escaped text fragment of the form
//   [prefix-,]textStart[,textEnd][,-suffix]
// into a dictionary Value with unescaped entries. textStart is the only
// mandatory term. Terms are comma-separated, the prefix carries a trailing
// hyphen and the suffix a leading one, so any comma, ampersand or hyphen that
// belongs to a value must arrive URL-encoded. Returns a NONE Value when
// |fragment| does not fit that shape.
base::Value TextFragmentToValue(std::string fragment) {
  // Peel off the optional prefix: everything before the first "-,". The
  // two-character marker itself is not carried over to the dictionary.
  std::string raw_prefix;
  const size_t prefix_marker = fragment.find("-,");
  if (prefix_marker != std::string::npos) {
    raw_prefix = fragment.substr(0, prefix_marker);
    fragment.erase(0, prefix_marker + 2);
  }

  // Peel off the optional suffix: everything after the last ",-" in what
  // remains. Again the marker is dropped.
  std::string raw_suffix;
  const size_t suffix_marker = fragment.rfind(",-");
  if (suffix_marker != std::string::npos) {
    raw_suffix = fragment.substr(suffix_marker + 2);
    fragment.erase(suffix_marker);
  }

  // Whatever is left must be textStart, optionally followed by textEnd.
  const std::vector<std::string> terms = base::SplitString(
      fragment, ",", base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
  if (terms.empty() || terms.size() > 2 || terms[0].empty()) {
    // Malformed: no usable textStart, or too many terms.
    return base::Value(base::Value::Type::NONE);
  }
  const std::string& raw_text_start = terms[0];
  const std::string raw_text_end =
      terms.size() == 2 ? terms[1] : std::string();

  // Any of these characters still present in a term should have been
  // URL-encoded, so their presence means the input is malformed.
  const auto has_reserved_char = [](const std::string& term) {
    return term.find_first_of("&-,") != std::string::npos;
  };
  if (has_reserved_char(raw_prefix) || has_reserved_char(raw_text_start) ||
      has_reserved_char(raw_text_end) || has_reserved_char(raw_suffix)) {
    return base::Value(base::Value::Type::NONE);
  }

  base::Value dict(base::Value::Type::DICTIONARY);
  if (!raw_prefix.empty()) {
    dict.SetKey("prefix", DecodeStringToValue(raw_prefix));
  }
  // textStart is known to be non-empty after the malformed-input checks.
  dict.SetKey("textStart", DecodeStringToValue(raw_text_start));
  if (!raw_text_end.empty()) {
    dict.SetKey("textEnd", DecodeStringToValue(raw_text_end));
  }
  if (!raw_suffix.empty()) {
    dict.SetKey("suffix", DecodeStringToValue(raw_suffix));
  }
  return dict;
}
} // namespace web
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#import "ios/web/navigation/text_fragments_utils.h"
#import "testing/gtest/include/gtest/gtest.h"
#import "testing/platform_test.h"
#import "url/gurl.h"
#if !defined(__has_feature) || !__has_feature(objc_arc)
#error "This file requires ARC support."
#endif
namespace {
// These values correspond to the members that the JavaScript implementation is
// expecting.
const char kPrefixKey[] = "prefix";
const char kTextStartKey[] = "textStart";
const char kTextEndKey[] = "textEnd";
const char kSuffixKey[] = "suffix";
} // namespace
namespace web {
typedef PlatformTest TextFragmentsUtilsTest;
// Checks that ParseTextFragments() returns one dictionary per text fragment
// found in the URL ref, with unescaped values, and a none Value when the URL
// carries no ref.
TEST_F(TextFragmentsUtilsTest, ParseTextFragments) {
// Two fragments after the ":~:" prefix; "%20" is expected to come back as a
// space.
GURL url_with_fragment(
"https://www.example.com/#idFrag:~:text=text%201&text=text%202");
base::Value result = ParseTextFragments(url_with_fragment);
ASSERT_EQ(2u, result.GetList().size());
EXPECT_EQ("text 1", result.GetList()[0].FindKey(kTextStartKey)->GetString());
EXPECT_EQ("text 2", result.GetList()[1].FindKey(kTextStartKey)->GetString());
// No ref component at all: the result is a none Value rather than a list.
GURL url_no_fragment("www.example.com");
base::Value empty_result = ParseTextFragments(url_no_fragment);
EXPECT_TRUE(empty_result.is_none());
}
// Checks that ExtractTextFragments() returns the raw value of every "text="
// entry that follows the ":~:" prefix, and nothing when there is none.
TEST_F(TextFragmentsUtilsTest, ExtractTextFragments) {
std::vector<std::string> expected = {"test1", "test2", "test3"};
// Ensure presence/absence of a trailing & doesn't break anything
EXPECT_EQ(expected,
ExtractTextFragments("#id:~:text=test1&text=test2&text=test3"));
EXPECT_EQ(expected,
ExtractTextFragments("#id:~:text=test1&text=test2&text=test3&"));
// Test that empty tokens (&& or &text=&) are discarded
EXPECT_EQ(expected, ExtractTextFragments(
"#id:~:text=test1&&text=test2&text=&text=test3"));
// Inputs without the ":~:" prefix (including the empty string) yield an
// empty vector.
expected.clear();
EXPECT_EQ(expected, ExtractTextFragments("#idButNoTextFragmentsHere"));
EXPECT_EQ(expected, ExtractTextFragments(""));
}
// Exercises TextFragmentToValue(): first every valid combination of the
// optional prefix / textEnd / suffix terms, then a set of malformed inputs
// that must produce a NONE Value.
TEST_F(TextFragmentsUtilsTest, TextFragmentToValue) {
// Success cases
// textStart only.
std::string fragment = "start";
base::Value result = TextFragmentToValue(fragment);
EXPECT_FALSE(result.FindKey(kPrefixKey));
EXPECT_EQ("start", result.FindKey(kTextStartKey)->GetString());
EXPECT_FALSE(result.FindKey(kTextEndKey));
EXPECT_FALSE(result.FindKey(kSuffixKey));
// textStart and textEnd.
fragment = "start,end";
result = TextFragmentToValue(fragment);
EXPECT_FALSE(result.FindKey(kPrefixKey));
EXPECT_EQ("start", result.FindKey(kTextStartKey)->GetString());
EXPECT_EQ("end", result.FindKey(kTextEndKey)->GetString());
EXPECT_FALSE(result.FindKey(kSuffixKey));
// prefix and textStart; the trailing hyphen is not part of the prefix value.
fragment = "prefix-,start";
result = TextFragmentToValue(fragment);
EXPECT_EQ("prefix", result.FindKey(kPrefixKey)->GetString());
EXPECT_EQ("start", result.FindKey(kTextStartKey)->GetString());
EXPECT_FALSE(result.FindKey(kTextEndKey));
EXPECT_FALSE(result.FindKey(kSuffixKey));
// textStart and suffix; the leading hyphen is not part of the suffix value.
fragment = "start,-suffix";
result = TextFragmentToValue(fragment);
EXPECT_FALSE(result.FindKey(kPrefixKey));
EXPECT_EQ("start", result.FindKey(kTextStartKey)->GetString());
EXPECT_FALSE(result.FindKey(kTextEndKey));
EXPECT_EQ("suffix", result.FindKey(kSuffixKey)->GetString());
// prefix, textStart and textEnd.
fragment = "prefix-,start,end";
result = TextFragmentToValue(fragment);
EXPECT_EQ("prefix", result.FindKey(kPrefixKey)->GetString());
EXPECT_EQ("start", result.FindKey(kTextStartKey)->GetString());
EXPECT_EQ("end", result.FindKey(kTextEndKey)->GetString());
EXPECT_FALSE(result.FindKey(kSuffixKey));
// textStart, textEnd and suffix.
fragment = "start,end,-suffix";
result = TextFragmentToValue(fragment);
EXPECT_FALSE(result.FindKey(kPrefixKey));
EXPECT_EQ("start", result.FindKey(kTextStartKey)->GetString());
EXPECT_EQ("end", result.FindKey(kTextEndKey)->GetString());
EXPECT_EQ("suffix", result.FindKey(kSuffixKey)->GetString());
// All four terms present.
fragment = "prefix-,start,end,-suffix";
result = TextFragmentToValue(fragment);
EXPECT_EQ("prefix", result.FindKey(kPrefixKey)->GetString());
EXPECT_EQ("start", result.FindKey(kTextStartKey)->GetString());
EXPECT_EQ("end", result.FindKey(kTextEndKey)->GetString());
EXPECT_EQ("suffix", result.FindKey(kSuffixKey)->GetString());
// Trailing comma doesn't break otherwise valid fragment
fragment = "start,";
result = TextFragmentToValue(fragment);
EXPECT_FALSE(result.FindKey(kPrefixKey));
EXPECT_EQ("start", result.FindKey(kTextStartKey)->GetString());
EXPECT_FALSE(result.FindKey(kTextEndKey));
EXPECT_FALSE(result.FindKey(kSuffixKey));
// Failure Cases
// Empty input: there is no textStart.
fragment = "";
result = TextFragmentToValue(fragment);
EXPECT_EQ(base::Value::Type::NONE, result.type());
// Far more comma-separated terms than the format allows.
fragment = "some,really-,malformed,-thing,with,too,many,commas";
result = TextFragmentToValue(fragment);
EXPECT_EQ(base::Value::Type::NONE, result.type());
// Prefix and suffix markers with no textStart between them.
fragment = "prefix-,-suffix";
result = TextFragmentToValue(fragment);
EXPECT_EQ(base::Value::Type::NONE, result.type());
// Prefix-style term appearing after the first term.
fragment = "start,prefix-,-suffix";
result = TextFragmentToValue(fragment);
EXPECT_EQ(base::Value::Type::NONE, result.type());
// Suffix-style term appearing before the last term.
fragment = "prefix-,-suffix,start";
result = TextFragmentToValue(fragment);
EXPECT_EQ(base::Value::Type::NONE, result.type());
// A single term ending in an unescaped hyphen.
fragment = "prefix-";
result = TextFragmentToValue(fragment);
EXPECT_EQ(base::Value::Type::NONE, result.type());
// A single term starting with an unescaped hyphen.
fragment = "-suffix";
result = TextFragmentToValue(fragment);
EXPECT_EQ(base::Value::Type::NONE, result.type());
}
} // namespace web
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment