Commit 191a023a authored by Tommy Martino, committed by Commit Bot

[SH iOS] Adding text fragment parsing

Before the text fragment can be sent to the JavaScript that will
highlight/scroll the page accordingly, it needs to be parsed into
components according to the spec.

Bug: 1099268
Change-Id: I75d281996b922542aea25501bc22f064a1a5ac73
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2337407
Reviewed-by: Gayane Petrosyan <gayane@chromium.org>
Reviewed-by: Eugene But <eugenebut@chromium.org>
Commit-Queue: Tommy Martino <tmartino@chromium.org>
Cr-Commit-Position: refs/heads/master@{#795486}
parent 2f326951
......@@ -5,6 +5,10 @@
#ifndef IOS_WEB_NAVIGATION_TEXT_FRAGMENT_UTILS_H_
#define IOS_WEB_NAVIGATION_TEXT_FRAGMENT_UTILS_H_
#include "base/values.h"
class GURL;
namespace web {
class NavigationContext;
......@@ -23,6 +27,23 @@ bool AreTextFragmentsAllowed(NavigationContext* context);
// matching text, highlights the text, and scrolls the first into view.
void HandleTextFragments(NavigationContext* context);
// Exposed for testing only.
namespace internal {
// Checks the fragment portion of the URL for Text Fragments. Returns zero or
// more dictionaries containing the parsed parameters used by the fragment-
// finding algorithm, as defined in the spec.
// The result is empty when |url| has no ref or the ref contains no text
// fragments. Fragments that TextFragmentToValue() reports as malformed are
// skipped rather than causing the whole parse to fail.
std::vector<base::Value> ParseTextFragments(const GURL& url);
// Extracts the text fragments, if any, from a ref string.
// Only the part of |ref_string| after the ":~:" delimiter is considered. Each
// returned string is the value following a "text=" marker, up to (but not
// including) the next '&'. Empty values are dropped. Returns an empty vector
// if there is no ":~:" delimiter.
std::vector<std::string> ExtractTextFragments(std::string ref_string);
// Breaks a text fragment into its component parts, as needed for the algorithm
// described in the spec. Returns a dictionary Value, or a None Value if the
// fragment is malformed.
// |fragment| has the form [prefix-,]textStart[,textEnd][,-suffix]. The
// dictionary always contains "textStart" and only contains "prefix",
// "textEnd" and "suffix" when those parts are present and non-empty. Values
// are stored as they appear in |fragment|; no URL-decoding is performed here.
base::Value TextFragmentToValue(std::string fragment);
} // namespace internal
} // namespace web
#endif // IOS_WEB_NAVIGATION_TEXT_FRAGMENT_UTILS_H_
......@@ -4,6 +4,7 @@
#import "ios/web/navigation/text_fragment_utils.h"
#include "base/strings/string_split.h"
#include "ios/web/common/features.h"
#import "ios/web/public/navigation/navigation_context.h"
#import "ios/web/public/web_state.h"
......@@ -12,6 +13,11 @@
#error "This file requires ARC support."
#endif
namespace {

// Delimiter inside a URL ref after which the fragment directive (the part that
// may hold text fragments) begins.
const std::string kDirectivePrefix(":~:");

// Marker that introduces a single text fragment within the directive.
const std::string kTextFragmentPrefix("text=");

}  // namespace
namespace web {
bool AreTextFragmentsAllowed(NavigationContext* context) {
......@@ -33,4 +39,112 @@ void HandleTextFragments(NavigationContext* context) {
// params.
}
namespace internal {
std::vector<base::Value> ParseTextFragments(const GURL& url) {
if (!url.has_ref())
return {};
std::vector<std::string> fragments = ExtractTextFragments(url.ref());
if (fragments.empty())
return {};
std::vector<base::Value> parsed;
for (const std::string& fragment : fragments) {
base::Value parsed_fragment = TextFragmentToValue(fragment);
if (parsed_fragment.type() == base::Value::Type::NONE)
continue;
parsed.push_back(std::move(parsed_fragment));
}
return parsed;
}
// Returns the raw value of every text fragment in |ref_string|.
//
// Only the portion of |ref_string| that follows the first ":~:" delimiter is
// examined. That portion is treated as a list of '&'-separated tokens; a token
// contributes a fragment only if it begins with "text=" and has a non-empty
// value after it. The returned strings exclude the "text=" marker and the '&'
// separators and are not URL-decoded.
//
// Returns an empty vector if |ref_string| has no ":~:" delimiter or no
// qualifying token.
std::vector<std::string> ExtractTextFragments(std::string ref_string) {
  const size_t directive_pos = ref_string.find(kDirectivePrefix);
  if (directive_pos == std::string::npos)
    return {};

  std::vector<std::string> fragment_strings;
  size_t token_start = directive_pos + kDirectivePrefix.size();
  while (token_start < ref_string.size()) {
    // A '&' ends the current token (and starts the next one); the last token
    // runs to the end of the string.
    size_t token_end = ref_string.find('&', token_start);
    if (token_end == std::string::npos)
      token_end = ref_string.size();

    // The marker must sit at the very start of the token. Matching it
    // anywhere would misread tokens such as "context=foo" as a text fragment.
    // Requiring |value_start| < |token_end| both rejects empty values
    // ("text=") and guarantees the compare() below sees a full-length window.
    const size_t value_start = token_start + kTextFragmentPrefix.size();
    if (value_start < token_end &&
        ref_string.compare(token_start, kTextFragmentPrefix.size(),
                           kTextFragmentPrefix) == 0) {
      fragment_strings.push_back(
          ref_string.substr(value_start, token_end - value_start));
    }

    // Skip past the '&' (or past the end, which terminates the loop).
    token_start = token_end + 1;
  }
  return fragment_strings;
}
// Splits a single text fragment into its named parts.
//
// The expected shape is: [prefix-,]textStart[,textEnd][,-suffix]
// textStart is the only mandatory part. Parts are comma-separated; the prefix
// carries a trailing hyphen and the suffix a leading hyphen, neither of which
// is kept in the result. Commas, ampersands and hyphens that belong to a
// part's content are expected to arrive URL-encoded.
//
// Returns a dictionary Value holding "textStart" plus whichever of "prefix",
// "textEnd" and "suffix" are present and non-empty, or a NONE Value when the
// fragment is malformed.
base::Value TextFragmentToValue(std::string fragment) {
  // Peel off the optional prefix: everything before the first "-,".
  std::string prefix;
  const size_t prefix_marker = fragment.find("-,");
  if (prefix_marker != std::string::npos) {
    prefix = fragment.substr(0, prefix_marker);
    fragment.erase(0, prefix_marker + 2);
  }

  // Peel off the optional suffix: everything after the last ",-" in what
  // remains.
  std::string suffix;
  const size_t suffix_marker = fragment.rfind(",-");
  if (suffix_marker != std::string::npos) {
    suffix = fragment.substr(suffix_marker + 2);
    fragment.erase(suffix_marker);
  }

  // What is left should be textStart, optionally followed by textEnd.
  const std::vector<std::string> parts = base::SplitString(
      fragment, ",", base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
  const bool has_text_start = !parts.empty() && !parts[0].empty();
  if (parts.size() > 2 || !has_text_start) {
    // Either too many parts, or nothing usable as textStart.
    return base::Value(base::Value::Type::NONE);
  }

  const std::string& text_start = parts[0];
  const std::string text_end = parts.size() == 2 ? parts[1] : std::string();

  // Any of these characters surviving inside a part means it was not
  // URL-encoded as required, so the fragment is rejected.
  const auto has_reserved_char = [](const std::string& part) {
    return part.find_first_of("&-,") != std::string::npos;
  };
  if (has_reserved_char(prefix) || has_reserved_char(text_start) ||
      has_reserved_char(text_end) || has_reserved_char(suffix)) {
    return base::Value(base::Value::Type::NONE);
  }

  base::Value dict(base::Value::Type::DICTIONARY);
  if (!prefix.empty())
    dict.SetKey("prefix", base::Value(prefix));

  // |text_start| is known to be non-empty thanks to the check above.
  dict.SetKey("textStart", base::Value(text_start));

  if (!text_end.empty())
    dict.SetKey("textEnd", base::Value(text_end));

  if (!suffix.empty())
    dict.SetKey("suffix", base::Value(suffix));

  return dict;
}
} // namespace internal
} // namespace web
......@@ -12,11 +12,19 @@
#import "ios/web/public/test/fakes/test_web_state.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "testing/platform_test.h"
#include "url/gurl.h"
#if !defined(__has_feature) || !__has_feature(objc_arc)
#error "This file requires ARC support."
#endif
// These values correspond to the members that the JavaScript implementation is
// expecting.
// Declared as character arrays rather than std::string so that no dynamic
// initialization runs at load time; they convert implicitly wherever a string
// key is expected.
const char kPrefixKey[] = "prefix";
const char kTextStartKey[] = "textStart";
const char kTextEndKey[] = "textEnd";
const char kSuffixKey[] = "suffix";
namespace web {
typedef PlatformTest TextFragmentUtilsTest;
......@@ -55,4 +63,126 @@ TEST_F(TextFragmentUtilsTest, AreTextFragmentsAllowed) {
EXPECT_FALSE(AreTextFragmentsAllowed(&context));
}
// Verifies that ParseTextFragments() returns one dictionary per text fragment
// in the URL's ref, and nothing when the URL carries no text fragments.
TEST_F(TextFragmentUtilsTest, ParseTextFragments) {
  GURL url_with_fragment(
      "https://www.example.com/#idFrag:~:text=text%201&text=text%202");
  std::vector<base::Value> result =
      internal::ParseTextFragments(url_with_fragment);
  ASSERT_EQ(2u, result.size());

  // Check the lookups before dereferencing so that a missing key is reported
  // as a test failure instead of crashing the test binary.
  const base::Value* first_start = result[0].FindKey(kTextStartKey);
  ASSERT_TRUE(first_start);
  EXPECT_EQ("text%201", first_start->GetString());
  const base::Value* second_start = result[1].FindKey(kTextStartKey);
  ASSERT_TRUE(second_start);
  EXPECT_EQ("text%202", second_start->GetString());

  // A well-formed URL that has no ref at all.
  GURL url_no_ref("https://www.example.com/");
  EXPECT_TRUE(internal::ParseTextFragments(url_no_ref).empty());

  // A well-formed URL whose ref contains no ":~:" directive.
  GURL url_plain_ref("https://www.example.com/#idFrag");
  EXPECT_TRUE(internal::ParseTextFragments(url_plain_ref).empty());

  // A scheme-less string (presumably not a valid GURL -- confirm); it must
  // also yield no fragments.
  GURL url_no_fragment("www.example.com");
  std::vector<base::Value> empty_result =
      internal::ParseTextFragments(url_no_fragment);
  EXPECT_TRUE(empty_result.empty());
}
// Verifies that ExtractTextFragments() pulls out the value of each text
// fragment and tolerates stray separators and empty values.
TEST_F(TextFragmentUtilsTest, ExtractTextFragments) {
  const std::vector<std::string> three_fragments = {"test1", "test2", "test3"};

  // The result must be the same with or without a trailing '&'.
  const std::string without_trailing_amp =
      "#id:~:text=test1&text=test2&text=test3";
  EXPECT_EQ(three_fragments,
            internal::ExtractTextFragments(without_trailing_amp));

  const std::string with_trailing_amp =
      "#id:~:text=test1&text=test2&text=test3&";
  EXPECT_EQ(three_fragments, internal::ExtractTextFragments(with_trailing_amp));

  // Empty tokens ("&&") and empty values ("&text=&") are thrown away.
  const std::string with_empty_tokens =
      "#id:~:text=test1&&text=test2&text=&text=test3";
  EXPECT_EQ(three_fragments, internal::ExtractTextFragments(with_empty_tokens));

  // Inputs without any text fragment produce an empty list.
  const std::vector<std::string> no_fragments;
  EXPECT_EQ(no_fragments,
            internal::ExtractTextFragments("#idButNoTextFragmentsHere"));
  EXPECT_EQ(no_fragments, internal::ExtractTextFragments(""));
}
// Verifies that TextFragmentToValue() produces the expected dictionary members
// for each supported combination of parts, and a NONE Value for malformed
// input. Each well-formed case lives in its own scope so cases cannot leak
// state into one another.
TEST_F(TextFragmentUtilsTest, TextFragmentToValue) {
  // --- Well-formed fragments ---

  {
    // textStart only.
    const base::Value parsed = internal::TextFragmentToValue("start");
    EXPECT_FALSE(parsed.FindKey(kPrefixKey));
    EXPECT_EQ("start", parsed.FindKey(kTextStartKey)->GetString());
    EXPECT_FALSE(parsed.FindKey(kTextEndKey));
    EXPECT_FALSE(parsed.FindKey(kSuffixKey));
  }

  {
    // textStart + textEnd.
    const base::Value parsed = internal::TextFragmentToValue("start,end");
    EXPECT_FALSE(parsed.FindKey(kPrefixKey));
    EXPECT_EQ("start", parsed.FindKey(kTextStartKey)->GetString());
    EXPECT_EQ("end", parsed.FindKey(kTextEndKey)->GetString());
    EXPECT_FALSE(parsed.FindKey(kSuffixKey));
  }

  {
    // prefix + textStart.
    const base::Value parsed = internal::TextFragmentToValue("prefix-,start");
    EXPECT_EQ("prefix", parsed.FindKey(kPrefixKey)->GetString());
    EXPECT_EQ("start", parsed.FindKey(kTextStartKey)->GetString());
    EXPECT_FALSE(parsed.FindKey(kTextEndKey));
    EXPECT_FALSE(parsed.FindKey(kSuffixKey));
  }

  {
    // textStart + suffix.
    const base::Value parsed = internal::TextFragmentToValue("start,-suffix");
    EXPECT_FALSE(parsed.FindKey(kPrefixKey));
    EXPECT_EQ("start", parsed.FindKey(kTextStartKey)->GetString());
    EXPECT_FALSE(parsed.FindKey(kTextEndKey));
    EXPECT_EQ("suffix", parsed.FindKey(kSuffixKey)->GetString());
  }

  {
    // prefix + textStart + textEnd.
    const base::Value parsed =
        internal::TextFragmentToValue("prefix-,start,end");
    EXPECT_EQ("prefix", parsed.FindKey(kPrefixKey)->GetString());
    EXPECT_EQ("start", parsed.FindKey(kTextStartKey)->GetString());
    EXPECT_EQ("end", parsed.FindKey(kTextEndKey)->GetString());
    EXPECT_FALSE(parsed.FindKey(kSuffixKey));
  }

  {
    // textStart + textEnd + suffix.
    const base::Value parsed =
        internal::TextFragmentToValue("start,end,-suffix");
    EXPECT_FALSE(parsed.FindKey(kPrefixKey));
    EXPECT_EQ("start", parsed.FindKey(kTextStartKey)->GetString());
    EXPECT_EQ("end", parsed.FindKey(kTextEndKey)->GetString());
    EXPECT_EQ("suffix", parsed.FindKey(kSuffixKey)->GetString());
  }

  {
    // All four parts.
    const base::Value parsed =
        internal::TextFragmentToValue("prefix-,start,end,-suffix");
    EXPECT_EQ("prefix", parsed.FindKey(kPrefixKey)->GetString());
    EXPECT_EQ("start", parsed.FindKey(kTextStartKey)->GetString());
    EXPECT_EQ("end", parsed.FindKey(kTextEndKey)->GetString());
    EXPECT_EQ("suffix", parsed.FindKey(kSuffixKey)->GetString());
  }

  {
    // A trailing comma does not invalidate an otherwise valid fragment.
    const base::Value parsed = internal::TextFragmentToValue("start,");
    EXPECT_FALSE(parsed.FindKey(kPrefixKey));
    EXPECT_EQ("start", parsed.FindKey(kTextStartKey)->GetString());
    EXPECT_FALSE(parsed.FindKey(kTextEndKey));
    EXPECT_FALSE(parsed.FindKey(kSuffixKey));
  }

  // --- Malformed fragments: each must yield a NONE Value ---

  EXPECT_EQ(base::Value::Type::NONE, internal::TextFragmentToValue("").type());
  EXPECT_EQ(base::Value::Type::NONE,
            internal::TextFragmentToValue(
                "some,really-,malformed,-thing,with,too,many,commas")
                .type());
  EXPECT_EQ(base::Value::Type::NONE,
            internal::TextFragmentToValue("prefix-,-suffix").type());
  EXPECT_EQ(base::Value::Type::NONE,
            internal::TextFragmentToValue("start,prefix-,-suffix").type());
  EXPECT_EQ(base::Value::Type::NONE,
            internal::TextFragmentToValue("prefix-,-suffix,start").type());
  EXPECT_EQ(base::Value::Type::NONE,
            internal::TextFragmentToValue("prefix-").type());
  EXPECT_EQ(base::Value::Type::NONE,
            internal::TextFragmentToValue("-suffix").type());
}
} // namespace web
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment