Commit 775abc2d authored by csharrison's avatar csharrison Committed by Commit bot

Optimize KURL protocols

This patch optimizes KURL::protocol and KURL::protocolIs by keeping
an AtomicString m_protocol on KURL. This reduces string allocations
throughout the code using KURL::protocol().

This also fixes an inconsistency in KURL::protocolIs, which previously
could return true for invalid URLs.

BUG=348655

Review-Url: https://codereview.chromium.org/2463703002
Cr-Commit-Position: refs/heads/master@{#438197}
parent ff9d9941
...@@ -38,7 +38,6 @@ TEST(MixedContentCheckerTest, IsMixedContent) { ...@@ -38,7 +38,6 @@ TEST(MixedContentCheckerTest, IsMixedContent) {
{"https://example.com/foo", "blob:null/foo", false}, {"https://example.com/foo", "blob:null/foo", false},
{"https://example.com/foo", "filesystem:https://example.com/foo", false}, {"https://example.com/foo", "filesystem:https://example.com/foo", false},
{"https://example.com/foo", "filesystem:http://example.com/foo", false}, {"https://example.com/foo", "filesystem:http://example.com/foo", false},
{"https://example.com/foo", "filesystem:null/foo", false},
{"https://example.com/foo", "http://example.com/foo", true}, {"https://example.com/foo", "http://example.com/foo", true},
{"https://example.com/foo", "http://google.com/foo", true}, {"https://example.com/foo", "http://google.com/foo", true},
......
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#include "wtf/StdLibExtras.h" #include "wtf/StdLibExtras.h"
#include "wtf/text/CString.h" #include "wtf/text/CString.h"
#include "wtf/text/StringHash.h" #include "wtf/text/StringHash.h"
#include "wtf/text/StringStatics.h"
#include "wtf/text/StringUTF8Adaptor.h" #include "wtf/text/StringUTF8Adaptor.h"
#include "wtf/text/TextEncoding.h" #include "wtf/text/TextEncoding.h"
#include <algorithm> #include <algorithm>
...@@ -48,6 +49,7 @@ static const int invalidPortNumber = 0xFFFF; ...@@ -48,6 +49,7 @@ static const int invalidPortNumber = 0xFFFF;
static void assertProtocolIsGood(const char* protocol) { static void assertProtocolIsGood(const char* protocol) {
#if ENABLE(ASSERT) #if ENABLE(ASSERT)
DCHECK_NE(protocol, "");
const char* p = protocol; const char* p = protocol;
while (*p) { while (*p) {
ASSERT(*p > ' ' && *p < 0x7F && !(*p >= 'A' && *p <= 'Z')); ASSERT(*p > ' ' && *p < 0x7F && !(*p >= 'A' && *p <= 'Z'));
...@@ -241,7 +243,7 @@ KURL::KURL(const AtomicString& canonicalString, ...@@ -241,7 +243,7 @@ KURL::KURL(const AtomicString& canonicalString,
m_protocolIsInHTTPFamily(false), m_protocolIsInHTTPFamily(false),
m_parsed(parsed), m_parsed(parsed),
m_string(canonicalString) { m_string(canonicalString) {
initProtocolIsInHTTPFamily(); initProtocolMetadata();
initInnerURL(); initInnerURL();
} }
...@@ -253,6 +255,7 @@ KURL::KURL(WTF::HashTableDeletedValueType) ...@@ -253,6 +255,7 @@ KURL::KURL(WTF::HashTableDeletedValueType)
KURL::KURL(const KURL& other) KURL::KURL(const KURL& other)
: m_isValid(other.m_isValid), : m_isValid(other.m_isValid),
m_protocolIsInHTTPFamily(other.m_protocolIsInHTTPFamily), m_protocolIsInHTTPFamily(other.m_protocolIsInHTTPFamily),
m_protocol(other.m_protocol),
m_parsed(other.m_parsed), m_parsed(other.m_parsed),
m_string(other.m_string) { m_string(other.m_string) {
if (other.m_innerURL.get()) if (other.m_innerURL.get())
...@@ -264,6 +267,7 @@ KURL::~KURL() {} ...@@ -264,6 +267,7 @@ KURL::~KURL() {}
KURL& KURL::operator=(const KURL& other) { KURL& KURL::operator=(const KURL& other) {
m_isValid = other.m_isValid; m_isValid = other.m_isValid;
m_protocolIsInHTTPFamily = other.m_protocolIsInHTTPFamily; m_protocolIsInHTTPFamily = other.m_protocolIsInHTTPFamily;
m_protocol = other.m_protocol;
m_parsed = other.m_parsed; m_parsed = other.m_parsed;
m_string = other.m_string; m_string = other.m_string;
if (other.m_innerURL) if (other.m_innerURL)
...@@ -277,6 +281,7 @@ KURL KURL::copy() const { ...@@ -277,6 +281,7 @@ KURL KURL::copy() const {
KURL result; KURL result;
result.m_isValid = m_isValid; result.m_isValid = m_isValid;
result.m_protocolIsInHTTPFamily = m_protocolIsInHTTPFamily; result.m_protocolIsInHTTPFamily = m_protocolIsInHTTPFamily;
result.m_protocol = m_protocol.isolatedCopy();
result.m_parsed = m_parsed; result.m_parsed = m_parsed;
result.m_string = m_string.isolatedCopy(); result.m_string = m_string.isolatedCopy();
if (m_innerURL) if (m_innerURL)
...@@ -312,7 +317,7 @@ bool KURL::hasPath() const { ...@@ -312,7 +317,7 @@ bool KURL::hasPath() const {
String KURL::lastPathComponent() const { String KURL::lastPathComponent() const {
if (!m_isValid) if (!m_isValid)
return stringForInvalidComponent(); return stringViewForInvalidComponent().toString();
ASSERT(!m_string.isNull()); ASSERT(!m_string.isNull());
// When the output ends in a slash, WebCore has different expectations than // When the output ends in a slash, WebCore has different expectations than
...@@ -336,7 +341,8 @@ String KURL::lastPathComponent() const { ...@@ -336,7 +341,8 @@ String KURL::lastPathComponent() const {
} }
String KURL::protocol() const { String KURL::protocol() const {
return componentString(m_parsed.scheme); DCHECK_EQ(componentString(m_parsed.scheme), m_protocol);
return m_protocol;
} }
String KURL::host() const { String KURL::host() const {
...@@ -365,6 +371,9 @@ unsigned short KURL::port() const { ...@@ -365,6 +371,9 @@ unsigned short KURL::port() const {
return static_cast<unsigned short>(port); return static_cast<unsigned short>(port);
} }
// TODO(csharrison): Migrate pass() and user() to return a StringView. Most
// consumers just need to know if the string is empty.
String KURL::pass() const { String KURL::pass() const {
// Bug: https://bugs.webkit.org/show_bug.cgi?id=21015 this function returns // Bug: https://bugs.webkit.org/show_bug.cgi?id=21015 this function returns
// a null string when the password is empty, which we duplicate here. // a null string when the password is empty, which we duplicate here.
...@@ -767,9 +776,8 @@ void KURL::init(const KURL& base, ...@@ -767,9 +776,8 @@ void KURL::init(const KURL& base,
m_string = AtomicString::fromUTF8(output.data(), output.length()); m_string = AtomicString::fromUTF8(output.data(), output.length());
} }
initProtocolIsInHTTPFamily(); initProtocolMetadata();
initInnerURL(); initInnerURL();
DCHECK_EQ(protocol(), protocol().lower());
} }
void KURL::initInnerURL() { void KURL::initInnerURL() {
...@@ -786,50 +794,26 @@ void KURL::initInnerURL() { ...@@ -786,50 +794,26 @@ void KURL::initInnerURL() {
m_innerURL.reset(); m_innerURL.reset();
} }
template <typename CHAR> void KURL::initProtocolMetadata() {
bool internalProtocolIs(const url::Component& scheme,
const CHAR* spec,
const char* protocol) {
const CHAR* begin = spec + scheme.begin;
const CHAR* end = begin + scheme.len;
while (begin != end && *protocol) {
ASSERT(toASCIILower(*protocol) == *protocol);
if (toASCIILower(*begin++) != *protocol++)
return false;
}
// Both strings are equal (ignoring case) if and only if all of the characters
// were equal, and the end of both has been reached.
return begin == end && !*protocol;
}
template <typename CHAR>
bool checkIfProtocolIsInHTTPFamily(const url::Component& scheme,
const CHAR* spec) {
if (scheme.len == 4)
return internalProtocolIs(scheme, spec, "http");
if (scheme.len == 5)
return internalProtocolIs(scheme, spec, "https");
if (scheme.len == 7)
return internalProtocolIs(scheme, spec, "http-so");
if (scheme.len == 8)
return internalProtocolIs(scheme, spec, "https-so");
return false;
}
void KURL::initProtocolIsInHTTPFamily() {
if (!m_isValid) { if (!m_isValid) {
m_protocolIsInHTTPFamily = false; m_protocolIsInHTTPFamily = false;
m_protocol = componentString(m_parsed.scheme);
return; return;
} }
ASSERT(!m_string.isNull()); DCHECK(!m_string.isNull());
StringView protocol = componentStringView(m_parsed.scheme);
m_protocolIsInHTTPFamily = true;
if (protocol == WTF::httpsAtom) {
m_protocol = WTF::httpsAtom;
} else if (protocol == WTF::httpAtom) {
m_protocol = WTF::httpAtom;
} else {
m_protocol = AtomicString(protocol.toString());
m_protocolIsInHTTPFamily = m_protocolIsInHTTPFamily =
m_string.is8Bit() ? checkIfProtocolIsInHTTPFamily(m_parsed.scheme, m_protocol == "http-so" || m_protocol == "https-so";
m_string.characters8()) }
: checkIfProtocolIsInHTTPFamily( DCHECK_EQ(m_protocol, m_protocol.lower());
m_parsed.scheme, m_string.characters16());
} }
bool KURL::protocolIs(const char* protocol) const { bool KURL::protocolIs(const char* protocol) const {
...@@ -840,26 +824,16 @@ bool KURL::protocolIs(const char* protocol) const { ...@@ -840,26 +824,16 @@ bool KURL::protocolIs(const char* protocol) const {
// instead. // instead.
// FIXME: Chromium code needs to be fixed for this assert to be enabled. // FIXME: Chromium code needs to be fixed for this assert to be enabled.
// ASSERT(strcmp(protocol, "javascript")); // ASSERT(strcmp(protocol, "javascript"));
return m_protocol == protocol;
if (m_string.isNull() || m_parsed.scheme.len <= 0)
return *protocol == '\0';
return m_string.is8Bit()
? internalProtocolIs(m_parsed.scheme, m_string.characters8(),
protocol)
: internalProtocolIs(m_parsed.scheme, m_string.characters16(),
protocol);
} }
String KURL::stringForInvalidComponent() const { StringView KURL::stringViewForInvalidComponent() const {
if (m_string.isNull()) return m_string.isNull() ? StringView() : StringView("", 0);
return String();
return emptyString();
} }
String KURL::componentString(const url::Component& component) const { StringView KURL::componentStringView(const url::Component& component) const {
if (!m_isValid || component.len <= 0) if (!m_isValid || component.len <= 0)
return stringForInvalidComponent(); return stringViewForInvalidComponent();
// begin and len are in terms of bytes which do not match // begin and len are in terms of bytes which do not match
// if string() is UTF-16 and input contains non-ASCII characters. // if string() is UTF-16 and input contains non-ASCII characters.
// However, the only part in urlString that can contain non-ASCII // However, the only part in urlString that can contain non-ASCII
...@@ -868,7 +842,14 @@ String KURL::componentString(const url::Component& component) const { ...@@ -868,7 +842,14 @@ String KURL::componentString(const url::Component& component) const {
// byte) will be longer than what's needed by 'mid'. However, mid // byte) will be longer than what's needed by 'mid'. However, mid
// truncates len to avoid go past the end of a string so that we can // truncates len to avoid go past the end of a string so that we can
// get away without doing anything here. // get away without doing anything here.
return getString().substring(component.begin, component.len);
int maxLength = getString().length() - component.begin;
return StringView(getString(), component.begin,
component.len > maxLength ? maxLength : component.len);
}
String KURL::componentString(const url::Component& component) const {
return componentStringView(component).toString();
} }
template <typename CHAR> template <typename CHAR>
...@@ -882,6 +863,7 @@ void KURL::replaceComponents(const url::Replacements<CHAR>& replacements) { ...@@ -882,6 +863,7 @@ void KURL::replaceComponents(const url::Replacements<CHAR>& replacements) {
m_parsed = newParsed; m_parsed = newParsed;
m_string = AtomicString::fromUTF8(output.data(), output.length()); m_string = AtomicString::fromUTF8(output.data(), output.length());
initProtocolMetadata();
} }
bool KURL::isSafeToSendToAnotherThread() const { bool KURL::isSafeToSendToAnotherThread() const {
......
...@@ -196,17 +196,24 @@ class PLATFORM_EXPORT KURL { ...@@ -196,17 +196,24 @@ class PLATFORM_EXPORT KURL {
const String& relative, const String& relative,
const WTF::TextEncoding* queryEncoding); const WTF::TextEncoding* queryEncoding);
StringView componentStringView(const url::Component&) const;
String componentString(const url::Component&) const; String componentString(const url::Component&) const;
String stringForInvalidComponent() const; StringView stringViewForInvalidComponent() const;
template <typename CHAR> template <typename CHAR>
void replaceComponents(const url::Replacements<CHAR>&); void replaceComponents(const url::Replacements<CHAR>&);
void initInnerURL(); void initInnerURL();
void initProtocolIsInHTTPFamily(); void initProtocolMetadata();
bool m_isValid; bool m_isValid;
bool m_protocolIsInHTTPFamily; bool m_protocolIsInHTTPFamily;
// Keep a separate string for the protocol to avoid copious copies for
// protocol(). Normally this will be Atomic, except when constructed via
// KURL::copy(), which is deep.
String m_protocol;
url::Parsed m_parsed; url::Parsed m_parsed;
String m_string; String m_string;
std::unique_ptr<KURL> m_innerURL; std::unique_ptr<KURL> m_innerURL;
......
...@@ -367,46 +367,46 @@ TEST(KURLTest, Valid_HTTP_FTP_URLsHaveHosts) { ...@@ -367,46 +367,46 @@ TEST(KURLTest, Valid_HTTP_FTP_URLsHaveHosts) {
url::AddStandardScheme("http-so", url::SCHEME_WITH_PORT); url::AddStandardScheme("http-so", url::SCHEME_WITH_PORT);
url::AddStandardScheme("https-so", url::SCHEME_WITH_PORT); url::AddStandardScheme("https-so", url::SCHEME_WITH_PORT);
KURL kurl; KURL kurl(ParsedURLString, "foo://www.google.com/");
EXPECT_TRUE(kurl.setProtocol("http")); EXPECT_TRUE(kurl.setProtocol("http"));
EXPECT_TRUE(kurl.protocolIs("http")); EXPECT_TRUE(kurl.protocolIs("http"));
EXPECT_FALSE(kurl.isValid()); EXPECT_TRUE(kurl.protocolIsInHTTPFamily());
EXPECT_TRUE(kurl.isValid());
EXPECT_TRUE(kurl.setProtocol("http-so")); EXPECT_TRUE(kurl.setProtocol("http-so"));
EXPECT_TRUE(kurl.protocolIs("http-so")); EXPECT_TRUE(kurl.protocolIs("http-so"));
EXPECT_FALSE(kurl.isValid()); EXPECT_TRUE(kurl.isValid());
EXPECT_TRUE(kurl.setProtocol("https")); EXPECT_TRUE(kurl.setProtocol("https"));
EXPECT_TRUE(kurl.protocolIs("https")); EXPECT_TRUE(kurl.protocolIs("https"));
EXPECT_FALSE(kurl.isValid()); EXPECT_TRUE(kurl.isValid());
EXPECT_TRUE(kurl.setProtocol("https-so")); EXPECT_TRUE(kurl.setProtocol("https-so"));
EXPECT_TRUE(kurl.protocolIs("https-so")); EXPECT_TRUE(kurl.protocolIs("https-so"));
EXPECT_FALSE(kurl.isValid()); EXPECT_TRUE(kurl.isValid());
EXPECT_TRUE(kurl.setProtocol("ftp")); EXPECT_TRUE(kurl.setProtocol("ftp"));
EXPECT_TRUE(kurl.protocolIs("ftp")); EXPECT_TRUE(kurl.protocolIs("ftp"));
EXPECT_FALSE(kurl.isValid()); EXPECT_TRUE(kurl.isValid());
kurl = KURL(KURL(), "http://"); kurl = KURL(KURL(), "http://");
EXPECT_FALSE(kurl.protocolIs("http"));
kurl = KURL(KURL(), "http://wide#鸡");
EXPECT_TRUE(kurl.protocolIs("http")); EXPECT_TRUE(kurl.protocolIs("http"));
EXPECT_FALSE(kurl.isValid()); EXPECT_EQ(kurl.protocol(), "http");
kurl = KURL(KURL(), "http-so://"); kurl = KURL(KURL(), "http-so://foo");
EXPECT_TRUE(kurl.protocolIs("http-so")); EXPECT_TRUE(kurl.protocolIs("http-so"));
EXPECT_FALSE(kurl.isValid());
kurl = KURL(KURL(), "https://"); kurl = KURL(KURL(), "https://foo");
EXPECT_TRUE(kurl.protocolIs("https")); EXPECT_TRUE(kurl.protocolIs("https"));
EXPECT_FALSE(kurl.isValid());
kurl = KURL(KURL(), "https-so://"); kurl = KURL(KURL(), "https-so://foo");
EXPECT_TRUE(kurl.protocolIs("https-so")); EXPECT_TRUE(kurl.protocolIs("https-so"));
EXPECT_FALSE(kurl.isValid());
kurl = KURL(KURL(), "ftp://"); kurl = KURL(KURL(), "ftp://foo");
EXPECT_TRUE(kurl.protocolIs("ftp")); EXPECT_TRUE(kurl.protocolIs("ftp"));
EXPECT_FALSE(kurl.isValid());
kurl = KURL(KURL(), "http://host/"); kurl = KURL(KURL(), "http://host/");
EXPECT_TRUE(kurl.isValid()); EXPECT_TRUE(kurl.isValid());
...@@ -699,7 +699,6 @@ TEST(KURLTest, ProtocolIs) { ...@@ -699,7 +699,6 @@ TEST(KURLTest, ProtocolIs) {
KURL invalidUTF8(ParsedURLString, "http://a@9%aa%:"); KURL invalidUTF8(ParsedURLString, "http://a@9%aa%:");
EXPECT_FALSE(invalidUTF8.protocolIs("http")); EXPECT_FALSE(invalidUTF8.protocolIs("http"));
EXPECT_TRUE(invalidUTF8.protocolIs(""));
KURL capital(KURL(), "HTTP://www.example.text"); KURL capital(KURL(), "HTTP://www.example.text");
EXPECT_TRUE(capital.protocolIs("http")); EXPECT_TRUE(capital.protocolIs("http"));
......
...@@ -291,6 +291,8 @@ WTF_EXPORT extern const AtomicString& starAtom; ...@@ -291,6 +291,8 @@ WTF_EXPORT extern const AtomicString& starAtom;
WTF_EXPORT extern const AtomicString& xmlAtom; WTF_EXPORT extern const AtomicString& xmlAtom;
WTF_EXPORT extern const AtomicString& xmlnsAtom; WTF_EXPORT extern const AtomicString& xmlnsAtom;
WTF_EXPORT extern const AtomicString& xlinkAtom; WTF_EXPORT extern const AtomicString& xlinkAtom;
WTF_EXPORT extern const AtomicString& httpAtom;
WTF_EXPORT extern const AtomicString& httpsAtom;
// AtomicStringHash is the default hash for AtomicString // AtomicStringHash is the default hash for AtomicString
template <typename T> template <typename T>
......
...@@ -54,6 +54,8 @@ WTF_EXPORT DEFINE_GLOBAL(AtomicString, starAtom); ...@@ -54,6 +54,8 @@ WTF_EXPORT DEFINE_GLOBAL(AtomicString, starAtom);
WTF_EXPORT DEFINE_GLOBAL(AtomicString, xmlAtom); WTF_EXPORT DEFINE_GLOBAL(AtomicString, xmlAtom);
WTF_EXPORT DEFINE_GLOBAL(AtomicString, xmlnsAtom); WTF_EXPORT DEFINE_GLOBAL(AtomicString, xmlnsAtom);
WTF_EXPORT DEFINE_GLOBAL(AtomicString, xlinkAtom); WTF_EXPORT DEFINE_GLOBAL(AtomicString, xlinkAtom);
WTF_EXPORT DEFINE_GLOBAL(AtomicString, httpAtom);
WTF_EXPORT DEFINE_GLOBAL(AtomicString, httpsAtom);
// This is not an AtomicString because it is unlikely to be used as an // This is not an AtomicString because it is unlikely to be used as an
// event/element/attribute name, so it shouldn't pollute the AtomicString hash // event/element/attribute name, so it shouldn't pollute the AtomicString hash
...@@ -93,6 +95,8 @@ void StringStatics::init() { ...@@ -93,6 +95,8 @@ void StringStatics::init() {
new (NotNull, (void*)&xmlnsAtom) AtomicString(addStaticASCIILiteral("xmlns")); new (NotNull, (void*)&xmlnsAtom) AtomicString(addStaticASCIILiteral("xmlns"));
new (NotNull, (void*)&xlinkAtom) AtomicString(addStaticASCIILiteral("xlink")); new (NotNull, (void*)&xlinkAtom) AtomicString(addStaticASCIILiteral("xlink"));
new (NotNull, (void*)&xmlnsWithColon) String("xmlns:"); new (NotNull, (void*)&xmlnsWithColon) String("xmlns:");
new (NotNull, (void*)&httpAtom) AtomicString(addStaticASCIILiteral("http"));
new (NotNull, (void*)&httpsAtom) AtomicString(addStaticASCIILiteral("https"));
} }
} // namespace WTF } // namespace WTF
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment