Commit ff38f027 authored by stuartmorgan's avatar stuartmorgan Committed by Commit bot

Upstream ios/web/ HTML tokenizer

This upstreams the existing "third"-party HTML tokenizer used for
network-level injection, which is used to find the insertion point
for network JS injection.

BUG=464810

Review URL: https://codereview.chromium.org/1031023002

Cr-Commit-Position: refs/heads/master@{#322155}
parent 3e737f93
Copyright (C) 2008 Apple Inc. All Rights Reserved.
Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
Copyright (C) 2010 Google, Inc. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
*
THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
rohitrao@chromium.org
stuartmorgan@chromium.org
Name: blink HTMLTokenizer
Short Name: blink
URL: http://www.chromium.org/blink
Version: commit 2e924d852a814dee5efde8deb412a70a4d90c5c7
License: BSD License
Security Critical: yes
Description:
The HTMLTokenizer and associated classes from blink handle tokenizing HTML
content into runtime usable data structures.
Local Modifications:
The blink code was used as a starting point and heavily modified to remove
unnecessary code, dependencies on WTF, and dependencies on GPL code.
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
{
'targets': [
{
'target_name': 'blink_html_tokenizer',
'type': 'static_library',
'dependencies': [
'../../../base/base.gyp:base',
],
'include_dirs': [
'../../..',
],
'sources': [
'src/html_character_provider.h',
'src/html_input_stream_preprocessor.h',
'src/html_markup_tokenizer_inlines.h',
'src/html_token.h',
'src/html_token.mm',
'src/html_tokenizer.h',
'src/html_tokenizer.mm',
'src/html_tokenizer_adapter.h',
],
},
],
}
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef IOS_THIRD_PARTY_BLINK_SRC_HTML_CHARACTER_PROVIDER_H_
#define IOS_THIRD_PARTY_BLINK_SRC_HTML_CHARACTER_PROVIDER_H_
#include "ios/third_party/blink/src/html_tokenizer_adapter.h"
namespace WebCore {
const LChar kEndOfFileMarker = 0;
// CharacterProvider provides input characters to WebCore::HTMLTokenizer.
// It replaces WebCore::SegmentedString (which sits ontop of WTF::String).
class CharacterProvider {
WTF_MAKE_NONCOPYABLE(CharacterProvider);
public:
CharacterProvider()
: _totalBytes(0)
, _remainingBytes(0)
, _singleBytePtr(nullptr)
, _doubleBytePtr(nullptr)
, _littleEndian(false)
{
}
void setContents(const LChar* str, size_t numberOfBytes)
{
_totalBytes = numberOfBytes;
_remainingBytes = numberOfBytes;
_singleBytePtr = str;
_doubleBytePtr = nullptr;
_littleEndian = false;
}
void setContents(const UChar* str, size_t numberOfBytes)
{
_totalBytes = numberOfBytes;
_remainingBytes = numberOfBytes;
_singleBytePtr = nullptr;
_doubleBytePtr = str;
_littleEndian = false;
}
void clear()
{
_totalBytes = 0;
_remainingBytes = 0;
_singleBytePtr = nullptr;
_doubleBytePtr = nullptr;
_littleEndian = false;
}
bool startsWith(const LChar* str,
size_t byteCount,
bool caseInsensitive = false) const
{
if (!str || byteCount > _remainingBytes)
return false;
for (size_t index = 0; index < byteCount; ++index) {
UChar lhs = characterAtIndex(index);
UChar rhs = str[index];
if (caseInsensitive) {
if (isASCIIUpper(lhs))
lhs = toLowerCase(lhs);
if (isASCIIUpper(rhs))
rhs = toLowerCase(rhs);
}
if (lhs != rhs)
return false;
}
return true;
}
inline UChar currentCharacter() const
{
return characterAtIndex(0);
}
inline UChar nextCharacter()
{
advanceBytePointer();
return characterAtIndex(0);
}
inline void next()
{
advanceBytePointer();
}
inline bool isEmpty() const
{
return !_remainingBytes;
}
inline size_t remainingBytes() const
{
return _remainingBytes;
}
inline size_t bytesProvided() const
{
return _totalBytes - _remainingBytes;
}
inline void setLittleEndian()
{
_littleEndian = true;
}
private:
void advanceBytePointer()
{
--_remainingBytes;
if (!_remainingBytes)
return;
if (_singleBytePtr)
++_singleBytePtr;
else {
DCHECK(_doubleBytePtr);
++_doubleBytePtr;
}
}
UChar characterAtIndex(size_t index) const
{
if (!_remainingBytes) {
// There is a quirk in the blink implementation wherein the empty state
// is not set on the source until next() has been called when
// _remainingBytes is zero. In this case, return kEndOfFileMarker.
return kEndOfFileMarker;
}
ASSERT(_singleBytePtr || _doubleBytePtr);
UChar character = kEndOfFileMarker;
if (_singleBytePtr)
character = _singleBytePtr[index];
else
character = _doubleBytePtr[index];
if (_littleEndian)
character = ByteSwap(character);
return character;
}
private:
size_t _totalBytes;
size_t _remainingBytes;
const LChar* _singleBytePtr;
const UChar* _doubleBytePtr;
bool _littleEndian;
};
}
#endif // IOS_THIRD_PARTY_BLINK_SRC_HTML_CHARACTER_PROVIDER_H_
/*
* Copyright (C) 2008 Apple Inc. All Rights Reserved.
* Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
* Copyright (C) 2013 Google, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef InputStreamPreprocessor_h
#define InputStreamPreprocessor_h
#include "html_character_provider.h"
namespace WebCore {
// http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
// Normalizes the raw characters of a CharacterProvider before the tokenizer
// sees them: "\r" and "\r\n" are collapsed to a single '\n', and NUL
// characters are either skipped or replaced with U+FFFD depending on
// Tokenizer::shouldSkipNullCharacters(). The result of the latest peek() or
// advance() is exposed through nextInputCharacter().
//
// |Tokenizer| only needs to provide shouldSkipNullCharacters().
template <typename Tokenizer>
class InputStreamPreprocessor {
WTF_MAKE_NONCOPYABLE(InputStreamPreprocessor);
public:
// Stores |tokenizer| as a raw pointer and resets the preprocessor state.
// The pointer is only dereferenced to call shouldSkipNullCharacters().
InputStreamPreprocessor(Tokenizer* tokenizer)
: m_tokenizer(tokenizer)
{
reset();
}
// Returns the (already normalized) character produced by the most recent
// peek()/advance() call.
ALWAYS_INLINE UChar nextInputCharacter() const { return m_nextInputCharacter; }
// Returns whether we succeeded in peeking at the next character.
// The only way we can fail to peek is if there are no more
// characters in |source| (after collapsing \r\n, etc).
ALWAYS_INLINE bool peek(CharacterProvider& source)
{
m_nextInputCharacter = source.currentCharacter();
// Every branch in this function is expensive, so we have a
// fast-reject branch for characters that don't require special
// handling. Please run the parser benchmark whenever you touch
// this function. It's very hot.
// The mask evaluates to 0x0F, so any character with a bit set above the
// low nibble takes the fast path; only values 0x00-0x0F fall through to
// processNextInputCharacter().
static const UChar specialCharacterMask = '\n' | '\r' | '\0';
if (m_nextInputCharacter & ~specialCharacterMask) {
m_skipNextNewLine = false;
return true;
}
return processNextInputCharacter(source);
}
// Returns whether there are more characters in |source| after advancing.
ALWAYS_INLINE bool advance(CharacterProvider& source)
{
source.next();
if (source.isEmpty())
return false;
return peek(source);
}
// Clears the cached character. |skipNextNewLine| seeds the "previous
// character was \r" flag, so that a '\n' seen first is dropped.
void reset(bool skipNextNewLine = false)
{
m_nextInputCharacter = '\0';
m_skipNextNewLine = skipNextNewLine;
}
private:
// Slow path of peek() for characters in the 0x00-0x0F range. Returns false
// if |source| runs out while skipping characters, true otherwise (with
// m_nextInputCharacter holding the normalized character).
bool processNextInputCharacter(CharacterProvider& source)
{
ProcessAgain:
ASSERT(m_nextInputCharacter == source.currentCharacter());
// A '\n' directly after a '\r' was already reported as '\n'; drop it.
if (m_nextInputCharacter == '\n' && m_skipNextNewLine) {
m_skipNextNewLine = false;
source.next();
if (source.isEmpty())
return false;
m_nextInputCharacter = source.currentCharacter();
}
// Report '\r' as '\n' and remember to swallow a following '\n'.
if (m_nextInputCharacter == '\r') {
m_nextInputCharacter = '\n';
m_skipNextNewLine = true;
} else {
m_skipNextNewLine = false;
// FIXME: The spec indicates that the surrogate pair range as well as
// a number of specific character values are parse errors and should be replaced
// by the replacement character. We suspect this is a problem with the spec as doing
// that filtering breaks surrogate pair handling and causes us not to match Minefield.
// A NUL that is not the final character is either skipped (restarting
// the normalization on the following character) or replaced by U+FFFD.
if (m_nextInputCharacter == '\0' && !shouldTreatNullAsEndOfFileMarker(source)) {
if (m_tokenizer->shouldSkipNullCharacters()) {
source.next();
if (source.isEmpty())
return false;
m_nextInputCharacter = source.currentCharacter();
goto ProcessAgain;
}
m_nextInputCharacter = 0xFFFD;
}
}
return true;
}
// A NUL is treated as the end-of-file marker only when it is the last
// remaining character of |source|.
bool shouldTreatNullAsEndOfFileMarker(CharacterProvider& source) const
{
return source.remainingBytes() == 1;
}
// Tokenizer consulted for NUL handling; stored as a raw pointer.
Tokenizer* m_tokenizer;
// http://www.whatwg.org/specs/web-apps/current-work/#next-input-character
UChar m_nextInputCharacter;
// True when the previous character was '\r', i.e. a following '\n' must be
// dropped.
bool m_skipNextNewLine;
};
}
#endif // InputStreamPreprocessor_h
/*
* Copyright (C) 2008 Apple Inc. All Rights Reserved.
* Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
* Copyright (C) 2010 Google, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MarkupTokenizerInlines_h
#define MarkupTokenizerInlines_h
#include "html_character_provider.h"
namespace WebCore {
// Reports whether |cc| is one of the four characters this tokenizer treats as
// whitespace: space, line feed, horizontal tab or form feed. Carriage return
// is deliberately not in the list.
template <typename CharType>
inline bool isTokenizerWhitespace(CharType cc)
{
    if (cc == ' ')
        return true;
    if (cc == '\x0A') // line feed
        return true;
    if (cc == '\x09') // horizontal tab
        return true;
    return cc == '\x0C'; // form feed
}
// Advances |source| by one character for every character of the
// NUL-terminated string |expectedCharacters|.
//
// When NDEBUG is not defined, each step also ASSERTs that the expected
// character is a lowercase ASCII letter and that the current input character,
// after folding ASCII uppercase to lowercase, matches it. No comparison is
// compiled in otherwise, so callers are expected to have verified the match
// themselves before calling.
//
// Declared inline because this definition lives in a header: without it,
// every translation unit including the header would emit its own external
// definition and the link would fail with duplicate symbols.
inline void advanceStringAndASSERTIgnoringCase(CharacterProvider& source, const LChar* expectedCharacters)
{
    while (*expectedCharacters) {
#ifndef NDEBUG
        ASSERT(isASCIILower(*expectedCharacters));
        UChar currentCharacter = source.currentCharacter();
        if (isASCIIUpper(currentCharacter))
            currentCharacter = toLowerCase(currentCharacter);
        ASSERT(currentCharacter == *expectedCharacters);
#endif
        ++expectedCharacters;
        source.next();
    }
}
// Consumes one character from |source|, ASSERTing first that the character
// being consumed is |expectedCharacter|. The advance happens regardless of
// the outcome of the check.
inline void advanceAndASSERT(CharacterProvider& source, UChar expectedCharacter)
{
ASSERT(source.currentCharacter() == expectedCharacter);
source.next();
}
// Helper macros for writing the tokenizer state machine as one large switch.
// They expand in place and rely on names available at the expansion site:
// |m_state|, |m_inputStreamPreprocessor|, |source|, |cc| and
// haveBufferedCharacterToken().

// Opens the handler for a state. Expands to both a `case` label (entry via
// the switch) and a goto label named after the state (entry via the
// RECONSUME_IN / ADVANCE_TO / SWITCH_TO macros below).
#define BEGIN_STATE(prefix, stateName) case prefix::stateName: stateName:
// Closes a state handler. Every path through a handler is expected to leave
// through a return or goto, so reaching this point trips ASSERT_NOT_REACHED.
#define END_STATE() ASSERT_NOT_REACHED(); break;
// We use this macro when the HTML5 spec says "reconsume the current input
// character in the <mumble> state."
// |cc| is left untouched and no input is consumed.
#define RECONSUME_IN(prefix, stateName) \
do { \
m_state = prefix::stateName; \
goto stateName; \
} while (false)
// We use this macro when the HTML5 spec says "consume the next input
// character ... and switch to the <mumble> state."
// If the input runs out, the enclosing function returns whether a character
// token is buffered; |m_state| has already been updated at that point.
#define ADVANCE_TO(prefix, stateName) \
do { \
m_state = prefix::stateName; \
if (!m_inputStreamPreprocessor.advance(source)) \
return haveBufferedCharacterToken(); \
cc = m_inputStreamPreprocessor.nextInputCharacter(); \
goto stateName; \
} while (false)
// Sometimes there's more complicated logic in the spec that separates when
// we consume the next input character and when we switch to a particular
// state. We handle those cases by advancing the source directly and using
// this macro to switch to the indicated state.
// Unlike ADVANCE_TO this only peeks at the current character; it returns from
// the enclosing function in the same way when no character is available.
#define SWITCH_TO(prefix, stateName) \
do { \
m_state = prefix::stateName; \
if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source)) \
return haveBufferedCharacterToken(); \
cc = m_inputStreamPreprocessor.nextInputCharacter(); \
goto stateName; \
} while (false)
}
#endif // MarkupTokenizerInlines_h
/*
* Copyright (C) 2013 Google, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HTMLToken_h
#define HTMLToken_h
#include <vector>
#include "ios/third_party/blink/src/html_tokenizer_adapter.h"
namespace WebCore {
class HTMLToken {
WTF_MAKE_NONCOPYABLE(HTMLToken);
public:
enum Type {
Uninitialized,
DOCTYPE,
StartTag,
EndTag,
Comment,
Character,
EndOfFile,
};
HTMLToken();
~HTMLToken();
void clear()
{
m_type = Uninitialized;
m_data.clear();
}
Type type() const { return m_type; }
void makeEndOfFile()
{
ASSERT(m_type == Uninitialized);
m_type = EndOfFile;
}
void appendToName(LChar character)
{
ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
ASSERT(character);
m_data.push_back(character);
}
bool nameEquals(const LChar* name, size_t length)
{
ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
if (length != m_data.size())
return false;
for (size_t index = 0; index < length; ++index) {
if (m_data.at(index) != name[index])
return false;
}
return true;
}
/* DOCTYPE Tokens */
void beginDOCTYPE()
{
ASSERT(m_type == Uninitialized);
m_type = DOCTYPE;
}
/* Start/End Tag Tokens */
void beginStartTag(LChar character)
{
ASSERT(character);
ASSERT(m_type == Uninitialized);
m_type = StartTag;
m_data.push_back(character);
}
void beginEndTag(LChar character)
{
ASSERT(m_type == Uninitialized);
m_type = EndTag;
m_data.push_back(character);
}
/* Character Tokens */
// Starting a character token works slightly differently than starting
// other types of tokens because we want to save a per-character branch.
void ensureIsCharacterToken()
{
ASSERT(m_type == Uninitialized || m_type == Character);
m_type = Character;
}
/* Comment Tokens */
void beginComment()
{
ASSERT(m_type == Uninitialized);
m_type = Comment;
}
private:
Type m_type;
std::vector<LChar> m_data;
};
}
#endif
/*
* Copyright (C) 2014 Google, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "ios/third_party/blink/src/html_token.h"
namespace {
// Number of characters of name storage reserved up front for each token.
// Declared const so the file-local constant cannot be modified at runtime.
const size_t kDefaultDataSize = 256;
}
namespace WebCore {

// A new token starts out Uninitialized, with room for kDefaultDataSize name
// characters reserved ahead of time.
HTMLToken::HTMLToken()
    : m_type(Uninitialized)
{
    m_data.reserve(kDefaultDataSize);
}

// Nothing beyond member destruction is required.
HTMLToken::~HTMLToken() = default;

} // namespace WebCore
/*
* Copyright (C) 2008 Apple Inc. All Rights Reserved.
* Copyright (C) 2010 Google, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HTMLTokenizer_h
#define HTMLTokenizer_h
#include "ios/third_party/blink/src/html_input_stream_preprocessor.h"
#include "ios/third_party/blink/src/html_token.h"
namespace WebCore {
// HTMLTokenizer is a state machine that reads characters from a
// CharacterProvider and fills in a caller-owned HTMLToken. The current state
// is kept between calls so that tokenizing can resume where it stopped.
class HTMLTokenizer {
WTF_MAKE_NONCOPYABLE(HTMLTokenizer);
public:
HTMLTokenizer();
~HTMLTokenizer();
void reset();
// States of the tokenizer state machine. Most mirror states of the HTML5
// tokenization algorithm; the exceptions are called out below.
enum State {
DataState,
TagOpenState,
EndTagOpenState,
TagNameState,
BeforeAttributeNameState,
AttributeNameState,
AfterAttributeNameState,
BeforeAttributeValueState,
AttributeValueDoubleQuotedState,
AttributeValueSingleQuotedState,
AttributeValueUnquotedState,
AfterAttributeValueQuotedState,
SelfClosingStartTagState,
BogusCommentState,
// The ContinueBogusCommentState is not in the HTML5 spec, but we use
// it internally to keep track of whether we've started the bogus
// comment token yet.
ContinueBogusCommentState,
MarkupDeclarationOpenState,
CommentStartState,
CommentStartDashState,
CommentState,
CommentEndDashState,
CommentEndState,
CommentEndBangState,
DOCTYPEState,
BeforeDOCTYPENameState,
DOCTYPENameState,
AfterDOCTYPENameState,
AfterDOCTYPEPublicKeywordState,
BeforeDOCTYPEPublicIdentifierState,
DOCTYPEPublicIdentifierDoubleQuotedState,
DOCTYPEPublicIdentifierSingleQuotedState,
AfterDOCTYPEPublicIdentifierState,
BetweenDOCTYPEPublicAndSystemIdentifiersState,
AfterDOCTYPESystemKeywordState,
BeforeDOCTYPESystemIdentifierState,
DOCTYPESystemIdentifierDoubleQuotedState,
DOCTYPESystemIdentifierSingleQuotedState,
AfterDOCTYPESystemIdentifierState,
BogusDOCTYPEState,
CDATASectionState,
// These CDATA states are not in the HTML5 spec, but we use them internally.
CDATASectionRightSquareBracketState,
CDATASectionDoubleRightSquareBracketState,
};
// This function returns true if it emits a token. Otherwise, callers
// must provide the same (in progress) token on the next call (unless
// they call reset() first).
bool nextToken(CharacterProvider&, HTMLToken&);
// Accessors for the current state of the state machine.
State state() const { return m_state; }
void setState(State state) { m_state = state; }
// Queried by InputStreamPreprocessor: NUL characters in the input are
// skipped only while the tokenizer is in DataState.
inline bool shouldSkipNullCharacters() const
{
return m_state == HTMLTokenizer::DataState;
}
private:
inline void parseError();
// Finishes the current token: switches to |state|, consumes the current
// character of |source| and returns true ("a token was emitted"). The token
// must already have a type.
inline bool emitAndResumeIn(CharacterProvider& source, State state)
{
ASSERT(m_token->type() != HTMLToken::Uninitialized);
m_state = state;
source.next();
return true;
}
// Same as emitAndResumeIn(), but leaves the current character in place so
// that it is seen again in |state|.
inline bool emitAndReconsumeIn(CharacterProvider&, State state)
{
ASSERT(m_token->type() != HTMLToken::Uninitialized);
m_state = state;
return true;
}
// Handles end of input. If a Character token is pending it is emitted as is
// and nothing else changes. Otherwise the tokenizer returns to DataState,
// the end marker is consumed and the token is turned into EndOfFile.
// Always returns true.
inline bool emitEndOfFile(CharacterProvider& source)
{
if (haveBufferedCharacterToken())
return true;
m_state = HTMLTokenizer::DataState;
source.next();
m_token->clear();
m_token->makeEndOfFile();
return true;
}
// Return whether we need to emit a character token before dealing with
// the buffered end tag.
inline bool flushBufferedEndTag(CharacterProvider&);
// True when the token currently being built is a Character token.
inline bool haveBufferedCharacterToken()
{
return m_token->type() == HTMLToken::Character;
}
// Current state of the state machine.
State m_state;
// m_token is owned by the caller. If nextToken is not on the stack,
// this member might be pointing to unallocated memory.
HTMLToken* m_token;
// http://www.whatwg.org/specs/web-apps/current-work/#additional-allowed-character
LChar m_additionalAllowedCharacter;
// http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
InputStreamPreprocessor<HTMLTokenizer> m_inputStreamPreprocessor;
};
}
#endif
This diff is collapsed.
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef IOS_THIRD_PARTY_BLINK_SRC_TOKENIZER_ADAPTER_H_
#define IOS_THIRD_PARTY_BLINK_SRC_TOKENIZER_ADAPTER_H_
#include "base/basictypes.h"
#include "base/logging.h"
// Adapter macros that map the WTF/blink spellings used by the tokenizer
// sources onto compiler attributes and the base/ logging macros.

// Forces inlining through the GCC/Clang always_inline attribute.
#define ALWAYS_INLINE inline __attribute__((always_inline))
// Declares two function-local statics: |name|, pointing at |arguments|
// viewed as LChar data, and |name|Length, the array size of |arguments|
// minus one (the terminating NUL of a string literal). It then DCHECKs that
// this length matches strlen(). |arguments| must be an array such as a string
// literal, because arraysize() is applied to it.
// NOTE(review): strlen() is used here but this header does not include
// <string.h> itself; presumably it arrives through another include - confirm.
#define DEFINE_STATIC_LOCAL_STRING(name, arguments) \
static const WebCore::LChar* name = (const WebCore::LChar*)arguments; \
static const size_t name##Length = (arraysize(arguments) - 1); \
DCHECK(name##Length == strlen((const char*)name))
// WTF_MAKE_NONCOPYABLE is expressed with DISALLOW_COPY_AND_ASSIGN.
#define WTF_MAKE_NONCOPYABLE(x) DISALLOW_COPY_AND_ASSIGN(x)
// ASSERT and ASSERT_NOT_REACHED forward to DCHECK and NOTREACHED.
#define ASSERT(x) DCHECK(x)
#define ASSERT_NOT_REACHED NOTREACHED
// notImplemented() expands to nothing.
#define notImplemented()
namespace WebCore {
// Character types used throughout the tokenizer: UChar is the wide character
// type (an alias of uint16) and LChar the narrow one (an alias of uint8).
typedef uint16 UChar;
typedef uint8 LChar;
// Reports whether |c| lies in the ASCII range 'A'..'Z' (inclusive).
template <typename CharType>
inline bool isASCIIUpper(CharType c) {
  if (c < 'A')
    return false;
  return c <= 'Z';
}
// Reports whether |c| lies in the ASCII range 'a'..'z' (inclusive).
template <typename CharType>
inline bool isASCIILower(CharType c) {
  if (c < 'a')
    return false;
  return c <= 'z';
}
// Maps an ASCII uppercase letter to its lowercase counterpart by adding 0x20.
// The argument must satisfy isASCIIUpper(); this is ASSERTed, not handled.
template <typename CharType>
inline CharType toLowerCase(CharType c) {
  ASSERT(isASCIIUpper(c));
  return static_cast<CharType>(c + 0x20);
}
// Returns |c| with its low and high bytes exchanged.
inline UChar ByteSwap(UChar c) {
  const UChar lowMovedUp = (c & 0x00ff) << 8;
  const UChar highMovedDown = (c & 0xff00) >> 8;
  return lowMovedUp | highMovedDown;
}
}
#endif // IOS_THIRD_PARTY_BLINK_SRC_TOKENIZER_ADAPTER_H_
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment