Commit c1a02303 authored by timloh@chromium.org

CSS Tokenizer: Fix handling of escaped newlines

This patch fixes our handling of escaped newlines in the css-syntax
tokenizer. Since we don't perform preprocessing, the logic is slightly
trickier than in the spec: the spec's preprocessing step replaces \r,
\r\n and \f with \n, so the tokenizer has to recognize all of these as
newlines itself.

Regarding escaped newlines in strings, the spec states:
"Otherwise, if the next input code point is a newline, consume it."
In this case, we may need to consume two code points if we have \r\n.

The other cases don't require special handling, since \r\n starts with
\r (also a newline) and in these cases the following token is just going
to be a whitespace token.

BUG=424988

Review URL: https://codereview.chromium.org/656033010

git-svn-id: svn://svn.chromium.org/blink/trunk@184347 bbb929c8-8fbe-4397-9dbb-9b2b20218538
parent f4cefe72
...@@ -31,10 +31,16 @@ static bool isNameChar(UChar c) ...@@ -31,10 +31,16 @@ static bool isNameChar(UChar c)
return isNameStart(c) || isASCIIDigit(c) || c == '-'; return isNameStart(c) || isASCIIDigit(c) || c == '-';
} }
static bool isNewLine(UChar cc)
{
// We check \r and \f here, since we have no preprocessing stage
return (cc == '\r' || cc == '\n' || cc == '\f');
}
// http://dev.w3.org/csswg/css-syntax/#check-if-two-code-points-are-a-valid-escape // http://dev.w3.org/csswg/css-syntax/#check-if-two-code-points-are-a-valid-escape
static bool twoCharsAreValidEscape(UChar first, UChar second) static bool twoCharsAreValidEscape(UChar first, UChar second)
{ {
return ((first == '\\') && (second != '\n') && (second != kEndOfFileMarker)); return first == '\\' && !isNewLine(second) && second != kEndOfFileMarker;
} }
CSSTokenizer::CSSTokenizer(CSSTokenizerInputStream& inputStream) CSSTokenizer::CSSTokenizer(CSSTokenizerInputStream& inputStream)
...@@ -353,12 +359,6 @@ CSSParserToken CSSTokenizer::consumeIdentLikeToken() ...@@ -353,12 +359,6 @@ CSSParserToken CSSTokenizer::consumeIdentLikeToken()
return CSSParserToken(IdentToken, name); return CSSParserToken(IdentToken, name);
} }
static bool isNewLine(UChar cc)
{
// We check \r and \f here, since we have no preprocessing stage
return (cc == '\r' || cc == '\n' || cc == '\f');
}
// http://dev.w3.org/csswg/css-syntax/#consume-a-string-token // http://dev.w3.org/csswg/css-syntax/#consume-a-string-token
CSSParserToken CSSTokenizer::consumeStringTokenUntil(UChar endingCodePoint) CSSParserToken CSSTokenizer::consumeStringTokenUntil(UChar endingCodePoint)
{ {
...@@ -379,7 +379,7 @@ CSSParserToken CSSTokenizer::consumeStringTokenUntil(UChar endingCodePoint) ...@@ -379,7 +379,7 @@ CSSParserToken CSSTokenizer::consumeStringTokenUntil(UChar endingCodePoint)
if (m_input.nextInputChar() == kEndOfFileMarker) if (m_input.nextInputChar() == kEndOfFileMarker)
continue; continue;
if (isNewLine(m_input.nextInputChar())) if (isNewLine(m_input.nextInputChar()))
consume(); consumeSingleWhitespaceIfNext(); // This handles \r\n for us
else else
output.append(consumeEscape()); output.append(consumeEscape());
} else { } else {
...@@ -456,7 +456,7 @@ String CSSTokenizer::consumeName() ...@@ -456,7 +456,7 @@ String CSSTokenizer::consumeName()
UChar CSSTokenizer::consumeEscape() UChar CSSTokenizer::consumeEscape()
{ {
UChar cc = consume(); UChar cc = consume();
ASSERT(cc != '\n'); ASSERT(!isNewLine(cc));
if (isASCIIHexDigit(cc)) { if (isASCIIHexDigit(cc)) {
unsigned consumedHexDigits = 1; unsigned consumedHexDigits = 1;
StringBuilder hexChars; StringBuilder hexChars;
......
...@@ -157,6 +157,9 @@ TEST(CSSTokenizerTest, Escapes) ...@@ -157,6 +157,9 @@ TEST(CSSTokenizerTest, Escapes)
TEST_TOKENS("te\\s\\t", ident("test")); TEST_TOKENS("te\\s\\t", ident("test"));
TEST_TOKENS("spaces\\ in\\\tident", ident("spaces in\tident")); TEST_TOKENS("spaces\\ in\\\tident", ident("spaces in\tident"));
TEST_TOKENS("\\.\\,\\:\\!", ident(".,:!")); TEST_TOKENS("\\.\\,\\:\\!", ident(".,:!"));
TEST_TOKENS("\\\r", delim('\\'), whitespace);
TEST_TOKENS("\\\f", delim('\\'), whitespace);
TEST_TOKENS("\\\r\n", delim('\\'), whitespace);
// FIXME: We don't correctly return replacement characters // FIXME: We don't correctly return replacement characters
// String replacement = fromUChar32(0xFFFD); // String replacement = fromUChar32(0xFFFD);
// TEST_TOKENS("null\\0", ident("null" + replacement)); // TEST_TOKENS("null\\0", ident("null" + replacement));
...@@ -168,10 +171,6 @@ TEST(CSSTokenizerTest, Escapes) ...@@ -168,10 +171,6 @@ TEST(CSSTokenizerTest, Escapes)
// FIXME: We don't correctly return supplementary plane characters // FIXME: We don't correctly return supplementary plane characters
// TEST_TOKENS("\\10fFfF", ident(fromUChar32(0x10ffff) + "0")); // TEST_TOKENS("\\10fFfF", ident(fromUChar32(0x10ffff) + "0"));
// TEST_TOKENS("\\10000000", ident(fromUChar32(0x100000) + "000")); // TEST_TOKENS("\\10000000", ident(fromUChar32(0x100000) + "000"));
// FIXME: We don't correctly match newlines (normally handled in preprocessing)
// TEST_TOKENS("\\\r", delim('\\'), whitespace);
// TEST_TOKENS("\\\f", delim('\\'), whitespace);
// TEST_TOKENS("\\\r\n", delim('\\'), whitespace);
} }
TEST(CSSTokenizerTest, IdentToken) TEST(CSSTokenizerTest, IdentToken)
...@@ -218,14 +217,13 @@ TEST(CSSTokenizerTest, StringToken) ...@@ -218,14 +217,13 @@ TEST(CSSTokenizerTest, StringToken)
TEST_TOKENS("'esca\\\nped'", string("escaped")); TEST_TOKENS("'esca\\\nped'", string("escaped"));
TEST_TOKENS("\"esc\\\faped\"", string("escaped")); TEST_TOKENS("\"esc\\\faped\"", string("escaped"));
TEST_TOKENS("'new\\\rline'", string("newline")); TEST_TOKENS("'new\\\rline'", string("newline"));
TEST_TOKENS("\"new\\\r\nline\"", string("newline"));
TEST_TOKENS("'bad\nstring", badString, whitespace, ident("string")); TEST_TOKENS("'bad\nstring", badString, whitespace, ident("string"));
TEST_TOKENS("'bad\rstring", badString, whitespace, ident("string")); TEST_TOKENS("'bad\rstring", badString, whitespace, ident("string"));
TEST_TOKENS("'bad\r\nstring", badString, whitespace, ident("string")); TEST_TOKENS("'bad\r\nstring", badString, whitespace, ident("string"));
TEST_TOKENS("'bad\fstring", badString, whitespace, ident("string")); TEST_TOKENS("'bad\fstring", badString, whitespace, ident("string"));
// FIXME: Preprocessing is supposed to replace U+0000 with U+FFFD // FIXME: Preprocessing is supposed to replace U+0000 with U+FFFD
// TEST_TOKENS("'\0'", string(fromUChar32(0xFFFD))); // TEST_TOKENS("'\0'", string(fromUChar32(0xFFFD)));
// FIXME: We don't correctly match newlines (normally handled in preprocessing)
// TEST_TOKENS("\"new\\\r\nline\"", string("newline"));
} }
TEST(CSSTokenizerTest, NumberToken) TEST(CSSTokenizerTest, NumberToken)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment