OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "config.h" | 5 #include "config.h" |
6 #include "core/css/parser/CSSTokenizer.h" | 6 #include "core/css/parser/CSSTokenizer.h" |
7 | 7 |
8 namespace blink { | 8 namespace blink { |
9 #include "core/CSSTokenizerCodepoints.cpp" | 9 #include "core/CSSTokenizerCodepoints.cpp" |
10 } | 10 } |
11 | 11 |
| 12 #include "core/css/parser/CSSParserTokenRange.h" |
12 #include "core/css/parser/CSSTokenizerInputStream.h" | 13 #include "core/css/parser/CSSTokenizerInputStream.h" |
13 #include "core/html/parser/HTMLParserIdioms.h" | 14 #include "core/html/parser/HTMLParserIdioms.h" |
14 #include "wtf/unicode/CharacterNames.h" | 15 #include "wtf/unicode/CharacterNames.h" |
15 | 16 |
16 namespace blink { | 17 namespace blink { |
17 | 18 |
| 19 CSSTokenizer::Scope::Scope(const String& string) |
| 20 { |
| 21 // According to the spec, we should perform preprocessing here. |
| 22 // See: http://dev.w3.org/csswg/css-syntax/#input-preprocessing |
| 23 // |
| 24 // However, we can skip this step since: |
| 25 // * We're using HTML spaces (which accept \r and \f as a valid white space) |
| 26 // * Do not count white spaces |
| 27 // * consumeEscape replaces NULLs for replacement characters |
| 28 |
| 29 if (string.isEmpty()) |
| 30 return; |
| 31 |
| 32 // To avoid resizing we err on the side of reserving too much space. |
| 33 // Most strings we tokenize have about 3.5 to 5 characters per token. |
| 34 m_tokens.reserveInitialCapacity(string.length() / 3); |
| 35 |
| 36 CSSTokenizerInputStream input(string); |
| 37 CSSTokenizer tokenizer(input); |
| 38 while (true) { |
| 39 CSSParserToken token = tokenizer.nextToken(); |
| 40 if (token.type() == EOFToken) |
| 41 return; |
| 42 m_tokens.append(token); |
| 43 } |
| 44 } |
| 45 |
| 46 CSSParserTokenRange CSSTokenizer::Scope::tokenRange() |
| 47 { |
| 48 return m_tokens; |
| 49 } |
| 50 |
18 // http://dev.w3.org/csswg/css-syntax/#name-start-code-point | 51 // http://dev.w3.org/csswg/css-syntax/#name-start-code-point |
19 static bool isNameStart(UChar c) | 52 static bool isNameStart(UChar c) |
20 { | 53 { |
21 if (isASCIIAlpha(c)) | 54 if (isASCIIAlpha(c)) |
22 return true; | 55 return true; |
23 if (c == '_') | 56 if (c == '_') |
24 return true; | 57 return true; |
25 return !isASCII(c); | 58 return !isASCII(c); |
26 } | 59 } |
27 | 60 |
(...skipping 260 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
288 CSSParserToken CSSTokenizer::stringStart(UChar cc) | 321 CSSParserToken CSSTokenizer::stringStart(UChar cc) |
289 { | 322 { |
290 return consumeStringTokenUntil(cc); | 323 return consumeStringTokenUntil(cc); |
291 } | 324 } |
292 | 325 |
293 CSSParserToken CSSTokenizer::endOfFile(UChar cc) | 326 CSSParserToken CSSTokenizer::endOfFile(UChar cc) |
294 { | 327 { |
295 return CSSParserToken(EOFToken); | 328 return CSSParserToken(EOFToken); |
296 } | 329 } |
297 | 330 |
298 void CSSTokenizer::tokenize(String string, Vector<CSSParserToken>& outTokens) | |
299 { | |
300 // According to the spec, we should perform preprocessing here. | |
301 // See: http://dev.w3.org/csswg/css-syntax/#input-preprocessing | |
302 // | |
303 // However, we can skip this step since: | |
304 // * We're using HTML spaces (which accept \r and \f as a valid white space) | |
305 // * Do not count white spaces | |
306 // * consumeEscape replaces NULLs for replacement characters | |
307 | |
308 if (string.isEmpty()) | |
309 return; | |
310 | |
311 // To avoid resizing we err on the side of reserving too much space. | |
312 // Most strings we tokenize have about 3.5 to 5 characters per token. | |
313 outTokens.reserveInitialCapacity(string.length() / 3); | |
314 | |
315 CSSTokenizerInputStream input(string); | |
316 CSSTokenizer tokenizer(input); | |
317 while (true) { | |
318 CSSParserToken token = tokenizer.nextToken(); | |
319 if (token.type() == EOFToken) | |
320 return; | |
321 outTokens.append(token); | |
322 } | |
323 } | |
324 | |
325 CSSParserToken CSSTokenizer::nextToken() | 331 CSSParserToken CSSTokenizer::nextToken() |
326 { | 332 { |
327 // Unlike the HTMLTokenizer, the CSS Syntax spec is written | 333 // Unlike the HTMLTokenizer, the CSS Syntax spec is written |
328 // as a stateless, (fixed-size) look-ahead tokenizer. | 334 // as a stateless, (fixed-size) look-ahead tokenizer. |
329 // We could move to the stateful model and instead create | 335 // We could move to the stateful model and instead create |
330 // states for all the "next 3 codepoints are X" cases. | 336 // states for all the "next 3 codepoints are X" cases. |
331 // State-machine tokenizers are easier to write to handle | 337 // State-machine tokenizers are easier to write to handle |
332 // incremental tokenization of partial sources. | 338 // incremental tokenization of partial sources. |
333 // However, for now we follow the spec exactly. | 339 // However, for now we follow the spec exactly. |
334 UChar cc = consume(); | 340 UChar cc = consume(); |
(...skipping 377 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
712 | 718 |
713 bool CSSTokenizer::nextCharsAreIdentifier() | 719 bool CSSTokenizer::nextCharsAreIdentifier() |
714 { | 720 { |
715 UChar first = consume(); | 721 UChar first = consume(); |
716 bool areIdentifier = nextCharsAreIdentifier(first); | 722 bool areIdentifier = nextCharsAreIdentifier(first); |
717 reconsume(first); | 723 reconsume(first); |
718 return areIdentifier; | 724 return areIdentifier; |
719 } | 725 } |
720 | 726 |
721 } // namespace blink | 727 } // namespace blink |
OLD | NEW |