Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(149)

Side by Side Diff: Source/core/css/parser/CSSTokenizer.cpp

Issue 962093002: CSS Tokenizer: Add an on-stack tokenizer scope (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master
Patch Set: Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Source/core/css/parser/CSSTokenizer.h ('k') | Source/core/css/parser/CSSTokenizerTest.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "config.h" 5 #include "config.h"
6 #include "core/css/parser/CSSTokenizer.h" 6 #include "core/css/parser/CSSTokenizer.h"
7 7
8 namespace blink { 8 namespace blink {
9 #include "core/CSSTokenizerCodepoints.cpp" 9 #include "core/CSSTokenizerCodepoints.cpp"
10 } 10 }
11 11
12 #include "core/css/parser/CSSParserTokenRange.h"
12 #include "core/css/parser/CSSTokenizerInputStream.h" 13 #include "core/css/parser/CSSTokenizerInputStream.h"
13 #include "core/html/parser/HTMLParserIdioms.h" 14 #include "core/html/parser/HTMLParserIdioms.h"
14 #include "wtf/unicode/CharacterNames.h" 15 #include "wtf/unicode/CharacterNames.h"
15 16
16 namespace blink { 17 namespace blink {
17 18
19 CSSTokenizer::Scope::Scope(const String& string)
20 {
21 // According to the spec, we should perform preprocessing here.
22 // See: http://dev.w3.org/csswg/css-syntax/#input-preprocessing
23 //
24 // However, we can skip this step since:
25 // * We're using HTML spaces (which accept \r and \f as valid white space)
26 // * We do not count white spaces
27 // * consumeEscape replaces NULLs with replacement characters
28
29 if (string.isEmpty())
30 return;
31
32 // To avoid resizing we err on the side of reserving too much space.
33 // Most strings we tokenize have about 3.5 to 5 characters per token.
34 m_tokens.reserveInitialCapacity(string.length() / 3);
35
36 CSSTokenizerInputStream input(string);
37 CSSTokenizer tokenizer(input);
38 while (true) {
39 CSSParserToken token = tokenizer.nextToken();
40 if (token.type() == EOFToken)
41 return;
42 m_tokens.append(token);
43 }
44 }
45
46 CSSParserTokenRange CSSTokenizer::Scope::tokenRange()
47 {
48 return m_tokens;
49 }
50
18 // http://dev.w3.org/csswg/css-syntax/#name-start-code-point 51 // http://dev.w3.org/csswg/css-syntax/#name-start-code-point
19 static bool isNameStart(UChar c) 52 static bool isNameStart(UChar c)
20 { 53 {
21 if (isASCIIAlpha(c)) 54 if (isASCIIAlpha(c))
22 return true; 55 return true;
23 if (c == '_') 56 if (c == '_')
24 return true; 57 return true;
25 return !isASCII(c); 58 return !isASCII(c);
26 } 59 }
27 60
(...skipping 260 matching lines...) Expand 10 before | Expand all | Expand 10 after
288 CSSParserToken CSSTokenizer::stringStart(UChar cc) 321 CSSParserToken CSSTokenizer::stringStart(UChar cc)
289 { 322 {
290 return consumeStringTokenUntil(cc); 323 return consumeStringTokenUntil(cc);
291 } 324 }
292 325
293 CSSParserToken CSSTokenizer::endOfFile(UChar cc) 326 CSSParserToken CSSTokenizer::endOfFile(UChar cc)
294 { 327 {
295 return CSSParserToken(EOFToken); 328 return CSSParserToken(EOFToken);
296 } 329 }
297 330
298 void CSSTokenizer::tokenize(String string, Vector<CSSParserToken>& outTokens)
299 {
300 // According to the spec, we should perform preprocessing here.
301 // See: http://dev.w3.org/csswg/css-syntax/#input-preprocessing
302 //
303 // However, we can skip this step since:
304 // * We're using HTML spaces (which accept \r and \f as valid white space)
305 // * We do not count white spaces
306 // * consumeEscape replaces NULLs with replacement characters
307
308 if (string.isEmpty())
309 return;
310
311 // To avoid resizing we err on the side of reserving too much space.
312 // Most strings we tokenize have about 3.5 to 5 characters per token.
313 outTokens.reserveInitialCapacity(string.length() / 3);
314
315 CSSTokenizerInputStream input(string);
316 CSSTokenizer tokenizer(input);
317 while (true) {
318 CSSParserToken token = tokenizer.nextToken();
319 if (token.type() == EOFToken)
320 return;
321 outTokens.append(token);
322 }
323 }
324
325 CSSParserToken CSSTokenizer::nextToken() 331 CSSParserToken CSSTokenizer::nextToken()
326 { 332 {
327 // Unlike the HTMLTokenizer, the CSS Syntax spec is written 333 // Unlike the HTMLTokenizer, the CSS Syntax spec is written
328 // as a stateless, (fixed-size) look-ahead tokenizer. 334 // as a stateless, (fixed-size) look-ahead tokenizer.
329 // We could move to the stateful model and instead create 335 // We could move to the stateful model and instead create
330 // states for all the "next 3 codepoints are X" cases. 336 // states for all the "next 3 codepoints are X" cases.
331 // State-machine tokenizers are easier to write to handle 337 // State-machine tokenizers are easier to write to handle
332 // incremental tokenization of partial sources. 338 // incremental tokenization of partial sources.
333 // However, for now we follow the spec exactly. 339 // However, for now we follow the spec exactly.
334 UChar cc = consume(); 340 UChar cc = consume();
(...skipping 377 matching lines...) Expand 10 before | Expand all | Expand 10 after
712 718
713 bool CSSTokenizer::nextCharsAreIdentifier() 719 bool CSSTokenizer::nextCharsAreIdentifier()
714 { 720 {
715 UChar first = consume(); 721 UChar first = consume();
716 bool areIdentifier = nextCharsAreIdentifier(first); 722 bool areIdentifier = nextCharsAreIdentifier(first);
717 reconsume(first); 723 reconsume(first);
718 return areIdentifier; 724 return areIdentifier;
719 } 725 }
720 726
721 } // namespace blink 727 } // namespace blink
OLDNEW
« no previous file with comments | « Source/core/css/parser/CSSTokenizer.h ('k') | Source/core/css/parser/CSSTokenizerTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698