Source/core/css/parser/CSSTokenizer.cpp - Issue 962093002: CSS Tokenizer: Add an on-stack tokenizer scope

Side by Side Diff: Source/core/css/parser/CSSTokenizer.cpp

Issue 962093002: CSS Tokenizer: Add an on-stack tokenizer scope (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master

Patch Set: Created 5 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved.	1 // Copyright 2014 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "config.h"	5 #include "config.h"

6 #include "core/css/parser/CSSTokenizer.h"	6 #include "core/css/parser/CSSTokenizer.h"

7	7

8 namespace blink {	8 namespace blink {

9 #include "core/CSSTokenizerCodepoints.cpp"	9 #include "core/CSSTokenizerCodepoints.cpp"

10 }	10 }

11	11

	12 #include "core/css/parser/CSSParserTokenRange.h"

12 #include "core/css/parser/CSSTokenizerInputStream.h"	13 #include "core/css/parser/CSSTokenizerInputStream.h"

13 #include "core/html/parser/HTMLParserIdioms.h"	14 #include "core/html/parser/HTMLParserIdioms.h"

14 #include "wtf/unicode/CharacterNames.h"	15 #include "wtf/unicode/CharacterNames.h"

15	16

16 namespace blink {	17 namespace blink {

17	18

	19 CSSTokenizer::Scope::Scope(const String& string)

	20 {

	21 // According to the spec, we should perform preprocessing here.

	22 // See: http://dev.w3.org/csswg/css-syntax/#input-preprocessing

	23 //

	24 // However, we can skip this step since:

	25 // * We're using HTML spaces (which accept \r and \f as valid white space)

	26 // * We do not count white spaces

	27 // * consumeEscape replaces NULLs with replacement characters

	28

	29 if (string.isEmpty())

	30 return;

	31

	32 // To avoid resizing we err on the side of reserving too much space.

	33 // Most strings we tokenize have about 3.5 to 5 characters per token.

	34 m_tokens.reserveInitialCapacity(string.length() / 3);

	35

	36 CSSTokenizerInputStream input(string);

	37 CSSTokenizer tokenizer(input);

	38 while (true) {

	39 CSSParserToken token = tokenizer.nextToken();

	40 if (token.type() == EOFToken)

	41 return;

	42 m_tokens.append(token);

	43 }

	44 }

	45

	46 CSSParserTokenRange CSSTokenizer::Scope::tokenRange()

	47 {

	48 return m_tokens;

	49 }

	50

18 // http://dev.w3.org/csswg/css-syntax/#name-start-code-point	51 // http://dev.w3.org/csswg/css-syntax/#name-start-code-point

19 static bool isNameStart(UChar c)	52 static bool isNameStart(UChar c)

20 {	53 {

21 if (isASCIIAlpha(c))	54 if (isASCIIAlpha(c))

22 return true;	55 return true;

23 if (c == '_')	56 if (c == '_')

24 return true;	57 return true;

25 return !isASCII(c);	58 return !isASCII(c);

26 }	59 }

27	60

(...skipping 260 matching lines...) Expand 10 before | Expand all | Expand 10 after
288 CSSParserToken CSSTokenizer::stringStart(UChar cc)	321 CSSParserToken CSSTokenizer::stringStart(UChar cc)

289 {	322 {

290 return consumeStringTokenUntil(cc);	323 return consumeStringTokenUntil(cc);

291 }	324 }

292	325

293 CSSParserToken CSSTokenizer::endOfFile(UChar cc)	326 CSSParserToken CSSTokenizer::endOfFile(UChar cc)

294 {	327 {

295 return CSSParserToken(EOFToken);	328 return CSSParserToken(EOFToken);

296 }	329 }

297	330

298 void CSSTokenizer::tokenize(String string, Vector<CSSParserToken>& outTokens)

299 {

300 // According to the spec, we should perform preprocessing here.

301 // See: http://dev.w3.org/csswg/css-syntax/#input-preprocessing

302 //

303 // However, we can skip this step since:

304 // * We're using HTML spaces (which accept \r and \f as valid white space)

305 // * We do not count white spaces

306 // * consumeEscape replaces NULLs with replacement characters

307

308 if (string.isEmpty())

309 return;

310

311 // To avoid resizing we err on the side of reserving too much space.

312 // Most strings we tokenize have about 3.5 to 5 characters per token.

313 outTokens.reserveInitialCapacity(string.length() / 3);

314

315 CSSTokenizerInputStream input(string);

316 CSSTokenizer tokenizer(input);

317 while (true) {

318 CSSParserToken token = tokenizer.nextToken();

319 if (token.type() == EOFToken)

320 return;

321 outTokens.append(token);

322 }

323 }

324

325 CSSParserToken CSSTokenizer::nextToken()	331 CSSParserToken CSSTokenizer::nextToken()

326 {	332 {

327 // Unlike the HTMLTokenizer, the CSS Syntax spec is written	333 // Unlike the HTMLTokenizer, the CSS Syntax spec is written

328 // as a stateless, (fixed-size) look-ahead tokenizer.	334 // as a stateless, (fixed-size) look-ahead tokenizer.

329 // We could move to the stateful model and instead create	335 // We could move to the stateful model and instead create

330 // states for all the "next 3 codepoints are X" cases.	336 // states for all the "next 3 codepoints are X" cases.

331 // State-machine tokenizers are easier to write to handle	337 // State-machine tokenizers are easier to write to handle

332 // incremental tokenization of partial sources.	338 // incremental tokenization of partial sources.

333 // However, for now we follow the spec exactly.	339 // However, for now we follow the spec exactly.

334 UChar cc = consume();	340 UChar cc = consume();

(...skipping 377 matching lines...) Expand 10 before | Expand all | Expand 10 after
712	718

713 bool CSSTokenizer::nextCharsAreIdentifier()	719 bool CSSTokenizer::nextCharsAreIdentifier()

714 {	720 {

715 UChar first = consume();	721 UChar first = consume();

716 bool areIdentifier = nextCharsAreIdentifier(first);	722 bool areIdentifier = nextCharsAreIdentifier(first);

717 reconsume(first);	723 reconsume(first);

718 return areIdentifier;	724 return areIdentifier;

719 }	725 }

720	726

721 } // namespace blink	727 } // namespace blink

OLD	NEW

« no previous file with comments | « Source/core/css/parser/CSSTokenizer.h ('k') | Source/core/css/parser/CSSTokenizerTest.cpp » ('j') | no next file with comments »