Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5)

Side by Side Diff: tests/compiler/dart2js/scanner_test.dart

Issue 27510003: Scanner for UTF-8 byte arrays (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: fixes compiler tests Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 import "package:expect/expect.dart"; 5 import "package:expect/expect.dart";
6 import 'dart:collection';
7 import 'dart:utf';
8 import '../../../sdk/lib/_internal/compiler/implementation/scanner/scannerlib.dart'; 6 import '../../../sdk/lib/_internal/compiler/implementation/scanner/scannerlib.dart';
9 import '../../../sdk/lib/_internal/compiler/implementation/scanner/scanner_implementation.dart';
10 import '../../../sdk/lib/_internal/compiler/implementation/util/characters.dart'; 7 import '../../../sdk/lib/_internal/compiler/implementation/util/characters.dart';
11 part '../../../sdk/lib/_internal/compiler/implementation/scanner/byte_strings.dart'; 8 import 'dart:typed_data';
12 part '../../../sdk/lib/_internal/compiler/implementation/scanner/byte_array_scanner.dart';
13 9
14 Token scan(List<int> bytes) => new ByteArrayScanner(bytes).tokenize(); 10 Token scan(List<int> bytes) => new Utf8BytesScanner.fromBytes(bytes).tokenize();
11
12 Token scanUTF8(List<int> bytes) {
13 int l = bytes.length;
14 List<int> stringLiteral = new Uint8List(l + 3);
15 stringLiteral[0] = 0x27; // single quote
16 stringLiteral[l+1] = 0x27; // single quote
17 stringLiteral[l+2] = $EOF;
18 for (int i = 0; i < l; i++) {
19 stringLiteral[i+1] = bytes[i];
20 }
21 return new Utf8BytesScanner.fromBytes(stringLiteral).tokenize();
22 }
15 23
16 bool isRunningOnJavaScript() => identical(1, 1.0); 24 bool isRunningOnJavaScript() => identical(1, 1.0);
17 25
18 main() { 26 main() {
19 // Google favorite: "Îñţérñåţîöñåļîžåţîờñ". 27 // Google favorite: "Îñţérñåţîöñåļîžåţîờñ".
20 Token token = scan([0xc3, 0x8e, 0xc3, 0xb1, 0xc5, 0xa3, 0xc3, 0xa9, 0x72, 28 Token token = scanUTF8([0xc3, 0x8e, 0xc3, 0xb1, 0xc5, 0xa3, 0xc3, 0xa9, 0x72,
21 0xc3, 0xb1, 0xc3, 0xa5, 0xc5, 0xa3, 0xc3, 0xae, 0xc3, 29 0xc3, 0xb1, 0xc3, 0xa5, 0xc5, 0xa3, 0xc3, 0xae, 0xc3,
22 0xb6, 0xc3, 0xb1, 0xc3, 0xa5, 0xc4, 0xbc, 0xc3, 0xae, 30 0xb6, 0xc3, 0xb1, 0xc3, 0xa5, 0xc4, 0xbc, 0xc3, 0xae,
23 0xc5, 0xbe, 0xc3, 0xa5, 0xc5, 0xa3, 0xc3, 0xae, 0xe1, 31 0xc5, 0xbe, 0xc3, 0xa5, 0xc5, 0xa3, 0xc3, 0xae, 0xe1,
24 0xbb, 0x9d, 0xc3, 0xb1, $EOF]); 32 0xbb, 0x9d, 0xc3, 0xb1]);
25 Expect.stringEquals("Îñţérñåţîöñåļîžåţîờñ", token.value.slowToString()); 33 Expect.stringEquals("'Îñţérñåţîöñåļîžåţîờñ'", token.value);
26 34
27 // Blueberry porridge in Danish: "blåbærgrød". 35 // Blueberry porridge in Danish: "blåbærgrød".
28 token = scan([0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6, 0x72, 0x67, 0x72, 36 token = scanUTF8([0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6, 0x72, 0x67, 0x72,
29 0xc3, 0xb8, 0x64, $EOF]); 37 0xc3, 0xb8, 0x64]);
30 Expect.stringEquals("blåbærgrød", token.value.slowToString()); 38 Expect.stringEquals("'blåbærgrød'", token.value);
31 39
32 // "சிவா அணாமாைல", that is "Siva Annamalai" in Tamil. 40 // "சிவா அணாமாைல", that is "Siva Annamalai" in Tamil.
33 token = scan([0xe0, 0xae, 0x9a, 0xe0, 0xae, 0xbf, 0xe0, 0xae, 0xb5, 0xe0, 41 token = scanUTF8([0xe0, 0xae, 0x9a, 0xe0, 0xae, 0xbf, 0xe0, 0xae, 0xb5, 0xe0,
34 0xae, 0xbe, 0x20, 0xe0, 0xae, 0x85, 0xe0, 0xae, 0xa3, 0xe0, 42 0xae, 0xbe, 0x20, 0xe0, 0xae, 0x85, 0xe0, 0xae, 0xa3, 0xe0,
35 0xae, 0xbe, 0xe0, 0xae, 0xae, 0xe0, 0xae, 0xbe, 0xe0, 0xaf, 43 0xae, 0xbe, 0xe0, 0xae, 0xae, 0xe0, 0xae, 0xbe, 0xe0, 0xaf,
36 0x88, 0xe0, 0xae, 0xb2, $EOF]); 44 0x88, 0xe0, 0xae, 0xb2]);
37 Expect.stringEquals("சிவா", token.value.slowToString()); 45 Expect.stringEquals("'சிவா அணாமாைல'", token.value);
38 Expect.stringEquals("அணாமாைல", token.next.value.slowToString());
39 46
40 // "िसवा अणामालै", that is "Siva Annamalai" in Devanagari. 47 // "िसवा अणामालै", that is "Siva Annamalai" in Devanagari.
41 token = scan([0xe0, 0xa4, 0xbf, 0xe0, 0xa4, 0xb8, 0xe0, 0xa4, 0xb5, 0xe0, 48 token = scanUTF8([0xe0, 0xa4, 0xbf, 0xe0, 0xa4, 0xb8, 0xe0, 0xa4, 0xb5, 0xe0,
42 0xa4, 0xbe, 0x20, 0xe0, 0xa4, 0x85, 0xe0, 0xa4, 0xa3, 0xe0, 49 0xa4, 0xbe, 0x20, 0xe0, 0xa4, 0x85, 0xe0, 0xa4, 0xa3, 0xe0,
43 0xa4, 0xbe, 0xe0, 0xa4, 0xae, 0xe0, 0xa4, 0xbe, 0xe0, 0xa4, 50 0xa4, 0xbe, 0xe0, 0xa4, 0xae, 0xe0, 0xa4, 0xbe, 0xe0, 0xa4,
44 0xb2, 0xe0, 0xa5, 0x88, $EOF]); 51 0xb2, 0xe0, 0xa5, 0x88]);
45 Expect.stringEquals("िसवा", token.value.slowToString()); 52 Expect.stringEquals("'िसवा अणामालै'", token.value);
46 Expect.stringEquals("अणामालै", token.next.value.slowToString());
47 53
48 if (!isRunningOnJavaScript()) { 54 if (!isRunningOnJavaScript()) {
49 // DESERET CAPITAL LETTER BEE, unicode 0x10412(0xD801+0xDC12) 55 // DESERET CAPITAL LETTER BEE, unicode 0x10412(0xD801+0xDC12)
50 // UTF-8: F0 90 90 92 56 // UTF-8: F0 90 90 92
51 token = scan([0xf0, 0x90, 0x90, 0x92, $EOF]); 57 token = scanUTF8([0xf0, 0x90, 0x90, 0x92]);
52 Expect.stringEquals("𐐒", token.value.slowToString()); 58 Expect.stringEquals("'𐐒'", token.value);
53 } else { 59 } else {
54 print('Skipping non-BMP character test'); 60 print('Skipping non-BMP character test');
55 } 61 }
56 62
57 // Regression test for issue 1761. 63 // Regression test for issue 1761.
58 // "#!" 64 // "#!"
59 token = scan([0x23, 0x21, $EOF]); 65 token = scan([0x23, 0x21]);
60 Expect.equals(token.info, EOF_INFO); // Treated as a comment. 66 Expect.equals(token.info, EOF_INFO); // Treated as a comment.
61 67
62 // Regression test for issue 1761. 68 // Regression test for issue 1761.
63 // "#! Hello, World!" 69 // "#! Hello, World!"
64 token = scan([0x23, 0x21, 0x20, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2c, 0x20, 70 token = scan([0x23, 0x21, 0x20, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2c, 0x20,
65 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x21, $EOF]); 71 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x21]);
66 Expect.equals(token.info, EOF_INFO); // Treated as a comment. 72 Expect.equals(token.info, EOF_INFO); // Treated as a comment.
67 } 73 }
OLD NEW

Powered by Google App Engine
This is Rietveld 408576698