OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 import "package:expect/expect.dart"; | 5 import "package:expect/expect.dart"; |
6 import 'dart:collection'; | |
7 import 'dart:utf'; | |
8 import '../../../sdk/lib/_internal/compiler/implementation/scanner/scannerlib.da
rt'; | 6 import '../../../sdk/lib/_internal/compiler/implementation/scanner/scannerlib.da
rt'; |
9 import '../../../sdk/lib/_internal/compiler/implementation/scanner/scanner_imple
mentation.dart'; | |
10 import '../../../sdk/lib/_internal/compiler/implementation/util/characters.dart'
; | 7 import '../../../sdk/lib/_internal/compiler/implementation/util/characters.dart'
; |
11 part '../../../sdk/lib/_internal/compiler/implementation/scanner/byte_strings.da
rt'; | 8 import 'dart:typed_data'; |
12 part '../../../sdk/lib/_internal/compiler/implementation/scanner/byte_array_scan
ner.dart'; | |
13 | 9 |
// Tokenizes [bytes] and returns the token produced by tokenize().
// OLD column: uses ByteArrayScanner(bytes). NEW column: uses
// Utf8BytesScanner.fromBytes(bytes). In both, the list is handed to the
// scanner as-is; this helper adds no terminator of its own.
14 Token scan(List<int> bytes) => new ByteArrayScanner(bytes).tokenize(); | 10 Token scan(List<int> bytes) => new Utf8BytesScanner.fromBytes(bytes).tokenize(); |
| 11 |
// NEW column only: wraps [bytes] as a single-quoted string literal and scans it.
// A Uint8List of length bytes.length + 3 is filled as
//   [0x27, ...bytes, 0x27, $EOF]
// and passed to Utf8BytesScanner.fromBytes; the result of tokenize() is
// returned. The callers in main() compare token.value against the text
// including the surrounding quotes.
| 12 Token scanUTF8(List<int> bytes) {
| 13 int l = bytes.length;
| 14 List<int> stringLiteral = new Uint8List(l + 3);
| 15 stringLiteral[0] = 0x27; // single quote
| 16 stringLiteral[l+1] = 0x27; // single quote
| 17 stringLiteral[l+2] = $EOF;
| 18 for (int i = 0; i < l; i++) {
| 19 stringLiteral[i+1] = bytes[i];
| 20 }
| 21 return new Utf8BytesScanner.fromBytes(stringLiteral).tokenize();
| 22 }
15 | 23 |
// Returns whether the int literal 1 and the double literal 1.0 are identical.
// Presumably this holds only where ints and doubles share one number
// representation (as when compiled to JavaScript) - confirm against the
// platform. main() uses it to skip the non-BMP character check.
16 bool isRunningOnJavaScript() => identical(1, 1.0); | 24 bool isRunningOnJavaScript() => identical(1, 1.0); |
17 | 25 |
// Test entry point: scans several UTF-8 encoded byte sequences and checks the
// resulting tokens.
// OLD column: passes $EOF-terminated bytes to scan() and compares
// token.value.slowToString() with the bare text; the Tamil and Devanagari
// inputs contain a space (0x20) and are checked as two consecutive tokens.
// NEW column: passes the unterminated bytes to scanUTF8(), which quotes them
// as one string literal, and compares token.value with the quoted text.
// The last two cases check that input starting with "#!" yields a first token
// whose info is EOF_INFO.
18 main() { | 26 main() { |
19 // Google favorite: "Îñţérñåţîöñåļîžåţîờñ". | 27 // Google favorite: "Îñţérñåţîöñåļîžåţîờñ". |
20 Token token = scan([0xc3, 0x8e, 0xc3, 0xb1, 0xc5, 0xa3, 0xc3, 0xa9, 0x72, | 28 Token token = scanUTF8([0xc3, 0x8e, 0xc3, 0xb1, 0xc5, 0xa3, 0xc3, 0xa9, 0x72, |
21 0xc3, 0xb1, 0xc3, 0xa5, 0xc5, 0xa3, 0xc3, 0xae, 0xc3, | 29 0xc3, 0xb1, 0xc3, 0xa5, 0xc5, 0xa3, 0xc3, 0xae, 0xc3, |
22 0xb6, 0xc3, 0xb1, 0xc3, 0xa5, 0xc4, 0xbc, 0xc3, 0xae, | 30 0xb6, 0xc3, 0xb1, 0xc3, 0xa5, 0xc4, 0xbc, 0xc3, 0xae, |
23 0xc5, 0xbe, 0xc3, 0xa5, 0xc5, 0xa3, 0xc3, 0xae, 0xe1, | 31 0xc5, 0xbe, 0xc3, 0xa5, 0xc5, 0xa3, 0xc3, 0xae, 0xe1, |
24 0xbb, 0x9d, 0xc3, 0xb1, $EOF]); | 32 0xbb, 0x9d, 0xc3, 0xb1]); |
25 Expect.stringEquals("Îñţérñåţîöñåļîžåţîờñ", token.value.slowToString()); | 33 Expect.stringEquals("'Îñţérñåţîöñåļîžåţîờñ'", token.value); |
26 | 34 |
27 // Blueberry porridge in Danish: "blåbærgrød". | 35 // Blueberry porridge in Danish: "blåbærgrød". |
28 token = scan([0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6, 0x72, 0x67, 0x72, | 36 token = scanUTF8([0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6, 0x72, 0x67, 0x72, |
29 0xc3, 0xb8, 0x64, $EOF]); | 37 0xc3, 0xb8, 0x64]); |
30 Expect.stringEquals("blåbærgrød", token.value.slowToString()); | 38 Expect.stringEquals("'blåbærgrød'", token.value); |
31 | 39 |
32 // "சிவா அணாமாைல", that is "Siva Annamalai" in Tamil. | 40 // "சிவா அணாமாைல", that is "Siva Annamalai" in Tamil. |
33 token = scan([0xe0, 0xae, 0x9a, 0xe0, 0xae, 0xbf, 0xe0, 0xae, 0xb5, 0xe0, | 41 token = scanUTF8([0xe0, 0xae, 0x9a, 0xe0, 0xae, 0xbf, 0xe0, 0xae, 0xb5, 0xe0, |
34 0xae, 0xbe, 0x20, 0xe0, 0xae, 0x85, 0xe0, 0xae, 0xa3, 0xe0, | 42 0xae, 0xbe, 0x20, 0xe0, 0xae, 0x85, 0xe0, 0xae, 0xa3, 0xe0, |
35 0xae, 0xbe, 0xe0, 0xae, 0xae, 0xe0, 0xae, 0xbe, 0xe0, 0xaf, | 43 0xae, 0xbe, 0xe0, 0xae, 0xae, 0xe0, 0xae, 0xbe, 0xe0, 0xaf, |
36 0x88, 0xe0, 0xae, 0xb2, $EOF]); | 44 0x88, 0xe0, 0xae, 0xb2]); |
37 Expect.stringEquals("சிவா", token.value.slowToString()); | 45 Expect.stringEquals("'சிவா அணாமாைல'", token.value); |
38 Expect.stringEquals("அணாமாைல", token.next.value.slowToString()); | |
39 | 46 |
40 // "िसवा अणामालै", that is "Siva Annamalai" in Devanagari. | 47 // "िसवा अणामालै", that is "Siva Annamalai" in Devanagari. |
41 token = scan([0xe0, 0xa4, 0xbf, 0xe0, 0xa4, 0xb8, 0xe0, 0xa4, 0xb5, 0xe0, | 48 token = scanUTF8([0xe0, 0xa4, 0xbf, 0xe0, 0xa4, 0xb8, 0xe0, 0xa4, 0xb5, 0xe0, |
42 0xa4, 0xbe, 0x20, 0xe0, 0xa4, 0x85, 0xe0, 0xa4, 0xa3, 0xe0, | 49 0xa4, 0xbe, 0x20, 0xe0, 0xa4, 0x85, 0xe0, 0xa4, 0xa3, 0xe0, |
43 0xa4, 0xbe, 0xe0, 0xa4, 0xae, 0xe0, 0xa4, 0xbe, 0xe0, 0xa4, | 50 0xa4, 0xbe, 0xe0, 0xa4, 0xae, 0xe0, 0xa4, 0xbe, 0xe0, 0xa4, |
44 0xb2, 0xe0, 0xa5, 0x88, $EOF]); | 51 0xb2, 0xe0, 0xa5, 0x88]); |
45 Expect.stringEquals("िसवा", token.value.slowToString()); | 52 Expect.stringEquals("'िसवा अणामालै'", token.value); |
46 Expect.stringEquals("अणामालै", token.next.value.slowToString()); | |
47 | 53 |
48 if (!isRunningOnJavaScript()) { | 54 if (!isRunningOnJavaScript()) { |
49 // DESERET CAPITAL LETTER BEE, unicode 0x10412(0xD801+0xDC12) | 55 // DESERET CAPITAL LETTER BEE, unicode 0x10412(0xD801+0xDC12) |
50 // UTF-8: F0 90 90 92 | 56 // UTF-8: F0 90 90 92 |
51 token = scan([0xf0, 0x90, 0x90, 0x92, $EOF]); | 57 token = scanUTF8([0xf0, 0x90, 0x90, 0x92]); |
52 Expect.stringEquals("𐐒", token.value.slowToString()); | 58 Expect.stringEquals("'𐐒'", token.value); |
53 } else { | 59 } else { |
54 print('Skipping non-BMP character test'); | 60 print('Skipping non-BMP character test'); |
55 } | 61 } |
56 | 62 |
// NOTE(review): in the NEW column the two scan() calls below no longer end
// their byte lists with $EOF, whereas scanUTF8() still appends $EOF to the
// buffer it builds. Confirm that Utf8BytesScanner.fromBytes accepts
// unterminated input; if not, the trailing $EOF should be restored here.
57 // Regression test for issue 1761. | 63 // Regression test for issue 1761. |
58 // "#!" | 64 // "#!" |
59 token = scan([0x23, 0x21, $EOF]); | 65 token = scan([0x23, 0x21]); |
60 Expect.equals(token.info, EOF_INFO); // Treated as a comment. | 66 Expect.equals(token.info, EOF_INFO); // Treated as a comment. |
61 | 67 |
62 // Regression test for issue 1761. | 68 // Regression test for issue 1761. |
63 // "#! Hello, World!" | 69 // "#! Hello, World!" |
64 token = scan([0x23, 0x21, 0x20, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2c, 0x20, | 70 token = scan([0x23, 0x21, 0x20, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2c, 0x20, |
65 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x21, $EOF]); | 71 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x21]); |
66 Expect.equals(token.info, EOF_INFO); // Treated as a comment. | 72 Expect.equals(token.info, EOF_INFO); // Treated as a comment. |
67 } | 73 } |
OLD | NEW |