Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1089)

Side by Side Diff: utils/tests/string_encoding/utf8_tests.dart

Issue 10154010: test rename overhaul: step 3 _tests.dart => _test.dart (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 8 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 #!/usr/bin/env dart
2 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
3 // for details. All rights reserved. Use of this source code is governed by a
4 // BSD-style license that can be found in the LICENSE file.
5
6 #library("utf8_tests");
7 #import("dunit.dart");
8 #import("../../../lib/utf/utf.dart");
9
// Entry point: builds a dunit suite containing only Utf8Tests and runs it.
void main() {
  final TestSuite utf8Suite = new TestSuite();
  utf8Suite.registerTestClass(new Utf8Tests());
  utf8Suite.run();
}
15
// Tests for the UTF-8 helpers used below: encodeUtf8, decodeUtf8,
// utf8ToCodepoints and decodeUtf8AsIterable.
//
// Each sample phrase is stored next to its expected UTF-8 byte sequence, so
// the same fixtures drive both the encode and the decode tests. The malformed
// input cases in testUtf8bytesToCodepoints assert that every invalid sequence
// decodes to UNICODE_REPLACEMENT_CHARACTER_CODEPOINT.
class Utf8Tests extends TestClass {
  static final String testEnglishPhrase =
      "The quick brown fox jumps over the lazy dog.";

  static final List<int> testEnglishUtf8 = const<int>[
      0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63,
      0x6b, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20,
      0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70,
      0x73, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74,
      0x68, 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20,
      0x64, 0x6f, 0x67, 0x2e];

  static final String testDanishPhrase = "Quizdeltagerne spiste jordbær med " +
      "fløde mens cirkusklovnen Wolther spillede på xylofon.";

  static final List<int> testDanishUtf8 = const<int>[
      0x51, 0x75, 0x69, 0x7a, 0x64, 0x65, 0x6c, 0x74,
      0x61, 0x67, 0x65, 0x72, 0x6e, 0x65, 0x20, 0x73,
      0x70, 0x69, 0x73, 0x74, 0x65, 0x20, 0x6a, 0x6f,
      0x72, 0x64, 0x62, 0xc3, 0xa6, 0x72, 0x20, 0x6d,
      0x65, 0x64, 0x20, 0x66, 0x6c, 0xc3, 0xb8, 0x64,
      0x65, 0x20, 0x6d, 0x65, 0x6e, 0x73, 0x20, 0x63,
      0x69, 0x72, 0x6b, 0x75, 0x73, 0x6b, 0x6c, 0x6f,
      0x76, 0x6e, 0x65, 0x6e, 0x20, 0x57, 0x6f, 0x6c,
      0x74, 0x68, 0x65, 0x72, 0x20, 0x73, 0x70, 0x69,
      0x6c, 0x6c, 0x65, 0x64, 0x65, 0x20, 0x70, 0xc3,
      0xa5, 0x20, 0x78, 0x79, 0x6c, 0x6f, 0x66, 0x6f,
      0x6e, 0x2e];

  // unusual formatting due to strange editor interaction w/ text direction.
  static final String
      testHebrewPhrase = "דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה";

  static final List<int> testHebrewUtf8 = const<int>[
      0xd7, 0x93, 0xd7, 0x92, 0x20, 0xd7, 0xa1, 0xd7,
      0xa7, 0xd7, 0xa8, 0xd7, 0x9f, 0x20, 0xd7, 0xa9,
      0xd7, 0x98, 0x20, 0xd7, 0x91, 0xd7, 0x99, 0xd7,
      0x9d, 0x20, 0xd7, 0x9e, 0xd7, 0x90, 0xd7, 0x95,
      0xd7, 0x9b, 0xd7, 0x96, 0xd7, 0x91, 0x20, 0xd7,
      0x95, 0xd7, 0x9c, 0xd7, 0xa4, 0xd7, 0xaa, 0xd7,
      0xa2, 0x20, 0xd7, 0x9e, 0xd7, 0xa6, 0xd7, 0x90,
      0x20, 0xd7, 0x9c, 0xd7, 0x95, 0x20, 0xd7, 0x97,
      0xd7, 0x91, 0xd7, 0xa8, 0xd7, 0x94, 0x20, 0xd7,
      0x90, 0xd7, 0x99, 0xd7, 0x9a, 0x20, 0xd7, 0x94,
      0xd7, 0xa7, 0xd7, 0x9c, 0xd7, 0x99, 0xd7, 0x98,
      0xd7, 0x94];

  static final String testRussianPhrase = "Съешь же ещё этих мягких " +
      "французских булок да выпей чаю";

  static final List<int> testRussianUtf8 = const<int>[
      0xd0, 0xa1, 0xd1, 0x8a, 0xd0, 0xb5, 0xd1, 0x88,
      0xd1, 0x8c, 0x20, 0xd0, 0xb6, 0xd0, 0xb5, 0x20,
      0xd0, 0xb5, 0xd1, 0x89, 0xd1, 0x91, 0x20, 0xd1,
      0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0xd1, 0x85, 0x20,
      0xd0, 0xbc, 0xd1, 0x8f, 0xd0, 0xb3, 0xd0, 0xba,
      0xd0, 0xb8, 0xd1, 0x85, 0x20, 0xd1, 0x84, 0xd1,
      0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd1, 0x86, 0xd1,
      0x83, 0xd0, 0xb7, 0xd1, 0x81, 0xd0, 0xba, 0xd0,
      0xb8, 0xd1, 0x85, 0x20, 0xd0, 0xb1, 0xd1, 0x83,
      0xd0, 0xbb, 0xd0, 0xbe, 0xd0, 0xba, 0x20, 0xd0,
      0xb4, 0xd0, 0xb0, 0x20, 0xd0, 0xb2, 0xd1, 0x8b,
      0xd0, 0xbf, 0xd0, 0xb5, 0xd0, 0xb9, 0x20, 0xd1,
      0x87, 0xd0, 0xb0, 0xd1, 0x8e];

  static final String testGreekPhrase = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ " +
      "στὸ χρυσαφὶ ξέφωτο";

  static final List<int> testGreekUtf8 = const<int>[
      0xce, 0x93, 0xce, 0xb1, 0xce, 0xb6, 0xce, 0xad,
      0xce, 0xb5, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce,
      0xb1, 0xe1, 0xbd, 0xb6, 0x20, 0xce, 0xbc, 0xcf,
      0x85, 0xcf, 0x81, 0xcf, 0x84, 0xce, 0xb9, 0xe1,
      0xbd, 0xb2, 0xcf, 0x82, 0x20, 0xce, 0xb4, 0xe1,
      0xbd, 0xb2, 0xce, 0xbd, 0x20, 0xce, 0xb8, 0xe1,
      0xbd, 0xb0, 0x20, 0xce, 0xb2, 0xcf, 0x81, 0xe1,
      0xbf, 0xb6, 0x20, 0xcf, 0x80, 0xce, 0xb9, 0xe1,
      0xbd, 0xb0, 0x20, 0xcf, 0x83, 0xcf, 0x84, 0xe1,
      0xbd, 0xb8, 0x20, 0xcf, 0x87, 0xcf, 0x81, 0xcf,
      0x85, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x86, 0xe1,
      0xbd, 0xb6, 0x20, 0xce, 0xbe, 0xce, 0xad, 0xcf,
      0x86, 0xcf, 0x89, 0xcf, 0x84, 0xce, 0xbf];

  // The two text lines must stay at column 0: the expected bytes below contain
  // a bare 0x0a between the lines and no leading spaces.
  static final String testKatakanaPhrase = """
イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム
ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン""";

  static final List<int> testKatakanaUtf8 = const<int>[
      0xe3, 0x82, 0xa4, 0xe3, 0x83, 0xad, 0xe3, 0x83,
      0x8f, 0xe3, 0x83, 0x8b, 0xe3, 0x83, 0x9b, 0xe3,
      0x83, 0x98, 0xe3, 0x83, 0x88, 0x20, 0xe3, 0x83,
      0x81, 0xe3, 0x83, 0xaa, 0xe3, 0x83, 0x8c, 0xe3,
      0x83, 0xab, 0xe3, 0x83, 0xb2, 0x20, 0xe3, 0x83,
      0xaf, 0xe3, 0x82, 0xab, 0xe3, 0x83, 0xa8, 0xe3,
      0x82, 0xbf, 0xe3, 0x83, 0xac, 0xe3, 0x82, 0xbd,
      0x20, 0xe3, 0x83, 0x84, 0xe3, 0x83, 0x8d, 0xe3,
      0x83, 0x8a, 0xe3, 0x83, 0xa9, 0xe3, 0x83, 0xa0,
      0x0a, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb0, 0xe3,
      0x83, 0x8e, 0xe3, 0x82, 0xaa, 0xe3, 0x82, 0xaf,
      0xe3, 0x83, 0xa4, 0xe3, 0x83, 0x9e, 0x20, 0xe3,
      0x82, 0xb1, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xb3,
      0xe3, 0x82, 0xa8, 0xe3, 0x83, 0x86, 0x20, 0xe3,
      0x82, 0xa2, 0xe3, 0x82, 0xb5, 0xe3, 0x82, 0xad,
      0xe3, 0x83, 0xa6, 0xe3, 0x83, 0xa1, 0xe3, 0x83,
      0x9f, 0xe3, 0x82, 0xb7, 0x20, 0xe3, 0x83, 0xb1,
      0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa2, 0xe3, 0x82,
      0xbb, 0xe3, 0x82, 0xb9, 0xe3, 0x83, 0xb3];

  // Registers every test method of this class with [suite] under a
  // "Utf8Tests."-prefixed name.
  void registerTests(TestSuite suite) {
    register("Utf8Tests.testUtf8bytesToCodepoints", testUtf8bytesToCodepoints,
        suite);
    register("Utf8Tests.testUtf8BytesToString", testUtf8BytesToString, suite);
    register("Utf8Tests.testEncodeToUtf8", testEncodeToUtf8, suite);
    register("Utf8Tests.testIterableMethods", testIterableMethods, suite);
  }

  // Returns a fresh list holding [count] copies of
  // UNICODE_REPLACEMENT_CHARACTER_CODEPOINT.
  List<int> _replacements(int count) {
    List<int> result = <int>[];
    for (int i = 0; i < count; i++) {
      result.add(UNICODE_REPLACEMENT_CHARACTER_CODEPOINT);
    }
    return result;
  }

  // Asserts that decoding [bytes] yields exactly [count] replacement
  // characters and nothing else. [reason] labels the case on failure.
  void _expectReplaced(List<int> bytes, int count, String reason) {
    Expect.listEquals(_replacements(count), utf8ToCodepoints(bytes), reason);
  }

  // Feeds every lead byte in [first, limit), each followed by a space (0x20)
  // instead of a continuation byte, and asserts one replacement character per
  // pair. Note that the expectation contains no separate code point for the
  // space.
  void _expectLeadBytesReplaced(int first, int limit, String reason) {
    List<int> bytes = <int>[];
    for (int lead = first; lead < limit; lead++) {
      bytes.addAll([lead, 0x20]);
    }
    _expectReplaced(bytes, limit - first, reason);
  }

  // Checks that encodeUtf8 turns each sample phrase into its expected bytes.
  void testEncodeToUtf8() {
    Expect.listEquals(testEnglishUtf8, encodeUtf8(testEnglishPhrase),
        "english to utf8");

    Expect.listEquals(testDanishUtf8, encodeUtf8(testDanishPhrase),
        "encode danish to utf8");

    Expect.listEquals(testHebrewUtf8, encodeUtf8(testHebrewPhrase),
        "Hebrew to utf8");

    Expect.listEquals(testRussianUtf8, encodeUtf8(testRussianPhrase),
        "Russian to utf8");

    Expect.listEquals(testGreekUtf8, encodeUtf8(testGreekPhrase),
        "Greek to utf8");

    Expect.listEquals(testKatakanaUtf8, encodeUtf8(testKatakanaPhrase),
        "Katakana to utf8");
  }

  // Exercises utf8ToCodepoints on well-formed boundary sequences and on a
  // catalogue of malformed input (stray continuation bytes, truncated
  // sequences, impossible bytes, overlong forms, surrogates). The labels are
  // the hex value or byte pattern under test.
  void testUtf8bytesToCodepoints() {
    Expect.listEquals([954, 972, 963, 956, 949],
        utf8ToCodepoints([0xce, 0xba, 0xcf, 0x8c, 0xcf,
                          0x83, 0xce, 0xbc, 0xce, 0xb5]), "κόσμε");

    // boundary conditions: First possible sequence of a certain length
    Expect.listEquals([], utf8ToCodepoints([]), "no input");
    Expect.listEquals([0x0], utf8ToCodepoints([0x0]), "0");
    Expect.listEquals([0x80], utf8ToCodepoints([0xc2, 0x80]), "80");
    Expect.listEquals([0x800],
        utf8ToCodepoints([0xe0, 0xa0, 0x80]), "800");
    Expect.listEquals([0x10000],
        utf8ToCodepoints([0xf0, 0x90, 0x80, 0x80]), "10000");
    _expectReplaced([0xf8, 0x88, 0x80, 0x80, 0x80], 1, "200000");
    _expectReplaced([0xfc, 0x84, 0x80, 0x80, 0x80, 0x80], 1, "4000000");

    // boundary conditions: Last possible sequence of a certain length
    Expect.listEquals([0x7f], utf8ToCodepoints([0x7f]), "7f");
    Expect.listEquals([0x7ff], utf8ToCodepoints([0xdf, 0xbf]), "7ff");
    Expect.listEquals([0xffff],
        utf8ToCodepoints([0xef, 0xbf, 0xbf]), "ffff");
    _expectReplaced([0xf7, 0xbf, 0xbf, 0xbf], 1, "1fffff");
    _expectReplaced([0xfb, 0xbf, 0xbf, 0xbf, 0xbf], 1, "3ffffff");
    // fd bf bf bf bf bf is the last 6-byte pattern, i.e. 0x7fffffff; the
    // label used to repeat "4000000" from the first-6-byte case above.
    _expectReplaced([0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf], 1, "7fffffff");

    // other boundary conditions
    Expect.listEquals([0xd7ff],
        utf8ToCodepoints([0xed, 0x9f, 0xbf]), "d7ff");
    Expect.listEquals([0xe000],
        utf8ToCodepoints([0xee, 0x80, 0x80]), "e000");
    // ef bf bd is the well-formed encoding of U+FFFD itself (presumably the
    // value of the replacement constant), so this is a valid decode rather
    // than an error case.
    Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
        utf8ToCodepoints([0xef, 0xbf, 0xbd]), "fffd");
    Expect.listEquals([0x10ffff],
        utf8ToCodepoints([0xf4, 0x8f, 0xbf, 0xbf]), "10ffff");
    _expectReplaced([0xf4, 0x90, 0x80, 0x80], 1, "110000");

    // unexpected continuation bytes
    _expectReplaced([0x80], 1, "80 => replacement character");
    _expectReplaced([0xbf], 1, "bf => replacement character");

    List<int> allContinuationBytes = <int>[];
    for (int i = 0x80; i < 0xc0; i++) {
      allContinuationBytes.add(i);
    }
    _expectReplaced(allContinuationBytes, 0xc0 - 0x80,
        "80 - bf => replacement character x 64");

    // lead bytes of 2- to 6-byte sequences, each followed by a space
    _expectLeadBytesReplaced(0xc0, 0xe0,
        "c0 - df + space => replacement character x 32");
    _expectLeadBytesReplaced(0xe0, 0xf0,
        "e0 - ef + space => replacement character x 16");
    _expectLeadBytesReplaced(0xf0, 0xf8,
        "f0 - f7 + space => replacement character x 8");
    _expectLeadBytesReplaced(0xf8, 0xfc,
        "f8 - fb + space => replacement character x 4");
    _expectLeadBytesReplaced(0xfc, 0xfe,
        "fc - fd + space => replacement character x 2");

    // Sequences with last continuation byte missing
    _expectReplaced([0xc2], 1,
        "2-byte sequence with last byte missing");
    _expectReplaced([0xe0, 0x80], 1,
        "3-byte sequence with last byte missing");
    _expectReplaced([0xf0, 0x80, 0x80], 1,
        "4-byte sequence with last byte missing");
    _expectReplaced([0xf8, 0x88, 0x80, 0x80], 1,
        "5-byte sequence with last byte missing");
    _expectReplaced([0xfc, 0x80, 0x80, 0x80, 0x80], 1,
        "6-byte sequence with last byte missing");

    _expectReplaced([0xdf], 1,
        "2-byte sequence with last byte missing (hi)");
    _expectReplaced([0xef, 0xbf], 1,
        "3-byte sequence with last byte missing (hi)");
    _expectReplaced([0xf7, 0xbf, 0xbf], 1,
        "4-byte sequence with last byte missing (hi)");
    _expectReplaced([0xfb, 0xbf, 0xbf, 0xbf], 1,
        "5-byte sequence with last byte missing (hi)");
    _expectReplaced([0xfd, 0xbf, 0xbf, 0xbf, 0xbf], 1,
        "6-byte sequence with last byte missing (hi)");

    // Concatenation of incomplete sequences: the ten truncated sequences
    // above, back to back, must give ten replacement characters.
    _expectReplaced(
        [ 0xc2,
          0xe0, 0x80,
          0xf0, 0x80, 0x80,
          0xf8, 0x88, 0x80, 0x80,
          0xfc, 0x80, 0x80, 0x80, 0x80,
          0xdf,
          0xef, 0xbf,
          0xf7, 0xbf, 0xbf,
          0xfb, 0xbf, 0xbf, 0xbf,
          0xfd, 0xbf, 0xbf, 0xbf, 0xbf ],
        10,
        "Concatenation of incomplete sequences");

    // Impossible bytes
    _expectReplaced([0xfe], 1, "fe");
    _expectReplaced([0xff], 1, "ff");
    _expectReplaced([0xfe, 0xfe, 0xff, 0xff], 4, "fe fe ff ff");

    // Overlong sequences
    _expectReplaced([0xc0, 0xaf], 1, "c0 af");
    _expectReplaced([0xe0, 0x80, 0xaf], 1, "e0 80 af");
    _expectReplaced([0xf0, 0x80, 0x80, 0xaf], 1, "f0 80 80 af");
    _expectReplaced([0xf8, 0x80, 0x80, 0x80, 0xaf], 1, "f8 80 80 80 af");
    _expectReplaced([0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf], 1,
        "fc 80 80 80 80 af");

    _expectReplaced([0xc1, 0xbf], 1, "c1 bf");
    _expectReplaced([0xe0, 0x9f, 0xbf], 1, "e0 9f bf");
    _expectReplaced([0xf0, 0x8f, 0xbf, 0xbf], 1, "f0 8f bf bf");
    _expectReplaced([0xf8, 0x87, 0xbf, 0xbf, 0xbf], 1, "f8 87 bf bf bf");
    _expectReplaced([0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf], 1,
        "fc 83 bf bf bf bf");

    _expectReplaced([0xc0, 0x80], 1, "c0 80");
    _expectReplaced([0xe0, 0x80, 0x80], 1, "e0 80 80");
    _expectReplaced([0xf0, 0x80, 0x80, 0x80], 1, "f0 80 80 80");
    _expectReplaced([0xf8, 0x80, 0x80, 0x80, 0x80], 1, "f8 80 80 80 80");
    _expectReplaced([0xfc, 0x80, 0x80, 0x80, 0x80, 0x80], 1,
        "fc 80 80 80 80 80");

    // Illegal code positions
    _expectReplaced([0xed, 0xa0, 0x80], 1, "U+D800");
    _expectReplaced([0xed, 0xad, 0xbf], 1, "U+DB7F");
    _expectReplaced([0xed, 0xae, 0x80], 1, "U+DB80");
    _expectReplaced([0xed, 0xaf, 0xbf], 1, "U+DBFF");
    _expectReplaced([0xed, 0xb0, 0x80], 1, "U+DC00");
    _expectReplaced([0xed, 0xbe, 0x80], 1, "U+DF80");
    _expectReplaced([0xed, 0xbf, 0xbf], 1, "U+DFFF");

    // Paired UTF-16 surrogates: each half is replaced on its own.
    _expectReplaced([0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80], 2,
        "U+D800 U+DC00");
    _expectReplaced([0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf], 2,
        "U+D800 U+DFFF");
    _expectReplaced([0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80], 2,
        "U+DB7F U+DC00");
    _expectReplaced([0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf], 2,
        "U+DB7F U+DFFF");
    _expectReplaced([0xed, 0xae, 0x80, 0xed, 0xb0, 0x80], 2,
        "U+DB80 U+DC00");
    _expectReplaced([0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf], 2,
        "U+DB80 U+DFFF");
    _expectReplaced([0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80], 2,
        "U+DBFF U+DC00");
    _expectReplaced([0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf], 2,
        "U+DBFF U+DFFF");

    // Other illegal code positions (???)
    // These are asserted to decode unchanged rather than to be replaced.
    Expect.listEquals([0xfffe], utf8ToCodepoints([0xef, 0xbf, 0xbe]),
        "U+FFFE");
    Expect.listEquals([0xffff], utf8ToCodepoints([0xef, 0xbf, 0xbf]),
        "U+FFFF");
  }

  // Checks that decodeUtf8 turns each expected byte sequence back into its
  // sample phrase.
  void testUtf8BytesToString() {
    Expect.stringEquals(testEnglishPhrase,
        decodeUtf8(testEnglishUtf8), "English");

    Expect.stringEquals(testDanishPhrase,
        decodeUtf8(testDanishUtf8), "Danish");

    Expect.stringEquals(testHebrewPhrase,
        decodeUtf8(testHebrewUtf8), "Hebrew");

    Expect.stringEquals(testRussianPhrase,
        decodeUtf8(testRussianUtf8), "Russian");

    Expect.stringEquals(testGreekPhrase,
        decodeUtf8(testGreekUtf8), "Greek");

    Expect.stringEquals(testKatakanaPhrase,
        decodeUtf8(testKatakanaUtf8), "Katakana");
  }

  // Checks the Iterable view returned by decodeUtf8AsIterable: the first
  // element from a fresh iterator, then the whole sequence collected into a
  // list and rebuilt as a string.
  void testIterableMethods() {
    IterableUtf8Decoder englishDecoder = decodeUtf8AsIterable(testEnglishUtf8);
    // get the first character; compare against the phrase's first code point
    // (not the first encoded byte) so the check mirrors the Katakana one.
    Expect.equals(testEnglishPhrase.charCodes()[0],
        englishDecoder.iterator().next());
    // get the whole translation using the Iterable interface
    Expect.stringEquals(testEnglishPhrase,
        new String.fromCharCodes(new List<int>.from(englishDecoder)),
        "English via Iterable");

    IterableUtf8Decoder kataDecoder = decodeUtf8AsIterable(testKatakanaUtf8);
    // get the first character
    Expect.equals(testKatakanaPhrase.charCodes()[0],
        kataDecoder.iterator().next());
    // get the whole translation using the Iterable interface
    Expect.stringEquals(testKatakanaPhrase,
        new String.fromCharCodes(new List<int>.from(kataDecoder)),
        "Katakana via Iterable");
  }
}
OLDNEW
« client/tests/client/client.status ('K') | « utils/tests/string_encoding/utf8_test.dart ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698