| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/env dart | |
| 2 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
| 3 // for details. All rights reserved. Use of this source code is governed by a | |
| 4 // BSD-style license that can be found in the LICENSE file. | |
| 5 | |
| 6 #library("utf8_tests"); | |
| 7 #import("dunit.dart"); | |
| 8 #import("../../../lib/utf/utf.dart"); | |
| 9 | |
/// Entry point: runs every test registered by [Utf8Tests] in a fresh suite.
void main() {
  final TestSuite utf8Suite = new TestSuite();
  utf8Suite.registerTestClass(new Utf8Tests());
  utf8Suite.run();
}
| 15 | |
/// Tests for the UTF-8 helpers `encodeUtf8`, `decodeUtf8`, `utf8ToCodepoints`
/// and `decodeUtf8AsIterable`.
///
/// Every `test<Language>Phrase` string is paired with a `test<Language>Utf8`
/// list holding the bytes that phrase is expected to encode to (and decode
/// from).
class Utf8Tests extends TestClass {
  // ASCII only: one byte per character.
  static final String testEnglishPhrase =
      "The quick brown fox jumps over the lazy dog.";

  static final List<int> testEnglishUtf8 = const<int>[
      0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63,
      0x6b, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20,
      0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70,
      0x73, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74,
      0x68, 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20,
      0x64, 0x6f, 0x67, 0x2e];

  // Mostly ASCII with a few two-byte characters (ae, o-slash, a-ring).
  static final String testDanishPhrase = "Quizdeltagerne spiste jordbær med " +
      "fløde mens cirkusklovnen Wolther spillede på xylofon.";

  static final List<int> testDanishUtf8 = const<int>[
      0x51, 0x75, 0x69, 0x7a, 0x64, 0x65, 0x6c, 0x74,
      0x61, 0x67, 0x65, 0x72, 0x6e, 0x65, 0x20, 0x73,
      0x70, 0x69, 0x73, 0x74, 0x65, 0x20, 0x6a, 0x6f,
      0x72, 0x64, 0x62, 0xc3, 0xa6, 0x72, 0x20, 0x6d,
      0x65, 0x64, 0x20, 0x66, 0x6c, 0xc3, 0xb8, 0x64,
      0x65, 0x20, 0x6d, 0x65, 0x6e, 0x73, 0x20, 0x63,
      0x69, 0x72, 0x6b, 0x75, 0x73, 0x6b, 0x6c, 0x6f,
      0x76, 0x6e, 0x65, 0x6e, 0x20, 0x57, 0x6f, 0x6c,
      0x74, 0x68, 0x65, 0x72, 0x20, 0x73, 0x70, 0x69,
      0x6c, 0x6c, 0x65, 0x64, 0x65, 0x20, 0x70, 0xc3,
      0xa5, 0x20, 0x78, 0x79, 0x6c, 0x6f, 0x66, 0x6f,
      0x6e, 0x2e];

  // The declaration is split so that the right-to-left literal starts its own
  // line; editors otherwise interact badly with the change of text direction.
  static final String
      testHebrewPhrase = "דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה";

  static final List<int> testHebrewUtf8 = const<int>[
      0xd7, 0x93, 0xd7, 0x92, 0x20, 0xd7, 0xa1, 0xd7,
      0xa7, 0xd7, 0xa8, 0xd7, 0x9f, 0x20, 0xd7, 0xa9,
      0xd7, 0x98, 0x20, 0xd7, 0x91, 0xd7, 0x99, 0xd7,
      0x9d, 0x20, 0xd7, 0x9e, 0xd7, 0x90, 0xd7, 0x95,
      0xd7, 0x9b, 0xd7, 0x96, 0xd7, 0x91, 0x20, 0xd7,
      0x95, 0xd7, 0x9c, 0xd7, 0xa4, 0xd7, 0xaa, 0xd7,
      0xa2, 0x20, 0xd7, 0x9e, 0xd7, 0xa6, 0xd7, 0x90,
      0x20, 0xd7, 0x9c, 0xd7, 0x95, 0x20, 0xd7, 0x97,
      0xd7, 0x91, 0xd7, 0xa8, 0xd7, 0x94, 0x20, 0xd7,
      0x90, 0xd7, 0x99, 0xd7, 0x9a, 0x20, 0xd7, 0x94,
      0xd7, 0xa7, 0xd7, 0x9c, 0xd7, 0x99, 0xd7, 0x98,
      0xd7, 0x94];

  static final String testRussianPhrase = "Съешь же ещё этих мягких " +
      "французских булок да выпей чаю";

  static final List<int> testRussianUtf8 = const<int>[
      0xd0, 0xa1, 0xd1, 0x8a, 0xd0, 0xb5, 0xd1, 0x88,
      0xd1, 0x8c, 0x20, 0xd0, 0xb6, 0xd0, 0xb5, 0x20,
      0xd0, 0xb5, 0xd1, 0x89, 0xd1, 0x91, 0x20, 0xd1,
      0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0xd1, 0x85, 0x20,
      0xd0, 0xbc, 0xd1, 0x8f, 0xd0, 0xb3, 0xd0, 0xba,
      0xd0, 0xb8, 0xd1, 0x85, 0x20, 0xd1, 0x84, 0xd1,
      0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd1, 0x86, 0xd1,
      0x83, 0xd0, 0xb7, 0xd1, 0x81, 0xd0, 0xba, 0xd0,
      0xb8, 0xd1, 0x85, 0x20, 0xd0, 0xb1, 0xd1, 0x83,
      0xd0, 0xbb, 0xd0, 0xbe, 0xd0, 0xba, 0x20, 0xd0,
      0xb4, 0xd0, 0xb0, 0x20, 0xd0, 0xb2, 0xd1, 0x8b,
      0xd0, 0xbf, 0xd0, 0xb5, 0xd0, 0xb9, 0x20, 0xd1,
      0x87, 0xd0, 0xb0, 0xd1, 0x8e];

  // Mixes two-byte and three-byte (polytonic) characters.
  static final String testGreekPhrase = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ " +
      "στὸ χρυσαφὶ ξέφωτο";

  static final List<int> testGreekUtf8 = const<int>[
      0xce, 0x93, 0xce, 0xb1, 0xce, 0xb6, 0xce, 0xad,
      0xce, 0xb5, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce,
      0xb1, 0xe1, 0xbd, 0xb6, 0x20, 0xce, 0xbc, 0xcf,
      0x85, 0xcf, 0x81, 0xcf, 0x84, 0xce, 0xb9, 0xe1,
      0xbd, 0xb2, 0xcf, 0x82, 0x20, 0xce, 0xb4, 0xe1,
      0xbd, 0xb2, 0xce, 0xbd, 0x20, 0xce, 0xb8, 0xe1,
      0xbd, 0xb0, 0x20, 0xce, 0xb2, 0xcf, 0x81, 0xe1,
      0xbf, 0xb6, 0x20, 0xcf, 0x80, 0xce, 0xb9, 0xe1,
      0xbd, 0xb0, 0x20, 0xcf, 0x83, 0xcf, 0x84, 0xe1,
      0xbd, 0xb8, 0x20, 0xcf, 0x87, 0xcf, 0x81, 0xcf,
      0x85, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x86, 0xe1,
      0xbd, 0xb6, 0x20, 0xce, 0xbe, 0xce, 0xad, 0xcf,
      0x86, 0xcf, 0x89, 0xcf, 0x84, 0xce, 0xbf];

  // The two text lines must stay at column 0: the expected bytes below hold a
  // single 0x0a between the lines and no indentation.
  static final String testKatakanaPhrase = """
イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム
ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン""";

  static final List<int> testKatakanaUtf8 = const<int>[
      0xe3, 0x82, 0xa4, 0xe3, 0x83, 0xad, 0xe3, 0x83,
      0x8f, 0xe3, 0x83, 0x8b, 0xe3, 0x83, 0x9b, 0xe3,
      0x83, 0x98, 0xe3, 0x83, 0x88, 0x20, 0xe3, 0x83,
      0x81, 0xe3, 0x83, 0xaa, 0xe3, 0x83, 0x8c, 0xe3,
      0x83, 0xab, 0xe3, 0x83, 0xb2, 0x20, 0xe3, 0x83,
      0xaf, 0xe3, 0x82, 0xab, 0xe3, 0x83, 0xa8, 0xe3,
      0x82, 0xbf, 0xe3, 0x83, 0xac, 0xe3, 0x82, 0xbd,
      0x20, 0xe3, 0x83, 0x84, 0xe3, 0x83, 0x8d, 0xe3,
      0x83, 0x8a, 0xe3, 0x83, 0xa9, 0xe3, 0x83, 0xa0,
      0x0a, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb0, 0xe3,
      0x83, 0x8e, 0xe3, 0x82, 0xaa, 0xe3, 0x82, 0xaf,
      0xe3, 0x83, 0xa4, 0xe3, 0x83, 0x9e, 0x20, 0xe3,
      0x82, 0xb1, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xb3,
      0xe3, 0x82, 0xa8, 0xe3, 0x83, 0x86, 0x20, 0xe3,
      0x82, 0xa2, 0xe3, 0x82, 0xb5, 0xe3, 0x82, 0xad,
      0xe3, 0x83, 0xa6, 0xe3, 0x83, 0xa1, 0xe3, 0x83,
      0x9f, 0xe3, 0x82, 0xb7, 0x20, 0xe3, 0x83, 0xb1,
      0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa2, 0xe3, 0x82,
      0xbb, 0xe3, 0x82, 0xb9, 0xe3, 0x83, 0xb3];

  /// Registers every test method of this class with [suite].
  void registerTests(TestSuite suite) {
    register("Utf8Tests.testUtf8bytesToCodepoints", testUtf8bytesToCodepoints,
        suite);
    register("Utf8Tests.testUtf8BytesToString", testUtf8BytesToString, suite);
    register("Utf8Tests.testEncodeToUtf8", testEncodeToUtf8, suite);
    register("Utf8Tests.testIterableMethods", testIterableMethods, suite);
  }

  /// Asserts that decoding [bytes] yields exactly [count] copies of
  /// UNICODE_REPLACEMENT_CHARACTER_CODEPOINT and nothing else.
  void _expectReplaced(List<int> bytes, String reason, [int count = 1]) {
    List<int> expected = <int>[];
    for (int i = 0; i < count; i++) {
      expected.add(UNICODE_REPLACEMENT_CHARACTER_CODEPOINT);
    }
    Expect.listEquals(expected, utf8ToCodepoints(bytes), reason);
  }

  /// Decodes every lead byte in the half-open range [first, limit), each one
  /// followed by a space instead of a continuation byte, as a single input and
  /// asserts that the result is one replacement character per lead byte.
  void _expectLeadBytesReplaced(int first, int limit, String reason) {
    List<int> input = <int>[];
    for (int lead = first; lead < limit; lead++) {
      input.addAll([lead, 0x20]);
    }
    _expectReplaced(input, reason, limit - first);
  }

  /// Checks that `encodeUtf8` turns each sample phrase into its expected
  /// byte list.
  void testEncodeToUtf8() {
    Expect.listEquals(testEnglishUtf8, encodeUtf8(testEnglishPhrase),
        "english to utf8");

    Expect.listEquals(testDanishUtf8, encodeUtf8(testDanishPhrase),
        "encode danish to utf8");

    Expect.listEquals(testHebrewUtf8, encodeUtf8(testHebrewPhrase),
        "Hebrew to utf8");

    Expect.listEquals(testRussianUtf8, encodeUtf8(testRussianPhrase),
        "Russian to utf8");

    Expect.listEquals(testGreekUtf8, encodeUtf8(testGreekPhrase),
        "Greek to utf8");

    Expect.listEquals(testKatakanaUtf8, encodeUtf8(testKatakanaPhrase),
        "Katakana to utf8");
  }

  /// Exercises `utf8ToCodepoints` on well-formed boundary values and on a
  /// catalogue of malformed input (stray continuation bytes, truncated,
  /// overlong and surrogate sequences), which is expected to decode to
  /// replacement characters.
  void testUtf8bytesToCodepoints() {
    Expect.listEquals([954, 972, 963, 956, 949],
        utf8ToCodepoints([0xce, 0xba, 0xcf, 0x8c, 0xcf,
                          0x83, 0xce, 0xbc, 0xce, 0xb5]), "κόσμε");

    // boundary conditions: First possible sequence of a certain length
    Expect.listEquals([], utf8ToCodepoints([]), "no input");
    Expect.listEquals([0x0], utf8ToCodepoints([0x0]), "0");
    Expect.listEquals([0x80], utf8ToCodepoints([0xc2, 0x80]), "80");
    Expect.listEquals([0x800],
        utf8ToCodepoints([0xe0, 0xa0, 0x80]), "800");
    Expect.listEquals([0x10000],
        utf8ToCodepoints([0xf0, 0x90, 0x80, 0x80]), "10000");
    // 5- and 6-byte forms are expected to be rejected.
    _expectReplaced([0xf8, 0x88, 0x80, 0x80, 0x80], "200000");
    _expectReplaced([0xfc, 0x84, 0x80, 0x80, 0x80, 0x80], "4000000");

    // boundary conditions: Last possible sequence of a certain length
    Expect.listEquals([0x7f], utf8ToCodepoints([0x7f]), "7f");
    Expect.listEquals([0x7ff], utf8ToCodepoints([0xdf, 0xbf]), "7ff");
    Expect.listEquals([0xffff],
        utf8ToCodepoints([0xef, 0xbf, 0xbf]), "ffff");
    _expectReplaced([0xf7, 0xbf, 0xbf, 0xbf], "1fffff");
    _expectReplaced([0xfb, 0xbf, 0xbf, 0xbf, 0xbf], "3ffffff");
    // fd bf bf bf bf bf carries 31 one-bits, i.e. 0x7fffffff.
    _expectReplaced([0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf], "7fffffff");

    // other boundary conditions
    Expect.listEquals([0xd7ff],
        utf8ToCodepoints([0xed, 0x9f, 0xbf]), "d7ff");
    Expect.listEquals([0xe000],
        utf8ToCodepoints([0xee, 0x80, 0x80]), "e000");
    // Here the replacement character is the value actually encoded, not an
    // error marker, so the expectation is spelled out explicitly.
    Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
        utf8ToCodepoints([0xef, 0xbf, 0xbd]), "fffd");
    Expect.listEquals([0x10ffff],
        utf8ToCodepoints([0xf4, 0x8f, 0xbf, 0xbf]), "10ffff");
    _expectReplaced([0xf4, 0x90, 0x80, 0x80], "110000");

    // unexpected continuation bytes
    _expectReplaced([0x80], "80 => replacement character");
    _expectReplaced([0xbf], "bf => replacement character");

    List<int> allContinuationBytes = <int>[];
    for (int i = 0x80; i < 0xc0; i++) {
      allContinuationBytes.add(i);
    }
    _expectReplaced(allContinuationBytes,
        "80 - bf => replacement character x 64",
        allContinuationBytes.length);

    // lead bytes followed by a space instead of a continuation byte
    _expectLeadBytesReplaced(0xc0, 0xe0,
        "c0 - df + space => replacement character x 32");
    _expectLeadBytesReplaced(0xe0, 0xf0,
        "e0 - ef + space => replacement character x 16");
    _expectLeadBytesReplaced(0xf0, 0xf8,
        "f0 - f7 + space => replacement character x 8");
    _expectLeadBytesReplaced(0xf8, 0xfc,
        "f8 - fb + space => replacement character x 4");
    _expectLeadBytesReplaced(0xfc, 0xfe,
        "fc - fd + space => replacement character x 2");

    // Sequences with last continuation byte missing
    _expectReplaced([0xc2],
        "2-byte sequence with last byte missing");
    _expectReplaced([0xe0, 0x80],
        "3-byte sequence with last byte missing");
    _expectReplaced([0xf0, 0x80, 0x80],
        "4-byte sequence with last byte missing");
    _expectReplaced([0xf8, 0x88, 0x80, 0x80],
        "5-byte sequence with last byte missing");
    _expectReplaced([0xfc, 0x80, 0x80, 0x80, 0x80],
        "6-byte sequence with last byte missing");

    _expectReplaced([0xdf],
        "2-byte sequence with last byte missing (hi)");
    _expectReplaced([0xef, 0xbf],
        "3-byte sequence with last byte missing (hi)");
    _expectReplaced([0xf7, 0xbf, 0xbf],
        "4-byte sequence with last byte missing (hi)");
    _expectReplaced([0xfb, 0xbf, 0xbf, 0xbf],
        "5-byte sequence with last byte missing (hi)");
    _expectReplaced([0xfd, 0xbf, 0xbf, 0xbf, 0xbf],
        "6-byte sequence with last byte missing (hi)");

    // Concatenation of incomplete sequences: the ten truncated sequences
    // above, back to back, must give one replacement character each.
    _expectReplaced(
        [ 0xc2,
          0xe0, 0x80,
          0xf0, 0x80, 0x80,
          0xf8, 0x88, 0x80, 0x80,
          0xfc, 0x80, 0x80, 0x80, 0x80,
          0xdf,
          0xef, 0xbf,
          0xf7, 0xbf, 0xbf,
          0xfb, 0xbf, 0xbf, 0xbf,
          0xfd, 0xbf, 0xbf, 0xbf, 0xbf ],
        "Concatenation of incomplete sequences", 10);

    // Impossible bytes
    _expectReplaced([0xfe], "fe");
    _expectReplaced([0xff], "ff");
    _expectReplaced([0xfe, 0xfe, 0xff, 0xff], "fe fe ff ff", 4);

    // Overlong sequences
    _expectReplaced([0xc0, 0xaf], "c0 af");
    _expectReplaced([0xe0, 0x80, 0xaf], "e0 80 af");
    _expectReplaced([0xf0, 0x80, 0x80, 0xaf], "f0 80 80 af");
    _expectReplaced([0xf8, 0x80, 0x80, 0x80, 0xaf], "f8 80 80 80 af");
    _expectReplaced([0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf],
        "fc 80 80 80 80 af");

    _expectReplaced([0xc1, 0xbf], "c1 bf");
    _expectReplaced([0xe0, 0x9f, 0xbf], "e0 9f bf");
    _expectReplaced([0xf0, 0x8f, 0xbf, 0xbf], "f0 8f bf bf");
    _expectReplaced([0xf8, 0x87, 0xbf, 0xbf, 0xbf], "f8 87 bf bf bf");
    _expectReplaced([0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf],
        "fc 83 bf bf bf bf");

    _expectReplaced([0xc0, 0x80], "c0 80");
    _expectReplaced([0xe0, 0x80, 0x80], "e0 80 80");
    _expectReplaced([0xf0, 0x80, 0x80, 0x80], "f0 80 80 80");
    _expectReplaced([0xf8, 0x80, 0x80, 0x80, 0x80], "f8 80 80 80 80");
    _expectReplaced([0xfc, 0x80, 0x80, 0x80, 0x80, 0x80],
        "fc 80 80 80 80 80");

    // Illegal code positions
    _expectReplaced([0xed, 0xa0, 0x80], "U+D800");
    _expectReplaced([0xed, 0xad, 0xbf], "U+DB7F");
    _expectReplaced([0xed, 0xae, 0x80], "U+DB80");
    _expectReplaced([0xed, 0xaf, 0xbf], "U+DBFF");
    _expectReplaced([0xed, 0xb0, 0x80], "U+DC00");
    _expectReplaced([0xed, 0xbe, 0x80], "U+DF80");
    _expectReplaced([0xed, 0xbf, 0xbf], "U+DFFF");

    // Paired UTF-16 surrogates: each half is replaced on its own.
    _expectReplaced([0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80],
        "U+D800 U+DC00", 2);
    _expectReplaced([0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf],
        "U+D800 U+DFFF", 2);
    _expectReplaced([0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80],
        "U+DB7F U+DC00", 2);
    _expectReplaced([0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf],
        "U+DB7F U+DFFF", 2);
    _expectReplaced([0xed, 0xae, 0x80, 0xed, 0xb0, 0x80],
        "U+DB80 U+DC00", 2);
    _expectReplaced([0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf],
        "U+DB80 U+DFFF", 2);
    _expectReplaced([0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80],
        "U+DBFF U+DC00", 2);
    _expectReplaced([0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf],
        "U+DBFF U+DFFF", 2);

    // U+FFFE and U+FFFF: unlike the surrogates above, these are expected to
    // be passed through unchanged rather than replaced.
    Expect.listEquals([0xfffe], utf8ToCodepoints([0xef, 0xbf, 0xbe]),
        "U+FFFE");
    Expect.listEquals([0xffff], utf8ToCodepoints([0xef, 0xbf, 0xbf]),
        "U+FFFF");
  }

  /// Checks that `decodeUtf8` turns each expected byte list back into its
  /// sample phrase.
  void testUtf8BytesToString() {
    Expect.stringEquals(testEnglishPhrase,
        decodeUtf8(testEnglishUtf8), "English");

    Expect.stringEquals(testDanishPhrase,
        decodeUtf8(testDanishUtf8), "Danish");

    Expect.stringEquals(testHebrewPhrase,
        decodeUtf8(testHebrewUtf8), "Hebrew");

    Expect.stringEquals(testRussianPhrase,
        decodeUtf8(testRussianUtf8), "Russian");

    Expect.stringEquals(testGreekPhrase,
        decodeUtf8(testGreekUtf8), "Greek");

    Expect.stringEquals(testKatakanaPhrase,
        decodeUtf8(testKatakanaUtf8), "Katakana");
  }

  /// Checks the object returned by `decodeUtf8AsIterable`: the first value of
  /// a fresh iterator, and the full decoded text when the decoder is copied
  /// into a list.
  void testIterableMethods() {
    IterableUtf8Decoder englishDecoder = decodeUtf8AsIterable(testEnglishUtf8);
    // get the first character; compare against the phrase's first code point
    // (not the first raw byte, which only coincides with it for ASCII)
    Expect.equals(testEnglishPhrase.charCodes()[0],
        englishDecoder.iterator().next());
    // get the whole translation using the Iterable interface
    Expect.stringEquals(testEnglishPhrase,
        new String.fromCharCodes(new List<int>.from(englishDecoder)));

    IterableUtf8Decoder kataDecoder = decodeUtf8AsIterable(testKatakanaUtf8);
    // get the first character
    Expect.equals(testKatakanaPhrase.charCodes()[0],
        kataDecoder.iterator().next());
    // get the whole translation using the Iterable interface
    Expect.stringEquals(testKatakanaPhrase,
        new String.fromCharCodes(new List<int>.from(kataDecoder)));
  }
}
| OLD | NEW |