| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/url_formatter/url_formatter.h" | 5 #include "components/url_formatter/url_formatter.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 #include <string.h> | 8 #include <string.h> |
| 9 | 9 |
| 10 #include <vector> | 10 #include <vector> |
| (...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 114 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", true}, | 114 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", true}, |
| 115 // Thai + Common | 115 // Thai + Common |
| 116 {"xn---123-9goxcp8c9db2r.th", | 116 {"xn---123-9goxcp8c9db2r.th", |
| 117 L"\x0e20\x0e32\x0e29\x0e32\x0e44\x0e17\x0e22-123.th", true}, | 117 L"\x0e20\x0e32\x0e29\x0e32\x0e44\x0e17\x0e22-123.th", true}, |
| 118 // Devanagari (Hindi) | 118 // Devanagari (Hindi) |
| 119 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", true}, | 119 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", true}, |
| 120 // Devanagari + Common | 120 // Devanagari + Common |
| 121 {"xn---123-kbjl2j0bl2k.in", | 121 {"xn---123-kbjl2j0bl2k.in", |
| 122 L"\x0939\x093f\x0928\x094d\x0926\x0940-123.in", true}, | 122 L"\x0939\x093f\x0928\x094d\x0926\x0940-123.in", true}, |
| 123 | 123 |
| 124 // 5 Aspirational scripts | 124 // 4 Aspirational scripts |
| 125 // Unified Canadian Syllabary | 125 // Unified Canadian Syllabary |
| 126 {"xn--dfe0tte.ca", L"\x1456\x14c2\x14ef.ca", true}, | 126 {"xn--dfe0tte.ca", L"\x1456\x14c2\x14ef.ca", true}, |
| 127 // Tifinagh | 127 // Tifinagh |
| 128 {"xn--4ljxa2bb4a6bxb.ma", | 128 {"xn--4ljxa2bb4a6bxb.ma", |
| 129 L"\x2d5c\x2d49\x2d3c\x2d49\x2d4f\x2d30\x2d56.ma", true}, | 129 L"\x2d5c\x2d49\x2d3c\x2d49\x2d4f\x2d30\x2d56.ma", true}, |
| 130 // Tifinagh with a disallowed character(U+2D6F) | 130 // Tifinagh with a disallowed character(U+2D6F) |
| 131 {"xn--hmjzaby5d5f.ma", L"\x2d5c\x2d49\x2d3c\x2d6f\x2d49\x2d4f.ma", false}, | 131 {"xn--hmjzaby5d5f.ma", L"\x2d5c\x2d49\x2d3c\x2d6f\x2d49\x2d4f.ma", false}, |
| 132 // Yi | 132 // Yi |
| 133 {"xn--4o7a6e1x64c.cn", L"\xa188\xa320\xa071\xa0b7.cn", true}, | 133 {"xn--4o7a6e1x64c.cn", L"\xa188\xa320\xa071\xa0b7.cn", true}, |
| 134 // Mongolian - 'ordu' (place, camp) | |
| 135 {"xn--56ec8bp.cn", L"\x1823\x1837\x1833\x1824.cn", true}, | |
| 136 // Mongolian with a disallowed character | |
| 137 {"xn--95e5de3ds.cn", L"\x1823\x1837\x1804\x1833\x1824.cn", false}, | |
| 138 // Miao/Pollard | 134 // Miao/Pollard |
| 139 {"xn--2u0fpf0a.cn", L"\U00016f04\U00016f62\U00016f59.cn", true}, | 135 {"xn--2u0fpf0a.cn", L"\U00016f04\U00016f62\U00016f59.cn", true}, |
| 140 | 136 |
| 137 // Mongolian is disallowed because it's written vertically. |
| 138 // Mongolian - 'ordu' (place, camp) |
| 139 {"xn--56ec8bp.cn", L"\x1823\x1837\x1833\x1824.cn", false}, |
| 140 // Mongolian with a disallowed character |
| 141 {"xn--95e5de3ds.cn", L"\x1823\x1837\x1804\x1833\x1824.cn", false}, |
| 142 |
| 141 // Script mixing tests | 143 // Script mixing tests |
| 142 // The following script combinations are allowed. | 144 // The following script combinations are allowed. |
| 143 // MODERATELY_RESTRICTIVE with Latin limited to ASCII-Latin. | 145 // MODERATELY_RESTRICTIVE with Latin limited to ASCII-Latin. |
| 144 // ASCII-Latin + Japn (Kana + Han) | 146 // ASCII-Latin + Japn (Kana + Han) |
| 145 // ASCII-Latin + Kore (Hangul + Han) | 147 // ASCII-Latin + Kore (Hangul + Han) |
| 146 // ASCII-Latin + Han + Bopomofo | 148 // ASCII-Latin + Han + Bopomofo |
| 147 // ASCII-Latin + any allowed script other than Cyrillic, Greek, Cherokee | 149 // ASCII-Latin + any allowed script other than Cyrillic, Greek, Cherokee |
| 148 // and Unified Canadian Syllabary | 150 // and Unified Canadian Syllabary |
| 149 // "payp<alpha>l.com" | 151 // "payp<alpha>l.com" |
| 150 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", false}, | 152 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", false}, |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 202 // Canadian Syllabary + Latin | 204 // Canadian Syllabary + Latin |
| 203 {"xn--ab-lym.com", L"ab\x14BF.com", false}, | 205 {"xn--ab-lym.com", L"ab\x14BF.com", false}, |
| 204 {"xn--ab1-p6q.com", L"ab1\x14BF.com", false}, | 206 {"xn--ab1-p6q.com", L"ab1\x14BF.com", false}, |
| 205 {"xn--1ab-m6qd.com", L"\x14BF" L"1ab.com", false}, | 207 {"xn--1ab-m6qd.com", L"\x14BF" L"1ab.com", false}, |
| 206 {"xn--ab-jymc.com", L"\x14BF" L"ab.com", false}, | 208 {"xn--ab-jymc.com", L"\x14BF" L"ab.com", false}, |
| 207 // Tifinagh + Latin | 209 // Tifinagh + Latin |
| 208 {"xn--liy-go4a.com", L"li\u24dfy.com", false}, | 210 {"xn--liy-go4a.com", L"li\u24dfy.com", false}, |
| 209 {"xn--rol-ho4a.com", L"rol\u24df.com", false}, | 211 {"xn--rol-ho4a.com", L"rol\u24df.com", false}, |
| 210 {"xn--ily-eo4a.com", L"\u24dfily.com", false}, | 212 {"xn--ily-eo4a.com", L"\u24dfily.com", false}, |
| 211 {"xn--1ly-eo4a.com", L"\u24df1ly.com", false}, | 213 {"xn--1ly-eo4a.com", L"\u24df1ly.com", false}, |
| 214 // Miao + Latin |
| 215 {"xn--liy-rc12a.com", L"li\U00016FD8y.com", false}, |
| 216 {"xn--rol-sc12a.com", L"roll\U00016FD8.com", false}, |
| 217 {"xn--ily-pc12a.com", L"\U00016FD8ily.com", false}, |
| 218 {"xn--1ly-pc12a.com", L"\U00016FD81ly.com", false}, |
| 212 | 219 |
| 213 // Invisibility check | 220 // Invisibility check |
| 214 // Thai tone mark mai ek (U+0E48) repeated | 221 // Thai tone mark mai ek (U+0E48) repeated |
| 215 {"xn--03c0b3ca.th", L"\x0e23\x0e35\x0e48\x0e48.th", false}, | 222 {"xn--03c0b3ca.th", L"\x0e23\x0e35\x0e48\x0e48.th", false}, |
| 216 // Acute accent repeated | 223 // Acute accent repeated |
| 217 {"xn--a-xbba.com", L"a\x0301\x0301.com", false}, | 224 {"xn--a-xbba.com", L"a\x0301\x0301.com", false}, |
| 218 // 'a' with acute accent + another acute accent | 225 // 'a' with acute accent + another acute accent |
| 219 {"xn--1ca20i.com", L"\x00e1\x0301.com", false}, | 226 {"xn--1ca20i.com", L"\x00e1\x0301.com", false}, |
| 220 // Combining mark at the beginning | 227 // Combining mark at the beginning |
| 221 {"xn--abc-fdc.jp", L"\x0300" L"abc.jp", false}, | 228 {"xn--abc-fdc.jp", L"\x0300" L"abc.jp", false}, |
| 222 | 229 |
| 223 // Mixed script confusable | 230 // Mixed script confusable |
| 224 // google with Armenian Small Letter Oh(U+0585) | 231 // Armenian օ, ո, հ, and զ mixed with Latin |
| 225 {"xn--gogle-lkg.com", L"g\x0585ogle.com", false}, | 232 {"xn--gogle-lkg.com", L"g\x0585ogle.com", false}, |
| 226 {"xn--range-kkg.com", L"\x0585range.com", false}, | 233 {"xn--range-kkg.com", L"\x0585range.com", false}, |
| 227 {"xn--cucko-pkg.com", L"cucko\x0585.com", false}, | 234 {"xn--cucko-pkg.com", L"cucko\x0585.com", false}, |
| 228 // Latin 'o' in Armenian. | 235 {"xn--an-bed.com", L"\x0578" L"an.com", false}, |
| 236 {"xn--hig-tee.com", L"hig\x0570.com", false}, |
| 237 {"xn---ray-fef.com", L"\x0566-ray.com", false}, |
| 238 // Latin 'o', 'h' and 'n' in Armenian |
| 229 {"xn--o-ybcg0cu0cq.com", | 239 {"xn--o-ybcg0cu0cq.com", |
| 230 L"o\x0585\x0580\x0574\x0578\x0582\x0566\x0568.com", false}, | 240 L"o\x0585\x0580\x0574\x0578\x0582\x0566\x0568.com", false}, |
| 241 {"xn--h-qccm4a.com", L"\x0580\x0574\x0578h.com", false}, |
| 242 {"xn--n-rccm3a.com", L"\x0580n\x0574\x0578.com", false}, |
| 243 {"xn--n1-0ddq0b.com", L"\x0580n1\x0574\x0578.com", false}, |
| 231 // Hiragana HE(U+3078) mixed with Katakana | 244 // Hiragana HE(U+3078) mixed with Katakana |
| 232 {"xn--49jxi3as0d0fpc.com", | 245 {"xn--49jxi3as0d0fpc.com", |
| 233 L"\x30e2\x30d2\x30fc\x30c8\x3078\x30d6\x30f3.com", false}, | 246 L"\x30e2\x30d2\x30fc\x30c8\x3078\x30d6\x30f3.com", false}, |
| 234 | 247 |
| 235 // U+30FC should be preceded by a Hiragana/Katakana. | 248 // U+30FC should be preceded by a Hiragana/Katakana. |
| 236 // Katakana + U+30FC + Han | 249 // Katakana + U+30FC + Han |
| 237 {"xn--lck0ip02qw5ya.jp", L"\x30ab\x30fc\x91ce\x7403.jp", true}, | 250 {"xn--lck0ip02qw5ya.jp", L"\x30ab\x30fc\x91ce\x7403.jp", true}, |
| 238 // Hiragana + U+30FC + Han | 251 // Hiragana + U+30FC + Han |
| 239 {"xn--u8j5tr47nw5ya.jp", L"\x304b\x30fc\x91ce\x7403.jp", true}, | 252 {"xn--u8j5tr47nw5ya.jp", L"\x304b\x30fc\x91ce\x7403.jp", true}, |
| 240 // U+30FC + Han | 253 // U+30FC + Han |
| (...skipping 808 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1049 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, | 1062 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, |
| 1050 0, 1, 2, 3, 4, 5, 6, 7 | 1063 0, 1, 2, 3, 4, 5, 6, 7 |
| 1051 }; | 1064 }; |
| 1052 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll, | 1065 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll, |
| 1053 net::UnescapeRule::NORMAL, omit_all_offsets); | 1066 net::UnescapeRule::NORMAL, omit_all_offsets); |
| 1054 } | 1067 } |
| 1055 | 1068 |
| 1056 } // namespace | 1069 } // namespace |
| 1057 | 1070 |
| 1058 } // namespace url_formatter | 1071 } // namespace url_formatter |
| OLD | NEW |