| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/url_formatter/url_formatter.h" | 5 #include "components/url_formatter/url_formatter.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <utility> | 8 #include <utility> |
| 9 | 9 |
| 10 #include "base/lazy_instance.h" | 10 #include "base/lazy_instance.h" |
| (...skipping 410 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 421 allowed_set.addAll(*inclusion_set); | 421 allowed_set.addAll(*inclusion_set); |
| 422 | 422 |
| 423 // Five aspirational scripts are taken from UTR 31 Table 6 at | 423 // Five aspirational scripts are taken from UTR 31 Table 6 at |
| 424 // http://www.unicode.org/reports/tr31/#Aspirational_Use_Scripts . | 424 // http://www.unicode.org/reports/tr31/#Aspirational_Use_Scripts . |
| 425 // Not all the characters of aspirational scripts are suitable for | 425 // Not all the characters of aspirational scripts are suitable for |
| 426 // identifiers. Therefore, only characters belonging to | 426 // identifiers. Therefore, only characters belonging to |
| 427 // [:Identifier_Type=Aspirational:] (listed in 'Status/Type=Aspirational' | 427 // [:Identifier_Type=Aspirational:] (listed in 'Status/Type=Aspirational' |
| 428 // section at | 428 // section at |
| 429 // http://www.unicode.org/Public/security/latest/xidmodifications.txt) are | 429 // http://www.unicode.org/Public/security/latest/xidmodifications.txt) are |
| 430 // added to the allowed set. The list has to be updated when a new | 430 // added to the allowed set. The list has to be updated when a new |
| 431 // version of Unicode is released. The current version is 8.0.0 and ICU 58 | 431 // version of Unicode is released. The current version is 9.0.0 and ICU 60 |
| 432 // will have Unicode 9.0 data. | 432 // will have Unicode 10.0 data. |
| 433 #if U_ICU_VERSION_MAJOR_NUM < 58 | 433 #if U_ICU_VERSION_MAJOR_NUM < 60 |
| 434 const icu::UnicodeSet aspirational_scripts( | 434 const icu::UnicodeSet aspirational_scripts( |
| 435 icu::UnicodeString( | 435 icu::UnicodeString( |
| 436 // Unified Canadian Syllabics | 436 // Unified Canadian Syllabics |
| 437 "[\\u1401-\\u166C\\u166F-\\u167F" | 437 "[\\u1401-\\u166C\\u166F-\\u167F" |
| 438 // Mongolian | 438 // Mongolian |
| 439 "\\u1810-\\u1819\\u1820-\\u1877\\u1880-\\u18AA" | 439 "\\u1810-\\u1819\\u1820-\\u1877\\u1880-\\u18AA" |
| 440 // Unified Canadian Aboriginal Syllabics Extended | 440 // Unified Canadian Aboriginal Syllabics Extended |
| 441 "\\u18B0-\\u18F5" | 441 "\\u18B0-\\u18F5" |
| 442 // Tifinagh | 442 // Tifinagh |
| 443 "\\u2D30-\\u2D67\\u2D7F" | 443 "\\u2D30-\\u2D67\\u2D7F" |
| 444 // Yi | 444 // Yi |
| 445 "\\uA000-\\uA48C" | 445 "\\uA000-\\uA48C" |
| 446 // Miao | 446 // Miao |
| 447 "\\U00016F00-\\U00016F44\\U00016F50-\\U00016F7F" | 447 "\\U00016F00-\\U00016F44\\U00016F50-\\U00016F7E" |
| 448 "\\U00016F8F-\\U00016F9F]", | 448 "\\U00016F8F-\\U00016F9F]", |
| 449 -1, US_INV), | 449 -1, US_INV), |
| 450 *status); | 450 *status); |
| 451 allowed_set.addAll(aspirational_scripts); | 451 allowed_set.addAll(aspirational_scripts); |
| 452 #else | 452 #else |
| 453 #error "Update aspirational_scripts per Unicode 9.0" | 453 #error "Update aspirational_scripts per Unicode 10.0" |
| 454 #endif | 454 #endif |
| 455 | 455 |
| 456 // U+0338 is included in the recommended set, while U+05F4 and U+2027 are in | 456 // U+0338 is included in the recommended set, while U+05F4 and U+2027 are in |
| 457 // the inclusion set. However, they are blacklisted as a part of Mozilla's | 457 // the inclusion set. However, they are blacklisted as a part of Mozilla's |
| 458 // IDN blacklist (http://kb.mozillazine.org/Network.IDN.blacklist_chars). | 458 // IDN blacklist (http://kb.mozillazine.org/Network.IDN.blacklist_chars). |
| 459 // U+0338 and U+2027 are dropped; the former can look like a slash when | 459 // U+0338 and U+2027 are dropped; the former can look like a slash when |
| 460 // rendered with a broken font, and the latter can be confused with U+30FB | 460 // rendered with a broken font, and the latter can be confused with U+30FB |
| 461 // (Katakana Middle Dot). U+05F4 (Hebrew Punctuation Gershayim) is kept, | 461 // (Katakana Middle Dot). U+05F4 (Hebrew Punctuation Gershayim) is kept, |
| 462 // even though it can look like a double quotation mark. Using it in Hebrew | 462 // even though it can look like a double quotation mark. Using it in Hebrew |
| 463 // should be safe. When used with a non-Hebrew script, it'd be filtered by | 463 // should be safe. When used with a non-Hebrew script, it'd be filtered by |
| (...skipping 326 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 790 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) | 790 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) |
| 791 ? text.substr(www.length()) : text; | 791 ? text.substr(www.length()) : text; |
| 792 } | 792 } |
| 793 | 793 |
| 794 base::string16 StripWWWFromHost(const GURL& url) { | 794 base::string16 StripWWWFromHost(const GURL& url) { |
| 795 DCHECK(url.is_valid()); | 795 DCHECK(url.is_valid()); |
| 796 return StripWWW(base::ASCIIToUTF16(url.host_piece())); | 796 return StripWWW(base::ASCIIToUTF16(url.host_piece())); |
| 797 } | 797 } |
| 798 | 798 |
| 799 } // namespace url_formatter | 799 } // namespace url_formatter |
| OLD | NEW |