components/url_formatter/url_formatter.cc - Issue 2436113003: Update aspirational_scripts per Unicode 9

Side by Side Diff: components/url_formatter/url_formatter.cc

Issue 2436113003: Update aspirational_scripts per Unicode 9 (Closed)

Patch Set: Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « no previous file | no next file » | no next file with comments »

OLD	NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved.	1 // Copyright 2015 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "components/url_formatter/url_formatter.h"	5 #include "components/url_formatter/url_formatter.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <utility>	8 #include <utility>

9	9

10 #include "base/lazy_instance.h"	10 #include "base/lazy_instance.h"

(...skipping 410 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
421 allowed_set.addAll(*inclusion_set);	421 allowed_set.addAll(*inclusion_set);

422	422

423 // Five aspirational scripts are taken from UTR 31 Table 6 at	423 // Five aspirational scripts are taken from UTR 31 Table 6 at

424 // http://www.unicode.org/reports/tr31/#Aspirational_Use_Scripts .	424 // http://www.unicode.org/reports/tr31/#Aspirational_Use_Scripts .

425 // Not all the characters of aspirational scripts are suitable for	425 // Not all the characters of aspirational scripts are suitable for

426 // identifiers. Therefore, only characters belonging to	426 // identifiers. Therefore, only characters belonging to

427 // [:Identifier_Type=Aspirational:] (listed in 'Status/Type=Aspirational'	427 // [:Identifier_Type=Aspirational:] (listed in 'Status/Type=Aspirational'

428 // section at	428 // section at

429 // http://www.unicode.org/Public/security/latest/xidmodifications.txt) are	429 // http://www.unicode.org/Public/security/latest/xidmodifications.txt) are

430 // added to the allowed set. The list has to be updated when a new	430 // added to the allowed set. The list has to be updated when a new

431 // version of Unicode is released. The current version is 8.0.0 and ICU 58	431 // version of Unicode is released. The current version is 9.0.0 and ICU 60

432 // will have Unicode 9.0 data.	432 // will have Unicode 10.0 data.

433 #if U_ICU_VERSION_MAJOR_NUM < 58	433 #if U_ICU_VERSION_MAJOR_NUM < 60

434 const icu::UnicodeSet aspirational_scripts(	434 const icu::UnicodeSet aspirational_scripts(

435 icu::UnicodeString(	435 icu::UnicodeString(

436 // Unified Canadian Syllabics	436 // Unified Canadian Syllabics

437 "[\\u1401-\\u166C\\u166F-\\u167F"	437 "[\\u1401-\\u166C\\u166F-\\u167F"

438 // Mongolian	438 // Mongolian

439 "\\u1810-\\u1819\\u1820-\\u1877\\u1880-\\u18AA"	439 "\\u1810-\\u1819\\u1820-\\u1877\\u1880-\\u18AA"

440 // Unified Canadian Syllabics	440 // Unified Canadian Syllabics

441 "\\u18B0-\\u18F5"	441 "\\u18B0-\\u18F5"

442 // Tifinagh	442 // Tifinagh

443 "\\u2D30-\\u2D67\\u2D7F"	443 "\\u2D30-\\u2D67\\u2D7F"

444 // Yi	444 // Yi

445 "\\uA000-\\uA48C"	445 "\\uA000-\\uA48C"

446 // Miao	446 // Miao

447 "\\U00016F00-\\U00016F44\\U00016F50-\\U00016F7F"	447 "\\U00016F00-\\U00016F44\\U00016F50-\\U00016F7E"

448 "\\U00016F8F-\\U00016F9F]",	448 "\\U00016F8F-\\U00016F9F]",

449 -1, US_INV),	449 -1, US_INV),

450 *status);	450 *status);

451 allowed_set.addAll(aspirational_scripts);	451 allowed_set.addAll(aspirational_scripts);

452 #else	452 #else

453 #error "Update aspirational_scripts per Unicode 9.0"	453 #error "Update aspirational_scripts per Unicode 10.0"

454 #endif	454 #endif

455	455

456 // U+0338 is included in the recommended set, while U+05F4 and U+2027 are in	456 // U+0338 is included in the recommended set, while U+05F4 and U+2027 are in

457 // the inclusion set. However, they are blacklisted as a part of Mozilla's	457 // the inclusion set. However, they are blacklisted as a part of Mozilla's

458 // IDN blacklist (http://kb.mozillazine.org/Network.IDN.blacklist_chars).	458 // IDN blacklist (http://kb.mozillazine.org/Network.IDN.blacklist_chars).

459 // U+0338 and U+2027 are dropped; the former can look like a slash when	459 // U+0338 and U+2027 are dropped; the former can look like a slash when

460 // rendered with a broken font, and the latter can be confused with U+30FB	460 // rendered with a broken font, and the latter can be confused with U+30FB

461 // (Katakana Middle Dot). U+05F4 (Hebrew Punctuation Gershayim) is kept,	461 // (Katakana Middle Dot). U+05F4 (Hebrew Punctuation Gershayim) is kept,

462 // even though it can look like a double quotation mark. Using it in Hebrew	462 // even though it can look like a double quotation mark. Using it in Hebrew

463 // should be safe. When used with a non-Hebrew script, it'd be filtered by	463 // should be safe. When used with a non-Hebrew script, it'd be filtered by

(...skipping 326 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
790 return base::StartsWith(text, www, base::CompareCase::SENSITIVE)	790 return base::StartsWith(text, www, base::CompareCase::SENSITIVE)

791 ? text.substr(www.length()) : text;	791 ? text.substr(www.length()) : text;

792 }	792 }

793	793

794 base::string16 StripWWWFromHost(const GURL& url) {	794 base::string16 StripWWWFromHost(const GURL& url) {

795 DCHECK(url.is_valid());	795 DCHECK(url.is_valid());

796 return StripWWW(base::ASCIIToUTF16(url.host_piece()));	796 return StripWWW(base::ASCIIToUTF16(url.host_piece()));

797 }	797 }

798	798

799 } // namespace url_formatter	799 } // namespace url_formatter

OLD	NEW