| Index: icu51/source/common/uscript_props.cpp
|
| ===================================================================
|
| --- icu51/source/common/uscript_props.cpp (revision 0)
|
| +++ icu51/source/common/uscript_props.cpp (revision 0)
|
| @@ -0,0 +1,267 @@
|
| +/*
|
| +*******************************************************************************
|
| +* Copyright (C) 2013, International Business Machines
|
| +* Corporation and others. All Rights Reserved.
|
| +*******************************************************************************
|
| +* file name: uscript_props.cpp
|
| +* encoding: US-ASCII
|
| +* tab size: 8 (not used)
|
| +* indentation:4
|
| +*
|
| +* created on: 2013feb16
|
| +* created by: Markus W. Scherer
|
| +*/
|
| +
|
| +#include "unicode/utypes.h"
|
| +#include "unicode/unistr.h"
|
| +#include "unicode/uscript.h"
|
| +#include "unicode/utf16.h"
|
| +#include "ustr_imp.h"
|
| +
|
| +#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))  // element count of a real array (not a pointer); outer parentheses keep the expansion a single operand
|
| +
|
| +namespace {
|
| +
|
| +// Script metadata (script properties), packed into one int32_t per script.
|
| +// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
|
| +
|
| +// 0 = NOT_ENCODED, no sample character, default false script properties.
|
| +// Bits 20.. 0: sample character (the accessors extract it with & 0x1fffff)
|
| +
|
| +// Bits 23..21: usage (uscript_getUsage() returns (props >> 21) & 7 as UScriptUsage)
|
| +const int32_t UNKNOWN = 1 << 21;
|
| +const int32_t EXCLUSION = 2 << 21;
|
| +const int32_t LIMITED_USE = 3 << 21;
|
| +const int32_t ASPIRATIONAL = 4 << 21;
|
| +const int32_t RECOMMENDED = 5 << 21;
|
| +
|
| +// Bits 31..24: Single-bit flags (only bits 24..26 are defined here)
|
| +const int32_t RTL = 1 << 24;  // tested by uscript_isRightToLeft()
|
| +const int32_t LB_LETTERS = 1 << 25;  // tested by uscript_breaksBetweenLetters()
|
| +const int32_t CASED = 1 << 26;  // tested by uscript_isCased()
|
| +
|
| +const int32_t SCRIPT_PROPS[] = {  // one packed properties word per UScriptCode value (see getScriptProps()); a bare 0 means no metadata
|
| + // Begin copy-paste output from
|
| + // tools/trunk/unicode/py/parsescriptmetadata.py
|
| + 0x0040 | UNKNOWN, // Zyyy
|
| + 0x0308 | UNKNOWN, // Zinh
|
| + 0x0628 | RECOMMENDED | RTL, // Arab
|
| + 0x0531 | RECOMMENDED | CASED, // Armn
|
| + 0x0995 | RECOMMENDED, // Beng
|
| + 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo
|
| + 0x13C4 | LIMITED_USE, // Cher
|
| + 0x03E2 | EXCLUSION | CASED, // Copt
|
| + 0x042F | RECOMMENDED | CASED, // Cyrl
|
| + 0x10414 | EXCLUSION | CASED, // Dsrt
|
| + 0x0905 | RECOMMENDED, // Deva
|
| + 0x12A0 | RECOMMENDED, // Ethi
|
| + 0x10D3 | RECOMMENDED, // Geor
|
| + 0x10330 | EXCLUSION, // Goth
|
| + 0x03A9 | RECOMMENDED | CASED, // Grek
|
| + 0x0A95 | RECOMMENDED, // Gujr
|
| + 0x0A15 | RECOMMENDED, // Guru
|
| + 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani
|
| + 0xAC00 | RECOMMENDED, // Hang
|
| + 0x05D0 | RECOMMENDED | RTL, // Hebr
|
| + 0x304B | RECOMMENDED | LB_LETTERS, // Hira
|
| + 0x0C95 | RECOMMENDED, // Knda
|
| + 0x30AB | RECOMMENDED | LB_LETTERS, // Kana
|
| + 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr
|
| + 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo
|
| + 0x004C | RECOMMENDED | CASED, // Latn
|
| + 0x0D15 | RECOMMENDED, // Mlym
|
| + 0x1826 | ASPIRATIONAL, // Mong
|
| + 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr
|
| + 0x168F | EXCLUSION, // Ogam
|
| + 0x10300 | EXCLUSION, // Ital
|
| + 0x0B15 | RECOMMENDED, // Orya
|
| + 0x16A0 | EXCLUSION, // Runr
|
| + 0x0D85 | RECOMMENDED, // Sinh
|
| + 0x0710 | LIMITED_USE | RTL, // Syrc
|
| + 0x0B95 | RECOMMENDED, // Taml
|
| + 0x0C15 | RECOMMENDED, // Telu
|
| + 0x078C | RECOMMENDED | RTL, // Thaa
|
| + 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai
|
| + 0x0F40 | RECOMMENDED, // Tibt
|
| + 0x14C0 | ASPIRATIONAL, // Cans
|
| + 0xA288 | ASPIRATIONAL | LB_LETTERS, // Yiii
|
| + 0x1703 | EXCLUSION, // Tglg
|
| + 0x1723 | EXCLUSION, // Hano
|
| + 0x1743 | EXCLUSION, // Buhd
|
| + 0x1763 | EXCLUSION, // Tagb
|
| + 0x2800 | UNKNOWN, // Brai
|
| + 0x10800 | EXCLUSION | RTL, // Cprt
|
| + 0x1900 | LIMITED_USE, // Limb
|
| + 0x10000 | EXCLUSION, // Linb
|
| + 0x10480 | EXCLUSION, // Osma
|
| + 0x10450 | EXCLUSION, // Shaw
|
| + 0x1950 | LIMITED_USE | LB_LETTERS, // Tale
|
| + 0x10380 | EXCLUSION, // Ugar
|
| + 0,
|
| + 0x1A00 | EXCLUSION, // Bugi
|
| + 0x2C00 | EXCLUSION | CASED, // Glag
|
| + 0x10A00 | EXCLUSION | RTL, // Khar
|
| + 0xA800 | LIMITED_USE, // Sylo
|
| + 0x1980 | LIMITED_USE | LB_LETTERS, // Talu
|
| + 0x2D30 | ASPIRATIONAL, // Tfng
|
| + 0x103A0 | EXCLUSION, // Xpeo
|
| + 0x1B05 | LIMITED_USE | LB_LETTERS, // Bali
|
| + 0x1BC0 | LIMITED_USE, // Batk
|
| + 0,
|
| + 0x11005 | EXCLUSION, // Brah
|
| + 0xAA00 | LIMITED_USE, // Cham
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0x13153 | EXCLUSION, // Egyp
|
| + 0,
|
| + 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans
|
| + 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0xA984 | LIMITED_USE | LB_LETTERS, // Java
|
| + 0xA90A | LIMITED_USE, // Kali
|
| + 0,
|
| + 0,
|
| + 0x1C00 | LIMITED_USE, // Lepc
|
| + 0,
|
| + 0x0840 | LIMITED_USE | RTL, // Mand
|
| + 0,
|
| + 0x10980 | EXCLUSION | RTL, // Mero
|
| + 0x07CA | LIMITED_USE | RTL, // Nkoo
|
| + 0x10C00 | EXCLUSION | RTL, // Orkh
|
| + 0,
|
| + 0xA840 | EXCLUSION, // Phag
|
| + 0x10900 | EXCLUSION | RTL, // Phnx
|
| + 0x16F00 | ASPIRATIONAL, // Plrd
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0xA549 | LIMITED_USE, // Vaii
|
| + 0,
|
| + 0x12000 | EXCLUSION, // Xsux
|
| + 0,
|
| + 0xFDD0 | UNKNOWN, // Zzzz
|
| + 0x102A0 | EXCLUSION, // Cari
|
| + 0x304B | RECOMMENDED | LB_LETTERS, // Jpan
|
| + 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana
|
| + 0x10280 | EXCLUSION, // Lyci
|
| + 0x10920 | EXCLUSION | RTL, // Lydi
|
| + 0x1C5A | LIMITED_USE, // Olck
|
| + 0xA930 | EXCLUSION, // Rjng
|
| + 0xA882 | LIMITED_USE, // Saur
|
| + 0,
|
| + 0x1B83 | LIMITED_USE, // Sund
|
| + 0,
|
| + 0xABC0 | LIMITED_USE, // Mtei
|
| + 0x10840 | EXCLUSION | RTL, // Armi
|
| + 0x10B00 | EXCLUSION | RTL, // Avst
|
| + 0x11103 | LIMITED_USE, // Cakm
|
| + 0xAC00 | RECOMMENDED, // Kore
|
| + 0x11083 | EXCLUSION, // Kthi
|
| + 0,
|
| + 0x10B60 | EXCLUSION | RTL, // Phli
|
| + 0,
|
| + 0,
|
| + 0x10B40 | EXCLUSION | RTL, // Prti
|
| + 0x0800 | EXCLUSION | RTL, // Samr
|
| + 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt
|
| + 0,
|
| + 0,
|
| + 0xA6A0 | LIMITED_USE, // Bamu
|
| + 0xA4D0 | LIMITED_USE, // Lisu
|
| + 0,
|
| + 0x10A60 | EXCLUSION | RTL, // Sarb
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0x109A0 | EXCLUSION | RTL, // Merc
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0x11183 | EXCLUSION, // Shrd
|
| + 0x110D0 | EXCLUSION, // Sora
|
| + 0x11680 | EXCLUSION, // Takr
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0,
|
| + 0,
|
| + // End copy-paste from parsescriptmetadata.py
|
| +};
|
| +
|
| +// Looks up the packed properties word for a script; codes outside the table give 0.
|
| +int32_t getScriptProps(UScriptCode script) {
|
| +    if (script < 0 || script >= LENGTHOF(SCRIPT_PROPS)) {
|
| +        return 0;  // same value as a bare-0 table entry: no sample, no usage, no flags
|
| +    }
|
| +    return SCRIPT_PROPS[script];
|
| +}
|
| +
|
| +} // namespace
|
| +
|
| +// Writes the script's sample character into dest as UTF-16 when it fits in capacity.
|
| +// The returned length and the termination/overflow status come from u_terminateUChars().
|
| +U_CAPI int32_t U_EXPORT2
|
| +uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
|
| +    if(U_FAILURE(*pErrorCode)) {
|
| +        return 0;
|
| +    }
|
| +    if(capacity < 0 || (dest == NULL && capacity > 0)) {
|
| +        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
| +        return 0;
|
| +    }
|
| +    // Bits 20..0 of the properties word hold the sample code point; 0 means there is none.
|
| +    const int32_t codePoint = getScriptProps(script) & 0x1fffff;
|
| +    const int32_t units = (codePoint == 0) ? 0 : U16_LENGTH(codePoint);
|
| +    if(codePoint != 0 && units <= capacity) {
|
| +        int32_t offset = 0;
|
| +        U16_APPEND_UNSAFE(dest, offset, codePoint);
|
| +    }
|
| +    return u_terminateUChars(dest, capacity, units, pErrorCode);
|
| +}
|
| +
|
| +// Returns the script's sample character as a string; the string is empty if there is none.
|
| +U_COMMON_API icu::UnicodeString U_EXPORT2
|
| +uscript_getSampleUnicodeString(UScriptCode script) {
|
| +    const int32_t codePoint = getScriptProps(script) & 0x1fffff;  // bits 20..0
|
| +    icu::UnicodeString result;
|
| +    if(codePoint == 0) { return result; }
|
| +    result.append(codePoint);
|
| +    return result;
|
| +}
|
| +
|
| +U_CAPI UScriptUsage U_EXPORT2
|
| +uscript_getUsage(UScriptCode script) {  // usage is stored in bits 23..21 of the properties word
|
| +    return static_cast<UScriptUsage>((getScriptProps(script) >> 21) & 7);
|
| +}
|
| +
|
| +U_CAPI UBool U_EXPORT2
|
| +uscript_isRightToLeft(UScriptCode script) {  // nonzero iff the RTL flag bit is set for the script
|
| +    return static_cast<UBool>((getScriptProps(script) & RTL) != 0);
|
| +}
|
| +
|
| +U_CAPI UBool U_EXPORT2
|
| +uscript_breaksBetweenLetters(UScriptCode script) {  // nonzero iff the LB_LETTERS flag bit is set
|
| +    return static_cast<UBool>((getScriptProps(script) & LB_LETTERS) != 0);
|
| +}
|
| +
|
| +U_CAPI UBool U_EXPORT2
|
| +uscript_isCased(UScriptCode script) {  // nonzero iff the CASED flag bit is set for the script
|
| +    return static_cast<UBool>((getScriptProps(script) & CASED) != 0);
|
| +}
|
|
|
| Property changes on: icu51/source/common/uscript_props.cpp
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|