icu51/source/common/ucase.h - Issue 20882002: Check in the pristine copy of ICU 51.2

Unified Diff: icu51/source/common/ucase.h

Issue 20882002: Check in the pristine copy of ICU 51.2 (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: icu51/source/common/ucase.h

===================================================================

--- icu51/source/common/ucase.h (revision 0)

+++ icu51/source/common/ucase.h (revision 0)

@@ -0,0 +1,409 @@

+/*

+*******************************************************************************

+* file name: ucase.h

+* encoding: US-ASCII

+* tab size: 8 (not used)

+* indentation:4

+* created on: 2004aug30

+* created by: Markus W. Scherer

+* Low-level Unicode character/string case mapping code.

+*/

+#ifndef __UCASE_H__

+#define __UCASE_H__

+#include "unicode/utypes.h"

+#include "unicode/uset.h"

+#include "putilimp.h"

+#include "uset_imp.h"

+#include "udataswp.h"

+#ifdef __cplusplus

+U_NAMESPACE_BEGIN

+class UnicodeString;

+U_NAMESPACE_END

+#endif

+/* library API -------------------------------------------------------------- */

+U_CDECL_BEGIN

+struct UCaseProps;

+typedef struct UCaseProps UCaseProps;

+U_CDECL_END

+U_CAPI const UCaseProps * U_EXPORT2

+ucase_getSingleton(void);

+U_CFUNC void U_EXPORT2

+ucase_addPropertyStarts(const UCaseProps *csp, const USetAdder *sa, UErrorCode *pErrorCode);

+/**

+ * Requires non-NULL locale ID but otherwise does the equivalent of

+ * checking for language codes as if uloc_getLanguage() were called:

+ * Accepts both 2- and 3-letter codes and accepts case variants.

+ */

+U_CFUNC int32_t

+ucase_getCaseLocale(const char *locale, int32_t *locCache);

+/* Casing locale types for ucase_getCaseLocale */

+enum {

+ UCASE_LOC_UNKNOWN,

+ UCASE_LOC_ROOT,

+ UCASE_LOC_TURKISH,

+ UCASE_LOC_LITHUANIAN,

+ UCASE_LOC_DUTCH

+};

+/**

+ * Bit mask for getting just the options from a string compare options word

+ * that are relevant for case-insensitive string comparison.

+ * See uchar.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER.

+ * @internal

+ */

+#define _STRCASECMP_OPTIONS_MASK 0xffff

+/**

+ * Bit mask for getting just the options from a string compare options word

+ * that are relevant for case folding (of a single string or code point).

+ * See uchar.h.

+ * @internal

+ */

+#define _FOLD_CASE_OPTIONS_MASK 0xff

+/* single-code point functions */

+U_CAPI UChar32 U_EXPORT2

+ucase_tolower(const UCaseProps *csp, UChar32 c);

+U_CAPI UChar32 U_EXPORT2

+ucase_toupper(const UCaseProps *csp, UChar32 c);

+U_CAPI UChar32 U_EXPORT2

+ucase_totitle(const UCaseProps *csp, UChar32 c);

+U_CAPI UChar32 U_EXPORT2

+ucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options);

+/**

+ * Adds all simple case mappings and the full case folding for c to sa,

+ * and also adds special case closure mappings.

+ * c itself is not added.

+ * For example, the mappings

+ * - for s include long s

+ * - for sharp s include ss

+ * - for k include the Kelvin sign

+ */

+U_CFUNC void U_EXPORT2

+ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa);

+/**

+ * Maps the string to single code points and adds the associated case closure

+ * mappings.

+ * The string is mapped to code points if it is their full case folding string.

+ * In other words, this performs a reverse full case folding and then

+ * adds the case closure items of the resulting code points.

+ * If the string is found and its closure applied, then

+ * the string itself is added as well as part of its code points' closure.

+ * It must be length>=0.

+ *

+ * @return TRUE if the string was found

+ */

+U_CFUNC UBool U_EXPORT2

+ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length, const USetAdder *sa);

+#ifdef __cplusplus

+U_NAMESPACE_BEGIN

+/**

+ * Iterator over characters with more than one code point in the full default Case_Folding.

+ */

+class U_COMMON_API FullCaseFoldingIterator {

+public:

+ /** Constructor. */

+ FullCaseFoldingIterator();

+ /**

+ * Returns the next (cp, full) pair where "full" is cp's full default Case_Folding.

+ * Returns a negative cp value at the end of the iteration.

+ */

+ UChar32 next(UnicodeString &full);

+private:

+ FullCaseFoldingIterator(const FullCaseFoldingIterator &); // no copy

+ FullCaseFoldingIterator &operator=(const FullCaseFoldingIterator &); // no assignment

+ const UChar *unfold;

+ int32_t unfoldRows;

+ int32_t unfoldRowWidth;

+ int32_t unfoldStringWidth;

+ int32_t currentRow;

+ int32_t rowCpIndex;

+};

+U_NAMESPACE_END

+#endif

+/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */

+U_CAPI int32_t U_EXPORT2

+ucase_getType(const UCaseProps *csp, UChar32 c);

+/** @return same as ucase_getType(), or <0 if c is case-ignorable */

+U_CAPI int32_t U_EXPORT2

+ucase_getTypeOrIgnorable(const UCaseProps *csp, UChar32 c);

+U_CAPI UBool U_EXPORT2

+ucase_isSoftDotted(const UCaseProps *csp, UChar32 c);

+U_CAPI UBool U_EXPORT2

+ucase_isCaseSensitive(const UCaseProps *csp, UChar32 c);

+/* string case mapping functions */

+U_CDECL_BEGIN

+/**

+ * Iterator function for string case mappings, which need to look at the

+ * context (surrounding text) of a given character for conditional mappings.

+ *

+ * The iterator only needs to go backward or forward away from the

+ * character in question. It does not use any indexes on this interface.

+ * It does not support random access or an arbitrary change of

+ * iteration direction.

+ *

+ * The code point being case-mapped itself is never returned by

+ * this iterator.

+ *

+ * @param context A pointer to the iterator's working data.

+ * @param dir If <0 then start iterating backward from the character;

+ * if >0 then start iterating forward from the character;

+ * if 0 then continue iterating in the current direction.

+ * @return Next code point, or <0 when the iteration is done.

+ */

+typedef UChar32 U_CALLCONV

+UCaseContextIterator(void *context, int8_t dir);

+/**

+ * Sample struct which may be used by some implementations of

+ * UCaseContextIterator.

+ */

+struct UCaseContext {

+ void *p;

+ int32_t start, index, limit;

+ int32_t cpStart, cpLimit;

+ int8_t dir;

+ int8_t b1, b2, b3;

+};

+typedef struct UCaseContext UCaseContext;

+U_CDECL_END

+#define UCASECONTEXT_INITIALIZER { NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0 }

+enum {

+ /**

+ * For string case mappings, a single character (a code point) is mapped

+ * either to itself (in which case in-place mapping functions do nothing),

+ * or to another single code point, or to a string.

+ * Aside from the string contents, these are indicated with a single int32_t

+ * value as follows:

+ *

+ * Mapping to self: Negative values (~self instead of -self to support U+0000)

+ *

+ * Mapping to another code point: Positive values >UCASE_MAX_STRING_LENGTH

+ *

+ * Mapping to a string: The string length (0..UCASE_MAX_STRING_LENGTH) is

+ * returned. Note that the string result may indeed have zero length.

+ */

+ UCASE_MAX_STRING_LENGTH=0x1f

+};

+/**

+ * Get the full lowercase mapping for c.

+ *

+ * @param csp Case mapping properties.

+ * @param c Character to be mapped.

+ * @param iter Character iterator, used for context-sensitive mappings.

+ * See UCaseContextIterator for details.

+ * If iter==NULL then a context-independent result is returned.

+ * @param context Pointer to be passed into iter.

+ * @param pString If the mapping result is a string, then the pointer is

+ * written to *pString.

+ * @param locale Locale ID for locale-dependent mappings.

+ * @param locCache Initialize to 0; may be used to cache the result of parsing

+ * the locale ID for subsequent calls.

+ * Can be NULL.

+ * @return Output code point or string length, see UCASE_MAX_STRING_LENGTH.

+ *

+ * @see UCaseContextIterator

+ * @see UCASE_MAX_STRING_LENGTH

+ * @internal

+ */

+U_CAPI int32_t U_EXPORT2

+ucase_toFullLower(const UCaseProps *csp, UChar32 c,

+ UCaseContextIterator *iter, void *context,

+ const UChar **pString,

+ const char *locale, int32_t *locCache);

+U_CAPI int32_t U_EXPORT2

+ucase_toFullUpper(const UCaseProps *csp, UChar32 c,

+ UCaseContextIterator *iter, void *context,

+ const UChar **pString,

+ const char *locale, int32_t *locCache);

+U_CAPI int32_t U_EXPORT2

+ucase_toFullTitle(const UCaseProps *csp, UChar32 c,

+ UCaseContextIterator *iter, void *context,

+ const UChar **pString,

+ const char *locale, int32_t *locCache);

+U_CAPI int32_t U_EXPORT2

+ucase_toFullFolding(const UCaseProps *csp, UChar32 c,

+ const UChar **pString,

+ uint32_t options);

+U_CFUNC int32_t U_EXPORT2

+ucase_hasBinaryProperty(UChar32 c, UProperty which);

+U_CDECL_BEGIN

+/**

+ * @internal

+ */

+typedef int32_t U_CALLCONV

+UCaseMapFull(const UCaseProps *csp, UChar32 c,

+ UCaseContextIterator *iter, void *context,

+ const UChar **pString,

+ const char *locale, int32_t *locCache);

+U_CDECL_END

+/* file definitions --------------------------------------------------------- */

+#define UCASE_DATA_NAME "ucase"

+#define UCASE_DATA_TYPE "icu"

+/* format "cAsE" */

+#define UCASE_FMT_0 0x63

+#define UCASE_FMT_1 0x41

+#define UCASE_FMT_2 0x53

+#define UCASE_FMT_3 0x45

+/* indexes into indexes[] */

+enum {

+ UCASE_IX_INDEX_TOP,

+ UCASE_IX_LENGTH,

+ UCASE_IX_TRIE_SIZE,

+ UCASE_IX_EXC_LENGTH,

+ UCASE_IX_UNFOLD_LENGTH,

+ UCASE_IX_MAX_FULL_LENGTH=15,

+ UCASE_IX_TOP=16

+};

+/* definitions for 16-bit case properties word ------------------------------ */

+/* 2-bit constants for types of cased characters */

+#define UCASE_TYPE_MASK 3

+enum {

+ UCASE_NONE,

+ UCASE_LOWER,

+ UCASE_UPPER,

+ UCASE_TITLE

+};

+#define UCASE_GET_TYPE(props) ((props)&UCASE_TYPE_MASK)

+#define UCASE_GET_TYPE_AND_IGNORABLE(props) ((props)&7)

+#define UCASE_IGNORABLE 4

+#define UCASE_SENSITIVE 8

+#define UCASE_EXCEPTION 0x10

+#define UCASE_DOT_MASK 0x60

+enum {

+ UCASE_NO_DOT=0, /* normal characters with cc=0 */

+ UCASE_SOFT_DOTTED=0x20, /* soft-dotted characters with cc=0 */

+ UCASE_ABOVE=0x40, /* "above" accents with cc=230 */

+ UCASE_OTHER_ACCENT=0x60 /* other accent character (0<cc!=230) */

+};

+/* no exception: bits 15..7 are a 9-bit signed case mapping delta */

+#define UCASE_DELTA_SHIFT 7

+#define UCASE_DELTA_MASK 0xff80

+#define UCASE_MAX_DELTA 0xff

+#define UCASE_MIN_DELTA (-UCASE_MAX_DELTA-1)

+#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC

+# define UCASE_GET_DELTA(props) ((int16_t)(props)>>UCASE_DELTA_SHIFT)

+#else

+# define UCASE_GET_DELTA(props) (int16_t)(((props)&0x8000) ? (((props)>>UCASE_DELTA_SHIFT)|0xfe00) : ((uint16_t)(props)>>UCASE_DELTA_SHIFT))

+#endif

+/* exception: bits 15..5 are an unsigned 11-bit index into the exceptions array */

+#define UCASE_EXC_SHIFT 5

+#define UCASE_EXC_MASK 0xffe0

+#define UCASE_MAX_EXCEPTIONS ((UCASE_EXC_MASK>>UCASE_EXC_SHIFT)+1)

+/* definitions for 16-bit main exceptions word ------------------------------ */

+/* first 8 bits indicate values in optional slots */

+enum {

+ UCASE_EXC_LOWER,

+ UCASE_EXC_FOLD,

+ UCASE_EXC_UPPER,

+ UCASE_EXC_TITLE,

+ UCASE_EXC_4, /* reserved */

+ UCASE_EXC_5, /* reserved */

+ UCASE_EXC_CLOSURE,

+ UCASE_EXC_FULL_MAPPINGS,

+ UCASE_EXC_ALL_SLOTS /* one past the last slot */

+};

+/* each slot is 2 uint16_t instead of 1 */

+#define UCASE_EXC_DOUBLE_SLOTS 0x100

+/* reserved: exception bits 11..9 */

+/* UCASE_EXC_DOT_MASK=UCASE_DOT_MASK<<UCASE_EXC_DOT_SHIFT */

+#define UCASE_EXC_DOT_SHIFT 7

+/* normally stored in the main word, but pushed out for larger exception indexes */

+#define UCASE_EXC_DOT_MASK 0x3000

+enum {

+ UCASE_EXC_NO_DOT=0,

+ UCASE_EXC_SOFT_DOTTED=0x1000,

+ UCASE_EXC_ABOVE=0x2000, /* "above" accents with cc=230 */

+ UCASE_EXC_OTHER_ACCENT=0x3000 /* other character (0<cc!=230) */

+};

+/* complex/conditional mappings */

+#define UCASE_EXC_CONDITIONAL_SPECIAL 0x4000

+#define UCASE_EXC_CONDITIONAL_FOLD 0x8000

+/* definitions for lengths word for full case mappings */

+#define UCASE_FULL_LOWER 0xf

+#define UCASE_FULL_FOLDING 0xf0

+#define UCASE_FULL_UPPER 0xf00

+#define UCASE_FULL_TITLE 0xf000

+/* maximum lengths */

+#define UCASE_FULL_MAPPINGS_MAX_LENGTH (4*0xf)

+#define UCASE_CLOSURE_MAX_LENGTH 0xf

+/* constants for reverse case folding ("unfold") data */

+enum {

+ UCASE_UNFOLD_ROWS,

+ UCASE_UNFOLD_ROW_WIDTH,

+ UCASE_UNFOLD_STRING_WIDTH

+};

+#endif

Property changes on: icu51/source/common/ucase.h

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « icu51/source/common/ubrkimpl.h ('k') | icu51/source/common/ucase.cpp » ('j') | no next file with comments »