| Index: icu51/source/common/filterednormalizer2.cpp
|
| ===================================================================
|
| --- icu51/source/common/filterednormalizer2.cpp (revision 0)
|
| +++ icu51/source/common/filterednormalizer2.cpp (revision 0)
|
| @@ -0,0 +1,288 @@
|
| +/*
|
| +*******************************************************************************
|
| +*
|
| +* Copyright (C) 2009-2012, International Business Machines
|
| +* Corporation and others. All Rights Reserved.
|
| +*
|
| +*******************************************************************************
|
| +* file name: filterednormalizer2.cpp
|
| +* encoding: US-ASCII
|
| +* tab size: 8 (not used)
|
| +* indentation:4
|
| +*
|
| +* created on: 2009dec10
|
| +* created by: Markus W. Scherer
|
| +*/
|
| +
|
| +#include "unicode/utypes.h"
|
| +
|
| +#if !UCONFIG_NO_NORMALIZATION
|
| +
|
| +#include "unicode/normalizer2.h"
|
| +#include "unicode/uniset.h"
|
| +#include "unicode/unistr.h"
|
| +#include "unicode/unorm.h"
|
| +#include "cpputils.h"
|
| +
|
| +U_NAMESPACE_BEGIN
|
| +
|
| +FilteredNormalizer2::~FilteredNormalizer2() {}  // Destructor with an empty body: no explicit cleanup is performed here.
|
| +
|
| +UnicodeString &  // Normalizes src into dest, applying norm2 only to in-filter runs of src; returns dest.
|
| +FilteredNormalizer2::normalize(const UnicodeString &src,  // src: text to normalize
|
| + UnicodeString &dest,  // dest: receives the result; must be a different object than src
|
| + UErrorCode &errorCode) const {  // errorCode: in/out error status
|
| + uprv_checkCanGetBuffer(src, errorCode);  // helper defined elsewhere; presumably flags an unusable (e.g. bogus) src -- confirm
|
| + if(U_FAILURE(errorCode)) {
|
| + dest.setToBogus();  // on failure the caller gets a bogus dest rather than stale content
|
| + return dest;
|
| + }
|
| + if(&dest==&src) {  // in-place normalization is rejected
|
| + errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
| + return dest;  // dest is left untouched in this case (it is not set to bogus)
|
| + }
|
| + dest.remove();  // start from an empty dest: the four-argument overload only appends
|
| + return normalize(src, dest, USET_SPAN_SIMPLE, errorCode);  // first look for an in-filter run at the start of src
|
| +}
|
| +
|
| +// Internal: No argument checking, and appends to dest.
|
| +// Pass as input spanCondition the one that is likely to yield a non-zero
|
| +// span length at the start of src.
|
| +// For set=[:age=3.2:], since almost all common characters were in Unicode 3.2,
|
| +// USET_SPAN_SIMPLE should be passed in for the start of src
|
| +// and USET_SPAN_NOT_CONTAINED should be passed in if we continue after
|
| +// an in-filter prefix.
|
| +UnicodeString &  // Returns dest.
|
| +FilteredNormalizer2::normalize(const UnicodeString &src,  // src: text to process
|
| + UnicodeString &dest,  // dest: output; existing content is kept and extended
|
| + USetSpanCondition spanCondition,  // spanCondition: kind of run expected at the start of src
|
| + UErrorCode &errorCode) const {  // errorCode: in/out error status
|
| + UnicodeString tempDest; // Don't throw away destination buffer between iterations.
|
| + for(int32_t prevSpanLimit=0; prevSpanLimit<src.length();) {  // prevSpanLimit: start of the current run; advanced at the bottom of the loop
|
| + int32_t spanLimit=set.span(src, prevSpanLimit, spanCondition);  // end of the run matching spanCondition
|
| + int32_t spanLength=spanLimit-prevSpanLimit;  // may be 0 when the expected kind of run is absent here
|
| + if(spanCondition==USET_SPAN_NOT_CONTAINED) {  // out-of-filter run: copied verbatim
|
| + if(spanLength!=0) {
|
| + dest.append(src, prevSpanLimit, spanLength);
|
| + }
|
| + spanCondition=USET_SPAN_SIMPLE;  // the next run to look for is in-filter
|
| + } else {  // in-filter run: normalized by the wrapped normalizer
|
| + if(spanLength!=0) {
|
| + // Not norm2.normalizeSecondAndAppend() because we do not want
|
| + // to modify the non-filter part of dest.
|
| + dest.append(norm2.normalize(src.tempSubStringBetween(prevSpanLimit, spanLimit),
|
| + tempDest, errorCode));  // normalize the run into tempDest, then append that to dest
|
| + if(U_FAILURE(errorCode)) {  // checked after the append above has already run
|
| + break;
|
| + }
|
| + }
|
| + spanCondition=USET_SPAN_NOT_CONTAINED;  // the next run to look for is out-of-filter
|
| + }
|
| + prevSpanLimit=spanLimit;  // continue right after the run just handled
|
| + }
|
| + return dest;
|
| +}
|
| +
|
| +UnicodeString &  // Returns whatever the four-argument overload returns (first).
|
| +FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,  // first: string that is extended in place
|
| + const UnicodeString &second,  // second: text to normalize and append
|
| + UErrorCode &errorCode) const {  // errorCode: in/out error status
|
| + return normalizeSecondAndAppend(first, second, TRUE, errorCode);  // TRUE selects the normalizing path (doNormalize)
|
| +}
|
| +
|
| +UnicodeString &  // Returns whatever the four-argument normalizeSecondAndAppend() returns (first).
|
| +FilteredNormalizer2::append(UnicodeString &first,  // first: string that is extended in place
|
| + const UnicodeString &second,  // second: text to append
|
| + UErrorCode &errorCode) const {  // errorCode: in/out error status
|
| + return normalizeSecondAndAppend(first, second, FALSE, errorCode);  // FALSE selects the non-normalizing path (doNormalize)
|
| +}
|
| +
|
| +UnicodeString &  // Shared implementation behind normalizeSecondAndAppend() and append(); returns first.
|
| +FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,  // first: extended in place
|
| + const UnicodeString &second,  // second: text to add; must be a different object than first
|
| + UBool doNormalize,  // doNormalize: TRUE = normalizeSecondAndAppend() behavior, FALSE = append() behavior
|
| + UErrorCode &errorCode) const {  // errorCode: in/out error status
|
| + uprv_checkCanGetBuffer(first, errorCode);  // helper defined elsewhere; presumably flags an unusable (e.g. bogus) string -- confirm
|
| + uprv_checkCanGetBuffer(second, errorCode);
|
| + if(U_FAILURE(errorCode)) {
|
| + return first;  // first is returned unmodified on failure
|
| + }
|
| + if(&first==&second) {  // appending a string to itself is rejected
|
| + errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
| + return first;
|
| + }
|
| + if(first.isEmpty()) {  // nothing to merge with: the result depends on second alone
|
| + if(doNormalize) {
|
| + return normalize(second, first, errorCode);
|
| + } else {
|
| + return first=second;  // append() mode: second is copied as-is
|
| + }
|
| + }
|
| + // merge the in-filter suffix of the first string with the in-filter prefix of the second
|
| + int32_t prefixLimit=set.span(second, 0, USET_SPAN_SIMPLE);  // end of second's leading in-filter run (0 if none)
|
| + if(prefixLimit!=0) {
|
| + UnicodeString prefix(second.tempSubString(0, prefixLimit));
|
| + int32_t suffixStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE);  // start of first's trailing in-filter run
|
| + if(suffixStart==0) {  // all of first is in-filter: norm2 can operate on first directly
|
| + if(doNormalize) {
|
| + norm2.normalizeSecondAndAppend(first, prefix, errorCode);
|
| + } else {
|
| + norm2.append(first, prefix, errorCode);
|
| + }
|
| + } else {  // merge on a copy of the suffix so norm2 never sees the earlier part of first
|
| + UnicodeString middle(first, suffixStart, INT32_MAX);  // copy of first from suffixStart to the end
|
| + if(doNormalize) {
|
| + norm2.normalizeSecondAndAppend(middle, prefix, errorCode);
|
| + } else {
|
| + norm2.append(middle, prefix, errorCode);
|
| + }
|
| + first.replace(suffixStart, INT32_MAX, middle);  // splice the merged text back over the old suffix
|
| + }
|
| + }
|
| + if(prefixLimit<second.length()) {  // NOTE(review): errorCode from the merge step above is not checked before continuing
|
| + UnicodeString rest(second.tempSubString(prefixLimit, INT32_MAX));  // remainder of second after its in-filter prefix
|
| + if(doNormalize) {
|
| + normalize(rest, first, USET_SPAN_NOT_CONTAINED, errorCode);  // internal overload appends to first; rest begins out-of-filter
|
| + } else {
|
| + first.append(rest);
|
| + }
|
| + }
|
| + return first;
|
| +}
|
| +
|
| +UBool  // Result of norm2.getDecomposition() for in-filter c, otherwise false.
|
| +FilteredNormalizer2::getDecomposition(UChar32 c, UnicodeString &decomposition) const {  // decomposition: passed through to norm2
|
| + return set.contains(c) && norm2.getDecomposition(c, decomposition);  // short-circuit: norm2 is not consulted when c is outside the filter set
|
| +}
|
| +
|
| +UBool  // Result of norm2.getRawDecomposition() for in-filter c, otherwise false.
|
| +FilteredNormalizer2::getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {  // decomposition: passed through to norm2
|
| + return set.contains(c) && norm2.getRawDecomposition(c, decomposition);  // short-circuit: norm2 is not consulted when c is outside the filter set
|
| +}
|
| +
|
| +UChar32  // norm2.composePair(a, b) when both code points are in the filter set, otherwise U_SENTINEL.
|
| +FilteredNormalizer2::composePair(UChar32 a, UChar32 b) const {
|
| + return (set.contains(a) && set.contains(b)) ? norm2.composePair(a, b) : U_SENTINEL;  // one out-of-filter code point is enough to yield U_SENTINEL
|
| +}
|
| +
|
| +uint8_t  // norm2's combining class for in-filter c, otherwise 0.
|
| +FilteredNormalizer2::getCombiningClass(UChar32 c) const {
|
| + return set.contains(c) ? norm2.getCombiningClass(c) : 0;  // code points outside the filter set always report 0
|
| +}
|
| +
|
| +UBool  // TRUE if every in-filter run of s is normalized according to norm2; FALSE otherwise or on error.
|
| +FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {  // s: text to test; errorCode: in/out error status
|
| + uprv_checkCanGetBuffer(s, errorCode);  // helper defined elsewhere; presumably flags an unusable (e.g. bogus) s -- confirm
|
| + if(U_FAILURE(errorCode)) {
|
| + return FALSE;
|
| + }
|
| + USetSpanCondition spanCondition=USET_SPAN_SIMPLE;  // start by looking for an in-filter run
|
| + for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {  // alternate between in-filter and out-of-filter runs
|
| + int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);  // end of the run matching spanCondition
|
| + if(spanCondition==USET_SPAN_NOT_CONTAINED) {  // out-of-filter run: nothing to check
|
| + spanCondition=USET_SPAN_SIMPLE;
|
| + } else {  // in-filter run: delegate to norm2
|
| + if( !norm2.isNormalized(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode) ||
|
| + U_FAILURE(errorCode)
|
| + ) {
|
| + return FALSE;  // first non-normalized run (or error) decides the result
|
| + }
|
| + spanCondition=USET_SPAN_NOT_CONTAINED;
|
| + }
|
| + prevSpanLimit=spanLimit;  // continue right after the run just handled
|
| + }
|
| + return TRUE;  // also reached for an empty s, since the loop body never runs
|
| +}
|
| +
|
| +UNormalizationCheckResult  // Combined quick-check result over all in-filter runs of s.
|
| +FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {  // s: text to test; errorCode: in/out error status
|
| + uprv_checkCanGetBuffer(s, errorCode);  // helper defined elsewhere; presumably flags an unusable (e.g. bogus) s -- confirm
|
| + if(U_FAILURE(errorCode)) {
|
| + return UNORM_MAYBE;  // value returned when the check cannot be performed
|
| + }
|
| + UNormalizationCheckResult result=UNORM_YES;  // stays YES unless some run reports MAYBE or NO
|
| + USetSpanCondition spanCondition=USET_SPAN_SIMPLE;  // start by looking for an in-filter run
|
| + for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {  // alternate between in-filter and out-of-filter runs
|
| + int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);  // end of the run matching spanCondition
|
| + if(spanCondition==USET_SPAN_NOT_CONTAINED) {  // out-of-filter run: does not affect the result
|
| + spanCondition=USET_SPAN_SIMPLE;
|
| + } else {  // in-filter run: delegate to norm2
|
| + UNormalizationCheckResult qcResult=
|
| + norm2.quickCheck(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode);
|
| + if(U_FAILURE(errorCode) || qcResult==UNORM_NO) {  // stop early on an error or on a definite NO
|
| + return qcResult;
|
| + } else if(qcResult==UNORM_MAYBE) {  // remember MAYBE, but keep scanning in case a later run says NO
|
| + result=qcResult;
|
| + }
|
| + spanCondition=USET_SPAN_NOT_CONTAINED;
|
| + }
|
| + prevSpanLimit=spanLimit;  // continue right after the run just handled
|
| + }
|
| + return result;
|
| +}
|
| +
|
| +int32_t  // Index into s up to which norm2.spanQuickCheckYes() accepted every in-filter run; s.length() if all were accepted.
|
| +FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {  // s: text to scan; errorCode: in/out error status
|
| + uprv_checkCanGetBuffer(s, errorCode);  // helper defined elsewhere; presumably flags an unusable (e.g. bogus) s -- confirm
|
| + if(U_FAILURE(errorCode)) {
|
| + return 0;
|
| + }
|
| + USetSpanCondition spanCondition=USET_SPAN_SIMPLE;  // start by looking for an in-filter run
|
| + for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {  // alternate between in-filter and out-of-filter runs
|
| + int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);  // end of the run matching spanCondition
|
| + if(spanCondition==USET_SPAN_NOT_CONTAINED) {  // out-of-filter run: skipped over without consulting norm2
|
| + spanCondition=USET_SPAN_SIMPLE;
|
| + } else {  // in-filter run: delegate to norm2
|
| + int32_t yesLimit=
|
| + prevSpanLimit+  // norm2's result is relative to the substring, so shift it to an index into s
|
| + norm2.spanQuickCheckYes(
|
| + s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode);
|
| + if(U_FAILURE(errorCode) || yesLimit<spanLimit) {  // error, or norm2 stopped before the end of this run
|
| + return yesLimit;
|
| + }
|
| + spanCondition=USET_SPAN_NOT_CONTAINED;
|
| + }
|
| + prevSpanLimit=spanLimit;  // continue right after the run just handled
|
| + }
|
| + return s.length();  // no run was cut short
|
| +}
|
| +
|
| +UBool  // True for any c outside the filter set; otherwise norm2's answer.
|
| +FilteredNormalizer2::hasBoundaryBefore(UChar32 c) const {
|
| + return !set.contains(c) || norm2.hasBoundaryBefore(c);  // short-circuit: norm2 is only asked about in-filter code points
|
| +}
|
| +
|
| +UBool  // True for any c outside the filter set; otherwise norm2's answer.
|
| +FilteredNormalizer2::hasBoundaryAfter(UChar32 c) const {
|
| + return !set.contains(c) || norm2.hasBoundaryAfter(c);  // short-circuit: norm2 is only asked about in-filter code points
|
| +}
|
| +
|
| +UBool  // True for any c outside the filter set; otherwise norm2's answer.
|
| +FilteredNormalizer2::isInert(UChar32 c) const {
|
| + return !set.contains(c) || norm2.isInert(c);  // short-circuit: norm2 is only asked about in-filter code points
|
| +}
|
| +
|
| +U_NAMESPACE_END
|
| +
|
| +// C API ------------------------------------------------------------------- ***
|
| +
|
| +U_NAMESPACE_USE
|
| +
|
| +U_CAPI UNormalizer2 * U_EXPORT2
|
| +unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode) {
|
| + if(U_FAILURE(*pErrorCode)) {
|
| + return NULL;
|
| + }
|
| + if(filterSet==NULL) {
|
| + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
| + return NULL;
|
| + }
|
| + Normalizer2 *fn2=new FilteredNormalizer2(*(Normalizer2 *)norm2,
|
| + *UnicodeSet::fromUSet(filterSet));
|
| + if(fn2==NULL) {
|
| + *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
| + }
|
| + return (UNormalizer2 *)fn2;
|
| +}
|
| +
|
| +#endif // !UCONFIG_NO_NORMALIZATION
|
|
|
| Property changes on: icu51/source/common/filterednormalizer2.cpp
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|