icu51/source/common/filterednormalizer2.cpp - Issue 20882002: Check in the pristine copy of ICU 51.2

Unified Diff: icu51/source/common/filterednormalizer2.cpp

Issue 20882002: Check in the pristine copy of ICU 51.2 (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: icu51/source/common/filterednormalizer2.cpp

===================================================================

--- icu51/source/common/filterednormalizer2.cpp (revision 0)

+++ icu51/source/common/filterednormalizer2.cpp (revision 0)

@@ -0,0 +1,288 @@

+/*

+*******************************************************************************

+* file name: filterednormalizer2.cpp

+* encoding: US-ASCII

+* tab size: 8 (not used)

+* indentation:4

+* created on: 2009dec10

+* created by: Markus W. Scherer

+*/

+#include "unicode/utypes.h"

+#if !UCONFIG_NO_NORMALIZATION

+#include "unicode/normalizer2.h"

+#include "unicode/uniset.h"

+#include "unicode/unistr.h"

+#include "unicode/unorm.h"

+#include "cpputils.h"

+U_NAMESPACE_BEGIN

+FilteredNormalizer2::~FilteredNormalizer2() {}  // Empty body: no explicit cleanup is performed here.

+// Public entry point: replaces the contents of dest with the filtered
+// normalization of src and returns dest.
+//
+// - If errorCode is a failure after the uprv_checkCanGetBuffer() call,
+//   dest is set to bogus and returned.
+// - If dest and src are the same object, errorCode is set to
+//   U_ILLEGAL_ARGUMENT_ERROR and dest is returned unmodified.
+// - Otherwise dest is emptied and the internal, appending overload does the
+//   work, starting with USET_SPAN_SIMPLE as the span condition.
+UnicodeString &
+FilteredNormalizer2::normalize(const UnicodeString &src,
+                               UnicodeString &dest,
+                               UErrorCode &errorCode) const {
+    uprv_checkCanGetBuffer(src, errorCode);
+    if(U_FAILURE(errorCode)) {
+        dest.setToBogus();
+        return dest;
+    }
+    if(&dest==&src) {
+        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return dest;
+    }
+    dest.remove();
+    return normalize(src, dest, USET_SPAN_SIMPLE, errorCode);
+}

+// Internal: No argument checking, and appends to dest.
+// Pass as input spanCondition the one that is likely to yield a non-zero
+// span length at the start of src.
+// For set=[:age=3.2:], since almost all common characters were in Unicode 3.2,
+// USET_SPAN_SIMPLE should be passed in for the start of src
+// and USET_SPAN_NOT_CONTAINED should be passed in if we continue after
+// an in-filter prefix.
+//
+// The loop walks src as alternating spans: a USET_SPAN_NOT_CONTAINED span is
+// appended to dest verbatim, any other span is normalized by norm2 into
+// tempDest and then appended. The span condition is flipped after every span,
+// including an empty one, so that prevSpanLimit always advances.
+// Stops early (returning dest as built so far) if norm2 reports a failure.
+UnicodeString &
+FilteredNormalizer2::normalize(const UnicodeString &src,
+                               UnicodeString &dest,
+                               USetSpanCondition spanCondition,
+                               UErrorCode &errorCode) const {
+    UnicodeString tempDest;  // Don't throw away destination buffer between iterations.
+    for(int32_t prevSpanLimit=0; prevSpanLimit<src.length();) {
+        int32_t spanLimit=set.span(src, prevSpanLimit, spanCondition);
+        int32_t spanLength=spanLimit-prevSpanLimit;
+        if(spanCondition==USET_SPAN_NOT_CONTAINED) {
+            // Out-of-filter text is copied through unchanged.
+            if(spanLength!=0) {
+                dest.append(src, prevSpanLimit, spanLength);
+            }
+            spanCondition=USET_SPAN_SIMPLE;
+        } else {
+            if(spanLength!=0) {
+                // Not norm2.normalizeSecondAndAppend() because we do not want
+                // to modify the non-filter part of dest.
+                dest.append(norm2.normalize(src.tempSubStringBetween(prevSpanLimit, spanLimit),
+                                            tempDest, errorCode));
+                if(U_FAILURE(errorCode)) {
+                    break;
+                }
+            }
+            // Flip even when the span was empty; otherwise the next
+            // iteration would compute the same empty span again.
+            spanCondition=USET_SPAN_NOT_CONTAINED;
+        }
+        prevSpanLimit=spanLimit;
+    }
+    return dest;
+}

+// Public overload: forwards to the four-argument overload with
+// doNormalize=TRUE and returns its result.
+UnicodeString &
+FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
+                                              const UnicodeString &second,
+                                              UErrorCode &errorCode) const {
+    return normalizeSecondAndAppend(first, second, TRUE, errorCode);
+}

+// Forwards to the four-argument normalizeSecondAndAppend() with
+// doNormalize=FALSE and returns its result.
+UnicodeString &
+FilteredNormalizer2::append(UnicodeString &first,
+                            const UnicodeString &second,
+                            UErrorCode &errorCode) const {
+    return normalizeSecondAndAppend(first, second, FALSE, errorCode);
+}

+// Shared implementation behind normalizeSecondAndAppend() (doNormalize=TRUE)
+// and append() (doNormalize=FALSE). Appends second to first and returns first.
+//
+// - If errorCode is a failure after the two uprv_checkCanGetBuffer() calls,
+//   first is returned unmodified.
+// - If first and second are the same object, errorCode is set to
+//   U_ILLEGAL_ARGUMENT_ERROR and first is returned unmodified.
+// - If first is empty, the result is either normalize(second) or a plain
+//   copy of second, depending on doNormalize.
+// - Otherwise the in-filter prefix of second is combined with the in-filter
+//   suffix of first through norm2, and the remainder of second is then
+//   appended (normalized span by span, or verbatim).
+UnicodeString &
+FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
+                                              const UnicodeString &second,
+                                              UBool doNormalize,
+                                              UErrorCode &errorCode) const {
+    uprv_checkCanGetBuffer(first, errorCode);
+    uprv_checkCanGetBuffer(second, errorCode);
+    if(U_FAILURE(errorCode)) {
+        return first;
+    }
+    if(&first==&second) {
+        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return first;
+    }
+    if(first.isEmpty()) {
+        if(doNormalize) {
+            return normalize(second, first, errorCode);
+        } else {
+            return first=second;
+        }
+    }
+    // merge the in-filter suffix of the first string with the in-filter prefix of the second
+    int32_t prefixLimit=set.span(second, 0, USET_SPAN_SIMPLE);
+    if(prefixLimit!=0) {
+        UnicodeString prefix(second.tempSubString(0, prefixLimit));
+        int32_t suffixStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE);
+        if(suffixStart==0) {
+            // The backward span reached index 0, so norm2 may work on first directly.
+            if(doNormalize) {
+                norm2.normalizeSecondAndAppend(first, prefix, errorCode);
+            } else {
+                norm2.append(first, prefix, errorCode);
+            }
+        } else {
+            // Work on a copy of the tail of first so that norm2 cannot
+            // touch the text before suffixStart, then splice it back in.
+            UnicodeString middle(first, suffixStart, INT32_MAX);
+            if(doNormalize) {
+                norm2.normalizeSecondAndAppend(middle, prefix, errorCode);
+            } else {
+                norm2.append(middle, prefix, errorCode);
+            }
+            first.replace(suffixStart, INT32_MAX, middle);
+        }
+    }
+    if(prefixLimit<second.length()) {
+        UnicodeString rest(second.tempSubString(prefixLimit, INT32_MAX));
+        if(doNormalize) {
+            // rest begins right after the in-filter prefix, hence NOT_CONTAINED first.
+            normalize(rest, first, USET_SPAN_NOT_CONTAINED, errorCode);
+        } else {
+            first.append(rest);
+        }
+    }
+    return first;
+}

+// Returns norm2.getDecomposition(c, decomposition) when c is in the filter
+// set. For a c outside the set, returns false without calling norm2
+// (short-circuit &&), so decomposition is not written by this function.
+UBool
+FilteredNormalizer2::getDecomposition(UChar32 c, UnicodeString &decomposition) const {
+    return set.contains(c) && norm2.getDecomposition(c, decomposition);
+}

+// Returns norm2.getRawDecomposition(c, decomposition) when c is in the filter
+// set. For a c outside the set, returns false without calling norm2
+// (short-circuit &&), so decomposition is not written by this function.
+UBool
+FilteredNormalizer2::getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
+    return set.contains(c) && norm2.getRawDecomposition(c, decomposition);
+}

+// Returns norm2.composePair(a, b) only when both a and b are in the filter
+// set; otherwise returns U_SENTINEL without consulting norm2.
+UChar32
+FilteredNormalizer2::composePair(UChar32 a, UChar32 b) const {
+    return (set.contains(a) && set.contains(b)) ? norm2.composePair(a, b) : U_SENTINEL;
+}

+// Returns norm2.getCombiningClass(c) when c is in the filter set,
+// and 0 for any c outside the set.
+uint8_t
+FilteredNormalizer2::getCombiningClass(UChar32 c) const {
+    return set.contains(c) ? norm2.getCombiningClass(c) : 0;
+}

+// Reports whether every in-filter span of s is normalized according to norm2.
+//
+// Returns FALSE if errorCode is a failure after the uprv_checkCanGetBuffer()
+// call, if norm2.isNormalized() rejects any in-filter span, or if norm2 sets
+// a failure. Returns TRUE otherwise (including for an empty s, for which the
+// loop body never runs). Spans found with USET_SPAN_NOT_CONTAINED are skipped.
+UBool
+FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
+    uprv_checkCanGetBuffer(s, errorCode);
+    if(U_FAILURE(errorCode)) {
+        return FALSE;
+    }
+    USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
+    for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
+        int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
+        if(spanCondition==USET_SPAN_NOT_CONTAINED) {
+            // Out-of-filter span: nothing to check, just flip the condition.
+            spanCondition=USET_SPAN_SIMPLE;
+        } else {
+            if( !norm2.isNormalized(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode) ||
+                U_FAILURE(errorCode)
+            ) {
+                return FALSE;
+            }
+            spanCondition=USET_SPAN_NOT_CONTAINED;
+        }
+        prevSpanLimit=spanLimit;
+    }
+    return TRUE;
+}

+// Combines norm2.quickCheck() over the in-filter spans of s.
+//
+// Returns UNORM_MAYBE if errorCode is a failure after the
+// uprv_checkCanGetBuffer() call. While scanning, returns the span's result
+// immediately if norm2 sets a failure or answers UNORM_NO. Otherwise returns
+// UNORM_MAYBE if any span answered UNORM_MAYBE, else UNORM_YES.
+// Spans found with USET_SPAN_NOT_CONTAINED are skipped.
+UNormalizationCheckResult
+FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
+    uprv_checkCanGetBuffer(s, errorCode);
+    if(U_FAILURE(errorCode)) {
+        return UNORM_MAYBE;
+    }
+    UNormalizationCheckResult result=UNORM_YES;
+    USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
+    for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
+        int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
+        if(spanCondition==USET_SPAN_NOT_CONTAINED) {
+            spanCondition=USET_SPAN_SIMPLE;
+        } else {
+            UNormalizationCheckResult qcResult=
+                norm2.quickCheck(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode);
+            if(U_FAILURE(errorCode) || qcResult==UNORM_NO) {
+                return qcResult;
+            } else if(qcResult==UNORM_MAYBE) {
+                // Remember the weaker answer but keep scanning for a UNORM_NO.
+                result=qcResult;
+            }
+            spanCondition=USET_SPAN_NOT_CONTAINED;
+        }
+        prevSpanLimit=spanLimit;
+    }
+    return result;
+}

+// Returns an index into s computed from norm2.spanQuickCheckYes() over the
+// in-filter spans of s.
+//
+// Returns 0 if errorCode is a failure after the uprv_checkCanGetBuffer()
+// call. For each in-filter span, yesLimit is the span's start plus the value
+// norm2.spanQuickCheckYes() returns for that span; the function returns
+// yesLimit as soon as it falls short of the span's end or norm2 sets a
+// failure. Returns s.length() if no span stops early.
+// Spans found with USET_SPAN_NOT_CONTAINED are skipped.
+int32_t
+FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
+    uprv_checkCanGetBuffer(s, errorCode);
+    if(U_FAILURE(errorCode)) {
+        return 0;
+    }
+    USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
+    for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
+        int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
+        if(spanCondition==USET_SPAN_NOT_CONTAINED) {
+            spanCondition=USET_SPAN_SIMPLE;
+        } else {
+            // norm2 sees only the substring, so its result is relative to
+            // prevSpanLimit and must be rebased onto s.
+            int32_t yesLimit=
+                prevSpanLimit+
+                norm2.spanQuickCheckYes(
+                    s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode);
+            if(U_FAILURE(errorCode) || yesLimit<spanLimit) {
+                return yesLimit;
+            }
+            spanCondition=USET_SPAN_NOT_CONTAINED;
+        }
+        prevSpanLimit=spanLimit;
+    }
+    return s.length();
+}

+// Returns true for any c outside the filter set; for a c inside the set,
+// returns norm2.hasBoundaryBefore(c).
+UBool
+FilteredNormalizer2::hasBoundaryBefore(UChar32 c) const {
+    return !set.contains(c) || norm2.hasBoundaryBefore(c);
+}

+// Returns true for any c outside the filter set; for a c inside the set,
+// returns norm2.hasBoundaryAfter(c).
+UBool
+FilteredNormalizer2::hasBoundaryAfter(UChar32 c) const {
+    return !set.contains(c) || norm2.hasBoundaryAfter(c);
+}

+// Returns true for any c outside the filter set; for a c inside the set,
+// returns norm2.isInert(c).
+UBool
+FilteredNormalizer2::isInert(UChar32 c) const {
+    return !set.contains(c) || norm2.isInert(c);
+}

+U_NAMESPACE_END

+// C API ------------------------------------------------------------------- ***

+U_NAMESPACE_USE

+// C API: allocates a FilteredNormalizer2 built from norm2 and filterSet and
+// returns it cast to UNormalizer2 *.
+//
+// - Returns NULL without doing anything if *pErrorCode is already a failure.
+// - Sets U_ILLEGAL_ARGUMENT_ERROR and returns NULL if filterSet is NULL.
+// - Sets U_MEMORY_ALLOCATION_ERROR (and returns NULL) if the allocation
+//   yields NULL.
+//
+// pErrorCode is dereferenced unconditionally and so must not be NULL.
+// NOTE(review): norm2 is dereferenced without a NULL check, unlike filterSet;
+// callers must pass a valid normalizer.
+U_CAPI UNormalizer2 * U_EXPORT2
+unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+    if(filterSet==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    Normalizer2 *fn2=new FilteredNormalizer2(*(Normalizer2 *)norm2,
+                                             *UnicodeSet::fromUSet(filterSet));
+    if(fn2==NULL) {
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+    }
+    return (UNormalizer2 *)fn2;
+}

+#endif // !UCONFIG_NO_NORMALIZATION

Property changes on: icu51/source/common/filterednormalizer2.cpp

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « icu51/source/common/errorcode.cpp ('k') | icu51/source/common/hash.h » ('j') | no next file with comments »