| Index: icu51/source/common/unistr_cnv.cpp
|
| ===================================================================
|
| --- icu51/source/common/unistr_cnv.cpp (revision 0)
|
| +++ icu51/source/common/unistr_cnv.cpp (revision 0)
|
| @@ -0,0 +1,425 @@
|
| +/*
|
| +*******************************************************************************
|
| +*
|
| +* Copyright (C) 1999-2010, International Business Machines
|
| +* Corporation and others. All Rights Reserved.
|
| +*
|
| +*******************************************************************************
|
| +* file name: unistr_cnv.cpp
|
| +* encoding: US-ASCII
|
| +* tab size: 8 (not used)
|
| +* indentation:2
|
| +*
|
| +* created on: 2004aug19
|
| +* created by: Markus W. Scherer
|
| +*
|
| +* Character conversion functions moved here from unistr.cpp
|
| +*/
|
| +
|
| +#include "unicode/utypes.h"
|
| +
|
| +#if !UCONFIG_NO_CONVERSION
|
| +
|
| +#include "unicode/putil.h"
|
| +#include "cstring.h"
|
| +#include "cmemory.h"
|
| +#include "unicode/ustring.h"
|
| +#include "unicode/unistr.h"
|
| +#include "unicode/ucnv.h"
|
| +#include "ucnv_imp.h"
|
| +#include "putilimp.h"
|
| +#include "ustr_cnv.h"
|
| +#include "ustr_imp.h"
|
| +
|
| +U_NAMESPACE_BEGIN
|
| +
|
| +//========================================
|
| +// Constructors
|
| +//========================================
|
| +
|
| +#if !U_CHARSET_IS_UTF8
|
| +
|
| +UnicodeString::UnicodeString(const char *codepageData) // builds the string from NUL-terminated chars
|
| + : fShortLength(0), // start with length 0 and the kShortString flag
|
| + fFlags(kShortString)
|
| +{
|
| + if(codepageData != 0) { // a NULL pointer leaves the string empty
|
| + doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), 0); // codepage 0: doCodepageCreate() uses the default codepage
|
| + }
|
| +}
|
| +
|
| +UnicodeString::UnicodeString(const char *codepageData, // builds the string from dataLength chars
|
| + int32_t dataLength) // passed through unchanged; doCodepageCreate() treats -1 as NUL-terminated
|
| + : fShortLength(0), // start with length 0 and the kShortString flag
|
| + fFlags(kShortString)
|
| +{
|
| + if(codepageData != 0) { // a NULL pointer leaves the string empty
|
| + doCodepageCreate(codepageData, dataLength, 0); // codepage 0: doCodepageCreate() uses the default codepage
|
| + }
|
| +}
|
| +
|
| +// else (U_CHARSET_IS_UTF8 is set): see unistr.cpp
|
| +#endif
|
| +
|
| +UnicodeString::UnicodeString(const char *codepageData, // builds the string from NUL-terminated chars
|
| + const char *codepage) // passed through: 0 = default codepage, "" = invariant characters, else a converter name
|
| + : fShortLength(0), // start with length 0 and the kShortString flag
|
| + fFlags(kShortString)
|
| +{
|
| + if(codepageData != 0) { // a NULL pointer leaves the string empty
|
| + doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), codepage); // length is measured with uprv_strlen()
|
| + }
|
| +}
|
| +
|
| +UnicodeString::UnicodeString(const char *codepageData, // builds the string from dataLength chars
|
| + int32_t dataLength, // passed through unchanged; doCodepageCreate() treats -1 as NUL-terminated
|
| + const char *codepage) // passed through: 0 = default codepage, "" = invariant characters, else a converter name
|
| + : fShortLength(0), // start with length 0 and the kShortString flag
|
| + fFlags(kShortString)
|
| +{
|
| + if(codepageData != 0) { // a NULL pointer leaves the string empty
|
| + doCodepageCreate(codepageData, dataLength, codepage); // all argument checking and conversion happens there
|
| + }
|
| +}
|
| +
|
| +UnicodeString::UnicodeString(const char *src, int32_t srcLength, // builds the string by converting srcLength chars of src
|
| + UConverter *cnv, // converter to use; 0 selects the default converter
|
| + UErrorCode &errorCode) // in/out: must be a success code on entry, receives any failure
|
| + : fShortLength(0), // start with length 0 and the kShortString flag
|
| + fFlags(kShortString)
|
| +{
|
| + if(U_SUCCESS(errorCode)) { // nothing is done if errorCode already indicates a failure
|
| + // check arguments
|
| + if(src==NULL) {
|
| + // treat as an empty string, do nothing more
|
| + } else if(srcLength<-1) { // -1 is the only negative length accepted
|
| + errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
| + } else {
|
| + // get input length
|
| + if(srcLength==-1) { // -1 means src is NUL-terminated
|
| + srcLength=(int32_t)uprv_strlen(src);
|
| + }
|
| + if(srcLength>0) { // zero-length input leaves the string empty
|
| + if(cnv!=0) {
|
| + // use the provided converter
|
| + ucnv_resetToUnicode(cnv); // the caller's converter is reset before it is used
|
| + doCodepageCreate(src, srcLength, cnv, errorCode);
|
| + } else {
|
| + // use the default converter
|
| + cnv=u_getDefaultConverter(&errorCode);
|
| + doCodepageCreate(src, srcLength, cnv, errorCode); // returns at once if getting the converter failed
|
| + u_releaseDefaultConverter(cnv); // hand the default converter back
|
| + }
|
| + }
|
| + }
|
| +
|
| + if(U_FAILURE(errorCode)) { // covers both the argument error and conversion failures
|
| + setToBogus();
|
| + }
|
| + }
|
| +}
|
| +
|
| +//========================================
|
| +// Codeset conversion
|
| +//========================================
|
| +
|
| +#if !U_CHARSET_IS_UTF8
|
| +
|
| +int32_t
|
| +UnicodeString::extract(int32_t start, // convenience overload without a codepage argument
|
| + int32_t length,
|
| + char *target,
|
| + uint32_t dstSize) const {
|
| + return extract(start, length, target, dstSize, 0); // codepage 0: the codepage overload uses the default codepage
|
| +}
|
| +
|
| +// else (U_CHARSET_IS_UTF8 is set): see unistr.cpp
|
| +#endif
|
| +
|
| +int32_t
|
| +UnicodeString::extract(int32_t start, // converts the range [start, start+length) of this string to chars
|
| + int32_t length,
|
| + char *target, // output buffer; may be 0 only if dstSize is 0
|
| + uint32_t dstSize, // output capacity; values >= 0x7fffffff are pinned below
|
| + const char *codepage) const // 0 = default codepage, "" = invariant characters, else a converter name
|
| +{
|
| + // if the arguments are illegal, then do nothing
|
| + if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
|
| + return 0;
|
| + }
|
| +
|
| + // pin the indices to legal values
|
| + pinIndices(start, length);
|
| +
|
| + // We need to cast dstSize to int32_t for all subsequent code.
|
| + // I don't know why the API was defined with uint32_t but we are stuck with it.
|
| + // Also, dstSize==0xffffffff means "unlimited" but if we use target+dstSize
|
| + // as a limit in some functions, it may wrap around and yield a pointer
|
| + // that compares less-than target.
|
| + int32_t capacity;
|
| + if(dstSize < 0x7fffffff) {
|
| + // Assume that the capacity is real and a limit pointer won't wrap around.
|
| + capacity = (int32_t)dstSize;
|
| + } else {
|
| + // Pin the capacity so that a limit pointer does not wrap around.
|
| + char *targetLimit = (char *)U_MAX_PTR(target);
|
| + // U_MAX_PTR(target) returns a targetLimit that is at most 0x7fffffff
|
| + // greater than target and does not wrap around the top of the address space.
|
| + capacity = (int32_t)(targetLimit - target);
|
| + }
|
| +
|
| + // create the converter
|
| + UConverter *converter;
|
| + UErrorCode status = U_ZERO_ERROR; // local only; this overload has no error-code parameter
|
| +
|
| + // just write the NUL if the string length is 0
|
| + if(length == 0) {
|
| + return u_terminateChars(target, capacity, 0, &status);
|
| + }
|
| +
|
| + // if the codepage is the default, use our cache
|
| + // if it is an empty string, then use the "invariant character" conversion
|
| + if (codepage == 0) {
|
| + const char *defaultName = ucnv_getDefaultName();
|
| + if(UCNV_FAST_IS_UTF8(defaultName)) {
|
| + return toUTF8(start, length, target, capacity); // default codepage is UTF-8: no converter is fetched
|
| + }
|
| + converter = u_getDefaultConverter(&status);
|
| + } else if (*codepage == 0) {
|
| + // use the "invariant characters" conversion
|
| + int32_t destLength;
|
| + if(length <= capacity) {
|
| + destLength = length;
|
| + } else {
|
| + destLength = capacity; // truncate the copy to what fits
|
| + }
|
| + u_UCharsToChars(getArrayStart() + start, target, destLength);
|
| + return u_terminateChars(target, capacity, length, &status); // passes the full length, not the truncated destLength
|
| + } else {
|
| + converter = ucnv_open(codepage, &status);
|
| + }
|
| +
|
| + length = doExtract(start, length, target, capacity, converter, status); // doExtract() checks status first, so a failed converter fetch/open is handled there
|
| +
|
| + // close the converter
|
| + if (codepage == 0) {
|
| + u_releaseDefaultConverter(converter); // came from u_getDefaultConverter()
|
| + } else {
|
| + ucnv_close(converter); // came from ucnv_open()
|
| + }
|
| +
|
| + return length;
|
| +}
|
| +
|
| +int32_t
|
| +UnicodeString::extract(char *dest, int32_t destCapacity, // converts the whole string into dest
|
| + UConverter *cnv, // converter to use; 0 selects the default converter
|
| + UErrorCode &errorCode) const // in/out: must be a success code on entry
|
| +{
|
| + if(U_FAILURE(errorCode)) { // nothing is done if the caller already has an error
|
| + return 0;
|
| + }
|
| +
|
| + if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) { // bogus string or inconsistent buffer arguments
|
| + errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
| + return 0;
|
| + }
|
| +
|
| + // nothing to do?
|
| + if(isEmpty()) {
|
| + return u_terminateChars(dest, destCapacity, 0, &errorCode);
|
| + }
|
| +
|
| + // get the converter
|
| + UBool isDefaultConverter; // remembers whether the converter must be released below
|
| + if(cnv==0) {
|
| + isDefaultConverter=TRUE;
|
| + cnv=u_getDefaultConverter(&errorCode);
|
| + if(U_FAILURE(errorCode)) {
|
| + return 0;
|
| + }
|
| + } else {
|
| + isDefaultConverter=FALSE;
|
| + ucnv_resetFromUnicode(cnv); // the caller's converter is reset before it is used
|
| + }
|
| +
|
| + // convert
|
| + int32_t len=doExtract(0, length(), dest, destCapacity, cnv, errorCode); // start 0 and length(): the entire string
|
| +
|
| + // release the converter
|
| + if(isDefaultConverter) { // a caller-supplied converter is left open
|
| + u_releaseDefaultConverter(cnv);
|
| + }
|
| +
|
| + return len;
|
| +}
|
| +
|
| +int32_t
|
| +UnicodeString::doExtract(int32_t start, int32_t length, // converts [start, start+length) with cnv into dest
|
| + char *dest, int32_t destCapacity, // destCapacity: 0 = count only, -1 = "unlimited" (pinned below)
|
| + UConverter *cnv,
|
| + UErrorCode &errorCode) const // in/out: checked first, receives the conversion result
|
| +{
|
| + if(U_FAILURE(errorCode)) {
|
| + if(destCapacity!=0) {
|
| + *dest=0; // still hand back a terminated (empty) output
|
| + }
|
| + return 0;
|
| + }
|
| +
|
| + const UChar *src=getArrayStart()+start, *srcLimit=src+length;
|
| + char *originalDest=dest; // start of the output, for the length computation and termination
|
| + const char *destLimit;
|
| +
|
| + if(destCapacity==0) {
|
| + destLimit=originalDest=dest=0; // count only; originalDest must be reset too or dest-originalDest below is garbage for a non-NULL dest
|
| + } else if(destCapacity==-1) {
|
| + // Pin the limit to U_MAX_PTR if the "magic" destCapacity is used.
|
| + destLimit=(char*)U_MAX_PTR(dest);
|
| + // for NUL-termination, translate into highest int32_t
|
| + destCapacity=0x7fffffff;
|
| + } else {
|
| + destLimit=dest+destCapacity;
|
| + }
|
| +
|
| + // perform the conversion
|
| + ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
|
| + length=(int32_t)(dest-originalDest); // chars written into the caller's buffer
|
| +
|
| + // if an overflow occurs, then get the preflighting length
|
| + if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
|
| + char buffer[1024]; // scratch output, discarded; only its fill level is counted
|
| +
|
| + destLimit=buffer+sizeof(buffer);
|
| + do {
|
| + dest=buffer; // reuse the scratch buffer from its start on every pass
|
| + errorCode=U_ZERO_ERROR;
|
| + ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
|
| + length+=(int32_t)(dest-buffer);
|
| + } while(errorCode==U_BUFFER_OVERFLOW_ERROR);
|
| + }
|
| +
|
| + return u_terminateChars(originalDest, destCapacity, length, &errorCode); // length is the total count, even when it exceeds destCapacity
|
| +}
|
| +
|
| +void
|
| +UnicodeString::doCodepageCreate(const char *codepageData, // sets this string from chars in the named codepage
|
| + int32_t dataLength, // -1 means codepageData is NUL-terminated
|
| + const char *codepage) // 0 = default codepage, "" = invariant characters, else a converter name
|
| +{
|
| + // if there's nothing to convert, do nothing
|
| + if(codepageData == 0 || dataLength == 0 || dataLength < -1) { // lengths below -1 are silently ignored as well
|
| + return;
|
| + }
|
| + if(dataLength == -1) {
|
| + dataLength = (int32_t)uprv_strlen(codepageData);
|
| + }
|
| +
|
| + UErrorCode status = U_ZERO_ERROR; // local only; failures are reported by making the string bogus
|
| +
|
| + // create the converter
|
| + // if the codepage is the default, use our cache
|
| + // if it is an empty string, then use the "invariant character" conversion
|
| + UConverter *converter;
|
| + if (codepage == 0) {
|
| + const char *defaultName = ucnv_getDefaultName();
|
| + if(UCNV_FAST_IS_UTF8(defaultName)) {
|
| + setToUTF8(StringPiece(codepageData, dataLength)); // default codepage is UTF-8: no converter is fetched
|
| + return;
|
| + }
|
| + converter = u_getDefaultConverter(&status);
|
| + } else if(*codepage == 0) {
|
| + // use the "invariant characters" conversion
|
| + if(cloneArrayIfNeeded(dataLength, dataLength, FALSE)) { // room for one UChar per input char
|
| + u_charsToUChars(codepageData, getArrayStart(), dataLength);
|
| + setLength(dataLength);
|
| + } else {
|
| + setToBogus(); // could not get a large enough array
|
| + }
|
| + return;
|
| + } else {
|
| + converter = ucnv_open(codepage, &status);
|
| + }
|
| +
|
| + // if we failed, set the appropriate flags and return
|
| + if(U_FAILURE(status)) { // returns without releasing or closing a converter
|
| + setToBogus();
|
| + return;
|
| + }
|
| +
|
| + // perform the conversion
|
| + doCodepageCreate(codepageData, dataLength, converter, status); // the converter-based overload does the actual work
|
| + if(U_FAILURE(status)) {
|
| + setToBogus();
|
| + }
|
| +
|
| + // close the converter
|
| + if(codepage == 0) {
|
| + u_releaseDefaultConverter(converter); // came from u_getDefaultConverter()
|
| + } else {
|
| + ucnv_close(converter); // came from ucnv_open()
|
| + }
|
| +}
|
| +
|
| +void
|
| +UnicodeString::doCodepageCreate(const char *codepageData, // converts dataLength chars into this string with the given converter
|
| + int32_t dataLength, // used as is; callers resolve -1 before calling
|
| + UConverter *converter,
|
| + UErrorCode &status) // in/out: checked first, receives the conversion result
|
| +{
|
| + if(U_FAILURE(status)) {
|
| + return;
|
| + }
|
| +
|
| + // set up the conversion parameters
|
| + const char *mySource = codepageData; // advanced by ucnv_toUnicode() as input is consumed
|
| + const char *mySourceEnd = mySource + dataLength;
|
| + UChar *array, *myTarget;
|
| +
|
| + // estimate the size needed:
|
| + int32_t arraySize;
|
| + if(dataLength <= US_STACKBUF_SIZE) {
|
| + // try to use the stack buffer
|
| + arraySize = US_STACKBUF_SIZE;
|
| + } else {
|
| + // 1.25 UChar's per source byte should cover most cases
|
| + arraySize = dataLength + (dataLength >> 2); // dataLength plus a quarter of it, rounded down
|
| + }
|
| +
|
| + // we do not care about the current contents
|
| + UBool doCopyArray = FALSE; // switched to TRUE once partial output must survive a regrow
|
| + for(;;) { // one pass per buffer size; repeats only on overflow
|
| + if(!cloneArrayIfNeeded(arraySize, arraySize, doCopyArray)) {
|
| + setToBogus(); // no array of the requested size; status itself is not changed here
|
| + break;
|
| + }
|
| +
|
| + // perform the conversion
|
| + array = getArrayStart(); // re-read on every pass, after cloneArrayIfNeeded()
|
| + myTarget = array + length(); // write after the UChars already in the string
|
| + ucnv_toUnicode(converter, &myTarget, array + getCapacity(),
|
| + &mySource, mySourceEnd, 0, TRUE, &status);
|
| +
|
| + // update the conversion parameters
|
| + setLength((int32_t)(myTarget - array)); // new length is wherever the converter stopped writing
|
| +
|
| + // allocate more space and copy data, if needed
|
| + if(status == U_BUFFER_OVERFLOW_ERROR) {
|
| + // reset the error code
|
| + status = U_ZERO_ERROR;
|
| +
|
| + // keep the previous conversion results
|
| + doCopyArray = TRUE;
|
| +
|
| + // estimate the new size needed, larger than before
|
| + // try 2 UChar's per remaining source byte
|
| + arraySize = (int32_t)(length() + 2 * (mySourceEnd - mySource));
|
| + } else {
|
| + break; // finished, or failed with an error other than overflow
|
| + }
|
| + }
|
| +}
|
| +
|
| +U_NAMESPACE_END
|
| +
|
| +#endif
|
|
|
| Property changes on: icu51/source/common/unistr_cnv.cpp
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|