icu51/source/common/ucnv.c - Issue 20882002: Check in the pristine copy of ICU 51.2

Unified Diff: icu51/source/common/ucnv.c

Issue 20882002: Check in the pristine copy of ICU 51.2 (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: icu51/source/common/ucnv.c

===================================================================

--- icu51/source/common/ucnv.c (revision 0)

+++ icu51/source/common/ucnv.c (revision 0)

@@ -0,0 +1,2918 @@

+/*

+******************************************************************************

+* ucnv.c:

+* Implements APIs for the ICU's codeset conversion library;

+* mostly calls through internal functions;

+* created by Bertrand A. Damiba

+* Modification History:

+* Date Name Description

+* 04/04/99 helena Fixed internal header inclusion.

+* 05/09/00 helena Added implementation to handle fallback mappings.

+* 06/20/2000 helena OS/400 port changes; mostly typecast.

+*/

+#include "unicode/utypes.h"

+#if !UCONFIG_NO_CONVERSION

+#include "unicode/ustring.h"

+#include "unicode/ucnv.h"

+#include "unicode/ucnv_err.h"

+#include "unicode/uset.h"

+#include "unicode/utf.h"

+#include "unicode/utf16.h"

+#include "putilimp.h"

+#include "cmemory.h"

+#include "cstring.h"

+#include "uassert.h"

+#include "utracimp.h"

+#include "ustr_imp.h"

+#include "ucnv_imp.h"

+#include "ucnv_cnv.h"

+#include "ucnv_bld.h"

+/* size of intermediate and preflighting buffers in ucnv_convert() */

+#define CHUNK_SIZE 1024

+typedef struct UAmbiguousConverter { /* element type of the ambiguousConverters[] table */

+ const char *name; /* converter name string */

+ const UChar variant5c; /* UChar associated with this converter; presumably what byte 0x5c maps to instead of a backslash - TODO confirm against the users of the table */

+} UAmbiguousConverter;

+static const UAmbiguousConverter ambiguousConverters[]={ /* variant5c values below: 0xa5 is U+00A5 YEN SIGN, 0x20a9 is U+20A9 WON SIGN; commented-out rows are disabled entries */

+ { "ibm-897_P100-1995", 0xa5 },

+ { "ibm-942_P120-1999", 0xa5 },

+ { "ibm-943_P130-1999", 0xa5 },

+ { "ibm-946_P100-1995", 0xa5 },

+ { "ibm-33722_P120-1999", 0xa5 },

+ { "ibm-1041_P100-1995", 0xa5 },

+ /*{ "ibm-54191_P100-2006", 0xa5 },*/

+ /*{ "ibm-62383_P100-2007", 0xa5 },*/

+ /*{ "ibm-891_P100-1995", 0x20a9 },*/

+ { "ibm-944_P100-1995", 0x20a9 },

+ { "ibm-949_P110-1999", 0x20a9 },

+ { "ibm-1363_P110-1997", 0x20a9 },

+ { "ISO_2022,locale=ko,version=0", 0x20a9 },

+ { "ibm-1088_P100-1995", 0x20a9 }

+};

+/*

+ * Opens a converter by name; calls through ucnv_createConverter().

+ * Returns NULL without doing anything if err is NULL or already holds a failure code.

+ * Otherwise returns whatever ucnv_createConverter(NULL, name, err) returns.

+ */

+U_CAPI UConverter* U_EXPORT2

+ucnv_open (const char *name,

+ UErrorCode * err)

+ UConverter *r;

+ if (err == NULL || U_FAILURE (*err)) {

+ return NULL;

+ }

+ r = ucnv_createConverter(NULL, name, err);

+ return r;

+U_CAPI UConverter* U_EXPORT2

+ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err)

+ return ucnv_createConverterFromPackage(packageName, converterName, err); /* thin wrapper: all three arguments are forwarded unchanged; err is not inspected here */

+/*Extracts the UChar* to a char* and calls through createConverter.

+ *Returns NULL if err is NULL or already holds a failure code.

+ *A NULL name is passed on as ucnv_open(NULL, err).

+ *A name of UCNV_MAX_CONVERTER_NAME_LENGTH or more UChars is rejected with

+ *U_ILLEGAL_ARGUMENT_ERROR because asciiName has exactly that many chars.*/

+U_CAPI UConverter* U_EXPORT2

+ucnv_openU (const UChar * name,

+ UErrorCode * err)

+ char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];

+ if (err == NULL || U_FAILURE(*err))

+ return NULL;

+ if (name == NULL)

+ return ucnv_open (NULL, err);

+ if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH)

+ {

+ *err = U_ILLEGAL_ARGUMENT_ERROR;

+ return NULL;

+ }

+ return ucnv_open(u_austrcpy(asciiName, name), err);

+/* Copy the string that is represented by the UConverterPlatform enum

+ * @param platformString An output buffer; receives "ibm-" or an empty string

+ * @param pltfrm An enum representing a platform

+ * @return the length of the copied string: 4 for UCNV_IBM, 0 for every other value.

+ */

+static int32_t

+ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)

+ switch (pltfrm)

+ {

+ case UCNV_IBM:

+ uprv_strcpy(platformString, "ibm-");

+ return 4;

+ case UCNV_UNKNOWN:

+ break;

+ }

+ /* default to empty string */

+ *platformString = 0;

+ return 0;

+/*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls

+ *through createConverter.

+ *The name is built in myName: the platform prefix from ucnv_copyPlatformString()

+ *followed by the codepage number written with radix 10.

+ *Returns NULL if err is NULL or already holds a failure code.*/

+U_CAPI UConverter* U_EXPORT2

+ucnv_openCCSID (int32_t codepage,

+ UConverterPlatform platform,

+ UErrorCode * err)

+ char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];

+ int32_t myNameLen;

+ if (err == NULL || U_FAILURE (*err))

+ return NULL;

+ /* ucnv_copyPlatformString writes "ibm-" for UCNV_IBM and an empty string otherwise */

+ myNameLen = ucnv_copyPlatformString(myName, platform);

+ T_CString_integerToString(myName + myNameLen, codepage, 10);

+ return ucnv_createConverter(NULL, myName, err);

+/* Creating a temporary stack-based object that can be used in one thread,

+and created from a converter that is shared across threads.

+

+Returns 0 if status is NULL or already a failure, or (with

+U_ILLEGAL_ARGUMENT_ERROR) if pBufferSize or cnv is NULL.

+If *pBufferSize <= 0 this is a preflighting call: the needed size is stored

+in *pBufferSize and 0 is returned.

+Otherwise the clone is built in stackBuffer (after aligning it) when it is

+large enough, or in heap memory, in which case *status becomes

+U_SAFECLONE_ALLOCATED_WARNING and *pBufferSize the allocated size.

+Every allocation failure sets U_MEMORY_ALLOCATION_ERROR and returns NULL.

+*/

+U_CAPI UConverter* U_EXPORT2

+ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)

+ UConverter *localConverter, *allocatedConverter;

+ int32_t bufferSizeNeeded;

+ char *stackBufferChars = (char *)stackBuffer;

+ UErrorCode cbErr;

+ UConverterToUnicodeArgs toUArgs = {

+ sizeof(UConverterToUnicodeArgs),

+ TRUE,

+ NULL,

+ NULL

+ };

+ UConverterFromUnicodeArgs fromUArgs = {

+ sizeof(UConverterFromUnicodeArgs),

+ TRUE,

+ NULL,

+ NULL

+ };

+ UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);

+ if (status == NULL || U_FAILURE(*status)){

+ UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);

+ return 0;

+ }

+ if (!pBufferSize || !cnv){

+ *status = U_ILLEGAL_ARGUMENT_ERROR;

+ UTRACE_EXIT_STATUS(*status);

+ return 0;

+ }

+ UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",

+ ucnv_getName(cnv, status), cnv, stackBuffer);

+ if (cnv->sharedData->impl->safeClone != NULL) {

+ /* call the custom safeClone function for sizing */

+ bufferSizeNeeded = 0;

+ cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);

+ }

+ else

+ {

+ /* inherent sizing */

+ bufferSizeNeeded = sizeof(UConverter);

+ }

+ if (*pBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */

+ *pBufferSize = bufferSizeNeeded;

+ UTRACE_EXIT_VALUE(bufferSizeNeeded);

+ return 0;

+ }

+ /* Pointers on 64-bit platforms need to be aligned

+ * on a 64-bit boundary in memory.

+ */

+ if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {

+ int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);

+ if(*pBufferSize > offsetUp) {

+ *pBufferSize -= offsetUp;

+ stackBufferChars += offsetUp;

+ } else {

+ /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */

+ *pBufferSize = 1;

+ }

+ stackBuffer = (void *)stackBufferChars;

+ /* Now, see if we must allocate any memory */

+ if (*pBufferSize < bufferSizeNeeded || stackBuffer == NULL)

+ {

+ /* allocate one here...*/

+ localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded);

+ if(localConverter == NULL) {

+ *status = U_MEMORY_ALLOCATION_ERROR;

+ UTRACE_EXIT_STATUS(*status);

+ return NULL;

+ }

+ if (U_SUCCESS(*status)) {

+ *status = U_SAFECLONE_ALLOCATED_WARNING;

+ }

+ /* record the fact that memory was allocated */

+ *pBufferSize = bufferSizeNeeded;

+ } else {

+ /* just use the stack buffer */

+ localConverter = (UConverter*) stackBuffer;

+ allocatedConverter = NULL;

+ }

+ uprv_memset(localConverter, 0, bufferSizeNeeded);

+ /* Copy initial state */

+ uprv_memcpy(localConverter, cnv, sizeof(UConverter));

+ localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;

+ /* copy the substitution string */

+ if (cnv->subChars == (uint8_t *)cnv->subUChars) {

+ localConverter->subChars = (uint8_t *)localConverter->subUChars;

+ } else {

+ localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);

+ if (localConverter->subChars == NULL) {

+ /*

+ * Report the failure: without this the function would return NULL

+ * while *status still says success (or only the allocation warning),

+ * and callers that test U_FAILURE() alone would use a NULL converter.

+ */

+ *status = U_MEMORY_ALLOCATION_ERROR;

+ uprv_free(allocatedConverter);

+ UTRACE_EXIT_STATUS(*status);

+ return NULL;

+ }

+ uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);

+ }

+ /* now either call the safeclone fcn or not */

+ if (cnv->sharedData->impl->safeClone != NULL) {

+ /* call the custom safeClone function */

+ localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status);

+ }

+ if(localConverter==NULL || U_FAILURE(*status)) {

+ /*

+ * NOTE(review): a separately allocated subChars buffer is released here only

+ * for a heap-allocated clone; for a clone in the caller's stackBuffer it

+ * looks like that buffer is leaked on this path - confirm.

+ */

+ if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) {

+ uprv_free(allocatedConverter->subChars);

+ }

+ uprv_free(allocatedConverter);

+ UTRACE_EXIT_STATUS(*status);

+ return NULL;

+ }

+ /* increment refcount of shared data if needed */

+ /*

+ Checking whether it's an algorithmic converter is okay

+ in multithreaded applications because the value never changes.

+ Don't check referenceCounter for any other value.

+ */

+ if (cnv->sharedData->referenceCounter != ~0) {

+ ucnv_incrementRefCount(cnv->sharedData);

+ }

+ if(localConverter == (UConverter*)stackBuffer) {

+ /* we're using user provided data - set to not destroy */

+ localConverter->isCopyLocal = TRUE;

+ }

+ /* allow callback functions to handle any memory allocation */

+ toUArgs.converter = fromUArgs.converter = localConverter;

+ cbErr = U_ZERO_ERROR;

+ cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr);

+ cbErr = U_ZERO_ERROR;

+ cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr);

+ UTRACE_EXIT_PTR_STATUS(localConverter, *status);

+ return localConverter;

+/*Decreases the reference counter in the shared immutable section of the object

+ *and frees the mutable part.

+ *A NULL converter is a no-op.

+ *Order of work: notify non-default callbacks with UCNV_CLOSE, call the

+ *implementation's close function if there is one, free a separately allocated

+ *subChars buffer, release the shared data, and finally free the converter

+ *itself unless isCopyLocal is set.*/

+U_CAPI void U_EXPORT2

+ucnv_close (UConverter * converter)

+ UErrorCode errorCode = U_ZERO_ERROR;

+ UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);

+ if (converter == NULL)

+ {

+ UTRACE_EXIT();

+ return;

+ }

+ UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",

+ ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);

+ /* In order to speed up the close, only call the callbacks when they have been changed.

+ This performance check will only work when the callbacks are set within a shared library

+ or from user code that statically links this code. */

+ /* first, notify the callback functions that the converter is closed */

+ if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {

+ UConverterToUnicodeArgs toUArgs = {

+ sizeof(UConverterToUnicodeArgs),

+ TRUE,

+ NULL,

+ NULL

+ };

+ toUArgs.converter = converter;

+ errorCode = U_ZERO_ERROR;

+ converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);

+ }

+ if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {

+ UConverterFromUnicodeArgs fromUArgs = {

+ sizeof(UConverterFromUnicodeArgs),

+ TRUE,

+ NULL,

+ NULL

+ };

+ fromUArgs.converter = converter;

+ errorCode = U_ZERO_ERROR;

+ converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);

+ }

+ if (converter->sharedData->impl->close != NULL) {

+ converter->sharedData->impl->close(converter);

+ }

+ if (converter->subChars != (uint8_t *)converter->subUChars) {

+ uprv_free(converter->subChars);

+ }

+ /*

+ Checking whether it's an algorithmic converter is okay

+ in multithreaded applications because the value never changes.

+ Don't check referenceCounter for any other value.

+ */

+ if (converter->sharedData->referenceCounter != ~0) {

+ ucnv_unloadSharedDataIfReady(converter->sharedData);

+ }

+ if(!converter->isCopyLocal){

+ uprv_free(converter);

+ }

+ UTRACE_EXIT();

+/*returns a single Name from the list, will return NULL if out of bounds

+ *n is accepted only in the range 0..0xffff because it is passed on as a uint16_t;

+ *NULL is also returned when ucnv_bld_getAvailableConverter() reports a failure

+ */

+U_CAPI const char* U_EXPORT2

+ucnv_getAvailableName (int32_t n)

+ if (0 <= n && n <= 0xffff) {

+ UErrorCode err = U_ZERO_ERROR;

+ const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err);

+ if (U_SUCCESS(err)) {

+ return name;

+ }

+ return NULL;

+U_CAPI int32_t U_EXPORT2

+ucnv_countAvailable ()

+ UErrorCode err = U_ZERO_ERROR; /* local error code only; it is never reported to the caller */

+ return ucnv_bld_countAvailableConverters(&err); /* the count is returned as is, whatever err ends up as */

+U_CAPI void U_EXPORT2 /*

+ * Copies the converter's substitution bytes into mySubChar.

+ * On input *len is the capacity of mySubChar, on output the number of bytes copied.

+ * *len is set to 0 when subCharLen <= 0; U_INDEX_OUTOFBOUNDS_ERROR is set when

+ * the capacity is too small. None of the pointer arguments is NULL-checked.

+ */

+ucnv_getSubstChars (const UConverter * converter,

+ char *mySubChar,

+ int8_t * len,

+ UErrorCode * err)

+ if (U_FAILURE (*err))

+ return;

+ if (converter->subCharLen <= 0) {

+ /* Unicode string or empty string from ucnv_setSubstString(). */

+ *len = 0;

+ return;

+ }

+ if (*len < converter->subCharLen) /*not enough space in subChars */

+ {

+ *err = U_INDEX_OUTOFBOUNDS_ERROR;

+ return;

+ }

+ uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */

+ *len = converter->subCharLen; /*store # of bytes copied to buffer */

+U_CAPI void U_EXPORT2 /*

+ * Sets the converter's substitution bytes from mySubChar (len bytes).

+ * len must lie between the shared static data's minBytesPerChar and

+ * maxBytesPerChar (inclusive), otherwise U_ILLEGAL_ARGUMENT_ERROR is set and

+ * nothing is changed. None of the pointer arguments is NULL-checked.

+ */

+ucnv_setSubstChars (UConverter * converter,

+ const char *mySubChar,

+ int8_t len,

+ UErrorCode * err)

+ if (U_FAILURE (*err))

+ return;

+ /*Makes sure that the subChar is within the codepages char length boundaries */

+ if ((len > converter->sharedData->staticData->maxBytesPerChar)

+ || (len < converter->sharedData->staticData->minBytesPerChar))

+ {

+ *err = U_ILLEGAL_ARGUMENT_ERROR;

+ return;

+ }

+ uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */

+ converter->subCharLen = len; /*sets the new len */

+ /*

+ * There is currently (2001Feb) no separate API to set/get subChar1.

+ * In order to always have subChar written after it is explicitly set,

+ * we set subChar1 to 0.

+ */

+ converter->subChar1 = 0;

+ return;

+U_CAPI void U_EXPORT2 /*

+ * Sets the substitution string of cnv from the Unicode string s.

+ * s is first converted with a temporary clone of cnv whose from-Unicode

+ * callback is UCNV_FROM_U_CALLBACK_STOP; any failure from that leaves cnv

+ * unchanged. Afterwards either the converted bytes are stored (subCharLen is

+ * their positive byte count) or the UChars themselves are stored (subCharLen

+ * is the negated UChar count), see the two branches below.

+ */

+ucnv_setSubstString(UConverter *cnv,

+ const UChar *s,

+ int32_t length,

+ UErrorCode *err) {

+ UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1];

+ char chars[UCNV_ERROR_BUFFER_LENGTH];

+ UConverter *clone;

+ uint8_t *subChars;

+ int32_t cloneSize, length8;

+ /* Let the following functions check all arguments. */

+ cloneSize = sizeof(cloneBuffer);

+ clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);

+ ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err);

+ length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);

+ ucnv_close(clone);

+ if (U_FAILURE(*err)) {

+ return;

+ }

+ if (cnv->sharedData->impl->writeSub == NULL

+#if !UCONFIG_NO_LEGACY_CONVERSION

+ || (cnv->sharedData->staticData->conversionType == UCNV_MBCS &&

+ ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)

+#endif

+ ) {

+ /* The converter is not stateful. Store the charset bytes as a fixed string. */

+ subChars = (uint8_t *)chars;

+ } else {

+ /*

+ * The converter has a non-default writeSub() function, indicating

+ * that it is stateful.

+ * Store the Unicode string for on-the-fly conversion for correct

+ * state handling.

+ */

+ if (length > UCNV_ERROR_BUFFER_LENGTH) {

+ /*

+ * Should not occur. The converter should output at least one byte

+ * per UChar, which means that ucnv_fromUChars() should catch all

+ * overflows.

+ */

+ *err = U_BUFFER_OVERFLOW_ERROR;

+ return;

+ }

+ subChars = (uint8_t *)s;

+ if (length < 0) {

+ length = u_strlen(s);

+ }

+ length8 = length * U_SIZEOF_UCHAR;

+ }

+ /*

+ * For storing the substitution string, select either the small buffer inside

+ * UConverter or allocate a subChars buffer.

+ */

+ if (length8 > UCNV_MAX_SUBCHAR_LEN) {

+ /* Use a separate buffer for the string. Outside UConverter to not make it too large. */

+ if (cnv->subChars == (uint8_t *)cnv->subUChars) {

+ /* Allocate a new buffer for the string. */

+ cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);

+ if (cnv->subChars == NULL) {

+ cnv->subChars = (uint8_t *)cnv->subUChars;

+ *err = U_MEMORY_ALLOCATION_ERROR;

+ return;

+ }

+ uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);

+ }

+ /* Copy the substitution string into the UConverter or its subChars buffer. */

+ if (length8 == 0) {

+ cnv->subCharLen = 0;

+ } else {

+ uprv_memcpy(cnv->subChars, subChars, length8);

+ if (subChars == (uint8_t *)chars) {

+ cnv->subCharLen = (int8_t)length8;

+ } else /* subChars == s */ {

+ cnv->subCharLen = (int8_t)-length;

+ }

+ /* See comment in ucnv_setSubstChars(). */

+ cnv->subChar1 = 0;

+/*resets the internal states of a converter

+ *goal : have the same behaviour as a freshly created converter

+ *choice selects which direction(s) are reset; callCallback says whether the

+ *non-default error callbacks are notified with UCNV_RESET first.

+ *A NULL converter is a no-op.

+ */

+static void _reset(UConverter *converter, UConverterResetChoice choice,

+ UBool callCallback) {

+ if(converter == NULL) {

+ return;

+ }

+ if(callCallback) {

+ /* first, notify the callback functions that the converter is reset */

+ UErrorCode errorCode;

+ /* choice<=UCNV_RESET_TO_UNICODE: presumably matches UCNV_RESET_BOTH and UCNV_RESET_TO_UNICODE - confirm the enum order */

+ if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {

+ UConverterToUnicodeArgs toUArgs = {

+ sizeof(UConverterToUnicodeArgs),

+ TRUE,

+ NULL,

+ NULL

+ };

+ toUArgs.converter = converter;

+ errorCode = U_ZERO_ERROR;

+ converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);

+ }

+ if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {

+ UConverterFromUnicodeArgs fromUArgs = {

+ sizeof(UConverterFromUnicodeArgs),

+ TRUE,

+ NULL,

+ NULL

+ };

+ fromUArgs.converter = converter;

+ errorCode = U_ZERO_ERROR;

+ converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);

+ }

+ /* now reset the converter itself */

+ if(choice<=UCNV_RESET_TO_UNICODE) {

+ converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;

+ converter->mode = 0;

+ converter->toULength = 0;

+ converter->invalidCharLength = converter->UCharErrorBufferLength = 0;

+ converter->preToULength = 0;

+ }

+ if(choice!=UCNV_RESET_TO_UNICODE) {

+ converter->fromUnicodeStatus = 0;

+ converter->fromUChar32 = 0;

+ converter->invalidUCharLength = converter->charErrorBufferLength = 0;

+ converter->preFromUFirstCP = U_SENTINEL;

+ converter->preFromULength = 0;

+ }

+ if (converter->sharedData->impl->reset != NULL) {

+ /* call the custom reset function */

+ converter->sharedData->impl->reset(converter, choice);

+ }

+U_CAPI void U_EXPORT2

+ucnv_reset(UConverter *converter)

+ _reset(converter, UCNV_RESET_BOTH, TRUE); /* reset both directions and notify the callbacks */

+U_CAPI void U_EXPORT2

+ucnv_resetToUnicode(UConverter *converter)

+ _reset(converter, UCNV_RESET_TO_UNICODE, TRUE); /* reset only the to-Unicode state and notify the callbacks */

+U_CAPI void U_EXPORT2

+ucnv_resetFromUnicode(UConverter *converter)

+ _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE); /* reset only the from-Unicode state and notify the callbacks */

+U_CAPI int8_t U_EXPORT2

+ucnv_getMaxCharSize (const UConverter * converter)

+ return converter->maxBytesPerUChar; /* read from the converter object itself; converter is not NULL-checked */

+U_CAPI int8_t U_EXPORT2

+ucnv_getMinCharSize (const UConverter * converter)

+ return converter->sharedData->staticData->minBytesPerChar; /* read from the shared static data; converter is not NULL-checked */

+U_CAPI const char* U_EXPORT2 /*

+ * Returns the converter's name, or NULL if *err already holds a failure code.

+ * An implementation-specific getName() result is preferred when it is

+ * non-NULL; otherwise the name from the shared static data is returned.

+ */

+ucnv_getName (const UConverter * converter, UErrorCode * err)

+ if (U_FAILURE (*err))

+ return NULL;

+ if(converter->sharedData->impl->getName){

+ const char* temp= converter->sharedData->impl->getName(converter);

+ if(temp)

+ return temp;

+ }

+ return converter->sharedData->staticData->name;

+U_CAPI int32_t U_EXPORT2 /*

+ * Returns the codepage number from the shared static data, or -1 if *err

+ * already holds a failure code. When the stored codepage is 0, the number

+ * following the first '-' of the converter's "IBM" standard name is used

+ * instead, if such a name exists.

+ */

+ucnv_getCCSID(const UConverter * converter,

+ UErrorCode * err)

+ int32_t ccsid;

+ if (U_FAILURE (*err))

+ return -1;

+ ccsid = converter->sharedData->staticData->codepage;

+ if (ccsid == 0) {

+ /* Rare case. This is for cases like gb18030,

+ which doesn't have an IBM canonical name, but does have an IBM alias. */

+ const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);

+ if (U_SUCCESS(*err) && standardName) {

+ const char *ccsidStr = uprv_strchr(standardName, '-');

+ if (ccsidStr) {

+ ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */

+ }

+ return ccsid;

+U_CAPI UConverterPlatform U_EXPORT2

+ucnv_getPlatform (const UConverter * converter,

+ UErrorCode * err)

+ if (U_FAILURE (*err))

+ return UCNV_UNKNOWN; /* also the result when the caller passes in a failure code */

+ return (UConverterPlatform)converter->sharedData->staticData->platform; /* platform value stored in the shared static data */

+U_CAPI void U_EXPORT2 /* stores the current to-Unicode callback and its context through the two out pointers; no argument is NULL-checked */

+ ucnv_getToUCallBack (const UConverter * converter,

+ UConverterToUCallback *action,

+ const void **context)

+ *action = converter->fromCharErrorBehaviour;

+ *context = converter->toUContext;

+U_CAPI void U_EXPORT2 /* stores the current from-Unicode callback and its context through the two out pointers; no argument is NULL-checked */

+ ucnv_getFromUCallBack (const UConverter * converter,

+ UConverterFromUCallback *action,

+ const void **context)

+ *action = converter->fromUCharErrorBehaviour;

+ *context = converter->fromUContext;

+U_CAPI void U_EXPORT2 /*

+ * Installs newAction/newContext as the to-Unicode callback and context.

+ * The previous values are stored through oldAction/oldContext when those

+ * pointers are non-NULL. Does nothing if *err already holds a failure code.

+ */

+ucnv_setToUCallBack (UConverter * converter,

+ UConverterToUCallback newAction,

+ const void* newContext,

+ UConverterToUCallback *oldAction,

+ const void** oldContext,

+ UErrorCode * err)

+ if (U_FAILURE (*err))

+ return;

+ if (oldAction) *oldAction = converter->fromCharErrorBehaviour;

+ converter->fromCharErrorBehaviour = newAction;

+ if (oldContext) *oldContext = converter->toUContext;

+ converter->toUContext = newContext;

+U_CAPI void U_EXPORT2 /*

+ * Installs newAction/newContext as the from-Unicode callback and context.

+ * The previous values are stored through oldAction/oldContext when those

+ * pointers are non-NULL. Does nothing if *err already holds a failure code.

+ */

+ucnv_setFromUCallBack (UConverter * converter,

+ UConverterFromUCallback newAction,

+ const void* newContext,

+ UConverterFromUCallback *oldAction,

+ const void** oldContext,

+ UErrorCode * err)

+ if (U_FAILURE (*err))

+ return;

+ if (oldAction) *oldAction = converter->fromUCharErrorBehaviour;

+ converter->fromUCharErrorBehaviour = newAction;

+ if (oldContext) *oldContext = converter->fromUContext;

+ converter->fromUContext = newContext;

+static void /*

+ * Adjusts the length entries starting at offsets in place.

+ * With delta = sourceIndex-errorInputLength (or -1 when sourceIndex<0):

+ * delta==0 leaves the entries alone, delta>0 is added to every entry that is

+ * >=0, and delta<0 overwrites every entry with -1.

+ */

+_updateOffsets(int32_t *offsets, int32_t length,

+ int32_t sourceIndex, int32_t errorInputLength) {

+ int32_t *limit;

+ int32_t delta, offset;

+ if(sourceIndex>=0) {

+ /*

+ * adjust each offset by adding the previous sourceIndex

+ * minus the length of the input sequence that caused an

+ * error, if any

+ */

+ delta=sourceIndex-errorInputLength;

+ } else {

+ /*

+ * set each offset to -1 because this conversion function

+ * does not handle offsets

+ */

+ delta=-1;

+ }

+ limit=offsets+length;

+ if(delta==0) {

+ /* most common case, nothing to do */

+ } else if(delta>0) {

+ /* add the delta to each offset (but not if the offset is <0) */

+ while(offsets<limit) {

+ offset=*offsets;

+ if(offset>=0) {

+ *offsets=offset+delta;

+ }

+ ++offsets;

+ }

+ } else /* delta<0 */ {

+ /*

+ * set each offset to -1 because this conversion function

+ * does not handle offsets

+ * or the error input sequence started in a previous buffer

+ */

+ while(offsets<limit) {

+ *offsets++=-1;

+ }

+/* ucnv_fromUnicode --------------------------------------------------------- */

+/*

+ * Implementation note for m:n conversions

+ *

+ * While collecting source units to find the longest match for m:n conversion,

+ * some source units may need to be stored for a partial match.

+ * When a second buffer does not yield a match on all of the previously stored

+ * source units, then they must be "replayed", i.e., fed back into the converter.

+ *

+ * The code relies on the fact that replaying will not nest -

+ * converting a replay buffer will not result in a replay.

+ * This is because a replay is necessary only after the _continuation_ of a

+ * partial match failed, but a replay buffer is converted as a whole.

+ * It may result in some of its units being stored again for a partial match,

+ * but there will not be a continuation _during_ the replay which could fail.

+ *

+ * It is conceivable that a callback function could call the converter

+ * recursively in a way that causes another replay to be stored, but that

+ * would be an error in the callback function.

+ * Such violations will cause assertion failures in a debug build,

+ * and wrong output, but they will not cause a crash.

+ *

+ * Summary of _fromUnicodeWithCallback():

+ * it picks the implementation's fromUnicode or fromUnicodeWithOffsets function,

+ * replays stored units first when cnv->preFromULength<0, and then alternates

+ * between converting and post-processing (offsets, end of input, errors).

+ * The from-Unicode callback is called once per U_INVALID_CHAR_FOUND,

+ * U_ILLEGAL_CHAR_FOUND or U_TRUNCATED_CHAR_FOUND; any other failure, including

+ * U_BUFFER_OVERFLOW_ERROR, or a failure the callback leaves in *err, returns.

+ */

+static void

+_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {

+ UConverterFromUnicode fromUnicode;

+ UConverter *cnv;

+ const UChar *s;

+ char *t;

+ int32_t *offsets;

+ int32_t sourceIndex;

+ int32_t errorInputLength;

+ UBool converterSawEndOfInput, calledCallback;

+ /* variables for m:n conversion */

+ UChar replay[UCNV_EXT_MAX_UCHARS];

+ const UChar *realSource, *realSourceLimit;

+ int32_t realSourceIndex;

+ UBool realFlush;

+ cnv=pArgs->converter;

+ s=pArgs->source;

+ t=pArgs->target;

+ offsets=pArgs->offsets;

+ /* get the converter implementation function */

+ sourceIndex=0;

+ if(offsets==NULL) {

+ fromUnicode=cnv->sharedData->impl->fromUnicode;

+ } else {

+ fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;

+ if(fromUnicode==NULL) {

+ /* there is no WithOffsets implementation */

+ fromUnicode=cnv->sharedData->impl->fromUnicode;

+ /* we will write -1 for each offset */

+ sourceIndex=-1;

+ }

+ if(cnv->preFromULength>=0) {

+ /* normal mode */

+ realSource=NULL;

+ /* avoid compiler warnings - not otherwise necessary, and the values do not matter */

+ realSourceLimit=NULL;

+ realFlush=FALSE;

+ realSourceIndex=0;

+ } else {

+ /*

+ * Previous m:n conversion stored source units from a partial match

+ * and failed to consume all of them.

+ * We need to "replay" them from a temporary buffer and convert them first.

+ */

+ realSource=pArgs->source;

+ realSourceLimit=pArgs->sourceLimit;

+ realFlush=pArgs->flush;

+ realSourceIndex=sourceIndex;

+ uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);

+ pArgs->source=replay;

+ pArgs->sourceLimit=replay-cnv->preFromULength;

+ pArgs->flush=FALSE;

+ sourceIndex=-1;

+ cnv->preFromULength=0;

+ }

+ /*

+ * loop for conversion and error handling

+ *

+ * loop {

+ * convert

+ * loop {

+ * update offsets

+ * handle end of input

+ * handle errors/call callback

+ * }

+ * }

+ */

+ for(;;) {

+ if(U_SUCCESS(*err)) {

+ /* convert */

+ fromUnicode(pArgs, err);

+ /*

+ * set a flag for whether the converter

+ * successfully processed the end of the input

+ *

+ * need not check cnv->preFromULength==0 because a replay (<0) will cause

+ * s<sourceLimit before converterSawEndOfInput is checked

+ */

+ converterSawEndOfInput=

+ (UBool)(U_SUCCESS(*err) &&

+ pArgs->flush && pArgs->source==pArgs->sourceLimit &&

+ cnv->fromUChar32==0);

+ } else {

+ /* handle error from ucnv_convertEx() */

+ converterSawEndOfInput=FALSE;

+ }

+ /* no callback called yet for this iteration */

+ calledCallback=FALSE;

+ /* no sourceIndex adjustment for conversion, only for callback output */

+ errorInputLength=0;

+ /*

+ * loop for offsets and error handling

+ *

+ * iterates at most 3 times:

+ * 1. to clean up after the conversion function

+ * 2. after the callback

+ * 3. after the callback again if there was truncated input

+ */

+ for(;;) {

+ /* update offsets if we write any */

+ if(offsets!=NULL) {

+ int32_t length=(int32_t)(pArgs->target-t);

+ if(length>0) {

+ _updateOffsets(offsets, length, sourceIndex, errorInputLength);

+ /*

+ * if a converter handles offsets and updates the offsets

+ * pointer at the end, then pArgs->offsets should not change

+ * here;

+ * however, some converters do not handle offsets at all

+ * (sourceIndex<0) or may not update the offsets pointer

+ */

+ pArgs->offsets=offsets+=length;

+ }

+ if(sourceIndex>=0) {

+ sourceIndex+=(int32_t)(pArgs->source-s);

+ }

+ if(cnv->preFromULength<0) {

+ /*

+ * switch the source to new replay units (cannot occur while replaying)

+ * after offset handling and before end-of-input and callback handling

+ */

+ if(realSource==NULL) {

+ realSource=pArgs->source;

+ realSourceLimit=pArgs->sourceLimit;

+ realFlush=pArgs->flush;

+ realSourceIndex=sourceIndex;

+ uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);

+ pArgs->source=replay;

+ pArgs->sourceLimit=replay-cnv->preFromULength;

+ pArgs->flush=FALSE;

+ if((sourceIndex+=cnv->preFromULength)<0) {

+ sourceIndex=-1;

+ }

+ cnv->preFromULength=0;

+ } else {

+ /* see implementation note before _fromUnicodeWithCallback() */

+ U_ASSERT(realSource==NULL);

+ *err=U_INTERNAL_PROGRAM_ERROR;

+ }

+ /* update pointers */

+ s=pArgs->source;

+ t=pArgs->target;

+ if(U_SUCCESS(*err)) {

+ if(s<pArgs->sourceLimit) {

+ /*

+ * continue with the conversion loop while there is still input left

+ * (continue converting by breaking out of only the inner loop)

+ */

+ break;

+ } else if(realSource!=NULL) {

+ /* switch back from replaying to the real source and continue */

+ pArgs->source=realSource;

+ pArgs->sourceLimit=realSourceLimit;

+ pArgs->flush=realFlush;

+ sourceIndex=realSourceIndex;

+ realSource=NULL;

+ break;

+ } else if(pArgs->flush && cnv->fromUChar32!=0) {

+ /*

+ * the entire input stream is consumed

+ * and there is a partial, truncated input sequence left

+ */

+ /* inject an error and continue with callback handling */

+ *err=U_TRUNCATED_CHAR_FOUND;

+ calledCallback=FALSE; /* new error condition */

+ } else {

+ /* input consumed */

+ if(pArgs->flush) {

+ /*

+ * return to the conversion loop once more if the flush

+ * flag is set and the conversion function has not

+ * successfully processed the end of the input yet

+ *

+ * (continue converting by breaking out of only the inner loop)

+ */

+ if(!converterSawEndOfInput) {

+ break;

+ }

+ /* reset the converter without calling the callback function */

+ _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);

+ }

+ /* done successfully */

+ return;

+ }

+ /* U_FAILURE(*err) */

+ {

+ UErrorCode e;

+ if( calledCallback ||

+ (e=*err)==U_BUFFER_OVERFLOW_ERROR ||

+ (e!=U_INVALID_CHAR_FOUND &&

+ e!=U_ILLEGAL_CHAR_FOUND &&

+ e!=U_TRUNCATED_CHAR_FOUND)

+ ) {

+ /*

+ * the callback did not or cannot resolve the error:

+ * set output pointers and return

+ *

+ * the check for buffer overflow is redundant but it is

+ * a high-runner case and hopefully documents the intent

+ * well

+ *

+ * if we were replaying, then the replay buffer must be

+ * copied back into the UConverter

+ * and the real arguments must be restored

+ */

+ if(realSource!=NULL) {

+ int32_t length;

+ U_ASSERT(cnv->preFromULength==0);

+ length=(int32_t)(pArgs->sourceLimit-pArgs->source);

+ if(length>0) {

+ uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR);

+ cnv->preFromULength=(int8_t)-length;

+ }

+ pArgs->source=realSource;

+ pArgs->sourceLimit=realSourceLimit;

+ pArgs->flush=realFlush;

+ }

+ return;

+ }

+ /* callback handling */

+ {

+ UChar32 codePoint;

+ /* get and write the code point */

+ codePoint=cnv->fromUChar32;

+ errorInputLength=0;

+ U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);

+ cnv->invalidUCharLength=(int8_t)errorInputLength;

+ /* set the converter state to deal with the next character */

+ cnv->fromUChar32=0;

+ /* call the callback function */

+ cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,

+ cnv->invalidUCharBuffer, errorInputLength, codePoint,

+ *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,

+ err);

+ }

+ /*

+ * loop back to the offset handling

+ *

+ * this flag will indicate after offset handling

+ * that a callback was called;

+ * if the callback did not resolve the error, then we return

+ */

+ calledCallback=TRUE;

+ }

+/*

+ * Output the fromUnicode overflow buffer.

+ * Call this function if(cnv->charErrorBufferLength>0).

+ * Bytes are copied from cnv->charErrorBuffer to *target until either the

+ * buffer is empty or targetLimit is reached; in the latter case the bytes not

+ * yet written are moved to the front of the overflow buffer and *err is set

+ * to U_BUFFER_OVERFLOW_ERROR.

+ * *target is advanced past the written bytes. When pOffsets is non-NULL and

+ * *pOffsets is non-NULL, one -1 offset is written per byte and *pOffsets is

+ * advanced as well.

+ * @return TRUE if overflow

+ */

+static UBool

+ucnv_outputOverflowFromUnicode(UConverter *cnv,

+ char **target, const char *targetLimit,

+ int32_t **pOffsets,

+ UErrorCode *err) {

+ int32_t *offsets;

+ char *overflow, *t;

+ int32_t i, length;

+ t=*target;

+ if(pOffsets!=NULL) {

+ offsets=*pOffsets;

+ } else {

+ offsets=NULL;

+ }

+ overflow=(char *)cnv->charErrorBuffer;

+ length=cnv->charErrorBufferLength;

+ i=0;

+ while(i<length) {

+ if(t==targetLimit) {

+ /* the overflow buffer contains too much, keep the rest */

+ int32_t j=0;

+ do {

+ overflow[j++]=overflow[i++];

+ } while(i<length);

+ cnv->charErrorBufferLength=(int8_t)j;

+ *target=t;

+ if(offsets!=NULL) {

+ *pOffsets=offsets;

+ }

+ *err=U_BUFFER_OVERFLOW_ERROR;

+ return TRUE;

+ }

+ /* copy the overflow contents to the target */

+ *t++=overflow[i++];

+ if(offsets!=NULL) {

+ *offsets++=-1; /* no source index available for old output */

+ }

+ /* the overflow buffer is completely copied to the target */

+ cnv->charErrorBufferLength=0;

+ *target=t;

+ if(offsets!=NULL) {

+ *pOffsets=offsets;

+ }

+ return FALSE;

+/*
+ * Public streaming conversion from UTF-16 to the converter's charset.
+ *
+ * Validates the arguments, first writes any output left over in the
+ * converter's overflow buffer, and then hands the remaining work to
+ * _fromUnicodeWithCallback(). On return *source and *target are advanced
+ * to where the conversion stopped.
+ *
+ * Does nothing if err is NULL or already indicates a failure.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR for NULL cnv/target/source, for limits that
+ * lie before their start pointers, for buffers too large for int32_t
+ * arithmetic, and for a source range that is not a whole number of UChars.
+ */
+U_CAPI void U_EXPORT2
+ucnv_fromUnicode(UConverter *cnv,
+                 char **target, const char *targetLimit,
+                 const UChar **source, const UChar *sourceLimit,
+                 int32_t *offsets,
+                 UBool flush,
+                 UErrorCode *err) {
+    UConverterFromUnicodeArgs convArgs;
+    const UChar *srcStart;
+    char *dstStart;
+    UBool badArgs;
+    /* check parameters */
+    if(err==NULL || U_FAILURE(*err)) {
+        return;
+    }
+    if(cnv==NULL || target==NULL || source==NULL) {
+        *err=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    srcStart=*source;
+    dstStart=*target;
+    /*
+     * A limit pinned to U_MAX_PTR is pulled back by one byte so that it sits
+     * on a UChar boundary: this prevents an infinite loop at that limit and
+     * keeps the alignment check below from failing.
+     */
+    if((const void *)sourceLimit==(const void *)U_MAX_PTR(sourceLimit)) {
+        sourceLimit=(const UChar *)(((const char *)sourceLimit)-1);
+    }
+    /*
+     * None of the following should ever be true:
+     * 1) a limit before its start pointer;
+     * 2) a buffer so large that sizes (in units or bytes) or offsets would
+     *    not fit into int32_t; we report an error rather than clip the
+     *    limit (e.g. targetLimit=t+0x7fffffff) because clipping would break
+     *    the "source consumed or target filled" contract;
+     * 3) a source range with an odd byte length, i.e. a char * that was
+     *    wrongly cast to UChar * and ends in half a code unit.
+     */
+    badArgs=(UBool)(sourceLimit<srcStart || targetLimit<dstStart);
+    if(!badArgs) {
+        badArgs=(UBool)((size_t)(sourceLimit-srcStart)>(size_t)0x3fffffff && sourceLimit>srcStart);
+    }
+    if(!badArgs) {
+        badArgs=(UBool)((size_t)(targetLimit-dstStart)>(size_t)0x7fffffff && targetLimit>dstStart);
+    }
+    if(!badArgs) {
+        badArgs=(UBool)((((const char *)sourceLimit-(const char *)srcStart) & 1) != 0);
+    }
+    if(badArgs) {
+        *err=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    /* output the target overflow buffer */
+    if(cnv->charErrorBufferLength>0) {
+        if(ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)) {
+            /* U_BUFFER_OVERFLOW_ERROR */
+            return;
+        }
+    }
+    /* *target may have moved, therefore dstStart is stale from here on */
+    if(!flush && srcStart==sourceLimit && cnv->preFromULength>=0) {
+        /* the overflow buffer is emptied and there is no new input: we are done */
+        return;
+    }
+    /*
+     * Do not report a buffer overflow merely because
+     * !flush && *target==targetLimit: the source might not generate any
+     * output, for example when the skip callback is called.
+     */
+    /* prepare the converter arguments */
+    convArgs.size=sizeof(convArgs);
+    convArgs.converter=cnv;
+    convArgs.flush=flush;
+    convArgs.source=srcStart;
+    convArgs.sourceLimit=sourceLimit;
+    convArgs.target=*target;
+    convArgs.targetLimit=targetLimit;
+    convArgs.offsets=offsets;
+    _fromUnicodeWithCallback(&convArgs, err);
+    /* report the progress back to the caller */
+    *source=convArgs.source;
+    *target=convArgs.target;
+}

+/* ucnv_toUnicode() ---------------------------------------------------------
+ *
+ * _toUnicodeWithCallback() drives one toUnicode conversion described by pArgs.
+ * It calls the converter implementation's toUnicode function, maintains the
+ * offsets array, replays source bytes that an earlier partial m:n match left
+ * in cnv->preToU[], and calls cnv->fromCharErrorBehaviour for invalid,
+ * illegal, truncated and escape-sequence errors.
+ *
+ * *err may already be a failure on entry (from getNextUChar() or
+ * ucnv_convertEx()); the conversion call is then skipped and the error goes
+ * directly to the error/callback handling.
+ *
+ * The function returns when the input has been consumed successfully, or when
+ * an error remains that the callback did not or cannot resolve (this includes
+ * U_BUFFER_OVERFLOW_ERROR). pArgs->source and pArgs->target show the progress.
+ */

+static void

+_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {

+ UConverterToUnicode toUnicode;

+ UConverter *cnv;

+ const char *s; /* source position at the last pointer update, for sourceIndex deltas */

+ UChar *t; /* target position at the last pointer update, for counting new output */

+ int32_t *offsets;

+ int32_t sourceIndex;

+ int32_t errorInputLength;

+ UBool converterSawEndOfInput, calledCallback;

+ /* variables for m:n conversion */

+ char replay[UCNV_EXT_MAX_BYTES]; /* private copy of cnv->preToU[] that is converted first */

+ const char *realSource, *realSourceLimit; /* caller's source range during a replay; realSource==NULL means not replaying */

+ int32_t realSourceIndex;

+ UBool realFlush;

+ cnv=pArgs->converter;

+ s=pArgs->source;

+ t=pArgs->target;

+ offsets=pArgs->offsets;

+ /* get the converter implementation function */

+ sourceIndex=0;

+ if(offsets==NULL) {

+ toUnicode=cnv->sharedData->impl->toUnicode;

+ } else {

+ toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;

+ if(toUnicode==NULL) {

+ /* there is no WithOffsets implementation */

+ toUnicode=cnv->sharedData->impl->toUnicode;

+ /* we will write -1 for each offset */

+ sourceIndex=-1;

+ }

+ if(cnv->preToULength>=0) {

+ /* normal mode */

+ realSource=NULL;

+ /* avoid compiler warnings - not otherwise necessary, and the values do not matter */

+ realSourceLimit=NULL;

+ realFlush=FALSE;

+ realSourceIndex=0;

+ } else {

+ /*

+ * Previous m:n conversion stored source units from a partial match

+ * and failed to consume all of them.

+ * We need to "replay" them from a temporary buffer and convert them first.

+ */

+ realSource=pArgs->source;

+ realSourceLimit=pArgs->sourceLimit;

+ realFlush=pArgs->flush;

+ realSourceIndex=sourceIndex;

+ uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); /* preToULength is negative here; its negation is the byte count */

+ pArgs->source=replay;

+ pArgs->sourceLimit=replay-cnv->preToULength; /* == replay + number of replay bytes */

+ pArgs->flush=FALSE; /* the caller's flush flag is saved in realFlush and restored with the real source */

+ sourceIndex=-1; /* negative: sourceIndex is not advanced during this replay (see sourceIndex>=0 below) */

+ cnv->preToULength=0; /* the stored bytes now live in replay[] */

+ }

+ /*

+ * loop for conversion and error handling

+ *

+ * loop {

+ * convert

+ * loop {

+ * update offsets

+ * handle end of input

+ * handle errors/call callback

+ * }

+ */

+ for(;;) {

+ if(U_SUCCESS(*err)) {

+ /* convert */

+ toUnicode(pArgs, err);

+ /*

+ * set a flag for whether the converter

+ * successfully processed the end of the input

+ *

+ * need not check cnv->preToULength==0 because a replay (<0) will cause

+ * s<sourceLimit before converterSawEndOfInput is checked

+ */

+ converterSawEndOfInput=

+ (UBool)(U_SUCCESS(*err) &&

+ pArgs->flush && pArgs->source==pArgs->sourceLimit &&

+ cnv->toULength==0);

+ } else {

+ /* handle error from getNextUChar() or ucnv_convertEx() */

+ converterSawEndOfInput=FALSE;

+ }

+ /* no callback called yet for this iteration */

+ calledCallback=FALSE;

+ /* no sourceIndex adjustment for conversion, only for callback output */

+ errorInputLength=0;

+ /*

+ * loop for offsets and error handling

+ *

+ * iterates at most 3 times:

+ * 1. to clean up after the conversion function

+ * 2. after the callback

+ * 3. after the callback again if there was truncated input

+ */

+ for(;;) {

+ /* update offsets if we write any */

+ if(offsets!=NULL) {

+ int32_t length=(int32_t)(pArgs->target-t); /* UChars written since t was last updated */

+ if(length>0) {

+ _updateOffsets(offsets, length, sourceIndex, errorInputLength);

+ /*

+ * if a converter handles offsets and updates the offsets

+ * pointer at the end, then pArgs->offset should not change

+ * here;

+ * however, some converters do not handle offsets at all

+ * (sourceIndex<0) or may not update the offsets pointer

+ */

+ pArgs->offsets=offsets+=length;

+ }

+ if(sourceIndex>=0) {

+ sourceIndex+=(int32_t)(pArgs->source-s); /* advance by the source bytes consumed since s was last updated */

+ }

+ if(cnv->preToULength<0) {

+ /*

+ * switch the source to new replay units (cannot occur while replaying)

+ * after offset handling and before end-of-input and callback handling

+ */

+ if(realSource==NULL) {

+ realSource=pArgs->source;

+ realSourceLimit=pArgs->sourceLimit;

+ realFlush=pArgs->flush;

+ realSourceIndex=sourceIndex;

+ uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);

+ pArgs->source=replay;

+ pArgs->sourceLimit=replay-cnv->preToULength;

+ pArgs->flush=FALSE;

+ if((sourceIndex+=cnv->preToULength)<0) { /* preToULength is negative: move sourceIndex back by the replay byte count */

+ sourceIndex=-1; /* clamp any negative result to -1 */

+ }

+ cnv->preToULength=0;

+ } else {

+ /* see implementation note before _fromUnicodeWithCallback() */

+ U_ASSERT(realSource==NULL);

+ *err=U_INTERNAL_PROGRAM_ERROR;

+ }

+ /* update pointers */

+ s=pArgs->source;

+ t=pArgs->target;

+ if(U_SUCCESS(*err)) {

+ if(s<pArgs->sourceLimit) {

+ /*

+ * continue with the conversion loop while there is still input left

+ * (continue converting by breaking out of only the inner loop)

+ */

+ break;

+ } else if(realSource!=NULL) {

+ /* switch back from replaying to the real source and continue */

+ pArgs->source=realSource;

+ pArgs->sourceLimit=realSourceLimit;

+ pArgs->flush=realFlush;

+ sourceIndex=realSourceIndex;

+ realSource=NULL;

+ break;

+ } else if(pArgs->flush && cnv->toULength>0) {

+ /*

+ * the entire input stream is consumed

+ * and there is a partial, truncated input sequence left

+ */

+ /* inject an error and continue with callback handling */

+ *err=U_TRUNCATED_CHAR_FOUND;

+ calledCallback=FALSE; /* new error condition */

+ } else {

+ /* input consumed */

+ if(pArgs->flush) {

+ /*

+ * return to the conversion loop once more if the flush

+ * flag is set and the conversion function has not

+ * successfully processed the end of the input yet

+ *

+ * (continue converting by breaking out of only the inner loop)

+ */

+ if(!converterSawEndOfInput) {

+ break;

+ }

+ /* reset the converter without calling the callback function */

+ _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);

+ }

+ /* done successfully */

+ return;

+ }

+ /* U_FAILURE(*err) */

+ {

+ UErrorCode e;

+ if( calledCallback || /* the callback already ran for this error and did not clear it */

+ (e=*err)==U_BUFFER_OVERFLOW_ERROR ||

+ (e!=U_INVALID_CHAR_FOUND && /* only the five error codes listed here are passed to the callback */

+ e!=U_ILLEGAL_CHAR_FOUND &&

+ e!=U_TRUNCATED_CHAR_FOUND &&

+ e!=U_ILLEGAL_ESCAPE_SEQUENCE &&

+ e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)

+ ) {

+ /*

+ * the callback did not or cannot resolve the error:

+ * set output pointers and return

+ *

+ * the check for buffer overflow is redundant but it is

+ * a high-runner case and hopefully documents the intent

+ * well

+ *

+ * if we were replaying, then the replay buffer must be

+ * copied back into the UConverter

+ * and the real arguments must be restored

+ */

+ if(realSource!=NULL) {

+ int32_t length;

+ U_ASSERT(cnv->preToULength==0);

+ length=(int32_t)(pArgs->sourceLimit-pArgs->source); /* replay bytes not yet consumed */

+ if(length>0) {

+ uprv_memcpy(cnv->preToU, pArgs->source, length);

+ cnv->preToULength=(int8_t)-length; /* stored negative, which marks pending replay input */

+ }

+ pArgs->source=realSource;

+ pArgs->sourceLimit=realSourceLimit;

+ pArgs->flush=realFlush;

+ }

+ return;

+ }

+ /* copy toUBytes[] to invalidCharBuffer[] */

+ errorInputLength=cnv->invalidCharLength=cnv->toULength;

+ if(errorInputLength>0) {

+ uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);

+ }

+ /* set the converter state to deal with the next character */

+ cnv->toULength=0;

+ /* call the callback function */

+ if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {

+ cnv->toUCallbackReason = UCNV_UNASSIGNED; /* U_INVALID_CHAR_FOUND with the default reason is reported as unassigned */

+ }

+ cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,

+ cnv->invalidCharBuffer, errorInputLength,

+ cnv->toUCallbackReason,

+ err);

+ cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */

+ /*

+ * loop back to the offset handling

+ *

+ * this flag will indicate after offset handling

+ * that a callback was called;

+ * if the callback did not resolve the error, then we return

+ */

+ calledCallback=TRUE;

+ }

+/*
+ * Drain the toUnicode overflow buffer into the caller's target buffer.
+ *
+ * UChars held in cnv->UCharErrorBuffer are copied to *target until the
+ * overflow buffer is empty or targetLimit is reached. When offsets are
+ * requested, each copied UChar gets an offset of -1. UChars that do not fit
+ * are moved to the front of the overflow buffer and stay there for the next
+ * call.
+ *
+ * Call this function only if(cnv->UCharErrorBufferLength>0).
+ *
+ * @param cnv         converter whose UCharErrorBuffer is drained
+ * @param target      in/out: current write position in the target buffer
+ * @param targetLimit end of the target buffer
+ * @param pOffsets    in/out: current offsets write position;
+ *                    pOffsets or *pOffsets may be NULL (no offsets written)
+ * @param err         set to U_BUFFER_OVERFLOW_ERROR if the target fills up
+ * @return TRUE if overflow (some UChars remain in the overflow buffer)
+ */
+static UBool
+ucnv_outputOverflowToUnicode(UConverter *cnv,
+                             UChar **target, const UChar *targetLimit,
+                             int32_t **pOffsets,
+                             UErrorCode *err) {
+    UChar *pending=cnv->UCharErrorBuffer;
+    int32_t pendingLength=cnv->UCharErrorBufferLength;
+    int32_t *offsetOut=(pOffsets!=NULL) ? *pOffsets : NULL;
+    UChar *out=*target;
+    int32_t consumed=0;
+    UBool overflowed;
+    /* copy as much of the overflow contents as the target can take */
+    while(consumed<pendingLength && out!=targetLimit) {
+        *out++=pending[consumed++];
+        if(offsetOut!=NULL) {
+            *offsetOut++=-1; /* no source index available for old output */
+        }
+    }
+    overflowed=(UBool)(consumed<pendingLength);
+    if(overflowed) {
+        /* the target is full: slide the unwritten rest to the buffer start */
+        int32_t kept=0;
+        while(consumed<pendingLength) {
+            pending[kept++]=pending[consumed++];
+        }
+        cnv->UCharErrorBufferLength=(int8_t)kept;
+        *err=U_BUFFER_OVERFLOW_ERROR;
+    } else {
+        /* the overflow buffer is completely copied to the target */
+        cnv->UCharErrorBufferLength=0;
+    }
+    /* publish the advanced output positions */
+    *target=out;
+    if(offsetOut!=NULL) {
+        *pOffsets=offsetOut;
+    }
+    return overflowed;
+}

+/*
+ * Public streaming conversion from the converter's charset to UTF-16.
+ *
+ * Validates the arguments, first writes any output left over in the
+ * converter's overflow buffer, and then hands the remaining work to
+ * _toUnicodeWithCallback(). On return *source and *target are advanced
+ * to where the conversion stopped.
+ *
+ * Does nothing if err is NULL or already indicates a failure.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR for NULL cnv/target/source, for limits that
+ * lie before their start pointers, for buffers too large for int32_t
+ * arithmetic, and for a target range that is not a whole number of UChars.
+ */
+U_CAPI void U_EXPORT2
+ucnv_toUnicode(UConverter *cnv,
+               UChar **target, const UChar *targetLimit,
+               const char **source, const char *sourceLimit,
+               int32_t *offsets,
+               UBool flush,
+               UErrorCode *err) {
+    UConverterToUnicodeArgs convArgs;
+    const char *srcStart;
+    UChar *dstStart;
+    UBool badArgs;
+    /* check parameters */
+    if(err==NULL || U_FAILURE(*err)) {
+        return;
+    }
+    if(cnv==NULL || target==NULL || source==NULL) {
+        *err=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    srcStart=*source;
+    dstStart=*target;
+    /*
+     * A limit pinned to U_MAX_PTR is pulled back by one byte so that it sits
+     * on a UChar boundary: this prevents an infinite loop at that limit and
+     * keeps the alignment check below from failing.
+     */
+    if((const void *)targetLimit==(const void *)U_MAX_PTR(targetLimit)) {
+        targetLimit=(const UChar *)(((const char *)targetLimit)-1);
+    }
+    /*
+     * None of the following should ever be true:
+     * 1) a limit before its start pointer;
+     * 2) a buffer so large that sizes (in units or bytes) or offsets would
+     *    not fit into int32_t; we report an error rather than clip the
+     *    limit (e.g. sourceLimit=s+0x7fffffff) because clipping would break
+     *    the "source consumed or target filled" contract;
+     * 3) a target range with an odd byte length, i.e. a char * that was
+     *    wrongly cast to UChar * and ends in half a code unit.
+     */
+    badArgs=(UBool)(sourceLimit<srcStart || targetLimit<dstStart);
+    if(!badArgs) {
+        badArgs=(UBool)((size_t)(sourceLimit-srcStart)>(size_t)0x7fffffff && sourceLimit>srcStart);
+    }
+    if(!badArgs) {
+        badArgs=(UBool)((size_t)(targetLimit-dstStart)>(size_t)0x3fffffff && targetLimit>dstStart);
+    }
+    if(!badArgs) {
+        badArgs=(UBool)((((const char *)targetLimit-(const char *)dstStart) & 1) != 0);
+    }
+    if(badArgs) {
+        *err=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    /* output the target overflow buffer */
+    if(cnv->UCharErrorBufferLength>0) {
+        if(ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)) {
+            /* U_BUFFER_OVERFLOW_ERROR */
+            return;
+        }
+    }
+    /* *target may have moved, therefore dstStart is stale from here on */
+    if(!flush && srcStart==sourceLimit && cnv->preToULength>=0) {
+        /* the overflow buffer is emptied and there is no new input: we are done */
+        return;
+    }
+    /*
+     * Do not report a buffer overflow merely because
+     * !flush && *target==targetLimit: the source might not generate any
+     * output, for example when the skip callback is called.
+     */
+    /* prepare the converter arguments */
+    convArgs.size=sizeof(convArgs);
+    convArgs.converter=cnv;
+    convArgs.flush=flush;
+    convArgs.source=srcStart;
+    convArgs.sourceLimit=sourceLimit;
+    convArgs.target=*target;
+    convArgs.targetLimit=targetLimit;
+    convArgs.offsets=offsets;
+    _toUnicodeWithCallback(&convArgs, err);
+    /* report the progress back to the caller */
+    *source=convArgs.source;
+    *target=convArgs.target;
+}

+/* ucnv_to/fromUChars() ----------------------------------------------------- */
+/*
+ * One-shot conversion of a UTF-16 string into the converter's charset.
+ *
+ * The converter's fromUnicode state is reset first. src is converted into
+ * dest[0..destCapacity[ with the flush flag set. If dest is too small, the
+ * rest of the input is converted into a scratch buffer only to count how
+ * long the full output would be (preflighting).
+ *
+ * @param srcLength number of UChars in src, or -1 if src is NUL-terminated
+ * @return the result of u_terminateChars() for the total output length;
+ *         0 if pErrorCode is NULL or already a failure, or if the arguments
+ *         are invalid (U_ILLEGAL_ARGUMENT_ERROR)
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_fromUChars(UConverter *cnv,
+                char *dest, int32_t destCapacity,
+                const UChar *src, int32_t srcLength,
+                UErrorCode *pErrorCode) {
+    char *const destStart=dest;
+    const UChar *srcEnd;
+    char *destEnd;
+    int32_t totalLength;
+    char scratch[1024];
+    /* check arguments */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if( cnv==NULL ||
+        destCapacity<0 || (destCapacity>0 && dest==NULL) ||
+        srcLength<-1 || (srcLength!=0 && src==NULL)
+    ) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    /* initialize */
+    ucnv_resetFromUnicode(cnv);
+    if(srcLength==-1) {
+        srcLength=u_strlen(src);
+    }
+    if(srcLength<=0) {
+        /* nothing to convert: just terminate the (empty) output */
+        return u_terminateChars(destStart, destCapacity, 0, pErrorCode);
+    }
+    srcEnd=src+srcLength;
+    destEnd=dest+destCapacity;
+    /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
+    if(destEnd<dest || (destEnd==NULL && dest!=NULL)) {
+        destEnd=(char *)U_MAX_PTR(dest);
+    }
+    /* perform the conversion */
+    ucnv_fromUnicode(cnv, &dest, destEnd, &src, srcEnd, NULL, TRUE, pErrorCode);
+    totalLength=(int32_t)(dest-destStart);
+    /* if an overflow occurs, then get the preflighting length */
+    while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
+        dest=scratch;
+        *pErrorCode=U_ZERO_ERROR;
+        ucnv_fromUnicode(cnv, &dest, scratch+sizeof(scratch), &src, srcEnd, NULL, TRUE, pErrorCode);
+        totalLength+=(int32_t)(dest-scratch);
+    }
+    return u_terminateChars(destStart, destCapacity, totalLength, pErrorCode);
+}

+/*
+ * One-shot conversion of a charset string into UTF-16.
+ *
+ * The converter's toUnicode state is reset first. src is converted into
+ * dest[0..destCapacity[ with the flush flag set. If dest is too small, the
+ * rest of the input is converted into a scratch buffer only to count how
+ * long the full output would be (preflighting).
+ *
+ * @param srcLength number of bytes in src, or -1 if src is NUL-terminated
+ * @return the result of u_terminateUChars() for the total output length;
+ *         0 if pErrorCode is NULL or already a failure, or if the arguments
+ *         are invalid (U_ILLEGAL_ARGUMENT_ERROR)
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_toUChars(UConverter *cnv,
+              UChar *dest, int32_t destCapacity,
+              const char *src, int32_t srcLength,
+              UErrorCode *pErrorCode) {
+    UChar *const destStart=dest;
+    const char *srcEnd;
+    UChar *destEnd;
+    int32_t totalLength;
+    UChar scratch[1024];
+    /* check arguments */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if( cnv==NULL ||
+        destCapacity<0 || (destCapacity>0 && dest==NULL) ||
+        srcLength<-1 || (srcLength!=0 && src==NULL)
+    ) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    /* initialize */
+    ucnv_resetToUnicode(cnv);
+    if(srcLength==-1) {
+        srcLength=(int32_t)uprv_strlen(src);
+    }
+    if(srcLength<=0) {
+        /* nothing to convert: just terminate the (empty) output */
+        return u_terminateUChars(destStart, destCapacity, 0, pErrorCode);
+    }
+    srcEnd=src+srcLength;
+    destEnd=dest+destCapacity;
+    /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
+    if(destEnd<dest || (destEnd==NULL && dest!=NULL)) {
+        destEnd=(UChar *)U_MAX_PTR(dest);
+    }
+    /* perform the conversion */
+    ucnv_toUnicode(cnv, &dest, destEnd, &src, srcEnd, NULL, TRUE, pErrorCode);
+    totalLength=(int32_t)(dest-destStart);
+    /* if an overflow occurs, then get the preflighting length */
+    while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
+        dest=scratch;
+        *pErrorCode=U_ZERO_ERROR;
+        ucnv_toUnicode(cnv, &dest, scratch+sizeof(scratch)/U_SIZEOF_UCHAR, &src, srcEnd, NULL, TRUE, pErrorCode);
+        totalLength+=(int32_t)(dest-scratch);
+    }
+    return u_terminateUChars(destStart, destCapacity, totalLength, pErrorCode);
+}

+/* ucnv_getNextUChar() ------------------------------------------------------
+ *
+ * Converts the next character from *source and returns its code point.
+ *
+ * Output pending in cnv->UCharErrorBuffer is returned first. Otherwise the
+ * converter's native getNextUChar() is tried (only at a character boundary),
+ * and failing that, _toUnicodeWithCallback() converts into a small local
+ * buffer. A lead surrogate is combined with a following trail surrogate into
+ * a supplementary code point; leftover output goes back into the overflow
+ * buffer.
+ *
+ * Returns 0xffff if err is NULL or already a failure, if cnv or source is
+ * NULL or the source range is invalid (U_ILLEGAL_ARGUMENT_ERROR), if a
+ * conversion error remains, or if there is no output
+ * (U_INDEX_OUTOFBOUNDS_ERROR).
+ */

+U_CAPI UChar32 U_EXPORT2

+ucnv_getNextUChar(UConverter *cnv,

+ const char **source, const char *sourceLimit,

+ UErrorCode *err) {

+ UConverterToUnicodeArgs args;

+ UChar buffer[U16_MAX_LENGTH]; /* local target for one code point's UTF-16 code units */

+ const char *s;

+ UChar32 c;

+ int32_t i, length;

+ /* check parameters */

+ if(err==NULL || U_FAILURE(*err)) {

+ return 0xffff;

+ }

+ if(cnv==NULL || source==NULL) {

+ *err=U_ILLEGAL_ARGUMENT_ERROR;

+ return 0xffff;

+ }

+ s=*source;

+ if(sourceLimit<s) {

+ *err=U_ILLEGAL_ARGUMENT_ERROR;

+ return 0xffff;

+ }

+ /*

+ * Make sure that the buffer sizes do not exceed the number range for

+ * int32_t because some functions use the size (in units or bytes)

+ * rather than comparing pointers, and because offsets are int32_t values.

+ *

+ * size_t is guaranteed to be unsigned and large enough for the job.

+ *

+ * Return with an error instead of adjusting the limits because we would

+ * not be able to maintain the semantics that either the source must be

+ * consumed or the target filled (unless an error occurs).

+ * An adjustment would be sourceLimit=s+0x7fffffff; for example.

+ */

+ if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {

+ *err=U_ILLEGAL_ARGUMENT_ERROR;

+ return 0xffff;

+ }

+ c=U_SENTINEL; /* marks "no code point yet"; tested below with c<0 */

+ /* flush the target overflow buffer */

+ if(cnv->UCharErrorBufferLength>0) {

+ UChar *overflow;

+ overflow=cnv->UCharErrorBuffer;

+ i=0;

+ length=cnv->UCharErrorBufferLength;

+ U16_NEXT(overflow, i, length, c);

+ /* move the remaining overflow contents up to the beginning */

+ if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {

+ uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,

+ cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);

+ }

+ if(!U16_IS_LEAD(c) || i<length) { /* return c unless it is a lead surrogate that was the last unit in the buffer */

+ return c;

+ }

+ /*

+ * Continue if the overflow buffer contained only a lead surrogate,

+ * in case the converter outputs single surrogates from complete

+ * input sequences.

+ */

+ }

+ /*

+ * flush==TRUE is implied for ucnv_getNextUChar()

+ *

+ * do not simply return even if s==sourceLimit because the converter may

+ * not have seen flush==TRUE before

+ */

+ /* prepare the converter arguments */

+ args.converter=cnv;

+ args.flush=TRUE;

+ args.offsets=NULL;

+ args.source=s;

+ args.sourceLimit=sourceLimit;

+ args.target=buffer;

+ args.targetLimit=buffer+1; /* room for exactly one UChar at first */

+ args.size=sizeof(args);

+ if(c<0) { /* c is still U_SENTINEL: nothing was taken from the overflow buffer */

+ /*

+ * call the native getNextUChar() implementation if we are

+ * at a character boundary (toULength==0)

+ *

+ * unlike with _toUnicode(), getNextUChar() implementations must set

+ * U_TRUNCATED_CHAR_FOUND for truncated input,

+ * in addition to setting toULength/toUBytes[]

+ */

+ if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {

+ c=cnv->sharedData->impl->getNextUChar(&args, err);

+ *source=s=args.source;

+ if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {

+ /* reset the converter without calling the callback function */

+ _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);

+ return 0xffff; /* no output */

+ } else if(U_SUCCESS(*err) && c>=0) {

+ return c;

+ /*

+ * else fall through to use _toUnicode() because

+ * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all

+ * U_FAILURE: call _toUnicode() for callback handling (do not output c)

+ */

+ }

+ /* convert to one UChar in buffer[0], or handle getNextUChar() errors */

+ _toUnicodeWithCallback(&args, err);

+ if(*err==U_BUFFER_OVERFLOW_ERROR) {

+ *err=U_ZERO_ERROR; /* overflow of the small local buffer is not reported as an error */

+ }

+ i=0;

+ length=(int32_t)(args.target-buffer);

+ } else {

+ /* write the lead surrogate from the overflow buffer */

+ buffer[0]=(UChar)c;

+ args.target=buffer+1;

+ i=0;

+ length=1;

+ }

+ /* buffer contents starts at i and ends before length */

+ if(U_FAILURE(*err)) {

+ c=0xffff; /* no output */

+ } else if(length==0) {

+ /* no input or only state changes */

+ *err=U_INDEX_OUTOFBOUNDS_ERROR;

+ /* no need to reset explicitly because _toUnicodeWithCallback() did it */

+ c=0xffff; /* no output */

+ } else {

+ c=buffer[0];

+ i=1;

+ if(!U16_IS_LEAD(c)) {

+ /* consume c=buffer[0], done */

+ } else {

+ /* got a lead surrogate, see if a trail surrogate follows */

+ UChar c2;

+ if(cnv->UCharErrorBufferLength>0) {

+ /* got overflow output from the conversion */

+ if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {

+ /* got a trail surrogate, too */

+ c=U16_GET_SUPPLEMENTARY(c, c2);

+ /* move the remaining overflow contents up to the beginning */

+ if((--cnv->UCharErrorBufferLength)>0) {

+ uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,

+ cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);

+ }

+ } else {

+ /* c is an unpaired lead surrogate, just return it */

+ }

+ } else if(args.source<sourceLimit) {

+ /* convert once more, to buffer[1] */

+ args.targetLimit=buffer+2; /* extend the local target by one more UChar */

+ _toUnicodeWithCallback(&args, err);

+ if(*err==U_BUFFER_OVERFLOW_ERROR) {

+ *err=U_ZERO_ERROR;

+ }

+ length=(int32_t)(args.target-buffer);

+ if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {

+ /* got a trail surrogate, too */

+ c=U16_GET_SUPPLEMENTARY(c, c2);

+ i=2;

+ }

+ /*

+ * move leftover output from buffer[i..length[

+ * into the beginning of the overflow buffer

+ */

+ if(i<length) {

+ /* move further overflow back */

+ int32_t delta=length-i; /* number of UChars in buffer[] that were not consumed */

+ if((length=cnv->UCharErrorBufferLength)>0) {

+ uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,

+ length*U_SIZEOF_UCHAR);

+ }

+ cnv->UCharErrorBufferLength=(int8_t)(length+delta);

+ cnv->UCharErrorBuffer[0]=buffer[i++];

+ if(delta>1) {

+ cnv->UCharErrorBuffer[1]=buffer[i];

+ }

+ *source=args.source; /* report the consumed input to the caller */

+ return c;

+/* ucnv_convert() and siblings ---------------------------------------------- */

+U_CAPI void U_EXPORT2

+ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,

+ char **target, const char *targetLimit,

+ const char **source, const char *sourceLimit,

+ UChar *pivotStart, UChar **pivotSource,

+ UChar **pivotTarget, const UChar *pivotLimit,

+ UBool reset, UBool flush,

+ UErrorCode *pErrorCode) {

+ UChar pivotBuffer[CHUNK_SIZE];

+ const UChar *myPivotSource;

+ UChar *myPivotTarget;

+ const char *s;

+ char *t;

+ UConverterToUnicodeArgs toUArgs;

+ UConverterFromUnicodeArgs fromUArgs;

+ UConverterConvert convert;

+ /* error checking */

+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {

+ return;

+ }

+ if( targetCnv==NULL || sourceCnv==NULL ||

+ source==NULL || *source==NULL ||

+ target==NULL || *target==NULL || targetLimit==NULL

+ ) {

+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

+ return;

+ }

+ s=*source;

+ t=*target;

+ if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) {

+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

+ return;

+ }

+ /*

+ * Make sure that the buffer sizes do not exceed the number range for

+ * int32_t. See ucnv_toUnicode() for a more detailed comment.

+ */

+ if(

+ (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) ||

+ ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)

+ ) {

+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

+ return;

+ }

+ if(pivotStart==NULL) {

+ if(!flush) {

+ /* streaming conversion requires an explicit pivot buffer */

+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

+ return;

+ }

+ /* use the stack pivot buffer */

+ myPivotSource=myPivotTarget=pivotStart=pivotBuffer;

+ pivotSource=(UChar **)&myPivotSource;

+ pivotTarget=&myPivotTarget;

+ pivotLimit=pivotBuffer+CHUNK_SIZE;

+ } else if( pivotStart>=pivotLimit ||

+ pivotSource==NULL || *pivotSource==NULL ||

+ pivotTarget==NULL || *pivotTarget==NULL ||

+ pivotLimit==NULL

+ ) {

+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

+ return;

+ }

+ if(sourceLimit==NULL) {

+ /* get limit of single-byte-NUL-terminated source string */

+ sourceLimit=uprv_strchr(*source, 0);

+ }

+ if(reset) {

+ ucnv_resetToUnicode(sourceCnv);

+ ucnv_resetFromUnicode(targetCnv);

+ *pivotSource=*pivotTarget=pivotStart;

+ } else if(targetCnv->charErrorBufferLength>0) {

+ /* output the targetCnv overflow buffer */

+ if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) {

+ /* U_BUFFER_OVERFLOW_ERROR */

+ return;

+ }

+ /* *target has moved, therefore stop using t */

+ if( !flush &&

+ targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget &&

+ sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit

+ ) {

+ /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */

+ return;

+ }

+ /* Is direct-UTF-8 conversion available? */

+ if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&

+ targetCnv->sharedData->impl->fromUTF8!=NULL

+ ) {

+ convert=targetCnv->sharedData->impl->fromUTF8;

+ } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&

+ sourceCnv->sharedData->impl->toUTF8!=NULL

+ ) {

+ convert=sourceCnv->sharedData->impl->toUTF8;

+ } else {

+ convert=NULL;

+ }

+ /*

+ * If direct-UTF-8 conversion is available, then we use a smaller

+ * pivot buffer for error handling and partial matches

+ * so that we quickly return to direct conversion.

+ *

+ * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.

+ *

+ * We could reduce the pivot buffer size further, at the cost of

+ * buffer overflows from callbacks.

+ * The pivot buffer should not be smaller than the maximum number of

+ * fromUnicode extension table input UChars

+ * (for m:n conversion, see

+ * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])

+ * or 2 for surrogate pairs.

+ *

+ * Too small a buffer can cause thrashing between pivoting and direct

+ * conversion, with function call overhead outweighing the benefits

+ * of direct conversion.

+ */

+ if(convert!=NULL && (pivotLimit-pivotStart)>32) {

+ pivotLimit=pivotStart+32;

+ }

+ /* prepare the converter arguments */

+ fromUArgs.converter=targetCnv;

+ fromUArgs.flush=FALSE;

+ fromUArgs.offsets=NULL;

+ fromUArgs.target=*target;

+ fromUArgs.targetLimit=targetLimit;

+ fromUArgs.size=sizeof(fromUArgs);

+ toUArgs.converter=sourceCnv;

+ toUArgs.flush=flush;

+ toUArgs.offsets=NULL;

+ toUArgs.source=s;

+ toUArgs.sourceLimit=sourceLimit;

+ toUArgs.targetLimit=pivotLimit;

+ toUArgs.size=sizeof(toUArgs);

+ /*

+ * TODO: Consider separating this function into two functions,

+ * extracting exactly the conversion loop,

+ * for readability and to reduce the set of visible variables.

+ *

+ * Otherwise stop using s and t from here on.

+ */

+ s=t=NULL;

+ /*

+ * conversion loop

+ *

+ * The sequence of steps in the loop may appear backward,

+ * but the principle is simple:

+ * In the chain of

+ * source - sourceCnv overflow - pivot - targetCnv overflow - target

+ * empty out later buffers before refilling them from earlier ones.

+ *

+ * The targetCnv overflow buffer is flushed out only once before the loop.

+ */

+ for(;;) {

+ /*

+ * if(pivot not empty or error or replay or flush fromUnicode) {

+ * fromUnicode(pivot -> target);

+ * }

+ *

+ * For pivoting conversion; and for direct conversion for

+ * error callback handling and flushing the replay buffer.

+ */

+ if( *pivotSource<*pivotTarget ||

+ U_FAILURE(*pErrorCode) ||

+ targetCnv->preFromULength<0 ||

+ fromUArgs.flush

+ ) {

+ fromUArgs.source=*pivotSource;

+ fromUArgs.sourceLimit=*pivotTarget;

+ _fromUnicodeWithCallback(&fromUArgs, pErrorCode);

+ if(U_FAILURE(*pErrorCode)) {

+ /* target overflow, or conversion error */

+ *pivotSource=(UChar *)fromUArgs.source;

+ break;

+ }

+ /*

+ * _fromUnicodeWithCallback() must have consumed the pivot contents

+ * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()

+ */

+ }

+ /* The pivot buffer is empty; reset it so we start at pivotStart. */

+ *pivotSource=*pivotTarget=pivotStart;

+ /*

+ * if(sourceCnv overflow buffer not empty) {

+ * move(sourceCnv overflow buffer -> pivot);

+ * continue;

+ * }

+ */

+ /* output the sourceCnv overflow buffer */

+ if(sourceCnv->UCharErrorBufferLength>0) {

+ if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) {

+ /* U_BUFFER_OVERFLOW_ERROR */

+ *pErrorCode=U_ZERO_ERROR;

+ }

+ continue;

+ }

+ /*

+ * check for end of input and break if done

+ *

+ * Checking both flush and fromUArgs.flush ensures that the converters

+ * have been called with the flush flag set if the ucnv_convertEx()

+ * caller set it.

+ */

+ if( toUArgs.source==sourceLimit &&

+ sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&

+ (!flush || fromUArgs.flush)

+ ) {

+ /* done successfully */

+ break;

+ }

+ /*

+ * use direct conversion if available

+ * but not if continuing a partial match

+ * or flushing the toUnicode replay buffer

+ */

+ if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) {

+ if(*pErrorCode==U_USING_DEFAULT_WARNING) {

+ /* remove a warning that may be set by this function */

+ *pErrorCode=U_ZERO_ERROR;

+ }

+ convert(&fromUArgs, &toUArgs, pErrorCode);

+ if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

+ break;

+ } else if(U_FAILURE(*pErrorCode)) {

+ if(sourceCnv->toULength>0) {

+ /*

+ * Fall through to calling _toUnicodeWithCallback()

+ * for callback handling.

+ *

+ * The pivot buffer will be reset with

+ * *pivotSource=*pivotTarget=pivotStart;

+ * which indicates a toUnicode error to the caller

+ * (*pivotSource==pivotStart shows no pivot UChars consumed).

+ */

+ } else {

+ /*

+ * Indicate a fromUnicode error to the caller

+ * (*pivotSource>pivotStart shows some pivot UChars consumed).

+ */

+ *pivotSource=*pivotTarget=pivotStart+1;

+ /*

+ * Loop around to calling _fromUnicodeWithCallbacks()

+ * for callback handling.

+ */

+ continue;

+ }

+ } else if(*pErrorCode==U_USING_DEFAULT_WARNING) {

+ /*

+ * No error, but the implementation requested to temporarily

+ * fall back to pivoting.

+ */

+ *pErrorCode=U_ZERO_ERROR;

+ /*

+ * The following else branches are almost identical to the end-of-input

+ * handling in _toUnicodeWithCallback().

+ * Avoid calling it just for the end of input.

+ */

+ } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */

+ /*

+ * the entire input stream is consumed

+ * and there is a partial, truncated input sequence left

+ */

+ /* inject an error and continue with callback handling */

+ *pErrorCode=U_TRUNCATED_CHAR_FOUND;

+ } else {

+ /* input consumed */

+ if(flush) {

+ /* reset the converters without calling the callback functions */

+ _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE);

+ _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE);

+ }

+ /* done successfully */

+ break;

+ }

+ /*

+ * toUnicode(source -> pivot);

+ *

+ * For pivoting conversion; and for direct conversion for

+ * error callback handling, continuing partial matches

+ * and flushing the replay buffer.

+ *

+ * The pivot buffer is empty and reset.

+ */

+ toUArgs.target=pivotStart; /* ==*pivotTarget */

+ /* toUArgs.targetLimit=pivotLimit; already set before the loop */

+ _toUnicodeWithCallback(&toUArgs, pErrorCode);

+ *pivotTarget=toUArgs.target;

+ if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

+ /* pivot overflow: continue with the conversion loop */

+ *pErrorCode=U_ZERO_ERROR;

+ } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) {

+ /* conversion error, or there was nothing left to convert */

+ break;

+ }

+ /*

+ * else:

+ * _toUnicodeWithCallback() wrote into the pivot buffer,

+ * continue with fromUnicode conversion.

+ *

+ * Set the fromUnicode flush flag if we flush and if toUnicode has

+ * processed the end of the input.

+ */

+ if( flush && toUArgs.source==sourceLimit &&

+ sourceCnv->preToULength>=0 &&

+ sourceCnv->UCharErrorBufferLength==0

+ ) {

+ fromUArgs.flush=TRUE;

+ }

+ /*

+ * The conversion loop is exited when one of the following is true:

+ * - the entire source text has been converted successfully to the target buffer

+ * - a target buffer overflow occurred

+ * - a conversion error occurred

+ */

+ *source=toUArgs.source;

+ *target=fromUArgs.target;

+ /* terminate the target buffer if possible */

+ if(flush && U_SUCCESS(*pErrorCode)) {

+ if(*target!=targetLimit) {

+ **target=0;

+ if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {

+ *pErrorCode=U_ZERO_ERROR;

+ }

+ } else {

+ *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;

+ }

+/* internal implementation of ucnv_convert() etc. with preflighting */

+static int32_t

+ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,

+ char *target, int32_t targetCapacity,

+ const char *source, int32_t sourceLength,

+ UErrorCode *pErrorCode) {

+ UChar pivotBuffer[CHUNK_SIZE];

+ UChar *pivot, *pivot2;

+ char *myTarget;

+ const char *sourceLimit;

+ const char *targetLimit;

+ int32_t targetLength=0;

+ /* set up */

+ if(sourceLength<0) {

+ sourceLimit=uprv_strchr(source, 0);

+ } else {

+ sourceLimit=source+sourceLength;

+ }

+ /* if there is no input data, we're done */

+ if(source==sourceLimit) {

+ return u_terminateChars(target, targetCapacity, 0, pErrorCode);

+ }

+ pivot=pivot2=pivotBuffer;

+ myTarget=target;

+ targetLength=0;

+ if(targetCapacity>0) {

+ /* perform real conversion */

+ targetLimit=target+targetCapacity;

+ ucnv_convertEx(outConverter, inConverter,

+ &myTarget, targetLimit,

+ &source, sourceLimit,

+ pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,

+ FALSE,

+ TRUE,

+ pErrorCode);

+ targetLength=(int32_t)(myTarget-target);

+ }

+ /*

+ * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing

+ * to it but continue the conversion in order to store in targetCapacity

+ * the number of bytes that was required.

+ */

+ if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0)

+ {

+ char targetBuffer[CHUNK_SIZE];

+ targetLimit=targetBuffer+CHUNK_SIZE;

+ do {

+ *pErrorCode=U_ZERO_ERROR;

+ myTarget=targetBuffer;

+ ucnv_convertEx(outConverter, inConverter,

+ &myTarget, targetLimit,

+ &source, sourceLimit,

+ pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,

+ FALSE,

+ TRUE,

+ pErrorCode);

+ targetLength+=(int32_t)(myTarget-targetBuffer);

+ } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);

+ /* done with preflighting, set warnings and errors as appropriate */

+ return u_terminateChars(target, targetCapacity, targetLength, pErrorCode);

+ }

+ /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */

+ return targetLength;

+U_CAPI int32_t U_EXPORT2

+ucnv_convert(const char *toConverterName, const char *fromConverterName,

+ char *target, int32_t targetCapacity,

+ const char *source, int32_t sourceLength,

+ UErrorCode *pErrorCode) {

+ UConverter in, out; /* stack-allocated */

+ UConverter *inConverter, *outConverter;

+ int32_t targetLength;

+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {

+ return 0;

+ }

+ if( source==NULL || sourceLength<-1 ||

+ targetCapacity<0 || (targetCapacity>0 && target==NULL)

+ ) {

+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

+ return 0;

+ }

+ /* if there is no input data, we're done */

+ if(sourceLength==0 || (sourceLength<0 && *source==0)) {

+ return u_terminateChars(target, targetCapacity, 0, pErrorCode);

+ }

+ /* create the converters */

+ inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode);

+ if(U_FAILURE(*pErrorCode)) {

+ return 0;

+ }

+ outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode);

+ if(U_FAILURE(*pErrorCode)) {

+ ucnv_close(inConverter);

+ return 0;

+ }

+ targetLength=ucnv_internalConvert(outConverter, inConverter,

+ target, targetCapacity,

+ source, sourceLength,

+ pErrorCode);

+ ucnv_close(inConverter);

+ ucnv_close(outConverter);

+ return targetLength;

+/* @internal */

+static int32_t

+ucnv_convertAlgorithmic(UBool convertToAlgorithmic,

+ UConverterType algorithmicType,

+ UConverter *cnv,

+ char *target, int32_t targetCapacity,

+ const char *source, int32_t sourceLength,

+ UErrorCode *pErrorCode) {

+ UConverter algoConverterStatic; /* stack-allocated */

+ UConverter *algoConverter, *to, *from;

+ int32_t targetLength;

+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {

+ return 0;

+ }

+ if( cnv==NULL || source==NULL || sourceLength<-1 ||

+ targetCapacity<0 || (targetCapacity>0 && target==NULL)

+ ) {

+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

+ return 0;

+ }

+ /* if there is no input data, we're done */

+ if(sourceLength==0 || (sourceLength<0 && *source==0)) {

+ return u_terminateChars(target, targetCapacity, 0, pErrorCode);

+ }

+ /* create the algorithmic converter */

+ algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType,

+ "", 0, pErrorCode);

+ if(U_FAILURE(*pErrorCode)) {

+ return 0;

+ }

+ /* reset the other converter */

+ if(convertToAlgorithmic) {

+ /* cnv->Unicode->algo */

+ ucnv_resetToUnicode(cnv);

+ to=algoConverter;

+ from=cnv;

+ } else {

+ /* algo->Unicode->cnv */

+ ucnv_resetFromUnicode(cnv);

+ from=algoConverter;

+ to=cnv;

+ }

+ targetLength=ucnv_internalConvert(to, from,

+ target, targetCapacity,

+ source, sourceLength,

+ pErrorCode);

+ ucnv_close(algoConverter);

+ return targetLength;

+U_CAPI int32_t U_EXPORT2

+ucnv_toAlgorithmic(UConverterType algorithmicType,

+ UConverter *cnv,

+ char *target, int32_t targetCapacity,

+ const char *source, int32_t sourceLength,

+ UErrorCode *pErrorCode) {

+ return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv,

+ target, targetCapacity,

+ source, sourceLength,

+ pErrorCode);

+U_CAPI int32_t U_EXPORT2

+ucnv_fromAlgorithmic(UConverter *cnv,

+ UConverterType algorithmicType,

+ char *target, int32_t targetCapacity,

+ const char *source, int32_t sourceLength,

+ UErrorCode *pErrorCode) {

+ return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv,

+ target, targetCapacity,

+ source, sourceLength,

+ pErrorCode);

+U_CAPI UConverterType U_EXPORT2

+ucnv_getType(const UConverter* converter)

+ int8_t type = converter->sharedData->staticData->conversionType;

+#if !UCONFIG_NO_LEGACY_CONVERSION

+ if(type == UCNV_MBCS) {

+ return ucnv_MBCSGetType(converter);

+ }

+#endif

+ return (UConverterType)type;

+U_CAPI void U_EXPORT2

+ucnv_getStarters(const UConverter* converter,

+ UBool starters[256],

+ UErrorCode* err)

+ if (err == NULL || U_FAILURE(*err)) {

+ return;

+ }

+ if(converter->sharedData->impl->getStarters != NULL) {

+ converter->sharedData->impl->getStarters(converter, starters, err);

+ } else {

+ *err = U_ILLEGAL_ARGUMENT_ERROR;

+ }

+static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv)

+ UErrorCode errorCode;

+ const char *name;

+ int32_t i;

+ if(cnv==NULL) {

+ return NULL;

+ }

+ errorCode=U_ZERO_ERROR;

+ name=ucnv_getName(cnv, &errorCode);

+ if(U_FAILURE(errorCode)) {

+ return NULL;

+ }

+ for(i=0; i<(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); ++i)

+ {

+ if(0==uprv_strcmp(name, ambiguousConverters[i].name))

+ {

+ return ambiguousConverters+i;

+ }

+ return NULL;

+U_CAPI void U_EXPORT2

+ucnv_fixFileSeparator(const UConverter *cnv,

+ UChar* source,

+ int32_t sourceLength) {

+ const UAmbiguousConverter *a;

+ int32_t i;

+ UChar variant5c;

+ if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL)

+ {

+ return;

+ }

+ variant5c=a->variant5c;

+ for(i=0; i<sourceLength; ++i) {

+ if(source[i]==variant5c) {

+ source[i]=0x5c;

+ }

+U_CAPI UBool U_EXPORT2

+ucnv_isAmbiguous(const UConverter *cnv) {

+ return (UBool)(ucnv_getAmbiguous(cnv)!=NULL);

+U_CAPI void U_EXPORT2

+ucnv_setFallback(UConverter *cnv, UBool usesFallback)

+ cnv->useFallback = usesFallback;

+U_CAPI UBool U_EXPORT2

+ucnv_usesFallback(const UConverter *cnv)

+ return cnv->useFallback;

+U_CAPI void U_EXPORT2

+ucnv_getInvalidChars (const UConverter * converter,

+ char *errBytes,

+ int8_t * len,

+ UErrorCode * err)

+ if (err == NULL || U_FAILURE(*err))

+ {

+ return;

+ }

+ if (len == NULL || errBytes == NULL || converter == NULL)

+ {

+ *err = U_ILLEGAL_ARGUMENT_ERROR;

+ return;

+ }

+ if (*len < converter->invalidCharLength)

+ {

+ *err = U_INDEX_OUTOFBOUNDS_ERROR;

+ return;

+ }

+ if ((*len = converter->invalidCharLength) > 0)

+ {

+ uprv_memcpy (errBytes, converter->invalidCharBuffer, *len);

+ }

+U_CAPI void U_EXPORT2

+ucnv_getInvalidUChars (const UConverter * converter,

+ UChar *errChars,

+ int8_t * len,

+ UErrorCode * err)

+ if (err == NULL || U_FAILURE(*err))

+ {

+ return;

+ }

+ if (len == NULL || errChars == NULL || converter == NULL)

+ {

+ *err = U_ILLEGAL_ARGUMENT_ERROR;

+ return;

+ }

+ if (*len < converter->invalidUCharLength)

+ {

+ *err = U_INDEX_OUTOFBOUNDS_ERROR;

+ return;

+ }

+ if ((*len = converter->invalidUCharLength) > 0)

+ {

+ uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len));

+ }

+#define SIG_MAX_LEN 5

+U_CAPI const char* U_EXPORT2

+ucnv_detectUnicodeSignature( const char* source,

+ int32_t sourceLength,

+ int32_t* signatureLength,

+ UErrorCode* pErrorCode) {

+ int32_t dummy;

+ /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN

+ * bytes we don't misdetect something

+ */

+ char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };

+ int i = 0;

+ if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){

+ return NULL;

+ }

+ if(source == NULL || sourceLength < -1){

+ *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;

+ return NULL;

+ }

+ if(signatureLength == NULL) {

+ signatureLength = &dummy;

+ }

+ if(sourceLength==-1){

+ sourceLength=(int32_t)uprv_strlen(source);

+ }

+ while(i<sourceLength&& i<SIG_MAX_LEN){

+ start[i]=source[i];

+ i++;

+ }

+ if(start[0] == '\xFE' && start[1] == '\xFF') {

+ *signatureLength=2;

+ return "UTF-16BE";

+ } else if(start[0] == '\xFF' && start[1] == '\xFE') {

+ if(start[2] == '\x00' && start[3] =='\x00') {

+ *signatureLength=4;

+ return "UTF-32LE";

+ } else {

+ *signatureLength=2;

+ return "UTF-16LE";

+ }

+ } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') {

+ *signatureLength=3;

+ return "UTF-8";

+ } else if(start[0] == '\x00' && start[1] == '\x00' &&

+ start[2] == '\xFE' && start[3]=='\xFF') {

+ *signatureLength=4;

+ return "UTF-32BE";

+ } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') {

+ *signatureLength=3;

+ return "SCSU";

+ } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') {

+ *signatureLength=3;

+ return "BOCU-1";

+ } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') {

+ /*

+ * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/

+ * depending on the second UTF-16 code unit.

+ * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF

+ * if it occurs.

+ *

+ * So far we have +/v

+ */

+ if(start[3] == '\x38' && start[4] == '\x2D') {

+ /* 5 bytes +/v8- */

+ *signatureLength=5;

+ return "UTF-7";

+ } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') {

+ /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */

+ *signatureLength=4;

+ return "UTF-7";

+ }

+ }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){

+ *signatureLength=4;

+ return "UTF-EBCDIC";

+ }

+ /* no known Unicode signature byte sequence recognized */

+ *signatureLength=0;

+ return NULL;

+U_CAPI int32_t U_EXPORT2

+ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status)

+ if(status == NULL || U_FAILURE(*status)){

+ return -1;

+ }

+ if(cnv == NULL){

+ *status = U_ILLEGAL_ARGUMENT_ERROR;

+ return -1;

+ }

+ if(cnv->preFromUFirstCP >= 0){

+ return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ;

+ }else if(cnv->preFromULength < 0){

+ return -cnv->preFromULength ;

+ }else if(cnv->fromUChar32 > 0){

+ return 1;

+ }

+ return 0;

+U_CAPI int32_t U_EXPORT2

+ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){

+ if(status == NULL || U_FAILURE(*status)){

+ return -1;

+ }

+ if(cnv == NULL){

+ *status = U_ILLEGAL_ARGUMENT_ERROR;

+ return -1;

+ }

+ if(cnv->preToULength > 0){

+ return cnv->preToULength ;

+ }else if(cnv->preToULength < 0){

+ return -cnv->preToULength;

+ }else if(cnv->toULength > 0){

+ return cnv->toULength;

+ }

+ return 0;

+U_CAPI UBool U_EXPORT2

+ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){

+ if (U_FAILURE(*status)) {

+ return FALSE;

+ }

+ if (cnv == NULL) {

+ *status = U_ILLEGAL_ARGUMENT_ERROR;

+ return FALSE;

+ }

+ switch (ucnv_getType(cnv)) {

+ case UCNV_SBCS:

+ case UCNV_DBCS:

+ case UCNV_UTF32_BigEndian:

+ case UCNV_UTF32_LittleEndian:

+ case UCNV_UTF32:

+ case UCNV_US_ASCII:

+ return TRUE;

+ default:

+ return FALSE;

+ }

+#endif

+/*

+ * Hey, Emacs, please set the following:

+ *

+ * Local Variables:

+ * indent-tabs-mode: nil

+ * End:

+ *

+ */

Property changes on: icu51/source/common/ucnv.c

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « icu51/source/common/ucmndata.c ('k') | icu51/source/common/ucnv2022.cpp » ('j') | no next file with comments »