| Index: icu51/source/common/ucnvlat1.c
|
| ===================================================================
|
| --- icu51/source/common/ucnvlat1.c (revision 0)
|
| +++ icu51/source/common/ucnvlat1.c (revision 0)
|
| @@ -0,0 +1,744 @@
|
| +/*
|
| +**********************************************************************
|
| +* Copyright (C) 2000-2012, International Business Machines
|
| +* Corporation and others. All Rights Reserved.
|
| +**********************************************************************
|
| +* file name: ucnvlat1.c
|
| +* encoding: US-ASCII
|
| +* tab size: 8 (not used)
|
| +* indentation:4
|
| +*
|
| +* created on: 2000feb07
|
| +* created by: Markus W. Scherer
|
| +*/
|
| +
|
| +#include "unicode/utypes.h"
|
| +
|
| +#if !UCONFIG_NO_CONVERSION
|
| +
|
| +#include "unicode/ucnv.h"
|
| +#include "unicode/uset.h"
|
| +#include "unicode/utf8.h"
|
| +#include "ucnv_bld.h"
|
| +#include "ucnv_cnv.h"
|
| +
|
| +/* control optimizations according to the platform */
|
| +#define LATIN1_UNROLL_FROM_UNICODE 1
|
| +
|
| +/* ISO 8859-1 --------------------------------------------------------------- */
|
| +
|
| +/* Converts ISO 8859-1 bytes to UChars 1:1 (each source byte is copied into one UChar) and, if pArgs->offsets is not NULL, records one source offset per UChar. Sets U_BUFFER_OVERFLOW_ERROR when there are more source bytes than target capacity. This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
|
| +static void
|
| +_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
| + UErrorCode *pErrorCode) {
|
| + const uint8_t *source;
|
| + UChar *target;
|
| + int32_t targetCapacity, length;
|
| + int32_t *offsets;
|
| +
|
| + int32_t sourceIndex;
|
| +
|
| + /* set up the local pointers */
|
| + source=(const uint8_t *)pArgs->source;
|
| + target=pArgs->target;
|
| + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
|
| + offsets=pArgs->offsets;
|
| +
|
| + sourceIndex=0;
|
| +
|
| + /*
|
| + * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
|
| + * for the minimum of the sourceLength and targetCapacity (after this step length==targetCapacity)
|
| + */
|
| + length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
|
| + if(length<=targetCapacity) {
|
| + targetCapacity=length;
|
| + } else {
|
| + /* target will be full: report overflow and convert only as many bytes as fit */
|
| + *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
| + length=targetCapacity;
|
| + }
|
| +
|
| + if(targetCapacity>=8) {
|
| + /* This loop is unrolled for speed and improved pipelining: 8 bytes per iteration; length and targetCapacity are reduced to the remainder (0..7) for the loops below. */
|
| + int32_t count, loops;
|
| +
|
| + loops=count=targetCapacity>>3;
|
| + length=targetCapacity&=0x7;
|
| + do {
|
| + target[0]=source[0];
|
| + target[1]=source[1];
|
| + target[2]=source[2];
|
| + target[3]=source[3];
|
| + target[4]=source[4];
|
| + target[5]=source[5];
|
| + target[6]=source[6];
|
| + target[7]=source[7];
|
| + target+=8;
|
| + source+=8;
|
| + } while(--count>0);
|
| +
|
| + if(offsets!=NULL) {
|
| + do {
|
| + offsets[0]=sourceIndex++;
|
| + offsets[1]=sourceIndex++;
|
| + offsets[2]=sourceIndex++;
|
| + offsets[3]=sourceIndex++;
|
| + offsets[4]=sourceIndex++;
|
| + offsets[5]=sourceIndex++;
|
| + offsets[6]=sourceIndex++;
|
| + offsets[7]=sourceIndex++;
|
| + offsets+=8;
|
| + } while(--loops>0);
|
| + }
|
| + }
|
| +
|
| + /* conversion loop for the remaining targetCapacity bytes (all of them if there were fewer than 8) */
|
| + while(targetCapacity>0) {
|
| + *target++=*source++;
|
| + --targetCapacity;
|
| + }
|
| +
|
| + /* write back the updated pointers */
|
| + pArgs->source=(const char *)source;
|
| + pArgs->target=target;
|
| +
|
| + /* set offsets for the bytes handled by the non-unrolled loop (length holds their count); pArgs->offsets is written back only when offsets were requested */
|
| + if(offsets!=NULL) {
|
| + while(length>0) {
|
| + *offsets++=sourceIndex++;
|
| + --length;
|
| + }
|
| + pArgs->offsets=offsets;
|
| + }
|
| +}
|
| +
|
| +/* Returns the next source byte as its code point and advances pArgs->source by one; with no input left, sets U_INDEX_OUTOFBOUNDS_ERROR and returns 0xffff. This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */
|
| +static UChar32
|
| +_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
|
| + UErrorCode *pErrorCode) {
|
| + const uint8_t *source=(const uint8_t *)pArgs->source;
|
| + if(source<(const uint8_t *)pArgs->sourceLimit) {
|
| + pArgs->source=(const char *)(source+1);
|
| + return *source;
|
| + }
|
| +
|
| + /* no output because of empty input; 0xffff is the value returned together with the error code */
|
| + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
| + return 0xffff;
|
| +}
|
| +
|
| +/* Converts UChars to single bytes for Latin-1 (code units up to 0xff) or US-ASCII (up to 0x7f), selected by cnv->sharedData. Stops at the first code unit above that maximum, stores it (or the supplementary code point it forms) in cnv->fromUChar32 and sets an error code, unless it is a lead surrogate at the very end of the input, which is only stored. This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). */
|
| +static void
|
| +_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
| + UErrorCode *pErrorCode) {
|
| + UConverter *cnv;
|
| + const UChar *source, *sourceLimit;
|
| + uint8_t *target, *oldTarget;
|
| + int32_t targetCapacity, length;
|
| + int32_t *offsets;
|
| +
|
| + UChar32 cp;
|
| + UChar c, max;
|
| +
|
| + int32_t sourceIndex;
|
| +
|
| + /* set up the local pointers */
|
| + cnv=pArgs->converter;
|
| + source=pArgs->source;
|
| + sourceLimit=pArgs->sourceLimit;
|
| + target=oldTarget=(uint8_t *)pArgs->target;
|
| + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
|
| + offsets=pArgs->offsets;
|
| +
|
| + if(cnv->sharedData==&_Latin1Data) {
|
| + max=0xff; /* Latin-1 */
|
| + } else {
|
| + max=0x7f; /* US-ASCII */
|
| + }
|
| +
|
| + /* get the converter state from UConverter; a non-zero value is treated below as a pending lead surrogate */
|
| + cp=cnv->fromUChar32;
|
| +
|
| + /* sourceIndex=-1 if the current character began in the previous buffer */
|
| + sourceIndex= cp==0 ? 0 : -1;
|
| +
|
| + /*
|
| + * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
|
| + * for the minimum of the sourceLength and targetCapacity
|
| + */
|
| + length=(int32_t)(sourceLimit-source);
|
| + if(length<targetCapacity) {
|
| + targetCapacity=length;
|
| + }
|
| +
|
| + /* with a pending lead surrogate and room to continue, jump straight to the trail-surrogate test */
|
| + if(cp!=0 && targetCapacity>0) {
|
| + goto getTrail;
|
| + }
|
| +
|
| +#if LATIN1_UNROLL_FROM_UNICODE
|
| + /* unroll the loop with the most common case: 16 code units per iteration, all of them within max */
|
| + if(targetCapacity>=16) {
|
| + int32_t count, loops;
|
| + UChar u, oredChars;
|
| +
|
| + loops=count=targetCapacity>>4;
|
| + do {
|
| + oredChars=u=*source++;
|
| + *target++=(uint8_t)u;
|
| + oredChars|=u=*source++;
|
| + *target++=(uint8_t)u;
|
| + oredChars|=u=*source++;
|
| + *target++=(uint8_t)u;
|
| + oredChars|=u=*source++;
|
| + *target++=(uint8_t)u;
|
| + oredChars|=u=*source++;
|
| + *target++=(uint8_t)u;
|
| + oredChars|=u=*source++;
|
| + *target++=(uint8_t)u;
|
| + oredChars|=u=*source++;
|
| + *target++=(uint8_t)u;
|
| + oredChars|=u=*source++;
|
| + *target++=(uint8_t)u;
|
| + oredChars|=u=*source++;
|
| + *target++=(uint8_t)u;
|
| + oredChars|=u=*source++;
|
| + *target++=(uint8_t)u;
|
| + oredChars|=u=*source++;
|
| + *target++=(uint8_t)u;
|
| + oredChars|=u=*source++;
|
| + *target++=(uint8_t)u;
|
| + oredChars|=u=*source++;
|
| + *target++=(uint8_t)u;
|
| + oredChars|=u=*source++;
|
| + *target++=(uint8_t)u;
|
| + oredChars|=u=*source++;
|
| + *target++=(uint8_t)u;
|
| + oredChars|=u=*source++;
|
| + *target++=(uint8_t)u;
|
| +
|
| + /* were all 16 entries really valid? (max is 0xff or 0x7f, so the OR exceeds max exactly when some code unit does) */
|
| + if(oredChars>max) {
|
| + /* no, return to the first of these 16 and let the one-at-a-time loop below find the offending code unit */
|
| + source-=16;
|
| + target-=16;
|
| + break;
|
| + }
|
| + } while(--count>0);
|
| + count=loops-count;
|
| + targetCapacity-=16*count;
|
| +
|
| + if(offsets!=NULL) { /* count is now the number of fully converted 16-unit blocks; oldTarget is advanced so that the final offsets loop covers only later output */
|
| + oldTarget+=16*count;
|
| + while(count>0) {
|
| + *offsets++=sourceIndex++;
|
| + *offsets++=sourceIndex++;
|
| + *offsets++=sourceIndex++;
|
| + *offsets++=sourceIndex++;
|
| + *offsets++=sourceIndex++;
|
| + *offsets++=sourceIndex++;
|
| + *offsets++=sourceIndex++;
|
| + *offsets++=sourceIndex++;
|
| + *offsets++=sourceIndex++;
|
| + *offsets++=sourceIndex++;
|
| + *offsets++=sourceIndex++;
|
| + *offsets++=sourceIndex++;
|
| + *offsets++=sourceIndex++;
|
| + *offsets++=sourceIndex++;
|
| + *offsets++=sourceIndex++;
|
| + *offsets++=sourceIndex++;
|
| + --count;
|
| + }
|
| + }
|
| + }
|
| +#endif
|
| +
|
| + /* conversion loop, one code unit at a time; on exit c holds the first code unit above max (already consumed from source), if any */
|
| + c=0;
|
| + while(targetCapacity>0 && (c=*source++)<=max) {
|
| + /* convert the Unicode code point */
|
| + *target++=(uint8_t)c;
|
| + --targetCapacity;
|
| + }
|
| +
|
| + if(c>max) {
|
| + cp=c;
|
| + if(!U_IS_SURROGATE(cp)) {
|
| + /* callback(unassigned) */
|
| + } else if(U_IS_SURROGATE_LEAD(cp)) {
|
| +getTrail:
|
| + if(source<sourceLimit) {
|
| + /* test the following code unit */
|
| + UChar trail=*source;
|
| + if(U16_IS_TRAIL(trail)) {
|
| + ++source;
|
| + cp=U16_GET_SUPPLEMENTARY(cp, trail);
|
| + /* this codepage does not map supplementary code points */
|
| + /* callback(unassigned) */
|
| + } else {
|
| + /* this is an unmatched lead code unit (1st surrogate) */
|
| + /* callback(illegal) */
|
| + }
|
| + } else {
|
| + /* no more input: keep the lead surrogate in the converter state and skip the error code */
|
| + cnv->fromUChar32=cp;
|
| + goto noMoreInput;
|
| + }
|
| + } else {
|
| + /* this is an unmatched trail code unit (2nd surrogate) */
|
| + /* callback(illegal) */
|
| + }
|
| +
|
| + *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
|
| + cnv->fromUChar32=cp;
|
| + }
|
| +noMoreInput:
|
| +
|
| + /* set offsets since the start (or since the end of the unrolled blocks): one per byte written after oldTarget */
|
| + if(offsets!=NULL) {
|
| + size_t count=target-oldTarget;
|
| + while(count>0) {
|
| + *offsets++=sourceIndex++;
|
| + --count;
|
| + }
|
| + }
|
| +
|
| + if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
|
| + /* target is full */
|
| + *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
| + }
|
| +
|
| + /* write back the updated pointers */
|
| + pArgs->source=source;
|
| + pArgs->target=(char *)target;
|
| + pArgs->offsets=offsets;
|
| +}
|
| +
|
| +/* Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8(). ASCII bytes and 2-byte sequences with lead bytes 0xc2..0xc3 are converted inline; for any other input U_USING_DEFAULT_WARNING is set so that the pivoting implementation can take over. */
|
| +static void
|
| +ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
|
| + UConverterToUnicodeArgs *pToUArgs,
|
| + UErrorCode *pErrorCode) {
|
| + UConverter *utf8;
|
| + const uint8_t *source, *sourceLimit;
|
| + uint8_t *target;
|
| + int32_t targetCapacity;
|
| +
|
| + UChar32 c;
|
| + uint8_t b, t1;
|
| +
|
| + /* set up the local pointers */
|
| + utf8=pToUArgs->converter;
|
| + source=(uint8_t *)pToUArgs->source;
|
| + sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
|
| + target=(uint8_t *)pFromUArgs->target;
|
| + targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
|
| +
|
| + /* get the converter state from the UTF-8 UConverter: a non-zero value is a lead byte stored at the end of this function by an earlier call */
|
| + c=(UChar32)utf8->toUnicodeStatus;
|
| + if(c!=0 && source<sourceLimit) {
|
| + if(targetCapacity==0) {
|
| + *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
| + return;
|
| + } else if(c>=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) {
|
| + ++source;
|
| + *target++=(uint8_t)(((c&3)<<6)|t1);
|
| + --targetCapacity;
|
| +
|
| + utf8->toUnicodeStatus=0;
|
| + utf8->toULength=0;
|
| + } else {
|
| + /* complicated, illegal or unmappable input: fall back to the pivoting implementation (nothing was consumed, so no pointers are written back) */
|
| + *pErrorCode=U_USING_DEFAULT_WARNING;
|
| + return;
|
| + }
|
| + }
|
| +
|
| + /*
|
| + * Make sure that the last byte sequence before sourceLimit is complete
|
| + * or runs into a lead byte.
|
| + * In the conversion loop compare source with sourceLimit only once
|
| + * per multi-byte character.
|
| + * For Latin-1, adjust sourceLimit only for 1 trail byte because
|
| + * the conversion loop handles at most 2-byte sequences.
|
| + */
|
| + if(source<sourceLimit && U8_IS_LEAD(*(sourceLimit-1))) {
|
| + --sourceLimit;
|
| + }
|
| +
|
| + /* conversion loop */
|
| + while(source<sourceLimit) {
|
| + if(targetCapacity>0) {
|
| + b=*source++;
|
| + if((int8_t)b>=0) {
|
| + /* convert ASCII */
|
| + *target++=(uint8_t)b;
|
| + --targetCapacity;
|
| + } else if( /* handle U+0080..U+00FF inline */
|
| + b>=0xc2 && b<=0xc3 &&
|
| + (t1=(uint8_t)(*source-0x80)) <= 0x3f
|
| + ) {
|
| + ++source;
|
| + *target++=(uint8_t)(((b&3)<<6)|t1);
|
| + --targetCapacity;
|
| + } else {
|
| + /* complicated, illegal or unmappable input: fall back to the pivoting implementation, with source reset to the byte b that was just read */
|
| + pToUArgs->source=(char *)(source-1);
|
| + pFromUArgs->target=(char *)target;
|
| + *pErrorCode=U_USING_DEFAULT_WARNING;
|
| + return;
|
| + }
|
| + } else {
|
| + /* target is full */
|
| + *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
| + break;
|
| + }
|
| + }
|
| +
|
| + /*
|
| + * The sourceLimit may have been adjusted before the conversion loop
|
| + * to stop before a truncated sequence.
|
| + * If so, then collect the truncated sequence now.
|
| + * For Latin-1, there is at most exactly one lead byte because of the
|
| + * smaller sourceLimit adjustment logic.
|
| + */
|
| + if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
|
| + utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
|
| + utf8->toULength=1;
|
| + utf8->mode=U8_COUNT_TRAIL_BYTES(b)+1; /* NOTE(review): looks like the expected total length of the sequence; confirm how the UTF-8 converter reads mode */
|
| + }
|
| +
|
| + /* write back the updated pointers */
|
| + pToUArgs->source=(char *)source;
|
| + pFromUArgs->target=(char *)target;
|
| +}
|
| +
|
| +static void
|
| +_Latin1GetUnicodeSet(const UConverter *cnv,
|
| + const USetAdder *sa,
|
| + UConverterUnicodeSet which,
|
| + UErrorCode *pErrorCode) {
|
| + sa->addRange(sa->set, 0, 0xff); /* adds the range 0..0xff through the adder callback; cnv, which and pErrorCode are not used */
|
| +}
|
| +
|
| +static const UConverterImpl _Latin1Impl={ /* function table for the ISO 8859-1 converter; NULL marks a slot for which this file supplies no function. NOTE(review): slot meanings follow the UConverterImpl declaration, which is outside this file - confirm there */
|
| + UCNV_LATIN_1,
|
| +
|
| + NULL,
|
| + NULL,
|
| +
|
| + NULL,
|
| + NULL,
|
| + NULL,
|
| +
|
| + _Latin1ToUnicodeWithOffsets,
|
| + _Latin1ToUnicodeWithOffsets, /* same function in two adjacent slots - presumably the without/with-offsets variants; it skips offsets when pArgs->offsets is NULL */
|
| + _Latin1FromUnicodeWithOffsets,
|
| + _Latin1FromUnicodeWithOffsets, /* likewise used twice; it also skips offsets when pArgs->offsets is NULL */
|
| + _Latin1GetNextUChar,
|
| +
|
| + NULL,
|
| + NULL,
|
| + NULL,
|
| + NULL,
|
| + _Latin1GetUnicodeSet,
|
| +
|
| + NULL,
|
| + ucnv_Latin1FromUTF8
|
| +};
|
| +
|
| +static const UConverterStaticData _Latin1StaticData={ /* constant description of the ISO 8859-1 converter. NOTE(review): field order follows UConverterStaticData, declared outside this file - confirm there */
|
| + sizeof(UConverterStaticData),
|
| + "ISO-8859-1",
|
| + 819, UCNV_IBM, UCNV_LATIN_1, 1, 1, /* presumably codepage number, platform, converter type, min and max bytes per character - TODO confirm */
|
| + { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE, /* presumably the substitution byte sequence (0x1a) and its length 1 - TODO confirm */
|
| + 0,
|
| + 0,
|
| + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
|
| +};
|
| +
|
| +const UConverterSharedData _Latin1Data={ /* shared data tying together _Latin1StaticData and _Latin1Impl; _Latin1FromUnicodeWithOffsets() compares cnv->sharedData with this object's address to choose the Latin-1 limit 0xff */
|
| + sizeof(UConverterSharedData), ~((uint32_t) 0),
|
| + NULL, NULL, &_Latin1StaticData, FALSE, &_Latin1Impl,
|
| + 0
|
| +};
|
| +
|
| +/* US-ASCII ----------------------------------------------------------------- */
|
| +
|
| +/* Converts US-ASCII bytes to UChars 1:1, optionally recording source offsets. Stops at the first byte above 0x7f, copies it to cnv->toUBytes[] and sets U_ILLEGAL_CHAR_FOUND; sets U_BUFFER_OVERFLOW_ERROR when the target fills up before the source is consumed. This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
|
| +static void
|
| +_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
| + UErrorCode *pErrorCode) {
|
| + const uint8_t *source, *sourceLimit;
|
| + UChar *target, *oldTarget;
|
| + int32_t targetCapacity, length;
|
| + int32_t *offsets;
|
| +
|
| + int32_t sourceIndex;
|
| +
|
| + uint8_t c;
|
| +
|
| + /* set up the local pointers */
|
| + source=(const uint8_t *)pArgs->source;
|
| + sourceLimit=(const uint8_t *)pArgs->sourceLimit;
|
| + target=oldTarget=pArgs->target;
|
| + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
|
| + offsets=pArgs->offsets;
|
| +
|
| + /* sourceIndex always starts at 0 here: this function reads no pending character from a previous buffer */
|
| + sourceIndex=0;
|
| +
|
| + /*
|
| + * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
|
| + * for the minimum of the sourceLength and targetCapacity
|
| + */
|
| + length=(int32_t)(sourceLimit-source);
|
| + if(length<targetCapacity) {
|
| + targetCapacity=length;
|
| + }
|
| +
|
| + if(targetCapacity>=8) {
|
| + /* This loop is unrolled for speed and improved pipelining: 8 bytes are copied and ORed together per iteration. */
|
| + int32_t count, loops;
|
| + UChar oredChars;
|
| +
|
| + loops=count=targetCapacity>>3;
|
| + do {
|
| + oredChars=target[0]=source[0];
|
| + oredChars|=target[1]=source[1];
|
| + oredChars|=target[2]=source[2];
|
| + oredChars|=target[3]=source[3];
|
| + oredChars|=target[4]=source[4];
|
| + oredChars|=target[5]=source[5];
|
| + oredChars|=target[6]=source[6];
|
| + oredChars|=target[7]=source[7];
|
| +
|
| + /* were all 8 entries really valid? (the OR exceeds 0x7f exactly when some byte does) */
|
| + if(oredChars>0x7f) {
|
| + /* no, stay at the first of these 8: source and target are advanced only after this check, so the loop below redoes them one by one */
|
| + break;
|
| + }
|
| + source+=8;
|
| + target+=8;
|
| + } while(--count>0);
|
| + count=loops-count;
|
| + targetCapacity-=count*8;
|
| +
|
| + if(offsets!=NULL) { /* count is now the number of fully converted 8-byte blocks; oldTarget is advanced so that the final offsets loop covers only later output */
|
| + oldTarget+=count*8;
|
| + while(count>0) {
|
| + offsets[0]=sourceIndex++;
|
| + offsets[1]=sourceIndex++;
|
| + offsets[2]=sourceIndex++;
|
| + offsets[3]=sourceIndex++;
|
| + offsets[4]=sourceIndex++;
|
| + offsets[5]=sourceIndex++;
|
| + offsets[6]=sourceIndex++;
|
| + offsets[7]=sourceIndex++;
|
| + offsets+=8;
|
| + --count;
|
| + }
|
| + }
|
| + }
|
| +
|
| + /* conversion loop, one byte at a time; on exit c holds the first byte above 0x7f (already consumed from source), if any */
|
| + c=0;
|
| + while(targetCapacity>0 && (c=*source++)<=0x7f) {
|
| + *target++=c;
|
| + --targetCapacity;
|
| + }
|
| +
|
| + if(c>0x7f) {
|
| + /* callback(illegal); copy the current bytes to toUBytes[] */
|
| + UConverter *cnv=pArgs->converter;
|
| + cnv->toUBytes[0]=c;
|
| + cnv->toULength=1;
|
| + *pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
| + } else if(source<sourceLimit && target>=pArgs->targetLimit) {
|
| + /* target is full */
|
| + *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
| + }
|
| +
|
| + /* set offsets since the start (or since the end of the unrolled blocks): one per UChar written after oldTarget */
|
| + if(offsets!=NULL) {
|
| + size_t count=target-oldTarget;
|
| + while(count>0) {
|
| + *offsets++=sourceIndex++;
|
| + --count;
|
| + }
|
| + }
|
| +
|
| + /* write back the updated pointers */
|
| + pArgs->source=(const char *)source;
|
| + pArgs->target=target;
|
| + pArgs->offsets=offsets;
|
| +}
|
| +
|
| +/* Consumes one source byte and returns it if it is at most 0x7f; for a larger byte, stores it in cnv->toUBytes[], sets U_ILLEGAL_CHAR_FOUND and returns 0xffff. With no input left, sets U_INDEX_OUTOFBOUNDS_ERROR and returns 0xffff. This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */
|
| +static UChar32
|
| +_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
|
| + UErrorCode *pErrorCode) {
|
| + const uint8_t *source;
|
| + uint8_t b;
|
| +
|
| + source=(const uint8_t *)pArgs->source;
|
| + if(source<(const uint8_t *)pArgs->sourceLimit) {
|
| + b=*source++;
|
| + pArgs->source=(const char *)source;
|
| + if(b<=0x7f) {
|
| + return b;
|
| + } else {
|
| + UConverter *cnv=pArgs->converter;
|
| + cnv->toUBytes[0]=b;
|
| + cnv->toULength=1;
|
| + *pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
| + return 0xffff;
|
| + }
|
| + }
|
| +
|
| + /* no output because of empty input */
|
| + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
| + return 0xffff;
|
| +}
|
| +
|
| +/* "Convert" UTF-8 to US-ASCII: Validate and copy. Bytes up to 0x7f are copied unchanged; at the first larger byte, or when the UTF-8 converter holds a partial character, U_USING_DEFAULT_WARNING is set so that the standard (pivoting) conversion can take over. */
|
| +static void
|
| +ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
|
| + UConverterToUnicodeArgs *pToUArgs,
|
| + UErrorCode *pErrorCode) {
|
| + const uint8_t *source, *sourceLimit;
|
| + uint8_t *target;
|
| + int32_t targetCapacity, length;
|
| +
|
| + uint8_t c;
|
| +
|
| + if(pToUArgs->converter->toUnicodeStatus!=0) {
|
| + /* no handling of partial UTF-8 characters here, fall back to pivoting */
|
| + *pErrorCode=U_USING_DEFAULT_WARNING;
|
| + return;
|
| + }
|
| +
|
| + /* set up the local pointers */
|
| + source=(const uint8_t *)pToUArgs->source;
|
| + sourceLimit=(const uint8_t *)pToUArgs->sourceLimit;
|
| + target=(uint8_t *)pFromUArgs->target;
|
| + targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
|
| +
|
| + /*
|
| + * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter
|
| + * for the minimum of the sourceLength and targetCapacity
|
| + */
|
| + length=(int32_t)(sourceLimit-source);
|
| + if(length<targetCapacity) {
|
| + targetCapacity=length;
|
| + }
|
| +
|
| + /* unroll the loop with the most common case: 16 bytes per iteration, all of them ASCII */
|
| + if(targetCapacity>=16) {
|
| + int32_t count, loops;
|
| + uint8_t oredChars;
|
| +
|
| + loops=count=targetCapacity>>4;
|
| + do {
|
| + oredChars=*target++=*source++;
|
| + oredChars|=*target++=*source++;
|
| + oredChars|=*target++=*source++;
|
| + oredChars|=*target++=*source++;
|
| + oredChars|=*target++=*source++;
|
| + oredChars|=*target++=*source++;
|
| + oredChars|=*target++=*source++;
|
| + oredChars|=*target++=*source++;
|
| + oredChars|=*target++=*source++;
|
| + oredChars|=*target++=*source++;
|
| + oredChars|=*target++=*source++;
|
| + oredChars|=*target++=*source++;
|
| + oredChars|=*target++=*source++;
|
| + oredChars|=*target++=*source++;
|
| + oredChars|=*target++=*source++;
|
| + oredChars|=*target++=*source++;
|
| +
|
| + /* were all 16 entries really valid? (the OR exceeds 0x7f exactly when some byte does) */
|
| + if(oredChars>0x7f) {
|
| + /* no, return to the first of these 16 and let the one-at-a-time loop below find the non-ASCII byte */
|
| + source-=16;
|
| + target-=16;
|
| + break;
|
| + }
|
| + } while(--count>0);
|
| + count=loops-count;
|
| + targetCapacity-=16*count;
|
| + }
|
| +
|
| + /* conversion loop, one byte at a time; source is advanced only past ASCII bytes */
|
| + c=0;
|
| + while(targetCapacity>0 && (c=*source)<=0x7f) {
|
| + ++source;
|
| + *target++=c;
|
| + --targetCapacity;
|
| + }
|
| +
|
| + if(c>0x7f) {
|
| + /* non-ASCII character, handle in standard converter (source still points at it) */
|
| + *pErrorCode=U_USING_DEFAULT_WARNING;
|
| + } else if(source<sourceLimit && target>=(const uint8_t *)pFromUArgs->targetLimit) {
|
| + /* target is full */
|
| + *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
| + }
|
| +
|
| + /* write back the updated pointers */
|
| + pToUArgs->source=(const char *)source;
|
| + pFromUArgs->target=(char *)target;
|
| +}
|
| +
|
| +static void
|
| +_ASCIIGetUnicodeSet(const UConverter *cnv,
|
| + const USetAdder *sa,
|
| + UConverterUnicodeSet which,
|
| + UErrorCode *pErrorCode) {
|
| + sa->addRange(sa->set, 0, 0x7f); /* adds the range 0..0x7f through the adder callback; cnv, which and pErrorCode are not used */
|
| +}
|
| +
|
| +static const UConverterImpl _ASCIIImpl={ /* function table for the US-ASCII converter; NULL marks a slot for which this file supplies no function. NOTE(review): slot meanings follow the UConverterImpl declaration, which is outside this file - confirm there */
|
| + UCNV_US_ASCII,
|
| +
|
| + NULL,
|
| + NULL,
|
| +
|
| + NULL,
|
| + NULL,
|
| + NULL,
|
| +
|
| + _ASCIIToUnicodeWithOffsets,
|
| + _ASCIIToUnicodeWithOffsets, /* same function in two adjacent slots - presumably the without/with-offsets variants; it skips offsets when pArgs->offsets is NULL */
|
| + _Latin1FromUnicodeWithOffsets,
|
| + _Latin1FromUnicodeWithOffsets, /* shared with Latin-1: that function limits code units to 0x7f when cnv->sharedData is not &_Latin1Data */
|
| + _ASCIIGetNextUChar,
|
| +
|
| + NULL,
|
| + NULL,
|
| + NULL,
|
| + NULL,
|
| + _ASCIIGetUnicodeSet,
|
| +
|
| + NULL,
|
| + ucnv_ASCIIFromUTF8
|
| +};
|
| +
|
| +static const UConverterStaticData _ASCIIStaticData={ /* constant description of the US-ASCII converter. NOTE(review): field order follows UConverterStaticData, declared outside this file - confirm there */
|
| + sizeof(UConverterStaticData),
|
| + "US-ASCII",
|
| + 367, UCNV_IBM, UCNV_US_ASCII, 1, 1, /* presumably codepage number, platform, converter type, min and max bytes per character - TODO confirm */
|
| + { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE, /* presumably the substitution byte sequence (0x1a) and its length 1 - TODO confirm */
|
| + 0,
|
| + 0,
|
| + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
|
| +};
|
| +
|
| +const UConverterSharedData _ASCIIData={ /* shared data tying together _ASCIIStaticData and _ASCIIImpl; mirrors the layout of the _Latin1Data initializer */
|
| + sizeof(UConverterSharedData), ~((uint32_t) 0),
|
| + NULL, NULL, &_ASCIIStaticData, FALSE, &_ASCIIImpl,
|
| + 0
|
| +};
|
| +
|
| +#endif
|
|
|
| Property changes on: icu51/source/common/ucnvlat1.c
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|