icu51/source/common/dictionarydata.cpp - Issue 20882002: Check in the pristine copy of ICU 51.2

Unified Diff: icu51/source/common/dictionarydata.cpp

Issue 20882002: Check in the pristine copy of ICU 51.2 (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: icu51/source/common/dictionarydata.cpp

===================================================================

--- icu51/source/common/dictionarydata.cpp (revision 0)

+++ icu51/source/common/dictionarydata.cpp (revision 0)

@@ -0,0 +1,228 @@

+/*

+*******************************************************************************

+* dictionarydata.cpp

+* created on: 2012may31

+* created by: Markus W. Scherer & Maxime Serrano

+*/

+#include "dictionarydata.h"

+#include "unicode/ucharstrie.h"

+#include "unicode/bytestrie.h"

+#include "unicode/udata.h"

+#include "cmemory.h"

+#if !UCONFIG_NO_BREAK_ITERATION

+U_NAMESPACE_BEGIN

+#ifndef CYGWINMSVC /* On Cygwin/MSVC, these definitions cause a symbol-redefinition error. */

+const int32_t DictionaryData::TRIE_TYPE_BYTES;

+const int32_t DictionaryData::TRIE_TYPE_UCHARS;

+#endif

+DictionaryMatcher::~DictionaryMatcher() {

+UCharsDictionaryMatcher::~UCharsDictionaryMatcher() {

+ udata_close(file);

+int32_t UCharsDictionaryMatcher::getType() const {

+ return DictionaryData::TRIE_TYPE_UCHARS;

+int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count, int32_t limit, int32_t *values) const {

+ UCharsTrie uct(characters);

+ UChar32 c = utext_next32(text);

+ if (c < 0) {

+ return 0;

+ }

+ UStringTrieResult result = uct.first(c);

+ int32_t numChars = 1;

+ count = 0;

+ for (;;) {

+ if (USTRINGTRIE_HAS_VALUE(result)) {

+ if (count < limit) {

+ if (values != NULL) {

+ values[count] = uct.getValue();

+ }

+ lengths[count++] = numChars;

+ }

+ if (result == USTRINGTRIE_FINAL_VALUE) {

+ break;

+ }

+ else if (result == USTRINGTRIE_NO_MATCH) {

+ break;

+ }

+ // TODO: why do we have a text limit if the UText knows its length?

+ if (numChars >= maxLength) {

+ break;

+ }

+ c = utext_next32(text);

+ if (c < 0) {

+ break;

+ }

+ ++numChars;

+ result = uct.next(c);

+ }

+ return numChars;

+BytesDictionaryMatcher::~BytesDictionaryMatcher() {

+ udata_close(file);

+UChar32 BytesDictionaryMatcher::transform(UChar32 c) const {

+ if ((transformConstant & DictionaryData::TRANSFORM_TYPE_MASK) == DictionaryData::TRANSFORM_TYPE_OFFSET) {

+ if (c == 0x200D) {

+ return 0xFF;

+ } else if (c == 0x200C) {

+ return 0xFE;

+ }

+ int32_t delta = c - (transformConstant & DictionaryData::TRANSFORM_OFFSET_MASK);

+ if (delta < 0 || 0xFD < delta) {

+ return U_SENTINEL;

+ }

+ return (UChar32)delta;

+ }

+ return c;

+int32_t BytesDictionaryMatcher::getType() const {

+ return DictionaryData::TRIE_TYPE_BYTES;

+int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count, int32_t limit, int32_t *values) const {

+ BytesTrie bt(characters);

+ UChar32 c = utext_next32(text);

+ if (c < 0) {

+ return 0;

+ }

+ UStringTrieResult result = bt.first(transform(c));

+ int32_t numChars = 1;

+ count = 0;

+ for (;;) {

+ if (USTRINGTRIE_HAS_VALUE(result)) {

+ if (count < limit) {

+ if (values != NULL) {

+ values[count] = bt.getValue();

+ }

+ lengths[count++] = numChars;

+ }

+ if (result == USTRINGTRIE_FINAL_VALUE) {

+ break;

+ }

+ else if (result == USTRINGTRIE_NO_MATCH) {

+ break;

+ }

+ // TODO: why do we have a text limit if the UText knows its length?

+ if (numChars >= maxLength) {

+ break;

+ }

+ c = utext_next32(text);

+ if (c < 0) {

+ break;

+ }

+ ++numChars;

+ result = bt.next(transform(c));

+ }

+ return numChars;

+U_NAMESPACE_END

+U_NAMESPACE_USE

+U_CAPI int32_t U_EXPORT2

+udict_swap(const UDataSwapper *ds, const void *inData, int32_t length,

+ void *outData, UErrorCode *pErrorCode) {

+ const UDataInfo *pInfo;

+ int32_t headerSize;

+ const uint8_t *inBytes;

+ uint8_t *outBytes;

+ const int32_t *inIndexes;

+ int32_t indexes[DictionaryData::IX_COUNT];

+ int32_t i, offset, size;

+ headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);

+ if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) return 0;

+ pInfo = (const UDataInfo *)((const char *)inData + 4);

+ if (!(pInfo->dataFormat[0] == 0x44 &&

+ pInfo->dataFormat[1] == 0x69 &&

+ pInfo->dataFormat[2] == 0x63 &&

+ pInfo->dataFormat[3] == 0x74 &&

+ pInfo->formatVersion[0] == 1)) {

+ udata_printError(ds, "udict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as dictionary data\n",

+ pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]);

+ *pErrorCode = U_UNSUPPORTED_ERROR;

+ return 0;

+ }

+ inBytes = (const uint8_t *)inData + headerSize;

+ outBytes = (uint8_t *)outData + headerSize;

+ inIndexes = (const int32_t *)inBytes;

+ if (length >= 0) {

+ length -= headerSize;

+ if (length < (int32_t)(sizeof(indexes))) {

+ udata_printError(ds, "udict_swap(): too few bytes (%d after header) for dictionary data\n", length);

+ *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;

+ return 0;

+ }

+ for (i = 0; i < DictionaryData::IX_COUNT; i++) {

+ indexes[i] = udata_readInt32(ds, inIndexes[i]);

+ }

+ size = indexes[DictionaryData::IX_TOTAL_SIZE];

+ if (length >= 0) {

+ if (length < size) {

+ udata_printError(ds, "udict_swap(): too few bytes (%d after header) for all of dictionary data\n", length);

+ *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;

+ return 0;

+ }

+ if (inBytes != outBytes) {

+ uprv_memcpy(outBytes, inBytes, size);

+ }

+ offset = 0;

+ ds->swapArray32(ds, inBytes, sizeof(indexes), outBytes, pErrorCode);

+ offset = (int32_t)sizeof(indexes);

+ int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;

+ int32_t nextOffset = indexes[DictionaryData::IX_RESERVED1_OFFSET];

+ if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {

+ ds->swapArray16(ds, inBytes + offset, nextOffset - offset, outBytes + offset, pErrorCode);

+ } else if (trieType == DictionaryData::TRIE_TYPE_BYTES) {

+ // nothing to do

+ } else {

+ udata_printError(ds, "udict_swap(): unknown trie type!\n");

+ *pErrorCode = U_UNSUPPORTED_ERROR;

+ return 0;

+ }

+ // these next two sections are empty in the current format,

+ // but may be used later.

+ offset = nextOffset;

+ nextOffset = indexes[DictionaryData::IX_RESERVED2_OFFSET];

+ offset = nextOffset;

+ nextOffset = indexes[DictionaryData::IX_TOTAL_SIZE];

+ offset = nextOffset;

+ }

+ return headerSize + size;

+#endif

Property changes on: icu51/source/common/dictionarydata.cpp

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « icu51/source/common/dictionarydata.h ('k') | icu51/source/common/dtintrv.cpp » ('j') | no next file with comments »