| Index: icu51/source/common/unicode/stringtriebuilder.h
|
| ===================================================================
|
| --- icu51/source/common/unicode/stringtriebuilder.h (revision 0)
|
| +++ icu51/source/common/unicode/stringtriebuilder.h (revision 0)
|
| @@ -0,0 +1,400 @@
|
| +/*
|
| +*******************************************************************************
|
| +* Copyright (C) 2010-2012, International Business Machines
|
| +* Corporation and others. All Rights Reserved.
|
| +*******************************************************************************
|
| +* file name: stringtriebuilder.h
|
| +* encoding: US-ASCII
|
| +* tab size: 8 (not used)
|
| +* indentation:4
|
| +*
|
| +* created on: 2010dec24
|
| +* created by: Markus W. Scherer
|
| +*/
|
| +
|
| +#ifndef __STRINGTRIEBUILDER_H__
|
| +#define __STRINGTRIEBUILDER_H__
|
| +
|
| +#include "unicode/utypes.h"
|
| +#include "unicode/uobject.h"
|
| +
|
| +/**
|
| + * \file
|
| + * \brief C++ API: Builder API for trie builders
|
| + */
|
| +
|
| +// Forward declaration.
|
| +struct UHashtable;
|
| +typedef struct UHashtable UHashtable;
|
| +
|
| +/**
|
| + * Build options for BytesTrieBuilder and CharsTrieBuilder.
|
| + * @stable ICU 4.8
|
| + */
|
| +enum UStringTrieBuildOption {
|
| + /**
|
| + * Builds a trie quickly.
|
| + * @stable ICU 4.8
|
| + */
|
| + USTRINGTRIE_BUILD_FAST,
|
| + /**
|
| + * Builds a trie more slowly, attempting to generate
|
| + * a shorter but equivalent serialization.
|
| + * This build option also uses more memory.
|
| + *
|
| + * This option can be effective when many integer values are the same
|
| + * and string/byte sequence suffixes can be shared.
|
| + * Runtime speed is not expected to improve.
|
| + * @stable ICU 4.8
|
| + */
|
| + USTRINGTRIE_BUILD_SMALL
|
| +};
|
| +
|
| +U_NAMESPACE_BEGIN
|
| +
|
| +/**
|
| + * Base class for string trie builder classes.
|
| + *
|
| + * This class is not intended for public subclassing.
|
| + * @stable ICU 4.8
|
| + */
|
| +class U_COMMON_API StringTrieBuilder : public UObject {
|
| +public:
|
| +#ifndef U_HIDE_INTERNAL_API
|
| + /** @internal */
|
| + static UBool hashNode(const void *node);
|
| + /** @internal */
|
| + static UBool equalNodes(const void *left, const void *right);
|
| +#endif /* U_HIDE_INTERNAL_API */
|
| +
|
| +protected:
|
| + // Do not enclose the protected default constructor with #ifndef U_HIDE_INTERNAL_API
|
| + // or else the compiler will create a public default constructor.
|
| + /** @internal */
|
| + StringTrieBuilder();
|
| + /** @internal */
|
| + virtual ~StringTrieBuilder();
|
| +
|
| +#ifndef U_HIDE_INTERNAL_API
|
| + /** @internal */
|
| + void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);
|
| + /** @internal */
|
| + void deleteCompactBuilder();
|
| +
|
| + /** @internal */
|
| + void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);
|
| +
|
| + /** @internal */
|
| + int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex);
|
| + /** @internal */
|
| + int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length);
|
| +#endif /* U_HIDE_INTERNAL_API */
|
| +
|
| + class Node;
|
| +
|
| +#ifndef U_HIDE_INTERNAL_API
|
| + /** @internal */
|
| + Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode);
|
| + /** @internal */
|
| + Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
|
| + int32_t length, UErrorCode &errorCode);
|
| +#endif /* U_HIDE_INTERNAL_API */
|
| +
|
| + /** @internal */
|
| + virtual int32_t getElementStringLength(int32_t i) const = 0;
|
| + /** @internal */
|
| + virtual UChar getElementUnit(int32_t i, int32_t unitIndex) const = 0;
|
| + /** @internal */
|
| + virtual int32_t getElementValue(int32_t i) const = 0;
|
| +
|
| + // Finds the first unit index after this one where
|
| + // the first and last element have different units again.
|
| + /** @internal */
|
| + virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0;
|
| +
|
| + // Number of different units at unitIndex.
|
| + /** @internal */
|
| + virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0;
|
| + /** @internal */
|
| + virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0;
|
| + /** @internal */
|
| + virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const = 0;
|
| +
|
| + /** @internal */
|
| + virtual UBool matchNodesCanHaveValues() const = 0;
|
| +
|
| + /** @internal */
|
| + virtual int32_t getMaxBranchLinearSubNodeLength() const = 0;
|
| + /** @internal */
|
| + virtual int32_t getMinLinearMatch() const = 0;
|
| + /** @internal */
|
| + virtual int32_t getMaxLinearMatchLength() const = 0;
|
| +
|
| +#ifndef U_HIDE_INTERNAL_API
|
| + // max(BytesTrie::kMaxBranchLinearSubNodeLength, UCharsTrie::kMaxBranchLinearSubNodeLength).
|
| + /** @internal */
|
| + static const int32_t kMaxBranchLinearSubNodeLength=5;
|
| +
|
| + // Maximum number of nested split-branch levels for a branch on all 2^16 possible UChar units.
|
| + // log2(2^16/kMaxBranchLinearSubNodeLength) rounded up.
|
| + /** @internal */
|
| + static const int32_t kMaxSplitBranchLevels=14;
|
| +
|
| + /**
|
| + * Makes sure that there is only one unique node registered that is
|
| + * equivalent to newNode.
|
| + * @param newNode Input node. The builder takes ownership.
|
| + * @param errorCode ICU in/out UErrorCode.
|
| + Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.
|
| + * @return newNode if it is the first of its kind, or
|
| + * an equivalent node if newNode is a duplicate.
|
| + * @internal
|
| + */
|
| + Node *registerNode(Node *newNode, UErrorCode &errorCode);
|
| + /**
|
| + * Makes sure that there is only one unique FinalValueNode registered
|
| + * with this value.
|
| + * Avoids creating a node if the value is a duplicate.
|
| + * @param value A final value.
|
| + * @param errorCode ICU in/out UErrorCode.
|
| + Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.
|
| + * @return A FinalValueNode with the given value.
|
| + * @internal
|
| + */
|
| + Node *registerFinalValue(int32_t value, UErrorCode &errorCode);
|
| +
|
| + /*
|
| + * C++ note:
|
| + * registerNode() and registerFinalValue() take ownership of their input nodes,
|
| + * and only return owned nodes.
|
| + * If they see a failure UErrorCode, they will delete the input node.
|
| + * If they get a NULL pointer, they will record a U_MEMORY_ALLOCATION_ERROR.
|
| + * If there is a failure, they return NULL.
|
| + *
|
| + * NULL Node pointers can be safely passed into other Nodes because
|
| + * they call the static Node::hashCode() which checks for a NULL pointer first.
|
| + *
|
| + * Therefore, as long as builder functions register a new node,
|
| + * they need to check for failures only before explicitly dereferencing
|
| + * a Node pointer, or before setting a new UErrorCode.
|
| + */
|
| +
|
| + // Hash set of nodes, maps from nodes to integer 1.
|
| + /** @internal */
|
| + UHashtable *nodes;
|
| +
|
| + /** @internal */
|
| + class Node : public UObject {
|
| + public:
|
| + Node(int32_t initialHash) : hash(initialHash), offset(0) {}
|
| + inline int32_t hashCode() const { return hash; }
|
| + // Handles node==NULL.
|
| + static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); }
|
| + // Base class operator==() compares the actual class types.
|
| + virtual UBool operator==(const Node &other) const;
|
| + inline UBool operator!=(const Node &other) const { return !operator==(other); }
|
| + /**
|
| + * Traverses the Node graph and numbers branch edges, with rightmost edges first.
|
| + * This is to avoid writing a duplicate node twice.
|
| + *
|
| + * Branch nodes in this trie data structure are not symmetric.
|
| + * Most branch edges "jump" to other nodes but the rightmost branch edges
|
| + * just continue without a jump.
|
| + * Therefore, write() must write the rightmost branch edge last
|
| + * (trie units are written backwards), and must write it at that point even if
|
| + * it is a duplicate of a node previously written elsewhere.
|
| + *
|
| + * This function visits and marks right branch edges first.
|
| + * Edges are numbered with increasingly negative values because we share the
|
| + * offset field which gets positive values when nodes are written.
|
| + * A branch edge also remembers the first number for any of its edges.
|
| + *
|
| + * When a further-left branch edge has a number in the range of the rightmost
|
| + * edge's numbers, then it will be written as part of the required right edge
|
| + * and we can avoid writing it first.
|
| + *
|
| + * After root.markRightEdgesFirst(-1) the offsets of all nodes are negative
|
| + * edge numbers.
|
| + *
|
| + * @param edgeNumber The first edge number for this node and its sub-nodes.
|
| + * @return An edge number that is at least the maximum-negative
|
| + * of the input edge number and the numbers of this node and all of its sub-nodes.
|
| + */
|
| + virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
|
| + // write() must set the offset to a positive value.
|
| + virtual void write(StringTrieBuilder &builder) = 0;
|
| + // See markRightEdgesFirst.
|
| + inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
|
| + StringTrieBuilder &builder) {
|
| + // Note: Edge numbers are negative, lastRight<=firstRight.
|
| + // If offset>0 then this node and its sub-nodes have been written already
|
| + // and we need not write them again.
|
| + // If this node is part of the unwritten right branch edge,
|
| + // then we wait until that is written.
|
| + if(offset<0 && (offset<lastRight || firstRight<offset)) {
|
| + write(builder);
|
| + }
|
| + }
|
| + inline int32_t getOffset() const { return offset; }
|
| + protected:
|
| + int32_t hash;
|
| + int32_t offset;
|
| + };
|
| +
|
| + // This class should not be overridden because
|
| + // registerFinalValue() compares a stack-allocated FinalValueNode
|
| + // (stack-allocated so that we don't unnecessarily create lots of duplicate nodes)
|
| + // with the input node, and the
|
| + // !Node::operator==(other) used inside FinalValueNode::operator==(other)
|
| + // will be false if the typeid's are different.
|
| + /** @internal */
|
| + class FinalValueNode : public Node {
|
| + public:
|
| + FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {}
|
| + virtual UBool operator==(const Node &other) const;
|
| + virtual void write(StringTrieBuilder &builder);
|
| + protected:
|
| + int32_t value;
|
| + };
|
| +
|
| + /**
|
| + * @internal
|
| + */
|
| + class ValueNode : public Node {
|
| + public:
|
| + ValueNode(int32_t initialHash) : Node(initialHash), hasValue(FALSE), value(0) {}
|
| + virtual UBool operator==(const Node &other) const;
|
| + void setValue(int32_t v) {
|
| + hasValue=TRUE;
|
| + value=v;
|
| + hash=hash*37+v;
|
| + }
|
| + protected:
|
| + UBool hasValue;
|
| + int32_t value;
|
| + };
|
| +
|
| + /**
|
| + * @internal
|
| + */
|
| + class IntermediateValueNode : public ValueNode {
|
| + public:
|
| + IntermediateValueNode(int32_t v, Node *nextNode)
|
| + : ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); }
|
| + virtual UBool operator==(const Node &other) const;
|
| + virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
|
| + virtual void write(StringTrieBuilder &builder);
|
| + protected:
|
| + Node *next;
|
| + };
|
| +
|
| + /**
|
| + * @internal
|
| + */
|
| + class LinearMatchNode : public ValueNode {
|
| + public:
|
| + LinearMatchNode(int32_t len, Node *nextNode)
|
| + : ValueNode((0x333333*37+len)*37+hashCode(nextNode)),
|
| + length(len), next(nextNode) {}
|
| + virtual UBool operator==(const Node &other) const;
|
| + virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
|
| + protected:
|
| + int32_t length;
|
| + Node *next;
|
| + };
|
| +
|
| + /**
|
| + * @internal
|
| + */
|
| + class BranchNode : public Node {
|
| + public:
|
| + BranchNode(int32_t initialHash) : Node(initialHash) {}
|
| + protected:
|
| + int32_t firstEdgeNumber;
|
| + };
|
| +
|
| + /**
|
| + * @internal
|
| + */
|
| + class ListBranchNode : public BranchNode {
|
| + public:
|
| + ListBranchNode() : BranchNode(0x444444), length(0) {}
|
| + virtual UBool operator==(const Node &other) const;
|
| + virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
|
| + virtual void write(StringTrieBuilder &builder);
|
| + // Adds a unit with a final value.
|
| + void add(int32_t c, int32_t value) {
|
| + units[length]=(UChar)c;
|
| + equal[length]=NULL;
|
| + values[length]=value;
|
| + ++length;
|
| + hash=(hash*37+c)*37+value;
|
| + }
|
| + // Adds a unit which leads to another match node.
|
| + void add(int32_t c, Node *node) {
|
| + units[length]=(UChar)c;
|
| + equal[length]=node;
|
| + values[length]=0;
|
| + ++length;
|
| + hash=(hash*37+c)*37+hashCode(node);
|
| + }
|
| + protected:
|
| + Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value".
|
| + int32_t length;
|
| + int32_t values[kMaxBranchLinearSubNodeLength];
|
| + UChar units[kMaxBranchLinearSubNodeLength];
|
| + };
|
| +
|
| + /**
|
| + * @internal
|
| + */
|
| + class SplitBranchNode : public BranchNode {
|
| + public:
|
| + SplitBranchNode(UChar middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
|
| + : BranchNode(((0x555555*37+middleUnit)*37+
|
| + hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)),
|
| + unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
|
| + virtual UBool operator==(const Node &other) const;
|
| + virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
|
| + virtual void write(StringTrieBuilder &builder);
|
| + protected:
|
| + UChar unit;
|
| + Node *lessThan;
|
| + Node *greaterOrEqual;
|
| + };
|
| +
|
| + // Branch head node, for writing the actual node lead unit.
|
| + /** @internal */
|
| + class BranchHeadNode : public ValueNode {
|
| + public:
|
| + BranchHeadNode(int32_t len, Node *subNode)
|
| + : ValueNode((0x666666*37+len)*37+hashCode(subNode)),
|
| + length(len), next(subNode) {}
|
| + virtual UBool operator==(const Node &other) const;
|
| + virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
|
| + virtual void write(StringTrieBuilder &builder);
|
| + protected:
|
| + int32_t length;
|
| + Node *next; // A branch sub-node.
|
| + };
|
| +#endif /* U_HIDE_INTERNAL_API */
|
| +
|
| + /** @internal */
|
| + virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
|
| + Node *nextNode) const = 0;
|
| +
|
| + /** @internal */
|
| + virtual int32_t write(int32_t unit) = 0;
|
| + /** @internal */
|
| + virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0;
|
| + /** @internal */
|
| + virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0;
|
| + /** @internal */
|
| + virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0;
|
| + /** @internal */
|
| + virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0;
|
| +};
|
| +
|
| +U_NAMESPACE_END
|
| +
|
| +#endif // __STRINGTRIEBUILDER_H__
|
|
|
| Property changes on: icu51/source/common/unicode/stringtriebuilder.h
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|