| Index: third_party/re2/re2/unicode_casefold.h
|
| diff --git a/third_party/re2/re2/unicode_casefold.h b/third_party/re2/re2/unicode_casefold.h
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..160b07ea6ad07017dfd87a9fa2160330b0975c33
|
| --- /dev/null
|
| +++ b/third_party/re2/re2/unicode_casefold.h
|
| @@ -0,0 +1,75 @@
|
| +// Copyright 2008 The RE2 Authors. All Rights Reserved.
|
| +// Use of this source code is governed by a BSD-style
|
| +// license that can be found in the LICENSE file.
|
| +
|
| +// Unicode case folding tables.
|
| +
|
| +// The Unicode case folding tables encode the mapping from one Unicode point
|
| +// to the next largest Unicode point with equivalent folding. The largest
|
| +// point wraps back to the first. For example, the tables map:
|
| +//
|
| +// 'A' -> 'a'
|
| +// 'a' -> 'A'
|
| +//
|
| +// 'K' -> 'k'
|
| +// 'k' -> 'K' (Kelvin symbol)
|
| +// 'K' -> 'K'
|
| +//
|
| +// Like everything Unicode, these tables are big. If we represent the table
|
| +// as a sorted list of uint32 pairs, it has 2049 entries and is 16 kB.
|
| +// Most table entries look like the ones around them:
|
| +// 'A' maps to 'A'+32, 'B' maps to 'B'+32, etc.
|
| +// Instead of listing all the pairs explicitly, we make a list of ranges
|
| +// and deltas, so that the table entries for 'A' through 'Z' can be represented
|
| +// as a single entry { 'A', 'Z', +32 }.
|
| +//
|
| +// In addition to blocks that map to each other (A-Z mapping to a-z)
|
| +// there are blocks of pairs that individually map to each other
|
| +// (for example, 0100<->0101, 0102<->0103, 0104<->0105, ...).
|
| +// For those, the special delta value EvenOdd marks even/odd pairs
|
| +// (if even, add 1; if odd, subtract 1), and OddEven marks odd/even pairs.
|
| +//
|
| +// In this form, the table has 274 entries, about 3kB. If we were to split
|
| +// the table into one for 16-bit codes and an overflow table for larger ones,
|
| +// we could get it down to about 1.5kB, but that's not worth the complexity.
|
| +//
|
| +// The grouped form also allows for efficient fold range calculations
|
| +// rather than looping one character at a time.
|
| +
|
| +#ifndef RE2_UNICODE_CASEFOLD_H__
|
| +#define RE2_UNICODE_CASEFOLD_H__
|
| +
|
| +#include "util/util.h"
|
| +
|
| +namespace re2 {
|
| +
|
| +enum {
|
| + EvenOdd = 1,
|
| + OddEven = -1,
|
| + EvenOddSkip = 1<<30,
|
| + OddEvenSkip,
|
| +};
|
| +
|
| +struct CaseFold {
|
| + uint32 lo;
|
| + uint32 hi;
|
| + int32 delta;
|
| +};
|
| +
|
| +extern CaseFold unicode_casefold[];
|
| +extern int num_unicode_casefold;
|
| +
|
| +extern CaseFold unicode_tolower[];
|
| +extern int num_unicode_tolower;
|
| +
|
| +// Returns the CaseFold* in the tables that contains rune.
|
| +// If rune is not in the tables, returns the first CaseFold* after rune.
|
| +// If rune is larger than any value in the tables, returns NULL.
|
| +extern CaseFold* LookupCaseFold(CaseFold*, int, Rune rune);
|
| +
|
| +// Returns the result of applying the fold f to the rune r.
|
| +extern Rune ApplyFold(CaseFold *f, Rune r);
|
| +
|
| +} // namespace re2
|
| +
|
| +#endif // RE2_UNICODE_CASEFOLD_H__
|
|
|