Index: third_party/re2/re2/make_unicode_casefold.py |
diff --git a/third_party/re2/re2/make_unicode_casefold.py b/third_party/re2/re2/make_unicode_casefold.py |
new file mode 100755 |
index 0000000000000000000000000000000000000000..3375d2ef1bf90b2b554a50a07594110555f6341b |
--- /dev/null |
+++ b/third_party/re2/re2/make_unicode_casefold.py |
@@ -0,0 +1,146 @@ |
+#!/usr/bin/python |
+# coding=utf-8 |
+# |
+# Copyright 2008 The RE2 Authors. All Rights Reserved. |
+# Use of this source code is governed by a BSD-style |
+# license that can be found in the LICENSE file. |
+ |
+# See unicode_casefold.h for description of case folding tables. |
+ |
+"""Generate C++ table for Unicode case folding.""" |
+ |
+import unicode, sys |
+ |
# Text written to stdout before the generated tables: a do-not-edit
# banner, the include for the table declarations, and the opening of
# namespace re2.
_header = """
// GENERATED BY make_unicode_casefold.py; DO NOT EDIT.
// make_unicode_casefold.py >unicode_casefold.cc

#include "re2/unicode_casefold.h"

namespace re2 {

"""

# Text written to stdout after the generated tables; closes namespace re2.
_trailer = """

} // namespace re2

"""
+ |
+def _Delta(a, b): |
+ """Compute the delta for b - a. Even/odd and odd/even |
+ are handled specially, as described above.""" |
+ if a+1 == b: |
+ if a%2 == 0: |
+ return 'EvenOdd' |
+ else: |
+ return 'OddEven' |
+ if a == b+1: |
+ if a%2 == 0: |
+ return 'OddEven' |
+ else: |
+ return 'EvenOdd' |
+ return b - a |
+ |
+def _AddDelta(a, delta): |
+ """Return a + delta, handling EvenOdd and OddEven specially.""" |
+ if type(delta) == int: |
+ return a+delta |
+ if delta == 'EvenOdd': |
+ if a%2 == 0: |
+ return a+1 |
+ else: |
+ return a-1 |
+ if delta == 'OddEven': |
+ if a%2 == 1: |
+ return a+1 |
+ else: |
+ return a-1 |
+ print >>sys.stderr, "Bad Delta: ", delta |
+ raise "Bad Delta" |
+ |
def _MakeRanges(pairs):
    """Compress (code point, folded code point) pairs into ranges.

    A list like [(65, 97), (66, 98), ..., (90, 122)] becomes
    [[65, 90, 32]].

    Args:
      pairs: iterable of (a, b) integer pairs. Ranges are only extended
        when consecutive pairs have increasing first elements, so the
        caller should pass the pairs sorted (main() sorts before calling).

    Returns:
      A list of [lo, hi, delta] lists. delta is an integer offset, one of
      the symbolic deltas 'EvenOdd' / 'OddEven' produced by _Delta, or the
      same name with a 'Skip' suffix. A 'Skip' range was built from code
      points two apart, so it describes every other code point in lo..hi.
    """
    ranges = []
    # Sentinel that is neither 1 nor 2 below any real code point, so the
    # first pair can never look like an extension of a previous range.
    last = -100

    def evenodd(last, a, b, r):
        """Extend range r by the adjacent code point a, if it fits."""
        if a != last + 1:
            return False
        delta = r[2]
        # A 'Skip' range has stride 2 and cannot absorb an adjacent code
        # point. Without this guard the Skip delta would reach _AddDelta,
        # which does not understand it and aborts the whole run.
        if isinstance(delta, str) and delta.endswith('Skip'):
            return False
        if b != _AddDelta(a, delta):
            return False
        r[1] = a
        return True

    def evenoddpair(last, a, b, r):
        """Extend range r by code point a two past its end, if it fits.

        Only symbolic deltas qualify. On success the range's delta gains
        (or keeps) the 'Skip' suffix.
        """
        if a != last + 2:
            return False
        delta = r[2]
        if not isinstance(delta, str):
            return False
        if delta.endswith('Skip'):
            base = delta[:-4]
        else:
            base = delta
            delta = base + 'Skip'
        if b != _AddDelta(a, base):
            return False
        r[1] = a
        r[2] = delta
        return True

    for a, b in pairs:
        if ranges and evenodd(last, a, b, ranges[-1]):
            pass
        elif ranges and evenoddpair(last, a, b, ranges[-1]):
            pass
        else:
            ranges.append([a, a, _Delta(a, b)])
        last = a
    return ranges
+ |
# The maximum size of a case-folding group.
# Case folding is implemented in parse.cc by a recursive process
# with a recursion depth equal to the size of the largest
# case-folding group, so it is important that this bound be small.
# The current tables have no group bigger than 4.
# If there are ever groups bigger than 10 or so, it will be
# time to rework the code in parse.cc.
# main() refuses to generate tables when a group exceeds this bound.
MaxCasefoldGroup = 4
+ |
def main():
    """Write the C++ case-folding tables to standard output.

    Reads the case groups from unicode.CaseGroups(), turns them into
    (from, to) code point pairs, and prints two range tables, named
    unicode_casefold and unicode_tolower, between _header and _trailer.

    Raises:
      unicode.Error: if a case group is larger than MaxCasefoldGroup or
        if a code point appears as a source more than once.
    """
    lowergroups, casegroups = unicode.CaseGroups()
    foldpairs = []
    seen = set()
    for c in casegroups:
        if len(c) > MaxCasefoldGroup:
            raise unicode.Error("casefold group too long: %s" % (c,))
        # c[i-1] deliberately wraps to the last element when i == 0, so
        # the pairs form a cycle through the group: last -> first ->
        # second -> ... -> last.
        for i in range(len(c)):
            if c[i-1] in seen:
                raise unicode.Error("bad casegroups %d -> %d" % (c[i-1], c[i]))
            seen.add(c[i-1])
            foldpairs.append([c[i-1], c[i]])

    # Pair every non-lower member of a group with the group's key.
    lowerpairs = []
    for lower, group in lowergroups.items():
        for g in group:
            if g != lower:
                lowerpairs.append([g, lower])

    # sys.stdout.write is used instead of the print statement so the
    # same source produces identical output on Python 2 and Python 3.
    emit = sys.stdout.write

    def printpairs(name, foldpairs):
        """Sort foldpairs in place and print them as table unicode_<name>."""
        foldpairs.sort()
        foldranges = _MakeRanges(foldpairs)
        # The group count reported here is always len(casegroups),
        # whichever table is being printed.
        emit("// %d groups, %d pairs, %d ranges\n"
             % (len(casegroups), len(foldpairs), len(foldranges)))
        emit("CaseFold unicode_%s[] = {\n" % (name,))
        for lo, hi, delta in foldranges:
            emit("\t{ %d, %d, %s },\n" % (lo, hi, delta))
        emit("};\n")
        emit("int num_unicode_%s = %d;\n" % (name, len(foldranges)))
        emit("\n")

    emit(_header + "\n")
    printpairs("casefold", foldpairs)
    printpairs("tolower", lowerpairs)
    emit(_trailer + "\n")
+ |
+if __name__ == '__main__': |
+ main() |