Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(203)

Unified Diff: third_party/re2/re2/make_unicode_casefold.py

Issue 10575037: Include RE2 library (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Less intrusive fix for Android Created 8 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/re2/re2/make_perl_groups.pl ('k') | third_party/re2/re2/make_unicode_groups.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/re2/re2/make_unicode_casefold.py
diff --git a/third_party/re2/re2/make_unicode_casefold.py b/third_party/re2/re2/make_unicode_casefold.py
new file mode 100755
index 0000000000000000000000000000000000000000..3375d2ef1bf90b2b554a50a07594110555f6341b
--- /dev/null
+++ b/third_party/re2/re2/make_unicode_casefold.py
@@ -0,0 +1,146 @@
+#!/usr/bin/python
+# coding=utf-8
+#
+# Copyright 2008 The RE2 Authors. All Rights Reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# See unicode_casefold.h for description of case folding tables.
+
+"""Generate C++ table for Unicode case folding."""
+
+import unicode, sys
+
+_header = """
+// GENERATED BY make_unicode_casefold.py; DO NOT EDIT.
+// make_unicode_casefold.py >unicode_casefold.cc
+
+#include "re2/unicode_casefold.h"
+
+namespace re2 {
+
+"""
+
+_trailer = """
+
+} // namespace re2
+
+"""
+
+def _Delta(a, b):
+ """Compute the delta for b - a. Even/odd and odd/even
+ are handled specially, as described above."""
+ if a+1 == b:
+ if a%2 == 0:
+ return 'EvenOdd'
+ else:
+ return 'OddEven'
+ if a == b+1:
+ if a%2 == 0:
+ return 'OddEven'
+ else:
+ return 'EvenOdd'
+ return b - a
+
+def _AddDelta(a, delta):
+ """Return a + delta, handling EvenOdd and OddEven specially."""
+ if type(delta) == int:
+ return a+delta
+ if delta == 'EvenOdd':
+ if a%2 == 0:
+ return a+1
+ else:
+ return a-1
+ if delta == 'OddEven':
+ if a%2 == 1:
+ return a+1
+ else:
+ return a-1
+ print >>sys.stderr, "Bad Delta: ", delta
+ raise "Bad Delta"
+
+def _MakeRanges(pairs):
+ """Turn a list like [(65,97), (66, 98), ..., (90,122)]
+ into [(65, 90, +32)]."""
+ ranges = []
+ last = -100
+
+ def evenodd(last, a, b, r):
+ if a != last+1 or b != _AddDelta(a, r[2]):
+ return False
+ r[1] = a
+ return True
+
+ def evenoddpair(last, a, b, r):
+ if a != last+2:
+ return False
+ delta = r[2]
+ d = delta
+ if type(delta) is not str:
+ return False
+ if delta.endswith('Skip'):
+ d = delta[:-4]
+ else:
+ delta = d + 'Skip'
+ if b != _AddDelta(a, d):
+ return False
+ r[1] = a
+ r[2] = delta
+ return True
+
+ for a, b in pairs:
+ if ranges and evenodd(last, a, b, ranges[-1]):
+ pass
+ elif ranges and evenoddpair(last, a, b, ranges[-1]):
+ pass
+ else:
+ ranges.append([a, a, _Delta(a, b)])
+ last = a
+ return ranges
+
+# The maximum size of a case-folding group.
+# Case folding is implemented in parse.cc by a recursive process
+# with a recursion depth equal to the size of the largest
+# case-folding group, so it is important that this bound be small.
+# The current tables have no group bigger than 4.
+# If there are ever groups bigger than 10 or so, it will be
+# time to rework the code in parse.cc.
+MaxCasefoldGroup = 4
+
+def main():
+ lowergroups, casegroups = unicode.CaseGroups()
+ foldpairs = []
+ seen = {}
+ for c in casegroups:
+ if len(c) > MaxCasefoldGroup:
+ raise unicode.Error("casefold group too long: %s" % (c,))
+ for i in range(len(c)):
+ if c[i-1] in seen:
+ raise unicode.Error("bad casegroups %d -> %d" % (c[i-1], c[i]))
+ seen[c[i-1]] = True
+ foldpairs.append([c[i-1], c[i]])
+
+ lowerpairs = []
+ for lower, group in lowergroups.iteritems():
+ for g in group:
+ if g != lower:
+ lowerpairs.append([g, lower])
+
+ def printpairs(name, foldpairs):
+ foldpairs.sort()
+ foldranges = _MakeRanges(foldpairs)
+ print "// %d groups, %d pairs, %d ranges" % (len(casegroups), len(foldpairs), len(foldranges))
+ print "CaseFold unicode_%s[] = {" % (name,)
+ for lo, hi, delta in foldranges:
+ print "\t{ %d, %d, %s }," % (lo, hi, delta)
+ print "};"
+ print "int num_unicode_%s = %d;" % (name, len(foldranges),)
+ print ""
+
+ print _header
+ printpairs("casefold", foldpairs)
+ printpairs("tolower", lowerpairs)
+ print _trailer
+
+if __name__ == '__main__':
+ main()
« no previous file with comments | « third_party/re2/re2/make_perl_groups.pl ('k') | third_party/re2/re2/make_unicode_groups.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698