Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(133)

Side by Side Diff: third_party/re2/re2/make_unicode_casefold.py

Issue 10575037: Include RE2 library (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Less intrusive fix for Android Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « third_party/re2/re2/make_perl_groups.pl ('k') | third_party/re2/re2/make_unicode_groups.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/python
2 # coding=utf-8
3 #
4 # Copyright 2008 The RE2 Authors. All Rights Reserved.
5 # Use of this source code is governed by a BSD-style
6 # license that can be found in the LICENSE file.
7
8 # See unicode_casefold.h for description of case folding tables.
9
10 """Generate C++ table for Unicode case folding."""
11
12 import unicode, sys
13
14 _header = """
15 // GENERATED BY make_unicode_casefold.py; DO NOT EDIT.
16 // make_unicode_casefold.py >unicode_casefold.cc
17
18 #include "re2/unicode_casefold.h"
19
20 namespace re2 {
21
22 """
23
24 _trailer = """
25
26 } // namespace re2
27
28 """
29
30 def _Delta(a, b):
31 """Compute the delta for b - a. Even/odd and odd/even
32 are handled specially, as described above."""
33 if a+1 == b:
34 if a%2 == 0:
35 return 'EvenOdd'
36 else:
37 return 'OddEven'
38 if a == b+1:
39 if a%2 == 0:
40 return 'OddEven'
41 else:
42 return 'EvenOdd'
43 return b - a
44
45 def _AddDelta(a, delta):
46 """Return a + delta, handling EvenOdd and OddEven specially."""
47 if type(delta) == int:
48 return a+delta
49 if delta == 'EvenOdd':
50 if a%2 == 0:
51 return a+1
52 else:
53 return a-1
54 if delta == 'OddEven':
55 if a%2 == 1:
56 return a+1
57 else:
58 return a-1
59 print >>sys.stderr, "Bad Delta: ", delta
60 raise "Bad Delta"
61
62 def _MakeRanges(pairs):
63 """Turn a list like [(65,97), (66, 98), ..., (90,122)]
64 into [(65, 90, +32)]."""
65 ranges = []
66 last = -100
67
68 def evenodd(last, a, b, r):
69 if a != last+1 or b != _AddDelta(a, r[2]):
70 return False
71 r[1] = a
72 return True
73
74 def evenoddpair(last, a, b, r):
75 if a != last+2:
76 return False
77 delta = r[2]
78 d = delta
79 if type(delta) is not str:
80 return False
81 if delta.endswith('Skip'):
82 d = delta[:-4]
83 else:
84 delta = d + 'Skip'
85 if b != _AddDelta(a, d):
86 return False
87 r[1] = a
88 r[2] = delta
89 return True
90
91 for a, b in pairs:
92 if ranges and evenodd(last, a, b, ranges[-1]):
93 pass
94 elif ranges and evenoddpair(last, a, b, ranges[-1]):
95 pass
96 else:
97 ranges.append([a, a, _Delta(a, b)])
98 last = a
99 return ranges
100
101 # The maximum size of a case-folding group.
102 # Case folding is implemented in parse.cc by a recursive process
103 # with a recursion depth equal to the size of the largest
104 # case-folding group, so it is important that this bound be small.
105 # The current tables have no group bigger than 4.
106 # If there are ever groups bigger than 10 or so, it will be
107 # time to rework the code in parse.cc.
108 MaxCasefoldGroup = 4
109
110 def main():
111 lowergroups, casegroups = unicode.CaseGroups()
112 foldpairs = []
113 seen = {}
114 for c in casegroups:
115 if len(c) > MaxCasefoldGroup:
116 raise unicode.Error("casefold group too long: %s" % (c,))
117 for i in range(len(c)):
118 if c[i-1] in seen:
119 raise unicode.Error("bad casegroups %d -> %d" % (c[i-1], c[i]))
120 seen[c[i-1]] = True
121 foldpairs.append([c[i-1], c[i]])
122
123 lowerpairs = []
124 for lower, group in lowergroups.iteritems():
125 for g in group:
126 if g != lower:
127 lowerpairs.append([g, lower])
128
129 def printpairs(name, foldpairs):
130 foldpairs.sort()
131 foldranges = _MakeRanges(foldpairs)
132 print "// %d groups, %d pairs, %d ranges" % (len(casegroups), len(foldpairs) , len(foldranges))
133 print "CaseFold unicode_%s[] = {" % (name,)
134 for lo, hi, delta in foldranges:
135 print "\t{ %d, %d, %s }," % (lo, hi, delta)
136 print "};"
137 print "int num_unicode_%s = %d;" % (name, len(foldranges),)
138 print ""
139
140 print _header
141 printpairs("casefold", foldpairs)
142 printpairs("tolower", lowerpairs)
143 print _trailer
144
145 if __name__ == '__main__':
146 main()
OLDNEW
« no previous file with comments | « third_party/re2/re2/make_perl_groups.pl ('k') | third_party/re2/re2/make_unicode_groups.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698