Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(125)

Unified Diff: third_party/re2/re2/make_unicode_groups.py

Issue 10575037: Include RE2 library (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Less intrusive fix for Android Created 8 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/re2/re2/make_unicode_casefold.py ('k') | third_party/re2/re2/mimics_pcre.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/re2/re2/make_unicode_groups.py
diff --git a/third_party/re2/re2/make_unicode_groups.py b/third_party/re2/re2/make_unicode_groups.py
new file mode 100755
index 0000000000000000000000000000000000000000..c2e25c1fc75ca0626c5092043feb1cd8cde31c94
--- /dev/null
+++ b/third_party/re2/re2/make_unicode_groups.py
@@ -0,0 +1,111 @@
+#!/usr/bin/python
+# Copyright 2008 The RE2 Authors. All Rights Reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+"""Generate C++ tables for Unicode Script and Category groups."""
+
+import sys
+import unicode
+
# Text printed before the generated tables: a "generated file" banner, the
# include for re2/unicode_groups.h, and the opening of namespace re2.
_header = """
// GENERATED BY make_unicode_groups.py; DO NOT EDIT.
// make_unicode_groups.py >unicode_groups.cc

#include "re2/unicode_groups.h"

namespace re2 {

"""

# Text printed after the generated tables; closes namespace re2.
_trailer = """

} // namespace re2

"""

# Running totals of the 16-bit and 32-bit ranges emitted so far.
# PrintGroup adds to them and main reports them in a summary comment.
n16 = 0
n32 = 0
+
def MakeRanges(codes):
    """Collapse a sequence of integers into inclusive [lo, hi] ranges.

    For example, [1, 2, 3, 7, 8, 9] becomes [[1, 3], [7, 9]].

    Args:
      codes: iterable of integers. Only a value that is exactly one more
        than the previous value extends the current range, so the input
        is expected to be in ascending order; duplicates or out-of-order
        values simply start a new range.

    Returns:
      A list of two-element [lo, hi] lists, in input order. Empty input
      yields an empty list.
    """
    ranges = []
    for c in codes:
        # The upper bound of the last range is always the previous code, so
        # checking it replaces a separate "last" variable. The emptiness
        # test replaces the old -100 sentinel, which made a first code of
        # -99 index into an empty list.
        if ranges and c == ranges[-1][1] + 1:
            ranges[-1][1] = c
        else:
            ranges.append([c, c])
    return ranges
+
def PrintRanges(type, name, ranges):
    """Print the ranges to stdout as a static C++ array.

    Args:
      type: C++ element type name used in the declaration (callers in this
        file pass "URange16" or "URange32"). The parameter name shadows
        the builtin but is kept so keyword callers keep working.
      name: identifier of the emitted array.
      ranges: iterable of (lo, hi) integer pairs; each becomes one
        "{ lo, hi }," initializer line.
    """
    # print(...) with a single parenthesized argument prints the same text
    # under both Python 2 and Python 3.
    print("static %s %s[] = {" % (type, name,))
    for lo, hi in ranges:
        print("\t{ %d, %d }," % (lo, hi))
    print("};")
+
+# def PrintCodes(type, name, codes):
+# """Print the codes as an array of type named name."""
+# print "static %s %s[] = {" % (type, name,)
+# for c in codes:
+# print "\t%d," % (c,)
+# print "};"
+
def PrintGroup(name, codes):
    """Print the range tables for one group and return its UGroup literal.

    See unicode_groups.h for a description of the data structure.

    Args:
      name: group name; used as the quoted name in the literal and as the
        prefix of the emitted "<name>_range16" / "<name>_range32" arrays.
      codes: iterable of integer code points, expected in ascending order
        (see MakeRanges).

    Returns:
      A string of the form
      '{ "<name>", +1, <name>_range16, <n16>, <name>_range32, <n32> }',
      where a table with no ranges is written as "0, 0" and no array is
      printed for it.

    Side effects:
      Prints the non-empty range arrays to stdout and adds the number of
      ranges to the module-level counters n16 and n32.
    """
    global n16
    global n32

    # codes is scanned twice below; materialize it so a one-shot iterator
    # does not come up empty on the second pass.
    codes = list(codes)

    # Split codes into 16-bit ranges and 32-bit ranges.
    # Singleton ranges stay in range16; no separate code16 table is emitted.
    range16 = MakeRanges([c for c in codes if c < 65536])
    range32 = MakeRanges([c for c in codes if c >= 65536])

    n16 += len(range16)
    n32 += len(range32)

    ugroup = "{ \"%s\", +1" % (name,)
    if range16:
        PrintRanges("URange16", name + "_range16", range16)
        ugroup += ", %s_range16, %d" % (name, len(range16))
    else:
        ugroup += ", 0, 0"
    if range32:
        PrintRanges("URange32", name + "_range32", range32)
        ugroup += ", %s_range32, %d" % (name, len(range32))
    else:
        ugroup += ", 0, 0"
    ugroup += " }"
    return ugroup
+
def main():
    """Write the generated C++ group tables to stdout.

    Output order: the header, the range arrays for every category and
    script group, a comment with the total range counts, the sorted
    unicode_groups[] table, num_unicode_groups, and the trailer.
    """
    print(_header)
    ugroups = []
    # NOTE(review): assumes unicode.Categories() and unicode.Scripts() return
    # dict-like mappings of group name -> code list; items() then works on
    # both Python 2 and Python 3, unlike iteritems(). Confirm against the
    # project-local unicode module.
    for name, codes in unicode.Categories().items():
        ugroups.append(PrintGroup(name, codes))
    for name, codes in unicode.Scripts().items():
        ugroups.append(PrintGroup(name, codes))
    print("// %d 16-bit ranges, %d 32-bit ranges" % (n16, n32))
    print("UGroup unicode_groups[] = {")
    # Each literal starts with the quoted group name, so sorting the strings
    # orders the table by name.
    ugroups.sort()
    for ug in ugroups:
        print("\t%s," % (ug,))
    print("};")
    print("int num_unicode_groups = %d;" % (len(ugroups),))
    print(_trailer)


if __name__ == '__main__':
    main()
« no previous file with comments | « third_party/re2/re2/make_unicode_casefold.py ('k') | third_party/re2/re2/mimics_pcre.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698