Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(936)

Side by Side Diff: third_party/hyphen/substrings.c

Issue 20860003: Remove hyphenation code from Chromium. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: rebase Created 7 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « third_party/hyphen/ooopatch.sed ('k') | third_party/hyphen/substrings.pl » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both
2 * licenses follows.
3 */
4
5 /* LibHnj - a library for high quality hyphenation and justification
6 * Copyright (C) 1998 Raph Levien,
7 * (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org),
8 * (C) 2001 Peter Novodvorsky (nidd@cs.msu.su)
9 * (C) 2006, 2007, 2008, 2010 László Németh (nemeth at OOo)
10 *
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Library General Public
13 * License as published by the Free Software Foundation; either
14 * version 2 of the License, or (at your option) any later version.
15 *
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Library General Public License for more details.
20 *
21 * You should have received a copy of the GNU Library General Public
22 * License along with this library; if not, write to the
23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 * Boston, MA 02111-1307 USA.
25 */
26
27 /*
28 * The contents of this file are subject to the Mozilla Public License
29 * Version 1.0 (the "MPL"); you may not use this file except in
30 * compliance with the MPL. You may obtain a copy of the MPL at
31 * http://www.mozilla.org/MPL/
32 *
33 * Software distributed under the MPL is distributed on an "AS IS" basis,
34 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL
35 * for the specific language governing rights and limitations under the
36 * MPL.
37 *
38 */
39
40 //
41 // A utility for finding substring embeddings in patterns
42
43 #include <stdio.h>
44 #include <string.h>
45 #include <stdlib.h>
46
47 #define MAXPATHS (256*1024)
48
49 //
50 //
51 static void die(
52 const char*msg
53 ) {
54 fprintf(stderr,"%s\n",msg);
55 exit(1);
56 }
57
58
59 // Finds the index of an entry, only used on xxx_key arrays
60 // Caveat: the table has to be sorted
61 static int find_in(
62 char *tab[],
63 int max,
64 const char *pat
65 ) {
66 int left=0, right=max-1;
67 while (left <= right) {
68 int mid = ((right-left)/2)+left;
69 int v = strcmp(pat,tab[mid]);
70 if (v>0) {
71 left = mid + 1;
72 } else if (v<0) {
73 right = mid -1;
74 } else {
75 return mid;
76 }
77 }
78 return -1;
79 }
80
81
82 // used by partition (which is used by qsort_arr)
83 //
84 static void swap2(
85 char *a[],
86 char *b[],
87 int i,
88 int j
89 ) {
90 if (i==j) return;
91 char*v;
92 v=a[i]; a[i]=a[j]; a[j]=v;
93 v=b[i]; b[i]=b[j]; b[j]=v;
94 }
95
96
97 // used by qsort_arr
98 //
99 static int partition(
100 char *a[],
101 char *b[],
102 int left,
103 int right,
104 int p
105 ) {
106 const char *pivotValue = a[p];
107 int i;
108 swap2(a,b,p,right); // Move pivot to end
109 p = left;
110 for (i=left; i<right; i++) {
111 if (strcmp(a[i],pivotValue)<=0) {
112 swap2(a,b,p,i);
113 p++;
114 }
115 }
116 swap2(a,b,right,p); // Move pivot to its final place
117 return p;
118 }
119
120
121 //
122 //
123 static void qsort_arr(
124 char *a[],
125 char *b[],
126 int left,
127 int right
128 ) {
129 while (right > left) {
130 int p = left + (right-left)/2; //select a pivot
131 p = partition(a,b, left, right, p);
132 if ((p-1) - left < right - (p+1)) {
133 qsort_arr(a,b, left, p-1);
134 left = p+1;
135 } else {
136 qsort_arr(a,b, p+1, right);
137 right = p-1;
138 }
139 }
140 }
141
142
143 // Removes extra '0' entries from the string
144 //
145 static char* compact(
146 char *expr
147 ) {
148 int l=strlen(expr);
149 int i,j;
150 for (i=0,j=0; i<l; i++) {
151 if (expr[i]!='0') expr[j++] = expr[i];
152 }
153 expr[j]=0;
154 return expr;
155 }
156
157
158 // convert 'n1im' to 0n1i0m0 expressed as a string
159 //
160 static void expand(
161 char *expr,
162 const char *pat,
163 int l
164 ) {
165 int el = 0;
166 char last = '.';
167 int i;
168 for (i=0; i<l; i++) {
169 char c = pat[i];
170 if ( (last<'0' || last>'9')
171 && (c <'0' || c >'9')
172 ) {
173 expr[el++] = '0';
174 }
175 expr[el++] = c;
176 last = c;
177 }
178 if (last<'0' || last>'9') expr[el++] = '0';
179 expr[el]=0;
180 }
181
182
183 // Combine two patterns, i.e. .ad4der + a2d becomes .a2d4der
184 // The second pattern needs to be a right side match of the first
185 // (modulo digits)
186 static char *combine(
187 char *expr,
188 const char *subexpr
189 ) {
190 int l1 = strlen(expr);
191 int l2 = strlen(subexpr);
192 int off = l1-l2;
193 int j;
194 // this works also for utf8 sequences because the substring is identical
195 // to the last substring-length bytes of expr except for the (single byte)
196 // hyphenation encoders
197 for (j=0; j<l2; j++) {
198 if (subexpr[j]>expr[off+j]) {
199 expr[off+j] = subexpr[j];
200 }
201 }
202 return expr;
203 }
204
205
206 //
207 //
208 int main(int argc, const char* argv[]) {
209 FILE *in, *out;
210 char *pattab_key[MAXPATHS];
211 char *pattab_val[MAXPATHS];
212 int patterns = 0;
213 char *newpattab_key[MAXPATHS];
214 char *newpattab_val[MAXPATHS];
215 int newpatterns = 0;
216 char format[132]; // 64+65+newline+zero+spare
217 int p;
218 if (argc!=3) die("Usage: <orig-file> <new-file>\n");
219 if ((in = fopen(argv[1],"r"))==NULL) die("Could not read input");
220 if ((out = fopen(argv[2],"w"))==NULL) die("Could not create output");
221 // read all patterns and split in pure text (_key) & expanded patterns (_val)
222 while(fgets(format,132,in) != NULL) {
223 int l = strlen(format);
224 if (format[l-1]=='\n') { l--; format[l]=0; } // Chomp
225 if (format[0]=='%' || format[0]==0) {
226 // skip
227 } else {
228 if (format[l-1]=='%') {
229 l--;
230 format[l] = 0; // remove '%'
231 }
232 int i,j;
233 char *pat = (char*) malloc(l+1);
234 char *org = (char*) malloc(l*2+1);
235 if (pat==NULL || org==NULL) die("not enough memory");
236 expand(org,format,l);
237 // remove hyphenation encoders (digits) from pat
238 for (i=0,j=0; i<l; i++) {
239 // odd, but utf-8 proof
240 char c = format[i];
241 if (c<'0' || c>'9') pat[j++]=c;
242 }
243 pat[j]=0;
244 p = patterns;
245 pattab_key[patterns] = pat;
246 pattab_val[patterns++] = org;
247 if (patterns>MAXPATHS) die("to many base patterns");
248 }
249 }
250 fclose(in);
251 // As we use binairy search, make sure it is sorted
252 qsort_arr(pattab_key,pattab_val,0,patterns-1);
253
254 for (p=0; p<patterns; p++) {
255 char *pat = pattab_key[p];
256 int patsize = strlen(pat);
257 int j,l;
258 for (l=1; l<=patsize; l++) {
259 for (j=1; j<=l; j++) {
260 int i = l-j;
261 int subpat_ndx;
262 char subpat[132];
263 strncpy(subpat,pat+i,j); subpat[j]=0;
264 if ((subpat_ndx = find_in(pattab_key,patterns,subpat))>=0) {
265 int newpat_ndx;
266 char *newpat=malloc(l+1);
267 if (newpat==NULL) die("not enough memory");
268 //printf("%s is embedded in %s\n",pattab_val[subpat_ndx],pattab_val[p]);
269 strncpy(newpat, pat+0,l); newpat[l]=0;
270 if ((newpat_ndx = find_in(newpattab_key,newpatterns,newpat))<0) {
271 char *neworg = malloc(132); // TODO: compute exact length
272 if (neworg==NULL) die("not enough memory");
273 expand(neworg,newpat,l);
274 newpattab_key[newpatterns] = newpat;
275 newpattab_val[newpatterns++] = combine(neworg,pattab_val[subpat_ndx] );
276 if (newpatterns>MAXPATHS) die("to many new patterns");
277 //printf("%*.*s|%*.*s[%s] (%s|%s) = %s\n",i,i,pat,j,j,pat+i,pat+i+j,pattab_v al[p],pattab_val[subpat_ndx],neworg);
278 } else {
279 free(newpat);
280 newpattab_val[newpat_ndx] = combine(
281 newpattab_val[newpat_ndx], pattab_val[subpat_ndx] );
282 }
283 }
284 }
285 }
286 }
287
288 /* for some tiny extra speed, one could forget the free()s
289 * as the memory is freed anyway on exit().
290 * However, the gain is minimal and now the code can be cleanly
291 * incorporated into other code */
292 for (p=0; p<newpatterns; p++) {
293 fprintf(out,"%s\n",compact(newpattab_val[p]));
294 free(newpattab_key[p]);
295 free(newpattab_val[p]);
296 }
297 fclose(out);
298
299 for (p=0; p<patterns; p++) {
300 free(pattab_key[p]);
301 free(pattab_val[p]);
302 }
303 return 0;
304 }
OLDNEW
« no previous file with comments | « third_party/hyphen/ooopatch.sed ('k') | third_party/hyphen/substrings.pl » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698