Index: third_party/hyphen/substrings.c |
diff --git a/third_party/hyphen/substrings.c b/third_party/hyphen/substrings.c |
deleted file mode 100644 |
index bd01afd56a6ed7845b07baa21c6d0645b8b5591e..0000000000000000000000000000000000000000 |
--- a/third_party/hyphen/substrings.c |
+++ /dev/null |
@@ -1,304 +0,0 @@ |
-/* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both |
- * licenses follows. |
- */ |
- |
-/* LibHnj - a library for high quality hyphenation and justification |
- * Copyright (C) 1998 Raph Levien, |
- * (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org), |
- * (C) 2001 Peter Novodvorsky (nidd@cs.msu.su) |
- * (C) 2006, 2007, 2008, 2010 László Németh (nemeth at OOo) |
- * |
- * This library is free software; you can redistribute it and/or |
- * modify it under the terms of the GNU Library General Public |
- * License as published by the Free Software Foundation; either |
- * version 2 of the License, or (at your option) any later version. |
- * |
- * This library is distributed in the hope that it will be useful, |
- * but WITHOUT ANY WARRANTY; without even the implied warranty of |
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
- * Library General Public License for more details. |
- * |
- * You should have received a copy of the GNU Library General Public |
- * License along with this library; if not, write to the |
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
- * Boston, MA 02111-1307 USA. |
-*/ |
- |
-/* |
- * The contents of this file are subject to the Mozilla Public License |
- * Version 1.0 (the "MPL"); you may not use this file except in |
- * compliance with the MPL. You may obtain a copy of the MPL at |
- * http://www.mozilla.org/MPL/ |
- * |
- * Software distributed under the MPL is distributed on an "AS IS" basis, |
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL |
- * for the specific language governing rights and limitations under the |
- * MPL. |
- * |
- */ |
- |
-// |
-// A utility for finding substring embeddings in patterns |
- |
-#include <stdio.h> |
-#include <string.h> |
-#include <stdlib.h> |
- |
-#define MAXPATHS (256*1024) |
- |
-// |
-// |
-static void die( |
- const char*msg |
-) { |
- fprintf(stderr,"%s\n",msg); |
- exit(1); |
-} |
- |
- |
-// Finds the index of an entry, only used on xxx_key arrays |
-// Caveat: the table has to be sorted |
-static int find_in( |
- char *tab[], |
- int max, |
- const char *pat |
-) { |
- int left=0, right=max-1; |
- while (left <= right) { |
- int mid = ((right-left)/2)+left; |
- int v = strcmp(pat,tab[mid]); |
- if (v>0) { |
- left = mid + 1; |
- } else if (v<0) { |
- right = mid -1; |
- } else { |
- return mid; |
- } |
- } |
- return -1; |
-} |
- |
- |
-// used by partition (which is used by qsort_arr) |
-// |
-static void swap2( |
- char *a[], |
- char *b[], |
- int i, |
- int j |
-) { |
- if (i==j) return; |
- char*v; |
- v=a[i]; a[i]=a[j]; a[j]=v; |
- v=b[i]; b[i]=b[j]; b[j]=v; |
-} |
- |
- |
-// used by qsort_arr |
-// |
-static int partition( |
- char *a[], |
- char *b[], |
- int left, |
- int right, |
- int p |
-) { |
- const char *pivotValue = a[p]; |
- int i; |
- swap2(a,b,p,right); // Move pivot to end |
- p = left; |
- for (i=left; i<right; i++) { |
- if (strcmp(a[i],pivotValue)<=0) { |
- swap2(a,b,p,i); |
- p++; |
- } |
- } |
- swap2(a,b,right,p); // Move pivot to its final place |
- return p; |
-} |
- |
- |
-// |
-// |
-static void qsort_arr( |
- char *a[], |
- char *b[], |
- int left, |
- int right |
-) { |
- while (right > left) { |
- int p = left + (right-left)/2; //select a pivot |
- p = partition(a,b, left, right, p); |
- if ((p-1) - left < right - (p+1)) { |
- qsort_arr(a,b, left, p-1); |
- left = p+1; |
- } else { |
- qsort_arr(a,b, p+1, right); |
- right = p-1; |
- } |
- } |
-} |
- |
- |
-// Removes extra '0' entries from the string |
-// |
-static char* compact( |
- char *expr |
-) { |
- int l=strlen(expr); |
- int i,j; |
- for (i=0,j=0; i<l; i++) { |
- if (expr[i]!='0') expr[j++] = expr[i]; |
- } |
- expr[j]=0; |
- return expr; |
-} |
- |
- |
-// convert 'n1im' to 0n1i0m0 expressed as a string |
-// |
-static void expand( |
- char *expr, |
- const char *pat, |
- int l |
-) { |
- int el = 0; |
- char last = '.'; |
- int i; |
- for (i=0; i<l; i++) { |
- char c = pat[i]; |
- if ( (last<'0' || last>'9') |
- && (c <'0' || c >'9') |
- ) { |
- expr[el++] = '0'; |
- } |
- expr[el++] = c; |
- last = c; |
- } |
- if (last<'0' || last>'9') expr[el++] = '0'; |
- expr[el]=0; |
-} |
- |
- |
-// Combine two patterns, i.e. .ad4der + a2d becomes .a2d4der |
-// The second pattern needs to be a right side match of the first |
-// (modulo digits) |
-static char *combine( |
- char *expr, |
- const char *subexpr |
-) { |
- int l1 = strlen(expr); |
- int l2 = strlen(subexpr); |
- int off = l1-l2; |
- int j; |
- // this works also for utf8 sequences because the substring is identical |
- // to the last substring-length bytes of expr except for the (single byte) |
- // hyphenation encoders |
- for (j=0; j<l2; j++) { |
- if (subexpr[j]>expr[off+j]) { |
- expr[off+j] = subexpr[j]; |
- } |
- } |
- return expr; |
-} |
- |
- |
-// |
-// |
-int main(int argc, const char* argv[]) { |
- FILE *in, *out; |
- char *pattab_key[MAXPATHS]; |
- char *pattab_val[MAXPATHS]; |
- int patterns = 0; |
- char *newpattab_key[MAXPATHS]; |
- char *newpattab_val[MAXPATHS]; |
- int newpatterns = 0; |
- char format[132]; // 64+65+newline+zero+spare |
- int p; |
- if (argc!=3) die("Usage: <orig-file> <new-file>\n"); |
- if ((in = fopen(argv[1],"r"))==NULL) die("Could not read input"); |
- if ((out = fopen(argv[2],"w"))==NULL) die("Could not create output"); |
- // read all patterns and split in pure text (_key) & expanded patterns (_val) |
- while(fgets(format,132,in) != NULL) { |
- int l = strlen(format); |
- if (format[l-1]=='\n') { l--; format[l]=0; } // Chomp |
- if (format[0]=='%' || format[0]==0) { |
- // skip |
- } else { |
- if (format[l-1]=='%') { |
- l--; |
- format[l] = 0; // remove '%' |
- } |
- int i,j; |
- char *pat = (char*) malloc(l+1); |
- char *org = (char*) malloc(l*2+1); |
- if (pat==NULL || org==NULL) die("not enough memory"); |
- expand(org,format,l); |
- // remove hyphenation encoders (digits) from pat |
- for (i=0,j=0; i<l; i++) { |
- // odd, but utf-8 proof |
- char c = format[i]; |
- if (c<'0' || c>'9') pat[j++]=c; |
- } |
- pat[j]=0; |
- p = patterns; |
- pattab_key[patterns] = pat; |
- pattab_val[patterns++] = org; |
- if (patterns>MAXPATHS) die("to many base patterns"); |
- } |
- } |
- fclose(in); |
- // As we use binairy search, make sure it is sorted |
- qsort_arr(pattab_key,pattab_val,0,patterns-1); |
- |
- for (p=0; p<patterns; p++) { |
- char *pat = pattab_key[p]; |
- int patsize = strlen(pat); |
- int j,l; |
- for (l=1; l<=patsize; l++) { |
- for (j=1; j<=l; j++) { |
- int i = l-j; |
- int subpat_ndx; |
- char subpat[132]; |
- strncpy(subpat,pat+i,j); subpat[j]=0; |
- if ((subpat_ndx = find_in(pattab_key,patterns,subpat))>=0) { |
- int newpat_ndx; |
- char *newpat=malloc(l+1); |
- if (newpat==NULL) die("not enough memory"); |
- //printf("%s is embedded in %s\n",pattab_val[subpat_ndx],pattab_val[p]); |
- strncpy(newpat, pat+0,l); newpat[l]=0; |
- if ((newpat_ndx = find_in(newpattab_key,newpatterns,newpat))<0) { |
- char *neworg = malloc(132); // TODO: compute exact length |
- if (neworg==NULL) die("not enough memory"); |
- expand(neworg,newpat,l); |
- newpattab_key[newpatterns] = newpat; |
- newpattab_val[newpatterns++] = combine(neworg,pattab_val[subpat_ndx]); |
- if (newpatterns>MAXPATHS) die("to many new patterns"); |
- //printf("%*.*s|%*.*s[%s] (%s|%s) = %s\n",i,i,pat,j,j,pat+i,pat+i+j,pattab_val[p],pattab_val[subpat_ndx],neworg); |
- } else { |
- free(newpat); |
- newpattab_val[newpat_ndx] = combine( |
- newpattab_val[newpat_ndx], pattab_val[subpat_ndx] ); |
- } |
- } |
- } |
- } |
- } |
- |
- /* for some tiny extra speed, one could forget the free()s |
- * as the memory is freed anyway on exit(). |
- * However, the gain is minimal and now the code can be cleanly |
- * incorporated into other code */ |
- for (p=0; p<newpatterns; p++) { |
- fprintf(out,"%s\n",compact(newpattab_val[p])); |
- free(newpattab_key[p]); |
- free(newpattab_val[p]); |
- } |
- fclose(out); |
- |
- for (p=0; p<patterns; p++) { |
- free(pattab_key[p]); |
- free(pattab_val[p]); |
- } |
- return 0; |
-} |