Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(500)

Unified Diff: third_party/hyphen/substrings.c

Issue 20860003: Remove hyphenation code from Chromium. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: rebase Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/hyphen/ooopatch.sed ('k') | third_party/hyphen/substrings.pl » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/hyphen/substrings.c
diff --git a/third_party/hyphen/substrings.c b/third_party/hyphen/substrings.c
deleted file mode 100644
index bd01afd56a6ed7845b07baa21c6d0645b8b5591e..0000000000000000000000000000000000000000
--- a/third_party/hyphen/substrings.c
+++ /dev/null
@@ -1,304 +0,0 @@
-/* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both
- * licenses follows.
- */
-
-/* LibHnj - a library for high quality hyphenation and justification
- * Copyright (C) 1998 Raph Levien,
- * (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org),
- * (C) 2001 Peter Novodvorsky (nidd@cs.msu.su)
- * (C) 2006, 2007, 2008, 2010 László Németh (nemeth at OOo)
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307 USA.
-*/
-
-/*
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.0 (the "MPL"); you may not use this file except in
- * compliance with the MPL. You may obtain a copy of the MPL at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the MPL is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL
- * for the specific language governing rights and limitations under the
- * MPL.
- *
- */
-
-//
-// A utility for finding substring embeddings in patterns
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-
-#define MAXPATHS (256*1024)
-
-//
-//
-static void die(
- const char*msg
-) {
- fprintf(stderr,"%s\n",msg);
- exit(1);
-}
-
-
-// Finds the index of an entry, only used on xxx_key arrays
-// Caveat: the table has to be sorted
-static int find_in(
- char *tab[],
- int max,
- const char *pat
-) {
- int left=0, right=max-1;
- while (left <= right) {
- int mid = ((right-left)/2)+left;
- int v = strcmp(pat,tab[mid]);
- if (v>0) {
- left = mid + 1;
- } else if (v<0) {
- right = mid -1;
- } else {
- return mid;
- }
- }
- return -1;
-}
-
-
-// used by partition (which is used by qsort_arr)
-//
-static void swap2(
- char *a[],
- char *b[],
- int i,
- int j
-) {
- if (i==j) return;
- char*v;
- v=a[i]; a[i]=a[j]; a[j]=v;
- v=b[i]; b[i]=b[j]; b[j]=v;
-}
-
-
-// used by qsort_arr
-//
-static int partition(
- char *a[],
- char *b[],
- int left,
- int right,
- int p
-) {
- const char *pivotValue = a[p];
- int i;
- swap2(a,b,p,right); // Move pivot to end
- p = left;
- for (i=left; i<right; i++) {
- if (strcmp(a[i],pivotValue)<=0) {
- swap2(a,b,p,i);
- p++;
- }
- }
- swap2(a,b,right,p); // Move pivot to its final place
- return p;
-}
-
-
-//
-//
-static void qsort_arr(
- char *a[],
- char *b[],
- int left,
- int right
-) {
- while (right > left) {
- int p = left + (right-left)/2; //select a pivot
- p = partition(a,b, left, right, p);
- if ((p-1) - left < right - (p+1)) {
- qsort_arr(a,b, left, p-1);
- left = p+1;
- } else {
- qsort_arr(a,b, p+1, right);
- right = p-1;
- }
- }
-}
-
-
-// Removes extra '0' entries from the string
-//
-static char* compact(
- char *expr
-) {
- int l=strlen(expr);
- int i,j;
- for (i=0,j=0; i<l; i++) {
- if (expr[i]!='0') expr[j++] = expr[i];
- }
- expr[j]=0;
- return expr;
-}
-
-
-// convert 'n1im' to 0n1i0m0 expressed as a string
-//
-static void expand(
- char *expr,
- const char *pat,
- int l
-) {
- int el = 0;
- char last = '.';
- int i;
- for (i=0; i<l; i++) {
- char c = pat[i];
- if ( (last<'0' || last>'9')
- && (c <'0' || c >'9')
- ) {
- expr[el++] = '0';
- }
- expr[el++] = c;
- last = c;
- }
- if (last<'0' || last>'9') expr[el++] = '0';
- expr[el]=0;
-}
-
-
-// Combine two patterns, i.e. .ad4der + a2d becomes .a2d4der
-// The second pattern needs to be a right side match of the first
-// (modulo digits)
-static char *combine(
- char *expr,
- const char *subexpr
-) {
- int l1 = strlen(expr);
- int l2 = strlen(subexpr);
- int off = l1-l2;
- int j;
- // this works also for utf8 sequences because the substring is identical
- // to the last substring-length bytes of expr except for the (single byte)
- // hyphenation encoders
- for (j=0; j<l2; j++) {
- if (subexpr[j]>expr[off+j]) {
- expr[off+j] = subexpr[j];
- }
- }
- return expr;
-}
-
-
-//
-//
-int main(int argc, const char* argv[]) {
- FILE *in, *out;
- char *pattab_key[MAXPATHS];
- char *pattab_val[MAXPATHS];
- int patterns = 0;
- char *newpattab_key[MAXPATHS];
- char *newpattab_val[MAXPATHS];
- int newpatterns = 0;
- char format[132]; // 64+65+newline+zero+spare
- int p;
- if (argc!=3) die("Usage: <orig-file> <new-file>\n");
- if ((in = fopen(argv[1],"r"))==NULL) die("Could not read input");
- if ((out = fopen(argv[2],"w"))==NULL) die("Could not create output");
- // read all patterns and split in pure text (_key) & expanded patterns (_val)
- while(fgets(format,132,in) != NULL) {
- int l = strlen(format);
- if (format[l-1]=='\n') { l--; format[l]=0; } // Chomp
- if (format[0]=='%' || format[0]==0) {
- // skip
- } else {
- if (format[l-1]=='%') {
- l--;
- format[l] = 0; // remove '%'
- }
- int i,j;
- char *pat = (char*) malloc(l+1);
- char *org = (char*) malloc(l*2+1);
- if (pat==NULL || org==NULL) die("not enough memory");
- expand(org,format,l);
- // remove hyphenation encoders (digits) from pat
- for (i=0,j=0; i<l; i++) {
- // odd, but utf-8 proof
- char c = format[i];
- if (c<'0' || c>'9') pat[j++]=c;
- }
- pat[j]=0;
- p = patterns;
- pattab_key[patterns] = pat;
- pattab_val[patterns++] = org;
- if (patterns>MAXPATHS) die("to many base patterns");
- }
- }
- fclose(in);
- // As we use binairy search, make sure it is sorted
- qsort_arr(pattab_key,pattab_val,0,patterns-1);
-
- for (p=0; p<patterns; p++) {
- char *pat = pattab_key[p];
- int patsize = strlen(pat);
- int j,l;
- for (l=1; l<=patsize; l++) {
- for (j=1; j<=l; j++) {
- int i = l-j;
- int subpat_ndx;
- char subpat[132];
- strncpy(subpat,pat+i,j); subpat[j]=0;
- if ((subpat_ndx = find_in(pattab_key,patterns,subpat))>=0) {
- int newpat_ndx;
- char *newpat=malloc(l+1);
- if (newpat==NULL) die("not enough memory");
- //printf("%s is embedded in %s\n",pattab_val[subpat_ndx],pattab_val[p]);
- strncpy(newpat, pat+0,l); newpat[l]=0;
- if ((newpat_ndx = find_in(newpattab_key,newpatterns,newpat))<0) {
- char *neworg = malloc(132); // TODO: compute exact length
- if (neworg==NULL) die("not enough memory");
- expand(neworg,newpat,l);
- newpattab_key[newpatterns] = newpat;
- newpattab_val[newpatterns++] = combine(neworg,pattab_val[subpat_ndx]);
- if (newpatterns>MAXPATHS) die("to many new patterns");
- //printf("%*.*s|%*.*s[%s] (%s|%s) = %s\n",i,i,pat,j,j,pat+i,pat+i+j,pattab_val[p],pattab_val[subpat_ndx],neworg);
- } else {
- free(newpat);
- newpattab_val[newpat_ndx] = combine(
- newpattab_val[newpat_ndx], pattab_val[subpat_ndx] );
- }
- }
- }
- }
- }
-
- /* for some tiny extra speed, one could forget the free()s
- * as the memory is freed anyway on exit().
- * However, the gain is minimal and now the code can be cleanly
- * incorporated into other code */
- for (p=0; p<newpatterns; p++) {
- fprintf(out,"%s\n",compact(newpattab_val[p]));
- free(newpattab_key[p]);
- free(newpattab_val[p]);
- }
- fclose(out);
-
- for (p=0; p<patterns; p++) {
- free(pattab_key[p]);
- free(pattab_val[p]);
- }
- return 0;
-}
« no previous file with comments | « third_party/hyphen/ooopatch.sed ('k') | third_party/hyphen/substrings.pl » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698