OLD | NEW |
| (Empty) |
1 /* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both | |
2 * licenses follows. | |
3 */ | |
4 | |
5 /* LibHnj - a library for high quality hyphenation and justification | |
6 * Copyright (C) 1998 Raph Levien, | |
7 * (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org), | |
8 * (C) 2001 Peter Novodvorsky (nidd@cs.msu.su) | |
9 * (C) 2006, 2007, 2008, 2010 László Németh (nemeth at OOo) | |
10 * | |
11 * This library is free software; you can redistribute it and/or | |
12 * modify it under the terms of the GNU Library General Public | |
13 * License as published by the Free Software Foundation; either | |
14 * version 2 of the License, or (at your option) any later version. | |
15 * | |
16 * This library is distributed in the hope that it will be useful, | |
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
19 * Library General Public License for more details. | |
20 * | |
21 * You should have received a copy of the GNU Library General Public | |
22 * License along with this library; if not, write to the | |
23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
24 * Boston, MA 02111-1307 USA. | |
25 */ | |
26 | |
27 /* | |
28 * The contents of this file are subject to the Mozilla Public License | |
29 * Version 1.0 (the "MPL"); you may not use this file except in | |
30 * compliance with the MPL. You may obtain a copy of the MPL at | |
31 * http://www.mozilla.org/MPL/ | |
32 * | |
33 * Software distributed under the MPL is distributed on an "AS IS" basis, | |
34 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL | |
35 * for the specific language governing rights and limitations under the | |
36 * MPL. | |
37 * | |
38 */ | |
39 | |
40 #include <string.h> | |
41 #include <stdlib.h> | |
42 #include <stdio.h> | |
43 #include <ctype.h> | |
44 | |
45 #include "hyphen.h" | |
46 | |
47 #define BUFSIZE 1000 | |
48 | |
/*
 * Print the command-line usage summary to stderr.
 *
 * The option list matches what main() actually parses: an optional "-o"
 * and/or "-d" in front of the two file arguments.
 */
void help(void) {
  fprintf(stderr,"correct syntax is:\n");
  fprintf(stderr,"example [-o | -d] hyphen_dictionary_file file_of_words_to_check\n");
  fprintf(stderr,"-o = use old algorithm (without non-standard hyphenation)\n");
  fprintf(stderr,"-d = hyphenation with listing of the possible hyphenations\n");
}
55 | |
/*
 * Get a pointer to the nth 8-bit or UTF-8 character of the word.
 *
 * word: NUL-terminated string to index into
 * n:    number of characters to skip (0 returns word itself)
 * utf8: when nonzero, a character is a lead byte plus any following
 *       continuation bytes (10xxxxxx); when zero, every byte is a character
 *
 * Returns a pointer into word. If word holds fewer than n characters, the
 * pointer to the terminating NUL is returned instead of running past the
 * end of the string.
 */
char * hindex(char * word, int n, int utf8) {
  int j;
  for (j = 0; j < n && *word != '\0'; j++) {
    word++;
    /* skip the continuation bytes that belong to the character just passed */
    while (utf8 && ((((unsigned char) *word) >> 6) == 2)) word++;
  }
  return word;
}
66 | |
/*
 * List the possible hyphenations one per line (used with the -d option;
 * an example for the usage of the output of hnj_hyphen_hyphenate2()).
 *
 * word:   NUL-terminated word; it is split in place temporarily while
 *         printing and restored before the function returns
 * hyphen: one entry per character; an odd value marks a hyphenation point
 *         after that character
 * rep:    may be NULL; rep[j], when set, is the replacement text printed
 *         for the non-standard hyphenation at character j
 * pos:    with rep[j], the printed prefix ends at character j - pos[j] + 1
 * cut:    with rep[j], the number of characters skipped after that prefix
 * utf8:   nonzero if word is UTF-8 encoded, so that hyphen/rep/pos/cut are
 *         indexed by character rather than by byte
 *
 * Standard breaks are printed as " - prefix=suffix"; non-standard ones as
 * " - prefix" followed by rep[j] and the rest of the word after the cut.
 */
void single_hyphenations(char * word, char * hyphen, char ** rep, int * pos, int * cut, int utf8) {
  size_t i;
  size_t len = strlen(word); /* word is always restored before the next test */
  int j = 0;                 /* character index; i is the byte index */
  char * split;
  char saved;
  for (i = 0; i + 1 < len; i++) {
    /* UTF-8 continuation bytes do not start a new character */
    if (utf8 && ((((unsigned char) word[i]) >> 6) == 2)) continue;
    if ((hyphen[j] & 1)) {
      if (rep && rep[j]) {
        split = hindex(word, j - pos[j] + 1, utf8);
        saved = *split;
        *split = '\0';
        printf(" - %s%s", word, rep[j]);
        *split = saved;
        printf("%s\n", hindex(split, cut[j], utf8));
      } else {
        split = hindex(word, j + 1, utf8);
        saved = *split;
        *split = '\0';
        printf(" - %s=", word);
        *split = saved;
        printf("%s\n", split);
      }
    }
    j++;
  }
}
93 | |
94 int | |
95 main(int argc, char** argv) | |
96 { | |
97 | |
98 HyphenDict *dict; | |
99 int df; | |
100 int wtc; | |
101 FILE* wtclst; | |
102 int k, n, i, j, c; | |
103 char buf[BUFSIZE + 1]; | |
104 int nHyphCount; | |
105 char *hyphens; | |
106 char *lcword; | |
107 char *hyphword; | |
108 char hword[BUFSIZE * 2]; | |
109 int arg = 1; | |
110 int optd = 1; | |
111 int optdd = 0; | |
112 char ** rep; | |
113 int * pos; | |
114 int * cut; | |
115 | |
116 /* first parse the command line options */ | |
117 /* arg1 - hyphen dictionary file, arg2 - file of words to check */ | |
118 | |
119 if (argv[arg]) { | |
120 if (strcmp(argv[arg], "-o") == 0) { | |
121 optd = 0; | |
122 arg++; | |
123 } | |
124 if (argv[arg] && strcmp(argv[arg], "-d") == 0) { | |
125 optd = 1; | |
126 optdd = 1; | |
127 arg++; | |
128 } | |
129 } | |
130 | |
131 if (argv[arg]) { | |
132 df = arg++; | |
133 } else { | |
134 help(); | |
135 exit(1); | |
136 } | |
137 | |
138 if (argv[arg]) { | |
139 wtc = arg++; | |
140 } else { | |
141 help(); | |
142 exit(1); | |
143 } | |
144 | |
145 /* load the hyphenation dictionary */ | |
146 if ((dict = hnj_hyphen_load(argv[df])) == NULL) { | |
147 fprintf(stderr, "Couldn't find file %s\n", argv[df]); | |
148 fflush(stderr); | |
149 exit(1); | |
150 } | |
151 | |
152 /* open the words to check list */ | |
153 wtclst = fopen(argv[wtc],"r"); | |
154 if (!wtclst) { | |
155 fprintf(stderr,"Error - could not open file of words to check\n"); | |
156 exit(1); | |
157 } | |
158 | |
159 | |
160 /* now read each word from the wtc file */ | |
161 while(fgets(buf,BUFSIZE,wtclst) != NULL) { | |
162 k = strlen(buf); | |
163 if (buf[k - 1] == '\n') buf[k - 1] = '\0'; | |
164 if (*buf && buf[k - 2] == '\r') buf[k-- - 2] = '\0'; | |
165 | |
166 /* set aside some buffers to hold lower cased */ | |
167 /* and hyphen information */ | |
168 lcword = (char *) malloc(k+1); | |
169 hyphens = (char *)malloc(k+5); | |
170 /* basic ascii lower-case, not suitable for real-world usage*/ | |
171 for (i = 0; i < k; ++i) | |
172 lcword[i] = tolower(buf[i]); | |
173 | |
174 /* first remove any trailing periods */ | |
175 n = k-1; | |
176 while((n >=0) && (lcword[n] == '.')) n--; | |
177 n++; | |
178 | |
179 /* now actually try to hyphenate the word */ | |
180 | |
181 rep = NULL; | |
182 pos = NULL; | |
183 cut = NULL; | |
184 hword[0] = '\0'; | |
185 | |
186 if ((!optd && hnj_hyphen_hyphenate(dict, lcword, n-1, hyphens)) || | |
187 (optd && hnj_hyphen_hyphenate2(dict, lcword, n-1, hyphens, hword, &r
ep, &pos, &cut))) { | |
188 free(hyphens); | |
189 free(lcword); | |
190 fprintf(stderr, "hyphenation error\n"); | |
191 exit(1); | |
192 } | |
193 | |
194 if (!optd) { | |
195 /* now backfill hyphens[] for any removed periods */ | |
196 for (c = n; c < k; c++) hyphens[c] = '0'; | |
197 hyphens[k] = '\0'; | |
198 | |
199 /* now create a new char string showing hyphenation positions */ | |
200 /* count the hyphens and allocate space for the new hypehanted string *
/ | |
201 nHyphCount = 0; | |
202 for (i = 0; i < n; i++) | |
203 if (hyphens[i]&1) | |
204 nHyphCount++; | |
205 hyphword = (char *) malloc(k+1+nHyphCount); | |
206 j = 0; | |
207 for (i = 0; i < n; i++) { | |
208 hyphword[j++] = buf[i]; | |
209 if (hyphens[i]&1) { | |
210 hyphword[j++] = '-'; | |
211 } | |
212 } | |
213 hyphword[j] = '\0'; | |
214 fprintf(stdout,"%s\n",hyphword); | |
215 fflush(stdout); | |
216 free(hyphword); | |
217 } else { | |
218 /* fprintf(stderr, "vasz: %s", hyphens); */ | |
219 fprintf(stdout,"%s\n", hword); | |
220 | |
221 | |
222 if (optdd) single_hyphenations(lcword, hyphens, rep, pos, cut, dict->ut
f8); | |
223 if (rep) { | |
224 for (i = 0; i < n - 1; i++) { | |
225 if (rep[i]) free(rep[i]); | |
226 } | |
227 free(rep); | |
228 free(pos); | |
229 free(cut); | |
230 } | |
231 } | |
232 free(hyphens); | |
233 free(lcword); | |
234 } | |
235 | |
236 fclose(wtclst); | |
237 hnj_hyphen_free(dict); | |
238 return 0; | |
239 } | |
OLD | NEW |