| OLD | NEW |
| (Empty) |
| 1 /* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both | |
| 2 * licenses follows. | |
| 3 */ | |
| 4 | |
| 5 /* LibHnj - a library for high quality hyphenation and justification | |
| 6 * Copyright (C) 1998 Raph Levien, | |
| 7 * (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org), | |
| 8 * (C) 2001 Peter Novodvorsky (nidd@cs.msu.su) | |
| 9 * (C) 2006, 2007, 2008, 2010 László Németh (nemeth at OOo) | |
| 10 * | |
| 11 * This library is free software; you can redistribute it and/or | |
| 12 * modify it under the terms of the GNU Library General Public | |
| 13 * License as published by the Free Software Foundation; either | |
| 14 * version 2 of the License, or (at your option) any later version. | |
| 15 * | |
| 16 * This library is distributed in the hope that it will be useful, | |
| 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 19 * Library General Public License for more details. | |
| 20 * | |
| 21 * You should have received a copy of the GNU Library General Public | |
| 22 * License along with this library; if not, write to the | |
| 23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
| 24 * Boston, MA 02111-1307 USA. | |
| 25 */ | |
| 26 | |
| 27 /* | |
| 28 * The contents of this file are subject to the Mozilla Public License | |
| 29 * Version 1.0 (the "MPL"); you may not use this file except in | |
| 30 * compliance with the MPL. You may obtain a copy of the MPL at | |
| 31 * http://www.mozilla.org/MPL/ | |
| 32 * | |
| 33 * Software distributed under the MPL is distributed on an "AS IS" basis, | |
| 34 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL | |
| 35 * for the specific language governing rights and limitations under the | |
| 36 * MPL. | |
| 37 * | |
| 38 */ | |
| 39 #include <stdlib.h> /* for NULL, malloc */ | |
| 40 #include <stdio.h> /* for fprintf */ | |
| 41 #include <string.h> /* for strdup */ | |
| 42 | |
| 43 #ifdef UNX | |
| 44 #include <unistd.h> /* for exit */ | |
| 45 #endif | |
| 46 | |
| 47 #define noVERBOSE | |
| 48 | |
| 49 /* calculate hyphenmin values with long ligature length (2 or 3 characters | |
| 50 * instead of 1 or 2) for comparison with hyphenation without ligatures */ | |
| 51 #define noLONG_LIGATURE | |
| 52 | |
| 53 #ifdef LONG_LIGATURE | |
| 54 #define LIG_xx 1 | |
| 55 #define LIG_xxx 2 | |
| 56 #else | |
| 57 #define LIG_xx 0 | |
| 58 #define LIG_xxx 1 | |
| 59 #endif | |
| 60 | |
| 61 #include "hnjalloc.h" | |
| 62 #include "hyphen.h" | |
| 63 | |
| 64 static char * | |
| 65 hnj_strdup (const char *s) | |
| 66 { | |
| 67 char *new; | |
| 68 int l; | |
| 69 | |
| 70 l = strlen (s); | |
| 71 new = hnj_malloc (l + 1); | |
| 72 memcpy (new, s, l); | |
| 73 new[l] = 0; | |
| 74 return new; | |
| 75 } | |
| 76 | |
| 77 /* remove cross-platform text line end characters */ | |
| 78 void hnj_strchomp(char * s) | |
| 79 { | |
| 80 int k = strlen(s); | |
| 81 if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0'; | |
| 82 if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0'; | |
| 83 } | |
| 84 | |
| 85 /* a little bit of a hash table implementation. This simply maps strings | |
| 86 to state numbers */ | |
| 87 | |
| 88 typedef struct _HashTab HashTab; | |
| 89 typedef struct _HashEntry HashEntry; | |
| 90 | |
| 91 /* A cheap, but effective, hack. */ | |
| 92 #define HASH_SIZE 31627 | |
| 93 | |
| 94 struct _HashTab { | |
| 95 HashEntry *entries[HASH_SIZE]; | |
| 96 }; | |
| 97 | |
| 98 struct _HashEntry { | |
| 99 HashEntry *next; | |
| 100 char *key; | |
| 101 int val; | |
| 102 }; | |
| 103 | |
| 104 /* a char* hash function from ASU - adapted from Gtk+ */ | |
| 105 static unsigned int | |
| 106 hnj_string_hash (const char *s) | |
| 107 { | |
| 108 const char *p; | |
| 109 unsigned int h=0, g; | |
| 110 for(p = s; *p != '\0'; p += 1) { | |
| 111 h = ( h << 4 ) + *p; | |
| 112 if ( ( g = h & 0xf0000000 ) ) { | |
| 113 h = h ^ (g >> 24); | |
| 114 h = h ^ g; | |
| 115 } | |
| 116 } | |
| 117 return h /* % M */; | |
| 118 } | |
| 119 | |
| 120 static HashTab * | |
| 121 hnj_hash_new (void) | |
| 122 { | |
| 123 HashTab *hashtab; | |
| 124 int i; | |
| 125 | |
| 126 hashtab = hnj_malloc (sizeof(HashTab)); | |
| 127 for (i = 0; i < HASH_SIZE; i++) | |
| 128 hashtab->entries[i] = NULL; | |
| 129 | |
| 130 return hashtab; | |
| 131 } | |
| 132 | |
| 133 static void | |
| 134 hnj_hash_free (HashTab *hashtab) | |
| 135 { | |
| 136 int i; | |
| 137 HashEntry *e, *next; | |
| 138 | |
| 139 for (i = 0; i < HASH_SIZE; i++) | |
| 140 for (e = hashtab->entries[i]; e; e = next) | |
| 141 { | |
| 142 next = e->next; | |
| 143 hnj_free (e->key); | |
| 144 hnj_free (e); | |
| 145 } | |
| 146 | |
| 147 hnj_free (hashtab); | |
| 148 } | |
| 149 | |
| 150 /* assumes that key is not already present! */ | |
| 151 static void | |
| 152 hnj_hash_insert (HashTab *hashtab, const char *key, int val) | |
| 153 { | |
| 154 int i; | |
| 155 HashEntry *e; | |
| 156 | |
| 157 i = hnj_string_hash (key) % HASH_SIZE; | |
| 158 e = hnj_malloc (sizeof(HashEntry)); | |
| 159 e->next = hashtab->entries[i]; | |
| 160 e->key = hnj_strdup (key); | |
| 161 e->val = val; | |
| 162 hashtab->entries[i] = e; | |
| 163 } | |
| 164 | |
| 165 /* return val if found, otherwise -1 */ | |
| 166 static int | |
| 167 hnj_hash_lookup (HashTab *hashtab, const char *key) | |
| 168 { | |
| 169 int i; | |
| 170 HashEntry *e; | |
| 171 i = hnj_string_hash (key) % HASH_SIZE; | |
| 172 for (e = hashtab->entries[i]; e; e = e->next) | |
| 173 if (!strcmp (key, e->key)) | |
| 174 return e->val; | |
| 175 return -1; | |
| 176 } | |
| 177 | |
| 178 /* Get the state number, allocating a new state if necessary. */ | |
| 179 static int | |
| 180 hnj_get_state (HyphenDict *dict, HashTab *hashtab, const char *string) | |
| 181 { | |
| 182 int state_num; | |
| 183 | |
| 184 state_num = hnj_hash_lookup (hashtab, string); | |
| 185 | |
| 186 if (state_num >= 0) | |
| 187 return state_num; | |
| 188 | |
| 189 hnj_hash_insert (hashtab, string, dict->num_states); | |
| 190 /* predicate is true if dict->num_states is a power of two */ | |
| 191 if (!(dict->num_states & (dict->num_states - 1))) | |
| 192 { | |
| 193 dict->states = hnj_realloc (dict->states, | |
| 194 (dict->num_states << 1) * | |
| 195 sizeof(HyphenState)); | |
| 196 } | |
| 197 dict->states[dict->num_states].match = NULL; | |
| 198 dict->states[dict->num_states].repl = NULL; | |
| 199 dict->states[dict->num_states].fallback_state = -1; | |
| 200 dict->states[dict->num_states].num_trans = 0; | |
| 201 dict->states[dict->num_states].trans = NULL; | |
| 202 return dict->num_states++; | |
| 203 } | |
| 204 | |
| 205 /* add a transition from state1 to state2 through ch - assumes that the | |
| 206 transition does not already exist */ | |
| 207 static void | |
| 208 hnj_add_trans (HyphenDict *dict, int state1, int state2, char ch) | |
| 209 { | |
| 210 int num_trans; | |
| 211 | |
| 212 num_trans = dict->states[state1].num_trans; | |
| 213 if (num_trans == 0) | |
| 214 { | |
| 215 dict->states[state1].trans = hnj_malloc (sizeof(HyphenTrans)); | |
| 216 } | |
| 217 else if (!(num_trans & (num_trans - 1))) | |
| 218 { | |
| 219 dict->states[state1].trans = hnj_realloc (dict->states[state1].trans, | |
| 220 (num_trans << 1) * | |
| 221 sizeof(HyphenTrans)); | |
| 222 } | |
| 223 dict->states[state1].trans[num_trans].ch = ch; | |
| 224 dict->states[state1].trans[num_trans].new_state = state2; | |
| 225 dict->states[state1].num_trans++; | |
| 226 } | |
| 227 | |
| 228 #ifdef VERBOSE | |
| 229 HashTab *global; | |
| 230 | |
| 231 static char * | |
| 232 get_state_str (int state) | |
| 233 { | |
| 234 int i; | |
| 235 HashEntry *e; | |
| 236 | |
| 237 for (i = 0; i < HASH_SIZE; i++) | |
| 238 for (e = global->entries[i]; e; e = e->next) | |
| 239 if (e->val == state) | |
| 240 return e->key; | |
| 241 return NULL; | |
| 242 } | |
| 243 #endif | |
| 244 | |
| 245 HyphenDict * | |
| 246 hnj_hyphen_load (const char *fn) | |
| 247 { | |
| 248 HyphenDict *result; | |
| 249 FILE *f; | |
| 250 f = fopen (fn, "r"); | |
| 251 if (f == NULL) | |
| 252 return NULL; | |
| 253 | |
| 254 result = hnj_hyphen_load_file(f); | |
| 255 | |
| 256 fclose(f); | |
| 257 return result; | |
| 258 } | |
| 259 | |
| 260 HyphenDict * | |
| 261 hnj_hyphen_load_file (FILE *f) | |
| 262 { | |
| 263 HyphenDict *dict[2]; | |
| 264 HashTab *hashtab; | |
| 265 char buf[MAX_CHARS]; | |
| 266 char word[MAX_CHARS]; | |
| 267 char pattern[MAX_CHARS]; | |
| 268 char * repl; | |
| 269 signed char replindex; | |
| 270 signed char replcut; | |
| 271 int state_num = 0, last_state; | |
| 272 int i, j, k; | |
| 273 char ch; | |
| 274 int found; | |
| 275 HashEntry *e; | |
| 276 int nextlevel = 0; | |
| 277 | |
| 278 // loading one or two dictionaries (separated by NEXTLEVEL keyword) | |
| 279 for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { | |
| 280 hashtab = hnj_hash_new (); | |
| 281 #ifdef VERBOSE | |
| 282 global = hashtab; | |
| 283 #endif | |
| 284 hnj_hash_insert (hashtab, "", 0); | |
| 285 dict[k] = hnj_malloc (sizeof(HyphenDict)); | |
| 286 dict[k]->num_states = 1; | |
| 287 dict[k]->states = hnj_malloc (sizeof(HyphenState)); | |
| 288 dict[k]->states[0].match = NULL; | |
| 289 dict[k]->states[0].repl = NULL; | |
| 290 dict[k]->states[0].fallback_state = -1; | |
| 291 dict[k]->states[0].num_trans = 0; | |
| 292 dict[k]->states[0].trans = NULL; | |
| 293 dict[k]->nextlevel = NULL; | |
| 294 dict[k]->lhmin = 0; | |
| 295 dict[k]->rhmin = 0; | |
| 296 dict[k]->clhmin = 0; | |
| 297 dict[k]->crhmin = 0; | |
| 298 | |
| 299 /* read in character set info */ | |
| 300 if (k == 0) { | |
| 301 for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0; | |
| 302 if (fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) { | |
| 303 for (i=0;i<MAX_NAME;i++) | |
| 304 if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) | |
| 305 dict[k]->cset[i] = 0; | |
| 306 } else { | |
| 307 dict[k]->cset[0] = 0; | |
| 308 } | |
| 309 dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0); | |
| 310 } else { | |
| 311 strcpy(dict[k]->cset, dict[0]->cset); | |
| 312 dict[k]->utf8 = dict[0]->utf8; | |
| 313 } | |
| 314 | |
| 315 while (fgets (buf, sizeof(buf), f) != NULL) | |
| 316 { | |
| 317 if (buf[0] != '%') | |
| 318 { | |
| 319 if (strncmp(buf, "NEXTLEVEL", 9) == 0) { | |
| 320 nextlevel = 1; | |
| 321 break; | |
| 322 } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) { | |
| 323 dict[k]->lhmin = atoi(buf + 13); | |
| 324 continue; | |
| 325 } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) { | |
| 326 dict[k]->rhmin = atoi(buf + 14); | |
| 327 continue; | |
| 328 } else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) { | |
| 329 dict[k]->clhmin = atoi(buf + 21); | |
| 330 continue; | |
| 331 } else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) { | |
| 332 dict[k]->crhmin = atoi(buf + 22); | |
| 333 continue; | |
| 334 } | |
| 335 j = 0; | |
| 336 pattern[j] = '0'; | |
| 337 repl = strchr(buf, '/'); | |
| 338 replindex = 0; | |
| 339 replcut = 0; | |
| 340 if (repl) { | |
| 341 char * index = strchr(repl + 1, ','); | |
| 342 *repl = '\0'; | |
| 343 if (index) { | |
| 344 char * index2 = strchr(index + 1, ','); | |
| 345 *index = '\0'; | |
| 346 if (index2) { | |
| 347 *index2 = '\0'; | |
| 348 replindex = (signed char) atoi(index + 1) - 1; | |
| 349 replcut = (signed char) atoi(index2 + 1); | |
| 350 } | |
| 351 } else { | |
| 352 hnj_strchomp(repl + 1); | |
| 353 replindex = 0; | |
| 354 replcut = (signed char) strlen(buf); | |
| 355 } | |
| 356 repl = hnj_strdup(repl + 1); | |
| 357 } | |
| 358 for (i = 0; ((buf[i] > ' ') || (buf[i] < 0)); i++) | |
| 359 { | |
| 360 if (buf[i] >= '0' && buf[i] <= '9') | |
| 361 pattern[j] = buf[i]; | |
| 362 else | |
| 363 { | |
| 364 word[j] = buf[i]; | |
| 365 pattern[++j] = '0'; | |
| 366 } | |
| 367 } | |
| 368 word[j] = '\0'; | |
| 369 pattern[j + 1] = '\0'; | |
| 370 | |
| 371 i = 0; | |
| 372 if (!repl) { | |
| 373 /* Optimize away leading zeroes */ | |
| 374 for (; pattern[i] == '0'; i++); | |
| 375 } else { | |
| 376 if (*word == '.') i++; | |
| 377 /* convert UTF-8 char. positions of discretionary hyph. replacements
to 8-bit */ | |
| 378 if (dict[k]->utf8) { | |
| 379 int pu = -1; /* unicode character position */ | |
| 380 int ps = -1; /* unicode start position (original replinde
x) */ | |
| 381 int pc = (*word == '.') ? 1: 0; /* 8-bit character position */ | |
| 382 for (; pc < (strlen(word) + 1); pc++) { | |
| 383 /* beginning of an UTF-8 character (not '10' start bits) */ | |
| 384 if ((((unsigned char) word[pc]) >> 6) != 2) pu++; | |
| 385 if ((ps < 0) && (replindex == pu)) { | |
| 386 ps = replindex; | |
| 387 replindex = (signed char) pc; | |
| 388 } | |
| 389 if ((ps >= 0) && ((pu - ps) == replcut)) { | |
| 390 replcut = (signed char) (pc - replindex); | |
| 391 break; | |
| 392 } | |
| 393 } | |
| 394 if (*word == '.') replindex--; | |
| 395 } | |
| 396 } | |
| 397 | |
| 398 #ifdef VERBOSE | |
| 399 printf ("word %s pattern %s, j = %d repl: %s\n", word, pattern + i, j
, repl); | |
| 400 #endif | |
| 401 found = hnj_hash_lookup (hashtab, word); | |
| 402 state_num = hnj_get_state (dict[k], hashtab, word); | |
| 403 dict[k]->states[state_num].match = hnj_strdup (pattern + i); | |
| 404 dict[k]->states[state_num].repl = repl; | |
| 405 dict[k]->states[state_num].replindex = replindex; | |
| 406 if (!replcut) { | |
| 407 dict[k]->states[state_num].replcut = (signed char) strlen(word); | |
| 408 } else { | |
| 409 dict[k]->states[state_num].replcut = replcut; | |
| 410 } | |
| 411 | |
| 412 /* now, put in the prefix transitions */ | |
| 413 for (; found < 0 ;j--) | |
| 414 { | |
| 415 last_state = state_num; | |
| 416 ch = word[j - 1]; | |
| 417 word[j - 1] = '\0'; | |
| 418 found = hnj_hash_lookup (hashtab, word); | |
| 419 state_num = hnj_get_state (dict[k], hashtab, word); | |
| 420 hnj_add_trans (dict[k], state_num, last_state, ch); | |
| 421 } | |
| 422 } | |
| 423 } | |
| 424 | |
| 425 /* Could do unioning of matches here (instead of the preprocessor script). | |
| 426 If we did, the pseudocode would look something like this: | |
| 427 | |
| 428 foreach state in the hash table | |
| 429 foreach i = [1..length(state) - 1] | |
| 430 state to check is substr (state, i) | |
| 431 look it up | |
| 432 if found, and if there is a match, union the match in. | |
| 433 | |
| 434 It's also possible to avoid the quadratic blowup by doing the | |
| 435 search in order of increasing state string sizes - then you | |
| 436 can break the loop after finding the first match. | |
| 437 | |
| 438 This step should be optional in any case - if there is a | |
| 439 preprocessed rule table, it's always faster to use that. | |
| 440 | |
| 441 */ | |
| 442 | |
| 443 /* put in the fallback states */ | |
| 444 for (i = 0; i < HASH_SIZE; i++) | |
| 445 for (e = hashtab->entries[i]; e; e = e->next) | |
| 446 { | |
| 447 if (*(e->key)) for (j = 1; 1; j++) | |
| 448 { | |
| 449 state_num = hnj_hash_lookup (hashtab, e->key + j); | |
| 450 if (state_num >= 0) | |
| 451 break; | |
| 452 } | |
| 453 /* KBH: FIXME state 0 fallback_state should always be -1? */ | |
| 454 if (e->val) | |
| 455 dict[k]->states[e->val].fallback_state = state_num; | |
| 456 } | |
| 457 #ifdef VERBOSE | |
| 458 for (i = 0; i < HASH_SIZE; i++) | |
| 459 for (e = hashtab->entries[i]; e; e = e->next) | |
| 460 { | |
| 461 printf ("%d string %s state %d, fallback=%d\n", i, e->key, e->val, | |
| 462 dict[k]->states[e->val].fallback_state); | |
| 463 for (j = 0; j < dict[k]->states[e->val].num_trans; j++) | |
| 464 printf (" %c->%d\n", dict[k]->states[e->val].trans[j].ch, | |
| 465 dict[k]->states[e->val].trans[j].new_state); | |
| 466 } | |
| 467 #endif | |
| 468 | |
| 469 #ifndef VERBOSE | |
| 470 hnj_hash_free (hashtab); | |
| 471 #endif | |
| 472 state_num = 0; | |
| 473 } | |
| 474 if (k == 2) dict[0]->nextlevel = dict[1]; | |
| 475 return dict[0]; | |
| 476 } | |
| 477 | |
| 478 void hnj_hyphen_free (HyphenDict *dict) | |
| 479 { | |
| 480 int state_num; | |
| 481 HyphenState *hstate; | |
| 482 | |
| 483 for (state_num = 0; state_num < dict->num_states; state_num++) | |
| 484 { | |
| 485 hstate = &dict->states[state_num]; | |
| 486 if (hstate->match) | |
| 487 hnj_free (hstate->match); | |
| 488 if (hstate->repl) | |
| 489 hnj_free (hstate->repl); | |
| 490 if (hstate->trans) | |
| 491 hnj_free (hstate->trans); | |
| 492 } | |
| 493 if (dict->nextlevel) hnj_hyphen_free(dict->nextlevel); | |
| 494 | |
| 495 hnj_free (dict->states); | |
| 496 | |
| 497 hnj_free (dict); | |
| 498 } | |
| 499 | |
| 500 #define MAX_WORD 256 | |
| 501 | |
| 502 int hnj_hyphen_hyphenate (HyphenDict *dict, | |
| 503 const char *word, int word_size, | |
| 504 char *hyphens) | |
| 505 { | |
| 506 char prep_word_buf[MAX_WORD]; | |
| 507 char *prep_word; | |
| 508 int i, j, k; | |
| 509 int state; | |
| 510 char ch; | |
| 511 HyphenState *hstate; | |
| 512 char *match; | |
| 513 int offset; | |
| 514 | |
| 515 if (word_size + 3 < MAX_WORD) | |
| 516 prep_word = prep_word_buf; | |
| 517 else | |
| 518 prep_word = hnj_malloc (word_size + 3); | |
| 519 | |
| 520 j = 0; | |
| 521 prep_word[j++] = '.'; | |
| 522 | |
| 523 for (i = 0; i < word_size; i++) | |
| 524 prep_word[j++] = word[i]; | |
| 525 | |
| 526 prep_word[j++] = '.'; | |
| 527 prep_word[j] = '\0'; | |
| 528 | |
| 529 for (i = 0; i < word_size + 5; i++) | |
| 530 hyphens[i] = '0'; | |
| 531 | |
| 532 #ifdef VERBOSE | |
| 533 printf ("prep_word = %s\n", prep_word); | |
| 534 #endif | |
| 535 | |
| 536 /* now, run the finite state machine */ | |
| 537 state = 0; | |
| 538 for (i = 0; i < j; i++) | |
| 539 { | |
| 540 ch = prep_word[i]; | |
| 541 for (;;) | |
| 542 { | |
| 543 | |
| 544 if (state == -1) { | |
| 545 /* return 1; */ | |
| 546 /* KBH: FIXME shouldn't this be as follows? */ | |
| 547 state = 0; | |
| 548 goto try_next_letter; | |
| 549 } | |
| 550 | |
| 551 #ifdef VERBOSE | |
| 552 char *state_str; | |
| 553 state_str = get_state_str (state); | |
| 554 | |
| 555 for (k = 0; k < i - strlen (state_str); k++) | |
| 556 putchar (' '); | |
| 557 printf ("%s", state_str); | |
| 558 #endif | |
| 559 | |
| 560 hstate = &dict->states[state]; | |
| 561 for (k = 0; k < hstate->num_trans; k++) | |
| 562 if (hstate->trans[k].ch == ch) | |
| 563 { | |
| 564 state = hstate->trans[k].new_state; | |
| 565 goto found_state; | |
| 566 } | |
| 567 state = hstate->fallback_state; | |
| 568 #ifdef VERBOSE | |
| 569 printf (" falling back, fallback_state %d\n", state); | |
| 570 #endif | |
| 571 } | |
| 572 found_state: | |
| 573 #ifdef VERBOSE | |
| 574 printf ("found state %d\n",state); | |
| 575 #endif | |
| 576 /* Additional optimization is possible here - especially, | |
| 577 elimination of trailing zeroes from the match. Leading zeroes | |
| 578 have already been optimized. */ | |
| 579 match = dict->states[state].match; | |
| 580 /* replacing rules not handled by hyphen_hyphenate() */ | |
| 581 if (match && !dict->states[state].repl) | |
| 582 { | |
| 583 offset = i + 1 - strlen (match); | |
| 584 #ifdef VERBOSE | |
| 585 for (k = 0; k < offset; k++) | |
| 586 putchar (' '); | |
| 587 printf ("%s\n", match); | |
| 588 #endif | |
| 589 /* This is a linear search because I tried a binary search and | |
| 590 found it to be just a teeny bit slower. */ | |
| 591 for (k = 0; match[k]; k++) | |
| 592 if (hyphens[offset + k] < match[k]) | |
| 593 hyphens[offset + k] = match[k]; | |
| 594 } | |
| 595 | |
| 596 /* KBH: we need this to make sure we keep looking in a word */ | |
| 597 /* for patterns even if the current character is not known in state 0 */ | |
| 598 /* since patterns for hyphenation may occur anywhere in the word */ | |
| 599 try_next_letter: ; | |
| 600 | |
| 601 } | |
| 602 #ifdef VERBOSE | |
| 603 for (i = 0; i < j; i++) | |
| 604 putchar (hyphens[i]); | |
| 605 putchar ('\n'); | |
| 606 #endif | |
| 607 | |
| 608 for (i = 0; i < j - 4; i++) | |
| 609 #if 0 | |
| 610 if (hyphens[i + 1] & 1) | |
| 611 hyphens[i] = '-'; | |
| 612 #else | |
| 613 hyphens[i] = hyphens[i + 1]; | |
| 614 #endif | |
| 615 hyphens[0] = '0'; | |
| 616 for (; i < word_size; i++) | |
| 617 hyphens[i] = '0'; | |
| 618 hyphens[word_size] = '\0'; | |
| 619 | |
| 620 if (prep_word != prep_word_buf) | |
| 621 hnj_free (prep_word); | |
| 622 | |
| 623 return 0; | |
| 624 } | |
| 625 | |
| 626 /* Unicode ligature length */ | |
| 627 int hnj_ligature(unsigned char c) { | |
| 628 switch (c) { | |
| 629 case 0x80: /* ff */ | |
| 630 case 0x81: /* fi */ | |
| 631 case 0x82: return LIG_xx; /* fl */ | |
| 632 case 0x83: /* ffi */ | |
| 633 case 0x84: return LIG_xxx; /* ffl */ | |
| 634 case 0x85: /* long st */ | |
| 635 case 0x86: return LIG_xx; /* st */ | |
| 636 } | |
| 637 return 0; | |
| 638 } | |
| 639 | |
| 640 /* character length of the first n byte of the input word */ | |
| 641 int hnj_hyphen_strnlen(const char * word, int n, int utf8) | |
| 642 { | |
| 643 int i = 0; | |
| 644 int j = 0; | |
| 645 while (j < n && word[j] != '\0') { | |
| 646 i++; | |
| 647 // Unicode ligature support | |
| 648 if (utf8 && ((unsigned char) word[j] == 0xEF) && ((unsigned char) word[j +
1] == 0xAC)) { | |
| 649 i += hnj_ligature(word[j + 2]); | |
| 650 } | |
| 651 for (j++; utf8 && (word[j] & 0xc0) == 0x80; j++); | |
| 652 } | |
| 653 return i; | |
| 654 } | |
| 655 | |
| 656 int hnj_hyphen_lhmin(int utf8, const char *word, int word_size, char * hyphens, | |
| 657 char *** rep, int ** pos, int ** cut, int lhmin) | |
| 658 { | |
| 659 int i = 1, j; | |
| 660 | |
| 661 // Unicode ligature support | |
| 662 if (utf8 && ((unsigned char) word[0] == 0xEF) && ((unsigned char) word[1] ==
0xAC)) { | |
| 663 i += hnj_ligature(word[2]); | |
| 664 } | |
| 665 | |
| 666 for (j = 0; i < lhmin && word[j] != '\0'; i++) do { | |
| 667 // check length of the non-standard part | |
| 668 if (*rep && *pos && *cut && (*rep)[j]) { | |
| 669 char * rh = strchr((*rep)[j], '='); | |
| 670 if (rh && (hnj_hyphen_strnlen(word, j - (*pos)[j] + 1, utf8) + | |
| 671 hnj_hyphen_strnlen((*rep)[j], rh - (*rep)[j], utf8)) < lhmin) { | |
| 672 free((*rep)[j]); | |
| 673 (*rep)[j] = NULL; | |
| 674 hyphens[j] = '0'; | |
| 675 } | |
| 676 } else { | |
| 677 hyphens[j] = '0'; | |
| 678 } | |
| 679 j++; | |
| 680 | |
| 681 // Unicode ligature support | |
| 682 if (utf8 && ((unsigned char) word[j] == 0xEF) && ((unsigned char) word[j
+ 1] == 0xAC)) { | |
| 683 i += hnj_ligature(word[j + 2]); | |
| 684 } | |
| 685 } while (utf8 && (word[j] & 0xc0) == 0x80); | |
| 686 return 0; | |
| 687 } | |
| 688 | |
| 689 int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens, | |
| 690 char *** rep, int ** pos, int ** cut, int rhmin) | |
| 691 { | |
| 692 int i; | |
| 693 int j = word_size - 2; | |
| 694 for (i = 1; i < rhmin && j > 0; j--) { | |
| 695 // check length of the non-standard part | |
| 696 if (*rep && *pos && *cut && (*rep)[j]) { | |
| 697 char * rh = strchr((*rep)[j], '='); | |
| 698 if (rh && (hnj_hyphen_strnlen(word + j - (*pos)[j] + (*cut)[j] + 1, 100,
utf8) + | |
| 699 hnj_hyphen_strnlen(rh + 1, strlen(rh + 1), utf8)) < rhmin) { | |
| 700 free((*rep)[j]); | |
| 701 (*rep)[j] = NULL; | |
| 702 hyphens[j] = '0'; | |
| 703 } | |
| 704 } else { | |
| 705 hyphens[j] = '0'; | |
| 706 } | |
| 707 if (!utf8 || (word[j] & 0xc0) != 0xc0) i++; | |
| 708 } | |
| 709 return 0; | |
| 710 } | |
| 711 | |
| 712 // recursive function for compound level hyphenation | |
| 713 int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size, | |
| 714 char * hyphens, char *** rep, int ** pos, int ** cut, | |
| 715 int clhmin, int crhmin, int lend, int rend) | |
| 716 { | |
| 717 char prep_word_buf[MAX_WORD]; | |
| 718 char *prep_word; | |
| 719 int i, j, k; | |
| 720 int state; | |
| 721 char ch; | |
| 722 HyphenState *hstate; | |
| 723 char *match; | |
| 724 char *repl; | |
| 725 signed char replindex; | |
| 726 signed char replcut; | |
| 727 int offset; | |
| 728 int matchlen_buf[MAX_CHARS]; | |
| 729 int matchindex_buf[MAX_CHARS]; | |
| 730 char * matchrepl_buf[MAX_CHARS]; | |
| 731 int * matchlen; | |
| 732 int * matchindex; | |
| 733 char ** matchrepl; | |
| 734 int isrepl = 0; | |
| 735 int nHyphCount; | |
| 736 | |
| 737 if (word_size + 3 < MAX_CHARS) { | |
| 738 prep_word = prep_word_buf; | |
| 739 matchlen = matchlen_buf; | |
| 740 matchindex = matchindex_buf; | |
| 741 matchrepl = matchrepl_buf; | |
| 742 } else { | |
| 743 prep_word = hnj_malloc (word_size + 3); | |
| 744 matchlen = hnj_malloc ((word_size + 3) * sizeof(int)); | |
| 745 matchindex = hnj_malloc ((word_size + 3) * sizeof(int)); | |
| 746 matchrepl = hnj_malloc ((word_size + 3) * sizeof(char *)); | |
| 747 } | |
| 748 | |
| 749 j = 0; | |
| 750 prep_word[j++] = '.'; | |
| 751 | |
| 752 for (i = 0; i < word_size; i++) | |
| 753 prep_word[j++] = word[i]; | |
| 754 | |
| 755 prep_word[j++] = '.'; | |
| 756 prep_word[j] = '\0'; | |
| 757 | |
| 758 for (i = 0; i < j; i++) | |
| 759 hyphens[i] = '0'; | |
| 760 | |
| 761 #ifdef VERBOSE | |
| 762 printf ("prep_word = %s\n", prep_word); | |
| 763 #endif | |
| 764 | |
| 765 /* now, run the finite state machine */ | |
| 766 state = 0; | |
| 767 for (i = 0; i < j; i++) | |
| 768 { | |
| 769 ch = prep_word[i]; | |
| 770 for (;;) | |
| 771 { | |
| 772 | |
| 773 if (state == -1) { | |
| 774 /* return 1; */ | |
| 775 /* KBH: FIXME shouldn't this be as follows? */ | |
| 776 state = 0; | |
| 777 goto try_next_letter; | |
| 778 } | |
| 779 | |
| 780 #ifdef VERBOSE | |
| 781 char *state_str; | |
| 782 state_str = get_state_str (state); | |
| 783 | |
| 784 for (k = 0; k < i - strlen (state_str); k++) | |
| 785 putchar (' '); | |
| 786 printf ("%s", state_str); | |
| 787 #endif | |
| 788 | |
| 789 hstate = &dict->states[state]; | |
| 790 for (k = 0; k < hstate->num_trans; k++) | |
| 791 if (hstate->trans[k].ch == ch) | |
| 792 { | |
| 793 state = hstate->trans[k].new_state; | |
| 794 goto found_state; | |
| 795 } | |
| 796 state = hstate->fallback_state; | |
| 797 #ifdef VERBOSE | |
| 798 printf (" falling back, fallback_state %d\n", state); | |
| 799 #endif | |
| 800 } | |
| 801 found_state: | |
| 802 #ifdef VERBOSE | |
| 803 printf ("found state %d\n",state); | |
| 804 #endif | |
| 805 /* Additional optimization is possible here - especially, | |
| 806 elimination of trailing zeroes from the match. Leading zeroes | |
| 807 have already been optimized. */ | |
| 808 match = dict->states[state].match; | |
| 809 repl = dict->states[state].repl; | |
| 810 replindex = dict->states[state].replindex; | |
| 811 replcut = dict->states[state].replcut; | |
| 812 /* replacing rules not handled by hyphen_hyphenate() */ | |
| 813 if (match) | |
| 814 { | |
| 815 offset = i + 1 - strlen (match); | |
| 816 #ifdef VERBOSE | |
| 817 for (k = 0; k < offset; k++) | |
| 818 putchar (' '); | |
| 819 printf ("%s (%s)\n", match, repl); | |
| 820 #endif | |
| 821 if (repl) { | |
| 822 if (!isrepl) for(; isrepl < word_size; isrepl++) { | |
| 823 matchrepl[isrepl] = NULL; | |
| 824 matchindex[isrepl] = -1; | |
| 825 } | |
| 826 matchlen[offset + replindex] = replcut; | |
| 827 } | |
| 828 /* This is a linear search because I tried a binary search and | |
| 829 found it to be just a teeny bit slower. */ | |
| 830 for (k = 0; match[k]; k++) { | |
| 831 if ((hyphens[offset + k] < match[k])) { | |
| 832 hyphens[offset + k] = match[k]; | |
| 833 if (match[k]&1) { | |
| 834 matchrepl[offset + k] = repl; | |
| 835 if (repl && (k >= replindex) && (k <= replindex + replcut)) { | |
| 836 matchindex[offset + replindex] = offset + k; | |
| 837 } | |
| 838 } | |
| 839 } | |
| 840 } | |
| 841 | |
| 842 } | |
| 843 | |
| 844 /* KBH: we need this to make sure we keep looking in a word */ | |
| 845 /* for patterns even if the current character is not known in state 0 */ | |
| 846 /* since patterns for hyphenation may occur anywhere in the word */ | |
| 847 try_next_letter: ; | |
| 848 | |
| 849 } | |
| 850 #ifdef VERBOSE | |
| 851 for (i = 0; i < j; i++) | |
| 852 putchar (hyphens[i]); | |
| 853 putchar ('\n'); | |
| 854 #endif | |
| 855 | |
| 856 for (i = 0; i < j - 3; i++) | |
| 857 #if 0 | |
| 858 if (hyphens[i + 1] & 1) | |
| 859 hyphens[i] = '-'; | |
| 860 #else | |
| 861 hyphens[i] = hyphens[i + 1]; | |
| 862 #endif | |
| 863 for (; i < word_size; i++) | |
| 864 hyphens[i] = '0'; | |
| 865 hyphens[word_size] = '\0'; | |
| 866 | |
| 867 /* now create a new char string showing hyphenation positions */ | |
| 868 /* count the hyphens and allocate space for the new hyphenated string */ | |
| 869 nHyphCount = 0; | |
| 870 for (i = 0; i < word_size; i++) | |
| 871 if (hyphens[i]&1) | |
| 872 nHyphCount++; | |
| 873 j = 0; | |
| 874 for (i = 0; i < word_size; i++) { | |
| 875 if (isrepl && (matchindex[i] >= 0) && matchrepl[matchindex[i]]) { | |
| 876 if (rep && pos && cut) { | |
| 877 if (!*rep && !*pos && !*cut) { | |
| 878 int k; | |
| 879 *rep = (char **) malloc(sizeof(char *) * word_size); | |
| 880 *pos = (int *) malloc(sizeof(int) * word_size); | |
| 881 *cut = (int *) malloc(sizeof(int) * word_size); | |
| 882 for (k = 0; k < word_size; k++) { | |
| 883 (*rep)[k] = NULL; | |
| 884 (*pos)[k] = 0; | |
| 885 (*cut)[k] = 0; | |
| 886 } | |
| 887 } | |
| 888 (*rep)[matchindex[i] - 1] = hnj_strdup(matchrepl[matchindex[
i]]); | |
| 889 (*pos)[matchindex[i] - 1] = matchindex[i] - i; | |
| 890 (*cut)[matchindex[i] - 1] = matchlen[i]; | |
| 891 } | |
| 892 j += strlen(matchrepl[matchindex[i]]); | |
| 893 i += matchlen[i] - 1; | |
| 894 } | |
| 895 } | |
| 896 | |
| 897 if (matchrepl != matchrepl_buf) { | |
| 898 hnj_free (matchrepl); | |
| 899 hnj_free (matchlen); | |
| 900 hnj_free (matchindex); | |
| 901 } | |
| 902 | |
| 903 // recursive hyphenation of the first (compound) level segments | |
| 904 if (dict->nextlevel) { | |
| 905 char * rep2_buf[MAX_WORD]; | |
| 906 int pos2_buf[MAX_WORD]; | |
| 907 int cut2_buf[MAX_WORD]; | |
| 908 char hyphens2_buf[MAX_WORD]; | |
| 909 char ** rep2; | |
| 910 int * pos2; | |
| 911 int * cut2; | |
| 912 char * hyphens2; | |
| 913 int begin = 0; | |
| 914 if (word_size < MAX_CHARS) { | |
| 915 rep2 = rep2_buf; | |
| 916 pos2 = pos2_buf; | |
| 917 cut2 = cut2_buf; | |
| 918 hyphens2 = hyphens2_buf; | |
| 919 } else { | |
| 920 rep2 = hnj_malloc (word_size * sizeof(char *)); | |
| 921 pos2 = hnj_malloc (word_size * sizeof(int)); | |
| 922 cut2 = hnj_malloc (word_size * sizeof(int)); | |
| 923 hyphens2 = hnj_malloc (word_size); | |
| 924 } | |
| 925 for (i = 0; i < word_size; i++) rep2[i] = NULL; | |
| 926 for (i = 0; i < word_size; i++) if | |
| 927 (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) { | |
| 928 if (i - begin > 1) { | |
| 929 int hyph = 0; | |
| 930 prep_word[i + 2] = '\0'; | |
| 931 /* non-standard hyphenation at compound boundary (Schiffahrt) */ | |
| 932 if (*rep && *pos && *cut && (*rep)[i]) { | |
| 933 char * l = strchr((*rep)[i], '='); | |
| 934 strcpy(prep_word + 2 + i - (*pos)[i], (*rep)[i]); | |
| 935 if (l) { | |
| 936 hyph = (l - (*rep)[i]) - (*pos)[i]; | |
| 937 prep_word[2 + i + hyph] = '\0'; | |
| 938 } | |
| 939 } | |
| 940 hnj_hyphen_hyph_(dict, prep_word + begin + 1, i - begin + 1 + hyph, | |
| 941 hyphens2, &rep2, &pos2, &cut2, clhmin, | |
| 942 crhmin, (begin > 0 ? 0 : lend), (hyphens[i]&1 ? 0 : rend)); | |
| 943 for (j = 0; j < i - begin - 1; j++) { | |
| 944 hyphens[begin + j] = hyphens2[j]; | |
| 945 if (rep2[j] && rep && pos && cut) { | |
| 946 if (!*rep && !*pos && !*cut) { | |
| 947 int k; | |
| 948 *rep = (char **) malloc(sizeof(char *) * word_size); | |
| 949 *pos = (int *) malloc(sizeof(int) * word_size); | |
| 950 *cut = (int *) malloc(sizeof(int) * word_size); | |
| 951 for (k = 0; k < word_size; k++) { | |
| 952 (*rep)[k] = NULL; | |
| 953 (*pos)[k] = 0; | |
| 954 (*cut)[k] = 0; | |
| 955 } | |
| 956 } | |
| 957 (*rep)[begin + j] = rep2[j]; | |
| 958 (*pos)[begin + j] = pos2[j]; | |
| 959 (*cut)[begin + j] = cut2[j]; | |
| 960 } | |
| 961 } | |
| 962 prep_word[i + 2] = word[i + 1]; | |
| 963 if (*rep && *pos && *cut && (*rep)[i]) { | |
| 964 strcpy(prep_word + 1, word); | |
| 965 } | |
| 966 } | |
| 967 begin = i + 1; | |
| 968 for (j = 0; j < word_size; j++) rep2[j] = NULL; | |
| 969 } | |
| 970 | |
| 971 // non-compound | |
| 972 if (begin == 0) { | |
| 973 hnj_hyphen_hyph_(dict->nextlevel, word, word_size, | |
| 974 hyphens, rep, pos, cut, clhmin, crhmin, lend, rend); | |
| 975 if (!lend) hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens, | |
| 976 rep, pos, cut, clhmin); | |
| 977 if (!rend) hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens, | |
| 978 rep, pos, cut, crhmin); | |
| 979 } | |
| 980 | |
| 981 if (rep2 != rep2_buf) { | |
| 982 free(rep2); | |
| 983 free(cut2); | |
| 984 free(pos2); | |
| 985 free(hyphens2); | |
| 986 } | |
| 987 } | |
| 988 | |
| 989 if (prep_word != prep_word_buf) hnj_free (prep_word); | |
| 990 return 0; | |
| 991 } | |
| 992 | |
| 993 /* UTF-8 normalization of hyphen and non-standard positions */ | |
| 994 int hnj_hyphen_norm(const char *word, int word_size, char * hyphens, | |
| 995 char *** rep, int ** pos, int ** cut) | |
| 996 { | |
| 997 int i, j, k; | |
| 998 if ((((unsigned char) word[0]) >> 6) == 2) { | |
| 999 fprintf(stderr, "error - bad, non UTF-8 input: %s\n", word); | |
| 1000 return 1; | |
| 1001 } | |
| 1002 | |
| 1003 /* calculate UTF-8 character positions */ | |
| 1004 for (i = 0, j = -1; i < word_size; i++) { | |
| 1005 /* beginning of an UTF-8 character (not '10' start bits) */ | |
| 1006 if ((((unsigned char) word[i]) >> 6) != 2) j++; | |
| 1007 hyphens[j] = hyphens[i]; | |
| 1008 if (rep && pos && cut && *rep && *pos && *cut) { | |
| 1009 int l = (*pos)[i]; | |
| 1010 (*pos)[j] = 0; | |
| 1011 for (k = 0; k < l; k++) { | |
| 1012 if ((((unsigned char) word[i - k]) >> 6) != 2) (*pos)[j]++; | |
| 1013 } | |
| 1014 k = i - l + 1; | |
| 1015 l = k + (*cut)[i]; | |
| 1016 (*cut)[j] = 0; | |
| 1017 for (; k < l; k++) { | |
| 1018 if ((((unsigned char) word[k]) >> 6) != 2) (*cut)[j]++; | |
| 1019 } | |
| 1020 (*rep)[j] = (*rep)[i]; | |
| 1021 if (j < i) { | |
| 1022 (*rep)[i] = NULL; | |
| 1023 (*pos)[i] = 0; | |
| 1024 (*cut)[i] = 0; | |
| 1025 } | |
| 1026 } | |
| 1027 } | |
| 1028 hyphens[j + 1] = '\0'; | |
| 1029 return 0; | |
| 1030 } | |
| 1031 | |
| 1032 /* get the word with all possible hyphenations (output: hyphword) */ | |
| 1033 void hnj_hyphen_hyphword(const char * word, int l, const char * hyphens, | |
| 1034 char * hyphword, char *** rep, int ** pos, int ** cut) | |
| 1035 { | |
| 1036 int i, j; | |
| 1037 for (i = 0, j = 0; i < l; i++, j++) { | |
| 1038 if (hyphens[i]&1) { | |
| 1039 hyphword[j] = word[i]; | |
| 1040 if (*rep && *pos && *cut && (*rep)[i]) { | |
| 1041 strcpy(hyphword + j - (*pos)[i] + 1, (*rep)[i]); | |
| 1042 j += strlen((*rep)[i]) - (*pos)[i]; | |
| 1043 i += (*cut)[i] - (*pos)[i]; | |
| 1044 } else hyphword[++j] = '='; | |
| 1045 } else hyphword[j] = word[i]; | |
| 1046 } | |
| 1047 hyphword[j] = '\0'; | |
| 1048 } | |
| 1049 | |
| 1050 | |
| 1051 /* main api function with default hyphenmin parameters */ | |
| 1052 int hnj_hyphen_hyphenate2 (HyphenDict *dict, | |
| 1053 const char *word, int word_size, char * hyphens, | |
| 1054 char *hyphword, char *** rep, int ** pos, int ** cut) | |
| 1055 { | |
| 1056 hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut, | |
| 1057 dict->clhmin, dict->crhmin, 1, 1); | |
| 1058 hnj_hyphen_lhmin(dict->utf8, word, word_size, | |
| 1059 hyphens, rep, pos, cut, (dict->lhmin > 0 ? dict->lhmin : 2)); | |
| 1060 hnj_hyphen_rhmin(dict->utf8, word, word_size, | |
| 1061 hyphens, rep, pos, cut, (dict->rhmin > 0 ? dict->rhmin : 2)); | |
| 1062 if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos
, cut); | |
| 1063 if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut
); | |
| 1064 return 0; | |
| 1065 } | |
| 1066 | |
| 1067 /* previous main api function with hyphenmin parameters */ | |
| 1068 int hnj_hyphen_hyphenate3 (HyphenDict *dict, | |
| 1069 const char *word, int word_size, char * hyphens, | |
| 1070 char *hyphword, char *** rep, int ** pos, int ** cut, | |
| 1071 int lhmin, int rhmin, int clhmin, int crhmin) | |
| 1072 { | |
| 1073 lhmin = (lhmin > 0 ? lhmin : dict->lhmin); | |
| 1074 rhmin = (rhmin > 0 ? rhmin : dict->rhmin); | |
| 1075 hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut, | |
| 1076 clhmin, crhmin, 1, 1); | |
| 1077 hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens, | |
| 1078 rep, pos, cut, (lhmin > 0 ? lhmin : 2)); | |
| 1079 hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens, | |
| 1080 rep, pos, cut, (rhmin > 0 ? rhmin : 2)); | |
| 1081 if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos
, cut); | |
| 1082 if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut
); | |
| 1083 return 0; | |
| 1084 } | |
| OLD | NEW |