Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(81)

Side by Side Diff: courgette/third_party/divsufsort/sssort.cc

Issue 2156223002: [Courgette] Add third party-library: libdivsufsort. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Update checklicenses.py. Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
2 //
3 // For the terms under which this work may be distributed, please see
4 // the adjoining file "LICENSE".
5 //
6 // ChangeLog:
7 // 2016-07-22 - Initial commit and adaption to use PagedArray.
8 // --Samuel Huang <huangs@chromium.org>
9
10 #include "courgette/third_party/divsufsort/divsufsort_private.h"
11
12 #if defined(SS_INSERTIONSORT_THRESHOLD)
13 # if SS_INSERTIONSORT_THRESHOLD < 1
14 # undef SS_INSERTIONSORT_THRESHOLD
15 # define SS_INSERTIONSORT_THRESHOLD (1)
16 # endif
17 #else
18 # define SS_INSERTIONSORT_THRESHOLD (8)
19 #endif
20 #if defined(SS_BLOCKSIZE)
21 # if SS_BLOCKSIZE < 0
22 # undef SS_BLOCKSIZE
23 # define SS_BLOCKSIZE (0)
24 # elif 32768 <= SS_BLOCKSIZE
25 # undef SS_BLOCKSIZE
26 # define SS_BLOCKSIZE (32767)
27 # endif
28 #else
29 # define SS_BLOCKSIZE (1024)
30 #endif
31 /* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
32 #if SS_BLOCKSIZE == 0
33 # define SS_MISORT_STACKSIZE (64)
34 #elif SS_BLOCKSIZE <= 4096
35 # define SS_MISORT_STACKSIZE (16)
36 #else
37 # define SS_MISORT_STACKSIZE (24)
38 #endif
39 #define SS_SMERGE_STACKSIZE (32)
40
41 #define STACK_PUSH(_a, _b, _c, _d)\
42 do {\
43 assert(ssize < STACK_SIZE);\
44 stack[ssize].a = (_a), stack[ssize].b = (_b),\
45 stack[ssize].c = (_c), stack[ssize++].d = (_d);\
46 } while(0)
47 #define STACK_POP(_a, _b, _c, _d)\
48 do {\
49 assert(0 <= ssize);\
50 if(ssize == 0) { return; }\
51 (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
52 (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
53 } while(0)
54
55 namespace divsuf {
56
57 namespace {
58
59 /*- Private Functions -*/
60
61 const saint_t lg_table[256]= {
62 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
63 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
64 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
65 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
66 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
67 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
68 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
69 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
70 };
71
72 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
73
74 inline
75 saint_t
76 ss_ilg(saidx_t n) {
77 #if SS_BLOCKSIZE == 0
78 return (n & 0xffff0000) ?
79 ((n & 0xff000000) ?
80 24 + lg_table[(n >> 24) & 0xff] :
81 16 + lg_table[(n >> 16) & 0xff]) :
82 ((n & 0x0000ff00) ?
83 8 + lg_table[(n >> 8) & 0xff] :
84 0 + lg_table[(n >> 0) & 0xff]);
85 #elif SS_BLOCKSIZE < 256
86 return lg_table[n];
87 #else
88 return (n & 0xff00) ?
89 8 + lg_table[(n >> 8) & 0xff] :
90 0 + lg_table[(n >> 0) & 0xff];
91 #endif
92 }
93
94 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
95
96 #if SS_BLOCKSIZE != 0
97
98 const saint_t sqq_table[256] = {
99 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
100 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
101 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
102 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
103 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
104 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
105 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
106 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
107 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
108 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
109 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
110 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
111 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
112 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
113 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
114 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
115 };
116
117 inline
118 saidx_t
119 ss_isqrt(saidx_t x) {
120 saidx_t y, e;
121
122 if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
123 e = (x & 0xffff0000) ?
124 ((x & 0xff000000) ?
125 24 + lg_table[(x >> 24) & 0xff] :
126 16 + lg_table[(x >> 16) & 0xff]) :
127 ((x & 0x0000ff00) ?
128 8 + lg_table[(x >> 8) & 0xff] :
129 0 + lg_table[(x >> 0) & 0xff]);
130
131 if(e >= 16) {
132 y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
133 if(e >= 24) { y = (y + 1 + x / y) >> 1; }
134 y = (y + 1 + x / y) >> 1;
135 } else if(e >= 8) {
136 y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
137 } else {
138 return sqq_table[x] >> 4;
139 }
140
141 return (x < (y * y)) ? y - 1 : y;
142 }
143
144 #endif /* SS_BLOCKSIZE != 0 */
145
146
147 /*---------------------------------------------------------------------------*/
148
149 /* Compares two suffixes. */
150 inline
151 saint_t
152 ss_compare_internal(const sauchar_t *T,
153 saidx_t p1_lo,
154 saidx_t p1_hi,
155 saidx_t p2_lo,
156 saidx_t p2_hi,
157 saidx_t depth) {
158 const sauchar_t *U1, *U2, *U1n, *U2n;
159
160 for(U1 = T + depth + p1_lo,
161 U2 = T + depth + p2_lo,
162 U1n = T + p1_hi + 2,
163 U2n = T + p2_hi + 2;
164 (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
165 ++U1, ++U2) {
166 }
167
168 return U1 < U1n ?
169 (U2 < U2n ? *U1 - *U2 : 1) :
170 (U2 < U2n ? -1 : 0);
171 }
172
173 /* Compares two suffixes. */
174 inline
175 saint_t
176 ss_compare(const sauchar_t *T,
177 const_saidx_it p1, const_saidx_it p2,
178 saidx_t depth) {
179 return ss_compare_internal(T, *p1, *(p1 + 1), *p2, *(p2 + 1), depth);
180 }
181
182
183 /*---------------------------------------------------------------------------*/
184
185 #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
186
187 /* Insertionsort for small size groups */
188 void
189 ss_insertionsort(const sauchar_t *T, const_saidx_it PA,
190 saidx_it first, saidx_it last, saidx_t depth) {
191 saidx_it i, j;
192 saidx_t t;
193 saint_t r;
194
195 for(i = last - 2; first <= i; --i) {
196 for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
197 do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
198 if(last <= j) { break; }
199 }
200 if(r == 0) { *j = ~*j; }
201 *(j - 1) = t;
202 }
203 }
204
205 #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
206
207
208 /*---------------------------------------------------------------------------*/
209
210 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
211
212 inline
213 void
214 ss_fixdown(const sauchar_t *Td, const_saidx_it PA,
215 saidx_it SA, saidx_t i, saidx_t size) {
216 saidx_t j, k;
217 saidx_t v;
218 saint_t c, d, e;
219
220 for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
221 d = Td[PA[SA[k = j++]]];
222 if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
223 if(d <= c) { break; }
224 }
225 SA[i] = v;
226 }
227
228 /* Simple top-down heapsort. */
229 void
230 ss_heapsort(const sauchar_t *Td, const_saidx_it PA, saidx_it SA, saidx_t size) {
231 saidx_t i, m;
232 saidx_t t;
233
234 m = size;
235 if((size % 2) == 0) {
236 m--;
237 if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
238 }
239
240 for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
241 if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
242 for(i = m - 1; 0 < i; --i) {
243 t = SA[0], SA[0] = SA[i];
244 ss_fixdown(Td, PA, SA, 0, i);
245 SA[i] = t;
246 }
247 }
248
249
250 /*---------------------------------------------------------------------------*/
251
252 /* Returns the median of three elements. */
253 inline
254 saidx_it
255 ss_median3(const sauchar_t *Td, const_saidx_it PA,
256 saidx_it v1, saidx_it v2, saidx_it v3) {
257 saidx_it t;
258 if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
259 if(Td[PA[*v2]] > Td[PA[*v3]]) {
260 if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
261 else { return v3; }
262 }
263 return v2;
264 }
265
266 /* Returns the median of five elements. */
267 inline
268 saidx_it
269 ss_median5(const sauchar_t *Td, const_saidx_it PA,
270 saidx_it v1, saidx_it v2, saidx_it v3, saidx_it v4, saidx_it v5) {
271 saidx_it t;
272 if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
273 if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
274 if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
275 if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
276 if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
277 if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
278 return v3;
279 }
280
281 /* Returns the pivot element. */
282 inline
283 saidx_it
284 ss_pivot(const sauchar_t *Td, const_saidx_it PA, saidx_it first, saidx_it last) {
285 saidx_it middle;
286 saidx_t t;
287
288 t = last - first;
289 middle = first + t / 2;
290
291 if(t <= 512) {
292 if(t <= 32) {
293 return ss_median3(Td, PA, first, middle, last - 1);
294 } else {
295 t >>= 2;
296 return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1 );
297 }
298 }
299 t >>= 3;
300 first = ss_median3(Td, PA, first, first + t, first + (t << 1));
301 middle = ss_median3(Td, PA, middle - t, middle, middle + t);
302 last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
303 return ss_median3(Td, PA, first, middle, last);
304 }
305
306
307 /*---------------------------------------------------------------------------*/
308
309 /* Binary partition for substrings. */
310 inline
311 saidx_it
312 ss_partition(const_saidx_it PA,
313 saidx_it first, saidx_it last, saidx_t depth) {
314 saidx_it a, b;
315 saidx_t t;
316 for(a = first - 1, b = last;;) {
317 for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
318 for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
319 if(b <= a) { break; }
320 t = ~*b;
321 *b = *a;
322 *a = t;
323 }
324 if(first < a) { *first = ~*first; }
325 return a;
326 }
327
328 /* Multikey introsort for medium size groups. */
329 void
330 ss_mintrosort(const sauchar_t *T, const_saidx_it PA,
331 saidx_it first, saidx_it last,
332 saidx_t depth) {
333 #define STACK_SIZE SS_MISORT_STACKSIZE
334 struct { saidx_it a, b; saidx_t c; saint_t d; } stack[STACK_SIZE];
335 const sauchar_t *Td;
336 saidx_it a, b, c, d, e, f;
337 saidx_t s, t;
338 saint_t ssize;
339 saint_t limit;
340 saint_t v, x = 0;
341
342 for(ssize = 0, limit = ss_ilg(last - first);;) {
343
344 if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
345 #if 1 < SS_INSERTIONSORT_THRESHOLD
346 if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
347 #endif
348 STACK_POP(first, last, depth, limit);
349 continue;
350 }
351
352 Td = T + depth;
353 if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
354 if(limit < 0) {
355 for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
356 if((x = Td[PA[*a]]) != v) {
357 if(1 < (a - first)) { break; }
358 v = x;
359 first = a;
360 }
361 }
362 if(Td[PA[*first] - 1] < v) {
363 first = ss_partition(PA, first, a, depth);
364 }
365 if((a - first) <= (last - a)) {
366 if(1 < (a - first)) {
367 STACK_PUSH(a, last, depth, -1);
368 last = a, depth += 1, limit = ss_ilg(a - first);
369 } else {
370 first = a, limit = -1;
371 }
372 } else {
373 if(1 < (last - a)) {
374 STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
375 first = a, limit = -1;
376 } else {
377 last = a, depth += 1, limit = ss_ilg(a - first);
378 }
379 }
380 continue;
381 }
382
383 /* choose pivot */
384 a = ss_pivot(Td, PA, first, last);
385 v = Td[PA[*a]];
386 SWAP(*first, *a);
387
388 /* partition */
389 for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
390 if(((a = b) < last) && (x < v)) {
391 for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
392 if(x == v) { SWAP(*b, *a); ++a; }
393 }
394 }
395 for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
396 if((b < (d = c)) && (x > v)) {
397 for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
398 if(x == v) { SWAP(*c, *d); --d; }
399 }
400 }
401 for(; b < c;) {
402 SWAP(*b, *c);
403 for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
404 if(x == v) { SWAP(*b, *a); ++a; }
405 }
406 for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
407 if(x == v) { SWAP(*c, *d); --d; }
408 }
409 }
410
411 if(a <= d) {
412 c = b - 1;
413
414 if((s = a - first) > (t = b - a)) { s = t; }
415 for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
416 if((s = d - c) > (t = last - d - 1)) { s = t; }
417 for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
418
419 a = first + (b - a), c = last - (d - c);
420 b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
421
422 if((a - first) <= (last - c)) {
423 if((last - c) <= (c - b)) {
424 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
425 STACK_PUSH(c, last, depth, limit);
426 last = a;
427 } else if((a - first) <= (c - b)) {
428 STACK_PUSH(c, last, depth, limit);
429 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
430 last = a;
431 } else {
432 STACK_PUSH(c, last, depth, limit);
433 STACK_PUSH(first, a, depth, limit);
434 first = b, last = c, depth += 1, limit = ss_ilg(c - b);
435 }
436 } else {
437 if((a - first) <= (c - b)) {
438 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
439 STACK_PUSH(first, a, depth, limit);
440 first = c;
441 } else if((last - c) <= (c - b)) {
442 STACK_PUSH(first, a, depth, limit);
443 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
444 first = c;
445 } else {
446 STACK_PUSH(first, a, depth, limit);
447 STACK_PUSH(c, last, depth, limit);
448 first = b, last = c, depth += 1, limit = ss_ilg(c - b);
449 }
450 }
451 } else {
452 limit += 1;
453 if(Td[PA[*first] - 1] < v) {
454 first = ss_partition(PA, first, last, depth);
455 limit = ss_ilg(last - first);
456 }
457 depth += 1;
458 }
459 }
460 #undef STACK_SIZE
461 }
462
463 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
464
465
466 /*---------------------------------------------------------------------------*/
467
468 #if SS_BLOCKSIZE != 0
469
470 inline
471 void
472 ss_blockswap(saidx_it a, saidx_it b, saidx_t n) {
473 saidx_t t;
474 for(; 0 < n; --n, ++a, ++b) {
475 t = *a, *a = *b, *b = t;
476 }
477 }
478
479 inline
480 void
481 ss_rotate(saidx_it first, saidx_it middle, saidx_it last) {
482 saidx_it a, b;
483 saidx_t t;
484 saidx_t l, r;
485 l = middle - first, r = last - middle;
486 for(; (0 < l) && (0 < r);) {
487 if(l == r) { ss_blockswap(first, middle, l); break; }
488 if(l < r) {
489 a = last - 1, b = middle - 1;
490 t = *a;
491 do {
492 *a-- = *b, *b-- = *a;
493 if(b < first) {
494 *a = t;
495 last = a;
496 if((r -= l + 1) <= l) { break; }
497 a -= 1, b = middle - 1;
498 t = *a;
499 }
500 } while(1);
501 } else {
502 a = first, b = middle;
503 t = *a;
504 do {
505 *a++ = *b, *b++ = *a;
506 if(last <= b) {
507 *a = t;
508 first = a + 1;
509 if((l -= r + 1) <= r) { break; }
510 a += 1, b = middle;
511 t = *a;
512 }
513 } while(1);
514 }
515 }
516 }
517
518
519 /*---------------------------------------------------------------------------*/
520
521 void
522 ss_inplacemerge(const sauchar_t *T, const_saidx_it PA,
523 saidx_it first, saidx_it middle, saidx_it last,
524 saidx_t depth) {
525 const_saidx_it p;
526 saidx_it a, b;
527 saidx_t len, half;
528 saint_t q, r;
529 saint_t x;
530
531 for(;;) {
532 if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
533 else { x = 0; p = PA + *(last - 1); }
534 for(a = first, len = middle - first, half = len >> 1, r = -1;
535 0 < len;
536 len = half, half >>= 1) {
537 b = a + half;
538 q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
539 if(q < 0) {
540 a = b + 1;
541 half -= (len & 1) ^ 1;
542 } else {
543 r = q;
544 }
545 }
546 if(a < middle) {
547 if(r == 0) { *a = ~*a; }
548 ss_rotate(a, middle, last);
549 last -= middle - a;
550 middle = a;
551 if(first == middle) { break; }
552 }
553 --last;
554 if(x != 0) { while(*--last < 0) { } }
555 if(middle == last) { break; }
556 }
557 }
558
559
560 /*---------------------------------------------------------------------------*/
561
562 /* Merge-forward with internal buffer. */
563 static
564 void
565 ss_mergeforward(const sauchar_t *T, const_saidx_it PA,
566 saidx_it first, saidx_it middle, saidx_it last,
567 saidx_it buf, saidx_t depth) {
568 saidx_it a, b, c, bufend;
569 saidx_t t;
570 saint_t r;
571
572 bufend = buf + (middle - first) - 1;
573 ss_blockswap(buf, first, middle - first);
574
575 for(t = *(a = first), b = buf, c = middle;;) {
576 r = ss_compare(T, PA + *b, PA + *c, depth);
577 if(r < 0) {
578 do {
579 *a++ = *b;
580 if(bufend <= b) { *bufend = t; return; }
581 *b++ = *a;
582 } while(*b < 0);
583 } else if(r > 0) {
584 do {
585 *a++ = *c, *c++ = *a;
586 if(last <= c) {
587 while(b < bufend) { *a++ = *b, *b++ = *a; }
588 *a = *b, *b = t;
589 return;
590 }
591 } while(*c < 0);
592 } else {
593 *c = ~*c;
594 do {
595 *a++ = *b;
596 if(bufend <= b) { *bufend = t; return; }
597 *b++ = *a;
598 } while(*b < 0);
599
600 do {
601 *a++ = *c, *c++ = *a;
602 if(last <= c) {
603 while(b < bufend) { *a++ = *b, *b++ = *a; }
604 *a = *b, *b = t;
605 return;
606 }
607 } while(*c < 0);
608 }
609 }
610 }
611
612 /* Merge-backward with internal buffer. */
613 void
614 ss_mergebackward(const sauchar_t *T, const_saidx_it PA,
615 saidx_it first, saidx_it middle, saidx_it last,
616 saidx_it buf, saidx_t depth) {
617 const_saidx_it p1, p2;
618 saidx_it a, b, c, bufend;
619 saidx_t t;
620 saint_t r;
621 saint_t x;
622
623 bufend = buf + (last - middle) - 1;
624 ss_blockswap(buf, middle, last - middle);
625
626 x = 0;
627 if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
628 else { p1 = PA + *bufend; }
629 if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
630 else { p2 = PA + *(middle - 1); }
631 for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
632 r = ss_compare(T, p1, p2, depth);
633 if(0 < r) {
634 if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
635 *a-- = *b;
636 if(b <= buf) { *buf = t; break; }
637 *b-- = *a;
638 if(*b < 0) { p1 = PA + ~*b; x |= 1; }
639 else { p1 = PA + *b; }
640 } else if(r < 0) {
641 if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
642 *a-- = *c, *c-- = *a;
643 if(c < first) {
644 while(buf < b) { *a-- = *b, *b-- = *a; }
645 *a = *b, *b = t;
646 break;
647 }
648 if(*c < 0) { p2 = PA + ~*c; x |= 2; }
649 else { p2 = PA + *c; }
650 } else {
651 if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
652 *a-- = ~*b;
653 if(b <= buf) { *buf = t; break; }
654 *b-- = *a;
655 if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
656 *a-- = *c, *c-- = *a;
657 if(c < first) {
658 while(buf < b) { *a-- = *b, *b-- = *a; }
659 *a = *b, *b = t;
660 break;
661 }
662 if(*b < 0) { p1 = PA + ~*b; x |= 1; }
663 else { p1 = PA + *b; }
664 if(*c < 0) { p2 = PA + ~*c; x |= 2; }
665 else { p2 = PA + *c; }
666 }
667 }
668 }
669
670 /* D&C based merge. */
671 void
672 ss_swapmerge(const sauchar_t *T, const_saidx_it PA,
673 saidx_it first, saidx_it middle, saidx_it last,
674 saidx_it buf, saidx_t bufsize, saidx_t depth) {
675 #define STACK_SIZE SS_SMERGE_STACKSIZE
676 #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
677 #define MERGE_CHECK(a, b, c)\
678 do {\
679 if(((c) & 1) ||\
680 (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) = = 0))) {\
681 *(a) = ~*(a);\
682 }\
683 if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) = = 0))) {\
684 *(b) = ~*(b);\
685 }\
686 } while(0)
687 struct { saidx_it a, b, c; saint_t d; } stack[STACK_SIZE];
688 saidx_it l, r, lm, rm;
689 saidx_t m, len, half;
690 saint_t ssize;
691 saint_t check, next;
692
693 for(check = 0, ssize = 0;;) {
694 if((last - middle) <= bufsize) {
695 if((first < middle) && (middle < last)) {
696 ss_mergebackward(T, PA, first, middle, last, buf, depth);
697 }
698 MERGE_CHECK(first, last, check);
699 STACK_POP(first, middle, last, check);
700 continue;
701 }
702
703 if((middle - first) <= bufsize) {
704 if(first < middle) {
705 ss_mergeforward(T, PA, first, middle, last, buf, depth);
706 }
707 MERGE_CHECK(first, last, check);
708 STACK_POP(first, middle, last, check);
709 continue;
710 }
711
712 for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
713 0 < len;
714 len = half, half >>= 1) {
715 if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
716 PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
717 m += half + 1;
718 half -= (len & 1) ^ 1;
719 }
720 }
721
722 if(0 < m) {
723 lm = middle - m, rm = middle + m;
724 ss_blockswap(lm, middle, m);
725 l = r = middle, next = 0;
726 if(rm < last) {
727 if(*rm < 0) {
728 *rm = ~*rm;
729 if(first < lm) { for(; *--l < 0;) { } next |= 4; }
730 next |= 1;
731 } else if(first < lm) {
732 for(; *r < 0; ++r) { }
733 next |= 2;
734 }
735 }
736
737 if((l - first) <= (last - r)) {
738 STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
739 middle = lm, last = l, check = (check & 3) | (next & 4);
740 } else {
741 if((next & 2) && (r == middle)) { next ^= 6; }
742 STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
743 first = r, middle = rm, check = (next & 3) | (check & 4);
744 }
745 } else {
746 if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
747 *middle = ~*middle;
748 }
749 MERGE_CHECK(first, last, check);
750 STACK_POP(first, middle, last, check);
751 }
752 }
753 #undef STACK_SIZE
754 }
755
756 #endif /* SS_BLOCKSIZE != 0 */
757
758 } // namespace
759
760 /*---------------------------------------------------------------------------*/
761
762 /*- Function -*/
763
764 /* Substring sort */
765 void
766 sssort(const sauchar_t *T, const_saidx_it PA,
767 saidx_it first, saidx_it last,
768 saidx_it buf, saidx_t bufsize,
769 saidx_t depth, saidx_t n, saint_t lastsuffix) {
770 saidx_it a;
771 #if SS_BLOCKSIZE != 0
772 saidx_it b, middle, curbuf;
773 saidx_t j, k, curbufsize, limit;
774 #endif
775 saidx_t i;
776
777 if(lastsuffix != 0) { ++first; }
778
779 #if SS_BLOCKSIZE == 0
780 ss_mintrosort(T, PA, first, last, depth);
781 #else
782 if((bufsize < SS_BLOCKSIZE) &&
783 (bufsize < (last - first)) &&
784 (bufsize < (limit = ss_isqrt(last - first)))) {
785 if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
786 buf = middle = last - limit, bufsize = limit;
787 } else {
788 middle = last, limit = 0;
789 }
790 for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
791 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
792 ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
793 #elif 1 < SS_BLOCKSIZE
794 ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
795 #endif
796 curbufsize = last - (a + SS_BLOCKSIZE);
797 curbuf = a + SS_BLOCKSIZE;
798 if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
799 for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
800 ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
801 }
802 }
803 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
804 ss_mintrosort(T, PA, a, middle, depth);
805 #elif 1 < SS_BLOCKSIZE
806 ss_insertionsort(T, PA, a, middle, depth);
807 #endif
808 for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
809 if(i & 1) {
810 ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
811 a -= k;
812 }
813 }
814 if(limit != 0) {
815 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
816 ss_mintrosort(T, PA, middle, last, depth);
817 #elif 1 < SS_BLOCKSIZE
818 ss_insertionsort(T, PA, middle, last, depth);
819 #endif
820 ss_inplacemerge(T, PA, first, middle, last, depth);
821 }
822 #endif
823
824 if(lastsuffix != 0) {
825 /* Insert last type B* suffix. */
826 for(a = first, i = *(first - 1);
827 (a < last) && ((*a < 0) ||
828 (0 < ss_compare_internal(T, PA[*(first - 1)], n - 2,
829 PA[*a], PA[*a + 1], depth)));
830 ++a) {
831 *(a - 1) = *a;
832 }
833 *(a - 1) = i;
834 }
835 }
836
837 } // namespace divsuf
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698