Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(260)

Side by Side Diff: courgette/third_party/divsufsort/trsort.cc

Issue 2156223002: [Courgette] Add third party-library: libdivsufsort. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Update checklicenses.py. Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
2 //
3 // For the terms under which this work may be distributed, please see
4 // the adjoining file "LICENSE".
5 //
6 // ChangeLog:
7 // 2016-07-22 - Initial commit and adaption to use PagedArray.
8 // --Samuel Huang <huangs@chromium.org>
9
10 #include "courgette/third_party/divsufsort/divsufsort_private.h"
11
12 #define TR_INSERTIONSORT_THRESHOLD (8)
13 #define TR_STACKSIZE (64)
14
15 #define STACK_PUSH5(_a, _b, _c, _d, _e)\
16 do {\
17 assert(ssize < STACK_SIZE);\
18 stack[ssize].a = (_a), stack[ssize].b = (_b),\
19 stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
20 } while(0)
21 #define STACK_POP5(_a, _b, _c, _d, _e)\
22 do {\
23 assert(0 <= ssize);\
24 if(ssize == 0) { return; }\
25 (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
26 (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
27 } while(0)
28
29
30 namespace divsuf {
31
32 namespace {
33
34 /*- Private Functions -*/
35
36 const saint_t lg_table[256]= {
37 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
38 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
39 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
40 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
41 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
42 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
43 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
44 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
45 };
46
47 inline
48 saint_t
49 tr_ilg(saidx_t n) {
50 return (n & 0xffff0000) ?
51 ((n & 0xff000000) ?
52 24 + lg_table[(n >> 24) & 0xff] :
53 16 + lg_table[(n >> 16) & 0xff]) :
54 ((n & 0x0000ff00) ?
55 8 + lg_table[(n >> 8) & 0xff] :
56 0 + lg_table[(n >> 0) & 0xff]);
57 }
58
59
60 /*---------------------------------------------------------------------------*/
61
62 /* Simple insertionsort for small size groups. */
63 void
64 tr_insertionsort(const_saidx_it ISAd, saidx_it first, saidx_it last) {
65 saidx_it a, b;
66 saidx_t t, r;
67
68 for(a = first + 1; a < last; ++a) {
69 for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
70 do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
71 if(b < first) { break; }
72 }
73 if(r == 0) { *b = ~*b; }
74 *(b + 1) = t;
75 }
76 }
77
78
79 /*---------------------------------------------------------------------------*/
80
81 inline
82 void
83 tr_fixdown(const_saidx_it ISAd, saidx_it SA, saidx_t i, saidx_t size) {
84 saidx_t j, k;
85 saidx_t v;
86 saidx_t c, d, e;
87
88 for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
89 d = ISAd[SA[k = j++]];
90 if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
91 if(d <= c) { break; }
92 }
93 SA[i] = v;
94 }
95
96 /* Simple top-down heapsort. */
97 void
98 tr_heapsort(const_saidx_it ISAd, saidx_it SA, saidx_t size) {
99 saidx_t i, m;
100 saidx_t t;
101
102 m = size;
103 if((size % 2) == 0) {
104 m--;
105 if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
106 }
107
108 for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
109 if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
110 for(i = m - 1; 0 < i; --i) {
111 t = SA[0], SA[0] = SA[i];
112 tr_fixdown(ISAd, SA, 0, i);
113 SA[i] = t;
114 }
115 }
116
117
118 /*---------------------------------------------------------------------------*/
119
120 /* Returns the median of three elements. */
121 inline
122 saidx_it
123 tr_median3(const_saidx_it ISAd, saidx_it v1, saidx_it v2, saidx_it v3) {
124 saidx_it t;
125 if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
126 if(ISAd[*v2] > ISAd[*v3]) {
127 if(ISAd[*v1] > ISAd[*v3]) { return v1; }
128 else { return v3; }
129 }
130 return v2;
131 }
132
133 /* Returns the median of five elements. */
134 inline
135 saidx_it
136 tr_median5(const_saidx_it ISAd,
137 saidx_it v1, saidx_it v2, saidx_it v3, saidx_it v4, saidx_it v5) {
138 saidx_it t;
139 if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
140 if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
141 if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
142 if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
143 if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
144 if(ISAd[*v3] > ISAd[*v4]) { return v4; }
145 return v3;
146 }
147
148 /* Returns the pivot element. */
149 inline
150 saidx_it
151 tr_pivot(const_saidx_it ISAd, saidx_it first, saidx_it last) {
152 saidx_it middle;
153 saidx_t t;
154
155 t = last - first;
156 middle = first + t / 2;
157
158 if(t <= 512) {
159 if(t <= 32) {
160 return tr_median3(ISAd, first, middle, last - 1);
161 } else {
162 t >>= 2;
163 return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
164 }
165 }
166 t >>= 3;
167 first = tr_median3(ISAd, first, first + t, first + (t << 1));
168 middle = tr_median3(ISAd, middle - t, middle, middle + t);
169 last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
170 return tr_median3(ISAd, first, middle, last);
171 }
172
173
174 /*---------------------------------------------------------------------------*/
175
176 typedef struct _trbudget_t trbudget_t;
177 struct _trbudget_t {
178 saidx_t chance;
179 saidx_t remain;
180 saidx_t incval;
181 saidx_t count;
182 };
183
184 inline
185 void
186 trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) {
187 budget->chance = chance;
188 budget->remain = budget->incval = incval;
189 }
190
191 inline
192 saint_t
193 trbudget_check(trbudget_t *budget, saidx_t size) {
194 if(size <= budget->remain) { budget->remain -= size; return 1; }
195 if(budget->chance == 0) { budget->count += size; return 0; }
196 budget->remain += budget->incval - size;
197 budget->chance -= 1;
198 return 1;
199 }
200
201
202 /*---------------------------------------------------------------------------*/
203
204 inline
205 void
206 tr_partition(const_saidx_it ISAd,
207 saidx_it first, saidx_it middle, saidx_it last,
208 saidx_it* pa, saidx_it* pb, saidx_t v) {
209 saidx_it a, b, c, d, e, f;
210 saidx_t t, s;
211 saidx_t x = 0;
212
213 for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
214 if(((a = b) < last) && (x < v)) {
215 for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
216 if(x == v) { SWAP(*b, *a); ++a; }
217 }
218 }
219 for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
220 if((b < (d = c)) && (x > v)) {
221 for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
222 if(x == v) { SWAP(*c, *d); --d; }
223 }
224 }
225 for(; b < c;) {
226 SWAP(*b, *c);
227 for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
228 if(x == v) { SWAP(*b, *a); ++a; }
229 }
230 for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
231 if(x == v) { SWAP(*c, *d); --d; }
232 }
233 }
234
235 if(a <= d) {
236 c = b - 1;
237 if((s = a - first) > (t = b - a)) { s = t; }
238 for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
239 if((s = d - c) > (t = last - d - 1)) { s = t; }
240 for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
241 first += (b - a), last -= (d - c);
242 }
243 *pa = first, *pb = last;
244 }
245
246 void
247 tr_copy(saidx_it ISA, const_saidx_it SA,
248 saidx_it first, saidx_it a, saidx_it b, saidx_it last,
249 saidx_t depth) {
250 /* sort suffixes of middle partition
251 by using sorted order of suffixes of left and right partition. */
252 saidx_it c, d, e;
253 saidx_t s, v;
254
255 v = b - SA - 1;
256 for(c = first, d = a - 1; c <= d; ++c) {
257 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
258 *++d = s;
259 ISA[s] = d - SA;
260 }
261 }
262 for(c = last - 1, e = d + 1, d = b; e < d; --c) {
263 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
264 *--d = s;
265 ISA[s] = d - SA;
266 }
267 }
268 }
269
270 void
271 tr_partialcopy(saidx_it ISA, const_saidx_it SA,
272 saidx_it first, saidx_it a, saidx_it b, saidx_it last,
273 saidx_t depth) {
274 saidx_it c, d, e;
275 saidx_t s, v;
276 saidx_t rank, lastrank, newrank = -1;
277
278 v = b - SA - 1;
279 lastrank = -1;
280 for(c = first, d = a - 1; c <= d; ++c) {
281 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
282 *++d = s;
283 rank = ISA[s + depth];
284 if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
285 ISA[s] = newrank;
286 }
287 }
288
289 lastrank = -1;
290 for(e = d; first <= e; --e) {
291 rank = ISA[*e];
292 if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
293 if(newrank != rank) { ISA[*e] = newrank; }
294 }
295
296 lastrank = -1;
297 for(c = last - 1, e = d + 1, d = b; e < d; --c) {
298 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
299 *--d = s;
300 rank = ISA[s + depth];
301 if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
302 ISA[s] = newrank;
303 }
304 }
305 }
306
307 void
308 tr_introsort(saidx_it ISA, const_saidx_it ISAd,
309 saidx_it SA, saidx_it first, saidx_it last,
310 trbudget_t *budget) {
311 #define STACK_SIZE TR_STACKSIZE
312 struct { const_saidx_it a; saidx_it b, c; saint_t d, e; }stack[STACK_SIZE];
313 saidx_it a, b, c;
314 saidx_t t;
315 saidx_t v, x = 0;
316 saidx_t incr = ISAd - ISA;
317 saint_t limit, next;
318 saint_t ssize, trlink = -1;
319
320 for(ssize = 0, limit = tr_ilg(last - first);;) {
321
322 if(limit < 0) {
323 if(limit == -1) {
324 /* tandem repeat partition */
325 tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
326
327 /* update ranks */
328 if(a < last) {
329 for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
330 }
331 if(b < last) {
332 for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
333 }
334
335 /* push */
336 if(1 < (b - a)) {
337 STACK_PUSH5(NULL, a, b, 0, 0);
338 STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
339 trlink = ssize - 2;
340 }
341 if((a - first) <= (last - b)) {
342 if(1 < (a - first)) {
343 STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
344 last = a, limit = tr_ilg(a - first);
345 } else if(1 < (last - b)) {
346 first = b, limit = tr_ilg(last - b);
347 } else {
348 STACK_POP5(ISAd, first, last, limit, trlink);
349 }
350 } else {
351 if(1 < (last - b)) {
352 STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
353 first = b, limit = tr_ilg(last - b);
354 } else if(1 < (a - first)) {
355 last = a, limit = tr_ilg(a - first);
356 } else {
357 STACK_POP5(ISAd, first, last, limit, trlink);
358 }
359 }
360 } else if(limit == -2) {
361 /* tandem repeat copy */
362 a = stack[--ssize].b, b = stack[ssize].c;
363 if(stack[ssize].d == 0) {
364 tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
365 } else {
366 if(0 <= trlink) { stack[trlink].d = -1; }
367 tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
368 }
369 STACK_POP5(ISAd, first, last, limit, trlink);
370 } else {
371 /* sorted partition */
372 if(0 <= *first) {
373 a = first;
374 do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
375 first = a;
376 }
377 if(first < last) {
378 a = first; do { *a = ~*a; } while(*++a < 0);
379 next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
380 if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
381
382 /* push */
383 if(trbudget_check(budget, a - first)) {
384 if((a - first) <= (last - a)) {
385 STACK_PUSH5(ISAd, a, last, -3, trlink);
386 ISAd += incr, last = a, limit = next;
387 } else {
388 if(1 < (last - a)) {
389 STACK_PUSH5(ISAd + incr, first, a, next, trlink);
390 first = a, limit = -3;
391 } else {
392 ISAd += incr, last = a, limit = next;
393 }
394 }
395 } else {
396 if(0 <= trlink) { stack[trlink].d = -1; }
397 if(1 < (last - a)) {
398 first = a, limit = -3;
399 } else {
400 STACK_POP5(ISAd, first, last, limit, trlink);
401 }
402 }
403 } else {
404 STACK_POP5(ISAd, first, last, limit, trlink);
405 }
406 }
407 continue;
408 }
409
410 if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
411 tr_insertionsort(ISAd, first, last);
412 limit = -3;
413 continue;
414 }
415
416 if(limit-- == 0) {
417 tr_heapsort(ISAd, first, last - first);
418 for(a = last - 1; first < a; a = b) {
419 for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
420 }
421 limit = -3;
422 continue;
423 }
424
425 /* choose pivot */
426 a = tr_pivot(ISAd, first, last);
427 SWAP(*first, *a);
428 v = ISAd[*first];
429
430 /* partition */
431 tr_partition(ISAd, first, first + 1, last, &a, &b, v);
432 if((last - first) != (b - a)) {
433 next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
434
435 /* update ranks */
436 for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
437 if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
438
439 /* push */
440 if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
441 if((a - first) <= (last - b)) {
442 if((last - b) <= (b - a)) {
443 if(1 < (a - first)) {
444 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
445 STACK_PUSH5(ISAd, b, last, limit, trlink);
446 last = a;
447 } else if(1 < (last - b)) {
448 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
449 first = b;
450 } else {
451 ISAd += incr, first = a, last = b, limit = next;
452 }
453 } else if((a - first) <= (b - a)) {
454 if(1 < (a - first)) {
455 STACK_PUSH5(ISAd, b, last, limit, trlink);
456 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
457 last = a;
458 } else {
459 STACK_PUSH5(ISAd, b, last, limit, trlink);
460 ISAd += incr, first = a, last = b, limit = next;
461 }
462 } else {
463 STACK_PUSH5(ISAd, b, last, limit, trlink);
464 STACK_PUSH5(ISAd, first, a, limit, trlink);
465 ISAd += incr, first = a, last = b, limit = next;
466 }
467 } else {
468 if((a - first) <= (b - a)) {
469 if(1 < (last - b)) {
470 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
471 STACK_PUSH5(ISAd, first, a, limit, trlink);
472 first = b;
473 } else if(1 < (a - first)) {
474 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
475 last = a;
476 } else {
477 ISAd += incr, first = a, last = b, limit = next;
478 }
479 } else if((last - b) <= (b - a)) {
480 if(1 < (last - b)) {
481 STACK_PUSH5(ISAd, first, a, limit, trlink);
482 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
483 first = b;
484 } else {
485 STACK_PUSH5(ISAd, first, a, limit, trlink);
486 ISAd += incr, first = a, last = b, limit = next;
487 }
488 } else {
489 STACK_PUSH5(ISAd, first, a, limit, trlink);
490 STACK_PUSH5(ISAd, b, last, limit, trlink);
491 ISAd += incr, first = a, last = b, limit = next;
492 }
493 }
494 } else {
495 if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
496 if((a - first) <= (last - b)) {
497 if(1 < (a - first)) {
498 STACK_PUSH5(ISAd, b, last, limit, trlink);
499 last = a;
500 } else if(1 < (last - b)) {
501 first = b;
502 } else {
503 STACK_POP5(ISAd, first, last, limit, trlink);
504 }
505 } else {
506 if(1 < (last - b)) {
507 STACK_PUSH5(ISAd, first, a, limit, trlink);
508 first = b;
509 } else if(1 < (a - first)) {
510 last = a;
511 } else {
512 STACK_POP5(ISAd, first, last, limit, trlink);
513 }
514 }
515 }
516 } else {
517 if(trbudget_check(budget, last - first)) {
518 limit = tr_ilg(last - first), ISAd += incr;
519 } else {
520 if(0 <= trlink) { stack[trlink].d = -1; }
521 STACK_POP5(ISAd, first, last, limit, trlink);
522 }
523 }
524 }
525 #undef STACK_SIZE
526 }
527
528 } // namespace
529
530 /*---------------------------------------------------------------------------*/
531
532 /*- Function -*/
533
534 /* Tandem repeat sort */
535 void
536 trsort(saidx_it ISA, saidx_it SA, saidx_t n, saidx_t depth) {
537 saidx_it ISAd;
538 saidx_it first, last;
539 trbudget_t budget;
540 saidx_t t, skip, unsorted;
541
542 trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
543 /* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
544 for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
545 first = SA;
546 skip = 0;
547 unsorted = 0;
548 do {
549 if((t = *first) < 0) { first -= t; skip += t; }
550 else {
551 if(skip != 0) { *(first + skip) = skip; skip = 0; }
552 last = SA + ISA[t] + 1;
553 if(1 < (last - first)) {
554 budget.count = 0;
555 tr_introsort(ISA, ISAd, SA, first, last, &budget);
556 if(budget.count != 0) { unsorted += budget.count; }
557 else { skip = first - last; }
558 } else if((last - first) == 1) {
559 skip = -1;
560 }
561 first = last;
562 }
563 } while(first < (SA + n));
564 if(skip != 0) { *(first + skip) = skip; }
565 if(unsorted == 0) { break; }
566 }
567 }
568
569 } // namespace divsuf
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698