OLD | NEW |
| (Empty) |
1 | |
2 /*-------------------------------------------------------------*/ | |
3 /*--- Block sorting machinery ---*/ | |
4 /*--- blocksort.c ---*/ | |
5 /*-------------------------------------------------------------*/ | |
6 | |
7 /* ------------------------------------------------------------------ | |
8 This file is part of bzip2/libbzip2, a program and library for | |
9 lossless, block-sorting data compression. | |
10 | |
11 bzip2/libbzip2 version 1.0.6 of 6 September 2010 | |
12 Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org> | |
13 | |
14 Please read the WARNING, DISCLAIMER and PATENTS sections in the | |
15 README file. | |
16 | |
17 This program is released under the terms of the license contained | |
18 in the file LICENSE. | |
19 ------------------------------------------------------------------ */ | |
20 | |
21 | |
22 #include "bzlib_private.h" | |
23 | |
24 /*---------------------------------------------*/ | |
25 /*--- Fallback O(N log(N)^2) sorting ---*/ | |
26 /*--- algorithm, for repetitive blocks ---*/ | |
27 /*---------------------------------------------*/ | |
28 | |
29 /*---------------------------------------------*/ | |
30 static | |
31 __inline__ | |
32 void fallbackSimpleSort ( UInt32* fmap, | |
33 UInt32* eclass, | |
34 Int32 lo, | |
35 Int32 hi ) | |
36 { | |
37 Int32 i, j, tmp; | |
38 UInt32 ec_tmp; | |
39 | |
40 if (lo == hi) return; | |
41 | |
42 if (hi - lo > 3) { | |
43 for ( i = hi-4; i >= lo; i-- ) { | |
44 tmp = fmap[i]; | |
45 ec_tmp = eclass[tmp]; | |
46 for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) | |
47 fmap[j-4] = fmap[j]; | |
48 fmap[j-4] = tmp; | |
49 } | |
50 } | |
51 | |
52 for ( i = hi-1; i >= lo; i-- ) { | |
53 tmp = fmap[i]; | |
54 ec_tmp = eclass[tmp]; | |
55 for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) | |
56 fmap[j-1] = fmap[j]; | |
57 fmap[j-1] = tmp; | |
58 } | |
59 } | |
60 | |
61 | |
62 /*---------------------------------------------*/ | |
63 #define fswap(zz1, zz2) \ | |
64 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } | |
65 | |
66 #define fvswap(zzp1, zzp2, zzn) \ | |
67 { \ | |
68 Int32 yyp1 = (zzp1); \ | |
69 Int32 yyp2 = (zzp2); \ | |
70 Int32 yyn = (zzn); \ | |
71 while (yyn > 0) { \ | |
72 fswap(fmap[yyp1], fmap[yyp2]); \ | |
73 yyp1++; yyp2++; yyn--; \ | |
74 } \ | |
75 } | |
76 | |
77 | |
78 #define fmin(a,b) ((a) < (b)) ? (a) : (b) | |
79 | |
80 #define fpush(lz,hz) { stackLo[sp] = lz; \ | |
81 stackHi[sp] = hz; \ | |
82 sp++; } | |
83 | |
84 #define fpop(lz,hz) { sp--; \ | |
85 lz = stackLo[sp]; \ | |
86 hz = stackHi[sp]; } | |
87 | |
88 #define FALLBACK_QSORT_SMALL_THRESH 10 | |
89 #define FALLBACK_QSORT_STACK_SIZE 100 | |
90 | |
91 | |
92 static | |
93 void fallbackQSort3 ( UInt32* fmap, | |
94 UInt32* eclass, | |
95 Int32 loSt, | |
96 Int32 hiSt ) | |
97 { | |
98 Int32 unLo, unHi, ltLo, gtHi, n, m; | |
99 Int32 sp, lo, hi; | |
100 UInt32 med, r, r3; | |
101 Int32 stackLo[FALLBACK_QSORT_STACK_SIZE]; | |
102 Int32 stackHi[FALLBACK_QSORT_STACK_SIZE]; | |
103 | |
104 r = 0; | |
105 | |
106 sp = 0; | |
107 fpush ( loSt, hiSt ); | |
108 | |
109 while (sp > 0) { | |
110 | |
111 AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 ); | |
112 | |
113 fpop ( lo, hi ); | |
114 if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { | |
115 fallbackSimpleSort ( fmap, eclass, lo, hi ); | |
116 continue; | |
117 } | |
118 | |
119 /* Random partitioning. Median of 3 sometimes fails to | |
120 avoid bad cases. Median of 9 seems to help but | |
121 looks rather expensive. This too seems to work but | |
122 is cheaper. Guidance for the magic constants | |
123 7621 and 32768 is taken from Sedgewick's algorithms | |
124 book, chapter 35. | |
125 */ | |
126 r = ((r * 7621) + 1) % 32768; | |
127 r3 = r % 3; | |
128 if (r3 == 0) med = eclass[fmap[lo]]; else | |
129 if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else | |
130 med = eclass[fmap[hi]]; | |
131 | |
132 unLo = ltLo = lo; | |
133 unHi = gtHi = hi; | |
134 | |
135 while (1) { | |
136 while (1) { | |
137 if (unLo > unHi) break; | |
138 n = (Int32)eclass[fmap[unLo]] - (Int32)med; | |
139 if (n == 0) { | |
140 fswap(fmap[unLo], fmap[ltLo]); | |
141 ltLo++; unLo++; | |
142 continue; | |
143 }; | |
144 if (n > 0) break; | |
145 unLo++; | |
146 } | |
147 while (1) { | |
148 if (unLo > unHi) break; | |
149 n = (Int32)eclass[fmap[unHi]] - (Int32)med; | |
150 if (n == 0) { | |
151 fswap(fmap[unHi], fmap[gtHi]); | |
152 gtHi--; unHi--; | |
153 continue; | |
154 }; | |
155 if (n < 0) break; | |
156 unHi--; | |
157 } | |
158 if (unLo > unHi) break; | |
159 fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; | |
160 } | |
161 | |
162 AssertD ( unHi == unLo-1, "fallbackQSort3(2)" ); | |
163 | |
164 if (gtHi < ltLo) continue; | |
165 | |
166 n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); | |
167 m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); | |
168 | |
169 n = lo + unLo - ltLo - 1; | |
170 m = hi - (gtHi - unHi) + 1; | |
171 | |
172 if (n - lo > hi - m) { | |
173 fpush ( lo, n ); | |
174 fpush ( m, hi ); | |
175 } else { | |
176 fpush ( m, hi ); | |
177 fpush ( lo, n ); | |
178 } | |
179 } | |
180 } | |
181 | |
182 #undef fmin | |
183 #undef fpush | |
184 #undef fpop | |
185 #undef fswap | |
186 #undef fvswap | |
187 #undef FALLBACK_QSORT_SMALL_THRESH | |
188 #undef FALLBACK_QSORT_STACK_SIZE | |
189 | |
190 | |
191 /*---------------------------------------------*/ | |
192 /* Pre: | |
193 nblock > 0 | |
194 eclass exists for [0 .. nblock-1] | |
195 ((UChar*)eclass) [0 .. nblock-1] holds block | |
196 ptr exists for [0 .. nblock-1] | |
197 | |
198 Post: | |
199 ((UChar*)eclass) [0 .. nblock-1] holds block | |
200 All other areas of eclass destroyed | |
201 fmap [0 .. nblock-1] holds sorted order | |
202 bhtab [ 0 .. 2+(nblock/32) ] destroyed | |
203 */ | |
204 | |
205 #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) | |
206 #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) | |
207 #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) | |
208 #define WORD_BH(zz) bhtab[(zz) >> 5] | |
209 #define UNALIGNED_BH(zz) ((zz) & 0x01f) | |
210 | |
211 static | |
212 void fallbackSort ( UInt32* fmap, | |
213 UInt32* eclass, | |
214 UInt32* bhtab, | |
215 Int32 nblock, | |
216 Int32 verb ) | |
217 { | |
218 Int32 ftab[257]; | |
219 Int32 ftabCopy[256]; | |
220 Int32 H, i, j, k, l, r, cc, cc1; | |
221 Int32 nNotDone; | |
222 Int32 nBhtab; | |
223 UChar* eclass8 = (UChar*)eclass; | |
224 | |
225 /*-- | |
226 Initial 1-char radix sort to generate | |
227 initial fmap and initial BH bits. | |
228 --*/ | |
229 if (verb >= 4) | |
230 VPrintf0 ( " bucket sorting ...\n" ); | |
231 for (i = 0; i < 257; i++) ftab[i] = 0; | |
232 for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; | |
233 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; | |
234 for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; | |
235 | |
236 for (i = 0; i < nblock; i++) { | |
237 j = eclass8[i]; | |
238 k = ftab[j] - 1; | |
239 ftab[j] = k; | |
240 fmap[k] = i; | |
241 } | |
242 | |
243 nBhtab = 2 + (nblock / 32); | |
244 for (i = 0; i < nBhtab; i++) bhtab[i] = 0; | |
245 for (i = 0; i < 256; i++) SET_BH(ftab[i]); | |
246 | |
247 /*-- | |
248 Inductively refine the buckets. Kind-of an | |
249 "exponential radix sort" (!), inspired by the | |
250 Manber-Myers suffix array construction algorithm. | |
251 --*/ | |
252 | |
253 /*-- set sentinel bits for block-end detection --*/ | |
254 for (i = 0; i < 32; i++) { | |
255 SET_BH(nblock + 2*i); | |
256 CLEAR_BH(nblock + 2*i + 1); | |
257 } | |
258 | |
259 /*-- the log(N) loop --*/ | |
260 H = 1; | |
261 while (1) { | |
262 | |
263 if (verb >= 4) | |
264 VPrintf1 ( " depth %6d has ", H ); | |
265 | |
266 j = 0; | |
267 for (i = 0; i < nblock; i++) { | |
268 if (ISSET_BH(i)) j = i; | |
269 k = fmap[i] - H; if (k < 0) k += nblock; | |
270 eclass[k] = j; | |
271 } | |
272 | |
273 nNotDone = 0; | |
274 r = -1; | |
275 while (1) { | |
276 | |
277 /*-- find the next non-singleton bucket --*/ | |
278 k = r + 1; | |
279 while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; | |
280 if (ISSET_BH(k)) { | |
281 while (WORD_BH(k) == 0xffffffff) k += 32; | |
282 while (ISSET_BH(k)) k++; | |
283 } | |
284 l = k - 1; | |
285 if (l >= nblock) break; | |
286 while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; | |
287 if (!ISSET_BH(k)) { | |
288 while (WORD_BH(k) == 0x00000000) k += 32; | |
289 while (!ISSET_BH(k)) k++; | |
290 } | |
291 r = k - 1; | |
292 if (r >= nblock) break; | |
293 | |
294 /*-- now [l, r] bracket current bucket --*/ | |
295 if (r > l) { | |
296 nNotDone += (r - l + 1); | |
297 fallbackQSort3 ( fmap, eclass, l, r ); | |
298 | |
299 /*-- scan bucket and generate header bits-- */ | |
300 cc = -1; | |
301 for (i = l; i <= r; i++) { | |
302 cc1 = eclass[fmap[i]]; | |
303 if (cc != cc1) { SET_BH(i); cc = cc1; }; | |
304 } | |
305 } | |
306 } | |
307 | |
308 if (verb >= 4) | |
309 VPrintf1 ( "%6d unresolved strings\n", nNotDone ); | |
310 | |
311 H *= 2; | |
312 if (H > nblock || nNotDone == 0) break; | |
313 } | |
314 | |
315 /*-- | |
316 Reconstruct the original block in | |
317 eclass8 [0 .. nblock-1], since the | |
318 previous phase destroyed it. | |
319 --*/ | |
320 if (verb >= 4) | |
321 VPrintf0 ( " reconstructing block ...\n" ); | |
322 j = 0; | |
323 for (i = 0; i < nblock; i++) { | |
324 while (ftabCopy[j] == 0) j++; | |
325 ftabCopy[j]--; | |
326 eclass8[fmap[i]] = (UChar)j; | |
327 } | |
328 AssertH ( j < 256, 1005 ); | |
329 } | |
330 | |
331 #undef SET_BH | |
332 #undef CLEAR_BH | |
333 #undef ISSET_BH | |
334 #undef WORD_BH | |
335 #undef UNALIGNED_BH | |
336 | |
337 | |
338 /*---------------------------------------------*/ | |
339 /*--- The main, O(N^2 log(N)) sorting ---*/ | |
340 /*--- algorithm. Faster for "normal" ---*/ | |
341 /*--- non-repetitive blocks. ---*/ | |
342 /*---------------------------------------------*/ | |
343 | |
344 /*---------------------------------------------*/ | |
345 static | |
346 __inline__ | |
347 Bool mainGtU ( UInt32 i1, | |
348 UInt32 i2, | |
349 UChar* block, | |
350 UInt16* quadrant, | |
351 UInt32 nblock, | |
352 Int32* budget ) | |
353 { | |
354 Int32 k; | |
355 UChar c1, c2; | |
356 UInt16 s1, s2; | |
357 | |
358 AssertD ( i1 != i2, "mainGtU" ); | |
359 /* 1 */ | |
360 c1 = block[i1]; c2 = block[i2]; | |
361 if (c1 != c2) return (c1 > c2); | |
362 i1++; i2++; | |
363 /* 2 */ | |
364 c1 = block[i1]; c2 = block[i2]; | |
365 if (c1 != c2) return (c1 > c2); | |
366 i1++; i2++; | |
367 /* 3 */ | |
368 c1 = block[i1]; c2 = block[i2]; | |
369 if (c1 != c2) return (c1 > c2); | |
370 i1++; i2++; | |
371 /* 4 */ | |
372 c1 = block[i1]; c2 = block[i2]; | |
373 if (c1 != c2) return (c1 > c2); | |
374 i1++; i2++; | |
375 /* 5 */ | |
376 c1 = block[i1]; c2 = block[i2]; | |
377 if (c1 != c2) return (c1 > c2); | |
378 i1++; i2++; | |
379 /* 6 */ | |
380 c1 = block[i1]; c2 = block[i2]; | |
381 if (c1 != c2) return (c1 > c2); | |
382 i1++; i2++; | |
383 /* 7 */ | |
384 c1 = block[i1]; c2 = block[i2]; | |
385 if (c1 != c2) return (c1 > c2); | |
386 i1++; i2++; | |
387 /* 8 */ | |
388 c1 = block[i1]; c2 = block[i2]; | |
389 if (c1 != c2) return (c1 > c2); | |
390 i1++; i2++; | |
391 /* 9 */ | |
392 c1 = block[i1]; c2 = block[i2]; | |
393 if (c1 != c2) return (c1 > c2); | |
394 i1++; i2++; | |
395 /* 10 */ | |
396 c1 = block[i1]; c2 = block[i2]; | |
397 if (c1 != c2) return (c1 > c2); | |
398 i1++; i2++; | |
399 /* 11 */ | |
400 c1 = block[i1]; c2 = block[i2]; | |
401 if (c1 != c2) return (c1 > c2); | |
402 i1++; i2++; | |
403 /* 12 */ | |
404 c1 = block[i1]; c2 = block[i2]; | |
405 if (c1 != c2) return (c1 > c2); | |
406 i1++; i2++; | |
407 | |
408 k = nblock + 8; | |
409 | |
410 do { | |
411 /* 1 */ | |
412 c1 = block[i1]; c2 = block[i2]; | |
413 if (c1 != c2) return (c1 > c2); | |
414 s1 = quadrant[i1]; s2 = quadrant[i2]; | |
415 if (s1 != s2) return (s1 > s2); | |
416 i1++; i2++; | |
417 /* 2 */ | |
418 c1 = block[i1]; c2 = block[i2]; | |
419 if (c1 != c2) return (c1 > c2); | |
420 s1 = quadrant[i1]; s2 = quadrant[i2]; | |
421 if (s1 != s2) return (s1 > s2); | |
422 i1++; i2++; | |
423 /* 3 */ | |
424 c1 = block[i1]; c2 = block[i2]; | |
425 if (c1 != c2) return (c1 > c2); | |
426 s1 = quadrant[i1]; s2 = quadrant[i2]; | |
427 if (s1 != s2) return (s1 > s2); | |
428 i1++; i2++; | |
429 /* 4 */ | |
430 c1 = block[i1]; c2 = block[i2]; | |
431 if (c1 != c2) return (c1 > c2); | |
432 s1 = quadrant[i1]; s2 = quadrant[i2]; | |
433 if (s1 != s2) return (s1 > s2); | |
434 i1++; i2++; | |
435 /* 5 */ | |
436 c1 = block[i1]; c2 = block[i2]; | |
437 if (c1 != c2) return (c1 > c2); | |
438 s1 = quadrant[i1]; s2 = quadrant[i2]; | |
439 if (s1 != s2) return (s1 > s2); | |
440 i1++; i2++; | |
441 /* 6 */ | |
442 c1 = block[i1]; c2 = block[i2]; | |
443 if (c1 != c2) return (c1 > c2); | |
444 s1 = quadrant[i1]; s2 = quadrant[i2]; | |
445 if (s1 != s2) return (s1 > s2); | |
446 i1++; i2++; | |
447 /* 7 */ | |
448 c1 = block[i1]; c2 = block[i2]; | |
449 if (c1 != c2) return (c1 > c2); | |
450 s1 = quadrant[i1]; s2 = quadrant[i2]; | |
451 if (s1 != s2) return (s1 > s2); | |
452 i1++; i2++; | |
453 /* 8 */ | |
454 c1 = block[i1]; c2 = block[i2]; | |
455 if (c1 != c2) return (c1 > c2); | |
456 s1 = quadrant[i1]; s2 = quadrant[i2]; | |
457 if (s1 != s2) return (s1 > s2); | |
458 i1++; i2++; | |
459 | |
460 if (i1 >= nblock) i1 -= nblock; | |
461 if (i2 >= nblock) i2 -= nblock; | |
462 | |
463 k -= 8; | |
464 (*budget)--; | |
465 } | |
466 while (k >= 0); | |
467 | |
468 return False; | |
469 } | |
470 | |
471 | |
472 /*---------------------------------------------*/ | |
473 /*-- | |
474 Knuth's increments seem to work better | |
475 than Incerpi-Sedgewick here. Possibly | |
476 because the number of elems to sort is | |
477 usually small, typically <= 20. | |
478 --*/ | |
479 static | |
480 Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, | |
481 9841, 29524, 88573, 265720, | |
482 797161, 2391484 }; | |
483 | |
484 static | |
485 void mainSimpleSort ( UInt32* ptr, | |
486 UChar* block, | |
487 UInt16* quadrant, | |
488 Int32 nblock, | |
489 Int32 lo, | |
490 Int32 hi, | |
491 Int32 d, | |
492 Int32* budget ) | |
493 { | |
494 Int32 i, j, h, bigN, hp; | |
495 UInt32 v; | |
496 | |
497 bigN = hi - lo + 1; | |
498 if (bigN < 2) return; | |
499 | |
500 hp = 0; | |
501 while (incs[hp] < bigN) hp++; | |
502 hp--; | |
503 | |
504 for (; hp >= 0; hp--) { | |
505 h = incs[hp]; | |
506 | |
507 i = lo + h; | |
508 while (True) { | |
509 | |
510 /*-- copy 1 --*/ | |
511 if (i > hi) break; | |
512 v = ptr[i]; | |
513 j = i; | |
514 while ( mainGtU ( | |
515 ptr[j-h]+d, v+d, block, quadrant, nblock, budget | |
516 ) ) { | |
517 ptr[j] = ptr[j-h]; | |
518 j = j - h; | |
519 if (j <= (lo + h - 1)) break; | |
520 } | |
521 ptr[j] = v; | |
522 i++; | |
523 | |
524 /*-- copy 2 --*/ | |
525 if (i > hi) break; | |
526 v = ptr[i]; | |
527 j = i; | |
528 while ( mainGtU ( | |
529 ptr[j-h]+d, v+d, block, quadrant, nblock, budget | |
530 ) ) { | |
531 ptr[j] = ptr[j-h]; | |
532 j = j - h; | |
533 if (j <= (lo + h - 1)) break; | |
534 } | |
535 ptr[j] = v; | |
536 i++; | |
537 | |
538 /*-- copy 3 --*/ | |
539 if (i > hi) break; | |
540 v = ptr[i]; | |
541 j = i; | |
542 while ( mainGtU ( | |
543 ptr[j-h]+d, v+d, block, quadrant, nblock, budget | |
544 ) ) { | |
545 ptr[j] = ptr[j-h]; | |
546 j = j - h; | |
547 if (j <= (lo + h - 1)) break; | |
548 } | |
549 ptr[j] = v; | |
550 i++; | |
551 | |
552 if (*budget < 0) return; | |
553 } | |
554 } | |
555 } | |
556 | |
557 | |
558 /*---------------------------------------------*/ | |
559 /*-- | |
560 The following is an implementation of | |
561 an elegant 3-way quicksort for strings, | |
562 described in a paper "Fast Algorithms for | |
563 Sorting and Searching Strings", by Robert | |
564 Sedgewick and Jon L. Bentley. | |
565 --*/ | |
566 | |
567 #define mswap(zz1, zz2) \ | |
568 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } | |
569 | |
570 #define mvswap(zzp1, zzp2, zzn) \ | |
571 { \ | |
572 Int32 yyp1 = (zzp1); \ | |
573 Int32 yyp2 = (zzp2); \ | |
574 Int32 yyn = (zzn); \ | |
575 while (yyn > 0) { \ | |
576 mswap(ptr[yyp1], ptr[yyp2]); \ | |
577 yyp1++; yyp2++; yyn--; \ | |
578 } \ | |
579 } | |
580 | |
581 static | |
582 __inline__ | |
583 UChar mmed3 ( UChar a, UChar b, UChar c ) | |
584 { | |
585 UChar t; | |
586 if (a > b) { t = a; a = b; b = t; }; | |
587 if (b > c) { | |
588 b = c; | |
589 if (a > b) b = a; | |
590 } | |
591 return b; | |
592 } | |
593 | |
594 #define mmin(a,b) ((a) < (b)) ? (a) : (b) | |
595 | |
596 #define mpush(lz,hz,dz) { stackLo[sp] = lz; \ | |
597 stackHi[sp] = hz; \ | |
598 stackD [sp] = dz; \ | |
599 sp++; } | |
600 | |
601 #define mpop(lz,hz,dz) { sp--; \ | |
602 lz = stackLo[sp]; \ | |
603 hz = stackHi[sp]; \ | |
604 dz = stackD [sp]; } | |
605 | |
606 | |
607 #define mnextsize(az) (nextHi[az]-nextLo[az]) | |
608 | |
609 #define mnextswap(az,bz) \ | |
610 { Int32 tz; \ | |
611 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ | |
612 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ | |
613 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; } | |
614 | |
615 | |
616 #define MAIN_QSORT_SMALL_THRESH 20 | |
617 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) | |
618 #define MAIN_QSORT_STACK_SIZE 100 | |
619 | |
620 static | |
621 void mainQSort3 ( UInt32* ptr, | |
622 UChar* block, | |
623 UInt16* quadrant, | |
624 Int32 nblock, | |
625 Int32 loSt, | |
626 Int32 hiSt, | |
627 Int32 dSt, | |
628 Int32* budget ) | |
629 { | |
630 Int32 unLo, unHi, ltLo, gtHi, n, m, med; | |
631 Int32 sp, lo, hi, d; | |
632 | |
633 Int32 stackLo[MAIN_QSORT_STACK_SIZE]; | |
634 Int32 stackHi[MAIN_QSORT_STACK_SIZE]; | |
635 Int32 stackD [MAIN_QSORT_STACK_SIZE]; | |
636 | |
637 Int32 nextLo[3]; | |
638 Int32 nextHi[3]; | |
639 Int32 nextD [3]; | |
640 | |
641 sp = 0; | |
642 mpush ( loSt, hiSt, dSt ); | |
643 | |
644 while (sp > 0) { | |
645 | |
646 AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 ); | |
647 | |
648 mpop ( lo, hi, d ); | |
649 if (hi - lo < MAIN_QSORT_SMALL_THRESH || | |
650 d > MAIN_QSORT_DEPTH_THRESH) { | |
651 mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget ); | |
652 if (*budget < 0) return; | |
653 continue; | |
654 } | |
655 | |
656 med = (Int32) | |
657 mmed3 ( block[ptr[ lo ]+d], | |
658 block[ptr[ hi ]+d], | |
659 block[ptr[ (lo+hi)>>1 ]+d] ); | |
660 | |
661 unLo = ltLo = lo; | |
662 unHi = gtHi = hi; | |
663 | |
664 while (True) { | |
665 while (True) { | |
666 if (unLo > unHi) break; | |
667 n = ((Int32)block[ptr[unLo]+d]) - med; | |
668 if (n == 0) { | |
669 mswap(ptr[unLo], ptr[ltLo]); | |
670 ltLo++; unLo++; continue; | |
671 }; | |
672 if (n > 0) break; | |
673 unLo++; | |
674 } | |
675 while (True) { | |
676 if (unLo > unHi) break; | |
677 n = ((Int32)block[ptr[unHi]+d]) - med; | |
678 if (n == 0) { | |
679 mswap(ptr[unHi], ptr[gtHi]); | |
680 gtHi--; unHi--; continue; | |
681 }; | |
682 if (n < 0) break; | |
683 unHi--; | |
684 } | |
685 if (unLo > unHi) break; | |
686 mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; | |
687 } | |
688 | |
689 AssertD ( unHi == unLo-1, "mainQSort3(2)" ); | |
690 | |
691 if (gtHi < ltLo) { | |
692 mpush(lo, hi, d+1 ); | |
693 continue; | |
694 } | |
695 | |
696 n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); | |
697 m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); | |
698 | |
699 n = lo + unLo - ltLo - 1; | |
700 m = hi - (gtHi - unHi) + 1; | |
701 | |
702 nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; | |
703 nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; | |
704 nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; | |
705 | |
706 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); | |
707 if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); | |
708 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); | |
709 | |
710 AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" ); | |
711 AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" ); | |
712 | |
713 mpush (nextLo[0], nextHi[0], nextD[0]); | |
714 mpush (nextLo[1], nextHi[1], nextD[1]); | |
715 mpush (nextLo[2], nextHi[2], nextD[2]); | |
716 } | |
717 } | |
718 | |
719 #undef mswap | |
720 #undef mvswap | |
721 #undef mpush | |
722 #undef mpop | |
723 #undef mmin | |
724 #undef mnextsize | |
725 #undef mnextswap | |
726 #undef MAIN_QSORT_SMALL_THRESH | |
727 #undef MAIN_QSORT_DEPTH_THRESH | |
728 #undef MAIN_QSORT_STACK_SIZE | |
729 | |
730 | |
731 /*---------------------------------------------*/ | |
732 /* Pre: | |
733 nblock > N_OVERSHOOT | |
734 block32 exists for [0 .. nblock-1 +N_OVERSHOOT] | |
735 ((UChar*)block32) [0 .. nblock-1] holds block | |
736 ptr exists for [0 .. nblock-1] | |
737 | |
738 Post: | |
739 ((UChar*)block32) [0 .. nblock-1] holds block | |
740 All other areas of block32 destroyed | |
741 ftab [0 .. 65536 ] destroyed | |
742 ptr [0 .. nblock-1] holds sorted order | |
743 if (*budget < 0), sorting was abandoned | |
744 */ | |
745 | |
746 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) | |
747 #define SETMASK (1 << 21) | |
748 #define CLEARMASK (~(SETMASK)) | |
749 | |
750 static | |
751 void mainSort ( UInt32* ptr, | |
752 UChar* block, | |
753 UInt16* quadrant, | |
754 UInt32* ftab, | |
755 Int32 nblock, | |
756 Int32 verb, | |
757 Int32* budget ) | |
758 { | |
759 Int32 i, j, k, ss, sb; | |
760 Int32 runningOrder[256]; | |
761 Bool bigDone[256]; | |
762 Int32 copyStart[256]; | |
763 Int32 copyEnd [256]; | |
764 UChar c1; | |
765 Int32 numQSorted; | |
766 UInt16 s; | |
767 if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); | |
768 | |
769 /*-- set up the 2-byte frequency table --*/ | |
770 for (i = 65536; i >= 0; i--) ftab[i] = 0; | |
771 | |
772 j = block[0] << 8; | |
773 i = nblock-1; | |
774 for (; i >= 3; i -= 4) { | |
775 quadrant[i] = 0; | |
776 j = (j >> 8) | ( ((UInt16)block[i]) << 8); | |
777 ftab[j]++; | |
778 quadrant[i-1] = 0; | |
779 j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); | |
780 ftab[j]++; | |
781 quadrant[i-2] = 0; | |
782 j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); | |
783 ftab[j]++; | |
784 quadrant[i-3] = 0; | |
785 j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); | |
786 ftab[j]++; | |
787 } | |
788 for (; i >= 0; i--) { | |
789 quadrant[i] = 0; | |
790 j = (j >> 8) | ( ((UInt16)block[i]) << 8); | |
791 ftab[j]++; | |
792 } | |
793 | |
794 /*-- (emphasises close relationship of block & quadrant) --*/ | |
795 for (i = 0; i < BZ_N_OVERSHOOT; i++) { | |
796 block [nblock+i] = block[i]; | |
797 quadrant[nblock+i] = 0; | |
798 } | |
799 | |
800 if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" ); | |
801 | |
802 /*-- Complete the initial radix sort --*/ | |
803 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; | |
804 | |
805 s = block[0] << 8; | |
806 i = nblock-1; | |
807 for (; i >= 3; i -= 4) { | |
808 s = (s >> 8) | (block[i] << 8); | |
809 j = ftab[s] -1; | |
810 ftab[s] = j; | |
811 ptr[j] = i; | |
812 s = (s >> 8) | (block[i-1] << 8); | |
813 j = ftab[s] -1; | |
814 ftab[s] = j; | |
815 ptr[j] = i-1; | |
816 s = (s >> 8) | (block[i-2] << 8); | |
817 j = ftab[s] -1; | |
818 ftab[s] = j; | |
819 ptr[j] = i-2; | |
820 s = (s >> 8) | (block[i-3] << 8); | |
821 j = ftab[s] -1; | |
822 ftab[s] = j; | |
823 ptr[j] = i-3; | |
824 } | |
825 for (; i >= 0; i--) { | |
826 s = (s >> 8) | (block[i] << 8); | |
827 j = ftab[s] -1; | |
828 ftab[s] = j; | |
829 ptr[j] = i; | |
830 } | |
831 | |
832 /*-- | |
833 Now ftab contains the first loc of every small bucket. | |
834 Calculate the running order, from smallest to largest | |
835 big bucket. | |
836 --*/ | |
837 for (i = 0; i <= 255; i++) { | |
838 bigDone [i] = False; | |
839 runningOrder[i] = i; | |
840 } | |
841 | |
842 { | |
843 Int32 vv; | |
844 Int32 h = 1; | |
845 do h = 3 * h + 1; while (h <= 256); | |
846 do { | |
847 h = h / 3; | |
848 for (i = h; i <= 255; i++) { | |
849 vv = runningOrder[i]; | |
850 j = i; | |
851 while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { | |
852 runningOrder[j] = runningOrder[j-h]; | |
853 j = j - h; | |
854 if (j <= (h - 1)) goto zero; | |
855 } | |
856 zero: | |
857 runningOrder[j] = vv; | |
858 } | |
859 } while (h != 1); | |
860 } | |
861 | |
862 /*-- | |
863 The main sorting loop. | |
864 --*/ | |
865 | |
866 numQSorted = 0; | |
867 | |
868 for (i = 0; i <= 255; i++) { | |
869 | |
870 /*-- | |
871 Process big buckets, starting with the least full. | |
872 Basically this is a 3-step process in which we call | |
873 mainQSort3 to sort the small buckets [ss, j], but | |
874 also make a big effort to avoid the calls if we can. | |
875 --*/ | |
876 ss = runningOrder[i]; | |
877 | |
878 /*-- | |
879 Step 1: | |
880 Complete the big bucket [ss] by quicksorting | |
881 any unsorted small buckets [ss, j], for j != ss. | |
882 Hopefully previous pointer-scanning phases have already | |
883 completed many of the small buckets [ss, j], so | |
884 we don't have to sort them at all. | |
885 --*/ | |
886 for (j = 0; j <= 255; j++) { | |
887 if (j != ss) { | |
888 sb = (ss << 8) + j; | |
889 if ( ! (ftab[sb] & SETMASK) ) { | |
890 Int32 lo = ftab[sb] & CLEARMASK; | |
891 Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; | |
892 if (hi > lo) { | |
893 if (verb >= 4) | |
894 VPrintf4 ( " qsort [0x%x, 0x%x] " | |
895 "done %d this %d\n", | |
896 ss, j, numQSorted, hi - lo + 1 ); | |
897 mainQSort3 ( | |
898 ptr, block, quadrant, nblock, | |
899 lo, hi, BZ_N_RADIX, budget | |
900 ); | |
901 numQSorted += (hi - lo + 1); | |
902 if (*budget < 0) return; | |
903 } | |
904 } | |
905 ftab[sb] |= SETMASK; | |
906 } | |
907 } | |
908 | |
909 AssertH ( !bigDone[ss], 1006 ); | |
910 | |
911 /*-- | |
912 Step 2: | |
913 Now scan this big bucket [ss] so as to synthesise the | |
914 sorted order for small buckets [t, ss] for all t, | |
915 including, magically, the bucket [ss,ss] too. | |
916 This will avoid doing Real Work in subsequent Step 1's. | |
917 --*/ | |
918 { | |
919 for (j = 0; j <= 255; j++) { | |
920 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; | |
921 copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; | |
922 } | |
923 for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { | |
924 k = ptr[j]-1; if (k < 0) k += nblock; | |
925 c1 = block[k]; | |
926 if (!bigDone[c1]) | |
927 ptr[ copyStart[c1]++ ] = k; | |
928 } | |
929 for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { | |
930 k = ptr[j]-1; if (k < 0) k += nblock; | |
931 c1 = block[k]; | |
932 if (!bigDone[c1]) | |
933 ptr[ copyEnd[c1]-- ] = k; | |
934 } | |
935 } | |
936 | |
937 AssertH ( (copyStart[ss]-1 == copyEnd[ss]) | |
938 || | |
939 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. | |
940 Necessity for this case is demonstrated by compressing | |
941 a sequence of approximately 48.5 million of character | |
942 251; 1.0.0/1.0.1 will then die here. */ | |
943 (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), | |
944 1007 ) | |
945 | |
946 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; | |
947 | |
948 /*-- | |
949 Step 3: | |
950 The [ss] big bucket is now done. Record this fact, | |
951 and update the quadrant descriptors. Remember to | |
952 update quadrants in the overshoot area too, if | |
953 necessary. The "if (i < 255)" test merely skips | |
954 this updating for the last bucket processed, since | |
955 updating for the last bucket is pointless. | |
956 | |
957 The quadrant array provides a way to incrementally | |
958 cache sort orderings, as they appear, so as to | |
959 make subsequent comparisons in fullGtU() complete | |
960 faster. For repetitive blocks this makes a big | |
961 difference (but not big enough to be able to avoid | |
962 the fallback sorting mechanism, exponential radix sort). | |
963 | |
964 The precise meaning is: at all times: | |
965 | |
966 for 0 <= i < nblock and 0 <= j <= nblock | |
967 | |
968 if block[i] != block[j], | |
969 | |
970 then the relative values of quadrant[i] and | |
971 quadrant[j] are meaningless. | |
972 | |
973 else { | |
974 if quadrant[i] < quadrant[j] | |
975 then the string starting at i lexicographically | |
976 precedes the string starting at j | |
977 | |
978 else if quadrant[i] > quadrant[j] | |
979 then the string starting at j lexicographically | |
980 precedes the string starting at i | |
981 | |
982 else | |
983 the relative ordering of the strings starting | |
984 at i and j has not yet been determined. | |
985 } | |
986 --*/ | |
987 bigDone[ss] = True; | |
988 | |
989 if (i < 255) { | |
990 Int32 bbStart = ftab[ss << 8] & CLEARMASK; | |
991 Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; | |
992 Int32 shifts = 0; | |
993 | |
994 while ((bbSize >> shifts) > 65534) shifts++; | |
995 | |
996 for (j = bbSize-1; j >= 0; j--) { | |
997 Int32 a2update = ptr[bbStart + j]; | |
998 UInt16 qVal = (UInt16)(j >> shifts); | |
999 quadrant[a2update] = qVal; | |
1000 if (a2update < BZ_N_OVERSHOOT) | |
1001 quadrant[a2update + nblock] = qVal; | |
1002 } | |
1003 AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); | |
1004 } | |
1005 | |
1006 } | |
1007 | |
1008 if (verb >= 4) | |
1009 VPrintf3 ( " %d pointers, %d sorted, %d scanned\n", | |
1010 nblock, numQSorted, nblock - numQSorted ); | |
1011 } | |
1012 | |
1013 #undef BIGFREQ | |
1014 #undef SETMASK | |
1015 #undef CLEARMASK | |
1016 | |
1017 | |
1018 /*---------------------------------------------*/ | |
1019 /* Pre: | |
1020 nblock > 0 | |
1021 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] | |
1022 ((UChar*)arr2) [0 .. nblock-1] holds block | |
1023 arr1 exists for [0 .. nblock-1] | |
1024 | |
1025 Post: | |
1026 ((UChar*)arr2) [0 .. nblock-1] holds block | |
1027 All other areas of block destroyed | |
1028 ftab [ 0 .. 65536 ] destroyed | |
1029 arr1 [0 .. nblock-1] holds sorted order | |
1030 */ | |
1031 void BZ2_blockSort ( EState* s ) | |
1032 { | |
1033 UInt32* ptr = s->ptr; | |
1034 UChar* block = s->block; | |
1035 UInt32* ftab = s->ftab; | |
1036 Int32 nblock = s->nblock; | |
1037 Int32 verb = s->verbosity; | |
1038 Int32 wfact = s->workFactor; | |
1039 UInt16* quadrant; | |
1040 Int32 budget; | |
1041 Int32 budgetInit; | |
1042 Int32 i; | |
1043 | |
1044 if (nblock < 10000) { | |
1045 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); | |
1046 } else { | |
1047 /* Calculate the location for quadrant, remembering to get | |
1048 the alignment right. Assumes that &(block[0]) is at least | |
1049 2-byte aligned -- this should be ok since block is really | |
1050 the first section of arr2. | |
1051 */ | |
1052 i = nblock+BZ_N_OVERSHOOT; | |
1053 if (i & 1) i++; | |
1054 quadrant = (UInt16*)(&(block[i])); | |
1055 | |
1056 /* (wfact-1) / 3 puts the default-factor-30 | |
1057 transition point at very roughly the same place as | |
1058 with v0.1 and v0.9.0. | |
1059 Not that it particularly matters any more, since the | |
1060 resulting compressed stream is now the same regardless | |
1061 of whether or not we use the main sort or fallback sort. | |
1062 */ | |
1063 if (wfact < 1 ) wfact = 1; | |
1064 if (wfact > 100) wfact = 100; | |
1065 budgetInit = nblock * ((wfact-1) / 3); | |
1066 budget = budgetInit; | |
1067 | |
1068 mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget ); | |
1069 if (verb >= 3) | |
1070 VPrintf3 ( " %d work, %d block, ratio %5.2f\n", | |
1071 budgetInit - budget, | |
1072 nblock, | |
1073 (float)(budgetInit - budget) / | |
1074 (float)(nblock==0 ? 1 : nblock) ); | |
1075 if (budget < 0) { | |
1076 if (verb >= 2) | |
1077 VPrintf0 ( " too repetitive; using fallback" | |
1078 " sorting algorithm\n" ); | |
1079 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); | |
1080 } | |
1081 } | |
1082 | |
1083 s->origPtr = -1; | |
1084 for (i = 0; i < s->nblock; i++) | |
1085 if (ptr[i] == 0) | |
1086 { s->origPtr = i; break; }; | |
1087 | |
1088 AssertH( s->origPtr != -1, 1003 ); | |
1089 } | |
1090 | |
1091 | |
1092 /*-------------------------------------------------------------*/ | |
1093 /*--- end blocksort.c ---*/ | |
1094 /*-------------------------------------------------------------*/ | |
OLD | NEW |