abc-master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
blocksort.c
Go to the documentation of this file.
1 
2 /*-------------------------------------------------------------*/
3 /*--- Block sorting machinery ---*/
4 /*--- blocksort.c ---*/
5 /*-------------------------------------------------------------*/
6 
7 /* ------------------------------------------------------------------
8  This file is part of bzip2/libbzip2, a program and library for
9  lossless, block-sorting data compression.
10 
11  bzip2/libbzip2 version 1.0.5 of 10 December 2007
12  Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
13 
14  Please read the WARNING, DISCLAIMER and PATENTS sections in the
15  README file.
16 
17  This program is released under the terms of the license contained
18  in the file LICENSE.
19  ------------------------------------------------------------------ */
20 
21 
22 #include "bzlib_private.h"
23 
25 
26 /*---------------------------------------------*/
27 /*--- Fallback O(N log(N)^2) sorting ---*/
28 /*--- algorithm, for repetitive blocks ---*/
29 /*---------------------------------------------*/
30 
31 /*---------------------------------------------*/
32 static
34 void fallbackSimpleSort ( UInt32* fmap,
35  UInt32* eclass,
36  Int32 lo,
37  Int32 hi )
38 {
39  Int32 i, j, tmp;
40  UInt32 ec_tmp;
41 
42  if (lo == hi) return;
43 
44  if (hi - lo > 3) {
45  for ( i = hi-4; i >= lo; i-- ) {
46  tmp = fmap[i];
47  ec_tmp = eclass[tmp];
48  for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
49  fmap[j-4] = fmap[j];
50  fmap[j-4] = tmp;
51  }
52  }
53 
54  for ( i = hi-1; i >= lo; i-- ) {
55  tmp = fmap[i];
56  ec_tmp = eclass[tmp];
57  for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
58  fmap[j-1] = fmap[j];
59  fmap[j-1] = tmp;
60  }
61 }
62 
63 
64 /*---------------------------------------------*/
65 #define fswap(zz1, zz2) \
66  { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
67 
68 #define fvswap(zzp1, zzp2, zzn) \
69 { \
70  Int32 yyp1 = (zzp1); \
71  Int32 yyp2 = (zzp2); \
72  Int32 yyn = (zzn); \
73  while (yyn > 0) { \
74  fswap(fmap[yyp1], fmap[yyp2]); \
75  yyp1++; yyp2++; yyn--; \
76  } \
77 }
78 
79 
80 #define fmin(a,b) ((a) < (b)) ? (a) : (b)
81 
82 #define fpush(lz,hz) { stackLo[sp] = lz; \
83  stackHi[sp] = hz; \
84  sp++; }
85 
86 #define fpop(lz,hz) { sp--; \
87  lz = stackLo[sp]; \
88  hz = stackHi[sp]; }
89 
90 #define FALLBACK_QSORT_SMALL_THRESH 10
91 #define FALLBACK_QSORT_STACK_SIZE 100
92 
93 
94 static
95 void fallbackQSort3 ( UInt32* fmap,
96  UInt32* eclass,
97  Int32 loSt,
98  Int32 hiSt )
99 {
100  Int32 unLo, unHi, ltLo, gtHi, n, m;
101  Int32 sp, lo, hi;
102  UInt32 med, r, r3;
105 
106  r = 0;
107 
108  sp = 0;
109  fpush ( loSt, hiSt );
110 
111  while (sp > 0) {
112 
113  AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
114 
115  fpop ( lo, hi );
116  if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
117  fallbackSimpleSort ( fmap, eclass, lo, hi );
118  continue;
119  }
120 
121  /* Random partitioning. Median of 3 sometimes fails to
122  avoid bad cases. Median of 9 seems to help but
123  looks rather expensive. This too seems to work but
124  is cheaper. Guidance for the magic constants
125  7621 and 32768 is taken from Sedgewick's algorithms
126  book, chapter 35.
127  */
128  r = ((r * 7621) + 1) % 32768;
129  r3 = r % 3;
130  if (r3 == 0) med = eclass[fmap[lo]]; else
131  if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
132  med = eclass[fmap[hi]];
133 
134  unLo = ltLo = lo;
135  unHi = gtHi = hi;
136 
137  while (1) {
138  while (1) {
139  if (unLo > unHi) break;
140  n = (Int32)eclass[fmap[unLo]] - (Int32)med;
141  if (n == 0) {
142  fswap(fmap[unLo], fmap[ltLo]);
143  ltLo++; unLo++;
144  continue;
145  };
146  if (n > 0) break;
147  unLo++;
148  }
149  while (1) {
150  if (unLo > unHi) break;
151  n = (Int32)eclass[fmap[unHi]] - (Int32)med;
152  if (n == 0) {
153  fswap(fmap[unHi], fmap[gtHi]);
154  gtHi--; unHi--;
155  continue;
156  };
157  if (n < 0) break;
158  unHi--;
159  }
160  if (unLo > unHi) break;
161  fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
162  }
163 
164  AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
165 
166  if (gtHi < ltLo) continue;
167 
168  n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
169  m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
170 
171  n = lo + unLo - ltLo - 1;
172  m = hi - (gtHi - unHi) + 1;
173 
174  if (n - lo > hi - m) {
175  fpush ( lo, n );
176  fpush ( m, hi );
177  } else {
178  fpush ( m, hi );
179  fpush ( lo, n );
180  }
181  }
182 }
183 
184 #undef fmin
185 #undef fpush
186 #undef fpop
187 #undef fswap
188 #undef fvswap
189 #undef FALLBACK_QSORT_SMALL_THRESH
190 #undef FALLBACK_QSORT_STACK_SIZE
191 
192 
193 /*---------------------------------------------*/
194 /* Pre:
195  nblock > 0
196  eclass exists for [0 .. nblock-1]
197  ((UChar*)eclass) [0 .. nblock-1] holds block
198  ptr exists for [0 .. nblock-1]
199 
200  Post:
201  ((UChar*)eclass) [0 .. nblock-1] holds block
202  All other areas of eclass destroyed
203  fmap [0 .. nblock-1] holds sorted order
204  bhtab [ 0 .. 2+(nblock/32) ] destroyed
205 */
206 
207 #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
208 #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
209 #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
210 #define WORD_BH(zz) bhtab[(zz) >> 5]
211 #define UNALIGNED_BH(zz) ((zz) & 0x01f)
212 
213 static
214 void fallbackSort ( UInt32* fmap,
215  UInt32* eclass,
216  UInt32* bhtab,
217  Int32 nblock,
218  Int32 verb )
219 {
220  Int32 ftab[257];
221  Int32 ftabCopy[256];
222  Int32 H, i, j, k, l, r, cc, cc1;
223  Int32 nNotDone;
224  Int32 nBhtab;
225  UChar* eclass8 = (UChar*)eclass;
226 
227  /*--
228  Initial 1-char radix sort to generate
229  initial fmap and initial BH bits.
230  --*/
231  if (verb >= 4)
232  VPrintf0 ( " bucket sorting ...\n" );
233  for (i = 0; i < 257; i++) ftab[i] = 0;
234  for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
235  for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
236  for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
237 
238  for (i = 0; i < nblock; i++) {
239  j = eclass8[i];
240  k = ftab[j] - 1;
241  ftab[j] = k;
242  fmap[k] = i;
243  }
244 
245  nBhtab = 2 + (nblock / 32);
246  for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
247  for (i = 0; i < 256; i++) SET_BH(ftab[i]);
248 
249  /*--
250  Inductively refine the buckets. Kind-of an
251  "exponential radix sort" (!), inspired by the
252  Manber-Myers suffix array construction algorithm.
253  --*/
254 
255  /*-- set sentinel bits for block-end detection --*/
256  for (i = 0; i < 32; i++) {
257  SET_BH(nblock + 2*i);
258  CLEAR_BH(nblock + 2*i + 1);
259  }
260 
261  /*-- the log(N) loop --*/
262  H = 1;
263  while (1) {
264 
265  if (verb >= 4)
266  VPrintf1 ( " depth %6d has ", H );
267 
268  j = 0;
269  for (i = 0; i < nblock; i++) {
270  if (ISSET_BH(i)) j = i;
271  k = fmap[i] - H; if (k < 0) k += nblock;
272  eclass[k] = j;
273  }
274 
275  nNotDone = 0;
276  r = -1;
277  while (1) {
278 
279  /*-- find the next non-singleton bucket --*/
280  k = r + 1;
281  while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
282  if (ISSET_BH(k)) {
283  while (WORD_BH(k) == 0xffffffff) k += 32;
284  while (ISSET_BH(k)) k++;
285  }
286  l = k - 1;
287  if (l >= nblock) break;
288  while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
289  if (!ISSET_BH(k)) {
290  while (WORD_BH(k) == 0x00000000) k += 32;
291  while (!ISSET_BH(k)) k++;
292  }
293  r = k - 1;
294  if (r >= nblock) break;
295 
296  /*-- now [l, r] bracket current bucket --*/
297  if (r > l) {
298  nNotDone += (r - l + 1);
299  fallbackQSort3 ( fmap, eclass, l, r );
300 
301  /*-- scan bucket and generate header bits-- */
302  cc = -1;
303  for (i = l; i <= r; i++) {
304  cc1 = eclass[fmap[i]];
305  if (cc != cc1) { SET_BH(i); cc = cc1; };
306  }
307  }
308  }
309 
310  if (verb >= 4)
311  VPrintf1 ( "%6d unresolved strings\n", nNotDone );
312 
313  H *= 2;
314  if (H > nblock || nNotDone == 0) break;
315  }
316 
317  /*--
318  Reconstruct the original block in
319  eclass8 [0 .. nblock-1], since the
320  previous phase destroyed it.
321  --*/
322  if (verb >= 4)
323  VPrintf0 ( " reconstructing block ...\n" );
324  j = 0;
325  for (i = 0; i < nblock; i++) {
326  while (ftabCopy[j] == 0) j++;
327  ftabCopy[j]--;
328  eclass8[fmap[i]] = (UChar)j;
329  }
330  AssertH ( j < 256, 1005 );
331 }
332 
333 #undef SET_BH
334 #undef CLEAR_BH
335 #undef ISSET_BH
336 #undef WORD_BH
337 #undef UNALIGNED_BH
338 
339 
340 /*---------------------------------------------*/
341 /*--- The main, O(N^2 log(N)) sorting ---*/
342 /*--- algorithm. Faster for "normal" ---*/
343 /*--- non-repetitive blocks. ---*/
344 /*---------------------------------------------*/
345 
346 /*---------------------------------------------*/
347 static
350  UInt32 i2,
351  UChar* block,
352  UInt16* quadrant,
353  UInt32 nblock,
354  Int32* budget )
355 {
356  Int32 k;
357  UChar c1, c2;
358  UInt16 s1, s2;
359 
360  AssertD ( i1 != i2, "mainGtU" );
361  /* 1 */
362  c1 = block[i1]; c2 = block[i2];
363  if (c1 != c2) return (c1 > c2);
364  i1++; i2++;
365  /* 2 */
366  c1 = block[i1]; c2 = block[i2];
367  if (c1 != c2) return (c1 > c2);
368  i1++; i2++;
369  /* 3 */
370  c1 = block[i1]; c2 = block[i2];
371  if (c1 != c2) return (c1 > c2);
372  i1++; i2++;
373  /* 4 */
374  c1 = block[i1]; c2 = block[i2];
375  if (c1 != c2) return (c1 > c2);
376  i1++; i2++;
377  /* 5 */
378  c1 = block[i1]; c2 = block[i2];
379  if (c1 != c2) return (c1 > c2);
380  i1++; i2++;
381  /* 6 */
382  c1 = block[i1]; c2 = block[i2];
383  if (c1 != c2) return (c1 > c2);
384  i1++; i2++;
385  /* 7 */
386  c1 = block[i1]; c2 = block[i2];
387  if (c1 != c2) return (c1 > c2);
388  i1++; i2++;
389  /* 8 */
390  c1 = block[i1]; c2 = block[i2];
391  if (c1 != c2) return (c1 > c2);
392  i1++; i2++;
393  /* 9 */
394  c1 = block[i1]; c2 = block[i2];
395  if (c1 != c2) return (c1 > c2);
396  i1++; i2++;
397  /* 10 */
398  c1 = block[i1]; c2 = block[i2];
399  if (c1 != c2) return (c1 > c2);
400  i1++; i2++;
401  /* 11 */
402  c1 = block[i1]; c2 = block[i2];
403  if (c1 != c2) return (c1 > c2);
404  i1++; i2++;
405  /* 12 */
406  c1 = block[i1]; c2 = block[i2];
407  if (c1 != c2) return (c1 > c2);
408  i1++; i2++;
409 
410  k = nblock + 8;
411 
412  do {
413  /* 1 */
414  c1 = block[i1]; c2 = block[i2];
415  if (c1 != c2) return (c1 > c2);
416  s1 = quadrant[i1]; s2 = quadrant[i2];
417  if (s1 != s2) return (s1 > s2);
418  i1++; i2++;
419  /* 2 */
420  c1 = block[i1]; c2 = block[i2];
421  if (c1 != c2) return (c1 > c2);
422  s1 = quadrant[i1]; s2 = quadrant[i2];
423  if (s1 != s2) return (s1 > s2);
424  i1++; i2++;
425  /* 3 */
426  c1 = block[i1]; c2 = block[i2];
427  if (c1 != c2) return (c1 > c2);
428  s1 = quadrant[i1]; s2 = quadrant[i2];
429  if (s1 != s2) return (s1 > s2);
430  i1++; i2++;
431  /* 4 */
432  c1 = block[i1]; c2 = block[i2];
433  if (c1 != c2) return (c1 > c2);
434  s1 = quadrant[i1]; s2 = quadrant[i2];
435  if (s1 != s2) return (s1 > s2);
436  i1++; i2++;
437  /* 5 */
438  c1 = block[i1]; c2 = block[i2];
439  if (c1 != c2) return (c1 > c2);
440  s1 = quadrant[i1]; s2 = quadrant[i2];
441  if (s1 != s2) return (s1 > s2);
442  i1++; i2++;
443  /* 6 */
444  c1 = block[i1]; c2 = block[i2];
445  if (c1 != c2) return (c1 > c2);
446  s1 = quadrant[i1]; s2 = quadrant[i2];
447  if (s1 != s2) return (s1 > s2);
448  i1++; i2++;
449  /* 7 */
450  c1 = block[i1]; c2 = block[i2];
451  if (c1 != c2) return (c1 > c2);
452  s1 = quadrant[i1]; s2 = quadrant[i2];
453  if (s1 != s2) return (s1 > s2);
454  i1++; i2++;
455  /* 8 */
456  c1 = block[i1]; c2 = block[i2];
457  if (c1 != c2) return (c1 > c2);
458  s1 = quadrant[i1]; s2 = quadrant[i2];
459  if (s1 != s2) return (s1 > s2);
460  i1++; i2++;
461 
462  if (i1 >= nblock) i1 -= nblock;
463  if (i2 >= nblock) i2 -= nblock;
464 
465  k -= 8;
466  (*budget)--;
467  }
468  while (k >= 0);
469 
470  return False;
471 }
472 
473 
474 /*---------------------------------------------*/
475 /*--
476  Knuth's increments seem to work better
477  than Incerpi-Sedgewick here. Possibly
478  because the number of elems to sort is
479  usually small, typically <= 20.
480 --*/
481 static
482 Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
483  9841, 29524, 88573, 265720,
484  797161, 2391484 };
485 
486 static
487 void mainSimpleSort ( UInt32* ptr,
488  UChar* block,
489  UInt16* quadrant,
490  Int32 nblock,
491  Int32 lo,
492  Int32 hi,
493  Int32 d,
494  Int32* budget )
495 {
496  Int32 i, j, h, bigN, hp;
497  UInt32 v;
498 
499  bigN = hi - lo + 1;
500  if (bigN < 2) return;
501 
502  hp = 0;
503  while (incs[hp] < bigN) hp++;
504  hp--;
505 
506  for (; hp >= 0; hp--) {
507  h = incs[hp];
508 
509  i = lo + h;
510  while (True) {
511 
512  /*-- copy 1 --*/
513  if (i > hi) break;
514  v = ptr[i];
515  j = i;
516  while ( mainGtU (
517  ptr[j-h]+d, v+d, block, quadrant, nblock, budget
518  ) ) {
519  ptr[j] = ptr[j-h];
520  j = j - h;
521  if (j <= (lo + h - 1)) break;
522  }
523  ptr[j] = v;
524  i++;
525 
526  /*-- copy 2 --*/
527  if (i > hi) break;
528  v = ptr[i];
529  j = i;
530  while ( mainGtU (
531  ptr[j-h]+d, v+d, block, quadrant, nblock, budget
532  ) ) {
533  ptr[j] = ptr[j-h];
534  j = j - h;
535  if (j <= (lo + h - 1)) break;
536  }
537  ptr[j] = v;
538  i++;
539 
540  /*-- copy 3 --*/
541  if (i > hi) break;
542  v = ptr[i];
543  j = i;
544  while ( mainGtU (
545  ptr[j-h]+d, v+d, block, quadrant, nblock, budget
546  ) ) {
547  ptr[j] = ptr[j-h];
548  j = j - h;
549  if (j <= (lo + h - 1)) break;
550  }
551  ptr[j] = v;
552  i++;
553 
554  if (*budget < 0) return;
555  }
556  }
557 }
558 
559 
560 /*---------------------------------------------*/
561 /*--
562  The following is an implementation of
563  an elegant 3-way quicksort for strings,
564  described in a paper "Fast Algorithms for
565  Sorting and Searching Strings", by Robert
566  Sedgewick and Jon L. Bentley.
567 --*/
568 
569 #define mswap(zz1, zz2) \
570  { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
571 
572 #define mvswap(zzp1, zzp2, zzn) \
573 { \
574  Int32 yyp1 = (zzp1); \
575  Int32 yyp2 = (zzp2); \
576  Int32 yyn = (zzn); \
577  while (yyn > 0) { \
578  mswap(ptr[yyp1], ptr[yyp2]); \
579  yyp1++; yyp2++; yyn--; \
580  } \
581 }
582 
583 static
586 {
587  UChar t;
588  if (a > b) { t = a; a = b; b = t; };
589  if (b > c) {
590  b = c;
591  if (a > b) b = a;
592  }
593  return b;
594 }
595 
596 #define mmin(a,b) ((a) < (b)) ? (a) : (b)
597 
598 #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
599  stackHi[sp] = hz; \
600  stackD [sp] = dz; \
601  sp++; }
602 
603 #define mpop(lz,hz,dz) { sp--; \
604  lz = stackLo[sp]; \
605  hz = stackHi[sp]; \
606  dz = stackD [sp]; }
607 
608 
609 #define mnextsize(az) (nextHi[az]-nextLo[az])
610 
611 #define mnextswap(az,bz) \
612  { Int32 tz; \
613  tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
614  tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
615  tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
616 
617 
618 #define MAIN_QSORT_SMALL_THRESH 20
619 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
620 #define MAIN_QSORT_STACK_SIZE 100
621 
622 static
623 void mainQSort3 ( UInt32* ptr,
624  UChar* block,
625  UInt16* quadrant,
626  Int32 nblock,
627  Int32 loSt,
628  Int32 hiSt,
629  Int32 dSt,
630  Int32* budget )
631 {
632  Int32 unLo, unHi, ltLo, gtHi, n, m, med;
633  Int32 sp, lo, hi, d;
634 
635  Int32 stackLo[MAIN_QSORT_STACK_SIZE];
636  Int32 stackHi[MAIN_QSORT_STACK_SIZE];
637  Int32 stackD [MAIN_QSORT_STACK_SIZE];
638 
639  Int32 nextLo[3];
640  Int32 nextHi[3];
641  Int32 nextD [3];
642 
643  sp = 0;
644  mpush ( loSt, hiSt, dSt );
645 
646  while (sp > 0) {
647 
648  AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
649 
650  mpop ( lo, hi, d );
651  if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
653  mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
654  if (*budget < 0) return;
655  continue;
656  }
657 
658  med = (Int32)
659  mmed3 ( block[ptr[ lo ]+d],
660  block[ptr[ hi ]+d],
661  block[ptr[ (lo+hi)>>1 ]+d] );
662 
663  unLo = ltLo = lo;
664  unHi = gtHi = hi;
665 
666  while (True) {
667  while (True) {
668  if (unLo > unHi) break;
669  n = ((Int32)block[ptr[unLo]+d]) - med;
670  if (n == 0) {
671  mswap(ptr[unLo], ptr[ltLo]);
672  ltLo++; unLo++; continue;
673  };
674  if (n > 0) break;
675  unLo++;
676  }
677  while (True) {
678  if (unLo > unHi) break;
679  n = ((Int32)block[ptr[unHi]+d]) - med;
680  if (n == 0) {
681  mswap(ptr[unHi], ptr[gtHi]);
682  gtHi--; unHi--; continue;
683  };
684  if (n < 0) break;
685  unHi--;
686  }
687  if (unLo > unHi) break;
688  mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
689  }
690 
691  AssertD ( unHi == unLo-1, "mainQSort3(2)" );
692 
693  if (gtHi < ltLo) {
694  mpush(lo, hi, d+1 );
695  continue;
696  }
697 
698  n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
699  m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
700 
701  n = lo + unLo - ltLo - 1;
702  m = hi - (gtHi - unHi) + 1;
703 
704  nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
705  nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
706  nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
707 
708  if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
709  if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
710  if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
711 
712  AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
713  AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
714 
715  mpush (nextLo[0], nextHi[0], nextD[0]);
716  mpush (nextLo[1], nextHi[1], nextD[1]);
717  mpush (nextLo[2], nextHi[2], nextD[2]);
718  }
719 }
720 
721 #undef mswap
722 #undef mvswap
723 #undef mpush
724 #undef mpop
725 #undef mmin
726 #undef mnextsize
727 #undef mnextswap
728 #undef MAIN_QSORT_SMALL_THRESH
729 #undef MAIN_QSORT_DEPTH_THRESH
730 #undef MAIN_QSORT_STACK_SIZE
731 
732 
733 /*---------------------------------------------*/
734 /* Pre:
735  nblock > N_OVERSHOOT
736  block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
737  ((UChar*)block32) [0 .. nblock-1] holds block
738  ptr exists for [0 .. nblock-1]
739 
740  Post:
741  ((UChar*)block32) [0 .. nblock-1] holds block
742  All other areas of block32 destroyed
743  ftab [0 .. 65536 ] destroyed
744  ptr [0 .. nblock-1] holds sorted order
745  if (*budget < 0), sorting was abandoned
746 */
747 
748 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
749 #define SETMASK (1 << 21)
750 #define CLEARMASK (~(SETMASK))
751 
752 static
753 void mainSort ( UInt32* ptr,
754  UChar* block,
755  UInt16* quadrant,
756  UInt32* ftab,
757  Int32 nblock,
758  Int32 verb,
759  Int32* budget )
760 {
761  Int32 i, j, k, ss, sb;
762  Int32 runningOrder[256];
763  Bool bigDone[256];
764  Int32 copyStart[256];
765  Int32 copyEnd [256];
766  UChar c1;
767  Int32 numQSorted;
768  UInt16 s;
769  if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
770 
771  /*-- set up the 2-byte frequency table --*/
772  for (i = 65536; i >= 0; i--) ftab[i] = 0;
773 
774  j = block[0] << 8;
775  i = nblock-1;
776  for (; i >= 3; i -= 4) {
777  quadrant[i] = 0;
778  j = (j >> 8) | ( ((UInt16)block[i]) << 8);
779  ftab[j]++;
780  quadrant[i-1] = 0;
781  j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
782  ftab[j]++;
783  quadrant[i-2] = 0;
784  j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
785  ftab[j]++;
786  quadrant[i-3] = 0;
787  j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
788  ftab[j]++;
789  }
790  for (; i >= 0; i--) {
791  quadrant[i] = 0;
792  j = (j >> 8) | ( ((UInt16)block[i]) << 8);
793  ftab[j]++;
794  }
795 
796  /*-- (emphasises close relationship of block & quadrant) --*/
797  for (i = 0; i < BZ_N_OVERSHOOT; i++) {
798  block [nblock+i] = block[i];
799  quadrant[nblock+i] = 0;
800  }
801 
802  if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
803 
804  /*-- Complete the initial radix sort --*/
805  for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
806 
807  s = block[0] << 8;
808  i = nblock-1;
809  for (; i >= 3; i -= 4) {
810  s = (s >> 8) | (block[i] << 8);
811  j = ftab[s] -1;
812  ftab[s] = j;
813  ptr[j] = i;
814  s = (s >> 8) | (block[i-1] << 8);
815  j = ftab[s] -1;
816  ftab[s] = j;
817  ptr[j] = i-1;
818  s = (s >> 8) | (block[i-2] << 8);
819  j = ftab[s] -1;
820  ftab[s] = j;
821  ptr[j] = i-2;
822  s = (s >> 8) | (block[i-3] << 8);
823  j = ftab[s] -1;
824  ftab[s] = j;
825  ptr[j] = i-3;
826  }
827  for (; i >= 0; i--) {
828  s = (s >> 8) | (block[i] << 8);
829  j = ftab[s] -1;
830  ftab[s] = j;
831  ptr[j] = i;
832  }
833 
834  /*--
835  Now ftab contains the first loc of every small bucket.
836  Calculate the running order, from smallest to largest
837  big bucket.
838  --*/
839  for (i = 0; i <= 255; i++) {
840  bigDone [i] = False;
841  runningOrder[i] = i;
842  }
843 
844  {
845  Int32 vv;
846  Int32 h = 1;
847  do h = 3 * h + 1; while (h <= 256);
848  do {
849  h = h / 3;
850  for (i = h; i <= 255; i++) {
851  vv = runningOrder[i];
852  j = i;
853  while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
854  runningOrder[j] = runningOrder[j-h];
855  j = j - h;
856  if (j <= (h - 1)) goto zero;
857  }
858  zero:
859  runningOrder[j] = vv;
860  }
861  } while (h != 1);
862  }
863 
864  /*--
865  The main sorting loop.
866  --*/
867 
868  numQSorted = 0;
869 
870  for (i = 0; i <= 255; i++) {
871 
872  /*--
873  Process big buckets, starting with the least full.
874  Basically this is a 3-step process in which we call
875  mainQSort3 to sort the small buckets [ss, j], but
876  also make a big effort to avoid the calls if we can.
877  --*/
878  ss = runningOrder[i];
879 
880  /*--
881  Step 1:
882  Complete the big bucket [ss] by quicksorting
883  any unsorted small buckets [ss, j], for j != ss.
884  Hopefully previous pointer-scanning phases have already
885  completed many of the small buckets [ss, j], so
886  we don't have to sort them at all.
887  --*/
888  for (j = 0; j <= 255; j++) {
889  if (j != ss) {
890  sb = (ss << 8) + j;
891  if ( ! (ftab[sb] & SETMASK) ) {
892  Int32 lo = ftab[sb] & CLEARMASK;
893  Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
894  if (hi > lo) {
895  if (verb >= 4)
896  VPrintf4 ( " qsort [0x%x, 0x%x] "
897  "done %d this %d\n",
898  ss, j, numQSorted, hi - lo + 1 );
899  mainQSort3 (
900  ptr, block, quadrant, nblock,
901  lo, hi, BZ_N_RADIX, budget
902  );
903  numQSorted += (hi - lo + 1);
904  if (*budget < 0) return;
905  }
906  }
907  ftab[sb] |= SETMASK;
908  }
909  }
910 
911  AssertH ( !bigDone[ss], 1006 );
912 
913  /*--
914  Step 2:
915  Now scan this big bucket [ss] so as to synthesise the
916  sorted order for small buckets [t, ss] for all t,
917  including, magically, the bucket [ss,ss] too.
918  This will avoid doing Real Work in subsequent Step 1's.
919  --*/
920  {
921  for (j = 0; j <= 255; j++) {
922  copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
923  copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
924  }
925  for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
926  k = ptr[j]-1; if (k < 0) k += nblock;
927  c1 = block[k];
928  if (!bigDone[c1])
929  ptr[ copyStart[c1]++ ] = k;
930  }
931  for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
932  k = ptr[j]-1; if (k < 0) k += nblock;
933  c1 = block[k];
934  if (!bigDone[c1])
935  ptr[ copyEnd[c1]-- ] = k;
936  }
937  }
938 
939  AssertH ( (copyStart[ss]-1 == copyEnd[ss])
940  ||
941  /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
942  Necessity for this case is demonstrated by compressing
943  a sequence of approximately 48.5 million of character
944  251; 1.0.0/1.0.1 will then die here. */
945  (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
946  1007 )
947 
948  for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
949 
950  /*--
951  Step 3:
952  The [ss] big bucket is now done. Record this fact,
953  and update the quadrant descriptors. Remember to
954  update quadrants in the overshoot area too, if
955  necessary. The "if (i < 255)" test merely skips
956  this updating for the last bucket processed, since
957  updating for the last bucket is pointless.
958 
959  The quadrant array provides a way to incrementally
960  cache sort orderings, as they appear, so as to
961  make subsequent comparisons in fullGtU() complete
962  faster. For repetitive blocks this makes a big
963  difference (but not big enough to be able to avoid
964  the fallback sorting mechanism, exponential radix sort).
965 
966  The precise meaning is: at all times:
967 
968  for 0 <= i < nblock and 0 <= j <= nblock
969 
970  if block[i] != block[j],
971 
972  then the relative values of quadrant[i] and
973  quadrant[j] are meaningless.
974 
975  else {
976  if quadrant[i] < quadrant[j]
977  then the string starting at i lexicographically
978  precedes the string starting at j
979 
980  else if quadrant[i] > quadrant[j]
981  then the string starting at j lexicographically
982  precedes the string starting at i
983 
984  else
985  the relative ordering of the strings starting
986  at i and j has not yet been determined.
987  }
988  --*/
989  bigDone[ss] = True;
990 
991  if (i < 255) {
992  Int32 bbStart = ftab[ss << 8] & CLEARMASK;
993  Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
994  Int32 shifts = 0;
995 
996  while ((bbSize >> shifts) > 65534) shifts++;
997 
998  for (j = bbSize-1; j >= 0; j--) {
999  Int32 a2update = ptr[bbStart + j];
1000  UInt16 qVal = (UInt16)(j >> shifts);
1001  quadrant[a2update] = qVal;
1002  if (a2update < BZ_N_OVERSHOOT)
1003  quadrant[a2update + nblock] = qVal;
1004  }
1005  AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
1006  }
1007 
1008  }
1009 
1010  if (verb >= 4)
1011  VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
1012  nblock, numQSorted, nblock - numQSorted );
1013 }
1014 
1015 #undef BIGFREQ
1016 #undef SETMASK
1017 #undef CLEARMASK
1018 
1019 
1020 /*---------------------------------------------*/
1021 /* Pre:
1022  nblock > 0
1023  arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1024  ((UChar*)arr2) [0 .. nblock-1] holds block
1025  arr1 exists for [0 .. nblock-1]
1026 
1027  Post:
1028  ((UChar*)arr2) [0 .. nblock-1] holds block
1029  All other areas of block destroyed
1030  ftab [ 0 .. 65536 ] destroyed
1031  arr1 [0 .. nblock-1] holds sorted order
1032 */
1034 {
1035  UInt32* ptr = s->ptr;
1036  UChar* block = s->block;
1037  UInt32* ftab = s->ftab;
1038  Int32 nblock = s->nblock;
1039  Int32 verb = s->verbosity;
1040  Int32 wfact = s->workFactor;
1041  UInt16* quadrant;
1042  Int32 budget;
1043  Int32 budgetInit;
1044  Int32 i;
1045 
1046  if (nblock < 10000) {
1047  fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1048  } else {
1049  /* Calculate the location for quadrant, remembering to get
1050  the alignment right. Assumes that &(block[0]) is at least
1051  2-byte aligned -- this should be ok since block is really
1052  the first section of arr2.
1053  */
1054  i = nblock+BZ_N_OVERSHOOT;
1055  if (i & 1) i++;
1056  quadrant = (UInt16*)(&(block[i]));
1057 
1058  /* (wfact-1) / 3 puts the default-factor-30
1059  transition point at very roughly the same place as
1060  with v0.1 and v0.9.0.
1061  Not that it particularly matters any more, since the
1062  resulting compressed stream is now the same regardless
1063  of whether or not we use the main sort or fallback sort.
1064  */
1065  if (wfact < 1 ) wfact = 1;
1066  if (wfact > 100) wfact = 100;
1067  budgetInit = nblock * ((wfact-1) / 3);
1068  budget = budgetInit;
1069 
1070  mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1071  if (verb >= 3)
1072  VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
1073  budgetInit - budget,
1074  nblock,
1075  (float)(budgetInit - budget) /
1076  (float)(nblock==0 ? 1 : nblock) );
1077  if (budget < 0) {
1078  if (verb >= 2)
1079  VPrintf0 ( " too repetitive; using fallback"
1080  " sorting algorithm\n" );
1081  fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1082  }
1083  }
1084 
1085  s->origPtr = -1;
1086  for (i = 0; i < s->nblock; i++)
1087  if (ptr[i] == 0)
1088  { s->origPtr = i; break; };
1089 
1090  AssertH( s->origPtr != -1, 1003 );
1091 }
1092 
1093 
1094 /*-------------------------------------------------------------*/
1095 /*--- end blocksort.c ---*/
1096 /*-------------------------------------------------------------*/
1097 
#define fpush(lz, hz)
Definition: blocksort.c:82
#define BIGFREQ(b)
Definition: blocksort.c:748
#define ISSET_BH(zz)
Definition: blocksort.c:209
Int32 origPtr
#define UNALIGNED_BH(zz)
Definition: blocksort.c:211
Int32 workFactor
#define FALLBACK_QSORT_STACK_SIZE
Definition: blocksort.c:91
#define VPrintf1(zf, za1)
Definition: bzlib_private.h:77
#define fpop(lz, hz)
Definition: blocksort.c:86
#define fmin(a, b)
Definition: blocksort.c:80
static ABC_NAMESPACE_IMPL_START __inline__ void fallbackSimpleSort(UInt32 *fmap, UInt32 *eclass, Int32 lo, Int32 hi)
Definition: blocksort.c:34
UInt32 * arr2
#define SET_BH(zz)
Definition: blocksort.c:207
unsigned char Bool
Definition: bzlib_private.h:44
#define MAIN_QSORT_SMALL_THRESH
Definition: blocksort.c:618
#define mswap(zz1, zz2)
Definition: blocksort.c:569
#define BZ_N_OVERSHOOT
#define SETMASK
Definition: blocksort.c:749
#define BZ_N_RADIX
#define AssertH(cond, errcode)
Definition: bzlib_private.h:61
#define mpop(lz, hz, dz)
Definition: blocksort.c:603
Int32 verbosity
#define mnextswap(az, bz)
Definition: blocksort.c:611
UInt32 * arr1
Int32 nblock
static __inline__ UChar mmed3(UChar a, UChar b, UChar c)
Definition: blocksort.c:585
void BZ2_blockSort(EState *s)
Definition: blocksort.c:1033
#define fswap(zz1, zz2)
Definition: blocksort.c:65
#define mvswap(zzp1, zzp2, zzn)
Definition: blocksort.c:572
static void mainSimpleSort(UInt32 *ptr, UChar *block, UInt16 *quadrant, Int32 nblock, Int32 lo, Int32 hi, Int32 d, Int32 *budget)
Definition: blocksort.c:487
#define ABC_NAMESPACE_IMPL_END
Definition: abc_global.h:108
unsigned char UChar
Definition: bzlib_private.h:45
#define mmin(a, b)
Definition: blocksort.c:596
unsigned int UInt32
Definition: bzlib_private.h:47
static char s1[largest_string]
Definition: set.c:514
#define fvswap(zzp1, zzp2, zzn)
Definition: blocksort.c:68
#define FALLBACK_QSORT_SMALL_THRESH
Definition: blocksort.c:90
#define WORD_BH(zz)
Definition: blocksort.c:210
#define ABC_NAMESPACE_IMPL_START
Definition: abc_global.h:107
UInt32 * ftab
#define MAIN_QSORT_DEPTH_THRESH
Definition: blocksort.c:619
#define CLEAR_BH(zz)
Definition: blocksort.c:208
#define VPrintf0(zf)
Definition: bzlib_private.h:75
int Int32
Definition: bzlib_private.h:46
#define VPrintf3(zf, za1, za2, za3)
Definition: bzlib_private.h:81
static void fallbackSort(UInt32 *fmap, UInt32 *eclass, UInt32 *bhtab, Int32 nblock, Int32 verb)
Definition: blocksort.c:214
#define CLEARMASK
Definition: blocksort.c:750
#define mpush(lz, hz, dz)
Definition: blocksort.c:598
#define __inline__
Definition: bzlib_private.h:55
UInt32 * ptr
#define AssertD(cond, msg)
Definition: bzlib_private.h:72
static void fallbackQSort3(UInt32 *fmap, UInt32 *eclass, Int32 loSt, Int32 hiSt)
Definition: blocksort.c:95
#define True
Definition: bzlib_private.h:51
UChar * block
#define MAIN_QSORT_STACK_SIZE
Definition: blocksort.c:620
#define mnextsize(az)
Definition: blocksort.c:609
#define False
Definition: bzlib_private.h:52
unsigned short UInt16
Definition: bzlib_private.h:49
#define VPrintf4(zf, za1, za2, za3, za4)
Definition: bzlib_private.h:83
static void mainQSort3(UInt32 *ptr, UChar *block, UInt16 *quadrant, Int32 nblock, Int32 loSt, Int32 hiSt, Int32 dSt, Int32 *budget)
Definition: blocksort.c:623
static Int32 incs[14]
Definition: blocksort.c:482
static DdNode * zero
Definition: cuddApa.c:100
static __inline__ Bool mainGtU(UInt32 i1, UInt32 i2, UChar *block, UInt16 *quadrant, UInt32 nblock, Int32 *budget)
Definition: blocksort.c:349
static void mainSort(UInt32 *ptr, UChar *block, UInt16 *quadrant, UInt32 *ftab, Int32 nblock, Int32 verb, Int32 *budget)
Definition: blocksort.c:753