Commit 339af14a, authored by Leif Walsh, committed by Yoni Fogel

[t:3315] improving quicksort for the case with lots of duplicates (just in case ;-)

git-svn-id: file:///svn/toku/tokudb@34897 c7de825b-a66e-492c-adef-691d508d4ae1
parent 0ea3e752
...@@ -125,53 +125,44 @@ quicksort_r(void *va, int n, int width, ...@@ -125,53 +125,44 @@ quicksort_r(void *va, int n, int width,
{ {
if (n <= 1) { return 0; } if (n <= 1) { return 0; }
unsigned char *a = va; unsigned char *a = va;
unsigned char *pivot = &a[(n - 1) * width]; unsigned char *lo = a;
unsigned char *mid = &a[(n / 2) * width]; unsigned char *pivot = &a[(n / 2) * width];
unsigned char *hi = &a[(n - 1) * width];
// The pivot is the last position in the array, but is the median of // The pivot is the last position in the array, but is the median of
// three elements (first, middle, last). // three elements (first, middle, last).
if (cmp(extra, a, pivot) > 0) { if (cmp(extra, lo, pivot) > 0) {
swap(a, pivot, width); swap(lo, pivot, width);
} }
if (cmp(extra, pivot, mid) > 0) { if (cmp(extra, pivot, hi) > 0) {
swap(pivot, mid, width); swap(pivot, hi, width);
if (cmp(extra, a, pivot) > 0) { if (cmp(extra, lo, pivot) > 0) {
swap(a, pivot, width); swap(lo, pivot, width);
} }
} }
unsigned char *lp = a, *rp = &a[(n - 2) * width]; unsigned char *lp = lo + width, *rp = hi - width;
while (lp < rp) { while (lp <= rp) {
// In the case where we have a lot of duplicate elements, this is
// kind of horrible (it's O(n^2)). It could be fixed by
// partitioning into less, equal, and greater, but since the only
// place we're using it right now has no duplicates (the MSNs are
// guaranteed unique), it's fine to do it this way, and probably
// better because it's simpler.
while (cmp(extra, lp, pivot) < 0) { while (cmp(extra, lp, pivot) < 0) {
lp += width; lp += width;
} }
while (cmp(extra, pivot, rp) <= 0) { while (cmp(extra, pivot, rp) < 0) {
rp -= width; rp -= width;
} }
if (lp < rp) { if (lp < rp) {
swap(lp, rp, width); swap(lp, rp, width);
// fix up pivot if we moved it
if (pivot == lp) { pivot = rp; }
else if (pivot == rp) { pivot = lp; }
lp += width;
rp -= width;
} else if (lp == rp) {
lp += width; lp += width;
rp -= width; rp -= width;
} }
} }
if (lp == rp && cmp(extra, lp, pivot) < 0) {
// A weird case where lp and rp are both pointing to the rightmost int r = quicksort_r(lo, 1 + (rp - a) / width, width, extra, cmp);
// element less than the pivot, we want lp to point to the first
// element greater than or equal to the pivot.
lp += width;
}
// Swap the pivot back into place.
swap(pivot, lp, width);
int r = quicksort_r(a, (lp - a) / width, width, extra, cmp);
if (r != 0) { return r; } if (r != 0) { return r; }
// The pivot is in this spot and we don't need to sort it, so move r = quicksort_r(lp, 1 + (hi - lp) / width, width, extra, cmp);
// over one space before calling quicksort_r again.
lp += width;
r = quicksort_r(lp, n - (lp - a) / width, width, extra, cmp);
return r; return r;
} }
......
...@@ -39,6 +39,18 @@ zero_array_test(void) ...@@ -39,6 +39,18 @@ zero_array_test(void)
mergesort_r(NULL, 0, sizeof(int), NULL, int_cmp); mergesort_r(NULL, 0, sizeof(int), NULL, int_cmp);
} }
// Duplicate-heavy input: build an array of nelts ints that are all 1,
// sort it with mergesort_r, then pass the result to check_int_array.
static void
dup_array_test(int nelts)
{
    int *MALLOC_N(nelts, a);
    int idx = 0;
    // Every slot gets the same key so the sort sees nothing but duplicates.
    while (idx < nelts) {
        a[idx] = 1;
        idx++;
    }
    mergesort_r(a, nelts, sizeof *a, &MAGIC_EXTRA, int_cmp);
    check_int_array(a, nelts);
    toku_free(a);
}
static void static void
already_sorted_test(int nelts) already_sorted_test(int nelts)
{ {
...@@ -67,13 +79,17 @@ int ...@@ -67,13 +79,17 @@ int
test_main(int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) test_main(int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__)))
{ {
zero_array_test(); zero_array_test();
already_sorted_test(10);
already_sorted_test(1000);
already_sorted_test(10001);
already_sorted_test(10000000);
random_array_test(10); random_array_test(10);
random_array_test(1000); random_array_test(1000);
random_array_test(10001); random_array_test(10001);
random_array_test(10000000); random_array_test(10000000);
dup_array_test(10);
dup_array_test(1000);
dup_array_test(10001);
dup_array_test(10000000);
already_sorted_test(10);
already_sorted_test(1000);
already_sorted_test(10001);
already_sorted_test(10000000);
return 0; return 0;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment