Commit 339af14a authored by Leif Walsh, committed by Yoni Fogel

[t:3315] improving quicksort for the case with lots of duplicates (just in case ;-)

git-svn-id: file:///svn/toku/tokudb@34897 c7de825b-a66e-492c-adef-691d508d4ae1
parent 0ea3e752
......@@ -125,53 +125,44 @@ quicksort_r(void *va, int n, int width,
{
if (n <= 1) { return 0; }
unsigned char *a = va;
unsigned char *pivot = &a[(n - 1) * width];
unsigned char *mid = &a[(n / 2) * width];
unsigned char *lo = a;
unsigned char *pivot = &a[(n / 2) * width];
unsigned char *hi = &a[(n - 1) * width];
// The pivot is the last position in the array, but is the median of
// three elements (first, middle, last).
if (cmp(extra, a, pivot) > 0) {
swap(a, pivot, width);
if (cmp(extra, lo, pivot) > 0) {
swap(lo, pivot, width);
}
if (cmp(extra, pivot, mid) > 0) {
swap(pivot, mid, width);
if (cmp(extra, a, pivot) > 0) {
swap(a, pivot, width);
if (cmp(extra, pivot, hi) > 0) {
swap(pivot, hi, width);
if (cmp(extra, lo, pivot) > 0) {
swap(lo, pivot, width);
}
}
unsigned char *lp = a, *rp = &a[(n - 2) * width];
while (lp < rp) {
// In the case where we have a lot of duplicate elements, this is
// kind of horrible (it's O(n^2)). It could be fixed by
// partitioning into less, equal, and greater, but since the only
// place we're using it right now has no duplicates (the MSNs are
// guaranteed unique), it's fine to do it this way, and probably
// better because it's simpler.
unsigned char *lp = lo + width, *rp = hi - width;
while (lp <= rp) {
while (cmp(extra, lp, pivot) < 0) {
lp += width;
}
while (cmp(extra, pivot, rp) <= 0) {
while (cmp(extra, pivot, rp) < 0) {
rp -= width;
}
if (lp < rp) {
swap(lp, rp, width);
// fix up pivot if we moved it
if (pivot == lp) { pivot = rp; }
else if (pivot == rp) { pivot = lp; }
lp += width;
rp -= width;
} else if (lp == rp) {
lp += width;
rp -= width;
}
}
if (lp == rp && cmp(extra, lp, pivot) < 0) {
// A weird case where lp and rp are both pointing to the rightmost
// element less than the pivot, we want lp to point to the first
// element greater than or equal to the pivot.
lp += width;
}
// Swap the pivot back into place.
swap(pivot, lp, width);
int r = quicksort_r(a, (lp - a) / width, width, extra, cmp);
int r = quicksort_r(lo, 1 + (rp - a) / width, width, extra, cmp);
if (r != 0) { return r; }
// The pivot is in this spot and we don't need to sort it, so move
// over one space before calling quicksort_r again.
lp += width;
r = quicksort_r(lp, n - (lp - a) / width, width, extra, cmp);
r = quicksort_r(lp, 1 + (hi - lp) / width, width, extra, cmp);
return r;
}
......
......@@ -39,6 +39,18 @@ zero_array_test(void)
mergesort_r(NULL, 0, sizeof(int), NULL, int_cmp);
}
// Sort test for an input made up entirely of duplicates: every one of the
// nelts elements is set to 1 before the array is handed to mergesort_r.
// The result is then passed to check_int_array and the buffer is released
// with toku_free.
static void
dup_array_test(int nelts)
{
    // MALLOC_N is a project macro defined elsewhere; presumably it allocates
    // room for nelts ints and binds the buffer to `a` -- confirm there.
    int *MALLOC_N(nelts, a);

    // Fill the whole array with the same value.
    int idx = 0;
    while (idx < nelts) {
        a[idx] = 1;
        idx += 1;
    }

    mergesort_r(a, nelts, sizeof(*a), &MAGIC_EXTRA, int_cmp);
    check_int_array(a, nelts);
    toku_free(a);
}
static void
already_sorted_test(int nelts)
{
......@@ -67,13 +79,17 @@ int
test_main(int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__)))
{
// Test entry point: runs each sort test in turn and returns 0.
// Both argc and argv are marked unused.
//
// NOTE(review): this text comes from a diff rendered without +/- markers.
// The already_sorted_test group appears twice below, presumably once as
// removed lines and once as added lines -- confirm the real call order
// against the checked-in file.

// Empty-input case.
zero_array_test();
// Each remaining test is run at sizes 10, 1000, 10001 and 10000000.
already_sorted_test(10);
already_sorted_test(1000);
already_sorted_test(10001);
already_sorted_test(10000000);
random_array_test(10);
random_array_test(1000);
random_array_test(10001);
random_array_test(10000000);
// Arrays in which every element is the same value.
dup_array_test(10);
dup_array_test(1000);
dup_array_test(10001);
dup_array_test(10000000);
already_sorted_test(10);
already_sorted_test(1000);
already_sorted_test(10001);
already_sorted_test(10000000);
return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment