Commit 84ace7a5 authored by Tim Peters's avatar Tim Peters

_BTree_set(): Fixes for "Guido's bug", and all the other kinds of BTree

deletion endcases uncovered by the new degenerate-BTree tests.  The
degenerate testDeletes() and testEmptyFirstBucketReportedByGuido() are
enabled now.
parent cf820929
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
****************************************************************************/ ****************************************************************************/
#define BTREETEMPLATE_C "$Id: BTreeTemplate.c,v 1.54 2002/06/14 14:39:07 jeremy Exp $\n" #define BTREETEMPLATE_C "$Id: BTreeTemplate.c,v 1.55 2002/06/17 16:42:29 tim_one Exp $\n"
/* /*
** _BTree_get ** _BTree_get
...@@ -346,50 +346,73 @@ BTree_deleteNextBucket(BTree *self) ...@@ -346,50 +346,73 @@ BTree_deleteNextBucket(BTree *self)
If noval is non-zero, then don't set a value (the tree If noval is non-zero, then don't set a value (the tree
is a set). is a set).
Return 1 on successful change, 0 is no change, -1 on error. Return:
-1 error
0 successful, and number of entries didn't change
>0 successful, and number of entries did change
Internal
There are two distinct return values > 0:
1 Successful, number of entries changed, but firstbucket did not go away.
2 Successful, number of entires changed, firstbucket did go away.
This can only happen on a delete (value == NULL). The caller may
need to change its own firstbucket pointer, and in any case *someone*
needs to adjust the 'next' pointer of the bucket immediately preceding
the bucket that went away (it needs to point to the bucket immediately
following the bucket that went away).
*/ */
static int static int
_BTree_set(BTree *self, PyObject *keyarg, PyObject *value, _BTree_set(BTree *self, PyObject *keyarg, PyObject *value,
int unique, int noval) int unique, int noval)
{ {
int min, grew, copied=1, changed=0, bchanged=0; int changed = 0; /* did I mutate? */
int childlength; int bchanged = 0; /* do I have a direct bucket child that mutated? */
BTreeItem *d; int min; /* index of child I searched */
BTreeItem *d; /* self->data[min] */
int childlength; /* len(self->data[min].child) */
int status; /* our return value; and return value from callee */
KEY_TYPE key; KEY_TYPE key;
int copied = 1;
COPY_KEY_FROM_ARG(key, keyarg, copied); COPY_KEY_FROM_ARG(key, keyarg, copied);
UNLESS (copied) return -1; UNLESS (copied) return -1;
PER_USE_OR_RETURN(self, -1); PER_USE_OR_RETURN(self, -1);
if (!self->len) { if (! self->len) {
/* We're empty. Make room. */
if (value) { if (value) {
if (BTree_grow(self, 0, noval) < 0) if (BTree_grow(self, 0, noval) < 0)
goto err; goto Error;
} }
else { else {
/* Can't delete a key from an empty BTree. */
PyErr_SetObject(PyExc_KeyError, keyarg); PyErr_SetObject(PyExc_KeyError, keyarg);
goto err; goto Error;
} }
} }
BTREE_SEARCH(min, self, key, goto err); /* Find the right child to search, and hand the work off to it. */
BTREE_SEARCH(min, self, key, goto Error);
d = self->data + min; d = self->data + min;
if (SameType_Check(self, d->child)) if (SameType_Check(self, d->child))
grew = _BTree_set((BTree *)d->child, keyarg, value, unique, noval); status = _BTree_set(BTREE(d->child), keyarg, value, unique, noval);
else else
grew = _bucket_set((Bucket *)d->child, keyarg, value, unique, noval, status = _bucket_set(BUCKET(d->child), keyarg,
&bchanged); value, unique, noval, &bchanged);
if (grew < 0) if (status == 0) goto Done;
goto err; if (status < 0) goto Error;
if (grew == 0) assert(status == 1 || status == 2);
goto Done;
/* The child changed size. Get its new size. Note that since the tree
/* A bucket changed size. */ * rooted at the child changed size, so did the tree rooted at self:
bchanged = 1; * our status must be >= 1 too.
*/
UNLESS(PER_USE(d->child)) UNLESS(PER_USE(d->child)) goto Error;
goto err;
childlength = d->child->len; childlength = d->child->len;
PER_ALLOW_DEACTIVATION(d->child); PER_ALLOW_DEACTIVATION(d->child);
PER_ACCESSED(d->child); PER_ACCESSED(d->child);
...@@ -398,104 +421,124 @@ _BTree_set(BTree *self, PyObject *keyarg, PyObject *value, ...@@ -398,104 +421,124 @@ _BTree_set(BTree *self, PyObject *keyarg, PyObject *value,
/* A bucket got bigger -- if it's "too big", split it. */ /* A bucket got bigger -- if it's "too big", split it. */
int toobig; int toobig;
assert(status == 1); /* can be 2 only on deletes */
if (SameType_Check(self, d->child)) if (SameType_Check(self, d->child))
toobig = childlength > MAX_BTREE_SIZE(d->child); toobig = childlength > MAX_BTREE_SIZE(d->child);
else else
toobig = childlength > MAX_BUCKET_SIZE(d->child); toobig = childlength > MAX_BUCKET_SIZE(d->child);
if (toobig) { if (toobig) {
if (BTree_grow(self, min, noval) < 0) if (BTree_grow(self, min, noval) < 0) goto Error;
goto err; changed = 1; /* BTree_grow mutated self */
changed = 1;
} }
goto Done; goto Done; /* and status still == 1 */
} }
/* A bucket got smaller. */ /* A bucket got smaller. This is much harder, and despite that we
if (min && grew > 1) { * don't try to rebalance the tree.
/* Somebody below us deleted their first bucket and */ */
/* an intermediate tree couldn't handle it. */ if (status == 2) { /* this is the last reference to child status */
if (BTree_deleteNextBucket(BTREE(d[-1].child)) < 0) /* Two problems to solve: May have to adjust our own firstbucket,
goto err; * and the bucket that went away needs to get unlinked.
grew = 1; /* Reset flag, since we handled it */ */
if (min) {
/* This wasn't our firstbucket, so no need to adjust ours (note
* that it can't be the firstbucket of any node above us either).
* Tell "the tree to the left" to do the unlinking.
*/
if (BTree_deleteNextBucket(BTREE(d[-1].child)) < 0) goto Error;
status = 1; /* we solved the child's firstbucket problem */
}
else {
/* This was our firstbucket. Update to new firstbucket value. */
Bucket *nextbucket;
UNLESS(PER_USE(d->child)) goto Error;
nextbucket = BTREE(d->child)->firstbucket;
PER_ALLOW_DEACTIVATION(d->child);
PER_ACCESSED(d->child);
Py_XINCREF(nextbucket);
Py_DECREF(self->firstbucket);
self->firstbucket = nextbucket;
changed = 1;
/* The caller has to do the unlinking -- we can't. Also, since
* it was our firstbucket, it may also be theirs.
*/
assert(status == 2);
}
} }
if (childlength > 0)
goto Done;
/* The child became empty. */ /* If the child isn't empty, we're done! We did all that was possible for
* us to do with the firstbucket problems the child gave us, and since the
* child isn't empty don't create any new firstbucket problems of our own.
*/
if (childlength) goto Done;
/* The child became empty: we need to remove it from self->data.
* But first, if we're a bottom-level node, we've got more bucket-fiddling
* to set up.
*/
if (!SameType_Check(self, d->child)) { if (!SameType_Check(self, d->child)) {
/* We are about to delete a bucket. */ /* We're about to delete a bucket. */
if (min) { if (min) {
/* If it's not our first bucket, we can tell the /* It's not our first bucket, so we can tell the previous
previous bucket to adjust it's reference to it. */ * bucket to adjust its reference to it. It can't be anyone
if (Bucket_deleteNextBucket(BUCKET(d[-1].child)) < 0) * else's first bucket either, so the caller needn't do anything.
goto err; */
if (Bucket_deleteNextBucket(BUCKET(d[-1].child)) < 0) goto Error;
/* status should be 1, and already is: if it were 2, the
* block above would have set it to 1 in its min != 0 branch.
*/
assert(status == 1);
} }
else { else {
/* If it's the first bucket, we can't adjust the Bucket *nextbucket;
reference to it ourselves, so we'll just /* It's our first bucket. We can't unlink it directly. */
increment the grew flag to indicate to a /* 'changed' will be set true by the deletion code following. */
parent node that it's last bucket should UNLESS(PER_USE(d->child)) goto Error;
adjust its reference. If there is no parent, nextbucket = BUCKET(d->child)->next;
then there's nothing to do. */ PER_ALLOW_DEACTIVATION(d->child);
grew++; PER_ACCESSED(d->child);
}
Py_XINCREF(nextbucket);
Py_DECREF(self->firstbucket);
self->firstbucket = nextbucket;
status = 2; /* we're giving our caller a new firstbucket problem */
}
} }
self->len--;
/* Remove the child from self->data. */
Py_DECREF(d->child); Py_DECREF(d->child);
if (min) { if (min) {
DECREF_KEY(d->key); DECREF_KEY(d->key);
} }
--self->len;
if (min < self->len) if (min < self->len)
memmove(d, d+1, (self->len-min)*sizeof(BTreeItem)); memmove(d, d+1, (self->len - min) * sizeof(BTreeItem));
if (!min) {
if (self->len) {
/* We just deleted our first child, so we need to
adjust our first bucket. */
if (SameType_Check(self, self->data->child)) {
UNLESS (PER_USE(BTREE(self->data->child)))
goto err;
ASSIGNB(self->firstbucket,
BTREE(self->data->child)->firstbucket);
Py_XINCREF(self->firstbucket);
PER_ALLOW_DEACTIVATION(BTREE(self->data->child));
PER_ACCESSED(BTREE(self->data->child));
}
else {
ASSIGNB(self->firstbucket, BUCKET(self->data->child));
Py_INCREF(self->firstbucket);
}
/* We can toss our first key now */
DECREF_KEY(self->data->key);
}
else {
Py_XDECREF(self->firstbucket);
self->firstbucket = 0;
}
}
changed = 1; changed = 1;
Done: Done:
#ifdef PERSISTENT #ifdef PERSISTENT
if (changed if (changed
|| (bchanged /* The bucket changed */ || (bchanged /* our kid was a bucket & it mutated */
&& self->len == 1 /* We have only one */ && self->len == 1 /* and we have only one child */
&& ! SameType_Check(self, self->data->child) /* It's our child */ && self->data[0].child->oid == NULL /* and it's in our record */
&& BUCKET(self->data->child)->oid == NULL /* It's in our record*/ )
) ) {
) if (PER_CHANGED(self) < 0) goto Error;
if (PER_CHANGED(self) < 0) }
goto err;
#endif #endif
_return:
PER_ALLOW_DEACTIVATION(self); PER_ALLOW_DEACTIVATION(self);
PER_ACCESSED(self); PER_ACCESSED(self);
return grew; return status;
err: Error:
PER_ALLOW_DEACTIVATION(self); status = -1;
PER_ACCESSED(self); goto _return;
return -1;
} }
/* /*
......
...@@ -173,8 +173,11 @@ More or less random bits of helpful info. ...@@ -173,8 +173,11 @@ More or less random bits of helpful info.
more efficiently than they can store arbitrary Python objects. more efficiently than they can store arbitrary Python objects.
+ In a non-empty BTree, every bucket node contains at least one key, + In a non-empty BTree, every bucket node contains at least one key,
and every BTree node contains at least one child. An empty BTree and every BTree node contains at least one child and a non-NULL
consists solely of a BTree node with len==0 and firstbucket==NULL. firstbucket pointer. However, a BTree node may not contain any keys.
+ An empty BTree consists solely of a BTree node with len==0 and
firstbucket==NULL.
+ Although a BTree can become unbalanced under a mix of inserts and + Although a BTree can become unbalanced under a mix of inserts and
deletes (meaning both that there's nothing stronger that can be deletes (meaning both that there's nothing stronger that can be
......
...@@ -759,7 +759,7 @@ class TestIOBTrees(BTreeTests, TestCase): ...@@ -759,7 +759,7 @@ class TestIOBTrees(BTreeTests, TestCase):
def _noneraises(self): def _noneraises(self):
self.t[None] = 1 self.t[None] = 1
def XXXtestEmptyFirstBucketReportedByGuido(self): def testEmptyFirstBucketReportedByGuido(self):
b = self.t b = self.t
for i in xrange(29972): # reduce to 29971 and it works for i in xrange(29972): # reduce to 29971 and it works
b[i] = i b[i] = i
...@@ -994,20 +994,18 @@ class TestIITreeSets(NormalSetTests, TestCase): ...@@ -994,20 +994,18 @@ class TestIITreeSets(NormalSetTests, TestCase):
t, keys = self._build_degenerate_tree() t, keys = self._build_degenerate_tree()
self._checkRanges(t, keys) self._checkRanges(t, keys)
def XXXtestDeletes(self): def testDeletes(self):
# Delete keys in all possible orders, checking each tree along # Delete keys in all possible orders, checking each tree along
# the way. # the way.
# XXX This is hopeless for now: it dies with: # This is a tough test. Previous failure modes included:
# XXX 1. A variety of assertion failures in _checkRanges. # 1. A variety of assertion failures in _checkRanges.
# XXX 2. Assorted "Invalid firstbucket pointer" failures at # 2. Assorted "Invalid firstbucket pointer" failures at
# XXX seemingly random times, coming out of the BTree destructor. # seemingly random times, coming out of the BTree destructor.
# XXX 3. Under Python 2.3 CVS, some baffling # 3. Under Python 2.3 CVS, some baffling
# XXX RuntimeWarning: tp_compare didn't return -1 or -2 for exception # RuntimeWarning: tp_compare didn't return -1 or -2 for exception
# XXX warnings, possibly due to memory corruption after a BTree # warnings, possibly due to memory corruption after a BTree
# XXX goes insane. # goes insane.
# XXX These are probably related to "Guido's bug" (which test case
# SXX is also disabled for now).
t, keys = self._build_degenerate_tree() t, keys = self._build_degenerate_tree()
for oneperm in permutations(keys): for oneperm in permutations(keys):
...@@ -1016,6 +1014,9 @@ class TestIITreeSets(NormalSetTests, TestCase): ...@@ -1016,6 +1014,9 @@ class TestIITreeSets(NormalSetTests, TestCase):
t.remove(key) t.remove(key)
keys.remove(key) keys.remove(key)
self._checkRanges(t, keys) self._checkRanges(t, keys)
# We removed all the keys, so the tree should be empty now.
self.assertEqual(t.__getstate__(), None)
# A damaged tree may trigger an "invalid firstbucket pointer" # A damaged tree may trigger an "invalid firstbucket pointer"
# failure at the time its destructor is invoked. Try to force # failure at the time its destructor is invoked. Try to force
# that to happen now, so it doesn't look like a baffling failure # that to happen now, so it doesn't look like a baffling failure
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment