Many files:

Fix assertion failure on line 713 of row0upd.c if there is a column prefix index and the last characters in the prefix are spaces: do not assume that the length of alphabetically equal strings is the same; fix a buglet which could cause InnoDB to think that a secondary index record was not locked though it had been updated in a way which did not alphabetically change its value, e.g., abc -> aBc

Many files:
Fix assertion failure on line 713 of row0upd.c if there is a column prefix index and the last characters in the prefix are spaces: do not assume that the length of alphabetically equal strings is the same; fix a buglet which could cause InnoDB to think that a secondary index record was not locked though it had been updated in a way which did not alphabetically change its value, e.g., abc -> aBc
d3d9b727 · heikki@hundin.mysql.fi · 9abb4be2 · d3d9b727 · d3d9b727 · d3d9b727
Commit d3d9b727 authored Dec 12, 2003 by heikki@hundin.mysql.fi
9 changed files
--- a/innobase/btr/btr0cur.c
+++ b/innobase/btr/btr0cur.c
@@ -1146,7 +1146,6 @@ btr_cur_pessimistic_insert(
 }

 /*==================== B-TREE UPDATE =========================*/
-/* Only clustered index records are modified using these functions */

 /*****************************************************************
 For an update, checks the locks and does the undo logging. */
@@ -1170,12 +1169,15 @@ btr_cur_upd_lock_and_undo(
 	
 	ut_ad(cursor && update && thr && roll_ptr);

-	/* Only clustered index records are updated using this function */
-	ut_ad((cursor->index)->type & DICT_CLUSTERED);
-
 	rec = btr_cur_get_rec(cursor);
 	index = cursor->index;
 	
+	if (!(index->type & DICT_CLUSTERED)) {
+		/* We do undo logging only when we update a clustered index
+		record */
+		return(lock_sec_rec_modify_check_and_lock(0, rec, index, thr));
+	}
+
 	/* Check if we have to wait for a lock: enqueue an explicit lock
 	request if yes */

@@ -1222,6 +1224,13 @@ btr_cur_update_in_place_log(
 	mach_write_to_1(log_ptr, flags);
 	log_ptr++;

+	/* The code below assumes index is a clustered index: change index to
+	the clustered index if we are updating a secondary index record (or we
+	could as well skip writing the sys col values to the log in this case
+	because they are not needed for a secondary index record update) */
+
+	index = dict_table_get_first_index(index->table);
+
 	log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr,
 									mtr);
 	mach_write_to_2(log_ptr, rec - buf_frame_align(rec));
@@ -1308,68 +1317,7 @@ btr_cur_parse_update_in_place(
 }

 /*****************************************************************
-Updates a secondary index record when the update causes no size
-changes in its fields. The only case when this function is currently
-called is that in a char field characters change to others which
-are identified in the collation order. */
-
-ulint
-btr_cur_update_sec_rec_in_place(
-/*============================*/
-				/* out: DB_SUCCESS or error number */
-	btr_cur_t*	cursor,	/* in: cursor on the record to update;
-				cursor stays valid and positioned on the
-				same record */
-	upd_t*		update,	/* in: update vector */
-	que_thr_t*	thr,	/* in: query thread */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	dict_index_t*	index 		= cursor->index;
-	dict_index_t*	clust_index;
-	ulint		err;
-	rec_t*		rec;
-	dulint		roll_ptr	= ut_dulint_zero;
-	trx_t*		trx		= thr_get_trx(thr);
-
-	/* Only secondary index records are updated using this function */
-	ut_ad(0 == (index->type & DICT_CLUSTERED));
-
-	rec = btr_cur_get_rec(cursor);
-	
-	if (btr_cur_print_record_ops && thr) {
-		printf(
-	"Trx with id %lu %lu going to update table %s index %s\n",
-		ut_dulint_get_high(thr_get_trx(thr)->id),
-		ut_dulint_get_low(thr_get_trx(thr)->id),
-		index->table_name, index->name);
-		rec_print(rec);
-	}
-
-	err = lock_sec_rec_modify_check_and_lock(0, rec, index, thr);
-
-	if (err != DB_SUCCESS) {
-
-		return(err);
-	}
-
-	/* Remove possible hash index pointer to this record */
-	btr_search_update_hash_on_delete(cursor);
-
-	row_upd_rec_in_place(rec, update);
-
-	clust_index = dict_table_get_first_index(index->table);
-
-	/* Note that roll_ptr is really just a dummy value since
-	a secondary index record does not contain any sys columns */
-
-	btr_cur_update_in_place_log(BTR_KEEP_SYS_FLAG, rec, clust_index,
-						update, trx, roll_ptr, mtr);
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************
-Updates a record when the update causes no size changes in its fields.
-We assume here that the ordering fields of the record do not change. */
+Updates a record when the update causes no size changes in its fields. */

 ulint
 btr_cur_update_in_place(
@@ -1389,13 +1337,10 @@ btr_cur_update_in_place(
 	buf_block_t*	block;
 	ulint		err;
 	rec_t*		rec;
-	dulint		roll_ptr;
+	dulint		roll_ptr	= ut_dulint_zero;
 	trx_t*		trx;
 	ibool		was_delete_marked;

-	/* Only clustered index records are updated using this function */
-	ut_ad(cursor->index->type & DICT_CLUSTERED);
-
 	rec = btr_cur_get_rec(cursor);
 	index = cursor->index;
 	trx = thr_get_trx(thr);
@@ -1420,7 +1365,12 @@ btr_cur_update_in_place(
 	block = buf_block_align(rec);

 	if (block->is_hashed) {
-	        if (row_upd_changes_ord_field_binary(NULL, index, update)) {
+		/* The function row_upd_changes_ord_field_binary works only
+		if the update vector was built for a clustered index, we must
+		NOT call it if index is secondary */
+
+	        if (!(index->type & DICT_CLUSTERED)
+		    || row_upd_changes_ord_field_binary(NULL, index, update)) {

 		        /* Remove possible hash index pointer to this record */
 	                btr_search_update_hash_on_delete(cursor);
@@ -1495,9 +1445,6 @@ btr_cur_optimistic_update(
 	ibool		reorganized	= FALSE;
 	ulint		i;
 	
-	/* Only clustered index records are updated using this function */
-	ut_ad((cursor->index)->type & DICT_CLUSTERED);
-	
 	page = btr_cur_get_page(cursor);
 	rec = btr_cur_get_rec(cursor);
 	index = cursor->index;
@@ -1546,8 +1493,8 @@ btr_cur_optimistic_update(
 	
 	new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);

-	row_upd_index_replace_new_col_vals(new_entry, index, update, NULL);
-
+	row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
+									NULL);
 	old_rec_size = rec_get_size(rec);
 	new_rec_size = rec_get_converted_size(new_entry);
 	
@@ -1729,7 +1676,6 @@ btr_cur_pessimistic_update(
 	index = cursor->index;
 	tree = index->tree;

-	ut_ad(index->type & DICT_CLUSTERED);
 	ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
 							MTR_MEMO_X_LOCK));
 	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
@@ -1779,8 +1725,8 @@ btr_cur_pessimistic_update(
 	
 	new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);

-	row_upd_index_replace_new_col_vals(new_entry, index, update, heap);
-
+	row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
+									heap);
 	if (!(flags & BTR_KEEP_SYS_FLAG)) {
 		row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
 								roll_ptr);

--- a/innobase/include/btr0cur.h
+++ b/innobase/include/btr0cur.h
@@ -188,22 +188,6 @@ btr_cur_pessimistic_insert(
 	que_thr_t*	thr,	/* in: query thread or NULL */
 	mtr_t*		mtr);	/* in: mtr */
 /*****************************************************************
-Updates a secondary index record when the update causes no size
-changes in its fields. The only case when this function is currently
-called is that in a char field characters change to others which
-are identified in the collation order. */
-
-ulint
-btr_cur_update_sec_rec_in_place(
-/*============================*/
-				/* out: DB_SUCCESS or error number */
-	btr_cur_t*	cursor,	/* in: cursor on the record to update;
-				cursor stays valid and positioned on the
-				same record */
-	upd_t*		update,	/* in: update vector */
-	que_thr_t*	thr,	/* in: query thread */
-	mtr_t*		mtr);	/* in: mtr */
-/*****************************************************************
 Updates a record when the update causes no size changes in its fields. */

 ulint

--- a/innobase/include/row0upd.h
+++ b/innobase/include/row0upd.h
@@ -171,13 +171,31 @@ row_upd_build_difference_binary(
 Replaces the new column values stored in the update vector to the index entry
 given. */

+void
+row_upd_index_replace_new_col_vals_index_pos(
+/*=========================================*/
+	dtuple_t*	entry,	/* in/out: index entry where replaced */
+	dict_index_t*	index,	/* in: index; NOTE that this may also be a
+				non-clustered index */
+	upd_t*		update,	/* in: an update vector built for the index so
+				that the field number in an upd_field is the
+				index position */
+	mem_heap_t*	heap);	/* in: memory heap to which we allocate and
+				copy the new values, set this as NULL if you
+				do not want allocation */
+/***************************************************************
+Replaces the new column values stored in the update vector to the index entry
+given. */
+
 void
 row_upd_index_replace_new_col_vals(
 /*===============================*/
 	dtuple_t*	entry,	/* in/out: index entry where replaced */
-	dict_index_t*	index,	/* in: index; NOTE that may also be a
+	dict_index_t*	index,	/* in: index; NOTE that this may also be a
 				non-clustered index */
-	upd_t*		update,	/* in: update vector */
+	upd_t*		update,	/* in: an update vector built for the
+				CLUSTERED index so that the field number in
+				an upd_field is the clustered index position */
 	mem_heap_t*	heap);	/* in: memory heap to which we allocate and
 				copy the new values, set this as NULL if you
 				do not want allocation */
@@ -199,7 +217,9 @@ row_upd_changes_ord_field_binary(
 				known when this function is called, e.g., at
 				compile time */
 	dict_index_t*	index,	/* in: index of the record */
-	upd_t*		update);/* in: update vector for the row */
+	upd_t*		update);/* in: update vector for the row; NOTE: the
+				field numbers in this MUST be clustered index
+				positions! */
 /***************************************************************
 Checks if an update vector changes an ordering field of an index record.
 This function is fast if the update vector is short or the number of ordering
@@ -271,7 +291,10 @@ row_upd_index_parse(

 /* Update vector field */
 struct upd_field_struct{
-	ulint		field_no;	/* field number in the clustered
+	ulint		field_no;	/* field number in an index, usually
+					the clustered index, but in updating
+					a secondary index record in btr0cur.c
+					this is the position in the secondary
 					index */
 	que_node_t*	exp;		/* expression for calculating a new
 					value: it refers to column values and

--- a/innobase/include/sync0sync.ic
+++ b/innobase/include/sync0sync.ic
@@ -257,6 +257,8 @@ mutex_enter_func(
 		mutex->file_name = file_name;
 		mutex->line = line;

+		mutex->thread_id = os_thread_get_curr_id();
+
 		return;	/* Succeeded! */
 	}


--- a/innobase/lock/lock0lock.c
+++ b/innobase/lock/lock0lock.c
@@ -339,6 +339,11 @@ lock_deadlock_recursive(
 	ulint*	cost);		/* in/out: number of calculation steps thus
 				far: if this exceeds LOCK_MAX_N_STEPS_...
 				we return TRUE */
+
+#define lock_mutex_enter_kernel()	mutex_enter(&kernel_mutex)
+#define lock_mutex_exit_kernel()	mutex_exit(&kernel_mutex)
+
+#ifdef notdefined
 /*************************************************************************
 Reserves the kernel mutex. This function is used in this module to allow
 monitoring the contention degree on the kernel mutex caused by the lock
@@ -362,6 +367,7 @@ lock_mutex_exit_kernel(void)
 {
 	mutex_exit(&kernel_mutex);
 }
+#endif

 #ifdef notdefined


--- a/innobase/row/row0ins.c
+++ b/innobase/row/row0ins.c
@@ -225,11 +225,15 @@ ulint
 row_ins_sec_index_entry_by_modify(
 /*==============================*/
 				/* out: DB_SUCCESS or error code */
+	ulint		mode,	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+				depending on whether mtr holds just a leaf
+				latch or also a tree latch */
 	btr_cur_t*	cursor,	/* in: B-tree cursor */
 	dtuple_t*	entry,	/* in: index entry to insert */
 	que_thr_t*	thr,	/* in: query thread */
 	mtr_t*		mtr)	/* in: mtr */
 {
+	big_rec_t*	dummy_big_rec;
 	mem_heap_t*	heap;
 	upd_t*		update;
 	rec_t*		rec;
@@ -241,16 +245,28 @@ row_ins_sec_index_entry_by_modify(
 	ut_ad(rec_get_deleted_flag(rec));
 	
 	/* We know that in the alphabetical ordering, entry and rec are
-	identical. But in their binary form there may be differences if
+	identified. But in their binary form there may be differences if
 	there are char fields in them. Therefore we have to calculate the
-	difference and do an update-in-place if necessary. */
+	difference. */
 	
 	heap = mem_heap_create(1024);
 	
 	update = row_upd_build_sec_rec_difference_binary(cursor->index,
 							entry, rec, heap); 
+	if (mode == BTR_MODIFY_LEAF) {
+		/* Try an optimistic updating of the record, keeping changes
+		within the page */

-	err = btr_cur_update_sec_rec_in_place(cursor, update, thr, mtr);
+		err = btr_cur_optimistic_update(BTR_KEEP_SYS_FLAG, cursor,
+						update, 0, thr, mtr);
+		if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
+			err = DB_FAIL;
+		}
+	} else  {
+		ut_a(mode == BTR_MODIFY_TREE);
+		err = btr_cur_pessimistic_update(BTR_KEEP_SYS_FLAG, cursor,
+					&dummy_big_rec, update, 0, thr, mtr);
+	}

 	mem_heap_free(heap);

@@ -1829,7 +1845,8 @@ row_ins_index_entry_low(
 							ext_vec, n_ext_vec,
 							thr, &mtr);
 		} else {
-			err = row_ins_sec_index_entry_by_modify(&cursor, entry,
+			err = row_ins_sec_index_entry_by_modify(mode, &cursor,
+								entry,
 								thr, &mtr);
 		}
 		

--- a/innobase/row/row0sel.c
+++ b/innobase/row/row0sel.c
@@ -51,7 +51,8 @@ to que_run_threads: this is to allow canceling runaway queries */

 /************************************************************************
 Returns TRUE if the user-defined column values in a secondary index record
-are the same as the corresponding columns in the clustered index record.
+are alphabetically the same as the corresponding columns in the clustered
+index record.
 NOTE: the comparison is NOT done as a binary comparison, but character
 fields are compared with collation! */
 static
@@ -96,11 +97,6 @@ row_sel_sec_rec_is_for_clust_rec(
 		       clust_len = ifield->prefix_len;
 		}

-                if (sec_len != clust_len) {
-
-                        return(FALSE);
-                }
-
                if (0 != cmp_data_data(dict_col_get_type(col),
                                        clust_field, clust_len,
                                        sec_field, sec_len)) {

--- a/innobase/row/row0upd.c
+++ b/innobase/row/row0upd.c
@@ -698,7 +698,7 @@ row_upd_build_sec_rec_difference_binary(
 	ulint		i;

 	/* This function is used only for a secondary index */
-	ut_ad(0 == (index->type & DICT_CLUSTERED));
+	ut_a(0 == (index->type & DICT_CLUSTERED));

 	update = upd_create(dtuple_get_n_fields(entry), heap);

@@ -710,7 +710,13 @@ row_upd_build_sec_rec_difference_binary(

 		dfield = dtuple_get_nth_field(entry, i);

-		ut_a(len == dfield_get_len(dfield));
+		/* NOTE that it may be that len != dfield_get_len(dfield) if we
+		are updating in a character set and collation where strings of
+		different length can be equal in an alphabetical comparison,
+		and also in the case where we have a column prefix index
+		and the last characters in the index field are spaces; the
+		latter case probably caused the assertion failures reported at
+		row0upd.c line 713 in versions 4.0.14 - 4.0.16. */

 		/* NOTE: we compare the fields as binary strings!
 		(No collation) */
@@ -819,13 +825,77 @@ row_upd_build_difference_binary(
 Replaces the new column values stored in the update vector to the index entry
 given. */

+void
+row_upd_index_replace_new_col_vals_index_pos(
+/*=========================================*/
+	dtuple_t*	entry,	/* in/out: index entry where replaced */
+	dict_index_t*	index,	/* in: index; NOTE that this may also be a
+				non-clustered index */
+	upd_t*		update,	/* in: an update vector built for the index so
+				that the field number in an upd_field is the
+				index position */
+	mem_heap_t*	heap)	/* in: memory heap to which we allocate and
+				copy the new values, set this as NULL if you
+				do not want allocation */
+{
+	dict_field_t*	field;
+	upd_field_t*	upd_field;
+	dfield_t*	dfield;
+	dfield_t*	new_val;
+	ulint		j;
+	ulint		i;
+
+	ut_ad(index);
+
+	dtuple_set_info_bits(entry, update->info_bits);
+
+	for (j = 0; j < dict_index_get_n_fields(index); j++) {
+
+	        field = dict_index_get_nth_field(index, j);
+
+		for (i = 0; i < upd_get_n_fields(update); i++) {
+
+		        upd_field = upd_get_nth_field(update, i);
+
+			if (upd_field->field_no == j) {
+
+			        dfield = dtuple_get_nth_field(entry, j);
+
+				new_val = &(upd_field->new_val);
+
+				dfield_set_data(dfield, new_val->data,
+								new_val->len);
+				if (heap && new_val->len != UNIV_SQL_NULL) {
+				        dfield->data = mem_heap_alloc(heap,
+								new_val->len);
+					ut_memcpy(dfield->data, new_val->data,
+								new_val->len);
+				}
+
+				if (field->prefix_len > 0
+			            && new_val->len != UNIV_SQL_NULL
+			            && new_val->len > field->prefix_len) {
+
+				        dfield->len = field->prefix_len;
+				}
+			}
+		}
+	}
+}
+
+/***************************************************************
+Replaces the new column values stored in the update vector to the index entry
+given. */
+
 void
 row_upd_index_replace_new_col_vals(
 /*===============================*/
 	dtuple_t*	entry,	/* in/out: index entry where replaced */
 	dict_index_t*	index,	/* in: index; NOTE that this may also be a
 				non-clustered index */
-	upd_t*		update,	/* in: update vector */
+	upd_t*		update,	/* in: an update vector built for the
+				CLUSTERED index so that the field number in
+				an upd_field is the clustered index position */
 	mem_heap_t*	heap)	/* in: memory heap to which we allocate and
 				copy the new values, set this as NULL if you
 				do not want allocation */
@@ -893,7 +963,9 @@ row_upd_changes_ord_field_binary(
 				known when this function is called, e.g., at
 				compile time */
 	dict_index_t*	index,	/* in: index of the record */
-	upd_t*		update)	/* in: update vector for the row */
+	upd_t*		update)	/* in: update vector for the row; NOTE: the
+				field numbers in this MUST be clustered index
+				positions! */
 {
 	upd_field_t*	upd_field;
 	dict_field_t*	ind_field;

--- a/innobase/row/row0vers.c
+++ b/innobase/row/row0vers.c
@@ -117,9 +117,10 @@ row_vers_impl_x_locked_off_kernel(
 		return(NULL);
 	}

-	/* We look up if some earlier version of the clustered index record
-	would require rec to be in a different state (delete marked or
-	unmarked, or not existing). If there is such a version, then rec was
+	/* We look up if some earlier version, which was modified by the trx_id
+	transaction, of the clustered index record would require rec to be in
+	a different state (delete marked or unmarked, or have different field
+	values, or not existing). If there is such a version, then rec was
 	modified by the trx_id transaction, and it has an implicit x-lock on
 	rec. Note that if clust_rec itself would require rec to be in a
 	different state, then the trx_id transaction has not yet had time to
@@ -188,6 +189,8 @@ row_vers_impl_x_locked_off_kernel(

 		vers_del = rec_get_deleted_flag(prev_version);

+		/* We check if entry and rec are identified in the alphabetical
+		ordering */
 		if (0 == cmp_dtuple_rec(entry, rec)) {
 			/* The delete marks of rec and prev_version should be
 			equal for rec to be in the state required by
@@ -198,6 +201,20 @@ row_vers_impl_x_locked_off_kernel(

 				break;
 			}
+
+			/* It is possible that the row was updated so that the
+			secondary index record remained the same in
+			alphabetical ordering, but the field values changed
+			still. For example, 'abc' -> 'ABC'. Check also that. */
+
+			dtuple_set_types_binary(entry,
+						dtuple_get_n_fields(entry));
+			if (0 != cmp_dtuple_rec(entry, rec)) {
+
+				trx = trx_get_on_id(trx_id);
+
+				break;
+			}
 		} else if (!rec_del) {
 			/* The delete mark should be set in rec for it to be
 			in the state required by prev_version */
@@ -256,8 +273,8 @@ row_vers_must_preserve_del_marked(
 Finds out if a version of the record, where the version >= the current
 purge view, should have ientry as its secondary index entry. We check
 if there is any not delete marked version of the record where the trx
-id >= purge view, and the secondary index entry == ientry; exactly in
-this case we return TRUE. */
+id >= purge view, and the secondary index entry and ientry are identified in
+the alphabetical ordering; exactly in this case we return TRUE. */

 ibool
 row_vers_old_has_index_entry(