Commit 85227cef authored by Andrew Morton, committed by Linus Torvalds

[PATCH] RAID-6 fixes

From: "H. Peter Anvin" <hpa@zytor.com>

As expected, when it hit mainline I started getting real bug reports... 
the attached patch does the following:

- Fixes a few x86-64 specific bugs;

- Removes MMX and SSE-1 from x86-64 (if we have x86-64 we have SSE-2);

- Slightly abstracts the integer code to make it easier to add
  architecture-specific optimizations later (e.g.  IA64 once gcc gets
  better IA64 intrinsics support);

- Resurrects the user-space testbench, and makes it not output the known
  false positive of the D+Q case (D+Q is equivalent to a RAID-5 recovery,
  so I didn't implement it in the user-space testbench.)
parent 8776d039
......@@ -20,6 +20,7 @@
#include <linux/module.h>
#include <linux/stddef.h>
#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
......@@ -45,11 +46,15 @@ extern const char raid6_empty_zero_page[PAGE_SIZE];
#else /* ! __KERNEL__ */
/* Used for testing in user space */
#include <stddef.h>
#include <sys/types.h>
#include <inttypes.h>
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <stddef.h>
#include <sys/mman.h>
#include <sys/types.h>
/* Not standard, but glibc defines it */
#define BITS_PER_LONG __WORDSIZE
typedef uint8_t u8;
typedef uint16_t u16;
......@@ -63,27 +68,13 @@ extern const char raid6_empty_zero_page[PAGE_SIZE];
#define __init
#define __exit
#define __attribute_const__ __attribute__((const))
#define preempt_enable()
#define preempt_disable()
#endif /* __KERNEL__ */
/* Change this from BITS_PER_LONG if there is something better... */
#if BITS_PER_LONG == 64
# define NBYTES(x) ((x) * 0x0101010101010101UL)
# define NSIZE 8
# define NSHIFT 3
# define NSTRING "64"
typedef u64 unative_t;
#else
# define NBYTES(x) ((x) * 0x01010101U)
# define NSIZE 4
# define NSHIFT 2
# define NSTRING "32"
typedef u32 unative_t;
#endif
/* Routine choices */
struct raid6_calls {
void (*gen_syndrome)(int, size_t, void **);
......
......@@ -46,7 +46,7 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_intx16,
&raid6_intx32,
#endif
#if defined(__i386__) || defined(__x86_64__)
#if defined(__i386__)
&raid6_mmxx1,
&raid6_mmxx2,
&raid6_sse1x1,
......@@ -55,6 +55,8 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_sse2x2,
#endif
#if defined(__x86_64__)
&raid6_sse2x1,
&raid6_sse2x2,
&raid6_sse2x4,
#endif
NULL
......
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright 2002 H. Peter Anvin - All Rights Reserved
* Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
......@@ -20,12 +20,64 @@
#include "raid6.h"
/*
* This is the C data type to use
*/
/* Change this from BITS_PER_LONG if there is something better... */
#if BITS_PER_LONG == 64
# define NBYTES(x) ((x) * 0x0101010101010101UL)
# define NSIZE 8
# define NSHIFT 3
# define NSTRING "64"
typedef u64 unative_t;
#else
# define NBYTES(x) ((x) * 0x01010101U)
# define NSIZE 4
# define NSHIFT 2
# define NSTRING "32"
typedef u32 unative_t;
#endif
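Aside (illustration only, not part of the patch): NBYTES(x) replicates a byte value into every byte lane of the native word, which is what lets the unrolled integer code below process NSIZE bytes per operation. A minimal user-space check, assuming the 64-bit definition above:

    /* hypothetical user-space check, not kernel code */
    #include <assert.h>
    #include <stdint.h>

    #define NBYTES64(x) ((x) * 0x0101010101010101ULL)  /* 64-bit NBYTES() */

    int main(void)
    {
        /* 0x1d is the low byte of the GF(2^8) reduction polynomial 0x11d */
        assert(NBYTES64(0x1d) == 0x1d1d1d1d1d1d1d1dULL);
        assert(NBYTES64(0xfe) == 0xfefefefefefefefeULL);
        return 0;
    }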
/*
* IA-64 wants insane amounts of unrolling. On other architectures that
* is just a waste of space.
*/
#if ($# <= 8) || defined(__ia64__)
/*
* These sub-operations are separate inlines since they can sometimes be
* specially optimized using architecture-specific hacks.
*/
/*
* The SHLBYTE() operation shifts each byte left by 1, *not*
* rolling over into the next byte
*/
static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
{
unative_t vv;
vv = (v << 1) & NBYTES(0xfe);
return vv;
}
/*
* The MASK() operation returns 0xFF in any byte for which the high
* bit is 1, 0x00 for any byte for which the high bit is 0.
*/
static inline __attribute_const__ unative_t MASK(unative_t v)
{
unative_t vv;
vv = v & NBYTES(0x80);
vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */
return vv;
}
#if ($# <= 8) || defined(__ia64__)
static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
......@@ -44,9 +96,8 @@ static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
for ( z = z0-1 ; z >= 0 ; z-- ) {
wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
wp$$ ^= wd$$;
w2$$ = wq$$ & NBYTES(0x80);
w1$$ = (wq$$ << 1) & NBYTES(0xfe);
w2$$ = (w2$$ << 1) - (w2$$ >> 7);
w2$$ = MASK(wq$$);
w1$$ = SHLBYTE(wq$$);
w2$$ &= NBYTES(0x1d);
w1$$ ^= w2$$;
wq$$ = w1$$ ^ wd$$;
......
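For reference (not part of the patch): the w1$$/w2$$ sequence above is a SIMD-within-a-register multiply-by-2 over GF(2^8) with reduction polynomial 0x11d, applied to every byte of wq$$ at once; the result is then XORed with the next data block. A small user-space sketch, assuming a 64-bit unative_t, that checks the packed form against a per-byte reference:

    /* hypothetical user-space sketch, not kernel code */
    #include <assert.h>
    #include <stdint.h>

    static uint8_t gf2_mul2(uint8_t b)           /* per-byte reference */
    {
        return (uint8_t)((b << 1) ^ ((b & 0x80) ? 0x1d : 0x00));
    }

    static uint64_t gf2_mul2_packed(uint64_t v)  /* as in the loop above */
    {
        uint64_t w2 = v & 0x8080808080808080ULL;         /* MASK(): 0xff in  */
        w2 = (w2 << 1) - (w2 >> 7);                      /* each high-bit byte */
        uint64_t w1 = (v << 1) & 0xfefefefefefefefeULL;  /* SHLBYTE()        */
        return w1 ^ (w2 & 0x1d1d1d1d1d1d1d1dULL);        /* reduce mod 0x11d */
    }

    int main(void)
    {
        uint64_t v = 0x00205580ff7f0110ULL, r = gf2_mul2_packed(v);
        for (int i = 0; i < 8; i++)
            assert(((r >> (8*i)) & 0xff) == gf2_mul2((uint8_t)(v >> (8*i))));
        return 0;
    }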
......@@ -16,7 +16,7 @@
* MMX implementation of RAID-6 syndrome functions
*/
#if defined(__i386__) || defined(__x86_64__)
#if defined(__i386__)
#include "raid6.h"
#include "raid6x86.h"
......
......@@ -117,7 +117,7 @@ void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs
} else {
/* data+Q failure. Reconstruct data from P,
then rebuild syndrome. */
/* FIX */
/* NOT IMPLEMENTED - equivalent to RAID-5 */
}
} else {
if ( failb == disks-2 ) {
......
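For context (illustration only, not part of the patch): the data+Q case reduces to an ordinary RAID-5 recovery, which is why it is left unimplemented here and skipped in the user-space testbench. A hypothetical sketch of that path, assuming a gen_syndrome-style callback:

    /* hypothetical sketch, not the kernel implementation */
    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* With P intact, the failed data block is just the XOR of P and the
       surviving data blocks (plain RAID-5); Q is then regenerated. */
    void recov_data_q(int disks, size_t bytes, int faila, uint8_t **ptrs,
                      void (*gen_syndrome)(int, size_t, void **))
    {
        uint8_t *p = ptrs[disks-2];    /* P parity block (still good)      */
        uint8_t *d = ptrs[faila];      /* failed data block, to be rebuilt */
        size_t i;
        int z;

        memcpy(d, p, bytes);           /* start from P ...                 */
        for (z = 0; z < disks-2; z++)  /* ... XOR in the other data blocks */
            if (z != faila)
                for (i = 0; i < bytes; i++)
                    d[i] ^= ptrs[z][i];

        gen_syndrome(disks, bytes, (void **)ptrs);   /* recompute P and Q  */
    }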
......@@ -21,7 +21,7 @@
* worthwhile as a separate implementation.
*/
#if defined(__i386__) || defined(__x86_64__)
#if defined(__i386__)
#include "raid6.h"
#include "raid6x86.h"
......
......@@ -4,7 +4,8 @@
#
CC = gcc
CFLAGS = -I.. -O2 -g -march=i686
OPTFLAGS = -O2 # Adjust as desired
CFLAGS = -I.. -g $(OPTFLAGS)
LD = ld
PERL = perl
......@@ -17,12 +18,10 @@ PERL = perl
%.uc: ../%.uc
cp -f $< $@
%.pl: ../%.pl
cp -f $< $@
all: raid6.o raid6test
raid6.o: raid6int1.o raid6int2.o raid6int4.o raid6int8.o raid6int16.o \
raid6int32.o \
raid6mmx.o raid6sse1.o raid6sse2.o \
raid6recov.o raid6algos.o \
raid6tables.o
......@@ -31,20 +30,23 @@ raid6.o: raid6int1.o raid6int2.o raid6int4.o raid6int8.o raid6int16.o \
raid6test: raid6.o test.c
$(CC) $(CFLAGS) -o raid6test $^
raid6int1.c: raid6int.uc unroller.pl
$(PERL) ./unroller.pl 1 < raid6int.uc > $@
raid6int1.c: raid6int.uc ../unroll.pl
$(PERL) ../unroll.pl 1 < raid6int.uc > $@
raid6int2.c: raid6int.uc ../unroll.pl
$(PERL) ../unroll.pl 2 < raid6int.uc > $@
raid6int2.c: raid6int.uc unroller.pl
$(PERL) ./unroller.pl 2 < raid6int.uc > $@
raid6int4.c: raid6int.uc ../unroll.pl
$(PERL) ../unroll.pl 4 < raid6int.uc > $@
raid6int4.c: raid6int.uc unroller.pl
$(PERL) ./unroller.pl 4 < raid6int.uc > $@
raid6int8.c: raid6int.uc ../unroll.pl
$(PERL) ../unroll.pl 8 < raid6int.uc > $@
raid6int8.c: raid6int.uc unroller.pl
$(PERL) ./unroller.pl 8 < raid6int.uc > $@
raid6int16.c: raid6int.uc ../unroll.pl
$(PERL) ../unroll.pl 16 < raid6int.uc > $@
raid6int16.c: raid6int.uc unroller.pl
$(PERL) ./unroller.pl 16 < raid6int.uc > $@
raid6int32.c: raid6int.uc ../unroll.pl
$(PERL) ../unroll.pl 32 < raid6int.uc > $@
raid6tables.c: mktables
./mktables > raid6tables.c
......
......@@ -73,6 +73,10 @@ int main(int argc, char *argv[])
erra = memcmp(data[i], recovi, PAGE_SIZE);
errb = memcmp(data[j], recovj, PAGE_SIZE);
if ( i < NDISKS-2 && j == NDISKS-1 ) {
/* We don't implement the DQ failure scenario, since it's
equivalent to a RAID-5 failure (XOR, then recompute Q) */
} else {
printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n",
raid6_call.name,
i, (i==NDISKS-2)?'P':'D',
......@@ -81,6 +85,7 @@ int main(int argc, char *argv[])
!erra ? "ERRB" :
!errb ? "ERRA" :
"ERRAB");
}
dataptrs[i] = data[i];
dataptrs[j] = data[j];
......
#ident "$Id: raid6x86.h,v 1.3 2002/12/12 22:41:27 hpa Exp $"
/* ----------------------------------------------------------------------- *
*
* Copyright 2002 H. Peter Anvin - All Rights Reserved
* Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
......@@ -22,54 +22,75 @@
#if defined(__i386__) || defined(__x86_64__)
#ifdef __x86_64__
typedef struct {
unsigned int fsave[27];
unsigned int cr0;
} raid6_mmx_save_t;
unsigned long cr0;
} raid6_mmx_save_t __attribute__((aligned(16)));
/* N.B.: For SSE we only save %xmm0-%xmm7 even for x86-64, since
the code doesn't know about the additional x86-64 registers */
/* The +3 is so we can make sure the area is aligned properly */
typedef struct {
unsigned int sarea[8*4+3];
unsigned int sarea[8*4];
unsigned int cr0;
} raid6_sse_save_t __attribute__((aligned(16)));
#ifdef __x86_64__
/* This is for x86-64-specific code which uses all 16 XMM registers */
typedef struct {
unsigned int sarea[16*4+3];
unsigned int cr0;
unsigned int sarea[16*4];
unsigned long cr0;
} raid6_sse16_save_t __attribute__((aligned(16)));
/* On x86-64 the stack is 16-byte aligned */
#define SAREA(x) (x->sarea)
#else /* __i386__ */
typedef struct {
unsigned int fsave[27];
unsigned long cr0;
} raid6_mmx_save_t;
/* On i386, the stack is only 8-byte aligned, but SSE requires 16-byte
alignment. The +3 is so we have the slack space to manually align
a properly-sized area correctly. */
typedef struct {
unsigned int sarea[8*4+3];
unsigned long cr0;
} raid6_sse_save_t;
#define SAREA(x) ((unsigned int *)((((unsigned long)&(x)->sarea)+15) & ~15))
#endif
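Aside (illustration only, not part of the patch): the SAREA() trick on i386 is plain manual alignment: over-allocate the save area by 3 words (12 bytes) and round its address up to the next 16-byte boundary, so a full 8*16-byte region always fits even though the struct itself may only be 4-byte aligned. A minimal user-space sketch:

    /* hypothetical user-space sketch, not kernel code */
    #include <assert.h>
    #include <stdint.h>

    struct sse_save {
        unsigned int sarea[8*4 + 3];  /* 8 XMM regs * 16 bytes, + 12 bytes slack */
    };

    static unsigned int *sarea_aligned(struct sse_save *s)
    {
        return (unsigned int *)(((uintptr_t)s->sarea + 15) & ~(uintptr_t)15);
    }

    int main(void)
    {
        struct sse_save s;
        unsigned int *rsa = sarea_aligned(&s);

        assert(((uintptr_t)rsa & 15) == 0);                        /* aligned   */
        assert(rsa >= s.sarea && rsa + 8*4 <= s.sarea + 8*4 + 3);  /* in bounds */
        return 0;
    }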
#ifdef __KERNEL__ /* Real code */
static inline u32 raid6_get_fpu(void)
/* Note: %cr0 is 32 bits on i386 and 64 bits on x86-64 */
static inline unsigned long raid6_get_fpu(void)
{
u32 cr0;
unsigned long cr0;
preempt_disable();
asm volatile("movl %%cr0,%0 ; clts" : "=r" (cr0));
asm volatile("mov %%cr0,%0 ; clts" : "=r" (cr0));
return cr0;
}
static inline void raid6_put_fpu(u32 cr0)
static inline void raid6_put_fpu(unsigned long cr0)
{
asm volatile("movl %0,%%cr0" : : "r" (cr0));
asm volatile("mov %0,%%cr0" : : "r" (cr0));
preempt_enable();
}
#else /* Dummy code for user space testing */
static inline u32 raid6_get_fpu(void)
static inline unsigned long raid6_get_fpu(void)
{
return 0xf00ba6;
}
static inline void raid6_put_fpu(u32 cr0)
static inline void raid6_put_fpu(unsigned long cr0)
{
(void)cr0;
}
......@@ -90,13 +111,8 @@ static inline void raid6_after_mmx(raid6_mmx_save_t *s)
static inline void raid6_before_sse(raid6_sse_save_t *s)
{
#ifdef __x86_64__
unsigned int *rsa = s->sarea;
#else
/* On i386 the save area may not be aligned */
unsigned int *rsa =
(unsigned int *)((((unsigned long)&s->sarea)+15) & ~15);
#endif
unsigned int *rsa = SAREA(s);
s->cr0 = raid6_get_fpu();
asm volatile("movaps %%xmm0,%0" : "=m" (rsa[0]));
......@@ -111,13 +127,8 @@ static inline void raid6_before_sse(raid6_sse_save_t *s)
static inline void raid6_after_sse(raid6_sse_save_t *s)
{
#ifdef __x86_64__
unsigned int *rsa = s->sarea;
#else
/* On i386 the save area may not be aligned */
unsigned int *rsa =
(unsigned int *)((((unsigned long)&s->sarea)+15) & ~15);
#endif
unsigned int *rsa = SAREA(s);
asm volatile("movaps %0,%%xmm0" : : "m" (rsa[0]));
asm volatile("movaps %0,%%xmm1" : : "m" (rsa[4]));
asm volatile("movaps %0,%%xmm2" : : "m" (rsa[8]));
......@@ -132,13 +143,8 @@ static inline void raid6_after_sse(raid6_sse_save_t *s)
static inline void raid6_before_sse2(raid6_sse_save_t *s)
{
#ifdef __x86_64__
unsigned int *rsa = &s->sarea;
#else
/* On i386 the save area may not be aligned */
unsigned int *rsa =
(unsigned int *)((((unsigned long)&s->sarea)+15) & ~15);
#endif
unsigned int *rsa = SAREA(s);
s->cr0 = raid6_get_fpu();
asm volatile("movdqa %%xmm0,%0" : "=m" (rsa[0]));
......@@ -153,13 +159,8 @@ static inline void raid6_before_sse2(raid6_sse_save_t *s)
static inline void raid6_after_sse2(raid6_sse_save_t *s)
{
#ifdef __x86_64__
unsigned int *rsa = s->sarea;
#else
/* On i386 the save area may not be aligned */
unsigned int *rsa =
(unsigned int *)((((unsigned long)&s->sarea)+15) & ~15);
#endif
unsigned int *rsa = SAREA(s);
asm volatile("movdqa %0,%%xmm0" : : "m" (rsa[0]));
asm volatile("movdqa %0,%%xmm1" : : "m" (rsa[4]));
asm volatile("movdqa %0,%%xmm2" : : "m" (rsa[8]));
......@@ -174,9 +175,9 @@ static inline void raid6_after_sse2(raid6_sse_save_t *s)
#ifdef __x86_64__
static inline raid6_before_sse16(raid6_sse16_save_t *s)
static inline void raid6_before_sse16(raid6_sse16_save_t *s)
{
unsigned int *rsa = s->sarea;
unsigned int *rsa = SAREA(s);
s->cr0 = raid6_get_fpu();
......@@ -198,9 +199,9 @@ static inline raid6_before_sse16(raid6_sse16_save_t *s)
asm volatile("movdqa %%xmm15,%0" : "=m" (rsa[60]));
}
static inline raid6_after_sse16(raid6_sse16_save_t *s)
static inline void raid6_after_sse16(raid6_sse16_save_t *s)
{
unsigned int *rsa = s->sarea;
unsigned int *rsa = SAREA(s);
asm volatile("movdqa %0,%%xmm0" : : "m" (rsa[0]));
asm volatile("movdqa %0,%%xmm1" : : "m" (rsa[4]));
......