Commit 63e6c5b8 authored by Anton Blanchard's avatar Anton Blanchard Committed by Benjamin Herrenschmidt

powerpc: Pair loads and stores in copy_4k_page

A number of our chips like loads and stores to be paired. A small kernel
module testcase shows the improvement of pairing loads and stores in
copy_4k_page:

POWER6: +9%
POWER7: +1.5%

#include <linux/module.h>
#include <linux/mm.h>

#define ITERATIONS 10000000

static int __init copypage_init(void)
{
	struct timespec before, after;
	unsigned long i;
	struct page *destpage, *srcpage;
	char *dest, *src;

	destpage = alloc_page(GFP_KERNEL);
	srcpage = alloc_page(GFP_KERNEL);

	dest = page_address(destpage);
	src = page_address(srcpage);

	getnstimeofday(&before);

	for (i = 0; i < ITERATIONS; i++)
		copy_4K_page(dest, src);

	getnstimeofday(&after);

	free_page((unsigned long)dest);
	free_page((unsigned long)src);

	printk(KERN_DEBUG "copy_4K_page loop took %lu ns\n",
		(after.tv_sec - before.tv_sec) * NSEC_PER_SEC +
		(after.tv_nsec - before.tv_nsec));

	return 0;
}

static void __exit copypage_exit(void)
{
}

module_init(copypage_init)
module_exit(copypage_exit)
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Anton Blanchard");
Signed-off-by: default avatarAnton Blanchard <anton@samba.org>
Signed-off-by: default avatarBenjamin Herrenschmidt <benh@kernel.crashing.org>
parent 5a0e9b57
......@@ -43,62 +43,62 @@ END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
ld r7,16(r4)
ldu r8,24(r4)
1: std r5,8(r3)
ld r9,8(r4)
std r6,16(r3)
ld r9,8(r4)
ld r10,16(r4)
std r7,24(r3)
ld r11,24(r4)
std r8,32(r3)
ld r11,24(r4)
ld r12,32(r4)
std r9,40(r3)
ld r5,40(r4)
std r10,48(r3)
ld r5,40(r4)
ld r6,48(r4)
std r11,56(r3)
ld r7,56(r4)
std r12,64(r3)
ld r7,56(r4)
ld r8,64(r4)
std r5,72(r3)
ld r9,72(r4)
std r6,80(r3)
ld r9,72(r4)
ld r10,80(r4)
std r7,88(r3)
ld r11,88(r4)
std r8,96(r3)
ld r11,88(r4)
ld r12,96(r4)
std r9,104(r3)
ld r5,104(r4)
std r10,112(r3)
ld r5,104(r4)
ld r6,112(r4)
std r11,120(r3)
ld r7,120(r4)
stdu r12,128(r3)
ld r7,120(r4)
ldu r8,128(r4)
bdnz 1b
std r5,8(r3)
ld r9,8(r4)
std r6,16(r3)
ld r9,8(r4)
ld r10,16(r4)
std r7,24(r3)
ld r11,24(r4)
std r8,32(r3)
ld r11,24(r4)
ld r12,32(r4)
std r9,40(r3)
ld r5,40(r4)
std r10,48(r3)
ld r5,40(r4)
ld r6,48(r4)
std r11,56(r3)
ld r7,56(r4)
std r12,64(r3)
ld r7,56(r4)
ld r8,64(r4)
std r5,72(r3)
ld r9,72(r4)
std r6,80(r3)
ld r9,72(r4)
ld r10,80(r4)
std r7,88(r3)
ld r11,88(r4)
std r8,96(r3)
ld r11,88(r4)
ld r12,96(r4)
std r9,104(r3)
std r10,112(r3)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment