Commit 33ccf4bd authored by Martin Schwidefsky, committed by Linus Torvalds

[PATCH] s390 update (14/27): inline optimizations.

Inline csum_partial for s390. The only reason it was out-of-line previously
is that some older compilers could not get the inline version right.
parent b2a749f0
...@@ -6,8 +6,7 @@ L_TARGET = lib.a ...@@ -6,8 +6,7 @@ L_TARGET = lib.a
EXTRA_AFLAGS := -traditional EXTRA_AFLAGS := -traditional
obj-y = checksum.o delay.o memset.o misaligned.o strcmp.o strncpy.o uaccess.o obj-y = delay.o memset.o strcmp.o strncpy.o uaccess.o
export-objs += misaligned.o
include $(TOPDIR)/Rules.make include $(TOPDIR)/Rules.make
/*
* arch/s390/lib/checksum.c
* S390 fast network checksum routines
*
* S390 version
* Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
* Author(s): Ulrich Hild (first version),
* Martin Schwidefsky (schwidefsky@de.ibm.com),
* Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
*
* This file contains network checksum routines
*/
#include <linux/string.h>
#include <linux/types.h>
#include <asm/uaccess.h>
#include <asm/byteorder.h>
#include <asm/checksum.h>
/*
 * csum_partial - compute a partial checksum, e.g. for TCP/UDP fragments
 *
 * buff: start of the data to checksum
 * len:  number of bytes to checksum
 * sum:  running checksum that the data is accumulated into
 *
 * Returns the updated 32-bit running checksum.
 */
unsigned int
csum_partial (const unsigned char *buff, int len, unsigned int sum)
{
/*
 * Register pair handed to CKSM as its second operand: the even register
 * carries the buffer address, the odd register the length.
 */
register_pair rp;
/*
* Experiments with ethernet and slip connections show that buff
* is aligned on either a 2-byte or 4-byte boundary.
*/
rp.subreg.even = (unsigned long) buff;
rp.subreg.odd = (unsigned long) len;
/*
 * "jo" branches back to the CKSM while the condition code is 3. Per the
 * Principles of Operation, CKSM may stop with that code before the whole
 * operand has been processed, so the instruction is re-issued until done.
 *
 * Operands: %0 = sum, a read-write, early-clobber data register;
 *           %1 = rp, a read-write address register pair.
 * The condition code is declared as clobbered.
 */
__asm__ __volatile__ (
"0: cksm %0,%1\n" /* do checksum on longs */
" jo 0b\n"
: "+&d" (sum), "+&a" (rp) : : "cc" );
return sum;
}
/*
 * csum_fold - fold a partial checksum without adding pseudo headers
 *
 * sum: 32-bit running checksum
 *
 * Adds the high halfword (H) and low halfword (L) of sum, together with
 * the carry (C) out of that addition, and returns the bitwise complement
 * of the result truncated to 16 bits.
 *
 * Notation in the per-instruction comments: %0 is shown as its two
 * halfwords; %1 is shown as the four halfwords of the even/odd register
 * pair (%1 is the even register, %N1 its odd partner).
 */
unsigned short csum_fold(unsigned int sum)
{
/* Scratch register pair; written by the asm, not read afterwards. */
register_pair rp;
__asm__ __volatile__ (
" slr %N1,%N1\n" /* %0 = H L */
" lr %1,%0\n" /* %0 = H L, %1 = H L 0 0 */
" srdl %1,16\n" /* %0 = H L, %1 = 0 H L 0 */
" alr %1,%N1\n" /* %0 = H L, %1 = L H L 0 */
" alr %0,%1\n" /* %0 = H+L+C L+H */
" srl %0,16\n" /* %0 = H+L+C */
: "+&d" (sum), "=d" (rp) : : "cc" );
/* Complement the folded sum and keep only the low 16 bits. */
return ((unsigned short) ~sum);
}
/*
* arch/s390/lib/misaligned.c
* S390 misalignment panic stubs
*
* S390 version
* Copyright (C) 2001 IBM Deutschland Entwicklung GmbH, IBM Corporation
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com).
*
* xchg wants to panic if the pointer is not aligned. To avoid multiplying
* the panic message over and over again, the panic is done in the helper
* functions __misaligned_u32 and __misaligned_u16.
*/
#include <linux/module.h>
#include <linux/kernel.h>
/*
 * Panic helper for a misaligned __u16 pointer passed to __xchg.
 * Takes no arguments and panics unconditionally; the alignment check
 * itself is done by the caller.
 */
void __misaligned_u16(void)
{
panic("misaligned (__u16 *) in __xchg\n");
}
/*
 * Panic helper for a misaligned __u32 pointer passed to __xchg.
 * Takes no arguments and panics unconditionally; the alignment check
 * itself is done by the caller.
 */
void __misaligned_u32(void)
{
panic("misaligned (__u32 *) in __xchg\n");
}
/* Export the panic helpers so that code built as modules can reference them. */
EXPORT_SYMBOL(__misaligned_u16);
EXPORT_SYMBOL(__misaligned_u32);
...@@ -6,8 +6,7 @@ L_TARGET = lib.a ...@@ -6,8 +6,7 @@ L_TARGET = lib.a
EXTRA_AFLAGS := -traditional EXTRA_AFLAGS := -traditional
obj-y = checksum.o delay.o memset.o misaligned.o strcmp.o strncpy.o uaccess.o obj-y = delay.o memset.o strcmp.o strncpy.o uaccess.o
export-objs += misaligned.o
include $(TOPDIR)/Rules.make include $(TOPDIR)/Rules.make
/*
* arch/s390/lib/checksum.c
* S390 fast network checksum routines
*
* S390 version
* Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
* Author(s): Ulrich Hild (first version),
* Martin Schwidefsky (schwidefsky@de.ibm.com),
* Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
*
* This file contains network checksum routines
*/
#include <linux/string.h>
#include <linux/types.h>
#include <asm/uaccess.h>
#include <asm/byteorder.h>
#include <asm/checksum.h>
/*
 * csum_partial - compute a partial checksum, e.g. for TCP/UDP fragments
 *
 * buff: start of the data to checksum
 * len:  number of bytes to checksum
 * sum:  running checksum that the data is accumulated into
 *
 * Returns the updated 32-bit running checksum.
 */
unsigned int
csum_partial (const unsigned char *buff, int len, unsigned int sum)
{
/*
* Experiments with ethernet and slip connections show that buff
* is aligned on either a 2-byte or 4-byte boundary.
*/
/*
 * The CKSM second operand is the hard-coded register pair gpr 2/gpr 3
 * (address/length), which is why both are listed as clobbered. The int
 * length is widened to the 64-bit register with lgfr.
 *
 * "jo" branches back to the CKSM while the condition code is 3. Per the
 * Principles of Operation, CKSM may stop with that code before the whole
 * operand has been processed, so the instruction is re-issued until done.
 *
 * Operands: %0 = sum, a read-write, early-clobber data register;
 *           %1 = buff and %2 = len, input data registers.
 */
__asm__ __volatile__ (
" lgr 2,%1\n" /* address in gpr 2 */
" lgfr 3,%2\n" /* length in gpr 3 */
"0: cksm %0,2\n" /* do checksum on longs */
" jo 0b\n"
: "+&d" (sum)
: "d" (buff), "d" (len)
: "cc", "2", "3" );
return sum;
}
/*
* arch/s390/lib/misaligned.c
* S390 misalignment panic stubs
*
* S390 version
* Copyright (C) 2001 IBM Deutschland Entwicklung GmbH, IBM Corporation
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com).
*
* xchg wants to panic if the pointer is not aligned. To avoid multiplying
* the panic message over and over again, the panic is done in the helper
* functions __misaligned_u64, __misaligned_u32 and __misaligned_u16.
*/
#include <linux/module.h>
#include <linux/kernel.h>
/*
 * Panic helper for a misaligned __u16 pointer passed to __xchg.
 * Takes no arguments and panics unconditionally; the alignment check
 * itself is done by the caller.
 */
void __misaligned_u16(void)
{
panic("misaligned (__u16 *) in __xchg\n");
}
/*
 * Panic helper for a misaligned __u32 pointer passed to __xchg.
 * Takes no arguments and panics unconditionally; the alignment check
 * itself is done by the caller.
 */
void __misaligned_u32(void)
{
panic("misaligned (__u32 *) in __xchg\n");
}
/*
 * Panic helper for a misaligned __u64 pointer passed to __xchg.
 * Takes no arguments and panics unconditionally; the alignment check
 * itself is done by the caller.
 */
void __misaligned_u64(void)
{
panic("misaligned (__u64 *) in __xchg\n");
}
/* Export the panic helpers so that code built as modules can reference them. */
EXPORT_SYMBOL(__misaligned_u16);
EXPORT_SYMBOL(__misaligned_u32);
EXPORT_SYMBOL(__misaligned_u64);
...@@ -27,13 +27,27 @@ ...@@ -27,13 +27,27 @@
* *
* it's best to have buff aligned on a 32-bit boundary * it's best to have buff aligned on a 32-bit boundary
*/ */
unsigned int static inline unsigned int
csum_partial(const unsigned char * buff, int len, unsigned int sum); csum_partial(const unsigned char * buff, int len, unsigned int sum)
{
register_pair rp;
/*
* Experiments with ethernet and slip connections show that buf
* is aligned on either a 2-byte or 4-byte boundary.
*/
rp.subreg.even = (unsigned long) buff;
rp.subreg.odd = (unsigned long) len;
__asm__ __volatile__ (
"0: cksm %0,%1\n" /* do checksum on longs */
" jo 0b\n"
: "+&d" (sum), "+&a" (rp) : : "cc" );
return sum;
}
/* /*
* csum_partial as an inline function * csum_partial as an inline function
*/ */
extern inline unsigned int static inline unsigned int
csum_partial_inline(const unsigned char * buff, int len, unsigned int sum) csum_partial_inline(const unsigned char * buff, int len, unsigned int sum)
{ {
register_pair rp; register_pair rp;
...@@ -55,7 +69,7 @@ csum_partial_inline(const unsigned char * buff, int len, unsigned int sum) ...@@ -55,7 +69,7 @@ csum_partial_inline(const unsigned char * buff, int len, unsigned int sum)
* better 64-bit) boundary * better 64-bit) boundary
*/ */
extern inline unsigned int static inline unsigned int
csum_partial_copy(const char *src, char *dst, int len,unsigned int sum) csum_partial_copy(const char *src, char *dst, int len,unsigned int sum)
{ {
memcpy(dst,src,len); memcpy(dst,src,len);
...@@ -71,7 +85,7 @@ csum_partial_copy(const char *src, char *dst, int len,unsigned int sum) ...@@ -71,7 +85,7 @@ csum_partial_copy(const char *src, char *dst, int len,unsigned int sum)
* Copy from userspace and compute checksum. If we catch an exception * Copy from userspace and compute checksum. If we catch an exception
* then zero the rest of the buffer. * then zero the rest of the buffer.
*/ */
extern inline unsigned int static inline unsigned int
csum_partial_copy_from_user (const char *src, char *dst, csum_partial_copy_from_user (const char *src, char *dst,
int len, unsigned int sum, int len, unsigned int sum,
int *err_ptr) int *err_ptr)
...@@ -88,7 +102,7 @@ csum_partial_copy_from_user (const char *src, char *dst, ...@@ -88,7 +102,7 @@ csum_partial_copy_from_user (const char *src, char *dst,
} }
extern inline unsigned int static inline unsigned int
csum_partial_copy_nocheck (const char *src, char *dst, int len, unsigned int sum) csum_partial_copy_nocheck (const char *src, char *dst, int len, unsigned int sum)
{ {
memcpy(dst,src,len); memcpy(dst,src,len);
...@@ -98,10 +112,7 @@ csum_partial_copy_nocheck (const char *src, char *dst, int len, unsigned int sum ...@@ -98,10 +112,7 @@ csum_partial_copy_nocheck (const char *src, char *dst, int len, unsigned int sum
/* /*
* Fold a partial checksum without adding pseudo headers * Fold a partial checksum without adding pseudo headers
*/ */
#if 1 static inline unsigned short
unsigned short csum_fold(unsigned int sum);
#else
extern inline unsigned short
csum_fold(unsigned int sum) csum_fold(unsigned int sum)
{ {
register_pair rp; register_pair rp;
...@@ -116,14 +127,13 @@ csum_fold(unsigned int sum) ...@@ -116,14 +127,13 @@ csum_fold(unsigned int sum)
: "+&d" (sum), "=d" (rp) : : "cc" ); : "+&d" (sum), "=d" (rp) : : "cc" );
return ((unsigned short) ~sum); return ((unsigned short) ~sum);
} }
#endif
/* /*
* This is a version of ip_compute_csum() optimized for IP headers, * This is a version of ip_compute_csum() optimized for IP headers,
* which always checksum on 4 octet boundaries. * which always checksum on 4 octet boundaries.
* *
*/ */
extern inline unsigned short static inline unsigned short
ip_fast_csum(unsigned char *iph, unsigned int ihl) ip_fast_csum(unsigned char *iph, unsigned int ihl)
{ {
register_pair rp; register_pair rp;
...@@ -143,7 +153,7 @@ ip_fast_csum(unsigned char *iph, unsigned int ihl) ...@@ -143,7 +153,7 @@ ip_fast_csum(unsigned char *iph, unsigned int ihl)
* computes the checksum of the TCP/UDP pseudo-header * computes the checksum of the TCP/UDP pseudo-header
* returns a 32-bit checksum * returns a 32-bit checksum
*/ */
extern inline unsigned int static inline unsigned int
csum_tcpudp_nofold(unsigned long saddr, unsigned long daddr, csum_tcpudp_nofold(unsigned long saddr, unsigned long daddr,
unsigned short len, unsigned short proto, unsigned short len, unsigned short proto,
unsigned int sum) unsigned int sum)
...@@ -176,7 +186,7 @@ csum_tcpudp_nofold(unsigned long saddr, unsigned long daddr, ...@@ -176,7 +186,7 @@ csum_tcpudp_nofold(unsigned long saddr, unsigned long daddr,
* returns a 16-bit checksum, already complemented * returns a 16-bit checksum, already complemented
*/ */
extern inline unsigned short int static inline unsigned short int
csum_tcpudp_magic(unsigned long saddr, unsigned long daddr, csum_tcpudp_magic(unsigned long saddr, unsigned long daddr,
unsigned short len, unsigned short proto, unsigned short len, unsigned short proto,
unsigned int sum) unsigned int sum)
...@@ -189,7 +199,7 @@ csum_tcpudp_magic(unsigned long saddr, unsigned long daddr, ...@@ -189,7 +199,7 @@ csum_tcpudp_magic(unsigned long saddr, unsigned long daddr,
* in icmp.c * in icmp.c
*/ */
extern inline unsigned short static inline unsigned short
ip_compute_csum(unsigned char * buff, int len) ip_compute_csum(unsigned char * buff, int len)
{ {
return csum_fold(csum_partial(buff, len, 0)); return csum_fold(csum_partial(buff, len, 0));
......
...@@ -30,72 +30,55 @@ struct task_struct; ...@@ -30,72 +30,55 @@ struct task_struct;
#define nop() __asm__ __volatile__ ("nop") #define nop() __asm__ __volatile__ ("nop")
#define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) #define xchg(ptr,x) \
((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
extern void __misaligned_u16(void);
extern void __misaligned_u32(void);
static inline unsigned long __xchg(unsigned long x, void * ptr, int size) static inline unsigned long __xchg(unsigned long x, void * ptr, int size)
{ {
unsigned long addr, old;
int shift;
switch (size) { switch (size) {
case 1: case 1:
asm volatile ( addr = (unsigned long) ptr;
" lhi 1,3\n" shift = (3 ^ (addr & 3)) << 3;
" nr 1,%0\n" /* isolate last 2 bits */ addr ^= addr & 3;
" xr %0,1\n" /* align ptr */ asm volatile(
" bras 2,0f\n" " l %0,0(%3)\n"
" icm 1,8,3(%1)\n" /* for ptr&3 == 0 */ "0: lr 0,%0\n"
" stcm 0,8,3(%1)\n" " nr 0,%2\n"
" icm 1,4,3(%1)\n" /* for ptr&3 == 1 */ " or 0,%1\n"
" stcm 0,4,3(%1)\n" " cs %0,0,0(%3)\n"
" icm 1,2,3(%1)\n" /* for ptr&3 == 2 */ " jl 0b\n"
" stcm 0,2,3(%1)\n" : "=&d" (old)
" icm 1,1,3(%1)\n" /* for ptr&3 == 3 */ : "d" (x << shift), "d" (~(255 << shift)), "a" (addr)
" stcm 0,1,3(%1)\n" : "memory", "cc", "0" );
"0: sll 1,3\n" x = old >> shift;
" la 2,0(1,2)\n" /* r2 points to an icm */
" l 0,0(%0)\n" /* get fullword */
"1: lr 1,0\n" /* cs loop */
" ex 0,0(2)\n" /* insert x */
" cs 0,1,0(%0)\n"
" jl 1b\n"
" ex 0,4(2)" /* store *ptr to x */
: "+a&" (ptr) : "a" (&x)
: "memory", "cc", "0", "1", "2");
break; break;
case 2: case 2:
if(((__u32)ptr)&1) addr = (unsigned long) ptr;
__misaligned_u16(); shift = (2 ^ (addr & 2)) << 3;
asm volatile ( addr ^= addr & 2;
" lhi 1,2\n" asm volatile(
" nr 1,%0\n" /* isolate bit 2^1 */ " l %0,0(%3)\n"
" xr %0,1\n" /* align ptr */ "0: lr 0,%0\n"
" bras 2,0f\n" " nr 0,%2\n"
" icm 1,12,2(%1)\n" /* for ptr&2 == 0 */ " or 0,%1\n"
" stcm 0,12,2(%1)\n" " cs %0,0,0(%3)\n"
" icm 1,3,2(%1)\n" /* for ptr&2 == 1 */ " jl 0b\n"
" stcm 0,3,2(%1)\n" : "=&d" (old)
"0: sll 1,2\n" : "d" (x << shift), "d" (~(65535 << shift)), "a" (addr)
" la 2,0(1,2)\n" /* r2 points to an icm */ : "memory", "cc", "0" );
" l 0,0(%0)\n" /* get fullword */ x = old >> shift;
"1: lr 1,0\n" /* cs loop */
" ex 0,0(2)\n" /* insert x */
" cs 0,1,0(%0)\n"
" jl 1b\n"
" ex 0,4(2)" /* store *ptr to x */
: "+a&" (ptr) : "a" (&x)
: "memory", "cc", "0", "1", "2");
break; break;
case 4: case 4:
if(((__u32)ptr)&3)
__misaligned_u32();
asm volatile ( asm volatile (
" l 0,0(%1)\n" " l %0,0(%2)\n"
"0: cs 0,%0,0(%1)\n" "0: cs %0,%1,0(%2)\n"
" jl 0b\n" " jl 0b\n"
" lr %0,0\n" : "=&d" (old) : "d" (x), "a" (ptr)
: "+d&" (x) : "a" (ptr)
: "memory", "cc", "0" ); : "memory", "cc", "0" );
x = old;
break; break;
} }
return x; return x;
......
...@@ -27,13 +27,29 @@ ...@@ -27,13 +27,29 @@
* *
* it's best to have buff aligned on a 32-bit boundary * it's best to have buff aligned on a 32-bit boundary
*/ */
unsigned int static inline unsigned int
csum_partial(const unsigned char * buff, int len, unsigned int sum); csum_partial(const unsigned char * buff, int len, unsigned int sum)
{
/*
* Experiments with ethernet and slip connections show that buff
* is aligned on either a 2-byte or 4-byte boundary.
*/
__asm__ __volatile__ (
" lgr 2,%1\n" /* address in gpr 2 */
" lgfr 3,%2\n" /* length in gpr 3 */
"0: cksm %0,2\n" /* do checksum on longs */
" jo 0b\n"
: "+&d" (sum)
: "d" (buff), "d" (len)
: "cc", "2", "3" );
return sum;
}
/* /*
* csum_partial as an inline function * csum_partial as an inline function
*/ */
extern inline unsigned int static inline unsigned int
csum_partial_inline(const unsigned char * buff, int len, unsigned int sum) csum_partial_inline(const unsigned char * buff, int len, unsigned int sum)
{ {
__asm__ __volatile__ ( __asm__ __volatile__ (
...@@ -55,7 +71,7 @@ csum_partial_inline(const unsigned char * buff, int len, unsigned int sum) ...@@ -55,7 +71,7 @@ csum_partial_inline(const unsigned char * buff, int len, unsigned int sum)
* better 64-bit) boundary * better 64-bit) boundary
*/ */
extern inline unsigned int static inline unsigned int
csum_partial_copy(const char *src, char *dst, int len,unsigned int sum) csum_partial_copy(const char *src, char *dst, int len,unsigned int sum)
{ {
memcpy(dst,src,len); memcpy(dst,src,len);
...@@ -71,7 +87,7 @@ csum_partial_copy(const char *src, char *dst, int len,unsigned int sum) ...@@ -71,7 +87,7 @@ csum_partial_copy(const char *src, char *dst, int len,unsigned int sum)
* Copy from userspace and compute checksum. If we catch an exception * Copy from userspace and compute checksum. If we catch an exception
* then zero the rest of the buffer. * then zero the rest of the buffer.
*/ */
extern inline unsigned int static inline unsigned int
csum_partial_copy_from_user (const char *src, char *dst, csum_partial_copy_from_user (const char *src, char *dst,
int len, unsigned int sum, int len, unsigned int sum,
int *err_ptr) int *err_ptr)
...@@ -87,7 +103,7 @@ csum_partial_copy_from_user (const char *src, char *dst, ...@@ -87,7 +103,7 @@ csum_partial_copy_from_user (const char *src, char *dst,
return csum_partial(dst, len, sum); return csum_partial(dst, len, sum);
} }
extern inline unsigned int static inline unsigned int
csum_partial_copy_nocheck (const char *src, char *dst, int len, unsigned int sum) csum_partial_copy_nocheck (const char *src, char *dst, int len, unsigned int sum)
{ {
memcpy(dst,src,len); memcpy(dst,src,len);
...@@ -97,7 +113,7 @@ csum_partial_copy_nocheck (const char *src, char *dst, int len, unsigned int sum ...@@ -97,7 +113,7 @@ csum_partial_copy_nocheck (const char *src, char *dst, int len, unsigned int sum
/* /*
* Fold a partial checksum without adding pseudo headers * Fold a partial checksum without adding pseudo headers
*/ */
extern inline unsigned short static inline unsigned short
csum_fold(unsigned int sum) csum_fold(unsigned int sum)
{ {
__asm__ __volatile__ ( __asm__ __volatile__ (
...@@ -116,7 +132,7 @@ csum_fold(unsigned int sum) ...@@ -116,7 +132,7 @@ csum_fold(unsigned int sum)
* which always checksum on 4 octet boundaries. * which always checksum on 4 octet boundaries.
* *
*/ */
extern inline unsigned short static inline unsigned short
ip_fast_csum(unsigned char *iph, unsigned int ihl) ip_fast_csum(unsigned char *iph, unsigned int ihl)
{ {
unsigned long sum; unsigned long sum;
...@@ -137,7 +153,7 @@ ip_fast_csum(unsigned char *iph, unsigned int ihl) ...@@ -137,7 +153,7 @@ ip_fast_csum(unsigned char *iph, unsigned int ihl)
* computes the checksum of the TCP/UDP pseudo-header * computes the checksum of the TCP/UDP pseudo-header
* returns a 32-bit checksum * returns a 32-bit checksum
*/ */
extern inline unsigned int static inline unsigned int
csum_tcpudp_nofold(unsigned long saddr, unsigned long daddr, csum_tcpudp_nofold(unsigned long saddr, unsigned long daddr,
unsigned short len, unsigned short proto, unsigned short len, unsigned short proto,
unsigned int sum) unsigned int sum)
...@@ -170,7 +186,7 @@ csum_tcpudp_nofold(unsigned long saddr, unsigned long daddr, ...@@ -170,7 +186,7 @@ csum_tcpudp_nofold(unsigned long saddr, unsigned long daddr,
* returns a 16-bit checksum, already complemented * returns a 16-bit checksum, already complemented
*/ */
extern inline unsigned short int static inline unsigned short int
csum_tcpudp_magic(unsigned long saddr, unsigned long daddr, csum_tcpudp_magic(unsigned long saddr, unsigned long daddr,
unsigned short len, unsigned short proto, unsigned short len, unsigned short proto,
unsigned int sum) unsigned int sum)
...@@ -183,7 +199,7 @@ csum_tcpudp_magic(unsigned long saddr, unsigned long daddr, ...@@ -183,7 +199,7 @@ csum_tcpudp_magic(unsigned long saddr, unsigned long daddr,
* in icmp.c * in icmp.c
*/ */
extern inline unsigned short static inline unsigned short
ip_compute_csum(unsigned char * buff, int len) ip_compute_csum(unsigned char * buff, int len)
{ {
return csum_fold(csum_partial_inline(buff, len, 0)); return csum_fold(csum_partial_inline(buff, len, 0));
......
...@@ -39,76 +39,59 @@ extern void __misaligned_u64(void); ...@@ -39,76 +39,59 @@ extern void __misaligned_u64(void);
static inline unsigned long __xchg(unsigned long x, void * ptr, int size) static inline unsigned long __xchg(unsigned long x, void * ptr, int size)
{ {
unsigned long addr, old;
int shift;
switch (size) { switch (size) {
case 1: case 1:
asm volatile ( addr = (unsigned long) ptr;
" lghi 1,3\n" shift = (3 ^ (addr & 3)) << 3;
" nr 1,%0\n" /* isolate last 2 bits */ addr ^= addr & 3;
" xr %0,1\n" /* align ptr */ asm volatile(
" bras 2,0f\n" " l %0,0(%3)\n"
" icm 1,8,7(%1)\n" /* for ptr&3 == 0 */ "0: lr 0,%0\n"
" stcm 0,8,7(%1)\n" " nr 0,%2\n"
" icm 1,4,7(%1)\n" /* for ptr&3 == 1 */ " or 0,%1\n"
" stcm 0,4,7(%1)\n" " cs %0,0,0(%3)\n"
" icm 1,2,7(%1)\n" /* for ptr&3 == 2 */ " jl 0b\n"
" stcm 0,2,7(%1)\n" : "=&d" (old)
" icm 1,1,7(%1)\n" /* for ptr&3 == 3 */ : "d" (x << shift), "d" (~(255 << shift)), "a" (addr)
" stcm 0,1,7(%1)\n" : "memory", "cc", "0" );
"0: sll 1,3\n" x = old >> shift;
" la 2,0(1,2)\n" /* r2 points to an icm */
" l 0,0(%0)\n" /* get fullword */
"1: lr 1,0\n" /* cs loop */
" ex 0,0(2)\n" /* insert x */
" cs 0,1,0(%0)\n"
" jl 1b\n"
" ex 0,4(2)" /* store *ptr to x */
: "+&a" (ptr) : "a" (&x)
: "memory", "cc", "0", "1", "2");
break; break;
case 2: case 2:
if(((addr_t)ptr)&1) addr = (unsigned long) ptr;
__misaligned_u16(); shift = (2 ^ (addr & 2)) << 3;
asm volatile ( addr ^= addr & 2;
" lghi 1,2\n" asm volatile(
" nr 1,%0\n" /* isolate bit 2^1 */ " l %0,0(%3)\n"
" xr %0,1\n" /* align ptr */ "0: lr 0,%0\n"
" bras 2,0f\n" " nr 0,%2\n"
" icm 1,12,6(%1)\n" /* for ptr&2 == 0 */ " or 0,%1\n"
" stcm 0,12,6(%1)\n" " cs %0,0,0(%3)\n"
" icm 1,3,2(%1)\n" /* for ptr&2 == 1 */ " jl 0b\n"
" stcm 0,3,2(%1)\n" : "=&d" (old)
"0: sll 1,2\n" : "d" (x << shift), "d" (~(65535 << shift)), "a" (addr)
" la 2,0(1,2)\n" /* r2 points to an icm */ : "memory", "cc", "0" );
" l 0,0(%0)\n" /* get fullword */ x = old >> shift;
"1: lr 1,0\n" /* cs loop */
" ex 0,0(2)\n" /* insert x */
" cs 0,1,0(%0)\n"
" jl 1b\n"
" ex 0,4(2)" /* store *ptr to x */
: "+&a" (ptr) : "a" (&x)
: "memory", "cc", "0", "1", "2");
break; break;
case 4: case 4:
if(((addr_t)ptr)&3)
__misaligned_u32();
asm volatile ( asm volatile (
" l 0,0(%1)\n" " l %0,0(%2)\n"
"0: cs 0,%0,0(%1)\n" "0: cs %0,%1,0(%2)\n"
" jl 0b\n" " jl 0b\n"
" lgfr %0,0\n" : "=&d" (old) : "d" (x), "a" (ptr)
: "+d" (x) : "a" (ptr)
: "memory", "cc", "0" ); : "memory", "cc", "0" );
x = old;
break; break;
case 8: case 8:
if(((addr_t)ptr)&7)
__misaligned_u64();
asm volatile ( asm volatile (
" lg 0,0(%1)\n" " lg %0,0(%2)\n"
"0: csg 0,%0,0(%1)\n" "0: csg %0,%1,0(%2)\n"
" jl 0b\n" " jl 0b\n"
" lgr %0,0\n" : "=&d" (old) : "d" (x), "a" (ptr)
: "+d" (x) : "a" (ptr)
: "memory", "cc", "0" ); : "memory", "cc", "0" );
x = old;
break; break;
} }
return x; return x;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment