Commit 0ae2d26f, authored by Babu Moger, committed by David S. Miller

arch/sparc: Avoid DCTI Couples

Avoid unintended DCTI couples. Use of DCTI couples is deprecated.
Also address the "Programming Note" for optimal performance.

Here is the complete text from Oracle SPARC Architecture Specs.

6.3.4.7 DCTI Couples
"A delayed control transfer instruction (DCTI) in the delay slot of
another DCTI is referred to as a “DCTI couple”. The use of DCTI couples
is deprecated in the Oracle SPARC Architecture; no new software should
place a DCTI in the delay slot of another DCTI, because on future Oracle
SPARC Architecture implementations DCTI couples may execute either
slowly or differently than the programmer assumes it will.

SPARC V8 and SPARC V9 Compatibility Note
The SPARC V8 architecture left behavior undefined for a DCTI couple. The
SPARC V9 architecture defined behavior in that case, but as of
UltraSPARC Architecture 2005, use of DCTI couples was deprecated.
Software should not expect high performance from DCTI couples, and
performance of DCTI couples should be expected to decline further in
future processors.

Programming Note
As noted in TABLE 6-5 on page 115, an annulled branch-always
(branch-always with a = 1) instruction is not architecturally a DCTI.
However, since not all implementations make that distinction, for
optimal performance, a DCTI should not be placed in the instruction word
immediately following an annulled branch-always instruction (BA,A or
BPA,A)."
Signed-off-by: Babu Moger <babu.moger@oracle.com>
Reviewed-by: Rob Gardner <rob.gardner@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent adfae8a5
...@@ -96,6 +96,7 @@ sparc64_boot: ...@@ -96,6 +96,7 @@ sparc64_boot:
andn %g1, PSTATE_AM, %g1 andn %g1, PSTATE_AM, %g1
wrpr %g1, 0x0, %pstate wrpr %g1, 0x0, %pstate
ba,a,pt %xcc, 1f ba,a,pt %xcc, 1f
nop
.globl prom_finddev_name, prom_chosen_path, prom_root_node .globl prom_finddev_name, prom_chosen_path, prom_root_node
.globl prom_getprop_name, prom_mmu_name, prom_peer_name .globl prom_getprop_name, prom_mmu_name, prom_peer_name
...@@ -613,6 +614,7 @@ niagara_tlb_fixup: ...@@ -613,6 +614,7 @@ niagara_tlb_fixup:
nop nop
ba,a,pt %xcc, 80f ba,a,pt %xcc, 80f
nop
niagara4_patch: niagara4_patch:
call niagara4_patch_copyops call niagara4_patch_copyops
nop nop
...@@ -622,6 +624,7 @@ niagara4_patch: ...@@ -622,6 +624,7 @@ niagara4_patch:
nop nop
ba,a,pt %xcc, 80f ba,a,pt %xcc, 80f
nop
niagara2_patch: niagara2_patch:
call niagara2_patch_copyops call niagara2_patch_copyops
...@@ -632,6 +635,7 @@ niagara2_patch: ...@@ -632,6 +635,7 @@ niagara2_patch:
nop nop
ba,a,pt %xcc, 80f ba,a,pt %xcc, 80f
nop
niagara_patch: niagara_patch:
call niagara_patch_copyops call niagara_patch_copyops
......
...@@ -82,6 +82,7 @@ do_stdfmna: ...@@ -82,6 +82,7 @@ do_stdfmna:
call handle_stdfmna call handle_stdfmna
add %sp, PTREGS_OFF, %o0 add %sp, PTREGS_OFF, %o0
ba,a,pt %xcc, rtrap ba,a,pt %xcc, rtrap
nop
.size do_stdfmna,.-do_stdfmna .size do_stdfmna,.-do_stdfmna
.type breakpoint_trap,#function .type breakpoint_trap,#function
......
...@@ -237,6 +237,7 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 ...@@ -237,6 +237,7 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1
bne,pt %xcc, user_rtt_fill_32bit bne,pt %xcc, user_rtt_fill_32bit
wrpr %g1, %cwp wrpr %g1, %cwp
ba,a,pt %xcc, user_rtt_fill_64bit ba,a,pt %xcc, user_rtt_fill_64bit
nop
user_rtt_fill_fixup_dax: user_rtt_fill_fixup_dax:
ba,pt %xcc, user_rtt_fill_fixup_common ba,pt %xcc, user_rtt_fill_fixup_common
......
...@@ -86,6 +86,7 @@ __spitfire_cee_trap_continue: ...@@ -86,6 +86,7 @@ __spitfire_cee_trap_continue:
rd %pc, %g7 rd %pc, %g7
ba,a,pt %xcc, 2f ba,a,pt %xcc, 2f
nop
1: ba,pt %xcc, etrap_irq 1: ba,pt %xcc, etrap_irq
rd %pc, %g7 rd %pc, %g7
......
...@@ -352,6 +352,7 @@ sun4v_mna: ...@@ -352,6 +352,7 @@ sun4v_mna:
call sun4v_do_mna call sun4v_do_mna
add %sp, PTREGS_OFF, %o0 add %sp, PTREGS_OFF, %o0
ba,a,pt %xcc, rtrap ba,a,pt %xcc, rtrap
nop
/* Privileged Action. */ /* Privileged Action. */
sun4v_privact: sun4v_privact:
......
...@@ -92,6 +92,7 @@ user_rtt_fill_fixup_common: ...@@ -92,6 +92,7 @@ user_rtt_fill_fixup_common:
call sun4v_data_access_exception call sun4v_data_access_exception
nop nop
ba,a,pt %xcc, rtrap ba,a,pt %xcc, rtrap
nop
1: call spitfire_data_access_exception 1: call spitfire_data_access_exception
nop nop
......
...@@ -152,6 +152,8 @@ fill_fixup_dax: ...@@ -152,6 +152,8 @@ fill_fixup_dax:
call sun4v_data_access_exception call sun4v_data_access_exception
nop nop
ba,a,pt %xcc, rtrap ba,a,pt %xcc, rtrap
nop
1: call spitfire_data_access_exception 1: call spitfire_data_access_exception
nop nop
ba,a,pt %xcc, rtrap ba,a,pt %xcc, rtrap
nop
...@@ -326,11 +326,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -326,11 +326,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
blu 170f blu 170f
nop nop
ba,a,pt %xcc, 180f ba,a,pt %xcc, 180f
nop
4: /* 32 <= low bits < 48 */ 4: /* 32 <= low bits < 48 */
blu 150f blu 150f
nop nop
ba,a,pt %xcc, 160f ba,a,pt %xcc, 160f
nop
5: /* 0 < low bits < 32 */ 5: /* 0 < low bits < 32 */
blu,a 6f blu,a 6f
cmp %g2, 8 cmp %g2, 8
...@@ -338,6 +340,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -338,6 +340,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
blu 130f blu 130f
nop nop
ba,a,pt %xcc, 140f ba,a,pt %xcc, 140f
nop
6: /* 0 < low bits < 16 */ 6: /* 0 < low bits < 16 */
bgeu 120f bgeu 120f
nop nop
...@@ -475,6 +478,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -475,6 +478,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
brz,pt %o2, 85f brz,pt %o2, 85f
sub %o0, %o1, GLOBAL_SPARE sub %o0, %o1, GLOBAL_SPARE
ba,a,pt %XCC, 90f ba,a,pt %XCC, 90f
nop
.align 64 .align 64
75: /* 16 < len <= 64 */ 75: /* 16 < len <= 64 */
......
...@@ -530,4 +530,5 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -530,4 +530,5 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
bne,pt %icc, 1b bne,pt %icc, 1b
EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1) EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1)
ba,a,pt %icc, .Lexit ba,a,pt %icc, .Lexit
nop
.size FUNC_NAME, .-FUNC_NAME .size FUNC_NAME, .-FUNC_NAME
...@@ -102,4 +102,5 @@ NG4bzero: ...@@ -102,4 +102,5 @@ NG4bzero:
bne,pt %icc, 1b bne,pt %icc, 1b
add %o0, 0x30, %o0 add %o0, 0x30, %o0
ba,a,pt %icc, .Lpostloop ba,a,pt %icc, .Lpostloop
nop
.size NG4bzero,.-NG4bzero .size NG4bzero,.-NG4bzero
...@@ -394,6 +394,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ ...@@ -394,6 +394,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
brz,pt %i2, 85f brz,pt %i2, 85f
sub %o0, %i1, %i3 sub %o0, %i1, %i3
ba,a,pt %XCC, 90f ba,a,pt %XCC, 90f
nop
.align 64 .align 64
70: /* 16 < len <= 64 */ 70: /* 16 < len <= 64 */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment