Commit 67af60f0 authored by Ben Skeggs

drm/nvc0-/gr: bring in some macros to abstract falcon isa differences

Need. A. Compiler...
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
parent 90d6db16
...@@ -38,7 +38,7 @@ queue_put: ...@@ -38,7 +38,7 @@ queue_put:
cmpu b32 $r8 $r9 cmpu b32 $r8 $r9
bra ne #queue_put_next bra ne #queue_put_next
mov $r15 E_CMD_OVERFLOW mov $r15 E_CMD_OVERFLOW
call #error call(error)
ret ret
// store cmd/data on queue // store cmd/data on queue
...@@ -92,18 +92,16 @@ queue_get_done: ...@@ -92,18 +92,16 @@ queue_get_done:
// Out: $r15 value // Out: $r15 value
// //
nv_rd32: nv_rd32:
mov $r11 0x728
shl b32 $r11 6
mov b32 $r12 $r14 mov b32 $r12 $r14
bset $r12 31 // MMIO_CTRL_PENDING bset $r12 31 // MMIO_CTRL_PENDING
iowr I[$r11 + 0x000] $r12 // MMIO_CTRL nv_iowr(NV_PGRAPH_FECS_MMIO_CTRL, 0, $r12)
nv_rd32_wait: nv_rd32_wait:
iord $r12 I[$r11 + 0x000] nv_iord($r12, NV_PGRAPH_FECS_MMIO_CTRL, 0)
xbit $r12 $r12 31 xbit $r12 $r12 31
bra ne #nv_rd32_wait bra ne #nv_rd32_wait
mov $r10 6 // DONE_MMIO_RD mov $r10 6 // DONE_MMIO_RD
call #wait_doneo call(wait_doneo)
iord $r15 I[$r11 + 0x100] // MMIO_RDVAL nv_iord($r15, NV_PGRAPH_FECS_MMIO_RDVAL, 0)
ret ret
// nv_wr32 - write 32-bit value to nv register // nv_wr32 - write 32-bit value to nv register
...@@ -112,37 +110,17 @@ nv_rd32: ...@@ -112,37 +110,17 @@ nv_rd32:
// $r15 value // $r15 value
// //
nv_wr32: nv_wr32:
mov $r11 0x728 nv_iowr(NV_PGRAPH_FECS_MMIO_WRVAL, 0, $r15)
shl b32 $r11 6
iowr I[$r11 + 0x200] $r15 // MMIO_WRVAL
mov b32 $r12 $r14 mov b32 $r12 $r14
bset $r12 31 // MMIO_CTRL_PENDING bset $r12 31 // MMIO_CTRL_PENDING
bset $r12 30 // MMIO_CTRL_WRITE bset $r12 30 // MMIO_CTRL_WRITE
iowr I[$r11 + 0x000] $r12 // MMIO_CTRL nv_iowr(NV_PGRAPH_FECS_MMIO_CTRL, 0, $r12)
nv_wr32_wait: nv_wr32_wait:
iord $r12 I[$r11 + 0x000] nv_iord($r12, NV_PGRAPH_FECS_MMIO_CTRL, 0)
xbit $r12 $r12 31 xbit $r12 $r12 31
bra ne #nv_wr32_wait bra ne #nv_wr32_wait
ret ret
// (re)set watchdog timer
//
// In : $r15 timeout
//
watchdog_reset:
mov $r8 0x430
shl b32 $r8 6
bset $r15 31
iowr I[$r8 + 0x000] $r15
ret
// clear watchdog timer
watchdog_clear:
mov $r8 0x430
shl b32 $r8 6
iowr I[$r8 + 0x000] $r0
ret
// wait_donez - wait on FUC_DONE bit to become clear // wait_donez - wait on FUC_DONE bit to become clear
// //
// In : $r10 bit to wait on // In : $r10 bit to wait on
...@@ -163,13 +141,9 @@ wait_donez: ...@@ -163,13 +141,9 @@ wait_donez:
// //
wait_doneo: wait_doneo:
trace_set(T_WAIT); trace_set(T_WAIT);
mov $r8 0x818 nv_iowr(NV_PGRAPH_FECS_CC_SCRATCH_VAL(6), 0, $r10)
shl b32 $r8 6
iowr I[$r8 + 0x000] $r10
wait_doneo_e: wait_doneo_e:
mov $r8 0x400 nv_iord($r8, NV_PGRAPH_FECS_SIGNAL, 0)
shl b32 $r8 6
iord $r8 I[$r8 + 0x000]
xbit $r8 $r8 $r10 xbit $r8 $r8 $r10
bra e #wait_doneo_e bra e #wait_doneo_e
trace_clr(T_WAIT) trace_clr(T_WAIT)
...@@ -209,21 +183,18 @@ mmctx_size: ...@@ -209,21 +183,18 @@ mmctx_size:
// //
mmctx_xfer: mmctx_xfer:
trace_set(T_MMCTX) trace_set(T_MMCTX)
mov $r8 0x710
shl b32 $r8 6
clear b32 $r9 clear b32 $r9
or $r11 $r11 or $r11 $r11
bra e #mmctx_base_disabled bra e #mmctx_base_disabled
iowr I[$r8 + 0x000] $r11 // MMCTX_BASE nv_iowr(NV_PGRAPH_FECS_MMCTX_BASE, 0, $r11)
bset $r9 0 // BASE_EN bset $r9 0 // BASE_EN
mmctx_base_disabled: mmctx_base_disabled:
or $r14 $r14 or $r14 $r14
bra e #mmctx_multi_disabled bra e #mmctx_multi_disabled
iowr I[$r8 + 0x200] $r14 // MMCTX_MULTI_STRIDE nv_iowr(NV_PGRAPH_FECS_MMCTX_MULTI_STRIDE, 0, $r14)
iowr I[$r8 + 0x300] $r15 // MMCTX_MULTI_MASK nv_iowr(NV_PGRAPH_FECS_MMCTX_MULTI_MASK, 0, $r15)
bset $r9 1 // MULTI_EN bset $r9 1 // MULTI_EN
mmctx_multi_disabled: mmctx_multi_disabled:
add b32 $r8 0x100
xbit $r11 $r10 0 xbit $r11 $r10 0
shl b32 $r11 16 // DIR shl b32 $r11 16 // DIR
...@@ -231,20 +202,20 @@ mmctx_xfer: ...@@ -231,20 +202,20 @@ mmctx_xfer:
xbit $r14 $r10 1 xbit $r14 $r10 1
shl b32 $r14 17 shl b32 $r14 17
or $r11 $r14 // START_TRIGGER or $r11 $r14 // START_TRIGGER
iowr I[$r8 + 0x000] $r11 // MMCTX_CTRL nv_iowr(NV_PGRAPH_FECS_MMCTX_CTRL, 0, $r11)
// loop over the mmio list, and send requests to the hw // loop over the mmio list, and send requests to the hw
mmctx_exec_loop: mmctx_exec_loop:
// wait for space in mmctx queue // wait for space in mmctx queue
mmctx_wait_free: mmctx_wait_free:
iord $r14 I[$r8 + 0x000] // MMCTX_CTRL nv_iord($r14, NV_PGRAPH_FECS_MMCTX_CTRL, 0)
and $r14 0x1f and $r14 0x1f
bra e #mmctx_wait_free bra e #mmctx_wait_free
// queue up an entry // queue up an entry
ld b32 $r14 D[$r12] ld b32 $r14 D[$r12]
or $r14 $r9 or $r14 $r9
iowr I[$r8 + 0x300] $r14 nv_iowr(NV_PGRAPH_FECS_MMCTX_QUEUE, 0, $r14)
add b32 $r12 4 add b32 $r12 4
cmpu b32 $r12 $r13 cmpu b32 $r12 $r13
bra ne #mmctx_exec_loop bra ne #mmctx_exec_loop
...@@ -253,22 +224,22 @@ mmctx_xfer: ...@@ -253,22 +224,22 @@ mmctx_xfer:
bra ne #mmctx_stop bra ne #mmctx_stop
// wait for queue to empty // wait for queue to empty
mmctx_fini_wait: mmctx_fini_wait:
iord $r11 I[$r8 + 0x000] // MMCTX_CTRL nv_iord($r11, NV_PGRAPH_FECS_MMCTX_CTRL, 0)
and $r11 0x1f and $r11 0x1f
cmpu b32 $r11 0x10 cmpu b32 $r11 0x10
bra ne #mmctx_fini_wait bra ne #mmctx_fini_wait
mov $r10 2 // DONE_MMCTX mov $r10 2 // DONE_MMCTX
call #wait_donez call(wait_donez)
bra #mmctx_done bra #mmctx_done
mmctx_stop: mmctx_stop:
xbit $r11 $r10 0 xbit $r11 $r10 0
shl b32 $r11 16 // DIR shl b32 $r11 16 // DIR
bset $r11 12 // QLIMIT = 0x10 bset $r11 12 // QLIMIT = 0x10
bset $r11 18 // STOP_TRIGGER bset $r11 18 // STOP_TRIGGER
iowr I[$r8 + 0x000] $r11 // MMCTX_CTRL nv_iowr(NV_PGRAPH_FECS_MMCTX_CTRL, 0, $r11)
mmctx_stop_wait: mmctx_stop_wait:
// wait for STOP_TRIGGER to clear // wait for STOP_TRIGGER to clear
iord $r11 I[$r8 + 0x000] // MMCTX_CTRL nv_iord($r11, NV_PGRAPH_FECS_MMCTX_CTRL, 0)
xbit $r11 $r11 18 xbit $r11 $r11 18
bra ne #mmctx_stop_wait bra ne #mmctx_stop_wait
mmctx_done: mmctx_done:
...@@ -280,28 +251,24 @@ mmctx_xfer: ...@@ -280,28 +251,24 @@ mmctx_xfer:
strand_wait: strand_wait:
push $r10 push $r10
mov $r10 2 mov $r10 2
call #wait_donez call(wait_donez)
pop $r10 pop $r10
ret ret
// unknown - call before issuing strand commands // unknown - call before issuing strand commands
// //
strand_pre: strand_pre:
mov $r8 0x4afc mov $r9 NV_PGRAPH_FECS_STRAND_CMD_ENABLE
sethi $r8 0x20000 nv_iowr(NV_PGRAPH_FECS_STRAND_CMD, 0x3f, $r9)
mov $r9 0xc call(strand_wait)
iowr I[$r8] $r9
call #strand_wait
ret ret
// unknown - call after issuing strand commands // unknown - call after issuing strand commands
// //
strand_post: strand_post:
mov $r8 0x4afc mov $r9 NV_PGRAPH_FECS_STRAND_CMD_DISABLE
sethi $r8 0x20000 nv_iowr(NV_PGRAPH_FECS_STRAND_CMD, 0x3f, $r9)
mov $r9 0xd call(strand_wait)
iowr I[$r8] $r9
call #strand_wait
ret ret
// Selects strand set?! // Selects strand set?!
...@@ -309,18 +276,14 @@ strand_post: ...@@ -309,18 +276,14 @@ strand_post:
// In: $r14 id // In: $r14 id
// //
strand_set: strand_set:
mov $r10 0x4ffc
sethi $r10 0x20000
sub b32 $r11 $r10 0x500
mov $r12 0xf mov $r12 0xf
iowr I[$r10 + 0x000] $r12 // 0x93c = 0xf nv_iowr(NV_PGRAPH_FECS_STRAND_FILTER, 0x3f, $r12)
mov $r12 0xb mov $r12 NV_PGRAPH_FECS_STRAND_CMD_DEACTIVATE_FILTER
iowr I[$r11 + 0x000] $r12 // 0x928 = 0xb nv_iowr(NV_PGRAPH_FECS_STRAND_CMD, 0x3f, $r12)
call #strand_wait nv_iowr(NV_PGRAPH_FECS_STRAND_FILTER, 0x3f, $r14)
iowr I[$r10 + 0x000] $r14 // 0x93c = <id> mov $r12 NV_PGRAPH_FECS_STRAND_CMD_ACTIVATE_FILTER
mov $r12 0xa nv_iowr(NV_PGRAPH_FECS_STRAND_CMD, 0x3f, $r12)
iowr I[$r11 + 0x000] $r12 // 0x928 = 0xa call(strand_wait)
call #strand_wait
ret ret
// Initialise strand context data // Initialise strand context data
...@@ -332,30 +295,27 @@ strand_set: ...@@ -332,30 +295,27 @@ strand_set:
// //
strand_ctx_init: strand_ctx_init:
trace_set(T_STRINIT) trace_set(T_STRINIT)
call #strand_pre call(strand_pre)
mov $r14 3 mov $r14 3
call #strand_set call(strand_set)
mov $r10 0x46fc
sethi $r10 0x20000 clear b32 $r12
add b32 $r11 $r10 0x400 nv_iowr(NV_PGRAPH_FECS_STRAND_SELECT, 0x3f, $r12)
iowr I[$r10 + 0x100] $r0 // STRAND_FIRST_GENE = 0 mov $r12 NV_PGRAPH_FECS_STRAND_CMD_SEEK
mov $r12 1 nv_iowr(NV_PGRAPH_FECS_STRAND_CMD, 0x3f, $r12)
iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_FIRST_GENE call(strand_wait)
call #strand_wait
sub b32 $r12 $r0 1 sub b32 $r12 $r0 1
iowr I[$r10 + 0x000] $r12 // STRAND_GENE_CNT = 0xffffffff nv_iowr(NV_PGRAPH_FECS_STRAND_DATA, 0x3f, $r12)
mov $r12 2 mov $r12 NV_PGRAPH_FECS_STRAND_CMD_GET_INFO
iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_GENE_CNT nv_iowr(NV_PGRAPH_FECS_STRAND_CMD, 0x3f, $r12)
call #strand_wait call(strand_wait)
call #strand_post call(strand_post)
// read the size of each strand, poke the context offset of // read the size of each strand, poke the context offset of
// each into STRAND_{SAVE,LOAD}_SWBASE now, no need to worry // each into STRAND_{SAVE,LOAD}_SWBASE now, no need to worry
// about it later then. // about it later then.
mov $r8 0x880 nv_mkio($r8, NV_PGRAPH_FECS_STRAND_SAVE_SWBASE, 0x00)
shl b32 $r8 6 nv_iord($r9, NV_PGRAPH_FECS_STRANDS_CNT, 0x00)
iord $r9 I[$r8 + 0x000] // STRANDS
add b32 $r8 0x2200
shr b32 $r14 $r15 8 shr b32 $r14 $r15 8
ctx_init_strand_loop: ctx_init_strand_loop:
iowr I[$r8 + 0x000] $r14 // STRAND_SAVE_SWBASE iowr I[$r8 + 0x000] $r14 // STRAND_SAVE_SWBASE
......
...@@ -58,12 +58,9 @@ mmio_list_base: ...@@ -58,12 +58,9 @@ mmio_list_base:
// //
error: error:
push $r14 push $r14
mov $r14 -0x67ec // 0x9814 nv_wr32(NV_PGRAPH_FECS_CC_SCRATCH_VAL(5), $r15)
sethi $r14 0x400000
call #nv_wr32 // HUB_CTXCTL_CC_SCRATCH[5] = error code
add b32 $r14 0x41c
mov $r15 1 mov $r15 1
call #nv_wr32 // HUB_CTXCTL_INTR_UP_SET nv_wr32(NV_PGRAPH_FECS_INTR_UP_SET, $r15)
pop $r14 pop $r14
ret ret
...@@ -84,46 +81,40 @@ init: ...@@ -84,46 +81,40 @@ init:
mov $sp $r0 mov $sp $r0
// enable fifo access // enable fifo access
mov $r1 0x1200 mov $r2 NV_PGRAPH_GPCX_GPCCS_ACCESS_FIFO
mov $r2 2 nv_iowr(NV_PGRAPH_GPCX_GPCCS_ACCESS, 0, $r2)
iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
// setup i0 handler, and route all interrupts to it // setup i0 handler, and route all interrupts to it
mov $r1 #ih mov $r1 #ih
mov $iv0 $r1 mov $iv0 $r1
mov $r1 0x400 nv_iowr(NV_PGRAPH_GPCX_GPCCS_INTR_ROUTE, 0, $r0)
iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
// enable fifo interrupt // enable fifo interrupt
mov $r2 4 mov $r2 NV_PGRAPH_GPCX_GPCCS_INTR_EN_SET_FIFO
iowr I[$r1 + 0x000] $r2 // INTR_EN_SET nv_iowr(NV_PGRAPH_GPCX_GPCCS_INTR_EN_SET, 0, $r2)
// enable interrupts // enable interrupts
bset $flags ie0 bset $flags ie0
// figure out which GPC we are, and how many TPCs we have // figure out which GPC we are, and how many TPCs we have
mov $r1 0x608 nv_iord($r2, NV_PGRAPH_GPCX_GPCCS_UNITS, 0)
shl b32 $r1 6
iord $r2 I[$r1 + 0x000] // UNITS
mov $r3 1 mov $r3 1
and $r2 0x1f and $r2 0x1f
shl b32 $r3 $r2 shl b32 $r3 $r2
sub b32 $r3 1 sub b32 $r3 1
st b32 D[$r0 + #tpc_count] $r2 st b32 D[$r0 + #tpc_count] $r2
st b32 D[$r0 + #tpc_mask] $r3 st b32 D[$r0 + #tpc_mask] $r3
add b32 $r1 0x400 nv_iord($r2, NV_PGRAPH_GPCX_GPCCS_MYINDEX, 0)
iord $r2 I[$r1 + 0x000] // MYINDEX
st b32 D[$r0 + #gpc_id] $r2 st b32 D[$r0 + #gpc_id] $r2
#if NV_PGRAPH_GPCX_UNK__SIZE > 0 #if NV_PGRAPH_GPCX_UNK__SIZE > 0
// figure out which, and how many, UNKs are actually present // figure out which, and how many, UNKs are actually present
mov $r14 0x0c30 imm32($r14, 0x500c30)
sethi $r14 0x500000
clear b32 $r2 clear b32 $r2
clear b32 $r3 clear b32 $r3
clear b32 $r4 clear b32 $r4
init_unk_loop: init_unk_loop:
call #nv_rd32 call(nv_rd32)
cmp b32 $r15 0 cmp b32 $r15 0
bra z #init_unk_next bra z #init_unk_next
mov $r15 1 mov $r15 1
...@@ -146,23 +137,21 @@ init: ...@@ -146,23 +137,21 @@ init:
// set mmctx base addresses now so we don't have to do it later, // set mmctx base addresses now so we don't have to do it later,
// they don't currently ever change // they don't currently ever change
mov $r4 0x700
shl b32 $r4 6
shr b32 $r5 $r2 8 shr b32 $r5 $r2 8
iowr I[$r4 + 0x000] $r5 // MMCTX_SAVE_SWBASE nv_iowr(NV_PGRAPH_GPCX_GPCCS_MMCTX_SAVE_SWBASE, 0, $r5)
iowr I[$r4 + 0x100] $r5 // MMCTX_LOAD_SWBASE nv_iowr(NV_PGRAPH_GPCX_GPCCS_MMCTX_LOAD_SWBASE, 0, $r5)
// calculate GPC mmio context size // calculate GPC mmio context size
ld b32 $r14 D[$r0 + #gpc_mmio_list_head] ld b32 $r14 D[$r0 + #gpc_mmio_list_head]
ld b32 $r15 D[$r0 + #gpc_mmio_list_tail] ld b32 $r15 D[$r0 + #gpc_mmio_list_tail]
call #mmctx_size call(mmctx_size)
add b32 $r2 $r15 add b32 $r2 $r15
add b32 $r3 $r15 add b32 $r3 $r15
// calculate per-TPC mmio context size // calculate per-TPC mmio context size
ld b32 $r14 D[$r0 + #tpc_mmio_list_head] ld b32 $r14 D[$r0 + #tpc_mmio_list_head]
ld b32 $r15 D[$r0 + #tpc_mmio_list_tail] ld b32 $r15 D[$r0 + #tpc_mmio_list_tail]
call #mmctx_size call(mmctx_size)
ld b32 $r14 D[$r0 + #tpc_count] ld b32 $r14 D[$r0 + #tpc_count]
mulu $r14 $r15 mulu $r14 $r15
add b32 $r2 $r14 add b32 $r2 $r14
...@@ -172,7 +161,7 @@ init: ...@@ -172,7 +161,7 @@ init:
// calculate per-UNK mmio context size // calculate per-UNK mmio context size
ld b32 $r14 D[$r0 + #unk_mmio_list_head] ld b32 $r14 D[$r0 + #unk_mmio_list_head]
ld b32 $r15 D[$r0 + #unk_mmio_list_tail] ld b32 $r15 D[$r0 + #unk_mmio_list_tail]
call #mmctx_size call(mmctx_size)
ld b32 $r14 D[$r0 + #unk_count] ld b32 $r14 D[$r0 + #unk_count]
mulu $r14 $r15 mulu $r14 $r15
add b32 $r2 $r14 add b32 $r2 $r14
...@@ -180,9 +169,8 @@ init: ...@@ -180,9 +169,8 @@ init:
#endif #endif
// round up base/size to 256 byte boundary (for strand SWBASE) // round up base/size to 256 byte boundary (for strand SWBASE)
add b32 $r4 0x1300
shr b32 $r3 2 shr b32 $r3 2
iowr I[$r4 + 0x000] $r3 // MMCTX_LOAD_COUNT, wtf for?!? nv_iowr(NV_PGRAPH_GPCX_GPCCS_MMCTX_LOAD_COUNT, 0, $r3) // wtf for?!
shr b32 $r2 8 shr b32 $r2 8
shr b32 $r3 6 shr b32 $r3 6
add b32 $r2 1 add b32 $r2 1
...@@ -192,7 +180,7 @@ init: ...@@ -192,7 +180,7 @@ init:
// calculate size of strand context data // calculate size of strand context data
mov b32 $r15 $r2 mov b32 $r15 $r2
call #strand_ctx_init call(strand_ctx_init)
add b32 $r3 $r15 add b32 $r3 $r15
// save context size, and tell HUB we're done // save context size, and tell HUB we're done
...@@ -208,7 +196,7 @@ main: ...@@ -208,7 +196,7 @@ main:
bset $flags $p0 bset $flags $p0
sleep $p0 sleep $p0
mov $r13 #cmd_queue mov $r13 #cmd_queue
call #queue_get call(queue_get)
bra $p1 #main bra $p1 #main
// 0x0000-0x0003 are all context transfers // 0x0000-0x0003 are all context transfers
...@@ -224,13 +212,13 @@ main: ...@@ -224,13 +212,13 @@ main:
or $r1 $r14 or $r1 $r14
mov $flags $r1 mov $flags $r1
// transfer context data // transfer context data
call #ctx_xfer call(ctx_xfer)
bra #main bra #main
main_not_ctx_xfer: main_not_ctx_xfer:
shl b32 $r15 $r14 16 shl b32 $r15 $r14 16
or $r15 E_BAD_COMMAND or $r15 E_BAD_COMMAND
call #error call(error)
bra #main bra #main
// interrupt handler // interrupt handler
...@@ -247,22 +235,20 @@ ih: ...@@ -247,22 +235,20 @@ ih:
clear b32 $r0 clear b32 $r0
// incoming fifo command? // incoming fifo command?
iord $r10 I[$r0 + 0x200] // INTR nv_iord($r10, NV_PGRAPH_GPCX_GPCCS_INTR, 0)
and $r11 $r10 0x00000004 and $r11 $r10 NV_PGRAPH_GPCX_GPCCS_INTR_FIFO
bra e #ih_no_fifo bra e #ih_no_fifo
// queue incoming fifo command for later processing // queue incoming fifo command for later processing
mov $r11 0x1900
mov $r13 #cmd_queue mov $r13 #cmd_queue
iord $r14 I[$r11 + 0x100] // FIFO_CMD nv_iord($r14, NV_PGRAPH_GPCX_GPCCS_FIFO_CMD, 0)
iord $r15 I[$r11 + 0x000] // FIFO_DATA nv_iord($r15, NV_PGRAPH_GPCX_GPCCS_FIFO_DATA, 0)
call #queue_put call(queue_put)
add b32 $r11 0x400
mov $r14 1 mov $r14 1
iowr I[$r11 + 0x000] $r14 // FIFO_ACK nv_iowr(NV_PGRAPH_GPCX_GPCCS_FIFO_ACK, 0, $r14)
// ack, and wake up main() // ack, and wake up main()
ih_no_fifo: ih_no_fifo:
iowr I[$r0 + 0x100] $r10 // INTR_ACK nv_iowr(NV_PGRAPH_GPCX_GPCCS_INTR_ACK, 0, $r10)
pop $r15 pop $r15
pop $r14 pop $r14
...@@ -283,9 +269,7 @@ hub_barrier_done: ...@@ -283,9 +269,7 @@ hub_barrier_done:
mov $r15 1 mov $r15 1
ld b32 $r14 D[$r0 + #gpc_id] ld b32 $r14 D[$r0 + #gpc_id]
shl b32 $r15 $r14 shl b32 $r15 $r14
mov $r14 -0x6be8 // 0x409418 - HUB_BAR_SET nv_wr32(0x409418, $r15) // 0x409418 - HUB_BAR_SET
sethi $r14 0x400000
call #nv_wr32
ret ret
// Disables various things, waits a bit, and re-enables them.. // Disables various things, waits a bit, and re-enables them..
...@@ -295,16 +279,15 @@ hub_barrier_done: ...@@ -295,16 +279,15 @@ hub_barrier_done:
// funny things happen. // funny things happen.
// //
ctx_redswitch: ctx_redswitch:
mov $r14 0x614 mov $r15 NV_PGRAPH_GPCX_GPCCS_RED_SWITCH_POWER
shl b32 $r14 6 nv_iowr(NV_PGRAPH_GPCX_GPCCS_RED_SWITCH, 0, $r15)
mov $r15 0x020 mov $r14 8
iowr I[$r14] $r15 // GPC_RED_SWITCH = POWER
mov $r15 8
ctx_redswitch_delay: ctx_redswitch_delay:
sub b32 $r15 1 sub b32 $r14 1
bra ne #ctx_redswitch_delay bra ne #ctx_redswitch_delay
mov $r15 0xa20 or $r15 NV_PGRAPH_GPCX_GPCCS_RED_SWITCH_UNK11
iowr I[$r14] $r15 // GPC_RED_SWITCH = UNK11, ENABLE, POWER or $r15 NV_PGRAPH_GPCX_GPCCS_RED_SWITCH_ENABLE
nv_iowr(NV_PGRAPH_GPCX_GPCCS_RED_SWITCH, 0, $r15)
ret ret
// Transfer GPC context data between GPU and storage area // Transfer GPC context data between GPU and storage area
...@@ -317,46 +300,37 @@ ctx_redswitch: ...@@ -317,46 +300,37 @@ ctx_redswitch:
// //
ctx_xfer: ctx_xfer:
// set context base address // set context base address
mov $r1 0xa04 nv_iowr(NV_PGRAPH_GPCX_GPCCS_MEM_BASE, 0, $r15)
shl b32 $r1 6
iowr I[$r1 + 0x000] $r15// MEM_BASE
bra not $p1 #ctx_xfer_not_load bra not $p1 #ctx_xfer_not_load
call #ctx_redswitch call(ctx_redswitch)
ctx_xfer_not_load: ctx_xfer_not_load:
// strands // strands
mov $r1 0x4afc call(strand_pre)
sethi $r1 0x20000 clear b32 $r2
mov $r2 0xc nv_iowr(NV_PGRAPH_GPCX_GPCCS_STRAND_SELECT, 0x3f, $r2)
iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c xbit $r2 $flags $p1 // SAVE/LOAD
call #strand_wait add b32 $r2 NV_PGRAPH_GPCX_GPCCS_STRAND_CMD_SAVE
mov $r2 0x47fc nv_iowr(NV_PGRAPH_GPCX_GPCCS_STRAND_CMD, 0x3f, $r2)
sethi $r2 0x20000
iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
xbit $r2 $flags $p1
add b32 $r2 3
iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
// mmio context // mmio context
xbit $r10 $flags $p1 // direction xbit $r10 $flags $p1 // direction
or $r10 2 // first or $r10 2 // first
mov $r11 0x0000 imm32($r11,0x500000)
sethi $r11 0x500000
ld b32 $r12 D[$r0 + #gpc_id] ld b32 $r12 D[$r0 + #gpc_id]
shl b32 $r12 15 shl b32 $r12 15
add b32 $r11 $r12 // base = NV_PGRAPH_GPCn add b32 $r11 $r12 // base = NV_PGRAPH_GPCn
ld b32 $r12 D[$r0 + #gpc_mmio_list_head] ld b32 $r12 D[$r0 + #gpc_mmio_list_head]
ld b32 $r13 D[$r0 + #gpc_mmio_list_tail] ld b32 $r13 D[$r0 + #gpc_mmio_list_tail]
mov $r14 0 // not multi mov $r14 0 // not multi
call #mmctx_xfer call(mmctx_xfer)
// per-TPC mmio context // per-TPC mmio context
xbit $r10 $flags $p1 // direction xbit $r10 $flags $p1 // direction
#if !NV_PGRAPH_GPCX_UNK__SIZE #if !NV_PGRAPH_GPCX_UNK__SIZE
or $r10 4 // last or $r10 4 // last
#endif #endif
mov $r11 0x4000 imm32($r11, 0x504000)
sethi $r11 0x500000 // base = NV_PGRAPH_GPC0_TPC0
ld b32 $r12 D[$r0 + #gpc_id] ld b32 $r12 D[$r0 + #gpc_id]
shl b32 $r12 15 shl b32 $r12 15
add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_TPC0 add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_TPC0
...@@ -364,14 +338,13 @@ ctx_xfer: ...@@ -364,14 +338,13 @@ ctx_xfer:
ld b32 $r13 D[$r0 + #tpc_mmio_list_tail] ld b32 $r13 D[$r0 + #tpc_mmio_list_tail]
ld b32 $r15 D[$r0 + #tpc_mask] ld b32 $r15 D[$r0 + #tpc_mask]
mov $r14 0x800 // stride = 0x800 mov $r14 0x800 // stride = 0x800
call #mmctx_xfer call(mmctx_xfer)
#if NV_PGRAPH_GPCX_UNK__SIZE > 0 #if NV_PGRAPH_GPCX_UNK__SIZE > 0
// per-UNK mmio context // per-UNK mmio context
xbit $r10 $flags $p1 // direction xbit $r10 $flags $p1 // direction
or $r10 4 // last or $r10 4 // last
mov $r11 0x3000 imm32($r11, 0x503000)
sethi $r11 0x500000 // base = NV_PGRAPH_GPC0_UNK0
ld b32 $r12 D[$r0 + #gpc_id] ld b32 $r12 D[$r0 + #gpc_id]
shl b32 $r12 15 shl b32 $r12 15
add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_UNK0 add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_UNK0
...@@ -379,11 +352,11 @@ ctx_xfer: ...@@ -379,11 +352,11 @@ ctx_xfer:
ld b32 $r13 D[$r0 + #unk_mmio_list_tail] ld b32 $r13 D[$r0 + #unk_mmio_list_tail]
ld b32 $r15 D[$r0 + #unk_mask] ld b32 $r15 D[$r0 + #unk_mask]
mov $r14 0x200 // stride = 0x200 mov $r14 0x200 // stride = 0x200
call #mmctx_xfer call(mmctx_xfer)
#endif #endif
// wait for strands to finish // wait for strands to finish
call #strand_wait call(strand_wait)
// if load, or a save without a load following, do some // if load, or a save without a load following, do some
// unknown stuff that's done after finishing a block of // unknown stuff that's done after finishing a block of
...@@ -391,14 +364,10 @@ ctx_xfer: ...@@ -391,14 +364,10 @@ ctx_xfer:
bra $p1 #ctx_xfer_post bra $p1 #ctx_xfer_post
bra not $p2 #ctx_xfer_done bra not $p2 #ctx_xfer_done
ctx_xfer_post: ctx_xfer_post:
mov $r1 0x4afc call(strand_post)
sethi $r1 0x20000
mov $r2 0xd
iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0d
call #strand_wait
// mark completion in HUB's barrier // mark completion in HUB's barrier
ctx_xfer_done: ctx_xfer_done:
call #hub_barrier_done call(hub_barrier_done)
ret ret
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment