Commit a636a982 authored by Dave Airlie

Merge branch 'drm-next-3.8' of git://people.freedesktop.org/~agd5f/linux into drm-next

Alex writes:
"adds support for the
asynchronous DMA engines on r6xx-SI.  These engines are used
for ttm bo moves and VM page table updates currently.  They
could also be exposed via the CS ioctl for userspace use,
but I haven't had a chance to add proper CS checker patches
for them yet.  These patches have been tested extensively
internally for months, so they should be pretty solid."

* 'drm-next-3.8' of git://people.freedesktop.org/~agd5f/linux:
  drm/radeon: use DMA engine for VM page table updates on SI
  drm/radeon: add dma engine support for vm pt updates on si (v2)
  drm/radeon: use DMA engine for VM page table updates on cayman/TN
  drm/radeon: add dma engine support for vm pt updates on ni (v5)
  drm/radeon: use async dma for ttm buffer moves on 6xx-SI
  drm/radeon/kms: add support for dma rings to radeon_test_moves()
  drm/radeon/kms: Add initial support for async DMA on SI
  drm/radeon/kms: Add initial support for async DMA on cayman/TN
  drm/radeon/kms: Add initial support for async DMA on evergreen
  drm/radeon/kms: Add initial support for async DMA on r6xx/r7xx
parents 97a875cb bf66a786
This diff is collapsed.
...@@ -905,6 +905,37 @@ ...@@ -905,6 +905,37 @@
# define DC_HPDx_RX_INT_TIMER(x) ((x) << 16) # define DC_HPDx_RX_INT_TIMER(x) ((x) << 16)
# define DC_HPDx_EN (1 << 28) # define DC_HPDx_EN (1 << 28)
/* ASYNC DMA */
#define DMA_RB_RPTR 0xd008
#define DMA_RB_WPTR 0xd00c
#define DMA_CNTL 0xd02c
# define TRAP_ENABLE (1 << 0)
# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
# define SEM_WAIT_INT_ENABLE (1 << 2)
# define DATA_SWAP_ENABLE (1 << 3)
# define FENCE_SWAP_ENABLE (1 << 4)
# define CTXEMPTY_INT_ENABLE (1 << 28)
#define DMA_TILING_CONFIG 0xD0B8
#define CAYMAN_DMA1_CNTL 0xd82c
/* async DMA packets */
/*
 * Build a 32-bit async DMA packet header:
 *   bits 31:28  cmd - 4-bit value, one of the DMA_PACKET_* types
 *   bit  23     t   - single-bit flag
 *   bit  22     s   - single-bit flag
 *   bits 19:0   n   - 20-bit value
 * Each argument is masked to its field width before shifting.
 * NOTE(review): the meaning of t, s and n is not stated here; n is
 * presumably a count - confirm against the users of this macro.
 */
#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
(((t) & 0x1) << 23) | \
(((s) & 0x1) << 22) | \
(((n) & 0xFFFFF) << 0))
/* async DMA Packet types */
#define DMA_PACKET_WRITE 0x2
#define DMA_PACKET_COPY 0x3
#define DMA_PACKET_INDIRECT_BUFFER 0x4
#define DMA_PACKET_SEMAPHORE 0x5
#define DMA_PACKET_FENCE 0x6
#define DMA_PACKET_TRAP 0x7
#define DMA_PACKET_SRBM_WRITE 0x9
#define DMA_PACKET_CONSTANT_FILL 0xd
#define DMA_PACKET_NOP 0xf
/* PCIE link stuff */ /* PCIE link stuff */
#define PCIE_LC_TRAINING_CNTL 0xa1 /* PCIE_P */ #define PCIE_LC_TRAINING_CNTL 0xa1 /* PCIE_P */
#define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */ #define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */
......
This diff is collapsed.
...@@ -50,6 +50,24 @@ ...@@ -50,6 +50,24 @@
#define VMID(x) (((x) & 0x7) << 0) #define VMID(x) (((x) & 0x7) << 0)
#define SRBM_STATUS 0x0E50 #define SRBM_STATUS 0x0E50
#define SRBM_SOFT_RESET 0x0E60
#define SOFT_RESET_BIF (1 << 1)
#define SOFT_RESET_CG (1 << 2)
#define SOFT_RESET_DC (1 << 5)
#define SOFT_RESET_DMA1 (1 << 6)
#define SOFT_RESET_GRBM (1 << 8)
#define SOFT_RESET_HDP (1 << 9)
#define SOFT_RESET_IH (1 << 10)
#define SOFT_RESET_MC (1 << 11)
#define SOFT_RESET_RLC (1 << 13)
#define SOFT_RESET_ROM (1 << 14)
#define SOFT_RESET_SEM (1 << 15)
#define SOFT_RESET_VMC (1 << 17)
#define SOFT_RESET_DMA (1 << 20)
#define SOFT_RESET_TST (1 << 21)
#define SOFT_RESET_REGBB (1 << 22)
#define SOFT_RESET_ORB (1 << 23)
#define VM_CONTEXT0_REQUEST_RESPONSE 0x1470 #define VM_CONTEXT0_REQUEST_RESPONSE 0x1470
#define REQUEST_TYPE(x) (((x) & 0xf) << 0) #define REQUEST_TYPE(x) (((x) & 0xf) << 0)
#define RESPONSE_TYPE_MASK 0x000000F0 #define RESPONSE_TYPE_MASK 0x000000F0
...@@ -599,5 +617,62 @@ ...@@ -599,5 +617,62 @@
#define PACKET3_SET_APPEND_CNT 0x75 #define PACKET3_SET_APPEND_CNT 0x75
#define PACKET3_ME_WRITE 0x7A #define PACKET3_ME_WRITE 0x7A
/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
#define DMA0_REGISTER_OFFSET 0x0 /* not a register */
#define DMA1_REGISTER_OFFSET 0x800 /* not a register */
#define DMA_RB_CNTL 0xd000
# define DMA_RB_ENABLE (1 << 0)
# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
#define DMA_RB_BASE 0xd004
#define DMA_RB_RPTR 0xd008
#define DMA_RB_WPTR 0xd00c
#define DMA_RB_RPTR_ADDR_HI 0xd01c
#define DMA_RB_RPTR_ADDR_LO 0xd020
#define DMA_IB_CNTL 0xd024
# define DMA_IB_ENABLE (1 << 0)
# define DMA_IB_SWAP_ENABLE (1 << 4)
# define CMD_VMID_FORCE (1 << 31)
#define DMA_IB_RPTR 0xd028
#define DMA_CNTL 0xd02c
# define TRAP_ENABLE (1 << 0)
# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
# define SEM_WAIT_INT_ENABLE (1 << 2)
# define DATA_SWAP_ENABLE (1 << 3)
# define FENCE_SWAP_ENABLE (1 << 4)
# define CTXEMPTY_INT_ENABLE (1 << 28)
#define DMA_STATUS_REG 0xd034
# define DMA_IDLE (1 << 0)
#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0xd044
#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0xd048
#define DMA_TILING_CONFIG 0xd0b8
#define DMA_MODE 0xd0bc
/*
 * Build a 32-bit async DMA packet header:
 *   bits 31:28  cmd - 4-bit value, one of the DMA_PACKET_* types
 *   bit  23     t   - single-bit flag
 *   bit  22     s   - single-bit flag
 *   bits 19:0   n   - 20-bit value
 * Each argument is masked to its field width before shifting.
 * NOTE(review): the meaning of t, s and n is not stated here; n is
 * presumably a count - confirm against the users of this macro.
 */
#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
(((t) & 0x1) << 23) | \
(((s) & 0x1) << 22) | \
(((n) & 0xFFFFF) << 0))
/*
 * Header variant that carries a 4-bit vmid in bits 23:20 in place of the
 * t/s flags; cmd (bits 31:28) and n (bits 19:0) are laid out as in
 * DMA_PACKET.
 */
#define DMA_IB_PACKET(cmd, vmid, n) ((((cmd) & 0xF) << 28) | \
(((vmid) & 0xF) << 20) | \
(((n) & 0xFFFFF) << 0))
/* async DMA Packet types */
#define DMA_PACKET_WRITE 0x2
#define DMA_PACKET_COPY 0x3
#define DMA_PACKET_INDIRECT_BUFFER 0x4
#define DMA_PACKET_SEMAPHORE 0x5
#define DMA_PACKET_FENCE 0x6
#define DMA_PACKET_TRAP 0x7
#define DMA_PACKET_SRBM_WRITE 0x9
#define DMA_PACKET_CONSTANT_FILL 0xd
#define DMA_PACKET_NOP 0xf
#endif #endif
This diff is collapsed.
...@@ -590,9 +590,59 @@ ...@@ -590,9 +590,59 @@
#define WAIT_2D_IDLECLEAN_bit (1 << 16) #define WAIT_2D_IDLECLEAN_bit (1 << 16)
#define WAIT_3D_IDLECLEAN_bit (1 << 17) #define WAIT_3D_IDLECLEAN_bit (1 << 17)
/* async DMA */
#define DMA_TILING_CONFIG 0x3ec4
#define DMA_CONFIG 0x3e4c
#define DMA_RB_CNTL 0xd000
# define DMA_RB_ENABLE (1 << 0)
# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
#define DMA_RB_BASE 0xd004
#define DMA_RB_RPTR 0xd008
#define DMA_RB_WPTR 0xd00c
#define DMA_RB_RPTR_ADDR_HI 0xd01c
#define DMA_RB_RPTR_ADDR_LO 0xd020
#define DMA_IB_CNTL 0xd024
# define DMA_IB_ENABLE (1 << 0)
# define DMA_IB_SWAP_ENABLE (1 << 4)
#define DMA_IB_RPTR 0xd028
#define DMA_CNTL 0xd02c
# define TRAP_ENABLE (1 << 0)
# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
# define SEM_WAIT_INT_ENABLE (1 << 2)
# define DATA_SWAP_ENABLE (1 << 3)
# define FENCE_SWAP_ENABLE (1 << 4)
# define CTXEMPTY_INT_ENABLE (1 << 28)
#define DMA_STATUS_REG 0xd034
# define DMA_IDLE (1 << 0)
#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0xd044
#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0xd048
#define DMA_MODE 0xd0bc
/* async DMA packets */
/*
 * Build a 32-bit async DMA packet header:
 *   bits 31:28  cmd - 4-bit value, one of the DMA_PACKET_* types
 *   bit  23     t   - single-bit flag
 *   bit  22     s   - single-bit flag
 *   bits 15:0   n   - 16-bit value (masked with 0xFFFF in this header)
 * Each argument is masked to its field width before shifting.
 * NOTE(review): the meaning of t, s and n is not stated here; n is
 * presumably a count - confirm against the users of this macro.
 */
#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
(((t) & 0x1) << 23) | \
(((s) & 0x1) << 22) | \
(((n) & 0xFFFF) << 0))
/* async DMA Packet types */
#define DMA_PACKET_WRITE 0x2
#define DMA_PACKET_COPY 0x3
#define DMA_PACKET_INDIRECT_BUFFER 0x4
#define DMA_PACKET_SEMAPHORE 0x5
#define DMA_PACKET_FENCE 0x6
#define DMA_PACKET_TRAP 0x7
#define DMA_PACKET_CONSTANT_FILL 0xd /* 7xx only */
#define DMA_PACKET_NOP 0xf
#define IH_RB_CNTL 0x3e00 #define IH_RB_CNTL 0x3e00
# define IH_RB_ENABLE (1 << 0) # define IH_RB_ENABLE (1 << 0)
# define IH_IB_SIZE(x) ((x) << 1) /* log2 */ # define IH_RB_SIZE(x) ((x) << 1) /* log2 */
# define IH_RB_FULL_DRAIN_ENABLE (1 << 6) # define IH_RB_FULL_DRAIN_ENABLE (1 << 6)
# define IH_WPTR_WRITEBACK_ENABLE (1 << 8) # define IH_WPTR_WRITEBACK_ENABLE (1 << 8)
# define IH_WPTR_WRITEBACK_TIMER(x) ((x) << 9) /* log2 */ # define IH_WPTR_WRITEBACK_TIMER(x) ((x) << 9) /* log2 */
...@@ -637,7 +687,9 @@ ...@@ -637,7 +687,9 @@
#define TN_RLC_CLEAR_STATE_RESTORE_BASE 0x3f20 #define TN_RLC_CLEAR_STATE_RESTORE_BASE 0x3f20
#define SRBM_SOFT_RESET 0xe60 #define SRBM_SOFT_RESET 0xe60
# define SOFT_RESET_DMA (1 << 12)
# define SOFT_RESET_RLC (1 << 13) # define SOFT_RESET_RLC (1 << 13)
# define RV770_SOFT_RESET_DMA (1 << 20)
#define CP_INT_CNTL 0xc124 #define CP_INT_CNTL 0xc124
# define CNTX_BUSY_INT_ENABLE (1 << 19) # define CNTX_BUSY_INT_ENABLE (1 << 19)
......
...@@ -109,7 +109,7 @@ extern int radeon_lockup_timeout; ...@@ -109,7 +109,7 @@ extern int radeon_lockup_timeout;
#define RADEON_BIOS_NUM_SCRATCH 8 #define RADEON_BIOS_NUM_SCRATCH 8
/* max number of rings */ /* max number of rings */
#define RADEON_NUM_RINGS 3 #define RADEON_NUM_RINGS 5
/* fence seq are set to this number when signaled */ /* fence seq are set to this number when signaled */
#define RADEON_FENCE_SIGNALED_SEQ 0LL #define RADEON_FENCE_SIGNALED_SEQ 0LL
...@@ -122,6 +122,11 @@ extern int radeon_lockup_timeout; ...@@ -122,6 +122,11 @@ extern int radeon_lockup_timeout;
#define CAYMAN_RING_TYPE_CP1_INDEX 1 #define CAYMAN_RING_TYPE_CP1_INDEX 1
#define CAYMAN_RING_TYPE_CP2_INDEX 2 #define CAYMAN_RING_TYPE_CP2_INDEX 2
/* R600+ has an async dma ring */
#define R600_RING_TYPE_DMA_INDEX 3
/* cayman add a second async dma ring */
#define CAYMAN_RING_TYPE_DMA1_INDEX 4
/* hardcode those limit for now */ /* hardcode those limit for now */
#define RADEON_VA_IB_OFFSET (1 << 20) #define RADEON_VA_IB_OFFSET (1 << 20)
#define RADEON_VA_RESERVED_SIZE (8 << 20) #define RADEON_VA_RESERVED_SIZE (8 << 20)
...@@ -787,6 +792,15 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigne ...@@ -787,6 +792,15 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigne
void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp); void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp);
/* r600 async dma */
void r600_dma_stop(struct radeon_device *rdev);
int r600_dma_resume(struct radeon_device *rdev);
void r600_dma_fini(struct radeon_device *rdev);
void cayman_dma_stop(struct radeon_device *rdev);
int cayman_dma_resume(struct radeon_device *rdev);
void cayman_dma_fini(struct radeon_device *rdev);
/* /*
* CS. * CS.
*/ */
...@@ -883,7 +897,9 @@ struct radeon_wb { ...@@ -883,7 +897,9 @@ struct radeon_wb {
#define RADEON_WB_CP_RPTR_OFFSET 1024 #define RADEON_WB_CP_RPTR_OFFSET 1024
#define RADEON_WB_CP1_RPTR_OFFSET 1280 #define RADEON_WB_CP1_RPTR_OFFSET 1280
#define RADEON_WB_CP2_RPTR_OFFSET 1536 #define RADEON_WB_CP2_RPTR_OFFSET 1536
#define R600_WB_DMA_RPTR_OFFSET 1792
#define R600_WB_IH_WPTR_OFFSET 2048 #define R600_WB_IH_WPTR_OFFSET 2048
#define CAYMAN_WB_DMA1_RPTR_OFFSET 2304
#define R600_WB_EVENT_OFFSET 3072 #define R600_WB_EVENT_OFFSET 3072
/** /**
......
This diff is collapsed.
...@@ -309,6 +309,14 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev, ...@@ -309,6 +309,14 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev,
struct radeon_ring *cp, struct radeon_ring *cp,
struct radeon_semaphore *semaphore, struct radeon_semaphore *semaphore,
bool emit_wait); bool emit_wait);
void r600_dma_fence_ring_emit(struct radeon_device *rdev,
struct radeon_fence *fence);
void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
struct radeon_ring *ring,
struct radeon_semaphore *semaphore,
bool emit_wait);
void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp); bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp);
int r600_asic_reset(struct radeon_device *rdev); int r600_asic_reset(struct radeon_device *rdev);
int r600_set_surface_reg(struct radeon_device *rdev, int reg, int r600_set_surface_reg(struct radeon_device *rdev, int reg,
...@@ -316,11 +324,16 @@ int r600_set_surface_reg(struct radeon_device *rdev, int reg, ...@@ -316,11 +324,16 @@ int r600_set_surface_reg(struct radeon_device *rdev, int reg,
uint32_t offset, uint32_t obj_size); uint32_t offset, uint32_t obj_size);
void r600_clear_surface_reg(struct radeon_device *rdev, int reg); void r600_clear_surface_reg(struct radeon_device *rdev, int reg);
int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
int r600_copy_blit(struct radeon_device *rdev, int r600_copy_blit(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset, uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages, struct radeon_fence **fence); unsigned num_gpu_pages, struct radeon_fence **fence);
int r600_copy_dma(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages, struct radeon_fence **fence);
void r600_hpd_init(struct radeon_device *rdev); void r600_hpd_init(struct radeon_device *rdev);
void r600_hpd_fini(struct radeon_device *rdev); void r600_hpd_fini(struct radeon_device *rdev);
bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd); bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
...@@ -428,6 +441,14 @@ extern void dce4_wait_for_vblank(struct radeon_device *rdev, int crtc); ...@@ -428,6 +441,14 @@ extern void dce4_wait_for_vblank(struct radeon_device *rdev, int crtc);
void evergreen_disable_interrupt_state(struct radeon_device *rdev); void evergreen_disable_interrupt_state(struct radeon_device *rdev);
int evergreen_blit_init(struct radeon_device *rdev); int evergreen_blit_init(struct radeon_device *rdev);
int evergreen_mc_wait_for_idle(struct radeon_device *rdev); int evergreen_mc_wait_for_idle(struct radeon_device *rdev);
void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
struct radeon_fence *fence);
void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
struct radeon_ib *ib);
int evergreen_copy_dma(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence);
/* /*
* cayman * cayman
...@@ -449,6 +470,10 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, ...@@ -449,6 +470,10 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
uint64_t addr, unsigned count, uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags); uint32_t incr, uint32_t flags);
int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
struct radeon_ib *ib);
bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
/* DCE6 - SI */ /* DCE6 - SI */
void dce6_bandwidth_update(struct radeon_device *rdev); void dce6_bandwidth_update(struct radeon_device *rdev);
...@@ -476,5 +501,10 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, ...@@ -476,5 +501,10 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
uint64_t si_get_gpu_clock(struct radeon_device *rdev); uint64_t si_get_gpu_clock(struct radeon_device *rdev);
int si_copy_dma(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence);
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
#endif #endif
...@@ -26,16 +26,31 @@ ...@@ -26,16 +26,31 @@
#include "radeon_reg.h" #include "radeon_reg.h"
#include "radeon.h" #include "radeon.h"
#define RADEON_TEST_COPY_BLIT 1
#define RADEON_TEST_COPY_DMA 0
/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */ /* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */
void radeon_test_moves(struct radeon_device *rdev) static void radeon_do_test_moves(struct radeon_device *rdev, int flag)
{ {
struct radeon_bo *vram_obj = NULL; struct radeon_bo *vram_obj = NULL;
struct radeon_bo **gtt_obj = NULL; struct radeon_bo **gtt_obj = NULL;
struct radeon_fence *fence = NULL; struct radeon_fence *fence = NULL;
uint64_t gtt_addr, vram_addr; uint64_t gtt_addr, vram_addr;
unsigned i, n, size; unsigned i, n, size;
int r; int r, ring;
switch (flag) {
case RADEON_TEST_COPY_DMA:
ring = radeon_copy_dma_ring_index(rdev);
break;
case RADEON_TEST_COPY_BLIT:
ring = radeon_copy_blit_ring_index(rdev);
break;
default:
DRM_ERROR("Unknown copy method\n");
return;
}
size = 1024 * 1024; size = 1024 * 1024;
...@@ -106,7 +121,10 @@ void radeon_test_moves(struct radeon_device *rdev) ...@@ -106,7 +121,10 @@ void radeon_test_moves(struct radeon_device *rdev)
radeon_bo_kunmap(gtt_obj[i]); radeon_bo_kunmap(gtt_obj[i]);
r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence); if (ring == R600_RING_TYPE_DMA_INDEX)
r = radeon_copy_dma(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
else
r = radeon_copy_blit(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
if (r) { if (r) {
DRM_ERROR("Failed GTT->VRAM copy %d\n", i); DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
goto out_cleanup; goto out_cleanup;
...@@ -149,7 +167,10 @@ void radeon_test_moves(struct radeon_device *rdev) ...@@ -149,7 +167,10 @@ void radeon_test_moves(struct radeon_device *rdev)
radeon_bo_kunmap(vram_obj); radeon_bo_kunmap(vram_obj);
r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence); if (ring == R600_RING_TYPE_DMA_INDEX)
r = radeon_copy_dma(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
else
r = radeon_copy_blit(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
if (r) { if (r) {
DRM_ERROR("Failed VRAM->GTT copy %d\n", i); DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
goto out_cleanup; goto out_cleanup;
...@@ -223,6 +244,14 @@ void radeon_test_moves(struct radeon_device *rdev) ...@@ -223,6 +244,14 @@ void radeon_test_moves(struct radeon_device *rdev)
} }
} }
/*
 * Run the buffer-move test once for every copy method this ASIC provides:
 * first with RADEON_TEST_COPY_DMA when rdev->asic->copy.dma is set, then
 * with RADEON_TEST_COPY_BLIT when rdev->asic->copy.blit is set.  If neither
 * is set, nothing is tested.
 */
void radeon_test_moves(struct radeon_device *rdev)
{
if (rdev->asic->copy.dma)
radeon_do_test_moves(rdev, RADEON_TEST_COPY_DMA);
if (rdev->asic->copy.blit)
radeon_do_test_moves(rdev, RADEON_TEST_COPY_BLIT);
}
void radeon_test_ring_sync(struct radeon_device *rdev, void radeon_test_ring_sync(struct radeon_device *rdev,
struct radeon_ring *ringA, struct radeon_ring *ringA,
struct radeon_ring *ringB) struct radeon_ring *ringB)
......
...@@ -316,6 +316,7 @@ void r700_cp_stop(struct radeon_device *rdev) ...@@ -316,6 +316,7 @@ void r700_cp_stop(struct radeon_device *rdev)
radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
WREG32(SCRATCH_UMSK, 0); WREG32(SCRATCH_UMSK, 0);
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
} }
static int rv770_cp_load_microcode(struct radeon_device *rdev) static int rv770_cp_load_microcode(struct radeon_device *rdev)
...@@ -583,6 +584,8 @@ static void rv770_gpu_init(struct radeon_device *rdev) ...@@ -583,6 +584,8 @@ static void rv770_gpu_init(struct radeon_device *rdev)
WREG32(GB_TILING_CONFIG, gb_tiling_config); WREG32(GB_TILING_CONFIG, gb_tiling_config);
WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff)); WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff)); WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
WREG32(DMA_TILING_CONFIG, (gb_tiling_config & 0xffff));
WREG32(DMA_TILING_CONFIG2, (gb_tiling_config & 0xffff));
WREG32(CGTS_SYS_TCC_DISABLE, 0); WREG32(CGTS_SYS_TCC_DISABLE, 0);
WREG32(CGTS_TCC_DISABLE, 0); WREG32(CGTS_TCC_DISABLE, 0);
...@@ -886,7 +889,7 @@ static int rv770_mc_init(struct radeon_device *rdev) ...@@ -886,7 +889,7 @@ static int rv770_mc_init(struct radeon_device *rdev)
static int rv770_startup(struct radeon_device *rdev) static int rv770_startup(struct radeon_device *rdev)
{ {
struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; struct radeon_ring *ring;
int r; int r;
/* enable pcie gen2 link */ /* enable pcie gen2 link */
...@@ -932,6 +935,12 @@ static int rv770_startup(struct radeon_device *rdev) ...@@ -932,6 +935,12 @@ static int rv770_startup(struct radeon_device *rdev)
return r; return r;
} }
r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
if (r) {
dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
return r;
}
/* Enable IRQ */ /* Enable IRQ */
r = r600_irq_init(rdev); r = r600_irq_init(rdev);
if (r) { if (r) {
...@@ -941,11 +950,20 @@ static int rv770_startup(struct radeon_device *rdev) ...@@ -941,11 +950,20 @@ static int rv770_startup(struct radeon_device *rdev)
} }
r600_irq_set(rdev); r600_irq_set(rdev);
ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
R600_CP_RB_RPTR, R600_CP_RB_WPTR, R600_CP_RB_RPTR, R600_CP_RB_WPTR,
0, 0xfffff, RADEON_CP_PACKET2); 0, 0xfffff, RADEON_CP_PACKET2);
if (r) if (r)
return r; return r;
ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
DMA_RB_RPTR, DMA_RB_WPTR,
2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
if (r)
return r;
r = rv770_cp_load_microcode(rdev); r = rv770_cp_load_microcode(rdev);
if (r) if (r)
return r; return r;
...@@ -953,6 +971,10 @@ static int rv770_startup(struct radeon_device *rdev) ...@@ -953,6 +971,10 @@ static int rv770_startup(struct radeon_device *rdev)
if (r) if (r)
return r; return r;
r = r600_dma_resume(rdev);
if (r)
return r;
r = radeon_ib_pool_init(rdev); r = radeon_ib_pool_init(rdev);
if (r) { if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r); dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
...@@ -995,7 +1017,7 @@ int rv770_suspend(struct radeon_device *rdev) ...@@ -995,7 +1017,7 @@ int rv770_suspend(struct radeon_device *rdev)
{ {
r600_audio_fini(rdev); r600_audio_fini(rdev);
r700_cp_stop(rdev); r700_cp_stop(rdev);
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; r600_dma_stop(rdev);
r600_irq_suspend(rdev); r600_irq_suspend(rdev);
radeon_wb_disable(rdev); radeon_wb_disable(rdev);
rv770_pcie_gart_disable(rdev); rv770_pcie_gart_disable(rdev);
...@@ -1066,6 +1088,9 @@ int rv770_init(struct radeon_device *rdev) ...@@ -1066,6 +1088,9 @@ int rv770_init(struct radeon_device *rdev)
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
rdev->ih.ring_obj = NULL; rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024); r600_ih_ring_init(rdev, 64 * 1024);
...@@ -1078,6 +1103,7 @@ int rv770_init(struct radeon_device *rdev) ...@@ -1078,6 +1103,7 @@ int rv770_init(struct radeon_device *rdev)
if (r) { if (r) {
dev_err(rdev->dev, "disabling GPU acceleration\n"); dev_err(rdev->dev, "disabling GPU acceleration\n");
r700_cp_fini(rdev); r700_cp_fini(rdev);
r600_dma_fini(rdev);
r600_irq_fini(rdev); r600_irq_fini(rdev);
radeon_wb_fini(rdev); radeon_wb_fini(rdev);
radeon_ib_pool_fini(rdev); radeon_ib_pool_fini(rdev);
...@@ -1093,6 +1119,7 @@ void rv770_fini(struct radeon_device *rdev) ...@@ -1093,6 +1119,7 @@ void rv770_fini(struct radeon_device *rdev)
{ {
r600_blit_fini(rdev); r600_blit_fini(rdev);
r700_cp_fini(rdev); r700_cp_fini(rdev);
r600_dma_fini(rdev);
r600_irq_fini(rdev); r600_irq_fini(rdev);
radeon_wb_fini(rdev); radeon_wb_fini(rdev);
radeon_ib_pool_fini(rdev); radeon_ib_pool_fini(rdev);
......
...@@ -109,6 +109,9 @@ ...@@ -109,6 +109,9 @@
#define PIPE_TILING__SHIFT 1 #define PIPE_TILING__SHIFT 1
#define PIPE_TILING__MASK 0x0000000e #define PIPE_TILING__MASK 0x0000000e
#define DMA_TILING_CONFIG 0x3ec8
#define DMA_TILING_CONFIG2 0xd0b8
#define GC_USER_SHADER_PIPE_CONFIG 0x8954 #define GC_USER_SHADER_PIPE_CONFIG 0x8954
#define INACTIVE_QD_PIPES(x) ((x) << 8) #define INACTIVE_QD_PIPES(x) ((x) << 8)
#define INACTIVE_QD_PIPES_MASK 0x0000FF00 #define INACTIVE_QD_PIPES_MASK 0x0000FF00
...@@ -358,6 +361,26 @@ ...@@ -358,6 +361,26 @@
#define WAIT_UNTIL 0x8040 #define WAIT_UNTIL 0x8040
/* async DMA */
#define DMA_RB_RPTR 0xd008
#define DMA_RB_WPTR 0xd00c
/* async DMA packets */
/*
 * Build a 32-bit async DMA packet header:
 *   bits 31:28  cmd - 4-bit value, one of the DMA_PACKET_* types
 *   bit  23     t   - single-bit flag
 *   bit  22     s   - single-bit flag
 *   bits 15:0   n   - 16-bit value (masked with 0xFFFF in this header)
 * Each argument is masked to its field width before shifting.
 * NOTE(review): the meaning of t, s and n is not stated here; n is
 * presumably a count - confirm against the users of this macro.
 */
#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
(((t) & 0x1) << 23) | \
(((s) & 0x1) << 22) | \
(((n) & 0xFFFF) << 0))
/* async DMA Packet types */
#define DMA_PACKET_WRITE 0x2
#define DMA_PACKET_COPY 0x3
#define DMA_PACKET_INDIRECT_BUFFER 0x4
#define DMA_PACKET_SEMAPHORE 0x5
#define DMA_PACKET_FENCE 0x6
#define DMA_PACKET_TRAP 0x7
#define DMA_PACKET_CONSTANT_FILL 0xd
#define DMA_PACKET_NOP 0xf
#define SRBM_STATUS 0x0E50 #define SRBM_STATUS 0x0E50
/* DCE 3.2 HDMI */ /* DCE 3.2 HDMI */
......
This diff is collapsed.
...@@ -936,4 +936,61 @@ ...@@ -936,4 +936,61 @@
#define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A #define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A
#define PACKET3_SWITCH_BUFFER 0x8B #define PACKET3_SWITCH_BUFFER 0x8B
/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
#define DMA0_REGISTER_OFFSET 0x0 /* not a register */
#define DMA1_REGISTER_OFFSET 0x800 /* not a register */
#define DMA_RB_CNTL 0xd000
# define DMA_RB_ENABLE (1 << 0)
# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
#define DMA_RB_BASE 0xd004
#define DMA_RB_RPTR 0xd008
#define DMA_RB_WPTR 0xd00c
#define DMA_RB_RPTR_ADDR_HI 0xd01c
#define DMA_RB_RPTR_ADDR_LO 0xd020
#define DMA_IB_CNTL 0xd024
# define DMA_IB_ENABLE (1 << 0)
# define DMA_IB_SWAP_ENABLE (1 << 4)
#define DMA_IB_RPTR 0xd028
#define DMA_CNTL 0xd02c
# define TRAP_ENABLE (1 << 0)
# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
# define SEM_WAIT_INT_ENABLE (1 << 2)
# define DATA_SWAP_ENABLE (1 << 3)
# define FENCE_SWAP_ENABLE (1 << 4)
# define CTXEMPTY_INT_ENABLE (1 << 28)
#define DMA_TILING_CONFIG 0xd0b8
/*
 * Build a 32-bit async DMA packet header.  This header's variant takes five
 * arguments:
 *   bits 31:28  cmd - 4-bit value, one of the DMA_PACKET_* types
 *   bit  26     b   - single-bit flag
 *   bit  23     t   - single-bit flag
 *   bit  22     s   - single-bit flag
 *   bits 19:0   n   - 20-bit value
 * Each argument is masked to its field width before shifting.
 * NOTE(review): the meaning of b, t, s and n is not stated here; n is
 * presumably a count - confirm against the users of this macro.
 */
#define DMA_PACKET(cmd, b, t, s, n) ((((cmd) & 0xF) << 28) | \
(((b) & 0x1) << 26) | \
(((t) & 0x1) << 23) | \
(((s) & 0x1) << 22) | \
(((n) & 0xFFFFF) << 0))
/*
 * Header variant that carries a 4-bit vmid in bits 23:20 in place of the
 * b/t/s flags; cmd (bits 31:28) and n (bits 19:0) are laid out as in
 * DMA_PACKET.
 */
#define DMA_IB_PACKET(cmd, vmid, n) ((((cmd) & 0xF) << 28) | \
(((vmid) & 0xF) << 20) | \
(((n) & 0xFFFFF) << 0))
/*
 * Fixed-form header: the command field is hard-coded to 2 (the same value
 * as DMA_PACKET_WRITE), bit 26 (the b position of DMA_PACKET) and bit 21
 * are set, and n fills bits 19:0.
 * NOTE(review): bit 21 has no named field in this header - confirm its
 * meaning against the hardware documentation.
 */
#define DMA_PTE_PDE_PACKET(n) ((2 << 28) | \
(1 << 26) | \
(1 << 21) | \
(((n) & 0xFFFFF) << 0))
/* async DMA Packet types */
#define DMA_PACKET_WRITE 0x2
#define DMA_PACKET_COPY 0x3
#define DMA_PACKET_INDIRECT_BUFFER 0x4
#define DMA_PACKET_SEMAPHORE 0x5
#define DMA_PACKET_FENCE 0x6
#define DMA_PACKET_TRAP 0x7
#define DMA_PACKET_SRBM_WRITE 0x9
#define DMA_PACKET_CONSTANT_FILL 0xd
#define DMA_PACKET_NOP 0xf
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment