Commit deab48f1 authored by Alex Deucher

drm/radeon: add dma engine support for vm pt updates on si (v2)

Async DMA has a special packet for contiguous pt updates
which saves overhead.

v2: rebase
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 33e54678
...@@ -2825,9 +2825,12 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, ...@@ -2825,9 +2825,12 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
{ {
struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
uint64_t value;
unsigned ndw;
if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
while (count) { while (count) {
unsigned ndw = 2 + count * 2; ndw = 2 + count * 2;
if (ndw > 0x3FFE) if (ndw > 0x3FFE)
ndw = 0x3FFE; ndw = 0x3FFE;
...@@ -2837,20 +2840,73 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, ...@@ -2837,20 +2840,73 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
radeon_ring_write(ring, pe); radeon_ring_write(ring, pe);
radeon_ring_write(ring, upper_32_bits(pe)); radeon_ring_write(ring, upper_32_bits(pe));
for (; ndw > 2; ndw -= 2, --count, pe += 8) { for (; ndw > 2; ndw -= 2, --count, pe += 8) {
uint64_t value;
if (flags & RADEON_VM_PAGE_SYSTEM) { if (flags & RADEON_VM_PAGE_SYSTEM) {
value = radeon_vm_map_gart(rdev, addr); value = radeon_vm_map_gart(rdev, addr);
value &= 0xFFFFFFFFFFFFF000ULL; value &= 0xFFFFFFFFFFFFF000ULL;
} else if (flags & RADEON_VM_PAGE_VALID) } else if (flags & RADEON_VM_PAGE_VALID) {
value = addr; value = addr;
else } else {
value = 0; value = 0;
}
addr += incr; addr += incr;
value |= r600_flags; value |= r600_flags;
radeon_ring_write(ring, value); radeon_ring_write(ring, value);
radeon_ring_write(ring, upper_32_bits(value)); radeon_ring_write(ring, upper_32_bits(value));
} }
} }
} else {
/* DMA */
if (flags & RADEON_VM_PAGE_SYSTEM) {
while (count) {
ndw = count * 2;
if (ndw > 0xFFFFE)
ndw = 0xFFFFE;
/* for non-physically contiguous pages (system) */
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw));
radeon_ring_write(ring, pe);
radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
for (; ndw > 0; ndw -= 2, --count, pe += 8) {
if (flags & RADEON_VM_PAGE_SYSTEM) {
value = radeon_vm_map_gart(rdev, addr);
value &= 0xFFFFFFFFFFFFF000ULL;
} else if (flags & RADEON_VM_PAGE_VALID) {
value = addr;
} else {
value = 0;
}
addr += incr;
value |= r600_flags;
radeon_ring_write(ring, value);
radeon_ring_write(ring, upper_32_bits(value));
}
}
} else {
while (count) {
ndw = count * 2;
if (ndw > 0xFFFFE)
ndw = 0xFFFFE;
if (flags & RADEON_VM_PAGE_VALID)
value = addr;
else
value = 0;
/* for physically contiguous pages (vram) */
radeon_ring_write(ring, DMA_PTE_PDE_PACKET(ndw));
radeon_ring_write(ring, pe); /* dst addr */
radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
radeon_ring_write(ring, r600_flags); /* mask */
radeon_ring_write(ring, 0);
radeon_ring_write(ring, value); /* value */
radeon_ring_write(ring, upper_32_bits(value));
radeon_ring_write(ring, incr); /* increment size */
radeon_ring_write(ring, 0);
pe += ndw * 4;
addr += (ndw / 2) * incr;
count -= ndw / 2;
}
}
}
} }
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
......
...@@ -972,6 +972,16 @@ ...@@ -972,6 +972,16 @@
(((t) & 0x1) << 23) | \ (((t) & 0x1) << 23) | \
(((s) & 0x1) << 22) | \ (((s) & 0x1) << 22) | \
(((n) & 0xFFFFF) << 0)) (((n) & 0xFFFFF) << 0))
/*
 * Assemble a DMA_IB_PACKET header dword from its three fields:
 *   bits 19:0  - n    (only the low 20 bits of the argument are kept)
 *   bits 23:20 - vmid (only the low 4 bits of the argument are kept)
 *   bits 31:28 - cmd  (only the low 4 bits of the argument are kept)
 * Arguments wider than their field are silently truncated by the masks.
 */
#define DMA_IB_PACKET(cmd, vmid, n)	((((n) & 0xFFFFF) << 0) |	\
					 (((vmid) & 0xF) << 20) |	\
					 (((cmd) & 0xF) << 28))
/*
 * Header dword of the async DMA packet that si_vm_set_page() emits for
 * physically contiguous (vram) page table updates.
 *   bits 19:0  - n, truncated to 20 bits (si_vm_set_page() passes its ndw)
 *   bit  21    - always set; NOTE(review): meaning not visible here, confirm
 *   bit  26    - always set; NOTE(review): meaning not visible here, confirm
 *   bits 31:28 - 0x2, the same value as DMA_PACKET_WRITE
 */
#define DMA_PTE_PDE_PACKET(n)	((((n) & 0xFFFFF) << 0) |	\
				 (0x1 << 21) |			\
				 (0x1 << 26) |			\
				 (0x2 << 28))
/* async DMA Packet types */ /* async DMA Packet types */
#define DMA_PACKET_WRITE 0x2 #define DMA_PACKET_WRITE 0x2
#define DMA_PACKET_COPY 0x3 #define DMA_PACKET_COPY 0x3
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment