Commit 975700d2 authored by Christian König's avatar Christian König Committed by Alex Deucher

drm/radeon: split semaphore and sync object handling v2

Previously we just allocated space for four hardware semaphores
in each software semaphore object. Make software semaphore objects
represent only one hardware semaphore address again by splitting
the sync code into it's own object.

v2: fix typo in comment
Signed-off-by: default avatarChristian König <christian.koenig@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent e0602c35
......@@ -80,7 +80,8 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \
rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \
trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \
ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o
ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o \
radeon_sync.o
# add async DMA block
radeon-y += \
......
......@@ -3994,31 +3994,27 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
unsigned num_gpu_pages,
struct reservation_object *resv)
{
struct radeon_semaphore *sem = NULL;
struct radeon_fence *fence;
struct radeon_sync sync;
int ring_index = rdev->asic->copy.blit_ring_index;
struct radeon_ring *ring = &rdev->ring[ring_index];
u32 size_in_bytes, cur_size_in_bytes, control;
int i, num_loops;
int r = 0;
r = radeon_semaphore_create(rdev, &sem);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
return ERR_PTR(r);
}
radeon_sync_create(&sync);
size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
radeon_semaphore_free(rdev, &sem, NULL);
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_semaphore_sync_resv(rdev, sem, resv, false);
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
radeon_sync_resv(rdev, &sync, resv, false);
radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) {
cur_size_in_bytes = size_in_bytes;
......@@ -4042,12 +4038,12 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
r = radeon_fence_emit(rdev, &fence, ring->idx);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
radeon_semaphore_free(rdev, &sem, NULL);
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
radeon_semaphore_free(rdev, &sem, fence);
radeon_sync_free(rdev, &sync, fence);
return fence;
}
......
......@@ -541,31 +541,27 @@ struct radeon_fence *cik_copy_dma(struct radeon_device *rdev,
unsigned num_gpu_pages,
struct reservation_object *resv)
{
struct radeon_semaphore *sem = NULL;
struct radeon_fence *fence;
struct radeon_sync sync;
int ring_index = rdev->asic->copy.dma_ring_index;
struct radeon_ring *ring = &rdev->ring[ring_index];
u32 size_in_bytes, cur_size_in_bytes;
int i, num_loops;
int r = 0;
r = radeon_semaphore_create(rdev, &sem);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
return ERR_PTR(r);
}
radeon_sync_create(&sync);
size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
radeon_semaphore_free(rdev, &sem, NULL);
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_semaphore_sync_resv(rdev, sem, resv, false);
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
radeon_sync_resv(rdev, &sync, resv, false);
radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) {
cur_size_in_bytes = size_in_bytes;
......@@ -586,12 +582,12 @@ struct radeon_fence *cik_copy_dma(struct radeon_device *rdev,
r = radeon_fence_emit(rdev, &fence, ring->idx);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
radeon_semaphore_free(rdev, &sem, NULL);
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
radeon_semaphore_free(rdev, &sem, fence);
radeon_sync_free(rdev, &sync, fence);
return fence;
}
......
......@@ -110,31 +110,27 @@ struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev,
unsigned num_gpu_pages,
struct reservation_object *resv)
{
struct radeon_semaphore *sem = NULL;
struct radeon_fence *fence;
struct radeon_sync sync;
int ring_index = rdev->asic->copy.dma_ring_index;
struct radeon_ring *ring = &rdev->ring[ring_index];
u32 size_in_dw, cur_size_in_dw;
int i, num_loops;
int r = 0;
r = radeon_semaphore_create(rdev, &sem);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
return ERR_PTR(r);
}
radeon_sync_create(&sync);
size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff);
r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
radeon_semaphore_free(rdev, &sem, NULL);
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_semaphore_sync_resv(rdev, sem, resv, false);
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
radeon_sync_resv(rdev, &sync, resv, false);
radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) {
cur_size_in_dw = size_in_dw;
......@@ -153,12 +149,12 @@ struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev,
r = radeon_fence_emit(rdev, &fence, ring->idx);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
radeon_semaphore_free(rdev, &sem, NULL);
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
radeon_semaphore_free(rdev, &sem, fence);
radeon_sync_free(rdev, &sync, fence);
return fence;
}
......
......@@ -2889,31 +2889,27 @@ struct radeon_fence *r600_copy_cpdma(struct radeon_device *rdev,
unsigned num_gpu_pages,
struct reservation_object *resv)
{
struct radeon_semaphore *sem = NULL;
struct radeon_fence *fence;
struct radeon_sync sync;
int ring_index = rdev->asic->copy.blit_ring_index;
struct radeon_ring *ring = &rdev->ring[ring_index];
u32 size_in_bytes, cur_size_in_bytes, tmp;
int i, num_loops;
int r = 0;
r = radeon_semaphore_create(rdev, &sem);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
return ERR_PTR(r);
}
radeon_sync_create(&sync);
size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
r = radeon_ring_lock(rdev, ring, num_loops * 6 + 24);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
radeon_semaphore_free(rdev, &sem, NULL);
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_semaphore_sync_resv(rdev, sem, resv, false);
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
radeon_sync_resv(rdev, &sync, resv, false);
radeon_sync_rings(rdev, &sync, ring->idx);
radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
......@@ -2942,12 +2938,12 @@ struct radeon_fence *r600_copy_cpdma(struct radeon_device *rdev,
r = radeon_fence_emit(rdev, &fence, ring->idx);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
radeon_semaphore_free(rdev, &sem, NULL);
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
radeon_semaphore_free(rdev, &sem, fence);
radeon_sync_free(rdev, &sync, fence);
return fence;
}
......
......@@ -441,31 +441,27 @@ struct radeon_fence *r600_copy_dma(struct radeon_device *rdev,
unsigned num_gpu_pages,
struct reservation_object *resv)
{
struct radeon_semaphore *sem = NULL;
struct radeon_fence *fence;
struct radeon_sync sync;
int ring_index = rdev->asic->copy.dma_ring_index;
struct radeon_ring *ring = &rdev->ring[ring_index];
u32 size_in_dw, cur_size_in_dw;
int i, num_loops;
int r = 0;
r = radeon_semaphore_create(rdev, &sem);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
return ERR_PTR(r);
}
radeon_sync_create(&sync);
size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
radeon_semaphore_free(rdev, &sem, NULL);
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_semaphore_sync_resv(rdev, sem, resv, false);
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
radeon_sync_resv(rdev, &sync, resv, false);
radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) {
cur_size_in_dw = size_in_dw;
......@@ -484,12 +480,12 @@ struct radeon_fence *r600_copy_dma(struct radeon_device *rdev,
r = radeon_fence_emit(rdev, &fence, ring->idx);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
radeon_semaphore_free(rdev, &sem, NULL);
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
radeon_semaphore_free(rdev, &sem, fence);
radeon_sync_free(rdev, &sync, fence);
return fence;
}
......@@ -150,9 +150,6 @@ extern int radeon_backlight;
/* number of hw syncs before falling back on blocking */
#define RADEON_NUM_SYNCS 4
/* number of hw syncs before falling back on blocking */
#define RADEON_NUM_SYNCS 4
/* hardcode those limit for now */
#define RADEON_VA_IB_OFFSET (1 << 20)
#define RADEON_VA_RESERVED_SIZE (8 << 20)
......@@ -576,10 +573,9 @@ int radeon_mode_dumb_mmap(struct drm_file *filp,
* Semaphores.
*/
struct radeon_semaphore {
struct radeon_sa_bo *sa_bo;
signed waiters;
uint64_t gpu_addr;
struct radeon_fence *sync_to[RADEON_NUM_RINGS];
struct radeon_sa_bo *sa_bo;
signed waiters;
uint64_t gpu_addr;
};
int radeon_semaphore_create(struct radeon_device *rdev,
......@@ -588,19 +584,31 @@ bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
struct radeon_semaphore *semaphore);
bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
struct radeon_semaphore *semaphore);
void radeon_semaphore_sync_fence(struct radeon_semaphore *semaphore,
struct radeon_fence *fence);
int radeon_semaphore_sync_resv(struct radeon_device *rdev,
struct radeon_semaphore *semaphore,
struct reservation_object *resv,
bool shared);
int radeon_semaphore_sync_rings(struct radeon_device *rdev,
struct radeon_semaphore *semaphore,
int waiting_ring);
void radeon_semaphore_free(struct radeon_device *rdev,
struct radeon_semaphore **semaphore,
struct radeon_fence *fence);
/*
* Synchronization
*/
struct radeon_sync {
struct radeon_semaphore *semaphores[RADEON_NUM_SYNCS];
struct radeon_fence *sync_to[RADEON_NUM_RINGS];
};
void radeon_sync_create(struct radeon_sync *sync);
void radeon_sync_fence(struct radeon_sync *sync,
struct radeon_fence *fence);
int radeon_sync_resv(struct radeon_device *rdev,
struct radeon_sync *sync,
struct reservation_object *resv,
bool shared);
int radeon_sync_rings(struct radeon_device *rdev,
struct radeon_sync *sync,
int waiting_ring);
void radeon_sync_free(struct radeon_device *rdev, struct radeon_sync *sync,
struct radeon_fence *fence);
/*
* GART structures, functions & helpers
*/
......@@ -818,7 +826,7 @@ struct radeon_ib {
struct radeon_fence *fence;
struct radeon_vm *vm;
bool is_const_ib;
struct radeon_semaphore *semaphore;
struct radeon_sync sync;
};
struct radeon_ring {
......
......@@ -260,8 +260,8 @@ static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
continue;
resv = p->relocs[i].robj->tbo.resv;
r = radeon_semaphore_sync_resv(p->rdev, p->ib.semaphore, resv,
p->relocs[i].tv.shared);
r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
p->relocs[i].tv.shared);
if (r)
break;
......@@ -285,9 +285,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
INIT_LIST_HEAD(&p->validated);
p->idx = 0;
p->ib.sa_bo = NULL;
p->ib.semaphore = NULL;
p->const_ib.sa_bo = NULL;
p->const_ib.semaphore = NULL;
p->chunk_ib_idx = -1;
p->chunk_relocs_idx = -1;
p->chunk_flags_idx = -1;
......@@ -582,7 +580,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
DRM_ERROR("Failed to sync rings: %i\n", r);
goto out;
}
radeon_semaphore_sync_fence(parser->ib.semaphore, vm->fence);
radeon_sync_fence(&parser->ib.sync, vm->fence);
if ((rdev->family >= CHIP_TAHITI) &&
(parser->chunk_const_ib_idx != -1)) {
......
......@@ -64,10 +64,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
return r;
}
r = radeon_semaphore_create(rdev, &ib->semaphore);
if (r) {
return r;
}
radeon_sync_create(&ib->sync);
ib->ring = ring;
ib->fence = NULL;
......@@ -96,7 +93,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
*/
void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
{
radeon_semaphore_free(rdev, &ib->semaphore, ib->fence);
radeon_sync_free(rdev, &ib->sync, ib->fence);
radeon_sa_bo_free(rdev, &ib->sa_bo, ib->fence);
radeon_fence_unref(&ib->fence);
}
......@@ -145,11 +142,11 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
if (ib->vm) {
struct radeon_fence *vm_id_fence;
vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring);
radeon_semaphore_sync_fence(ib->semaphore, vm_id_fence);
radeon_sync_fence(&ib->sync, vm_id_fence);
}
/* sync with other rings */
r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring);
r = radeon_sync_rings(rdev, &ib->sync, ib->ring);
if (r) {
dev_err(rdev->dev, "failed to sync rings (%d)\n", r);
radeon_ring_unlock_undo(rdev, ring);
......@@ -161,7 +158,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
if (const_ib) {
radeon_ring_ib_execute(rdev, const_ib->ring, const_ib);
radeon_semaphore_free(rdev, &const_ib->semaphore, NULL);
radeon_sync_free(rdev, &const_ib->sync, NULL);
}
radeon_ring_ib_execute(rdev, ib->ring, ib);
r = radeon_fence_emit(rdev, &ib->fence, ib->ring);
......
......@@ -34,15 +34,14 @@
int radeon_semaphore_create(struct radeon_device *rdev,
struct radeon_semaphore **semaphore)
{
uint64_t *cpu_addr;
int i, r;
int r;
*semaphore = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL);
if (*semaphore == NULL) {
return -ENOMEM;
}
r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &(*semaphore)->sa_bo,
8 * RADEON_NUM_SYNCS, 8);
r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo,
&(*semaphore)->sa_bo, 8, 8);
if (r) {
kfree(*semaphore);
*semaphore = NULL;
......@@ -51,12 +50,7 @@ int radeon_semaphore_create(struct radeon_device *rdev,
(*semaphore)->waiters = 0;
(*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo);
cpu_addr = radeon_sa_bo_cpu_addr((*semaphore)->sa_bo);
for (i = 0; i < RADEON_NUM_SYNCS; ++i)
cpu_addr[i] = 0;
for (i = 0; i < RADEON_NUM_RINGS; ++i)
(*semaphore)->sync_to[i] = NULL;
*((uint64_t *)radeon_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0;
return 0;
}
......@@ -95,146 +89,6 @@ bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ridx,
return false;
}
/**
* radeon_semaphore_sync_fence - use the semaphore to sync to a fence
*
* @semaphore: semaphore object to add fence to
* @fence: fence to sync to
*
* Sync to the fence using this semaphore object
*/
void radeon_semaphore_sync_fence(struct radeon_semaphore *semaphore,
struct radeon_fence *fence)
{
struct radeon_fence *other;
if (!fence)
return;
other = semaphore->sync_to[fence->ring];
semaphore->sync_to[fence->ring] = radeon_fence_later(fence, other);
}
/**
* radeon_semaphore_sync_to - use the semaphore to sync to a reservation object
*
* @sema: semaphore object to add fence from reservation object to
* @resv: reservation object with embedded fence
* @shared: true if we should onyl sync to the exclusive fence
*
* Sync to the fence using this semaphore object
*/
int radeon_semaphore_sync_resv(struct radeon_device *rdev,
struct radeon_semaphore *sema,
struct reservation_object *resv,
bool shared)
{
struct reservation_object_list *flist;
struct fence *f;
struct radeon_fence *fence;
unsigned i;
int r = 0;
/* always sync to the exclusive fence */
f = reservation_object_get_excl(resv);
fence = f ? to_radeon_fence(f) : NULL;
if (fence && fence->rdev == rdev)
radeon_semaphore_sync_fence(sema, fence);
else if (f)
r = fence_wait(f, true);
flist = reservation_object_get_list(resv);
if (shared || !flist || r)
return r;
for (i = 0; i < flist->shared_count; ++i) {
f = rcu_dereference_protected(flist->shared[i],
reservation_object_held(resv));
fence = to_radeon_fence(f);
if (fence && fence->rdev == rdev)
radeon_semaphore_sync_fence(sema, fence);
else
r = fence_wait(f, true);
if (r)
break;
}
return r;
}
/**
* radeon_semaphore_sync_rings - sync ring to all registered fences
*
* @rdev: radeon_device pointer
* @semaphore: semaphore object to use for sync
* @ring: ring that needs sync
*
* Ensure that all registered fences are signaled before letting
* the ring continue. The caller must hold the ring lock.
*/
int radeon_semaphore_sync_rings(struct radeon_device *rdev,
struct radeon_semaphore *semaphore,
int ring)
{
unsigned count = 0;
int i, r;
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
struct radeon_fence *fence = semaphore->sync_to[i];
/* check if we really need to sync */
if (!radeon_fence_need_sync(fence, ring))
continue;
/* prevent GPU deadlocks */
if (!rdev->ring[i].ready) {
dev_err(rdev->dev, "Syncing to a disabled ring!");
return -EINVAL;
}
if (++count > RADEON_NUM_SYNCS) {
/* not enough room, wait manually */
r = radeon_fence_wait(fence, false);
if (r)
return r;
continue;
}
/* allocate enough space for sync command */
r = radeon_ring_alloc(rdev, &rdev->ring[i], 16);
if (r) {
return r;
}
/* emit the signal semaphore */
if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) {
/* signaling wasn't successful wait manually */
radeon_ring_undo(&rdev->ring[i]);
r = radeon_fence_wait(fence, false);
if (r)
return r;
continue;
}
/* we assume caller has already allocated space on waiters ring */
if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) {
/* waiting wasn't successful wait manually */
radeon_ring_undo(&rdev->ring[i]);
r = radeon_fence_wait(fence, false);
if (r)
return r;
continue;
}
radeon_ring_commit(rdev, &rdev->ring[i], false);
radeon_fence_note_sync(fence, ring);
semaphore->gpu_addr += 8;
}
return 0;
}
void radeon_semaphore_free(struct radeon_device *rdev,
struct radeon_semaphore **semaphore,
struct radeon_fence *fence)
......
/*
* Copyright 2014 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
*/
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*/
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_trace.h"
/**
* radeon_sync_create - zero init sync object
*
* @sync: sync object to initialize
*
* Just clear the sync object for now.
*/
void radeon_sync_create(struct radeon_sync *sync)
{
unsigned i;
for (i = 0; i < RADEON_NUM_SYNCS; ++i)
sync->semaphores[i] = NULL;
for (i = 0; i < RADEON_NUM_RINGS; ++i)
sync->sync_to[i] = NULL;
}
/**
* radeon_sync_fence - use the semaphore to sync to a fence
*
* @sync: sync object to add fence to
* @fence: fence to sync to
*
* Sync to the fence using the semaphore objects
*/
void radeon_sync_fence(struct radeon_sync *sync,
struct radeon_fence *fence)
{
struct radeon_fence *other;
if (!fence)
return;
other = sync->sync_to[fence->ring];
sync->sync_to[fence->ring] = radeon_fence_later(fence, other);
}
/**
* radeon_sync_resv - use the semaphores to sync to a reservation object
*
* @sync: sync object to add fences from reservation object to
* @resv: reservation object with embedded fence
* @shared: true if we should only sync to the exclusive fence
*
* Sync to the fence using the semaphore objects
*/
int radeon_sync_resv(struct radeon_device *rdev,
struct radeon_sync *sync,
struct reservation_object *resv,
bool shared)
{
struct reservation_object_list *flist;
struct fence *f;
struct radeon_fence *fence;
unsigned i;
int r = 0;
/* always sync to the exclusive fence */
f = reservation_object_get_excl(resv);
fence = f ? to_radeon_fence(f) : NULL;
if (fence && fence->rdev == rdev)
radeon_sync_fence(sync, fence);
else if (f)
r = fence_wait(f, true);
flist = reservation_object_get_list(resv);
if (shared || !flist || r)
return r;
for (i = 0; i < flist->shared_count; ++i) {
f = rcu_dereference_protected(flist->shared[i],
reservation_object_held(resv));
fence = to_radeon_fence(f);
if (fence && fence->rdev == rdev)
radeon_sync_fence(sync, fence);
else
r = fence_wait(f, true);
if (r)
break;
}
return r;
}
/**
* radeon_sync_rings - sync ring to all registered fences
*
* @rdev: radeon_device pointer
* @sync: sync object to use
* @ring: ring that needs sync
*
* Ensure that all registered fences are signaled before letting
* the ring continue. The caller must hold the ring lock.
*/
int radeon_sync_rings(struct radeon_device *rdev,
struct radeon_sync *sync,
int ring)
{
unsigned count = 0;
int i, r;
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
struct radeon_fence *fence = sync->sync_to[i];
struct radeon_semaphore *semaphore;
/* check if we really need to sync */
if (!radeon_fence_need_sync(fence, ring))
continue;
/* prevent GPU deadlocks */
if (!rdev->ring[i].ready) {
dev_err(rdev->dev, "Syncing to a disabled ring!");
return -EINVAL;
}
if (count >= RADEON_NUM_SYNCS) {
/* not enough room, wait manually */
r = radeon_fence_wait(fence, false);
if (r)
return r;
continue;
}
r = radeon_semaphore_create(rdev, &semaphore);
if (r)
return r;
sync->semaphores[count++] = semaphore;
/* allocate enough space for sync command */
r = radeon_ring_alloc(rdev, &rdev->ring[i], 16);
if (r)
return r;
/* emit the signal semaphore */
if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) {
/* signaling wasn't successful wait manually */
radeon_ring_undo(&rdev->ring[i]);
r = radeon_fence_wait(fence, false);
if (r)
return r;
continue;
}
/* we assume caller has already allocated space on waiters ring */
if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) {
/* waiting wasn't successful wait manually */
radeon_ring_undo(&rdev->ring[i]);
r = radeon_fence_wait(fence, false);
if (r)
return r;
continue;
}
radeon_ring_commit(rdev, &rdev->ring[i], false);
radeon_fence_note_sync(fence, ring);
}
return 0;
}
/**
* radeon_sync_free - free the sync object
*
* @rdev: radeon_device pointer
* @sync: sync object to use
* @fence: fence to use for the free
*
* Free the sync object by freeing all semaphores in it.
*/
void radeon_sync_free(struct radeon_device *rdev,
struct radeon_sync *sync,
struct radeon_fence *fence)
{
unsigned i;
for (i = 0; i < RADEON_NUM_SYNCS; ++i)
radeon_semaphore_free(rdev, &sync->semaphores[i], fence);
}
......@@ -700,7 +700,7 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev,
if (ib.length_dw != 0) {
radeon_asic_vm_pad_ib(rdev, &ib);
radeon_semaphore_sync_resv(rdev, ib.semaphore, pd->tbo.resv, false);
radeon_sync_resv(rdev, &ib.sync, pd->tbo.resv, false);
WARN_ON(ib.length_dw > ndw);
r = radeon_ib_schedule(rdev, &ib, NULL, false);
if (r) {
......@@ -826,7 +826,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
unsigned nptes;
uint64_t pte;
radeon_semaphore_sync_resv(rdev, ib->semaphore, pt->tbo.resv, false);
radeon_sync_resv(rdev, &ib->sync, pt->tbo.resv, false);
if ((addr & ~mask) == (end & ~mask))
nptes = end - addr;
......
......@@ -44,31 +44,27 @@ struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev,
unsigned num_gpu_pages,
struct reservation_object *resv)
{
struct radeon_semaphore *sem = NULL;
struct radeon_fence *fence;
struct radeon_sync sync;
int ring_index = rdev->asic->copy.dma_ring_index;
struct radeon_ring *ring = &rdev->ring[ring_index];
u32 size_in_dw, cur_size_in_dw;
int i, num_loops;
int r = 0;
r = radeon_semaphore_create(rdev, &sem);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
return ERR_PTR(r);
}
radeon_sync_create(&sync);
size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF);
r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
radeon_semaphore_free(rdev, &sem, NULL);
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_semaphore_sync_resv(rdev, sem, resv, false);
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
radeon_sync_resv(rdev, &sync, resv, false);
radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) {
cur_size_in_dw = size_in_dw;
......@@ -87,12 +83,12 @@ struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev,
r = radeon_fence_emit(rdev, &fence, ring->idx);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
radeon_semaphore_free(rdev, &sem, NULL);
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
radeon_semaphore_free(rdev, &sem, fence);
radeon_sync_free(rdev, &sync, fence);
return fence;
}
......@@ -226,31 +226,27 @@ struct radeon_fence *si_copy_dma(struct radeon_device *rdev,
unsigned num_gpu_pages,
struct reservation_object *resv)
{
struct radeon_semaphore *sem = NULL;
struct radeon_fence *fence;
struct radeon_sync sync;
int ring_index = rdev->asic->copy.dma_ring_index;
struct radeon_ring *ring = &rdev->ring[ring_index];
u32 size_in_bytes, cur_size_in_bytes;
int i, num_loops;
int r = 0;
r = radeon_semaphore_create(rdev, &sem);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
return ERR_PTR(r);
}
radeon_sync_create(&sync);
size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
radeon_semaphore_free(rdev, &sem, NULL);
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_semaphore_sync_resv(rdev, sem, resv, false);
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
radeon_sync_resv(rdev, &sync, resv, false);
radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) {
cur_size_in_bytes = size_in_bytes;
......@@ -269,12 +265,12 @@ struct radeon_fence *si_copy_dma(struct radeon_device *rdev,
r = radeon_fence_emit(rdev, &fence, ring->idx);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
radeon_semaphore_free(rdev, &sem, NULL);
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
radeon_semaphore_free(rdev, &sem, fence);
radeon_sync_free(rdev, &sync, fence);
return fence;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment