Commit e4b35f95 authored by Dave Airlie

Merge branch 'drm-fixes-4.3' of git://people.freedesktop.org/~agd5f/linux into drm-fixes

radeon and amdgpu fixes for 4.3.  It's a bit bigger than usual since
it's 3 weeks worth of fixes since I was on vacation, then at XDC.
- lots of stability fixes
- suspend and resume fixes
- GPU scheduler fixes
- Misc other fixes

* 'drm-fixes-4.3' of git://people.freedesktop.org/~agd5f/linux: (31 commits)
  drm/radeon: add quirk for MSI R7 370
  drm/amdgpu: Sprinkle drm_modeset_lock_all to appease locking checks
  drm/radeon: Sprinkle drm_modeset_lock_all to appease locking checks
  drm/amdgpu: sync ce and me with SWITCH_BUFFER(2)
  drm/amdgpu: integer overflow in amdgpu_mode_dumb_create()
  drm/amdgpu: info leak in amdgpu_gem_metadata_ioctl()
  drm/amdgpu: integer overflow in amdgpu_info_ioctl()
  drm/amdgpu: unwind properly in amdgpu_cs_parser_init()
  drm/amdgpu: Fix max_vblank_count value for current display engines
  drm/amdgpu: use kmemdup rather than duplicating its implementation
  drm/amdgpu: fix UVD suspend and resume for VI APU
  drm/amdgpu: fix the UVD suspend sequence order
  drm/amdgpu: make UVD handle checking more strict
  drm/amdgpu: Disable UVD PG
  drm/amdgpu: more scheduler cleanups v2
  drm/amdgpu: cleanup fence queue init v2
  drm/amdgpu: rename fence->scheduler to sched v2
  drm/amdgpu: cleanup entity init
  drm/amdgpu: refine the scheduler job type conversion
  drm/amdgpu: refine the job naming for amdgpu_job and amdgpu_sched_job
  ...
parents 14d11b8d e7865479
...@@ -82,6 +82,7 @@ extern int amdgpu_vm_block_size; ...@@ -82,6 +82,7 @@ extern int amdgpu_vm_block_size;
extern int amdgpu_enable_scheduler; extern int amdgpu_enable_scheduler;
extern int amdgpu_sched_jobs; extern int amdgpu_sched_jobs;
extern int amdgpu_sched_hw_submission; extern int amdgpu_sched_hw_submission;
extern int amdgpu_enable_semaphores;
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
...@@ -432,7 +433,7 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev); ...@@ -432,7 +433,7 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev);
void amdgpu_fence_driver_fini(struct amdgpu_device *adev); void amdgpu_fence_driver_fini(struct amdgpu_device *adev);
void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev); void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev);
void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq_src, struct amdgpu_irq_src *irq_src,
unsigned irq_type); unsigned irq_type);
...@@ -890,7 +891,7 @@ struct amdgpu_ring { ...@@ -890,7 +891,7 @@ struct amdgpu_ring {
struct amdgpu_device *adev; struct amdgpu_device *adev;
const struct amdgpu_ring_funcs *funcs; const struct amdgpu_ring_funcs *funcs;
struct amdgpu_fence_driver fence_drv; struct amdgpu_fence_driver fence_drv;
struct amd_gpu_scheduler *scheduler; struct amd_gpu_scheduler sched;
spinlock_t fence_lock; spinlock_t fence_lock;
struct mutex *ring_lock; struct mutex *ring_lock;
...@@ -1201,8 +1202,6 @@ struct amdgpu_gfx { ...@@ -1201,8 +1202,6 @@ struct amdgpu_gfx {
struct amdgpu_irq_src priv_inst_irq; struct amdgpu_irq_src priv_inst_irq;
/* gfx status */ /* gfx status */
uint32_t gfx_current_status; uint32_t gfx_current_status;
/* sync signal for const engine */
unsigned ce_sync_offs;
/* ce ram size*/ /* ce ram size*/
unsigned ce_ram_size; unsigned ce_ram_size;
}; };
...@@ -1274,8 +1273,10 @@ struct amdgpu_job { ...@@ -1274,8 +1273,10 @@ struct amdgpu_job {
uint32_t num_ibs; uint32_t num_ibs;
struct mutex job_lock; struct mutex job_lock;
struct amdgpu_user_fence uf; struct amdgpu_user_fence uf;
int (*free_job)(struct amdgpu_job *sched_job); int (*free_job)(struct amdgpu_job *job);
}; };
#define to_amdgpu_job(sched_job) \
container_of((sched_job), struct amdgpu_job, base)
static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx) static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx)
{ {
......
...@@ -183,7 +183,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, ...@@ -183,7 +183,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
return -ENOMEM; return -ENOMEM;
r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, &(*mem)->bo); AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo);
if (r) { if (r) {
dev_err(rdev->dev, dev_err(rdev->dev,
"failed to allocate BO for amdkfd (%d)\n", r); "failed to allocate BO for amdkfd (%d)\n", r);
......
...@@ -79,7 +79,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, ...@@ -79,7 +79,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
int time; int time;
n = AMDGPU_BENCHMARK_ITERATIONS; n = AMDGPU_BENCHMARK_ITERATIONS;
r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, sdomain, 0, NULL, &sobj); r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, sdomain, 0, NULL,
NULL, &sobj);
if (r) { if (r) {
goto out_cleanup; goto out_cleanup;
} }
...@@ -91,7 +92,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, ...@@ -91,7 +92,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
if (r) { if (r) {
goto out_cleanup; goto out_cleanup;
} }
r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, ddomain, 0, NULL, &dobj); r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, ddomain, 0, NULL,
NULL, &dobj);
if (r) { if (r) {
goto out_cleanup; goto out_cleanup;
} }
......
...@@ -86,7 +86,7 @@ static int amdgpu_cgs_gmap_kmem(void *cgs_device, void *kmem, ...@@ -86,7 +86,7 @@ static int amdgpu_cgs_gmap_kmem(void *cgs_device, void *kmem,
struct sg_table *sg = drm_prime_pages_to_sg(&kmem_page, npages); struct sg_table *sg = drm_prime_pages_to_sg(&kmem_page, npages);
ret = amdgpu_bo_create(adev, size, PAGE_SIZE, false, ret = amdgpu_bo_create(adev, size, PAGE_SIZE, false,
AMDGPU_GEM_DOMAIN_GTT, 0, sg, &bo); AMDGPU_GEM_DOMAIN_GTT, 0, sg, NULL, &bo);
if (ret) if (ret)
return ret; return ret;
ret = amdgpu_bo_reserve(bo, false); ret = amdgpu_bo_reserve(bo, false);
...@@ -197,7 +197,8 @@ static int amdgpu_cgs_alloc_gpu_mem(void *cgs_device, ...@@ -197,7 +197,8 @@ static int amdgpu_cgs_alloc_gpu_mem(void *cgs_device,
ret = amdgpu_bo_create_restricted(adev, size, PAGE_SIZE, ret = amdgpu_bo_create_restricted(adev, size, PAGE_SIZE,
true, domain, flags, true, domain, flags,
NULL, &placement, &obj); NULL, &placement, NULL,
&obj);
if (ret) { if (ret) {
DRM_ERROR("(%d) bo create failed\n", ret); DRM_ERROR("(%d) bo create failed\n", ret);
return ret; return ret;
......
...@@ -154,42 +154,41 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) ...@@ -154,42 +154,41 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
{ {
union drm_amdgpu_cs *cs = data; union drm_amdgpu_cs *cs = data;
uint64_t *chunk_array_user; uint64_t *chunk_array_user;
uint64_t *chunk_array = NULL; uint64_t *chunk_array;
struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
unsigned size, i; unsigned size, i;
int r = 0; int ret;
if (!cs->in.num_chunks) if (cs->in.num_chunks == 0)
goto out; return 0;
chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
if (!chunk_array)
return -ENOMEM;
p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id); p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
if (!p->ctx) { if (!p->ctx) {
r = -EINVAL; ret = -EINVAL;
goto out; goto free_chunk;
} }
p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
/* get chunks */ /* get chunks */
INIT_LIST_HEAD(&p->validated); INIT_LIST_HEAD(&p->validated);
chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
if (chunk_array == NULL) {
r = -ENOMEM;
goto out;
}
chunk_array_user = (uint64_t __user *)(cs->in.chunks); chunk_array_user = (uint64_t __user *)(cs->in.chunks);
if (copy_from_user(chunk_array, chunk_array_user, if (copy_from_user(chunk_array, chunk_array_user,
sizeof(uint64_t)*cs->in.num_chunks)) { sizeof(uint64_t)*cs->in.num_chunks)) {
r = -EFAULT; ret = -EFAULT;
goto out; goto put_bo_list;
} }
p->nchunks = cs->in.num_chunks; p->nchunks = cs->in.num_chunks;
p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk), p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
GFP_KERNEL); GFP_KERNEL);
if (p->chunks == NULL) { if (!p->chunks) {
r = -ENOMEM; ret = -ENOMEM;
goto out; goto put_bo_list;
} }
for (i = 0; i < p->nchunks; i++) { for (i = 0; i < p->nchunks; i++) {
...@@ -200,8 +199,9 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) ...@@ -200,8 +199,9 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
chunk_ptr = (void __user *)chunk_array[i]; chunk_ptr = (void __user *)chunk_array[i];
if (copy_from_user(&user_chunk, chunk_ptr, if (copy_from_user(&user_chunk, chunk_ptr,
sizeof(struct drm_amdgpu_cs_chunk))) { sizeof(struct drm_amdgpu_cs_chunk))) {
r = -EFAULT; ret = -EFAULT;
goto out; i--;
goto free_partial_kdata;
} }
p->chunks[i].chunk_id = user_chunk.chunk_id; p->chunks[i].chunk_id = user_chunk.chunk_id;
p->chunks[i].length_dw = user_chunk.length_dw; p->chunks[i].length_dw = user_chunk.length_dw;
...@@ -212,13 +212,14 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) ...@@ -212,13 +212,14 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t)); p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
if (p->chunks[i].kdata == NULL) { if (p->chunks[i].kdata == NULL) {
r = -ENOMEM; ret = -ENOMEM;
goto out; i--;
goto free_partial_kdata;
} }
size *= sizeof(uint32_t); size *= sizeof(uint32_t);
if (copy_from_user(p->chunks[i].kdata, cdata, size)) { if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
r = -EFAULT; ret = -EFAULT;
goto out; goto free_partial_kdata;
} }
switch (p->chunks[i].chunk_id) { switch (p->chunks[i].chunk_id) {
...@@ -238,15 +239,15 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) ...@@ -238,15 +239,15 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
gobj = drm_gem_object_lookup(p->adev->ddev, gobj = drm_gem_object_lookup(p->adev->ddev,
p->filp, handle); p->filp, handle);
if (gobj == NULL) { if (gobj == NULL) {
r = -EINVAL; ret = -EINVAL;
goto out; goto free_partial_kdata;
} }
p->uf.bo = gem_to_amdgpu_bo(gobj); p->uf.bo = gem_to_amdgpu_bo(gobj);
p->uf.offset = fence_data->offset; p->uf.offset = fence_data->offset;
} else { } else {
r = -EINVAL; ret = -EINVAL;
goto out; goto free_partial_kdata;
} }
break; break;
...@@ -254,19 +255,35 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) ...@@ -254,19 +255,35 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
break; break;
default: default:
r = -EINVAL; ret = -EINVAL;
goto out; goto free_partial_kdata;
} }
} }
p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL); p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL);
if (!p->ibs) if (!p->ibs) {
r = -ENOMEM; ret = -ENOMEM;
goto free_all_kdata;
}
out:
kfree(chunk_array); kfree(chunk_array);
return r; return 0;
free_all_kdata:
i = p->nchunks - 1;
free_partial_kdata:
for (; i >= 0; i--)
drm_free_large(p->chunks[i].kdata);
kfree(p->chunks);
put_bo_list:
if (p->bo_list)
amdgpu_bo_list_put(p->bo_list);
amdgpu_ctx_put(p->ctx);
free_chunk:
kfree(chunk_array);
return ret;
} }
/* Returns how many bytes TTM can move per IB. /* Returns how many bytes TTM can move per IB.
...@@ -321,25 +338,17 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) ...@@ -321,25 +338,17 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
return max(bytes_moved_threshold, 1024*1024ull); return max(bytes_moved_threshold, 1024*1024ull);
} }
int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p) int amdgpu_cs_list_validate(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct list_head *validated)
{ {
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_device *adev = p->adev;
struct amdgpu_bo_list_entry *lobj; struct amdgpu_bo_list_entry *lobj;
struct list_head duplicates;
struct amdgpu_bo *bo; struct amdgpu_bo *bo;
u64 bytes_moved = 0, initial_bytes_moved; u64 bytes_moved = 0, initial_bytes_moved;
u64 bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(adev); u64 bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(adev);
int r; int r;
INIT_LIST_HEAD(&duplicates); list_for_each_entry(lobj, validated, tv.head) {
r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates);
if (unlikely(r != 0)) {
return r;
}
list_for_each_entry(lobj, &p->validated, tv.head) {
bo = lobj->robj; bo = lobj->robj;
if (!bo->pin_count) { if (!bo->pin_count) {
u32 domain = lobj->prefered_domains; u32 domain = lobj->prefered_domains;
...@@ -373,7 +382,6 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p) ...@@ -373,7 +382,6 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p)
domain = lobj->allowed_domains; domain = lobj->allowed_domains;
goto retry; goto retry;
} }
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
return r; return r;
} }
} }
...@@ -386,6 +394,7 @@ static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p) ...@@ -386,6 +394,7 @@ static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p)
{ {
struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_cs_buckets buckets; struct amdgpu_cs_buckets buckets;
struct list_head duplicates;
bool need_mmap_lock = false; bool need_mmap_lock = false;
int i, r; int i, r;
...@@ -405,8 +414,22 @@ static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p) ...@@ -405,8 +414,22 @@ static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p)
if (need_mmap_lock) if (need_mmap_lock)
down_read(&current->mm->mmap_sem); down_read(&current->mm->mmap_sem);
r = amdgpu_cs_list_validate(p); INIT_LIST_HEAD(&duplicates);
r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates);
if (unlikely(r != 0))
goto error_reserve;
r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &p->validated);
if (r)
goto error_validate;
r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &duplicates);
error_validate:
if (r)
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
error_reserve:
if (need_mmap_lock) if (need_mmap_lock)
up_read(&current->mm->mmap_sem); up_read(&current->mm->mmap_sem);
...@@ -772,15 +795,15 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, ...@@ -772,15 +795,15 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
return 0; return 0;
} }
static int amdgpu_cs_free_job(struct amdgpu_job *sched_job) static int amdgpu_cs_free_job(struct amdgpu_job *job)
{ {
int i; int i;
if (sched_job->ibs) if (job->ibs)
for (i = 0; i < sched_job->num_ibs; i++) for (i = 0; i < job->num_ibs; i++)
amdgpu_ib_free(sched_job->adev, &sched_job->ibs[i]); amdgpu_ib_free(job->adev, &job->ibs[i]);
kfree(sched_job->ibs); kfree(job->ibs);
if (sched_job->uf.bo) if (job->uf.bo)
drm_gem_object_unreference_unlocked(&sched_job->uf.bo->gem_base); drm_gem_object_unreference_unlocked(&job->uf.bo->gem_base);
return 0; return 0;
} }
...@@ -804,7 +827,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ...@@ -804,7 +827,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
r = amdgpu_cs_parser_init(parser, data); r = amdgpu_cs_parser_init(parser, data);
if (r) { if (r) {
DRM_ERROR("Failed to initialize parser !\n"); DRM_ERROR("Failed to initialize parser !\n");
amdgpu_cs_parser_fini(parser, r, false); kfree(parser);
up_read(&adev->exclusive_lock); up_read(&adev->exclusive_lock);
r = amdgpu_cs_handle_lockup(adev, r); r = amdgpu_cs_handle_lockup(adev, r);
return r; return r;
...@@ -842,7 +865,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ...@@ -842,7 +865,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
if (!job) if (!job)
return -ENOMEM; return -ENOMEM;
job->base.sched = ring->scheduler; job->base.sched = &ring->sched;
job->base.s_entity = &parser->ctx->rings[ring->idx].entity; job->base.s_entity = &parser->ctx->rings[ring->idx].entity;
job->adev = parser->adev; job->adev = parser->adev;
job->ibs = parser->ibs; job->ibs = parser->ibs;
...@@ -857,7 +880,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ...@@ -857,7 +880,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
job->free_job = amdgpu_cs_free_job; job->free_job = amdgpu_cs_free_job;
mutex_lock(&job->job_lock); mutex_lock(&job->job_lock);
r = amd_sched_entity_push_job((struct amd_sched_job *)job); r = amd_sched_entity_push_job(&job->base);
if (r) { if (r) {
mutex_unlock(&job->job_lock); mutex_unlock(&job->job_lock);
amdgpu_cs_free_job(job); amdgpu_cs_free_job(job);
......
...@@ -43,10 +43,10 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, bool kernel, ...@@ -43,10 +43,10 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, bool kernel,
for (i = 0; i < adev->num_rings; i++) { for (i = 0; i < adev->num_rings; i++) {
struct amd_sched_rq *rq; struct amd_sched_rq *rq;
if (kernel) if (kernel)
rq = &adev->rings[i]->scheduler->kernel_rq; rq = &adev->rings[i]->sched.kernel_rq;
else else
rq = &adev->rings[i]->scheduler->sched_rq; rq = &adev->rings[i]->sched.sched_rq;
r = amd_sched_entity_init(adev->rings[i]->scheduler, r = amd_sched_entity_init(&adev->rings[i]->sched,
&ctx->rings[i].entity, &ctx->rings[i].entity,
rq, amdgpu_sched_jobs); rq, amdgpu_sched_jobs);
if (r) if (r)
...@@ -55,7 +55,7 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, bool kernel, ...@@ -55,7 +55,7 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, bool kernel,
if (i < adev->num_rings) { if (i < adev->num_rings) {
for (j = 0; j < i; j++) for (j = 0; j < i; j++)
amd_sched_entity_fini(adev->rings[j]->scheduler, amd_sched_entity_fini(&adev->rings[j]->sched,
&ctx->rings[j].entity); &ctx->rings[j].entity);
kfree(ctx); kfree(ctx);
return r; return r;
...@@ -75,7 +75,7 @@ void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) ...@@ -75,7 +75,7 @@ void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
if (amdgpu_enable_scheduler) { if (amdgpu_enable_scheduler) {
for (i = 0; i < adev->num_rings; i++) for (i = 0; i < adev->num_rings; i++)
amd_sched_entity_fini(adev->rings[i]->scheduler, amd_sched_entity_fini(&adev->rings[i]->sched,
&ctx->rings[i].entity); &ctx->rings[i].entity);
} }
} }
......
...@@ -246,7 +246,7 @@ static int amdgpu_vram_scratch_init(struct amdgpu_device *adev) ...@@ -246,7 +246,7 @@ static int amdgpu_vram_scratch_init(struct amdgpu_device *adev)
r = amdgpu_bo_create(adev, AMDGPU_GPU_PAGE_SIZE, r = amdgpu_bo_create(adev, AMDGPU_GPU_PAGE_SIZE,
PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
NULL, &adev->vram_scratch.robj); NULL, NULL, &adev->vram_scratch.robj);
if (r) { if (r) {
return r; return r;
} }
...@@ -449,7 +449,8 @@ static int amdgpu_wb_init(struct amdgpu_device *adev) ...@@ -449,7 +449,8 @@ static int amdgpu_wb_init(struct amdgpu_device *adev)
if (adev->wb.wb_obj == NULL) { if (adev->wb.wb_obj == NULL) {
r = amdgpu_bo_create(adev, AMDGPU_MAX_WB * 4, PAGE_SIZE, true, r = amdgpu_bo_create(adev, AMDGPU_MAX_WB * 4, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0, NULL, &adev->wb.wb_obj); AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
&adev->wb.wb_obj);
if (r) { if (r) {
dev_warn(adev->dev, "(%d) create WB bo failed\n", r); dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
return r; return r;
...@@ -1650,9 +1651,11 @@ int amdgpu_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) ...@@ -1650,9 +1651,11 @@ int amdgpu_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
drm_kms_helper_poll_disable(dev); drm_kms_helper_poll_disable(dev);
/* turn off display hw */ /* turn off display hw */
drm_modeset_lock_all(dev);
list_for_each_entry(connector, &dev->mode_config.connector_list, head) { list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
} }
drm_modeset_unlock_all(dev);
/* unpin the front buffers */ /* unpin the front buffers */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
...@@ -1747,9 +1750,11 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon) ...@@ -1747,9 +1750,11 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
if (fbcon) { if (fbcon) {
drm_helper_resume_force_mode(dev); drm_helper_resume_force_mode(dev);
/* turn on display hw */ /* turn on display hw */
drm_modeset_lock_all(dev);
list_for_each_entry(connector, &dev->mode_config.connector_list, head) { list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
} }
drm_modeset_unlock_all(dev);
} }
drm_kms_helper_poll_enable(dev); drm_kms_helper_poll_enable(dev);
......
...@@ -79,6 +79,7 @@ int amdgpu_exp_hw_support = 0; ...@@ -79,6 +79,7 @@ int amdgpu_exp_hw_support = 0;
int amdgpu_enable_scheduler = 0; int amdgpu_enable_scheduler = 0;
int amdgpu_sched_jobs = 16; int amdgpu_sched_jobs = 16;
int amdgpu_sched_hw_submission = 2; int amdgpu_sched_hw_submission = 2;
int amdgpu_enable_semaphores = 1;
MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
...@@ -152,6 +153,9 @@ module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444); ...@@ -152,6 +153,9 @@ module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444);
MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)"); MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)");
module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444); module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
MODULE_PARM_DESC(enable_semaphores, "Enable semaphores (1 = enable (default), 0 = disable)");
module_param_named(enable_semaphores, amdgpu_enable_semaphores, int, 0644);
static struct pci_device_id pciidlist[] = { static struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_CIK #ifdef CONFIG_DRM_AMDGPU_CIK
/* Kaveri */ /* Kaveri */
......
...@@ -609,9 +609,9 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, ...@@ -609,9 +609,9 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
* Init the fence driver for the requested ring (all asics). * Init the fence driver for the requested ring (all asics).
* Helper function for amdgpu_fence_driver_init(). * Helper function for amdgpu_fence_driver_init().
*/ */
void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
{ {
int i; int i, r;
ring->fence_drv.cpu_addr = NULL; ring->fence_drv.cpu_addr = NULL;
ring->fence_drv.gpu_addr = 0; ring->fence_drv.gpu_addr = 0;
...@@ -625,15 +625,19 @@ void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) ...@@ -625,15 +625,19 @@ void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
amdgpu_fence_check_lockup); amdgpu_fence_check_lockup);
ring->fence_drv.ring = ring; ring->fence_drv.ring = ring;
init_waitqueue_head(&ring->fence_drv.fence_queue);
if (amdgpu_enable_scheduler) { if (amdgpu_enable_scheduler) {
ring->scheduler = amd_sched_create(&amdgpu_sched_ops, r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
ring->idx, amdgpu_sched_hw_submission, ring->name);
amdgpu_sched_hw_submission, if (r) {
(void *)ring->adev); DRM_ERROR("Failed to create scheduler on ring %s.\n",
if (!ring->scheduler) ring->name);
DRM_ERROR("Failed to create scheduler on ring %d.\n", return r;
ring->idx);
} }
}
return 0;
} }
/** /**
...@@ -681,8 +685,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) ...@@ -681,8 +685,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
wake_up_all(&ring->fence_drv.fence_queue); wake_up_all(&ring->fence_drv.fence_queue);
amdgpu_irq_put(adev, ring->fence_drv.irq_src, amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type); ring->fence_drv.irq_type);
if (ring->scheduler) amd_sched_fini(&ring->sched);
amd_sched_destroy(ring->scheduler);
ring->fence_drv.initialized = false; ring->fence_drv.initialized = false;
} }
mutex_unlock(&adev->ring_lock); mutex_unlock(&adev->ring_lock);
......
...@@ -127,7 +127,7 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) ...@@ -127,7 +127,7 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
r = amdgpu_bo_create(adev, adev->gart.table_size, r = amdgpu_bo_create(adev, adev->gart.table_size,
PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
NULL, &adev->gart.robj); NULL, NULL, &adev->gart.robj);
if (r) { if (r) {
return r; return r;
} }
......
...@@ -69,7 +69,8 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, ...@@ -69,7 +69,8 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
} }
} }
retry: retry:
r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain, flags, NULL, &robj); r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
flags, NULL, NULL, &robj);
if (r) { if (r) {
if (r != -ERESTARTSYS) { if (r != -ERESTARTSYS) {
if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
...@@ -426,6 +427,10 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data, ...@@ -426,6 +427,10 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
&args->data.data_size_bytes, &args->data.data_size_bytes,
&args->data.flags); &args->data.flags);
} else if (args->op == AMDGPU_GEM_METADATA_OP_SET_METADATA) { } else if (args->op == AMDGPU_GEM_METADATA_OP_SET_METADATA) {
if (args->data.data_size_bytes > sizeof(args->data.data)) {
r = -EINVAL;
goto unreserve;
}
r = amdgpu_bo_set_tiling_flags(robj, args->data.tiling_info); r = amdgpu_bo_set_tiling_flags(robj, args->data.tiling_info);
if (!r) if (!r)
r = amdgpu_bo_set_metadata(robj, args->data.data, r = amdgpu_bo_set_metadata(robj, args->data.data,
...@@ -433,6 +438,7 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data, ...@@ -433,6 +438,7 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
args->data.flags); args->data.flags);
} }
unreserve:
amdgpu_bo_unreserve(robj); amdgpu_bo_unreserve(robj);
out: out:
drm_gem_object_unreference_unlocked(gobj); drm_gem_object_unreference_unlocked(gobj);
...@@ -454,11 +460,12 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, ...@@ -454,11 +460,12 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
struct ttm_validate_buffer tv, *entry; struct ttm_validate_buffer tv, *entry;
struct amdgpu_bo_list_entry *vm_bos; struct amdgpu_bo_list_entry *vm_bos;
struct ww_acquire_ctx ticket; struct ww_acquire_ctx ticket;
struct list_head list; struct list_head list, duplicates;
unsigned domain; unsigned domain;
int r; int r;
INIT_LIST_HEAD(&list); INIT_LIST_HEAD(&list);
INIT_LIST_HEAD(&duplicates);
tv.bo = &bo_va->bo->tbo; tv.bo = &bo_va->bo->tbo;
tv.shared = true; tv.shared = true;
...@@ -468,7 +475,8 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, ...@@ -468,7 +475,8 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
if (!vm_bos) if (!vm_bos)
return; return;
r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); /* Provide duplicates to avoid -EALREADY */
r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
if (r) if (r)
goto error_free; goto error_free;
...@@ -651,7 +659,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, ...@@ -651,7 +659,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
int r; int r;
args->pitch = amdgpu_align_pitch(adev, args->width, args->bpp, 0) * ((args->bpp + 1) / 8); args->pitch = amdgpu_align_pitch(adev, args->width, args->bpp, 0) * ((args->bpp + 1) / 8);
args->size = args->pitch * args->height; args->size = (u64)args->pitch * args->height;
args->size = ALIGN(args->size, PAGE_SIZE); args->size = ALIGN(args->size, PAGE_SIZE);
r = amdgpu_gem_object_create(adev, args->size, 0, r = amdgpu_gem_object_create(adev, args->size, 0,
......
...@@ -43,7 +43,7 @@ static int amdgpu_ih_ring_alloc(struct amdgpu_device *adev) ...@@ -43,7 +43,7 @@ static int amdgpu_ih_ring_alloc(struct amdgpu_device *adev)
r = amdgpu_bo_create(adev, adev->irq.ih.ring_size, r = amdgpu_bo_create(adev, adev->irq.ih.ring_size,
PAGE_SIZE, true, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0, AMDGPU_GEM_DOMAIN_GTT, 0,
NULL, &adev->irq.ih.ring_obj); NULL, NULL, &adev->irq.ih.ring_obj);
if (r) { if (r) {
DRM_ERROR("amdgpu: failed to create ih ring buffer (%d).\n", r); DRM_ERROR("amdgpu: failed to create ih ring buffer (%d).\n", r);
return r; return r;
......
...@@ -140,7 +140,7 @@ void amdgpu_irq_preinstall(struct drm_device *dev) ...@@ -140,7 +140,7 @@ void amdgpu_irq_preinstall(struct drm_device *dev)
*/ */
int amdgpu_irq_postinstall(struct drm_device *dev) int amdgpu_irq_postinstall(struct drm_device *dev)
{ {
dev->max_vblank_count = 0x001fffff; dev->max_vblank_count = 0x00ffffff;
return 0; return 0;
} }
......
...@@ -390,7 +390,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file ...@@ -390,7 +390,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0; min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0;
} }
case AMDGPU_INFO_READ_MMR_REG: { case AMDGPU_INFO_READ_MMR_REG: {
unsigned n, alloc_size = info->read_mmr_reg.count * 4; unsigned n, alloc_size;
uint32_t *regs; uint32_t *regs;
unsigned se_num = (info->read_mmr_reg.instance >> unsigned se_num = (info->read_mmr_reg.instance >>
AMDGPU_INFO_MMR_SE_INDEX_SHIFT) & AMDGPU_INFO_MMR_SE_INDEX_SHIFT) &
...@@ -406,9 +406,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file ...@@ -406,9 +406,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK)
sh_num = 0xffffffff; sh_num = 0xffffffff;
regs = kmalloc(alloc_size, GFP_KERNEL); regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL);
if (!regs) if (!regs)
return -ENOMEM; return -ENOMEM;
alloc_size = info->read_mmr_reg.count * sizeof(*regs);
for (i = 0; i < info->read_mmr_reg.count; i++) for (i = 0; i < info->read_mmr_reg.count; i++)
if (amdgpu_asic_read_register(adev, se_num, sh_num, if (amdgpu_asic_read_register(adev, se_num, sh_num,
......
...@@ -215,6 +215,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, ...@@ -215,6 +215,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
bool kernel, u32 domain, u64 flags, bool kernel, u32 domain, u64 flags,
struct sg_table *sg, struct sg_table *sg,
struct ttm_placement *placement, struct ttm_placement *placement,
struct reservation_object *resv,
struct amdgpu_bo **bo_ptr) struct amdgpu_bo **bo_ptr)
{ {
struct amdgpu_bo *bo; struct amdgpu_bo *bo;
...@@ -261,7 +262,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, ...@@ -261,7 +262,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
/* Kernel allocation are uninterruptible */ /* Kernel allocation are uninterruptible */
r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type, r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type,
&bo->placement, page_align, !kernel, NULL, &bo->placement, page_align, !kernel, NULL,
acc_size, sg, NULL, &amdgpu_ttm_bo_destroy); acc_size, sg, resv, &amdgpu_ttm_bo_destroy);
if (unlikely(r != 0)) { if (unlikely(r != 0)) {
return r; return r;
} }
...@@ -275,7 +276,9 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, ...@@ -275,7 +276,9 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
int amdgpu_bo_create(struct amdgpu_device *adev, int amdgpu_bo_create(struct amdgpu_device *adev,
unsigned long size, int byte_align, unsigned long size, int byte_align,
bool kernel, u32 domain, u64 flags, bool kernel, u32 domain, u64 flags,
struct sg_table *sg, struct amdgpu_bo **bo_ptr) struct sg_table *sg,
struct reservation_object *resv,
struct amdgpu_bo **bo_ptr)
{ {
struct ttm_placement placement = {0}; struct ttm_placement placement = {0};
struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
...@@ -286,11 +289,9 @@ int amdgpu_bo_create(struct amdgpu_device *adev, ...@@ -286,11 +289,9 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
amdgpu_ttm_placement_init(adev, &placement, amdgpu_ttm_placement_init(adev, &placement,
placements, domain, flags); placements, domain, flags);
return amdgpu_bo_create_restricted(adev, size, byte_align, return amdgpu_bo_create_restricted(adev, size, byte_align, kernel,
kernel, domain, flags, domain, flags, sg, &placement,
sg, resv, bo_ptr);
&placement,
bo_ptr);
} }
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
...@@ -535,12 +536,10 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, ...@@ -535,12 +536,10 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
if (metadata == NULL) if (metadata == NULL)
return -EINVAL; return -EINVAL;
buffer = kzalloc(metadata_size, GFP_KERNEL); buffer = kmemdup(metadata, metadata_size, GFP_KERNEL);
if (buffer == NULL) if (buffer == NULL)
return -ENOMEM; return -ENOMEM;
memcpy(buffer, metadata, metadata_size);
kfree(bo->metadata); kfree(bo->metadata);
bo->metadata_flags = flags; bo->metadata_flags = flags;
bo->metadata = buffer; bo->metadata = buffer;
......
...@@ -129,12 +129,14 @@ int amdgpu_bo_create(struct amdgpu_device *adev, ...@@ -129,12 +129,14 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
unsigned long size, int byte_align, unsigned long size, int byte_align,
bool kernel, u32 domain, u64 flags, bool kernel, u32 domain, u64 flags,
struct sg_table *sg, struct sg_table *sg,
struct reservation_object *resv,
struct amdgpu_bo **bo_ptr); struct amdgpu_bo **bo_ptr);
int amdgpu_bo_create_restricted(struct amdgpu_device *adev, int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
unsigned long size, int byte_align, unsigned long size, int byte_align,
bool kernel, u32 domain, u64 flags, bool kernel, u32 domain, u64 flags,
struct sg_table *sg, struct sg_table *sg,
struct ttm_placement *placement, struct ttm_placement *placement,
struct reservation_object *resv,
struct amdgpu_bo **bo_ptr); struct amdgpu_bo **bo_ptr);
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr); int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
void amdgpu_bo_kunmap(struct amdgpu_bo *bo); void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
......
...@@ -61,12 +61,15 @@ struct drm_gem_object *amdgpu_gem_prime_import_sg_table(struct drm_device *dev, ...@@ -61,12 +61,15 @@ struct drm_gem_object *amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach, struct dma_buf_attachment *attach,
struct sg_table *sg) struct sg_table *sg)
{ {
struct reservation_object *resv = attach->dmabuf->resv;
struct amdgpu_device *adev = dev->dev_private; struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_bo *bo; struct amdgpu_bo *bo;
int ret; int ret;
ww_mutex_lock(&resv->lock, NULL);
ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, false, ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, false,
AMDGPU_GEM_DOMAIN_GTT, 0, sg, &bo); AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, &bo);
ww_mutex_unlock(&resv->lock);
if (ret) if (ret)
return ERR_PTR(ret); return ERR_PTR(ret);
......
...@@ -357,11 +357,11 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, ...@@ -357,11 +357,11 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->adev = adev; ring->adev = adev;
ring->idx = adev->num_rings++; ring->idx = adev->num_rings++;
adev->rings[ring->idx] = ring; adev->rings[ring->idx] = ring;
amdgpu_fence_driver_init_ring(ring); r = amdgpu_fence_driver_init_ring(ring);
if (r)
return r;
} }
init_waitqueue_head(&ring->fence_drv.fence_queue);
r = amdgpu_wb_get(adev, &ring->rptr_offs); r = amdgpu_wb_get(adev, &ring->rptr_offs);
if (r) { if (r) {
dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
...@@ -407,7 +407,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, ...@@ -407,7 +407,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
if (ring->ring_obj == NULL) { if (ring->ring_obj == NULL) {
r = amdgpu_bo_create(adev, ring->ring_size, PAGE_SIZE, true, r = amdgpu_bo_create(adev, ring->ring_size, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0, AMDGPU_GEM_DOMAIN_GTT, 0,
NULL, &ring->ring_obj); NULL, NULL, &ring->ring_obj);
if (r) { if (r) {
dev_err(adev->dev, "(%d) ring create failed\n", r); dev_err(adev->dev, "(%d) ring create failed\n", r);
return r; return r;
......
...@@ -64,8 +64,8 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev, ...@@ -64,8 +64,8 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
INIT_LIST_HEAD(&sa_manager->flist[i]); INIT_LIST_HEAD(&sa_manager->flist[i]);
} }
r = amdgpu_bo_create(adev, size, align, true, r = amdgpu_bo_create(adev, size, align, true, domain,
domain, 0, NULL, &sa_manager->bo); 0, NULL, NULL, &sa_manager->bo);
if (r) { if (r) {
dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r); dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r);
return r; return r;
...@@ -145,8 +145,13 @@ static uint32_t amdgpu_sa_get_ring_from_fence(struct fence *f) ...@@ -145,8 +145,13 @@ static uint32_t amdgpu_sa_get_ring_from_fence(struct fence *f)
struct amd_sched_fence *s_fence; struct amd_sched_fence *s_fence;
s_fence = to_amd_sched_fence(f); s_fence = to_amd_sched_fence(f);
if (s_fence) if (s_fence) {
return s_fence->scheduler->ring_id; struct amdgpu_ring *ring;
ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
return ring->idx;
}
a_fence = to_amdgpu_fence(f); a_fence = to_amdgpu_fence(f);
if (a_fence) if (a_fence)
return a_fence->ring->idx; return a_fence->ring->idx;
...@@ -412,6 +417,26 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, ...@@ -412,6 +417,26 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
} }
#if defined(CONFIG_DEBUG_FS) #if defined(CONFIG_DEBUG_FS)
static void amdgpu_sa_bo_dump_fence(struct fence *fence, struct seq_file *m)
{
struct amdgpu_fence *a_fence = to_amdgpu_fence(fence);
struct amd_sched_fence *s_fence = to_amd_sched_fence(fence);
if (a_fence)
seq_printf(m, " protected by 0x%016llx on ring %d",
a_fence->seq, a_fence->ring->idx);
if (s_fence) {
struct amdgpu_ring *ring;
ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
seq_printf(m, " protected by 0x%016x on ring %d",
s_fence->base.seqno, ring->idx);
}
}
void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
struct seq_file *m) struct seq_file *m)
{ {
...@@ -428,18 +453,8 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, ...@@ -428,18 +453,8 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
} }
seq_printf(m, "[0x%010llx 0x%010llx] size %8lld", seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
soffset, eoffset, eoffset - soffset); soffset, eoffset, eoffset - soffset);
if (i->fence) { if (i->fence)
struct amdgpu_fence *a_fence = to_amdgpu_fence(i->fence); amdgpu_sa_bo_dump_fence(i->fence, m);
struct amd_sched_fence *s_fence = to_amd_sched_fence(i->fence);
if (a_fence)
seq_printf(m, " protected by 0x%016llx on ring %d",
a_fence->seq, a_fence->ring->idx);
if (s_fence)
seq_printf(m, " protected by 0x%016x on ring %d",
s_fence->base.seqno,
s_fence->scheduler->ring_id);
}
seq_printf(m, "\n"); seq_printf(m, "\n");
} }
spin_unlock(&sa_manager->wq.lock); spin_unlock(&sa_manager->wq.lock);
......
...@@ -27,63 +27,48 @@ ...@@ -27,63 +27,48 @@
#include <drm/drmP.h> #include <drm/drmP.h>
#include "amdgpu.h" #include "amdgpu.h"
static struct fence *amdgpu_sched_dependency(struct amd_sched_job *job) static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job)
{ {
struct amdgpu_job *sched_job = (struct amdgpu_job *)job; struct amdgpu_job *job = to_amdgpu_job(sched_job);
return amdgpu_sync_get_fence(&sched_job->ibs->sync); return amdgpu_sync_get_fence(&job->ibs->sync);
} }
static struct fence *amdgpu_sched_run_job(struct amd_sched_job *job) static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
{ {
struct amdgpu_job *sched_job; struct amdgpu_fence *fence = NULL;
struct amdgpu_fence *fence; struct amdgpu_job *job;
int r; int r;
if (!job) { if (!sched_job) {
DRM_ERROR("job is null\n"); DRM_ERROR("job is null\n");
return NULL; return NULL;
} }
sched_job = (struct amdgpu_job *)job; job = to_amdgpu_job(sched_job);
mutex_lock(&sched_job->job_lock); mutex_lock(&job->job_lock);
r = amdgpu_ib_schedule(sched_job->adev, r = amdgpu_ib_schedule(job->adev,
sched_job->num_ibs, job->num_ibs,
sched_job->ibs, job->ibs,
sched_job->base.owner); job->base.owner);
if (r) if (r) {
DRM_ERROR("Error scheduling IBs (%d)\n", r);
goto err; goto err;
fence = amdgpu_fence_ref(sched_job->ibs[sched_job->num_ibs - 1].fence); }
if (sched_job->free_job)
sched_job->free_job(sched_job);
mutex_unlock(&sched_job->job_lock); fence = amdgpu_fence_ref(job->ibs[job->num_ibs - 1].fence);
return &fence->base;
err: err:
DRM_ERROR("Run job error\n"); if (job->free_job)
mutex_unlock(&sched_job->job_lock); job->free_job(job);
job->sched->ops->process_job(job);
return NULL;
}
static void amdgpu_sched_process_job(struct amd_sched_job *job) mutex_unlock(&job->job_lock);
{ fence_put(&job->base.s_fence->base);
struct amdgpu_job *sched_job; kfree(job);
return fence ? &fence->base : NULL;
if (!job) {
DRM_ERROR("job is null\n");
return;
}
sched_job = (struct amdgpu_job *)job;
/* after processing job, free memory */
fence_put(&sched_job->base.s_fence->base);
kfree(sched_job);
} }
struct amd_sched_backend_ops amdgpu_sched_ops = { struct amd_sched_backend_ops amdgpu_sched_ops = {
.dependency = amdgpu_sched_dependency, .dependency = amdgpu_sched_dependency,
.run_job = amdgpu_sched_run_job, .run_job = amdgpu_sched_run_job,
.process_job = amdgpu_sched_process_job
}; };
int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
...@@ -100,7 +85,7 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, ...@@ -100,7 +85,7 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
if (!job) if (!job)
return -ENOMEM; return -ENOMEM;
job->base.sched = ring->scheduler; job->base.sched = &ring->sched;
job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity; job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
job->adev = adev; job->adev = adev;
job->ibs = ibs; job->ibs = ibs;
...@@ -109,7 +94,7 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, ...@@ -109,7 +94,7 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
mutex_init(&job->job_lock); mutex_init(&job->job_lock);
job->free_job = free_job; job->free_job = free_job;
mutex_lock(&job->job_lock); mutex_lock(&job->job_lock);
r = amd_sched_entity_push_job((struct amd_sched_job *)job); r = amd_sched_entity_push_job(&job->base);
if (r) { if (r) {
mutex_unlock(&job->job_lock); mutex_unlock(&job->job_lock);
kfree(job); kfree(job);
......
...@@ -65,8 +65,14 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f) ...@@ -65,8 +65,14 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f)
if (a_fence) if (a_fence)
return a_fence->ring->adev == adev; return a_fence->ring->adev == adev;
if (s_fence)
return (struct amdgpu_device *)s_fence->scheduler->priv == adev; if (s_fence) {
struct amdgpu_ring *ring;
ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
return ring->adev == adev;
}
return false; return false;
} }
...@@ -251,6 +257,20 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync) ...@@ -251,6 +257,20 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync)
fence_put(e->fence); fence_put(e->fence);
kfree(e); kfree(e);
} }
if (amdgpu_enable_semaphores)
return 0;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_fence *fence = sync->sync_to[i];
if (!fence)
continue;
r = fence_wait(&fence->base, false);
if (r)
return r;
}
return 0; return 0;
} }
...@@ -285,7 +305,8 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync, ...@@ -285,7 +305,8 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
return -EINVAL; return -EINVAL;
} }
if (amdgpu_enable_scheduler || (count >= AMDGPU_NUM_SYNCS)) { if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores ||
(count >= AMDGPU_NUM_SYNCS)) {
/* not enough room, wait manually */ /* not enough room, wait manually */
r = fence_wait(&fence->base, false); r = fence_wait(&fence->base, false);
if (r) if (r)
......
...@@ -59,8 +59,9 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) ...@@ -59,8 +59,9 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
goto out_cleanup; goto out_cleanup;
} }
r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, 0, r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
NULL, &vram_obj); AMDGPU_GEM_DOMAIN_VRAM, 0,
NULL, NULL, &vram_obj);
if (r) { if (r) {
DRM_ERROR("Failed to create VRAM object\n"); DRM_ERROR("Failed to create VRAM object\n");
goto out_cleanup; goto out_cleanup;
...@@ -80,7 +81,8 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) ...@@ -80,7 +81,8 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
struct fence *fence = NULL; struct fence *fence = NULL;
r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0, NULL, gtt_obj + i); AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
NULL, gtt_obj + i);
if (r) { if (r) {
DRM_ERROR("Failed to create GTT object %d\n", i); DRM_ERROR("Failed to create GTT object %d\n", i);
goto out_lclean; goto out_lclean;
......
...@@ -861,7 +861,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) ...@@ -861,7 +861,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
r = amdgpu_bo_create(adev, 256 * 1024, PAGE_SIZE, true, r = amdgpu_bo_create(adev, 256 * 1024, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
NULL, &adev->stollen_vga_memory); NULL, NULL, &adev->stollen_vga_memory);
if (r) { if (r) {
return r; return r;
} }
......
...@@ -247,7 +247,7 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) ...@@ -247,7 +247,7 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
const struct common_firmware_header *header = NULL; const struct common_firmware_header *header = NULL;
err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true, err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0, NULL, bo); AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, bo);
if (err) { if (err) {
dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err); dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err);
err = -ENOMEM; err = -ENOMEM;
......
...@@ -156,7 +156,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) ...@@ -156,7 +156,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
r = amdgpu_bo_create(adev, bo_size, PAGE_SIZE, true, r = amdgpu_bo_create(adev, bo_size, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
NULL, &adev->uvd.vcpu_bo); NULL, NULL, &adev->uvd.vcpu_bo);
if (r) { if (r) {
dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r); dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
return r; return r;
...@@ -543,46 +543,60 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, ...@@ -543,46 +543,60 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
return -EINVAL; return -EINVAL;
} }
if (msg_type == 1) { switch (msg_type) {
case 0:
/* it's a create msg, calc image size (width * height) */
amdgpu_bo_kunmap(bo);
/* try to alloc a new handle */
for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
if (atomic_read(&adev->uvd.handles[i]) == handle) {
DRM_ERROR("Handle 0x%x already in use!\n", handle);
return -EINVAL;
}
if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) {
adev->uvd.filp[i] = ctx->parser->filp;
return 0;
}
}
DRM_ERROR("No more free UVD handles!\n");
return -EINVAL;
case 1:
/* it's a decode msg, calc buffer sizes */ /* it's a decode msg, calc buffer sizes */
r = amdgpu_uvd_cs_msg_decode(msg, ctx->buf_sizes); r = amdgpu_uvd_cs_msg_decode(msg, ctx->buf_sizes);
amdgpu_bo_kunmap(bo); amdgpu_bo_kunmap(bo);
if (r) if (r)
return r; return r;
} else if (msg_type == 2) { /* validate the handle */
for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
if (atomic_read(&adev->uvd.handles[i]) == handle) {
if (adev->uvd.filp[i] != ctx->parser->filp) {
DRM_ERROR("UVD handle collision detected!\n");
return -EINVAL;
}
return 0;
}
}
DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
return -ENOENT;
case 2:
/* it's a destroy msg, free the handle */ /* it's a destroy msg, free the handle */
for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i)
atomic_cmpxchg(&adev->uvd.handles[i], handle, 0); atomic_cmpxchg(&adev->uvd.handles[i], handle, 0);
amdgpu_bo_kunmap(bo); amdgpu_bo_kunmap(bo);
return 0; return 0;
} else {
/* it's a create msg */
amdgpu_bo_kunmap(bo);
if (msg_type != 0) { default:
DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
return -EINVAL; return -EINVAL;
} }
BUG();
/* it's a create msg, no special handling needed */
}
/* create or decode, validate the handle */
for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
if (atomic_read(&adev->uvd.handles[i]) == handle)
return 0;
}
/* handle not found try to alloc a new one */
for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) {
adev->uvd.filp[i] = ctx->parser->filp;
return 0;
}
}
DRM_ERROR("No more free UVD handles!\n");
return -EINVAL; return -EINVAL;
} }
...@@ -805,10 +819,10 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) ...@@ -805,10 +819,10 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
} }
static int amdgpu_uvd_free_job( static int amdgpu_uvd_free_job(
struct amdgpu_job *sched_job) struct amdgpu_job *job)
{ {
amdgpu_ib_free(sched_job->adev, sched_job->ibs); amdgpu_ib_free(job->adev, job->ibs);
kfree(sched_job->ibs); kfree(job->ibs);
return 0; return 0;
} }
...@@ -905,7 +919,7 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, ...@@ -905,7 +919,7 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
NULL, &bo); NULL, NULL, &bo);
if (r) if (r)
return r; return r;
...@@ -954,7 +968,7 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, ...@@ -954,7 +968,7 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
NULL, &bo); NULL, NULL, &bo);
if (r) if (r)
return r; return r;
......
...@@ -143,7 +143,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) ...@@ -143,7 +143,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
NULL, &adev->vce.vcpu_bo); NULL, NULL, &adev->vce.vcpu_bo);
if (r) { if (r) {
dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r); dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
return r; return r;
...@@ -342,10 +342,10 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) ...@@ -342,10 +342,10 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
} }
static int amdgpu_vce_free_job( static int amdgpu_vce_free_job(
struct amdgpu_job *sched_job) struct amdgpu_job *job)
{ {
amdgpu_ib_free(sched_job->adev, sched_job->ibs); amdgpu_ib_free(job->adev, job->ibs);
kfree(sched_job->ibs); kfree(job->ibs);
return 0; return 0;
} }
......
...@@ -316,12 +316,12 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev, ...@@ -316,12 +316,12 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
} }
} }
int amdgpu_vm_free_job(struct amdgpu_job *sched_job) int amdgpu_vm_free_job(struct amdgpu_job *job)
{ {
int i; int i;
for (i = 0; i < sched_job->num_ibs; i++) for (i = 0; i < job->num_ibs; i++)
amdgpu_ib_free(sched_job->adev, &sched_job->ibs[i]); amdgpu_ib_free(job->adev, &job->ibs[i]);
kfree(sched_job->ibs); kfree(job->ibs);
return 0; return 0;
} }
...@@ -685,31 +685,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev, ...@@ -685,31 +685,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
return 0; return 0;
} }
/**
* amdgpu_vm_fence_pts - fence page tables after an update
*
* @vm: requested vm
* @start: start of GPU address range
* @end: end of GPU address range
* @fence: fence to use
*
* Fence the page tables in the range @start - @end (cayman+).
*
* Global and local mutex must be locked!
*/
static void amdgpu_vm_fence_pts(struct amdgpu_vm *vm,
uint64_t start, uint64_t end,
struct fence *fence)
{
unsigned i;
start >>= amdgpu_vm_block_size;
end >>= amdgpu_vm_block_size;
for (i = start; i <= end; ++i)
amdgpu_bo_fence(vm->page_tables[i].bo, fence, true);
}
/** /**
* amdgpu_vm_bo_update_mapping - update a mapping in the vm page table * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
* *
...@@ -813,8 +788,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, ...@@ -813,8 +788,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
if (r) if (r)
goto error_free; goto error_free;
amdgpu_vm_fence_pts(vm, mapping->it.start, amdgpu_bo_fence(vm->page_directory, f, true);
mapping->it.last + 1, f);
if (fence) { if (fence) {
fence_put(*fence); fence_put(*fence);
*fence = fence_get(f); *fence = fence_get(f);
...@@ -855,7 +829,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, ...@@ -855,7 +829,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
int r; int r;
if (mem) { if (mem) {
addr = mem->start << PAGE_SHIFT; addr = (u64)mem->start << PAGE_SHIFT;
if (mem->mem_type != TTM_PL_TT) if (mem->mem_type != TTM_PL_TT)
addr += adev->vm_manager.vram_base_offset; addr += adev->vm_manager.vram_base_offset;
} else { } else {
...@@ -1089,6 +1063,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, ...@@ -1089,6 +1063,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
/* walk over the address space and allocate the page tables */ /* walk over the address space and allocate the page tables */
for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) { for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) {
struct reservation_object *resv = vm->page_directory->tbo.resv;
struct amdgpu_bo *pt; struct amdgpu_bo *pt;
if (vm->page_tables[pt_idx].bo) if (vm->page_tables[pt_idx].bo)
...@@ -1097,11 +1072,13 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, ...@@ -1097,11 +1072,13 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
/* drop mutex to allocate and clear page table */ /* drop mutex to allocate and clear page table */
mutex_unlock(&vm->mutex); mutex_unlock(&vm->mutex);
ww_mutex_lock(&resv->lock, NULL);
r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8, r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
AMDGPU_GPU_PAGE_SIZE, true, AMDGPU_GPU_PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_NO_CPU_ACCESS, AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
NULL, &pt); NULL, resv, &pt);
ww_mutex_unlock(&resv->lock);
if (r) if (r)
goto error_free; goto error_free;
...@@ -1303,7 +1280,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) ...@@ -1303,7 +1280,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
r = amdgpu_bo_create(adev, pd_size, align, true, r = amdgpu_bo_create(adev, pd_size, align, true,
AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_NO_CPU_ACCESS, AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
NULL, &vm->page_directory); NULL, NULL, &vm->page_directory);
if (r) if (r)
return r; return r;
......
...@@ -814,7 +814,8 @@ int cz_smu_init(struct amdgpu_device *adev) ...@@ -814,7 +814,8 @@ int cz_smu_init(struct amdgpu_device *adev)
* 3. map kernel virtual address * 3. map kernel virtual address
*/ */
ret = amdgpu_bo_create(adev, priv->toc_buffer.data_size, PAGE_SIZE, ret = amdgpu_bo_create(adev, priv->toc_buffer.data_size, PAGE_SIZE,
true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, toc_buf); true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
toc_buf);
if (ret) { if (ret) {
dev_err(adev->dev, "(%d) SMC TOC buffer allocation failed\n", ret); dev_err(adev->dev, "(%d) SMC TOC buffer allocation failed\n", ret);
...@@ -822,7 +823,8 @@ int cz_smu_init(struct amdgpu_device *adev) ...@@ -822,7 +823,8 @@ int cz_smu_init(struct amdgpu_device *adev)
} }
ret = amdgpu_bo_create(adev, priv->smu_buffer.data_size, PAGE_SIZE, ret = amdgpu_bo_create(adev, priv->smu_buffer.data_size, PAGE_SIZE,
true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, smu_buf); true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
smu_buf);
if (ret) { if (ret) {
dev_err(adev->dev, "(%d) SMC Internal buffer allocation failed\n", ret); dev_err(adev->dev, "(%d) SMC Internal buffer allocation failed\n", ret);
......
...@@ -764,7 +764,7 @@ int fiji_smu_init(struct amdgpu_device *adev) ...@@ -764,7 +764,7 @@ int fiji_smu_init(struct amdgpu_device *adev)
ret = amdgpu_bo_create(adev, image_size, PAGE_SIZE, ret = amdgpu_bo_create(adev, image_size, PAGE_SIZE,
true, AMDGPU_GEM_DOMAIN_VRAM, true, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
NULL, toc_buf); NULL, NULL, toc_buf);
if (ret) { if (ret) {
DRM_ERROR("Failed to allocate memory for TOC buffer\n"); DRM_ERROR("Failed to allocate memory for TOC buffer\n");
return -ENOMEM; return -ENOMEM;
...@@ -774,7 +774,7 @@ int fiji_smu_init(struct amdgpu_device *adev) ...@@ -774,7 +774,7 @@ int fiji_smu_init(struct amdgpu_device *adev)
ret = amdgpu_bo_create(adev, smu_internal_buffer_size, PAGE_SIZE, ret = amdgpu_bo_create(adev, smu_internal_buffer_size, PAGE_SIZE,
true, AMDGPU_GEM_DOMAIN_VRAM, true, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
NULL, smu_buf); NULL, NULL, smu_buf);
if (ret) { if (ret) {
DRM_ERROR("Failed to allocate memory for SMU internal buffer\n"); DRM_ERROR("Failed to allocate memory for SMU internal buffer\n");
return -ENOMEM; return -ENOMEM;
......
...@@ -3206,7 +3206,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev) ...@@ -3206,7 +3206,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
r = amdgpu_bo_create(adev, r = amdgpu_bo_create(adev,
adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
PAGE_SIZE, true, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0, NULL, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
&adev->gfx.mec.hpd_eop_obj); &adev->gfx.mec.hpd_eop_obj);
if (r) { if (r) {
dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
...@@ -3373,7 +3373,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) ...@@ -3373,7 +3373,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
r = amdgpu_bo_create(adev, r = amdgpu_bo_create(adev,
sizeof(struct bonaire_mqd), sizeof(struct bonaire_mqd),
PAGE_SIZE, true, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0, NULL, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
&ring->mqd_obj); &ring->mqd_obj);
if (r) { if (r) {
dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
...@@ -3610,41 +3610,6 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev) ...@@ -3610,41 +3610,6 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
return 0; return 0;
} }
static void gfx_v7_0_ce_sync_me(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u64 gpu_addr = adev->wb.gpu_addr + adev->gfx.ce_sync_offs * 4;
/* instruct DE to set a magic number */
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
WRITE_DATA_DST_SEL(5)));
amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
amdgpu_ring_write(ring, 1);
/* let CE wait till condition satisfied */
amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
WAIT_REG_MEM_FUNCTION(3) | /* == */
WAIT_REG_MEM_ENGINE(2))); /* ce */
amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
amdgpu_ring_write(ring, 1);
amdgpu_ring_write(ring, 0xffffffff);
amdgpu_ring_write(ring, 4); /* poll interval */
/* instruct CE to reset wb of ce_sync to zero */
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
WRITE_DATA_DST_SEL(5) |
WR_CONFIRM));
amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
amdgpu_ring_write(ring, 0);
}
/* /*
* vm * vm
* VMID 0 is the physical GPU addresses as used by the kernel. * VMID 0 is the physical GPU addresses as used by the kernel.
...@@ -3663,6 +3628,13 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, ...@@ -3663,6 +3628,13 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vm_id, uint64_t pd_addr) unsigned vm_id, uint64_t pd_addr)
{ {
int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
if (usepfp) {
/* sync CE with ME to prevent CE from fetching the CEIB before the context switch is done */
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
amdgpu_ring_write(ring, 0);
}
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
...@@ -3703,7 +3675,10 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, ...@@ -3703,7 +3675,10 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, 0x0); amdgpu_ring_write(ring, 0x0);
/* sync CE with ME to prevent CE from fetching the CEIB before the context switch is done */ /* sync CE with ME to prevent CE from fetching the CEIB before the context switch is done */
gfx_v7_0_ce_sync_me(ring); amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
amdgpu_ring_write(ring, 0);
} }
} }
...@@ -3788,7 +3763,8 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) ...@@ -3788,7 +3763,8 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
NULL, &adev->gfx.rlc.save_restore_obj); NULL, NULL,
&adev->gfx.rlc.save_restore_obj);
if (r) { if (r) {
dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r); dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r);
return r; return r;
...@@ -3831,7 +3807,8 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) ...@@ -3831,7 +3807,8 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
NULL, &adev->gfx.rlc.clear_state_obj); NULL, NULL,
&adev->gfx.rlc.clear_state_obj);
if (r) { if (r) {
dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
gfx_v7_0_rlc_fini(adev); gfx_v7_0_rlc_fini(adev);
...@@ -3870,7 +3847,8 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) ...@@ -3870,7 +3847,8 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true, r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
NULL, &adev->gfx.rlc.cp_table_obj); NULL, NULL,
&adev->gfx.rlc.cp_table_obj);
if (r) { if (r) {
dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
gfx_v7_0_rlc_fini(adev); gfx_v7_0_rlc_fini(adev);
...@@ -4802,12 +4780,6 @@ static int gfx_v7_0_sw_init(void *handle) ...@@ -4802,12 +4780,6 @@ static int gfx_v7_0_sw_init(void *handle)
return r; return r;
} }
r = amdgpu_wb_get(adev, &adev->gfx.ce_sync_offs);
if (r) {
DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r);
return r;
}
for (i = 0; i < adev->gfx.num_gfx_rings; i++) { for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i]; ring = &adev->gfx.gfx_ring[i];
ring->ring_obj = NULL; ring->ring_obj = NULL;
...@@ -4851,21 +4823,21 @@ static int gfx_v7_0_sw_init(void *handle) ...@@ -4851,21 +4823,21 @@ static int gfx_v7_0_sw_init(void *handle)
r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size, r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
PAGE_SIZE, true, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GDS, 0, AMDGPU_GEM_DOMAIN_GDS, 0,
NULL, &adev->gds.gds_gfx_bo); NULL, NULL, &adev->gds.gds_gfx_bo);
if (r) if (r)
return r; return r;
r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size, r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
PAGE_SIZE, true, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GWS, 0, AMDGPU_GEM_DOMAIN_GWS, 0,
NULL, &adev->gds.gws_gfx_bo); NULL, NULL, &adev->gds.gws_gfx_bo);
if (r) if (r)
return r; return r;
r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size, r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
PAGE_SIZE, true, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_OA, 0, AMDGPU_GEM_DOMAIN_OA, 0,
NULL, &adev->gds.oa_gfx_bo); NULL, NULL, &adev->gds.oa_gfx_bo);
if (r) if (r)
return r; return r;
...@@ -4886,8 +4858,6 @@ static int gfx_v7_0_sw_fini(void *handle) ...@@ -4886,8 +4858,6 @@ static int gfx_v7_0_sw_fini(void *handle)
for (i = 0; i < adev->gfx.num_compute_rings; i++) for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]); amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
amdgpu_wb_free(adev, adev->gfx.ce_sync_offs);
gfx_v7_0_cp_compute_fini(adev); gfx_v7_0_cp_compute_fini(adev);
gfx_v7_0_rlc_fini(adev); gfx_v7_0_rlc_fini(adev);
gfx_v7_0_mec_fini(adev); gfx_v7_0_mec_fini(adev);
......
...@@ -868,7 +868,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) ...@@ -868,7 +868,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
r = amdgpu_bo_create(adev, r = amdgpu_bo_create(adev,
adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
PAGE_SIZE, true, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0, NULL, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
&adev->gfx.mec.hpd_eop_obj); &adev->gfx.mec.hpd_eop_obj);
if (r) { if (r) {
dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
...@@ -940,12 +940,6 @@ static int gfx_v8_0_sw_init(void *handle) ...@@ -940,12 +940,6 @@ static int gfx_v8_0_sw_init(void *handle)
return r; return r;
} }
r = amdgpu_wb_get(adev, &adev->gfx.ce_sync_offs);
if (r) {
DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r);
return r;
}
/* set up the gfx ring */ /* set up the gfx ring */
for (i = 0; i < adev->gfx.num_gfx_rings; i++) { for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i]; ring = &adev->gfx.gfx_ring[i];
...@@ -995,21 +989,21 @@ static int gfx_v8_0_sw_init(void *handle) ...@@ -995,21 +989,21 @@ static int gfx_v8_0_sw_init(void *handle)
/* reserve GDS, GWS and OA resource for gfx */ /* reserve GDS, GWS and OA resource for gfx */
r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size, r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
PAGE_SIZE, true, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GDS, 0, AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
NULL, &adev->gds.gds_gfx_bo); NULL, &adev->gds.gds_gfx_bo);
if (r) if (r)
return r; return r;
r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size, r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
PAGE_SIZE, true, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GWS, 0, AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
NULL, &adev->gds.gws_gfx_bo); NULL, &adev->gds.gws_gfx_bo);
if (r) if (r)
return r; return r;
r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size, r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
PAGE_SIZE, true, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_OA, 0, AMDGPU_GEM_DOMAIN_OA, 0, NULL,
NULL, &adev->gds.oa_gfx_bo); NULL, &adev->gds.oa_gfx_bo);
if (r) if (r)
return r; return r;
...@@ -1033,8 +1027,6 @@ static int gfx_v8_0_sw_fini(void *handle) ...@@ -1033,8 +1027,6 @@ static int gfx_v8_0_sw_fini(void *handle)
for (i = 0; i < adev->gfx.num_compute_rings; i++) for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]); amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
amdgpu_wb_free(adev, adev->gfx.ce_sync_offs);
gfx_v8_0_mec_fini(adev); gfx_v8_0_mec_fini(adev);
return 0; return 0;
...@@ -3106,7 +3098,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) ...@@ -3106,7 +3098,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
sizeof(struct vi_mqd), sizeof(struct vi_mqd),
PAGE_SIZE, true, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0, NULL, AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
&ring->mqd_obj); NULL, &ring->mqd_obj);
if (r) { if (r) {
dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
return r; return r;
...@@ -3965,6 +3957,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, ...@@ -3965,6 +3957,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
amdgpu_ring_write(ring, lower_32_bits(seq)); amdgpu_ring_write(ring, lower_32_bits(seq));
amdgpu_ring_write(ring, upper_32_bits(seq)); amdgpu_ring_write(ring, upper_32_bits(seq));
} }
/** /**
...@@ -4005,49 +3998,34 @@ static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring, ...@@ -4005,49 +3998,34 @@ static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring,
return true; return true;
} }
static void gfx_v8_0_ce_sync_me(struct amdgpu_ring *ring) static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vm_id, uint64_t pd_addr)
{ {
struct amdgpu_device *adev = ring->adev; int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
u64 gpu_addr = adev->wb.gpu_addr + adev->gfx.ce_sync_offs * 4; uint32_t seq = ring->fence_drv.sync_seq[ring->idx];
uint64_t addr = ring->fence_drv.gpu_addr;
/* instruct DE to set a magic number */
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
WRITE_DATA_DST_SEL(5)));
amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
amdgpu_ring_write(ring, 1);
/* let CE wait till condition satisfied */
amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ WAIT_REG_MEM_FUNCTION(3))); /* equal */
WAIT_REG_MEM_FUNCTION(3) | /* == */ amdgpu_ring_write(ring, addr & 0xfffffffc);
WAIT_REG_MEM_ENGINE(2))); /* ce */ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); amdgpu_ring_write(ring, seq);
amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
amdgpu_ring_write(ring, 1);
amdgpu_ring_write(ring, 0xffffffff); amdgpu_ring_write(ring, 0xffffffff);
amdgpu_ring_write(ring, 4); /* poll interval */ amdgpu_ring_write(ring, 4); /* poll interval */
/* instruct CE to reset wb of ce_sync to zero */ if (usepfp) {
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); /* synce CE with ME to prevent CE fetch CEIB before context switch done */
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
WRITE_DATA_DST_SEL(5) |
WR_CONFIRM));
amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, 0);
} amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
amdgpu_ring_write(ring, 0);
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, }
unsigned vm_id, uint64_t pd_addr)
{
int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
WRITE_DATA_DST_SEL(0))); WRITE_DATA_DST_SEL(0)) |
WR_CONFIRM);
if (vm_id < 8) { if (vm_id < 8) {
amdgpu_ring_write(ring, amdgpu_ring_write(ring,
(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
...@@ -4083,9 +4061,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, ...@@ -4083,9 +4061,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
/* sync PFP to ME, otherwise we might get invalid PFP reads */ /* sync PFP to ME, otherwise we might get invalid PFP reads */
amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
amdgpu_ring_write(ring, 0x0); amdgpu_ring_write(ring, 0x0);
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
/* synce CE with ME to prevent CE fetch CEIB before context switch done */ amdgpu_ring_write(ring, 0);
gfx_v8_0_ce_sync_me(ring); amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
amdgpu_ring_write(ring, 0);
} }
} }
......
...@@ -625,7 +625,7 @@ int iceland_smu_init(struct amdgpu_device *adev) ...@@ -625,7 +625,7 @@ int iceland_smu_init(struct amdgpu_device *adev)
ret = amdgpu_bo_create(adev, image_size, PAGE_SIZE, ret = amdgpu_bo_create(adev, image_size, PAGE_SIZE,
true, AMDGPU_GEM_DOMAIN_VRAM, true, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
NULL, toc_buf); NULL, NULL, toc_buf);
if (ret) { if (ret) {
DRM_ERROR("Failed to allocate memory for TOC buffer\n"); DRM_ERROR("Failed to allocate memory for TOC buffer\n");
return -ENOMEM; return -ENOMEM;
......
...@@ -763,7 +763,7 @@ int tonga_smu_init(struct amdgpu_device *adev) ...@@ -763,7 +763,7 @@ int tonga_smu_init(struct amdgpu_device *adev)
ret = amdgpu_bo_create(adev, image_size, PAGE_SIZE, ret = amdgpu_bo_create(adev, image_size, PAGE_SIZE,
true, AMDGPU_GEM_DOMAIN_VRAM, true, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
NULL, toc_buf); NULL, NULL, toc_buf);
if (ret) { if (ret) {
DRM_ERROR("Failed to allocate memory for TOC buffer\n"); DRM_ERROR("Failed to allocate memory for TOC buffer\n");
return -ENOMEM; return -ENOMEM;
...@@ -773,7 +773,7 @@ int tonga_smu_init(struct amdgpu_device *adev) ...@@ -773,7 +773,7 @@ int tonga_smu_init(struct amdgpu_device *adev)
ret = amdgpu_bo_create(adev, smu_internal_buffer_size, PAGE_SIZE, ret = amdgpu_bo_create(adev, smu_internal_buffer_size, PAGE_SIZE,
true, AMDGPU_GEM_DOMAIN_VRAM, true, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
NULL, smu_buf); NULL, NULL, smu_buf);
if (ret) { if (ret) {
DRM_ERROR("Failed to allocate memory for SMU internal buffer\n"); DRM_ERROR("Failed to allocate memory for SMU internal buffer\n");
return -ENOMEM; return -ENOMEM;
......
...@@ -224,11 +224,11 @@ static int uvd_v4_2_suspend(void *handle) ...@@ -224,11 +224,11 @@ static int uvd_v4_2_suspend(void *handle)
int r; int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
r = uvd_v4_2_hw_fini(adev); r = amdgpu_uvd_suspend(adev);
if (r) if (r)
return r; return r;
r = amdgpu_uvd_suspend(adev); r = uvd_v4_2_hw_fini(adev);
if (r) if (r)
return r; return r;
......
...@@ -220,11 +220,11 @@ static int uvd_v5_0_suspend(void *handle) ...@@ -220,11 +220,11 @@ static int uvd_v5_0_suspend(void *handle)
int r; int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
r = uvd_v5_0_hw_fini(adev); r = amdgpu_uvd_suspend(adev);
if (r) if (r)
return r; return r;
r = amdgpu_uvd_suspend(adev); r = uvd_v5_0_hw_fini(adev);
if (r) if (r)
return r; return r;
......
...@@ -214,11 +214,13 @@ static int uvd_v6_0_suspend(void *handle) ...@@ -214,11 +214,13 @@ static int uvd_v6_0_suspend(void *handle)
int r; int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
r = uvd_v6_0_hw_fini(adev); /* Skip this for APU for now */
if (!(adev->flags & AMD_IS_APU)) {
r = amdgpu_uvd_suspend(adev);
if (r) if (r)
return r; return r;
}
r = amdgpu_uvd_suspend(adev); r = uvd_v6_0_hw_fini(adev);
if (r) if (r)
return r; return r;
...@@ -230,10 +232,12 @@ static int uvd_v6_0_resume(void *handle) ...@@ -230,10 +232,12 @@ static int uvd_v6_0_resume(void *handle)
int r; int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* Skip this for APU for now */
if (!(adev->flags & AMD_IS_APU)) {
r = amdgpu_uvd_resume(adev); r = amdgpu_uvd_resume(adev);
if (r) if (r)
return r; return r;
}
r = uvd_v6_0_hw_init(adev); r = uvd_v6_0_hw_init(adev);
if (r) if (r)
return r; return r;
......
...@@ -1400,7 +1400,8 @@ static int vi_common_early_init(void *handle) ...@@ -1400,7 +1400,8 @@ static int vi_common_early_init(void *handle)
case CHIP_CARRIZO: case CHIP_CARRIZO:
adev->has_uvd = true; adev->has_uvd = true;
adev->cg_flags = 0; adev->cg_flags = 0;
adev->pg_flags = AMDGPU_PG_SUPPORT_UVD | AMDGPU_PG_SUPPORT_VCE; /* Disable UVD pg */
adev->pg_flags = /* AMDGPU_PG_SUPPORT_UVD | */AMDGPU_PG_SUPPORT_VCE;
adev->external_rev_id = adev->rev_id + 0x1; adev->external_rev_id = adev->rev_id + 0x1;
if (amdgpu_smc_load_fw && smc_enabled) if (amdgpu_smc_load_fw && smc_enabled)
adev->firmware.smu_load = true; adev->firmware.smu_load = true;
......
/*
 * Tracepoint definitions for the GPU scheduler (TRACE_SYSTEM gpu_sched).
 *
 * The guard deliberately lets the header through again when
 * TRACE_HEADER_MULTI_READ is defined, because <trace/define_trace.h>
 * re-includes this file to expand the TRACE_EVENT() macros.
 *
 * The macro defined below must be the same one tested by the #if;
 * otherwise the guard never takes effect and a second plain include
 * of this header redefines the trace event.
 */
#if !defined(_GPU_SCHED_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _GPU_SCHED_TRACE_H

#include <linux/stringify.h>
#include <linux/types.h>
#include <linux/tracepoint.h>

#include <drm/drmP.h>

#undef TRACE_SYSTEM
#define TRACE_SYSTEM gpu_sched
#define TRACE_INCLUDE_FILE gpu_sched_trace

/*
 * amd_sched_job - records a scheduler job together with queue depths.
 *
 * @sched_job: the job being traced
 *
 * Recorded fields:
 *   entity       - sched_job->s_entity
 *   name         - sched_job->sched->name
 *   job_count    - bytes held in the entity's job_queue kfifo divided by
 *                  sizeof(sched_job), i.e. by the size of one job pointer
 *   hw_job_count - current value of sched->hw_rq_count
 */
TRACE_EVENT(amd_sched_job,
	    TP_PROTO(struct amd_sched_job *sched_job),
	    TP_ARGS(sched_job),
	    TP_STRUCT__entry(
			     __field(struct amd_sched_entity *, entity)
			     /*
			      * NOTE(review): only the pointer is stored, not a
			      * copy of the string; this presumably relies on the
			      * scheduler name outliving the trace buffer - confirm.
			      */
			     __field(const char *, name)
			     __field(u32, job_count)
			     __field(int, hw_job_count)
			     ),

	    TP_fast_assign(
			   __entry->entity = sched_job->s_entity;
			   __entry->name = sched_job->sched->name;
			   __entry->job_count = kfifo_len(
				   &sched_job->s_entity->job_queue) / sizeof(sched_job);
			   __entry->hw_job_count = atomic_read(
				   &sched_job->sched->hw_rq_count);
			   ),
	    TP_printk("entity=%p, ring=%s, job count:%u, hw job count:%d",
		      __entry->entity, __entry->name, __entry->job_count,
		      __entry->hw_job_count)
);
#endif

/* This part must be outside protection */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#include <trace/define_trace.h>
...@@ -27,6 +27,9 @@ ...@@ -27,6 +27,9 @@
#include <drm/drmP.h> #include <drm/drmP.h>
#include "gpu_scheduler.h" #include "gpu_scheduler.h"
#define CREATE_TRACE_POINTS
#include "gpu_sched_trace.h"
static struct amd_sched_job * static struct amd_sched_job *
amd_sched_entity_pop_job(struct amd_sched_entity *entity); amd_sched_entity_pop_job(struct amd_sched_entity *entity);
static void amd_sched_wakeup(struct amd_gpu_scheduler *sched); static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
...@@ -65,29 +68,29 @@ static struct amd_sched_job * ...@@ -65,29 +68,29 @@ static struct amd_sched_job *
amd_sched_rq_select_job(struct amd_sched_rq *rq) amd_sched_rq_select_job(struct amd_sched_rq *rq)
{ {
struct amd_sched_entity *entity; struct amd_sched_entity *entity;
struct amd_sched_job *job; struct amd_sched_job *sched_job;
spin_lock(&rq->lock); spin_lock(&rq->lock);
entity = rq->current_entity; entity = rq->current_entity;
if (entity) { if (entity) {
list_for_each_entry_continue(entity, &rq->entities, list) { list_for_each_entry_continue(entity, &rq->entities, list) {
job = amd_sched_entity_pop_job(entity); sched_job = amd_sched_entity_pop_job(entity);
if (job) { if (sched_job) {
rq->current_entity = entity; rq->current_entity = entity;
spin_unlock(&rq->lock); spin_unlock(&rq->lock);
return job; return sched_job;
} }
} }
} }
list_for_each_entry(entity, &rq->entities, list) { list_for_each_entry(entity, &rq->entities, list) {
job = amd_sched_entity_pop_job(entity); sched_job = amd_sched_entity_pop_job(entity);
if (job) { if (sched_job) {
rq->current_entity = entity; rq->current_entity = entity;
spin_unlock(&rq->lock); spin_unlock(&rq->lock);
return job; return sched_job;
} }
if (entity == rq->current_entity) if (entity == rq->current_entity)
...@@ -115,23 +118,27 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, ...@@ -115,23 +118,27 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
struct amd_sched_rq *rq, struct amd_sched_rq *rq,
uint32_t jobs) uint32_t jobs)
{ {
int r;
if (!(sched && entity && rq)) if (!(sched && entity && rq))
return -EINVAL; return -EINVAL;
memset(entity, 0, sizeof(struct amd_sched_entity)); memset(entity, 0, sizeof(struct amd_sched_entity));
entity->belongto_rq = rq; INIT_LIST_HEAD(&entity->list);
entity->scheduler = sched; entity->rq = rq;
entity->fence_context = fence_context_alloc(1); entity->sched = sched;
if(kfifo_alloc(&entity->job_queue,
jobs * sizeof(void *),
GFP_KERNEL))
return -EINVAL;
spin_lock_init(&entity->queue_lock); spin_lock_init(&entity->queue_lock);
r = kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL);
if (r)
return r;
atomic_set(&entity->fence_seq, 0); atomic_set(&entity->fence_seq, 0);
entity->fence_context = fence_context_alloc(1);
/* Add the entity to the run queue */ /* Add the entity to the run queue */
amd_sched_rq_add_entity(rq, entity); amd_sched_rq_add_entity(rq, entity);
return 0; return 0;
} }
...@@ -146,8 +153,8 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, ...@@ -146,8 +153,8 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
static bool amd_sched_entity_is_initialized(struct amd_gpu_scheduler *sched, static bool amd_sched_entity_is_initialized(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *entity) struct amd_sched_entity *entity)
{ {
return entity->scheduler == sched && return entity->sched == sched &&
entity->belongto_rq != NULL; entity->rq != NULL;
} }
/** /**
...@@ -177,7 +184,7 @@ static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity) ...@@ -177,7 +184,7 @@ static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity)
void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *entity) struct amd_sched_entity *entity)
{ {
struct amd_sched_rq *rq = entity->belongto_rq; struct amd_sched_rq *rq = entity->rq;
if (!amd_sched_entity_is_initialized(sched, entity)) if (!amd_sched_entity_is_initialized(sched, entity))
return; return;
...@@ -198,22 +205,22 @@ static void amd_sched_entity_wakeup(struct fence *f, struct fence_cb *cb) ...@@ -198,22 +205,22 @@ static void amd_sched_entity_wakeup(struct fence *f, struct fence_cb *cb)
container_of(cb, struct amd_sched_entity, cb); container_of(cb, struct amd_sched_entity, cb);
entity->dependency = NULL; entity->dependency = NULL;
fence_put(f); fence_put(f);
amd_sched_wakeup(entity->scheduler); amd_sched_wakeup(entity->sched);
} }
static struct amd_sched_job * static struct amd_sched_job *
amd_sched_entity_pop_job(struct amd_sched_entity *entity) amd_sched_entity_pop_job(struct amd_sched_entity *entity)
{ {
struct amd_gpu_scheduler *sched = entity->scheduler; struct amd_gpu_scheduler *sched = entity->sched;
struct amd_sched_job *job; struct amd_sched_job *sched_job;
if (ACCESS_ONCE(entity->dependency)) if (ACCESS_ONCE(entity->dependency))
return NULL; return NULL;
if (!kfifo_out_peek(&entity->job_queue, &job, sizeof(job))) if (!kfifo_out_peek(&entity->job_queue, &sched_job, sizeof(sched_job)))
return NULL; return NULL;
while ((entity->dependency = sched->ops->dependency(job))) { while ((entity->dependency = sched->ops->dependency(sched_job))) {
if (fence_add_callback(entity->dependency, &entity->cb, if (fence_add_callback(entity->dependency, &entity->cb,
amd_sched_entity_wakeup)) amd_sched_entity_wakeup))
...@@ -222,32 +229,33 @@ amd_sched_entity_pop_job(struct amd_sched_entity *entity) ...@@ -222,32 +229,33 @@ amd_sched_entity_pop_job(struct amd_sched_entity *entity)
return NULL; return NULL;
} }
return job; return sched_job;
} }
/** /**
* Helper to submit a job to the job queue * Helper to submit a job to the job queue
* *
* @job The pointer to job required to submit * @sched_job The pointer to job required to submit
* *
* Returns true if we could submit the job. * Returns true if we could submit the job.
*/ */
static bool amd_sched_entity_in(struct amd_sched_job *job) static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
{ {
struct amd_sched_entity *entity = job->s_entity; struct amd_sched_entity *entity = sched_job->s_entity;
bool added, first = false; bool added, first = false;
spin_lock(&entity->queue_lock); spin_lock(&entity->queue_lock);
added = kfifo_in(&entity->job_queue, &job, sizeof(job)) == sizeof(job); added = kfifo_in(&entity->job_queue, &sched_job,
sizeof(sched_job)) == sizeof(sched_job);
if (added && kfifo_len(&entity->job_queue) == sizeof(job)) if (added && kfifo_len(&entity->job_queue) == sizeof(sched_job))
first = true; first = true;
spin_unlock(&entity->queue_lock); spin_unlock(&entity->queue_lock);
/* first job wakes up scheduler */ /* first job wakes up scheduler */
if (first) if (first)
amd_sched_wakeup(job->sched); amd_sched_wakeup(sched_job->sched);
return added; return added;
} }
...@@ -255,7 +263,7 @@ static bool amd_sched_entity_in(struct amd_sched_job *job) ...@@ -255,7 +263,7 @@ static bool amd_sched_entity_in(struct amd_sched_job *job)
/** /**
* Submit a job to the job queue * Submit a job to the job queue
* *
* @job The pointer to job required to submit * @sched_job The pointer to job required to submit
* *
* Returns 0 for success, negative error code otherwise. * Returns 0 for success, negative error code otherwise.
*/ */
...@@ -271,9 +279,9 @@ int amd_sched_entity_push_job(struct amd_sched_job *sched_job) ...@@ -271,9 +279,9 @@ int amd_sched_entity_push_job(struct amd_sched_job *sched_job)
fence_get(&fence->base); fence_get(&fence->base);
sched_job->s_fence = fence; sched_job->s_fence = fence;
wait_event(entity->scheduler->job_scheduled, wait_event(entity->sched->job_scheduled,
amd_sched_entity_in(sched_job)); amd_sched_entity_in(sched_job));
trace_amd_sched_job(sched_job);
return 0; return 0;
} }
...@@ -301,30 +309,28 @@ static void amd_sched_wakeup(struct amd_gpu_scheduler *sched) ...@@ -301,30 +309,28 @@ static void amd_sched_wakeup(struct amd_gpu_scheduler *sched)
static struct amd_sched_job * static struct amd_sched_job *
amd_sched_select_job(struct amd_gpu_scheduler *sched) amd_sched_select_job(struct amd_gpu_scheduler *sched)
{ {
struct amd_sched_job *job; struct amd_sched_job *sched_job;
if (!amd_sched_ready(sched)) if (!amd_sched_ready(sched))
return NULL; return NULL;
/* Kernel run queue has higher priority than normal run queue*/ /* Kernel run queue has higher priority than normal run queue*/
job = amd_sched_rq_select_job(&sched->kernel_rq); sched_job = amd_sched_rq_select_job(&sched->kernel_rq);
if (job == NULL) if (sched_job == NULL)
job = amd_sched_rq_select_job(&sched->sched_rq); sched_job = amd_sched_rq_select_job(&sched->sched_rq);
return job; return sched_job;
} }
static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
{ {
struct amd_sched_job *sched_job = struct amd_sched_fence *s_fence =
container_of(cb, struct amd_sched_job, cb); container_of(cb, struct amd_sched_fence, cb);
struct amd_gpu_scheduler *sched; struct amd_gpu_scheduler *sched = s_fence->sched;
sched = sched_job->sched;
amd_sched_fence_signal(sched_job->s_fence);
atomic_dec(&sched->hw_rq_count); atomic_dec(&sched->hw_rq_count);
fence_put(&sched_job->s_fence->base); amd_sched_fence_signal(s_fence);
sched->ops->process_job(sched_job); fence_put(&s_fence->base);
wake_up_interruptible(&sched->wake_up_worker); wake_up_interruptible(&sched->wake_up_worker);
} }
...@@ -338,87 +344,82 @@ static int amd_sched_main(void *param) ...@@ -338,87 +344,82 @@ static int amd_sched_main(void *param)
while (!kthread_should_stop()) { while (!kthread_should_stop()) {
struct amd_sched_entity *entity; struct amd_sched_entity *entity;
struct amd_sched_job *job; struct amd_sched_fence *s_fence;
struct amd_sched_job *sched_job;
struct fence *fence; struct fence *fence;
wait_event_interruptible(sched->wake_up_worker, wait_event_interruptible(sched->wake_up_worker,
kthread_should_stop() || kthread_should_stop() ||
(job = amd_sched_select_job(sched))); (sched_job = amd_sched_select_job(sched)));
if (!job) if (!sched_job)
continue; continue;
entity = job->s_entity; entity = sched_job->s_entity;
s_fence = sched_job->s_fence;
atomic_inc(&sched->hw_rq_count); atomic_inc(&sched->hw_rq_count);
fence = sched->ops->run_job(job); fence = sched->ops->run_job(sched_job);
if (fence) { if (fence) {
r = fence_add_callback(fence, &job->cb, r = fence_add_callback(fence, &s_fence->cb,
amd_sched_process_job); amd_sched_process_job);
if (r == -ENOENT) if (r == -ENOENT)
amd_sched_process_job(fence, &job->cb); amd_sched_process_job(fence, &s_fence->cb);
else if (r) else if (r)
DRM_ERROR("fence add callback failed (%d)\n", r); DRM_ERROR("fence add callback failed (%d)\n", r);
fence_put(fence); fence_put(fence);
} else {
DRM_ERROR("Failed to run job!\n");
amd_sched_process_job(NULL, &s_fence->cb);
} }
count = kfifo_out(&entity->job_queue, &job, sizeof(job)); count = kfifo_out(&entity->job_queue, &sched_job,
WARN_ON(count != sizeof(job)); sizeof(sched_job));
WARN_ON(count != sizeof(sched_job));
wake_up(&sched->job_scheduled); wake_up(&sched->job_scheduled);
} }
return 0; return 0;
} }
/** /**
* Create a gpu scheduler * Init a gpu scheduler instance
* *
* @sched The pointer to the scheduler
* @ops The backend operations for this scheduler. * @ops The backend operations for this scheduler.
* @ring The ring id for the scheduler.
* @hw_submissions Number of hw submissions to do. * @hw_submissions Number of hw submissions to do.
* @name Name used for debugging
* *
* Return the pointer to scheduler for success, otherwise return NULL * Return 0 on success, otherwise error code.
*/ */
struct amd_gpu_scheduler *amd_sched_create(struct amd_sched_backend_ops *ops, int amd_sched_init(struct amd_gpu_scheduler *sched,
unsigned ring, unsigned hw_submission, struct amd_sched_backend_ops *ops,
void *priv) unsigned hw_submission, const char *name)
{ {
struct amd_gpu_scheduler *sched;
sched = kzalloc(sizeof(struct amd_gpu_scheduler), GFP_KERNEL);
if (!sched)
return NULL;
sched->ops = ops; sched->ops = ops;
sched->ring_id = ring;
sched->hw_submission_limit = hw_submission; sched->hw_submission_limit = hw_submission;
sched->priv = priv; sched->name = name;
snprintf(sched->name, sizeof(sched->name), "amdgpu[%d]", ring);
amd_sched_rq_init(&sched->sched_rq); amd_sched_rq_init(&sched->sched_rq);
amd_sched_rq_init(&sched->kernel_rq); amd_sched_rq_init(&sched->kernel_rq);
init_waitqueue_head(&sched->wake_up_worker); init_waitqueue_head(&sched->wake_up_worker);
init_waitqueue_head(&sched->job_scheduled); init_waitqueue_head(&sched->job_scheduled);
atomic_set(&sched->hw_rq_count, 0); atomic_set(&sched->hw_rq_count, 0);
/* Each scheduler will run on a separate kernel thread */ /* Each scheduler will run on a separate kernel thread */
sched->thread = kthread_run(amd_sched_main, sched, sched->name); sched->thread = kthread_run(amd_sched_main, sched, sched->name);
if (IS_ERR(sched->thread)) { if (IS_ERR(sched->thread)) {
DRM_ERROR("Failed to create scheduler for id %d.\n", ring); DRM_ERROR("Failed to create scheduler for %s.\n", name);
kfree(sched); return PTR_ERR(sched->thread);
return NULL;
} }
return sched; return 0;
} }
/** /**
* Destroy a gpu scheduler * Destroy a gpu scheduler
* *
* @sched The pointer to the scheduler * @sched The pointer to the scheduler
*
* return 0 if succeed. -1 if failed.
*/ */
int amd_sched_destroy(struct amd_gpu_scheduler *sched) void amd_sched_fini(struct amd_gpu_scheduler *sched)
{ {
kthread_stop(sched->thread); kthread_stop(sched->thread);
kfree(sched);
return 0;
} }
...@@ -38,13 +38,15 @@ struct amd_sched_rq; ...@@ -38,13 +38,15 @@ struct amd_sched_rq;
*/ */
struct amd_sched_entity { struct amd_sched_entity {
struct list_head list; struct list_head list;
struct amd_sched_rq *belongto_rq; struct amd_sched_rq *rq;
atomic_t fence_seq; struct amd_gpu_scheduler *sched;
/* the job_queue maintains the jobs submitted by clients */
struct kfifo job_queue;
spinlock_t queue_lock; spinlock_t queue_lock;
struct amd_gpu_scheduler *scheduler; struct kfifo job_queue;
atomic_t fence_seq;
uint64_t fence_context; uint64_t fence_context;
struct fence *dependency; struct fence *dependency;
struct fence_cb cb; struct fence_cb cb;
}; };
...@@ -62,13 +64,13 @@ struct amd_sched_rq { ...@@ -62,13 +64,13 @@ struct amd_sched_rq {
struct amd_sched_fence { struct amd_sched_fence {
struct fence base; struct fence base;
struct amd_gpu_scheduler *scheduler; struct fence_cb cb;
struct amd_gpu_scheduler *sched;
spinlock_t lock; spinlock_t lock;
void *owner; void *owner;
}; };
struct amd_sched_job { struct amd_sched_job {
struct fence_cb cb;
struct amd_gpu_scheduler *sched; struct amd_gpu_scheduler *sched;
struct amd_sched_entity *s_entity; struct amd_sched_entity *s_entity;
struct amd_sched_fence *s_fence; struct amd_sched_fence *s_fence;
...@@ -91,32 +93,29 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f) ...@@ -91,32 +93,29 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f)
* these functions should be implemented in driver side * these functions should be implemented in driver side
*/ */
struct amd_sched_backend_ops { struct amd_sched_backend_ops {
struct fence *(*dependency)(struct amd_sched_job *job); struct fence *(*dependency)(struct amd_sched_job *sched_job);
struct fence *(*run_job)(struct amd_sched_job *job); struct fence *(*run_job)(struct amd_sched_job *sched_job);
void (*process_job)(struct amd_sched_job *job);
}; };
/** /**
* One scheduler is implemented for each hardware ring * One scheduler is implemented for each hardware ring
*/ */
struct amd_gpu_scheduler { struct amd_gpu_scheduler {
struct task_struct *thread; struct amd_sched_backend_ops *ops;
uint32_t hw_submission_limit;
const char *name;
struct amd_sched_rq sched_rq; struct amd_sched_rq sched_rq;
struct amd_sched_rq kernel_rq; struct amd_sched_rq kernel_rq;
atomic_t hw_rq_count;
struct amd_sched_backend_ops *ops;
uint32_t ring_id;
wait_queue_head_t wake_up_worker; wait_queue_head_t wake_up_worker;
wait_queue_head_t job_scheduled; wait_queue_head_t job_scheduled;
uint32_t hw_submission_limit; atomic_t hw_rq_count;
char name[20]; struct task_struct *thread;
void *priv;
}; };
struct amd_gpu_scheduler * int amd_sched_init(struct amd_gpu_scheduler *sched,
amd_sched_create(struct amd_sched_backend_ops *ops, struct amd_sched_backend_ops *ops,
uint32_t ring, uint32_t hw_submission, void *priv); uint32_t hw_submission, const char *name);
int amd_sched_destroy(struct amd_gpu_scheduler *sched); void amd_sched_fini(struct amd_gpu_scheduler *sched);
int amd_sched_entity_init(struct amd_gpu_scheduler *sched, int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *entity, struct amd_sched_entity *entity,
......
...@@ -36,7 +36,7 @@ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity ...@@ -36,7 +36,7 @@ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity
if (fence == NULL) if (fence == NULL)
return NULL; return NULL;
fence->owner = owner; fence->owner = owner;
fence->scheduler = s_entity->scheduler; fence->sched = s_entity->sched;
spin_lock_init(&fence->lock); spin_lock_init(&fence->lock);
seq = atomic_inc_return(&s_entity->fence_seq); seq = atomic_inc_return(&s_entity->fence_seq);
...@@ -63,7 +63,7 @@ static const char *amd_sched_fence_get_driver_name(struct fence *fence) ...@@ -63,7 +63,7 @@ static const char *amd_sched_fence_get_driver_name(struct fence *fence)
static const char *amd_sched_fence_get_timeline_name(struct fence *f) static const char *amd_sched_fence_get_timeline_name(struct fence *f)
{ {
struct amd_sched_fence *fence = to_amd_sched_fence(f); struct amd_sched_fence *fence = to_amd_sched_fence(f);
return (const char *)fence->scheduler->name; return (const char *)fence->sched->name;
} }
static bool amd_sched_fence_enable_signaling(struct fence *f) static bool amd_sched_fence_enable_signaling(struct fence *f)
......
...@@ -1573,10 +1573,12 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) ...@@ -1573,10 +1573,12 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
drm_kms_helper_poll_disable(dev); drm_kms_helper_poll_disable(dev);
drm_modeset_lock_all(dev);
/* turn off display hw */ /* turn off display hw */
list_for_each_entry(connector, &dev->mode_config.connector_list, head) { list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
} }
drm_modeset_unlock_all(dev);
/* unpin the front buffers and cursors */ /* unpin the front buffers and cursors */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
...@@ -1734,9 +1736,11 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon) ...@@ -1734,9 +1736,11 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
if (fbcon) { if (fbcon) {
drm_helper_resume_force_mode(dev); drm_helper_resume_force_mode(dev);
/* turn on display hw */ /* turn on display hw */
drm_modeset_lock_all(dev);
list_for_each_entry(connector, &dev->mode_config.connector_list, head) { list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
} }
drm_modeset_unlock_all(dev);
} }
drm_kms_helper_poll_enable(dev); drm_kms_helper_poll_enable(dev);
......
...@@ -2927,6 +2927,7 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = { ...@@ -2927,6 +2927,7 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = {
{ PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 },
{ PCI_VENDOR_ID_ATI, 0x6811, 0x174b, 0xe271, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x174b, 0xe271, 0, 120000 },
{ PCI_VENDOR_ID_ATI, 0x6810, 0x174b, 0xe271, 85000, 90000 }, { PCI_VENDOR_ID_ATI, 0x6810, 0x174b, 0xe271, 85000, 90000 },
{ PCI_VENDOR_ID_ATI, 0x6811, 0x1762, 0x2015, 0, 120000 },
{ 0, 0, 0, 0 }, { 0, 0, 0, 0 },
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment