Commit 02cfe977 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'drm-fixes-for-v4.14-rc1' of git://people.freedesktop.org/~airlied/linux

Pull drm AMD fixes from Dave Airlie:
 "Just had a single AMD fixes pull from Alex for rc1"

* tag 'drm-fixes-for-v4.14-rc1' of git://people.freedesktop.org/~airlied/linux:
  drm/amdgpu: revert "fix deadlock of reservation between cs and gpu reset v2"
  drm/amdgpu: remove duplicate return statement
  drm/amdgpu: check memory allocation failure
  drm/amd/amdgpu: fix BANK_SELECT on Vega10 (v2)
  drm/amdgpu: inline amdgpu_ttm_do_bind again
  drm/amdgpu: fix amdgpu_ttm_bind
  drm/amdgpu: remove the GART copy hack
  drm/ttm:fix wrong decoding of bo_count
  drm/ttm: fix missing inc bo_count
  drm/amdgpu: set sched_hw_submission higher for KIQ (v3)
  drm/amdgpu: move default gart size setting into gmc modules
  drm/amdgpu: refine default gart size
  drm/amd/powerplay: ACG frequency added in PPTable
  drm/amdgpu: discard commands of killed processes
  drm/amdgpu: fix and cleanup shadow handling
  drm/amdgpu: add automatic per asic settings for gart_size
  drm/amdgpu/gfx8: fix spelling typo in mqd allocation
  drm/amd/powerplay: unhalt mec after loading
  drm/amdgpu/virtual_dce: Virtual display doesn't support disable vblank immediately
  drm/amdgpu: Fix huge page updates with CPU
parents bbe05e54 47e0cd6b
......@@ -76,7 +76,7 @@
extern int amdgpu_modeset;
extern int amdgpu_vram_limit;
extern int amdgpu_vis_vram_limit;
extern unsigned amdgpu_gart_size;
extern int amdgpu_gart_size;
extern int amdgpu_gtt_size;
extern int amdgpu_moverate;
extern int amdgpu_benchmarking;
......
......@@ -155,7 +155,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
{
return (struct kfd2kgd_calls *)&kfd2kgd;
return (struct kfd2kgd_calls *)&kfd2kgd;
}
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
......
......@@ -1079,6 +1079,9 @@ static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
GFP_KERNEL);
p->num_post_dep_syncobjs = 0;
if (!p->post_dep_syncobjs)
return -ENOMEM;
for (i = 0; i < num_deps; ++i) {
p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
if (!p->post_dep_syncobjs[i])
......@@ -1150,7 +1153,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
job->uf_sequence = cs->out.handle;
amdgpu_job_free_resources(job);
amdgpu_cs_parser_fini(p, 0, true);
trace_amdgpu_cs_ioctl(job);
amd_sched_entity_push_job(&job->base);
......@@ -1208,10 +1210,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
goto out;
r = amdgpu_cs_submit(&parser, cs);
if (r)
goto out;
return 0;
out:
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
return r;
......
......@@ -1062,11 +1062,11 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
}
if (amdgpu_gart_size < 32) {
if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
/* gart size must be greater or equal to 32M */
dev_warn(adev->dev, "gart size (%d) too small\n",
amdgpu_gart_size);
amdgpu_gart_size = 32;
amdgpu_gart_size = -1;
}
if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
......@@ -2622,12 +2622,6 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
goto err;
}
r = amdgpu_ttm_bind(&bo->shadow->tbo, &bo->shadow->tbo.mem);
if (r) {
DRM_ERROR("%p bind failed\n", bo->shadow);
goto err;
}
r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
NULL, fence, true);
if (r) {
......
......@@ -76,7 +76,7 @@
int amdgpu_vram_limit = 0;
int amdgpu_vis_vram_limit = 0;
unsigned amdgpu_gart_size = 256;
int amdgpu_gart_size = -1; /* auto */
int amdgpu_gtt_size = -1; /* auto */
int amdgpu_moverate = -1; /* auto */
int amdgpu_benchmarking = 0;
......@@ -128,7 +128,7 @@ module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
MODULE_PARM_DESC(vis_vramlimit, "Restrict visible VRAM for testing, in megabytes");
module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444);
MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc.)");
MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)");
module_param_named(gartsize, amdgpu_gart_size, uint, 0600);
MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)");
......
......@@ -56,18 +56,6 @@
* Common GART table functions.
*/
/**
* amdgpu_gart_set_defaults - set the default gart_size
*
* @adev: amdgpu_device pointer
*
* Set the default gart_size based on parameters and available VRAM.
*/
void amdgpu_gart_set_defaults(struct amdgpu_device *adev)
{
adev->mc.gart_size = (uint64_t)amdgpu_gart_size << 20;
}
/**
* amdgpu_gart_table_ram_alloc - allocate system ram for gart page table
*
......
......@@ -56,7 +56,6 @@ struct amdgpu_gart {
const struct amdgpu_gart_funcs *gart_funcs;
};
void amdgpu_gart_set_defaults(struct amdgpu_device *adev);
int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev);
void amdgpu_gart_table_ram_free(struct amdgpu_device *adev);
int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev);
......
......@@ -108,7 +108,7 @@ bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem)
*
* Allocate the address space for a node.
*/
int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
static int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
struct ttm_buffer_object *tbo,
const struct ttm_place *place,
struct ttm_mem_reg *mem)
......@@ -143,12 +143,8 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
fpfn, lpfn, mode);
spin_unlock(&mgr->lock);
if (!r) {
if (!r)
mem->start = node->start;
if (&tbo->mem == mem)
tbo->offset = (tbo->mem.start << PAGE_SHIFT) +
tbo->bdev->man[tbo->mem.mem_type].gpu_offset;
}
return r;
}
......
......@@ -221,6 +221,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
spin_lock_init(&adev->irq.lock);
if (!adev->enable_virtual_display)
/* Disable vblank irqs aggressively for power-saving */
adev->ddev->vblank_disable_immediate = true;
......
......@@ -91,6 +91,9 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
if (domain & AMDGPU_GEM_DOMAIN_GTT) {
places[c].fpfn = 0;
if (flags & AMDGPU_GEM_CREATE_SHADOW)
places[c].lpfn = adev->mc.gart_size >> PAGE_SHIFT;
else
places[c].lpfn = 0;
places[c].flags = TTM_PL_FLAG_TT;
if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
......@@ -446,17 +449,16 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
if (bo->shadow)
return 0;
bo->flags |= AMDGPU_GEM_CREATE_SHADOW;
memset(&placements, 0,
(AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place));
amdgpu_ttm_placement_init(adev, &placement,
placements, AMDGPU_GEM_DOMAIN_GTT,
AMDGPU_GEM_CREATE_CPU_GTT_USWC);
memset(&placements, 0, sizeof(placements));
amdgpu_ttm_placement_init(adev, &placement, placements,
AMDGPU_GEM_DOMAIN_GTT,
AMDGPU_GEM_CREATE_CPU_GTT_USWC |
AMDGPU_GEM_CREATE_SHADOW);
r = amdgpu_bo_create_restricted(adev, size, byte_align, true,
AMDGPU_GEM_DOMAIN_GTT,
AMDGPU_GEM_CREATE_CPU_GTT_USWC,
AMDGPU_GEM_CREATE_CPU_GTT_USWC |
AMDGPU_GEM_CREATE_SHADOW,
NULL, &placement,
bo->tbo.resv,
0,
......@@ -484,30 +486,28 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
{
struct ttm_placement placement = {0};
struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW;
int r;
memset(&placements, 0,
(AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place));
memset(&placements, 0, sizeof(placements));
amdgpu_ttm_placement_init(adev, &placement, placements,
domain, parent_flags);
amdgpu_ttm_placement_init(adev, &placement,
placements, domain, flags);
r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel,
domain, flags, sg, &placement,
resv, init_value, bo_ptr);
r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel, domain,
parent_flags, sg, &placement, resv,
init_value, bo_ptr);
if (r)
return r;
if (amdgpu_need_backup(adev) && (flags & AMDGPU_GEM_CREATE_SHADOW)) {
if (!resv) {
r = ww_mutex_lock(&(*bo_ptr)->tbo.resv->lock, NULL);
WARN_ON(r != 0);
}
if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) {
if (!resv)
WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
NULL));
r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr));
if (!resv)
ww_mutex_unlock(&(*bo_ptr)->tbo.resv->lock);
reservation_object_unlock((*bo_ptr)->tbo.resv);
if (r)
amdgpu_bo_unref(bo_ptr);
......
......@@ -170,6 +170,16 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
unsigned irq_type)
{
int r;
int sched_hw_submission = amdgpu_sched_hw_submission;
/* Set the hw submission limit higher for KIQ because
* it's used for a number of gfx/compute tasks by both
* KFD and KGD which may have outstanding fences and
* it doesn't really use the gpu scheduler anyway;
* KIQ tasks get submitted directly to the ring.
*/
if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
sched_hw_submission = max(sched_hw_submission, 256);
if (ring->adev == NULL) {
if (adev->num_rings >= AMDGPU_MAX_RINGS)
......@@ -178,8 +188,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->adev = adev;
ring->idx = adev->num_rings++;
adev->rings[ring->idx] = ring;
r = amdgpu_fence_driver_init_ring(ring,
amdgpu_sched_hw_submission);
r = amdgpu_fence_driver_init_ring(ring, sched_hw_submission);
if (r)
return r;
}
......@@ -218,8 +227,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
return r;
}
ring->ring_size = roundup_pow_of_two(max_dw * 4 *
amdgpu_sched_hw_submission);
ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
ring->buf_mask = (ring->ring_size / 4) - 1;
ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
......
......@@ -761,35 +761,11 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
sg_free_table(ttm->sg);
}
static int amdgpu_ttm_do_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
uint64_t flags;
int r;
spin_lock(&gtt->adev->gtt_list_lock);
flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, mem);
gtt->offset = (u64)mem->start << PAGE_SHIFT;
r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
ttm->pages, gtt->ttm.dma_address, flags);
if (r) {
DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
ttm->num_pages, gtt->offset);
goto error_gart_bind;
}
list_add_tail(&gtt->list, &gtt->adev->gtt_list);
error_gart_bind:
spin_unlock(&gtt->adev->gtt_list_lock);
return r;
}
static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
struct ttm_mem_reg *bo_mem)
{
struct amdgpu_ttm_tt *gtt = (void*)ttm;
uint64_t flags;
int r = 0;
if (gtt->userptr) {
......@@ -809,9 +785,24 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
bo_mem->mem_type == AMDGPU_PL_OA)
return -EINVAL;
if (amdgpu_gtt_mgr_is_allocated(bo_mem))
r = amdgpu_ttm_do_bind(ttm, bo_mem);
if (!amdgpu_gtt_mgr_is_allocated(bo_mem))
return 0;
spin_lock(&gtt->adev->gtt_list_lock);
flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
ttm->pages, gtt->ttm.dma_address, flags);
if (r) {
DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
ttm->num_pages, gtt->offset);
goto error_gart_bind;
}
list_add_tail(&gtt->list, &gtt->adev->gtt_list);
error_gart_bind:
spin_unlock(&gtt->adev->gtt_list_lock);
return r;
}
......@@ -824,20 +815,39 @@ bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct ttm_tt *ttm = bo->ttm;
struct ttm_mem_reg tmp;
struct ttm_placement placement;
struct ttm_place placements;
int r;
if (!ttm || amdgpu_ttm_is_bound(ttm))
return 0;
r = amdgpu_gtt_mgr_alloc(&bo->bdev->man[TTM_PL_TT], bo,
NULL, bo_mem);
if (r) {
DRM_ERROR("Failed to allocate GTT address space (%d)\n", r);
tmp = bo->mem;
tmp.mm_node = NULL;
placement.num_placement = 1;
placement.placement = &placements;
placement.num_busy_placement = 1;
placement.busy_placement = &placements;
placements.fpfn = 0;
placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT;
placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
r = ttm_bo_mem_space(bo, &placement, &tmp, true, false);
if (unlikely(r))
return r;
}
return amdgpu_ttm_do_bind(ttm, bo_mem);
r = ttm_bo_move_ttm(bo, true, false, &tmp);
if (unlikely(r))
ttm_bo_mem_put(bo, &tmp);
else
bo->offset = (bo->mem.start << PAGE_SHIFT) +
bo->bdev->man[bo->mem.mem_type].gpu_offset;
return r;
}
int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
......
......@@ -62,10 +62,6 @@ extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func;
extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func;
bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem);
int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
struct ttm_buffer_object *tbo,
const struct ttm_place *place,
struct ttm_mem_reg *mem);
uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man);
uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
......
......@@ -165,14 +165,6 @@ static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
unsigned i;
int r;
if (parent->bo->shadow) {
struct amdgpu_bo *shadow = parent->bo->shadow;
r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
if (r)
return r;
}
if (use_cpu_for_update) {
r = amdgpu_bo_kmap(parent->bo, NULL);
if (r)
......@@ -1277,7 +1269,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
/* In the case of a mixed PT the PDE must point to it*/
if (p->adev->asic_type < CHIP_VEGA10 ||
nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
p->func == amdgpu_vm_do_copy_ptes ||
p->src ||
!(flags & AMDGPU_PTE_VALID)) {
dst = amdgpu_bo_gpu_offset(entry->bo);
......@@ -1294,9 +1286,23 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
entry->addr = (dst | flags);
if (use_cpu_update) {
/* In case a huge page is replaced with a system
* memory mapping, p->pages_addr != NULL and
* amdgpu_vm_cpu_set_ptes would try to translate dst
* through amdgpu_vm_map_gart. But dst is already a
* GPU address (of the page table). Disable
* amdgpu_vm_map_gart temporarily.
*/
dma_addr_t *tmp;
tmp = p->pages_addr;
p->pages_addr = NULL;
pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
pde = pd_addr + (entry - parent->entries) * 8;
amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
p->pages_addr = tmp;
} else {
if (parent->bo->shadow) {
pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
......@@ -1610,7 +1616,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
*
* @adev: amdgpu_device pointer
* @exclusive: fence we need to sync to
* @gtt_flags: flags as they are used for GTT
* @pages_addr: DMA addresses to use for mapping
* @vm: requested vm
* @mapping: mapped range and flags to use for the update
......@@ -1624,7 +1629,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
*/
static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
struct dma_fence *exclusive,
uint64_t gtt_flags,
dma_addr_t *pages_addr,
struct amdgpu_vm *vm,
struct amdgpu_bo_va_mapping *mapping,
......@@ -1679,10 +1683,6 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
}
if (pages_addr) {
if (flags == gtt_flags)
src = adev->gart.table_addr +
(addr >> AMDGPU_GPU_PAGE_SHIFT) * 8;
else
max_entries = min(max_entries, 16ull * 1024ull);
addr = 0;
} else if (flags & AMDGPU_PTE_VALID) {
......@@ -1728,10 +1728,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_vm *vm = bo_va->base.vm;
struct amdgpu_bo_va_mapping *mapping;
dma_addr_t *pages_addr = NULL;
uint64_t gtt_flags, flags;
struct ttm_mem_reg *mem;
struct drm_mm_node *nodes;
struct dma_fence *exclusive;
uint64_t flags;
int r;
if (clear || !bo_va->base.bo) {
......@@ -1751,15 +1751,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
exclusive = reservation_object_get_excl(bo->tbo.resv);
}
if (bo) {
if (bo)
flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
gtt_flags = (amdgpu_ttm_is_bound(bo->tbo.ttm) &&
adev == amdgpu_ttm_adev(bo->tbo.bdev)) ?
flags : 0;
} else {
else
flags = 0x0;
gtt_flags = ~0x0;
}
spin_lock(&vm->status_lock);
if (!list_empty(&bo_va->base.vm_status))
......@@ -1767,8 +1762,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
spin_unlock(&vm->status_lock);
list_for_each_entry(mapping, &bo_va->invalids, list) {
r = amdgpu_vm_bo_split_mapping(adev, exclusive,
gtt_flags, pages_addr, vm,
r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
mapping, flags, nodes,
&bo_va->last_pt_update);
if (r)
......
......@@ -4579,9 +4579,9 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
mqd->compute_misc_reserved = 0x00000003;
if (!(adev->flags & AMD_IS_APU)) {
mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
+ offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
+ offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
+ offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
+ offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
}
eop_base_addr = ring->eop_gpu_addr >> 8;
mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
......@@ -4768,8 +4768,8 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
mutex_unlock(&adev->srbm_mutex);
} else {
memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
mutex_lock(&adev->srbm_mutex);
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
gfx_v8_0_mqd_init(ring);
......@@ -4792,8 +4792,8 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
mutex_lock(&adev->srbm_mutex);
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
gfx_v8_0_mqd_init(ring);
......
......@@ -124,7 +124,7 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
{
uint32_t tmp, field;
uint32_t tmp;
/* Setup L2 cache */
tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL);
......@@ -143,9 +143,8 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
field = adev->vm_manager.fragment_size;
tmp = mmVM_L2_CNTL3_DEFAULT;
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
......
......@@ -332,7 +332,24 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
adev->mc.visible_vram_size = adev->mc.aper_size;
amdgpu_gart_set_defaults(adev);
/* set the gart size */
if (amdgpu_gart_size == -1) {
switch (adev->asic_type) {
case CHIP_HAINAN: /* no MM engines */
default:
adev->mc.gart_size = 256ULL << 20;
break;
case CHIP_VERDE: /* UVD, VCE do not support GPUVM */
case CHIP_TAHITI: /* UVD, VCE do not support GPUVM */
case CHIP_PITCAIRN: /* UVD, VCE do not support GPUVM */
case CHIP_OLAND: /* UVD, VCE do not support GPUVM */
adev->mc.gart_size = 1024ULL << 20;
break;
}
} else {
adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
}
gmc_v6_0_vram_gtt_location(adev, &adev->mc);
return 0;
......
......@@ -386,7 +386,27 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
adev->mc.visible_vram_size = adev->mc.real_vram_size;
amdgpu_gart_set_defaults(adev);
/* set the gart size */
if (amdgpu_gart_size == -1) {
switch (adev->asic_type) {
case CHIP_TOPAZ: /* no MM engines */
default:
adev->mc.gart_size = 256ULL << 20;
break;
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_BONAIRE: /* UVD, VCE do not support GPUVM */
case CHIP_HAWAII: /* UVD, VCE do not support GPUVM */
case CHIP_KAVERI: /* UVD, VCE do not support GPUVM */
case CHIP_KABINI: /* UVD, VCE do not support GPUVM */
case CHIP_MULLINS: /* UVD, VCE do not support GPUVM */
adev->mc.gart_size = 1024ULL << 20;
break;
#endif
}
} else {
adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
}
gmc_v7_0_vram_gtt_location(adev, &adev->mc);
return 0;
......
......@@ -562,7 +562,26 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
adev->mc.visible_vram_size = adev->mc.real_vram_size;
amdgpu_gart_set_defaults(adev);
/* set the gart size */
if (amdgpu_gart_size == -1) {
switch (adev->asic_type) {
case CHIP_POLARIS11: /* all engines support GPUVM */
case CHIP_POLARIS10: /* all engines support GPUVM */
case CHIP_POLARIS12: /* all engines support GPUVM */
default:
adev->mc.gart_size = 256ULL << 20;
break;
case CHIP_TONGA: /* UVD, VCE do not support GPUVM */
case CHIP_FIJI: /* UVD, VCE do not support GPUVM */
case CHIP_CARRIZO: /* UVD, VCE do not support GPUVM, DCE SG support */
case CHIP_STONEY: /* UVD does not support GPUVM, DCE SG support */
adev->mc.gart_size = 1024ULL << 20;
break;
}
} else {
adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
}
gmc_v8_0_vram_gtt_location(adev, &adev->mc);
return 0;
......
......@@ -499,7 +499,21 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
adev->mc.visible_vram_size = adev->mc.real_vram_size;
amdgpu_gart_set_defaults(adev);
/* set the gart size */
if (amdgpu_gart_size == -1) {
switch (adev->asic_type) {
case CHIP_VEGA10: /* all engines support GPUVM */
default:
adev->mc.gart_size = 256ULL << 20;
break;
case CHIP_RAVEN: /* DCE SG support */
adev->mc.gart_size = 1024ULL << 20;
break;
}
} else {
adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
}
gmc_v9_0_vram_gtt_location(adev, &adev->mc);
return 0;
......
......@@ -138,7 +138,7 @@ static void mmhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
{
uint32_t tmp, field;
uint32_t tmp;
/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
......@@ -157,9 +157,8 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
field = adev->vm_manager.fragment_size;
tmp = mmVM_L2_CNTL3_DEFAULT;
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
......
......@@ -419,8 +419,8 @@ struct vi_mqd_allocation {
struct vi_mqd mqd;
uint32_t wptr_poll_mem;
uint32_t rptr_report_mem;
uint32_t dyamic_cu_mask;
uint32_t dyamic_rb_mask;
uint32_t dynamic_cu_mask;
uint32_t dynamic_rb_mask;
};
struct cz_mqd {
......
......@@ -1558,7 +1558,8 @@ static int vega10_populate_smc_link_levels(struct pp_hwmgr *hwmgr)
*/
static int vega10_populate_single_gfx_level(struct pp_hwmgr *hwmgr,
uint32_t gfx_clock, PllSetting_t *current_gfxclk_level)
uint32_t gfx_clock, PllSetting_t *current_gfxclk_level,
uint32_t *acg_freq)
{
struct phm_ppt_v2_information *table_info =
(struct phm_ppt_v2_information *)(hwmgr->pptable);
......@@ -1609,6 +1610,8 @@ static int vega10_populate_single_gfx_level(struct pp_hwmgr *hwmgr,
cpu_to_le16(dividers.usPll_ss_slew_frac);
current_gfxclk_level->Did = (uint8_t)(dividers.ulDid);
*acg_freq = gfx_clock / 100; /* 100 Khz to Mhz conversion */
return 0;
}
......@@ -1689,7 +1692,8 @@ static int vega10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr)
for (i = 0; i < dpm_table->count; i++) {
result = vega10_populate_single_gfx_level(hwmgr,
dpm_table->dpm_levels[i].value,
&(pp_table->GfxclkLevel[i]));
&(pp_table->GfxclkLevel[i]),
&(pp_table->AcgFreqTable[i]));
if (result)
return result;
}
......@@ -1698,7 +1702,8 @@ static int vega10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr)
while (i < NUM_GFXCLK_DPM_LEVELS) {
result = vega10_populate_single_gfx_level(hwmgr,
dpm_table->dpm_levels[j].value,
&(pp_table->GfxclkLevel[i]));
&(pp_table->GfxclkLevel[i]),
&(pp_table->AcgFreqTable[i]));
if (result)
return result;
i++;
......
......@@ -315,10 +315,12 @@ typedef struct {
uint8_t AcgEnable[NUM_GFXCLK_DPM_LEVELS];
GbVdroopTable_t AcgBtcGbVdroopTable;
QuadraticInt_t AcgAvfsGb;
uint32_t Reserved[4];
/* ACG Frequency Table, in Mhz */
uint32_t AcgFreqTable[NUM_GFXCLK_DPM_LEVELS];
/* Padding - ignore */
uint32_t MmHubPadding[7]; /* SMU internal use */
uint32_t MmHubPadding[3]; /* SMU internal use */
} PPTable_t;
......
......@@ -380,7 +380,8 @@ static int smu7_populate_single_firmware_entry(struct pp_smumgr *smumgr,
entry->num_register_entries = 0;
}
if (fw_type == UCODE_ID_RLC_G)
if ((fw_type == UCODE_ID_RLC_G)
|| (fw_type == UCODE_ID_CP_MEC))
entry->flags = 1;
else
entry->flags = 0;
......
......@@ -205,17 +205,32 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *entity)
{
struct amd_sched_rq *rq = entity->rq;
int r;
if (!amd_sched_entity_is_initialized(sched, entity))
return;
/**
* The client will not queue more IBs during this fini, consume existing
* queued IBs
* queued IBs or discard them on SIGKILL
*/
wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity));
if ((current->flags & PF_SIGNALED) && current->exit_code == SIGKILL)
r = -ERESTARTSYS;
else
r = wait_event_killable(sched->job_scheduled,
amd_sched_entity_is_idle(entity));
amd_sched_rq_remove_entity(rq, entity);
if (r) {
struct amd_sched_job *job;
/* Park the kernel for a moment to make sure it isn't processing
* our enity.
*/
kthread_park(sched->thread);
kthread_unpark(sched->thread);
while (kfifo_out(&entity->job_queue, &job, sizeof(job)))
sched->ops->free_job(job);
}
kfifo_free(&entity->job_queue);
}
......
......@@ -109,8 +109,8 @@ static ssize_t ttm_bo_global_show(struct kobject *kobj,
struct ttm_bo_global *glob =
container_of(kobj, struct ttm_bo_global, kobj);
return snprintf(buffer, PAGE_SIZE, "%lu\n",
(unsigned long) atomic_read(&glob->bo_count));
return snprintf(buffer, PAGE_SIZE, "%d\n",
atomic_read(&glob->bo_count));
}
static struct attribute *ttm_bo_global_attrs[] = {
......
......@@ -469,6 +469,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
* TODO: Explicit member copy would probably be better here.
*/
atomic_inc(&bo->glob->bo_count);
INIT_LIST_HEAD(&fbo->ddestroy);
INIT_LIST_HEAD(&fbo->lru);
INIT_LIST_HEAD(&fbo->swap);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment