Commit bd3c0094 authored by Dave Airlie's avatar Dave Airlie
Browse files

Merge branch 'drm-next-4.16' of git://people.freedesktop.org/~agd5f/linux into drm-next

Last few updates for 4.16:
- Misc fixes for amdgpu
- Enable swapout for reserved BOs during allocation for ttm
- Misc cleanups for ttm

* 'drm-next-4.16' of git://people.freedesktop.org/~agd5f/linux: (24 commits)
  drm/amdgpu: Correct the IB size of bo update mapping.
  drm/ttm: enable swapout for reserved BOs during allocation
  drm/ttm: add new function to check if bo is allowable to evict or swapout
  drm/ttm: use an operation ctx for ttm_tt_bind
  drm/ttm: use an operation ctx for ttm_tt_populate in ttm_bo_driver (v2)
  drm/ttm: use an operation ctx for ttm_mem_global_alloc_page
  drm/ttm: use an operation ctx for ttm_mem_global_alloc
  drm/ttm: call ttm_bo_swapout directly when ttm shrink
  drm/vmwgfx: remove the default io_mem_pfn set
  drm/virtio: remove the default io_mem_pfn set
  drm/radeon: remove the default io_mem_pfn set
  drm/qxl: remove the default io_mem_pfn set
  drm/nouveau: remove the default io_mem_pfn set
  drm/mgag200: remove the default io_mem_pfn set
  drm/cirrus: remove the default io_mem_pfn set
  drm/bochs: remove the default io_mem_pfn set
  drm/ast: remove the default io_mem_pfn set
  drm/ttm: add ttm_bo_io_mem_pfn to check io_mem_pfn
  drm/amdgpu: fix VM faults with per VM BOs
  drm/ttm: drop the spin in delayed delete if the trylock doesn't work
  ...
parents b0caa133 104bd2ca
......@@ -52,7 +52,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o
amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o \
amdgpu_ids.o
# add asic specific block
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
......
......@@ -351,7 +351,7 @@ struct amdgpu_gart_funcs {
/* get the pde for a given mc addr */
void (*get_vm_pde)(struct amdgpu_device *adev, int level,
u64 *dst, u64 *flags);
uint32_t (*get_invalidate_req)(unsigned int vm_id);
uint32_t (*get_invalidate_req)(unsigned int vmid);
};
/* provided by the ih block */
......@@ -1125,7 +1125,7 @@ struct amdgpu_job {
void *owner;
uint64_t fence_ctx; /* the fence_context this job uses */
bool vm_needs_flush;
unsigned vm_id;
unsigned vmid;
uint64_t vm_pd_addr;
uint32_t gds_base, gds_size;
uint32_t gws_base, gws_size;
......@@ -1850,7 +1850,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
#define amdgpu_ring_emit_ib(r, ib, vm_id, c) (r)->funcs->emit_ib((r), (ib), (vm_id), (c))
#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c))
#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
......
......@@ -176,8 +176,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
.get_local_mem_info = get_local_mem_info,
.get_gpu_clock_counter = get_gpu_clock_counter,
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
.alloc_pasid = amdgpu_vm_alloc_pasid,
.free_pasid = amdgpu_vm_free_pasid,
.alloc_pasid = amdgpu_pasid_alloc,
.free_pasid = amdgpu_pasid_free,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
.init_pipeline = kgd_init_pipeline,
......
......@@ -135,8 +135,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
.get_local_mem_info = get_local_mem_info,
.get_gpu_clock_counter = get_gpu_clock_counter,
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
.alloc_pasid = amdgpu_vm_alloc_pasid,
.free_pasid = amdgpu_vm_free_pasid,
.alloc_pasid = amdgpu_pasid_alloc,
.free_pasid = amdgpu_pasid_free,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
.init_pipeline = kgd_init_pipeline,
......
......@@ -801,6 +801,8 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
else
strcpy(fw_name, "amdgpu/vega10_smc.bin");
break;
case CHIP_CARRIZO:
case CHIP_STONEY:
case CHIP_RAVEN:
adev->pm.fw_version = info->version;
return 0;
......
......@@ -149,7 +149,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
return -EINVAL;
}
if (vm && !job->vm_id) {
if (vm && !job->vmid) {
dev_err(adev->dev, "VM IB without ID\n");
return -EINVAL;
}
......@@ -211,7 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
!amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
continue;
amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0,
need_ctx_switch);
need_ctx_switch = false;
}
......@@ -229,9 +229,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
r = amdgpu_fence_emit(ring, f);
if (r) {
dev_err(adev->dev, "failed to emit fence (%d)\n", r);
if (job && job->vm_id)
amdgpu_vm_reset_id(adev, ring->funcs->vmhub,
job->vm_id);
if (job && job->vmid)
amdgpu_vmid_reset(adev, ring->funcs->vmhub, job->vmid);
amdgpu_ring_undo(ring);
return r;
}
......
/*
* Copyright 2017 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu_ids.h"
#include <linux/idr.h>
#include <linux/dma-fence-array.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
/*
* PASID manager
*
* PASIDs are global address space identifiers that can be shared
* between the GPU, an IOMMU and the driver. VMs on different devices
* may use the same PASID if they share the same address
* space. Therefore PASIDs are allocated using a global IDA. VMs are
* looked up from the PASID per amdgpu_device.
*/
static DEFINE_IDA(amdgpu_pasid_ida);
/**
* amdgpu_pasid_alloc - Allocate a PASID
* @bits: Maximum width of the PASID in bits, must be at least 1
*
* Allocates a PASID of the given width while keeping smaller PASIDs
* available if possible.
*
* Returns a positive integer on success. Returns %-EINVAL if bits==0.
* Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
* memory allocation failure.
*/
int amdgpu_pasid_alloc(unsigned int bits)
{
int pasid = -EINVAL;
for (bits = min(bits, 31U); bits > 0; bits--) {
pasid = ida_simple_get(&amdgpu_pasid_ida,
1U << (bits - 1), 1U << bits,
GFP_KERNEL);
if (pasid != -ENOSPC)
break;
}
return pasid;
}
/**
* amdgpu_pasid_free - Free a PASID
* @pasid: PASID to free
*/
void amdgpu_pasid_free(unsigned int pasid)
{
ida_simple_remove(&amdgpu_pasid_ida, pasid);
}
/*
* VMID manager
*
* VMIDs are a per VMHUB identifier for page tables handling.
*/
/**
* amdgpu_vmid_had_gpu_reset - check if reset occured since last use
*
* @adev: amdgpu_device pointer
* @id: VMID structure
*
* Check if GPU reset occured since last use of the VMID.
*/
bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
struct amdgpu_vmid *id)
{
return id->current_gpu_reset_count !=
atomic_read(&adev->gpu_reset_counter);
}
/* idr_mgr->lock must be held */
static int amdgpu_vmid_grab_reserved_locked(struct amdgpu_vm *vm,
struct amdgpu_ring *ring,
struct amdgpu_sync *sync,
struct dma_fence *fence,
struct amdgpu_job *job)
{
struct amdgpu_device *adev = ring->adev;
unsigned vmhub = ring->funcs->vmhub;
uint64_t fence_context = adev->fence_context + ring->idx;
struct amdgpu_vmid *id = vm->reserved_vmid[vmhub];
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct dma_fence *updates = sync->last_vm_update;
int r = 0;
struct dma_fence *flushed, *tmp;
bool needs_flush = vm->use_cpu_for_update;
flushed = id->flushed_updates;
if ((amdgpu_vmid_had_gpu_reset(adev, id)) ||
(atomic64_read(&id->owner) != vm->entity.fence_context) ||
(job->vm_pd_addr != id->pd_gpu_addr) ||
(updates && (!flushed || updates->context != flushed->context ||
dma_fence_is_later(updates, flushed))) ||
(!id->last_flush || (id->last_flush->context != fence_context &&
!dma_fence_is_signaled(id->last_flush)))) {
needs_flush = true;
/* to prevent one context starved by another context */
id->pd_gpu_addr = 0;
tmp = amdgpu_sync_peek_fence(&id->active, ring);
if (tmp) {
r = amdgpu_sync_fence(adev, sync, tmp, false);
return r;
}
}
/* Good we can use this VMID. Remember this submission as
* user of the VMID.
*/
r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
if (r)
goto out;
if (updates && (!flushed || updates->context != flushed->context ||
dma_fence_is_later(updates, flushed))) {
dma_fence_put(id->flushed_updates);
id->flushed_updates = dma_fence_get(updates);
}
id->pd_gpu_addr = job->vm_pd_addr;
atomic64_set(&id->owner, vm->entity.fence_context);
job->vm_needs_flush = needs_flush;
if (needs_flush) {
dma_fence_put(id->last_flush);
id->last_flush = NULL;
}
job->vmid = id - id_mgr->ids;
trace_amdgpu_vm_grab_id(vm, ring, job);
out:
return r;
}
/**
* amdgpu_vm_grab_id - allocate the next free VMID
*
* @vm: vm to allocate id for
* @ring: ring we want to submit job to
* @sync: sync object where we add dependencies
* @fence: fence protecting ID from reuse
*
* Allocate an id for the vm, adding fences to the sync obj as necessary.
*/
int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_sync *sync, struct dma_fence *fence,
struct amdgpu_job *job)
{
struct amdgpu_device *adev = ring->adev;
unsigned vmhub = ring->funcs->vmhub;
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
uint64_t fence_context = adev->fence_context + ring->idx;
struct dma_fence *updates = sync->last_vm_update;
struct amdgpu_vmid *id, *idle;
struct dma_fence **fences;
unsigned i;
int r = 0;
mutex_lock(&id_mgr->lock);
if (vm->reserved_vmid[vmhub]) {
r = amdgpu_vmid_grab_reserved_locked(vm, ring, sync, fence, job);
mutex_unlock(&id_mgr->lock);
return r;
}
fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
if (!fences) {
mutex_unlock(&id_mgr->lock);
return -ENOMEM;
}
/* Check if we have an idle VMID */
i = 0;
list_for_each_entry(idle, &id_mgr->ids_lru, list) {
fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
if (!fences[i])
break;
++i;
}
/* If we can't find a idle VMID to use, wait till one becomes available */
if (&idle->list == &id_mgr->ids_lru) {
u64 fence_context = adev->vm_manager.fence_context + ring->idx;
unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
struct dma_fence_array *array;
unsigned j;
for (j = 0; j < i; ++j)
dma_fence_get(fences[j]);
array = dma_fence_array_create(i, fences, fence_context,
seqno, true);
if (!array) {
for (j = 0; j < i; ++j)
dma_fence_put(fences[j]);
kfree(fences);
r = -ENOMEM;
goto error;
}
r = amdgpu_sync_fence(ring->adev, sync, &array->base, false);
dma_fence_put(&array->base);
if (r)
goto error;
mutex_unlock(&id_mgr->lock);
return 0;
}
kfree(fences);
job->vm_needs_flush = vm->use_cpu_for_update;
/* Check if we can use a VMID already assigned to this VM */
list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) {
struct dma_fence *flushed;
bool needs_flush = vm->use_cpu_for_update;
/* Check all the prerequisites to using this VMID */
if (amdgpu_vmid_had_gpu_reset(adev, id))
continue;
if (atomic64_read(&id->owner) != vm->entity.fence_context)
continue;
if (job->vm_pd_addr != id->pd_gpu_addr)
continue;
if (!id->last_flush ||
(id->last_flush->context != fence_context &&
!dma_fence_is_signaled(id->last_flush)))
needs_flush = true;
flushed = id->flushed_updates;
if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
needs_flush = true;
/* Concurrent flushes are only possible starting with Vega10 */
if (adev->asic_type < CHIP_VEGA10 && needs_flush)
continue;
/* Good we can use this VMID. Remember this submission as
* user of the VMID.
*/
r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
if (r)
goto error;
if (updates && (!flushed || dma_fence_is_later(updates, flushed))) {
dma_fence_put(id->flushed_updates);
id->flushed_updates = dma_fence_get(updates);
}
if (needs_flush)
goto needs_flush;
else
goto no_flush_needed;
};
/* Still no ID to use? Then use the idle one found earlier */
id = idle;
/* Remember this submission as user of the VMID */
r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
if (r)
goto error;
id->pd_gpu_addr = job->vm_pd_addr;
dma_fence_put(id->flushed_updates);
id->flushed_updates = dma_fence_get(updates);
atomic64_set(&id->owner, vm->entity.fence_context);
needs_flush:
job->vm_needs_flush = true;
dma_fence_put(id->last_flush);
id->last_flush = NULL;
no_flush_needed:
list_move_tail(&id->list, &id_mgr->ids_lru);
job->vmid = id - id_mgr->ids;
trace_amdgpu_vm_grab_id(vm, ring, job);
error:
mutex_unlock(&id_mgr->lock);
return r;
}
int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
unsigned vmhub)
{
struct amdgpu_vmid_mgr *id_mgr;
struct amdgpu_vmid *idle;
int r = 0;
id_mgr = &adev->vm_manager.id_mgr[vmhub];
mutex_lock(&id_mgr->lock);
if (vm->reserved_vmid[vmhub])
goto unlock;
if (atomic_inc_return(&id_mgr->reserved_vmid_num) >
AMDGPU_VM_MAX_RESERVED_VMID) {
DRM_ERROR("Over limitation of reserved vmid\n");
atomic_dec(&id_mgr->reserved_vmid_num);
r = -EINVAL;
goto unlock;
}
/* Select the first entry VMID */
idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list);
list_del_init(&idle->list);
vm->reserved_vmid[vmhub] = idle;
mutex_unlock(&id_mgr->lock);
return 0;
unlock:
mutex_unlock(&id_mgr->lock);
return r;
}
void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
unsigned vmhub)
{
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
mutex_lock(&id_mgr->lock);
if (vm->reserved_vmid[vmhub]) {
list_add(&vm->reserved_vmid[vmhub]->list,
&id_mgr->ids_lru);
vm->reserved_vmid[vmhub] = NULL;
atomic_dec(&id_mgr->reserved_vmid_num);
}
mutex_unlock(&id_mgr->lock);
}
/**
* amdgpu_vmid_reset - reset VMID to zero
*
* @adev: amdgpu device structure
* @vmid: vmid number to use
*
* Reset saved GDW, GWS and OA to force switch on next flush.
*/
void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
unsigned vmid)
{
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct amdgpu_vmid *id = &id_mgr->ids[vmid];
atomic64_set(&id->owner, 0);
id->gds_base = 0;
id->gds_size = 0;
id->gws_base = 0;
id->gws_size = 0;
id->oa_base = 0;
id->oa_size = 0;
}
/**
* amdgpu_vmid_reset_all - reset VMID to zero
*
* @adev: amdgpu device structure
*
* Reset VMID to force flush on next use
*/
void amdgpu_vmid_reset_all(struct amdgpu_device *adev)
{
unsigned i, j;
for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
struct amdgpu_vmid_mgr *id_mgr =
&adev->vm_manager.id_mgr[i];
for (j = 1; j < id_mgr->num_ids; ++j)
amdgpu_vmid_reset(adev, i, j);
}
}
/**
* amdgpu_vmid_mgr_init - init the VMID manager
*
* @adev: amdgpu_device pointer
*
* Initialize the VM manager structures
*/
void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
{
unsigned i, j;
for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
struct amdgpu_vmid_mgr *id_mgr =
&adev->vm_manager.id_mgr[i];
mutex_init(&id_mgr->lock);
INIT_LIST_HEAD(&id_mgr->ids_lru);
atomic_set(&id_mgr->reserved_vmid_num, 0);
/* skip over VMID 0, since it is the system VM */
for (j = 1; j < id_mgr->num_ids; ++j) {
amdgpu_vmid_reset(adev, i, j);
amdgpu_sync_create(&id_mgr->ids[i].active);
list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
}
}
adev->vm_manager.fence_context =
dma_fence_context_alloc(AMDGPU_MAX_RINGS);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
adev->vm_manager.seqno[i] = 0;
}
/**
* amdgpu_vmid_mgr_fini - cleanup VM manager
*
* @adev: amdgpu_device pointer
*
* Cleanup the VM manager and free resources.
*/
void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
{
unsigned i, j;
for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
struct amdgpu_vmid_mgr *id_mgr =
&adev->vm_manager.id_mgr[i];
mutex_destroy(&id_mgr->lock);
for (j = 0; j < AMDGPU_NUM_VMID; ++j) {
struct amdgpu_vmid *id = &id_mgr->ids[j];
amdgpu_sync_free(&id->active);
dma_fence_put(id->flushed_updates);
dma_fence_put(id->last_flush);
}
}
}
/*
* Copyright 2017 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_IDS_H__
#define __AMDGPU_IDS_H__
#include <linux/types.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/dma-fence.h>
#include "amdgpu_sync.h"
/* maximum number of VMIDs */
#define AMDGPU_NUM_VMID 16
struct amdgpu_device;
struct amdgpu_vm;
struct amdgpu_ring;
struct amdgpu_sync;
struct amdgpu_job;
struct amdgpu_vmid {
struct list_head list;
struct amdgpu_sync active;
struct dma_fence *last_flush;
atomic64_t owner;
uint64_t pd_gpu_addr;
/* last flushed PD/PT update */
struct dma_fence *flushed_updates;
uint32_t current_gpu_reset_count;
uint32_t gds_base;
uint32_t gds_size;
uint32_t gws_base;
uint32_t gws_size;
uint32_t oa_base;
uint32_t oa_size;
};
struct amdgpu_vmid_mgr {
struct mutex lock;
unsigned num_ids;
struct list_head ids_lru;
struct amdgpu_vmid ids[AMDGPU_NUM_VMID];
atomic_t reserved_vmid_num;
};
int amdgpu_pasid_alloc(unsigned int bits);
void amdgpu_pasid_free(unsigned int pasid);
bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
struct amdgpu_vmid *id);
int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
unsigned vmhub);
void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
unsigned vmhub);
int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_sync *sync, struct dma_fence *fence,
struct amdgpu_job *job);
void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
unsigned vmid);
void amdgpu_vmid_reset_all(struct amdgpu_device *adev);
void amdgpu_vmid_mgr_init(struct amdgpu_device *adev);
void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev);
#endif
......@@ -105,8 +105,8 @@ struct amdgpu_iv_entry {
unsigned client_id;
unsigned src_id;
unsigned ring_id;
unsigned vm_id;
unsigned vm_id_src;
unsigned vmid;
unsigned vmid_src;
uint64_t timestamp;
unsigned timestamp_src;
unsigned pas_id;
......
......@@ -158,12 +158,12 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
}
}
while (fence == NULL && vm && !job->vm_id) {
while (fence == NULL && vm && !job->vmid) {
struct amdgpu_ring *ring = job->ring;
r = amdgpu_vm_grab_id(vm, ring, &job->sync,
&job->base.s_fence->finished,
job);
r = amdgpu_vmid_grab(vm, ring, &job->sync,
&job->base.s_fence->finished,
job);
if (r)
DRM_ERROR("Error getting VM ID (%d)\n", r);
......
......@@ -121,11 +121,11 @@ struct amdgpu_ring_funcs {
/* command emit functions */
void (*emit_ib)(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
unsigned vm_id, bool ctx_switch);
unsigned vmid, bool ctx_switch);
void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
uint64_t seq, unsigned flags);
void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vm_id,
void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid,
uint64_t pd_addr);
void (*emit_hdp_flush)(struct amdgpu_ring *ring);
void (*emit_hdp_invalidate)(struct amdgpu_ring *ring);
......
......@@ -82,8 +82,8 @@ TRACE_EVENT(amdgpu_iv,
__field(unsigned, client_id)
__field(unsigned, src_id)
__field(unsigned, ring_id)
__field(unsigned, vm_id)
__field(unsigned, vm_id_src)
__field(unsigned, vmid)
__field(unsigned, vmid_src)
__field(uint64_t, timestamp)
__field(unsigned, timestamp_src)
__field(unsigned, pas_id)
......@@ -93,8 +93,8 @@ TRACE_EVENT(amdgpu_iv,
__entry->client_id = iv->client_id;
__entry->src_id = iv->src_id;
__entry->ring_id = iv->ring_id;
__entry->vm_id = iv->vm_id;
__entry->vm_id_src = iv->vm_id_src;
__entry->vmid = iv->vmid;
__entry->vmid_src = iv->vmid_src;
__entry->timestamp = iv->timestamp;
__entry->timestamp_src = iv->timestamp_src;
__entry->pas_id = iv->pas_id;
......@@ -103,9 +103,9 @@ TRACE_EVENT(amdgpu_iv,
__entry->src_data[2] = iv->src_data[2];
__entry->src_data[3] = iv->src_data[3];
),
TP_printk("client_id:%u src_id:%u ring:%u vm_id:%u timestamp: %llu pas_id:%u src_data: %08x %08x %08x %08x\n",
TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pas_id:%u src_data: %08x %08x %08x %08x\n",
__entry->client_id, __entry->src_id,
__entry->ring_id, __entry->vm_id,
__entry->ring_id, __entry->vmid,
__entry->timestamp, __entry->pas_id,
__entry->src_data[0], __entry->src_data[1],
__entry->src_data[2], __entry->src_data[3])
......@@ -219,7 +219,7 @@ TRACE_EVENT(amdgpu_vm_grab_id,
TP_STRUCT__entry(
__field(struct amdgpu_vm *, vm)
__field(u32, ring)
__field(u32, vm_id)
__field(u32, vmid)
__field(u32, vm_hub)
__field(u64, pd_addr)
__field(u32, needs_flush)
......@@ -228,13 +228,13 @@ TRACE_EVENT(amdgpu_vm_grab_id,
TP_fast_assign(
__entry->vm = vm;
__entry->ring = ring->idx;
__entry->vm_id = job->vm_id;
__entry->vmid = job->vmid;
__entry->vm_hub = ring->funcs->vmhub,
__entry->pd_addr = job->vm_pd_addr;
__entry->needs_flush = job->vm_needs_flush;
),
TP_printk("vm=%p, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
__entry->vm, __entry->ring, __entry->vm_id,
__entry->vm, __entry->ring, __entry->vmid,
__entry->vm_hub, __entry->pd_addr, __entry->needs_flush)
);
......@@ -357,24 +357,24 @@ TRACE_EVENT(amdgpu_vm_copy_ptes,
);
TRACE_EVENT(amdgpu_vm_flush,
TP_PROTO(struct amdgpu_ring *ring, unsigned vm_id,
TP_PROTO(struct amdgpu_ring *ring, unsigned vmid,
uint64_t pd_addr),
TP_ARGS(ring, vm_id, pd_addr),
TP_ARGS(ring, vmid, pd_addr),
TP_STRUCT__entry(
__field(u32, ring)
__field(u32, vm_id)
__field(u32, vmid)
__field(u32, vm_hub)
__field(u64, pd_addr)
),
TP_fast_assign(
__entry->ring = ring->idx;
__entry->vm_id = vm_id;
__entry->vmid = vmid;
__entry->vm_hub = ring->funcs->vmhub;
__entry->pd_addr = pd_addr;
),
TP_printk("ring=%u, id=%u, hub=%u, pd_addr=%010Lx",
__entry->ring, __entry->vm_id,
__entry->ring, __entry->vmid,
__entry->vm_hub,__entry->pd_addr)
);
......
......@@ -497,7 +497,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
goto out_cleanup;
}
r = ttm_tt_bind(bo->ttm, &tmp_mem);
r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx);
if (unlikely(r)) {
goto out_cleanup;
}
......@@ -990,7 +990,8 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
return &gtt->ttm.ttm;
}
static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
struct ttm_operation_ctx *ctx)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
......@@ -1018,11 +1019,11 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
#ifdef CONFIG_SWIOTLB
if (swiotlb_nr_tbl()) {
return ttm_dma_populate(&gtt->ttm, adev->dev);
return ttm_dma_populate(&gtt->ttm, adev->dev, ctx);
}
#endif
return ttm_populate_and_map_pages(adev->dev, &gtt->ttm);
return ttm_populate_and_map_pages(adev->dev, &gtt->ttm, ctx);
}
static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
......
......@@ -991,7 +991,7 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
*
*/
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
unsigned vm_id, bool ctx_switch)
unsigned vmid, bool ctx_switch)
{
amdgpu_ring_write(ring, VCE_CMD_IB);
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
......
......@@ -63,7 +63,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx);
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
unsigned vm_id, bool ctx_switch);
unsigned vmid, bool ctx_switch);
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
unsigned flags);
int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring);
......
......@@ -33,52 +33,6 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
/*
* PASID manager
*
* PASIDs are global address space identifiers that can be shared
* between the GPU, an IOMMU and the driver. VMs on different devices
* may use the same PASID if they share the same address
* space. Therefore PASIDs are allocated using a global IDA. VMs are
* looked up from the PASID per amdgpu_device.
*/
static DEFINE_IDA(amdgpu_vm_pasid_ida);
/**
* amdgpu_vm_alloc_pasid - Allocate a PASID
* @bits: Maximum width of the PASID in bits, must be at least 1
*
* Allocates a PASID of the given width while keeping smaller PASIDs
* available if possible.
*
* Returns a positive integer on success. Returns %-EINVAL if bits==0.
* Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
* memory allocation failure.
*/
int amdgpu_vm_alloc_pasid(unsigned int bits)
{
int pasid = -EINVAL;
for (bits = min(bits, 31U); bits > 0; bits--) {
pasid = ida_simple_get(&amdgpu_vm_pasid_ida,
1U << (bits - 1), 1U << bits,
GFP_KERNEL);
if (pasid != -ENOSPC)
break;
}
return pasid;
}
/**
* amdgpu_vm_free_pasid - Free a PASID
* @pasid: PASID to free
*/
void amdgpu_vm_free_pasid(unsigned int pasid)
{
ida_simple_remove(&amdgpu_vm_pasid_ida, pasid);
}
/*
* GPUVM
* GPUVM is similar to the legacy gart on older asics, however
......@@ -447,286 +401,6 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
adev->vm_manager.root_level);
}
/**
* amdgpu_vm_had_gpu_reset - check if reset occured since last use
*
* @adev: amdgpu_device pointer
* @id: VMID structure
*
* Check if GPU reset occured since last use of the VMID.
*/
static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev,
struct amdgpu_vm_id *id)
{
return id->current_gpu_reset_count !=
atomic_read(&adev->gpu_reset_counter);
}
static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub)
{
return !!vm->reserved_vmid[vmhub];
}
/* idr_mgr->lock must be held */
static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm,
struct amdgpu_ring *ring,
struct amdgpu_sync *sync,
struct dma_fence *fence,
struct amdgpu_job *job)
{
struct amdgpu_device *adev = ring->adev;
unsigned vmhub = ring->funcs->vmhub;
uint64_t fence_context = adev->fence_context + ring->idx;
struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub];
struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct dma_fence *updates = sync->last_vm_update;
int r = 0;
struct dma_fence *flushed, *tmp;
bool needs_flush = vm->use_cpu_for_update;
flushed = id->flushed_updates;
if ((amdgpu_vm_had_gpu_reset(adev, id)) ||
(atomic64_read(&id->owner) != vm->client_id) ||
(job->vm_pd_addr != id->pd_gpu_addr) ||
(updates && (!flushed || updates->context != flushed->context ||
dma_fence_is_later(updates, flushed))) ||
(!id->last_flush || (id->last_flush->context != fence_context &&
!dma_fence_is_signaled(id->last_flush)))) {
needs_flush = true;
/* to prevent one context starved by another context */
id->pd_gpu_addr = 0;
tmp = amdgpu_sync_peek_fence(&id->active, ring);
if (tmp) {
r = amdgpu_sync_fence(adev, sync, tmp, false);
return r;
}
}
/* Good we can use this VMID. Remember this submission as
* user of the VMID.
*/
r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
if (r)
goto out;
if (updates && (!flushed || updates->context != flushed->context ||
dma_fence_is_later(updates, flushed))) {
dma_fence_put(id->flushed_updates);
id->flushed_updates = dma_fence_get(updates);
}
id->pd_gpu_addr = job->vm_pd_addr;
atomic64_set(&id->owner, vm->client_id);
job->vm_needs_flush = needs_flush;
if (needs_flush) {
dma_fence_put(id->last_flush);
id->last_flush = NULL;
}
job->vm_id = id - id_mgr->ids;
trace_amdgpu_vm_grab_id(vm, ring, job);
out:
return r;
}
/**
* amdgpu_vm_grab_id - allocate the next free VMID
*
* @vm: vm to allocate id for
* @ring: ring we want to submit job to
* @sync: sync object where we add dependencies
* @fence: fence protecting ID from reuse
*
* Allocate an id for the vm, adding fences to the sync obj as necessary.
*/
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_sync *sync, struct dma_fence *fence,
struct amdgpu_job *job)
{
struct amdgpu_device *adev = ring->adev;
unsigned vmhub = ring->funcs->vmhub;
struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
uint64_t fence_context = adev->fence_context + ring->idx;
struct dma_fence *updates = sync->last_vm_update;
struct amdgpu_vm_id *id, *idle;
struct dma_fence **fences;
unsigned i;
int r = 0;
mutex_lock(&id_mgr->lock);
if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) {
r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job);
mutex_unlock(&id_mgr->lock);
return r;
}
fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
if (!fences) {
mutex_unlock(&id_mgr->lock);
return -ENOMEM;
}
/* Check if we have an idle VMID */
i = 0;
list_for_each_entry(idle, &id_mgr->ids_lru, list) {
fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
if (!fences[i])
break;
++i;
}
/* If we can't find a idle VMID to use, wait till one becomes available */
if (&idle->list == &id_mgr->ids_lru) {
u64 fence_context = adev->vm_manager.fence_context + ring->idx;
unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
struct dma_fence_array *array;
unsigned j;
for (j = 0; j < i; ++j)
dma_fence_get(fences[j]);
array = dma_fence_array_create(i, fences, fence_context,
seqno, true);
if (!array) {
for (j = 0; j < i; ++j)
dma_fence_put(fences[j]);
kfree(fences);
r = -ENOMEM;
goto error;
}
r = amdgpu_sync_fence(ring->adev, sync, &array->base, false);
dma_fence_put(&array->base);
if (r)
goto error;
mutex_unlock(&id_mgr->lock);
return 0;
}
kfree(fences);
job->vm_needs_flush = vm->use_cpu_for_update;
/* Check if we can use a VMID already assigned to this VM */
list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) {
struct dma_fence *flushed;
bool needs_flush = vm->use_cpu_for_update;
/* Check all the prerequisites to using this VMID */
if (amdgpu_vm_had_gpu_reset(adev, id))
continue;
if (atomic64_read(&id->owner) != vm->client_id)
continue;
if (job->vm_pd_addr != id->pd_gpu_addr)
continue;
if (!id->last_flush ||
(id->last_flush->context != fence_context &&
!dma_fence_is_signaled(id->last_flush)))
needs_flush = true;
flushed = id->flushed_updates;
if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
needs_flush = true;
/* Concurrent flushes are only possible starting with Vega10 */
if (adev->asic_type < CHIP_VEGA10 && needs_flush)
continue;
/* Good we can use this VMID. Remember this submission as
* user of the VMID.
*/
r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
if (r)
goto error;
if (updates && (!flushed || dma_fence_is_later(updates, flushed))) {
dma_fence_put(id->flushed_updates);
id->flushed_updates = dma_fence_get(updates);
}
if (needs_flush)
goto needs_flush;
else
goto no_flush_needed;
};
/* Still no ID to use? Then use the idle one found earlier */
id = idle;
/* Remember this submission as user of the VMID */
r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
if (r)
goto error;
id->pd_gpu_addr = job->vm_pd_addr;
dma_fence_put(id->flushed_updates);
id->flushed_updates = dma_fence_get(updates);
atomic64_set(&id->owner, vm->client_id);
needs_flush:
job->vm_needs_flush = true;
dma_fence_put(id->last_flush);
id->last_flush = NULL;
no_flush_needed:
list_move_tail(&id->list, &id_mgr->ids_lru);
job->vm_id = id - id_mgr->ids;
trace_amdgpu_vm_grab_id(vm, ring, job);
error:
mutex_unlock(&id_mgr->lock);
return r;
}
static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
unsigned vmhub)
{
struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
mutex_lock(&id_mgr->lock);
if (vm->reserved_vmid[vmhub]) {
list_add(&vm->reserved_vmid[vmhub]->list,
&id_mgr->ids_lru);
vm->reserved_vmid[vmhub] = NULL;
atomic_dec(&id_mgr->reserved_vmid_num);
}
mutex_unlock(&id_mgr->lock);
}
static int amdgpu_vm_alloc_reserved_vmid(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
unsigned vmhub)
{
struct amdgpu_vm_id_manager *id_mgr;
struct amdgpu_vm_id *idle;
int r = 0;
id_mgr = &adev->vm_manager.id_mgr[vmhub];
mutex_lock(&id_mgr->lock);
if (vm->reserved_vmid[vmhub])
goto unlock;
if (atomic_inc_return(&id_mgr->reserved_vmid_num) >
AMDGPU_VM_MAX_RESERVED_VMID) {
DRM_ERROR("Over limitation of reserved vmid\n");
atomic_dec(&id_mgr->reserved_vmid_num);
r = -EINVAL;
goto unlock;
}
/* Select the first entry VMID */
idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id, list);
list_del_init(&idle->list);
vm->reserved_vmid[vmhub] = idle;
mutex_unlock(&id_mgr->lock);
return 0;
unlock:
mutex_unlock(&id_mgr->lock);
return r;
}
/**
* amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
*
......@@ -767,14 +441,14 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
{
struct amdgpu_device *adev = ring->adev;
unsigned vmhub = ring->funcs->vmhub;
struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct amdgpu_vm_id *id;
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct amdgpu_vmid *id;
bool gds_switch_needed;
bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
if (job->vm_id == 0)
if (job->vmid == 0)
return false;
id = &id_mgr->ids[job->vm_id];
id = &id_mgr->ids[job->vmid];
gds_switch_needed = ring->funcs->emit_gds_switch && (
id->gds_base != job->gds_base ||
id->gds_size != job->gds_size ||
......@@ -783,7 +457,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
id->oa_base != job->oa_base ||
id->oa_size != job->oa_size);
if (amdgpu_vm_had_gpu_reset(adev, id))
if (amdgpu_vmid_had_gpu_reset(adev, id))
return true;
return vm_flush_needed || gds_switch_needed;
......@@ -798,7 +472,7 @@ static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
* amdgpu_vm_flush - hardware flush the vm
*
* @ring: ring to use for flush
* @vm_id: vmid number to use
* @vmid: vmid number to use
* @pd_addr: address of the page directory
*
* Emit a VM flush when it is necessary.
......@@ -807,8 +481,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
{
struct amdgpu_device *adev = ring->adev;
unsigned vmhub = ring->funcs->vmhub;
struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct amdgpu_vm_id *id = &id_mgr->ids[job->vm_id];
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
bool gds_switch_needed = ring->funcs->emit_gds_switch && (
id->gds_base != job->gds_base ||
id->gds_size != job->gds_size ||
......@@ -820,7 +494,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
unsigned patch_offset = 0;
int r;
if (amdgpu_vm_had_gpu_reset(adev, id)) {
if (amdgpu_vmid_had_gpu_reset(adev, id)) {
gds_switch_needed = true;
vm_flush_needed = true;
}
......@@ -837,8 +511,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
if (ring->funcs->emit_vm_flush && vm_flush_needed) {
struct dma_fence *fence;
trace_amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr);
amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr);
trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
r = amdgpu_fence_emit(ring, &fence);
if (r)
......@@ -858,7 +532,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
id->gws_size = job->gws_size;
id->oa_base = job->oa_base;
id->oa_size = job->oa_size;
amdgpu_ring_emit_gds_switch(ring, job->vm_id, job->gds_base,
amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
job->gds_size, job->gws_base,
job->gws_size, job->oa_base,
job->oa_size);
......@@ -875,49 +549,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
return 0;
}
/**
* amdgpu_vm_reset_id - reset VMID to zero
*
* @adev: amdgpu device structure
* @vm_id: vmid number to use
*
* Reset saved GDW, GWS and OA to force switch on next flush.
*/
void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
unsigned vmid)
{
struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct amdgpu_vm_id *id = &id_mgr->ids[vmid];
atomic64_set(&id->owner, 0);
id->gds_base = 0;
id->gds_size = 0;
id->gws_base = 0;
id->gws_size = 0;
id->oa_base = 0;
id->oa_size = 0;
}
/**
* amdgpu_vm_reset_all_id - reset VMID to zero
*
* @adev: amdgpu device structure
*
* Reset VMID to force flush on next use
*/
void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev)
{
unsigned i, j;
for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
struct amdgpu_vm_id_manager *id_mgr =
&adev->vm_manager.id_mgr[i];
for (j = 1; j < id_mgr->num_ids; ++j)
amdgpu_vm_reset_id(adev, i, j);
}
}
/**
* amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
*
......@@ -1569,13 +1200,19 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
*
* The second command is for the shadow pagetables.
*/
ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
if (vm->root.base.bo->shadow)
ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
else
ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
/* padding, etc. */
ndw = 64;
/* one PDE write for each huge page */
ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6;
if (vm->root.base.bo->shadow)
ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6 * 2;
else
ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6;
if (pages_addr) {
/* copy commands needed */
......@@ -2114,8 +1751,26 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
INIT_LIST_HEAD(&bo_va->valids);
INIT_LIST_HEAD(&bo_va->invalids);
if (bo)
list_add_tail(&bo_va->base.bo_list, &bo->va);
if (!bo)
return bo_va;
list_add_tail(&bo_va->base.bo_list, &bo->va);
if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
return bo_va;
if (bo->preferred_domains &
amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
return bo_va;
/*
* We checked all the prerequisites, but it looks like this per VM BO
* is currently evicted. add the BO to the evicted list to make sure it
* is validated on next VM use to avoid fault.
* */
spin_lock(&vm->status_lock);
list_move_tail(&bo_va->base.vm_status, &vm->evicted);
spin_unlock(&vm->status_lock);
return bo_va;
}
......@@ -2625,7 +2280,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
uint64_t init_pde_value = 0;
vm->va = RB_ROOT_CACHED;
vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
vm->reserved_vmid[i] = NULL;
spin_lock_init(&vm->status_lock);
......@@ -2819,7 +2473,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_bo_unref(&root);
dma_fence_put(vm->last_update);
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
amdgpu_vm_free_reserved_vmid(adev, vm, i);
amdgpu_vmid_free_reserved(adev, vm, i);
}
/**
......@@ -2861,23 +2515,9 @@ bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
*/
void amdgpu_vm_manager_init(struct amdgpu_device *adev)
{
unsigned i, j;
for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
struct amdgpu_vm_id_manager *id_mgr =
&adev->vm_manager.id_mgr[i];
unsigned i;
mutex_init(&id_mgr->lock);
INIT_LIST_HEAD(&id_mgr->ids_lru);
atomic_set(&id_mgr->reserved_vmid_num, 0);
/* skip over VMID 0, since it is the system VM */
for (j = 1; j < id_mgr->num_ids; ++j) {
amdgpu_vm_reset_id(adev, i, j);
amdgpu_sync_create(&id_mgr->ids[i].active);
list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
}
}
amdgpu_vmid_mgr_init(adev);
adev->vm_manager.fence_context =
dma_fence_context_alloc(AMDGPU_MAX_RINGS);
......@@ -2885,7 +2525,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
adev->vm_manager.seqno[i] = 0;
atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
atomic64_set(&adev->vm_manager.client_counter, 0);
spin_lock_init(&adev->vm_manager.prt_lock);
atomic_set(&adev->vm_manager.num_prt_users, 0);
......@@ -2918,24 +2557,10 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
*/
void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
{
unsigned i, j;
WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
idr_destroy(&adev->vm_manager.pasid_idr);
for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
struct amdgpu_vm_id_manager *id_mgr =
&adev->vm_manager.id_mgr[i];
mutex_destroy(&id_mgr->lock);
for (j = 0; j < AMDGPU_NUM_VM; ++j) {
struct amdgpu_vm_id *id = &id_mgr->ids[j];
amdgpu_sync_free(&id->active);
dma_fence_put(id->flushed_updates);
dma_fence_put(id->last_flush);
}
}
amdgpu_vmid_mgr_fini(adev);
}
int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
......@@ -2948,13 +2573,12 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
switch (args->in.op) {
case AMDGPU_VM_OP_RESERVE_VMID:
/* current, we only have requirement to reserve vmid from gfxhub */
r = amdgpu_vm_alloc_reserved_vmid(adev, &fpriv->vm,
AMDGPU_GFXHUB);
r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
if (r)
return r;
break;
case AMDGPU_VM_OP_UNRESERVE_VMID:
amdgpu_vm_free_reserved_vmid(adev, &fpriv->vm, AMDGPU_GFXHUB);
amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
break;
default:
return -EINVAL;
......
......@@ -31,6 +31,7 @@
#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
#include "amdgpu_ids.h"
struct amdgpu_bo_va;
struct amdgpu_job;
......@@ -40,9 +41,6 @@ struct amdgpu_bo_list_entry;
* GPUVM handling
*/
/* maximum number of VMIDs */
#define AMDGPU_NUM_VM 16
/* Maximum number of PTEs the hardware can write with one command */
#define AMDGPU_VM_MAX_UPDATE_SIZE 0x3FFFF
......@@ -193,11 +191,9 @@ struct amdgpu_vm {
/* Scheduler entity for page table updates */
struct drm_sched_entity entity;
/* client id and PASID (TODO: replace client_id with PASID) */
u64 client_id;
unsigned int pasid;
/* dedicated to vm */
struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS];
struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS];
/* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
bool use_cpu_for_update;
......@@ -212,37 +208,9 @@ struct amdgpu_vm {
unsigned int fault_credit;
};
struct amdgpu_vm_id {
struct list_head list;
struct amdgpu_sync active;
struct dma_fence *last_flush;
atomic64_t owner;
uint64_t pd_gpu_addr;
/* last flushed PD/PT update */
struct dma_fence *flushed_updates;
uint32_t current_gpu_reset_count;
uint32_t gds_base;
uint32_t gds_size;
uint32_t gws_base;
uint32_t gws_size;
uint32_t oa_base;
uint32_t oa_size;
};
struct amdgpu_vm_id_manager {
struct mutex lock;
unsigned num_ids;
struct list_head ids_lru;
struct amdgpu_vm_id ids[AMDGPU_NUM_VM];
atomic_t reserved_vmid_num;
};
struct amdgpu_vm_manager {
/* Handling of VMIDs */
struct amdgpu_vm_id_manager id_mgr[AMDGPU_MAX_VMHUBS];
struct amdgpu_vmid_mgr id_mgr[AMDGPU_MAX_VMHUBS];
/* Handling of VM fences */
u64 fence_context;
......@@ -260,8 +228,6 @@ struct amdgpu_vm_manager {
struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS];
unsigned vm_pte_num_rings;
atomic_t vm_pte_next_ring;
/* client id counter */
atomic64_t client_counter;
/* partial resident texture handling */
spinlock_t prt_lock;
......@@ -280,8 +246,6 @@ struct amdgpu_vm_manager {
spinlock_t pasid_lock;
};
int amdgpu_vm_alloc_pasid(unsigned int bits);
void amdgpu_vm_free_pasid(unsigned int pasid);
void amdgpu_vm_manager_init(struct amdgpu_device *adev);
void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
......@@ -299,13 +263,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
uint64_t saddr, uint64_t size);
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_sync *sync, struct dma_fence *fence,
struct amdgpu_job *job);
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
unsigned vmid);
void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev);
int amdgpu_vm_update_directories(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
......
......@@ -280,7 +280,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev,
entry->src_id = dw[0] & 0xff;
entry->src_data[0] = dw[1] & 0xfffffff;
entry->ring_id = dw[2] & 0xff;
entry->vm_id = (dw[2] >> 8) & 0xff;
entry->vmid = (dw[2] >> 8) & 0xff;
entry->pas_id = (dw[2] >> 16) & 0xffff;
/* wptr/rptr are in bytes! */
......
......@@ -221,9 +221,9 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
*/
static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
unsigned vm_id, bool ctx_switch)
unsigned vmid, bool ctx_switch)
{
u32 extra_bits = vm_id & 0xf;
u32 extra_bits = vmid & 0xf;
/* IB packet must end on a 8 DW boundary */
cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8);
......@@ -880,23 +880,23 @@ static void cik_sdma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
* using sDMA (CIK).
*/
static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vm_id, uint64_t pd_addr)
unsigned vmid, uint64_t pd_addr)
{
u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) |
SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */
amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
if (vm_id < 8) {
amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
if (vmid < 8) {
amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
} else {
amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
}
amdgpu_ring_write(ring, pd_addr >> 12);
/* flush TLB */
amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
amdgpu_ring_write(ring, 1 << vm_id);
amdgpu_ring_write(ring, 1 << vmid);
amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
......
......@@ -259,7 +259,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev,
entry->src_id = dw[0] & 0xff;
entry->src_data[0] = dw[1] & 0xfffffff;
entry->ring_id = dw[2] & 0xff;
entry->vm_id = (dw[2] >> 8) & 0xff;
entry->vmid = (dw[2] >> 8) & 0xff;
entry->pas_id = (dw[2] >> 16) & 0xffff;
/* wptr/rptr are in bytes! */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment