Commit f1b79965 authored by Dave Airlie's avatar Dave Airlie

Merge tag 'drm-msm-next-2021-07-28' of https://gitlab.freedesktop.org/drm/msm into drm-next

An early pull for v5.15 (there'll be more coming in a week or two),
consisting of the drm/scheduler conversion and a couple other small
series that one was based one.  Mostly sending this now because IIUC
danvet wanted it in drm-next so he could rebase on it.  (Daniel, if
you disagree then speak up, and I'll instead include this in the main
pull request once that is ready.)

This also has a core patch to drop drm_gem_object_put_locked() now
that the last use of it is removed.

[airlied: add NULL to drm_sched_init]
Signed-off-by: default avatarDave Airlie <airlied@redhat.com>
From: Rob Clark <robdclark@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/CAF6AEGumRk7H88bqV=H9Fb1SM0zPBo5B7NsCU3jFFKBYxf5k+Q@mail.gmail.com
parents cfeeb0b5 4541e4f2
......@@ -973,28 +973,6 @@ drm_gem_object_free(struct kref *kref)
}
EXPORT_SYMBOL(drm_gem_object_free);
/**
* drm_gem_object_put_locked - release a GEM buffer object reference
* @obj: GEM buffer object
*
* This releases a reference to @obj. Callers must hold the
* &drm_device.struct_mutex lock when calling this function, even when the
* driver doesn't use &drm_device.struct_mutex for anything.
*
* For drivers not encumbered with legacy locking use
* drm_gem_object_put() instead.
*/
void
drm_gem_object_put_locked(struct drm_gem_object *obj)
{
if (obj) {
WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex));
kref_put(&obj->refcount, drm_gem_object_free);
}
}
EXPORT_SYMBOL(drm_gem_object_put_locked);
/**
* drm_gem_vm_open - vma->ops->open implementation for GEM
* @vma: VM area structure
......
......@@ -14,6 +14,7 @@ config DRM_MSM
select REGULATOR
select DRM_KMS_HELPER
select DRM_PANEL
select DRM_SCHED
select SHMEM
select TMPFS
select QCOM_SCM if ARCH_QCOM
......
......@@ -90,6 +90,7 @@ msm-y := \
msm_gem_submit.o \
msm_gem_vma.o \
msm_gpu.o \
msm_gpu_devfreq.o \
msm_iommu.o \
msm_perf.o \
msm_rd.o \
......
......@@ -117,13 +117,13 @@ reset_set(void *data, u64 val)
if (a5xx_gpu->pm4_bo) {
msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
drm_gem_object_put_locked(a5xx_gpu->pm4_bo);
drm_gem_object_put(a5xx_gpu->pm4_bo);
a5xx_gpu->pm4_bo = NULL;
}
if (a5xx_gpu->pfp_bo) {
msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
drm_gem_object_put_locked(a5xx_gpu->pfp_bo);
drm_gem_object_put(a5xx_gpu->pfp_bo);
a5xx_gpu->pfp_bo = NULL;
}
......
......@@ -1415,7 +1415,7 @@ struct a5xx_gpu_state {
static int a5xx_crashdumper_init(struct msm_gpu *gpu,
struct a5xx_crashdumper *dumper)
{
dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
dumper->ptr = msm_gem_kernel_new(gpu->dev,
SZ_1M, MSM_BO_WC, gpu->aspace,
&dumper->bo, &dumper->iova);
......@@ -1517,7 +1517,7 @@ static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
if (a5xx_crashdumper_run(gpu, &dumper)) {
kfree(a5xx_state->hlsqregs);
msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
msm_gem_kernel_put(dumper.bo, gpu->aspace);
return;
}
......@@ -1525,7 +1525,7 @@ static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
count * sizeof(u32));
msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
msm_gem_kernel_put(dumper.bo, gpu->aspace);
}
static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
......
......@@ -362,7 +362,7 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu)
*/
bosize = (cmds_size + (cmds_size / TYPE4_MAX_PAYLOAD) + 1) << 2;
ptr = msm_gem_kernel_new_locked(drm, bosize,
ptr = msm_gem_kernel_new(drm, bosize,
MSM_BO_WC | MSM_BO_GPU_READONLY, gpu->aspace,
&a5xx_gpu->gpmu_bo, &a5xx_gpu->gpmu_iova);
if (IS_ERR(ptr))
......
......@@ -240,7 +240,7 @@ static int preempt_init_ring(struct a5xx_gpu *a5xx_gpu,
A5XX_PREEMPT_COUNTER_SIZE,
MSM_BO_WC, gpu->aspace, &counters_bo, &counters_iova);
if (IS_ERR(counters)) {
msm_gem_kernel_put(bo, gpu->aspace, true);
msm_gem_kernel_put(bo, gpu->aspace);
return PTR_ERR(counters);
}
......@@ -272,9 +272,8 @@ void a5xx_preempt_fini(struct msm_gpu *gpu)
int i;
for (i = 0; i < gpu->nr_rings; i++) {
msm_gem_kernel_put(a5xx_gpu->preempt_bo[i], gpu->aspace, true);
msm_gem_kernel_put(a5xx_gpu->preempt_counters_bo[i],
gpu->aspace, true);
msm_gem_kernel_put(a5xx_gpu->preempt_bo[i], gpu->aspace);
msm_gem_kernel_put(a5xx_gpu->preempt_counters_bo[i], gpu->aspace);
}
}
......
......@@ -1129,12 +1129,12 @@ int a6xx_gmu_stop(struct a6xx_gpu *a6xx_gpu)
static void a6xx_gmu_memory_free(struct a6xx_gmu *gmu)
{
msm_gem_kernel_put(gmu->hfi.obj, gmu->aspace, false);
msm_gem_kernel_put(gmu->debug.obj, gmu->aspace, false);
msm_gem_kernel_put(gmu->icache.obj, gmu->aspace, false);
msm_gem_kernel_put(gmu->dcache.obj, gmu->aspace, false);
msm_gem_kernel_put(gmu->dummy.obj, gmu->aspace, false);
msm_gem_kernel_put(gmu->log.obj, gmu->aspace, false);
msm_gem_kernel_put(gmu->hfi.obj, gmu->aspace);
msm_gem_kernel_put(gmu->debug.obj, gmu->aspace);
msm_gem_kernel_put(gmu->icache.obj, gmu->aspace);
msm_gem_kernel_put(gmu->dcache.obj, gmu->aspace);
msm_gem_kernel_put(gmu->dummy.obj, gmu->aspace);
msm_gem_kernel_put(gmu->log.obj, gmu->aspace);
gmu->aspace->mmu->funcs->detach(gmu->aspace->mmu);
msm_gem_address_space_put(gmu->aspace);
......
......@@ -1035,7 +1035,7 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) {
if (!a6xx_gpu->shadow_bo) {
a6xx_gpu->shadow = msm_gem_kernel_new_locked(gpu->dev,
a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
sizeof(u32) * gpu->nr_rings,
MSM_BO_WC | MSM_BO_MAP_PRIV,
gpu->aspace, &a6xx_gpu->shadow_bo,
......@@ -1477,7 +1477,7 @@ static int a6xx_pm_resume(struct msm_gpu *gpu)
if (ret)
return ret;
msm_gpu_resume_devfreq(gpu);
msm_devfreq_resume(gpu);
a6xx_llc_activate(a6xx_gpu);
......@@ -1494,7 +1494,7 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
a6xx_llc_deactivate(a6xx_gpu);
devfreq_suspend_device(gpu->devfreq.devfreq);
msm_devfreq_suspend(gpu);
ret = a6xx_gmu_stop(a6xx_gpu);
if (ret)
......
......@@ -112,7 +112,7 @@ static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
static int a6xx_crashdumper_init(struct msm_gpu *gpu,
struct a6xx_crashdumper *dumper)
{
dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
dumper->ptr = msm_gem_kernel_new(gpu->dev,
SZ_1M, MSM_BO_WC, gpu->aspace,
&dumper->bo, &dumper->iova);
......@@ -961,7 +961,7 @@ struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
a6xx_get_clusters(gpu, a6xx_state, dumper);
a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
msm_gem_kernel_put(dumper->bo, gpu->aspace, true);
msm_gem_kernel_put(dumper->bo, gpu->aspace);
}
if (snapshot_debugbus)
......
......@@ -261,8 +261,8 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
return ret;
}
return -EINVAL;
case MSM_PARAM_NR_RINGS:
*value = gpu->nr_rings;
case MSM_PARAM_PRIORITIES:
*value = gpu->nr_rings * NR_SCHED_PRIORITIES;
return 0;
case MSM_PARAM_PP_PGTABLE:
*value = 0;
......@@ -390,7 +390,7 @@ struct drm_gem_object *adreno_fw_create_bo(struct msm_gpu *gpu,
struct drm_gem_object *bo;
void *ptr;
ptr = msm_gem_kernel_new_locked(gpu->dev, fw->size - 4,
ptr = msm_gem_kernel_new(gpu->dev, fw->size - 4,
MSM_BO_WC | MSM_BO_GPU_READONLY, gpu->aspace, &bo, iova);
if (IS_ERR(ptr))
......
......@@ -911,6 +911,7 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
ktime_t timeout = to_ktime(args->timeout);
struct msm_gpu_submitqueue *queue;
struct msm_gpu *gpu = priv->gpu;
struct dma_fence *fence;
int ret;
if (args->pad) {
......@@ -925,10 +926,35 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
if (!queue)
return -ENOENT;
ret = msm_wait_fence(gpu->rb[queue->prio]->fctx, args->fence, &timeout,
true);
/*
* Map submitqueue scoped "seqno" (which is actually an idr key)
* back to underlying dma-fence
*
* The fence is removed from the fence_idr when the submit is
* retired, so if the fence is not found it means there is nothing
* to wait for
*/
ret = mutex_lock_interruptible(&queue->lock);
if (ret)
return ret;
fence = idr_find(&queue->fence_idr, args->fence);
if (fence)
fence = dma_fence_get_rcu(fence);
mutex_unlock(&queue->lock);
if (!fence)
return 0;
ret = dma_fence_wait_timeout(fence, true, timeout_to_jiffies(&timeout));
if (ret == 0) {
ret = -ETIMEDOUT;
} else if (ret != -ERESTARTSYS) {
ret = 0;
}
dma_fence_put(fence);
msm_submitqueue_put(queue);
return ret;
}
......
......@@ -11,7 +11,8 @@
struct msm_fence_context *
msm_fence_context_alloc(struct drm_device *dev, const char *name)
msm_fence_context_alloc(struct drm_device *dev, volatile uint32_t *fenceptr,
const char *name)
{
struct msm_fence_context *fctx;
......@@ -22,7 +23,7 @@ msm_fence_context_alloc(struct drm_device *dev, const char *name)
fctx->dev = dev;
strncpy(fctx->name, name, sizeof(fctx->name));
fctx->context = dma_fence_context_alloc(1);
init_waitqueue_head(&fctx->event);
fctx->fenceptr = fenceptr;
spin_lock_init(&fctx->spinlock);
return fctx;
......@@ -35,46 +36,12 @@ void msm_fence_context_free(struct msm_fence_context *fctx)
static inline bool fence_completed(struct msm_fence_context *fctx, uint32_t fence)
{
return (int32_t)(fctx->completed_fence - fence) >= 0;
}
/* legacy path for WAIT_FENCE ioctl: */
int msm_wait_fence(struct msm_fence_context *fctx, uint32_t fence,
ktime_t *timeout, bool interruptible)
{
int ret;
if (fence > fctx->last_fence) {
DRM_ERROR_RATELIMITED("%s: waiting on invalid fence: %u (of %u)\n",
fctx->name, fence, fctx->last_fence);
return -EINVAL;
}
if (!timeout) {
/* no-wait: */
ret = fence_completed(fctx, fence) ? 0 : -EBUSY;
} else {
unsigned long remaining_jiffies = timeout_to_jiffies(timeout);
if (interruptible)
ret = wait_event_interruptible_timeout(fctx->event,
fence_completed(fctx, fence),
remaining_jiffies);
else
ret = wait_event_timeout(fctx->event,
fence_completed(fctx, fence),
remaining_jiffies);
if (ret == 0) {
DBG("timeout waiting for fence: %u (completed: %u)",
fence, fctx->completed_fence);
ret = -ETIMEDOUT;
} else if (ret != -ERESTARTSYS) {
ret = 0;
}
}
return ret;
/*
* Note: Check completed_fence first, as fenceptr is in a write-combine
* mapping, so it will be more expensive to read.
*/
return (int32_t)(fctx->completed_fence - fence) >= 0 ||
(int32_t)(*fctx->fenceptr - fence) >= 0;
}
/* called from workqueue */
......@@ -83,8 +50,6 @@ void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence)
spin_lock(&fctx->spinlock);
fctx->completed_fence = max(fence, fctx->completed_fence);
spin_unlock(&fctx->spinlock);
wake_up_all(&fctx->event);
}
struct msm_fence {
......
......@@ -9,23 +9,53 @@
#include "msm_drv.h"
/**
* struct msm_fence_context - fence context for gpu
*
* Each ringbuffer has a single fence context, with the GPU writing an
* incrementing fence seqno at the end of each submit
*/
struct msm_fence_context {
struct drm_device *dev;
/** name: human readable name for fence timeline */
char name[32];
/** context: see dma_fence_context_alloc() */
unsigned context;
/* last_fence == completed_fence --> no pending work */
uint32_t last_fence; /* last assigned fence */
uint32_t completed_fence; /* last completed fence */
wait_queue_head_t event;
/**
* last_fence:
*
* Last assigned fence, incremented each time a fence is created
* on this fence context. If last_fence == completed_fence,
* there is no remaining pending work
*/
uint32_t last_fence;
/**
* completed_fence:
*
* The last completed fence, updated from the CPU after interrupt
* from GPU
*/
uint32_t completed_fence;
/**
* fenceptr:
*
* The address that the GPU directly writes with completed fence
* seqno. This can be ahead of completed_fence. We can peek at
* this to see if a fence has already signaled but the CPU hasn't
* gotten around to handling the irq and updating completed_fence
*/
volatile uint32_t *fenceptr;
spinlock_t spinlock;
};
struct msm_fence_context * msm_fence_context_alloc(struct drm_device *dev,
const char *name);
volatile uint32_t *fenceptr, const char *name);
void msm_fence_context_free(struct msm_fence_context *fctx);
int msm_wait_fence(struct msm_fence_context *fctx, uint32_t fence,
ktime_t *timeout, bool interruptible);
void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence);
struct dma_fence * msm_fence_alloc(struct msm_fence_context *fctx);
......
......@@ -131,7 +131,6 @@ static struct page **get_pages(struct drm_gem_object *obj)
if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
sync_for_device(msm_obj);
GEM_WARN_ON(msm_obj->active_count);
update_inactive(msm_obj);
}
......@@ -804,39 +803,6 @@ void msm_gem_vunmap(struct drm_gem_object *obj)
msm_obj->vaddr = NULL;
}
/* must be called before _move_to_active().. */
int msm_gem_sync_object(struct drm_gem_object *obj,
struct msm_fence_context *fctx, bool exclusive)
{
struct dma_resv_list *fobj;
struct dma_fence *fence;
int i, ret;
fence = dma_resv_excl_fence(obj->resv);
/* don't need to wait on our own fences, since ring is fifo */
if (fence && (fence->context != fctx->context)) {
ret = dma_fence_wait(fence, true);
if (ret)
return ret;
}
fobj = dma_resv_shared_list(obj->resv);
if (!exclusive || !fobj)
return 0;
for (i = 0; i < fobj->shared_count; i++) {
fence = rcu_dereference_protected(fobj->shared[i],
dma_resv_held(obj->resv));
if (fence->context != fctx->context) {
ret = dma_fence_wait(fence, true);
if (ret)
return ret;
}
}
return 0;
}
void msm_gem_active_get(struct drm_gem_object *obj, struct msm_gpu *gpu)
{
struct msm_gem_object *msm_obj = to_msm_bo(obj);
......@@ -846,7 +812,6 @@ void msm_gem_active_get(struct drm_gem_object *obj, struct msm_gpu *gpu)
GEM_WARN_ON(!msm_gem_is_locked(obj));
GEM_WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED);
GEM_WARN_ON(msm_obj->dontneed);
GEM_WARN_ON(!msm_obj->sgt);
if (msm_obj->active_count++ == 0) {
mutex_lock(&priv->mm_lock);
......@@ -1060,7 +1025,7 @@ void msm_gem_describe_objects(struct list_head *list, struct seq_file *m)
}
#endif
/* don't call directly! Use drm_gem_object_put_locked() and friends */
/* don't call directly! Use drm_gem_object_put() */
void msm_gem_free_object(struct drm_gem_object *obj)
{
struct msm_gem_object *msm_obj = to_msm_bo(obj);
......@@ -1181,7 +1146,6 @@ static int msm_gem_new_impl(struct drm_device *dev,
msm_obj->flags = flags;
msm_obj->madv = MSM_MADV_WILLNEED;
INIT_LIST_HEAD(&msm_obj->submit_entry);
INIT_LIST_HEAD(&msm_obj->vmas);
*obj = &msm_obj->base;
......@@ -1190,8 +1154,7 @@ static int msm_gem_new_impl(struct drm_device *dev,
return 0;
}
static struct drm_gem_object *_msm_gem_new(struct drm_device *dev,
uint32_t size, uint32_t flags, bool struct_mutex_locked)
struct drm_gem_object *msm_gem_new(struct drm_device *dev, uint32_t size, uint32_t flags)
{
struct msm_drm_private *priv = dev->dev_private;
struct msm_gem_object *msm_obj;
......@@ -1278,26 +1241,10 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev,
return obj;
fail:
if (struct_mutex_locked) {
drm_gem_object_put_locked(obj);
} else {
drm_gem_object_put(obj);
}
return ERR_PTR(ret);
}
struct drm_gem_object *msm_gem_new_locked(struct drm_device *dev,
uint32_t size, uint32_t flags)
{
return _msm_gem_new(dev, size, flags, true);
}
struct drm_gem_object *msm_gem_new(struct drm_device *dev,
uint32_t size, uint32_t flags)
{
return _msm_gem_new(dev, size, flags, false);
}
struct drm_gem_object *msm_gem_import(struct drm_device *dev,
struct dma_buf *dmabuf, struct sg_table *sgt)
{
......@@ -1356,12 +1303,12 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev,
return ERR_PTR(ret);
}
static void *_msm_gem_kernel_new(struct drm_device *dev, uint32_t size,
void *msm_gem_kernel_new(struct drm_device *dev, uint32_t size,
uint32_t flags, struct msm_gem_address_space *aspace,
struct drm_gem_object **bo, uint64_t *iova, bool locked)
struct drm_gem_object **bo, uint64_t *iova)
{
void *vaddr;
struct drm_gem_object *obj = _msm_gem_new(dev, size, flags, locked);
struct drm_gem_object *obj = msm_gem_new(dev, size, flags);
int ret;
if (IS_ERR(obj))
......@@ -1385,41 +1332,20 @@ static void *_msm_gem_kernel_new(struct drm_device *dev, uint32_t size,
return vaddr;
err:
if (locked)
drm_gem_object_put_locked(obj);
else
drm_gem_object_put(obj);
return ERR_PTR(ret);
}
void *msm_gem_kernel_new(struct drm_device *dev, uint32_t size,
uint32_t flags, struct msm_gem_address_space *aspace,
struct drm_gem_object **bo, uint64_t *iova)
{
return _msm_gem_kernel_new(dev, size, flags, aspace, bo, iova, false);
}
void *msm_gem_kernel_new_locked(struct drm_device *dev, uint32_t size,
uint32_t flags, struct msm_gem_address_space *aspace,
struct drm_gem_object **bo, uint64_t *iova)
{
return _msm_gem_kernel_new(dev, size, flags, aspace, bo, iova, true);
}
void msm_gem_kernel_put(struct drm_gem_object *bo,
struct msm_gem_address_space *aspace, bool locked)
struct msm_gem_address_space *aspace)
{
if (IS_ERR_OR_NULL(bo))
return;
msm_gem_put_vaddr(bo);
msm_gem_unpin_iova(bo, aspace);
if (locked)
drm_gem_object_put_locked(bo);
else
drm_gem_object_put(bo);
}
......
......@@ -9,6 +9,7 @@
#include <linux/kref.h>
#include <linux/dma-resv.h>
#include "drm/gpu_scheduler.h"
#include "msm_drv.h"
/* Make all GEM related WARN_ON()s ratelimited.. when things go wrong they
......@@ -87,13 +88,6 @@ struct msm_gem_object {
*/
struct list_head mm_list;
/* Transiently in the process of submit ioctl, objects associated
* with the submit are on submit->bo_list.. this only lasts for
* the duration of the ioctl, so one bo can never be on multiple
* submit lists.
*/
struct list_head submit_entry;
struct page **pages;
struct sg_table *sgt;
void *vaddr;
......@@ -143,8 +137,6 @@ void *msm_gem_get_vaddr_active(struct drm_gem_object *obj);
void msm_gem_put_vaddr_locked(struct drm_gem_object *obj);
void msm_gem_put_vaddr(struct drm_gem_object *obj);
int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv);
int msm_gem_sync_object(struct drm_gem_object *obj,
struct msm_fence_context *fctx, bool exclusive);
void msm_gem_active_get(struct drm_gem_object *obj, struct msm_gpu *gpu);
void msm_gem_active_put(struct drm_gem_object *obj);
int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout);
......@@ -154,16 +146,11 @@ int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file,
uint32_t size, uint32_t flags, uint32_t *handle, char *name);
struct drm_gem_object *msm_gem_new(struct drm_device *dev,
uint32_t size, uint32_t flags);
struct drm_gem_object *msm_gem_new_locked(struct drm_device *dev,
uint32_t size, uint32_t flags);
void *msm_gem_kernel_new(struct drm_device *dev, uint32_t size,
uint32_t flags, struct msm_gem_address_space *aspace,
struct drm_gem_object **bo, uint64_t *iova);
void *msm_gem_kernel_new_locked(struct drm_device *dev, uint32_t size,
uint32_t flags, struct msm_gem_address_space *aspace,
struct drm_gem_object **bo, uint64_t *iova);
void msm_gem_kernel_put(struct drm_gem_object *bo,
struct msm_gem_address_space *aspace, bool locked);
struct msm_gem_address_space *aspace);
struct drm_gem_object *msm_gem_import(struct drm_device *dev,
struct dma_buf *dmabuf, struct sg_table *sgt);
__printf(2, 3)
......@@ -313,19 +300,34 @@ void msm_gem_vunmap(struct drm_gem_object *obj);
/* Created per submit-ioctl, to track bo's and cmdstream bufs, etc,
* associated with the cmdstream submission for synchronization (and
* make it easier to unwind when things go wrong, etc). This only
* lasts for the duration of the submit-ioctl.
* make it easier to unwind when things go wrong, etc).
*/
struct msm_gem_submit {
struct drm_sched_job base;
struct kref ref;
struct drm_device *dev;
struct msm_gpu *gpu;
struct msm_gem_address_space *aspace;
struct list_head node; /* node in ring submit list */
struct list_head bo_list;
struct ww_acquire_ctx ticket;
uint32_t seqno; /* Sequence number of the submit on the ring */
struct dma_fence *fence;
/* Array of struct dma_fence * to block on before submitting this job.
*/
struct xarray deps;
unsigned long last_dep;
/* Hw fence, which is created when the scheduler executes the job, and
* is signaled when the hw finishes (via seqno write from cmdstream)
*/
struct dma_fence *hw_fence;
/* Userspace visible fence, which is signaled by the scheduler after
* the hw_fence is signaled.
*/
struct dma_fence *user_fence;
int fence_id; /* key into queue->fence_idr */
struct msm_gpu_submitqueue *queue;
struct pid *pid; /* submitting process */
bool fault_dumped; /* Limit devcoredump dumping to one per submit */
......@@ -355,6 +357,11 @@ struct msm_gem_submit {
} bos[];
};
static inline struct msm_gem_submit *to_msm_submit(struct drm_sched_job *job)
{
return container_of(job, struct msm_gem_submit, base);
}
void __msm_gem_submit_destroy(struct kref *kref);
static inline void msm_gem_submit_get(struct msm_gem_submit *submit)
......@@ -367,6 +374,8 @@ static inline void msm_gem_submit_put(struct msm_gem_submit *submit)
kref_put(&submit->ref, __msm_gem_submit_destroy);
}
void msm_submit_retire(struct msm_gem_submit *submit);
/* helper to determine of a buffer in submit should be dumped, used for both
* devcoredump and debugfs cmdstream dumping:
*/
......
This diff is collapsed.
This diff is collapsed.
......@@ -80,6 +80,40 @@ struct msm_gpu_fault_info {
const char *block;
};
/**
* struct msm_gpu_devfreq - devfreq related state
*/
struct msm_gpu_devfreq {
/** devfreq: devfreq instance */
struct devfreq *devfreq;
/**
* busy_cycles:
*
* Used by implementation of gpu->gpu_busy() to track the last
* busy counter value, for calculating elapsed busy cycles since
* last sampling period.
*/
u64 busy_cycles;
/** time: Time of last sampling period. */
ktime_t time;
/** idle_time: Time of last transition to idle: */
ktime_t idle_time;
/**
* idle_freq:
*
* Shadow frequency used while the GPU is idle. From the PoV of
* the devfreq governor, we are continuing to sample busyness and
* adjust frequency while the GPU is idle, but we use this shadow
* value as the GPU is actually clamped to minimum frequency while
* it is inactive.
*/
unsigned long idle_freq;
};
struct msm_gpu {
const char *name;
struct drm_device *dev;
......@@ -109,6 +143,19 @@ struct msm_gpu {
*/
struct list_head active_list;
/**
* active_submits:
*
* The number of submitted but not yet retired submits, used to
* determine transitions between active and idle.
*
* Protected by lock
*/
int active_submits;
/** lock: protects active_submits and idle/active transitions */
struct mutex active_lock;
/* does gpu need hw_init? */
bool needs_hw_init;
......@@ -151,11 +198,7 @@ struct msm_gpu {
struct drm_gem_object *memptrs_bo;
struct {
struct devfreq *devfreq;
u64 busy_cycles;
ktime_t time;
} devfreq;
struct msm_gpu_devfreq devfreq;
uint32_t suspend_count;
......@@ -207,14 +250,90 @@ struct msm_gpu_perfcntr {
const char *name;
};
/*
* The number of priority levels provided by drm gpu scheduler. The
* DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some
* cases, so we don't use it (no need for kernel generated jobs).
*/
#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_MIN)
/**
* msm_gpu_convert_priority - Map userspace priority to ring # and sched priority
*
* @gpu: the gpu instance
* @prio: the userspace priority level
* @ring_nr: [out] the ringbuffer the userspace priority maps to
* @sched_prio: [out] the gpu scheduler priority level which the userspace
* priority maps to
*
* With drm/scheduler providing it's own level of prioritization, our total
* number of available priority levels is (nr_rings * NR_SCHED_PRIORITIES).
* Each ring is associated with it's own scheduler instance. However, our
* UABI is that lower numerical values are higher priority. So mapping the
* single userspace priority level into ring_nr and sched_prio takes some
* care. The userspace provided priority (when a submitqueue is created)
* is mapped to ring nr and scheduler priority as such:
*
* ring_nr = userspace_prio / NR_SCHED_PRIORITIES
* sched_prio = NR_SCHED_PRIORITIES -
* (userspace_prio % NR_SCHED_PRIORITIES) - 1
*
* This allows generations without preemption (nr_rings==1) to have some
* amount of prioritization, and provides more priority levels for gens
* that do have preemption.
*/
static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
unsigned *ring_nr, enum drm_sched_priority *sched_prio)
{
unsigned rn, sp;
rn = div_u64_rem(prio, NR_SCHED_PRIORITIES, &sp);
/* invert sched priority to map to higher-numeric-is-higher-
* priority convention
*/
sp = NR_SCHED_PRIORITIES - sp - 1;
if (rn >= gpu->nr_rings)
return -EINVAL;
*ring_nr = rn;
*sched_prio = sp;
return 0;
}
/**
* A submitqueue is associated with a gl context or vk queue (or equiv)
* in userspace.
*
* @id: userspace id for the submitqueue, unique within the drm_file
* @flags: userspace flags for the submitqueue, specified at creation
* (currently unusued)
* @ring_nr: the ringbuffer used by this submitqueue, which is determined
* by the submitqueue's priority
* @faults: the number of GPU hangs associated with this submitqueue
* @ctx: the per-drm_file context associated with the submitqueue (ie.
* which set of pgtables do submits jobs associated with the
* submitqueue use)
* @node: node in the context's list of submitqueues
* @fence_idr: maps fence-id to dma_fence for userspace visible fence
* seqno, protected by submitqueue lock
* @lock: submitqueue lock
* @ref: reference count
* @entity: the submit job-queue
*/
struct msm_gpu_submitqueue {
int id;
u32 flags;
u32 prio;
u32 ring_nr;
int faults;
struct msm_file_private *ctx;
struct list_head node;
struct idr fence_idr;
struct mutex lock;
struct kref ref;
struct drm_sched_entity entity;
};
struct msm_gpu_state_bo {
......@@ -301,7 +420,13 @@ static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val)
int msm_gpu_pm_suspend(struct msm_gpu *gpu);
int msm_gpu_pm_resume(struct msm_gpu *gpu);
void msm_gpu_resume_devfreq(struct msm_gpu *gpu);
void msm_devfreq_init(struct msm_gpu *gpu);
void msm_devfreq_cleanup(struct msm_gpu *gpu);
void msm_devfreq_resume(struct msm_gpu *gpu);
void msm_devfreq_suspend(struct msm_gpu *gpu);
void msm_devfreq_active(struct msm_gpu *gpu);
void msm_devfreq_idle(struct msm_gpu *gpu);
int msm_gpu_hw_init(struct msm_gpu *gpu);
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2013 Red Hat
* Author: Rob Clark <robdclark@gmail.com>
*/
#include "msm_gpu.h"
#include "msm_gpu_trace.h"
#include <linux/devfreq.h>
#include <linux/devfreq_cooling.h>
/*
* Power Management:
*/
static int msm_devfreq_target(struct device *dev, unsigned long *freq,
u32 flags)
{
struct msm_gpu *gpu = dev_to_gpu(dev);
struct dev_pm_opp *opp;
opp = devfreq_recommended_opp(dev, freq, flags);
/*
* If the GPU is idle, devfreq is not aware, so just ignore
* it's requests
*/
if (gpu->devfreq.idle_freq) {
gpu->devfreq.idle_freq = *freq;
return 0;
}
if (IS_ERR(opp))
return PTR_ERR(opp);
trace_msm_gpu_freq_change(dev_pm_opp_get_freq(opp));
if (gpu->funcs->gpu_set_freq)
gpu->funcs->gpu_set_freq(gpu, opp);
else
clk_set_rate(gpu->core_clk, *freq);
dev_pm_opp_put(opp);
return 0;
}
static unsigned long get_freq(struct msm_gpu *gpu)
{
if (gpu->devfreq.idle_freq)
return gpu->devfreq.idle_freq;
if (gpu->funcs->gpu_get_freq)
return gpu->funcs->gpu_get_freq(gpu);
return clk_get_rate(gpu->core_clk);
}
static int msm_devfreq_get_dev_status(struct device *dev,
struct devfreq_dev_status *status)
{
struct msm_gpu *gpu = dev_to_gpu(dev);
ktime_t time;
status->current_frequency = get_freq(gpu);
status->busy_time = gpu->funcs->gpu_busy(gpu);
time = ktime_get();
status->total_time = ktime_us_delta(time, gpu->devfreq.time);
gpu->devfreq.time = time;
return 0;
}
static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
{
*freq = get_freq(dev_to_gpu(dev));
return 0;
}
static struct devfreq_dev_profile msm_devfreq_profile = {
.timer = DEVFREQ_TIMER_DELAYED,
.polling_ms = 50,
.target = msm_devfreq_target,
.get_dev_status = msm_devfreq_get_dev_status,
.get_cur_freq = msm_devfreq_get_cur_freq,
};
void msm_devfreq_init(struct msm_gpu *gpu)
{
/* We need target support to do devfreq */
if (!gpu->funcs->gpu_busy)
return;
msm_devfreq_profile.initial_freq = gpu->fast_rate;
/*
* Don't set the freq_table or max_state and let devfreq build the table
* from OPP
* After a deferred probe, these may have be left to non-zero values,
* so set them back to zero before creating the devfreq device
*/
msm_devfreq_profile.freq_table = NULL;
msm_devfreq_profile.max_state = 0;
gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev,
&msm_devfreq_profile, DEVFREQ_GOV_SIMPLE_ONDEMAND,
NULL);
if (IS_ERR(gpu->devfreq.devfreq)) {
DRM_DEV_ERROR(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n");
gpu->devfreq.devfreq = NULL;
return;
}
devfreq_suspend_device(gpu->devfreq.devfreq);
gpu->cooling = of_devfreq_cooling_register(gpu->pdev->dev.of_node,
gpu->devfreq.devfreq);
if (IS_ERR(gpu->cooling)) {
DRM_DEV_ERROR(&gpu->pdev->dev,
"Couldn't register GPU cooling device\n");
gpu->cooling = NULL;
}
}
void msm_devfreq_cleanup(struct msm_gpu *gpu)
{
devfreq_cooling_unregister(gpu->cooling);
}
void msm_devfreq_resume(struct msm_gpu *gpu)
{
gpu->devfreq.busy_cycles = 0;
gpu->devfreq.time = ktime_get();
devfreq_resume_device(gpu->devfreq.devfreq);
}
void msm_devfreq_suspend(struct msm_gpu *gpu)
{
devfreq_suspend_device(gpu->devfreq.devfreq);
}
void msm_devfreq_active(struct msm_gpu *gpu)
{
struct msm_gpu_devfreq *df = &gpu->devfreq;
struct devfreq_dev_status status;
unsigned int idle_time;
unsigned long target_freq = df->idle_freq;
/*
* Hold devfreq lock to synchronize with get_dev_status()/
* target() callbacks
*/
mutex_lock(&df->devfreq->lock);
idle_time = ktime_to_ms(ktime_sub(ktime_get(), df->idle_time));
/*
* If we've been idle for a significant fraction of a polling
* interval, then we won't meet the threshold of busyness for
* the governor to ramp up the freq.. so give some boost
*/
if (idle_time > msm_devfreq_profile.polling_ms/2) {
target_freq *= 2;
}
df->idle_freq = 0;
msm_devfreq_target(&gpu->pdev->dev, &target_freq, 0);
/*
* Reset the polling interval so we aren't inconsistent
* about freq vs busy/total cycles
*/
msm_devfreq_get_dev_status(&gpu->pdev->dev, &status);
mutex_unlock(&df->devfreq->lock);
}
void msm_devfreq_idle(struct msm_gpu *gpu)
{
struct msm_gpu_devfreq *df = &gpu->devfreq;
unsigned long idle_freq, target_freq = 0;
/*
* Hold devfreq lock to synchronize with get_dev_status()/
* target() callbacks
*/
mutex_lock(&df->devfreq->lock);
idle_freq = get_freq(gpu);
msm_devfreq_target(&gpu->pdev->dev, &target_freq, 0);
df->idle_time = ktime_get();
df->idle_freq = idle_freq;
mutex_unlock(&df->devfreq->lock);
}
......@@ -325,15 +325,19 @@ static void snapshot_buf(struct msm_rd_state *rd,
if (!(submit->bos[idx].flags & MSM_SUBMIT_BO_READ))
return;
msm_gem_lock(&obj->base);
buf = msm_gem_get_vaddr_active(&obj->base);
if (IS_ERR(buf))
return;
goto out_unlock;
buf += offset;
rd_write_section(rd, RD_BUFFER_CONTENTS, buf, size);
msm_gem_put_vaddr_locked(&obj->base);
out_unlock:
msm_gem_unlock(&obj->base);
}
/* called under struct_mutex */
......
......@@ -7,10 +7,61 @@
#include "msm_ringbuffer.h"
#include "msm_gpu.h"
static uint num_hw_submissions = 8;
MODULE_PARM_DESC(num_hw_submissions, "The max # of jobs to write into ringbuffer (default 8)");
module_param(num_hw_submissions, uint, 0600);
static struct dma_fence *msm_job_dependency(struct drm_sched_job *job,
struct drm_sched_entity *s_entity)
{
struct msm_gem_submit *submit = to_msm_submit(job);
if (!xa_empty(&submit->deps))
return xa_erase(&submit->deps, submit->last_dep++);
return NULL;
}
static struct dma_fence *msm_job_run(struct drm_sched_job *job)
{
struct msm_gem_submit *submit = to_msm_submit(job);
struct msm_gpu *gpu = submit->gpu;
submit->hw_fence = msm_fence_alloc(submit->ring->fctx);
pm_runtime_get_sync(&gpu->pdev->dev);
/* TODO move submit path over to using a per-ring lock.. */
mutex_lock(&gpu->dev->struct_mutex);
msm_gpu_submit(gpu, submit);
mutex_unlock(&gpu->dev->struct_mutex);
pm_runtime_put(&gpu->pdev->dev);
return dma_fence_get(submit->hw_fence);
}
static void msm_job_free(struct drm_sched_job *job)
{
struct msm_gem_submit *submit = to_msm_submit(job);
drm_sched_job_cleanup(job);
msm_gem_submit_put(submit);
}
const struct drm_sched_backend_ops msm_sched_ops = {
.dependency = msm_job_dependency,
.run_job = msm_job_run,
.free_job = msm_job_free
};
struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
void *memptrs, uint64_t memptrs_iova)
{
struct msm_ringbuffer *ring;
long sched_timeout;
char name[32];
int ret;
......@@ -32,7 +83,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
if (IS_ERR(ring->start)) {
ret = PTR_ERR(ring->start);
ring->start = 0;
ring->start = NULL;
goto fail;
}
......@@ -45,13 +96,23 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
ring->memptrs = memptrs;
ring->memptrs_iova = memptrs_iova;
/* currently managing hangcheck ourselves: */
sched_timeout = MAX_SCHEDULE_TIMEOUT;
ret = drm_sched_init(&ring->sched, &msm_sched_ops,
num_hw_submissions, 0, sched_timeout,
NULL, NULL, to_msm_bo(ring->bo)->name);
if (ret) {
goto fail;
}
INIT_LIST_HEAD(&ring->submits);
spin_lock_init(&ring->submit_lock);
spin_lock_init(&ring->preempt_lock);
snprintf(name, sizeof(name), "gpu-ring-%d", ring->id);
ring->fctx = msm_fence_context_alloc(gpu->dev, name);
ring->fctx = msm_fence_context_alloc(gpu->dev, &ring->memptrs->fence, name);
return ring;
......@@ -65,9 +126,11 @@ void msm_ringbuffer_destroy(struct msm_ringbuffer *ring)
if (IS_ERR_OR_NULL(ring))
return;
drm_sched_fini(&ring->sched);
msm_fence_context_free(ring->fctx);
msm_gem_kernel_put(ring->bo, ring->gpu->aspace, false);
msm_gem_kernel_put(ring->bo, ring->gpu->aspace);
kfree(ring);
}
......@@ -7,6 +7,7 @@
#ifndef __MSM_RINGBUFFER_H__
#define __MSM_RINGBUFFER_H__
#include "drm/gpu_scheduler.h"
#include "msm_drv.h"
#define rbmemptr(ring, member) \
......@@ -40,8 +41,19 @@ struct msm_ringbuffer {
struct drm_gem_object *bo;
uint32_t *start, *end, *cur, *next;
/*
* The job scheduler for this ring.
*/
struct drm_gpu_scheduler sched;
/*
* List of in-flight submits on this ring. Protected by submit_lock.
*
* Currently just submits that are already written into the ring, not
* submits that are still in drm_gpu_scheduler's queues. At a later
* step we could probably move to letting drm_gpu_scheduler manage
* hangcheck detection and keep track of submit jobs that are in-
* flight.
*/
struct list_head submits;
spinlock_t submit_lock;
......
......@@ -12,6 +12,10 @@ void msm_submitqueue_destroy(struct kref *kref)
struct msm_gpu_submitqueue *queue = container_of(kref,
struct msm_gpu_submitqueue, ref);
idr_destroy(&queue->fence_idr);
drm_sched_entity_destroy(&queue->entity);
msm_file_private_put(queue->ctx);
kfree(queue);
......@@ -62,10 +66,22 @@ int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
{
struct msm_drm_private *priv = drm->dev_private;
struct msm_gpu_submitqueue *queue;
struct msm_ringbuffer *ring;
struct drm_gpu_scheduler *sched;
enum drm_sched_priority sched_prio;
unsigned ring_nr;
int ret;
if (!ctx)
return -ENODEV;
if (!priv->gpu)
return -ENODEV;
ret = msm_gpu_convert_priority(priv->gpu, prio, &ring_nr, &sched_prio);
if (ret)
return ret;
queue = kzalloc(sizeof(*queue), GFP_KERNEL);
if (!queue)
......@@ -73,14 +89,16 @@ int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
kref_init(&queue->ref);
queue->flags = flags;
queue->ring_nr = ring_nr;
if (priv->gpu) {
if (prio >= priv->gpu->nr_rings) {
kfree(queue);
return -EINVAL;
}
ring = priv->gpu->rb[ring_nr];
sched = &ring->sched;
queue->prio = prio;
ret = drm_sched_entity_init(&queue->entity,
sched_prio, &sched, 1, NULL);
if (ret) {
kfree(queue);
return ret;
}
write_lock(&ctx->queuelock);
......@@ -91,6 +109,9 @@ int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
if (id)
*id = queue->id;
idr_init(&queue->fence_idr);
mutex_init(&queue->lock);
list_add_tail(&queue->node, &ctx->submitqueues);
write_unlock(&ctx->queuelock);
......@@ -98,20 +119,26 @@ int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
return 0;
}
/*
* Create the default submit-queue (id==0), used for backwards compatibility
* for userspace that pre-dates the introduction of submitqueues.
*/
int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx)
{
struct msm_drm_private *priv = drm->dev_private;
int default_prio;
int default_prio, max_priority;
if (!ctx)
return 0;
if (!priv->gpu)
return -ENODEV;
max_priority = (priv->gpu->nr_rings * NR_SCHED_PRIORITIES) - 1;
/*
* Select priority 2 as the "default priority" unless nr_rings is less
* than 2 and then pick the lowest pirority
* Pick a medium priority level as default. Lower numeric value is
* higher priority, so round-up to pick a priority that is not higher
* than the middle priority level.
*/
default_prio = priv->gpu ?
clamp_t(uint32_t, 2, 0, priv->gpu->nr_rings - 1) : 0;
default_prio = DIV_ROUND_UP(max_priority, 2);
INIT_LIST_HEAD(&ctx->submitqueues);
......
......@@ -384,8 +384,6 @@ drm_gem_object_put(struct drm_gem_object *obj)
__drm_gem_object_put(obj);
}
void drm_gem_object_put_locked(struct drm_gem_object *obj);
int drm_gem_handle_create(struct drm_file *file_priv,
struct drm_gem_object *obj,
u32 *handlep);
......
......@@ -73,11 +73,19 @@ struct drm_msm_timespec {
#define MSM_PARAM_MAX_FREQ 0x04
#define MSM_PARAM_TIMESTAMP 0x05
#define MSM_PARAM_GMEM_BASE 0x06
#define MSM_PARAM_NR_RINGS 0x07
#define MSM_PARAM_PRIORITIES 0x07 /* The # of priority levels */
#define MSM_PARAM_PP_PGTABLE 0x08 /* => 1 for per-process pagetables, else 0 */
#define MSM_PARAM_FAULTS 0x09
#define MSM_PARAM_SUSPENDS 0x0a
/* For backwards compat. The original support for preemption was based on
* a single ring per priority level so # of priority levels equals the #
* of rings. With drm/scheduler providing additional levels of priority,
* the number of priorities is greater than the # of rings. The param is
* renamed to better reflect this.
*/
#define MSM_PARAM_NR_RINGS MSM_PARAM_PRIORITIES
struct drm_msm_param {
__u32 pipe; /* in, MSM_PIPE_x */
__u32 param; /* in, MSM_PARAM_x */
......@@ -304,6 +312,10 @@ struct drm_msm_gem_madvise {
#define MSM_SUBMITQUEUE_FLAGS (0)
/*
* The submitqueue priority should be between 0 and MSM_PARAM_PRIORITIES-1,
* a lower numeric value is higher priority.
*/
struct drm_msm_submitqueue {
__u32 flags; /* in, MSM_SUBMITQUEUE_x */
__u32 prio; /* in, Priority level */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment