Commit 5f03a507 authored by Danilo Krummrich

drm/nouveau: implement 1:1 scheduler - entity relationship

Recent patches to the DRM scheduler [1][2] allow for a variable number
of run-queues and add support for (shared) workqueues rather than
dedicated kthreads per scheduler. This allows us to create a 1:1
relationship between a GPU scheduler and a scheduler entity, in order to
properly support firmware schedulers being able to handle an arbitrary
number of dynamically allocated command ring buffers. This perfectly
matches Nouveau's needs, hence make use of it.

Topology-wise, we create one scheduler instance per client (handling
VM_BIND jobs) and one scheduler instance per channel (handling EXEC
jobs).

All channel scheduler instances share a workqueue, but every client
scheduler instance has a dedicated workqueue. The latter is required to
ensure that, for VM_BIND jobs, free_job() work and run_job() work can
always run concurrently and hence free_job() work can never stall
run_job() work. For EXEC jobs we don't have this requirement, since an
EXEC job's free_job() does not need to take any locks which are directly
or indirectly held for allocations elsewhere.

[1] https://lore.kernel.org/all/8f53f7ef-7621-4f0b-bdef-d8d20bc497ff@redhat.com/T/
[2] https://lore.kernel.org/all/20231031032439.1558703-1-matthew.brost@intel.com/T/

Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231114002728.3491-1-dakr@redhat.com
parent 014f831a
...@@ -127,21 +127,14 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16, ...@@ -127,21 +127,14 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16,
{ {
struct nouveau_abi16_ntfy *ntfy, *temp; struct nouveau_abi16_ntfy *ntfy, *temp;
/* When a client exits without waiting for it's queued up jobs to /* Cancel all jobs from the entity's queue. */
* finish it might happen that we fault the channel. This is due to drm_sched_entity_fini(&chan->sched.entity);
* drm_file_free() calling drm_gem_release() before the postclose()
* callback. Hence, we can't tear down this scheduler entity before
* uvmm mappings are unmapped. Currently, we can't detect this case.
*
* However, this should be rare and harmless, since the channel isn't
* needed anymore.
*/
nouveau_sched_entity_fini(&chan->sched_entity);
/* wait for all activity to stop before cleaning up */
if (chan->chan) if (chan->chan)
nouveau_channel_idle(chan->chan); nouveau_channel_idle(chan->chan);
nouveau_sched_fini(&chan->sched);
/* cleanup notifier state */ /* cleanup notifier state */
list_for_each_entry_safe(ntfy, temp, &chan->notifiers, head) { list_for_each_entry_safe(ntfy, temp, &chan->notifiers, head) {
nouveau_abi16_ntfy_fini(chan, ntfy); nouveau_abi16_ntfy_fini(chan, ntfy);
...@@ -344,8 +337,7 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) ...@@ -344,8 +337,7 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
if (ret) if (ret)
goto done; goto done;
ret = nouveau_sched_entity_init(&chan->sched_entity, &drm->sched, ret = nouveau_sched_init(&chan->sched, drm, drm->sched_wq);
drm->sched_wq);
if (ret) if (ret)
goto done; goto done;
......
...@@ -26,7 +26,7 @@ struct nouveau_abi16_chan { ...@@ -26,7 +26,7 @@ struct nouveau_abi16_chan {
struct nouveau_bo *ntfy; struct nouveau_bo *ntfy;
struct nouveau_vma *ntfy_vma; struct nouveau_vma *ntfy_vma;
struct nvkm_mm heap; struct nvkm_mm heap;
struct nouveau_sched_entity sched_entity; struct nouveau_sched sched;
}; };
struct nouveau_abi16 { struct nouveau_abi16 {
......
...@@ -201,9 +201,9 @@ nouveau_cli_fini(struct nouveau_cli *cli) ...@@ -201,9 +201,9 @@ nouveau_cli_fini(struct nouveau_cli *cli)
WARN_ON(!list_empty(&cli->worker)); WARN_ON(!list_empty(&cli->worker));
usif_client_fini(cli); usif_client_fini(cli);
nouveau_sched_fini(&cli->sched);
if (uvmm) if (uvmm)
nouveau_uvmm_fini(uvmm); nouveau_uvmm_fini(uvmm);
nouveau_sched_entity_fini(&cli->sched_entity);
nouveau_vmm_fini(&cli->svm); nouveau_vmm_fini(&cli->svm);
nouveau_vmm_fini(&cli->vmm); nouveau_vmm_fini(&cli->vmm);
nvif_mmu_dtor(&cli->mmu); nvif_mmu_dtor(&cli->mmu);
...@@ -310,8 +310,17 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname, ...@@ -310,8 +310,17 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
cli->mem = &mems[ret]; cli->mem = &mems[ret];
ret = nouveau_sched_entity_init(&cli->sched_entity, &drm->sched, /* Don't pass in the (shared) sched_wq in order to let
drm->sched_wq); * nouveau_sched_init() create a dedicated one for VM_BIND jobs.
*
* This is required to ensure that for VM_BIND jobs free_job() work and
* run_job() work can always run concurrently and hence, free_job() work
* can never stall run_job() work. For EXEC jobs we don't have this
* requirement, since EXEC job's free_job() does not require to take any
* locks which indirectly or directly are held for allocations
* elsewhere.
*/
ret = nouveau_sched_init(&cli->sched, drm, NULL);
if (ret) if (ret)
goto done; goto done;
...@@ -582,13 +591,16 @@ nouveau_drm_device_init(struct drm_device *dev) ...@@ -582,13 +591,16 @@ nouveau_drm_device_init(struct drm_device *dev)
nvif_parent_ctor(&nouveau_parent, &drm->parent); nvif_parent_ctor(&nouveau_parent, &drm->parent);
drm->master.base.object.parent = &drm->parent; drm->master.base.object.parent = &drm->parent;
ret = nouveau_sched_init(drm); drm->sched_wq = alloc_workqueue("nouveau_sched_wq_shared", 0,
if (ret) WQ_MAX_ACTIVE);
if (!drm->sched_wq) {
ret = -ENOMEM;
goto fail_alloc; goto fail_alloc;
}
ret = nouveau_cli_init(drm, "DRM-master", &drm->master); ret = nouveau_cli_init(drm, "DRM-master", &drm->master);
if (ret) if (ret)
goto fail_sched; goto fail_wq;
ret = nouveau_cli_init(drm, "DRM", &drm->client); ret = nouveau_cli_init(drm, "DRM", &drm->client);
if (ret) if (ret)
...@@ -658,8 +670,8 @@ nouveau_drm_device_init(struct drm_device *dev) ...@@ -658,8 +670,8 @@ nouveau_drm_device_init(struct drm_device *dev)
nouveau_cli_fini(&drm->client); nouveau_cli_fini(&drm->client);
fail_master: fail_master:
nouveau_cli_fini(&drm->master); nouveau_cli_fini(&drm->master);
fail_sched: fail_wq:
nouveau_sched_fini(drm); destroy_workqueue(drm->sched_wq);
fail_alloc: fail_alloc:
nvif_parent_dtor(&drm->parent); nvif_parent_dtor(&drm->parent);
kfree(drm); kfree(drm);
...@@ -711,10 +723,9 @@ nouveau_drm_device_fini(struct drm_device *dev) ...@@ -711,10 +723,9 @@ nouveau_drm_device_fini(struct drm_device *dev)
} }
mutex_unlock(&drm->clients_lock); mutex_unlock(&drm->clients_lock);
nouveau_sched_fini(drm);
nouveau_cli_fini(&drm->client); nouveau_cli_fini(&drm->client);
nouveau_cli_fini(&drm->master); nouveau_cli_fini(&drm->master);
destroy_workqueue(drm->sched_wq);
nvif_parent_dtor(&drm->parent); nvif_parent_dtor(&drm->parent);
mutex_destroy(&drm->clients_lock); mutex_destroy(&drm->clients_lock);
kfree(drm); kfree(drm);
......
...@@ -98,7 +98,7 @@ struct nouveau_cli { ...@@ -98,7 +98,7 @@ struct nouveau_cli {
bool disabled; bool disabled;
} uvmm; } uvmm;
struct nouveau_sched_entity sched_entity; struct nouveau_sched sched;
const struct nvif_mclass *mem; const struct nvif_mclass *mem;
...@@ -258,6 +258,9 @@ struct nouveau_drm { ...@@ -258,6 +258,9 @@ struct nouveau_drm {
u64 context_base; u64 context_base;
} *runl; } *runl;
/* Workqueue used for channel schedulers. */
struct workqueue_struct *sched_wq;
/* context for accelerated drm-internal operations */ /* context for accelerated drm-internal operations */
struct nouveau_channel *cechan; struct nouveau_channel *cechan;
struct nouveau_channel *channel; struct nouveau_channel *channel;
...@@ -298,10 +301,6 @@ struct nouveau_drm { ...@@ -298,10 +301,6 @@ struct nouveau_drm {
struct mutex lock; struct mutex lock;
bool component_registered; bool component_registered;
} audio; } audio;
struct drm_gpu_scheduler sched;
struct workqueue_struct *sched_wq;
}; };
static inline struct nouveau_drm * static inline struct nouveau_drm *
......
...@@ -165,6 +165,7 @@ nouveau_exec_job_free(struct nouveau_job *job) ...@@ -165,6 +165,7 @@ nouveau_exec_job_free(struct nouveau_job *job)
{ {
struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job); struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
nouveau_job_done(job);
nouveau_job_free(job); nouveau_job_free(job);
kfree(exec_job->fence); kfree(exec_job->fence);
...@@ -184,8 +185,6 @@ nouveau_exec_job_timeout(struct nouveau_job *job) ...@@ -184,8 +185,6 @@ nouveau_exec_job_timeout(struct nouveau_job *job)
NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n", NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
chan->chid); chan->chid);
nouveau_sched_entity_fini(job->entity);
return DRM_GPU_SCHED_STAT_NOMINAL; return DRM_GPU_SCHED_STAT_NOMINAL;
} }
...@@ -234,7 +233,7 @@ nouveau_exec_job_init(struct nouveau_exec_job **pjob, ...@@ -234,7 +233,7 @@ nouveau_exec_job_init(struct nouveau_exec_job **pjob,
job->chan = __args->chan; job->chan = __args->chan;
args.sched_entity = __args->sched_entity; args.sched = __args->sched;
args.file_priv = __args->file_priv; args.file_priv = __args->file_priv;
args.in_sync.count = __args->in_sync.count; args.in_sync.count = __args->in_sync.count;
...@@ -388,7 +387,7 @@ nouveau_exec_ioctl_exec(struct drm_device *dev, ...@@ -388,7 +387,7 @@ nouveau_exec_ioctl_exec(struct drm_device *dev,
if (ret) if (ret)
goto out; goto out;
args.sched_entity = &chan16->sched_entity; args.sched = &chan16->sched;
args.file_priv = file_priv; args.file_priv = file_priv;
args.chan = chan; args.chan = chan;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
struct nouveau_exec_job_args { struct nouveau_exec_job_args {
struct drm_file *file_priv; struct drm_file *file_priv;
struct nouveau_sched_entity *sched_entity; struct nouveau_sched *sched;
struct nouveau_channel *chan; struct nouveau_channel *chan;
struct { struct {
......
...@@ -12,30 +12,29 @@ ...@@ -12,30 +12,29 @@
#include "nouveau_abi16.h" #include "nouveau_abi16.h"
#include "nouveau_sched.h" #include "nouveau_sched.h"
/* FIXME #define NOUVEAU_SCHED_HW_SUBMISSIONS 1
*
* We want to make sure that jobs currently executing can't be deferred by
* other jobs competing for the hardware. Otherwise we might end up with job
* timeouts just because of too many clients submitting too many jobs. We don't
* want jobs to time out because of system load, but because of the job being
* too bulky.
*
* For now allow for up to 16 concurrent jobs in flight until we know how many
* rings the hardware can process in parallel.
*/
#define NOUVEAU_SCHED_HW_SUBMISSIONS 16
#define NOUVEAU_SCHED_JOB_TIMEOUT_MS 10000 #define NOUVEAU_SCHED_JOB_TIMEOUT_MS 10000
/* Starts at 0, since the DRM scheduler interprets those parameters as (initial)
* index to the run-queue array.
*/
enum nouveau_sched_priority {
NOUVEAU_SCHED_PRIORITY_SINGLE = DRM_SCHED_PRIORITY_MIN,
NOUVEAU_SCHED_PRIORITY_COUNT,
};
int int
nouveau_job_init(struct nouveau_job *job, nouveau_job_init(struct nouveau_job *job,
struct nouveau_job_args *args) struct nouveau_job_args *args)
{ {
struct nouveau_sched_entity *entity = args->sched_entity; struct nouveau_sched *sched = args->sched;
int ret; int ret;
INIT_LIST_HEAD(&job->entry);
job->file_priv = args->file_priv; job->file_priv = args->file_priv;
job->cli = nouveau_cli(args->file_priv); job->cli = nouveau_cli(args->file_priv);
job->entity = entity; job->sched = sched;
job->sync = args->sync; job->sync = args->sync;
job->resv_usage = args->resv_usage; job->resv_usage = args->resv_usage;
...@@ -89,7 +88,7 @@ nouveau_job_init(struct nouveau_job *job, ...@@ -89,7 +88,7 @@ nouveau_job_init(struct nouveau_job *job,
} }
ret = drm_sched_job_init(&job->base, &entity->base, 1, NULL); ret = drm_sched_job_init(&job->base, &sched->entity, 1, NULL);
if (ret) if (ret)
goto err_free_chains; goto err_free_chains;
...@@ -108,6 +107,27 @@ nouveau_job_init(struct nouveau_job *job, ...@@ -108,6 +107,27 @@ nouveau_job_init(struct nouveau_job *job,
return ret; return ret;
} }
void
nouveau_job_fini(struct nouveau_job *job)
{
dma_fence_put(job->done_fence);
drm_sched_job_cleanup(&job->base);
job->ops->free(job);
}
void
nouveau_job_done(struct nouveau_job *job)
{
struct nouveau_sched *sched = job->sched;
spin_lock(&sched->job.list.lock);
list_del(&job->entry);
spin_unlock(&sched->job.list.lock);
wake_up(&sched->job.wq);
}
void void
nouveau_job_free(struct nouveau_job *job) nouveau_job_free(struct nouveau_job *job)
{ {
...@@ -117,13 +137,6 @@ nouveau_job_free(struct nouveau_job *job) ...@@ -117,13 +137,6 @@ nouveau_job_free(struct nouveau_job *job)
kfree(job->out_sync.chains); kfree(job->out_sync.chains);
} }
void nouveau_job_fini(struct nouveau_job *job)
{
dma_fence_put(job->done_fence);
drm_sched_job_cleanup(&job->base);
job->ops->free(job);
}
static int static int
sync_find_fence(struct nouveau_job *job, sync_find_fence(struct nouveau_job *job,
struct drm_nouveau_sync *sync, struct drm_nouveau_sync *sync,
...@@ -261,7 +274,7 @@ nouveau_job_fence_attach(struct nouveau_job *job) ...@@ -261,7 +274,7 @@ nouveau_job_fence_attach(struct nouveau_job *job)
int int
nouveau_job_submit(struct nouveau_job *job) nouveau_job_submit(struct nouveau_job *job)
{ {
struct nouveau_sched_entity *entity = to_nouveau_sched_entity(job->base.entity); struct nouveau_sched *sched = job->sched;
struct dma_fence *done_fence = NULL; struct dma_fence *done_fence = NULL;
struct drm_gpuvm_exec vm_exec = { struct drm_gpuvm_exec vm_exec = {
.vm = &nouveau_cli_uvmm(job->cli)->base, .vm = &nouveau_cli_uvmm(job->cli)->base,
...@@ -281,7 +294,7 @@ nouveau_job_submit(struct nouveau_job *job) ...@@ -281,7 +294,7 @@ nouveau_job_submit(struct nouveau_job *job)
/* Make sure the job appears on the sched_entity's queue in the same /* Make sure the job appears on the sched_entity's queue in the same
* order as it was submitted. * order as it was submitted.
*/ */
mutex_lock(&entity->mutex); mutex_lock(&sched->mutex);
/* Guarantee we won't fail after the submit() callback returned /* Guarantee we won't fail after the submit() callback returned
* successfully. * successfully.
...@@ -292,33 +305,16 @@ nouveau_job_submit(struct nouveau_job *job) ...@@ -292,33 +305,16 @@ nouveau_job_submit(struct nouveau_job *job)
goto err_cleanup; goto err_cleanup;
} }
/* Submit was successful; add the job to the schedulers job list. */
spin_lock(&sched->job.list.lock);
list_add(&job->entry, &sched->job.list.head);
spin_unlock(&sched->job.list.lock);
drm_sched_job_arm(&job->base); drm_sched_job_arm(&job->base);
job->done_fence = dma_fence_get(&job->base.s_fence->finished); job->done_fence = dma_fence_get(&job->base.s_fence->finished);
if (job->sync) if (job->sync)
done_fence = dma_fence_get(job->done_fence); done_fence = dma_fence_get(job->done_fence);
/* If a sched job depends on a dma-fence from a job from the same GPU
* scheduler instance, but a different scheduler entity, the GPU
* scheduler does only wait for the particular job to be scheduled,
* rather than for the job to fully complete. This is due to the GPU
* scheduler assuming that there is a scheduler instance per ring.
* However, the current implementation, in order to avoid arbitrary
* amounts of kthreads, has a single scheduler instance while scheduler
* entities represent rings.
*
* As a workaround, set the DRM_SCHED_FENCE_DONT_PIPELINE for all
* out-fences in order to force the scheduler to wait for full job
* completion for dependent jobs from different entities and same
* scheduler instance.
*
* There is some work in progress [1] to address the issues of firmware
* schedulers; once it is in-tree the scheduler topology in Nouveau
* should be re-worked accordingly.
*
* [1] https://lore.kernel.org/dri-devel/20230801205103.627779-1-matthew.brost@intel.com/
*/
set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &job->done_fence->flags);
if (job->ops->armed_submit) if (job->ops->armed_submit)
job->ops->armed_submit(job, &vm_exec); job->ops->armed_submit(job, &vm_exec);
...@@ -331,7 +327,7 @@ nouveau_job_submit(struct nouveau_job *job) ...@@ -331,7 +327,7 @@ nouveau_job_submit(struct nouveau_job *job)
drm_sched_entity_push_job(&job->base); drm_sched_entity_push_job(&job->base);
mutex_unlock(&entity->mutex); mutex_unlock(&sched->mutex);
if (done_fence) { if (done_fence) {
dma_fence_wait(done_fence, true); dma_fence_wait(done_fence, true);
...@@ -341,20 +337,13 @@ nouveau_job_submit(struct nouveau_job *job) ...@@ -341,20 +337,13 @@ nouveau_job_submit(struct nouveau_job *job)
return 0; return 0;
err_cleanup: err_cleanup:
mutex_unlock(&entity->mutex); mutex_unlock(&sched->mutex);
nouveau_job_fence_attach_cleanup(job); nouveau_job_fence_attach_cleanup(job);
err: err:
job->state = NOUVEAU_JOB_SUBMIT_FAILED; job->state = NOUVEAU_JOB_SUBMIT_FAILED;
return ret; return ret;
} }
bool
nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity,
struct work_struct *work)
{
return queue_work(entity->sched_wq, work);
}
static struct dma_fence * static struct dma_fence *
nouveau_job_run(struct nouveau_job *job) nouveau_job_run(struct nouveau_job *job)
{ {
...@@ -404,50 +393,82 @@ nouveau_sched_free_job(struct drm_sched_job *sched_job) ...@@ -404,50 +393,82 @@ nouveau_sched_free_job(struct drm_sched_job *sched_job)
nouveau_job_fini(job); nouveau_job_fini(job);
} }
int nouveau_sched_entity_init(struct nouveau_sched_entity *entity,
struct drm_gpu_scheduler *sched,
struct workqueue_struct *sched_wq)
{
mutex_init(&entity->mutex);
spin_lock_init(&entity->job.list.lock);
INIT_LIST_HEAD(&entity->job.list.head);
init_waitqueue_head(&entity->job.wq);
entity->sched_wq = sched_wq;
return drm_sched_entity_init(&entity->base,
DRM_SCHED_PRIORITY_NORMAL,
&sched, 1, NULL);
}
void
nouveau_sched_entity_fini(struct nouveau_sched_entity *entity)
{
drm_sched_entity_destroy(&entity->base);
}
static const struct drm_sched_backend_ops nouveau_sched_ops = { static const struct drm_sched_backend_ops nouveau_sched_ops = {
.run_job = nouveau_sched_run_job, .run_job = nouveau_sched_run_job,
.timedout_job = nouveau_sched_timedout_job, .timedout_job = nouveau_sched_timedout_job,
.free_job = nouveau_sched_free_job, .free_job = nouveau_sched_free_job,
}; };
int nouveau_sched_init(struct nouveau_drm *drm) int
nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
struct workqueue_struct *wq)
{ {
struct drm_gpu_scheduler *sched = &drm->sched; struct drm_gpu_scheduler *drm_sched = &sched->base;
struct drm_sched_entity *entity = &sched->entity;
long job_hang_limit = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS); long job_hang_limit = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS);
int ret;
drm->sched_wq = create_singlethread_workqueue("nouveau_sched_wq"); if (!wq) {
if (!drm->sched_wq) wq = alloc_workqueue("nouveau_sched_wq_%d", 0, WQ_MAX_ACTIVE,
return -ENOMEM; current->pid);
if (!wq)
return -ENOMEM;
sched->wq = wq;
}
return drm_sched_init(sched, &nouveau_sched_ops, NULL, ret = drm_sched_init(drm_sched, &nouveau_sched_ops, wq,
DRM_SCHED_PRIORITY_COUNT, NOUVEAU_SCHED_PRIORITY_COUNT,
NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit, NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
NULL, NULL, "nouveau_sched", drm->dev->dev); NULL, NULL, "nouveau_sched", drm->dev->dev);
if (ret)
goto fail_wq;
/* Using DRM_SCHED_PRIORITY_MIN, since that's what we're required to use
* when we want to have a single run-queue only.
*
* It's not documented, but one will find out when trying to use any
* other priority running into faults, because the scheduler uses the
* priority as array index.
*
* Can't use NOUVEAU_SCHED_PRIORITY_SINGLE either, because it's not
* matching the enum type used in drm_sched_entity_init().
*/
ret = drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_MIN,
&drm_sched, 1, NULL);
if (ret)
goto fail_sched;
mutex_init(&sched->mutex);
spin_lock_init(&sched->job.list.lock);
INIT_LIST_HEAD(&sched->job.list.head);
init_waitqueue_head(&sched->job.wq);
return 0;
fail_sched:
drm_sched_fini(drm_sched);
fail_wq:
if (sched->wq)
destroy_workqueue(sched->wq);
return ret;
} }
void nouveau_sched_fini(struct nouveau_drm *drm) void
nouveau_sched_fini(struct nouveau_sched *sched)
{ {
destroy_workqueue(drm->sched_wq); struct drm_gpu_scheduler *drm_sched = &sched->base;
drm_sched_fini(&drm->sched); struct drm_sched_entity *entity = &sched->entity;
rmb(); /* for list_empty to work without lock */
wait_event(sched->job.wq, list_empty(&sched->job.list.head));
drm_sched_entity_fini(entity);
drm_sched_fini(drm_sched);
/* Destroy workqueue after scheduler tear down, otherwise it might still
* be in use.
*/
if (sched->wq)
destroy_workqueue(sched->wq);
} }
...@@ -26,7 +26,7 @@ enum nouveau_job_state { ...@@ -26,7 +26,7 @@ enum nouveau_job_state {
struct nouveau_job_args { struct nouveau_job_args {
struct drm_file *file_priv; struct drm_file *file_priv;
struct nouveau_sched_entity *sched_entity; struct nouveau_sched *sched;
enum dma_resv_usage resv_usage; enum dma_resv_usage resv_usage;
bool sync; bool sync;
...@@ -49,7 +49,8 @@ struct nouveau_job { ...@@ -49,7 +49,8 @@ struct nouveau_job {
enum nouveau_job_state state; enum nouveau_job_state state;
struct nouveau_sched_entity *entity; struct nouveau_sched *sched;
struct list_head entry;
struct drm_file *file_priv; struct drm_file *file_priv;
struct nouveau_cli *cli; struct nouveau_cli *cli;
...@@ -89,20 +90,17 @@ int nouveau_job_ucopy_syncs(struct nouveau_job_args *args, ...@@ -89,20 +90,17 @@ int nouveau_job_ucopy_syncs(struct nouveau_job_args *args,
int nouveau_job_init(struct nouveau_job *job, int nouveau_job_init(struct nouveau_job *job,
struct nouveau_job_args *args); struct nouveau_job_args *args);
void nouveau_job_free(struct nouveau_job *job);
int nouveau_job_submit(struct nouveau_job *job);
void nouveau_job_fini(struct nouveau_job *job); void nouveau_job_fini(struct nouveau_job *job);
int nouveau_job_submit(struct nouveau_job *job);
void nouveau_job_done(struct nouveau_job *job);
void nouveau_job_free(struct nouveau_job *job);
#define to_nouveau_sched_entity(entity) \ struct nouveau_sched {
container_of((entity), struct nouveau_sched_entity, base) struct drm_gpu_scheduler base;
struct drm_sched_entity entity;
struct nouveau_sched_entity { struct workqueue_struct *wq;
struct drm_sched_entity base;
struct mutex mutex; struct mutex mutex;
struct workqueue_struct *sched_wq;
struct { struct {
struct { struct {
struct list_head head; struct list_head head;
...@@ -112,15 +110,8 @@ struct nouveau_sched_entity { ...@@ -112,15 +110,8 @@ struct nouveau_sched_entity {
} job; } job;
}; };
int nouveau_sched_entity_init(struct nouveau_sched_entity *entity, int nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
struct drm_gpu_scheduler *sched, struct workqueue_struct *wq);
struct workqueue_struct *sched_wq); void nouveau_sched_fini(struct nouveau_sched *sched);
void nouveau_sched_entity_fini(struct nouveau_sched_entity *entity);
bool nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity,
struct work_struct *work);
int nouveau_sched_init(struct nouveau_drm *drm);
void nouveau_sched_fini(struct nouveau_drm *drm);
#endif #endif
...@@ -963,6 +963,12 @@ nouveau_uvmm_bind_job_free(struct kref *kref) ...@@ -963,6 +963,12 @@ nouveau_uvmm_bind_job_free(struct kref *kref)
{ {
struct nouveau_uvmm_bind_job *job = struct nouveau_uvmm_bind_job *job =
container_of(kref, struct nouveau_uvmm_bind_job, kref); container_of(kref, struct nouveau_uvmm_bind_job, kref);
struct bind_job_op *op, *next;
list_for_each_op_safe(op, next, &job->ops) {
list_del(&op->entry);
kfree(op);
}
nouveau_job_free(&job->base); nouveau_job_free(&job->base);
kfree(job); kfree(job);
...@@ -1004,14 +1010,16 @@ bind_validate_op(struct nouveau_job *job, ...@@ -1004,14 +1010,16 @@ bind_validate_op(struct nouveau_job *job,
static void static void
bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range) bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range)
{ {
struct nouveau_uvmm_bind_job *bind_job; struct nouveau_sched *sched = job->sched;
struct nouveau_sched_entity *entity = job->entity; struct nouveau_job *__job;
struct bind_job_op *op; struct bind_job_op *op;
u64 end = addr + range; u64 end = addr + range;
again: again:
spin_lock(&entity->job.list.lock); spin_lock(&sched->job.list.lock);
list_for_each_entry(bind_job, &entity->job.list.head, entry) { list_for_each_entry(__job, &sched->job.list.head, entry) {
struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(__job);
list_for_each_op(op, &bind_job->ops) { list_for_each_op(op, &bind_job->ops) {
if (op->op == OP_UNMAP) { if (op->op == OP_UNMAP) {
u64 op_addr = op->va.addr; u64 op_addr = op->va.addr;
...@@ -1019,7 +1027,7 @@ bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range) ...@@ -1019,7 +1027,7 @@ bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range)
if (!(end <= op_addr || addr >= op_end)) { if (!(end <= op_addr || addr >= op_end)) {
nouveau_uvmm_bind_job_get(bind_job); nouveau_uvmm_bind_job_get(bind_job);
spin_unlock(&entity->job.list.lock); spin_unlock(&sched->job.list.lock);
wait_for_completion(&bind_job->complete); wait_for_completion(&bind_job->complete);
nouveau_uvmm_bind_job_put(bind_job); nouveau_uvmm_bind_job_put(bind_job);
goto again; goto again;
...@@ -1027,7 +1035,7 @@ bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range) ...@@ -1027,7 +1035,7 @@ bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range)
} }
} }
} }
spin_unlock(&entity->job.list.lock); spin_unlock(&sched->job.list.lock);
} }
static int static int
...@@ -1183,7 +1191,6 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job, ...@@ -1183,7 +1191,6 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job,
{ {
struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli); struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli);
struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job); struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
struct nouveau_sched_entity *entity = job->entity;
struct drm_exec *exec = &vme->exec; struct drm_exec *exec = &vme->exec;
struct bind_job_op *op; struct bind_job_op *op;
int ret; int ret;
...@@ -1380,10 +1387,6 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job, ...@@ -1380,10 +1387,6 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job,
} }
nouveau_uvmm_unlock(uvmm); nouveau_uvmm_unlock(uvmm);
spin_lock(&entity->job.list.lock);
list_add(&bind_job->entry, &entity->job.list.head);
spin_unlock(&entity->job.list.lock);
return 0; return 0;
unwind_continue: unwind_continue:
...@@ -1466,14 +1469,11 @@ nouveau_uvmm_bind_job_run(struct nouveau_job *job) ...@@ -1466,14 +1469,11 @@ nouveau_uvmm_bind_job_run(struct nouveau_job *job)
} }
static void static void
nouveau_uvmm_bind_job_free_work_fn(struct work_struct *work) nouveau_uvmm_bind_job_cleanup(struct nouveau_job *job)
{ {
struct nouveau_uvmm_bind_job *bind_job = struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
container_of(work, struct nouveau_uvmm_bind_job, work);
struct nouveau_job *job = &bind_job->base;
struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli); struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli);
struct nouveau_sched_entity *entity = job->entity; struct bind_job_op *op;
struct bind_job_op *op, *next;
list_for_each_op(op, &bind_job->ops) { list_for_each_op(op, &bind_job->ops) {
struct drm_gem_object *obj = op->gem.obj; struct drm_gem_object *obj = op->gem.obj;
...@@ -1525,38 +1525,17 @@ nouveau_uvmm_bind_job_free_work_fn(struct work_struct *work) ...@@ -1525,38 +1525,17 @@ nouveau_uvmm_bind_job_free_work_fn(struct work_struct *work)
drm_gem_object_put(obj); drm_gem_object_put(obj);
} }
spin_lock(&entity->job.list.lock); nouveau_job_done(job);
list_del(&bind_job->entry);
spin_unlock(&entity->job.list.lock);
complete_all(&bind_job->complete); complete_all(&bind_job->complete);
wake_up(&entity->job.wq);
/* Remove and free ops after removing the bind job from the job list to
* avoid races against bind_validate_map_sparse().
*/
list_for_each_op_safe(op, next, &bind_job->ops) {
list_del(&op->entry);
kfree(op);
}
nouveau_uvmm_bind_job_put(bind_job); nouveau_uvmm_bind_job_put(bind_job);
} }
static void
nouveau_uvmm_bind_job_free_qwork(struct nouveau_job *job)
{
struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
struct nouveau_sched_entity *entity = job->entity;
nouveau_sched_entity_qwork(entity, &bind_job->work);
}
static struct nouveau_job_ops nouveau_bind_job_ops = { static struct nouveau_job_ops nouveau_bind_job_ops = {
.submit = nouveau_uvmm_bind_job_submit, .submit = nouveau_uvmm_bind_job_submit,
.armed_submit = nouveau_uvmm_bind_job_armed_submit, .armed_submit = nouveau_uvmm_bind_job_armed_submit,
.run = nouveau_uvmm_bind_job_run, .run = nouveau_uvmm_bind_job_run,
.free = nouveau_uvmm_bind_job_free_qwork, .free = nouveau_uvmm_bind_job_cleanup,
}; };
static int static int
...@@ -1617,7 +1596,6 @@ nouveau_uvmm_bind_job_init(struct nouveau_uvmm_bind_job **pjob, ...@@ -1617,7 +1596,6 @@ nouveau_uvmm_bind_job_init(struct nouveau_uvmm_bind_job **pjob,
return ret; return ret;
INIT_LIST_HEAD(&job->ops); INIT_LIST_HEAD(&job->ops);
INIT_LIST_HEAD(&job->entry);
for (i = 0; i < __args->op.count; i++) { for (i = 0; i < __args->op.count; i++) {
ret = bind_job_op_from_uop(&op, &__args->op.s[i]); ret = bind_job_op_from_uop(&op, &__args->op.s[i]);
...@@ -1628,9 +1606,8 @@ nouveau_uvmm_bind_job_init(struct nouveau_uvmm_bind_job **pjob, ...@@ -1628,9 +1606,8 @@ nouveau_uvmm_bind_job_init(struct nouveau_uvmm_bind_job **pjob,
} }
init_completion(&job->complete); init_completion(&job->complete);
INIT_WORK(&job->work, nouveau_uvmm_bind_job_free_work_fn);
args.sched_entity = __args->sched_entity; args.sched = __args->sched;
args.file_priv = __args->file_priv; args.file_priv = __args->file_priv;
args.in_sync.count = __args->in_sync.count; args.in_sync.count = __args->in_sync.count;
...@@ -1758,7 +1735,7 @@ nouveau_uvmm_ioctl_vm_bind(struct drm_device *dev, ...@@ -1758,7 +1735,7 @@ nouveau_uvmm_ioctl_vm_bind(struct drm_device *dev,
if (ret) if (ret)
return ret; return ret;
args.sched_entity = &cli->sched_entity; args.sched = &cli->sched;
args.file_priv = file_priv; args.file_priv = file_priv;
ret = nouveau_uvmm_vm_bind(&args); ret = nouveau_uvmm_vm_bind(&args);
...@@ -1910,12 +1887,8 @@ nouveau_uvmm_fini(struct nouveau_uvmm *uvmm) ...@@ -1910,12 +1887,8 @@ nouveau_uvmm_fini(struct nouveau_uvmm *uvmm)
MA_STATE(mas, &uvmm->region_mt, 0, 0); MA_STATE(mas, &uvmm->region_mt, 0, 0);
struct nouveau_uvma_region *reg; struct nouveau_uvma_region *reg;
struct nouveau_cli *cli = uvmm->vmm.cli; struct nouveau_cli *cli = uvmm->vmm.cli;
struct nouveau_sched_entity *entity = &cli->sched_entity;
struct drm_gpuva *va, *next; struct drm_gpuva *va, *next;
rmb(); /* for list_empty to work without lock */
wait_event(entity->job.wq, list_empty(&entity->job.list.head));
nouveau_uvmm_lock(uvmm); nouveau_uvmm_lock(uvmm);
drm_gpuvm_for_each_va_safe(va, next, &uvmm->base) { drm_gpuvm_for_each_va_safe(va, next, &uvmm->base) {
struct nouveau_uvma *uvma = uvma_from_va(va); struct nouveau_uvma *uvma = uvma_from_va(va);
......
...@@ -44,8 +44,6 @@ struct nouveau_uvmm_bind_job { ...@@ -44,8 +44,6 @@ struct nouveau_uvmm_bind_job {
struct nouveau_job base; struct nouveau_job base;
struct kref kref; struct kref kref;
struct list_head entry;
struct work_struct work;
struct completion complete; struct completion complete;
/* struct bind_job_op */ /* struct bind_job_op */
...@@ -54,7 +52,7 @@ struct nouveau_uvmm_bind_job { ...@@ -54,7 +52,7 @@ struct nouveau_uvmm_bind_job {
struct nouveau_uvmm_bind_job_args { struct nouveau_uvmm_bind_job_args {
struct drm_file *file_priv; struct drm_file *file_priv;
struct nouveau_sched_entity *sched_entity; struct nouveau_sched *sched;
unsigned int flags; unsigned int flags;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment