drm/panthor: Add the scheduler logical block

This is the piece of software interacting with the FW scheduler, and taking care of some scheduling aspects when the FW comes short of slots scheduling slots. Indeed, the FW only expose a few slots, and the kernel has to give all submission contexts, a chance to execute their jobs. The kernel-side scheduler is timeslice-based, with a round-robin queue per priority level. Job submission is handled with a 1:1 drm_sched_entity:drm_gpu_scheduler, allowing us to delegate the dependency tracking to the core. All the gory details should be documented inline. v6: - Add Maxime's and Heiko's acks - Make sure the scheduler is initialized before queueing the tick work in the MMU fault handler - Keep header inclusion alphabetically ordered v5: - Fix typos - Call panthor_kernel_bo_destroy(group->syncobjs) unconditionally - Don't move the group to the waiting list tail when it was already waiting for a different syncobj - Fix fatal_queues flagging in the tiler OOM path - Don't warn when more than one job timesout on a group - Add a warning message when we fail to allocate a heap chunk - Add Steve's R-b v4: - Check drmm_mutex_init() return code - s/drm_gem_vmap_unlocked/drm_gem_vunmap_unlocked/ in panthor_queue_put_syncwait_obj() - Drop unneeded WARN_ON() in cs_slot_sync_queue_state_locked() - Use atomic_xchg() instead of atomic_fetch_and(0) - Fix typos - Let panthor_kernel_bo_destroy() check for IS_ERR_OR_NULL() BOs - Defer TILER_OOM event handling to a separate workqueue to prevent deadlocks when the heap chunk allocation is blocked on mem-reclaim. This is just a temporary solution, until we add support for non-blocking/failable allocations - Pass the scheduler workqueue to drm_sched instead of instantiating a separate one (no longer needed now that heap chunk allocation happens on a dedicated wq) - Set WQ_MEM_RECLAIM on the scheduler workqueue, so we can handle job timeouts when the system is under mem pressure, and hopefully free up some memory retained by these jobs v3: - Rework the FW event handling logic to avoid races - Make sure MMU faults kill the group immediately - Use the panthor_kernel_bo abstraction for group/queue buffers - Make in_progress an atomic_t, so we can check it without the reset lock held - Don't limit the number of groups per context to the FW scheduler capacity. Fix the limit to 128 for now. - Add a panthor_job_vm() helper - Account for panthor_vm changes - Add our job fence as DMA_RESV_USAGE_WRITE to all external objects (was previously DMA_RESV_USAGE_BOOKKEEP). I don't get why, given we're supposed to be fully-explicit, but other drivers do that, so there must be a good reason - Account for drm_sched changes - Provide a panthor_queue_put_syncwait_obj() - Unconditionally return groups to their idle list in panthor_sched_suspend() - Condition of sched_queue_{,delayed_}work fixed to be only when a reset isn't pending or in progress. - Several typos in comments fixed. Co-developed-by: Steven Price <steven.price@arm.com> Signed-off-by: Steven Price <steven.price@arm.com> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> Reviewed-by: Steven Price <steven.price@arm.com> Acked-by: Maxime Ripard <mripard@kernel.org> Acked-by: Heiko Stuebner <heiko@sntech.de> Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-11-boris.brezillon@collabora.com

drm/panthor: Add the scheduler logical block
This is the piece of software interacting with the FW scheduler, and taking care of some scheduling aspects when the FW comes short of slots scheduling slots. Indeed, the FW only expose a few slots, and the kernel has to give all submission contexts, a chance to execute their jobs. The kernel-side scheduler is timeslice-based, with a round-robin queue per priority level. Job submission is handled with a 1:1 drm_sched_entity:drm_gpu_scheduler, allowing us to delegate the dependency tracking to the core. All the gory details should be documented inline. v6: - Add Maxime's and Heiko's acks - Make sure the scheduler is initialized before queueing the tick work in the MMU fault handler - Keep header inclusion alphabetically ordered v5: - Fix typos - Call panthor_kernel_bo_destroy(group->syncobjs) unconditionally - Don't move the group to the waiting list tail when it was already waiting for a different syncobj - Fix fatal_queues flagging in the tiler OOM path - Don't warn when more than one job timesout on a group - Add a warning message when we fail to allocate a heap chunk - Add Steve's R-b v4: - Check drmm_mutex_init() return code - s/drm_gem_vmap_unlocked/drm_gem_vunmap_unlocked/ in panthor_queue_put_syncwait_obj() - Drop unneeded WARN_ON() in cs_slot_sync_queue_state_locked() - Use atomic_xchg() instead of atomic_fetch_and(0) - Fix typos - Let panthor_kernel_bo_destroy() check for IS_ERR_OR_NULL() BOs - Defer TILER_OOM event handling to a separate workqueue to prevent deadlocks when the heap chunk allocation is blocked on mem-reclaim. This is just a temporary solution, until we add support for non-blocking/failable allocations - Pass the scheduler workqueue to drm_sched instead of instantiating a separate one (no longer needed now that heap chunk allocation happens on a dedicated wq) - Set WQ_MEM_RECLAIM on the scheduler workqueue, so we can handle job timeouts when the system is under mem pressure, and hopefully free up some memory retained by these jobs v3: - Rework the FW event handling logic to avoid races - Make sure MMU faults kill the group immediately - Use the panthor_kernel_bo abstraction for group/queue buffers - Make in_progress an atomic_t, so we can check it without the reset lock held - Don't limit the number of groups per context to the FW scheduler capacity. Fix the limit to 128 for now. - Add a panthor_job_vm() helper - Account for panthor_vm changes - Add our job fence as DMA_RESV_USAGE_WRITE to all external objects (was previously DMA_RESV_USAGE_BOOKKEEP). I don't get why, given we're supposed to be fully-explicit, but other drivers do that, so there must be a good reason - Account for drm_sched changes - Provide a panthor_queue_put_syncwait_obj() - Unconditionally return groups to their idle list in panthor_sched_suspend() - Condition of sched_queue_{,delayed_}work fixed to be only when a reset isn't pending or in progress. - Several typos in comments fixed. Co-developed-by: Steven Price <steven.price@arm.com> Signed-off-by: Steven Price <steven.price@arm.com> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> Reviewed-by: Steven Price <steven.price@arm.com> Acked-by: Maxime Ripard <mripard@kernel.org> Acked-by: Heiko Stuebner <heiko@sntech.de> Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-11-boris.brezillon@collabora.com
de854881 · Boris Brezillon · 9cca48fa · de854881 · de854881
Commit de854881 authored Feb 29, 2024 by Boris Brezillon
Hide whitespace changes
Inline Side-by-side

Showing with 3552 additions and 0 deletions

drivers/gpu/drm/panthor/panthor_sched.c drivers/gpu/drm/panthor/panthor_sched.c +3502 -0

drivers/gpu/drm/panthor/panthor_sched.h drivers/gpu/drm/panthor/panthor_sched.h +50 -0

No files found.
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
--- a/drivers/gpu/drm/panthor/panthor_sched.h
+++ b/drivers/gpu/drm/panthor/panthor_sched.h
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2023 Collabora ltd. */
+
+#ifndef __PANTHOR_SCHED_H__
+#define __PANTHOR_SCHED_H__
+
+struct drm_exec;
+struct dma_fence;
+struct drm_file;
+struct drm_gem_object;
+struct drm_sched_job;
+struct drm_panthor_group_create;
+struct drm_panthor_queue_create;
+struct drm_panthor_group_get_state;
+struct drm_panthor_queue_submit;
+struct panthor_device;
+struct panthor_file;
+struct panthor_group_pool;
+struct panthor_job;
+
+int panthor_group_create(struct panthor_file *pfile,
+			 const struct drm_panthor_group_create *group_args,
+			 const struct drm_panthor_queue_create *queue_args);
+int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle);
+int panthor_group_get_state(struct panthor_file *pfile,
+			    struct drm_panthor_group_get_state *get_state);
+
+struct drm_sched_job *
+panthor_job_create(struct panthor_file *pfile,
+		   u16 group_handle,
+		   const struct drm_panthor_queue_submit *qsubmit);
+struct drm_sched_job *panthor_job_get(struct drm_sched_job *job);
+struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job);
+void panthor_job_put(struct drm_sched_job *job);
+void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *job);
+
+int panthor_group_pool_create(struct panthor_file *pfile);
+void panthor_group_pool_destroy(struct panthor_file *pfile);
+
+int panthor_sched_init(struct panthor_device *ptdev);
+void panthor_sched_unplug(struct panthor_device *ptdev);
+void panthor_sched_pre_reset(struct panthor_device *ptdev);
+void panthor_sched_post_reset(struct panthor_device *ptdev);
+void panthor_sched_suspend(struct panthor_device *ptdev);
+void panthor_sched_resume(struct panthor_device *ptdev);
+
+void panthor_sched_report_mmu_fault(struct panthor_device *ptdev);
+void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events);
+
+#endif