Commit 3338e4f9 authored by Stuart Summers, committed by Matthew Brost

drm/xe: Use topology to determine page fault queue size

Currently the page fault queue size is hard coded. However
the hardware supports faulting for each EU and each CS.
For some applications running on hardware with a large
number of EUs and CSs, this can result in an overflow of
the page fault queue.

Add a small calculation to determine the page fault queue
size based on the number of EUs and CSs in the platform as
determined by fuses.
Signed-off-by: Stuart Summers <stuart.summers@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/24d582a3b48c97793b8b6a402f34b4b469471636.1723862633.git.stuart.summers@intel.com
parent 7586fc52
...@@ -287,7 +287,7 @@ static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) ...@@ -287,7 +287,7 @@ static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
PFD_VIRTUAL_ADDR_LO_SHIFT; PFD_VIRTUAL_ADDR_LO_SHIFT;
pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) % pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) %
PF_QUEUE_NUM_DW; pf_queue->num_dw;
ret = true; ret = true;
} }
spin_unlock_irq(&pf_queue->lock); spin_unlock_irq(&pf_queue->lock);
...@@ -299,7 +299,8 @@ static bool pf_queue_full(struct pf_queue *pf_queue) ...@@ -299,7 +299,8 @@ static bool pf_queue_full(struct pf_queue *pf_queue)
{ {
lockdep_assert_held(&pf_queue->lock); lockdep_assert_held(&pf_queue->lock);
return CIRC_SPACE(pf_queue->head, pf_queue->tail, PF_QUEUE_NUM_DW) <= return CIRC_SPACE(pf_queue->head, pf_queue->tail,
pf_queue->num_dw) <=
PF_MSG_LEN_DW; PF_MSG_LEN_DW;
} }
...@@ -312,22 +313,23 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) ...@@ -312,22 +313,23 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
u32 asid; u32 asid;
bool full; bool full;
/*
* The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0
*/
BUILD_BUG_ON(PF_QUEUE_NUM_DW % PF_MSG_LEN_DW);
if (unlikely(len != PF_MSG_LEN_DW)) if (unlikely(len != PF_MSG_LEN_DW))
return -EPROTO; return -EPROTO;
asid = FIELD_GET(PFD_ASID, msg[1]); asid = FIELD_GET(PFD_ASID, msg[1]);
pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE); pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE);
/*
* The below logic doesn't work unless pf_queue->num_dw % PF_MSG_LEN_DW == 0
*/
xe_gt_assert(gt, !(pf_queue->num_dw % PF_MSG_LEN_DW));
spin_lock_irqsave(&pf_queue->lock, flags); spin_lock_irqsave(&pf_queue->lock, flags);
full = pf_queue_full(pf_queue); full = pf_queue_full(pf_queue);
if (!full) { if (!full) {
memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32)); memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32));
pf_queue->head = (pf_queue->head + len) % PF_QUEUE_NUM_DW; pf_queue->head = (pf_queue->head + len) %
pf_queue->num_dw;
queue_work(gt->usm.pf_wq, &pf_queue->worker); queue_work(gt->usm.pf_wq, &pf_queue->worker);
} else { } else {
drm_warn(&xe->drm, "PF Queue full, shouldn't be possible"); drm_warn(&xe->drm, "PF Queue full, shouldn't be possible");
...@@ -386,26 +388,54 @@ static void pagefault_fini(void *arg) ...@@ -386,26 +388,54 @@ static void pagefault_fini(void *arg)
{ {
struct xe_gt *gt = arg; struct xe_gt *gt = arg;
struct xe_device *xe = gt_to_xe(gt); struct xe_device *xe = gt_to_xe(gt);
int i;
if (!xe->info.has_usm) if (!xe->info.has_usm)
return; return;
destroy_workqueue(gt->usm.acc_wq); destroy_workqueue(gt->usm.acc_wq);
destroy_workqueue(gt->usm.pf_wq); destroy_workqueue(gt->usm.pf_wq);
for (i = 0; i < NUM_PF_QUEUE; ++i)
kfree(gt->usm.pf_queue[i].data);
}
static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue)
{
xe_dss_mask_t all_dss;
int num_dss, num_eus;
bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
XE_MAX_DSS_FUSE_BITS);
num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS);
num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss,
XE_MAX_EU_FUSE_BITS) * num_dss;
/* user can issue separate page faults per EU and per CS */
pf_queue->num_dw =
(num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW;
pf_queue->gt = gt;
pf_queue->data = kzalloc(pf_queue->num_dw, GFP_KERNEL);
spin_lock_init(&pf_queue->lock);
INIT_WORK(&pf_queue->worker, pf_queue_work_func);
return 0;
} }
int xe_gt_pagefault_init(struct xe_gt *gt) int xe_gt_pagefault_init(struct xe_gt *gt)
{ {
struct xe_device *xe = gt_to_xe(gt); struct xe_device *xe = gt_to_xe(gt);
int i; int i, ret = 0;
if (!xe->info.has_usm) if (!xe->info.has_usm)
return 0; return 0;
for (i = 0; i < NUM_PF_QUEUE; ++i) { for (i = 0; i < NUM_PF_QUEUE; ++i) {
gt->usm.pf_queue[i].gt = gt; ret = xe_alloc_pf_queue(gt, &gt->usm.pf_queue[i]);
spin_lock_init(&gt->usm.pf_queue[i].lock); if (ret)
INIT_WORK(&gt->usm.pf_queue[i].worker, pf_queue_work_func); return ret;
} }
for (i = 0; i < NUM_ACC_QUEUE; ++i) { for (i = 0; i < NUM_ACC_QUEUE; ++i) {
gt->usm.acc_queue[i].gt = gt; gt->usm.acc_queue[i].gt = gt;
......
...@@ -247,9 +247,14 @@ struct xe_gt { ...@@ -247,9 +247,14 @@ struct xe_gt {
struct pf_queue { struct pf_queue {
/** @usm.pf_queue.gt: back pointer to GT */ /** @usm.pf_queue.gt: back pointer to GT */
struct xe_gt *gt; struct xe_gt *gt;
#define PF_QUEUE_NUM_DW 128
/** @usm.pf_queue.data: data in the page fault queue */ /** @usm.pf_queue.data: data in the page fault queue */
u32 data[PF_QUEUE_NUM_DW]; u32 *data;
/**
* @usm.pf_queue.num_dw: number of DWORDS in the page
* fault queue. Dynamically calculated based on the number
* of compute resources available.
*/
u32 num_dw;
/** /**
* @usm.pf_queue.tail: tail pointer in DWs for page fault queue, * @usm.pf_queue.tail: tail pointer in DWs for page fault queue,
* moved by worker which processes faults (consumer). * moved by worker which processes faults (consumer).
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment