Commit 12fb1ad7 authored by Jonathan Kim's avatar Jonathan Kim Committed by Alex Deucher

drm/amdkfd: update process interrupt handling for debug events

The debugger must be notified by any debugger subscribed exception
that comes from hardware interrupts.

If a debugger session exits, any exceptions it subscribed to may still
have interrupts in the interrupt ring buffer or KGD/KFD pipeline.
To prevent a new session from inheriting stale interrupts, when a new
queue is created, open an interrupt drain and allow the IH ring to drain
from a timestamped checkpoint.  Then inject a custom IV so that once
the custom IV is picked up by the KFD, it's safe to close the drain
and proceed with queue creation.

The drain must also be on debug disable as SW interrupts may still
be processed.  Drain at this time and clear all the exception status.

The debugger may also not be attached nor subscibed to certain
exceptions so forward them directly to the runtime.

GFX10 also requires its own IV processing, hence the creation of
kfd_int_process_v10.c.  This is because the IV from SQ interrupts are
packed into a new continguous format unlike GFX9. To make this clear,
a separate interrupting handling code file was created.
Signed-off-by: default avatarJonathan Kim <jonathan.kim@amd.com>
Reviewed-by: default avatarFelix Kuehling <felix.kuehling@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent a37d23f8
......@@ -777,6 +777,22 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bo
amdgpu_umc_poison_handler(adev, reset);
}
int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
uint32_t *payload)
{
int ret;
/* Device or IH ring is not ready so bail. */
ret = amdgpu_ih_wait_on_checkpoint_process_ts(adev, &adev->irq.ih);
if (ret)
return ret;
/* Send payload to fence KFD interrupts */
amdgpu_amdkfd_interrupt(adev, payload);
return 0;
}
bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
{
if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status)
......
......@@ -250,6 +250,8 @@ int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
struct amdgpu_device *src,
bool is_min);
int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min);
int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
uint32_t *payload);
/* Read user wptr from a specified user address space with page fault
* disabled. The memory must be pinned and mapped to the hardware when
......
......@@ -53,6 +53,7 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_events.o \
$(AMDKFD_PATH)/cik_event_interrupt.o \
$(AMDKFD_PATH)/kfd_int_process_v9.o \
$(AMDKFD_PATH)/kfd_int_process_v10.o \
$(AMDKFD_PATH)/kfd_int_process_v11.o \
$(AMDKFD_PATH)/kfd_smi_events.o \
$(AMDKFD_PATH)/kfd_crat.o \
......
......@@ -125,6 +125,64 @@ bool kfd_dbg_ev_raise(uint64_t event_mask,
return is_subscribed;
}
/* set pending event queue entry from ring entry */
bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
unsigned int pasid,
uint32_t doorbell_id,
uint64_t trap_mask,
void *exception_data,
size_t exception_data_size)
{
struct kfd_process *p;
bool signaled_to_debugger_or_runtime = false;
p = kfd_lookup_process_by_pasid(pasid);
if (!p)
return false;
if (!kfd_dbg_ev_raise(trap_mask, p, dev, doorbell_id, true,
exception_data, exception_data_size)) {
struct process_queue_manager *pqm;
struct process_queue_node *pqn;
if (!!(trap_mask & KFD_EC_MASK_QUEUE) &&
p->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED) {
mutex_lock(&p->mutex);
pqm = &p->pqm;
list_for_each_entry(pqn, &pqm->queues,
process_queue_list) {
if (!(pqn->q && pqn->q->device == dev &&
pqn->q->doorbell_id == doorbell_id))
continue;
kfd_send_exception_to_runtime(p, pqn->q->properties.queue_id,
trap_mask);
signaled_to_debugger_or_runtime = true;
break;
}
mutex_unlock(&p->mutex);
} else if (trap_mask & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
kfd_dqm_evict_pasid(dev->dqm, p->pasid);
kfd_signal_vm_fault_event(dev, p->pasid, NULL,
exception_data);
signaled_to_debugger_or_runtime = true;
}
} else {
signaled_to_debugger_or_runtime = true;
}
kfd_unref_process(p);
return signaled_to_debugger_or_runtime;
}
int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
unsigned int dev_id,
unsigned int queue_id,
......@@ -281,6 +339,31 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
kfd_dbg_set_workaround(target, false);
}
static void kfd_dbg_clean_exception_status(struct kfd_process *target)
{
struct process_queue_manager *pqm;
struct process_queue_node *pqn;
int i;
for (i = 0; i < target->n_pdds; i++) {
struct kfd_process_device *pdd = target->pdds[i];
kfd_process_drain_interrupts(pdd);
pdd->exception_status = 0;
}
pqm = &target->pqm;
list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
if (!pqn->q)
continue;
pqn->q->properties.exception_status = 0;
}
target->exception_status = 0;
}
int kfd_dbg_trap_disable(struct kfd_process *target)
{
if (!target->debug_trap_enabled)
......@@ -304,6 +387,7 @@ int kfd_dbg_trap_disable(struct kfd_process *target)
}
target->debug_trap_enabled = false;
kfd_dbg_clean_exception_status(target);
kfd_unref_process(target);
return 0;
......
......@@ -27,6 +27,12 @@
void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind_count);
int kfd_dbg_trap_activate(struct kfd_process *target);
bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
unsigned int pasid,
uint32_t doorbell_id,
uint64_t trap_mask,
void *exception_data,
size_t exception_data_size);
bool kfd_dbg_ev_raise(uint64_t event_mask,
struct kfd_process *process, struct kfd_node *dev,
unsigned int source_id, bool use_worker,
......
......@@ -140,6 +140,8 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
case IP_VERSION(9, 4, 1): /* ARCTURUS */
case IP_VERSION(9, 4, 2): /* ALDEBARAN */
case IP_VERSION(9, 4, 3): /* GC 9.4.3 */
kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
break;
case IP_VERSION(10, 3, 1): /* VANGOGH */
case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
case IP_VERSION(10, 3, 6): /* GC 10.3.6 */
......@@ -153,7 +155,7 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
case IP_VERSION(10, 3, 2): /* NAVY_FLOUNDER */
case IP_VERSION(10, 3, 4): /* DIMGREY_CAVEFISH */
case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */
kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
kfd->device_info.event_interrupt_class = &event_interrupt_class_v10;
break;
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 1):
......
This diff is collapsed.
......@@ -26,6 +26,7 @@
#include "kfd_device_queue_manager.h"
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
#include "kfd_smi_events.h"
#include "kfd_debug.h"
/*
* GFX11 SQ Interrupts
......@@ -238,7 +239,7 @@ static bool event_interrupt_isr_v11(struct kfd_node *dev,
client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
/* Only handle interrupts from KFD VMIDs */
vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
if (/*!KFD_IRQ_IS_FENCE(client_id, source_id) &&*/
if (!KFD_IRQ_IS_FENCE(client_id, source_id) &&
(vmid < dev->vm_info.first_vmid_kfd ||
vmid > dev->vm_info.last_vmid_kfd))
return false;
......@@ -267,7 +268,7 @@ static bool event_interrupt_isr_v11(struct kfd_node *dev,
source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
source_id == SOC15_INTSRC_CP_BAD_OPCODE ||
source_id == SOC21_INTSRC_SDMA_TRAP ||
/* KFD_IRQ_IS_FENCE(client_id, source_id) || */
KFD_IRQ_IS_FENCE(client_id, source_id) ||
(((client_id == SOC21_IH_CLIENTID_VMC) ||
((client_id == SOC21_IH_CLIENTID_GFX) &&
(source_id == UTCL2_1_0__SRCID__FAULT))) &&
......@@ -279,7 +280,7 @@ static void event_interrupt_wq_v11(struct kfd_node *dev,
{
uint16_t source_id, client_id, ring_id, pasid, vmid;
uint32_t context_id0, context_id1;
uint8_t sq_int_enc, sq_int_errtype;
uint8_t sq_int_enc, sq_int_priv, sq_int_errtype;
struct kfd_vm_fault_info info = {0};
struct kfd_hsa_memory_exception_data exception_data;
......@@ -312,9 +313,9 @@ static void event_interrupt_wq_v11(struct kfd_node *dev,
exception_data.failure.ReadOnly = info.prot_write ? 1 : 0;
exception_data.failure.imprecise = 0;
/*kfd_set_dbg_ev_from_interrupt(dev, pasid, -1,
kfd_set_dbg_ev_from_interrupt(dev, pasid, -1,
KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION),
&exception_data, sizeof(exception_data));*/
&exception_data, sizeof(exception_data));
kfd_smi_event_update_vmfault(dev, pasid);
/* GRBM, SDMA, SE, PMM */
......@@ -324,11 +325,11 @@ static void event_interrupt_wq_v11(struct kfd_node *dev,
/* CP */
if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
kfd_signal_event_interrupt(pasid, context_id0, 32);
/*else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_CTXID0_DOORBELL_ID(context_id0),
KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)),
NULL, 0);*/
NULL, 0);
/* SDMA */
else if (source_id == SOC21_INTSRC_SDMA_TRAP)
......@@ -348,6 +349,13 @@ static void event_interrupt_wq_v11(struct kfd_node *dev,
break;
case SQ_INTERRUPT_WORD_ENCODING_INST:
print_sq_intr_info_inst(context_id0, context_id1);
sq_int_priv = REG_GET_FIELD(context_id0,
SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV);
if (sq_int_priv && (kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_CTXID0_DOORBELL_ID(context_id0),
KFD_CTXID0_TRAP_CODE(context_id0),
NULL, 0)))
return;
break;
case SQ_INTERRUPT_WORD_ENCODING_ERROR:
print_sq_intr_info_error(context_id0, context_id1);
......@@ -366,8 +374,8 @@ static void event_interrupt_wq_v11(struct kfd_node *dev,
kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24);
}
/*} else if (KFD_IRQ_IS_FENCE(client_id, source_id)) {
kfd_process_close_interrupt_drain(pasid);*/
} else if (KFD_IRQ_IS_FENCE(client_id, source_id)) {
kfd_process_close_interrupt_drain(pasid);
}
}
......
......@@ -23,10 +23,40 @@
#include "kfd_priv.h"
#include "kfd_events.h"
#include "kfd_debug.h"
#include "soc15_int.h"
#include "kfd_device_queue_manager.h"
#include "kfd_smi_events.h"
/*
* GFX9 SQ Interrupts
*
* There are 3 encoding types of interrupts sourced from SQ sent as a 44-bit
* packet to the Interrupt Handler:
* Auto - Generated by the SQG (various cmd overflows, timestamps etc)
* Wave - Generated by S_SENDMSG through a shader program
* Error - HW generated errors (Illegal instructions, Memviols, EDC etc)
*
* The 44-bit packet is mapped as {context_id1[7:0],context_id0[31:0]} plus
* 4-bits for VMID (SOC15_VMID_FROM_IH_ENTRY) as such:
*
* - context_id0[27:26]
* Encoding type (0 = Auto, 1 = Wave, 2 = Error)
*
* - context_id0[13]
* PRIV bit indicates that Wave S_SEND or error occurred within trap
*
* - {context_id1[7:0],context_id0[31:28],context_id0[11:0]}
* 24-bit data with the following layout per encoding type:
* Auto - only context_id0[8:0] is used, which reports various interrupts
* generated by SQG. The rest is 0.
* Wave - user data sent from m0 via S_SENDMSG
* Error - Error type (context_id1[7:4]), Error Details (rest of bits)
*
* The other context_id bits show coordinates (SE/SH/CU/SIMD/WAVE) for wave
* S_SENDMSG and Errors. These are 0 for Auto.
*/
enum SQ_INTERRUPT_WORD_ENCODING {
SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0,
SQ_INTERRUPT_WORD_ENCODING_INST,
......@@ -84,12 +114,32 @@ enum SQ_INTERRUPT_ERROR_TYPE {
#define SQ_INTERRUPT_WORD_WAVE_CTXID__SE_ID_MASK 0x03000000
#define SQ_INTERRUPT_WORD_WAVE_CTXID__ENCODING_MASK 0x0c000000
/* GFX9 SQ interrupt 24-bit data from context_id<0,1> */
#define KFD_CONTEXT_ID_GET_SQ_INT_DATA(ctx0, ctx1) \
((ctx0 & 0xfff) | ((ctx0 >> 16) & 0xf000) | ((ctx1 << 16) & 0xff0000))
#define KFD_SQ_INT_DATA__ERR_TYPE_MASK 0xF00000
#define KFD_SQ_INT_DATA__ERR_TYPE__SHIFT 20
/*
* The debugger will send user data(m0) with PRIV=1 to indicate it requires
* notification from the KFD with the following queue id (DOORBELL_ID) and
* trap code (TRAP_CODE).
*/
#define KFD_INT_DATA_DEBUG_DOORBELL_MASK 0x0003ff
#define KFD_INT_DATA_DEBUG_TRAP_CODE_SHIFT 10
#define KFD_INT_DATA_DEBUG_TRAP_CODE_MASK 0x07fc00
#define KFD_DEBUG_DOORBELL_ID(sq_int_data) ((sq_int_data) & \
KFD_INT_DATA_DEBUG_DOORBELL_MASK)
#define KFD_DEBUG_TRAP_CODE(sq_int_data) (((sq_int_data) & \
KFD_INT_DATA_DEBUG_TRAP_CODE_MASK) \
>> KFD_INT_DATA_DEBUG_TRAP_CODE_SHIFT)
#define KFD_DEBUG_CP_BAD_OP_ECODE_MASK 0x3fffc00
#define KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT 10
#define KFD_DEBUG_CP_BAD_OP_ECODE(ctxid0) (((ctxid0) & \
KFD_DEBUG_CP_BAD_OP_ECODE_MASK) \
>> KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT)
static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
uint16_t pasid, uint16_t client_id)
{
......@@ -168,14 +218,16 @@ static bool event_interrupt_isr_v9(struct kfd_node *dev,
uint16_t source_id, client_id, pasid, vmid;
const uint32_t *data = ih_ring_entry;
source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
/* Only handle interrupts from KFD VMIDs */
vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
if (vmid < dev->vm_info.first_vmid_kfd ||
vmid > dev->vm_info.last_vmid_kfd)
if (!KFD_IRQ_IS_FENCE(client_id, source_id) &&
(vmid < dev->vm_info.first_vmid_kfd ||
vmid > dev->vm_info.last_vmid_kfd))
return false;
source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
/* Only handle clients we care about */
......@@ -194,7 +246,8 @@ static bool event_interrupt_isr_v9(struct kfd_node *dev,
client_id != SOC15_IH_CLIENTID_SE0SH &&
client_id != SOC15_IH_CLIENTID_SE1SH &&
client_id != SOC15_IH_CLIENTID_SE2SH &&
client_id != SOC15_IH_CLIENTID_SE3SH)
client_id != SOC15_IH_CLIENTID_SE3SH &&
!KFD_IRQ_IS_FENCE(client_id, source_id))
return false;
/* This is a known issue for gfx9. Under non HWS, pasid is not set
......@@ -247,6 +300,7 @@ static bool event_interrupt_isr_v9(struct kfd_node *dev,
source_id == SOC15_INTSRC_SDMA_ECC ||
source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
source_id == SOC15_INTSRC_CP_BAD_OPCODE ||
KFD_IRQ_IS_FENCE(client_id, source_id) ||
((client_id == SOC15_IH_CLIENTID_VMC ||
client_id == SOC15_IH_CLIENTID_VMC1 ||
client_id == SOC15_IH_CLIENTID_UTCL2) &&
......@@ -302,6 +356,13 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID),
REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID),
sq_int_data);
if (context_id0 & SQ_INTERRUPT_WORD_WAVE_CTXID__PRIV_MASK) {
if (kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_DEBUG_DOORBELL_ID(sq_int_data),
KFD_DEBUG_TRAP_CODE(sq_int_data),
NULL, 0))
return;
}
break;
case SQ_INTERRUPT_WORD_ENCODING_ERROR:
sq_intr_err = REG_GET_FIELD(sq_int_data, KFD_SQ_INT_DATA, ERR_TYPE);
......@@ -324,8 +385,12 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
break;
}
kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24);
} else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
kfd_signal_hw_exception_event(pasid);
} else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_DEBUG_DOORBELL_ID(context_id0),
KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),
NULL, 0);
}
} else if (client_id == SOC15_IH_CLIENTID_SDMA0 ||
client_id == SOC15_IH_CLIENTID_SDMA1 ||
client_id == SOC15_IH_CLIENTID_SDMA2 ||
......@@ -345,6 +410,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
client_id == SOC15_IH_CLIENTID_UTCL2) {
struct kfd_vm_fault_info info = {0};
uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
struct kfd_hsa_memory_exception_data exception_data;
if (client_id == SOC15_IH_CLIENTID_UTCL2 &&
amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) {
......@@ -360,9 +426,23 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
info.prot_read = ring_id & 0x10;
info.prot_write = ring_id & 0x20;
memset(&exception_data, 0, sizeof(exception_data));
exception_data.gpu_id = dev->id;
exception_data.va = (info.page_addr) << PAGE_SHIFT;
exception_data.failure.NotPresent = info.prot_valid ? 1 : 0;
exception_data.failure.NoExecute = info.prot_exec ? 1 : 0;
exception_data.failure.ReadOnly = info.prot_write ? 1 : 0;
exception_data.failure.imprecise = 0;
kfd_set_dbg_ev_from_interrupt(dev,
pasid,
-1,
KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION),
&exception_data,
sizeof(exception_data));
kfd_smi_event_update_vmfault(dev, pasid);
kfd_dqm_evict_pasid(dev->dqm, pasid);
kfd_signal_vm_fault_event(dev, pasid, &info, NULL);
} else if (KFD_IRQ_IS_FENCE(client_id, source_id)) {
kfd_process_close_interrupt_drain(pasid);
}
}
......
......@@ -963,6 +963,10 @@ struct kfd_process {
uint64_t exception_enable_mask;
uint64_t exception_status;
/* Used to drain stale interrupts */
wait_queue_head_t wait_irq_drain;
bool irq_drain_is_open;
/* shared virtual memory registered by this process */
struct svm_range_list svms;
......@@ -1144,12 +1148,19 @@ int kfd_numa_node_to_apic_id(int numa_node_id);
void kfd_double_confirm_iommu_support(struct kfd_dev *gpu);
/* Interrupts */
#define KFD_IRQ_FENCE_CLIENTID 0xff
#define KFD_IRQ_FENCE_SOURCEID 0xff
#define KFD_IRQ_IS_FENCE(client, source) \
((client) == KFD_IRQ_FENCE_CLIENTID && \
(source) == KFD_IRQ_FENCE_SOURCEID)
int kfd_interrupt_init(struct kfd_node *dev);
void kfd_interrupt_exit(struct kfd_node *dev);
bool enqueue_ih_ring_entry(struct kfd_node *kfd, const void *ih_ring_entry);
bool interrupt_is_wanted(struct kfd_node *dev,
const uint32_t *ih_ring_entry,
uint32_t *patched_ihre, bool *flag);
int kfd_process_drain_interrupts(struct kfd_process_device *pdd);
void kfd_process_close_interrupt_drain(unsigned int pasid);
/* amdkfd Apertures */
int kfd_init_apertures(struct kfd_process *process);
......@@ -1421,6 +1432,7 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
/* Events */
extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
extern const struct kfd_event_interrupt_class event_interrupt_class_v9;
extern const struct kfd_event_interrupt_class event_interrupt_class_v10;
extern const struct kfd_event_interrupt_class event_interrupt_class_v11;
extern const struct kfd_device_global_init_class device_global_init_class_cik;
......
......@@ -862,6 +862,8 @@ struct kfd_process *kfd_create_process(struct task_struct *thread)
kfd_procfs_add_sysfs_stats(process);
kfd_procfs_add_sysfs_files(process);
kfd_procfs_add_sysfs_counters(process);
init_waitqueue_head(&process->wait_irq_drain);
}
out:
if (!IS_ERR(process))
......@@ -2136,6 +2138,51 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
}
}
/* assumes caller holds process lock. */
int kfd_process_drain_interrupts(struct kfd_process_device *pdd)
{
uint32_t irq_drain_fence[8];
int r = 0;
if (!KFD_IS_SOC15(pdd->dev))
return 0;
pdd->process->irq_drain_is_open = true;
memset(irq_drain_fence, 0, sizeof(irq_drain_fence));
irq_drain_fence[0] = (KFD_IRQ_FENCE_SOURCEID << 8) |
KFD_IRQ_FENCE_CLIENTID;
irq_drain_fence[3] = pdd->process->pasid;
/* ensure stale irqs scheduled KFD interrupts and send drain fence. */
if (amdgpu_amdkfd_send_close_event_drain_irq(pdd->dev->adev,
irq_drain_fence)) {
pdd->process->irq_drain_is_open = false;
return 0;
}
r = wait_event_interruptible(pdd->process->wait_irq_drain,
!READ_ONCE(pdd->process->irq_drain_is_open));
if (r)
pdd->process->irq_drain_is_open = false;
return r;
}
void kfd_process_close_interrupt_drain(unsigned int pasid)
{
struct kfd_process *p;
p = kfd_lookup_process_by_pasid(pasid);
if (!p)
return;
WRITE_ONCE(p->irq_drain_is_open, false);
wake_up_all(&p->wait_irq_drain);
kfd_unref_process(p);
}
struct send_exception_work_handler_workarea {
struct work_struct work;
struct kfd_process *p;
......
......@@ -337,6 +337,10 @@ int pqm_create_queue(struct process_queue_manager *pqm,
kq->queue->properties.queue_id = *qid;
pqn->kq = kq;
pqn->q = NULL;
retval = kfd_process_drain_interrupts(pdd);
if (retval)
break;
retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
kq, &pdd->qpd);
break;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment