Commit 91d9f985 authored by Dave Airlie's avatar Dave Airlie

Merge tag 'drm-amdkfd-next-2015-05-19' of git://people.freedesktop.org/~gabbayo/linux into drm-next

- Add the interrupts & events modules, including new IOCTLs to create and wait
  on events. The HSA RT open source stack is mainly using events to know when
  a dispatched work has been completed. In addition, this module is
  a pre-requisite for the next module I'm going to upstream - debugger support

  This module also handles H/W exceptions, such as memory exception received
  through the IOMMUv2 H/W and Bad Opcode exception receieved from the GPU.

- Adding a new kernel module parameter to let the user decide whether he wants
  to receive a SIGTERM when a memory exception occurs inside the GPU kernel and
  the HSA application doesn't wait on an appropriate event, or if he just want
  to receive notification about this event in dmesg. The default is the latter.

- Additional improvements for SDMA code

- Update my email address in Maintainers file.

* tag 'drm-amdkfd-next-2015-05-19' of git://people.freedesktop.org/~gabbayo/linux:
  drm/amdkfd: change driver version to 0.7.2
  drm/amdkfd: Implement events IOCTLs
  drm/amdkfd: Add module parameter of send_sigterm
  drm/amdkfd: Add bad opcode exception handling
  drm/amdkfd: Add memory exception handling
  drm/amdkfd: Add the events module
  drm/amdkfd: add events IOCTL set definitions
  drm/amdkfd: Add interrupt handling module
  drm/radeon: Add init interrupt kfd->kgd interface
  MAINTAINERS: update amdkfd Oded's email address
  drm/amdkfd: make the sdma vm init to be asic specific
  drm/amdkfd: Use new struct for asic specific ops
  drm/amdkfd: reformat some debug prints
  drm/amdkfd: Remove unessary void pointer cast
parents 9c37bf2d 7591cd2c
......@@ -631,7 +631,7 @@ F: drivers/iommu/amd_iommu*.[ch]
F: include/linux/amd-iommu.h
AMD KFD
M: Oded Gabbay <oded.gabbay@amd.com>
M: Oded Gabbay <oded.gabbay@gmail.com>
L: dri-devel@lists.freedesktop.org
T: git git://people.freedesktop.org/~gabbayo/linux.git
S: Supported
......
......@@ -12,5 +12,6 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_kernel_queue_vi.o kfd_packet_manager.o \
kfd_process_queue_manager.o kfd_device_queue_manager.o \
kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
kfd_interrupt.o kfd_events.o cik_event_interrupt.o
obj-$(CONFIG_HSA_AMD) += amdkfd.o
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "kfd_priv.h"
#include "kfd_events.h"
#include "cik_int.h"
static bool cik_event_interrupt_isr(struct kfd_dev *dev,
const uint32_t *ih_ring_entry)
{
unsigned int pasid;
const struct cik_ih_ring_entry *ihre =
(const struct cik_ih_ring_entry *)ih_ring_entry;
pasid = (ihre->ring_id & 0xffff0000) >> 16;
/* Do not process in ISR, just request it to be forwarded to WQ. */
return (pasid != 0) &&
(ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE);
}
static void cik_event_interrupt_wq(struct kfd_dev *dev,
const uint32_t *ih_ring_entry)
{
unsigned int pasid;
const struct cik_ih_ring_entry *ihre =
(const struct cik_ih_ring_entry *)ih_ring_entry;
pasid = (ihre->ring_id & 0xffff0000) >> 16;
if (pasid == 0)
return;
if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE)
kfd_signal_event_interrupt(pasid, 0, 0);
else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG)
kfd_signal_event_interrupt(pasid, ihre->data & 0xFF, 8);
else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE)
kfd_signal_hw_exception_event(pasid);
}
const struct kfd_event_interrupt_class event_interrupt_class_cik = {
.interrupt_isr = cik_event_interrupt_isr,
.interrupt_wq = cik_event_interrupt_wq,
};
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef HSA_RADEON_CIK_INT_H_INCLUDED
#define HSA_RADEON_CIK_INT_H_INCLUDED
#include <linux/types.h>
struct cik_ih_ring_entry {
uint32_t source_id;
uint32_t data;
uint32_t ring_id;
uint32_t reserved;
};
#define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6
#define CIK_INTSRC_CP_END_OF_PIPE 0xB5
#define CIK_INTSRC_CP_BAD_OPCODE 0xB7
#define CIK_INTSRC_SQ_INTERRUPT_MSG 0xEF
#endif
......@@ -289,8 +289,10 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
args->queue_id = queue_id;
/* Return gpu_id as doorbell offset for mmap usage */
args->doorbell_offset = args->gpu_id << PAGE_SHIFT;
args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id);
args->doorbell_offset <<= PAGE_SHIFT;
mutex_unlock(&p->mutex);
......@@ -514,6 +516,62 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
return 0;
}
static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
void *data)
{
struct kfd_ioctl_create_event_args *args = data;
int err;
err = kfd_event_create(filp, p, args->event_type,
args->auto_reset != 0, args->node_id,
&args->event_id, &args->event_trigger_data,
&args->event_page_offset,
&args->event_slot_index);
return err;
}
static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
void *data)
{
struct kfd_ioctl_destroy_event_args *args = data;
return kfd_event_destroy(p, args->event_id);
}
static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
void *data)
{
struct kfd_ioctl_set_event_args *args = data;
return kfd_set_event(p, args->event_id);
}
static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
void *data)
{
struct kfd_ioctl_reset_event_args *args = data;
return kfd_reset_event(p, args->event_id);
}
static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
void *data)
{
struct kfd_ioctl_wait_events_args *args = data;
enum kfd_event_wait_result wait_result;
int err;
err = kfd_wait_on_events(p, args->num_events,
(void __user *)args->events_ptr,
(args->wait_for_all != 0),
args->timeout, &wait_result);
args->wait_result = wait_result;
return err;
}
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl}
......@@ -539,6 +597,21 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
kfd_ioctl_update_queue, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
kfd_ioctl_create_event, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
kfd_ioctl_destroy_event, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
kfd_ioctl_set_event, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
kfd_ioctl_reset_event, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
kfd_ioctl_wait_events, 0),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
......@@ -639,5 +712,15 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
if (IS_ERR(process))
return PTR_ERR(process);
return kfd_doorbell_mmap(process, vma);
if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) ==
KFD_MMAP_DOORBELL_MASK) {
vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK;
return kfd_doorbell_mmap(process, vma);
} else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) ==
KFD_MMAP_EVENTS_MASK) {
vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
return kfd_event_mmap(process, vma);
}
return -EFAULT;
}
......@@ -34,6 +34,7 @@ static const struct kfd_device_info kaveri_device_info = {
.asic_family = CHIP_KAVERI,
.max_pasid_bits = 16,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.mqd_size_aligned = MQD_SIZE_ALIGNED
};
......@@ -181,6 +182,32 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
kfd_unbind_process_from_device(dev, pasid);
}
/*
* This function called by IOMMU driver on PPR failure
*/
static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
unsigned long address, u16 flags)
{
struct kfd_dev *dev;
dev_warn(kfd_device,
"Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X",
PCI_BUS_NUM(pdev->devfn),
PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn),
pasid,
address,
flags);
dev = kfd_device_by_pci_dev(pdev);
BUG_ON(dev == NULL);
kfd_signal_iommu_event(dev, pasid, address,
flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);
return AMD_IOMMU_INV_PRI_RSP_INVALID;
}
bool kgd2kfd_device_init(struct kfd_dev *kfd,
const struct kgd2kfd_shared_resources *gpu_resources)
{
......@@ -235,6 +262,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto kfd_topology_add_device_error;
}
if (kfd_interrupt_init(kfd)) {
dev_err(kfd_device,
"Error initializing interrupts for device (%x:%x)\n",
kfd->pdev->vendor, kfd->pdev->device);
goto kfd_interrupt_error;
}
if (!device_iommu_pasid_init(kfd)) {
dev_err(kfd_device,
"Error initializing iommuv2 for device (%x:%x)\n",
......@@ -243,6 +277,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
}
amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
iommu_pasid_shutdown_callback);
amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb);
kfd->dqm = device_queue_manager_init(kfd);
if (!kfd->dqm) {
......@@ -273,6 +308,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
device_queue_manager_error:
amd_iommu_free_device(kfd->pdev);
device_iommu_pasid_error:
kfd_interrupt_exit(kfd);
kfd_interrupt_error:
kfd_topology_remove_device(kfd);
kfd_topology_add_device_error:
kfd_gtt_sa_fini(kfd);
......@@ -290,6 +327,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
if (kfd->init_complete) {
device_queue_manager_uninit(kfd->dqm);
amd_iommu_free_device(kfd->pdev);
kfd_interrupt_exit(kfd);
kfd_topology_remove_device(kfd);
kfd_gtt_sa_fini(kfd);
kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
......@@ -305,6 +343,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd)
if (kfd->init_complete) {
kfd->dqm->ops.stop(kfd->dqm);
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
amd_iommu_free_device(kfd->pdev);
}
}
......@@ -324,6 +363,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
return -ENXIO;
amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
iommu_pasid_shutdown_callback);
amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb);
kfd->dqm->ops.start(kfd->dqm);
}
......@@ -333,7 +373,17 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
/* Process interrupts / schedule work as necessary */
if (!kfd->init_complete)
return;
spin_lock(&kfd->interrupt_lock);
if (kfd->interrupts_active
&& interrupt_is_wanted(kfd, ih_ring_entry)
&& enqueue_ih_ring_entry(kfd, ih_ring_entry))
schedule_work(&kfd->interrupt_work);
spin_unlock(&kfd->interrupt_lock);
}
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
......
......@@ -522,6 +522,17 @@ int init_pipelines(struct device_queue_manager *dqm,
return 0;
}
static void init_interrupts(struct device_queue_manager *dqm)
{
unsigned int i;
BUG_ON(dqm == NULL);
for (i = 0 ; i < get_pipes_num(dqm) ; i++)
dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd,
i + get_first_pipe(dqm));
}
static int init_scheduler(struct device_queue_manager *dqm)
{
int retval;
......@@ -581,6 +592,7 @@ static void uninitialize_nocpsch(struct device_queue_manager *dqm)
static int start_nocpsch(struct device_queue_manager *dqm)
{
init_interrupts(dqm);
return 0;
}
......@@ -614,19 +626,6 @@ static void deallocate_sdma_queue(struct device_queue_manager *dqm,
set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap);
}
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd)
{
uint32_t value = SDMA_ATC;
if (q->process->is_32bit_user_mode)
value |= SDMA_VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd));
else
value |= SDMA_VA_SHARED_BASE(get_sh_mem_bases_nybble_64(
qpd_to_pdd(qpd)));
q->properties.sdma_vm_addr = value;
}
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd)
......@@ -649,7 +648,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
pr_debug(" sdma queue id: %d\n", q->properties.sdma_queue_id);
pr_debug(" sdma engine id: %d\n", q->properties.sdma_engine_id);
init_sdma_vm(dqm, q, qpd);
dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
if (retval != 0) {
......@@ -750,6 +749,9 @@ static int start_cpsch(struct device_queue_manager *dqm)
dqm->fence_addr = dqm->fence_mem->cpu_ptr;
dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
init_interrupts(dqm);
list_for_each_entry(node, &dqm->queues, list)
if (node->qpd->pqm->process && dqm->dev)
kfd_bind_process_to_device(dqm->dev,
......
......@@ -120,6 +120,21 @@ struct device_queue_manager_ops {
uint64_t alternate_aperture_size);
};
struct device_queue_manager_asic_ops {
int (*register_process)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
int (*initialize)(struct device_queue_manager *dqm);
bool (*set_cache_memory_policy)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
void (*init_sdma_vm)(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
};
/**
* struct device_queue_manager
*
......@@ -134,7 +149,7 @@ struct device_queue_manager_ops {
struct device_queue_manager {
struct device_queue_manager_ops ops;
struct device_queue_manager_ops ops_asic_specific;
struct device_queue_manager_asic_ops ops_asic_specific;
struct mqd_manager *mqds[KFD_MQD_TYPE_MAX];
struct packet_manager packets;
......@@ -157,8 +172,8 @@ struct device_queue_manager {
bool active_runlist;
};
void device_queue_manager_init_cik(struct device_queue_manager_ops *ops);
void device_queue_manager_init_vi(struct device_queue_manager_ops *ops);
void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops);
void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops);
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
int init_pipelines(struct device_queue_manager *dqm,
......
......@@ -33,12 +33,15 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
static int register_process_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static int initialize_cpsch_cik(struct device_queue_manager *dqm);
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
void device_queue_manager_init_cik(struct device_queue_manager_ops *ops)
void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops)
{
ops->set_cache_memory_policy = set_cache_memory_policy_cik;
ops->register_process = register_process_cik;
ops->initialize = initialize_cpsch_cik;
ops->init_sdma_vm = init_sdma_vm;
}
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
......@@ -129,6 +132,19 @@ static int register_process_cik(struct device_queue_manager *dqm,
return 0;
}
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd)
{
uint32_t value = SDMA_ATC;
if (q->process->is_32bit_user_mode)
value |= SDMA_VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd));
else
value |= SDMA_VA_SHARED_BASE(get_sh_mem_bases_nybble_64(
qpd_to_pdd(qpd)));
q->properties.sdma_vm_addr = value;
}
static int initialize_cpsch_cik(struct device_queue_manager *dqm)
{
return init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
......
......@@ -32,14 +32,17 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
static int register_process_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static int initialize_cpsch_vi(struct device_queue_manager *dqm);
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
void device_queue_manager_init_vi(struct device_queue_manager_ops *ops)
void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops)
{
pr_warn("amdkfd: VI DQM is not currently supported\n");
ops->set_cache_memory_policy = set_cache_memory_policy_vi;
ops->register_process = register_process_vi;
ops->initialize = initialize_cpsch_vi;
ops->init_sdma_vm = init_sdma_vm;
}
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
......@@ -58,6 +61,11 @@ static int register_process_vi(struct device_queue_manager *dqm,
return -1;
}
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd)
{
}
static int initialize_cpsch_vi(struct device_queue_manager *dqm)
{
return 0;
......
......@@ -142,14 +142,13 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
pr_debug("kfd: mapping doorbell page in kfd_doorbell_mmap\n"
" target user address == 0x%08llX\n"
" physical address == 0x%08llX\n"
" vm_flags == 0x%04lX\n"
" size == 0x%04lX\n",
(unsigned long long) vma->vm_start, address, vma->vm_flags,
doorbell_process_allocation());
pr_debug("mapping doorbell page:\n");
pr_debug(" target user address == 0x%08llX\n",
(unsigned long long) vma->vm_start);
pr_debug(" physical address == 0x%08llX\n", address);
pr_debug(" vm_flags == 0x%04lX\n", vma->vm_flags);
pr_debug(" size == 0x%04lX\n",
doorbell_process_allocation());
return io_remap_pfn_range(vma,
vma->vm_start,
......
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/mm_types.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/memory.h>
#include "kfd_priv.h"
#include "kfd_events.h"
#include <linux/device.h>
/*
* A task can only be on a single wait_queue at a time, but we need to support
* waiting on multiple events (any/all).
* Instead of each event simply having a wait_queue with sleeping tasks, it
* has a singly-linked list of tasks.
* A thread that wants to sleep creates an array of these, one for each event
* and adds one to each event's waiter chain.
*/
struct kfd_event_waiter {
struct list_head waiters;
struct task_struct *sleeping_task;
/* Transitions to true when the event this belongs to is signaled. */
bool activated;
/* Event */
struct kfd_event *event;
uint32_t input_index;
};
/*
* Over-complicated pooled allocator for event notification slots.
*
* Each signal event needs a 64-bit signal slot where the signaler will write
* a 1 before sending an interrupt.l (This is needed because some interrupts
* do not contain enough spare data bits to identify an event.)
* We get whole pages from vmalloc and map them to the process VA.
* Individual signal events are then allocated a slot in a page.
*/
struct signal_page {
struct list_head event_pages; /* kfd_process.signal_event_pages */
uint64_t *kernel_address;
uint64_t __user *user_address;
uint32_t page_index; /* Index into the mmap aperture. */
unsigned int free_slots;
unsigned long used_slot_bitmap[0];
};
#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT
#define SLOT_BITMAP_SIZE BITS_TO_LONGS(SLOTS_PER_PAGE)
#define BITS_PER_PAGE (ilog2(SLOTS_PER_PAGE)+1)
#define SIGNAL_PAGE_SIZE (sizeof(struct signal_page) + \
SLOT_BITMAP_SIZE * sizeof(long))
/*
* For signal events, the event ID is used as the interrupt user data.
* For SQ s_sendmsg interrupts, this is limited to 8 bits.
*/
#define INTERRUPT_DATA_BITS 8
#define SIGNAL_EVENT_ID_SLOT_SHIFT 0
static uint64_t *page_slots(struct signal_page *page)
{
return page->kernel_address;
}
static bool allocate_free_slot(struct kfd_process *process,
struct signal_page **out_page,
unsigned int *out_slot_index)
{
struct signal_page *page;
list_for_each_entry(page, &process->signal_event_pages, event_pages) {
if (page->free_slots > 0) {
unsigned int slot =
find_first_zero_bit(page->used_slot_bitmap,
SLOTS_PER_PAGE);
__set_bit(slot, page->used_slot_bitmap);
page->free_slots--;
page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT;
*out_page = page;
*out_slot_index = slot;
pr_debug("allocated event signal slot in page %p, slot %d\n",
page, slot);
return true;
}
}
pr_debug("No free event signal slots were found for process %p\n",
process);
return false;
}
#define list_tail_entry(head, type, member) \
list_entry((head)->prev, type, member)
static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p)
{
void *backing_store;
struct signal_page *page;
page = kzalloc(SIGNAL_PAGE_SIZE, GFP_KERNEL);
if (!page)
goto fail_alloc_signal_page;
page->free_slots = SLOTS_PER_PAGE;
backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
if (!backing_store)
goto fail_alloc_signal_store;
/* prevent user-mode info leaks */
memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT,
KFD_SIGNAL_EVENT_LIMIT * 8);
page->kernel_address = backing_store;
if (list_empty(&p->signal_event_pages))
page->page_index = 0;
else
page->page_index = list_tail_entry(&p->signal_event_pages,
struct signal_page,
event_pages)->page_index + 1;
pr_debug("allocated new event signal page at %p, for process %p\n",
page, p);
pr_debug("page index is %d\n", page->page_index);
list_add(&page->event_pages, &p->signal_event_pages);
return true;
fail_alloc_signal_store:
kfree(page);
fail_alloc_signal_page:
return false;
}
static bool allocate_event_notification_slot(struct file *devkfd,
struct kfd_process *p,
struct signal_page **page,
unsigned int *signal_slot_index)
{
bool ret;
ret = allocate_free_slot(p, page, signal_slot_index);
if (ret == false) {
ret = allocate_signal_page(devkfd, p);
if (ret == true)
ret = allocate_free_slot(p, page, signal_slot_index);
}
return ret;
}
/* Assumes that the process's event_mutex is locked. */
static void release_event_notification_slot(struct signal_page *page,
size_t slot_index)
{
__clear_bit(slot_index, page->used_slot_bitmap);
page->free_slots++;
/* We don't free signal pages, they are retained by the process
* and reused until it exits. */
}
static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p,
unsigned int page_index)
{
struct signal_page *page;
/*
* This is safe because we don't delete signal pages until the
* process exits.
*/
list_for_each_entry(page, &p->signal_event_pages, event_pages)
if (page->page_index == page_index)
return page;
return NULL;
}
/*
* Assumes that p->event_mutex is held and of course that p is not going
* away (current or locked).
*/
static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
{
struct kfd_event *ev;
hash_for_each_possible(p->events, ev, events, id)
if (ev->event_id == id)
return ev;
return NULL;
}
static u32 make_signal_event_id(struct signal_page *page,
unsigned int signal_slot_index)
{
return page->page_index |
(signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT);
}
/*
* Produce a kfd event id for a nonsignal event.
* These are arbitrary numbers, so we do a sequential search through
* the hash table for an unused number.
*/
static u32 make_nonsignal_event_id(struct kfd_process *p)
{
u32 id;
for (id = p->next_nonsignal_event_id;
id < KFD_LAST_NONSIGNAL_EVENT_ID &&
lookup_event_by_id(p, id) != NULL;
id++)
;
if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
/*
* What if id == LAST_NONSIGNAL_EVENT_ID - 1?
* Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so
* the first loop fails immediately and we proceed with the
* wraparound loop below.
*/
p->next_nonsignal_event_id = id + 1;
return id;
}
for (id = KFD_FIRST_NONSIGNAL_EVENT_ID;
id < KFD_LAST_NONSIGNAL_EVENT_ID &&
lookup_event_by_id(p, id) != NULL;
id++)
;
if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
p->next_nonsignal_event_id = id + 1;
return id;
}
p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
return 0;
}
static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p,
struct signal_page *page,
unsigned int signal_slot)
{
return lookup_event_by_id(p, make_signal_event_id(page, signal_slot));
}
static int create_signal_event(struct file *devkfd,
struct kfd_process *p,
struct kfd_event *ev)
{
if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) {
pr_warn("amdkfd: Signal event wasn't created because limit was reached\n");
return -ENOMEM;
}
if (!allocate_event_notification_slot(devkfd, p, &ev->signal_page,
&ev->signal_slot_index)) {
pr_warn("amdkfd: Signal event wasn't created because out of kernel memory\n");
return -ENOMEM;
}
p->signal_event_count++;
ev->user_signal_address =
&ev->signal_page->user_address[ev->signal_slot_index];
ev->event_id = make_signal_event_id(ev->signal_page,
ev->signal_slot_index);
pr_debug("signal event number %zu created with id %d, address %p\n",
p->signal_event_count, ev->event_id,
ev->user_signal_address);
return 0;
}
/*
* No non-signal events are supported yet.
* We create them as events that never signal.
* Set event calls from user-mode are failed.
*/
static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
{
ev->event_id = make_nonsignal_event_id(p);
if (ev->event_id == 0)
return -ENOMEM;
return 0;
}
void kfd_event_init_process(struct kfd_process *p)
{
mutex_init(&p->event_mutex);
hash_init(p->events);
INIT_LIST_HEAD(&p->signal_event_pages);
p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
p->signal_event_count = 0;
}
static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
{
if (ev->signal_page != NULL) {
release_event_notification_slot(ev->signal_page,
ev->signal_slot_index);
p->signal_event_count--;
}
/*
* Abandon the list of waiters. Individual waiting threads will
* clean up their own data.
*/
list_del(&ev->waiters);
hash_del(&ev->events);
kfree(ev);
}
static void destroy_events(struct kfd_process *p)
{
struct kfd_event *ev;
struct hlist_node *tmp;
unsigned int hash_bkt;
hash_for_each_safe(p->events, hash_bkt, tmp, ev, events)
destroy_event(p, ev);
}
/*
* We assume that the process is being destroyed and there is no need to
* unmap the pages or keep bookkeeping data in order.
*/
static void shutdown_signal_pages(struct kfd_process *p)
{
struct signal_page *page, *tmp;
list_for_each_entry_safe(page, tmp, &p->signal_event_pages,
event_pages) {
free_pages((unsigned long)page->kernel_address,
get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
kfree(page);
}
}
void kfd_event_free_process(struct kfd_process *p)
{
destroy_events(p);
shutdown_signal_pages(p);
}
static bool event_can_be_gpu_signaled(const struct kfd_event *ev)
{
return ev->type == KFD_EVENT_TYPE_SIGNAL ||
ev->type == KFD_EVENT_TYPE_DEBUG;
}
static bool event_can_be_cpu_signaled(const struct kfd_event *ev)
{
return ev->type == KFD_EVENT_TYPE_SIGNAL;
}
int kfd_event_create(struct file *devkfd, struct kfd_process *p,
uint32_t event_type, bool auto_reset, uint32_t node_id,
uint32_t *event_id, uint32_t *event_trigger_data,
uint64_t *event_page_offset, uint32_t *event_slot_index)
{
int ret = 0;
struct kfd_event *ev = kzalloc(sizeof(*ev), GFP_KERNEL);
if (!ev)
return -ENOMEM;
ev->type = event_type;
ev->auto_reset = auto_reset;
ev->signaled = false;
INIT_LIST_HEAD(&ev->waiters);
*event_page_offset = 0;
mutex_lock(&p->event_mutex);
switch (event_type) {
case KFD_EVENT_TYPE_SIGNAL:
case KFD_EVENT_TYPE_DEBUG:
ret = create_signal_event(devkfd, p, ev);
if (!ret) {
*event_page_offset = (ev->signal_page->page_index |
KFD_MMAP_EVENTS_MASK);
*event_page_offset <<= PAGE_SHIFT;
*event_slot_index = ev->signal_slot_index;
}
break;
default:
ret = create_other_event(p, ev);
break;
}
if (!ret) {
hash_add(p->events, &ev->events, ev->event_id);
*event_id = ev->event_id;
*event_trigger_data = ev->event_id;
} else {
kfree(ev);
}
mutex_unlock(&p->event_mutex);
return ret;
}
/* Assumes that p is current. */
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id)
{
struct kfd_event *ev;
int ret = 0;
mutex_lock(&p->event_mutex);
ev = lookup_event_by_id(p, event_id);
if (ev)
destroy_event(p, ev);
else
ret = -EINVAL;
mutex_unlock(&p->event_mutex);
return ret;
}
static void set_event(struct kfd_event *ev)
{
struct kfd_event_waiter *waiter;
struct kfd_event_waiter *next;
/* Auto reset if the list is non-empty and we're waking someone. */
ev->signaled = !ev->auto_reset || list_empty(&ev->waiters);
list_for_each_entry_safe(waiter, next, &ev->waiters, waiters) {
waiter->activated = true;
/* _init because free_waiters will call list_del */
list_del_init(&waiter->waiters);
wake_up_process(waiter->sleeping_task);
}
}
/* Assumes that p is current. */
int kfd_set_event(struct kfd_process *p, uint32_t event_id)
{
int ret = 0;
struct kfd_event *ev;
mutex_lock(&p->event_mutex);
ev = lookup_event_by_id(p, event_id);
if (ev && event_can_be_cpu_signaled(ev))
set_event(ev);
else
ret = -EINVAL;
mutex_unlock(&p->event_mutex);
return ret;
}
static void reset_event(struct kfd_event *ev)
{
ev->signaled = false;
}
/* Assumes that p is current. */
int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
{
int ret = 0;
struct kfd_event *ev;
mutex_lock(&p->event_mutex);
ev = lookup_event_by_id(p, event_id);
if (ev && event_can_be_cpu_signaled(ev))
reset_event(ev);
else
ret = -EINVAL;
mutex_unlock(&p->event_mutex);
return ret;
}
static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
{
page_slots(ev->signal_page)[ev->signal_slot_index] =
UNSIGNALED_EVENT_SLOT;
}
static bool is_slot_signaled(struct signal_page *page, unsigned int index)
{
return page_slots(page)[index] != UNSIGNALED_EVENT_SLOT;
}
static void set_event_from_interrupt(struct kfd_process *p,
struct kfd_event *ev)
{
if (ev && event_can_be_gpu_signaled(ev)) {
acknowledge_signal(p, ev);
set_event(ev);
}
}
void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
uint32_t valid_id_bits)
{
struct kfd_event *ev;
/*
* Because we are called from arbitrary context (workqueue) as opposed
* to process context, kfd_process could attempt to exit while we are
* running so the lookup function returns a locked process.
*/
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
if (!p)
return; /* Presumably process exited. */
mutex_lock(&p->event_mutex);
if (valid_id_bits >= INTERRUPT_DATA_BITS) {
/* Partial ID is a full ID. */
ev = lookup_event_by_id(p, partial_id);
set_event_from_interrupt(p, ev);
} else {
/*
* Partial ID is in fact partial. For now we completely
* ignore it, but we could use any bits we did receive to
* search faster.
*/
struct signal_page *page;
unsigned i;
list_for_each_entry(page, &p->signal_event_pages, event_pages)
for (i = 0; i < SLOTS_PER_PAGE; i++)
if (is_slot_signaled(page, i)) {
ev = lookup_event_by_page_slot(p,
page, i);
set_event_from_interrupt(p, ev);
}
}
mutex_unlock(&p->event_mutex);
mutex_unlock(&p->mutex);
}
static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
{
struct kfd_event_waiter *event_waiters;
uint32_t i;
event_waiters = kmalloc_array(num_events,
sizeof(struct kfd_event_waiter),
GFP_KERNEL);
for (i = 0; (event_waiters) && (i < num_events) ; i++) {
INIT_LIST_HEAD(&event_waiters[i].waiters);
event_waiters[i].sleeping_task = current;
event_waiters[i].activated = false;
}
return event_waiters;
}
static int init_event_waiter(struct kfd_process *p,
struct kfd_event_waiter *waiter,
uint32_t event_id,
uint32_t input_index)
{
struct kfd_event *ev = lookup_event_by_id(p, event_id);
if (!ev)
return -EINVAL;
waiter->event = ev;
waiter->input_index = input_index;
waiter->activated = ev->signaled;
ev->signaled = ev->signaled && !ev->auto_reset;
list_add(&waiter->waiters, &ev->waiters);
return 0;
}
static bool test_event_condition(bool all, uint32_t num_events,
struct kfd_event_waiter *event_waiters)
{
uint32_t i;
uint32_t activated_count = 0;
for (i = 0; i < num_events; i++) {
if (event_waiters[i].activated) {
if (!all)
return true;
activated_count++;
}
}
return activated_count == num_events;
}
/*
* Copy event specific data, if defined.
* Currently only memory exception events have additional data to copy to user
*/
static bool copy_signaled_event_data(uint32_t num_events,
struct kfd_event_waiter *event_waiters,
struct kfd_event_data __user *data)
{
struct kfd_hsa_memory_exception_data *src;
struct kfd_hsa_memory_exception_data __user *dst;
struct kfd_event_waiter *waiter;
struct kfd_event *event;
uint32_t i;
for (i = 0; i < num_events; i++) {
waiter = &event_waiters[i];
event = waiter->event;
if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) {
dst = &data[waiter->input_index].memory_exception_data;
src = &event->memory_exception_data;
if (copy_to_user(dst, src,
sizeof(struct kfd_hsa_memory_exception_data)))
return false;
}
}
return true;
}
static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
{
if (user_timeout_ms == KFD_EVENT_TIMEOUT_IMMEDIATE)
return 0;
if (user_timeout_ms == KFD_EVENT_TIMEOUT_INFINITE)
return MAX_SCHEDULE_TIMEOUT;
/*
* msecs_to_jiffies interprets all values above 2^31-1 as infinite,
* but we consider them finite.
* This hack is wrong, but nobody is likely to notice.
*/
user_timeout_ms = min_t(uint32_t, user_timeout_ms, 0x7FFFFFFF);
return msecs_to_jiffies(user_timeout_ms) + 1;
}
static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
{
uint32_t i;
for (i = 0; i < num_events; i++)
list_del(&waiters[i].waiters);
kfree(waiters);
}
int kfd_wait_on_events(struct kfd_process *p,
uint32_t num_events, void __user *data,
bool all, uint32_t user_timeout_ms,
enum kfd_event_wait_result *wait_result)
{
struct kfd_event_data __user *events =
(struct kfd_event_data __user *) data;
uint32_t i;
int ret = 0;
struct kfd_event_waiter *event_waiters = NULL;
long timeout = user_timeout_to_jiffies(user_timeout_ms);
mutex_lock(&p->event_mutex);
event_waiters = alloc_event_waiters(num_events);
if (!event_waiters) {
ret = -ENOMEM;
goto fail;
}
for (i = 0; i < num_events; i++) {
struct kfd_event_data event_data;
if (copy_from_user(&event_data, &events[i],
sizeof(struct kfd_event_data)))
goto fail;
ret = init_event_waiter(p, &event_waiters[i],
event_data.event_id, i);
if (ret)
goto fail;
}
mutex_unlock(&p->event_mutex);
while (true) {
if (fatal_signal_pending(current)) {
ret = -EINTR;
break;
}
if (signal_pending(current)) {
/*
* This is wrong when a nonzero, non-infinite timeout
* is specified. We need to use
* ERESTARTSYS_RESTARTBLOCK, but struct restart_block
* contains a union with data for each user and it's
* in generic kernel code that I don't want to
* touch yet.
*/
ret = -ERESTARTSYS;
break;
}
if (test_event_condition(all, num_events, event_waiters)) {
if (copy_signaled_event_data(num_events,
event_waiters, events))
*wait_result = KFD_WAIT_COMPLETE;
else
*wait_result = KFD_WAIT_ERROR;
break;
}
if (timeout <= 0) {
*wait_result = KFD_WAIT_TIMEOUT;
break;
}
timeout = schedule_timeout_interruptible(timeout);
}
__set_current_state(TASK_RUNNING);
mutex_lock(&p->event_mutex);
free_waiters(num_events, event_waiters);
mutex_unlock(&p->event_mutex);
return ret;
fail:
if (event_waiters)
free_waiters(num_events, event_waiters);
mutex_unlock(&p->event_mutex);
*wait_result = KFD_WAIT_ERROR;
return ret;
}
int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
{
unsigned int page_index;
unsigned long pfn;
struct signal_page *page;
/* check required size is logical */
if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) !=
get_order(vma->vm_end - vma->vm_start)) {
pr_err("amdkfd: event page mmap requested illegal size\n");
return -EINVAL;
}
page_index = vma->vm_pgoff;
page = lookup_signal_page_by_index(p, page_index);
if (!page) {
/* Probably KFD bug, but mmap is user-accessible. */
pr_debug("signal page could not be found for page_index %u\n",
page_index);
return -EINVAL;
}
pfn = __pa(page->kernel_address);
pfn >>= PAGE_SHIFT;
vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE
| VM_DONTDUMP | VM_PFNMAP;
pr_debug("mapping signal page\n");
pr_debug(" start user address == 0x%08lx\n", vma->vm_start);
pr_debug(" end user address == 0x%08lx\n", vma->vm_end);
pr_debug(" pfn == 0x%016lX\n", pfn);
pr_debug(" vm_flags == 0x%08lX\n", vma->vm_flags);
pr_debug(" size == 0x%08lX\n",
vma->vm_end - vma->vm_start);
page->user_address = (uint64_t __user *)vma->vm_start;
/* mapping the page to user process */
return remap_pfn_range(vma, vma->vm_start, pfn,
vma->vm_end - vma->vm_start, vma->vm_page_prot);
}
/*
* Assumes that p->event_mutex is held and of course
* that p is not going away (current or locked).
*/
static void lookup_events_by_type_and_signal(struct kfd_process *p,
int type, void *event_data)
{
struct kfd_hsa_memory_exception_data *ev_data;
struct kfd_event *ev;
int bkt;
bool send_signal = true;
ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
hash_for_each(p->events, bkt, ev, events)
if (ev->type == type) {
send_signal = false;
dev_dbg(kfd_device,
"Event found: id %X type %d",
ev->event_id, ev->type);
set_event(ev);
if (ev->type == KFD_EVENT_TYPE_MEMORY && ev_data)
ev->memory_exception_data = *ev_data;
}
/* Send SIGTERM no event of type "type" has been found*/
if (send_signal) {
if (send_sigterm) {
dev_warn(kfd_device,
"Sending SIGTERM to HSA Process with PID %d ",
p->lead_thread->pid);
send_sig(SIGTERM, p->lead_thread, 0);
} else {
dev_err(kfd_device,
"HSA Process (PID %d) got unhandled exception",
p->lead_thread->pid);
}
}
}
void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
unsigned long address, bool is_write_requested,
bool is_execute_requested)
{
struct kfd_hsa_memory_exception_data memory_exception_data;
struct vm_area_struct *vma;
/*
* Because we are called from arbitrary context (workqueue) as opposed
* to process context, kfd_process could attempt to exit while we are
* running so the lookup function returns a locked process.
*/
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
if (!p)
return; /* Presumably process exited. */
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
down_read(&p->mm->mmap_sem);
vma = find_vma(p->mm, address);
memory_exception_data.gpu_id = dev->id;
memory_exception_data.va = address;
/* Set failure reason */
memory_exception_data.failure.NotPresent = 1;
memory_exception_data.failure.NoExecute = 0;
memory_exception_data.failure.ReadOnly = 0;
if (vma) {
if (vma->vm_start > address) {
memory_exception_data.failure.NotPresent = 1;
memory_exception_data.failure.NoExecute = 0;
memory_exception_data.failure.ReadOnly = 0;
} else {
memory_exception_data.failure.NotPresent = 0;
if (is_write_requested && !(vma->vm_flags & VM_WRITE))
memory_exception_data.failure.ReadOnly = 1;
else
memory_exception_data.failure.ReadOnly = 0;
if (is_execute_requested && !(vma->vm_flags & VM_EXEC))
memory_exception_data.failure.NoExecute = 1;
else
memory_exception_data.failure.NoExecute = 0;
}
}
up_read(&p->mm->mmap_sem);
mutex_lock(&p->event_mutex);
/* Lookup events by type and signal them */
lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY,
&memory_exception_data);
mutex_unlock(&p->event_mutex);
mutex_unlock(&p->mutex);
}
void kfd_signal_hw_exception_event(unsigned int pasid)
{
/*
* Because we are called from arbitrary context (workqueue) as opposed
* to process context, kfd_process could attempt to exit while we are
* running so the lookup function returns a locked process.
*/
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
if (!p)
return; /* Presumably process exited. */
mutex_lock(&p->event_mutex);
/* Lookup events by type and signal them */
lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL);
mutex_unlock(&p->event_mutex);
mutex_unlock(&p->mutex);
}
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef KFD_EVENTS_H_INCLUDED
#define KFD_EVENTS_H_INCLUDED
#include <linux/kernel.h>
#include <linux/hashtable.h>
#include <linux/types.h>
#include <linux/list.h>
#include "kfd_priv.h"
#include <uapi/linux/kfd_ioctl.h>
#define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U
#define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK
#define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX
/*
* Written into kfd_signal_slot_t to indicate that the event is not signaled.
* Since the event protocol may need to write the event ID into memory, this
* must not be a valid event ID.
* For the sake of easy memset-ing, this must be a byte pattern.
*/
#define UNSIGNALED_EVENT_SLOT ((uint64_t)-1)
struct kfd_event_waiter;
struct signal_page;
struct kfd_event {
/* All events in process, rooted at kfd_process.events. */
struct hlist_node events;
u32 event_id;
bool signaled;
bool auto_reset;
int type;
struct list_head waiters; /* List of kfd_event_waiter by waiters. */
/* Only for signal events. */
struct signal_page *signal_page;
unsigned int signal_slot_index;
uint64_t __user *user_signal_address;
/* type specific data */
union {
struct kfd_hsa_memory_exception_data memory_exception_data;
};
};
#define KFD_EVENT_TIMEOUT_IMMEDIATE 0
#define KFD_EVENT_TIMEOUT_INFINITE 0xFFFFFFFFu
/* Matching HSA_EVENTTYPE */
#define KFD_EVENT_TYPE_SIGNAL 0
#define KFD_EVENT_TYPE_HW_EXCEPTION 3
#define KFD_EVENT_TYPE_DEBUG 5
#define KFD_EVENT_TYPE_MEMORY 8
extern void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
uint32_t valid_id_bits);
#endif
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* KFD Interrupts.
*
* AMD GPUs deliver interrupts by pushing an interrupt description onto the
* interrupt ring and then sending an interrupt. KGD receives the interrupt
* in ISR and sends us a pointer to each new entry on the interrupt ring.
*
* We generally can't process interrupt-signaled events from ISR, so we call
* out to each interrupt client module (currently only the scheduler) to ask if
* each interrupt is interesting. If they return true, then it requires further
* processing so we copy it to an internal interrupt ring and call each
* interrupt client again from a work-queue.
*
* There's no acknowledgment for the interrupts we use. The hardware simply
* queues a new interrupt each time without waiting.
*
* The fixed-size internal queue means that it's possible for us to lose
* interrupts because we have no back-pressure to the hardware.
*/
#include <linux/slab.h>
#include <linux/device.h>
#include "kfd_priv.h"
#define KFD_INTERRUPT_RING_SIZE 1024
static void interrupt_wq(struct work_struct *);
int kfd_interrupt_init(struct kfd_dev *kfd)
{
void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE,
kfd->device_info->ih_ring_entry_size,
GFP_KERNEL);
if (!interrupt_ring)
return -ENOMEM;
kfd->interrupt_ring = interrupt_ring;
kfd->interrupt_ring_size =
KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size;
atomic_set(&kfd->interrupt_ring_wptr, 0);
atomic_set(&kfd->interrupt_ring_rptr, 0);
spin_lock_init(&kfd->interrupt_lock);
INIT_WORK(&kfd->interrupt_work, interrupt_wq);
kfd->interrupts_active = true;
/*
* After this function returns, the interrupt will be enabled. This
* barrier ensures that the interrupt running on a different processor
* sees all the above writes.
*/
smp_wmb();
return 0;
}
void kfd_interrupt_exit(struct kfd_dev *kfd)
{
/*
* Stop the interrupt handler from writing to the ring and scheduling
* workqueue items. The spinlock ensures that any interrupt running
* after we have unlocked sees interrupts_active = false.
*/
unsigned long flags;
spin_lock_irqsave(&kfd->interrupt_lock, flags);
kfd->interrupts_active = false;
spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
/*
* Flush_scheduled_work ensures that there are no outstanding
* work-queue items that will access interrupt_ring. New work items
* can't be created because we stopped interrupt handling above.
*/
flush_scheduled_work();
kfree(kfd->interrupt_ring);
}
/*
* This assumes that it can't be called concurrently with itself
* but only with dequeue_ih_ring_entry.
*/
bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry)
{
unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
if ((rptr - wptr) % kfd->interrupt_ring_size ==
kfd->device_info->ih_ring_entry_size) {
/* This is very bad, the system is likely to hang. */
dev_err_ratelimited(kfd_chardev(),
"Interrupt ring overflow, dropping interrupt.\n");
return false;
}
memcpy(kfd->interrupt_ring + wptr, ih_ring_entry,
kfd->device_info->ih_ring_entry_size);
wptr = (wptr + kfd->device_info->ih_ring_entry_size) %
kfd->interrupt_ring_size;
smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */
atomic_set(&kfd->interrupt_ring_wptr, wptr);
return true;
}
/*
* This assumes that it can't be called concurrently with itself
* but only with enqueue_ih_ring_entry.
*/
static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)
{
/*
* Assume that wait queues have an implicit barrier, i.e. anything that
* happened in the ISR before it queued work is visible.
*/
unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
if (rptr == wptr)
return false;
memcpy(ih_ring_entry, kfd->interrupt_ring + rptr,
kfd->device_info->ih_ring_entry_size);
rptr = (rptr + kfd->device_info->ih_ring_entry_size) %
kfd->interrupt_ring_size;
/*
* Ensure the rptr write update is not visible until
* memcpy has finished reading.
*/
smp_mb();
atomic_set(&kfd->interrupt_ring_rptr, rptr);
return true;
}
static void interrupt_wq(struct work_struct *work)
{
struct kfd_dev *dev = container_of(work, struct kfd_dev,
interrupt_work);
uint32_t ih_ring_entry[DIV_ROUND_UP(
dev->device_info->ih_ring_entry_size,
sizeof(uint32_t))];
while (dequeue_ih_ring_entry(dev, ih_ring_entry))
dev->device_info->event_interrupt_class->interrupt_wq(dev,
ih_ring_entry);
}
bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry)
{
/* integer and bitwise OR so there is no boolean short-circuiting */
unsigned wanted = 0;
wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
ih_ring_entry);
return wanted != 0;
}
......@@ -215,8 +215,9 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
queue_address = (unsigned int *)kq->pq_kernel_addr;
queue_size_dwords = kq->queue->properties.queue_size / sizeof(uint32_t);
pr_debug("amdkfd: In func %s\nrptr: %d\nwptr: %d\nqueue_address 0x%p\n",
__func__, rptr, wptr, queue_address);
pr_debug("rptr: %d\n", rptr);
pr_debug("wptr: %d\n", wptr);
pr_debug("queue_address 0x%p\n", queue_address);
available_size = (rptr - 1 - wptr + queue_size_dwords) %
queue_size_dwords;
......
......@@ -29,10 +29,10 @@
#define KFD_DRIVER_AUTHOR "AMD Inc. and others"
#define KFD_DRIVER_DESC "Standalone HSA driver for AMD's GPUs"
#define KFD_DRIVER_DATE "20150122"
#define KFD_DRIVER_DATE "20150421"
#define KFD_DRIVER_MAJOR 0
#define KFD_DRIVER_MINOR 7
#define KFD_DRIVER_PATCHLEVEL 1
#define KFD_DRIVER_PATCHLEVEL 2
static const struct kgd2kfd_calls kgd2kfd = {
.exit = kgd2kfd_exit,
......@@ -54,6 +54,11 @@ module_param(max_num_of_queues_per_device, int, 0444);
MODULE_PARM_DESC(max_num_of_queues_per_device,
"Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
int send_sigterm;
module_param(send_sigterm, int, 0444);
MODULE_PARM_DESC(send_sigterm,
"Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
bool kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f)
{
/*
......
......@@ -35,6 +35,9 @@
#define KFD_SYSFS_FILE_MODE 0444
#define KFD_MMAP_DOORBELL_MASK 0x8000000000000
#define KFD_MMAP_EVENTS_MASK 0x4000000000000
/*
* When working with cp scheduler we should assign the HIQ manually or via
* the radeon driver to a fixed hqd slot, here are the fixed HIQ hqd slot
......@@ -71,6 +74,12 @@ extern int max_num_of_queues_per_device;
/* Kernel module parameter to specify the scheduling policy */
extern int sched_policy;
/*
* Kernel module parameter to specify whether to send sigterm to HSA process on
* unhandled exception
*/
extern int send_sigterm;
/**
* enum kfd_sched_policy
*
......@@ -108,8 +117,16 @@ enum asic_family_type {
CHIP_CARRIZO
};
struct kfd_event_interrupt_class {
bool (*interrupt_isr)(struct kfd_dev *dev,
const uint32_t *ih_ring_entry);
void (*interrupt_wq)(struct kfd_dev *dev,
const uint32_t *ih_ring_entry);
};
struct kfd_device_info {
unsigned int asic_family;
const struct kfd_event_interrupt_class *event_interrupt_class;
unsigned int max_pasid_bits;
size_t ih_ring_entry_size;
uint8_t num_of_watch_points;
......@@ -161,10 +178,23 @@ struct kfd_dev {
unsigned int gtt_sa_chunk_size;
unsigned int gtt_sa_num_of_chunks;
/* Interrupts */
void *interrupt_ring;
size_t interrupt_ring_size;
atomic_t interrupt_ring_rptr;
atomic_t interrupt_ring_wptr;
struct work_struct interrupt_work;
spinlock_t interrupt_lock;
/* QCM Device instance */
struct device_queue_manager *dqm;
bool init_complete;
/*
* Interrupts of interest to KFD are copied
* from the HW ring into a SW ring.
*/
bool interrupts_active;
};
/* KGD2KFD callbacks */
......@@ -477,6 +507,15 @@ struct kfd_process {
/*Is the user space process 32 bit?*/
bool is_32bit_user_mode;
/* Event-related data */
struct mutex event_mutex;
/* All events in process hashed by ID, linked on kfd_event.events. */
DECLARE_HASHTABLE(events, 4);
struct list_head signal_event_pages; /* struct slot_page_header.
event_pages */
u32 next_nonsignal_event_id;
size_t signal_event_count;
};
/**
......@@ -501,6 +540,7 @@ void kfd_process_create_wq(void);
void kfd_process_destroy_wq(void);
struct kfd_process *kfd_create_process(const struct task_struct *);
struct kfd_process *kfd_get_process(const struct task_struct *);
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
struct kfd_process *p);
......@@ -555,7 +595,11 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx);
/* Interrupts */
int kfd_interrupt_init(struct kfd_dev *dev);
void kfd_interrupt_exit(struct kfd_dev *dev);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry);
bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry);
/* Power Management */
void kgd2kfd_suspend(struct kfd_dev *kfd);
......@@ -642,4 +686,35 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
struct kfd_process *process);
/* Events */
extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
extern const struct kfd_device_global_init_class device_global_init_class_cik;
enum kfd_event_wait_result {
KFD_WAIT_COMPLETE,
KFD_WAIT_TIMEOUT,
KFD_WAIT_ERROR
};
void kfd_event_init_process(struct kfd_process *p);
void kfd_event_free_process(struct kfd_process *p);
int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
int kfd_wait_on_events(struct kfd_process *p,
uint32_t num_events, void __user *data,
bool all, uint32_t user_timeout_ms,
enum kfd_event_wait_result *wait_result);
void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
uint32_t valid_id_bits);
void kfd_signal_iommu_event(struct kfd_dev *dev,
unsigned int pasid, unsigned long address,
bool is_write_requested, bool is_execute_requested);
void kfd_signal_hw_exception_event(unsigned int pasid);
int kfd_set_event(struct kfd_process *p, uint32_t event_id);
int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
int kfd_event_create(struct file *devkfd, struct kfd_process *p,
uint32_t event_type, bool auto_reset, uint32_t node_id,
uint32_t *event_id, uint32_t *event_trigger_data,
uint64_t *event_page_offset, uint32_t *event_slot_index);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
#endif
......@@ -178,6 +178,8 @@ static void kfd_process_wq_release(struct work_struct *work)
kfree(pdd);
}
kfd_event_free_process(p);
kfd_pasid_free(p->pasid);
mutex_unlock(&p->mutex);
......@@ -203,8 +205,7 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu)
mmdrop(p->mm);
work = (struct kfd_process_release_work *)
kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC);
work = kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC);
if (work) {
INIT_WORK((struct work_struct *) work, kfd_process_wq_release);
......@@ -289,6 +290,8 @@ static struct kfd_process *create_process(const struct task_struct *thread)
INIT_LIST_HEAD(&process->per_device_data);
kfd_event_init_process(process);
err = pqm_init(&process->pqm, process);
if (err != 0)
goto err_process_pqm_init;
......@@ -431,3 +434,23 @@ bool kfd_has_process_device_data(struct kfd_process *p)
{
return !(list_empty(&p->per_device_data));
}
/* This returns with process->mutex locked. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
struct kfd_process *p;
unsigned int temp;
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
if (p->pasid == pasid) {
mutex_lock(&p->mutex);
break;
}
}
srcu_read_unlock(&kfd_processes_srcu, idx);
return p;
}
......@@ -144,6 +144,8 @@ struct kfd2kgd_calls {
int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr);
int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id);
int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr);
......
......@@ -149,6 +149,8 @@
#define KFD_CIK_SDMA_QUEUE_OFFSET 0x200
#define CPC_INT_CNTL 0xC2D0
#define CP_HQD_IQ_RPTR 0xC970u
#define AQL_ENABLE (1U << 0)
#define SDMA0_RLC0_RB_CNTL 0xD400u
......
......@@ -1335,6 +1335,7 @@
# define CNTX_EMPTY_INT_ENABLE (1 << 20)
# define PRIV_INSTR_INT_ENABLE (1 << 22)
# define PRIV_REG_INT_ENABLE (1 << 23)
# define OPCODE_ERROR_INT_ENABLE (1 << 24)
# define TIME_STAMP_INT_ENABLE (1 << 26)
# define CP_RINGID2_INT_ENABLE (1 << 29)
# define CP_RINGID1_INT_ENABLE (1 << 30)
......
......@@ -66,7 +66,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
......@@ -89,6 +89,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
.init_pipeline = kgd_init_pipeline,
.init_interrupts = kgd_init_interrupts,
.hqd_load = kgd_hqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
.hqd_is_occupied = kgd_hqd_is_occupied,
......@@ -407,6 +408,24 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
return 0;
}
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
uint32_t mec;
uint32_t pipe;
mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
pipe = (pipe_id % CIK_PIPE_PER_MEC);
lock_srbm(kgd, mec, pipe, 0, 0);
write_register(kgd, CPC_INT_CNTL,
TIME_STAMP_INT_ENABLE | OPCODE_ERROR_INT_ENABLE);
unlock_srbm(kgd);
return 0;
}
static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
{
uint32_t retval;
......
......@@ -27,7 +27,7 @@
#include <linux/ioctl.h>
#define KFD_IOCTL_MAJOR_VERSION 1
#define KFD_IOCTL_MINOR_VERSION 0
#define KFD_IOCTL_MINOR_VERSION 1
struct kfd_ioctl_get_version_args {
uint32_t major_version; /* from KFD */
......@@ -128,6 +128,83 @@ struct kfd_ioctl_get_process_apertures_args {
uint32_t pad;
};
/* Matching HSA_EVENTTYPE */
#define KFD_IOC_EVENT_SIGNAL 0
#define KFD_IOC_EVENT_NODECHANGE 1
#define KFD_IOC_EVENT_DEVICESTATECHANGE 2
#define KFD_IOC_EVENT_HW_EXCEPTION 3
#define KFD_IOC_EVENT_SYSTEM_EVENT 4
#define KFD_IOC_EVENT_DEBUG_EVENT 5
#define KFD_IOC_EVENT_PROFILE_EVENT 6
#define KFD_IOC_EVENT_QUEUE_EVENT 7
#define KFD_IOC_EVENT_MEMORY 8
#define KFD_IOC_WAIT_RESULT_COMPLETE 0
#define KFD_IOC_WAIT_RESULT_TIMEOUT 1
#define KFD_IOC_WAIT_RESULT_FAIL 2
#define KFD_SIGNAL_EVENT_LIMIT 256
struct kfd_ioctl_create_event_args {
uint64_t event_page_offset; /* from KFD */
uint32_t event_trigger_data; /* from KFD - signal events only */
uint32_t event_type; /* to KFD */
uint32_t auto_reset; /* to KFD */
uint32_t node_id; /* to KFD - only valid for certain
event types */
uint32_t event_id; /* from KFD */
uint32_t event_slot_index; /* from KFD */
};
struct kfd_ioctl_destroy_event_args {
uint32_t event_id; /* to KFD */
uint32_t pad;
};
struct kfd_ioctl_set_event_args {
uint32_t event_id; /* to KFD */
uint32_t pad;
};
struct kfd_ioctl_reset_event_args {
uint32_t event_id; /* to KFD */
uint32_t pad;
};
struct kfd_memory_exception_failure {
uint32_t NotPresent; /* Page not present or supervisor privilege */
uint32_t ReadOnly; /* Write access to a read-only page */
uint32_t NoExecute; /* Execute access to a page marked NX */
uint32_t pad;
};
/* memory exception data*/
struct kfd_hsa_memory_exception_data {
struct kfd_memory_exception_failure failure;
uint64_t va;
uint32_t gpu_id;
uint32_t pad;
};
/* Event data*/
struct kfd_event_data {
union {
struct kfd_hsa_memory_exception_data memory_exception_data;
}; /* From KFD */
uint64_t kfd_event_data_ext; /* pointer to an extension structure
for future exception types */
uint32_t event_id; /* to KFD */
uint32_t pad;
};
struct kfd_ioctl_wait_events_args {
uint64_t events_ptr; /* to KFD */
uint32_t num_events; /* to KFD */
uint32_t wait_for_all; /* to KFD */
uint32_t timeout; /* to KFD */
uint32_t wait_result; /* from KFD */
};
#define AMDKFD_IOCTL_BASE 'K'
#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr)
#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type)
......@@ -155,7 +232,22 @@ struct kfd_ioctl_get_process_apertures_args {
#define AMDKFD_IOC_UPDATE_QUEUE \
AMDKFD_IOW(0x07, struct kfd_ioctl_update_queue_args)
#define AMDKFD_IOC_CREATE_EVENT \
AMDKFD_IOWR(0x08, struct kfd_ioctl_create_event_args)
#define AMDKFD_IOC_DESTROY_EVENT \
AMDKFD_IOW(0x09, struct kfd_ioctl_destroy_event_args)
#define AMDKFD_IOC_SET_EVENT \
AMDKFD_IOW(0x0A, struct kfd_ioctl_set_event_args)
#define AMDKFD_IOC_RESET_EVENT \
AMDKFD_IOW(0x0B, struct kfd_ioctl_reset_event_args)
#define AMDKFD_IOC_WAIT_EVENTS \
AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args)
#define AMDKFD_COMMAND_START 0x01
#define AMDKFD_COMMAND_END 0x08
#define AMDKFD_COMMAND_END 0x0D
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment