Commit 91d9f985 authored by Dave Airlie

Merge tag 'drm-amdkfd-next-2015-05-19' of git://people.freedesktop.org/~gabbayo/linux into drm-next

- Add the interrupts & events modules, including new IOCTLs to create and wait
  on events. The HSA runtime open source stack mainly uses events to know when
  dispatched work has completed. In addition, this module is a prerequisite for
  the next module I'm going to upstream - debugger support.

  This module also handles H/W exceptions, such as memory exceptions received
  through the IOMMUv2 H/W and bad opcode exceptions received from the GPU.

- Add a new kernel module parameter to let the user decide whether to receive
  a SIGTERM when a memory exception occurs inside a GPU kernel and the HSA
  application doesn't wait on an appropriate event, or to just receive a
  notification about the event in dmesg. The default is the latter.

- Additional improvements for SDMA code

- Update my email address in the MAINTAINERS file.

* tag 'drm-amdkfd-next-2015-05-19' of git://people.freedesktop.org/~gabbayo/linux:
  drm/amdkfd: change driver version to 0.7.2
  drm/amdkfd: Implement events IOCTLs
  drm/amdkfd: Add module parameter of send_sigterm
  drm/amdkfd: Add bad opcode exception handling
  drm/amdkfd: Add memory exception handling
  drm/amdkfd: Add the events module
  drm/amdkfd: add events IOCTL set definitions
  drm/amdkfd: Add interrupt handling module
  drm/radeon: Add init interrupt kfd->kgd interface
  MAINTAINERS: update amdkfd Oded's email address
  drm/amdkfd: make the sdma vm init to be asic specific
  drm/amdkfd: Use new struct for asic specific ops
  drm/amdkfd: reformat some debug prints
  drm/amdkfd: Remove unessary void pointer cast
parents 9c37bf2d 7591cd2c
...@@ -631,7 +631,7 @@ F: drivers/iommu/amd_iommu*.[ch] ...@@ -631,7 +631,7 @@ F: drivers/iommu/amd_iommu*.[ch]
F: include/linux/amd-iommu.h F: include/linux/amd-iommu.h
AMD KFD AMD KFD
M: Oded Gabbay <oded.gabbay@amd.com> M: Oded Gabbay <oded.gabbay@gmail.com>
L: dri-devel@lists.freedesktop.org L: dri-devel@lists.freedesktop.org
T: git git://people.freedesktop.org/~gabbayo/linux.git T: git git://people.freedesktop.org/~gabbayo/linux.git
S: Supported S: Supported
......
...@@ -12,5 +12,6 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ ...@@ -12,5 +12,6 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_kernel_queue_vi.o kfd_packet_manager.o \ kfd_kernel_queue_vi.o kfd_packet_manager.o \
kfd_process_queue_manager.o kfd_device_queue_manager.o \ kfd_process_queue_manager.o kfd_device_queue_manager.o \
kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
kfd_interrupt.o kfd_events.o cik_event_interrupt.o
obj-$(CONFIG_HSA_AMD) += amdkfd.o obj-$(CONFIG_HSA_AMD) += amdkfd.o
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "kfd_priv.h"
#include "kfd_events.h"
#include "cik_int.h"
static bool cik_event_interrupt_isr(struct kfd_dev *dev,
const uint32_t *ih_ring_entry)
{
unsigned int pasid;
const struct cik_ih_ring_entry *ihre =
(const struct cik_ih_ring_entry *)ih_ring_entry;
pasid = (ihre->ring_id & 0xffff0000) >> 16;
/* Do not process in ISR, just request it to be forwarded to WQ. */
return (pasid != 0) &&
(ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE);
}
static void cik_event_interrupt_wq(struct kfd_dev *dev,
const uint32_t *ih_ring_entry)
{
unsigned int pasid;
const struct cik_ih_ring_entry *ihre =
(const struct cik_ih_ring_entry *)ih_ring_entry;
pasid = (ihre->ring_id & 0xffff0000) >> 16;
if (pasid == 0)
return;
if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE)
kfd_signal_event_interrupt(pasid, 0, 0);
else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG)
kfd_signal_event_interrupt(pasid, ihre->data & 0xFF, 8);
else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE)
kfd_signal_hw_exception_event(pasid);
}
const struct kfd_event_interrupt_class event_interrupt_class_cik = {
.interrupt_isr = cik_event_interrupt_isr,
.interrupt_wq = cik_event_interrupt_wq,
};
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef HSA_RADEON_CIK_INT_H_INCLUDED
#define HSA_RADEON_CIK_INT_H_INCLUDED
#include <linux/types.h>
struct cik_ih_ring_entry {
uint32_t source_id;
uint32_t data;
uint32_t ring_id;
uint32_t reserved;
};
#define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6
#define CIK_INTSRC_CP_END_OF_PIPE 0xB5
#define CIK_INTSRC_CP_BAD_OPCODE 0xB7
#define CIK_INTSRC_SQ_INTERRUPT_MSG 0xEF
#endif
...@@ -289,8 +289,10 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, ...@@ -289,8 +289,10 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
args->queue_id = queue_id; args->queue_id = queue_id;
/* Return gpu_id as doorbell offset for mmap usage */ /* Return gpu_id as doorbell offset for mmap usage */
args->doorbell_offset = args->gpu_id << PAGE_SHIFT; args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id);
args->doorbell_offset <<= PAGE_SHIFT;
mutex_unlock(&p->mutex); mutex_unlock(&p->mutex);
...@@ -514,6 +516,62 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, ...@@ -514,6 +516,62 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
return 0; return 0;
} }
static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
void *data)
{
struct kfd_ioctl_create_event_args *args = data;
int err;
err = kfd_event_create(filp, p, args->event_type,
args->auto_reset != 0, args->node_id,
&args->event_id, &args->event_trigger_data,
&args->event_page_offset,
&args->event_slot_index);
return err;
}
static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
void *data)
{
struct kfd_ioctl_destroy_event_args *args = data;
return kfd_event_destroy(p, args->event_id);
}
static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
void *data)
{
struct kfd_ioctl_set_event_args *args = data;
return kfd_set_event(p, args->event_id);
}
static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
void *data)
{
struct kfd_ioctl_reset_event_args *args = data;
return kfd_reset_event(p, args->event_id);
}
static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
void *data)
{
struct kfd_ioctl_wait_events_args *args = data;
enum kfd_event_wait_result wait_result;
int err;
err = kfd_wait_on_events(p, args->num_events,
(void __user *)args->events_ptr,
(args->wait_for_all != 0),
args->timeout, &wait_result);
args->wait_result = wait_result;
return err;
}
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl} [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl}
...@@ -539,6 +597,21 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { ...@@ -539,6 +597,21 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE, AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
kfd_ioctl_update_queue, 0), kfd_ioctl_update_queue, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
kfd_ioctl_create_event, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
kfd_ioctl_destroy_event, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
kfd_ioctl_set_event, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
kfd_ioctl_reset_event, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
kfd_ioctl_wait_events, 0),
}; };
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
...@@ -639,5 +712,15 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) ...@@ -639,5 +712,15 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
if (IS_ERR(process)) if (IS_ERR(process))
return PTR_ERR(process); return PTR_ERR(process);
return kfd_doorbell_mmap(process, vma); if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) ==
KFD_MMAP_DOORBELL_MASK) {
vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK;
return kfd_doorbell_mmap(process, vma);
} else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) ==
KFD_MMAP_EVENTS_MASK) {
vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
return kfd_event_mmap(process, vma);
}
return -EFAULT;
} }
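To illustrate how these encoded offsets are consumed, here is a minimal userspace sketch (not part of this patch, helper name hypothetical): it assumes an open /dev/kfd file descriptor and that the doorbell offset returned by the create-queue IOCTL - and, in the same way, the event page offset returned by the create-event IOCTL - is passed to mmap() verbatim, so that kfd_mmap() above can route the mapping by the mask bits encoded above PAGE_SHIFT.

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>
#include <sys/types.h>

/*
 * Hypothetical helper: the offsets handed back by the KFD IOCTLs already
 * carry KFD_MMAP_DOORBELL_MASK or KFD_MMAP_EVENTS_MASK shifted by PAGE_SHIFT,
 * so they go to mmap() unchanged and the kernel's kfd_mmap() dispatches to
 * kfd_doorbell_mmap() or kfd_event_mmap() accordingly.
 */
static void *kfd_map_page(int kfd_fd, uint64_t ioctl_offset, size_t page_size)
{
	return mmap(NULL, page_size, PROT_READ | PROT_WRITE,
		    MAP_SHARED, kfd_fd, (off_t)ioctl_offset);
}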
...@@ -34,6 +34,7 @@ static const struct kfd_device_info kaveri_device_info = { ...@@ -34,6 +34,7 @@ static const struct kfd_device_info kaveri_device_info = {
.asic_family = CHIP_KAVERI, .asic_family = CHIP_KAVERI,
.max_pasid_bits = 16, .max_pasid_bits = 16,
.ih_ring_entry_size = 4 * sizeof(uint32_t), .ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.mqd_size_aligned = MQD_SIZE_ALIGNED .mqd_size_aligned = MQD_SIZE_ALIGNED
}; };
...@@ -181,6 +182,32 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) ...@@ -181,6 +182,32 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
kfd_unbind_process_from_device(dev, pasid); kfd_unbind_process_from_device(dev, pasid);
} }
/*
 * This function is called by the IOMMU driver on PPR failure
*/
static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
unsigned long address, u16 flags)
{
struct kfd_dev *dev;
dev_warn(kfd_device,
"Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X",
PCI_BUS_NUM(pdev->devfn),
PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn),
pasid,
address,
flags);
dev = kfd_device_by_pci_dev(pdev);
BUG_ON(dev == NULL);
kfd_signal_iommu_event(dev, pasid, address,
flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);
return AMD_IOMMU_INV_PRI_RSP_INVALID;
}
bool kgd2kfd_device_init(struct kfd_dev *kfd, bool kgd2kfd_device_init(struct kfd_dev *kfd,
const struct kgd2kfd_shared_resources *gpu_resources) const struct kgd2kfd_shared_resources *gpu_resources)
{ {
...@@ -235,6 +262,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, ...@@ -235,6 +262,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto kfd_topology_add_device_error; goto kfd_topology_add_device_error;
} }
if (kfd_interrupt_init(kfd)) {
dev_err(kfd_device,
"Error initializing interrupts for device (%x:%x)\n",
kfd->pdev->vendor, kfd->pdev->device);
goto kfd_interrupt_error;
}
if (!device_iommu_pasid_init(kfd)) { if (!device_iommu_pasid_init(kfd)) {
dev_err(kfd_device, dev_err(kfd_device,
"Error initializing iommuv2 for device (%x:%x)\n", "Error initializing iommuv2 for device (%x:%x)\n",
...@@ -243,6 +277,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, ...@@ -243,6 +277,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
} }
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
iommu_pasid_shutdown_callback); iommu_pasid_shutdown_callback);
amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb);
kfd->dqm = device_queue_manager_init(kfd); kfd->dqm = device_queue_manager_init(kfd);
if (!kfd->dqm) { if (!kfd->dqm) {
...@@ -273,6 +308,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, ...@@ -273,6 +308,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
device_queue_manager_error: device_queue_manager_error:
amd_iommu_free_device(kfd->pdev); amd_iommu_free_device(kfd->pdev);
device_iommu_pasid_error: device_iommu_pasid_error:
kfd_interrupt_exit(kfd);
kfd_interrupt_error:
kfd_topology_remove_device(kfd); kfd_topology_remove_device(kfd);
kfd_topology_add_device_error: kfd_topology_add_device_error:
kfd_gtt_sa_fini(kfd); kfd_gtt_sa_fini(kfd);
...@@ -290,6 +327,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) ...@@ -290,6 +327,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
if (kfd->init_complete) { if (kfd->init_complete) {
device_queue_manager_uninit(kfd->dqm); device_queue_manager_uninit(kfd->dqm);
amd_iommu_free_device(kfd->pdev); amd_iommu_free_device(kfd->pdev);
kfd_interrupt_exit(kfd);
kfd_topology_remove_device(kfd); kfd_topology_remove_device(kfd);
kfd_gtt_sa_fini(kfd); kfd_gtt_sa_fini(kfd);
kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem); kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
...@@ -305,6 +343,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd) ...@@ -305,6 +343,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd)
if (kfd->init_complete) { if (kfd->init_complete) {
kfd->dqm->ops.stop(kfd->dqm); kfd->dqm->ops.stop(kfd->dqm);
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
amd_iommu_free_device(kfd->pdev); amd_iommu_free_device(kfd->pdev);
} }
} }
...@@ -324,6 +363,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd) ...@@ -324,6 +363,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
return -ENXIO; return -ENXIO;
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
iommu_pasid_shutdown_callback); iommu_pasid_shutdown_callback);
amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb);
kfd->dqm->ops.start(kfd->dqm); kfd->dqm->ops.start(kfd->dqm);
} }
...@@ -333,7 +373,17 @@ int kgd2kfd_resume(struct kfd_dev *kfd) ...@@ -333,7 +373,17 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
/* This is called directly from KGD at ISR. */ /* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{ {
/* Process interrupts / schedule work as necessary */ if (!kfd->init_complete)
return;
spin_lock(&kfd->interrupt_lock);
if (kfd->interrupts_active
&& interrupt_is_wanted(kfd, ih_ring_entry)
&& enqueue_ih_ring_entry(kfd, ih_ring_entry))
schedule_work(&kfd->interrupt_work);
spin_unlock(&kfd->interrupt_lock);
} }
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
......
...@@ -522,6 +522,17 @@ int init_pipelines(struct device_queue_manager *dqm, ...@@ -522,6 +522,17 @@ int init_pipelines(struct device_queue_manager *dqm,
return 0; return 0;
} }
static void init_interrupts(struct device_queue_manager *dqm)
{
unsigned int i;
BUG_ON(dqm == NULL);
for (i = 0 ; i < get_pipes_num(dqm) ; i++)
dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd,
i + get_first_pipe(dqm));
}
static int init_scheduler(struct device_queue_manager *dqm) static int init_scheduler(struct device_queue_manager *dqm)
{ {
int retval; int retval;
...@@ -581,6 +592,7 @@ static void uninitialize_nocpsch(struct device_queue_manager *dqm) ...@@ -581,6 +592,7 @@ static void uninitialize_nocpsch(struct device_queue_manager *dqm)
static int start_nocpsch(struct device_queue_manager *dqm) static int start_nocpsch(struct device_queue_manager *dqm)
{ {
init_interrupts(dqm);
return 0; return 0;
} }
...@@ -614,19 +626,6 @@ static void deallocate_sdma_queue(struct device_queue_manager *dqm, ...@@ -614,19 +626,6 @@ static void deallocate_sdma_queue(struct device_queue_manager *dqm,
set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap); set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap);
} }
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd)
{
uint32_t value = SDMA_ATC;
if (q->process->is_32bit_user_mode)
value |= SDMA_VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd));
else
value |= SDMA_VA_SHARED_BASE(get_sh_mem_bases_nybble_64(
qpd_to_pdd(qpd)));
q->properties.sdma_vm_addr = value;
}
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q, struct queue *q,
struct qcm_process_device *qpd) struct qcm_process_device *qpd)
...@@ -649,7 +648,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, ...@@ -649,7 +648,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
pr_debug(" sdma queue id: %d\n", q->properties.sdma_queue_id); pr_debug(" sdma queue id: %d\n", q->properties.sdma_queue_id);
pr_debug(" sdma engine id: %d\n", q->properties.sdma_engine_id); pr_debug(" sdma engine id: %d\n", q->properties.sdma_engine_id);
init_sdma_vm(dqm, q, qpd); dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties); &q->gart_mqd_addr, &q->properties);
if (retval != 0) { if (retval != 0) {
...@@ -750,6 +749,9 @@ static int start_cpsch(struct device_queue_manager *dqm) ...@@ -750,6 +749,9 @@ static int start_cpsch(struct device_queue_manager *dqm)
dqm->fence_addr = dqm->fence_mem->cpu_ptr; dqm->fence_addr = dqm->fence_mem->cpu_ptr;
dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
init_interrupts(dqm);
list_for_each_entry(node, &dqm->queues, list) list_for_each_entry(node, &dqm->queues, list)
if (node->qpd->pqm->process && dqm->dev) if (node->qpd->pqm->process && dqm->dev)
kfd_bind_process_to_device(dqm->dev, kfd_bind_process_to_device(dqm->dev,
......
...@@ -120,6 +120,21 @@ struct device_queue_manager_ops { ...@@ -120,6 +120,21 @@ struct device_queue_manager_ops {
uint64_t alternate_aperture_size); uint64_t alternate_aperture_size);
}; };
struct device_queue_manager_asic_ops {
int (*register_process)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
int (*initialize)(struct device_queue_manager *dqm);
bool (*set_cache_memory_policy)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
void (*init_sdma_vm)(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
};
/** /**
* struct device_queue_manager * struct device_queue_manager
* *
...@@ -134,7 +149,7 @@ struct device_queue_manager_ops { ...@@ -134,7 +149,7 @@ struct device_queue_manager_ops {
struct device_queue_manager { struct device_queue_manager {
struct device_queue_manager_ops ops; struct device_queue_manager_ops ops;
struct device_queue_manager_ops ops_asic_specific; struct device_queue_manager_asic_ops ops_asic_specific;
struct mqd_manager *mqds[KFD_MQD_TYPE_MAX]; struct mqd_manager *mqds[KFD_MQD_TYPE_MAX];
struct packet_manager packets; struct packet_manager packets;
...@@ -157,8 +172,8 @@ struct device_queue_manager { ...@@ -157,8 +172,8 @@ struct device_queue_manager {
bool active_runlist; bool active_runlist;
}; };
void device_queue_manager_init_cik(struct device_queue_manager_ops *ops); void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops);
void device_queue_manager_init_vi(struct device_queue_manager_ops *ops); void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops);
void program_sh_mem_settings(struct device_queue_manager *dqm, void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd); struct qcm_process_device *qpd);
int init_pipelines(struct device_queue_manager *dqm, int init_pipelines(struct device_queue_manager *dqm,
......
...@@ -33,12 +33,15 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, ...@@ -33,12 +33,15 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
static int register_process_cik(struct device_queue_manager *dqm, static int register_process_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd); struct qcm_process_device *qpd);
static int initialize_cpsch_cik(struct device_queue_manager *dqm); static int initialize_cpsch_cik(struct device_queue_manager *dqm);
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
void device_queue_manager_init_cik(struct device_queue_manager_ops *ops) void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops)
{ {
ops->set_cache_memory_policy = set_cache_memory_policy_cik; ops->set_cache_memory_policy = set_cache_memory_policy_cik;
ops->register_process = register_process_cik; ops->register_process = register_process_cik;
ops->initialize = initialize_cpsch_cik; ops->initialize = initialize_cpsch_cik;
ops->init_sdma_vm = init_sdma_vm;
} }
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
...@@ -129,6 +132,19 @@ static int register_process_cik(struct device_queue_manager *dqm, ...@@ -129,6 +132,19 @@ static int register_process_cik(struct device_queue_manager *dqm,
return 0; return 0;
} }
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd)
{
uint32_t value = SDMA_ATC;
if (q->process->is_32bit_user_mode)
value |= SDMA_VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd));
else
value |= SDMA_VA_SHARED_BASE(get_sh_mem_bases_nybble_64(
qpd_to_pdd(qpd)));
q->properties.sdma_vm_addr = value;
}
static int initialize_cpsch_cik(struct device_queue_manager *dqm) static int initialize_cpsch_cik(struct device_queue_manager *dqm)
{ {
return init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm)); return init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
......
...@@ -32,14 +32,17 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, ...@@ -32,14 +32,17 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
static int register_process_vi(struct device_queue_manager *dqm, static int register_process_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd); struct qcm_process_device *qpd);
static int initialize_cpsch_vi(struct device_queue_manager *dqm); static int initialize_cpsch_vi(struct device_queue_manager *dqm);
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
void device_queue_manager_init_vi(struct device_queue_manager_ops *ops) void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops)
{ {
pr_warn("amdkfd: VI DQM is not currently supported\n"); pr_warn("amdkfd: VI DQM is not currently supported\n");
ops->set_cache_memory_policy = set_cache_memory_policy_vi; ops->set_cache_memory_policy = set_cache_memory_policy_vi;
ops->register_process = register_process_vi; ops->register_process = register_process_vi;
ops->initialize = initialize_cpsch_vi; ops->initialize = initialize_cpsch_vi;
ops->init_sdma_vm = init_sdma_vm;
} }
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
...@@ -58,6 +61,11 @@ static int register_process_vi(struct device_queue_manager *dqm, ...@@ -58,6 +61,11 @@ static int register_process_vi(struct device_queue_manager *dqm,
return -1; return -1;
} }
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd)
{
}
static int initialize_cpsch_vi(struct device_queue_manager *dqm) static int initialize_cpsch_vi(struct device_queue_manager *dqm)
{ {
return 0; return 0;
......
...@@ -142,14 +142,13 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) ...@@ -142,14 +142,13 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
pr_debug("kfd: mapping doorbell page in kfd_doorbell_mmap\n" pr_debug("mapping doorbell page:\n");
" target user address == 0x%08llX\n" pr_debug(" target user address == 0x%08llX\n",
" physical address == 0x%08llX\n" (unsigned long long) vma->vm_start);
" vm_flags == 0x%04lX\n" pr_debug(" physical address == 0x%08llX\n", address);
" size == 0x%04lX\n", pr_debug(" vm_flags == 0x%04lX\n", vma->vm_flags);
(unsigned long long) vma->vm_start, address, vma->vm_flags, pr_debug(" size == 0x%04lX\n",
doorbell_process_allocation()); doorbell_process_allocation());
return io_remap_pfn_range(vma, return io_remap_pfn_range(vma,
vma->vm_start, vma->vm_start,
......
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef KFD_EVENTS_H_INCLUDED
#define KFD_EVENTS_H_INCLUDED
#include <linux/kernel.h>
#include <linux/hashtable.h>
#include <linux/types.h>
#include <linux/list.h>
#include "kfd_priv.h"
#include <uapi/linux/kfd_ioctl.h>
#define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U
#define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK
#define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX
/*
* Written into kfd_signal_slot_t to indicate that the event is not signaled.
* Since the event protocol may need to write the event ID into memory, this
* must not be a valid event ID.
* For the sake of easy memset-ing, this must be a byte pattern.
*/
#define UNSIGNALED_EVENT_SLOT ((uint64_t)-1)
struct kfd_event_waiter;
struct signal_page;
struct kfd_event {
/* All events in process, rooted at kfd_process.events. */
struct hlist_node events;
u32 event_id;
bool signaled;
bool auto_reset;
int type;
struct list_head waiters; /* List of kfd_event_waiter by waiters. */
/* Only for signal events. */
struct signal_page *signal_page;
unsigned int signal_slot_index;
uint64_t __user *user_signal_address;
/* type specific data */
union {
struct kfd_hsa_memory_exception_data memory_exception_data;
};
};
#define KFD_EVENT_TIMEOUT_IMMEDIATE 0
#define KFD_EVENT_TIMEOUT_INFINITE 0xFFFFFFFFu
/* Matching HSA_EVENTTYPE */
#define KFD_EVENT_TYPE_SIGNAL 0
#define KFD_EVENT_TYPE_HW_EXCEPTION 3
#define KFD_EVENT_TYPE_DEBUG 5
#define KFD_EVENT_TYPE_MEMORY 8
extern void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
uint32_t valid_id_bits);
#endif
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* KFD Interrupts.
*
* AMD GPUs deliver interrupts by pushing an interrupt description onto the
* interrupt ring and then sending an interrupt. KGD receives the interrupt
* in ISR and sends us a pointer to each new entry on the interrupt ring.
*
* We generally can't process interrupt-signaled events from ISR, so we call
* out to each interrupt client module (currently only the scheduler) to ask if
* each interrupt is interesting. If they return true, then it requires further
* processing so we copy it to an internal interrupt ring and call each
* interrupt client again from a work-queue.
*
* There's no acknowledgment for the interrupts we use. The hardware simply
* queues a new interrupt each time without waiting.
*
* The fixed-size internal queue means that it's possible for us to lose
* interrupts because we have no back-pressure to the hardware.
*/
#include <linux/slab.h>
#include <linux/device.h>
#include "kfd_priv.h"
#define KFD_INTERRUPT_RING_SIZE 1024
static void interrupt_wq(struct work_struct *);
int kfd_interrupt_init(struct kfd_dev *kfd)
{
void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE,
kfd->device_info->ih_ring_entry_size,
GFP_KERNEL);
if (!interrupt_ring)
return -ENOMEM;
kfd->interrupt_ring = interrupt_ring;
kfd->interrupt_ring_size =
KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size;
atomic_set(&kfd->interrupt_ring_wptr, 0);
atomic_set(&kfd->interrupt_ring_rptr, 0);
spin_lock_init(&kfd->interrupt_lock);
INIT_WORK(&kfd->interrupt_work, interrupt_wq);
kfd->interrupts_active = true;
/*
* After this function returns, the interrupt will be enabled. This
* barrier ensures that the interrupt running on a different processor
* sees all the above writes.
*/
smp_wmb();
return 0;
}
void kfd_interrupt_exit(struct kfd_dev *kfd)
{
/*
* Stop the interrupt handler from writing to the ring and scheduling
* workqueue items. The spinlock ensures that any interrupt running
* after we have unlocked sees interrupts_active = false.
*/
unsigned long flags;
spin_lock_irqsave(&kfd->interrupt_lock, flags);
kfd->interrupts_active = false;
spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
/*
* Flush_scheduled_work ensures that there are no outstanding
* work-queue items that will access interrupt_ring. New work items
* can't be created because we stopped interrupt handling above.
*/
flush_scheduled_work();
kfree(kfd->interrupt_ring);
}
/*
* This assumes that it can't be called concurrently with itself
* but only with dequeue_ih_ring_entry.
*/
bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry)
{
unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
if ((rptr - wptr) % kfd->interrupt_ring_size ==
kfd->device_info->ih_ring_entry_size) {
/* This is very bad, the system is likely to hang. */
dev_err_ratelimited(kfd_chardev(),
"Interrupt ring overflow, dropping interrupt.\n");
return false;
}
memcpy(kfd->interrupt_ring + wptr, ih_ring_entry,
kfd->device_info->ih_ring_entry_size);
wptr = (wptr + kfd->device_info->ih_ring_entry_size) %
kfd->interrupt_ring_size;
smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */
atomic_set(&kfd->interrupt_ring_wptr, wptr);
return true;
}
/*
* This assumes that it can't be called concurrently with itself
* but only with enqueue_ih_ring_entry.
*/
static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)
{
/*
* Assume that wait queues have an implicit barrier, i.e. anything that
* happened in the ISR before it queued work is visible.
*/
unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
if (rptr == wptr)
return false;
memcpy(ih_ring_entry, kfd->interrupt_ring + rptr,
kfd->device_info->ih_ring_entry_size);
rptr = (rptr + kfd->device_info->ih_ring_entry_size) %
kfd->interrupt_ring_size;
/*
* Ensure the rptr write update is not visible until
* memcpy has finished reading.
*/
smp_mb();
atomic_set(&kfd->interrupt_ring_rptr, rptr);
return true;
}
static void interrupt_wq(struct work_struct *work)
{
struct kfd_dev *dev = container_of(work, struct kfd_dev,
interrupt_work);
uint32_t ih_ring_entry[DIV_ROUND_UP(
dev->device_info->ih_ring_entry_size,
sizeof(uint32_t))];
while (dequeue_ih_ring_entry(dev, ih_ring_entry))
dev->device_info->event_interrupt_class->interrupt_wq(dev,
ih_ring_entry);
}
bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry)
{
/* integer and bitwise OR so there is no boolean short-circuiting */
unsigned wanted = 0;
wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
ih_ring_entry);
return wanted != 0;
}
...@@ -215,8 +215,9 @@ static int acquire_packet_buffer(struct kernel_queue *kq, ...@@ -215,8 +215,9 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
queue_address = (unsigned int *)kq->pq_kernel_addr; queue_address = (unsigned int *)kq->pq_kernel_addr;
queue_size_dwords = kq->queue->properties.queue_size / sizeof(uint32_t); queue_size_dwords = kq->queue->properties.queue_size / sizeof(uint32_t);
pr_debug("amdkfd: In func %s\nrptr: %d\nwptr: %d\nqueue_address 0x%p\n", pr_debug("rptr: %d\n", rptr);
__func__, rptr, wptr, queue_address); pr_debug("wptr: %d\n", wptr);
pr_debug("queue_address 0x%p\n", queue_address);
available_size = (rptr - 1 - wptr + queue_size_dwords) % available_size = (rptr - 1 - wptr + queue_size_dwords) %
queue_size_dwords; queue_size_dwords;
......
...@@ -29,10 +29,10 @@ ...@@ -29,10 +29,10 @@
#define KFD_DRIVER_AUTHOR "AMD Inc. and others" #define KFD_DRIVER_AUTHOR "AMD Inc. and others"
#define KFD_DRIVER_DESC "Standalone HSA driver for AMD's GPUs" #define KFD_DRIVER_DESC "Standalone HSA driver for AMD's GPUs"
#define KFD_DRIVER_DATE "20150122" #define KFD_DRIVER_DATE "20150421"
#define KFD_DRIVER_MAJOR 0 #define KFD_DRIVER_MAJOR 0
#define KFD_DRIVER_MINOR 7 #define KFD_DRIVER_MINOR 7
#define KFD_DRIVER_PATCHLEVEL 1 #define KFD_DRIVER_PATCHLEVEL 2
static const struct kgd2kfd_calls kgd2kfd = { static const struct kgd2kfd_calls kgd2kfd = {
.exit = kgd2kfd_exit, .exit = kgd2kfd_exit,
...@@ -54,6 +54,11 @@ module_param(max_num_of_queues_per_device, int, 0444); ...@@ -54,6 +54,11 @@ module_param(max_num_of_queues_per_device, int, 0444);
MODULE_PARM_DESC(max_num_of_queues_per_device, MODULE_PARM_DESC(max_num_of_queues_per_device,
"Maximum number of supported queues per device (1 = Minimum, 4096 = default)"); "Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
int send_sigterm;
module_param(send_sigterm, int, 0444);
MODULE_PARM_DESC(send_sigterm,
"Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
bool kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f) bool kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f)
{ {
/* /*
......
...@@ -35,6 +35,9 @@ ...@@ -35,6 +35,9 @@
#define KFD_SYSFS_FILE_MODE 0444 #define KFD_SYSFS_FILE_MODE 0444
#define KFD_MMAP_DOORBELL_MASK 0x8000000000000
#define KFD_MMAP_EVENTS_MASK 0x4000000000000
/* /*
* When working with cp scheduler we should assign the HIQ manually or via * When working with cp scheduler we should assign the HIQ manually or via
* the radeon driver to a fixed hqd slot, here are the fixed HIQ hqd slot * the radeon driver to a fixed hqd slot, here are the fixed HIQ hqd slot
...@@ -71,6 +74,12 @@ extern int max_num_of_queues_per_device; ...@@ -71,6 +74,12 @@ extern int max_num_of_queues_per_device;
/* Kernel module parameter to specify the scheduling policy */ /* Kernel module parameter to specify the scheduling policy */
extern int sched_policy; extern int sched_policy;
/*
* Kernel module parameter to specify whether to send sigterm to HSA process on
* unhandled exception
*/
extern int send_sigterm;
/** /**
* enum kfd_sched_policy * enum kfd_sched_policy
* *
...@@ -108,8 +117,16 @@ enum asic_family_type { ...@@ -108,8 +117,16 @@ enum asic_family_type {
CHIP_CARRIZO CHIP_CARRIZO
}; };
struct kfd_event_interrupt_class {
bool (*interrupt_isr)(struct kfd_dev *dev,
const uint32_t *ih_ring_entry);
void (*interrupt_wq)(struct kfd_dev *dev,
const uint32_t *ih_ring_entry);
};
struct kfd_device_info { struct kfd_device_info {
unsigned int asic_family; unsigned int asic_family;
const struct kfd_event_interrupt_class *event_interrupt_class;
unsigned int max_pasid_bits; unsigned int max_pasid_bits;
size_t ih_ring_entry_size; size_t ih_ring_entry_size;
uint8_t num_of_watch_points; uint8_t num_of_watch_points;
...@@ -161,10 +178,23 @@ struct kfd_dev { ...@@ -161,10 +178,23 @@ struct kfd_dev {
unsigned int gtt_sa_chunk_size; unsigned int gtt_sa_chunk_size;
unsigned int gtt_sa_num_of_chunks; unsigned int gtt_sa_num_of_chunks;
/* Interrupts */
void *interrupt_ring;
size_t interrupt_ring_size;
atomic_t interrupt_ring_rptr;
atomic_t interrupt_ring_wptr;
struct work_struct interrupt_work;
spinlock_t interrupt_lock;
/* QCM Device instance */ /* QCM Device instance */
struct device_queue_manager *dqm; struct device_queue_manager *dqm;
bool init_complete; bool init_complete;
/*
* Interrupts of interest to KFD are copied
* from the HW ring into a SW ring.
*/
bool interrupts_active;
}; };
/* KGD2KFD callbacks */ /* KGD2KFD callbacks */
...@@ -477,6 +507,15 @@ struct kfd_process { ...@@ -477,6 +507,15 @@ struct kfd_process {
/*Is the user space process 32 bit?*/ /*Is the user space process 32 bit?*/
bool is_32bit_user_mode; bool is_32bit_user_mode;
/* Event-related data */
struct mutex event_mutex;
/* All events in process hashed by ID, linked on kfd_event.events. */
DECLARE_HASHTABLE(events, 4);
struct list_head signal_event_pages; /* struct slot_page_header.
event_pages */
u32 next_nonsignal_event_id;
size_t signal_event_count;
}; };
/** /**
...@@ -501,6 +540,7 @@ void kfd_process_create_wq(void); ...@@ -501,6 +540,7 @@ void kfd_process_create_wq(void);
void kfd_process_destroy_wq(void); void kfd_process_destroy_wq(void);
struct kfd_process *kfd_create_process(const struct task_struct *); struct kfd_process *kfd_create_process(const struct task_struct *);
struct kfd_process *kfd_get_process(const struct task_struct *); struct kfd_process *kfd_get_process(const struct task_struct *);
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
struct kfd_process *p); struct kfd_process *p);
...@@ -555,7 +595,11 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); ...@@ -555,7 +595,11 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx); struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx);
/* Interrupts */ /* Interrupts */
int kfd_interrupt_init(struct kfd_dev *dev);
void kfd_interrupt_exit(struct kfd_dev *dev);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry);
bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry);
/* Power Management */ /* Power Management */
void kgd2kfd_suspend(struct kfd_dev *kfd); void kgd2kfd_suspend(struct kfd_dev *kfd);
...@@ -642,4 +686,35 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd); ...@@ -642,4 +686,35 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
struct kfd_process *process); struct kfd_process *process);
/* Events */
extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
extern const struct kfd_device_global_init_class device_global_init_class_cik;
enum kfd_event_wait_result {
KFD_WAIT_COMPLETE,
KFD_WAIT_TIMEOUT,
KFD_WAIT_ERROR
};
void kfd_event_init_process(struct kfd_process *p);
void kfd_event_free_process(struct kfd_process *p);
int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
int kfd_wait_on_events(struct kfd_process *p,
uint32_t num_events, void __user *data,
bool all, uint32_t user_timeout_ms,
enum kfd_event_wait_result *wait_result);
void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
uint32_t valid_id_bits);
void kfd_signal_iommu_event(struct kfd_dev *dev,
unsigned int pasid, unsigned long address,
bool is_write_requested, bool is_execute_requested);
void kfd_signal_hw_exception_event(unsigned int pasid);
int kfd_set_event(struct kfd_process *p, uint32_t event_id);
int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
int kfd_event_create(struct file *devkfd, struct kfd_process *p,
uint32_t event_type, bool auto_reset, uint32_t node_id,
uint32_t *event_id, uint32_t *event_trigger_data,
uint64_t *event_page_offset, uint32_t *event_slot_index);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
#endif #endif
...@@ -178,6 +178,8 @@ static void kfd_process_wq_release(struct work_struct *work) ...@@ -178,6 +178,8 @@ static void kfd_process_wq_release(struct work_struct *work)
kfree(pdd); kfree(pdd);
} }
kfd_event_free_process(p);
kfd_pasid_free(p->pasid); kfd_pasid_free(p->pasid);
mutex_unlock(&p->mutex); mutex_unlock(&p->mutex);
...@@ -203,8 +205,7 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu) ...@@ -203,8 +205,7 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu)
mmdrop(p->mm); mmdrop(p->mm);
work = (struct kfd_process_release_work *) work = kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC);
kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC);
if (work) { if (work) {
INIT_WORK((struct work_struct *) work, kfd_process_wq_release); INIT_WORK((struct work_struct *) work, kfd_process_wq_release);
...@@ -289,6 +290,8 @@ static struct kfd_process *create_process(const struct task_struct *thread) ...@@ -289,6 +290,8 @@ static struct kfd_process *create_process(const struct task_struct *thread)
INIT_LIST_HEAD(&process->per_device_data); INIT_LIST_HEAD(&process->per_device_data);
kfd_event_init_process(process);
err = pqm_init(&process->pqm, process); err = pqm_init(&process->pqm, process);
if (err != 0) if (err != 0)
goto err_process_pqm_init; goto err_process_pqm_init;
...@@ -431,3 +434,23 @@ bool kfd_has_process_device_data(struct kfd_process *p) ...@@ -431,3 +434,23 @@ bool kfd_has_process_device_data(struct kfd_process *p)
{ {
return !(list_empty(&p->per_device_data)); return !(list_empty(&p->per_device_data));
} }
/* This returns with process->mutex locked. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
struct kfd_process *p;
unsigned int temp;
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
if (p->pasid == pasid) {
mutex_lock(&p->mutex);
break;
}
}
srcu_read_unlock(&kfd_processes_srcu, idx);
return p;
}
...@@ -144,6 +144,8 @@ struct kfd2kgd_calls { ...@@ -144,6 +144,8 @@ struct kfd2kgd_calls {
int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id, int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr); uint32_t hpd_size, uint64_t hpd_gpu_addr);
int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id);
int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr); uint32_t queue_id, uint32_t __user *wptr);
......
...@@ -149,6 +149,8 @@ ...@@ -149,6 +149,8 @@
#define KFD_CIK_SDMA_QUEUE_OFFSET 0x200 #define KFD_CIK_SDMA_QUEUE_OFFSET 0x200
#define CPC_INT_CNTL 0xC2D0
#define CP_HQD_IQ_RPTR 0xC970u #define CP_HQD_IQ_RPTR 0xC970u
#define AQL_ENABLE (1U << 0) #define AQL_ENABLE (1U << 0)
#define SDMA0_RLC0_RB_CNTL 0xD400u #define SDMA0_RLC0_RB_CNTL 0xD400u
......
...@@ -1335,6 +1335,7 @@ ...@@ -1335,6 +1335,7 @@
# define CNTX_EMPTY_INT_ENABLE (1 << 20) # define CNTX_EMPTY_INT_ENABLE (1 << 20)
# define PRIV_INSTR_INT_ENABLE (1 << 22) # define PRIV_INSTR_INT_ENABLE (1 << 22)
# define PRIV_REG_INT_ENABLE (1 << 23) # define PRIV_REG_INT_ENABLE (1 << 23)
# define OPCODE_ERROR_INT_ENABLE (1 << 24)
# define TIME_STAMP_INT_ENABLE (1 << 26) # define TIME_STAMP_INT_ENABLE (1 << 26)
# define CP_RINGID2_INT_ENABLE (1 << 29) # define CP_RINGID2_INT_ENABLE (1 << 29)
# define CP_RINGID1_INT_ENABLE (1 << 30) # define CP_RINGID1_INT_ENABLE (1 << 30)
......
...@@ -66,7 +66,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, ...@@ -66,7 +66,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr); uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr); uint32_t queue_id, uint32_t __user *wptr);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
...@@ -89,6 +89,7 @@ static const struct kfd2kgd_calls kfd2kgd = { ...@@ -89,6 +89,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.program_sh_mem_settings = kgd_program_sh_mem_settings, .program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
.init_pipeline = kgd_init_pipeline, .init_pipeline = kgd_init_pipeline,
.init_interrupts = kgd_init_interrupts,
.hqd_load = kgd_hqd_load, .hqd_load = kgd_hqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load, .hqd_sdma_load = kgd_hqd_sdma_load,
.hqd_is_occupied = kgd_hqd_is_occupied, .hqd_is_occupied = kgd_hqd_is_occupied,
...@@ -407,6 +408,24 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, ...@@ -407,6 +408,24 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
return 0; return 0;
} }
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
uint32_t mec;
uint32_t pipe;
mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
pipe = (pipe_id % CIK_PIPE_PER_MEC);
lock_srbm(kgd, mec, pipe, 0, 0);
write_register(kgd, CPC_INT_CNTL,
TIME_STAMP_INT_ENABLE | OPCODE_ERROR_INT_ENABLE);
unlock_srbm(kgd);
return 0;
}
static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
{ {
uint32_t retval; uint32_t retval;
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
#include <linux/ioctl.h> #include <linux/ioctl.h>
#define KFD_IOCTL_MAJOR_VERSION 1 #define KFD_IOCTL_MAJOR_VERSION 1
#define KFD_IOCTL_MINOR_VERSION 0 #define KFD_IOCTL_MINOR_VERSION 1
struct kfd_ioctl_get_version_args { struct kfd_ioctl_get_version_args {
uint32_t major_version; /* from KFD */ uint32_t major_version; /* from KFD */
...@@ -128,6 +128,83 @@ struct kfd_ioctl_get_process_apertures_args { ...@@ -128,6 +128,83 @@ struct kfd_ioctl_get_process_apertures_args {
uint32_t pad; uint32_t pad;
}; };
/* Matching HSA_EVENTTYPE */
#define KFD_IOC_EVENT_SIGNAL 0
#define KFD_IOC_EVENT_NODECHANGE 1
#define KFD_IOC_EVENT_DEVICESTATECHANGE 2
#define KFD_IOC_EVENT_HW_EXCEPTION 3
#define KFD_IOC_EVENT_SYSTEM_EVENT 4
#define KFD_IOC_EVENT_DEBUG_EVENT 5
#define KFD_IOC_EVENT_PROFILE_EVENT 6
#define KFD_IOC_EVENT_QUEUE_EVENT 7
#define KFD_IOC_EVENT_MEMORY 8
#define KFD_IOC_WAIT_RESULT_COMPLETE 0
#define KFD_IOC_WAIT_RESULT_TIMEOUT 1
#define KFD_IOC_WAIT_RESULT_FAIL 2
#define KFD_SIGNAL_EVENT_LIMIT 256
struct kfd_ioctl_create_event_args {
uint64_t event_page_offset; /* from KFD */
uint32_t event_trigger_data; /* from KFD - signal events only */
uint32_t event_type; /* to KFD */
uint32_t auto_reset; /* to KFD */
uint32_t node_id; /* to KFD - only valid for certain
event types */
uint32_t event_id; /* from KFD */
uint32_t event_slot_index; /* from KFD */
};
struct kfd_ioctl_destroy_event_args {
uint32_t event_id; /* to KFD */
uint32_t pad;
};
struct kfd_ioctl_set_event_args {
uint32_t event_id; /* to KFD */
uint32_t pad;
};
struct kfd_ioctl_reset_event_args {
uint32_t event_id; /* to KFD */
uint32_t pad;
};
struct kfd_memory_exception_failure {
uint32_t NotPresent; /* Page not present or supervisor privilege */
uint32_t ReadOnly; /* Write access to a read-only page */
uint32_t NoExecute; /* Execute access to a page marked NX */
uint32_t pad;
};
/* memory exception data*/
struct kfd_hsa_memory_exception_data {
struct kfd_memory_exception_failure failure;
uint64_t va;
uint32_t gpu_id;
uint32_t pad;
};
/* Event data*/
struct kfd_event_data {
union {
struct kfd_hsa_memory_exception_data memory_exception_data;
}; /* From KFD */
uint64_t kfd_event_data_ext; /* pointer to an extension structure
for future exception types */
uint32_t event_id; /* to KFD */
uint32_t pad;
};
struct kfd_ioctl_wait_events_args {
uint64_t events_ptr; /* to KFD */
uint32_t num_events; /* to KFD */
uint32_t wait_for_all; /* to KFD */
uint32_t timeout; /* to KFD */
uint32_t wait_result; /* from KFD */
};
#define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IOCTL_BASE 'K'
#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr)
#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type)
...@@ -155,7 +232,22 @@ struct kfd_ioctl_get_process_apertures_args { ...@@ -155,7 +232,22 @@ struct kfd_ioctl_get_process_apertures_args {
#define AMDKFD_IOC_UPDATE_QUEUE \ #define AMDKFD_IOC_UPDATE_QUEUE \
AMDKFD_IOW(0x07, struct kfd_ioctl_update_queue_args) AMDKFD_IOW(0x07, struct kfd_ioctl_update_queue_args)
#define AMDKFD_IOC_CREATE_EVENT \
AMDKFD_IOWR(0x08, struct kfd_ioctl_create_event_args)
#define AMDKFD_IOC_DESTROY_EVENT \
AMDKFD_IOW(0x09, struct kfd_ioctl_destroy_event_args)
#define AMDKFD_IOC_SET_EVENT \
AMDKFD_IOW(0x0A, struct kfd_ioctl_set_event_args)
#define AMDKFD_IOC_RESET_EVENT \
AMDKFD_IOW(0x0B, struct kfd_ioctl_reset_event_args)
#define AMDKFD_IOC_WAIT_EVENTS \
AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args)
#define AMDKFD_COMMAND_START 0x01 #define AMDKFD_COMMAND_START 0x01
#define AMDKFD_COMMAND_END 0x08 #define AMDKFD_COMMAND_END 0x0D
#endif #endif
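For orientation only, a minimal userspace sketch of the event interface defined above (assuming an already-open /dev/kfd file descriptor; error handling and the optional mapping of the signal page are omitted, and this is not part of the patch):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

/* Sketch: create an auto-reset signal event and block on it for up to 1s. */
static int wait_for_signal_event(int kfd_fd)
{
	struct kfd_ioctl_create_event_args create = {0};
	struct kfd_event_data event_data = {0};
	struct kfd_ioctl_wait_events_args wait = {0};

	create.event_type = KFD_IOC_EVENT_SIGNAL;
	create.auto_reset = 1;
	if (ioctl(kfd_fd, AMDKFD_IOC_CREATE_EVENT, &create) != 0)
		return -1;

	event_data.event_id = create.event_id;
	wait.events_ptr = (uint64_t)(uintptr_t)&event_data;
	wait.num_events = 1;
	wait.wait_for_all = 1;
	wait.timeout = 1000;	/* kfd_wait_on_events() takes milliseconds */
	if (ioctl(kfd_fd, AMDKFD_IOC_WAIT_EVENTS, &wait) != 0)
		return -1;

	return wait.wait_result == KFD_IOC_WAIT_RESULT_COMPLETE ? 0 : -1;
}

On the kernel side, such a signal event is raised from cik_event_interrupt_wq() via kfd_signal_event_interrupt() when a CP end-of-pipe interrupt arrives for the process's PASID, which is what unblocks the wait IOCTL above.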