Commit ef7c8f2b authored by Linus Torvalds

Merge tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd

Pull iommufd updates from Jason Gunthorpe:

 - The iova_bitmap logic for efficiently reporting dirty pages back to
   userspace has a few more tricky corner case bugs that have been
   resolved and backed with new tests.

   The revised version has simpler logic.

 - Shared branch with iommu for handle support when doing domain attach.

   Handles allow the domain owner to include additional private data on
   a per-device basis.

 - IO Page Fault Reporting to userspace via iommufd. Page faults can be
   generated on fault capable HWPTs when a translation is not present.

   Routing them to userspace would allow a VMM to be able to virtualize
   them into an emulated vIOMMU. This is the next step to fully enabling
   vSVA support.

* tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd: (26 commits)
  iommufd: Put constants for all the uAPI enums
  iommufd: Fix error pointer checking
  iommufd: Add check on user response code
  iommufd: Remove IOMMUFD_PAGE_RESP_FAILURE
  iommufd: Require drivers to supply the cache_invalidate_user ops
  iommufd/selftest: Add coverage for IOPF test
  iommufd/selftest: Add IOPF support for mock device
  iommufd: Associate fault object with iommufd_hw_pgtable
  iommufd: Fault-capable hwpt attach/detach/replace
  iommufd: Add iommufd fault object
  iommufd: Add fault and response message definitions
  iommu: Extend domain attach group with handle support
  iommu: Add attach handle to struct iopf_group
  iommu: Remove sva handle list
  iommu: Introduce domain attachment handle
  iommufd/iova_bitmap: Remove iterator logic
  iommufd/iova_bitmap: Dynamic pinning on iova_bitmap_set()
  iommufd/iova_bitmap: Consolidate iova_bitmap_set exit conditionals
  iommufd/iova_bitmap: Move initial pinning to iova_bitmap_for_each()
  iommufd/iova_bitmap: Cache mapped length in iova_bitmap_map struct
  ...
parents 07e773db 136a8066
......@@ -584,7 +584,7 @@ static int idxd_enable_system_pasid(struct idxd_device *idxd)
* DMA domain is owned by the driver, it should support all valid
* types such as DMA-FQ, identity, etc.
*/
ret = iommu_attach_device_pasid(domain, dev, pasid);
ret = iommu_attach_device_pasid(domain, dev, pasid, NULL);
if (ret) {
dev_err(dev, "failed to attach device pasid %d, domain type %d",
pasid, domain->type);
......
......@@ -59,30 +59,6 @@ void iopf_free_group(struct iopf_group *group)
}
EXPORT_SYMBOL_GPL(iopf_free_group);
static struct iommu_domain *get_domain_for_iopf(struct device *dev,
struct iommu_fault *fault)
{
struct iommu_domain *domain;
if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
domain = iommu_get_domain_for_dev_pasid(dev, fault->prm.pasid, 0);
if (IS_ERR(domain))
domain = NULL;
} else {
domain = iommu_get_domain_for_dev(dev);
}
if (!domain || !domain->iopf_handler) {
dev_warn_ratelimited(dev,
"iopf (pasid %d) without domain attached or handler installed\n",
fault->prm.pasid);
return NULL;
}
return domain;
}
/* Non-last request of a group. Postpone until the last one. */
static int report_partial_fault(struct iommu_fault_param *fault_param,
struct iommu_fault *fault)
......@@ -134,6 +110,8 @@ static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param,
list_add(&group->pending_node, &iopf_param->faults);
mutex_unlock(&iopf_param->lock);
group->fault_count = list_count_nodes(&group->faults);
return group;
}
......@@ -206,20 +184,51 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
if (group == &abort_group)
goto err_abort;
group->domain = get_domain_for_iopf(dev, fault);
if (!group->domain)
if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
group->attach_handle = iommu_attach_handle_get(dev->iommu_group,
fault->prm.pasid,
0);
if (IS_ERR(group->attach_handle)) {
const struct iommu_ops *ops = dev_iommu_ops(dev);
if (!ops->user_pasid_table)
goto err_abort;
/*
* The iommu driver for this device supports user-
* managed PASID table. Therefore page faults for
* any PASID should go through the NESTING domain
* attached to the device RID.
*/
group->attach_handle =
iommu_attach_handle_get(dev->iommu_group,
IOMMU_NO_PASID,
IOMMU_DOMAIN_NESTED);
if (IS_ERR(group->attach_handle))
goto err_abort;
}
} else {
group->attach_handle =
iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0);
if (IS_ERR(group->attach_handle))
goto err_abort;
}
if (!group->attach_handle->domain->iopf_handler)
goto err_abort;
/*
* On success iopf_handler must call iopf_group_response() and
* iopf_free_group()
*/
if (group->domain->iopf_handler(group))
if (group->attach_handle->domain->iopf_handler(group))
goto err_abort;
return;
err_abort:
dev_warn_ratelimited(dev, "iopf with pasid %d aborted\n",
fault->prm.pasid);
iopf_group_response(group, IOMMU_PAGE_RESP_FAILURE);
if (group == &abort_group)
__iopf_free_group(group);
......
......@@ -28,4 +28,15 @@ void iommu_device_unregister_bus(struct iommu_device *iommu,
const struct bus_type *bus,
struct notifier_block *nb);
struct iommu_attach_handle *iommu_attach_handle_get(struct iommu_group *group,
ioasid_t pasid,
unsigned int type);
int iommu_attach_group_handle(struct iommu_domain *domain,
struct iommu_group *group,
struct iommu_attach_handle *handle);
void iommu_detach_group_handle(struct iommu_domain *domain,
struct iommu_group *group);
int iommu_replace_group_handle(struct iommu_group *group,
struct iommu_domain *new_domain,
struct iommu_attach_handle *handle);
#endif /* __LINUX_IOMMU_PRIV_H */
......@@ -41,7 +41,6 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de
}
iommu_mm->pasid = pasid;
INIT_LIST_HEAD(&iommu_mm->sva_domains);
INIT_LIST_HEAD(&iommu_mm->sva_handles);
/*
* Make sure the write to mm->iommu_mm is not reordered in front of
* initialization to iommu_mm fields. If it does, readers may see a
......@@ -69,11 +68,16 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de
*/
struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
{
struct iommu_group *group = dev->iommu_group;
struct iommu_attach_handle *attach_handle;
struct iommu_mm_data *iommu_mm;
struct iommu_domain *domain;
struct iommu_sva *handle;
int ret;
if (!group)
return ERR_PTR(-ENODEV);
mutex_lock(&iommu_sva_lock);
/* Allocate mm->pasid if necessary. */
......@@ -83,12 +87,22 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
goto out_unlock;
}
list_for_each_entry(handle, &mm->iommu_mm->sva_handles, handle_item) {
if (handle->dev == dev) {
/* A bond already exists, just take a reference`. */
attach_handle = iommu_attach_handle_get(group, iommu_mm->pasid, IOMMU_DOMAIN_SVA);
if (!IS_ERR(attach_handle)) {
handle = container_of(attach_handle, struct iommu_sva, handle);
if (attach_handle->domain->mm != mm) {
ret = -EBUSY;
goto out_unlock;
}
refcount_inc(&handle->users);
mutex_unlock(&iommu_sva_lock);
return handle;
}
if (PTR_ERR(attach_handle) != -ENOENT) {
ret = PTR_ERR(attach_handle);
goto out_unlock;
}
handle = kzalloc(sizeof(*handle), GFP_KERNEL);
......@@ -99,7 +113,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
/* Search for an existing domain. */
list_for_each_entry(domain, &mm->iommu_mm->sva_domains, next) {
ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid);
ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid,
&handle->handle);
if (!ret) {
domain->users++;
goto out;
......@@ -113,7 +128,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
goto out_free_handle;
}
ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid);
ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid,
&handle->handle);
if (ret)
goto out_free_domain;
domain->users = 1;
......@@ -121,10 +137,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
out:
refcount_set(&handle->users, 1);
list_add(&handle->handle_item, &mm->iommu_mm->sva_handles);
mutex_unlock(&iommu_sva_lock);
handle->dev = dev;
handle->domain = domain;
return handle;
out_free_domain:
......@@ -147,7 +161,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_bind_device);
*/
void iommu_sva_unbind_device(struct iommu_sva *handle)
{
struct iommu_domain *domain = handle->domain;
struct iommu_domain *domain = handle->handle.domain;
struct iommu_mm_data *iommu_mm = domain->mm->iommu_mm;
struct device *dev = handle->dev;
......@@ -156,7 +170,6 @@ void iommu_sva_unbind_device(struct iommu_sva *handle)
mutex_unlock(&iommu_sva_lock);
return;
}
list_del(&handle->handle_item);
iommu_detach_device_pasid(domain, dev, iommu_mm->pasid);
if (--domain->users == 0) {
......@@ -170,7 +183,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_unbind_device);
u32 iommu_sva_get_pasid(struct iommu_sva *handle)
{
struct iommu_domain *domain = handle->domain;
struct iommu_domain *domain = handle->handle.domain;
return mm_get_enqcmd_pasid(domain->mm);
}
......@@ -259,7 +272,8 @@ static void iommu_sva_handle_iopf(struct work_struct *work)
if (status != IOMMU_PAGE_RESP_SUCCESS)
break;
status = iommu_sva_handle_mm(&iopf->fault, group->domain->mm);
status = iommu_sva_handle_mm(&iopf->fault,
group->attach_handle->domain->mm);
}
iopf_group_response(group, status);
......
......@@ -3352,16 +3352,17 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
* @domain: the iommu domain.
* @dev: the attached device.
* @pasid: the pasid of the device.
* @handle: the attach handle.
*
* Return: 0 on success, or an error.
*/
int iommu_attach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
struct device *dev, ioasid_t pasid,
struct iommu_attach_handle *handle)
{
/* Caller must be a probed driver on dev */
struct iommu_group *group = dev->iommu_group;
struct group_device *device;
void *curr;
int ret;
if (!domain->ops->set_dev_pasid)
......@@ -3382,11 +3383,12 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
}
}
curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL);
if (curr) {
ret = xa_err(curr) ? : -EBUSY;
if (handle)
handle->domain = domain;
ret = xa_insert(&group->pasid_array, pasid, handle, GFP_KERNEL);
if (ret)
goto out_unlock;
}
ret = __iommu_set_group_pasid(domain, group, pasid);
if (ret)
......@@ -3414,46 +3416,11 @@ void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev,
mutex_lock(&group->mutex);
__iommu_remove_group_pasid(group, pasid, domain);
WARN_ON(xa_erase(&group->pasid_array, pasid) != domain);
xa_erase(&group->pasid_array, pasid);
mutex_unlock(&group->mutex);
}
EXPORT_SYMBOL_GPL(iommu_detach_device_pasid);
/*
* iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev
* @dev: the queried device
* @pasid: the pasid of the device
* @type: matched domain type, 0 for any match
*
* This is a variant of iommu_get_domain_for_dev(). It returns the existing
* domain attached to pasid of a device. Callers must hold a lock around this
* function, and both iommu_attach/detach_dev_pasid() whenever a domain of
* type is being manipulated. This API does not internally resolve races with
* attach/detach.
*
* Return: attached domain on success, NULL otherwise.
*/
struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
ioasid_t pasid,
unsigned int type)
{
/* Caller must be a probed driver on dev */
struct iommu_group *group = dev->iommu_group;
struct iommu_domain *domain;
if (!group)
return NULL;
xa_lock(&group->pasid_array);
domain = xa_load(&group->pasid_array, pasid);
if (type && domain && domain->type != type)
domain = ERR_PTR(-EBUSY);
xa_unlock(&group->pasid_array);
return domain;
}
EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid);
ioasid_t iommu_alloc_global_pasid(struct device *dev)
{
int ret;
......@@ -3480,3 +3447,137 @@ void iommu_free_global_pasid(ioasid_t pasid)
ida_free(&iommu_global_pasid_ida, pasid);
}
EXPORT_SYMBOL_GPL(iommu_free_global_pasid);
/**
 * iommu_attach_handle_get - Look up the attach handle stored for a PASID
 * @group: the iommu group that domain was attached to
 * @pasid: the pasid within the group
 * @type: matched domain type, 0 for any match
 *
 * Return: the handle stored in @group's pasid_array at index @pasid,
 * ERR_PTR(-ENOENT) if nothing is stored there, or ERR_PTR(-EBUSY) if @type
 * is non-zero and differs from the type of the handle's domain.
 *
 * The life cycle of an iommu attach handle is from the time when the domain
 * is attached to the time when the domain is detached. Callers are required
 * to synchronize the call of iommu_attach_handle_get() with domain attachment
 * and detachment, and may only use the returned handle during its life cycle.
 */
struct iommu_attach_handle *
iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int type)
{
	struct iommu_attach_handle *found;
	struct iommu_attach_handle *result;

	xa_lock(&group->pasid_array);
	found = xa_load(&group->pasid_array, pasid);
	if (found && (!type || found->domain->type == type))
		result = found;
	else if (found)
		result = ERR_PTR(-EBUSY);
	else
		result = ERR_PTR(-ENOENT);
	xa_unlock(&group->pasid_array);

	return result;
}
EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, IOMMUFD_INTERNAL);
/**
 * iommu_attach_group_handle - Attach an IOMMU domain to an IOMMU group
 * @domain: IOMMU domain to attach
 * @group: IOMMU group that will be attached
 * @handle: attach handle
 *
 * Variant of iommu_attach_group() that additionally records a caller-supplied
 * attach handle for the attachment. When @handle is non-NULL its ->domain is
 * pointed at @domain; the handle is then inserted into @group's pasid_array
 * at IOMMU_NO_PASID before the domain is attached, and removed again if the
 * attach fails. This is currently used by IOMMUFD to deliver the I/O page
 * faults.
 *
 * Returns 0 on success and error code on failure.
 */
int iommu_attach_group_handle(struct iommu_domain *domain,
			      struct iommu_group *group,
			      struct iommu_attach_handle *handle)
{
	int rc;

	if (handle)
		handle->domain = domain;

	mutex_lock(&group->mutex);
	rc = xa_insert(&group->pasid_array, IOMMU_NO_PASID, handle, GFP_KERNEL);
	if (!rc) {
		rc = __iommu_attach_group(domain, group);
		/* Attach failed: drop the handle that was just published. */
		if (rc)
			xa_erase(&group->pasid_array, IOMMU_NO_PASID);
	}
	mutex_unlock(&group->mutex);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommu_attach_group_handle, IOMMUFD_INTERNAL);
/**
 * iommu_detach_group_handle - Detach an IOMMU domain from an IOMMU group
 * @domain: IOMMU domain to detach (not referenced by the function body)
 * @group: IOMMU group that will be detached
 *
 * Detach the specified IOMMU domain from the specified IOMMU group.
 * It must be used in conjunction with iommu_attach_group_handle().
 *
 * With @group->mutex held, the group is switched back via
 * __iommu_group_set_core_domain() and the entry stored at IOMMU_NO_PASID in
 * @group's pasid_array is erased.
 */
void iommu_detach_group_handle(struct iommu_domain *domain,
struct iommu_group *group)
{
mutex_lock(&group->mutex);
__iommu_group_set_core_domain(group);
/* Drop the attach handle only after the group has left the domain. */
xa_erase(&group->pasid_array, IOMMU_NO_PASID);
mutex_unlock(&group->mutex);
}
EXPORT_SYMBOL_NS_GPL(iommu_detach_group_handle, IOMMUFD_INTERNAL);
/**
* iommu_replace_group_handle - replace the domain that a group is attached to
* @group: IOMMU group that will be attached to the new domain
* @new_domain: new IOMMU domain to replace with
* @handle: attach handle
*
* This is a variant of iommu_group_replace_domain(). It allows the caller to
* provide an attach handle for the new domain and use it when the domain is
* attached.
*/
int iommu_replace_group_handle(struct iommu_group *group,
struct iommu_domain *new_domain,
struct iommu_attach_handle *handle)
{
void *curr;
int ret;
if (!new_domain)
return -EINVAL;
mutex_lock(&group->mutex);
if (handle) {
ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL);
if (ret)
goto err_unlock;
}
ret = __iommu_group_set_domain(group, new_domain);
if (ret)
goto err_release;
curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, handle, GFP_KERNEL);
WARN_ON(xa_is_err(curr));
mutex_unlock(&group->mutex);
return 0;
err_release:
xa_release(&group->pasid_array, IOMMU_NO_PASID);
err_unlock:
mutex_unlock(&group->mutex);
return ret;
}
EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, IOMMUFD_INTERNAL);
# SPDX-License-Identifier: GPL-2.0-only
iommufd-y := \
device.o \
fault.o \
hw_pagetable.o \
io_pagetable.o \
ioas.o \
......
......@@ -215,6 +215,7 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
refcount_inc(&idev->obj.users);
/* igroup refcount moves into iommufd_device */
idev->igroup = igroup;
mutex_init(&idev->iopf_lock);
/*
* If the caller fails after this success it must call
......@@ -376,7 +377,7 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
* attachment.
*/
if (list_empty(&idev->igroup->device_list)) {
rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
rc = iommufd_hwpt_attach_device(hwpt, idev);
if (rc)
goto err_unresv;
idev->igroup->hwpt = hwpt;
......@@ -402,7 +403,7 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev)
mutex_lock(&idev->igroup->lock);
list_del(&idev->group_item);
if (list_empty(&idev->igroup->device_list)) {
iommu_detach_group(hwpt->domain, idev->igroup->group);
iommufd_hwpt_detach_device(hwpt, idev);
idev->igroup->hwpt = NULL;
}
if (hwpt_is_paging(hwpt))
......@@ -497,7 +498,7 @@ iommufd_device_do_replace(struct iommufd_device *idev,
goto err_unlock;
}
rc = iommu_group_replace_domain(igroup->group, hwpt->domain);
rc = iommufd_hwpt_replace_device(idev, hwpt, old_hwpt);
if (rc)
goto err_unresv;
......
This diff is collapsed.
......@@ -8,6 +8,15 @@
#include "../iommu-priv.h"
#include "iommufd_private.h"
/*
 * Common teardown shared by the paging and nested hwpt destroy paths: free
 * the iommu domain if one was allocated, then drop the reference this hwpt
 * holds on its fault object, if any.
 */
static void __iommufd_hwpt_destroy(struct iommufd_hw_pagetable *hwpt)
{
	struct iommu_domain *domain = hwpt->domain;

	if (domain)
		iommu_domain_free(domain);

	if (!hwpt->fault)
		return;

	refcount_dec(&hwpt->fault->obj.users);
}
void iommufd_hwpt_paging_destroy(struct iommufd_object *obj)
{
struct iommufd_hwpt_paging *hwpt_paging =
......@@ -22,9 +31,7 @@ void iommufd_hwpt_paging_destroy(struct iommufd_object *obj)
hwpt_paging->common.domain);
}
if (hwpt_paging->common.domain)
iommu_domain_free(hwpt_paging->common.domain);
__iommufd_hwpt_destroy(&hwpt_paging->common);
refcount_dec(&hwpt_paging->ioas->obj.users);
}
......@@ -49,9 +56,7 @@ void iommufd_hwpt_nested_destroy(struct iommufd_object *obj)
struct iommufd_hwpt_nested *hwpt_nested =
container_of(obj, struct iommufd_hwpt_nested, common.obj);
if (hwpt_nested->common.domain)
iommu_domain_free(hwpt_nested->common.domain);
__iommufd_hwpt_destroy(&hwpt_nested->common);
refcount_dec(&hwpt_nested->parent->common.obj.users);
}
......@@ -213,7 +218,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
struct iommufd_hw_pagetable *hwpt;
int rc;
if (flags || !user_data->len || !ops->domain_alloc_user)
if ((flags & ~IOMMU_HWPT_FAULT_ID_VALID) ||
!user_data->len || !ops->domain_alloc_user)
return ERR_PTR(-EOPNOTSUPP);
if (parent->auto_domain || !parent->nest_parent)
return ERR_PTR(-EINVAL);
......@@ -227,7 +233,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
refcount_inc(&parent->common.obj.users);
hwpt_nested->parent = parent;
hwpt->domain = ops->domain_alloc_user(idev->dev, flags,
hwpt->domain = ops->domain_alloc_user(idev->dev,
flags & ~IOMMU_HWPT_FAULT_ID_VALID,
parent->common.domain, user_data);
if (IS_ERR(hwpt->domain)) {
rc = PTR_ERR(hwpt->domain);
......@@ -236,7 +243,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
}
hwpt->domain->owner = ops;
if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) {
if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED ||
!hwpt->domain->ops->cache_invalidate_user)) {
rc = -EINVAL;
goto out_abort;
}
......@@ -308,6 +316,21 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
goto out_put_pt;
}
if (cmd->flags & IOMMU_HWPT_FAULT_ID_VALID) {
struct iommufd_fault *fault;
fault = iommufd_get_fault(ucmd, cmd->fault_id);
if (IS_ERR(fault)) {
rc = PTR_ERR(fault);
goto out_hwpt;
}
hwpt->fault = fault;
hwpt->domain->iopf_handler = iommufd_fault_iopf_handler;
hwpt->domain->fault_data = hwpt;
refcount_inc(&fault->obj.users);
iommufd_put_object(ucmd->ictx, &fault->obj);
}
cmd->out_hwpt_id = hwpt->obj.id;
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
if (rc)
......
......@@ -11,6 +11,7 @@
#include <linux/iommu.h>
#include <linux/iova_bitmap.h>
#include <uapi/linux/iommufd.h>
#include "../iommu-priv.h"
struct iommu_domain;
struct iommu_group;
......@@ -128,6 +129,7 @@ enum iommufd_object_type {
IOMMUFD_OBJ_HWPT_NESTED,
IOMMUFD_OBJ_IOAS,
IOMMUFD_OBJ_ACCESS,
IOMMUFD_OBJ_FAULT,
#ifdef CONFIG_IOMMUFD_TEST
IOMMUFD_OBJ_SELFTEST,
#endif
......@@ -292,6 +294,7 @@ int iommufd_check_iova_range(struct io_pagetable *iopt,
struct iommufd_hw_pagetable {
struct iommufd_object obj;
struct iommu_domain *domain;
struct iommufd_fault *fault;
};
struct iommufd_hwpt_paging {
......@@ -395,6 +398,9 @@ struct iommufd_device {
/* always the physical device */
struct device *dev;
bool enforce_cache_coherency;
/* protect iopf_enabled counter */
struct mutex iopf_lock;
unsigned int iopf_enabled;
};
static inline struct iommufd_device *
......@@ -426,6 +432,80 @@ void iopt_remove_access(struct io_pagetable *iopt,
u32 iopt_access_list_id);
void iommufd_access_destroy_object(struct iommufd_object *obj);
/*
 * An iommufd_fault object represents an interface to deliver I/O page faults
 * to the user space. These objects are created/destroyed by the user space and
 * associated with hardware page table objects during page-table allocation.
 */
struct iommufd_fault {
/*
 * Embedded base object. Kept as the first member: iommufd_get_fault()
 * converts the looked-up object with container_of() and callers test the
 * result with IS_ERR().
 */
struct iommufd_object obj;
/* NOTE(review): presumably the owning iommufd context — confirm in fault.c */
struct iommufd_ctx *ictx;
/* NOTE(review): presumably the file backing the fault fd handed to user space — confirm */
struct file *filep;
/* The lists of outstanding faults protected by below mutex. */
struct mutex mutex;
struct list_head deliver;
struct xarray response;
/* NOTE(review): presumably used to wake readers waiting for faults — confirm */
struct wait_queue_head wait_queue;
};
/*
 * iommufd's wrapper around the core attach handle. The core iommu code only
 * sees the embedded @handle; to_iommufd_handle() recovers the wrapper from it
 * with container_of(), which gives access to @idev.
 */
struct iommufd_attach_handle {
struct iommu_attach_handle handle;
/* NOTE(review): presumably the device this attachment belongs to — confirm in fault.c */
struct iommufd_device *idev;
};
/* Convert an iommu attach handle to iommufd handle. */
#define to_iommufd_handle(hdl) container_of(hdl, struct iommufd_attach_handle, handle)
/*
 * Look up the IOMMUFD_OBJ_FAULT object with the given @id in @ucmd's context
 * and return it as a struct iommufd_fault.
 *
 * Callers test the result with IS_ERR(); that only works because obj is the
 * first member of struct iommufd_fault, so container_of() leaves an error
 * pointer from the lookup unchanged.
 */
static inline struct iommufd_fault *
iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id)
{
	struct iommufd_object *found;

	found = iommufd_get_object(ucmd->ictx, id, IOMMUFD_OBJ_FAULT);

	return container_of(found, struct iommufd_fault, obj);
}
int iommufd_fault_alloc(struct iommufd_ucmd *ucmd);
void iommufd_fault_destroy(struct iommufd_object *obj);
int iommufd_fault_iopf_handler(struct iopf_group *group);
int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
struct iommufd_device *idev);
void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt,
struct iommufd_device *idev);
int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
struct iommufd_hw_pagetable *hwpt,
struct iommufd_hw_pagetable *old);
/*
 * Attach @idev's group to @hwpt. A hwpt without a fault object uses the plain
 * iommu_attach_group() path; a fault-capable one is routed through
 * iommufd_fault_domain_attach_dev() instead.
 */
static inline int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
					     struct iommufd_device *idev)
{
	if (!hwpt->fault)
		return iommu_attach_group(hwpt->domain, idev->igroup->group);

	return iommufd_fault_domain_attach_dev(hwpt, idev);
}
/*
 * Detach @idev's group from @hwpt.
 *
 * Mirrors iommufd_hwpt_attach_device() and iommufd_hwpt_replace_device():
 * a fault-capable hwpt (hwpt->fault set) is handled exclusively by
 * iommufd_fault_domain_detach_dev(), every other hwpt by
 * iommu_detach_group(). The two paths are mutually exclusive so that the
 * group is not detached a second time after the fault path has run.
 *
 * NOTE(review): iommufd_fault_domain_detach_dev() is defined in fault.c; it
 * is expected to perform the group detach itself (the counterpart of the
 * handle-based attach) — confirm against that file.
 */
static inline void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
					      struct iommufd_device *idev)
{
	if (hwpt->fault) {
		iommufd_fault_domain_detach_dev(hwpt, idev);
		return;
	}

	iommu_detach_group(hwpt->domain, idev->igroup->group);
}
static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev,
struct iommufd_hw_pagetable *hwpt,
struct iommufd_hw_pagetable *old)
{
if (old->fault || hwpt->fault)
return iommufd_fault_domain_replace_dev(idev, hwpt, old);
return iommu_group_replace_domain(idev->igroup->group, hwpt->domain);
}
#ifdef CONFIG_IOMMUFD_TEST
int iommufd_test(struct iommufd_ucmd *ucmd);
void iommufd_selftest_destroy(struct iommufd_object *obj);
......
......@@ -22,6 +22,7 @@ enum {
IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS,
IOMMU_TEST_OP_DIRTY,
IOMMU_TEST_OP_MD_CHECK_IOTLB,
IOMMU_TEST_OP_TRIGGER_IOPF,
};
enum {
......@@ -127,6 +128,13 @@ struct iommu_test_cmd {
__u32 id;
__u32 iotlb;
} check_iotlb;
struct {
__u32 dev_id;
__u32 pasid;
__u32 grpid;
__u32 perm;
__u64 addr;
} trigger_iopf;
};
__u32 last;
};
......
......@@ -35,6 +35,9 @@ struct iova_bitmap_map {
/* base IOVA representing bit 0 of the first page */
unsigned long iova;
/* mapped length */
unsigned long length;
/* page size order that each bit granules to */
unsigned long pgshift;
......@@ -113,9 +116,6 @@ struct iova_bitmap {
/* length of the IOVA range for the whole bitmap */
size_t length;
/* length of the IOVA range set ahead the pinned pages */
unsigned long set_ahead_length;
};
/*
......@@ -156,6 +156,8 @@ static unsigned long iova_bitmap_mapped_iova(struct iova_bitmap *bitmap)
return bitmap->iova + iova_bitmap_index_to_offset(bitmap, skip);
}
static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap);
/*
* Pins the bitmap user pages for the current range window.
* This is internal to IOVA bitmap and called when advancing the
......@@ -206,6 +208,7 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap)
* aligned.
*/
mapped->pgoff = offset_in_page(addr);
mapped->length = iova_bitmap_mapped_length(bitmap);
return 0;
}
......@@ -263,9 +266,6 @@ struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length,
goto err;
}
rc = iova_bitmap_get(bitmap);
if (rc)
goto err;
return bitmap;
err:
......@@ -338,65 +338,34 @@ static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap)
}
/*
* Returns true if there's not more data to iterate.
* Returns true if [@iova..@iova+@length-1] is part of the mapped IOVA range.
*/
static bool iova_bitmap_done(struct iova_bitmap *bitmap)
{
return bitmap->mapped_base_index >= bitmap->mapped_total_index;
}
static int iova_bitmap_set_ahead(struct iova_bitmap *bitmap,
size_t set_ahead_length)
static bool iova_bitmap_mapped_range(struct iova_bitmap_map *mapped,
unsigned long iova, size_t length)
{
int ret = 0;
while (set_ahead_length > 0 && !iova_bitmap_done(bitmap)) {
unsigned long length = iova_bitmap_mapped_length(bitmap);
unsigned long iova = iova_bitmap_mapped_iova(bitmap);
ret = iova_bitmap_get(bitmap);
if (ret)
break;
length = min(length, set_ahead_length);
iova_bitmap_set(bitmap, iova, length);
set_ahead_length -= length;
bitmap->mapped_base_index +=
iova_bitmap_offset_to_index(bitmap, length - 1) + 1;
iova_bitmap_put(bitmap);
}
bitmap->set_ahead_length = 0;
return ret;
return mapped->npages &&
(iova >= mapped->iova &&
(iova + length - 1) <= (mapped->iova + mapped->length - 1));
}
/*
* Advances to the next range, releases the current pinned
* Advances to a selected range, releases the current pinned
* pages and pins the next set of bitmap pages.
* Returns 0 on success or otherwise errno.
*/
static int iova_bitmap_advance(struct iova_bitmap *bitmap)
static int iova_bitmap_advance_to(struct iova_bitmap *bitmap,
unsigned long iova)
{
unsigned long iova = iova_bitmap_mapped_length(bitmap) - 1;
unsigned long count = iova_bitmap_offset_to_index(bitmap, iova) + 1;
unsigned long index;
bitmap->mapped_base_index += count;
index = iova_bitmap_offset_to_index(bitmap, iova - bitmap->iova);
if (index >= bitmap->mapped_total_index)
return -EINVAL;
bitmap->mapped_base_index = index;
iova_bitmap_put(bitmap);
if (iova_bitmap_done(bitmap))
return 0;
/* Iterate, set and skip any bits requested for next iteration */
if (bitmap->set_ahead_length) {
int ret;
ret = iova_bitmap_set_ahead(bitmap, bitmap->set_ahead_length);
if (ret)
return ret;
}
/* When advancing the index we pin the next set of bitmap pages */
/* Pin the next set of bitmap pages */
return iova_bitmap_get(bitmap);
}
......@@ -416,17 +385,7 @@ static int iova_bitmap_advance(struct iova_bitmap *bitmap)
int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque,
iova_bitmap_fn_t fn)
{
int ret = 0;
for (; !iova_bitmap_done(bitmap) && !ret;
ret = iova_bitmap_advance(bitmap)) {
ret = fn(bitmap, iova_bitmap_mapped_iova(bitmap),
iova_bitmap_mapped_length(bitmap), opaque);
if (ret)
break;
}
return ret;
return fn(bitmap, bitmap->iova, bitmap->length, opaque);
}
EXPORT_SYMBOL_NS_GPL(iova_bitmap_for_each, IOMMUFD);
......@@ -444,11 +403,25 @@ void iova_bitmap_set(struct iova_bitmap *bitmap,
unsigned long iova, size_t length)
{
struct iova_bitmap_map *mapped = &bitmap->mapped;
unsigned long cur_bit = ((iova - mapped->iova) >>
unsigned long cur_bit, last_bit, last_page_idx;
update_indexes:
if (unlikely(!iova_bitmap_mapped_range(mapped, iova, length))) {
/*
* The attempt to advance the base index to @iova
* may fail if it's out of bounds, or pinning the pages
* returns an error.
*/
if (iova_bitmap_advance_to(bitmap, iova))
return;
}
last_page_idx = mapped->npages - 1;
cur_bit = ((iova - mapped->iova) >>
mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;
unsigned long last_bit = (((iova + length - 1) - mapped->iova) >>
last_bit = (((iova + length - 1) - mapped->iova) >>
mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;
unsigned long last_page_idx = mapped->npages - 1;
do {
unsigned int page_idx = cur_bit / BITS_PER_PAGE;
......@@ -457,18 +430,19 @@ void iova_bitmap_set(struct iova_bitmap *bitmap,
last_bit - cur_bit + 1);
void *kaddr;
if (unlikely(page_idx > last_page_idx))
break;
if (unlikely(page_idx > last_page_idx)) {
unsigned long left =
((last_bit - cur_bit + 1) << mapped->pgshift);
iova += (length - left);
length = left;
goto update_indexes;
}
kaddr = kmap_local_page(mapped->pages[page_idx]);
bitmap_set(kaddr, offset, nbits);
kunmap_local(kaddr);
cur_bit += nbits;
} while (cur_bit <= last_bit);
if (unlikely(cur_bit <= last_bit)) {
bitmap->set_ahead_length =
((last_bit - cur_bit + 1) << bitmap->mapped.pgshift);
}
}
EXPORT_SYMBOL_NS_GPL(iova_bitmap_set, IOMMUFD);
......@@ -319,6 +319,7 @@ static int iommufd_option(struct iommufd_ucmd *ucmd)
union ucmd_buffer {
struct iommu_destroy destroy;
struct iommu_fault_alloc fault;
struct iommu_hw_info info;
struct iommu_hwpt_alloc hwpt;
struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap;
......@@ -355,6 +356,8 @@ struct iommufd_ioctl_op {
}
static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id),
IOCTL_OP(IOMMU_FAULT_QUEUE_ALLOC, iommufd_fault_alloc, struct iommu_fault_alloc,
out_fault_fd),
IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info,
__reserved),
IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc,
......@@ -513,6 +516,9 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
.destroy = iommufd_hwpt_nested_destroy,
.abort = iommufd_hwpt_nested_abort,
},
[IOMMUFD_OBJ_FAULT] = {
.destroy = iommufd_fault_destroy,
},
#ifdef CONFIG_IOMMUFD_TEST
[IOMMUFD_OBJ_SELFTEST] = {
.destroy = iommufd_selftest_destroy,
......
......@@ -266,8 +266,8 @@ static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain,
/* Clear dirty */
if (mock_test_and_clear_dirty(mock, head, pgsize, flags))
iommu_dirty_bitmap_record(dirty, head, pgsize);
iova = head + pgsize;
iommu_dirty_bitmap_record(dirty, iova, pgsize);
iova += pgsize;
} while (iova < end);
return 0;
......@@ -504,6 +504,8 @@ static bool mock_domain_capable(struct device *dev, enum iommu_cap cap)
return false;
}
static struct iopf_queue *mock_iommu_iopf_queue;
static struct iommu_device mock_iommu_device = {
};
......@@ -514,6 +516,29 @@ static struct iommu_device *mock_probe_device(struct device *dev)
return &mock_iommu_device;
}
/*
 * .page_response callback of the mock IOMMU driver (wired up in mock_ops).
 * The body is empty: the selftest driver takes no action on a page response.
 */
static void mock_domain_page_response(struct device *dev, struct iopf_fault *evt,
struct iommu_page_response *msg)
{
}
/*
 * Enable a device feature on a mock device. Only IOMMU_DEV_FEAT_IOPF is
 * supported, and only while the mock IOPF queue exists; in that case @dev is
 * added to the queue and the result of that call is returned. Any other
 * request yields -ENODEV.
 */
static int mock_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
{
	if (feat == IOMMU_DEV_FEAT_IOPF && mock_iommu_iopf_queue)
		return iopf_queue_add_device(mock_iommu_iopf_queue, dev);

	return -ENODEV;
}
/*
 * Disable a device feature on a mock device. Only IOMMU_DEV_FEAT_IOPF is
 * supported, and only while the mock IOPF queue exists; in that case @dev is
 * removed from the queue and 0 is returned. Any other request yields -ENODEV.
 */
static int mock_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
{
	if (feat == IOMMU_DEV_FEAT_IOPF && mock_iommu_iopf_queue) {
		iopf_queue_remove_device(mock_iommu_iopf_queue, dev);
		return 0;
	}

	return -ENODEV;
}
static const struct iommu_ops mock_ops = {
/*
* IOMMU_DOMAIN_BLOCKED cannot be returned from def_domain_type()
......@@ -529,6 +554,10 @@ static const struct iommu_ops mock_ops = {
.capable = mock_domain_capable,
.device_group = generic_device_group,
.probe_device = mock_probe_device,
.page_response = mock_domain_page_response,
.dev_enable_feat = mock_dev_enable_feat,
.dev_disable_feat = mock_dev_disable_feat,
.user_pasid_table = true,
.default_domain_ops =
&(struct iommu_domain_ops){
.free = mock_domain_free,
......@@ -1334,7 +1363,7 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id,
}
max = length / page_size;
bitmap_size = max / BITS_PER_BYTE;
bitmap_size = DIV_ROUND_UP(max, BITS_PER_BYTE);
tmp = kvzalloc(bitmap_size, GFP_KERNEL_ACCOUNT);
if (!tmp) {
......@@ -1375,6 +1404,31 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id,
return rc;
}
/*
 * IOMMU_TEST_OP_TRIGGER_IOPF handler: build a page-request fault from the
 * caller-supplied trigger_iopf parameters and report it against the device
 * identified by trigger_iopf.dev_id.
 *
 * Returns 0 once the fault has been passed to iommu_report_device_fault(), or
 * the error carried by the pointer returned from iommufd_get_device().
 */
static int iommufd_test_trigger_iopf(struct iommufd_ucmd *ucmd,
				     struct iommu_test_cmd *cmd)
{
	struct iommufd_device *idev;
	struct iopf_fault event = { };

	idev = iommufd_get_device(ucmd, cmd->trigger_iopf.dev_id);
	if (IS_ERR(idev))
		return PTR_ERR(idev);

	event.fault.type = IOMMU_FAULT_PAGE_REQ;
	event.fault.prm.addr = cmd->trigger_iopf.addr;
	event.fault.prm.grpid = cmd->trigger_iopf.grpid;
	event.fault.prm.perm = cmd->trigger_iopf.perm;
	event.fault.prm.pasid = cmd->trigger_iopf.pasid;

	/*
	 * Every injected fault is flagged as the last page of its group; the
	 * PASID-valid flag is added only when a PASID was actually supplied.
	 */
	event.fault.prm.flags =
		(cmd->trigger_iopf.pasid == IOMMU_NO_PASID) ?
			IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE :
			(IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE |
			 IOMMU_FAULT_PAGE_REQUEST_PASID_VALID);

	iommu_report_device_fault(idev->dev, &event);

	/* Drop the reference taken by iommufd_get_device(). */
	iommufd_put_object(ucmd->ictx, &idev->obj);

	return 0;
}
void iommufd_selftest_destroy(struct iommufd_object *obj)
{
struct selftest_obj *sobj = container_of(obj, struct selftest_obj, obj);
......@@ -1450,6 +1504,8 @@ int iommufd_test(struct iommufd_ucmd *ucmd)
cmd->dirty.page_size,
u64_to_user_ptr(cmd->dirty.uptr),
cmd->dirty.flags);
case IOMMU_TEST_OP_TRIGGER_IOPF:
return iommufd_test_trigger_iopf(ucmd, cmd);
default:
return -EOPNOTSUPP;
}
......@@ -1491,6 +1547,9 @@ int __init iommufd_test_init(void)
&iommufd_mock_bus_type.nb);
if (rc)
goto err_sysfs;
mock_iommu_iopf_queue = iopf_queue_alloc("mock-iopfq");
return 0;
err_sysfs:
......@@ -1506,6 +1565,11 @@ int __init iommufd_test_init(void)
void iommufd_test_exit(void)
{
if (mock_iommu_iopf_queue) {
iopf_queue_free(mock_iommu_iopf_queue);
mock_iommu_iopf_queue = NULL;
}
iommu_device_sysfs_remove(&mock_iommu_device);
iommu_device_unregister_bus(&mock_iommu_device,
&iommufd_mock_bus_type.bus,
......
......@@ -124,12 +124,16 @@ struct iopf_fault {
/*
 * A group of I/O page faults.
 * NOTE(review): presumably the faults of one page request group are collected
 * on @faults and @last_fault is the one that closes the group -- confirm
 * against the IOPF core, which is not visible here.
 */
struct iopf_group {
struct iopf_fault last_fault;
struct list_head faults;
/* NOTE(review): presumably the number of entries on @faults -- confirm */
size_t fault_count;
/* list node for iommu_fault_param::faults */
struct list_head pending_node;
struct work_struct work;
struct iommu_domain *domain;
/* Attach handle (see struct iommu_attach_handle) associated with this group */
struct iommu_attach_handle *attach_handle;
/* The device's fault data parameter. */
struct iommu_fault_param *fault_param;
/* Used by handler provider to hook the group on its own lists. */
struct list_head node;
/* NOTE(review): presumably the cookie reported to userspace in struct iommu_hwpt_pgfault -- confirm */
u32 cookie;
};
/**
......@@ -547,6 +551,10 @@ static inline int __iommu_copy_struct_from_user_array(
* @default_domain: If not NULL this will always be set as the default domain.
* This should be an IDENTITY/BLOCKED/PLATFORM domain.
* Do not use in new drivers.
* @user_pasid_table: IOMMU driver supports user-managed PASID table. There is
* no user domain for each PASID and the I/O page faults are
* forwarded through the user domain attached to the device
* RID.
*/
struct iommu_ops {
bool (*capable)(struct device *dev, enum iommu_cap);
......@@ -590,6 +598,7 @@ struct iommu_ops {
struct iommu_domain *blocked_domain;
struct iommu_domain *release_domain;
struct iommu_domain *default_domain;
u8 user_pasid_table:1;
};
/**
......@@ -989,20 +998,28 @@ struct iommu_fwspec {
/* ATS is supported */
#define IOMMU_FWSPEC_PCI_RC_ATS (1 << 0)
/*
* An iommu attach handle represents a relationship between an iommu domain
* and a PASID or RID of a device. It is allocated and managed by the component
* that manages the domain and is stored in the iommu group during the time the
* domain is attached.
*/
struct iommu_attach_handle {
struct iommu_domain *domain;
};
/**
* struct iommu_sva - handle to a device-mm bond
*/
struct iommu_sva {
struct iommu_attach_handle handle;
struct device *dev;
struct iommu_domain *domain;
struct list_head handle_item;
refcount_t users;
};
struct iommu_mm_data {
u32 pasid;
struct list_head sva_domains;
struct list_head sva_handles;
};
int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
......@@ -1052,12 +1069,10 @@ int iommu_device_claim_dma_owner(struct device *dev, void *owner);
void iommu_device_release_dma_owner(struct device *dev);
int iommu_attach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid);
struct device *dev, ioasid_t pasid,
struct iommu_attach_handle *handle);
void iommu_detach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid);
struct iommu_domain *
iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
unsigned int type);
ioasid_t iommu_alloc_global_pasid(struct device *dev);
void iommu_free_global_pasid(ioasid_t pasid);
#else /* CONFIG_IOMMU_API */
......@@ -1388,7 +1403,8 @@ static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner)
}
static inline int iommu_attach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
struct device *dev, ioasid_t pasid,
struct iommu_attach_handle *handle)
{
return -ENODEV;
}
......@@ -1398,13 +1414,6 @@ static inline void iommu_detach_device_pasid(struct iommu_domain *domain,
{
}
static inline struct iommu_domain *
iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
unsigned int type)
{
return NULL;
}
static inline ioasid_t iommu_alloc_global_pasid(struct device *dev)
{
return IOMMU_PASID_INVALID;
......
......@@ -37,19 +37,20 @@
enum {
IOMMUFD_CMD_BASE = 0x80,
IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE,
IOMMUFD_CMD_IOAS_ALLOC,
IOMMUFD_CMD_IOAS_ALLOW_IOVAS,
IOMMUFD_CMD_IOAS_COPY,
IOMMUFD_CMD_IOAS_IOVA_RANGES,
IOMMUFD_CMD_IOAS_MAP,
IOMMUFD_CMD_IOAS_UNMAP,
IOMMUFD_CMD_OPTION,
IOMMUFD_CMD_VFIO_IOAS,
IOMMUFD_CMD_HWPT_ALLOC,
IOMMUFD_CMD_GET_HW_INFO,
IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING,
IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP,
IOMMUFD_CMD_HWPT_INVALIDATE,
IOMMUFD_CMD_IOAS_ALLOC = 0x81,
IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82,
IOMMUFD_CMD_IOAS_COPY = 0x83,
IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84,
IOMMUFD_CMD_IOAS_MAP = 0x85,
IOMMUFD_CMD_IOAS_UNMAP = 0x86,
IOMMUFD_CMD_OPTION = 0x87,
IOMMUFD_CMD_VFIO_IOAS = 0x88,
IOMMUFD_CMD_HWPT_ALLOC = 0x89,
IOMMUFD_CMD_GET_HW_INFO = 0x8a,
IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b,
IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c,
IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
};
/**
......@@ -356,10 +357,13 @@ struct iommu_vfio_ioas {
* the parent HWPT in a nesting configuration.
* @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is
* enforced on device attachment
* @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
* valid.
*/
enum iommufd_hwpt_alloc_flags {
IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
};
/**
......@@ -396,8 +400,8 @@ struct iommu_hwpt_vtd_s1 {
* @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table
*/
enum iommu_hwpt_data_type {
IOMMU_HWPT_DATA_NONE,
IOMMU_HWPT_DATA_VTD_S1,
IOMMU_HWPT_DATA_NONE = 0,
IOMMU_HWPT_DATA_VTD_S1 = 1,
};
/**
......@@ -411,6 +415,9 @@ enum iommu_hwpt_data_type {
* @data_type: One of enum iommu_hwpt_data_type
* @data_len: Length of the type specific data
* @data_uptr: User pointer to the type specific data
* @fault_id: The ID of the IOMMUFD_FAULT object. Valid only if the
* IOMMU_HWPT_FAULT_ID_VALID bit is set in the flags field.
* @__reserved2: Padding to 64-bit alignment. Must be 0.
*
* Explicitly allocate a hardware page table object. This is the same object
* type that is returned by iommufd_device_attach() and represents the
......@@ -441,6 +448,8 @@ struct iommu_hwpt_alloc {
__u32 data_type;
__u32 data_len;
__aligned_u64 data_uptr;
__u32 fault_id;
__u32 __reserved2;
};
#define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
......@@ -482,8 +491,8 @@ struct iommu_hw_info_vtd {
* @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
*/
enum iommu_hw_info_type {
IOMMU_HW_INFO_TYPE_NONE,
IOMMU_HW_INFO_TYPE_INTEL_VTD,
IOMMU_HW_INFO_TYPE_NONE = 0,
IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
};
/**
......@@ -620,7 +629,7 @@ struct iommu_hwpt_get_dirty_bitmap {
* @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
*/
enum iommu_hwpt_invalidate_data_type {
IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0,
};
/**
......@@ -692,4 +701,100 @@ struct iommu_hwpt_invalidate {
__u32 __reserved;
};
#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)
/**
 * enum iommu_hwpt_pgfault_flags - flag bits for struct iommu_hwpt_pgfault
 * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The fault data carries a valid pasid
 *                                   field.
 * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: This fault is the last one of its fault
 *                                 group.
 */
enum iommu_hwpt_pgfault_flags {
	IOMMU_PGFAULT_FLAGS_PASID_VALID = 1 << 0,
	IOMMU_PGFAULT_FLAGS_LAST_PAGE = 1 << 1,
};
/**
 * enum iommu_hwpt_pgfault_perm - permission bits for struct iommu_hwpt_pgfault
 * @IOMMU_PGFAULT_PERM_READ: read permission is requested
 * @IOMMU_PGFAULT_PERM_WRITE: write permission is requested
 * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) the request carries a PASID with the
 *                           Execute Requested bit set in the PASID TLP Prefix.
 * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) the request carries a PASID with the
 *                           Privileged Mode Requested bit set in the PASID TLP
 *                           Prefix.
 */
enum iommu_hwpt_pgfault_perm {
	IOMMU_PGFAULT_PERM_READ = 1 << 0,
	IOMMU_PGFAULT_PERM_WRITE = 1 << 1,
	IOMMU_PGFAULT_PERM_EXEC = 1 << 2,
	IOMMU_PGFAULT_PERM_PRIV = 1 << 3,
};
/**
 * struct iommu_hwpt_pgfault - iommu page fault data
 * @flags: Combination of enum iommu_hwpt_pgfault_flags
 * @dev_id: id of the originated device
 * @pasid: Process Address Space ID
 * @grpid: Page Request Group Index
 * @perm: Combination of enum iommu_hwpt_pgfault_perm
 * @__reserved: Explicit padding so that @addr sits at the same, naturally
 *              aligned offset on every ABI. Reserved, should be 0.
 * @addr: Fault address
 * @length: a hint of how much data the requestor is expecting to fetch. For
 *          example, if the PRI initiator knows it is going to do a 10MB
 *          transfer, it could fill in 10MB and the OS could pre-fault in
 *          10MB of IOVA. Defaults to 0 if there is no such hint.
 * @cookie: kernel-managed cookie identifying a group of fault messages. The
 *          cookie number encoded in the last page fault of the group should
 *          be echoed back in the response message.
 *
 * Five 32-bit fields precede @addr, so without @__reserved the compiler would
 * insert a hidden 4-byte hole on ABIs that align 64-bit integers to 8 bytes,
 * and would insert none on ABIs that align them to 4 bytes. Spelling the
 * padding out and using __aligned_u64 gives one layout everywhere.
 */
struct iommu_hwpt_pgfault {
	__u32 flags;
	__u32 dev_id;
	__u32 pasid;
	__u32 grpid;
	__u32 perm;
	__u32 __reserved;
	__aligned_u64 addr;
	__u32 length;
	__u32 cookie;
};
/**
* enum iommufd_page_response_code - Return status of fault handlers
* @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
* populated, retry the access. This is the
* "Success" defined in PCI 10.4.2.1.
* @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
* access. This is the "Invalid Request" in PCI
* 10.4.2.1.
*
* One of these values is carried in the code field of
* struct iommu_hwpt_page_response.
*/
enum iommufd_page_response_code {
IOMMUFD_PAGE_RESP_SUCCESS = 0,
IOMMUFD_PAGE_RESP_INVALID = 1,
};
/**
* struct iommu_hwpt_page_response - IOMMU page fault response
* @cookie: The kernel-managed cookie reported in the fault message (the
* cookie field of struct iommu_hwpt_pgfault).
* @code: One of the response codes in enum iommufd_page_response_code.
*/
struct iommu_hwpt_page_response {
__u32 cookie;
__u32 code;
};
/**
* struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC)
* @size: sizeof(struct iommu_fault_alloc)
* @flags: Must be 0
* @out_fault_id: The ID of the new FAULT object (output)
* @out_fault_fd: The fd of the new FAULT object (output)
*
* Explicitly allocate a fault handling object. The returned ID can be passed
* as the fault_id of struct iommu_hwpt_alloc together with
* IOMMU_HWPT_FAULT_ID_VALID.
*/
struct iommu_fault_alloc {
__u32 size;
__u32 flags;
__u32 out_fault_id;
__u32 out_fault_fd;
};
#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
#endif
......@@ -279,6 +279,9 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested)
uint32_t parent_hwpt_id = 0;
uint32_t parent_hwpt_id_not_work = 0;
uint32_t test_hwpt_id = 0;
uint32_t iopf_hwpt_id;
uint32_t fault_id;
uint32_t fault_fd;
if (self->device_id) {
/* Negative tests */
......@@ -326,6 +329,7 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested)
sizeof(data));
/* Allocate two nested hwpts sharing one common parent hwpt */
test_ioctl_fault_alloc(&fault_id, &fault_fd);
test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, 0,
&nested_hwpt_id[0],
IOMMU_HWPT_DATA_SELFTEST, &data,
......@@ -334,6 +338,14 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested)
&nested_hwpt_id[1],
IOMMU_HWPT_DATA_SELFTEST, &data,
sizeof(data));
test_err_hwpt_alloc_iopf(ENOENT, self->device_id, parent_hwpt_id,
UINT32_MAX, IOMMU_HWPT_FAULT_ID_VALID,
&iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST,
&data, sizeof(data));
test_cmd_hwpt_alloc_iopf(self->device_id, parent_hwpt_id, fault_id,
IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id,
IOMMU_HWPT_DATA_SELFTEST, &data,
sizeof(data));
test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[0],
IOMMU_TEST_IOTLB_DEFAULT);
test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[1],
......@@ -504,14 +516,24 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested)
_test_ioctl_destroy(self->fd, nested_hwpt_id[1]));
test_ioctl_destroy(nested_hwpt_id[0]);
/* Switch from nested_hwpt_id[1] to iopf_hwpt_id */
test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id);
EXPECT_ERRNO(EBUSY,
_test_ioctl_destroy(self->fd, iopf_hwpt_id));
/* Trigger an IOPF on the device */
test_cmd_trigger_iopf(self->device_id, fault_fd);
/* Detach from nested_hwpt_id[1] and destroy it */
test_cmd_mock_domain_replace(self->stdev_id, parent_hwpt_id);
test_ioctl_destroy(nested_hwpt_id[1]);
test_ioctl_destroy(iopf_hwpt_id);
/* Detach from the parent hw_pagetable and destroy it */
test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
test_ioctl_destroy(parent_hwpt_id);
test_ioctl_destroy(parent_hwpt_id_not_work);
close(fault_fd);
test_ioctl_destroy(fault_id);
} else {
test_err_hwpt_alloc(ENOENT, self->device_id, self->ioas_id, 0,
&parent_hwpt_id);
......@@ -1722,10 +1744,17 @@ FIXTURE_VARIANT(iommufd_dirty_tracking)
FIXTURE_SETUP(iommufd_dirty_tracking)
{
unsigned long size;
int mmap_flags;
void *vrc;
int rc;
if (variant->buffer_size < MOCK_PAGE_SIZE) {
SKIP(return,
"Skipping buffer_size=%lu, less than MOCK_PAGE_SIZE=%lu",
variant->buffer_size, MOCK_PAGE_SIZE);
}
self->fd = open("/dev/iommu", O_RDWR);
ASSERT_NE(-1, self->fd);
......@@ -1749,12 +1778,11 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
assert(vrc == self->buffer);
self->page_size = MOCK_PAGE_SIZE;
self->bitmap_size =
variant->buffer_size / self->page_size / BITS_PER_BYTE;
self->bitmap_size = variant->buffer_size / self->page_size;
/* Provision with an extra (PAGE_SIZE) for the unaligned case */
rc = posix_memalign(&self->bitmap, PAGE_SIZE,
self->bitmap_size + PAGE_SIZE);
size = DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE);
rc = posix_memalign(&self->bitmap, PAGE_SIZE, size + PAGE_SIZE);
assert(!rc);
assert(self->bitmap);
assert((uintptr_t)self->bitmap % PAGE_SIZE == 0);
......@@ -1775,51 +1803,63 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
FIXTURE_TEARDOWN(iommufd_dirty_tracking)
{
munmap(self->buffer, variant->buffer_size);
munmap(self->bitmap, self->bitmap_size);
munmap(self->bitmap, DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE));
teardown_iommufd(self->fd, _metadata);
}
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k)
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty8k)
{
/* half of an u8 index bitmap */
.buffer_size = 8UL * 1024UL,
};
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty16k)
{
/* one u8 index bitmap */
.buffer_size = 16UL * 1024UL,
};
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64k)
{
/* one u32 index bitmap */
.buffer_size = 128UL * 1024UL,
.buffer_size = 64UL * 1024UL,
};
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256k)
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k)
{
/* one u64 index bitmap */
.buffer_size = 256UL * 1024UL,
.buffer_size = 128UL * 1024UL,
};
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty640k)
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty320k)
{
/* two u64 index and trailing end bitmap */
.buffer_size = 640UL * 1024UL,
.buffer_size = 320UL * 1024UL,
};
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M)
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64M)
{
/* 4K bitmap (128M IOVA range) */
.buffer_size = 128UL * 1024UL * 1024UL,
/* 4K bitmap (64M IOVA range) */
.buffer_size = 64UL * 1024UL * 1024UL,
};
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge)
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64M_huge)
{
/* 4K bitmap (128M IOVA range) */
.buffer_size = 128UL * 1024UL * 1024UL,
/* 4K bitmap (64M IOVA range) */
.buffer_size = 64UL * 1024UL * 1024UL,
.hugepages = true,
};
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M)
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M)
{
/* 8K bitmap (256M IOVA range) */
.buffer_size = 256UL * 1024UL * 1024UL,
/* 8K bitmap (128M IOVA range) */
.buffer_size = 128UL * 1024UL * 1024UL,
};
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M_huge)
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge)
{
/* 8K bitmap (256M IOVA range) */
.buffer_size = 256UL * 1024UL * 1024UL,
/* 8K bitmap (128M IOVA range) */
.buffer_size = 128UL * 1024UL * 1024UL,
.hugepages = true,
};
......
......@@ -615,7 +615,7 @@ TEST_FAIL_NTH(basic_fail_nth, device)
if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info), NULL))
return -1;
if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, &hwpt_id,
if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, 0, &hwpt_id,
IOMMU_HWPT_DATA_NONE, 0, 0))
return -1;
......
......@@ -22,6 +22,8 @@
#define BIT_MASK(nr) (1UL << ((nr) % __BITS_PER_LONG))
#define BIT_WORD(nr) ((nr) / __BITS_PER_LONG)
/*
 * Integer division of n by d, rounded up (for positive operands).
 * d is expanded twice, so do not pass an expression with side effects.
 */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
static inline void set_bit(unsigned int nr, unsigned long *addr)
{
unsigned long mask = BIT_MASK(nr);
......@@ -153,7 +155,7 @@ static int _test_cmd_mock_domain_replace(int fd, __u32 stdev_id, __u32 pt_id,
EXPECT_ERRNO(_errno, _test_cmd_mock_domain_replace(self->fd, stdev_id, \
pt_id, NULL))
static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id,
static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, __u32 ft_id,
__u32 flags, __u32 *hwpt_id, __u32 data_type,
void *data, size_t data_len)
{
......@@ -165,6 +167,7 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id,
.data_type = data_type,
.data_len = data_len,
.data_uptr = (uint64_t)data,
.fault_id = ft_id,
};
int ret;
......@@ -177,24 +180,36 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id,
}
#define test_cmd_hwpt_alloc(device_id, pt_id, flags, hwpt_id) \
ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \
ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \
hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, \
0))
#define test_err_hwpt_alloc(_errno, device_id, pt_id, flags, hwpt_id) \
EXPECT_ERRNO(_errno, _test_cmd_hwpt_alloc( \
self->fd, device_id, pt_id, flags, \
self->fd, device_id, pt_id, 0, flags, \
hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, 0))
#define test_cmd_hwpt_alloc_nested(device_id, pt_id, flags, hwpt_id, \
data_type, data, data_len) \
ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \
ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \
hwpt_id, data_type, data, data_len))
#define test_err_hwpt_alloc_nested(_errno, device_id, pt_id, flags, hwpt_id, \
data_type, data, data_len) \
EXPECT_ERRNO(_errno, \
_test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \
_test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \
hwpt_id, data_type, data, data_len))
#define test_cmd_hwpt_alloc_iopf(device_id, pt_id, fault_id, flags, hwpt_id, \
data_type, data, data_len) \
ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, fault_id, \
flags, hwpt_id, data_type, data, \
data_len))
#define test_err_hwpt_alloc_iopf(_errno, device_id, pt_id, fault_id, flags, \
hwpt_id, data_type, data, data_len) \
EXPECT_ERRNO(_errno, \
_test_cmd_hwpt_alloc(self->fd, device_id, pt_id, fault_id, \
flags, hwpt_id, data_type, data, \
data_len))
#define test_cmd_hwpt_check_iotlb(hwpt_id, iotlb_id, expected) \
({ \
struct iommu_test_cmd test_cmd = { \
......@@ -346,12 +361,12 @@ static int _test_cmd_mock_domain_set_dirty(int fd, __u32 hwpt_id, size_t length,
static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length,
__u64 iova, size_t page_size,
size_t pte_page_size, __u64 *bitmap,
__u64 bitmap_size, __u32 flags,
__u64 nbits, __u32 flags,
struct __test_metadata *_metadata)
{
unsigned long npte = pte_page_size / page_size, pteset = 2 * npte;
unsigned long nbits = bitmap_size * BITS_PER_BYTE;
unsigned long j, i, nr = nbits / pteset ?: 1;
unsigned long bitmap_size = DIV_ROUND_UP(nbits, BITS_PER_BYTE);
__u64 out_dirty = 0;
/* Mark all even bits as dirty in the mock domain */
......@@ -684,3 +699,66 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data,
#define test_cmd_get_hw_capabilities(device_id, caps, mask) \
ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, 0, &caps))
/*
 * Issue IOMMU_FAULT_QUEUE_ALLOC on @fd.
 *
 * On success returns 0 and stores the new object's ID in *@fault_id and its
 * file descriptor in *@fault_fd. On failure returns the non-zero ioctl()
 * result and leaves both outputs untouched.
 */
static int _test_ioctl_fault_alloc(int fd, __u32 *fault_id, __u32 *fault_fd)
{
	struct iommu_fault_alloc cmd = {
		.size = sizeof(cmd),
	};
	int ret = ioctl(fd, IOMMU_FAULT_QUEUE_ALLOC, &cmd);

	if (!ret) {
		*fault_id = cmd.out_fault_id;
		*fault_fd = cmd.out_fault_fd;
	}

	return ret;
}
/*
 * Allocate a fault object on self->fd and assert that the ioctl succeeded and
 * that both the returned fault ID and the returned fault fd are non-zero.
 * Both arguments are pointers: they are passed to _test_ioctl_fault_alloc()
 * and dereferenced by the checks.
 */
#define test_ioctl_fault_alloc(fault_id, fault_fd) \
({ \
ASSERT_EQ(0, _test_ioctl_fault_alloc(self->fd, fault_id, \
fault_fd)); \
ASSERT_NE(0, *(fault_id)); \
ASSERT_NE(0, *(fault_fd)); \
})
/*
 * Inject one I/O page fault for @device_id through the selftest ioctl on @fd,
 * then read the resulting fault message from @fault_fd and answer it with
 * IOMMUFD_PAGE_RESP_SUCCESS, echoing back the cookie from the fault message.
 *
 * Returns 0 on success, the non-zero ioctl() result if triggering the fault
 * fails, or -EIO if a complete fault message cannot be read from, or a
 * complete response cannot be written to, @fault_fd.
 */
static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 fault_fd)
{
	struct iommu_test_cmd trigger_iopf_cmd = {
		.size = sizeof(trigger_iopf_cmd),
		.op = IOMMU_TEST_OP_TRIGGER_IOPF,
		.trigger_iopf = {
			.dev_id = device_id,
			.pasid = 0x1,
			.grpid = 0x2,
			.perm = IOMMU_PGFAULT_PERM_READ | IOMMU_PGFAULT_PERM_WRITE,
			.addr = 0xdeadbeaf,
		},
	};
	struct iommu_hwpt_page_response response = {
		.code = IOMMUFD_PAGE_RESP_SUCCESS,
	};
	struct iommu_hwpt_pgfault fault = {};
	ssize_t bytes;
	int ret;

	ret = ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_IOPF), &trigger_iopf_cmd);
	if (ret)
		return ret;

	/*
	 * The cookie is the last member of the fault message, so a short read
	 * would leave it at its zero-initialized value and the response below
	 * would carry a bogus cookie. Insist on a complete message.
	 */
	bytes = read(fault_fd, &fault, sizeof(fault));
	if (bytes != (ssize_t)sizeof(fault))
		return -EIO;

	response.cookie = fault.cookie;

	/* A partially written response is not a delivered response. */
	bytes = write(fault_fd, &response, sizeof(response));
	if (bytes != (ssize_t)sizeof(response))
		return -EIO;

	return 0;
}
/*
 * Trigger an I/O page fault on device_id via self->fd and respond to it
 * through fault_fd; asserts that _test_cmd_trigger_iopf() returned 0.
 */
#define test_cmd_trigger_iopf(device_id, fault_fd) \
ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, fault_fd))
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment