Commit ef7c8f2b authored by Linus Torvalds

Merge tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd

Pull iommufd updates from Jason Gunthorpe:

 - The iova_bitmap logic for efficiently reporting dirty pages back to
   userspace has a few more tricky corner case bugs that have been
   resolved and backed with new tests.

   The revised version has simpler logic.

 - Shared branch with iommu for handle support when doing domain attach.

   Handles allow the domain owner to include additional private data on
   a per-device basis.

 - IO Page Fault Reporting to userspace via iommufd. Page faults can be
   generated on fault capable HWPTs when a translation is not present.

   Routing them to userspace would allow a VMM to be able to virtualize
   them into an emulated vIOMMU. This is the next step to fully enabling
   vSVA support.

* tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd: (26 commits)
  iommufd: Put constants for all the uAPI enums
  iommufd: Fix error pointer checking
  iommufd: Add check on user response code
  iommufd: Remove IOMMUFD_PAGE_RESP_FAILURE
  iommufd: Require drivers to supply the cache_invalidate_user ops
  iommufd/selftest: Add coverage for IOPF test
  iommufd/selftest: Add IOPF support for mock device
  iommufd: Associate fault object with iommufd_hw_pgtable
  iommufd: Fault-capable hwpt attach/detach/replace
  iommufd: Add iommufd fault object
  iommufd: Add fault and response message definitions
  iommu: Extend domain attach group with handle support
  iommu: Add attach handle to struct iopf_group
  iommu: Remove sva handle list
  iommu: Introduce domain attachment handle
  iommufd/iova_bitmap: Remove iterator logic
  iommufd/iova_bitmap: Dynamic pinning on iova_bitmap_set()
  iommufd/iova_bitmap: Consolidate iova_bitmap_set exit conditionals
  iommufd/iova_bitmap: Move initial pinning to iova_bitmap_for_each()
  iommufd/iova_bitmap: Cache mapped length in iova_bitmap_map struct
  ...
parents 07e773db 136a8066
...@@ -584,7 +584,7 @@ static int idxd_enable_system_pasid(struct idxd_device *idxd) ...@@ -584,7 +584,7 @@ static int idxd_enable_system_pasid(struct idxd_device *idxd)
* DMA domain is owned by the driver, it should support all valid * DMA domain is owned by the driver, it should support all valid
* types such as DMA-FQ, identity, etc. * types such as DMA-FQ, identity, etc.
*/ */
ret = iommu_attach_device_pasid(domain, dev, pasid); ret = iommu_attach_device_pasid(domain, dev, pasid, NULL);
if (ret) { if (ret) {
dev_err(dev, "failed to attach device pasid %d, domain type %d", dev_err(dev, "failed to attach device pasid %d, domain type %d",
pasid, domain->type); pasid, domain->type);
......
...@@ -59,30 +59,6 @@ void iopf_free_group(struct iopf_group *group) ...@@ -59,30 +59,6 @@ void iopf_free_group(struct iopf_group *group)
} }
EXPORT_SYMBOL_GPL(iopf_free_group); EXPORT_SYMBOL_GPL(iopf_free_group);
/*
 * Pick the domain that should handle @fault raised by @dev.
 *
 * If the fault carries a valid PASID, the domain attached to that PASID is
 * looked up (any domain type); an error pointer from the lookup is treated
 * as "no domain". Otherwise the domain attached to the device itself is
 * used.
 *
 * Returns the domain, or NULL (after a rate-limited warning) when no domain
 * is found or the domain has no iopf_handler installed.
 */
static struct iommu_domain *get_domain_for_iopf(struct device *dev,
struct iommu_fault *fault)
{
struct iommu_domain *domain;
if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
domain = iommu_get_domain_for_dev_pasid(dev, fault->prm.pasid, 0);
/* Collapse lookup errors into the common "no domain" path below. */
if (IS_ERR(domain))
domain = NULL;
} else {
domain = iommu_get_domain_for_dev(dev);
}
if (!domain || !domain->iopf_handler) {
dev_warn_ratelimited(dev,
"iopf (pasid %d) without domain attached or handler installed\n",
fault->prm.pasid);
return NULL;
}
return domain;
}
/* Non-last request of a group. Postpone until the last one. */ /* Non-last request of a group. Postpone until the last one. */
static int report_partial_fault(struct iommu_fault_param *fault_param, static int report_partial_fault(struct iommu_fault_param *fault_param,
struct iommu_fault *fault) struct iommu_fault *fault)
...@@ -134,6 +110,8 @@ static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param, ...@@ -134,6 +110,8 @@ static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param,
list_add(&group->pending_node, &iopf_param->faults); list_add(&group->pending_node, &iopf_param->faults);
mutex_unlock(&iopf_param->lock); mutex_unlock(&iopf_param->lock);
group->fault_count = list_count_nodes(&group->faults);
return group; return group;
} }
...@@ -206,20 +184,51 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) ...@@ -206,20 +184,51 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
if (group == &abort_group) if (group == &abort_group)
goto err_abort; goto err_abort;
group->domain = get_domain_for_iopf(dev, fault); if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
if (!group->domain) group->attach_handle = iommu_attach_handle_get(dev->iommu_group,
fault->prm.pasid,
0);
if (IS_ERR(group->attach_handle)) {
const struct iommu_ops *ops = dev_iommu_ops(dev);
if (!ops->user_pasid_table)
goto err_abort;
/*
* The iommu driver for this device supports user-
* managed PASID table. Therefore page faults for
* any PASID should go through the NESTING domain
* attached to the device RID.
*/
group->attach_handle =
iommu_attach_handle_get(dev->iommu_group,
IOMMU_NO_PASID,
IOMMU_DOMAIN_NESTED);
if (IS_ERR(group->attach_handle))
goto err_abort;
}
} else {
group->attach_handle =
iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0);
if (IS_ERR(group->attach_handle))
goto err_abort;
}
if (!group->attach_handle->domain->iopf_handler)
goto err_abort; goto err_abort;
/* /*
* On success iopf_handler must call iopf_group_response() and * On success iopf_handler must call iopf_group_response() and
* iopf_free_group() * iopf_free_group()
*/ */
if (group->domain->iopf_handler(group)) if (group->attach_handle->domain->iopf_handler(group))
goto err_abort; goto err_abort;
return; return;
err_abort: err_abort:
dev_warn_ratelimited(dev, "iopf with pasid %d aborted\n",
fault->prm.pasid);
iopf_group_response(group, IOMMU_PAGE_RESP_FAILURE); iopf_group_response(group, IOMMU_PAGE_RESP_FAILURE);
if (group == &abort_group) if (group == &abort_group)
__iopf_free_group(group); __iopf_free_group(group);
......
...@@ -28,4 +28,15 @@ void iommu_device_unregister_bus(struct iommu_device *iommu, ...@@ -28,4 +28,15 @@ void iommu_device_unregister_bus(struct iommu_device *iommu,
const struct bus_type *bus, const struct bus_type *bus,
struct notifier_block *nb); struct notifier_block *nb);
struct iommu_attach_handle *iommu_attach_handle_get(struct iommu_group *group,
ioasid_t pasid,
unsigned int type);
int iommu_attach_group_handle(struct iommu_domain *domain,
struct iommu_group *group,
struct iommu_attach_handle *handle);
void iommu_detach_group_handle(struct iommu_domain *domain,
struct iommu_group *group);
int iommu_replace_group_handle(struct iommu_group *group,
struct iommu_domain *new_domain,
struct iommu_attach_handle *handle);
#endif /* __LINUX_IOMMU_PRIV_H */ #endif /* __LINUX_IOMMU_PRIV_H */
...@@ -41,7 +41,6 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de ...@@ -41,7 +41,6 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de
} }
iommu_mm->pasid = pasid; iommu_mm->pasid = pasid;
INIT_LIST_HEAD(&iommu_mm->sva_domains); INIT_LIST_HEAD(&iommu_mm->sva_domains);
INIT_LIST_HEAD(&iommu_mm->sva_handles);
/* /*
* Make sure the write to mm->iommu_mm is not reordered in front of * Make sure the write to mm->iommu_mm is not reordered in front of
* initialization to iommu_mm fields. If it does, readers may see a * initialization to iommu_mm fields. If it does, readers may see a
...@@ -69,11 +68,16 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de ...@@ -69,11 +68,16 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de
*/ */
struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
{ {
struct iommu_group *group = dev->iommu_group;
struct iommu_attach_handle *attach_handle;
struct iommu_mm_data *iommu_mm; struct iommu_mm_data *iommu_mm;
struct iommu_domain *domain; struct iommu_domain *domain;
struct iommu_sva *handle; struct iommu_sva *handle;
int ret; int ret;
if (!group)
return ERR_PTR(-ENODEV);
mutex_lock(&iommu_sva_lock); mutex_lock(&iommu_sva_lock);
/* Allocate mm->pasid if necessary. */ /* Allocate mm->pasid if necessary. */
...@@ -83,12 +87,22 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm ...@@ -83,12 +87,22 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
goto out_unlock; goto out_unlock;
} }
list_for_each_entry(handle, &mm->iommu_mm->sva_handles, handle_item) { /* A bond already exists, just take a reference. */
if (handle->dev == dev) { attach_handle = iommu_attach_handle_get(group, iommu_mm->pasid, IOMMU_DOMAIN_SVA);
refcount_inc(&handle->users); if (!IS_ERR(attach_handle)) {
mutex_unlock(&iommu_sva_lock); handle = container_of(attach_handle, struct iommu_sva, handle);
return handle; if (attach_handle->domain->mm != mm) {
ret = -EBUSY;
goto out_unlock;
} }
refcount_inc(&handle->users);
mutex_unlock(&iommu_sva_lock);
return handle;
}
if (PTR_ERR(attach_handle) != -ENOENT) {
ret = PTR_ERR(attach_handle);
goto out_unlock;
} }
handle = kzalloc(sizeof(*handle), GFP_KERNEL); handle = kzalloc(sizeof(*handle), GFP_KERNEL);
...@@ -99,7 +113,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm ...@@ -99,7 +113,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
/* Search for an existing domain. */ /* Search for an existing domain. */
list_for_each_entry(domain, &mm->iommu_mm->sva_domains, next) { list_for_each_entry(domain, &mm->iommu_mm->sva_domains, next) {
ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid); ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid,
&handle->handle);
if (!ret) { if (!ret) {
domain->users++; domain->users++;
goto out; goto out;
...@@ -113,7 +128,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm ...@@ -113,7 +128,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
goto out_free_handle; goto out_free_handle;
} }
ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid); ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid,
&handle->handle);
if (ret) if (ret)
goto out_free_domain; goto out_free_domain;
domain->users = 1; domain->users = 1;
...@@ -121,10 +137,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm ...@@ -121,10 +137,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
out: out:
refcount_set(&handle->users, 1); refcount_set(&handle->users, 1);
list_add(&handle->handle_item, &mm->iommu_mm->sva_handles);
mutex_unlock(&iommu_sva_lock); mutex_unlock(&iommu_sva_lock);
handle->dev = dev; handle->dev = dev;
handle->domain = domain;
return handle; return handle;
out_free_domain: out_free_domain:
...@@ -147,7 +161,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_bind_device); ...@@ -147,7 +161,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_bind_device);
*/ */
void iommu_sva_unbind_device(struct iommu_sva *handle) void iommu_sva_unbind_device(struct iommu_sva *handle)
{ {
struct iommu_domain *domain = handle->domain; struct iommu_domain *domain = handle->handle.domain;
struct iommu_mm_data *iommu_mm = domain->mm->iommu_mm; struct iommu_mm_data *iommu_mm = domain->mm->iommu_mm;
struct device *dev = handle->dev; struct device *dev = handle->dev;
...@@ -156,7 +170,6 @@ void iommu_sva_unbind_device(struct iommu_sva *handle) ...@@ -156,7 +170,6 @@ void iommu_sva_unbind_device(struct iommu_sva *handle)
mutex_unlock(&iommu_sva_lock); mutex_unlock(&iommu_sva_lock);
return; return;
} }
list_del(&handle->handle_item);
iommu_detach_device_pasid(domain, dev, iommu_mm->pasid); iommu_detach_device_pasid(domain, dev, iommu_mm->pasid);
if (--domain->users == 0) { if (--domain->users == 0) {
...@@ -170,7 +183,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_unbind_device); ...@@ -170,7 +183,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_unbind_device);
u32 iommu_sva_get_pasid(struct iommu_sva *handle) u32 iommu_sva_get_pasid(struct iommu_sva *handle)
{ {
struct iommu_domain *domain = handle->domain; struct iommu_domain *domain = handle->handle.domain;
return mm_get_enqcmd_pasid(domain->mm); return mm_get_enqcmd_pasid(domain->mm);
} }
...@@ -259,7 +272,8 @@ static void iommu_sva_handle_iopf(struct work_struct *work) ...@@ -259,7 +272,8 @@ static void iommu_sva_handle_iopf(struct work_struct *work)
if (status != IOMMU_PAGE_RESP_SUCCESS) if (status != IOMMU_PAGE_RESP_SUCCESS)
break; break;
status = iommu_sva_handle_mm(&iopf->fault, group->domain->mm); status = iommu_sva_handle_mm(&iopf->fault,
group->attach_handle->domain->mm);
} }
iopf_group_response(group, status); iopf_group_response(group, status);
......
...@@ -3352,16 +3352,17 @@ static void __iommu_remove_group_pasid(struct iommu_group *group, ...@@ -3352,16 +3352,17 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
* @domain: the iommu domain. * @domain: the iommu domain.
* @dev: the attached device. * @dev: the attached device.
* @pasid: the pasid of the device. * @pasid: the pasid of the device.
* @handle: the attach handle.
* *
* Return: 0 on success, or an error. * Return: 0 on success, or an error.
*/ */
int iommu_attach_device_pasid(struct iommu_domain *domain, int iommu_attach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid) struct device *dev, ioasid_t pasid,
struct iommu_attach_handle *handle)
{ {
/* Caller must be a probed driver on dev */ /* Caller must be a probed driver on dev */
struct iommu_group *group = dev->iommu_group; struct iommu_group *group = dev->iommu_group;
struct group_device *device; struct group_device *device;
void *curr;
int ret; int ret;
if (!domain->ops->set_dev_pasid) if (!domain->ops->set_dev_pasid)
...@@ -3382,11 +3383,12 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, ...@@ -3382,11 +3383,12 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
} }
} }
curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL); if (handle)
if (curr) { handle->domain = domain;
ret = xa_err(curr) ? : -EBUSY;
ret = xa_insert(&group->pasid_array, pasid, handle, GFP_KERNEL);
if (ret)
goto out_unlock; goto out_unlock;
}
ret = __iommu_set_group_pasid(domain, group, pasid); ret = __iommu_set_group_pasid(domain, group, pasid);
if (ret) if (ret)
...@@ -3414,46 +3416,11 @@ void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, ...@@ -3414,46 +3416,11 @@ void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev,
mutex_lock(&group->mutex); mutex_lock(&group->mutex);
__iommu_remove_group_pasid(group, pasid, domain); __iommu_remove_group_pasid(group, pasid, domain);
WARN_ON(xa_erase(&group->pasid_array, pasid) != domain); xa_erase(&group->pasid_array, pasid);
mutex_unlock(&group->mutex); mutex_unlock(&group->mutex);
} }
EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); EXPORT_SYMBOL_GPL(iommu_detach_device_pasid);
/*
 * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev
 * @dev: the queried device
 * @pasid: the pasid of the device
 * @type: matched domain type, 0 for any match
 *
 * This is a variant of iommu_get_domain_for_dev(). It returns the existing
 * domain attached to pasid of a device. Callers must hold a lock around this
 * function, and both iommu_attach/detach_dev_pasid() whenever a domain of
 * type is being manipulated. This API does not internally resolve races with
 * attach/detach.
 *
 * Return: the attached domain on success; NULL if @dev has no iommu group or
 * nothing is stored for @pasid; ERR_PTR(-EBUSY) if @type is non-zero and the
 * stored domain is of a different type.
 */
struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
ioasid_t pasid,
unsigned int type)
{
/* Caller must be a probed driver on dev */
struct iommu_group *group = dev->iommu_group;
struct iommu_domain *domain;
if (!group)
return NULL;
xa_lock(&group->pasid_array);
domain = xa_load(&group->pasid_array, pasid);
/* A type of 0 matches any domain; otherwise report a mismatch as busy. */
if (type && domain && domain->type != type)
domain = ERR_PTR(-EBUSY);
xa_unlock(&group->pasid_array);
return domain;
}
EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid);
ioasid_t iommu_alloc_global_pasid(struct device *dev) ioasid_t iommu_alloc_global_pasid(struct device *dev)
{ {
int ret; int ret;
...@@ -3480,3 +3447,137 @@ void iommu_free_global_pasid(ioasid_t pasid) ...@@ -3480,3 +3447,137 @@ void iommu_free_global_pasid(ioasid_t pasid)
ida_free(&iommu_global_pasid_ida, pasid); ida_free(&iommu_global_pasid_ida, pasid);
} }
EXPORT_SYMBOL_GPL(iommu_free_global_pasid); EXPORT_SYMBOL_GPL(iommu_free_global_pasid);
/**
 * iommu_attach_handle_get - Look up the attach handle stored for a PASID
 * @group: the iommu group that the domain was attached to
 * @pasid: the pasid within the group
 * @type: domain type the handle must match, or 0 to accept any type
 *
 * The handle is read from the group's pasid_array under the xarray lock.
 * An attach handle lives from the moment its domain is attached until the
 * domain is detached; callers must serialize this lookup against domain
 * attachment and detachment and may only use the handle within that window.
 *
 * Return: the handle on success, ERR_PTR(-ENOENT) if nothing is stored for
 * @pasid, ERR_PTR(-EBUSY) if @type is non-zero and the handle's domain is of
 * a different type.
 */
struct iommu_attach_handle *
iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int type)
{
	struct iommu_attach_handle *found;
	struct iommu_attach_handle *result;

	xa_lock(&group->pasid_array);
	found = xa_load(&group->pasid_array, pasid);
	if (!found)
		result = ERR_PTR(-ENOENT);
	else if (type && found->domain->type != type)
		result = ERR_PTR(-EBUSY);
	else
		result = found;
	xa_unlock(&group->pasid_array);

	return result;
}
EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, IOMMUFD_INTERNAL);
/**
 * iommu_attach_group_handle - Attach an IOMMU domain to a group with a handle
 * @domain: IOMMU domain to attach
 * @group: IOMMU group that will be attached
 * @handle: attach handle to record for the group, may be NULL
 *
 * Variant of iommu_attach_group() that lets the caller supply an attach
 * handle. The handle is recorded in the group's pasid_array at
 * IOMMU_NO_PASID before the domain is attached, and removed again if the
 * attach fails. This is currently used by IOMMUFD to deliver the I/O page
 * faults.
 *
 * Return: 0 on success, or the error from xa_insert() or
 * __iommu_attach_group() on failure.
 */
int iommu_attach_group_handle(struct iommu_domain *domain,
			      struct iommu_group *group,
			      struct iommu_attach_handle *handle)
{
	int rc;

	/* Bind the handle to its domain before it becomes visible to lookups. */
	if (handle)
		handle->domain = domain;

	mutex_lock(&group->mutex);
	rc = xa_insert(&group->pasid_array, IOMMU_NO_PASID, handle, GFP_KERNEL);
	if (!rc) {
		rc = __iommu_attach_group(domain, group);
		/* Attach failed: take the just-inserted entry back out. */
		if (rc)
			xa_erase(&group->pasid_array, IOMMU_NO_PASID);
	}
	mutex_unlock(&group->mutex);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommu_attach_group_handle, IOMMUFD_INTERNAL);
/**
 * iommu_detach_group_handle - Detach an IOMMU domain from an IOMMU group
 * @domain: IOMMU domain to detach (not referenced by the function body)
 * @group: IOMMU group that the domain will be detached from
 *
 * Detach the specified IOMMU domain from the specified IOMMU group.
 * It must be used in conjunction with iommu_attach_group_handle().
 *
 * Under the group mutex, the group is switched back to its core domain and
 * the entry stored at IOMMU_NO_PASID in the group's pasid_array is erased.
 */
void iommu_detach_group_handle(struct iommu_domain *domain,
struct iommu_group *group)
{
mutex_lock(&group->mutex);
__iommu_group_set_core_domain(group);
xa_erase(&group->pasid_array, IOMMU_NO_PASID);
mutex_unlock(&group->mutex);
}
EXPORT_SYMBOL_NS_GPL(iommu_detach_group_handle, IOMMUFD_INTERNAL);
/**
* iommu_replace_group_handle - replace the domain that a group is attached to
* @group: IOMMU group that will be attached to the new domain
* @new_domain: new IOMMU domain to replace with
* @handle: attach handle
*
* This is a variant of iommu_group_replace_domain(). It allows the caller to
* provide an attach handle for the new domain and use it when the domain is
* attached.
*/
int iommu_replace_group_handle(struct iommu_group *group,
struct iommu_domain *new_domain,
struct iommu_attach_handle *handle)
{
void *curr;
int ret;
if (!new_domain)
return -EINVAL;
mutex_lock(&group->mutex);
if (handle) {
ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL);
if (ret)
goto err_unlock;
}
ret = __iommu_group_set_domain(group, new_domain);
if (ret)
goto err_release;
curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, handle, GFP_KERNEL);
WARN_ON(xa_is_err(curr));
mutex_unlock(&group->mutex);
return 0;
err_release:
xa_release(&group->pasid_array, IOMMU_NO_PASID);
err_unlock:
mutex_unlock(&group->mutex);
return ret;
}
EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, IOMMUFD_INTERNAL);
# SPDX-License-Identifier: GPL-2.0-only # SPDX-License-Identifier: GPL-2.0-only
iommufd-y := \ iommufd-y := \
device.o \ device.o \
fault.o \
hw_pagetable.o \ hw_pagetable.o \
io_pagetable.o \ io_pagetable.o \
ioas.o \ ioas.o \
......
...@@ -215,6 +215,7 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, ...@@ -215,6 +215,7 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
refcount_inc(&idev->obj.users); refcount_inc(&idev->obj.users);
/* igroup refcount moves into iommufd_device */ /* igroup refcount moves into iommufd_device */
idev->igroup = igroup; idev->igroup = igroup;
mutex_init(&idev->iopf_lock);
/* /*
* If the caller fails after this success it must call * If the caller fails after this success it must call
...@@ -376,7 +377,7 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, ...@@ -376,7 +377,7 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
* attachment. * attachment.
*/ */
if (list_empty(&idev->igroup->device_list)) { if (list_empty(&idev->igroup->device_list)) {
rc = iommu_attach_group(hwpt->domain, idev->igroup->group); rc = iommufd_hwpt_attach_device(hwpt, idev);
if (rc) if (rc)
goto err_unresv; goto err_unresv;
idev->igroup->hwpt = hwpt; idev->igroup->hwpt = hwpt;
...@@ -402,7 +403,7 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev) ...@@ -402,7 +403,7 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev)
mutex_lock(&idev->igroup->lock); mutex_lock(&idev->igroup->lock);
list_del(&idev->group_item); list_del(&idev->group_item);
if (list_empty(&idev->igroup->device_list)) { if (list_empty(&idev->igroup->device_list)) {
iommu_detach_group(hwpt->domain, idev->igroup->group); iommufd_hwpt_detach_device(hwpt, idev);
idev->igroup->hwpt = NULL; idev->igroup->hwpt = NULL;
} }
if (hwpt_is_paging(hwpt)) if (hwpt_is_paging(hwpt))
...@@ -497,7 +498,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, ...@@ -497,7 +498,7 @@ iommufd_device_do_replace(struct iommufd_device *idev,
goto err_unlock; goto err_unlock;
} }
rc = iommu_group_replace_domain(igroup->group, hwpt->domain); rc = iommufd_hwpt_replace_device(idev, hwpt, old_hwpt);
if (rc) if (rc)
goto err_unresv; goto err_unresv;
......
This diff is collapsed.
...@@ -8,6 +8,15 @@ ...@@ -8,6 +8,15 @@
#include "../iommu-priv.h" #include "../iommu-priv.h"
#include "iommufd_private.h" #include "iommufd_private.h"
static void __iommufd_hwpt_destroy(struct iommufd_hw_pagetable *hwpt)
{
if (hwpt->domain)
iommu_domain_free(hwpt->domain);
if (hwpt->fault)
refcount_dec(&hwpt->fault->obj.users);
}
void iommufd_hwpt_paging_destroy(struct iommufd_object *obj) void iommufd_hwpt_paging_destroy(struct iommufd_object *obj)
{ {
struct iommufd_hwpt_paging *hwpt_paging = struct iommufd_hwpt_paging *hwpt_paging =
...@@ -22,9 +31,7 @@ void iommufd_hwpt_paging_destroy(struct iommufd_object *obj) ...@@ -22,9 +31,7 @@ void iommufd_hwpt_paging_destroy(struct iommufd_object *obj)
hwpt_paging->common.domain); hwpt_paging->common.domain);
} }
if (hwpt_paging->common.domain) __iommufd_hwpt_destroy(&hwpt_paging->common);
iommu_domain_free(hwpt_paging->common.domain);
refcount_dec(&hwpt_paging->ioas->obj.users); refcount_dec(&hwpt_paging->ioas->obj.users);
} }
...@@ -49,9 +56,7 @@ void iommufd_hwpt_nested_destroy(struct iommufd_object *obj) ...@@ -49,9 +56,7 @@ void iommufd_hwpt_nested_destroy(struct iommufd_object *obj)
struct iommufd_hwpt_nested *hwpt_nested = struct iommufd_hwpt_nested *hwpt_nested =
container_of(obj, struct iommufd_hwpt_nested, common.obj); container_of(obj, struct iommufd_hwpt_nested, common.obj);
if (hwpt_nested->common.domain) __iommufd_hwpt_destroy(&hwpt_nested->common);
iommu_domain_free(hwpt_nested->common.domain);
refcount_dec(&hwpt_nested->parent->common.obj.users); refcount_dec(&hwpt_nested->parent->common.obj.users);
} }
...@@ -213,7 +218,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, ...@@ -213,7 +218,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
struct iommufd_hw_pagetable *hwpt; struct iommufd_hw_pagetable *hwpt;
int rc; int rc;
if (flags || !user_data->len || !ops->domain_alloc_user) if ((flags & ~IOMMU_HWPT_FAULT_ID_VALID) ||
!user_data->len || !ops->domain_alloc_user)
return ERR_PTR(-EOPNOTSUPP); return ERR_PTR(-EOPNOTSUPP);
if (parent->auto_domain || !parent->nest_parent) if (parent->auto_domain || !parent->nest_parent)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
...@@ -227,7 +233,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, ...@@ -227,7 +233,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
refcount_inc(&parent->common.obj.users); refcount_inc(&parent->common.obj.users);
hwpt_nested->parent = parent; hwpt_nested->parent = parent;
hwpt->domain = ops->domain_alloc_user(idev->dev, flags, hwpt->domain = ops->domain_alloc_user(idev->dev,
flags & ~IOMMU_HWPT_FAULT_ID_VALID,
parent->common.domain, user_data); parent->common.domain, user_data);
if (IS_ERR(hwpt->domain)) { if (IS_ERR(hwpt->domain)) {
rc = PTR_ERR(hwpt->domain); rc = PTR_ERR(hwpt->domain);
...@@ -236,7 +243,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, ...@@ -236,7 +243,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
} }
hwpt->domain->owner = ops; hwpt->domain->owner = ops;
if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED ||
!hwpt->domain->ops->cache_invalidate_user)) {
rc = -EINVAL; rc = -EINVAL;
goto out_abort; goto out_abort;
} }
...@@ -308,6 +316,21 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) ...@@ -308,6 +316,21 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
goto out_put_pt; goto out_put_pt;
} }
if (cmd->flags & IOMMU_HWPT_FAULT_ID_VALID) {
struct iommufd_fault *fault;
fault = iommufd_get_fault(ucmd, cmd->fault_id);
if (IS_ERR(fault)) {
rc = PTR_ERR(fault);
goto out_hwpt;
}
hwpt->fault = fault;
hwpt->domain->iopf_handler = iommufd_fault_iopf_handler;
hwpt->domain->fault_data = hwpt;
refcount_inc(&fault->obj.users);
iommufd_put_object(ucmd->ictx, &fault->obj);
}
cmd->out_hwpt_id = hwpt->obj.id; cmd->out_hwpt_id = hwpt->obj.id;
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
if (rc) if (rc)
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include <linux/iommu.h> #include <linux/iommu.h>
#include <linux/iova_bitmap.h> #include <linux/iova_bitmap.h>
#include <uapi/linux/iommufd.h> #include <uapi/linux/iommufd.h>
#include "../iommu-priv.h"
struct iommu_domain; struct iommu_domain;
struct iommu_group; struct iommu_group;
...@@ -128,6 +129,7 @@ enum iommufd_object_type { ...@@ -128,6 +129,7 @@ enum iommufd_object_type {
IOMMUFD_OBJ_HWPT_NESTED, IOMMUFD_OBJ_HWPT_NESTED,
IOMMUFD_OBJ_IOAS, IOMMUFD_OBJ_IOAS,
IOMMUFD_OBJ_ACCESS, IOMMUFD_OBJ_ACCESS,
IOMMUFD_OBJ_FAULT,
#ifdef CONFIG_IOMMUFD_TEST #ifdef CONFIG_IOMMUFD_TEST
IOMMUFD_OBJ_SELFTEST, IOMMUFD_OBJ_SELFTEST,
#endif #endif
...@@ -292,6 +294,7 @@ int iommufd_check_iova_range(struct io_pagetable *iopt, ...@@ -292,6 +294,7 @@ int iommufd_check_iova_range(struct io_pagetable *iopt,
struct iommufd_hw_pagetable { struct iommufd_hw_pagetable {
struct iommufd_object obj; struct iommufd_object obj;
struct iommu_domain *domain; struct iommu_domain *domain;
struct iommufd_fault *fault;
}; };
struct iommufd_hwpt_paging { struct iommufd_hwpt_paging {
...@@ -395,6 +398,9 @@ struct iommufd_device { ...@@ -395,6 +398,9 @@ struct iommufd_device {
/* always the physical device */ /* always the physical device */
struct device *dev; struct device *dev;
bool enforce_cache_coherency; bool enforce_cache_coherency;
/* protect iopf_enabled counter */
struct mutex iopf_lock;
unsigned int iopf_enabled;
}; };
static inline struct iommufd_device * static inline struct iommufd_device *
...@@ -426,6 +432,80 @@ void iopt_remove_access(struct io_pagetable *iopt, ...@@ -426,6 +432,80 @@ void iopt_remove_access(struct io_pagetable *iopt,
u32 iopt_access_list_id); u32 iopt_access_list_id);
void iommufd_access_destroy_object(struct iommufd_object *obj); void iommufd_access_destroy_object(struct iommufd_object *obj);
/*
 * An iommufd_fault object represents an interface to deliver I/O page faults
 * to the user space. These objects are created/destroyed by the user space and
 * associated with hardware page table objects during page-table allocation.
 */
struct iommufd_fault {
/* Embedded base object; iommufd_get_fault() converts from it via container_of(). */
struct iommufd_object obj;
struct iommufd_ctx *ictx;
/* NOTE(review): presumably the file userspace reads faults from - confirm in fault.c. */
struct file *filep;
/* The lists of outstanding faults protected by below mutex. */
struct mutex mutex;
/* NOTE(review): presumably faults queued for delivery to userspace - confirm in fault.c. */
struct list_head deliver;
/* NOTE(review): presumably delivered faults awaiting a user response - confirm in fault.c. */
struct xarray response;
struct wait_queue_head wait_queue;
};
/*
 * iommufd's attach handle: wraps the core iommu_attach_handle together with
 * a pointer to an iommufd_device.
 */
struct iommufd_attach_handle {
/* Core handle; to_iommufd_handle() recovers the wrapper from a pointer to it. */
struct iommu_attach_handle handle;
struct iommufd_device *idev;
};
/* Convert an iommu attach handle to iommufd handle. */
#define to_iommufd_handle(hdl) container_of(hdl, struct iommufd_attach_handle, handle)
/*
 * Look up the object with ID @id and type IOMMUFD_OBJ_FAULT in the command's
 * iommufd context and return it as an iommufd_fault. The result of the
 * lookup is passed through container_of() unchecked, so callers test the
 * returned pointer with IS_ERR().
 */
static inline struct iommufd_fault *
iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id)
{
	struct iommufd_object *found;

	found = iommufd_get_object(ucmd->ictx, id, IOMMUFD_OBJ_FAULT);

	return container_of(found, struct iommufd_fault, obj);
}
int iommufd_fault_alloc(struct iommufd_ucmd *ucmd);
void iommufd_fault_destroy(struct iommufd_object *obj);
int iommufd_fault_iopf_handler(struct iopf_group *group);
int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
struct iommufd_device *idev);
void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt,
struct iommufd_device *idev);
int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
struct iommufd_hw_pagetable *hwpt,
struct iommufd_hw_pagetable *old);
/*
 * Attach @idev's group to @hwpt. A hwpt without a fault object uses the
 * plain group attach; one with a fault object goes through the fault-aware
 * attach path instead. Returns whatever the chosen attach call returns.
 */
static inline int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
					     struct iommufd_device *idev)
{
	int rc;

	if (!hwpt->fault)
		rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
	else
		rc = iommufd_fault_domain_attach_dev(hwpt, idev);

	return rc;
}
/*
 * Detach @idev's group from @hwpt.
 *
 * A hwpt with a fault object is detached through the fault-aware path only,
 * mirroring iommufd_hwpt_attach_device() and iommufd_hwpt_replace_device(),
 * which also use either the fault variant or the plain group call, never
 * both. Falling through to iommu_detach_group() as well would repeat the
 * detach for the same group.
 * NOTE(review): relies on iommufd_fault_domain_detach_dev() performing the
 * group detach itself - confirm in fault.c.
 */
static inline void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
					      struct iommufd_device *idev)
{
	if (hwpt->fault) {
		iommufd_fault_domain_detach_dev(hwpt, idev);
		return;
	}

	iommu_detach_group(hwpt->domain, idev->igroup->group);
}
/*
 * Switch @idev's group from @old to @hwpt. The plain group replace is used
 * only when neither hwpt has a fault object; if either one does, the
 * fault-aware replace path handles the transition. Returns whatever the
 * chosen replace call returns.
 */
static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev,
					      struct iommufd_hw_pagetable *hwpt,
					      struct iommufd_hw_pagetable *old)
{
	if (!old->fault && !hwpt->fault)
		return iommu_group_replace_domain(idev->igroup->group,
						  hwpt->domain);

	return iommufd_fault_domain_replace_dev(idev, hwpt, old);
}
#ifdef CONFIG_IOMMUFD_TEST #ifdef CONFIG_IOMMUFD_TEST
int iommufd_test(struct iommufd_ucmd *ucmd); int iommufd_test(struct iommufd_ucmd *ucmd);
void iommufd_selftest_destroy(struct iommufd_object *obj); void iommufd_selftest_destroy(struct iommufd_object *obj);
......
...@@ -22,6 +22,7 @@ enum { ...@@ -22,6 +22,7 @@ enum {
IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS, IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS,
IOMMU_TEST_OP_DIRTY, IOMMU_TEST_OP_DIRTY,
IOMMU_TEST_OP_MD_CHECK_IOTLB, IOMMU_TEST_OP_MD_CHECK_IOTLB,
IOMMU_TEST_OP_TRIGGER_IOPF,
}; };
enum { enum {
...@@ -127,6 +128,13 @@ struct iommu_test_cmd { ...@@ -127,6 +128,13 @@ struct iommu_test_cmd {
__u32 id; __u32 id;
__u32 iotlb; __u32 iotlb;
} check_iotlb; } check_iotlb;
struct {
__u32 dev_id;
__u32 pasid;
__u32 grpid;
__u32 perm;
__u64 addr;
} trigger_iopf;
}; };
__u32 last; __u32 last;
}; };
......
...@@ -35,6 +35,9 @@ struct iova_bitmap_map { ...@@ -35,6 +35,9 @@ struct iova_bitmap_map {
/* base IOVA representing bit 0 of the first page */ /* base IOVA representing bit 0 of the first page */
unsigned long iova; unsigned long iova;
/* mapped length */
unsigned long length;
/* page size order that each bit granules to */ /* page size order that each bit granules to */
unsigned long pgshift; unsigned long pgshift;
...@@ -113,9 +116,6 @@ struct iova_bitmap { ...@@ -113,9 +116,6 @@ struct iova_bitmap {
/* length of the IOVA range for the whole bitmap */ /* length of the IOVA range for the whole bitmap */
size_t length; size_t length;
/* length of the IOVA range set ahead the pinned pages */
unsigned long set_ahead_length;
}; };
/* /*
...@@ -156,6 +156,8 @@ static unsigned long iova_bitmap_mapped_iova(struct iova_bitmap *bitmap) ...@@ -156,6 +156,8 @@ static unsigned long iova_bitmap_mapped_iova(struct iova_bitmap *bitmap)
return bitmap->iova + iova_bitmap_index_to_offset(bitmap, skip); return bitmap->iova + iova_bitmap_index_to_offset(bitmap, skip);
} }
static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap);
/* /*
* Pins the bitmap user pages for the current range window. * Pins the bitmap user pages for the current range window.
* This is internal to IOVA bitmap and called when advancing the * This is internal to IOVA bitmap and called when advancing the
...@@ -206,6 +208,7 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap) ...@@ -206,6 +208,7 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap)
* aligned. * aligned.
*/ */
mapped->pgoff = offset_in_page(addr); mapped->pgoff = offset_in_page(addr);
mapped->length = iova_bitmap_mapped_length(bitmap);
return 0; return 0;
} }
...@@ -263,9 +266,6 @@ struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length, ...@@ -263,9 +266,6 @@ struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length,
goto err; goto err;
} }
rc = iova_bitmap_get(bitmap);
if (rc)
goto err;
return bitmap; return bitmap;
err: err:
...@@ -338,65 +338,34 @@ static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap) ...@@ -338,65 +338,34 @@ static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap)
} }
/* /*
* Returns true if there's not more data to iterate. * Returns true if [@iova..@iova+@length-1] is part of the mapped IOVA range.
*/ */
static bool iova_bitmap_done(struct iova_bitmap *bitmap) static bool iova_bitmap_mapped_range(struct iova_bitmap_map *mapped,
unsigned long iova, size_t length)
{ {
return bitmap->mapped_base_index >= bitmap->mapped_total_index; return mapped->npages &&
} (iova >= mapped->iova &&
(iova + length - 1) <= (mapped->iova + mapped->length - 1));
static int iova_bitmap_set_ahead(struct iova_bitmap *bitmap,
size_t set_ahead_length)
{
int ret = 0;
while (set_ahead_length > 0 && !iova_bitmap_done(bitmap)) {
unsigned long length = iova_bitmap_mapped_length(bitmap);
unsigned long iova = iova_bitmap_mapped_iova(bitmap);
ret = iova_bitmap_get(bitmap);
if (ret)
break;
length = min(length, set_ahead_length);
iova_bitmap_set(bitmap, iova, length);
set_ahead_length -= length;
bitmap->mapped_base_index +=
iova_bitmap_offset_to_index(bitmap, length - 1) + 1;
iova_bitmap_put(bitmap);
}
bitmap->set_ahead_length = 0;
return ret;
} }
/* /*
* Advances to the next range, releases the current pinned * Advances to a selected range, releases the current pinned
* pages and pins the next set of bitmap pages. * pages and pins the next set of bitmap pages.
* Returns 0 on success or otherwise errno. * Returns 0 on success or otherwise errno.
*/ */
static int iova_bitmap_advance(struct iova_bitmap *bitmap) static int iova_bitmap_advance_to(struct iova_bitmap *bitmap,
unsigned long iova)
{ {
unsigned long iova = iova_bitmap_mapped_length(bitmap) - 1; unsigned long index;
unsigned long count = iova_bitmap_offset_to_index(bitmap, iova) + 1;
bitmap->mapped_base_index += count; index = iova_bitmap_offset_to_index(bitmap, iova - bitmap->iova);
if (index >= bitmap->mapped_total_index)
return -EINVAL;
bitmap->mapped_base_index = index;
iova_bitmap_put(bitmap); iova_bitmap_put(bitmap);
if (iova_bitmap_done(bitmap))
return 0;
/* Iterate, set and skip any bits requested for next iteration */
if (bitmap->set_ahead_length) {
int ret;
ret = iova_bitmap_set_ahead(bitmap, bitmap->set_ahead_length);
if (ret)
return ret;
}
/* When advancing the index we pin the next set of bitmap pages */ /* Pin the next set of bitmap pages */
return iova_bitmap_get(bitmap); return iova_bitmap_get(bitmap);
} }
...@@ -416,17 +385,7 @@ static int iova_bitmap_advance(struct iova_bitmap *bitmap) ...@@ -416,17 +385,7 @@ static int iova_bitmap_advance(struct iova_bitmap *bitmap)
int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque,
iova_bitmap_fn_t fn) iova_bitmap_fn_t fn)
{ {
int ret = 0; return fn(bitmap, bitmap->iova, bitmap->length, opaque);
for (; !iova_bitmap_done(bitmap) && !ret;
ret = iova_bitmap_advance(bitmap)) {
ret = fn(bitmap, iova_bitmap_mapped_iova(bitmap),
iova_bitmap_mapped_length(bitmap), opaque);
if (ret)
break;
}
return ret;
} }
EXPORT_SYMBOL_NS_GPL(iova_bitmap_for_each, IOMMUFD); EXPORT_SYMBOL_NS_GPL(iova_bitmap_for_each, IOMMUFD);
...@@ -444,11 +403,25 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, ...@@ -444,11 +403,25 @@ void iova_bitmap_set(struct iova_bitmap *bitmap,
unsigned long iova, size_t length) unsigned long iova, size_t length)
{ {
struct iova_bitmap_map *mapped = &bitmap->mapped; struct iova_bitmap_map *mapped = &bitmap->mapped;
unsigned long cur_bit = ((iova - mapped->iova) >> unsigned long cur_bit, last_bit, last_page_idx;
mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;
unsigned long last_bit = (((iova + length - 1) - mapped->iova) >> update_indexes:
mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; if (unlikely(!iova_bitmap_mapped_range(mapped, iova, length))) {
unsigned long last_page_idx = mapped->npages - 1;
/*
* The attempt to advance the base index to @iova
* may fail if it's out of bounds, or pinning the pages
* returns an error.
*/
if (iova_bitmap_advance_to(bitmap, iova))
return;
}
last_page_idx = mapped->npages - 1;
cur_bit = ((iova - mapped->iova) >>
mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;
last_bit = (((iova + length - 1) - mapped->iova) >>
mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;
do { do {
unsigned int page_idx = cur_bit / BITS_PER_PAGE; unsigned int page_idx = cur_bit / BITS_PER_PAGE;
...@@ -457,18 +430,19 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, ...@@ -457,18 +430,19 @@ void iova_bitmap_set(struct iova_bitmap *bitmap,
last_bit - cur_bit + 1); last_bit - cur_bit + 1);
void *kaddr; void *kaddr;
if (unlikely(page_idx > last_page_idx)) if (unlikely(page_idx > last_page_idx)) {
break; unsigned long left =
((last_bit - cur_bit + 1) << mapped->pgshift);
iova += (length - left);
length = left;
goto update_indexes;
}
kaddr = kmap_local_page(mapped->pages[page_idx]); kaddr = kmap_local_page(mapped->pages[page_idx]);
bitmap_set(kaddr, offset, nbits); bitmap_set(kaddr, offset, nbits);
kunmap_local(kaddr); kunmap_local(kaddr);
cur_bit += nbits; cur_bit += nbits;
} while (cur_bit <= last_bit); } while (cur_bit <= last_bit);
if (unlikely(cur_bit <= last_bit)) {
bitmap->set_ahead_length =
((last_bit - cur_bit + 1) << bitmap->mapped.pgshift);
}
} }
EXPORT_SYMBOL_NS_GPL(iova_bitmap_set, IOMMUFD); EXPORT_SYMBOL_NS_GPL(iova_bitmap_set, IOMMUFD);
...@@ -319,6 +319,7 @@ static int iommufd_option(struct iommufd_ucmd *ucmd) ...@@ -319,6 +319,7 @@ static int iommufd_option(struct iommufd_ucmd *ucmd)
union ucmd_buffer { union ucmd_buffer {
struct iommu_destroy destroy; struct iommu_destroy destroy;
struct iommu_fault_alloc fault;
struct iommu_hw_info info; struct iommu_hw_info info;
struct iommu_hwpt_alloc hwpt; struct iommu_hwpt_alloc hwpt;
struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap; struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap;
...@@ -355,6 +356,8 @@ struct iommufd_ioctl_op { ...@@ -355,6 +356,8 @@ struct iommufd_ioctl_op {
} }
static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id), IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id),
IOCTL_OP(IOMMU_FAULT_QUEUE_ALLOC, iommufd_fault_alloc, struct iommu_fault_alloc,
out_fault_fd),
IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info, IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info,
__reserved), __reserved),
IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc, IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc,
...@@ -513,6 +516,9 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { ...@@ -513,6 +516,9 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
.destroy = iommufd_hwpt_nested_destroy, .destroy = iommufd_hwpt_nested_destroy,
.abort = iommufd_hwpt_nested_abort, .abort = iommufd_hwpt_nested_abort,
}, },
[IOMMUFD_OBJ_FAULT] = {
.destroy = iommufd_fault_destroy,
},
#ifdef CONFIG_IOMMUFD_TEST #ifdef CONFIG_IOMMUFD_TEST
[IOMMUFD_OBJ_SELFTEST] = { [IOMMUFD_OBJ_SELFTEST] = {
.destroy = iommufd_selftest_destroy, .destroy = iommufd_selftest_destroy,
......
...@@ -266,8 +266,8 @@ static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain, ...@@ -266,8 +266,8 @@ static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain,
/* Clear dirty */ /* Clear dirty */
if (mock_test_and_clear_dirty(mock, head, pgsize, flags)) if (mock_test_and_clear_dirty(mock, head, pgsize, flags))
iommu_dirty_bitmap_record(dirty, head, pgsize); iommu_dirty_bitmap_record(dirty, iova, pgsize);
iova = head + pgsize; iova += pgsize;
} while (iova < end); } while (iova < end);
return 0; return 0;
...@@ -504,6 +504,8 @@ static bool mock_domain_capable(struct device *dev, enum iommu_cap cap) ...@@ -504,6 +504,8 @@ static bool mock_domain_capable(struct device *dev, enum iommu_cap cap)
return false; return false;
} }
static struct iopf_queue *mock_iommu_iopf_queue;
static struct iommu_device mock_iommu_device = { static struct iommu_device mock_iommu_device = {
}; };
...@@ -514,6 +516,29 @@ static struct iommu_device *mock_probe_device(struct device *dev) ...@@ -514,6 +516,29 @@ static struct iommu_device *mock_probe_device(struct device *dev)
return &mock_iommu_device; return &mock_iommu_device;
} }
/*
 * .page_response callback installed in mock_ops.
 * The body is empty: the response carried in @msg is not acted upon.
 */
static void mock_domain_page_response(struct device *dev, struct iopf_fault *evt,
struct iommu_page_response *msg)
{
}
/*
 * .dev_enable_feat callback installed in mock_ops.
 *
 * Only IOMMU_DEV_FEAT_IOPF is handled, and only while the mock IOPF queue
 * pointer is non-NULL; in that case @dev is added to the queue and the result
 * of iopf_queue_add_device() is returned. Anything else yields -ENODEV.
 */
static int mock_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
{
	if (feat == IOMMU_DEV_FEAT_IOPF && mock_iommu_iopf_queue)
		return iopf_queue_add_device(mock_iommu_iopf_queue, dev);

	return -ENODEV;
}
/*
 * .dev_disable_feat callback installed in mock_ops.
 *
 * Only IOMMU_DEV_FEAT_IOPF is handled, and only while the mock IOPF queue
 * pointer is non-NULL; in that case @dev is removed from the queue and 0 is
 * returned. Anything else yields -ENODEV.
 */
static int mock_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
{
	if (feat == IOMMU_DEV_FEAT_IOPF && mock_iommu_iopf_queue) {
		iopf_queue_remove_device(mock_iommu_iopf_queue, dev);
		return 0;
	}

	return -ENODEV;
}
static const struct iommu_ops mock_ops = { static const struct iommu_ops mock_ops = {
/* /*
* IOMMU_DOMAIN_BLOCKED cannot be returned from def_domain_type() * IOMMU_DOMAIN_BLOCKED cannot be returned from def_domain_type()
...@@ -529,6 +554,10 @@ static const struct iommu_ops mock_ops = { ...@@ -529,6 +554,10 @@ static const struct iommu_ops mock_ops = {
.capable = mock_domain_capable, .capable = mock_domain_capable,
.device_group = generic_device_group, .device_group = generic_device_group,
.probe_device = mock_probe_device, .probe_device = mock_probe_device,
.page_response = mock_domain_page_response,
.dev_enable_feat = mock_dev_enable_feat,
.dev_disable_feat = mock_dev_disable_feat,
.user_pasid_table = true,
.default_domain_ops = .default_domain_ops =
&(struct iommu_domain_ops){ &(struct iommu_domain_ops){
.free = mock_domain_free, .free = mock_domain_free,
...@@ -1334,7 +1363,7 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id, ...@@ -1334,7 +1363,7 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id,
} }
max = length / page_size; max = length / page_size;
bitmap_size = max / BITS_PER_BYTE; bitmap_size = DIV_ROUND_UP(max, BITS_PER_BYTE);
tmp = kvzalloc(bitmap_size, GFP_KERNEL_ACCOUNT); tmp = kvzalloc(bitmap_size, GFP_KERNEL_ACCOUNT);
if (!tmp) { if (!tmp) {
...@@ -1375,6 +1404,31 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id, ...@@ -1375,6 +1404,31 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id,
return rc; return rc;
} }
/*
 * Handler for IOMMU_TEST_OP_TRIGGER_IOPF.
 *
 * Builds a page-request fault from the trigger_iopf member of @cmd and
 * reports it against the device identified by trigger_iopf.dev_id.
 *
 * Returns 0 once the fault has been reported, or the error encoded in the
 * pointer returned by iommufd_get_device() when the lookup fails.
 */
static int iommufd_test_trigger_iopf(struct iommufd_ucmd *ucmd,
				     struct iommu_test_cmd *cmd)
{
	struct iopf_fault event = { };
	struct iommufd_device *idev;

	/* Fill in the request from the user-supplied parameters. */
	event.fault.type = IOMMU_FAULT_PAGE_REQ;
	event.fault.prm.addr = cmd->trigger_iopf.addr;
	event.fault.prm.pasid = cmd->trigger_iopf.pasid;
	event.fault.prm.grpid = cmd->trigger_iopf.grpid;
	event.fault.prm.perm = cmd->trigger_iopf.perm;

	/* Always flagged as the last page; PASID is valid only when given. */
	event.fault.prm.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
	if (cmd->trigger_iopf.pasid != IOMMU_NO_PASID)
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;

	idev = iommufd_get_device(ucmd, cmd->trigger_iopf.dev_id);
	if (IS_ERR(idev))
		return PTR_ERR(idev);

	iommu_report_device_fault(idev->dev, &event);

	/* Drop the reference taken by iommufd_get_device(). */
	iommufd_put_object(ucmd->ictx, &idev->obj);

	return 0;
}
void iommufd_selftest_destroy(struct iommufd_object *obj) void iommufd_selftest_destroy(struct iommufd_object *obj)
{ {
struct selftest_obj *sobj = container_of(obj, struct selftest_obj, obj); struct selftest_obj *sobj = container_of(obj, struct selftest_obj, obj);
...@@ -1450,6 +1504,8 @@ int iommufd_test(struct iommufd_ucmd *ucmd) ...@@ -1450,6 +1504,8 @@ int iommufd_test(struct iommufd_ucmd *ucmd)
cmd->dirty.page_size, cmd->dirty.page_size,
u64_to_user_ptr(cmd->dirty.uptr), u64_to_user_ptr(cmd->dirty.uptr),
cmd->dirty.flags); cmd->dirty.flags);
case IOMMU_TEST_OP_TRIGGER_IOPF:
return iommufd_test_trigger_iopf(ucmd, cmd);
default: default:
return -EOPNOTSUPP; return -EOPNOTSUPP;
} }
...@@ -1491,6 +1547,9 @@ int __init iommufd_test_init(void) ...@@ -1491,6 +1547,9 @@ int __init iommufd_test_init(void)
&iommufd_mock_bus_type.nb); &iommufd_mock_bus_type.nb);
if (rc) if (rc)
goto err_sysfs; goto err_sysfs;
mock_iommu_iopf_queue = iopf_queue_alloc("mock-iopfq");
return 0; return 0;
err_sysfs: err_sysfs:
...@@ -1506,6 +1565,11 @@ int __init iommufd_test_init(void) ...@@ -1506,6 +1565,11 @@ int __init iommufd_test_init(void)
void iommufd_test_exit(void) void iommufd_test_exit(void)
{ {
if (mock_iommu_iopf_queue) {
iopf_queue_free(mock_iommu_iopf_queue);
mock_iommu_iopf_queue = NULL;
}
iommu_device_sysfs_remove(&mock_iommu_device); iommu_device_sysfs_remove(&mock_iommu_device);
iommu_device_unregister_bus(&mock_iommu_device, iommu_device_unregister_bus(&mock_iommu_device,
&iommufd_mock_bus_type.bus, &iommufd_mock_bus_type.bus,
......
...@@ -124,12 +124,16 @@ struct iopf_fault { ...@@ -124,12 +124,16 @@ struct iopf_fault {
struct iopf_group { struct iopf_group {
struct iopf_fault last_fault; struct iopf_fault last_fault;
struct list_head faults; struct list_head faults;
size_t fault_count;
/* list node for iommu_fault_param::faults */ /* list node for iommu_fault_param::faults */
struct list_head pending_node; struct list_head pending_node;
struct work_struct work; struct work_struct work;
struct iommu_domain *domain; struct iommu_attach_handle *attach_handle;
/* The device's fault data parameter. */ /* The device's fault data parameter. */
struct iommu_fault_param *fault_param; struct iommu_fault_param *fault_param;
/* Used by handler provider to hook the group on its own lists. */
struct list_head node;
u32 cookie;
}; };
/** /**
...@@ -547,6 +551,10 @@ static inline int __iommu_copy_struct_from_user_array( ...@@ -547,6 +551,10 @@ static inline int __iommu_copy_struct_from_user_array(
* @default_domain: If not NULL this will always be set as the default domain. * @default_domain: If not NULL this will always be set as the default domain.
* This should be an IDENTITY/BLOCKED/PLATFORM domain. * This should be an IDENTITY/BLOCKED/PLATFORM domain.
* Do not use in new drivers. * Do not use in new drivers.
* @user_pasid_table: IOMMU driver supports user-managed PASID table. There is
* no user domain for each PASID and the I/O page faults are
* forwarded through the user domain attached to the device
* RID.
*/ */
struct iommu_ops { struct iommu_ops {
bool (*capable)(struct device *dev, enum iommu_cap); bool (*capable)(struct device *dev, enum iommu_cap);
...@@ -590,6 +598,7 @@ struct iommu_ops { ...@@ -590,6 +598,7 @@ struct iommu_ops {
struct iommu_domain *blocked_domain; struct iommu_domain *blocked_domain;
struct iommu_domain *release_domain; struct iommu_domain *release_domain;
struct iommu_domain *default_domain; struct iommu_domain *default_domain;
u8 user_pasid_table:1;
}; };
/** /**
...@@ -989,20 +998,28 @@ struct iommu_fwspec { ...@@ -989,20 +998,28 @@ struct iommu_fwspec {
/* ATS is supported */ /* ATS is supported */
#define IOMMU_FWSPEC_PCI_RC_ATS (1 << 0) #define IOMMU_FWSPEC_PCI_RC_ATS (1 << 0)
/*
* An iommu attach handle represents a relationship between an iommu domain
* and a PASID or RID of a device. It is allocated and managed by the component
* that manages the domain and is stored in the iommu group during the time the
* domain is attached.
*/
struct iommu_attach_handle {
struct iommu_domain *domain;
};
/** /**
* struct iommu_sva - handle to a device-mm bond * struct iommu_sva - handle to a device-mm bond
*/ */
struct iommu_sva { struct iommu_sva {
struct iommu_attach_handle handle;
struct device *dev; struct device *dev;
struct iommu_domain *domain;
struct list_head handle_item;
refcount_t users; refcount_t users;
}; };
struct iommu_mm_data { struct iommu_mm_data {
u32 pasid; u32 pasid;
struct list_head sva_domains; struct list_head sva_domains;
struct list_head sva_handles;
}; };
int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
...@@ -1052,12 +1069,10 @@ int iommu_device_claim_dma_owner(struct device *dev, void *owner); ...@@ -1052,12 +1069,10 @@ int iommu_device_claim_dma_owner(struct device *dev, void *owner);
void iommu_device_release_dma_owner(struct device *dev); void iommu_device_release_dma_owner(struct device *dev);
int iommu_attach_device_pasid(struct iommu_domain *domain, int iommu_attach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid); struct device *dev, ioasid_t pasid,
struct iommu_attach_handle *handle);
void iommu_detach_device_pasid(struct iommu_domain *domain, void iommu_detach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid); struct device *dev, ioasid_t pasid);
struct iommu_domain *
iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
unsigned int type);
ioasid_t iommu_alloc_global_pasid(struct device *dev); ioasid_t iommu_alloc_global_pasid(struct device *dev);
void iommu_free_global_pasid(ioasid_t pasid); void iommu_free_global_pasid(ioasid_t pasid);
#else /* CONFIG_IOMMU_API */ #else /* CONFIG_IOMMU_API */
...@@ -1388,7 +1403,8 @@ static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner) ...@@ -1388,7 +1403,8 @@ static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner)
} }
static inline int iommu_attach_device_pasid(struct iommu_domain *domain, static inline int iommu_attach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid) struct device *dev, ioasid_t pasid,
struct iommu_attach_handle *handle)
{ {
return -ENODEV; return -ENODEV;
} }
...@@ -1398,13 +1414,6 @@ static inline void iommu_detach_device_pasid(struct iommu_domain *domain, ...@@ -1398,13 +1414,6 @@ static inline void iommu_detach_device_pasid(struct iommu_domain *domain,
{ {
} }
static inline struct iommu_domain *
iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
unsigned int type)
{
return NULL;
}
static inline ioasid_t iommu_alloc_global_pasid(struct device *dev) static inline ioasid_t iommu_alloc_global_pasid(struct device *dev)
{ {
return IOMMU_PASID_INVALID; return IOMMU_PASID_INVALID;
......
...@@ -37,19 +37,20 @@ ...@@ -37,19 +37,20 @@
enum { enum {
IOMMUFD_CMD_BASE = 0x80, IOMMUFD_CMD_BASE = 0x80,
IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE, IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE,
IOMMUFD_CMD_IOAS_ALLOC, IOMMUFD_CMD_IOAS_ALLOC = 0x81,
IOMMUFD_CMD_IOAS_ALLOW_IOVAS, IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82,
IOMMUFD_CMD_IOAS_COPY, IOMMUFD_CMD_IOAS_COPY = 0x83,
IOMMUFD_CMD_IOAS_IOVA_RANGES, IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84,
IOMMUFD_CMD_IOAS_MAP, IOMMUFD_CMD_IOAS_MAP = 0x85,
IOMMUFD_CMD_IOAS_UNMAP, IOMMUFD_CMD_IOAS_UNMAP = 0x86,
IOMMUFD_CMD_OPTION, IOMMUFD_CMD_OPTION = 0x87,
IOMMUFD_CMD_VFIO_IOAS, IOMMUFD_CMD_VFIO_IOAS = 0x88,
IOMMUFD_CMD_HWPT_ALLOC, IOMMUFD_CMD_HWPT_ALLOC = 0x89,
IOMMUFD_CMD_GET_HW_INFO, IOMMUFD_CMD_GET_HW_INFO = 0x8a,
IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b,
IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP, IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c,
IOMMUFD_CMD_HWPT_INVALIDATE, IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
}; };
/** /**
...@@ -356,10 +357,13 @@ struct iommu_vfio_ioas { ...@@ -356,10 +357,13 @@ struct iommu_vfio_ioas {
* the parent HWPT in a nesting configuration. * the parent HWPT in a nesting configuration.
* @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is
* enforced on device attachment * enforced on device attachment
* @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
* valid.
*/ */
enum iommufd_hwpt_alloc_flags { enum iommufd_hwpt_alloc_flags {
IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0, IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1, IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
}; };
/** /**
...@@ -396,8 +400,8 @@ struct iommu_hwpt_vtd_s1 { ...@@ -396,8 +400,8 @@ struct iommu_hwpt_vtd_s1 {
* @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table
*/ */
enum iommu_hwpt_data_type { enum iommu_hwpt_data_type {
IOMMU_HWPT_DATA_NONE, IOMMU_HWPT_DATA_NONE = 0,
IOMMU_HWPT_DATA_VTD_S1, IOMMU_HWPT_DATA_VTD_S1 = 1,
}; };
/** /**
...@@ -411,6 +415,9 @@ enum iommu_hwpt_data_type { ...@@ -411,6 +415,9 @@ enum iommu_hwpt_data_type {
* @data_type: One of enum iommu_hwpt_data_type * @data_type: One of enum iommu_hwpt_data_type
* @data_len: Length of the type specific data * @data_len: Length of the type specific data
* @data_uptr: User pointer to the type specific data * @data_uptr: User pointer to the type specific data
* @fault_id: The ID of IOMMUFD_FAULT object. Valid only if flags field of
* IOMMU_HWPT_FAULT_ID_VALID is set.
* @__reserved2: Padding to 64-bit alignment. Must be 0.
* *
* Explicitly allocate a hardware page table object. This is the same object * Explicitly allocate a hardware page table object. This is the same object
* type that is returned by iommufd_device_attach() and represents the * type that is returned by iommufd_device_attach() and represents the
...@@ -441,6 +448,8 @@ struct iommu_hwpt_alloc { ...@@ -441,6 +448,8 @@ struct iommu_hwpt_alloc {
__u32 data_type; __u32 data_type;
__u32 data_len; __u32 data_len;
__aligned_u64 data_uptr; __aligned_u64 data_uptr;
__u32 fault_id;
__u32 __reserved2;
}; };
#define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC) #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
...@@ -482,8 +491,8 @@ struct iommu_hw_info_vtd { ...@@ -482,8 +491,8 @@ struct iommu_hw_info_vtd {
* @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
*/ */
enum iommu_hw_info_type { enum iommu_hw_info_type {
IOMMU_HW_INFO_TYPE_NONE, IOMMU_HW_INFO_TYPE_NONE = 0,
IOMMU_HW_INFO_TYPE_INTEL_VTD, IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
}; };
/** /**
...@@ -620,7 +629,7 @@ struct iommu_hwpt_get_dirty_bitmap { ...@@ -620,7 +629,7 @@ struct iommu_hwpt_get_dirty_bitmap {
* @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1 * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
*/ */
enum iommu_hwpt_invalidate_data_type { enum iommu_hwpt_invalidate_data_type {
IOMMU_HWPT_INVALIDATE_DATA_VTD_S1, IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0,
}; };
/** /**
...@@ -692,4 +701,100 @@ struct iommu_hwpt_invalidate { ...@@ -692,4 +701,100 @@ struct iommu_hwpt_invalidate {
__u32 __reserved; __u32 __reserved;
}; };
#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE) #define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)
/**
 * enum iommu_hwpt_pgfault_flags - flag bits of struct iommu_hwpt_pgfault
 * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The fault data carries a valid pasid
 *                                   field.
 * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: This fault is the last one of its fault
 *                                 group.
 */
enum iommu_hwpt_pgfault_flags {
	IOMMU_PGFAULT_FLAGS_PASID_VALID = 1 << 0,
	IOMMU_PGFAULT_FLAGS_LAST_PAGE = 1 << 1,
};
/**
 * enum iommu_hwpt_pgfault_perm - permission bits of struct iommu_hwpt_pgfault
 * @IOMMU_PGFAULT_PERM_READ: read permission is requested
 * @IOMMU_PGFAULT_PERM_WRITE: write permission is requested
 * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) the request carries a PASID whose
 *                           PASID TLP Prefix has the Execute Requested bit
 *                           set.
 * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) the request carries a PASID whose
 *                           PASID TLP Prefix has the Privileged Mode
 *                           Requested bit set.
 */
enum iommu_hwpt_pgfault_perm {
	IOMMU_PGFAULT_PERM_READ = 1 << 0,
	IOMMU_PGFAULT_PERM_WRITE = 1 << 1,
	IOMMU_PGFAULT_PERM_EXEC = 1 << 2,
	IOMMU_PGFAULT_PERM_PRIV = 1 << 3,
};
/**
 * struct iommu_hwpt_pgfault - iommu page fault data
 * @flags: Combination of enum iommu_hwpt_pgfault_flags
 * @dev_id: id of the originated device
 * @pasid: Process Address Space ID
 * @grpid: Page Request Group Index
 * @perm: Combination of enum iommu_hwpt_pgfault_perm
 * @__reserved: Explicit padding so that @addr is 64-bit aligned on every ABI.
 *              Reserved; the producer of the message should zero it.
 * @addr: Fault address
 * @length: a hint of how much data the requestor is expecting to fetch. For
 *          example, if the PRI initiator knows it is going to do a 10MB
 *          transfer, it could fill in 10MB and the OS could pre-fault in
 *          10MB of IOVA. It defaults to 0 if there is no such hint.
 * @cookie: kernel-managed cookie identifying a group of fault messages. The
 *          cookie number encoded in the last page fault of the group should
 *          be echoed back in the response message.
 *
 * The layout has no implicit holes: five 32-bit fields plus @__reserved
 * place @addr at offset 24, and __aligned_u64 keeps it there on ABIs where a
 * plain __u64 is only 4-byte aligned.
 */
struct iommu_hwpt_pgfault {
	__u32 flags;
	__u32 dev_id;
	__u32 pasid;
	__u32 grpid;
	__u32 perm;
	__u32 __reserved;
	__aligned_u64 addr;
	__u32 length;
	__u32 cookie;
};
/**
* enum iommufd_page_response_code - Return status of fault handlers
* @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
* populated, retry the access. This is the
* "Success" defined in PCI 10.4.2.1.
* @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
* access. This is the "Invalid Request" in PCI
* 10.4.2.1.
*/
enum iommufd_page_response_code {
IOMMUFD_PAGE_RESP_SUCCESS = 0,
IOMMUFD_PAGE_RESP_INVALID = 1,
};
/**
* struct iommu_hwpt_page_response - IOMMU page fault response
* @cookie: The kernel-managed cookie reported in the fault message.
* @code: One of response code in enum iommufd_page_response_code.
*/
struct iommu_hwpt_page_response {
__u32 cookie;
__u32 code;
};
/**
* struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC)
* @size: sizeof(struct iommu_fault_alloc)
* @flags: Must be 0
* @out_fault_id: The ID of the new FAULT
* @out_fault_fd: The fd of the new FAULT
*
* Explicitly allocate a fault handling object.
*/
struct iommu_fault_alloc {
__u32 size;
__u32 flags;
__u32 out_fault_id;
__u32 out_fault_fd;
};
#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
#endif #endif
...@@ -279,6 +279,9 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) ...@@ -279,6 +279,9 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested)
uint32_t parent_hwpt_id = 0; uint32_t parent_hwpt_id = 0;
uint32_t parent_hwpt_id_not_work = 0; uint32_t parent_hwpt_id_not_work = 0;
uint32_t test_hwpt_id = 0; uint32_t test_hwpt_id = 0;
uint32_t iopf_hwpt_id;
uint32_t fault_id;
uint32_t fault_fd;
if (self->device_id) { if (self->device_id) {
/* Negative tests */ /* Negative tests */
...@@ -326,6 +329,7 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) ...@@ -326,6 +329,7 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested)
sizeof(data)); sizeof(data));
/* Allocate two nested hwpts sharing one common parent hwpt */ /* Allocate two nested hwpts sharing one common parent hwpt */
test_ioctl_fault_alloc(&fault_id, &fault_fd);
test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, 0, test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, 0,
&nested_hwpt_id[0], &nested_hwpt_id[0],
IOMMU_HWPT_DATA_SELFTEST, &data, IOMMU_HWPT_DATA_SELFTEST, &data,
...@@ -334,6 +338,14 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) ...@@ -334,6 +338,14 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested)
&nested_hwpt_id[1], &nested_hwpt_id[1],
IOMMU_HWPT_DATA_SELFTEST, &data, IOMMU_HWPT_DATA_SELFTEST, &data,
sizeof(data)); sizeof(data));
test_err_hwpt_alloc_iopf(ENOENT, self->device_id, parent_hwpt_id,
UINT32_MAX, IOMMU_HWPT_FAULT_ID_VALID,
&iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST,
&data, sizeof(data));
test_cmd_hwpt_alloc_iopf(self->device_id, parent_hwpt_id, fault_id,
IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id,
IOMMU_HWPT_DATA_SELFTEST, &data,
sizeof(data));
test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[0], test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[0],
IOMMU_TEST_IOTLB_DEFAULT); IOMMU_TEST_IOTLB_DEFAULT);
test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[1], test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[1],
...@@ -504,14 +516,24 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) ...@@ -504,14 +516,24 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested)
_test_ioctl_destroy(self->fd, nested_hwpt_id[1])); _test_ioctl_destroy(self->fd, nested_hwpt_id[1]));
test_ioctl_destroy(nested_hwpt_id[0]); test_ioctl_destroy(nested_hwpt_id[0]);
/* Switch from nested_hwpt_id[1] to iopf_hwpt_id */
test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id);
EXPECT_ERRNO(EBUSY,
_test_ioctl_destroy(self->fd, iopf_hwpt_id));
/* Trigger an IOPF on the device */
test_cmd_trigger_iopf(self->device_id, fault_fd);
/* Detach from nested_hwpt_id[1] and destroy it */ /* Detach from nested_hwpt_id[1] and destroy it */
test_cmd_mock_domain_replace(self->stdev_id, parent_hwpt_id); test_cmd_mock_domain_replace(self->stdev_id, parent_hwpt_id);
test_ioctl_destroy(nested_hwpt_id[1]); test_ioctl_destroy(nested_hwpt_id[1]);
test_ioctl_destroy(iopf_hwpt_id);
/* Detach from the parent hw_pagetable and destroy it */ /* Detach from the parent hw_pagetable and destroy it */
test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id); test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
test_ioctl_destroy(parent_hwpt_id); test_ioctl_destroy(parent_hwpt_id);
test_ioctl_destroy(parent_hwpt_id_not_work); test_ioctl_destroy(parent_hwpt_id_not_work);
close(fault_fd);
test_ioctl_destroy(fault_id);
} else { } else {
test_err_hwpt_alloc(ENOENT, self->device_id, self->ioas_id, 0, test_err_hwpt_alloc(ENOENT, self->device_id, self->ioas_id, 0,
&parent_hwpt_id); &parent_hwpt_id);
...@@ -1722,10 +1744,17 @@ FIXTURE_VARIANT(iommufd_dirty_tracking) ...@@ -1722,10 +1744,17 @@ FIXTURE_VARIANT(iommufd_dirty_tracking)
FIXTURE_SETUP(iommufd_dirty_tracking) FIXTURE_SETUP(iommufd_dirty_tracking)
{ {
unsigned long size;
int mmap_flags; int mmap_flags;
void *vrc; void *vrc;
int rc; int rc;
if (variant->buffer_size < MOCK_PAGE_SIZE) {
SKIP(return,
"Skipping buffer_size=%lu, less than MOCK_PAGE_SIZE=%lu",
variant->buffer_size, MOCK_PAGE_SIZE);
}
self->fd = open("/dev/iommu", O_RDWR); self->fd = open("/dev/iommu", O_RDWR);
ASSERT_NE(-1, self->fd); ASSERT_NE(-1, self->fd);
...@@ -1749,12 +1778,11 @@ FIXTURE_SETUP(iommufd_dirty_tracking) ...@@ -1749,12 +1778,11 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
assert(vrc == self->buffer); assert(vrc == self->buffer);
self->page_size = MOCK_PAGE_SIZE; self->page_size = MOCK_PAGE_SIZE;
self->bitmap_size = self->bitmap_size = variant->buffer_size / self->page_size;
variant->buffer_size / self->page_size / BITS_PER_BYTE;
/* Provision with an extra (PAGE_SIZE) for the unaligned case */ /* Provision with an extra (PAGE_SIZE) for the unaligned case */
rc = posix_memalign(&self->bitmap, PAGE_SIZE, size = DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE);
self->bitmap_size + PAGE_SIZE); rc = posix_memalign(&self->bitmap, PAGE_SIZE, size + PAGE_SIZE);
assert(!rc); assert(!rc);
assert(self->bitmap); assert(self->bitmap);
assert((uintptr_t)self->bitmap % PAGE_SIZE == 0); assert((uintptr_t)self->bitmap % PAGE_SIZE == 0);
...@@ -1775,51 +1803,63 @@ FIXTURE_SETUP(iommufd_dirty_tracking) ...@@ -1775,51 +1803,63 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
FIXTURE_TEARDOWN(iommufd_dirty_tracking) FIXTURE_TEARDOWN(iommufd_dirty_tracking)
{ {
munmap(self->buffer, variant->buffer_size); munmap(self->buffer, variant->buffer_size);
munmap(self->bitmap, self->bitmap_size); munmap(self->bitmap, DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE));
teardown_iommufd(self->fd, _metadata); teardown_iommufd(self->fd, _metadata);
} }
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k) FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty8k)
{
/* half of an u8 index bitmap */
.buffer_size = 8UL * 1024UL,
};
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty16k)
{
/* one u8 index bitmap */
.buffer_size = 16UL * 1024UL,
};
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64k)
{ {
/* one u32 index bitmap */ /* one u32 index bitmap */
.buffer_size = 128UL * 1024UL, .buffer_size = 64UL * 1024UL,
}; };
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256k) FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k)
{ {
/* one u64 index bitmap */ /* one u64 index bitmap */
.buffer_size = 256UL * 1024UL, .buffer_size = 128UL * 1024UL,
}; };
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty640k) FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty320k)
{ {
/* two u64 index and trailing end bitmap */ /* two u64 index and trailing end bitmap */
.buffer_size = 640UL * 1024UL, .buffer_size = 320UL * 1024UL,
}; };
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M) FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64M)
{ {
/* 4K bitmap (128M IOVA range) */ /* 4K bitmap (64M IOVA range) */
.buffer_size = 128UL * 1024UL * 1024UL, .buffer_size = 64UL * 1024UL * 1024UL,
}; };
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge) FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64M_huge)
{ {
/* 4K bitmap (128M IOVA range) */ /* 4K bitmap (64M IOVA range) */
.buffer_size = 128UL * 1024UL * 1024UL, .buffer_size = 64UL * 1024UL * 1024UL,
.hugepages = true, .hugepages = true,
}; };
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M) FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M)
{ {
/* 8K bitmap (256M IOVA range) */ /* 8K bitmap (128M IOVA range) */
.buffer_size = 256UL * 1024UL * 1024UL, .buffer_size = 128UL * 1024UL * 1024UL,
}; };
FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M_huge) FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge)
{ {
/* 8K bitmap (256M IOVA range) */ /* 8K bitmap (128M IOVA range) */
.buffer_size = 256UL * 1024UL * 1024UL, .buffer_size = 128UL * 1024UL * 1024UL,
.hugepages = true, .hugepages = true,
}; };
......
...@@ -615,7 +615,7 @@ TEST_FAIL_NTH(basic_fail_nth, device) ...@@ -615,7 +615,7 @@ TEST_FAIL_NTH(basic_fail_nth, device)
if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info), NULL)) if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info), NULL))
return -1; return -1;
if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, &hwpt_id, if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, 0, &hwpt_id,
IOMMU_HWPT_DATA_NONE, 0, 0)) IOMMU_HWPT_DATA_NONE, 0, 0))
return -1; return -1;
......
...@@ -22,6 +22,8 @@ ...@@ -22,6 +22,8 @@
#define BIT_MASK(nr) (1UL << ((nr) % __BITS_PER_LONG)) #define BIT_MASK(nr) (1UL << ((nr) % __BITS_PER_LONG))
#define BIT_WORD(nr) ((nr) / __BITS_PER_LONG) #define BIT_WORD(nr) ((nr) / __BITS_PER_LONG)
/*
 * Integer division of n by d, rounded up to the next whole quotient.
 * d is expanded twice, so do not pass an argument with side effects.
 */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
static inline void set_bit(unsigned int nr, unsigned long *addr) static inline void set_bit(unsigned int nr, unsigned long *addr)
{ {
unsigned long mask = BIT_MASK(nr); unsigned long mask = BIT_MASK(nr);
...@@ -153,7 +155,7 @@ static int _test_cmd_mock_domain_replace(int fd, __u32 stdev_id, __u32 pt_id, ...@@ -153,7 +155,7 @@ static int _test_cmd_mock_domain_replace(int fd, __u32 stdev_id, __u32 pt_id,
EXPECT_ERRNO(_errno, _test_cmd_mock_domain_replace(self->fd, stdev_id, \ EXPECT_ERRNO(_errno, _test_cmd_mock_domain_replace(self->fd, stdev_id, \
pt_id, NULL)) pt_id, NULL))
static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, __u32 ft_id,
__u32 flags, __u32 *hwpt_id, __u32 data_type, __u32 flags, __u32 *hwpt_id, __u32 data_type,
void *data, size_t data_len) void *data, size_t data_len)
{ {
...@@ -165,6 +167,7 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, ...@@ -165,6 +167,7 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id,
.data_type = data_type, .data_type = data_type,
.data_len = data_len, .data_len = data_len,
.data_uptr = (uint64_t)data, .data_uptr = (uint64_t)data,
.fault_id = ft_id,
}; };
int ret; int ret;
...@@ -177,24 +180,36 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, ...@@ -177,24 +180,36 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id,
} }
#define test_cmd_hwpt_alloc(device_id, pt_id, flags, hwpt_id) \ #define test_cmd_hwpt_alloc(device_id, pt_id, flags, hwpt_id) \
ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \ ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \
hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, \ hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, \
0)) 0))
#define test_err_hwpt_alloc(_errno, device_id, pt_id, flags, hwpt_id) \ #define test_err_hwpt_alloc(_errno, device_id, pt_id, flags, hwpt_id) \
EXPECT_ERRNO(_errno, _test_cmd_hwpt_alloc( \ EXPECT_ERRNO(_errno, _test_cmd_hwpt_alloc( \
self->fd, device_id, pt_id, flags, \ self->fd, device_id, pt_id, 0, flags, \
hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, 0)) hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, 0))
#define test_cmd_hwpt_alloc_nested(device_id, pt_id, flags, hwpt_id, \ #define test_cmd_hwpt_alloc_nested(device_id, pt_id, flags, hwpt_id, \
data_type, data, data_len) \ data_type, data, data_len) \
ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \ ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \
hwpt_id, data_type, data, data_len)) hwpt_id, data_type, data, data_len))
#define test_err_hwpt_alloc_nested(_errno, device_id, pt_id, flags, hwpt_id, \ #define test_err_hwpt_alloc_nested(_errno, device_id, pt_id, flags, hwpt_id, \
data_type, data, data_len) \ data_type, data, data_len) \
EXPECT_ERRNO(_errno, \ EXPECT_ERRNO(_errno, \
_test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \ _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \
hwpt_id, data_type, data, data_len)) hwpt_id, data_type, data, data_len))
/*
 * Allocate a hw_pagetable with a fault object attached and assert success.
 *
 * Forwards every argument, including fault_id, to _test_cmd_hwpt_alloc() on
 * self->fd and asserts that the call returns 0. The calling scope must
 * provide `self`.
 */
#define test_cmd_hwpt_alloc_iopf(device_id, pt_id, fault_id, flags, hwpt_id, \
data_type, data, data_len) \
ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, fault_id, \
flags, hwpt_id, data_type, data, \
data_len))
/*
 * Negative counterpart of the fault-capable hw_pagetable allocation.
 *
 * Forwards every argument, including fault_id, to _test_cmd_hwpt_alloc() on
 * self->fd and checks the outcome against _errno through EXPECT_ERRNO. The
 * calling scope must provide `self`.
 */
#define test_err_hwpt_alloc_iopf(_errno, device_id, pt_id, fault_id, flags, \
hwpt_id, data_type, data, data_len) \
EXPECT_ERRNO(_errno, \
_test_cmd_hwpt_alloc(self->fd, device_id, pt_id, fault_id, \
flags, hwpt_id, data_type, data, \
data_len))
#define test_cmd_hwpt_check_iotlb(hwpt_id, iotlb_id, expected) \ #define test_cmd_hwpt_check_iotlb(hwpt_id, iotlb_id, expected) \
({ \ ({ \
struct iommu_test_cmd test_cmd = { \ struct iommu_test_cmd test_cmd = { \
...@@ -346,12 +361,12 @@ static int _test_cmd_mock_domain_set_dirty(int fd, __u32 hwpt_id, size_t length, ...@@ -346,12 +361,12 @@ static int _test_cmd_mock_domain_set_dirty(int fd, __u32 hwpt_id, size_t length,
static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length,
__u64 iova, size_t page_size, __u64 iova, size_t page_size,
size_t pte_page_size, __u64 *bitmap, size_t pte_page_size, __u64 *bitmap,
__u64 bitmap_size, __u32 flags, __u64 nbits, __u32 flags,
struct __test_metadata *_metadata) struct __test_metadata *_metadata)
{ {
unsigned long npte = pte_page_size / page_size, pteset = 2 * npte; unsigned long npte = pte_page_size / page_size, pteset = 2 * npte;
unsigned long nbits = bitmap_size * BITS_PER_BYTE;
unsigned long j, i, nr = nbits / pteset ?: 1; unsigned long j, i, nr = nbits / pteset ?: 1;
unsigned long bitmap_size = DIV_ROUND_UP(nbits, BITS_PER_BYTE);
__u64 out_dirty = 0; __u64 out_dirty = 0;
/* Mark all even bits as dirty in the mock domain */ /* Mark all even bits as dirty in the mock domain */
...@@ -684,3 +699,66 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data, ...@@ -684,3 +699,66 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data,
#define test_cmd_get_hw_capabilities(device_id, caps, mask) \ #define test_cmd_get_hw_capabilities(device_id, caps, mask) \
ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, 0, &caps)) ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, 0, &caps))
/*
 * Allocate a fault object with the IOMMU_FAULT_QUEUE_ALLOC ioctl on @fd.
 *
 * On success the object id and the fault file descriptor reported by the
 * ioctl are stored through @fault_id and @fault_fd and 0 is returned.
 * On failure the ioctl() result is returned and neither output is written.
 */
static int _test_ioctl_fault_alloc(int fd, __u32 *fault_id, __u32 *fault_fd)
{
	struct iommu_fault_alloc alloc_cmd = {
		.size = sizeof(alloc_cmd),
	};
	int rc = ioctl(fd, IOMMU_FAULT_QUEUE_ALLOC, &alloc_cmd);

	/* Only publish the outputs once the ioctl has succeeded. */
	if (!rc) {
		*fault_id = alloc_cmd.out_fault_id;
		*fault_fd = alloc_cmd.out_fault_fd;
	}

	return rc;
}
/*
 * Allocate a fault object on self->fd and assert that it worked.
 *
 * fault_id and fault_fd are pointers that receive the results. The macro
 * asserts that _test_ioctl_fault_alloc() returns 0 and that both stored
 * values are non-zero. The calling scope must provide `self`.
 */
#define test_ioctl_fault_alloc(fault_id, fault_fd) \
({ \
ASSERT_EQ(0, _test_ioctl_fault_alloc(self->fd, fault_id, \
fault_fd)); \
ASSERT_NE(0, *(fault_id)); \
ASSERT_NE(0, *(fault_fd)); \
})
/*
 * Trigger an I/O page fault for @device_id and complete it through @fault_fd.
 *
 * Issues the IOMMU_TEST_OP_TRIGGER_IOPF test command on @fd, reads one
 * struct iommu_hwpt_pgfault from @fault_fd, then writes back a
 * struct iommu_hwpt_page_response that carries the fault's cookie and
 * IOMMUFD_PAGE_RESP_SUCCESS.
 *
 * Returns 0 on success, the ioctl() result if the trigger command fails, or
 * -EIO if a complete fault record cannot be read or a complete response
 * cannot be written.
 */
static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 fault_fd)
{
	struct iommu_test_cmd trigger_iopf_cmd = {
		.size = sizeof(trigger_iopf_cmd),
		.op = IOMMU_TEST_OP_TRIGGER_IOPF,
		.trigger_iopf = {
			.dev_id = device_id,
			.pasid = 0x1,
			.grpid = 0x2,
			.perm = IOMMU_PGFAULT_PERM_READ | IOMMU_PGFAULT_PERM_WRITE,
			.addr = 0xdeadbeaf,
		},
	};
	struct iommu_hwpt_page_response response = {
		.code = IOMMUFD_PAGE_RESP_SUCCESS,
	};
	struct iommu_hwpt_pgfault fault = {};
	ssize_t bytes;
	int ret;

	ret = ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_IOPF),
		    &trigger_iopf_cmd);
	if (ret)
		return ret;

	/*
	 * Require a whole fault record: after an error, an empty read or a
	 * short read, fault.cookie cannot be trusted for the response below.
	 */
	bytes = read(fault_fd, &fault, sizeof(fault));
	if (bytes != (ssize_t)sizeof(fault))
		return -EIO;

	response.cookie = fault.cookie;

	/* Likewise, a partially written response has not completed the fault. */
	bytes = write(fault_fd, &response, sizeof(response));
	if (bytes != (ssize_t)sizeof(response))
		return -EIO;

	return 0;
}
/*
 * Trigger an IOPF on device_id and respond to it via fault_fd, asserting that
 * _test_cmd_trigger_iopf() returns 0. The calling scope must provide `self`.
 */
#define test_cmd_trigger_iopf(device_id, fault_fd) \
ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, fault_fd))
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment