Commit bd529dbb authored by Nicolin Chen's avatar Nicolin Chen Committed by Jason Gunthorpe

iommufd: Add a nested HW pagetable object

IOMMU_HWPT_ALLOC already supports iommu_domain allocation for userspace.
But it can only allocate a hw_pagetable that associates to a given IOAS,
i.e. only a kernel-managed hw_pagetable of IOMMUFD_OBJ_HWPT_PAGING type.

IOMMU drivers can now support user-managed hw_pagetables, for two-stage
translation use cases that require user data input from the user space.

Add a new IOMMUFD_OBJ_HWPT_NESTED type with its abort/destroy(). Pair it
with a new iommufd_hwpt_nested structure and its to_hwpt_nested() helper.
Update the to_hwpt_paging() helper, so a NESTED-type hw_pagetable can be
handled in the callers, for example iommufd_hwpt_paging_enforce_cc().

Screen the inputs including the parent PAGING-type hw_pagetable that has
a need of a new nest_parent flag in the iommufd_hwpt_paging structure.

Extend the IOMMU_HWPT_ALLOC ioctl to accept an IOMMU driver specific data
input which is tagged by the enum iommu_hwpt_data_type. Also, update the
@pt_id to accept hwpt_id too besides an ioas_id. Then, use them to allocate
a hw_pagetable of IOMMUFD_OBJ_HWPT_NESTED type using the
iommufd_hw_pagetable_alloc_nested() allocator.

Link: https://lore.kernel.org/r/20231026043938.63898-8-yi.l.liu@intel.com
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Co-developed-by: default avatarYi Liu <yi.l.liu@intel.com>
Signed-off-by: default avatarYi Liu <yi.l.liu@intel.com>
Reviewed-by: default avatarKevin Tian <kevin.tian@intel.com>
Reviewed-by: default avatarJason Gunthorpe <jgg@nvidia.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@nvidia.com>
parent 2bdabb8e
...@@ -588,7 +588,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, ...@@ -588,7 +588,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
} }
hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, 0, hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, 0,
immediate_attach); immediate_attach, NULL);
if (IS_ERR(hwpt_paging)) { if (IS_ERR(hwpt_paging)) {
destroy_hwpt = ERR_CAST(hwpt_paging); destroy_hwpt = ERR_CAST(hwpt_paging);
goto out_unlock; goto out_unlock;
...@@ -628,6 +628,7 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, ...@@ -628,6 +628,7 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
return PTR_ERR(pt_obj); return PTR_ERR(pt_obj);
switch (pt_obj->type) { switch (pt_obj->type) {
case IOMMUFD_OBJ_HWPT_NESTED:
case IOMMUFD_OBJ_HWPT_PAGING: { case IOMMUFD_OBJ_HWPT_PAGING: {
struct iommufd_hw_pagetable *hwpt = struct iommufd_hw_pagetable *hwpt =
container_of(pt_obj, struct iommufd_hw_pagetable, obj); container_of(pt_obj, struct iommufd_hw_pagetable, obj);
......
...@@ -44,6 +44,22 @@ void iommufd_hwpt_paging_abort(struct iommufd_object *obj) ...@@ -44,6 +44,22 @@ void iommufd_hwpt_paging_abort(struct iommufd_object *obj)
iommufd_hwpt_paging_destroy(obj); iommufd_hwpt_paging_destroy(obj);
} }
/*
 * Release a NESTED hw_pagetable object: free the underlying iommu_domain
 * (if allocation got far enough to create one) and drop the users
 * reference on the parent PAGING hwpt taken by iommufd_hwpt_nested_alloc().
 */
void iommufd_hwpt_nested_destroy(struct iommufd_object *obj)
{
struct iommufd_hwpt_nested *hwpt_nested =
container_of(obj, struct iommufd_hwpt_nested, common.obj);

/* domain is NULL when the allocation failed before/inside domain_alloc_user */
if (hwpt_nested->common.domain)
iommu_domain_free(hwpt_nested->common.domain);

/* Pairs with refcount_inc() on the parent in iommufd_hwpt_nested_alloc() */
refcount_dec(&hwpt_nested->parent->common.obj.users);
}
/*
 * Abort a partially constructed NESTED hwpt. Unlike the PAGING flavor there
 * is no extra attach/list state to unwind, so teardown is identical to
 * destroy.
 */
void iommufd_hwpt_nested_abort(struct iommufd_object *obj)
{
iommufd_hwpt_nested_destroy(obj);
}
static int static int
iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging) iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging)
{ {
...@@ -68,6 +84,8 @@ iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging) ...@@ -68,6 +84,8 @@ iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging)
* @idev: Device to get an iommu_domain for * @idev: Device to get an iommu_domain for
* @flags: Flags from userspace * @flags: Flags from userspace
* @immediate_attach: True if idev should be attached to the hwpt * @immediate_attach: True if idev should be attached to the hwpt
* @user_data: The user provided driver specific data describing the domain to
* create
* *
* Allocate a new iommu_domain and return it as a hw_pagetable. The HWPT * Allocate a new iommu_domain and return it as a hw_pagetable. The HWPT
* will be linked to the given ioas and upon return the underlying iommu_domain * will be linked to the given ioas and upon return the underlying iommu_domain
...@@ -80,7 +98,8 @@ iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging) ...@@ -80,7 +98,8 @@ iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging)
struct iommufd_hwpt_paging * struct iommufd_hwpt_paging *
iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
struct iommufd_device *idev, u32 flags, struct iommufd_device *idev, u32 flags,
bool immediate_attach) bool immediate_attach,
const struct iommu_user_data *user_data)
{ {
const u32 valid_flags = IOMMU_HWPT_ALLOC_NEST_PARENT | const u32 valid_flags = IOMMU_HWPT_ALLOC_NEST_PARENT |
IOMMU_HWPT_ALLOC_DIRTY_TRACKING; IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
...@@ -91,7 +110,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, ...@@ -91,7 +110,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
lockdep_assert_held(&ioas->mutex); lockdep_assert_held(&ioas->mutex);
if (flags && !ops->domain_alloc_user) if ((flags || user_data) && !ops->domain_alloc_user)
return ERR_PTR(-EOPNOTSUPP); return ERR_PTR(-EOPNOTSUPP);
if (flags & ~valid_flags) if (flags & ~valid_flags)
return ERR_PTR(-EOPNOTSUPP); return ERR_PTR(-EOPNOTSUPP);
...@@ -106,10 +125,11 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, ...@@ -106,10 +125,11 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
/* Pairs with iommufd_hw_pagetable_destroy() */ /* Pairs with iommufd_hw_pagetable_destroy() */
refcount_inc(&ioas->obj.users); refcount_inc(&ioas->obj.users);
hwpt_paging->ioas = ioas; hwpt_paging->ioas = ioas;
hwpt_paging->nest_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
if (ops->domain_alloc_user) { if (ops->domain_alloc_user) {
hwpt->domain = hwpt->domain = ops->domain_alloc_user(idev->dev, flags, NULL,
ops->domain_alloc_user(idev->dev, flags, NULL, NULL); user_data);
if (IS_ERR(hwpt->domain)) { if (IS_ERR(hwpt->domain)) {
rc = PTR_ERR(hwpt->domain); rc = PTR_ERR(hwpt->domain);
hwpt->domain = NULL; hwpt->domain = NULL;
...@@ -169,9 +189,70 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, ...@@ -169,9 +189,70 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
return ERR_PTR(rc); return ERR_PTR(rc);
} }
/**
 * iommufd_hwpt_nested_alloc() - Get a NESTED iommu_domain for a device
 * @ictx: iommufd context
 * @parent: Parent PAGING-type hwpt to associate the domain with
 * @idev: Device to get an iommu_domain for
 * @flags: Flags from userspace; must be 0 for a nested allocation
 * @user_data: The user provided driver specific data describing the domain.
 *             Must be valid and have a non-zero length
 *
 * Allocate a new iommu_domain (must be IOMMU_DOMAIN_NESTED) and return it as
 * a NESTED hw_pagetable. The given parent PAGING-type hwpt must be capable of
 * being a parent.
 *
 * Return: the new hwpt_nested on success, ERR_PTR() on failure.
 */
static struct iommufd_hwpt_nested *
iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
struct iommufd_hwpt_paging *parent,
struct iommufd_device *idev, u32 flags,
const struct iommu_user_data *user_data)
{
const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
struct iommufd_hwpt_nested *hwpt_nested;
struct iommufd_hw_pagetable *hwpt;
int rc;

/*
 * A nested allocation takes no generic flags, always requires
 * driver-specific data, and needs a driver implementing
 * domain_alloc_user.
 */
if (flags || !user_data->len || !ops->domain_alloc_user)
return ERR_PTR(-EOPNOTSUPP);
/*
 * The parent must have been explicitly allocated with
 * IOMMU_HWPT_ALLOC_NEST_PARENT; an auto domain cannot be a parent.
 */
if (parent->auto_domain || !parent->nest_parent)
return ERR_PTR(-EINVAL);

hwpt_nested = __iommufd_object_alloc(
ictx, hwpt_nested, IOMMUFD_OBJ_HWPT_NESTED, common.obj);
if (IS_ERR(hwpt_nested))
return ERR_CAST(hwpt_nested);
hwpt = &hwpt_nested->common;

/*
 * Take the parent reference before the domain allocation so both error
 * gotos can unwind through iommufd_hwpt_nested_abort(), whose destroy
 * path unconditionally does refcount_dec() on the parent.
 */
refcount_inc(&parent->common.obj.users);
hwpt_nested->parent = parent;

hwpt->domain = ops->domain_alloc_user(idev->dev, flags,
parent->common.domain, user_data);
if (IS_ERR(hwpt->domain)) {
rc = PTR_ERR(hwpt->domain);
/* NULL it so the destroy path doesn't free an ERR_PTR value */
hwpt->domain = NULL;
goto out_abort;
}

/* Driver contract: allocating with a parent must yield IOMMU_DOMAIN_NESTED */
if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) {
rc = -EINVAL;
goto out_abort;
}
return hwpt_nested;

out_abort:
iommufd_object_abort_and_destroy(ictx, &hwpt->obj);
return ERR_PTR(rc);
}
int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
{ {
struct iommu_hwpt_alloc *cmd = ucmd->cmd; struct iommu_hwpt_alloc *cmd = ucmd->cmd;
const struct iommu_user_data user_data = {
.type = cmd->data_type,
.uptr = u64_to_user_ptr(cmd->data_uptr),
.len = cmd->data_len,
};
struct iommufd_hw_pagetable *hwpt; struct iommufd_hw_pagetable *hwpt;
struct iommufd_ioas *ioas = NULL; struct iommufd_ioas *ioas = NULL;
struct iommufd_object *pt_obj; struct iommufd_object *pt_obj;
...@@ -180,6 +261,8 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) ...@@ -180,6 +261,8 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
if (cmd->__reserved) if (cmd->__reserved)
return -EOPNOTSUPP; return -EOPNOTSUPP;
if (cmd->data_type == IOMMU_HWPT_DATA_NONE && cmd->data_len)
return -EINVAL;
idev = iommufd_get_device(ucmd, cmd->dev_id); idev = iommufd_get_device(ucmd, cmd->dev_id);
if (IS_ERR(idev)) if (IS_ERR(idev))
...@@ -196,13 +279,27 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) ...@@ -196,13 +279,27 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
ioas = container_of(pt_obj, struct iommufd_ioas, obj); ioas = container_of(pt_obj, struct iommufd_ioas, obj);
mutex_lock(&ioas->mutex); mutex_lock(&ioas->mutex);
hwpt_paging = iommufd_hwpt_paging_alloc(ucmd->ictx, ioas, idev, hwpt_paging = iommufd_hwpt_paging_alloc(
cmd->flags, false); ucmd->ictx, ioas, idev, cmd->flags, false,
user_data.len ? &user_data : NULL);
if (IS_ERR(hwpt_paging)) { if (IS_ERR(hwpt_paging)) {
rc = PTR_ERR(hwpt_paging); rc = PTR_ERR(hwpt_paging);
goto out_unlock; goto out_unlock;
} }
hwpt = &hwpt_paging->common; hwpt = &hwpt_paging->common;
} else if (pt_obj->type == IOMMUFD_OBJ_HWPT_PAGING) {
struct iommufd_hwpt_nested *hwpt_nested;
hwpt_nested = iommufd_hwpt_nested_alloc(
ucmd->ictx,
container_of(pt_obj, struct iommufd_hwpt_paging,
common.obj),
idev, cmd->flags, &user_data);
if (IS_ERR(hwpt_nested)) {
rc = PTR_ERR(hwpt_nested);
goto out_unlock;
}
hwpt = &hwpt_nested->common;
} else { } else {
rc = -EINVAL; rc = -EINVAL;
goto out_put_pt; goto out_put_pt;
......
...@@ -124,6 +124,7 @@ enum iommufd_object_type { ...@@ -124,6 +124,7 @@ enum iommufd_object_type {
IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE, IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE,
IOMMUFD_OBJ_DEVICE, IOMMUFD_OBJ_DEVICE,
IOMMUFD_OBJ_HWPT_PAGING, IOMMUFD_OBJ_HWPT_PAGING,
IOMMUFD_OBJ_HWPT_NESTED,
IOMMUFD_OBJ_IOAS, IOMMUFD_OBJ_IOAS,
IOMMUFD_OBJ_ACCESS, IOMMUFD_OBJ_ACCESS,
#ifdef CONFIG_IOMMUFD_TEST #ifdef CONFIG_IOMMUFD_TEST
...@@ -255,10 +256,16 @@ struct iommufd_hwpt_paging { ...@@ -255,10 +256,16 @@ struct iommufd_hwpt_paging {
bool auto_domain : 1; bool auto_domain : 1;
bool enforce_cache_coherency : 1; bool enforce_cache_coherency : 1;
bool msi_cookie : 1; bool msi_cookie : 1;
bool nest_parent : 1;
/* Head at iommufd_ioas::hwpt_list */ /* Head at iommufd_ioas::hwpt_list */
struct list_head hwpt_item; struct list_head hwpt_item;
}; };
/*
 * A user-managed NESTED hw_pagetable whose translation is layered on top of
 * a parent PAGING-type hwpt (two-stage translation). Holds a users refcount
 * on @parent for its whole lifetime (dropped in iommufd_hwpt_nested_destroy()).
 */
struct iommufd_hwpt_nested {
struct iommufd_hw_pagetable common;    /* embedded base hw_pagetable; obj.type == IOMMUFD_OBJ_HWPT_NESTED */
struct iommufd_hwpt_paging *parent;    /* PAGING hwpt allocated with IOMMU_HWPT_ALLOC_NEST_PARENT */
};
static inline bool hwpt_is_paging(struct iommufd_hw_pagetable *hwpt) static inline bool hwpt_is_paging(struct iommufd_hw_pagetable *hwpt)
{ {
return hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING; return hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING;
...@@ -283,24 +290,31 @@ int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd); ...@@ -283,24 +290,31 @@ int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd);
struct iommufd_hwpt_paging * struct iommufd_hwpt_paging *
iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
struct iommufd_device *idev, u32 flags, struct iommufd_device *idev, u32 flags,
bool immediate_attach); bool immediate_attach,
const struct iommu_user_data *user_data);
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
struct iommufd_device *idev); struct iommufd_device *idev);
struct iommufd_hw_pagetable * struct iommufd_hw_pagetable *
iommufd_hw_pagetable_detach(struct iommufd_device *idev); iommufd_hw_pagetable_detach(struct iommufd_device *idev);
void iommufd_hwpt_paging_destroy(struct iommufd_object *obj); void iommufd_hwpt_paging_destroy(struct iommufd_object *obj);
void iommufd_hwpt_paging_abort(struct iommufd_object *obj); void iommufd_hwpt_paging_abort(struct iommufd_object *obj);
void iommufd_hwpt_nested_destroy(struct iommufd_object *obj);
void iommufd_hwpt_nested_abort(struct iommufd_object *obj);
int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd); int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd);
static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx, static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
struct iommufd_hw_pagetable *hwpt) struct iommufd_hw_pagetable *hwpt)
{ {
if (hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING) {
struct iommufd_hwpt_paging *hwpt_paging = to_hwpt_paging(hwpt); struct iommufd_hwpt_paging *hwpt_paging = to_hwpt_paging(hwpt);
lockdep_assert_not_held(&hwpt_paging->ioas->mutex); lockdep_assert_not_held(&hwpt_paging->ioas->mutex);
if (hwpt_paging->auto_domain)
if (hwpt_paging->auto_domain) {
iommufd_object_deref_user(ictx, &hwpt->obj); iommufd_object_deref_user(ictx, &hwpt->obj);
else return;
}
}
refcount_dec(&hwpt->obj.users); refcount_dec(&hwpt->obj.users);
} }
......
...@@ -492,6 +492,10 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { ...@@ -492,6 +492,10 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
.destroy = iommufd_hwpt_paging_destroy, .destroy = iommufd_hwpt_paging_destroy,
.abort = iommufd_hwpt_paging_abort, .abort = iommufd_hwpt_paging_abort,
}, },
[IOMMUFD_OBJ_HWPT_NESTED] = {
.destroy = iommufd_hwpt_nested_destroy,
.abort = iommufd_hwpt_nested_abort,
},
#ifdef CONFIG_IOMMUFD_TEST #ifdef CONFIG_IOMMUFD_TEST
[IOMMUFD_OBJ_SELFTEST] = { [IOMMUFD_OBJ_SELFTEST] = {
.destroy = iommufd_selftest_destroy, .destroy = iommufd_selftest_destroy,
......
...@@ -361,20 +361,44 @@ enum iommufd_hwpt_alloc_flags { ...@@ -361,20 +361,44 @@ enum iommufd_hwpt_alloc_flags {
IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1, IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
}; };
/**
 * enum iommu_hwpt_data_type - IOMMU HWPT Data Type
 * @IOMMU_HWPT_DATA_NONE: no data
 *
 * Tags the layout of the driver specific data passed through @data_uptr of
 * struct iommu_hwpt_alloc. With IOMMU_HWPT_DATA_NONE, @data_len and
 * @data_uptr must be zero.
 */
enum iommu_hwpt_data_type {
IOMMU_HWPT_DATA_NONE,
};
/** /**
* struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC) * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC)
* @size: sizeof(struct iommu_hwpt_alloc) * @size: sizeof(struct iommu_hwpt_alloc)
* @flags: Combination of enum iommufd_hwpt_alloc_flags * @flags: Combination of enum iommufd_hwpt_alloc_flags
* @dev_id: The device to allocate this HWPT for * @dev_id: The device to allocate this HWPT for
* @pt_id: The IOAS to connect this HWPT to * @pt_id: The IOAS or HWPT to connect this HWPT to
* @out_hwpt_id: The ID of the new HWPT * @out_hwpt_id: The ID of the new HWPT
* @__reserved: Must be 0 * @__reserved: Must be 0
* @data_type: One of enum iommu_hwpt_data_type
* @data_len: Length of the type specific data
* @data_uptr: User pointer to the type specific data
* *
* Explicitly allocate a hardware page table object. This is the same object * Explicitly allocate a hardware page table object. This is the same object
* type that is returned by iommufd_device_attach() and represents the * type that is returned by iommufd_device_attach() and represents the
* underlying iommu driver's iommu_domain kernel object. * underlying iommu driver's iommu_domain kernel object.
* *
* A HWPT will be created with the IOVA mappings from the given IOAS. * A kernel-managed HWPT will be created with the mappings from the given
* IOAS via the @pt_id. The @data_type for this allocation must be set to
* IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a
* nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags.
*
* A user-managed nested HWPT will be created from a given parent HWPT via
* @pt_id, in which the parent HWPT must be allocated previously via the
* same ioctl from a given IOAS (@pt_id). In this case, the @data_type
* must be set to a pre-defined type corresponding to an I/O page table
* type supported by the underlying IOMMU hardware.
*
* If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and
* @data_uptr should be zero. Otherwise, both @data_len and @data_uptr
* must be given.
*/ */
struct iommu_hwpt_alloc { struct iommu_hwpt_alloc {
__u32 size; __u32 size;
...@@ -383,6 +407,9 @@ struct iommu_hwpt_alloc { ...@@ -383,6 +407,9 @@ struct iommu_hwpt_alloc {
__u32 pt_id; __u32 pt_id;
__u32 out_hwpt_id; __u32 out_hwpt_id;
__u32 __reserved; __u32 __reserved;
__u32 data_type;
__u32 data_len;
__aligned_u64 data_uptr;
}; };
#define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC) #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment