Commit 48f06ca4 authored by Alex Williamson's avatar Alex Williamson

Merge branch 'v5.16/vfio/colin_xu_igd_opregion_2.0_v8' into v5.16/vfio/next

parents 052493d5 49ba1a29
...@@ -25,20 +25,121 @@ ...@@ -25,20 +25,121 @@
#define OPREGION_RVDS 0x3c2 #define OPREGION_RVDS 0x3c2
#define OPREGION_VERSION 0x16 #define OPREGION_VERSION 0x16
/**
 * struct igd_opregion_vbt - Per-region private data for the IGD OpRegion.
 * @opregion: Kernel mapping of the OpRegion itself.
 * @vbt_ex: Kernel mapping of the extended VBT, or NULL when no extended
 *          VBT was mapped.
 */
struct igd_opregion_vbt {
	void *opregion;
	void *vbt_ex;
};
/**
 * igd_opregion_shift_copy() - Copy a chunk to the user buffer and advance
 *                             the caller's cursors.
 * @dst: Base of the user buffer to copy to.
 * @off: Current offset into @dst. Increased by @bytes on success.
 * @src: Kernel source buffer to copy from.
 * @pos: Current position being read. Increased by @bytes on success.
 * @remaining: Bytes still left to copy. Decreased by @bytes on success.
 * @bytes: Number of bytes to copy and to adjust @off, @pos and @remaining by.
 *
 * Copies @bytes from @src to @dst + *@off. The three cursors are only
 * updated after the copy succeeded; on failure they are left untouched.
 *
 * The return type is int (not an unsigned type) so that the negative errno
 * documented below is actually representable.
 *
 * Return: 0 on success, -EFAULT otherwise.
 */
static inline int igd_opregion_shift_copy(char __user *dst,
					  loff_t *off,
					  const void *src,
					  loff_t *pos,
					  size_t *remaining,
					  size_t bytes)
{
	/* Any non-zero result from copy_to_user() is treated as a fault. */
	if (copy_to_user(dst + (*off), src, bytes))
		return -EFAULT;

	*off += bytes;
	*pos += bytes;
	*remaining -= bytes;

	return 0;
}
static ssize_t vfio_pci_igd_rw(struct vfio_pci_core_device *vdev, static ssize_t vfio_pci_igd_rw(struct vfio_pci_core_device *vdev,
char __user *buf, size_t count, loff_t *ppos, char __user *buf, size_t count, loff_t *ppos,
bool iswrite) bool iswrite)
{ {
unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS; unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
void *base = vdev->region[i].data; struct igd_opregion_vbt *opregionvbt = vdev->region[i].data;
loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK, off = 0;
size_t remaining;
if (pos >= vdev->region[i].size || iswrite) if (pos >= vdev->region[i].size || iswrite)
return -EINVAL; return -EINVAL;
count = min(count, (size_t)(vdev->region[i].size - pos)); count = min_t(size_t, count, vdev->region[i].size - pos);
remaining = count;
/* Copy until OpRegion version */
if (remaining && pos < OPREGION_VERSION) {
size_t bytes = min_t(size_t, remaining, OPREGION_VERSION - pos);
if (igd_opregion_shift_copy(buf, &off,
opregionvbt->opregion + pos, &pos,
&remaining, bytes))
return -EFAULT;
}
/* Copy patched (if necessary) OpRegion version */
if (remaining && pos < OPREGION_VERSION + sizeof(__le16)) {
size_t bytes = min_t(size_t, remaining,
OPREGION_VERSION + sizeof(__le16) - pos);
__le16 version = *(__le16 *)(opregionvbt->opregion +
OPREGION_VERSION);
/* Patch to 2.1 if OpRegion 2.0 has extended VBT */
if (le16_to_cpu(version) == 0x0200 && opregionvbt->vbt_ex)
version = cpu_to_le16(0x0201);
if (igd_opregion_shift_copy(buf, &off,
&version + (pos - OPREGION_VERSION),
&pos, &remaining, bytes))
return -EFAULT;
}
/* Copy until RVDA */
if (remaining && pos < OPREGION_RVDA) {
size_t bytes = min_t(size_t, remaining, OPREGION_RVDA - pos);
if (copy_to_user(buf, base + pos, count)) if (igd_opregion_shift_copy(buf, &off,
opregionvbt->opregion + pos, &pos,
&remaining, bytes))
return -EFAULT;
}
/* Copy modified (if necessary) RVDA */
if (remaining && pos < OPREGION_RVDA + sizeof(__le64)) {
size_t bytes = min_t(size_t, remaining,
OPREGION_RVDA + sizeof(__le64) - pos);
__le64 rvda = cpu_to_le64(opregionvbt->vbt_ex ?
OPREGION_SIZE : 0);
if (igd_opregion_shift_copy(buf, &off,
&rvda + (pos - OPREGION_RVDA),
&pos, &remaining, bytes))
return -EFAULT;
}
/* Copy the rest of OpRegion */
if (remaining && pos < OPREGION_SIZE) {
size_t bytes = min_t(size_t, remaining, OPREGION_SIZE - pos);
if (igd_opregion_shift_copy(buf, &off,
opregionvbt->opregion + pos, &pos,
&remaining, bytes))
return -EFAULT;
}
/* Copy extended VBT if exists */
if (remaining &&
copy_to_user(buf + off, opregionvbt->vbt_ex + (pos - OPREGION_SIZE),
remaining))
return -EFAULT; return -EFAULT;
*ppos += count; *ppos += count;
...@@ -49,7 +150,13 @@ static ssize_t vfio_pci_igd_rw(struct vfio_pci_core_device *vdev, ...@@ -49,7 +150,13 @@ static ssize_t vfio_pci_igd_rw(struct vfio_pci_core_device *vdev,
static void vfio_pci_igd_release(struct vfio_pci_core_device *vdev, static void vfio_pci_igd_release(struct vfio_pci_core_device *vdev,
struct vfio_pci_region *region) struct vfio_pci_region *region)
{ {
memunmap(region->data); struct igd_opregion_vbt *opregionvbt = region->data;
if (opregionvbt->vbt_ex)
memunmap(opregionvbt->vbt_ex);
memunmap(opregionvbt->opregion);
kfree(opregionvbt);
} }
static const struct vfio_pci_regops vfio_pci_igd_regops = { static const struct vfio_pci_regops vfio_pci_igd_regops = {
...@@ -61,7 +168,7 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_core_device *vdev) ...@@ -61,7 +168,7 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_core_device *vdev)
{ {
__le32 *dwordp = (__le32 *)(vdev->vconfig + OPREGION_PCI_ADDR); __le32 *dwordp = (__le32 *)(vdev->vconfig + OPREGION_PCI_ADDR);
u32 addr, size; u32 addr, size;
void *base; struct igd_opregion_vbt *opregionvbt;
int ret; int ret;
u16 version; u16 version;
...@@ -72,84 +179,93 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_core_device *vdev) ...@@ -72,84 +179,93 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_core_device *vdev)
if (!addr || !(~addr)) if (!addr || !(~addr))
return -ENODEV; return -ENODEV;
base = memremap(addr, OPREGION_SIZE, MEMREMAP_WB); opregionvbt = kzalloc(sizeof(*opregionvbt), GFP_KERNEL);
if (!base) if (!opregionvbt)
return -ENOMEM;
opregionvbt->opregion = memremap(addr, OPREGION_SIZE, MEMREMAP_WB);
if (!opregionvbt->opregion) {
kfree(opregionvbt);
return -ENOMEM; return -ENOMEM;
}
if (memcmp(base, OPREGION_SIGNATURE, 16)) { if (memcmp(opregionvbt->opregion, OPREGION_SIGNATURE, 16)) {
memunmap(base); memunmap(opregionvbt->opregion);
kfree(opregionvbt);
return -EINVAL; return -EINVAL;
} }
size = le32_to_cpu(*(__le32 *)(base + 16)); size = le32_to_cpu(*(__le32 *)(opregionvbt->opregion + 16));
if (!size) { if (!size) {
memunmap(base); memunmap(opregionvbt->opregion);
kfree(opregionvbt);
return -EINVAL; return -EINVAL;
} }
size *= 1024; /* In KB */ size *= 1024; /* In KB */
/* /*
* Support opregion v2.1+ * OpRegion and VBT:
* When VBT data exceeds 6KB size and cannot be within mailbox #4, then * When VBT data doesn't exceed 6KB, it's stored in Mailbox #4.
* the Extended VBT region next to opregion is used to hold the VBT data. * When VBT data exceeds 6KB size, Mailbox #4 is no longer large enough
* RVDA (Relative Address of VBT Data from Opregion Base) and RVDS * to hold the VBT data, the Extended VBT region is introduced since
* (Raw VBT Data Size) from opregion structure member are used to hold the * OpRegion 2.0 to hold the VBT data. Since OpRegion 2.0, RVDA/RVDS are
* address from region base and size of VBT data. RVDA/RVDS are not * introduced to define the extended VBT data location and size.
* defined before opregion 2.0. * OpRegion 2.0: RVDA defines the absolute physical address of the
* * extended VBT data, RVDS defines the VBT data size.
* opregion 2.1+: RVDA is unsigned, relative offset from * OpRegion 2.1 and above: RVDA defines the relative address of the
* opregion base, and should point to the end of opregion. * extended VBT data to OpRegion base, RVDS defines the VBT data size.
* otherwise, exposing to userspace to allow read access to everything between
* the OpRegion and VBT is not safe.
* RVDS is defined as size in bytes.
* *
* opregion 2.0: rvda is the physical VBT address. * Due to the RVDA definition diff in OpRegion VBT (also the only diff
* Since rvda is HPA it cannot be directly used in guest. * between 2.0 and 2.1), exposing OpRegion and VBT as a contiguous range
* And it should not be practically available for end user,so it is not supported. * for OpRegion 2.0 and above makes it possible to support the
* non-contiguous VBT through a single vfio region. From r/w ops view,
* only contiguous VBT after OpRegion with version 2.1+ is exposed,
* regardless the host OpRegion is 2.0 or non-contiguous 2.1+. The r/w
* ops will on-the-fly shift the actual offset into VBT so that data at
* correct position can be returned to the requester.
*/ */
version = le16_to_cpu(*(__le16 *)(base + OPREGION_VERSION)); version = le16_to_cpu(*(__le16 *)(opregionvbt->opregion +
OPREGION_VERSION));
if (version >= 0x0200) { if (version >= 0x0200) {
u64 rvda; u64 rvda = le64_to_cpu(*(__le64 *)(opregionvbt->opregion +
u32 rvds; OPREGION_RVDA));
u32 rvds = le32_to_cpu(*(__le32 *)(opregionvbt->opregion +
OPREGION_RVDS));
rvda = le64_to_cpu(*(__le64 *)(base + OPREGION_RVDA)); /* The extended VBT is valid only when RVDA/RVDS are non-zero */
rvds = le32_to_cpu(*(__le32 *)(base + OPREGION_RVDS));
if (rvda && rvds) { if (rvda && rvds) {
/* no support for opregion v2.0 with physical VBT address */ size += rvds;
if (version == 0x0200) {
memunmap(base);
pci_err(vdev->pdev,
"IGD assignment does not support opregion v2.0 with an extended VBT region\n");
return -EINVAL;
}
if (rvda != size) { /*
memunmap(base); * Extended VBT location by RVDA:
pci_err(vdev->pdev, * Absolute physical addr for 2.0.
"Extended VBT does not follow opregion on version 0x%04x\n", * Relative addr to OpRegion header for 2.1+.
version); */
return -EINVAL; if (version == 0x0200)
addr = rvda;
else
addr += rvda;
opregionvbt->vbt_ex = memremap(addr, rvds, MEMREMAP_WB);
if (!opregionvbt->vbt_ex) {
memunmap(opregionvbt->opregion);
kfree(opregionvbt);
return -ENOMEM;
} }
/* region size for opregion v2.0+: opregion and VBT size. */
size += rvds;
} }
} }
if (size != OPREGION_SIZE) {
memunmap(base);
base = memremap(addr, size, MEMREMAP_WB);
if (!base)
return -ENOMEM;
}
ret = vfio_pci_register_dev_region(vdev, ret = vfio_pci_register_dev_region(vdev,
PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE, PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &vfio_pci_igd_regops,
&vfio_pci_igd_regops, size, VFIO_REGION_INFO_FLAG_READ, base); size, VFIO_REGION_INFO_FLAG_READ, opregionvbt);
if (ret) { if (ret) {
memunmap(base); if (opregionvbt->vbt_ex)
memunmap(opregionvbt->vbt_ex);
memunmap(opregionvbt->opregion);
kfree(opregionvbt);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment