Commit d334a563, authored by Tom Lendacky and committed by Joerg Roedel

iommu/amd: Reduce amount of MMIO when submitting commands

As newer, higher speed devices are developed, perf data shows that the
amount of MMIO that is performed when submitting commands to the IOMMU
causes performance issues. Currently, the command submission path reads
the command buffer head and tail pointers and then writes the tail
pointer once the command is ready.

The tail pointer is only ever updated by the driver so it can be tracked
by the driver without having to read it from the hardware.

The head pointer is updated by the hardware, but can be read
opportunistically. Reading the head pointer only when it appears that
there might not be room in the command buffer and then re-checking the
available space reduces the number of times the head pointer has to be
read.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
parent e2f9d45f
...@@ -874,19 +874,20 @@ static int wait_on_sem(volatile u64 *sem) ...@@ -874,19 +874,20 @@ static int wait_on_sem(volatile u64 *sem)
} }
static void copy_cmd_to_buffer(struct amd_iommu *iommu, static void copy_cmd_to_buffer(struct amd_iommu *iommu,
struct iommu_cmd *cmd, struct iommu_cmd *cmd)
u32 tail)
{ {
u8 *target; u8 *target;
target = iommu->cmd_buf + tail; target = iommu->cmd_buf + iommu->cmd_buf_tail;
tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
iommu->cmd_buf_tail += sizeof(*cmd);
iommu->cmd_buf_tail %= CMD_BUFFER_SIZE;
/* Copy command to buffer */ /* Copy command to buffer */
memcpy(target, cmd, sizeof(*cmd)); memcpy(target, cmd, sizeof(*cmd));
/* Tell the IOMMU about it */ /* Tell the IOMMU about it */
writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); writel(iommu->cmd_buf_tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
} }
static void build_completion_wait(struct iommu_cmd *cmd, u64 address) static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
...@@ -1044,23 +1045,31 @@ static int __iommu_queue_command_sync(struct amd_iommu *iommu, ...@@ -1044,23 +1045,31 @@ static int __iommu_queue_command_sync(struct amd_iommu *iommu,
struct iommu_cmd *cmd, struct iommu_cmd *cmd,
bool sync) bool sync)
{ {
u32 left, tail, head, next_tail; bool read_head = true;
u32 left, next_tail;
next_tail = (iommu->cmd_buf_tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
again: again:
left = (iommu->cmd_buf_head - next_tail) % CMD_BUFFER_SIZE;
head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
left = (head - next_tail) % CMD_BUFFER_SIZE;
if (left <= 0x20) { if (left <= 0x20) {
struct iommu_cmd sync_cmd; struct iommu_cmd sync_cmd;
int ret; int ret;
if (read_head) {
/* Update head and recheck remaining space */
iommu->cmd_buf_head = readl(iommu->mmio_base +
MMIO_CMD_HEAD_OFFSET);
read_head = false;
goto again;
}
read_head = true;
iommu->cmd_sem = 0; iommu->cmd_sem = 0;
build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem); build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem);
copy_cmd_to_buffer(iommu, &sync_cmd, tail); copy_cmd_to_buffer(iommu, &sync_cmd);
if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0) if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0)
return ret; return ret;
...@@ -1068,7 +1077,7 @@ static int __iommu_queue_command_sync(struct amd_iommu *iommu, ...@@ -1068,7 +1077,7 @@ static int __iommu_queue_command_sync(struct amd_iommu *iommu,
goto again; goto again;
} }
copy_cmd_to_buffer(iommu, cmd, tail); copy_cmd_to_buffer(iommu, cmd);
/* We need to sync now to make sure all commands are processed */ /* We need to sync now to make sure all commands are processed */
iommu->need_sync = sync; iommu->need_sync = sync;
......
...@@ -588,6 +588,8 @@ void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) ...@@ -588,6 +588,8 @@ void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
iommu->cmd_buf_head = 0;
iommu->cmd_buf_tail = 0;
iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
} }
......
...@@ -516,6 +516,8 @@ struct amd_iommu { ...@@ -516,6 +516,8 @@ struct amd_iommu {
/* command buffer virtual address */ /* command buffer virtual address */
u8 *cmd_buf; u8 *cmd_buf;
u32 cmd_buf_head;
u32 cmd_buf_tail;
/* event buffer virtual address */ /* event buffer virtual address */
u8 *evt_buf; u8 *evt_buf;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment