Commit 2d270df8 authored by Alexey Kardashevskiy, committed by Michael Ellerman

vfio: powerpc/spapr: Move locked_vm accounting to helpers

This moves locked pages accounting to helpers.
They will later be reused for Dynamic DMA windows (DDW).

This reworks debug messages to show the current value and the limit.

This stores the number of locked pages in the container so the iommu
table pointer is not needed when unlocking. This has no effect now, but
it will once multiple tables per container are supported: we will then
allow attaching/detaching groups on the fly and may end up with a
container that has no group attached but whose counter is incremented.

While we are here, update the comment explaining why RLIMIT_MEMLOCK
might need to be bigger than the guest RAM. Also print the pid of the
current process in pr_warn/pr_debug.
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
[aw: for the vfio related changes]
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent 00663d4e
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -29,6 +29,51 @@
 static void tce_iommu_detach_group(void *iommu_data,
 		struct iommu_group *iommu_group);
 
+static long try_increment_locked_vm(long npages)
+{
+	long ret = 0, locked, lock_limit;
+
+	if (!current || !current->mm)
+		return -ESRCH; /* process exited */
+
+	if (!npages)
+		return 0;
+
+	down_write(&current->mm->mmap_sem);
+	locked = current->mm->locked_vm + npages;
+	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+		ret = -ENOMEM;
+	else
+		current->mm->locked_vm += npages;
+
+	pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
+			npages << PAGE_SHIFT,
+			current->mm->locked_vm << PAGE_SHIFT,
+			rlimit(RLIMIT_MEMLOCK),
+			ret ? " - exceeded" : "");
+
+	up_write(&current->mm->mmap_sem);
+
+	return ret;
+}
+
+static void decrement_locked_vm(long npages)
+{
+	if (!current || !current->mm || !npages)
+		return; /* process exited */
+
+	down_write(&current->mm->mmap_sem);
+	if (WARN_ON_ONCE(npages > current->mm->locked_vm))
+		npages = current->mm->locked_vm;
+	current->mm->locked_vm -= npages;
+	pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
+			npages << PAGE_SHIFT,
+			current->mm->locked_vm << PAGE_SHIFT,
+			rlimit(RLIMIT_MEMLOCK));
+	up_write(&current->mm->mmap_sem);
+}
+
 /*
  * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
  *
@@ -45,6 +90,7 @@ struct tce_container {
 	struct mutex lock;
 	struct iommu_table *tbl;
 	bool enabled;
+	unsigned long locked_pages;
 };
 
 static bool tce_page_is_contained(struct page *page, unsigned page_shift)
@@ -60,7 +106,7 @@ static bool tce_page_is_contained(struct page *page, unsigned page_shift)
 static int tce_iommu_enable(struct tce_container *container)
 {
 	int ret = 0;
-	unsigned long locked, lock_limit, npages;
+	unsigned long locked;
 	struct iommu_table *tbl = container->tbl;
 
 	if (!container->tbl)
@@ -89,21 +135,22 @@ static int tce_iommu_enable(struct tce_container *container)
 	 * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits,
 	 * that would effectively kill the guest at random points, much better
 	 * enforcing the limit based on the max that the guest can map.
+	 *
+	 * Unfortunately at the moment it counts whole tables, no matter how
+	 * much memory the guest has. I.e. for 4GB guest and 4 IOMMU groups
+	 * each with 2GB DMA window, 8GB will be counted here. The reason for
+	 * this is that we cannot tell here the amount of RAM used by the guest
+	 * as this information is only available from KVM and VFIO is
+	 * KVM agnostic.
 	 */
-	down_write(&current->mm->mmap_sem);
-	npages = (tbl->it_size << tbl->it_page_shift) >> PAGE_SHIFT;
-	locked = current->mm->locked_vm + npages;
-	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-	if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
-		pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n",
-				rlimit(RLIMIT_MEMLOCK));
-		ret = -ENOMEM;
-	} else {
-		current->mm->locked_vm += npages;
+	locked = (tbl->it_size << tbl->it_page_shift) >> PAGE_SHIFT;
+	ret = try_increment_locked_vm(locked);
+	if (ret)
+		return ret;
 
-		container->enabled = true;
-	}
-	up_write(&current->mm->mmap_sem);
+	container->locked_pages = locked;
+
+	container->enabled = true;
 
 	return ret;
 }
@@ -115,13 +162,10 @@ static void tce_iommu_disable(struct tce_container *container)
 
 	container->enabled = false;
 
-	if (!container->tbl || !current->mm)
+	if (!current->mm)
 		return;
 
-	down_write(&current->mm->mmap_sem);
-	current->mm->locked_vm -= (container->tbl->it_size <<
-			container->tbl->it_page_shift) >> PAGE_SHIFT;
-	up_write(&current->mm->mmap_sem);
+	decrement_locked_vm(container->locked_pages);
 }
 
 static void *tce_iommu_open(unsigned long arg)
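For context (not part of the commit): the accounting above is driven from
userspace when a SPAPR TCE container is enabled, since VFIO_IOMMU_ENABLE
lands in tce_iommu_enable() and hence in try_increment_locked_vm(). Below is
a minimal sketch of that path; the group number "42" and the use of
RLIM_INFINITY are illustrative assumptions, and error handling is abbreviated.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <linux/vfio.h>

int main(void)
{
	/* Raising the limit needs privilege (e.g. CAP_SYS_RESOURCE or a
	 * generous hard limit); the whole DMA window is accounted, so the
	 * small distro defaults are far too low for a guest. */
	struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY };
	int container, group;

	if (setrlimit(RLIMIT_MEMLOCK, &rl))
		perror("setrlimit");

	container = open("/dev/vfio/vfio", O_RDWR);
	group = open("/dev/vfio/42", O_RDWR);	/* "42" is a placeholder */
	if (container < 0 || group < 0)
		return 1;

	/* Attach the group to the container, then select the SPAPR TCE
	 * IOMMU backend. */
	if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container) ||
	    ioctl(container, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU))
		return 1;

	/* Reaches tce_iommu_enable(): with this patch, an over-limit DMA
	 * window fails here with errno == ENOMEM from
	 * try_increment_locked_vm() rather than succeeding silently. */
	if (ioctl(container, VFIO_IOMMU_ENABLE))
		perror("VFIO_IOMMU_ENABLE");

	return 0;
}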