Commit 187d2929 authored by Rob Herring

drm/panfrost: Add support for GPU heap allocations

The midgard/bifrost GPUs need to allocate GPU heap memory which is
allocated on GPU page faults and not pinned in memory. The vendor driver
calls this functionality GROW_ON_GPF.

This implementation assumes that BOs allocated with the
PANFROST_BO_HEAP flag are never mmapped or exported. Both of those may
actually work, but I'm unsure if there's some interaction there. It
would cause the whole object to be pinned in memory which would defeat
the point of this.

On faults, we map in 2MB at a time in order to utilize huge pages (if
enabled). Currently, once we've mapped pages in, they are only unmapped
if the BO is freed. Once we add shrinker support, we can unmap pages
with the shrinker.
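
For illustration only (not part of this patch): a minimal userspace sketch of how a heap BO might be requested once this lands. It assumes a file descriptor already opened on the Panfrost render node (e.g. /dev/dri/renderD128) and the UAPI additions below; the helper name, include path and error handling are made up for the example.

#include <string.h>
#include <sys/ioctl.h>
#include <drm/panfrost_drm.h>   /* assumed install path for the UAPI header */

/* Hypothetical helper: ask the kernel for a heap BO. */
static int create_heap_bo(int fd, __u32 size, __u32 *handle, __u64 *gpu_va)
{
        struct drm_panfrost_create_bo arg;

        memset(&arg, 0, sizeof(arg));   /* pad must be zero */
        arg.size = size;                /* rounded up to 2MB by the kernel */
        /* Heap BOs must also be NOEXEC, or the ioctl returns -EINVAL. */
        arg.flags = PANFROST_BO_HEAP | PANFROST_BO_NOEXEC;

        if (ioctl(fd, DRM_IOCTL_PANFROST_CREATE_BO, &arg) < 0)
                return -1;

        *handle = arg.handle;           /* GEM handle for job BO lists */
        *gpu_va = arg.offset;           /* GPU VA of the heap region */
        /* Don't mmap or export this handle: its pages are not pinned. */
        return 0;
}

The point is that userspace only sets the new flag; backing pages then appear 2MB at a time as the GPU faults on the heap range.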

Cc: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Cc: Boris Brezillon <boris.brezillon@collabora.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20190808222200.13176-9-robh@kernel.org
parent b31bdd13
drivers/gpu/drm/panfrost/TODO
@@ -10,8 +10,6 @@
   The hard part is handling when more address spaces are needed than what
   the h/w provides.
 
-- Support pinning pages on demand (GPU page faults).
-
 - Support userspace controlled GPU virtual addresses. Needed for Vulkan. (Tomeu)
 
 - Compute job support. So called 'compute only' jobs need to be plumbed up to
drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -82,7 +82,12 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
         struct drm_panfrost_create_bo *args = data;
 
         if (!args->size || args->pad ||
-            (args->flags & ~PANFROST_BO_NOEXEC))
+            (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP)))
+                return -EINVAL;
+
+        /* Heaps should never be executable */
+        if ((args->flags & PANFROST_BO_HEAP) &&
+            !(args->flags & PANFROST_BO_NOEXEC))
                 return -EINVAL;
 
         bo = panfrost_gem_create_with_handle(file, dev, args->size, args->flags,
@@ -297,6 +302,10 @@ static int panfrost_ioctl_mmap_bo(struct drm_device *dev, void *data,
                 return -ENOENT;
         }
 
+        /* Don't allow mmapping of heap objects as pages are not pinned. */
+        if (to_panfrost_bo(gem_obj)->is_heap)
+                return -EINVAL;
+
         ret = drm_gem_create_mmap_offset(gem_obj);
         if (ret == 0)
                 args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
drivers/gpu/drm/panfrost/panfrost_gem.c
@@ -19,6 +19,20 @@ static void panfrost_gem_free_object(struct drm_gem_object *obj)
         struct panfrost_gem_object *bo = to_panfrost_bo(obj);
         struct panfrost_device *pfdev = obj->dev->dev_private;
 
+        if (bo->sgts) {
+                int i;
+                int n_sgt = bo->base.base.size / SZ_2M;
+
+                for (i = 0; i < n_sgt; i++) {
+                        if (bo->sgts[i].sgl) {
+                                dma_unmap_sg(pfdev->dev, bo->sgts[i].sgl,
+                                             bo->sgts[i].nents, DMA_BIDIRECTIONAL);
+                                sg_free_table(&bo->sgts[i]);
+                        }
+                }
+                kfree(bo->sgts);
+        }
+
         mutex_lock(&pfdev->shrinker_lock);
         if (!list_empty(&bo->base.madv_list))
                 list_del(&bo->base.madv_list);
@@ -53,10 +67,11 @@ static int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_p
         if (ret)
                 goto out;
 
-        ret = panfrost_mmu_map(bo);
-        if (ret)
-                drm_mm_remove_node(&bo->node);
-
+        if (!bo->is_heap) {
+                ret = panfrost_mmu_map(bo);
+                if (ret)
+                        drm_mm_remove_node(&bo->node);
+        }
 out:
         spin_unlock(&pfdev->mm_lock);
         return ret;
@@ -76,12 +91,20 @@ static void panfrost_gem_close(struct drm_gem_object *obj, struct drm_file *file
         spin_unlock(&pfdev->mm_lock);
 }
 
+static int panfrost_gem_pin(struct drm_gem_object *obj)
+{
+        if (to_panfrost_bo(obj)->is_heap)
+                return -EINVAL;
+
+        return drm_gem_shmem_pin(obj);
+}
+
 static const struct drm_gem_object_funcs panfrost_gem_funcs = {
         .free = panfrost_gem_free_object,
         .open = panfrost_gem_open,
         .close = panfrost_gem_close,
         .print_info = drm_gem_shmem_print_info,
-        .pin = drm_gem_shmem_pin,
+        .pin = panfrost_gem_pin,
         .unpin = drm_gem_shmem_unpin,
         .get_sg_table = drm_gem_shmem_get_sg_table,
         .vmap = drm_gem_shmem_vmap,
@@ -120,12 +143,17 @@ panfrost_gem_create_with_handle(struct drm_file *file_priv,
         struct drm_gem_shmem_object *shmem;
         struct panfrost_gem_object *bo;
 
+        /* Round up heap allocations to 2MB to keep fault handling simple */
+        if (flags & PANFROST_BO_HEAP)
+                size = roundup(size, SZ_2M);
+
         shmem = drm_gem_shmem_create(dev, size);
         if (IS_ERR(shmem))
                 return ERR_CAST(shmem);
 
         bo = to_panfrost_bo(&shmem->base);
         bo->noexec = !!(flags & PANFROST_BO_NOEXEC);
+        bo->is_heap = !!(flags & PANFROST_BO_HEAP);
 
         /*
          * Allocate an id of idr table where the obj is registered
drivers/gpu/drm/panfrost/panfrost_gem.h
@@ -9,10 +9,12 @@
 struct panfrost_gem_object {
         struct drm_gem_shmem_object base;
+        struct sg_table *sgts;
 
         struct drm_mm_node node;
         bool is_mapped          :1;
         bool noexec             :1;
+        bool is_heap            :1;
 };
 
 static inline
@@ -21,6 +23,12 @@ struct panfrost_gem_object *to_panfrost_bo(struct drm_gem_object *obj)
         return container_of(to_drm_gem_shmem_obj(obj), struct panfrost_gem_object, base);
 }
 
+static inline
+struct panfrost_gem_object *drm_mm_node_to_panfrost_bo(struct drm_mm_node *node)
+{
+        return container_of(node, struct panfrost_gem_object, node);
+}
+
 struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t size);
 
 struct drm_gem_object *
drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -2,6 +2,7 @@
 /* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
 #include <linux/bitfield.h>
 #include <linux/delay.h>
+#include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
@@ -9,6 +10,7 @@
 #include <linux/iommu.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/shmem_fs.h>
 #include <linux/sizes.h>
 
 #include "panfrost_device.h"
@@ -240,12 +242,12 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
                 size_t unmapped_page;
                 size_t pgsize = get_pgsize(iova, len - unmapped_len);
 
-                unmapped_page = ops->unmap(ops, iova, pgsize);
-                if (!unmapped_page)
-                        break;
-
-                iova += unmapped_page;
-                unmapped_len += unmapped_page;
+                if (ops->iova_to_phys(ops, iova)) {
+                        unmapped_page = ops->unmap(ops, iova, pgsize);
+                        WARN_ON(unmapped_page != pgsize);
+                }
+                iova += pgsize;
+                unmapped_len += pgsize;
         }
 
         mmu_hw_do_operation(pfdev, 0, bo->node.start << PAGE_SHIFT,
@@ -281,6 +283,105 @@ static const struct iommu_gather_ops mmu_tlb_ops = {
         .tlb_sync = mmu_tlb_sync_context,
 };
 
+static struct drm_mm_node *addr_to_drm_mm_node(struct panfrost_device *pfdev, int as, u64 addr)
+{
+        struct drm_mm_node *node;
+        u64 offset = addr >> PAGE_SHIFT;
+
+        drm_mm_for_each_node(node, &pfdev->mm) {
+                if (offset >= node->start && offset < (node->start + node->size))
+                        return node;
+        }
+        return NULL;
+}
+
+#define NUM_FAULT_PAGES (SZ_2M / PAGE_SIZE)
+
+int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr)
+{
+        int ret, i;
+        struct drm_mm_node *node;
+        struct panfrost_gem_object *bo;
+        struct address_space *mapping;
+        pgoff_t page_offset;
+        struct sg_table *sgt;
+        struct page **pages;
+
+        node = addr_to_drm_mm_node(pfdev, as, addr);
+        if (!node)
+                return -ENOENT;
+
+        bo = drm_mm_node_to_panfrost_bo(node);
+        if (!bo->is_heap) {
+                dev_WARN(pfdev->dev, "matching BO is not heap type (GPU VA = %llx)",
+                         node->start << PAGE_SHIFT);
+                return -EINVAL;
+        }
+        /* Assume 2MB alignment and size multiple */
+        addr &= ~((u64)SZ_2M - 1);
+        page_offset = addr >> PAGE_SHIFT;
+        page_offset -= node->start;
+
+        mutex_lock(&bo->base.pages_lock);
+
+        if (!bo->base.pages) {
+                bo->sgts = kvmalloc_array(bo->base.base.size / SZ_2M,
+                                     sizeof(struct sg_table), GFP_KERNEL | __GFP_ZERO);
+                if (!bo->sgts)
+                        return -ENOMEM;
+
+                pages = kvmalloc_array(bo->base.base.size >> PAGE_SHIFT,
+                                       sizeof(struct page *), GFP_KERNEL | __GFP_ZERO);
+                if (!pages) {
+                        kfree(bo->sgts);
+                        bo->sgts = NULL;
+                        return -ENOMEM;
+                }
+                bo->base.pages = pages;
+                bo->base.pages_use_count = 1;
+        } else
+                pages = bo->base.pages;
+
+        mapping = bo->base.base.filp->f_mapping;
+        mapping_set_unevictable(mapping);
+
+        for (i = page_offset; i < page_offset + NUM_FAULT_PAGES; i++) {
+                pages[i] = shmem_read_mapping_page(mapping, i);
+                if (IS_ERR(pages[i])) {
+                        mutex_unlock(&bo->base.pages_lock);
+                        ret = PTR_ERR(pages[i]);
+                        goto err_pages;
+                }
+        }
+
+        mutex_unlock(&bo->base.pages_lock);
+
+        sgt = &bo->sgts[page_offset / (SZ_2M / PAGE_SIZE)];
+        ret = sg_alloc_table_from_pages(sgt, pages + page_offset,
+                                        NUM_FAULT_PAGES, 0, SZ_2M, GFP_KERNEL);
+        if (ret)
+                goto err_pages;
+
+        if (!dma_map_sg(pfdev->dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL)) {
+                ret = -EINVAL;
+                goto err_map;
+        }
+
+        mmu_map_sg(pfdev, addr, IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt);
+
+        bo->is_mapped = true;
+
+        dev_dbg(pfdev->dev, "mapped page fault @ %llx", addr);
+
+        return 0;
+
+err_map:
+        sg_free_table(sgt);
err_pages:
+        drm_gem_shmem_put_pages(&bo->base);
+        return ret;
+}
+
 static const char *access_type_name(struct panfrost_device *pfdev,
                                     u32 fault_status)
 {
@@ -317,9 +418,7 @@ static irqreturn_t panfrost_mmu_irq_handler_thread(int irq, void *data)
 {
         struct panfrost_device *pfdev = data;
         u32 status = mmu_read(pfdev, MMU_INT_RAWSTAT);
-        int i;
-
-        dev_err(pfdev->dev, "mmu irq status=%x\n", status);
+        int i, ret;
 
         for (i = 0; status; i++) {
                 u32 mask = BIT(i) | BIT(i + 16);
@@ -341,6 +440,18 @@ static irqreturn_t panfrost_mmu_irq_handler_thread(int irq, void *data)
                 access_type = (fault_status >> 8) & 0x3;
                 source_id = (fault_status >> 16);
 
+                /* Page fault only */
+                if ((status & mask) == BIT(i)) {
+                        WARN_ON(exception_type < 0xC1 || exception_type > 0xC4);
+
+                        ret = panfrost_mmu_map_fault_addr(pfdev, i, addr);
+                        if (!ret) {
+                                mmu_write(pfdev, MMU_INT_CLEAR, BIT(i));
+                                status &= ~mask;
+                                continue;
+                        }
+                }
+
                 /* terminal fault, print info about the fault */
                 dev_err(pfdev->dev,
                         "Unhandled Page fault in AS%d at VA 0x%016llX\n"
include/uapi/drm/panfrost_drm.h
@@ -85,6 +85,7 @@ struct drm_panfrost_wait_bo {
 };
 
 #define PANFROST_BO_NOEXEC      1
+#define PANFROST_BO_HEAP        2
 
 /**
  * struct drm_panfrost_create_bo - ioctl argument for creating Panfrost BOs.