Commit d099fc8f authored by Christian König's avatar Christian König

drm/ttm: new TT backend allocation pool v3

This replaces the spaghetti code in the two existing page pools.

First of all depending on the allocation size it is between 3 (1GiB) and
5 (1MiB) times faster than the old implementation.

It makes better use of buddy pages to allow for larger physical contiguous
allocations which should result in better TLB utilization at least for
amdgpu.

Instead of a completely braindead approach of filling the pool with one
CPU while another one is trying to shrink it we only give back freed
pages.

This also results in much less locking contention and a trylock free MM
shrinker callback, so we can guarantee that pages are given back to the
system when needed.

Downside of this is that it takes longer for many small allocations until
the pool is filled up. We could address this, but I couldn't find an use
case where this actually matters. We also don't bother freeing large
chunks of pages any more since the CPU overhead in that path isn't really
that important.

The sysfs files are replaced with a single module parameter, allowing
users to override how many pages should be globally pooled in TTM. This
unfortunately breaks the UAPI slightly, but as far as we know nobody ever
depended on this.

Zeroing memory coming from the pool was handled inconsistently. The
alloc_pages() based pool was zeroing it, the dma_alloc_attr() based one
wasn't. For now the new implementation isn't zeroing pages from the pool
either and only sets the __GFP_ZERO flag when necessary.

The implementation has only 768 lines of code compared to the over 2600
of the old one, and also allows for saving quite a bunch of code in the
drivers since we don't need specialized handling there any more based on
kernel config.

Additional to all of that there was a neat bug with IOMMU, coherent DMA
mappings and huge pages which is now fixed in the new code as well.

v2: make ttm_pool_apply_caching static as reported by the kernel bot, add
    some more checks
v3: fix some more checkpatch.pl warnings
Signed-off-by: default avatarChristian König <christian.koenig@amd.com>
Reviewed-by: default avatarDave Airlie <airlied@redhat.com>
Reviewed-by: default avatarMadhav Chauhan <madhav.chauhan@amd.com>
Tested-by: default avatarHuang Rui <ray.huang@amd.com>
Link: https://patchwork.freedesktop.org/patch/397080/?series=83051&rev=1
parent 5144eead
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
ttm-y := ttm_memory.o ttm_tt.o ttm_bo.o \ ttm-y := ttm_memory.o ttm_tt.o ttm_bo.o \
ttm_bo_util.o ttm_bo_vm.o ttm_module.o \ ttm_bo_util.o ttm_bo_vm.o ttm_module.o \
ttm_execbuf_util.o ttm_page_alloc.o ttm_range_manager.o \ ttm_execbuf_util.o ttm_page_alloc.o ttm_range_manager.o \
ttm_resource.o ttm_resource.o ttm_pool.o
ttm-$(CONFIG_AGP) += ttm_agp_backend.o ttm-$(CONFIG_AGP) += ttm_agp_backend.o
ttm-$(CONFIG_DRM_TTM_DMA_PAGE_POOL) += ttm_page_alloc_dma.o ttm-$(CONFIG_DRM_TTM_DMA_PAGE_POOL) += ttm_page_alloc_dma.o
......
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/swap.h> #include <linux/swap.h>
#include <drm/ttm/ttm_pool.h>
#define TTM_MEMORY_ALLOC_RETRIES 4 #define TTM_MEMORY_ALLOC_RETRIES 4
...@@ -453,6 +454,7 @@ int ttm_mem_global_init(struct ttm_mem_global *glob) ...@@ -453,6 +454,7 @@ int ttm_mem_global_init(struct ttm_mem_global *glob)
} }
ttm_page_alloc_init(glob, glob->zone_kernel->max_mem/(2*PAGE_SIZE)); ttm_page_alloc_init(glob, glob->zone_kernel->max_mem/(2*PAGE_SIZE));
ttm_dma_page_alloc_init(glob, glob->zone_kernel->max_mem/(2*PAGE_SIZE)); ttm_dma_page_alloc_init(glob, glob->zone_kernel->max_mem/(2*PAGE_SIZE));
ttm_pool_mgr_init(glob->zone_kernel->max_mem/(2*PAGE_SIZE));
return 0; return 0;
out_no_zone: out_no_zone:
ttm_mem_global_release(glob); ttm_mem_global_release(glob);
...@@ -467,6 +469,7 @@ void ttm_mem_global_release(struct ttm_mem_global *glob) ...@@ -467,6 +469,7 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
/* let the page allocator first stop the shrink work. */ /* let the page allocator first stop the shrink work. */
ttm_page_alloc_fini(); ttm_page_alloc_fini();
ttm_dma_page_alloc_fini(); ttm_dma_page_alloc_fini();
ttm_pool_mgr_fini();
flush_workqueue(glob->swap_queue); flush_workqueue(glob->swap_queue);
destroy_workqueue(glob->swap_queue); destroy_workqueue(glob->swap_queue);
......
This diff is collapsed.
...@@ -25,6 +25,8 @@ ...@@ -25,6 +25,8 @@
#ifndef _TTM_CACHING_H_ #ifndef _TTM_CACHING_H_
#define _TTM_CACHING_H_ #define _TTM_CACHING_H_
#define TTM_NUM_CACHING_TYPES 3
enum ttm_caching { enum ttm_caching {
ttm_uncached, ttm_uncached,
ttm_write_combined, ttm_write_combined,
......
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright 2020 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Christian König
*/
#ifndef _TTM_PAGE_POOL_H_
#define _TTM_PAGE_POOL_H_
#include <linux/mmzone.h>
#include <linux/llist.h>
#include <linux/spinlock.h>
#include <drm/ttm/ttm_caching.h>
struct device;
struct ttm_tt;
struct ttm_pool;
struct ttm_operation_ctx;
/**
* ttm_pool_type - Pool for a certain memory type
*
* @pool: the pool we belong to, might be NULL for the global ones
* @order: the allocation order our pages have
* @caching: the caching type our pages have
* @shrinker_list: our place on the global shrinker list
* @lock: protection of the page list
* @pages: the list of pages in the pool
*/
struct ttm_pool_type {
struct ttm_pool *pool;
unsigned int order;
enum ttm_caching caching;
struct list_head shrinker_list;
spinlock_t lock;
struct list_head pages;
};
/**
* ttm_pool - Pool for all caching and orders
*
* @use_dma_alloc: if coherent DMA allocations should be used
* @use_dma32: if GFP_DMA32 should be used
* @caching: pools for each caching/order
*/
struct ttm_pool {
struct device *dev;
bool use_dma_alloc;
bool use_dma32;
struct {
struct ttm_pool_type orders[MAX_ORDER];
} caching[TTM_NUM_CACHING_TYPES];
};
int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
struct ttm_operation_ctx *ctx);
void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt);
void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
bool use_dma_alloc, bool use_dma32);
void ttm_pool_fini(struct ttm_pool *pool);
int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m);
int ttm_pool_mgr_init(unsigned long num_pages);
void ttm_pool_mgr_fini(void);
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment