Commit 89bb5752 authored by Christian König, committed by Alex Deucher

drm/amdgpu: use TTM_PL_FLAG_CONTIGUOUS v2

Implement AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS using TTM_PL_FLAG_CONTIGUOUS
instead of a placement limit. That allows us to better handle CPU
accessible placements.

v2: prevent virtual BO start address from overflowing
Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent f75e237c
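
For context on how the AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS flag reaches this path, here is a minimal userspace sketch (not part of the patch) that allocates a contiguous VRAM BO through the amdgpu GEM_CREATE ioctl; after this change the kernel maps the flag onto TTM_PL_FLAG_CONTIGUOUS in the VRAM placement instead of clamping the placement's lpfn. The render node path and sizes are assumptions for the example.

/* Illustrative userspace allocation of a contiguous VRAM BO. Build against
 * libdrm's headers (e.g. cc $(pkg-config --cflags libdrm) ...); the device
 * node path and sizes below are assumptions, not taken from the patch. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <amdgpu_drm.h>

int main(void)
{
    union drm_amdgpu_gem_create args;
    int fd = open("/dev/dri/renderD128", O_RDWR);

    if (fd < 0)
        return 1;

    memset(&args, 0, sizeof(args));
    args.in.bo_size = 4ull << 20;        /* 4 MiB */
    args.in.alignment = 4096;            /* one page */
    args.in.domains = AMDGPU_GEM_DOMAIN_VRAM;
    args.in.domain_flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;

    /* With this patch, the flag becomes TTM_PL_FLAG_CONTIGUOUS on the
     * kernel-side VRAM placement rather than an lpfn limit. */
    if (ioctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &args))
        return 1;

    printf("contiguous VRAM BO handle: %u\n", args.out.handle);
    return 0;
}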
@@ -122,20 +122,19 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
 
     if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
         unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
-        unsigned lpfn = 0;
-
-        /* This forces a reallocation if the flag wasn't set before */
-        if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
-            lpfn = adev->mc.real_vram_size >> PAGE_SHIFT;
 
         places[c].fpfn = 0;
-        places[c].lpfn = lpfn;
+        places[c].lpfn = 0;
         places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
             TTM_PL_FLAG_VRAM;
+
         if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
             places[c].lpfn = visible_pfn;
         else
             places[c].flags |= TTM_PL_FLAG_TOPDOWN;
+
+        if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+            places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
         c++;
     }
@@ -93,7 +93,6 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
                    const struct ttm_place *place,
                    struct ttm_mem_reg *mem)
 {
-    struct amdgpu_bo *bo = container_of(tbo, struct amdgpu_bo, tbo);
     struct amdgpu_vram_mgr *mgr = man->priv;
     struct drm_mm *mm = &mgr->mm;
     struct drm_mm_node *nodes;
@@ -106,8 +105,8 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
     if (!lpfn)
         lpfn = man->size;
 
-    if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS ||
-        place->lpfn || amdgpu_vram_page_split == -1) {
+    if (place->flags & TTM_PL_FLAG_CONTIGUOUS ||
+        amdgpu_vram_page_split == -1) {
         pages_per_node = ~0ul;
         num_nodes = 1;
     } else {
@@ -124,12 +123,14 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
     if (place->flags & TTM_PL_FLAG_TOPDOWN)
         mode = DRM_MM_INSERT_HIGH;
 
+    mem->start = 0;
     pages_left = mem->num_pages;
 
     spin_lock(&mgr->lock);
     for (i = 0; i < num_nodes; ++i) {
         unsigned long pages = min(pages_left, pages_per_node);
         uint32_t alignment = mem->page_alignment;
+        unsigned long start;
 
         if (pages == pages_per_node)
             alignment = pages_per_node;
@@ -141,11 +142,19 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
         if (unlikely(r))
             goto error;
 
+        /* Calculate a virtual BO start address to easily check if
+         * everything is CPU accessible.
+         */
+        start = nodes[i].start + nodes[i].size;
+        if (start > mem->num_pages)
+            start -= mem->num_pages;
+        else
+            start = 0;
+        mem->start = max(mem->start, start);
         pages_left -= pages;
     }
     spin_unlock(&mgr->lock);
 
-    mem->start = num_nodes == 1 ? nodes[0].start : AMDGPU_BO_INVALID_OFFSET;
     mem->mm_node = nodes;
 
     return 0;
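
To make the v2 note concrete, the following standalone sketch (plain C with a hypothetical two-node layout; max_ul() is a local helper, not kernel code) replays the virtual start address computation from amdgpu_vram_mgr_new() above. Each node contributes its end offset minus the BO size, clamped to zero so the unsigned subtraction cannot wrap, and the maximum becomes mem->start; the check mem->start + mem->num_pages <= visible_pfn then guarantees every node is CPU accessible.

/* Standalone replay of the v2 virtual start address computation.
 * The node layout is a made-up example; the arithmetic mirrors the
 * loop body added to amdgpu_vram_mgr_new(). */
#include <stdio.h>

static unsigned long max_ul(unsigned long a, unsigned long b)
{
    return a > b ? a : b;
}

int main(void)
{
    /* One 16-page BO backed by two nodes: pages [100,108) and [200,208). */
    unsigned long node_start[] = { 100, 200 };
    unsigned long node_size[] = { 8, 8 };
    unsigned long num_pages = 16;
    unsigned long mem_start = 0;
    int i;

    for (i = 0; i < 2; ++i) {
        unsigned long start = node_start[i] + node_size[i];

        /* Clamp instead of subtracting blindly: an unsigned
         * underflow here is exactly what v2 prevents. */
        if (start > num_pages)
            start -= num_pages;
        else
            start = 0;
        mem_start = max_ul(mem_start, start);
    }

    /* mem_start + num_pages (192 + 16 = 208) upper-bounds every node
     * end, so comparing it against visible_pfn tests the whole BO. */
    printf("virtual BO start: %lu pages\n", mem_start);
    return 0;
}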