Commit 4a5ad08f authored by Arunpravin Paneer Selvam's avatar Arunpravin Paneer Selvam Committed by Alex Deucher

drm/amdgpu: Add address alignment support to DCC buffers

Add address alignment support to the DCC VRAM buffers.

v2:
  - adjust size based on the max_texture_channel_caches values
    only for GFX12 DCC buffers.
  - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply change only
    for DCC buffers.
  - roundup non power of two DCC buffer adjusted size to nearest
    power of two number as the buddy allocator does not support non
    power of two alignments. This applies only to the contiguous
    DCC buffers.

v3:(Alex)
  - rewrite the max texture channel caches comparison code in an
    algorithmic way to determine the alignment size.

v4:(Alex)
  - Move the logic from amdgpu_vram_mgr_dcc_alignment() to gmc_v12_0.c
    and add a new gmc func callback for dcc alignment. If the callback
    is non-NULL, call it to get the alignment, otherwise, use the default.

v5:(Alex)
  - Set the Alignment to a default value if the callback doesn't exist.
  - Add the callback to amdgpu_gmc_funcs.

v6:
  - Fix checkpatch warning reported by Intel CI.

v7:(Christian)
  - remove the AMDGPU_GEM_CREATE_GFX12_DCC flag and keep a flag that
    checks the BO pinning and for a specific hw generation.

v8:(Christian)
  - move this check into gmc_v12_0_get_dcc_alignment.

v9:
  - Fix 32bit build errors
Signed-off-by: default avatarArunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Acked-by: default avatarAlex Deucher <alexander.deucher@amd.com>
Acked-by: default avatarChristian König <christian.koenig@amd.com>
Reviewed-by: default avatarFrank Min <Frank.Min@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
(cherry picked from commit aa94b623)
parent 50e376f1
...@@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs { ...@@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs {
uint64_t addr, uint64_t *flags); uint64_t addr, uint64_t *flags);
/* get the amount of memory used by the vbios for pre-OS console */ /* get the amount of memory used by the vbios for pre-OS console */
unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
/* get the DCC buffer alignment */
unsigned int (*get_dcc_alignment)(struct amdgpu_device *adev);
enum amdgpu_memory_partition (*query_mem_partition_mode)( enum amdgpu_memory_partition (*query_mem_partition_mode)(
struct amdgpu_device *adev); struct amdgpu_device *adev);
...@@ -363,6 +365,10 @@ struct amdgpu_gmc { ...@@ -363,6 +365,10 @@ struct amdgpu_gmc {
(adev)->gmc.gmc_funcs->override_vm_pte_flags \ (adev)->gmc.gmc_funcs->override_vm_pte_flags \
((adev), (vm), (addr), (pte_flags)) ((adev), (vm), (addr), (pte_flags))
#define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev)) #define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
#define amdgpu_gmc_get_dcc_alignment(adev) ({ \
typeof(adev) _adev = (adev); \
_adev->gmc.gmc_funcs->get_dcc_alignment(_adev); \
})
/** /**
* amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR
......
...@@ -456,6 +456,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, ...@@ -456,6 +456,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
u64 vis_usage = 0, max_bytes, min_block_size; u64 vis_usage = 0, max_bytes, min_block_size;
struct amdgpu_vram_mgr_resource *vres; struct amdgpu_vram_mgr_resource *vres;
u64 size, remaining_size, lpfn, fpfn; u64 size, remaining_size, lpfn, fpfn;
unsigned int adjust_dcc_size = 0;
struct drm_buddy *mm = &mgr->mm; struct drm_buddy *mm = &mgr->mm;
struct drm_buddy_block *block; struct drm_buddy_block *block;
unsigned long pages_per_block; unsigned long pages_per_block;
...@@ -511,7 +512,18 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, ...@@ -511,7 +512,18 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
/* Allocate blocks in desired range */ /* Allocate blocks in desired range */
vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
if (adev->gmc.gmc_funcs->get_dcc_alignment)
adjust_dcc_size = amdgpu_gmc_get_dcc_alignment(adev);
remaining_size = (u64)vres->base.size; remaining_size = (u64)vres->base.size;
if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
unsigned int dcc_size;
dcc_size = roundup_pow_of_two(vres->base.size + adjust_dcc_size);
remaining_size = (u64)dcc_size;
vres->flags |= DRM_BUDDY_TRIM_DISABLE;
}
mutex_lock(&mgr->lock); mutex_lock(&mgr->lock);
while (remaining_size) { while (remaining_size) {
...@@ -521,7 +533,10 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, ...@@ -521,7 +533,10 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
min_block_size = mgr->default_page_size; min_block_size = mgr->default_page_size;
size = remaining_size; size = remaining_size;
if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size)
min_block_size = size;
else if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
!(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
min_block_size = (u64)pages_per_block << PAGE_SHIFT; min_block_size = (u64)pages_per_block << PAGE_SHIFT;
...@@ -553,6 +568,22 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, ...@@ -553,6 +568,22 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
} }
mutex_unlock(&mgr->lock); mutex_unlock(&mgr->lock);
if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
struct drm_buddy_block *dcc_block;
unsigned long dcc_start;
u64 trim_start;
dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks);
/* Adjust the start address for DCC buffers only */
dcc_start =
roundup((unsigned long)amdgpu_vram_mgr_block_start(dcc_block),
adjust_dcc_size);
trim_start = (u64)dcc_start;
drm_buddy_block_trim(mm, &trim_start,
(u64)vres->base.size,
&vres->blocks);
}
vres->base.start = 0; vres->base.start = 0;
size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
vres->base.size); vres->base.size);
......
...@@ -542,6 +542,23 @@ static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) ...@@ -542,6 +542,23 @@ static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev)
return 0; return 0;
} }
static unsigned int gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev)
{
unsigned int max_tex_channel_caches, alignment;
if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 0) &&
amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 1))
return 0;
max_tex_channel_caches = adev->gfx.config.max_texture_channel_caches;
if (is_power_of_2(max_tex_channel_caches))
alignment = (unsigned int)(max_tex_channel_caches / SZ_4);
else
alignment = roundup_pow_of_two(max_tex_channel_caches);
return (unsigned int)(alignment * max_tex_channel_caches * SZ_1K);
}
static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb, .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb,
.flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid, .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid,
...@@ -551,6 +568,7 @@ static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { ...@@ -551,6 +568,7 @@ static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {
.get_vm_pde = gmc_v12_0_get_vm_pde, .get_vm_pde = gmc_v12_0_get_vm_pde,
.get_vm_pte = gmc_v12_0_get_vm_pte, .get_vm_pte = gmc_v12_0_get_vm_pte,
.get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size, .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size,
.get_dcc_alignment = gmc_v12_0_get_dcc_alignment,
}; };
static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev) static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment