Commit 3a41a05d authored by Michał Winiarski, committed by Daniel Vetter

drm/i915/gtt: Avoid calling kcalloc in a loop when allocating temp bitmaps

On each call to gen8_alloc_va_range_3lvl we allocate the temporary
bitmaps needed for error handling. Unfortunately, when the address
space size is increased (48b ppgtt), this results in an additional
(512 - 4) calls to kcalloc, increasing the latency between exec and the
actual start of execution on the GPU. Do a single kcalloc instead; the
size argument can also be dropped from free_gen8_temp_bitmaps since it
is no longer used.

v2: Use GFP_TEMPORARY to make the allocations reclaimable.
v3: Drop the 2D array, just allocate a single block.
v4: Rebase to handle gen8_preallocate_top_level_pdps.
v5: Align misaligned bracket.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
[danvet: Correct kcalloc arguments as suggested by Chris.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
parent ea916ea0
...@@ -1164,13 +1164,8 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, ...@@ -1164,13 +1164,8 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
} }
static void static void
free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts, free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
uint32_t pdpes)
{ {
int i;
for (i = 0; i < pdpes; i++)
kfree(new_pts[i]);
kfree(new_pts); kfree(new_pts);
kfree(new_pds); kfree(new_pds);
} }
...@@ -1180,29 +1175,20 @@ free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts, ...@@ -1180,29 +1175,20 @@ free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts,
*/ */
static static
int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
unsigned long ***new_pts, unsigned long **new_pts,
uint32_t pdpes) uint32_t pdpes)
{ {
int i;
unsigned long *pds; unsigned long *pds;
unsigned long **pts; unsigned long *pts;
pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_KERNEL); pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
if (!pds) if (!pds)
return -ENOMEM; return -ENOMEM;
pts = kcalloc(pdpes, sizeof(unsigned long *), GFP_KERNEL); pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
if (!pts) { GFP_TEMPORARY);
kfree(pds); if (!pts)
return -ENOMEM; goto err_out;
}
for (i = 0; i < pdpes; i++) {
pts[i] = kcalloc(BITS_TO_LONGS(I915_PDES),
sizeof(unsigned long), GFP_KERNEL);
if (!pts[i])
goto err_out;
}
*new_pds = pds; *new_pds = pds;
*new_pts = pts; *new_pts = pts;
...@@ -1210,7 +1196,7 @@ int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, ...@@ -1210,7 +1196,7 @@ int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
return 0; return 0;
err_out: err_out:
free_gen8_temp_bitmaps(pds, pts, pdpes); free_gen8_temp_bitmaps(pds, pts);
return -ENOMEM; return -ENOMEM;
} }
...@@ -1231,7 +1217,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, ...@@ -1231,7 +1217,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
{ {
struct i915_hw_ppgtt *ppgtt = struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base); container_of(vm, struct i915_hw_ppgtt, base);
unsigned long *new_page_dirs, **new_page_tables; unsigned long *new_page_dirs, *new_page_tables;
struct drm_device *dev = vm->dev; struct drm_device *dev = vm->dev;
struct i915_page_directory *pd; struct i915_page_directory *pd;
const uint64_t orig_start = start; const uint64_t orig_start = start;
...@@ -1258,14 +1244,14 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, ...@@ -1258,14 +1244,14 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length, ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
new_page_dirs); new_page_dirs);
if (ret) { if (ret) {
free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes); free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
return ret; return ret;
} }
/* For every page directory referenced, allocate page tables */ /* For every page directory referenced, allocate page tables */
gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) { gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length, ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
new_page_tables[pdpe]); new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
if (ret) if (ret)
goto err_out; goto err_out;
} }
...@@ -1316,20 +1302,21 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, ...@@ -1316,20 +1302,21 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
gen8_setup_page_directory(ppgtt, pdp, pd, pdpe); gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
} }
free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes); free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
mark_tlbs_dirty(ppgtt); mark_tlbs_dirty(ppgtt);
return 0; return 0;
err_out: err_out:
while (pdpe--) { while (pdpe--) {
for_each_set_bit(temp, new_page_tables[pdpe], I915_PDES) for_each_set_bit(temp, new_page_tables + pdpe *
BITS_TO_LONGS(I915_PDES), I915_PDES)
free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]); free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
} }
for_each_set_bit(pdpe, new_page_dirs, pdpes) for_each_set_bit(pdpe, new_page_dirs, pdpes)
free_pd(dev, pdp->page_directory[pdpe]); free_pd(dev, pdp->page_directory[pdpe]);
free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes); free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
mark_tlbs_dirty(ppgtt); mark_tlbs_dirty(ppgtt);
return ret; return ret;
} }
...@@ -1481,7 +1468,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) ...@@ -1481,7 +1468,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
{ {
unsigned long *new_page_dirs, **new_page_tables; unsigned long *new_page_dirs, *new_page_tables;
uint32_t pdpes = I915_PDPES_PER_PDP(dev); uint32_t pdpes = I915_PDPES_PER_PDP(dev);
int ret; int ret;
...@@ -1501,7 +1488,7 @@ static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) ...@@ -1501,7 +1488,7 @@ static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
if (!ret) if (!ret)
*ppgtt->pdp.used_pdpes = *new_page_dirs; *ppgtt->pdp.used_pdpes = *new_page_dirs;
free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes); free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment