Commit c3a0c771 authored by Alexandre Courbot, committed by Ben Skeggs

drm/nouveau: implement explicitly coherent BOs

Allow nouveau_bo_new() to recognize the TTM_PL_FLAG_UNCACHED flag, which
means that we want the allocated BO to be perfectly coherent between the
CPU and GPU. This is useful on non-coherent architectures for which we
do not want to manually sync some rarely-accessed buffers: typically,
fences and pushbuffers.

A TTM BO allocated with the TTM_PL_FLAG_UNCACHED flag on a non-coherent
architecture will be populated using the DMA API, and accesses to it will
be performed through the coherent mapping returned by dma_alloc_coherent().

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
parent c5d7ddf7
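
For illustration, a minimal caller sketch (not part of this patch) of how a driver-internal user might allocate and touch one of these explicitly coherent BOs once the change is in place. It assumes the nouveau_bo_new()/nouveau_bo_map()/nouveau_bo_wr32() driver-internal interfaces of this kernel era, including the nine-argument nouveau_bo_new(dev, size, align, domain flags, tile_mode, tile_flags, sg, robj, pnvbo) signature; demo_alloc_coherent_bo() is a hypothetical name, and a real user (e.g. the fence code) would normally also pin the BO before mapping it.

/* Hypothetical sketch, not from this commit: allocate a small GART-backed
 * buffer with TTM_PL_FLAG_UNCACHED so that, on a non-coherent architecture,
 * nvbo->force_coherent gets set and the BO is populated through the DMA API. */
static int
demo_alloc_coherent_bo(struct drm_device *dev, struct nouveau_bo **pnvbo)
{
	int ret;

	ret = nouveau_bo_new(dev, PAGE_SIZE, 0,
			     TTM_PL_FLAG_TT | TTM_PL_FLAG_UNCACHED,
			     0, 0, NULL, NULL, pnvbo);
	if (ret)
		return ret;

	/* A real caller would pin the BO here; omitted for brevity. */

	/* For a force_coherent BO this reuses the DMA API mapping instead of
	 * calling ttm_bo_kmap(). */
	ret = nouveau_bo_map(*pnvbo);
	if (ret) {
		nouveau_bo_ref(NULL, pnvbo);
		return ret;
	}

	/* CPU writes become visible to the GPU without any manual sync of
	 * this rarely-accessed buffer (fence, pushbuf). */
	nouveau_bo_wr32(*pnvbo, 0, 0xcafebabe);
	return 0;
}

On a coherent architecture the same call degrades gracefully: force_coherent stays false and nouveau_bo_map() takes the usual ttm_bo_kmap() path, exactly as before this change.
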
drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -214,6 +214,9 @@ nouveau_bo_new(struct drm_device *dev, int size, int align,
 	nvbo->tile_flags = tile_flags;
 	nvbo->bo.bdev = &drm->ttm.bdev;
 
+	if (!nv_device_is_cpu_coherent(nvkm_device(&drm->device)))
+		nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED;
+
 	nvbo->page_shift = 12;
 	if (drm->client.vm) {
 		if (!(flags & TTM_PL_FLAG_TT) && size > 256 * 1024)
@@ -291,7 +294,8 @@ void
 nouveau_bo_placement_set(struct nouveau_bo *nvbo, uint32_t type, uint32_t busy)
 {
 	struct ttm_placement *pl = &nvbo->placement;
-	uint32_t flags = TTM_PL_MASK_CACHING |
+	uint32_t flags = (nvbo->force_coherent ? TTM_PL_FLAG_UNCACHED :
+			  TTM_PL_MASK_CACHING) |
 			 (nvbo->pin_refcnt ? TTM_PL_FLAG_NO_EVICT : 0);
 
 	pl->placement = nvbo->placements;
@@ -396,7 +400,14 @@ nouveau_bo_map(struct nouveau_bo *nvbo)
 	if (ret)
 		return ret;
 
-	ret = ttm_bo_kmap(&nvbo->bo, 0, nvbo->bo.mem.num_pages, &nvbo->kmap);
+	/*
+	 * TTM buffers allocated using the DMA API already have a mapping, let's
+	 * use it instead.
+	 */
+	if (!nvbo->force_coherent)
+		ret = ttm_bo_kmap(&nvbo->bo, 0, nvbo->bo.mem.num_pages,
+				  &nvbo->kmap);
+
 	ttm_bo_unreserve(&nvbo->bo);
 	return ret;
 }
@@ -404,7 +415,14 @@ nouveau_bo_map(struct nouveau_bo *nvbo)
 void
 nouveau_bo_unmap(struct nouveau_bo *nvbo)
 {
-	if (nvbo)
+	if (!nvbo)
+		return;
+
+	/*
+	 * TTM buffers allocated using the DMA API already had a coherent
+	 * mapping which we used, no need to unmap.
+	 */
+	if (!nvbo->force_coherent)
 		ttm_bo_kunmap(&nvbo->kmap);
 }
@@ -422,12 +440,36 @@ nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
 	return 0;
 }
 
+static inline void *
+_nouveau_bo_mem_index(struct nouveau_bo *nvbo, unsigned index, void *mem, u8 sz)
+{
+	struct ttm_dma_tt *dma_tt;
+	u8 *m = mem;
+
+	index *= sz;
+	if (m) {
+		/* kmap'd address, return the corresponding offset */
+		m += index;
+	} else {
+		/* DMA-API mapping, lookup the right address */
+		dma_tt = (struct ttm_dma_tt *)nvbo->bo.ttm;
+		m = dma_tt->cpu_address[index / PAGE_SIZE];
+		m += index % PAGE_SIZE;
+	}
+
+	return m;
+}
+#define nouveau_bo_mem_index(o, i, m) _nouveau_bo_mem_index(o, i, m, sizeof(*m))
+
 u16
 nouveau_bo_rd16(struct nouveau_bo *nvbo, unsigned index)
 {
 	bool is_iomem;
 	u16 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
 
-	mem = &mem[index];
+	mem = nouveau_bo_mem_index(nvbo, index, mem);
+
 	if (is_iomem)
 		return ioread16_native((void __force __iomem *)mem);
 	else
@@ -439,7 +481,9 @@ nouveau_bo_wr16(struct nouveau_bo *nvbo, unsigned index, u16 val)
 {
 	bool is_iomem;
 	u16 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
 
-	mem = &mem[index];
+	mem = nouveau_bo_mem_index(nvbo, index, mem);
+
 	if (is_iomem)
 		iowrite16_native(val, (void __force __iomem *)mem);
 	else
@@ -451,7 +495,9 @@ nouveau_bo_rd32(struct nouveau_bo *nvbo, unsigned index)
 {
 	bool is_iomem;
 	u32 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
 
-	mem = &mem[index];
+	mem = nouveau_bo_mem_index(nvbo, index, mem);
+
 	if (is_iomem)
 		return ioread32_native((void __force __iomem *)mem);
 	else
@@ -463,7 +509,9 @@ nouveau_bo_wr32(struct nouveau_bo *nvbo, unsigned index, u32 val)
 {
 	bool is_iomem;
 	u32 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
 
-	mem = &mem[index];
+	mem = nouveau_bo_mem_index(nvbo, index, mem);
+
 	if (is_iomem)
 		iowrite32_native(val, (void __force __iomem *)mem);
 	else
@@ -1383,6 +1431,14 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm)
 	dev = drm->dev;
 	pdev = nv_device_base(device);
 
+	/*
+	 * Objects matching this condition have been marked as force_coherent,
+	 * so use the DMA API for them.
+	 */
+	if (!nv_device_is_cpu_coherent(device) &&
+	    ttm->caching_state == tt_uncached)
+		return ttm_dma_populate(ttm_dma, dev->dev);
+
 #if __OS_HAS_AGP
 	if (drm->agp.stat == ENABLED) {
 		return ttm_agp_tt_populate(ttm);
@@ -1440,6 +1496,14 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm)
 	dev = drm->dev;
 	pdev = nv_device_base(device);
 
+	/*
+	 * Objects matching this condition have been marked as force_coherent,
+	 * so use the DMA API for them.
+	 */
+	if (!nv_device_is_cpu_coherent(device) &&
+	    ttm->caching_state == tt_uncached)
+		ttm_dma_unpopulate(ttm_dma, dev->dev);
+
 #if __OS_HAS_AGP
 	if (drm->agp.stat == ENABLED) {
 		ttm_agp_tt_unpopulate(ttm);
drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -13,6 +13,7 @@ struct nouveau_bo {
 	u32 valid_domains;
 	struct ttm_place placements[3];
 	struct ttm_place busy_placements[3];
+	bool force_coherent;
 	struct ttm_bo_kmap_obj kmap;
 	struct list_head head;
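
As a side note, the byte-offset arithmetic that the new _nouveau_bo_mem_index() helper performs for the DMA-API case (scale the element index by the element size, then split the byte offset into a page index into ttm_dma_tt's cpu_address[] array plus a remainder within that page) can be checked with a small standalone userspace sketch. demo_mem_index() is a hypothetical name and a 4096-byte PAGE_SIZE is assumed here.

#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE 4096u	/* assumed page size for this illustration */

/* Mirrors the index math of _nouveau_bo_mem_index() for the DMA-API path. */
static void demo_mem_index(unsigned int index, size_t elem_size)
{
	size_t byte_off = (size_t)index * elem_size;

	printf("element %u (size %zu) -> page %zu, offset %zu within page\n",
	       index, elem_size, byte_off / PAGE_SIZE, byte_off % PAGE_SIZE);
}

int main(void)
{
	demo_mem_index(1030, sizeof(unsigned int));	/* u32 case: page 1, offset 24 */
	demo_mem_index(10, sizeof(unsigned short));	/* u16 case: page 0, offset 20 */
	return 0;
}

For instance, u32 element 1030 lands 24 bytes into the second 4 KiB page, which is the address the helper resolves through cpu_address[] for a force_coherent BO whose kmap virtual address is NULL.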