Commit 984fee64 authored by Dave Airlie

Merge branch 'drm-etnaviv-next' of git://git.pengutronix.de/git/lst/linux into drm-next

Notable changes:
- correctness fixes to the GPU cache flushing when switching execution
state and when powering down the GPU
- reduction of time spent in hardirq-off context
- placement improvements to the GPU DMA linear window, allowing the
  driver to properly work on i.MX6 systems with more than 2GB of RAM
  (a minimal sketch of the placement arithmetic follows this list)
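For the last item, the probe code (see the etnaviv_gpu_platform_probe hunk below) now derives gpu->memory_base from the device's required DMA mask instead of hard-coding the start of physical RAM. A minimal userspace sketch of that arithmetic, assuming example PHYS_OFFSET and mask values rather than data from any particular board:

#include <stdint.h>
#include <stdio.h>

#define SZ_2G 0x80000000u

/* Mirror of the placement decision: keep the 2GB linear window as high as
 * the DMA mask allows so it covers the CMA area at the top of RAM, and fall
 * back to PHYS_OFFSET when the device cannot address beyond PHYS_OFFSET+2GB
 * anyway.  The kernel gets the mask from dma_get_required_mask(). */
static uint32_t linear_window_base(uint32_t phys_offset, uint32_t dma_mask)
{
        if (dma_mask < phys_offset + SZ_2G)
                return phys_offset;
        return dma_mask - SZ_2G + 1;
}

int main(void)
{
        /* Assumed example values: RAM starting at 0x10000000 (as on i.MX6),
         * a 1GB board (mask 0x4fffffff) and a 4GB board (mask 0xffffffff). */
        uint32_t phys_offset = 0x10000000;

        printf("1GB board: base = 0x%08x\n",
               linear_window_base(phys_offset, 0x4fffffffu));
        printf("4GB board: base = 0x%08x\n",
               linear_window_base(phys_offset, 0xffffffffu));
        return 0;
}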

* 'drm-etnaviv-next' of git://git.pengutronix.de/git/lst/linux:
  drm: etnaviv: clean up submit_bo()
  drm: etnaviv: clean up vram_mapping submission/retire path
  drm: etnaviv: improve readability of command insertion to ring buffer
  drm: etnaviv: clean up GPU command submission
  drm: etnaviv: use previous GPU pipe state when pipe switching
  drm: etnaviv: flush all GPU caches when stopping GPU
  drm: etnaviv: track current execution state
  drm: etnaviv: extract arming of semaphore
  drm: etnaviv: extract replacement of WAIT command
  drm: etnaviv: extract command ring reservation
  drm/etnaviv: move GPU linear window to end of DMA window
  drm/etnaviv: move runtime PM balance into retire worker
parents 507d44a9 8779aa8f
@@ -21,6 +21,7 @@
 #include "common.xml.h"
 #include "state.xml.h"
+#include "state_3d.xml.h"
 #include "cmdstream.xml.h"
 
 /*
@@ -85,10 +86,17 @@ static inline void CMD_STALL(struct etnaviv_cmdbuf *buffer,
 	OUT(buffer, VIV_FE_STALL_TOKEN_FROM(from) | VIV_FE_STALL_TOKEN_TO(to));
 }
 
-static void etnaviv_cmd_select_pipe(struct etnaviv_cmdbuf *buffer, u8 pipe)
+static inline void CMD_SEM(struct etnaviv_cmdbuf *buffer, u32 from, u32 to)
+{
+	CMD_LOAD_STATE(buffer, VIVS_GL_SEMAPHORE_TOKEN,
+		       VIVS_GL_SEMAPHORE_TOKEN_FROM(from) |
+		       VIVS_GL_SEMAPHORE_TOKEN_TO(to));
+}
+
+static void etnaviv_cmd_select_pipe(struct etnaviv_gpu *gpu,
+	struct etnaviv_cmdbuf *buffer, u8 pipe)
 {
-	u32 flush;
-	u32 stall;
+	u32 flush = 0;
 
 	/*
 	 * This assumes that if we're switching to 2D, we're switching
@@ -96,17 +104,13 @@ static void etnaviv_cmd_select_pipe(struct etnaviv_cmdbuf *buffer, u8 pipe)
 	 * the 2D core, we need to flush the 3D depth and color caches,
 	 * otherwise we need to flush the 2D pixel engine cache.
 	 */
-	if (pipe == ETNA_PIPE_2D)
-		flush = VIVS_GL_FLUSH_CACHE_DEPTH | VIVS_GL_FLUSH_CACHE_COLOR;
-	else
+	if (gpu->exec_state == ETNA_PIPE_2D)
 		flush = VIVS_GL_FLUSH_CACHE_PE2D;
-
-	stall = VIVS_GL_SEMAPHORE_TOKEN_FROM(SYNC_RECIPIENT_FE) |
-		VIVS_GL_SEMAPHORE_TOKEN_TO(SYNC_RECIPIENT_PE);
+	else if (gpu->exec_state == ETNA_PIPE_3D)
+		flush = VIVS_GL_FLUSH_CACHE_DEPTH | VIVS_GL_FLUSH_CACHE_COLOR;
 
 	CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, flush);
-	CMD_LOAD_STATE(buffer, VIVS_GL_SEMAPHORE_TOKEN, stall);
+	CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
 	CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
 
 	CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,
@@ -131,6 +135,36 @@ static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu,
 			ptr, len * 4, 0);
 }
 
+/*
+ * Safely replace the WAIT of a waitlink with a new command and argument.
+ * The GPU may be executing this WAIT while we're modifying it, so we have
+ * to write it in a specific order to avoid the GPU branching to somewhere
+ * else. 'wl_offset' is the offset to the first byte of the WAIT command.
+ */
+static void etnaviv_buffer_replace_wait(struct etnaviv_cmdbuf *buffer,
+	unsigned int wl_offset, u32 cmd, u32 arg)
+{
+	u32 *lw = buffer->vaddr + wl_offset;
+
+	lw[1] = arg;
+	mb();
+	lw[0] = cmd;
+	mb();
+}
+
+/*
+ * Ensure that there is space in the command buffer to contiguously write
+ * 'cmd_dwords' 64-bit words into the buffer, wrapping if necessary.
+ */
+static u32 etnaviv_buffer_reserve(struct etnaviv_gpu *gpu,
+	struct etnaviv_cmdbuf *buffer, unsigned int cmd_dwords)
+{
+	if (buffer->user_size + cmd_dwords * sizeof(u64) > buffer->size)
+		buffer->user_size = 0;
+
+	return gpu_va(gpu, buffer) + buffer->user_size;
+}
+
 u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
 {
 	struct etnaviv_cmdbuf *buffer = gpu->buffer;
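The reserve helper added above only decides whether the next write would run past the end of the ring and, if so, restarts at offset 0, returning the address the caller may write to. A standalone C sketch of just that wrap check (the ring size, write offset and GPU base address are made-up example values; the real helper returns a GPU virtual address via gpu_va()):

#include <stdint.h>
#include <stdio.h>

/* Toy model of etnaviv_buffer_reserve(): the ring is buffer_size bytes,
 * user_size is the current write offset, and each command slot is counted
 * in 64-bit units.  If the request does not fit contiguously, writing
 * restarts at offset 0. */
struct toy_ring {
        uint32_t gpu_base;    /* assumed GPU virtual address of the ring */
        uint32_t buffer_size;
        uint32_t user_size;   /* current write offset in bytes */
};

static uint32_t toy_reserve(struct toy_ring *r, unsigned int cmd_dwords)
{
        if (r->user_size + cmd_dwords * sizeof(uint64_t) > r->buffer_size)
                r->user_size = 0;

        return r->gpu_base + r->user_size;
}

int main(void)
{
        struct toy_ring ring = { 0x40000000, 4096, 4080 };

        /* 7 command slots (56 bytes) no longer fit before the end of the
         * 4096-byte ring, so the returned address wraps back to its start. */
        printf("write at 0x%08x\n", toy_reserve(&ring, 7));
        printf("user_size is now %u\n", ring.user_size);
        return 0;
}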
@@ -147,81 +181,79 @@ u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
 void etnaviv_buffer_end(struct etnaviv_gpu *gpu)
 {
 	struct etnaviv_cmdbuf *buffer = gpu->buffer;
+	unsigned int waitlink_offset = buffer->user_size - 16;
+	u32 link_target, flush = 0;
 
-	/* Replace the last WAIT with an END */
-	buffer->user_size -= 16;
+	if (gpu->exec_state == ETNA_PIPE_2D)
+		flush = VIVS_GL_FLUSH_CACHE_PE2D;
+	else if (gpu->exec_state == ETNA_PIPE_3D)
+		flush = VIVS_GL_FLUSH_CACHE_DEPTH |
+			VIVS_GL_FLUSH_CACHE_COLOR |
+			VIVS_GL_FLUSH_CACHE_TEXTURE |
+			VIVS_GL_FLUSH_CACHE_TEXTUREVS |
+			VIVS_GL_FLUSH_CACHE_SHADER_L2;
 
-	CMD_END(buffer);
-	mb();
+	if (flush) {
+		unsigned int dwords = 7;
+
+		link_target = etnaviv_buffer_reserve(gpu, buffer, dwords);
+
+		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
+		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
+		CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, flush);
+		if (gpu->exec_state == ETNA_PIPE_3D)
+			CMD_LOAD_STATE(buffer, VIVS_TS_FLUSH_CACHE,
+				       VIVS_TS_FLUSH_CACHE_FLUSH);
+		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
+		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
+		CMD_END(buffer);
+
+		etnaviv_buffer_replace_wait(buffer, waitlink_offset,
+					    VIV_FE_LINK_HEADER_OP_LINK |
+					    VIV_FE_LINK_HEADER_PREFETCH(dwords),
+					    link_target);
+	} else {
+		/* Replace the last link-wait with an "END" command */
+		etnaviv_buffer_replace_wait(buffer, waitlink_offset,
+					    VIV_FE_END_HEADER_OP_END, 0);
+	}
 }
 
+/* Append a command buffer to the ring buffer. */
 void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
 	struct etnaviv_cmdbuf *cmdbuf)
 {
 	struct etnaviv_cmdbuf *buffer = gpu->buffer;
-	u32 *lw = buffer->vaddr + buffer->user_size - 16;
-	u32 back, link_target, link_size, reserve_size, extra_size = 0;
+	unsigned int waitlink_offset = buffer->user_size - 16;
+	u32 return_target, return_dwords;
+	u32 link_target, link_dwords;
 
 	if (drm_debug & DRM_UT_DRIVER)
 		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
 
+	link_target = gpu_va(gpu, cmdbuf);
+	link_dwords = cmdbuf->size / 8;
+
 	/*
-	 * If we need to flush the MMU prior to submitting this buffer, we
-	 * will need to append a mmu flush load state, followed by a new
+	 * If we need maintanence prior to submitting this buffer, we will
+	 * need to append a mmu flush load state, followed by a new
 	 * link to this buffer - a total of four additional words.
 	 */
 	if (gpu->mmu->need_flush || gpu->switch_context) {
+		u32 target, extra_dwords;
+
 		/* link command */
-		extra_size += 2;
+		extra_dwords = 1;
+
 		/* flush command */
 		if (gpu->mmu->need_flush)
-			extra_size += 2;
+			extra_dwords += 1;
+
 		/* pipe switch commands */
 		if (gpu->switch_context)
-			extra_size += 8;
-	}
-
-	reserve_size = (6 + extra_size) * 4;
-
-	/*
-	 * if we are going to completely overflow the buffer, we need to wrap.
-	 */
-	if (buffer->user_size + reserve_size > buffer->size)
-		buffer->user_size = 0;
-
-	/* save offset back into main buffer */
-	back = buffer->user_size + reserve_size - 6 * 4;
-	link_target = gpu_va(gpu, buffer) + buffer->user_size;
-	link_size = 6;
-
-	/* Skip over any extra instructions */
-	link_target += extra_size * sizeof(u32);
-
-	if (drm_debug & DRM_UT_DRIVER)
-		pr_info("stream link to 0x%08x @ 0x%08x %p\n",
-			link_target, gpu_va(gpu, cmdbuf), cmdbuf->vaddr);
-
-	/* jump back from cmd to main buffer */
-	CMD_LINK(cmdbuf, link_size, link_target);
-
-	link_target = gpu_va(gpu, cmdbuf);
-	link_size = cmdbuf->size / 8;
-
-	if (drm_debug & DRM_UT_DRIVER) {
-		print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
-			       cmdbuf->vaddr, cmdbuf->size, 0);
-
-		pr_info("link op: %p\n", lw);
-		pr_info("link addr: %p\n", lw + 1);
-		pr_info("addr: 0x%08x\n", link_target);
-		pr_info("back: 0x%08x\n", gpu_va(gpu, buffer) + back);
-		pr_info("event: %d\n", event);
-	}
-
-	if (gpu->mmu->need_flush || gpu->switch_context) {
-		u32 new_target = gpu_va(gpu, buffer) + buffer->user_size;
+			extra_dwords += 4;
+
+		target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords);
 
 		if (gpu->mmu->need_flush) {
 			/* Add the MMU flush */
@@ -236,32 +268,59 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
 		}
 
 		if (gpu->switch_context) {
-			etnaviv_cmd_select_pipe(buffer, cmdbuf->exec_state);
+			etnaviv_cmd_select_pipe(gpu, buffer, cmdbuf->exec_state);
+			gpu->exec_state = cmdbuf->exec_state;
 			gpu->switch_context = false;
 		}
 
-		/* And the link to the first buffer */
-		CMD_LINK(buffer, link_size, link_target);
+		/* And the link to the submitted buffer */
+		CMD_LINK(buffer, link_dwords, link_target);
 
 		/* Update the link target to point to above instructions */
-		link_target = new_target;
-		link_size = extra_size;
+		link_target = target;
+		link_dwords = extra_dwords;
 	}
 
-	/* trigger event */
+	/*
+	 * Append a LINK to the submitted command buffer to return to
+	 * the ring buffer. return_target is the ring target address.
+	 * We need three dwords: event, wait, link.
+	 */
+	return_dwords = 3;
+	return_target = etnaviv_buffer_reserve(gpu, buffer, return_dwords);
+	CMD_LINK(cmdbuf, return_dwords, return_target);
+
+	/*
+	 * Append event, wait and link pointing back to the wait
+	 * command to the ring buffer.
+	 */
 	CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
 		       VIVS_GL_EVENT_FROM_PE);
-
-	/* append WAIT/LINK to main buffer */
 	CMD_WAIT(buffer);
-	CMD_LINK(buffer, 2, gpu_va(gpu, buffer) + (buffer->user_size - 4));
+	CMD_LINK(buffer, 2, return_target + 8);
+
+	if (drm_debug & DRM_UT_DRIVER)
+		pr_info("stream link to 0x%08x @ 0x%08x %p\n",
+			return_target, gpu_va(gpu, cmdbuf), cmdbuf->vaddr);
 
-	/* Change WAIT into a LINK command; write the address first. */
-	*(lw + 1) = link_target;
-	mb();
-	*(lw) = VIV_FE_LINK_HEADER_OP_LINK |
-		VIV_FE_LINK_HEADER_PREFETCH(link_size);
-	mb();
+	if (drm_debug & DRM_UT_DRIVER) {
+		print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
+			       cmdbuf->vaddr, cmdbuf->size, 0);
+
+		pr_info("link op: %p\n", buffer->vaddr + waitlink_offset);
+		pr_info("addr: 0x%08x\n", link_target);
+		pr_info("back: 0x%08x\n", return_target);
+		pr_info("event: %d\n", event);
+	}
+
+	/*
+	 * Kick off the submitted command by replacing the previous
+	 * WAIT with a link to the address in the ring buffer.
+	 */
+	etnaviv_buffer_replace_wait(buffer, waitlink_offset,
+				    VIV_FE_LINK_HEADER_OP_LINK |
+				    VIV_FE_LINK_HEADER_PREFETCH(link_dwords),
+				    link_target);
 
 	if (drm_debug & DRM_UT_DRIVER)
 		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
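The queue path above boils down to: reserve a three-dword return slot in the ring, make the submitted buffer LINK back to that slot, write EVENT/WAIT/LINK into the slot, and only then rewrite the previous WAIT into a LINK that targets the submitted buffer — argument first, opcode second, with barriers in between. A toy userspace model of that final patch step (the opcode values are placeholders, not the real VIV_FE encodings, and atomic_thread_fence() merely stands in for the kernel's mb()):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Placeholder opcodes -- not the real Vivante front-end encodings. */
#define TOY_OP_WAIT 0x10000000u
#define TOY_OP_LINK 0x20000000u

/* Mirror of etnaviv_buffer_replace_wait(): publish the new argument before
 * the new opcode, so a consumer that still sees the old WAIT header never
 * pairs the new opcode with a half-updated target address. */
static void toy_replace_wait(uint32_t *ring, unsigned int wl_offset_dw,
                             uint32_t cmd, uint32_t arg)
{
        uint32_t *lw = ring + wl_offset_dw;

        lw[1] = arg;
        atomic_thread_fence(memory_order_seq_cst);
        lw[0] = cmd;
        atomic_thread_fence(memory_order_seq_cst);
}

int main(void)
{
        /* Ring tail: a two-dword WAIT at dwords 6..7 (header + spare word). */
        uint32_t ring[8] = { 0 };

        ring[6] = TOY_OP_WAIT;

        /* "Kick off" a submitted buffer at an assumed GPU address. */
        toy_replace_wait(ring, 6, TOY_OP_LINK | 2 /* prefetch */, 0x41000000u);

        printf("ring[6] = 0x%08x, ring[7] = 0x%08x\n", ring[6], ring[7]);
        return 0;
}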
...
@@ -75,9 +75,6 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 int etnaviv_gem_mmap(struct file *filp, struct vm_area_struct *vma);
 int etnaviv_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 int etnaviv_gem_mmap_offset(struct drm_gem_object *obj, u64 *offset);
-int etnaviv_gem_get_iova(struct etnaviv_gpu *gpu,
-	struct drm_gem_object *obj, u32 *iova);
-void etnaviv_gem_put_iova(struct etnaviv_gpu *gpu, struct drm_gem_object *obj);
 struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj);
 void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj);
 void etnaviv_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
...
@@ -260,8 +260,32 @@ etnaviv_gem_get_vram_mapping(struct etnaviv_gem_object *obj,
 	return NULL;
 }
 
-int etnaviv_gem_get_iova(struct etnaviv_gpu *gpu,
-	struct drm_gem_object *obj, u32 *iova)
+void etnaviv_gem_mapping_reference(struct etnaviv_vram_mapping *mapping)
+{
+	struct etnaviv_gem_object *etnaviv_obj = mapping->object;
+
+	drm_gem_object_reference(&etnaviv_obj->base);
+
+	mutex_lock(&etnaviv_obj->lock);
+	WARN_ON(mapping->use == 0);
+	mapping->use += 1;
+	mutex_unlock(&etnaviv_obj->lock);
+}
+
+void etnaviv_gem_mapping_unreference(struct etnaviv_vram_mapping *mapping)
+{
+	struct etnaviv_gem_object *etnaviv_obj = mapping->object;
+
+	mutex_lock(&etnaviv_obj->lock);
+	WARN_ON(mapping->use == 0);
+	mapping->use -= 1;
+	mutex_unlock(&etnaviv_obj->lock);
+
+	drm_gem_object_unreference_unlocked(&etnaviv_obj->base);
+}
+
+struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
+	struct drm_gem_object *obj, struct etnaviv_gpu *gpu)
 {
 	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
 	struct etnaviv_vram_mapping *mapping;
@@ -329,28 +353,12 @@ int etnaviv_gem_get_iova(struct etnaviv_gpu *gpu,
 out:
 	mutex_unlock(&etnaviv_obj->lock);
 
-	if (!ret) {
-		/* Take a reference on the object */
-		drm_gem_object_reference(obj);
-		*iova = mapping->iova;
-	}
-
-	return ret;
-}
-
-void etnaviv_gem_put_iova(struct etnaviv_gpu *gpu, struct drm_gem_object *obj)
-{
-	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
-	struct etnaviv_vram_mapping *mapping;
-
-	mutex_lock(&etnaviv_obj->lock);
-	mapping = etnaviv_gem_get_vram_mapping(etnaviv_obj, gpu->mmu);
-
-	WARN_ON(mapping->use == 0);
-	mapping->use -= 1;
-	mutex_unlock(&etnaviv_obj->lock);
+	if (ret)
+		return ERR_PTR(ret);
 
-	drm_gem_object_unreference_unlocked(obj);
+	/* Take a reference on the object */
+	drm_gem_object_reference(obj);
+
+	return mapping;
 }
 
 void *etnaviv_gem_vmap(struct drm_gem_object *obj)
...
@@ -88,6 +88,12 @@ static inline bool is_active(struct etnaviv_gem_object *etnaviv_obj)
 
 #define MAX_CMDS 4
 
+struct etnaviv_gem_submit_bo {
+	u32 flags;
+	struct etnaviv_gem_object *obj;
+	struct etnaviv_vram_mapping *mapping;
+};
+
 /* Created per submit-ioctl, to track bo's and cmdstream bufs, etc,
  * associated with the cmdstream submission for synchronization (and
  * make it easier to unwind when things go wrong, etc). This only
@@ -99,11 +105,7 @@ struct etnaviv_gem_submit {
 	struct ww_acquire_ctx ticket;
 	u32 fence;
 	unsigned int nr_bos;
-	struct {
-		u32 flags;
-		struct etnaviv_gem_object *obj;
-		u32 iova;
-	} bos[0];
+	struct etnaviv_gem_submit_bo bos[0];
 };
 
 int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj,
@@ -115,4 +117,9 @@ int etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj);
 struct page **etnaviv_gem_get_pages(struct etnaviv_gem_object *obj);
 void etnaviv_gem_put_pages(struct etnaviv_gem_object *obj);
 
+struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
+	struct drm_gem_object *obj, struct etnaviv_gpu *gpu);
+void etnaviv_gem_mapping_reference(struct etnaviv_vram_mapping *mapping);
+void etnaviv_gem_mapping_unreference(struct etnaviv_vram_mapping *mapping);
+
 #endif /* __ETNAVIV_GEM_H__ */
@@ -187,12 +187,10 @@ static void submit_unpin_objects(struct etnaviv_gem_submit *submit)
 	int i;
 
 	for (i = 0; i < submit->nr_bos; i++) {
-		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
-
 		if (submit->bos[i].flags & BO_PINNED)
-			etnaviv_gem_put_iova(submit->gpu, &etnaviv_obj->base);
+			etnaviv_gem_mapping_unreference(submit->bos[i].mapping);
 
-		submit->bos[i].iova = 0;
+		submit->bos[i].mapping = NULL;
 		submit->bos[i].flags &= ~BO_PINNED;
 	}
 }
@@ -203,22 +201,24 @@ static int submit_pin_objects(struct etnaviv_gem_submit *submit)
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
-		u32 iova;
+		struct etnaviv_vram_mapping *mapping;
 
-		ret = etnaviv_gem_get_iova(submit->gpu, &etnaviv_obj->base,
-					   &iova);
-		if (ret)
+		mapping = etnaviv_gem_mapping_get(&etnaviv_obj->base,
+						  submit->gpu);
+		if (IS_ERR(mapping)) {
+			ret = PTR_ERR(mapping);
 			break;
+		}
 
 		submit->bos[i].flags |= BO_PINNED;
-		submit->bos[i].iova = iova;
+		submit->bos[i].mapping = mapping;
 	}
 
 	return ret;
 }
 
 static int submit_bo(struct etnaviv_gem_submit *submit, u32 idx,
-	struct etnaviv_gem_object **obj, u32 *iova)
+	struct etnaviv_gem_submit_bo **bo)
 {
 	if (idx >= submit->nr_bos) {
 		DRM_ERROR("invalid buffer index: %u (out of %u)\n",
@@ -226,10 +226,7 @@ static int submit_bo(struct etnaviv_gem_submit *submit, u32 idx,
 		return -EINVAL;
 	}
 
-	if (obj)
-		*obj = submit->bos[idx].obj;
-	if (iova)
-		*iova = submit->bos[idx].iova;
+	*bo = &submit->bos[idx];
 
 	return 0;
 }
@@ -245,8 +242,8 @@ static int submit_reloc(struct etnaviv_gem_submit *submit, void *stream,
 	for (i = 0; i < nr_relocs; i++) {
 		const struct drm_etnaviv_gem_submit_reloc *r = relocs + i;
-		struct etnaviv_gem_object *bobj;
-		u32 iova, off;
+		struct etnaviv_gem_submit_bo *bo;
+		u32 off;
 
 		if (unlikely(r->flags)) {
 			DRM_ERROR("invalid reloc flags\n");
@@ -268,17 +265,16 @@ static int submit_reloc(struct etnaviv_gem_submit *submit, void *stream,
 			return -EINVAL;
 		}
 
-		ret = submit_bo(submit, r->reloc_idx, &bobj, &iova);
+		ret = submit_bo(submit, r->reloc_idx, &bo);
 		if (ret)
 			return ret;
 
-		if (r->reloc_offset >=
-		    bobj->base.size - sizeof(*ptr)) {
+		if (r->reloc_offset >= bo->obj->base.size - sizeof(*ptr)) {
 			DRM_ERROR("relocation %u outside object", i);
 			return -EINVAL;
 		}
 
-		ptr[off] = iova + r->reloc_offset;
+		ptr[off] = bo->mapping->iova + r->reloc_offset;
 
 		last_offset = off;
 	}
...
@@ -628,6 +628,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	/* Now program the hardware */
 	mutex_lock(&gpu->lock);
 	etnaviv_gpu_hw_init(gpu);
+	gpu->exec_state = -1;
 	mutex_unlock(&gpu->lock);
 
 	pm_runtime_mark_last_busy(gpu->dev);
@@ -871,17 +872,13 @@ static void recover_worker(struct work_struct *work)
 		gpu->event[i].fence = NULL;
 		gpu->event[i].used = false;
 		complete(&gpu->event_free);
-		/*
-		 * Decrement the PM count for each stuck event. This is safe
-		 * even in atomic context as we use ASYNC RPM here.
-		 */
-		pm_runtime_put_autosuspend(gpu->dev);
 	}
 	spin_unlock_irqrestore(&gpu->event_spinlock, flags);
 	gpu->completed_fence = gpu->active_fence;
 
 	etnaviv_gpu_hw_init(gpu);
 	gpu->switch_context = true;
+	gpu->exec_state = -1;
 
 	mutex_unlock(&gpu->lock);
 	pm_runtime_mark_last_busy(gpu->dev);
@@ -1106,7 +1103,7 @@ struct etnaviv_cmdbuf *etnaviv_gpu_cmdbuf_new(struct etnaviv_gpu *gpu, u32 size,
 	size_t nr_bos)
 {
 	struct etnaviv_cmdbuf *cmdbuf;
-	size_t sz = size_vstruct(nr_bos, sizeof(cmdbuf->bo[0]),
+	size_t sz = size_vstruct(nr_bos, sizeof(cmdbuf->bo_map[0]),
 				 sizeof(*cmdbuf));
 
 	cmdbuf = kzalloc(sz, GFP_KERNEL);
@@ -1150,14 +1147,23 @@ static void retire_worker(struct work_struct *work)
 		fence_put(cmdbuf->fence);
 
 		for (i = 0; i < cmdbuf->nr_bos; i++) {
-			struct etnaviv_gem_object *etnaviv_obj = cmdbuf->bo[i];
+			struct etnaviv_vram_mapping *mapping = cmdbuf->bo_map[i];
+			struct etnaviv_gem_object *etnaviv_obj = mapping->object;
 
 			atomic_dec(&etnaviv_obj->gpu_active);
 			/* drop the refcount taken in etnaviv_gpu_submit */
-			etnaviv_gem_put_iova(gpu, &etnaviv_obj->base);
+			etnaviv_gem_mapping_unreference(mapping);
 		}
 
 		etnaviv_gpu_cmdbuf_free(cmdbuf);
+		/*
+		 * We need to balance the runtime PM count caused by
+		 * each submission. Upon submission, we increment
+		 * the runtime PM counter, and allocate one event.
+		 * So here, we put the runtime PM count for each
+		 * completed event.
+		 */
+		pm_runtime_put_autosuspend(gpu->dev);
 	}
 
 	gpu->retired_fence = fence;
@@ -1304,11 +1310,10 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
-		u32 iova;
 
-		/* Each cmdbuf takes a refcount on the iova */
-		etnaviv_gem_get_iova(gpu, &etnaviv_obj->base, &iova);
-		cmdbuf->bo[i] = etnaviv_obj;
+		/* Each cmdbuf takes a refcount on the mapping */
+		etnaviv_gem_mapping_reference(submit->bos[i].mapping);
+		cmdbuf->bo_map[i] = submit->bos[i].mapping;
 		atomic_inc(&etnaviv_obj->gpu_active);
 
 		if (submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE)
@@ -1378,15 +1383,6 @@ static irqreturn_t irq_handler(int irq, void *data)
 			gpu->completed_fence = fence->seqno;
 
 			event_free(gpu, event);
-
-			/*
-			 * We need to balance the runtime PM count caused by
-			 * each submission. Upon submission, we increment
-			 * the runtime PM counter, and allocate one event.
-			 * So here, we put the runtime PM count for each
-			 * completed event.
-			 */
-			pm_runtime_put_autosuspend(gpu->dev);
 		}
 
 		/* Retire the buffer objects in a work */
@@ -1481,6 +1477,7 @@ static int etnaviv_gpu_hw_resume(struct etnaviv_gpu *gpu)
 	etnaviv_gpu_hw_init(gpu);
 
 	gpu->switch_context = true;
+	gpu->exec_state = -1;
 
 	mutex_unlock(&gpu->lock);
@@ -1569,6 +1566,7 @@ static int etnaviv_gpu_platform_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct etnaviv_gpu *gpu;
+	u32 dma_mask;
 	int err = 0;
 
 	gpu = devm_kzalloc(dev, sizeof(*gpu), GFP_KERNEL);
@@ -1579,12 +1577,16 @@ static int etnaviv_gpu_platform_probe(struct platform_device *pdev)
 	mutex_init(&gpu->lock);
 
 	/*
-	 * Set the GPU base address to the start of physical memory. This
-	 * ensures that if we have up to 2GB, the v1 MMU can address the
-	 * highest memory. This is important as command buffers may be
-	 * allocated outside of this limit.
+	 * Set the GPU linear window to be at the end of the DMA window, where
+	 * the CMA area is likely to reside. This ensures that we are able to
+	 * map the command buffers while having the linear window overlap as
+	 * much RAM as possible, so we can optimize mappings for other buffers.
 	 */
-	gpu->memory_base = PHYS_OFFSET;
+	dma_mask = (u32)dma_get_required_mask(dev);
+	if (dma_mask < PHYS_OFFSET + SZ_2G)
+		gpu->memory_base = PHYS_OFFSET;
+	else
+		gpu->memory_base = dma_mask - SZ_2G + 1;
 
 	/* Map registers: */
 	gpu->mmio = etnaviv_ioremap(pdev, NULL, dev_name(gpu->dev));
...
@@ -23,6 +23,7 @@
 #include "etnaviv_drv.h"
 
 struct etnaviv_gem_submit;
+struct etnaviv_vram_mapping;
 
 struct etnaviv_chip_identity {
 	/* Chip model. */
@@ -103,6 +104,7 @@ struct etnaviv_gpu {
 	/* 'ring'-buffer: */
 	struct etnaviv_cmdbuf *buffer;
+	int exec_state;
 
 	/* bus base address of memory */
 	u32 memory_base;
@@ -166,7 +168,7 @@ struct etnaviv_cmdbuf {
 	struct list_head node;
 	/* BOs attached to this command buffer */
 	unsigned int nr_bos;
-	struct etnaviv_gem_object *bo[0];
+	struct etnaviv_vram_mapping *bo_map[0];
 };
 
 static inline void gpu_write(struct etnaviv_gpu *gpu, u32 reg, u32 data)
...
@@ -193,7 +193,7 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu,
 	/*
 	 * Unmap the blocks which need to be reaped from the MMU.
-	 * Clear the mmu pointer to prevent the get_iova finding
+	 * Clear the mmu pointer to prevent the mapping_get finding
 	 * this mapping.
 	 */
 	list_for_each_entry_safe(m, n, &list, scan_node) {
...
#ifndef STATE_3D_XML
#define STATE_3D_XML
/* This is a cut-down version of the state_3d.xml.h file */
#define VIVS_TS_FLUSH_CACHE 0x00001650
#define VIVS_TS_FLUSH_CACHE_FLUSH 0x00000001
#endif /* STATE_3D_XML */