Commit f97decac authored by Jordan Crouse, committed by Rob Clark

drm/msm: Support multiple ringbuffers

Add the infrastructure to support the idea of multiple ringbuffers.
Assign each ringbuffer an id and use that as an index for the various
ring specific operations.

The biggest delta is to support legacy fences. Each fence gets its own
sequence number but the legacy functions expect to use a unique integer.
To handle this we return a unique identifier for each submission but
map it to a specific ring/sequence under the covers. Newer users use
a dma_fence pointer anyway so they don't care about the actual sequence
ID or ring.

The actual mechanics for multiple ringbuffers are very target specific
so this code just allows for the possibility but still only defines
one ringbuffer for each target family.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
parent cd414f3d
...@@ -44,7 +44,7 @@ static bool a3xx_idle(struct msm_gpu *gpu); ...@@ -44,7 +44,7 @@ static bool a3xx_idle(struct msm_gpu *gpu);
static bool a3xx_me_init(struct msm_gpu *gpu) static bool a3xx_me_init(struct msm_gpu *gpu)
{ {
struct msm_ringbuffer *ring = gpu->rb; struct msm_ringbuffer *ring = gpu->rb[0];
OUT_PKT3(ring, CP_ME_INIT, 17); OUT_PKT3(ring, CP_ME_INIT, 17);
OUT_RING(ring, 0x000003f7); OUT_RING(ring, 0x000003f7);
...@@ -65,7 +65,7 @@ static bool a3xx_me_init(struct msm_gpu *gpu) ...@@ -65,7 +65,7 @@ static bool a3xx_me_init(struct msm_gpu *gpu)
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
gpu->funcs->flush(gpu); gpu->funcs->flush(gpu, ring);
return a3xx_idle(gpu); return a3xx_idle(gpu);
} }
...@@ -339,7 +339,7 @@ static void a3xx_destroy(struct msm_gpu *gpu) ...@@ -339,7 +339,7 @@ static void a3xx_destroy(struct msm_gpu *gpu)
static bool a3xx_idle(struct msm_gpu *gpu) static bool a3xx_idle(struct msm_gpu *gpu)
{ {
/* wait for ringbuffer to drain: */ /* wait for ringbuffer to drain: */
if (!adreno_idle(gpu)) if (!adreno_idle(gpu, gpu->rb[0]))
return false; return false;
/* then wait for GPU to finish: */ /* then wait for GPU to finish: */
...@@ -446,6 +446,7 @@ static const struct adreno_gpu_funcs funcs = { ...@@ -446,6 +446,7 @@ static const struct adreno_gpu_funcs funcs = {
.recover = a3xx_recover, .recover = a3xx_recover,
.submit = adreno_submit, .submit = adreno_submit,
.flush = adreno_flush, .flush = adreno_flush,
.active_ring = adreno_active_ring,
.irq = a3xx_irq, .irq = a3xx_irq,
.destroy = a3xx_destroy, .destroy = a3xx_destroy,
#ifdef CONFIG_DEBUG_FS #ifdef CONFIG_DEBUG_FS
...@@ -491,7 +492,7 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) ...@@ -491,7 +492,7 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
adreno_gpu->registers = a3xx_registers; adreno_gpu->registers = a3xx_registers;
adreno_gpu->reg_offsets = a3xx_register_offsets; adreno_gpu->reg_offsets = a3xx_register_offsets;
ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
if (ret) if (ret)
goto fail; goto fail;
......
...@@ -116,7 +116,7 @@ static void a4xx_enable_hwcg(struct msm_gpu *gpu) ...@@ -116,7 +116,7 @@ static void a4xx_enable_hwcg(struct msm_gpu *gpu)
static bool a4xx_me_init(struct msm_gpu *gpu) static bool a4xx_me_init(struct msm_gpu *gpu)
{ {
struct msm_ringbuffer *ring = gpu->rb; struct msm_ringbuffer *ring = gpu->rb[0];
OUT_PKT3(ring, CP_ME_INIT, 17); OUT_PKT3(ring, CP_ME_INIT, 17);
OUT_RING(ring, 0x000003f7); OUT_RING(ring, 0x000003f7);
...@@ -137,7 +137,7 @@ static bool a4xx_me_init(struct msm_gpu *gpu) ...@@ -137,7 +137,7 @@ static bool a4xx_me_init(struct msm_gpu *gpu)
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
gpu->funcs->flush(gpu); gpu->funcs->flush(gpu, ring);
return a4xx_idle(gpu); return a4xx_idle(gpu);
} }
...@@ -337,7 +337,7 @@ static void a4xx_destroy(struct msm_gpu *gpu) ...@@ -337,7 +337,7 @@ static void a4xx_destroy(struct msm_gpu *gpu)
static bool a4xx_idle(struct msm_gpu *gpu) static bool a4xx_idle(struct msm_gpu *gpu)
{ {
/* wait for ringbuffer to drain: */ /* wait for ringbuffer to drain: */
if (!adreno_idle(gpu)) if (!adreno_idle(gpu, gpu->rb[0]))
return false; return false;
/* then wait for GPU to finish: */ /* then wait for GPU to finish: */
...@@ -534,6 +534,7 @@ static const struct adreno_gpu_funcs funcs = { ...@@ -534,6 +534,7 @@ static const struct adreno_gpu_funcs funcs = {
.recover = a4xx_recover, .recover = a4xx_recover,
.submit = adreno_submit, .submit = adreno_submit,
.flush = adreno_flush, .flush = adreno_flush,
.active_ring = adreno_active_ring,
.irq = a4xx_irq, .irq = a4xx_irq,
.destroy = a4xx_destroy, .destroy = a4xx_destroy,
#ifdef CONFIG_DEBUG_FS #ifdef CONFIG_DEBUG_FS
...@@ -573,7 +574,7 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) ...@@ -573,7 +574,7 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
adreno_gpu->registers = a4xx_registers; adreno_gpu->registers = a4xx_registers;
adreno_gpu->reg_offsets = a4xx_register_offsets; adreno_gpu->reg_offsets = a4xx_register_offsets;
ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
if (ret) if (ret)
goto fail; goto fail;
......
...@@ -117,7 +117,7 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, ...@@ -117,7 +117,7 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
struct msm_file_private *ctx) struct msm_file_private *ctx)
{ {
struct msm_drm_private *priv = gpu->dev->dev_private; struct msm_drm_private *priv = gpu->dev->dev_private;
struct msm_ringbuffer *ring = gpu->rb; struct msm_ringbuffer *ring = submit->ring;
unsigned int i, ibs = 0; unsigned int i, ibs = 0;
for (i = 0; i < submit->nr_cmds; i++) { for (i = 0; i < submit->nr_cmds; i++) {
...@@ -138,15 +138,15 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, ...@@ -138,15 +138,15 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
} }
OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1); OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
OUT_RING(ring, submit->fence->seqno); OUT_RING(ring, submit->seqno);
OUT_PKT7(ring, CP_EVENT_WRITE, 4); OUT_PKT7(ring, CP_EVENT_WRITE, 4);
OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31)); OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
OUT_RING(ring, lower_32_bits(rbmemptr(gpu, fence))); OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
OUT_RING(ring, upper_32_bits(rbmemptr(gpu, fence))); OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
OUT_RING(ring, submit->fence->seqno); OUT_RING(ring, submit->seqno);
gpu->funcs->flush(gpu); gpu->funcs->flush(gpu, ring);
} }
static const struct { static const struct {
...@@ -262,7 +262,7 @@ void a5xx_set_hwcg(struct msm_gpu *gpu, bool state) ...@@ -262,7 +262,7 @@ void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
static int a5xx_me_init(struct msm_gpu *gpu) static int a5xx_me_init(struct msm_gpu *gpu)
{ {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct msm_ringbuffer *ring = gpu->rb; struct msm_ringbuffer *ring = gpu->rb[0];
OUT_PKT7(ring, CP_ME_INIT, 8); OUT_PKT7(ring, CP_ME_INIT, 8);
...@@ -293,9 +293,8 @@ static int a5xx_me_init(struct msm_gpu *gpu) ...@@ -293,9 +293,8 @@ static int a5xx_me_init(struct msm_gpu *gpu)
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000);
gpu->funcs->flush(gpu); gpu->funcs->flush(gpu, ring);
return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
return a5xx_idle(gpu) ? 0 : -EINVAL;
} }
static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu, static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu,
...@@ -581,11 +580,11 @@ static int a5xx_hw_init(struct msm_gpu *gpu) ...@@ -581,11 +580,11 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
* ticking correctly * ticking correctly
*/ */
if (adreno_is_a530(adreno_gpu)) { if (adreno_is_a530(adreno_gpu)) {
OUT_PKT7(gpu->rb, CP_EVENT_WRITE, 1); OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
OUT_RING(gpu->rb, 0x0F); OUT_RING(gpu->rb[0], 0x0F);
gpu->funcs->flush(gpu); gpu->funcs->flush(gpu, gpu->rb[0]);
if (!a5xx_idle(gpu)) if (!a5xx_idle(gpu, gpu->rb[0]))
return -EINVAL; return -EINVAL;
} }
...@@ -598,11 +597,11 @@ static int a5xx_hw_init(struct msm_gpu *gpu) ...@@ -598,11 +597,11 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
*/ */
ret = a5xx_zap_shader_init(gpu); ret = a5xx_zap_shader_init(gpu);
if (!ret) { if (!ret) {
OUT_PKT7(gpu->rb, CP_SET_SECURE_MODE, 1); OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
OUT_RING(gpu->rb, 0x00000000); OUT_RING(gpu->rb[0], 0x00000000);
gpu->funcs->flush(gpu); gpu->funcs->flush(gpu, gpu->rb[0]);
if (!a5xx_idle(gpu)) if (!a5xx_idle(gpu, gpu->rb[0]))
return -EINVAL; return -EINVAL;
} else { } else {
/* Print a warning so if we die, we know why */ /* Print a warning so if we die, we know why */
...@@ -676,18 +675,19 @@ static inline bool _a5xx_check_idle(struct msm_gpu *gpu) ...@@ -676,18 +675,19 @@ static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT); A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
} }
bool a5xx_idle(struct msm_gpu *gpu) bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{ {
/* wait for CP to drain ringbuffer: */ /* wait for CP to drain ringbuffer: */
if (!adreno_idle(gpu)) if (!adreno_idle(gpu, ring))
return false; return false;
if (spin_until(_a5xx_check_idle(gpu))) { if (spin_until(_a5xx_check_idle(gpu))) {
DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X\n", DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
gpu->name, __builtin_return_address(0), gpu->name, __builtin_return_address(0),
gpu_read(gpu, REG_A5XX_RBBM_STATUS), gpu_read(gpu, REG_A5XX_RBBM_STATUS),
gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS)); gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
return false; return false;
} }
...@@ -818,9 +818,10 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu) ...@@ -818,9 +818,10 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
{ {
struct drm_device *dev = gpu->dev; struct drm_device *dev = gpu->dev;
struct msm_drm_private *priv = dev->dev_private; struct msm_drm_private *priv = dev->dev_private;
struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
dev_err(dev->dev, "gpu fault fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
gpu->memptrs->fence, ring ? ring->id : -1, ring ? ring->seqno : 0,
gpu_read(gpu, REG_A5XX_RBBM_STATUS), gpu_read(gpu, REG_A5XX_RBBM_STATUS),
gpu_read(gpu, REG_A5XX_CP_RB_RPTR), gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
gpu_read(gpu, REG_A5XX_CP_RB_WPTR), gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
...@@ -1010,6 +1011,7 @@ static const struct adreno_gpu_funcs funcs = { ...@@ -1010,6 +1011,7 @@ static const struct adreno_gpu_funcs funcs = {
.recover = a5xx_recover, .recover = a5xx_recover,
.submit = a5xx_submit, .submit = a5xx_submit,
.flush = adreno_flush, .flush = adreno_flush,
.active_ring = adreno_active_ring,
.irq = a5xx_irq, .irq = a5xx_irq,
.destroy = a5xx_destroy, .destroy = a5xx_destroy,
#ifdef CONFIG_DEBUG_FS #ifdef CONFIG_DEBUG_FS
...@@ -1045,7 +1047,7 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) ...@@ -1045,7 +1047,7 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
a5xx_gpu->lm_leakage = 0x4E001A; a5xx_gpu->lm_leakage = 0x4E001A;
ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
if (ret) { if (ret) {
a5xx_destroy(&(a5xx_gpu->base.base)); a5xx_destroy(&(a5xx_gpu->base.base));
return ERR_PTR(ret); return ERR_PTR(ret);
......
...@@ -55,7 +55,7 @@ static inline int spin_usecs(struct msm_gpu *gpu, uint32_t usecs, ...@@ -55,7 +55,7 @@ static inline int spin_usecs(struct msm_gpu *gpu, uint32_t usecs,
return -ETIMEDOUT; return -ETIMEDOUT;
} }
bool a5xx_idle(struct msm_gpu *gpu); bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
void a5xx_set_hwcg(struct msm_gpu *gpu, bool state); void a5xx_set_hwcg(struct msm_gpu *gpu, bool state);
#endif /* __A5XX_GPU_H__ */ #endif /* __A5XX_GPU_H__ */
...@@ -173,7 +173,7 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu) ...@@ -173,7 +173,7 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu)
{ {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
struct msm_ringbuffer *ring = gpu->rb; struct msm_ringbuffer *ring = gpu->rb[0];
if (!a5xx_gpu->gpmu_dwords) if (!a5xx_gpu->gpmu_dwords)
return 0; return 0;
...@@ -192,9 +192,9 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu) ...@@ -192,9 +192,9 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu)
OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
OUT_RING(ring, 1); OUT_RING(ring, 1);
gpu->funcs->flush(gpu); gpu->funcs->flush(gpu, ring);
if (!a5xx_idle(gpu)) { if (!a5xx_idle(gpu, ring)) {
DRM_ERROR("%s: Unable to load GPMU firmware. GPMU will not be active\n", DRM_ERROR("%s: Unable to load GPMU firmware. GPMU will not be active\n",
gpu->name); gpu->name);
return -EINVAL; return -EINVAL;
......
...@@ -21,7 +21,6 @@ ...@@ -21,7 +21,6 @@
#include "msm_gem.h" #include "msm_gem.h"
#include "msm_mmu.h" #include "msm_mmu.h"
#define RB_SIZE SZ_32K
#define RB_BLKSIZE 32 #define RB_BLKSIZE 32
int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
...@@ -163,7 +162,7 @@ static int adreno_load_fw(struct adreno_gpu *adreno_gpu) ...@@ -163,7 +162,7 @@ static int adreno_load_fw(struct adreno_gpu *adreno_gpu)
int adreno_hw_init(struct msm_gpu *gpu) int adreno_hw_init(struct msm_gpu *gpu)
{ {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
int ret; int ret, i;
DBG("%s", gpu->name); DBG("%s", gpu->name);
...@@ -171,34 +170,42 @@ int adreno_hw_init(struct msm_gpu *gpu) ...@@ -171,34 +170,42 @@ int adreno_hw_init(struct msm_gpu *gpu)
if (ret) if (ret)
return ret; return ret;
ret = msm_gem_get_iova(gpu->rb->bo, gpu->aspace, &gpu->rb_iova); for (i = 0; i < gpu->nr_rings; i++) {
struct msm_ringbuffer *ring = gpu->rb[i];
if (!ring)
continue;
ret = msm_gem_get_iova(ring->bo, gpu->aspace, &ring->iova);
if (ret) { if (ret) {
gpu->rb_iova = 0; ring->iova = 0;
dev_err(gpu->dev->dev, "could not map ringbuffer: %d\n", ret); dev_err(gpu->dev->dev,
"could not map ringbuffer %d: %d\n", i, ret);
return ret; return ret;
} }
/* reset ringbuffer: */ ring->cur = ring->start;
gpu->rb->cur = gpu->rb->start;
/* reset completed fence seqno: */ /* reset completed fence seqno: */
gpu->memptrs->fence = gpu->fctx->completed_fence; ring->memptrs->fence = ring->seqno;
gpu->memptrs->rptr = 0; ring->memptrs->rptr = 0;
}
/* Setup REG_CP_RB_CNTL: */ /* Setup REG_CP_RB_CNTL: */
adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL, adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL,
/* size is log2(quad-words): */ /* size is log2(quad-words): */
AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) | AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) |
AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) | AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) |
(adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0)); (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0));
/* Setup ringbuffer address: */ /* Setup ringbuffer address - use ringbuffer[0] for GPU init */
adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE, adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE,
REG_ADRENO_CP_RB_BASE_HI, gpu->rb_iova); REG_ADRENO_CP_RB_BASE_HI, gpu->rb[0]->iova);
if (!adreno_is_a430(adreno_gpu)) { if (!adreno_is_a430(adreno_gpu)) {
adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR, adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR,
REG_ADRENO_CP_RB_RPTR_ADDR_HI, rbmemptr(gpu, rptr)); REG_ADRENO_CP_RB_RPTR_ADDR_HI,
rbmemptr(gpu->rb[0], rptr));
} }
return 0; return 0;
...@@ -210,15 +217,19 @@ static uint32_t get_wptr(struct msm_ringbuffer *ring) ...@@ -210,15 +217,19 @@ static uint32_t get_wptr(struct msm_ringbuffer *ring)
} }
/* Use this helper to read rptr, since a430 doesn't update rptr in memory */ /* Use this helper to read rptr, since a430 doesn't update rptr in memory */
static uint32_t get_rptr(struct adreno_gpu *adreno_gpu) static uint32_t get_rptr(struct adreno_gpu *adreno_gpu,
struct msm_ringbuffer *ring)
{ {
struct msm_gpu *gpu = &adreno_gpu->base;
if (adreno_is_a430(adreno_gpu)) if (adreno_is_a430(adreno_gpu))
return gpu->memptrs->rptr = adreno_gpu_read( return ring->memptrs->rptr = adreno_gpu_read(
adreno_gpu, REG_ADRENO_CP_RB_RPTR); adreno_gpu, REG_ADRENO_CP_RB_RPTR);
else else
return gpu->memptrs->rptr; return ring->memptrs->rptr;
}
struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu)
{
return gpu->rb[0];
} }
void adreno_recover(struct msm_gpu *gpu) void adreno_recover(struct msm_gpu *gpu)
...@@ -244,7 +255,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, ...@@ -244,7 +255,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
{ {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct msm_drm_private *priv = gpu->dev->dev_private; struct msm_drm_private *priv = gpu->dev->dev_private;
struct msm_ringbuffer *ring = gpu->rb; struct msm_ringbuffer *ring = submit->ring;
unsigned i; unsigned i;
for (i = 0; i < submit->nr_cmds; i++) { for (i = 0; i < submit->nr_cmds; i++) {
...@@ -267,7 +278,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, ...@@ -267,7 +278,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
} }
OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1); OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
OUT_RING(ring, submit->fence->seqno); OUT_RING(ring, submit->seqno);
if (adreno_is_a3xx(adreno_gpu) || adreno_is_a4xx(adreno_gpu)) { if (adreno_is_a3xx(adreno_gpu) || adreno_is_a4xx(adreno_gpu)) {
/* Flush HLSQ lazy updates to make sure there is nothing /* Flush HLSQ lazy updates to make sure there is nothing
...@@ -283,8 +294,8 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, ...@@ -283,8 +294,8 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
OUT_PKT3(ring, CP_EVENT_WRITE, 3); OUT_PKT3(ring, CP_EVENT_WRITE, 3);
OUT_RING(ring, CACHE_FLUSH_TS); OUT_RING(ring, CACHE_FLUSH_TS);
OUT_RING(ring, rbmemptr(gpu, fence)); OUT_RING(ring, rbmemptr(ring, fence));
OUT_RING(ring, submit->fence->seqno); OUT_RING(ring, submit->seqno);
/* we could maybe be clever and only CP_COND_EXEC the interrupt: */ /* we could maybe be clever and only CP_COND_EXEC the interrupt: */
OUT_PKT3(ring, CP_INTERRUPT, 1); OUT_PKT3(ring, CP_INTERRUPT, 1);
...@@ -310,10 +321,10 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, ...@@ -310,10 +321,10 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
} }
#endif #endif
gpu->funcs->flush(gpu); gpu->funcs->flush(gpu, ring);
} }
void adreno_flush(struct msm_gpu *gpu) void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{ {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
uint32_t wptr; uint32_t wptr;
...@@ -323,7 +334,7 @@ void adreno_flush(struct msm_gpu *gpu) ...@@ -323,7 +334,7 @@ void adreno_flush(struct msm_gpu *gpu)
* to account for the possibility that the last command fit exactly into * to account for the possibility that the last command fit exactly into
* the ringbuffer and rb->next hasn't wrapped to zero yet * the ringbuffer and rb->next hasn't wrapped to zero yet
*/ */
wptr = get_wptr(gpu->rb) & ((gpu->rb->size / 4) - 1); wptr = get_wptr(ring) % (MSM_GPU_RINGBUFFER_SZ >> 2);
/* ensure writes to ringbuffer have hit system memory: */ /* ensure writes to ringbuffer have hit system memory: */
mb(); mb();
...@@ -331,17 +342,18 @@ void adreno_flush(struct msm_gpu *gpu) ...@@ -331,17 +342,18 @@ void adreno_flush(struct msm_gpu *gpu)
adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_WPTR, wptr); adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_WPTR, wptr);
} }
bool adreno_idle(struct msm_gpu *gpu) bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{ {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
uint32_t wptr = get_wptr(gpu->rb); uint32_t wptr = get_wptr(ring);
/* wait for CP to drain ringbuffer: */ /* wait for CP to drain ringbuffer: */
if (!spin_until(get_rptr(adreno_gpu) == wptr)) if (!spin_until(get_rptr(adreno_gpu, ring) == wptr))
return true; return true;
/* TODO maybe we need to reset GPU here to recover from hang? */ /* TODO maybe we need to reset GPU here to recover from hang? */
DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name); DRM_ERROR("%s: timeout waiting to drain ringbuffer %d!\n", gpu->name,
ring->id);
return false; return false;
} }
...@@ -356,10 +368,16 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m) ...@@ -356,10 +368,16 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
adreno_gpu->rev.major, adreno_gpu->rev.minor, adreno_gpu->rev.major, adreno_gpu->rev.minor,
adreno_gpu->rev.patchid); adreno_gpu->rev.patchid);
seq_printf(m, "fence: %d/%d\n", gpu->memptrs->fence, for (i = 0; i < gpu->nr_rings; i++) {
gpu->fctx->last_fence); struct msm_ringbuffer *ring = gpu->rb[i];
seq_printf(m, "rptr: %d\n", get_rptr(adreno_gpu));
seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb)); seq_printf(m, "rb %d: fence: %d/%d\n", i,
ring->memptrs->fence, ring->seqno);
seq_printf(m, " rptr: %d\n",
get_rptr(adreno_gpu, ring));
seq_printf(m, "rb wptr: %d\n", get_wptr(ring));
}
/* dump these out in a form that can be parsed by demsm: */ /* dump these out in a form that can be parsed by demsm: */
seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name); seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name);
...@@ -385,16 +403,23 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m) ...@@ -385,16 +403,23 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
void adreno_dump_info(struct msm_gpu *gpu) void adreno_dump_info(struct msm_gpu *gpu)
{ {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
int i;
printk("revision: %d (%d.%d.%d.%d)\n", printk("revision: %d (%d.%d.%d.%d)\n",
adreno_gpu->info->revn, adreno_gpu->rev.core, adreno_gpu->info->revn, adreno_gpu->rev.core,
adreno_gpu->rev.major, adreno_gpu->rev.minor, adreno_gpu->rev.major, adreno_gpu->rev.minor,
adreno_gpu->rev.patchid); adreno_gpu->rev.patchid);
printk("fence: %d/%d\n", gpu->memptrs->fence, for (i = 0; i < gpu->nr_rings; i++) {
gpu->fctx->last_fence); struct msm_ringbuffer *ring = gpu->rb[i];
printk("rptr: %d\n", get_rptr(adreno_gpu));
printk("rb wptr: %d\n", get_wptr(gpu->rb)); printk("rb %d: fence: %d/%d\n", i,
ring->memptrs->fence,
ring->seqno);
printk("rptr: %d\n", get_rptr(adreno_gpu, ring));
printk("rb wptr: %d\n", get_wptr(ring));
}
} }
/* would be nice to not have to duplicate the _show() stuff with printk(): */ /* would be nice to not have to duplicate the _show() stuff with printk(): */
...@@ -417,23 +442,26 @@ void adreno_dump(struct msm_gpu *gpu) ...@@ -417,23 +442,26 @@ void adreno_dump(struct msm_gpu *gpu)
} }
} }
static uint32_t ring_freewords(struct msm_gpu *gpu) static uint32_t ring_freewords(struct msm_ringbuffer *ring)
{ {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct adreno_gpu *adreno_gpu = to_adreno_gpu(ring->gpu);
uint32_t size = gpu->rb->size / 4; uint32_t size = MSM_GPU_RINGBUFFER_SZ >> 2;
uint32_t wptr = get_wptr(gpu->rb); uint32_t wptr = get_wptr(ring);
uint32_t rptr = get_rptr(adreno_gpu); uint32_t rptr = get_rptr(adreno_gpu, ring);
return (rptr + (size - 1) - wptr) % size; return (rptr + (size - 1) - wptr) % size;
} }
void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords) void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords)
{ {
if (spin_until(ring_freewords(gpu) >= ndwords)) if (spin_until(ring_freewords(ring) >= ndwords))
DRM_ERROR("%s: timeout waiting for ringbuffer space\n", gpu->name); DRM_DEV_ERROR(ring->gpu->dev->dev,
"timeout waiting for space in ringubffer %d\n",
ring->id);
} }
int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
struct adreno_gpu *adreno_gpu, const struct adreno_gpu_funcs *funcs) struct adreno_gpu *adreno_gpu,
const struct adreno_gpu_funcs *funcs, int nr_rings)
{ {
struct adreno_platform_config *config = pdev->dev.platform_data; struct adreno_platform_config *config = pdev->dev.platform_data;
struct msm_gpu_config adreno_gpu_config = { 0 }; struct msm_gpu_config adreno_gpu_config = { 0 };
...@@ -460,7 +488,7 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, ...@@ -460,7 +488,7 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
adreno_gpu_config.va_start = SZ_16M; adreno_gpu_config.va_start = SZ_16M;
adreno_gpu_config.va_end = 0xffffffff; adreno_gpu_config.va_end = 0xffffffff;
adreno_gpu_config.ringsz = RB_SIZE; adreno_gpu_config.nr_rings = nr_rings;
pm_runtime_set_autosuspend_delay(&pdev->dev, DRM_MSM_INACTIVE_PERIOD); pm_runtime_set_autosuspend_delay(&pdev->dev, DRM_MSM_INACTIVE_PERIOD);
pm_runtime_use_autosuspend(&pdev->dev); pm_runtime_use_autosuspend(&pdev->dev);
......
...@@ -208,17 +208,19 @@ int adreno_hw_init(struct msm_gpu *gpu); ...@@ -208,17 +208,19 @@ int adreno_hw_init(struct msm_gpu *gpu);
void adreno_recover(struct msm_gpu *gpu); void adreno_recover(struct msm_gpu *gpu);
void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
struct msm_file_private *ctx); struct msm_file_private *ctx);
void adreno_flush(struct msm_gpu *gpu); void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
bool adreno_idle(struct msm_gpu *gpu); bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
#ifdef CONFIG_DEBUG_FS #ifdef CONFIG_DEBUG_FS
void adreno_show(struct msm_gpu *gpu, struct seq_file *m); void adreno_show(struct msm_gpu *gpu, struct seq_file *m);
#endif #endif
void adreno_dump_info(struct msm_gpu *gpu); void adreno_dump_info(struct msm_gpu *gpu);
void adreno_dump(struct msm_gpu *gpu); void adreno_dump(struct msm_gpu *gpu);
void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords); void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords);
struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu);
int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs); struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs,
int nr_rings);
void adreno_gpu_cleanup(struct adreno_gpu *gpu); void adreno_gpu_cleanup(struct adreno_gpu *gpu);
...@@ -227,7 +229,7 @@ void adreno_gpu_cleanup(struct adreno_gpu *gpu); ...@@ -227,7 +229,7 @@ void adreno_gpu_cleanup(struct adreno_gpu *gpu);
static inline void static inline void
OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{ {
adreno_wait_ring(ring->gpu, cnt+1); adreno_wait_ring(ring, cnt+1);
OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF)); OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF));
} }
...@@ -235,14 +237,14 @@ OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) ...@@ -235,14 +237,14 @@ OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
static inline void static inline void
OUT_PKT2(struct msm_ringbuffer *ring) OUT_PKT2(struct msm_ringbuffer *ring)
{ {
adreno_wait_ring(ring->gpu, 1); adreno_wait_ring(ring, 1);
OUT_RING(ring, CP_TYPE2_PKT); OUT_RING(ring, CP_TYPE2_PKT);
} }
static inline void static inline void
OUT_PKT3(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) OUT_PKT3(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
{ {
adreno_wait_ring(ring->gpu, cnt+1); adreno_wait_ring(ring, cnt+1);
OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8)); OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8));
} }
...@@ -264,14 +266,14 @@ static inline u32 PM4_PARITY(u32 val) ...@@ -264,14 +266,14 @@ static inline u32 PM4_PARITY(u32 val)
static inline void static inline void
OUT_PKT4(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) OUT_PKT4(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{ {
adreno_wait_ring(ring->gpu, cnt + 1); adreno_wait_ring(ring, cnt + 1);
OUT_RING(ring, PKT4(regindx, cnt)); OUT_RING(ring, PKT4(regindx, cnt));
} }
static inline void static inline void
OUT_PKT7(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) OUT_PKT7(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
{ {
adreno_wait_ring(ring->gpu, cnt + 1); adreno_wait_ring(ring, cnt + 1);
OUT_RING(ring, CP_TYPE7_PKT | (cnt << 0) | (PM4_PARITY(cnt) << 15) | OUT_RING(ring, CP_TYPE7_PKT | (cnt << 0) | (PM4_PARITY(cnt) << 15) |
((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23)); ((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23));
} }
......
...@@ -507,7 +507,7 @@ static void load_gpu(struct drm_device *dev) ...@@ -507,7 +507,7 @@ static void load_gpu(struct drm_device *dev)
mutex_unlock(&init_lock); mutex_unlock(&init_lock);
} }
static int context_init(struct drm_file *file) static int context_init(struct drm_device *dev, struct drm_file *file)
{ {
struct msm_file_private *ctx; struct msm_file_private *ctx;
...@@ -515,7 +515,7 @@ static int context_init(struct drm_file *file) ...@@ -515,7 +515,7 @@ static int context_init(struct drm_file *file)
if (!ctx) if (!ctx)
return -ENOMEM; return -ENOMEM;
msm_submitqueue_init(ctx); msm_submitqueue_init(dev, ctx);
file->driver_priv = ctx; file->driver_priv = ctx;
...@@ -529,7 +529,7 @@ static int msm_open(struct drm_device *dev, struct drm_file *file) ...@@ -529,7 +529,7 @@ static int msm_open(struct drm_device *dev, struct drm_file *file)
*/ */
load_gpu(dev); load_gpu(dev);
return context_init(file); return context_init(dev, file);
} }
static void context_close(struct msm_file_private *ctx) static void context_close(struct msm_file_private *ctx)
...@@ -743,16 +743,27 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data, ...@@ -743,16 +743,27 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
struct msm_drm_private *priv = dev->dev_private; struct msm_drm_private *priv = dev->dev_private;
struct drm_msm_wait_fence *args = data; struct drm_msm_wait_fence *args = data;
ktime_t timeout = to_ktime(args->timeout); ktime_t timeout = to_ktime(args->timeout);
struct msm_gpu_submitqueue *queue;
struct msm_gpu *gpu = priv->gpu;
int ret;
if (args->pad) { if (args->pad) {
DRM_ERROR("invalid pad: %08x\n", args->pad); DRM_ERROR("invalid pad: %08x\n", args->pad);
return -EINVAL; return -EINVAL;
} }
if (!priv->gpu) if (!gpu)
return 0; return 0;
return msm_wait_fence(priv->gpu->fctx, args->fence, &timeout, true); queue = msm_submitqueue_get(file->driver_priv, args->queueid);
if (!queue)
return -ENOENT;
ret = msm_wait_fence(gpu->rb[queue->prio]->fctx, args->fence, &timeout,
true);
msm_submitqueue_put(queue);
return ret;
} }
static int msm_ioctl_gem_madvise(struct drm_device *dev, void *data, static int msm_ioctl_gem_madvise(struct drm_device *dev, void *data,
...@@ -802,7 +813,7 @@ static int msm_ioctl_submitqueue_new(struct drm_device *dev, void *data, ...@@ -802,7 +813,7 @@ static int msm_ioctl_submitqueue_new(struct drm_device *dev, void *data,
if (args->flags & ~MSM_SUBMITQUEUE_FLAGS) if (args->flags & ~MSM_SUBMITQUEUE_FLAGS)
return -EINVAL; return -EINVAL;
return msm_submitqueue_create(file->driver_priv, args->prio, return msm_submitqueue_create(dev, file->driver_priv, args->prio,
args->flags, &args->id); args->flags, &args->id);
} }
......
...@@ -74,6 +74,8 @@ struct msm_vblank_ctrl { ...@@ -74,6 +74,8 @@ struct msm_vblank_ctrl {
spinlock_t lock; spinlock_t lock;
}; };
#define MSM_GPU_MAX_RINGS 1
struct msm_drm_private { struct msm_drm_private {
struct drm_device *dev; struct drm_device *dev;
...@@ -318,11 +320,11 @@ void msm_writel(u32 data, void __iomem *addr); ...@@ -318,11 +320,11 @@ void msm_writel(u32 data, void __iomem *addr);
u32 msm_readl(const void __iomem *addr); u32 msm_readl(const void __iomem *addr);
struct msm_gpu_submitqueue; struct msm_gpu_submitqueue;
int msm_submitqueue_init(struct msm_file_private *ctx); int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx);
struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx, struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx,
u32 id); u32 id);
int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio, int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
u32 flags, u32 *id); u32 prio, u32 flags, u32 *id);
int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id); int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id);
void msm_submitqueue_close(struct msm_file_private *ctx); void msm_submitqueue_close(struct msm_file_private *ctx);
......
...@@ -31,7 +31,7 @@ msm_fence_context_alloc(struct drm_device *dev, const char *name) ...@@ -31,7 +31,7 @@ msm_fence_context_alloc(struct drm_device *dev, const char *name)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
fctx->dev = dev; fctx->dev = dev;
fctx->name = name; strncpy(fctx->name, name, sizeof(fctx->name));
fctx->context = dma_fence_context_alloc(1); fctx->context = dma_fence_context_alloc(1);
init_waitqueue_head(&fctx->event); init_waitqueue_head(&fctx->event);
spin_lock_init(&fctx->spinlock); spin_lock_init(&fctx->spinlock);
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
struct msm_fence_context { struct msm_fence_context {
struct drm_device *dev; struct drm_device *dev;
const char *name; char name[32];
unsigned context; unsigned context;
/* last_fence == completed_fence --> no pending work */ /* last_fence == completed_fence --> no pending work */
uint32_t last_fence; /* last assigned fence */ uint32_t last_fence; /* last assigned fence */
......
...@@ -138,13 +138,15 @@ void msm_gem_vunmap(struct drm_gem_object *obj, enum msm_gem_lock subclass); ...@@ -138,13 +138,15 @@ void msm_gem_vunmap(struct drm_gem_object *obj, enum msm_gem_lock subclass);
struct msm_gem_submit { struct msm_gem_submit {
struct drm_device *dev; struct drm_device *dev;
struct msm_gpu *gpu; struct msm_gpu *gpu;
struct list_head node; /* node in gpu submit_list */ struct list_head node; /* node in ring submit list */
struct list_head bo_list; struct list_head bo_list;
struct ww_acquire_ctx ticket; struct ww_acquire_ctx ticket;
uint32_t seqno; /* Sequence number of the submit on the ring */
struct dma_fence *fence; struct dma_fence *fence;
struct msm_gpu_submitqueue *queue; struct msm_gpu_submitqueue *queue;
struct pid *pid; /* submitting process */ struct pid *pid; /* submitting process */
bool valid; /* true if no cmdstream patching needed */ bool valid; /* true if no cmdstream patching needed */
struct msm_ringbuffer *ring;
unsigned int nr_cmds; unsigned int nr_cmds;
unsigned int nr_bos; unsigned int nr_bos;
struct { struct {
......
...@@ -51,6 +51,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, ...@@ -51,6 +51,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
submit->pid = get_pid(task_pid(current)); submit->pid = get_pid(task_pid(current));
submit->cmd = (void *)&submit->bos[nr_bos]; submit->cmd = (void *)&submit->bos[nr_bos];
submit->queue = queue; submit->queue = queue;
submit->ring = gpu->rb[queue->prio];
/* initially, until copy_from_user() and bo lookup succeeds: */ /* initially, until copy_from_user() and bo lookup succeeds: */
submit->nr_bos = 0; submit->nr_bos = 0;
...@@ -247,7 +248,8 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) ...@@ -247,7 +248,8 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit)
if (no_implicit) if (no_implicit)
continue; continue;
ret = msm_gem_sync_object(&msm_obj->base, submit->gpu->fctx, write); ret = msm_gem_sync_object(&msm_obj->base, submit->ring->fctx,
write);
if (ret) if (ret)
break; break;
} }
...@@ -410,6 +412,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, ...@@ -410,6 +412,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
struct dma_fence *in_fence = NULL; struct dma_fence *in_fence = NULL;
struct sync_file *sync_file = NULL; struct sync_file *sync_file = NULL;
struct msm_gpu_submitqueue *queue; struct msm_gpu_submitqueue *queue;
struct msm_ringbuffer *ring;
int out_fence_fd = -1; int out_fence_fd = -1;
unsigned i; unsigned i;
int ret; int ret;
...@@ -430,6 +433,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, ...@@ -430,6 +433,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
if (!queue) if (!queue)
return -ENOENT; return -ENOENT;
ring = gpu->rb[queue->prio];
if (args->flags & MSM_SUBMIT_FENCE_FD_IN) { if (args->flags & MSM_SUBMIT_FENCE_FD_IN) {
in_fence = sync_file_get_fence(args->fence_fd); in_fence = sync_file_get_fence(args->fence_fd);
...@@ -440,7 +445,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, ...@@ -440,7 +445,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
* Wait if the fence is from a foreign context, or if the fence * Wait if the fence is from a foreign context, or if the fence
* array contains any fence from a foreign context. * array contains any fence from a foreign context.
*/ */
if (!dma_fence_match_context(in_fence, gpu->fctx->context)) { if (!dma_fence_match_context(in_fence, ring->fctx->context)) {
ret = dma_fence_wait(in_fence, true); ret = dma_fence_wait(in_fence, true);
if (ret) if (ret)
return ret; return ret;
...@@ -543,8 +548,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, ...@@ -543,8 +548,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
submit->nr_cmds = i; submit->nr_cmds = i;
submit->fence = msm_fence_alloc(gpu->fctx); submit->fence = msm_fence_alloc(ring->fctx);
if (IS_ERR(submit->fence)) { if (IS_ERR(submit->fence)) {
ret = PTR_ERR(submit->fence); ret = PTR_ERR(submit->fence);
submit->fence = NULL; submit->fence = NULL;
......
...@@ -221,6 +221,20 @@ int msm_gpu_hw_init(struct msm_gpu *gpu) ...@@ -221,6 +221,20 @@ int msm_gpu_hw_init(struct msm_gpu *gpu)
* Hangcheck detection for locked gpu: * Hangcheck detection for locked gpu:
*/ */
/*
 * Signal the fences of every submit on @ring whose sequence number is at or
 * below @fence.
 *
 * The ring's submit list is walked from the head and the walk stops at the
 * first submit whose seqno is greater than @fence; msm_gpu_submit() appends
 * submits with an incrementing ring->seqno, so the list is in submission
 * order. For each submit visited, msm_update_fence() is called on the
 * submit's ring fence context with that submit's dma_fence seqno.
 *
 * @gpu is not used in the body. The comparison is a plain unsigned '>' with
 * no sequence-number wraparound handling.
 */
static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
uint32_t fence)
{
struct msm_gem_submit *submit;
list_for_each_entry(submit, &ring->submits, node) {
/* Everything from here on has not been reached by the ring yet */
if (submit->seqno > fence)
break;
msm_update_fence(submit->ring->fctx,
submit->fence->seqno);
}
}
static void retire_submits(struct msm_gpu *gpu); static void retire_submits(struct msm_gpu *gpu);
static void recover_worker(struct work_struct *work) static void recover_worker(struct work_struct *work)
...@@ -228,15 +242,34 @@ static void recover_worker(struct work_struct *work) ...@@ -228,15 +242,34 @@ static void recover_worker(struct work_struct *work)
struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work); struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
struct drm_device *dev = gpu->dev; struct drm_device *dev = gpu->dev;
struct msm_gem_submit *submit; struct msm_gem_submit *submit;
uint32_t fence = gpu->memptrs->fence; struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
uint64_t fence;
int i;
msm_update_fence(gpu->fctx, fence + 1); /* Update all the rings with the latest and greatest fence */
for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
struct msm_ringbuffer *ring = gpu->rb[i];
fence = ring->memptrs->fence;
/*
* For the current (faulting?) ring/submit advance the fence by
* one more to clear the faulting submit
*/
if (ring == cur_ring)
fence = fence + 1;
update_fences(gpu, ring, fence);
}
mutex_lock(&dev->struct_mutex); mutex_lock(&dev->struct_mutex);
dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name); dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
list_for_each_entry(submit, &gpu->submit_list, node) { fence = cur_ring->memptrs->fence + 1;
if (submit->fence->seqno == (fence + 1)) {
list_for_each_entry(submit, &cur_ring->submits, node) {
if (submit->seqno == fence) {
struct task_struct *task; struct task_struct *task;
rcu_read_lock(); rcu_read_lock();
...@@ -258,8 +291,15 @@ static void recover_worker(struct work_struct *work) ...@@ -258,8 +291,15 @@ static void recover_worker(struct work_struct *work)
gpu->funcs->recover(gpu); gpu->funcs->recover(gpu);
pm_runtime_put_sync(&gpu->pdev->dev); pm_runtime_put_sync(&gpu->pdev->dev);
/* replay the remaining submits after the one that hung: */ /*
list_for_each_entry(submit, &gpu->submit_list, node) { * Replay all remaining submits starting with highest priority
* ring
*/
for (i = gpu->nr_rings - 1; i >= 0; i--) {
struct msm_ringbuffer *ring = gpu->rb[i];
list_for_each_entry(submit, &ring->submits, node)
gpu->funcs->submit(gpu, submit, NULL); gpu->funcs->submit(gpu, submit, NULL);
} }
} }
...@@ -281,25 +321,27 @@ static void hangcheck_handler(unsigned long data) ...@@ -281,25 +321,27 @@ static void hangcheck_handler(unsigned long data)
struct msm_gpu *gpu = (struct msm_gpu *)data; struct msm_gpu *gpu = (struct msm_gpu *)data;
struct drm_device *dev = gpu->dev; struct drm_device *dev = gpu->dev;
struct msm_drm_private *priv = dev->dev_private; struct msm_drm_private *priv = dev->dev_private;
uint32_t fence = gpu->memptrs->fence; struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
uint32_t fence = ring->memptrs->fence;
if (fence != gpu->hangcheck_fence) { if (fence != ring->hangcheck_fence) {
/* some progress has been made.. ya! */ /* some progress has been made.. ya! */
gpu->hangcheck_fence = fence; ring->hangcheck_fence = fence;
} else if (fence < gpu->fctx->last_fence) { } else if (fence < ring->seqno) {
/* no progress and not done.. hung! */ /* no progress and not done.. hung! */
gpu->hangcheck_fence = fence; ring->hangcheck_fence = fence;
dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n", dev_err(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
gpu->name); gpu->name, ring->id);
dev_err(dev->dev, "%s: completed fence: %u\n", dev_err(dev->dev, "%s: completed fence: %u\n",
gpu->name, fence); gpu->name, fence);
dev_err(dev->dev, "%s: submitted fence: %u\n", dev_err(dev->dev, "%s: submitted fence: %u\n",
gpu->name, gpu->fctx->last_fence); gpu->name, ring->seqno);
queue_work(priv->wq, &gpu->recover_work); queue_work(priv->wq, &gpu->recover_work);
} }
/* if still more pending work, reset the hangcheck timer: */ /* if still more pending work, reset the hangcheck timer: */
if (gpu->fctx->last_fence > gpu->hangcheck_fence) if (ring->seqno > ring->hangcheck_fence)
hangcheck_timer_reset(gpu); hangcheck_timer_reset(gpu);
/* workaround for missing irq: */ /* workaround for missing irq: */
...@@ -428,19 +470,18 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) ...@@ -428,19 +470,18 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
static void retire_submits(struct msm_gpu *gpu) static void retire_submits(struct msm_gpu *gpu)
{ {
struct drm_device *dev = gpu->dev; struct drm_device *dev = gpu->dev;
struct msm_gem_submit *submit, *tmp;
int i;
WARN_ON(!mutex_is_locked(&dev->struct_mutex)); WARN_ON(!mutex_is_locked(&dev->struct_mutex));
while (!list_empty(&gpu->submit_list)) { /* Retire the commits starting with highest priority */
struct msm_gem_submit *submit; for (i = gpu->nr_rings - 1; i >= 0; i--) {
struct msm_ringbuffer *ring = gpu->rb[i];
submit = list_first_entry(&gpu->submit_list, list_for_each_entry_safe(submit, tmp, &ring->submits, node) {
struct msm_gem_submit, node); if (dma_fence_is_signaled(submit->fence))
if (dma_fence_is_signaled(submit->fence)) {
retire_submit(gpu, submit); retire_submit(gpu, submit);
} else {
break;
} }
} }
} }
...@@ -449,9 +490,10 @@ static void retire_worker(struct work_struct *work) ...@@ -449,9 +490,10 @@ static void retire_worker(struct work_struct *work)
{ {
struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work); struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
struct drm_device *dev = gpu->dev; struct drm_device *dev = gpu->dev;
uint32_t fence = gpu->memptrs->fence; int i;
msm_update_fence(gpu->fctx, fence); for (i = 0; i < gpu->nr_rings; i++)
update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence);
mutex_lock(&dev->struct_mutex); mutex_lock(&dev->struct_mutex);
retire_submits(gpu); retire_submits(gpu);
...@@ -472,6 +514,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, ...@@ -472,6 +514,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
{ {
struct drm_device *dev = gpu->dev; struct drm_device *dev = gpu->dev;
struct msm_drm_private *priv = dev->dev_private; struct msm_drm_private *priv = dev->dev_private;
struct msm_ringbuffer *ring = submit->ring;
int i; int i;
WARN_ON(!mutex_is_locked(&dev->struct_mutex)); WARN_ON(!mutex_is_locked(&dev->struct_mutex));
...@@ -480,7 +523,9 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, ...@@ -480,7 +523,9 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
msm_gpu_hw_init(gpu); msm_gpu_hw_init(gpu);
list_add_tail(&submit->node, &gpu->submit_list); submit->seqno = ++ring->seqno;
list_add_tail(&submit->node, &ring->submits);
msm_rd_dump_submit(submit); msm_rd_dump_submit(submit);
...@@ -605,7 +650,9 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, ...@@ -605,7 +650,9 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
const char *name, struct msm_gpu_config *config) const char *name, struct msm_gpu_config *config)
{ {
int ret; int i, ret, nr_rings = config->nr_rings;
void *memptrs;
uint64_t memptrs_iova;
if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs))) if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs); gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);
...@@ -613,18 +660,11 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, ...@@ -613,18 +660,11 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
gpu->dev = drm; gpu->dev = drm;
gpu->funcs = funcs; gpu->funcs = funcs;
gpu->name = name; gpu->name = name;
gpu->fctx = msm_fence_context_alloc(drm, name);
if (IS_ERR(gpu->fctx)) {
ret = PTR_ERR(gpu->fctx);
gpu->fctx = NULL;
goto fail;
}
INIT_LIST_HEAD(&gpu->active_list); INIT_LIST_HEAD(&gpu->active_list);
INIT_WORK(&gpu->retire_work, retire_worker); INIT_WORK(&gpu->retire_work, retire_worker);
INIT_WORK(&gpu->recover_work, recover_worker); INIT_WORK(&gpu->recover_work, recover_worker);
INIT_LIST_HEAD(&gpu->submit_list);
setup_timer(&gpu->hangcheck_timer, hangcheck_handler, setup_timer(&gpu->hangcheck_timer, hangcheck_handler,
(unsigned long)gpu); (unsigned long)gpu);
...@@ -689,29 +729,47 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, ...@@ -689,29 +729,47 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
goto fail; goto fail;
} }
gpu->memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo), memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo),
MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo, MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo,
&gpu->memptrs_iova); &memptrs_iova);
if (IS_ERR(gpu->memptrs)) { if (IS_ERR(memptrs)) {
ret = PTR_ERR(gpu->memptrs); ret = PTR_ERR(memptrs);
gpu->memptrs = NULL;
dev_err(drm->dev, "could not allocate memptrs: %d\n", ret); dev_err(drm->dev, "could not allocate memptrs: %d\n", ret);
goto fail; goto fail;
} }
/* Create ringbuffer: */ if (nr_rings > ARRAY_SIZE(gpu->rb)) {
gpu->rb = msm_ringbuffer_new(gpu, config->ringsz); DRM_DEV_INFO_ONCE(drm->dev, "Only creating %lu ringbuffers\n",
if (IS_ERR(gpu->rb)) { ARRAY_SIZE(gpu->rb));
ret = PTR_ERR(gpu->rb); nr_rings = ARRAY_SIZE(gpu->rb);
gpu->rb = NULL; }
dev_err(drm->dev, "could not create ringbuffer: %d\n", ret);
/* Create ringbuffer(s): */
for (i = 0; i < nr_rings; i++) {
gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova);
if (IS_ERR(gpu->rb[i])) {
ret = PTR_ERR(gpu->rb[i]);
dev_err(drm->dev,
"could not create ringbuffer %d: %d\n", i, ret);
goto fail; goto fail;
} }
memptrs += sizeof(struct msm_rbmemptrs);
memptrs_iova += sizeof(struct msm_rbmemptrs);
}
gpu->nr_rings = nr_rings;
return 0; return 0;
fail: fail:
for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
msm_ringbuffer_destroy(gpu->rb[i]);
gpu->rb[i] = NULL;
}
if (gpu->memptrs_bo) { if (gpu->memptrs_bo) {
msm_gem_put_vaddr(gpu->memptrs_bo); msm_gem_put_vaddr(gpu->memptrs_bo);
msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace); msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace);
...@@ -724,16 +782,17 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, ...@@ -724,16 +782,17 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
void msm_gpu_cleanup(struct msm_gpu *gpu) void msm_gpu_cleanup(struct msm_gpu *gpu)
{ {
int i;
DBG("%s", gpu->name); DBG("%s", gpu->name);
WARN_ON(!list_empty(&gpu->active_list)); WARN_ON(!list_empty(&gpu->active_list));
bs_fini(gpu); bs_fini(gpu);
if (gpu->rb) { for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
if (gpu->rb_iova) msm_ringbuffer_destroy(gpu->rb[i]);
msm_gem_put_iova(gpu->rb->bo, gpu->aspace); gpu->rb[i] = NULL;
msm_ringbuffer_destroy(gpu->rb);
} }
if (gpu->memptrs_bo) { if (gpu->memptrs_bo) {
......
...@@ -33,7 +33,7 @@ struct msm_gpu_config { ...@@ -33,7 +33,7 @@ struct msm_gpu_config {
const char *irqname; const char *irqname;
uint64_t va_start; uint64_t va_start;
uint64_t va_end; uint64_t va_end;
unsigned int ringsz; unsigned int nr_rings;
}; };
/* So far, with hardware that I've seen to date, we can have: /* So far, with hardware that I've seen to date, we can have:
...@@ -57,8 +57,9 @@ struct msm_gpu_funcs { ...@@ -57,8 +57,9 @@ struct msm_gpu_funcs {
int (*pm_resume)(struct msm_gpu *gpu); int (*pm_resume)(struct msm_gpu *gpu);
void (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit, void (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit,
struct msm_file_private *ctx); struct msm_file_private *ctx);
void (*flush)(struct msm_gpu *gpu); void (*flush)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
irqreturn_t (*irq)(struct msm_gpu *irq); irqreturn_t (*irq)(struct msm_gpu *irq);
struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu);
void (*recover)(struct msm_gpu *gpu); void (*recover)(struct msm_gpu *gpu);
void (*destroy)(struct msm_gpu *gpu); void (*destroy)(struct msm_gpu *gpu);
#ifdef CONFIG_DEBUG_FS #ifdef CONFIG_DEBUG_FS
...@@ -67,14 +68,6 @@ struct msm_gpu_funcs { ...@@ -67,14 +68,6 @@ struct msm_gpu_funcs {
#endif #endif
}; };
#define rbmemptr(gpu, member) \
((gpu)->memptrs_iova + offsetof(struct msm_rbmemptrs, member))
struct msm_rbmemptrs {
volatile uint32_t rptr;
volatile uint32_t fence;
};
struct msm_gpu { struct msm_gpu {
const char *name; const char *name;
struct drm_device *dev; struct drm_device *dev;
...@@ -93,16 +86,12 @@ struct msm_gpu { ...@@ -93,16 +86,12 @@ struct msm_gpu {
const struct msm_gpu_perfcntr *perfcntrs; const struct msm_gpu_perfcntr *perfcntrs;
uint32_t num_perfcntrs; uint32_t num_perfcntrs;
/* ringbuffer: */ struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS];
struct msm_ringbuffer *rb; int nr_rings;
uint64_t rb_iova;
/* list of GEM active objects: */ /* list of GEM active objects: */
struct list_head active_list; struct list_head active_list;
/* fencing: */
struct msm_fence_context *fctx;
/* does gpu need hw_init? */ /* does gpu need hw_init? */
bool needs_hw_init; bool needs_hw_init;
...@@ -133,21 +122,26 @@ struct msm_gpu { ...@@ -133,21 +122,26 @@ struct msm_gpu {
#define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */ #define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */
#define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD) #define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD)
struct timer_list hangcheck_timer; struct timer_list hangcheck_timer;
uint32_t hangcheck_fence;
struct work_struct recover_work; struct work_struct recover_work;
struct list_head submit_list;
struct msm_rbmemptrs *memptrs;
struct drm_gem_object *memptrs_bo; struct drm_gem_object *memptrs_bo;
uint64_t memptrs_iova;
}; };
/* It turns out that all targets use the same ringbuffer size */
#define MSM_GPU_RINGBUFFER_SZ SZ_32K
static inline bool msm_gpu_active(struct msm_gpu *gpu) static inline bool msm_gpu_active(struct msm_gpu *gpu)
{ {
return gpu->fctx->last_fence > gpu->memptrs->fence; int i;
for (i = 0; i < gpu->nr_rings; i++) {
struct msm_ringbuffer *ring = gpu->rb[i];
if (ring->seqno > ring->memptrs->fence)
return true;
}
return false;
} }
/* Perf-Counters: /* Perf-Counters:
......
...@@ -18,13 +18,15 @@ ...@@ -18,13 +18,15 @@
#include "msm_ringbuffer.h" #include "msm_ringbuffer.h"
#include "msm_gpu.h" #include "msm_gpu.h"
struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
void *memptrs, uint64_t memptrs_iova)
{ {
struct msm_ringbuffer *ring; struct msm_ringbuffer *ring;
char name[32];
int ret; int ret;
if (WARN_ON(!is_power_of_2(size))) /* We assume everywhere that MSM_GPU_RINGBUFFER_SZ is a power of 2 */
return ERR_PTR(-EINVAL); BUILD_BUG_ON(!is_power_of_2(MSM_GPU_RINGBUFFER_SZ));
ring = kzalloc(sizeof(*ring), GFP_KERNEL); ring = kzalloc(sizeof(*ring), GFP_KERNEL);
if (!ring) { if (!ring) {
...@@ -33,32 +35,44 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) ...@@ -33,32 +35,44 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size)
} }
ring->gpu = gpu; ring->gpu = gpu;
ring->id = id;
/* Pass NULL for the iova pointer - we will map it later */ /* Pass NULL for the iova pointer - we will map it later */
ring->start = msm_gem_kernel_new(gpu->dev, size, MSM_BO_WC, ring->start = msm_gem_kernel_new(gpu->dev, MSM_GPU_RINGBUFFER_SZ,
gpu->aspace, &ring->bo, NULL); MSM_BO_WC, gpu->aspace, &ring->bo, NULL);
if (IS_ERR(ring->start)) { if (IS_ERR(ring->start)) {
ret = PTR_ERR(ring->start); ret = PTR_ERR(ring->start);
ring->start = 0; ring->start = 0;
goto fail; goto fail;
} }
ring->end = ring->start + (size / 4); ring->end = ring->start + (MSM_GPU_RINGBUFFER_SZ >> 2);
ring->cur = ring->start; ring->cur = ring->start;
ring->size = size; ring->memptrs = memptrs;
ring->memptrs_iova = memptrs_iova;
INIT_LIST_HEAD(&ring->submits);
snprintf(name, sizeof(name), "gpu-ring-%d", ring->id);
ring->fctx = msm_fence_context_alloc(gpu->dev, name);
return ring; return ring;
fail: fail:
if (ring)
msm_ringbuffer_destroy(ring); msm_ringbuffer_destroy(ring);
return ERR_PTR(ret); return ERR_PTR(ret);
} }
void msm_ringbuffer_destroy(struct msm_ringbuffer *ring) void msm_ringbuffer_destroy(struct msm_ringbuffer *ring)
{ {
if (IS_ERR_OR_NULL(ring))
return;
msm_fence_context_free(ring->fctx);
if (ring->bo) { if (ring->bo) {
msm_gem_put_iova(ring->bo, ring->gpu->aspace);
msm_gem_put_vaddr(ring->bo); msm_gem_put_vaddr(ring->bo);
drm_gem_object_unreference_unlocked(ring->bo); drm_gem_object_unreference_unlocked(ring->bo);
} }
......
...@@ -20,14 +20,30 @@ ...@@ -20,14 +20,30 @@
#include "msm_drv.h" #include "msm_drv.h"
#define rbmemptr(ring, member) \
((ring)->memptrs_iova + offsetof(struct msm_rbmemptrs, member))
/*
 * Per-ring block of memory pointers. msm_gpu_init() carves one of these per
 * ringbuffer out of a single memptrs allocation, advancing both the CPU
 * pointer and the iova by sizeof(struct msm_rbmemptrs) for each ring.
 * Use rbmemptr(ring, member) to get the GPU address of a member.
 */
struct msm_rbmemptrs {
/* presumably the ring read pointer; not referenced in this view - confirm */
volatile uint32_t rptr;
/* last completed fence for the ring, as read by hangcheck and retire */
volatile uint32_t fence;
};
struct msm_ringbuffer { struct msm_ringbuffer {
struct msm_gpu *gpu; struct msm_gpu *gpu;
int size; int id;
struct drm_gem_object *bo; struct drm_gem_object *bo;
uint32_t *start, *end, *cur; uint32_t *start, *end, *cur;
struct list_head submits;
uint64_t iova;
uint32_t seqno;
uint32_t hangcheck_fence;
struct msm_rbmemptrs *memptrs;
uint64_t memptrs_iova;
struct msm_fence_context *fctx;
}; };
struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size); struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
void *memptrs, uint64_t memptrs_iova);
void msm_ringbuffer_destroy(struct msm_ringbuffer *ring); void msm_ringbuffer_destroy(struct msm_ringbuffer *ring);
/* ringbuffer helpers (the parts that are same for a3xx/a2xx/z180..) */ /* ringbuffer helpers (the parts that are same for a3xx/a2xx/z180..) */
......
...@@ -60,9 +60,10 @@ void msm_submitqueue_close(struct msm_file_private *ctx) ...@@ -60,9 +60,10 @@ void msm_submitqueue_close(struct msm_file_private *ctx)
msm_submitqueue_put(entry); msm_submitqueue_put(entry);
} }
int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio, u32 flags, int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
u32 *id) u32 prio, u32 flags, u32 *id)
{ {
struct msm_drm_private *priv = drm->dev_private;
struct msm_gpu_submitqueue *queue; struct msm_gpu_submitqueue *queue;
if (!ctx) if (!ctx)
...@@ -75,7 +76,13 @@ int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio, u32 flags, ...@@ -75,7 +76,13 @@ int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio, u32 flags,
kref_init(&queue->ref); kref_init(&queue->ref);
queue->flags = flags; queue->flags = flags;
if (priv->gpu) {
if (prio >= priv->gpu->nr_rings)
return -EINVAL;
queue->prio = prio; queue->prio = prio;
}
write_lock(&ctx->queuelock); write_lock(&ctx->queuelock);
...@@ -91,16 +98,26 @@ int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio, u32 flags, ...@@ -91,16 +98,26 @@ int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio, u32 flags,
return 0; return 0;
} }
int msm_submitqueue_init(struct msm_file_private *ctx) int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx)
{ {
struct msm_drm_private *priv = drm->dev_private;
int default_prio;
if (!ctx) if (!ctx)
return 0; return 0;
/*
* Select priority 2 as the "default priority" unless nr_rings is less
* than 2 and then pick the lowest priority
*/
default_prio = priv->gpu ?
clamp_t(uint32_t, 2, 0, priv->gpu->nr_rings - 1) : 0;
INIT_LIST_HEAD(&ctx->submitqueues); INIT_LIST_HEAD(&ctx->submitqueues);
rwlock_init(&ctx->queuelock); rwlock_init(&ctx->queuelock);
return msm_submitqueue_create(ctx, 2, 0, NULL); return msm_submitqueue_create(drm, ctx, default_prio, 0, NULL);
} }
int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id) int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id)
......
This diff is collapsed.
...@@ -232,6 +232,7 @@ struct drm_msm_wait_fence { ...@@ -232,6 +232,7 @@ struct drm_msm_wait_fence {
__u32 fence; /* in */ __u32 fence; /* in */
__u32 pad; __u32 pad;
struct drm_msm_timespec timeout; /* in */ struct drm_msm_timespec timeout; /* in */
__u32 queueid; /* in, submitqueue id */
}; };
/* madvise provides a way to tell the kernel in case a buffers contents /* madvise provides a way to tell the kernel in case a buffers contents
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment