Commit cdec4d36 authored by Eric Anholt

drm/vc4: Expose dma-buf fences for V3D rendering.

This is needed for proper synchronization with display on another DRM
device (pl111 or tinydrm) with buffers produced by vc4 V3D.  Fixes the
new igt vc4_dmabuf_poll testcase, and rendering of one of the glmark2
desktop tests on pl111+vc4.

This doesn't yet introduce waits on another device's fences before
vc4's rendering/display, because I don't have testcases for them.

v2: Reuse dma_fence_free(), retitle commit message to clarify that
    it's not a full dma-buf fencing implementation yet.
Signed-off-by: Eric Anholt <eric@anholt.net>
Link: http://patchwork.freedesktop.org/patch/msgid/20170412191202.22740-6-eric@anholt.net
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
parent ce9971de
...@@ -9,6 +9,7 @@ vc4-y := \ ...@@ -9,6 +9,7 @@ vc4-y := \
vc4_drv.o \ vc4_drv.o \
vc4_dpi.o \ vc4_dpi.o \
vc4_dsi.o \ vc4_dsi.o \
vc4_fence.o \
vc4_kms.o \ vc4_kms.o \
vc4_gem.o \ vc4_gem.o \
vc4_hdmi.o \ vc4_hdmi.o \
......
...@@ -19,6 +19,8 @@ ...@@ -19,6 +19,8 @@
* rendering can return quickly. * rendering can return quickly.
*/ */
#include <linux/dma-buf.h>
#include "vc4_drv.h" #include "vc4_drv.h"
#include "uapi/drm/vc4_drm.h" #include "uapi/drm/vc4_drm.h"
...@@ -88,6 +90,10 @@ static void vc4_bo_destroy(struct vc4_bo *bo) ...@@ -88,6 +90,10 @@ static void vc4_bo_destroy(struct vc4_bo *bo)
vc4->bo_stats.num_allocated--; vc4->bo_stats.num_allocated--;
vc4->bo_stats.size_allocated -= obj->size; vc4->bo_stats.size_allocated -= obj->size;
if (bo->resv == &bo->_resv)
reservation_object_fini(bo->resv);
drm_gem_cma_free_object(obj); drm_gem_cma_free_object(obj);
} }
...@@ -244,8 +250,12 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, ...@@ -244,8 +250,12 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size,
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
} }
bo = to_vc4_bo(&cma_obj->base);
return to_vc4_bo(&cma_obj->base); bo->resv = &bo->_resv;
reservation_object_init(bo->resv);
return bo;
} }
int vc4_dumb_create(struct drm_file *file_priv, int vc4_dumb_create(struct drm_file *file_priv,
...@@ -369,6 +379,13 @@ static void vc4_bo_cache_time_timer(unsigned long data) ...@@ -369,6 +379,13 @@ static void vc4_bo_cache_time_timer(unsigned long data)
schedule_work(&vc4->bo_cache.time_work); schedule_work(&vc4->bo_cache.time_work);
} }
struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj)
{
struct vc4_bo *bo = to_vc4_bo(obj);
return bo->resv;
}
struct dma_buf * struct dma_buf *
vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags) vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags)
{ {
...@@ -440,6 +457,24 @@ void *vc4_prime_vmap(struct drm_gem_object *obj) ...@@ -440,6 +457,24 @@ void *vc4_prime_vmap(struct drm_gem_object *obj)
return drm_gem_cma_prime_vmap(obj); return drm_gem_cma_prime_vmap(obj);
} }
struct drm_gem_object *
vc4_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach,
struct sg_table *sgt)
{
struct drm_gem_object *obj;
struct vc4_bo *bo;
obj = drm_gem_cma_prime_import_sg_table(dev, attach, sgt);
if (IS_ERR(obj))
return obj;
bo = to_vc4_bo(obj);
bo->resv = attach->dmabuf->resv;
return obj;
}
int vc4_create_bo_ioctl(struct drm_device *dev, void *data, int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv) struct drm_file *file_priv)
{ {
......
...@@ -168,8 +168,9 @@ static struct drm_driver vc4_drm_driver = { ...@@ -168,8 +168,9 @@ static struct drm_driver vc4_drm_driver = {
.prime_fd_to_handle = drm_gem_prime_fd_to_handle, .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_import = drm_gem_prime_import, .gem_prime_import = drm_gem_prime_import,
.gem_prime_export = vc4_prime_export, .gem_prime_export = vc4_prime_export,
.gem_prime_res_obj = vc4_prime_res_obj,
.gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table, .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table,
.gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, .gem_prime_import_sg_table = vc4_prime_import_sg_table,
.gem_prime_vmap = vc4_prime_vmap, .gem_prime_vmap = vc4_prime_vmap,
.gem_prime_vunmap = drm_gem_cma_prime_vunmap, .gem_prime_vunmap = drm_gem_cma_prime_vunmap,
.gem_prime_mmap = vc4_prime_mmap, .gem_prime_mmap = vc4_prime_mmap,
......
...@@ -8,7 +8,9 @@ ...@@ -8,7 +8,9 @@
#include "drmP.h" #include "drmP.h"
#include "drm_gem_cma_helper.h" #include "drm_gem_cma_helper.h"
#include "drm_gem_cma_helper.h"
#include <linux/reservation.h>
#include <drm/drm_encoder.h> #include <drm/drm_encoder.h>
struct vc4_dev { struct vc4_dev {
...@@ -56,6 +58,8 @@ struct vc4_dev { ...@@ -56,6 +58,8 @@ struct vc4_dev {
/* Protects bo_cache and the BO stats. */ /* Protects bo_cache and the BO stats. */
struct mutex bo_lock; struct mutex bo_lock;
uint64_t dma_fence_context;
/* Sequence number for the last job queued in bin_job_list. /* Sequence number for the last job queued in bin_job_list.
* Starts at 0 (no jobs emitted). * Starts at 0 (no jobs emitted).
*/ */
...@@ -150,6 +154,10 @@ struct vc4_bo { ...@@ -150,6 +154,10 @@ struct vc4_bo {
* DRM_IOCTL_VC4_CREATE_SHADER_BO. * DRM_IOCTL_VC4_CREATE_SHADER_BO.
*/ */
struct vc4_validated_shader_info *validated_shader; struct vc4_validated_shader_info *validated_shader;
/* normally (resv == &_resv) except for imported bo's */
struct reservation_object *resv;
struct reservation_object _resv;
}; };
static inline struct vc4_bo * static inline struct vc4_bo *
...@@ -158,6 +166,19 @@ to_vc4_bo(struct drm_gem_object *bo) ...@@ -158,6 +166,19 @@ to_vc4_bo(struct drm_gem_object *bo)
return (struct vc4_bo *)bo; return (struct vc4_bo *)bo;
} }
/* A dma_fence paired with the vc4 device and job seqno it tracks. */
struct vc4_fence {
/* Keep this as the first member: to_vc4_fence() converts a
 * struct dma_fence pointer with a plain cast.
 */
struct dma_fence base;
/* Device used to look up the finished seqno in signaled(). */
struct drm_device *dev;
/* vc4 seqno for signaled() test */
uint64_t seqno;
};
/* Converts a dma_fence pointer to the vc4_fence that embeds it (plain cast). */
static inline struct vc4_fence *
to_vc4_fence(struct dma_fence *fence)
{
	struct vc4_fence *wrapper = (struct vc4_fence *)fence;

	return wrapper;
}
struct vc4_seqno_cb { struct vc4_seqno_cb {
struct work_struct work; struct work_struct work;
uint64_t seqno; uint64_t seqno;
...@@ -230,6 +251,8 @@ struct vc4_exec_info { ...@@ -230,6 +251,8 @@ struct vc4_exec_info {
/* Latest write_seqno of any BO that binning depends on. */ /* Latest write_seqno of any BO that binning depends on. */
uint64_t bin_dep_seqno; uint64_t bin_dep_seqno;
struct dma_fence *fence;
/* Last current addresses the hardware was processing when the /* Last current addresses the hardware was processing when the
* hangcheck timer checked on us. * hangcheck timer checked on us.
*/ */
...@@ -436,7 +459,11 @@ int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data, ...@@ -436,7 +459,11 @@ int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv); struct drm_file *file_priv);
int vc4_mmap(struct file *filp, struct vm_area_struct *vma); int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj);
int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
struct drm_gem_object *vc4_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach,
struct sg_table *sgt);
void *vc4_prime_vmap(struct drm_gem_object *obj); void *vc4_prime_vmap(struct drm_gem_object *obj);
void vc4_bo_cache_init(struct drm_device *dev); void vc4_bo_cache_init(struct drm_device *dev);
void vc4_bo_cache_destroy(struct drm_device *dev); void vc4_bo_cache_destroy(struct drm_device *dev);
...@@ -468,6 +495,9 @@ int vc4_dpi_debugfs_regs(struct seq_file *m, void *unused); ...@@ -468,6 +495,9 @@ int vc4_dpi_debugfs_regs(struct seq_file *m, void *unused);
extern struct platform_driver vc4_dsi_driver; extern struct platform_driver vc4_dsi_driver;
int vc4_dsi_debugfs_regs(struct seq_file *m, void *unused); int vc4_dsi_debugfs_regs(struct seq_file *m, void *unused);
/* vc4_fence.c */
extern const struct dma_fence_ops vc4_fence_ops;
/* vc4_gem.c */ /* vc4_gem.c */
void vc4_gem_init(struct drm_device *dev); void vc4_gem_init(struct drm_device *dev);
void vc4_gem_destroy(struct drm_device *dev); void vc4_gem_destroy(struct drm_device *dev);
......
/*
* Copyright © 2017 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "vc4_drv.h"
/* get_driver_name callback: every vc4 fence reports the same driver name. */
static const char *vc4_fence_get_driver_name(struct dma_fence *fence)
{
	static const char driver_name[] = "vc4";

	return driver_name;
}
/* get_timeline_name callback: all vc4 fences share one timeline name. */
static const char *vc4_fence_get_timeline_name(struct dma_fence *fence)
{
	static const char timeline_name[] = "vc4-v3d";

	return timeline_name;
}
/*
 * enable_signaling callback.  Does no per-fence setup and always
 * returns true.
 */
static bool vc4_fence_enable_signaling(struct dma_fence *fence)
{
	(void)fence;

	return true;
}
static bool vc4_fence_signaled(struct dma_fence *fence)
{
struct vc4_fence *f = to_vc4_fence(fence);
struct vc4_dev *vc4 = to_vc4_dev(f->dev);
return vc4->finished_seqno >= f->seqno;
}
/* dma_fence callback table for vc4 fences.  The name, enable_signaling
 * and signaled hooks are the vc4 callbacks defined in this file; wait
 * and release are delegated to dma_fence_default_wait() and
 * dma_fence_free().
 */
const struct dma_fence_ops vc4_fence_ops = {
.get_driver_name = vc4_fence_get_driver_name,
.get_timeline_name = vc4_fence_get_timeline_name,
.enable_signaling = vc4_fence_enable_signaling,
.signaled = vc4_fence_signaled,
.wait = dma_fence_default_wait,
.release = dma_fence_free,
};
...@@ -463,6 +463,8 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) ...@@ -463,6 +463,8 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
for (i = 0; i < exec->bo_count; i++) { for (i = 0; i < exec->bo_count; i++) {
bo = to_vc4_bo(&exec->bo[i]->base); bo = to_vc4_bo(&exec->bo[i]->base);
bo->seqno = seqno; bo->seqno = seqno;
reservation_object_add_shared_fence(bo->resv, exec->fence);
} }
list_for_each_entry(bo, &exec->unref_list, unref_head) { list_for_each_entry(bo, &exec->unref_list, unref_head) {
...@@ -472,7 +474,103 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) ...@@ -472,7 +474,103 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
for (i = 0; i < exec->rcl_write_bo_count; i++) { for (i = 0; i < exec->rcl_write_bo_count; i++) {
bo = to_vc4_bo(&exec->rcl_write_bo[i]->base); bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
bo->write_seqno = seqno; bo->write_seqno = seqno;
reservation_object_add_excl_fence(bo->resv, exec->fence);
}
}
/*
 * Drops the reservation lock of every BO in exec->bo[] and then
 * releases the ww acquire context.
 */
static void
vc4_unlock_bo_reservations(struct drm_device *dev,
			   struct vc4_exec_info *exec,
			   struct ww_acquire_ctx *acquire_ctx)
{
	int idx = 0;

	while (idx < exec->bo_count) {
		ww_mutex_unlock(&to_vc4_bo(&exec->bo[idx]->base)->resv->lock);
		idx++;
	}

	ww_acquire_fini(acquire_ctx);
}
/* Takes the reservation lock on all the BOs being referenced, so that
 * at queue submit time we can update the reservations.
 *
 * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
 * (all of which are on exec->unref_list).  They're entirely private
 * to vc4, so we don't attach dma-buf fences to them.
 *
 * Returns 0 with every exec->bo[] reservation locked and a shared
 * fence slot reserved on each; the caller later releases them with
 * vc4_unlock_bo_reservations().  On failure returns a negative errno
 * with no reservation locks held and @acquire_ctx already released,
 * so the caller must not touch the context again.
 */
static int
vc4_lock_bo_reservations(struct drm_device *dev,
			 struct vc4_exec_info *exec,
			 struct ww_acquire_ctx *acquire_ctx)
{
	int contended_lock = -1;
	int i, ret;
	struct vc4_bo *bo;

	ww_acquire_init(acquire_ctx, &reservation_ww_class);

retry:
	/* After a deadlock backoff, take the contended lock first using
	 * the slow path before retrying the rest.
	 */
	if (contended_lock != -1) {
		bo = to_vc4_bo(&exec->bo[contended_lock]->base);
		ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
						       acquire_ctx);
		if (ret) {
			/* Nothing is locked here; release the context
			 * set up by ww_acquire_init() before bailing out.
			 */
			ww_acquire_fini(acquire_ctx);
			return ret;
		}
	}

	for (i = 0; i < exec->bo_count; i++) {
		if (i == contended_lock)
			continue;

		bo = to_vc4_bo(&exec->bo[i]->base);

		ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx);
		if (ret) {
			int j;

			/* Back out of everything locked so far. */
			for (j = 0; j < i; j++) {
				bo = to_vc4_bo(&exec->bo[j]->base);
				ww_mutex_unlock(&bo->resv->lock);
			}

			/* The slow-path lock is not covered by the loop
			 * above when its index is at or beyond i.
			 */
			if (contended_lock != -1 && contended_lock >= i) {
				bo = to_vc4_bo(&exec->bo[contended_lock]->base);
				ww_mutex_unlock(&bo->resv->lock);
			}

			if (ret == -EDEADLK) {
				contended_lock = i;
				goto retry;
			}

			/* All locks are dropped; release the context so
			 * it is not left initialized on the error return.
			 */
			ww_acquire_fini(acquire_ctx);
			return ret;
		}
	}

	ww_acquire_done(acquire_ctx);

	/* Reserve space for our shared (read-only) fence references,
	 * before we commit the CL to the hardware.
	 */
	for (i = 0; i < exec->bo_count; i++) {
		bo = to_vc4_bo(&exec->bo[i]->base);

		ret = reservation_object_reserve_shared(bo->resv);
		if (ret) {
			/* Unlocks every BO and releases the context. */
			vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
			return ret;
		}
	}

	return 0;
}
/* Queues a struct vc4_exec_info for execution. If no job is /* Queues a struct vc4_exec_info for execution. If no job is
...@@ -484,19 +582,34 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) ...@@ -484,19 +582,34 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
* then bump the end address. That's a change for a later date, * then bump the end address. That's a change for a later date,
* though. * though.
*/ */
static void static int
vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec) vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
struct ww_acquire_ctx *acquire_ctx)
{ {
struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_dev *vc4 = to_vc4_dev(dev);
uint64_t seqno; uint64_t seqno;
unsigned long irqflags; unsigned long irqflags;
struct vc4_fence *fence;
fence = kzalloc(sizeof(*fence), GFP_KERNEL);
if (!fence)
return -ENOMEM;
fence->dev = dev;
spin_lock_irqsave(&vc4->job_lock, irqflags); spin_lock_irqsave(&vc4->job_lock, irqflags);
seqno = ++vc4->emit_seqno; seqno = ++vc4->emit_seqno;
exec->seqno = seqno; exec->seqno = seqno;
dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock,
vc4->dma_fence_context, exec->seqno);
fence->seqno = exec->seqno;
exec->fence = &fence->base;
vc4_update_bo_seqnos(exec, seqno); vc4_update_bo_seqnos(exec, seqno);
vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
list_add_tail(&exec->head, &vc4->bin_job_list); list_add_tail(&exec->head, &vc4->bin_job_list);
/* If no job was executing, kick ours off. Otherwise, it'll /* If no job was executing, kick ours off. Otherwise, it'll
...@@ -509,6 +622,8 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec) ...@@ -509,6 +622,8 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
} }
spin_unlock_irqrestore(&vc4->job_lock, irqflags); spin_unlock_irqrestore(&vc4->job_lock, irqflags);
return 0;
} }
/** /**
...@@ -707,6 +822,12 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) ...@@ -707,6 +822,12 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_dev *vc4 = to_vc4_dev(dev);
unsigned i; unsigned i;
/* If we got force-completed because of GPU reset rather than
* through our IRQ handler, signal the fence now.
*/
if (exec->fence)
dma_fence_signal(exec->fence);
if (exec->bo) { if (exec->bo) {
for (i = 0; i < exec->bo_count; i++) for (i = 0; i < exec->bo_count; i++)
drm_gem_object_unreference_unlocked(&exec->bo[i]->base); drm_gem_object_unreference_unlocked(&exec->bo[i]->base);
...@@ -874,6 +995,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, ...@@ -874,6 +995,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_dev *vc4 = to_vc4_dev(dev);
struct drm_vc4_submit_cl *args = data; struct drm_vc4_submit_cl *args = data;
struct vc4_exec_info *exec; struct vc4_exec_info *exec;
struct ww_acquire_ctx acquire_ctx;
int ret = 0; int ret = 0;
if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) { if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
...@@ -916,12 +1038,18 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, ...@@ -916,12 +1038,18 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
if (ret) if (ret)
goto fail; goto fail;
ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx);
if (ret)
goto fail;
/* Clear this out of the struct we'll be putting in the queue, /* Clear this out of the struct we'll be putting in the queue,
* since it's part of our stack. * since it's part of our stack.
*/ */
exec->args = NULL; exec->args = NULL;
vc4_queue_submit(dev, exec); ret = vc4_queue_submit(dev, exec, &acquire_ctx);
if (ret)
goto fail;
/* Return the seqno for our job. */ /* Return the seqno for our job. */
args->seqno = vc4->emit_seqno; args->seqno = vc4->emit_seqno;
...@@ -939,6 +1067,8 @@ vc4_gem_init(struct drm_device *dev) ...@@ -939,6 +1067,8 @@ vc4_gem_init(struct drm_device *dev)
{ {
struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_dev *vc4 = to_vc4_dev(dev);
vc4->dma_fence_context = dma_fence_context_alloc(1);
INIT_LIST_HEAD(&vc4->bin_job_list); INIT_LIST_HEAD(&vc4->bin_job_list);
INIT_LIST_HEAD(&vc4->render_job_list); INIT_LIST_HEAD(&vc4->render_job_list);
INIT_LIST_HEAD(&vc4->job_done_list); INIT_LIST_HEAD(&vc4->job_done_list);
......
...@@ -142,6 +142,10 @@ vc4_irq_finish_render_job(struct drm_device *dev) ...@@ -142,6 +142,10 @@ vc4_irq_finish_render_job(struct drm_device *dev)
vc4->finished_seqno++; vc4->finished_seqno++;
list_move_tail(&exec->head, &vc4->job_done_list); list_move_tail(&exec->head, &vc4->job_done_list);
if (exec->fence) {
dma_fence_signal_locked(exec->fence);
exec->fence = NULL;
}
vc4_submit_next_render_job(dev); vc4_submit_next_render_job(dev);
wake_up_all(&vc4->job_wait_queue); wake_up_all(&vc4->job_wait_queue);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment