Commit d5b1a78a authored by Eric Anholt

drm/vc4: Add support for drawing 3D frames.

The user submission is basically a pointer to a command list and a
pointer to uniforms.  We copy those into the kernel, validate and
relocate them, and store the result in a GPU BO which we queue for
execution.

v2: Drop support for NV shader recs (not necessary for GL), simplify
    vc4_use_bo(), improve bin flush/semaphore checks, use __u32 style
    types.
Signed-off-by: Eric Anholt <eric@anholt.net>
parent d3f5168a
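As context for the diff below, here is a minimal, hypothetical userspace sketch of the flow the commit message describes: the bin CL, shader records, and uniforms are built in plain user memory, and the new submit ioctl (defined in the uapi header at the end of this commit) hands them to the kernel for copy-in, validation, and queuing. The helper name is illustrative and the RCL surface setup is elided.

/* Hypothetical userspace helper (not part of this commit).  Assumes an
 * open DRM fd and CL/shader-rec/uniform data already laid out in user
 * memory; RCL surface setup and error handling are elided.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include "vc4_drm.h"

static int submit_frame(int fd, void *bin_cl, uint32_t bin_cl_size,
			void *shader_rec, uint32_t shader_rec_size,
			uint32_t shader_rec_count,
			void *uniforms, uint32_t uniforms_size,
			uint32_t *bo_handles, uint32_t bo_handle_count,
			uint64_t *seqno_out)
{
	struct drm_vc4_submit_cl submit;

	memset(&submit, 0, sizeof(submit));
	submit.bin_cl = (uintptr_t)bin_cl;
	submit.bin_cl_size = bin_cl_size;
	submit.shader_rec = (uintptr_t)shader_rec;
	submit.shader_rec_size = shader_rec_size;
	submit.shader_rec_count = shader_rec_count;
	submit.uniforms = (uintptr_t)uniforms;
	submit.uniforms_size = uniforms_size;
	submit.bo_handles = (uintptr_t)bo_handles;
	submit.bo_handle_count = bo_handle_count;
	/* width/height, tile bounds, and the six RCL surfaces would be
	 * filled in here. */

	if (ioctl(fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit))
		return -1;

	*seqno_out = submit.seqno;	/* input to the wait ioctls */
	return 0;
}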
@@ -8,12 +8,19 @@ vc4-y := \
 	vc4_crtc.o \
 	vc4_drv.o \
 	vc4_kms.o \
+	vc4_gem.o \
 	vc4_hdmi.o \
 	vc4_hvs.o \
+	vc4_irq.o \
 	vc4_plane.o \
+	vc4_render_cl.o \
+	vc4_trace_points.o \
 	vc4_v3d.o \
+	vc4_validate.o \
 	vc4_validate_shaders.o
 
 vc4-$(CONFIG_DEBUG_FS) += vc4_debugfs.o
 
 obj-$(CONFIG_DRM_VC4) += vc4.o
+
+CFLAGS_vc4_trace_points.o := -I$(src)
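The new CFLAGS line adds the driver directory to the include path so that the define_trace.h machinery, which re-includes vc4_trace.h via its TRACE_INCLUDE_PATH of ".", can find the header when building vc4_trace_points.o.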
@@ -74,6 +74,9 @@ static const struct file_operations vc4_drm_fops = {
 };
 
 static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
@@ -83,10 +86,16 @@ static struct drm_driver vc4_drm_driver = {
 	.driver_features = (DRIVER_MODESET |
 			    DRIVER_ATOMIC |
 			    DRIVER_GEM |
+			    DRIVER_HAVE_IRQ |
 			    DRIVER_PRIME),
 	.lastclose = vc4_lastclose,
 	.preclose = vc4_drm_preclose,
 
+	.irq_handler = vc4_irq,
+	.irq_preinstall = vc4_irq_preinstall,
+	.irq_postinstall = vc4_irq_postinstall,
+	.irq_uninstall = vc4_irq_uninstall,
+
 	.enable_vblank = vc4_enable_vblank,
 	.disable_vblank = vc4_disable_vblank,
 	.get_vblank_counter = drm_vblank_count,
@@ -181,9 +190,11 @@ static int vc4_drm_bind(struct device *dev)
 	if (ret)
 		goto unref;
 
+	vc4_gem_init(drm);
+
 	ret = component_bind_all(dev, drm);
 	if (ret)
-		goto unref;
+		goto gem_destroy;
 
 	ret = drm_dev_register(drm, 0);
 	if (ret < 0)
@@ -207,6 +218,8 @@ static int vc4_drm_bind(struct device *dev)
 	drm_dev_unregister(drm);
 unbind_all:
 	component_unbind_all(dev, drm);
+gem_destroy:
+	vc4_gem_destroy(drm);
 unref:
 	drm_dev_unref(drm);
 	vc4_bo_cache_destroy(drm);
@@ -49,6 +49,48 @@ struct vc4_dev {
 	/* Protects bo_cache and the BO stats. */
 	struct mutex bo_lock;
 
+	/* Sequence number for the last job queued in job_list.
+	 * Starts at 0 (no jobs emitted).
+	 */
+	uint64_t emit_seqno;
+
+	/* Sequence number for the last completed job on the GPU.
+	 * Starts at 0 (no jobs completed).
+	 */
+	uint64_t finished_seqno;
+
+	/* List of all struct vc4_exec_info for jobs to be executed.
+	 * The first job in the list is the one currently programmed
+	 * into ct0ca/ct1ca for execution.
+	 */
+	struct list_head job_list;
+
+	/* List of the finished vc4_exec_infos waiting to be freed by
+	 * job_done_work.
+	 */
+	struct list_head job_done_list;
+
+	/* Spinlock used to synchronize the job_list and seqno
+	 * accesses between the IRQ handler and GEM ioctls.
+	 */
+	spinlock_t job_lock;
+	wait_queue_head_t job_wait_queue;
+	struct work_struct job_done_work;
+
+	/* The binner overflow memory that's currently set up in
+	 * BPOA/BPOS registers.  When overflow occurs and a new one is
+	 * allocated, the previous one will be moved to
+	 * vc4->current_exec's free list.
+	 */
+	struct vc4_bo *overflow_mem;
+	struct work_struct overflow_mem_work;
+
+	struct {
+		uint32_t last_ct0ca, last_ct1ca;
+		struct timer_list timer;
+		struct work_struct reset_work;
+	} hangcheck;
+
+	struct semaphore async_modeset;
 };
 
 static inline struct vc4_dev *
@@ -60,6 +102,9 @@ to_vc4_dev(struct drm_device *dev)
 struct vc4_bo {
 	struct drm_gem_cma_object base;
 
+	/* seqno of the last job to render to this BO. */
+	uint64_t seqno;
+
 	/* List entry for the BO's position in either
 	 * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list
 	 */
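Taken together, emit_seqno, finished_seqno, and the per-job/per-BO seqnos form a simple progress protocol: anything stamped with seqno N is done once finished_seqno has caught up to N. A hedged sketch of the check (the helper name is hypothetical; the real wait path lives in the collapsed vc4_gem.c below):

/* Hypothetical helper, for illustration only: a job (or the last render
 * to a BO) with sequence number 'seqno' is complete once the GPU's
 * finished_seqno has caught up to it.  finished_seqno only advances.
 */
static inline bool vc4_seqno_done(struct vc4_dev *vc4, uint64_t seqno)
{
	return vc4->finished_seqno >= seqno;
}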
@@ -130,6 +175,101 @@ to_vc4_encoder(struct drm_encoder *encoder)
 #define HVS_READ(offset) readl(vc4->hvs->regs + offset)
 #define HVS_WRITE(offset, val) writel(val, vc4->hvs->regs + offset)
 
+struct vc4_exec_info {
+	/* Sequence number for this bin/render job. */
+	uint64_t seqno;
+
+	/* Kernel-space copy of the ioctl arguments */
+	struct drm_vc4_submit_cl *args;
+
+	/* This is the array of BOs that were looked up at the start of exec.
+	 * Command validation will use indices into this array.
+	 */
+	struct drm_gem_cma_object **bo;
+	uint32_t bo_count;
+
+	/* Pointers for our position in vc4->job_list */
+	struct list_head head;
+
+	/* List of other BOs used in the job that need to be released
+	 * once the job is complete.
+	 */
+	struct list_head unref_list;
+
+	/* Current unvalidated indices into @bo loaded by the non-hardware
+	 * VC4_PACKET_GEM_HANDLES.
+	 */
+	uint32_t bo_index[2];
+
+	/* This is the BO where we store the validated command lists, shader
+	 * records, and uniforms.
+	 */
+	struct drm_gem_cma_object *exec_bo;
+
+	/**
+	 * This tracks the per-shader-record state (packet 64) that
+	 * determines the length of the shader record and the offset
+	 * it's expected to be found at.  It gets read in from the
+	 * command lists.
+	 */
+	struct vc4_shader_state {
+		uint32_t addr;
+		/* Maximum vertex index referenced by any primitive using this
+		 * shader state.
+		 */
+		uint32_t max_index;
+	} *shader_state;
+
+	/** How many shader states the user declared they were using. */
+	uint32_t shader_state_size;
+	/** How many shader state records the validator has seen. */
+	uint32_t shader_state_count;
+
+	bool found_tile_binning_mode_config_packet;
+	bool found_start_tile_binning_packet;
+	bool found_increment_semaphore_packet;
+	bool found_flush;
+	uint8_t bin_tiles_x, bin_tiles_y;
+	struct drm_gem_cma_object *tile_bo;
+	uint32_t tile_alloc_offset;
+
+	/**
+	 * Computed addresses pointing into exec_bo where we start the
+	 * bin thread (ct0) and render thread (ct1).
+	 */
+	uint32_t ct0ca, ct0ea;
+	uint32_t ct1ca, ct1ea;
+
+	/* Pointer to the unvalidated bin CL (if present). */
+	void *bin_u;
+
+	/* Pointers to the shader recs.  These paddrs get incremented as CL
+	 * packets are relocated in validate_gl_shader_state, and the vaddrs
+	 * (u and v) get incremented and size decremented as the shader recs
+	 * themselves are validated.
+	 */
+	void *shader_rec_u;
+	void *shader_rec_v;
+	uint32_t shader_rec_p;
+	uint32_t shader_rec_size;
+
+	/* Pointers to the uniform data.  These pointers are incremented, and
+	 * size decremented, as each batch of uniforms is uploaded.
+	 */
+	void *uniforms_u;
+	void *uniforms_v;
+	uint32_t uniforms_p;
+	uint32_t uniforms_size;
+};
+
+static inline struct vc4_exec_info *
+vc4_first_job(struct vc4_dev *vc4)
+{
+	if (list_empty(&vc4->job_list))
+		return NULL;
+	return list_first_entry(&vc4->job_list, struct vc4_exec_info, head);
+}
+
 /**
  * struct vc4_texture_sample_info - saves the offsets into the UBO for texture
  * setup parameters.
@@ -231,10 +371,31 @@ void vc4_debugfs_cleanup(struct drm_minor *minor);
 /* vc4_drv.c */
 void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index);
 
+/* vc4_gem.c */
+void vc4_gem_init(struct drm_device *dev);
+void vc4_gem_destroy(struct drm_device *dev);
+int vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv);
+int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file_priv);
+int vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv);
+void vc4_submit_next_job(struct drm_device *dev);
+int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
+		       uint64_t timeout_ns, bool interruptible);
+void vc4_job_handle_completed(struct vc4_dev *vc4);
+
 /* vc4_hdmi.c */
 extern struct platform_driver vc4_hdmi_driver;
 int vc4_hdmi_debugfs_regs(struct seq_file *m, void *unused);
 
+/* vc4_irq.c */
+irqreturn_t vc4_irq(int irq, void *arg);
+void vc4_irq_preinstall(struct drm_device *dev);
+int vc4_irq_postinstall(struct drm_device *dev);
+void vc4_irq_uninstall(struct drm_device *dev);
+void vc4_irq_reset(struct drm_device *dev);
+
 /* vc4_hvs.c */
 extern struct platform_driver vc4_hvs_driver;
 void vc4_hvs_dump_state(struct drm_device *dev);
@@ -253,6 +414,27 @@ u32 vc4_plane_dlist_size(struct drm_plane_state *state);
 extern struct platform_driver vc4_v3d_driver;
 int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused);
 int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused);
+int vc4_v3d_set_power(struct vc4_dev *vc4, bool on);
+
+/* vc4_validate.c */
+int
+vc4_validate_bin_cl(struct drm_device *dev,
+		    void *validated,
+		    void *unvalidated,
+		    struct vc4_exec_info *exec);
+
+int
+vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);
+
+struct drm_gem_cma_object *vc4_use_bo(struct vc4_exec_info *exec,
+				      uint32_t hindex);
+
+int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec);
+
+bool vc4_check_tex_size(struct vc4_exec_info *exec,
+			struct drm_gem_cma_object *fbo,
+			uint32_t offset, uint8_t tiling_format,
+			uint32_t width, uint32_t height, uint8_t cpp);
 
 /* vc4_validate_shader.c */
 struct vc4_validated_shader_info *
This diff is collapsed.
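The collapsed diff above is presumably vc4_gem.c, which implements the declarations added to vc4_drv.h. As a hedged sketch of the job-kicking step those declarations imply (the struct vc4_dev comments say the head of job_list is what gets programmed into ct0ca/ct1ca):

/* Hypothetical sketch only -- the real vc4_submit_next_job() is in the
 * collapsed diff.  It must take the head of job_list (if any) and point
 * the bin thread (ct0) and render thread (ct1) at the validated CLs.
 */
static void vc4_submit_next_job_sketch(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec = vc4_first_job(vc4);

	if (!exec)
		return;

	/* v3d_start_thread() is a made-up stand-in for the actual
	 * V3D_WRITE()s of the thread control list registers.
	 */
	v3d_start_thread(vc4, 0, exec->ct0ca, exec->ct0ea);
	v3d_start_thread(vc4, 1, exec->ct1ca, exec->ct1ea);
}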
/*
* Copyright © 2014 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/** DOC: Interrupt management for the V3D engine.
*
* We have an interrupt status register (V3D_INTCTL) which reports
* interrupts, and where writing 1 bits clears those interrupts.
* There are also a pair of interrupt registers
* (V3D_INTENA/V3D_INTDIS) where writing a 1 to their bits enables or
* disables that specific interrupt, and 0s written are ignored
* (reading either one returns the set of enabled interrupts).
*
* When we take a render frame interrupt, we need to wake the
* processes waiting for some frame to be done, and get the next frame
* submitted ASAP (so the hardware doesn't sit idle when there's work
* to do).
*
* When we take the binner out of memory interrupt, we need to
* allocate some new memory and pass it to the binner so that the
* current job can make progress.
*/
#include "vc4_drv.h"
#include "vc4_regs.h"
#define V3D_DRIVER_IRQS (V3D_INT_OUTOMEM | \
			 V3D_INT_FRDONE)
DECLARE_WAIT_QUEUE_HEAD(render_wait);
static void
vc4_overflow_mem_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, overflow_mem_work);
	struct drm_device *dev = vc4->dev;
	struct vc4_bo *bo;

	bo = vc4_bo_create(dev, 256 * 1024, true);
	if (!bo) {
		DRM_ERROR("Couldn't allocate binner overflow mem\n");
		return;
	}

	/* If there's a job executing currently, then our previous
	 * overflow allocation is getting used in that job and we need
	 * to queue it to be released when the job is done.  But if no
	 * job is executing at all, then we can free the old overflow
	 * object directly.
	 *
	 * No lock necessary for this pointer since we're the only
	 * ones that update the pointer, and our workqueue won't
	 * reenter.
	 */
	if (vc4->overflow_mem) {
		struct vc4_exec_info *current_exec;
		unsigned long irqflags;

		spin_lock_irqsave(&vc4->job_lock, irqflags);
		current_exec = vc4_first_job(vc4);
		if (current_exec) {
			vc4->overflow_mem->seqno = vc4->finished_seqno + 1;
			list_add_tail(&vc4->overflow_mem->unref_head,
				      &current_exec->unref_list);
			vc4->overflow_mem = NULL;
		}
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
	}

	if (vc4->overflow_mem)
		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
	vc4->overflow_mem = bo;

	V3D_WRITE(V3D_BPOA, bo->base.paddr);
	V3D_WRITE(V3D_BPOS, bo->base.base.size);
	V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM);
	V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM);
}

static void
vc4_irq_finish_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec = vc4_first_job(vc4);

	if (!exec)
		return;

	vc4->finished_seqno++;
	list_move_tail(&exec->head, &vc4->job_done_list);
	vc4_submit_next_job(dev);

	wake_up_all(&vc4->job_wait_queue);
	schedule_work(&vc4->job_done_work);
}

irqreturn_t
vc4_irq(int irq, void *arg)
{
	struct drm_device *dev = arg;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint32_t intctl;
	irqreturn_t status = IRQ_NONE;

	barrier();
	intctl = V3D_READ(V3D_INTCTL);

	/* Acknowledge the interrupts we're handling here.  The render
	 * frame done interrupt will be cleared, while OUTOMEM will
	 * stay high until the underlying cause is cleared.
	 */
	V3D_WRITE(V3D_INTCTL, intctl);

	if (intctl & V3D_INT_OUTOMEM) {
		/* Disable OUTOMEM until the work is done. */
		V3D_WRITE(V3D_INTDIS, V3D_INT_OUTOMEM);
		schedule_work(&vc4->overflow_mem_work);
		status = IRQ_HANDLED;
	}

	if (intctl & V3D_INT_FRDONE) {
		spin_lock(&vc4->job_lock);
		vc4_irq_finish_job(dev);
		spin_unlock(&vc4->job_lock);
		status = IRQ_HANDLED;
	}

	return status;
}

void
vc4_irq_preinstall(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	init_waitqueue_head(&vc4->job_wait_queue);
	INIT_WORK(&vc4->overflow_mem_work, vc4_overflow_mem_work);

	/* Clear any pending interrupts someone might have left around
	 * for us.
	 */
	V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
}

int
vc4_irq_postinstall(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Enable both the render done and out of memory interrupts. */
	V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);

	return 0;
}

void
vc4_irq_uninstall(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Disable sending interrupts for our driver's IRQs. */
	V3D_WRITE(V3D_INTDIS, V3D_DRIVER_IRQS);

	/* Clear any pending interrupts we might have left. */
	V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);

	cancel_work_sync(&vc4->overflow_mem_work);
}

/** Reinitializes interrupt registers when a GPU reset is performed. */
void vc4_irq_reset(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned long irqflags;

	/* Acknowledge any stale IRQs. */
	V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);

	/*
	 * Turn all our interrupts on.  Binner out of memory is the
	 * only one we expect to trigger at this point, since we've
	 * just come from poweron and haven't supplied any overflow
	 * memory yet.
	 */
	V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	vc4_irq_finish_job(dev);
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}
This diff is collapsed.
This diff is collapsed.
/*
* Copyright (C) 2015 Broadcom
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#if !defined(_VC4_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
#define _VC4_TRACE_H_
#include <linux/stringify.h>
#include <linux/types.h>
#include <linux/tracepoint.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM vc4
#define TRACE_INCLUDE_FILE vc4_trace
TRACE_EVENT(vc4_wait_for_seqno_begin,
	    TP_PROTO(struct drm_device *dev, uint64_t seqno, uint64_t timeout),
	    TP_ARGS(dev, seqno, timeout),

	    TP_STRUCT__entry(
			     __field(u32, dev)
			     __field(u64, seqno)
			     __field(u64, timeout)
			     ),

	    TP_fast_assign(
			   __entry->dev = dev->primary->index;
			   __entry->seqno = seqno;
			   __entry->timeout = timeout;
			   ),

	    TP_printk("dev=%u, seqno=%llu, timeout=%llu",
		      __entry->dev, __entry->seqno, __entry->timeout)
);

TRACE_EVENT(vc4_wait_for_seqno_end,
	    TP_PROTO(struct drm_device *dev, uint64_t seqno),
	    TP_ARGS(dev, seqno),

	    TP_STRUCT__entry(
			     __field(u32, dev)
			     __field(u64, seqno)
			     ),

	    TP_fast_assign(
			   __entry->dev = dev->primary->index;
			   __entry->seqno = seqno;
			   ),

	    TP_printk("dev=%u, seqno=%llu",
		      __entry->dev, __entry->seqno)
);

#endif /* _VC4_TRACE_H_ */

/* This part must be outside protection */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#include <trace/define_trace.h>
/*
* Copyright (C) 2015 Broadcom
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include "vc4_drv.h"
#ifndef __CHECKER__
#define CREATE_TRACE_POINTS
#include "vc4_trace.h"
#endif
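The TRACE_EVENT() definitions above generate trace_vc4_wait_for_seqno_begin()/_end() helpers matching their TP_PROTO signatures, and this stub compilation unit instantiates them via CREATE_TRACE_POINTS. A hedged sketch of a call site (the actual caller, vc4_gem.c, is collapsed above):

/* Illustrative only: how the generated tracepoint helpers would bracket
 * a seqno wait.  The actual wait loop lives in the collapsed vc4_gem.c.
 */
#include "vc4_drv.h"
#include "vc4_trace.h"

static int vc4_wait_example(struct drm_device *dev, uint64_t seqno,
			    uint64_t timeout_ns)
{
	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);

	/* ... sleep on vc4->job_wait_queue until finished_seqno >= seqno
	 * or the timeout expires ...
	 */

	trace_vc4_wait_for_seqno_end(dev, seqno);
	return 0;
}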
@@ -144,6 +144,21 @@ int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
 }
 #endif /* CONFIG_DEBUG_FS */
 
+/*
+ * Asks the firmware to turn on power to the V3D engine.
+ *
+ * This may be doable with just the clocks interface, though this
+ * packet does some other register setup from the firmware, too.
+ */
+int
+vc4_v3d_set_power(struct vc4_dev *vc4, bool on)
+{
+	if (on)
+		return pm_generic_poweroff(&vc4->v3d->pdev->dev);
+	else
+		return pm_generic_resume(&vc4->v3d->pdev->dev);
+}
+
 static void vc4_v3d_init_hw(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
@@ -161,6 +176,7 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct vc4_dev *vc4 = to_vc4_dev(drm);
 	struct vc4_v3d *v3d = NULL;
+	int ret;
 
 	v3d = devm_kzalloc(&pdev->dev, sizeof(*v3d), GFP_KERNEL);
 	if (!v3d)
@@ -180,8 +196,20 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
 		return -EINVAL;
 	}
 
+	/* Reset the binner overflow address/size at setup, to be sure
+	 * we don't reuse an old one.
+	 */
+	V3D_WRITE(V3D_BPOA, 0);
+	V3D_WRITE(V3D_BPOS, 0);
+
 	vc4_v3d_init_hw(drm);
 
+	ret = drm_irq_install(drm, platform_get_irq(pdev, 0));
+	if (ret) {
+		DRM_ERROR("Failed to install IRQ handler\n");
+		return ret;
+	}
+
 	return 0;
 }
@@ -191,6 +219,15 @@ static void vc4_v3d_unbind(struct device *dev, struct device *master,
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct vc4_dev *vc4 = to_vc4_dev(drm);
 
+	drm_irq_uninstall(drm);
+
+	/* Disable the binner's overflow memory address, so the next
+	 * driver probe (if any) doesn't try to reuse our old
+	 * allocation.
+	 */
+	V3D_WRITE(V3D_BPOA, 0);
+	V3D_WRITE(V3D_BPOS, 0);
+
 	vc4->v3d = NULL;
 }
This diff is collapsed.
@@ -26,14 +26,155 @@
 #include "drm.h"
 
+#define DRM_VC4_SUBMIT_CL                         0x00
+#define DRM_VC4_WAIT_SEQNO                        0x01
+#define DRM_VC4_WAIT_BO                           0x02
 #define DRM_VC4_CREATE_BO                         0x03
 #define DRM_VC4_MMAP_BO                           0x04
 #define DRM_VC4_CREATE_SHADER_BO                  0x05
 
+#define DRM_IOCTL_VC4_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
+#define DRM_IOCTL_VC4_WAIT_SEQNO          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
+#define DRM_IOCTL_VC4_WAIT_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo)
 #define DRM_IOCTL_VC4_CREATE_BO           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
 #define DRM_IOCTL_VC4_MMAP_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
 #define DRM_IOCTL_VC4_CREATE_SHADER_BO    DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
+
+struct drm_vc4_submit_rcl_surface {
+	__u32 hindex; /* Handle index, or ~0 if not present. */
+	__u32 offset; /* Offset to start of buffer. */
+	/*
+	 * Bits for either render config (color_write) or load/store packet.
+	 * Bits should all be 0 for MSAA load/stores.
+	 */
+	__u16 bits;
+
+#define VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES		(1 << 0)
+	__u16 flags;
+};
+
+/**
+ * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D
+ * engine.
+ *
+ * Drivers typically use GPU BOs to store batchbuffers / command lists and
+ * their associated state.  However, because the VC4 lacks an MMU, we have to
+ * do validation of memory accesses by the GPU commands.  If we were to store
+ * our commands in BOs, we'd need to do uncached readback from them to do the
+ * validation process, which is too expensive.  Instead, userspace accumulates
+ * commands and associated state in plain memory, then the kernel copies the
+ * data to its own address space, and then validates and stores it in a GPU
+ * BO.
+ */
+struct drm_vc4_submit_cl {
+	/* Pointer to the binner command list.
+	 *
+	 * This is the first set of commands executed, which runs the
+	 * coordinate shader to determine where primitives land on the screen,
+	 * then writes out the state updates and draw calls necessary per tile
+	 * to the tile allocation BO.
+	 */
+	__u64 bin_cl;
+
+	/* Pointer to the shader records.
+	 *
+	 * Shader records are the structures read by the hardware that contain
+	 * pointers to uniforms, shaders, and vertex attributes.  The
+	 * reference to the shader record has enough information to determine
+	 * how many pointers are necessary (fixed number for shaders/uniforms,
+	 * and an attribute count), so those BO indices into bo_handles are
+	 * just stored as __u32s before each shader record passed in.
+	 */
+	__u64 shader_rec;
+
+	/* Pointer to uniform data and texture handles for the textures
+	 * referenced by the shader.
+	 *
+	 * For each shader state record, there is a set of uniform data in the
+	 * order referenced by the record (FS, VS, then CS).  Each set of
+	 * uniform data has a __u32 index into bo_handles per texture
+	 * sample operation, in the order the QPU_W_TMUn_S writes appear in
+	 * the program.  Following the texture BO handle indices is the actual
+	 * uniform data.
+	 *
+	 * The individual uniform state blocks don't have sizes passed in,
+	 * because the kernel has to determine the sizes anyway during shader
+	 * code validation.
+	 */
+	__u64 uniforms;
+	__u64 bo_handles;
+
+	/* Size in bytes of the binner command list. */
+	__u32 bin_cl_size;
+	/* Size in bytes of the set of shader records. */
+	__u32 shader_rec_size;
+	/* Number of shader records.
+	 *
+	 * This could just be computed from the contents of shader_records and
+	 * the address bits of references to them from the bin CL, but it
+	 * keeps the kernel from having to resize some allocations it makes.
+	 */
+	__u32 shader_rec_count;
+	/* Size in bytes of the uniform state. */
+	__u32 uniforms_size;
+
+	/* Number of BO handles passed in (size is that times 4). */
+	__u32 bo_handle_count;
+
+	/* RCL setup: */
+	__u16 width;
+	__u16 height;
+	__u8 min_x_tile;
+	__u8 min_y_tile;
+	__u8 max_x_tile;
+	__u8 max_y_tile;
+	struct drm_vc4_submit_rcl_surface color_read;
+	struct drm_vc4_submit_rcl_surface color_write;
+	struct drm_vc4_submit_rcl_surface zs_read;
+	struct drm_vc4_submit_rcl_surface zs_write;
+	struct drm_vc4_submit_rcl_surface msaa_color_write;
+	struct drm_vc4_submit_rcl_surface msaa_zs_write;
+	__u32 clear_color[2];
+	__u32 clear_z;
+	__u8 clear_s;
+
+	__u32 pad:24;
+
+#define VC4_SUBMIT_CL_USE_CLEAR_COLOR			(1 << 0)
+	__u32 flags;
+
+	/* Returned value of the seqno of this render job (for the
+	 * wait ioctl).
+	 */
+	__u64 seqno;
+};
+
+/**
+ * struct drm_vc4_wait_seqno - ioctl argument for waiting for
+ * DRM_VC4_SUBMIT_CL completion using its returned seqno.
+ *
+ * timeout_ns is the timeout in nanoseconds, where "0" means "don't
+ * block, just return the status."
+ */
+struct drm_vc4_wait_seqno {
+	__u64 seqno;
+	__u64 timeout_ns;
+};
+
+/**
+ * struct drm_vc4_wait_bo - ioctl argument for waiting for
+ * completion of the last DRM_VC4_SUBMIT_CL on a BO.
+ *
+ * This is useful for cases where multiple processes might be
+ * rendering to a BO and you want to wait for all rendering to be
+ * completed.
+ */
+struct drm_vc4_wait_bo {
+	__u32 handle;
+	__u32 pad;
+	__u64 timeout_ns;
+};
+
 /**
  * struct drm_vc4_create_bo - ioctl argument for creating VC4 BOs.
  *
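To close the loop on the uapi above, a hypothetical userspace sketch of the two wait ioctls; the helper names and the one-second timeout are illustrative:

/* Illustrative only.  A timeout_ns of 0 would just poll completion
 * status, per the drm_vc4_wait_seqno documentation above.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include "vc4_drm.h"

static int wait_for_frame(int fd, uint64_t seqno)
{
	struct drm_vc4_wait_seqno wait = {
		.seqno = seqno,			/* from drm_vc4_submit_cl.seqno */
		.timeout_ns = 1000000000ull,	/* 1 second */
	};

	return ioctl(fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
}

static int wait_for_bo_idle(int fd, uint32_t bo_handle)
{
	struct drm_vc4_wait_bo wait = {
		.handle = bo_handle,
		.timeout_ns = 1000000000ull,
	};

	return ioctl(fd, DRM_IOCTL_VC4_WAIT_BO, &wait);
}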