Commit 21de54b3 authored by Dave Airlie

Merge tag 'drm-vc4-next-2015-12-11' of http://github.com/anholt/linux into drm-next

This pull request brings in 3D acceleration support for the VC4 GPU.
While there is still performance work to be done (particularly
surrounding RCL generation), the CL submit ABI should be settled and
done now.

* tag 'drm-vc4-next-2015-12-11' of http://github.com/anholt/linux:
  drm/vc4: Add an interface for capturing the GPU state after a hang.
  drm/vc4: Add support for async pageflips.
  drm/vc4: Add support for drawing 3D frames.
  drm/vc4: Bind and initialize the V3D engine.
  drm/vc4: Fix a typo in a V3D debug register.
  drm/vc4: Add an API for creating GPU shaders in GEM BOs.
  drm/vc4: Add create and map BO ioctls.
  drm/vc4: Add a BO cache.
  drm: Create a driver hook for allocating GEM object structs.
parents 870a1718 21461365
......@@ -59,11 +59,13 @@ __drm_gem_cma_create(struct drm_device *drm, size_t size)
struct drm_gem_object *gem_obj;
int ret;
-	cma_obj = kzalloc(sizeof(*cma_obj), GFP_KERNEL);
-	if (!cma_obj)
+	if (drm->driver->gem_create_object)
+		gem_obj = drm->driver->gem_create_object(drm, size);
+	else
+		gem_obj = kzalloc(sizeof(*cma_obj), GFP_KERNEL);
+	if (!gem_obj)
		return ERR_PTR(-ENOMEM);
-	gem_obj = &cma_obj->base;
+	cma_obj = container_of(gem_obj, struct drm_gem_cma_object, base);
ret = drm_gem_object_init(drm, gem_obj, size);
if (ret)
......
......@@ -8,10 +8,19 @@ vc4-y := \
vc4_crtc.o \
vc4_drv.o \
vc4_kms.o \
+	vc4_gem.o \
	vc4_hdmi.o \
	vc4_hvs.o \
-	vc4_plane.o
+	vc4_irq.o \
+	vc4_plane.o \
+	vc4_render_cl.o \
+	vc4_trace_points.o \
+	vc4_v3d.o \
+	vc4_validate.o \
+	vc4_validate_shaders.o

vc4-$(CONFIG_DEBUG_FS) += vc4_debugfs.o

obj-$(CONFIG_DRM_VC4) += vc4.o

+CFLAGS_vc4_trace_points.o := -I$(src)
This diff is collapsed.
......@@ -35,6 +35,7 @@
#include "drm_atomic_helper.h"
#include "drm_crtc_helper.h"
#include "linux/clk.h"
#include "drm_fb_cma_helper.h"
#include "linux/component.h"
#include "linux/of_device.h"
#include "vc4_drv.h"
......@@ -476,10 +477,106 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data)
return ret;
}
struct vc4_async_flip_state {
struct drm_crtc *crtc;
struct drm_framebuffer *fb;
struct drm_pending_vblank_event *event;
struct vc4_seqno_cb cb;
};
/* Called when the V3D execution for the BO being flipped to is done, so that
* we can actually update the plane's address to point to it.
*/
static void
vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
{
struct vc4_async_flip_state *flip_state =
container_of(cb, struct vc4_async_flip_state, cb);
struct drm_crtc *crtc = flip_state->crtc;
struct drm_device *dev = crtc->dev;
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct drm_plane *plane = crtc->primary;
vc4_plane_async_set_fb(plane, flip_state->fb);
if (flip_state->event) {
unsigned long flags;
spin_lock_irqsave(&dev->event_lock, flags);
drm_crtc_send_vblank_event(crtc, flip_state->event);
spin_unlock_irqrestore(&dev->event_lock, flags);
}
drm_framebuffer_unreference(flip_state->fb);
kfree(flip_state);
up(&vc4->async_modeset);
}
/* Implements async (non-vblank-synced) page flips.
*
* The page flip ioctl needs to return immediately, so we grab the
* modeset semaphore on the pipe, and queue the address update for
* when V3D is done with the BO being flipped to.
*/
static int vc4_async_page_flip(struct drm_crtc *crtc,
struct drm_framebuffer *fb,
struct drm_pending_vblank_event *event,
uint32_t flags)
{
struct drm_device *dev = crtc->dev;
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct drm_plane *plane = crtc->primary;
int ret = 0;
struct vc4_async_flip_state *flip_state;
struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL);
if (!flip_state)
return -ENOMEM;
drm_framebuffer_reference(fb);
flip_state->fb = fb;
flip_state->crtc = crtc;
flip_state->event = event;
/* Make sure all other async modesets have landed. */
ret = down_interruptible(&vc4->async_modeset);
if (ret) {
kfree(flip_state);
return ret;
}
/* Immediately update the plane's legacy fb pointer, so that later
* modeset prep sees the state that will be present when the semaphore
* is released.
*/
drm_atomic_set_fb_for_plane(plane->state, fb);
plane->fb = fb;
vc4_queue_seqno_cb(dev, &flip_state->cb, bo->seqno,
vc4_async_page_flip_complete);
/* Driver takes ownership of state on successful async commit. */
return 0;
}
static int vc4_page_flip(struct drm_crtc *crtc,
struct drm_framebuffer *fb,
struct drm_pending_vblank_event *event,
uint32_t flags)
{
if (flags & DRM_MODE_PAGE_FLIP_ASYNC)
return vc4_async_page_flip(crtc, fb, event, flags);
else
return drm_atomic_helper_page_flip(crtc, fb, event, flags);
}
static const struct drm_crtc_funcs vc4_crtc_funcs = {
.set_config = drm_atomic_helper_set_config,
.destroy = vc4_crtc_destroy,
-	.page_flip = drm_atomic_helper_page_flip,
+	.page_flip = vc4_page_flip,
.set_property = NULL,
.cursor_set = NULL, /* handled by drm_mode_cursor_universal */
.cursor_move = NULL, /* handled by drm_mode_cursor_universal */
......
......@@ -16,11 +16,14 @@
#include "vc4_regs.h"
static const struct drm_info_list vc4_debugfs_list[] = {
{"bo_stats", vc4_bo_stats_debugfs, 0},
{"hdmi_regs", vc4_hdmi_debugfs_regs, 0},
{"hvs_regs", vc4_hvs_debugfs_regs, 0},
{"crtc0_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)0},
{"crtc1_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)1},
{"crtc2_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)2},
{"v3d_ident", vc4_v3d_debugfs_ident, 0},
{"v3d_regs", vc4_v3d_debugfs_regs, 0},
};
#define VC4_DEBUGFS_ENTRIES ARRAY_SIZE(vc4_debugfs_list)
......
......@@ -16,6 +16,7 @@
#include <linux/platform_device.h>
#include "drm_fb_cma_helper.h"
#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_regs.h"
......@@ -63,7 +64,7 @@ static const struct file_operations vc4_drm_fops = {
.open = drm_open,
.release = drm_release,
.unlocked_ioctl = drm_ioctl,
-	.mmap = drm_gem_cma_mmap,
+	.mmap = vc4_mmap,
.poll = drm_poll,
.read = drm_read,
#ifdef CONFIG_COMPAT
......@@ -73,16 +74,30 @@ static const struct file_operations vc4_drm_fops = {
};
static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, 0),
DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, 0),
DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, 0),
DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl,
DRM_ROOT_ONLY),
};
static struct drm_driver vc4_drm_driver = {
.driver_features = (DRIVER_MODESET |
DRIVER_ATOMIC |
DRIVER_GEM |
DRIVER_HAVE_IRQ |
DRIVER_PRIME),
.lastclose = vc4_lastclose,
.preclose = vc4_drm_preclose,
.irq_handler = vc4_irq,
.irq_preinstall = vc4_irq_preinstall,
.irq_postinstall = vc4_irq_postinstall,
.irq_uninstall = vc4_irq_uninstall,
.enable_vblank = vc4_enable_vblank,
.disable_vblank = vc4_disable_vblank,
.get_vblank_counter = drm_vblank_count,
......@@ -92,18 +107,19 @@ static struct drm_driver vc4_drm_driver = {
.debugfs_cleanup = vc4_debugfs_cleanup,
#endif
-	.gem_free_object = drm_gem_cma_free_object,
+	.gem_create_object = vc4_create_object,
+	.gem_free_object = vc4_free_object,
.gem_vm_ops = &drm_gem_cma_vm_ops,
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_import = drm_gem_prime_import,
-	.gem_prime_export = drm_gem_prime_export,
+	.gem_prime_export = vc4_prime_export,
.gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table,
.gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
-	.gem_prime_vmap = drm_gem_cma_prime_vmap,
+	.gem_prime_vmap = vc4_prime_vmap,
.gem_prime_vunmap = drm_gem_cma_prime_vunmap,
-	.gem_prime_mmap = drm_gem_cma_prime_mmap,
+	.gem_prime_mmap = vc4_prime_mmap,
.dumb_create = vc4_dumb_create,
.dumb_map_offset = drm_gem_cma_dumb_map_offset,
......@@ -170,13 +186,17 @@ static int vc4_drm_bind(struct device *dev)
drm_dev_set_unique(drm, dev_name(dev));
vc4_bo_cache_init(drm);
drm_mode_config_init(drm);
if (ret)
goto unref;
vc4_gem_init(drm);
ret = component_bind_all(dev, drm);
if (ret)
-		goto unref;
+		goto gem_destroy;
ret = drm_dev_register(drm, 0);
if (ret < 0)
......@@ -200,8 +220,11 @@ static int vc4_drm_bind(struct device *dev)
drm_dev_unregister(drm);
unbind_all:
component_unbind_all(dev, drm);
gem_destroy:
vc4_gem_destroy(drm);
unref:
drm_dev_unref(drm);
vc4_bo_cache_destroy(drm);
return ret;
}
......@@ -228,6 +251,7 @@ static struct platform_driver *const component_drivers[] = {
&vc4_hdmi_driver,
&vc4_crtc_driver,
&vc4_hvs_driver,
&vc4_v3d_driver,
};
static int vc4_platform_drm_probe(struct platform_device *pdev)
......
This diff is collapsed.
This diff is collapsed.
/*
* Copyright © 2014 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/** DOC: Interrupt management for the V3D engine.
*
* We have an interrupt status register (V3D_INTCTL) which reports
* interrupts, and where writing 1 bits clears those interrupts.
* There are also a pair of interrupt registers
* (V3D_INTENA/V3D_INTDIS) where writing a 1 to their bits enables or
* disables that specific interrupt, and 0s written are ignored
* (reading either one returns the set of enabled interrupts).
*
* When we take a render frame interrupt, we need to wake the
* processes waiting for some frame to be done, and get the next frame
* submitted ASAP (so the hardware doesn't sit idle when there's work
* to do).
*
* When we take the binner out of memory interrupt, we need to
* allocate some new memory and pass it to the binner so that the
* current job can make progress.
*/
#include "vc4_drv.h"
#include "vc4_regs.h"
#define V3D_DRIVER_IRQS (V3D_INT_OUTOMEM | \
V3D_INT_FRDONE)
DECLARE_WAIT_QUEUE_HEAD(render_wait);
static void
vc4_overflow_mem_work(struct work_struct *work)
{
struct vc4_dev *vc4 =
container_of(work, struct vc4_dev, overflow_mem_work);
struct drm_device *dev = vc4->dev;
struct vc4_bo *bo;
bo = vc4_bo_create(dev, 256 * 1024, true);
if (!bo) {
DRM_ERROR("Couldn't allocate binner overflow mem\n");
return;
}
/* If there's a job executing currently, then our previous
* overflow allocation is getting used in that job and we need
* to queue it to be released when the job is done. But if no
* job is executing at all, then we can free the old overflow
 * object directly.
*
* No lock necessary for this pointer since we're the only
* ones that update the pointer, and our workqueue won't
* reenter.
*/
if (vc4->overflow_mem) {
struct vc4_exec_info *current_exec;
unsigned long irqflags;
spin_lock_irqsave(&vc4->job_lock, irqflags);
current_exec = vc4_first_job(vc4);
if (current_exec) {
vc4->overflow_mem->seqno = vc4->finished_seqno + 1;
list_add_tail(&vc4->overflow_mem->unref_head,
&current_exec->unref_list);
vc4->overflow_mem = NULL;
}
spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}
if (vc4->overflow_mem)
drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
vc4->overflow_mem = bo;
V3D_WRITE(V3D_BPOA, bo->base.paddr);
V3D_WRITE(V3D_BPOS, bo->base.base.size);
V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM);
V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM);
}
static void
vc4_irq_finish_job(struct drm_device *dev)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct vc4_exec_info *exec = vc4_first_job(vc4);
if (!exec)
return;
vc4->finished_seqno++;
list_move_tail(&exec->head, &vc4->job_done_list);
vc4_submit_next_job(dev);
wake_up_all(&vc4->job_wait_queue);
schedule_work(&vc4->job_done_work);
}
irqreturn_t
vc4_irq(int irq, void *arg)
{
struct drm_device *dev = arg;
struct vc4_dev *vc4 = to_vc4_dev(dev);
uint32_t intctl;
irqreturn_t status = IRQ_NONE;
barrier();
intctl = V3D_READ(V3D_INTCTL);
/* Acknowledge the interrupts we're handling here. The render
* frame done interrupt will be cleared, while OUTOMEM will
* stay high until the underlying cause is cleared.
*/
V3D_WRITE(V3D_INTCTL, intctl);
if (intctl & V3D_INT_OUTOMEM) {
/* Disable OUTOMEM until the work is done. */
V3D_WRITE(V3D_INTDIS, V3D_INT_OUTOMEM);
schedule_work(&vc4->overflow_mem_work);
status = IRQ_HANDLED;
}
if (intctl & V3D_INT_FRDONE) {
spin_lock(&vc4->job_lock);
vc4_irq_finish_job(dev);
spin_unlock(&vc4->job_lock);
status = IRQ_HANDLED;
}
return status;
}
void
vc4_irq_preinstall(struct drm_device *dev)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
init_waitqueue_head(&vc4->job_wait_queue);
INIT_WORK(&vc4->overflow_mem_work, vc4_overflow_mem_work);
/* Clear any pending interrupts someone might have left around
* for us.
*/
V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
}
int
vc4_irq_postinstall(struct drm_device *dev)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
/* Enable both the render done and out of memory interrupts. */
V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
return 0;
}
void
vc4_irq_uninstall(struct drm_device *dev)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
/* Disable sending interrupts for our driver's IRQs. */
V3D_WRITE(V3D_INTDIS, V3D_DRIVER_IRQS);
/* Clear any pending interrupts we might have left. */
V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
cancel_work_sync(&vc4->overflow_mem_work);
}
/** Reinitializes interrupt registers when a GPU reset is performed. */
void vc4_irq_reset(struct drm_device *dev)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
unsigned long irqflags;
/* Acknowledge any stale IRQs. */
V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
/*
* Turn all our interrupts on. Binner out of memory is the
* only one we expect to trigger at this point, since we've
* just come from poweron and haven't supplied any overflow
* memory yet.
*/
V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
spin_lock_irqsave(&vc4->job_lock, irqflags);
vc4_irq_finish_job(dev);
spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}
......@@ -15,6 +15,7 @@
*/
#include "drm_crtc.h"
#include "drm_atomic.h"
#include "drm_atomic_helper.h"
#include "drm_crtc_helper.h"
#include "drm_plane_helper.h"
......@@ -29,10 +30,152 @@ static void vc4_output_poll_changed(struct drm_device *dev)
drm_fbdev_cma_hotplug_event(vc4->fbdev);
}
struct vc4_commit {
struct drm_device *dev;
struct drm_atomic_state *state;
struct vc4_seqno_cb cb;
};
static void
vc4_atomic_complete_commit(struct vc4_commit *c)
{
struct drm_atomic_state *state = c->state;
struct drm_device *dev = state->dev;
struct vc4_dev *vc4 = to_vc4_dev(dev);
drm_atomic_helper_commit_modeset_disables(dev, state);
drm_atomic_helper_commit_planes(dev, state, false);
drm_atomic_helper_commit_modeset_enables(dev, state);
drm_atomic_helper_wait_for_vblanks(dev, state);
drm_atomic_helper_cleanup_planes(dev, state);
drm_atomic_state_free(state);
up(&vc4->async_modeset);
kfree(c);
}
static void
vc4_atomic_complete_commit_seqno_cb(struct vc4_seqno_cb *cb)
{
struct vc4_commit *c = container_of(cb, struct vc4_commit, cb);
vc4_atomic_complete_commit(c);
}
static struct vc4_commit *commit_init(struct drm_atomic_state *state)
{
struct vc4_commit *c = kzalloc(sizeof(*c), GFP_KERNEL);
if (!c)
return NULL;
c->dev = state->dev;
c->state = state;
return c;
}
/**
* vc4_atomic_commit - commit validated state object
* @dev: DRM device
* @state: the driver state object
* @async: asynchronous commit
*
 * This function commits a state object that has been pre-validated with
 * drm_atomic_helper_check(). It can still fail when, e.g., the framebuffer
 * reservation fails. Asynchronous commits are completed from the V3D seqno
 * callback once rendering to the new framebuffers is done.
*
* RETURNS
* Zero for success or -errno.
*/
static int vc4_atomic_commit(struct drm_device *dev,
struct drm_atomic_state *state,
bool async)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
int ret;
int i;
uint64_t wait_seqno = 0;
struct vc4_commit *c;
c = commit_init(state);
if (!c)
return -ENOMEM;
/* Make sure that any outstanding modesets have finished. */
ret = down_interruptible(&vc4->async_modeset);
if (ret) {
kfree(c);
return ret;
}
ret = drm_atomic_helper_prepare_planes(dev, state);
if (ret) {
kfree(c);
up(&vc4->async_modeset);
return ret;
}
for (i = 0; i < dev->mode_config.num_total_plane; i++) {
struct drm_plane *plane = state->planes[i];
struct drm_plane_state *new_state = state->plane_states[i];
if (!plane)
continue;
if ((plane->state->fb != new_state->fb) && new_state->fb) {
struct drm_gem_cma_object *cma_bo =
drm_fb_cma_get_gem_obj(new_state->fb, 0);
struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
wait_seqno = max(bo->seqno, wait_seqno);
}
}
/*
* This is the point of no return - everything below never fails except
* when the hw goes bonghits. Which means we can commit the new state on
* the software side now.
*/
drm_atomic_helper_swap_state(dev, state);
/*
* Everything below can be run asynchronously without the need to grab
* any modeset locks at all under one condition: It must be guaranteed
* that the asynchronous work has either been cancelled (if the driver
* supports it, which at least requires that the framebuffers get
* cleaned up with drm_atomic_helper_cleanup_planes()) or completed
* before the new state gets committed on the software side with
* drm_atomic_helper_swap_state().
*
* This scheme allows new atomic state updates to be prepared and
* checked in parallel to the asynchronous completion of the previous
* update. Which is important since compositors need to figure out the
* composition of the next frame right after having submitted the
* current layout.
*/
if (async) {
vc4_queue_seqno_cb(dev, &c->cb, wait_seqno,
vc4_atomic_complete_commit_seqno_cb);
} else {
vc4_wait_for_seqno(dev, wait_seqno, ~0ull, false);
vc4_atomic_complete_commit(c);
}
return 0;
}
static const struct drm_mode_config_funcs vc4_mode_funcs = {
.output_poll_changed = vc4_output_poll_changed,
.atomic_check = drm_atomic_helper_check,
-	.atomic_commit = drm_atomic_helper_commit,
+	.atomic_commit = vc4_atomic_commit,
.fb_create = drm_fb_cma_create,
};
......@@ -41,6 +184,8 @@ int vc4_kms_load(struct drm_device *dev)
struct vc4_dev *vc4 = to_vc4_dev(dev);
int ret;
sema_init(&vc4->async_modeset, 1);
ret = drm_vblank_init(dev, dev->mode_config.num_crtc);
if (ret < 0) {
dev_err(dev->dev, "failed to initialize vblank\n");
......@@ -51,6 +196,8 @@ int vc4_kms_load(struct drm_device *dev)
dev->mode_config.max_height = 2048;
dev->mode_config.funcs = &vc4_mode_funcs;
dev->mode_config.preferred_depth = 24;
dev->mode_config.async_page_flip = true;
dev->vblank_disable_allowed = true;
drm_mode_config_reset(dev);
......
This diff is collapsed.
......@@ -29,6 +29,14 @@ struct vc4_plane_state {
u32 *dlist;
u32 dlist_size; /* Number of dwords allocated for the display list. */
u32 dlist_count; /* Number of used dwords in the display list. */
/* Offset in the dlist to pointer word 0. */
u32 pw0_offset;
/* Pointer into the hardware dlist where this plane's dlist was
 * last written, at vc4_crtc_atomic_flush() time.
 */
u32 *hw_dlist;
};
static inline struct vc4_plane_state *
......@@ -207,6 +215,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
/* Position Word 3: Context. Written by the HVS. */
vc4_dlist_write(vc4_state, 0xc0c0c0c0);
vc4_state->pw0_offset = vc4_state->dlist_count;
/* Pointer Word 0: RGB / Y Pointer */
vc4_dlist_write(vc4_state, bo->paddr + offset);
......@@ -258,6 +268,8 @@ u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
int i;
vc4_state->hw_dlist = dlist;
/* Can't memcpy_toio() because it needs to be 32-bit writes. */
for (i = 0; i < vc4_state->dlist_count; i++)
writel(vc4_state->dlist[i], &dlist[i]);
......@@ -272,6 +284,34 @@ u32 vc4_plane_dlist_size(struct drm_plane_state *state)
return vc4_state->dlist_count;
}
/* Updates the plane to immediately (well, once the FIFO needs
 * refilling) scan out from a new framebuffer.
*/
void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
{
struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
uint32_t addr;
/* We're skipping the address adjustment for negative origin,
* because this is only called on the primary plane.
*/
WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
addr = bo->paddr + fb->offsets[0];
/* Write the new address into the hardware immediately. The
* scanout will start from this address as soon as the FIFO
* needs to refill with pixels.
*/
writel(addr, &vc4_state->hw_dlist[vc4_state->pw0_offset]);
/* Also update the CPU-side dlist copy, so that any later
* atomic updates that don't do a new modeset on our plane
* also use our updated address.
*/
vc4_state->dlist[vc4_state->pw0_offset] = addr;
}
static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
.prepare_fb = NULL,
.cleanup_fb = NULL,
......
/*
* Copyright © 2014 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef VC4_QPU_DEFINES_H
#define VC4_QPU_DEFINES_H
enum qpu_op_add {
QPU_A_NOP,
QPU_A_FADD,
QPU_A_FSUB,
QPU_A_FMIN,
QPU_A_FMAX,
QPU_A_FMINABS,
QPU_A_FMAXABS,
QPU_A_FTOI,
QPU_A_ITOF,
QPU_A_ADD = 12,
QPU_A_SUB,
QPU_A_SHR,
QPU_A_ASR,
QPU_A_ROR,
QPU_A_SHL,
QPU_A_MIN,
QPU_A_MAX,
QPU_A_AND,
QPU_A_OR,
QPU_A_XOR,
QPU_A_NOT,
QPU_A_CLZ,
QPU_A_V8ADDS = 30,
QPU_A_V8SUBS = 31,
};
enum qpu_op_mul {
QPU_M_NOP,
QPU_M_FMUL,
QPU_M_MUL24,
QPU_M_V8MULD,
QPU_M_V8MIN,
QPU_M_V8MAX,
QPU_M_V8ADDS,
QPU_M_V8SUBS,
};
enum qpu_raddr {
QPU_R_FRAG_PAYLOAD_ZW = 15, /* W for A file, Z for B file */
/* 0-31 are the plain regfile a or b fields */
QPU_R_UNIF = 32,
QPU_R_VARY = 35,
QPU_R_ELEM_QPU = 38,
QPU_R_NOP,
QPU_R_XY_PIXEL_COORD = 41,
QPU_R_MS_REV_FLAGS = 41,
QPU_R_VPM = 48,
QPU_R_VPM_LD_BUSY,
QPU_R_VPM_LD_WAIT,
QPU_R_MUTEX_ACQUIRE,
};
enum qpu_waddr {
/* 0-31 are the plain regfile a or b fields */
QPU_W_ACC0 = 32, /* aka r0 */
QPU_W_ACC1,
QPU_W_ACC2,
QPU_W_ACC3,
QPU_W_TMU_NOSWAP,
QPU_W_ACC5,
QPU_W_HOST_INT,
QPU_W_NOP,
QPU_W_UNIFORMS_ADDRESS,
QPU_W_QUAD_XY, /* X for regfile a, Y for regfile b */
QPU_W_MS_FLAGS = 42,
QPU_W_REV_FLAG = 42,
QPU_W_TLB_STENCIL_SETUP = 43,
QPU_W_TLB_Z,
QPU_W_TLB_COLOR_MS,
QPU_W_TLB_COLOR_ALL,
QPU_W_TLB_ALPHA_MASK,
QPU_W_VPM,
QPU_W_VPMVCD_SETUP, /* LD for regfile a, ST for regfile b */
QPU_W_VPM_ADDR, /* LD for regfile a, ST for regfile b */
QPU_W_MUTEX_RELEASE,
QPU_W_SFU_RECIP,
QPU_W_SFU_RECIPSQRT,
QPU_W_SFU_EXP,
QPU_W_SFU_LOG,
QPU_W_TMU0_S,
QPU_W_TMU0_T,
QPU_W_TMU0_R,
QPU_W_TMU0_B,
QPU_W_TMU1_S,
QPU_W_TMU1_T,
QPU_W_TMU1_R,
QPU_W_TMU1_B,
};
enum qpu_sig_bits {
QPU_SIG_SW_BREAKPOINT,
QPU_SIG_NONE,
QPU_SIG_THREAD_SWITCH,
QPU_SIG_PROG_END,
QPU_SIG_WAIT_FOR_SCOREBOARD,
QPU_SIG_SCOREBOARD_UNLOCK,
QPU_SIG_LAST_THREAD_SWITCH,
QPU_SIG_COVERAGE_LOAD,
QPU_SIG_COLOR_LOAD,
QPU_SIG_COLOR_LOAD_END,
QPU_SIG_LOAD_TMU0,
QPU_SIG_LOAD_TMU1,
QPU_SIG_ALPHA_MASK_LOAD,
QPU_SIG_SMALL_IMM,
QPU_SIG_LOAD_IMM,
QPU_SIG_BRANCH
};
enum qpu_mux {
/* hardware mux values */
QPU_MUX_R0,
QPU_MUX_R1,
QPU_MUX_R2,
QPU_MUX_R3,
QPU_MUX_R4,
QPU_MUX_R5,
QPU_MUX_A,
QPU_MUX_B,
/* non-hardware mux values */
QPU_MUX_IMM,
};
enum qpu_cond {
QPU_COND_NEVER,
QPU_COND_ALWAYS,
QPU_COND_ZS,
QPU_COND_ZC,
QPU_COND_NS,
QPU_COND_NC,
QPU_COND_CS,
QPU_COND_CC,
};
enum qpu_pack_mul {
QPU_PACK_MUL_NOP,
/* replicated to each 8 bits of the 32-bit dst. */
QPU_PACK_MUL_8888 = 3,
QPU_PACK_MUL_8A,
QPU_PACK_MUL_8B,
QPU_PACK_MUL_8C,
QPU_PACK_MUL_8D,
};
enum qpu_pack_a {
QPU_PACK_A_NOP,
/* convert to 16 bit float if float input, or to int16. */
QPU_PACK_A_16A,
QPU_PACK_A_16B,
/* replicated to each 8 bits of the 32-bit dst. */
QPU_PACK_A_8888,
/* Convert to 8-bit unsigned int. */
QPU_PACK_A_8A,
QPU_PACK_A_8B,
QPU_PACK_A_8C,
QPU_PACK_A_8D,
/* Saturating variants of the previous instructions. */
QPU_PACK_A_32_SAT, /* int-only */
QPU_PACK_A_16A_SAT, /* int or float */
QPU_PACK_A_16B_SAT,
QPU_PACK_A_8888_SAT,
QPU_PACK_A_8A_SAT,
QPU_PACK_A_8B_SAT,
QPU_PACK_A_8C_SAT,
QPU_PACK_A_8D_SAT,
};
enum qpu_unpack_r4 {
QPU_UNPACK_R4_NOP,
QPU_UNPACK_R4_F16A_TO_F32,
QPU_UNPACK_R4_F16B_TO_F32,
QPU_UNPACK_R4_8D_REP,
QPU_UNPACK_R4_8A,
QPU_UNPACK_R4_8B,
QPU_UNPACK_R4_8C,
QPU_UNPACK_R4_8D,
};
#define QPU_MASK(high, low) \
((((uint64_t)1 << ((high) - (low) + 1)) - 1) << (low))
#define QPU_GET_FIELD(word, field) \
((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))
#define QPU_SIG_SHIFT 60
#define QPU_SIG_MASK QPU_MASK(63, 60)
#define QPU_UNPACK_SHIFT 57
#define QPU_UNPACK_MASK QPU_MASK(59, 57)
/**
* If set, the pack field means PACK_MUL or R4 packing, instead of normal
* regfile a packing.
*/
#define QPU_PM ((uint64_t)1 << 56)
#define QPU_PACK_SHIFT 52
#define QPU_PACK_MASK QPU_MASK(55, 52)
#define QPU_COND_ADD_SHIFT 49
#define QPU_COND_ADD_MASK QPU_MASK(51, 49)
#define QPU_COND_MUL_SHIFT 46
#define QPU_COND_MUL_MASK QPU_MASK(48, 46)
#define QPU_SF ((uint64_t)1 << 45)
#define QPU_WADDR_ADD_SHIFT 38
#define QPU_WADDR_ADD_MASK QPU_MASK(43, 38)
#define QPU_WADDR_MUL_SHIFT 32
#define QPU_WADDR_MUL_MASK QPU_MASK(37, 32)
#define QPU_OP_MUL_SHIFT 29
#define QPU_OP_MUL_MASK QPU_MASK(31, 29)
#define QPU_RADDR_A_SHIFT 18
#define QPU_RADDR_A_MASK QPU_MASK(23, 18)
#define QPU_RADDR_B_SHIFT 12
#define QPU_RADDR_B_MASK QPU_MASK(17, 12)
#define QPU_SMALL_IMM_SHIFT 12
#define QPU_SMALL_IMM_MASK QPU_MASK(17, 12)
#define QPU_ADD_A_SHIFT 9
#define QPU_ADD_A_MASK QPU_MASK(11, 9)
#define QPU_ADD_B_SHIFT 6
#define QPU_ADD_B_MASK QPU_MASK(8, 6)
#define QPU_MUL_A_SHIFT 3
#define QPU_MUL_A_MASK QPU_MASK(5, 3)
#define QPU_MUL_B_SHIFT 0
#define QPU_MUL_B_MASK QPU_MASK(2, 0)
#define QPU_WS ((uint64_t)1 << 44)
#define QPU_OP_ADD_SHIFT 24
#define QPU_OP_ADD_MASK QPU_MASK(28, 24)
#endif /* VC4_QPU_DEFINES_H */
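As an aside (not part of the patch): these masks and shifts are presumably what the shader validator uses to pick apart 64-bit QPU instructions. A minimal hypothetical decode helper, using only the macros and fields defined above:

static inline int qpu_inst_is_prog_end(uint64_t inst)
{
	/* The signaling bits live in [63:60] of every instruction. */
	return QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_PROG_END;
}

static inline uint32_t qpu_inst_waddr_add(uint64_t inst)
{
	/* Write address of the add pipeline, bits [43:38]. */
	return QPU_GET_FIELD(inst, QPU_WADDR_ADD);
}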
......@@ -154,7 +154,7 @@
#define V3D_PCTRS14 0x006f4
#define V3D_PCTR15 0x006f8
#define V3D_PCTRS15 0x006fc
-#define V3D_BGE 0x00f00
+#define V3D_DBGE 0x00f00
#define V3D_FDBGO 0x00f04
#define V3D_FDBGB 0x00f08
#define V3D_FDBGR 0x00f0c
......
This diff is collapsed.
/*
* Copyright (C) 2015 Broadcom
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#if !defined(_VC4_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
#define _VC4_TRACE_H_
#include <linux/stringify.h>
#include <linux/types.h>
#include <linux/tracepoint.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM vc4
#define TRACE_INCLUDE_FILE vc4_trace
TRACE_EVENT(vc4_wait_for_seqno_begin,
TP_PROTO(struct drm_device *dev, uint64_t seqno, uint64_t timeout),
TP_ARGS(dev, seqno, timeout),
TP_STRUCT__entry(
__field(u32, dev)
__field(u64, seqno)
__field(u64, timeout)
),
TP_fast_assign(
__entry->dev = dev->primary->index;
__entry->seqno = seqno;
__entry->timeout = timeout;
),
TP_printk("dev=%u, seqno=%llu, timeout=%llu",
__entry->dev, __entry->seqno, __entry->timeout)
);
TRACE_EVENT(vc4_wait_for_seqno_end,
TP_PROTO(struct drm_device *dev, uint64_t seqno),
TP_ARGS(dev, seqno),
TP_STRUCT__entry(
__field(u32, dev)
__field(u64, seqno)
),
TP_fast_assign(
__entry->dev = dev->primary->index;
__entry->seqno = seqno;
),
TP_printk("dev=%u, seqno=%llu",
__entry->dev, __entry->seqno)
);
#endif /* _VC4_TRACE_H_ */
/* This part must be outside protection */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#include <trace/define_trace.h>
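For context, the call sites live in the collapsed vc4_gem.c diff, so the following is an assumption about usage rather than quoted code: a waiter would bracket its sleep with the two events, roughly:

	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
	/* ...block until vc4->finished_seqno >= seqno, or timeout... */
	trace_vc4_wait_for_seqno_end(dev, seqno);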
/*
* Copyright (C) 2015 Broadcom
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include "vc4_drv.h"
#ifndef __CHECKER__
#define CREATE_TRACE_POINTS
#include "vc4_trace.h"
#endif
/*
* Copyright (c) 2014 The Linux Foundation. All rights reserved.
* Copyright (C) 2013 Red Hat
* Author: Rob Clark <robdclark@gmail.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "linux/component.h"
#include "vc4_drv.h"
#include "vc4_regs.h"
#ifdef CONFIG_DEBUG_FS
#define REGDEF(reg) { reg, #reg }
static const struct {
uint32_t reg;
const char *name;
} vc4_reg_defs[] = {
REGDEF(V3D_IDENT0),
REGDEF(V3D_IDENT1),
REGDEF(V3D_IDENT2),
REGDEF(V3D_SCRATCH),
REGDEF(V3D_L2CACTL),
REGDEF(V3D_SLCACTL),
REGDEF(V3D_INTCTL),
REGDEF(V3D_INTENA),
REGDEF(V3D_INTDIS),
REGDEF(V3D_CT0CS),
REGDEF(V3D_CT1CS),
REGDEF(V3D_CT0EA),
REGDEF(V3D_CT1EA),
REGDEF(V3D_CT0CA),
REGDEF(V3D_CT1CA),
REGDEF(V3D_CT00RA0),
REGDEF(V3D_CT01RA0),
REGDEF(V3D_CT0LC),
REGDEF(V3D_CT1LC),
REGDEF(V3D_CT0PC),
REGDEF(V3D_CT1PC),
REGDEF(V3D_PCS),
REGDEF(V3D_BFC),
REGDEF(V3D_RFC),
REGDEF(V3D_BPCA),
REGDEF(V3D_BPCS),
REGDEF(V3D_BPOA),
REGDEF(V3D_BPOS),
REGDEF(V3D_BXCF),
REGDEF(V3D_SQRSV0),
REGDEF(V3D_SQRSV1),
REGDEF(V3D_SQCNTL),
REGDEF(V3D_SRQPC),
REGDEF(V3D_SRQUA),
REGDEF(V3D_SRQUL),
REGDEF(V3D_SRQCS),
REGDEF(V3D_VPACNTL),
REGDEF(V3D_VPMBASE),
REGDEF(V3D_PCTRC),
REGDEF(V3D_PCTRE),
REGDEF(V3D_PCTR0),
REGDEF(V3D_PCTRS0),
REGDEF(V3D_PCTR1),
REGDEF(V3D_PCTRS1),
REGDEF(V3D_PCTR2),
REGDEF(V3D_PCTRS2),
REGDEF(V3D_PCTR3),
REGDEF(V3D_PCTRS3),
REGDEF(V3D_PCTR4),
REGDEF(V3D_PCTRS4),
REGDEF(V3D_PCTR5),
REGDEF(V3D_PCTRS5),
REGDEF(V3D_PCTR6),
REGDEF(V3D_PCTRS6),
REGDEF(V3D_PCTR7),
REGDEF(V3D_PCTRS7),
REGDEF(V3D_PCTR8),
REGDEF(V3D_PCTRS8),
REGDEF(V3D_PCTR9),
REGDEF(V3D_PCTRS9),
REGDEF(V3D_PCTR10),
REGDEF(V3D_PCTRS10),
REGDEF(V3D_PCTR11),
REGDEF(V3D_PCTRS11),
REGDEF(V3D_PCTR12),
REGDEF(V3D_PCTRS12),
REGDEF(V3D_PCTR13),
REGDEF(V3D_PCTRS13),
REGDEF(V3D_PCTR14),
REGDEF(V3D_PCTRS14),
REGDEF(V3D_PCTR15),
REGDEF(V3D_PCTRS15),
REGDEF(V3D_DBGE),
REGDEF(V3D_FDBGO),
REGDEF(V3D_FDBGB),
REGDEF(V3D_FDBGR),
REGDEF(V3D_FDBGS),
REGDEF(V3D_ERRSTAT),
};
int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused)
{
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *dev = node->minor->dev;
struct vc4_dev *vc4 = to_vc4_dev(dev);
int i;
for (i = 0; i < ARRAY_SIZE(vc4_reg_defs); i++) {
seq_printf(m, "%s (0x%04x): 0x%08x\n",
vc4_reg_defs[i].name, vc4_reg_defs[i].reg,
V3D_READ(vc4_reg_defs[i].reg));
}
return 0;
}
int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
{
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *dev = node->minor->dev;
struct vc4_dev *vc4 = to_vc4_dev(dev);
uint32_t ident1 = V3D_READ(V3D_IDENT1);
uint32_t nslc = VC4_GET_FIELD(ident1, V3D_IDENT1_NSLC);
uint32_t tups = VC4_GET_FIELD(ident1, V3D_IDENT1_TUPS);
uint32_t qups = VC4_GET_FIELD(ident1, V3D_IDENT1_QUPS);
seq_printf(m, "Revision: %d\n",
VC4_GET_FIELD(ident1, V3D_IDENT1_REV));
seq_printf(m, "Slices: %d\n", nslc);
seq_printf(m, "TMUs: %d\n", nslc * tups);
seq_printf(m, "QPUs: %d\n", nslc * qups);
seq_printf(m, "Semaphores: %d\n",
VC4_GET_FIELD(ident1, V3D_IDENT1_NSEM));
return 0;
}
#endif /* CONFIG_DEBUG_FS */
/*
* Asks the firmware to turn on power to the V3D engine.
*
* This may be doable with just the clocks interface, though this
* packet does some other register setup from the firmware, too.
*/
int
vc4_v3d_set_power(struct vc4_dev *vc4, bool on)
{
if (on)
return pm_generic_poweroff(&vc4->v3d->pdev->dev);
else
return pm_generic_resume(&vc4->v3d->pdev->dev);
}
static void vc4_v3d_init_hw(struct drm_device *dev)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
/* Take all the memory that would have been reserved for user
* QPU programs, since we don't have an interface for running
* them, anyway.
*/
V3D_WRITE(V3D_VPMBASE, 0);
}
static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
{
struct platform_device *pdev = to_platform_device(dev);
struct drm_device *drm = dev_get_drvdata(master);
struct vc4_dev *vc4 = to_vc4_dev(drm);
struct vc4_v3d *v3d = NULL;
int ret;
v3d = devm_kzalloc(&pdev->dev, sizeof(*v3d), GFP_KERNEL);
if (!v3d)
return -ENOMEM;
v3d->pdev = pdev;
v3d->regs = vc4_ioremap_regs(pdev, 0);
if (IS_ERR(v3d->regs))
return PTR_ERR(v3d->regs);
vc4->v3d = v3d;
if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) {
DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n",
V3D_READ(V3D_IDENT0), V3D_EXPECTED_IDENT0);
return -EINVAL;
}
/* Reset the binner overflow address/size at setup, to be sure
* we don't reuse an old one.
*/
V3D_WRITE(V3D_BPOA, 0);
V3D_WRITE(V3D_BPOS, 0);
vc4_v3d_init_hw(drm);
ret = drm_irq_install(drm, platform_get_irq(pdev, 0));
if (ret) {
DRM_ERROR("Failed to install IRQ handler\n");
return ret;
}
return 0;
}
static void vc4_v3d_unbind(struct device *dev, struct device *master,
void *data)
{
struct drm_device *drm = dev_get_drvdata(master);
struct vc4_dev *vc4 = to_vc4_dev(drm);
drm_irq_uninstall(drm);
/* Disable the binner's overflow memory address, so the next
* driver probe (if any) doesn't try to reuse our old
* allocation.
*/
V3D_WRITE(V3D_BPOA, 0);
V3D_WRITE(V3D_BPOS, 0);
vc4->v3d = NULL;
}
static const struct component_ops vc4_v3d_ops = {
.bind = vc4_v3d_bind,
.unbind = vc4_v3d_unbind,
};
static int vc4_v3d_dev_probe(struct platform_device *pdev)
{
return component_add(&pdev->dev, &vc4_v3d_ops);
}
static int vc4_v3d_dev_remove(struct platform_device *pdev)
{
component_del(&pdev->dev, &vc4_v3d_ops);
return 0;
}
static const struct of_device_id vc4_v3d_dt_match[] = {
{ .compatible = "brcm,vc4-v3d" },
{}
};
struct platform_driver vc4_v3d_driver = {
.probe = vc4_v3d_dev_probe,
.remove = vc4_v3d_dev_remove,
.driver = {
.name = "vc4_v3d",
.of_match_table = vc4_v3d_dt_match,
},
};
This diff is collapsed.
This diff is collapsed.
......@@ -587,6 +587,13 @@ struct drm_driver {
int (*gem_open_object) (struct drm_gem_object *, struct drm_file *);
void (*gem_close_object) (struct drm_gem_object *, struct drm_file *);
/**
* Hook for allocating the GEM object struct, for use by core
* helpers.
*/
struct drm_gem_object *(*gem_create_object)(struct drm_device *dev,
size_t size);
/* prime: */
/* export handle -> fd (see drm_gem_prime_handle_to_fd() helper) */
int (*prime_handle_to_fd)(struct drm_device *dev, struct drm_file *file_priv,
......
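A minimal sketch of how a driver might use the new hook (hypothetical foo driver, not from this series): embed struct drm_gem_cma_object in a larger driver struct and return the base GEM object. The CMA helper change at the top of this merge then uses this allocation instead of its own kzalloc(), treats a NULL return as -ENOMEM, and container_of()s back to the CMA object.

struct foo_bo {
	struct drm_gem_cma_object base;
	u64 seqno;	/* driver-private state hung off every BO */
};

static struct drm_gem_object *foo_gem_create_object(struct drm_device *dev,
						    size_t size)
{
	struct foo_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);

	return bo ? &bo->base.base : NULL;
}

/* ...and in the driver struct: .gem_create_object = foo_gem_create_object, */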
......@@ -18,4 +18,5 @@ header-y += tegra_drm.h
header-y += via_drm.h
header-y += vmwgfx_drm.h
header-y += msm_drm.h
header-y += vc4_drm.h
header-y += virtgpu_drm.h
/*
* Copyright © 2014-2015 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef _UAPI_VC4_DRM_H_
#define _UAPI_VC4_DRM_H_
#include "drm.h"
#define DRM_VC4_SUBMIT_CL 0x00
#define DRM_VC4_WAIT_SEQNO 0x01
#define DRM_VC4_WAIT_BO 0x02
#define DRM_VC4_CREATE_BO 0x03
#define DRM_VC4_MMAP_BO 0x04
#define DRM_VC4_CREATE_SHADER_BO 0x05
#define DRM_VC4_GET_HANG_STATE 0x06
#define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
#define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
#define DRM_IOCTL_VC4_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo)
#define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
#define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
#define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
#define DRM_IOCTL_VC4_GET_HANG_STATE DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state)
struct drm_vc4_submit_rcl_surface {
__u32 hindex; /* Handle index, or ~0 if not present. */
__u32 offset; /* Offset to start of buffer. */
/*
* Bits for either render config (color_write) or load/store packet.
* Bits should all be 0 for MSAA load/stores.
*/
__u16 bits;
#define VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES (1 << 0)
__u16 flags;
};
/**
* struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D
* engine.
*
* Drivers typically use GPU BOs to store batchbuffers / command lists and
* their associated state. However, because the VC4 lacks an MMU, we have to
* do validation of memory accesses by the GPU commands. If we were to store
* our commands in BOs, we'd need to do uncached readback from them to do the
* validation process, which is too expensive. Instead, userspace accumulates
* commands and associated state in plain memory, then the kernel copies the
* data to its own address space, and then validates and stores it in a GPU
* BO.
*/
struct drm_vc4_submit_cl {
/* Pointer to the binner command list.
*
* This is the first set of commands executed, which runs the
* coordinate shader to determine where primitives land on the screen,
* then writes out the state updates and draw calls necessary per tile
* to the tile allocation BO.
*/
__u64 bin_cl;
/* Pointer to the shader records.
*
* Shader records are the structures read by the hardware that contain
* pointers to uniforms, shaders, and vertex attributes. The
* reference to the shader record has enough information to determine
* how many pointers are necessary (fixed number for shaders/uniforms,
* and an attribute count), so those BO indices into bo_handles are
* just stored as __u32s before each shader record passed in.
*/
__u64 shader_rec;
/* Pointer to uniform data and texture handles for the textures
* referenced by the shader.
*
* For each shader state record, there is a set of uniform data in the
* order referenced by the record (FS, VS, then CS). Each set of
* uniform data has a __u32 index into bo_handles per texture
* sample operation, in the order the QPU_W_TMUn_S writes appear in
* the program. Following the texture BO handle indices is the actual
* uniform data.
*
* The individual uniform state blocks don't have sizes passed in,
* because the kernel has to determine the sizes anyway during shader
* code validation.
*/
__u64 uniforms;
__u64 bo_handles;
/* Size in bytes of the binner command list. */
__u32 bin_cl_size;
/* Size in bytes of the set of shader records. */
__u32 shader_rec_size;
/* Number of shader records.
*
* This could just be computed from the contents of shader_records and
* the address bits of references to them from the bin CL, but it
* keeps the kernel from having to resize some allocations it makes.
*/
__u32 shader_rec_count;
/* Size in bytes of the uniform state. */
__u32 uniforms_size;
/* Number of BO handles passed in (size is that times 4). */
__u32 bo_handle_count;
/* RCL setup: */
__u16 width;
__u16 height;
__u8 min_x_tile;
__u8 min_y_tile;
__u8 max_x_tile;
__u8 max_y_tile;
struct drm_vc4_submit_rcl_surface color_read;
struct drm_vc4_submit_rcl_surface color_write;
struct drm_vc4_submit_rcl_surface zs_read;
struct drm_vc4_submit_rcl_surface zs_write;
struct drm_vc4_submit_rcl_surface msaa_color_write;
struct drm_vc4_submit_rcl_surface msaa_zs_write;
__u32 clear_color[2];
__u32 clear_z;
__u8 clear_s;
__u32 pad:24;
#define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0)
__u32 flags;
/* Returned value of the seqno of this render job (for the
* wait ioctl).
*/
__u64 seqno;
};
/**
* struct drm_vc4_wait_seqno - ioctl argument for waiting for
* DRM_VC4_SUBMIT_CL completion using its returned seqno.
*
* timeout_ns is the timeout in nanoseconds, where "0" means "don't
* block, just return the status."
*/
struct drm_vc4_wait_seqno {
__u64 seqno;
__u64 timeout_ns;
};
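A hypothetical userspace sketch (not from the patch) pairing the seqno returned by DRM_IOCTL_VC4_SUBMIT_CL with this wait:

#include <stdint.h>
#include <sys/ioctl.h>

static int vc4_wait_job(int drm_fd, uint64_t seqno, uint64_t timeout_ns)
{
	struct drm_vc4_wait_seqno wait = {
		.seqno = seqno,			/* from drm_vc4_submit_cl.seqno */
		.timeout_ns = timeout_ns,	/* 0 == just poll the status */
	};

	return ioctl(drm_fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
}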
/**
* struct drm_vc4_wait_bo - ioctl argument for waiting for
* completion of the last DRM_VC4_SUBMIT_CL on a BO.
*
* This is useful for cases where multiple processes might be
* rendering to a BO and you want to wait for all rendering to be
* completed.
*/
struct drm_vc4_wait_bo {
__u32 handle;
__u32 pad;
__u64 timeout_ns;
};
/**
* struct drm_vc4_create_bo - ioctl argument for creating VC4 BOs.
*
* There are currently no values for the flags argument, but it may be
* used in a future extension.
*/
struct drm_vc4_create_bo {
__u32 size;
__u32 flags;
/** Returned GEM handle for the BO. */
__u32 handle;
__u32 pad;
};
/**
* struct drm_vc4_mmap_bo - ioctl argument for mapping VC4 BOs.
*
* This doesn't actually perform an mmap. Instead, it returns the
* offset you need to use in an mmap on the DRM device node. This
* means that tools like valgrind end up knowing about the mapped
* memory.
*
* There are currently no values for the flags argument, but it may be
* used in a future extension.
*/
struct drm_vc4_mmap_bo {
/** Handle for the object being mapped. */
__u32 handle;
__u32 flags;
/** offset into the drm node to use for subsequent mmap call. */
__u64 offset;
};
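A hypothetical sketch of the two-step protocol described above (direct ioctl for brevity; not from the patch):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

static void *vc4_map_bo(int drm_fd, uint32_t handle, size_t size)
{
	struct drm_vc4_mmap_bo map;
	void *ptr;

	memset(&map, 0, sizeof(map));
	map.handle = handle;

	/* Step 1: ask the kernel for this BO's fake mmap offset. */
	if (ioctl(drm_fd, DRM_IOCTL_VC4_MMAP_BO, &map) != 0)
		return NULL;

	/* Step 2: the actual mapping happens on the DRM device node. */
	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   drm_fd, map.offset);
	return ptr == MAP_FAILED ? NULL : ptr;
}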
/**
* struct drm_vc4_create_shader_bo - ioctl argument for creating VC4
* shader BOs.
*
* Since allowing a shader to be overwritten while it's also being
 * executed from would allow privilege escalation, shaders must be
* created using this ioctl, and they can't be mmapped later.
*/
struct drm_vc4_create_shader_bo {
/* Size of the data argument. */
__u32 size;
/* Flags, currently must be 0. */
__u32 flags;
/* Pointer to the data. */
__u64 data;
/** Returned GEM handle for the BO. */
__u32 handle;
/* Pad, must be 0. */
__u32 pad;
};
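Correspondingly, a hypothetical upload of a QPU program as an immutable shader BO (not from the patch; code is assumed to point at raw 64-bit instructions, includes as in the mapping sketch above):

static int vc4_upload_shader(int drm_fd, const uint64_t *code, uint32_t size,
			     uint32_t *handle)
{
	struct drm_vc4_create_shader_bo create;

	memset(&create, 0, sizeof(create));
	create.size = size;		/* bytes of instruction data */
	create.data = (uintptr_t)code;	/* validated by the kernel at create time */

	if (ioctl(drm_fd, DRM_IOCTL_VC4_CREATE_SHADER_BO, &create) != 0)
		return -1;

	*handle = create.handle;
	return 0;
}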
struct drm_vc4_get_hang_state_bo {
__u32 handle;
__u32 paddr;
__u32 size;
__u32 pad;
};
/**
* struct drm_vc4_hang_state - ioctl argument for collecting state
* from a GPU hang for analysis.
*/
struct drm_vc4_get_hang_state {
/** Pointer to array of struct drm_vc4_get_hang_state_bo. */
__u64 bo;
/**
* On input, the size of the bo array. Output is the number
* of bos to be returned.
*/
__u32 bo_count;
__u32 start_bin, start_render;
__u32 ct0ca, ct0ea;
__u32 ct1ca, ct1ea;
__u32 ct0cs, ct1cs;
__u32 ct0ra0, ct1ra0;
__u32 bpca, bpcs;
__u32 bpoa, bpos;
__u32 vpmbase;
__u32 dbge;
__u32 fdbgo;
__u32 fdbgb;
__u32 fdbgr;
__u32 fdbgs;
__u32 errstat;
/* Pad that we may save more registers into in the future. */
__u32 pad[16];
};
#endif /* _UAPI_VC4_DRM_H_ */
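Finally, the in/out convention of bo_count implies a two-call pattern; a hypothetical sketch (root-only per the ioctl table above; includes as in the earlier sketches, plus <stdlib.h>):

static int vc4_fetch_hang_state(int drm_fd,
				struct drm_vc4_get_hang_state *state,
				struct drm_vc4_get_hang_state_bo **bos_out)
{
	struct drm_vc4_get_hang_state_bo *bos;

	memset(state, 0, sizeof(*state));
	/* First call with bo_count == 0 reports how many BOs are needed. */
	if (ioctl(drm_fd, DRM_IOCTL_VC4_GET_HANG_STATE, state) != 0)
		return -1;

	bos = calloc(state->bo_count, sizeof(*bos));
	if (!bos)
		return -1;

	state->bo = (uintptr_t)bos;
	/* Second call copies out the BO list and the saved registers. */
	if (ioctl(drm_fd, DRM_IOCTL_VC4_GET_HANG_STATE, state) != 0) {
		free(bos);
		return -1;
	}

	*bos_out = bos;
	return 0;
}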