Commit 2d635fde authored by Dave Airlie

Merge tag 'drm-vc4-next-2016-07-15' of https://github.com/anholt/linux into drm-next

This pull request brings in vc4 shader validation for branching,
allowing GLSL shaders with non-unrolled loops.

* tag 'drm-vc4-next-2016-07-15' of https://github.com/anholt/linux:
  drm/vc4: Fix a "the the" typo in a comment.
  drm/vc4: Fix definition of QPU_R_MS_REV_FLAGS
  drm/vc4: Add a getparam to signal support for branches.
  drm/vc4: Add support for branching in shader validation.
  drm/vc4: Add a bitmap of branch targets during shader validation.
  drm/vc4: Move validation's current/max ip into the validation struct.
  drm/vc4: Add a getparam ioctl for getting the V3D identity regs.
parents ec2174fe a20d5fa6
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/of_platform.h> #include <linux/of_platform.h>
#include <linux/platform_device.h> #include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include "drm_fb_cma_helper.h" #include "drm_fb_cma_helper.h"
#include "uapi/drm/vc4_drm.h" #include "uapi/drm/vc4_drm.h"
...@@ -43,6 +44,49 @@ void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index) ...@@ -43,6 +44,49 @@ void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index)
return map; return map;
} }
/**
 * vc4_get_param_ioctl() - Report a driver or hardware parameter to userspace.
 * @dev: DRM device the ioctl was issued on.
 * @data: ioctl payload, a struct drm_vc4_get_param.
 * @file_priv: DRM file of the caller (unused here).
 *
 * For the V3D_IDENT* parameters the V3D block is runtime-resumed around the
 * register read and released again afterwards.
 *
 * Return: 0 on success, -EINVAL if @pad is nonzero or @param is unknown, or
 * the negative error code from pm_runtime_get_sync() if the device could not
 * be resumed.
 */
static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file_priv)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_get_param *args = data;
	int ret;

	if (args->pad != 0)
		return -EINVAL;

	switch (args->param) {
	case DRM_VC4_PARAM_V3D_IDENT0:
		/*
		 * pm_runtime_get_sync() returns 1 when the device was already
		 * active, so only negative values are errors.  On error the
		 * usage count has still been bumped and must be dropped.
		 */
		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
		if (ret < 0) {
			pm_runtime_put_noidle(&vc4->v3d->pdev->dev);
			return ret;
		}
		args->value = V3D_READ(V3D_IDENT0);
		pm_runtime_put(&vc4->v3d->pdev->dev);
		break;
	case DRM_VC4_PARAM_V3D_IDENT1:
		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
		if (ret < 0) {
			pm_runtime_put_noidle(&vc4->v3d->pdev->dev);
			return ret;
		}
		args->value = V3D_READ(V3D_IDENT1);
		pm_runtime_put(&vc4->v3d->pdev->dev);
		break;
	case DRM_VC4_PARAM_V3D_IDENT2:
		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
		if (ret < 0) {
			pm_runtime_put_noidle(&vc4->v3d->pdev->dev);
			return ret;
		}
		args->value = V3D_READ(V3D_IDENT2);
		pm_runtime_put(&vc4->v3d->pdev->dev);
		break;
	case DRM_VC4_PARAM_SUPPORTS_BRANCHES:
		args->value = true;
		break;
	default:
		DRM_DEBUG("Unknown parameter %d\n", args->param);
		return -EINVAL;
	}

	return 0;
}
static void vc4_lastclose(struct drm_device *dev) static void vc4_lastclose(struct drm_device *dev)
{ {
struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_dev *vc4 = to_vc4_dev(dev);
...@@ -74,6 +118,7 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = { ...@@ -74,6 +118,7 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl,
DRM_ROOT_ONLY), DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(VC4_GET_PARAM, vc4_get_param_ioctl, DRM_RENDER_ALLOW),
}; };
static struct drm_driver vc4_drm_driver = { static struct drm_driver vc4_drm_driver = {
......
...@@ -355,6 +355,9 @@ struct vc4_validated_shader_info { ...@@ -355,6 +355,9 @@ struct vc4_validated_shader_info {
uint32_t uniforms_src_size; uint32_t uniforms_src_size;
uint32_t num_texture_samples; uint32_t num_texture_samples;
struct vc4_texture_sample_info *texture_samples; struct vc4_texture_sample_info *texture_samples;
uint32_t num_uniform_addr_offsets;
uint32_t *uniform_addr_offsets;
}; };
/** /**
......
...@@ -70,7 +70,7 @@ enum qpu_raddr { ...@@ -70,7 +70,7 @@ enum qpu_raddr {
QPU_R_ELEM_QPU = 38, QPU_R_ELEM_QPU = 38,
QPU_R_NOP, QPU_R_NOP,
QPU_R_XY_PIXEL_COORD = 41, QPU_R_XY_PIXEL_COORD = 41,
QPU_R_MS_REV_FLAGS = 41, QPU_R_MS_REV_FLAGS = 42,
QPU_R_VPM = 48, QPU_R_VPM = 48,
QPU_R_VPM_LD_BUSY, QPU_R_VPM_LD_BUSY,
QPU_R_VPM_LD_WAIT, QPU_R_VPM_LD_WAIT,
...@@ -230,6 +230,15 @@ enum qpu_unpack_r4 { ...@@ -230,6 +230,15 @@ enum qpu_unpack_r4 {
#define QPU_COND_MUL_SHIFT 46 #define QPU_COND_MUL_SHIFT 46
#define QPU_COND_MUL_MASK QPU_MASK(48, 46) #define QPU_COND_MUL_MASK QPU_MASK(48, 46)
/* Branch instruction encoding: fields and flag bits in the 64-bit
 * instruction word.  Bit ranges below assume QPU_MASK(high, low) — TODO
 * confirm against the QPU_MASK definition.
 */
/* Branch condition field, bits 55..52. */
#define QPU_BRANCH_COND_SHIFT 52
#define QPU_BRANCH_COND_MASK QPU_MASK(55, 52)
/* Single-bit flag at bit 51; presumably selects a relative branch — verify. */
#define QPU_BRANCH_REL ((uint64_t)1 << 51)
/* Single-bit flag at bit 50; presumably selects a register-sourced branch
 * target — verify.
 */
#define QPU_BRANCH_REG ((uint64_t)1 << 50)
/* raddr_a field as encoded in branch instructions, bits 49..45. */
#define QPU_BRANCH_RADDR_A_SHIFT 45
#define QPU_BRANCH_RADDR_A_MASK QPU_MASK(49, 45)
#define QPU_SF ((uint64_t)1 << 45) #define QPU_SF ((uint64_t)1 << 45)
#define QPU_WADDR_ADD_SHIFT 38 #define QPU_WADDR_ADD_SHIFT 38
...@@ -261,4 +270,10 @@ enum qpu_unpack_r4 { ...@@ -261,4 +270,10 @@ enum qpu_unpack_r4 {
#define QPU_OP_ADD_SHIFT 24 #define QPU_OP_ADD_SHIFT 24
#define QPU_OP_ADD_MASK QPU_MASK(28, 24) #define QPU_OP_ADD_MASK QPU_MASK(28, 24)
/* Low 32 bits of the instruction word (bits 31..0, assuming
 * QPU_MASK(high, low)).  The load-immediate value and the branch target are
 * defined over the same bit range.
 */
#define QPU_LOAD_IMM_SHIFT 0
#define QPU_LOAD_IMM_MASK QPU_MASK(31, 0)
#define QPU_BRANCH_TARGET_SHIFT 0
#define QPU_BRANCH_TARGET_MASK QPU_MASK(31, 0)
#endif /* VC4_QPU_DEFINES_H */ #endif /* VC4_QPU_DEFINES_H */
...@@ -802,7 +802,7 @@ validate_gl_shader_rec(struct drm_device *dev, ...@@ -802,7 +802,7 @@ validate_gl_shader_rec(struct drm_device *dev,
uint32_t src_offset = *(uint32_t *)(pkt_u + o); uint32_t src_offset = *(uint32_t *)(pkt_u + o);
uint32_t *texture_handles_u; uint32_t *texture_handles_u;
void *uniform_data_u; void *uniform_data_u;
uint32_t tex; uint32_t tex, uni;
*(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset; *(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
...@@ -840,6 +840,17 @@ validate_gl_shader_rec(struct drm_device *dev, ...@@ -840,6 +840,17 @@ validate_gl_shader_rec(struct drm_device *dev,
} }
} }
/* Fill in the uniform slots that need this shader's
* start-of-uniforms address (used for resetting the uniform
* stream in the presence of control flow).
*/
for (uni = 0;
uni < validated_shader->num_uniform_addr_offsets;
uni++) {
uint32_t o = validated_shader->uniform_addr_offsets[uni];
((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p;
}
*(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p; *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
exec->uniforms_u += validated_shader->uniforms_src_size; exec->uniforms_u += validated_shader->uniforms_src_size;
......
This diff is collapsed.
...@@ -37,6 +37,7 @@ extern "C" { ...@@ -37,6 +37,7 @@ extern "C" {
#define DRM_VC4_MMAP_BO 0x04 #define DRM_VC4_MMAP_BO 0x04
#define DRM_VC4_CREATE_SHADER_BO 0x05 #define DRM_VC4_CREATE_SHADER_BO 0x05
#define DRM_VC4_GET_HANG_STATE 0x06 #define DRM_VC4_GET_HANG_STATE 0x06
#define DRM_VC4_GET_PARAM 0x07
#define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl) #define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
#define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno) #define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
...@@ -45,6 +46,7 @@ extern "C" { ...@@ -45,6 +46,7 @@ extern "C" {
#define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo) #define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
#define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo) #define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
#define DRM_IOCTL_VC4_GET_HANG_STATE DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state) #define DRM_IOCTL_VC4_GET_HANG_STATE DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state)
#define DRM_IOCTL_VC4_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_PARAM, struct drm_vc4_get_param)
struct drm_vc4_submit_rcl_surface { struct drm_vc4_submit_rcl_surface {
__u32 hindex; /* Handle index, or ~0 if not present. */ __u32 hindex; /* Handle index, or ~0 if not present. */
...@@ -280,6 +282,17 @@ struct drm_vc4_get_hang_state { ...@@ -280,6 +282,17 @@ struct drm_vc4_get_hang_state {
__u32 pad[16]; __u32 pad[16];
}; };
/* Values for drm_vc4_get_param.param.
 *
 * V3D_IDENT0..2 return the contents of the corresponding V3D_IDENT
 * register; SUPPORTS_BRANCHES returns true from the handler that
 * implements this ioctl.
 */
#define DRM_VC4_PARAM_V3D_IDENT0 0
#define DRM_VC4_PARAM_V3D_IDENT1 1
#define DRM_VC4_PARAM_V3D_IDENT2 2
#define DRM_VC4_PARAM_SUPPORTS_BRANCHES 3
/* Argument for DRM_IOCTL_VC4_GET_PARAM.
 *
 * param: in, one of the DRM_VC4_PARAM_* values above; unknown values are
 *        rejected with -EINVAL.
 * pad:   in, must be 0 or the ioctl fails with -EINVAL.
 * value: out, the value of the requested parameter.
 */
struct drm_vc4_get_param {
__u32 param;
__u32 pad;
__u64 value;
};
#if defined(__cplusplus) #if defined(__cplusplus)
} }
#endif #endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment