Commit fc04023f authored by Eric Anholt's avatar Eric Anholt

drm/vc4: Add support for YUV planes.

This supports 420 and 422 subsampling with 2 or 3 planes, tested with
modetest.  It doesn't set up chroma subsampling position (which it
appears KMS doesn't deal with yet).

The LBM memory is overallocated in many cases, but apparently the docs
aren't quite correct and I'll probably need to look at the hardware
source to really figure it out.
Signed-off-by: default avatarEric Anholt <eric@anholt.net>
parent fe4cd847
......@@ -54,15 +54,19 @@ struct vc4_plane_state {
/* Clipped coordinates of the plane on the display. */
int crtc_x, crtc_y, crtc_w, crtc_h;
/* Clipped area being scanned from in the FB. */
u32 src_x, src_y, src_w, src_h;
u32 src_x, src_y;
enum vc4_scaling_mode x_scaling, y_scaling;
u32 src_w[2], src_h[2];
/* Scaling selection for the RGB/Y plane and the Cb/Cr planes. */
enum vc4_scaling_mode x_scaling[2], y_scaling[2];
bool is_unity;
bool is_yuv;
/* Offset to start scanning out from the start of the plane's
* BO.
*/
u32 offset;
u32 offsets[3];
/* Our allocation in LBM for temporary storage during scaling. */
struct drm_mm_node lbm;
......@@ -79,6 +83,7 @@ static const struct hvs_format {
u32 hvs; /* HVS_FORMAT_* */
u32 pixel_order;
bool has_alpha;
bool flip_cbcr;
} hvs_formats[] = {
{
.drm = DRM_FORMAT_XRGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
......@@ -104,6 +109,32 @@ static const struct hvs_format {
.drm = DRM_FORMAT_XRGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551,
.pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = false,
},
{
.drm = DRM_FORMAT_YUV422,
.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
},
{
.drm = DRM_FORMAT_YVU422,
.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
.flip_cbcr = true,
},
{
.drm = DRM_FORMAT_YUV420,
.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
},
{
.drm = DRM_FORMAT_YVU420,
.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
.flip_cbcr = true,
},
{
.drm = DRM_FORMAT_NV12,
.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
},
{
.drm = DRM_FORMAT_NV16,
.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
},
};
static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
......@@ -219,11 +250,11 @@ static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
*
* This is a replication of a table from the spec.
*/
static u32 vc4_get_scl_field(struct drm_plane_state *state)
static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
{
struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
switch (vc4_state->x_scaling << 2 | vc4_state->y_scaling) {
switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
return SCALER_CTL0_SCL_H_PPF_V_PPF;
case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
......@@ -254,9 +285,16 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
struct drm_plane *plane = state->plane;
struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
struct drm_framebuffer *fb = state->fb;
struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
u32 subpixel_src_mask = (1 << 16) - 1;
u32 format = fb->pixel_format;
int num_planes = drm_format_num_planes(format);
u32 h_subsample = 1;
u32 v_subsample = 1;
int i;
vc4_state->offset = fb->offsets[0];
for (i = 0; i < num_planes; i++)
vc4_state->offsets[i] = bo->paddr + fb->offsets[i];
/* We don't support subpixel source positioning for scaling. */
if ((state->src_x & subpixel_src_mask) ||
......@@ -268,20 +306,48 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
vc4_state->src_x = state->src_x >> 16;
vc4_state->src_y = state->src_y >> 16;
vc4_state->src_w = state->src_w >> 16;
vc4_state->src_h = state->src_h >> 16;
vc4_state->src_w[0] = state->src_w >> 16;
vc4_state->src_h[0] = state->src_h >> 16;
vc4_state->crtc_x = state->crtc_x;
vc4_state->crtc_y = state->crtc_y;
vc4_state->crtc_w = state->crtc_w;
vc4_state->crtc_h = state->crtc_h;
vc4_state->x_scaling = vc4_get_scaling_mode(vc4_state->src_w,
vc4_state->crtc_w);
vc4_state->y_scaling = vc4_get_scaling_mode(vc4_state->src_h,
vc4_state->crtc_h);
vc4_state->is_unity = (vc4_state->x_scaling == VC4_SCALING_NONE &&
vc4_state->y_scaling == VC4_SCALING_NONE);
vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
vc4_state->crtc_w);
vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
vc4_state->crtc_h);
if (num_planes > 1) {
vc4_state->is_yuv = true;
h_subsample = drm_format_horz_chroma_subsampling(format);
v_subsample = drm_format_vert_chroma_subsampling(format);
vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;
vc4_state->x_scaling[1] =
vc4_get_scaling_mode(vc4_state->src_w[1],
vc4_state->crtc_w);
vc4_state->y_scaling[1] =
vc4_get_scaling_mode(vc4_state->src_h[1],
vc4_state->crtc_h);
/* YUV conversion requires that scaling be enabled,
* even on a plane that's otherwise 1:1. Choose TPZ
* for simplicity.
*/
if (vc4_state->x_scaling[0] == VC4_SCALING_NONE)
vc4_state->x_scaling[0] = VC4_SCALING_TPZ;
if (vc4_state->y_scaling[0] == VC4_SCALING_NONE)
vc4_state->y_scaling[0] = VC4_SCALING_TPZ;
}
vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
vc4_state->x_scaling[1] == VC4_SCALING_NONE &&
vc4_state->y_scaling[1] == VC4_SCALING_NONE);
/* No configuring scaling on the cursor plane, since it gets
non-vblank-synced updates, and scaling requires requires
......@@ -294,16 +360,27 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
* support negative y, and negative x wastes bandwidth.
*/
if (vc4_state->crtc_x < 0) {
vc4_state->offset += (drm_format_plane_cpp(fb->pixel_format,
0) *
-vc4_state->crtc_x);
vc4_state->src_w += vc4_state->crtc_x;
for (i = 0; i < num_planes; i++) {
u32 cpp = drm_format_plane_cpp(fb->pixel_format, i);
u32 subs = ((i == 0) ? 1 : h_subsample);
vc4_state->offsets[i] += (cpp *
(-vc4_state->crtc_x) / subs);
}
vc4_state->src_w[0] += vc4_state->crtc_x;
vc4_state->src_w[1] += vc4_state->crtc_x / h_subsample;
vc4_state->crtc_x = 0;
}
if (vc4_state->crtc_y < 0) {
vc4_state->offset += fb->pitches[0] * -vc4_state->crtc_y;
vc4_state->src_h += vc4_state->crtc_y;
for (i = 0; i < num_planes; i++) {
u32 subs = ((i == 0) ? 1 : v_subsample);
vc4_state->offsets[i] += (fb->pitches[i] *
(-vc4_state->crtc_y) / subs);
}
vc4_state->src_h[0] += vc4_state->crtc_y;
vc4_state->src_h[1] += vc4_state->crtc_y / v_subsample;
vc4_state->crtc_y = 0;
}
......@@ -344,15 +421,23 @@ static u32 vc4_lbm_size(struct drm_plane_state *state)
/* This is the worst case number. One of the two sizes will
* be used depending on the scaling configuration.
*/
u32 pix_per_line = max(vc4_state->src_w, (u32)vc4_state->crtc_w);
u32 pix_per_line = max(vc4_state->src_w[0], (u32)vc4_state->crtc_w);
u32 lbm;
if (vc4_state->is_unity)
return 0;
else if (vc4_state->y_scaling == VC4_SCALING_TPZ)
lbm = pix_per_line * 8;
else {
/* In special cases, this multiplier might be 12. */
if (!vc4_state->is_yuv) {
if (vc4_state->is_unity)
return 0;
else if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
lbm = pix_per_line * 8;
else {
/* In special cases, this multiplier might be 12. */
lbm = pix_per_line * 16;
}
} else {
/* There are cases for this going down to a multiplier
* of 2, but according to the firmware source, the
* table in the docs is somewhat wrong.
*/
lbm = pix_per_line * 16;
}
......@@ -361,33 +446,34 @@ static u32 vc4_lbm_size(struct drm_plane_state *state)
return lbm;
}
static void vc4_write_scaling_parameters(struct drm_plane_state *state)
static void vc4_write_scaling_parameters(struct drm_plane_state *state,
int channel)
{
struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
/* Ch0 H-PPF Word 0: Scaling Parameters */
if (vc4_state->x_scaling == VC4_SCALING_PPF) {
if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
vc4_write_ppf(vc4_state,
vc4_state->src_w, vc4_state->crtc_w);
vc4_state->src_w[channel], vc4_state->crtc_w);
}
/* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
if (vc4_state->y_scaling == VC4_SCALING_PPF) {
if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
vc4_write_ppf(vc4_state,
vc4_state->src_h, vc4_state->crtc_h);
vc4_state->src_h[channel], vc4_state->crtc_h);
vc4_dlist_write(vc4_state, 0xc0c0c0c0);
}
/* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
if (vc4_state->x_scaling == VC4_SCALING_TPZ) {
if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
vc4_write_tpz(vc4_state,
vc4_state->src_w, vc4_state->crtc_w);
vc4_state->src_w[channel], vc4_state->crtc_w);
}
/* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
if (vc4_state->y_scaling == VC4_SCALING_TPZ) {
if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
vc4_write_tpz(vc4_state,
vc4_state->src_h, vc4_state->crtc_h);
vc4_state->src_h[channel], vc4_state->crtc_h);
vc4_dlist_write(vc4_state, 0xc0c0c0c0);
}
}
......@@ -401,13 +487,13 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
struct drm_framebuffer *fb = state->fb;
struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
u32 ctl0_offset = vc4_state->dlist_count;
const struct hvs_format *format = vc4_get_hvs_format(fb->pixel_format);
u32 scl;
int num_planes = drm_format_num_planes(format->drm);
u32 scl0, scl1;
u32 lbm_size;
unsigned long irqflags;
int ret;
int ret, i;
ret = vc4_plane_setup_clipping_and_scaling(state);
if (ret)
......@@ -432,7 +518,19 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
if (ret)
return ret;
scl = vc4_get_scl_field(state);
/* SCL1 is used for Cb/Cr scaling of planar formats. For RGB
* and 4:4:4, scl1 should be set to scl0 so both channels of
* the scaler do the same thing. For YUV, the Y plane needs
* to be put in channel 1 and Cb/Cr in channel 0, so we swap
* the scl fields here.
*/
if (num_planes == 1) {
scl0 = vc4_get_scl_field(state, 1);
scl1 = scl0;
} else {
scl0 = vc4_get_scl_field(state, 1);
scl1 = vc4_get_scl_field(state, 0);
}
/* Control word */
vc4_dlist_write(vc4_state,
......@@ -440,8 +538,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
(format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
(format->hvs << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
(vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
VC4_SET_FIELD(scl, SCALER_CTL0_SCL0) |
VC4_SET_FIELD(scl, SCALER_CTL0_SCL1));
VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));
/* Position Word 0: Image Positions and Alpha Value */
vc4_state->pos0_offset = vc4_state->dlist_count;
......@@ -466,35 +564,68 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
SCALER_POS2_ALPHA_MODE_PIPELINE :
SCALER_POS2_ALPHA_MODE_FIXED,
SCALER_POS2_ALPHA_MODE) |
VC4_SET_FIELD(vc4_state->src_w, SCALER_POS2_WIDTH) |
VC4_SET_FIELD(vc4_state->src_h, SCALER_POS2_HEIGHT));
VC4_SET_FIELD(vc4_state->src_w[0], SCALER_POS2_WIDTH) |
VC4_SET_FIELD(vc4_state->src_h[0], SCALER_POS2_HEIGHT));
/* Position Word 3: Context. Written by the HVS. */
vc4_dlist_write(vc4_state, 0xc0c0c0c0);
/* Pointer Word 0: RGB / Y Pointer */
/* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
*
* The pointers may be any byte address.
*/
vc4_state->ptr0_offset = vc4_state->dlist_count;
vc4_dlist_write(vc4_state, bo->paddr + vc4_state->offset);
if (!format->flip_cbcr) {
for (i = 0; i < num_planes; i++)
vc4_dlist_write(vc4_state, vc4_state->offsets[i]);
} else {
WARN_ON_ONCE(num_planes != 3);
vc4_dlist_write(vc4_state, vc4_state->offsets[0]);
vc4_dlist_write(vc4_state, vc4_state->offsets[2]);
vc4_dlist_write(vc4_state, vc4_state->offsets[1]);
}
/* Pointer Context Word 0: Written by the HVS */
vc4_dlist_write(vc4_state, 0xc0c0c0c0);
/* Pointer Context Word 0/1/2: Written by the HVS */
for (i = 0; i < num_planes; i++)
vc4_dlist_write(vc4_state, 0xc0c0c0c0);
/* Pitch word 0: Pointer 0 Pitch */
vc4_dlist_write(vc4_state,
VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH));
/* Pitch word 0/1/2 */
for (i = 0; i < num_planes; i++) {
vc4_dlist_write(vc4_state,
VC4_SET_FIELD(fb->pitches[i], SCALER_SRC_PITCH));
}
/* Colorspace conversion words */
if (vc4_state->is_yuv) {
vc4_dlist_write(vc4_state, SCALER_CSC0_ITR_R_601_5);
vc4_dlist_write(vc4_state, SCALER_CSC1_ITR_R_601_5);
vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5);
}
if (!vc4_state->is_unity) {
/* LBM Base Address. */
if (vc4_state->y_scaling != VC4_SCALING_NONE)
if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
vc4_dlist_write(vc4_state, vc4_state->lbm.start);
}
vc4_write_scaling_parameters(state);
if (num_planes > 1) {
/* Emit Cb/Cr as channel 0 and Y as channel
* 1. This matches how we set up scl0/scl1
* above.
*/
vc4_write_scaling_parameters(state, 1);
}
vc4_write_scaling_parameters(state, 0);
/* If any PPF setup was done, then all the kernel
* pointers get uploaded.
*/
if (vc4_state->x_scaling == VC4_SCALING_PPF ||
vc4_state->y_scaling == VC4_SCALING_PPF) {
if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
SCALER_PPF_KERNEL_OFFSET);
......@@ -698,6 +829,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
struct drm_plane *plane = NULL;
struct vc4_plane *vc4_plane;
u32 formats[ARRAY_SIZE(hvs_formats)];
u32 num_formats = 0;
int ret = 0;
unsigned i;
......@@ -708,12 +840,20 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
goto fail;
}
for (i = 0; i < ARRAY_SIZE(hvs_formats); i++)
formats[i] = hvs_formats[i].drm;
for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
/* Don't allow YUV in cursor planes, since that means
* tuning on the scaler, which we don't allow for the
* cursor.
*/
if (type != DRM_PLANE_TYPE_CURSOR ||
hvs_formats[i].hvs < HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE) {
formats[num_formats++] = hvs_formats[i].drm;
}
}
plane = &vc4_plane->base;
ret = drm_universal_plane_init(dev, plane, 0xff,
&vc4_plane_funcs,
formats, ARRAY_SIZE(formats),
formats, num_formats,
type, NULL);
drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
......
......@@ -503,7 +503,12 @@ enum hvs_pixel_format {
HVS_PIXEL_FORMAT_RGB888 = 5,
HVS_PIXEL_FORMAT_RGBA6666 = 6,
/* 32bpp */
HVS_PIXEL_FORMAT_RGBA8888 = 7
HVS_PIXEL_FORMAT_RGBA8888 = 7,
HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE = 8,
HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE = 9,
HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE = 10,
HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE = 11,
};
/* Note: the LSB is the rightmost character shown. Only valid for
......@@ -585,6 +590,55 @@ enum hvs_pixel_format {
#define SCALER_POS2_WIDTH_MASK VC4_MASK(11, 0)
#define SCALER_POS2_WIDTH_SHIFT 0
/* Color Space Conversion words. Some values are S2.8 signed
* integers, except that the 2 integer bits map as {0x0: 0, 0x1: 1,
* 0x2: 2, 0x3: -1}
*/
/* bottom 8 bits of S2.8 contribution of Cr to Blue */
#define SCALER_CSC0_COEF_CR_BLU_MASK VC4_MASK(31, 24)
#define SCALER_CSC0_COEF_CR_BLU_SHIFT 24
/* Signed offset to apply to Y before CSC. (Y' = Y + YY_OFS) */
#define SCALER_CSC0_COEF_YY_OFS_MASK VC4_MASK(23, 16)
#define SCALER_CSC0_COEF_YY_OFS_SHIFT 16
/* Signed offset to apply to CB before CSC (Cb' = Cb - 128 + CB_OFS). */
#define SCALER_CSC0_COEF_CB_OFS_MASK VC4_MASK(15, 8)
#define SCALER_CSC0_COEF_CB_OFS_SHIFT 8
/* Signed offset to apply to CB before CSC (Cr' = Cr - 128 + CR_OFS). */
#define SCALER_CSC0_COEF_CR_OFS_MASK VC4_MASK(7, 0)
#define SCALER_CSC0_COEF_CR_OFS_SHIFT 0
#define SCALER_CSC0_ITR_R_601_5 0x00f00000
#define SCALER_CSC0_ITR_R_709_3 0x00f00000
#define SCALER_CSC0_JPEG_JFIF 0x00000000
/* S2.8 contribution of Cb to Green */
#define SCALER_CSC1_COEF_CB_GRN_MASK VC4_MASK(31, 22)
#define SCALER_CSC1_COEF_CB_GRN_SHIFT 22
/* S2.8 contribution of Cr to Green */
#define SCALER_CSC1_COEF_CR_GRN_MASK VC4_MASK(21, 12)
#define SCALER_CSC1_COEF_CR_GRN_SHIFT 12
/* S2.8 contribution of Y to all of RGB */
#define SCALER_CSC1_COEF_YY_ALL_MASK VC4_MASK(11, 2)
#define SCALER_CSC1_COEF_YY_ALL_SHIFT 2
/* top 2 bits of S2.8 contribution of Cr to Blue */
#define SCALER_CSC1_COEF_CR_BLU_MASK VC4_MASK(1, 0)
#define SCALER_CSC1_COEF_CR_BLU_SHIFT 0
#define SCALER_CSC1_ITR_R_601_5 0xe73304a8
#define SCALER_CSC1_ITR_R_709_3 0xf2b784a8
#define SCALER_CSC1_JPEG_JFIF 0xea34a400
/* S2.8 contribution of Cb to Red */
#define SCALER_CSC2_COEF_CB_RED_MASK VC4_MASK(29, 20)
#define SCALER_CSC2_COEF_CB_RED_SHIFT 20
/* S2.8 contribution of Cr to Red */
#define SCALER_CSC2_COEF_CR_RED_MASK VC4_MASK(19, 10)
#define SCALER_CSC2_COEF_CR_RED_SHIFT 10
/* S2.8 contribution of Cb to Blue */
#define SCALER_CSC2_COEF_CB_BLU_MASK VC4_MASK(19, 10)
#define SCALER_CSC2_COEF_CB_BLU_SHIFT 10
#define SCALER_CSC2_ITR_R_601_5 0x00066204
#define SCALER_CSC2_ITR_R_709_3 0x00072a1c
#define SCALER_CSC2_JPEG_JFIF 0x000599c5
#define SCALER_TPZ0_VERT_RECALC BIT(31)
#define SCALER_TPZ0_SCALE_MASK VC4_MASK(28, 8)
#define SCALER_TPZ0_SCALE_SHIFT 8
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment