Commit 8333f607 authored by Alex Deucher

drm/radeon: remove UMS support

It's been deprecated behind a kconfig option for almost
two years and hasn't really been supported for years before
that.  DDX support was dropped more than three years ago.
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 80d69009
...@@ -5,12 +5,3 @@ config DRM_RADEON_USERPTR ...@@ -5,12 +5,3 @@ config DRM_RADEON_USERPTR
help help
This option selects CONFIG_MMU_NOTIFIER if it isn't already This option selects CONFIG_MMU_NOTIFIER if it isn't already
selected to enabled full userptr support. selected to enabled full userptr support.
config DRM_RADEON_UMS
bool "Enable userspace modesetting on radeon (DEPRECATED)"
depends on DRM_RADEON
help
Choose this option if you still need userspace modesetting.
Userspace modesetting is deprecated for quite some time now, so
enable this only if you have ancient versions of the DDX drivers.
...@@ -58,10 +58,6 @@ $(obj)/evergreen_cs.o: $(obj)/evergreen_reg_safe.h $(obj)/cayman_reg_safe.h ...@@ -58,10 +58,6 @@ $(obj)/evergreen_cs.o: $(obj)/evergreen_reg_safe.h $(obj)/cayman_reg_safe.h
radeon-y := radeon_drv.o radeon-y := radeon_drv.o
# add UMS driver
radeon-$(CONFIG_DRM_RADEON_UMS)+= radeon_cp.o radeon_state.o radeon_mem.o \
radeon_irq.o r300_cmdbuf.o r600_cp.o r600_blit.o drm_buffer.o
# add KMS driver # add KMS driver
radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
radeon_atombios.o radeon_agp.o atombios_crtc.o radeon_combios.o \ radeon_atombios.o radeon_agp.o atombios_crtc.o radeon_combios.o \
......
/**************************************************************************
*
* Copyright 2010 Pauli Nieminen.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*
**************************************************************************/
/*
* Multipart buffer for copying data which is larger than the page size.
*
* Authors:
* Pauli Nieminen <suokkos-at-gmail-dot-com>
*/
#include <linux/export.h>
#include "drm_buffer.h"
/**
* Allocate the drm buffer object.
*
* buf: Pointer to a pointer where the object is stored.
* size: The number of bytes to allocate.
*/
int drm_buffer_alloc(struct drm_buffer **buf, int size)
{
int nr_pages = size / PAGE_SIZE + 1;
int idx;
/* Allocating pointer table to end of structure makes drm_buffer
* variable sized */
*buf = kzalloc(sizeof(struct drm_buffer) + nr_pages*sizeof(char *),
GFP_KERNEL);
if (*buf == NULL) {
DRM_ERROR("Failed to allocate drm buffer object to hold"
" %d bytes in %d pages.\n",
size, nr_pages);
return -ENOMEM;
}
(*buf)->size = size;
for (idx = 0; idx < nr_pages; ++idx) {
(*buf)->data[idx] =
kmalloc(min(PAGE_SIZE, size - idx * PAGE_SIZE),
GFP_KERNEL);
if ((*buf)->data[idx] == NULL) {
DRM_ERROR("Failed to allocate %dth page for drm"
" buffer with %d bytes and %d pages.\n",
idx + 1, size, nr_pages);
goto error_out;
}
}
return 0;
error_out:
for (; idx >= 0; --idx)
kfree((*buf)->data[idx]);
kfree(*buf);
return -ENOMEM;
}
/**
 * Fill the start of the buffer with data from user space and rewind the
 * processing iterator.
 *
 * buf: The buffer that receives the data.
 * user_data: A user space pointer to the data that is copied to the buffer.
 * size: The number of bytes to copy.
 *
 * Returns 0 on success. Returns -EFAULT when size is larger than the buffer
 * or when copying one of the chunks fails; the iterator is left untouched in
 * that case.
 */
int drm_buffer_copy_from_user(struct drm_buffer *buf,
			      void __user *user_data, int size)
{
	int chunks = size / PAGE_SIZE + 1;
	int page = 0;

	if (size > buf->size) {
		DRM_ERROR("Requesting to copy %d bytes to a drm buffer with"
			  " %d bytes space\n",
			  size, buf->size);
		return -EFAULT;
	}

	/* Copy chunk by chunk; the last chunk takes the remainder. */
	while (page < chunks) {
		if (copy_from_user(buf->data[page],
				   user_data + page * PAGE_SIZE,
				   min(PAGE_SIZE, size - page * PAGE_SIZE))) {
			DRM_ERROR("Failed to copy user data (%p) to drm buffer"
				  " (%p) %dth page.\n",
				  user_data, buf, page);
			return -EFAULT;
		}
		++page;
	}

	buf->iterator = 0;
	return 0;
}
/**
 * Release a drm buffer object together with all of its chunks.
 *
 * buf: The buffer to free. Passing NULL is allowed and does nothing.
 */
void drm_buffer_free(struct drm_buffer *buf)
{
	int pages;
	int page;

	if (buf == NULL)
		return;

	/* Same chunk count that is derived from the size at allocation. */
	pages = buf->size / PAGE_SIZE + 1;

	page = 0;
	while (page < pages) {
		kfree(buf->data[page]);
		page++;
	}

	kfree(buf);
}
/**
 * Read an object from the buffer at the current processing location.
 *
 * When the object lies completely inside the current chunk, a pointer
 * straight into the buffer is returned. When it straddles two chunks, both
 * parts are copied into the storage supplied by the caller and a pointer to
 * that storage is returned instead.
 *
 * In both cases the processing location is advanced to the first byte after
 * the object.
 *
 * objsize: The size of the object in bytes.
 * stack_obj: A pointer to a memory location where the object can be copied.
 */
void *drm_buffer_read_object(struct drm_buffer *buf,
			     int objsize, void *stack_obj)
{
	int offset = drm_buffer_index(buf);
	int chunk = drm_buffer_page(buf);
	void *result;

	if (offset + objsize > PAGE_SIZE) {
		/* The object is split: gather the tail of this chunk and the
		 * head of the next one into the caller's storage. */
		int head = PAGE_SIZE - offset;

		memcpy(stack_obj, &buf->data[chunk][offset], head);
		memcpy(stack_obj + head, &buf->data[chunk + 1][0],
		       objsize - head);
		result = stack_obj;
	} else {
		result = &buf->data[chunk][offset];
	}

	drm_buffer_advance(buf, objsize);
	return result;
}
/**************************************************************************
*
* Copyright 2010 Pauli Nieminen.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*
**************************************************************************/
/*
* Multipart buffer for copying data which is larger than the page size.
*
* Authors:
* Pauli Nieminen <suokkos-at-gmail-dot-com>
*/
#ifndef _DRM_BUFFER_H_
#define _DRM_BUFFER_H_
#include <drm/drmP.h>
/*
 * Buffer whose payload is split over several separately allocated chunks of
 * at most PAGE_SIZE bytes each.
 */
struct drm_buffer {
/* Byte offset of the current processing location within the payload. */
int iterator;
/* Total payload size in bytes, as passed to drm_buffer_alloc(). */
int size;
/* Chunk pointers; chunk number i holds payload bytes starting at
 * i * PAGE_SIZE. The table is allocated together with the structure. */
char *data[];
};
/**
 * Return the index of the chunk (page) that contains the current processing
 * location.
 */
static inline int drm_buffer_page(struct drm_buffer *buf)
{
	int page = buf->iterator / PAGE_SIZE;

	return page;
}
/**
 * Return the byte offset of the current processing location inside its
 * chunk (page).
 */
static inline int drm_buffer_index(struct drm_buffer *buf)
{
	int within_page = buf->iterator & (PAGE_SIZE - 1);

	return within_page;
}
/**
 * Return the number of bytes between the current processing location and the
 * end of the buffer.
 */
static inline int drm_buffer_unprocessed(struct drm_buffer *buf)
{
	int consumed = buf->iterator;

	return buf->size - consumed;
}
/**
 * Move the processing location forward by the given number of bytes.
 * No bounds check is performed here.
 */
static inline void drm_buffer_advance(struct drm_buffer *buf, int bytes)
{
	buf->iterator = buf->iterator + bytes;
}
/**
* Allocate the drm buffer object.
*
* buf: A pointer to a pointer where the object is stored.
* size: The number of bytes to allocate.
*/
extern int drm_buffer_alloc(struct drm_buffer **buf, int size);
/**
* Copy the user data to the beginning of the buffer and reset the processing
* iterator.
*
* user_data: A pointer to the data that is copied to the buffer.
* size: The number of bytes to copy.
*/
extern int drm_buffer_copy_from_user(struct drm_buffer *buf,
void __user *user_data, int size);
/**
* Free the drm buffer object
*/
extern void drm_buffer_free(struct drm_buffer *buf);
/**
* Read an object from the buffer that may be split into multiple parts. If the
* object is not split, the function just returns a pointer to the object in
* the buffer. If the object is split, its data is copied to the given stack
* object that is supplied by the caller.
*
* The processing location of the buffer is also advanced to the next byte
* after the object.
*
* objsize: The size of the object in bytes.
* stack_obj: A pointer to a memory location where the object can be copied.
*/
extern void *drm_buffer_read_object(struct drm_buffer *buf,
int objsize, void *stack_obj);
/**
 * Return a pointer to the dword that lies the given number of dwords after
 * the current processing location.
 *
 * The caller must make sure that the dword is not split across two chunks.
 * This holds when all object sizes in the buffer are multiples of a dword
 * and PAGE_SIZE is a multiple of a dword.
 *
 * The processing location itself is not changed, and no bounds check is
 * performed.
 *
 * offset: The index of the dword relative to the internal iterator.
 */
static inline void *drm_buffer_pointer_to_dword(struct drm_buffer *buffer,
						int offset)
{
	int pos = buffer->iterator + offset * 4;
	char *chunk = buffer->data[pos / PAGE_SIZE];

	return &chunk[pos & (PAGE_SIZE - 1)];
}
/**
 * Return a pointer to the byte that lies the given number of bytes after the
 * current processing location.
 *
 * The processing location itself is not changed, and no bounds check is
 * performed.
 *
 * offset: The index of the byte relative to the internal iterator.
 */
static inline void *drm_buffer_pointer_to_byte(struct drm_buffer *buffer,
					       int offset)
{
	int pos = buffer->iterator + offset;
	char *chunk = buffer->data[pos / PAGE_SIZE];

	return &chunk[pos & (PAGE_SIZE - 1)];
}
#endif
/* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
*
* Copyright (C) The Weather Channel, Inc. 2002.
* Copyright (C) 2004 Nicolai Haehnle.
* All Rights Reserved.
*
* The Weather Channel (TM) funded Tungsten Graphics to develop the
* initial release of the Radeon 8500 driver under the XFree86 license.
* This notice must be preserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Nicolai Haehnle <prefect_@gmx.net>
*
* ------------------------ This file is DEPRECATED! -------------------------
*/
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_drv.h"
#include "r300_reg.h"
#include "drm_buffer.h"
#include <asm/unaligned.h>
#define R300_SIMULTANEOUS_CLIPRECTS 4
/* Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects.
 * The table is indexed with (number of cliprects - 1), so entry 0 is used
 * for one cliprect and entry 3 for four of them.
*/
static const int r300_cliprect_cntl[4] = {
0xAAAA,
0xEEEE,
0xFEFE,
0xFFFE
};
/**
 * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
 * buffer, starting with index n.
 *
 * The cliprects are copied from user space one at a time. Afterwards the
 * destination cache is flushed, a wait for 3D idle/clean is emitted and the
 * flush flag is recorded in dev_priv->track_flush.
 *
 * Returns 0 on success or -EFAULT when a cliprect cannot be copied.
 */
static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
			       drm_radeon_kcmd_buffer_t *cmdbuf, int n)
{
	struct drm_clip_rect rect;
	int batch;
	int r;
	RING_LOCALS;

	batch = cmdbuf->nbox - n;
	if (batch > R300_SIMULTANEOUS_CLIPRECTS)
		batch = R300_SIMULTANEOUS_CLIPRECTS;

	DRM_DEBUG("%i cliprects\n", batch);

	if (!batch) {
		/* Why we allow zero cliprect rendering:
		 * There are some commands in a command buffer that must be
		 * submitted even when there are no cliprects, e.g. DMA buffer
		 * discard or state setting (though state setting could be
		 * avoided by simulating a loss of context).
		 *
		 * Now since the cmdbuf interface is so chaotic right now (and
		 * is bound to remain that way for a bit until things settle
		 * down), it is basically impossible to filter out the commands
		 * that are necessary and those that aren't.
		 *
		 * So I choose the safe way and don't do any filtering at all;
		 * instead, I simply set up the engine so that all rendering
		 * can't produce any fragments.
		 */
		BEGIN_RING(2);
		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
		ADVANCE_RING();
	} else {
		/* Chips from RV515 on take the coordinates unchanged; older
		 * ones get R300_CLIPRECT_OFFSET added before masking. */
		const int bias =
			((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) ?
			0 : R300_CLIPRECT_OFFSET;

		BEGIN_RING(6 + batch * 2);
		OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, batch * 2 - 1));

		for (r = 0; r < batch; r++) {
			if (copy_from_user(&rect, &cmdbuf->boxes[n + r],
					   sizeof(rect))) {
				DRM_ERROR("copy cliprect faulted\n");
				return -EFAULT;
			}

			/* Hardware expects inclusive bottom-right corner */
			rect.x2--;
			rect.y2--;

			rect.x1 = (rect.x1 + bias) & R300_CLIPRECT_MASK;
			rect.y1 = (rect.y1 + bias) & R300_CLIPRECT_MASK;
			rect.x2 = (rect.x2 + bias) & R300_CLIPRECT_MASK;
			rect.y2 = (rect.y2 + bias) & R300_CLIPRECT_MASK;

			OUT_RING((rect.x1 << R300_CLIPRECT_X_SHIFT) |
				 (rect.y1 << R300_CLIPRECT_Y_SHIFT));
			OUT_RING((rect.x2 << R300_CLIPRECT_X_SHIFT) |
				 (rect.y2 << R300_CLIPRECT_Y_SHIFT));
		}

		OUT_RING_REG(R300_RE_CLIPRECT_CNTL,
			     r300_cliprect_cntl[batch - 1]);

		/* TODO/SECURITY: Force scissors to a safe value, otherwise the
		 * client might be able to trample over memory.
		 * The impact should be very limited, but I'd rather be safe
		 * than sorry.
		 */
		OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
		OUT_RING(0);
		OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
		ADVANCE_RING();
	}

	/* flush cache and wait idle clean after cliprect change */
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	OUT_RING(R300_RB3D_DC_FLUSH);
	ADVANCE_RING();
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
	ADVANCE_RING();

	/* remember that a flush has been emitted */
	dev_priv->track_flush |= RADEON_FLUSH_EMITED;

	return 0;
}
/* One flag byte per dword register in the 0x0000-0xffff register range;
 * the table is indexed with (register offset >> 2). */
static u8 r300_reg_flags[0x10000 >> 2];
/**
 * Build the r300_reg_flags table that the packet0 checks consult.
 *
 * All flags are cleared first; then the register ranges listed below are
 * marked, with a few ranges depending on the chip family found in
 * dev_priv->flags. Registers that stay unmarked are rejected by
 * r300_emit_carefully_checked_packet0().
 */
void r300_init_reg_flags(struct drm_device *dev)
{
int i;
drm_radeon_private_t *dev_priv = dev->dev_private;
/* The table elements are single bytes, so the element count is also the
 * byte count. */
memset(r300_reg_flags, 0, 0x10000 >> 2);
/* OR `mark` into the flags of `count` consecutive dword registers starting
 * at `reg`. Uses the local `i` as loop counter. The expansion is a single
 * for statement, which is what lets it serve as the body of the brace-less
 * if further down. */
#define ADD_RANGE_MARK(reg, count,mark) \
for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
r300_reg_flags[i]|=(mark);
/* Register may be written with whatever value the user passed. */
#define MARK_SAFE 1
/* Register value must pass radeon_check_offset() before it is emitted. */
#define MARK_CHECK_OFFSET 2
#define ADD_RANGE(reg, count) ADD_RANGE_MARK(reg, count, MARK_SAFE)
/* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
ADD_RANGE(R300_VAP_CNTL, 1);
ADD_RANGE(R300_SE_VTE_CNTL, 2);
ADD_RANGE(0x2134, 2);
ADD_RANGE(R300_VAP_CNTL_STATUS, 1);
ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
ADD_RANGE(0x21DC, 1);
ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
ADD_RANGE(R300_VAP_CLIP_X_0, 4);
ADD_RANGE(R300_VAP_PVS_STATE_FLUSH_REG, 1);
ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
ADD_RANGE(R300_GB_ENABLE, 1);
ADD_RANGE(R300_GB_MSPOS0, 5);
ADD_RANGE(R300_TX_INVALTAGS, 1);
ADD_RANGE(R300_TX_ENABLE, 1);
ADD_RANGE(0x4200, 4);
ADD_RANGE(0x4214, 1);
ADD_RANGE(R300_RE_POINTSIZE, 1);
ADD_RANGE(0x4230, 3);
ADD_RANGE(R300_RE_LINE_CNT, 1);
ADD_RANGE(R300_RE_UNK4238, 1);
ADD_RANGE(0x4260, 3);
ADD_RANGE(R300_RE_SHADE, 4);
ADD_RANGE(R300_RE_POLYGON_MODE, 5);
ADD_RANGE(R300_RE_ZBIAS_CNTL, 1);
ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
ADD_RANGE(R300_RE_OCCLUSION_CNTL, 1);
ADD_RANGE(R300_RE_CULL_CNTL, 1);
ADD_RANGE(0x42C0, 2);
ADD_RANGE(R300_RS_CNTL_0, 2);
ADD_RANGE(R300_SU_REG_DEST, 1);
/* Only marked safe on RV530. */
if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV530)
ADD_RANGE(RV530_FG_ZBREG_DEST, 1);
ADD_RANGE(R300_SC_HYPERZ, 2);
ADD_RANGE(0x43E8, 1);
ADD_RANGE(0x46A4, 5);
ADD_RANGE(R300_RE_FOG_STATE, 1);
ADD_RANGE(R300_FOG_COLOR_R, 3);
ADD_RANGE(R300_PP_ALPHA_TEST, 2);
ADD_RANGE(0x4BD8, 1);
ADD_RANGE(R300_PFS_PARAM_0_X, 64);
ADD_RANGE(0x4E00, 1);
ADD_RANGE(R300_RB3D_CBLEND, 2);
ADD_RANGE(R300_RB3D_COLORMASK, 1);
ADD_RANGE(R300_RB3D_BLEND_COLOR, 3);
ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET); /* check offset */
ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
ADD_RANGE(0x4E50, 9);
ADD_RANGE(0x4E88, 1);
ADD_RANGE(0x4EA0, 2);
ADD_RANGE(R300_ZB_CNTL, 3);
ADD_RANGE(R300_ZB_FORMAT, 4);
ADD_RANGE_MARK(R300_ZB_DEPTHOFFSET, 1, MARK_CHECK_OFFSET); /* check offset */
ADD_RANGE(R300_ZB_DEPTHPITCH, 1);
ADD_RANGE(R300_ZB_DEPTHCLEARVALUE, 1);
ADD_RANGE(R300_ZB_ZMASK_OFFSET, 13);
ADD_RANGE(R300_ZB_ZPASS_DATA, 2); /* ZB_ZPASS_DATA, ZB_ZPASS_ADDR */
ADD_RANGE(R300_TX_FILTER_0, 16);
ADD_RANGE(R300_TX_FILTER1_0, 16);
ADD_RANGE(R300_TX_SIZE_0, 16);
ADD_RANGE(R300_TX_FORMAT_0, 16);
ADD_RANGE(R300_TX_PITCH_0, 16);
/* Texture offset is dangerous and needs more checking */
ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
/* Sporadic registers used as primitives are emitted */
ADD_RANGE(R300_ZB_ZCACHE_CTLSTAT, 1);
ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
/* Family dependent ranges: chips from RV515 on get the R500_* registers,
 * older chips get the R300 PFS/RS registers instead. */
if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
ADD_RANGE(R500_VAP_INDEX_OFFSET, 1);
ADD_RANGE(R500_US_CONFIG, 2);
ADD_RANGE(R500_US_CODE_ADDR, 3);
ADD_RANGE(R500_US_FC_CTRL, 1);
ADD_RANGE(R500_RS_IP_0, 16);
ADD_RANGE(R500_RS_INST_0, 16);
ADD_RANGE(R500_RB3D_COLOR_CLEAR_VALUE_AR, 2);
ADD_RANGE(R500_RB3D_CONSTANT_COLOR_AR, 2);
ADD_RANGE(R500_ZB_FIFO_SIZE, 2);
} else {
ADD_RANGE(R300_PFS_CNTL_0, 3);
ADD_RANGE(R300_PFS_NODE_0, 4);
ADD_RANGE(R300_PFS_TEXI_0, 64);
ADD_RANGE(R300_PFS_INSTR0_0, 64);
ADD_RANGE(R300_PFS_INSTR1_0, 64);
ADD_RANGE(R300_PFS_INSTR2_0, 64);
ADD_RANGE(R300_PFS_INSTR3_0, 64);
ADD_RANGE(R300_RS_INTERP_0, 8);
ADD_RANGE(R300_RS_ROUTE_0, 8);
}
}
/*
 * Check whether `count` consecutive dword registers starting at `reg` are
 * all flagged exactly MARK_SAFE.
 *
 * Returns -1 when reg has bits set above 0xffff, 1 when at least one
 * register in the range carries a different flag, and 0 when the whole range
 * is safe.
 */
static __inline__ int r300_check_range(unsigned reg, int count)
{
	unsigned end;
	int slot;

	if ((reg & ~0xffff) != 0)
		return -1;

	end = (reg >> 2) + count;
	slot = (reg >> 2);
	while (slot < end) {
		if (r300_reg_flags[slot] != MARK_SAFE)
			return 1;
		slot++;
	}

	return 0;
}
/*
 * Emit a packet0 after inspecting every register it touches.
 *
 * Registers flagged MARK_SAFE are accepted as they are. For registers
 * flagged MARK_CHECK_OFFSET the value supplied in the command buffer has to
 * pass radeon_check_offset(). Any other flag rejects the whole packet.
 *
 * Returns 0 on success or -EINVAL when the packet is rejected.
 */
static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
							  dev_priv,
							  drm_radeon_kcmd_buffer_t
							  * cmdbuf,
							  drm_r300_cmd_header_t
							  header)
{
	int first_reg;
	int nvals;
	int n;
	u32 *dword;
	RING_LOCALS;

	nvals = header.packet0.count;
	first_reg = (header.packet0.reghi << 8) | header.packet0.reglo;

	if ((nvals > 64) || (nvals < 0)) {
		DRM_ERROR("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
			  first_reg, nvals);
		return -EINVAL;
	}

	for (n = 0; n < nvals; n++) {
		int flag = r300_reg_flags[(first_reg >> 2) + n];

		if (flag == MARK_SAFE)
			continue;

		if (flag != MARK_CHECK_OFFSET) {
			DRM_ERROR("Register %04x failed check as flag=%02x\n",
				  first_reg + n * 4, flag);
			return -EINVAL;
		}

		/* The value for register n is the n-th pending dword. */
		dword = drm_buffer_pointer_to_dword(cmdbuf->buffer, n);
		if (!radeon_check_offset(dev_priv, *dword)) {
			DRM_ERROR("Offset failed range check (reg=%04x sz=%d)\n",
				  first_reg, nvals);
			return -EINVAL;
		}
	}

	BEGIN_RING(1 + nvals);
	OUT_RING(CP_PACKET0(first_reg, nvals - 1));
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, nvals);
	ADVANCE_RING();

	return 0;
}
/**
 * Emit a packet0 that writes a run of registers with values taken from the
 * command buffer.
 * Called by r300_do_cp_cmdbuf.
 *
 * An empty packet is accepted and ignored. The register range is checked
 * against the flag table; if it is not entirely safe, the packet is handed
 * to r300_emit_carefully_checked_packet0() for a per-register check.
 *
 * Returns 0 on success or -EINVAL when the packet is rejected.
 */
static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					drm_r300_cmd_header_t header)
{
	int first_reg;
	int nvals;
	RING_LOCALS;

	nvals = header.packet0.count;
	first_reg = (header.packet0.reghi << 8) | header.packet0.reglo;

	if (nvals == 0)
		return 0;

	/* The buffer must still hold one dword per register. */
	if (nvals * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
		return -EINVAL;

	if (first_reg + nvals * 4 >= 0x10000) {
		DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n",
			  first_reg, nvals);
		return -EINVAL;
	}

	if (r300_check_range(first_reg, nvals) == 0) {
		/* the whole range is safe to emit, whatever the values the
		 * user passed */
		BEGIN_RING(1 + nvals);
		OUT_RING(CP_PACKET0(first_reg, nvals - 1));
		OUT_RING_DRM_BUFFER(cmdbuf->buffer, nvals);
		ADVANCE_RING();
		return 0;
	}

	/* go and check everything */
	return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf, header);
}
/**
 * Upload user-supplied vertex program instructions or parameters to the
 * graphics card.
 * Called by r300_do_cp_cmdbuf.
 *
 * header.vpu.count gives the number of 16-byte entries and the upload
 * address is assembled from header.vpu.adrhi/adrlo. An empty upload is
 * accepted and ignored.
 *
 * Returns 0 on success or -EINVAL when the buffer holds less data than the
 * header announces.
 */
static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
				    drm_radeon_kcmd_buffer_t *cmdbuf,
				    drm_r300_cmd_header_t header)
{
	int entries;
	int upload_addr;
	RING_LOCALS;

	entries = header.vpu.count;
	upload_addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;

	if (entries == 0)
		return 0;
	if (entries * 16 > drm_buffer_unprocessed(cmdbuf->buffer))
		return -EINVAL;

	/* VAP is very sensitive so we purge cache before we program it
	 * and we also flush its state before & after */
	BEGIN_RING(6);
	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	OUT_RING(R300_RB3D_DC_FLUSH);
	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
	OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
	OUT_RING(0);
	ADVANCE_RING();

	/* remember that a flush has been emitted */
	dev_priv->track_flush |= RADEON_FLUSH_EMITED;

	/* Each entry is four dwords. */
	BEGIN_RING(3 + entries * 4);
	OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, upload_addr);
	OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, entries * 4 - 1));
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, entries * 4);
	ADVANCE_RING();

	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
	OUT_RING(0);
	ADVANCE_RING();

	return 0;
}
/**
 * Emit a clear packet from userspace.
 * Called by r300_emit_packet3.
 *
 * Eight dwords are taken from the command buffer and emitted as the payload
 * of a single-vertex point draw, followed by a destination cache flush and a
 * wait for 3D idle/clean.
 *
 * Returns 0 on success or -EINVAL when fewer than eight dwords are left.
 */
static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
				      drm_radeon_kcmd_buffer_t *cmdbuf)
{
	const int payload_dwords = 8;
	RING_LOCALS;

	if (drm_buffer_unprocessed(cmdbuf->buffer) < payload_dwords * 4)
		return -EINVAL;

	/* packet header + vertex control dword + payload */
	BEGIN_RING(2 + payload_dwords);
	OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, payload_dwords));
	OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
		 (1 << R300_PRIM_NUM_VERTICES_SHIFT));
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, payload_dwords);
	ADVANCE_RING();

	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	OUT_RING(R300_RB3D_DC_FLUSH);
	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
	ADVANCE_RING();

	/* remember that a flush has been emitted */
	dev_priv->track_flush |= RADEON_FLUSH_EMITED;

	return 0;
}
/*
 * Validate and emit a 3D_LOAD_VBPNTR packet3.
 *
 * header is the packet3 header dword, which the caller has read but not yet
 * consumed; this function advances the buffer past it. The payload consists
 * of count + 1 dwords: the number of arrays, followed by groups of one
 * attribute dword and up to two offset dwords. Every offset has to pass
 * radeon_check_offset(), and the number of arrays must match the packet
 * length exactly.
 *
 * The caller is expected to have verified that the buffer holds the complete
 * packet. Payload dwords are only read at indices 0..count, so a packet that
 * is too short for the number of arrays it announces is rejected without
 * looking at data behind its end.
 *
 * Returns 0 on success or -EINVAL when the packet is rejected.
 */
static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
					       drm_radeon_kcmd_buffer_t *cmdbuf,
					       u32 header)
{
	int count, i, k;
#define MAX_ARRAY_PACKET 64
	u32 *data;
	u32 narrays;
	RING_LOCALS;

	count = (header & RADEON_CP_PACKET_COUNT_MASK) >> 16;

	if ((count + 1) > MAX_ARRAY_PACKET) {
		DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
			  count);
		return -EINVAL;
	}

	/* carefully check packet contents */

	/* We have already read the header so advance the buffer. */
	drm_buffer_advance(cmdbuf->buffer, 4);

	narrays = *(u32 *)drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
	k = 0;
	i = 1;
	while ((k < narrays) && (i < (count + 1))) {
		i++;		/* skip attribute field */

		/* The attribute was the last payload dword: the offset that
		 * should follow is missing. Leave the loop with k < narrays
		 * so the packet is reported as malformed below. */
		if (i > count)
			break;

		data = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
		if (!radeon_check_offset(dev_priv, *data)) {
			DRM_ERROR
			    ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
			     k, i);
			return -EINVAL;
		}
		k++;
		i++;
		if (k == narrays)
			break;

		/* Another array is expected but the payload is used up;
		 * again report the packet as malformed below. */
		if (i > count)
			break;

		/* have one more to process, they come in pairs */
		data = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
		if (!radeon_check_offset(dev_priv, *data)) {
			DRM_ERROR
			    ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
			     k, i);
			return -EINVAL;
		}
		k++;
		i++;
	}

	/* do the counts match what we expect ? */
	if ((k != narrays) || (i != (count + 1))) {
		DRM_ERROR
		    ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
		     k, i, narrays, count + 1);
		return -EINVAL;
	}

	/* all clear, output packet */
	BEGIN_RING(count + 2);
	OUT_RING(header);
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 1);
	ADVANCE_RING();

	return 0;
}
/*
 * Validate and emit a BITBLT_MULTI packet3.
 *
 * The packet starts at the current processing location and is count + 2
 * dwords long (header included), where count comes from the header. The
 * caller is expected to have verified that the buffer holds that many
 * dwords.
 *
 * When bit 0x8000 of the header is set, dword 1 is examined: if it selects a
 * source or destination pitch/offset, dword 2 is checked as an offset, and
 * if it selects both, dword 3 is checked as well. An offset dword that the
 * packet is too short to contain is treated as an error instead of being
 * read from whatever follows the packet.
 *
 * Returns 0 on success or -EINVAL when the packet is rejected.
 */
static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
					     drm_radeon_kcmd_buffer_t *cmdbuf)
{
	u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
	int count;
	RING_LOCALS;

	count = (*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16;

	if (*cmd & 0x8000) {
		u32 offset;
		u32 *cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);

		if (*cmd1 & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			     | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			u32 *cmd2;

			/* Dword 2 only belongs to the packet if count >= 1. */
			if (count < 1) {
				DRM_ERROR("Truncated bitblt packet, first offset is missing (count=%d)\n",
					  count);
				return -EINVAL;
			}

			cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
			offset = *cmd2 << 10;
			if (!radeon_check_offset(dev_priv, offset)) {
				DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
				return -EINVAL;
			}
		}

		if ((*cmd1 & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (*cmd1 & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			u32 *cmd3;

			/* Dword 3 only belongs to the packet if count >= 2. */
			if (count < 2) {
				DRM_ERROR("Truncated bitblt packet, second offset is missing (count=%d)\n",
					  count);
				return -EINVAL;
			}

			cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
			offset = *cmd3 << 10;
			if (!radeon_check_offset(dev_priv, offset)) {
				DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
				return -EINVAL;
			}
		}
	}

	BEGIN_RING(count+2);
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
	ADVANCE_RING();

	return 0;
}
/*
 * Validate and emit a 3D_DRAW_INDX_2 packet3.
 *
 * The expected index payload size is derived from the upper 16 bits of the
 * dword following the header; it is halved (rounding up) unless
 * R300_VAP_VF_CNTL__INDEX_SIZE_32bit is set in that dword. A non-zero packet
 * count has to match this size.
 *
 * A packet with count 0 carries no indices itself. It must be followed
 * directly by a raw packet3 command holding an INDX_BUFFER packet, which is
 * validated and emitted here as well.
 *
 * Returns 0 on success or -EINVAL when the stream is rejected.
 */
static __inline__ int r300_emit_draw_indx_2(drm_radeon_private_t *dev_priv,
					    drm_radeon_kcmd_buffer_t *cmdbuf)
{
	u32 *draw_hdr = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
	u32 *vf_cntl = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
	drm_r300_cmd_header_t stack_header, *next;
	u32 *ib_hdr, *ib_reg, *ib_offset, *ib_size;
	int count;
	int expected_count;
	RING_LOCALS;

	count = (*draw_hdr & RADEON_CP_PACKET_COUNT_MASK) >> 16;

	expected_count = *vf_cntl >> 16;
	if (!(*vf_cntl & R300_VAP_VF_CNTL__INDEX_SIZE_32bit))
		expected_count = (expected_count+1)/2;

	if (count && count != expected_count) {
		DRM_ERROR("3D_DRAW_INDX_2: packet size %i, expected %i\n",
			  count, expected_count);
		return -EINVAL;
	}

	BEGIN_RING(count+2);
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
	ADVANCE_RING();

	/* Indices were part of the packet: nothing more to do. */
	if (count)
		return 0;

	/* A command header plus the four INDX_BUFFER dwords must follow. */
	if (drm_buffer_unprocessed(cmdbuf->buffer)
	    < 4*4 + sizeof(stack_header)) {
		DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER, but stream is too short.\n");
		return -EINVAL;
	}

	next = drm_buffer_read_object(cmdbuf->buffer,
				      sizeof(stack_header), &stack_header);

	ib_hdr = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
	ib_reg = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
	ib_offset = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
	ib_size = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);

	if (next->header.cmd_type != R300_CMD_PACKET3 ||
	    next->packet3.packet != R300_CMD_PACKET3_RAW ||
	    *ib_hdr != CP_PACKET3(RADEON_CP_INDX_BUFFER, 2)) {
		DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER.\n");
		return -EINVAL;
	}

	if ((*ib_reg & 0x8000ffff) != 0x80000810) {
		DRM_ERROR("Invalid indx_buffer reg address %08X\n",
			  *ib_reg);
		return -EINVAL;
	}
	if (!radeon_check_offset(dev_priv, *ib_offset)) {
		DRM_ERROR("Invalid indx_buffer offset is %08X\n",
			  *ib_offset);
		return -EINVAL;
	}
	if (*ib_size != expected_count) {
		DRM_ERROR("INDX_BUFFER: buffer size %i, expected %i\n",
			  *ib_size, expected_count);
		return -EINVAL;
	}

	BEGIN_RING(4);
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, 4);
	ADVANCE_RING();

	return 0;
}
/*
 * Emit a packet3 supplied verbatim by userspace.
 *
 * The header dword is checked to be a packet3 header and the buffer is
 * checked to hold the complete packet (count + 2 dwords). Packet types with
 * a dedicated checker are handed to it; the draw, wait-for-idle and NOP
 * packets are emitted unchanged; every other type is rejected.
 *
 * Draw packets clear the flush and purge flags in dev_priv->track_flush.
 *
 * Returns 0 on success or -EINVAL when the packet is rejected.
 */
static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
					    drm_radeon_kcmd_buffer_t *cmdbuf)
{
	u32 *pkt;
	int ndw;
	RING_LOCALS;

	if (drm_buffer_unprocessed(cmdbuf->buffer) < 4)
		return -EINVAL;

	/* Fixme !! This simply emits a packet without much checking.
	   We need to be smarter. */

	/* obtain first word - actual packet3 header */
	pkt = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);

	/* Is it packet 3 ? */
	if ((*pkt >> 30) != 0x3) {
		DRM_ERROR("Not a packet3 header (0x%08x)\n", *pkt);
		return -EINVAL;
	}

	ndw = (*pkt >> 16) & 0x3fff;

	/* Check again now that we know how much data to expect */
	if ((ndw + 2) * 4 > drm_buffer_unprocessed(cmdbuf->buffer)) {
		DRM_ERROR
		    ("Expected packet3 of length %d but have only %d bytes left\n",
		     (ndw + 2) * 4, drm_buffer_unprocessed(cmdbuf->buffer));
		return -EINVAL;
	}

	/* Is it a packet type we know about ? */
	switch (*pkt & 0xff00) {
	case RADEON_WAIT_FOR_IDLE:
	case RADEON_CP_NOP:
		/* these packets are safe */
		break;

	case RADEON_CP_3D_DRAW_IMMD_2:
		/* triggers drawing using in-packet vertex data */
	case RADEON_CP_3D_DRAW_VBUF_2:
		/* triggers drawing of vertex buffers setup elsewhere */
		dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
					   RADEON_PURGE_EMITED);
		break;

	case RADEON_CP_3D_DRAW_INDX_2:
		/* triggers drawing using indices to vertex buffer */
		/* whenever we send vertex we clear flush & purge */
		dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
					   RADEON_PURGE_EMITED);
		return r300_emit_draw_indx_2(dev_priv, cmdbuf);

	case RADEON_3D_LOAD_VBPNTR:	/* load vertex array pointers */
		return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, *pkt);

	case RADEON_CNTL_BITBLT_MULTI:
		return r300_emit_bitblt_multi(dev_priv, cmdbuf);

	case RADEON_CP_INDX_BUFFER:
		DRM_ERROR("packet3 INDX_BUFFER without preceding 3D_DRAW_INDX_2 is illegal.\n");
		return -EINVAL;

	default:
		DRM_ERROR("Unknown packet3 header (0x%08x)\n", *pkt);
		return -EINVAL;
	}

	/* Packet needs no further inspection: copy it to the ring as is. */
	BEGIN_RING(ndw + 2);
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, ndw + 2);
	ADVANCE_RING();

	return 0;
}
/**
 * Emit a rendering packet3 from userspace.
 * Called by r300_do_cp_cmdbuf.
 *
 * When the command buffer carries more than R300_SIMULTANEOUS_CLIPRECTS
 * cliprects, the packet is emitted once per batch of cliprects; the buffer
 * iterator is rewound to the start of the packet before each pass.
 *
 * Returns 0 on success or the error reported by the failing step.
 */
static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					drm_r300_cmd_header_t header)
{
	int packet_start = cmdbuf->buffer->iterator;
	int first_box = 0;
	int err;

	/* This is a do-while-loop so that we run the interior at least once,
	 * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
	 */
	do {
		if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
			err = r300_emit_cliprects(dev_priv, cmdbuf, first_box);
			if (err)
				return err;

			cmdbuf->buffer->iterator = packet_start;
		}

		if (header.packet3.packet == R300_CMD_PACKET3_CLEAR) {
			DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
			err = r300_emit_clear(dev_priv, cmdbuf);
			if (err) {
				DRM_ERROR("r300_emit_clear failed\n");
				return err;
			}
		} else if (header.packet3.packet == R300_CMD_PACKET3_RAW) {
			DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
			err = r300_emit_raw_packet3(dev_priv, cmdbuf);
			if (err) {
				DRM_ERROR("r300_emit_raw_packet3 failed\n");
				return err;
			}
		} else {
			DRM_ERROR("bad packet3 type %i at byte %d\n",
				  header.packet3.packet,
				  cmdbuf->buffer->iterator - (int)sizeof(header));
			return -EINVAL;
		}

		first_box += R300_SIMULTANEOUS_CLIPRECTS;
	} while (first_box < cmdbuf->nbox);

	return 0;
}
/* Some of the R300 chips seem to be extremely touchy about the two registers
* that are configured in r300_pacify.
* Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
* sends a command buffer that contains only state setting commands and a
* vertex program/parameter upload sequence, this will eventually lead to a
* lockup, unless the sequence is bracketed by calls to r300_pacify.
* So we should take great care to *always* call r300_pacify before
* *anything* 3D related, and again afterwards. This is what the
* call bracket in r300_do_cp_cmdbuf is for.
*/
/**
* Emit the sequence to pacify R300.
*/
static void r300_pacify(drm_radeon_private_t *dev_priv)
{
	uint32_t zcache_ctl = R300_ZC_FLUSH;
	uint32_t dst3d_ctl = R300_RB3D_DC_FLUSH;
	uint32_t dst2d_ctl = R300_RB2D_DC_FLUSH;
	RING_LOCALS;

	/*
	 * No purge has been emitted since primitives were last drawn, so
	 * request a purge (FREE) in addition to the flush.
	 */
	if ((dev_priv->track_flush & RADEON_PURGE_EMITED) == 0) {
		zcache_ctl |= R300_ZC_FREE;
		dst2d_ctl |= R300_RB2D_DC_FREE;
		dst3d_ctl |= R300_RB3D_DC_FREE;
	}

	/* Z cache: flush (and purge) */
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0));
	OUT_RING(zcache_ctl);
	ADVANCE_RING();

	/* 3D destination cache: flush (and purge) */
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	OUT_RING(dst3d_ctl);
	ADVANCE_RING();

	/* texture cache: invalidate tags */
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(R300_TX_INVALTAGS, 0));
	OUT_RING(0);
	ADVANCE_RING();

	/* FIXME: is this one really needed ? */
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(R300_RB3D_AARESOLVE_CTL, 0));
	OUT_RING(0);
	ADVANCE_RING();

	/* wait for the 3D engine to become idle and clean */
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
	ADVANCE_RING();

	/* 2D cache: flush & purge through E2 as RB2D will trigger lockup */
	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(R300_DSTCACHE_CTLSTAT, 0));
	OUT_RING(dst2d_ctl);
	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
	OUT_RING(RADEON_WAIT_2D_IDLECLEAN |
		 RADEON_WAIT_HOST_IDLECLEAN);
	ADVANCE_RING();

	/* record that both a flush and a purge have now been emitted */
	dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED;
}
/**
* Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
* The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
* be careful about how this function is called.
*/
static void r300_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
{
	struct drm_radeon_master_private *mpriv = master->driver_priv;
	drm_radeon_buf_priv_t *bpriv = buf->dev_private;

	/*
	 * Stamp the buffer with a freshly incremented dispatch age from the
	 * master's SAREA, then mark it pending and empty.  Nothing is written
	 * to the ring here; @dev is unused.
	 */
	bpriv->age = ++mpriv->sarea_priv->last_dispatch;
	buf->used = 0;
	buf->pending = 1;
}
/*
 * Emit a RADEON_WAIT_UNTIL write for the idle condition selected by
 * header.wait.flags.  Nothing is emitted when the flags are zero or do not
 * match one of the known combinations.
 */
static void r300_cmd_wait(drm_radeon_private_t * dev_priv,
			  drm_r300_cmd_header_t header)
{
	u32 idle_bits = 0;
	RING_LOCALS;

	if (header.wait.flags == 0)
		return;

	switch (header.wait.flags) {
	case R300_WAIT_2D:
		idle_bits = RADEON_WAIT_2D_IDLE;
		break;
	case R300_WAIT_3D:
		idle_bits = RADEON_WAIT_3D_IDLE;
		break;
	case R300_NEW_WAIT_2D_3D:
		idle_bits = RADEON_WAIT_2D_IDLE | RADEON_WAIT_3D_IDLE;
		break;
	case R300_NEW_WAIT_2D_2D_CLEAN:
		idle_bits = RADEON_WAIT_2D_IDLE | RADEON_WAIT_2D_IDLECLEAN;
		break;
	case R300_NEW_WAIT_3D_3D_CLEAN:
		idle_bits = RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN;
		break;
	case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
		idle_bits = RADEON_WAIT_2D_IDLE | RADEON_WAIT_2D_IDLECLEAN;
		idle_bits |= RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN;
		break;
	default:
		/* unknown flag combination: silently ignore the command */
		return;
	}

	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
	OUT_RING(idle_bits);
	ADVANCE_RING();
}
/*
 * Handle R300_CMD_SCRATCH: bump the age counter of one scratch register,
 * publish the new age into a user-space table for each listed buffer,
 * decrement that buffer's pending count, and finally write the new age to
 * the scratch register through the ring.
 *
 * The command payload is a 64-bit user address of the table followed by
 * header.scratch.n_bufs 32-bit buffer indices.  Each table entry is two
 * dwords: the age, then the pending count.
 *
 * Returns 0 on success or -EINVAL on a short payload, a bad register
 * index, a failed user copy, or a pending count that is already zero.
 */
static int r300_scratch(drm_radeon_private_t *dev_priv,
			drm_radeon_kcmd_buffer_t *cmdbuf,
			drm_r300_cmd_header_t header)
{
	u32 *age_table;
	u32 *slot, *entry;
	u32 n, pending;
	u64 *addr_ptr;
	u64 addr_copy;
	RING_LOCALS;

	/* the payload must hold the table address plus all indices */
	if (drm_buffer_unprocessed(cmdbuf->buffer) <
	    (sizeof(u64) + header.scratch.n_bufs * sizeof(*slot)))
		return -EINVAL;

	if (header.scratch.reg >= 5)
		return -EINVAL;

	dev_priv->scratch_ages[header.scratch.reg]++;

	addr_ptr = drm_buffer_read_object(cmdbuf->buffer,
					  sizeof(addr_copy), &addr_copy);
	/* the address is a user-space pointer supplied in the command stream */
	age_table = (u32 *)(unsigned long)get_unaligned(addr_ptr);

	for (n = 0; n < header.scratch.n_bufs; n++) {
		slot = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
		*slot *= 2;	/* two dwords (8 bytes) per table entry */
		entry = age_table + *slot;

		if (copy_to_user(entry,
				 &dev_priv->scratch_ages[header.scratch.reg],
				 sizeof(u32)))
			return -EINVAL;

		if (copy_from_user(&pending, entry + 1, sizeof(u32)))
			return -EINVAL;

		if (pending == 0)
			return -EINVAL;
		pending--;

		if (copy_to_user(entry + 1, &pending, sizeof(u32)))
			return -EINVAL;

		drm_buffer_advance(cmdbuf->buffer, sizeof(*slot));
	}

	BEGIN_RING(2);
	OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0 ) );
	OUT_RING( dev_priv->scratch_ages[header.scratch.reg] );
	ADVANCE_RING();

	return 0;
}
/**
 * Uploads user-supplied R500 fragment program instructions or constants
 * onto the graphics card.
 * Called by r300_do_cp_cmdbuf.
 */
static inline int r300_emit_r500fp(drm_radeon_private_t *dev_priv,
				   drm_radeon_kcmd_buffer_t *cmdbuf,
				   drm_r300_cmd_header_t header)
{
	const int count = header.r500fp.count;
	const int is_const = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
	const int clamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
	/* constants take 4 dwords per entry, everything else 6 */
	const int stride = is_const ? 4 : 6;
	int payload_dw;
	int index;
	RING_LOCALS;

	/*
	 * The vector index is 9 bits wide: adrlo supplies bits 0-7 and the
	 * lowest bit of adrhi_flags supplies bit 8.  The type and clamp
	 * selectors go into bits 16 and 17.
	 */
	index = ((header.r500fp.adrhi_flags & 1) << 8) | header.r500fp.adrlo;
	index |= (is_const << 16) | (clamp << 17);

	DRM_DEBUG("r500fp %d %d type: %d\n", count, index, is_const);

	if (count == 0)
		return 0;

	/* refuse to read past the end of the user command buffer */
	payload_dw = count * stride;
	if (payload_dw * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
		return -EINVAL;

	BEGIN_RING(3 + payload_dw);
	OUT_RING_REG(R500_GA_US_VECTOR_INDEX, index);
	OUT_RING(CP_PACKET0_TABLE(R500_GA_US_VECTOR_DATA, payload_dw - 1));
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, payload_dw);
	ADVANCE_RING();

	return 0;
}
/**
* Parses and validates a user-supplied command buffer and emits appropriate
* commands on the DMA ring buffer.
* Called by the ioctl handler function radeon_cp_cmdbuf.
*/
int r300_do_cp_cmdbuf(struct drm_device *dev,
struct drm_file *file_priv,
drm_radeon_kcmd_buffer_t *cmdbuf)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
struct drm_device_dma *dma = dev->dma;
struct drm_buf *buf = NULL;
/* set once any R300_CMD_DMA_DISCARD has been processed */
int emit_dispatch_age = 0;
/* 0 on success, otherwise the error of the first failing command */
int ret = 0;
DRM_DEBUG("\n");
/* pacify */
r300_pacify(dev_priv);
/*
 * If all cliprects fit in one batch, program them once up front.
 * Otherwise r300_emit_packet3 emits them batch by batch.
 */
if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
if (ret)
goto cleanup;
}
/* Process commands for as long as a complete header is left. */
while (drm_buffer_unprocessed(cmdbuf->buffer)
>= sizeof(drm_r300_cmd_header_t)) {
int idx;
drm_r300_cmd_header_t *header, stack_header;
/* stack_header is scratch storage handed to the reader */
header = drm_buffer_read_object(cmdbuf->buffer,
sizeof(stack_header), &stack_header);
switch (header->header.cmd_type) {
case R300_CMD_PACKET0:
DRM_DEBUG("R300_CMD_PACKET0\n");
ret = r300_emit_packet0(dev_priv, cmdbuf, *header);
if (ret) {
DRM_ERROR("r300_emit_packet0 failed\n");
goto cleanup;
}
break;
case R300_CMD_VPU:
DRM_DEBUG("R300_CMD_VPU\n");
ret = r300_emit_vpu(dev_priv, cmdbuf, *header);
if (ret) {
DRM_ERROR("r300_emit_vpu failed\n");
goto cleanup;
}
break;
case R300_CMD_PACKET3:
DRM_DEBUG("R300_CMD_PACKET3\n");
ret = r300_emit_packet3(dev_priv, cmdbuf, *header);
if (ret) {
DRM_ERROR("r300_emit_packet3 failed\n");
goto cleanup;
}
break;
case R300_CMD_END3D:
DRM_DEBUG("R300_CMD_END3D\n");
/* TODO:
Ideally userspace driver should not need to issue this call,
i.e. the drm driver should issue it automatically and prevent
lockups.
In practice, we do not understand why this call is needed and what
it does (except for some vague guesses that it has to do with cache
coherence) and so the user space driver does it.
Once we are sure which uses prevent lockups the code could be moved
into the kernel and the userspace driver will not
need to use this command.
Note that issuing this command does not hurt anything
except, possibly, performance */
r300_pacify(dev_priv);
break;
case R300_CMD_CP_DELAY:
/* simple enough, we can do it here */
DRM_DEBUG("R300_CMD_CP_DELAY\n");
{
int i;
RING_LOCALS;
/* emit header->delay.count RADEON_CP_PACKET2 dwords */
BEGIN_RING(header->delay.count);
for (i = 0; i < header->delay.count; i++)
OUT_RING(RADEON_CP_PACKET2);
ADVANCE_RING();
}
break;
case R300_CMD_DMA_DISCARD:
DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
idx = header->dma.buf_idx;
/* the index comes from the command stream: range-check it */
if (idx < 0 || idx >= dma->buf_count) {
DRM_ERROR("buffer index %d (of %d max)\n",
idx, dma->buf_count - 1);
ret = -EINVAL;
goto cleanup;
}
buf = dma->buflist[idx];
/* the buffer must belong to this file and not already be pending */
if (buf->file_priv != file_priv || buf->pending) {
DRM_ERROR("bad buffer %p %p %d\n",
buf->file_priv, file_priv,
buf->pending);
ret = -EINVAL;
goto cleanup;
}
/* the age itself is written to the ring after the final pacify */
emit_dispatch_age = 1;
r300_discard_buffer(dev, file_priv->master, buf);
break;
case R300_CMD_WAIT:
DRM_DEBUG("R300_CMD_WAIT\n");
r300_cmd_wait(dev_priv, *header);
break;
case R300_CMD_SCRATCH:
DRM_DEBUG("R300_CMD_SCRATCH\n");
ret = r300_scratch(dev_priv, cmdbuf, *header);
if (ret) {
DRM_ERROR("r300_scratch failed\n");
goto cleanup;
}
break;
case R300_CMD_R500FP:
/* only accepted on chip families from CHIP_RV515 upwards */
if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV515) {
DRM_ERROR("Calling r500 command on r300 card\n");
ret = -EINVAL;
goto cleanup;
}
DRM_DEBUG("R300_CMD_R500FP\n");
ret = r300_emit_r500fp(dev_priv, cmdbuf, *header);
if (ret) {
DRM_ERROR("r300_emit_r500fp failed\n");
goto cleanup;
}
break;
default:
DRM_ERROR("bad cmd_type %i at byte %d\n",
header->header.cmd_type,
cmdbuf->buffer->iterator - (int)sizeof(*header));
ret = -EINVAL;
goto cleanup;
}
}
DRM_DEBUG("END\n");
/*
 * Reached on success and on every error path: the closing pacify, the
 * optional age emit and the ring commit always run.
 */
cleanup:
r300_pacify(dev_priv);
/* We emit the vertex buffer age here, outside the pacifier "brackets"
* for two reasons:
* (1) This may coalesce multiple age emissions into a single one and
* (2) more importantly, some chips lock up hard when scratch registers
* are written inside the pacifier bracket.
*/
if (emit_dispatch_age) {
RING_LOCALS;
/* Emit the vertex buffer age */
BEGIN_RING(2);
RADEON_DISPATCH_AGE(master_priv->sarea_priv->last_dispatch);
ADVANCE_RING();
}
COMMIT_RING();
return ret;
}
/*
* Copyright 2009 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Alex Deucher <alexander.deucher@amd.com>
*
* ------------------------ This file is DEPRECATED! -------------------------
*/
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_drv.h"
#include "r600_blit_shaders.h"
/* 23 bits of float fractional data */
#define I2F_FRAC_BITS 23
#define I2F_MASK ((1 << I2F_FRAC_BITS) - 1)
/*
 * Build the bit pattern of a 32-bit IEEE float from an unsigned integer.
 * Values from 0 to 2^24 convert exactly.  Larger values are truncated
 * (rounded towards zero) because their low bits do not fit in the
 * fraction; round-to-even, as an FPU would do, would be slower.
 */
static __pure uint32_t int2float(uint32_t x)
{
	uint32_t top_bit, mantissa, biased_exp;

	/* zero has an all-zero encoding and no most significant bit */
	if (x == 0)
		return 0;

	/* index of the most significant set bit */
	top_bit = __fls(x);

	/*
	 * Rotating instead of shifting moves the bits left or right as
	 * needed thanks to the modulo-32 rotate count, so values above and
	 * below 2^24 need no separate handling.  The mask drops the implicit
	 * leading one.
	 */
	mantissa = ror32(x, (top_bit - I2F_FRAC_BITS) & 0x1f) & I2F_MASK;
	biased_exp = (127 + top_bit) << I2F_FRAC_BITS;

	return biased_exp + mantissa;
}
/* Draw-initiator values emitted by draw_auto() */
#define DI_PT_RECTLIST 0x11
#define DI_INDEX_SIZE_16_BIT 0x0
#define DI_SRC_SEL_AUTO_INDEX 0x2
/* Texture formats passed as 'format' to set_tex_resource() */
#define FMT_8 0x1
#define FMT_5_6_5 0x8
#define FMT_8_8_8_8 0x1a
/* Colour buffer formats passed as 'format' to set_render_target() */
#define COLOR_8 0x1
#define COLOR_5_6_5 0x8
#define COLOR_8_8_8_8 0x1a
/*
 * Program colour buffer 0 (base, size, view, info, tile, frag and mask
 * registers) as the destination of a blit.  @h is rounded up to a multiple
 * of 8 with a minimum of 8; pitch and slice are expressed in units of 8 and
 * 64 pixels respectively, minus one.
 */
static void
set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
{
	const u32 family = dev_priv->flags & RADEON_FAMILY_MASK;
	u32 color_info;
	int pitch_tiles, slice_tiles;
	RING_LOCALS;

	DRM_DEBUG("\n");

	h = ALIGN(h, 8);
	if (h < 8)
		h = 8;

	color_info = ((format << 2) | (1 << 27));
	pitch_tiles = (w / 8) - 1;
	slice_tiles = ((w * h) / 64) - 1;

	/*
	 * Families strictly between R600 and RV770 get an extra
	 * SURFACE_BASE_UPDATE packet after the base address write.
	 */
	if ((family > CHIP_R600) && (family < CHIP_RV770)) {
		BEGIN_RING(21 + 2);
		OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
		OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
		OUT_RING(gpu_addr >> 8);
		OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
		OUT_RING(2 << 0);
	} else {
		BEGIN_RING(21);
		OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
		OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
		OUT_RING(gpu_addr >> 8);
	}

	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
	OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
	OUT_RING((pitch_tiles << 0) | (slice_tiles << 10));

	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
	OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2);
	OUT_RING(0);

	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
	OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2);
	OUT_RING(color_info);

	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
	OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
	OUT_RING(0);

	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
	OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2);
	OUT_RING(0);

	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
	OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2);
	OUT_RING(0);

	ADVANCE_RING();
}
/*
 * Emit a SURFACE_SYNC packet of the given @sync_type for @size bytes at
 * @mc_addr.  The size is sent in 256-byte units, rounded up; the special
 * value 0xffffffff is passed through unchanged.
 */
static void
cp_set_surface_sync(drm_radeon_private_t *dev_priv,
		    u32 sync_type, u32 size, u64 mc_addr)
{
	const u32 coher_size = (size == 0xffffffff) ? 0xffffffff
						    : ((size + 255) >> 8);
	RING_LOCALS;

	DRM_DEBUG("\n");

	BEGIN_RING(5);
	OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
	OUT_RING(sync_type);
	OUT_RING(coher_size);
	OUT_RING((mc_addr >> 8));
	OUT_RING(10); /* poll interval */
	ADVANCE_RING();
}
static void
set_shaders(struct drm_device *dev)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
u64 gpu_addr;
int i;
u32 *vs, *ps;
uint32_t sq_pgm_resources;
RING_LOCALS;
DRM_DEBUG("\n");
/* load shaders */
vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset);
ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256);
for (i = 0; i < r6xx_vs_size; i++)
vs[i] = cpu_to_le32(r6xx_vs[i]);
for (i = 0; i < r6xx_ps_size; i++)
ps[i] = cpu_to_le32(r6xx_ps[i]);
dev_priv->blit_vb->used = 512;
gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset;
/* setup shader regs */
sq_pgm_resources = (1 << 0);
BEGIN_RING(9 + 12);
/* VS */
OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
OUT_RING(gpu_addr >> 8);
OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
OUT_RING(sq_pgm_resources);
OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
OUT_RING(0);
/* PS */
OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
OUT_RING((gpu_addr + 256) >> 8);
OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
OUT_RING(sq_pgm_resources | (1 << 28));
OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
OUT_RING(2);
OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
OUT_RING(0);
ADVANCE_RING();
cp_set_surface_sync(dev_priv,
R600_SH_ACTION_ENA, 512, gpu_addr);
}
/*
 * Describe the 48-byte vertex buffer at @gpu_addr through a SET_RESOURCE
 * packet (16-byte stride), then sync it.  RV610, RV620, RS780, RS880 and
 * RV710 are synced with R600_TC_ACTION_ENA, all other families with
 * R600_VC_ACTION_ENA.
 */
static void
set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
{
	uint32_t vtx_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8));
	u32 sync_type;
	RING_LOCALS;

	DRM_DEBUG("\n");

#ifdef __BIG_ENDIAN
	vtx_word2 |= (2 << 30);
#endif

	BEGIN_RING(9);
	OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
	OUT_RING(0x460);
	OUT_RING(gpu_addr & 0xffffffff);
	OUT_RING(48 - 1);
	OUT_RING(vtx_word2);
	OUT_RING(1 << 0);
	OUT_RING(0);
	OUT_RING(0);
	OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30);
	ADVANCE_RING();

	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_RV610:
	case CHIP_RV620:
	case CHIP_RS780:
	case CHIP_RS880:
	case CHIP_RV710:
		sync_type = R600_TC_ACTION_ENA;
		break;
	default:
		sync_type = R600_VC_ACTION_ENA;
		break;
	}

	cp_set_surface_sync(dev_priv, sync_type, 48, gpu_addr);
}
/*
 * Describe the blit source as texture resource 0 through a SET_RESOURCE
 * packet.  @w, @h and @pitch are in texels; @h is clamped to at least 1.
 * The same base address is written twice in the packet.
 */
static void
set_tex_resource(drm_radeon_private_t *dev_priv,
		 int format, int w, int h, int pitch, u64 gpu_addr)
{
	uint32_t tex_word0, tex_word1, tex_word4;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if (h < 1)
		h = 1;

	tex_word0 = (1 << 0);
	tex_word0 |= ((((pitch >> 3) - 1) << 8) |
		      ((w - 1) << 19));

	tex_word1 = (format << 26);
	tex_word1 |= ((h - 1) << 0);

	tex_word4 = ((1 << 14) |
		     (0 << 16) |
		     (1 << 19) |
		     (2 << 22) |
		     (3 << 25));

	BEGIN_RING(9);
	OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
	OUT_RING(0);
	OUT_RING(tex_word0);
	OUT_RING(tex_word1);
	OUT_RING(gpu_addr >> 8);
	OUT_RING(gpu_addr >> 8);
	OUT_RING(tex_word4);
	OUT_RING(0);
	OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);
	ADVANCE_RING();
}
/*
 * Set the screen, generic and window scissor rectangles to the same
 * (x1,y1)-(x2,y2) box.  The generic and window top-left words additionally
 * have bit 31 set.
 */
static void
set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
{
	RING_LOCALS;

	DRM_DEBUG("\n");

	BEGIN_RING(12);

	/* screen scissor */
	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
	OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
	OUT_RING((x1 << 0) | (y1 << 16));
	OUT_RING((x2 << 0) | (y2 << 16));

	/* generic scissor */
	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
	OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
	OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
	OUT_RING((x2 << 0) | (y2 << 16));

	/* window scissor */
	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
	OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
	OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
	OUT_RING((x2 << 0) | (y2 << 16));

	ADVANCE_RING();
}
/*
 * Emit the draw itself: select the rect-list primitive type, 16-bit index
 * size, one instance, and an auto-indexed draw of 3 vertices; then commit
 * the ring.
 */
static void
draw_auto(drm_radeon_private_t *dev_priv)
{
#ifdef __BIG_ENDIAN
	/* big-endian hosts additionally set (2 << 2) in the index type */
	const u32 index_type = (2 << 2) | DI_INDEX_SIZE_16_BIT;
#else
	const u32 index_type = DI_INDEX_SIZE_16_BIT;
#endif
	RING_LOCALS;

	DRM_DEBUG("\n");

	BEGIN_RING(10);

	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
	OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2);
	OUT_RING(DI_PT_RECTLIST);

	OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
	OUT_RING(index_type);

	OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
	OUT_RING(1);

	OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
	OUT_RING(3);
	OUT_RING(DI_SRC_SEL_AUTO_INDEX);

	ADVANCE_RING();
	COMMIT_RING();
}
/*
 * Emit the default 3D state table for the chip generation, followed by a
 * cache flush/invalidate event and the SQ configuration (GPR, thread and
 * stack-entry budgets) chosen per chip family.
 */
static void
set_default_state(drm_radeon_private_t *dev_priv)
{
	/* Per-family shader resource budget programmed into the SQ block. */
	struct sq_budget {
		int ps_gprs, vs_gprs, temp_gprs, gs_gprs, es_gprs;
		int ps_threads, vs_threads, gs_threads, es_threads;
		int ps_stack, vs_stack, gs_stack, es_stack;
	};
	/*                  gprs: ps   vs  tmp gs es | threads: ps vs gs es | stack: ps vs gs es */
	static const struct sq_budget budget_r600 = {
		192, 56, 4, 0, 0,	136, 48, 4, 4,	128, 128, 0, 0
	};
	static const struct sq_budget budget_rv630 = {
		84, 36, 4, 0, 0,	144, 40, 4, 4,	40, 40, 32, 16
	};
	static const struct sq_budget budget_rv610 = {
		84, 36, 4, 0, 0,	136, 48, 4, 4,	40, 40, 32, 16
	};
	static const struct sq_budget budget_rv670 = {
		144, 40, 4, 0, 0,	136, 48, 4, 4,	40, 40, 32, 16
	};
	static const struct sq_budget budget_rv770 = {
		192, 56, 4, 0, 0,	188, 60, 0, 0,	256, 256, 0, 0
	};
	static const struct sq_budget budget_rv730 = {
		84, 36, 4, 0, 0,	188, 60, 0, 0,	128, 128, 0, 0
	};
	static const struct sq_budget budget_rv710 = {
		192, 56, 4, 0, 0,	144, 48, 0, 0,	128, 128, 0, 0
	};
	const u32 family = dev_priv->flags & RADEON_FAMILY_MASK;
	const struct sq_budget *budget;
	const u32 *default_state;
	u32 default_dw;
	u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
	u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
	int i;
	RING_LOCALS;

	switch (family) {
	case CHIP_R600:
		budget = &budget_r600;
		break;
	case CHIP_RV630:
	case CHIP_RV635:
		budget = &budget_rv630;
		break;
	case CHIP_RV670:
		budget = &budget_rv670;
		break;
	case CHIP_RV770:
		budget = &budget_rv770;
		break;
	case CHIP_RV730:
	case CHIP_RV740:
		budget = &budget_rv730;
		break;
	case CHIP_RV710:
		budget = &budget_rv710;
		break;
	case CHIP_RV610:
	case CHIP_RV620:
	case CHIP_RS780:
	case CHIP_RS880:
	default:
		/* unlisted families share the RV610 values */
		budget = &budget_rv610;
		break;
	}

	/* R600_VC_ENABLE is left out for these five families */
	switch (family) {
	case CHIP_RV610:
	case CHIP_RV620:
	case CHIP_RS780:
	case CHIP_RS880:
	case CHIP_RV710:
		sq_config = 0;
		break;
	default:
		sq_config = R600_VC_ENABLE;
		break;
	}

	sq_config |= (R600_DX9_CONSTS |
		      R600_ALU_INST_PREFER_VECTOR |
		      R600_PS_PRIO(0) |
		      R600_VS_PRIO(1) |
		      R600_GS_PRIO(2) |
		      R600_ES_PRIO(3));

	sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(budget->ps_gprs) |
				  R600_NUM_VS_GPRS(budget->vs_gprs) |
				  R600_NUM_CLAUSE_TEMP_GPRS(budget->temp_gprs));
	sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(budget->gs_gprs) |
				  R600_NUM_ES_GPRS(budget->es_gprs));
	sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(budget->ps_threads) |
				   R600_NUM_VS_THREADS(budget->vs_threads) |
				   R600_NUM_GS_THREADS(budget->gs_threads) |
				   R600_NUM_ES_THREADS(budget->es_threads));
	sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(budget->ps_stack) |
				    R600_NUM_VS_STACK_ENTRIES(budget->vs_stack));
	sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(budget->gs_stack) |
				    R600_NUM_ES_STACK_ENTRIES(budget->es_stack));

	/* RV770 and later use the r7xx state table, older chips the r6xx one */
	if (family >= CHIP_RV770) {
		default_state = r7xx_default_state;
		default_dw = r7xx_default_size;
	} else {
		default_state = r6xx_default_state;
		default_dw = r6xx_default_size;
	}

	/* 10 extra dwords: 2 for the event write, 8 for the SQ config */
	BEGIN_RING(default_dw + 10);
	for (i = 0; i < default_dw; i++)
		OUT_RING(default_state[i]);

	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);

	/* SQ config */
	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));
	OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2);
	OUT_RING(sq_config);
	OUT_RING(sq_gpr_resource_mgmt_1);
	OUT_RING(sq_gpr_resource_mgmt_2);
	OUT_RING(sq_thread_resource_mgmt);
	OUT_RING(sq_stack_resource_mgmt_1);
	OUT_RING(sq_stack_resource_mgmt_2);

	ADVANCE_RING();
}
/*
 * Take a buffer from the freelist and install it as the blit vertex buffer.
 * Returns 0 on success.  On failure dev_priv->blit_vb is left NULL and
 * -EAGAIN is returned.
 */
static int r600_nomm_get_vb(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;

	dev_priv->blit_vb = radeon_freelist_get(dev);
	if (dev_priv->blit_vb)
		return 0;

	DRM_ERROR("Unable to allocate vertex buffer for blit\n");
	return -EAGAIN;
}
/*
 * Release the current blit vertex buffer: reset its fill level and hand it
 * to radeon_cp_discard_buffer() on behalf of the owning file's master.
 *
 * dev_priv->blit_vb is NULL after a failed r600_nomm_get_vb(), and
 * r600_blit_copy()/r600_blit_swap() return early in that state, so a later
 * r600_done_blit_copy() would otherwise dereference a NULL pointer here.
 * In that case there is nothing to release.
 */
static void r600_nomm_put_vb(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;

	if (!dev_priv->blit_vb)
		return;

	dev_priv->blit_vb->used = 0;
	radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
}
/*
 * Return the CPU address of the first unused byte of the blit vertex
 * buffer: AGP buffer map base + buffer offset + bytes already used.
 */
static void *r600_nomm_get_vb_ptr(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	char *map_base = (char *)dev->agp_buffer_map->handle;

	return map_base + dev_priv->blit_vb->offset + dev_priv->blit_vb->used;
}
/*
 * Set up for a series of blits: grab a vertex buffer, record @file_priv as
 * its owner, and emit the default state and the blit shaders.
 * Returns 0 on success or the error from r600_nomm_get_vb().
 */
int
r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int err;

	DRM_DEBUG("\n");

	err = r600_nomm_get_vb(dev);
	if (!err) {
		dev_priv->blit_vb->file_priv = file_priv;
		set_default_state(dev_priv);
		set_shaders(dev);
	}

	return err;
}
/*
 * Finish a series of blits: flush and invalidate the caches, wait for the
 * 3D engine to be idle and clean, commit the ring, and release the blit
 * vertex buffer.
 */
void
r600_done_blit_copy(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("\n");

	BEGIN_RING(5);

	/* cache flush + invalidate event */
	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);

	/* WAIT_UNTIL: 3D idle and clean */
	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
	OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
	OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);

	ADVANCE_RING();
	COMMIT_RING();

	r600_nomm_put_vb(dev);
}
/*
 * Copy @size_bytes bytes from @src_gpu_addr to @dst_gpu_addr by drawing
 * textured rectangles.
 *
 * When the size and both addresses are multiples of 4, the data is treated
 * as 32-bit texels (up to 8192 texels per row); otherwise as 8-bit texels
 * (up to 8192 bytes per row).  Each loop iteration copies one rectangle:
 *  - both addresses 256-byte aligned and at least one full row left: up to
 *    8192 full rows;
 *  - otherwise: a single row, shortened so that neither the source nor the
 *    destination runs past max_bytes from its 256-byte-aligned base.
 *
 * Each rectangle uses 48 bytes of the blit vertex buffer; when the buffer
 * is full it is released and a new one fetched.  If that fails the function
 * returns early, leaving the copy incomplete and dev_priv->blit_vb NULL.
 */
void
r600_blit_copy(struct drm_device *dev,
	       uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
	       int size_bytes)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int max_bytes, texel_bytes, tex_format, cb_format;
	u64 vb_addr;
	u32 *vb;

	vb = r600_nomm_get_vb_ptr(dev);

	if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
		/* not dword aligned: copy as 8-bit texels */
		texel_bytes = 1;
		max_bytes = 8192;
		tex_format = FMT_8;
		cb_format = COLOR_8;
	} else {
		/* dword aligned: copy as 32-bit texels */
		texel_bytes = 4;
		max_bytes = 8192 * 4;
		tex_format = FMT_8_8_8_8;
		cb_format = COLOR_8_8_8_8;
	}

	while (size_bytes) {
		int cur_size = size_bytes;
		/* byte offsets inside the 256-byte-aligned surfaces */
		int src_x = src_gpu_addr & 255;
		int dst_x = dst_gpu_addr & 255;
		int h = 1;
		/* rectangle left/right edges in texels */
		int src_x1, src_x2, dst_x1, dst_x2;

		src_gpu_addr = src_gpu_addr & ~255;
		dst_gpu_addr = dst_gpu_addr & ~255;

		if (!src_x && !dst_x) {
			h = (cur_size / max_bytes);
			if (h > 8192)
				h = 8192;
			if (h == 0)
				h = 1;	/* less than one full row left */
			else
				cur_size = max_bytes;
		} else {
			if (cur_size > max_bytes)
				cur_size = max_bytes;
			if (cur_size > (max_bytes - dst_x))
				cur_size = (max_bytes - dst_x);
			if (cur_size > (max_bytes - src_x))
				cur_size = (max_bytes - src_x);
		}

		/* need room for 12 dwords of vertex data */
		if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
			r600_nomm_put_vb(dev);
			r600_nomm_get_vb(dev);
			if (!dev_priv->blit_vb)
				return;
			set_shaders(dev);
			vb = r600_nomm_get_vb_ptr(dev);
		}

		src_x1 = src_x / texel_bytes;
		dst_x1 = dst_x / texel_bytes;
		src_x2 = (src_x + cur_size) / texel_bytes;
		dst_x2 = (dst_x + cur_size) / texel_bytes;

		/* three vertices, each (dst x, dst y, src x, src y) */
		vb[0] = int2float(dst_x1);
		vb[1] = 0;
		vb[2] = int2float(src_x1);
		vb[3] = 0;

		vb[4] = int2float(dst_x1);
		vb[5] = int2float(h);
		vb[6] = int2float(src_x1);
		vb[7] = int2float(h);

		vb[8] = int2float(dst_x2);
		vb[9] = int2float(h);
		vb[10] = int2float(src_x2);
		vb[11] = int2float(h);

		/* src */
		set_tex_resource(dev_priv, tex_format,
				 src_x2, h, src_x2,
				 src_gpu_addr);

		cp_set_surface_sync(dev_priv,
				    R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);

		/* dst */
		set_render_target(dev_priv, cb_format,
				  dst_x2, h,
				  dst_gpu_addr);

		/*
		 * scissors: the right edge is the rectangle's right edge in
		 * texels.  The dword path previously passed
		 * dst_x + cur_size / 4, which mixes byte and texel units and
		 * overshoots the rectangle whenever dst_x is non-zero.
		 */
		set_scissors(dev_priv, dst_x1, 0, dst_x2, h);

		/* Vertex buffer setup */
		vb_addr = dev_priv->gart_buffers_offset +
			dev_priv->blit_vb->offset +
			dev_priv->blit_vb->used;
		set_vtx_resource(dev_priv, vb_addr);

		/* draw */
		draw_auto(dev_priv);

		cp_set_surface_sync(dev_priv,
				    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
				    cur_size * h, dst_gpu_addr);

		vb += 12;
		dev_priv->blit_vb->used += 12 * 4;

		src_gpu_addr += cur_size * h;
		dst_gpu_addr += cur_size * h;
		size_bytes -= cur_size * h;
	}
}
/*
 * Blit a @w x @h pixel rectangle from (@sx,@sy) in the source surface to
 * (@dx,@dy) in the destination surface.  @cpp selects the format: 4 gives
 * 8_8_8_8, 2 gives 5_6_5, anything else gives 8-bit.  Pitches are in bytes.
 *
 * Uses 48 bytes of the blit vertex buffer; if the buffer is full and a new
 * one cannot be obtained, returns without drawing.
 */
void
r600_blit_swap(struct drm_device *dev,
	       uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
	       int sx, int sy, int dx, int dy,
	       int w, int h, int src_pitch, int dst_pitch, int cpp)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	const int src_right = sx + w;
	const int src_bottom = sy + h;
	const int dst_right = dx + w;
	const int dst_bottom = dy + h;
	int cb_format, tex_format;
	u64 vb_addr;
	u32 *vb;

	if (cpp == 4) {
		cb_format = COLOR_8_8_8_8;
		tex_format = FMT_8_8_8_8;
	} else if (cpp == 2) {
		cb_format = COLOR_5_6_5;
		tex_format = FMT_5_6_5;
	} else {
		cb_format = COLOR_8;
		tex_format = FMT_8;
	}

	/* need room for 12 dwords of vertex data */
	if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
		r600_nomm_put_vb(dev);
		r600_nomm_get_vb(dev);
		if (!dev_priv->blit_vb)
			return;
		set_shaders(dev);
	}
	vb = r600_nomm_get_vb_ptr(dev);

	/* three vertices, each (dst x, dst y, src x, src y) */
	vb[0] = int2float(dx);
	vb[1] = int2float(dy);
	vb[2] = int2float(sx);
	vb[3] = int2float(sy);

	vb[4] = int2float(dx);
	vb[5] = int2float(dst_bottom);
	vb[6] = int2float(sx);
	vb[7] = int2float(src_bottom);

	vb[8] = int2float(dst_right);
	vb[9] = int2float(dst_bottom);
	vb[10] = int2float(src_right);
	vb[11] = int2float(src_bottom);

	/* src */
	set_tex_resource(dev_priv, tex_format,
			 src_pitch / cpp,
			 src_bottom, src_pitch / cpp,
			 src_gpu_addr);

	cp_set_surface_sync(dev_priv,
			    R600_TC_ACTION_ENA, src_pitch * src_bottom, src_gpu_addr);

	/* dst */
	set_render_target(dev_priv, cb_format,
			  dst_pitch / cpp, dst_bottom,
			  dst_gpu_addr);

	/* scissors */
	set_scissors(dev_priv, dx, dy, dst_right, dst_bottom);

	/* Vertex buffer setup */
	vb_addr = dev_priv->gart_buffers_offset +
		dev_priv->blit_vb->offset +
		dev_priv->blit_vb->used;
	set_vtx_resource(dev_priv, vb_addr);

	/* draw */
	draw_auto(dev_priv);

	cp_set_surface_sync(dev_priv,
			    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
			    dst_pitch * dst_bottom, dst_gpu_addr);

	dev_priv->blit_vb->used += 12 * 4;
}
/*
* Copyright 2008-2009 Advanced Micro Devices, Inc.
* Copyright 2008 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Dave Airlie <airlied@redhat.com>
* Alex Deucher <alexander.deucher@amd.com>
*
* ------------------------ This file is DEPRECATED! -------------------------
*/
#include <linux/module.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_drv.h"
/*
 * CP microcode image sizes. r600_cp_init_microcode() requires the PFP image
 * to be PFP_UCODE_SIZE * 4 bytes and the ME image PM4_UCODE_SIZE * 12 bytes
 * on pre-RV770 parts, and R700_PFP_UCODE_SIZE * 4 / R700_PM4_UCODE_SIZE * 4
 * bytes on RV770 and newer. The load routines write the same amounts as
 * 32-bit words (PM4_UCODE_SIZE * 3 words for the pre-RV770 ME image).
 */
#define PFP_UCODE_SIZE 576
#define PM4_UCODE_SIZE 1792
#define R700_PFP_UCODE_SIZE 848
#define R700_PM4_UCODE_SIZE 1360
/* Firmware Names */
MODULE_FIRMWARE("radeon/R600_pfp.bin");
MODULE_FIRMWARE("radeon/R600_me.bin");
MODULE_FIRMWARE("radeon/RV610_pfp.bin");
MODULE_FIRMWARE("radeon/RV610_me.bin");
MODULE_FIRMWARE("radeon/RV630_pfp.bin");
MODULE_FIRMWARE("radeon/RV630_me.bin");
MODULE_FIRMWARE("radeon/RV620_pfp.bin");
MODULE_FIRMWARE("radeon/RV620_me.bin");
MODULE_FIRMWARE("radeon/RV635_pfp.bin");
MODULE_FIRMWARE("radeon/RV635_me.bin");
MODULE_FIRMWARE("radeon/RV670_pfp.bin");
MODULE_FIRMWARE("radeon/RV670_me.bin");
MODULE_FIRMWARE("radeon/RS780_pfp.bin");
MODULE_FIRMWARE("radeon/RS780_me.bin");
MODULE_FIRMWARE("radeon/RV770_pfp.bin");
MODULE_FIRMWARE("radeon/RV770_me.bin");
MODULE_FIRMWARE("radeon/RV730_pfp.bin");
MODULE_FIRMWARE("radeon/RV730_me.bin");
MODULE_FIRMWARE("radeon/RV710_pfp.bin");
MODULE_FIRMWARE("radeon/RV710_me.bin");
/* Legacy command-stream checker entry points (not defined in this part of the file) */
int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
unsigned family, u32 *ib, int *l);
void r600_cs_legacy_init(void);
/* GART page granularity used when filling the page table in r600_page_table_init() */
# define ATI_PCIGART_PAGE_SIZE 4096 /**< PCI GART page size */
# define ATI_PCIGART_PAGE_MASK (~(ATI_PCIGART_PAGE_SIZE-1))
/* Flag bits ORed into every 64-bit GART page table entry */
#define R600_PTE_VALID (1 << 0)
#define R600_PTE_SYSTEM (1 << 1)
#define R600_PTE_SNOOPED (1 << 2)
#define R600_PTE_READABLE (1 << 5)
#define R600_PTE_WRITEABLE (1 << 6)
/* MAX values used for gfx init */
#define R6XX_MAX_SH_GPRS 256
#define R6XX_MAX_TEMP_GPRS 16
#define R6XX_MAX_SH_THREADS 256
#define R6XX_MAX_SH_STACK_ENTRIES 4096
#define R6XX_MAX_BACKENDS 8
#define R6XX_MAX_BACKENDS_MASK 0xff
#define R6XX_MAX_SIMDS 8
#define R6XX_MAX_SIMDS_MASK 0xff
#define R6XX_MAX_PIPES 8
#define R6XX_MAX_PIPES_MASK 0xff
/* Same limits for the R7xx family; note the larger SIMD count and mask */
#define R7XX_MAX_SH_GPRS 256
#define R7XX_MAX_TEMP_GPRS 16
#define R7XX_MAX_SH_THREADS 256
#define R7XX_MAX_SH_STACK_ENTRIES 4096
#define R7XX_MAX_BACKENDS 8
#define R7XX_MAX_BACKENDS_MASK 0xff
#define R7XX_MAX_SIMDS 16
#define R7XX_MAX_SIMDS_MASK 0xffff
#define R7XX_MAX_PIPES 8
#define R7XX_MAX_PIPES_MASK 0xff
/*
 * Poll GRBM_STATUS until the command FIFO reports at least @entries free
 * slots. One poll is made per microsecond, for at most
 * dev_priv->usec_timeout polls.
 *
 * Returns 0 as soon as enough slots are available, or -EBUSY (after logging
 * both GRBM status registers) if the timeout expires.
 */
static int r600_do_wait_for_fifo(drm_radeon_private_t *dev_priv, int entries)
{
	int usec;

	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;

	for (usec = 0; usec < dev_priv->usec_timeout; usec++) {
		u32 status = RADEON_READ(R600_GRBM_STATUS);
		int free_slots;

		/* RV770 and newer use a different mask for the free-slot field */
		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)
			free_slots = status & R700_CMDFIFO_AVAIL_MASK;
		else
			free_slots = status & R600_CMDFIFO_AVAIL_MASK;

		if (free_slots >= entries)
			return 0;

		DRM_UDELAY(1);
	}

	DRM_INFO("wait for fifo failed status : 0x%08X 0x%08X\n",
		 RADEON_READ(R600_GRBM_STATUS),
		 RADEON_READ(R600_GRBM_STATUS2));

	return -EBUSY;
}
/*
 * Wait for the graphics engine to go idle.
 *
 * First waits for the command FIFO to have room (8 entries on RV770 and
 * newer, 16 otherwise), then polls GRBM_STATUS once per microsecond until
 * R600_GUI_ACTIVE clears or dev_priv->usec_timeout polls have elapsed.
 *
 * Returns 0 when idle, the FIFO wait's error code if that wait failed, or
 * -EBUSY (after logging both GRBM status registers) on timeout.
 */
static int r600_do_wait_for_idle(drm_radeon_private_t *dev_priv)
{
	int fifo_entries;
	int usec;
	int err;

	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;

	fifo_entries = 16;
	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)
		fifo_entries = 8;

	err = r600_do_wait_for_fifo(dev_priv, fifo_entries);
	if (err)
		return err;

	for (usec = 0; usec < dev_priv->usec_timeout; usec++) {
		if ((RADEON_READ(R600_GRBM_STATUS) & R600_GUI_ACTIVE) == 0)
			return 0;
		DRM_UDELAY(1);
	}

	DRM_INFO("wait idle failed status : 0x%08X 0x%08X\n",
		 RADEON_READ(R600_GRBM_STATUS),
		 RADEON_READ(R600_GRBM_STATUS2));

	return -EBUSY;
}
/*
 * Undo the DMA mappings created for the GART page table.
 *
 * Does nothing when the device has no scatter/gather memory or when
 * gart_info->bus_addr is zero. Otherwise unmaps scatter/gather pages in
 * order, stopping at the first entry whose bus address is zero or once the
 * page count (capped by how many 64-bit entries fit in the table) is
 * reached. When the table lives in main memory, bus_addr is cleared last.
 */
void r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info)
{
	struct drm_sg_mem *sg = dev->sg;
	int table_entries;
	int npages;
	int idx;

	if (!sg)
		return;
	if (!gart_info->bus_addr)
		return;

	/* Never walk more pages than the table has 64-bit slots for */
	table_entries = (gart_info->table_size / sizeof(u64));
	if (sg->pages <= table_entries)
		npages = sg->pages;
	else
		npages = table_entries;

	for (idx = 0; idx < npages && sg->busaddr[idx]; idx++)
		pci_unmap_page(dev->pdev, sg->busaddr[idx],
			       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);

	if (gart_info->gart_table_location == DRM_ATI_GART_MAIN)
		gart_info->bus_addr = 0;
}
/*
 * R600 has page table setup
 *
 * Build the GART page table from the device's scatter/gather memory.
 *
 * Every scatter/gather page is DMA-mapped and then written into the table as
 * PAGE_SIZE / ATI_PCIGART_PAGE_SIZE consecutive 64-bit entries, each flagged
 * valid, system, snooped, readable and writeable. The number of pages used is
 * capped so that the entries fit in gart_info->table_size. The whole table is
 * zeroed before any entry is written.
 *
 * Returns 1 on success and 0 on failure (no scatter/gather memory, or a DMA
 * mapping error). On a mapping error the pages mapped so far are released
 * through r600_page_table_cleanup().
 */
int r600_page_table_init(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_ati_pcigart_info *gart_info = &dev_priv->gart_info;
	struct drm_local_map *map = &gart_info->mapping;
	struct drm_sg_mem *entry = dev->sg;
	int i, j;
	int pages;
	u64 page_base;
	dma_addr_t entry_addr;
	int max_ati_pages, max_real_pages, gart_idx;

	/* The cleanup path tolerates a missing sg area; do the same here */
	if (!entry) {
		DRM_ERROR("no scatter/gather memory!\n");
		return 0;
	}

	/* okay page table is available - lets rock */
	max_ati_pages = (gart_info->table_size / sizeof(u64));
	max_real_pages = max_ati_pages / (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE);

	pages = (entry->pages <= max_real_pages) ?
		entry->pages : max_real_pages;

	memset_io((void __iomem *)map->handle, 0, max_ati_pages * sizeof(u64));

	gart_idx = 0;
	for (i = 0; i < pages; i++) {
		entry->busaddr[i] = pci_map_page(dev->pdev,
						 entry->pagelist[i], 0,
						 PAGE_SIZE,
						 PCI_DMA_BIDIRECTIONAL);
		if (pci_dma_mapping_error(dev->pdev, entry->busaddr[i])) {
			DRM_ERROR("unable to map PCIGART pages!\n");
			/*
			 * The failed slot holds an error cookie, not a
			 * mapping. Zero it so the cleanup loop stops here
			 * instead of trying to unmap it.
			 */
			entry->busaddr[i] = 0;
			r600_page_table_cleanup(dev, gart_info);
			return 0;
		}

		/* One CPU page may span several GART-sized pages */
		entry_addr = entry->busaddr[i];
		for (j = 0; j < (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE); j++) {
			page_base = (u64) entry_addr & ATI_PCIGART_PAGE_MASK;
			page_base |= R600_PTE_VALID | R600_PTE_SYSTEM | R600_PTE_SNOOPED;
			page_base |= R600_PTE_READABLE | R600_PTE_WRITEABLE;

			DRM_WRITE64(map, gart_idx * sizeof(u64), page_base);

			gart_idx++;

			if ((i % 128) == 0)
				DRM_DEBUG("page entry %d: 0x%016llx\n",
					  i, (unsigned long long)page_base);
			entry_addr += ATI_PCIGART_PAGE_SIZE;
		}
	}

	return 1;
}
/*
 * Request an invalidation of VM context 0 over the whole GART aperture.
 *
 * Programs the invalidation low/high addresses (in 4 KiB units) from
 * gart_vm_start and gart_size, writes 2 to the request/response register,
 * then polls that register once per microsecond until any of bits 7:4 is
 * set or 1000 polls have been made. A timeout is not reported.
 */
static void r600_vm_flush_gart_range(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	u32 polls_left = 1000;
	u32 response;

	RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_LOW_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
	RADEON_WRITE(R600_VM_CONTEXT0_REQUEST_RESPONSE, 2);

	for (;;) {
		response = RADEON_READ(R600_VM_CONTEXT0_REQUEST_RESPONSE);
		polls_left--;
		DRM_UDELAY(1);

		if ((response & 0xf0) != 0 || polls_left == 0)
			break;
	}
}
/*
 * Program the R6xx memory controller and VM block so that VM context 0
 * translates the GART aperture through the page table at
 * dev_priv->gart_info.bus_addr. All addresses are written in 4 KiB units
 * (>> 12). Contexts 1-7 are left disabled. Ends by flushing the GART range.
 */
static void r600_vm_init(struct drm_device *dev)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
/* initialise the VM to use the page table we constructed up there */
u32 vm_c0, i;
u32 mc_rd_a;
u32 vm_l2_cntl, vm_l2_cntl3;
/* okay set up the PCIE aperture type thingo */
RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12);
RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);
/* setup MC RD a */
/* the same base L1 TLB setting is written to every MCD client below */
mc_rd_a = R600_MCD_L1_TLB | R600_MCD_L1_FRAG_PROC | R600_MCD_SYSTEM_ACCESS_MODE_IN_SYS |
R600_MCD_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | R600_MCD_EFFECTIVE_L1_TLB_SIZE(5) |
R600_MCD_EFFECTIVE_L1_QUEUE_SIZE(5) | R600_MCD_WAIT_L2_QUERY;
RADEON_WRITE(R600_MCD_RD_A_CNTL, mc_rd_a);
RADEON_WRITE(R600_MCD_RD_B_CNTL, mc_rd_a);
RADEON_WRITE(R600_MCD_WR_A_CNTL, mc_rd_a);
RADEON_WRITE(R600_MCD_WR_B_CNTL, mc_rd_a);
RADEON_WRITE(R600_MCD_RD_GFX_CNTL, mc_rd_a);
RADEON_WRITE(R600_MCD_WR_GFX_CNTL, mc_rd_a);
RADEON_WRITE(R600_MCD_RD_SYS_CNTL, mc_rd_a);
RADEON_WRITE(R600_MCD_WR_SYS_CNTL, mc_rd_a);
/* HDP reads additionally get strict ordering; HDP writes deliberately do not */
RADEON_WRITE(R600_MCD_RD_HDP_CNTL, mc_rd_a | R600_MCD_L1_STRICT_ORDERING);
RADEON_WRITE(R600_MCD_WR_HDP_CNTL, mc_rd_a /*| R600_MCD_L1_STRICT_ORDERING*/);
RADEON_WRITE(R600_MCD_RD_PDMA_CNTL, mc_rd_a);
RADEON_WRITE(R600_MCD_WR_PDMA_CNTL, mc_rd_a);
RADEON_WRITE(R600_MCD_RD_SEM_CNTL, mc_rd_a | R600_MCD_SEMAPHORE_MODE);
RADEON_WRITE(R600_MCD_WR_SEM_CNTL, mc_rd_a);
/* L2 cache control */
vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W;
vm_l2_cntl |= R600_VM_L2_CNTL_QUEUE_SIZE(7);
RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl);
RADEON_WRITE(R600_VM_L2_CNTL2, 0);
vm_l2_cntl3 = (R600_VM_L2_CNTL3_BANK_SELECT_0(0) |
R600_VM_L2_CNTL3_BANK_SELECT_1(1) |
R600_VM_L2_CNTL3_CACHE_UPDATE_MODE(2));
RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3);
/* enable context 0 with a flat (single-level) page table */
vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT;
RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0);
vm_c0 &= ~R600_VM_ENABLE_CONTEXT;
/* disable all other contexts */
/* context N's control register sits at CONTEXT0_CNTL + N * 4 */
for (i = 1; i < 8; i++)
RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0);
/* point context 0 at the page table and the GART address range it covers */
RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12);
RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12);
RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
r600_vm_flush_gart_range(dev);
}
/*
 * Fetch the PFP and ME microcode images for this chip family.
 *
 * A temporary "r600_cp" platform device is registered only to serve as the
 * device argument for request_firmware() and is unregistered before
 * returning. Each image must have exactly the size expected for the family.
 *
 * Returns 0 on success with dev_priv->pfp_fw and dev_priv->me_fw set. Returns
 * -EINVAL if the platform device cannot be registered or an image has the
 * wrong size, or the request_firmware() error code otherwise. On any failure
 * after registration both firmware pointers are released and set to NULL.
 */
static int r600_cp_init_microcode(drm_radeon_private_t *dev_priv)
{
	struct platform_device *fw_dev;
	const char *family_name;
	size_t pfp_bytes, me_bytes;
	char path[30];
	int err;

	fw_dev = platform_device_register_simple("r600_cp", 0, NULL, 0);
	if (IS_ERR(fw_dev)) {
		printk(KERN_ERR "r600_cp: Failed to register firmware\n");
		return -EINVAL;
	}

	/* Several families share another family's firmware files */
	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_R600:
		family_name = "R600";
		break;
	case CHIP_RV610:
		family_name = "RV610";
		break;
	case CHIP_RV630:
		family_name = "RV630";
		break;
	case CHIP_RV620:
		family_name = "RV620";
		break;
	case CHIP_RV635:
		family_name = "RV635";
		break;
	case CHIP_RV670:
		family_name = "RV670";
		break;
	case CHIP_RS780:
	case CHIP_RS880:
		family_name = "RS780";
		break;
	case CHIP_RV770:
		family_name = "RV770";
		break;
	case CHIP_RV730:
	case CHIP_RV740:
		family_name = "RV730";
		break;
	case CHIP_RV710:
		family_name = "RV710";
		break;
	default:
		BUG();
	}

	if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770) {
		pfp_bytes = PFP_UCODE_SIZE * 4;
		me_bytes = PM4_UCODE_SIZE * 12;
	} else {
		pfp_bytes = R700_PFP_UCODE_SIZE * 4;
		me_bytes = R700_PM4_UCODE_SIZE * 4;
	}

	DRM_INFO("Loading %s CP Microcode\n", family_name);

	snprintf(path, sizeof(path), "radeon/%s_pfp.bin", family_name);
	err = request_firmware(&dev_priv->pfp_fw, path, &fw_dev->dev);
	if (err)
		goto out;
	if (dev_priv->pfp_fw->size != pfp_bytes) {
		printk(KERN_ERR
		       "r600_cp: Bogus length %zu in firmware \"%s\"\n",
		       dev_priv->pfp_fw->size, path);
		err = -EINVAL;
		goto out;
	}

	snprintf(path, sizeof(path), "radeon/%s_me.bin", family_name);
	err = request_firmware(&dev_priv->me_fw, path, &fw_dev->dev);
	if (err)
		goto out;
	if (dev_priv->me_fw->size != me_bytes) {
		printk(KERN_ERR
		       "r600_cp: Bogus length %zu in firmware \"%s\"\n",
		       dev_priv->me_fw->size, path);
		err = -EINVAL;
	}

out:
	platform_device_unregister(fw_dev);

	if (!err)
		return err;

	/* Size mismatches were already reported above */
	if (err != -EINVAL)
		printk(KERN_ERR
		       "r600_cp: Failed to load firmware \"%s\"\n",
		       path);

	release_firmware(dev_priv->pfp_fw);
	dev_priv->pfp_fw = NULL;
	release_firmware(dev_priv->me_fw);
	dev_priv->me_fw = NULL;

	return err;
}
/*
 * Upload the pre-RV770 ME and PFP microcode into the CP.
 *
 * Returns silently if either firmware image is missing. Otherwise stops the
 * CP, programs the ring buffer control register, pulses the CP soft reset,
 * then streams PM4_UCODE_SIZE * 3 big-endian words into the ME RAM and
 * PFP_UCODE_SIZE words into the PFP ucode store, and finally rewinds the
 * PFP and ME address registers to 0.
 */
static void r600_cp_load_microcode(drm_radeon_private_t *dev_priv)
{
	const __be32 *ucode;
	u32 rb_cntl;
	int word;

	if (!dev_priv->me_fw || !dev_priv->pfp_fw)
		return;

	r600_do_cp_stop(dev_priv);

	rb_cntl = R600_RB_NO_UPDATE | R600_RB_BLKSZ(15) | R600_RB_BUFSZ(3);
#ifdef __BIG_ENDIAN
	rb_cntl |= R600_BUF_SWAP_32BIT;
#endif
	RADEON_WRITE(R600_CP_RB_CNTL, rb_cntl);

	/* Pulse the CP soft reset; the read-back precedes the delay */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
	RADEON_READ(R600_GRBM_SOFT_RESET);
	mdelay(15);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);

	ucode = (const __be32 *)dev_priv->me_fw->data;
	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
	for (word = 0; word < PM4_UCODE_SIZE * 3; word++)
		RADEON_WRITE(R600_CP_ME_RAM_DATA, be32_to_cpup(&ucode[word]));

	ucode = (const __be32 *)dev_priv->pfp_fw->data;
	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
	for (word = 0; word < PFP_UCODE_SIZE; word++)
		RADEON_WRITE(R600_CP_PFP_UCODE_DATA, be32_to_cpup(&ucode[word]));

	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
	RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0);
}
/*
 * R7xx counterpart of r600_vm_init(): program the memory controller L1 TLBs
 * and the VM block so that VM context 0 translates the GART aperture through
 * the page table at dev_priv->gart_info.bus_addr. All addresses are written
 * in 4 KiB units (>> 12). Contexts 1-7 are left disabled. Ends by flushing
 * the GART range.
 */
static void r700_vm_init(struct drm_device *dev)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
/* initialise the VM to use the page table we constructed up there */
u32 vm_c0, i;
u32 mc_vm_md_l1;
u32 vm_l2_cntl, vm_l2_cntl3;
/* okay set up the PCIE aperture type thingo */
RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12);
RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);
/* one L1 TLB setting, written to all MD and MB TLB control registers */
mc_vm_md_l1 = R700_ENABLE_L1_TLB |
R700_ENABLE_L1_FRAGMENT_PROCESSING |
R700_SYSTEM_ACCESS_MODE_IN_SYS |
R700_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
R700_EFFECTIVE_L1_TLB_SIZE(5) |
R700_EFFECTIVE_L1_QUEUE_SIZE(5);
RADEON_WRITE(R700_MC_VM_MD_L1_TLB0_CNTL, mc_vm_md_l1);
RADEON_WRITE(R700_MC_VM_MD_L1_TLB1_CNTL, mc_vm_md_l1);
RADEON_WRITE(R700_MC_VM_MD_L1_TLB2_CNTL, mc_vm_md_l1);
RADEON_WRITE(R700_MC_VM_MB_L1_TLB0_CNTL, mc_vm_md_l1);
RADEON_WRITE(R700_MC_VM_MB_L1_TLB1_CNTL, mc_vm_md_l1);
RADEON_WRITE(R700_MC_VM_MB_L1_TLB2_CNTL, mc_vm_md_l1);
RADEON_WRITE(R700_MC_VM_MB_L1_TLB3_CNTL, mc_vm_md_l1);
/* L2 cache control; the L2 registers are the R600-named ones */
vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W;
vm_l2_cntl |= R700_VM_L2_CNTL_QUEUE_SIZE(7);
RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl);
RADEON_WRITE(R600_VM_L2_CNTL2, 0);
vm_l2_cntl3 = R700_VM_L2_CNTL3_BANK_SELECT(0) | R700_VM_L2_CNTL3_CACHE_UPDATE_MODE(2);
RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3);
/* enable context 0 with a flat (single-level) page table */
vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT;
RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0);
vm_c0 &= ~R600_VM_ENABLE_CONTEXT;
/* disable all other contexts */
/* context N's control register sits at CONTEXT0_CNTL + N * 4 */
for (i = 1; i < 8; i++)
RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0);
/* point context 0 at the page table and the GART address range it covers */
RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12);
RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12);
RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
r600_vm_flush_gart_range(dev);
}
/*
 * Upload the RV770-and-newer PFP and ME microcode into the CP.
 *
 * Returns silently if either firmware image is missing. Otherwise stops the
 * CP, programs the ring buffer control register, pulses the CP soft reset,
 * then streams R700_PFP_UCODE_SIZE big-endian words into the PFP ucode store
 * and R700_PM4_UCODE_SIZE words into the ME RAM (PFP first, unlike the
 * pre-RV770 loader), rewinding the address registers to 0 afterwards.
 */
static void r700_cp_load_microcode(drm_radeon_private_t *dev_priv)
{
	const __be32 *ucode;
	u32 rb_cntl;
	int word;

	if (!dev_priv->me_fw || !dev_priv->pfp_fw)
		return;

	r600_do_cp_stop(dev_priv);

	rb_cntl = R600_RB_NO_UPDATE | R600_RB_BLKSZ(15) | R600_RB_BUFSZ(3);
#ifdef __BIG_ENDIAN
	rb_cntl |= R600_BUF_SWAP_32BIT;
#endif
	RADEON_WRITE(R600_CP_RB_CNTL, rb_cntl);

	/* Pulse the CP soft reset; the read-back precedes the delay */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
	RADEON_READ(R600_GRBM_SOFT_RESET);
	mdelay(15);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);

	ucode = (const __be32 *)dev_priv->pfp_fw->data;
	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
	for (word = 0; word < R700_PFP_UCODE_SIZE; word++)
		RADEON_WRITE(R600_CP_PFP_UCODE_DATA, be32_to_cpup(&ucode[word]));
	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);

	ucode = (const __be32 *)dev_priv->me_fw->data;
	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
	for (word = 0; word < R700_PM4_UCODE_SIZE; word++)
		RADEON_WRITE(R600_CP_ME_RAM_DATA, be32_to_cpup(&ucode[word]));
	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);

	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
	RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0);
}
/*
 * Probe whether scratch register writeback works and record the result in
 * dev_priv->writeback_works.
 *
 * The scratch slot in the ring read-pointer area is cleared, 0xdeadbeef is
 * written to SCRATCH_REG1, and the slot is polled once per microsecond (up to
 * dev_priv->usec_timeout polls) for that value to appear. The radeon_no_wb
 * option forces the result to "not working". When writeback is not in use,
 * ring pointer updates and the scratch writeback mask are turned off.
 */
static void r600_test_writeback(drm_radeon_private_t *dev_priv)
{
	u32 usecs;
	u32 rb_cntl;

	/* Start with assuming that writeback doesn't work */
	dev_priv->writeback_works = 0;

	/*
	 * Writeback doesn't seem to work everywhere, test it here and possibly
	 * enable it if it appears to work
	 */
	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0);
	RADEON_WRITE(R600_SCRATCH_REG1, 0xdeadbeef);

	for (usecs = 0; usecs < dev_priv->usec_timeout; usecs++) {
		u32 seen = radeon_read_ring_rptr(dev_priv, R600_SCRATCHOFF(1));

		if (seen == 0xdeadbeef)
			break;
		DRM_UDELAY(1);
	}

	if (usecs < dev_priv->usec_timeout) {
		dev_priv->writeback_works = 1;
		DRM_INFO("writeback test succeeded in %d usecs\n", usecs);
	} else {
		dev_priv->writeback_works = 0;
		DRM_INFO("writeback test failed\n");
	}

	if (radeon_no_wb == 1) {
		dev_priv->writeback_works = 0;
		DRM_INFO("writeback forced off\n");
	}

	if (dev_priv->writeback_works)
		return;

	/* Disable writeback to avoid unnecessary bus master transfer */
	rb_cntl = RADEON_READ(R600_CP_RB_CNTL) | R600_RB_NO_UPDATE;
#ifdef __BIG_ENDIAN
	rb_cntl |= R600_BUF_SWAP_32BIT;
#endif
	RADEON_WRITE(R600_CP_RB_CNTL, rb_cntl);
	RADEON_WRITE(R600_SCRATCH_UMSK, 0);
}
/*
 * Soft-reset the GPU and put the CP ring back into a consistent state.
 *
 * The ring write pointer and the ME/ring-buffer control values are saved
 * around a GRBM soft reset, after which the read and write pointers are both
 * forced to the saved write pointer and the saved control values restored.
 * The CP ring is then reset, the CP is marked as not running and the buffer
 * freelist is reset.
 *
 * Always returns 0.
 */
int r600_do_engine_reset(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	u32 saved_wptr, saved_me_cntl, saved_rb_cntl;
	u32 rptr_wr_cntl;

	DRM_INFO("Resetting GPU\n");

	/* Remember where the ring was, then halt the micro engine */
	saved_wptr = RADEON_READ(R600_CP_RB_WPTR);
	saved_me_cntl = RADEON_READ(R600_CP_ME_CNTL);
	RADEON_WRITE(R600_CP_ME_CNTL, R600_CP_ME_HALT);

	/* Assert the soft reset bits, hold for 50us, then release them */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0x7fff);
	RADEON_READ(R600_GRBM_SOFT_RESET);
	DRM_UDELAY(50);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);
	RADEON_READ(R600_GRBM_SOFT_RESET);

	RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);
	saved_rb_cntl = RADEON_READ(R600_CP_RB_CNTL);

	/* Temporarily allow the read pointer to be written directly */
	rptr_wr_cntl = R600_RB_RPTR_WR_ENA;
#ifdef __BIG_ENDIAN
	rptr_wr_cntl |= R600_BUF_SWAP_32BIT;
#endif
	RADEON_WRITE(R600_CP_RB_CNTL, rptr_wr_cntl);

	RADEON_WRITE(R600_CP_RB_RPTR_WR, saved_wptr);
	RADEON_WRITE(R600_CP_RB_WPTR, saved_wptr);
	RADEON_WRITE(R600_CP_RB_CNTL, saved_rb_cntl);
	RADEON_WRITE(R600_CP_ME_CNTL, saved_me_cntl);

	/* Reset the CP ring */
	r600_do_cp_reset(dev_priv);

	/* The CP is no longer running after an engine reset */
	dev_priv->cp_running = 0;

	/* Reset any pending vertex, indirect buffers */
	radeon_freelist_reset(dev);

	return 0;
}
/*
 * Build the R6xx tile-pipe to render-backend map.
 *
 * @num_tile_pipes and @num_backends are clamped to 1..R6XX_MAX_PIPES and
 * 1..R6XX_MAX_BACKENDS. Backends whose bit is clear in @backend_disable_mask
 * are collected, lowest bit first, until @num_backends have been found; if
 * none is enabled, backend 0 is used. Enabled backends are then handed out
 * round-robin to the tile pipes.
 *
 * Returns a word holding one 2-bit field per pipe: the field at bit position
 * swizzle * 2 contains (backend & 3) for the pipe with that swizzle slot.
 */
static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
					     u32 num_backends,
					     u32 backend_disable_mask)
{
	/* Row N - 1 is the swizzle order used when N tile pipes are active */
	static const u32 swizzle_by_count[R6XX_MAX_PIPES][R6XX_MAX_PIPES] = {
		{ 0 },
		{ 0, 1 },
		{ 0, 1, 2 },
		{ 0, 1, 2, 3 },
		{ 0, 1, 2, 3, 4 },
		{ 0, 2, 4, 5, 1, 3 },
		{ 0, 2, 4, 6, 1, 3, 5 },
		{ 0, 2, 4, 6, 1, 3, 5, 7 },
	};
	const u32 *swizzle;
	u32 map = 0;
	u32 enabled_mask = 0;
	u32 enabled_count = 0;
	u32 backend;
	u32 pipe;
	u32 bit;

	if (num_tile_pipes < 1)
		num_tile_pipes = 1;
	else if (num_tile_pipes > R6XX_MAX_PIPES)
		num_tile_pipes = R6XX_MAX_PIPES;

	if (num_backends < 1)
		num_backends = 1;
	else if (num_backends > R6XX_MAX_BACKENDS)
		num_backends = R6XX_MAX_BACKENDS;

	/* Collect enabled backends until the requested number is reached */
	for (bit = 0;
	     bit < R6XX_MAX_BACKENDS && enabled_count != num_backends;
	     ++bit) {
		if (!((backend_disable_mask >> bit) & 1)) {
			enabled_mask |= (1 << bit);
			++enabled_count;
		}
	}

	/* Everything disabled: fall back to backend 0 */
	if (enabled_count == 0) {
		enabled_mask = 1;
		enabled_count = 1;
	}

	swizzle = swizzle_by_count[num_tile_pipes - 1];

	backend = 0;
	for (pipe = 0; pipe < num_tile_pipes; ++pipe) {
		/* Skip ahead to the next enabled backend, wrapping around */
		while (!((1 << backend) & enabled_mask))
			backend = (backend + 1) % R6XX_MAX_BACKENDS;

		map |= (u32)(((backend & 3) << (swizzle[pipe] * 2)));

		backend = (backend + 1) % R6XX_MAX_BACKENDS;
	}

	return map;
}
/* Return the number of set bits in @val (thin wrapper around hweight32()). */
static int r600_count_pipe_bits(uint32_t val)
{
return hweight32(val);
}
/*
 * One-time graphics block initialisation for the R6xx families.
 *
 * Records the per-family limits (pipes, SIMDs, backends, GPRs, ...) in
 * dev_priv, clears a bank of HDP registers, derives and programs the tiling /
 * backend / shader-pipe configuration, and then writes default values for the
 * 3D engine (SQ resource split, sample locations, VGT, CB bases, TC, ...).
 * The statements are register writes whose order is kept as-is.
 *
 * NOTE(review): families not listed in the first switch fall through its
 * default case without setting any dev_priv->r600_max_* field; the code below
 * then uses whatever those fields already held - confirm callers only reach
 * this function for the listed families.
 */
static void r600_gfx_init(struct drm_device *dev,
drm_radeon_private_t *dev_priv)
{
int i, j, num_qd_pipes;
u32 sx_debug_1;
u32 tc_cntl;
u32 arb_pop;
u32 num_gs_verts_per_thread;
u32 vgt_gs_per_es;
u32 gs_prim_buffer_depth = 0;
u32 sq_ms_fifo_sizes;
u32 sq_config;
u32 sq_gpr_resource_mgmt_1 = 0;
u32 sq_gpr_resource_mgmt_2 = 0;
u32 sq_thread_resource_mgmt = 0;
u32 sq_stack_resource_mgmt_1 = 0;
u32 sq_stack_resource_mgmt_2 = 0;
u32 hdp_host_path_cntl;
u32 backend_map;
u32 gb_tiling_config = 0;
u32 cc_rb_backend_disable;
u32 cc_gc_shader_pipe_config;
u32 ramcfg;
/* setup chip specs */
switch (dev_priv->flags & RADEON_FAMILY_MASK) {
case CHIP_R600:
dev_priv->r600_max_pipes = 4;
dev_priv->r600_max_tile_pipes = 8;
dev_priv->r600_max_simds = 4;
dev_priv->r600_max_backends = 4;
dev_priv->r600_max_gprs = 256;
dev_priv->r600_max_threads = 192;
dev_priv->r600_max_stack_entries = 256;
dev_priv->r600_max_hw_contexts = 8;
dev_priv->r600_max_gs_threads = 16;
dev_priv->r600_sx_max_export_size = 128;
dev_priv->r600_sx_max_export_pos_size = 16;
dev_priv->r600_sx_max_export_smx_size = 128;
dev_priv->r600_sq_num_cf_insts = 2;
break;
case CHIP_RV630:
case CHIP_RV635:
dev_priv->r600_max_pipes = 2;
dev_priv->r600_max_tile_pipes = 2;
dev_priv->r600_max_simds = 3;
dev_priv->r600_max_backends = 1;
dev_priv->r600_max_gprs = 128;
dev_priv->r600_max_threads = 192;
dev_priv->r600_max_stack_entries = 128;
dev_priv->r600_max_hw_contexts = 8;
dev_priv->r600_max_gs_threads = 4;
dev_priv->r600_sx_max_export_size = 128;
dev_priv->r600_sx_max_export_pos_size = 16;
dev_priv->r600_sx_max_export_smx_size = 128;
dev_priv->r600_sq_num_cf_insts = 2;
break;
case CHIP_RV610:
case CHIP_RS780:
case CHIP_RS880:
case CHIP_RV620:
dev_priv->r600_max_pipes = 1;
dev_priv->r600_max_tile_pipes = 1;
dev_priv->r600_max_simds = 2;
dev_priv->r600_max_backends = 1;
dev_priv->r600_max_gprs = 128;
dev_priv->r600_max_threads = 192;
dev_priv->r600_max_stack_entries = 128;
dev_priv->r600_max_hw_contexts = 4;
dev_priv->r600_max_gs_threads = 4;
dev_priv->r600_sx_max_export_size = 128;
dev_priv->r600_sx_max_export_pos_size = 16;
dev_priv->r600_sx_max_export_smx_size = 128;
dev_priv->r600_sq_num_cf_insts = 1;
break;
case CHIP_RV670:
dev_priv->r600_max_pipes = 4;
dev_priv->r600_max_tile_pipes = 4;
dev_priv->r600_max_simds = 4;
dev_priv->r600_max_backends = 4;
dev_priv->r600_max_gprs = 192;
dev_priv->r600_max_threads = 192;
dev_priv->r600_max_stack_entries = 256;
dev_priv->r600_max_hw_contexts = 8;
dev_priv->r600_max_gs_threads = 16;
dev_priv->r600_sx_max_export_size = 128;
dev_priv->r600_sx_max_export_pos_size = 16;
dev_priv->r600_sx_max_export_smx_size = 128;
dev_priv->r600_sq_num_cf_insts = 2;
break;
default:
break;
}
/* Initialize HDP */
/* zero 32 groups of five registers, each group 0x18 bytes apart, from 0x2c14 */
j = 0;
for (i = 0; i < 32; i++) {
RADEON_WRITE((0x2c14 + j), 0x00000000);
RADEON_WRITE((0x2c18 + j), 0x00000000);
RADEON_WRITE((0x2c1c + j), 0x00000000);
RADEON_WRITE((0x2c20 + j), 0x00000000);
RADEON_WRITE((0x2c24 + j), 0x00000000);
j += 0x18;
}
RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff));
/* setup tiling, simd, pipe config */
ramcfg = RADEON_READ(R600_RAMCFG);
/* pipe tiling field is log2 of the tile pipe count (1, 2, 4 or 8) */
switch (dev_priv->r600_max_tile_pipes) {
case 1:
gb_tiling_config |= R600_PIPE_TILING(0);
break;
case 2:
gb_tiling_config |= R600_PIPE_TILING(1);
break;
case 4:
gb_tiling_config |= R600_PIPE_TILING(2);
break;
case 8:
gb_tiling_config |= R600_PIPE_TILING(3);
break;
default:
break;
}
gb_tiling_config |= R600_BANK_TILING((ramcfg >> R600_NOOFBANK_SHIFT) & R600_NOOFBANK_MASK);
gb_tiling_config |= R600_GROUP_SIZE(0);
/* row tiling and sample split follow the RAMCFG row field, capped at 3 */
if (((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK) > 3) {
gb_tiling_config |= R600_ROW_TILING(3);
gb_tiling_config |= R600_SAMPLE_SPLIT(3);
} else {
gb_tiling_config |=
R600_ROW_TILING(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK));
gb_tiling_config |=
R600_SAMPLE_SPLIT(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK));
}
gb_tiling_config |= R600_BANK_SWAPS(1);
/* mark every backend / pipe / SIMD above the family maximum as disabled */
cc_rb_backend_disable = RADEON_READ(R600_CC_RB_BACKEND_DISABLE) & 0x00ff0000;
cc_rb_backend_disable |=
R600_BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R6XX_MAX_BACKENDS_MASK);
cc_gc_shader_pipe_config = RADEON_READ(R600_CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
cc_gc_shader_pipe_config |=
R600_INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R6XX_MAX_PIPES_MASK);
cc_gc_shader_pipe_config |=
R600_INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R6XX_MAX_SIMDS_MASK);
/*
 * NOTE(review): R6XX_MAX_BACKENDS_MASK is 0xff, so
 * (cc_rb_backend_disable & R6XX_MAX_BACKENDS_MASK) >> 16 is always 0 and the
 * backend count passed here is always R6XX_MAX_BACKENDS. The callee recounts
 * enabled backends from the disable mask in its third argument, so the map
 * does not depend on this count - masking before shifting looks
 * unintentional.
 */
backend_map = r600_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes,
(R6XX_MAX_BACKENDS -
r600_count_pipe_bits((cc_rb_backend_disable &
R6XX_MAX_BACKENDS_MASK) >> 16)),
(cc_rb_backend_disable >> 16));
gb_tiling_config |= R600_BACKEND_MAP(backend_map);
RADEON_WRITE(R600_GB_TILING_CONFIG, gb_tiling_config);
RADEON_WRITE(R600_DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
RADEON_WRITE(R600_HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
/* cache group size, pipe count and bank count decoded from the tiling config */
if (gb_tiling_config & 0xc0) {
dev_priv->r600_group_size = 512;
} else {
dev_priv->r600_group_size = 256;
}
dev_priv->r600_npipes = 1 << ((gb_tiling_config >> 1) & 0x7);
if (gb_tiling_config & 0x30) {
dev_priv->r600_nbanks = 8;
} else {
dev_priv->r600_nbanks = 4;
}
RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
/* active quad pipes = maximum minus the ones flagged inactive above */
num_qd_pipes =
R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK) >> 8);
RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);
RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);
/* set HW defaults for 3D engine */
RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) |
R600_ROQ_IB2_START(0x2b)));
RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, (R600_MEQ_END(0x40) |
R600_ROQ_END(0x40)));
RADEON_WRITE(R600_TA_CNTL_AUX, (R600_DISABLE_CUBE_ANISO |
R600_SYNC_GRADIENT |
R600_SYNC_WALKER |
R600_SYNC_ALIGNER));
if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670)
RADEON_WRITE(R600_ARB_GDEC_RD_CNTL, 0x00000021);
sx_debug_1 = RADEON_READ(R600_SX_DEBUG_1);
sx_debug_1 |= R600_SMX_EVENT_RELEASE;
/* every family after the original R600 also gets the new SMX addressing bit */
if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600))
sx_debug_1 |= R600_ENABLE_NEW_SMX_ADDRESS;
RADEON_WRITE(R600_SX_DEBUG_1, sx_debug_1);
if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) ||
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) ||
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880))
RADEON_WRITE(R600_DB_DEBUG, R600_PREZ_MUST_WAIT_FOR_POSTZ_DONE);
else
RADEON_WRITE(R600_DB_DEBUG, 0);
RADEON_WRITE(R600_DB_WATERMARKS, (R600_DEPTH_FREE(4) |
R600_DEPTH_FLUSH(16) |
R600_DEPTH_PENDING_FREE(4) |
R600_DEPTH_CACHELINE_FREE(16)));
RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
RADEON_WRITE(R600_VGT_NUM_INSTANCES, 0);
RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0));
RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(0));
/* SQ FIFO sizes: fully replaced on RV610-class parts, one field adjusted on R600/RV630 */
sq_ms_fifo_sizes = RADEON_READ(R600_SQ_MS_FIFO_SIZES);
if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) {
sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(0xa) |
R600_FETCH_FIFO_HIWATER(0xa) |
R600_DONE_FIFO_HIWATER(0xe0) |
R600_ALU_UPDATE_FIFO_HIWATER(0x8));
} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) ||
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630)) {
sq_ms_fifo_sizes &= ~R600_DONE_FIFO_HIWATER(0xff);
sq_ms_fifo_sizes |= R600_DONE_FIFO_HIWATER(0x4);
}
RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);
/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
* should be adjusted as needed by the 2D/3D drivers. This just sets default values
*/
sq_config = RADEON_READ(R600_SQ_CONFIG);
sq_config &= ~(R600_PS_PRIO(3) |
R600_VS_PRIO(3) |
R600_GS_PRIO(3) |
R600_ES_PRIO(3));
sq_config |= (R600_DX9_CONSTS |
R600_VC_ENABLE |
R600_PS_PRIO(0) |
R600_VS_PRIO(1) |
R600_GS_PRIO(2) |
R600_ES_PRIO(3));
/* per-family default split of GPRs, threads and stack entries between shader stages */
if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) {
sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(124) |
R600_NUM_VS_GPRS(124) |
R600_NUM_CLAUSE_TEMP_GPRS(4));
sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(0) |
R600_NUM_ES_GPRS(0));
sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(136) |
R600_NUM_VS_THREADS(48) |
R600_NUM_GS_THREADS(4) |
R600_NUM_ES_THREADS(4));
sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(128) |
R600_NUM_VS_STACK_ENTRIES(128));
sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(0) |
R600_NUM_ES_STACK_ENTRIES(0));
} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) {
/* no vertex cache */
sq_config &= ~R600_VC_ENABLE;
sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
R600_NUM_VS_GPRS(44) |
R600_NUM_CLAUSE_TEMP_GPRS(2));
sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) |
R600_NUM_ES_GPRS(17));
sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
R600_NUM_VS_THREADS(78) |
R600_NUM_GS_THREADS(4) |
R600_NUM_ES_THREADS(31));
sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) |
R600_NUM_VS_STACK_ENTRIES(40));
sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) |
R600_NUM_ES_STACK_ENTRIES(16));
} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) ||
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV635)) {
sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
R600_NUM_VS_GPRS(44) |
R600_NUM_CLAUSE_TEMP_GPRS(2));
sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(18) |
R600_NUM_ES_GPRS(18));
sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
R600_NUM_VS_THREADS(78) |
R600_NUM_GS_THREADS(4) |
R600_NUM_ES_THREADS(31));
sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) |
R600_NUM_VS_STACK_ENTRIES(40));
sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) |
R600_NUM_ES_STACK_ENTRIES(16));
} else if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670) {
sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
R600_NUM_VS_GPRS(44) |
R600_NUM_CLAUSE_TEMP_GPRS(2));
sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) |
R600_NUM_ES_GPRS(17));
sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
R600_NUM_VS_THREADS(78) |
R600_NUM_GS_THREADS(4) |
R600_NUM_ES_THREADS(31));
sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(64) |
R600_NUM_VS_STACK_ENTRIES(64));
sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(64) |
R600_NUM_ES_STACK_ENTRIES(64));
}
RADEON_WRITE(R600_SQ_CONFIG, sq_config);
RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1, sq_gpr_resource_mgmt_1);
RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2, sq_gpr_resource_mgmt_2);
RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, sq_stack_resource_mgmt_1);
RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, sq_stack_resource_mgmt_2);
/* the families that had the vertex cache disabled above invalidate the TC only */
if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880))
RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_TC_ONLY));
else
RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_VC_AND_TC));
/* default anti-aliasing sample locations for 2, 4 and 8 samples */
RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_2S, (R600_S0_X(0xc) |
R600_S0_Y(0x4) |
R600_S1_X(0x4) |
R600_S1_Y(0xc)));
RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_4S, (R600_S0_X(0xe) |
R600_S0_Y(0xe) |
R600_S1_X(0x2) |
R600_S1_Y(0x2) |
R600_S2_X(0xa) |
R600_S2_Y(0x6) |
R600_S3_X(0x6) |
R600_S3_Y(0xa)));
RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD0, (R600_S0_X(0xe) |
R600_S0_Y(0xb) |
R600_S1_X(0x4) |
R600_S1_Y(0xc) |
R600_S2_X(0x1) |
R600_S2_Y(0x6) |
R600_S3_X(0xa) |
R600_S3_Y(0xe)));
RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD1, (R600_S4_X(0x6) |
R600_S4_Y(0x1) |
R600_S5_X(0x0) |
R600_S5_Y(0x0) |
R600_S6_X(0xb) |
R600_S6_Y(0x4) |
R600_S7_X(0x7) |
R600_S7_Y(0x8)));
/* per-family GS primitive buffer depth, used for VGT_GS_PER_ES below */
switch (dev_priv->flags & RADEON_FAMILY_MASK) {
case CHIP_R600:
case CHIP_RV630:
case CHIP_RV635:
gs_prim_buffer_depth = 0;
break;
case CHIP_RV610:
case CHIP_RS780:
case CHIP_RS880:
case CHIP_RV620:
gs_prim_buffer_depth = 32;
break;
case CHIP_RV670:
gs_prim_buffer_depth = 128;
break;
default:
break;
}
num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16;
vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
/* Max value for this is 256 */
if (vgt_gs_per_es > 256)
vgt_gs_per_es = 256;
RADEON_WRITE(R600_VGT_ES_PER_GS, 128);
RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es);
RADEON_WRITE(R600_VGT_GS_PER_VS, 2);
RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16);
/* more default values. 2D/3D driver should adjust as needed */
RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0);
RADEON_WRITE(R600_VGT_STRMOUT_EN, 0);
RADEON_WRITE(R600_SX_MISC, 0);
RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0);
RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0);
RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0);
RADEON_WRITE(R600_SPI_INPUT_Z, 0);
RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2));
RADEON_WRITE(R600_CB_COLOR7_FRAG, 0);
/* clear render buffer base addresses */
RADEON_WRITE(R600_CB_COLOR0_BASE, 0);
RADEON_WRITE(R600_CB_COLOR1_BASE, 0);
RADEON_WRITE(R600_CB_COLOR2_BASE, 0);
RADEON_WRITE(R600_CB_COLOR3_BASE, 0);
RADEON_WRITE(R600_CB_COLOR4_BASE, 0);
RADEON_WRITE(R600_CB_COLOR5_BASE, 0);
RADEON_WRITE(R600_CB_COLOR6_BASE, 0);
RADEON_WRITE(R600_CB_COLOR7_BASE, 0);
/* per-family texture cache L2 size setting */
switch (dev_priv->flags & RADEON_FAMILY_MASK) {
case CHIP_RV610:
case CHIP_RS780:
case CHIP_RS880:
case CHIP_RV620:
tc_cntl = R600_TC_L2_SIZE(8);
break;
case CHIP_RV630:
case CHIP_RV635:
tc_cntl = R600_TC_L2_SIZE(4);
break;
case CHIP_R600:
tc_cntl = R600_TC_L2_SIZE(0) | R600_L2_DISABLE_LATE_HIT;
break;
default:
tc_cntl = R600_TC_L2_SIZE(0);
break;
}
RADEON_WRITE(R600_TC_CNTL, tc_cntl);
/* HDP_HOST_PATH_CNTL is read and written back unchanged */
hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL);
RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
arb_pop = RADEON_READ(R600_ARB_POP);
arb_pop |= R600_ENABLE_TC128;
RADEON_WRITE(R600_ARB_POP, arb_pop);
RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA |
R600_NUM_CLIP_SEQ(3)));
RADEON_WRITE(R600_PA_SC_ENHANCE, R600_FORCE_EOV_MAX_CLK_CNT(4095));
}
/*
 * Build the tile-pipe to render-backend map for an R7xx part.
 *
 * Every tile pipe owns a 2-bit field in the returned word.  Enabled backends
 * are handed out round-robin, and the low two bits of the chosen backend
 * index are stored in the field selected by the pipe's slot.  The slot order
 * is the identity for every family except RV770 and RV730, which use the
 * swizzled orders in the table below.
 *
 * num_tile_pipes is clamped to [1, R7XX_MAX_PIPES] and num_backends to
 * [1, R7XX_MAX_BACKENDS].  A set bit in backend_disable_mask marks that
 * backend as unavailable; if no backend is left, backend 0 is used.
 */
static u32 r700_get_tile_pipe_to_backend_map(drm_radeon_private_t *dev_priv,
					     u32 num_tile_pipes,
					     u32 num_backends,
					     u32 backend_disable_mask)
{
	/* Row (n - 1) holds the swizzled slot order used with n tile pipes. */
	static const u32 swizzled_slots[8][8] = {
		{ 0 },
		{ 0, 1 },
		{ 0, 2, 1 },
		{ 0, 2, 3, 1 },
		{ 0, 2, 4, 1, 3 },
		{ 0, 2, 4, 5, 3, 1 },
		{ 0, 2, 4, 6, 3, 1, 5 },
		{ 0, 2, 4, 6, 3, 1, 7, 5 },
	};
	const u32 known_counts = sizeof(swizzled_slots) / sizeof(swizzled_slots[0]);
	const u32 family = dev_priv->flags & RADEON_FAMILY_MASK;
	u32 slot_of_pipe[R7XX_MAX_PIPES];
	u32 usable_mask = 0;
	u32 usable_count = 0;
	u32 map = 0;
	u32 backend = 0;
	u32 pipe;
	u32 bit;
	bool identity_order;

	/* Clamp both counts into their supported ranges. */
	if (num_tile_pipes > R7XX_MAX_PIPES)
		num_tile_pipes = R7XX_MAX_PIPES;
	if (!num_tile_pipes)
		num_tile_pipes = 1;
	if (num_backends > R7XX_MAX_BACKENDS)
		num_backends = R7XX_MAX_BACKENDS;
	if (!num_backends)
		num_backends = 1;

	/* Collect the lowest enabled backends, at most num_backends of them. */
	for (bit = 0;
	     bit < R7XX_MAX_BACKENDS && usable_count != num_backends;
	     ++bit) {
		if ((backend_disable_mask >> bit) & 1)
			continue;
		usable_mask |= (1 << bit);
		++usable_count;
	}

	/* Nothing enabled: fall back to backend 0 so the walk below terminates. */
	if (!usable_count)
		usable_mask = 1;

	/* Only RV770 and RV730 use the swizzled order. */
	identity_order = !(family == CHIP_RV770 || family == CHIP_RV730);

	/* Pipe counts without a known order keep the all-zero slots. */
	memset(slot_of_pipe, 0, sizeof(slot_of_pipe));
	if (num_tile_pipes <= known_counts) {
		for (pipe = 0; pipe < num_tile_pipes; ++pipe) {
			if (identity_order)
				slot_of_pipe[pipe] = pipe;
			else
				slot_of_pipe[pipe] =
					swizzled_slots[num_tile_pipes - 1][pipe];
		}
	}

	/* Walk the pipes, assigning enabled backends round-robin. */
	for (pipe = 0; pipe < num_tile_pipes; ++pipe) {
		while (!((1 << backend) & usable_mask))
			backend = (backend + 1) % R7XX_MAX_BACKENDS;

		map |= (u32)((backend & 3) << (slot_of_pipe[pipe] * 2));

		backend = (backend + 1) % R7XX_MAX_BACKENDS;
	}

	return map;
}
/*
 * Program the R7xx graphics block (RV770/RV730/RV710/RV740) with default
 * state.
 *
 * Fills in the per-family limits in dev_priv (pipe, SIMD and backend counts,
 * GPR/thread/stack budgets, export buffer and FIFO sizes), builds the tiling
 * configuration from R700_MC_ARB_RAMCFG, and writes default values to the
 * registers named below.  r600_group_size, r600_npipes and r600_nbanks are
 * cached in dev_priv from the resulting tiling configuration.
 *
 * dev is not referenced directly in this body; all register access goes
 * through the RADEON_READ/RADEON_WRITE macros.
 */
static void r700_gfx_init(struct drm_device *dev,
drm_radeon_private_t *dev_priv)
{
int i, j, num_qd_pipes;
u32 ta_aux_cntl;
u32 sx_debug_1;
u32 smx_dc_ctl0;
u32 db_debug3;
u32 num_gs_verts_per_thread;
u32 vgt_gs_per_es;
u32 gs_prim_buffer_depth = 0;
u32 sq_ms_fifo_sizes;
u32 sq_config;
u32 sq_thread_resource_mgmt;
u32 hdp_host_path_cntl;
u32 sq_dyn_gpr_size_simd_ab_0;
u32 backend_map;
u32 gb_tiling_config = 0;
u32 cc_rb_backend_disable;
u32 cc_gc_shader_pipe_config;
u32 mc_arb_ramcfg;
u32 db_debug4;
/* setup chip specs */
/* Families not listed here leave the dev_priv limits untouched. */
switch (dev_priv->flags & RADEON_FAMILY_MASK) {
case CHIP_RV770:
dev_priv->r600_max_pipes = 4;
dev_priv->r600_max_tile_pipes = 8;
dev_priv->r600_max_simds = 10;
dev_priv->r600_max_backends = 4;
dev_priv->r600_max_gprs = 256;
dev_priv->r600_max_threads = 248;
dev_priv->r600_max_stack_entries = 512;
dev_priv->r600_max_hw_contexts = 8;
dev_priv->r600_max_gs_threads = 16 * 2;
dev_priv->r600_sx_max_export_size = 128;
dev_priv->r600_sx_max_export_pos_size = 16;
dev_priv->r600_sx_max_export_smx_size = 112;
dev_priv->r600_sq_num_cf_insts = 2;
dev_priv->r700_sx_num_of_sets = 7;
dev_priv->r700_sc_prim_fifo_size = 0xF9;
dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
break;
case CHIP_RV730:
dev_priv->r600_max_pipes = 2;
dev_priv->r600_max_tile_pipes = 4;
dev_priv->r600_max_simds = 8;
dev_priv->r600_max_backends = 2;
dev_priv->r600_max_gprs = 128;
dev_priv->r600_max_threads = 248;
dev_priv->r600_max_stack_entries = 256;
dev_priv->r600_max_hw_contexts = 8;
dev_priv->r600_max_gs_threads = 16 * 2;
dev_priv->r600_sx_max_export_size = 256;
dev_priv->r600_sx_max_export_pos_size = 32;
dev_priv->r600_sx_max_export_smx_size = 224;
dev_priv->r600_sq_num_cf_insts = 2;
dev_priv->r700_sx_num_of_sets = 7;
dev_priv->r700_sc_prim_fifo_size = 0xf9;
dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
/* Move 16 entries from the position export budget to the SMX budget. */
if (dev_priv->r600_sx_max_export_pos_size > 16) {
dev_priv->r600_sx_max_export_pos_size -= 16;
dev_priv->r600_sx_max_export_smx_size += 16;
}
break;
case CHIP_RV710:
dev_priv->r600_max_pipes = 2;
dev_priv->r600_max_tile_pipes = 2;
dev_priv->r600_max_simds = 2;
dev_priv->r600_max_backends = 1;
dev_priv->r600_max_gprs = 256;
dev_priv->r600_max_threads = 192;
dev_priv->r600_max_stack_entries = 256;
dev_priv->r600_max_hw_contexts = 4;
dev_priv->r600_max_gs_threads = 8 * 2;
dev_priv->r600_sx_max_export_size = 128;
dev_priv->r600_sx_max_export_pos_size = 16;
dev_priv->r600_sx_max_export_smx_size = 112;
dev_priv->r600_sq_num_cf_insts = 1;
dev_priv->r700_sx_num_of_sets = 7;
dev_priv->r700_sc_prim_fifo_size = 0x40;
dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
break;
case CHIP_RV740:
dev_priv->r600_max_pipes = 4;
dev_priv->r600_max_tile_pipes = 4;
dev_priv->r600_max_simds = 8;
dev_priv->r600_max_backends = 4;
dev_priv->r600_max_gprs = 256;
dev_priv->r600_max_threads = 248;
dev_priv->r600_max_stack_entries = 512;
dev_priv->r600_max_hw_contexts = 8;
dev_priv->r600_max_gs_threads = 16 * 2;
dev_priv->r600_sx_max_export_size = 256;
dev_priv->r600_sx_max_export_pos_size = 32;
dev_priv->r600_sx_max_export_smx_size = 224;
dev_priv->r600_sq_num_cf_insts = 2;
dev_priv->r700_sx_num_of_sets = 7;
dev_priv->r700_sc_prim_fifo_size = 0x100;
dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
/* Move 16 entries from the position export budget to the SMX budget. */
if (dev_priv->r600_sx_max_export_pos_size > 16) {
dev_priv->r600_sx_max_export_pos_size -= 16;
dev_priv->r600_sx_max_export_smx_size += 16;
}
break;
default:
break;
}
/* Initialize HDP */
/* Zero 32 groups of five registers; groups start at 0x2c14 and are 0x18 apart. */
j = 0;
for (i = 0; i < 32; i++) {
RADEON_WRITE((0x2c14 + j), 0x00000000);
RADEON_WRITE((0x2c18 + j), 0x00000000);
RADEON_WRITE((0x2c1c + j), 0x00000000);
RADEON_WRITE((0x2c20 + j), 0x00000000);
RADEON_WRITE((0x2c24 + j), 0x00000000);
j += 0x18;
}
RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff));
/* setup tiling, simd, pipe config */
mc_arb_ramcfg = RADEON_READ(R700_MC_ARB_RAMCFG);
/* PIPE_TILING takes log2 of the tile pipe count; other counts add nothing. */
switch (dev_priv->r600_max_tile_pipes) {
case 1:
gb_tiling_config |= R600_PIPE_TILING(0);
break;
case 2:
gb_tiling_config |= R600_PIPE_TILING(1);
break;
case 4:
gb_tiling_config |= R600_PIPE_TILING(2);
break;
case 8:
gb_tiling_config |= R600_PIPE_TILING(3);
break;
default:
break;
}
/* RV770 uses a fixed bank tiling value; the others take it from MC_ARB_RAMCFG. */
if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770)
gb_tiling_config |= R600_BANK_TILING(1);
else
gb_tiling_config |= R600_BANK_TILING((mc_arb_ramcfg >> R700_NOOFBANK_SHIFT) & R700_NOOFBANK_MASK);
gb_tiling_config |= R600_GROUP_SIZE(0);
/* Row tiling and sample split follow the NOOFROWS field, capped at 3. */
if (((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK) > 3) {
gb_tiling_config |= R600_ROW_TILING(3);
gb_tiling_config |= R600_SAMPLE_SPLIT(3);
} else {
gb_tiling_config |=
R600_ROW_TILING(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK));
gb_tiling_config |=
R600_SAMPLE_SPLIT(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK));
}
gb_tiling_config |= R600_BANK_SWAPS(1);
/*
 * Keep bits 16-23 of the current backend-disable value and additionally
 * mark every backend above r600_max_backends as disabled.
 */
cc_rb_backend_disable = RADEON_READ(R600_CC_RB_BACKEND_DISABLE) & 0x00ff0000;
cc_rb_backend_disable |=
R600_BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R7XX_MAX_BACKENDS_MASK);
/* Same idea for the shader pipe config: mask off pipes and SIMDs above the family limits. */
cc_gc_shader_pipe_config = RADEON_READ(R600_CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
cc_gc_shader_pipe_config |=
R600_INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R7XX_MAX_PIPES_MASK);
cc_gc_shader_pipe_config |=
R600_INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R7XX_MAX_SIMDS_MASK);
/*
 * RV740 uses a fixed backend map; everything else computes one.
 * NOTE(review): the backend count below masks with R7XX_MAX_BACKENDS_MASK
 * before shifting right by 16, while cc_rb_backend_disable was masked to
 * 0x00ff0000 above.  If the mask only covers low bits this always counts
 * zero disabled backends -- confirm against the mask definition.
 */
if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV740)
backend_map = 0x28;
else
backend_map = r700_get_tile_pipe_to_backend_map(dev_priv,
dev_priv->r600_max_tile_pipes,
(R7XX_MAX_BACKENDS -
r600_count_pipe_bits((cc_rb_backend_disable &
R7XX_MAX_BACKENDS_MASK) >> 16)),
(cc_rb_backend_disable >> 16));
gb_tiling_config |= R600_BACKEND_MAP(backend_map);
/* The DCP and HDP copies only receive the low 16 bits of the tiling config. */
RADEON_WRITE(R600_GB_TILING_CONFIG, gb_tiling_config);
RADEON_WRITE(R600_DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
RADEON_WRITE(R600_HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
/* Cache group size, pipe count and bank count decoded from the tiling config bits. */
if (gb_tiling_config & 0xc0) {
dev_priv->r600_group_size = 512;
} else {
dev_priv->r600_group_size = 256;
}
dev_priv->r600_npipes = 1 << ((gb_tiling_config >> 1) & 0x7);
if (gb_tiling_config & 0x30) {
dev_priv->r600_nbanks = 8;
} else {
dev_priv->r600_nbanks = 4;
}
RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
RADEON_WRITE(R700_CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
RADEON_WRITE(R700_CGTS_SYS_TCC_DISABLE, 0);
RADEON_WRITE(R700_CGTS_TCC_DISABLE, 0);
RADEON_WRITE(R700_CGTS_USER_SYS_TCC_DISABLE, 0);
RADEON_WRITE(R700_CGTS_USER_TCC_DISABLE, 0);
/* Active quad pipes = maximum minus the ones flagged inactive above. */
num_qd_pipes =
R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK) >> 8);
RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);
RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);
/* set HW defaults for 3D engine */
RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) |
R600_ROQ_IB2_START(0x2b)));
RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, R700_STQ_SPLIT(0x30));
/* Read-modify-write: set R600_DISABLE_CUBE_ANISO in TA_CNTL_AUX. */
ta_aux_cntl = RADEON_READ(R600_TA_CNTL_AUX);
RADEON_WRITE(R600_TA_CNTL_AUX, ta_aux_cntl | R600_DISABLE_CUBE_ANISO);
/* Read-modify-write: set R700_ENABLE_NEW_SMX_ADDRESS in SX_DEBUG_1. */
sx_debug_1 = RADEON_READ(R700_SX_DEBUG_1);
sx_debug_1 |= R700_ENABLE_NEW_SMX_ADDRESS;
RADEON_WRITE(R700_SX_DEBUG_1, sx_debug_1);
/* Replace the CACHE_DEPTH field with (number of sets * 64) - 1. */
smx_dc_ctl0 = RADEON_READ(R600_SMX_DC_CTL0);
smx_dc_ctl0 &= ~R700_CACHE_DEPTH(0x1ff);
smx_dc_ctl0 |= R700_CACHE_DEPTH((dev_priv->r700_sx_num_of_sets * 64) - 1);
RADEON_WRITE(R600_SMX_DC_CTL0, smx_dc_ctl0);
/* SMX_EVENT_CTL is programmed on every family except RV740. */
if ((dev_priv->flags & RADEON_FAMILY_MASK) != CHIP_RV740)
RADEON_WRITE(R700_SMX_EVENT_CTL, (R700_ES_FLUSH_CTL(4) |
R700_GS_FLUSH_CTL(4) |
R700_ACK_FLUSH_CTL(3) |
R700_SYNC_FLUSH_CTL));
/* Replace the DB_CLK_OFF_DELAY field with a per-family value. */
db_debug3 = RADEON_READ(R700_DB_DEBUG3);
db_debug3 &= ~R700_DB_CLK_OFF_DELAY(0x1f);
switch (dev_priv->flags & RADEON_FAMILY_MASK) {
case CHIP_RV770:
case CHIP_RV740:
db_debug3 |= R700_DB_CLK_OFF_DELAY(0x1f);
break;
case CHIP_RV710:
case CHIP_RV730:
default:
db_debug3 |= R700_DB_CLK_OFF_DELAY(2);
break;
}
RADEON_WRITE(R700_DB_DEBUG3, db_debug3);
/* Every family except RV770 also sets DISABLE_TILE_COVERED_FOR_PS_ITER. */
if ((dev_priv->flags & RADEON_FAMILY_MASK) != CHIP_RV770) {
db_debug4 = RADEON_READ(RV700_DB_DEBUG4);
db_debug4 |= RV700_DISABLE_TILE_COVERED_FOR_PS_ITER;
RADEON_WRITE(RV700_DB_DEBUG4, db_debug4);
}
/* Export buffer sizes are written as (size / 4) - 1. */
RADEON_WRITE(R600_SX_EXPORT_BUFFER_SIZES, (R600_COLOR_BUFFER_SIZE((dev_priv->r600_sx_max_export_size / 4) - 1) |
R600_POSITION_BUFFER_SIZE((dev_priv->r600_sx_max_export_pos_size / 4) - 1) |
R600_SMX_BUFFER_SIZE((dev_priv->r600_sx_max_export_smx_size / 4) - 1)));
RADEON_WRITE(R700_PA_SC_FIFO_SIZE_R7XX, (R700_SC_PRIM_FIFO_SIZE(dev_priv->r700_sc_prim_fifo_size) |
R700_SC_HIZ_TILE_FIFO_SIZE(dev_priv->r700_sc_hiz_tile_fifo_size) |
R700_SC_EARLYZ_TILE_FIFO_SIZE(dev_priv->r700_sc_earlyz_tile_fifo_fize)));
RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
RADEON_WRITE(R600_VGT_NUM_INSTANCES, 1);
RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0));
RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(4));
RADEON_WRITE(R600_CP_PERFMON_CNTL, 0);
/* SQ FIFO sizes; only the fetch FIFO high-water mark differs per family. */
sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(16 * dev_priv->r600_sq_num_cf_insts) |
R600_DONE_FIFO_HIWATER(0xe0) |
R600_ALU_UPDATE_FIFO_HIWATER(0x8));
switch (dev_priv->flags & RADEON_FAMILY_MASK) {
case CHIP_RV770:
case CHIP_RV730:
case CHIP_RV710:
sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x1);
break;
case CHIP_RV740:
default:
sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x4);
break;
}
RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);
/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
* should be adjusted as needed by the 2D/3D drivers. This just sets default values
*/
/* Clear all four priority fields, then set PS=0, VS=1, GS=2, ES=3. */
sq_config = RADEON_READ(R600_SQ_CONFIG);
sq_config &= ~(R600_PS_PRIO(3) |
R600_VS_PRIO(3) |
R600_GS_PRIO(3) |
R600_ES_PRIO(3));
sq_config |= (R600_DX9_CONSTS |
R600_VC_ENABLE |
R600_EXPORT_SRC_C |
R600_PS_PRIO(0) |
R600_VS_PRIO(1) |
R600_GS_PRIO(2) |
R600_ES_PRIO(3));
if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)
/* no vertex cache */
sq_config &= ~R600_VC_ENABLE;
RADEON_WRITE(R600_SQ_CONFIG, sq_config);
/* Split the GPR budget: 24/64 each to PS and VS, 7/64 each to GS and ES. */
RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1, (R600_NUM_PS_GPRS((dev_priv->r600_max_gprs * 24)/64) |
R600_NUM_VS_GPRS((dev_priv->r600_max_gprs * 24)/64) |
R600_NUM_CLAUSE_TEMP_GPRS(((dev_priv->r600_max_gprs * 24)/64)/2)));
RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2, (R600_NUM_GS_GPRS((dev_priv->r600_max_gprs * 7)/64) |
R600_NUM_ES_GPRS((dev_priv->r600_max_gprs * 7)/64)));
/* Split the thread budget: 4/8 to PS, 2/8 to VS, 1/8 to ES. */
sq_thread_resource_mgmt = (R600_NUM_PS_THREADS((dev_priv->r600_max_threads * 4)/8) |
R600_NUM_VS_THREADS((dev_priv->r600_max_threads * 2)/8) |
R600_NUM_ES_THREADS((dev_priv->r600_max_threads * 1)/8));
/*
 * NOTE(review): the else branch divides r600_max_gs_threads (not
 * r600_max_threads) by 8 -- confirm this is the intended value.
 */
if (((dev_priv->r600_max_threads * 1) / 8) > dev_priv->r600_max_gs_threads)
sq_thread_resource_mgmt |= R600_NUM_GS_THREADS(dev_priv->r600_max_gs_threads);
else
sq_thread_resource_mgmt |= R600_NUM_GS_THREADS((dev_priv->r600_max_gs_threads * 1)/8);
RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
/* Each of PS, VS, GS and ES gets a quarter of the stack entries. */
RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, (R600_NUM_PS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) |
R600_NUM_VS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4)));
RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, (R600_NUM_GS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) |
R600_NUM_ES_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4)));
/* The same dynamic GPR size (38/64 of the budget) goes to all eight SIMD_AB registers. */
sq_dyn_gpr_size_simd_ab_0 = (R700_SIMDA_RING0((dev_priv->r600_max_gprs * 38)/64) |
R700_SIMDA_RING1((dev_priv->r600_max_gprs * 38)/64) |
R700_SIMDB_RING0((dev_priv->r600_max_gprs * 38)/64) |
R700_SIMDB_RING1((dev_priv->r600_max_gprs * 38)/64));
RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0);
RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0);
RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0);
RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0);
RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0);
RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0);
RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0);
RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0);
RADEON_WRITE(R700_PA_SC_FORCE_EOV_MAX_CNTS, (R700_FORCE_EOV_MAX_CLK_CNT(4095) |
R700_FORCE_EOV_MAX_REZ_CNT(255)));
/* RV710 invalidates the texture cache only; the others use VC_AND_TC. */
if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)
RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_TC_ONLY) |
R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO)));
else
RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_VC_AND_TC) |
R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO)));
/* gs_prim_buffer_depth stays 0 for families not listed here. */
switch (dev_priv->flags & RADEON_FAMILY_MASK) {
case CHIP_RV770:
case CHIP_RV730:
case CHIP_RV740:
gs_prim_buffer_depth = 384;
break;
case CHIP_RV710:
gs_prim_buffer_depth = 128;
break;
default:
break;
}
num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16;
vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
/* Max value for this is 256 */
if (vgt_gs_per_es > 256)
vgt_gs_per_es = 256;
RADEON_WRITE(R600_VGT_ES_PER_GS, 128);
RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es);
RADEON_WRITE(R600_VGT_GS_PER_VS, 2);
/* more default values. 2D/3D driver should adjust as needed */
RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16);
RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0);
RADEON_WRITE(R600_VGT_STRMOUT_EN, 0);
RADEON_WRITE(R600_SX_MISC, 0);
RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0);
RADEON_WRITE(R700_PA_SC_EDGERULE, 0xaaaaaaaa);
RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0);
RADEON_WRITE(R600_PA_SC_CLIPRECT_RULE, 0xffff);
RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0);
RADEON_WRITE(R600_SPI_INPUT_Z, 0);
RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2));
RADEON_WRITE(R600_CB_COLOR7_FRAG, 0);
/* clear render buffer base addresses */
RADEON_WRITE(R600_CB_COLOR0_BASE, 0);
RADEON_WRITE(R600_CB_COLOR1_BASE, 0);
RADEON_WRITE(R600_CB_COLOR2_BASE, 0);
RADEON_WRITE(R600_CB_COLOR3_BASE, 0);
RADEON_WRITE(R600_CB_COLOR4_BASE, 0);
RADEON_WRITE(R600_CB_COLOR5_BASE, 0);
RADEON_WRITE(R600_CB_COLOR6_BASE, 0);
RADEON_WRITE(R600_CB_COLOR7_BASE, 0);
RADEON_WRITE(R700_TCP_CNTL, 0);
/* HDP_HOST_PATH_CNTL is read and written back unchanged. */
hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL);
RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA |
R600_NUM_CLIP_SEQ(3)));
}
/*
 * Bring up the CP ring buffer.
 *
 * Runs the family-specific gfx init, soft-resets the CP, programs the ring
 * control register, zeroes the ring read/write pointers, points the CP at the
 * ring-read-pointer buffer and the ring base (both translated into GART
 * space), sets up the scratch register write-back address, turns on bus
 * mastering and clears the frame/dispatch/clear scratch values together with
 * their SAREA copies when an SAREA is present.  Ends by calling
 * r600_do_wait_for_idle(), whose return value is not checked here.
 *
 * file_priv is only used to reach the master's private data (for the SAREA).
 */
static void r600_cp_init_ring_buffer(struct drm_device *dev,
drm_radeon_private_t *dev_priv,
struct drm_file *file_priv)
{
struct drm_radeon_master_private *master_priv;
u32 ring_start;
u64 rptr_addr;
/* RV770 and later families use the r700 gfx init path. */
if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
r700_gfx_init(dev, dev_priv);
else
r600_gfx_init(dev, dev_priv);
/* Pulse the CP soft reset: assert, read back, hold for 15 ms, then release. */
RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
RADEON_READ(R600_GRBM_SOFT_RESET);
mdelay(15);
RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);
/* Set ring buffer size */
#ifdef __BIG_ENDIAN
RADEON_WRITE(R600_CP_RB_CNTL,
R600_BUF_SWAP_32BIT |
R600_RB_NO_UPDATE |
(dev_priv->ring.rptr_update_l2qw << 8) |
dev_priv->ring.size_l2qw);
#else
/*
 * NOTE(review): this write uses RADEON_RB_NO_UPDATE while the other
 * CP_RB_CNTL writes use R600_RB_NO_UPDATE -- presumably the same bit;
 * confirm against the register definitions.
 */
RADEON_WRITE(R600_CP_RB_CNTL,
RADEON_RB_NO_UPDATE |
(dev_priv->ring.rptr_update_l2qw << 8) |
dev_priv->ring.size_l2qw);
#endif
RADEON_WRITE(R600_CP_SEM_WAIT_TIMER, 0x0);
/* Set the write pointer delay */
RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);
/* Same control value again, now with R600_RB_RPTR_WR_ENA also set. */
#ifdef __BIG_ENDIAN
RADEON_WRITE(R600_CP_RB_CNTL,
R600_BUF_SWAP_32BIT |
R600_RB_NO_UPDATE |
R600_RB_RPTR_WR_ENA |
(dev_priv->ring.rptr_update_l2qw << 8) |
dev_priv->ring.size_l2qw);
#else
RADEON_WRITE(R600_CP_RB_CNTL,
R600_RB_NO_UPDATE |
R600_RB_RPTR_WR_ENA |
(dev_priv->ring.rptr_update_l2qw << 8) |
dev_priv->ring.size_l2qw);
#endif
/* Initialize the ring buffer's read and write pointers */
RADEON_WRITE(R600_CP_RB_RPTR_WR, 0);
RADEON_WRITE(R600_CP_RB_WPTR, 0);
SET_RING_HEAD(dev_priv, 0);
dev_priv->ring.tail = 0;
/*
 * Translate the ring-read-pointer map offset into GART space: relative to
 * the AGP base on AGP, otherwise relative to dev->sg->virtual.
 */
#if IS_ENABLED(CONFIG_AGP)
if (dev_priv->flags & RADEON_IS_AGP) {
rptr_addr = dev_priv->ring_rptr->offset
- dev->agp->base +
dev_priv->gart_vm_start;
} else
#endif
{
rptr_addr = dev_priv->ring_rptr->offset
- ((unsigned long) dev->sg->virtual)
+ dev_priv->gart_vm_start;
}
/* The low two bits are masked off the low word of the address. */
RADEON_WRITE(R600_CP_RB_RPTR_ADDR, (rptr_addr & 0xfffffffc));
RADEON_WRITE(R600_CP_RB_RPTR_ADDR_HI, upper_32_bits(rptr_addr));
/*
 * Final control value: the NO_UPDATE and RPTR_WR_ENA flags are dropped.
 * NOTE(review): the big-endian branch uses RADEON_BUF_SWAP_32BIT here while
 * the earlier writes use R600_BUF_SWAP_32BIT -- presumably the same value;
 * confirm.
 */
#ifdef __BIG_ENDIAN
RADEON_WRITE(R600_CP_RB_CNTL,
RADEON_BUF_SWAP_32BIT |
(dev_priv->ring.rptr_update_l2qw << 8) |
dev_priv->ring.size_l2qw);
#else
RADEON_WRITE(R600_CP_RB_CNTL,
(dev_priv->ring.rptr_update_l2qw << 8) |
dev_priv->ring.size_l2qw);
#endif
/* Compute the ring base in GART space; on AGP also program the AGP base and location. */
#if IS_ENABLED(CONFIG_AGP)
if (dev_priv->flags & RADEON_IS_AGP) {
/* XXX */
radeon_write_agp_base(dev_priv, dev->agp->base);
/* XXX */
radeon_write_agp_location(dev_priv,
(((dev_priv->gart_vm_start - 1 +
dev_priv->gart_size) & 0xffff0000) |
(dev_priv->gart_vm_start >> 16)));
ring_start = (dev_priv->cp_ring->offset
- dev->agp->base
+ dev_priv->gart_vm_start);
} else
#endif
ring_start = (dev_priv->cp_ring->offset
- (unsigned long)dev->sg->virtual
+ dev_priv->gart_vm_start);
/* The ring base is written shifted right by 8. */
RADEON_WRITE(R600_CP_RB_BASE, ring_start >> 8);
RADEON_WRITE(R600_CP_ME_CNTL, 0xff);
RADEON_WRITE(R600_CP_DEBUG, (1 << 27) | (1 << 28));
/* Initialize the scratch register pointer. This will cause
* the scratch register values to be written out to memory
* whenever they are updated.
*
* We simply put this behind the ring read pointer, this works
* with PCI GART as well as (whatever kind of) AGP GART
*/
{
u64 scratch_addr;
/*
 * Rebuild the 64-bit read-pointer address from the two registers, add
 * the scratch offset, then shift right by 8 and truncate to 32 bits.
 */
scratch_addr = RADEON_READ(R600_CP_RB_RPTR_ADDR) & 0xFFFFFFFC;
scratch_addr |= ((u64)RADEON_READ(R600_CP_RB_RPTR_ADDR_HI)) << 32;
scratch_addr += R600_SCRATCH_REG_OFFSET;
scratch_addr >>= 8;
scratch_addr &= 0xffffffff;
RADEON_WRITE(R600_SCRATCH_ADDR, (uint32_t)scratch_addr);
}
RADEON_WRITE(R600_SCRATCH_UMSK, 0x7);
/* Turn on bus mastering */
radeon_enable_bm(dev_priv);
/* Zero scratch slots 0-2 and the matching last frame/dispatch/clear registers. */
radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(0), 0);
RADEON_WRITE(R600_LAST_FRAME_REG, 0);
radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0);
RADEON_WRITE(R600_LAST_DISPATCH_REG, 0);
radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(2), 0);
RADEON_WRITE(R600_LAST_CLEAR_REG, 0);
/* reset sarea copies of these */
master_priv = file_priv->master->driver_priv;
if (master_priv->sarea_priv) {
master_priv->sarea_priv->last_frame = 0;
master_priv->sarea_priv->last_dispatch = 0;
master_priv->sarea_priv->last_clear = 0;
}
r600_do_wait_for_idle(dev_priv);
}
int r600_do_cleanup_cp(struct drm_device *dev)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
DRM_DEBUG("\n");
/* Make sure interrupts are disabled here because the uninstall ioctl
* may not have been called from userspace and after dev_private
* is freed, it's too late.
*/
if (dev->irq_enabled)
drm_irq_uninstall(dev);
#if IS_ENABLED(CONFIG_AGP)
if (dev_priv->flags & RADEON_IS_AGP) {
if (dev_priv->cp_ring != NULL) {
drm_legacy_ioremapfree(dev_priv->cp_ring, dev);
dev_priv->cp_ring = NULL;
}
if (dev_priv->ring_rptr != NULL) {
drm_legacy_ioremapfree(dev_priv->ring_rptr, dev);
dev_priv->ring_rptr = NULL;
}
if (dev->agp_buffer_map != NULL) {
drm_legacy_ioremapfree(dev->agp_buffer_map, dev);
dev->agp_buffer_map = NULL;
}
} else
#endif
{
if (dev_priv->gart_info.bus_addr)
r600_page_table_cleanup(dev, &dev_priv->gart_info);
if (dev_priv->gart_info.gart_table_location == DRM_ATI_GART_FB) {
drm_legacy_ioremapfree(&dev_priv->gart_info.mapping, dev);
dev_priv->gart_info.addr = NULL;
}
}
/* only clear to the start of flags */
memset(dev_priv, 0, offsetof(drm_radeon_private_t, flags));
return 0;
}
/*
 * Initialize the CP from the parameters passed in init.
 *
 * Validates the request (memory map support, timeout range, cp_mode),
 * records the buffer offsets and pitches, looks up the SAREA and the ring,
 * ring-read-pointer, DMA buffer and optional GART texture maps, chooses a
 * GART aperture location, fills in the ring bookkeeping, builds the PCI GART
 * page table when not running on AGP, loads the CP microcode and initializes
 * the ring buffer.
 *
 * Every failure path calls r600_do_cleanup_cp() before returning.
 * Returns 0 on success, -EINVAL on a validation, lookup or mapping failure,
 * or the error returned by r600_cp_init_microcode().
 */
int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
struct drm_file *file_priv)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
DRM_DEBUG("\n");
mutex_init(&dev_priv->cs_mutex);
r600_cs_legacy_init();
/* if we require new memory map but we don't have it fail */
if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) {
DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX for 3D\n");
r600_do_cleanup_cp(dev);
return -EINVAL;
}
/*
 * Reconcile the AGP flag with what userspace asked for: drop it when PCI
 * mode is requested, restore it when no bus type flag is set and PCI mode
 * was not requested.
 */
if (init->is_pci && (dev_priv->flags & RADEON_IS_AGP)) {
DRM_DEBUG("Forcing AGP card to PCI mode\n");
dev_priv->flags &= ~RADEON_IS_AGP;
/* The writeback test succeeds, but when writeback is enabled,
* the ring buffer read ptr update fails after first 128 bytes.
*/
radeon_no_wb = 1;
} else if (!(dev_priv->flags & (RADEON_IS_AGP | RADEON_IS_PCI | RADEON_IS_PCIE))
&& !init->is_pci) {
DRM_DEBUG("Restoring AGP flag\n");
dev_priv->flags |= RADEON_IS_AGP;
}
/* The timeout must lie in [1, RADEON_MAX_USEC_TIMEOUT]. */
dev_priv->usec_timeout = init->usec_timeout;
if (dev_priv->usec_timeout < 1 ||
dev_priv->usec_timeout > RADEON_MAX_USEC_TIMEOUT) {
DRM_DEBUG("TIMEOUT problem!\n");
r600_do_cleanup_cp(dev);
return -EINVAL;
}
/* Enable vblank on CRTC1 for older X servers
*/
dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1;
dev_priv->do_boxes = 0;
dev_priv->cp_mode = init->cp_mode;
/* We don't support anything other than bus-mastering ring mode,
* but the ring can be in either AGP or PCI space for the ring
* read pointer.
*/
if ((init->cp_mode != RADEON_CSQ_PRIBM_INDDIS) &&
(init->cp_mode != RADEON_CSQ_PRIBM_INDBM)) {
DRM_DEBUG("BAD cp_mode (%x)!\n", init->cp_mode);
r600_do_cleanup_cp(dev);
return -EINVAL;
}
/* 16 bpp selects RGB565; 32 bpp and anything else selects ARGB8888. */
switch (init->fb_bpp) {
case 16:
dev_priv->color_fmt = RADEON_COLOR_FORMAT_RGB565;
break;
case 32:
default:
dev_priv->color_fmt = RADEON_COLOR_FORMAT_ARGB8888;
break;
}
/* Record the buffer layout supplied by userspace. */
dev_priv->front_offset = init->front_offset;
dev_priv->front_pitch = init->front_pitch;
dev_priv->back_offset = init->back_offset;
dev_priv->back_pitch = init->back_pitch;
dev_priv->ring_offset = init->ring_offset;
dev_priv->ring_rptr_offset = init->ring_rptr_offset;
dev_priv->buffers_offset = init->buffers_offset;
dev_priv->gart_textures_offset = init->gart_textures_offset;
/* Look up the SAREA and each map named by the init offsets; all but the GART textures are mandatory. */
master_priv->sarea = drm_legacy_getsarea(dev);
if (!master_priv->sarea) {
DRM_ERROR("could not find sarea!\n");
r600_do_cleanup_cp(dev);
return -EINVAL;
}
dev_priv->cp_ring = drm_legacy_findmap(dev, init->ring_offset);
if (!dev_priv->cp_ring) {
DRM_ERROR("could not find cp ring region!\n");
r600_do_cleanup_cp(dev);
return -EINVAL;
}
dev_priv->ring_rptr = drm_legacy_findmap(dev, init->ring_rptr_offset);
if (!dev_priv->ring_rptr) {
DRM_ERROR("could not find ring read pointer!\n");
r600_do_cleanup_cp(dev);
return -EINVAL;
}
dev->agp_buffer_token = init->buffers_offset;
dev->agp_buffer_map = drm_legacy_findmap(dev, init->buffers_offset);
if (!dev->agp_buffer_map) {
DRM_ERROR("could not find dma buffer region!\n");
r600_do_cleanup_cp(dev);
return -EINVAL;
}
if (init->gart_textures_offset) {
dev_priv->gart_textures =
drm_legacy_findmap(dev, init->gart_textures_offset);
if (!dev_priv->gart_textures) {
DRM_ERROR("could not find GART texture region!\n");
r600_do_cleanup_cp(dev);
return -EINVAL;
}
}
/*
 * On AGP the three maps are ioremapped write-combined and must all yield a
 * handle; otherwise each handle is simply the map offset cast to a pointer.
 */
#if IS_ENABLED(CONFIG_AGP)
/* XXX */
if (dev_priv->flags & RADEON_IS_AGP) {
drm_legacy_ioremap_wc(dev_priv->cp_ring, dev);
drm_legacy_ioremap_wc(dev_priv->ring_rptr, dev);
drm_legacy_ioremap_wc(dev->agp_buffer_map, dev);
if (!dev_priv->cp_ring->handle ||
!dev_priv->ring_rptr->handle ||
!dev->agp_buffer_map->handle) {
DRM_ERROR("could not find ioremap agp regions!\n");
r600_do_cleanup_cp(dev);
return -EINVAL;
}
} else
#endif
{
dev_priv->cp_ring->handle = (void *)(unsigned long)dev_priv->cp_ring->offset;
dev_priv->ring_rptr->handle =
(void *)(unsigned long)dev_priv->ring_rptr->offset;
dev->agp_buffer_map->handle =
(void *)(unsigned long)dev->agp_buffer_map->offset;
DRM_DEBUG("dev_priv->cp_ring->handle %p\n",
dev_priv->cp_ring->handle);
DRM_DEBUG("dev_priv->ring_rptr->handle %p\n",
dev_priv->ring_rptr->handle);
DRM_DEBUG("dev->agp_buffer_map->handle %p\n",
dev->agp_buffer_map->handle);
}
/*
 * Decode the framebuffer location register: the low 16 bits give the start
 * and the high 16 bits give the end, both in units of 1 << 24 bytes.
 */
dev_priv->fb_location = (radeon_read_fb_location(dev_priv) & 0xffff) << 24;
dev_priv->fb_size =
(((radeon_read_fb_location(dev_priv) & 0xffff0000u) << 8) + 0x1000000)
- dev_priv->fb_location;
/* Pack pitch / 64 into bits 22 and up, and the absolute offset >> 10 below it. */
dev_priv->front_pitch_offset = (((dev_priv->front_pitch / 64) << 22) |
((dev_priv->front_offset
+ dev_priv->fb_location) >> 10));
dev_priv->back_pitch_offset = (((dev_priv->back_pitch / 64) << 22) |
((dev_priv->back_offset
+ dev_priv->fb_location) >> 10));
dev_priv->depth_pitch_offset = (((dev_priv->depth_pitch / 64) << 22) |
((dev_priv->depth_offset
+ dev_priv->fb_location) >> 10));
dev_priv->gart_size = init->gart_size;
/* Now let's set the memory map ... */
if (dev_priv->new_memmap) {
u32 base = 0;
DRM_INFO("Setting GART location based on new memory map\n");
/* If using AGP, try to locate the AGP aperture at the same
* location in the card and on the bus, though we have to
* align it down.
*/
#if IS_ENABLED(CONFIG_AGP)
/* XXX */
if (dev_priv->flags & RADEON_IS_AGP) {
base = dev->agp->base;
/* Check if valid */
/* Reject the AGP base when the aperture would overlap the framebuffer range. */
if ((base + dev_priv->gart_size - 1) >= dev_priv->fb_location &&
base < (dev_priv->fb_location + dev_priv->fb_size - 1)) {
DRM_INFO("Can't use AGP base @0x%08lx, won't fit\n",
dev->agp->base);
base = 0;
}
}
#endif
/* If not or if AGP is at 0 (Macs), try to put it elsewhere */
/* Place it right after the framebuffer, or right before it if that would wrap. */
if (base == 0) {
base = dev_priv->fb_location + dev_priv->fb_size;
if (base < dev_priv->fb_location ||
((base + dev_priv->gart_size) & 0xfffffffful) < base)
base = dev_priv->fb_location
- dev_priv->gart_size;
}
/* Align the aperture start down by clearing the low 22 bits. */
dev_priv->gart_vm_start = base & 0xffc00000u;
if (dev_priv->gart_vm_start != base)
DRM_INFO("GART aligned down from 0x%08x to 0x%08x\n",
base, dev_priv->gart_vm_start);
}
/* Translate the DMA buffer map offset into GART space (AGP base or sg virtual as origin). */
#if IS_ENABLED(CONFIG_AGP)
/* XXX */
if (dev_priv->flags & RADEON_IS_AGP)
dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset
- dev->agp->base
+ dev_priv->gart_vm_start);
else
#endif
dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset
- (unsigned long)dev->sg->virtual
+ dev_priv->gart_vm_start);
DRM_DEBUG("fb 0x%08x size %d\n",
(unsigned int) dev_priv->fb_location,
(unsigned int) dev_priv->fb_size);
DRM_DEBUG("dev_priv->gart_size %d\n", dev_priv->gart_size);
DRM_DEBUG("dev_priv->gart_vm_start 0x%08x\n",
(unsigned int) dev_priv->gart_vm_start);
DRM_DEBUG("dev_priv->gart_buffers_offset 0x%08lx\n",
dev_priv->gart_buffers_offset);
/*
 * Ring bookkeeping.  The ring size comes from userspace; the read pointer
 * update size (4096) and fetch size (32) are fixed here instead of being
 * taken from init.
 */
dev_priv->ring.start = (u32 *) dev_priv->cp_ring->handle;
dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->handle
+ init->ring_size / sizeof(u32));
dev_priv->ring.size = init->ring_size;
dev_priv->ring.size_l2qw = order_base_2(init->ring_size / 8);
dev_priv->ring.rptr_update = /* init->rptr_update */ 4096;
dev_priv->ring.rptr_update_l2qw = order_base_2(/* init->rptr_update */ 4096 / 8);
dev_priv->ring.fetch_size = /* init->fetch_size */ 32;
dev_priv->ring.fetch_size_l2ow = order_base_2(/* init->fetch_size */ 32 / 16);
dev_priv->ring.tail_mask = (dev_priv->ring.size / sizeof(u32)) - 1;
dev_priv->ring.high_mark = RADEON_RING_HIGH_MARK;
#if IS_ENABLED(CONFIG_AGP)
if (dev_priv->flags & RADEON_IS_AGP) {
/* XXX turn off pcie gart */
} else
#endif
{
/* Non-AGP: the GART table lives at an offset userspace must have supplied. */
dev_priv->gart_info.table_mask = DMA_BIT_MASK(32);
/* if we have an offset set from userspace */
if (!dev_priv->pcigart_offset_set) {
DRM_ERROR("Need gart offset from userspace\n");
r600_do_cleanup_cp(dev);
return -EINVAL;
}
DRM_DEBUG("Using gart offset 0x%08lx\n", dev_priv->pcigart_offset);
dev_priv->gart_info.bus_addr =
dev_priv->pcigart_offset + dev_priv->fb_location;
dev_priv->gart_info.mapping.offset =
dev_priv->pcigart_offset + dev_priv->fb_aper_offset;
dev_priv->gart_info.mapping.size =
dev_priv->gart_info.table_size;
drm_legacy_ioremap_wc(&dev_priv->gart_info.mapping, dev);
if (!dev_priv->gart_info.mapping.handle) {
DRM_ERROR("ioremap failed.\n");
r600_do_cleanup_cp(dev);
return -EINVAL;
}
dev_priv->gart_info.addr =
dev_priv->gart_info.mapping.handle;
DRM_DEBUG("Setting phys_pci_gart to %p %08lX\n",
dev_priv->gart_info.addr,
dev_priv->pcigart_offset);
if (!r600_page_table_init(dev)) {
DRM_ERROR("Failed to init GART table\n");
r600_do_cleanup_cp(dev);
return -EINVAL;
}
if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
r700_vm_init(dev);
else
r600_vm_init(dev);
}
/* Fetch the firmware only if either image is still missing. */
if (!dev_priv->me_fw || !dev_priv->pfp_fw) {
int err = r600_cp_init_microcode(dev_priv);
if (err) {
DRM_ERROR("Failed to load firmware!\n");
r600_do_cleanup_cp(dev);
return err;
}
}
/* RV770 and later families use the r700 microcode loader. */
if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
r700_cp_load_microcode(dev_priv);
else
r600_cp_load_microcode(dev_priv);
r600_cp_init_ring_buffer(dev, dev_priv, file_priv);
dev_priv->last_buf = 0;
r600_do_engine_reset(dev);
r600_test_writeback(dev_priv);
return 0;
}
/*
 * Re-run the hardware side of CP bring-up: VM init and microcode load for
 * the detected family (r700 path for RV770 and later, r600 path otherwise),
 * then ring buffer init and an engine reset.
 *
 * Always returns 0.
 */
int r600_do_resume_cp(struct drm_device *dev, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int is_r7xx;

	DRM_DEBUG("\n");

	is_r7xx = (dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770;

	if (is_r7xx)
		r700_vm_init(dev);
	else
		r600_vm_init(dev);

	if (is_r7xx)
		r700_cp_load_microcode(dev_priv);
	else
		r600_cp_load_microcode(dev_priv);

	r600_cp_init_ring_buffer(dev, dev_priv, file_priv);
	r600_do_engine_reset(dev);

	return 0;
}
/* Wait for the CP to go idle.
 *
 * Queues five dwords on the ring (an EVENT_WRITE packet carrying
 * R600_CACHE_FLUSH_AND_INV_EVENT, followed by a SET_CONFIG_REG packet that
 * writes the 3D idle/idle-clean wait bits to R600_WAIT_UNTIL), commits the
 * ring, and returns the result of r600_do_wait_for_idle().
*/
int r600_do_cp_idle(drm_radeon_private_t *dev_priv)
{
RING_LOCALS;
DRM_DEBUG("\n");
/* The count given to BEGIN_RING matches the five OUT_RING calls below. */
BEGIN_RING(5);
OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
/* wait for 3D idle clean */
/* The register is addressed as a dword index relative to R600_SET_CONFIG_REG_OFFSET. */
OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);
ADVANCE_RING();
COMMIT_RING();
return r600_do_wait_for_idle(dev_priv);
}
/* Start the Command Processor.
 *
 * Queues and submits a 7-dword ME_INITIALIZE packet, then writes
 * R600_CP_ME_CNTL and marks the CP as running.
 */
void r600_do_cp_start(drm_radeon_private_t *dev_priv)
{
	/* the third dword of the packet differs for parts older than RV770 */
	const u32 me_init_dw2 =
		((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770) ?
		0x00000003 : 0x00000000;
	RING_LOCALS;

	DRM_DEBUG("\n");

	BEGIN_RING(7);
	OUT_RING(CP_PACKET3(R600_IT_ME_INITIALIZE, 5));
	OUT_RING(0x00000001);
	OUT_RING(me_init_dw2);
	OUT_RING((dev_priv->r600_max_hw_contexts - 1));
	OUT_RING(R600_ME_INITIALIZE_DEVICE_ID(1));
	OUT_RING(0x00000000);
	OUT_RING(0x00000000);
	ADVANCE_RING();
	COMMIT_RING();

	/* set the mux and reset the halt bit */
	RADEON_WRITE(R600_CP_ME_CNTL, 0xff);

	dev_priv->cp_running = 1;
}
/*
 * Reset the CP ring bookkeeping: read the hardware read pointer and set
 * the hardware write pointer, the ring head and the driver's ring tail
 * to that same value.
 */
void r600_do_cp_reset(drm_radeon_private_t *dev_priv)
{
	u32 rptr;

	DRM_DEBUG("\n");

	rptr = RADEON_READ(R600_CP_RB_RPTR);

	RADEON_WRITE(R600_CP_RB_WPTR, rptr);
	SET_RING_HEAD(dev_priv, rptr);
	dev_priv->ring.tail = rptr;
}
/*
 * Stop the Command Processor: write R600_CP_ME_CNTL with the halt bit
 * set and mark the CP as no longer running.
 */
void r600_do_cp_stop(drm_radeon_private_t *dev_priv)
{
	DRM_DEBUG("\n");

	RADEON_WRITE(R600_CP_ME_CNTL, 0xff | R600_CP_ME_HALT);

	dev_priv->cp_running = 0;
}
int r600_cp_dispatch_indirect(struct drm_device *dev,
struct drm_buf *buf, int start, int end)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
RING_LOCALS;
if (start != end) {
unsigned long offset = (dev_priv->gart_buffers_offset
+ buf->offset + start);
int dwords = (end - start + 3) / sizeof(u32);
DRM_DEBUG("dwords:%d\n", dwords);
DRM_DEBUG("offset 0x%lx\n", offset);
/* Indirect buffer data must be a multiple of 16 dwords.
* pad the data with a Type-2 CP packet.
*/
while (dwords & 0xf) {
u32 *data = (u32 *)
((char *)dev->agp_buffer_map->handle
+ buf->offset + start);
data[dwords++] = RADEON_CP_PACKET2;
}
/* Fire off the indirect buffer */
BEGIN_RING(4);
OUT_RING(CP_PACKET3(R600_IT_INDIRECT_BUFFER, 2));
OUT_RING((offset & 0xfffffffc));
OUT_RING((upper_32_bits(offset) & 0xff));
OUT_RING(dwords);
ADVANCE_RING();
}
return 0;
}
void r600_cp_dispatch_swap(struct drm_device *dev, struct drm_file *file_priv)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
struct drm_master *master = file_priv->master;
struct drm_radeon_master_private *master_priv = master->driver_priv;
drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
int nbox = sarea_priv->nbox;
struct drm_clip_rect *pbox = sarea_priv->boxes;
int i, cpp, src_pitch, dst_pitch;
uint64_t src, dst;
RING_LOCALS;
DRM_DEBUG("\n");
if (dev_priv->color_fmt == RADEON_COLOR_FORMAT_ARGB8888)
cpp = 4;
else
cpp = 2;
if (sarea_priv->pfCurrentPage == 0) {
src_pitch = dev_priv->back_pitch;
dst_pitch = dev_priv->front_pitch;
src = dev_priv->back_offset + dev_priv->fb_location;
dst = dev_priv->front_offset + dev_priv->fb_location;
} else {
src_pitch = dev_priv->front_pitch;
dst_pitch = dev_priv->back_pitch;
src = dev_priv->front_offset + dev_priv->fb_location;
dst = dev_priv->back_offset + dev_priv->fb_location;
}
if (r600_prepare_blit_copy(dev, file_priv)) {
DRM_ERROR("unable to allocate vertex buffer for swap buffer\n");
return;
}
for (i = 0; i < nbox; i++) {
int x = pbox[i].x1;
int y = pbox[i].y1;
int w = pbox[i].x2 - x;
int h = pbox[i].y2 - y;
DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
r600_blit_swap(dev,
src, dst,
x, y, x, y, w, h,
src_pitch, dst_pitch, cpp);
}
r600_done_blit_copy(dev);
/* Increment the frame counter. The client-side 3D driver must
* throttle the framerate by waiting for this value before
* performing the swapbuffer ioctl.
*/
sarea_priv->last_frame++;
BEGIN_RING(3);
R600_FRAME_AGE(sarea_priv->last_frame);
ADVANCE_RING();
}
/*
 * Upload a texture image from user space to @tex->offset.
 *
 * The image (height * pitch bytes) is copied in passes: each pass takes
 * a DMA buffer from the freelist, fills it from user memory, blits it to
 * the destination and discards the buffer.  @image->data is advanced
 * after every pass so that an interrupted upload can be resumed.
 *
 * Returns 0 on success (including a zero-sized upload), -EINVAL if the
 * destination range fails radeon_check_offset(), -EAGAIN if the blit
 * state or a DMA buffer could not be obtained (the updated @image is
 * written back to @tex->image first), or -EFAULT on a failed user copy.
 *
 * Every exit taken after r600_prepare_blit_copy() succeeded goes through
 * r600_done_blit_copy(), so the prepare/done pair stays balanced on the
 * error paths too.
 */
int r600_cp_dispatch_texture(struct drm_device *dev,
			     struct drm_file *file_priv,
			     drm_radeon_texture_t *tex,
			     drm_radeon_tex_image_t *image)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_buf *buf;
	u32 *buffer;
	const u8 __user *data;
	unsigned int size, pass_size;
	u64 src_offset, dst_offset;
	int ret = 0;

	if (!radeon_check_offset(dev_priv, tex->offset)) {
		DRM_ERROR("Invalid destination offset\n");
		return -EINVAL;
	}

	/* A zero-sized upload is a no-op.  Bail out before the final-offset
	 * check, which would otherwise test offset - 1.
	 */
	size = tex->height * tex->pitch;
	if (size == 0)
		return 0;

	if (!radeon_check_offset(dev_priv, tex->offset + tex->height * tex->pitch - 1)) {
		DRM_ERROR("Invalid final destination offset\n");
		return -EINVAL;
	}

	dst_offset = tex->offset;

	if (r600_prepare_blit_copy(dev, file_priv)) {
		DRM_ERROR("unable to allocate vertex buffer for swap buffer\n");
		return -EAGAIN;
	}

	do {
		data = (const u8 __user *)image->data;
		pass_size = size;

		buf = radeon_freelist_get(dev);
		if (!buf) {
			DRM_DEBUG("EAGAIN\n");
			/* hand the progress made so far back to user space */
			if (copy_to_user(tex->image, image, sizeof(*image)))
				ret = -EFAULT;
			else
				ret = -EAGAIN;
			break;
		}

		if (pass_size > buf->total)
			pass_size = buf->total;

		/* Dispatch the indirect buffer.
		 */
		buffer =
		    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);

		if (copy_from_user(buffer, data, pass_size)) {
			DRM_ERROR("EFAULT on pad, %d bytes\n", pass_size);
			ret = -EFAULT;
			break;
		}

		buf->file_priv = file_priv;
		buf->used = pass_size;
		src_offset = dev_priv->gart_buffers_offset + buf->offset;

		r600_blit_copy(dev, src_offset, dst_offset, pass_size);

		radeon_cp_discard_buffer(dev, file_priv->master, buf);

		/* Update the input parameters for next time */
		image->data = (const u8 __user *)image->data + pass_size;
		dst_offset += pass_size;
		size -= pass_size;
	} while (size > 0);

	r600_done_blit_copy(dev);

	return ret;
}
/*
* Legacy cs ioctl
*/
/*
 * Produce the next command stream id.
 *
 * The id is the OR of a 24-bit sequence counter and a wrap counter kept
 * in the bits above it.  When the sequence counter wraps to 0 the wrap
 * counter is advanced by 0x01000000 and the sequence restarts at 1.
 */
static u32 radeon_cs_id_get(struct drm_radeon_private *radeon)
{
	u32 seq;

	/* FIXME: check if wrap affect last reported wrap & sequence */
	seq = (radeon->cs_id_scnt + 1) & 0x00FFFFFF;
	if (seq == 0) {
		/* increment wrap counter */
		radeon->cs_id_wcnt += 0x01000000;
		/* valid sequence counter start at 1 */
		seq = 1;
	}
	radeon->cs_id_scnt = seq;

	return (radeon->cs_id_scnt | radeon->cs_id_wcnt);
}
/*
 * Allocate a new command stream id, store it in *@id and emit it to the
 * ring via R600_CLEAR_AGE(), committing the ring afterwards.
 */
static void r600_cs_id_emit(drm_radeon_private_t *dev_priv, u32 *id)
{
	RING_LOCALS;

	*id = radeon_cs_id_get(dev_priv);

	/* SCRATCH 2 */
	BEGIN_RING(3);
	R600_CLEAR_AGE(*id);
	ADVANCE_RING();

	COMMIT_RING();
}
/*
 * Take a buffer from the freelist to use as an indirect buffer.
 *
 * On success the buffer is tagged with @fpriv, stored in *@buffer and 0
 * is returned.  If the freelist is empty *@buffer is NULL and -EBUSY is
 * returned.
 */
static int r600_ib_get(struct drm_device *dev,
		       struct drm_file *fpriv,
		       struct drm_buf **buffer)
{
	struct drm_buf *ib;

	*buffer = NULL;

	ib = radeon_freelist_get(dev);
	if (ib == NULL)
		return -EBUSY;

	ib->file_priv = fpriv;
	*buffer = ib;

	return 0;
}
static void r600_ib_free(struct drm_device *dev, struct drm_buf *buf,
struct drm_file *fpriv, int l, int r)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
if (buf) {
if (!r)
r600_cp_dispatch_indirect(dev, buf, 0, l * 4);
radeon_cp_discard_buffer(dev, fpriv->master, buf);
COMMIT_RING();
}
}
/*
 * Legacy (non-KMS) command stream ioctl for R6xx/R7xx.
 *
 * Takes an indirect buffer from the freelist, has r600_cs_legacy() check
 * the user command stream and copy it into that buffer, and dispatches
 * the buffer if the check succeeded.  A new cs id is emitted and stored
 * in the ioctl argument whether or not the submission succeeded.
 *
 * Returns 0 on success, -EINVAL if the device is uninitialised or older
 * than R600, or the error from r600_ib_get() / r600_cs_legacy().
 */
int r600_cs_legacy_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv)
{
	struct drm_radeon_private *dev_priv = dev->dev_private;
	struct drm_radeon_cs *cs = data;
	struct drm_buf *buf;
	unsigned family;
	/* l must start out defined: it is handed to r600_ib_free() even on
	 * the error paths where r600_cs_legacy() never wrote it.
	 */
	int l = 0, r = 0;
	u32 *ib, cs_id = 0;

	if (dev_priv == NULL) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}
	family = dev_priv->flags & RADEON_FAMILY_MASK;
	if (family < CHIP_R600) {
		DRM_ERROR("cs ioctl valid only for R6XX & R7XX in legacy mode\n");
		return -EINVAL;
	}

	mutex_lock(&dev_priv->cs_mutex);

	/* get ib */
	r = r600_ib_get(dev, fpriv, &buf);
	if (r) {
		DRM_ERROR("ib_get failed\n");
		goto out;
	}
	ib = dev->agp_buffer_map->handle + buf->offset;

	/* now parse command stream */
	r = r600_cs_legacy(dev, data, fpriv, family, ib, &l);
	if (r) {
		goto out;
	}

out:
	/* dispatches the IB only when r == 0; buf is NULL if ib_get failed */
	r600_ib_free(dev, buf, fpriv, l, r);

	/* emit cs id sequence */
	r600_cs_id_emit(dev_priv, &cs_id);
	cs->cs_id = cs_id;

	mutex_unlock(&dev_priv->cs_mutex);
	return r;
}
/*
 * Report the tiling configuration stored in the legacy driver private
 * data: pipe count, bank count and group size.
 */
void r600_cs_legacy_get_tiling_conf(struct drm_device *dev, u32 *npipes, u32 *nbanks, u32 *group_size)
{
	const struct drm_radeon_private *priv = dev->dev_private;

	*npipes = priv->r600_npipes;
	*nbanks = priv->r600_nbanks;
	*group_size = priv->r600_group_size;
}
...@@ -2328,101 +2328,6 @@ int r600_cs_parse(struct radeon_cs_parser *p) ...@@ -2328,101 +2328,6 @@ int r600_cs_parse(struct radeon_cs_parser *p)
return 0; return 0;
} }
#ifdef CONFIG_DRM_RADEON_UMS
/**
 * r600_cs_parser_fini() - release memory held by a legacy parser
 * @parser: parser structure holding parsing context.
 * @error: error number; not used by this implementation
 *
 * Frees the relocation array, each chunk's kdata buffer, the chunk
 * array and the chunks_array.
 **/
static void r600_cs_parser_fini(struct radeon_cs_parser *parser, int error)
{
	unsigned n = 0;

	kfree(parser->relocs);

	while (n < parser->nchunks) {
		drm_free_large(parser->chunks[n].kdata);
		n++;
	}

	kfree(parser->chunks);
	kfree(parser->chunks_array);
}
/*
 * Legacy relocation setup: when the command stream carries a relocation
 * chunk, allocate one zeroed struct radeon_bo_list for p->relocs.
 *
 * Returns 0 on success or when there is no relocation chunk, -ENOMEM if
 * the allocation fails.
 */
static int r600_cs_parser_relocs_legacy(struct radeon_cs_parser *p)
{
	if (p->chunk_relocs != NULL) {
		p->relocs = kzalloc(sizeof(struct radeon_bo_list), GFP_KERNEL);
		if (!p->relocs)
			return -ENOMEM;
	}

	return 0;
}
/*
 * Check a legacy (UMS) command stream and copy it into an indirect buffer.
 *
 * @dev:    DRM device
 * @data:   ioctl argument handed to radeon_cs_parser_init()
 * @filp:   DRM file issuing the request
 * @family: chip family, stored in the parser
 * @ib:     destination indirect buffer the commands are copied into
 * @l:      out: IB length in dwords (written once the IB chunk is known)
 *
 * Returns 0 on success, -ENOMEM if the tracker cannot be allocated,
 * -EINVAL if the command stream has no IB chunk, -EFAULT if the user
 * copy fails, or the error from parser setup / r600_cs_parse().
 *
 * The tracker allocated here is freed on every exit taken before
 * r600_cs_parse() is called; previously those exits leaked it.
 */
int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
		   unsigned family, u32 *ib, int *l)
{
	struct radeon_cs_parser parser;
	struct radeon_cs_chunk *ib_chunk;
	struct r600_cs_track *track;
	int r;

	/* initialize tracker */
	track = kzalloc(sizeof(*track), GFP_KERNEL);
	if (track == NULL)
		return -ENOMEM;
	r600_cs_track_init(track);
	r600_cs_legacy_get_tiling_conf(dev, &track->npipes, &track->nbanks, &track->group_size);

	/* initialize parser */
	memset(&parser, 0, sizeof(struct radeon_cs_parser));
	parser.filp = filp;
	parser.dev = &dev->pdev->dev;
	parser.rdev = NULL;
	parser.family = family;
	parser.track = track;
	parser.ib.ptr = ib;

	r = radeon_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		goto err_free_track;
	}

	r = r600_cs_parser_relocs_legacy(&parser);
	if (r) {
		DRM_ERROR("Failed to parse relocation !\n");
		goto err_free_track;
	}

	/* Copy the packet into the IB, the parser will read from the
	 * input memory (cached) and write to the IB (which can be
	 * uncached). */
	ib_chunk = parser.chunk_ib;
	if (ib_chunk == NULL) {
		/* NOTE(review): assumes radeon_cs_parser_init() can leave
		 * chunk_ib unset when user space supplies no IB chunk.
		 */
		DRM_ERROR("No IB chunk in command stream !\n");
		r = -EINVAL;
		goto err_free_track;
	}
	parser.ib.length_dw = ib_chunk->length_dw;
	*l = parser.ib.length_dw;
	if (copy_from_user(ib, ib_chunk->user_ptr, ib_chunk->length_dw * 4)) {
		r = -EFAULT;
		goto err_free_track;
	}

	/* NOTE(review): from here on the tracker is handled exactly as
	 * before; presumably r600_cs_parse() releases parser.track -
	 * confirm against its definition.
	 */
	r = r600_cs_parse(&parser);
	if (r)
		DRM_ERROR("Invalid command stream !\n");
	r600_cs_parser_fini(&parser, r);
	return r;

err_free_track:
	/* r600_cs_parser_fini() does not free the tracker, so do it here */
	kfree(track);
	r600_cs_parser_fini(&parser, r);
	return r;
}
/*
 * Set the file-level r600_nomm flag to 1.
 * NOTE(review): presumably this switches the r600 command checker into
 * its legacy "no memory manager" mode - confirm where r600_nomm is read.
 */
void r600_cs_legacy_init(void)
{
r600_nomm = 1;
}
#endif
/* /*
* DMA * DMA
*/ */
......
/* radeon_cp.c -- CP support for Radeon -*- linux-c -*- */
/*
* Copyright 2000 Precision Insight, Inc., Cedar Park, Texas.
* Copyright 2000 VA Linux Systems, Inc., Fremont, California.
* Copyright 2007 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Kevin E. Martin <martin@valinux.com>
* Gareth Hughes <gareth@valinux.com>
*
* ------------------------ This file is DEPRECATED! -------------------------
*/
#include <linux/module.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_drv.h"
#include "r300_reg.h"
/* Set to 1 to compile in radeon_status() and the extra "failed!"
 * diagnostics that the wait/flush helpers emit when they time out.
 */
#define RADEON_FIFO_DEBUG 0
/* Firmware Names: CP microcode images, one per chip generation,
 * selected by radeon_cp_init_microcode().
 */
#define FIRMWARE_R100 "radeon/R100_cp.bin"
#define FIRMWARE_R200 "radeon/R200_cp.bin"
#define FIRMWARE_R300 "radeon/R300_cp.bin"
#define FIRMWARE_R420 "radeon/R420_cp.bin"
#define FIRMWARE_RS690 "radeon/RS690_cp.bin"
#define FIRMWARE_RS600 "radeon/RS600_cp.bin"
#define FIRMWARE_R520 "radeon/R520_cp.bin"
/* Declare every firmware file this module may request. */
MODULE_FIRMWARE(FIRMWARE_R100);
MODULE_FIRMWARE(FIRMWARE_R200);
MODULE_FIRMWARE(FIRMWARE_R300);
MODULE_FIRMWARE(FIRMWARE_R420);
MODULE_FIRMWARE(FIRMWARE_RS690);
MODULE_FIRMWARE(FIRMWARE_RS600);
MODULE_FIRMWARE(FIRMWARE_R520);
/* Forward declarations of static helpers. */
static int radeon_do_cleanup_cp(struct drm_device * dev);
static void radeon_do_cp_start(drm_radeon_private_t * dev_priv);
/*
 * Read a 32-bit value at byte offset @off in the ring read-pointer area.
 *
 * AGP devices go through DRM_READ32(); otherwise the mapping is read
 * directly and the value converted from little-endian.
 */
u32 radeon_read_ring_rptr(drm_radeon_private_t *dev_priv, u32 off)
{
	volatile u32 *slot;
	u32 raw;

	if (dev_priv->flags & RADEON_IS_AGP)
		return DRM_READ32(dev_priv->ring_rptr, off);

	slot = ((volatile u32 *)dev_priv->ring_rptr->handle) +
		(off / sizeof(u32));
	raw = *slot;

	return le32_to_cpu(raw);
}
/*
 * Return the current ring read pointer: from the writeback area when
 * writeback works, otherwise from the CP_RB_RPTR register matching the
 * chip generation.
 */
u32 radeon_get_ring_head(drm_radeon_private_t *dev_priv)
{
	if (dev_priv->writeback_works)
		return radeon_read_ring_rptr(dev_priv, 0);

	if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_R600)
		return RADEON_READ(RADEON_CP_RB_RPTR);

	return RADEON_READ(R600_CP_RB_RPTR);
}
/*
 * Store @val at byte offset @off in the ring read-pointer area.
 *
 * AGP devices go through DRM_WRITE32(); otherwise the value is converted
 * to little-endian and written directly into the mapping.
 */
void radeon_write_ring_rptr(drm_radeon_private_t *dev_priv, u32 off, u32 val)
{
	volatile u32 *slot;

	if (dev_priv->flags & RADEON_IS_AGP) {
		DRM_WRITE32(dev_priv->ring_rptr, off, val);
		return;
	}

	slot = ((volatile u32 *) dev_priv->ring_rptr->handle) +
		(off / sizeof(u32));
	*slot = cpu_to_le32(val);
}
/* Store @val as the ring head, i.e. at offset 0 of the ring
 * read-pointer area (see radeon_write_ring_rptr()).
 */
void radeon_set_ring_head(drm_radeon_private_t *dev_priv, u32 val)
{
radeon_write_ring_rptr(dev_priv, 0, val);
}
/*
 * Read scratch value @index.
 *
 * With working writeback the value comes from the writeback area at the
 * generation-specific scratch offset; otherwise it is read from the
 * scratch register itself (base + 4 * index).
 */
u32 radeon_get_scratch(drm_radeon_private_t *dev_priv, int index)
{
	const int is_r600 =
		(dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600;

	if (!dev_priv->writeback_works) {
		if (is_r600)
			return RADEON_READ(R600_SCRATCH_REG0 + 4*index);
		return RADEON_READ(RADEON_SCRATCH_REG0 + 4*index);
	}

	if (is_r600)
		return radeon_read_ring_rptr(dev_priv,
					     R600_SCRATCHOFF(index));
	return radeon_read_ring_rptr(dev_priv,
				     RADEON_SCRATCHOFF(index));
}
/*
 * Indirect memory-controller read through the R520 MC index/data pair:
 * select the register, read the data port, then write 0 back to the
 * index register.
 */
static u32 R500_READ_MCIND(drm_radeon_private_t *dev_priv, int addr)
{
	const u32 index = 0x7f0000 | (addr & 0xff);
	u32 data;

	RADEON_WRITE(R520_MC_IND_INDEX, index);
	data = RADEON_READ(R520_MC_IND_DATA);
	RADEON_WRITE(R520_MC_IND_INDEX, 0);

	return data;
}
/*
 * Indirect memory-controller read through the RS480 NB MC index/data
 * pair: select the register, read the data port, then write 0xff back
 * to the index register.
 */
static u32 RS480_READ_MCIND(drm_radeon_private_t *dev_priv, int addr)
{
	u32 data;

	RADEON_WRITE(RS480_NB_MC_INDEX, addr & 0xff);
	data = RADEON_READ(RS480_NB_MC_DATA);
	RADEON_WRITE(RS480_NB_MC_INDEX, 0xff);

	return data;
}
/*
 * Indirect memory-controller read through the RS690 MC index/data pair:
 * select the register, read the data port, then write the index mask
 * back to the index register.
 */
static u32 RS690_READ_MCIND(drm_radeon_private_t *dev_priv, int addr)
{
	u32 data;

	RADEON_WRITE(RS690_MC_INDEX, (addr & RS690_MC_INDEX_MASK));
	data = RADEON_READ(RS690_MC_DATA);
	RADEON_WRITE(RS690_MC_INDEX, RS690_MC_INDEX_MASK);

	return data;
}
/*
 * Indirect memory-controller read through the RS600 MC index/data pair.
 * The index is the masked address ORed with RS600_MC_IND_CITF_ARB0; the
 * index register is not rewritten after the read.
 */
static u32 RS600_READ_MCIND(drm_radeon_private_t *dev_priv, int addr)
{
	RADEON_WRITE(RS600_MC_INDEX, ((addr & RS600_MC_ADDR_MASK) |
				      RS600_MC_IND_CITF_ARB0));

	return RADEON_READ(RS600_MC_DATA);
}
/*
 * Indirect memory-controller read for IGP parts: dispatch to the
 * RS690/RS740, RS600 or (for everything else) RS480 accessor.
 */
static u32 IGP_READ_MCIND(drm_radeon_private_t *dev_priv, int addr)
{
	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_RS690:
	case CHIP_RS740:
		return RS690_READ_MCIND(dev_priv, addr);
	case CHIP_RS600:
		return RS600_READ_MCIND(dev_priv, addr);
	default:
		return RS480_READ_MCIND(dev_priv, addr);
	}
}
/*
 * Read the framebuffer location register, picking the register and the
 * access method (direct MMIO or indirect MC read) by chip family.
 * The checks are ordered; the first match wins.
 */
u32 radeon_read_fb_location(drm_radeon_private_t *dev_priv)
{
	const unsigned family = dev_priv->flags & RADEON_FAMILY_MASK;

	if (family >= CHIP_RV770)
		return RADEON_READ(R700_MC_VM_FB_LOCATION);

	if (family >= CHIP_R600)
		return RADEON_READ(R600_MC_VM_FB_LOCATION);

	if (family == CHIP_RV515)
		return R500_READ_MCIND(dev_priv, RV515_MC_FB_LOCATION);

	if (family == CHIP_RS690 || family == CHIP_RS740)
		return RS690_READ_MCIND(dev_priv, RS690_MC_FB_LOCATION);

	if (family == CHIP_RS600)
		return RS600_READ_MCIND(dev_priv, RS600_MC_FB_LOCATION);

	if (family > CHIP_RV515)
		return R500_READ_MCIND(dev_priv, R520_MC_FB_LOCATION);

	return RADEON_READ(RADEON_MC_FB_LOCATION);
}
/*
 * Write @fb_loc to the framebuffer location register, picking the
 * register and the access method (direct MMIO or indirect MC write) by
 * chip family.
 */
static void radeon_write_fb_location(drm_radeon_private_t *dev_priv, u32 fb_loc)
{
	const unsigned family = dev_priv->flags & RADEON_FAMILY_MASK;

	if (family >= CHIP_RV770) {
		RADEON_WRITE(R700_MC_VM_FB_LOCATION, fb_loc);
		return;
	}
	if (family >= CHIP_R600) {
		RADEON_WRITE(R600_MC_VM_FB_LOCATION, fb_loc);
		return;
	}

	switch (family) {
	case CHIP_RV515:
		R500_WRITE_MCIND(RV515_MC_FB_LOCATION, fb_loc);
		break;
	case CHIP_RS690:
	case CHIP_RS740:
		RS690_WRITE_MCIND(RS690_MC_FB_LOCATION, fb_loc);
		break;
	case CHIP_RS600:
		RS600_WRITE_MCIND(RS600_MC_FB_LOCATION, fb_loc);
		break;
	default:
		/* remaining families above RV515 use the R520 MC register */
		if (family > CHIP_RV515)
			R500_WRITE_MCIND(R520_MC_FB_LOCATION, fb_loc);
		else
			RADEON_WRITE(RADEON_MC_FB_LOCATION, fb_loc);
		break;
	}
}
/*
 * Write @agp_loc to the AGP location register(s) for the chip family.
 *
 * R6xx/R7xx split the value into separate BOT (low 16 bits) and TOP
 * (high 16 bits) registers; older parts take the whole value in a single
 * register, via direct MMIO or an indirect MC write.
 */
void radeon_write_agp_location(drm_radeon_private_t *dev_priv, u32 agp_loc)
{
	const unsigned family = dev_priv->flags & RADEON_FAMILY_MASK;

	/*R6xx/R7xx: AGP_TOP and BOT are actually 18 bits each */
	if (family >= CHIP_RV770) {
		RADEON_WRITE(R700_MC_VM_AGP_BOT, agp_loc & 0xffff); /* FIX ME */
		RADEON_WRITE(R700_MC_VM_AGP_TOP, (agp_loc >> 16) & 0xffff);
		return;
	}
	if (family >= CHIP_R600) {
		RADEON_WRITE(R600_MC_VM_AGP_BOT, agp_loc & 0xffff); /* FIX ME */
		RADEON_WRITE(R600_MC_VM_AGP_TOP, (agp_loc >> 16) & 0xffff);
		return;
	}

	switch (family) {
	case CHIP_RV515:
		R500_WRITE_MCIND(RV515_MC_AGP_LOCATION, agp_loc);
		break;
	case CHIP_RS690:
	case CHIP_RS740:
		RS690_WRITE_MCIND(RS690_MC_AGP_LOCATION, agp_loc);
		break;
	case CHIP_RS600:
		RS600_WRITE_MCIND(RS600_MC_AGP_LOCATION, agp_loc);
		break;
	default:
		if (family > CHIP_RV515)
			R500_WRITE_MCIND(R520_MC_AGP_LOCATION, agp_loc);
		else
			RADEON_WRITE(RADEON_MC_AGP_LOCATION, agp_loc);
		break;
	}
}
/*
 * Program the AGP base address for the chip family.
 *
 * R6xx/R7xx take bits 22..39 of the address in a single register.
 * Older parts take the low 32 bits and, where a second register is
 * written, the high 32 bits, via direct MMIO or indirect MC writes.
 */
void radeon_write_agp_base(drm_radeon_private_t *dev_priv, u64 agp_base)
{
	const unsigned family = dev_priv->flags & RADEON_FAMILY_MASK;
	const u32 base_hi = upper_32_bits(agp_base);
	const u32 base_lo = agp_base & 0xffffffff;
	const u32 base_4mb = (agp_base >> 22) & 0x3ffff;

	/* R6xx/R7xx must be aligned to a 4MB boundary */
	if (family >= CHIP_RV770) {
		RADEON_WRITE(R700_MC_VM_AGP_BASE, base_4mb);
		return;
	}
	if (family >= CHIP_R600) {
		RADEON_WRITE(R600_MC_VM_AGP_BASE, base_4mb);
		return;
	}

	switch (family) {
	case CHIP_RV515:
		R500_WRITE_MCIND(RV515_MC_AGP_BASE, base_lo);
		R500_WRITE_MCIND(RV515_MC_AGP_BASE_2, base_hi);
		return;
	case CHIP_RS690:
	case CHIP_RS740:
		RS690_WRITE_MCIND(RS690_MC_AGP_BASE, base_lo);
		RS690_WRITE_MCIND(RS690_MC_AGP_BASE_2, base_hi);
		return;
	case CHIP_RS600:
		RS600_WRITE_MCIND(RS600_AGP_BASE, base_lo);
		RS600_WRITE_MCIND(RS600_AGP_BASE_2, base_hi);
		return;
	default:
		break;
	}

	if (family > CHIP_RV515) {
		R500_WRITE_MCIND(R520_MC_AGP_BASE, base_lo);
		R500_WRITE_MCIND(R520_MC_AGP_BASE_2, base_hi);
	} else if (family == CHIP_RS400 || family == CHIP_RS480) {
		RADEON_WRITE(RADEON_AGP_BASE, base_lo);
		RADEON_WRITE(RS480_AGP_BASE_2, base_hi);
	} else {
		RADEON_WRITE(RADEON_AGP_BASE, base_lo);
		/* only R200 and newer get the high half written */
		if (family >= CHIP_R200)
			RADEON_WRITE(RADEON_AGP_BASE_2, base_hi);
	}
}
/*
 * Turn on bus mastering by clearing the family-specific "bus master
 * disable" bit in RADEON_BUS_CNTL.  Families matching neither group are
 * left untouched.
 */
void radeon_enable_bm(struct drm_radeon_private *dev_priv)
{
	const unsigned family = dev_priv->flags & RADEON_FAMILY_MASK;
	u32 disable_bit;
	u32 tmp;

	if (family == CHIP_RS690 || family == CHIP_RS740) {
		/* rs600/rs690/rs740 */
		/* NOTE(review): CHIP_RS600 is named above but not tested by
		 * this condition - confirm whether that is intended.
		 */
		disable_bit = RS600_BUS_MASTER_DIS;
	} else if (family <= CHIP_RV350 ||
		   family == CHIP_R420 ||
		   family == CHIP_RS400 ||
		   family == CHIP_RS480) {
		/* r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */
		disable_bit = RADEON_BUS_MASTER_DIS;
	} else {
		/* PCIE cards appears to not need this */
		return;
	}

	tmp = RADEON_READ(RADEON_BUS_CNTL) & ~disable_bit;
	RADEON_WRITE(RADEON_BUS_CNTL, tmp);
}
/*
 * Read PLL register @addr: select it (low 5 bits) through the clock
 * control index register, then read the clock control data register.
 */
static int RADEON_READ_PLL(struct drm_device * dev, int addr)
{
	/* dev_priv is referenced by the register access macros */
	drm_radeon_private_t *dev_priv = dev->dev_private;
	const int pll_index = addr & 0x1f;

	RADEON_WRITE8(RADEON_CLOCK_CNTL_INDEX, pll_index);

	return RADEON_READ(RADEON_CLOCK_CNTL_DATA);
}
/*
 * Read PCIE register @addr: select it (low 8 bits) through the PCIE
 * index register, then read the PCIE data register.
 */
static u32 RADEON_READ_PCIE(drm_radeon_private_t *dev_priv, int addr)
{
	const int pcie_index = addr & 0xff;

	RADEON_WRITE8(RADEON_PCIE_INDEX, pcie_index);

	return RADEON_READ(RADEON_PCIE_DATA);
}
#if RADEON_FIFO_DEBUG
/*
 * Debug helper: print a fixed set of engine, ring and AIC registers.
 * Only built when RADEON_FIFO_DEBUG is non-zero.
 */
static void radeon_status(drm_radeon_private_t * dev_priv)
{
	unsigned int val;

	printk("%s:\n", __func__);

	val = (unsigned int)RADEON_READ(RADEON_RBBM_STATUS);
	printk("RBBM_STATUS = 0x%08x\n", val);

	val = (unsigned int)RADEON_READ(RADEON_CP_RB_RPTR);
	printk("CP_RB_RTPR = 0x%08x\n", val);

	val = (unsigned int)RADEON_READ(RADEON_CP_RB_WPTR);
	printk("CP_RB_WTPR = 0x%08x\n", val);

	val = (unsigned int)RADEON_READ(RADEON_AIC_CNTL);
	printk("AIC_CNTL = 0x%08x\n", val);

	val = (unsigned int)RADEON_READ(RADEON_AIC_STAT);
	printk("AIC_STAT = 0x%08x\n", val);

	val = (unsigned int)RADEON_READ(RADEON_AIC_PT_BASE);
	printk("AIC_PT_BASE = 0x%08x\n", val);

	val = (unsigned int)RADEON_READ(RADEON_AIC_TLB_ADDR);
	printk("TLB_ADDR = 0x%08x\n", val);

	val = (unsigned int)RADEON_READ(RADEON_AIC_TLB_DATA);
	printk("TLB_DATA = 0x%08x\n", val);
}
#endif
/* ================================================================
* Engine, FIFO control
*/
/*
 * Flush the destination pixel cache on RV280 and older parts and poll
 * until it is no longer busy.
 *
 * Returns 0 once the cache is idle (or immediately on newer parts,
 * where nothing is flushed), -EBUSY if it is still busy after
 * usec_timeout polls.
 */
static int radeon_do_pixcache_flush(drm_radeon_private_t * dev_priv)
{
	u32 ctlstat;
	int tries;

	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;

	/* don't flush or purge cache here or lockup */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_RV280)
		return 0;

	ctlstat = RADEON_READ(RADEON_RB3D_DSTCACHE_CTLSTAT);
	ctlstat |= RADEON_RB3D_DC_FLUSH_ALL;
	RADEON_WRITE(RADEON_RB3D_DSTCACHE_CTLSTAT, ctlstat);

	for (tries = 0; tries < dev_priv->usec_timeout; tries++) {
		if (RADEON_READ(RADEON_RB3D_DSTCACHE_CTLSTAT)
		    & RADEON_RB3D_DC_BUSY) {
			DRM_UDELAY(1);
			continue;
		}
		return 0;
	}

#if RADEON_FIFO_DEBUG
	DRM_ERROR("failed!\n");
	radeon_status(dev_priv);
#endif
	return -EBUSY;
}
/*
 * Poll RBBM_STATUS until at least @entries FIFO slots are reported.
 *
 * Returns 0 as soon as enough slots are available, -EBUSY after
 * usec_timeout unsuccessful polls (one microsecond apart).
 */
static int radeon_do_wait_for_fifo(drm_radeon_private_t * dev_priv, int entries)
{
	int tries;

	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;

	for (tries = 0; tries < dev_priv->usec_timeout; tries++) {
		int avail = (RADEON_READ(RADEON_RBBM_STATUS)
			     & RADEON_RBBM_FIFOCNT_MASK);

		if (avail < entries) {
			DRM_UDELAY(1);
			continue;
		}
		return 0;
	}

	DRM_DEBUG("wait for fifo failed status : 0x%08X 0x%08X\n",
		  RADEON_READ(RADEON_RBBM_STATUS),
		  RADEON_READ(R300_VAP_CNTL_STATUS));

#if RADEON_FIFO_DEBUG
	DRM_ERROR("failed!\n");
	radeon_status(dev_priv);
#endif
	return -EBUSY;
}
/*
 * Wait for the engine to go idle.
 *
 * First waits for 64 free FIFO slots, then polls RBBM_STATUS until the
 * active bit clears, at which point the pixel cache is flushed.
 *
 * Returns 0 when idle, the FIFO wait's error if that fails, or -EBUSY
 * if the engine is still active after usec_timeout polls.
 */
static int radeon_do_wait_for_idle(drm_radeon_private_t * dev_priv)
{
	int tries, ret;

	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;

	ret = radeon_do_wait_for_fifo(dev_priv, 64);
	if (ret != 0)
		return ret;

	for (tries = 0; tries < dev_priv->usec_timeout; tries++) {
		if (RADEON_READ(RADEON_RBBM_STATUS)
		    & RADEON_RBBM_ACTIVE) {
			DRM_UDELAY(1);
			continue;
		}
		/* idle: the flush result is intentionally not propagated */
		radeon_do_pixcache_flush(dev_priv);
		return 0;
	}

	DRM_DEBUG("wait idle failed status : 0x%08X 0x%08X\n",
		  RADEON_READ(RADEON_RBBM_STATUS),
		  RADEON_READ(R300_VAP_CNTL_STATUS));

#if RADEON_FIFO_DEBUG
	DRM_ERROR("failed!\n");
	radeon_status(dev_priv);
#endif
	return -EBUSY;
}
/*
 * Work out the number of Z and GB pipes for the chip, record them in
 * dev_priv and program the tiling / pipe configuration registers
 * accordingly.
 */
static void radeon_init_pipes(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	const unsigned family = dev_priv->flags & RADEON_FAMILY_MASK;
	uint32_t gb_tile_config, gb_pipe_sel = 0;

	/* Z pipes: only RV530 can report two */
	if (family != CHIP_RV530) {
		dev_priv->num_z_pipes = 1;
	} else {
		uint32_t z_pipe_sel = RADEON_READ(RV530_GB_PIPE_SELECT2);

		dev_priv->num_z_pipes = ((z_pipe_sel & 3) == 3) ? 2 : 1;
	}

	if (family < CHIP_R420) {
		/* R3xx */
		switch (family) {
		case CHIP_R300:
			dev_priv->num_gb_pipes =
				(dev->pdev->device != 0x4144) ? 2 : 1;
			break;
		case CHIP_R350:
			dev_priv->num_gb_pipes =
				(dev->pdev->device != 0x4148) ? 2 : 1;
			break;
		default:
			/* RV3xx/R300 AD/R350 AH */
			dev_priv->num_gb_pipes = 1;
			break;
		}
	} else {
		/* RS4xx/RS6xx/R4xx/R5xx */
		gb_pipe_sel = RADEON_READ(R400_GB_PIPE_SELECT);
		dev_priv->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1;
		/* SE cards have 1 pipe */
		if ((dev->pdev->device == 0x5e4c) ||
		    (dev->pdev->device == 0x5e4f))
			dev_priv->num_gb_pipes = 1;
	}
	DRM_INFO("Num pipes: %d\n", dev_priv->num_gb_pipes);

	gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16 /*| R300_SUBPIXEL_1_16*/);

	switch (dev_priv->num_gb_pipes) {
	case 2:
		gb_tile_config |= R300_PIPE_COUNT_R300;
		break;
	case 3:
		gb_tile_config |= R300_PIPE_COUNT_R420_3P;
		break;
	case 4:
		gb_tile_config |= R300_PIPE_COUNT_R420;
		break;
	default:
		/* one pipe, and any unexpected count */
		gb_tile_config |= R300_PIPE_COUNT_RV350;
		break;
	}

	if (family >= CHIP_RV515) {
		RADEON_WRITE_PLL(R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4));
		RADEON_WRITE(R300_SU_REG_DEST, ((1 << dev_priv->num_gb_pipes) - 1));
	}

	RADEON_WRITE(R300_GB_TILE_CONFIG, gb_tile_config);
	radeon_do_wait_for_idle(dev_priv);

	RADEON_WRITE(R300_DST_PIPE_CONFIG, RADEON_READ(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG);
	RADEON_WRITE(R300_RB2D_DSTCACHE_MODE, (RADEON_READ(R300_RB2D_DSTCACHE_MODE) |
					       R300_DC_AUTOFLUSH_ENABLE |
					       R300_DC_DC_DISABLE_IGNORE_PE));
}
/* ================================================================
* CP control, initialization
*/
/* Load the microcode for the CP
 *
 * Picks the firmware image for the chip family and requests it through
 * a temporary "radeon_cp" platform device; the result is stored in
 * dev_priv->me_fw.
 *
 * Returns 0 on success, -EINVAL if the platform device cannot be
 * registered, if no firmware image is known for the chip family, or if
 * the image size is not a multiple of 8 bytes, or the error returned by
 * request_firmware().
 */
static int radeon_cp_init_microcode(drm_radeon_private_t *dev_priv)
{
	const unsigned family = dev_priv->flags & RADEON_FAMILY_MASK;
	struct platform_device *pdev;
	const char *fw_name = NULL;
	int err;

	DRM_DEBUG("\n");

	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
		return -EINVAL;
	}

	switch (family) {
	case CHIP_R100:
	case CHIP_RV100:
	case CHIP_RV200:
	case CHIP_RS100:
	case CHIP_RS200:
		DRM_INFO("Loading R100 Microcode\n");
		fw_name = FIRMWARE_R100;
		break;
	case CHIP_R200:
	case CHIP_RV250:
	case CHIP_RV280:
	case CHIP_RS300:
		DRM_INFO("Loading R200 Microcode\n");
		fw_name = FIRMWARE_R200;
		break;
	case CHIP_R300:
	case CHIP_R350:
	case CHIP_RV350:
	case CHIP_RV380:
	case CHIP_RS400:
	case CHIP_RS480:
		DRM_INFO("Loading R300 Microcode\n");
		fw_name = FIRMWARE_R300;
		break;
	case CHIP_R420:
	case CHIP_R423:
	case CHIP_RV410:
		DRM_INFO("Loading R400 Microcode\n");
		fw_name = FIRMWARE_R420;
		break;
	case CHIP_RS690:
	case CHIP_RS740:
		DRM_INFO("Loading RS690/RS740 Microcode\n");
		fw_name = FIRMWARE_RS690;
		break;
	case CHIP_RS600:
		DRM_INFO("Loading RS600 Microcode\n");
		fw_name = FIRMWARE_RS600;
		break;
	case CHIP_RV515:
	case CHIP_R520:
	case CHIP_RV530:
	case CHIP_R580:
	case CHIP_RV560:
	case CHIP_RV570:
		DRM_INFO("Loading R500 Microcode\n");
		fw_name = FIRMWARE_R520;
		break;
	default:
		break;
	}

	/* Do not hand a NULL name to request_firmware() or to "%s" below. */
	if (fw_name == NULL) {
		printk(KERN_ERR
		       "radeon_cp: No CP firmware known for chip family %u\n",
		       family);
		platform_device_unregister(pdev);
		return -EINVAL;
	}

	err = request_firmware(&dev_priv->me_fw, fw_name, &pdev->dev);
	platform_device_unregister(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to load firmware \"%s\"\n",
		       fw_name);
	} else if (dev_priv->me_fw->size % 8) {
		/* the loader writes the image as pairs of 32-bit words */
		printk(KERN_ERR
		       "radeon_cp: Bogus length %zu in firmware \"%s\"\n",
		       dev_priv->me_fw->size, fw_name);
		err = -EINVAL;
		release_firmware(dev_priv->me_fw);
		dev_priv->me_fw = NULL;
	}
	return err;
}
/*
 * Upload the loaded CP microcode image to the ME RAM.
 *
 * Waits for the engine to go idle, resets the ME RAM address to 0 and
 * writes the big-endian image as (DATAH, DATAL) pairs of 32-bit words.
 * Only the idle wait is done when no firmware is loaded.
 */
static void radeon_cp_load_microcode(drm_radeon_private_t *dev_priv)
{
	const __be32 *words;
	int idx, nwords;

	radeon_do_wait_for_idle(dev_priv);

	if (!dev_priv->me_fw)
		return;

	nwords = dev_priv->me_fw->size / 4;
	words = (const __be32 *)&dev_priv->me_fw->data[0];

	RADEON_WRITE(RADEON_CP_ME_RAM_ADDR, 0);
	for (idx = 0; idx < nwords; idx += 2) {
		const __be32 *pair = &words[idx];

		RADEON_WRITE(RADEON_CP_ME_RAM_DATAH,
			     be32_to_cpup(pair));
		RADEON_WRITE(RADEON_CP_ME_RAM_DATAL,
			     be32_to_cpup(pair + 1));
	}
}
/* Flush any pending commands to the CP. This should only be used just
 * prior to a wait for idle, as it informs the engine that the command
 * stream is ending.
 *
 * As built, this only emits a debug message: the write-pointer update
 * below is compiled out with #if 0.
 */
static void radeon_do_cp_flush(drm_radeon_private_t * dev_priv)
{
DRM_DEBUG("\n");
#if 0
u32 tmp;
/* disabled: rewrite the ring write pointer with bit 31 set */
tmp = RADEON_READ(RADEON_CP_RB_WPTR) | (1 << 31);
RADEON_WRITE(RADEON_CP_RB_WPTR, tmp);
#endif
}
/* Wait for the CP to go idle.
 *
 * Queues a cache purge, a Z-cache purge and a wait-until-idle in one
 * 6-dword ring reservation, submits it, and returns the result of
 * radeon_do_wait_for_idle().
 */
int radeon_do_cp_idle(drm_radeon_private_t * dev_priv)
{
RING_LOCALS;
DRM_DEBUG("\n");
/* reserve ring space for the three macro-emitted commands below */
BEGIN_RING(6);
RADEON_PURGE_CACHE();
RADEON_PURGE_ZCACHE();
RADEON_WAIT_UNTIL_IDLE();
ADVANCE_RING();
COMMIT_RING();
return radeon_do_wait_for_idle(dev_priv);
}
/* Start the Command Processor.
 *
 * Waits for idle, programs the CP queue mode and marks the CP running.
 * On R420 a CP RESYNC token is queued first; then the ISYNC control
 * value, cache purges and a wait-until-idle are queued and submitted.
 */
static void radeon_do_cp_start(drm_radeon_private_t * dev_priv)
{
	const u32 isync_cntl = RADEON_ISYNC_ANY2D_IDLE3D |
			       RADEON_ISYNC_ANY3D_IDLE2D |
			       RADEON_ISYNC_WAIT_IDLEGUI |
			       RADEON_ISYNC_CPSCRATCH_IDLEGUI;
	RING_LOCALS;

	DRM_DEBUG("\n");

	radeon_do_wait_for_idle(dev_priv);

	RADEON_WRITE(RADEON_CP_CSQ_CNTL, dev_priv->cp_mode);
	dev_priv->cp_running = 1;

	/* on r420, any DMA from CP to system memory while 2D is active
	 * can cause a hang.  workaround is to queue a CP RESYNC token
	 */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R420) {
		BEGIN_RING(3);
		OUT_RING(CP_PACKET0(R300_CP_RESYNC_ADDR, 1));
		OUT_RING(5); /* scratch reg 5 */
		OUT_RING(0xdeadbeef);
		ADVANCE_RING();
		COMMIT_RING();
	}

	BEGIN_RING(8);
	/* isync can only be written through cp on r5xx write it here */
	OUT_RING(CP_PACKET0(RADEON_ISYNC_CNTL, 0));
	OUT_RING(isync_cntl);
	RADEON_PURGE_CACHE();
	RADEON_PURGE_ZCACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();
	COMMIT_RING();

	dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED;
}
/* Reset the Command Processor.  This will not flush any pending
 * commands, so you must wait for the CP command stream to complete
 * before calling this routine.
 *
 * The hardware write pointer, the ring head and the driver's ring tail
 * are all set to the current hardware read pointer.
 */
static void radeon_do_cp_reset(drm_radeon_private_t * dev_priv)
{
	u32 rptr;

	DRM_DEBUG("\n");

	rptr = RADEON_READ(RADEON_CP_RB_RPTR);

	RADEON_WRITE(RADEON_CP_RB_WPTR, rptr);
	SET_RING_HEAD(dev_priv, rptr);
	dev_priv->ring.tail = rptr;
}
/* Stop the Command Processor.  This will not flush any pending
 * commands, so you must flush the command stream and wait for the CP
 * to go idle before calling this routine.
 *
 * On R420 a destination-cache finish is queued and waited for first;
 * the CP queue is then disabled and the CP marked as not running.
 */
static void radeon_do_cp_stop(drm_radeon_private_t * dev_priv)
{
	const int is_r420 =
		(dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R420;
	RING_LOCALS;

	DRM_DEBUG("\n");

	/* finish the pending CP_RESYNC token */
	if (is_r420) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
		OUT_RING(R300_RB3D_DC_FINISH);
		ADVANCE_RING();
		COMMIT_RING();
		radeon_do_wait_for_idle(dev_priv);
	}

	RADEON_WRITE(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS);

	dev_priv->cp_running = 0;
}
/* Reset the engine.  This will stop the CP if it is running.
 *
 * Flushes the pixel cache, pulses the soft-reset bits in
 * RBBM_SOFT_RESET (with the memory clocks forced on around the pulse on
 * RV410 and older), re-initialises the raster pipes on R300 and newer,
 * resets the CP ring and the buffer freelist.  Always returns 0.
 */
static int radeon_do_engine_reset(struct drm_device * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	const u32 reset_bits = RADEON_SOFT_RESET_CP |
			       RADEON_SOFT_RESET_HI |
			       RADEON_SOFT_RESET_SE |
			       RADEON_SOFT_RESET_RE |
			       RADEON_SOFT_RESET_PP |
			       RADEON_SOFT_RESET_E2 |
			       RADEON_SOFT_RESET_RB;
	u32 saved_clk_index = 0, saved_mclk = 0, saved_reset;

	DRM_DEBUG("\n");

	radeon_do_pixcache_flush(dev_priv);

	if ((dev_priv->flags & RADEON_FAMILY_MASK) <= CHIP_RV410) {
		/* may need something similar for newer chips */
		saved_clk_index = RADEON_READ(RADEON_CLOCK_CNTL_INDEX);
		saved_mclk = RADEON_READ_PLL(dev, RADEON_MCLK_CNTL);

		RADEON_WRITE_PLL(RADEON_MCLK_CNTL, (saved_mclk |
						    RADEON_FORCEON_MCLKA |
						    RADEON_FORCEON_MCLKB |
						    RADEON_FORCEON_YCLKA |
						    RADEON_FORCEON_YCLKB |
						    RADEON_FORCEON_MC |
						    RADEON_FORCEON_AIC));
	}

	saved_reset = RADEON_READ(RADEON_RBBM_SOFT_RESET);

	/* assert the reset bits, read back, then deassert and read back */
	RADEON_WRITE(RADEON_RBBM_SOFT_RESET, (saved_reset | reset_bits));
	RADEON_READ(RADEON_RBBM_SOFT_RESET);
	RADEON_WRITE(RADEON_RBBM_SOFT_RESET, (saved_reset & ~reset_bits));
	RADEON_READ(RADEON_RBBM_SOFT_RESET);

	if ((dev_priv->flags & RADEON_FAMILY_MASK) <= CHIP_RV410) {
		/* restore the clock and reset registers saved above */
		RADEON_WRITE_PLL(RADEON_MCLK_CNTL, saved_mclk);
		RADEON_WRITE(RADEON_CLOCK_CNTL_INDEX, saved_clk_index);
		RADEON_WRITE(RADEON_RBBM_SOFT_RESET, saved_reset);
	}

	/* setup the raster pipes */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R300)
		radeon_init_pipes(dev);

	/* Reset the CP ring */
	radeon_do_cp_reset(dev_priv);

	/* The CP is no longer running after an engine reset */
	dev_priv->cp_running = 0;

	/* Reset any pending vertex, indirect buffers */
	radeon_freelist_reset(dev);

	return 0;
}
/* Program the CP ring buffer registers and reset the bookkeeping around them.
 *
 * In order, this writes the framebuffer location (old memory map only), the
 * AGP base/location (AGP only), the ring base address, the write-pointer
 * delay, the read/write pointers, the read-pointer writeback address, the
 * ring control word and the scratch register setup.  It then calls
 * radeon_enable_bm(), clears scratch slots 0-2 together with the
 * LAST_FRAME/LAST_DISPATCH/LAST_CLEAR registers and the matching sarea
 * fields (when the master has an sarea_priv), waits for idle and finally
 * programs RADEON_ISYNC_CNTL.
 *
 * dev       - DRM device; dev->agp and dev->sg are dereferenced depending on
 *             whether RADEON_IS_AGP is set in dev_priv->flags.
 * dev_priv  - driver private state providing ring, GART and map information.
 * file_priv - used only to reach the master's driver_priv (sarea copies).
 */
static void radeon_cp_init_ring_buffer(struct drm_device * dev,
drm_radeon_private_t *dev_priv,
struct drm_file *file_priv)
{
struct drm_radeon_master_private *master_priv;
u32 ring_start, cur_read_ptr;
/* Initialize the memory controller. With new memory map, the fb location
* is not changed, it should have been properly initialized already. Part
* of the problem is that the code below is bogus, assuming the GART is
* always appended to the fb which is not necessarily the case
*/
if (!dev_priv->new_memmap)
radeon_write_fb_location(dev_priv,
((dev_priv->gart_vm_start - 1) & 0xffff0000)
| (dev_priv->fb_location >> 16));
/* Translate the ring map offset into a card-side address: subtract the
 * bus-side base (AGP aperture base, or the scatter/gather virtual base)
 * and add the start of the GART range.
 */
#if IS_ENABLED(CONFIG_AGP)
if (dev_priv->flags & RADEON_IS_AGP) {
radeon_write_agp_base(dev_priv, dev->agp->base);
radeon_write_agp_location(dev_priv,
(((dev_priv->gart_vm_start - 1 +
dev_priv->gart_size) & 0xffff0000) |
(dev_priv->gart_vm_start >> 16)));
ring_start = (dev_priv->cp_ring->offset
- dev->agp->base
+ dev_priv->gart_vm_start);
} else
#endif
/* Non-AGP path; also the only path when CONFIG_AGP is disabled. */
ring_start = (dev_priv->cp_ring->offset
- (unsigned long)dev->sg->virtual
+ dev_priv->gart_vm_start);
RADEON_WRITE(RADEON_CP_RB_BASE, ring_start);
/* Set the write pointer delay */
RADEON_WRITE(RADEON_CP_RB_WPTR_DELAY, 0);
/* Initialize the ring buffer's read and write pointers */
cur_read_ptr = RADEON_READ(RADEON_CP_RB_RPTR);
RADEON_WRITE(RADEON_CP_RB_WPTR, cur_read_ptr);
SET_RING_HEAD(dev_priv, cur_read_ptr);
dev_priv->ring.tail = cur_read_ptr;
/* Read-pointer writeback address, translated the same way as ring_start. */
#if IS_ENABLED(CONFIG_AGP)
if (dev_priv->flags & RADEON_IS_AGP) {
RADEON_WRITE(RADEON_CP_RB_RPTR_ADDR,
dev_priv->ring_rptr->offset
- dev->agp->base + dev_priv->gart_vm_start);
} else
#endif
{
RADEON_WRITE(RADEON_CP_RB_RPTR_ADDR,
dev_priv->ring_rptr->offset
- ((unsigned long) dev->sg->virtual)
+ dev_priv->gart_vm_start);
}
/* Set ring buffer size */
/* The two variants differ only in RADEON_BUF_SWAP_32BIT, which is added on
 * big-endian builds.
 */
#ifdef __BIG_ENDIAN
RADEON_WRITE(RADEON_CP_RB_CNTL,
RADEON_BUF_SWAP_32BIT |
(dev_priv->ring.fetch_size_l2ow << 18) |
(dev_priv->ring.rptr_update_l2qw << 8) |
dev_priv->ring.size_l2qw);
#else
RADEON_WRITE(RADEON_CP_RB_CNTL,
(dev_priv->ring.fetch_size_l2ow << 18) |
(dev_priv->ring.rptr_update_l2qw << 8) |
dev_priv->ring.size_l2qw);
#endif
/* Initialize the scratch register pointer. This will cause
* the scratch register values to be written out to memory
* whenever they are updated.
*
* We simply put this behind the ring read pointer, this works
* with PCI GART as well as (whatever kind of) AGP GART
*/
RADEON_WRITE(RADEON_SCRATCH_ADDR, RADEON_READ(RADEON_CP_RB_RPTR_ADDR)
+ RADEON_SCRATCH_REG_OFFSET);
/* NOTE(review): 0x7 presumably enables writeback for scratch registers
 * 0-2, matching the three slots cleared below - confirm against the
 * register reference.
 */
RADEON_WRITE(RADEON_SCRATCH_UMSK, 0x7);
radeon_enable_bm(dev_priv);
/* Zero both the in-memory scratch copies and the registers behind them. */
radeon_write_ring_rptr(dev_priv, RADEON_SCRATCHOFF(0), 0);
RADEON_WRITE(RADEON_LAST_FRAME_REG, 0);
radeon_write_ring_rptr(dev_priv, RADEON_SCRATCHOFF(1), 0);
RADEON_WRITE(RADEON_LAST_DISPATCH_REG, 0);
radeon_write_ring_rptr(dev_priv, RADEON_SCRATCHOFF(2), 0);
RADEON_WRITE(RADEON_LAST_CLEAR_REG, 0);
/* reset sarea copies of these */
master_priv = file_priv->master->driver_priv;
if (master_priv->sarea_priv) {
master_priv->sarea_priv->last_frame = 0;
master_priv->sarea_priv->last_dispatch = 0;
master_priv->sarea_priv->last_clear = 0;
}
radeon_do_wait_for_idle(dev_priv);
/* Sync everything up */
RADEON_WRITE(RADEON_ISYNC_CNTL,
(RADEON_ISYNC_ANY2D_IDLE3D |
RADEON_ISYNC_ANY3D_IDLE2D |
RADEON_ISYNC_WAIT_IDLEGUI |
RADEON_ISYNC_CPSCRATCH_IDLEGUI));
}
/* Probe whether scratch register writeback to memory works.
 *
 * The in-memory copy of scratch slot 1 is cleared, 0xdeadbeef is written
 * to RADEON_SCRATCH_REG1, and the memory copy is polled (1 usec per step,
 * bounded by dev_priv->usec_timeout) until the value shows up.  The result
 * is stored in dev_priv->writeback_works; the radeon_no_wb setting can
 * force it off.  When writeback is not used, ring read-pointer updates and
 * scratch writeback are disabled in hardware.
 */
static void radeon_test_writeback(drm_radeon_private_t * dev_priv)
{
	u32 usecs = 0;

	/* Start with assuming that writeback doesn't work */
	dev_priv->writeback_works = 0;

	/* Writeback doesn't seem to work everywhere, test it here and possibly
	 * enable it if it appears to work
	 */
	radeon_write_ring_rptr(dev_priv, RADEON_SCRATCHOFF(1), 0);
	RADEON_WRITE(RADEON_SCRATCH_REG1, 0xdeadbeef);

	while (usecs < dev_priv->usec_timeout) {
		u32 seen = radeon_read_ring_rptr(dev_priv, RADEON_SCRATCHOFF(1));

		if (seen == 0xdeadbeef)
			break;
		DRM_UDELAY(1);
		usecs++;
	}

	if (usecs >= dev_priv->usec_timeout) {
		dev_priv->writeback_works = 0;
		DRM_INFO("writeback test failed\n");
	} else {
		dev_priv->writeback_works = 1;
		DRM_INFO("writeback test succeeded in %d usecs\n", usecs);
	}

	if (radeon_no_wb == 1) {
		dev_priv->writeback_works = 0;
		DRM_INFO("writeback forced off\n");
	}

	if (dev_priv->writeback_works)
		return;

	/* Disable writeback to avoid unnecessary bus master transfer */
	RADEON_WRITE(RADEON_CP_RB_CNTL, RADEON_READ(RADEON_CP_RB_CNTL) |
		     RADEON_RB_NO_UPDATE);
	RADEON_WRITE(RADEON_SCRATCH_UMSK, 0);
}
/* Enable or disable IGP GART on the chip.
 *
 * All accesses go through the indirect MC accessors IGP_READ_MCIND and
 * IGP_WRITE_MCIND.
 *
 * on != 0: programs the MC misc control (with RS690_BLOCK_GFX_D3_EN added
 *          for the RS690 and RS740 families), the address space size,
 *          feature id, GART base and AGP mode registers, writes the AGP
 *          base/location, and finally invalidates the GART cache.  As a
 *          side effect dev_priv->gart_size is overwritten with 32MB.
 * on == 0: only writes 0 to RS480_AGP_ADDRESS_SPACE_SIZE.
 *
 * NOTE(review): several IGP_READ_MCIND() results are stored in 'temp' and
 * then overwritten without being used; whether the reads themselves are
 * required by the hardware cannot be told from this file - confirm before
 * removing any of them.
 */
static void radeon_set_igpgart(drm_radeon_private_t * dev_priv, int on)
{
u32 temp;
if (on) {
DRM_DEBUG("programming igp gart %08X %08lX %08X\n",
dev_priv->gart_vm_start,
(long)dev_priv->gart_info.bus_addr,
dev_priv->gart_size);
temp = IGP_READ_MCIND(dev_priv, RS480_MC_MISC_CNTL);
if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS690) ||
((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS740))
IGP_WRITE_MCIND(RS480_MC_MISC_CNTL, (RS480_GART_INDEX_REG_EN |
RS690_BLOCK_GFX_D3_EN));
else
IGP_WRITE_MCIND(RS480_MC_MISC_CNTL, RS480_GART_INDEX_REG_EN);
IGP_WRITE_MCIND(RS480_AGP_ADDRESS_SPACE_SIZE, (RS480_GART_EN |
RS480_VA_SIZE_32MB));
temp = IGP_READ_MCIND(dev_priv, RS480_GART_FEATURE_ID);
IGP_WRITE_MCIND(RS480_GART_FEATURE_ID, (RS480_HANG_EN |
RS480_TLB_ENABLE |
RS480_GTW_LAC_EN |
RS480_1LEVEL_GART));
/* GART base: bits 31:12 of the table bus address, with the low 8 bits of
 * the upper 32 address bits placed at bits 11:4.
 */
temp = dev_priv->gart_info.bus_addr & 0xfffff000;
temp |= (upper_32_bits(dev_priv->gart_info.bus_addr) & 0xff) << 4;
IGP_WRITE_MCIND(RS480_GART_BASE, temp);
temp = IGP_READ_MCIND(dev_priv, RS480_AGP_MODE_CNTL);
IGP_WRITE_MCIND(RS480_AGP_MODE_CNTL, ((1 << RS480_REQ_TYPE_SNOOP_SHIFT) |
RS480_REQ_TYPE_SNOOP_DIS));
radeon_write_agp_base(dev_priv, dev_priv->gart_vm_start);
/* Fixed 32MB aperture, consistent with the RS480_VA_SIZE_32MB setting
 * written to the address space size register.
 */
dev_priv->gart_size = 32*1024*1024;
temp = (((dev_priv->gart_vm_start - 1 + dev_priv->gart_size) &
0xffff0000) | (dev_priv->gart_vm_start >> 16));
radeon_write_agp_location(dev_priv, temp);
temp = IGP_READ_MCIND(dev_priv, RS480_AGP_ADDRESS_SPACE_SIZE);
IGP_WRITE_MCIND(RS480_AGP_ADDRESS_SPACE_SIZE, (RS480_GART_EN |
RS480_VA_SIZE_32MB));
/* Wait until the invalidate bit reads back clear, trigger an invalidate,
 * wait for it to clear again, then write 0 to the cache control register.
 * NOTE(review): both polling loops have no timeout and will spin forever
 * if the bit never clears.
 */
do {
temp = IGP_READ_MCIND(dev_priv, RS480_GART_CACHE_CNTRL);
if ((temp & RS480_GART_CACHE_INVALIDATE) == 0)
break;
DRM_UDELAY(1);
} while (1);
IGP_WRITE_MCIND(RS480_GART_CACHE_CNTRL,
RS480_GART_CACHE_INVALIDATE);
do {
temp = IGP_READ_MCIND(dev_priv, RS480_GART_CACHE_CNTRL);
if ((temp & RS480_GART_CACHE_INVALIDATE) == 0)
break;
DRM_UDELAY(1);
} while (1);
IGP_WRITE_MCIND(RS480_GART_CACHE_CNTRL, 0);
} else {
IGP_WRITE_MCIND(RS480_AGP_ADDRESS_SPACE_SIZE, 0);
}
}
/* Enable or disable IGP GART on the chip (RS600 register layout).
 *
 * on != 0: configures the PT0 cache/queue sizes, 19 client control
 *          registers, context 0 as a flat page table (contexts 1-7 are
 *          written with 0), the page table and system aperture address
 *          ranges, then enables the page tables and toggles the
 *          L1 TLB / L2 cache invalidate bits.
 * on == 0: writes 0 to RS600_MC_PT0_CNTL and clears
 *          RS600_ENABLE_PAGE_TABLES in RS600_MC_CNTL1.
 */
static void rs600_set_igpgart(drm_radeon_private_t *dev_priv, int on)
{
u32 temp;
int i;
if (on) {
DRM_DEBUG("programming igp gart %08X %08lX %08X\n",
dev_priv->gart_vm_start,
(long)dev_priv->gart_info.bus_addr,
dev_priv->gart_size);
IGP_WRITE_MCIND(RS600_MC_PT0_CNTL, (RS600_EFFECTIVE_L2_CACHE_SIZE(6) |
RS600_EFFECTIVE_L2_QUEUE_SIZE(6)));
/* Same settings for each of the 19 consecutive client control registers. */
for (i = 0; i < 19; i++)
IGP_WRITE_MCIND(RS600_MC_PT0_CLIENT0_CNTL + i,
(RS600_ENABLE_TRANSLATION_MODE_OVERRIDE |
RS600_SYSTEM_ACCESS_MODE_IN_SYS |
RS600_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASSTHROUGH |
RS600_EFFECTIVE_L1_CACHE_SIZE(3) |
RS600_ENABLE_FRAGMENT_PROCESSING |
RS600_EFFECTIVE_L1_QUEUE_SIZE(3)));
IGP_WRITE_MCIND(RS600_MC_PT0_CONTEXT0_CNTL, (RS600_ENABLE_PAGE_TABLE |
RS600_PAGE_TABLE_TYPE_FLAT));
/* disable all other contexts */
for (i = 1; i < 8; i++)
IGP_WRITE_MCIND(RS600_MC_PT0_CONTEXT0_CNTL + i, 0);
/* setup the page table aperture */
IGP_WRITE_MCIND(RS600_MC_PT0_CONTEXT0_FLAT_BASE_ADDR,
dev_priv->gart_info.bus_addr);
IGP_WRITE_MCIND(RS600_MC_PT0_CONTEXT0_FLAT_START_ADDR,
dev_priv->gart_vm_start);
IGP_WRITE_MCIND(RS600_MC_PT0_CONTEXT0_FLAT_END_ADDR,
(dev_priv->gart_vm_start + dev_priv->gart_size - 1));
IGP_WRITE_MCIND(RS600_MC_PT0_CONTEXT0_DEFAULT_READ_ADDR, 0);
/* setup the system aperture */
IGP_WRITE_MCIND(RS600_MC_PT0_SYSTEM_APERTURE_LOW_ADDR,
dev_priv->gart_vm_start);
IGP_WRITE_MCIND(RS600_MC_PT0_SYSTEM_APERTURE_HIGH_ADDR,
(dev_priv->gart_vm_start + dev_priv->gart_size - 1));
/* enable page tables */
temp = IGP_READ_MCIND(dev_priv, RS600_MC_PT0_CNTL);
IGP_WRITE_MCIND(RS600_MC_PT0_CNTL, (temp | RS600_ENABLE_PT));
temp = IGP_READ_MCIND(dev_priv, RS600_MC_CNTL1);
IGP_WRITE_MCIND(RS600_MC_CNTL1, (temp | RS600_ENABLE_PAGE_TABLES));
/* invalidate the cache */
/* The invalidate bits are written clear, then set, then clear again, with
 * a fresh read of the register before each write and once more at the end.
 */
temp = IGP_READ_MCIND(dev_priv, RS600_MC_PT0_CNTL);
temp &= ~(RS600_INVALIDATE_ALL_L1_TLBS | RS600_INVALIDATE_L2_CACHE);
IGP_WRITE_MCIND(RS600_MC_PT0_CNTL, temp);
temp = IGP_READ_MCIND(dev_priv, RS600_MC_PT0_CNTL);
temp |= RS600_INVALIDATE_ALL_L1_TLBS | RS600_INVALIDATE_L2_CACHE;
IGP_WRITE_MCIND(RS600_MC_PT0_CNTL, temp);
temp = IGP_READ_MCIND(dev_priv, RS600_MC_PT0_CNTL);
temp &= ~(RS600_INVALIDATE_ALL_L1_TLBS | RS600_INVALIDATE_L2_CACHE);
IGP_WRITE_MCIND(RS600_MC_PT0_CNTL, temp);
temp = IGP_READ_MCIND(dev_priv, RS600_MC_PT0_CNTL);
} else {
IGP_WRITE_MCIND(RS600_MC_PT0_CNTL, 0);
temp = IGP_READ_MCIND(dev_priv, RS600_MC_CNTL1);
temp &= ~RS600_ENABLE_PAGE_TABLES;
IGP_WRITE_MCIND(RS600_MC_CNTL1, temp);
}
}
/* Enable or disable the PCIE GART.
 *
 * Disabling only clears RADEON_PCIE_TX_GART_EN in the current control
 * value.  Enabling programs the discard address, table base and the
 * start/end of the translated range, writes the AGP location, and then
 * writes RADEON_PCIE_TX_GART_EN to the control register.
 */
static void radeon_set_pciegart(drm_radeon_private_t * dev_priv, int on)
{
	u32 gart_cntl = RADEON_READ_PCIE(dev_priv, RADEON_PCIE_TX_GART_CNTL);

	if (!on) {
		RADEON_WRITE_PCIE(RADEON_PCIE_TX_GART_CNTL,
				  gart_cntl & ~RADEON_PCIE_TX_GART_EN);
		return;
	}

	DRM_DEBUG("programming pcie %08X %08lX %08X\n",
		  dev_priv->gart_vm_start,
		  (long)dev_priv->gart_info.bus_addr,
		  dev_priv->gart_size);

	RADEON_WRITE_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_LO,
			  dev_priv->gart_vm_start);
	RADEON_WRITE_PCIE(RADEON_PCIE_TX_GART_BASE,
			  dev_priv->gart_info.bus_addr);
	RADEON_WRITE_PCIE(RADEON_PCIE_TX_GART_START_LO,
			  dev_priv->gart_vm_start);
	RADEON_WRITE_PCIE(RADEON_PCIE_TX_GART_END_LO,
			  dev_priv->gart_vm_start +
			  dev_priv->gart_size - 1);

	radeon_write_agp_location(dev_priv, 0xffffffc0);	/* ?? */

	RADEON_WRITE_PCIE(RADEON_PCIE_TX_GART_CNTL,
			  RADEON_PCIE_TX_GART_EN);
}
/* Enable or disable PCI GART on the chip.
 *
 * Dispatches to the IGP, RS600 or PCIE variant when the chip flags call
 * for one; otherwise programs the AIC translation registers directly.
 */
static void radeon_set_pcigart(drm_radeon_private_t * dev_priv, int on)
{
	const u32 family = dev_priv->flags & RADEON_FAMILY_MASK;
	u32 aic_cntl;

	if (family == CHIP_RS690 || family == CHIP_RS740 ||
	    (dev_priv->flags & RADEON_IS_IGPGART)) {
		radeon_set_igpgart(dev_priv, on);
	} else if (family == CHIP_RS600) {
		rs600_set_igpgart(dev_priv, on);
	} else if (dev_priv->flags & RADEON_IS_PCIE) {
		radeon_set_pciegart(dev_priv, on);
	} else {
		aic_cntl = RADEON_READ(RADEON_AIC_CNTL);

		if (!on) {
			RADEON_WRITE(RADEON_AIC_CNTL,
				     aic_cntl & ~RADEON_PCIGART_TRANSLATE_EN);
			return;
		}

		RADEON_WRITE(RADEON_AIC_CNTL,
			     aic_cntl | RADEON_PCIGART_TRANSLATE_EN);

		/* set PCI GART page-table base address */
		RADEON_WRITE(RADEON_AIC_PT_BASE, dev_priv->gart_info.bus_addr);

		/* set address range for PCI address translate */
		RADEON_WRITE(RADEON_AIC_LO_ADDR, dev_priv->gart_vm_start);
		RADEON_WRITE(RADEON_AIC_HI_ADDR, dev_priv->gart_vm_start
			     + dev_priv->gart_size - 1);

		/* Turn off AGP aperture -- is this required for PCI GART? */
		radeon_write_agp_location(dev_priv, 0xffffffc0);
		RADEON_WRITE(RADEON_AGP_COMMAND, 0);	/* clear AGP_COMMAND */
	}
}
/* Reserve a surface covering the PCI GART table.
 *
 * Picks the first virtual surface slot that is either unowned or already
 * tagged with PCIGART_FILE_PRIV, then the first hardware surface with a
 * zero refcount, and programs that surface with flags 0 and bounds
 * [bus_addr, bus_addr + table_size].
 *
 * Returns 0 on success, -ENOMEM when no virtual slot or no hardware
 * surface is available (nothing is modified in that case).
 */
static int radeon_setup_pcigart_surface(drm_radeon_private_t *dev_priv)
{
	struct drm_ati_pcigart_info *gart_info = &dev_priv->gart_info;
	struct radeon_virt_surface *virt = NULL;
	struct radeon_surface *surf;
	int slot, idx;

	for (slot = 0; slot < RADEON_MAX_SURFACES * 2; slot++) {
		struct radeon_virt_surface *cand =
		    &dev_priv->virt_surfaces[slot];

		if (!cand->file_priv || cand->file_priv == PCIGART_FILE_PRIV) {
			virt = cand;
			break;
		}
	}
	if (!virt)
		return -ENOMEM;

	/* First hardware surface nobody holds a reference on. */
	for (idx = 0; idx < RADEON_MAX_SURFACES; idx++) {
		if (!dev_priv->surfaces[idx].refcount)
			break;
	}
	if (idx >= RADEON_MAX_SURFACES)
		return -ENOMEM;

	surf = &dev_priv->surfaces[idx];

	virt->surface_index = idx;
	virt->lower = gart_info->bus_addr;
	virt->upper = virt->lower + gart_info->table_size;
	virt->flags = 0;
	virt->file_priv = PCIGART_FILE_PRIV;

	surf->refcount = 1;
	surf->lower = virt->lower;
	surf->upper = virt->upper;
	surf->flags = 0;

	RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * idx, surf->flags);
	RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * idx, surf->lower);
	RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * idx, surf->upper);

	return 0;
}
/* Initialise the CP from a userspace drm_radeon_init_t request.
 *
 * Phases, in order:
 *   1. validate the request (memory map, bus type, timeout, cp_mode);
 *   2. copy framebuffer/depth layout and offsets into dev_priv;
 *   3. look up the sarea, ring, ring read pointer, DMA buffer and
 *      (optional) GART texture maps;
 *   4. compute the framebuffer location/size and choose the GART base;
 *   5. fill in the ring bookkeeping in dev_priv->ring;
 *   6. set up the PCI GART (non-AGP) or turn it off (AGP);
 *   7. load microcode, initialise the ring buffer, reset the engine and
 *      test writeback.
 *
 * Every failure path calls radeon_do_cleanup_cp() before returning.
 *
 * Returns 0 on success, -EINVAL for a rejected request or missing map,
 * -ENOMEM when PCI GART initialisation fails, or the error returned by
 * radeon_setup_pcigart_surface() / radeon_cp_init_microcode().
 */
static int radeon_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
struct drm_file *file_priv)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
DRM_DEBUG("\n");
/* if we require new memory map but we don't have it fail */
if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) {
DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX for 3D\n");
radeon_do_cleanup_cp(dev);
return -EINVAL;
}
/* Userspace may force an AGP card into PCI mode; conversely, if no bus
 * flag is set at all and PCI was not requested, the AGP flag is put back.
 */
if (init->is_pci && (dev_priv->flags & RADEON_IS_AGP)) {
DRM_DEBUG("Forcing AGP card to PCI mode\n");
dev_priv->flags &= ~RADEON_IS_AGP;
} else if (!(dev_priv->flags & (RADEON_IS_AGP | RADEON_IS_PCI | RADEON_IS_PCIE))
&& !init->is_pci) {
DRM_DEBUG("Restoring AGP flag\n");
dev_priv->flags |= RADEON_IS_AGP;
}
/* The non-AGP paths below dereference dev->sg, so it must exist. */
if ((!(dev_priv->flags & RADEON_IS_AGP)) && !dev->sg) {
DRM_ERROR("PCI GART memory not allocated!\n");
radeon_do_cleanup_cp(dev);
return -EINVAL;
}
dev_priv->usec_timeout = init->usec_timeout;
if (dev_priv->usec_timeout < 1 ||
dev_priv->usec_timeout > RADEON_MAX_USEC_TIMEOUT) {
DRM_DEBUG("TIMEOUT problem!\n");
radeon_do_cleanup_cp(dev);
return -EINVAL;
}
/* Enable vblank on CRTC1 for older X servers
*/
dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1;
/* Any init function other than the R200/R300 ones selects R100 microcode. */
switch(init->func) {
case RADEON_INIT_R200_CP:
dev_priv->microcode_version = UCODE_R200;
break;
case RADEON_INIT_R300_CP:
dev_priv->microcode_version = UCODE_R300;
break;
default:
dev_priv->microcode_version = UCODE_R100;
}
dev_priv->do_boxes = 0;
dev_priv->cp_mode = init->cp_mode;
/* We don't support anything other than bus-mastering ring mode,
* but the ring can be in either AGP or PCI space for the ring
* read pointer.
*/
if ((init->cp_mode != RADEON_CSQ_PRIBM_INDDIS) &&
(init->cp_mode != RADEON_CSQ_PRIBM_INDBM)) {
DRM_DEBUG("BAD cp_mode (%x)!\n", init->cp_mode);
radeon_do_cleanup_cp(dev);
return -EINVAL;
}
/* Anything other than 16 bpp is treated as 32 bpp ARGB8888. */
switch (init->fb_bpp) {
case 16:
dev_priv->color_fmt = RADEON_COLOR_FORMAT_RGB565;
break;
case 32:
default:
dev_priv->color_fmt = RADEON_COLOR_FORMAT_ARGB8888;
break;
}
dev_priv->front_offset = init->front_offset;
dev_priv->front_pitch = init->front_pitch;
dev_priv->back_offset = init->back_offset;
dev_priv->back_pitch = init->back_pitch;
/* Anything other than 16 bpp depth selects the 24-bit integer Z format. */
switch (init->depth_bpp) {
case 16:
dev_priv->depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
break;
case 32:
default:
dev_priv->depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
break;
}
dev_priv->depth_offset = init->depth_offset;
dev_priv->depth_pitch = init->depth_pitch;
/* Hardware state for depth clears. Remove this if/when we no
* longer clear the depth buffer with a 3D rectangle. Hard-code
* all values to prevent unwanted 3D state from slipping through
* and screwing with the clear operation.
*/
dev_priv->depth_clear.rb3d_cntl = (RADEON_PLANE_MASK_ENABLE |
(dev_priv->color_fmt << 10) |
(dev_priv->microcode_version ==
UCODE_R100 ? RADEON_ZBLOCK16 : 0));
dev_priv->depth_clear.rb3d_zstencilcntl =
(dev_priv->depth_fmt |
RADEON_Z_TEST_ALWAYS |
RADEON_STENCIL_TEST_ALWAYS |
RADEON_STENCIL_S_FAIL_REPLACE |
RADEON_STENCIL_ZPASS_REPLACE |
RADEON_STENCIL_ZFAIL_REPLACE | RADEON_Z_WRITE_ENABLE);
dev_priv->depth_clear.se_cntl = (RADEON_FFACE_CULL_CW |
RADEON_BFACE_SOLID |
RADEON_FFACE_SOLID |
RADEON_FLAT_SHADE_VTX_LAST |
RADEON_DIFFUSE_SHADE_FLAT |
RADEON_ALPHA_SHADE_FLAT |
RADEON_SPECULAR_SHADE_FLAT |
RADEON_FOG_SHADE_FLAT |
RADEON_VTX_PIX_CENTER_OGL |
RADEON_ROUND_MODE_TRUNC |
RADEON_ROUND_PREC_8TH_PIX);
dev_priv->ring_offset = init->ring_offset;
dev_priv->ring_rptr_offset = init->ring_rptr_offset;
dev_priv->buffers_offset = init->buffers_offset;
dev_priv->gart_textures_offset = init->gart_textures_offset;
/* Resolve the maps named by the userspace-supplied offsets; each one is
 * mandatory except the GART texture region, which is only looked up when
 * its offset is non-zero.
 */
master_priv->sarea = drm_legacy_getsarea(dev);
if (!master_priv->sarea) {
DRM_ERROR("could not find sarea!\n");
radeon_do_cleanup_cp(dev);
return -EINVAL;
}
dev_priv->cp_ring = drm_legacy_findmap(dev, init->ring_offset);
if (!dev_priv->cp_ring) {
DRM_ERROR("could not find cp ring region!\n");
radeon_do_cleanup_cp(dev);
return -EINVAL;
}
dev_priv->ring_rptr = drm_legacy_findmap(dev, init->ring_rptr_offset);
if (!dev_priv->ring_rptr) {
DRM_ERROR("could not find ring read pointer!\n");
radeon_do_cleanup_cp(dev);
return -EINVAL;
}
dev->agp_buffer_token = init->buffers_offset;
dev->agp_buffer_map = drm_legacy_findmap(dev, init->buffers_offset);
if (!dev->agp_buffer_map) {
DRM_ERROR("could not find dma buffer region!\n");
radeon_do_cleanup_cp(dev);
return -EINVAL;
}
if (init->gart_textures_offset) {
dev_priv->gart_textures =
drm_legacy_findmap(dev, init->gart_textures_offset);
if (!dev_priv->gart_textures) {
DRM_ERROR("could not find GART texture region!\n");
radeon_do_cleanup_cp(dev);
return -EINVAL;
}
}
/* AGP: ioremap the three regions and verify each got a handle.
 * Otherwise: the map offsets are used directly as the handles.
 */
#if IS_ENABLED(CONFIG_AGP)
if (dev_priv->flags & RADEON_IS_AGP) {
drm_legacy_ioremap_wc(dev_priv->cp_ring, dev);
drm_legacy_ioremap_wc(dev_priv->ring_rptr, dev);
drm_legacy_ioremap_wc(dev->agp_buffer_map, dev);
if (!dev_priv->cp_ring->handle ||
!dev_priv->ring_rptr->handle ||
!dev->agp_buffer_map->handle) {
DRM_ERROR("could not find ioremap agp regions!\n");
radeon_do_cleanup_cp(dev);
return -EINVAL;
}
} else
#endif
{
dev_priv->cp_ring->handle =
(void *)(unsigned long)dev_priv->cp_ring->offset;
dev_priv->ring_rptr->handle =
(void *)(unsigned long)dev_priv->ring_rptr->offset;
dev->agp_buffer_map->handle =
(void *)(unsigned long)dev->agp_buffer_map->offset;
DRM_DEBUG("dev_priv->cp_ring->handle %p\n",
dev_priv->cp_ring->handle);
DRM_DEBUG("dev_priv->ring_rptr->handle %p\n",
dev_priv->ring_rptr->handle);
DRM_DEBUG("dev->agp_buffer_map->handle %p\n",
dev->agp_buffer_map->handle);
}
/* The fb location value carries the start in its low 16 bits and the end
 * in its high 16 bits, both in 64KB units; decode start and size from it.
 */
dev_priv->fb_location = (radeon_read_fb_location(dev_priv) & 0xffff) << 16;
dev_priv->fb_size =
((radeon_read_fb_location(dev_priv) & 0xffff0000u) + 0x10000)
- dev_priv->fb_location;
/* Pre-compute the combined pitch/offset words: (pitch / 64) in the bits
 * from 22 upwards, (offset + fb_location) >> 10 below.
 */
dev_priv->front_pitch_offset = (((dev_priv->front_pitch / 64) << 22) |
((dev_priv->front_offset
+ dev_priv->fb_location) >> 10));
dev_priv->back_pitch_offset = (((dev_priv->back_pitch / 64) << 22) |
((dev_priv->back_offset
+ dev_priv->fb_location) >> 10));
dev_priv->depth_pitch_offset = (((dev_priv->depth_pitch / 64) << 22) |
((dev_priv->depth_offset
+ dev_priv->fb_location) >> 10));
dev_priv->gart_size = init->gart_size;
/* Now let's set the memory map ... */
if (dev_priv->new_memmap) {
u32 base = 0;
DRM_INFO("Setting GART location based on new memory map\n");
/* If using AGP, try to locate the AGP aperture at the same
* location in the card and on the bus, though we have to
* align it down.
*/
#if IS_ENABLED(CONFIG_AGP)
if (dev_priv->flags & RADEON_IS_AGP) {
base = dev->agp->base;
/* Check if valid */
/* Rejected when the GART range would overlap the framebuffer range. */
if ((base + dev_priv->gart_size - 1) >= dev_priv->fb_location &&
base < (dev_priv->fb_location + dev_priv->fb_size - 1)) {
DRM_INFO("Can't use AGP base @0x%08lx, won't fit\n",
dev->agp->base);
base = 0;
}
}
#endif
/* If not or if AGP is at 0 (Macs), try to put it elsewhere */
if (base == 0) {
/* Prefer right after the framebuffer; if that wraps around the 32-bit
 * address space, place the GART just below the framebuffer instead.
 */
base = dev_priv->fb_location + dev_priv->fb_size;
if (base < dev_priv->fb_location ||
((base + dev_priv->gart_size) & 0xfffffffful) < base)
base = dev_priv->fb_location
- dev_priv->gart_size;
}
/* Align the chosen base down to a 4MB boundary. */
dev_priv->gart_vm_start = base & 0xffc00000u;
if (dev_priv->gart_vm_start != base)
DRM_INFO("GART aligned down from 0x%08x to 0x%08x\n",
base, dev_priv->gart_vm_start);
} else {
DRM_INFO("Setting GART location based on old memory map\n");
dev_priv->gart_vm_start = dev_priv->fb_location +
RADEON_READ(RADEON_CONFIG_APER_SIZE);
}
/* Card-side address of the DMA buffers: map offset minus the bus-side
 * base, plus the start of the GART range.
 */
#if IS_ENABLED(CONFIG_AGP)
if (dev_priv->flags & RADEON_IS_AGP)
dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset
- dev->agp->base
+ dev_priv->gart_vm_start);
else
#endif
dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset
- (unsigned long)dev->sg->virtual
+ dev_priv->gart_vm_start);
DRM_DEBUG("dev_priv->gart_size %d\n", dev_priv->gart_size);
DRM_DEBUG("dev_priv->gart_vm_start 0x%x\n", dev_priv->gart_vm_start);
DRM_DEBUG("dev_priv->gart_buffers_offset 0x%lx\n",
dev_priv->gart_buffers_offset);
/* Ring bookkeeping.  The rptr update and fetch sizes are hard-coded
 * (4096 and 32) rather than taken from the init request.
 * NOTE(review): tail_mask is size / sizeof(u32) - 1, which presumably
 * assumes init->ring_size is a power of two; it is not validated here.
 */
dev_priv->ring.start = (u32 *) dev_priv->cp_ring->handle;
dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->handle
+ init->ring_size / sizeof(u32));
dev_priv->ring.size = init->ring_size;
dev_priv->ring.size_l2qw = order_base_2(init->ring_size / 8);
dev_priv->ring.rptr_update = /* init->rptr_update */ 4096;
dev_priv->ring.rptr_update_l2qw = order_base_2( /* init->rptr_update */ 4096 / 8);
dev_priv->ring.fetch_size = /* init->fetch_size */ 32;
dev_priv->ring.fetch_size_l2ow = order_base_2( /* init->fetch_size */ 32 / 16);
dev_priv->ring.tail_mask = (dev_priv->ring.size / sizeof(u32)) - 1;
dev_priv->ring.high_mark = RADEON_RING_HIGH_MARK;
#if IS_ENABLED(CONFIG_AGP)
if (dev_priv->flags & RADEON_IS_AGP) {
/* Turn off PCI GART */
radeon_set_pcigart(dev_priv, 0);
} else
#endif
{
u32 sctrl;
int ret;
dev_priv->gart_info.table_mask = DMA_BIT_MASK(32);
/* if we have an offset set from userspace */
if (dev_priv->pcigart_offset_set) {
/* GART table lives in framebuffer memory at the userspace-supplied
 * offset; map it through the framebuffer aperture.
 * NOTE(review): the result of drm_legacy_ioremap_wc() is not checked
 * before mapping.handle is stored in gart_info.addr.
 */
dev_priv->gart_info.bus_addr =
(resource_size_t)dev_priv->pcigart_offset + dev_priv->fb_location;
dev_priv->gart_info.mapping.offset =
dev_priv->pcigart_offset + dev_priv->fb_aper_offset;
dev_priv->gart_info.mapping.size =
dev_priv->gart_info.table_size;
drm_legacy_ioremap_wc(&dev_priv->gart_info.mapping, dev);
dev_priv->gart_info.addr =
dev_priv->gart_info.mapping.handle;
if (dev_priv->flags & RADEON_IS_PCIE)
dev_priv->gart_info.gart_reg_if = DRM_ATI_GART_PCIE;
else
dev_priv->gart_info.gart_reg_if = DRM_ATI_GART_PCI;
dev_priv->gart_info.gart_table_location =
DRM_ATI_GART_FB;
DRM_DEBUG("Setting phys_pci_gart to %p %08lX\n",
dev_priv->gart_info.addr,
dev_priv->pcigart_offset);
} else {
/* No userspace offset: the GART table goes in main memory, which is
 * rejected below for PCIE cards.
 */
if (dev_priv->flags & RADEON_IS_IGPGART)
dev_priv->gart_info.gart_reg_if = DRM_ATI_GART_IGP;
else
dev_priv->gart_info.gart_reg_if = DRM_ATI_GART_PCI;
dev_priv->gart_info.gart_table_location =
DRM_ATI_GART_MAIN;
dev_priv->gart_info.addr = NULL;
dev_priv->gart_info.bus_addr = 0;
if (dev_priv->flags & RADEON_IS_PCIE) {
DRM_ERROR
("Cannot use PCI Express without GART in FB memory\n");
radeon_do_cleanup_cp(dev);
return -EINVAL;
}
}
/* RADEON_SURFACE_CNTL is saved, zeroed while the table is initialised,
 * and restored afterwards.
 */
sctrl = RADEON_READ(RADEON_SURFACE_CNTL);
RADEON_WRITE(RADEON_SURFACE_CNTL, 0);
if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS600)
ret = r600_page_table_init(dev);
else
ret = drm_ati_pcigart_init(dev, &dev_priv->gart_info);
RADEON_WRITE(RADEON_SURFACE_CNTL, sctrl);
/* A zero return from either init helper is treated as failure here. */
if (!ret) {
DRM_ERROR("failed to init PCI GART!\n");
radeon_do_cleanup_cp(dev);
return -ENOMEM;
}
ret = radeon_setup_pcigart_surface(dev_priv);
if (ret) {
DRM_ERROR("failed to setup GART surface!\n");
if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS600)
r600_page_table_cleanup(dev, &dev_priv->gart_info);
else
drm_ati_pcigart_cleanup(dev, &dev_priv->gart_info);
radeon_do_cleanup_cp(dev);
return ret;
}
/* Turn on PCI GART */
radeon_set_pcigart(dev_priv, 1);
}
/* Firmware is only fetched when dev_priv->me_fw is not already set. */
if (!dev_priv->me_fw) {
int err = radeon_cp_init_microcode(dev_priv);
if (err) {
DRM_ERROR("Failed to load firmware!\n");
radeon_do_cleanup_cp(dev);
return err;
}
}
radeon_cp_load_microcode(dev_priv);
radeon_cp_init_ring_buffer(dev, dev_priv, file_priv);
dev_priv->last_buf = 0;
radeon_do_engine_reset(dev);
radeon_test_writeback(dev_priv);
return 0;
}
static int radeon_do_cleanup_cp(struct drm_device * dev)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
DRM_DEBUG("\n");
/* Make sure interrupts are disabled here because the uninstall ioctl
* may not have been called from userspace and after dev_private
* is freed, it's too late.
*/
if (dev->irq_enabled)
drm_irq_uninstall(dev);
#if IS_ENABLED(CONFIG_AGP)
if (dev_priv->flags & RADEON_IS_AGP) {
if (dev_priv->cp_ring != NULL) {
drm_legacy_ioremapfree(dev_priv->cp_ring, dev);
dev_priv->cp_ring = NULL;
}
if (dev_priv->ring_rptr != NULL) {
drm_legacy_ioremapfree(dev_priv->ring_rptr, dev);
dev_priv->ring_rptr = NULL;
}
if (dev->agp_buffer_map != NULL) {
drm_legacy_ioremapfree(dev->agp_buffer_map, dev);
dev->agp_buffer_map = NULL;
}
} else
#endif
{
if (dev_priv->gart_info.bus_addr) {
/* Turn off PCI GART */
radeon_set_pcigart(dev_priv, 0);
if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS600)
r600_page_table_cleanup(dev, &dev_priv->gart_info);
else {
if (!drm_ati_pcigart_cleanup(dev, &dev_priv->gart_info))
DRM_ERROR("failed to cleanup PCI GART!\n");
}
}
if (dev_priv->gart_info.gart_table_location == DRM_ATI_GART_FB)
{
drm_legacy_ioremapfree(&dev_priv->gart_info.mapping, dev);
dev_priv->gart_info.addr = NULL;
}
}
/* only clear to the start of flags */
memset(dev_priv, 0, offsetof(drm_radeon_private_t, flags));
return 0;
}
/* Re-initialise the Radeon CP hardware after a resume from disc.
 *
 * Saving the full state at suspend time would be very difficult, so all
 * hardware initialisation is simply redone here without affecting running
 * applications: the PCI GART is switched off (AGP) or on (otherwise), the
 * microcode is reloaded, the ring buffer is reprogrammed, the engine is
 * reset and the software interrupt is re-enabled.
 *
 * Returns 0 on success, -EINVAL if the device has no private data.
 *
 * Charl P. Botha <http://cpbotha.net>
 */
static int radeon_do_resume_cp(struct drm_device *dev, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int pcigart_on = 1;	/* PCI GART is on unless the card runs as AGP */

	if (!dev_priv) {
		DRM_ERROR("Called with no initialization\n");
		return -EINVAL;
	}

	DRM_DEBUG("Starting radeon_do_resume_cp()\n");

#if IS_ENABLED(CONFIG_AGP)
	if (dev_priv->flags & RADEON_IS_AGP)
		pcigart_on = 0;
#endif
	radeon_set_pcigart(dev_priv, pcigart_on);

	radeon_cp_load_microcode(dev_priv);
	radeon_cp_init_ring_buffer(dev, dev_priv, file_priv);

	dev_priv->have_z_offset = 0;
	radeon_do_engine_reset(dev);
	radeon_irq_set_state(dev, RADEON_SW_INT_ENABLE, 1);

	DRM_DEBUG("radeon_do_resume_cp() complete\n");

	return 0;
}
/* CP init/cleanup ioctl: dispatch on the requested function.
 *
 * 'data' is the drm_radeon_init_t request.  The R300 variant first sets
 * up the R300 register flags.  Unknown functions yield -EINVAL; otherwise
 * the result of the selected init or cleanup routine is returned.
 */
int radeon_cp_init(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_init_t *init = data;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	switch (init->func) {
	case RADEON_INIT_R300_CP:
		r300_init_reg_flags(dev);
		/* fall through */
	case RADEON_INIT_CP:
	case RADEON_INIT_R200_CP:
		return radeon_do_init_cp(dev, init, file_priv);

	case RADEON_INIT_R600_CP:
		return r600_do_init_cp(dev, init, file_priv);

	case RADEON_CLEANUP_CP:
		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
			return r600_do_cleanup_cp(dev);
		return radeon_do_cleanup_cp(dev);

	default:
		break;
	}

	return -EINVAL;
}
/* CP start ioctl.
 *
 * Does nothing (beyond a debug message) when the CP is already running or
 * when the stored CP mode is RADEON_CSQ_PRIDIS_INDDIS; otherwise starts
 * the CP with the routine matching the chip family.  Returns 0 in every
 * case that passes the lock test.
 */
int radeon_cp_start(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;

	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (dev_priv->cp_running) {
		DRM_DEBUG("while CP running\n");
	} else if (dev_priv->cp_mode == RADEON_CSQ_PRIDIS_INDDIS) {
		DRM_DEBUG("called with bogus CP mode (%d)\n",
			  dev_priv->cp_mode);
	} else if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) {
		r600_do_cp_start(dev_priv);
	} else {
		radeon_do_cp_start(dev_priv);
	}

	return 0;
}
/* CP stop ioctl.  The engine must have been idled before calling this
 * routine.
 *
 * 'data' is a drm_radeon_cp_stop_t whose 'flush' and 'idle' members select
 * the optional flush and wait-for-idle steps.  Returns 0 on success (or if
 * the CP was not running), or the error from the idle wait.
 */
int radeon_cp_stop(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_cp_stop_t *stop = data;
	int is_r600;
	int ret;

	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (!dev_priv->cp_running)
		return 0;

	is_r600 = ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600);

	/* Flush any pending CP commands.  This ensures any outstanding
	 * commands are executed by the engine before we turn it off.
	 */
	if (stop->flush)
		radeon_do_cp_flush(dev_priv);

	/* If we fail to make the engine go idle, we return an error
	 * code so that the DRM ioctl wrapper can try again.
	 */
	if (stop->idle) {
		ret = is_r600 ? r600_do_cp_idle(dev_priv)
			      : radeon_do_cp_idle(dev_priv);
		if (ret)
			return ret;
	}

	/* Finally, we can turn off the CP.  If the engine isn't idle,
	 * we will get some dropped triangles as they won't be fully
	 * rendered before the CP is shut down.  The engine is reset
	 * right after the CP has been stopped.
	 */
	if (is_r600) {
		r600_do_cp_stop(dev_priv);
		r600_do_engine_reset(dev);
	} else {
		radeon_do_cp_stop(dev_priv);
		radeon_do_engine_reset(dev);
	}

	return 0;
}
/* Release everything the CP code holds for this device.
 *
 * No-op when the device has no private data.  Otherwise: waits for a
 * running CP to go idle (yielding between attempts), stops it and resets
 * the engine; on pre-R600 chips with MMIO mapped, disables all interrupts
 * and clears every surface register; frees the GART and framebuffer
 * heaps; runs the family-specific CP cleanup; releases the ME and PFP
 * firmware images.
 */
void radeon_do_release(struct drm_device * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int is_r600;
	int i, ret;

	if (!dev_priv)
		return;

	is_r600 = ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600);

	if (dev_priv->cp_running) {
		/* Stop the cp */
		for (;;) {
			ret = is_r600 ? r600_do_cp_idle(dev_priv)
				      : radeon_do_cp_idle(dev_priv);
			if (ret == 0)
				break;
			DRM_DEBUG("radeon_do_cp_idle %d\n", ret);
#ifdef __linux__
			schedule();
#else
			tsleep(&ret, PZERO, "rdnrel", 1);
#endif
		}

		if (is_r600) {
			r600_do_cp_stop(dev_priv);
			r600_do_engine_reset(dev);
		} else {
			radeon_do_cp_stop(dev_priv);
			radeon_do_engine_reset(dev);
		}
	}

	/* remove the mmio check after permanent addmaps */
	if (!is_r600 && dev_priv->mmio) {
		/* Disable *all* interrupts */
		RADEON_WRITE(RADEON_GEN_INT_CNTL, 0);

		/* remove all surfaces */
		for (i = 0; i < RADEON_MAX_SURFACES; i++) {
			RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * i, 0);
			RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND +
				     16 * i, 0);
			RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND +
				     16 * i, 0);
		}
	}

	/* Free memory heap structures */
	radeon_mem_takedown(&(dev_priv->gart_heap));
	radeon_mem_takedown(&(dev_priv->fb_heap));

	/* deallocate kernel resources */
	if (is_r600)
		r600_do_cleanup_cp(dev);
	else
		radeon_do_cleanup_cp(dev);

	release_firmware(dev_priv->me_fw);
	dev_priv->me_fw = NULL;
	release_firmware(dev_priv->pfp_fw);
	dev_priv->pfp_fw = NULL;
}
/* CP reset ioctl: just reset the CP ring.  Called as part of an X Server
 * engine reset.
 *
 * Returns -EINVAL when the device has no private data yet, 0 otherwise.
 */
int radeon_cp_reset(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;

	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (!dev_priv) {
		DRM_DEBUG("called before init done\n");
		return -EINVAL;
	}

	if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_R600)
		radeon_do_cp_reset(dev_priv);
	else
		r600_do_cp_reset(dev_priv);

	/* The CP is no longer running after an engine reset */
	dev_priv->cp_running = 0;

	return 0;
}
/* CP idle ioctl: run the family-specific idle routine and return its
 * result.
 */
int radeon_cp_idle(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;

	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_R600)
		return radeon_do_cp_idle(dev_priv);

	return r600_do_cp_idle(dev_priv);
}
/* CP resume ioctl: run the family-specific resume routine and return its
 * result.
 *
 * Added by Charl P. Botha to call radeon_do_resume_cp().
 */
int radeon_cp_resume(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;

	DRM_DEBUG("\n");

	if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_R600)
		return radeon_do_resume_cp(dev, file_priv);

	return r600_do_resume_cp(dev, file_priv);
}
/*
 * Ioctl entry point: run the family-specific engine reset and return its
 * result.
 */
int radeon_engine_reset(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;

	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_R600)
		return radeon_do_engine_reset(dev);

	return r600_do_engine_reset(dev);
}
/* ================================================================
* Fullscreen mode
*/
/*
 * KW: Deprecated to say the least.
 *
 * Stub: ignores all of its arguments and always returns 0.
 */
int radeon_fullscreen(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	const int status = 0;

	return status;
}
/* ================================================================
* Freelist management
*/
/* Original comment: FIXME: ROTATE_BUFS is a hack to cycle through
* bufs until freelist code is used. Note this hides a problem with
* the scratch register (used to keep track of last buffer
* completed) being written to before the last buffer has actually
* completed rendering.
*
* KW: It's also a good way to find free buffers quickly.
*
* KW: Ideally this loop wouldn't exist, and freelist_get wouldn't
* sleep. However, bugs in older versions of radeon_accel.c mean that
* we essentially have to do this, else old clients will break.
*
* However, it does leave open a potential deadlock where all the
* buffers are held by other clients, which can't release them because
* they can't get the lock.
*/
struct drm_buf *radeon_freelist_get(struct drm_device * dev)
{
	struct drm_device_dma *dma = dev->dma;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int attempt;
	int slot;

	/* Begin the rotating scan one entry past the previous starting point. */
	dev_priv->last_buf++;
	if (dev_priv->last_buf >= dma->buf_count)
		dev_priv->last_buf = 0;
	slot = dev_priv->last_buf;

	for (attempt = 0; attempt < dev_priv->usec_timeout; attempt++) {
		u32 done_age = GET_SCRATCH(dev_priv, 1);
		int scanned;

		DRM_DEBUG("done_age = %d\n", done_age);

		for (scanned = 0; scanned < dma->buf_count; scanned++) {
			struct drm_buf *buf = dma->buflist[slot];
			drm_radeon_buf_priv_t *buf_priv = buf->dev_private;

			/*
			 * Usable when nobody owns the buffer, or when it is
			 * pending and its age is not newer than done_age.
			 */
			if (buf->file_priv == NULL ||
			    (buf->pending && buf_priv->age <= done_age)) {
				dev_priv->stats.requested_bufs++;
				buf->pending = 0;
				return buf;
			}

			/* Advance with wrap-around. */
			slot++;
			if (slot >= dma->buf_count)
				slot = 0;
		}

		/* No delay after the very first full pass. */
		if (attempt != 0) {
			DRM_UDELAY(1);
			dev_priv->stats.freelist_loops++;
		}
	}

	return NULL;
}
/*
 * Rewind the freelist scan position and zero the age stored in every DMA
 * buffer's private data.
 */
void radeon_freelist_reset(struct drm_device * dev)
{
	struct drm_device_dma *dma = dev->dma;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int idx = 0;

	dev_priv->last_buf = 0;

	while (idx < dma->buf_count) {
		drm_radeon_buf_priv_t *buf_priv = dma->buflist[idx]->dev_private;

		buf_priv->age = 0;
		idx++;
	}
}
/* ================================================================
* CP command submission
*/
/*
 * Poll until the ring has more than n bytes of free space.
 *
 * Returns 0 once enough space is available, or -EBUSY after
 * dev_priv->usec_timeout consecutive polls without the ring head moving.
 */
int radeon_wait_ring(drm_radeon_private_t * dev_priv, int n)
{
	drm_radeon_ring_buffer_t *ring = &dev_priv->ring;
	u32 prev_head = GET_RING_HEAD(dev_priv);
	int polls;

	for (polls = 0; polls < dev_priv->usec_timeout; polls++) {
		u32 cur_head = GET_RING_HEAD(dev_priv);

		/* Free space in bytes, corrected for wrap-around. */
		ring->space = (cur_head - ring->tail) * sizeof(u32);
		if (ring->space <= 0)
			ring->space += ring->size;

		if (ring->space > n)
			return 0;

		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;

		/* The head moved since the last poll: restart the timeout. */
		if (cur_head != prev_head)
			polls = 0;
		prev_head = cur_head;

		DRM_UDELAY(1);
	}

	/* FIXME: This return value is ignored in the BEGIN_RING macro! */
#if RADEON_FIFO_DEBUG
	radeon_status(dev_priv);
	DRM_ERROR("failed!\n");
#endif
	return -EBUSY;
}
static int radeon_cp_get_buffers(struct drm_device *dev,
struct drm_file *file_priv,
struct drm_dma * d)
{
int i;
struct drm_buf *buf;
for (i = d->granted_count; i < d->request_count; i++) {
buf = radeon_freelist_get(dev);
if (!buf)
return -EBUSY; /* NOTE: broken client */
buf->file_priv = file_priv;
if (copy_to_user(&d->request_indices[i], &buf->idx,
sizeof(buf->idx)))
return -EFAULT;
if (copy_to_user(&d->request_sizes[i], &buf->total,
sizeof(buf->total)))
return -EFAULT;
d->granted_count++;
}
return 0;
}
/*
 * DMA ioctl: validate the request in *data (a struct drm_dma) and grant
 * the requested number of buffers.
 *
 * Returns -EINVAL when the caller tries to send buffers or asks for a
 * count outside [0, dma->buf_count]; otherwise 0 or the result of
 * radeon_cp_get_buffers().
 */
int radeon_cp_buffers(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	struct drm_device_dma *dma = dev->dma;
	struct drm_dma *d = data;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	/* Please don't send us buffers.
	 */
	if (d->send_count != 0) {
		DRM_ERROR("Process %d trying to send %d buffers via drmDMA\n",
			  DRM_CURRENTPID, d->send_count);
		return -EINVAL;
	}

	/* We'll send you buffers.
	 */
	if (d->request_count < 0 || d->request_count > dma->buf_count) {
		DRM_ERROR("Process %d trying to get %d buffers (of %d max)\n",
			  DRM_CURRENTPID, d->request_count, dma->buf_count);
		return -EINVAL;
	}

	d->granted_count = 0;

	/* Nothing requested: nothing to hand out. */
	if (d->request_count == 0)
		return 0;

	return radeon_cp_get_buffers(dev, file_priv, d);
}
/*
 * Driver load callback: allocate the per-device private structure, record
 * the chip flags and bus type, map the register BAR (PCI resource 2) and
 * initialise vblank handling for two CRTCs.
 *
 * @dev:   DRM device being loaded
 * @flags: chip flags; the bits under RADEON_FAMILY_MASK select the family
 *
 * Returns 0 on success or a negative errno.  On every failure path the
 * private structure is released and dev->dev_private is cleared, so no
 * allocation is leaked and no stale pointer is left behind.
 */
int radeon_driver_load(struct drm_device *dev, unsigned long flags)
{
	drm_radeon_private_t *dev_priv;
	int ret = 0;

	dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL);
	if (dev_priv == NULL)
		return -ENOMEM;

	dev->dev_private = (void *)dev_priv;
	dev_priv->flags = flags;

	switch (flags & RADEON_FAMILY_MASK) {
	case CHIP_R100:
	case CHIP_RV200:
	case CHIP_R200:
	case CHIP_R300:
	case CHIP_R350:
	case CHIP_R420:
	case CHIP_R423:
	case CHIP_RV410:
	case CHIP_RV515:
	case CHIP_R520:
	case CHIP_RV570:
	case CHIP_R580:
		dev_priv->flags |= RADEON_HAS_HIERZ;
		break;
	default:
		/* all other chips have no hierarchical z buffer */
		break;
	}

	pci_set_master(dev->pdev);

	if (drm_pci_device_is_agp(dev))
		dev_priv->flags |= RADEON_IS_AGP;
	else if (pci_is_pcie(dev->pdev))
		dev_priv->flags |= RADEON_IS_PCIE;
	else
		dev_priv->flags |= RADEON_IS_PCI;

	ret = drm_legacy_addmap(dev, pci_resource_start(dev->pdev, 2),
				pci_resource_len(dev->pdev, 2), _DRM_REGISTERS,
				_DRM_READ_ONLY | _DRM_DRIVER, &dev_priv->mmio);
	if (ret != 0) {
		/*
		 * The register map was never created, so only the private
		 * structure has to be undone here.  Previously this path
		 * returned directly and leaked dev_priv.
		 */
		kfree(dev_priv);
		dev->dev_private = NULL;
		return ret;
	}

	ret = drm_vblank_init(dev, 2);
	if (ret) {
		/* Removes the register map and frees dev_priv. */
		radeon_driver_unload(dev);
		return ret;
	}

	DRM_DEBUG("%s card detected\n",
		  ((dev_priv->flags & RADEON_IS_AGP) ? "AGP" : (((dev_priv->flags & RADEON_IS_PCIE) ? "PCIE" : "PCI"))));
	return ret;
}
int radeon_master_create(struct drm_device *dev, struct drm_master *master)
{
struct drm_radeon_master_private *master_priv;
unsigned long sareapage;
int ret;
master_priv = kzalloc(sizeof(*master_priv), GFP_KERNEL);
if (!master_priv)
return -ENOMEM;
/* prebuild the SAREA */
sareapage = max_t(unsigned long, SAREA_MAX, PAGE_SIZE);
ret = drm_legacy_addmap(dev, 0, sareapage, _DRM_SHM, _DRM_CONTAINS_LOCK,
&master_priv->sarea);
if (ret) {
DRM_ERROR("SAREA setup failed\n");
kfree(master_priv);
return ret;
}
master_priv->sarea_priv = master_priv->sarea->handle + sizeof(struct drm_sarea);
master_priv->sarea_priv->pfCurrentPage = 0;
master->driver_priv = master_priv;
return 0;
}
/*
 * Tear down the per-master private data: dispatch a flip if the SAREA says
 * the current page is not page 0, remove the SAREA map and free the
 * structure.  Does nothing when the master has no private data.
 */
void radeon_master_destroy(struct drm_device *dev, struct drm_master *master)
{
	struct drm_radeon_master_private *priv = master->driver_priv;

	if (priv == NULL)
		return;

	if (priv->sarea_priv != NULL && priv->sarea_priv->pfCurrentPage != 0)
		radeon_cp_dispatch_flip(dev, master);

	priv->sarea_priv = NULL;

	if (priv->sarea != NULL)
		drm_legacy_rmmap_locked(dev, priv->sarea);

	kfree(priv);
	master->driver_priv = NULL;
}
/*
 * Create mappings for registers and framebuffer so userland doesn't
 * necessarily have to find them.
 *
 * Records the PCI GART table size and the framebuffer aperture start
 * (PCI resource 0), then adds a write-combining framebuffer map covering
 * that resource.  Returns the result of drm_legacy_addmap().
 */
int radeon_driver_firstopen(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_local_map_t *fb_map;

	dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
	dev_priv->fb_aper_offset = pci_resource_start(dev->pdev, 0);

	return drm_legacy_addmap(dev, dev_priv->fb_aper_offset,
				 pci_resource_len(dev->pdev, 0),
				 _DRM_FRAME_BUFFER, _DRM_WRITE_COMBINING,
				 &fb_map);
}
/*
 * Driver unload callback: remove the register map, free the device-private
 * structure and clear dev->dev_private.  Always returns 0.
 */
int radeon_driver_unload(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;

	DRM_DEBUG("\n");

	drm_legacy_rmmap(dev, dev_priv->mmio);

	kfree(dev_priv);
	dev->dev_private = NULL;

	return 0;
}
/*
 * Publish the ring contents to the CP: pad the tail up to the next
 * RADEON_RING_ALIGN boundary with CP_PACKET2 entries, wrap the tail with
 * the ring's tail mask, and write it to the family-specific write-pointer
 * register.
 */
void radeon_commit_ring(drm_radeon_private_t *dev_priv)
{
	int misalign;

	/* check if the ring is padded out to 16-dword alignment */
	misalign = dev_priv->ring.tail & (RADEON_RING_ALIGN-1);
	if (misalign != 0) {
		u32 *ring_base = dev_priv->ring.start;
		int pad = RADEON_RING_ALIGN - misalign;
		int k;

		/* pad with some CP_PACKET2 */
		for (k = 0; k < pad; k++)
			ring_base[dev_priv->ring.tail + k] = CP_PACKET2();

		dev_priv->ring.tail += pad;
		dev_priv->ring.space -= pad * sizeof(u32);
	}

	dev_priv->ring.tail &= dev_priv->ring.tail_mask;

	mb();
	GET_RING_HEAD( dev_priv );

	if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_R600) {
		RADEON_WRITE(RADEON_CP_RB_WPTR, dev_priv->ring.tail);
		/* read from PCI bus to ensure correct posting */
		RADEON_READ(RADEON_CP_RB_RPTR);
	} else {
		RADEON_WRITE(R600_CP_RB_WPTR, dev_priv->ring.tail);
		/* read from PCI bus to ensure correct posting */
		RADEON_READ(R600_CP_RB_RPTR);
	}
}
...@@ -291,88 +291,6 @@ static struct pci_device_id pciidlist[] = { ...@@ -291,88 +291,6 @@ static struct pci_device_id pciidlist[] = {
MODULE_DEVICE_TABLE(pci, pciidlist); MODULE_DEVICE_TABLE(pci, pciidlist);
#ifdef CONFIG_DRM_RADEON_UMS
/*
 * Suspend hook of the legacy (UMS) driver: mask all interrupts on
 * pre-R600 hardware.  R600 and newer are left untouched.  Always returns 0.
 */
static int radeon_suspend(struct drm_device *dev, pm_message_t state)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;

	if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_R600) {
		/* Disable *all* interrupts */
		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RS600)
			RADEON_WRITE(R500_DxMODE_INT_MASK, 0);
		RADEON_WRITE(RADEON_GEN_INT_CNTL, 0);
	}

	return 0;
}
/*
 * Resume hook of the legacy (UMS) driver: write the saved interrupt
 * register values back on pre-R600 hardware.  R600 and newer are left
 * untouched.  Always returns 0.
 */
static int radeon_resume(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;

	if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_R600) {
		/* Restore interrupt registers */
		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RS600)
			RADEON_WRITE(R500_DxMODE_INT_MASK,
				     dev_priv->r500_disp_irq_reg);
		RADEON_WRITE(RADEON_GEN_INT_CNTL, dev_priv->irq_enable_reg);
	}

	return 0;
}
/* File operations for the legacy (UMS) driver's device node. */
static const struct file_operations radeon_driver_old_fops = {
	.owner		= THIS_MODULE,

	/* open / close */
	.open		= drm_open,
	.release	= drm_release,

	/* data access */
	.read		= drm_read,
	.poll		= drm_poll,
	.mmap		= drm_legacy_mmap,
	.llseek		= noop_llseek,

	/* ioctl entry points */
	.unlocked_ioctl	= drm_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= radeon_compat_ioctl,
#endif
};
static struct drm_driver driver_old = {
.driver_features =
DRIVER_USE_AGP | DRIVER_PCI_DMA | DRIVER_SG |
DRIVER_HAVE_IRQ | DRIVER_HAVE_DMA | DRIVER_IRQ_SHARED,
.dev_priv_size = sizeof(drm_radeon_buf_priv_t),
.load = radeon_driver_load,
.firstopen = radeon_driver_firstopen,
.open = radeon_driver_open,
.preclose = radeon_driver_preclose,
.postclose = radeon_driver_postclose,
.lastclose = radeon_driver_lastclose,
.set_busid = drm_pci_set_busid,
.unload = radeon_driver_unload,
.suspend = radeon_suspend,
.resume = radeon_resume,
.get_vblank_counter = radeon_get_vblank_counter,
.enable_vblank = radeon_enable_vblank,
.disable_vblank = radeon_disable_vblank,
.master_create = radeon_master_create,
.master_destroy = radeon_master_destroy,
.irq_preinstall = radeon_driver_irq_preinstall,
.irq_postinstall = radeon_driver_irq_postinstall,
.irq_uninstall = radeon_driver_irq_uninstall,
.irq_handler = radeon_driver_irq_handler,
.ioctls = radeon_ioctls,
.dma_ioctl = radeon_cp_buffers,
.fops = &radeon_driver_old_fops,
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
.date = DRIVER_DATE,
.major = DRIVER_MAJOR,
.minor = DRIVER_MINOR,
.patchlevel = DRIVER_PATCHLEVEL,
};
#endif
static struct drm_driver kms_driver; static struct drm_driver kms_driver;
static int radeon_kick_out_firmware_fb(struct pci_dev *pdev) static int radeon_kick_out_firmware_fb(struct pci_dev *pdev)
...@@ -619,13 +537,6 @@ static struct drm_driver kms_driver = { ...@@ -619,13 +537,6 @@ static struct drm_driver kms_driver = {
static struct drm_driver *driver; static struct drm_driver *driver;
static struct pci_driver *pdriver; static struct pci_driver *pdriver;
#ifdef CONFIG_DRM_RADEON_UMS
/* PCI driver used for the legacy (UMS) path; matches the shared ID table. */
static struct pci_driver radeon_pci_driver = {
	.id_table	= pciidlist,
	.name		= DRIVER_NAME,
};
#endif
static struct pci_driver radeon_kms_pci_driver = { static struct pci_driver radeon_kms_pci_driver = {
.name = DRIVER_NAME, .name = DRIVER_NAME,
.id_table = pciidlist, .id_table = pciidlist,
...@@ -655,16 +566,8 @@ static int __init radeon_init(void) ...@@ -655,16 +566,8 @@ static int __init radeon_init(void)
radeon_register_atpx_handler(); radeon_register_atpx_handler();
} else { } else {
#ifdef CONFIG_DRM_RADEON_UMS
DRM_INFO("radeon userspace modesetting enabled.\n");
driver = &driver_old;
pdriver = &radeon_pci_driver;
driver->driver_features &= ~DRIVER_MODESET;
driver->num_ioctls = radeon_max_ioctl;
#else
DRM_ERROR("No UMS support in radeon module!\n"); DRM_ERROR("No UMS support in radeon module!\n");
return -EINVAL; return -EINVAL;
#endif
} }
radeon_kfd_init(); radeon_kfd_init();
......
...@@ -119,2052 +119,4 @@ ...@@ -119,2052 +119,4 @@
long radeon_drm_ioctl(struct file *filp, long radeon_drm_ioctl(struct file *filp,
unsigned int cmd, unsigned long arg); unsigned int cmd, unsigned long arg);
/* The rest of the file is DEPRECATED! */
#ifdef CONFIG_DRM_RADEON_UMS
/* CP microcode generations, numbered consecutively from zero. */
enum radeon_cp_microcode_version {
	UCODE_R100 = 0,
	UCODE_R200 = 1,
	UCODE_R300 = 2,
};
/*
 * Freelist node: pairs a DMA buffer with an age value and links to its
 * neighbours through next/prev.
 * NOTE(review): no user of this list is visible in this chunk; the
 * freelist functions shown walk dma->buflist directly.
 */
typedef struct drm_radeon_freelist {
unsigned int age;
struct drm_buf *buf;
struct drm_radeon_freelist *next;
struct drm_radeon_freelist *prev;
} drm_radeon_freelist_t;
/*
 * Bookkeeping for the CP ring buffer.
 *
 * radeon_commit_ring() indexes 'start' in dwords using 'tail' and wraps
 * 'tail' with 'tail_mask'; radeon_wait_ring() recomputes 'space' in bytes
 * and adds 'size' to it on wrap-around.
 */
typedef struct drm_radeon_ring_buffer {
u32 *start;
u32 *end;
int size;
int size_l2qw;
int rptr_update; /* Double Words */
int rptr_update_l2qw; /* log2 Quad Words */
int fetch_size; /* Double Words */
int fetch_size_l2ow; /* log2 Oct Words */
u32 tail;
u32 tail_mask;
int space;
int high_mark;
} drm_radeon_ring_buffer_t;
/*
 * Three 32-bit values kept in drm_radeon_private.depth_clear.
 * NOTE(review): the field names suggest register values used for depth
 * clears, but the code that fills and consumes them is not in this chunk.
 */
typedef struct drm_radeon_depth_clear_t {
u32 rb3d_cntl;
u32 rb3d_zstencilcntl;
u32 se_cntl;
} drm_radeon_depth_clear_t;
/*
 * Driver-specific per-file data.
 * NOTE(review): where radeon_fb_delta is set and read is not visible in
 * this chunk.
 */
struct drm_radeon_driver_file_fields {
int64_t radeon_fb_delta;
};
/*
 * Node of a heap block list, linked through next/prev.  The GART and
 * framebuffer heaps in drm_radeon_private are pointers to this type.
 * NOTE(review): start/size presumably describe the block's range within
 * the heap — the allocator itself is not in this chunk.
 */
struct mem_block {
struct mem_block *next;
struct mem_block *prev;
int start;
int size;
struct drm_file *file_priv; /* NULL: free, -1: heap, other: real files */
};
/*
 * One entry of drm_radeon_private.surfaces[] (RADEON_MAX_SURFACES entries).
 * NOTE(review): lower/upper/flags presumably mirror the hardware surface
 * registers — confirm against the surface management code.
 */
struct radeon_surface {
int refcount;
u32 lower;
u32 upper;
u32 flags;
};
/*
 * One entry of drm_radeon_private.virt_surfaces[]
 * (2 * RADEON_MAX_SURFACES entries), tied to an owning file.
 * PCIGART_FILE_PRIV is a sentinel file_priv value (all-ones pointer).
 * NOTE(review): surface_index presumably refers to an entry of
 * surfaces[] — confirm against the surface management code.
 */
struct radeon_virt_surface {
int surface_index;
u32 lower;
u32 upper;
u32 flags;
struct drm_file *file_priv;
#define PCIGART_FILE_PRIV ((void *) -1L)
};
#define RADEON_FLUSH_EMITED (1 << 0)
#define RADEON_PURGE_EMITED (1 << 1)
/*
 * Per-master private data, allocated in radeon_master_create() and freed
 * in radeon_master_destroy().
 *
 * sarea      - SAREA map created with drm_legacy_addmap()
 * sarea_priv - driver part of the SAREA, located sizeof(struct drm_sarea)
 *              bytes past sarea->handle
 */
struct drm_radeon_master_private {
drm_local_map_t *sarea;
drm_radeon_sarea_t *sarea_priv;
};
/*
 * Per-device private state of the legacy (UMS) radeon driver.  Allocated
 * zeroed in radeon_driver_load() and stored in dev->dev_private; freed in
 * radeon_driver_unload().
 */
typedef struct drm_radeon_private {
/* CP ring bookkeeping, see radeon_wait_ring() / radeon_commit_ring() */
drm_radeon_ring_buffer_t ring;
u32 fb_location;
u32 fb_size;
int new_memmap;
int gart_size;
u32 gart_vm_start;
unsigned long gart_buffers_offset;
int cp_mode;
/* cleared by radeon_cp_reset() */
int cp_running;
drm_radeon_freelist_t *head;
drm_radeon_freelist_t *tail;
/* buflist index where radeon_freelist_get() starts its rotating scan */
int last_buf;
int writeback_works;
/* iteration bound of the polling loops in radeon_freelist_get() and radeon_wait_ring() */
int usec_timeout;
int microcode_version;
/* counters and flag bits updated by the driver (boxes takes RADEON_BOX_* bits) */
struct {
u32 boxes;
int freelist_timeouts;
int freelist_loops;
int requested_bufs;
int last_frame_reads;
int last_clear_reads;
int clears;
int texture_uploads;
} stats;
int do_boxes;
int page_flipping;
u32 color_fmt;
unsigned int front_offset;
unsigned int front_pitch;
unsigned int back_offset;
unsigned int back_pitch;
u32 depth_fmt;
unsigned int depth_offset;
unsigned int depth_pitch;
u32 front_pitch_offset;
u32 back_pitch_offset;
u32 depth_pitch_offset;
drm_radeon_depth_clear_t depth_clear;
unsigned long ring_offset;
unsigned long ring_rptr_offset;
unsigned long buffers_offset;
unsigned long gart_textures_offset;
drm_local_map_t *sarea;
drm_local_map_t *cp_ring;
drm_local_map_t *ring_rptr;
drm_local_map_t *gart_textures;
struct mem_block *gart_heap;
struct mem_block *fb_heap;
/* SW interrupt */
wait_queue_head_t swi_queue;
atomic_t swi_emitted;
int vblank_crtc;
/* written back to RADEON_GEN_INT_CNTL by radeon_resume() */
uint32_t irq_enable_reg;
/* written back to R500_DxMODE_INT_MASK by radeon_resume() on RS600 and newer */
uint32_t r500_disp_irq_reg;
struct radeon_surface surfaces[RADEON_MAX_SURFACES];
struct radeon_virt_surface virt_surfaces[2 * RADEON_MAX_SURFACES];
unsigned long pcigart_offset;
unsigned int pcigart_offset_set;
/* table_size is set in radeon_driver_firstopen() */
struct drm_ati_pcigart_info gart_info;
u32 scratch_ages[5];
int have_z_offset;
/* starting from here on, data is preserved across an open */
uint32_t flags; /* see radeon_chip_flags */
/* start of PCI resource 0, set in radeon_driver_firstopen() */
resource_size_t fb_aper_offset;
int num_gb_pipes;
int num_z_pipes;
int track_flush;
/* register map over PCI resource 2, created in radeon_driver_load() */
drm_local_map_t *mmio;
/* r6xx/r7xx pipe/shader config */
int r600_max_pipes;
int r600_max_tile_pipes;
int r600_max_simds;
int r600_max_backends;
int r600_max_gprs;
int r600_max_threads;
int r600_max_stack_entries;
int r600_max_hw_contexts;
int r600_max_gs_threads;
int r600_sx_max_export_size;
int r600_sx_max_export_pos_size;
int r600_sx_max_export_smx_size;
int r600_sq_num_cf_insts;
int r700_sx_num_of_sets;
int r700_sc_prim_fifo_size;
int r700_sc_hiz_tile_fifo_size;
int r700_sc_earlyz_tile_fifo_fize;
int r600_group_size;
int r600_npipes;
int r600_nbanks;
struct mutex cs_mutex;
u32 cs_id_scnt;
u32 cs_id_wcnt;
/* r6xx/r7xx drm blit vertex buffer */
struct drm_buf *blit_vb;
/* firmware */
const struct firmware *me_fw, *pfp_fw;
} drm_radeon_private_t;
/*
 * Driver-private data attached to each DMA buffer (buf->dev_private).
 * radeon_freelist_get() compares 'age' against the value read from
 * scratch register 1; radeon_freelist_reset() sets it back to 0.
 */
typedef struct drm_radeon_buf_priv {
u32 age;
} drm_radeon_buf_priv_t;
struct drm_buffer;
/*
 * Kernel-side command buffer descriptor, passed to r300_do_cp_cmdbuf().
 * 'boxes' points into user space (__user).
 * NOTE(review): presumably bufsz is the size of 'buffer' and nbox the
 * number of entries in 'boxes' — confirm against the cmdbuf ioctl code.
 */
typedef struct drm_radeon_kcmd_buffer {
int bufsz;
struct drm_buffer *buffer;
int nbox;
struct drm_clip_rect __user *boxes;
} drm_radeon_kcmd_buffer_t;
extern int radeon_no_wb;
extern struct drm_ioctl_desc radeon_ioctls[];
extern int radeon_max_ioctl;
extern u32 radeon_get_ring_head(drm_radeon_private_t *dev_priv);
extern void radeon_set_ring_head(drm_radeon_private_t *dev_priv, u32 val);
#define GET_RING_HEAD(dev_priv) radeon_get_ring_head(dev_priv)
#define SET_RING_HEAD(dev_priv, val) radeon_set_ring_head(dev_priv, val)
/*
 * Check whether the given hardware address is inside the framebuffer or
 * the GART area.
 *
 * Returns 1 when 'off' lies within either inclusive range, 0 otherwise.
 */
static __inline__ int radeon_check_offset(drm_radeon_private_t *dev_priv,
					  u64 off)
{
	u32 fb_start = dev_priv->fb_location;
	u32 fb_end = fb_start + dev_priv->fb_size - 1;
	u32 gart_start = dev_priv->gart_vm_start;
	u32 gart_end = gart_start + dev_priv->gart_size - 1;
	int in_fb = (off >= fb_start && off <= fb_end);
	int in_gart = (off >= gart_start && off <= gart_end);

	return in_fb || in_gart;
}
/* radeon_state.c */
extern void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf);
/* radeon_cp.c */
extern int radeon_cp_init(struct drm_device *dev, void *data, struct drm_file *file_priv);
extern int radeon_cp_start(struct drm_device *dev, void *data, struct drm_file *file_priv);
extern int radeon_cp_stop(struct drm_device *dev, void *data, struct drm_file *file_priv);
extern int radeon_cp_reset(struct drm_device *dev, void *data, struct drm_file *file_priv);
extern int radeon_cp_idle(struct drm_device *dev, void *data, struct drm_file *file_priv);
extern int radeon_cp_resume(struct drm_device *dev, void *data, struct drm_file *file_priv);
extern int radeon_engine_reset(struct drm_device *dev, void *data, struct drm_file *file_priv);
extern int radeon_fullscreen(struct drm_device *dev, void *data, struct drm_file *file_priv);
extern int radeon_cp_buffers(struct drm_device *dev, void *data, struct drm_file *file_priv);
extern u32 radeon_read_fb_location(drm_radeon_private_t *dev_priv);
extern void radeon_write_agp_location(drm_radeon_private_t *dev_priv, u32 agp_loc);
extern void radeon_write_agp_base(drm_radeon_private_t *dev_priv, u64 agp_base);
extern void radeon_freelist_reset(struct drm_device * dev);
extern struct drm_buf *radeon_freelist_get(struct drm_device * dev);
extern int radeon_wait_ring(drm_radeon_private_t * dev_priv, int n);
extern int radeon_do_cp_idle(drm_radeon_private_t * dev_priv);
extern int radeon_driver_preinit(struct drm_device *dev, unsigned long flags);
extern int radeon_presetup(struct drm_device *dev);
extern int radeon_driver_postcleanup(struct drm_device *dev);
extern int radeon_mem_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv);
extern int radeon_mem_free(struct drm_device *dev, void *data, struct drm_file *file_priv);
extern int radeon_mem_init_heap(struct drm_device *dev, void *data, struct drm_file *file_priv);
extern void radeon_mem_takedown(struct mem_block **heap);
extern void radeon_mem_release(struct drm_file *file_priv,
struct mem_block *heap);
extern void radeon_enable_bm(struct drm_radeon_private *dev_priv);
extern u32 radeon_read_ring_rptr(drm_radeon_private_t *dev_priv, u32 off);
extern void radeon_write_ring_rptr(drm_radeon_private_t *dev_priv, u32 off, u32 val);
/* radeon_irq.c */
extern void radeon_irq_set_state(struct drm_device *dev, u32 mask, int state);
extern int radeon_irq_emit(struct drm_device *dev, void *data, struct drm_file *file_priv);
extern int radeon_irq_wait(struct drm_device *dev, void *data, struct drm_file *file_priv);
extern void radeon_do_release(struct drm_device * dev);
extern u32 radeon_get_vblank_counter(struct drm_device *dev, unsigned int pipe);
extern int radeon_enable_vblank(struct drm_device *dev, unsigned int pipe);
extern void radeon_disable_vblank(struct drm_device *dev, unsigned int pipe);
extern irqreturn_t radeon_driver_irq_handler(int irq, void *arg);
extern void radeon_driver_irq_preinstall(struct drm_device * dev);
extern int radeon_driver_irq_postinstall(struct drm_device *dev);
extern void radeon_driver_irq_uninstall(struct drm_device * dev);
extern void radeon_enable_interrupt(struct drm_device *dev);
extern int radeon_vblank_crtc_get(struct drm_device *dev);
extern int radeon_vblank_crtc_set(struct drm_device *dev, int64_t value);
extern int radeon_driver_load(struct drm_device *dev, unsigned long flags);
extern int radeon_driver_unload(struct drm_device *dev);
extern int radeon_driver_firstopen(struct drm_device *dev);
extern void radeon_driver_preclose(struct drm_device *dev,
struct drm_file *file_priv);
extern void radeon_driver_postclose(struct drm_device *dev,
struct drm_file *file_priv);
extern void radeon_driver_lastclose(struct drm_device * dev);
extern int radeon_driver_open(struct drm_device *dev,
struct drm_file *file_priv);
extern long radeon_compat_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg);
extern int radeon_master_create(struct drm_device *dev, struct drm_master *master);
extern void radeon_master_destroy(struct drm_device *dev, struct drm_master *master);
extern void radeon_cp_dispatch_flip(struct drm_device *dev, struct drm_master *master);
/* r300_cmdbuf.c */
extern void r300_init_reg_flags(struct drm_device *dev);
extern int r300_do_cp_cmdbuf(struct drm_device *dev,
struct drm_file *file_priv,
drm_radeon_kcmd_buffer_t *cmdbuf);
/* r600_cp.c */
extern int r600_do_engine_reset(struct drm_device *dev);
extern int r600_do_cleanup_cp(struct drm_device *dev);
extern int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
struct drm_file *file_priv);
extern int r600_do_resume_cp(struct drm_device *dev, struct drm_file *file_priv);
extern int r600_do_cp_idle(drm_radeon_private_t *dev_priv);
extern void r600_do_cp_start(drm_radeon_private_t *dev_priv);
extern void r600_do_cp_reset(drm_radeon_private_t *dev_priv);
extern void r600_do_cp_stop(drm_radeon_private_t *dev_priv);
extern int r600_cp_dispatch_indirect(struct drm_device *dev,
struct drm_buf *buf, int start, int end);
extern int r600_page_table_init(struct drm_device *dev);
extern void r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info);
extern int r600_cs_legacy_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv);
extern void r600_cp_dispatch_swap(struct drm_device *dev, struct drm_file *file_priv);
extern int r600_cp_dispatch_texture(struct drm_device *dev,
struct drm_file *file_priv,
drm_radeon_texture_t *tex,
drm_radeon_tex_image_t *image);
/* r600_blit.c */
extern int r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv);
extern void r600_done_blit_copy(struct drm_device *dev);
extern void r600_blit_copy(struct drm_device *dev,
uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
int size_bytes);
extern void r600_blit_swap(struct drm_device *dev,
uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
int sx, int sy, int dx, int dy,
int w, int h, int src_pitch, int dst_pitch, int cpp);
/* Flags for stats.boxes
*/
#define RADEON_BOX_DMA_IDLE 0x1
#define RADEON_BOX_RING_FULL 0x2
#define RADEON_BOX_FLIP 0x4
#define RADEON_BOX_WAIT_IDLE 0x8
#define RADEON_BOX_TEXTURE_LOAD 0x10
/* Register definitions, register access macros and drmAddMap constants
* for Radeon kernel driver.
*/
#define RADEON_MM_INDEX 0x0000
#define RADEON_MM_DATA 0x0004
#define RADEON_AGP_COMMAND 0x0f60
#define RADEON_AGP_COMMAND_PCI_CONFIG 0x0060 /* offset in PCI config */
# define RADEON_AGP_ENABLE (1<<8)
#define RADEON_AUX_SCISSOR_CNTL 0x26f0
# define RADEON_EXCLUSIVE_SCISSOR_0 (1 << 24)
# define RADEON_EXCLUSIVE_SCISSOR_1 (1 << 25)
# define RADEON_EXCLUSIVE_SCISSOR_2 (1 << 26)
# define RADEON_SCISSOR_0_ENABLE (1 << 28)
# define RADEON_SCISSOR_1_ENABLE (1 << 29)
# define RADEON_SCISSOR_2_ENABLE (1 << 30)
/*
* PCIE radeons (rv370/rv380, rv410, r423/r430/r480, r5xx)
* don't have an explicit bus mastering disable bit. It's handled
* by the PCI D-states. PMI_BM_DIS disables D-state bus master
* handling, not bus mastering itself.
*/
#define RADEON_BUS_CNTL 0x0030
/* r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */
# define RADEON_BUS_MASTER_DIS (1 << 6)
/* rs600/rs690/rs740 */
# define RS600_BUS_MASTER_DIS (1 << 14)
# define RS600_MSI_REARM (1 << 20)
/* see RS400_MSI_REARM in AIC_CNTL for rs480 */
#define RADEON_BUS_CNTL1 0x0034
# define RADEON_PMI_BM_DIS (1 << 2)
# define RADEON_PMI_INT_DIS (1 << 3)
#define RV370_BUS_CNTL 0x004c
# define RV370_PMI_BM_DIS (1 << 5)
# define RV370_PMI_INT_DIS (1 << 6)
#define RADEON_MSI_REARM_EN 0x0160
/* rv370/rv380, rv410, r423/r430/r480, r5xx */
# define RV370_MSI_REARM_EN (1 << 0)
#define RADEON_CLOCK_CNTL_DATA 0x000c
# define RADEON_PLL_WR_EN (1 << 7)
#define RADEON_CLOCK_CNTL_INDEX 0x0008
#define RADEON_CONFIG_APER_SIZE 0x0108
#define RADEON_CONFIG_MEMSIZE 0x00f8
#define RADEON_CRTC_OFFSET 0x0224
#define RADEON_CRTC_OFFSET_CNTL 0x0228
# define RADEON_CRTC_TILE_EN (1 << 15)
# define RADEON_CRTC_OFFSET_FLIP_CNTL (1 << 16)
#define RADEON_CRTC2_OFFSET 0x0324
#define RADEON_CRTC2_OFFSET_CNTL 0x0328
#define RADEON_PCIE_INDEX 0x0030
#define RADEON_PCIE_DATA 0x0034
#define RADEON_PCIE_TX_GART_CNTL 0x10
# define RADEON_PCIE_TX_GART_EN (1 << 0)
# define RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_PASS_THRU (0 << 1)
# define RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_CLAMP_LO (1 << 1)
# define RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD (3 << 1)
# define RADEON_PCIE_TX_GART_MODE_32_128_CACHE (0 << 3)
# define RADEON_PCIE_TX_GART_MODE_8_4_128_CACHE (1 << 3)
# define RADEON_PCIE_TX_GART_CHK_RW_VALID_EN (1 << 5)
# define RADEON_PCIE_TX_GART_INVALIDATE_TLB (1 << 8)
#define RADEON_PCIE_TX_DISCARD_RD_ADDR_LO 0x11
#define RADEON_PCIE_TX_DISCARD_RD_ADDR_HI 0x12
#define RADEON_PCIE_TX_GART_BASE 0x13
#define RADEON_PCIE_TX_GART_START_LO 0x14
#define RADEON_PCIE_TX_GART_START_HI 0x15
#define RADEON_PCIE_TX_GART_END_LO 0x16
#define RADEON_PCIE_TX_GART_END_HI 0x17
#define RS480_NB_MC_INDEX 0x168
# define RS480_NB_MC_IND_WR_EN (1 << 8)
#define RS480_NB_MC_DATA 0x16c
#define RS690_MC_INDEX 0x78
# define RS690_MC_INDEX_MASK 0x1ff
# define RS690_MC_INDEX_WR_EN (1 << 9)
# define RS690_MC_INDEX_WR_ACK 0x7f
#define RS690_MC_DATA 0x7c
/* MC indirect registers */
#define RS480_MC_MISC_CNTL 0x18
# define RS480_DISABLE_GTW (1 << 1)
/* switch between MCIND GART and MM GART registers. 0 = mmgart, 1 = mcind gart */
# define RS480_GART_INDEX_REG_EN (1 << 12)
# define RS690_BLOCK_GFX_D3_EN (1 << 14)
#define RS480_K8_FB_LOCATION 0x1e
#define RS480_GART_FEATURE_ID 0x2b
# define RS480_HANG_EN (1 << 11)
# define RS480_TLB_ENABLE (1 << 18)
# define RS480_P2P_ENABLE (1 << 19)
# define RS480_GTW_LAC_EN (1 << 25)
# define RS480_2LEVEL_GART (0 << 30)
# define RS480_1LEVEL_GART (1 << 30)
# define RS480_PDC_EN (1 << 31)
#define RS480_GART_BASE 0x2c
#define RS480_GART_CACHE_CNTRL 0x2e
# define RS480_GART_CACHE_INVALIDATE (1 << 0) /* wait for it to clear */
#define RS480_AGP_ADDRESS_SPACE_SIZE 0x38
# define RS480_GART_EN (1 << 0)
# define RS480_VA_SIZE_32MB (0 << 1)
# define RS480_VA_SIZE_64MB (1 << 1)
# define RS480_VA_SIZE_128MB (2 << 1)
# define RS480_VA_SIZE_256MB (3 << 1)
# define RS480_VA_SIZE_512MB (4 << 1)
# define RS480_VA_SIZE_1GB (5 << 1)
# define RS480_VA_SIZE_2GB (6 << 1)
#define RS480_AGP_MODE_CNTL 0x39
# define RS480_POST_GART_Q_SIZE (1 << 18)
# define RS480_NONGART_SNOOP (1 << 19)
# define RS480_AGP_RD_BUF_SIZE (1 << 20)
# define RS480_REQ_TYPE_SNOOP_SHIFT 22
# define RS480_REQ_TYPE_SNOOP_MASK 0x3
# define RS480_REQ_TYPE_SNOOP_DIS (1 << 24)
#define RS480_MC_MISC_UMA_CNTL 0x5f
#define RS480_MC_MCLK_CNTL 0x7a
#define RS480_MC_UMA_DUALCH_CNTL 0x86
#define RS690_MC_FB_LOCATION 0x100
#define RS690_MC_AGP_LOCATION 0x101
#define RS690_MC_AGP_BASE 0x102
#define RS690_MC_AGP_BASE_2 0x103
#define RS600_MC_INDEX 0x70
# define RS600_MC_ADDR_MASK 0xffff
# define RS600_MC_IND_SEQ_RBS_0 (1 << 16)
# define RS600_MC_IND_SEQ_RBS_1 (1 << 17)
# define RS600_MC_IND_SEQ_RBS_2 (1 << 18)
# define RS600_MC_IND_SEQ_RBS_3 (1 << 19)
# define RS600_MC_IND_AIC_RBS (1 << 20)
# define RS600_MC_IND_CITF_ARB0 (1 << 21)
# define RS600_MC_IND_CITF_ARB1 (1 << 22)
# define RS600_MC_IND_WR_EN (1 << 23)
#define RS600_MC_DATA 0x74
#define RS600_MC_STATUS 0x0
# define RS600_MC_IDLE (1 << 1)
#define RS600_MC_FB_LOCATION 0x4
#define RS600_MC_AGP_LOCATION 0x5
#define RS600_AGP_BASE 0x6
#define RS600_AGP_BASE_2 0x7
#define RS600_MC_CNTL1 0x9
# define RS600_ENABLE_PAGE_TABLES (1 << 26)
#define RS600_MC_PT0_CNTL 0x100
# define RS600_ENABLE_PT (1 << 0)
# define RS600_EFFECTIVE_L2_CACHE_SIZE(x) ((x) << 15)
# define RS600_EFFECTIVE_L2_QUEUE_SIZE(x) ((x) << 21)
# define RS600_INVALIDATE_ALL_L1_TLBS (1 << 28)
# define RS600_INVALIDATE_L2_CACHE (1 << 29)
#define RS600_MC_PT0_CONTEXT0_CNTL 0x102
# define RS600_ENABLE_PAGE_TABLE (1 << 0)
# define RS600_PAGE_TABLE_TYPE_FLAT (0 << 1)
#define RS600_MC_PT0_SYSTEM_APERTURE_LOW_ADDR 0x112
#define RS600_MC_PT0_SYSTEM_APERTURE_HIGH_ADDR 0x114
#define RS600_MC_PT0_CONTEXT0_DEFAULT_READ_ADDR 0x11c
#define RS600_MC_PT0_CONTEXT0_FLAT_BASE_ADDR 0x12c
#define RS600_MC_PT0_CONTEXT0_FLAT_START_ADDR 0x13c
#define RS600_MC_PT0_CONTEXT0_FLAT_END_ADDR 0x14c
#define RS600_MC_PT0_CLIENT0_CNTL 0x16c
# define RS600_ENABLE_TRANSLATION_MODE_OVERRIDE (1 << 0)
# define RS600_TRANSLATION_MODE_OVERRIDE (1 << 1)
# define RS600_SYSTEM_ACCESS_MODE_MASK (3 << 8)
# define RS600_SYSTEM_ACCESS_MODE_PA_ONLY (0 << 8)
# define RS600_SYSTEM_ACCESS_MODE_USE_SYS_MAP (1 << 8)
# define RS600_SYSTEM_ACCESS_MODE_IN_SYS (2 << 8)
# define RS600_SYSTEM_ACCESS_MODE_NOT_IN_SYS (3 << 8)
# define RS600_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASSTHROUGH (0 << 10)
# define RS600_SYSTEM_APERTURE_UNMAPPED_ACCESS_DEFAULT_PAGE (1 << 10)
# define RS600_EFFECTIVE_L1_CACHE_SIZE(x) ((x) << 11)
# define RS600_ENABLE_FRAGMENT_PROCESSING (1 << 14)
# define RS600_EFFECTIVE_L1_QUEUE_SIZE(x) ((x) << 15)
# define RS600_INVALIDATE_L1_TLB (1 << 20)
#define R520_MC_IND_INDEX 0x70
#define R520_MC_IND_WR_EN (1 << 24)
#define R520_MC_IND_DATA 0x74
#define RV515_MC_FB_LOCATION 0x01
#define RV515_MC_AGP_LOCATION 0x02
#define RV515_MC_AGP_BASE 0x03
#define RV515_MC_AGP_BASE_2 0x04
#define R520_MC_FB_LOCATION 0x04
#define R520_MC_AGP_LOCATION 0x05
#define R520_MC_AGP_BASE 0x06
#define R520_MC_AGP_BASE_2 0x07
#define RADEON_MPP_TB_CONFIG 0x01c0
#define RADEON_MEM_CNTL 0x0140
#define RADEON_MEM_SDRAM_MODE_REG 0x0158
#define RADEON_AGP_BASE_2 0x015c /* r200+ only */
#define RS480_AGP_BASE_2 0x0164
#define RADEON_AGP_BASE 0x0170
/* pipe config regs */
#define R400_GB_PIPE_SELECT 0x402c
#define RV530_GB_PIPE_SELECT2 0x4124
#define R500_DYN_SCLK_PWMEM_PIPE 0x000d /* PLL */
#define R300_GB_TILE_CONFIG 0x4018
# define R300_ENABLE_TILING (1 << 0)
# define R300_PIPE_COUNT_RV350 (0 << 1)
# define R300_PIPE_COUNT_R300 (3 << 1)
# define R300_PIPE_COUNT_R420_3P (6 << 1)
# define R300_PIPE_COUNT_R420 (7 << 1)
# define R300_TILE_SIZE_8 (0 << 4)
# define R300_TILE_SIZE_16 (1 << 4)
# define R300_TILE_SIZE_32 (2 << 4)
# define R300_SUBPIXEL_1_12 (0 << 16)
# define R300_SUBPIXEL_1_16 (1 << 16)
#define R300_DST_PIPE_CONFIG 0x170c
# define R300_PIPE_AUTO_CONFIG (1 << 31)
#define R300_RB2D_DSTCACHE_MODE             0x3428
#  define R300_DC_AUTOFLUSH_ENABLE          (1 << 8)
#  define R300_DC_DC_DISABLE_IGNORE_PE      (1 << 17)

#define RADEON_RB3D_COLOROFFSET             0x1c40
#define RADEON_RB3D_COLORPITCH              0x1c48
#define RADEON_SRC_X_Y                      0x1590

/* RADEON_DP_GUI_MASTER_CNTL and the GMC/DP/ROP3 values listed under it */
#define RADEON_DP_GUI_MASTER_CNTL           0x146c
#  define RADEON_GMC_SRC_PITCH_OFFSET_CNTL  (1 << 0)
#  define RADEON_GMC_DST_PITCH_OFFSET_CNTL  (1 << 1)
#  define RADEON_GMC_BRUSH_SOLID_COLOR      (13 << 4)
#  define RADEON_GMC_BRUSH_NONE             (15 << 4)
#  define RADEON_GMC_DST_16BPP              (4 << 8)
#  define RADEON_GMC_DST_24BPP              (5 << 8)
#  define RADEON_GMC_DST_32BPP              (6 << 8)
#  define RADEON_GMC_DST_DATATYPE_SHIFT     8
#  define RADEON_GMC_SRC_DATATYPE_COLOR     (3 << 12)
#  define RADEON_DP_SRC_SOURCE_MEMORY       (2 << 24)
#  define RADEON_DP_SRC_SOURCE_HOST_DATA    (3 << 24)
#  define RADEON_GMC_CLR_CMP_CNTL_DIS       (1 << 28)
#  define RADEON_GMC_WR_MSK_DIS             (1 << 30)
#  define RADEON_ROP3_S                     0x00cc0000
#  define RADEON_ROP3_P                     0x00f00000

#define RADEON_DP_WRITE_MASK                0x16cc
#define RADEON_SRC_PITCH_OFFSET             0x1428
#define RADEON_DST_PITCH_OFFSET             0x142c
#define RADEON_DST_PITCH_OFFSET_C   0x1c80
/*
 * Tile-mode values placed in bits 31:30.  The two values that set bit 31
 * use unsigned literals, because 2 << 30 and 3 << 30 do not fit in a
 * signed int (undefined behaviour, and the result sign-extends on widening).
 */
#  define RADEON_DST_TILE_LINEAR    (0 << 30)
#  define RADEON_DST_TILE_MACRO     (1 << 30)
#  define RADEON_DST_TILE_MICRO     (2U << 30)
#  define RADEON_DST_TILE_BOTH      (3U << 30)
/* Scratch register offsets (REG0..REG5 are 4 bytes apart). */
#define RADEON_SCRATCH_REG0     0x15e0
#define RADEON_SCRATCH_REG1     0x15e4
#define RADEON_SCRATCH_REG2     0x15e8
#define RADEON_SCRATCH_REG3     0x15ec
#define RADEON_SCRATCH_REG4     0x15f0
#define RADEON_SCRATCH_REG5     0x15f4
#define RADEON_SCRATCH_UMSK     0x0770
#define RADEON_SCRATCH_ADDR     0x0774

/* Offset of scratch slot x: RADEON_SCRATCH_REG_OFFSET plus 4 per slot. */
#define RADEON_SCRATCHOFF(x)    (RADEON_SCRATCH_REG_OFFSET + 4 * (x))
/* Declaration only; the definition is not in this part of the file. */
u32 radeon_get_scratch(drm_radeon_private_t *dev_priv, int index);

/* GET_SCRATCH forwards both arguments unchanged to radeon_get_scratch(). */
#define GET_SCRATCH(dev_priv, x)    radeon_get_scratch(dev_priv, x)
/* R600 scratch register offsets (REG0..REG7 are 4 bytes apart). */
#define R600_SCRATCH_REG0       0x8500
#define R600_SCRATCH_REG1       0x8504
#define R600_SCRATCH_REG2       0x8508
#define R600_SCRATCH_REG3       0x850c
#define R600_SCRATCH_REG4       0x8510
#define R600_SCRATCH_REG5       0x8514
#define R600_SCRATCH_REG6       0x8518
#define R600_SCRATCH_REG7       0x851c
#define R600_SCRATCH_UMSK       0x8540
#define R600_SCRATCH_ADDR       0x8544

/* Offset of scratch slot x: R600_SCRATCH_REG_OFFSET plus 4 per slot. */
#define R600_SCRATCHOFF(x)      (R600_SCRATCH_REG_OFFSET + 4 * (x))
/* RADEON_GEN_INT_CNTL and the bits listed under it */
#define RADEON_GEN_INT_CNTL             0x0040
#  define RADEON_CRTC_VBLANK_MASK       (1 << 0)
#  define RADEON_CRTC2_VBLANK_MASK      (1 << 9)
#  define RADEON_GUI_IDLE_INT_ENABLE    (1 << 19)
#  define RADEON_SW_INT_ENABLE          (1 << 25)

/* RADEON_GEN_INT_STATUS; each *_STAT bit shares its position with *_ACK */
#define RADEON_GEN_INT_STATUS           0x0044
#  define RADEON_CRTC_VBLANK_STAT       (1 << 0)
#  define RADEON_CRTC_VBLANK_STAT_ACK   (1 << 0)
#  define RADEON_CRTC2_VBLANK_STAT      (1 << 9)
#  define RADEON_CRTC2_VBLANK_STAT_ACK  (1 << 9)
#  define RADEON_GUI_IDLE_INT_TEST_ACK  (1 << 19)
#  define RADEON_SW_INT_TEST            (1 << 25)
#  define RADEON_SW_INT_TEST_ACK        (1 << 25)
#  define RADEON_SW_INT_FIRE            (1 << 26)
#  define R500_DISPLAY_INT_STATUS       (1 << 0)
#define RADEON_HOST_PATH_CNTL               0x0130
#  define RADEON_HDP_SOFT_RESET             (1 << 26)
#  define RADEON_HDP_WC_TIMEOUT_MASK        (7 << 28)
#  define RADEON_HDP_WC_TIMEOUT_28BCLK      (7 << 28)

#define RADEON_ISYNC_CNTL                   0x1724
#  define RADEON_ISYNC_ANY2D_IDLE3D         (1 << 0)
#  define RADEON_ISYNC_ANY3D_IDLE2D         (1 << 1)
#  define RADEON_ISYNC_TRIG2D_IDLE3D        (1 << 2)
#  define RADEON_ISYNC_TRIG3D_IDLE2D        (1 << 3)
#  define RADEON_ISYNC_WAIT_IDLEGUI         (1 << 4)
#  define RADEON_ISYNC_CPSCRATCH_IDLEGUI    (1 << 5)

#define RADEON_RBBM_GUICNTL                 0x172c
#  define RADEON_HOST_DATA_SWAP_NONE        (0 << 0)
#  define RADEON_HOST_DATA_SWAP_16BIT       (1 << 0)
#  define RADEON_HOST_DATA_SWAP_32BIT       (2 << 0)
#  define RADEON_HOST_DATA_SWAP_HDW         (3 << 0)
#define RADEON_MC_AGP_LOCATION      0x014c
#define RADEON_MC_FB_LOCATION       0x0148

/* RADEON_MCLK_CNTL and the FORCEON bits listed under it */
#define RADEON_MCLK_CNTL            0x0012
#  define RADEON_FORCEON_MCLKA      (1 << 16)
#  define RADEON_FORCEON_MCLKB      (1 << 17)
#  define RADEON_FORCEON_YCLKA      (1 << 18)
#  define RADEON_FORCEON_YCLKB      (1 << 19)
#  define RADEON_FORCEON_MC         (1 << 20)
#  define RADEON_FORCEON_AIC        (1 << 21)

/* PP_* register offsets */
#define RADEON_PP_BORDER_COLOR_0    0x1d40
#define RADEON_PP_BORDER_COLOR_1    0x1d44
#define RADEON_PP_BORDER_COLOR_2    0x1d48
#define RADEON_PP_CNTL              0x1c38
#  define RADEON_SCISSOR_ENABLE     (1 << 1)
#define RADEON_PP_LUM_MATRIX        0x1d00
#define RADEON_PP_MISC              0x1c14
#define RADEON_PP_ROT_MATRIX_0      0x1d58
#define RADEON_PP_TXFILTER_0        0x1c54
#define RADEON_PP_TXOFFSET_0        0x1c5c
#define RADEON_PP_TXFILTER_1        0x1c6c
#define RADEON_PP_TXFILTER_2        0x1c84
#define R300_RB2D_DSTCACHE_CTLSTAT  0x342c /* use R300_DSTCACHE_CTLSTAT */
#define R300_DSTCACHE_CTLSTAT       0x1714
#  define R300_RB2D_DC_FLUSH        (3 << 0)
#  define R300_RB2D_DC_FREE         (3 << 2)
#  define R300_RB2D_DC_FLUSH_ALL    0xf
/* Bit 31 uses an unsigned literal; 1 << 31 overflows a signed int. */
#  define R300_RB2D_DC_BUSY         (1U << 31)
/* RADEON_RB3D_CNTL and the bits listed under it */
#define RADEON_RB3D_CNTL                0x1c3c
#  define RADEON_ALPHA_BLEND_ENABLE     (1 << 0)
#  define RADEON_PLANE_MASK_ENABLE      (1 << 1)
#  define RADEON_DITHER_ENABLE          (1 << 2)
#  define RADEON_ROUND_ENABLE           (1 << 3)
#  define RADEON_SCALE_DITHER_ENABLE    (1 << 4)
#  define RADEON_DITHER_INIT            (1 << 5)
#  define RADEON_ROP_ENABLE             (1 << 6)
#  define RADEON_STENCIL_ENABLE         (1 << 7)
#  define RADEON_Z_ENABLE               (1 << 8)
#  define RADEON_ZBLOCK16               (1 << 15)

#define RADEON_RB3D_DEPTHOFFSET         0x1c24
#define RADEON_RB3D_DEPTHCLEARVALUE     0x3230
#define RADEON_RB3D_DEPTHPITCH          0x1c28
#define RADEON_RB3D_PLANEMASK           0x1d84
#define RADEON_RB3D_STENCILREFMASK      0x1d7c
#define RADEON_RB3D_ZCACHE_MODE         0x3250
/*
 * Z-cache and destination-cache CTLSTAT registers.  Every *_BUSY flag
 * sits at bit 31 and is written as 1U << 31, because shifting a signed 1
 * into the sign bit is undefined behaviour.
 */
#define RADEON_RB3D_ZCACHE_CTLSTAT      0x3254
#  define RADEON_RB3D_ZC_FLUSH          (1 << 0)
#  define RADEON_RB3D_ZC_FREE           (1 << 2)
#  define RADEON_RB3D_ZC_FLUSH_ALL      0x5
#  define RADEON_RB3D_ZC_BUSY           (1U << 31)

#define R300_ZB_ZCACHE_CTLSTAT          0x4f18
#  define R300_ZC_FLUSH                 (1 << 0)
#  define R300_ZC_FREE                  (1 << 1)
#  define R300_ZC_BUSY                  (1U << 31)

#define RADEON_RB3D_DSTCACHE_CTLSTAT    0x325c
#  define RADEON_RB3D_DC_FLUSH          (3 << 0)
#  define RADEON_RB3D_DC_FREE           (3 << 2)
#  define RADEON_RB3D_DC_FLUSH_ALL      0xf
#  define RADEON_RB3D_DC_BUSY           (1U << 31)

#define R300_RB3D_DSTCACHE_CTLSTAT      0x4e4c
#  define R300_RB3D_DC_FLUSH            (2 << 0)
#  define R300_RB3D_DC_FREE             (2 << 2)
#  define R300_RB3D_DC_FINISH           (1 << 4)
/* RADEON_RB3D_ZSTENCILCNTL and the field values listed under it */
#define RADEON_RB3D_ZSTENCILCNTL            0x1c2c
#  define RADEON_Z_TEST_MASK                (7 << 4)
#  define RADEON_Z_TEST_ALWAYS              (7 << 4)
#  define RADEON_Z_HIERARCHY_ENABLE         (1 << 8)
#  define RADEON_STENCIL_TEST_ALWAYS        (7 << 12)
#  define RADEON_STENCIL_S_FAIL_REPLACE     (2 << 16)
#  define RADEON_STENCIL_ZPASS_REPLACE      (2 << 20)
#  define RADEON_STENCIL_ZFAIL_REPLACE      (2 << 24)
#  define RADEON_Z_COMPRESSION_ENABLE       (1 << 28)
#  define RADEON_FORCE_Z_DIRTY              (1 << 29)
#  define RADEON_Z_WRITE_ENABLE             (1 << 30)
/* Bit 31 uses an unsigned literal; 1 << 31 overflows a signed int. */
#  define RADEON_Z_DECOMPRESSION_ENABLE     (1U << 31)
/* RADEON_RBBM_SOFT_RESET and the SOFT_RESET_* bits 0..7 */
#define RADEON_RBBM_SOFT_RESET      0x00f0
#  define RADEON_SOFT_RESET_CP      (1 << 0)
#  define RADEON_SOFT_RESET_HI      (1 << 1)
#  define RADEON_SOFT_RESET_SE      (1 << 2)
#  define RADEON_SOFT_RESET_RE      (1 << 3)
#  define RADEON_SOFT_RESET_PP      (1 << 4)
#  define RADEON_SOFT_RESET_E2      (1 << 5)
#  define RADEON_SOFT_RESET_RB      (1 << 6)
#  define RADEON_SOFT_RESET_HDP     (1 << 7)
/*
 * RADEON_RBBM_STATUS bit layout:
 *
 *  6:0  Available slots in the FIFO
 *  8    Host Interface active
 *  9    CP request active
 *  10   FIFO request active
 *  11   Host Interface retry active
 *  12   CP retry active
 *  13   FIFO retry active
 *  14   FIFO pipeline busy
 *  15   Event engine busy
 *  16   CP command stream busy
 *  17   2D engine busy
 *  18   2D portion of render backend busy
 *  20   3D setup engine busy
 *  26   GA engine busy
 *  27   CBA 2D engine busy
 *  31   2D engine busy or 3D engine busy or FIFO not empty or CP busy or
 *       command stream queue not empty or Ring Buffer not empty
 *
 * The same register is mirrored at offset 0x1740.
 */
#define RADEON_RBBM_STATUS              0x0e40
/* bits 6:0 are dword slots available in the cmd fifo */
#  define RADEON_RBBM_FIFOCNT_MASK      0x007f
#  define RADEON_HIRQ_ON_RBB            (1 << 8)
#  define RADEON_CPRQ_ON_RBB            (1 << 9)
#  define RADEON_CFRQ_ON_RBB            (1 << 10)
#  define RADEON_HIRQ_IN_RTBUF          (1 << 11)
#  define RADEON_CPRQ_IN_RTBUF          (1 << 12)
#  define RADEON_CFRQ_IN_RTBUF          (1 << 13)
#  define RADEON_PIPE_BUSY              (1 << 14)
#  define RADEON_ENG_EV_BUSY            (1 << 15)
#  define RADEON_CP_CMDSTRM_BUSY        (1 << 16)
#  define RADEON_E2_BUSY                (1 << 17)
#  define RADEON_RB2D_BUSY              (1 << 18)
#  define RADEON_RB3D_BUSY              (1 << 19) /* not used on r300 */
#  define RADEON_VAP_BUSY               (1 << 20)
#  define RADEON_RE_BUSY                (1 << 21) /* not used on r300 */
#  define RADEON_TAM_BUSY               (1 << 22) /* not used on r300 */
#  define RADEON_TDM_BUSY               (1 << 23) /* not used on r300 */
#  define RADEON_PB_BUSY                (1 << 24) /* not used on r300 */
#  define RADEON_TIM_BUSY               (1 << 25) /* not used on r300 */
#  define RADEON_GA_BUSY                (1 << 26)
#  define RADEON_CBA2D_BUSY             (1 << 27)
/* Bit 31 uses an unsigned literal; 1 << 31 overflows a signed int. */
#  define RADEON_RBBM_ACTIVE            (1U << 31)
/* RE_* register offsets */
#define RADEON_RE_LINE_PATTERN      0x1cd0
#define RADEON_RE_MISC              0x26c4
#define RADEON_RE_TOP_LEFT          0x26c0
#define RADEON_RE_WIDTH_HEIGHT      0x1c44
#define RADEON_RE_STIPPLE_ADDR      0x1cc8
#define RADEON_RE_STIPPLE_DATA      0x1ccc

/* Scissor TL/BR register pairs 0..2 */
#define RADEON_SCISSOR_TL_0         0x1cd8
#define RADEON_SCISSOR_BR_0         0x1cdc
#define RADEON_SCISSOR_TL_1         0x1ce0
#define RADEON_SCISSOR_BR_1         0x1ce4
#define RADEON_SCISSOR_TL_2         0x1ce8
#define RADEON_SCISSOR_BR_2         0x1cec

#define RADEON_SE_COORD_FMT         0x1c50
/* RADEON_SE_CNTL and the field values listed under it */
#define RADEON_SE_CNTL                          0x1c4c
#  define RADEON_FFACE_CULL_CW                  (0 << 0)
#  define RADEON_BFACE_SOLID                    (3 << 1)
#  define RADEON_FFACE_SOLID                    (3 << 3)
#  define RADEON_FLAT_SHADE_VTX_LAST            (3 << 6)
#  define RADEON_DIFFUSE_SHADE_FLAT             (1 << 8)
#  define RADEON_DIFFUSE_SHADE_GOURAUD          (2 << 8)
#  define RADEON_ALPHA_SHADE_FLAT               (1 << 10)
#  define RADEON_ALPHA_SHADE_GOURAUD            (2 << 10)
#  define RADEON_SPECULAR_SHADE_FLAT            (1 << 12)
#  define RADEON_SPECULAR_SHADE_GOURAUD         (2 << 12)
#  define RADEON_FOG_SHADE_FLAT                 (1 << 14)
#  define RADEON_FOG_SHADE_GOURAUD              (2 << 14)
#  define RADEON_VPORT_XY_XFORM_ENABLE          (1 << 24)
#  define RADEON_VPORT_Z_XFORM_ENABLE           (1 << 25)
#  define RADEON_VTX_PIX_CENTER_OGL             (1 << 27)
#  define RADEON_ROUND_MODE_TRUNC               (0 << 28)
#  define RADEON_ROUND_PREC_8TH_PIX             (1 << 30)

#define RADEON_SE_CNTL_STATUS                   0x2140
#define RADEON_SE_LINE_WIDTH                    0x1db8
#define RADEON_SE_VPORT_XSCALE                  0x1d98
#define RADEON_SE_ZBIAS_FACTOR                  0x1db0
#define RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED    0x2210
#define RADEON_SE_TCL_OUTPUT_VTX_FMT            0x2254

/* TCL vector and scalar index/data register pairs */
#define RADEON_SE_TCL_VECTOR_INDX_REG           0x2200
#  define RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT  16
#  define RADEON_VEC_INDX_DWORD_COUNT_SHIFT     28
#define RADEON_SE_TCL_VECTOR_DATA_REG           0x2204
#define RADEON_SE_TCL_SCALAR_INDX_REG           0x2208
#  define RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT   16
#define RADEON_SE_TCL_SCALAR_DATA_REG           0x220C
#define RADEON_SURFACE_ACCESS_FLAGS         0x0bf8
#define RADEON_SURFACE_ACCESS_CLR           0x0bfc

/* RADEON_SURFACE_CNTL and the AP0/AP1 swap field values */
#define RADEON_SURFACE_CNTL                 0x0b00
#  define RADEON_SURF_TRANSLATION_DIS       (1 << 8)
#  define RADEON_NONSURF_AP0_SWP_MASK       (3 << 20)
#  define RADEON_NONSURF_AP0_SWP_LITTLE     (0 << 20)
#  define RADEON_NONSURF_AP0_SWP_BIG16      (1 << 20)
#  define RADEON_NONSURF_AP0_SWP_BIG32      (2 << 20)
#  define RADEON_NONSURF_AP1_SWP_MASK       (3 << 22)
#  define RADEON_NONSURF_AP1_SWP_LITTLE     (0 << 22)
#  define RADEON_NONSURF_AP1_SWP_BIG16      (1 << 22)
#  define RADEON_NONSURF_AP1_SWP_BIG32      (2 << 22)
/*
 * Surface 0..7 INFO / LOWER_BOUND / UPPER_BOUND offsets; each surface's
 * triple is 0x10 above the previous one.  Field masks are listed under
 * surface 0 only.
 */
#define RADEON_SURFACE0_INFO                0x0b0c
#  define RADEON_SURF_PITCHSEL_MASK         (0x1ff << 0)
#  define RADEON_SURF_TILE_MODE_MASK        (3 << 16)
#  define RADEON_SURF_TILE_MODE_MACRO       (0 << 16)
#  define RADEON_SURF_TILE_MODE_MICRO       (1 << 16)
#  define RADEON_SURF_TILE_MODE_32BIT_Z     (2 << 16)
#  define RADEON_SURF_TILE_MODE_16BIT_Z     (3 << 16)
#define RADEON_SURFACE0_LOWER_BOUND         0x0b04
#define RADEON_SURFACE0_UPPER_BOUND         0x0b08
#  define RADEON_SURF_ADDRESS_FIXED_MASK    (0x3ff << 0)

#define RADEON_SURFACE1_INFO                0x0b1c
#define RADEON_SURFACE1_LOWER_BOUND         0x0b14
#define RADEON_SURFACE1_UPPER_BOUND         0x0b18

#define RADEON_SURFACE2_INFO                0x0b2c
#define RADEON_SURFACE2_LOWER_BOUND         0x0b24
#define RADEON_SURFACE2_UPPER_BOUND         0x0b28

#define RADEON_SURFACE3_INFO                0x0b3c
#define RADEON_SURFACE3_LOWER_BOUND         0x0b34
#define RADEON_SURFACE3_UPPER_BOUND         0x0b38

#define RADEON_SURFACE4_INFO                0x0b4c
#define RADEON_SURFACE4_LOWER_BOUND         0x0b44
#define RADEON_SURFACE4_UPPER_BOUND         0x0b48

#define RADEON_SURFACE5_INFO                0x0b5c
#define RADEON_SURFACE5_LOWER_BOUND         0x0b54
#define RADEON_SURFACE5_UPPER_BOUND         0x0b58

#define RADEON_SURFACE6_INFO                0x0b6c
#define RADEON_SURFACE6_LOWER_BOUND         0x0b64
#define RADEON_SURFACE6_UPPER_BOUND         0x0b68

#define RADEON_SURFACE7_INFO                0x0b7c
#define RADEON_SURFACE7_LOWER_BOUND         0x0b74
#define RADEON_SURFACE7_UPPER_BOUND         0x0b78
#define RADEON_SW_SEMAPHORE                 0x013c

/* RADEON_WAIT_UNTIL and the wait-condition bits listed under it */
#define RADEON_WAIT_UNTIL                   0x1720
#  define RADEON_WAIT_CRTC_PFLIP            (1 << 0)
#  define RADEON_WAIT_2D_IDLE               (1 << 14)
#  define RADEON_WAIT_3D_IDLE               (1 << 15)
#  define RADEON_WAIT_2D_IDLECLEAN          (1 << 16)
#  define RADEON_WAIT_3D_IDLECLEAN          (1 << 17)
#  define RADEON_WAIT_HOST_IDLECLEAN        (1 << 18)

#define RADEON_RB3D_ZMASKOFFSET             0x3234
#define RADEON_RB3D_ZSTENCILCNTL            0x1c2c
#  define RADEON_DEPTH_FORMAT_16BIT_INT_Z   (0 << 0)
#  define RADEON_DEPTH_FORMAT_24BIT_INT_Z   (2 << 0)
/* CP registers */
#define RADEON_CP_ME_RAM_ADDR               0x07d4
#define RADEON_CP_ME_RAM_RADDR              0x07d8
#define RADEON_CP_ME_RAM_DATAH              0x07dc
#define RADEON_CP_ME_RAM_DATAL              0x07e0

#define RADEON_CP_RB_BASE                   0x0700
#define RADEON_CP_RB_CNTL                   0x0704
#  define RADEON_BUF_SWAP_32BIT             (2 << 16)
#  define RADEON_RB_NO_UPDATE               (1 << 27)
/* Bit 31 uses an unsigned literal; 1 << 31 overflows a signed int. */
#  define RADEON_RB_RPTR_WR_ENA             (1U << 31)
#define RADEON_CP_RB_RPTR_ADDR              0x070c
#define RADEON_CP_RB_RPTR                   0x0710
#define RADEON_CP_RB_WPTR                   0x0714
#define RADEON_CP_RB_WPTR_DELAY             0x0718
#  define RADEON_PRE_WRITE_TIMER_SHIFT      0
#  define RADEON_PRE_WRITE_LIMIT_SHIFT      23
#define RADEON_CP_IB_BASE                   0x0738

/* RADEON_CP_CSQ_CNTL: count mask in bits 7:0, mode values shifted to bit 28 */
#define RADEON_CP_CSQ_CNTL                  0x0740
#  define RADEON_CSQ_CNT_PRIMARY_MASK       (0xff << 0)
#  define RADEON_CSQ_PRIDIS_INDDIS          (0 << 28)
#  define RADEON_CSQ_PRIPIO_INDDIS          (1 << 28)
#  define RADEON_CSQ_PRIBM_INDDIS           (2 << 28)
#  define RADEON_CSQ_PRIPIO_INDBM           (3 << 28)
#  define RADEON_CSQ_PRIBM_INDBM            (4 << 28)
/* 15 << 28 sets bit 31 and does not fit a signed int, hence the U suffix. */
#  define RADEON_CSQ_PRIPIO_INDPIO          (15U << 28)

#define R300_CP_RESYNC_ADDR                 0x0778
#define R300_CP_RESYNC_DATA                 0x077c
/* AIC register offsets (0x01d0..0x01e8) and the AIC_CNTL bits */
#define RADEON_AIC_CNTL                 0x01d0
#  define RADEON_PCIGART_TRANSLATE_EN   (1 << 0)
#  define RS400_MSI_REARM               (1 << 3)
#define RADEON_AIC_STAT                 0x01d4
#define RADEON_AIC_PT_BASE              0x01d8
#define RADEON_AIC_LO_ADDR              0x01dc
#define RADEON_AIC_HI_ADDR              0x01e0
#define RADEON_AIC_TLB_ADDR             0x01e4
#define RADEON_AIC_TLB_DATA             0x01e8
/* CP command packets: packet-type values in the top two bits */
#define RADEON_CP_PACKET0                       0x00000000
#  define RADEON_ONE_REG_WR                     (1 << 15)
#define RADEON_CP_PACKET1                       0x40000000
#define RADEON_CP_PACKET2                       0x80000000
#define RADEON_CP_PACKET3                       0xC0000000

/* Values listed under RADEON_CP_PACKET3 */
#  define RADEON_CP_NOP                         0x00001000
#  define RADEON_CP_NEXT_CHAR                   0x00001900
#  define RADEON_CP_PLY_NEXTSCAN                0x00001D00
#  define RADEON_CP_SET_SCISSORS                0x00001E00
/* GEN_INDX_PRIM is unsupported starting with R300 */
#  define RADEON_3D_RNDR_GEN_INDX_PRIM          0x00002300
#  define RADEON_WAIT_FOR_IDLE                  0x00002600
#  define RADEON_3D_DRAW_VBUF                   0x00002800
#  define RADEON_3D_DRAW_IMMD                   0x00002900
#  define RADEON_3D_DRAW_INDX                   0x00002A00
#  define RADEON_CP_LOAD_PALETTE                0x00002C00
#  define RADEON_3D_LOAD_VBPNTR                 0x00002F00
#  define RADEON_MPEG_IDCT_MACROBLOCK           0x00003000
#  define RADEON_MPEG_IDCT_MACROBLOCK_REV       0x00003100
#  define RADEON_3D_CLEAR_ZMASK                 0x00003200
#  define RADEON_CP_INDX_BUFFER                 0x00003300
#  define RADEON_CP_3D_DRAW_VBUF_2              0x00003400
#  define RADEON_CP_3D_DRAW_IMMD_2              0x00003500
#  define RADEON_CP_3D_DRAW_INDX_2              0x00003600
#  define RADEON_3D_CLEAR_HIZ                   0x00003700
#  define RADEON_CP_3D_CLEAR_CMASK              0x00003802
#  define RADEON_CNTL_HOSTDATA_BLT              0x00009400
#  define RADEON_CNTL_PAINT_MULTI               0x00009A00
#  define RADEON_CNTL_BITBLT_MULTI              0x00009B00
#  define RADEON_CNTL_SET_SCISSORS              0xC0001E00
/* R600_IT_* values; sub-values are indented under their parent */
#  define R600_IT_INDIRECT_BUFFER_END           0x00001700
#  define R600_IT_SET_PREDICATION               0x00002000
#  define R600_IT_REG_RMW                       0x00002100
#  define R600_IT_COND_EXEC                     0x00002200
#  define R600_IT_PRED_EXEC                     0x00002300
#  define R600_IT_START_3D_CMDBUF               0x00002400
#  define R600_IT_DRAW_INDEX_2                  0x00002700
#  define R600_IT_CONTEXT_CONTROL               0x00002800
#  define R600_IT_DRAW_INDEX_IMMD_BE            0x00002900
#  define R600_IT_INDEX_TYPE                    0x00002A00
#  define R600_IT_DRAW_INDEX                    0x00002B00
#  define R600_IT_DRAW_INDEX_AUTO               0x00002D00
#  define R600_IT_DRAW_INDEX_IMMD               0x00002E00
#  define R600_IT_NUM_INSTANCES                 0x00002F00
#  define R600_IT_STRMOUT_BUFFER_UPDATE         0x00003400
#  define R600_IT_INDIRECT_BUFFER_MP            0x00003800
#  define R600_IT_MEM_SEMAPHORE                 0x00003900
#  define R600_IT_MPEG_INDEX                    0x00003A00
#  define R600_IT_WAIT_REG_MEM                  0x00003C00
#  define R600_IT_MEM_WRITE                     0x00003D00
#  define R600_IT_INDIRECT_BUFFER               0x00003200
#  define R600_IT_SURFACE_SYNC                  0x00004300
#    define R600_CB0_DEST_BASE_ENA              (1 << 6)
#    define R600_TC_ACTION_ENA                  (1 << 23)
#    define R600_VC_ACTION_ENA                  (1 << 24)
#    define R600_CB_ACTION_ENA                  (1 << 25)
#    define R600_DB_ACTION_ENA                  (1 << 26)
#    define R600_SH_ACTION_ENA                  (1 << 27)
#    define R600_SMX_ACTION_ENA                 (1 << 28)
#  define R600_IT_ME_INITIALIZE                 0x00004400
#    define R600_ME_INITIALIZE_DEVICE_ID(x)     ((x) << 16)
#  define R600_IT_COND_WRITE                    0x00004500
#  define R600_IT_EVENT_WRITE                   0x00004600
#  define R600_IT_EVENT_WRITE_EOP               0x00004700
#  define R600_IT_ONE_REG_WRITE                 0x00005700
#  define R600_IT_SET_CONFIG_REG                0x00006800
#    define R600_SET_CONFIG_REG_OFFSET          0x00008000
#    define R600_SET_CONFIG_REG_END             0x0000ac00
#  define R600_IT_SET_CONTEXT_REG               0x00006900
#    define R600_SET_CONTEXT_REG_OFFSET         0x00028000
#    define R600_SET_CONTEXT_REG_END            0x00029000
#  define R600_IT_SET_ALU_CONST                 0x00006A00
#    define R600_SET_ALU_CONST_OFFSET           0x00030000
#    define R600_SET_ALU_CONST_END              0x00032000
#  define R600_IT_SET_BOOL_CONST                0x00006B00
#    define R600_SET_BOOL_CONST_OFFSET          0x0003e380
#    define R600_SET_BOOL_CONST_END             0x00040000
#  define R600_IT_SET_LOOP_CONST                0x00006C00
#    define R600_SET_LOOP_CONST_OFFSET          0x0003e200
#    define R600_SET_LOOP_CONST_END             0x0003e380
#  define R600_IT_SET_RESOURCE                  0x00006D00
#    define R600_SET_RESOURCE_OFFSET            0x00038000
#    define R600_SET_RESOURCE_END               0x0003c000
#    define R600_SQ_TEX_VTX_INVALID_TEXTURE     0x0
#    define R600_SQ_TEX_VTX_INVALID_BUFFER      0x1
#    define R600_SQ_TEX_VTX_VALID_TEXTURE       0x2
#    define R600_SQ_TEX_VTX_VALID_BUFFER        0x3
#  define R600_IT_SET_SAMPLER                   0x00006E00
#    define R600_SET_SAMPLER_OFFSET             0x0003c000
#    define R600_SET_SAMPLER_END                0x0003cff0
#  define R600_IT_SET_CTL_CONST                 0x00006F00
#    define R600_SET_CTL_CONST_OFFSET           0x0003cff0
#    define R600_SET_CTL_CONST_END              0x0003e200
#  define R600_IT_SURFACE_BASE_UPDATE           0x00007300
/* Masks applied to CP packet header words */
#define RADEON_CP_PACKET_MASK           0xC0000000
#define RADEON_CP_PACKET_COUNT_MASK     0x3fff0000
#define RADEON_CP_PACKET0_REG_MASK      0x000007ff
#define RADEON_CP_PACKET1_REG0_MASK     0x000007ff
#define RADEON_CP_PACKET1_REG1_MASK     0x003ff800
/* Bit 31 uses an unsigned literal; 1 << 31 overflows a signed int. */
#define RADEON_VTX_Z_PRESENT                (1U << 31)
#define RADEON_VTX_PKCOLOR_PRESENT          (1 << 3)

/* Primitive type values (low four bits, see RADEON_PRIM_TYPE_MASK) */
#define RADEON_PRIM_TYPE_NONE               (0 << 0)
#define RADEON_PRIM_TYPE_POINT              (1 << 0)
#define RADEON_PRIM_TYPE_LINE               (2 << 0)
#define RADEON_PRIM_TYPE_LINE_STRIP         (3 << 0)
#define RADEON_PRIM_TYPE_TRI_LIST           (4 << 0)
#define RADEON_PRIM_TYPE_TRI_FAN            (5 << 0)
#define RADEON_PRIM_TYPE_TRI_STRIP          (6 << 0)
#define RADEON_PRIM_TYPE_TRI_TYPE2          (7 << 0)
#define RADEON_PRIM_TYPE_RECT_LIST          (8 << 0)
#define RADEON_PRIM_TYPE_3VRT_POINT_LIST    (9 << 0)
#define RADEON_PRIM_TYPE_3VRT_LINE_LIST     (10 << 0)
#define RADEON_PRIM_TYPE_MASK               0xf

#define RADEON_PRIM_WALK_IND                (1 << 4)
#define RADEON_PRIM_WALK_LIST               (2 << 4)
#define RADEON_PRIM_WALK_RING               (3 << 4)
#define RADEON_COLOR_ORDER_BGRA             (0 << 6)
#define RADEON_COLOR_ORDER_RGBA             (1 << 6)
#define RADEON_MAOS_ENABLE                  (1 << 7)
#define RADEON_VTX_FMT_R128_MODE            (0 << 8)
#define RADEON_VTX_FMT_RADEON_MODE          (1 << 8)
#define RADEON_NUM_VERTICES_SHIFT           16
/* Colour format codes */
#define RADEON_COLOR_FORMAT_CI8         2
#define RADEON_COLOR_FORMAT_ARGB1555    3
#define RADEON_COLOR_FORMAT_RGB565      4
#define RADEON_COLOR_FORMAT_ARGB8888    6
#define RADEON_COLOR_FORMAT_RGB332      7
#define RADEON_COLOR_FORMAT_RGB8        9
#define RADEON_COLOR_FORMAT_ARGB4444    15

/* Texture format codes */
#define RADEON_TXFORMAT_I8              0
#define RADEON_TXFORMAT_AI88            1
#define RADEON_TXFORMAT_RGB332          2
#define RADEON_TXFORMAT_ARGB1555        3
#define RADEON_TXFORMAT_RGB565          4
#define RADEON_TXFORMAT_ARGB4444        5
#define RADEON_TXFORMAT_ARGB8888        6
#define RADEON_TXFORMAT_RGBA8888        7
#define RADEON_TXFORMAT_Y8              8
#define RADEON_TXFORMAT_VYUY422         10
#define RADEON_TXFORMAT_YVYU422         11
#define RADEON_TXFORMAT_DXT1            12
#define RADEON_TXFORMAT_DXT23           14
#define RADEON_TXFORMAT_DXT45           15
/* R200 TXCBLEND 0..7, spaced 0x10 apart */
#define R200_PP_TXCBLEND_0                  0x2f00
#define R200_PP_TXCBLEND_1                  0x2f10
#define R200_PP_TXCBLEND_2                  0x2f20
#define R200_PP_TXCBLEND_3                  0x2f30
#define R200_PP_TXCBLEND_4                  0x2f40
#define R200_PP_TXCBLEND_5                  0x2f50
#define R200_PP_TXCBLEND_6                  0x2f60
#define R200_PP_TXCBLEND_7                  0x2f70

#define R200_SE_TCL_LIGHT_MODEL_CTL_0       0x2268
#define R200_PP_TFACTOR_0                   0x2ee0
#define R200_SE_VTX_FMT_0                   0x2088
#define R200_SE_VAP_CNTL                    0x2080
#define R200_SE_TCL_MATRIX_SEL_0            0x2230
#define R200_SE_TCL_TEX_PROC_CTL_2          0x22a8
#define R200_SE_TCL_UCP_VERT_BLEND_CTL      0x22c0

/* R200 TXFILTER 5..0, spaced 0x20 apart */
#define R200_PP_TXFILTER_5                  0x2ca0
#define R200_PP_TXFILTER_4                  0x2c80
#define R200_PP_TXFILTER_3                  0x2c60
#define R200_PP_TXFILTER_2                  0x2c40
#define R200_PP_TXFILTER_1                  0x2c20
#define R200_PP_TXFILTER_0                  0x2c00

/* R200 TXOFFSET 5..0, spaced 0x18 apart */
#define R200_PP_TXOFFSET_5                  0x2d78
#define R200_PP_TXOFFSET_4                  0x2d60
#define R200_PP_TXOFFSET_3                  0x2d48
#define R200_PP_TXOFFSET_2                  0x2d30
#define R200_PP_TXOFFSET_1                  0x2d18
#define R200_PP_TXOFFSET_0                  0x2d00

/* R200 CUBIC_FACES 0..5, spaced 0x20 apart */
#define R200_PP_CUBIC_FACES_0               0x2c18
#define R200_PP_CUBIC_FACES_1               0x2c38
#define R200_PP_CUBIC_FACES_2               0x2c58
#define R200_PP_CUBIC_FACES_3               0x2c78
#define R200_PP_CUBIC_FACES_4               0x2c98
#define R200_PP_CUBIC_FACES_5               0x2cb8
/* R200 CUBIC_OFFSET F1..F5 for texture units 0..5 */
#define R200_PP_CUBIC_OFFSET_F1_0       0x2d04
#define R200_PP_CUBIC_OFFSET_F2_0       0x2d08
#define R200_PP_CUBIC_OFFSET_F3_0       0x2d0c
#define R200_PP_CUBIC_OFFSET_F4_0       0x2d10
#define R200_PP_CUBIC_OFFSET_F5_0       0x2d14

#define R200_PP_CUBIC_OFFSET_F1_1       0x2d1c
#define R200_PP_CUBIC_OFFSET_F2_1       0x2d20
#define R200_PP_CUBIC_OFFSET_F3_1       0x2d24
#define R200_PP_CUBIC_OFFSET_F4_1       0x2d28
#define R200_PP_CUBIC_OFFSET_F5_1       0x2d2c

#define R200_PP_CUBIC_OFFSET_F1_2       0x2d34
#define R200_PP_CUBIC_OFFSET_F2_2       0x2d38
#define R200_PP_CUBIC_OFFSET_F3_2       0x2d3c
#define R200_PP_CUBIC_OFFSET_F4_2       0x2d40
#define R200_PP_CUBIC_OFFSET_F5_2       0x2d44

#define R200_PP_CUBIC_OFFSET_F1_3       0x2d4c
#define R200_PP_CUBIC_OFFSET_F2_3       0x2d50
#define R200_PP_CUBIC_OFFSET_F3_3       0x2d54
#define R200_PP_CUBIC_OFFSET_F4_3       0x2d58
#define R200_PP_CUBIC_OFFSET_F5_3       0x2d5c

#define R200_PP_CUBIC_OFFSET_F1_4       0x2d64
#define R200_PP_CUBIC_OFFSET_F2_4       0x2d68
#define R200_PP_CUBIC_OFFSET_F3_4       0x2d6c
#define R200_PP_CUBIC_OFFSET_F4_4       0x2d70
#define R200_PP_CUBIC_OFFSET_F5_4       0x2d74

#define R200_PP_CUBIC_OFFSET_F1_5       0x2d7c
#define R200_PP_CUBIC_OFFSET_F2_5       0x2d80
#define R200_PP_CUBIC_OFFSET_F3_5       0x2d84
#define R200_PP_CUBIC_OFFSET_F4_5       0x2d88
#define R200_PP_CUBIC_OFFSET_F5_5       0x2d8c
/*
 * Assorted R200 register offsets.  R200_RE_AUX_SCISSOR_CNTL used to be
 * defined twice in this run with the same value; the redundant second
 * definition has been dropped.
 */
#define R200_RE_AUX_SCISSOR_CNTL                0x26f0
#define R200_SE_VTE_CNTL                        0x20b0
#define R200_SE_TCL_OUTPUT_VTX_COMP_SEL         0x2250
#define R200_PP_TAM_DEBUG3                      0x2d9c
#define R200_PP_CNTL_X                          0x2cc4
#define R200_SE_VAP_CNTL_STATUS                 0x2140
#define R200_RE_SCISSOR_TL_0                    0x1cd8
#define R200_RE_SCISSOR_TL_1                    0x1ce0
#define R200_RE_SCISSOR_TL_2                    0x1ce8
#define R200_RB3D_DEPTHXY_OFFSET                0x1d60
#define R200_SE_VTX_STATE_CNTL                  0x2180
#define R200_RE_POINTSIZE                       0x2648
#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0     0x2254
#define RADEON_PP_TEX_SIZE_0                    0x1d04 /* NPOT */
#define RADEON_PP_TEX_SIZE_1                    0x1d0c
#define RADEON_PP_TEX_SIZE_2                    0x1d14

#define RADEON_PP_CUBIC_FACES_0                 0x1d24
#define RADEON_PP_CUBIC_FACES_1                 0x1d28
#define RADEON_PP_CUBIC_FACES_2                 0x1d2c
#define RADEON_PP_CUBIC_OFFSET_T0_0             0x1dd0 /* bits [31:5] */
#define RADEON_PP_CUBIC_OFFSET_T1_0             0x1e00
#define RADEON_PP_CUBIC_OFFSET_T2_0             0x1e14

#define RADEON_SE_TCL_STATE_FLUSH               0x2284

/* SE_VAP_CNTL / SE_VTE_CNTL / SE_VTX_FMT_0 field masks and shifts */
#define SE_VAP_CNTL__TCL_ENA_MASK               0x00000001
#define SE_VAP_CNTL__FORCE_W_TO_ONE_MASK        0x00010000
#define SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT      0x00000012
#define SE_VTE_CNTL__VTX_XY_FMT_MASK            0x00000100
#define SE_VTE_CNTL__VTX_Z_FMT_MASK             0x00000200
#define SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK       0x00000001
#define SE_VTX_FMT_0__VTX_W0_PRESENT_MASK       0x00000002
#define SE_VTX_FMT_0__VTX_COLOR_0_FMT__SHIFT    0x0000000b

#define R200_3D_DRAW_IMMD_2                     0xC0003500
#define R200_SE_VTX_FMT_1                       0x208c
#define R200_RE_CNTL                            0x1c50
#define R200_RB3D_BLENDCOLOR                    0x3218
#define R200_SE_TCL_POINT_SPRITE_CNTL           0x22c4
#define R200_PP_TRI_PERF                        0x2cf8
#define R200_PP_AFS_0                           0x2f80
#define R200_PP_AFS_1                           0x2f00 /* same as txcblend_0 */
#define R200_VAP_PVS_CNTL_1                     0x22D0
#define RADEON_CRTC_CRNT_FRAME          0x0214
#define RADEON_CRTC2_CRNT_FRAME         0x0314

/* R500 D1/D2 register pairs; each D2 offset is 0x800 above its D1 twin */
#define R500_D1CRTC_STATUS              0x609c
#define R500_D2CRTC_STATUS              0x689c
#define R500_CRTC_V_BLANK               (1 << 0)

#define R500_D1CRTC_FRAME_COUNT         0x60a4
#define R500_D2CRTC_FRAME_COUNT         0x68a4

#define R500_D1MODE_V_COUNTER           0x6530
#define R500_D2MODE_V_COUNTER           0x6d30

#define R500_D1MODE_VBLANK_STATUS       0x6534
#define R500_D2MODE_VBLANK_STATUS       0x6d34
#define R500_VBLANK_OCCURED             (1 << 0)
#define R500_VBLANK_ACK                 (1 << 4)
#define R500_VBLANK_STAT                (1 << 12)
#define R500_VBLANK_INT                 (1 << 16)

#define R500_DxMODE_INT_MASK            0x6540
#define R500_D1MODE_INT_MASK            (1 << 0)
#define R500_D2MODE_INT_MASK            (1 << 8)

#define R500_DISP_INTERRUPT_STATUS      0x7edc
#define R500_D1_VBLANK_INTERRUPT        (1 << 4)
#define R500_D2_VBLANK_INTERRUPT        (1 << 5)
/* R6xx/R7xx registers */

/* R600 MC_VM offsets */
#define R600_MC_VM_FB_LOCATION                      0x2180
#define R600_MC_VM_AGP_TOP                          0x2184
#define R600_MC_VM_AGP_BOT                          0x2188
#define R600_MC_VM_AGP_BASE                         0x218c
#define R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR         0x2190
#define R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR        0x2194
#define R600_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR     0x2198

/* R700 counterparts of the same seven names */
#define R700_MC_VM_FB_LOCATION                      0x2024
#define R700_MC_VM_AGP_TOP                          0x2028
#define R700_MC_VM_AGP_BOT                          0x202c
#define R700_MC_VM_AGP_BASE                         0x2030
#define R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR         0x2034
#define R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR        0x2038
#define R700_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR     0x203c
/* R600 MCD read/write CNTL register offsets */
#define R600_MCD_RD_A_CNTL              0x219c
#define R600_MCD_RD_B_CNTL              0x21a0
#define R600_MCD_WR_A_CNTL              0x21a4
#define R600_MCD_WR_B_CNTL              0x21a8
#define R600_MCD_RD_SYS_CNTL            0x2200
#define R600_MCD_WR_SYS_CNTL            0x2214
#define R600_MCD_RD_GFX_CNTL            0x21fc
#define R600_MCD_RD_HDP_CNTL            0x2204
#define R600_MCD_RD_PDMA_CNTL           0x2208
#define R600_MCD_RD_SEM_CNTL            0x220c
#define R600_MCD_WR_GFX_CNTL            0x2210
#define R600_MCD_WR_HDP_CNTL            0x2218
#define R600_MCD_WR_PDMA_CNTL           0x221c
#define R600_MCD_WR_SEM_CNTL            0x2220

/* Field values listed under the registers above */
#  define R600_MCD_L1_TLB                                           (1 << 0)
#  define R600_MCD_L1_FRAG_PROC                                     (1 << 1)
#  define R600_MCD_L1_STRICT_ORDERING                               (1 << 2)
#  define R600_MCD_SYSTEM_ACCESS_MODE_MASK                          (3 << 6)
#  define R600_MCD_SYSTEM_ACCESS_MODE_PA_ONLY                       (0 << 6)
#  define R600_MCD_SYSTEM_ACCESS_MODE_USE_SYS_MAP                   (1 << 6)
#  define R600_MCD_SYSTEM_ACCESS_MODE_IN_SYS                        (2 << 6)
#  define R600_MCD_SYSTEM_ACCESS_MODE_NOT_IN_SYS                    (3 << 6)
#  define R600_MCD_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU        (0 << 8)
#  define R600_MCD_SYSTEM_APERTURE_UNMAPPED_ACCESS_DEFAULT_PAGE     (1 << 8)
#  define R600_MCD_SEMAPHORE_MODE                                   (1 << 10)
#  define R600_MCD_WAIT_L2_QUERY                                    (1 << 11)
#  define R600_MCD_EFFECTIVE_L1_TLB_SIZE(x)                         ((x) << 12)
#  define R600_MCD_EFFECTIVE_L1_QUEUE_SIZE(x)                       ((x) << 15)
/* R700 MD/MB L1 TLB CNTL register offsets */
#define R700_MC_VM_MD_L1_TLB0_CNTL      0x2654
#define R700_MC_VM_MD_L1_TLB1_CNTL      0x2658
#define R700_MC_VM_MD_L1_TLB2_CNTL      0x265c
#define R700_MC_VM_MB_L1_TLB0_CNTL      0x2234
#define R700_MC_VM_MB_L1_TLB1_CNTL      0x2238
#define R700_MC_VM_MB_L1_TLB2_CNTL      0x223c
#define R700_MC_VM_MB_L1_TLB3_CNTL      0x2240

/* Field values listed under the registers above */
#  define R700_ENABLE_L1_TLB                                (1 << 0)
#  define R700_ENABLE_L1_FRAGMENT_PROCESSING                (1 << 1)
#  define R700_SYSTEM_ACCESS_MODE_IN_SYS                    (2 << 3)
#  define R700_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU    (0 << 5)
#  define R700_EFFECTIVE_L1_TLB_SIZE(x)                     ((x) << 15)
#  define R700_EFFECTIVE_L1_QUEUE_SIZE(x)                   ((x) << 18)
/* R700_MC_ARB_RAMCFG: shift/mask pairs for each field */
#define R700_MC_ARB_RAMCFG              0x2760
#  define R700_NOOFBANK_SHIFT           0
#  define R700_NOOFBANK_MASK            0x3
#  define R700_NOOFRANK_SHIFT           2
#  define R700_NOOFRANK_MASK            0x1
#  define R700_NOOFROWS_SHIFT           3
#  define R700_NOOFROWS_MASK            0x7
#  define R700_NOOFCOLS_SHIFT           6
#  define R700_NOOFCOLS_MASK            0x3
#  define R700_CHANSIZE_SHIFT           8
#  define R700_CHANSIZE_MASK            0x1
#  define R700_BURSTLENGTH_SHIFT        9
#  define R700_BURSTLENGTH_MASK         0x1

/* R600_RAMCFG: shift/mask pairs for each field */
#define R600_RAMCFG                     0x2408
#  define R600_NOOFBANK_SHIFT           0
#  define R600_NOOFBANK_MASK            0x1
#  define R600_NOOFRANK_SHIFT           1
#  define R600_NOOFRANK_MASK            0x1
#  define R600_NOOFROWS_SHIFT           2
#  define R600_NOOFROWS_MASK            0x7
#  define R600_NOOFCOLS_SHIFT           5
#  define R600_NOOFCOLS_MASK            0x3
#  define R600_CHANSIZE_SHIFT           7
#  define R600_CHANSIZE_MASK            0x1
#  define R600_BURSTLENGTH_SHIFT        8
#  define R600_BURSTLENGTH_MASK         0x1
/* VM L2 control registers and their fields */
#define R600_VM_L2_CNTL                                 0x1400
#  define R600_VM_L2_CACHE_EN                           (1 << 0)
#  define R600_VM_L2_FRAG_PROC                          (1 << 1)
#  define R600_VM_ENABLE_PTE_CACHE_LRU_W                (1 << 9)
#  define R600_VM_L2_CNTL_QUEUE_SIZE(x)                 ((x) << 13)
#  define R700_VM_L2_CNTL_QUEUE_SIZE(x)                 ((x) << 14)
#define R600_VM_L2_CNTL2                                0x1404
#  define R600_VM_L2_CNTL2_INVALIDATE_ALL_L1_TLBS       (1 << 0)
#  define R600_VM_L2_CNTL2_INVALIDATE_L2_CACHE          (1 << 1)
#define R600_VM_L2_CNTL3                                0x1408
#  define R600_VM_L2_CNTL3_BANK_SELECT_0(x)             ((x) << 0)
#  define R600_VM_L2_CNTL3_BANK_SELECT_1(x)             ((x) << 5)
#  define R600_VM_L2_CNTL3_CACHE_UPDATE_MODE(x)         ((x) << 10)
#  define R700_VM_L2_CNTL3_BANK_SELECT(x)               ((x) << 0)
#  define R700_VM_L2_CNTL3_CACHE_UPDATE_MODE(x)         ((x) << 6)
#define R600_VM_L2_STATUS                               0x140c

/* VM context 0 registers */
#define R600_VM_CONTEXT0_CNTL                           0x1410
#  define R600_VM_ENABLE_CONTEXT                        (1 << 0)
#  define R600_VM_PAGE_TABLE_DEPTH_FLAT                 (0 << 1)
#define R600_VM_CONTEXT0_CNTL2                          0x1430
#define R600_VM_CONTEXT0_REQUEST_RESPONSE               0x1470
#define R600_VM_CONTEXT0_INVALIDATION_LOW_ADDR          0x1490
#define R600_VM_CONTEXT0_INVALIDATION_HIGH_ADDR         0x14b0
#define R600_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR           0x1574
#define R600_VM_CONTEXT0_PAGE_TABLE_START_ADDR          0x1594
#define R600_VM_CONTEXT0_PAGE_TABLE_END_ADDR            0x15b4
#define R700_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR           0x153c
#define R700_VM_CONTEXT0_PAGE_TABLE_START_ADDR          0x155c
#define R700_VM_CONTEXT0_PAGE_TABLE_END_ADDR            0x157c

#define R600_HDP_HOST_PATH_CNTL                         0x2c00
#define R600_GRBM_CNTL                                  0x8000
#  define R600_GRBM_READ_TIMEOUT(x)                     ((x) << 0)
#define R600_GRBM_STATUS                0x8010
#  define R600_CMDFIFO_AVAIL_MASK       0x1f
#  define R700_CMDFIFO_AVAIL_MASK       0xf
/* Bit 31 uses an unsigned literal; 1 << 31 overflows a signed int. */
#  define R600_GUI_ACTIVE               (1U << 31)
#define R600_GRBM_STATUS2               0x8014
#define R600_GRBM_SOFT_RESET            0x8020
#  define R600_SOFT_RESET_CP            (1 << 0)
#define R600_WAIT_UNTIL                 0x8040

#define R600_CP_SEM_WAIT_TIMER          0x85bc
#define R600_CP_ME_CNTL                 0x86d8
#  define R600_CP_ME_HALT               (1 << 28)
#define R600_CP_QUEUE_THRESHOLDS        0x8760
#  define R600_ROQ_IB1_START(x)         ((x) << 0)
#  define R600_ROQ_IB2_START(x)         ((x) << 8)
#define R600_CP_MEQ_THRESHOLDS          0x8764
#  define R700_STQ_SPLIT(x)             ((x) << 0)
#  define R600_MEQ_END(x)               ((x) << 16)
#  define R600_ROQ_END(x)               ((x) << 24)
#define R600_CP_PERFMON_CNTL            0x87fc
#define R600_CP_RB_BASE                 0xc100
#define R600_CP_RB_CNTL                 0xc104
#  define R600_RB_BUFSZ(x)              ((x) << 0)
#  define R600_RB_BLKSZ(x)              ((x) << 8)
#  define R600_BUF_SWAP_32BIT           (2 << 16)
#  define R600_RB_NO_UPDATE             (1 << 27)
/* Bit 31 uses an unsigned literal; 1 << 31 overflows a signed int. */
#  define R600_RB_RPTR_WR_ENA           (1U << 31)
/* R600 CP ring-buffer pointer register offsets */
#define R600_CP_RB_RPTR_WR          0xc108
#define R600_CP_RB_RPTR_ADDR        0xc10c
#define R600_CP_RB_RPTR_ADDR_HI     0xc110
#define R600_CP_RB_WPTR             0xc114
#define R600_CP_RB_WPTR_ADDR        0xc118
#define R600_CP_RB_WPTR_ADDR_HI     0xc11c
#define R600_CP_RB_RPTR             0x8700
#define R600_CP_RB_WPTR_DELAY       0x8704

/* R600 CP PFP ucode / ME RAM register offsets */
#define R600_CP_PFP_UCODE_ADDR      0xc150
#define R600_CP_PFP_UCODE_DATA      0xc154
#define R600_CP_ME_RAM_RADDR        0xc158
#define R600_CP_ME_RAM_WADDR        0xc15c
#define R600_CP_ME_RAM_DATA         0xc160
#define R600_CP_DEBUG               0xc1fc
#define R600_PA_CL_ENHANCE                      0x8a14
#  define R600_CLIP_VTX_REORDER_ENA             (1 << 0)
#  define R600_NUM_CLIP_SEQ(x)                  ((x) << 1)
#define R600_PA_SC_LINE_STIPPLE_STATE           0x8b10
#define R600_PA_SC_MULTI_CHIP_CNTL              0x8b20
#define R700_PA_SC_FORCE_EOV_MAX_CNTS           0x8b24
#  define R700_FORCE_EOV_MAX_CLK_CNT(x)         ((x) << 0)
#  define R700_FORCE_EOV_MAX_REZ_CNT(x)         ((x) << 16)

/*
 * AA sample-location registers.  Each Sn_X/Sn_Y macro shifts its
 * argument to a 4-bit-aligned position; S4..S7 reuse the shifts of S0..S3.
 */
#define R600_PA_SC_AA_SAMPLE_LOCS_2S            0x8b40
#define R600_PA_SC_AA_SAMPLE_LOCS_4S            0x8b44
#define R600_PA_SC_AA_SAMPLE_LOCS_8S_WD0        0x8b48
#define R600_PA_SC_AA_SAMPLE_LOCS_8S_WD1        0x8b4c
#  define R600_S0_X(x)                          ((x) << 0)
#  define R600_S0_Y(x)                          ((x) << 4)
#  define R600_S1_X(x)                          ((x) << 8)
#  define R600_S1_Y(x)                          ((x) << 12)
#  define R600_S2_X(x)                          ((x) << 16)
#  define R600_S2_Y(x)                          ((x) << 20)
#  define R600_S3_X(x)                          ((x) << 24)
#  define R600_S3_Y(x)                          ((x) << 28)
#  define R600_S4_X(x)                          ((x) << 0)
#  define R600_S4_Y(x)                          ((x) << 4)
#  define R600_S5_X(x)                          ((x) << 8)
#  define R600_S5_Y(x)                          ((x) << 12)
#  define R600_S6_X(x)                          ((x) << 16)
#  define R600_S6_Y(x)                          ((x) << 20)
#  define R600_S7_X(x)                          ((x) << 24)
#  define R600_S7_Y(x)                          ((x) << 28)
/* PA_SC FIFO size registers; the R700 variant uses different shifts */
#define R600_PA_SC_FIFO_SIZE                    0x8bd0
#  define R600_SC_PRIM_FIFO_SIZE(x)             ((x) << 0)
#  define R600_SC_HIZ_TILE_FIFO_SIZE(x)         ((x) << 8)
#  define R600_SC_EARLYZ_TILE_FIFO_SIZE(x)      ((x) << 16)
#define R700_PA_SC_FIFO_SIZE_R7XX               0x8bcc
#  define R700_SC_PRIM_FIFO_SIZE(x)             ((x) << 0)
#  define R700_SC_HIZ_TILE_FIFO_SIZE(x)         ((x) << 12)
#  define R700_SC_EARLYZ_TILE_FIFO_SIZE(x)      ((x) << 20)

#define R600_PA_SC_ENHANCE                      0x8bf0
#  define R600_FORCE_EOV_MAX_CLK_CNT(x)         ((x) << 0)
#  define R600_FORCE_EOV_MAX_TILE_CNT(x)        ((x) << 12)

#define R600_PA_SC_CLIPRECT_RULE                0x2820c
#define R700_PA_SC_EDGERULE                     0x28230
#define R600_PA_SC_LINE_STIPPLE                 0x28a0c
#define R600_PA_SC_MODE_CNTL                    0x28a4c
#define R600_PA_SC_AA_CONFIG                    0x28c04
#define R600_SX_EXPORT_BUFFER_SIZES 0x900c
# define R600_COLOR_BUFFER_SIZE(x) ((x) << 0)
# define R600_POSITION_BUFFER_SIZE(x) ((x) << 8)
# define R600_SMX_BUFFER_SIZE(x) ((x) << 16)
#define R600_SX_DEBUG_1 0x9054
# define R600_SMX_EVENT_RELEASE (1 << 0)
# define R600_ENABLE_NEW_SMX_ADDRESS (1 << 16)
#define R700_SX_DEBUG_1 0x9058
# define R700_ENABLE_NEW_SMX_ADDRESS (1 << 16)
#define R600_SX_MISC 0x28350
#define R600_DB_DEBUG 0x9830
# define R600_PREZ_MUST_WAIT_FOR_POSTZ_DONE (1 << 31)
#define R600_DB_WATERMARKS 0x9838
# define R600_DEPTH_FREE(x) ((x) << 0)
# define R600_DEPTH_FLUSH(x) ((x) << 5)
# define R600_DEPTH_PENDING_FREE(x) ((x) << 15)
# define R600_DEPTH_CACHELINE_FREE(x) ((x) << 20)
#define R700_DB_DEBUG3 0x98b0
# define R700_DB_CLK_OFF_DELAY(x) ((x) << 11)
#define RV700_DB_DEBUG4 0x9b8c
# define RV700_DISABLE_TILE_COVERED_FOR_PS_ITER (1 << 6)
#define R600_VGT_CACHE_INVALIDATION 0x88c4
# define R600_CACHE_INVALIDATION(x) ((x) << 0)
# define R600_VC_ONLY 0
# define R600_TC_ONLY 1
# define R600_VC_AND_TC 2
# define R700_AUTO_INVLD_EN(x) ((x) << 6)
# define R700_NO_AUTO 0
# define R700_ES_AUTO 1
# define R700_GS_AUTO 2
# define R700_ES_AND_GS_AUTO 3
#define R600_VGT_GS_PER_ES 0x88c8
#define R600_VGT_ES_PER_GS 0x88cc
#define R600_VGT_GS_PER_VS 0x88e8
#define R600_VGT_GS_VERTEX_REUSE 0x88d4
#define R600_VGT_NUM_INSTANCES 0x8974
#define R600_VGT_STRMOUT_EN 0x28ab0
#define R600_VGT_EVENT_INITIATOR 0x28a90
# define R600_CACHE_FLUSH_AND_INV_EVENT (0x16 << 0)
#define R600_VGT_VERTEX_REUSE_BLOCK_CNTL 0x28c58
# define R600_VTX_REUSE_DEPTH_MASK 0xff
#define R600_VGT_OUT_DEALLOC_CNTL 0x28c5c
# define R600_DEALLOC_DIST_MASK 0x7f
#define R600_CB_COLOR0_BASE 0x28040
#define R600_CB_COLOR1_BASE 0x28044
#define R600_CB_COLOR2_BASE 0x28048
#define R600_CB_COLOR3_BASE 0x2804c
#define R600_CB_COLOR4_BASE 0x28050
#define R600_CB_COLOR5_BASE 0x28054
#define R600_CB_COLOR6_BASE 0x28058
#define R600_CB_COLOR7_BASE 0x2805c
#define R600_CB_COLOR7_FRAG 0x280fc
#define R600_CB_COLOR0_SIZE 0x28060
#define R600_CB_COLOR0_VIEW 0x28080
#define R600_CB_COLOR0_INFO 0x280a0
#define R600_CB_COLOR0_TILE 0x280c0
#define R600_CB_COLOR0_FRAG 0x280e0
#define R600_CB_COLOR0_MASK 0x28100
#define AVIVO_D1MODE_VLINE_START_END 0x6538
#define AVIVO_D2MODE_VLINE_START_END 0x6d38
#define R600_CP_COHER_BASE 0x85f8
#define R600_DB_DEPTH_BASE 0x2800c
#define R600_SQ_PGM_START_FS 0x28894
#define R600_SQ_PGM_START_ES 0x28880
#define R600_SQ_PGM_START_VS 0x28858
#define R600_SQ_PGM_RESOURCES_VS 0x28868
#define R600_SQ_PGM_CF_OFFSET_VS 0x288d0
#define R600_SQ_PGM_START_GS 0x2886c
#define R600_SQ_PGM_START_PS 0x28840
#define R600_SQ_PGM_RESOURCES_PS 0x28850
#define R600_SQ_PGM_EXPORTS_PS 0x28854
#define R600_SQ_PGM_CF_OFFSET_PS 0x288cc
#define R600_VGT_DMA_BASE 0x287e8
#define R600_VGT_DMA_BASE_HI 0x287e4
#define R600_VGT_STRMOUT_BASE_OFFSET_0 0x28b10
#define R600_VGT_STRMOUT_BASE_OFFSET_1 0x28b14
#define R600_VGT_STRMOUT_BASE_OFFSET_2 0x28b18
#define R600_VGT_STRMOUT_BASE_OFFSET_3 0x28b1c
#define R600_VGT_STRMOUT_BASE_OFFSET_HI_0 0x28b44
#define R600_VGT_STRMOUT_BASE_OFFSET_HI_1 0x28b48
#define R600_VGT_STRMOUT_BASE_OFFSET_HI_2 0x28b4c
#define R600_VGT_STRMOUT_BASE_OFFSET_HI_3 0x28b50
#define R600_VGT_STRMOUT_BUFFER_BASE_0 0x28ad8
#define R600_VGT_STRMOUT_BUFFER_BASE_1 0x28ae8
#define R600_VGT_STRMOUT_BUFFER_BASE_2 0x28af8
#define R600_VGT_STRMOUT_BUFFER_BASE_3 0x28b08
#define R600_VGT_STRMOUT_BUFFER_OFFSET_0 0x28adc
#define R600_VGT_STRMOUT_BUFFER_OFFSET_1 0x28aec
#define R600_VGT_STRMOUT_BUFFER_OFFSET_2 0x28afc
#define R600_VGT_STRMOUT_BUFFER_OFFSET_3 0x28b0c
#define R600_VGT_PRIMITIVE_TYPE 0x8958
#define R600_PA_SC_SCREEN_SCISSOR_TL 0x28030
#define R600_PA_SC_GENERIC_SCISSOR_TL 0x28240
#define R600_PA_SC_WINDOW_SCISSOR_TL 0x28204
#define R600_TC_CNTL 0x9608
# define R600_TC_L2_SIZE(x) ((x) << 5)
# define R600_L2_DISABLE_LATE_HIT (1 << 9)
#define R600_ARB_POP 0x2418
# define R600_ENABLE_TC128 (1 << 30)
#define R600_ARB_GDEC_RD_CNTL 0x246c
#define R600_TA_CNTL_AUX 0x9508
# define R600_DISABLE_CUBE_WRAP (1 << 0)
# define R600_DISABLE_CUBE_ANISO (1 << 1)
# define R700_GETLOD_SELECT(x) ((x) << 2)
# define R600_SYNC_GRADIENT (1 << 24)
# define R600_SYNC_WALKER (1 << 25)
# define R600_SYNC_ALIGNER (1 << 26)
# define R600_BILINEAR_PRECISION_6_BIT (0 << 31)
# define R600_BILINEAR_PRECISION_8_BIT (1 << 31)
#define R700_TCP_CNTL 0x9610
#define R600_SMX_DC_CTL0 0xa020
# define R700_USE_HASH_FUNCTION (1 << 0)
# define R700_CACHE_DEPTH(x) ((x) << 1)
# define R700_FLUSH_ALL_ON_EVENT (1 << 10)
# define R700_STALL_ON_EVENT (1 << 11)
#define R700_SMX_EVENT_CTL 0xa02c
# define R700_ES_FLUSH_CTL(x) ((x) << 0)
# define R700_GS_FLUSH_CTL(x) ((x) << 3)
# define R700_ACK_FLUSH_CTL(x) ((x) << 6)
# define R700_SYNC_FLUSH_CTL (1 << 8)
#define R600_SQ_CONFIG 0x8c00
# define R600_VC_ENABLE (1 << 0)
# define R600_EXPORT_SRC_C (1 << 1)
# define R600_DX9_CONSTS (1 << 2)
# define R600_ALU_INST_PREFER_VECTOR (1 << 3)
# define R600_DX10_CLAMP (1 << 4)
# define R600_CLAUSE_SEQ_PRIO(x) ((x) << 8)
# define R600_PS_PRIO(x) ((x) << 24)
# define R600_VS_PRIO(x) ((x) << 26)
# define R600_GS_PRIO(x) ((x) << 28)
# define R600_ES_PRIO(x) ((x) << 30)
#define R600_SQ_GPR_RESOURCE_MGMT_1 0x8c04
# define R600_NUM_PS_GPRS(x) ((x) << 0)
# define R600_NUM_VS_GPRS(x) ((x) << 16)
# define R700_DYN_GPR_ENABLE (1 << 27)
# define R600_NUM_CLAUSE_TEMP_GPRS(x) ((x) << 28)
#define R600_SQ_GPR_RESOURCE_MGMT_2 0x8c08
# define R600_NUM_GS_GPRS(x) ((x) << 0)
# define R600_NUM_ES_GPRS(x) ((x) << 16)
#define R600_SQ_THREAD_RESOURCE_MGMT 0x8c0c
# define R600_NUM_PS_THREADS(x) ((x) << 0)
# define R600_NUM_VS_THREADS(x) ((x) << 8)
# define R600_NUM_GS_THREADS(x) ((x) << 16)
# define R600_NUM_ES_THREADS(x) ((x) << 24)
#define R600_SQ_STACK_RESOURCE_MGMT_1 0x8c10
# define R600_NUM_PS_STACK_ENTRIES(x) ((x) << 0)
# define R600_NUM_VS_STACK_ENTRIES(x) ((x) << 16)
#define R600_SQ_STACK_RESOURCE_MGMT_2 0x8c14
# define R600_NUM_GS_STACK_ENTRIES(x) ((x) << 0)
# define R600_NUM_ES_STACK_ENTRIES(x) ((x) << 16)
#define R600_SQ_MS_FIFO_SIZES 0x8cf0
# define R600_CACHE_FIFO_SIZE(x) ((x) << 0)
# define R600_FETCH_FIFO_HIWATER(x) ((x) << 8)
# define R600_DONE_FIFO_HIWATER(x) ((x) << 16)
# define R600_ALU_UPDATE_FIFO_HIWATER(x) ((x) << 24)
#define R700_SQ_DYN_GPR_SIZE_SIMD_AB_0 0x8db0
# define R700_SIMDA_RING0(x) ((x) << 0)
# define R700_SIMDA_RING1(x) ((x) << 8)
# define R700_SIMDB_RING0(x) ((x) << 16)
# define R700_SIMDB_RING1(x) ((x) << 24)
#define R700_SQ_DYN_GPR_SIZE_SIMD_AB_1 0x8db4
#define R700_SQ_DYN_GPR_SIZE_SIMD_AB_2 0x8db8
#define R700_SQ_DYN_GPR_SIZE_SIMD_AB_3 0x8dbc
#define R700_SQ_DYN_GPR_SIZE_SIMD_AB_4 0x8dc0
#define R700_SQ_DYN_GPR_SIZE_SIMD_AB_5 0x8dc4
#define R700_SQ_DYN_GPR_SIZE_SIMD_AB_6 0x8dc8
#define R700_SQ_DYN_GPR_SIZE_SIMD_AB_7 0x8dcc
#define R600_SPI_PS_IN_CONTROL_0 0x286cc
# define R600_NUM_INTERP(x) ((x) << 0)
# define R600_POSITION_ENA (1 << 8)
# define R600_POSITION_CENTROID (1 << 9)
# define R600_POSITION_ADDR(x) ((x) << 10)
# define R600_PARAM_GEN(x) ((x) << 15)
# define R600_PARAM_GEN_ADDR(x) ((x) << 19)
# define R600_BARYC_SAMPLE_CNTL(x) ((x) << 26)
# define R600_PERSP_GRADIENT_ENA (1 << 28)
# define R600_LINEAR_GRADIENT_ENA (1 << 29)
# define R600_POSITION_SAMPLE (1 << 30)
# define R600_BARYC_AT_SAMPLE_ENA (1 << 31)
#define R600_SPI_PS_IN_CONTROL_1 0x286d0
# define R600_GEN_INDEX_PIX (1 << 0)
# define R600_GEN_INDEX_PIX_ADDR(x) ((x) << 1)
# define R600_FRONT_FACE_ENA (1 << 8)
# define R600_FRONT_FACE_CHAN(x) ((x) << 9)
# define R600_FRONT_FACE_ALL_BITS (1 << 11)
# define R600_FRONT_FACE_ADDR(x) ((x) << 12)
# define R600_FOG_ADDR(x) ((x) << 17)
# define R600_FIXED_PT_POSITION_ENA (1 << 24)
# define R600_FIXED_PT_POSITION_ADDR(x) ((x) << 25)
# define R700_POSITION_ULC (1 << 30)
#define R600_SPI_INPUT_Z 0x286d8
#define R600_SPI_CONFIG_CNTL 0x9100
# define R600_GPR_WRITE_PRIORITY(x) ((x) << 0)
# define R600_DISABLE_INTERP_1 (1 << 5)
#define R600_SPI_CONFIG_CNTL_1 0x913c
# define R600_VTX_DONE_DELAY(x) ((x) << 0)
# define R600_INTERP_ONE_PRIM_PER_ROW (1 << 4)
#define R600_GB_TILING_CONFIG 0x98f0
# define R600_PIPE_TILING(x) ((x) << 1)
# define R600_BANK_TILING(x) ((x) << 4)
# define R600_GROUP_SIZE(x) ((x) << 6)
# define R600_ROW_TILING(x) ((x) << 8)
# define R600_BANK_SWAPS(x) ((x) << 11)
# define R600_SAMPLE_SPLIT(x) ((x) << 14)
# define R600_BACKEND_MAP(x) ((x) << 16)
#define R600_DCP_TILING_CONFIG 0x6ca0
#define R600_HDP_TILING_CONFIG 0x2f3c
#define R600_CC_RB_BACKEND_DISABLE 0x98f4
#define R700_CC_SYS_RB_BACKEND_DISABLE 0x3f88
# define R600_BACKEND_DISABLE(x) ((x) << 16)
#define R600_CC_GC_SHADER_PIPE_CONFIG 0x8950
#define R600_GC_USER_SHADER_PIPE_CONFIG 0x8954
# define R600_INACTIVE_QD_PIPES(x) ((x) << 8)
# define R600_INACTIVE_QD_PIPES_MASK (0xff << 8)
# define R600_INACTIVE_SIMDS(x) ((x) << 16)
# define R600_INACTIVE_SIMDS_MASK (0xff << 16)
#define R700_CGTS_SYS_TCC_DISABLE 0x3f90
#define R700_CGTS_USER_SYS_TCC_DISABLE 0x3f94
#define R700_CGTS_TCC_DISABLE 0x9148
#define R700_CGTS_USER_TCC_DISABLE 0x914c
/* Constants */
#define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */
#define RADEON_LAST_FRAME_REG RADEON_SCRATCH_REG0
#define RADEON_LAST_DISPATCH_REG RADEON_SCRATCH_REG1
#define RADEON_LAST_CLEAR_REG RADEON_SCRATCH_REG2
#define RADEON_LAST_SWI_REG RADEON_SCRATCH_REG3
#define RADEON_LAST_DISPATCH 1
#define R600_LAST_FRAME_REG R600_SCRATCH_REG0
#define R600_LAST_DISPATCH_REG R600_SCRATCH_REG1
#define R600_LAST_CLEAR_REG R600_SCRATCH_REG2
#define R600_LAST_SWI_REG R600_SCRATCH_REG3
#define RADEON_MAX_VB_AGE 0x7fffffff
#define RADEON_MAX_VB_VERTS (0xffff)
#define RADEON_RING_HIGH_MARK 128
#define RADEON_PCIGART_TABLE_SIZE (32*1024)
/* Read a 32-bit register through the caller's dev_priv->mmio mapping. */
#define RADEON_READ(reg) DRM_READ32( dev_priv->mmio, (reg) )

/*
 * Write a 32-bit register.  A variable named dev_priv must be in scope.
 *
 * Offsets below 0x10000 are written directly through dev_priv->mmio;
 * anything at or above that limit is written through the
 * RADEON_MM_INDEX / RADEON_MM_DATA register pair.
 *
 * 'reg' is parenthesised in the range test so that an expression argument
 * such as (BASE | OFFSET) is compared as a whole instead of being split by
 * operator precedence ('<' binds tighter than '|').
 *
 * NOTE: 'reg' is still expanded more than once, so it must not have side
 * effects.
 */
#define RADEON_WRITE(reg, val)						\
do {									\
	if ((reg) < 0x10000) {						\
		DRM_WRITE32(dev_priv->mmio, (reg), (val));		\
	} else {							\
		DRM_WRITE32(dev_priv->mmio, RADEON_MM_INDEX, (reg));	\
		DRM_WRITE32(dev_priv->mmio, RADEON_MM_DATA, (val));	\
	}								\
} while (0)
/* Byte-wide register accessors on the caller's dev_priv->mmio mapping. */
#define RADEON_READ8(offset)						\
	DRM_READ8(dev_priv->mmio, (offset))
#define RADEON_WRITE8(offset, byte)					\
	DRM_WRITE8(dev_priv->mmio, (offset), (byte))

/*
 * Indexed PLL write: the low five bits of the PLL register number, ORed
 * with RADEON_PLL_WR_EN, go to RADEON_CLOCK_CNTL_INDEX; the value then
 * goes to RADEON_CLOCK_CNTL_DATA.
 */
#define RADEON_WRITE_PLL(pll_reg, data)					\
	do {								\
		RADEON_WRITE8(RADEON_CLOCK_CNTL_INDEX,			\
			      RADEON_PLL_WR_EN | ((pll_reg) & 0x1f));	\
		RADEON_WRITE(RADEON_CLOCK_CNTL_DATA, (data));		\
	} while (0)

/*
 * Indexed PCIE write: the low eight bits of the register number go to
 * RADEON_PCIE_INDEX, then the value goes to RADEON_PCIE_DATA.
 */
#define RADEON_WRITE_PCIE(pcie_reg, data)				\
	do {								\
		RADEON_WRITE8(RADEON_PCIE_INDEX, ((pcie_reg) & 0xff));	\
		RADEON_WRITE(RADEON_PCIE_DATA, (data));			\
	} while (0)
/*
 * Indirect memory-controller register writes, one variant per chip family.
 * Each one selects the target through an index register, writes the value
 * to the matching data register and (except RS600) finishes by writing a
 * fixed value back to the index register.
 */

/* R500: index = 0xff0000 | low byte of the address; index is zeroed afterwards. */
#define R500_WRITE_MCIND(mc_reg, data)					\
	do {								\
		RADEON_WRITE(R520_MC_IND_INDEX,				\
			     ((mc_reg) & 0xff) | 0xff0000);		\
		RADEON_WRITE(R520_MC_IND_DATA, (data));			\
		RADEON_WRITE(R520_MC_IND_INDEX, 0);			\
	} while (0)

/* RS480: index = low byte | write-enable; index is set to 0xff afterwards. */
#define RS480_WRITE_MCIND(mc_reg, data)					\
	do {								\
		RADEON_WRITE(RS480_NB_MC_INDEX,				\
			     RS480_NB_MC_IND_WR_EN | ((mc_reg) & 0xff)); \
		RADEON_WRITE(RS480_NB_MC_DATA, (data));			\
		RADEON_WRITE(RS480_NB_MC_INDEX, 0xff);			\
	} while (0)

/* RS690: index = masked address | write-enable; WR_ACK is written afterwards. */
#define RS690_WRITE_MCIND(mc_reg, data)					\
	do {								\
		RADEON_WRITE(RS690_MC_INDEX,				\
			     ((mc_reg) & RS690_MC_INDEX_MASK) |		\
			     RS690_MC_INDEX_WR_EN);			\
		RADEON_WRITE(RS690_MC_DATA, (data));			\
		RADEON_WRITE(RS690_MC_INDEX, RS690_MC_INDEX_WR_ACK);	\
	} while (0)

/* RS600: index = masked address | CITF_ARB0 | write-enable; no trailing index write. */
#define RS600_WRITE_MCIND(mc_reg, data)					\
	do {								\
		RADEON_WRITE(RS600_MC_INDEX,				\
			     ((mc_reg) & RS600_MC_ADDR_MASK) |		\
			     RS600_MC_IND_CITF_ARB0 |			\
			     RS600_MC_IND_WR_EN);			\
		RADEON_WRITE(RS600_MC_DATA, (data));			\
	} while (0)
/*
 * Dispatch an indirect MC register write to the variant matching the chip
 * family stored in dev_priv->flags: RS690 and RS740 share the RS690 path,
 * RS600 has its own, and every other family takes the RS480 path.
 */
#define IGP_WRITE_MCIND(addr, val)					\
	do {								\
		switch (dev_priv->flags & RADEON_FAMILY_MASK) {		\
		case CHIP_RS690:					\
		case CHIP_RS740:					\
			RS690_WRITE_MCIND(addr, val);			\
			break;						\
		case CHIP_RS600:					\
			RS600_WRITE_MCIND(addr, val);			\
			break;						\
		default:						\
			RS480_WRITE_MCIND(addr, val);			\
			break;						\
		}							\
	} while (0)
/*
 * Command-packet header builders.  Register byte offsets are converted to
 * dword indices with '>> 2'; the count argument is placed at bit 16.
 */

/* Type-0 header: register dword index in the low bits, count at bit 16. */
#define CP_PACKET0(first_reg, count)					\
	(((first_reg) >> 2) | ((count) << 16) | RADEON_CP_PACKET0)

/* Type-0 header with RADEON_ONE_REG_WR set in addition. */
#define CP_PACKET0_TABLE(first_reg, count)				\
	(((first_reg) >> 2) | ((count) << 16) |				\
	 RADEON_ONE_REG_WR | RADEON_CP_PACKET0)

/* Type-1 header: second register dword index at bit 15, first in the low bits. */
#define CP_PACKET1(reg_a, reg_b)					\
	(((reg_a) >> 2) | (((reg_b) >> 2) << 15) | RADEON_CP_PACKET1)

/* Type-2 header: just the RADEON_CP_PACKET2 constant. */
#define CP_PACKET2()							\
	(RADEON_CP_PACKET2)

/* Type-3 header: opcode bits ORed in as given, count at bit 16. */
#define CP_PACKET3(opcode, count)					\
	((opcode) | ((count) << 16) | RADEON_CP_PACKET3)
/* ================================================================
* Engine control helper macros
*/
/*
 * Each macro below emits two ring dwords: a one-register packet-0 header
 * for RADEON_WAIT_UNTIL followed by the wait-condition bits.  They expand
 * to OUT_RING() calls, so they must be used where OUT_RING() is usable.
 */

/* Wait condition: 2D idle-clean and host idle-clean. */
#define RADEON_WAIT_UNTIL_2D_IDLE()					\
	do {								\
		OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));		\
		OUT_RING((RADEON_WAIT_HOST_IDLECLEAN |			\
			  RADEON_WAIT_2D_IDLECLEAN));			\
	} while (0)

/* Wait condition: 3D idle-clean and host idle-clean. */
#define RADEON_WAIT_UNTIL_3D_IDLE()					\
	do {								\
		OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));		\
		OUT_RING((RADEON_WAIT_HOST_IDLECLEAN |			\
			  RADEON_WAIT_3D_IDLECLEAN));			\
	} while (0)

/* Wait condition: 2D, 3D and host idle-clean together. */
#define RADEON_WAIT_UNTIL_IDLE()					\
	do {								\
		OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));		\
		OUT_RING((RADEON_WAIT_HOST_IDLECLEAN |			\
			  RADEON_WAIT_3D_IDLECLEAN |			\
			  RADEON_WAIT_2D_IDLECLEAN));			\
	} while (0)

/* Wait condition: RADEON_WAIT_CRTC_PFLIP only. */
#define RADEON_WAIT_UNTIL_PAGE_FLIPPED()				\
	do {								\
		OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));		\
		OUT_RING(RADEON_WAIT_CRTC_PFLIP);			\
	} while (0)
/*
 * Cache flush/purge emitters.  Each macro writes two ring dwords: a
 * one-register packet-0 header for the cache control register, then the
 * command bits.  Families above CHIP_RV280 use the R300 register and bit
 * names; CHIP_RV280 and below use the RADEON ones.  The family is taken
 * from the caller's dev_priv->flags.
 */

/* Destination cache: flush only. */
#define RADEON_FLUSH_CACHE()						\
	do {								\
		if ((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_RV280) { \
			OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); \
			OUT_RING(R300_RB3D_DC_FLUSH);			\
		} else {						\
			OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); \
			OUT_RING(RADEON_RB3D_DC_FLUSH);			\
		}							\
	} while (0)

/* Destination cache: flush and free. */
#define RADEON_PURGE_CACHE()						\
	do {								\
		if ((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_RV280) { \
			OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); \
			OUT_RING(R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE); \
		} else {						\
			OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); \
			OUT_RING(RADEON_RB3D_DC_FLUSH | RADEON_RB3D_DC_FREE); \
		}							\
	} while (0)

/* Z cache: flush only. */
#define RADEON_FLUSH_ZCACHE()						\
	do {								\
		if ((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_RV280) { \
			OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0)); \
			OUT_RING(R300_ZC_FLUSH);			\
		} else {						\
			OUT_RING(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); \
			OUT_RING(RADEON_RB3D_ZC_FLUSH);			\
		}							\
	} while (0)

/* Z cache: flush and free. */
#define RADEON_PURGE_ZCACHE()						\
	do {								\
		if ((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_RV280) { \
			OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0)); \
			OUT_RING(R300_ZC_FLUSH | R300_ZC_FREE);		\
		} else {						\
			OUT_RING(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); \
			OUT_RING(RADEON_RB3D_ZC_FLUSH | RADEON_RB3D_ZC_FREE); \
		}							\
	} while (0)
/* ================================================================
* Misc helper macros
*/
/* Perfbox functionality only.
 *
 * Despite its name this macro neither tests ring space nor returns from the
 * caller.  If RADEON_BOX_DMA_IDLE is not already set in
 * dev_priv->stats.boxes, it reads the ring head with GET_RING_HEAD() and
 * sets that flag when the head equals dev_priv->ring.tail.
 *
 * The macro parameter is itself named dev_priv and is used unparenthesised,
 * so pass a plain pointer variable.
 */
#define RING_SPACE_TEST_WITH_RETURN( dev_priv ) \
do { \
if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE)) { \
u32 head = GET_RING_HEAD( dev_priv ); \
if (head == dev_priv->ring.tail) \
dev_priv->stats.boxes |= RADEON_BOX_DMA_IDLE; \
} \
} while (0)
/* Reset the dispatch age before it exceeds RADEON_MAX_VB_AGE.
 *
 * Besides its dev_priv argument, the expansion uses the names 'file_priv'
 * and 'dev' from the enclosing function, so both must be in scope there.
 *
 * When sarea_priv->last_dispatch has reached RADEON_MAX_VB_AGE, the CP is
 * idled (r600_do_cp_idle() for CHIP_R600 and newer, radeon_do_cp_idle()
 * otherwise).  If idling fails, the macro executes 'return __ret' in the
 * ENCLOSING function, so it may only be used in functions returning int.
 * On success last_dispatch is set to 0 and radeon_freelist_reset(dev) is
 * called.
 */
#define VB_AGE_TEST_WITH_RETURN( dev_priv ) \
do { \
struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv; \
drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv; \
if ( sarea_priv->last_dispatch >= RADEON_MAX_VB_AGE ) { \
int __ret; \
if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) \
__ret = r600_do_cp_idle(dev_priv); \
else \
__ret = radeon_do_cp_idle(dev_priv); \
if ( __ret ) return __ret; \
sarea_priv->last_dispatch = 0; \
radeon_freelist_reset( dev ); \
} \
} while (0)
/*
 * Age emitters for pre-R600 parts: a one-register packet-0 header for the
 * corresponding RADEON_LAST_*_REG followed by the age value (two dwords).
 */
#define RADEON_DISPATCH_AGE(age)					\
	do {								\
		OUT_RING(CP_PACKET0(RADEON_LAST_DISPATCH_REG, 0));	\
		OUT_RING(age);						\
	} while (0)

#define RADEON_FRAME_AGE(age)						\
	do {								\
		OUT_RING(CP_PACKET0(RADEON_LAST_FRAME_REG, 0));		\
		OUT_RING(age);						\
	} while (0)

#define RADEON_CLEAR_AGE(age)						\
	do {								\
		OUT_RING(CP_PACKET0(RADEON_LAST_CLEAR_REG, 0));		\
		OUT_RING(age);						\
	} while (0)

/*
 * R600 age emitters: a SET_CONFIG_REG packet-3 header, the dword index of
 * the R600_LAST_*_REG relative to R600_SET_CONFIG_REG_OFFSET, then the age
 * value (three dwords).
 */
#define R600_DISPATCH_AGE(age)						\
	do {								\
		OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));	\
		OUT_RING((R600_LAST_DISPATCH_REG -			\
			  R600_SET_CONFIG_REG_OFFSET) >> 2);		\
		OUT_RING(age);						\
	} while (0)

#define R600_FRAME_AGE(age)						\
	do {								\
		OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));	\
		OUT_RING((R600_LAST_FRAME_REG -				\
			  R600_SET_CONFIG_REG_OFFSET) >> 2);		\
		OUT_RING(age);						\
	} while (0)

#define R600_CLEAR_AGE(age)						\
	do {								\
		OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));	\
		OUT_RING((R600_LAST_CLEAR_REG -				\
			  R600_SET_CONFIG_REG_OFFSET) >> 2);		\
		OUT_RING(age);						\
	} while (0)
/* ================================================================
 * Ring control
 */

/* Compile-time switch for the DRM_INFO tracing inside the ring macros. */
#define RADEON_VERBOSE 0

/* Locals that BEGIN_RING/OUT_RING/ADVANCE_RING expect in the calling function. */
#define RING_LOCALS int write, _nr, _align_nr; unsigned int mask; u32 *ring;

/* Alignment granularity, in dwords, used when estimating required ring space. */
#define RADEON_RING_ALIGN 16

/*
 * Start emitting n dwords to the ring.
 *
 * Requires RING_LOCALS and a dev_priv pointer in the calling scope.
 * The space estimate (_align_nr) is n plus the padding up to the next
 * RADEON_RING_ALIGN boundary after tail + n.  When the ring does not have
 * more than that many dwords free, the ring is committed and
 * radeon_wait_ring() is asked for the space.  The free-space counter is
 * then reduced by n dwords and the ring cursor locals are loaded.
 *
 * Every use of 'n' is parenthesised so that an expression argument (for
 * example 'cond ? 2 : 4') is evaluated as a unit.  'n' is expanded several
 * times, so it must not have side effects.
 */
#define BEGIN_RING( n ) do {						\
	if ( RADEON_VERBOSE ) {						\
		DRM_INFO( "BEGIN_RING( %d )\n", (n));			\
	}								\
	_align_nr = RADEON_RING_ALIGN -					\
		((dev_priv->ring.tail + (n)) & (RADEON_RING_ALIGN-1));	\
	_align_nr += (n);						\
	if (dev_priv->ring.space <= (_align_nr * sizeof(u32))) {	\
		COMMIT_RING();						\
		radeon_wait_ring( dev_priv, _align_nr * sizeof(u32));	\
	}								\
	_nr = (n); dev_priv->ring.space -= (n) * sizeof(u32);		\
	ring = dev_priv->ring.start;					\
	write = dev_priv->ring.tail;					\
	mask = dev_priv->ring.tail_mask;				\
} while (0)
/*
 * Finish a BEGIN_RING() sequence.
 *
 * The write cursor is accepted as the new dev_priv->ring.tail only when it
 * equals (old tail + _nr) & mask, i.e. exactly the number of dwords
 * announced to BEGIN_RING() was emitted.  Otherwise an error is logged and
 * the tail is left untouched.
 */
#define ADVANCE_RING()							\
	do {								\
		if (RADEON_VERBOSE) {					\
			DRM_INFO( "ADVANCE_RING() wr=0x%06x tail=0x%06x\n", \
				  write, dev_priv->ring.tail );		\
		}							\
		if (((dev_priv->ring.tail + _nr) & mask) == write) {	\
			dev_priv->ring.tail = write;			\
		} else {						\
			DRM_ERROR(					\
				"ADVANCE_RING(): mismatch: nr: %x write: %x line: %d\n", \
				((dev_priv->ring.tail + _nr) & mask),	\
				write, __LINE__);			\
		}							\
	} while (0)
/* Defined outside this header; the ring macros reach it via COMMIT_RING(). */
extern void radeon_commit_ring(drm_radeon_private_t *dev_priv);

/* Commit the ring using the dev_priv pointer of the calling scope. */
#define COMMIT_RING()							\
	do {								\
		radeon_commit_ring(dev_priv);				\
	} while (0)
/*
 * Store one dword at the ring write cursor and advance the cursor,
 * wrapping it with 'mask'.  Uses the RING_LOCALS variables.
 */
#define OUT_RING(x)							\
	do {								\
		if (RADEON_VERBOSE) {					\
			DRM_INFO( " OUT_RING( 0x%08x ) at 0x%x\n",	\
				  (unsigned int)(x), write );		\
		}							\
		ring[write] = (x);					\
		write = (write + 1) & mask;				\
	} while (0)

/* Emit a single-register packet-0 header for 'reg' followed by 'val'. */
#define OUT_RING_REG(reg, val)						\
	do {								\
		OUT_RING(CP_PACKET0(reg, 0));				\
		OUT_RING(val);						\
	} while (0)
/*
 * Copy 'sz' dwords from the table 'tab' into the ring, starting at the
 * write cursor and wrapping to index 0 at the end of the ring.
 * Uses the RING_LOCALS variables (ring, write, mask).
 *
 * When the copy would run past the last ring slot, the first loop fills the
 * slots up to the end of the ring, the cursor is reset to 0, and the second
 * loop copies the remainder.  The source pointer is advanced inside the
 * loops only; the former trailing '_tab += _i' always added zero and was
 * dropped.  Both loops now use the same store form.
 */
#define OUT_RING_TABLE( tab, sz ) do {					\
	int _size = (sz);						\
	int *_tab = (int *)(tab);					\
									\
	if (write + _size > mask) {					\
		int _i = (mask+1) - write;				\
		_size -= _i;						\
		while (_i > 0) {					\
			ring[write++] = *_tab++;			\
			_i--;						\
		}							\
		write = 0;						\
	}								\
	while (_size > 0) {						\
		ring[write++] = *_tab++;				\
		_size--;						\
	}								\
	write &= mask;							\
} while (0)
/**
 * Copy given number of dwords from drm buffer to the ring buffer.
 *
 * 'sz' is a dword count and is converted to bytes (_size = sz * 4); all
 * chunk sizes inside the loop are in bytes while the ring cursor 'write'
 * is in dwords.  Each iteration copies the largest chunk that
 *   - does not run past the last ring slot ((mask + 1) - write dwords), and
 *   - does not cross the end of the current drm_buffer page
 *     (PAGE_SIZE - drm_buffer_index(_buf) bytes),
 * then advances the ring cursor (wrapping with 'mask') and the buffer
 * position (drm_buffer_advance()).
 *
 * Uses the RING_LOCALS variables 'ring', 'write' and 'mask'.
 */
#define OUT_RING_DRM_BUFFER(buf, sz) do { \
int _size = (sz) * 4; \
struct drm_buffer *_buf = (buf); \
int _part_size; \
while (_size > 0) { \
_part_size = _size; \
\
if (write + _part_size/4 > mask) \
_part_size = ((mask + 1) - write)*4; \
\
if (drm_buffer_index(_buf) + _part_size > PAGE_SIZE) \
_part_size = PAGE_SIZE - drm_buffer_index(_buf);\
\
\
\
memcpy(ring + write, &_buf->data[drm_buffer_page(_buf)] \
[drm_buffer_index(_buf)], _part_size); \
\
_size -= _part_size; \
write = (write + _part_size/4) & mask; \
drm_buffer_advance(_buf, _part_size); \
} \
} while (0)
#endif /* CONFIG_DRM_RADEON_UMS */
#endif /* __RADEON_DRV_H__ */ #endif /* __RADEON_DRV_H__ */
/* radeon_irq.c -- IRQ handling for radeon -*- linux-c -*- */
/*
* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
*
* The Weather Channel (TM) funded Tungsten Graphics to develop the
* initial release of the Radeon 8500 driver under the XFree86 license.
* This notice must be preserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
 * Michel Dänzer <michel@daenzer.net>
*
* ------------------------ This file is DEPRECATED! -------------------------
*/
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_drv.h"
/*
 * Set (state != 0) or clear (state == 0) the bits in 'mask' in the cached
 * interrupt-enable value dev_priv->irq_enable_reg.  The cached value is
 * written to RADEON_GEN_INT_CNTL only while dev->irq_enabled is set.
 */
void radeon_irq_set_state(struct drm_device *dev, u32 mask, int state)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;

	if (!state)
		dev_priv->irq_enable_reg &= ~mask;
	else
		dev_priv->irq_enable_reg |= mask;

	if (!dev->irq_enabled)
		return;

	RADEON_WRITE(RADEON_GEN_INT_CNTL, dev_priv->irq_enable_reg);
}
/*
 * Same as radeon_irq_set_state(), but for the cached display interrupt
 * mask dev_priv->r500_disp_irq_reg, which is written to
 * R500_DxMODE_INT_MASK while dev->irq_enabled is set.
 */
static void r500_vbl_irq_set_state(struct drm_device *dev, u32 mask, int state)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;

	if (!state)
		dev_priv->r500_disp_irq_reg &= ~mask;
	else
		dev_priv->r500_disp_irq_reg |= mask;

	if (!dev->irq_enabled)
		return;

	RADEON_WRITE(R500_DxMODE_INT_MASK, dev_priv->r500_disp_irq_reg);
}
/*
 * Enable the vblank interrupt for CRTC 'pipe' (0 or 1).
 *
 * CHIP_RS600 and newer families use the R500 display interrupt mask; older
 * ones use the CRTC vblank bits of the general interrupt control register.
 *
 * Returns 0 on success, -EINVAL (after logging) for any other pipe.
 */
int radeon_enable_vblank(struct drm_device *dev, unsigned int pipe)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	const int uses_r500_mask =
		(dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RS600;

	if (pipe > 1) {
		DRM_ERROR("tried to enable vblank on non-existent crtc %u\n",
			  pipe);
		return -EINVAL;
	}

	if (uses_r500_mask)
		r500_vbl_irq_set_state(dev,
				       pipe ? R500_D2MODE_INT_MASK
					    : R500_D1MODE_INT_MASK,
				       1);
	else
		radeon_irq_set_state(dev,
				     pipe ? RADEON_CRTC2_VBLANK_MASK
					  : RADEON_CRTC_VBLANK_MASK,
				     1);

	return 0;
}
/*
 * Disable the vblank interrupt for CRTC 'pipe' (0 or 1).
 *
 * CHIP_RS600 and newer families use the R500 display interrupt mask; older
 * ones use the CRTC vblank bits of the general interrupt control register.
 *
 * Any other pipe value is logged and ignored.  The log message now says
 * "disable"; it previously carried the text copied from the enable path.
 */
void radeon_disable_vblank(struct drm_device *dev, unsigned int pipe)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;

	if (pipe > 1) {
		DRM_ERROR("tried to disable vblank on non-existent crtc %u\n",
			  pipe);
		return;
	}

	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RS600)
		r500_vbl_irq_set_state(dev,
				       pipe ? R500_D2MODE_INT_MASK
					    : R500_D1MODE_INT_MASK,
				       0);
	else
		radeon_irq_set_state(dev,
				     pipe ? RADEON_CRTC2_VBLANK_MASK
					  : RADEON_CRTC_VBLANK_MASK,
				     0);
}
/*
 * Read the pending interrupt status, acknowledge the sources this driver
 * handles and return them.
 *
 * @dev_priv:      driver private data (also used by the register macros)
 * @r500_disp_int: out; raw R500 display interrupt status, or 0 when the
 *                 display status register was not read
 *
 * The handled set is RADEON_SW_INT_TEST plus either the R500 display
 * status bit (CHIP_RS600 and newer) or the two CRTC vblank status bits.
 * On CHIP_RS600+ a pending display interrupt additionally gets its D1/D2
 * vblank status acknowledged.  The returned value contains only handled
 * bits, and those bits are written back to RADEON_GEN_INT_STATUS.
 */
static u32 radeon_acknowledge_irqs(drm_radeon_private_t *dev_priv, u32 *r500_disp_int)
{
	u32 pending = RADEON_READ(RADEON_GEN_INT_STATUS);
	u32 handled = RADEON_SW_INT_TEST;

	*r500_disp_int = 0;

	if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RS600) {
		handled |= RADEON_CRTC_VBLANK_STAT | RADEON_CRTC2_VBLANK_STAT;
	} else {
		/* vbl interrupts in a different place */
		if (pending & R500_DISPLAY_INT_STATUS) {
			u32 disp_status = RADEON_READ(R500_DISP_INTERRUPT_STATUS);

			*r500_disp_int = disp_status;
			if (disp_status & R500_D1_VBLANK_INTERRUPT)
				RADEON_WRITE(R500_D1MODE_VBLANK_STATUS,
					     R500_VBLANK_ACK);
			if (disp_status & R500_D2_VBLANK_INTERRUPT)
				RADEON_WRITE(R500_D2MODE_VBLANK_STATUS,
					     R500_VBLANK_ACK);
		}
		handled |= R500_DISPLAY_INT_STATUS;
	}

	pending &= handled;
	if (pending)
		RADEON_WRITE(RADEON_GEN_INT_STATUS, pending);

	return pending;
}
/* Interrupts - Used for device synchronization and flushing in the
* following circumstances:
*
* - Exclusive FB access with hw idle:
* - Wait for GUI Idle (?) interrupt, then do normal flush.
*
* - Frame throttling, NV_fence:
* - Drop marker irq's into command stream ahead of time.
* - Wait on irq's with lock *not held*
* - Check each for termination condition
*
* - Internally in cp_getbuffer, etc:
* - as above, but wait with lock held???
*
* NOTE: These functions are misleadingly named -- the irq's aren't
* tied to dma at all, this is just a hangover from dri prehistory.
*/
irqreturn_t radeon_driver_irq_handler(int irq, void *arg)
{
struct drm_device *dev = (struct drm_device *) arg;
drm_radeon_private_t *dev_priv =
(drm_radeon_private_t *) dev->dev_private;
u32 stat;
u32 r500_disp_int;
if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
return IRQ_NONE;
/* Only consider the bits we're interested in - others could be used
* outside the DRM
*/
stat = radeon_acknowledge_irqs(dev_priv, &r500_disp_int);
if (!stat)
return IRQ_NONE;
stat &= dev_priv->irq_enable_reg;
/* SW interrupt */
if (stat & RADEON_SW_INT_TEST)
wake_up(&dev_priv->swi_queue);
/* VBLANK interrupt */
if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RS600) {
if (r500_disp_int & R500_D1_VBLANK_INTERRUPT)
drm_handle_vblank(dev, 0);
if (r500_disp_int & R500_D2_VBLANK_INTERRUPT)
drm_handle_vblank(dev, 1);
} else {
if (stat & RADEON_CRTC_VBLANK_STAT)
drm_handle_vblank(dev, 0);
if (stat & RADEON_CRTC2_VBLANK_STAT)
drm_handle_vblank(dev, 1);
}
return IRQ_HANDLED;
}
/*
 * Emit a software interrupt through the ring.
 *
 * Increments dev_priv->swi_emitted, then queues four dwords: a write of
 * the new sequence number to RADEON_LAST_SWI_REG and a write of
 * RADEON_SW_INT_FIRE to RADEON_GEN_INT_STATUS.  The ring is committed
 * before returning.
 *
 * Returns the sequence number read back after the increment.
 */
static int radeon_emit_irq(struct drm_device * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int seq;
	RING_LOCALS;

	atomic_inc(&dev_priv->swi_emitted);
	seq = atomic_read(&dev_priv->swi_emitted);

	BEGIN_RING(4);
	/* RADEON_LAST_SWI_REG <- seq */
	OUT_RING(CP_PACKET0(RADEON_LAST_SWI_REG, 0));
	OUT_RING(seq);
	/* RADEON_GEN_INT_STATUS <- RADEON_SW_INT_FIRE */
	OUT_RING(CP_PACKET0(RADEON_GEN_INT_STATUS, 0));
	OUT_RING(RADEON_SW_INT_FIRE);
	ADVANCE_RING();
	COMMIT_RING();

	return seq;
}
static int radeon_wait_irq(struct drm_device * dev, int swi_nr)
{
drm_radeon_private_t *dev_priv =
(drm_radeon_private_t *) dev->dev_private;
int ret = 0;
if (RADEON_READ(RADEON_LAST_SWI_REG) >= swi_nr)
return 0;
dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
DRM_WAIT_ON(ret, dev_priv->swi_queue, 3 * HZ,
RADEON_READ(RADEON_LAST_SWI_REG) >= swi_nr);
return ret;
}
u32 radeon_get_vblank_counter(struct drm_device *dev, unsigned int pipe)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
if (!dev_priv) {
DRM_ERROR("called with no initialization\n");
return -EINVAL;
}
if (pipe > 1) {
DRM_ERROR("Invalid crtc %u\n", pipe);
return -EINVAL;
}
if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RS600) {
if (pipe == 0)
return RADEON_READ(R500_D1CRTC_FRAME_COUNT);
else
return RADEON_READ(R500_D2CRTC_FRAME_COUNT);
} else {
if (pipe == 0)
return RADEON_READ(RADEON_CRTC_CRNT_FRAME);
else
return RADEON_READ(RADEON_CRTC2_CRNT_FRAME);
}
}
/* Needs the lock as it touches the ring.
*/
int radeon_irq_emit(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
drm_radeon_irq_emit_t *emit = data;
int result;
if (!dev_priv) {
DRM_ERROR("called with no initialization\n");
return -EINVAL;
}
if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
return -EINVAL;
LOCK_TEST_WITH_RETURN(dev, file_priv);
result = radeon_emit_irq(dev);
if (copy_to_user(emit->irq_seq, &result, sizeof(int))) {
DRM_ERROR("copy_to_user\n");
return -EFAULT;
}
return 0;
}
/* Doesn't need the hardware lock.
*/
int radeon_irq_wait(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
drm_radeon_irq_wait_t *irqwait = data;
if (!dev_priv) {
DRM_ERROR("called with no initialization\n");
return -EINVAL;
}
if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
return -EINVAL;
return radeon_wait_irq(dev, irqwait->irq_seq);
}
/* drm_dma.h hooks
*/
void radeon_driver_irq_preinstall(struct drm_device * dev)
{
drm_radeon_private_t *dev_priv =
(drm_radeon_private_t *) dev->dev_private;
u32 dummy;
if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
return;
/* Disable *all* interrupts */
if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RS600)
RADEON_WRITE(R500_DxMODE_INT_MASK, 0);
RADEON_WRITE(RADEON_GEN_INT_CNTL, 0);
/* Clear bits if they're already high */
radeon_acknowledge_irqs(dev_priv, &dummy);
}
/*
 * Post-install hook: reset the software-interrupt counter and wait queue,
 * set the maximum vblank counter value and, on families older than
 * CHIP_R600, enable the software interrupt.  Always returns 0.
 */
int radeon_driver_irq_postinstall(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;

	atomic_set(&dev_priv->swi_emitted, 0);
	init_waitqueue_head(&dev_priv->swi_queue);

	dev->max_vblank_count = 0x001fffff;

	if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_R600)
		radeon_irq_set_state(dev, RADEON_SW_INT_ENABLE, 1);

	return 0;
}
void radeon_driver_irq_uninstall(struct drm_device * dev)
{
drm_radeon_private_t *dev_priv =
(drm_radeon_private_t *) dev->dev_private;
if (!dev_priv)
return;
if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
return;
if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RS600)
RADEON_WRITE(R500_DxMODE_INT_MASK, 0);
/* Disable *all* interrupts */
RADEON_WRITE(RADEON_GEN_INT_CNTL, 0);
}
int radeon_vblank_crtc_get(struct drm_device *dev)
{
drm_radeon_private_t *dev_priv = (drm_radeon_private_t *) dev->dev_private;
return dev_priv->vblank_crtc;
}
/*
 * Store a new vblank CRTC selection.
 *
 * 'value' may only contain the DRM_RADEON_VBLANK_CRTC1 and
 * DRM_RADEON_VBLANK_CRTC2 bits; anything else is logged and rejected with
 * -EINVAL.  Returns 0 on success.
 */
int radeon_vblank_crtc_set(struct drm_device *dev, int64_t value)
{
	drm_radeon_private_t *priv = dev->dev_private;
	int64_t stray_bits;

	stray_bits = value & ~(DRM_RADEON_VBLANK_CRTC1 | DRM_RADEON_VBLANK_CRTC2);
	if (stray_bits) {
		DRM_ERROR("called with invalid crtc 0x%x\n", (unsigned int)value);
		return -EINVAL;
	}

	priv->vblank_crtc = (unsigned int)value;
	return 0;
}
/* radeon_mem.c -- Simple GART/fb memory manager for radeon -*- linux-c -*- */
/*
* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
*
* The Weather Channel (TM) funded Tungsten Graphics to develop the
* initial release of the Radeon 8500 driver under the XFree86 license.
* This notice must be preserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*
* ------------------------ This file is DEPRECATED! -------------------------
*/
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_drv.h"
/* Very simple allocator for GART memory, working on a static range
* already mapped into each client's address space.
*/
/*
 * Allocate a free block covering [start, start + size) and link it into the
 * list directly after @p.  Returns the new block, or NULL when the
 * allocation fails (the list is left untouched in that case).
 */
static struct mem_block *link_free_block_after(struct mem_block *p,
					       int start, int size)
{
	struct mem_block *blk = kmalloc(sizeof(*blk), GFP_KERNEL);

	if (!blk)
		return NULL;

	blk->start = start;
	blk->size = size;
	blk->file_priv = NULL;
	blk->next = p->next;
	blk->prev = p;
	p->next->prev = blk;
	p->next = blk;

	return blk;
}

/*
 * Carve the range [start, start + size) out of block @p and assign it to
 * @file_priv.
 *
 * Up to two new free blocks are created: one for the part of @p before
 * 'start' (in which case the original @p keeps that leading part and the
 * new block becomes the candidate), and one for the part after
 * 'start + size'.  If either allocation fails, splitting stops there and
 * the current candidate block is handed out as it is, so the returned
 * block may then be larger than requested.
 *
 * Returns the block now owned by @file_priv.
 */
static struct mem_block *split_block(struct mem_block *p, int start, int size,
				     struct drm_file *file_priv)
{
	struct mem_block *rest;

	/* Maybe cut off the start of an existing block */
	if (start > p->start) {
		rest = link_free_block_after(p, start,
					     p->size - (start - p->start));
		if (!rest)
			goto claim;
		p->size -= rest->size;
		p = rest;
	}

	/* Maybe cut off the end of an existing block */
	if (size < p->size) {
		rest = link_free_block_after(p, start + size, p->size - size);
		if (rest)
			p->size = size;
	}

claim:
	/* Our block is in the middle */
	p->file_priv = file_priv;
	return p;
}
/*
 * First-fit allocation from @heap.
 *
 * Walks the block list and takes the first free block (file_priv == NULL)
 * that can hold 'size' units starting at its start offset rounded up to a
 * multiple of (1 << align2).  The block is then split with split_block().
 *
 * Returns the allocated block, or NULL when no free block is large enough.
 */
static struct mem_block *alloc_block(struct mem_block *heap, int size,
				     int align2, struct drm_file *file_priv)
{
	const int align_mask = (1 << align2) - 1;
	struct mem_block *blk;

	list_for_each(blk, heap) {
		int aligned_start;

		if (blk->file_priv != NULL)
			continue;

		aligned_start = (blk->start + align_mask) & ~align_mask;
		if (aligned_start + size <= blk->start + blk->size)
			return split_block(blk, aligned_start, size, file_priv);
	}

	return NULL;
}
static struct mem_block *find_block(struct mem_block *heap, int start)
{
struct mem_block *p;
list_for_each(p, heap)
if (p->start == start)
return p;
return NULL;
}
/* Mark @p as unowned and merge it with unowned neighbours.
 *
 * The successor is absorbed into @p first; afterwards @p itself is absorbed
 * into its predecessor if that one is unowned too, in which case @p is freed
 * and must not be used by the caller any more.
 *
 * Relies on the list describing one contiguous range and on the list head
 * carrying a non-NULL file_priv so that it is never merged away.
 */
static void free_block(struct mem_block *p)
{
	struct mem_block *after = p->next;
	struct mem_block *before;

	p->file_priv = NULL;

	if (after->file_priv == NULL) {
		p->size += after->size;
		p->next = after->next;
		after->next->prev = p;
		kfree(after);
	}

	before = p->prev;
	if (before->file_priv == NULL) {
		before->size += p->size;
		before->next = p->next;
		p->next->prev = before;
		kfree(p);
	}
}
/* Create a heap covering [start, start + size).
 *
 * The heap is a circular doubly linked list made of a zero-initialised head
 * stored in *@heap plus one unowned block spanning the whole range.  The
 * head's file_priv is set to -1 so that it never looks unowned to the
 * merging code.
 *
 * Returns 0 on success or -ENOMEM.  If allocating the head fails, *@heap is
 * left NULL; if allocating the initial block fails, *@heap is not touched.
 */
static int init_heap(struct mem_block **heap, int start, int size)
{
	struct mem_block *span;
	struct mem_block *head;

	span = kmalloc(sizeof(*span), GFP_KERNEL);
	if (!span)
		return -ENOMEM;

	head = kzalloc(sizeof(*head), GFP_KERNEL);
	*heap = head;
	if (!head) {
		kfree(span);
		return -ENOMEM;
	}

	span->start = start;
	span->size = size;
	span->file_priv = NULL;
	span->next = head;
	span->prev = head;

	head->file_priv = (struct drm_file *)-1;
	head->next = span;
	head->prev = span;
	return 0;
}
/* Free all blocks associated with the releasing file.
*/
void radeon_mem_release(struct drm_file *file_priv, struct mem_block *heap)
{
struct mem_block *p;
if (!heap || !heap->next)
return;
list_for_each(p, heap) {
if (p->file_priv == file_priv)
p->file_priv = NULL;
}
/* Assumes a single contiguous range. Needs a special file_priv in
* 'heap' to stop it being subsumed.
*/
list_for_each(p, heap) {
while (p->file_priv == NULL && p->next->file_priv == NULL) {
struct mem_block *q = p->next;
p->size += q->size;
p->next = q->next;
p->next->prev = p;
kfree(q);
}
}
}
/* Destroy a heap: free every block on the list, then the list head, and
 * reset *@heap to NULL.  A heap that is already NULL is left alone.
 */
void radeon_mem_takedown(struct mem_block **heap)
{
	struct mem_block *head = *heap;
	struct mem_block *cur;
	struct mem_block *following;

	if (head == NULL)
		return;

	cur = head->next;
	while (cur != head) {
		/* Fetch the successor before the block is released. */
		following = cur->next;
		kfree(cur);
		cur = following;
	}

	kfree(head);
	*heap = NULL;
}
/* IOCTL HANDLERS */
/* Map a RADEON_MEM_REGION_* identifier to the address of the matching heap
 * pointer inside @dev_priv.  Returns NULL for an unknown region.
 */
static struct mem_block **get_heap(drm_radeon_private_t * dev_priv, int region)
{
	if (region == RADEON_MEM_REGION_GART)
		return &dev_priv->gart_heap;

	if (region == RADEON_MEM_REGION_FB)
		return &dev_priv->fb_heap;

	return NULL;
}
/* Allocate a block from the GART or framebuffer heap on behalf of
 * @file_priv.
 *
 * @data points to a drm_radeon_mem_alloc_t.  Its alignment field is raised
 * to at least 12 (i.e. 1 << 12 = 4k, see alloc_block()), and the start
 * offset of the new block is copied to the user pointer in region_offset.
 *
 * Returns 0 on success, -EINVAL if the device has no private data, -EFAULT
 * if the region is unknown, the heap is not initialised or the offset cannot
 * be copied out, and -ENOMEM if no block could be allocated.
 */
int radeon_mem_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_mem_alloc_t *alloc = data;
	struct mem_block *block, **heap;

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}

	heap = get_heap(dev_priv, alloc->region);
	if (!heap || !*heap)
		return -EFAULT;

	/* Make things easier on ourselves: all allocations at least
	 * 4k aligned.
	 */
	if (alloc->alignment < 12)
		alloc->alignment = 12;

	block = alloc_block(*heap, alloc->size, alloc->alignment, file_priv);
	if (!block)
		return -ENOMEM;

	if (copy_to_user(alloc->region_offset, &block->start,
			 sizeof(int))) {
		DRM_ERROR("copy_to_user\n");
		/* The caller never learns the offset and so could not free
		 * this block itself; hand it back instead of leaving it
		 * owned by @file_priv.
		 */
		free_block(block);
		return -EFAULT;
	}

	return 0;
}
int radeon_mem_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
drm_radeon_mem_free_t *memfree = data;
struct mem_block *block, **heap;
if (!dev_priv) {
DRM_ERROR("called with no initialization\n");
return -EINVAL;
}
heap = get_heap(dev_priv, memfree->region);
if (!heap || !*heap)
return -EFAULT;
block = find_block(*heap, memfree->region_offset);
if (!block)
return -EFAULT;
if (block->file_priv != file_priv)
return -EPERM;
free_block(block);
return 0;
}
/* Set up the heap for one memory region.
 *
 * @data points to a drm_radeon_mem_init_heap_t giving the region and the
 * start and size of the range to manage.
 *
 * Returns 0 on success, -EINVAL if the device has no private data, -EFAULT
 * if the region is unknown or its heap already exists, and -ENOMEM if the
 * heap structures cannot be allocated.
 */
int radeon_mem_init_heap(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_mem_init_heap_t *initheap = data;
	struct mem_block **heap;

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}

	heap = get_heap(dev_priv, initheap->region);
	if (!heap)
		return -EFAULT;

	if (*heap) {
		/* Newline added: every other message in this file is
		 * newline-terminated, and without it the next log line would
		 * be glued onto this one.
		 */
		DRM_ERROR("heap already initialized?\n");
		return -EFAULT;
	}

	return init_heap(heap, initheap->start, initheap->size);
}
/* radeon_state.c -- State support for Radeon -*- linux-c -*- */
/*
* Copyright 2000 VA Linux Systems, Inc., Fremont, California.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Gareth Hughes <gareth@valinux.com>
* Kevin E. Martin <martin@valinux.com>
*
* ------------------------ This file is DEPRECATED! -------------------------
*/
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_drv.h"
#include "drm_buffer.h"
/* ================================================================
* Helper functions for client state checking and fixup
*/
/* Validate a client-supplied card offset, rewriting it when it can be
 * recognised as one of the legacy conventions.
 *
 * A sane client passes an offset that radeon_check_offset() accepts as it
 * stands; such an offset is left untouched.  Otherwise two fix-ups are tried
 * in sequence:
 *  - an offset smaller than fb_size + gart_size is treated as zero based and
 *    has the per-file radeon_fb_delta added;
 *  - an offset that (then) lies beyond the end of the framebuffer is treated
 *    as aimed at the GART, as if the aperture were appended to the
 *    framebuffer, and is rebased onto gart_vm_start.
 * The result is checked again and written back to *@offset only if it
 * passes.
 *
 * Note: It might be a good idea here to make sure the offset lands in some
 * "allowed" area to protect things like the PCIE GART...
 *
 * Returns 0 if *@offset is usable (possibly after the fix-up), -EINVAL
 * otherwise.
 */
static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
						    dev_priv,
						    struct drm_file * file_priv,
						    u32 *offset)
{
	const u32 fb_last = dev_priv->fb_location + dev_priv->fb_size - 1;
	u64 fixed = *offset;

	/* Best case: nothing to do. */
	if (radeon_check_offset(dev_priv, fixed))
		return 0;

	/* Zero-based offset within framebuffer + GART: apply the magic
	 * delta obtained from SETPARAM or derived from fb_location.
	 */
	if (fixed < (dev_priv->fb_size + dev_priv->gart_size)) {
		struct drm_radeon_driver_file_fields *fields =
			file_priv->driver_priv;

		fixed += fields->radeon_fb_delta;
	}

	/* Beyond the framebuffer: assume a GART offset was meant. */
	if (fixed > fb_last)
		fixed = fixed - fb_last - 1 + dev_priv->gart_vm_start;

	/* Recheck; still out of bounds means the offset is rejected. */
	if (!radeon_check_offset(dev_priv, fixed))
		return -EINVAL;

	DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)fixed);
	*offset = fixed;
	return 0;
}
/* Check one state packet, identified by @id, whose payload is in @buf.
 *
 * For packet ids whose payload contains memory offsets, each offset dword is
 * located in @buf and passed through radeon_check_and_fixup_offset(), which
 * may rewrite it in place.  Packet ids on the long list near the end carry
 * no offsets and are accepted as they are.
 *
 * Side effects visible here: a valid RADEON_EMIT_PP_MISC packet sets
 * dev_priv->have_z_offset, and R200_EMIT_VAP_CTL emits a write of 0 to
 * RADEON_SE_TCL_STATE_FLUSH on the ring.
 *
 * Returns 0 if the packet is acceptable, -EINVAL for an invalid offset or an
 * unknown packet id.
 */
static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
dev_priv,
struct drm_file *file_priv,
int id, struct drm_buffer *buf)
{
u32 *data;
switch (id) {
case RADEON_EMIT_PP_MISC:
/* The depth offset sits at a fixed dword distance from PP_MISC. */
data = drm_buffer_pointer_to_dword(buf,
(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4);
if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
DRM_ERROR("Invalid depth buffer offset\n");
return -EINVAL;
}
/* Remember that a validated depth offset has been supplied. */
dev_priv->have_z_offset = 1;
break;
case RADEON_EMIT_PP_CNTL:
/* The colour offset sits at a fixed dword distance from PP_CNTL. */
data = drm_buffer_pointer_to_dword(buf,
(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4);
if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
DRM_ERROR("Invalid colour buffer offset\n");
return -EINVAL;
}
break;
case R200_EMIT_PP_TXOFFSET_0:
case R200_EMIT_PP_TXOFFSET_1:
case R200_EMIT_PP_TXOFFSET_2:
case R200_EMIT_PP_TXOFFSET_3:
case R200_EMIT_PP_TXOFFSET_4:
case R200_EMIT_PP_TXOFFSET_5:
/* The texture offset is the first dword of the payload. */
data = drm_buffer_pointer_to_dword(buf, 0);
if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
DRM_ERROR("Invalid R200 texture offset\n");
return -EINVAL;
}
break;
case RADEON_EMIT_PP_TXFILTER_0:
case RADEON_EMIT_PP_TXFILTER_1:
case RADEON_EMIT_PP_TXFILTER_2:
/* The unit 0 register distance is used for all three units. */
data = drm_buffer_pointer_to_dword(buf,
(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4);
if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
DRM_ERROR("Invalid R100 texture offset\n");
return -EINVAL;
}
break;
case R200_EMIT_PP_CUBIC_OFFSETS_0:
case R200_EMIT_PP_CUBIC_OFFSETS_1:
case R200_EMIT_PP_CUBIC_OFFSETS_2:
case R200_EMIT_PP_CUBIC_OFFSETS_3:
case R200_EMIT_PP_CUBIC_OFFSETS_4:
case R200_EMIT_PP_CUBIC_OFFSETS_5:{
int i;
/* All five payload dwords are offsets. */
for (i = 0; i < 5; i++) {
data = drm_buffer_pointer_to_dword(buf, i);
if (radeon_check_and_fixup_offset(dev_priv,
file_priv,
data)) {
DRM_ERROR
("Invalid R200 cubic texture offset\n");
return -EINVAL;
}
}
break;
}
case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
int i;
/* All five payload dwords are offsets. */
for (i = 0; i < 5; i++) {
data = drm_buffer_pointer_to_dword(buf, i);
if (radeon_check_and_fixup_offset(dev_priv,
file_priv,
data)) {
DRM_ERROR
("Invalid R100 cubic texture offset\n");
return -EINVAL;
}
}
}
break;
case R200_EMIT_VAP_CTL:{
/* No offset to check; a state flush register write is queued. */
RING_LOCALS;
BEGIN_RING(2);
OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
ADVANCE_RING();
}
break;
case RADEON_EMIT_RB3D_COLORPITCH:
case RADEON_EMIT_RE_LINE_PATTERN:
case RADEON_EMIT_SE_LINE_WIDTH:
case RADEON_EMIT_PP_LUM_MATRIX:
case RADEON_EMIT_PP_ROT_MATRIX_0:
case RADEON_EMIT_RB3D_STENCILREFMASK:
case RADEON_EMIT_SE_VPORT_XSCALE:
case RADEON_EMIT_SE_CNTL:
case RADEON_EMIT_SE_CNTL_STATUS:
case RADEON_EMIT_RE_MISC:
case RADEON_EMIT_PP_BORDER_COLOR_0:
case RADEON_EMIT_PP_BORDER_COLOR_1:
case RADEON_EMIT_PP_BORDER_COLOR_2:
case RADEON_EMIT_SE_ZBIAS_FACTOR:
case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
case R200_EMIT_PP_TXCBLEND_0:
case R200_EMIT_PP_TXCBLEND_1:
case R200_EMIT_PP_TXCBLEND_2:
case R200_EMIT_PP_TXCBLEND_3:
case R200_EMIT_PP_TXCBLEND_4:
case R200_EMIT_PP_TXCBLEND_5:
case R200_EMIT_PP_TXCBLEND_6:
case R200_EMIT_PP_TXCBLEND_7:
case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
case R200_EMIT_TFACTOR_0:
case R200_EMIT_VTX_FMT_0:
case R200_EMIT_MATRIX_SELECT_0:
case R200_EMIT_TEX_PROC_CTL_2:
case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
case R200_EMIT_PP_TXFILTER_0:
case R200_EMIT_PP_TXFILTER_1:
case R200_EMIT_PP_TXFILTER_2:
case R200_EMIT_PP_TXFILTER_3:
case R200_EMIT_PP_TXFILTER_4:
case R200_EMIT_PP_TXFILTER_5:
case R200_EMIT_VTE_CNTL:
case R200_EMIT_OUTPUT_VTX_COMP_SEL:
case R200_EMIT_PP_TAM_DEBUG3:
case R200_EMIT_PP_CNTL_X:
case R200_EMIT_RB3D_DEPTHXY_OFFSET:
case R200_EMIT_RE_AUX_SCISSOR_CNTL:
case R200_EMIT_RE_SCISSOR_TL_0:
case R200_EMIT_RE_SCISSOR_TL_1:
case R200_EMIT_RE_SCISSOR_TL_2:
case R200_EMIT_SE_VAP_CNTL_STATUS:
case R200_EMIT_SE_VTX_STATE_CNTL:
case R200_EMIT_RE_POINTSIZE:
case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
case R200_EMIT_PP_CUBIC_FACES_0:
case R200_EMIT_PP_CUBIC_FACES_1:
case R200_EMIT_PP_CUBIC_FACES_2:
case R200_EMIT_PP_CUBIC_FACES_3:
case R200_EMIT_PP_CUBIC_FACES_4:
case R200_EMIT_PP_CUBIC_FACES_5:
case RADEON_EMIT_PP_TEX_SIZE_0:
case RADEON_EMIT_PP_TEX_SIZE_1:
case RADEON_EMIT_PP_TEX_SIZE_2:
case R200_EMIT_RB3D_BLENDCOLOR:
case R200_EMIT_TCL_POINT_SPRITE_CNTL:
case RADEON_EMIT_PP_CUBIC_FACES_0:
case RADEON_EMIT_PP_CUBIC_FACES_1:
case RADEON_EMIT_PP_CUBIC_FACES_2:
case R200_EMIT_PP_TRI_PERF_CNTL:
case R200_EMIT_PP_AFS_0:
case R200_EMIT_PP_AFS_1:
case R200_EMIT_ATF_TFACTOR:
case R200_EMIT_PP_TXCTLALL_0:
case R200_EMIT_PP_TXCTLALL_1:
case R200_EMIT_PP_TXCTLALL_2:
case R200_EMIT_PP_TXCTLALL_3:
case R200_EMIT_PP_TXCTLALL_4:
case R200_EMIT_PP_TXCTLALL_5:
case R200_EMIT_VAP_PVS_CNTL:
/* These packets don't contain memory offsets */
break;
default:
/* Anything not explicitly listed above is rejected. */
DRM_ERROR("Unknown state packet ID %d\n", id);
return -EINVAL;
}
return 0;
}
/* Validate the type-3 CP packet at the current position of cmdbuf->buffer
 * and fix up any memory offsets it contains.
 *
 * *@cmdsz is set to the packet length in dwords (2 + the count field of the
 * header); this happens before any check, so it is written on the error
 * paths too.  The packet is rejected if it is not a type-3 packet, if it is
 * longer than the unprocessed part of the buffer, or if its opcode is not on
 * the list below.  Offsets inside accepted packets go through
 * radeon_check_and_fixup_offset() and may be rewritten in place.
 *
 * Returns 0 if the packet may be submitted, -EINVAL otherwise.
 */
static int radeon_check_and_fixup_packet3(drm_radeon_private_t *
dev_priv,
struct drm_file *file_priv,
drm_radeon_kcmd_buffer_t *
cmdbuf,
unsigned int *cmdsz)
{
u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
u32 offset, narrays;
int count, i, k;
count = ((*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16);
*cmdsz = 2 + count;
if ((*cmd & 0xc0000000) != RADEON_CP_PACKET3) {
DRM_ERROR("Not a type 3 packet\n");
return -EINVAL;
}
/* cmdsz counts dwords, drm_buffer_unprocessed() is compared in bytes. */
if (4 * *cmdsz > drm_buffer_unprocessed(cmdbuf->buffer)) {
DRM_ERROR("Packet size larger than size of data provided\n");
return -EINVAL;
}
/* Dispatch on the opcode bits of the header. */
switch (*cmd & 0xff00) {
/* XXX Are there old drivers needing other packets? */
case RADEON_3D_DRAW_IMMD:
case RADEON_3D_DRAW_VBUF:
case RADEON_3D_DRAW_INDX:
case RADEON_WAIT_FOR_IDLE:
case RADEON_CP_NOP:
case RADEON_3D_CLEAR_ZMASK:
/* case RADEON_CP_NEXT_CHAR:
case RADEON_CP_PLY_NEXTSCAN:
case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
/* these packets are safe */
break;
case RADEON_CP_3D_DRAW_IMMD_2:
case RADEON_CP_3D_DRAW_VBUF_2:
case RADEON_CP_3D_DRAW_INDX_2:
case RADEON_3D_CLEAR_HIZ:
/* safe but r200 only */
if (dev_priv->microcode_version != UCODE_R200) {
DRM_ERROR("Invalid 3d packet for r100-class chip\n");
return -EINVAL;
}
break;
case RADEON_3D_LOAD_VBPNTR:
if (count > 18) { /* 12 arrays max */
DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
count);
return -EINVAL;
}
/* carefully check packet contents */
cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
narrays = *cmd & ~0xc000;
/* k counts the array offsets checked so far, i is the dword index
 * within the packet.  Each pass skips one attribute dword and then
 * checks up to two offset dwords.
 */
k = 0;
i = 2;
while ((k < narrays) && (i < (count + 2))) {
i++; /* skip attribute field */
cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
if (radeon_check_and_fixup_offset(dev_priv, file_priv,
cmd)) {
DRM_ERROR
("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
k, i);
return -EINVAL;
}
k++;
i++;
if (k == narrays)
break;
/* have one more to process, they come in pairs */
cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
if (radeon_check_and_fixup_offset(dev_priv,
file_priv, cmd))
{
DRM_ERROR
("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
k, i);
return -EINVAL;
}
k++;
i++;
}
/* do the counts match what we expect ? */
if ((k != narrays) || (i != (count + 2))) {
DRM_ERROR
("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
k, i, narrays, count + 1);
return -EINVAL;
}
break;
case RADEON_3D_RNDR_GEN_INDX_PRIM:
/* Only accepted with R100 microcode; dword 1 holds the offset. */
if (dev_priv->microcode_version != UCODE_R100) {
DRM_ERROR("Invalid 3d packet for r200-class chip\n");
return -EINVAL;
}
cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
if (radeon_check_and_fixup_offset(dev_priv, file_priv, cmd)) {
DRM_ERROR("Invalid rndr_gen_indx offset\n");
return -EINVAL;
}
break;
case RADEON_CP_INDX_BUFFER:
/* Only accepted with R200 microcode.  Dword 1 must match the expected
 * register address pattern, dword 2 holds the offset.
 */
if (dev_priv->microcode_version != UCODE_R200) {
DRM_ERROR("Invalid 3d packet for r100-class chip\n");
return -EINVAL;
}
cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
if ((*cmd & 0x8000ffff) != 0x80000810) {
DRM_ERROR("Invalid indx_buffer reg address %08X\n", *cmd);
return -EINVAL;
}
cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
if (radeon_check_and_fixup_offset(dev_priv, file_priv, cmd)) {
DRM_ERROR("Invalid indx_buffer offset is %08X\n", *cmd);
return -EINVAL;
}
break;
case RADEON_CNTL_HOSTDATA_BLT:
case RADEON_CNTL_PAINT_MULTI:
case RADEON_CNTL_BITBLT_MULTI:
/* MSB of opcode: next DWORD GUI_CNTL */
cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
/* If either pitch/offset flag is set, dword 2 is a pitch/offset word:
 * the offset is kept in its low 22 bits in units of 1 << 10, the top
 * ten bits are preserved across the fix-up.
 */
if (*cmd & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
| RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
u32 *cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
offset = *cmd2 << 10;
if (radeon_check_and_fixup_offset
(dev_priv, file_priv, &offset)) {
DRM_ERROR("Invalid first packet offset\n");
return -EINVAL;
}
*cmd2 = (*cmd2 & 0xffc00000) | offset >> 10;
}
/* With both flags set, dword 3 is a second pitch/offset word. */
if ((*cmd & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
(*cmd & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
u32 *cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
offset = *cmd3 << 10;
if (radeon_check_and_fixup_offset
(dev_priv, file_priv, &offset)) {
DRM_ERROR("Invalid second packet offset\n");
return -EINVAL;
}
*cmd3 = (*cmd3 & 0xffc00000) | offset >> 10;
}
break;
default:
/* Opcodes that are not listed above are refused. */
DRM_ERROR("Invalid packet type %x\n", *cmd & 0xff00);
return -EINVAL;
}
return 0;
}
/* ================================================================
* CP hardware state programming functions
*/
/* Queue the register writes that program @box as the clip rectangle:
 * RADEON_RE_TOP_LEFT receives (y1, x1) and RADEON_RE_WIDTH_HEIGHT receives
 * (y2 - 1, x2 - 1), each packed as high and low 16 bits of one dword.
 */
static void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
				  struct drm_clip_rect * box)
{
	RING_LOCALS;

	DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		  box->x1, box->y1, box->x2, box->y2);

	BEGIN_RING(4);

	/* Upper left corner. */
	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
	OUT_RING((box->y1 << 16) | box->x1);

	/* Lower right corner, inclusive. */
	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
	OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));

	ADVANCE_RING();
}
/* Validate the texture offset of one texture unit and, if it is acceptable,
 * queue that unit's six registers starting at @txfilter_reg followed by its
 * border colour register @border_color_reg.
 *
 * @unit is only used in the error message.
 *
 * Returns 0 on success, -EINVAL (with nothing queued) for a bad offset.
 */
static int radeon_emit_tex_unit_state(drm_radeon_private_t * dev_priv,
				      struct drm_file *file_priv,
				      drm_radeon_texture_regs_t * tex,
				      int unit, int txfilter_reg,
				      int border_color_reg)
{
	RING_LOCALS;

	if (radeon_check_and_fixup_offset(dev_priv, file_priv,
					  &tex->pp_txoffset)) {
		DRM_ERROR("Invalid texture offset for unit %d\n", unit);
		return -EINVAL;
	}

	BEGIN_RING(9);
	OUT_RING(CP_PACKET0(txfilter_reg, 5));
	OUT_RING(tex->pp_txfilter);
	OUT_RING(tex->pp_txformat);
	OUT_RING(tex->pp_txoffset);
	OUT_RING(tex->pp_txcblend);
	OUT_RING(tex->pp_txablend);
	OUT_RING(tex->pp_tfactor);
	OUT_RING(CP_PACKET0(border_color_reg, 0));
	OUT_RING(tex->pp_border_color);
	ADVANCE_RING();

	return 0;
}

/* Emit 1.1 state
 *
 * Queue register writes for every state group whose RADEON_UPLOAD_* bit is
 * set in @dirty, taking the values from @ctx and from the three texture
 * units in @tex.  Depth, colour and texture offsets are validated (and
 * possibly rewritten in @ctx / @tex) before the group they belong to is
 * queued.
 *
 * Returns 0 on success or -EINVAL as soon as an offset is rejected; groups
 * queued before that point stay queued.
 */
static int radeon_emit_state(drm_radeon_private_t * dev_priv,
			     struct drm_file *file_priv,
			     drm_radeon_context_regs_t * ctx,
			     drm_radeon_texture_regs_t * tex,
			     unsigned int dirty)
{
	int ret;
	RING_LOCALS;

	DRM_DEBUG("dirty=0x%08x\n", dirty);

	if (dirty & RADEON_UPLOAD_CONTEXT) {
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &ctx->rb3d_depthoffset)) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return -EINVAL;
		}

		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &ctx->rb3d_coloroffset)) {
			/* This used to repeat the depth buffer message. */
			DRM_ERROR("Invalid colour buffer offset\n");
			return -EINVAL;
		}

		BEGIN_RING(14);
		OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
		OUT_RING(ctx->pp_misc);
		OUT_RING(ctx->pp_fog_color);
		OUT_RING(ctx->re_solid_color);
		OUT_RING(ctx->rb3d_blendcntl);
		OUT_RING(ctx->rb3d_depthoffset);
		OUT_RING(ctx->rb3d_depthpitch);
		OUT_RING(ctx->rb3d_zstencilcntl);
		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
		OUT_RING(ctx->pp_cntl);
		OUT_RING(ctx->rb3d_cntl);
		OUT_RING(ctx->rb3d_coloroffset);
		OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
		OUT_RING(ctx->rb3d_colorpitch);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VERTFMT) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
		OUT_RING(ctx->se_coord_fmt);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_LINE) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
		OUT_RING(ctx->re_line_pattern);
		OUT_RING(ctx->re_line_state);
		OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
		OUT_RING(ctx->se_line_width);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_BUMPMAP) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
		OUT_RING(ctx->pp_lum_matrix);
		OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
		OUT_RING(ctx->pp_rot_matrix_0);
		OUT_RING(ctx->pp_rot_matrix_1);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MASKS) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
		OUT_RING(ctx->rb3d_stencilrefmask);
		OUT_RING(ctx->rb3d_ropcntl);
		OUT_RING(ctx->rb3d_planemask);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VIEWPORT) {
		BEGIN_RING(7);
		OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
		OUT_RING(ctx->se_vport_xscale);
		OUT_RING(ctx->se_vport_xoffset);
		OUT_RING(ctx->se_vport_yscale);
		OUT_RING(ctx->se_vport_yoffset);
		OUT_RING(ctx->se_vport_zscale);
		OUT_RING(ctx->se_vport_zoffset);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_SETUP) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
		OUT_RING(ctx->se_cntl);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
		OUT_RING(ctx->se_cntl_status);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MISC) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
		OUT_RING(ctx->re_misc);
		ADVANCE_RING();
	}

	/* The three texture units differ only in their register addresses. */
	if (dirty & RADEON_UPLOAD_TEX0) {
		ret = radeon_emit_tex_unit_state(dev_priv, file_priv, &tex[0],
						 0, RADEON_PP_TXFILTER_0,
						 RADEON_PP_BORDER_COLOR_0);
		if (ret)
			return ret;
	}

	if (dirty & RADEON_UPLOAD_TEX1) {
		ret = radeon_emit_tex_unit_state(dev_priv, file_priv, &tex[1],
						 1, RADEON_PP_TXFILTER_1,
						 RADEON_PP_BORDER_COLOR_1);
		if (ret)
			return ret;
	}

	if (dirty & RADEON_UPLOAD_TEX2) {
		ret = radeon_emit_tex_unit_state(dev_priv, file_priv, &tex[2],
						 2, RADEON_PP_TXFILTER_2,
						 RADEON_PP_BORDER_COLOR_2);
		if (ret)
			return ret;
	}

	return 0;
}
/* Emit 1.2 state
 *
 * Queues the z-bias factor and constant when RADEON_UPLOAD_ZBIAS is set in
 * state->dirty, then hands the rest of @state to radeon_emit_state() and
 * returns its result.
 */
static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
			      struct drm_file *file_priv,
			      drm_radeon_state_t * state)
{
	int ret;
	RING_LOCALS;

	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
		BEGIN_RING(3);
		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
		OUT_RING(state->context2.se_zbias_factor);
		OUT_RING(state->context2.se_zbias_constant);
		ADVANCE_RING();
	}

	ret = radeon_emit_state(dev_priv, file_priv, &state->context,
				state->tex, state->dirty);
	return ret;
}
/* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in
 * 1.3 cmdbuffers allow all previous state to be updated as well as
 * the tcl scalar and vector areas.
 */
/* Table of the known state packets, RADEON_MAX_STATE_PACKETS slots.
 * Each entry gives a register (start), a length (len) and the name as a
 * string.
 * NOTE(review): presumably indexed by the RADEON_EMIT_* / R200_EMIT_* packet
 * ids, with len being the number of consecutive register dwords - the code
 * consuming this table is outside this view, confirm there.  The existing
 * numeric markers on a few entries give their array index.
 */
static struct {
int start;
int len;
const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
/* R200 entries start here. */
{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
"R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
"R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
/* R200 cube maps: a faces entry followed by an offsets entry per unit. */
{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"}, /* 61 */
{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
/* RADEON_ cube maps: a faces entry followed by an offsets entry per unit. */
{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
{R200_PP_AFS_0, 32, "R200_PP_AFS_0"}, /* 85 */
{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
/* The following seven entries start at registers that already appear
 * further up (R200_PP_TFACTOR_0, R200_PP_TXFILTER_n) but use a len of 8
 * instead of 6 and carry a different name string.
 */
{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
};
/* ================================================================
* Performance monitoring functions
*/
/* Queue a solid-colour fill of a @w x @h rectangle.
 *
 * (@x, @y) is relative to the upper left corner of the first clip box in the
 * shared area.  The colour (@r, @g, @b) is packed as RGB565 when that is the
 * device colour format and as opaque ARGB8888 otherwise.  The fill targets
 * the front buffer when sarea_priv->pfCurrentPage is 1 and the back buffer
 * in every other case.
 */
static void radeon_clear_box(drm_radeon_private_t * dev_priv,
			     struct drm_radeon_master_private *master_priv,
			     int x, int y, int w, int h, int r, int g, int b)
{
	u32 color;
	RING_LOCALS;

	/* Make the position relative to the first clip box. */
	x += master_priv->sarea_priv->boxes[0].x1;
	y += master_priv->sarea_priv->boxes[0].y1;

	if (dev_priv->color_fmt == RADEON_COLOR_FORMAT_RGB565)
		color = (((r & 0xf8) << 8) |
			 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
	else
		color = (((0xff) << 24) | (r << 16) | (g << 8) | b);

	/* Let the 3D engine go idle and open up the write mask. */
	BEGIN_RING(4);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
	OUT_RING(0xffffffff);
	ADVANCE_RING();

	/* The fill itself. */
	BEGIN_RING(6);
	OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
	OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		 RADEON_GMC_BRUSH_SOLID_COLOR |
		 (dev_priv->color_fmt << 8) |
		 RADEON_GMC_SRC_DATATYPE_COLOR |
		 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);

	if (master_priv->sarea_priv->pfCurrentPage != 1) {
		OUT_RING(dev_priv->back_pitch_offset);
	} else {
		OUT_RING(dev_priv->front_pitch_offset);
	}

	OUT_RING(color);
	OUT_RING((x << 16) | y);
	OUT_RING((w << 16) | h);
	ADVANCE_RING();
}
/* Draw the performance indicator boxes from the counters collected in
 * dev_priv->stats, then reset those counters to zero.
 *
 *   purple - page flipping happened
 *   red    - something had to wait for idle
 *   yellow - textures were loaded
 *   green  - the hardware never went idle (as far as we can tell)
 *   bar    - number of buffers requested, capped at 100
 */
static void radeon_cp_performance_boxes(drm_radeon_private_t *dev_priv, struct drm_radeon_master_private *master_priv)
{
	/* Fold several hints into the wait flag - this is a guess at whether
	 * userspace slept; it would be better if it simply told us.
	 */
	if (dev_priv->stats.last_frame_reads > 1 ||
	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears ||
	    dev_priv->stats.freelist_loops) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	/* Purple: page flip. */
	if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
		radeon_clear_box(dev_priv, master_priv, 4, 4, 8, 8, 255, 0, 255);

	/* Red: waited for idle at some point. */
	if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
		radeon_clear_box(dev_priv, master_priv, 16, 4, 8, 8, 255, 0, 0);

	/* Blue (lost context?) is not drawn. */

	/* Yellow: texture swaps. */
	if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
		radeon_clear_box(dev_priv, master_priv, 40, 4, 8, 8, 255, 255, 0);

	/* Green: the DMA-idle flag was never set. */
	if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
		radeon_clear_box(dev_priv, master_priv, 64, 4, 8, 8, 0, 255, 0);

	/* Bar whose width is the number of buffers requested (not a great
	 * measure, easily confused).
	 */
	if (dev_priv->stats.requested_bufs) {
		if (dev_priv->stats.requested_bufs > 100)
			dev_priv->stats.requested_bufs = 100;

		radeon_clear_box(dev_priv, master_priv, 4, 16,
				 dev_priv->stats.requested_bufs, 4,
				 196, 128, 128);
	}

	/* Start the next frame with fresh counters. */
	memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
}
/* ================================================================
* CP command dispatch functions
*/
static void radeon_cp_dispatch_clear(struct drm_device * dev,
struct drm_master *master,
drm_radeon_clear_t * clear,
drm_radeon_clear_rect_t * depth_boxes)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
struct drm_radeon_master_private *master_priv = master->driver_priv;
drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
int nbox = sarea_priv->nbox;
struct drm_clip_rect *pbox = sarea_priv->boxes;
unsigned int flags = clear->flags;
u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
int i;
RING_LOCALS;
DRM_DEBUG("flags = 0x%x\n", flags);
dev_priv->stats.clears++;
if (sarea_priv->pfCurrentPage == 1) {
unsigned int tmp = flags;
flags &= ~(RADEON_FRONT | RADEON_BACK);
if (tmp & RADEON_FRONT)
flags |= RADEON_BACK;
if (tmp & RADEON_BACK)
flags |= RADEON_FRONT;
}
if (flags & (RADEON_DEPTH|RADEON_STENCIL)) {
if (!dev_priv->have_z_offset) {
printk_once(KERN_ERR "radeon: illegal depth clear request. Buggy mesa detected - please update.\n");
flags &= ~(RADEON_DEPTH | RADEON_STENCIL);
}
}
if (flags & (RADEON_FRONT | RADEON_BACK)) {
BEGIN_RING(4);
/* Ensure the 3D stream is idle before doing a
* 2D fill to clear the front or back buffer.
*/
RADEON_WAIT_UNTIL_3D_IDLE();
OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
OUT_RING(clear->color_mask);
ADVANCE_RING();
/* Make sure we restore the 3D state next time.
*/
sarea_priv->ctx_owner = 0;
for (i = 0; i < nbox; i++) {
int x = pbox[i].x1;
int y = pbox[i].y1;
int w = pbox[i].x2 - x;
int h = pbox[i].y2 - y;
DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
x, y, w, h, flags);
if (flags & RADEON_FRONT) {
BEGIN_RING(6);
OUT_RING(CP_PACKET3
(RADEON_CNTL_PAINT_MULTI, 4));
OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
RADEON_GMC_BRUSH_SOLID_COLOR |
(dev_priv->
color_fmt << 8) |
RADEON_GMC_SRC_DATATYPE_COLOR |
RADEON_ROP3_P |
RADEON_GMC_CLR_CMP_CNTL_DIS);
OUT_RING(dev_priv->front_pitch_offset);
OUT_RING(clear->clear_color);
OUT_RING((x << 16) | y);
OUT_RING((w << 16) | h);
ADVANCE_RING();
}
if (flags & RADEON_BACK) {
BEGIN_RING(6);
OUT_RING(CP_PACKET3
(RADEON_CNTL_PAINT_MULTI, 4));
OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
RADEON_GMC_BRUSH_SOLID_COLOR |
(dev_priv->
color_fmt << 8) |
RADEON_GMC_SRC_DATATYPE_COLOR |
RADEON_ROP3_P |
RADEON_GMC_CLR_CMP_CNTL_DIS);
OUT_RING(dev_priv->back_pitch_offset);
OUT_RING(clear->clear_color);
OUT_RING((x << 16) | y);
OUT_RING((w << 16) | h);
ADVANCE_RING();
}
}
}
/* hyper z clear */
/* no docs available, based on reverse engineering by Stephane Marchesin */
if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
&& (flags & RADEON_CLEAR_FASTZ)) {
int i;
int depthpixperline =
dev_priv->depth_fmt ==
RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
2) : (dev_priv->
depth_pitch / 4);
u32 clearmask;
u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
((clear->depth_mask & 0xff) << 24);
/* Make sure we restore the 3D state next time.
* we haven't touched any "normal" state - still need this?
*/
sarea_priv->ctx_owner = 0;
if ((dev_priv->flags & RADEON_HAS_HIERZ)
&& (flags & RADEON_USE_HIERZ)) {
/* FIXME : reverse engineer that for Rx00 cards */
/* FIXME : the mask supposedly contains low-res z values. So can't set
just to the max (0xff? or actually 0x3fff?), need to take z clear
value into account? */
/* pattern seems to work for r100, though get slight
rendering errors with glxgears. If hierz is not enabled for r100,
only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
other ones are ignored, and the same clear mask can be used. That's
very different behaviour than R200 which needs different clear mask
and different number of tiles to clear if hierz is enabled or not !?!
*/
clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
} else {
/* clear mask : chooses the clearing pattern.
rv250: could be used to clear only parts of macrotiles
(but that would get really complicated...)?
bit 0 and 1 (either or both of them ?!?!) are used to
not clear tile (or maybe one of the bits indicates if the tile is
compressed or not), bit 2 and 3 to not clear tile 1,...,.
Pattern is as follows:
| 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
bits -------------------------------------------------
| 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
rv100: clearmask covers 2x8 4x1 tiles, but one clear still
covers 256 pixels ?!?
*/
clearmask = 0x0;
}
BEGIN_RING(8);
RADEON_WAIT_UNTIL_2D_IDLE();
OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
tempRB3D_DEPTHCLEARVALUE);
/* what offset is this exactly ? */
OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
/* need ctlstat, otherwise get some strange black flickering */
OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
RADEON_RB3D_ZC_FLUSH_ALL);
ADVANCE_RING();
for (i = 0; i < nbox; i++) {
int tileoffset, nrtilesx, nrtilesy, j;
/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
if ((dev_priv->flags & RADEON_HAS_HIERZ)
&& !(dev_priv->microcode_version == UCODE_R200)) {
/* FIXME : figure this out for r200 (when hierz is enabled). Or
maybe r200 actually doesn't need to put the low-res z value into
the tile cache like r100, but just needs to clear the hi-level z-buffer?
Works for R100, both with hierz and without.
R100 seems to operate on 2x1 8x8 tiles, but...
odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
problematic with resolutions which are not 64 pix aligned? */
tileoffset =
((pbox[i].y1 >> 3) * depthpixperline +
pbox[i].x1) >> 6;
nrtilesx =
((pbox[i].x2 & ~63) -
(pbox[i].x1 & ~63)) >> 4;
nrtilesy =
(pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
for (j = 0; j <= nrtilesy; j++) {
BEGIN_RING(4);
OUT_RING(CP_PACKET3
(RADEON_3D_CLEAR_ZMASK, 2));
/* first tile */
OUT_RING(tileoffset * 8);
/* the number of tiles to clear */
OUT_RING(nrtilesx + 4);
/* clear mask : chooses the clearing pattern. */
OUT_RING(clearmask);
ADVANCE_RING();
tileoffset += depthpixperline >> 6;
}
} else if (dev_priv->microcode_version == UCODE_R200) {
/* works for rv250. */
/* find first macro tile (8x2 4x4 z-pixels on rv250) */
tileoffset =
((pbox[i].y1 >> 3) * depthpixperline +
pbox[i].x1) >> 5;
nrtilesx =
(pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
nrtilesy =
(pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
for (j = 0; j <= nrtilesy; j++) {
BEGIN_RING(4);
OUT_RING(CP_PACKET3
(RADEON_3D_CLEAR_ZMASK, 2));
/* first tile */
/* judging by the first tile offset needed, could possibly
directly address/clear 4x4 tiles instead of 8x2 * 4x4
macro tiles, though would still need clear mask for
right/bottom if truly 4x4 granularity is desired ? */
OUT_RING(tileoffset * 16);
/* the number of tiles to clear */
OUT_RING(nrtilesx + 1);
/* clear mask : chooses the clearing pattern. */
OUT_RING(clearmask);
ADVANCE_RING();
tileoffset += depthpixperline >> 5;
}
} else { /* rv 100 */
/* rv100 might not need 64 pix alignment, who knows */
/* offsets are, hmm, weird */
tileoffset =
((pbox[i].y1 >> 4) * depthpixperline +
pbox[i].x1) >> 6;
nrtilesx =
((pbox[i].x2 & ~63) -
(pbox[i].x1 & ~63)) >> 4;
nrtilesy =
(pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
for (j = 0; j <= nrtilesy; j++) {
BEGIN_RING(4);
OUT_RING(CP_PACKET3
(RADEON_3D_CLEAR_ZMASK, 2));
OUT_RING(tileoffset * 128);
/* the number of tiles to clear */
OUT_RING(nrtilesx + 4);
/* clear mask : chooses the clearing pattern. */
OUT_RING(clearmask);
ADVANCE_RING();
tileoffset += depthpixperline >> 6;
}
}
}
/* TODO don't always clear all hi-level z tiles */
if ((dev_priv->flags & RADEON_HAS_HIERZ)
&& (dev_priv->microcode_version == UCODE_R200)
&& (flags & RADEON_USE_HIERZ))
/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
/* FIXME : the mask supposedly contains low-res z values. So can't set
just to the max (0xff? or actually 0x3fff?), need to take z clear
value into account? */
{
BEGIN_RING(4);
OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
OUT_RING(0x0); /* First tile */
OUT_RING(0x3cc0);
OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
ADVANCE_RING();
}
}
/* We have to clear the depth and/or stencil buffers by
* rendering a quad into just those buffers. Thus, we have to
* make sure the 3D engine is configured correctly.
*/
else if ((dev_priv->microcode_version == UCODE_R200) &&
(flags & (RADEON_DEPTH | RADEON_STENCIL))) {
int tempPP_CNTL;
int tempRE_CNTL;
int tempRB3D_CNTL;
int tempRB3D_ZSTENCILCNTL;
int tempRB3D_STENCILREFMASK;
int tempRB3D_PLANEMASK;
int tempSE_CNTL;
int tempSE_VTE_CNTL;
int tempSE_VTX_FMT_0;
int tempSE_VTX_FMT_1;
int tempSE_VAP_CNTL;
int tempRE_AUX_SCISSOR_CNTL;
tempPP_CNTL = 0;
tempRE_CNTL = 0;
tempRB3D_CNTL = depth_clear->rb3d_cntl;
tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
tempRB3D_STENCILREFMASK = 0x0;
tempSE_CNTL = depth_clear->se_cntl;
/* Disable TCL */
tempSE_VAP_CNTL = ( /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK | */
(0x9 <<
SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
tempRB3D_PLANEMASK = 0x0;
tempRE_AUX_SCISSOR_CNTL = 0x0;
tempSE_VTE_CNTL =
SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
/* Vertex format (X, Y, Z, W) */
tempSE_VTX_FMT_0 =
SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
tempSE_VTX_FMT_1 = 0x0;
/*
* Depth buffer specific enables
*/
if (flags & RADEON_DEPTH) {
/* Enable depth buffer */
tempRB3D_CNTL |= RADEON_Z_ENABLE;
} else {
/* Disable depth buffer */
tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
}
/*
* Stencil buffer specific enables
*/
if (flags & RADEON_STENCIL) {
tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
tempRB3D_STENCILREFMASK = clear->depth_mask;
} else {
tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
tempRB3D_STENCILREFMASK = 0x00000000;
}
if (flags & RADEON_USE_COMP_ZBUF) {
tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
RADEON_Z_DECOMPRESSION_ENABLE;
}
if (flags & RADEON_USE_HIERZ) {
tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
}
BEGIN_RING(26);
RADEON_WAIT_UNTIL_2D_IDLE();
OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
tempRB3D_STENCILREFMASK);
OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
ADVANCE_RING();
/* Make sure we restore the 3D state next time.
*/
sarea_priv->ctx_owner = 0;
for (i = 0; i < nbox; i++) {
/* Funny that this should be required --
* sets top-left?
*/
radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
BEGIN_RING(14);
OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
RADEON_PRIM_WALK_RING |
(3 << RADEON_NUM_VERTICES_SHIFT)));
OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
OUT_RING(0x3f800000);
OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
OUT_RING(0x3f800000);
OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
OUT_RING(0x3f800000);
ADVANCE_RING();
}
} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
rb3d_cntl = depth_clear->rb3d_cntl;
if (flags & RADEON_DEPTH) {
rb3d_cntl |= RADEON_Z_ENABLE;
} else {
rb3d_cntl &= ~RADEON_Z_ENABLE;
}
if (flags & RADEON_STENCIL) {
rb3d_cntl |= RADEON_STENCIL_ENABLE;
rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */
} else {
rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
rb3d_stencilrefmask = 0x00000000;
}
if (flags & RADEON_USE_COMP_ZBUF) {
tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
RADEON_Z_DECOMPRESSION_ENABLE;
}
if (flags & RADEON_USE_HIERZ) {
tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
}
BEGIN_RING(13);
RADEON_WAIT_UNTIL_2D_IDLE();
OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
OUT_RING(0x00000000);
OUT_RING(rb3d_cntl);
OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
ADVANCE_RING();
/* Make sure we restore the 3D state next time.
*/
sarea_priv->ctx_owner = 0;
for (i = 0; i < nbox; i++) {
/* Funny that this should be required --
* sets top-left?
*/
radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
BEGIN_RING(15);
OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
OUT_RING(RADEON_VTX_Z_PRESENT |
RADEON_VTX_PKCOLOR_PRESENT);
OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
RADEON_PRIM_WALK_RING |
RADEON_MAOS_ENABLE |
RADEON_VTX_FMT_RADEON_MODE |
(3 << RADEON_NUM_VERTICES_SHIFT)));
OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
OUT_RING(0x0);
OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
OUT_RING(0x0);
OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
OUT_RING(0x0);
ADVANCE_RING();
}
}
/* Increment the clear counter. The client-side 3D driver must
* wait on this value before performing the clear ioctl. We
* need this because the card's so damned fast...
*/
sarea_priv->last_clear++;
BEGIN_RING(4);
RADEON_CLEAR_AGE(sarea_priv->last_clear);
RADEON_WAIT_UNTIL_IDLE();
ADVANCE_RING();
}
static void radeon_cp_dispatch_swap(struct drm_device *dev, struct drm_master *master)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
struct drm_radeon_master_private *master_priv = master->driver_priv;
drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
int nbox = sarea_priv->nbox;
struct drm_clip_rect *pbox = sarea_priv->boxes;
int i;
RING_LOCALS;
DRM_DEBUG("\n");
/* Do some trivial performance monitoring...
*/
if (dev_priv->do_boxes)
radeon_cp_performance_boxes(dev_priv, master_priv);
/* Wait for the 3D stream to idle before dispatching the bitblt.
* This will prevent data corruption between the two streams.
*/
BEGIN_RING(2);
RADEON_WAIT_UNTIL_3D_IDLE();
ADVANCE_RING();
for (i = 0; i < nbox; i++) {
int x = pbox[i].x1;
int y = pbox[i].y1;
int w = pbox[i].x2 - x;
int h = pbox[i].y2 - y;
DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
BEGIN_RING(9);
OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
RADEON_GMC_DST_PITCH_OFFSET_CNTL |
RADEON_GMC_BRUSH_NONE |
(dev_priv->color_fmt << 8) |
RADEON_GMC_SRC_DATATYPE_COLOR |
RADEON_ROP3_S |
RADEON_DP_SRC_SOURCE_MEMORY |
RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
/* Make this work even if front & back are flipped:
*/
OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
if (sarea_priv->pfCurrentPage == 0) {
OUT_RING(dev_priv->back_pitch_offset);
OUT_RING(dev_priv->front_pitch_offset);
} else {
OUT_RING(dev_priv->front_pitch_offset);
OUT_RING(dev_priv->back_pitch_offset);
}
OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
OUT_RING((x << 16) | y);
OUT_RING((x << 16) | y);
OUT_RING((w << 16) | h);
ADVANCE_RING();
}
/* Increment the frame counter. The client-side 3D driver must
* throttle the framerate by waiting for this value before
* performing the swapbuffer ioctl.
*/
sarea_priv->last_frame++;
BEGIN_RING(4);
RADEON_FRAME_AGE(sarea_priv->last_frame);
RADEON_WAIT_UNTIL_2D_IDLE();
ADVANCE_RING();
}
void radeon_cp_dispatch_flip(struct drm_device *dev, struct drm_master *master)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
struct drm_radeon_master_private *master_priv = master->driver_priv;
struct drm_sarea *sarea = (struct drm_sarea *)master_priv->sarea->handle;
int offset = (master_priv->sarea_priv->pfCurrentPage == 1)
? dev_priv->front_offset : dev_priv->back_offset;
RING_LOCALS;
DRM_DEBUG("pfCurrentPage=%d\n",
master_priv->sarea_priv->pfCurrentPage);
/* Do some trivial performance monitoring...
*/
if (dev_priv->do_boxes) {
dev_priv->stats.boxes |= RADEON_BOX_FLIP;
radeon_cp_performance_boxes(dev_priv, master_priv);
}
/* Update the frame offsets for both CRTCs
*/
BEGIN_RING(6);
RADEON_WAIT_UNTIL_3D_IDLE();
OUT_RING_REG(RADEON_CRTC_OFFSET,
((sarea->frame.y * dev_priv->front_pitch +
sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
+ offset);
OUT_RING_REG(RADEON_CRTC2_OFFSET, master_priv->sarea_priv->crtc2_base
+ offset);
ADVANCE_RING();
/* Increment the frame counter. The client-side 3D driver must
* throttle the framerate by waiting for this value before
* performing the swapbuffer ioctl.
*/
master_priv->sarea_priv->last_frame++;
master_priv->sarea_priv->pfCurrentPage =
1 - master_priv->sarea_priv->pfCurrentPage;
BEGIN_RING(2);
RADEON_FRAME_AGE(master_priv->sarea_priv->last_frame);
ADVANCE_RING();
}
/* Check a vertex count against the primitive type encoded in @primitive.
 *
 * Returns non-zero when @nr is not a legal vertex count for that type, or
 * when the type is not one of the recognised RADEON_PRIM_TYPE_* values;
 * returns 0 when the count is acceptable.
 */
static int bad_prim_vertex_nr(int primitive, int nr)
{
	int bad;

	switch (primitive & RADEON_PRIM_TYPE_MASK) {
	case RADEON_PRIM_TYPE_NONE:
	case RADEON_PRIM_TYPE_POINT:
		/* at least one vertex */
		bad = (nr <= 0);
		break;
	case RADEON_PRIM_TYPE_LINE:
		/* non-zero and even */
		bad = (nr == 0 || (nr & 1) != 0);
		break;
	case RADEON_PRIM_TYPE_LINE_STRIP:
		bad = (nr <= 1);
		break;
	case RADEON_PRIM_TYPE_TRI_LIST:
	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
	case RADEON_PRIM_TYPE_RECT_LIST:
		/* non-zero multiple of three */
		bad = (nr == 0 || nr % 3 != 0);
		break;
	case RADEON_PRIM_TYPE_TRI_FAN:
	case RADEON_PRIM_TYPE_TRI_STRIP:
		bad = (nr <= 2);
		break;
	default:
		/* unknown primitive type */
		bad = 1;
		break;
	}

	return bad;
}
/* Description of one primitive handed to radeon_cp_dispatch_vertex() and
 * radeon_cp_dispatch_indices().
 */
typedef struct {
/* Offset of the primitive's data inside the buffer; it is added to
 * buf->offset when the dispatch functions form addresses.
 */
unsigned int start;
/* End offset of the data; the indexed path derives its index count from
 * (finish - (start + RADEON_INDEX_PRIM_OFFSET)) / sizeof(u16).
 */
unsigned int finish;
/* Hardware primitive word: checked against RADEON_PRIM_TYPE_MASK and
 * OR-ed into the emitted draw command.
 */
unsigned int prim;
/* Vertex count emitted with the draw command. */
unsigned int numverts;
/* Added to gart_buffers_offset by the indexed path; not read by the
 * non-indexed path.
 */
unsigned int offset;
/* Emitted verbatim to the ring; logged as "vfmt" (presumably the vertex
 * format word - confirm against the hardware docs).
 */
unsigned int vc_format;
} drm_radeon_tcl_prim_t;
/* Emit a non-indexed primitive from a vertex buffer.
 *
 * The draw packet is emitted once per SAREA cliprect, each time preceded
 * by that cliprect, and exactly once (without a cliprect) when there are
 * none.  Nothing is emitted if the vertex count is illegal for the
 * primitive type.
 */
static void radeon_cp_dispatch_vertex(struct drm_device * dev,
				      struct drm_file *file_priv,
				      struct drm_buf * buf,
				      drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	int vbuf_addr = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int vert_count = (int)prim->numverts;
	int nrects = sarea_priv->nbox;
	int pass = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

	/* The body runs at least once, even with zero cliprects. */
	for (;;) {
		/* Emit the next cliprect, if there is one */
		if (pass < nrects)
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[pass]);

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(vbuf_addr);
		OUT_RING(vert_count);
		OUT_RING(prim->vc_format);
		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
			 RADEON_COLOR_ORDER_RGBA |
			 RADEON_VTX_FMT_RADEON_MODE |
			 (vert_count << RADEON_NUM_VERTICES_SHIFT));

		ADVANCE_RING();

		if (++pass >= nrects)
			break;
	}
}
/* Retire a DMA buffer: stamp it with the next dispatch age, emit that age
 * to the ring (using the R600 or pre-R600 packet as appropriate), and
 * mark the buffer pending with nothing used.
 */
void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	/* The SAREA dispatch counter is pre-incremented into the buffer age. */
	buf_priv->age = ++master_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_R600) {
		BEGIN_RING(2);
		RADEON_DISPATCH_AGE(buf_priv->age);
		ADVANCE_RING();
	} else {
		BEGIN_RING(3);
		R600_DISPATCH_AGE(buf_priv->age);
		ADVANCE_RING();
	}

	buf->pending = 1;
	buf->used = 0;
}
static void radeon_cp_dispatch_indirect(struct drm_device * dev,
struct drm_buf * buf, int start, int end)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
RING_LOCALS;
DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
if (start != end) {
int offset = (dev_priv->gart_buffers_offset
+ buf->offset + start);
int dwords = (end - start + 3) / sizeof(u32);
/* Indirect buffer data must be an even number of
* dwords, so if we've been given an odd number we must
* pad the data with a Type-2 CP packet.
*/
if (dwords & 1) {
u32 *data = (u32 *)
((char *)dev->agp_buffer_map->handle
+ buf->offset + start);
data[dwords++] = RADEON_CP_PACKET2;
}
/* Fire off the indirect buffer */
BEGIN_RING(3);
OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
OUT_RING(offset);
OUT_RING(dwords);
ADVANCE_RING();
}
}
/* Emit an indexed primitive.
 *
 * A five-dword RNDR_GEN_INDX_PRIM header is written into @elt_buf at
 * prim->start, and the range prim->start..prim->finish is then dispatched
 * as an indirect buffer once per SAREA cliprect (once, without a
 * cliprect, when there are none).  Returns early, after logging, if the
 * index count is illegal for the primitive type, if the range is empty,
 * or if prim->start is not 8-byte aligned.
 */
static void radeon_cp_dispatch_indices(struct drm_device *dev,
				       struct drm_master *master,
				       struct drm_buf * elt_buf,
				       drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	int vtx_addr = dev_priv->gart_buffers_offset + prim->offset;
	int idx_start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int nr_indices = (prim->finish - idx_start) / sizeof(u16);
	int nrects = sarea_priv->nbox;
	int pass = 0;
	int dwords;
	u32 *hdr;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start, prim->finish, prim->offset, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, nr_indices)) {
		DRM_ERROR("bad prim %x count %d\n", prim->prim, nr_indices);
		return;
	}

	if (idx_start >= prim->finish || (prim->start & 0x7)) {
		DRM_ERROR("buffer prim %d\n", prim->prim);
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	/* Build the packet header in place, at the start of the range. */
	hdr = (u32 *) ((char *)dev->agp_buffer_map->handle +
		       elt_buf->offset + prim->start);

	hdr[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
	hdr[1] = vtx_addr;
	hdr[2] = prim->numverts;
	hdr[3] = prim->vc_format;
	hdr[4] = (prim->prim |
		  RADEON_PRIM_WALK_IND |
		  RADEON_COLOR_ORDER_RGBA |
		  RADEON_VTX_FMT_RADEON_MODE |
		  (nr_indices << RADEON_NUM_VERTICES_SHIFT));

	/* The body runs at least once, even with zero cliprects. */
	for (;;) {
		if (pass < nrects)
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[pass]);

		radeon_cp_dispatch_indirect(dev, elt_buf,
					    prim->start, prim->finish);

		if (++pass >= nrects)
			break;
	}
}
/* Upper bound, in bytes, on the texture data uploaded in a single pass. */
#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
/* Upload a texture image from user space using host-data blits.
 *
 * The image is sent in one or more passes.  Each pass copies at most
 * RADEON_MAX_TEXTURE_SIZE bytes from image->data into a buffer taken from
 * the freelist, emits a BITBLT_MULTI packet for it, discards the buffer,
 * and advances image->y, image->height and image->data.
 *
 * Returns 0 on success (or when there is nothing left to upload),
 * -EINVAL for a rejected offset, texture format or pitch, -EFAULT when a
 * user copy fails, and -EAGAIN when no buffer is available; in the
 * -EAGAIN case the updated *image is first written back to tex->image.
 */
static int radeon_cp_dispatch_texture(struct drm_device * dev,
struct drm_file *file_priv,
drm_radeon_texture_t * tex,
drm_radeon_tex_image_t * image)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
struct drm_buf *buf;
u32 format;
u32 *buffer;
const u8 __user *data;
unsigned int size, dwords, tex_width, blit_width, spitch;
u32 height;
int i;
u32 texpitch, microtile;
u32 offset, byte_offset;
RING_LOCALS;
if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
DRM_ERROR("Invalid destination offset\n");
return -EINVAL;
}
dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
/* Flush the pixel cache. This ensures no pixel data gets mixed
* up with the texture data from the host data blit, otherwise
* part of the texture image may be corrupted.
*/
BEGIN_RING(4);
RADEON_FLUSH_CACHE();
RADEON_WAIT_UNTIL_IDLE();
ADVANCE_RING();
/* The compiler won't optimize away a division by a variable,
* even if the only legal values are powers of two. Thus, we'll
* use a shift instead.
*/
/* Map the texture format to a blit colour format and convert both the
* texture width and the image width from texels to bytes.
*/
switch (tex->format) {
case RADEON_TXFORMAT_ARGB8888:
case RADEON_TXFORMAT_RGBA8888:
format = RADEON_COLOR_FORMAT_ARGB8888;
tex_width = tex->width * 4;
blit_width = image->width * 4;
break;
case RADEON_TXFORMAT_AI88:
case RADEON_TXFORMAT_ARGB1555:
case RADEON_TXFORMAT_RGB565:
case RADEON_TXFORMAT_ARGB4444:
case RADEON_TXFORMAT_VYUY422:
case RADEON_TXFORMAT_YVYU422:
format = RADEON_COLOR_FORMAT_RGB565;
tex_width = tex->width * 2;
blit_width = image->width * 2;
break;
case RADEON_TXFORMAT_I8:
case RADEON_TXFORMAT_RGB332:
format = RADEON_COLOR_FORMAT_CI8;
tex_width = tex->width * 1;
blit_width = image->width * 1;
break;
default:
DRM_ERROR("invalid texture format %d\n", tex->format);
return -EINVAL;
}
/* Source pitch in units of 64 bytes; a zero pitch is only accepted for
* images that are at most one row high.
*/
spitch = blit_width >> 6;
if (spitch == 0 && image->height > 1)
return -EINVAL;
texpitch = tex->pitch;
/* The micro-tiling flag is tested at its position in the combined
* (pitch << 22) word.
*/
if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
microtile = 1;
if (tex_width < 64) {
texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
/* we got tiled coordinates, untile them */
image->x *= 2;
}
} else
microtile = 0;
/* this might fail for zero-sized uploads - are those illegal? */
if (!radeon_check_offset(dev_priv, tex->offset + image->height *
blit_width - 1)) {
DRM_ERROR("Invalid final destination offset\n");
return -EINVAL;
}
DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
/* One iteration per upload pass. */
do {
DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
tex->offset >> 10, tex->pitch, tex->format,
image->x, image->y, image->width, image->height);
/* Make a copy of some parameters in case we have to
* update them for a multi-pass texture blit.
*/
height = image->height;
data = (const u8 __user *)image->data;
size = height * blit_width;
/* Clamp the pass to whole rows that fit in RADEON_MAX_TEXTURE_SIZE,
* round a tiny upload up to 4 bytes, and finish when nothing is left.
*/
if (size > RADEON_MAX_TEXTURE_SIZE) {
height = RADEON_MAX_TEXTURE_SIZE / blit_width;
size = height * blit_width;
} else if (size < 4 && size > 0) {
size = 4;
} else if (size == 0) {
return 0;
}
buf = radeon_freelist_get(dev);
/* The "0 &&" makes this idle-and-retry path unreachable. */
if (0 && !buf) {
radeon_do_cp_idle(dev_priv);
buf = radeon_freelist_get(dev);
}
/* No buffer: write the progress made so far back to the user's image
* descriptor and report -EAGAIN (presumably so the caller can retry
* from where this call stopped - confirm against the ioctl caller).
*/
if (!buf) {
DRM_DEBUG("EAGAIN\n");
if (copy_to_user(tex->image, image, sizeof(*image)))
return -EFAULT;
return -EAGAIN;
}
/* Dispatch the indirect buffer.
*/
buffer =
(u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
dwords = size / 4;
/* Copy _width bytes from user space; on failure this logs and returns
* -EFAULT from the enclosing function.
*/
#define RADEON_COPY_MT(_buf, _data, _width) \
do { \
if (copy_from_user(_buf, _data, (_width))) {\
DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
return -EFAULT; \
} \
} while(0)
if (microtile) {
/* texture micro tiling in use, minimum texture width is thus 16 bytes.
however, we cannot use blitter directly for texture width < 64 bytes,
since minimum tex pitch is 64 bytes and we need this to match
the texture width, otherwise the blitter will tile it wrong.
Thus, tiling manually in this case. Additionally, need to special
case tex height = 1, since our actual image will have height 2
and we need to ensure we don't read beyond the texture size
from user space. */
if (tex->height == 1) {
if (tex_width >= 64 || tex_width <= 16) {
RADEON_COPY_MT(buffer, data,
(int)(tex_width * sizeof(u32)));
} else if (tex_width == 32) {
RADEON_COPY_MT(buffer, data, 16);
RADEON_COPY_MT(buffer + 8,
data + 16, 16);
}
} else if (tex_width >= 64 || tex_width == 16) {
RADEON_COPY_MT(buffer, data,
(int)(dwords * sizeof(u32)));
} else if (tex_width < 16) {
/* NOTE(review): this and the loops below iterate over tex->height
* rows rather than the per-pass `height` - confirm this is intended.
*/
for (i = 0; i < tex->height; i++) {
RADEON_COPY_MT(buffer, data, tex_width);
buffer += 4;
data += tex_width;
}
} else if (tex_width == 32) {
/* TODO: make sure this works when not fitting in one buffer
(i.e. 32bytes x 2048...) */
for (i = 0; i < tex->height; i += 2) {
RADEON_COPY_MT(buffer, data, 16);
data += 16;
RADEON_COPY_MT(buffer + 8, data, 16);
data += 16;
RADEON_COPY_MT(buffer + 4, data, 16);
data += 16;
RADEON_COPY_MT(buffer + 12, data, 16);
data += 16;
buffer += 16;
}
}
} else {
if (tex_width >= 32) {
/* Texture image width is larger than the minimum, so we
* can upload it directly.
*/
RADEON_COPY_MT(buffer, data,
(int)(dwords * sizeof(u32)));
} else {
/* Texture image width is less than the minimum, so we
* need to pad out each image scanline to the minimum
* width.
*/
for (i = 0; i < tex->height; i++) {
RADEON_COPY_MT(buffer, data, tex_width);
buffer += 8;
data += tex_width;
}
}
}
#undef RADEON_COPY_MT
/* The destination offset is advanced in steps of 2048 rows; the blit's
* y coordinate below is taken modulo 2048 to match.
*/
byte_offset = (image->y & ~2047) * blit_width;
buf->file_priv = file_priv;
buf->used = size;
offset = dev_priv->gart_buffers_offset + buf->offset;
BEGIN_RING(9);
OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
RADEON_GMC_DST_PITCH_OFFSET_CNTL |
RADEON_GMC_BRUSH_NONE |
(format << 8) |
RADEON_GMC_SRC_DATATYPE_COLOR |
RADEON_ROP3_S |
RADEON_DP_SRC_SOURCE_MEMORY |
RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
OUT_RING((spitch << 22) | (offset >> 10));
OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
OUT_RING(0);
OUT_RING((image->x << 16) | (image->y % 2048));
OUT_RING((image->width << 16) | height);
RADEON_WAIT_UNTIL_2D_IDLE();
ADVANCE_RING();
COMMIT_RING();
radeon_cp_discard_buffer(dev, file_priv->master, buf);
/* Update the input parameters for next time */
image->y += height;
image->height -= height;
image->data = (const u8 __user *)image->data + size;
} while (image->height > 0);
/* Flush the pixel cache after the blit completes. This ensures
* the texture data is written out to memory before rendering
* continues.
*/
BEGIN_RING(4);
RADEON_FLUSH_CACHE();
RADEON_WAIT_UNTIL_2D_IDLE();
ADVANCE_RING();
COMMIT_RING();
return 0;
}
/* Emit a 32-word polygon stipple pattern: reset the stipple address to 0
 * and then stream stipple[0..31] into the stipple data register.
 */
static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	u32 *word;
	RING_LOCALS;

	DRM_DEBUG("\n");

	/* 2 dwords for the address write, 1 table header, 32 data words. */
	BEGIN_RING(35);

	OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
	OUT_RING(0x00000000);

	OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
	for (word = stipple; word != stipple + 32; word++)
		OUT_RING(*word);

	ADVANCE_RING();
}
/* Write the cached flags and lower/upper bounds of surface @surf_index to
 * its SURFACEn registers, after idling the CP.  Does nothing when no MMIO
 * mapping is set.
 */
static void radeon_apply_surface_regs(int surf_index,
				      drm_radeon_private_t *dev_priv)
{
	/* Each surface's register group sits 16 bytes after the previous. */
	const int reg_off = 16 * surf_index;

	if (dev_priv->mmio) {
		radeon_do_cp_idle(dev_priv);

		RADEON_WRITE(RADEON_SURFACE0_INFO + reg_off,
			     dev_priv->surfaces[surf_index].flags);
		RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + reg_off,
			     dev_priv->surfaces[surf_index].lower);
		RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + reg_off,
			     dev_priv->surfaces[surf_index].upper);
	}
}
/* Allocates a virtual surface
* doesn't always allocate a real surface, will stretch an existing
* surface when possible.
*
* Note that refcount can be at most 2, since during a free refcount=3
* might mean we have to allocate a new surface which might not always
* be available.
* For example : we allocate three contiguous surfaces ABC. If B is
* freed, we suddenly need two surfaces to store A and C, which might
* not always be available.
*/
static int alloc_surface(drm_radeon_surface_alloc_t *new,
drm_radeon_private_t *dev_priv,
struct drm_file *file_priv)
{
struct radeon_virt_surface *s;
int i;
int virt_surface_index;
uint32_t new_upper, new_lower;
new_lower = new->address;
new_upper = new_lower + new->size - 1;
/* sanity check */
if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
RADEON_SURF_ADDRESS_FIXED_MASK)
|| ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
return -1;
/* make sure there is no overlap with existing surfaces */
for (i = 0; i < RADEON_MAX_SURFACES; i++) {
if ((dev_priv->surfaces[i].refcount != 0) &&
(((new_lower >= dev_priv->surfaces[i].lower) &&
(new_lower < dev_priv->surfaces[i].upper)) ||
((new_lower < dev_priv->surfaces[i].lower) &&
(new_upper > dev_priv->surfaces[i].lower)))) {
return -1;
}
}
/* find a virtual surface */
for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
if (dev_priv->virt_surfaces[i].file_priv == NULL)
break;
if (i == 2 * RADEON_MAX_SURFACES) {
return -1;
}
virt_surface_index = i;
/* try to reuse an existing surface */
for (i = 0; i < RADEON_MAX_SURFACES; i++) {
/* extend before */
if ((dev_priv->surfaces[i].refcount == 1) &&
(new->flags == dev_priv->surfaces[i].flags) &&
(new_upper + 1 == dev_priv->surfaces[i].lower)) {
s = &(dev_priv->virt_surfaces[virt_surface_index]);
s->surface_index = i;
s->lower = new_lower;
s->upper = new_upper;
s->flags = new->flags;
s->file_priv = file_priv;
dev_priv->surfaces[i].refcount++;
dev_priv->surfaces[i].lower = s->lower;
radeon_apply_surface_regs(s->surface_index, dev_priv);
return virt_surface_index;
}
/* extend after */
if ((dev_priv->surfaces[i].refcount == 1) &&
(new->flags == dev_priv->surfaces[i].flags) &&
(new_lower == dev_priv->surfaces[i].upper + 1)) {
s = &(dev_priv->virt_surfaces[virt_surface_index]);
s->surface_index = i;
s->lower = new_lower;
s->upper = new_upper;
s->flags = new->flags;
s->file_priv = file_priv;
dev_priv->surfaces[i].refcount++;
dev_priv->surfaces[i].upper = s->upper;
radeon_apply_surface_regs(s->surface_index, dev_priv);
return virt_surface_index;
}
}
/* okay, we need a new one */
for (i = 0; i < RADEON_MAX_SURFACES; i++) {
if (dev_priv->surfaces[i].refcount == 0) {
s = &(dev_priv->virt_surfaces[virt_surface_index]);
s->surface_index = i;
s->lower = new_lower;
s->upper = new_upper;
s->flags = new->flags;
s->file_priv = file_priv;
dev_priv->surfaces[i].refcount = 1;
dev_priv->surfaces[i].lower = s->lower;
dev_priv->surfaces[i].upper = s->upper;
dev_priv->surfaces[i].flags = s->flags;
radeon_apply_surface_regs(s->surface_index, dev_priv);
return virt_surface_index;
}
}
/* we didn't find anything */
return -1;
}
/* Release the virtual surface owned by @file_priv whose lower bound is
 * @lower.
 *
 * The backing real surface is shrunk at whichever end coincided with the
 * virtual surface, its refcount is dropped (clearing its flags when it
 * reaches zero), and its registers are rewritten.
 *
 * Returns 0 when a matching virtual surface was released, 1 otherwise.
 */
static int free_surface(struct drm_file *file_priv,
			drm_radeon_private_t * dev_priv,
			int lower)
{
	struct radeon_virt_surface *s;
	int idx;
	int i;

	/* find the virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		s = &dev_priv->virt_surfaces[i];

		/* skip unused slots */
		if (!s->file_priv)
			continue;
		/* skip slots with another start address or owner */
		if (lower != s->lower || file_priv != s->file_priv)
			continue;

		idx = s->surface_index;

		if (dev_priv->surfaces[idx].lower == s->lower)
			dev_priv->surfaces[idx].lower = s->upper;

		if (dev_priv->surfaces[idx].upper == s->upper)
			dev_priv->surfaces[idx].upper = s->lower;

		dev_priv->surfaces[idx].refcount--;
		if (dev_priv->surfaces[idx].refcount == 0)
			dev_priv->surfaces[idx].flags = 0;

		s->file_priv = NULL;
		radeon_apply_surface_regs(s->surface_index, dev_priv);
		return 0;
	}

	return 1;
}
/* Free every virtual surface currently owned by @file_priv. */
static void radeon_surfaces_release(struct drm_file *file_priv,
				    drm_radeon_private_t * dev_priv)
{
	struct radeon_virt_surface *vs = dev_priv->virt_surfaces;
	int remaining;

	for (remaining = 2 * RADEON_MAX_SURFACES; remaining > 0; remaining--, vs++) {
		if (vs->file_priv != file_priv)
			continue;
		free_surface(file_priv, dev_priv, vs->lower);
	}
}
/* ================================================================
* IOCTL functions
*/
/* Surface-alloc ioctl handler: hands the request in @data to
 * alloc_surface().  Returns 0 on success and -EINVAL when alloc_surface()
 * reports failure (-1).
 */
static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_surface_alloc_t *alloc = data;

	return (alloc_surface(alloc, dev_priv, file_priv) == -1) ? -EINVAL : 0;
}
/* Surface-free ioctl handler: releases the caller's surface that starts
 * at memfree->address.  Returns 0 on success and -EINVAL when
 * free_surface() finds no matching surface.
 */
static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_surface_free_t *memfree = data;

	return free_surface(file_priv, dev_priv, memfree->address) ? -EINVAL : 0;
}
/* Clear ioctl handler.
 *
 * Copies one depth-clear rectangle per SAREA cliprect from user space
 * into a fixed-size on-stack array and dispatches the clear.
 *
 * Returns 0 on success, -EINVAL for a negative cliprect count, -EFAULT
 * when the rectangles cannot be copied from user space, or whatever the
 * lock / ring-space test macros return.
 */
static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	drm_radeon_clear_t *clear = data;
	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
	int nbox;

	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	/* Fetch the cliprect count exactly once.  The SAREA is presumably
	 * also writable by clients (confirm against the SAREA mapping
	 * setup); validating one read and sizing the copy from a second
	 * read would let a concurrent change overflow depth_boxes[].
	 */
	nbox = *(volatile int *)&sarea_priv->nbox;

	/* A negative count would turn into a huge unsigned copy length. */
	if (nbox < 0)
		return -EINVAL;

	if (nbox > RADEON_NR_SAREA_CLIPRECTS) {
		nbox = RADEON_NR_SAREA_CLIPRECTS;
		sarea_priv->nbox = nbox;
	}

	/* The length is derived from the validated snapshot only. */
	if (copy_from_user(&depth_boxes, clear->depth_boxes,
			   nbox * sizeof(depth_boxes[0])))
		return -EFAULT;

	radeon_cp_dispatch_clear(dev, file_priv->master, clear, depth_boxes);

	COMMIT_RING();
	return 0;
}
/*
 * Turn on page flipping.
 *
 * Emits a six-dword ring sequence that waits for the 3D engine to idle and
 * then rewrites both CRTC offset control registers with
 * RADEON_CRTC_OFFSET_FLIP_CNTL OR-ed into their current value, records that
 * page flipping is active, and forces the SAREA current page to 0 unless it
 * is already 1.  Always returns 0.
 *
 * (Original author's remark: not sure why the flip control bit isn't set
 * all the time.)
 */
static int radeon_do_init_pageflip(struct drm_device *dev, struct drm_master *master)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	RING_LOCALS;

	DRM_DEBUG("\n");

	BEGIN_RING(6);
	RADEON_WAIT_UNTIL_3D_IDLE();

	/* Primary CRTC. */
	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);

	/* Secondary CRTC. */
	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	ADVANCE_RING();

	dev_priv->page_flipping = 1;

	/* Any value other than 1 is normalised to page 0. */
	if (master_priv->sarea_priv->pfCurrentPage == 1)
		return 0;

	master_priv->sarea_priv->pfCurrentPage = 0;
	return 0;
}
/*
 * Page flip ioctl.
 *
 * Swapping and flipping are different operations and therefore have
 * separate ioctls; they can and should be intermixed to support multiple 3D
 * windows.  Page flipping is initialised lazily on first use before the flip
 * itself is dispatched.  Returns 0 once the ring has been committed.
 */
static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;

	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (dev_priv->page_flipping == 0)
		radeon_do_init_pageflip(dev, file_priv->master);

	radeon_cp_dispatch_flip(dev, file_priv->master);

	COMMIT_RING();
	return 0;
}
/*
 * Buffer swap ioctl.
 *
 * Caps the SAREA cliprect count at RADEON_NR_SAREA_CLIPRECTS, dispatches the
 * swap through the R600 or pre-R600 path depending on the chip family, and
 * clears the SAREA context owner before committing the ring.  Returns 0.
 */
static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;

	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_R600)
		radeon_cp_dispatch_swap(dev, file_priv->master);
	else
		r600_cp_dispatch_swap(dev, file_priv);

	sarea_priv->ctx_owner = 0;

	COMMIT_RING();
	return 0;
}
/*
 * Vertex ioctl: dispatch a single primitive from a client-owned DMA buffer.
 *
 * @data is a drm_radeon_vertex_t naming the buffer index, vertex count,
 * primitive type and whether the buffer is to be discarded afterwards.
 * When the SAREA has dirty state other than cliprects, that state is emitted
 * before the primitive.
 *
 * Returns 0 on success, or -EINVAL for an out-of-range buffer index or
 * primitive type, a buffer not owned by the caller, a buffer that is still
 * pending, or a failure in radeon_emit_state().
 */
static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	struct drm_device_dma *dma = dev->dma;
	drm_radeon_vertex_t *vertex = data;
	drm_radeon_sarea_t *sarea_priv;
	drm_radeon_tcl_prim_t tcl;
	struct drm_buf *vbuf;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	sarea_priv = master_priv->sarea_priv;

	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
		  DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);

	/* Validate the request before touching the ring. */
	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex->idx, dma->buf_count - 1);
		return -EINVAL;
	}
	if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", vertex->prim);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	vbuf = dma->buflist[vertex->idx];

	if (vbuf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, vbuf->file_priv);
		return -EINVAL;
	}
	if (vbuf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
		return -EINVAL;
	}

	if (vertex->count) {
		vbuf->used = vertex->count;	/* not used? */

		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
			if (radeon_emit_state(dev_priv, file_priv,
					      &sarea_priv->context_state,
					      sarea_priv->tex_state,
					      sarea_priv->dirty)) {
				DRM_ERROR("radeon_emit_state failed\n");
				return -EINVAL;
			}

			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
					       RADEON_UPLOAD_TEX1IMAGES |
					       RADEON_UPLOAD_TEX2IMAGES |
					       RADEON_REQUIRE_QUIESCENCE);
		}

		/* Describe the whole buffer as one primitive. */
		tcl.prim = vertex->prim;
		tcl.numverts = vertex->count;
		tcl.vc_format = sarea_priv->vc_format;
		tcl.start = 0;
		tcl.finish = vertex->count;	/* unused */

		radeon_cp_dispatch_vertex(dev, file_priv, vbuf, &tcl);
	}

	if (vertex->discard)
		radeon_cp_discard_buffer(dev, file_priv->master, vbuf);

	COMMIT_RING();
	return 0;
}
/*
 * Indexed primitive ioctl: dispatch an index list stored in a client-owned
 * DMA buffer.
 *
 * @data is a drm_radeon_indices_t giving the buffer index, the byte range
 * [start, end) of the indices, the primitive type and a discard flag.  The
 * start offset is moved back by RADEON_INDEX_PRIM_OFFSET (in place, in the
 * request structure) before it is checked for 8-byte alignment and compared
 * against the buffer's current "used" mark.
 *
 * Returns 0 on success, or -EINVAL for an out-of-range buffer index or
 * primitive type, a foreign or pending buffer, a misaligned start, a start
 * below buf->used, or a failure in radeon_emit_state().
 */
static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_radeon_indices_t *elts = data;
	drm_radeon_tcl_prim_t prim;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	sarea_priv = master_priv->sarea_priv;

	DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
		  DRM_CURRENTPID, elts->idx, elts->start, elts->end,
		  elts->discard);

	if (elts->idx < 0 || elts->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  elts->idx, dma->buf_count - 1);
		return -EINVAL;
	}
	if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", elts->prim);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[elts->idx];

	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", elts->idx);
		return -EINVAL;
	}

	/*
	 * The index count the original code derived from (end - start) was
	 * never consumed, so it is no longer computed.
	 */
	elts->start -= RADEON_INDEX_PRIM_OFFSET;

	if (elts->start & 0x7) {
		DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
		return -EINVAL;
	}
	if (elts->start < buf->used) {
		DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
		return -EINVAL;
	}

	buf->used = elts->end;

	if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
		if (radeon_emit_state(dev_priv, file_priv,
				      &sarea_priv->context_state,
				      sarea_priv->tex_state,
				      sarea_priv->dirty)) {
			DRM_ERROR("radeon_emit_state failed\n");
			return -EINVAL;
		}

		sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
				       RADEON_UPLOAD_TEX1IMAGES |
				       RADEON_UPLOAD_TEX2IMAGES |
				       RADEON_REQUIRE_QUIESCENCE);
	}

	/* Build up a prim_t record:
	 */
	prim.start = elts->start;
	prim.finish = elts->end;
	prim.prim = elts->prim;
	prim.offset = 0;	/* offset from start of dma buffers */
	prim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
	prim.vc_format = sarea_priv->vc_format;

	radeon_cp_dispatch_indices(dev, file_priv->master, buf, &prim);
	if (elts->discard) {
		radeon_cp_discard_buffer(dev, file_priv->master, buf);
	}

	COMMIT_RING();
	return 0;
}
/*
 * Texture upload ioctl.
 *
 * Copies the drm_radeon_tex_image_t descriptor referenced by the request
 * into the kernel and hands it to the R600 or pre-R600 texture dispatch
 * routine, depending on the chip family.
 *
 * Returns the dispatch routine's result, -EINVAL when the request has no
 * image pointer, or -EFAULT when the descriptor cannot be copied.
 */
static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_texture_t *tex = data;
	drm_radeon_tex_image_t image;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (!tex->image) {
		DRM_ERROR("null texture image!\n");
		return -EINVAL;
	}

	if (copy_from_user(&image,
			   (drm_radeon_tex_image_t __user *) tex->image,
			   sizeof(image)))
		return -EFAULT;

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_R600)
		return radeon_cp_dispatch_texture(dev, file_priv, tex, &image);

	return r600_cp_dispatch_texture(dev, file_priv, tex, &image);
}
/*
 * Polygon stipple ioctl.
 *
 * Copies a 32-entry u32 stipple mask from the user pointer in the request
 * and dispatches it to the hardware.  Returns 0 on success or -EFAULT when
 * the mask cannot be read from user space.
 */
static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_stipple_t *stipple = data;
	u32 mask[32];

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	/* sizeof(mask) is the full 32 * sizeof(u32) array. */
	if (copy_from_user(&mask, stipple->mask, sizeof(mask)) != 0)
		return -EFAULT;

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	radeon_cp_dispatch_stipple(dev, mask);

	COMMIT_RING();
	return 0;
}
/*
 * Indirect buffer ioctl: submit a client-owned DMA buffer as a raw command
 * stream.
 *
 * @data is a drm_radeon_indirect_t giving the buffer index, the [start, end)
 * range to execute and a discard flag.  On pre-R600 chips a wait-for-3D-idle
 * is queued ahead of the buffer.
 *
 * Returns 0 on success, or -EINVAL for an out-of-range index, a foreign or
 * pending buffer, or a start offset below the buffer's current "used" mark.
 */
static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_indirect_t *indirect = data;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *ibuf;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
		  indirect->idx, indirect->start, indirect->end,
		  indirect->discard);

	if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  indirect->idx, dma->buf_count - 1);
		return -EINVAL;
	}

	ibuf = dma->buflist[indirect->idx];

	if (ibuf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, ibuf->file_priv);
		return -EINVAL;
	}
	if (ibuf->pending) {
		DRM_ERROR("sending pending buffer %d\n", indirect->idx);
		return -EINVAL;
	}
	if (indirect->start < ibuf->used) {
		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
			  indirect->start, ibuf->used);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	ibuf->used = indirect->end;

	/*
	 * The indirect buffer is full of commands from the X server.
	 * Dispatching it unchecked is insecure, which is why this ioctl is
	 * only available to privileged clients.
	 */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_R600) {
		/*
		 * Let the 3D stream drain before the 2D acceleration
		 * commands in the indirect buffer are processed.
		 */
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_3D_IDLE();
		ADVANCE_RING();
		radeon_cp_dispatch_indirect(dev, ibuf, indirect->start, indirect->end);
	} else {
		r600_cp_dispatch_indirect(dev, ibuf, indirect->start, indirect->end);
	}

	if (indirect->discard)
		radeon_cp_discard_buffer(dev, file_priv->master, ibuf);

	COMMIT_RING();
	return 0;
}
/*
 * Multi-primitive vertex ioctl.
 *
 * @data is a drm_radeon_vertex2_t describing a client-owned DMA buffer plus
 * user-space arrays of primitives and state blocks.  Each primitive is copied
 * in, its state block is emitted whenever the state index differs from the
 * previous primitive's, and the primitive is dispatched as an indexed or
 * plain vertex draw depending on RADEON_PRIM_WALK_IND.
 *
 * Returns 0 on success, -EFAULT when a primitive or state block cannot be
 * copied from user space, or -EINVAL for a bad buffer index, a foreign or
 * pending buffer, too many SAREA cliprects, or a state emission failure.
 */
static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	struct drm_device_dma *dma = dev->dma;
	drm_radeon_vertex2_t *vertex = data;
	drm_radeon_sarea_t *sarea_priv;
	struct drm_buf *buf;
	unsigned char laststate;
	int i;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	sarea_priv = master_priv->sarea_priv;

	DRM_DEBUG("pid=%d index=%d discard=%d\n",
		  DRM_CURRENTPID, vertex->idx, vertex->discard);

	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex->idx, dma->buf_count - 1);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex->idx];

	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
		return -EINVAL;
	}

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		return -EINVAL;

	/* 0xff marks "no state emitted yet". */
	laststate = 0xff;
	for (i = 0; i < vertex->nr_prims; i++) {
		drm_radeon_tcl_prim_t tclprim;
		drm_radeon_prim_t prim;

		if (copy_from_user(&prim, &vertex->prim[i], sizeof(prim)))
			return -EFAULT;

		/* Only re-emit state when the primitive switches state block. */
		if (prim.stateidx != laststate) {
			drm_radeon_state_t state;

			if (copy_from_user(&state,
					   &vertex->state[prim.stateidx],
					   sizeof(state)))
				return -EFAULT;

			if (radeon_emit_state2(dev_priv, file_priv, &state)) {
				DRM_ERROR("radeon_emit_state2 failed\n");
				return -EINVAL;
			}

			laststate = prim.stateidx;
		}

		tclprim.start = prim.start;
		tclprim.finish = prim.finish;
		tclprim.prim = prim.prim;
		tclprim.vc_format = prim.vc_format;

		if (!(prim.prim & RADEON_PRIM_WALK_IND)) {
			tclprim.numverts = prim.numverts;
			tclprim.offset = 0;	/* not used */

			radeon_cp_dispatch_vertex(dev, file_priv, buf, &tclprim);
		} else {
			tclprim.offset = prim.numverts * 64;
			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */

			radeon_cp_dispatch_indices(dev, file_priv->master, buf, &tclprim);
		}

		if (sarea_priv->nbox == 1)
			sarea_priv->nbox = 0;
	}

	if (vertex->discard)
		radeon_cp_discard_buffer(dev, file_priv->master, buf);

	COMMIT_RING();
	return 0;
}
/*
 * Emit one state packet from the command buffer as a type-0 CP packet.
 *
 * The packet id in @header selects an entry of the packet[] table, which
 * supplies the start register and the dword count.  The payload is verified
 * by radeon_check_and_fixup_packets() before it is copied to the ring.
 *
 * Returns 0 on success or -EINVAL when the id is out of range, the buffer
 * holds less data than the packet needs, or verification fails.
 */
static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
			       struct drm_file *file_priv,
			       drm_radeon_cmd_header_t header,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int id = (int)header.packet.packet_id;
	int reg, sz;
	RING_LOCALS;

	if (id >= RADEON_MAX_STATE_PACKETS)
		return -EINVAL;

	reg = packet[id].start;
	sz = packet[id].len;

	if (sz * sizeof(u32) > drm_buffer_unprocessed(cmdbuf->buffer)) {
		DRM_ERROR("Packet size provided larger than data provided\n");
		return -EINVAL;
	}

	if (radeon_check_and_fixup_packets(dev_priv, file_priv, id,
					   cmdbuf->buffer) != 0) {
		DRM_ERROR("Packet verification failed\n");
		return -EINVAL;
	}

	/* One header dword followed by sz payload dwords. */
	BEGIN_RING(sz + 1);
	OUT_RING(CP_PACKET0(reg, (sz - 1)));
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
	ADVANCE_RING();

	return 0;
}
/*
 * Upload TCL scalar state from the command buffer.
 *
 * @header supplies the dword count, the start index and the stride; the
 * count dwords that follow in @cmdbuf are written through
 * RADEON_SE_TCL_SCALAR_DATA_REG.
 *
 * Returns 0 on success (including a zero count, which emits nothing) or
 * -EINVAL when the header asks for more data than the buffer still holds.
 */
static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.scalars.count;
	int start = header.scalars.offset;
	int stride = header.scalars.stride;
	RING_LOCALS;

	/*
	 * Same guards as radeon_emit_veclinear(): a zero count would make
	 * "sz - 1" below negative, and a count larger than the remaining
	 * data would copy past what the client supplied.
	 */
	if (!sz)
		return 0;
	if (sz * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
		return -EINVAL;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
	ADVANCE_RING();
	return 0;
}
/*
 * Upload TCL scalar state with the start index biased by 0x100.
 *
 * Identical to radeon_emit_scalars() except that 0x100 is added to the
 * offset taken from @header (the original author's verdict on this
 * duplication: "God this is ugly").
 *
 * Returns 0 on success (including a zero count, which emits nothing) or
 * -EINVAL when the header asks for more data than the buffer still holds.
 */
static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
					   drm_radeon_cmd_header_t header,
					   drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.scalars.count;
	int start = ((unsigned int)header.scalars.offset) + 0x100;
	int stride = header.scalars.stride;
	RING_LOCALS;

	/*
	 * Same guards as radeon_emit_veclinear(): a zero count would make
	 * "sz - 1" below negative, and a count larger than the remaining
	 * data would copy past what the client supplied.
	 */
	if (!sz)
		return 0;
	if (sz * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
		return -EINVAL;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
	ADVANCE_RING();
	return 0;
}
/*
 * Upload TCL vector state from the command buffer.
 *
 * @header supplies the dword count, the start index and the stride.  A
 * write of 0 to RADEON_SE_TCL_STATE_FLUSH is queued first, then the count
 * dwords that follow in @cmdbuf are written through
 * RADEON_SE_TCL_VECTOR_DATA_REG.
 *
 * Returns 0 on success (including a zero count, which emits nothing) or
 * -EINVAL when the header asks for more data than the buffer still holds.
 */
static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.vectors.count;
	int start = header.vectors.offset;
	int stride = header.vectors.stride;
	RING_LOCALS;

	/*
	 * Same guards as radeon_emit_veclinear(): a zero count would make
	 * "sz - 1" below negative, and a count larger than the remaining
	 * data would copy past what the client supplied.
	 */
	if (!sz)
		return 0;
	if (sz * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
		return -EINVAL;

	BEGIN_RING(5 + sz);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
	ADVANCE_RING();

	return 0;
}
/*
 * Upload a linear run of TCL vector state.
 *
 * The header count is multiplied by 4 to get the dword count and the start
 * index is assembled from the low and high address bytes.  The stride is
 * fixed at 1.
 *
 * Returns 0 on success or when the count is zero, -EINVAL when the buffer
 * holds fewer bytes than the upload needs.
 */
static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
					    drm_radeon_cmd_header_t header,
					    drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
	int sz = header.veclinear.count * 4;
	RING_LOCALS;

	if (sz == 0)
		return 0;

	if (sz * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
		return -EINVAL;

	BEGIN_RING(5 + sz);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
	ADVANCE_RING();

	return 0;
}
/*
 * Verify and emit one type-3 packet from the command buffer.
 *
 * radeon_check_and_fixup_packet3() validates the packet and reports its size
 * in dwords; that many dwords are then copied from the buffer to the ring.
 *
 * Returns 0 on success or the verifier's error code.
 */
static int radeon_emit_packet3(struct drm_device * dev,
			       struct drm_file *file_priv,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int ret;
	RING_LOCALS;

	DRM_DEBUG("\n");

	ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
					     cmdbuf, &cmdsz);
	if (ret != 0) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	BEGIN_RING(cmdsz);
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, cmdsz);
	ADVANCE_RING();

	return 0;
}
/*
 * Verify one type-3 packet and emit it once per cliprect.
 *
 * @orig_nbox is the cliprect count the command buffer started with.  When it
 * is zero the packet is not emitted at all; the buffer is simply advanced
 * past it (cmdsz dwords, i.e. cmdsz * 4 bytes).
 *
 * Otherwise the do/while body runs at least once: for each index below
 * cmdbuf->nbox a clip rectangle is copied from user space and emitted ahead
 * of the packet; if cmdbuf->nbox is 0 the packet is emitted once with no
 * clip rectangle.  After the loop a count of exactly 1 is reset to 0.
 *
 * Returns 0 on success, the verifier's error code, or -EFAULT when a clip
 * rectangle cannot be copied from user space.
 *
 * NOTE(review): whether OUT_RING_DRM_BUFFER() advances the buffer position
 * between iterations is not visible here - confirm against the macro.
 */
static int radeon_emit_packet3_cliprect(struct drm_device *dev,
struct drm_file *file_priv,
drm_radeon_kcmd_buffer_t *cmdbuf,
int orig_nbox)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
struct drm_clip_rect box;
unsigned int cmdsz;
int ret;
struct drm_clip_rect __user *boxes = cmdbuf->boxes;
int i = 0;
RING_LOCALS;
DRM_DEBUG("\n");
/* Validate the packet first; cmdsz receives its length in dwords. */
if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
cmdbuf, &cmdsz))) {
DRM_ERROR("Packet verification failed\n");
return ret;
}
/* No cliprects at all: skip the packet instead of emitting it. */
if (!orig_nbox)
goto out;
do {
if (i < cmdbuf->nbox) {
if (copy_from_user(&box, &boxes[i], sizeof(box)))
return -EFAULT;
/* FIXME The second and subsequent times round
* this loop, send a WAIT_UNTIL_3D_IDLE before
* calling emit_clip_rect(). This fixes a
* lockup on fast machines when sending
* several cliprects with a cmdbuf, as when
* waving a 2D window over a 3D
* window. Something in the commands from user
* space seems to hang the card when they're
* sent several times in a row. That would be
* the correct place to fix it but this works
* around it until I can figure that out - Tim
* Smith */
if (i) {
BEGIN_RING(2);
RADEON_WAIT_UNTIL_3D_IDLE();
ADVANCE_RING();
}
radeon_emit_clip_rect(dev_priv, &box);
}
/* Emit the packet itself for this iteration. */
BEGIN_RING(cmdsz);
OUT_RING_DRM_BUFFER(cmdbuf->buffer, cmdsz);
ADVANCE_RING();
} while (++i < cmdbuf->nbox);
/* A single cliprect is consumed by this packet. */
if (cmdbuf->nbox == 1)
cmdbuf->nbox = 0;
return 0;
out:
/* Step over the packet: cmdsz is in dwords, the buffer counts bytes. */
drm_buffer_advance(cmdbuf->buffer, cmdsz * 4);
return 0;
}
/*
 * Queue a wait-until-idle for the engines selected by @flags.
 *
 * Accepts RADEON_WAIT_2D, RADEON_WAIT_3D, or both OR-ed together; each
 * emits a two-dword ring sequence.  Any other value yields -EINVAL.
 * Returns 0 on success.
 */
static int radeon_emit_wait(struct drm_device * dev, int flags)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("%x\n", flags);

	if (flags == RADEON_WAIT_2D) {
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();
	} else if (flags == RADEON_WAIT_3D) {
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_3D_IDLE();
		ADVANCE_RING();
	} else if (flags == (RADEON_WAIT_2D | RADEON_WAIT_3D)) {
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_IDLE();
		ADVANCE_RING();
	} else {
		return -EINVAL;
	}

	return 0;
}
/*
 * Command buffer ioctl: copy a client command stream into the kernel and
 * execute it packet by packet.
 *
 * @data is a drm_radeon_kcmd_buffer_t.  Its buffer field arrives as a user
 * pointer and is replaced by a kernel drm_buffer holding a copy of the
 * stream; that buffer is freed on every exit path that allocated it.
 *
 * A zero-length stream just commits the ring and returns 0.  With R300
 * microcode the whole stream is handed to r300_do_cp_cmdbuf() and its result
 * is returned.  Otherwise each command header is dispatched on its cmd_type
 * to the matching radeon_emit_*() helper.
 *
 * Returns 0 on success, -EINVAL for a size outside 0..64 KiB or for any
 * failing/unknown command (helper error codes are not propagated), or the
 * error from drm_buffer_alloc()/drm_buffer_copy_from_user().
 */
static int radeon_cp_cmdbuf(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
struct drm_device_dma *dma = dev->dma;
struct drm_buf *buf = NULL;
drm_radeon_cmd_header_t stack_header;
int idx;
drm_radeon_kcmd_buffer_t *cmdbuf = data;
int orig_nbox;
LOCK_TEST_WITH_RETURN(dev, file_priv);
RING_SPACE_TEST_WITH_RETURN(dev_priv);
VB_AGE_TEST_WITH_RETURN(dev_priv);
if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
return -EINVAL;
}
/* Allocate an in-kernel area and copy in the cmdbuf. Do this to avoid
* races between checking values and using those values in other code,
* and simply to avoid a lot of function calls to copy in data.
*/
if (cmdbuf->bufsz != 0) {
int rv;
/* Keep the user pointer; cmdbuf->buffer is overwritten below. */
void __user *buffer = cmdbuf->buffer;
rv = drm_buffer_alloc(&cmdbuf->buffer, cmdbuf->bufsz);
if (rv)
return rv;
rv = drm_buffer_copy_from_user(cmdbuf->buffer, buffer,
cmdbuf->bufsz);
if (rv) {
drm_buffer_free(cmdbuf->buffer);
return rv;
}
} else
goto done;
/* Remember the initial cliprect count for the PACKET3_CLIP handler. */
orig_nbox = cmdbuf->nbox;
if (dev_priv->microcode_version == UCODE_R300) {
int temp;
temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);
drm_buffer_free(cmdbuf->buffer);
return temp;
}
/* microcode_version != r300 */
/* Process commands while at least one full header remains. */
while (drm_buffer_unprocessed(cmdbuf->buffer) >= sizeof(stack_header)) {
drm_radeon_cmd_header_t *header;
header = drm_buffer_read_object(cmdbuf->buffer,
sizeof(stack_header), &stack_header);
switch (header->header.cmd_type) {
case RADEON_CMD_PACKET:
DRM_DEBUG("RADEON_CMD_PACKET\n");
if (radeon_emit_packets
(dev_priv, file_priv, *header, cmdbuf)) {
DRM_ERROR("radeon_emit_packets failed\n");
goto err;
}
break;
case RADEON_CMD_SCALARS:
DRM_DEBUG("RADEON_CMD_SCALARS\n");
if (radeon_emit_scalars(dev_priv, *header, cmdbuf)) {
DRM_ERROR("radeon_emit_scalars failed\n");
goto err;
}
break;
case RADEON_CMD_VECTORS:
DRM_DEBUG("RADEON_CMD_VECTORS\n");
if (radeon_emit_vectors(dev_priv, *header, cmdbuf)) {
DRM_ERROR("radeon_emit_vectors failed\n");
goto err;
}
break;
case RADEON_CMD_DMA_DISCARD:
DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
idx = header->dma.buf_idx;
if (idx < 0 || idx >= dma->buf_count) {
DRM_ERROR("buffer index %d (of %d max)\n",
idx, dma->buf_count - 1);
goto err;
}
buf = dma->buflist[idx];
/* Only the owner may discard, and only a buffer that is not pending. */
if (buf->file_priv != file_priv || buf->pending) {
DRM_ERROR("bad buffer %p %p %d\n",
buf->file_priv, file_priv,
buf->pending);
goto err;
}
radeon_cp_discard_buffer(dev, file_priv->master, buf);
break;
case RADEON_CMD_PACKET3:
DRM_DEBUG("RADEON_CMD_PACKET3\n");
if (radeon_emit_packet3(dev, file_priv, cmdbuf)) {
DRM_ERROR("radeon_emit_packet3 failed\n");
goto err;
}
break;
case RADEON_CMD_PACKET3_CLIP:
DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
if (radeon_emit_packet3_cliprect
(dev, file_priv, cmdbuf, orig_nbox)) {
DRM_ERROR("radeon_emit_packet3_clip failed\n");
goto err;
}
break;
case RADEON_CMD_SCALARS2:
DRM_DEBUG("RADEON_CMD_SCALARS2\n");
if (radeon_emit_scalars2(dev_priv, *header, cmdbuf)) {
DRM_ERROR("radeon_emit_scalars2 failed\n");
goto err;
}
break;
case RADEON_CMD_WAIT:
DRM_DEBUG("RADEON_CMD_WAIT\n");
if (radeon_emit_wait(dev, header->wait.flags)) {
DRM_ERROR("radeon_emit_wait failed\n");
goto err;
}
break;
case RADEON_CMD_VECLINEAR:
DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
if (radeon_emit_veclinear(dev_priv, *header, cmdbuf)) {
DRM_ERROR("radeon_emit_veclinear failed\n");
goto err;
}
break;
default:
DRM_ERROR("bad cmd_type %d at byte %d\n",
header->header.cmd_type,
cmdbuf->buffer->iterator);
goto err;
}
}
drm_buffer_free(cmdbuf->buffer);
done:
DRM_DEBUG("DONE\n");
COMMIT_RING();
return 0;
err:
/* Every command failure is reported to the caller as -EINVAL. */
drm_buffer_free(cmdbuf->buffer);
return -EINVAL;
}
/*
 * Get-parameter ioctl: look up one driver/hardware value and copy it to the
 * user pointer in the request.
 *
 * @data is a drm_radeon_getparam_t; param selects the value and value is the
 * user-space destination for a single int.
 *
 * Returns 0 on success, -EINVAL for an unknown parameter (or for
 * RADEON_PARAM_SCRATCH_OFFSET when writeback does not work), or -EFAULT if
 * the result cannot be copied out.
 */
static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_getparam_t *param = data;
	/*
	 * Start from 0: RADEON_PARAM_SAREA_HANDLE falls through to the
	 * copy_to_user() below without assigning a value, which would
	 * otherwise hand uninitialised kernel stack to user space.
	 */
	int value = 0;

	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);

	switch (param->param) {
	case RADEON_PARAM_GART_BUFFER_OFFSET:
		value = dev_priv->gart_buffers_offset;
		break;
	case RADEON_PARAM_LAST_FRAME:
		dev_priv->stats.last_frame_reads++;
		value = GET_SCRATCH(dev_priv, 0);
		break;
	case RADEON_PARAM_LAST_DISPATCH:
		value = GET_SCRATCH(dev_priv, 1);
		break;
	case RADEON_PARAM_LAST_CLEAR:
		dev_priv->stats.last_clear_reads++;
		value = GET_SCRATCH(dev_priv, 2);
		break;
	case RADEON_PARAM_IRQ_NR:
		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
			value = 0;
		else
			value = dev->pdev->irq;
		break;
	case RADEON_PARAM_GART_BASE:
		value = dev_priv->gart_vm_start;
		break;
	case RADEON_PARAM_REGISTER_HANDLE:
		value = dev_priv->mmio->offset;
		break;
	case RADEON_PARAM_STATUS_HANDLE:
		value = dev_priv->ring_rptr_offset;
		break;
#if BITS_PER_LONG == 32
		/*
		 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
		 * pointer which can't fit into an int-sized variable.  According to
		 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
		 * not supporting it shouldn't be a problem.  If the same functionality
		 * is needed on 64-bit platforms, a new ioctl() would have to be added,
		 * so backwards-compatibility for the embedded platforms can be
		 * maintained.  --davidm 4-Feb-2004.
		 */
	case RADEON_PARAM_SAREA_HANDLE:
		/* The lock is the first dword in the sarea. */
		/* no users of this parameter; reports the initial 0 */
		break;
#endif
	case RADEON_PARAM_GART_TEX_HANDLE:
		value = dev_priv->gart_textures_offset;
		break;
	case RADEON_PARAM_SCRATCH_OFFSET:
		if (!dev_priv->writeback_works)
			return -EINVAL;
		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
			value = R600_SCRATCH_REG_OFFSET;
		else
			value = RADEON_SCRATCH_REG_OFFSET;
		break;
	case RADEON_PARAM_CARD_TYPE:
		if (dev_priv->flags & RADEON_IS_PCIE)
			value = RADEON_CARD_PCIE;
		else if (dev_priv->flags & RADEON_IS_AGP)
			value = RADEON_CARD_AGP;
		else
			value = RADEON_CARD_PCI;
		break;
	case RADEON_PARAM_VBLANK_CRTC:
		value = radeon_vblank_crtc_get(dev);
		break;
	case RADEON_PARAM_FB_LOCATION:
		value = radeon_read_fb_location(dev_priv);
		break;
	case RADEON_PARAM_NUM_GB_PIPES:
		value = dev_priv->num_gb_pipes;
		break;
	case RADEON_PARAM_NUM_Z_PIPES:
		value = dev_priv->num_z_pipes;
		break;
	default:
		DRM_DEBUG("Invalid parameter %d\n", param->param);
		return -EINVAL;
	}

	if (copy_to_user(param->value, &value, sizeof(int))) {
		DRM_ERROR("copy_to_user\n");
		return -EFAULT;
	}

	return 0;
}
static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
drm_radeon_setparam_t *sp = data;
struct drm_radeon_driver_file_fields *radeon_priv;
switch (sp->param) {
case RADEON_SETPARAM_FB_LOCATION:
radeon_priv = file_priv->driver_priv;
radeon_priv->radeon_fb_delta = dev_priv->fb_location -
sp->value;
break;
case RADEON_SETPARAM_SWITCH_TILING:
if (sp->value == 0) {
DRM_DEBUG("color tiling disabled\n");
dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
if (master_priv->sarea_priv)
master_priv->sarea_priv->tiling_enabled = 0;
} else if (sp->value == 1) {
DRM_DEBUG("color tiling enabled\n");
dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
if (master_priv->sarea_priv)
master_priv->sarea_priv->tiling_enabled = 1;
}
break;
case RADEON_SETPARAM_PCIGART_LOCATION:
dev_priv->pcigart_offset = sp->value;
dev_priv->pcigart_offset_set = 1;
break;
case RADEON_SETPARAM_NEW_MEMMAP:
dev_priv->new_memmap = sp->value;
break;
case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
dev_priv->gart_info.table_size = sp->value;
if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
break;
case RADEON_SETPARAM_VBLANK_CRTC:
return radeon_vblank_crtc_set(dev, sp->value);
break;
default:
DRM_DEBUG("Invalid parameter %d\n", sp->param);
return -EINVAL;
}
return 0;
}
/*
 * Per-client cleanup, run when a client dies:
 *  - reset the page-flipping state,
 *  - free any memory the client allocated from the GART and FB heaps,
 *  - free any radeon surfaces the client allocated.
 *
 * The DRM infrastructure takes care of reclaiming DMA buffers.  Nothing is
 * done when the device has no private data.
 */
void radeon_driver_preclose(struct drm_device *dev, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;

	if (!dev_priv)
		return;

	dev_priv->page_flipping = 0;
	radeon_mem_release(file_priv, dev_priv->gart_heap);
	radeon_mem_release(file_priv, dev_priv->fb_heap);
	radeon_surfaces_release(file_priv, dev_priv);
}
/*
 * Last-close hook: release the surfaces owned by PCIGART_FILE_PRIV and then
 * tear the device state down through radeon_do_release().
 *
 * The surface release dereferences the private data, so it is skipped when
 * dev->dev_private is NULL, matching the check radeon_driver_preclose() and
 * radeon_driver_open() apply.  radeon_do_release() is still called in that
 * case, as before.
 */
void radeon_driver_lastclose(struct drm_device *dev)
{
	if (dev->dev_private)
		radeon_surfaces_release(PCIGART_FILE_PRIV, dev->dev_private);
	radeon_do_release(dev);
}
int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
struct drm_radeon_driver_file_fields *radeon_priv;
DRM_DEBUG("\n");
radeon_priv = kmalloc(sizeof(*radeon_priv), GFP_KERNEL);
if (!radeon_priv)
return -ENOMEM;
file_priv->driver_priv = radeon_priv;
if (dev_priv)
radeon_priv->radeon_fb_delta = dev_priv->fb_location;
else
radeon_priv->radeon_fb_delta = 0;
return 0;
}
/*
 * Post-close hook: free the per-file driver data that radeon_driver_open()
 * attached to @file_priv.
 */
void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
{
	kfree(file_priv->driver_priv);
}
/*
 * Driver ioctl table: each entry binds a RADEON_* ioctl to its handler and
 * to the DRM permission flags required to call it.  Every entry requires
 * DRM_AUTH; the CP init/start/stop/reset, indirect-buffer and heap-init
 * entries additionally require DRM_MASTER and DRM_ROOT_ONLY.
 */
struct drm_ioctl_desc radeon_ioctls[] = {
DRM_IOCTL_DEF_DRV(RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_RESET, radeon_engine_reset, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_FREE, radeon_mem_free, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH),
DRM_IOCTL_DEF_DRV(RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH)
};
/* Number of entries in radeon_ioctls[]. */
int radeon_max_ioctl = ARRAY_SIZE(radeon_ioctls);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment