Commit f691e2f4 authored by Daniel Vetter's avatar Daniel Vetter

drm/i915: swizzling support for snb/ivb

We have to do this manually. Somebody had a Great Idea.

I've measured speed-ups just a few percent above the noise level
(below 5% for the best case), but no slowdows. Chris Wilson measured
quite a bit more (10-20% above the usual snb variance) on a more
recent and better tuned version of sna, but also recorded a few
slow-downs on benchmarks know for uglier amounts of snb-induced
variance.

v2: Incorporate Ben Widawsky's preliminary review comments and
elaborate a bit about the performance impact in the changelog.

v3: Add a comment as to why we don't need to check the 3rd memory
channel.

v4: Fixup whitespace.
Acked-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: default avatarBen Widawsky <ben@bwidawsk.net>
Reviewed-by: default avatarEric Anholt <eric@anholt.net>
Signed-Off-by: default avatarDaniel Vetter <daniel.vetter@ffwll.ch>
parent 172975aa
...@@ -1208,7 +1208,7 @@ static int i915_load_gem_init(struct drm_device *dev) ...@@ -1208,7 +1208,7 @@ static int i915_load_gem_init(struct drm_device *dev)
i915_gem_do_init(dev, 0, mappable_size, gtt_size - PAGE_SIZE); i915_gem_do_init(dev, 0, mappable_size, gtt_size - PAGE_SIZE);
mutex_lock(&dev->struct_mutex); mutex_lock(&dev->struct_mutex);
ret = i915_gem_init_ringbuffer(dev); ret = i915_gem_init_hw(dev);
mutex_unlock(&dev->struct_mutex); mutex_unlock(&dev->struct_mutex);
if (ret) if (ret)
return ret; return ret;
......
...@@ -495,7 +495,7 @@ static int i915_drm_thaw(struct drm_device *dev) ...@@ -495,7 +495,7 @@ static int i915_drm_thaw(struct drm_device *dev)
mutex_lock(&dev->struct_mutex); mutex_lock(&dev->struct_mutex);
dev_priv->mm.suspended = 0; dev_priv->mm.suspended = 0;
error = i915_gem_init_ringbuffer(dev); error = i915_gem_init_hw(dev);
mutex_unlock(&dev->struct_mutex); mutex_unlock(&dev->struct_mutex);
if (HAS_PCH_SPLIT(dev)) if (HAS_PCH_SPLIT(dev))
...@@ -686,6 +686,8 @@ int i915_reset(struct drm_device *dev, u8 flags) ...@@ -686,6 +686,8 @@ int i915_reset(struct drm_device *dev, u8 flags)
!dev_priv->mm.suspended) { !dev_priv->mm.suspended) {
dev_priv->mm.suspended = 0; dev_priv->mm.suspended = 0;
i915_gem_init_swizzling(dev);
dev_priv->ring[RCS].init(&dev_priv->ring[RCS]); dev_priv->ring[RCS].init(&dev_priv->ring[RCS]);
if (HAS_BSD(dev)) if (HAS_BSD(dev))
dev_priv->ring[VCS].init(&dev_priv->ring[VCS]); dev_priv->ring[VCS].init(&dev_priv->ring[VCS]);
......
...@@ -1187,7 +1187,8 @@ int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj, ...@@ -1187,7 +1187,8 @@ int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj,
uint32_t read_domains, uint32_t read_domains,
uint32_t write_domain); uint32_t write_domain);
int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
int __must_check i915_gem_init_ringbuffer(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev);
void i915_gem_init_swizzling(struct drm_device *dev);
void i915_gem_cleanup_ringbuffer(struct drm_device *dev); void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
void i915_gem_do_init(struct drm_device *dev, void i915_gem_do_init(struct drm_device *dev,
unsigned long start, unsigned long start,
......
...@@ -3681,12 +3681,31 @@ i915_gem_idle(struct drm_device *dev) ...@@ -3681,12 +3681,31 @@ i915_gem_idle(struct drm_device *dev)
return 0; return 0;
} }
void i915_gem_init_swizzling(struct drm_device *dev)
{
drm_i915_private_t *dev_priv = dev->dev_private;
if (INTEL_INFO(dev)->gen < 6 ||
dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
return;
I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
DISP_TILE_SURFACE_SWIZZLING);
I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
if (IS_GEN6(dev))
I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB));
else
I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB));
}
int int
i915_gem_init_ringbuffer(struct drm_device *dev) i915_gem_init_hw(struct drm_device *dev)
{ {
drm_i915_private_t *dev_priv = dev->dev_private; drm_i915_private_t *dev_priv = dev->dev_private;
int ret; int ret;
i915_gem_init_swizzling(dev);
ret = intel_init_render_ring_buffer(dev); ret = intel_init_render_ring_buffer(dev);
if (ret) if (ret)
return ret; return ret;
...@@ -3742,7 +3761,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, ...@@ -3742,7 +3761,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
mutex_lock(&dev->struct_mutex); mutex_lock(&dev->struct_mutex);
dev_priv->mm.suspended = 0; dev_priv->mm.suspended = 0;
ret = i915_gem_init_ringbuffer(dev); ret = i915_gem_init_hw(dev);
if (ret != 0) { if (ret != 0) {
mutex_unlock(&dev->struct_mutex); mutex_unlock(&dev->struct_mutex);
return ret; return ret;
......
...@@ -93,8 +93,23 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) ...@@ -93,8 +93,23 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
if (INTEL_INFO(dev)->gen >= 6) { if (INTEL_INFO(dev)->gen >= 6) {
swizzle_x = I915_BIT_6_SWIZZLE_NONE; uint32_t dimm_c0, dimm_c1;
swizzle_y = I915_BIT_6_SWIZZLE_NONE; dimm_c0 = I915_READ(MAD_DIMM_C0);
dimm_c1 = I915_READ(MAD_DIMM_C1);
dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
/* Enable swizzling when the channels are populated with
* identically sized dimms. We don't need to check the 3rd
* channel because no cpu with gpu attached ships in that
* configuration. Also, swizzling only makes sense for 2
* channels anyway. */
if (dimm_c0 == dimm_c1) {
swizzle_x = I915_BIT_6_SWIZZLE_9_10;
swizzle_y = I915_BIT_6_SWIZZLE_9;
} else {
swizzle_x = I915_BIT_6_SWIZZLE_NONE;
swizzle_y = I915_BIT_6_SWIZZLE_NONE;
}
} else if (IS_GEN5(dev)) { } else if (IS_GEN5(dev)) {
/* On Ironlake whatever DRAM config, GPU always do /* On Ironlake whatever DRAM config, GPU always do
* same swizzling setup. * same swizzling setup.
......
...@@ -295,6 +295,12 @@ ...@@ -295,6 +295,12 @@
#define FENCE_REG_SANDYBRIDGE_0 0x100000 #define FENCE_REG_SANDYBRIDGE_0 0x100000
#define SANDYBRIDGE_FENCE_PITCH_SHIFT 32 #define SANDYBRIDGE_FENCE_PITCH_SHIFT 32
/* control register for cpu gtt access */
#define TILECTL 0x101000
#define TILECTL_SWZCTL (1 << 0)
#define TILECTL_TLB_PREFETCH_DIS (1 << 2)
#define TILECTL_BACKSNOOP_DIS (1 << 3)
/* /*
* Instruction and interrupt control regs * Instruction and interrupt control regs
*/ */
...@@ -318,6 +324,11 @@ ...@@ -318,6 +324,11 @@
#define RING_MAX_IDLE(base) ((base)+0x54) #define RING_MAX_IDLE(base) ((base)+0x54)
#define RING_HWS_PGA(base) ((base)+0x80) #define RING_HWS_PGA(base) ((base)+0x80)
#define RING_HWS_PGA_GEN6(base) ((base)+0x2080) #define RING_HWS_PGA_GEN6(base) ((base)+0x2080)
#define ARB_MODE 0x04030
#define ARB_MODE_SWIZZLE_SNB (1<<4)
#define ARB_MODE_SWIZZLE_IVB (1<<5)
#define ARB_MODE_ENABLE(x) GFX_MODE_ENABLE(x)
#define ARB_MODE_DISABLE(x) GFX_MODE_DISABLE(x)
#define RENDER_HWS_PGA_GEN7 (0x04080) #define RENDER_HWS_PGA_GEN7 (0x04080)
#define RING_FAULT_REG(ring) (0x4094 + 0x100*(ring)->id) #define RING_FAULT_REG(ring) (0x4094 + 0x100*(ring)->id)
#define DONE_REG 0x40b0 #define DONE_REG 0x40b0
...@@ -1037,6 +1048,29 @@ ...@@ -1037,6 +1048,29 @@
#define C0DRB3 0x10206 #define C0DRB3 0x10206
#define C1DRB3 0x10606 #define C1DRB3 0x10606
/** snb MCH registers for reading the DRAM channel configuration */
#define MAD_DIMM_C0 (MCHBAR_MIRROR_BASE_SNB + 0x5004)
#define MAD_DIMM_C1 (MCHBAR_MIRROR_BASE_SNB + 0x5008)
#define MAD_DIMM_C2 (MCHBAR_MIRROR_BASE_SNB + 0x500C)
#define MAD_DIMM_ECC_MASK (0x3 << 24)
#define MAD_DIMM_ECC_OFF (0x0 << 24)
#define MAD_DIMM_ECC_IO_ON_LOGIC_OFF (0x1 << 24)
#define MAD_DIMM_ECC_IO_OFF_LOGIC_ON (0x2 << 24)
#define MAD_DIMM_ECC_ON (0x3 << 24)
#define MAD_DIMM_ENH_INTERLEAVE (0x1 << 22)
#define MAD_DIMM_RANK_INTERLEAVE (0x1 << 21)
#define MAD_DIMM_B_WIDTH_X16 (0x1 << 20) /* X8 chips if unset */
#define MAD_DIMM_A_WIDTH_X16 (0x1 << 19) /* X8 chips if unset */
#define MAD_DIMM_B_DUAL_RANK (0x1 << 18)
#define MAD_DIMM_A_DUAL_RANK (0x1 << 17)
#define MAD_DIMM_A_SELECT (0x1 << 16)
/* DIMM sizes are in multiples of 256mb. */
#define MAD_DIMM_B_SIZE_SHIFT 8
#define MAD_DIMM_B_SIZE_MASK (0xff << MAD_DIMM_B_SIZE_SHIFT)
#define MAD_DIMM_A_SIZE_SHIFT 0
#define MAD_DIMM_A_SIZE_MASK (0xff << MAD_DIMM_A_SIZE_SHIFT)
/* Clocking configuration register */ /* Clocking configuration register */
#define CLKCFG 0x10c00 #define CLKCFG 0x10c00
#define CLKCFG_FSB_400 (5 << 0) /* hrawclk 100 */ #define CLKCFG_FSB_400 (5 << 0) /* hrawclk 100 */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment