Commit 43cf3bf0 authored by Chris Wilson, committed by Daniel Vetter

drm/i915: Improved w/a for rps on Baytrail

Rewrite commit 31685c25
Author: Deepak S <deepak.s@linux.intel.com>
Date:   Thu Jul 3 17:33:01 2014 -0400

    drm/i915/vlv: WA for Turbo and RC6 to work together.

Other than code clarity, the major improvement is to disable the extra
interrupts generated when idle. However, the reclocking remains rather
slow under the new manual regime; in particular, it fails to downclock
as quickly as desired. The second major improvement is that, for certain
workloads such as games, we need to combine the render and media activity
counters: the work of displaying a frame is split across the engines, and
both need to be taken into account when deciding the global GPU frequency,
since memory cycles are shared.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Deepak S <deepak.s@linux.intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Deepak S <deepak.s@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
parent aed242ff
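To illustrate the combined-counter point before the diff: if a frame is
rendered on the render engine and then blitted by the media engine, each
engine in isolation can report roughly half the true load, so thresholding
either counter alone under-clocks the GPU. Below is a minimal standalone C
sketch of the combined check. The sample values, and the simplifying
assumption that the residency counters and the CZ timestamp tick at the same
rate, are hypothetical; the real conversion between clock domains is the
fixed-point scaling done by vlv_c0_above() in the patch.

	#include <stdint.h>
	#include <stdio.h>

	/* Snapshot of the activity counters, mirroring struct intel_rps_ei
	 * from the patch: a timestamp plus one C0 residency counter per
	 * engine. */
	struct rps_ei {
		uint32_t cz_clock;	/* CZ timestamp ticks */
		uint32_t render_c0;	/* render engine C0 residency */
		uint32_t media_c0;	/* media engine C0 residency */
	};

	/* Combined busyness over an interval, in percent. Assumes (for this
	 * sketch only) that all three counters tick at the same rate; the
	 * kernel instead cross-multiplies with the real clock ratios. */
	static unsigned int combined_busy_pct(const struct rps_ei *old,
					      const struct rps_ei *now)
	{
		uint64_t time = now->cz_clock - old->cz_clock;
		uint64_t c0 = (uint64_t)(now->render_c0 - old->render_c0) +
			      (now->media_c0 - old->media_c0);

		return time ? 100 * c0 / time : 0;
	}

	int main(void)
	{
		/* Hypothetical interval: each engine alone is ~45% busy. */
		struct rps_ei old = { 0, 0, 0 };
		struct rps_ei now = { 1000000, 450000, 450000 };

		/* Per-engine view says 45%; the combined view says 90% and
		 * correctly asks for a higher GPU frequency. */
		printf("combined: %u%%\n", combined_busy_pct(&old, &now));
		return 0;
	}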
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -997,129 +997,84 @@ static void notify_ring(struct drm_device *dev,
 	wake_up_all(&ring->irq_queue);
 }
 
-static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
-			    struct intel_rps_ei *rps_ei)
+static void vlv_c0_read(struct drm_i915_private *dev_priv,
+			struct intel_rps_ei *ei)
 {
-	u32 cz_ts, cz_freq_khz;
-	u32 render_count, media_count;
-	u32 elapsed_render, elapsed_media, elapsed_time;
-	u32 residency = 0;
+	ei->cz_clock = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
+	ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
+	ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
+}
 
-	cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
-	cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
+static bool vlv_c0_above(struct drm_i915_private *dev_priv,
+			 const struct intel_rps_ei *old,
+			 const struct intel_rps_ei *now,
+			 int threshold)
+{
+	u64 time, c0;
 
-	render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
-	media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
+	if (old->cz_clock == 0)
+		return false;
 
-	if (rps_ei->cz_clock == 0) {
-		rps_ei->cz_clock = cz_ts;
-		rps_ei->render_c0 = render_count;
-		rps_ei->media_c0 = media_count;
+	time = now->cz_clock - old->cz_clock;
+	time *= threshold * dev_priv->mem_freq;
 
-		return dev_priv->rps.cur_freq;
-	}
+	/* Workload can be split between render + media, e.g. SwapBuffers
+	 * being blitted in X after being rendered in mesa. To account for
+	 * this we need to combine both engines into our activity counter.
+	 */
+	c0 = now->render_c0 - old->render_c0;
+	c0 += now->media_c0 - old->media_c0;
+	c0 *= 100 * VLV_CZ_CLOCK_TO_MILLI_SEC * 4 / 1000;
 
-	elapsed_time = cz_ts - rps_ei->cz_clock;
-	rps_ei->cz_clock = cz_ts;
+	return c0 >= time;
+}
 
-	elapsed_render = render_count - rps_ei->render_c0;
-	rps_ei->render_c0 = render_count;
+void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
+{
+	vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
+	dev_priv->rps.up_ei = dev_priv->rps.down_ei;
+	dev_priv->rps.ei_interrupt_count = 0;
+}
 
-	elapsed_media = media_count - rps_ei->media_c0;
-	rps_ei->media_c0 = media_count;
+static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
+{
+	struct intel_rps_ei now;
+	u32 events = 0;
 
-	/* Convert all the counters into common unit of milli sec */
-	elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
-	elapsed_render /= cz_freq_khz;
-	elapsed_media /= cz_freq_khz;
+	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
+		return 0;
 
-	/*
-	 * Calculate overall C0 residency percentage
-	 * only if elapsed time is non zero
-	 */
-	if (elapsed_time) {
-		residency =
-			((max(elapsed_render, elapsed_media) * 100)
-				/ elapsed_time);
-	}
+	vlv_c0_read(dev_priv, &now);
+	if (now.cz_clock == 0)
+		return 0;
 
-	return residency;
-}
-
-/**
- * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
- * busy-ness calculated from C0 counters of render & media power wells
- * @dev_priv: DRM device private
- *
- */
-static int vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
-{
-	u32 residency_C0_up = 0, residency_C0_down = 0;
-	int new_delay, adj;
-
-	dev_priv->rps.ei_interrupt_count++;
-
-	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
-
-	if (dev_priv->rps.up_ei.cz_clock == 0) {
-		vlv_c0_residency(dev_priv, &dev_priv->rps.up_ei);
-		vlv_c0_residency(dev_priv, &dev_priv->rps.down_ei);
-		return dev_priv->rps.cur_freq;
-	}
-
 	/*
 	 * To down throttle, C0 residency should be less than down threshold
 	 * for continous EI intervals. So calculate down EI counters
 	 * once in VLV_INT_COUNT_FOR_DOWN_EI
 	 */
-	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
-
+	if (++dev_priv->rps.ei_interrupt_count >= VLV_INT_COUNT_FOR_DOWN_EI) {
+		pm_iir |= GEN6_PM_RP_DOWN_EI_EXPIRED;
 		dev_priv->rps.ei_interrupt_count = 0;
-
-		residency_C0_down = vlv_c0_residency(dev_priv,
-						     &dev_priv->rps.down_ei);
-	} else {
-		residency_C0_up = vlv_c0_residency(dev_priv,
-						   &dev_priv->rps.up_ei);
 	}
 
-	new_delay = dev_priv->rps.cur_freq;
-
-	adj = dev_priv->rps.last_adj;
-	/* C0 residency is greater than UP threshold. Increase Frequency */
-	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
-		if (adj > 0)
-			adj *= 2;
-		else
-			adj = 1;
-
-		if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
-			new_delay = dev_priv->rps.cur_freq + adj;
-
-		/*
-		 * For better performance, jump directly
-		 * to RPe if we're below it.
-		 */
-		if (new_delay < dev_priv->rps.efficient_freq)
-			new_delay = dev_priv->rps.efficient_freq;
-	} else if (!dev_priv->rps.ei_interrupt_count &&
-		   (residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
-		if (adj < 0)
-			adj *= 2;
-		else
-			adj = -1;
-		/*
-		 * This means, C0 residency is less than down threshold over
-		 * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
-		 */
-		if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit)
-			new_delay = dev_priv->rps.cur_freq + adj;
+	if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
+		if (!vlv_c0_above(dev_priv,
+				  &dev_priv->rps.down_ei, &now,
+				  VLV_RP_DOWN_EI_THRESHOLD))
+			events |= GEN6_PM_RP_DOWN_THRESHOLD;
+		dev_priv->rps.down_ei = now;
 	}
 
-	return new_delay;
+	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+		if (vlv_c0_above(dev_priv,
+				 &dev_priv->rps.up_ei, &now,
+				 VLV_RP_UP_EI_THRESHOLD))
+			events |= GEN6_PM_RP_UP_THRESHOLD;
+		dev_priv->rps.up_ei = now;
+	}
+
+	return events;
 }
 
 static void gen6_pm_rps_work(struct work_struct *work)
@@ -1149,6 +1104,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
 
 	mutex_lock(&dev_priv->rps.hw_lock);
 
+	pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
+
 	adj = dev_priv->rps.last_adj;
 	if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
 		if (adj > 0)
@@ -1171,8 +1128,6 @@ static void gen6_pm_rps_work(struct work_struct *work)
 		else
 			new_delay = dev_priv->rps.min_freq_softlimit;
 		adj = 0;
-	} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
-		new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
 	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
 		if (adj < 0)
 			adj *= 2;
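A note on the fixed-point scaling in vlv_c0_above() above: rather than
dividing the counters down to a percentage first (which truncates on short
intervals), both sides of the comparison are scaled up to a common unit.
Using the old code's conversions, where the elapsed time in milliseconds is
dcz / VLV_CZ_CLOCK_TO_MILLI_SEC and the C0 counters advance at
mem_freq * 1000 / 4 counts per millisecond of residency, the condition
"busyness >= threshold percent" expands to

	c0 * 100 / ((mem_freq * 1000 / 4) * (dcz / VLV_CZ_CLOCK_TO_MILLI_SEC)) >= threshold

and multiplying through by the denominator gives

	c0 * 100 * VLV_CZ_CLOCK_TO_MILLI_SEC * 4 / 1000 >= dcz * threshold * mem_freq

which are precisely the c0 and time values the new function computes and
compares.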
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -6220,8 +6220,8 @@ enum skl_disp_power_wells {
 #define GEN6_GT_GFX_RC6p			0x13810C
 #define GEN6_GT_GFX_RC6pp			0x138110
 
-#define VLV_RENDER_C0_COUNT_REG		0x138118
-#define VLV_MEDIA_C0_COUNT_REG		0x13811C
+#define VLV_RENDER_C0_COUNT			0x138118
+#define VLV_MEDIA_C0_COUNT			0x13811C
 
 #define GEN6_PCODE_MAILBOX			0x138124
 #define GEN6_PCODE_READY			(1<<31)
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9201,6 +9201,8 @@ void intel_mark_busy(struct drm_device *dev)
 
 	intel_runtime_pm_get(dev_priv);
 	i915_update_gfx_val(dev_priv);
+	if (INTEL_INFO(dev)->gen >= 6)
+		gen6_rps_busy(dev_priv);
 	dev_priv->mm.busy = true;
 }
 
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1242,6 +1242,8 @@ void intel_disable_gt_powersave(struct drm_device *dev);
 void intel_suspend_gt_powersave(struct drm_device *dev);
 void intel_reset_gt_powersave(struct drm_device *dev);
 void gen6_update_ring_freq(struct drm_device *dev);
+void gen6_rps_busy(struct drm_i915_private *dev_priv);
+void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
 void gen6_rps_boost(struct drm_i915_private *dev_priv);
 void ilk_wm_get_hw_state(struct drm_device *dev);
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4041,6 +4041,18 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
 }
 
+void gen6_rps_busy(struct drm_i915_private *dev_priv)
+{
+	mutex_lock(&dev_priv->rps.hw_lock);
+	if (dev_priv->rps.enabled) {
+		if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
+			gen6_rps_reset_ei(dev_priv);
+		I915_WRITE(GEN6_PMINTRMSK,
+			   gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
+	}
+	mutex_unlock(&dev_priv->rps.hw_lock);
+}
+
 void gen6_rps_idle(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
@@ -4052,15 +4064,21 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
 		else
 			gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
 		dev_priv->rps.last_adj = 0;
+		I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
 	}
 	mutex_unlock(&dev_priv->rps.hw_lock);
 }
 
 void gen6_rps_boost(struct drm_i915_private *dev_priv)
 {
+	u32 val;
+
 	mutex_lock(&dev_priv->rps.hw_lock);
-	if (dev_priv->rps.enabled) {
-		intel_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit);
+	val = dev_priv->rps.max_freq_softlimit;
+	if (dev_priv->rps.enabled &&
+	    dev_priv->mm.busy &&
+	    dev_priv->rps.cur_freq < val) {
+		intel_set_rps(dev_priv->dev, val);
 		dev_priv->rps.last_adj = 0;
 	}
 	mutex_unlock(&dev_priv->rps.hw_lock);
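Taken together, the intel_pm.c hooks implement the interrupt-disabling half
of the workaround: gen6_rps_idle() masks all RPS interrupts by writing ~0 to
GEN6_PMINTRMSK once the GPU goes idle, and gen6_rps_busy() re-arms the mask
for the current frequency when work resumes, first resetting the EI baselines
via gen6_rps_reset_ei() so that counter snapshots taken before the idle
period cannot produce a spurious up- or down-clock event on the first
interrupt.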