Commit c717d156 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'pm-3.15-final' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull intel pstate fixes from Rafael Wysocki:
 "Final power management fixes for 3.15

   - Taking non-idle time into account when calculating core busy time
     was a mistake and led to a performance regression.  Since the
     problem it was supposed to address is now taken care of in a
     different way, we don't need to do it any more, so drop the
     non-idle time tracking from intel_pstate.  Dirk Brandewie.

   - Changing to fixed point math throughout the busy calculation
     introduced rounding errors that adversely affect the accuracy of
     intel_pstate's computations.  Fix from Dirk Brandewie.

   - The PID controller algorithm used by intel_pstate assumes that the
     time interval between two adjacent samples will always be the same
     which is not the case for deferable timers (used by intel_pstate)
     when the system is idle.  This leads to inaccurate predictions and
     artificially increases convergence times for the minimum P-state.
     Fix from Dirk Brandewie.

   - intel_pstate carries out computations using 32-bit variables that
     may overflow for large enough values of APERF/MPERF.  Switch to
     using 64-bit variables for computations, from Doug Smythies"

* tag 'pm-3.15-final' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  intel_pstate: Improve initial busy calculation
  intel_pstate: add sample time scaling
  intel_pstate: Correct rounding in busy calculation
  intel_pstate: Remove C0 tracking
parents 9e9a928e bf810222
...@@ -40,10 +40,10 @@ ...@@ -40,10 +40,10 @@
#define BYT_TURBO_VIDS 0x66d #define BYT_TURBO_VIDS 0x66d
#define FRAC_BITS 6 #define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS) #define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS) #define fp_toint(X) ((X) >> FRAC_BITS)
#define FP_ROUNDUP(X) ((X) += 1 << FRAC_BITS)
static inline int32_t mul_fp(int32_t x, int32_t y) static inline int32_t mul_fp(int32_t x, int32_t y)
{ {
...@@ -59,8 +59,8 @@ struct sample { ...@@ -59,8 +59,8 @@ struct sample {
int32_t core_pct_busy; int32_t core_pct_busy;
u64 aperf; u64 aperf;
u64 mperf; u64 mperf;
unsigned long long tsc;
int freq; int freq;
ktime_t time;
}; };
struct pstate_data { struct pstate_data {
...@@ -98,9 +98,9 @@ struct cpudata { ...@@ -98,9 +98,9 @@ struct cpudata {
struct vid_data vid; struct vid_data vid;
struct _pid pid; struct _pid pid;
ktime_t last_sample_time;
u64 prev_aperf; u64 prev_aperf;
u64 prev_mperf; u64 prev_mperf;
unsigned long long prev_tsc;
struct sample sample; struct sample sample;
}; };
...@@ -200,7 +200,10 @@ static signed int pid_calc(struct _pid *pid, int32_t busy) ...@@ -200,7 +200,10 @@ static signed int pid_calc(struct _pid *pid, int32_t busy)
pid->last_err = fp_error; pid->last_err = fp_error;
result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm; result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
if (result >= 0)
result = result + (1 << (FRAC_BITS-1));
else
result = result - (1 << (FRAC_BITS-1));
return (signed int)fp_toint(result); return (signed int)fp_toint(result);
} }
...@@ -560,47 +563,42 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) ...@@ -560,47 +563,42 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
static inline void intel_pstate_calc_busy(struct cpudata *cpu, static inline void intel_pstate_calc_busy(struct cpudata *cpu,
struct sample *sample) struct sample *sample)
{ {
int32_t core_pct; int64_t core_pct;
int32_t c0_pct; int32_t rem;
core_pct = div_fp(int_tofp((sample->aperf)), core_pct = int_tofp(sample->aperf) * int_tofp(100);
int_tofp((sample->mperf))); core_pct = div_u64_rem(core_pct, int_tofp(sample->mperf), &rem);
core_pct = mul_fp(core_pct, int_tofp(100));
FP_ROUNDUP(core_pct);
c0_pct = div_fp(int_tofp(sample->mperf), int_tofp(sample->tsc)); if ((rem << 1) >= int_tofp(sample->mperf))
core_pct += 1;
sample->freq = fp_toint( sample->freq = fp_toint(
mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct)); mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct));
sample->core_pct_busy = mul_fp(core_pct, c0_pct); sample->core_pct_busy = (int32_t)core_pct;
} }
static inline void intel_pstate_sample(struct cpudata *cpu) static inline void intel_pstate_sample(struct cpudata *cpu)
{ {
u64 aperf, mperf; u64 aperf, mperf;
unsigned long long tsc;
rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_APERF, aperf);
rdmsrl(MSR_IA32_MPERF, mperf); rdmsrl(MSR_IA32_MPERF, mperf);
tsc = native_read_tsc();
aperf = aperf >> FRAC_BITS; aperf = aperf >> FRAC_BITS;
mperf = mperf >> FRAC_BITS; mperf = mperf >> FRAC_BITS;
tsc = tsc >> FRAC_BITS;
cpu->last_sample_time = cpu->sample.time;
cpu->sample.time = ktime_get();
cpu->sample.aperf = aperf; cpu->sample.aperf = aperf;
cpu->sample.mperf = mperf; cpu->sample.mperf = mperf;
cpu->sample.tsc = tsc;
cpu->sample.aperf -= cpu->prev_aperf; cpu->sample.aperf -= cpu->prev_aperf;
cpu->sample.mperf -= cpu->prev_mperf; cpu->sample.mperf -= cpu->prev_mperf;
cpu->sample.tsc -= cpu->prev_tsc;
intel_pstate_calc_busy(cpu, &cpu->sample); intel_pstate_calc_busy(cpu, &cpu->sample);
cpu->prev_aperf = aperf; cpu->prev_aperf = aperf;
cpu->prev_mperf = mperf; cpu->prev_mperf = mperf;
cpu->prev_tsc = tsc;
} }
static inline void intel_pstate_set_sample_time(struct cpudata *cpu) static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
...@@ -614,13 +612,25 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu) ...@@ -614,13 +612,25 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
{ {
int32_t core_busy, max_pstate, current_pstate; int32_t core_busy, max_pstate, current_pstate, sample_ratio;
u32 duration_us;
u32 sample_time;
core_busy = cpu->sample.core_pct_busy; core_busy = cpu->sample.core_pct_busy;
max_pstate = int_tofp(cpu->pstate.max_pstate); max_pstate = int_tofp(cpu->pstate.max_pstate);
current_pstate = int_tofp(cpu->pstate.current_pstate); current_pstate = int_tofp(cpu->pstate.current_pstate);
core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
return FP_ROUNDUP(core_busy);
sample_time = (pid_params.sample_rate_ms * USEC_PER_MSEC);
duration_us = (u32) ktime_us_delta(cpu->sample.time,
cpu->last_sample_time);
if (duration_us > sample_time * 3) {
sample_ratio = div_fp(int_tofp(sample_time),
int_tofp(duration_us));
core_busy = mul_fp(core_busy, sample_ratio);
}
return core_busy;
} }
static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment