Commit 0d571b62 authored by Rafael J. Wysocki

Merge branch 'pm-tools'

* pm-tools:
  tools/power turbostat: bugfix: TDP MSRs print bits fixing
  tools/power turbostat: correct output for MSR_NHM_SNB_PKG_CST_CFG_CTL dump
  tools/power turbostat: call __cpuid() instead of __get_cpuid()
  tools/power turbostat: indicate SMX and SGX support
  tools/power turbostat: detect and work around syscall jitter
  tools/power turbostat: show GFX%rc6
  tools/power turbostat: show GFXMHz
  tools/power turbostat: show IRQs per CPU
  tools/power turbostat: make fewer systems calls
  tools/power turbostat: fix compiler warnings
  tools/power turbostat: add --out option for saving output in a file
  tools/power turbostat: re-name "%Busy" field to "Busy%"
  tools/power turbostat: Intel Xeon x200: fix turbo-ratio decoding
  tools/power turbostat: Intel Xeon x200: fix erroneous bclk value
  tools/power turbostat: allow sub-sec intervals
  tools/power turbostat: Decode MSR_MISC_PWR_MGMT
  tools/power turbostat: decode HWP registers
  x86 msr-index: Simplify syntax for HWP fields
  tools/power turbostat: CPUID(0x16) leaf shows base, max, and bus frequency
  tools/power turbostat: decode more CPUID fields
parents 93dffd03 3fdb7464
...@@ -230,10 +230,10 @@ ...@@ -230,10 +230,10 @@
#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11) #define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11)
/* IA32_HWP_CAPABILITIES */ /* IA32_HWP_CAPABILITIES */
#define HWP_HIGHEST_PERF(x) (x & 0xff) #define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff)
#define HWP_GUARANTEED_PERF(x) ((x & (0xff << 8)) >>8) #define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff)
#define HWP_MOSTEFFICIENT_PERF(x) ((x & (0xff << 16)) >>16) #define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff)
#define HWP_LOWEST_PERF(x) ((x & (0xff << 24)) >>24) #define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff)
/* IA32_HWP_REQUEST */ /* IA32_HWP_REQUEST */
#define HWP_MIN_PERF(x) (x & 0xff) #define HWP_MIN_PERF(x) (x & 0xff)
......
...@@ -34,7 +34,10 @@ name as necessary to disambiguate it from others is necessary. Note that option ...@@ -34,7 +34,10 @@ name as necessary to disambiguate it from others is necessary. Note that option
\fB--debug\fP displays additional system configuration information. Invoking this parameter \fB--debug\fP displays additional system configuration information. Invoking this parameter
more than once may also enable internal turbostat debug information. more than once may also enable internal turbostat debug information.
.PP .PP
\fB--interval seconds\fP overrides the default 5-second measurement interval. \fB--interval seconds\fP overrides the default 5.0 second measurement interval.
.PP
\fB--out output_file\fP turbostat output is written to the specified output_file.
The file is truncated if it already exists, and it is created if it does not exist.
.PP .PP
\fB--help\fP displays usage for the most common parameters. \fB--help\fP displays usage for the most common parameters.
.PP .PP
...@@ -61,7 +64,7 @@ displays the statistics gathered since it was forked. ...@@ -61,7 +64,7 @@ displays the statistics gathered since it was forked.
.nf .nf
\fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. \fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together.
\fBAVG_MHz\fP number of cycles executed divided by time elapsed. \fBAVG_MHz\fP number of cycles executed divided by time elapsed.
\fB%Busy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. \fBBusy%\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state.
\fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state). \fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state).
\fBTSC_MHz\fP average MHz that the TSC ran during the entire interval. \fBTSC_MHz\fP average MHz that the TSC ran during the entire interval.
.fi .fi
...@@ -83,13 +86,14 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T ...@@ -83,13 +86,14 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T
\fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM.
.fi .fi
.PP .PP
.SH EXAMPLE .SH PERIODIC EXAMPLE
Without any parameters, turbostat displays statistics every 5 seconds. Without any parameters, turbostat displays statistics every 5 seconds.
(override interval with "-i sec" option, or specify a command Periodic output goes to stdout, by default, unless --out is used to specify an output file.
for turbostat to fork). The 5-second interval can be changed with the "-i sec" option.
Or a command may be specified as in "FORK EXAMPLE" below.
.nf .nf
[root@hsw]# ./turbostat [root@hsw]# ./turbostat
CPU Avg_MHz %Busy Bzy_MHz TSC_MHz CPU Avg_MHz Busy% Bzy_MHz TSC_MHz
- 488 12.51 3898 3498 - 488 12.51 3898 3498
0 0 0.01 3885 3498 0 0 0.01 3885 3498
4 3897 99.99 3898 3498 4 3897 99.99 3898 3498
...@@ -145,7 +149,7 @@ cpu0: MSR_IA32_THERM_STATUS: 0x88340000 (48 C +/- 1) ...@@ -145,7 +149,7 @@ cpu0: MSR_IA32_THERM_STATUS: 0x88340000 (48 C +/- 1)
cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1) cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1)
cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1) cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1)
cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1) cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1)
Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt
- - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00 - - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00
0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00 0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00
0 4 3897 99.98 3898 3498 0 0.02 0 4 3897 99.98 3898 3498 0 0.02
...@@ -171,14 +175,16 @@ The --debug option adds additional columns to the measurement output, including C ...@@ -171,14 +175,16 @@ The --debug option adds additional columns to the measurement output, including C
See the field definitions above. See the field definitions above.
.SH FORK EXAMPLE .SH FORK EXAMPLE
If turbostat is invoked with a command, it will fork that command If turbostat is invoked with a command, it will fork that command
and output the statistics gathered when the command exits. and output the statistics gathered after the command exits.
In this case, turbostat output goes to stderr, by default.
Output can instead be saved to a file using the --out option.
eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds
until ^C while the other CPUs are mostly idle: until ^C while the other CPUs are mostly idle:
.nf .nf
root@hsw: turbostat cat /dev/zero > /dev/null root@hsw: turbostat cat /dev/zero > /dev/null
^C ^C
CPU Avg_MHz %Busy Bzy_MHz TSC_MHz CPU Avg_MHz Busy% Bzy_MHz TSC_MHz
- 482 12.51 3854 3498 - 482 12.51 3854 3498
0 0 0.01 1960 3498 0 0 0.01 1960 3498
4 0 0.00 2128 3498 4 0 0.00 2128 3498
...@@ -192,12 +198,12 @@ root@hsw: turbostat cat /dev/zero > /dev/null ...@@ -192,12 +198,12 @@ root@hsw: turbostat cat /dev/zero > /dev/null
.fi .fi
Above the cycle soaker drives cpu5 up to its 3.9 GHz turbo limit. Above the cycle soaker drives cpu5 up to its 3.9 GHz turbo limit.
The first row shows the average MHz and %Busy across all the processors in the system. The first row shows the average MHz and Busy% across all the processors in the system.
Note that the Avg_MHz column reflects the total number of cycles executed Note that the Avg_MHz column reflects the total number of cycles executed
divided by the measurement interval. If the %Busy column is 100%, divided by the measurement interval. If the Busy% column is 100%,
then the processor was running at that speed the entire interval. then the processor was running at that speed the entire interval.
The Avg_MHz divided by the %Busy (as a fraction) results in the Bzy_MHz -- The Avg_MHz divided by the Busy% (as a fraction) results in the Bzy_MHz --
which is the average frequency while the processor was executing -- which is the average frequency while the processor was executing --
not including any non-busy idle time. not including any non-busy idle time.
...@@ -233,7 +239,7 @@ in the brand string in /proc/cpuinfo. On a system where ...@@ -233,7 +239,7 @@ in the brand string in /proc/cpuinfo. On a system where
the TSC stops in idle, TSC_MHz will drop the TSC stops in idle, TSC_MHz will drop
below the processor's base frequency. below the processor's base frequency.
%Busy = MPERF_delta/TSC_delta Busy% = MPERF_delta/TSC_delta
Bzy_MHz = TSC_delta*APERF_delta/MPERF_delta/measurement_interval Bzy_MHz = TSC_delta*APERF_delta/MPERF_delta/measurement_interval
......
...@@ -38,12 +38,15 @@ ...@@ -38,12 +38,15 @@
#include <string.h> #include <string.h>
#include <ctype.h> #include <ctype.h>
#include <sched.h> #include <sched.h>
#include <time.h>
#include <cpuid.h> #include <cpuid.h>
#include <linux/capability.h> #include <linux/capability.h>
#include <errno.h> #include <errno.h>
char *proc_stat = "/proc/stat"; char *proc_stat = "/proc/stat";
unsigned int interval_sec = 5; FILE *outf;
int *fd_percpu;
struct timespec interval_ts = {5, 0};
unsigned int debug; unsigned int debug;
unsigned int rapl_joules; unsigned int rapl_joules;
unsigned int summary_only; unsigned int summary_only;
...@@ -72,6 +75,7 @@ unsigned int extra_msr_offset64; ...@@ -72,6 +75,7 @@ unsigned int extra_msr_offset64;
unsigned int extra_delta_offset32; unsigned int extra_delta_offset32;
unsigned int extra_delta_offset64; unsigned int extra_delta_offset64;
unsigned int aperf_mperf_multiplier = 1; unsigned int aperf_mperf_multiplier = 1;
int do_irq = 1;
int do_smi; int do_smi;
double bclk; double bclk;
double base_hz; double base_hz;
...@@ -86,6 +90,10 @@ char *output_buffer, *outp; ...@@ -86,6 +90,10 @@ char *output_buffer, *outp;
unsigned int do_rapl; unsigned int do_rapl;
unsigned int do_dts; unsigned int do_dts;
unsigned int do_ptm; unsigned int do_ptm;
unsigned int do_gfx_rc6_ms;
unsigned long long gfx_cur_rc6_ms;
unsigned int do_gfx_mhz;
unsigned int gfx_cur_mhz;
unsigned int tcc_activation_temp; unsigned int tcc_activation_temp;
unsigned int tcc_activation_temp_override; unsigned int tcc_activation_temp_override;
double rapl_power_units, rapl_time_units; double rapl_power_units, rapl_time_units;
...@@ -98,6 +106,12 @@ unsigned int crystal_hz; ...@@ -98,6 +106,12 @@ unsigned int crystal_hz;
unsigned long long tsc_hz; unsigned long long tsc_hz;
int base_cpu; int base_cpu;
double discover_bclk(unsigned int family, unsigned int model); double discover_bclk(unsigned int family, unsigned int model);
unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
#define RAPL_PKG (1 << 0) #define RAPL_PKG (1 << 0)
/* 0x610 MSR_PKG_POWER_LIMIT */ /* 0x610 MSR_PKG_POWER_LIMIT */
...@@ -145,6 +159,7 @@ struct thread_data { ...@@ -145,6 +159,7 @@ struct thread_data {
unsigned long long extra_delta64; unsigned long long extra_delta64;
unsigned long long extra_msr32; unsigned long long extra_msr32;
unsigned long long extra_delta32; unsigned long long extra_delta32;
unsigned int irq_count;
unsigned int smi_count; unsigned int smi_count;
unsigned int cpu_id; unsigned int cpu_id;
unsigned int flags; unsigned int flags;
...@@ -172,6 +187,8 @@ struct pkg_data { ...@@ -172,6 +187,8 @@ struct pkg_data {
unsigned long long pkg_any_core_c0; unsigned long long pkg_any_core_c0;
unsigned long long pkg_any_gfxe_c0; unsigned long long pkg_any_gfxe_c0;
unsigned long long pkg_both_core_gfxe_c0; unsigned long long pkg_both_core_gfxe_c0;
unsigned long long gfx_rc6_ms;
unsigned int gfx_mhz;
unsigned int package_id; unsigned int package_id;
unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */
unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */
...@@ -212,6 +229,9 @@ struct topo_params { ...@@ -212,6 +229,9 @@ struct topo_params {
struct timeval tv_even, tv_odd, tv_delta; struct timeval tv_even, tv_odd, tv_delta;
int *irq_column_2_cpu; /* /proc/interrupts column numbers */
int *irqs_per_cpu; /* indexed by cpu_num */
void setup_all_buffers(void); void setup_all_buffers(void);
int cpu_is_not_present(int cpu) int cpu_is_not_present(int cpu)
...@@ -262,23 +282,34 @@ int cpu_migrate(int cpu) ...@@ -262,23 +282,34 @@ int cpu_migrate(int cpu)
else else
return 0; return 0;
} }
int get_msr_fd(int cpu)
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{ {
ssize_t retval;
char pathname[32]; char pathname[32];
int fd; int fd;
fd = fd_percpu[cpu];
if (fd)
return fd;
sprintf(pathname, "/dev/cpu/%d/msr", cpu); sprintf(pathname, "/dev/cpu/%d/msr", cpu);
fd = open(pathname, O_RDONLY); fd = open(pathname, O_RDONLY);
if (fd < 0) if (fd < 0)
err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
retval = pread(fd, msr, sizeof *msr, offset); fd_percpu[cpu] = fd;
close(fd);
return fd;
}
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
ssize_t retval;
retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
if (retval != sizeof *msr) if (retval != sizeof *msr)
err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset); err(-1, "msr %d offset 0x%llx read failed", cpu, (unsigned long long)offset);
return 0; return 0;
} }
...@@ -286,8 +317,8 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) ...@@ -286,8 +317,8 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
/* /*
* Example Format w/ field column widths: * Example Format w/ field column widths:
* *
* Package Core CPU Avg_MHz Bzy_MHz TSC_MHz SMI %Busy CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz IRQ SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
* 123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678
*/ */
void print_header(void) void print_header(void)
...@@ -301,7 +332,7 @@ void print_header(void) ...@@ -301,7 +332,7 @@ void print_header(void)
if (has_aperf) if (has_aperf)
outp += sprintf(outp, " Avg_MHz"); outp += sprintf(outp, " Avg_MHz");
if (has_aperf) if (has_aperf)
outp += sprintf(outp, " %%Busy"); outp += sprintf(outp, " Busy%%");
if (has_aperf) if (has_aperf)
outp += sprintf(outp, " Bzy_MHz"); outp += sprintf(outp, " Bzy_MHz");
outp += sprintf(outp, " TSC_MHz"); outp += sprintf(outp, " TSC_MHz");
...@@ -318,6 +349,8 @@ void print_header(void) ...@@ -318,6 +349,8 @@ void print_header(void)
if (!debug) if (!debug)
goto done; goto done;
if (do_irq)
outp += sprintf(outp, " IRQ");
if (do_smi) if (do_smi)
outp += sprintf(outp, " SMI"); outp += sprintf(outp, " SMI");
...@@ -335,6 +368,12 @@ void print_header(void) ...@@ -335,6 +368,12 @@ void print_header(void)
if (do_ptm) if (do_ptm)
outp += sprintf(outp, " PkgTmp"); outp += sprintf(outp, " PkgTmp");
if (do_gfx_rc6_ms)
outp += sprintf(outp, " GFX%%rc6");
if (do_gfx_mhz)
outp += sprintf(outp, " GFXMHz");
if (do_skl_residency) { if (do_skl_residency) {
outp += sprintf(outp, " Totl%%C0"); outp += sprintf(outp, " Totl%%C0");
outp += sprintf(outp, " Any%%C0"); outp += sprintf(outp, " Any%%C0");
...@@ -409,6 +448,8 @@ int dump_counters(struct thread_data *t, struct core_data *c, ...@@ -409,6 +448,8 @@ int dump_counters(struct thread_data *t, struct core_data *c,
extra_msr_offset32, t->extra_msr32); extra_msr_offset32, t->extra_msr32);
outp += sprintf(outp, "msr0x%x: %016llX\n", outp += sprintf(outp, "msr0x%x: %016llX\n",
extra_msr_offset64, t->extra_msr64); extra_msr_offset64, t->extra_msr64);
if (do_irq)
outp += sprintf(outp, "IRQ: %08X\n", t->irq_count);
if (do_smi) if (do_smi)
outp += sprintf(outp, "SMI: %08X\n", t->smi_count); outp += sprintf(outp, "SMI: %08X\n", t->smi_count);
} }
...@@ -504,7 +545,7 @@ int format_counters(struct thread_data *t, struct core_data *c, ...@@ -504,7 +545,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
outp += sprintf(outp, "%8.0f", outp += sprintf(outp, "%8.0f",
1.0 / units * t->aperf / interval_float); 1.0 / units * t->aperf / interval_float);
/* %Busy */ /* Busy% */
if (has_aperf) { if (has_aperf) {
if (!skip_c0) if (!skip_c0)
outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak);
...@@ -542,6 +583,10 @@ int format_counters(struct thread_data *t, struct core_data *c, ...@@ -542,6 +583,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (!debug) if (!debug)
goto done; goto done;
/* IRQ */
if (do_irq)
outp += sprintf(outp, "%8d", t->irq_count);
/* SMI */ /* SMI */
if (do_smi) if (do_smi)
outp += sprintf(outp, "%8d", t->smi_count); outp += sprintf(outp, "%8d", t->smi_count);
...@@ -575,6 +620,14 @@ int format_counters(struct thread_data *t, struct core_data *c, ...@@ -575,6 +620,14 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (do_ptm) if (do_ptm)
outp += sprintf(outp, "%8d", p->pkg_temp_c); outp += sprintf(outp, "%8d", p->pkg_temp_c);
/* GFXrc6 */
if (do_gfx_rc6_ms)
outp += sprintf(outp, "%8.2f", 100.0 * p->gfx_rc6_ms / 1000.0 / interval_float);
/* GFXMHz */
if (do_gfx_mhz)
outp += sprintf(outp, "%8d", p->gfx_mhz);
/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
if (do_skl_residency) { if (do_skl_residency) {
outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc); outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc);
...@@ -645,15 +698,24 @@ int format_counters(struct thread_data *t, struct core_data *c, ...@@ -645,15 +698,24 @@ int format_counters(struct thread_data *t, struct core_data *c,
return 0; return 0;
} }
void flush_stdout() void flush_output_stdout(void)
{ {
fputs(output_buffer, stdout); FILE *filep;
fflush(stdout);
if (outf == stderr)
filep = stdout;
else
filep = outf;
fputs(output_buffer, filep);
fflush(filep);
outp = output_buffer; outp = output_buffer;
} }
void flush_stderr() void flush_output_stderr(void)
{ {
fputs(output_buffer, stderr); fputs(output_buffer, outf);
fflush(outf);
outp = output_buffer; outp = output_buffer;
} }
void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
...@@ -704,6 +766,9 @@ delta_package(struct pkg_data *new, struct pkg_data *old) ...@@ -704,6 +766,9 @@ delta_package(struct pkg_data *new, struct pkg_data *old)
old->pc10 = new->pc10 - old->pc10; old->pc10 = new->pc10 - old->pc10;
old->pkg_temp_c = new->pkg_temp_c; old->pkg_temp_c = new->pkg_temp_c;
old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
old->gfx_mhz = new->gfx_mhz;
DELTA_WRAP32(new->energy_pkg, old->energy_pkg); DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
DELTA_WRAP32(new->energy_cores, old->energy_cores); DELTA_WRAP32(new->energy_cores, old->energy_cores);
DELTA_WRAP32(new->energy_gfx, old->energy_gfx); DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
...@@ -745,9 +810,9 @@ delta_thread(struct thread_data *new, struct thread_data *old, ...@@ -745,9 +810,9 @@ delta_thread(struct thread_data *new, struct thread_data *old,
} else { } else {
if (!aperf_mperf_unstable) { if (!aperf_mperf_unstable) {
fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); fprintf(outf, "%s: APERF or MPERF went backwards *\n", progname);
fprintf(stderr, "* Frequency results do not cover entire interval *\n"); fprintf(outf, "* Frequency results do not cover entire interval *\n");
fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); fprintf(outf, "* fix this by running Linux-2.6.30 or later *\n");
aperf_mperf_unstable = 1; aperf_mperf_unstable = 1;
} }
...@@ -782,7 +847,8 @@ delta_thread(struct thread_data *new, struct thread_data *old, ...@@ -782,7 +847,8 @@ delta_thread(struct thread_data *new, struct thread_data *old,
} }
if (old->mperf == 0) { if (old->mperf == 0) {
if (debug > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); if (debug > 1)
fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
old->mperf = 1; /* divide by 0 protection */ old->mperf = 1; /* divide by 0 protection */
} }
...@@ -797,6 +863,9 @@ delta_thread(struct thread_data *new, struct thread_data *old, ...@@ -797,6 +863,9 @@ delta_thread(struct thread_data *new, struct thread_data *old,
old->extra_msr32 = new->extra_msr32; old->extra_msr32 = new->extra_msr32;
old->extra_msr64 = new->extra_msr64; old->extra_msr64 = new->extra_msr64;
if (do_irq)
old->irq_count = new->irq_count - old->irq_count;
if (do_smi) if (do_smi)
old->smi_count = new->smi_count - old->smi_count; old->smi_count = new->smi_count - old->smi_count;
} }
...@@ -826,10 +895,12 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data ...@@ -826,10 +895,12 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
t->mperf = 0; t->mperf = 0;
t->c1 = 0; t->c1 = 0;
t->smi_count = 0;
t->extra_delta32 = 0; t->extra_delta32 = 0;
t->extra_delta64 = 0; t->extra_delta64 = 0;
t->irq_count = 0;
t->smi_count = 0;
/* tells format_counters to dump all fields from this set */ /* tells format_counters to dump all fields from this set */
t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
...@@ -861,6 +932,9 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data ...@@ -861,6 +932,9 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
p->rapl_pkg_perf_status = 0; p->rapl_pkg_perf_status = 0;
p->rapl_dram_perf_status = 0; p->rapl_dram_perf_status = 0;
p->pkg_temp_c = 0; p->pkg_temp_c = 0;
p->gfx_rc6_ms = 0;
p->gfx_mhz = 0;
} }
int sum_counters(struct thread_data *t, struct core_data *c, int sum_counters(struct thread_data *t, struct core_data *c,
struct pkg_data *p) struct pkg_data *p)
...@@ -873,6 +947,9 @@ int sum_counters(struct thread_data *t, struct core_data *c, ...@@ -873,6 +947,9 @@ int sum_counters(struct thread_data *t, struct core_data *c,
average.threads.extra_delta32 += t->extra_delta32; average.threads.extra_delta32 += t->extra_delta32;
average.threads.extra_delta64 += t->extra_delta64; average.threads.extra_delta64 += t->extra_delta64;
average.threads.irq_count += t->irq_count;
average.threads.smi_count += t->smi_count;
/* sum per-core values only for 1st thread in core */ /* sum per-core values only for 1st thread in core */
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
return 0; return 0;
...@@ -910,6 +987,9 @@ int sum_counters(struct thread_data *t, struct core_data *c, ...@@ -910,6 +987,9 @@ int sum_counters(struct thread_data *t, struct core_data *c,
average.packages.energy_cores += p->energy_cores; average.packages.energy_cores += p->energy_cores;
average.packages.energy_gfx += p->energy_gfx; average.packages.energy_gfx += p->energy_gfx;
average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
average.packages.gfx_mhz = p->gfx_mhz;
average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status; average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
...@@ -970,7 +1050,6 @@ static unsigned long long rdtsc(void) ...@@ -970,7 +1050,6 @@ static unsigned long long rdtsc(void)
return low | ((unsigned long long)high) << 32; return low | ((unsigned long long)high) << 32;
} }
/* /*
* get_counters(...) * get_counters(...)
* migrate to cpu * migrate to cpu
...@@ -980,23 +1059,74 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) ...@@ -980,23 +1059,74 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{ {
int cpu = t->cpu_id; int cpu = t->cpu_id;
unsigned long long msr; unsigned long long msr;
int aperf_mperf_retry_count = 0;
if (cpu_migrate(cpu)) { if (cpu_migrate(cpu)) {
fprintf(stderr, "Could not migrate to CPU %d\n", cpu); fprintf(outf, "Could not migrate to CPU %d\n", cpu);
return -1; return -1;
} }
retry:
t->tsc = rdtsc(); /* we are running on local CPU of interest */ t->tsc = rdtsc(); /* we are running on local CPU of interest */
if (has_aperf) { if (has_aperf) {
unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
/*
* The TSC, APERF and MPERF must be read together for
* APERF/MPERF and MPERF/TSC to give accurate results.
*
* Unfortunately, APERF and MPERF are read by
* individual system call, so delays may occur
* between them. If the time to read them
* varies by a large amount, we re-read them.
*/
/*
* This initial dummy APERF read has been seen to
* reduce jitter in the subsequent reads.
*/
if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
return -3;
t->tsc = rdtsc(); /* re-read close to APERF */
tsc_before = t->tsc;
if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
return -3; return -3;
tsc_between = rdtsc();
if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
return -4; return -4;
tsc_after = rdtsc();
aperf_time = tsc_between - tsc_before;
mperf_time = tsc_after - tsc_between;
/*
* If the system call latency to read APERF and MPERF
* differ by more than 2x, then try again.
*/
if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
aperf_mperf_retry_count++;
if (aperf_mperf_retry_count < 5)
goto retry;
else
warnx("cpu%d jitter %lld %lld",
cpu, aperf_time, mperf_time);
}
aperf_mperf_retry_count = 0;
t->aperf = t->aperf * aperf_mperf_multiplier; t->aperf = t->aperf * aperf_mperf_multiplier;
t->mperf = t->mperf * aperf_mperf_multiplier; t->mperf = t->mperf * aperf_mperf_multiplier;
} }
if (do_irq)
t->irq_count = irqs_per_cpu[cpu];
if (do_smi) { if (do_smi) {
if (get_msr(cpu, MSR_SMI_COUNT, &msr)) if (get_msr(cpu, MSR_SMI_COUNT, &msr))
return -5; return -5;
...@@ -1124,6 +1254,13 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) ...@@ -1124,6 +1254,13 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return -17; return -17;
p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
} }
if (do_gfx_rc6_ms)
p->gfx_rc6_ms = gfx_cur_rc6_ms;
if (do_gfx_mhz)
p->gfx_mhz = gfx_cur_mhz;
return 0; return 0;
} }
...@@ -1175,18 +1312,18 @@ dump_nhm_platform_info(void) ...@@ -1175,18 +1312,18 @@ dump_nhm_platform_info(void)
get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
fprintf(stderr, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
ratio = (msr >> 40) & 0xFF; ratio = (msr >> 40) & 0xFF;
fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n", fprintf(outf, "%d * %.0f = %.0f MHz max efficiency frequency\n",
ratio, bclk, ratio * bclk); ratio, bclk, ratio * bclk);
ratio = (msr >> 8) & 0xFF; ratio = (msr >> 8) & 0xFF;
fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n", fprintf(outf, "%d * %.0f = %.0f MHz base frequency\n",
ratio, bclk, ratio * bclk); ratio, bclk, ratio * bclk);
get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
fprintf(stderr, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
return; return;
...@@ -1200,16 +1337,16 @@ dump_hsw_turbo_ratio_limits(void) ...@@ -1200,16 +1337,16 @@ dump_hsw_turbo_ratio_limits(void)
get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr); fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
ratio = (msr >> 8) & 0xFF; ratio = (msr >> 8) & 0xFF;
if (ratio) if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 18 active cores\n", fprintf(outf, "%d * %.0f = %.0f MHz max turbo 18 active cores\n",
ratio, bclk, ratio * bclk); ratio, bclk, ratio * bclk);
ratio = (msr >> 0) & 0xFF; ratio = (msr >> 0) & 0xFF;
if (ratio) if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 17 active cores\n", fprintf(outf, "%d * %.0f = %.0f MHz max turbo 17 active cores\n",
ratio, bclk, ratio * bclk); ratio, bclk, ratio * bclk);
return; return;
} }
...@@ -1222,46 +1359,46 @@ dump_ivt_turbo_ratio_limits(void) ...@@ -1222,46 +1359,46 @@ dump_ivt_turbo_ratio_limits(void)
get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr); fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
ratio = (msr >> 56) & 0xFF; ratio = (msr >> 56) & 0xFF;
if (ratio) if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", fprintf(outf, "%d * %.0f = %.0f MHz max turbo 16 active cores\n",
ratio, bclk, ratio * bclk); ratio, bclk, ratio * bclk);
ratio = (msr >> 48) & 0xFF; ratio = (msr >> 48) & 0xFF;
if (ratio) if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", fprintf(outf, "%d * %.0f = %.0f MHz max turbo 15 active cores\n",
ratio, bclk, ratio * bclk); ratio, bclk, ratio * bclk);
ratio = (msr >> 40) & 0xFF; ratio = (msr >> 40) & 0xFF;
if (ratio) if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", fprintf(outf, "%d * %.0f = %.0f MHz max turbo 14 active cores\n",
ratio, bclk, ratio * bclk); ratio, bclk, ratio * bclk);
ratio = (msr >> 32) & 0xFF; ratio = (msr >> 32) & 0xFF;
if (ratio) if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", fprintf(outf, "%d * %.0f = %.0f MHz max turbo 13 active cores\n",
ratio, bclk, ratio * bclk); ratio, bclk, ratio * bclk);
ratio = (msr >> 24) & 0xFF; ratio = (msr >> 24) & 0xFF;
if (ratio) if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", fprintf(outf, "%d * %.0f = %.0f MHz max turbo 12 active cores\n",
ratio, bclk, ratio * bclk); ratio, bclk, ratio * bclk);
ratio = (msr >> 16) & 0xFF; ratio = (msr >> 16) & 0xFF;
if (ratio) if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", fprintf(outf, "%d * %.0f = %.0f MHz max turbo 11 active cores\n",
ratio, bclk, ratio * bclk); ratio, bclk, ratio * bclk);
ratio = (msr >> 8) & 0xFF; ratio = (msr >> 8) & 0xFF;
if (ratio) if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", fprintf(outf, "%d * %.0f = %.0f MHz max turbo 10 active cores\n",
ratio, bclk, ratio * bclk); ratio, bclk, ratio * bclk);
ratio = (msr >> 0) & 0xFF; ratio = (msr >> 0) & 0xFF;
if (ratio) if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", fprintf(outf, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
ratio, bclk, ratio * bclk); ratio, bclk, ratio * bclk);
return; return;
} }
...@@ -1274,46 +1411,46 @@ dump_nhm_turbo_ratio_limits(void) ...@@ -1274,46 +1411,46 @@ dump_nhm_turbo_ratio_limits(void)
get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
ratio = (msr >> 56) & 0xFF; ratio = (msr >> 56) & 0xFF;
if (ratio) if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", fprintf(outf, "%d * %.0f = %.0f MHz max turbo 8 active cores\n",
ratio, bclk, ratio * bclk); ratio, bclk, ratio * bclk);
ratio = (msr >> 48) & 0xFF; ratio = (msr >> 48) & 0xFF;
if (ratio) if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", fprintf(outf, "%d * %.0f = %.0f MHz max turbo 7 active cores\n",
ratio, bclk, ratio * bclk); ratio, bclk, ratio * bclk);
ratio = (msr >> 40) & 0xFF; ratio = (msr >> 40) & 0xFF;
if (ratio) if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", fprintf(outf, "%d * %.0f = %.0f MHz max turbo 6 active cores\n",
ratio, bclk, ratio * bclk); ratio, bclk, ratio * bclk);
ratio = (msr >> 32) & 0xFF; ratio = (msr >> 32) & 0xFF;
if (ratio) if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", fprintf(outf, "%d * %.0f = %.0f MHz max turbo 5 active cores\n",
ratio, bclk, ratio * bclk); ratio, bclk, ratio * bclk);
ratio = (msr >> 24) & 0xFF; ratio = (msr >> 24) & 0xFF;
if (ratio) if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", fprintf(outf, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
ratio, bclk, ratio * bclk); ratio, bclk, ratio * bclk);
ratio = (msr >> 16) & 0xFF; ratio = (msr >> 16) & 0xFF;
if (ratio) if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", fprintf(outf, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
ratio, bclk, ratio * bclk); ratio, bclk, ratio * bclk);
ratio = (msr >> 8) & 0xFF; ratio = (msr >> 8) & 0xFF;
if (ratio) if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", fprintf(outf, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
ratio, bclk, ratio * bclk); ratio, bclk, ratio * bclk);
ratio = (msr >> 0) & 0xFF; ratio = (msr >> 0) & 0xFF;
if (ratio) if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", fprintf(outf, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
ratio, bclk, ratio * bclk); ratio, bclk, ratio * bclk);
return; return;
} }
...@@ -1321,21 +1458,23 @@ dump_nhm_turbo_ratio_limits(void) ...@@ -1321,21 +1458,23 @@ dump_nhm_turbo_ratio_limits(void)
static void static void
dump_knl_turbo_ratio_limits(void) dump_knl_turbo_ratio_limits(void)
{ {
int cores; const unsigned int buckets_no = 7;
unsigned int ratio;
unsigned long long msr; unsigned long long msr;
int delta_cores; int delta_cores, delta_ratio;
int delta_ratio; int i, b_nr;
int i; unsigned int cores[buckets_no];
unsigned int ratio[buckets_no];
get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr); get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
fprintf(stderr, "cpu%d: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
base_cpu, msr); base_cpu, msr);
/** /**
* Turbo encoding in KNL is as follows: * Turbo encoding in KNL is as follows:
* [7:0] -- Base value of number of active cores of bucket 1. * [0] -- Reserved
* [7:1] -- Base value of number of active cores of bucket 1.
* [15:8] -- Base value of freq ratio of bucket 1. * [15:8] -- Base value of freq ratio of bucket 1.
* [20:16] -- +ve delta of number of active cores of bucket 2. * [20:16] -- +ve delta of number of active cores of bucket 2.
* i.e. active cores of bucket 2 = * i.e. active cores of bucket 2 =
...@@ -1354,29 +1493,25 @@ dump_knl_turbo_ratio_limits(void) ...@@ -1354,29 +1493,25 @@ dump_knl_turbo_ratio_limits(void)
* [60:56]-- +ve delta of number of active cores of bucket 7. * [60:56]-- +ve delta of number of active cores of bucket 7.
* [63:61]-- -ve delta of freq ratio of bucket 7. * [63:61]-- -ve delta of freq ratio of bucket 7.
*/ */
cores = msr & 0xFF;
ratio = (msr >> 8) && 0xFF;
if (ratio > 0)
fprintf(stderr,
"%d * %.0f = %.0f MHz max turbo %d active cores\n",
ratio, bclk, ratio * bclk, cores);
for (i = 16; i < 64; i = i + 8) { b_nr = 0;
cores[b_nr] = (msr & 0xFF) >> 1;
ratio[b_nr] = (msr >> 8) & 0xFF;
for (i = 16; i < 64; i += 8) {
delta_cores = (msr >> i) & 0x1F; delta_cores = (msr >> i) & 0x1F;
delta_ratio = (msr >> (i + 5)) && 0x7; delta_ratio = (msr >> (i + 5)) & 0x7;
if (!delta_cores || !delta_ratio)
return;
cores = cores + delta_cores;
ratio = ratio - delta_ratio;
/** -ve ratios will make successive ratio calculations cores[b_nr + 1] = cores[b_nr] + delta_cores;
* negative. Hence return instead of carrying on. ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
*/ b_nr++;
if (ratio > 0)
fprintf(stderr,
"%d * %.0f = %.0f MHz max turbo %d active cores\n",
ratio, bclk, ratio * bclk, cores);
} }
for (i = buckets_no - 1; i >= 0; i--)
if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
fprintf(outf,
"%d * %.0f = %.0f MHz max turbo %d active cores\n",
ratio[i], bclk, ratio[i] * bclk, cores[i]);
} }
static void static void
...@@ -1389,15 +1524,15 @@ dump_nhm_cst_cfg(void) ...@@ -1389,15 +1524,15 @@ dump_nhm_cst_cfg(void)
#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) #define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) #define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
fprintf(stderr, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr); fprintf(outf, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr);
fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
(msr & (1 << 15)) ? "" : "UN", (msr & (1 << 15)) ? "" : "UN",
(unsigned int)msr & 7, (unsigned int)msr & 0xF,
pkg_cstate_limit_strings[pkg_cstate_limit]); pkg_cstate_limit_strings[pkg_cstate_limit]);
return; return;
} }
...@@ -1408,48 +1543,59 @@ dump_config_tdp(void) ...@@ -1408,48 +1543,59 @@ dump_config_tdp(void)
unsigned long long msr; unsigned long long msr;
get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr); get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
fprintf(stderr, " (base_ratio=%d)\n", (unsigned int)msr & 0xEF); fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr); get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
if (msr) { if (msr) {
fprintf(stderr, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0xEFFF); fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
fprintf(stderr, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0xEFFF); fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
fprintf(stderr, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF); fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
fprintf(stderr, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0xEFFF); fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
} }
fprintf(stderr, ")\n"); fprintf(outf, ")\n");
get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr); get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
if (msr) { if (msr) {
fprintf(stderr, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0xEFFF); fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
fprintf(stderr, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0xEFFF); fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
fprintf(stderr, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF); fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
fprintf(stderr, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0xEFFF); fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
} }
fprintf(stderr, ")\n"); fprintf(outf, ")\n");
get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr); get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr); fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
if ((msr) & 0x3) if ((msr) & 0x3)
fprintf(stderr, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1); fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
fprintf(stderr, ")\n"); fprintf(outf, ")\n");
get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
fprintf(stderr, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
fprintf(stderr, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0x7F); fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1); fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
fprintf(stderr, ")\n"); fprintf(outf, ")\n");
}
/*
 * free_fd_percpu()
 *
 * Close every descriptor cached in fd_percpu[] and release the array.
 * An entry equal to 0 is treated as "nothing to close" and is skipped.
 */
void free_fd_percpu(void)
{
	int i;

	/* nothing to close or free if the table is not allocated */
	if (fd_percpu == NULL)
		return;

	for (i = 0; i < topo.max_cpu_num; ++i) {
		if (fd_percpu[i] != 0)
			close(fd_percpu[i]);
	}

	free(fd_percpu);
	/* do not leave a dangling pointer behind for a later call */
	fd_percpu = NULL;
}
void free_all_buffers(void) void free_all_buffers(void)
{ {
CPU_FREE(cpu_present_set); CPU_FREE(cpu_present_set);
cpu_present_set = NULL; cpu_present_set = NULL;
cpu_present_set = 0; cpu_present_setsize = 0;
CPU_FREE(cpu_affinity_set); CPU_FREE(cpu_affinity_set);
cpu_affinity_set = NULL; cpu_affinity_set = NULL;
...@@ -1474,6 +1620,11 @@ void free_all_buffers(void) ...@@ -1474,6 +1620,11 @@ void free_all_buffers(void)
free(output_buffer); free(output_buffer);
output_buffer = NULL; output_buffer = NULL;
outp = NULL; outp = NULL;
free_fd_percpu();
free(irq_column_2_cpu);
free(irqs_per_cpu);
} }
/* /*
...@@ -1481,7 +1632,7 @@ void free_all_buffers(void) ...@@ -1481,7 +1632,7 @@ void free_all_buffers(void)
*/ */
/*
 * fopen_or_die()
 *
 * path: file to open
 * mode: fopen(3) mode string, passed through unchanged
 *
 * Returns the open stream. On failure the process exits through
 * err(1, ...), so callers never see NULL.
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	/* honour the caller's mode; a fixed "r" would break writers */
	FILE *filep = fopen(path, mode);

	if (!filep)
		err(1, "%s: open failed", path);
	return filep;
}
...@@ -1696,6 +1847,136 @@ int mark_cpu_present(int cpu) ...@@ -1696,6 +1847,136 @@ int mark_cpu_present(int cpu)
return 0; return 0;
} }
/*
 * snapshot_proc_interrupts()
 *
 * read and record summary of /proc/interrupts
 *
 * The stream is opened on the first call and kept in a function-local
 * static; later calls rewind it instead of reopening.
 *
 * The header row supplies the CPU number for each count column
 * (saved in irq_column_2_cpu[]) and zeroes that CPU's slot in
 * irqs_per_cpu[]; every following row adds its per-column counts
 * into irqs_per_cpu[].
 *
 * return 1 if config change requires a restart, else return 0
 */
int snapshot_proc_interrupts(void)
{
	static FILE *fp;
	int column, retval;

	if (fp == NULL)
		fp = fopen_or_die("/proc/interrupts", "r");
	else
		rewind(fp);

	/* read 1st line of /proc/interrupts to get cpu* name for each column */
	for (column = 0; column < topo.num_cpus; ++column) {
		int cpu_number;

		retval = fscanf(fp, " CPU%d", &cpu_number);
		if (retval != 1)
			break;

		if (cpu_number > topo.max_cpu_num) {
			warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
			return 1;
		}

		irq_column_2_cpu[column] = cpu_number;
		irqs_per_cpu[cpu_number] = 0;
	}

	/* read /proc/interrupt count lines and sum up irqs per cpu */
	while (1) {
		char buf[64];
		int c;

		/*
		 * flush irq# "N:" -- the field width keeps an unexpectedly
		 * long label from writing past the end of buf
		 */
		retval = fscanf(fp, " %63s:", buf);
		if (retval != 1)
			break;

		/* read the count per cpu */
		for (column = 0; column < topo.num_cpus; ++column) {
			int cpu_number, irq_count;

			retval = fscanf(fp, " %d", &irq_count);
			if (retval != 1)
				break;

			cpu_number = irq_column_2_cpu[column];
			irqs_per_cpu[cpu_number] += irq_count;
		}

		/*
		 * flush interrupt description; also stop at EOF so a final
		 * line without a trailing newline cannot spin forever
		 */
		do {
			c = getc(fp);
		} while (c != '\n' && c != EOF);
	}
	return 0;
}
/*
 * snapshot_gfx_rc6_ms()
 *
 * Read /sys/class/drm/card0/power/rc6_residency_ms into
 * gfx_cur_rc6_ms.  The file is opened and closed on every call.
 *
 * Always returns 0; a failed open or a failed parse ends the
 * process (fopen_or_die() / err()).
 */
int snapshot_gfx_rc6_ms(void)
{
	FILE *rc6_file = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");

	/* exactly one value must be converted */
	if (fscanf(rc6_file, "%lld", &gfx_cur_rc6_ms) != 1)
		err(1, "GFX rc6");

	fclose(rc6_file);

	return 0;
}
/*
 * snapshot_gfx_mhz()
 *
 * Read /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz into
 * gfx_cur_mhz.  The stream is opened on the first call, kept in a
 * function-local static, and rewound on later calls.
 *
 * Always returns 0; a failed open or a failed parse ends the
 * process (fopen_or_die() / err()).
 */
int snapshot_gfx_mhz(void)
{
	static FILE *mhz_file;

	if (mhz_file != NULL)
		rewind(mhz_file);
	else
		mhz_file = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");

	/* exactly one value must be converted */
	if (fscanf(mhz_file, "%d", &gfx_cur_mhz) != 1)
		err(1, "GFX MHz");

	return 0;
}
/*
 * snapshot_proc_sysfs_files()
 *
 * Take the /proc/interrupts snapshot, then the optional graphics
 * snapshots selected by do_gfx_rc6_ms and do_gfx_mhz.
 *
 * Returns 1 as soon as the interrupt snapshot reports a non-zero
 * result (the graphics snapshots are then skipped); otherwise 0.
 * The return values of the graphics snapshots are not examined.
 */
int snapshot_proc_sysfs_files(void)
{
	int restart_needed = snapshot_proc_interrupts();

	if (restart_needed)
		return 1;

	if (do_gfx_rc6_ms)
		snapshot_gfx_rc6_ms();

	if (do_gfx_mhz)
		snapshot_gfx_mhz();

	return 0;
}
void turbostat_loop() void turbostat_loop()
{ {
int retval; int retval;
...@@ -1704,6 +1985,7 @@ void turbostat_loop() ...@@ -1704,6 +1985,7 @@ void turbostat_loop()
restart: restart:
restarted++; restarted++;
snapshot_proc_sysfs_files();
retval = for_all_cpus(get_counters, EVEN_COUNTERS); retval = for_all_cpus(get_counters, EVEN_COUNTERS);
if (retval < -1) { if (retval < -1) {
exit(retval); exit(retval);
...@@ -1722,7 +2004,9 @@ void turbostat_loop() ...@@ -1722,7 +2004,9 @@ void turbostat_loop()
re_initialize(); re_initialize();
goto restart; goto restart;
} }
sleep(interval_sec); nanosleep(&interval_ts, NULL);
if (snapshot_proc_sysfs_files())
goto restart;
retval = for_all_cpus(get_counters, ODD_COUNTERS); retval = for_all_cpus(get_counters, ODD_COUNTERS);
if (retval < -1) { if (retval < -1) {
exit(retval); exit(retval);
...@@ -1735,8 +2019,10 @@ void turbostat_loop() ...@@ -1735,8 +2019,10 @@ void turbostat_loop()
for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
compute_average(EVEN_COUNTERS); compute_average(EVEN_COUNTERS);
format_all_counters(EVEN_COUNTERS); format_all_counters(EVEN_COUNTERS);
flush_stdout(); flush_output_stdout();
sleep(interval_sec); nanosleep(&interval_ts, NULL);
if (snapshot_proc_sysfs_files())
goto restart;
retval = for_all_cpus(get_counters, EVEN_COUNTERS); retval = for_all_cpus(get_counters, EVEN_COUNTERS);
if (retval < -1) { if (retval < -1) {
exit(retval); exit(retval);
...@@ -1749,7 +2035,7 @@ void turbostat_loop() ...@@ -1749,7 +2035,7 @@ void turbostat_loop()
for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS);
compute_average(ODD_COUNTERS); compute_average(ODD_COUNTERS);
format_all_counters(ODD_COUNTERS); format_all_counters(ODD_COUNTERS);
flush_stdout(); flush_output_stdout();
} }
} }
...@@ -1889,6 +2175,7 @@ int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) ...@@ -1889,6 +2175,7 @@ int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model)
/* Nehalem compatible, but do not include turbo-ratio limit support */ /* Nehalem compatible, but do not include turbo-ratio limit support */
case 0x2E: /* Nehalem-EX Xeon - Beckton */ case 0x2E: /* Nehalem-EX Xeon - Beckton */
case 0x2F: /* Westmere-EX Xeon - Eagleton */ case 0x2F: /* Westmere-EX Xeon - Eagleton */
case 0x57: /* PHI - Knights Landing (different MSR definition) */
return 0; return 0;
default: default:
return 1; return 1;
...@@ -1970,7 +2257,7 @@ int has_config_tdp(unsigned int family, unsigned int model) ...@@ -1970,7 +2257,7 @@ int has_config_tdp(unsigned int family, unsigned int model)
} }
static void static void
dump_cstate_pstate_config_info(family, model) dump_cstate_pstate_config_info(int family, int model)
{ {
if (!do_nhm_platform_info) if (!do_nhm_platform_info)
return; return;
...@@ -2016,7 +2303,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) ...@@ -2016,7 +2303,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return 0; return 0;
if (cpu_migrate(cpu)) { if (cpu_migrate(cpu)) {
fprintf(stderr, "Could not migrate to CPU %d\n", cpu); fprintf(outf, "Could not migrate to CPU %d\n", cpu);
return -1; return -1;
} }
...@@ -2037,7 +2324,98 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) ...@@ -2037,7 +2324,98 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
epb_string = "custom"; epb_string = "custom";
break; break;
} }
fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string); fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
return 0;
}
/*
 * print_hwp()
 * Decode and print the HWP MSRs to outf: MSR_PM_ENABLE,
 * MSR_HWP_CAPABILITIES, MSR_HWP_REQUEST, and, when the matching
 * feature flag is set, MSR_HWP_REQUEST_PKG and MSR_HWP_INTERRUPT,
 * followed by MSR_HWP_STATUS.
 *
 * t: thread whose cpu_id and flags select the CPU to query
 * c, p: unused here
 *
 * Only the first thread of the first core in a package is decoded.
 *
 * Returns -1 if migrating to the CPU fails.  Returns 0 otherwise,
 * including when has_hwp is clear, when the thread is skipped, when
 * HWP is not enabled, or when any MSR read fails (output simply stops
 * at that point).
 */
int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	int cpu;

	if (!has_hwp)
		return 0;

	cpu = t->cpu_id;

	/* MSR_HWP_CAPABILITIES is per-package */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

	if (cpu_migrate(cpu)) {
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	if (get_msr(cpu, MSR_PM_ENABLE, &msr))
		return 0;

	fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
		cpu, msr, (msr & (1 << 0)) ? "" : "No-");

	/* MSR_PM_ENABLE[0] == 1 if HWP is enabled and MSRs visible */
	if ((msr & (1 << 0)) == 0)
		return 0;

	if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
		return 0;

	fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
			"(high 0x%x guar 0x%x eff 0x%x low 0x%x)\n",
			cpu, msr,
			(unsigned int)HWP_HIGHEST_PERF(msr),
			(unsigned int)HWP_GUARANTEED_PERF(msr),
			(unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
			(unsigned int)HWP_LOWEST_PERF(msr));

	if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
		return 0;

	/*
	 * Field layout: min [7:0], max [15:8], desired [23:16],
	 * epp [31:24], activity window [41:32] (10 bits, hence 0x3ff),
	 * package control [42].
	 */
	fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
			"(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x pkg 0x%x)\n",
			cpu, msr,
			(unsigned int)(((msr) >> 0) & 0xff),
			(unsigned int)(((msr) >> 8) & 0xff),
			(unsigned int)(((msr) >> 16) & 0xff),
			(unsigned int)(((msr) >> 24) & 0xff),
			(unsigned int)(((msr) >> 32) & 0x3ff),
			(unsigned int)(((msr) >> 42) & 0x1));

	if (has_hwp_pkg) {
		if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
			return 0;

		/* same field layout as MSR_HWP_REQUEST, without the pkg bit */
		fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
			"(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x)\n",
			cpu, msr,
			(unsigned int)(((msr) >> 0) & 0xff),
			(unsigned int)(((msr) >> 8) & 0xff),
			(unsigned int)(((msr) >> 16) & 0xff),
			(unsigned int)(((msr) >> 24) & 0xff),
			(unsigned int)(((msr) >> 32) & 0x3ff));
	}

	if (has_hwp_notify) {
		if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
			return 0;

		fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
				"(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
				cpu, msr,
				((msr) & 0x1) ? "EN" : "Dis",
				((msr) & 0x2) ? "EN" : "Dis");
	}

	if (get_msr(cpu, MSR_HWP_STATUS, &msr))
		return 0;

	fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
			"(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
			cpu, msr,
			((msr) & 0x1) ? "" : "No-",
			((msr) & 0x2) ? "" : "No-");

	return 0;
}
...@@ -2057,14 +2435,14 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data ...@@ -2057,14 +2435,14 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
return 0; return 0;
if (cpu_migrate(cpu)) { if (cpu_migrate(cpu)) {
fprintf(stderr, "Could not migrate to CPU %d\n", cpu); fprintf(outf, "Could not migrate to CPU %d\n", cpu);
return -1; return -1;
} }
if (do_core_perf_limit_reasons) { if (do_core_perf_limit_reasons) {
get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
(msr & 1 << 15) ? "bit15, " : "", (msr & 1 << 15) ? "bit15, " : "",
(msr & 1 << 14) ? "bit14, " : "", (msr & 1 << 14) ? "bit14, " : "",
(msr & 1 << 13) ? "Transitions, " : "", (msr & 1 << 13) ? "Transitions, " : "",
...@@ -2079,7 +2457,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data ...@@ -2079,7 +2457,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
(msr & 1 << 2) ? "bit2, " : "", (msr & 1 << 2) ? "bit2, " : "",
(msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 1) ? "ThermStatus, " : "",
(msr & 1 << 0) ? "PROCHOT, " : ""); (msr & 1 << 0) ? "PROCHOT, " : "");
fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
(msr & 1 << 31) ? "bit31, " : "", (msr & 1 << 31) ? "bit31, " : "",
(msr & 1 << 30) ? "bit30, " : "", (msr & 1 << 30) ? "bit30, " : "",
(msr & 1 << 29) ? "Transitions, " : "", (msr & 1 << 29) ? "Transitions, " : "",
...@@ -2098,8 +2476,8 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data ...@@ -2098,8 +2476,8 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
} }
if (do_gfx_perf_limit_reasons) { if (do_gfx_perf_limit_reasons) {
get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
fprintf(stderr, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s)", fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
(msr & 1 << 0) ? "PROCHOT, " : "", (msr & 1 << 0) ? "PROCHOT, " : "",
(msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 1) ? "ThermStatus, " : "",
(msr & 1 << 4) ? "Graphics, " : "", (msr & 1 << 4) ? "Graphics, " : "",
...@@ -2108,7 +2486,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data ...@@ -2108,7 +2486,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
(msr & 1 << 9) ? "GFXPwr, " : "", (msr & 1 << 9) ? "GFXPwr, " : "",
(msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 10) ? "PkgPwrL1, " : "",
(msr & 1 << 11) ? "PkgPwrL2, " : ""); (msr & 1 << 11) ? "PkgPwrL2, " : "");
fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s)\n", fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
(msr & 1 << 16) ? "PROCHOT, " : "", (msr & 1 << 16) ? "PROCHOT, " : "",
(msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 17) ? "ThermStatus, " : "",
(msr & 1 << 20) ? "Graphics, " : "", (msr & 1 << 20) ? "Graphics, " : "",
...@@ -2120,15 +2498,15 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data ...@@ -2120,15 +2498,15 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
} }
if (do_ring_perf_limit_reasons) { if (do_ring_perf_limit_reasons) {
get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
fprintf(stderr, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
fprintf(stderr, " (Active: %s%s%s%s%s%s)", fprintf(outf, " (Active: %s%s%s%s%s%s)",
(msr & 1 << 0) ? "PROCHOT, " : "", (msr & 1 << 0) ? "PROCHOT, " : "",
(msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 1) ? "ThermStatus, " : "",
(msr & 1 << 6) ? "VR-Therm, " : "", (msr & 1 << 6) ? "VR-Therm, " : "",
(msr & 1 << 8) ? "Amps, " : "", (msr & 1 << 8) ? "Amps, " : "",
(msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 10) ? "PkgPwrL1, " : "",
(msr & 1 << 11) ? "PkgPwrL2, " : ""); (msr & 1 << 11) ? "PkgPwrL2, " : "");
fprintf(stderr, " (Logged: %s%s%s%s%s%s)\n", fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
(msr & 1 << 16) ? "PROCHOT, " : "", (msr & 1 << 16) ? "PROCHOT, " : "",
(msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 17) ? "ThermStatus, " : "",
(msr & 1 << 22) ? "VR-Therm, " : "", (msr & 1 << 22) ? "VR-Therm, " : "",
...@@ -2142,7 +2520,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data ...@@ -2142,7 +2520,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
#define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */
#define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */
double get_tdp(model) double get_tdp(int model)
{ {
unsigned long long msr; unsigned long long msr;
...@@ -2251,12 +2629,12 @@ void rapl_probe(unsigned int family, unsigned int model) ...@@ -2251,12 +2629,12 @@ void rapl_probe(unsigned int family, unsigned int model)
rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
if (debug) if (debug)
fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
return; return;
} }
void perf_limit_reasons_probe(family, model) void perf_limit_reasons_probe(int family, int model)
{ {
if (!genuine_intel) if (!genuine_intel)
return; return;
...@@ -2293,7 +2671,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p ...@@ -2293,7 +2671,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
return 0; return 0;
if (cpu_migrate(cpu)) { if (cpu_migrate(cpu)) {
fprintf(stderr, "Could not migrate to CPU %d\n", cpu); fprintf(outf, "Could not migrate to CPU %d\n", cpu);
return -1; return -1;
} }
...@@ -2302,7 +2680,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p ...@@ -2302,7 +2680,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
return 0; return 0;
dts = (msr >> 16) & 0x7F; dts = (msr >> 16) & 0x7F;
fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
cpu, msr, tcc_activation_temp - dts); cpu, msr, tcc_activation_temp - dts);
#ifdef THERM_DEBUG #ifdef THERM_DEBUG
...@@ -2311,7 +2689,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p ...@@ -2311,7 +2689,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
dts = (msr >> 16) & 0x7F; dts = (msr >> 16) & 0x7F;
dts2 = (msr >> 8) & 0x7F; dts2 = (msr >> 8) & 0x7F;
fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
#endif #endif
} }
...@@ -2325,7 +2703,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p ...@@ -2325,7 +2703,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
dts = (msr >> 16) & 0x7F; dts = (msr >> 16) & 0x7F;
resolution = (msr >> 27) & 0xF; resolution = (msr >> 27) & 0xF;
fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
cpu, msr, tcc_activation_temp - dts, resolution); cpu, msr, tcc_activation_temp - dts, resolution);
#ifdef THERM_DEBUG #ifdef THERM_DEBUG
...@@ -2334,7 +2712,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p ...@@ -2334,7 +2712,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
dts = (msr >> 16) & 0x7F; dts = (msr >> 16) & 0x7F;
dts2 = (msr >> 8) & 0x7F; dts2 = (msr >> 8) & 0x7F;
fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
#endif #endif
} }
...@@ -2344,7 +2722,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p ...@@ -2344,7 +2722,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
void print_power_limit_msr(int cpu, unsigned long long msr, char *label) void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
{ {
fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
cpu, label, cpu, label,
((msr >> 15) & 1) ? "EN" : "DIS", ((msr >> 15) & 1) ? "EN" : "DIS",
((msr >> 0) & 0x7FFF) * rapl_power_units, ((msr >> 0) & 0x7FFF) * rapl_power_units,
...@@ -2368,7 +2746,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) ...@@ -2368,7 +2746,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
cpu = t->cpu_id; cpu = t->cpu_id;
if (cpu_migrate(cpu)) { if (cpu_migrate(cpu)) {
fprintf(stderr, "Could not migrate to CPU %d\n", cpu); fprintf(outf, "Could not migrate to CPU %d\n", cpu);
return -1; return -1;
} }
...@@ -2376,7 +2754,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) ...@@ -2376,7 +2754,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return -1; return -1;
if (debug) { if (debug) {
fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
"(%f Watts, %f Joules, %f sec.)\n", cpu, msr, "(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
rapl_power_units, rapl_energy_units, rapl_time_units); rapl_power_units, rapl_energy_units, rapl_time_units);
} }
...@@ -2386,7 +2764,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) ...@@ -2386,7 +2764,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return -5; return -5;
fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
cpu, msr, cpu, msr,
((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
...@@ -2399,11 +2777,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) ...@@ -2399,11 +2777,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
return -9; return -9;
fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
cpu, msr, (msr >> 63) & 1 ? "": "UN"); cpu, msr, (msr >> 63) & 1 ? "": "UN");
print_power_limit_msr(cpu, msr, "PKG Limit #1"); print_power_limit_msr(cpu, msr, "PKG Limit #1");
fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
cpu, cpu,
((msr >> 47) & 1) ? "EN" : "DIS", ((msr >> 47) & 1) ? "EN" : "DIS",
((msr >> 32) & 0x7FFF) * rapl_power_units, ((msr >> 32) & 0x7FFF) * rapl_power_units,
...@@ -2415,7 +2793,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) ...@@ -2415,7 +2793,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
return -6; return -6;
fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
cpu, msr, cpu, msr,
((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
...@@ -2425,7 +2803,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) ...@@ -2425,7 +2803,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (do_rapl & RAPL_DRAM) { if (do_rapl & RAPL_DRAM) {
if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
return -9; return -9;
fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
cpu, msr, (msr >> 31) & 1 ? "": "UN"); cpu, msr, (msr >> 31) & 1 ? "": "UN");
print_power_limit_msr(cpu, msr, "DRAM Limit"); print_power_limit_msr(cpu, msr, "DRAM Limit");
...@@ -2435,7 +2813,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) ...@@ -2435,7 +2813,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (get_msr(cpu, MSR_PP0_POLICY, &msr)) if (get_msr(cpu, MSR_PP0_POLICY, &msr))
return -7; return -7;
fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
} }
} }
if (do_rapl & RAPL_CORES) { if (do_rapl & RAPL_CORES) {
...@@ -2443,7 +2821,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) ...@@ -2443,7 +2821,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
return -9; return -9;
fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
cpu, msr, (msr >> 31) & 1 ? "": "UN"); cpu, msr, (msr >> 31) & 1 ? "": "UN");
print_power_limit_msr(cpu, msr, "Cores Limit"); print_power_limit_msr(cpu, msr, "Cores Limit");
} }
...@@ -2453,11 +2831,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) ...@@ -2453,11 +2831,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (get_msr(cpu, MSR_PP1_POLICY, &msr)) if (get_msr(cpu, MSR_PP1_POLICY, &msr))
return -8; return -8;
fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
return -9; return -9;
fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
cpu, msr, (msr >> 31) & 1 ? "": "UN"); cpu, msr, (msr >> 31) & 1 ? "": "UN");
print_power_limit_msr(cpu, msr, "GFX Limit"); print_power_limit_msr(cpu, msr, "GFX Limit");
} }
...@@ -2583,23 +2961,23 @@ double slm_bclk(void) ...@@ -2583,23 +2961,23 @@ double slm_bclk(void)
double freq; double freq;
if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
fprintf(stderr, "SLM BCLK: unknown\n"); fprintf(outf, "SLM BCLK: unknown\n");
i = msr & 0xf; i = msr & 0xf;
if (i >= SLM_BCLK_FREQS) { if (i >= SLM_BCLK_FREQS) {
fprintf(stderr, "SLM BCLK[%d] invalid\n", i); fprintf(outf, "SLM BCLK[%d] invalid\n", i);
msr = 3; msr = 3;
} }
freq = slm_freq_table[i]; freq = slm_freq_table[i];
fprintf(stderr, "SLM BCLK: %.1f Mhz\n", freq); fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
return freq; return freq;
} }
double discover_bclk(unsigned int family, unsigned int model) double discover_bclk(unsigned int family, unsigned int model)
{ {
if (has_snb_msrs(family, model)) if (has_snb_msrs(family, model) || is_knl(family, model))
return 100.00; return 100.00;
else if (is_slm(family, model)) else if (is_slm(family, model))
return slm_bclk(); return slm_bclk();
...@@ -2635,13 +3013,13 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk ...@@ -2635,13 +3013,13 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
cpu = t->cpu_id; cpu = t->cpu_id;
if (cpu_migrate(cpu)) { if (cpu_migrate(cpu)) {
fprintf(stderr, "Could not migrate to CPU %d\n", cpu); fprintf(outf, "Could not migrate to CPU %d\n", cpu);
return -1; return -1;
} }
if (tcc_activation_temp_override != 0) { if (tcc_activation_temp_override != 0) {
tcc_activation_temp = tcc_activation_temp_override; tcc_activation_temp = tcc_activation_temp_override;
fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n", fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
cpu, tcc_activation_temp); cpu, tcc_activation_temp);
return 0; return 0;
} }
...@@ -2656,7 +3034,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk ...@@ -2656,7 +3034,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
target_c_local = (msr >> 16) & 0xFF; target_c_local = (msr >> 16) & 0xFF;
if (debug) if (debug)
fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
cpu, msr, target_c_local); cpu, msr, target_c_local);
if (!target_c_local) if (!target_c_local)
...@@ -2668,37 +3046,93 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk ...@@ -2668,37 +3046,93 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
guess: guess:
tcc_activation_temp = TJMAX_DEFAULT; tcc_activation_temp = TJMAX_DEFAULT;
fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
cpu, tcc_activation_temp); cpu, tcc_activation_temp);
return 0; return 0;
} }
/*
 * Dump MSR_IA32_FEATURE_CONTROL as read on base_cpu.
 *
 * Prints the raw value, whether FEATURE_CONTROL_LOCKED is set
 * ("Locked" vs "UN-Locked"), and the tag "SGX" when bit 18 is set.
 * Prints nothing if the MSR read fails.
 */
void decode_feature_control_msr(void)
{
	unsigned long long msr;
	const char *lock_prefix;
	const char *sgx_tag;

	if (get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr))
		return;

	lock_prefix = (msr & FEATURE_CONTROL_LOCKED) ? "" : "UN-";
	sgx_tag = (msr & (1 << 18)) ? "SGX" : "";

	fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
		base_cpu, msr, lock_prefix, sgx_tag);
}
/*
 * Dump MSR_IA32_MISC_ENABLE as read on base_cpu.
 *
 * Prints the raw value followed by one tag per set bit:
 * bit 3 -> "TCC", bit 16 -> "EIST", bit 18 -> "MONITOR".
 * A clear bit leaves its slot empty.
 * Prints nothing if the MSR read fails.
 */
void decode_misc_enable_msr(void)
{
	unsigned long long msr;
	const char *tcc_tag;
	const char *eist_tag;
	const char *monitor_tag;

	if (get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
		return;

	tcc_tag = (msr & (1 << 3)) ? "TCC" : "";
	eist_tag = (msr & (1 << 16)) ? "EIST" : "";
	monitor_tag = (msr & (1 << 18)) ? "MONITOR" : "";

	fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n",
		base_cpu, msr, tcc_tag, eist_tag, monitor_tag);
}
/*
 * Decode MSR_MISC_PWR_MGMT
 *
 * Bit meanings follow the Nehalem documentation:
 *   bit[0] set   -> EIST coordination DISabled (this meaning appears
 *                   to hold on later parts as well)
 *   bit[1] set   -> EPB ENabled (less consistent on later parts)
 *
 * Does nothing unless do_nhm_platform_info is set; prints nothing
 * if the MSR read on base_cpu fails.
 */
void decode_misc_pwr_mgmt_msr(void)
{
	unsigned long long msr;
	const char *eist_coord;
	const char *epb;

	/* short-circuit: the MSR is only read when the platform flag is set */
	if (!do_nhm_platform_info || get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
		return;

	eist_coord = (msr & (1 << 0)) ? "DIS" : "EN";
	epb = (msr & (1 << 1)) ? "EN" : "DIS";

	fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB)\n",
		base_cpu, msr, eist_coord, epb);
}
void process_cpuid() void process_cpuid()
{ {
unsigned int eax, ebx, ecx, edx, max_level; unsigned int eax, ebx, ecx, edx, max_level, max_extended_level;
unsigned int fms, family, model, stepping; unsigned int fms, family, model, stepping;
eax = ebx = ecx = edx = 0; eax = ebx = ecx = edx = 0;
__get_cpuid(0, &max_level, &ebx, &ecx, &edx); __cpuid(0, max_level, ebx, ecx, edx);
if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
genuine_intel = 1; genuine_intel = 1;
if (debug) if (debug)
fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ", fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
(char *)&ebx, (char *)&edx, (char *)&ecx); (char *)&ebx, (char *)&edx, (char *)&ecx);
__get_cpuid(1, &fms, &ebx, &ecx, &edx); __cpuid(1, fms, ebx, ecx, edx);
family = (fms >> 8) & 0xf; family = (fms >> 8) & 0xf;
model = (fms >> 4) & 0xf; model = (fms >> 4) & 0xf;
stepping = fms & 0xf; stepping = fms & 0xf;
if (family == 6 || family == 0xf) if (family == 6 || family == 0xf)
model += ((fms >> 16) & 0xf) << 4; model += ((fms >> 16) & 0xf) << 4;
if (debug) if (debug) {
fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
max_level, family, model, stepping, family, model, stepping); max_level, family, model, stepping, family, model, stepping);
fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n",
ecx & (1 << 0) ? "SSE3" : "-",
ecx & (1 << 3) ? "MONITOR" : "-",
ecx & (1 << 6) ? "SMX" : "-",
ecx & (1 << 7) ? "EIST" : "-",
ecx & (1 << 8) ? "TM2" : "-",
edx & (1 << 4) ? "TSC" : "-",
edx & (1 << 5) ? "MSR" : "-",
edx & (1 << 22) ? "ACPI-TM" : "-",
edx & (1 << 29) ? "TM" : "-");
}
if (!(edx & (1 << 5))) if (!(edx & (1 << 5)))
errx(1, "CPUID: no MSR"); errx(1, "CPUID: no MSR");
...@@ -2709,15 +3143,15 @@ void process_cpuid() ...@@ -2709,15 +3143,15 @@ void process_cpuid()
* This check is valid for both Intel and AMD. * This check is valid for both Intel and AMD.
*/ */
ebx = ecx = edx = 0; ebx = ecx = edx = 0;
__get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx); __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
if (max_level >= 0x80000007) { if (max_extended_level >= 0x80000007) {
/* /*
* Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
* this check is valid for both Intel and AMD * this check is valid for both Intel and AMD
*/ */
__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx); __cpuid(0x80000007, eax, ebx, ecx, edx);
has_invariant_tsc = edx & (1 << 8); has_invariant_tsc = edx & (1 << 8);
} }
...@@ -2726,20 +3160,48 @@ void process_cpuid() ...@@ -2726,20 +3160,48 @@ void process_cpuid()
* this check is valid for both Intel and AMD * this check is valid for both Intel and AMD
*/ */
__get_cpuid(0x6, &eax, &ebx, &ecx, &edx); __cpuid(0x6, eax, ebx, ecx, edx);
has_aperf = ecx & (1 << 0); has_aperf = ecx & (1 << 0);
do_dts = eax & (1 << 0); do_dts = eax & (1 << 0);
do_ptm = eax & (1 << 6); do_ptm = eax & (1 << 6);
has_hwp = eax & (1 << 7);
has_hwp_notify = eax & (1 << 8);
has_hwp_activity_window = eax & (1 << 9);
has_hwp_epp = eax & (1 << 10);
has_hwp_pkg = eax & (1 << 11);
has_epb = ecx & (1 << 3); has_epb = ecx & (1 << 3);
if (debug) if (debug)
fprintf(stderr, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sEPB\n", fprintf(outf, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, "
has_aperf ? "" : "No ", "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
do_dts ? "" : "No ", has_aperf ? "" : "No-",
do_ptm ? "" : "No ", do_dts ? "" : "No-",
has_epb ? "" : "No "); do_ptm ? "" : "No-",
has_hwp ? "" : "No-",
has_hwp_notify ? "" : "No-",
has_hwp_activity_window ? "" : "No-",
has_hwp_epp ? "" : "No-",
has_hwp_pkg ? "" : "No-",
has_epb ? "" : "No-");
if (max_level > 0x15) { if (debug)
decode_misc_enable_msr();
if (max_level >= 0x7) {
int has_sgx;
ecx = 0;
__cpuid_count(0x7, 0, eax, ebx, ecx, edx);
has_sgx = ebx & (1 << 2);
fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
if (has_sgx)
decode_feature_control_msr();
}
if (max_level >= 0x15) {
unsigned int eax_crystal; unsigned int eax_crystal;
unsigned int ebx_tsc; unsigned int ebx_tsc;
...@@ -2747,12 +3209,12 @@ void process_cpuid() ...@@ -2747,12 +3209,12 @@ void process_cpuid()
* CPUID 15H TSC/Crystal ratio, possibly Crystal Hz * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
*/ */
eax_crystal = ebx_tsc = crystal_hz = edx = 0; eax_crystal = ebx_tsc = crystal_hz = edx = 0;
__get_cpuid(0x15, &eax_crystal, &ebx_tsc, &crystal_hz, &edx); __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
if (ebx_tsc != 0) { if (ebx_tsc != 0) {
if (debug && (ebx != 0)) if (debug && (ebx != 0))
fprintf(stderr, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
eax_crystal, ebx_tsc, crystal_hz); eax_crystal, ebx_tsc, crystal_hz);
if (crystal_hz == 0) if (crystal_hz == 0)
...@@ -2768,11 +3230,24 @@ void process_cpuid() ...@@ -2768,11 +3230,24 @@ void process_cpuid()
if (crystal_hz) { if (crystal_hz) {
tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal; tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
if (debug) if (debug)
fprintf(stderr, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal);
} }
} }
} }
if (max_level >= 0x16) {
unsigned int base_mhz, max_mhz, bus_mhz, edx;
/*
* CPUID 16H Base MHz, Max MHz, Bus MHz
*/
base_mhz = max_mhz = bus_mhz = edx = 0;
__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
if (debug)
fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
base_mhz, max_mhz, bus_mhz);
}
if (has_aperf) if (has_aperf)
aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model); aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
...@@ -2788,21 +3263,28 @@ void process_cpuid() ...@@ -2788,21 +3263,28 @@ void process_cpuid()
do_slm_cstates = is_slm(family, model); do_slm_cstates = is_slm(family, model);
do_knl_cstates = is_knl(family, model); do_knl_cstates = is_knl(family, model);
if (debug)
decode_misc_pwr_mgmt_msr();
rapl_probe(family, model); rapl_probe(family, model);
perf_limit_reasons_probe(family, model); perf_limit_reasons_probe(family, model);
if (debug) if (debug)
dump_cstate_pstate_config_info(); dump_cstate_pstate_config_info(family, model);
if (has_skl_msrs(family, model)) if (has_skl_msrs(family, model))
calculate_tsc_tweak(); calculate_tsc_tweak();
do_gfx_rc6_ms = !access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK);
do_gfx_mhz = !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK);
return; return;
} }
void help() void help()
{ {
fprintf(stderr, fprintf(outf,
"Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
"\n" "\n"
"Turbostat forks the specified COMMAND and prints statistics\n" "Turbostat forks the specified COMMAND and prints statistics\n"
...@@ -2814,6 +3296,7 @@ void help() ...@@ -2814,6 +3296,7 @@ void help()
"--help print this help message\n" "--help print this help message\n"
"--counter msr print 32-bit counter at address \"msr\"\n" "--counter msr print 32-bit counter at address \"msr\"\n"
"--Counter msr print 64-bit Counter at address \"msr\"\n" "--Counter msr print 64-bit Counter at address \"msr\"\n"
"--out file create or truncate \"file\" for all output\n"
"--msr msr print 32-bit value at address \"msr\"\n" "--msr msr print 32-bit value at address \"msr\"\n"
"--MSR msr print 64-bit Value at address \"msr\"\n" "--MSR msr print 64-bit Value at address \"msr\"\n"
"--version print version information\n" "--version print version information\n"
...@@ -2858,7 +3341,7 @@ void topology_probe() ...@@ -2858,7 +3341,7 @@ void topology_probe()
show_cpu = 1; show_cpu = 1;
if (debug > 1) if (debug > 1)
fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology));
if (cpus == NULL) if (cpus == NULL)
...@@ -2893,7 +3376,7 @@ void topology_probe() ...@@ -2893,7 +3376,7 @@ void topology_probe()
if (cpu_is_not_present(i)) { if (cpu_is_not_present(i)) {
if (debug > 1) if (debug > 1)
fprintf(stderr, "cpu%d NOT PRESENT\n", i); fprintf(outf, "cpu%d NOT PRESENT\n", i);
continue; continue;
} }
cpus[i].core_id = get_core_id(i); cpus[i].core_id = get_core_id(i);
...@@ -2908,26 +3391,26 @@ void topology_probe() ...@@ -2908,26 +3391,26 @@ void topology_probe()
if (siblings > max_siblings) if (siblings > max_siblings)
max_siblings = siblings; max_siblings = siblings;
if (debug > 1) if (debug > 1)
fprintf(stderr, "cpu %d pkg %d core %d\n", fprintf(outf, "cpu %d pkg %d core %d\n",
i, cpus[i].physical_package_id, cpus[i].core_id); i, cpus[i].physical_package_id, cpus[i].core_id);
} }
topo.num_cores_per_pkg = max_core_id + 1; topo.num_cores_per_pkg = max_core_id + 1;
if (debug > 1) if (debug > 1)
fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
max_core_id, topo.num_cores_per_pkg); max_core_id, topo.num_cores_per_pkg);
if (debug && !summary_only && topo.num_cores_per_pkg > 1) if (debug && !summary_only && topo.num_cores_per_pkg > 1)
show_core = 1; show_core = 1;
topo.num_packages = max_package_id + 1; topo.num_packages = max_package_id + 1;
if (debug > 1) if (debug > 1)
fprintf(stderr, "max_package_id %d, sizing for %d packages\n", fprintf(outf, "max_package_id %d, sizing for %d packages\n",
max_package_id, topo.num_packages); max_package_id, topo.num_packages);
if (debug && !summary_only && topo.num_packages > 1) if (debug && !summary_only && topo.num_packages > 1)
show_pkg = 1; show_pkg = 1;
topo.num_threads_per_core = max_siblings; topo.num_threads_per_core = max_siblings;
if (debug > 1) if (debug > 1)
fprintf(stderr, "max_siblings %d\n", max_siblings); fprintf(outf, "max_siblings %d\n", max_siblings);
free(cpus); free(cpus);
} }
...@@ -3019,10 +3502,27 @@ void allocate_output_buffer() ...@@ -3019,10 +3502,27 @@ void allocate_output_buffer()
if (outp == NULL) if (outp == NULL)
err(-1, "calloc output buffer"); err(-1, "calloc output buffer");
} }
/*
 * Allocate the zero-initialized fd_percpu array.
 *
 * topo.max_cpu_num is the highest CPU number, not a count, so the
 * array needs max_cpu_num + 1 entries for that CPU to be a valid
 * index.  This matches the sizing topology_probe() uses for its
 * per-CPU array.  Exits via err() on allocation failure.
 *
 * NOTE(review): assumes fd_percpu is indexed by CPU number - confirm
 * against its users.
 */
void allocate_fd_percpu(void)
{
	fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
	if (fd_percpu == NULL)
		err(-1, "calloc fd_percpu");
}
/*
 * Allocate the zero-initialized buffers used for per-CPU IRQ counts.
 *
 * irq_column_2_cpu: one entry per CPU counted in topo.num_cpus.
 * irqs_per_cpu:     sized max_cpu_num + 1, because topo.max_cpu_num
 *                   is the highest CPU number rather than a count;
 *                   this matches the per-CPU sizing in topology_probe().
 *
 * Exits via err() on allocation failure, reporting the element count
 * that was requested.
 *
 * NOTE(review): assumes irqs_per_cpu is indexed by CPU number - confirm
 * against its users.
 */
void allocate_irq_buffers(void)
{
	irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
	if (irq_column_2_cpu == NULL)
		err(-1, "calloc %d", topo.num_cpus);

	irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
	if (irqs_per_cpu == NULL)
		err(-1, "calloc %d", topo.max_cpu_num + 1);
}
void setup_all_buffers(void) void setup_all_buffers(void)
{ {
topology_probe(); topology_probe();
allocate_irq_buffers();
allocate_fd_percpu();
allocate_counters(&thread_even, &core_even, &package_even); allocate_counters(&thread_even, &core_even, &package_even);
allocate_counters(&thread_odd, &core_odd, &package_odd); allocate_counters(&thread_odd, &core_odd, &package_odd);
allocate_output_buffer(); allocate_output_buffer();
...@@ -3036,7 +3536,7 @@ void set_base_cpu(void) ...@@ -3036,7 +3536,7 @@ void set_base_cpu(void)
err(-ENODEV, "No valid cpus found"); err(-ENODEV, "No valid cpus found");
if (debug > 1) if (debug > 1)
fprintf(stderr, "base_cpu = %d\n", base_cpu); fprintf(outf, "base_cpu = %d\n", base_cpu);
} }
void turbostat_init() void turbostat_init()
...@@ -3048,6 +3548,9 @@ void turbostat_init() ...@@ -3048,6 +3548,9 @@ void turbostat_init()
process_cpuid(); process_cpuid();
if (debug)
for_all_cpus(print_hwp, ODD_COUNTERS);
if (debug) if (debug)
for_all_cpus(print_epb, ODD_COUNTERS); for_all_cpus(print_epb, ODD_COUNTERS);
...@@ -3100,9 +3603,10 @@ int fork_it(char **argv) ...@@ -3100,9 +3603,10 @@ int fork_it(char **argv)
for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
compute_average(EVEN_COUNTERS); compute_average(EVEN_COUNTERS);
format_all_counters(EVEN_COUNTERS); format_all_counters(EVEN_COUNTERS);
flush_stderr();
fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
flush_output_stderr();
return status; return status;
} }
...@@ -3119,13 +3623,13 @@ int get_and_dump_counters(void) ...@@ -3119,13 +3623,13 @@ int get_and_dump_counters(void)
if (status) if (status)
return status; return status;
flush_stdout(); flush_output_stdout();
return status; return status;
} }
void print_version() { void print_version() {
fprintf(stderr, "turbostat version 4.8 26-Sep, 2015" fprintf(outf, "turbostat version 4.11 27 Feb 2016"
" - Len Brown <lenb@kernel.org>\n"); " - Len Brown <lenb@kernel.org>\n");
} }
...@@ -3143,6 +3647,7 @@ void cmdline(int argc, char **argv) ...@@ -3143,6 +3647,7 @@ void cmdline(int argc, char **argv)
{"Joules", no_argument, 0, 'J'}, {"Joules", no_argument, 0, 'J'},
{"MSR", required_argument, 0, 'M'}, {"MSR", required_argument, 0, 'M'},
{"msr", required_argument, 0, 'm'}, {"msr", required_argument, 0, 'm'},
{"out", required_argument, 0, 'o'},
{"Package", no_argument, 0, 'p'}, {"Package", no_argument, 0, 'p'},
{"processor", no_argument, 0, 'p'}, {"processor", no_argument, 0, 'p'},
{"Summary", no_argument, 0, 'S'}, {"Summary", no_argument, 0, 'S'},
...@@ -3153,7 +3658,7 @@ void cmdline(int argc, char **argv) ...@@ -3153,7 +3658,7 @@ void cmdline(int argc, char **argv)
progname = argv[0]; progname = argv[0];
while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:PpST:v", while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v",
long_options, &option_index)) != -1) { long_options, &option_index)) != -1) {
switch (opt) { switch (opt) {
case 'C': case 'C':
...@@ -3173,7 +3678,18 @@ void cmdline(int argc, char **argv) ...@@ -3173,7 +3678,18 @@ void cmdline(int argc, char **argv)
help(); help();
exit(1); exit(1);
case 'i': case 'i':
interval_sec = atoi(optarg); {
double interval = strtod(optarg, NULL);
if (interval < 0.001) {
fprintf(outf, "interval %f seconds is too small\n",
interval);
exit(2);
}
interval_ts.tv_sec = interval;
interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
}
break; break;
case 'J': case 'J':
rapl_joules++; rapl_joules++;
...@@ -3184,6 +3700,9 @@ void cmdline(int argc, char **argv) ...@@ -3184,6 +3700,9 @@ void cmdline(int argc, char **argv)
case 'm': case 'm':
sscanf(optarg, "%x", &extra_msr_offset32); sscanf(optarg, "%x", &extra_msr_offset32);
break; break;
case 'o':
outf = fopen_or_die(optarg, "w");
break;
case 'P': case 'P':
show_pkg_only++; show_pkg_only++;
break; break;
...@@ -3206,6 +3725,8 @@ void cmdline(int argc, char **argv) ...@@ -3206,6 +3725,8 @@ void cmdline(int argc, char **argv)
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
outf = stderr;
cmdline(argc, argv); cmdline(argc, argv);
if (debug) if (debug)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment