Commit a5a107f0 authored by John Levon, committed by Linus Torvalds

[PATCH] OProfile: reduce allocations of MSR structs

Andi Kleen pointed out the MSRs array was a massive bloat source. Reduce
it somewhat by only allocating the amount actually needed for the CPU type.

Untested on Pentium IV - I don't have a machine.
parent 0e7ee1c1
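
(For a sense of the savings: the sketch below is not part of the patch; it only compares the old fixed-size layout against the new per-model allocation, assuming a 32-bit i386 build and the Athlon/K7 case of 4 counters and 4 controls.)

/*
 * Illustrative size comparison only -- not part of the commit.
 * Struct layouts are taken from op_x86_model.h before and after the
 * patch; byte counts assume a 32-bit i386 build (unsigned long == 4).
 */
#include <stdio.h>

struct op_saved_msr { unsigned int high; unsigned int low; };

/* old layout: every group is sized for the Pentium IV worst case */
#define MAX_MSR 63
struct op_msr_group {
        unsigned int addrs[MAX_MSR];
        struct op_saved_msr saved[MAX_MSR];
};
struct old_op_msrs { struct op_msr_group counters, controls; };

/* new layout: one small entry per MSR, allocated per CPU model */
struct op_msr { unsigned long addr; struct op_saved_msr saved; };

int main(void)
{
        /* Athlon/K7 uses 4 counters + 4 controls (athlon_fill_in_addresses) */
        printf("old: %zu bytes per CPU\n", sizeof(struct old_op_msrs)); /* ~1512 */
        printf("new: %zu bytes per CPU\n", 8 * sizeof(struct op_msr));  /* ~96   */
        return 0;
}

Since cpu_msrs is sized by NR_CPUS, the fixed-size groups were previously paid for every possible CPU; after the patch only the pointer pair stays static and the small arrays are allocated for online CPUs only.
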
@@ -12,6 +12,7 @@
 #include <linux/smp.h>
 #include <linux/oprofile.h>
 #include <linux/sysdev.h>
+#include <linux/slab.h>
 #include <asm/nmi.h>
 #include <asm/msr.h>
 #include <asm/apic.h>
@@ -91,24 +92,66 @@ static void nmi_save_registers(struct op_msrs * msrs)
 {
        unsigned int const nr_ctrs = model->num_counters;
        unsigned int const nr_ctrls = model->num_controls;
-       struct op_msr_group * counters = &msrs->counters;
-       struct op_msr_group * controls = &msrs->controls;
+       struct op_msr * counters = msrs->counters;
+       struct op_msr * controls = msrs->controls;
        unsigned int i;

        for (i = 0; i < nr_ctrs; ++i) {
-               rdmsr(counters->addrs[i],
-                       counters->saved[i].low,
-                       counters->saved[i].high);
+               rdmsr(counters[i].addr,
+                       counters[i].saved.low,
+                       counters[i].saved.high);
        }

        for (i = 0; i < nr_ctrls; ++i) {
-               rdmsr(controls->addrs[i],
-                       controls->saved[i].low,
-                       controls->saved[i].high);
+               rdmsr(controls[i].addr,
+                       controls[i].saved.low,
+                       controls[i].saved.high);
        }
 }
+
+static void free_msrs(void)
+{
+       int i;
+       for (i = 0; i < NR_CPUS; ++i) {
+               kfree(cpu_msrs[i].counters);
+               cpu_msrs[i].counters = NULL;
+               kfree(cpu_msrs[i].controls);
+               cpu_msrs[i].controls = NULL;
+       }
+}
+
+
+static int allocate_msrs(void)
+{
+       int success = 1;
+       size_t controls_size = sizeof(struct op_msr) * model->num_controls;
+       size_t counters_size = sizeof(struct op_msr) * model->num_counters;
+
+       int i;
+       for (i = 0; i < NR_CPUS; ++i) {
+               if (!cpu_online(i))
+                       continue;
+
+               cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL);
+               if (!cpu_msrs[i].counters) {
+                       success = 0;
+                       break;
+               }
+               cpu_msrs[i].controls = kmalloc(controls_size, GFP_KERNEL);
+               if (!cpu_msrs[i].controls) {
+                       success = 0;
+                       break;
+               }
+       }
+
+       if (!success)
+               free_msrs();
+
+       return success;
+}

 static void nmi_cpu_setup(void * dummy)
 {
        int cpu = smp_processor_id();
@@ -125,6 +168,9 @@ static void nmi_cpu_setup(void * dummy)

 static int nmi_setup(void)
 {
+       if (!allocate_msrs())
+               return -ENOMEM;
+
        /* We walk a thin line between law and rape here.
         * We need to be careful to install our NMI handler
         * without actually triggering any NMIs as this will
@@ -142,20 +188,20 @@ static void nmi_restore_registers(struct op_msrs * msrs)
 {
        unsigned int const nr_ctrs = model->num_counters;
        unsigned int const nr_ctrls = model->num_controls;
-       struct op_msr_group * counters = &msrs->counters;
-       struct op_msr_group * controls = &msrs->controls;
+       struct op_msr * counters = msrs->counters;
+       struct op_msr * controls = msrs->controls;
        unsigned int i;

        for (i = 0; i < nr_ctrls; ++i) {
-               wrmsr(controls->addrs[i],
-                       controls->saved[i].low,
-                       controls->saved[i].high);
+               wrmsr(controls[i].addr,
+                       controls[i].saved.low,
+                       controls[i].saved.high);
        }

        for (i = 0; i < nr_ctrs; ++i) {
-               wrmsr(counters->addrs[i],
-                       counters->saved[i].low,
-                       counters->saved[i].high);
+               wrmsr(counters[i].addr,
+                       counters[i].saved.low,
+                       counters[i].saved.high);
        }
 }
@@ -185,6 +231,7 @@ static void nmi_shutdown(void)
        on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
        unset_nmi_callback();
        enable_lapic_nmi_watchdog();
+       free_msrs();
 }
...
@@ -20,12 +20,12 @@
 #define NUM_COUNTERS 4
 #define NUM_CONTROLS 4

-#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters.addrs[(c)], (l), (h));} while (0)
-#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters.addrs[(c)], -(unsigned int)(l), -1);} while (0)
+#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
 #define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))

-#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls.addrs[(c)], (l), (h));} while (0)
-#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls.addrs[(c)], (l), (h));} while (0)
+#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
 #define CTRL_SET_ACTIVE(n) (n |= (1<<22))
 #define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
 #define CTRL_CLEAR(x) (x &= (1<<21))
@@ -39,15 +39,15 @@ static unsigned long reset_value[NUM_COUNTERS];

 static void athlon_fill_in_addresses(struct op_msrs * const msrs)
 {
-       msrs->counters.addrs[0] = MSR_K7_PERFCTR0;
-       msrs->counters.addrs[1] = MSR_K7_PERFCTR1;
-       msrs->counters.addrs[2] = MSR_K7_PERFCTR2;
-       msrs->counters.addrs[3] = MSR_K7_PERFCTR3;
-       msrs->controls.addrs[0] = MSR_K7_EVNTSEL0;
-       msrs->controls.addrs[1] = MSR_K7_EVNTSEL1;
-       msrs->controls.addrs[2] = MSR_K7_EVNTSEL2;
-       msrs->controls.addrs[3] = MSR_K7_EVNTSEL3;
+       msrs->counters[0].addr = MSR_K7_PERFCTR0;
+       msrs->counters[1].addr = MSR_K7_PERFCTR1;
+       msrs->counters[2].addr = MSR_K7_PERFCTR2;
+       msrs->counters[3].addr = MSR_K7_PERFCTR3;
+       msrs->controls[0].addr = MSR_K7_EVNTSEL0;
+       msrs->controls[1].addr = MSR_K7_EVNTSEL1;
+       msrs->controls[2].addr = MSR_K7_EVNTSEL2;
+       msrs->controls[3].addr = MSR_K7_EVNTSEL3;
 }
...
@@ -366,8 +366,8 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
 #define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
 #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
 #define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
-#define CCCR_READ(low, high, i) do {rdmsr (p4_counters[(i)].cccr_address, (low), (high));} while (0)
-#define CCCR_WRITE(low, high, i) do {wrmsr (p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
 #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
 #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
@@ -410,7 +410,7 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)

        /* the counter registers we pay attention to */
        for (i = 0; i < num_counters; ++i) {
-               msrs->counters.addrs[i] =
+               msrs->counters[i].addr =
                        p4_counters[VIRT_CTR(stag, i)].counter_address;
        }
@@ -419,42 +419,42 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)

        /* 18 CCCR registers */
        for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
             addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
-               msrs->controls.addrs[i] = addr;
+               msrs->controls[i].addr = addr;
        }

        /* 43 ESCR registers in three discontiguous group */
        for (addr = MSR_P4_BSU_ESCR0 + stag;
             addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
-               msrs->controls.addrs[i] = addr;
+               msrs->controls[i].addr = addr;
        }

        for (addr = MSR_P4_MS_ESCR0 + stag;
             addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
-               msrs->controls.addrs[i] = addr;
+               msrs->controls[i].addr = addr;
        }

        for (addr = MSR_P4_IX_ESCR0 + stag;
             addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
-               msrs->controls.addrs[i] = addr;
+               msrs->controls[i].addr = addr;
        }

        /* there are 2 remaining non-contiguously located ESCRs */
        if (num_counters == NUM_COUNTERS_NON_HT) {
                /* standard non-HT CPUs handle both remaining ESCRs*/
-               msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR5;
-               msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR4;
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
        } else if (stag == 0) {
                /* HT CPUs give the first remainder to the even thread, as
                   the 32nd control register */
-               msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR4;
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
        } else {
                /* and two copies of the second to the odd thread,
                   for the 22st and 23nd control registers */
-               msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR5;
-               msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR5;
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
        }
 }
...
@@ -20,12 +20,12 @@
 #define NUM_COUNTERS 2
 #define NUM_CONTROLS 2

-#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters.addrs[(c)], (l), (h));} while (0)
-#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters.addrs[(c)], -(u32)(l), -1);} while (0)
+#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
 #define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))

-#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls.addrs[(c)]), (l), (h));} while (0)
-#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls.addrs[(c)]), (l), (h));} while (0)
+#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
 #define CTRL_SET_ACTIVE(n) (n |= (1<<22))
 #define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
 #define CTRL_CLEAR(x) (x &= (1<<21))
@@ -39,11 +39,11 @@ static unsigned long reset_value[NUM_COUNTERS];

 static void ppro_fill_in_addresses(struct op_msrs * const msrs)
 {
-       msrs->counters.addrs[0] = MSR_P6_PERFCTR0;
-       msrs->counters.addrs[1] = MSR_P6_PERFCTR1;
-       msrs->controls.addrs[0] = MSR_P6_EVNTSEL0;
-       msrs->controls.addrs[1] = MSR_P6_EVNTSEL1;
+       msrs->counters[0].addr = MSR_P6_PERFCTR0;
+       msrs->counters[1].addr = MSR_P6_PERFCTR1;
+       msrs->controls[0].addr = MSR_P6_EVNTSEL0;
+       msrs->controls[1].addr = MSR_P6_EVNTSEL1;
 }
...
@@ -11,22 +11,19 @@
 #ifndef OP_X86_MODEL_H
 #define OP_X86_MODEL_H

-/* Pentium IV needs all these */
-#define MAX_MSR 63
-
 struct op_saved_msr {
        unsigned int high;
        unsigned int low;
 };

-struct op_msr_group {
-       unsigned int addrs[MAX_MSR];
-       struct op_saved_msr saved[MAX_MSR];
+struct op_msr {
+       unsigned long addr;
+       struct op_saved_msr saved;
 };

 struct op_msrs {
-       struct op_msr_group counters;
-       struct op_msr_group controls;
+       struct op_msr * counters;
+       struct op_msr * controls;
 };

 struct pt_regs;
...