Commit a5a107f0 authored by John Levon's avatar John Levon Committed by Linus Torvalds

[PATCH] OProfile: reduce allocations of MSR structs

Andi Kleen pointed out the MSRs array was a massive bloat source. Reduce
it somewhat by only allocating the amount actually needed for the CPU type.

Untested on Pentium IV - I don't have a machine.
parent 0e7ee1c1
......@@ -12,6 +12,7 @@
#include <linux/smp.h>
#include <linux/oprofile.h>
#include <linux/sysdev.h>
#include <linux/slab.h>
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>
......@@ -91,24 +92,66 @@ static void nmi_save_registers(struct op_msrs * msrs)
{
unsigned int const nr_ctrs = model->num_counters;
unsigned int const nr_ctrls = model->num_controls;
struct op_msr_group * counters = &msrs->counters;
struct op_msr_group * controls = &msrs->controls;
struct op_msr * counters = msrs->counters;
struct op_msr * controls = msrs->controls;
unsigned int i;
for (i = 0; i < nr_ctrs; ++i) {
rdmsr(counters->addrs[i],
counters->saved[i].low,
counters->saved[i].high);
rdmsr(counters[i].addr,
counters[i].saved.low,
counters[i].saved.high);
}
for (i = 0; i < nr_ctrls; ++i) {
rdmsr(controls->addrs[i],
controls->saved[i].low,
controls->saved[i].high);
rdmsr(controls[i].addr,
controls[i].saved.low,
controls[i].saved.high);
}
}
static void free_msrs(void)
{
int i;
for (i = 0; i < NR_CPUS; ++i) {
kfree(cpu_msrs[i].counters);
cpu_msrs[i].counters = NULL;
kfree(cpu_msrs[i].controls);
cpu_msrs[i].controls = NULL;
}
}
static int allocate_msrs(void)
{
int success = 1;
size_t controls_size = sizeof(struct op_msr) * model->num_controls;
size_t counters_size = sizeof(struct op_msr) * model->num_counters;
int i;
for (i = 0; i < NR_CPUS; ++i) {
if (!cpu_online(i))
continue;
cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL);
if (!cpu_msrs[i].counters) {
success = 0;
break;
}
cpu_msrs[i].controls = kmalloc(controls_size, GFP_KERNEL);
if (!cpu_msrs[i].controls) {
success = 0;
break;
}
}
if (!success)
free_msrs();
return success;
}
static void nmi_cpu_setup(void * dummy)
{
int cpu = smp_processor_id();
......@@ -125,6 +168,9 @@ static void nmi_cpu_setup(void * dummy)
static int nmi_setup(void)
{
if (!allocate_msrs())
return -ENOMEM;
/* We walk a thin line between law and rape here.
* We need to be careful to install our NMI handler
* without actually triggering any NMIs as this will
......@@ -142,20 +188,20 @@ static void nmi_restore_registers(struct op_msrs * msrs)
{
unsigned int const nr_ctrs = model->num_counters;
unsigned int const nr_ctrls = model->num_controls;
struct op_msr_group * counters = &msrs->counters;
struct op_msr_group * controls = &msrs->controls;
struct op_msr * counters = msrs->counters;
struct op_msr * controls = msrs->controls;
unsigned int i;
for (i = 0; i < nr_ctrls; ++i) {
wrmsr(controls->addrs[i],
controls->saved[i].low,
controls->saved[i].high);
wrmsr(controls[i].addr,
controls[i].saved.low,
controls[i].saved.high);
}
for (i = 0; i < nr_ctrs; ++i) {
wrmsr(counters->addrs[i],
counters->saved[i].low,
counters->saved[i].high);
wrmsr(counters[i].addr,
counters[i].saved.low,
counters[i].saved.high);
}
}
......@@ -185,6 +231,7 @@ static void nmi_shutdown(void)
on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
unset_nmi_callback();
enable_lapic_nmi_watchdog();
free_msrs();
}
......
......@@ -20,12 +20,12 @@
#define NUM_COUNTERS 4
#define NUM_CONTROLS 4
#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters.addrs[(c)], (l), (h));} while (0)
#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters.addrs[(c)], -(unsigned int)(l), -1);} while (0)
#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls.addrs[(c)], (l), (h));} while (0)
#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls.addrs[(c)], (l), (h));} while (0)
#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
#define CTRL_CLEAR(x) (x &= (1<<21))
......@@ -39,15 +39,15 @@ static unsigned long reset_value[NUM_COUNTERS];
static void athlon_fill_in_addresses(struct op_msrs * const msrs)
{
msrs->counters.addrs[0] = MSR_K7_PERFCTR0;
msrs->counters.addrs[1] = MSR_K7_PERFCTR1;
msrs->counters.addrs[2] = MSR_K7_PERFCTR2;
msrs->counters.addrs[3] = MSR_K7_PERFCTR3;
msrs->controls.addrs[0] = MSR_K7_EVNTSEL0;
msrs->controls.addrs[1] = MSR_K7_EVNTSEL1;
msrs->controls.addrs[2] = MSR_K7_EVNTSEL2;
msrs->controls.addrs[3] = MSR_K7_EVNTSEL3;
msrs->counters[0].addr = MSR_K7_PERFCTR0;
msrs->counters[1].addr = MSR_K7_PERFCTR1;
msrs->counters[2].addr = MSR_K7_PERFCTR2;
msrs->counters[3].addr = MSR_K7_PERFCTR3;
msrs->controls[0].addr = MSR_K7_EVNTSEL0;
msrs->controls[1].addr = MSR_K7_EVNTSEL1;
msrs->controls[2].addr = MSR_K7_EVNTSEL2;
msrs->controls[3].addr = MSR_K7_EVNTSEL3;
}
......
......@@ -366,8 +366,8 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
#define CCCR_READ(low, high, i) do {rdmsr (p4_counters[(i)].cccr_address, (low), (high));} while (0)
#define CCCR_WRITE(low, high, i) do {wrmsr (p4_counters[(i)].cccr_address, (low), (high));} while (0)
#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
......@@ -410,7 +410,7 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)
/* the counter registers we pay attention to */
for (i = 0; i < num_counters; ++i) {
msrs->counters.addrs[i] =
msrs->counters[i].addr =
p4_counters[VIRT_CTR(stag, i)].counter_address;
}
......@@ -419,42 +419,42 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)
/* 18 CCCR registers */
for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
msrs->controls.addrs[i] = addr;
msrs->controls[i].addr = addr;
}
/* 43 ESCR registers in three discontiguous group */
for (addr = MSR_P4_BSU_ESCR0 + stag;
addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
msrs->controls.addrs[i] = addr;
msrs->controls[i].addr = addr;
}
for (addr = MSR_P4_MS_ESCR0 + stag;
addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
msrs->controls.addrs[i] = addr;
msrs->controls[i].addr = addr;
}
for (addr = MSR_P4_IX_ESCR0 + stag;
addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
msrs->controls.addrs[i] = addr;
msrs->controls[i].addr = addr;
}
/* there are 2 remaining non-contiguously located ESCRs */
if (num_counters == NUM_COUNTERS_NON_HT) {
/* standard non-HT CPUs handle both remaining ESCRs*/
msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR5;
msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR4;
msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
} else if (stag == 0) {
/* HT CPUs give the first remainder to the even thread, as
the 32nd control register */
msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR4;
msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
} else {
/* and two copies of the second to the odd thread,
for the 22st and 23nd control registers */
msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR5;
msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR5;
msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
}
}
......
......@@ -20,12 +20,12 @@
#define NUM_COUNTERS 2
#define NUM_CONTROLS 2
#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters.addrs[(c)], (l), (h));} while (0)
#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters.addrs[(c)], -(u32)(l), -1);} while (0)
#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls.addrs[(c)]), (l), (h));} while (0)
#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls.addrs[(c)]), (l), (h));} while (0)
#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
#define CTRL_CLEAR(x) (x &= (1<<21))
......@@ -39,11 +39,11 @@ static unsigned long reset_value[NUM_COUNTERS];
static void ppro_fill_in_addresses(struct op_msrs * const msrs)
{
msrs->counters.addrs[0] = MSR_P6_PERFCTR0;
msrs->counters.addrs[1] = MSR_P6_PERFCTR1;
msrs->counters[0].addr = MSR_P6_PERFCTR0;
msrs->counters[1].addr = MSR_P6_PERFCTR1;
msrs->controls.addrs[0] = MSR_P6_EVNTSEL0;
msrs->controls.addrs[1] = MSR_P6_EVNTSEL1;
msrs->controls[0].addr = MSR_P6_EVNTSEL0;
msrs->controls[1].addr = MSR_P6_EVNTSEL1;
}
......
......@@ -11,22 +11,19 @@
#ifndef OP_X86_MODEL_H
#define OP_X86_MODEL_H
/* Pentium IV needs all these */
#define MAX_MSR 63
struct op_saved_msr {
unsigned int high;
unsigned int low;
};
struct op_msr_group {
unsigned int addrs[MAX_MSR];
struct op_saved_msr saved[MAX_MSR];
struct op_msr {
unsigned long addr;
struct op_saved_msr saved;
};
struct op_msrs {
struct op_msr_group counters;
struct op_msr_group controls;
struct op_msr * counters;
struct op_msr * controls;
};
struct pt_regs;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment