Commit be3f1525, authored by Marco Elver, committed by Peter Zijlstra

perf/hw_breakpoint: Optimize constant number of breakpoint slots

Optimize internal hw_breakpoint state if the architecture's number of
breakpoint slots is constant. This avoids several kmalloc() calls and
potentially unnecessary failures if the allocations fail, as well as
subtly improves code generation and cache locality.

The protocol is that if an architecture defines hw_breakpoint_slots via
the preprocessor, it must be constant and the same for all types.
Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Dmitry Vyukov <dvyukov@google.com>
Acked-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20220829124719.675715-7-elver@google.com
parent db5f6f85
...@@ -48,10 +48,7 @@ struct pmu; ...@@ -48,10 +48,7 @@ struct pmu;
/* Maximum number of UBC channels */ /* Maximum number of UBC channels */
#define HBP_NUM 2 #define HBP_NUM 2
static inline int hw_breakpoint_slots(int type) #define hw_breakpoint_slots(type) (HBP_NUM)
{
return HBP_NUM;
}
/* arch/sh/kernel/hw_breakpoint.c */ /* arch/sh/kernel/hw_breakpoint.c */
extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw); extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
......
...@@ -44,10 +44,7 @@ struct arch_hw_breakpoint { ...@@ -44,10 +44,7 @@ struct arch_hw_breakpoint {
/* Total number of available HW breakpoint registers */ /* Total number of available HW breakpoint registers */
#define HBP_NUM 4 #define HBP_NUM 4
static inline int hw_breakpoint_slots(int type) #define hw_breakpoint_slots(type) (HBP_NUM)
{
return HBP_NUM;
}
struct perf_event_attr; struct perf_event_attr;
struct perf_event; struct perf_event;
......
...@@ -40,13 +40,16 @@ struct bp_cpuinfo { ...@@ -40,13 +40,16 @@ struct bp_cpuinfo {
/* Number of pinned cpu breakpoints in a cpu */ /* Number of pinned cpu breakpoints in a cpu */
unsigned int cpu_pinned; unsigned int cpu_pinned;
/* tsk_pinned[n] is the number of tasks having n+1 breakpoints */ /* tsk_pinned[n] is the number of tasks having n+1 breakpoints */
#ifdef hw_breakpoint_slots
unsigned int tsk_pinned[hw_breakpoint_slots(0)];
#else
unsigned int *tsk_pinned; unsigned int *tsk_pinned;
#endif
/* Number of non-pinned cpu/task breakpoints in a cpu */ /* Number of non-pinned cpu/task breakpoints in a cpu */
unsigned int flexible; /* XXX: placeholder, see fetch_this_slot() */ unsigned int flexible; /* XXX: placeholder, see fetch_this_slot() */
}; };
static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]); static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
static int nr_slots[TYPE_MAX] __ro_after_init;
static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type) static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
{ {
...@@ -73,6 +76,54 @@ struct bp_busy_slots { ...@@ -73,6 +76,54 @@ struct bp_busy_slots {
/* Serialize accesses to the above constraints */ /* Serialize accesses to the above constraints */
static DEFINE_MUTEX(nr_bp_mutex); static DEFINE_MUTEX(nr_bp_mutex);
#ifdef hw_breakpoint_slots
/*
 * The architecture supplies hw_breakpoint_slots() as a preprocessor macro,
 * so the slot count is a compile-time constant. Verify at build time that
 * instruction and data breakpoints report the same number of slots.
 */
static_assert(hw_breakpoint_slots(TYPE_INST) == hw_breakpoint_slots(TYPE_DATA));

/* Return the slot count for @type by expanding the architecture macro directly. */
static inline int hw_breakpoint_slots_cached(int type)
{
	return hw_breakpoint_slots(type);
}

/*
 * No setup is performed in the constant-slot configuration.
 * Always returns 0.
 */
static inline int init_breakpoint_slots(void)
{
	return 0;
}
#else
/*
 * The number of breakpoint slots is only known at run time. It is looked up
 * once per breakpoint type during init and kept in this table, indexed by
 * type.
 */
static int __nr_bp_slots[TYPE_MAX] __ro_after_init;

/* Return the cached slot count for breakpoint type @type. */
static inline int hw_breakpoint_slots_cached(int type)
{
	const int cached = __nr_bp_slots[type];

	return cached;
}
/*
 * init_breakpoint_slots - cache slot counts and allocate per-CPU tables.
 *
 * Calls hw_breakpoint_slots() once for every breakpoint type and stores the
 * result in __nr_bp_slots[]. Then, for every possible CPU and every type,
 * allocates a zeroed tsk_pinned[] array with that many counters.
 *
 * Return: 0 on success, -ENOMEM if any allocation fails. On failure every
 * array allocated so far is freed and its pointer is reset to NULL, so no
 * stale pointer is left behind in the per-CPU state.
 */
static __init int init_breakpoint_slots(void)
{
	int i, cpu, err_cpu;

	for (i = 0; i < TYPE_MAX; i++)
		__nr_bp_slots[i] = hw_breakpoint_slots(i);

	for_each_possible_cpu(cpu) {
		for (i = 0; i < TYPE_MAX; i++) {
			struct bp_cpuinfo *info = get_bp_info(cpu, i);

			/* Size by the element itself so it follows tsk_pinned's type. */
			info->tsk_pinned = kcalloc(__nr_bp_slots[i],
						   sizeof(*info->tsk_pinned),
						   GFP_KERNEL);
			if (!info->tsk_pinned)
				goto err;
		}
	}

	return 0;

err:
	/*
	 * Unwind: visit the CPUs in the same order, up to and including the
	 * one on which the allocation failed. Entries on that CPU that were
	 * never allocated are passed to kfree() as well; this relies on
	 * kfree() accepting a NULL pointer.
	 */
	for_each_possible_cpu(err_cpu) {
		for (i = 0; i < TYPE_MAX; i++) {
			struct bp_cpuinfo *info = get_bp_info(err_cpu, i);

			kfree(info->tsk_pinned);
			/* Do not leave a dangling pointer in the static per-CPU data. */
			info->tsk_pinned = NULL;
		}
		if (err_cpu == cpu)
			break;
	}

	return -ENOMEM;
}
#endif
__weak int hw_breakpoint_weight(struct perf_event *bp) __weak int hw_breakpoint_weight(struct perf_event *bp)
{ {
return 1; return 1;
...@@ -95,7 +146,7 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type) ...@@ -95,7 +146,7 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned; unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
int i; int i;
for (i = nr_slots[type] - 1; i >= 0; i--) { for (i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
if (tsk_pinned[i] > 0) if (tsk_pinned[i] > 0)
return i + 1; return i + 1;
} }
...@@ -312,7 +363,7 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type) ...@@ -312,7 +363,7 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
fetch_this_slot(&slots, weight); fetch_this_slot(&slots, weight);
/* Flexible counters need to keep at least one slot */ /* Flexible counters need to keep at least one slot */
if (slots.pinned + (!!slots.flexible) > nr_slots[type]) if (slots.pinned + (!!slots.flexible) > hw_breakpoint_slots_cached(type))
return -ENOSPC; return -ENOSPC;
ret = arch_reserve_bp_slot(bp); ret = arch_reserve_bp_slot(bp);
...@@ -632,7 +683,7 @@ bool hw_breakpoint_is_used(void) ...@@ -632,7 +683,7 @@ bool hw_breakpoint_is_used(void)
if (info->cpu_pinned) if (info->cpu_pinned)
return true; return true;
for (int slot = 0; slot < nr_slots[type]; ++slot) { for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
if (info->tsk_pinned[slot]) if (info->tsk_pinned[slot])
return true; return true;
} }
...@@ -716,42 +767,19 @@ static struct pmu perf_breakpoint = { ...@@ -716,42 +767,19 @@ static struct pmu perf_breakpoint = {
int __init init_hw_breakpoint(void) int __init init_hw_breakpoint(void)
{ {
int cpu, err_cpu; int ret;
int i, ret;
for (i = 0; i < TYPE_MAX; i++)
nr_slots[i] = hw_breakpoint_slots(i);
for_each_possible_cpu(cpu) {
for (i = 0; i < TYPE_MAX; i++) {
struct bp_cpuinfo *info = get_bp_info(cpu, i);
info->tsk_pinned = kcalloc(nr_slots[i], sizeof(int),
GFP_KERNEL);
if (!info->tsk_pinned) {
ret = -ENOMEM;
goto err;
}
}
}
ret = rhltable_init(&task_bps_ht, &task_bps_ht_params); ret = rhltable_init(&task_bps_ht, &task_bps_ht_params);
if (ret) if (ret)
goto err; return ret;
ret = init_breakpoint_slots();
if (ret)
return ret;
constraints_initialized = true; constraints_initialized = true;
perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT); perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
return register_die_notifier(&hw_breakpoint_exceptions_nb); return register_die_notifier(&hw_breakpoint_exceptions_nb);
err:
for_each_possible_cpu(err_cpu) {
for (i = 0; i < TYPE_MAX; i++)
kfree(get_bp_info(err_cpu, i)->tsk_pinned);
if (err_cpu == cpu)
break;
}
return ret;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment