Commit bfaa7834, authored by Thiago Jung Bauermann, committed by Michael Ellerman

powerpc/perf/hv-24x7: Aggregate result elements on POWER9 SMT8

On POWER9 SMT8 the 24x7 API returns two result elements for physical core
and virtual CPU events and we need to add their counts to get the final
result.
Reviewed-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent 2e6553aa
...@@ -31,6 +31,9 @@ ...@@ -31,6 +31,9 @@
/* Version of the 24x7 hypervisor API that we should use in this machine. */ /* Version of the 24x7 hypervisor API that we should use in this machine. */
static int interface_version; static int interface_version;
/*
 * Whether we have to aggregate result data for some domains.
 *
 * Set to true by hv_24x7_init() when the machine is not POWER8 and
 * threads_per_core is 8; consulted by domain_needs_aggregation().
 */
static bool aggregate_result_elements;
static bool domain_is_valid(unsigned domain) static bool domain_is_valid(unsigned domain)
{ {
switch (domain) { switch (domain) {
...@@ -58,6 +61,15 @@ static bool is_physical_domain(unsigned domain) ...@@ -58,6 +61,15 @@ static bool is_physical_domain(unsigned domain)
} }
} }
/*
 * Tell whether more than one result element is returned for each event in
 * @domain, meaning the element counts have to be added together.
 *
 * This can only be true while aggregate_result_elements is set, and then only
 * for the physical core domain and for the virtual CPU domains in the range
 * HV_PERF_DOMAIN_VCPU_HOME_CORE..HV_PERF_DOMAIN_VCPU_REMOTE_NODE (inclusive).
 */
static bool domain_needs_aggregation(unsigned int domain)
{
	/* Aggregation is disabled machine-wide: no domain needs it. */
	if (!aggregate_result_elements)
		return false;

	if (domain == HV_PERF_DOMAIN_PHYS_CORE)
		return true;

	return domain >= HV_PERF_DOMAIN_VCPU_HOME_CORE &&
	       domain <= HV_PERF_DOMAIN_VCPU_REMOTE_NODE;
}
static const char *domain_name(unsigned domain) static const char *domain_name(unsigned domain)
{ {
if (!domain_is_valid(domain)) if (!domain_is_valid(domain))
...@@ -1145,17 +1157,23 @@ static int add_event_to_24x7_request(struct perf_event *event, ...@@ -1145,17 +1157,23 @@ static int add_event_to_24x7_request(struct perf_event *event,
req->starting_ix = cpu_to_be16(idx); req->starting_ix = cpu_to_be16(idx);
req->max_ix = cpu_to_be16(1); req->max_ix = cpu_to_be16(1);
if (request_buffer->interface_version > 1 && if (request_buffer->interface_version > 1) {
req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) { if (domain_needs_aggregation(req->performance_domain))
req->starting_thread_group_ix = idx % 2; req->max_num_thread_groups = -1;
req->max_num_thread_groups = 1; else if (req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) {
req->starting_thread_group_ix = idx % 2;
req->max_num_thread_groups = 1;
}
} }
return 0; return 0;
} }
/** /**
* get_count_from_result - get event count from the given result * get_count_from_result - get event count from all result elements in result
*
* If the event corresponding to this result needs aggregation of the result
* element values, then this function does that.
* *
* @event: Event associated with @res. * @event: Event associated with @res.
* @resb: Result buffer containing @res. * @resb: Result buffer containing @res.
...@@ -1172,6 +1190,8 @@ static int get_count_from_result(struct perf_event *event, ...@@ -1172,6 +1190,8 @@ static int get_count_from_result(struct perf_event *event,
u16 data_size = be16_to_cpu(res->result_element_data_size); u16 data_size = be16_to_cpu(res->result_element_data_size);
unsigned int data_offset; unsigned int data_offset;
void *element_data; void *element_data;
int i;
u64 count;
/* /*
* We can bail out early if the result is empty. * We can bail out early if the result is empty.
...@@ -1189,8 +1209,10 @@ static int get_count_from_result(struct perf_event *event, ...@@ -1189,8 +1209,10 @@ static int get_count_from_result(struct perf_event *event,
/* /*
* Since we always specify 1 as the maximum for the smallest resource * Since we always specify 1 as the maximum for the smallest resource
* we're requesting, there should to be only one element per result. * we're requesting, there should to be only one element per result.
* Except when an event needs aggregation, in which case there are more.
*/ */
if (num_elements != 1) { if (num_elements != 1 &&
!domain_needs_aggregation(event_get_domain(event))) {
pr_err("Error: result of request %hhu has %hu elements\n", pr_err("Error: result of request %hhu has %hu elements\n",
res->result_ix, num_elements); res->result_ix, num_elements);
...@@ -1211,13 +1233,17 @@ static int get_count_from_result(struct perf_event *event, ...@@ -1211,13 +1233,17 @@ static int get_count_from_result(struct perf_event *event,
data_offset = offsetof(struct hv_24x7_result_element_v2, data_offset = offsetof(struct hv_24x7_result_element_v2,
element_data); element_data);
element_data = res->elements + data_offset; /* Go through the result elements in the result. */
for (i = count = 0, element_data = res->elements + data_offset;
i < num_elements;
i++, element_data += data_size + data_offset)
count += be64_to_cpu(*((u64 *) element_data));
*countp = be64_to_cpu(*((u64 *) element_data)); *countp = count;
/* The next result is after the result element. */ /* The next result is after the last result element. */
if (next) if (next)
*next = element_data + data_size; *next = element_data - data_offset;
return 0; return 0;
} }
...@@ -1568,9 +1594,14 @@ static int hv_24x7_init(void) ...@@ -1568,9 +1594,14 @@ static int hv_24x7_init(void)
/* POWER8 only supports v1, while POWER9 only supports v2. */ /* POWER8 only supports v1, while POWER9 only supports v2. */
if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8")) if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8"))
interface_version = 1; interface_version = 1;
else else {
interface_version = 2; interface_version = 2;
/* SMT8 in POWER9 needs to aggregate result elements. */
if (threads_per_core == 8)
aggregate_result_elements = true;
}
hret = hv_perf_caps_get(&caps); hret = hv_perf_caps_get(&caps);
if (hret) { if (hret) {
pr_debug("could not obtain capabilities, not enabling, rc=%ld\n", pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment