b.liu | e958203 | 2025-04-17 19:18:16 +0800 | [diff] [blame^] | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 2 | /* |
| 3 | * Hypervisor supplied "gpci" ("get performance counter info") performance |
| 4 | * counter support |
| 5 | * |
| 6 | * Author: Cody P Schafer <cody@linux.vnet.ibm.com> |
| 7 | * Copyright 2014 IBM Corporation. |
| 8 | */ |
| 9 | |
| 10 | #define pr_fmt(fmt) "hv-gpci: " fmt |
| 11 | |
| 12 | #include <linux/init.h> |
| 13 | #include <linux/perf_event.h> |
| 14 | #include <asm/firmware.h> |
| 15 | #include <asm/hvcall.h> |
| 16 | #include <asm/io.h> |
| 17 | |
| 18 | #include "hv-gpci.h" |
| 19 | #include "hv-common.h" |
| 20 | |
| 21 | /* |
| 22 | * Example usage: |
| 23 | * perf stat -e 'hv_gpci/counter_info_version=3,offset=0,length=8, |
| 24 | * secondary_index=0,starting_index=0xffffffff,request=0x10/' ... |
| 25 | */ |
| 26 | |
| 27 | /* u32 */ |
| 28 | EVENT_DEFINE_RANGE_FORMAT(request, config, 0, 31); |
| 29 | /* u32 */ |
| 30 | /* |
| 31 | * Note that starting_index, phys_processor_idx, sibling_part_id, |
| 32 | * hw_chip_id, partition_id all refer to the same bit range. They |
| 33 | * are basically aliases for the starting_index. The specific alias |
| 34 | * used depends on the event. See REQUEST_IDX_KIND in hv-gpci-requests.h |
| 35 | */ |
| 36 | EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 32, 63); |
| 37 | EVENT_DEFINE_RANGE_FORMAT_LITE(phys_processor_idx, config, 32, 63); |
| 38 | EVENT_DEFINE_RANGE_FORMAT_LITE(sibling_part_id, config, 32, 63); |
| 39 | EVENT_DEFINE_RANGE_FORMAT_LITE(hw_chip_id, config, 32, 63); |
| 40 | EVENT_DEFINE_RANGE_FORMAT_LITE(partition_id, config, 32, 63); |
| 41 | |
| 42 | /* u16 */ |
| 43 | EVENT_DEFINE_RANGE_FORMAT(secondary_index, config1, 0, 15); |
| 44 | /* u8 */ |
| 45 | EVENT_DEFINE_RANGE_FORMAT(counter_info_version, config1, 16, 23); |
| 46 | /* u8, bytes of data (1-8) */ |
| 47 | EVENT_DEFINE_RANGE_FORMAT(length, config1, 24, 31); |
| 48 | /* u32, byte offset */ |
| 49 | EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63); |
| 50 | |
| 51 | static struct attribute *format_attrs[] = { |
| 52 | &format_attr_request.attr, |
| 53 | &format_attr_starting_index.attr, |
| 54 | &format_attr_phys_processor_idx.attr, |
| 55 | &format_attr_sibling_part_id.attr, |
| 56 | &format_attr_hw_chip_id.attr, |
| 57 | &format_attr_partition_id.attr, |
| 58 | &format_attr_secondary_index.attr, |
| 59 | &format_attr_counter_info_version.attr, |
| 60 | |
| 61 | &format_attr_offset.attr, |
| 62 | &format_attr_length.attr, |
| 63 | NULL, |
| 64 | }; |
| 65 | |
| 66 | static struct attribute_group format_group = { |
| 67 | .name = "format", |
| 68 | .attrs = format_attrs, |
| 69 | }; |
| 70 | |
| 71 | static struct attribute_group event_group = { |
| 72 | .name = "events", |
| 73 | /* .attrs is set in init */ |
| 74 | }; |
| 75 | |
/*
 * HV_CAPS_ATTR(_name, _format) - define a read-only device attribute that
 * reports one member of struct hv_perf_caps.
 *
 * Expands to _name##_show(), which calls hv_perf_caps_get() on each read and
 * prints caps._name into @page using _format. A non-zero status from
 * hv_perf_caps_get() is reported as -EIO. Also expands to the attribute
 * object hv_caps_attr_##_name, built with __ATTR_RO(_name).
 */
#define HV_CAPS_ATTR(_name, _format)				\
static ssize_t _name##_show(struct device *dev,			\
			    struct device_attribute *attr,	\
			    char *page)				\
{								\
	struct hv_perf_caps caps;				\
								\
	if (hv_perf_caps_get(&caps))				\
		return -EIO;					\
								\
	return sprintf(page, _format, caps._name);		\
}								\
static struct device_attribute hv_caps_attr_##_name = __ATTR_RO(_name)
| 89 | |
| 90 | static ssize_t kernel_version_show(struct device *dev, |
| 91 | struct device_attribute *attr, |
| 92 | char *page) |
| 93 | { |
| 94 | return sprintf(page, "0x%x\n", COUNTER_INFO_VERSION_CURRENT); |
| 95 | } |
| 96 | |
| 97 | static DEVICE_ATTR_RO(kernel_version); |
| 98 | HV_CAPS_ATTR(version, "0x%x\n"); |
| 99 | HV_CAPS_ATTR(ga, "%d\n"); |
| 100 | HV_CAPS_ATTR(expanded, "%d\n"); |
| 101 | HV_CAPS_ATTR(lab, "%d\n"); |
| 102 | HV_CAPS_ATTR(collect_privileged, "%d\n"); |
| 103 | |
| 104 | static struct attribute *interface_attrs[] = { |
| 105 | &dev_attr_kernel_version.attr, |
| 106 | &hv_caps_attr_version.attr, |
| 107 | &hv_caps_attr_ga.attr, |
| 108 | &hv_caps_attr_expanded.attr, |
| 109 | &hv_caps_attr_lab.attr, |
| 110 | &hv_caps_attr_collect_privileged.attr, |
| 111 | NULL, |
| 112 | }; |
| 113 | |
| 114 | static struct attribute_group interface_group = { |
| 115 | .name = "interface", |
| 116 | .attrs = interface_attrs, |
| 117 | }; |
| 118 | |
| 119 | static const struct attribute_group *attr_groups[] = { |
| 120 | &format_group, |
| 121 | &event_group, |
| 122 | &interface_group, |
| 123 | NULL, |
| 124 | }; |
| 125 | |
| 126 | #define HGPCI_REQ_BUFFER_SIZE 4096 |
| 127 | #define HGPCI_MAX_DATA_BYTES \ |
| 128 | (HGPCI_REQ_BUFFER_SIZE - sizeof(struct hv_get_perf_counter_info_params)) |
| 129 | |
| 130 | static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); |
| 131 | |
| 132 | struct hv_gpci_request_buffer { |
| 133 | struct hv_get_perf_counter_info_params params; |
| 134 | uint8_t bytes[HGPCI_MAX_DATA_BYTES]; |
| 135 | } __packed; |
| 136 | |
| 137 | static unsigned long single_gpci_request(u32 req, u32 starting_index, |
| 138 | u16 secondary_index, u8 version_in, u32 offset, u8 length, |
| 139 | u64 *value) |
| 140 | { |
| 141 | unsigned long ret; |
| 142 | size_t i; |
| 143 | u64 count; |
| 144 | struct hv_gpci_request_buffer *arg; |
| 145 | |
| 146 | arg = (void *)get_cpu_var(hv_gpci_reqb); |
| 147 | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
| 148 | |
| 149 | arg->params.counter_request = cpu_to_be32(req); |
| 150 | arg->params.starting_index = cpu_to_be32(starting_index); |
| 151 | arg->params.secondary_index = cpu_to_be16(secondary_index); |
| 152 | arg->params.counter_info_version_in = version_in; |
| 153 | |
| 154 | ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, |
| 155 | virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); |
| 156 | |
| 157 | /* |
| 158 | * ret value as 'H_PARAMETER' with detail_rc as 'GEN_BUF_TOO_SMALL', |
| 159 | * specifies that the current buffer size cannot accommodate |
| 160 | * all the information and a partial buffer returned. |
| 161 | * Since in this function we are only accessing data for a given starting index, |
| 162 | * we don't need to accommodate whole data and can get required count by |
| 163 | * accessing first entry data. |
| 164 | * Hence hcall fails only incase the ret value is other than H_SUCCESS or |
| 165 | * H_PARAMETER with detail_rc value as GEN_BUF_TOO_SMALL(0x1B). |
| 166 | */ |
| 167 | if (ret == H_PARAMETER && be32_to_cpu(arg->params.detail_rc) == 0x1B) |
| 168 | ret = 0; |
| 169 | |
| 170 | if (ret) { |
| 171 | pr_devel("hcall failed: 0x%lx\n", ret); |
| 172 | goto out; |
| 173 | } |
| 174 | |
| 175 | /* |
| 176 | * we verify offset and length are within the zeroed buffer at event |
| 177 | * init. |
| 178 | */ |
| 179 | count = 0; |
| 180 | for (i = offset; i < offset + length; i++) |
| 181 | count |= (u64)(arg->bytes[i]) << ((length - 1 - (i - offset)) * 8); |
| 182 | |
| 183 | *value = count; |
| 184 | out: |
| 185 | put_cpu_var(hv_gpci_reqb); |
| 186 | return ret; |
| 187 | } |
| 188 | |
| 189 | static u64 h_gpci_get_value(struct perf_event *event) |
| 190 | { |
| 191 | u64 count; |
| 192 | unsigned long ret = single_gpci_request(event_get_request(event), |
| 193 | event_get_starting_index(event), |
| 194 | event_get_secondary_index(event), |
| 195 | event_get_counter_info_version(event), |
| 196 | event_get_offset(event), |
| 197 | event_get_length(event), |
| 198 | &count); |
| 199 | if (ret) |
| 200 | return 0; |
| 201 | return count; |
| 202 | } |
| 203 | |
| 204 | static void h_gpci_event_update(struct perf_event *event) |
| 205 | { |
| 206 | s64 prev; |
| 207 | u64 now = h_gpci_get_value(event); |
| 208 | prev = local64_xchg(&event->hw.prev_count, now); |
| 209 | local64_add(now - prev, &event->count); |
| 210 | } |
| 211 | |
| 212 | static void h_gpci_event_start(struct perf_event *event, int flags) |
| 213 | { |
| 214 | local64_set(&event->hw.prev_count, h_gpci_get_value(event)); |
| 215 | } |
| 216 | |
/*
 * Stop counting: fold the value accumulated since the last reading into the
 * event count. @flags is not used.
 */
static void h_gpci_event_stop(struct perf_event *event, int flags)
{
	h_gpci_event_update(event);
}
| 221 | |
| 222 | static int h_gpci_event_add(struct perf_event *event, int flags) |
| 223 | { |
| 224 | if (flags & PERF_EF_START) |
| 225 | h_gpci_event_start(event, flags); |
| 226 | |
| 227 | return 0; |
| 228 | } |
| 229 | |
| 230 | static int h_gpci_event_init(struct perf_event *event) |
| 231 | { |
| 232 | u64 count; |
| 233 | u8 length; |
| 234 | unsigned long ret; |
| 235 | |
| 236 | /* Not our event */ |
| 237 | if (event->attr.type != event->pmu->type) |
| 238 | return -ENOENT; |
| 239 | |
| 240 | /* config2 is unused */ |
| 241 | if (event->attr.config2) { |
| 242 | pr_devel("config2 set when reserved\n"); |
| 243 | return -EINVAL; |
| 244 | } |
| 245 | |
| 246 | /* no branch sampling */ |
| 247 | if (has_branch_stack(event)) |
| 248 | return -EOPNOTSUPP; |
| 249 | |
| 250 | length = event_get_length(event); |
| 251 | if (length < 1 || length > 8) { |
| 252 | pr_devel("length invalid\n"); |
| 253 | return -EINVAL; |
| 254 | } |
| 255 | |
| 256 | /* last byte within the buffer? */ |
| 257 | if ((event_get_offset(event) + length) > HGPCI_MAX_DATA_BYTES) { |
| 258 | pr_devel("request outside of buffer: %zu > %zu\n", |
| 259 | (size_t)event_get_offset(event) + length, |
| 260 | HGPCI_MAX_DATA_BYTES); |
| 261 | return -EINVAL; |
| 262 | } |
| 263 | |
| 264 | /* check if the request works... */ |
| 265 | ret = single_gpci_request(event_get_request(event), |
| 266 | event_get_starting_index(event), |
| 267 | event_get_secondary_index(event), |
| 268 | event_get_counter_info_version(event), |
| 269 | event_get_offset(event), |
| 270 | length, |
| 271 | &count); |
| 272 | |
| 273 | /* |
| 274 | * ret value as H_AUTHORITY implies that partition is not permitted to retrieve |
| 275 | * performance information, and required to set |
| 276 | * "Enable Performance Information Collection" option. |
| 277 | */ |
| 278 | if (ret == H_AUTHORITY) |
| 279 | return -EPERM; |
| 280 | |
| 281 | if (ret) { |
| 282 | pr_devel("gpci hcall failed\n"); |
| 283 | return -EINVAL; |
| 284 | } |
| 285 | |
| 286 | return 0; |
| 287 | } |
| 288 | |
| 289 | static struct pmu h_gpci_pmu = { |
| 290 | .task_ctx_nr = perf_invalid_context, |
| 291 | |
| 292 | .name = "hv_gpci", |
| 293 | .attr_groups = attr_groups, |
| 294 | .event_init = h_gpci_event_init, |
| 295 | .add = h_gpci_event_add, |
| 296 | .del = h_gpci_event_stop, |
| 297 | .start = h_gpci_event_start, |
| 298 | .stop = h_gpci_event_stop, |
| 299 | .read = h_gpci_event_update, |
| 300 | .capabilities = PERF_PMU_CAP_NO_EXCLUDE, |
| 301 | }; |
| 302 | |
| 303 | static int hv_gpci_init(void) |
| 304 | { |
| 305 | int r; |
| 306 | unsigned long hret; |
| 307 | struct hv_perf_caps caps; |
| 308 | struct hv_gpci_request_buffer *arg; |
| 309 | |
| 310 | hv_gpci_assert_offsets_correct(); |
| 311 | |
| 312 | if (!firmware_has_feature(FW_FEATURE_LPAR)) { |
| 313 | pr_debug("not a virtualized system, not enabling\n"); |
| 314 | return -ENODEV; |
| 315 | } |
| 316 | |
| 317 | hret = hv_perf_caps_get(&caps); |
| 318 | if (hret) { |
| 319 | pr_debug("could not obtain capabilities, not enabling, rc=%ld\n", |
| 320 | hret); |
| 321 | return -ENODEV; |
| 322 | } |
| 323 | |
| 324 | /* sampling not supported */ |
| 325 | h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; |
| 326 | |
| 327 | arg = (void *)get_cpu_var(hv_gpci_reqb); |
| 328 | memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
| 329 | |
| 330 | /* |
| 331 | * hcall H_GET_PERF_COUNTER_INFO populates the output |
| 332 | * counter_info_version value based on the system hypervisor. |
| 333 | * Pass the counter request 0x10 corresponds to request type |
| 334 | * 'Dispatch_timebase_by_processor', to get the supported |
| 335 | * counter_info_version. |
| 336 | */ |
| 337 | arg->params.counter_request = cpu_to_be32(0x10); |
| 338 | |
| 339 | r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, |
| 340 | virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); |
| 341 | if (r) { |
| 342 | pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n", r); |
| 343 | arg->params.counter_info_version_out = 0x8; |
| 344 | } |
| 345 | |
| 346 | /* |
| 347 | * Use counter_info_version_out value to assign |
| 348 | * required hv-gpci event list. |
| 349 | */ |
| 350 | if (arg->params.counter_info_version_out >= 0x8) |
| 351 | event_group.attrs = hv_gpci_event_attrs; |
| 352 | else |
| 353 | event_group.attrs = hv_gpci_event_attrs_v6; |
| 354 | |
| 355 | put_cpu_var(hv_gpci_reqb); |
| 356 | |
| 357 | r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1); |
| 358 | if (r) |
| 359 | return r; |
| 360 | |
| 361 | return 0; |
| 362 | } |
| 363 | |
| 364 | device_initcall(hv_gpci_init); |