blob: 910f2621d21122eeaa435fc327c442b214b33c5d [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright IBM Corp. 2018
4 * Auxtrace support for s390 CPU-Measurement Sampling Facility
5 *
6 * Author(s): Thomas Richter <tmricht@linux.ibm.com>
7 *
8 * Auxiliary traces are collected during 'perf record' using rbd000 event.
9 * Several PERF_RECORD_XXX are generated during recording:
10 *
11 * PERF_RECORD_AUX:
12 * Records that new data landed in the AUX buffer part.
13 * PERF_RECORD_AUXTRACE:
14 * Defines auxtrace data. Followed by the actual data. The contents of
15 * the auxtrace data is dependent on the event and the CPU.
16 * This record is generated by perf record command. For details
17 * see Documentation/perf.data-file-format.txt.
18 * PERF_RECORD_AUXTRACE_INFO:
 * Defines a table of contents for PERF_RECORD_AUXTRACE records. This
20 * record is generated during 'perf record' command. Each record contains up
21 * to 256 entries describing offset and size of the AUXTRACE data in the
22 * perf.data file.
23 * PERF_RECORD_AUXTRACE_ERROR:
24 * Indicates an error during AUXTRACE collection such as buffer overflow.
25 * PERF_RECORD_FINISHED_ROUND:
26 * Perf events are not necessarily in time stamp order, as they can be
27 * collected in parallel on different CPUs. If the events should be
28 * processed in time order they need to be sorted first.
29 * Perf report guarantees that there is no reordering over a
30 * PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a
31 * time stamp lower than this record are processed (and displayed) before
32 * the succeeding perf record are processed.
33 *
34 * These records are evaluated during perf report command.
35 *
36 * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for
37 * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info()
38 * below.
39 * Auxiliary trace data is collected per CPU. To merge the data into the report
40 * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace
41 * data is in ascending order.
42 *
43 * Each queue has a double linked list of auxtrace_buffers. This list contains
44 * the offset and size of a CPU's auxtrace data. During auxtrace processing
45 * the data portion is mmap()'ed.
46 *
47 * To sort the queues in chronological order, all queue access is controlled
 * by the auxtrace_heap. This is basically a stack; each stack element has two
 * entries, the queue number and a time stamp. However, the stack is sorted by
 * the time stamps. The highest time stamp is at the bottom, the lowest
 * (nearest) time stamp is at the top. That sort order is maintained at all
52 * times!
53 *
54 * After the auxtrace infrastructure has been setup, the auxtrace queues are
55 * filled with data (offset/size pairs) and the auxtrace_heap is populated.
56 *
57 * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues.
58 * Each record is handled by s390_cpumsf_process_event(). The time stamp of
59 * the perf record is compared with the time stamp located on the auxtrace_heap
60 * top element. If that time stamp is lower than the time stamp from the
61 * record sample, the auxtrace queues will be processed. As auxtrace queues
62 * control many auxtrace_buffers and each buffer can be quite large, the
63 * auxtrace buffer might be processed only partially. In this case the
64 * position in the auxtrace_buffer of that queue is remembered and the time
65 * stamp of the last processed entry of the auxtrace_buffer replaces the
66 * current auxtrace_heap top.
67 *
 * 3. Auxtrace_queues might run out of data and are fed by the
 * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event().
70 *
71 * Event Generation
 * Each sampling-data entry in the auxiliary trace data generates a perf sample.
73 * This sample is filled
74 * with data from the auxtrace such as PID/TID, instruction address, CPU state,
75 * etc. This sample is processed with perf_session__deliver_synth_event() to
76 * be included into the GUI.
77 *
 * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining
 * auxiliary trace entries until the time stamp at the auxtrace_heap top
 * reaches the time stamp of this record. This is triggered by
 * ordered_event->deliver().
81 *
82 *
83 * Perf event processing.
84 * Event processing of PERF_RECORD_XXX entries relies on time stamp entries.
85 * This is the function call sequence:
86 *
87 * __cmd_report()
88 * |
89 * perf_session__process_events()
90 * |
91 * __perf_session__process_events()
92 * |
93 * perf_session__process_event()
 * | This function splits the PERF_RECORD_XXX records.
95 * | - Those generated by perf record command (type number equal or higher
96 * | than PERF_RECORD_USER_TYPE_START) are handled by
97 * | perf_session__process_user_event(see below)
98 * | - Those generated by the kernel are handled by
99 * | perf_evlist__parse_sample_timestamp()
100 * |
101 * perf_evlist__parse_sample_timestamp()
102 * | Extract time stamp from sample data.
103 * |
104 * perf_session__queue_event()
105 * | If timestamp is positive the sample is entered into an ordered_event
106 * | list, sort order is the timestamp. The event processing is deferred until
107 * | later (see perf_session__process_user_event()).
108 * | Other timestamps (0 or -1) are handled immediately by
109 * | perf_session__deliver_event(). These are events generated at start up
110 * | of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP*
111 * | records. They are needed to create a list of running processes and its
112 * | memory mappings and layout. They are needed at the beginning to enable
113 * | command perf report to create process trees and memory mappings.
114 * |
115 * perf_session__deliver_event()
116 * | Delivers a PERF_RECORD_XXX entry for handling.
117 * |
118 * auxtrace__process_event()
119 * | The timestamp of the PERF_RECORD_XXX entry is taken to correlate with
120 * | time stamps from the auxiliary trace buffers. This enables
121 * | synchronization between auxiliary trace data and the events on the
122 * | perf.data file.
123 * |
124 * machine__deliver_event()
125 * | Handles the PERF_RECORD_XXX event. This depends on the record type.
126 * It might update the process tree, update a process memory map or enter
127 * a sample with IP and call back chain data into GUI data pool.
128 *
129 *
130 * Deferred processing determined by perf_session__process_user_event() is
131 * finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These
132 * are generated during command perf record.
133 * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all
134 * PERF_RECORD_XXX entries stored in the ordered_event list. This list was
135 * built up while reading the perf.data file.
136 * Each event is now processed by calling perf_session__deliver_event().
137 * This enables time synchronization between the data in the perf.data file and
138 * the data in the auxiliary trace buffers.
139 */
140
141#include <endian.h>
142#include <errno.h>
143#include <byteswap.h>
144#include <inttypes.h>
145#include <linux/kernel.h>
146#include <linux/types.h>
147#include <linux/bitops.h>
148#include <linux/log2.h>
149
150#include "cpumap.h"
151#include "color.h"
152#include "evsel.h"
153#include "evlist.h"
154#include "machine.h"
155#include "session.h"
156#include "util.h"
157#include "thread.h"
158#include "debug.h"
159#include "auxtrace.h"
160#include "s390-cpumsf.h"
161#include "s390-cpumsf-kernel.h"
162
/* Per-session decoder state for s390 CPU-Measurement Sampling Facility
 * auxiliary trace data. Allocated in s390_cpumsf_process_auxtrace_info()
 * and recovered from session->auxtrace with container_of().
 */
struct s390_cpumsf {
	struct auxtrace auxtrace;	/* Callback table installed as session->auxtrace */
	struct auxtrace_queues queues;	/* Queues holding the auxtrace buffers */
	struct auxtrace_heap heap;	/* Queue numbers ordered by time stamp */
	struct perf_session *session;	/* Session the synthesized events are delivered to */
	struct machine *machine;	/* Set to the session's host machine */
	u32 auxtrace_type;		/* Copied from the AUXTRACE_INFO record */
	u32 pmu_type;			/* Set to PERF_TYPE_RAW */
	u16 machine_type;		/* Machine type number parsed from the cpuid string */
	bool data_queued;		/* Set when the queues were populated from the index */
};
174
/* Private per-queue decoder state, stored in auxtrace_queue->priv. */
struct s390_cpumsf_queue {
	struct s390_cpumsf *sf;		/* Back pointer to the owning decoder state */
	unsigned int queue_nr;		/* Index of this queue in sf->queues.queue_array */
	struct auxtrace_buffer *buffer;	/* Buffer being decoded, NULL if none is in progress */
	int cpu;			/* Taken from the queue's cpu, -1 when not set */
};
181
182/* Display s390 CPU measurement facility basic-sampling data entry */
183static bool s390_cpumsf_basic_show(const char *color, size_t pos,
184 struct hws_basic_entry *basic)
185{
186 if (basic->def != 1) {
187 pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos);
188 return false;
189 }
190 color_fprintf(stdout, color, " [%#08zx] Basic Def:%04x Inst:%#04x"
191 " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n"
192 "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n",
193 pos, basic->def, basic->U,
194 basic->T ? 'T' : ' ',
195 basic->W ? 'W' : ' ',
196 basic->P ? 'P' : ' ',
197 basic->I ? 'I' : ' ',
198 basic->AS, basic->prim_asn, basic->ia, basic->CL,
199 basic->hpp, basic->gpp);
200 return true;
201}
202
203/* Display s390 CPU measurement facility diagnostic-sampling data entry */
204static bool s390_cpumsf_diag_show(const char *color, size_t pos,
205 struct hws_diag_entry *diag)
206{
207 if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) {
208 pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos);
209 return false;
210 }
211 color_fprintf(stdout, color, " [%#08zx] Diag Def:%04x %c\n",
212 pos, diag->def, diag->I ? 'I' : ' ');
213 return true;
214}
215
216/* Return TOD timestamp contained in an trailer entry */
217static unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
218{
219 /* te->t set: TOD in STCKE format, bytes 8-15
220 * to->t not set: TOD in STCK format, bytes 0-7
221 */
222 unsigned long long ts;
223
224 memcpy(&ts, &te->timestamp[te->t], sizeof(ts));
225 return ts;
226}
227
228/* Display s390 CPU measurement facility trailer entry */
229static bool s390_cpumsf_trailer_show(const char *color, size_t pos,
230 struct hws_trailer_entry *te)
231{
232 if (te->bsdes != sizeof(struct hws_basic_entry)) {
233 pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos);
234 return false;
235 }
236 color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d"
237 " dsdes:%d Overflow:%lld Time:%#llx\n"
238 "\t\tC:%d TOD:%#lx 1:%#llx 2:%#llx\n",
239 pos,
240 te->f ? 'F' : ' ',
241 te->a ? 'A' : ' ',
242 te->t ? 'T' : ' ',
243 te->bsdes, te->dsdes, te->overflow,
244 trailer_timestamp(te), te->clock_base, te->progusage2,
245 te->progusage[0], te->progusage[1]);
246 return true;
247}
248
249/* Test a sample data block. It must be 4KB or a multiple thereof in size and
250 * 4KB page aligned. Each sample data page has a trailer entry at the
251 * end which contains the sample entry data sizes.
252 *
253 * Return true if the sample data block passes the checks and set the
254 * basic set entry size and diagnostic set entry size.
255 *
256 * Return false on failure.
257 *
258 * Note: Old hardware does not set the basic or diagnostic entry sizes
259 * in the trailer entry. Use the type number instead.
260 */
261static bool s390_cpumsf_validate(int machine_type,
262 unsigned char *buf, size_t len,
263 unsigned short *bsdes,
264 unsigned short *dsdes)
265{
266 struct hws_basic_entry *basic = (struct hws_basic_entry *)buf;
267 struct hws_trailer_entry *te;
268
269 *dsdes = *bsdes = 0;
270 if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */
271 return false;
272 if (basic->def != 1) /* No basic set entry, must be first */
273 return false;
274 /* Check for trailer entry at end of SDB */
275 te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
276 - sizeof(*te));
277 *bsdes = te->bsdes;
278 *dsdes = te->dsdes;
279 if (!te->bsdes && !te->dsdes) {
280 /* Very old hardware, use CPUID */
281 switch (machine_type) {
282 case 2097:
283 case 2098:
284 *dsdes = 64;
285 *bsdes = 32;
286 break;
287 case 2817:
288 case 2818:
289 *dsdes = 74;
290 *bsdes = 32;
291 break;
292 case 2827:
293 case 2828:
294 *dsdes = 85;
295 *bsdes = 32;
296 break;
297 case 2964:
298 case 2965:
299 *dsdes = 112;
300 *bsdes = 32;
301 break;
302 default:
303 /* Illegal trailer entry */
304 return false;
305 }
306 }
307 return true;
308}
309
310/* Return true if there is room for another entry */
311static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos)
312{
313 size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry);
314
315 if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz)
316 return false;
317 return true;
318}
319
320/* Dump an auxiliary buffer. These buffers are multiple of
321 * 4KB SDB pages.
322 */
323static void s390_cpumsf_dump(struct s390_cpumsf *sf,
324 unsigned char *buf, size_t len)
325{
326 const char *color = PERF_COLOR_BLUE;
327 struct hws_basic_entry *basic;
328 struct hws_diag_entry *diag;
329 unsigned short bsdes, dsdes;
330 size_t pos = 0;
331
332 color_fprintf(stdout, color,
333 ". ... s390 AUX data: size %zu bytes\n",
334 len);
335
336 if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
337 &dsdes)) {
338 pr_err("Invalid AUX trace data block size:%zu"
339 " (type:%d bsdes:%hd dsdes:%hd)\n",
340 len, sf->machine_type, bsdes, dsdes);
341 return;
342 }
343
344 /* s390 kernel always returns 4KB blocks fully occupied,
345 * no partially filled SDBs.
346 */
347 while (pos < len) {
348 /* Handle Basic entry */
349 basic = (struct hws_basic_entry *)(buf + pos);
350 if (s390_cpumsf_basic_show(color, pos, basic))
351 pos += bsdes;
352 else
353 return;
354
355 /* Handle Diagnostic entry */
356 diag = (struct hws_diag_entry *)(buf + pos);
357 if (s390_cpumsf_diag_show(color, pos, diag))
358 pos += dsdes;
359 else
360 return;
361
362 /* Check for trailer entry */
363 if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
364 /* Show trailer entry */
365 struct hws_trailer_entry te;
366
367 pos = (pos + S390_CPUMSF_PAGESZ)
368 & ~(S390_CPUMSF_PAGESZ - 1);
369 pos -= sizeof(te);
370 memcpy(&te, buf + pos, sizeof(te));
371 /* Set descriptor sizes in case of old hardware
372 * where these values are not set.
373 */
374 te.bsdes = bsdes;
375 te.dsdes = dsdes;
376 if (s390_cpumsf_trailer_show(color, pos, &te))
377 pos += sizeof(te);
378 else
379 return;
380 }
381 }
382}
383
/* Print a separator line consisting of a single dot, then dump the
 * auxiliary buffer @buf of @len bytes.
 */
static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf,
				   size_t len)
{
	fputs(".\n", stdout);
	s390_cpumsf_dump(sf, buf, len);
}
390
391#define S390_LPP_PID_MASK 0xffffffff
392
393static bool s390_cpumsf_make_event(size_t pos,
394 struct hws_basic_entry *basic,
395 struct s390_cpumsf_queue *sfq)
396{
397 struct perf_sample sample = {
398 .ip = basic->ia,
399 .pid = basic->hpp & S390_LPP_PID_MASK,
400 .tid = basic->hpp & S390_LPP_PID_MASK,
401 .cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN,
402 .cpu = sfq->cpu,
403 .period = 1
404 };
405 union perf_event event;
406
407 memset(&event, 0, sizeof(event));
408 if (basic->CL == 1) /* Native LPAR mode */
409 sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
410 : PERF_RECORD_MISC_KERNEL;
411 else if (basic->CL == 2) /* Guest kernel/user space */
412 sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
413 : PERF_RECORD_MISC_GUEST_KERNEL;
414 else if (basic->gpp || basic->prim_asn != 0xffff)
415 /* Use heuristics on old hardware */
416 sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
417 : PERF_RECORD_MISC_GUEST_KERNEL;
418 else
419 sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
420 : PERF_RECORD_MISC_KERNEL;
421
422 event.sample.header.type = PERF_RECORD_SAMPLE;
423 event.sample.header.misc = sample.cpumode;
424 event.sample.header.size = sizeof(struct perf_event_header);
425
426 pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n",
427 __func__, pos, sample.ip, basic->P, basic->CL, sample.pid,
428 sample.tid, sample.cpumode, sample.cpu);
429 if (perf_session__deliver_synth_event(sfq->sf->session, &event,
430 &sample)) {
431 pr_err("s390 Auxiliary Trace: failed to deliver event\n");
432 return false;
433 }
434 return true;
435}
436
437static unsigned long long get_trailer_time(const unsigned char *buf)
438{
439 struct hws_trailer_entry *te;
440 unsigned long long aux_time;
441
442 te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
443 - sizeof(*te));
444
445 if (!te->clock_base) /* TOD_CLOCK_BASE value missing */
446 return 0;
447
448 /* Correct calculation to convert time stamp in trailer entry to
449 * nano seconds (taken from arch/s390 function tod_to_ns()).
450 * TOD_CLOCK_BASE is stored in trailer entry member progusage2.
451 */
452 aux_time = trailer_timestamp(te) - te->progusage2;
453 aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9);
454 return aux_time;
455}
456
/* Process the data samples of a single queue. The first parameter is a
 * pointer to the queue, the second parameter is the time stamp. This
 * is the time stamp:
 * - of the event that triggered this processing.
 * - or the time stamp when the last processing of this queue stopped.
 *   In this case it stopped at a 4KB page boundary and recorded the
 *   position on where to continue processing on the next invocation
 *   (see buffer->use_data and buffer->use_size).
 *
 * When this function returns the second parameter is updated to
 * reflect the time stamp of the last processed auxiliary data entry
 * (taken from the trailer entry of that page). The caller uses this
 * returned time stamp to record the last processed entry in this
 * queue.
 *
 * The function returns:
 * 0:  Processing successful. The second parameter returns the
 *     time stamp from the trailer entry until which position
 *     processing took place. Subsequent calls resume from this
 *     position.
 * <0: An error occurred during processing. The second parameter
 *     returns the maximum time stamp when the buffer failed validation
 *     or a trailer had no clock base; when delivering a sample failed
 *     it returns the time stamp of the last trailer entry read.
 * >0: Done on this queue's buffer. The second parameter returns the
 *     time stamp of the last trailer entry read, or the maximum time
 *     stamp when a trailer entry had no clock base.
 */
static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts)
{
	struct s390_cpumsf *sf = sfq->sf;
	unsigned char *buf = sfq->buffer->use_data;
	size_t len = sfq->buffer->use_size;
	struct hws_basic_entry *basic;
	unsigned short bsdes, dsdes;
	size_t pos = 0;
	int err = 1;		/* Default result: buffer completely scanned */
	u64 aux_ts;

	/* Validation failure returns directly, the resume state of the
	 * buffer is left untouched.
	 */
	if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
				  &dsdes)) {
		*ts = ~0ULL;
		return -1;
	}

	/* Get trailer entry time stamp and check if entries in
	 * this auxiliary page are ready for processing. If the
	 * time stamp of the first entry is too high, whole buffer
	 * can be skipped. In this case return time stamp.
	 */
	aux_ts = get_trailer_time(buf);
	if (!aux_ts) {
		pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n",
		       (s64)sfq->buffer->data_offset);
		/* err is still 1 here: the buffer is reported as done */
		aux_ts = ~0ULL;
		goto out;
	}
	if (aux_ts > *ts) {
		/* Page is newer than requested, retry later from the same position */
		*ts = aux_ts;
		return 0;
	}

	while (pos < len) {
		/* Handle Basic entry */
		basic = (struct hws_basic_entry *)(buf + pos);
		if (s390_cpumsf_make_event(pos, basic, sfq))
			pos += bsdes;
		else {
			err = -EBADF;
			goto out;
		}

		pos += dsdes;	/* Skip diagnostic entry */

		/* Check for trailer entry */
		if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
			/* Round up to the start of the next page */
			pos = (pos + S390_CPUMSF_PAGESZ)
			      & ~(S390_CPUMSF_PAGESZ - 1);
			/* Check existence of next page */
			if (pos >= len)
				break;
			aux_ts = get_trailer_time(buf + pos);
			if (!aux_ts) {
				aux_ts = ~0ULL;
				goto out;
			}
			if (aux_ts > *ts) {
				/* Next page is too new: remember where to resume */
				*ts = aux_ts;
				sfq->buffer->use_data += pos;
				sfq->buffer->use_size -= pos;
				return 0;
			}
		}
	}
out:
	/* Nothing is left to resume in this buffer */
	*ts = aux_ts;
	sfq->buffer->use_size = 0;
	sfq->buffer->use_data = NULL;
	return err;	/* Buffer completely scanned or error */
}
554
555/* Run the s390 auxiliary trace decoder.
556 * Select the queue buffer to operate on, the caller already selected
557 * the proper queue, depending on second parameter 'ts'.
558 * This is the time stamp until which the auxiliary entries should
559 * be processed. This value is updated by called functions and
560 * returned to the caller.
561 *
562 * Resume processing in the current buffer. If there is no buffer
563 * get a new buffer from the queue and setup start position for
564 * processing.
565 * When a buffer is completely processed remove it from the queue
566 * before returning.
567 *
568 * This function returns
569 * 1: When the queue is empty. Second parameter will be set to
570 * maximum time stamp.
571 * 0: Normal processing done.
572 * <0: Error during queue buffer setup. This causes the caller
573 * to stop processing completely.
574 */
575static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq,
576 u64 *ts)
577{
578
579 struct auxtrace_buffer *buffer;
580 struct auxtrace_queue *queue;
581 int err;
582
583 queue = &sfq->sf->queues.queue_array[sfq->queue_nr];
584
585 /* Get buffer and last position in buffer to resume
586 * decoding the auxiliary entries. One buffer might be large
587 * and decoding might stop in between. This depends on the time
588 * stamp of the trailer entry in each page of the auxiliary
589 * data and the time stamp of the event triggering the decoding.
590 */
591 if (sfq->buffer == NULL) {
592 sfq->buffer = buffer = auxtrace_buffer__next(queue,
593 sfq->buffer);
594 if (!buffer) {
595 *ts = ~0ULL;
596 return 1; /* Processing done on this queue */
597 }
598 /* Start with a new buffer on this queue */
599 if (buffer->data) {
600 buffer->use_size = buffer->size;
601 buffer->use_data = buffer->data;
602 }
603 } else
604 buffer = sfq->buffer;
605
606 if (!buffer->data) {
607 int fd = perf_data__fd(sfq->sf->session->data);
608
609 buffer->data = auxtrace_buffer__get_data(buffer, fd);
610 if (!buffer->data)
611 return -ENOMEM;
612 buffer->use_size = buffer->size;
613 buffer->use_data = buffer->data;
614 }
615 pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n",
616 __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset,
617 buffer->size, buffer->use_size);
618 err = s390_cpumsf_samples(sfq, ts);
619
620 /* If non-zero, there is either an error (err < 0) or the buffer is
621 * completely done (err > 0). The error is unrecoverable, usually
622 * some descriptors could not be read successfully, so continue with
623 * the next buffer.
624 * In both cases the parameter 'ts' has been updated.
625 */
626 if (err) {
627 sfq->buffer = NULL;
628 list_del(&buffer->list);
629 auxtrace_buffer__free(buffer);
630 if (err > 0) /* Buffer done, no error */
631 err = 0;
632 }
633 return err;
634}
635
636static struct s390_cpumsf_queue *
637s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr)
638{
639 struct s390_cpumsf_queue *sfq;
640
641 sfq = zalloc(sizeof(struct s390_cpumsf_queue));
642 if (sfq == NULL)
643 return NULL;
644
645 sfq->sf = sf;
646 sfq->queue_nr = queue_nr;
647 sfq->cpu = -1;
648 return sfq;
649}
650
651static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf,
652 struct auxtrace_queue *queue,
653 unsigned int queue_nr, u64 ts)
654{
655 struct s390_cpumsf_queue *sfq = queue->priv;
656
657 if (list_empty(&queue->head))
658 return 0;
659
660 if (sfq == NULL) {
661 sfq = s390_cpumsf_alloc_queue(sf, queue_nr);
662 if (!sfq)
663 return -ENOMEM;
664 queue->priv = sfq;
665
666 if (queue->cpu != -1)
667 sfq->cpu = queue->cpu;
668 }
669 return auxtrace_heap__add(&sf->heap, queue_nr, ts);
670}
671
672static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts)
673{
674 unsigned int i;
675 int ret = 0;
676
677 for (i = 0; i < sf->queues.nr_queues; i++) {
678 ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i],
679 i, ts);
680 if (ret)
681 break;
682 }
683 return ret;
684}
685
686static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts)
687{
688 if (!sf->queues.new_data)
689 return 0;
690
691 sf->queues.new_data = false;
692 return s390_cpumsf_setup_queues(sf, ts);
693}
694
695static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp)
696{
697 unsigned int queue_nr;
698 u64 ts;
699 int ret;
700
701 while (1) {
702 struct auxtrace_queue *queue;
703 struct s390_cpumsf_queue *sfq;
704
705 if (!sf->heap.heap_cnt)
706 return 0;
707
708 if (sf->heap.heap_array[0].ordinal >= timestamp)
709 return 0;
710
711 queue_nr = sf->heap.heap_array[0].queue_nr;
712 queue = &sf->queues.queue_array[queue_nr];
713 sfq = queue->priv;
714
715 auxtrace_heap__pop(&sf->heap);
716 if (sf->heap.heap_cnt) {
717 ts = sf->heap.heap_array[0].ordinal + 1;
718 if (ts > timestamp)
719 ts = timestamp;
720 } else {
721 ts = timestamp;
722 }
723
724 ret = s390_cpumsf_run_decoder(sfq, &ts);
725 if (ret < 0) {
726 auxtrace_heap__add(&sf->heap, queue_nr, ts);
727 return ret;
728 }
729 if (!ret) {
730 ret = auxtrace_heap__add(&sf->heap, queue_nr, ts);
731 if (ret < 0)
732 return ret;
733 }
734 }
735 return 0;
736}
737
738static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu,
739 pid_t pid, pid_t tid, u64 ip)
740{
741 char msg[MAX_AUXTRACE_ERROR_MSG];
742 union perf_event event;
743 int err;
744
745 strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1);
746 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
747 code, cpu, pid, tid, ip, msg);
748
749 err = perf_session__deliver_synth_event(sf->session, &event, NULL);
750 if (err)
751 pr_err("s390 Auxiliary Trace: failed to deliver error event,"
752 "error %d\n", err);
753 return err;
754}
755
756static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample)
757{
758 return s390_cpumsf_synth_error(sf, 1, sample->cpu,
759 sample->pid, sample->tid, 0);
760}
761
762static int
763s390_cpumsf_process_event(struct perf_session *session __maybe_unused,
764 union perf_event *event,
765 struct perf_sample *sample,
766 struct perf_tool *tool)
767{
768 struct s390_cpumsf *sf = container_of(session->auxtrace,
769 struct s390_cpumsf,
770 auxtrace);
771 u64 timestamp = sample->time;
772 int err = 0;
773
774 if (dump_trace)
775 return 0;
776
777 if (!tool->ordered_events) {
778 pr_err("s390 Auxiliary Trace requires ordered events\n");
779 return -EINVAL;
780 }
781
782 if (event->header.type == PERF_RECORD_AUX &&
783 event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
784 return s390_cpumsf_lost(sf, sample);
785
786 if (timestamp) {
787 err = s390_cpumsf_update_queues(sf, timestamp);
788 if (!err)
789 err = s390_cpumsf_process_queues(sf, timestamp);
790 }
791 return err;
792}
793
/* Pairs a perf tool with a session.
 * NOTE(review): not referenced anywhere else in the visible part of this
 * file -- confirm whether it is still needed.
 */
struct s390_cpumsf_synth {
	struct perf_tool cpumsf_tool;
	struct perf_session *session;
};
798
799static int
800s390_cpumsf_process_auxtrace_event(struct perf_session *session,
801 union perf_event *event __maybe_unused,
802 struct perf_tool *tool __maybe_unused)
803{
804 struct s390_cpumsf *sf = container_of(session->auxtrace,
805 struct s390_cpumsf,
806 auxtrace);
807
808 int fd = perf_data__fd(session->data);
809 struct auxtrace_buffer *buffer;
810 off_t data_offset;
811 int err;
812
813 if (sf->data_queued)
814 return 0;
815
816 if (perf_data__is_pipe(session->data)) {
817 data_offset = 0;
818 } else {
819 data_offset = lseek(fd, 0, SEEK_CUR);
820 if (data_offset == -1)
821 return -errno;
822 }
823
824 err = auxtrace_queues__add_event(&sf->queues, session, event,
825 data_offset, &buffer);
826 if (err)
827 return err;
828
829 /* Dump here after copying piped trace out of the pipe */
830 if (dump_trace) {
831 if (auxtrace_buffer__get_data(buffer, fd)) {
832 s390_cpumsf_dump_event(sf, buffer->data,
833 buffer->size);
834 auxtrace_buffer__put_data(buffer);
835 }
836 }
837 return 0;
838}
839
/* free_events callback of the auxtrace interface. Intentionally empty,
 * this decoder keeps nothing that would have to be released here.
 */
static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused)
{
}
843
/* flush_events callback of the auxtrace interface. Does nothing and
 * always returns 0.
 */
static int s390_cpumsf_flush(struct perf_session *session __maybe_unused,
			     struct perf_tool *tool __maybe_unused)
{
	return 0;
}
849
850static void s390_cpumsf_free_queues(struct perf_session *session)
851{
852 struct s390_cpumsf *sf = container_of(session->auxtrace,
853 struct s390_cpumsf,
854 auxtrace);
855 struct auxtrace_queues *queues = &sf->queues;
856 unsigned int i;
857
858 for (i = 0; i < queues->nr_queues; i++)
859 zfree(&queues->queue_array[i].priv);
860 auxtrace_queues__free(queues);
861}
862
863static void s390_cpumsf_free(struct perf_session *session)
864{
865 struct s390_cpumsf *sf = container_of(session->auxtrace,
866 struct s390_cpumsf,
867 auxtrace);
868
869 auxtrace_heap__free(&sf->heap);
870 s390_cpumsf_free_queues(session);
871 session->auxtrace = NULL;
872 free(sf);
873}
874
/* Extract the machine type number from a cpuid string.
 *
 * The number is expected as second field of the comma separated string,
 * for example "IBM,3906,..." yields 3906.
 *
 * Returns the machine type number, or 0 when @cpuid is NULL or does not
 * contain a non-empty first field followed by a comma and a decimal number.
 */
static int s390_cpumsf_get_type(const char *cpuid)
{
	/* %u stores through an unsigned int pointer */
	unsigned int family = 0;

	if (!cpuid)
		return 0;

	/* Skip everything up to the first comma, then read the number */
	if (sscanf(cpuid, "%*[^,],%u", &family) != 1)
		return 0;
	return family;
}
882
883/* Check itrace options set on perf report command.
884 * Return true, if none are set or all options specified can be
885 * handled on s390.
886 * Return false otherwise.
887 */
888static bool check_auxtrace_itrace(struct itrace_synth_opts *itops)
889{
890 if (!itops || !itops->set)
891 return true;
892 pr_err("No --itrace options supported\n");
893 return false;
894}
895
896int s390_cpumsf_process_auxtrace_info(union perf_event *event,
897 struct perf_session *session)
898{
899 struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
900 struct s390_cpumsf *sf;
901 int err;
902
903 if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event))
904 return -EINVAL;
905
906 sf = zalloc(sizeof(struct s390_cpumsf));
907 if (sf == NULL)
908 return -ENOMEM;
909
910 if (!check_auxtrace_itrace(session->itrace_synth_opts)) {
911 err = -EINVAL;
912 goto err_free;
913 }
914
915 err = auxtrace_queues__init(&sf->queues);
916 if (err)
917 goto err_free;
918
919 sf->session = session;
920 sf->machine = &session->machines.host; /* No kvm support */
921 sf->auxtrace_type = auxtrace_info->type;
922 sf->pmu_type = PERF_TYPE_RAW;
923 sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid);
924
925 sf->auxtrace.process_event = s390_cpumsf_process_event;
926 sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event;
927 sf->auxtrace.flush_events = s390_cpumsf_flush;
928 sf->auxtrace.free_events = s390_cpumsf_free_events;
929 sf->auxtrace.free = s390_cpumsf_free;
930 session->auxtrace = &sf->auxtrace;
931
932 if (dump_trace)
933 return 0;
934
935 err = auxtrace_queues__process_index(&sf->queues, session);
936 if (err)
937 goto err_free_queues;
938
939 if (sf->queues.populated)
940 sf->data_queued = true;
941
942 return 0;
943
944err_free_queues:
945 auxtrace_queues__free(&sf->queues);
946 session->auxtrace = NULL;
947err_free:
948 free(sf);
949 return err;
950}