|  | // SPDX-License-Identifier: GPL-2.0 | 
|  | /* | 
|  | * Copyright IBM Corp. 2018 | 
|  | * Auxtrace support for s390 CPU-Measurement Sampling Facility | 
|  | * | 
|  | * Author(s):  Thomas Richter <tmricht@linux.ibm.com> | 
|  | * | 
|  | * Auxiliary traces are collected during 'perf record' using rbd000 event. | 
|  | * Several PERF_RECORD_XXX are generated during recording: | 
|  | * | 
|  | * PERF_RECORD_AUX: | 
|  | *	Records that new data landed in the AUX buffer part. | 
|  | * PERF_RECORD_AUXTRACE: | 
|  | *	Defines auxtrace data. Followed by the actual data. The contents of | 
|  | *	the auxtrace data is dependent on the event and the CPU. | 
|  | *	This record is generated by perf record command. For details | 
|  | *	see Documentation/perf.data-file-format.txt. | 
|  | * PERF_RECORD_AUXTRACE_INFO: | 
|  | *	Defines a table of contains for PERF_RECORD_AUXTRACE records. This | 
|  | *	record is generated during 'perf record' command. Each record contains up | 
|  | *	to 256 entries describing offset and size of the AUXTRACE data in the | 
|  | *	perf.data file. | 
|  | * PERF_RECORD_AUXTRACE_ERROR: | 
|  | *	Indicates an error during AUXTRACE collection such as buffer overflow. | 
|  | * PERF_RECORD_FINISHED_ROUND: | 
|  | *	Perf events are not necessarily in time stamp order, as they can be | 
|  | *	collected in parallel on different CPUs. If the events should be | 
|  | *	processed in time order they need to be sorted first. | 
|  | *	Perf report guarantees that there is no reordering over a | 
|  | *	PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a | 
|  | *	time stamp lower than this record are processed (and displayed) before | 
|  | *	the succeeding perf record are processed. | 
|  | * | 
|  | * These records are evaluated during perf report command. | 
|  | * | 
|  | * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for | 
|  | * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info() | 
|  | * below. | 
|  | * Auxiliary trace data is collected per CPU. To merge the data into the report | 
|  | * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace | 
|  | * data is in ascending order. | 
|  | * | 
|  | * Each queue has a double linked list of auxtrace_buffers. This list contains | 
|  | * the offset and size of a CPU's auxtrace data. During auxtrace processing | 
|  | * the data portion is mmap()'ed. | 
|  | * | 
|  | * To sort the queues in chronological order, all queue access is controlled | 
|  | * by the auxtrace_heap. This is basicly a stack, each stack element has two | 
|  | * entries, the queue number and a time stamp. However the stack is sorted by | 
|  | * the time stamps. The highest time stamp is at the bottom the lowest | 
|  | * (nearest) time stamp is at the top. That sort order is maintained at all | 
|  | * times! | 
|  | * | 
|  | * After the auxtrace infrastructure has been setup, the auxtrace queues are | 
|  | * filled with data (offset/size pairs) and the auxtrace_heap is populated. | 
|  | * | 
|  | * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues. | 
|  | * Each record is handled by s390_cpumsf_process_event(). The time stamp of | 
|  | * the perf record is compared with the time stamp located on the auxtrace_heap | 
|  | * top element. If that time stamp is lower than the time stamp from the | 
|  | * record sample, the auxtrace queues will be processed. As auxtrace queues | 
|  | * control many auxtrace_buffers and each buffer can be quite large, the | 
|  | * auxtrace buffer might be processed only partially. In this case the | 
|  | * position in the auxtrace_buffer of that queue is remembered and the time | 
|  | * stamp of the last processed entry of the auxtrace_buffer replaces the | 
|  | * current auxtrace_heap top. | 
|  | * | 
|  | * 3. Auxtrace_queues might run of out data and are feeded by the | 
|  | * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event(). | 
|  | * | 
|  | * Event Generation | 
|  | * Each sampling-data entry in the auxilary trace data generates a perf sample. | 
|  | * This sample is filled | 
|  | * with data from the auxtrace such as PID/TID, instruction address, CPU state, | 
|  | * etc. This sample is processed with perf_session__deliver_synth_event() to | 
|  | * be included into the GUI. | 
|  | * | 
|  | * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining | 
|  | * auxiliary traces entries until the time stamp of this record is reached | 
|  | * auxtrace_heap top. This is triggered by ordered_event->deliver(). | 
|  | * | 
|  | * | 
|  | * Perf event processing. | 
|  | * Event processing of PERF_RECORD_XXX entries relies on time stamp entries. | 
|  | * This is the function call sequence: | 
|  | * | 
|  | * __cmd_report() | 
|  | * | | 
|  | * perf_session__process_events() | 
|  | * | | 
|  | * __perf_session__process_events() | 
|  | * | | 
|  | * perf_session__process_event() | 
|  | * |  This functions splits the PERF_RECORD_XXX records. | 
|  | * |  - Those generated by perf record command (type number equal or higher | 
|  | * |    than PERF_RECORD_USER_TYPE_START) are handled by | 
|  | * |    perf_session__process_user_event(see below) | 
|  | * |  - Those generated by the kernel are handled by | 
|  | * |    perf_evlist__parse_sample_timestamp() | 
|  | * | | 
|  | * perf_evlist__parse_sample_timestamp() | 
|  | * |  Extract time stamp from sample data. | 
|  | * | | 
|  | * perf_session__queue_event() | 
|  | * |  If timestamp is positive the sample is entered into an ordered_event | 
|  | * |  list, sort order is the timestamp. The event processing is deferred until | 
|  | * |  later (see perf_session__process_user_event()). | 
|  | * |  Other timestamps (0 or -1) are handled immediately by | 
|  | * |  perf_session__deliver_event(). These are events generated at start up | 
|  | * |  of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP* | 
|  | * |  records. They are needed to create a list of running processes and its | 
|  | * |  memory mappings and layout. They are needed at the beginning to enable | 
|  | * |  command perf report to create process trees and memory mappings. | 
|  | * | | 
|  | * perf_session__deliver_event() | 
|  | * |  Delivers a PERF_RECORD_XXX entry for handling. | 
|  | * | | 
|  | * auxtrace__process_event() | 
|  | * |  The timestamp of the PERF_RECORD_XXX entry is taken to correlate with | 
|  | * |  time stamps from the auxiliary trace buffers. This enables | 
|  | * |  synchronization between auxiliary trace data and the events on the | 
|  | * |  perf.data file. | 
|  | * | | 
|  | * machine__deliver_event() | 
|  | * |  Handles the PERF_RECORD_XXX event. This depends on the record type. | 
|  | *    It might update the process tree, update a process memory map or enter | 
|  | *    a sample with IP and call back chain data into GUI data pool. | 
|  | * | 
|  | * | 
|  | * Deferred processing determined by perf_session__process_user_event() is | 
|  | * finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These | 
|  | * are generated during command perf record. | 
|  | * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all | 
|  | * PERF_RECORD_XXX entries stored in the ordered_event list. This list was | 
|  | * built up while reading the perf.data file. | 
|  | * Each event is now processed by calling perf_session__deliver_event(). | 
|  | * This enables time synchronization between the data in the perf.data file and | 
|  | * the data in the auxiliary trace buffers. | 
|  | */ | 
|  |  | 
|  | #include <endian.h> | 
|  | #include <errno.h> | 
|  | #include <byteswap.h> | 
|  | #include <inttypes.h> | 
|  | #include <linux/kernel.h> | 
|  | #include <linux/types.h> | 
|  | #include <linux/bitops.h> | 
|  | #include <linux/log2.h> | 
|  |  | 
|  | #include "cpumap.h" | 
|  | #include "color.h" | 
|  | #include "evsel.h" | 
|  | #include "evlist.h" | 
|  | #include "machine.h" | 
|  | #include "session.h" | 
|  | #include "util.h" | 
|  | #include "thread.h" | 
|  | #include "debug.h" | 
|  | #include "auxtrace.h" | 
|  | #include "s390-cpumsf.h" | 
|  | #include "s390-cpumsf-kernel.h" | 
|  |  | 
|  | struct s390_cpumsf { | 
|  | struct auxtrace		auxtrace; | 
|  | struct auxtrace_queues	queues; | 
|  | struct auxtrace_heap	heap; | 
|  | struct perf_session	*session; | 
|  | struct machine		*machine; | 
|  | u32			auxtrace_type; | 
|  | u32			pmu_type; | 
|  | u16			machine_type; | 
|  | bool			data_queued; | 
|  | }; | 
|  |  | 
|  | struct s390_cpumsf_queue { | 
|  | struct s390_cpumsf	*sf; | 
|  | unsigned int		queue_nr; | 
|  | struct auxtrace_buffer	*buffer; | 
|  | int			cpu; | 
|  | }; | 
|  |  | 
|  | /* Display s390 CPU measurement facility basic-sampling data entry */ | 
|  | static bool s390_cpumsf_basic_show(const char *color, size_t pos, | 
|  | struct hws_basic_entry *basic) | 
|  | { | 
|  | if (basic->def != 1) { | 
|  | pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos); | 
|  | return false; | 
|  | } | 
|  | color_fprintf(stdout, color, "    [%#08zx] Basic   Def:%04x Inst:%#04x" | 
|  | " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n" | 
|  | "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n", | 
|  | pos, basic->def, basic->U, | 
|  | basic->T ? 'T' : ' ', | 
|  | basic->W ? 'W' : ' ', | 
|  | basic->P ? 'P' : ' ', | 
|  | basic->I ? 'I' : ' ', | 
|  | basic->AS, basic->prim_asn, basic->ia, basic->CL, | 
|  | basic->hpp, basic->gpp); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | /* Display s390 CPU measurement facility diagnostic-sampling data entry */ | 
|  | static bool s390_cpumsf_diag_show(const char *color, size_t pos, | 
|  | struct hws_diag_entry *diag) | 
|  | { | 
|  | if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) { | 
|  | pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos); | 
|  | return false; | 
|  | } | 
|  | color_fprintf(stdout, color, "    [%#08zx] Diag    Def:%04x %c\n", | 
|  | pos, diag->def, diag->I ? 'I' : ' '); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | /* Return TOD timestamp contained in an trailer entry */ | 
|  | static unsigned long long trailer_timestamp(struct hws_trailer_entry *te) | 
|  | { | 
|  | /* te->t set: TOD in STCKE format, bytes 8-15 | 
|  | * to->t not set: TOD in STCK format, bytes 0-7 | 
|  | */ | 
|  | unsigned long long ts; | 
|  |  | 
|  | memcpy(&ts, &te->timestamp[te->t], sizeof(ts)); | 
|  | return ts; | 
|  | } | 
|  |  | 
|  | /* Display s390 CPU measurement facility trailer entry */ | 
|  | static bool s390_cpumsf_trailer_show(const char *color, size_t pos, | 
|  | struct hws_trailer_entry *te) | 
|  | { | 
|  | if (te->bsdes != sizeof(struct hws_basic_entry)) { | 
|  | pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos); | 
|  | return false; | 
|  | } | 
|  | color_fprintf(stdout, color, "    [%#08zx] Trailer %c%c%c bsdes:%d" | 
|  | " dsdes:%d Overflow:%lld Time:%#llx\n" | 
|  | "\t\tC:%d TOD:%#lx 1:%#llx 2:%#llx\n", | 
|  | pos, | 
|  | te->f ? 'F' : ' ', | 
|  | te->a ? 'A' : ' ', | 
|  | te->t ? 'T' : ' ', | 
|  | te->bsdes, te->dsdes, te->overflow, | 
|  | trailer_timestamp(te), te->clock_base, te->progusage2, | 
|  | te->progusage[0], te->progusage[1]); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | /* Test a sample data block. It must be 4KB or a multiple thereof in size and | 
|  | * 4KB page aligned. Each sample data page has a trailer entry at the | 
|  | * end which contains the sample entry data sizes. | 
|  | * | 
|  | * Return true if the sample data block passes the checks and set the | 
|  | * basic set entry size and diagnostic set entry size. | 
|  | * | 
|  | * Return false on failure. | 
|  | * | 
|  | * Note: Old hardware does not set the basic or diagnostic entry sizes | 
|  | * in the trailer entry. Use the type number instead. | 
|  | */ | 
|  | static bool s390_cpumsf_validate(int machine_type, | 
|  | unsigned char *buf, size_t len, | 
|  | unsigned short *bsdes, | 
|  | unsigned short *dsdes) | 
|  | { | 
|  | struct hws_basic_entry *basic = (struct hws_basic_entry *)buf; | 
|  | struct hws_trailer_entry *te; | 
|  |  | 
|  | *dsdes = *bsdes = 0; | 
|  | if (len & (S390_CPUMSF_PAGESZ - 1))	/* Illegal size */ | 
|  | return false; | 
|  | if (basic->def != 1)		/* No basic set entry, must be first */ | 
|  | return false; | 
|  | /* Check for trailer entry at end of SDB */ | 
|  | te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ | 
|  | - sizeof(*te)); | 
|  | *bsdes = te->bsdes; | 
|  | *dsdes = te->dsdes; | 
|  | if (!te->bsdes && !te->dsdes) { | 
|  | /* Very old hardware, use CPUID */ | 
|  | switch (machine_type) { | 
|  | case 2097: | 
|  | case 2098: | 
|  | *dsdes = 64; | 
|  | *bsdes = 32; | 
|  | break; | 
|  | case 2817: | 
|  | case 2818: | 
|  | *dsdes = 74; | 
|  | *bsdes = 32; | 
|  | break; | 
|  | case 2827: | 
|  | case 2828: | 
|  | *dsdes = 85; | 
|  | *bsdes = 32; | 
|  | break; | 
|  | case 2964: | 
|  | case 2965: | 
|  | *dsdes = 112; | 
|  | *bsdes = 32; | 
|  | break; | 
|  | default: | 
|  | /* Illegal trailer entry */ | 
|  | return false; | 
|  | } | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  | /* Return true if there is room for another entry */ | 
|  | static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos) | 
|  | { | 
|  | size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry); | 
|  |  | 
|  | if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz) | 
|  | return false; | 
|  | return true; | 
|  | } | 
|  |  | 
|  | /* Dump an auxiliary buffer. These buffers are multiple of | 
|  | * 4KB SDB pages. | 
|  | */ | 
|  | static void s390_cpumsf_dump(struct s390_cpumsf *sf, | 
|  | unsigned char *buf, size_t len) | 
|  | { | 
|  | const char *color = PERF_COLOR_BLUE; | 
|  | struct hws_basic_entry *basic; | 
|  | struct hws_diag_entry *diag; | 
|  | unsigned short bsdes, dsdes; | 
|  | size_t pos = 0; | 
|  |  | 
|  | color_fprintf(stdout, color, | 
|  | ". ... s390 AUX data: size %zu bytes\n", | 
|  | len); | 
|  |  | 
|  | if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes, | 
|  | &dsdes)) { | 
|  | pr_err("Invalid AUX trace data block size:%zu" | 
|  | " (type:%d bsdes:%hd dsdes:%hd)\n", | 
|  | len, sf->machine_type, bsdes, dsdes); | 
|  | return; | 
|  | } | 
|  |  | 
|  | /* s390 kernel always returns 4KB blocks fully occupied, | 
|  | * no partially filled SDBs. | 
|  | */ | 
|  | while (pos < len) { | 
|  | /* Handle Basic entry */ | 
|  | basic = (struct hws_basic_entry *)(buf + pos); | 
|  | if (s390_cpumsf_basic_show(color, pos, basic)) | 
|  | pos += bsdes; | 
|  | else | 
|  | return; | 
|  |  | 
|  | /* Handle Diagnostic entry */ | 
|  | diag = (struct hws_diag_entry *)(buf + pos); | 
|  | if (s390_cpumsf_diag_show(color, pos, diag)) | 
|  | pos += dsdes; | 
|  | else | 
|  | return; | 
|  |  | 
|  | /* Check for trailer entry */ | 
|  | if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) { | 
|  | /* Show trailer entry */ | 
|  | struct hws_trailer_entry te; | 
|  |  | 
|  | pos = (pos + S390_CPUMSF_PAGESZ) | 
|  | & ~(S390_CPUMSF_PAGESZ - 1); | 
|  | pos -= sizeof(te); | 
|  | memcpy(&te, buf + pos, sizeof(te)); | 
|  | /* Set descriptor sizes in case of old hardware | 
|  | * where these values are not set. | 
|  | */ | 
|  | te.bsdes = bsdes; | 
|  | te.dsdes = dsdes; | 
|  | if (s390_cpumsf_trailer_show(color, pos, &te)) | 
|  | pos += sizeof(te); | 
|  | else | 
|  | return; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf, | 
|  | size_t len) | 
|  | { | 
|  | printf(".\n"); | 
|  | s390_cpumsf_dump(sf, buf, len); | 
|  | } | 
|  |  | 
|  | #define	S390_LPP_PID_MASK	0xffffffff | 
|  |  | 
|  | static bool s390_cpumsf_make_event(size_t pos, | 
|  | struct hws_basic_entry *basic, | 
|  | struct s390_cpumsf_queue *sfq) | 
|  | { | 
|  | struct perf_sample sample = { | 
|  | .ip = basic->ia, | 
|  | .pid = basic->hpp & S390_LPP_PID_MASK, | 
|  | .tid = basic->hpp & S390_LPP_PID_MASK, | 
|  | .cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN, | 
|  | .cpu = sfq->cpu, | 
|  | .period = 1 | 
|  | }; | 
|  | union perf_event event; | 
|  |  | 
|  | memset(&event, 0, sizeof(event)); | 
|  | if (basic->CL == 1)	/* Native LPAR mode */ | 
|  | sample.cpumode = basic->P ? PERF_RECORD_MISC_USER | 
|  | : PERF_RECORD_MISC_KERNEL; | 
|  | else if (basic->CL == 2)	/* Guest kernel/user space */ | 
|  | sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER | 
|  | : PERF_RECORD_MISC_GUEST_KERNEL; | 
|  | else if (basic->gpp || basic->prim_asn != 0xffff) | 
|  | /* Use heuristics on old hardware */ | 
|  | sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER | 
|  | : PERF_RECORD_MISC_GUEST_KERNEL; | 
|  | else | 
|  | sample.cpumode = basic->P ? PERF_RECORD_MISC_USER | 
|  | : PERF_RECORD_MISC_KERNEL; | 
|  |  | 
|  | event.sample.header.type = PERF_RECORD_SAMPLE; | 
|  | event.sample.header.misc = sample.cpumode; | 
|  | event.sample.header.size = sizeof(struct perf_event_header); | 
|  |  | 
|  | pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n", | 
|  | __func__, pos, sample.ip, basic->P, basic->CL, sample.pid, | 
|  | sample.tid, sample.cpumode, sample.cpu); | 
|  | if (perf_session__deliver_synth_event(sfq->sf->session, &event, | 
|  | &sample)) { | 
|  | pr_err("s390 Auxiliary Trace: failed to deliver event\n"); | 
|  | return false; | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  | static unsigned long long get_trailer_time(const unsigned char *buf) | 
|  | { | 
|  | struct hws_trailer_entry *te; | 
|  | unsigned long long aux_time; | 
|  |  | 
|  | te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ | 
|  | - sizeof(*te)); | 
|  |  | 
|  | if (!te->clock_base)	/* TOD_CLOCK_BASE value missing */ | 
|  | return 0; | 
|  |  | 
|  | /* Correct calculation to convert time stamp in trailer entry to | 
|  | * nano seconds (taken from arch/s390 function tod_to_ns()). | 
|  | * TOD_CLOCK_BASE is stored in trailer entry member progusage2. | 
|  | */ | 
|  | aux_time = trailer_timestamp(te) - te->progusage2; | 
|  | aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9); | 
|  | return aux_time; | 
|  | } | 
|  |  | 
|  | /* Process the data samples of a single queue. The first parameter is a | 
|  | * pointer to the queue, the second parameter is the time stamp. This | 
|  | * is the time stamp: | 
|  | * - of the event that triggered this processing. | 
|  | * - or the time stamp when the last proccesing of this queue stopped. | 
|  | *   In this case it stopped at a 4KB page boundary and record the | 
|  | *   position on where to continue processing on the next invocation | 
|  | *   (see buffer->use_data and buffer->use_size). | 
|  | * | 
|  | * When this function returns the second parameter is updated to | 
|  | * reflect the time stamp of the last processed auxiliary data entry | 
|  | * (taken from the trailer entry of that page). The caller uses this | 
|  | * returned time stamp to record the last processed entry in this | 
|  | * queue. | 
|  | * | 
|  | * The function returns: | 
|  | * 0:  Processing successful. The second parameter returns the | 
|  | *     time stamp from the trailer entry until which position | 
|  | *     processing took place. Subsequent calls resume from this | 
|  | *     position. | 
|  | * <0: An error occurred during processing. The second parameter | 
|  | *     returns the maximum time stamp. | 
|  | * >0: Done on this queue. The second parameter returns the | 
|  | *     maximum time stamp. | 
|  | */ | 
|  | static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts) | 
|  | { | 
|  | struct s390_cpumsf *sf = sfq->sf; | 
|  | unsigned char *buf = sfq->buffer->use_data; | 
|  | size_t len = sfq->buffer->use_size; | 
|  | struct hws_basic_entry *basic; | 
|  | unsigned short bsdes, dsdes; | 
|  | size_t pos = 0; | 
|  | int err = 1; | 
|  | u64 aux_ts; | 
|  |  | 
|  | if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes, | 
|  | &dsdes)) { | 
|  | *ts = ~0ULL; | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | /* Get trailer entry time stamp and check if entries in | 
|  | * this auxiliary page are ready for processing. If the | 
|  | * time stamp of the first entry is too high, whole buffer | 
|  | * can be skipped. In this case return time stamp. | 
|  | */ | 
|  | aux_ts = get_trailer_time(buf); | 
|  | if (!aux_ts) { | 
|  | pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n", | 
|  | (s64)sfq->buffer->data_offset); | 
|  | aux_ts = ~0ULL; | 
|  | goto out; | 
|  | } | 
|  | if (aux_ts > *ts) { | 
|  | *ts = aux_ts; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | while (pos < len) { | 
|  | /* Handle Basic entry */ | 
|  | basic = (struct hws_basic_entry *)(buf + pos); | 
|  | if (s390_cpumsf_make_event(pos, basic, sfq)) | 
|  | pos += bsdes; | 
|  | else { | 
|  | err = -EBADF; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | pos += dsdes;	/* Skip diagnositic entry */ | 
|  |  | 
|  | /* Check for trailer entry */ | 
|  | if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) { | 
|  | pos = (pos + S390_CPUMSF_PAGESZ) | 
|  | & ~(S390_CPUMSF_PAGESZ - 1); | 
|  | /* Check existence of next page */ | 
|  | if (pos >= len) | 
|  | break; | 
|  | aux_ts = get_trailer_time(buf + pos); | 
|  | if (!aux_ts) { | 
|  | aux_ts = ~0ULL; | 
|  | goto out; | 
|  | } | 
|  | if (aux_ts > *ts) { | 
|  | *ts = aux_ts; | 
|  | sfq->buffer->use_data += pos; | 
|  | sfq->buffer->use_size -= pos; | 
|  | return 0; | 
|  | } | 
|  | } | 
|  | } | 
|  | out: | 
|  | *ts = aux_ts; | 
|  | sfq->buffer->use_size = 0; | 
|  | sfq->buffer->use_data = NULL; | 
|  | return err;	/* Buffer completely scanned or error */ | 
|  | } | 
|  |  | 
|  | /* Run the s390 auxiliary trace decoder. | 
|  | * Select the queue buffer to operate on, the caller already selected | 
|  | * the proper queue, depending on second parameter 'ts'. | 
|  | * This is the time stamp until which the auxiliary entries should | 
|  | * be processed. This value is updated by called functions and | 
|  | * returned to the caller. | 
|  | * | 
|  | * Resume processing in the current buffer. If there is no buffer | 
|  | * get a new buffer from the queue and setup start position for | 
|  | * processing. | 
|  | * When a buffer is completely processed remove it from the queue | 
|  | * before returning. | 
|  | * | 
|  | * This function returns | 
|  | * 1: When the queue is empty. Second parameter will be set to | 
|  | *    maximum time stamp. | 
|  | * 0: Normal processing done. | 
|  | * <0: Error during queue buffer setup. This causes the caller | 
|  | *     to stop processing completely. | 
|  | */ | 
|  | static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq, | 
|  | u64 *ts) | 
|  | { | 
|  |  | 
|  | struct auxtrace_buffer *buffer; | 
|  | struct auxtrace_queue *queue; | 
|  | int err; | 
|  |  | 
|  | queue = &sfq->sf->queues.queue_array[sfq->queue_nr]; | 
|  |  | 
|  | /* Get buffer and last position in buffer to resume | 
|  | * decoding the auxiliary entries. One buffer might be large | 
|  | * and decoding might stop in between. This depends on the time | 
|  | * stamp of the trailer entry in each page of the auxiliary | 
|  | * data and the time stamp of the event triggering the decoding. | 
|  | */ | 
|  | if (sfq->buffer == NULL) { | 
|  | sfq->buffer = buffer = auxtrace_buffer__next(queue, | 
|  | sfq->buffer); | 
|  | if (!buffer) { | 
|  | *ts = ~0ULL; | 
|  | return 1;	/* Processing done on this queue */ | 
|  | } | 
|  | /* Start with a new buffer on this queue */ | 
|  | if (buffer->data) { | 
|  | buffer->use_size = buffer->size; | 
|  | buffer->use_data = buffer->data; | 
|  | } | 
|  | } else | 
|  | buffer = sfq->buffer; | 
|  |  | 
|  | if (!buffer->data) { | 
|  | int fd = perf_data__fd(sfq->sf->session->data); | 
|  |  | 
|  | buffer->data = auxtrace_buffer__get_data(buffer, fd); | 
|  | if (!buffer->data) | 
|  | return -ENOMEM; | 
|  | buffer->use_size = buffer->size; | 
|  | buffer->use_data = buffer->data; | 
|  | } | 
|  | pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n", | 
|  | __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset, | 
|  | buffer->size, buffer->use_size); | 
|  | err = s390_cpumsf_samples(sfq, ts); | 
|  |  | 
|  | /* If non-zero, there is either an error (err < 0) or the buffer is | 
|  | * completely done (err > 0). The error is unrecoverable, usually | 
|  | * some descriptors could not be read successfully, so continue with | 
|  | * the next buffer. | 
|  | * In both cases the parameter 'ts' has been updated. | 
|  | */ | 
|  | if (err) { | 
|  | sfq->buffer = NULL; | 
|  | list_del(&buffer->list); | 
|  | auxtrace_buffer__free(buffer); | 
|  | if (err > 0)		/* Buffer done, no error */ | 
|  | err = 0; | 
|  | } | 
|  | return err; | 
|  | } | 
|  |  | 
|  | static struct s390_cpumsf_queue * | 
|  | s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr) | 
|  | { | 
|  | struct s390_cpumsf_queue *sfq; | 
|  |  | 
|  | sfq = zalloc(sizeof(struct s390_cpumsf_queue)); | 
|  | if (sfq == NULL) | 
|  | return NULL; | 
|  |  | 
|  | sfq->sf = sf; | 
|  | sfq->queue_nr = queue_nr; | 
|  | sfq->cpu = -1; | 
|  | return sfq; | 
|  | } | 
|  |  | 
|  | static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf, | 
|  | struct auxtrace_queue *queue, | 
|  | unsigned int queue_nr, u64 ts) | 
|  | { | 
|  | struct s390_cpumsf_queue *sfq = queue->priv; | 
|  |  | 
|  | if (list_empty(&queue->head)) | 
|  | return 0; | 
|  |  | 
|  | if (sfq == NULL) { | 
|  | sfq = s390_cpumsf_alloc_queue(sf, queue_nr); | 
|  | if (!sfq) | 
|  | return -ENOMEM; | 
|  | queue->priv = sfq; | 
|  |  | 
|  | if (queue->cpu != -1) | 
|  | sfq->cpu = queue->cpu; | 
|  | } | 
|  | return auxtrace_heap__add(&sf->heap, queue_nr, ts); | 
|  | } | 
|  |  | 
|  | static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts) | 
|  | { | 
|  | unsigned int i; | 
|  | int ret = 0; | 
|  |  | 
|  | for (i = 0; i < sf->queues.nr_queues; i++) { | 
|  | ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i], | 
|  | i, ts); | 
|  | if (ret) | 
|  | break; | 
|  | } | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts) | 
|  | { | 
|  | if (!sf->queues.new_data) | 
|  | return 0; | 
|  |  | 
|  | sf->queues.new_data = false; | 
|  | return s390_cpumsf_setup_queues(sf, ts); | 
|  | } | 
|  |  | 
|  | static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp) | 
|  | { | 
|  | unsigned int queue_nr; | 
|  | u64 ts; | 
|  | int ret; | 
|  |  | 
|  | while (1) { | 
|  | struct auxtrace_queue *queue; | 
|  | struct s390_cpumsf_queue *sfq; | 
|  |  | 
|  | if (!sf->heap.heap_cnt) | 
|  | return 0; | 
|  |  | 
|  | if (sf->heap.heap_array[0].ordinal >= timestamp) | 
|  | return 0; | 
|  |  | 
|  | queue_nr = sf->heap.heap_array[0].queue_nr; | 
|  | queue = &sf->queues.queue_array[queue_nr]; | 
|  | sfq = queue->priv; | 
|  |  | 
|  | auxtrace_heap__pop(&sf->heap); | 
|  | if (sf->heap.heap_cnt) { | 
|  | ts = sf->heap.heap_array[0].ordinal + 1; | 
|  | if (ts > timestamp) | 
|  | ts = timestamp; | 
|  | } else { | 
|  | ts = timestamp; | 
|  | } | 
|  |  | 
|  | ret = s390_cpumsf_run_decoder(sfq, &ts); | 
|  | if (ret < 0) { | 
|  | auxtrace_heap__add(&sf->heap, queue_nr, ts); | 
|  | return ret; | 
|  | } | 
|  | if (!ret) { | 
|  | ret = auxtrace_heap__add(&sf->heap, queue_nr, ts); | 
|  | if (ret < 0) | 
|  | return ret; | 
|  | } | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu, | 
|  | pid_t pid, pid_t tid, u64 ip) | 
|  | { | 
|  | char msg[MAX_AUXTRACE_ERROR_MSG]; | 
|  | union perf_event event; | 
|  | int err; | 
|  |  | 
|  | strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1); | 
|  | auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, | 
|  | code, cpu, pid, tid, ip, msg); | 
|  |  | 
|  | err = perf_session__deliver_synth_event(sf->session, &event, NULL); | 
|  | if (err) | 
|  | pr_err("s390 Auxiliary Trace: failed to deliver error event," | 
|  | "error %d\n", err); | 
|  | return err; | 
|  | } | 
|  |  | 
|  | static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample) | 
|  | { | 
|  | return s390_cpumsf_synth_error(sf, 1, sample->cpu, | 
|  | sample->pid, sample->tid, 0); | 
|  | } | 
|  |  | 
|  | static int | 
|  | s390_cpumsf_process_event(struct perf_session *session __maybe_unused, | 
|  | union perf_event *event, | 
|  | struct perf_sample *sample, | 
|  | struct perf_tool *tool) | 
|  | { | 
|  | struct s390_cpumsf *sf = container_of(session->auxtrace, | 
|  | struct s390_cpumsf, | 
|  | auxtrace); | 
|  | u64 timestamp = sample->time; | 
|  | int err = 0; | 
|  |  | 
|  | if (dump_trace) | 
|  | return 0; | 
|  |  | 
|  | if (!tool->ordered_events) { | 
|  | pr_err("s390 Auxiliary Trace requires ordered events\n"); | 
|  | return -EINVAL; | 
|  | } | 
|  |  | 
|  | if (event->header.type == PERF_RECORD_AUX && | 
|  | event->aux.flags & PERF_AUX_FLAG_TRUNCATED) | 
|  | return s390_cpumsf_lost(sf, sample); | 
|  |  | 
|  | if (timestamp) { | 
|  | err = s390_cpumsf_update_queues(sf, timestamp); | 
|  | if (!err) | 
|  | err = s390_cpumsf_process_queues(sf, timestamp); | 
|  | } | 
|  | return err; | 
|  | } | 
|  |  | 
|  | struct s390_cpumsf_synth { | 
|  | struct perf_tool cpumsf_tool; | 
|  | struct perf_session *session; | 
|  | }; | 
|  |  | 
|  | static int | 
|  | s390_cpumsf_process_auxtrace_event(struct perf_session *session, | 
|  | union perf_event *event __maybe_unused, | 
|  | struct perf_tool *tool __maybe_unused) | 
|  | { | 
|  | struct s390_cpumsf *sf = container_of(session->auxtrace, | 
|  | struct s390_cpumsf, | 
|  | auxtrace); | 
|  |  | 
|  | int fd = perf_data__fd(session->data); | 
|  | struct auxtrace_buffer *buffer; | 
|  | off_t data_offset; | 
|  | int err; | 
|  |  | 
|  | if (sf->data_queued) | 
|  | return 0; | 
|  |  | 
|  | if (perf_data__is_pipe(session->data)) { | 
|  | data_offset = 0; | 
|  | } else { | 
|  | data_offset = lseek(fd, 0, SEEK_CUR); | 
|  | if (data_offset == -1) | 
|  | return -errno; | 
|  | } | 
|  |  | 
|  | err = auxtrace_queues__add_event(&sf->queues, session, event, | 
|  | data_offset, &buffer); | 
|  | if (err) | 
|  | return err; | 
|  |  | 
|  | /* Dump here after copying piped trace out of the pipe */ | 
|  | if (dump_trace) { | 
|  | if (auxtrace_buffer__get_data(buffer, fd)) { | 
|  | s390_cpumsf_dump_event(sf, buffer->data, | 
|  | buffer->size); | 
|  | auxtrace_buffer__put_data(buffer); | 
|  | } | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused) | 
|  | { | 
|  | } | 
|  |  | 
|  | static int s390_cpumsf_flush(struct perf_session *session __maybe_unused, | 
|  | struct perf_tool *tool __maybe_unused) | 
|  | { | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static void s390_cpumsf_free_queues(struct perf_session *session) | 
|  | { | 
|  | struct s390_cpumsf *sf = container_of(session->auxtrace, | 
|  | struct s390_cpumsf, | 
|  | auxtrace); | 
|  | struct auxtrace_queues *queues = &sf->queues; | 
|  | unsigned int i; | 
|  |  | 
|  | for (i = 0; i < queues->nr_queues; i++) | 
|  | zfree(&queues->queue_array[i].priv); | 
|  | auxtrace_queues__free(queues); | 
|  | } | 
|  |  | 
|  | static void s390_cpumsf_free(struct perf_session *session) | 
|  | { | 
|  | struct s390_cpumsf *sf = container_of(session->auxtrace, | 
|  | struct s390_cpumsf, | 
|  | auxtrace); | 
|  |  | 
|  | auxtrace_heap__free(&sf->heap); | 
|  | s390_cpumsf_free_queues(session); | 
|  | session->auxtrace = NULL; | 
|  | free(sf); | 
|  | } | 
|  |  | 
|  | static int s390_cpumsf_get_type(const char *cpuid) | 
|  | { | 
|  | int ret, family = 0; | 
|  |  | 
|  | ret = sscanf(cpuid, "%*[^,],%u", &family); | 
|  | return (ret == 1) ? family : 0; | 
|  | } | 
|  |  | 
|  | /* Check itrace options set on perf report command. | 
|  | * Return true, if none are set or all options specified can be | 
|  | * handled on s390. | 
|  | * Return false otherwise. | 
|  | */ | 
|  | static bool check_auxtrace_itrace(struct itrace_synth_opts *itops) | 
|  | { | 
|  | if (!itops || !itops->set) | 
|  | return true; | 
|  | pr_err("No --itrace options supported\n"); | 
|  | return false; | 
|  | } | 
|  |  | 
|  | int s390_cpumsf_process_auxtrace_info(union perf_event *event, | 
|  | struct perf_session *session) | 
|  | { | 
|  | struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; | 
|  | struct s390_cpumsf *sf; | 
|  | int err; | 
|  |  | 
|  | if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event)) | 
|  | return -EINVAL; | 
|  |  | 
|  | sf = zalloc(sizeof(struct s390_cpumsf)); | 
|  | if (sf == NULL) | 
|  | return -ENOMEM; | 
|  |  | 
|  | if (!check_auxtrace_itrace(session->itrace_synth_opts)) { | 
|  | err = -EINVAL; | 
|  | goto err_free; | 
|  | } | 
|  |  | 
|  | err = auxtrace_queues__init(&sf->queues); | 
|  | if (err) | 
|  | goto err_free; | 
|  |  | 
|  | sf->session = session; | 
|  | sf->machine = &session->machines.host; /* No kvm support */ | 
|  | sf->auxtrace_type = auxtrace_info->type; | 
|  | sf->pmu_type = PERF_TYPE_RAW; | 
|  | sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid); | 
|  |  | 
|  | sf->auxtrace.process_event = s390_cpumsf_process_event; | 
|  | sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event; | 
|  | sf->auxtrace.flush_events = s390_cpumsf_flush; | 
|  | sf->auxtrace.free_events = s390_cpumsf_free_events; | 
|  | sf->auxtrace.free = s390_cpumsf_free; | 
|  | session->auxtrace = &sf->auxtrace; | 
|  |  | 
|  | if (dump_trace) | 
|  | return 0; | 
|  |  | 
|  | err = auxtrace_queues__process_index(&sf->queues, session); | 
|  | if (err) | 
|  | goto err_free_queues; | 
|  |  | 
|  | if (sf->queues.populated) | 
|  | sf->data_queued = true; | 
|  |  | 
|  | return 0; | 
|  |  | 
|  | err_free_queues: | 
|  | auxtrace_queues__free(&sf->queues); | 
|  | session->auxtrace = NULL; | 
|  | err_free: | 
|  | free(sf); | 
|  | return err; | 
|  | } |