Blame - src/kernel/linux/v4.19/tools/perf/util/stat-shadow.c - T800

blob: bbb0e042d8e5802a8de01f50e1c2eebbcf02bfe2 [file] [log] [blame]

xj	b04a402	2021-11-25 15:01:52 +0800	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2	#include <stdio.h>
				3	#include "evsel.h"
				4	#include "stat.h"
				5	#include "color.h"
				6	#include "pmu.h"
				7	#include "rblist.h"
				8	#include "evlist.h"
				9	#include "expr.h"
				10	#include "metricgroup.h"
				11
				12	/*
				13	* AGGR_GLOBAL: Use CPU 0
				14	* AGGR_SOCKET: Use first CPU of socket
				15	* AGGR_CORE: Use first CPU of core
				16	* AGGR_NONE: Use matching CPU
				17	* AGGR_THREAD: Not supported?
				18	*/
				19	static bool have_frontend_stalled;
				20
				21	struct runtime_stat rt_stat;
				22	struct stats walltime_nsecs_stats;
				23
				24	struct saved_value {
				25	struct rb_node rb_node;
				26	struct perf_evsel *evsel;
				27	enum stat_type type;
				28	int ctx;
				29	int cpu;
				30	struct runtime_stat *stat;
				31	struct stats stats;
				32	};
				33
				34	static int saved_value_cmp(struct rb_node rb_node, const void entry)
				35	{
				36	struct saved_value *a = container_of(rb_node,
				37	struct saved_value,
				38	rb_node);
				39	const struct saved_value *b = entry;
				40
				41	if (a->cpu != b->cpu)
				42	return a->cpu - b->cpu;
				43
				44	/*
				45	* Previously the rbtree was used to link generic metrics.
				46	* The keys were evsel/cpu. Now the rbtree is extended to support
				47	* per-thread shadow stats. For shadow stats case, the keys
				48	* are cpu/type/ctx/stat (evsel is NULL). For generic metrics
				49	* case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
				50	*/
				51	if (a->type != b->type)
				52	return a->type - b->type;
				53
				54	if (a->ctx != b->ctx)
				55	return a->ctx - b->ctx;
				56
				57	if (a->evsel == NULL && b->evsel == NULL) {
				58	if (a->stat == b->stat)
				59	return 0;
				60
				61	if ((char )a->stat < (char )b->stat)
				62	return -1;
				63
				64	return 1;
				65	}
				66
				67	if (a->evsel == b->evsel)
				68	return 0;
				69	if ((char )a->evsel < (char )b->evsel)
				70	return -1;
				71	return +1;
				72	}
				73
				74	static struct rb_node saved_value_new(struct rblist rblist __maybe_unused,
				75	const void *entry)
				76	{
				77	struct saved_value *nd = malloc(sizeof(struct saved_value));
				78
				79	if (!nd)
				80	return NULL;
				81	memcpy(nd, entry, sizeof(struct saved_value));
				82	return &nd->rb_node;
				83	}
				84
				85	static void saved_value_delete(struct rblist *rblist __maybe_unused,
				86	struct rb_node *rb_node)
				87	{
				88	struct saved_value *v;
				89
				90	BUG_ON(!rb_node);
				91	v = container_of(rb_node, struct saved_value, rb_node);
				92	free(v);
				93	}
				94
				95	static struct saved_value saved_value_lookup(struct perf_evsel evsel,
				96	int cpu,
				97	bool create,
				98	enum stat_type type,
				99	int ctx,
				100	struct runtime_stat *st)
				101	{
				102	struct rblist *rblist;
				103	struct rb_node *nd;
				104	struct saved_value dm = {
				105	.cpu = cpu,
				106	.evsel = evsel,
				107	.type = type,
				108	.ctx = ctx,
				109	.stat = st,
				110	};
				111
				112	rblist = &st->value_list;
				113
				114	nd = rblist__find(rblist, &dm);
				115	if (nd)
				116	return container_of(nd, struct saved_value, rb_node);
				117	if (create) {
				118	rblist__add_node(rblist, &dm);
				119	nd = rblist__find(rblist, &dm);
				120	if (nd)
				121	return container_of(nd, struct saved_value, rb_node);
				122	}
				123	return NULL;
				124	}
				125
				126	void runtime_stat__init(struct runtime_stat *st)
				127	{
				128	struct rblist *rblist = &st->value_list;
				129
				130	rblist__init(rblist);
				131	rblist->node_cmp = saved_value_cmp;
				132	rblist->node_new = saved_value_new;
				133	rblist->node_delete = saved_value_delete;
				134	}
				135
				136	void runtime_stat__exit(struct runtime_stat *st)
				137	{
				138	rblist__exit(&st->value_list);
				139	}
				140
				141	void perf_stat__init_shadow_stats(void)
				142	{
				143	have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
				144	runtime_stat__init(&rt_stat);
				145	}
				146
				147	static int evsel_context(struct perf_evsel *evsel)
				148	{
				149	int ctx = 0;
				150
				151	if (evsel->attr.exclude_kernel)
				152	ctx \|= CTX_BIT_KERNEL;
				153	if (evsel->attr.exclude_user)
				154	ctx \|= CTX_BIT_USER;
				155	if (evsel->attr.exclude_hv)
				156	ctx \|= CTX_BIT_HV;
				157	if (evsel->attr.exclude_host)
				158	ctx \|= CTX_BIT_HOST;
				159	if (evsel->attr.exclude_idle)
				160	ctx \|= CTX_BIT_IDLE;
				161
				162	return ctx;
				163	}
				164
				165	static void reset_stat(struct runtime_stat *st)
				166	{
				167	struct rblist *rblist;
				168	struct rb_node pos, next;
				169
				170	rblist = &st->value_list;
				171	next = rb_first(&rblist->entries);
				172	while (next) {
				173	pos = next;
				174	next = rb_next(pos);
				175	memset(&container_of(pos, struct saved_value, rb_node)->stats,
				176	0,
				177	sizeof(struct stats));
				178	}
				179	}
				180
				181	void perf_stat__reset_shadow_stats(void)
				182	{
				183	reset_stat(&rt_stat);
				184	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
				185	}
				186
				/* Reset the shadow stats held in a caller-supplied @st. */
				void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
				{
					reset_stat(st);
				}
				191
				192	static void update_runtime_stat(struct runtime_stat *st,
				193	enum stat_type type,
				194	int ctx, int cpu, u64 count)
				195	{
				196	struct saved_value *v = saved_value_lookup(NULL, cpu, true,
				197	type, ctx, st);
				198
				199	if (v)
				200	update_stats(&v->stats, count);
				201	}
				202
				203	/*
				204	* Update various tracking values we maintain to print
				205	* more semantic information such as miss/hit ratios,
				206	* instruction rates, etc:
				207	*/
				208	void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
				209	int cpu, struct runtime_stat *st)
				210	{
				211	int ctx = evsel_context(counter);
				212
				213	count *= counter->scale;
				214
				215	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) \|\|
				216	perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK))
				217	update_runtime_stat(st, STAT_NSECS, 0, cpu, count);
				218	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
				219	update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
				220	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
				221	update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
				222	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
				223	update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
				224	else if (perf_stat_evsel__is(counter, ELISION_START))
				225	update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
				226	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
				227	update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
				228	ctx, cpu, count);
				229	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
				230	update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
				231	ctx, cpu, count);
				232	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
				233	update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
				234	ctx, cpu, count);
				235	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
				236	update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
				237	ctx, cpu, count);
				238	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
				239	update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
				240	ctx, cpu, count);
				241	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
				242	update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
				243	ctx, cpu, count);
				244	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
				245	update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
				246	ctx, cpu, count);
				247	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
				248	update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
				249	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
				250	update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
				251	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
				252	update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
				253	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
				254	update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
				255	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
				256	update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
				257	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
				258	update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
				259	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
				260	update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
				261	else if (perf_stat_evsel__is(counter, SMI_NUM))
				262	update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
				263	else if (perf_stat_evsel__is(counter, APERF))
				264	update_runtime_stat(st, STAT_APERF, ctx, cpu, count);
				265
				266	if (counter->collect_stat) {
				267	struct saved_value *v = saved_value_lookup(counter, cpu, true,
				268	STAT_NONE, 0, st);
				269	update_stats(&v->stats, count);
				270	}
				271	}
				272
				/*
				 * Row selector for the threshold table in get_ratio_color().
				 * GRC_MAX_NR is the number of rows and must stay last.
				 */
				enum grc_type {
					GRC_STALLED_CYCLES_FE,
					GRC_STALLED_CYCLES_BE,
					GRC_CACHE_MISSES,
					GRC_MAX_NR
				};
				280
				281	static const char *get_ratio_color(enum grc_type type, double ratio)
				282	{
				283	static const double grc_table[GRC_MAX_NR][3] = {
				284	[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
				285	[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
				286	[GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
				287	};
				288	const char *color = PERF_COLOR_NORMAL;
				289
				290	if (ratio > grc_table[type][0])
				291	color = PERF_COLOR_RED;
				292	else if (ratio > grc_table[type][1])
				293	color = PERF_COLOR_MAGENTA;
				294	else if (ratio > grc_table[type][2])
				295	color = PERF_COLOR_YELLOW;
				296
				297	return color;
				298	}
				299
				300	static struct perf_evsel perf_stat__find_event(struct perf_evlist evsel_list,
				301	const char *name)
				302	{
				303	struct perf_evsel *c2;
				304
				305	evlist__for_each_entry (evsel_list, c2) {
				306	if (!strcasecmp(c2->name, name) && !c2->collect_stat)
				307	return c2;
				308	}
				309	return NULL;
				310	}
				311
				312	/* Mark MetricExpr target events and link events using them to them. */
				313	void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
				314	{
				315	struct perf_evsel counter, leader, *metric_events, oc;
				316	bool found;
				317	const char **metric_names;
				318	int i;
				319	int num_metric_names;
				320
				321	evlist__for_each_entry(evsel_list, counter) {
				322	bool invalid = false;
				323
				324	leader = counter->leader;
				325	if (!counter->metric_expr)
				326	continue;
				327	metric_events = counter->metric_events;
				328	if (!metric_events) {
				329	if (expr__find_other(counter->metric_expr, counter->name,
				330	&metric_names, &num_metric_names) < 0)
				331	continue;
				332
				333	metric_events = calloc(sizeof(struct perf_evsel *),
				334	num_metric_names + 1);
				335	if (!metric_events)
				336	return;
				337	counter->metric_events = metric_events;
				338	}
				339
				340	for (i = 0; i < num_metric_names; i++) {
				341	found = false;
				342	if (leader) {
				343	/* Search in group */
				344	for_each_group_member (oc, leader) {
				345	if (!strcasecmp(oc->name, metric_names[i]) &&
				346	!oc->collect_stat) {
				347	found = true;
				348	break;
				349	}
				350	}
				351	}
				352	if (!found) {
				353	/* Search ignoring groups */
				354	oc = perf_stat__find_event(evsel_list, metric_names[i]);
				355	}
				356	if (!oc) {
				357	/* Deduping one is good enough to handle duplicated PMUs. */
				358	static char *printed;
				359
				360	/*
				361	* Adding events automatically would be difficult, because
				362	* it would risk creating groups that are not schedulable.
				363	* perf stat doesn't understand all the scheduling constraints
				364	* of events. So we ask the user instead to add the missing
				365	* events.
				366	*/
				367	if (!printed \|\| strcasecmp(printed, metric_names[i])) {
				368	fprintf(stderr,
				369	"Add %s event to groups to get metric expression for %s\n",
				370	metric_names[i],
				371	counter->name);
				372	printed = strdup(metric_names[i]);
				373	}
				374	invalid = true;
				375	continue;
				376	}
				377	metric_events[i] = oc;
				378	oc->collect_stat = true;
				379	}
				380	metric_events[i] = NULL;
				381	free(metric_names);
				382	if (invalid) {
				383	free(metric_events);
				384	counter->metric_events = NULL;
				385	counter->metric_expr = NULL;
				386	}
				387	}
				388	}
				389
				390	static double runtime_stat_avg(struct runtime_stat *st,
				391	enum stat_type type, int ctx, int cpu)
				392	{
				393	struct saved_value *v;
				394
				395	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
				396	if (!v)
				397	return 0.0;
				398
				399	return avg_stats(&v->stats);
				400	}
				401
				402	static double runtime_stat_n(struct runtime_stat *st,
				403	enum stat_type type, int ctx, int cpu)
				404	{
				405	struct saved_value *v;
				406
				407	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
				408	if (!v)
				409	return 0.0;
				410
				411	return v->stats.n;
				412	}
				413
				414	static void print_stalled_cycles_frontend(int cpu,
				415	struct perf_evsel *evsel, double avg,
				416	struct perf_stat_output_ctx *out,
				417	struct runtime_stat *st)
				418	{
				419	double total, ratio = 0.0;
				420	const char *color;
				421	int ctx = evsel_context(evsel);
				422
				423	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
				424
				425	if (total)
				426	ratio = avg / total * 100.0;
				427
				428	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
				429
				430	if (ratio)
				431	out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
				432	ratio);
				433	else
				434	out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
				435	}
				436
				437	static void print_stalled_cycles_backend(int cpu,
				438	struct perf_evsel *evsel, double avg,
				439	struct perf_stat_output_ctx *out,
				440	struct runtime_stat *st)
				441	{
				442	double total, ratio = 0.0;
				443	const char *color;
				444	int ctx = evsel_context(evsel);
				445
				446	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
				447
				448	if (total)
				449	ratio = avg / total * 100.0;
				450
				451	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
				452
				453	out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
				454	}
				455
				456	static void print_branch_misses(int cpu,
				457	struct perf_evsel *evsel,
				458	double avg,
				459	struct perf_stat_output_ctx *out,
				460	struct runtime_stat *st)
				461	{
				462	double total, ratio = 0.0;
				463	const char *color;
				464	int ctx = evsel_context(evsel);
				465
				466	total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);
				467
				468	if (total)
				469	ratio = avg / total * 100.0;
				470
				471	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
				472
				473	out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
				474	}
				475
				476	static void print_l1_dcache_misses(int cpu,
				477	struct perf_evsel *evsel,
				478	double avg,
				479	struct perf_stat_output_ctx *out,
				480	struct runtime_stat *st)
				481
				482	{
				483	double total, ratio = 0.0;
				484	const char *color;
				485	int ctx = evsel_context(evsel);
				486
				487	total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);
				488
				489	if (total)
				490	ratio = avg / total * 100.0;
				491
				492	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
				493
				494	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
				495	}
				496
				497	static void print_l1_icache_misses(int cpu,
				498	struct perf_evsel *evsel,
				499	double avg,
				500	struct perf_stat_output_ctx *out,
				501	struct runtime_stat *st)
				502
				503	{
				504	double total, ratio = 0.0;
				505	const char *color;
				506	int ctx = evsel_context(evsel);
				507
				508	total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);
				509
				510	if (total)
				511	ratio = avg / total * 100.0;
				512
				513	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
				514	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
				515	}
				516
				517	static void print_dtlb_cache_misses(int cpu,
				518	struct perf_evsel *evsel,
				519	double avg,
				520	struct perf_stat_output_ctx *out,
				521	struct runtime_stat *st)
				522	{
				523	double total, ratio = 0.0;
				524	const char *color;
				525	int ctx = evsel_context(evsel);
				526
				527	total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);
				528
				529	if (total)
				530	ratio = avg / total * 100.0;
				531
				532	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
				533	out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
				534	}
				535
				536	static void print_itlb_cache_misses(int cpu,
				537	struct perf_evsel *evsel,
				538	double avg,
				539	struct perf_stat_output_ctx *out,
				540	struct runtime_stat *st)
				541	{
				542	double total, ratio = 0.0;
				543	const char *color;
				544	int ctx = evsel_context(evsel);
				545
				546	total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);
				547
				548	if (total)
				549	ratio = avg / total * 100.0;
				550
				551	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
				552	out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
				553	}
				554
				555	static void print_ll_cache_misses(int cpu,
				556	struct perf_evsel *evsel,
				557	double avg,
				558	struct perf_stat_output_ctx *out,
				559	struct runtime_stat *st)
				560	{
				561	double total, ratio = 0.0;
				562	const char *color;
				563	int ctx = evsel_context(evsel);
				564
				565	total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);
				566
				567	if (total)
				568	ratio = avg / total * 100.0;
				569
				570	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
				571	out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
				572	}
				573
				574	/*
				575	* High level "TopDown" CPU core pipe line bottleneck break down.
				576	*
				577	* Basic concept following
				578	* Yasin, A Top Down Method for Performance analysis and Counter architecture
				579	* ISPASS14
				580	*
				581	* The CPU pipeline is divided into 4 areas that can be bottlenecks:
				582	*
				583	* Frontend -> Backend -> Retiring
				584	* BadSpeculation in addition means out of order execution that is thrown away
				585	* (for example branch mispredictions)
				586	* Frontend is instruction decoding.
				587	* Backend is execution, like computation and accessing data in memory
				588	* Retiring is good execution that is not directly bottlenecked
				589	*
				590	* The formulas are computed in slots.
				591	* A slot is an entry in the pipeline each for the pipeline width
				592	* (for example a 4-wide pipeline has 4 slots for each cycle)
				593	*
				594	* Formulas:
				595	* BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
				596	* TotalSlots
				597	* Retiring = SlotsRetired / TotalSlots
				598	* FrontendBound = FetchBubbles / TotalSlots
				599	* BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
				600	*
				601	* The kernel provides the mapping to the low level CPU events and any scaling
				602	* needed for the CPU pipeline width, for example:
				603	*
				604	* TotalSlots = Cycles * 4
				605	*
				606	* The scaling factor is communicated in the sysfs unit.
				607	*
				608	* In some cases the CPU may not be able to measure all the formulas due to
				609	* missing events. In this case multiple formulas are combined, as possible.
				610	*
				611	* Full TopDown supports more levels to sub-divide each area: for example
				612	* BackendBound into computing bound and memory bound. For now we only
				613	* support Level 1 TopDown.
				614	*/
				615
				/*
				 * Clamp slightly negative results to zero: values in [-0.02, 0) become
				 * 0.0, everything else is returned unchanged.
				 */
				static double sanitize_val(double x)
				{
					if (x < 0 && x >= -0.02)
						return 0.0;
					return x;
				}
				622
				623	static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
				624	{
				625	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
				626	}
				627
				628	static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
				629	{
				630	double bad_spec = 0;
				631	double total_slots;
				632	double total;
				633
				634	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
				635	runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
				636	runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);
				637
				638	total_slots = td_total_slots(ctx, cpu, st);
				639	if (total_slots)
				640	bad_spec = total / total_slots;
				641	return sanitize_val(bad_spec);
				642	}
				643
				644	static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
				645	{
				646	double retiring = 0;
				647	double total_slots = td_total_slots(ctx, cpu, st);
				648	double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
				649	ctx, cpu);
				650
				651	if (total_slots)
				652	retiring = ret_slots / total_slots;
				653	return retiring;
				654	}
				655
				656	static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
				657	{
				658	double fe_bound = 0;
				659	double total_slots = td_total_slots(ctx, cpu, st);
				660	double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
				661	ctx, cpu);
				662
				663	if (total_slots)
				664	fe_bound = fetch_bub / total_slots;
				665	return fe_bound;
				666	}
				667
				/*
				 * TopDown backend-bound fraction: 1.0 minus the frontend-bound,
				 * bad-speculation and retiring fractions. When those three sum to exactly
				 * zero, 0 is returned instead of 1.0. Small negative results are clamped
				 * by sanitize_val().
				 */
				static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
				{
					double sum = (td_fe_bound(ctx, cpu, st) +
						      td_bad_spec(ctx, cpu, st) +
						      td_retiring(ctx, cpu, st));
					if (sum == 0)
						return 0;
					return sanitize_val(1.0 - sum);
				}
				677
				678	static void print_smi_cost(int cpu, struct perf_evsel *evsel,
				679	struct perf_stat_output_ctx *out,
				680	struct runtime_stat *st)
				681	{
				682	double smi_num, aperf, cycles, cost = 0.0;
				683	int ctx = evsel_context(evsel);
				684	const char *color = NULL;
				685
				686	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
				687	aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
				688	cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
				689
				690	if ((cycles == 0) \|\| (aperf == 0))
				691	return;
				692
				693	if (smi_num)
				694	cost = (aperf - cycles) / aperf * 100.00;
				695
				696	if (cost > 10)
				697	color = PERF_COLOR_RED;
				698	out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
				699	out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
				700	}
				701
				702	static void generic_metric(const char *metric_expr,
				703	struct perf_evsel **metric_events,
				704	char *name,
				705	const char *metric_name,
				706	double avg,
				707	int cpu,
				708	struct perf_stat_output_ctx *out,
				709	struct runtime_stat *st)
				710	{
				711	print_metric_t print_metric = out->print_metric;
				712	struct parse_ctx pctx;
				713	double ratio;
				714	int i;
				715	void *ctxp = out->ctx;
				716
				717	expr__ctx_init(&pctx);
				718	expr__add_id(&pctx, name, avg);
				719	for (i = 0; metric_events[i]; i++) {
				720	struct saved_value *v;
				721	struct stats *stats;
				722	double scale;
				723
				724	if (!strcmp(metric_events[i]->name, "duration_time")) {
				725	stats = &walltime_nsecs_stats;
				726	scale = 1e-9;
				727	} else {
				728	v = saved_value_lookup(metric_events[i], cpu, false,
				729	STAT_NONE, 0, st);
				730	if (!v)
				731	break;
				732	stats = &v->stats;
				733	scale = 1.0;
				734	}
				735	expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale);
				736	}
				737	if (!metric_events[i]) {
				738	const char *p = metric_expr;
				739
				740	if (expr__parse(&ratio, &pctx, &p) == 0)
				741	print_metric(ctxp, NULL, "%8.1f",
				742	metric_name ?
				743	metric_name :
				744	out->force_header ? name : "",
				745	ratio);
				746	else
				747	print_metric(ctxp, NULL, NULL,
				748	out->force_header ?
				749	(metric_name ? metric_name : name) : "", 0);
				750	} else
				751	print_metric(ctxp, NULL, NULL, "", 0);
				752	}
				753
				754	void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
				755	double avg, int cpu,
				756	struct perf_stat_output_ctx *out,
				757	struct rblist *metric_events,
				758	struct runtime_stat *st)
				759	{
				760	void *ctxp = out->ctx;
				761	print_metric_t print_metric = out->print_metric;
				762	double total, ratio = 0.0, total2;
				763	const char *color = NULL;
				764	int ctx = evsel_context(evsel);
				765	struct metric_event *me;
				766	int num = 1;
				767
				768	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
				769	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
				770
				771	if (total) {
				772	ratio = avg / total;
				773	print_metric(ctxp, NULL, "%7.2f ",
				774	"insn per cycle", ratio);
				775	} else {
				776	print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
				777	}
				778
				779	total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
				780	ctx, cpu);
				781
				782	total = max(total, runtime_stat_avg(st,
				783	STAT_STALLED_CYCLES_BACK,
				784	ctx, cpu));
				785
				786	if (total && avg) {
				787	out->new_line(ctxp);
				788	ratio = total / avg;
				789	print_metric(ctxp, NULL, "%7.2f ",
				790	"stalled cycles per insn",
				791	ratio);
				792	} else if (have_frontend_stalled) {
				793	print_metric(ctxp, NULL, NULL,
				794	"stalled cycles per insn", 0);
				795	}
				796	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
				797	if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
				798	print_branch_misses(cpu, evsel, avg, out, st);
				799	else
				800	print_metric(ctxp, NULL, NULL, "of all branches", 0);
				801	} else if (
				802	evsel->attr.type == PERF_TYPE_HW_CACHE &&
				803	evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D \|
				804	((PERF_COUNT_HW_CACHE_OP_READ) << 8) \|
				805	((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
				806
				807	if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
				808	print_l1_dcache_misses(cpu, evsel, avg, out, st);
				809	else
				810	print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
				811	} else if (
				812	evsel->attr.type == PERF_TYPE_HW_CACHE &&
				813	evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I \|
				814	((PERF_COUNT_HW_CACHE_OP_READ) << 8) \|
				815	((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
				816
				817	if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
				818	print_l1_icache_misses(cpu, evsel, avg, out, st);
				819	else
				820	print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
				821	} else if (
				822	evsel->attr.type == PERF_TYPE_HW_CACHE &&
				823	evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB \|
				824	((PERF_COUNT_HW_CACHE_OP_READ) << 8) \|
				825	((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
				826
				827	if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
				828	print_dtlb_cache_misses(cpu, evsel, avg, out, st);
				829	else
				830	print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
				831	} else if (
				832	evsel->attr.type == PERF_TYPE_HW_CACHE &&
				833	evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB \|
				834	((PERF_COUNT_HW_CACHE_OP_READ) << 8) \|
				835	((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
				836
				837	if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
				838	print_itlb_cache_misses(cpu, evsel, avg, out, st);
				839	else
				840	print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
				841	} else if (
				842	evsel->attr.type == PERF_TYPE_HW_CACHE &&
				843	evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL \|
				844	((PERF_COUNT_HW_CACHE_OP_READ) << 8) \|
				845	((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
				846
				847	if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
				848	print_ll_cache_misses(cpu, evsel, avg, out, st);
				849	else
				850	print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
				851	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
				852	total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);
				853
				854	if (total)
				855	ratio = avg * 100 / total;
				856
				857	if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
				858	print_metric(ctxp, NULL, "%8.3f %%",
				859	"of all cache refs", ratio);
				860	else
				861	print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
				862	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
				863	print_stalled_cycles_frontend(cpu, evsel, avg, out, st);
				864	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
				865	print_stalled_cycles_backend(cpu, evsel, avg, out, st);
				866	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
				867	total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
				868
				869	if (total) {
				870	ratio = avg / total;
				871	print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
				872	} else {
				873	print_metric(ctxp, NULL, NULL, "Ghz", 0);
				874	}
				875	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
				876	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
				877
				878	if (total)
				879	print_metric(ctxp, NULL,
				880	"%7.2f%%", "transactional cycles",
				881	100.0 * (avg / total));
				882	else
				883	print_metric(ctxp, NULL, NULL, "transactional cycles",
				884	0);
				885	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
				886	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
				887	total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);
				888
				889	if (total2 < avg)
				890	total2 = avg;
				891	if (total)
				892	print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
				893	100.0 * ((total2-avg) / total));
				894	else
				895	print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
				896	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
				897	total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
				898	ctx, cpu);
				899
				900	if (avg)
				901	ratio = total / avg;
				902
				903	if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
				904	print_metric(ctxp, NULL, "%8.0f",
				905	"cycles / transaction", ratio);
				906	else
				907	print_metric(ctxp, NULL, NULL, "cycles / transaction",
				908	0);
				909	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
				910	total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
				911	ctx, cpu);
				912
				913	if (avg)
				914	ratio = total / avg;
				915
				916	print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
				917	} else if (perf_evsel__is_clock(evsel)) {
				918	if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
				919	print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
				920	avg / (ratio * evsel->scale));
				921	else
				922	print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
				923	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
				924	double fe_bound = td_fe_bound(ctx, cpu, st);
				925
				926	if (fe_bound > 0.2)
				927	color = PERF_COLOR_RED;
				928	print_metric(ctxp, color, "%8.1f%%", "frontend bound",
				929	fe_bound * 100.);
				930	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
				931	double retiring = td_retiring(ctx, cpu, st);
				932
				933	if (retiring > 0.7)
				934	color = PERF_COLOR_GREEN;
				935	print_metric(ctxp, color, "%8.1f%%", "retiring",
				936	retiring * 100.);
				937	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
				938	double bad_spec = td_bad_spec(ctx, cpu, st);
				939
				940	if (bad_spec > 0.1)
				941	color = PERF_COLOR_RED;
				942	print_metric(ctxp, color, "%8.1f%%", "bad speculation",
				943	bad_spec * 100.);
				944	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
				945	double be_bound = td_be_bound(ctx, cpu, st);
				946	const char *name = "backend bound";
				947	static int have_recovery_bubbles = -1;
				948
				949	/* In case the CPU does not support topdown-recovery-bubbles */
				950	if (have_recovery_bubbles < 0)
				951	have_recovery_bubbles = pmu_have_event("cpu",
				952	"topdown-recovery-bubbles");
				953	if (!have_recovery_bubbles)
				954	name = "backend bound/bad spec";
				955
				956	if (be_bound > 0.2)
				957	color = PERF_COLOR_RED;
				958	if (td_total_slots(ctx, cpu, st) > 0)
				959	print_metric(ctxp, color, "%8.1f%%", name,
				960	be_bound * 100.);
				961	else
				962	print_metric(ctxp, NULL, NULL, name, 0);
				963	} else if (evsel->metric_expr) {
				964	generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name,
				965	evsel->metric_name, avg, cpu, out, st);
				966	} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
				967	char unit = 'M';
				968	char unit_buf[10];
				969
				970	total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
				971
				972	if (total)
				973	ratio = 1000.0 * avg / total;
				974	if (ratio < 0.001) {
				975	ratio *= 1000;
				976	unit = 'K';
				977	}
				978	snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
				979	print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
				980	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
				981	print_smi_cost(cpu, evsel, out, st);
				982	} else {
				983	num = 0;
				984	}
				985
				986	if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
				987	struct metric_expr *mexp;
				988
				989	list_for_each_entry (mexp, &me->head, nd) {
				990	if (num++ > 0)
				991	out->new_line(ctxp);
				992	generic_metric(mexp->metric_expr, mexp->metric_events,
				993	evsel->name, mexp->metric_name,
				994	avg, cpu, out, st);
				995	}
				996	}
				997	if (num == 0)
				998	print_metric(ctxp, NULL, NULL, NULL, 0);
				999	}