| b.liu | e958203 | 2025-04-17 19:18:16 +0800 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0+ | 
 | 2 | /* | 
 | 3 |  * Read-Copy Update module-based performance-test facility | 
 | 4 |  * | 
 | 5 |  * Copyright (C) IBM Corporation, 2015 | 
 | 6 |  * | 
 | 7 |  * Authors: Paul E. McKenney <paulmck@linux.ibm.com> | 
 | 8 |  */ | 
 | 9 |  | 
 | 10 | #define pr_fmt(fmt) fmt | 
 | 11 |  | 
 | 12 | #include <linux/types.h> | 
 | 13 | #include <linux/kernel.h> | 
 | 14 | #include <linux/init.h> | 
 | 15 | #include <linux/module.h> | 
 | 16 | #include <linux/kthread.h> | 
 | 17 | #include <linux/err.h> | 
 | 18 | #include <linux/spinlock.h> | 
 | 19 | #include <linux/smp.h> | 
 | 20 | #include <linux/rcupdate.h> | 
 | 21 | #include <linux/interrupt.h> | 
 | 22 | #include <linux/sched.h> | 
 | 23 | #include <uapi/linux/sched/types.h> | 
 | 24 | #include <linux/atomic.h> | 
 | 25 | #include <linux/bitops.h> | 
 | 26 | #include <linux/completion.h> | 
 | 27 | #include <linux/moduleparam.h> | 
 | 28 | #include <linux/percpu.h> | 
 | 29 | #include <linux/notifier.h> | 
 | 30 | #include <linux/reboot.h> | 
 | 31 | #include <linux/freezer.h> | 
 | 32 | #include <linux/cpu.h> | 
 | 33 | #include <linux/delay.h> | 
 | 34 | #include <linux/stat.h> | 
 | 35 | #include <linux/srcu.h> | 
 | 36 | #include <linux/slab.h> | 
 | 37 | #include <asm/byteorder.h> | 
 | 38 | #include <linux/torture.h> | 
 | 39 | #include <linux/vmalloc.h> | 
 | 40 |  | 
 | 41 | #include "rcu.h" | 
 | 42 |  | 
 | 43 | MODULE_LICENSE("GPL"); | 
 | 44 | MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>"); | 
 | 45 |  | 
 | 46 | #define PERF_FLAG "-perf:" | 
 | 47 | #define PERFOUT_STRING(s) \ | 
 | 48 | 	pr_alert("%s" PERF_FLAG " %s\n", perf_type, s) | 
 | 49 | #define VERBOSE_PERFOUT_STRING(s) \ | 
 | 50 | 	do { if (verbose) pr_alert("%s" PERF_FLAG " %s\n", perf_type, s); } while (0) | 
 | 51 | #define VERBOSE_PERFOUT_ERRSTRING(s) \ | 
 | 52 | 	do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0) | 
 | 53 |  | 
 | 54 | /* | 
 | 55 |  * The intended use cases for the nreaders and nwriters module parameters | 
 | 56 |  * are as follows: | 
 | 57 |  * | 
 | 58 |  * 1.	Specify only the nr_cpus kernel boot parameter.  This will | 
 | 59 |  *	set both nreaders and nwriters to the value specified by | 
 | 60 |  *	nr_cpus for a mixed reader/writer test. | 
 | 61 |  * | 
 | 62 |  * 2.	Specify the nr_cpus kernel boot parameter, but set | 
 | 63 |  *	rcuperf.nreaders to zero.  This will set nwriters to the | 
 | 64 |  *	value specified by nr_cpus for an update-only test. | 
 | 65 |  * | 
 | 66 |  * 3.	Specify the nr_cpus kernel boot parameter, but set | 
 | 67 |  *	rcuperf.nwriters to zero.  This will set nreaders to the | 
 | 68 |  *	value specified by nr_cpus for a read-only test. | 
 | 69 |  * | 
 | 70 |  * Various other use cases may of course be specified. | 
 | 71 |  */ | 
 | 72 |  | 
 | 73 | #ifdef MODULE | 
 | 74 | # define RCUPERF_SHUTDOWN 0 | 
 | 75 | #else | 
 | 76 | # define RCUPERF_SHUTDOWN 1 | 
 | 77 | #endif | 
 | 78 |  | 
 | 79 | torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives"); | 
 | 80 | torture_param(int, gp_async_max, 1000, "Max # outstanding waits per reader"); | 
 | 81 | torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); | 
 | 82 | torture_param(int, holdoff, 10, "Holdoff time before test start (s)"); | 
 | 83 | torture_param(int, nreaders, -1, "Number of RCU reader threads"); | 
 | 84 | torture_param(int, nwriters, -1, "Number of RCU updater threads"); | 
 | 85 | torture_param(bool, shutdown, RCUPERF_SHUTDOWN, | 
 | 86 | 	      "Shutdown at end of performance tests."); | 
 | 87 | torture_param(int, verbose, 1, "Enable verbose debugging printk()s"); | 
 | 88 | torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable"); | 
 | 89 |  | 
 | 90 | static char *perf_type = "rcu"; | 
 | 91 | module_param(perf_type, charp, 0444); | 
 | 92 | MODULE_PARM_DESC(perf_type, "Type of RCU to performance-test (rcu, srcu, ...)"); | 
 | 93 |  | 
 | 94 | static int nrealreaders; | 
 | 95 | static int nrealwriters; | 
 | 96 | static struct task_struct **writer_tasks; | 
 | 97 | static struct task_struct **reader_tasks; | 
 | 98 | static struct task_struct *shutdown_task; | 
 | 99 |  | 
 | 100 | static u64 **writer_durations; | 
 | 101 | static int *writer_n_durations; | 
 | 102 | static atomic_t n_rcu_perf_reader_started; | 
 | 103 | static atomic_t n_rcu_perf_writer_started; | 
 | 104 | static atomic_t n_rcu_perf_writer_finished; | 
 | 105 | static wait_queue_head_t shutdown_wq; | 
 | 106 | static u64 t_rcu_perf_writer_started; | 
 | 107 | static u64 t_rcu_perf_writer_finished; | 
 | 108 | static unsigned long b_rcu_perf_writer_started; | 
 | 109 | static unsigned long b_rcu_perf_writer_finished; | 
 | 110 | static DEFINE_PER_CPU(atomic_t, n_async_inflight); | 
 | 111 |  | 
 | 112 | static int rcu_perf_writer_state; | 
 | 113 | #define RTWS_INIT		0 | 
 | 114 | #define RTWS_ASYNC		1 | 
 | 115 | #define RTWS_BARRIER		2 | 
 | 116 | #define RTWS_EXP_SYNC		3 | 
 | 117 | #define RTWS_SYNC		4 | 
 | 118 | #define RTWS_IDLE		5 | 
 | 119 | #define RTWS_STOPPING		6 | 
 | 120 |  | 
 | 121 | #define MAX_MEAS 10000 | 
 | 122 | #define MIN_MEAS 100 | 
 | 123 |  | 
 | 124 | /* | 
 | 125 |  * Operations vector for selecting different types of tests. | 
 | 126 |  */ | 
 | 127 |  | 
 | 128 | struct rcu_perf_ops { | 
 | 129 | 	int ptype; | 
 | 130 | 	void (*init)(void); | 
 | 131 | 	void (*cleanup)(void); | 
 | 132 | 	int (*readlock)(void); | 
 | 133 | 	void (*readunlock)(int idx); | 
 | 134 | 	unsigned long (*get_gp_seq)(void); | 
 | 135 | 	unsigned long (*gp_diff)(unsigned long new, unsigned long old); | 
 | 136 | 	unsigned long (*exp_completed)(void); | 
 | 137 | 	void (*async)(struct rcu_head *head, rcu_callback_t func); | 
 | 138 | 	void (*gp_barrier)(void); | 
 | 139 | 	void (*sync)(void); | 
 | 140 | 	void (*exp_sync)(void); | 
 | 141 | 	const char *name; | 
 | 142 | }; | 
 | 143 |  | 
 | 144 | static struct rcu_perf_ops *cur_ops; | 
 | 145 |  | 
 | 146 | /* | 
 | 147 |  * Definitions for rcu perf testing. | 
 | 148 |  */ | 
 | 149 |  | 
 | 150 | static int rcu_perf_read_lock(void) __acquires(RCU) | 
 | 151 | { | 
 | 152 | 	rcu_read_lock(); | 
 | 153 | 	return 0; | 
 | 154 | } | 
 | 155 |  | 
 | 156 | static void rcu_perf_read_unlock(int idx) __releases(RCU) | 
 | 157 | { | 
 | 158 | 	rcu_read_unlock(); | 
 | 159 | } | 
 | 160 |  | 
 | 161 | static unsigned long __maybe_unused rcu_no_completed(void) | 
 | 162 | { | 
 | 163 | 	return 0; | 
 | 164 | } | 
 | 165 |  | 
 | 166 | static void rcu_sync_perf_init(void) | 
 | 167 | { | 
 | 168 | } | 
 | 169 |  | 
 | 170 | static struct rcu_perf_ops rcu_ops = { | 
 | 171 | 	.ptype		= RCU_FLAVOR, | 
 | 172 | 	.init		= rcu_sync_perf_init, | 
 | 173 | 	.readlock	= rcu_perf_read_lock, | 
 | 174 | 	.readunlock	= rcu_perf_read_unlock, | 
 | 175 | 	.get_gp_seq	= rcu_get_gp_seq, | 
 | 176 | 	.gp_diff	= rcu_seq_diff, | 
 | 177 | 	.exp_completed	= rcu_exp_batches_completed, | 
 | 178 | 	.async		= call_rcu, | 
 | 179 | 	.gp_barrier	= rcu_barrier, | 
 | 180 | 	.sync		= synchronize_rcu, | 
 | 181 | 	.exp_sync	= synchronize_rcu_expedited, | 
 | 182 | 	.name		= "rcu" | 
 | 183 | }; | 
 | 184 |  | 
 | 185 | /* | 
 | 186 |  * Definitions for srcu perf testing. | 
 | 187 |  */ | 
 | 188 |  | 
 | 189 | DEFINE_STATIC_SRCU(srcu_ctl_perf); | 
 | 190 | static struct srcu_struct *srcu_ctlp = &srcu_ctl_perf; | 
 | 191 |  | 
 | 192 | static int srcu_perf_read_lock(void) __acquires(srcu_ctlp) | 
 | 193 | { | 
 | 194 | 	return srcu_read_lock(srcu_ctlp); | 
 | 195 | } | 
 | 196 |  | 
 | 197 | static void srcu_perf_read_unlock(int idx) __releases(srcu_ctlp) | 
 | 198 | { | 
 | 199 | 	srcu_read_unlock(srcu_ctlp, idx); | 
 | 200 | } | 
 | 201 |  | 
 | 202 | static unsigned long srcu_perf_completed(void) | 
 | 203 | { | 
 | 204 | 	return srcu_batches_completed(srcu_ctlp); | 
 | 205 | } | 
 | 206 |  | 
 | 207 | static void srcu_call_rcu(struct rcu_head *head, rcu_callback_t func) | 
 | 208 | { | 
 | 209 | 	call_srcu(srcu_ctlp, head, func); | 
 | 210 | } | 
 | 211 |  | 
 | 212 | static void srcu_rcu_barrier(void) | 
 | 213 | { | 
 | 214 | 	srcu_barrier(srcu_ctlp); | 
 | 215 | } | 
 | 216 |  | 
 | 217 | static void srcu_perf_synchronize(void) | 
 | 218 | { | 
 | 219 | 	synchronize_srcu(srcu_ctlp); | 
 | 220 | } | 
 | 221 |  | 
 | 222 | static void srcu_perf_synchronize_expedited(void) | 
 | 223 | { | 
 | 224 | 	synchronize_srcu_expedited(srcu_ctlp); | 
 | 225 | } | 
 | 226 |  | 
 | 227 | static struct rcu_perf_ops srcu_ops = { | 
 | 228 | 	.ptype		= SRCU_FLAVOR, | 
 | 229 | 	.init		= rcu_sync_perf_init, | 
 | 230 | 	.readlock	= srcu_perf_read_lock, | 
 | 231 | 	.readunlock	= srcu_perf_read_unlock, | 
 | 232 | 	.get_gp_seq	= srcu_perf_completed, | 
 | 233 | 	.gp_diff	= rcu_seq_diff, | 
 | 234 | 	.exp_completed	= srcu_perf_completed, | 
 | 235 | 	.async		= srcu_call_rcu, | 
 | 236 | 	.gp_barrier	= srcu_rcu_barrier, | 
 | 237 | 	.sync		= srcu_perf_synchronize, | 
 | 238 | 	.exp_sync	= srcu_perf_synchronize_expedited, | 
 | 239 | 	.name		= "srcu" | 
 | 240 | }; | 
 | 241 |  | 
 | 242 | static struct srcu_struct srcud; | 
 | 243 |  | 
 | 244 | static void srcu_sync_perf_init(void) | 
 | 245 | { | 
 | 246 | 	srcu_ctlp = &srcud; | 
 | 247 | 	init_srcu_struct(srcu_ctlp); | 
 | 248 | } | 
 | 249 |  | 
 | 250 | static void srcu_sync_perf_cleanup(void) | 
 | 251 | { | 
 | 252 | 	cleanup_srcu_struct(srcu_ctlp); | 
 | 253 | } | 
 | 254 |  | 
 | 255 | static struct rcu_perf_ops srcud_ops = { | 
 | 256 | 	.ptype		= SRCU_FLAVOR, | 
 | 257 | 	.init		= srcu_sync_perf_init, | 
 | 258 | 	.cleanup	= srcu_sync_perf_cleanup, | 
 | 259 | 	.readlock	= srcu_perf_read_lock, | 
 | 260 | 	.readunlock	= srcu_perf_read_unlock, | 
 | 261 | 	.get_gp_seq	= srcu_perf_completed, | 
 | 262 | 	.gp_diff	= rcu_seq_diff, | 
 | 263 | 	.exp_completed	= srcu_perf_completed, | 
 | 264 | 	.async		= srcu_call_rcu, | 
 | 265 | 	.gp_barrier	= srcu_rcu_barrier, | 
 | 266 | 	.sync		= srcu_perf_synchronize, | 
 | 267 | 	.exp_sync	= srcu_perf_synchronize_expedited, | 
 | 268 | 	.name		= "srcud" | 
 | 269 | }; | 
 | 270 |  | 
 | 271 | /* | 
 | 272 |  * Definitions for RCU-tasks perf testing. | 
 | 273 |  */ | 
 | 274 |  | 
 | 275 | static int tasks_perf_read_lock(void) | 
 | 276 | { | 
 | 277 | 	return 0; | 
 | 278 | } | 
 | 279 |  | 
 | 280 | static void tasks_perf_read_unlock(int idx) | 
 | 281 | { | 
 | 282 | } | 
 | 283 |  | 
 | 284 | static struct rcu_perf_ops tasks_ops = { | 
 | 285 | 	.ptype		= RCU_TASKS_FLAVOR, | 
 | 286 | 	.init		= rcu_sync_perf_init, | 
 | 287 | 	.readlock	= tasks_perf_read_lock, | 
 | 288 | 	.readunlock	= tasks_perf_read_unlock, | 
 | 289 | 	.get_gp_seq	= rcu_no_completed, | 
 | 290 | 	.gp_diff	= rcu_seq_diff, | 
 | 291 | 	.async		= call_rcu_tasks, | 
 | 292 | 	.gp_barrier	= rcu_barrier_tasks, | 
 | 293 | 	.sync		= synchronize_rcu_tasks, | 
 | 294 | 	.exp_sync	= synchronize_rcu_tasks, | 
 | 295 | 	.name		= "tasks" | 
 | 296 | }; | 
 | 297 |  | 
 | 298 | static unsigned long rcuperf_seq_diff(unsigned long new, unsigned long old) | 
 | 299 | { | 
 | 300 | 	if (!cur_ops->gp_diff) | 
 | 301 | 		return new - old; | 
 | 302 | 	return cur_ops->gp_diff(new, old); | 
 | 303 | } | 
 | 304 |  | 
 | 305 | /* | 
 | 306 |  * If performance tests complete, wait for shutdown to commence. | 
 | 307 |  */ | 
 | 308 | static void rcu_perf_wait_shutdown(void) | 
 | 309 | { | 
 | 310 | 	cond_resched_tasks_rcu_qs(); | 
 | 311 | 	if (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters) | 
 | 312 | 		return; | 
 | 313 | 	while (!torture_must_stop()) | 
 | 314 | 		schedule_timeout_uninterruptible(1); | 
 | 315 | } | 
 | 316 |  | 
 | 317 | /* | 
 | 318 |  * RCU perf reader kthread.  Repeatedly does empty RCU read-side | 
 | 319 |  * critical section, minimizing update-side interference. | 
 | 320 |  */ | 
 | 321 | static int | 
 | 322 | rcu_perf_reader(void *arg) | 
 | 323 | { | 
 | 324 | 	unsigned long flags; | 
 | 325 | 	int idx; | 
 | 326 | 	long me = (long)arg; | 
 | 327 |  | 
 | 328 | 	VERBOSE_PERFOUT_STRING("rcu_perf_reader task started"); | 
 | 329 | 	set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids)); | 
 | 330 | 	set_user_nice(current, MAX_NICE); | 
 | 331 | 	atomic_inc(&n_rcu_perf_reader_started); | 
 | 332 |  | 
 | 333 | 	do { | 
 | 334 | 		local_irq_save(flags); | 
 | 335 | 		idx = cur_ops->readlock(); | 
 | 336 | 		cur_ops->readunlock(idx); | 
 | 337 | 		local_irq_restore(flags); | 
 | 338 | 		rcu_perf_wait_shutdown(); | 
 | 339 | 	} while (!torture_must_stop()); | 
 | 340 | 	torture_kthread_stopping("rcu_perf_reader"); | 
 | 341 | 	return 0; | 
 | 342 | } | 
 | 343 |  | 
 | 344 | /* | 
 | 345 |  * Callback function for asynchronous grace periods from rcu_perf_writer(). | 
 | 346 |  */ | 
 | 347 | static void rcu_perf_async_cb(struct rcu_head *rhp) | 
 | 348 | { | 
 | 349 | 	atomic_dec(this_cpu_ptr(&n_async_inflight)); | 
 | 350 | 	kfree(rhp); | 
 | 351 | } | 
 | 352 |  | 
 | 353 | /* | 
 | 354 |  * RCU perf writer kthread.  Repeatedly does a grace period. | 
 | 355 |  */ | 
 | 356 | static int | 
 | 357 | rcu_perf_writer(void *arg) | 
 | 358 | { | 
 | 359 | 	int i = 0; | 
 | 360 | 	int i_max; | 
 | 361 | 	long me = (long)arg; | 
 | 362 | 	struct rcu_head *rhp = NULL; | 
 | 363 | 	struct sched_param sp; | 
 | 364 | 	bool started = false, done = false, alldone = false; | 
 | 365 | 	u64 t; | 
 | 366 | 	u64 *wdp; | 
 | 367 | 	u64 *wdpp = writer_durations[me]; | 
 | 368 |  | 
 | 369 | 	VERBOSE_PERFOUT_STRING("rcu_perf_writer task started"); | 
 | 370 | 	WARN_ON(!wdpp); | 
 | 371 | 	set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids)); | 
 | 372 | 	sp.sched_priority = 1; | 
 | 373 | 	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | 
 | 374 |  | 
 | 375 | 	if (holdoff) | 
 | 376 | 		schedule_timeout_uninterruptible(holdoff * HZ); | 
 | 377 |  | 
 | 378 | 	/* | 
 | 379 | 	 * Wait until rcu_end_inkernel_boot() is called for normal GP tests | 
 | 380 | 	 * so that RCU is not always expedited for normal GP tests. | 
 | 381 | 	 * The system_state test is approximate, but works well in practice. | 
 | 382 | 	 */ | 
 | 383 | 	while (!gp_exp && system_state != SYSTEM_RUNNING) | 
 | 384 | 		schedule_timeout_uninterruptible(1); | 
 | 385 |  | 
 | 386 | 	t = ktime_get_mono_fast_ns(); | 
 | 387 | 	if (atomic_inc_return(&n_rcu_perf_writer_started) >= nrealwriters) { | 
 | 388 | 		t_rcu_perf_writer_started = t; | 
 | 389 | 		if (gp_exp) { | 
 | 390 | 			b_rcu_perf_writer_started = | 
 | 391 | 				cur_ops->exp_completed() / 2; | 
 | 392 | 		} else { | 
 | 393 | 			b_rcu_perf_writer_started = cur_ops->get_gp_seq(); | 
 | 394 | 		} | 
 | 395 | 	} | 
 | 396 |  | 
 | 397 | 	do { | 
 | 398 | 		if (writer_holdoff) | 
 | 399 | 			udelay(writer_holdoff); | 
 | 400 | 		wdp = &wdpp[i]; | 
 | 401 | 		*wdp = ktime_get_mono_fast_ns(); | 
 | 402 | 		if (gp_async) { | 
 | 403 | retry: | 
 | 404 | 			if (!rhp) | 
 | 405 | 				rhp = kmalloc(sizeof(*rhp), GFP_KERNEL); | 
 | 406 | 			if (rhp && atomic_read(this_cpu_ptr(&n_async_inflight)) < gp_async_max) { | 
 | 407 | 				rcu_perf_writer_state = RTWS_ASYNC; | 
 | 408 | 				atomic_inc(this_cpu_ptr(&n_async_inflight)); | 
 | 409 | 				cur_ops->async(rhp, rcu_perf_async_cb); | 
 | 410 | 				rhp = NULL; | 
 | 411 | 			} else if (!kthread_should_stop()) { | 
 | 412 | 				rcu_perf_writer_state = RTWS_BARRIER; | 
 | 413 | 				cur_ops->gp_barrier(); | 
 | 414 | 				goto retry; | 
 | 415 | 			} else { | 
 | 416 | 				kfree(rhp); /* Because we are stopping. */ | 
 | 417 | 			} | 
 | 418 | 		} else if (gp_exp) { | 
 | 419 | 			rcu_perf_writer_state = RTWS_EXP_SYNC; | 
 | 420 | 			cur_ops->exp_sync(); | 
 | 421 | 		} else { | 
 | 422 | 			rcu_perf_writer_state = RTWS_SYNC; | 
 | 423 | 			cur_ops->sync(); | 
 | 424 | 		} | 
 | 425 | 		rcu_perf_writer_state = RTWS_IDLE; | 
 | 426 | 		t = ktime_get_mono_fast_ns(); | 
 | 427 | 		*wdp = t - *wdp; | 
 | 428 | 		i_max = i; | 
 | 429 | 		if (!started && | 
 | 430 | 		    atomic_read(&n_rcu_perf_writer_started) >= nrealwriters) | 
 | 431 | 			started = true; | 
 | 432 | 		if (!done && i >= MIN_MEAS) { | 
 | 433 | 			done = true; | 
 | 434 | 			sp.sched_priority = 0; | 
 | 435 | 			sched_setscheduler_nocheck(current, | 
 | 436 | 						   SCHED_NORMAL, &sp); | 
 | 437 | 			pr_alert("%s%s rcu_perf_writer %ld has %d measurements\n", | 
 | 438 | 				 perf_type, PERF_FLAG, me, MIN_MEAS); | 
 | 439 | 			if (atomic_inc_return(&n_rcu_perf_writer_finished) >= | 
 | 440 | 			    nrealwriters) { | 
 | 441 | 				schedule_timeout_interruptible(10); | 
 | 442 | 				rcu_ftrace_dump(DUMP_ALL); | 
 | 443 | 				PERFOUT_STRING("Test complete"); | 
 | 444 | 				t_rcu_perf_writer_finished = t; | 
 | 445 | 				if (gp_exp) { | 
 | 446 | 					b_rcu_perf_writer_finished = | 
 | 447 | 						cur_ops->exp_completed() / 2; | 
 | 448 | 				} else { | 
 | 449 | 					b_rcu_perf_writer_finished = | 
 | 450 | 						cur_ops->get_gp_seq(); | 
 | 451 | 				} | 
 | 452 | 				if (shutdown) { | 
 | 453 | 					smp_mb(); /* Assign before wake. */ | 
 | 454 | 					wake_up(&shutdown_wq); | 
 | 455 | 				} | 
 | 456 | 			} | 
 | 457 | 		} | 
 | 458 | 		if (done && !alldone && | 
 | 459 | 		    atomic_read(&n_rcu_perf_writer_finished) >= nrealwriters) | 
 | 460 | 			alldone = true; | 
 | 461 | 		if (started && !alldone && i < MAX_MEAS - 1) | 
 | 462 | 			i++; | 
 | 463 | 		rcu_perf_wait_shutdown(); | 
 | 464 | 	} while (!torture_must_stop()); | 
 | 465 | 	if (gp_async) { | 
 | 466 | 		rcu_perf_writer_state = RTWS_BARRIER; | 
 | 467 | 		cur_ops->gp_barrier(); | 
 | 468 | 	} | 
 | 469 | 	rcu_perf_writer_state = RTWS_STOPPING; | 
 | 470 | 	writer_n_durations[me] = i_max; | 
 | 471 | 	torture_kthread_stopping("rcu_perf_writer"); | 
 | 472 | 	return 0; | 
 | 473 | } | 
 | 474 |  | 
 | 475 | static void | 
 | 476 | rcu_perf_print_module_parms(struct rcu_perf_ops *cur_ops, const char *tag) | 
 | 477 | { | 
 | 478 | 	pr_alert("%s" PERF_FLAG | 
 | 479 | 		 "--- %s: nreaders=%d nwriters=%d verbose=%d shutdown=%d\n", | 
 | 480 | 		 perf_type, tag, nrealreaders, nrealwriters, verbose, shutdown); | 
 | 481 | } | 
 | 482 |  | 
 | 483 | static void | 
 | 484 | rcu_perf_cleanup(void) | 
 | 485 | { | 
 | 486 | 	int i; | 
 | 487 | 	int j; | 
 | 488 | 	int ngps = 0; | 
 | 489 | 	u64 *wdp; | 
 | 490 | 	u64 *wdpp; | 
 | 491 |  | 
 | 492 | 	/* | 
 | 493 | 	 * Would like warning at start, but everything is expedited | 
 | 494 | 	 * during the mid-boot phase, so have to wait till the end. | 
 | 495 | 	 */ | 
 | 496 | 	if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp) | 
 | 497 | 		VERBOSE_PERFOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!"); | 
 | 498 | 	if (rcu_gp_is_normal() && gp_exp) | 
 | 499 | 		VERBOSE_PERFOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!"); | 
 | 500 | 	if (gp_exp && gp_async) | 
 | 501 | 		VERBOSE_PERFOUT_ERRSTRING("No expedited async GPs, so went with async!"); | 
 | 502 |  | 
 | 503 | 	if (torture_cleanup_begin()) | 
 | 504 | 		return; | 
 | 505 | 	if (!cur_ops) { | 
 | 506 | 		torture_cleanup_end(); | 
 | 507 | 		return; | 
 | 508 | 	} | 
 | 509 |  | 
 | 510 | 	if (reader_tasks) { | 
 | 511 | 		for (i = 0; i < nrealreaders; i++) | 
 | 512 | 			torture_stop_kthread(rcu_perf_reader, | 
 | 513 | 					     reader_tasks[i]); | 
 | 514 | 		kfree(reader_tasks); | 
 | 515 | 	} | 
 | 516 |  | 
 | 517 | 	if (writer_tasks) { | 
 | 518 | 		for (i = 0; i < nrealwriters; i++) { | 
 | 519 | 			torture_stop_kthread(rcu_perf_writer, | 
 | 520 | 					     writer_tasks[i]); | 
 | 521 | 			if (!writer_n_durations) | 
 | 522 | 				continue; | 
 | 523 | 			j = writer_n_durations[i]; | 
 | 524 | 			pr_alert("%s%s writer %d gps: %d\n", | 
 | 525 | 				 perf_type, PERF_FLAG, i, j); | 
 | 526 | 			ngps += j; | 
 | 527 | 		} | 
 | 528 | 		pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n", | 
 | 529 | 			 perf_type, PERF_FLAG, | 
 | 530 | 			 t_rcu_perf_writer_started, t_rcu_perf_writer_finished, | 
 | 531 | 			 t_rcu_perf_writer_finished - | 
 | 532 | 			 t_rcu_perf_writer_started, | 
 | 533 | 			 ngps, | 
 | 534 | 			 rcuperf_seq_diff(b_rcu_perf_writer_finished, | 
 | 535 | 					  b_rcu_perf_writer_started)); | 
 | 536 | 		for (i = 0; i < nrealwriters; i++) { | 
 | 537 | 			if (!writer_durations) | 
 | 538 | 				break; | 
 | 539 | 			if (!writer_n_durations) | 
 | 540 | 				continue; | 
 | 541 | 			wdpp = writer_durations[i]; | 
 | 542 | 			if (!wdpp) | 
 | 543 | 				continue; | 
 | 544 | 			for (j = 0; j <= writer_n_durations[i]; j++) { | 
 | 545 | 				wdp = &wdpp[j]; | 
 | 546 | 				pr_alert("%s%s %4d writer-duration: %5d %llu\n", | 
 | 547 | 					perf_type, PERF_FLAG, | 
 | 548 | 					i, j, *wdp); | 
 | 549 | 				if (j % 100 == 0) | 
 | 550 | 					schedule_timeout_uninterruptible(1); | 
 | 551 | 			} | 
 | 552 | 			kfree(writer_durations[i]); | 
 | 553 | 		} | 
 | 554 | 		kfree(writer_tasks); | 
 | 555 | 		kfree(writer_durations); | 
 | 556 | 		kfree(writer_n_durations); | 
 | 557 | 	} | 
 | 558 |  | 
 | 559 | 	/* Do torture-type-specific cleanup operations.  */ | 
 | 560 | 	if (cur_ops->cleanup != NULL) | 
 | 561 | 		cur_ops->cleanup(); | 
 | 562 |  | 
 | 563 | 	torture_cleanup_end(); | 
 | 564 | } | 
 | 565 |  | 
 | 566 | /* | 
 | 567 |  * Return the number if non-negative.  If -1, the number of CPUs. | 
 | 568 |  * If less than -1, that much less than the number of CPUs, but | 
 | 569 |  * at least one. | 
 | 570 |  */ | 
 | 571 | static int compute_real(int n) | 
 | 572 | { | 
 | 573 | 	int nr; | 
 | 574 |  | 
 | 575 | 	if (n >= 0) { | 
 | 576 | 		nr = n; | 
 | 577 | 	} else { | 
 | 578 | 		nr = num_online_cpus() + 1 + n; | 
 | 579 | 		if (nr <= 0) | 
 | 580 | 			nr = 1; | 
 | 581 | 	} | 
 | 582 | 	return nr; | 
 | 583 | } | 
 | 584 |  | 
 | 585 | /* | 
 | 586 |  * RCU perf shutdown kthread.  Just waits to be awakened, then shuts | 
 | 587 |  * down system. | 
 | 588 |  */ | 
 | 589 | static int | 
 | 590 | rcu_perf_shutdown(void *arg) | 
 | 591 | { | 
 | 592 | 	do { | 
 | 593 | 		wait_event(shutdown_wq, | 
 | 594 | 			   atomic_read(&n_rcu_perf_writer_finished) >= | 
 | 595 | 			   nrealwriters); | 
 | 596 | 	} while (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters); | 
 | 597 | 	smp_mb(); /* Wake before output. */ | 
 | 598 | 	rcu_perf_cleanup(); | 
 | 599 | 	kernel_power_off(); | 
 | 600 | 	return -EINVAL; | 
 | 601 | } | 
 | 602 |  | 
 | 603 | static int __init | 
 | 604 | rcu_perf_init(void) | 
 | 605 | { | 
 | 606 | 	long i; | 
 | 607 | 	int firsterr = 0; | 
 | 608 | 	static struct rcu_perf_ops *perf_ops[] = { | 
 | 609 | 		&rcu_ops, &srcu_ops, &srcud_ops, &tasks_ops, | 
 | 610 | 	}; | 
 | 611 |  | 
 | 612 | 	if (!torture_init_begin(perf_type, verbose)) | 
 | 613 | 		return -EBUSY; | 
 | 614 |  | 
 | 615 | 	/* Process args and tell the world that the perf'er is on the job. */ | 
 | 616 | 	for (i = 0; i < ARRAY_SIZE(perf_ops); i++) { | 
 | 617 | 		cur_ops = perf_ops[i]; | 
 | 618 | 		if (strcmp(perf_type, cur_ops->name) == 0) | 
 | 619 | 			break; | 
 | 620 | 	} | 
 | 621 | 	if (i == ARRAY_SIZE(perf_ops)) { | 
 | 622 | 		pr_alert("rcu-perf: invalid perf type: \"%s\"\n", perf_type); | 
 | 623 | 		pr_alert("rcu-perf types:"); | 
 | 624 | 		for (i = 0; i < ARRAY_SIZE(perf_ops); i++) | 
 | 625 | 			pr_cont(" %s", perf_ops[i]->name); | 
 | 626 | 		pr_cont("\n"); | 
 | 627 | 		WARN_ON(!IS_MODULE(CONFIG_RCU_PERF_TEST)); | 
 | 628 | 		firsterr = -EINVAL; | 
 | 629 | 		cur_ops = NULL; | 
 | 630 | 		goto unwind; | 
 | 631 | 	} | 
 | 632 | 	if (cur_ops->init) | 
 | 633 | 		cur_ops->init(); | 
 | 634 |  | 
 | 635 | 	nrealwriters = compute_real(nwriters); | 
 | 636 | 	nrealreaders = compute_real(nreaders); | 
 | 637 | 	atomic_set(&n_rcu_perf_reader_started, 0); | 
 | 638 | 	atomic_set(&n_rcu_perf_writer_started, 0); | 
 | 639 | 	atomic_set(&n_rcu_perf_writer_finished, 0); | 
 | 640 | 	rcu_perf_print_module_parms(cur_ops, "Start of test"); | 
 | 641 |  | 
 | 642 | 	/* Start up the kthreads. */ | 
 | 643 |  | 
 | 644 | 	if (shutdown) { | 
 | 645 | 		init_waitqueue_head(&shutdown_wq); | 
 | 646 | 		firsterr = torture_create_kthread(rcu_perf_shutdown, NULL, | 
 | 647 | 						  shutdown_task); | 
 | 648 | 		if (firsterr) | 
 | 649 | 			goto unwind; | 
 | 650 | 		schedule_timeout_uninterruptible(1); | 
 | 651 | 	} | 
 | 652 | 	reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]), | 
 | 653 | 			       GFP_KERNEL); | 
 | 654 | 	if (reader_tasks == NULL) { | 
 | 655 | 		VERBOSE_PERFOUT_ERRSTRING("out of memory"); | 
 | 656 | 		firsterr = -ENOMEM; | 
 | 657 | 		goto unwind; | 
 | 658 | 	} | 
 | 659 | 	for (i = 0; i < nrealreaders; i++) { | 
 | 660 | 		firsterr = torture_create_kthread(rcu_perf_reader, (void *)i, | 
 | 661 | 						  reader_tasks[i]); | 
 | 662 | 		if (firsterr) | 
 | 663 | 			goto unwind; | 
 | 664 | 	} | 
 | 665 | 	while (atomic_read(&n_rcu_perf_reader_started) < nrealreaders) | 
 | 666 | 		schedule_timeout_uninterruptible(1); | 
 | 667 | 	writer_tasks = kcalloc(nrealwriters, sizeof(reader_tasks[0]), | 
 | 668 | 			       GFP_KERNEL); | 
 | 669 | 	writer_durations = kcalloc(nrealwriters, sizeof(*writer_durations), | 
 | 670 | 				   GFP_KERNEL); | 
 | 671 | 	writer_n_durations = | 
 | 672 | 		kcalloc(nrealwriters, sizeof(*writer_n_durations), | 
 | 673 | 			GFP_KERNEL); | 
 | 674 | 	if (!writer_tasks || !writer_durations || !writer_n_durations) { | 
 | 675 | 		VERBOSE_PERFOUT_ERRSTRING("out of memory"); | 
 | 676 | 		firsterr = -ENOMEM; | 
 | 677 | 		goto unwind; | 
 | 678 | 	} | 
 | 679 | 	for (i = 0; i < nrealwriters; i++) { | 
 | 680 | 		writer_durations[i] = | 
 | 681 | 			kcalloc(MAX_MEAS, sizeof(*writer_durations[i]), | 
 | 682 | 				GFP_KERNEL); | 
 | 683 | 		if (!writer_durations[i]) { | 
 | 684 | 			firsterr = -ENOMEM; | 
 | 685 | 			goto unwind; | 
 | 686 | 		} | 
 | 687 | 		firsterr = torture_create_kthread(rcu_perf_writer, (void *)i, | 
 | 688 | 						  writer_tasks[i]); | 
 | 689 | 		if (firsterr) | 
 | 690 | 			goto unwind; | 
 | 691 | 	} | 
 | 692 | 	torture_init_end(); | 
 | 693 | 	return 0; | 
 | 694 |  | 
 | 695 | unwind: | 
 | 696 | 	torture_init_end(); | 
 | 697 | 	rcu_perf_cleanup(); | 
 | 698 | 	return firsterr; | 
 | 699 | } | 
 | 700 |  | 
 | 701 | module_init(rcu_perf_init); | 
 | 702 | module_exit(rcu_perf_cleanup); |