blob: 6c06b3039faed01d992965f73c95bc218880e286 [file] [log] [blame]
xjb04a4022021-11-25 15:01:52 +08001// SPDX-License-Identifier: GPL-2.0
2/*
3 * padata.c - generic interface to process data streams in parallel
4 *
5 * See Documentation/padata.txt for an api documentation.
6 *
7 * Copyright (C) 2008, 2009 secunet Security Networks AG
8 * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms and conditions of the GNU General Public License,
12 * version 2, as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 * more details.
18 *
19 * You should have received a copy of the GNU General Public License along with
20 * this program; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
22 */
23
24#include <linux/export.h>
25#include <linux/cpumask.h>
26#include <linux/err.h>
27#include <linux/cpu.h>
28#include <linux/padata.h>
29#include <linux/mutex.h>
30#include <linux/sched.h>
31#include <linux/slab.h>
32#include <linux/sysfs.h>
33#include <linux/rcupdate.h>
34#include <linux/module.h>
35
/*
 * Upper bound on pd->refcnt: padata_do_parallel() returns -EBUSY instead of
 * accepting a new object once the count reaches this value.
 */
#define MAX_OBJ_NUM 1000
37
38static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
39{
40 int cpu, target_cpu;
41
42 target_cpu = cpumask_first(pd->cpumask.pcpu);
43 for (cpu = 0; cpu < cpu_index; cpu++)
44 target_cpu = cpumask_next(target_cpu, pd->cpumask.pcpu);
45
46 return target_cpu;
47}
48
49static int padata_cpu_hash(struct parallel_data *pd)
50{
51 unsigned int seq_nr;
52 int cpu_index;
53
54 /*
55 * Hash the sequence numbers to the cpus by taking
56 * seq_nr mod. number of cpus in use.
57 */
58
59 seq_nr = atomic_inc_return(&pd->seq_nr);
60 cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
61
62 return padata_index_to_cpu(pd, cpu_index);
63}
64
65static void padata_parallel_worker(struct work_struct *parallel_work)
66{
67 struct padata_parallel_queue *pqueue;
68 LIST_HEAD(local_list);
69
70 local_bh_disable();
71 pqueue = container_of(parallel_work,
72 struct padata_parallel_queue, work);
73
74 spin_lock(&pqueue->parallel.lock);
75 list_replace_init(&pqueue->parallel.list, &local_list);
76 spin_unlock(&pqueue->parallel.lock);
77
78 while (!list_empty(&local_list)) {
79 struct padata_priv *padata;
80
81 padata = list_entry(local_list.next,
82 struct padata_priv, list);
83
84 list_del_init(&padata->list);
85
86 padata->parallel(padata);
87 }
88
89 local_bh_enable();
90}
91
92/**
93 * padata_do_parallel - padata parallelization function
94 *
95 * @pinst: padata instance
96 * @padata: object to be parallelized
97 * @cb_cpu: cpu the serialization callback function will run on,
98 * must be in the serial cpumask of padata(i.e. cpumask.cbcpu).
99 *
100 * The parallelization callback function will run with BHs off.
101 * Note: Every object which is parallelized by padata_do_parallel
102 * must be seen by padata_do_serial.
103 */
104int padata_do_parallel(struct padata_instance *pinst,
105 struct padata_priv *padata, int cb_cpu)
106{
107 int target_cpu, err;
108 struct padata_parallel_queue *queue;
109 struct parallel_data *pd;
110
111 rcu_read_lock_bh();
112
113 pd = rcu_dereference_bh(pinst->pd);
114
115 err = -EINVAL;
116 if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID)
117 goto out;
118
119 if (!cpumask_test_cpu(cb_cpu, pd->cpumask.cbcpu))
120 goto out;
121
122 err = -EBUSY;
123 if ((pinst->flags & PADATA_RESET))
124 goto out;
125
126 if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM)
127 goto out;
128
129 err = 0;
130 atomic_inc(&pd->refcnt);
131 padata->pd = pd;
132 padata->cb_cpu = cb_cpu;
133
134 target_cpu = padata_cpu_hash(pd);
135 padata->cpu = target_cpu;
136 queue = per_cpu_ptr(pd->pqueue, target_cpu);
137
138 spin_lock(&queue->parallel.lock);
139 list_add_tail(&padata->list, &queue->parallel.list);
140 spin_unlock(&queue->parallel.lock);
141
142 queue_work_on(target_cpu, pinst->wq, &queue->work);
143
144out:
145 rcu_read_unlock_bh();
146
147 return err;
148}
149EXPORT_SYMBOL(padata_do_parallel);
150
151/*
152 * padata_get_next - Get the next object that needs serialization.
153 *
154 * Return values are:
155 *
156 * A pointer to the control struct of the next object that needs
157 * serialization, if present in one of the percpu reorder queues.
158 *
159 * -EINPROGRESS, if the next object that needs serialization will
160 * be parallel processed by another cpu and is not yet present in
161 * the cpu's reorder queue.
162 *
163 * -ENODATA, if this cpu has to do the parallel processing for
164 * the next object.
165 */
166static struct padata_priv *padata_get_next(struct parallel_data *pd)
167{
168 int cpu, num_cpus;
169 unsigned int next_nr, next_index;
170 struct padata_parallel_queue *next_queue;
171 struct padata_priv *padata;
172 struct padata_list *reorder;
173
174 num_cpus = cpumask_weight(pd->cpumask.pcpu);
175
176 /*
177 * Calculate the percpu reorder queue and the sequence
178 * number of the next object.
179 */
180 next_nr = pd->processed;
181 next_index = next_nr % num_cpus;
182 cpu = padata_index_to_cpu(pd, next_index);
183 next_queue = per_cpu_ptr(pd->pqueue, cpu);
184
185 reorder = &next_queue->reorder;
186
187 spin_lock(&reorder->lock);
188 if (!list_empty(&reorder->list)) {
189 padata = list_entry(reorder->list.next,
190 struct padata_priv, list);
191
192 list_del_init(&padata->list);
193 atomic_dec(&pd->reorder_objects);
194
195 pd->processed++;
196
197 spin_unlock(&reorder->lock);
198 goto out;
199 }
200 spin_unlock(&reorder->lock);
201
202 if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
203 padata = ERR_PTR(-ENODATA);
204 goto out;
205 }
206
207 padata = ERR_PTR(-EINPROGRESS);
208out:
209 return padata;
210}
211
212static void padata_reorder(struct parallel_data *pd)
213{
214 int cb_cpu;
215 struct padata_priv *padata;
216 struct padata_serial_queue *squeue;
217 struct padata_instance *pinst = pd->pinst;
218
219 /*
220 * We need to ensure that only one cpu can work on dequeueing of
221 * the reorder queue the time. Calculating in which percpu reorder
222 * queue the next object will arrive takes some time. A spinlock
223 * would be highly contended. Also it is not clear in which order
224 * the objects arrive to the reorder queues. So a cpu could wait to
225 * get the lock just to notice that there is nothing to do at the
226 * moment. Therefore we use a trylock and let the holder of the lock
227 * care for all the objects enqueued during the holdtime of the lock.
228 */
229 if (!spin_trylock_bh(&pd->lock))
230 return;
231
232 while (1) {
233 padata = padata_get_next(pd);
234
235 /*
236 * If the next object that needs serialization is parallel
237 * processed by another cpu and is still on it's way to the
238 * cpu's reorder queue, nothing to do for now.
239 */
240 if (PTR_ERR(padata) == -EINPROGRESS)
241 break;
242
243 /*
244 * This cpu has to do the parallel processing of the next
245 * object. It's waiting in the cpu's parallelization queue,
246 * so exit immediately.
247 */
248 if (PTR_ERR(padata) == -ENODATA) {
249 del_timer(&pd->timer);
250 spin_unlock_bh(&pd->lock);
251 return;
252 }
253
254 cb_cpu = padata->cb_cpu;
255 squeue = per_cpu_ptr(pd->squeue, cb_cpu);
256
257 spin_lock(&squeue->serial.lock);
258 list_add_tail(&padata->list, &squeue->serial.list);
259 spin_unlock(&squeue->serial.lock);
260
261 queue_work_on(cb_cpu, pinst->wq, &squeue->work);
262 }
263
264 spin_unlock_bh(&pd->lock);
265
266 /*
267 * The next object that needs serialization might have arrived to
268 * the reorder queues in the meantime, we will be called again
269 * from the timer function if no one else cares for it.
270 *
271 * Ensure reorder_objects is read after pd->lock is dropped so we see
272 * an increment from another task in padata_do_serial. Pairs with
273 * smp_mb__after_atomic in padata_do_serial.
274 */
275 smp_mb();
276 if (atomic_read(&pd->reorder_objects)
277 && !(pinst->flags & PADATA_RESET))
278 mod_timer(&pd->timer, jiffies + HZ);
279 else
280 del_timer(&pd->timer);
281
282 return;
283}
284
285static void invoke_padata_reorder(struct work_struct *work)
286{
287 struct padata_parallel_queue *pqueue;
288 struct parallel_data *pd;
289
290 local_bh_disable();
291 pqueue = container_of(work, struct padata_parallel_queue, reorder_work);
292 pd = pqueue->pd;
293 padata_reorder(pd);
294 local_bh_enable();
295}
296
297static void padata_reorder_timer(struct timer_list *t)
298{
299 struct parallel_data *pd = from_timer(pd, t, timer);
300 unsigned int weight;
301 int target_cpu, cpu;
302
303 cpu = get_cpu();
304
305 /* We don't lock pd here to not interfere with parallel processing
306 * padata_reorder() calls on other CPUs. We just need any CPU out of
307 * the cpumask.pcpu set. It would be nice if it's the right one but
308 * it doesn't matter if we're off to the next one by using an outdated
309 * pd->processed value.
310 */
311 weight = cpumask_weight(pd->cpumask.pcpu);
312 target_cpu = padata_index_to_cpu(pd, pd->processed % weight);
313
314 /* ensure to call the reorder callback on the correct CPU */
315 if (cpu != target_cpu) {
316 struct padata_parallel_queue *pqueue;
317 struct padata_instance *pinst;
318
319 /* The timer function is serialized wrt itself -- no locking
320 * needed.
321 */
322 pinst = pd->pinst;
323 pqueue = per_cpu_ptr(pd->pqueue, target_cpu);
324 queue_work_on(target_cpu, pinst->wq, &pqueue->reorder_work);
325 } else {
326 padata_reorder(pd);
327 }
328
329 put_cpu();
330}
331
332static void padata_serial_worker(struct work_struct *serial_work)
333{
334 struct padata_serial_queue *squeue;
335 struct parallel_data *pd;
336 LIST_HEAD(local_list);
337
338 local_bh_disable();
339 squeue = container_of(serial_work, struct padata_serial_queue, work);
340 pd = squeue->pd;
341
342 spin_lock(&squeue->serial.lock);
343 list_replace_init(&squeue->serial.list, &local_list);
344 spin_unlock(&squeue->serial.lock);
345
346 while (!list_empty(&local_list)) {
347 struct padata_priv *padata;
348
349 padata = list_entry(local_list.next,
350 struct padata_priv, list);
351
352 list_del_init(&padata->list);
353
354 padata->serial(padata);
355 atomic_dec(&pd->refcnt);
356 }
357 local_bh_enable();
358}
359
360/**
361 * padata_do_serial - padata serialization function
362 *
363 * @padata: object to be serialized.
364 *
365 * padata_do_serial must be called for every parallelized object.
366 * The serialization callback function will run with BHs off.
367 */
368void padata_do_serial(struct padata_priv *padata)
369{
370 int cpu;
371 struct padata_parallel_queue *pqueue;
372 struct parallel_data *pd;
373 int reorder_via_wq = 0;
374
375 pd = padata->pd;
376
377 cpu = get_cpu();
378
379 /* We need to run on the same CPU padata_do_parallel(.., padata, ..)
380 * was called on -- or, at least, enqueue the padata object into the
381 * correct per-cpu queue.
382 */
383 if (cpu != padata->cpu) {
384 reorder_via_wq = 1;
385 cpu = padata->cpu;
386 }
387
388 pqueue = per_cpu_ptr(pd->pqueue, cpu);
389
390 spin_lock(&pqueue->reorder.lock);
391 atomic_inc(&pd->reorder_objects);
392 list_add_tail(&padata->list, &pqueue->reorder.list);
393 spin_unlock(&pqueue->reorder.lock);
394
395 /*
396 * Ensure the atomic_inc of reorder_objects above is ordered correctly
397 * with the trylock of pd->lock in padata_reorder. Pairs with smp_mb
398 * in padata_reorder.
399 */
400 smp_mb__after_atomic();
401
402 put_cpu();
403
404 /* If we're running on the wrong CPU, call padata_reorder() via a
405 * kernel worker.
406 */
407 if (reorder_via_wq)
408 queue_work_on(cpu, pd->pinst->wq, &pqueue->reorder_work);
409 else
410 padata_reorder(pd);
411}
412EXPORT_SYMBOL(padata_do_serial);
413
414static int padata_setup_cpumasks(struct parallel_data *pd,
415 const struct cpumask *pcpumask,
416 const struct cpumask *cbcpumask)
417{
418 if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
419 return -ENOMEM;
420
421 cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
422 if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
423 free_cpumask_var(pd->cpumask.pcpu);
424 return -ENOMEM;
425 }
426
427 cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask);
428 return 0;
429}
430
431static void __padata_list_init(struct padata_list *pd_list)
432{
433 INIT_LIST_HEAD(&pd_list->list);
434 spin_lock_init(&pd_list->lock);
435}
436
437/* Initialize all percpu queues used by serial workers */
438static void padata_init_squeues(struct parallel_data *pd)
439{
440 int cpu;
441 struct padata_serial_queue *squeue;
442
443 for_each_cpu(cpu, pd->cpumask.cbcpu) {
444 squeue = per_cpu_ptr(pd->squeue, cpu);
445 squeue->pd = pd;
446 __padata_list_init(&squeue->serial);
447 INIT_WORK(&squeue->work, padata_serial_worker);
448 }
449}
450
451/* Initialize all percpu queues used by parallel workers */
452static void padata_init_pqueues(struct parallel_data *pd)
453{
454 int cpu_index, cpu;
455 struct padata_parallel_queue *pqueue;
456
457 cpu_index = 0;
458 for_each_possible_cpu(cpu) {
459 pqueue = per_cpu_ptr(pd->pqueue, cpu);
460
461 if (!cpumask_test_cpu(cpu, pd->cpumask.pcpu)) {
462 pqueue->cpu_index = -1;
463 continue;
464 }
465
466 pqueue->pd = pd;
467 pqueue->cpu_index = cpu_index;
468 cpu_index++;
469
470 __padata_list_init(&pqueue->reorder);
471 __padata_list_init(&pqueue->parallel);
472 INIT_WORK(&pqueue->work, padata_parallel_worker);
473 INIT_WORK(&pqueue->reorder_work, invoke_padata_reorder);
474 atomic_set(&pqueue->num_obj, 0);
475 }
476}
477
478/* Allocate and initialize the internal cpumask dependend resources. */
479static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
480 const struct cpumask *pcpumask,
481 const struct cpumask *cbcpumask)
482{
483 struct parallel_data *pd;
484
485 pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
486 if (!pd)
487 goto err;
488
489 pd->pqueue = alloc_percpu(struct padata_parallel_queue);
490 if (!pd->pqueue)
491 goto err_free_pd;
492
493 pd->squeue = alloc_percpu(struct padata_serial_queue);
494 if (!pd->squeue)
495 goto err_free_pqueue;
496 if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0)
497 goto err_free_squeue;
498
499 padata_init_pqueues(pd);
500 padata_init_squeues(pd);
501 timer_setup(&pd->timer, padata_reorder_timer, 0);
502 atomic_set(&pd->seq_nr, -1);
503 atomic_set(&pd->reorder_objects, 0);
504 atomic_set(&pd->refcnt, 0);
505 pd->pinst = pinst;
506 spin_lock_init(&pd->lock);
507
508 return pd;
509
510err_free_squeue:
511 free_percpu(pd->squeue);
512err_free_pqueue:
513 free_percpu(pd->pqueue);
514err_free_pd:
515 kfree(pd);
516err:
517 return NULL;
518}
519
520static void padata_free_pd(struct parallel_data *pd)
521{
522 free_cpumask_var(pd->cpumask.pcpu);
523 free_cpumask_var(pd->cpumask.cbcpu);
524 free_percpu(pd->pqueue);
525 free_percpu(pd->squeue);
526 kfree(pd);
527}
528
529/* Flush all objects out of the padata queues. */
530static void padata_flush_queues(struct parallel_data *pd)
531{
532 int cpu;
533 struct padata_parallel_queue *pqueue;
534 struct padata_serial_queue *squeue;
535
536 for_each_cpu(cpu, pd->cpumask.pcpu) {
537 pqueue = per_cpu_ptr(pd->pqueue, cpu);
538 flush_work(&pqueue->work);
539 }
540
541 del_timer_sync(&pd->timer);
542
543 if (atomic_read(&pd->reorder_objects))
544 padata_reorder(pd);
545
546 for_each_cpu(cpu, pd->cpumask.cbcpu) {
547 squeue = per_cpu_ptr(pd->squeue, cpu);
548 flush_work(&squeue->work);
549 }
550
551 BUG_ON(atomic_read(&pd->refcnt) != 0);
552}
553
554static void __padata_start(struct padata_instance *pinst)
555{
556 pinst->flags |= PADATA_INIT;
557}
558
559static void __padata_stop(struct padata_instance *pinst)
560{
561 if (!(pinst->flags & PADATA_INIT))
562 return;
563
564 pinst->flags &= ~PADATA_INIT;
565
566 synchronize_rcu();
567
568 get_online_cpus();
569 padata_flush_queues(pinst->pd);
570 put_online_cpus();
571}
572
573/* Replace the internal control structure with a new one. */
574static void padata_replace(struct padata_instance *pinst,
575 struct parallel_data *pd_new)
576{
577 struct parallel_data *pd_old = pinst->pd;
578 int notification_mask = 0;
579
580 pinst->flags |= PADATA_RESET;
581
582 rcu_assign_pointer(pinst->pd, pd_new);
583
584 synchronize_rcu();
585
586 if (!cpumask_equal(pd_old->cpumask.pcpu, pd_new->cpumask.pcpu))
587 notification_mask |= PADATA_CPU_PARALLEL;
588 if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu))
589 notification_mask |= PADATA_CPU_SERIAL;
590
591 padata_flush_queues(pd_old);
592 padata_free_pd(pd_old);
593
594 if (notification_mask)
595 blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
596 notification_mask,
597 &pd_new->cpumask);
598
599 pinst->flags &= ~PADATA_RESET;
600}
601
602/**
603 * padata_register_cpumask_notifier - Registers a notifier that will be called
604 * if either pcpu or cbcpu or both cpumasks change.
605 *
606 * @pinst: A poineter to padata instance
607 * @nblock: A pointer to notifier block.
608 */
609int padata_register_cpumask_notifier(struct padata_instance *pinst,
610 struct notifier_block *nblock)
611{
612 return blocking_notifier_chain_register(&pinst->cpumask_change_notifier,
613 nblock);
614}
615EXPORT_SYMBOL(padata_register_cpumask_notifier);
616
617/**
618 * padata_unregister_cpumask_notifier - Unregisters cpumask notifier
619 * registered earlier using padata_register_cpumask_notifier
620 *
621 * @pinst: A pointer to data instance.
622 * @nlock: A pointer to notifier block.
623 */
624int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
625 struct notifier_block *nblock)
626{
627 return blocking_notifier_chain_unregister(
628 &pinst->cpumask_change_notifier,
629 nblock);
630}
631EXPORT_SYMBOL(padata_unregister_cpumask_notifier);
632
633
634/* If cpumask contains no active cpu, we mark the instance as invalid. */
635static bool padata_validate_cpumask(struct padata_instance *pinst,
636 const struct cpumask *cpumask)
637{
638 if (!cpumask_intersects(cpumask, cpu_online_mask)) {
639 pinst->flags |= PADATA_INVALID;
640 return false;
641 }
642
643 pinst->flags &= ~PADATA_INVALID;
644 return true;
645}
646
647static int __padata_set_cpumasks(struct padata_instance *pinst,
648 cpumask_var_t pcpumask,
649 cpumask_var_t cbcpumask)
650{
651 int valid;
652 struct parallel_data *pd;
653
654 valid = padata_validate_cpumask(pinst, pcpumask);
655 if (!valid) {
656 __padata_stop(pinst);
657 goto out_replace;
658 }
659
660 valid = padata_validate_cpumask(pinst, cbcpumask);
661 if (!valid)
662 __padata_stop(pinst);
663
664out_replace:
665 pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
666 if (!pd)
667 return -ENOMEM;
668
669 cpumask_copy(pinst->cpumask.pcpu, pcpumask);
670 cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
671
672 padata_replace(pinst, pd);
673
674 if (valid)
675 __padata_start(pinst);
676
677 return 0;
678}
679
680/**
681 * padata_set_cpumask: Sets specified by @cpumask_type cpumask to the value
682 * equivalent to @cpumask.
683 *
684 * @pinst: padata instance
685 * @cpumask_type: PADATA_CPU_SERIAL or PADATA_CPU_PARALLEL corresponding
686 * to parallel and serial cpumasks respectively.
687 * @cpumask: the cpumask to use
688 */
689int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
690 cpumask_var_t cpumask)
691{
692 struct cpumask *serial_mask, *parallel_mask;
693 int err = -EINVAL;
694
695 mutex_lock(&pinst->lock);
696 get_online_cpus();
697
698 switch (cpumask_type) {
699 case PADATA_CPU_PARALLEL:
700 serial_mask = pinst->cpumask.cbcpu;
701 parallel_mask = cpumask;
702 break;
703 case PADATA_CPU_SERIAL:
704 parallel_mask = pinst->cpumask.pcpu;
705 serial_mask = cpumask;
706 break;
707 default:
708 goto out;
709 }
710
711 err = __padata_set_cpumasks(pinst, parallel_mask, serial_mask);
712
713out:
714 put_online_cpus();
715 mutex_unlock(&pinst->lock);
716
717 return err;
718}
719EXPORT_SYMBOL(padata_set_cpumask);
720
721/**
722 * padata_start - start the parallel processing
723 *
724 * @pinst: padata instance to start
725 */
726int padata_start(struct padata_instance *pinst)
727{
728 int err = 0;
729
730 mutex_lock(&pinst->lock);
731
732 if (pinst->flags & PADATA_INVALID)
733 err = -EINVAL;
734
735 __padata_start(pinst);
736
737 mutex_unlock(&pinst->lock);
738
739 return err;
740}
741EXPORT_SYMBOL(padata_start);
742
743/**
744 * padata_stop - stop the parallel processing
745 *
746 * @pinst: padata instance to stop
747 */
748void padata_stop(struct padata_instance *pinst)
749{
750 mutex_lock(&pinst->lock);
751 __padata_stop(pinst);
752 mutex_unlock(&pinst->lock);
753}
754EXPORT_SYMBOL(padata_stop);
755
756#ifdef CONFIG_HOTPLUG_CPU
757
758static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
759{
760 struct parallel_data *pd;
761
762 if (cpumask_test_cpu(cpu, cpu_online_mask)) {
763 pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
764 pinst->cpumask.cbcpu);
765 if (!pd)
766 return -ENOMEM;
767
768 padata_replace(pinst, pd);
769
770 if (padata_validate_cpumask(pinst, pinst->cpumask.pcpu) &&
771 padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
772 __padata_start(pinst);
773 }
774
775 return 0;
776}
777
778static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
779{
780 struct parallel_data *pd = NULL;
781
782 if (cpumask_test_cpu(cpu, cpu_online_mask)) {
783
784 if (!padata_validate_cpumask(pinst, pinst->cpumask.pcpu) ||
785 !padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
786 __padata_stop(pinst);
787
788 pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
789 pinst->cpumask.cbcpu);
790 if (!pd)
791 return -ENOMEM;
792
793 padata_replace(pinst, pd);
794
795 cpumask_clear_cpu(cpu, pd->cpumask.cbcpu);
796 cpumask_clear_cpu(cpu, pd->cpumask.pcpu);
797 }
798
799 return 0;
800}
801
802 /**
803 * padata_remove_cpu - remove a cpu from the one or both(serial and parallel)
804 * padata cpumasks.
805 *
806 * @pinst: padata instance
807 * @cpu: cpu to remove
808 * @mask: bitmask specifying from which cpumask @cpu should be removed
809 * The @mask may be any combination of the following flags:
810 * PADATA_CPU_SERIAL - serial cpumask
811 * PADATA_CPU_PARALLEL - parallel cpumask
812 */
813int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask)
814{
815 int err;
816
817 if (!(mask & (PADATA_CPU_SERIAL | PADATA_CPU_PARALLEL)))
818 return -EINVAL;
819
820 mutex_lock(&pinst->lock);
821
822 get_online_cpus();
823 if (mask & PADATA_CPU_SERIAL)
824 cpumask_clear_cpu(cpu, pinst->cpumask.cbcpu);
825 if (mask & PADATA_CPU_PARALLEL)
826 cpumask_clear_cpu(cpu, pinst->cpumask.pcpu);
827
828 err = __padata_remove_cpu(pinst, cpu);
829 put_online_cpus();
830
831 mutex_unlock(&pinst->lock);
832
833 return err;
834}
835EXPORT_SYMBOL(padata_remove_cpu);
836
837static inline int pinst_has_cpu(struct padata_instance *pinst, int cpu)
838{
839 return cpumask_test_cpu(cpu, pinst->cpumask.pcpu) ||
840 cpumask_test_cpu(cpu, pinst->cpumask.cbcpu);
841}
842
843static int padata_cpu_online(unsigned int cpu, struct hlist_node *node)
844{
845 struct padata_instance *pinst;
846 int ret;
847
848 pinst = hlist_entry_safe(node, struct padata_instance, node);
849 if (!pinst_has_cpu(pinst, cpu))
850 return 0;
851
852 mutex_lock(&pinst->lock);
853 ret = __padata_add_cpu(pinst, cpu);
854 mutex_unlock(&pinst->lock);
855 return ret;
856}
857
858static int padata_cpu_prep_down(unsigned int cpu, struct hlist_node *node)
859{
860 struct padata_instance *pinst;
861 int ret;
862
863 pinst = hlist_entry_safe(node, struct padata_instance, node);
864 if (!pinst_has_cpu(pinst, cpu))
865 return 0;
866
867 mutex_lock(&pinst->lock);
868 ret = __padata_remove_cpu(pinst, cpu);
869 mutex_unlock(&pinst->lock);
870 return ret;
871}
872
/*
 * Multi-instance hotplug state of padata; set in padata_driver_init() from
 * the value returned by cpuhp_setup_state_multi().
 */
static enum cpuhp_state hp_online;
874#endif
875
876static void __padata_free(struct padata_instance *pinst)
877{
878#ifdef CONFIG_HOTPLUG_CPU
879 cpuhp_state_remove_instance_nocalls(hp_online, &pinst->node);
880#endif
881
882 padata_stop(pinst);
883 padata_free_pd(pinst->pd);
884 free_cpumask_var(pinst->cpumask.pcpu);
885 free_cpumask_var(pinst->cpumask.cbcpu);
886 kfree(pinst);
887}
888
/* Map an embedded kobject back to the padata_instance containing it. */
#define kobj2pinst(_kobj)					\
	container_of(_kobj, struct padata_instance, kobj)
/* Map an embedded attribute back to the padata_sysfs_entry containing it. */
#define attr2pentry(_attr)					\
	container_of(_attr, struct padata_sysfs_entry, attr)
893
/* kobject release callback: frees the padata instance embedding @kobj. */
static void padata_sysfs_release(struct kobject *kobj)
{
	__padata_free(kobj2pinst(kobj));
}
899
/*
 * A padata sysfs attribute together with its handlers. padata_sysfs_show()
 * and padata_sysfs_store() recover this structure from the embedded
 * attribute and forward to ->show / ->store.
 */
struct padata_sysfs_entry {
	struct attribute attr;
	/* read handler; NULL is tolerated by padata_sysfs_show() */
	ssize_t (*show)(struct padata_instance *, struct attribute *, char *);
	/* write handler; PADATA_ATTR_RO entries leave this NULL */
	ssize_t (*store)(struct padata_instance *, struct attribute *,
			 const char *, size_t);
};
906
907static ssize_t show_cpumask(struct padata_instance *pinst,
908 struct attribute *attr, char *buf)
909{
910 struct cpumask *cpumask;
911 ssize_t len;
912
913 mutex_lock(&pinst->lock);
914 if (!strcmp(attr->name, "serial_cpumask"))
915 cpumask = pinst->cpumask.cbcpu;
916 else
917 cpumask = pinst->cpumask.pcpu;
918
919 len = snprintf(buf, PAGE_SIZE, "%*pb\n",
920 nr_cpu_ids, cpumask_bits(cpumask));
921 mutex_unlock(&pinst->lock);
922 return len < PAGE_SIZE ? len : -EINVAL;
923}
924
925static ssize_t store_cpumask(struct padata_instance *pinst,
926 struct attribute *attr,
927 const char *buf, size_t count)
928{
929 cpumask_var_t new_cpumask;
930 ssize_t ret;
931 int mask_type;
932
933 if (!alloc_cpumask_var(&new_cpumask, GFP_KERNEL))
934 return -ENOMEM;
935
936 ret = bitmap_parse(buf, count, cpumask_bits(new_cpumask),
937 nr_cpumask_bits);
938 if (ret < 0)
939 goto out;
940
941 mask_type = !strcmp(attr->name, "serial_cpumask") ?
942 PADATA_CPU_SERIAL : PADATA_CPU_PARALLEL;
943 ret = padata_set_cpumask(pinst, mask_type, new_cpumask);
944 if (!ret)
945 ret = count;
946
947out:
948 free_cpumask_var(new_cpumask);
949 return ret;
950}
951
/* Define a read-write (0644) padata sysfs entry named <_name>_attr. */
#define PADATA_ATTR_RW(_name, _show_name, _store_name)		\
	static struct padata_sysfs_entry _name##_attr =		\
		__ATTR(_name, 0644, _show_name, _store_name)
/* Define a read-only (0400) padata sysfs entry with no store handler. */
#define PADATA_ATTR_RO(_name, _show_name)		\
	static struct padata_sysfs_entry _name##_attr = \
		__ATTR(_name, 0400, _show_name, NULL)

/* Both cpumask attributes share show_cpumask()/store_cpumask(). */
PADATA_ATTR_RW(serial_cpumask, show_cpumask, store_cpumask);
PADATA_ATTR_RW(parallel_cpumask, show_cpumask, store_cpumask);
961
/*
 * Padata sysfs provides the following objects:
 * serial_cpumask   [RW] - cpumask for serial workers
 * parallel_cpumask [RW] - cpumask for parallel workers
 *
 * NULL-terminated list, referenced as the default attributes of
 * padata_attr_type.
 */
static struct attribute *padata_default_attrs[] = {
	&serial_cpumask_attr.attr,
	&parallel_cpumask_attr.attr,
	NULL,
};
972
973static ssize_t padata_sysfs_show(struct kobject *kobj,
974 struct attribute *attr, char *buf)
975{
976 struct padata_instance *pinst;
977 struct padata_sysfs_entry *pentry;
978 ssize_t ret = -EIO;
979
980 pinst = kobj2pinst(kobj);
981 pentry = attr2pentry(attr);
982 if (pentry->show)
983 ret = pentry->show(pinst, attr, buf);
984
985 return ret;
986}
987
988static ssize_t padata_sysfs_store(struct kobject *kobj, struct attribute *attr,
989 const char *buf, size_t count)
990{
991 struct padata_instance *pinst;
992 struct padata_sysfs_entry *pentry;
993 ssize_t ret = -EIO;
994
995 pinst = kobj2pinst(kobj);
996 pentry = attr2pentry(attr);
997 if (pentry->show)
998 ret = pentry->store(pinst, attr, buf, count);
999
1000 return ret;
1001}
1002
/* Routes sysfs reads and writes on padata attributes to the dispatchers. */
static const struct sysfs_ops padata_sysfs_ops = {
	.show = padata_sysfs_show,
	.store = padata_sysfs_store,
};
1007
/*
 * kobject type of a padata instance; padata_alloc() passes it to
 * kobject_init(). The release hook frees the whole instance.
 */
static struct kobj_type padata_attr_type = {
	.sysfs_ops = &padata_sysfs_ops,
	.default_attrs = padata_default_attrs,
	.release = padata_sysfs_release,
};
1013
1014/**
1015 * padata_alloc - allocate and initialize a padata instance and specify
1016 * cpumasks for serial and parallel workers.
1017 *
1018 * @wq: workqueue to use for the allocated padata instance
1019 * @pcpumask: cpumask that will be used for padata parallelization
1020 * @cbcpumask: cpumask that will be used for padata serialization
1021 *
1022 * Must be called from a cpus_read_lock() protected region
1023 */
1024static struct padata_instance *padata_alloc(struct workqueue_struct *wq,
1025 const struct cpumask *pcpumask,
1026 const struct cpumask *cbcpumask)
1027{
1028 struct padata_instance *pinst;
1029 struct parallel_data *pd = NULL;
1030
1031 pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL);
1032 if (!pinst)
1033 goto err;
1034
1035 if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
1036 goto err_free_inst;
1037 if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
1038 free_cpumask_var(pinst->cpumask.pcpu);
1039 goto err_free_inst;
1040 }
1041 if (!padata_validate_cpumask(pinst, pcpumask) ||
1042 !padata_validate_cpumask(pinst, cbcpumask))
1043 goto err_free_masks;
1044
1045 pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
1046 if (!pd)
1047 goto err_free_masks;
1048
1049 rcu_assign_pointer(pinst->pd, pd);
1050
1051 pinst->wq = wq;
1052
1053 cpumask_copy(pinst->cpumask.pcpu, pcpumask);
1054 cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
1055
1056 pinst->flags = 0;
1057
1058 BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier);
1059 kobject_init(&pinst->kobj, &padata_attr_type);
1060 mutex_init(&pinst->lock);
1061
1062#ifdef CONFIG_HOTPLUG_CPU
1063 cpuhp_state_add_instance_nocalls_cpuslocked(hp_online, &pinst->node);
1064#endif
1065 return pinst;
1066
1067err_free_masks:
1068 free_cpumask_var(pinst->cpumask.pcpu);
1069 free_cpumask_var(pinst->cpumask.cbcpu);
1070err_free_inst:
1071 kfree(pinst);
1072err:
1073 return NULL;
1074}
1075
1076/**
1077 * padata_alloc_possible - Allocate and initialize padata instance.
1078 * Use the cpu_possible_mask for serial and
1079 * parallel workers.
1080 *
1081 * @wq: workqueue to use for the allocated padata instance
1082 *
1083 * Must be called from a cpus_read_lock() protected region
1084 */
1085struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
1086{
1087 lockdep_assert_cpus_held();
1088 return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
1089}
1090EXPORT_SYMBOL(padata_alloc_possible);
1091
1092/**
1093 * padata_free - free a padata instance
1094 *
1095 * @padata_inst: padata instance to free
1096 */
1097void padata_free(struct padata_instance *pinst)
1098{
1099 kobject_put(&pinst->kobj);
1100}
1101EXPORT_SYMBOL(padata_free);
1102
1103#ifdef CONFIG_HOTPLUG_CPU
1104
1105static __init int padata_driver_init(void)
1106{
1107 int ret;
1108
1109 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "padata:online",
1110 padata_cpu_online,
1111 padata_cpu_prep_down);
1112 if (ret < 0)
1113 return ret;
1114 hp_online = ret;
1115 return 0;
1116}
1117module_init(padata_driver_init);
1118
1119static __exit void padata_driver_exit(void)
1120{
1121 cpuhp_remove_multi_state(hp_online);
1122}
1123module_exit(padata_driver_exit);
1124#endif