/*
 * Copyright © 2006-2009, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 */

#include <linux/iova.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/bitops.h>
#include <linux/cpu.h>

static bool iova_rcache_insert(struct iova_domain *iovad,
			       unsigned long pfn,
			       unsigned long size);
static unsigned long iova_rcache_get(struct iova_domain *iovad,
				     unsigned long size,
				     unsigned long limit_pfn);
static void init_iova_rcaches(struct iova_domain *iovad);
static void free_iova_rcaches(struct iova_domain *iovad);
static void fq_destroy_all_entries(struct iova_domain *iovad);
static void fq_flush_timeout(unsigned long data);

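/**
 * init_iova_domain - initialises an iova domain
 * @iovad: - iova domain in question
 * @granule: - smallest allocation granularity (a power of two, at most
 *	PAGE_SIZE)
 * @start_pfn: - lowest pfn that may be handed out by the allocator
 * @pfn_32bit: - highest pfn reachable by 32-bit DMA; allocations bounded at
 *	this limit feed the cached32_node optimisation
 *
 * Illustrative usage sketch (not part of this file): an IOMMU driver using a
 * 4KiB IOMMU page size might initialise its domain roughly as
 *
 *	init_iova_domain(&iovad, SZ_4K, base_pfn, DMA_BIT_MASK(32) >> 12);
 *
 * where 'base_pfn' is a hypothetical driver-chosen lower bound.
 */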
void
init_iova_domain(struct iova_domain *iovad, unsigned long granule,
	unsigned long start_pfn, unsigned long pfn_32bit)
{
	/*
	 * IOVA granularity will normally be equal to the smallest
	 * supported IOMMU page size; both *must* be capable of
	 * representing individual CPU pages exactly.
	 */
	BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));

	spin_lock_init(&iovad->iova_rbtree_lock);
	iovad->rbroot = RB_ROOT;
	iovad->cached32_node = NULL;
	iovad->granule = granule;
	iovad->start_pfn = start_pfn;
	iovad->dma_32bit_pfn = pfn_32bit + 1;
	iovad->flush_cb = NULL;
	iovad->fq = NULL;
	init_iova_rcaches(iovad);
}
EXPORT_SYMBOL_GPL(init_iova_domain);

bool has_iova_flush_queue(struct iova_domain *iovad)
{
	return !!iovad->fq;
}

static void free_iova_flush_queue(struct iova_domain *iovad)
{
	if (!has_iova_flush_queue(iovad))
		return;

	if (timer_pending(&iovad->fq_timer))
		del_timer(&iovad->fq_timer);

	fq_destroy_all_entries(iovad);

	free_percpu(iovad->fq);

	iovad->fq = NULL;
	iovad->flush_cb = NULL;
	iovad->entry_dtor = NULL;
}

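/*
 * init_iova_flush_queue - set up per-CPU flush queues for deferred freeing.
 * @flush_cb is called to invalidate the IOTLB for the whole domain, and
 * @entry_dtor (which may be NULL) is called on each entry's opaque data
 * before its IOVA range is released back to the allocator.
 *
 * Illustrative setup sketch (not part of this file): a caller wanting
 * deferred unmapping might do
 *
 *	ret = init_iova_flush_queue(iovad, my_flush_all_cb, NULL);
 *
 * and, on success, hand freed ranges to queue_iova() rather than
 * free_iova_fast(); my_flush_all_cb() is a hypothetical driver callback.
 */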
int init_iova_flush_queue(struct iova_domain *iovad,
			  iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
{
	struct iova_fq __percpu *queue;
	int cpu;

	atomic64_set(&iovad->fq_flush_start_cnt, 0);
	atomic64_set(&iovad->fq_flush_finish_cnt, 0);

	queue = alloc_percpu(struct iova_fq);
	if (!queue)
		return -ENOMEM;

	iovad->flush_cb = flush_cb;
	iovad->entry_dtor = entry_dtor;

	for_each_possible_cpu(cpu) {
		struct iova_fq *fq;

		fq = per_cpu_ptr(queue, cpu);
		fq->head = 0;
		fq->tail = 0;

		spin_lock_init(&fq->lock);
	}

	smp_wmb();

	iovad->fq = queue;

	setup_timer(&iovad->fq_timer, fq_flush_timeout, (unsigned long)iovad);
	atomic_set(&iovad->fq_timer_on, 0);

	return 0;
}
EXPORT_SYMBOL_GPL(init_iova_flush_queue);

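/*
 * cached32_node remembers the node of the most recent allocation made against
 * the 32-bit boundary, so the backwards tree walk for the common sub-4GiB
 * case does not have to restart from rb_last() on every allocation.
 */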
static struct rb_node *
__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
{
	if ((*limit_pfn > iovad->dma_32bit_pfn) ||
		(iovad->cached32_node == NULL))
		return rb_last(&iovad->rbroot);
	else {
		struct rb_node *prev_node = rb_prev(iovad->cached32_node);
		struct iova *curr_iova =
			rb_entry(iovad->cached32_node, struct iova, node);
		*limit_pfn = curr_iova->pfn_lo;
		return prev_node;
	}
}

static void
__cached_rbnode_insert_update(struct iova_domain *iovad,
	unsigned long limit_pfn, struct iova *new)
{
	if (limit_pfn != iovad->dma_32bit_pfn)
		return;
	iovad->cached32_node = &new->node;
}

static void
__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
{
	struct iova *cached_iova;
	struct rb_node *curr;

	if (!iovad->cached32_node)
		return;
	curr = iovad->cached32_node;
	cached_iova = rb_entry(curr, struct iova, node);

	if (free->pfn_lo >= cached_iova->pfn_lo) {
		struct rb_node *node = rb_next(&free->node);
		struct iova *iova = rb_entry(node, struct iova, node);

		/* only cache if it's below 32bit pfn */
		if (node && iova->pfn_lo < iovad->dma_32bit_pfn)
			iovad->cached32_node = node;
		else
			iovad->cached32_node = NULL;
	}
}

/* Insert the iova into domain rbtree by holding writer lock */
static void
iova_insert_rbtree(struct rb_root *root, struct iova *iova,
		   struct rb_node *start)
{
	struct rb_node **new, *parent = NULL;

	new = (start) ? &start : &(root->rb_node);
	/* Figure out where to put new node */
	while (*new) {
		struct iova *this = rb_entry(*new, struct iova, node);

		parent = *new;

		if (iova->pfn_lo < this->pfn_lo)
			new = &((*new)->rb_left);
		else if (iova->pfn_lo > this->pfn_lo)
			new = &((*new)->rb_right);
		else {
			WARN_ON(1); /* this should not happen */
			return;
		}
	}
	/* Add new node and rebalance tree. */
	rb_link_node(&iova->node, parent, new);
	rb_insert_color(&iova->node, root);
}

/*
 * Computes the padding size required to make the start address
 * naturally aligned on the power-of-two order of its size.
 */
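/*
 * Worked example (illustrative): for size = 5 and limit_pfn = 103 the
 * allocation is aligned to __roundup_pow_of_two(5) = 8, so pad_size =
 * (103 - 5) & 7 = 2 and the range will be placed at 103 - (5 + 2) = 96,
 * which is 8-aligned.
 */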
static unsigned int
iova_get_pad_size(unsigned int size, unsigned int limit_pfn)
{
	return (limit_pfn - size) & (__roundup_pow_of_two(size) - 1);
}

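/*
 * Walk the rbtree backwards from the cached starting point, looking for a
 * free gap below limit_pfn large enough for 'size' pfns (plus alignment
 * padding when size_aligned is set), then insert 'new' into the tree.
 */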
static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
		unsigned long size, unsigned long limit_pfn,
		struct iova *new, bool size_aligned)
{
	struct rb_node *prev, *curr = NULL;
	unsigned long flags;
	unsigned long saved_pfn;
	unsigned int pad_size = 0;

	/* Walk the tree backwards */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	saved_pfn = limit_pfn;
	curr = __get_cached_rbnode(iovad, &limit_pfn);
	prev = curr;
	while (curr) {
		struct iova *curr_iova = rb_entry(curr, struct iova, node);

		if (limit_pfn <= curr_iova->pfn_lo) {
			goto move_left;
		} else if (limit_pfn > curr_iova->pfn_hi) {
			if (size_aligned)
				pad_size = iova_get_pad_size(size, limit_pfn);
			if ((curr_iova->pfn_hi + size + pad_size) < limit_pfn)
				break;	/* found a free slot */
		}
		limit_pfn = curr_iova->pfn_lo;
move_left:
		prev = curr;
		curr = rb_prev(curr);
	}

	if (!curr) {
		if (size_aligned)
			pad_size = iova_get_pad_size(size, limit_pfn);
		if ((iovad->start_pfn + size + pad_size) > limit_pfn) {
			spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
			return -ENOMEM;
		}
	}

	/* pfn_lo will point to size aligned address if size_aligned is set */
	new->pfn_lo = limit_pfn - (size + pad_size);
	new->pfn_hi = new->pfn_lo + size - 1;

	/* If we have 'prev', it's a valid place to start the insertion. */
	iova_insert_rbtree(&iovad->rbroot, new, prev);
	__cached_rbnode_insert_update(iovad, saved_pfn, new);

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

	return 0;
}

static struct kmem_cache *iova_cache;
static unsigned int iova_cache_users;
static DEFINE_MUTEX(iova_cache_mutex);

struct iova *alloc_iova_mem(void)
{
	return kmem_cache_alloc(iova_cache, GFP_ATOMIC);
}
EXPORT_SYMBOL(alloc_iova_mem);

void free_iova_mem(struct iova *iova)
{
	kmem_cache_free(iova_cache, iova);
}
EXPORT_SYMBOL(free_iova_mem);

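/*
 * The struct iova kmem cache is shared by all iova domains and is
 * reference-counted: every user must pair iova_cache_get() with
 * iova_cache_put().
 */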
int iova_cache_get(void)
{
	mutex_lock(&iova_cache_mutex);
	if (!iova_cache_users) {
		iova_cache = kmem_cache_create(
			"iommu_iova", sizeof(struct iova), 0,
			SLAB_HWCACHE_ALIGN, NULL);
		if (!iova_cache) {
			mutex_unlock(&iova_cache_mutex);
			printk(KERN_ERR "Couldn't create iova cache\n");
			return -ENOMEM;
		}
	}

	iova_cache_users++;
	mutex_unlock(&iova_cache_mutex);

	return 0;
}
EXPORT_SYMBOL_GPL(iova_cache_get);

void iova_cache_put(void)
{
	mutex_lock(&iova_cache_mutex);
	if (WARN_ON(!iova_cache_users)) {
		mutex_unlock(&iova_cache_mutex);
		return;
	}
	iova_cache_users--;
	if (!iova_cache_users)
		kmem_cache_destroy(iova_cache);
	mutex_unlock(&iova_cache_mutex);
}
EXPORT_SYMBOL_GPL(iova_cache_put);

/**
 * alloc_iova - allocates an iova
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @size_aligned: - set if a size-aligned address range is required
 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
 * flag is set then the allocated address iova->pfn_lo will be naturally
 * aligned on roundup_power_of_two(size).
 */
struct iova *
alloc_iova(struct iova_domain *iovad, unsigned long size,
	unsigned long limit_pfn,
	bool size_aligned)
{
	struct iova *new_iova;
	int ret;

	new_iova = alloc_iova_mem();
	if (!new_iova)
		return NULL;

	ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
			new_iova, size_aligned);

	if (ret) {
		free_iova_mem(new_iova);
		return NULL;
	}

	return new_iova;
}
EXPORT_SYMBOL_GPL(alloc_iova);

static struct iova *
private_find_iova(struct iova_domain *iovad, unsigned long pfn)
{
	struct rb_node *node = iovad->rbroot.rb_node;

	assert_spin_locked(&iovad->iova_rbtree_lock);

	while (node) {
		struct iova *iova = rb_entry(node, struct iova, node);

		/* If pfn falls within iova's range, return iova */
		if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) {
			return iova;
		}

		if (pfn < iova->pfn_lo)
			node = node->rb_left;
		else if (pfn > iova->pfn_lo)
			node = node->rb_right;
	}

	return NULL;
}

static void private_free_iova(struct iova_domain *iovad, struct iova *iova)
{
	assert_spin_locked(&iovad->iova_rbtree_lock);
	__cached_rbnode_delete_update(iovad, iova);
	rb_erase(&iova->node, &iovad->rbroot);
	free_iova_mem(iova);
}

/**
 * find_iova - finds an iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - page frame number
 * This function finds and returns an iova belonging to the
 * given domain which matches the given pfn.
 */
struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
{
	unsigned long flags;
	struct iova *iova;

	/* Take the lock so that no other thread is manipulating the rbtree */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	iova = private_find_iova(iovad, pfn);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return iova;
}
EXPORT_SYMBOL_GPL(find_iova);

/**
 * __free_iova - frees the given iova
 * @iovad: iova domain in question.
 * @iova: iova in question.
 * Frees the given iova belonging to the given domain
 */
void
__free_iova(struct iova_domain *iovad, struct iova *iova)
{
	unsigned long flags;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	private_free_iova(iovad, iova);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
}
EXPORT_SYMBOL_GPL(__free_iova);

/**
 * free_iova - finds and frees the iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 * This function finds an iova for a given pfn and then
 * frees the iova from that domain.
 */
void
free_iova(struct iova_domain *iovad, unsigned long pfn)
{
	struct iova *iova = find_iova(iovad, pfn);

	if (iova)
		__free_iova(iovad, iova);
}
EXPORT_SYMBOL_GPL(free_iova);

/**
 * alloc_iova_fast - allocates an iova from rcache
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * This function tries to satisfy an iova allocation from the rcache,
 * and falls back to regular allocation on failure.
 */
unsigned long
alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
		unsigned long limit_pfn)
{
	bool flushed_rcache = false;
	unsigned long iova_pfn;
	struct iova *new_iova;

	iova_pfn = iova_rcache_get(iovad, size, limit_pfn);
	if (iova_pfn)
		return iova_pfn;

retry:
	new_iova = alloc_iova(iovad, size, limit_pfn, true);
	if (!new_iova) {
		unsigned int cpu;

		if (flushed_rcache)
			return 0;

		/* Try replenishing IOVAs by flushing rcache. */
		flushed_rcache = true;
		for_each_online_cpu(cpu)
			free_cpu_cached_iovas(cpu, iovad);
		goto retry;
	}

	return new_iova->pfn_lo;
}
EXPORT_SYMBOL_GPL(alloc_iova_fast);

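/*
 * Illustrative usage sketch (not part of this file): a map/unmap fast path
 * might pair alloc_iova_fast() with free_iova_fast() below as
 *
 *	pfn = alloc_iova_fast(iovad, nr_pages, dma_limit_pfn);
 *	...
 *	free_iova_fast(iovad, pfn, nr_pages);
 *
 * where nr_pages and dma_limit_pfn are hypothetical caller-side values.
 */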
/**
 * free_iova_fast - free iova pfn range into rcache
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 * @size: - # of pages in range
 * This function frees an iova range by trying to put it into the rcache,
 * falling back to regular iova deallocation via free_iova() if this fails.
 */
void
free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
{
	if (iova_rcache_insert(iovad, pfn, size))
		return;

	free_iova(iovad, pfn);
}
EXPORT_SYMBOL_GPL(free_iova_fast);

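/*
 * Each CPU owns a small ring of deferred frees (struct iova_fq): entries are
 * added at 'tail' by queue_iova() and reclaimed from 'head' by fq_ring_free()
 * once the flush counters show that an IOTLB flush has completed after the
 * entry was queued.
 */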
#define fq_ring_for_each(i, fq) \
	for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)

static inline bool fq_full(struct iova_fq *fq)
{
	assert_spin_locked(&fq->lock);
	return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
}

static inline unsigned fq_ring_add(struct iova_fq *fq)
{
	unsigned idx = fq->tail;

	assert_spin_locked(&fq->lock);

	fq->tail = (idx + 1) % IOVA_FQ_SIZE;

	return idx;
}

static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
{
	u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt);
	unsigned idx;

	assert_spin_locked(&fq->lock);

	fq_ring_for_each(idx, fq) {
		if (fq->entries[idx].counter >= counter)
			break;

		if (iovad->entry_dtor)
			iovad->entry_dtor(fq->entries[idx].data);

		free_iova_fast(iovad,
			       fq->entries[idx].iova_pfn,
			       fq->entries[idx].pages);

		fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
	}
}

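/*
 * Bump the start counter, flush the whole domain through the driver callback,
 * then bump the finish counter; fq_ring_free() compares entry counters
 * against the finish counter to decide which queued ranges are safe to free.
 */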
static void iova_domain_flush(struct iova_domain *iovad)
{
	atomic64_inc(&iovad->fq_flush_start_cnt);
	iovad->flush_cb(iovad);
	atomic64_inc(&iovad->fq_flush_finish_cnt);
}

static void fq_destroy_all_entries(struct iova_domain *iovad)
{
	int cpu;

	/*
	 * This code runs when the iova_domain is being destroyed, so don't
	 * bother to free iovas, just call the entry_dtor on all remaining
	 * entries.
	 */
	if (!iovad->entry_dtor)
		return;

	for_each_possible_cpu(cpu) {
		struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu);
		int idx;

		fq_ring_for_each(idx, fq)
			iovad->entry_dtor(fq->entries[idx].data);
	}
}

static void fq_flush_timeout(unsigned long data)
{
	struct iova_domain *iovad = (struct iova_domain *)data;
	int cpu;

	atomic_set(&iovad->fq_timer_on, 0);
	iova_domain_flush(iovad);

	for_each_possible_cpu(cpu) {
		unsigned long flags;
		struct iova_fq *fq;

		fq = per_cpu_ptr(iovad->fq, cpu);
		spin_lock_irqsave(&fq->lock, flags);
		fq_ring_free(iovad, fq);
		spin_unlock_irqrestore(&fq->lock, flags);
	}
}

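/**
 * queue_iova - add an iova range to the per-CPU flush queue for deferred freeing
 * @iovad: - iova domain in question
 * @pfn: - first pfn of the range
 * @pages: - number of pages in the range
 * @data: - opaque cookie passed to the entry destructor
 * The range is handed back to the allocator only after a domain-wide IOTLB
 * flush, triggered either by fq_flush_timeout() or by a full queue.
 */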
void queue_iova(struct iova_domain *iovad,
		unsigned long pfn, unsigned long pages,
		unsigned long data)
{
	struct iova_fq *fq = get_cpu_ptr(iovad->fq);
	unsigned long flags;
	unsigned idx;

	spin_lock_irqsave(&fq->lock, flags);

	/*
	 * First remove all entries from the flush queue that have already been
	 * flushed out on another CPU. This makes the fq_full() check below less
	 * likely to be true.
	 */
	fq_ring_free(iovad, fq);

	if (fq_full(fq)) {
		iova_domain_flush(iovad);
		fq_ring_free(iovad, fq);
	}

	idx = fq_ring_add(fq);

	fq->entries[idx].iova_pfn = pfn;
	fq->entries[idx].pages = pages;
	fq->entries[idx].data = data;
	fq->entries[idx].counter = atomic64_read(&iovad->fq_flush_start_cnt);

	spin_unlock_irqrestore(&fq->lock, flags);

	/* Avoid false sharing as much as possible. */
	if (!atomic_read(&iovad->fq_timer_on) &&
	    !atomic_cmpxchg(&iovad->fq_timer_on, 0, 1))
		mod_timer(&iovad->fq_timer,
			  jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));

	put_cpu_ptr(iovad->fq);
}
EXPORT_SYMBOL_GPL(queue_iova);

/**
 * put_iova_domain - destroys the iova domain
 * @iovad: - iova domain in question.
 * All the iovas in that domain are destroyed.
 */
void put_iova_domain(struct iova_domain *iovad)
{
	struct rb_node *node;
	unsigned long flags;

	free_iova_flush_queue(iovad);
	free_iova_rcaches(iovad);
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	node = rb_first(&iovad->rbroot);
	while (node) {
		struct iova *iova = rb_entry(node, struct iova, node);

		rb_erase(node, &iovad->rbroot);
		free_iova_mem(iova);
		node = rb_first(&iovad->rbroot);
	}
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
}
EXPORT_SYMBOL_GPL(put_iova_domain);

static int
__is_range_overlap(struct rb_node *node,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova = rb_entry(node, struct iova, node);

	if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
		return 1;
	return 0;
}

static inline struct iova *
alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova;

	iova = alloc_iova_mem();
	if (iova) {
		iova->pfn_lo = pfn_lo;
		iova->pfn_hi = pfn_hi;
	}

	return iova;
}

static struct iova *
__insert_new_range(struct iova_domain *iovad,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova;

	iova = alloc_and_init_iova(pfn_lo, pfn_hi);
	if (iova)
		iova_insert_rbtree(&iovad->rbroot, iova, NULL);

	return iova;
}

static void
__adjust_overlap_range(struct iova *iova,
	unsigned long *pfn_lo, unsigned long *pfn_hi)
{
	if (*pfn_lo < iova->pfn_lo)
		iova->pfn_lo = *pfn_lo;
	if (*pfn_hi > iova->pfn_hi)
		*pfn_lo = iova->pfn_hi + 1;
}

/**
 * reserve_iova - reserves an iova in the given range
 * @iovad: - iova domain pointer
 * @pfn_lo: - lower page frame address
 * @pfn_hi: - higher pfn address
 * This function reserves the address range from pfn_lo to pfn_hi so
 * that this address is not dished out as part of alloc_iova.
 */
struct iova *
reserve_iova(struct iova_domain *iovad,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct rb_node *node;
	unsigned long flags;
	struct iova *iova;
	unsigned int overlap = 0;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
		if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
			iova = rb_entry(node, struct iova, node);
			__adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
			if ((pfn_lo >= iova->pfn_lo) &&
				(pfn_hi <= iova->pfn_hi))
				goto finish;
			overlap = 1;

		} else if (overlap)
			break;
	}

	/*
	 * We are here either because this is the first reserved range
	 * or because we need to insert the remaining non-overlapping
	 * address range.
	 */
	iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
finish:

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return iova;
}
EXPORT_SYMBOL_GPL(reserve_iova);
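/*
 * Illustrative usage sketch (not part of this file): carving out a window,
 * such as an MSI doorbell region, so that it is never handed out by
 * alloc_iova() could look like
 *
 *	reserve_iova(iovad, lo_pfn, hi_pfn);
 *
 * where lo_pfn and hi_pfn are hypothetical caller-side bounds.
 */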

/**
 * copy_reserved_iova - copies the reserved iovas between domains
 * @from: - source domain to copy from
 * @to: - destination domain to copy to
 * This function copies reserved iovas from one domain to
 * the other.
 */
void
copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
{
	unsigned long flags;
	struct rb_node *node;

	spin_lock_irqsave(&from->iova_rbtree_lock, flags);
	for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
		struct iova *iova = rb_entry(node, struct iova, node);
		struct iova *new_iova;

		new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
		if (!new_iova)
			printk(KERN_ERR "Reserve iova range %lx-%lx failed\n",
				iova->pfn_lo, iova->pfn_hi);
	}
	spin_unlock_irqrestore(&from->iova_rbtree_lock, flags);
}
EXPORT_SYMBOL_GPL(copy_reserved_iova);

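/*
 * Remove @iova from the tree and shrink it to [pfn_lo, pfn_hi], re-inserting
 * any leading and trailing remainder as separate nodes. The shrunk iova is
 * returned to the caller and is *not* re-inserted into the tree; NULL is
 * returned if a remainder could not be allocated.
 */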
struct iova *
split_and_remove_iova(struct iova_domain *iovad, struct iova *iova,
		      unsigned long pfn_lo, unsigned long pfn_hi)
{
	unsigned long flags;
	struct iova *prev = NULL, *next = NULL;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	if (iova->pfn_lo < pfn_lo) {
		prev = alloc_and_init_iova(iova->pfn_lo, pfn_lo - 1);
		if (prev == NULL)
			goto error;
	}
	if (iova->pfn_hi > pfn_hi) {
		next = alloc_and_init_iova(pfn_hi + 1, iova->pfn_hi);
		if (next == NULL)
			goto error;
	}

	__cached_rbnode_delete_update(iovad, iova);
	rb_erase(&iova->node, &iovad->rbroot);

	if (prev) {
		iova_insert_rbtree(&iovad->rbroot, prev, NULL);
		iova->pfn_lo = pfn_lo;
	}
	if (next) {
		iova_insert_rbtree(&iovad->rbroot, next, NULL);
		iova->pfn_hi = pfn_hi;
	}
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

	return iova;

error:
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	if (prev)
		free_iova_mem(prev);
	return NULL;
}

/*
 * Magazine caches for IOVA ranges. For an introduction to magazines,
 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
 * For simplicity, we use a static magazine size and don't implement the
 * dynamic size tuning described in the paper.
 */

#define IOVA_MAG_SIZE 128

struct iova_magazine {
	unsigned long size;
	unsigned long pfns[IOVA_MAG_SIZE];
};

struct iova_cpu_rcache {
	spinlock_t lock;
	struct iova_magazine *loaded;
	struct iova_magazine *prev;
};

static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
{
	return kzalloc(sizeof(struct iova_magazine), flags);
}

static void iova_magazine_free(struct iova_magazine *mag)
{
	kfree(mag);
}

static void
iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
{
	unsigned long flags;
	int i;

	if (!mag)
		return;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);

	for (i = 0; i < mag->size; ++i) {
		struct iova *iova = private_find_iova(iovad, mag->pfns[i]);

		if (WARN_ON(!iova))
			continue;

		private_free_iova(iovad, iova);
	}

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

	mag->size = 0;
}

static bool iova_magazine_full(struct iova_magazine *mag)
{
	return (mag && mag->size == IOVA_MAG_SIZE);
}

static bool iova_magazine_empty(struct iova_magazine *mag)
{
	return (!mag || mag->size == 0);
}

static unsigned long iova_magazine_pop(struct iova_magazine *mag,
				       unsigned long limit_pfn)
{
	BUG_ON(iova_magazine_empty(mag));

	if (mag->pfns[mag->size - 1] >= limit_pfn)
		return 0;

	return mag->pfns[--mag->size];
}

static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
{
	BUG_ON(iova_magazine_full(mag));

	mag->pfns[mag->size++] = pfn;
}

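/*
 * Allocate the per-CPU magazine pairs for every rcache size class; class i
 * caches ranges whose size rounds up to 2^i pfns, for i below
 * IOVA_RANGE_CACHE_MAX_SIZE.
 */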
static void init_iova_rcaches(struct iova_domain *iovad)
{
	struct iova_cpu_rcache *cpu_rcache;
	struct iova_rcache *rcache;
	unsigned int cpu;
	int i;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		spin_lock_init(&rcache->lock);
		rcache->depot_size = 0;
		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size());
		if (WARN_ON(!rcache->cpu_rcaches))
			continue;
		for_each_possible_cpu(cpu) {
			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
			spin_lock_init(&cpu_rcache->lock);
			cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
			cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
		}
	}
}

/*
 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
 * return true on success. Can fail if rcache is full and we can't free
 * space, and free_iova_fast() (our only caller) will then return the IOVA
 * range to the rbtree instead.
 */
static bool __iova_rcache_insert(struct iova_domain *iovad,
				 struct iova_rcache *rcache,
				 unsigned long iova_pfn)
{
	struct iova_magazine *mag_to_free = NULL;
	struct iova_cpu_rcache *cpu_rcache;
	bool can_insert = false;
	unsigned long flags;

	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
	spin_lock_irqsave(&cpu_rcache->lock, flags);

	if (!iova_magazine_full(cpu_rcache->loaded)) {
		can_insert = true;
	} else if (!iova_magazine_full(cpu_rcache->prev)) {
		swap(cpu_rcache->prev, cpu_rcache->loaded);
		can_insert = true;
	} else {
		struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);

		if (new_mag) {
			spin_lock(&rcache->lock);
			if (rcache->depot_size < MAX_GLOBAL_MAGS) {
				rcache->depot[rcache->depot_size++] =
					cpu_rcache->loaded;
			} else {
				mag_to_free = cpu_rcache->loaded;
			}
			spin_unlock(&rcache->lock);

			cpu_rcache->loaded = new_mag;
			can_insert = true;
		}
	}

	if (can_insert)
		iova_magazine_push(cpu_rcache->loaded, iova_pfn);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	if (mag_to_free) {
		iova_magazine_free_pfns(mag_to_free, iovad);
		iova_magazine_free(mag_to_free);
	}

	return can_insert;
}

static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
			       unsigned long size)
{
	unsigned int log_size = order_base_2(size);

	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
		return false;

	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
}

/*
 * Caller wants to allocate a new IOVA range from 'rcache'. If we can
 * satisfy the request, return a matching non-NULL range and remove
 * it from the 'rcache'.
 */
static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
				       unsigned long limit_pfn)
{
	struct iova_cpu_rcache *cpu_rcache;
	unsigned long iova_pfn = 0;
	bool has_pfn = false;
	unsigned long flags;

	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
	spin_lock_irqsave(&cpu_rcache->lock, flags);

	if (!iova_magazine_empty(cpu_rcache->loaded)) {
		has_pfn = true;
	} else if (!iova_magazine_empty(cpu_rcache->prev)) {
		swap(cpu_rcache->prev, cpu_rcache->loaded);
		has_pfn = true;
	} else {
		spin_lock(&rcache->lock);
		if (rcache->depot_size > 0) {
			iova_magazine_free(cpu_rcache->loaded);
			cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
			has_pfn = true;
		}
		spin_unlock(&rcache->lock);
	}

	if (has_pfn)
		iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	return iova_pfn;
}

/*
 * Try to satisfy IOVA allocation range from rcache. Fail if requested
 * size is too big or the DMA limit we are given isn't satisfied by the
 * top element in the magazine.
 */
static unsigned long iova_rcache_get(struct iova_domain *iovad,
				     unsigned long size,
				     unsigned long limit_pfn)
{
	unsigned int log_size = order_base_2(size);

	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
		return 0;

	return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn);
}

/*
 * Free a cpu's rcache.
 */
static void free_cpu_iova_rcache(unsigned int cpu, struct iova_domain *iovad,
				 struct iova_rcache *rcache)
{
	struct iova_cpu_rcache *cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
	unsigned long flags;

	spin_lock_irqsave(&cpu_rcache->lock, flags);

	iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
	iova_magazine_free(cpu_rcache->loaded);

	iova_magazine_free_pfns(cpu_rcache->prev, iovad);
	iova_magazine_free(cpu_rcache->prev);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
}

/*
 * free rcache data structures.
 */
static void free_iova_rcaches(struct iova_domain *iovad)
{
	struct iova_rcache *rcache;
	unsigned long flags;
	unsigned int cpu;
	int i, j;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		for_each_possible_cpu(cpu)
			free_cpu_iova_rcache(cpu, iovad, rcache);
		spin_lock_irqsave(&rcache->lock, flags);
		free_percpu(rcache->cpu_rcaches);
		for (j = 0; j < rcache->depot_size; ++j) {
			iova_magazine_free_pfns(rcache->depot[j], iovad);
			iova_magazine_free(rcache->depot[j]);
		}
		spin_unlock_irqrestore(&rcache->lock, flags);
	}
}

/*
 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
 */
void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
{
	struct iova_cpu_rcache *cpu_rcache;
	struct iova_rcache *rcache;
	unsigned long flags;
	int i;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
		spin_lock_irqsave(&cpu_rcache->lock, flags);
		iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
		iova_magazine_free_pfns(cpu_rcache->prev, iovad);
		spin_unlock_irqrestore(&cpu_rcache->lock, flags);
	}
}

MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
MODULE_LICENSE("GPL");