blob: b9af2419006f8341563551d2f32c1226c081b249 [file] [log] [blame]
xjb04a4022021-11-25 15:01:52 +08001/*
2 * Copyright © 2006-2014 Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * Authors: David Woodhouse <dwmw2@infradead.org>,
14 * Ashok Raj <ashok.raj@intel.com>,
15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com>
18 * Joerg Roedel <jroedel@suse.de>
19 */
20
21#define pr_fmt(fmt) "DMAR: " fmt
22
23#include <linux/init.h>
24#include <linux/bitmap.h>
25#include <linux/debugfs.h>
26#include <linux/export.h>
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
35#include <linux/memory.h>
36#include <linux/cpu.h>
37#include <linux/timer.h>
38#include <linux/io.h>
39#include <linux/iova.h>
40#include <linux/iommu.h>
41#include <linux/intel-iommu.h>
42#include <linux/syscore_ops.h>
43#include <linux/tboot.h>
44#include <linux/dmi.h>
45#include <linux/pci-ats.h>
46#include <linux/memblock.h>
47#include <linux/dma-contiguous.h>
48#include <linux/dma-direct.h>
49#include <linux/crash_dump.h>
50#include <asm/irq_remapping.h>
51#include <asm/cacheflush.h>
52#include <asm/iommu.h>
53
54#include "irq_remapping.h"
55#include "intel-pasid.h"
56
57#define ROOT_SIZE VTD_PAGE_SIZE
58#define CONTEXT_SIZE VTD_PAGE_SIZE
59
60#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
61#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
62#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
63#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
64
65#define IOAPIC_RANGE_START (0xfee00000)
66#define IOAPIC_RANGE_END (0xfeefffff)
67#define IOVA_START_ADDR (0x1000)
68
69#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
70
71#define MAX_AGAW_WIDTH 64
72#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
73
74#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
75#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
76
77/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
78 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
79#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
80 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
81#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
82
83/* IO virtual address start page frame number */
84#define IOVA_START_PFN (1)
85
86#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
87
88/* page table handling */
89#define LEVEL_STRIDE (9)
90#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
91
92/*
93 * This bitmap is used to advertise the page sizes our hardware support
94 * to the IOMMU core, which will then use this information to split
95 * physically contiguous memory regions it is mapping into page sizes
96 * that we support.
97 *
98 * Traditionally the IOMMU core just handed us the mappings directly,
99 * after making sure the size is an order of a 4KiB page and that the
100 * mapping has natural alignment.
101 *
102 * To retain this behavior, we currently advertise that we support
103 * all page sizes that are an order of 4KiB.
104 *
105 * If at some point we'd like to utilize the IOMMU core's new behavior,
106 * we could change this to advertise the real page sizes we support.
107 */
108#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
109
110static inline int agaw_to_level(int agaw)
111{
112 return agaw + 2;
113}
114
115static inline int agaw_to_width(int agaw)
116{
117 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
118}
119
120static inline int width_to_agaw(int width)
121{
122 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
123}
124
125static inline unsigned int level_to_offset_bits(int level)
126{
127 return (level - 1) * LEVEL_STRIDE;
128}
129
130static inline int pfn_level_offset(unsigned long pfn, int level)
131{
132 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
133}
134
135static inline unsigned long level_mask(int level)
136{
137 return -1UL << level_to_offset_bits(level);
138}
139
140static inline unsigned long level_size(int level)
141{
142 return 1UL << level_to_offset_bits(level);
143}
144
145static inline unsigned long align_to_level(unsigned long pfn, int level)
146{
147 return (pfn + level_size(level) - 1) & level_mask(level);
148}
149
150static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
151{
152 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
153}
154
155/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
156 are never going to work. */
157static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
158{
159 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
160}
161
162static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
163{
164 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
165}
166static inline unsigned long page_to_dma_pfn(struct page *pg)
167{
168 return mm_to_dma_pfn(page_to_pfn(pg));
169}
170static inline unsigned long virt_to_dma_pfn(void *p)
171{
172 return page_to_dma_pfn(virt_to_page(p));
173}
174
175/* global iommu list, set NULL for ignored DMAR units */
176static struct intel_iommu **g_iommus;
177
178static void __init check_tylersburg_isoch(void);
179static int rwbf_quirk;
180
181/*
182 * set to 1 to panic kernel if can't successfully enable VT-d
183 * (used when kernel is launched w/ TXT)
184 */
185static int force_on = 0;
186int intel_iommu_tboot_noforce;
187
188/*
189 * 0: Present
190 * 1-11: Reserved
191 * 12-63: Context Ptr (12 - (haw-1))
192 * 64-127: Reserved
193 */
194struct root_entry {
195 u64 lo;
196 u64 hi;
197};
198#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
199
200/*
201 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
202 * if marked present.
203 */
204static phys_addr_t root_entry_lctp(struct root_entry *re)
205{
206 if (!(re->lo & 1))
207 return 0;
208
209 return re->lo & VTD_PAGE_MASK;
210}
211
212/*
213 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
214 * if marked present.
215 */
216static phys_addr_t root_entry_uctp(struct root_entry *re)
217{
218 if (!(re->hi & 1))
219 return 0;
220
221 return re->hi & VTD_PAGE_MASK;
222}
223/*
224 * low 64 bits:
225 * 0: present
226 * 1: fault processing disable
227 * 2-3: translation type
228 * 12-63: address space root
229 * high 64 bits:
230 * 0-2: address width
231 * 3-6: aval
232 * 8-23: domain id
233 */
234struct context_entry {
235 u64 lo;
236 u64 hi;
237};
238
239static inline void context_clear_pasid_enable(struct context_entry *context)
240{
241 context->lo &= ~(1ULL << 11);
242}
243
244static inline bool context_pasid_enabled(struct context_entry *context)
245{
246 return !!(context->lo & (1ULL << 11));
247}
248
249static inline void context_set_copied(struct context_entry *context)
250{
251 context->hi |= (1ull << 3);
252}
253
254static inline bool context_copied(struct context_entry *context)
255{
256 return !!(context->hi & (1ULL << 3));
257}
258
259static inline bool __context_present(struct context_entry *context)
260{
261 return (context->lo & 1);
262}
263
264static inline bool context_present(struct context_entry *context)
265{
266 return context_pasid_enabled(context) ?
267 __context_present(context) :
268 __context_present(context) && !context_copied(context);
269}
270
271static inline void context_set_present(struct context_entry *context)
272{
273 context->lo |= 1;
274}
275
276static inline void context_set_fault_enable(struct context_entry *context)
277{
278 context->lo &= (((u64)-1) << 2) | 1;
279}
280
281static inline void context_set_translation_type(struct context_entry *context,
282 unsigned long value)
283{
284 context->lo &= (((u64)-1) << 4) | 3;
285 context->lo |= (value & 3) << 2;
286}
287
288static inline void context_set_address_root(struct context_entry *context,
289 unsigned long value)
290{
291 context->lo &= ~VTD_PAGE_MASK;
292 context->lo |= value & VTD_PAGE_MASK;
293}
294
295static inline void context_set_address_width(struct context_entry *context,
296 unsigned long value)
297{
298 context->hi |= value & 7;
299}
300
301static inline void context_set_domain_id(struct context_entry *context,
302 unsigned long value)
303{
304 context->hi |= (value & ((1 << 16) - 1)) << 8;
305}
306
307static inline int context_domain_id(struct context_entry *c)
308{
309 return((c->hi >> 8) & 0xffff);
310}
311
312static inline void context_clear_entry(struct context_entry *context)
313{
314 context->lo = 0;
315 context->hi = 0;
316}
317
318/*
319 * 0: readable
320 * 1: writable
321 * 2-6: reserved
322 * 7: super page
323 * 8-10: available
324 * 11: snoop behavior
325 * 12-63: Host physcial address
326 */
327struct dma_pte {
328 u64 val;
329};
330
331static inline void dma_clear_pte(struct dma_pte *pte)
332{
333 pte->val = 0;
334}
335
336static inline u64 dma_pte_addr(struct dma_pte *pte)
337{
338#ifdef CONFIG_64BIT
339 return pte->val & VTD_PAGE_MASK;
340#else
341 /* Must have a full atomic 64-bit read */
342 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
343#endif
344}
345
346static inline bool dma_pte_present(struct dma_pte *pte)
347{
348 return (pte->val & 3) != 0;
349}
350
351static inline bool dma_pte_superpage(struct dma_pte *pte)
352{
353 return (pte->val & DMA_PTE_LARGE_PAGE);
354}
355
356static inline int first_pte_in_page(struct dma_pte *pte)
357{
358 return !((unsigned long)pte & ~VTD_PAGE_MASK);
359}
360
361/*
362 * This domain is a statically identity mapping domain.
363 * 1. This domain creats a static 1:1 mapping to all usable memory.
364 * 2. It maps to each iommu if successful.
365 * 3. Each iommu mapps to this domain if successful.
366 */
367static struct dmar_domain *si_domain;
368static int hw_pass_through = 1;
369
370/*
371 * Domain represents a virtual machine, more than one devices
372 * across iommus may be owned in one domain, e.g. kvm guest.
373 */
374#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
375
376/* si_domain contains mulitple devices */
377#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
378
379#define for_each_domain_iommu(idx, domain) \
380 for (idx = 0; idx < g_num_of_iommus; idx++) \
381 if (domain->iommu_refcnt[idx])
382
383struct dmar_rmrr_unit {
384 struct list_head list; /* list of rmrr units */
385 struct acpi_dmar_header *hdr; /* ACPI header */
386 u64 base_address; /* reserved base address*/
387 u64 end_address; /* reserved end address */
388 struct dmar_dev_scope *devices; /* target devices */
389 int devices_cnt; /* target device count */
390 struct iommu_resv_region *resv; /* reserved region handle */
391};
392
393struct dmar_atsr_unit {
394 struct list_head list; /* list of ATSR units */
395 struct acpi_dmar_header *hdr; /* ACPI header */
396 struct dmar_dev_scope *devices; /* target devices */
397 int devices_cnt; /* target device count */
398 u8 include_all:1; /* include all ports */
399};
400
401static LIST_HEAD(dmar_atsr_units);
402static LIST_HEAD(dmar_rmrr_units);
403
404#define for_each_rmrr_units(rmrr) \
405 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
406
407/* bitmap for indexing intel_iommus */
408static int g_num_of_iommus;
409
410static void domain_exit(struct dmar_domain *domain);
411static void domain_remove_dev_info(struct dmar_domain *domain);
412static void dmar_remove_one_dev_info(struct dmar_domain *domain,
413 struct device *dev);
414static void __dmar_remove_one_dev_info(struct device_domain_info *info);
415static void domain_context_clear(struct intel_iommu *iommu,
416 struct device *dev);
417static int domain_detach_iommu(struct dmar_domain *domain,
418 struct intel_iommu *iommu);
419
420#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
421int dmar_disabled = 0;
422#else
423int dmar_disabled = 1;
424#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
425
426int intel_iommu_enabled = 0;
427EXPORT_SYMBOL_GPL(intel_iommu_enabled);
428
429static int dmar_map_gfx = 1;
430static int dmar_forcedac;
431static int intel_iommu_strict;
432static int intel_iommu_superpage = 1;
433static int intel_iommu_ecs = 1;
434static int intel_iommu_pasid28;
435static int iommu_identity_mapping;
436
437#define IDENTMAP_ALL 1
438#define IDENTMAP_GFX 2
439#define IDENTMAP_AZALIA 4
440
441/* Broadwell and Skylake have broken ECS support — normal so-called "second
442 * level" translation of DMA requests-without-PASID doesn't actually happen
443 * unless you also set the NESTE bit in an extended context-entry. Which of
444 * course means that SVM doesn't work because it's trying to do nested
445 * translation of the physical addresses it finds in the process page tables,
446 * through the IOVA->phys mapping found in the "second level" page tables.
447 *
448 * The VT-d specification was retroactively changed to change the definition
449 * of the capability bits and pretend that Broadwell/Skylake never happened...
450 * but unfortunately the wrong bit was changed. It's ECS which is broken, but
451 * for some reason it was the PASID capability bit which was redefined (from
452 * bit 28 on BDW/SKL to bit 40 in future).
453 *
454 * So our test for ECS needs to eschew those implementations which set the old
455 * PASID capabiity bit 28, since those are the ones on which ECS is broken.
456 * Unless we are working around the 'pasid28' limitations, that is, by putting
457 * the device into passthrough mode for normal DMA and thus masking the bug.
458 */
459#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
460 (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
461/* PASID support is thus enabled if ECS is enabled and *either* of the old
462 * or new capability bits are set. */
463#define pasid_enabled(iommu) (ecs_enabled(iommu) && \
464 (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
465
466int intel_iommu_gfx_mapped;
467EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
468
469#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
470static DEFINE_SPINLOCK(device_domain_lock);
471static LIST_HEAD(device_domain_list);
472
473/*
474 * Iterate over elements in device_domain_list and call the specified
475 * callback @fn against each element. This helper should only be used
476 * in the context where the device_domain_lock has already been holden.
477 */
478int for_each_device_domain(int (*fn)(struct device_domain_info *info,
479 void *data), void *data)
480{
481 int ret = 0;
482 struct device_domain_info *info;
483
484 assert_spin_locked(&device_domain_lock);
485 list_for_each_entry(info, &device_domain_list, global) {
486 ret = fn(info, data);
487 if (ret)
488 return ret;
489 }
490
491 return 0;
492}
493
494const struct iommu_ops intel_iommu_ops;
495
496static bool translation_pre_enabled(struct intel_iommu *iommu)
497{
498 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
499}
500
501static void clear_translation_pre_enabled(struct intel_iommu *iommu)
502{
503 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
504}
505
506static void init_translation_status(struct intel_iommu *iommu)
507{
508 u32 gsts;
509
510 gsts = readl(iommu->reg + DMAR_GSTS_REG);
511 if (gsts & DMA_GSTS_TES)
512 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
513}
514
515/* Convert generic 'struct iommu_domain to private struct dmar_domain */
516static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
517{
518 return container_of(dom, struct dmar_domain, domain);
519}
520
521static int __init intel_iommu_setup(char *str)
522{
523 if (!str)
524 return -EINVAL;
525 while (*str) {
526 if (!strncmp(str, "on", 2)) {
527 dmar_disabled = 0;
528 pr_info("IOMMU enabled\n");
529 } else if (!strncmp(str, "off", 3)) {
530 dmar_disabled = 1;
531 pr_info("IOMMU disabled\n");
532 } else if (!strncmp(str, "igfx_off", 8)) {
533 dmar_map_gfx = 0;
534 pr_info("Disable GFX device mapping\n");
535 } else if (!strncmp(str, "forcedac", 8)) {
536 pr_info("Forcing DAC for PCI devices\n");
537 dmar_forcedac = 1;
538 } else if (!strncmp(str, "strict", 6)) {
539 pr_info("Disable batched IOTLB flush\n");
540 intel_iommu_strict = 1;
541 } else if (!strncmp(str, "sp_off", 6)) {
542 pr_info("Disable supported super page\n");
543 intel_iommu_superpage = 0;
544 } else if (!strncmp(str, "ecs_off", 7)) {
545 printk(KERN_INFO
546 "Intel-IOMMU: disable extended context table support\n");
547 intel_iommu_ecs = 0;
548 } else if (!strncmp(str, "pasid28", 7)) {
549 printk(KERN_INFO
550 "Intel-IOMMU: enable pre-production PASID support\n");
551 intel_iommu_pasid28 = 1;
552 iommu_identity_mapping |= IDENTMAP_GFX;
553 } else if (!strncmp(str, "tboot_noforce", 13)) {
554 printk(KERN_INFO
555 "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
556 intel_iommu_tboot_noforce = 1;
557 }
558
559 str += strcspn(str, ",");
560 while (*str == ',')
561 str++;
562 }
563 return 0;
564}
565__setup("intel_iommu=", intel_iommu_setup);
566
567static struct kmem_cache *iommu_domain_cache;
568static struct kmem_cache *iommu_devinfo_cache;
569
570static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
571{
572 struct dmar_domain **domains;
573 int idx = did >> 8;
574
575 domains = iommu->domains[idx];
576 if (!domains)
577 return NULL;
578
579 return domains[did & 0xff];
580}
581
582static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
583 struct dmar_domain *domain)
584{
585 struct dmar_domain **domains;
586 int idx = did >> 8;
587
588 if (!iommu->domains[idx]) {
589 size_t size = 256 * sizeof(struct dmar_domain *);
590 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
591 }
592
593 domains = iommu->domains[idx];
594 if (WARN_ON(!domains))
595 return;
596 else
597 domains[did & 0xff] = domain;
598}
599
600void *alloc_pgtable_page(int node)
601{
602 struct page *page;
603 void *vaddr = NULL;
604
605 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
606 if (page)
607 vaddr = page_address(page);
608 return vaddr;
609}
610
611void free_pgtable_page(void *vaddr)
612{
613 free_page((unsigned long)vaddr);
614}
615
616static inline void *alloc_domain_mem(void)
617{
618 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
619}
620
621static void free_domain_mem(void *vaddr)
622{
623 kmem_cache_free(iommu_domain_cache, vaddr);
624}
625
626static inline void * alloc_devinfo_mem(void)
627{
628 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
629}
630
631static inline void free_devinfo_mem(void *vaddr)
632{
633 kmem_cache_free(iommu_devinfo_cache, vaddr);
634}
635
636static inline int domain_type_is_vm(struct dmar_domain *domain)
637{
638 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
639}
640
641static inline int domain_type_is_si(struct dmar_domain *domain)
642{
643 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
644}
645
646static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
647{
648 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
649 DOMAIN_FLAG_STATIC_IDENTITY);
650}
651
652static inline int domain_pfn_supported(struct dmar_domain *domain,
653 unsigned long pfn)
654{
655 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
656
657 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
658}
659
660static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
661{
662 unsigned long sagaw;
663 int agaw = -1;
664
665 sagaw = cap_sagaw(iommu->cap);
666 for (agaw = width_to_agaw(max_gaw);
667 agaw >= 0; agaw--) {
668 if (test_bit(agaw, &sagaw))
669 break;
670 }
671
672 return agaw;
673}
674
675/*
676 * Calculate max SAGAW for each iommu.
677 */
678int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
679{
680 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
681}
682
683/*
684 * calculate agaw for each iommu.
685 * "SAGAW" may be different across iommus, use a default agaw, and
686 * get a supported less agaw for iommus that don't support the default agaw.
687 */
688int iommu_calculate_agaw(struct intel_iommu *iommu)
689{
690 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
691}
692
693/* This functionin only returns single iommu in a domain */
694struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
695{
696 int iommu_id;
697
698 /* si_domain and vm domain should not get here. */
699 BUG_ON(domain_type_is_vm_or_si(domain));
700 for_each_domain_iommu(iommu_id, domain)
701 break;
702
703 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
704 return NULL;
705
706 return g_iommus[iommu_id];
707}
708
709static void domain_update_iommu_coherency(struct dmar_domain *domain)
710{
711 struct dmar_drhd_unit *drhd;
712 struct intel_iommu *iommu;
713 bool found = false;
714 int i;
715
716 domain->iommu_coherency = 1;
717
718 for_each_domain_iommu(i, domain) {
719 found = true;
720 if (!ecap_coherent(g_iommus[i]->ecap)) {
721 domain->iommu_coherency = 0;
722 break;
723 }
724 }
725 if (found)
726 return;
727
728 /* No hardware attached; use lowest common denominator */
729 rcu_read_lock();
730 for_each_active_iommu(iommu, drhd) {
731 if (!ecap_coherent(iommu->ecap)) {
732 domain->iommu_coherency = 0;
733 break;
734 }
735 }
736 rcu_read_unlock();
737}
738
739static int domain_update_iommu_snooping(struct intel_iommu *skip)
740{
741 struct dmar_drhd_unit *drhd;
742 struct intel_iommu *iommu;
743 int ret = 1;
744
745 rcu_read_lock();
746 for_each_active_iommu(iommu, drhd) {
747 if (iommu != skip) {
748 if (!ecap_sc_support(iommu->ecap)) {
749 ret = 0;
750 break;
751 }
752 }
753 }
754 rcu_read_unlock();
755
756 return ret;
757}
758
759static int domain_update_iommu_superpage(struct intel_iommu *skip)
760{
761 struct dmar_drhd_unit *drhd;
762 struct intel_iommu *iommu;
763 int mask = 0xf;
764
765 if (!intel_iommu_superpage) {
766 return 0;
767 }
768
769 /* set iommu_superpage to the smallest common denominator */
770 rcu_read_lock();
771 for_each_active_iommu(iommu, drhd) {
772 if (iommu != skip) {
773 mask &= cap_super_page_val(iommu->cap);
774 if (!mask)
775 break;
776 }
777 }
778 rcu_read_unlock();
779
780 return fls(mask);
781}
782
783/* Some capabilities may be different across iommus */
784static void domain_update_iommu_cap(struct dmar_domain *domain)
785{
786 domain_update_iommu_coherency(domain);
787 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
788 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
789}
790
791static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
792 u8 bus, u8 devfn, int alloc)
793{
794 struct root_entry *root = &iommu->root_entry[bus];
795 struct context_entry *context;
796 u64 *entry;
797
798 entry = &root->lo;
799 if (ecs_enabled(iommu)) {
800 if (devfn >= 0x80) {
801 devfn -= 0x80;
802 entry = &root->hi;
803 }
804 devfn *= 2;
805 }
806 if (*entry & 1)
807 context = phys_to_virt(*entry & VTD_PAGE_MASK);
808 else {
809 unsigned long phy_addr;
810 if (!alloc)
811 return NULL;
812
813 context = alloc_pgtable_page(iommu->node);
814 if (!context)
815 return NULL;
816
817 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
818 phy_addr = virt_to_phys((void *)context);
819 *entry = phy_addr | 1;
820 __iommu_flush_cache(iommu, entry, sizeof(*entry));
821 }
822 return &context[devfn];
823}
824
825static int iommu_dummy(struct device *dev)
826{
827 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
828}
829
830static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
831{
832 struct dmar_drhd_unit *drhd = NULL;
833 struct intel_iommu *iommu;
834 struct device *tmp;
835 struct pci_dev *ptmp, *pdev = NULL;
836 u16 segment = 0;
837 int i;
838
839 if (iommu_dummy(dev))
840 return NULL;
841
842 if (dev_is_pci(dev)) {
843 struct pci_dev *pf_pdev;
844
845 pdev = to_pci_dev(dev);
846
847#ifdef CONFIG_X86
848 /* VMD child devices currently cannot be handled individually */
849 if (is_vmd(pdev->bus))
850 return NULL;
851#endif
852
853 /* VFs aren't listed in scope tables; we need to look up
854 * the PF instead to find the IOMMU. */
855 pf_pdev = pci_physfn(pdev);
856 dev = &pf_pdev->dev;
857 segment = pci_domain_nr(pdev->bus);
858 } else if (has_acpi_companion(dev))
859 dev = &ACPI_COMPANION(dev)->dev;
860
861 rcu_read_lock();
862 for_each_active_iommu(iommu, drhd) {
863 if (pdev && segment != drhd->segment)
864 continue;
865
866 for_each_active_dev_scope(drhd->devices,
867 drhd->devices_cnt, i, tmp) {
868 if (tmp == dev) {
869 /* For a VF use its original BDF# not that of the PF
870 * which we used for the IOMMU lookup. Strictly speaking
871 * we could do this for all PCI devices; we only need to
872 * get the BDF# from the scope table for ACPI matches. */
873 if (pdev && pdev->is_virtfn)
874 goto got_pdev;
875
876 *bus = drhd->devices[i].bus;
877 *devfn = drhd->devices[i].devfn;
878 goto out;
879 }
880
881 if (!pdev || !dev_is_pci(tmp))
882 continue;
883
884 ptmp = to_pci_dev(tmp);
885 if (ptmp->subordinate &&
886 ptmp->subordinate->number <= pdev->bus->number &&
887 ptmp->subordinate->busn_res.end >= pdev->bus->number)
888 goto got_pdev;
889 }
890
891 if (pdev && drhd->include_all) {
892 got_pdev:
893 *bus = pdev->bus->number;
894 *devfn = pdev->devfn;
895 goto out;
896 }
897 }
898 iommu = NULL;
899 out:
900 rcu_read_unlock();
901
902 return iommu;
903}
904
905static void domain_flush_cache(struct dmar_domain *domain,
906 void *addr, int size)
907{
908 if (!domain->iommu_coherency)
909 clflush_cache_range(addr, size);
910}
911
912static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
913{
914 struct context_entry *context;
915 int ret = 0;
916 unsigned long flags;
917
918 spin_lock_irqsave(&iommu->lock, flags);
919 context = iommu_context_addr(iommu, bus, devfn, 0);
920 if (context)
921 ret = context_present(context);
922 spin_unlock_irqrestore(&iommu->lock, flags);
923 return ret;
924}
925
926static void free_context_table(struct intel_iommu *iommu)
927{
928 int i;
929 unsigned long flags;
930 struct context_entry *context;
931
932 spin_lock_irqsave(&iommu->lock, flags);
933 if (!iommu->root_entry) {
934 goto out;
935 }
936 for (i = 0; i < ROOT_ENTRY_NR; i++) {
937 context = iommu_context_addr(iommu, i, 0, 0);
938 if (context)
939 free_pgtable_page(context);
940
941 if (!ecs_enabled(iommu))
942 continue;
943
944 context = iommu_context_addr(iommu, i, 0x80, 0);
945 if (context)
946 free_pgtable_page(context);
947
948 }
949 free_pgtable_page(iommu->root_entry);
950 iommu->root_entry = NULL;
951out:
952 spin_unlock_irqrestore(&iommu->lock, flags);
953}
954
955static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
956 unsigned long pfn, int *target_level)
957{
958 struct dma_pte *parent, *pte = NULL;
959 int level = agaw_to_level(domain->agaw);
960 int offset;
961
962 BUG_ON(!domain->pgd);
963
964 if (!domain_pfn_supported(domain, pfn))
965 /* Address beyond IOMMU's addressing capabilities. */
966 return NULL;
967
968 parent = domain->pgd;
969
970 while (1) {
971 void *tmp_page;
972
973 offset = pfn_level_offset(pfn, level);
974 pte = &parent[offset];
975 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
976 break;
977 if (level == *target_level)
978 break;
979
980 if (!dma_pte_present(pte)) {
981 uint64_t pteval;
982
983 tmp_page = alloc_pgtable_page(domain->nid);
984
985 if (!tmp_page)
986 return NULL;
987
988 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
989 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
990 if (cmpxchg64(&pte->val, 0ULL, pteval))
991 /* Someone else set it while we were thinking; use theirs. */
992 free_pgtable_page(tmp_page);
993 else
994 domain_flush_cache(domain, pte, sizeof(*pte));
995 }
996 if (level == 1)
997 break;
998
999 parent = phys_to_virt(dma_pte_addr(pte));
1000 level--;
1001 }
1002
1003 if (!*target_level)
1004 *target_level = level;
1005
1006 return pte;
1007}
1008
1009
1010/* return address's pte at specific level */
1011static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
1012 unsigned long pfn,
1013 int level, int *large_page)
1014{
1015 struct dma_pte *parent, *pte = NULL;
1016 int total = agaw_to_level(domain->agaw);
1017 int offset;
1018
1019 parent = domain->pgd;
1020 while (level <= total) {
1021 offset = pfn_level_offset(pfn, total);
1022 pte = &parent[offset];
1023 if (level == total)
1024 return pte;
1025
1026 if (!dma_pte_present(pte)) {
1027 *large_page = total;
1028 break;
1029 }
1030
1031 if (dma_pte_superpage(pte)) {
1032 *large_page = total;
1033 return pte;
1034 }
1035
1036 parent = phys_to_virt(dma_pte_addr(pte));
1037 total--;
1038 }
1039 return NULL;
1040}
1041
1042/* clear last level pte, a tlb flush should be followed */
1043static void dma_pte_clear_range(struct dmar_domain *domain,
1044 unsigned long start_pfn,
1045 unsigned long last_pfn)
1046{
1047 unsigned int large_page = 1;
1048 struct dma_pte *first_pte, *pte;
1049
1050 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1051 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1052 BUG_ON(start_pfn > last_pfn);
1053
1054 /* we don't need lock here; nobody else touches the iova range */
1055 do {
1056 large_page = 1;
1057 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
1058 if (!pte) {
1059 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
1060 continue;
1061 }
1062 do {
1063 dma_clear_pte(pte);
1064 start_pfn += lvl_to_nr_pages(large_page);
1065 pte++;
1066 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1067
1068 domain_flush_cache(domain, first_pte,
1069 (void *)pte - (void *)first_pte);
1070
1071 } while (start_pfn && start_pfn <= last_pfn);
1072}
1073
1074static void dma_pte_free_level(struct dmar_domain *domain, int level,
1075 int retain_level, struct dma_pte *pte,
1076 unsigned long pfn, unsigned long start_pfn,
1077 unsigned long last_pfn)
1078{
1079 pfn = max(start_pfn, pfn);
1080 pte = &pte[pfn_level_offset(pfn, level)];
1081
1082 do {
1083 unsigned long level_pfn;
1084 struct dma_pte *level_pte;
1085
1086 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1087 goto next;
1088
1089 level_pfn = pfn & level_mask(level);
1090 level_pte = phys_to_virt(dma_pte_addr(pte));
1091
1092 if (level > 2) {
1093 dma_pte_free_level(domain, level - 1, retain_level,
1094 level_pte, level_pfn, start_pfn,
1095 last_pfn);
1096 }
1097
1098 /*
1099 * Free the page table if we're below the level we want to
1100 * retain and the range covers the entire table.
1101 */
1102 if (level < retain_level && !(start_pfn > level_pfn ||
1103 last_pfn < level_pfn + level_size(level) - 1)) {
1104 dma_clear_pte(pte);
1105 domain_flush_cache(domain, pte, sizeof(*pte));
1106 free_pgtable_page(level_pte);
1107 }
1108next:
1109 pfn += level_size(level);
1110 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1111}
1112
1113/*
1114 * clear last level (leaf) ptes and free page table pages below the
1115 * level we wish to keep intact.
1116 */
1117static void dma_pte_free_pagetable(struct dmar_domain *domain,
1118 unsigned long start_pfn,
1119 unsigned long last_pfn,
1120 int retain_level)
1121{
1122 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1123 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1124 BUG_ON(start_pfn > last_pfn);
1125
1126 dma_pte_clear_range(domain, start_pfn, last_pfn);
1127
1128 /* We don't need lock here; nobody else touches the iova range */
1129 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1130 domain->pgd, 0, start_pfn, last_pfn);
1131
1132 /* free pgd */
1133 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1134 free_pgtable_page(domain->pgd);
1135 domain->pgd = NULL;
1136 }
1137}
1138
1139/* When a page at a given level is being unlinked from its parent, we don't
1140 need to *modify* it at all. All we need to do is make a list of all the
1141 pages which can be freed just as soon as we've flushed the IOTLB and we
1142 know the hardware page-walk will no longer touch them.
1143 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1144 be freed. */
1145static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1146 int level, struct dma_pte *pte,
1147 struct page *freelist)
1148{
1149 struct page *pg;
1150
1151 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1152 pg->freelist = freelist;
1153 freelist = pg;
1154
1155 if (level == 1)
1156 return freelist;
1157
1158 pte = page_address(pg);
1159 do {
1160 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1161 freelist = dma_pte_list_pagetables(domain, level - 1,
1162 pte, freelist);
1163 pte++;
1164 } while (!first_pte_in_page(pte));
1165
1166 return freelist;
1167}
1168
1169static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1170 struct dma_pte *pte, unsigned long pfn,
1171 unsigned long start_pfn,
1172 unsigned long last_pfn,
1173 struct page *freelist)
1174{
1175 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1176
1177 pfn = max(start_pfn, pfn);
1178 pte = &pte[pfn_level_offset(pfn, level)];
1179
1180 do {
1181 unsigned long level_pfn;
1182
1183 if (!dma_pte_present(pte))
1184 goto next;
1185
1186 level_pfn = pfn & level_mask(level);
1187
1188 /* If range covers entire pagetable, free it */
1189 if (start_pfn <= level_pfn &&
1190 last_pfn >= level_pfn + level_size(level) - 1) {
1191 /* These suborbinate page tables are going away entirely. Don't
1192 bother to clear them; we're just going to *free* them. */
1193 if (level > 1 && !dma_pte_superpage(pte))
1194 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1195
1196 dma_clear_pte(pte);
1197 if (!first_pte)
1198 first_pte = pte;
1199 last_pte = pte;
1200 } else if (level > 1) {
1201 /* Recurse down into a level that isn't *entirely* obsolete */
1202 freelist = dma_pte_clear_level(domain, level - 1,
1203 phys_to_virt(dma_pte_addr(pte)),
1204 level_pfn, start_pfn, last_pfn,
1205 freelist);
1206 }
1207next:
1208 pfn += level_size(level);
1209 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1210
1211 if (first_pte)
1212 domain_flush_cache(domain, first_pte,
1213 (void *)++last_pte - (void *)first_pte);
1214
1215 return freelist;
1216}
1217
1218/* We can't just free the pages because the IOMMU may still be walking
1219 the page tables, and may have cached the intermediate levels. The
1220 pages can only be freed after the IOTLB flush has been done. */
1221static struct page *domain_unmap(struct dmar_domain *domain,
1222 unsigned long start_pfn,
1223 unsigned long last_pfn)
1224{
1225 struct page *freelist = NULL;
1226
1227 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1228 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1229 BUG_ON(start_pfn > last_pfn);
1230
1231 /* we don't need lock here; nobody else touches the iova range */
1232 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1233 domain->pgd, 0, start_pfn, last_pfn, NULL);
1234
1235 /* free pgd */
1236 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1237 struct page *pgd_page = virt_to_page(domain->pgd);
1238 pgd_page->freelist = freelist;
1239 freelist = pgd_page;
1240
1241 domain->pgd = NULL;
1242 }
1243
1244 return freelist;
1245}
1246
1247static void dma_free_pagelist(struct page *freelist)
1248{
1249 struct page *pg;
1250
1251 while ((pg = freelist)) {
1252 freelist = pg->freelist;
1253 free_pgtable_page(page_address(pg));
1254 }
1255}
1256
1257static void iova_entry_free(unsigned long data)
1258{
1259 struct page *freelist = (struct page *)data;
1260
1261 dma_free_pagelist(freelist);
1262}
1263
1264/* iommu handling */
1265static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1266{
1267 struct root_entry *root;
1268 unsigned long flags;
1269
1270 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1271 if (!root) {
1272 pr_err("Allocating root entry for %s failed\n",
1273 iommu->name);
1274 return -ENOMEM;
1275 }
1276
1277 __iommu_flush_cache(iommu, root, ROOT_SIZE);
1278
1279 spin_lock_irqsave(&iommu->lock, flags);
1280 iommu->root_entry = root;
1281 spin_unlock_irqrestore(&iommu->lock, flags);
1282
1283 return 0;
1284}
1285
1286static void iommu_set_root_entry(struct intel_iommu *iommu)
1287{
1288 u64 addr;
1289 u32 sts;
1290 unsigned long flag;
1291
1292 addr = virt_to_phys(iommu->root_entry);
1293 if (ecs_enabled(iommu))
1294 addr |= DMA_RTADDR_RTT;
1295
1296 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1297 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1298
1299 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1300
1301 /* Make sure hardware complete it */
1302 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1303 readl, (sts & DMA_GSTS_RTPS), sts);
1304
1305 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1306}
1307
1308static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1309{
1310 u32 val;
1311 unsigned long flag;
1312
1313 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1314 return;
1315
1316 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1317 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1318
1319 /* Make sure hardware complete it */
1320 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1321 readl, (!(val & DMA_GSTS_WBFS)), val);
1322
1323 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1324}
1325
1326/* return value determine if we need a write buffer flush */
1327static void __iommu_flush_context(struct intel_iommu *iommu,
1328 u16 did, u16 source_id, u8 function_mask,
1329 u64 type)
1330{
1331 u64 val = 0;
1332 unsigned long flag;
1333
1334 switch (type) {
1335 case DMA_CCMD_GLOBAL_INVL:
1336 val = DMA_CCMD_GLOBAL_INVL;
1337 break;
1338 case DMA_CCMD_DOMAIN_INVL:
1339 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1340 break;
1341 case DMA_CCMD_DEVICE_INVL:
1342 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1343 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1344 break;
1345 default:
1346 BUG();
1347 }
1348 val |= DMA_CCMD_ICC;
1349
1350 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1351 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1352
1353 /* Make sure hardware complete it */
1354 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1355 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1356
1357 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1358}
1359
1360/* return value determine if we need a write buffer flush */
1361static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1362 u64 addr, unsigned int size_order, u64 type)
1363{
1364 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1365 u64 val = 0, val_iva = 0;
1366 unsigned long flag;
1367
1368 switch (type) {
1369 case DMA_TLB_GLOBAL_FLUSH:
1370 /* global flush doesn't need set IVA_REG */
1371 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1372 break;
1373 case DMA_TLB_DSI_FLUSH:
1374 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1375 break;
1376 case DMA_TLB_PSI_FLUSH:
1377 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1378 /* IH bit is passed in as part of address */
1379 val_iva = size_order | addr;
1380 break;
1381 default:
1382 BUG();
1383 }
1384 /* Note: set drain read/write */
1385#if 0
1386 /*
1387 * This is probably to be super secure.. Looks like we can
1388 * ignore it without any impact.
1389 */
1390 if (cap_read_drain(iommu->cap))
1391 val |= DMA_TLB_READ_DRAIN;
1392#endif
1393 if (cap_write_drain(iommu->cap))
1394 val |= DMA_TLB_WRITE_DRAIN;
1395
1396 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1397 /* Note: Only uses first TLB reg currently */
1398 if (val_iva)
1399 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1400 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1401
1402 /* Make sure hardware complete it */
1403 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1404 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1405
1406 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1407
1408 /* check IOTLB invalidation granularity */
1409 if (DMA_TLB_IAIG(val) == 0)
1410 pr_err("Flush IOTLB failed\n");
1411 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1412 pr_debug("TLB flush request %Lx, actual %Lx\n",
1413 (unsigned long long)DMA_TLB_IIRG(type),
1414 (unsigned long long)DMA_TLB_IAIG(val));
1415}
1416
1417static struct device_domain_info *
1418iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1419 u8 bus, u8 devfn)
1420{
1421 struct device_domain_info *info;
1422
1423 assert_spin_locked(&device_domain_lock);
1424
1425 if (!iommu->qi)
1426 return NULL;
1427
1428 list_for_each_entry(info, &domain->devices, link)
1429 if (info->iommu == iommu && info->bus == bus &&
1430 info->devfn == devfn) {
1431 if (info->ats_supported && info->dev)
1432 return info;
1433 break;
1434 }
1435
1436 return NULL;
1437}
1438
1439static void domain_update_iotlb(struct dmar_domain *domain)
1440{
1441 struct device_domain_info *info;
1442 bool has_iotlb_device = false;
1443
1444 assert_spin_locked(&device_domain_lock);
1445
1446 list_for_each_entry(info, &domain->devices, link) {
1447 struct pci_dev *pdev;
1448
1449 if (!info->dev || !dev_is_pci(info->dev))
1450 continue;
1451
1452 pdev = to_pci_dev(info->dev);
1453 if (pdev->ats_enabled) {
1454 has_iotlb_device = true;
1455 break;
1456 }
1457 }
1458
1459 domain->has_iotlb_device = has_iotlb_device;
1460}
1461
1462static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1463{
1464 struct pci_dev *pdev;
1465
1466 assert_spin_locked(&device_domain_lock);
1467
1468 if (!info || !dev_is_pci(info->dev))
1469 return;
1470
1471 pdev = to_pci_dev(info->dev);
1472 /* For IOMMU that supports device IOTLB throttling (DIT), we assign
1473 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1474 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1475 * reserved, which should be set to 0.
1476 */
1477 if (!ecap_dit(info->iommu->ecap))
1478 info->pfsid = 0;
1479 else {
1480 struct pci_dev *pf_pdev;
1481
1482 /* pdev will be returned if device is not a vf */
1483 pf_pdev = pci_physfn(pdev);
1484 info->pfsid = PCI_DEVID(pf_pdev->bus->number, pf_pdev->devfn);
1485 }
1486
1487#ifdef CONFIG_INTEL_IOMMU_SVM
1488 /* The PCIe spec, in its wisdom, declares that the behaviour of
1489 the device if you enable PASID support after ATS support is
1490 undefined. So always enable PASID support on devices which
1491 have it, even if we can't yet know if we're ever going to
1492 use it. */
1493 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1494 info->pasid_enabled = 1;
1495
1496 if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1497 info->pri_enabled = 1;
1498#endif
1499 if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1500 info->ats_enabled = 1;
1501 domain_update_iotlb(info->domain);
1502 info->ats_qdep = pci_ats_queue_depth(pdev);
1503 }
1504}
1505
1506static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1507{
1508 struct pci_dev *pdev;
1509
1510 assert_spin_locked(&device_domain_lock);
1511
1512 if (!dev_is_pci(info->dev))
1513 return;
1514
1515 pdev = to_pci_dev(info->dev);
1516
1517 if (info->ats_enabled) {
1518 pci_disable_ats(pdev);
1519 info->ats_enabled = 0;
1520 domain_update_iotlb(info->domain);
1521 }
1522#ifdef CONFIG_INTEL_IOMMU_SVM
1523 if (info->pri_enabled) {
1524 pci_disable_pri(pdev);
1525 info->pri_enabled = 0;
1526 }
1527 if (info->pasid_enabled) {
1528 pci_disable_pasid(pdev);
1529 info->pasid_enabled = 0;
1530 }
1531#endif
1532}
1533
1534static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1535 u64 addr, unsigned mask)
1536{
1537 u16 sid, qdep;
1538 unsigned long flags;
1539 struct device_domain_info *info;
1540
1541 if (!domain->has_iotlb_device)
1542 return;
1543
1544 spin_lock_irqsave(&device_domain_lock, flags);
1545 list_for_each_entry(info, &domain->devices, link) {
1546 if (!info->ats_enabled)
1547 continue;
1548
1549 sid = info->bus << 8 | info->devfn;
1550 qdep = info->ats_qdep;
1551 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1552 qdep, addr, mask);
1553 }
1554 spin_unlock_irqrestore(&device_domain_lock, flags);
1555}
1556
1557static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1558 struct dmar_domain *domain,
1559 unsigned long pfn, unsigned int pages,
1560 int ih, int map)
1561{
1562 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1563 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1564 u16 did = domain->iommu_did[iommu->seq_id];
1565
1566 BUG_ON(pages == 0);
1567
1568 if (ih)
1569 ih = 1 << 6;
1570 /*
1571 * Fallback to domain selective flush if no PSI support or the size is
1572 * too big.
1573 * PSI requires page size to be 2 ^ x, and the base address is naturally
1574 * aligned to the size
1575 */
1576 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1577 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1578 DMA_TLB_DSI_FLUSH);
1579 else
1580 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1581 DMA_TLB_PSI_FLUSH);
1582
1583 /*
1584 * In caching mode, changes of pages from non-present to present require
1585 * flush. However, device IOTLB doesn't need to be flushed in this case.
1586 */
1587 if (!cap_caching_mode(iommu->cap) || !map)
1588 iommu_flush_dev_iotlb(domain, addr, mask);
1589}
1590
1591/* Notification for newly created mappings */
1592static inline void __mapping_notify_one(struct intel_iommu *iommu,
1593 struct dmar_domain *domain,
1594 unsigned long pfn, unsigned int pages)
1595{
1596 /* It's a non-present to present mapping. Only flush if caching mode */
1597 if (cap_caching_mode(iommu->cap))
1598 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1599 else
1600 iommu_flush_write_buffer(iommu);
1601}
1602
1603static void iommu_flush_iova(struct iova_domain *iovad)
1604{
1605 struct dmar_domain *domain;
1606 int idx;
1607
1608 domain = container_of(iovad, struct dmar_domain, iovad);
1609
1610 for_each_domain_iommu(idx, domain) {
1611 struct intel_iommu *iommu = g_iommus[idx];
1612 u16 did = domain->iommu_did[iommu->seq_id];
1613
1614 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1615
1616 if (!cap_caching_mode(iommu->cap))
1617 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1618 0, MAX_AGAW_PFN_WIDTH);
1619 }
1620}
1621
1622static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1623{
1624 u32 pmen;
1625 unsigned long flags;
1626
1627 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1628 return;
1629
1630 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1631 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1632 pmen &= ~DMA_PMEN_EPM;
1633 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1634
1635 /* wait for the protected region status bit to clear */
1636 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1637 readl, !(pmen & DMA_PMEN_PRS), pmen);
1638
1639 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1640}
1641
1642static void iommu_enable_translation(struct intel_iommu *iommu)
1643{
1644 u32 sts;
1645 unsigned long flags;
1646
1647 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1648 iommu->gcmd |= DMA_GCMD_TE;
1649 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1650
1651 /* Make sure hardware complete it */
1652 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1653 readl, (sts & DMA_GSTS_TES), sts);
1654
1655 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1656}
1657
1658static void iommu_disable_translation(struct intel_iommu *iommu)
1659{
1660 u32 sts;
1661 unsigned long flag;
1662
1663 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1664 iommu->gcmd &= ~DMA_GCMD_TE;
1665 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1666
1667 /* Make sure hardware complete it */
1668 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1669 readl, (!(sts & DMA_GSTS_TES)), sts);
1670
1671 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1672}
1673
1674
1675static int iommu_init_domains(struct intel_iommu *iommu)
1676{
1677 u32 ndomains, nlongs;
1678 size_t size;
1679
1680 ndomains = cap_ndoms(iommu->cap);
1681 pr_debug("%s: Number of Domains supported <%d>\n",
1682 iommu->name, ndomains);
1683 nlongs = BITS_TO_LONGS(ndomains);
1684
1685 spin_lock_init(&iommu->lock);
1686
1687 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1688 if (!iommu->domain_ids) {
1689 pr_err("%s: Allocating domain id array failed\n",
1690 iommu->name);
1691 return -ENOMEM;
1692 }
1693
1694 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
1695 iommu->domains = kzalloc(size, GFP_KERNEL);
1696
1697 if (iommu->domains) {
1698 size = 256 * sizeof(struct dmar_domain *);
1699 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1700 }
1701
1702 if (!iommu->domains || !iommu->domains[0]) {
1703 pr_err("%s: Allocating domain array failed\n",
1704 iommu->name);
1705 kfree(iommu->domain_ids);
1706 kfree(iommu->domains);
1707 iommu->domain_ids = NULL;
1708 iommu->domains = NULL;
1709 return -ENOMEM;
1710 }
1711
1712
1713
1714 /*
1715 * If Caching mode is set, then invalid translations are tagged
1716 * with domain-id 0, hence we need to pre-allocate it. We also
1717 * use domain-id 0 as a marker for non-allocated domain-id, so
1718 * make sure it is not used for a real domain.
1719 */
1720 set_bit(0, iommu->domain_ids);
1721
1722 return 0;
1723}
1724
1725static void disable_dmar_iommu(struct intel_iommu *iommu)
1726{
1727 struct device_domain_info *info, *tmp;
1728 unsigned long flags;
1729
1730 if (!iommu->domains || !iommu->domain_ids)
1731 return;
1732
1733again:
1734 spin_lock_irqsave(&device_domain_lock, flags);
1735 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1736 struct dmar_domain *domain;
1737
1738 if (info->iommu != iommu)
1739 continue;
1740
1741 if (!info->dev || !info->domain)
1742 continue;
1743
1744 domain = info->domain;
1745
1746 __dmar_remove_one_dev_info(info);
1747
1748 if (!domain_type_is_vm_or_si(domain)) {
1749 /*
1750 * The domain_exit() function can't be called under
1751 * device_domain_lock, as it takes this lock itself.
1752 * So release the lock here and re-run the loop
1753 * afterwards.
1754 */
1755 spin_unlock_irqrestore(&device_domain_lock, flags);
1756 domain_exit(domain);
1757 goto again;
1758 }
1759 }
1760 spin_unlock_irqrestore(&device_domain_lock, flags);
1761
1762 if (iommu->gcmd & DMA_GCMD_TE)
1763 iommu_disable_translation(iommu);
1764}
1765
1766static void free_dmar_iommu(struct intel_iommu *iommu)
1767{
1768 if ((iommu->domains) && (iommu->domain_ids)) {
1769 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
1770 int i;
1771
1772 for (i = 0; i < elems; i++)
1773 kfree(iommu->domains[i]);
1774 kfree(iommu->domains);
1775 kfree(iommu->domain_ids);
1776 iommu->domains = NULL;
1777 iommu->domain_ids = NULL;
1778 }
1779
1780 g_iommus[iommu->seq_id] = NULL;
1781
1782 /* free context mapping */
1783 free_context_table(iommu);
1784
1785#ifdef CONFIG_INTEL_IOMMU_SVM
1786 if (pasid_enabled(iommu)) {
1787 if (ecap_prs(iommu->ecap))
1788 intel_svm_finish_prq(iommu);
1789 intel_svm_exit(iommu);
1790 }
1791#endif
1792}
1793
1794static struct dmar_domain *alloc_domain(int flags)
1795{
1796 struct dmar_domain *domain;
1797
1798 domain = alloc_domain_mem();
1799 if (!domain)
1800 return NULL;
1801
1802 memset(domain, 0, sizeof(*domain));
1803 domain->nid = -1;
1804 domain->flags = flags;
1805 domain->has_iotlb_device = false;
1806 INIT_LIST_HEAD(&domain->devices);
1807
1808 return domain;
1809}
1810
1811/* Must be called with iommu->lock */
1812static int domain_attach_iommu(struct dmar_domain *domain,
1813 struct intel_iommu *iommu)
1814{
1815 unsigned long ndomains;
1816 int num;
1817
1818 assert_spin_locked(&device_domain_lock);
1819 assert_spin_locked(&iommu->lock);
1820
1821 domain->iommu_refcnt[iommu->seq_id] += 1;
1822 domain->iommu_count += 1;
1823 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1824 ndomains = cap_ndoms(iommu->cap);
1825 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1826
1827 if (num >= ndomains) {
1828 pr_err("%s: No free domain ids\n", iommu->name);
1829 domain->iommu_refcnt[iommu->seq_id] -= 1;
1830 domain->iommu_count -= 1;
1831 return -ENOSPC;
1832 }
1833
1834 set_bit(num, iommu->domain_ids);
1835 set_iommu_domain(iommu, num, domain);
1836
1837 domain->iommu_did[iommu->seq_id] = num;
1838 domain->nid = iommu->node;
1839
1840 domain_update_iommu_cap(domain);
1841 }
1842
1843 return 0;
1844}
1845
1846static int domain_detach_iommu(struct dmar_domain *domain,
1847 struct intel_iommu *iommu)
1848{
1849 int num, count = INT_MAX;
1850
1851 assert_spin_locked(&device_domain_lock);
1852 assert_spin_locked(&iommu->lock);
1853
1854 domain->iommu_refcnt[iommu->seq_id] -= 1;
1855 count = --domain->iommu_count;
1856 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1857 num = domain->iommu_did[iommu->seq_id];
1858 clear_bit(num, iommu->domain_ids);
1859 set_iommu_domain(iommu, num, NULL);
1860
1861 domain_update_iommu_cap(domain);
1862 domain->iommu_did[iommu->seq_id] = 0;
1863 }
1864
1865 return count;
1866}
1867
1868static struct iova_domain reserved_iova_list;
1869static struct lock_class_key reserved_rbtree_key;
1870
1871static int dmar_init_reserved_ranges(void)
1872{
1873 struct pci_dev *pdev = NULL;
1874 struct iova *iova;
1875 int i;
1876
1877 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
1878
1879 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1880 &reserved_rbtree_key);
1881
1882 /* IOAPIC ranges shouldn't be accessed by DMA */
1883 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1884 IOVA_PFN(IOAPIC_RANGE_END));
1885 if (!iova) {
1886 pr_err("Reserve IOAPIC range failed\n");
1887 return -ENODEV;
1888 }
1889
1890 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1891 for_each_pci_dev(pdev) {
1892 struct resource *r;
1893
1894 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1895 r = &pdev->resource[i];
1896 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1897 continue;
1898 iova = reserve_iova(&reserved_iova_list,
1899 IOVA_PFN(r->start),
1900 IOVA_PFN(r->end));
1901 if (!iova) {
1902 pr_err("Reserve iova failed\n");
1903 return -ENODEV;
1904 }
1905 }
1906 }
1907 return 0;
1908}
1909
1910static void domain_reserve_special_ranges(struct dmar_domain *domain)
1911{
1912 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1913}
1914
1915static inline int guestwidth_to_adjustwidth(int gaw)
1916{
1917 int agaw;
1918 int r = (gaw - 12) % 9;
1919
1920 if (r == 0)
1921 agaw = gaw;
1922 else
1923 agaw = gaw + 9 - r;
1924 if (agaw > 64)
1925 agaw = 64;
1926 return agaw;
1927}
1928
1929static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1930 int guest_width)
1931{
1932 int adjust_width, agaw;
1933 unsigned long sagaw;
1934 int err;
1935
1936 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
1937
1938 err = init_iova_flush_queue(&domain->iovad,
1939 iommu_flush_iova, iova_entry_free);
1940 if (err)
1941 return err;
1942
1943 domain_reserve_special_ranges(domain);
1944
1945 /* calculate AGAW */
1946 if (guest_width > cap_mgaw(iommu->cap))
1947 guest_width = cap_mgaw(iommu->cap);
1948 domain->gaw = guest_width;
1949 adjust_width = guestwidth_to_adjustwidth(guest_width);
1950 agaw = width_to_agaw(adjust_width);
1951 sagaw = cap_sagaw(iommu->cap);
1952 if (!test_bit(agaw, &sagaw)) {
1953 /* hardware doesn't support it, choose a bigger one */
1954 pr_debug("Hardware doesn't support agaw %d\n", agaw);
1955 agaw = find_next_bit(&sagaw, 5, agaw);
1956 if (agaw >= 5)
1957 return -ENODEV;
1958 }
1959 domain->agaw = agaw;
1960
1961 if (ecap_coherent(iommu->ecap))
1962 domain->iommu_coherency = 1;
1963 else
1964 domain->iommu_coherency = 0;
1965
1966 if (ecap_sc_support(iommu->ecap))
1967 domain->iommu_snooping = 1;
1968 else
1969 domain->iommu_snooping = 0;
1970
1971 if (intel_iommu_superpage)
1972 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1973 else
1974 domain->iommu_superpage = 0;
1975
1976 domain->nid = iommu->node;
1977
1978 /* always allocate the top pgd */
1979 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1980 if (!domain->pgd)
1981 return -ENOMEM;
1982 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1983 return 0;
1984}
1985
1986static void domain_exit(struct dmar_domain *domain)
1987{
1988 struct page *freelist = NULL;
1989
1990 /* Domain 0 is reserved, so dont process it */
1991 if (!domain)
1992 return;
1993
1994 /* Remove associated devices and clear attached or cached domains */
1995 rcu_read_lock();
1996 domain_remove_dev_info(domain);
1997 rcu_read_unlock();
1998
1999 /* destroy iovas */
2000 put_iova_domain(&domain->iovad);
2001
2002 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
2003
2004 dma_free_pagelist(freelist);
2005
2006 free_domain_mem(domain);
2007}
2008
2009static int domain_context_mapping_one(struct dmar_domain *domain,
2010 struct intel_iommu *iommu,
2011 u8 bus, u8 devfn)
2012{
2013 u16 did = domain->iommu_did[iommu->seq_id];
2014 int translation = CONTEXT_TT_MULTI_LEVEL;
2015 struct device_domain_info *info = NULL;
2016 struct context_entry *context;
2017 unsigned long flags;
2018 struct dma_pte *pgd;
2019 int ret, agaw;
2020
2021 WARN_ON(did == 0);
2022
2023 if (hw_pass_through && domain_type_is_si(domain))
2024 translation = CONTEXT_TT_PASS_THROUGH;
2025
2026 pr_debug("Set context mapping for %02x:%02x.%d\n",
2027 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
2028
2029 BUG_ON(!domain->pgd);
2030
2031 spin_lock_irqsave(&device_domain_lock, flags);
2032 spin_lock(&iommu->lock);
2033
2034 ret = -ENOMEM;
2035 context = iommu_context_addr(iommu, bus, devfn, 1);
2036 if (!context)
2037 goto out_unlock;
2038
2039 ret = 0;
2040 if (context_present(context))
2041 goto out_unlock;
2042
2043 /*
2044 * For kdump cases, old valid entries may be cached due to the
2045 * in-flight DMA and copied pgtable, but there is no unmapping
2046 * behaviour for them, thus we need an explicit cache flush for
2047 * the newly-mapped device. For kdump, at this point, the device
2048 * is supposed to finish reset at its driver probe stage, so no
2049 * in-flight DMA will exist, and we don't need to worry anymore
2050 * hereafter.
2051 */
2052 if (context_copied(context)) {
2053 u16 did_old = context_domain_id(context);
2054
2055 if (did_old < cap_ndoms(iommu->cap)) {
2056 iommu->flush.flush_context(iommu, did_old,
2057 (((u16)bus) << 8) | devfn,
2058 DMA_CCMD_MASK_NOBIT,
2059 DMA_CCMD_DEVICE_INVL);
2060 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2061 DMA_TLB_DSI_FLUSH);
2062 }
2063 }
2064
2065 pgd = domain->pgd;
2066
2067 context_clear_entry(context);
2068 context_set_domain_id(context, did);
2069
2070 /*
2071 * Skip top levels of page tables for iommu which has less agaw
2072 * than default. Unnecessary for PT mode.
2073 */
2074 if (translation != CONTEXT_TT_PASS_THROUGH) {
2075 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2076 ret = -ENOMEM;
2077 pgd = phys_to_virt(dma_pte_addr(pgd));
2078 if (!dma_pte_present(pgd))
2079 goto out_unlock;
2080 }
2081
2082 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2083 if (info && info->ats_supported)
2084 translation = CONTEXT_TT_DEV_IOTLB;
2085 else
2086 translation = CONTEXT_TT_MULTI_LEVEL;
2087
2088 context_set_address_root(context, virt_to_phys(pgd));
2089 context_set_address_width(context, agaw);
2090 } else {
2091 /*
2092 * In pass through mode, AW must be programmed to
2093 * indicate the largest AGAW value supported by
2094 * hardware. And ASR is ignored by hardware.
2095 */
2096 context_set_address_width(context, iommu->msagaw);
2097 }
2098
2099 context_set_translation_type(context, translation);
2100 context_set_fault_enable(context);
2101 context_set_present(context);
2102 domain_flush_cache(domain, context, sizeof(*context));
2103
2104 /*
2105 * It's a non-present to present mapping. If hardware doesn't cache
2106 * non-present entry we only need to flush the write-buffer. If the
2107 * _does_ cache non-present entries, then it does so in the special
2108 * domain #0, which we have to flush:
2109 */
2110 if (cap_caching_mode(iommu->cap)) {
2111 iommu->flush.flush_context(iommu, 0,
2112 (((u16)bus) << 8) | devfn,
2113 DMA_CCMD_MASK_NOBIT,
2114 DMA_CCMD_DEVICE_INVL);
2115 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2116 } else {
2117 iommu_flush_write_buffer(iommu);
2118 }
2119 iommu_enable_dev_iotlb(info);
2120
2121 ret = 0;
2122
2123out_unlock:
2124 spin_unlock(&iommu->lock);
2125 spin_unlock_irqrestore(&device_domain_lock, flags);
2126
2127 return ret;
2128}
2129
2130struct domain_context_mapping_data {
2131 struct dmar_domain *domain;
2132 struct intel_iommu *iommu;
2133};
2134
2135static int domain_context_mapping_cb(struct pci_dev *pdev,
2136 u16 alias, void *opaque)
2137{
2138 struct domain_context_mapping_data *data = opaque;
2139
2140 return domain_context_mapping_one(data->domain, data->iommu,
2141 PCI_BUS_NUM(alias), alias & 0xff);
2142}
2143
2144static int
2145domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2146{
2147 struct intel_iommu *iommu;
2148 u8 bus, devfn;
2149 struct domain_context_mapping_data data;
2150
2151 iommu = device_to_iommu(dev, &bus, &devfn);
2152 if (!iommu)
2153 return -ENODEV;
2154
2155 if (!dev_is_pci(dev))
2156 return domain_context_mapping_one(domain, iommu, bus, devfn);
2157
2158 data.domain = domain;
2159 data.iommu = iommu;
2160
2161 return pci_for_each_dma_alias(to_pci_dev(dev),
2162 &domain_context_mapping_cb, &data);
2163}
2164
2165static int domain_context_mapped_cb(struct pci_dev *pdev,
2166 u16 alias, void *opaque)
2167{
2168 struct intel_iommu *iommu = opaque;
2169
2170 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2171}
2172
2173static int domain_context_mapped(struct device *dev)
2174{
2175 struct intel_iommu *iommu;
2176 u8 bus, devfn;
2177
2178 iommu = device_to_iommu(dev, &bus, &devfn);
2179 if (!iommu)
2180 return -ENODEV;
2181
2182 if (!dev_is_pci(dev))
2183 return device_context_mapped(iommu, bus, devfn);
2184
2185 return !pci_for_each_dma_alias(to_pci_dev(dev),
2186 domain_context_mapped_cb, iommu);
2187}
2188
2189/* Returns a number of VTD pages, but aligned to MM page size */
2190static inline unsigned long aligned_nrpages(unsigned long host_addr,
2191 size_t size)
2192{
2193 host_addr &= ~PAGE_MASK;
2194 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2195}
2196
2197/* Return largest possible superpage level for a given mapping */
2198static inline int hardware_largepage_caps(struct dmar_domain *domain,
2199 unsigned long iov_pfn,
2200 unsigned long phy_pfn,
2201 unsigned long pages)
2202{
2203 int support, level = 1;
2204 unsigned long pfnmerge;
2205
2206 support = domain->iommu_superpage;
2207
2208 /* To use a large page, the virtual *and* physical addresses
2209 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2210 of them will mean we have to use smaller pages. So just
2211 merge them and check both at once. */
2212 pfnmerge = iov_pfn | phy_pfn;
2213
2214 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2215 pages >>= VTD_STRIDE_SHIFT;
2216 if (!pages)
2217 break;
2218 pfnmerge >>= VTD_STRIDE_SHIFT;
2219 level++;
2220 support--;
2221 }
2222 return level;
2223}
2224
2225static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2226 struct scatterlist *sg, unsigned long phys_pfn,
2227 unsigned long nr_pages, int prot)
2228{
2229 struct dma_pte *first_pte = NULL, *pte = NULL;
2230 phys_addr_t uninitialized_var(pteval);
2231 unsigned long sg_res = 0;
2232 unsigned int largepage_lvl = 0;
2233 unsigned long lvl_pages = 0;
2234
2235 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2236
2237 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2238 return -EINVAL;
2239
2240 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2241
2242 if (!sg) {
2243 sg_res = nr_pages;
2244 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2245 }
2246
2247 while (nr_pages > 0) {
2248 uint64_t tmp;
2249
2250 if (!sg_res) {
2251 unsigned int pgoff = sg->offset & ~PAGE_MASK;
2252
2253 sg_res = aligned_nrpages(sg->offset, sg->length);
2254 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
2255 sg->dma_length = sg->length;
2256 pteval = (sg_phys(sg) - pgoff) | prot;
2257 phys_pfn = pteval >> VTD_PAGE_SHIFT;
2258 }
2259
2260 if (!pte) {
2261 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2262
2263 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2264 if (!pte)
2265 return -ENOMEM;
2266 /* It is large page*/
2267 if (largepage_lvl > 1) {
2268 unsigned long nr_superpages, end_pfn;
2269
2270 pteval |= DMA_PTE_LARGE_PAGE;
2271 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2272
2273 nr_superpages = sg_res / lvl_pages;
2274 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2275
2276 /*
2277 * Ensure that old small page tables are
2278 * removed to make room for superpage(s).
2279 * We're adding new large pages, so make sure
2280 * we don't remove their parent tables.
2281 */
2282 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2283 largepage_lvl + 1);
2284 } else {
2285 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2286 }
2287
2288 }
2289 /* We don't need lock here, nobody else
2290 * touches the iova range
2291 */
2292 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2293 if (tmp) {
2294 static int dumps = 5;
2295 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2296 iov_pfn, tmp, (unsigned long long)pteval);
2297 if (dumps) {
2298 dumps--;
2299 debug_dma_dump_mappings(NULL);
2300 }
2301 WARN_ON(1);
2302 }
2303
2304 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2305
2306 BUG_ON(nr_pages < lvl_pages);
2307 BUG_ON(sg_res < lvl_pages);
2308
2309 nr_pages -= lvl_pages;
2310 iov_pfn += lvl_pages;
2311 phys_pfn += lvl_pages;
2312 pteval += lvl_pages * VTD_PAGE_SIZE;
2313 sg_res -= lvl_pages;
2314
2315 /* If the next PTE would be the first in a new page, then we
2316 need to flush the cache on the entries we've just written.
2317 And then we'll need to recalculate 'pte', so clear it and
2318 let it get set again in the if (!pte) block above.
2319
2320 If we're done (!nr_pages) we need to flush the cache too.
2321
2322 Also if we've been setting superpages, we may need to
2323 recalculate 'pte' and switch back to smaller pages for the
2324 end of the mapping, if the trailing size is not enough to
2325 use another superpage (i.e. sg_res < lvl_pages). */
2326 pte++;
2327 if (!nr_pages || first_pte_in_page(pte) ||
2328 (largepage_lvl > 1 && sg_res < lvl_pages)) {
2329 domain_flush_cache(domain, first_pte,
2330 (void *)pte - (void *)first_pte);
2331 pte = NULL;
2332 }
2333
2334 if (!sg_res && nr_pages)
2335 sg = sg_next(sg);
2336 }
2337 return 0;
2338}
2339
2340static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2341 struct scatterlist *sg, unsigned long phys_pfn,
2342 unsigned long nr_pages, int prot)
2343{
2344 int ret;
2345 struct intel_iommu *iommu;
2346
2347 /* Do the real mapping first */
2348 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2349 if (ret)
2350 return ret;
2351
2352 /* Notify about the new mapping */
2353 if (domain_type_is_vm(domain)) {
2354 /* VM typed domains can have more than one IOMMUs */
2355 int iommu_id;
2356 for_each_domain_iommu(iommu_id, domain) {
2357 iommu = g_iommus[iommu_id];
2358 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2359 }
2360 } else {
2361 /* General domains only have one IOMMU */
2362 iommu = domain_get_iommu(domain);
2363 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2364 }
2365
2366 return 0;
2367}
2368
2369static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2370 struct scatterlist *sg, unsigned long nr_pages,
2371 int prot)
2372{
2373 return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2374}
2375
2376static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2377 unsigned long phys_pfn, unsigned long nr_pages,
2378 int prot)
2379{
2380 return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2381}
2382
2383static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2384{
2385 unsigned long flags;
2386 struct context_entry *context;
2387 u16 did_old;
2388
2389 if (!iommu)
2390 return;
2391
2392 spin_lock_irqsave(&iommu->lock, flags);
2393 context = iommu_context_addr(iommu, bus, devfn, 0);
2394 if (!context) {
2395 spin_unlock_irqrestore(&iommu->lock, flags);
2396 return;
2397 }
2398 did_old = context_domain_id(context);
2399 context_clear_entry(context);
2400 __iommu_flush_cache(iommu, context, sizeof(*context));
2401 spin_unlock_irqrestore(&iommu->lock, flags);
2402 iommu->flush.flush_context(iommu,
2403 did_old,
2404 (((u16)bus) << 8) | devfn,
2405 DMA_CCMD_MASK_NOBIT,
2406 DMA_CCMD_DEVICE_INVL);
2407 iommu->flush.flush_iotlb(iommu,
2408 did_old,
2409 0,
2410 0,
2411 DMA_TLB_DSI_FLUSH);
2412}
2413
2414static inline void unlink_domain_info(struct device_domain_info *info)
2415{
2416 assert_spin_locked(&device_domain_lock);
2417 list_del(&info->link);
2418 list_del(&info->global);
2419 if (info->dev)
2420 info->dev->archdata.iommu = NULL;
2421}
2422
2423static void domain_remove_dev_info(struct dmar_domain *domain)
2424{
2425 struct device_domain_info *info, *tmp;
2426 unsigned long flags;
2427
2428 spin_lock_irqsave(&device_domain_lock, flags);
2429 list_for_each_entry_safe(info, tmp, &domain->devices, link)
2430 __dmar_remove_one_dev_info(info);
2431 spin_unlock_irqrestore(&device_domain_lock, flags);
2432}
2433
2434/*
2435 * find_domain
2436 * Note: we use struct device->archdata.iommu stores the info
2437 */
2438static struct dmar_domain *find_domain(struct device *dev)
2439{
2440 struct device_domain_info *info;
2441
2442 /* No lock here, assumes no domain exit in normal case */
2443 info = dev->archdata.iommu;
2444 if (likely(info))
2445 return info->domain;
2446 return NULL;
2447}
2448
2449static inline struct device_domain_info *
2450dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2451{
2452 struct device_domain_info *info;
2453
2454 list_for_each_entry(info, &device_domain_list, global)
2455 if (info->iommu->segment == segment && info->bus == bus &&
2456 info->devfn == devfn)
2457 return info;
2458
2459 return NULL;
2460}
2461
2462static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2463 int bus, int devfn,
2464 struct device *dev,
2465 struct dmar_domain *domain)
2466{
2467 struct dmar_domain *found = NULL;
2468 struct device_domain_info *info;
2469 unsigned long flags;
2470 int ret;
2471
2472 info = alloc_devinfo_mem();
2473 if (!info)
2474 return NULL;
2475
2476 info->bus = bus;
2477 info->devfn = devfn;
2478 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2479 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2480 info->ats_qdep = 0;
2481 info->dev = dev;
2482 info->domain = domain;
2483 info->iommu = iommu;
2484 info->pasid_table = NULL;
2485
2486 if (dev && dev_is_pci(dev)) {
2487 struct pci_dev *pdev = to_pci_dev(info->dev);
2488
2489 if (!pci_ats_disabled() &&
2490 ecap_dev_iotlb_support(iommu->ecap) &&
2491 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2492 dmar_find_matched_atsr_unit(pdev))
2493 info->ats_supported = 1;
2494
2495 if (ecs_enabled(iommu)) {
2496 if (pasid_enabled(iommu)) {
2497 int features = pci_pasid_features(pdev);
2498 if (features >= 0)
2499 info->pasid_supported = features | 1;
2500 }
2501
2502 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2503 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2504 info->pri_supported = 1;
2505 }
2506 }
2507
2508 spin_lock_irqsave(&device_domain_lock, flags);
2509 if (dev)
2510 found = find_domain(dev);
2511
2512 if (!found) {
2513 struct device_domain_info *info2;
2514 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2515 if (info2) {
2516 found = info2->domain;
2517 info2->dev = dev;
2518 }
2519 }
2520
2521 if (found) {
2522 spin_unlock_irqrestore(&device_domain_lock, flags);
2523 free_devinfo_mem(info);
2524 /* Caller must free the original domain */
2525 return found;
2526 }
2527
2528 spin_lock(&iommu->lock);
2529 ret = domain_attach_iommu(domain, iommu);
2530 spin_unlock(&iommu->lock);
2531
2532 if (ret) {
2533 spin_unlock_irqrestore(&device_domain_lock, flags);
2534 free_devinfo_mem(info);
2535 return NULL;
2536 }
2537
2538 list_add(&info->link, &domain->devices);
2539 list_add(&info->global, &device_domain_list);
2540 if (dev)
2541 dev->archdata.iommu = info;
2542
2543 if (dev && dev_is_pci(dev) && info->pasid_supported) {
2544 ret = intel_pasid_alloc_table(dev);
2545 if (ret) {
2546 pr_warn("No pasid table for %s, pasid disabled\n",
2547 dev_name(dev));
2548 info->pasid_supported = 0;
2549 }
2550 }
2551 spin_unlock_irqrestore(&device_domain_lock, flags);
2552
2553 if (dev && domain_context_mapping(domain, dev)) {
2554 pr_err("Domain context map for %s failed\n", dev_name(dev));
2555 dmar_remove_one_dev_info(domain, dev);
2556 return NULL;
2557 }
2558
2559 return domain;
2560}
2561
2562static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2563{
2564 *(u16 *)opaque = alias;
2565 return 0;
2566}
2567
2568static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
2569{
2570 struct device_domain_info *info = NULL;
2571 struct dmar_domain *domain = NULL;
2572 struct intel_iommu *iommu;
2573 u16 dma_alias;
2574 unsigned long flags;
2575 u8 bus, devfn;
2576
2577 iommu = device_to_iommu(dev, &bus, &devfn);
2578 if (!iommu)
2579 return NULL;
2580
2581 if (dev_is_pci(dev)) {
2582 struct pci_dev *pdev = to_pci_dev(dev);
2583
2584 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2585
2586 spin_lock_irqsave(&device_domain_lock, flags);
2587 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2588 PCI_BUS_NUM(dma_alias),
2589 dma_alias & 0xff);
2590 if (info) {
2591 iommu = info->iommu;
2592 domain = info->domain;
2593 }
2594 spin_unlock_irqrestore(&device_domain_lock, flags);
2595
2596 /* DMA alias already has a domain, use it */
2597 if (info)
2598 goto out;
2599 }
2600
2601 /* Allocate and initialize new domain for the device */
2602 domain = alloc_domain(0);
2603 if (!domain)
2604 return NULL;
2605 if (domain_init(domain, iommu, gaw)) {
2606 domain_exit(domain);
2607 return NULL;
2608 }
2609
2610out:
2611
2612 return domain;
2613}
2614
2615static struct dmar_domain *set_domain_for_dev(struct device *dev,
2616 struct dmar_domain *domain)
2617{
2618 struct intel_iommu *iommu;
2619 struct dmar_domain *tmp;
2620 u16 req_id, dma_alias;
2621 u8 bus, devfn;
2622
2623 iommu = device_to_iommu(dev, &bus, &devfn);
2624 if (!iommu)
2625 return NULL;
2626
2627 req_id = ((u16)bus << 8) | devfn;
2628
2629 if (dev_is_pci(dev)) {
2630 struct pci_dev *pdev = to_pci_dev(dev);
2631
2632 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2633
2634 /* register PCI DMA alias device */
2635 if (req_id != dma_alias) {
2636 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2637 dma_alias & 0xff, NULL, domain);
2638
2639 if (!tmp || tmp != domain)
2640 return tmp;
2641 }
2642 }
2643
2644 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2645 if (!tmp || tmp != domain)
2646 return tmp;
2647
2648 return domain;
2649}
2650
2651static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2652{
2653 struct dmar_domain *domain, *tmp;
2654
2655 domain = find_domain(dev);
2656 if (domain)
2657 goto out;
2658
2659 domain = find_or_alloc_domain(dev, gaw);
2660 if (!domain)
2661 goto out;
2662
2663 tmp = set_domain_for_dev(dev, domain);
2664 if (!tmp || domain != tmp) {
2665 domain_exit(domain);
2666 domain = tmp;
2667 }
2668
2669out:
2670
2671 return domain;
2672}
2673
2674static int iommu_domain_identity_map(struct dmar_domain *domain,
2675 unsigned long long start,
2676 unsigned long long end)
2677{
2678 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2679 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2680
2681 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2682 dma_to_mm_pfn(last_vpfn))) {
2683 pr_err("Reserving iova failed\n");
2684 return -ENOMEM;
2685 }
2686
2687 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
2688 /*
2689 * RMRR range might have overlap with physical memory range,
2690 * clear it first
2691 */
2692 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2693
2694 return __domain_mapping(domain, first_vpfn, NULL,
2695 first_vpfn, last_vpfn - first_vpfn + 1,
2696 DMA_PTE_READ|DMA_PTE_WRITE);
2697}
2698
2699static int domain_prepare_identity_map(struct device *dev,
2700 struct dmar_domain *domain,
2701 unsigned long long start,
2702 unsigned long long end)
2703{
2704 /* For _hardware_ passthrough, don't bother. But for software
2705 passthrough, we do it anyway -- it may indicate a memory
2706 range which is reserved in E820, so which didn't get set
2707 up to start with in si_domain */
2708 if (domain == si_domain && hw_pass_through) {
2709 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2710 dev_name(dev), start, end);
2711 return 0;
2712 }
2713
2714 pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2715 dev_name(dev), start, end);
2716
2717 if (end < start) {
2718 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2719 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2720 dmi_get_system_info(DMI_BIOS_VENDOR),
2721 dmi_get_system_info(DMI_BIOS_VERSION),
2722 dmi_get_system_info(DMI_PRODUCT_VERSION));
2723 return -EIO;
2724 }
2725
2726 if (end >> agaw_to_width(domain->agaw)) {
2727 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2728 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2729 agaw_to_width(domain->agaw),
2730 dmi_get_system_info(DMI_BIOS_VENDOR),
2731 dmi_get_system_info(DMI_BIOS_VERSION),
2732 dmi_get_system_info(DMI_PRODUCT_VERSION));
2733 return -EIO;
2734 }
2735
2736 return iommu_domain_identity_map(domain, start, end);
2737}
2738
2739static int iommu_prepare_identity_map(struct device *dev,
2740 unsigned long long start,
2741 unsigned long long end)
2742{
2743 struct dmar_domain *domain;
2744 int ret;
2745
2746 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2747 if (!domain)
2748 return -ENOMEM;
2749
2750 ret = domain_prepare_identity_map(dev, domain, start, end);
2751 if (ret)
2752 domain_exit(domain);
2753
2754 return ret;
2755}
2756
2757static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2758 struct device *dev)
2759{
2760 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2761 return 0;
2762 return iommu_prepare_identity_map(dev, rmrr->base_address,
2763 rmrr->end_address);
2764}
2765
2766#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2767static inline void iommu_prepare_isa(void)
2768{
2769 struct pci_dev *pdev;
2770 int ret;
2771
2772 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2773 if (!pdev)
2774 return;
2775
2776 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
2777 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2778
2779 if (ret)
2780 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
2781
2782 pci_dev_put(pdev);
2783}
2784#else
2785static inline void iommu_prepare_isa(void)
2786{
2787 return;
2788}
2789#endif /* !CONFIG_INTEL_IOMMU_FLPY_WA */
2790
2791static int md_domain_init(struct dmar_domain *domain, int guest_width);
2792
2793static int __init si_domain_init(int hw)
2794{
2795 int nid, ret = 0;
2796
2797 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2798 if (!si_domain)
2799 return -EFAULT;
2800
2801 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2802 domain_exit(si_domain);
2803 return -EFAULT;
2804 }
2805
2806 pr_debug("Identity mapping domain allocated\n");
2807
2808 if (hw)
2809 return 0;
2810
2811 for_each_online_node(nid) {
2812 unsigned long start_pfn, end_pfn;
2813 int i;
2814
2815 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2816 ret = iommu_domain_identity_map(si_domain,
2817 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2818 if (ret)
2819 return ret;
2820 }
2821 }
2822
2823 return 0;
2824}
2825
2826static int identity_mapping(struct device *dev)
2827{
2828 struct device_domain_info *info;
2829
2830 if (likely(!iommu_identity_mapping))
2831 return 0;
2832
2833 info = dev->archdata.iommu;
2834 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2835 return (info->domain == si_domain);
2836
2837 return 0;
2838}
2839
2840static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2841{
2842 struct dmar_domain *ndomain;
2843 struct intel_iommu *iommu;
2844 u8 bus, devfn;
2845
2846 iommu = device_to_iommu(dev, &bus, &devfn);
2847 if (!iommu)
2848 return -ENODEV;
2849
2850 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2851 if (ndomain != domain)
2852 return -EBUSY;
2853
2854 return 0;
2855}
2856
2857static bool device_has_rmrr(struct device *dev)
2858{
2859 struct dmar_rmrr_unit *rmrr;
2860 struct device *tmp;
2861 int i;
2862
2863 rcu_read_lock();
2864 for_each_rmrr_units(rmrr) {
2865 /*
2866 * Return TRUE if this RMRR contains the device that
2867 * is passed in.
2868 */
2869 for_each_active_dev_scope(rmrr->devices,
2870 rmrr->devices_cnt, i, tmp)
2871 if (tmp == dev) {
2872 rcu_read_unlock();
2873 return true;
2874 }
2875 }
2876 rcu_read_unlock();
2877 return false;
2878}
2879
2880/*
2881 * There are a couple cases where we need to restrict the functionality of
2882 * devices associated with RMRRs. The first is when evaluating a device for
2883 * identity mapping because problems exist when devices are moved in and out
2884 * of domains and their respective RMRR information is lost. This means that
2885 * a device with associated RMRRs will never be in a "passthrough" domain.
2886 * The second is use of the device through the IOMMU API. This interface
2887 * expects to have full control of the IOVA space for the device. We cannot
2888 * satisfy both the requirement that RMRR access is maintained and have an
2889 * unencumbered IOVA space. We also have no ability to quiesce the device's
2890 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2891 * We therefore prevent devices associated with an RMRR from participating in
2892 * the IOMMU API, which eliminates them from device assignment.
2893 *
2894 * In both cases we assume that PCI USB devices with RMRRs have them largely
2895 * for historical reasons and that the RMRR space is not actively used post
2896 * boot. This exclusion may change if vendors begin to abuse it.
2897 *
2898 * The same exception is made for graphics devices, with the requirement that
2899 * any use of the RMRR regions will be torn down before assigning the device
2900 * to a guest.
2901 */
2902static bool device_is_rmrr_locked(struct device *dev)
2903{
2904 if (!device_has_rmrr(dev))
2905 return false;
2906
2907 if (dev_is_pci(dev)) {
2908 struct pci_dev *pdev = to_pci_dev(dev);
2909
2910 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2911 return false;
2912 }
2913
2914 return true;
2915}
2916
2917static int iommu_should_identity_map(struct device *dev, int startup)
2918{
2919
2920 if (dev_is_pci(dev)) {
2921 struct pci_dev *pdev = to_pci_dev(dev);
2922
2923 if (device_is_rmrr_locked(dev))
2924 return 0;
2925
2926 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2927 return 1;
2928
2929 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2930 return 1;
2931
2932 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2933 return 0;
2934
2935 /*
2936 * We want to start off with all devices in the 1:1 domain, and
2937 * take them out later if we find they can't access all of memory.
2938 *
2939 * However, we can't do this for PCI devices behind bridges,
2940 * because all PCI devices behind the same bridge will end up
2941 * with the same source-id on their transactions.
2942 *
2943 * Practically speaking, we can't change things around for these
2944 * devices at run-time, because we can't be sure there'll be no
2945 * DMA transactions in flight for any of their siblings.
2946 *
2947 * So PCI devices (unless they're on the root bus) as well as
2948 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2949 * the 1:1 domain, just in _case_ one of their siblings turns out
2950 * not to be able to map all of memory.
2951 */
2952 if (!pci_is_pcie(pdev)) {
2953 if (!pci_is_root_bus(pdev->bus))
2954 return 0;
2955 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2956 return 0;
2957 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2958 return 0;
2959 } else {
2960 if (device_has_rmrr(dev))
2961 return 0;
2962 }
2963
2964 /*
2965 * At boot time, we don't yet know if devices will be 64-bit capable.
2966 * Assume that they will — if they turn out not to be, then we can
2967 * take them out of the 1:1 domain later.
2968 */
2969 if (!startup) {
2970 /*
2971 * If the device's dma_mask is less than the system's memory
2972 * size then this is not a candidate for identity mapping.
2973 */
2974 u64 dma_mask = *dev->dma_mask;
2975
2976 if (dev->coherent_dma_mask &&
2977 dev->coherent_dma_mask < dma_mask)
2978 dma_mask = dev->coherent_dma_mask;
2979
2980 return dma_mask >= dma_get_required_mask(dev);
2981 }
2982
2983 return 1;
2984}
2985
2986static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2987{
2988 int ret;
2989
2990 if (!iommu_should_identity_map(dev, 1))
2991 return 0;
2992
2993 ret = domain_add_dev_info(si_domain, dev);
2994 if (!ret)
2995 pr_info("%s identity mapping for device %s\n",
2996 hw ? "Hardware" : "Software", dev_name(dev));
2997 else if (ret == -ENODEV)
2998 /* device not associated with an iommu */
2999 ret = 0;
3000
3001 return ret;
3002}
3003
3004
3005static int __init iommu_prepare_static_identity_mapping(int hw)
3006{
3007 struct pci_dev *pdev = NULL;
3008 struct dmar_drhd_unit *drhd;
3009 struct intel_iommu *iommu;
3010 struct device *dev;
3011 int i;
3012 int ret = 0;
3013
3014 for_each_pci_dev(pdev) {
3015 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
3016 if (ret)
3017 return ret;
3018 }
3019
3020 for_each_active_iommu(iommu, drhd)
3021 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
3022 struct acpi_device_physical_node *pn;
3023 struct acpi_device *adev;
3024
3025 if (dev->bus != &acpi_bus_type)
3026 continue;
3027
3028 adev= to_acpi_device(dev);
3029 mutex_lock(&adev->physical_node_lock);
3030 list_for_each_entry(pn, &adev->physical_node_list, node) {
3031 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
3032 if (ret)
3033 break;
3034 }
3035 mutex_unlock(&adev->physical_node_lock);
3036 if (ret)
3037 return ret;
3038 }
3039
3040 return 0;
3041}
3042
3043static void intel_iommu_init_qi(struct intel_iommu *iommu)
3044{
3045 /*
3046 * Start from the sane iommu hardware state.
3047 * If the queued invalidation is already initialized by us
3048 * (for example, while enabling interrupt-remapping) then
3049 * we got the things already rolling from a sane state.
3050 */
3051 if (!iommu->qi) {
3052 /*
3053 * Clear any previous faults.
3054 */
3055 dmar_fault(-1, iommu);
3056 /*
3057 * Disable queued invalidation if supported and already enabled
3058 * before OS handover.
3059 */
3060 dmar_disable_qi(iommu);
3061 }
3062
3063 if (dmar_enable_qi(iommu)) {
3064 /*
3065 * Queued Invalidate not enabled, use Register Based Invalidate
3066 */
3067 iommu->flush.flush_context = __iommu_flush_context;
3068 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
3069 pr_info("%s: Using Register based invalidation\n",
3070 iommu->name);
3071 } else {
3072 iommu->flush.flush_context = qi_flush_context;
3073 iommu->flush.flush_iotlb = qi_flush_iotlb;
3074 pr_info("%s: Using Queued invalidation\n", iommu->name);
3075 }
3076}
3077
3078static int copy_context_table(struct intel_iommu *iommu,
3079 struct root_entry *old_re,
3080 struct context_entry **tbl,
3081 int bus, bool ext)
3082{
3083 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
3084 struct context_entry *new_ce = NULL, ce;
3085 struct context_entry *old_ce = NULL;
3086 struct root_entry re;
3087 phys_addr_t old_ce_phys;
3088
3089 tbl_idx = ext ? bus * 2 : bus;
3090 memcpy(&re, old_re, sizeof(re));
3091
3092 for (devfn = 0; devfn < 256; devfn++) {
3093 /* First calculate the correct index */
3094 idx = (ext ? devfn * 2 : devfn) % 256;
3095
3096 if (idx == 0) {
3097 /* First save what we may have and clean up */
3098 if (new_ce) {
3099 tbl[tbl_idx] = new_ce;
3100 __iommu_flush_cache(iommu, new_ce,
3101 VTD_PAGE_SIZE);
3102 pos = 1;
3103 }
3104
3105 if (old_ce)
3106 memunmap(old_ce);
3107
3108 ret = 0;
3109 if (devfn < 0x80)
3110 old_ce_phys = root_entry_lctp(&re);
3111 else
3112 old_ce_phys = root_entry_uctp(&re);
3113
3114 if (!old_ce_phys) {
3115 if (ext && devfn == 0) {
3116 /* No LCTP, try UCTP */
3117 devfn = 0x7f;
3118 continue;
3119 } else {
3120 goto out;
3121 }
3122 }
3123
3124 ret = -ENOMEM;
3125 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3126 MEMREMAP_WB);
3127 if (!old_ce)
3128 goto out;
3129
3130 new_ce = alloc_pgtable_page(iommu->node);
3131 if (!new_ce)
3132 goto out_unmap;
3133
3134 ret = 0;
3135 }
3136
3137 /* Now copy the context entry */
3138 memcpy(&ce, old_ce + idx, sizeof(ce));
3139
3140 if (!__context_present(&ce))
3141 continue;
3142
3143 did = context_domain_id(&ce);
3144 if (did >= 0 && did < cap_ndoms(iommu->cap))
3145 set_bit(did, iommu->domain_ids);
3146
3147 /*
3148 * We need a marker for copied context entries. This
3149 * marker needs to work for the old format as well as
3150 * for extended context entries.
3151 *
3152 * Bit 67 of the context entry is used. In the old
3153 * format this bit is available to software, in the
3154 * extended format it is the PGE bit, but PGE is ignored
3155 * by HW if PASIDs are disabled (and thus still
3156 * available).
3157 *
3158 * So disable PASIDs first and then mark the entry
3159 * copied. This means that we don't copy PASID
3160 * translations from the old kernel, but this is fine as
3161 * faults there are not fatal.
3162 */
3163 context_clear_pasid_enable(&ce);
3164 context_set_copied(&ce);
3165
3166 new_ce[idx] = ce;
3167 }
3168
3169 tbl[tbl_idx + pos] = new_ce;
3170
3171 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3172
3173out_unmap:
3174 memunmap(old_ce);
3175
3176out:
3177 return ret;
3178}
3179
3180static int copy_translation_tables(struct intel_iommu *iommu)
3181{
3182 struct context_entry **ctxt_tbls;
3183 struct root_entry *old_rt;
3184 phys_addr_t old_rt_phys;
3185 int ctxt_table_entries;
3186 unsigned long flags;
3187 u64 rtaddr_reg;
3188 int bus, ret;
3189 bool new_ext, ext;
3190
3191 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3192 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
3193 new_ext = !!ecap_ecs(iommu->ecap);
3194
3195 /*
3196 * The RTT bit can only be changed when translation is disabled,
3197 * but disabling translation means to open a window for data
3198 * corruption. So bail out and don't copy anything if we would
3199 * have to change the bit.
3200 */
3201 if (new_ext != ext)
3202 return -EINVAL;
3203
3204 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3205 if (!old_rt_phys)
3206 return -EINVAL;
3207
3208 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3209 if (!old_rt)
3210 return -ENOMEM;
3211
3212 /* This is too big for the stack - allocate it from slab */
3213 ctxt_table_entries = ext ? 512 : 256;
3214 ret = -ENOMEM;
3215 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
3216 if (!ctxt_tbls)
3217 goto out_unmap;
3218
3219 for (bus = 0; bus < 256; bus++) {
3220 ret = copy_context_table(iommu, &old_rt[bus],
3221 ctxt_tbls, bus, ext);
3222 if (ret) {
3223 pr_err("%s: Failed to copy context table for bus %d\n",
3224 iommu->name, bus);
3225 continue;
3226 }
3227 }
3228
3229 spin_lock_irqsave(&iommu->lock, flags);
3230
3231 /* Context tables are copied, now write them to the root_entry table */
3232 for (bus = 0; bus < 256; bus++) {
3233 int idx = ext ? bus * 2 : bus;
3234 u64 val;
3235
3236 if (ctxt_tbls[idx]) {
3237 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3238 iommu->root_entry[bus].lo = val;
3239 }
3240
3241 if (!ext || !ctxt_tbls[idx + 1])
3242 continue;
3243
3244 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3245 iommu->root_entry[bus].hi = val;
3246 }
3247
3248 spin_unlock_irqrestore(&iommu->lock, flags);
3249
3250 kfree(ctxt_tbls);
3251
3252 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3253
3254 ret = 0;
3255
3256out_unmap:
3257 memunmap(old_rt);
3258
3259 return ret;
3260}
3261
3262static int __init init_dmars(void)
3263{
3264 struct dmar_drhd_unit *drhd;
3265 struct dmar_rmrr_unit *rmrr;
3266 bool copied_tables = false;
3267 struct device *dev;
3268 struct intel_iommu *iommu;
3269 int i, ret;
3270
3271 /*
3272 * for each drhd
3273 * allocate root
3274 * initialize and program root entry to not present
3275 * endfor
3276 */
3277 for_each_drhd_unit(drhd) {
3278 /*
3279 * lock not needed as this is only incremented in the single
3280 * threaded kernel __init code path all other access are read
3281 * only
3282 */
3283 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3284 g_num_of_iommus++;
3285 continue;
3286 }
3287 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3288 }
3289
3290 /* Preallocate enough resources for IOMMU hot-addition */
3291 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3292 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3293
3294 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3295 GFP_KERNEL);
3296 if (!g_iommus) {
3297 pr_err("Allocating global iommu array failed\n");
3298 ret = -ENOMEM;
3299 goto error;
3300 }
3301
3302 for_each_active_iommu(iommu, drhd) {
3303 /*
3304 * Find the max pasid size of all IOMMU's in the system.
3305 * We need to ensure the system pasid table is no bigger
3306 * than the smallest supported.
3307 */
3308 if (pasid_enabled(iommu)) {
3309 u32 temp = 2 << ecap_pss(iommu->ecap);
3310
3311 intel_pasid_max_id = min_t(u32, temp,
3312 intel_pasid_max_id);
3313 }
3314
3315 g_iommus[iommu->seq_id] = iommu;
3316
3317 intel_iommu_init_qi(iommu);
3318
3319 ret = iommu_init_domains(iommu);
3320 if (ret)
3321 goto free_iommu;
3322
3323 init_translation_status(iommu);
3324
3325 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3326 iommu_disable_translation(iommu);
3327 clear_translation_pre_enabled(iommu);
3328 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3329 iommu->name);
3330 }
3331
3332 /*
3333 * TBD:
3334 * we could share the same root & context tables
3335 * among all IOMMU's. Need to Split it later.
3336 */
3337 ret = iommu_alloc_root_entry(iommu);
3338 if (ret)
3339 goto free_iommu;
3340
3341 if (translation_pre_enabled(iommu)) {
3342 pr_info("Translation already enabled - trying to copy translation structures\n");
3343
3344 ret = copy_translation_tables(iommu);
3345 if (ret) {
3346 /*
3347 * We found the IOMMU with translation
3348 * enabled - but failed to copy over the
3349 * old root-entry table. Try to proceed
3350 * by disabling translation now and
3351 * allocating a clean root-entry table.
3352 * This might cause DMAR faults, but
3353 * probably the dump will still succeed.
3354 */
3355 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3356 iommu->name);
3357 iommu_disable_translation(iommu);
3358 clear_translation_pre_enabled(iommu);
3359 } else {
3360 pr_info("Copied translation tables from previous kernel for %s\n",
3361 iommu->name);
3362 copied_tables = true;
3363 }
3364 }
3365
3366 if (!ecap_pass_through(iommu->ecap))
3367 hw_pass_through = 0;
3368#ifdef CONFIG_INTEL_IOMMU_SVM
3369 if (pasid_enabled(iommu))
3370 intel_svm_init(iommu);
3371#endif
3372 }
3373
3374 /*
3375 * Now that qi is enabled on all iommus, set the root entry and flush
3376 * caches. This is required on some Intel X58 chipsets, otherwise the
3377 * flush_context function will loop forever and the boot hangs.
3378 */
3379 for_each_active_iommu(iommu, drhd) {
3380 iommu_flush_write_buffer(iommu);
3381 iommu_set_root_entry(iommu);
3382 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3383 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3384 }
3385
3386 if (iommu_pass_through)
3387 iommu_identity_mapping |= IDENTMAP_ALL;
3388
3389#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3390 iommu_identity_mapping |= IDENTMAP_GFX;
3391#endif
3392
3393 check_tylersburg_isoch();
3394
3395 if (iommu_identity_mapping) {
3396 ret = si_domain_init(hw_pass_through);
3397 if (ret)
3398 goto free_iommu;
3399 }
3400
3401
3402 /*
3403 * If we copied translations from a previous kernel in the kdump
3404 * case, we can not assign the devices to domains now, as that
3405 * would eliminate the old mappings. So skip this part and defer
3406 * the assignment to device driver initialization time.
3407 */
3408 if (copied_tables)
3409 goto domains_done;
3410
3411 /*
3412 * If pass through is not set or not enabled, setup context entries for
3413 * identity mappings for rmrr, gfx, and isa and may fall back to static
3414 * identity mapping if iommu_identity_mapping is set.
3415 */
3416 if (iommu_identity_mapping) {
3417 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
3418 if (ret) {
3419 pr_crit("Failed to setup IOMMU pass-through\n");
3420 goto free_iommu;
3421 }
3422 }
3423 /*
3424 * For each rmrr
3425 * for each dev attached to rmrr
3426 * do
3427 * locate drhd for dev, alloc domain for dev
3428 * allocate free domain
3429 * allocate page table entries for rmrr
3430 * if context not allocated for bus
3431 * allocate and init context
3432 * set present in root table for this bus
3433 * init context with domain, translation etc
3434 * endfor
3435 * endfor
3436 */
3437 pr_info("Setting RMRR:\n");
3438 for_each_rmrr_units(rmrr) {
3439 /* some BIOS lists non-exist devices in DMAR table. */
3440 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3441 i, dev) {
3442 ret = iommu_prepare_rmrr_dev(rmrr, dev);
3443 if (ret)
3444 pr_err("Mapping reserved region failed\n");
3445 }
3446 }
3447
3448 iommu_prepare_isa();
3449
3450domains_done:
3451
3452 /*
3453 * for each drhd
3454 * enable fault log
3455 * global invalidate context cache
3456 * global invalidate iotlb
3457 * enable translation
3458 */
3459 for_each_iommu(iommu, drhd) {
3460 if (drhd->ignored) {
3461 /*
3462 * we always have to disable PMRs or DMA may fail on
3463 * this device
3464 */
3465 if (force_on)
3466 iommu_disable_protect_mem_regions(iommu);
3467 continue;
3468 }
3469
3470 iommu_flush_write_buffer(iommu);
3471
3472#ifdef CONFIG_INTEL_IOMMU_SVM
3473 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
3474 ret = intel_svm_enable_prq(iommu);
3475 if (ret)
3476 goto free_iommu;
3477 }
3478#endif
3479 ret = dmar_set_interrupt(iommu);
3480 if (ret)
3481 goto free_iommu;
3482
3483 if (!translation_pre_enabled(iommu))
3484 iommu_enable_translation(iommu);
3485
3486 iommu_disable_protect_mem_regions(iommu);
3487 }
3488
3489 return 0;
3490
3491free_iommu:
3492 for_each_active_iommu(iommu, drhd) {
3493 disable_dmar_iommu(iommu);
3494 free_dmar_iommu(iommu);
3495 }
3496
3497 kfree(g_iommus);
3498
3499error:
3500 return ret;
3501}
3502
3503/* This takes a number of _MM_ pages, not VTD pages */
3504static unsigned long intel_alloc_iova(struct device *dev,
3505 struct dmar_domain *domain,
3506 unsigned long nrpages, uint64_t dma_mask)
3507{
3508 unsigned long iova_pfn = 0;
3509
3510 /* Restrict dma_mask to the width that the iommu can handle */
3511 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
3512 /* Ensure we reserve the whole size-aligned region */
3513 nrpages = __roundup_pow_of_two(nrpages);
3514
3515 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3516 /*
3517 * First try to allocate an io virtual address in
3518 * DMA_BIT_MASK(32) and if that fails then try allocating
3519 * from higher range
3520 */
3521 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3522 IOVA_PFN(DMA_BIT_MASK(32)), false);
3523 if (iova_pfn)
3524 return iova_pfn;
3525 }
3526 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3527 IOVA_PFN(dma_mask), true);
3528 if (unlikely(!iova_pfn)) {
3529 pr_err("Allocating %ld-page iova for %s failed",
3530 nrpages, dev_name(dev));
3531 return 0;
3532 }
3533
3534 return iova_pfn;
3535}
3536
3537struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
3538{
3539 struct dmar_domain *domain, *tmp;
3540 struct dmar_rmrr_unit *rmrr;
3541 struct device *i_dev;
3542 int i, ret;
3543
3544 domain = find_domain(dev);
3545 if (domain)
3546 goto out;
3547
3548 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3549 if (!domain)
3550 goto out;
3551
3552 /* We have a new domain - setup possible RMRRs for the device */
3553 rcu_read_lock();
3554 for_each_rmrr_units(rmrr) {
3555 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3556 i, i_dev) {
3557 if (i_dev != dev)
3558 continue;
3559
3560 ret = domain_prepare_identity_map(dev, domain,
3561 rmrr->base_address,
3562 rmrr->end_address);
3563 if (ret)
3564 dev_err(dev, "Mapping reserved region failed\n");
3565 }
3566 }
3567 rcu_read_unlock();
3568
3569 tmp = set_domain_for_dev(dev, domain);
3570 if (!tmp || domain != tmp) {
3571 domain_exit(domain);
3572 domain = tmp;
3573 }
3574
3575out:
3576
3577 if (!domain)
3578 pr_err("Allocating domain for %s failed\n", dev_name(dev));
3579
3580
3581 return domain;
3582}
3583
3584/* Check if the dev needs to go through non-identity map and unmap process.*/
3585static int iommu_no_mapping(struct device *dev)
3586{
3587 int found;
3588
3589 if (iommu_dummy(dev))
3590 return 1;
3591
3592 if (!iommu_identity_mapping)
3593 return 0;
3594
3595 found = identity_mapping(dev);
3596 if (found) {
3597 if (iommu_should_identity_map(dev, 0))
3598 return 1;
3599 else {
3600 /*
3601 * 32 bit DMA is removed from si_domain and fall back
3602 * to non-identity mapping.
3603 */
3604 dmar_remove_one_dev_info(si_domain, dev);
3605 pr_info("32bit %s uses non-identity mapping\n",
3606 dev_name(dev));
3607 return 0;
3608 }
3609 } else {
3610 /*
3611 * In case of a detached 64 bit DMA device from vm, the device
3612 * is put into si_domain for identity mapping.
3613 */
3614 if (iommu_should_identity_map(dev, 0)) {
3615 int ret;
3616 ret = domain_add_dev_info(si_domain, dev);
3617 if (!ret) {
3618 pr_info("64bit %s uses identity mapping\n",
3619 dev_name(dev));
3620 return 1;
3621 }
3622 }
3623 }
3624
3625 return 0;
3626}
3627
3628static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3629 size_t size, int dir, u64 dma_mask)
3630{
3631 struct dmar_domain *domain;
3632 phys_addr_t start_paddr;
3633 unsigned long iova_pfn;
3634 int prot = 0;
3635 int ret;
3636 struct intel_iommu *iommu;
3637 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3638
3639 BUG_ON(dir == DMA_NONE);
3640
3641 if (iommu_no_mapping(dev))
3642 return paddr;
3643
3644 domain = get_valid_domain_for_dev(dev);
3645 if (!domain)
3646 return 0;
3647
3648 iommu = domain_get_iommu(domain);
3649 size = aligned_nrpages(paddr, size);
3650
3651 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3652 if (!iova_pfn)
3653 goto error;
3654
3655 /*
3656 * Check if DMAR supports zero-length reads on write only
3657 * mappings..
3658 */
3659 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3660 !cap_zlr(iommu->cap))
3661 prot |= DMA_PTE_READ;
3662 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3663 prot |= DMA_PTE_WRITE;
3664 /*
3665 * paddr - (paddr + size) might be partial page, we should map the whole
3666 * page. Note: if two part of one page are separately mapped, we
3667 * might have two guest_addr mapping to the same host paddr, but this
3668 * is not a big problem
3669 */
3670 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3671 mm_to_dma_pfn(paddr_pfn), size, prot);
3672 if (ret)
3673 goto error;
3674
3675 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
3676 start_paddr += paddr & ~PAGE_MASK;
3677 return start_paddr;
3678
3679error:
3680 if (iova_pfn)
3681 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3682 pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
3683 dev_name(dev), size, (unsigned long long)paddr, dir);
3684 return 0;
3685}
3686
3687static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3688 unsigned long offset, size_t size,
3689 enum dma_data_direction dir,
3690 unsigned long attrs)
3691{
3692 return __intel_map_single(dev, page_to_phys(page) + offset, size,
3693 dir, *dev->dma_mask);
3694}
3695
3696static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
3697{
3698 struct dmar_domain *domain;
3699 unsigned long start_pfn, last_pfn;
3700 unsigned long nrpages;
3701 unsigned long iova_pfn;
3702 struct intel_iommu *iommu;
3703 struct page *freelist;
3704
3705 if (iommu_no_mapping(dev))
3706 return;
3707
3708 domain = find_domain(dev);
3709 BUG_ON(!domain);
3710
3711 iommu = domain_get_iommu(domain);
3712
3713 iova_pfn = IOVA_PFN(dev_addr);
3714
3715 nrpages = aligned_nrpages(dev_addr, size);
3716 start_pfn = mm_to_dma_pfn(iova_pfn);
3717 last_pfn = start_pfn + nrpages - 1;
3718
3719 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3720 dev_name(dev), start_pfn, last_pfn);
3721
3722 freelist = domain_unmap(domain, start_pfn, last_pfn);
3723
3724 if (intel_iommu_strict || !has_iova_flush_queue(&domain->iovad)) {
3725 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
3726 nrpages, !freelist, 0);
3727 /* free iova */
3728 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3729 dma_free_pagelist(freelist);
3730 } else {
3731 queue_iova(&domain->iovad, iova_pfn, nrpages,
3732 (unsigned long)freelist);
3733 /*
3734 * queue up the release of the unmap to save the 1/6th of the
3735 * cpu used up by the iotlb flush operation...
3736 */
3737 }
3738}
3739
3740static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3741 size_t size, enum dma_data_direction dir,
3742 unsigned long attrs)
3743{
3744 intel_unmap(dev, dev_addr, size);
3745}
3746
3747static void *intel_alloc_coherent(struct device *dev, size_t size,
3748 dma_addr_t *dma_handle, gfp_t flags,
3749 unsigned long attrs)
3750{
3751 struct page *page = NULL;
3752 int order;
3753
3754 size = PAGE_ALIGN(size);
3755 order = get_order(size);
3756
3757 if (!iommu_no_mapping(dev))
3758 flags &= ~(GFP_DMA | GFP_DMA32);
3759 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3760 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3761 flags |= GFP_DMA;
3762 else
3763 flags |= GFP_DMA32;
3764 }
3765
3766 if (gfpflags_allow_blocking(flags)) {
3767 unsigned int count = size >> PAGE_SHIFT;
3768
3769 page = dma_alloc_from_contiguous(dev, count, order,
3770 flags & __GFP_NOWARN);
3771 if (page && iommu_no_mapping(dev) &&
3772 page_to_phys(page) + size > dev->coherent_dma_mask) {
3773 dma_release_from_contiguous(dev, page, count);
3774 page = NULL;
3775 }
3776 }
3777
3778 if (!page)
3779 page = alloc_pages(flags, order);
3780 if (!page)
3781 return NULL;
3782 memset(page_address(page), 0, size);
3783
3784 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3785 DMA_BIDIRECTIONAL,
3786 dev->coherent_dma_mask);
3787 if (*dma_handle)
3788 return page_address(page);
3789 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3790 __free_pages(page, order);
3791
3792 return NULL;
3793}
3794
3795static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3796 dma_addr_t dma_handle, unsigned long attrs)
3797{
3798 int order;
3799 struct page *page = virt_to_page(vaddr);
3800
3801 size = PAGE_ALIGN(size);
3802 order = get_order(size);
3803
3804 intel_unmap(dev, dma_handle, size);
3805 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3806 __free_pages(page, order);
3807}
3808
3809static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3810 int nelems, enum dma_data_direction dir,
3811 unsigned long attrs)
3812{
3813 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3814 unsigned long nrpages = 0;
3815 struct scatterlist *sg;
3816 int i;
3817
3818 for_each_sg(sglist, sg, nelems, i) {
3819 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3820 }
3821
3822 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3823}
3824
3825static int intel_nontranslate_map_sg(struct device *hddev,
3826 struct scatterlist *sglist, int nelems, int dir)
3827{
3828 int i;
3829 struct scatterlist *sg;
3830
3831 for_each_sg(sglist, sg, nelems, i) {
3832 BUG_ON(!sg_page(sg));
3833 sg->dma_address = sg_phys(sg);
3834 sg->dma_length = sg->length;
3835 }
3836 return nelems;
3837}
3838
3839static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3840 enum dma_data_direction dir, unsigned long attrs)
3841{
3842 int i;
3843 struct dmar_domain *domain;
3844 size_t size = 0;
3845 int prot = 0;
3846 unsigned long iova_pfn;
3847 int ret;
3848 struct scatterlist *sg;
3849 unsigned long start_vpfn;
3850 struct intel_iommu *iommu;
3851
3852 BUG_ON(dir == DMA_NONE);
3853 if (iommu_no_mapping(dev))
3854 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3855
3856 domain = get_valid_domain_for_dev(dev);
3857 if (!domain)
3858 return 0;
3859
3860 iommu = domain_get_iommu(domain);
3861
3862 for_each_sg(sglist, sg, nelems, i)
3863 size += aligned_nrpages(sg->offset, sg->length);
3864
3865 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3866 *dev->dma_mask);
3867 if (!iova_pfn) {
3868 sglist->dma_length = 0;
3869 return 0;
3870 }
3871
3872 /*
3873 * Check if DMAR supports zero-length reads on write only
3874 * mappings..
3875 */
3876 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3877 !cap_zlr(iommu->cap))
3878 prot |= DMA_PTE_READ;
3879 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3880 prot |= DMA_PTE_WRITE;
3881
3882 start_vpfn = mm_to_dma_pfn(iova_pfn);
3883
3884 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3885 if (unlikely(ret)) {
3886 dma_pte_free_pagetable(domain, start_vpfn,
3887 start_vpfn + size - 1,
3888 agaw_to_level(domain->agaw) + 1);
3889 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3890 return 0;
3891 }
3892
3893 return nelems;
3894}
3895
3896static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3897{
3898 return !dma_addr;
3899}
3900
3901const struct dma_map_ops intel_dma_ops = {
3902 .alloc = intel_alloc_coherent,
3903 .free = intel_free_coherent,
3904 .map_sg = intel_map_sg,
3905 .unmap_sg = intel_unmap_sg,
3906 .map_page = intel_map_page,
3907 .unmap_page = intel_unmap_page,
3908 .mapping_error = intel_mapping_error,
3909#ifdef CONFIG_X86
3910 .dma_supported = dma_direct_supported,
3911#endif
3912};
3913
3914static inline int iommu_domain_cache_init(void)
3915{
3916 int ret = 0;
3917
3918 iommu_domain_cache = kmem_cache_create("iommu_domain",
3919 sizeof(struct dmar_domain),
3920 0,
3921 SLAB_HWCACHE_ALIGN,
3922
3923 NULL);
3924 if (!iommu_domain_cache) {
3925 pr_err("Couldn't create iommu_domain cache\n");
3926 ret = -ENOMEM;
3927 }
3928
3929 return ret;
3930}
3931
3932static inline int iommu_devinfo_cache_init(void)
3933{
3934 int ret = 0;
3935
3936 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3937 sizeof(struct device_domain_info),
3938 0,
3939 SLAB_HWCACHE_ALIGN,
3940 NULL);
3941 if (!iommu_devinfo_cache) {
3942 pr_err("Couldn't create devinfo cache\n");
3943 ret = -ENOMEM;
3944 }
3945
3946 return ret;
3947}
3948
3949static int __init iommu_init_mempool(void)
3950{
3951 int ret;
3952 ret = iova_cache_get();
3953 if (ret)
3954 return ret;
3955
3956 ret = iommu_domain_cache_init();
3957 if (ret)
3958 goto domain_error;
3959
3960 ret = iommu_devinfo_cache_init();
3961 if (!ret)
3962 return ret;
3963
3964 kmem_cache_destroy(iommu_domain_cache);
3965domain_error:
3966 iova_cache_put();
3967
3968 return -ENOMEM;
3969}
3970
3971static void __init iommu_exit_mempool(void)
3972{
3973 kmem_cache_destroy(iommu_devinfo_cache);
3974 kmem_cache_destroy(iommu_domain_cache);
3975 iova_cache_put();
3976}
3977
3978static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3979{
3980 struct dmar_drhd_unit *drhd;
3981 u32 vtbar;
3982 int rc;
3983
3984 /* We know that this device on this chipset has its own IOMMU.
3985 * If we find it under a different IOMMU, then the BIOS is lying
3986 * to us. Hope that the IOMMU for this device is actually
3987 * disabled, and it needs no translation...
3988 */
3989 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3990 if (rc) {
3991 /* "can't" happen */
3992 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3993 return;
3994 }
3995 vtbar &= 0xffff0000;
3996
3997 /* we know that the this iommu should be at offset 0xa000 from vtbar */
3998 drhd = dmar_find_matched_drhd_unit(pdev);
3999 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
4000 TAINT_FIRMWARE_WORKAROUND,
4001 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
4002 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4003}
4004DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
4005
4006static void __init init_no_remapping_devices(void)
4007{
4008 struct dmar_drhd_unit *drhd;
4009 struct device *dev;
4010 int i;
4011
4012 for_each_drhd_unit(drhd) {
4013 if (!drhd->include_all) {
4014 for_each_active_dev_scope(drhd->devices,
4015 drhd->devices_cnt, i, dev)
4016 break;
4017 /* ignore DMAR unit if no devices exist */
4018 if (i == drhd->devices_cnt)
4019 drhd->ignored = 1;
4020 }
4021 }
4022
4023 for_each_active_drhd_unit(drhd) {
4024 if (drhd->include_all)
4025 continue;
4026
4027 for_each_active_dev_scope(drhd->devices,
4028 drhd->devices_cnt, i, dev)
4029 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
4030 break;
4031 if (i < drhd->devices_cnt)
4032 continue;
4033
4034 /* This IOMMU has *only* gfx devices. Either bypass it or
4035 set the gfx_mapped flag, as appropriate */
4036 if (!dmar_map_gfx) {
4037 drhd->ignored = 1;
4038 for_each_active_dev_scope(drhd->devices,
4039 drhd->devices_cnt, i, dev)
4040 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4041 }
4042 }
4043}
4044
4045#ifdef CONFIG_SUSPEND
4046static int init_iommu_hw(void)
4047{
4048 struct dmar_drhd_unit *drhd;
4049 struct intel_iommu *iommu = NULL;
4050
4051 for_each_active_iommu(iommu, drhd)
4052 if (iommu->qi)
4053 dmar_reenable_qi(iommu);
4054
4055 for_each_iommu(iommu, drhd) {
4056 if (drhd->ignored) {
4057 /*
4058 * we always have to disable PMRs or DMA may fail on
4059 * this device
4060 */
4061 if (force_on)
4062 iommu_disable_protect_mem_regions(iommu);
4063 continue;
4064 }
4065
4066 iommu_flush_write_buffer(iommu);
4067
4068 iommu_set_root_entry(iommu);
4069
4070 iommu->flush.flush_context(iommu, 0, 0, 0,
4071 DMA_CCMD_GLOBAL_INVL);
4072 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4073 iommu_enable_translation(iommu);
4074 iommu_disable_protect_mem_regions(iommu);
4075 }
4076
4077 return 0;
4078}
4079
4080static void iommu_flush_all(void)
4081{
4082 struct dmar_drhd_unit *drhd;
4083 struct intel_iommu *iommu;
4084
4085 for_each_active_iommu(iommu, drhd) {
4086 iommu->flush.flush_context(iommu, 0, 0, 0,
4087 DMA_CCMD_GLOBAL_INVL);
4088 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
4089 DMA_TLB_GLOBAL_FLUSH);
4090 }
4091}
4092
4093static int iommu_suspend(void)
4094{
4095 struct dmar_drhd_unit *drhd;
4096 struct intel_iommu *iommu = NULL;
4097 unsigned long flag;
4098
4099 for_each_active_iommu(iommu, drhd) {
4100 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
4101 GFP_ATOMIC);
4102 if (!iommu->iommu_state)
4103 goto nomem;
4104 }
4105
4106 iommu_flush_all();
4107
4108 for_each_active_iommu(iommu, drhd) {
4109 iommu_disable_translation(iommu);
4110
4111 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4112
4113 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4114 readl(iommu->reg + DMAR_FECTL_REG);
4115 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4116 readl(iommu->reg + DMAR_FEDATA_REG);
4117 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4118 readl(iommu->reg + DMAR_FEADDR_REG);
4119 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4120 readl(iommu->reg + DMAR_FEUADDR_REG);
4121
4122 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4123 }
4124 return 0;
4125
4126nomem:
4127 for_each_active_iommu(iommu, drhd)
4128 kfree(iommu->iommu_state);
4129
4130 return -ENOMEM;
4131}
4132
4133static void iommu_resume(void)
4134{
4135 struct dmar_drhd_unit *drhd;
4136 struct intel_iommu *iommu = NULL;
4137 unsigned long flag;
4138
4139 if (init_iommu_hw()) {
4140 if (force_on)
4141 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4142 else
4143 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
4144 return;
4145 }
4146
4147 for_each_active_iommu(iommu, drhd) {
4148
4149 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4150
4151 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4152 iommu->reg + DMAR_FECTL_REG);
4153 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4154 iommu->reg + DMAR_FEDATA_REG);
4155 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4156 iommu->reg + DMAR_FEADDR_REG);
4157 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4158 iommu->reg + DMAR_FEUADDR_REG);
4159
4160 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4161 }
4162
4163 for_each_active_iommu(iommu, drhd)
4164 kfree(iommu->iommu_state);
4165}
4166
4167static struct syscore_ops iommu_syscore_ops = {
4168 .resume = iommu_resume,
4169 .suspend = iommu_suspend,
4170};
4171
4172static void __init init_iommu_pm_ops(void)
4173{
4174 register_syscore_ops(&iommu_syscore_ops);
4175}
4176
4177#else
4178static inline void init_iommu_pm_ops(void) {}
4179#endif /* CONFIG_PM */
4180
4181
4182int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
4183{
4184 struct acpi_dmar_reserved_memory *rmrr;
4185 int prot = DMA_PTE_READ|DMA_PTE_WRITE;
4186 struct dmar_rmrr_unit *rmrru;
4187 size_t length;
4188
4189 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4190 if (!rmrru)
4191 goto out;
4192
4193 rmrru->hdr = header;
4194 rmrr = (struct acpi_dmar_reserved_memory *)header;
4195 rmrru->base_address = rmrr->base_address;
4196 rmrru->end_address = rmrr->end_address;
4197
4198 length = rmrr->end_address - rmrr->base_address + 1;
4199 rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot,
4200 IOMMU_RESV_DIRECT);
4201 if (!rmrru->resv)
4202 goto free_rmrru;
4203
4204 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4205 ((void *)rmrr) + rmrr->header.length,
4206 &rmrru->devices_cnt);
4207 if (rmrru->devices_cnt && rmrru->devices == NULL)
4208 goto free_all;
4209
4210 list_add(&rmrru->list, &dmar_rmrr_units);
4211
4212 return 0;
4213free_all:
4214 kfree(rmrru->resv);
4215free_rmrru:
4216 kfree(rmrru);
4217out:
4218 return -ENOMEM;
4219}
4220
4221static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4222{
4223 struct dmar_atsr_unit *atsru;
4224 struct acpi_dmar_atsr *tmp;
4225
4226 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4227 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4228 if (atsr->segment != tmp->segment)
4229 continue;
4230 if (atsr->header.length != tmp->header.length)
4231 continue;
4232 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4233 return atsru;
4234 }
4235
4236 return NULL;
4237}
4238
4239int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4240{
4241 struct acpi_dmar_atsr *atsr;
4242 struct dmar_atsr_unit *atsru;
4243
4244 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
4245 return 0;
4246
4247 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4248 atsru = dmar_find_atsr(atsr);
4249 if (atsru)
4250 return 0;
4251
4252 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
4253 if (!atsru)
4254 return -ENOMEM;
4255
4256 /*
4257 * If memory is allocated from slab by ACPI _DSM method, we need to
4258 * copy the memory content because the memory buffer will be freed
4259 * on return.
4260 */
4261 atsru->hdr = (void *)(atsru + 1);
4262 memcpy(atsru->hdr, hdr, hdr->length);
4263 atsru->include_all = atsr->flags & 0x1;
4264 if (!atsru->include_all) {
4265 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4266 (void *)atsr + atsr->header.length,
4267 &atsru->devices_cnt);
4268 if (atsru->devices_cnt && atsru->devices == NULL) {
4269 kfree(atsru);
4270 return -ENOMEM;
4271 }
4272 }
4273
4274 list_add_rcu(&atsru->list, &dmar_atsr_units);
4275
4276 return 0;
4277}
4278
4279static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4280{
4281 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4282 kfree(atsru);
4283}
4284
4285int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4286{
4287 struct acpi_dmar_atsr *atsr;
4288 struct dmar_atsr_unit *atsru;
4289
4290 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4291 atsru = dmar_find_atsr(atsr);
4292 if (atsru) {
4293 list_del_rcu(&atsru->list);
4294 synchronize_rcu();
4295 intel_iommu_free_atsr(atsru);
4296 }
4297
4298 return 0;
4299}
4300
4301int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4302{
4303 int i;
4304 struct device *dev;
4305 struct acpi_dmar_atsr *atsr;
4306 struct dmar_atsr_unit *atsru;
4307
4308 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4309 atsru = dmar_find_atsr(atsr);
4310 if (!atsru)
4311 return 0;
4312
4313 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
4314 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4315 i, dev)
4316 return -EBUSY;
4317 }
4318
4319 return 0;
4320}
4321
4322static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4323{
4324 int sp, ret = 0;
4325 struct intel_iommu *iommu = dmaru->iommu;
4326
4327 if (g_iommus[iommu->seq_id])
4328 return 0;
4329
4330 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
4331 pr_warn("%s: Doesn't support hardware pass through.\n",
4332 iommu->name);
4333 return -ENXIO;
4334 }
4335 if (!ecap_sc_support(iommu->ecap) &&
4336 domain_update_iommu_snooping(iommu)) {
4337 pr_warn("%s: Doesn't support snooping.\n",
4338 iommu->name);
4339 return -ENXIO;
4340 }
4341 sp = domain_update_iommu_superpage(iommu) - 1;
4342 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
4343 pr_warn("%s: Doesn't support large page.\n",
4344 iommu->name);
4345 return -ENXIO;
4346 }
4347
4348 /*
4349 * Disable translation if already enabled prior to OS handover.
4350 */
4351 if (iommu->gcmd & DMA_GCMD_TE)
4352 iommu_disable_translation(iommu);
4353
4354 g_iommus[iommu->seq_id] = iommu;
4355 ret = iommu_init_domains(iommu);
4356 if (ret == 0)
4357 ret = iommu_alloc_root_entry(iommu);
4358 if (ret)
4359 goto out;
4360
4361#ifdef CONFIG_INTEL_IOMMU_SVM
4362 if (pasid_enabled(iommu))
4363 intel_svm_init(iommu);
4364#endif
4365
4366 if (dmaru->ignored) {
4367 /*
4368 * we always have to disable PMRs or DMA may fail on this device
4369 */
4370 if (force_on)
4371 iommu_disable_protect_mem_regions(iommu);
4372 return 0;
4373 }
4374
4375 intel_iommu_init_qi(iommu);
4376 iommu_flush_write_buffer(iommu);
4377
4378#ifdef CONFIG_INTEL_IOMMU_SVM
4379 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
4380 ret = intel_svm_enable_prq(iommu);
4381 if (ret)
4382 goto disable_iommu;
4383 }
4384#endif
4385 ret = dmar_set_interrupt(iommu);
4386 if (ret)
4387 goto disable_iommu;
4388
4389 iommu_set_root_entry(iommu);
4390 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4391 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4392 iommu_enable_translation(iommu);
4393
4394 iommu_disable_protect_mem_regions(iommu);
4395 return 0;
4396
4397disable_iommu:
4398 disable_dmar_iommu(iommu);
4399out:
4400 free_dmar_iommu(iommu);
4401 return ret;
4402}
4403
4404int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4405{
4406 int ret = 0;
4407 struct intel_iommu *iommu = dmaru->iommu;
4408
4409 if (!intel_iommu_enabled)
4410 return 0;
4411 if (iommu == NULL)
4412 return -EINVAL;
4413
4414 if (insert) {
4415 ret = intel_iommu_add(dmaru);
4416 } else {
4417 disable_dmar_iommu(iommu);
4418 free_dmar_iommu(iommu);
4419 }
4420
4421 return ret;
4422}
4423
4424static void intel_iommu_free_dmars(void)
4425{
4426 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4427 struct dmar_atsr_unit *atsru, *atsr_n;
4428
4429 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4430 list_del(&rmrru->list);
4431 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4432 kfree(rmrru->resv);
4433 kfree(rmrru);
4434 }
4435
4436 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4437 list_del(&atsru->list);
4438 intel_iommu_free_atsr(atsru);
4439 }
4440}
4441
4442int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4443{
4444 int i, ret = 1;
4445 struct pci_bus *bus;
4446 struct pci_dev *bridge = NULL;
4447 struct device *tmp;
4448 struct acpi_dmar_atsr *atsr;
4449 struct dmar_atsr_unit *atsru;
4450
4451 dev = pci_physfn(dev);
4452 for (bus = dev->bus; bus; bus = bus->parent) {
4453 bridge = bus->self;
4454 /* If it's an integrated device, allow ATS */
4455 if (!bridge)
4456 return 1;
4457 /* Connected via non-PCIe: no ATS */
4458 if (!pci_is_pcie(bridge) ||
4459 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4460 return 0;
4461 /* If we found the root port, look it up in the ATSR */
4462 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4463 break;
4464 }
4465
4466 rcu_read_lock();
4467 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4468 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4469 if (atsr->segment != pci_domain_nr(dev->bus))
4470 continue;
4471
4472 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4473 if (tmp == &bridge->dev)
4474 goto out;
4475
4476 if (atsru->include_all)
4477 goto out;
4478 }
4479 ret = 0;
4480out:
4481 rcu_read_unlock();
4482
4483 return ret;
4484}
4485
4486int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4487{
4488 int ret = 0;
4489 struct dmar_rmrr_unit *rmrru;
4490 struct dmar_atsr_unit *atsru;
4491 struct acpi_dmar_atsr *atsr;
4492 struct acpi_dmar_reserved_memory *rmrr;
4493
4494 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
4495 return 0;
4496
4497 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4498 rmrr = container_of(rmrru->hdr,
4499 struct acpi_dmar_reserved_memory, header);
4500 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4501 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4502 ((void *)rmrr) + rmrr->header.length,
4503 rmrr->segment, rmrru->devices,
4504 rmrru->devices_cnt);
4505 if(ret < 0)
4506 return ret;
4507 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4508 dmar_remove_dev_scope(info, rmrr->segment,
4509 rmrru->devices, rmrru->devices_cnt);
4510 }
4511 }
4512
4513 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4514 if (atsru->include_all)
4515 continue;
4516
4517 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4518 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4519 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4520 (void *)atsr + atsr->header.length,
4521 atsr->segment, atsru->devices,
4522 atsru->devices_cnt);
4523 if (ret > 0)
4524 break;
4525 else if(ret < 0)
4526 return ret;
4527 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4528 if (dmar_remove_dev_scope(info, atsr->segment,
4529 atsru->devices, atsru->devices_cnt))
4530 break;
4531 }
4532 }
4533
4534 return 0;
4535}
4536
4537/*
4538 * Here we only respond to action of unbound device from driver.
4539 *
4540 * Added device is not attached to its DMAR domain here yet. That will happen
4541 * when mapping the device to iova.
4542 */
4543static int device_notifier(struct notifier_block *nb,
4544 unsigned long action, void *data)
4545{
4546 struct device *dev = data;
4547 struct dmar_domain *domain;
4548
4549 if (iommu_dummy(dev))
4550 return 0;
4551
4552 if (action != BUS_NOTIFY_REMOVED_DEVICE)
4553 return 0;
4554
4555 domain = find_domain(dev);
4556 if (!domain)
4557 return 0;
4558
4559 dmar_remove_one_dev_info(domain, dev);
4560 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
4561 domain_exit(domain);
4562
4563 return 0;
4564}
4565
4566static struct notifier_block device_nb = {
4567 .notifier_call = device_notifier,
4568};
4569
4570static int intel_iommu_memory_notifier(struct notifier_block *nb,
4571 unsigned long val, void *v)
4572{
4573 struct memory_notify *mhp = v;
4574 unsigned long long start, end;
4575 unsigned long start_vpfn, last_vpfn;
4576
4577 switch (val) {
4578 case MEM_GOING_ONLINE:
4579 start = mhp->start_pfn << PAGE_SHIFT;
4580 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4581 if (iommu_domain_identity_map(si_domain, start, end)) {
4582 pr_warn("Failed to build identity map for [%llx-%llx]\n",
4583 start, end);
4584 return NOTIFY_BAD;
4585 }
4586 break;
4587
4588 case MEM_OFFLINE:
4589 case MEM_CANCEL_ONLINE:
4590 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4591 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4592 while (start_vpfn <= last_vpfn) {
4593 struct iova *iova;
4594 struct dmar_drhd_unit *drhd;
4595 struct intel_iommu *iommu;
4596 struct page *freelist;
4597
4598 iova = find_iova(&si_domain->iovad, start_vpfn);
4599 if (iova == NULL) {
4600 pr_debug("Failed get IOVA for PFN %lx\n",
4601 start_vpfn);
4602 break;
4603 }
4604
4605 iova = split_and_remove_iova(&si_domain->iovad, iova,
4606 start_vpfn, last_vpfn);
4607 if (iova == NULL) {
4608 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
4609 start_vpfn, last_vpfn);
4610 return NOTIFY_BAD;
4611 }
4612
4613 freelist = domain_unmap(si_domain, iova->pfn_lo,
4614 iova->pfn_hi);
4615
4616 rcu_read_lock();
4617 for_each_active_iommu(iommu, drhd)
4618 iommu_flush_iotlb_psi(iommu, si_domain,
4619 iova->pfn_lo, iova_size(iova),
4620 !freelist, 0);
4621 rcu_read_unlock();
4622 dma_free_pagelist(freelist);
4623
4624 start_vpfn = iova->pfn_hi + 1;
4625 free_iova_mem(iova);
4626 }
4627 break;
4628 }
4629
4630 return NOTIFY_OK;
4631}
4632
4633static struct notifier_block intel_iommu_memory_nb = {
4634 .notifier_call = intel_iommu_memory_notifier,
4635 .priority = 0
4636};
4637
4638static void free_all_cpu_cached_iovas(unsigned int cpu)
4639{
4640 int i;
4641
4642 for (i = 0; i < g_num_of_iommus; i++) {
4643 struct intel_iommu *iommu = g_iommus[i];
4644 struct dmar_domain *domain;
4645 int did;
4646
4647 if (!iommu)
4648 continue;
4649
4650 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
4651 domain = get_iommu_domain(iommu, (u16)did);
4652
4653 if (!domain)
4654 continue;
4655 free_cpu_cached_iovas(cpu, &domain->iovad);
4656 }
4657 }
4658}
4659
4660static int intel_iommu_cpu_dead(unsigned int cpu)
4661{
4662 free_all_cpu_cached_iovas(cpu);
4663 return 0;
4664}
4665
4666static void intel_disable_iommus(void)
4667{
4668 struct intel_iommu *iommu = NULL;
4669 struct dmar_drhd_unit *drhd;
4670
4671 for_each_iommu(iommu, drhd)
4672 iommu_disable_translation(iommu);
4673}
4674
4675static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4676{
4677 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4678
4679 return container_of(iommu_dev, struct intel_iommu, iommu);
4680}
4681
4682static ssize_t intel_iommu_show_version(struct device *dev,
4683 struct device_attribute *attr,
4684 char *buf)
4685{
4686 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4687 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4688 return sprintf(buf, "%d:%d\n",
4689 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4690}
4691static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4692
4693static ssize_t intel_iommu_show_address(struct device *dev,
4694 struct device_attribute *attr,
4695 char *buf)
4696{
4697 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4698 return sprintf(buf, "%llx\n", iommu->reg_phys);
4699}
4700static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4701
4702static ssize_t intel_iommu_show_cap(struct device *dev,
4703 struct device_attribute *attr,
4704 char *buf)
4705{
4706 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4707 return sprintf(buf, "%llx\n", iommu->cap);
4708}
4709static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4710
4711static ssize_t intel_iommu_show_ecap(struct device *dev,
4712 struct device_attribute *attr,
4713 char *buf)
4714{
4715 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4716 return sprintf(buf, "%llx\n", iommu->ecap);
4717}
4718static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4719
4720static ssize_t intel_iommu_show_ndoms(struct device *dev,
4721 struct device_attribute *attr,
4722 char *buf)
4723{
4724 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4725 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4726}
4727static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4728
4729static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4730 struct device_attribute *attr,
4731 char *buf)
4732{
4733 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4734 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4735 cap_ndoms(iommu->cap)));
4736}
4737static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4738
4739static struct attribute *intel_iommu_attrs[] = {
4740 &dev_attr_version.attr,
4741 &dev_attr_address.attr,
4742 &dev_attr_cap.attr,
4743 &dev_attr_ecap.attr,
4744 &dev_attr_domains_supported.attr,
4745 &dev_attr_domains_used.attr,
4746 NULL,
4747};
4748
4749static struct attribute_group intel_iommu_group = {
4750 .name = "intel-iommu",
4751 .attrs = intel_iommu_attrs,
4752};
4753
4754const struct attribute_group *intel_iommu_groups[] = {
4755 &intel_iommu_group,
4756 NULL,
4757};
4758
4759int __init intel_iommu_init(void)
4760{
4761 int ret = -ENODEV;
4762 struct dmar_drhd_unit *drhd;
4763 struct intel_iommu *iommu;
4764
4765 /* VT-d is required for a TXT/tboot launch, so enforce that */
4766 force_on = tboot_force_iommu();
4767
4768 if (iommu_init_mempool()) {
4769 if (force_on)
4770 panic("tboot: Failed to initialize iommu memory\n");
4771 return -ENOMEM;
4772 }
4773
4774 down_write(&dmar_global_lock);
4775 if (dmar_table_init()) {
4776 if (force_on)
4777 panic("tboot: Failed to initialize DMAR table\n");
4778 goto out_free_dmar;
4779 }
4780
4781 if (dmar_dev_scope_init() < 0) {
4782 if (force_on)
4783 panic("tboot: Failed to initialize DMAR device scope\n");
4784 goto out_free_dmar;
4785 }
4786
4787 up_write(&dmar_global_lock);
4788
4789 /*
4790 * The bus notifier takes the dmar_global_lock, so lockdep will
4791 * complain later when we register it under the lock.
4792 */
4793 dmar_register_bus_notifier();
4794
4795 down_write(&dmar_global_lock);
4796
4797 if (no_iommu || dmar_disabled) {
4798 /*
4799 * We exit the function here to ensure IOMMU's remapping and
4800 * mempool aren't setup, which means that the IOMMU's PMRs
4801 * won't be disabled via the call to init_dmars(). So disable
4802 * it explicitly here. The PMRs were setup by tboot prior to
4803 * calling SENTER, but the kernel is expected to reset/tear
4804 * down the PMRs.
4805 */
4806 if (intel_iommu_tboot_noforce) {
4807 for_each_iommu(iommu, drhd)
4808 iommu_disable_protect_mem_regions(iommu);
4809 }
4810
4811 /*
4812 * Make sure the IOMMUs are switched off, even when we
4813 * boot into a kexec kernel and the previous kernel left
4814 * them enabled
4815 */
4816 intel_disable_iommus();
4817 goto out_free_dmar;
4818 }
4819
4820 if (list_empty(&dmar_rmrr_units))
4821 pr_info("No RMRR found\n");
4822
4823 if (list_empty(&dmar_atsr_units))
4824 pr_info("No ATSR found\n");
4825
4826 if (dmar_init_reserved_ranges()) {
4827 if (force_on)
4828 panic("tboot: Failed to reserve iommu ranges\n");
4829 goto out_free_reserved_range;
4830 }
4831
4832 if (dmar_map_gfx)
4833 intel_iommu_gfx_mapped = 1;
4834
4835 init_no_remapping_devices();
4836
4837 ret = init_dmars();
4838 if (ret) {
4839 if (force_on)
4840 panic("tboot: Failed to initialize DMARs\n");
4841 pr_err("Initialization failed\n");
4842 goto out_free_reserved_range;
4843 }
4844 up_write(&dmar_global_lock);
4845 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4846
4847#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
4848 swiotlb = 0;
4849#endif
4850 dma_ops = &intel_dma_ops;
4851
4852 init_iommu_pm_ops();
4853
4854 for_each_active_iommu(iommu, drhd) {
4855 iommu_device_sysfs_add(&iommu->iommu, NULL,
4856 intel_iommu_groups,
4857 "%s", iommu->name);
4858 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
4859 iommu_device_register(&iommu->iommu);
4860 }
4861
4862 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4863 bus_register_notifier(&pci_bus_type, &device_nb);
4864 if (si_domain && !hw_pass_through)
4865 register_memory_notifier(&intel_iommu_memory_nb);
4866 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
4867 intel_iommu_cpu_dead);
4868 intel_iommu_enabled = 1;
4869
4870 return 0;
4871
4872out_free_reserved_range:
4873 put_iova_domain(&reserved_iova_list);
4874out_free_dmar:
4875 intel_iommu_free_dmars();
4876 up_write(&dmar_global_lock);
4877 iommu_exit_mempool();
4878 return ret;
4879}
4880
4881static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4882{
4883 struct intel_iommu *iommu = opaque;
4884
4885 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4886 return 0;
4887}
4888
4889/*
4890 * NB - intel-iommu lacks any sort of reference counting for the users of
4891 * dependent devices. If multiple endpoints have intersecting dependent
4892 * devices, unbinding the driver from any one of them will possibly leave
4893 * the others unable to operate.
4894 */
4895static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
4896{
4897 if (!iommu || !dev || !dev_is_pci(dev))
4898 return;
4899
4900 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
4901}
4902
4903static void __dmar_remove_one_dev_info(struct device_domain_info *info)
4904{
4905 struct intel_iommu *iommu;
4906 unsigned long flags;
4907
4908 assert_spin_locked(&device_domain_lock);
4909
4910 if (WARN_ON(!info))
4911 return;
4912
4913 iommu = info->iommu;
4914
4915 if (info->dev) {
4916 iommu_disable_dev_iotlb(info);
4917 domain_context_clear(iommu, info->dev);
4918 intel_pasid_free_table(info->dev);
4919 }
4920
4921 unlink_domain_info(info);
4922
4923 spin_lock_irqsave(&iommu->lock, flags);
4924 domain_detach_iommu(info->domain, iommu);
4925 spin_unlock_irqrestore(&iommu->lock, flags);
4926
4927 free_devinfo_mem(info);
4928}
4929
4930static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4931 struct device *dev)
4932{
4933 struct device_domain_info *info;
4934 unsigned long flags;
4935
4936 spin_lock_irqsave(&device_domain_lock, flags);
4937 info = dev->archdata.iommu;
4938 __dmar_remove_one_dev_info(info);
4939 spin_unlock_irqrestore(&device_domain_lock, flags);
4940}
4941
4942static int md_domain_init(struct dmar_domain *domain, int guest_width)
4943{
4944 int adjust_width;
4945
4946 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
4947 domain_reserve_special_ranges(domain);
4948
4949 /* calculate AGAW */
4950 domain->gaw = guest_width;
4951 adjust_width = guestwidth_to_adjustwidth(guest_width);
4952 domain->agaw = width_to_agaw(adjust_width);
4953
4954 domain->iommu_coherency = 0;
4955 domain->iommu_snooping = 0;
4956 domain->iommu_superpage = 0;
4957 domain->max_addr = 0;
4958
4959 /* always allocate the top pgd */
4960 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4961 if (!domain->pgd)
4962 return -ENOMEM;
4963 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4964 return 0;
4965}
4966
4967static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4968{
4969 struct dmar_domain *dmar_domain;
4970 struct iommu_domain *domain;
4971
4972 if (type != IOMMU_DOMAIN_UNMANAGED)
4973 return NULL;
4974
4975 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4976 if (!dmar_domain) {
4977 pr_err("Can't allocate dmar_domain\n");
4978 return NULL;
4979 }
4980 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4981 pr_err("Domain initialization failed\n");
4982 domain_exit(dmar_domain);
4983 return NULL;
4984 }
4985 domain_update_iommu_cap(dmar_domain);
4986
4987 domain = &dmar_domain->domain;
4988 domain->geometry.aperture_start = 0;
4989 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4990 domain->geometry.force_aperture = true;
4991
4992 return domain;
4993}
4994
4995static void intel_iommu_domain_free(struct iommu_domain *domain)
4996{
4997 domain_exit(to_dmar_domain(domain));
4998}
4999
5000static int intel_iommu_attach_device(struct iommu_domain *domain,
5001 struct device *dev)
5002{
5003 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5004 struct intel_iommu *iommu;
5005 int addr_width;
5006 u8 bus, devfn;
5007
5008 if (device_is_rmrr_locked(dev)) {
5009 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
5010 return -EPERM;
5011 }
5012
5013 /* normally dev is not mapped */
5014 if (unlikely(domain_context_mapped(dev))) {
5015 struct dmar_domain *old_domain;
5016
5017 old_domain = find_domain(dev);
5018 if (old_domain) {
5019 rcu_read_lock();
5020 dmar_remove_one_dev_info(old_domain, dev);
5021 rcu_read_unlock();
5022
5023 if (!domain_type_is_vm_or_si(old_domain) &&
5024 list_empty(&old_domain->devices))
5025 domain_exit(old_domain);
5026 }
5027 }
5028
5029 iommu = device_to_iommu(dev, &bus, &devfn);
5030 if (!iommu)
5031 return -ENODEV;
5032
5033 /* check if this iommu agaw is sufficient for max mapped address */
5034 addr_width = agaw_to_width(iommu->agaw);
5035 if (addr_width > cap_mgaw(iommu->cap))
5036 addr_width = cap_mgaw(iommu->cap);
5037
5038 if (dmar_domain->max_addr > (1LL << addr_width)) {
5039 pr_err("%s: iommu width (%d) is not "
5040 "sufficient for the mapped address (%llx)\n",
5041 __func__, addr_width, dmar_domain->max_addr);
5042 return -EFAULT;
5043 }
5044 dmar_domain->gaw = addr_width;
5045
5046 /*
5047 * Knock out extra levels of page tables if necessary
5048 */
5049 while (iommu->agaw < dmar_domain->agaw) {
5050 struct dma_pte *pte;
5051
5052 pte = dmar_domain->pgd;
5053 if (dma_pte_present(pte)) {
5054 dmar_domain->pgd = (struct dma_pte *)
5055 phys_to_virt(dma_pte_addr(pte));
5056 free_pgtable_page(pte);
5057 }
5058 dmar_domain->agaw--;
5059 }
5060
5061 return domain_add_dev_info(dmar_domain, dev);
5062}
5063
5064static void intel_iommu_detach_device(struct iommu_domain *domain,
5065 struct device *dev)
5066{
5067 dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
5068}
5069
5070static int intel_iommu_map(struct iommu_domain *domain,
5071 unsigned long iova, phys_addr_t hpa,
5072 size_t size, int iommu_prot)
5073{
5074 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5075 u64 max_addr;
5076 int prot = 0;
5077 int ret;
5078
5079 if (iommu_prot & IOMMU_READ)
5080 prot |= DMA_PTE_READ;
5081 if (iommu_prot & IOMMU_WRITE)
5082 prot |= DMA_PTE_WRITE;
5083 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5084 prot |= DMA_PTE_SNP;
5085
5086 max_addr = iova + size;
5087 if (dmar_domain->max_addr < max_addr) {
5088 u64 end;
5089
5090 /* check if minimum agaw is sufficient for mapped address */
5091 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
5092 if (end < max_addr) {
5093 pr_err("%s: iommu width (%d) is not "
5094 "sufficient for the mapped address (%llx)\n",
5095 __func__, dmar_domain->gaw, max_addr);
5096 return -EFAULT;
5097 }
5098 dmar_domain->max_addr = max_addr;
5099 }
5100 /* Round up size to next multiple of PAGE_SIZE, if it and
5101 the low bits of hpa would take us onto the next page */
5102 size = aligned_nrpages(hpa, size);
5103 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5104 hpa >> VTD_PAGE_SHIFT, size, prot);
5105 return ret;
5106}
5107
5108static size_t intel_iommu_unmap(struct iommu_domain *domain,
5109 unsigned long iova, size_t size)
5110{
5111 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5112 struct page *freelist = NULL;
5113 unsigned long start_pfn, last_pfn;
5114 unsigned int npages;
5115 int iommu_id, level = 0;
5116
5117 /* Cope with horrid API which requires us to unmap more than the
5118 size argument if it happens to be a large-page mapping. */
5119 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5120
5121 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5122 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
5123
5124 start_pfn = iova >> VTD_PAGE_SHIFT;
5125 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5126
5127 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5128
5129 npages = last_pfn - start_pfn + 1;
5130
5131 for_each_domain_iommu(iommu_id, dmar_domain)
5132 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5133 start_pfn, npages, !freelist, 0);
5134
5135 dma_free_pagelist(freelist);
5136
5137 if (dmar_domain->max_addr == iova + size)
5138 dmar_domain->max_addr = iova;
5139
5140 return size;
5141}
5142
5143static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
5144 dma_addr_t iova)
5145{
5146 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5147 struct dma_pte *pte;
5148 int level = 0;
5149 u64 phys = 0;
5150
5151 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
5152 if (pte)
5153 phys = dma_pte_addr(pte);
5154
5155 return phys;
5156}
5157
5158static bool intel_iommu_capable(enum iommu_cap cap)
5159{
5160 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5161 return domain_update_iommu_snooping(NULL) == 1;
5162 if (cap == IOMMU_CAP_INTR_REMAP)
5163 return irq_remapping_enabled == 1;
5164
5165 return false;
5166}
5167
5168static int intel_iommu_add_device(struct device *dev)
5169{
5170 struct intel_iommu *iommu;
5171 struct iommu_group *group;
5172 u8 bus, devfn;
5173
5174 iommu = device_to_iommu(dev, &bus, &devfn);
5175 if (!iommu)
5176 return -ENODEV;
5177
5178 iommu_device_link(&iommu->iommu, dev);
5179
5180 group = iommu_group_get_for_dev(dev);
5181
5182 if (IS_ERR(group))
5183 return PTR_ERR(group);
5184
5185 iommu_group_put(group);
5186 return 0;
5187}
5188
5189static void intel_iommu_remove_device(struct device *dev)
5190{
5191 struct intel_iommu *iommu;
5192 u8 bus, devfn;
5193
5194 iommu = device_to_iommu(dev, &bus, &devfn);
5195 if (!iommu)
5196 return;
5197
5198 iommu_group_remove_device(dev);
5199
5200 iommu_device_unlink(&iommu->iommu, dev);
5201}
5202
5203static void intel_iommu_get_resv_regions(struct device *device,
5204 struct list_head *head)
5205{
5206 struct iommu_resv_region *reg;
5207 struct dmar_rmrr_unit *rmrr;
5208 struct device *i_dev;
5209 int i;
5210
5211 rcu_read_lock();
5212 for_each_rmrr_units(rmrr) {
5213 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5214 i, i_dev) {
5215 if (i_dev != device)
5216 continue;
5217
5218 list_add_tail(&rmrr->resv->list, head);
5219 }
5220 }
5221 rcu_read_unlock();
5222
5223 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5224 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
5225 0, IOMMU_RESV_MSI);
5226 if (!reg)
5227 return;
5228 list_add_tail(&reg->list, head);
5229}
5230
5231static void intel_iommu_put_resv_regions(struct device *dev,
5232 struct list_head *head)
5233{
5234 struct iommu_resv_region *entry, *next;
5235
5236 list_for_each_entry_safe(entry, next, head, list) {
5237 if (entry->type == IOMMU_RESV_MSI)
5238 kfree(entry);
5239 }
5240}
5241
5242#ifdef CONFIG_INTEL_IOMMU_SVM
5243#define MAX_NR_PASID_BITS (20)
5244static inline unsigned long intel_iommu_get_pts(struct device *dev)
5245{
5246 int pts, max_pasid;
5247
5248 max_pasid = intel_pasid_get_dev_max_id(dev);
5249 pts = find_first_bit((unsigned long *)&max_pasid, MAX_NR_PASID_BITS);
5250 if (pts < 5)
5251 return 0;
5252
5253 return pts - 5;
5254}
5255
5256int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
5257{
5258 struct device_domain_info *info;
5259 struct context_entry *context;
5260 struct dmar_domain *domain;
5261 unsigned long flags;
5262 u64 ctx_lo;
5263 int ret;
5264
5265 domain = get_valid_domain_for_dev(sdev->dev);
5266 if (!domain)
5267 return -EINVAL;
5268
5269 spin_lock_irqsave(&device_domain_lock, flags);
5270 spin_lock(&iommu->lock);
5271
5272 ret = -EINVAL;
5273 info = sdev->dev->archdata.iommu;
5274 if (!info || !info->pasid_supported)
5275 goto out;
5276
5277 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5278 if (WARN_ON(!context))
5279 goto out;
5280
5281 ctx_lo = context[0].lo;
5282
5283 sdev->did = domain->iommu_did[iommu->seq_id];
5284 sdev->sid = PCI_DEVID(info->bus, info->devfn);
5285
5286 if (!(ctx_lo & CONTEXT_PASIDE)) {
5287 if (iommu->pasid_state_table)
5288 context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
5289 context[1].lo = (u64)virt_to_phys(info->pasid_table->table) |
5290 intel_iommu_get_pts(sdev->dev);
5291
5292 wmb();
5293 /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
5294 * extended to permit requests-with-PASID if the PASIDE bit
5295 * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH,
5296 * however, the PASIDE bit is ignored and requests-with-PASID
5297 * are unconditionally blocked. Which makes less sense.
5298 * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
5299 * "guest mode" translation types depending on whether ATS
5300 * is available or not. Annoyingly, we can't use the new
5301 * modes *unless* PASIDE is set. */
5302 if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
5303 ctx_lo &= ~CONTEXT_TT_MASK;
5304 if (info->ats_supported)
5305 ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
5306 else
5307 ctx_lo |= CONTEXT_TT_PT_PASID << 2;
5308 }
5309 ctx_lo |= CONTEXT_PASIDE;
5310 if (iommu->pasid_state_table)
5311 ctx_lo |= CONTEXT_DINVE;
5312 if (info->pri_supported)
5313 ctx_lo |= CONTEXT_PRS;
5314 context[0].lo = ctx_lo;
5315 wmb();
5316 iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
5317 DMA_CCMD_MASK_NOBIT,
5318 DMA_CCMD_DEVICE_INVL);
5319 }
5320
5321 /* Enable PASID support in the device, if it wasn't already */
5322 if (!info->pasid_enabled)
5323 iommu_enable_dev_iotlb(info);
5324
5325 if (info->ats_enabled) {
5326 sdev->dev_iotlb = 1;
5327 sdev->qdep = info->ats_qdep;
5328 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
5329 sdev->qdep = 0;
5330 }
5331 ret = 0;
5332
5333 out:
5334 spin_unlock(&iommu->lock);
5335 spin_unlock_irqrestore(&device_domain_lock, flags);
5336
5337 return ret;
5338}
5339
5340struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5341{
5342 struct intel_iommu *iommu;
5343 u8 bus, devfn;
5344
5345 if (iommu_dummy(dev)) {
5346 dev_warn(dev,
5347 "No IOMMU translation for device; cannot enable SVM\n");
5348 return NULL;
5349 }
5350
5351 iommu = device_to_iommu(dev, &bus, &devfn);
5352 if ((!iommu)) {
5353 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
5354 return NULL;
5355 }
5356
5357 return iommu;
5358}
5359#endif /* CONFIG_INTEL_IOMMU_SVM */
5360
5361const struct iommu_ops intel_iommu_ops = {
5362 .capable = intel_iommu_capable,
5363 .domain_alloc = intel_iommu_domain_alloc,
5364 .domain_free = intel_iommu_domain_free,
5365 .attach_dev = intel_iommu_attach_device,
5366 .detach_dev = intel_iommu_detach_device,
5367 .map = intel_iommu_map,
5368 .unmap = intel_iommu_unmap,
5369 .iova_to_phys = intel_iommu_iova_to_phys,
5370 .add_device = intel_iommu_add_device,
5371 .remove_device = intel_iommu_remove_device,
5372 .get_resv_regions = intel_iommu_get_resv_regions,
5373 .put_resv_regions = intel_iommu_put_resv_regions,
5374 .device_group = pci_device_group,
5375 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
5376};
5377
5378static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5379{
5380 /* G4x/GM45 integrated gfx dmar support is totally busted. */
5381 pr_info("Disabling IOMMU for graphics on this chipset\n");
5382 dmar_map_gfx = 0;
5383}
5384
5385DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5386DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5387DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5388DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5389DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5390DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5391DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5392
5393static void quirk_iommu_rwbf(struct pci_dev *dev)
5394{
5395 /*
5396 * Mobile 4 Series Chipset neglects to set RWBF capability,
5397 * but needs it. Same seems to hold for the desktop versions.
5398 */
5399 pr_info("Forcing write-buffer flush capability\n");
5400 rwbf_quirk = 1;
5401}
5402
5403DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
5404DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5405DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5406DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5407DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5408DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5409DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
5410
5411#define GGC 0x52
5412#define GGC_MEMORY_SIZE_MASK (0xf << 8)
5413#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5414#define GGC_MEMORY_SIZE_1M (0x1 << 8)
5415#define GGC_MEMORY_SIZE_2M (0x3 << 8)
5416#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5417#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5418#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5419#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5420
5421static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
5422{
5423 unsigned short ggc;
5424
5425 if (pci_read_config_word(dev, GGC, &ggc))
5426 return;
5427
5428 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
5429 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
5430 dmar_map_gfx = 0;
5431 } else if (dmar_map_gfx) {
5432 /* we have to ensure the gfx device is idle before we flush */
5433 pr_info("Disabling batched IOTLB flush on Ironlake\n");
5434 intel_iommu_strict = 1;
5435 }
5436}
5437DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5438DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5439DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5440DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5441
5442/* On Tylersburg chipsets, some BIOSes have been known to enable the
5443 ISOCH DMAR unit for the Azalia sound device, but not give it any
5444 TLB entries, which causes it to deadlock. Check for that. We do
5445 this in a function called from init_dmars(), instead of in a PCI
5446 quirk, because we don't want to print the obnoxious "BIOS broken"
5447 message if VT-d is actually disabled.
5448*/
5449static void __init check_tylersburg_isoch(void)
5450{
5451 struct pci_dev *pdev;
5452 uint32_t vtisochctrl;
5453
5454 /* If there's no Azalia in the system anyway, forget it. */
5455 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5456 if (!pdev)
5457 return;
5458 pci_dev_put(pdev);
5459
5460 /* System Management Registers. Might be hidden, in which case
5461 we can't do the sanity check. But that's OK, because the
5462 known-broken BIOSes _don't_ actually hide it, so far. */
5463 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5464 if (!pdev)
5465 return;
5466
5467 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5468 pci_dev_put(pdev);
5469 return;
5470 }
5471
5472 pci_dev_put(pdev);
5473
5474 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5475 if (vtisochctrl & 1)
5476 return;
5477
5478 /* Drop all bits other than the number of TLB entries */
5479 vtisochctrl &= 0x1c;
5480
5481 /* If we have the recommended number of TLB entries (16), fine. */
5482 if (vtisochctrl == 0x10)
5483 return;
5484
5485 /* Zero TLB entries? You get to ride the short bus to school. */
5486 if (!vtisochctrl) {
5487 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5488 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5489 dmi_get_system_info(DMI_BIOS_VENDOR),
5490 dmi_get_system_info(DMI_BIOS_VERSION),
5491 dmi_get_system_info(DMI_PRODUCT_VERSION));
5492 iommu_identity_mapping |= IDENTMAP_AZALIA;
5493 return;
5494 }
5495
5496 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
5497 vtisochctrl);
5498}