blob: d662895690162823476ddbeb16e747bbe88e22d4 [file] [log] [blame]
xjb04a4022021-11-25 15:01:52 +08001/*
2 * IOMMU API for ARM architected SMMU implementations.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16 *
17 * Copyright (C) 2013 ARM Limited
18 *
19 * Author: Will Deacon <will.deacon@arm.com>
20 *
21 * This driver currently supports:
22 * - SMMUv1 and v2 implementations
23 * - Stream-matching and stream-indexing
24 * - v7/v8 long-descriptor format
25 * - Non-secure access to the SMMU
26 * - Context fault reporting
27 * - Extended Stream ID (16 bit)
28 */
29
30#define pr_fmt(fmt) "arm-smmu: " fmt
31
32#include <linux/acpi.h>
33#include <linux/acpi_iort.h>
34#include <linux/atomic.h>
35#include <linux/delay.h>
36#include <linux/dma-iommu.h>
37#include <linux/dma-mapping.h>
38#include <linux/err.h>
39#include <linux/interrupt.h>
40#include <linux/io.h>
41#include <linux/io-64-nonatomic-hi-lo.h>
42#include <linux/io-pgtable.h>
43#include <linux/iommu.h>
44#include <linux/iopoll.h>
45#include <linux/module.h>
46#include <linux/of.h>
47#include <linux/of_address.h>
48#include <linux/of_device.h>
49#include <linux/of_iommu.h>
50#include <linux/pci.h>
51#include <linux/platform_device.h>
52#include <linux/slab.h>
53#include <linux/spinlock.h>
54
55#include <linux/amba/bus.h>
56
57#include "arm-smmu-regs.h"
58
59/*
60 * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
61 * global register space are still, in fact, using a hypervisor to mediate it
62 * by trapping and emulating register accesses. Sadly, some deployed versions
63 * of said trapping code have bugs wherein they go horribly wrong for stores
64 * using r31 (i.e. XZR/WZR) as the source register.
65 */
66#define QCOM_DUMMY_VAL -1
67
68#define ARM_MMU500_ACTLR_CPRE (1 << 1)
69
70#define ARM_MMU500_ACR_CACHE_LOCK (1 << 26)
71#define ARM_MMU500_ACR_S2CRB_TLBEN (1 << 10)
72#define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8)
73
74#define TLB_LOOP_TIMEOUT 1000000 /* 1s! */
75#define TLB_SPIN_COUNT 10
76
77/* Maximum number of context banks per SMMU */
78#define ARM_SMMU_MAX_CBS 128
79
80/* SMMU global address space */
81#define ARM_SMMU_GR0(smmu) ((smmu)->base)
82#define ARM_SMMU_GR1(smmu) ((smmu)->base + (1 << (smmu)->pgshift))
83
84/*
85 * SMMU global address space with conditional offset to access secure
86 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
87 * nsGFSYNR0: 0x450)
88 */
89#define ARM_SMMU_GR0_NS(smmu) \
90 ((smmu)->base + \
91 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \
92 ? 0x400 : 0))
93
94/*
95 * Some 64-bit registers only make sense to write atomically, but in such
96 * cases all the data relevant to AArch32 formats lies within the lower word,
97 * therefore this actually makes more sense than it might first appear.
98 */
99#ifdef CONFIG_64BIT
100#define smmu_write_atomic_lq writeq_relaxed
101#else
102#define smmu_write_atomic_lq writel_relaxed
103#endif
104
105/* Translation context bank */
106#define ARM_SMMU_CB(smmu, n) ((smmu)->cb_base + ((n) << (smmu)->pgshift))
107
108#define MSI_IOVA_BASE 0x8000000
109#define MSI_IOVA_LENGTH 0x100000
110
111static int force_stage;
112module_param(force_stage, int, S_IRUGO);
113MODULE_PARM_DESC(force_stage,
114 "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
115static bool disable_bypass;
116module_param(disable_bypass, bool, S_IRUGO);
117MODULE_PARM_DESC(disable_bypass,
118 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
119
120enum arm_smmu_arch_version {
121 ARM_SMMU_V1,
122 ARM_SMMU_V1_64K,
123 ARM_SMMU_V2,
124};
125
126enum arm_smmu_implementation {
127 GENERIC_SMMU,
128 ARM_MMU500,
129 CAVIUM_SMMUV2,
130 QCOM_SMMUV2,
131};
132
133struct arm_smmu_s2cr {
134 struct iommu_group *group;
135 int count;
136 enum arm_smmu_s2cr_type type;
137 enum arm_smmu_s2cr_privcfg privcfg;
138 u8 cbndx;
139};
140
141#define s2cr_init_val (struct arm_smmu_s2cr){ \
142 .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS, \
143}
144
145struct arm_smmu_smr {
146 u16 mask;
147 u16 id;
148 bool valid;
149};
150
151struct arm_smmu_cb {
152 u64 ttbr[2];
153 u32 tcr[2];
154 u32 mair[2];
155 struct arm_smmu_cfg *cfg;
156};
157
158struct arm_smmu_master_cfg {
159 struct arm_smmu_device *smmu;
160 s16 smendx[];
161};
162#define INVALID_SMENDX -1
163#define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
164#define fwspec_smmu(fw) (__fwspec_cfg(fw)->smmu)
165#define fwspec_smendx(fw, i) \
166 (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
167#define for_each_cfg_sme(fw, i, idx) \
168 for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
169
170struct arm_smmu_device {
171 struct device *dev;
172
173 void __iomem *base;
174 void __iomem *cb_base;
175 unsigned long pgshift;
176
177#define ARM_SMMU_FEAT_COHERENT_WALK (1 << 0)
178#define ARM_SMMU_FEAT_STREAM_MATCH (1 << 1)
179#define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
180#define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
181#define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
182#define ARM_SMMU_FEAT_TRANS_OPS (1 << 5)
183#define ARM_SMMU_FEAT_VMID16 (1 << 6)
184#define ARM_SMMU_FEAT_FMT_AARCH64_4K (1 << 7)
185#define ARM_SMMU_FEAT_FMT_AARCH64_16K (1 << 8)
186#define ARM_SMMU_FEAT_FMT_AARCH64_64K (1 << 9)
187#define ARM_SMMU_FEAT_FMT_AARCH32_L (1 << 10)
188#define ARM_SMMU_FEAT_FMT_AARCH32_S (1 << 11)
189#define ARM_SMMU_FEAT_EXIDS (1 << 12)
190 u32 features;
191
192#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
193 u32 options;
194 enum arm_smmu_arch_version version;
195 enum arm_smmu_implementation model;
196
197 u32 num_context_banks;
198 u32 num_s2_context_banks;
199 DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
200 struct arm_smmu_cb *cbs;
201 atomic_t irptndx;
202
203 u32 num_mapping_groups;
204 u16 streamid_mask;
205 u16 smr_mask_mask;
206 struct arm_smmu_smr *smrs;
207 struct arm_smmu_s2cr *s2crs;
208 struct mutex stream_map_mutex;
209
210 unsigned long va_size;
211 unsigned long ipa_size;
212 unsigned long pa_size;
213 unsigned long pgsize_bitmap;
214
215 u32 num_global_irqs;
216 u32 num_context_irqs;
217 unsigned int *irqs;
218
219 u32 cavium_id_base; /* Specific to Cavium */
220
221 spinlock_t global_sync_lock;
222
223 /* IOMMU core code handle */
224 struct iommu_device iommu;
225};
226
227enum arm_smmu_context_fmt {
228 ARM_SMMU_CTX_FMT_NONE,
229 ARM_SMMU_CTX_FMT_AARCH64,
230 ARM_SMMU_CTX_FMT_AARCH32_L,
231 ARM_SMMU_CTX_FMT_AARCH32_S,
232};
233
234struct arm_smmu_cfg {
235 u8 cbndx;
236 u8 irptndx;
237 union {
238 u16 asid;
239 u16 vmid;
240 };
241 u32 cbar;
242 enum arm_smmu_context_fmt fmt;
243};
244#define INVALID_IRPTNDX 0xff
245
246enum arm_smmu_domain_stage {
247 ARM_SMMU_DOMAIN_S1 = 0,
248 ARM_SMMU_DOMAIN_S2,
249 ARM_SMMU_DOMAIN_NESTED,
250 ARM_SMMU_DOMAIN_BYPASS,
251};
252
253struct arm_smmu_domain {
254 struct arm_smmu_device *smmu;
255 struct io_pgtable_ops *pgtbl_ops;
256 const struct iommu_gather_ops *tlb_ops;
257 struct arm_smmu_cfg cfg;
258 enum arm_smmu_domain_stage stage;
259 struct mutex init_mutex; /* Protects smmu pointer */
260 spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
261 struct iommu_domain domain;
262};
263
264struct arm_smmu_option_prop {
265 u32 opt;
266 const char *prop;
267};
268
269static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
270
271static bool using_legacy_binding, using_generic_binding;
272
273static struct arm_smmu_option_prop arm_smmu_options[] = {
274 { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
275 { 0, NULL},
276};
277
278static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
279{
280 return container_of(dom, struct arm_smmu_domain, domain);
281}
282
283static void parse_driver_options(struct arm_smmu_device *smmu)
284{
285 int i = 0;
286
287 do {
288 if (of_property_read_bool(smmu->dev->of_node,
289 arm_smmu_options[i].prop)) {
290 smmu->options |= arm_smmu_options[i].opt;
291 dev_notice(smmu->dev, "option %s\n",
292 arm_smmu_options[i].prop);
293 }
294 } while (arm_smmu_options[++i].opt);
295}
296
297static struct device_node *dev_get_dev_node(struct device *dev)
298{
299 if (dev_is_pci(dev)) {
300 struct pci_bus *bus = to_pci_dev(dev)->bus;
301
302 while (!pci_is_root_bus(bus))
303 bus = bus->parent;
304 return of_node_get(bus->bridge->parent->of_node);
305 }
306
307 return of_node_get(dev->of_node);
308}
309
310static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
311{
312 *((__be32 *)data) = cpu_to_be32(alias);
313 return 0; /* Continue walking */
314}
315
316static int __find_legacy_master_phandle(struct device *dev, void *data)
317{
318 struct of_phandle_iterator *it = *(void **)data;
319 struct device_node *np = it->node;
320 int err;
321
322 of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
323 "#stream-id-cells", 0)
324 if (it->node == np) {
325 *(void **)data = dev;
326 return 1;
327 }
328 it->node = np;
329 return err == -ENOENT ? 0 : err;
330}
331
332static struct platform_driver arm_smmu_driver;
333static struct iommu_ops arm_smmu_ops;
334
335static int arm_smmu_register_legacy_master(struct device *dev,
336 struct arm_smmu_device **smmu)
337{
338 struct device *smmu_dev;
339 struct device_node *np;
340 struct of_phandle_iterator it;
341 void *data = &it;
342 u32 *sids;
343 __be32 pci_sid;
344 int err;
345
346 np = dev_get_dev_node(dev);
347 if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
348 of_node_put(np);
349 return -ENODEV;
350 }
351
352 it.node = np;
353 err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
354 __find_legacy_master_phandle);
355 smmu_dev = data;
356 of_node_put(np);
357 if (err == 0)
358 return -ENODEV;
359 if (err < 0)
360 return err;
361
362 if (dev_is_pci(dev)) {
363 /* "mmu-masters" assumes Stream ID == Requester ID */
364 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
365 &pci_sid);
366 it.cur = &pci_sid;
367 it.cur_count = 1;
368 }
369
370 err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
371 &arm_smmu_ops);
372 if (err)
373 return err;
374
375 sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
376 if (!sids)
377 return -ENOMEM;
378
379 *smmu = dev_get_drvdata(smmu_dev);
380 of_phandle_iterator_args(&it, sids, it.cur_count);
381 err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
382 kfree(sids);
383 return err;
384}
385
386static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
387{
388 int idx;
389
390 do {
391 idx = find_next_zero_bit(map, end, start);
392 if (idx == end)
393 return -ENOSPC;
394 } while (test_and_set_bit(idx, map));
395
396 return idx;
397}
398
399static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
400{
401 clear_bit(idx, map);
402}
403
404/* Wait for any pending TLB invalidations to complete */
405static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
406 void __iomem *sync, void __iomem *status)
407{
408 unsigned int spin_cnt, delay;
409
410 writel_relaxed(QCOM_DUMMY_VAL, sync);
411 for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
412 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
413 if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
414 return;
415 cpu_relax();
416 }
417 udelay(delay);
418 }
419 dev_err_ratelimited(smmu->dev,
420 "TLB sync timed out -- SMMU may be deadlocked\n");
421}
422
423static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
424{
425 void __iomem *base = ARM_SMMU_GR0(smmu);
426 unsigned long flags;
427
428 spin_lock_irqsave(&smmu->global_sync_lock, flags);
429 __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
430 base + ARM_SMMU_GR0_sTLBGSTATUS);
431 spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
432}
433
434static void arm_smmu_tlb_sync_context(void *cookie)
435{
436 struct arm_smmu_domain *smmu_domain = cookie;
437 struct arm_smmu_device *smmu = smmu_domain->smmu;
438 void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
439 unsigned long flags;
440
441 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
442 __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
443 base + ARM_SMMU_CB_TLBSTATUS);
444 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
445}
446
447static void arm_smmu_tlb_sync_vmid(void *cookie)
448{
449 struct arm_smmu_domain *smmu_domain = cookie;
450
451 arm_smmu_tlb_sync_global(smmu_domain->smmu);
452}
453
454static void arm_smmu_tlb_inv_context_s1(void *cookie)
455{
456 struct arm_smmu_domain *smmu_domain = cookie;
457 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
458 void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
459
460 writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
461 arm_smmu_tlb_sync_context(cookie);
462}
463
464static void arm_smmu_tlb_inv_context_s2(void *cookie)
465{
466 struct arm_smmu_domain *smmu_domain = cookie;
467 struct arm_smmu_device *smmu = smmu_domain->smmu;
468 void __iomem *base = ARM_SMMU_GR0(smmu);
469
470 writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
471 arm_smmu_tlb_sync_global(smmu);
472}
473
474static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
475 size_t granule, bool leaf, void *cookie)
476{
477 struct arm_smmu_domain *smmu_domain = cookie;
478 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
479 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
480 void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
481
482 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
483 wmb();
484
485 if (stage1) {
486 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
487
488 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
489 iova &= ~12UL;
490 iova |= cfg->asid;
491 do {
492 writel_relaxed(iova, reg);
493 iova += granule;
494 } while (size -= granule);
495 } else {
496 iova >>= 12;
497 iova |= (u64)cfg->asid << 48;
498 do {
499 writeq_relaxed(iova, reg);
500 iova += granule >> 12;
501 } while (size -= granule);
502 }
503 } else {
504 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
505 ARM_SMMU_CB_S2_TLBIIPAS2;
506 iova >>= 12;
507 do {
508 smmu_write_atomic_lq(iova, reg);
509 iova += granule >> 12;
510 } while (size -= granule);
511 }
512}
513
514/*
515 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
516 * almost negligible, but the benefit of getting the first one in as far ahead
517 * of the sync as possible is significant, hence we don't just make this a
518 * no-op and set .tlb_sync to arm_smmu_inv_context_s2() as you might think.
519 */
520static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
521 size_t granule, bool leaf, void *cookie)
522{
523 struct arm_smmu_domain *smmu_domain = cookie;
524 void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
525
526 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
527 wmb();
528
529 writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
530}
531
532static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
533 .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
534 .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
535 .tlb_sync = arm_smmu_tlb_sync_context,
536};
537
538static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
539 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
540 .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
541 .tlb_sync = arm_smmu_tlb_sync_context,
542};
543
544static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
545 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
546 .tlb_add_flush = arm_smmu_tlb_inv_vmid_nosync,
547 .tlb_sync = arm_smmu_tlb_sync_vmid,
548};
549
550static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
551{
552 u32 fsr, fsynr;
553 unsigned long iova;
554 struct iommu_domain *domain = dev;
555 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
556 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
557 struct arm_smmu_device *smmu = smmu_domain->smmu;
558 void __iomem *cb_base;
559
560 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
561 fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
562
563 if (!(fsr & FSR_FAULT))
564 return IRQ_NONE;
565
566 fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
567 iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
568
569 dev_err_ratelimited(smmu->dev,
570 "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
571 fsr, iova, fsynr, cfg->cbndx);
572
573 writel(fsr, cb_base + ARM_SMMU_CB_FSR);
574 return IRQ_HANDLED;
575}
576
577static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
578{
579 u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
580 struct arm_smmu_device *smmu = dev;
581 void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
582
583 gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
584 gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
585 gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
586 gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
587
588 if (!gfsr)
589 return IRQ_NONE;
590
591 dev_err_ratelimited(smmu->dev,
592 "Unexpected global fault, this could be serious\n");
593 dev_err_ratelimited(smmu->dev,
594 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
595 gfsr, gfsynr0, gfsynr1, gfsynr2);
596
597 writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
598 return IRQ_HANDLED;
599}
600
601static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
602 struct io_pgtable_cfg *pgtbl_cfg)
603{
604 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
605 struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
606 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
607
608 cb->cfg = cfg;
609
610 /* TTBCR */
611 if (stage1) {
612 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
613 cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
614 } else {
615 cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
616 cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
617 cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
618 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
619 cb->tcr[1] |= TTBCR2_AS;
620 }
621 } else {
622 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
623 }
624
625 /* TTBRs */
626 if (stage1) {
627 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
628 cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
629 cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
630 } else {
631 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
632 cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
633 cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
634 cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
635 }
636 } else {
637 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
638 }
639
640 /* MAIRs (stage-1 only) */
641 if (stage1) {
642 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
643 cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
644 cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
645 } else {
646 cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
647 cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
648 }
649 }
650}
651
652static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
653{
654 u32 reg;
655 bool stage1;
656 struct arm_smmu_cb *cb = &smmu->cbs[idx];
657 struct arm_smmu_cfg *cfg = cb->cfg;
658 void __iomem *cb_base, *gr1_base;
659
660 cb_base = ARM_SMMU_CB(smmu, idx);
661
662 /* Unassigned context banks only need disabling */
663 if (!cfg) {
664 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
665 return;
666 }
667
668 gr1_base = ARM_SMMU_GR1(smmu);
669 stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
670
671 /* CBA2R */
672 if (smmu->version > ARM_SMMU_V1) {
673 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
674 reg = CBA2R_RW64_64BIT;
675 else
676 reg = CBA2R_RW64_32BIT;
677 /* 16-bit VMIDs live in CBA2R */
678 if (smmu->features & ARM_SMMU_FEAT_VMID16)
679 reg |= cfg->vmid << CBA2R_VMID_SHIFT;
680
681 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
682 }
683
684 /* CBAR */
685 reg = cfg->cbar;
686 if (smmu->version < ARM_SMMU_V2)
687 reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
688
689 /*
690 * Use the weakest shareability/memory types, so they are
691 * overridden by the ttbcr/pte.
692 */
693 if (stage1) {
694 reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
695 (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
696 } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
697 /* 8-bit VMIDs live in CBAR */
698 reg |= cfg->vmid << CBAR_VMID_SHIFT;
699 }
700 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
701
702 /*
703 * TTBCR
704 * We must write this before the TTBRs, since it determines the
705 * access behaviour of some fields (in particular, ASID[15:8]).
706 */
707 if (stage1 && smmu->version > ARM_SMMU_V1)
708 writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
709 writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
710
711 /* TTBRs */
712 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
713 writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
714 writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
715 writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
716 } else {
717 writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
718 if (stage1)
719 writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
720 }
721
722 /* MAIRs (stage-1 only) */
723 if (stage1) {
724 writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
725 writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
726 }
727
728 /* SCTLR */
729 reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
730 if (stage1)
731 reg |= SCTLR_S1_ASIDPNE;
732 if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
733 reg |= SCTLR_E;
734
735 writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
736}
737
738static int arm_smmu_init_domain_context(struct iommu_domain *domain,
739 struct arm_smmu_device *smmu)
740{
741 int irq, start, ret = 0;
742 unsigned long ias, oas;
743 struct io_pgtable_ops *pgtbl_ops;
744 struct io_pgtable_cfg pgtbl_cfg;
745 enum io_pgtable_fmt fmt;
746 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
747 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
748
749 mutex_lock(&smmu_domain->init_mutex);
750 if (smmu_domain->smmu)
751 goto out_unlock;
752
753 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
754 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
755 smmu_domain->smmu = smmu;
756 goto out_unlock;
757 }
758
759 /*
760 * Mapping the requested stage onto what we support is surprisingly
761 * complicated, mainly because the spec allows S1+S2 SMMUs without
762 * support for nested translation. That means we end up with the
763 * following table:
764 *
765 * Requested Supported Actual
766 * S1 N S1
767 * S1 S1+S2 S1
768 * S1 S2 S2
769 * S1 S1 S1
770 * N N N
771 * N S1+S2 S2
772 * N S2 S2
773 * N S1 S1
774 *
775 * Note that you can't actually request stage-2 mappings.
776 */
777 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
778 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
779 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
780 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
781
782 /*
783 * Choosing a suitable context format is even more fiddly. Until we
784 * grow some way for the caller to express a preference, and/or move
785 * the decision into the io-pgtable code where it arguably belongs,
786 * just aim for the closest thing to the rest of the system, and hope
787 * that the hardware isn't esoteric enough that we can't assume AArch64
788 * support to be a superset of AArch32 support...
789 */
790 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
791 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
792 if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
793 !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
794 (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
795 (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
796 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
797 if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
798 (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
799 ARM_SMMU_FEAT_FMT_AARCH64_16K |
800 ARM_SMMU_FEAT_FMT_AARCH64_4K)))
801 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
802
803 if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
804 ret = -EINVAL;
805 goto out_unlock;
806 }
807
808 switch (smmu_domain->stage) {
809 case ARM_SMMU_DOMAIN_S1:
810 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
811 start = smmu->num_s2_context_banks;
812 ias = smmu->va_size;
813 oas = smmu->ipa_size;
814 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
815 fmt = ARM_64_LPAE_S1;
816 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
817 fmt = ARM_32_LPAE_S1;
818 ias = min(ias, 32UL);
819 oas = min(oas, 40UL);
820 } else {
821 fmt = ARM_V7S;
822 ias = min(ias, 32UL);
823 oas = min(oas, 32UL);
824 }
825 smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
826 break;
827 case ARM_SMMU_DOMAIN_NESTED:
828 /*
829 * We will likely want to change this if/when KVM gets
830 * involved.
831 */
832 case ARM_SMMU_DOMAIN_S2:
833 cfg->cbar = CBAR_TYPE_S2_TRANS;
834 start = 0;
835 ias = smmu->ipa_size;
836 oas = smmu->pa_size;
837 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
838 fmt = ARM_64_LPAE_S2;
839 } else {
840 fmt = ARM_32_LPAE_S2;
841 ias = min(ias, 40UL);
842 oas = min(oas, 40UL);
843 }
844 if (smmu->version == ARM_SMMU_V2)
845 smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
846 else
847 smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
848 break;
849 default:
850 ret = -EINVAL;
851 goto out_unlock;
852 }
853 ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
854 smmu->num_context_banks);
855 if (ret < 0)
856 goto out_unlock;
857
858 cfg->cbndx = ret;
859 if (smmu->version < ARM_SMMU_V2) {
860 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
861 cfg->irptndx %= smmu->num_context_irqs;
862 } else {
863 cfg->irptndx = cfg->cbndx;
864 }
865
866 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
867 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
868 else
869 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
870
871 pgtbl_cfg = (struct io_pgtable_cfg) {
872 .pgsize_bitmap = smmu->pgsize_bitmap,
873 .ias = ias,
874 .oas = oas,
875 .tlb = smmu_domain->tlb_ops,
876 .iommu_dev = smmu->dev,
877 };
878
879 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
880 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
881
882 smmu_domain->smmu = smmu;
883 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
884 if (!pgtbl_ops) {
885 ret = -ENOMEM;
886 goto out_clear_smmu;
887 }
888
889 /* Update the domain's page sizes to reflect the page table format */
890 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
891 domain->geometry.aperture_end = (1UL << ias) - 1;
892 domain->geometry.force_aperture = true;
893
894 /* Initialise the context bank with our page table cfg */
895 arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
896 arm_smmu_write_context_bank(smmu, cfg->cbndx);
897
898 /*
899 * Request context fault interrupt. Do this last to avoid the
900 * handler seeing a half-initialised domain state.
901 */
902 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
903 ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
904 IRQF_SHARED, "arm-smmu-context-fault", domain);
905 if (ret < 0) {
906 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
907 cfg->irptndx, irq);
908 cfg->irptndx = INVALID_IRPTNDX;
909 }
910
911 mutex_unlock(&smmu_domain->init_mutex);
912
913 /* Publish page table ops for map/unmap */
914 smmu_domain->pgtbl_ops = pgtbl_ops;
915 return 0;
916
917out_clear_smmu:
918 smmu_domain->smmu = NULL;
919out_unlock:
920 mutex_unlock(&smmu_domain->init_mutex);
921 return ret;
922}
923
924static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
925{
926 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
927 struct arm_smmu_device *smmu = smmu_domain->smmu;
928 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
929 int irq;
930
931 if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
932 return;
933
934 /*
935 * Disable the context bank and free the page tables before freeing
936 * it.
937 */
938 smmu->cbs[cfg->cbndx].cfg = NULL;
939 arm_smmu_write_context_bank(smmu, cfg->cbndx);
940
941 if (cfg->irptndx != INVALID_IRPTNDX) {
942 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
943 devm_free_irq(smmu->dev, irq, domain);
944 }
945
946 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
947 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
948}
949
950static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
951{
952 struct arm_smmu_domain *smmu_domain;
953
954 if (type != IOMMU_DOMAIN_UNMANAGED &&
955 type != IOMMU_DOMAIN_DMA &&
956 type != IOMMU_DOMAIN_IDENTITY)
957 return NULL;
958 /*
959 * Allocate the domain and initialise some of its data structures.
960 * We can't really do anything meaningful until we've added a
961 * master.
962 */
963 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
964 if (!smmu_domain)
965 return NULL;
966
967 if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
968 iommu_get_dma_cookie(&smmu_domain->domain))) {
969 kfree(smmu_domain);
970 return NULL;
971 }
972
973 mutex_init(&smmu_domain->init_mutex);
974 spin_lock_init(&smmu_domain->cb_lock);
975
976 return &smmu_domain->domain;
977}
978
979static void arm_smmu_domain_free(struct iommu_domain *domain)
980{
981 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
982
983 /*
984 * Free the domain resources. We assume that all devices have
985 * already been detached.
986 */
987 iommu_put_dma_cookie(domain);
988 arm_smmu_destroy_domain_context(domain);
989 kfree(smmu_domain);
990}
991
992static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
993{
994 struct arm_smmu_smr *smr = smmu->smrs + idx;
995 u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
996
997 if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
998 reg |= SMR_VALID;
999 writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
1000}
1001
1002static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
1003{
1004 struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
1005 u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
1006 (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
1007 (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
1008
1009 if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
1010 smmu->smrs[idx].valid)
1011 reg |= S2CR_EXIDVALID;
1012 writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
1013}
1014
1015static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1016{
1017 arm_smmu_write_s2cr(smmu, idx);
1018 if (smmu->smrs)
1019 arm_smmu_write_smr(smmu, idx);
1020}
1021
1022/*
1023 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1024 * should be called after sCR0 is written.
1025 */
1026static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1027{
1028 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1029 u32 smr;
1030
1031 if (!smmu->smrs)
1032 return;
1033
1034 /*
1035 * SMR.ID bits may not be preserved if the corresponding MASK
1036 * bits are set, so check each one separately. We can reject
1037 * masters later if they try to claim IDs outside these masks.
1038 */
1039 smr = smmu->streamid_mask << SMR_ID_SHIFT;
1040 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1041 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1042 smmu->streamid_mask = smr >> SMR_ID_SHIFT;
1043
1044 smr = smmu->streamid_mask << SMR_MASK_SHIFT;
1045 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1046 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1047 smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
1048}
1049
1050static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1051{
1052 struct arm_smmu_smr *smrs = smmu->smrs;
1053 int i, free_idx = -ENOSPC;
1054
1055 /* Stream indexing is blissfully easy */
1056 if (!smrs)
1057 return id;
1058
1059 /* Validating SMRs is... less so */
1060 for (i = 0; i < smmu->num_mapping_groups; ++i) {
1061 if (!smrs[i].valid) {
1062 /*
1063 * Note the first free entry we come across, which
1064 * we'll claim in the end if nothing else matches.
1065 */
1066 if (free_idx < 0)
1067 free_idx = i;
1068 continue;
1069 }
1070 /*
1071 * If the new entry is _entirely_ matched by an existing entry,
1072 * then reuse that, with the guarantee that there also cannot
1073 * be any subsequent conflicting entries. In normal use we'd
1074 * expect simply identical entries for this case, but there's
1075 * no harm in accommodating the generalisation.
1076 */
1077 if ((mask & smrs[i].mask) == mask &&
1078 !((id ^ smrs[i].id) & ~smrs[i].mask))
1079 return i;
1080 /*
1081 * If the new entry has any other overlap with an existing one,
1082 * though, then there always exists at least one stream ID
1083 * which would cause a conflict, and we can't allow that risk.
1084 */
1085 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1086 return -EINVAL;
1087 }
1088
1089 return free_idx;
1090}
1091
1092static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1093{
1094 if (--smmu->s2crs[idx].count)
1095 return false;
1096
1097 smmu->s2crs[idx] = s2cr_init_val;
1098 if (smmu->smrs)
1099 smmu->smrs[idx].valid = false;
1100
1101 return true;
1102}
1103
1104static int arm_smmu_master_alloc_smes(struct device *dev)
1105{
1106 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1107 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1108 struct arm_smmu_device *smmu = cfg->smmu;
1109 struct arm_smmu_smr *smrs = smmu->smrs;
1110 struct iommu_group *group;
1111 int i, idx, ret;
1112
1113 mutex_lock(&smmu->stream_map_mutex);
1114 /* Figure out a viable stream map entry allocation */
1115 for_each_cfg_sme(fwspec, i, idx) {
1116 u16 sid = fwspec->ids[i];
1117 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1118
1119 if (idx != INVALID_SMENDX) {
1120 ret = -EEXIST;
1121 goto out_err;
1122 }
1123
1124 ret = arm_smmu_find_sme(smmu, sid, mask);
1125 if (ret < 0)
1126 goto out_err;
1127
1128 idx = ret;
1129 if (smrs && smmu->s2crs[idx].count == 0) {
1130 smrs[idx].id = sid;
1131 smrs[idx].mask = mask;
1132 smrs[idx].valid = true;
1133 }
1134 smmu->s2crs[idx].count++;
1135 cfg->smendx[i] = (s16)idx;
1136 }
1137
1138 group = iommu_group_get_for_dev(dev);
1139 if (!group)
1140 group = ERR_PTR(-ENOMEM);
1141 if (IS_ERR(group)) {
1142 ret = PTR_ERR(group);
1143 goto out_err;
1144 }
1145 iommu_group_put(group);
1146
1147 /* It worked! Now, poke the actual hardware */
1148 for_each_cfg_sme(fwspec, i, idx) {
1149 arm_smmu_write_sme(smmu, idx);
1150 smmu->s2crs[idx].group = group;
1151 }
1152
1153 mutex_unlock(&smmu->stream_map_mutex);
1154 return 0;
1155
1156out_err:
1157 while (i--) {
1158 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1159 cfg->smendx[i] = INVALID_SMENDX;
1160 }
1161 mutex_unlock(&smmu->stream_map_mutex);
1162 return ret;
1163}
1164
1165static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1166{
1167 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1168 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1169 int i, idx;
1170
1171 mutex_lock(&smmu->stream_map_mutex);
1172 for_each_cfg_sme(fwspec, i, idx) {
1173 if (arm_smmu_free_sme(smmu, idx))
1174 arm_smmu_write_sme(smmu, idx);
1175 cfg->smendx[i] = INVALID_SMENDX;
1176 }
1177 mutex_unlock(&smmu->stream_map_mutex);
1178}
1179
1180static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1181 struct iommu_fwspec *fwspec)
1182{
1183 struct arm_smmu_device *smmu = smmu_domain->smmu;
1184 struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1185 u8 cbndx = smmu_domain->cfg.cbndx;
1186 enum arm_smmu_s2cr_type type;
1187 int i, idx;
1188
1189 if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1190 type = S2CR_TYPE_BYPASS;
1191 else
1192 type = S2CR_TYPE_TRANS;
1193
1194 for_each_cfg_sme(fwspec, i, idx) {
1195 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1196 continue;
1197
1198 s2cr[idx].type = type;
1199 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1200 s2cr[idx].cbndx = cbndx;
1201 arm_smmu_write_s2cr(smmu, idx);
1202 }
1203 return 0;
1204}
1205
1206static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1207{
1208 int ret;
1209 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1210 struct arm_smmu_device *smmu;
1211 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1212
1213 if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1214 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1215 return -ENXIO;
1216 }
1217
1218 /*
1219 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1220 * domains between of_xlate() and add_device() - we have no way to cope
1221 * with that, so until ARM gets converted to rely on groups and default
1222 * domains, just say no (but more politely than by dereferencing NULL).
1223 * This should be at least a WARN_ON once that's sorted.
1224 */
1225 if (!fwspec->iommu_priv)
1226 return -ENODEV;
1227
1228 smmu = fwspec_smmu(fwspec);
1229 /* Ensure that the domain is finalised */
1230 ret = arm_smmu_init_domain_context(domain, smmu);
1231 if (ret < 0)
1232 return ret;
1233
1234 /*
1235 * Sanity check the domain. We don't support domains across
1236 * different SMMUs.
1237 */
1238 if (smmu_domain->smmu != smmu) {
1239 dev_err(dev,
1240 "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1241 dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1242 return -EINVAL;
1243 }
1244
1245 /* Looks ok, so add the device to the domain */
1246 return arm_smmu_domain_add_master(smmu_domain, fwspec);
1247}
1248
1249static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1250 phys_addr_t paddr, size_t size, int prot)
1251{
1252 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1253
1254 if (!ops)
1255 return -ENODEV;
1256
1257 return ops->map(ops, iova, paddr, size, prot);
1258}
1259
1260static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1261 size_t size)
1262{
1263 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1264
1265 if (!ops)
1266 return 0;
1267
1268 return ops->unmap(ops, iova, size);
1269}
1270
1271static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1272{
1273 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1274
1275 if (smmu_domain->tlb_ops)
1276 smmu_domain->tlb_ops->tlb_sync(smmu_domain);
1277}
1278
1279static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1280 dma_addr_t iova)
1281{
1282 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1283 struct arm_smmu_device *smmu = smmu_domain->smmu;
1284 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1285 struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
1286 struct device *dev = smmu->dev;
1287 void __iomem *cb_base;
1288 u32 tmp;
1289 u64 phys;
1290 unsigned long va, flags;
1291
1292 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1293
1294 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1295 /* ATS1 registers can only be written atomically */
1296 va = iova & ~0xfffUL;
1297 if (smmu->version == ARM_SMMU_V2)
1298 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1299 else /* Register is only 32-bit in v1 */
1300 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1301
1302 if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1303 !(tmp & ATSR_ACTIVE), 5, 50)) {
1304 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1305 dev_err(dev,
1306 "iova to phys timed out on %pad. Falling back to software table walk.\n",
1307 &iova);
1308 return ops->iova_to_phys(ops, iova);
1309 }
1310
1311 phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1312 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1313 if (phys & CB_PAR_F) {
1314 dev_err(dev, "translation fault!\n");
1315 dev_err(dev, "PAR = 0x%llx\n", phys);
1316 return 0;
1317 }
1318
1319 return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1320}
1321
1322static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1323 dma_addr_t iova)
1324{
1325 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1326 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1327
1328 if (domain->type == IOMMU_DOMAIN_IDENTITY)
1329 return iova;
1330
1331 if (!ops)
1332 return 0;
1333
1334 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1335 smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1336 return arm_smmu_iova_to_phys_hard(domain, iova);
1337
1338 return ops->iova_to_phys(ops, iova);
1339}
1340
1341static bool arm_smmu_capable(enum iommu_cap cap)
1342{
1343 switch (cap) {
1344 case IOMMU_CAP_CACHE_COHERENCY:
1345 /*
1346 * Return true here as the SMMU can always send out coherent
1347 * requests.
1348 */
1349 return true;
1350 case IOMMU_CAP_NOEXEC:
1351 return true;
1352 default:
1353 return false;
1354 }
1355}
1356
1357static int arm_smmu_match_node(struct device *dev, void *data)
1358{
1359 return dev->fwnode == data;
1360}
1361
1362static
1363struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1364{
1365 struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1366 fwnode, arm_smmu_match_node);
1367 put_device(dev);
1368 return dev ? dev_get_drvdata(dev) : NULL;
1369}
1370
1371static int arm_smmu_add_device(struct device *dev)
1372{
1373 struct arm_smmu_device *smmu;
1374 struct arm_smmu_master_cfg *cfg;
1375 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1376 int i, ret;
1377
1378 if (using_legacy_binding) {
1379 ret = arm_smmu_register_legacy_master(dev, &smmu);
1380
1381 /*
1382 * If dev->iommu_fwspec is initally NULL, arm_smmu_register_legacy_master()
1383 * will allocate/initialise a new one. Thus we need to update fwspec for
1384 * later use.
1385 */
1386 fwspec = dev->iommu_fwspec;
1387 if (ret)
1388 goto out_free;
1389 } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1390 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1391 } else {
1392 return -ENODEV;
1393 }
1394
1395 ret = -EINVAL;
1396 for (i = 0; i < fwspec->num_ids; i++) {
1397 u16 sid = fwspec->ids[i];
1398 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1399
1400 if (sid & ~smmu->streamid_mask) {
1401 dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1402 sid, smmu->streamid_mask);
1403 goto out_free;
1404 }
1405 if (mask & ~smmu->smr_mask_mask) {
1406 dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1407 mask, smmu->smr_mask_mask);
1408 goto out_free;
1409 }
1410 }
1411
1412 ret = -ENOMEM;
1413 cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1414 GFP_KERNEL);
1415 if (!cfg)
1416 goto out_free;
1417
1418 cfg->smmu = smmu;
1419 fwspec->iommu_priv = cfg;
1420 while (i--)
1421 cfg->smendx[i] = INVALID_SMENDX;
1422
1423 ret = arm_smmu_master_alloc_smes(dev);
1424 if (ret)
1425 goto out_cfg_free;
1426
1427 iommu_device_link(&smmu->iommu, dev);
1428
1429 return 0;
1430
1431out_cfg_free:
1432 kfree(cfg);
1433out_free:
1434 iommu_fwspec_free(dev);
1435 return ret;
1436}
1437
1438static void arm_smmu_remove_device(struct device *dev)
1439{
1440 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1441 struct arm_smmu_master_cfg *cfg;
1442 struct arm_smmu_device *smmu;
1443
1444
1445 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1446 return;
1447
1448 cfg = fwspec->iommu_priv;
1449 smmu = cfg->smmu;
1450
1451 iommu_device_unlink(&smmu->iommu, dev);
1452 arm_smmu_master_free_smes(fwspec);
1453 iommu_group_remove_device(dev);
1454 kfree(fwspec->iommu_priv);
1455 iommu_fwspec_free(dev);
1456}
1457
1458static struct iommu_group *arm_smmu_device_group(struct device *dev)
1459{
1460 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1461 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1462 struct iommu_group *group = NULL;
1463 int i, idx;
1464
1465 for_each_cfg_sme(fwspec, i, idx) {
1466 if (group && smmu->s2crs[idx].group &&
1467 group != smmu->s2crs[idx].group)
1468 return ERR_PTR(-EINVAL);
1469
1470 group = smmu->s2crs[idx].group;
1471 }
1472
1473 if (group)
1474 return iommu_group_ref_get(group);
1475
1476 if (dev_is_pci(dev))
1477 group = pci_device_group(dev);
1478 else
1479 group = generic_device_group(dev);
1480
1481 return group;
1482}
1483
1484static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1485 enum iommu_attr attr, void *data)
1486{
1487 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1488
1489 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1490 return -EINVAL;
1491
1492 switch (attr) {
1493 case DOMAIN_ATTR_NESTING:
1494 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1495 return 0;
1496 default:
1497 return -ENODEV;
1498 }
1499}
1500
1501static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1502 enum iommu_attr attr, void *data)
1503{
1504 int ret = 0;
1505 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1506
1507 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1508 return -EINVAL;
1509
1510 mutex_lock(&smmu_domain->init_mutex);
1511
1512 switch (attr) {
1513 case DOMAIN_ATTR_NESTING:
1514 if (smmu_domain->smmu) {
1515 ret = -EPERM;
1516 goto out_unlock;
1517 }
1518
1519 if (*(int *)data)
1520 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1521 else
1522 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1523
1524 break;
1525 default:
1526 ret = -ENODEV;
1527 }
1528
1529out_unlock:
1530 mutex_unlock(&smmu_domain->init_mutex);
1531 return ret;
1532}
1533
1534static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1535{
1536 u32 mask, fwid = 0;
1537
1538 if (args->args_count > 0)
1539 fwid |= (u16)args->args[0];
1540
1541 if (args->args_count > 1)
1542 fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
1543 else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1544 fwid |= (u16)mask << SMR_MASK_SHIFT;
1545
1546 return iommu_fwspec_add_ids(dev, &fwid, 1);
1547}
1548
1549static void arm_smmu_get_resv_regions(struct device *dev,
1550 struct list_head *head)
1551{
1552 struct iommu_resv_region *region;
1553 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1554
1555 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1556 prot, IOMMU_RESV_SW_MSI);
1557 if (!region)
1558 return;
1559
1560 list_add_tail(&region->list, head);
1561
1562 iommu_dma_get_resv_regions(dev, head);
1563}
1564
1565static void arm_smmu_put_resv_regions(struct device *dev,
1566 struct list_head *head)
1567{
1568 struct iommu_resv_region *entry, *next;
1569
1570 list_for_each_entry_safe(entry, next, head, list)
1571 kfree(entry);
1572}
1573
1574static struct iommu_ops arm_smmu_ops = {
1575 .capable = arm_smmu_capable,
1576 .domain_alloc = arm_smmu_domain_alloc,
1577 .domain_free = arm_smmu_domain_free,
1578 .attach_dev = arm_smmu_attach_dev,
1579 .map = arm_smmu_map,
1580 .unmap = arm_smmu_unmap,
1581 .flush_iotlb_all = arm_smmu_iotlb_sync,
1582 .iotlb_sync = arm_smmu_iotlb_sync,
1583 .iova_to_phys = arm_smmu_iova_to_phys,
1584 .add_device = arm_smmu_add_device,
1585 .remove_device = arm_smmu_remove_device,
1586 .device_group = arm_smmu_device_group,
1587 .domain_get_attr = arm_smmu_domain_get_attr,
1588 .domain_set_attr = arm_smmu_domain_set_attr,
1589 .of_xlate = arm_smmu_of_xlate,
1590 .get_resv_regions = arm_smmu_get_resv_regions,
1591 .put_resv_regions = arm_smmu_put_resv_regions,
1592 .pgsize_bitmap = -1UL, /* Restricted during device attach */
1593};
1594
1595static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1596{
1597 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1598 int i;
1599 u32 reg, major;
1600
1601 /* clear global FSR */
1602 reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1603 writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1604
1605 /*
1606 * Reset stream mapping groups: Initial values mark all SMRn as
1607 * invalid and all S2CRn as bypass unless overridden.
1608 */
1609 for (i = 0; i < smmu->num_mapping_groups; ++i)
1610 arm_smmu_write_sme(smmu, i);
1611
1612 if (smmu->model == ARM_MMU500) {
1613 /*
1614 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
1615 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
1616 * bit is only present in MMU-500r2 onwards.
1617 */
1618 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1619 major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1620 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1621 if (major >= 2)
1622 reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1623 /*
1624 * Allow unmatched Stream IDs to allocate bypass
1625 * TLB entries for reduced latency.
1626 */
1627 reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
1628 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1629 }
1630
1631 /* Make sure all context banks are disabled and clear CB_FSR */
1632 for (i = 0; i < smmu->num_context_banks; ++i) {
1633 void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
1634
1635 arm_smmu_write_context_bank(smmu, i);
1636 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1637 /*
1638 * Disable MMU-500's not-particularly-beneficial next-page
1639 * prefetcher for the sake of errata #841119 and #826419.
1640 */
1641 if (smmu->model == ARM_MMU500) {
1642 reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1643 reg &= ~ARM_MMU500_ACTLR_CPRE;
1644 writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1645 }
1646 }
1647
1648 /* Invalidate the TLB, just in case */
1649 writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1650 writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1651
1652 reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1653
1654 /* Enable fault reporting */
1655 reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1656
1657 /* Disable TLB broadcasting. */
1658 reg |= (sCR0_VMIDPNE | sCR0_PTM);
1659
1660 /* Enable client access, handling unmatched streams as appropriate */
1661 reg &= ~sCR0_CLIENTPD;
1662 if (disable_bypass)
1663 reg |= sCR0_USFCFG;
1664 else
1665 reg &= ~sCR0_USFCFG;
1666
1667 /* Disable forced broadcasting */
1668 reg &= ~sCR0_FB;
1669
1670 /* Don't upgrade barriers */
1671 reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1672
1673 if (smmu->features & ARM_SMMU_FEAT_VMID16)
1674 reg |= sCR0_VMID16EN;
1675
1676 if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1677 reg |= sCR0_EXIDENABLE;
1678
1679 /* Push the button */
1680 arm_smmu_tlb_sync_global(smmu);
1681 writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1682}
1683
1684static int arm_smmu_id_size_to_bits(int size)
1685{
1686 switch (size) {
1687 case 0:
1688 return 32;
1689 case 1:
1690 return 36;
1691 case 2:
1692 return 40;
1693 case 3:
1694 return 42;
1695 case 4:
1696 return 44;
1697 case 5:
1698 default:
1699 return 48;
1700 }
1701}
1702
1703static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1704{
1705 unsigned long size;
1706 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1707 u32 id;
1708 bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1709 int i;
1710
1711 dev_notice(smmu->dev, "probing hardware configuration...\n");
1712 dev_notice(smmu->dev, "SMMUv%d with:\n",
1713 smmu->version == ARM_SMMU_V2 ? 2 : 1);
1714
1715 /* ID0 */
1716 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1717
1718 /* Restrict available stages based on module parameter */
1719 if (force_stage == 1)
1720 id &= ~(ID0_S2TS | ID0_NTS);
1721 else if (force_stage == 2)
1722 id &= ~(ID0_S1TS | ID0_NTS);
1723
1724 if (id & ID0_S1TS) {
1725 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1726 dev_notice(smmu->dev, "\tstage 1 translation\n");
1727 }
1728
1729 if (id & ID0_S2TS) {
1730 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1731 dev_notice(smmu->dev, "\tstage 2 translation\n");
1732 }
1733
1734 if (id & ID0_NTS) {
1735 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1736 dev_notice(smmu->dev, "\tnested translation\n");
1737 }
1738
1739 if (!(smmu->features &
1740 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1741 dev_err(smmu->dev, "\tno translation support!\n");
1742 return -ENODEV;
1743 }
1744
1745 if ((id & ID0_S1TS) &&
1746 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1747 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1748 dev_notice(smmu->dev, "\taddress translation ops\n");
1749 }
1750
1751 /*
1752 * In order for DMA API calls to work properly, we must defer to what
1753 * the FW says about coherency, regardless of what the hardware claims.
1754 * Fortunately, this also opens up a workaround for systems where the
1755 * ID register value has ended up configured incorrectly.
1756 */
1757 cttw_reg = !!(id & ID0_CTTW);
1758 if (cttw_fw || cttw_reg)
1759 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1760 cttw_fw ? "" : "non-");
1761 if (cttw_fw != cttw_reg)
1762 dev_notice(smmu->dev,
1763 "\t(IDR0.CTTW overridden by FW configuration)\n");
1764
1765 /* Max. number of entries we have for stream matching/indexing */
1766 if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1767 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1768 size = 1 << 16;
1769 } else {
1770 size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
1771 }
1772 smmu->streamid_mask = size - 1;
1773 if (id & ID0_SMS) {
1774 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1775 size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
1776 if (size == 0) {
1777 dev_err(smmu->dev,
1778 "stream-matching supported, but no SMRs present!\n");
1779 return -ENODEV;
1780 }
1781
1782 /* Zero-initialised to mark as invalid */
1783 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1784 GFP_KERNEL);
1785 if (!smmu->smrs)
1786 return -ENOMEM;
1787
1788 dev_notice(smmu->dev,
1789 "\tstream matching with %lu register groups", size);
1790 }
1791 /* s2cr->type == 0 means translation, so initialise explicitly */
1792 smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1793 GFP_KERNEL);
1794 if (!smmu->s2crs)
1795 return -ENOMEM;
1796 for (i = 0; i < size; i++)
1797 smmu->s2crs[i] = s2cr_init_val;
1798
1799 smmu->num_mapping_groups = size;
1800 mutex_init(&smmu->stream_map_mutex);
1801 spin_lock_init(&smmu->global_sync_lock);
1802
1803 if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1804 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1805 if (!(id & ID0_PTFS_NO_AARCH32S))
1806 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1807 }
1808
1809 /* ID1 */
1810 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1811 smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1812
1813 /* Check for size mismatch of SMMU address space from mapped region */
1814 size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1815 size <<= smmu->pgshift;
1816 if (smmu->cb_base != gr0_base + size)
1817 dev_warn(smmu->dev,
1818 "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
1819 size * 2, (smmu->cb_base - gr0_base) * 2);
1820
1821 smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1822 smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1823 if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1824 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1825 return -ENODEV;
1826 }
1827 dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1828 smmu->num_context_banks, smmu->num_s2_context_banks);
1829 /*
1830 * Cavium CN88xx erratum #27704.
1831 * Ensure ASID and VMID allocation is unique across all SMMUs in
1832 * the system.
1833 */
1834 if (smmu->model == CAVIUM_SMMUV2) {
1835 smmu->cavium_id_base =
1836 atomic_add_return(smmu->num_context_banks,
1837 &cavium_smmu_context_count);
1838 smmu->cavium_id_base -= smmu->num_context_banks;
1839 dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1840 }
1841 smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1842 sizeof(*smmu->cbs), GFP_KERNEL);
1843 if (!smmu->cbs)
1844 return -ENOMEM;
1845
1846 /* ID2 */
1847 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1848 size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1849 smmu->ipa_size = size;
1850
1851 /* The output mask is also applied for bypass */
1852 size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1853 smmu->pa_size = size;
1854
1855 if (id & ID2_VMID16)
1856 smmu->features |= ARM_SMMU_FEAT_VMID16;
1857
1858 /*
1859 * What the page table walker can address actually depends on which
1860 * descriptor format is in use, but since a) we don't know that yet,
1861 * and b) it can vary per context bank, this will have to do...
1862 */
1863 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1864 dev_warn(smmu->dev,
1865 "failed to set DMA mask for table walker\n");
1866
1867 if (smmu->version < ARM_SMMU_V2) {
1868 smmu->va_size = smmu->ipa_size;
1869 if (smmu->version == ARM_SMMU_V1_64K)
1870 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1871 } else {
1872 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1873 smmu->va_size = arm_smmu_id_size_to_bits(size);
1874 if (id & ID2_PTFS_4K)
1875 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1876 if (id & ID2_PTFS_16K)
1877 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1878 if (id & ID2_PTFS_64K)
1879 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1880 }
1881
1882 /* Now we've corralled the various formats, what'll it do? */
1883 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1884 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1885 if (smmu->features &
1886 (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1887 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1888 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1889 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1890 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1891 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1892
1893 if (arm_smmu_ops.pgsize_bitmap == -1UL)
1894 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1895 else
1896 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1897 dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1898 smmu->pgsize_bitmap);
1899
1900
1901 if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1902 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1903 smmu->va_size, smmu->ipa_size);
1904
1905 if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1906 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1907 smmu->ipa_size, smmu->pa_size);
1908
1909 return 0;
1910}
1911
1912struct arm_smmu_match_data {
1913 enum arm_smmu_arch_version version;
1914 enum arm_smmu_implementation model;
1915};
1916
1917#define ARM_SMMU_MATCH_DATA(name, ver, imp) \
1918static struct arm_smmu_match_data name = { .version = ver, .model = imp }
1919
1920ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1921ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1922ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1923ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1924ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1925ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1926
1927static const struct of_device_id arm_smmu_of_match[] = {
1928 { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1929 { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1930 { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1931 { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1932 { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1933 { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1934 { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
1935 { },
1936};
1937MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1938
1939#ifdef CONFIG_ACPI
1940static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1941{
1942 int ret = 0;
1943
1944 switch (model) {
1945 case ACPI_IORT_SMMU_V1:
1946 case ACPI_IORT_SMMU_CORELINK_MMU400:
1947 smmu->version = ARM_SMMU_V1;
1948 smmu->model = GENERIC_SMMU;
1949 break;
1950 case ACPI_IORT_SMMU_CORELINK_MMU401:
1951 smmu->version = ARM_SMMU_V1_64K;
1952 smmu->model = GENERIC_SMMU;
1953 break;
1954 case ACPI_IORT_SMMU_V2:
1955 smmu->version = ARM_SMMU_V2;
1956 smmu->model = GENERIC_SMMU;
1957 break;
1958 case ACPI_IORT_SMMU_CORELINK_MMU500:
1959 smmu->version = ARM_SMMU_V2;
1960 smmu->model = ARM_MMU500;
1961 break;
1962 case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1963 smmu->version = ARM_SMMU_V2;
1964 smmu->model = CAVIUM_SMMUV2;
1965 break;
1966 default:
1967 ret = -ENODEV;
1968 }
1969
1970 return ret;
1971}
1972
1973static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1974 struct arm_smmu_device *smmu)
1975{
1976 struct device *dev = smmu->dev;
1977 struct acpi_iort_node *node =
1978 *(struct acpi_iort_node **)dev_get_platdata(dev);
1979 struct acpi_iort_smmu *iort_smmu;
1980 int ret;
1981
1982 /* Retrieve SMMU1/2 specific data */
1983 iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1984
1985 ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1986 if (ret < 0)
1987 return ret;
1988
1989 /* Ignore the configuration access interrupt */
1990 smmu->num_global_irqs = 1;
1991
1992 if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1993 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1994
1995 return 0;
1996}
1997#else
1998static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1999 struct arm_smmu_device *smmu)
2000{
2001 return -ENODEV;
2002}
2003#endif
2004
2005static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2006 struct arm_smmu_device *smmu)
2007{
2008 const struct arm_smmu_match_data *data;
2009 struct device *dev = &pdev->dev;
2010 bool legacy_binding;
2011
2012 if (of_property_read_u32(dev->of_node, "#global-interrupts",
2013 &smmu->num_global_irqs)) {
2014 dev_err(dev, "missing #global-interrupts property\n");
2015 return -ENODEV;
2016 }
2017
2018 data = of_device_get_match_data(dev);
2019 smmu->version = data->version;
2020 smmu->model = data->model;
2021
2022 parse_driver_options(smmu);
2023
2024 legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2025 if (legacy_binding && !using_generic_binding) {
2026 if (!using_legacy_binding)
2027 pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2028 using_legacy_binding = true;
2029 } else if (!legacy_binding && !using_legacy_binding) {
2030 using_generic_binding = true;
2031 } else {
2032 dev_err(dev, "not probing due to mismatched DT properties\n");
2033 return -ENODEV;
2034 }
2035
2036 if (of_dma_is_coherent(dev->of_node))
2037 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2038
2039 return 0;
2040}
2041
2042static void arm_smmu_bus_init(void)
2043{
2044 /* Oh, for a proper bus abstraction */
2045 if (!iommu_present(&platform_bus_type))
2046 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2047#ifdef CONFIG_ARM_AMBA
2048 if (!iommu_present(&amba_bustype))
2049 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2050#endif
2051#ifdef CONFIG_PCI
2052 if (!iommu_present(&pci_bus_type)) {
2053 pci_request_acs();
2054 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2055 }
2056#endif
2057}
2058
2059static int arm_smmu_device_probe(struct platform_device *pdev)
2060{
2061 struct resource *res;
2062 resource_size_t ioaddr;
2063 struct arm_smmu_device *smmu;
2064 struct device *dev = &pdev->dev;
2065 int num_irqs, i, err;
2066
2067 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2068 if (!smmu) {
2069 dev_err(dev, "failed to allocate arm_smmu_device\n");
2070 return -ENOMEM;
2071 }
2072 smmu->dev = dev;
2073
2074 if (dev->of_node)
2075 err = arm_smmu_device_dt_probe(pdev, smmu);
2076 else
2077 err = arm_smmu_device_acpi_probe(pdev, smmu);
2078
2079 if (err)
2080 return err;
2081
2082 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2083 ioaddr = res->start;
2084 smmu->base = devm_ioremap_resource(dev, res);
2085 if (IS_ERR(smmu->base))
2086 return PTR_ERR(smmu->base);
2087 smmu->cb_base = smmu->base + resource_size(res) / 2;
2088
2089 num_irqs = 0;
2090 while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2091 num_irqs++;
2092 if (num_irqs > smmu->num_global_irqs)
2093 smmu->num_context_irqs++;
2094 }
2095
2096 if (!smmu->num_context_irqs) {
2097 dev_err(dev, "found %d interrupts but expected at least %d\n",
2098 num_irqs, smmu->num_global_irqs + 1);
2099 return -ENODEV;
2100 }
2101
2102 smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2103 GFP_KERNEL);
2104 if (!smmu->irqs) {
2105 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2106 return -ENOMEM;
2107 }
2108
2109 for (i = 0; i < num_irqs; ++i) {
2110 int irq = platform_get_irq(pdev, i);
2111
2112 if (irq < 0) {
2113 dev_err(dev, "failed to get irq index %d\n", i);
2114 return -ENODEV;
2115 }
2116 smmu->irqs[i] = irq;
2117 }
2118
2119 err = arm_smmu_device_cfg_probe(smmu);
2120 if (err)
2121 return err;
2122
2123 if (smmu->version == ARM_SMMU_V2) {
2124 if (smmu->num_context_banks > smmu->num_context_irqs) {
2125 dev_err(dev,
2126 "found only %d context irq(s) but %d required\n",
2127 smmu->num_context_irqs, smmu->num_context_banks);
2128 return -ENODEV;
2129 }
2130
2131 /* Ignore superfluous interrupts */
2132 smmu->num_context_irqs = smmu->num_context_banks;
2133 }
2134
2135 for (i = 0; i < smmu->num_global_irqs; ++i) {
2136 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2137 arm_smmu_global_fault,
2138 IRQF_SHARED,
2139 "arm-smmu global fault",
2140 smmu);
2141 if (err) {
2142 dev_err(dev, "failed to request global IRQ %d (%u)\n",
2143 i, smmu->irqs[i]);
2144 return err;
2145 }
2146 }
2147
2148 err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2149 "smmu.%pa", &ioaddr);
2150 if (err) {
2151 dev_err(dev, "Failed to register iommu in sysfs\n");
2152 return err;
2153 }
2154
2155 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2156 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2157
2158 err = iommu_device_register(&smmu->iommu);
2159 if (err) {
2160 dev_err(dev, "Failed to register iommu\n");
2161 return err;
2162 }
2163
2164 platform_set_drvdata(pdev, smmu);
2165 arm_smmu_device_reset(smmu);
2166 arm_smmu_test_smr_masks(smmu);
2167
2168 /*
2169 * For ACPI and generic DT bindings, an SMMU will be probed before
2170 * any device which might need it, so we want the bus ops in place
2171 * ready to handle default domain setup as soon as any SMMU exists.
2172 */
2173 if (!using_legacy_binding)
2174 arm_smmu_bus_init();
2175
2176 return 0;
2177}
2178
2179/*
2180 * With the legacy DT binding in play, though, we have no guarantees about
2181 * probe order, but then we're also not doing default domains, so we can
2182 * delay setting bus ops until we're sure every possible SMMU is ready,
2183 * and that way ensure that no add_device() calls get missed.
2184 */
2185static int arm_smmu_legacy_bus_init(void)
2186{
2187 if (using_legacy_binding)
2188 arm_smmu_bus_init();
2189 return 0;
2190}
2191device_initcall_sync(arm_smmu_legacy_bus_init);
2192
2193static int arm_smmu_device_remove(struct platform_device *pdev)
2194{
2195 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2196
2197 if (!smmu)
2198 return -ENODEV;
2199
2200 if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2201 dev_err(&pdev->dev, "removing device with active domains!\n");
2202
2203 /* Turn the thing off */
2204 writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2205 return 0;
2206}
2207
2208static void arm_smmu_device_shutdown(struct platform_device *pdev)
2209{
2210 arm_smmu_device_remove(pdev);
2211}
2212
2213static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2214{
2215 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2216
2217 arm_smmu_device_reset(smmu);
2218 return 0;
2219}
2220
2221static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
2222
2223static struct platform_driver arm_smmu_driver = {
2224 .driver = {
2225 .name = "arm-smmu",
2226 .of_match_table = of_match_ptr(arm_smmu_of_match),
2227 .pm = &arm_smmu_pm_ops,
2228 },
2229 .probe = arm_smmu_device_probe,
2230 .remove = arm_smmu_device_remove,
2231 .shutdown = arm_smmu_device_shutdown,
2232};
2233module_platform_driver(arm_smmu_driver);
2234
2235MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2236MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2237MODULE_LICENSE("GPL v2");