1/*
2 * IOMMU API for ARM architected SMMU implementations.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16 *
17 * Copyright (C) 2013 ARM Limited
18 *
19 * Author: Will Deacon <will.deacon@arm.com>
20 *
21 * This driver currently supports:
22 * - SMMUv1 and v2 implementations
23 * - Stream-matching and stream-indexing
24 * - v7/v8 long-descriptor format
25 * - Non-secure access to the SMMU
26 * - Context fault reporting
27 * - Extended Stream ID (16 bit)
28 */
29
30#define pr_fmt(fmt) "arm-smmu: " fmt
31
32#include <linux/acpi.h>
33#include <linux/acpi_iort.h>
34#include <linux/atomic.h>
35#include <linux/delay.h>
36#include <linux/dma-iommu.h>
37#include <linux/dma-mapping.h>
38#include <linux/err.h>
39#include <linux/interrupt.h>
40#include <linux/io.h>
41#include <linux/io-64-nonatomic-hi-lo.h>
42#include <linux/iommu.h>
43#include <linux/iopoll.h>
44#include <linux/module.h>
45#include <linux/of.h>
46#include <linux/of_address.h>
47#include <linux/of_device.h>
48#include <linux/of_iommu.h>
49#include <linux/pci.h>
50#include <linux/platform_device.h>
51#include <linux/slab.h>
52#include <linux/spinlock.h>
53
54#include <linux/amba/bus.h>
55
56#include "io-pgtable.h"
57#include "arm-smmu-regs.h"
58
59/*
60 * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
61 * global register space are still, in fact, using a hypervisor to mediate it
62 * by trapping and emulating register accesses. Sadly, some deployed versions
63 * of said trapping code have bugs wherein they go horribly wrong for stores
64 * using r31 (i.e. XZR/WZR) as the source register.
65 */
66#define QCOM_DUMMY_VAL -1
67
68#define ARM_MMU500_ACTLR_CPRE (1 << 1)
69
70#define ARM_MMU500_ACR_CACHE_LOCK (1 << 26)
71#define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8)
72
73#define TLB_LOOP_TIMEOUT 1000000 /* 1s! */
74#define TLB_SPIN_COUNT 10
75
76/* Maximum number of context banks per SMMU */
77#define ARM_SMMU_MAX_CBS 128
78
79/* SMMU global address space */
80#define ARM_SMMU_GR0(smmu) ((smmu)->base)
81#define ARM_SMMU_GR1(smmu) ((smmu)->base + (1 << (smmu)->pgshift))
82
83/*
84 * SMMU global address space with conditional offset to access secure
85 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
86 * nsGFSYNR0: 0x450)
87 */
88#define ARM_SMMU_GR0_NS(smmu) \
89 ((smmu)->base + \
90 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \
91 ? 0x400 : 0))
92
93/*
94 * Some 64-bit registers only make sense to write atomically, but in such
95 * cases all the data relevant to AArch32 formats lies within the lower word,
96 * therefore this actually makes more sense than it might first appear.
97 */
98#ifdef CONFIG_64BIT
99#define smmu_write_atomic_lq writeq_relaxed
100#else
101#define smmu_write_atomic_lq writel_relaxed
102#endif
103
104/* Translation context bank */
105#define ARM_SMMU_CB(smmu, n) ((smmu)->cb_base + ((n) << (smmu)->pgshift))
106
107#define MSI_IOVA_BASE 0x8000000
108#define MSI_IOVA_LENGTH 0x100000
109
110static int force_stage;
111module_param(force_stage, int, S_IRUGO);
112MODULE_PARM_DESC(force_stage,
113 "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
114static bool disable_bypass;
115module_param(disable_bypass, bool, S_IRUGO);
116MODULE_PARM_DESC(disable_bypass,
117 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
118
119enum arm_smmu_arch_version {
120 ARM_SMMU_V1,
121 ARM_SMMU_V1_64K,
122 ARM_SMMU_V2,
123};
124
125enum arm_smmu_implementation {
126 GENERIC_SMMU,
127 ARM_MMU500,
128 CAVIUM_SMMUV2,
129 QCOM_SMMUV2,
130};
131
132/* Until ACPICA headers cover IORT rev. C */
133#ifndef ACPI_IORT_SMMU_CORELINK_MMU401
134#define ACPI_IORT_SMMU_CORELINK_MMU401 0x4
135#endif
136#ifndef ACPI_IORT_SMMU_CAVIUM_THUNDERX
137#define ACPI_IORT_SMMU_CAVIUM_THUNDERX 0x5
138#endif
139
140struct arm_smmu_s2cr {
141 struct iommu_group *group;
142 int count;
143 enum arm_smmu_s2cr_type type;
144 enum arm_smmu_s2cr_privcfg privcfg;
145 u8 cbndx;
146};
147
148#define s2cr_init_val (struct arm_smmu_s2cr){ \
149 .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS, \
150}
151
152struct arm_smmu_smr {
153 u16 mask;
154 u16 id;
155 bool valid;
156};
157
158struct arm_smmu_cb {
159 u64 ttbr[2];
160 u32 tcr[2];
161 u32 mair[2];
162 struct arm_smmu_cfg *cfg;
163};
164
165struct arm_smmu_master_cfg {
166 struct arm_smmu_device *smmu;
167 s16 smendx[];
168};
169#define INVALID_SMENDX -1
170#define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
171#define fwspec_smmu(fw) (__fwspec_cfg(fw)->smmu)
172#define fwspec_smendx(fw, i) \
173 (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
174#define for_each_cfg_sme(fw, i, idx) \
175 for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
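/*
 * Usage sketch (illustrative only, mirroring arm_smmu_master_alloc_smes()
 * and arm_smmu_master_free_smes() further down in this file):
 *
 *	int i, idx;
 *
 *	for_each_cfg_sme(fwspec, i, idx) {
 *		if (idx == INVALID_SMENDX)
 *			continue;	/- no stream map entry claimed yet -/
 *		arm_smmu_write_sme(smmu, idx);
 *	}
 *
 * Within the loop body, idx is the stream map entry index recorded for
 * fwspec->ids[i], or INVALID_SMENDX if none has been allocated yet.
 */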
176
177struct arm_smmu_device {
178 struct device *dev;
179
180 void __iomem *base;
181 void __iomem *cb_base;
182 unsigned long pgshift;
183
184#define ARM_SMMU_FEAT_COHERENT_WALK (1 << 0)
185#define ARM_SMMU_FEAT_STREAM_MATCH (1 << 1)
186#define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
187#define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
188#define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
189#define ARM_SMMU_FEAT_TRANS_OPS (1 << 5)
190#define ARM_SMMU_FEAT_VMID16 (1 << 6)
191#define ARM_SMMU_FEAT_FMT_AARCH64_4K (1 << 7)
192#define ARM_SMMU_FEAT_FMT_AARCH64_16K (1 << 8)
193#define ARM_SMMU_FEAT_FMT_AARCH64_64K (1 << 9)
194#define ARM_SMMU_FEAT_FMT_AARCH32_L (1 << 10)
195#define ARM_SMMU_FEAT_FMT_AARCH32_S (1 << 11)
196#define ARM_SMMU_FEAT_EXIDS (1 << 12)
197 u32 features;
198
199#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
200 u32 options;
201 enum arm_smmu_arch_version version;
202 enum arm_smmu_implementation model;
203
204 u32 num_context_banks;
205 u32 num_s2_context_banks;
206 DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
207 struct arm_smmu_cb *cbs;
208 atomic_t irptndx;
209
210 u32 num_mapping_groups;
211 u16 streamid_mask;
212 u16 smr_mask_mask;
213 struct arm_smmu_smr *smrs;
214 struct arm_smmu_s2cr *s2crs;
215 struct mutex stream_map_mutex;
216
217 unsigned long va_size;
218 unsigned long ipa_size;
219 unsigned long pa_size;
220 unsigned long pgsize_bitmap;
221
222 u32 num_global_irqs;
223 u32 num_context_irqs;
224 unsigned int *irqs;
225
226 u32 cavium_id_base; /* Specific to Cavium */
227
228 spinlock_t global_sync_lock;
229
230 /* IOMMU core code handle */
231 struct iommu_device iommu;
232};
233
234enum arm_smmu_context_fmt {
235 ARM_SMMU_CTX_FMT_NONE,
236 ARM_SMMU_CTX_FMT_AARCH64,
237 ARM_SMMU_CTX_FMT_AARCH32_L,
238 ARM_SMMU_CTX_FMT_AARCH32_S,
239};
240
241struct arm_smmu_cfg {
242 u8 cbndx;
243 u8 irptndx;
244 union {
245 u16 asid;
246 u16 vmid;
247 };
248 u32 cbar;
249 enum arm_smmu_context_fmt fmt;
250};
251#define INVALID_IRPTNDX 0xff
252
253enum arm_smmu_domain_stage {
254 ARM_SMMU_DOMAIN_S1 = 0,
255 ARM_SMMU_DOMAIN_S2,
256 ARM_SMMU_DOMAIN_NESTED,
257 ARM_SMMU_DOMAIN_BYPASS,
258};
259
260struct arm_smmu_domain {
261 struct arm_smmu_device *smmu;
262 struct io_pgtable_ops *pgtbl_ops;
263 const struct iommu_gather_ops *tlb_ops;
264 struct arm_smmu_cfg cfg;
265 enum arm_smmu_domain_stage stage;
266 struct mutex init_mutex; /* Protects smmu pointer */
267 spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
268 struct iommu_domain domain;
269};
270
271struct arm_smmu_option_prop {
272 u32 opt;
273 const char *prop;
274};
275
276static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
277
278static bool using_legacy_binding, using_generic_binding;
279
280static struct arm_smmu_option_prop arm_smmu_options[] = {
281 { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
282 { 0, NULL},
283};
284
285static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
286{
287 return container_of(dom, struct arm_smmu_domain, domain);
288}
289
290static void parse_driver_options(struct arm_smmu_device *smmu)
291{
292 int i = 0;
293
294 do {
295 if (of_property_read_bool(smmu->dev->of_node,
296 arm_smmu_options[i].prop)) {
297 smmu->options |= arm_smmu_options[i].opt;
298 dev_notice(smmu->dev, "option %s\n",
299 arm_smmu_options[i].prop);
300 }
301 } while (arm_smmu_options[++i].opt);
302}
303
304static struct device_node *dev_get_dev_node(struct device *dev)
305{
306 if (dev_is_pci(dev)) {
307 struct pci_bus *bus = to_pci_dev(dev)->bus;
308
309 while (!pci_is_root_bus(bus))
310 bus = bus->parent;
311 return of_node_get(bus->bridge->parent->of_node);
312 }
313
314 return of_node_get(dev->of_node);
315}
316
317static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
318{
319 *((__be32 *)data) = cpu_to_be32(alias);
320 return 0; /* Continue walking */
321}
322
323static int __find_legacy_master_phandle(struct device *dev, void *data)
324{
325 struct of_phandle_iterator *it = *(void **)data;
326 struct device_node *np = it->node;
327 int err;
328
329 of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
330 "#stream-id-cells", 0)
331 if (it->node == np) {
332 *(void **)data = dev;
333 return 1;
334 }
335 it->node = np;
336 return err == -ENOENT ? 0 : err;
337}
338
339static struct platform_driver arm_smmu_driver;
340static struct iommu_ops arm_smmu_ops;
341
342static int arm_smmu_register_legacy_master(struct device *dev,
343 struct arm_smmu_device **smmu)
344{
345 struct device *smmu_dev;
346 struct device_node *np;
347 struct of_phandle_iterator it;
348 void *data = &it;
349 u32 *sids;
350 __be32 pci_sid;
351 int err;
352
353 np = dev_get_dev_node(dev);
354 if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
355 of_node_put(np);
356 return -ENODEV;
357 }
358
359 it.node = np;
360 err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
361 __find_legacy_master_phandle);
362 smmu_dev = data;
363 of_node_put(np);
364 if (err == 0)
365 return -ENODEV;
366 if (err < 0)
367 return err;
368
369 if (dev_is_pci(dev)) {
370 /* "mmu-masters" assumes Stream ID == Requester ID */
371 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
372 &pci_sid);
373 it.cur = &pci_sid;
374 it.cur_count = 1;
375 }
376
377 err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
378 &arm_smmu_ops);
379 if (err)
380 return err;
381
382 sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
383 if (!sids)
384 return -ENOMEM;
385
386 *smmu = dev_get_drvdata(smmu_dev);
387 of_phandle_iterator_args(&it, sids, it.cur_count);
388 err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
389 kfree(sids);
390 return err;
391}
392
393static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
394{
395 int idx;
396
397 do {
398 idx = find_next_zero_bit(map, end, start);
399 if (idx == end)
400 return -ENOSPC;
401 } while (test_and_set_bit(idx, map));
402
403 return idx;
404}
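/*
 * For reference, arm_smmu_init_domain_context() below hands out context
 * banks with this helper: stage-1 domains pass start = num_s2_context_banks
 * while stage-2 domains pass start = 0, i.e.
 *
 *	ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
 *				      smmu->num_context_banks);
 *
 * so the first num_s2_context_banks banks are left for stage-2 contexts.
 */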
405
406static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
407{
408 clear_bit(idx, map);
409}
410
411/* Wait for any pending TLB invalidations to complete */
412static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
413 void __iomem *sync, void __iomem *status)
414{
415 unsigned int spin_cnt, delay;
416
417 writel_relaxed(QCOM_DUMMY_VAL, sync);
418 for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
419 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
420 if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
421 return;
422 cpu_relax();
423 }
424 udelay(delay);
425 }
426 dev_err_ratelimited(smmu->dev,
427 "TLB sync timed out -- SMMU may be deadlocked\n");
428}
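/*
 * Timeout arithmetic, for reference: each outer iteration above spins
 * TLB_SPIN_COUNT times and then udelay()s for 1, 2, 4, ... microseconds.
 * Doubling the delay until it reaches TLB_LOOP_TIMEOUT (1000000) sleeps for
 * roughly (2^20 - 1) us in total, i.e. about the advertised one second.
 */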
429
430static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
431{
432 void __iomem *base = ARM_SMMU_GR0(smmu);
433 unsigned long flags;
434
435 spin_lock_irqsave(&smmu->global_sync_lock, flags);
436 __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
437 base + ARM_SMMU_GR0_sTLBGSTATUS);
438 spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
439}
440
441static void arm_smmu_tlb_sync_context(void *cookie)
442{
443 struct arm_smmu_domain *smmu_domain = cookie;
444 struct arm_smmu_device *smmu = smmu_domain->smmu;
445 void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
446 unsigned long flags;
447
448 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
449 __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
450 base + ARM_SMMU_CB_TLBSTATUS);
451 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
452}
453
454static void arm_smmu_tlb_sync_vmid(void *cookie)
455{
456 struct arm_smmu_domain *smmu_domain = cookie;
457
458 arm_smmu_tlb_sync_global(smmu_domain->smmu);
459}
460
461static void arm_smmu_tlb_inv_context_s1(void *cookie)
462{
463 struct arm_smmu_domain *smmu_domain = cookie;
464 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
465 void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
466
467 writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
468 arm_smmu_tlb_sync_context(cookie);
469}
470
471static void arm_smmu_tlb_inv_context_s2(void *cookie)
472{
473 struct arm_smmu_domain *smmu_domain = cookie;
474 struct arm_smmu_device *smmu = smmu_domain->smmu;
475 void __iomem *base = ARM_SMMU_GR0(smmu);
476
477 writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
478 arm_smmu_tlb_sync_global(smmu);
479}
480
481static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
482 size_t granule, bool leaf, void *cookie)
483{
484 struct arm_smmu_domain *smmu_domain = cookie;
485 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
486 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
487 void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
488
489 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
490 wmb();
491
492 if (stage1) {
493 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
494
495 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
496 iova &= ~12UL;
497 iova |= cfg->asid;
498 do {
499 writel_relaxed(iova, reg);
500 iova += granule;
501 } while (size -= granule);
502 } else {
503 iova >>= 12;
504 iova |= (u64)cfg->asid << 48;
505 do {
506 writeq_relaxed(iova, reg);
507 iova += granule >> 12;
508 } while (size -= granule);
509 }
510 } else {
511 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
512 ARM_SMMU_CB_S2_TLBIIPAS2;
513 iova >>= 12;
514 do {
515 smmu_write_atomic_lq(iova, reg);
516 iova += granule >> 12;
517 } while (size -= granule);
518 }
519}
520
521/*
522 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
523 * almost negligible, but the benefit of getting the first one in as far ahead
524 * of the sync as possible is significant, hence we don't just make this a
525 * no-op and set .tlb_sync to arm_smmu_inv_context_s2() as you might think.
526 */
527static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
528 size_t granule, bool leaf, void *cookie)
529{
530 struct arm_smmu_domain *smmu_domain = cookie;
531 void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
532
533 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
534 wmb();
535
536 writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
537}
538
539static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
540 .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
541 .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
542 .tlb_sync = arm_smmu_tlb_sync_context,
543};
544
545static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
546 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
547 .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
548 .tlb_sync = arm_smmu_tlb_sync_context,
549};
550
551static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
552 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
553 .tlb_add_flush = arm_smmu_tlb_inv_vmid_nosync,
554 .tlb_sync = arm_smmu_tlb_sync_vmid,
555};
556
557static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
558{
559 u32 fsr, fsynr;
560 unsigned long iova;
561 struct iommu_domain *domain = dev;
562 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
563 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
564 struct arm_smmu_device *smmu = smmu_domain->smmu;
565 void __iomem *cb_base;
566
567 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
568 fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
569
570 if (!(fsr & FSR_FAULT))
571 return IRQ_NONE;
572
573 fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
574 iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
575
576 dev_err_ratelimited(smmu->dev,
577 "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
578 fsr, iova, fsynr, cfg->cbndx);
579
580 writel(fsr, cb_base + ARM_SMMU_CB_FSR);
581 return IRQ_HANDLED;
582}
583
584static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
585{
586 u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
587 struct arm_smmu_device *smmu = dev;
588 void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
589
590 gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
591 gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
592 gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
593 gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
594
595 if (!gfsr)
596 return IRQ_NONE;
597
598 dev_err_ratelimited(smmu->dev,
599 "Unexpected global fault, this could be serious\n");
600 dev_err_ratelimited(smmu->dev,
601 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
602 gfsr, gfsynr0, gfsynr1, gfsynr2);
603
604 writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
605 return IRQ_HANDLED;
606}
607
608static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
609 struct io_pgtable_cfg *pgtbl_cfg)
610{
611 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
612 struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
613 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
614
615 cb->cfg = cfg;
616
617 /* TTBCR */
618 if (stage1) {
619 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
620 cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
621 } else {
622 cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
623 cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
624 cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
625 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
626 cb->tcr[1] |= TTBCR2_AS;
627 }
628 } else {
629 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
630 }
631
632 /* TTBRs */
633 if (stage1) {
634 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
635 cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
636 cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
637 } else {
638 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
639 cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
640 cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
641 cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
642 }
643 } else {
644 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
645 }
646
647 /* MAIRs (stage-1 only) */
648 if (stage1) {
649 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
650 cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
651 cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
652 } else {
653 cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
654 cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
655 }
656 }
657}
658
659static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
660{
661 u32 reg;
662 bool stage1;
663 struct arm_smmu_cb *cb = &smmu->cbs[idx];
664 struct arm_smmu_cfg *cfg = cb->cfg;
665 void __iomem *cb_base, *gr1_base;
666
667 cb_base = ARM_SMMU_CB(smmu, idx);
668
669 /* Unassigned context banks only need disabling */
670 if (!cfg) {
671 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
672 return;
673 }
674
675 gr1_base = ARM_SMMU_GR1(smmu);
676 stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
677
678 /* CBA2R */
679 if (smmu->version > ARM_SMMU_V1) {
680 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
681 reg = CBA2R_RW64_64BIT;
682 else
683 reg = CBA2R_RW64_32BIT;
684 /* 16-bit VMIDs live in CBA2R */
685 if (smmu->features & ARM_SMMU_FEAT_VMID16)
686 reg |= cfg->vmid << CBA2R_VMID_SHIFT;
687
688 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
689 }
690
691 /* CBAR */
692 reg = cfg->cbar;
693 if (smmu->version < ARM_SMMU_V2)
694 reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
695
696 /*
697 * Use the weakest shareability/memory types, so they are
698 * overridden by the ttbcr/pte.
699 */
700 if (stage1) {
701 reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
702 (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
703 } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
704 /* 8-bit VMIDs live in CBAR */
705 reg |= cfg->vmid << CBAR_VMID_SHIFT;
706 }
707 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
708
709 /*
710 * TTBCR
711 * We must write this before the TTBRs, since it determines the
712 * access behaviour of some fields (in particular, ASID[15:8]).
713 */
714 if (stage1 && smmu->version > ARM_SMMU_V1)
715 writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
716 writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
717
718 /* TTBRs */
719 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
720 writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
721 writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
722 writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
723 } else {
724 writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
725 if (stage1)
726 writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
727 }
728
729 /* MAIRs (stage-1 only) */
730 if (stage1) {
731 writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
732 writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
733 }
734
735 /* SCTLR */
736 reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
737 if (stage1)
738 reg |= SCTLR_S1_ASIDPNE;
739 if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
740 reg |= SCTLR_E;
741
742 writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
743}
744
745static int arm_smmu_init_domain_context(struct iommu_domain *domain,
746 struct arm_smmu_device *smmu)
747{
748 int irq, start, ret = 0;
749 unsigned long ias, oas;
750 struct io_pgtable_ops *pgtbl_ops;
751 struct io_pgtable_cfg pgtbl_cfg;
752 enum io_pgtable_fmt fmt;
753 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
754 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
755
756 mutex_lock(&smmu_domain->init_mutex);
757 if (smmu_domain->smmu)
758 goto out_unlock;
759
760 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
761 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
762 smmu_domain->smmu = smmu;
763 goto out_unlock;
764 }
765
766 /*
767 * Mapping the requested stage onto what we support is surprisingly
768 * complicated, mainly because the spec allows S1+S2 SMMUs without
769 * support for nested translation. That means we end up with the
770 * following table:
771 *
772 * Requested Supported Actual
773 * S1 N S1
774 * S1 S1+S2 S1
775 * S1 S2 S2
776 * S1 S1 S1
777 * N N N
778 * N S1+S2 S2
779 * N S2 S2
780 * N S1 S1
781 *
782 * Note that you can't actually request stage-2 mappings.
783 */
784 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
785 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
786 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
787 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
788
789 /*
790 * Choosing a suitable context format is even more fiddly. Until we
791 * grow some way for the caller to express a preference, and/or move
792 * the decision into the io-pgtable code where it arguably belongs,
793 * just aim for the closest thing to the rest of the system, and hope
794 * that the hardware isn't esoteric enough that we can't assume AArch64
795 * support to be a superset of AArch32 support...
796 */
797 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
798 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
799 if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
800 !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
801 (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
802 (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
803 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
804 if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
805 (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
806 ARM_SMMU_FEAT_FMT_AARCH64_16K |
807 ARM_SMMU_FEAT_FMT_AARCH64_4K)))
808 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
809
810 if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
811 ret = -EINVAL;
812 goto out_unlock;
813 }
814
815 switch (smmu_domain->stage) {
816 case ARM_SMMU_DOMAIN_S1:
817 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
818 start = smmu->num_s2_context_banks;
819 ias = smmu->va_size;
820 oas = smmu->ipa_size;
821 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
822 fmt = ARM_64_LPAE_S1;
823 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
824 fmt = ARM_32_LPAE_S1;
825 ias = min(ias, 32UL);
826 oas = min(oas, 40UL);
827 } else {
828 fmt = ARM_V7S;
829 ias = min(ias, 32UL);
830 oas = min(oas, 32UL);
831 }
832 smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
833 break;
834 case ARM_SMMU_DOMAIN_NESTED:
835 /*
836 * We will likely want to change this if/when KVM gets
837 * involved.
838 */
839 case ARM_SMMU_DOMAIN_S2:
840 cfg->cbar = CBAR_TYPE_S2_TRANS;
841 start = 0;
842 ias = smmu->ipa_size;
843 oas = smmu->pa_size;
844 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
845 fmt = ARM_64_LPAE_S2;
846 } else {
847 fmt = ARM_32_LPAE_S2;
848 ias = min(ias, 40UL);
849 oas = min(oas, 40UL);
850 }
851 if (smmu->version == ARM_SMMU_V2)
852 smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
853 else
854 smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
855 break;
856 default:
857 ret = -EINVAL;
858 goto out_unlock;
859 }
860 ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
861 smmu->num_context_banks);
862 if (ret < 0)
863 goto out_unlock;
864
865 cfg->cbndx = ret;
866 if (smmu->version < ARM_SMMU_V2) {
867 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
868 cfg->irptndx %= smmu->num_context_irqs;
869 } else {
870 cfg->irptndx = cfg->cbndx;
871 }
872
873 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
874 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
875 else
876 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
877
878 pgtbl_cfg = (struct io_pgtable_cfg) {
879 .pgsize_bitmap = smmu->pgsize_bitmap,
880 .ias = ias,
881 .oas = oas,
882 .tlb = smmu_domain->tlb_ops,
883 .iommu_dev = smmu->dev,
884 };
885
886 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
887 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
888
889 smmu_domain->smmu = smmu;
890 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
891 if (!pgtbl_ops) {
892 ret = -ENOMEM;
893 goto out_clear_smmu;
894 }
895
896 /* Update the domain's page sizes to reflect the page table format */
897 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
898 domain->geometry.aperture_end = (1UL << ias) - 1;
899 domain->geometry.force_aperture = true;
900
901 /* Initialise the context bank with our page table cfg */
902 arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
903 arm_smmu_write_context_bank(smmu, cfg->cbndx);
904
905 /*
906 * Request context fault interrupt. Do this last to avoid the
907 * handler seeing a half-initialised domain state.
908 */
909 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
910 ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
911 IRQF_SHARED, "arm-smmu-context-fault", domain);
912 if (ret < 0) {
913 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
914 cfg->irptndx, irq);
915 cfg->irptndx = INVALID_IRPTNDX;
916 }
917
918 mutex_unlock(&smmu_domain->init_mutex);
919
920 /* Publish page table ops for map/unmap */
921 smmu_domain->pgtbl_ops = pgtbl_ops;
922 return 0;
923
924out_clear_smmu:
925 smmu_domain->smmu = NULL;
926out_unlock:
927 mutex_unlock(&smmu_domain->init_mutex);
928 return ret;
929}
930
931static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
932{
933 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
934 struct arm_smmu_device *smmu = smmu_domain->smmu;
935 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
936 int irq;
937
938 if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
939 return;
940
941 /*
942 * Disable the context bank and free the page tables before freeing
943 * it.
944 */
945 smmu->cbs[cfg->cbndx].cfg = NULL;
946 arm_smmu_write_context_bank(smmu, cfg->cbndx);
947
948 if (cfg->irptndx != INVALID_IRPTNDX) {
949 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
950 devm_free_irq(smmu->dev, irq, domain);
951 }
952
953 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
954 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
955}
956
957static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
958{
959 struct arm_smmu_domain *smmu_domain;
960
961 if (type != IOMMU_DOMAIN_UNMANAGED &&
962 type != IOMMU_DOMAIN_DMA &&
963 type != IOMMU_DOMAIN_IDENTITY)
964 return NULL;
965 /*
966 * Allocate the domain and initialise some of its data structures.
967 * We can't really do anything meaningful until we've added a
968 * master.
969 */
970 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
971 if (!smmu_domain)
972 return NULL;
973
974 if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
975 iommu_get_dma_cookie(&smmu_domain->domain))) {
976 kfree(smmu_domain);
977 return NULL;
978 }
979
980 mutex_init(&smmu_domain->init_mutex);
981 spin_lock_init(&smmu_domain->cb_lock);
982
983 return &smmu_domain->domain;
984}
985
986static void arm_smmu_domain_free(struct iommu_domain *domain)
987{
988 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
989
990 /*
991 * Free the domain resources. We assume that all devices have
992 * already been detached.
993 */
994 iommu_put_dma_cookie(domain);
995 arm_smmu_destroy_domain_context(domain);
996 kfree(smmu_domain);
997}
998
999static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
1000{
1001 struct arm_smmu_smr *smr = smmu->smrs + idx;
1002 u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
1003
1004 if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
1005 reg |= SMR_VALID;
1006 writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
1007}
1008
1009static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
1010{
1011 struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
1012 u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
1013 (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
1014 (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
1015
1016 if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
1017 smmu->smrs[idx].valid)
1018 reg |= S2CR_EXIDVALID;
1019 writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
1020}
1021
1022static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1023{
1024 arm_smmu_write_s2cr(smmu, idx);
1025 if (smmu->smrs)
1026 arm_smmu_write_smr(smmu, idx);
1027}
1028
1029/*
1030 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1031 * should be called after sCR0 is written.
1032 */
1033static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1034{
1035 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1036 u32 smr;
1037
1038 if (!smmu->smrs)
1039 return;
1040
1041 /*
1042 * SMR.ID bits may not be preserved if the corresponding MASK
1043 * bits are set, so check each one separately. We can reject
1044 * masters later if they try to claim IDs outside these masks.
1045 */
1046 smr = smmu->streamid_mask << SMR_ID_SHIFT;
1047 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1048 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1049 smmu->streamid_mask = smr >> SMR_ID_SHIFT;
1050
1051 smr = smmu->streamid_mask << SMR_MASK_SHIFT;
1052 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1053 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1054 smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
1055}
1056
1057static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1058{
1059 struct arm_smmu_smr *smrs = smmu->smrs;
1060 int i, free_idx = -ENOSPC;
1061
1062 /* Stream indexing is blissfully easy */
1063 if (!smrs)
1064 return id;
1065
1066 /* Validating SMRs is... less so */
1067 for (i = 0; i < smmu->num_mapping_groups; ++i) {
1068 if (!smrs[i].valid) {
1069 /*
1070 * Note the first free entry we come across, which
1071 * we'll claim in the end if nothing else matches.
1072 */
1073 if (free_idx < 0)
1074 free_idx = i;
1075 continue;
1076 }
1077 /*
1078 * If the new entry is _entirely_ matched by an existing entry,
1079 * then reuse that, with the guarantee that there also cannot
1080 * be any subsequent conflicting entries. In normal use we'd
1081 * expect simply identical entries for this case, but there's
1082 * no harm in accommodating the generalisation.
1083 */
1084 if ((mask & smrs[i].mask) == mask &&
1085 !((id ^ smrs[i].id) & ~smrs[i].mask))
1086 return i;
1087 /*
1088 * If the new entry has any other overlap with an existing one,
1089 * though, then there always exists at least one stream ID
1090 * which would cause a conflict, and we can't allow that risk.
1091 */
1092 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1093 return -EINVAL;
1094 }
1095
1096 return free_idx;
1097}
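/*
 * Worked example (illustrative values): given an existing SMR with id 0x400
 * and mask 0x0f (matching IDs 0x400-0x40f), a new request for id 0x404,
 * mask 0x03 is entirely covered ((0x03 & 0x0f) == 0x03 and
 * (0x404 ^ 0x400) & ~0x0f == 0), so that entry is reused; a request for
 * id 0x408, mask 0x30 overlaps it only partially
 * ((0x408 ^ 0x400) & ~(0x0f | 0x30) == 0) and is rejected with -EINVAL.
 */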
1098
1099static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1100{
1101 if (--smmu->s2crs[idx].count)
1102 return false;
1103
1104 smmu->s2crs[idx] = s2cr_init_val;
1105 if (smmu->smrs)
1106 smmu->smrs[idx].valid = false;
1107
1108 return true;
1109}
1110
1111static int arm_smmu_master_alloc_smes(struct device *dev)
1112{
1113 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1114 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1115 struct arm_smmu_device *smmu = cfg->smmu;
1116 struct arm_smmu_smr *smrs = smmu->smrs;
1117 struct iommu_group *group;
1118 int i, idx, ret;
1119
1120 mutex_lock(&smmu->stream_map_mutex);
1121 /* Figure out a viable stream map entry allocation */
1122 for_each_cfg_sme(fwspec, i, idx) {
1123 u16 sid = fwspec->ids[i];
1124 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1125
1126 if (idx != INVALID_SMENDX) {
1127 ret = -EEXIST;
1128 goto out_err;
1129 }
1130
1131 ret = arm_smmu_find_sme(smmu, sid, mask);
1132 if (ret < 0)
1133 goto out_err;
1134
1135 idx = ret;
1136 if (smrs && smmu->s2crs[idx].count == 0) {
1137 smrs[idx].id = sid;
1138 smrs[idx].mask = mask;
1139 smrs[idx].valid = true;
1140 }
1141 smmu->s2crs[idx].count++;
1142 cfg->smendx[i] = (s16)idx;
1143 }
1144
1145 group = iommu_group_get_for_dev(dev);
1146 if (!group)
1147 group = ERR_PTR(-ENOMEM);
1148 if (IS_ERR(group)) {
1149 ret = PTR_ERR(group);
1150 goto out_err;
1151 }
1152 iommu_group_put(group);
1153
1154 /* It worked! Now, poke the actual hardware */
1155 for_each_cfg_sme(fwspec, i, idx) {
1156 arm_smmu_write_sme(smmu, idx);
1157 smmu->s2crs[idx].group = group;
1158 }
1159
1160 mutex_unlock(&smmu->stream_map_mutex);
1161 return 0;
1162
1163out_err:
1164 while (i--) {
1165 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1166 cfg->smendx[i] = INVALID_SMENDX;
1167 }
1168 mutex_unlock(&smmu->stream_map_mutex);
1169 return ret;
1170}
1171
1172static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1173{
1174 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1175 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1176 int i, idx;
1177
1178 mutex_lock(&smmu->stream_map_mutex);
1179 for_each_cfg_sme(fwspec, i, idx) {
1180 if (arm_smmu_free_sme(smmu, idx))
1181 arm_smmu_write_sme(smmu, idx);
1182 cfg->smendx[i] = INVALID_SMENDX;
1183 }
1184 mutex_unlock(&smmu->stream_map_mutex);
1185}
1186
1187static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1188 struct iommu_fwspec *fwspec)
1189{
1190 struct arm_smmu_device *smmu = smmu_domain->smmu;
1191 struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1192 u8 cbndx = smmu_domain->cfg.cbndx;
1193 enum arm_smmu_s2cr_type type;
1194 int i, idx;
1195
1196 if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1197 type = S2CR_TYPE_BYPASS;
1198 else
1199 type = S2CR_TYPE_TRANS;
1200
1201 for_each_cfg_sme(fwspec, i, idx) {
1202 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1203 continue;
1204
1205 s2cr[idx].type = type;
1206 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1207 s2cr[idx].cbndx = cbndx;
1208 arm_smmu_write_s2cr(smmu, idx);
1209 }
1210 return 0;
1211}
1212
1213static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1214{
1215 int ret;
1216 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1217 struct arm_smmu_device *smmu;
1218 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1219
1220 if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1221 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1222 return -ENXIO;
1223 }
1224
1225 /*
1226 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1227 * domains between of_xlate() and add_device() - we have no way to cope
1228 * with that, so until ARM gets converted to rely on groups and default
1229 * domains, just say no (but more politely than by dereferencing NULL).
1230 * This should be at least a WARN_ON once that's sorted.
1231 */
1232 if (!fwspec->iommu_priv)
1233 return -ENODEV;
1234
1235 smmu = fwspec_smmu(fwspec);
1236 /* Ensure that the domain is finalised */
1237 ret = arm_smmu_init_domain_context(domain, smmu);
1238 if (ret < 0)
1239 return ret;
1240
1241 /*
1242 * Sanity check the domain. We don't support domains across
1243 * different SMMUs.
1244 */
1245 if (smmu_domain->smmu != smmu) {
1246 dev_err(dev,
1247 "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1248 dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1249 return -EINVAL;
1250 }
1251
1252 /* Looks ok, so add the device to the domain */
1253 return arm_smmu_domain_add_master(smmu_domain, fwspec);
1254}
1255
1256static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1257 phys_addr_t paddr, size_t size, int prot)
1258{
1259 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1260
1261 if (!ops)
1262 return -ENODEV;
1263
1264 return ops->map(ops, iova, paddr, size, prot);
1265}
1266
1267static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1268 size_t size)
1269{
1270 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1271
1272 if (!ops)
1273 return 0;
1274
1275 return ops->unmap(ops, iova, size);
1276}
1277
1278static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1279{
1280 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1281
1282 if (smmu_domain->tlb_ops)
1283 smmu_domain->tlb_ops->tlb_sync(smmu_domain);
1284}
1285
1286static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1287 dma_addr_t iova)
1288{
1289 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1290 struct arm_smmu_device *smmu = smmu_domain->smmu;
1291 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1292 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1293 struct device *dev = smmu->dev;
1294 void __iomem *cb_base;
1295 u32 tmp;
1296 u64 phys;
1297 unsigned long va, flags;
1298
1299 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1300
1301 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1302 /* ATS1 registers can only be written atomically */
1303 va = iova & ~0xfffUL;
1304 if (smmu->version == ARM_SMMU_V2)
1305 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1306 else /* Register is only 32-bit in v1 */
1307 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1308
1309 if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1310 !(tmp & ATSR_ACTIVE), 5, 50)) {
1311 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1312 dev_err(dev,
1313 "iova to phys timed out on %pad. Falling back to software table walk.\n",
1314 &iova);
1315 return ops->iova_to_phys(ops, iova);
1316 }
1317
1318 phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1319 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1320 if (phys & CB_PAR_F) {
1321 dev_err(dev, "translation fault!\n");
1322 dev_err(dev, "PAR = 0x%llx\n", phys);
1323 return 0;
1324 }
1325
1326 return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1327}
1328
1329static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1330 dma_addr_t iova)
1331{
1332 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1333 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1334
1335 if (domain->type == IOMMU_DOMAIN_IDENTITY)
1336 return iova;
1337
1338 if (!ops)
1339 return 0;
1340
1341 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1342 smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1343 return arm_smmu_iova_to_phys_hard(domain, iova);
1344
1345 return ops->iova_to_phys(ops, iova);
1346}
1347
1348static bool arm_smmu_capable(enum iommu_cap cap)
1349{
1350 switch (cap) {
1351 case IOMMU_CAP_CACHE_COHERENCY:
1352 /*
1353 * Return true here as the SMMU can always send out coherent
1354 * requests.
1355 */
1356 return true;
1357 case IOMMU_CAP_NOEXEC:
1358 return true;
1359 default:
1360 return false;
1361 }
1362}
1363
1364static int arm_smmu_match_node(struct device *dev, void *data)
1365{
1366 return dev->fwnode == data;
1367}
1368
1369static
1370struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1371{
1372 struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1373 fwnode, arm_smmu_match_node);
1374 put_device(dev);
1375 return dev ? dev_get_drvdata(dev) : NULL;
1376}
1377
1378static int arm_smmu_add_device(struct device *dev)
1379{
1380 struct arm_smmu_device *smmu;
1381 struct arm_smmu_master_cfg *cfg;
1382 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1383 int i, ret;
1384
1385 if (using_legacy_binding) {
1386 ret = arm_smmu_register_legacy_master(dev, &smmu);
1387
1388 /*
1389 * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1390 * will allocate/initialise a new one. Thus we need to update fwspec for
1391 * later use.
1392 */
1393 fwspec = dev->iommu_fwspec;
1394 if (ret)
1395 goto out_free;
1396 } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1397 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1398 } else {
1399 return -ENODEV;
1400 }
1401
1402 ret = -EINVAL;
1403 for (i = 0; i < fwspec->num_ids; i++) {
1404 u16 sid = fwspec->ids[i];
1405 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1406
1407 if (sid & ~smmu->streamid_mask) {
1408 dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1409 sid, smmu->streamid_mask);
1410 goto out_free;
1411 }
1412 if (mask & ~smmu->smr_mask_mask) {
1413 dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1414 mask, smmu->smr_mask_mask);
1415 goto out_free;
1416 }
1417 }
1418
1419 ret = -ENOMEM;
1420 cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1421 GFP_KERNEL);
1422 if (!cfg)
1423 goto out_free;
1424
1425 cfg->smmu = smmu;
1426 fwspec->iommu_priv = cfg;
1427 while (i--)
1428 cfg->smendx[i] = INVALID_SMENDX;
1429
1430 ret = arm_smmu_master_alloc_smes(dev);
1431 if (ret)
1432 goto out_cfg_free;
1433
1434 iommu_device_link(&smmu->iommu, dev);
1435
1436 return 0;
1437
1438out_cfg_free:
1439 kfree(cfg);
1440out_free:
1441 iommu_fwspec_free(dev);
1442 return ret;
1443}
1444
1445static void arm_smmu_remove_device(struct device *dev)
1446{
1447 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1448 struct arm_smmu_master_cfg *cfg;
1449 struct arm_smmu_device *smmu;
1450
1451
1452 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1453 return;
1454
1455 cfg = fwspec->iommu_priv;
1456 smmu = cfg->smmu;
1457
1458 iommu_device_unlink(&smmu->iommu, dev);
1459 arm_smmu_master_free_smes(fwspec);
1460 iommu_group_remove_device(dev);
1461 kfree(fwspec->iommu_priv);
1462 iommu_fwspec_free(dev);
1463}
1464
1465static struct iommu_group *arm_smmu_device_group(struct device *dev)
1466{
1467 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1468 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1469 struct iommu_group *group = NULL;
1470 int i, idx;
1471
1472 for_each_cfg_sme(fwspec, i, idx) {
1473 if (group && smmu->s2crs[idx].group &&
1474 group != smmu->s2crs[idx].group)
1475 return ERR_PTR(-EINVAL);
1476
1477 group = smmu->s2crs[idx].group;
1478 }
1479
1480 if (group)
1481 return iommu_group_ref_get(group);
1482
1483 if (dev_is_pci(dev))
1484 group = pci_device_group(dev);
1485 else
1486 group = generic_device_group(dev);
1487
1488 return group;
1489}
1490
1491static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1492 enum iommu_attr attr, void *data)
1493{
1494 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1495
1496 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1497 return -EINVAL;
1498
1499 switch (attr) {
1500 case DOMAIN_ATTR_NESTING:
1501 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1502 return 0;
1503 default:
1504 return -ENODEV;
1505 }
1506}
1507
1508static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1509 enum iommu_attr attr, void *data)
1510{
1511 int ret = 0;
1512 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1513
1514 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1515 return -EINVAL;
1516
1517 mutex_lock(&smmu_domain->init_mutex);
1518
1519 switch (attr) {
1520 case DOMAIN_ATTR_NESTING:
1521 if (smmu_domain->smmu) {
1522 ret = -EPERM;
1523 goto out_unlock;
1524 }
1525
1526 if (*(int *)data)
1527 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1528 else
1529 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1530
1531 break;
1532 default:
1533 ret = -ENODEV;
1534 }
1535
1536out_unlock:
1537 mutex_unlock(&smmu_domain->init_mutex);
1538 return ret;
1539}
1540
1541static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1542{
1543 u32 mask, fwid = 0;
1544
1545 if (args->args_count > 0)
1546 fwid |= (u16)args->args[0];
1547
1548 if (args->args_count > 1)
1549 fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
1550 else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1551 fwid |= (u16)mask << SMR_MASK_SHIFT;
1552
1553 return iommu_fwspec_add_ids(dev, &fwid, 1);
1554}
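/*
 * Illustrative DT example, assuming the two-cell "#iommu-cells" form of the
 * binding: a consumer entry such as
 *
 *	iommus = <&smmu 0x400 0x3f>;
 *
 * is translated above into fwid = 0x400 | (0x3f << SMR_MASK_SHIFT), i.e.
 * stream ID 0x400 with SMR mask 0x3f, which arm_smmu_add_device() later
 * unpacks again via SMR_MASK_SHIFT.
 */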
1555
1556static void arm_smmu_get_resv_regions(struct device *dev,
1557 struct list_head *head)
1558{
1559 struct iommu_resv_region *region;
1560 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1561
1562 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1563 prot, IOMMU_RESV_SW_MSI);
1564 if (!region)
1565 return;
1566
1567 list_add_tail(&region->list, head);
1568
1569 iommu_dma_get_resv_regions(dev, head);
1570}
1571
1572static void arm_smmu_put_resv_regions(struct device *dev,
1573 struct list_head *head)
1574{
1575 struct iommu_resv_region *entry, *next;
1576
1577 list_for_each_entry_safe(entry, next, head, list)
1578 kfree(entry);
1579}
1580
1581static struct iommu_ops arm_smmu_ops = {
1582 .capable = arm_smmu_capable,
1583 .domain_alloc = arm_smmu_domain_alloc,
1584 .domain_free = arm_smmu_domain_free,
1585 .attach_dev = arm_smmu_attach_dev,
1586 .map = arm_smmu_map,
1587 .unmap = arm_smmu_unmap,
1588 .map_sg = default_iommu_map_sg,
1589 .flush_iotlb_all = arm_smmu_iotlb_sync,
1590 .iotlb_sync = arm_smmu_iotlb_sync,
1591 .iova_to_phys = arm_smmu_iova_to_phys,
1592 .add_device = arm_smmu_add_device,
1593 .remove_device = arm_smmu_remove_device,
1594 .device_group = arm_smmu_device_group,
1595 .domain_get_attr = arm_smmu_domain_get_attr,
1596 .domain_set_attr = arm_smmu_domain_set_attr,
1597 .of_xlate = arm_smmu_of_xlate,
1598 .get_resv_regions = arm_smmu_get_resv_regions,
1599 .put_resv_regions = arm_smmu_put_resv_regions,
1600 .pgsize_bitmap = -1UL, /* Restricted during device attach */
1601};
1602
1603static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1604{
1605 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1606 int i;
1607 u32 reg, major;
1608
1609 /* clear global FSR */
1610 reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1611 writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1612
1613 /*
1614 * Reset stream mapping groups: Initial values mark all SMRn as
1615 * invalid and all S2CRn as bypass unless overridden.
1616 */
1617 for (i = 0; i < smmu->num_mapping_groups; ++i)
1618 arm_smmu_write_sme(smmu, i);
1619
1620 if (smmu->model == ARM_MMU500) {
1621 /*
1622 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
1623 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
1624 * bit is only present in MMU-500r2 onwards.
1625 */
1626 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1627 major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1628 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1629 if (major >= 2)
1630 reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1631 /*
1632 * Allow unmatched Stream IDs to allocate bypass
1633 * TLB entries for reduced latency.
1634 */
1635 reg |= ARM_MMU500_ACR_SMTNMB_TLBEN;
1636 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1637 }
1638
1639 /* Make sure all context banks are disabled and clear CB_FSR */
1640 for (i = 0; i < smmu->num_context_banks; ++i) {
1641 void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
1642
1643 arm_smmu_write_context_bank(smmu, i);
1644 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1645 /*
1646 * Disable MMU-500's not-particularly-beneficial next-page
1647 * prefetcher for the sake of errata #841119 and #826419.
1648 */
1649 if (smmu->model == ARM_MMU500) {
1650 reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1651 reg &= ~ARM_MMU500_ACTLR_CPRE;
1652 writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1653 }
1654 }
1655
1656 /* Invalidate the TLB, just in case */
1657 writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1658 writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1659
1660 reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1661
1662 /* Enable fault reporting */
1663 reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1664
1665 /* Disable TLB broadcasting. */
1666 reg |= (sCR0_VMIDPNE | sCR0_PTM);
1667
1668 /* Enable client access, handling unmatched streams as appropriate */
1669 reg &= ~sCR0_CLIENTPD;
1670 if (disable_bypass)
1671 reg |= sCR0_USFCFG;
1672 else
1673 reg &= ~sCR0_USFCFG;
1674
1675 /* Disable forced broadcasting */
1676 reg &= ~sCR0_FB;
1677
1678 /* Don't upgrade barriers */
1679 reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1680
1681 if (smmu->features & ARM_SMMU_FEAT_VMID16)
1682 reg |= sCR0_VMID16EN;
1683
1684 if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1685 reg |= sCR0_EXIDENABLE;
1686
1687 /* Push the button */
1688 arm_smmu_tlb_sync_global(smmu);
1689 writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1690}
1691
1692static int arm_smmu_id_size_to_bits(int size)
1693{
1694 switch (size) {
1695 case 0:
1696 return 32;
1697 case 1:
1698 return 36;
1699 case 2:
1700 return 40;
1701 case 3:
1702 return 42;
1703 case 4:
1704 return 44;
1705 case 5:
1706 default:
1707 return 48;
1708 }
1709}
1710
1711static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1712{
1713 unsigned long size;
1714 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1715 u32 id;
1716 bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1717 int i;
1718
1719 dev_notice(smmu->dev, "probing hardware configuration...\n");
1720 dev_notice(smmu->dev, "SMMUv%d with:\n",
1721 smmu->version == ARM_SMMU_V2 ? 2 : 1);
1722
1723 /* ID0 */
1724 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1725
1726 /* Restrict available stages based on module parameter */
1727 if (force_stage == 1)
1728 id &= ~(ID0_S2TS | ID0_NTS);
1729 else if (force_stage == 2)
1730 id &= ~(ID0_S1TS | ID0_NTS);
1731
1732 if (id & ID0_S1TS) {
1733 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1734 dev_notice(smmu->dev, "\tstage 1 translation\n");
1735 }
1736
1737 if (id & ID0_S2TS) {
1738 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1739 dev_notice(smmu->dev, "\tstage 2 translation\n");
1740 }
1741
1742 if (id & ID0_NTS) {
1743 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1744 dev_notice(smmu->dev, "\tnested translation\n");
1745 }
1746
1747 if (!(smmu->features &
1748 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1749 dev_err(smmu->dev, "\tno translation support!\n");
1750 return -ENODEV;
1751 }
1752
1753 if ((id & ID0_S1TS) &&
1754 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1755 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1756 dev_notice(smmu->dev, "\taddress translation ops\n");
1757 }
1758
1759 /*
1760 * In order for DMA API calls to work properly, we must defer to what
1761 * the FW says about coherency, regardless of what the hardware claims.
1762 * Fortunately, this also opens up a workaround for systems where the
1763 * ID register value has ended up configured incorrectly.
1764 */
1765 cttw_reg = !!(id & ID0_CTTW);
1766 if (cttw_fw || cttw_reg)
1767 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1768 cttw_fw ? "" : "non-");
1769 if (cttw_fw != cttw_reg)
1770 dev_notice(smmu->dev,
1771 "\t(IDR0.CTTW overridden by FW configuration)\n");
1772
1773 /* Max. number of entries we have for stream matching/indexing */
1774 if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1775 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1776 size = 1 << 16;
1777 } else {
1778 size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
1779 }
1780 smmu->streamid_mask = size - 1;
1781 if (id & ID0_SMS) {
1782 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1783 size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
1784 if (size == 0) {
1785 dev_err(smmu->dev,
1786 "stream-matching supported, but no SMRs present!\n");
1787 return -ENODEV;
1788 }
1789
1790 /* Zero-initialised to mark as invalid */
1791 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1792 GFP_KERNEL);
1793 if (!smmu->smrs)
1794 return -ENOMEM;
1795
1796 dev_notice(smmu->dev,
1797 "\tstream matching with %lu register groups", size);
1798 }
1799 /* s2cr->type == 0 means translation, so initialise explicitly */
1800 smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1801 GFP_KERNEL);
1802 if (!smmu->s2crs)
1803 return -ENOMEM;
1804 for (i = 0; i < size; i++)
1805 smmu->s2crs[i] = s2cr_init_val;
1806
1807 smmu->num_mapping_groups = size;
1808 mutex_init(&smmu->stream_map_mutex);
1809 spin_lock_init(&smmu->global_sync_lock);
1810
1811 if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1812 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1813 if (!(id & ID0_PTFS_NO_AARCH32S))
1814 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1815 }
1816
1817 /* ID1 */
1818 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1819 smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1820
1821 /* Check for size mismatch of SMMU address space from mapped region */
1822 size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1823 size <<= smmu->pgshift;
1824 if (smmu->cb_base != gr0_base + size)
1825 dev_warn(smmu->dev,
1826 "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
1827 size * 2, (smmu->cb_base - gr0_base) * 2);
1828
1829 smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1830 smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1831 if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1832 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1833 return -ENODEV;
1834 }
1835 dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1836 smmu->num_context_banks, smmu->num_s2_context_banks);
1837 /*
1838 * Cavium CN88xx erratum #27704.
1839 * Ensure ASID and VMID allocation is unique across all SMMUs in
1840 * the system.
1841 */
1842 if (smmu->model == CAVIUM_SMMUV2) {
1843 smmu->cavium_id_base =
1844 atomic_add_return(smmu->num_context_banks,
1845 &cavium_smmu_context_count);
1846 smmu->cavium_id_base -= smmu->num_context_banks;
1847 dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1848 }
1849 smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1850 sizeof(*smmu->cbs), GFP_KERNEL);
1851 if (!smmu->cbs)
1852 return -ENOMEM;
1853
1854 /* ID2 */
1855 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1856 size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1857 smmu->ipa_size = size;
1858
1859 /* The output mask is also applied for bypass */
1860 size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1861 smmu->pa_size = size;
1862
1863 if (id & ID2_VMID16)
1864 smmu->features |= ARM_SMMU_FEAT_VMID16;
1865
1866 /*
1867 * What the page table walker can address actually depends on which
1868 * descriptor format is in use, but since a) we don't know that yet,
1869 * and b) it can vary per context bank, this will have to do...
1870 */
1871 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1872 dev_warn(smmu->dev,
1873 "failed to set DMA mask for table walker\n");
1874
1875 if (smmu->version < ARM_SMMU_V2) {
1876 smmu->va_size = smmu->ipa_size;
1877 if (smmu->version == ARM_SMMU_V1_64K)
1878 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1879 } else {
1880 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1881 smmu->va_size = arm_smmu_id_size_to_bits(size);
1882 if (id & ID2_PTFS_4K)
1883 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1884 if (id & ID2_PTFS_16K)
1885 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1886 if (id & ID2_PTFS_64K)
1887 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1888 }
1889
1890 /* Now we've corralled the various formats, what'll it do? */
1891 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1892 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1893 if (smmu->features &
1894 (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1895 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1896 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1897 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1898 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1899 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
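	/*
	 * Worked example (illustrative): an SMMU advertising only the AArch64
	 * 4K and 64K granules ends up with SZ_4K | SZ_2M | SZ_1G | SZ_64K |
	 * SZ_512M, i.e. a pgsize_bitmap of 0x60211000.
	 */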
1900
1901 if (arm_smmu_ops.pgsize_bitmap == -1UL)
1902 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1903 else
1904 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1905 dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1906 smmu->pgsize_bitmap);
1907
1908
1909 if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1910 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1911 smmu->va_size, smmu->ipa_size);
1912
1913 if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1914 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1915 smmu->ipa_size, smmu->pa_size);
1916
1917 return 0;
1918}
1919
1920struct arm_smmu_match_data {
1921 enum arm_smmu_arch_version version;
1922 enum arm_smmu_implementation model;
1923};
1924
1925#define ARM_SMMU_MATCH_DATA(name, ver, imp) \
1926static struct arm_smmu_match_data name = { .version = ver, .model = imp }
1927
1928ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1929ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1930ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1931ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1932ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1933ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1934
1935static const struct of_device_id arm_smmu_of_match[] = {
1936 { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1937 { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1938 { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1939 { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1940 { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1941 { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1942 { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
1943 { },
1944};
1945MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
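/*
 * Illustrative sketch only ("example,smmu-v2" is hypothetical): supporting a
 * new implementation means adding a match_data instance plus an of_device_id
 * entry, e.g.:
 *
 *	ARM_SMMU_MATCH_DATA(example_smmuv2, ARM_SMMU_V2, GENERIC_SMMU);
 *	{ .compatible = "example,smmu-v2", .data = &example_smmuv2 },
 */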
1946
1947#ifdef CONFIG_ACPI
1948static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1949{
1950 int ret = 0;
1951
1952 switch (model) {
1953 case ACPI_IORT_SMMU_V1:
1954 case ACPI_IORT_SMMU_CORELINK_MMU400:
1955 smmu->version = ARM_SMMU_V1;
1956 smmu->model = GENERIC_SMMU;
1957 break;
1958 case ACPI_IORT_SMMU_CORELINK_MMU401:
1959 smmu->version = ARM_SMMU_V1_64K;
1960 smmu->model = GENERIC_SMMU;
1961 break;
1962 case ACPI_IORT_SMMU_V2:
1963 smmu->version = ARM_SMMU_V2;
1964 smmu->model = GENERIC_SMMU;
1965 break;
1966 case ACPI_IORT_SMMU_CORELINK_MMU500:
1967 smmu->version = ARM_SMMU_V2;
1968 smmu->model = ARM_MMU500;
1969 break;
1970 case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1971 smmu->version = ARM_SMMU_V2;
1972 smmu->model = CAVIUM_SMMUV2;
1973 break;
1974 default:
1975 ret = -ENODEV;
1976 }
1977
1978 return ret;
1979}
1980
1981static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1982 struct arm_smmu_device *smmu)
1983{
1984 struct device *dev = smmu->dev;
1985 struct acpi_iort_node *node =
1986 *(struct acpi_iort_node **)dev_get_platdata(dev);
1987 struct acpi_iort_smmu *iort_smmu;
1988 int ret;
1989
1990 /* Retrieve SMMU1/2 specific data */
1991 iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1992
1993 ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1994 if (ret < 0)
1995 return ret;
1996
1997 /* Ignore the configuration access interrupt */
1998 smmu->num_global_irqs = 1;
1999
2000 if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
2001 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2002
2003 return 0;
2004}
2005#else
2006static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2007 struct arm_smmu_device *smmu)
2008{
2009 return -ENODEV;
2010}
2011#endif
2012
2013static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2014 struct arm_smmu_device *smmu)
2015{
2016 const struct arm_smmu_match_data *data;
2017 struct device *dev = &pdev->dev;
2018 bool legacy_binding;
2019
2020 if (of_property_read_u32(dev->of_node, "#global-interrupts",
2021 &smmu->num_global_irqs)) {
2022 dev_err(dev, "missing #global-interrupts property\n");
2023 return -ENODEV;
2024 }
2025
2026 data = of_device_get_match_data(dev);
2027 smmu->version = data->version;
2028 smmu->model = data->model;
2029
2030 parse_driver_options(smmu);
2031
2032 legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2033 if (legacy_binding && !using_generic_binding) {
2034 if (!using_legacy_binding)
2035 pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2036 using_legacy_binding = true;
2037 } else if (!legacy_binding && !using_legacy_binding) {
2038 using_generic_binding = true;
2039 } else {
2040 dev_err(dev, "not probing due to mismatched DT properties\n");
2041 return -ENODEV;
2042 }
2043
2044 if (of_dma_is_coherent(dev->of_node))
2045 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2046
2047 return 0;
2048}
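/*
 * Illustrative DT fragment for the generic binding (all values hypothetical):
 *
 *	smmu: iommu@ba5e0000 {
 *		compatible = "arm,mmu-500";
 *		reg = <0xba5e0000 0x10000>;
 *		#global-interrupts = <1>;
 *		#iommu-cells = <1>;
 *		interrupts = <0 32 4>, <0 33 4>, <0 34 4>;
 *	};
 *
 * Masters then reference the SMMU via "iommus = <&smmu 0x100>;" rather than
 * the deprecated "mmu-masters" property handled above.
 */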
2049
2050static void arm_smmu_bus_init(void)
2051{
2052 /* Oh, for a proper bus abstraction */
2053 if (!iommu_present(&platform_bus_type))
2054 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2055#ifdef CONFIG_ARM_AMBA
2056 if (!iommu_present(&amba_bustype))
2057 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2058#endif
2059#ifdef CONFIG_PCI
2060 if (!iommu_present(&pci_bus_type)) {
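		/* Request ACS so PCI devices can be isolated into their own IOMMU groups */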
2061 pci_request_acs();
2062 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2063 }
2064#endif
2065}
2066
2067static int arm_smmu_device_probe(struct platform_device *pdev)
2068{
2069 struct resource *res;
2070 resource_size_t ioaddr;
2071 struct arm_smmu_device *smmu;
2072 struct device *dev = &pdev->dev;
2073 int num_irqs, i, err;
2074
2075 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2076 if (!smmu) {
2077 dev_err(dev, "failed to allocate arm_smmu_device\n");
2078 return -ENOMEM;
2079 }
2080 smmu->dev = dev;
2081
2082 if (dev->of_node)
2083 err = arm_smmu_device_dt_probe(pdev, smmu);
2084 else
2085 err = arm_smmu_device_acpi_probe(pdev, smmu);
2086
2087 if (err)
2088 return err;
2089
2090 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2091 	smmu->base = devm_ioremap_resource(dev, res);
2092 	if (IS_ERR(smmu->base))
2093 		return PTR_ERR(smmu->base);
2094 	ioaddr = res->start;	/* safe: devm_ioremap_resource() has validated res */
2095 	smmu->cb_base = smmu->base + resource_size(res) / 2;
2096
2097 num_irqs = 0;
2098 while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2099 num_irqs++;
2100 if (num_irqs > smmu->num_global_irqs)
2101 smmu->num_context_irqs++;
2102 }
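	/*
	 * Example (illustrative): with one global interrupt and nine IRQ
	 * resources in total, num_irqs ends up as 9 and num_context_irqs as
	 * 8; the leading resources are the global fault interrupts.
	 */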
2103
2104 if (!smmu->num_context_irqs) {
2105 dev_err(dev, "found %d interrupts but expected at least %d\n",
2106 num_irqs, smmu->num_global_irqs + 1);
2107 return -ENODEV;
2108 }
2109
2110 	smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2111 				  GFP_KERNEL);
2112 if (!smmu->irqs) {
2113 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2114 return -ENOMEM;
2115 }
2116
2117 for (i = 0; i < num_irqs; ++i) {
2118 int irq = platform_get_irq(pdev, i);
2119
2120 if (irq < 0) {
2121 dev_err(dev, "failed to get irq index %d\n", i);
2122 return -ENODEV;
2123 }
2124 smmu->irqs[i] = irq;
2125 }
2126
2127 err = arm_smmu_device_cfg_probe(smmu);
2128 if (err)
2129 return err;
2130
2131 if (smmu->version == ARM_SMMU_V2) {
2132 if (smmu->num_context_banks > smmu->num_context_irqs) {
2133 dev_err(dev,
2134 "found only %d context irq(s) but %d required\n",
2135 smmu->num_context_irqs, smmu->num_context_banks);
2136 return -ENODEV;
2137 }
2138
2139 /* Ignore superfluous interrupts */
2140 smmu->num_context_irqs = smmu->num_context_banks;
2141 }
2142
2143 for (i = 0; i < smmu->num_global_irqs; ++i) {
2144 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2145 arm_smmu_global_fault,
2146 IRQF_SHARED,
2147 "arm-smmu global fault",
2148 smmu);
2149 if (err) {
2150 dev_err(dev, "failed to request global IRQ %d (%u)\n",
2151 i, smmu->irqs[i]);
2152 return err;
2153 }
2154 }
2155
2156 err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2157 "smmu.%pa", &ioaddr);
2158 if (err) {
2159 dev_err(dev, "Failed to register iommu in sysfs\n");
2160 return err;
2161 }
2162
2163 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2164 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2165
2166 err = iommu_device_register(&smmu->iommu);
2167 if (err) {
2168 dev_err(dev, "Failed to register iommu\n");
2169 return err;
2170 }
2171
2172 platform_set_drvdata(pdev, smmu);
2173 arm_smmu_device_reset(smmu);
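	/*
	 * arm_smmu_test_smr_masks() writes a test pattern to SMR0 and reads
	 * it back to discover which SMR ID/MASK bits are actually
	 * implemented, so that stream matching only relies on usable bits.
	 */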
2174 arm_smmu_test_smr_masks(smmu);
2175
2176 /*
2177 * For ACPI and generic DT bindings, an SMMU will be probed before
2178 * any device which might need it, so we want the bus ops in place
2179 * ready to handle default domain setup as soon as any SMMU exists.
2180 */
2181 if (!using_legacy_binding)
2182 arm_smmu_bus_init();
2183
2184 return 0;
2185}
2186
2187/*
2188 * With the legacy DT binding in play, though, we have no guarantees about
2189 * probe order, but then we're also not doing default domains, so we can
2190 * delay setting bus ops until we're sure every possible SMMU is ready,
2191 * and that way ensure that no add_device() calls get missed.
2192 */
2193static int arm_smmu_legacy_bus_init(void)
2194{
2195 if (using_legacy_binding)
2196 arm_smmu_bus_init();
2197 return 0;
2198}
2199device_initcall_sync(arm_smmu_legacy_bus_init);
2200
2201static int arm_smmu_device_remove(struct platform_device *pdev)
2202{
2203 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2204
2205 if (!smmu)
2206 return -ENODEV;
2207
2208 if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2209 dev_err(&pdev->dev, "removing device with active domains!\n");
2210
2211 /* Turn the thing off */
2212 writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2213 return 0;
2214}
2215
2216static void arm_smmu_device_shutdown(struct platform_device *pdev)
2217{
2218 arm_smmu_device_remove(pdev);
2219}
2220
2221static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2222{
2223 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2224
2225 arm_smmu_device_reset(smmu);
2226 return 0;
2227}
2228
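/*
 * No suspend hook is needed: on resume, arm_smmu_device_reset() reprograms
 * the global and context bank registers from the driver's software state,
 * which may have been lost while the SMMU was powered down.
 */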
2229static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
2230
2231static struct platform_driver arm_smmu_driver = {
2232 .driver = {
2233 .name = "arm-smmu",
2234 .of_match_table = of_match_ptr(arm_smmu_of_match),
2235 .pm = &arm_smmu_pm_ops,
2236 },
2237 .probe = arm_smmu_device_probe,
2238 .remove = arm_smmu_device_remove,
2239 .shutdown = arm_smmu_device_shutdown,
2240};
2241module_platform_driver(arm_smmu_driver);
2242
2243IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1", NULL);
2244IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2", NULL);
2245IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400", NULL);
2246IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401", NULL);
2247IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500", NULL);
2248IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2", NULL);
2249
2250MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2251MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2252MODULE_LICENSE("GPL v2");