// SPDX-License-Identifier: GPL-2.0
/*
 * Virtio driver for the paravirtualized IOMMU
 *
 * Copyright (C) 2019 Arm Limited
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/amba/bus.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/freezer.h>
#include <linux/interval_tree.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/of_iommu.h>
#include <linux/of_platform.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ids.h>
#include <linux/wait.h>

#include <uapi/linux/virtio_iommu.h>

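/*
 * IOVA range reserved for mapping MSI doorbells, advertised through
 * viommu_get_resv_regions() when the device doesn't provide a bypass MSI
 * window of its own.
 */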
#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000

#define VIOMMU_REQUEST_VQ		0
#define VIOMMU_EVENT_VQ			1
#define VIOMMU_NR_VQS			2

struct viommu_dev {
	struct iommu_device		iommu;
	struct device			*dev;
	struct virtio_device		*vdev;

	struct ida			domain_ids;

	struct virtqueue		*vqs[VIOMMU_NR_VQS];
	spinlock_t			request_lock;
	struct list_head		requests;
	void				*evts;

	/* Device configuration */
	struct iommu_domain_geometry	geometry;
	u64				pgsize_bitmap;
	u32				first_domain;
	u32				last_domain;
	/* Supported MAP flags */
	u32				map_flags;
	u32				probe_size;
};

struct viommu_mapping {
	phys_addr_t			paddr;
	struct interval_tree_node	iova;
	u32				flags;
};

struct viommu_domain {
	struct iommu_domain		domain;
	struct viommu_dev		*viommu;
	struct mutex			mutex; /* protects viommu pointer */
	unsigned int			id;
	u32				map_flags;

	spinlock_t			mappings_lock;
	struct rb_root_cached		mappings;

	unsigned long			nr_endpoints;
};

struct viommu_endpoint {
	struct device			*dev;
	struct viommu_dev		*viommu;
	struct viommu_domain		*vdomain;
	struct list_head		resv_regions;
};

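/*
 * An in-flight request on the request virtqueue. @buf holds the full request;
 * the device-writable part starts at @write_offset and, when @writeback is
 * set, is copied back to the caller's buffer once the request completes.
 */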
struct viommu_request {
	struct list_head		list;
	void				*writeback;
	unsigned int			write_offset;
	unsigned int			len;
	char				buf[];
};

#define VIOMMU_FAULT_RESV_MASK		0xffffff00

struct viommu_event {
	union {
		u32			head;
		struct virtio_iommu_fault fault;
	};
};

#define to_viommu_domain(domain)	\
	container_of(domain, struct viommu_domain, domain)

static int viommu_get_req_errno(void *buf, size_t len)
{
	struct virtio_iommu_req_tail *tail = buf + len - sizeof(*tail);

	switch (tail->status) {
	case VIRTIO_IOMMU_S_OK:
		return 0;
	case VIRTIO_IOMMU_S_UNSUPP:
		return -ENOSYS;
	case VIRTIO_IOMMU_S_INVAL:
		return -EINVAL;
	case VIRTIO_IOMMU_S_RANGE:
		return -ERANGE;
	case VIRTIO_IOMMU_S_NOENT:
		return -ENOENT;
	case VIRTIO_IOMMU_S_FAULT:
		return -EFAULT;
	case VIRTIO_IOMMU_S_NOMEM:
		return -ENOMEM;
	case VIRTIO_IOMMU_S_IOERR:
	case VIRTIO_IOMMU_S_DEVERR:
	default:
		return -EIO;
	}
}

static void viommu_set_req_status(void *buf, size_t len, int status)
{
	struct virtio_iommu_req_tail *tail = buf + len - sizeof(*tail);

	tail->status = status;
}

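/*
 * Return the offset of the device-writable part of a request: only the tail
 * for most request types, the probe properties plus the tail for PROBE.
 */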
static off_t viommu_get_write_desc_offset(struct viommu_dev *viommu,
					   struct virtio_iommu_req_head *req,
					   size_t len)
{
	size_t tail_size = sizeof(struct virtio_iommu_req_tail);

	if (req->type == VIRTIO_IOMMU_T_PROBE)
		return len - viommu->probe_size - tail_size;

	return len - tail_size;
}

/*
 * __viommu_sync_req - Complete all in-flight requests
 *
 * Wait for all added requests to complete. When this function returns, all
 * requests that were in-flight at the time of the call have completed.
 */
static int __viommu_sync_req(struct viommu_dev *viommu)
{
	int ret = 0;
	unsigned int len;
	size_t write_len;
	struct viommu_request *req;
	struct virtqueue *vq = viommu->vqs[VIOMMU_REQUEST_VQ];

	assert_spin_locked(&viommu->request_lock);

	virtqueue_kick(vq);

	while (!list_empty(&viommu->requests)) {
		len = 0;
		req = virtqueue_get_buf(vq, &len);
		if (!req)
			continue;

		if (!len)
			viommu_set_req_status(req->buf, req->len,
					      VIRTIO_IOMMU_S_IOERR);

		write_len = req->len - req->write_offset;
		if (req->writeback && len == write_len)
			memcpy(req->writeback, req->buf + req->write_offset,
			       write_len);

		list_del(&req->list);
		kfree(req);
	}

	return ret;
}

static int viommu_sync_req(struct viommu_dev *viommu)
{
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&viommu->request_lock, flags);
	ret = __viommu_sync_req(viommu);
	if (ret)
		dev_dbg(viommu->dev, "could not sync requests (%d)\n", ret);
	spin_unlock_irqrestore(&viommu->request_lock, flags);

	return ret;
}

/*
 * __viommu_add_req - Add one request to the queue
 * @viommu: the vIOMMU device
 * @buf: pointer to the request buffer
 * @len: length of the request buffer
 * @writeback: copy data back to the buffer when the request completes.
 *
 * Add a request to the queue. Only synchronize the queue if it's already full.
 * Otherwise don't kick the queue nor wait for requests to complete.
 *
 * When @writeback is true, data written by the device, including the request
 * status, is copied into @buf after the request completes. This is unsafe if
 * the caller allocates @buf on stack and drops the lock between add_req() and
 * sync_req().
 *
 * Return 0 if the request was successfully added to the queue.
 */
static int __viommu_add_req(struct viommu_dev *viommu, void *buf, size_t len,
			    bool writeback)
{
	int ret;
	off_t write_offset;
	struct viommu_request *req;
	struct scatterlist top_sg, bottom_sg;
	struct scatterlist *sg[2] = { &top_sg, &bottom_sg };
	struct virtqueue *vq = viommu->vqs[VIOMMU_REQUEST_VQ];

	assert_spin_locked(&viommu->request_lock);

	write_offset = viommu_get_write_desc_offset(viommu, buf, len);
	if (write_offset <= 0)
		return -EINVAL;

	req = kzalloc(sizeof(*req) + len, GFP_ATOMIC);
	if (!req)
		return -ENOMEM;

	req->len = len;
	if (writeback) {
		req->writeback = buf + write_offset;
		req->write_offset = write_offset;
	}
	memcpy(&req->buf, buf, write_offset);

	sg_init_one(&top_sg, req->buf, write_offset);
	sg_init_one(&bottom_sg, req->buf + write_offset, len - write_offset);

	ret = virtqueue_add_sgs(vq, sg, 1, 1, req, GFP_ATOMIC);
	if (ret == -ENOSPC) {
		/* If the queue is full, sync and retry */
		if (!__viommu_sync_req(viommu))
			ret = virtqueue_add_sgs(vq, sg, 1, 1, req, GFP_ATOMIC);
	}
	if (ret)
		goto err_free;

	list_add_tail(&req->list, &viommu->requests);
	return 0;

err_free:
	kfree(req);
	return ret;
}

static int viommu_add_req(struct viommu_dev *viommu, void *buf, size_t len)
{
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&viommu->request_lock, flags);
	ret = __viommu_add_req(viommu, buf, len, false);
	if (ret)
		dev_dbg(viommu->dev, "could not add request: %d\n", ret);
	spin_unlock_irqrestore(&viommu->request_lock, flags);

	return ret;
}

/*
 * Send a request and wait for it to complete. Return the request status (as an
 * errno)
 */
static int viommu_send_req_sync(struct viommu_dev *viommu, void *buf,
				size_t len)
{
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&viommu->request_lock, flags);

	ret = __viommu_add_req(viommu, buf, len, true);
	if (ret) {
		dev_dbg(viommu->dev, "could not add request (%d)\n", ret);
		goto out_unlock;
	}

	ret = __viommu_sync_req(viommu);
	if (ret) {
		dev_dbg(viommu->dev, "could not sync requests (%d)\n", ret);
		/* Fall-through (get the actual request status) */
	}

	ret = viommu_get_req_errno(buf, len);
out_unlock:
	spin_unlock_irqrestore(&viommu->request_lock, flags);
	return ret;
}

/*
 * viommu_add_mapping - add a mapping to the internal tree
 *
 * On success, return 0. Otherwise return -ENOMEM.
 */
static int viommu_add_mapping(struct viommu_domain *vdomain, unsigned long iova,
			      phys_addr_t paddr, size_t size, u32 flags)
{
	unsigned long irqflags;
	struct viommu_mapping *mapping;

	mapping = kzalloc(sizeof(*mapping), GFP_ATOMIC);
	if (!mapping)
		return -ENOMEM;

	mapping->paddr = paddr;
	mapping->iova.start = iova;
	mapping->iova.last = iova + size - 1;
	mapping->flags = flags;

	spin_lock_irqsave(&vdomain->mappings_lock, irqflags);
	interval_tree_insert(&mapping->iova, &vdomain->mappings);
	spin_unlock_irqrestore(&vdomain->mappings_lock, irqflags);

	return 0;
}

/*
 * viommu_del_mappings - remove mappings from the internal tree
 *
 * @vdomain: the domain
 * @iova: start of the range
 * @size: size of the range. A size of 0 corresponds to the entire address
 *	space.
 *
 * On success, returns the number of unmapped bytes (>= size)
 */
static size_t viommu_del_mappings(struct viommu_domain *vdomain,
				  unsigned long iova, size_t size)
{
	size_t unmapped = 0;
	unsigned long flags;
	unsigned long last = iova + size - 1;
	struct viommu_mapping *mapping = NULL;
	struct interval_tree_node *node, *next;

	spin_lock_irqsave(&vdomain->mappings_lock, flags);
	next = interval_tree_iter_first(&vdomain->mappings, iova, last);
	while (next) {
		node = next;
		mapping = container_of(node, struct viommu_mapping, iova);
		next = interval_tree_iter_next(node, iova, last);

		/* Trying to split a mapping? */
		if (mapping->iova.start < iova)
			break;

		/*
		 * Virtio-iommu doesn't allow UNMAP to split a mapping created
		 * with a single MAP request, so remove the full mapping.
		 */
		unmapped += mapping->iova.last - mapping->iova.start + 1;

		interval_tree_remove(node, &vdomain->mappings);
		kfree(mapping);
	}
	spin_unlock_irqrestore(&vdomain->mappings_lock, flags);

	return unmapped;
}

/*
 * viommu_replay_mappings - re-send MAP requests
 *
 * When reattaching a domain that was previously detached from all endpoints,
 * mappings were deleted from the device. Re-create the mappings available in
 * the internal tree.
 */
static int viommu_replay_mappings(struct viommu_domain *vdomain)
{
	int ret = 0;
	unsigned long flags;
	struct viommu_mapping *mapping;
	struct interval_tree_node *node;
	struct virtio_iommu_req_map map;

	spin_lock_irqsave(&vdomain->mappings_lock, flags);
	node = interval_tree_iter_first(&vdomain->mappings, 0, -1UL);
	while (node) {
		mapping = container_of(node, struct viommu_mapping, iova);
		map = (struct virtio_iommu_req_map) {
			.head.type = VIRTIO_IOMMU_T_MAP,
			.domain = cpu_to_le32(vdomain->id),
			.virt_start = cpu_to_le64(mapping->iova.start),
			.virt_end = cpu_to_le64(mapping->iova.last),
			.phys_start = cpu_to_le64(mapping->paddr),
			.flags = cpu_to_le32(mapping->flags),
		};

		ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map));
		if (ret)
			break;

		node = interval_tree_iter_next(node, 0, -1UL);
	}
	spin_unlock_irqrestore(&vdomain->mappings_lock, flags);

	return ret;
}

static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
			       struct virtio_iommu_probe_resv_mem *mem,
			       size_t len)
{
	size_t size;
	u64 start64, end64;
	phys_addr_t start, end;
	struct iommu_resv_region *region = NULL;
	unsigned long prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;

	start = start64 = le64_to_cpu(mem->start);
	end = end64 = le64_to_cpu(mem->end);
	size = end64 - start64 + 1;

	/* Catch any overflow, including the unlikely end64 - start64 + 1 = 0 */
	if (start != start64 || end != end64 || size < end64 - start64)
		return -EOVERFLOW;

	if (len < sizeof(*mem))
		return -EINVAL;

	switch (mem->subtype) {
	default:
		dev_warn(vdev->dev, "unknown resv mem subtype 0x%x\n",
			 mem->subtype);
		/* Fall-through */
	case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
		region = iommu_alloc_resv_region(start, size, 0,
						 IOMMU_RESV_RESERVED);
		break;
	case VIRTIO_IOMMU_RESV_MEM_T_MSI:
		region = iommu_alloc_resv_region(start, size, prot,
						 IOMMU_RESV_MSI);
		break;
	}
	if (!region)
		return -ENOMEM;

	list_add(&region->list, &vdev->resv_regions);
	return 0;
}

static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device *dev)
{
	int ret;
	u16 type, len;
	size_t cur = 0;
	size_t probe_len;
	struct virtio_iommu_req_probe *probe;
	struct virtio_iommu_probe_property *prop;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct viommu_endpoint *vdev = fwspec->iommu_priv;

	if (!fwspec->num_ids)
		return -EINVAL;

	probe_len = sizeof(*probe) + viommu->probe_size +
		    sizeof(struct virtio_iommu_req_tail);
	probe = kzalloc(probe_len, GFP_KERNEL);
	if (!probe)
		return -ENOMEM;

	probe->head.type = VIRTIO_IOMMU_T_PROBE;
	/*
	 * For now, assume that properties of an endpoint that outputs multiple
	 * IDs are consistent. Only probe the first one.
	 */
	probe->endpoint = cpu_to_le32(fwspec->ids[0]);

	ret = viommu_send_req_sync(viommu, probe, probe_len);
	if (ret)
		goto out_free;

	prop = (void *)probe->properties;
	type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;

	while (type != VIRTIO_IOMMU_PROBE_T_NONE &&
	       cur < viommu->probe_size) {
		len = le16_to_cpu(prop->length) + sizeof(*prop);

		switch (type) {
		case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
			ret = viommu_add_resv_mem(vdev, (void *)prop, len);
			break;
		default:
			dev_err(dev, "unknown viommu prop 0x%x\n", type);
		}

		if (ret)
			dev_err(dev, "failed to parse viommu prop 0x%x\n", type);

		cur += len;
		if (cur >= viommu->probe_size)
			break;

		prop = (void *)probe->properties + cur;
		type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
	}

out_free:
	kfree(probe);
	return ret;
}

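/* Log a translation fault reported by the device on the event virtqueue */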
static int viommu_fault_handler(struct viommu_dev *viommu,
				struct virtio_iommu_fault *fault)
{
	char *reason_str;

	u8 reason = fault->reason;
	u32 flags = le32_to_cpu(fault->flags);
	u32 endpoint = le32_to_cpu(fault->endpoint);
	u64 address = le64_to_cpu(fault->address);

	switch (reason) {
	case VIRTIO_IOMMU_FAULT_R_DOMAIN:
		reason_str = "domain";
		break;
	case VIRTIO_IOMMU_FAULT_R_MAPPING:
		reason_str = "page";
		break;
	case VIRTIO_IOMMU_FAULT_R_UNKNOWN:
	default:
		reason_str = "unknown";
		break;
	}

	/* TODO: find EP by ID and report_iommu_fault */
	if (flags & VIRTIO_IOMMU_FAULT_F_ADDRESS)
		dev_err_ratelimited(viommu->dev, "%s fault from EP %u at %#llx [%s%s%s]\n",
				    reason_str, endpoint, address,
				    flags & VIRTIO_IOMMU_FAULT_F_READ ? "R" : "",
				    flags & VIRTIO_IOMMU_FAULT_F_WRITE ? "W" : "",
				    flags & VIRTIO_IOMMU_FAULT_F_EXEC ? "X" : "");
	else
		dev_err_ratelimited(viommu->dev, "%s fault from EP %u\n",
				    reason_str, endpoint);
	return 0;
}

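/*
 * Event virtqueue callback: handle reported faults and hand the event buffers
 * back to the device.
 */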
static void viommu_event_handler(struct virtqueue *vq)
{
	int ret;
	unsigned int len;
	struct scatterlist sg[1];
	struct viommu_event *evt;
	struct viommu_dev *viommu = vq->vdev->priv;

	while ((evt = virtqueue_get_buf(vq, &len)) != NULL) {
		if (len > sizeof(*evt)) {
			dev_err(viommu->dev,
				"invalid event buffer (len %u != %zu)\n",
				len, sizeof(*evt));
		} else if (!(evt->head & VIOMMU_FAULT_RESV_MASK)) {
			viommu_fault_handler(viommu, &evt->fault);
		}

		sg_init_one(sg, evt, sizeof(*evt));
		ret = virtqueue_add_inbuf(vq, sg, 1, evt, GFP_ATOMIC);
		if (ret)
			dev_err(viommu->dev, "could not add event buffer\n");
	}

	virtqueue_kick(vq);
}

/* IOMMU API */

static struct iommu_domain *viommu_domain_alloc(unsigned type)
{
	struct viommu_domain *vdomain;

	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
		return NULL;

	vdomain = kzalloc(sizeof(*vdomain), GFP_KERNEL);
	if (!vdomain)
		return NULL;

	mutex_init(&vdomain->mutex);
	spin_lock_init(&vdomain->mappings_lock);
	vdomain->mappings = RB_ROOT_CACHED;

	if (type == IOMMU_DOMAIN_DMA &&
	    iommu_get_dma_cookie(&vdomain->domain)) {
		kfree(vdomain);
		return NULL;
	}

	return &vdomain->domain;
}

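/*
 * Finalise the domain when the first endpoint is attached: allocate a domain
 * ID within the range supported by the device and inherit its page size mask,
 * input range and MAP flags.
 */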
static int viommu_domain_finalise(struct viommu_dev *viommu,
				  struct iommu_domain *domain)
{
	int ret;
	struct viommu_domain *vdomain = to_viommu_domain(domain);

	ret = ida_alloc_range(&viommu->domain_ids, viommu->first_domain,
			      viommu->last_domain, GFP_KERNEL);
	if (ret < 0)
		return ret;

	vdomain->id = (unsigned int)ret;

	domain->pgsize_bitmap = viommu->pgsize_bitmap;
	domain->geometry = viommu->geometry;

	vdomain->map_flags = viommu->map_flags;
	vdomain->viommu = viommu;

	return 0;
}

static void viommu_domain_free(struct iommu_domain *domain)
{
	struct viommu_domain *vdomain = to_viommu_domain(domain);

	iommu_put_dma_cookie(domain);

	/* Free all remaining mappings (size 2^64) */
	viommu_del_mappings(vdomain, 0, 0);

	if (vdomain->viommu)
		ida_free(&vdomain->viommu->domain_ids, vdomain->id);

	kfree(vdomain);
}

static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	int i;
	int ret = 0;
	struct virtio_iommu_req_attach req;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct viommu_endpoint *vdev = fwspec->iommu_priv;
	struct viommu_domain *vdomain = to_viommu_domain(domain);

	mutex_lock(&vdomain->mutex);
	if (!vdomain->viommu) {
		/*
		 * Properly initialize the domain now that we know which viommu
		 * owns it.
		 */
		ret = viommu_domain_finalise(vdev->viommu, domain);
	} else if (vdomain->viommu != vdev->viommu) {
		dev_err(dev, "cannot attach to foreign vIOMMU\n");
		ret = -EXDEV;
	}
	mutex_unlock(&vdomain->mutex);

	if (ret)
		return ret;

	/*
	 * In the virtio-iommu device, when attaching the endpoint to a new
	 * domain, it is detached from the old one and, if, as a result, the
	 * old domain isn't attached to any endpoint, all mappings are removed
	 * from the old domain and it is freed.
	 *
	 * In the driver the old domain still exists, and its mappings will be
	 * recreated if it gets reattached to an endpoint. Otherwise it will be
	 * freed explicitly.
	 *
	 * vdev->vdomain is protected by group->mutex
	 */
	if (vdev->vdomain)
		vdev->vdomain->nr_endpoints--;

	req = (struct virtio_iommu_req_attach) {
		.head.type = VIRTIO_IOMMU_T_ATTACH,
		.domain = cpu_to_le32(vdomain->id),
	};

	for (i = 0; i < fwspec->num_ids; i++) {
		req.endpoint = cpu_to_le32(fwspec->ids[i]);

		ret = viommu_send_req_sync(vdomain->viommu, &req, sizeof(req));
		if (ret)
			return ret;
	}

	if (!vdomain->nr_endpoints) {
		/*
		 * This endpoint is the first to be attached to the domain.
		 * Replay existing mappings (e.g. SW MSI).
		 */
		ret = viommu_replay_mappings(vdomain);
		if (ret)
			return ret;
	}

	vdomain->nr_endpoints++;
	vdev->vdomain = vdomain;

	return 0;
}

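/*
 * Record the mapping in the internal tree and, if the domain has endpoints
 * attached, send a MAP request. Otherwise the mapping is only replayed when an
 * endpoint gets attached.
 */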
static int viommu_map(struct iommu_domain *domain, unsigned long iova,
		      phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
	int ret;
	u32 flags;
	struct virtio_iommu_req_map map;
	struct viommu_domain *vdomain = to_viommu_domain(domain);

	flags = (prot & IOMMU_READ ? VIRTIO_IOMMU_MAP_F_READ : 0) |
		(prot & IOMMU_WRITE ? VIRTIO_IOMMU_MAP_F_WRITE : 0) |
		(prot & IOMMU_MMIO ? VIRTIO_IOMMU_MAP_F_MMIO : 0);

	if (flags & ~vdomain->map_flags)
		return -EINVAL;

	ret = viommu_add_mapping(vdomain, iova, paddr, size, flags);
	if (ret)
		return ret;

	map = (struct virtio_iommu_req_map) {
		.head.type = VIRTIO_IOMMU_T_MAP,
		.domain = cpu_to_le32(vdomain->id),
		.virt_start = cpu_to_le64(iova),
		.phys_start = cpu_to_le64(paddr),
		.virt_end = cpu_to_le64(iova + size - 1),
		.flags = cpu_to_le32(flags),
	};

	if (!vdomain->nr_endpoints)
		return 0;

	ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map));
	if (ret)
		viommu_del_mappings(vdomain, iova, size);

	return ret;
}

static size_t viommu_unmap(struct iommu_domain *domain, unsigned long iova,
			   size_t size, struct iommu_iotlb_gather *gather)
{
	int ret = 0;
	size_t unmapped;
	struct virtio_iommu_req_unmap unmap;
	struct viommu_domain *vdomain = to_viommu_domain(domain);

	unmapped = viommu_del_mappings(vdomain, iova, size);
	if (unmapped < size)
		return 0;

	/* Device already removed all mappings after detach. */
	if (!vdomain->nr_endpoints)
		return unmapped;

	unmap = (struct virtio_iommu_req_unmap) {
		.head.type = VIRTIO_IOMMU_T_UNMAP,
		.domain = cpu_to_le32(vdomain->id),
		.virt_start = cpu_to_le64(iova),
		.virt_end = cpu_to_le64(iova + unmapped - 1),
	};

	ret = viommu_add_req(vdomain->viommu, &unmap, sizeof(unmap));
	return ret ? 0 : unmapped;
}

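/* Resolve an IOVA from the internal mapping tree, not from the device */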
static phys_addr_t viommu_iova_to_phys(struct iommu_domain *domain,
				       dma_addr_t iova)
{
	u64 paddr = 0;
	unsigned long flags;
	struct viommu_mapping *mapping;
	struct interval_tree_node *node;
	struct viommu_domain *vdomain = to_viommu_domain(domain);

	spin_lock_irqsave(&vdomain->mappings_lock, flags);
	node = interval_tree_iter_first(&vdomain->mappings, iova, iova);
	if (node) {
		mapping = container_of(node, struct viommu_mapping, iova);
		paddr = mapping->paddr + (iova - mapping->iova.start);
	}
	spin_unlock_irqrestore(&vdomain->mappings_lock, flags);

	return paddr;
}

static void viommu_iotlb_sync(struct iommu_domain *domain,
			      struct iommu_iotlb_gather *gather)
{
	struct viommu_domain *vdomain = to_viommu_domain(domain);

	viommu_sync_req(vdomain->viommu);
}

static void viommu_get_resv_regions(struct device *dev, struct list_head *head)
{
	struct iommu_resv_region *entry, *new_entry, *msi = NULL;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct viommu_endpoint *vdev = fwspec->iommu_priv;
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;

	list_for_each_entry(entry, &vdev->resv_regions, list) {
		if (entry->type == IOMMU_RESV_MSI)
			msi = entry;

		new_entry = kmemdup(entry, sizeof(*entry), GFP_KERNEL);
		if (!new_entry)
			return;
		list_add_tail(&new_entry->list, head);
	}

	/*
	 * If the device didn't register any bypass MSI window, add a
	 * software-mapped region.
	 */
	if (!msi) {
		msi = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
					      prot, IOMMU_RESV_SW_MSI);
		if (!msi)
			return;

		list_add_tail(&msi->list, head);
	}

	iommu_dma_get_resv_regions(dev, head);
}

static void viommu_put_resv_regions(struct device *dev, struct list_head *head)
{
	struct iommu_resv_region *entry, *next;

	list_for_each_entry_safe(entry, next, head, list)
		kfree(entry);
}

static struct iommu_ops viommu_ops;
static struct virtio_driver virtio_iommu_drv;

static int viommu_match_node(struct device *dev, const void *data)
{
	return dev->parent->fwnode == data;
}

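/*
 * Find the viommu_dev bound to virtio_iommu_drv whose parent device matches
 * @fwnode.
 */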
static struct viommu_dev *viommu_get_by_fwnode(struct fwnode_handle *fwnode)
{
	struct device *dev = driver_find_device(&virtio_iommu_drv.driver, NULL,
						fwnode, viommu_match_node);
	put_device(dev);

	return dev ? dev_to_virtio(dev)->priv : NULL;
}

static int viommu_add_device(struct device *dev)
{
	int ret;
	struct iommu_group *group;
	struct viommu_endpoint *vdev;
	struct viommu_dev *viommu = NULL;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);

	if (!fwspec || fwspec->ops != &viommu_ops)
		return -ENODEV;

	viommu = viommu_get_by_fwnode(fwspec->iommu_fwnode);
	if (!viommu)
		return -ENODEV;

	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
	if (!vdev)
		return -ENOMEM;

	vdev->dev = dev;
	vdev->viommu = viommu;
	INIT_LIST_HEAD(&vdev->resv_regions);
	fwspec->iommu_priv = vdev;

	if (viommu->probe_size) {
		/* Get additional information for this endpoint */
		ret = viommu_probe_endpoint(viommu, dev);
		if (ret)
			goto err_free_dev;
	}

	ret = iommu_device_link(&viommu->iommu, dev);
	if (ret)
		goto err_free_dev;

	/*
	 * Last step creates a default domain and attaches to it. Everything
	 * must be ready.
	 */
	group = iommu_group_get_for_dev(dev);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto err_unlink_dev;
	}

	iommu_group_put(group);

	return PTR_ERR_OR_ZERO(group);

err_unlink_dev:
	iommu_device_unlink(&viommu->iommu, dev);
err_free_dev:
	viommu_put_resv_regions(dev, &vdev->resv_regions);
	kfree(vdev);

	return ret;
}

static void viommu_remove_device(struct device *dev)
{
	struct viommu_endpoint *vdev;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);

	if (!fwspec || fwspec->ops != &viommu_ops)
		return;

	vdev = fwspec->iommu_priv;

	iommu_group_remove_device(dev);
	iommu_device_unlink(&vdev->viommu->iommu, dev);
	viommu_put_resv_regions(dev, &vdev->resv_regions);
	kfree(vdev);
}

static struct iommu_group *viommu_device_group(struct device *dev)
{
	if (dev_is_pci(dev))
		return pci_device_group(dev);
	else
		return generic_device_group(dev);
}

static int viommu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
	return iommu_fwspec_add_ids(dev, args->args, 1);
}

static struct iommu_ops viommu_ops = {
	.domain_alloc = viommu_domain_alloc,
	.domain_free = viommu_domain_free,
	.attach_dev = viommu_attach_dev,
	.map = viommu_map,
	.unmap = viommu_unmap,
	.iova_to_phys = viommu_iova_to_phys,
	.iotlb_sync = viommu_iotlb_sync,
	.add_device = viommu_add_device,
	.remove_device = viommu_remove_device,
	.device_group = viommu_device_group,
	.get_resv_regions = viommu_get_resv_regions,
	.put_resv_regions = viommu_put_resv_regions,
	.of_xlate = viommu_of_xlate,
};

static int viommu_init_vqs(struct viommu_dev *viommu)
{
	struct virtio_device *vdev = dev_to_virtio(viommu->dev);
	const char *names[] = { "request", "event" };
	vq_callback_t *callbacks[] = {
		NULL, /* No async requests */
		viommu_event_handler,
	};

	return virtio_find_vqs(vdev, VIOMMU_NR_VQS, viommu->vqs, callbacks,
			       names, NULL);
}

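/* Fill the event virtqueue with buffers so the device can report faults */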
static int viommu_fill_evtq(struct viommu_dev *viommu)
{
	int i, ret;
	struct scatterlist sg[1];
	struct viommu_event *evts;
	struct virtqueue *vq = viommu->vqs[VIOMMU_EVENT_VQ];
	size_t nr_evts = vq->num_free;

	viommu->evts = evts = devm_kmalloc_array(viommu->dev, nr_evts,
						 sizeof(*evts), GFP_KERNEL);
	if (!evts)
		return -ENOMEM;

	for (i = 0; i < nr_evts; i++) {
		sg_init_one(sg, &evts[i], sizeof(*evts));
		ret = virtqueue_add_inbuf(vq, sg, 1, &evts[i], GFP_KERNEL);
		if (ret)
			return ret;
	}

	return 0;
}

static int viommu_probe(struct virtio_device *vdev)
{
	struct device *parent_dev = vdev->dev.parent;
	struct viommu_dev *viommu = NULL;
	struct device *dev = &vdev->dev;
	u64 input_start = 0;
	u64 input_end = -1UL;
	int ret;

	if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1) ||
	    !virtio_has_feature(vdev, VIRTIO_IOMMU_F_MAP_UNMAP))
		return -ENODEV;

	viommu = devm_kzalloc(dev, sizeof(*viommu), GFP_KERNEL);
	if (!viommu)
		return -ENOMEM;

	spin_lock_init(&viommu->request_lock);
	ida_init(&viommu->domain_ids);
	viommu->dev = dev;
	viommu->vdev = vdev;
	INIT_LIST_HEAD(&viommu->requests);

	ret = viommu_init_vqs(viommu);
	if (ret)
		return ret;

	virtio_cread(vdev, struct virtio_iommu_config, page_size_mask,
		     &viommu->pgsize_bitmap);

	if (!viommu->pgsize_bitmap) {
		ret = -EINVAL;
		goto err_free_vqs;
	}

	viommu->map_flags = VIRTIO_IOMMU_MAP_F_READ | VIRTIO_IOMMU_MAP_F_WRITE;
	viommu->last_domain = ~0U;

	/* Optional features */
	virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
			     struct virtio_iommu_config, input_range.start,
			     &input_start);

	virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
			     struct virtio_iommu_config, input_range.end,
			     &input_end);

	virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE,
			     struct virtio_iommu_config, domain_range.start,
			     &viommu->first_domain);

	virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE,
			     struct virtio_iommu_config, domain_range.end,
			     &viommu->last_domain);

	virtio_cread_feature(vdev, VIRTIO_IOMMU_F_PROBE,
			     struct virtio_iommu_config, probe_size,
			     &viommu->probe_size);

	viommu->geometry = (struct iommu_domain_geometry) {
		.aperture_start = input_start,
		.aperture_end = input_end,
		.force_aperture = true,
	};

	if (virtio_has_feature(vdev, VIRTIO_IOMMU_F_MMIO))
		viommu->map_flags |= VIRTIO_IOMMU_MAP_F_MMIO;

	viommu_ops.pgsize_bitmap = viommu->pgsize_bitmap;

	virtio_device_ready(vdev);

	/* Populate the event queue with buffers */
	ret = viommu_fill_evtq(viommu);
	if (ret)
		goto err_free_vqs;

	ret = iommu_device_sysfs_add(&viommu->iommu, dev, NULL, "%s",
				     virtio_bus_name(vdev));
	if (ret)
		goto err_free_vqs;

	iommu_device_set_ops(&viommu->iommu, &viommu_ops);
	iommu_device_set_fwnode(&viommu->iommu, parent_dev->fwnode);

	iommu_device_register(&viommu->iommu);

#ifdef CONFIG_PCI
	if (pci_bus_type.iommu_ops != &viommu_ops) {
		pci_request_acs();
		ret = bus_set_iommu(&pci_bus_type, &viommu_ops);
		if (ret)
			goto err_unregister;
	}
#endif
#ifdef CONFIG_ARM_AMBA
	if (amba_bustype.iommu_ops != &viommu_ops) {
		ret = bus_set_iommu(&amba_bustype, &viommu_ops);
		if (ret)
			goto err_unregister;
	}
#endif
	if (platform_bus_type.iommu_ops != &viommu_ops) {
		ret = bus_set_iommu(&platform_bus_type, &viommu_ops);
		if (ret)
			goto err_unregister;
	}

	vdev->priv = viommu;

	dev_info(dev, "input address: %u bits\n",
		 order_base_2(viommu->geometry.aperture_end));
	dev_info(dev, "page mask: %#llx\n", viommu->pgsize_bitmap);

	return 0;

err_unregister:
	iommu_device_sysfs_remove(&viommu->iommu);
	iommu_device_unregister(&viommu->iommu);
err_free_vqs:
	vdev->config->del_vqs(vdev);

	return ret;
}

static void viommu_remove(struct virtio_device *vdev)
{
	struct viommu_dev *viommu = vdev->priv;

	iommu_device_sysfs_remove(&viommu->iommu);
	iommu_device_unregister(&viommu->iommu);

	/* Stop all virtqueues */
	vdev->config->reset(vdev);
	vdev->config->del_vqs(vdev);

	dev_info(&vdev->dev, "device removed\n");
}

static void viommu_config_changed(struct virtio_device *vdev)
{
	dev_warn(&vdev->dev, "config changed\n");
}

static unsigned int features[] = {
	VIRTIO_IOMMU_F_MAP_UNMAP,
	VIRTIO_IOMMU_F_INPUT_RANGE,
	VIRTIO_IOMMU_F_DOMAIN_RANGE,
	VIRTIO_IOMMU_F_PROBE,
	VIRTIO_IOMMU_F_MMIO,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_IOMMU, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static struct virtio_driver virtio_iommu_drv = {
	.driver.name = KBUILD_MODNAME,
	.driver.owner = THIS_MODULE,
	.id_table = id_table,
	.feature_table = features,
	.feature_table_size = ARRAY_SIZE(features),
	.probe = viommu_probe,
	.remove = viommu_remove,
	.config_changed = viommu_config_changed,
};

module_virtio_driver(virtio_iommu_drv);

MODULE_DESCRIPTION("Virtio IOMMU driver");
MODULE_AUTHOR("Jean-Philippe Brucker <jean-philippe.brucker@arm.com>");
MODULE_LICENSE("GPL v2");