/*
 * (c) 2017 Stefano Stabellini <stefano@aporeto.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <linux/inet.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/radix-tree.h>
#include <linux/module.h>
#include <linux/semaphore.h>
#include <linux/wait.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_connection_sock.h>
#include <net/request_sock.h>

#include <xen/events.h>
#include <xen/grant_table.h>
#include <xen/xen.h>
#include <xen/xenbus.h>
#include <xen/interface/io/pvcalls.h>

#define PVCALLS_VERSIONS "1"
#define MAX_RING_ORDER XENBUS_MAX_RING_GRANT_ORDER

struct pvcalls_back_global {
	struct list_head frontends;
	struct semaphore frontends_lock;
} pvcalls_back_global;

/*
 * Per-frontend data structure. It contains pointers to the command
 * ring, its event channel, a list of active sockets and a tree of
 * passive sockets.
 */
struct pvcalls_fedata {
	struct list_head list;
	struct xenbus_device *dev;
	struct xen_pvcalls_sring *sring;
	struct xen_pvcalls_back_ring ring;
	int irq;
	struct list_head socket_mappings;
	struct radix_tree_root socketpass_mappings;
	struct semaphore socket_lock;
};

struct pvcalls_ioworker {
	struct work_struct register_work;
	struct workqueue_struct *wq;
};

struct sock_mapping {
	struct list_head list;
	struct pvcalls_fedata *fedata;
	struct sockpass_mapping *sockpass;
	struct socket *sock;
	uint64_t id;
	grant_ref_t ref;
	struct pvcalls_data_intf *ring;
	void *bytes;
	struct pvcalls_data data;
	uint32_t ring_order;
	int irq;
	atomic_t read;
	atomic_t write;
	atomic_t io;
	atomic_t release;
	void (*saved_data_ready)(struct sock *sk);
	struct pvcalls_ioworker ioworker;
};

struct sockpass_mapping {
	struct list_head list;
	struct pvcalls_fedata *fedata;
	struct socket *sock;
	uint64_t id;
	struct xen_pvcalls_request reqcopy;
	spinlock_t copy_lock;
	struct workqueue_struct *wq;
	struct work_struct register_work;
	void (*saved_data_ready)(struct sock *sk);
};
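
/*
 * Note: a sock_mapping describes one connected ("active") socket with
 * its own data ring and event channel, while a sockpass_mapping
 * describes a bound/listening ("passive") socket. reqcopy holds the
 * single in-flight accept or poll request for a passive socket and is
 * protected by copy_lock against the deferred work in
 * __pvcalls_back_accept() and pvcalls_pass_sk_data_ready().
 */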

static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map);
static int pvcalls_back_release_active(struct xenbus_device *dev,
				       struct pvcalls_fedata *fedata,
				       struct sock_mapping *map);

static void pvcalls_conn_back_read(void *opaque)
{
	struct sock_mapping *map = (struct sock_mapping *)opaque;
	struct msghdr msg;
	struct kvec vec[2];
	RING_IDX cons, prod, size, wanted, array_size, masked_prod, masked_cons;
	int32_t error;
	struct pvcalls_data_intf *intf = map->ring;
	struct pvcalls_data *data = &map->data;
	unsigned long flags;
	int ret;

	array_size = XEN_FLEX_RING_SIZE(map->ring_order);
	cons = intf->in_cons;
	prod = intf->in_prod;
	error = intf->in_error;
	/* read the indexes first, then deal with the data */
	virt_mb();

	if (error)
		return;

	size = pvcalls_queued(prod, cons, array_size);
	if (size >= array_size)
		return;
	spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
	if (skb_queue_empty(&map->sock->sk->sk_receive_queue)) {
		atomic_set(&map->read, 0);
		spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock,
				flags);
		return;
	}
	spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);
	wanted = array_size - size;
	masked_prod = pvcalls_mask(prod, array_size);
	masked_cons = pvcalls_mask(cons, array_size);

	memset(&msg, 0, sizeof(msg));
	if (masked_prod < masked_cons) {
		vec[0].iov_base = data->in + masked_prod;
		vec[0].iov_len = wanted;
		iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 1, wanted);
	} else {
		vec[0].iov_base = data->in + masked_prod;
		vec[0].iov_len = array_size - masked_prod;
		vec[1].iov_base = data->in;
		vec[1].iov_len = wanted - vec[0].iov_len;
		iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 2, wanted);
	}

	atomic_set(&map->read, 0);
	ret = inet_recvmsg(map->sock, &msg, wanted, MSG_DONTWAIT);
	WARN_ON(ret > wanted);
	if (ret == -EAGAIN) /* shouldn't happen */
		return;
	if (!ret)
		ret = -ENOTCONN;
	spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
	if (ret > 0 && !skb_queue_empty(&map->sock->sk->sk_receive_queue))
		atomic_inc(&map->read);
	spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);

	/* write the data, then modify the indexes */
	virt_wmb();
	if (ret < 0) {
		atomic_set(&map->read, 0);
		intf->in_error = ret;
	} else
		intf->in_prod = prod + ret;
	/* update the indexes, then notify the other end */
	virt_wmb();
	notify_remote_via_irq(map->irq);

	return;
}
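
/*
 * Worked example (illustrative numbers only, assuming 4 KiB Xen pages):
 * pvcalls_mask() and pvcalls_queued() are the flex-ring helpers from
 * DEFINE_XEN_FLEX_RING() in include/xen/interface/io/ring.h. With
 * ring_order == 1 the data ring spans two pages, so each direction has
 * XEN_FLEX_RING_SIZE(1) == 4096 bytes. If in_prod == 4090 and
 * in_cons == 4, then 4086 bytes are already queued, wanted == 10 bytes
 * of free space, masked_prod == 4090 and masked_cons == 4: the free
 * region wraps, so vec[0] covers the 6 bytes at the end of the buffer
 * and vec[1] the remaining 4 bytes at the start.
 */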

static void pvcalls_conn_back_write(struct sock_mapping *map)
{
	struct pvcalls_data_intf *intf = map->ring;
	struct pvcalls_data *data = &map->data;
	struct msghdr msg;
	struct kvec vec[2];
	RING_IDX cons, prod, size, array_size;
	int ret;

	cons = intf->out_cons;
	prod = intf->out_prod;
	/* read the indexes before dealing with the data */
	virt_mb();

	array_size = XEN_FLEX_RING_SIZE(map->ring_order);
	size = pvcalls_queued(prod, cons, array_size);
	if (size == 0)
		return;

	memset(&msg, 0, sizeof(msg));
	msg.msg_flags |= MSG_DONTWAIT;
	if (pvcalls_mask(prod, array_size) > pvcalls_mask(cons, array_size)) {
		vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
		vec[0].iov_len = size;
		iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 1, size);
	} else {
		vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
		vec[0].iov_len = array_size - pvcalls_mask(cons, array_size);
		vec[1].iov_base = data->out;
		vec[1].iov_len = size - vec[0].iov_len;
		iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 2, size);
	}

	atomic_set(&map->write, 0);
	ret = inet_sendmsg(map->sock, &msg, size);
	if (ret == -EAGAIN || (ret >= 0 && ret < size)) {
		atomic_inc(&map->write);
		atomic_inc(&map->io);
	}
	if (ret == -EAGAIN)
		return;

	/* write the data, then update the indexes */
	virt_wmb();
	if (ret < 0) {
		intf->out_error = ret;
	} else {
		intf->out_error = 0;
		intf->out_cons = cons + ret;
		prod = intf->out_prod;
	}
	/* update the indexes, then notify the other end */
	virt_wmb();
	if (prod != cons + ret)
		atomic_inc(&map->write);
	notify_remote_via_irq(map->irq);
}
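
/*
 * Note: a short or -EAGAIN send above leaves data on the out ring, so
 * map->write and map->io are bumped to make pvcalls_back_ioworker()
 * retry. The final prod != cons + ret check catches data that is still
 * unsent, either because the send was short or because the frontend
 * queued more while inet_sendmsg() was running.
 */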

static void pvcalls_back_ioworker(struct work_struct *work)
{
	struct pvcalls_ioworker *ioworker = container_of(work,
		struct pvcalls_ioworker, register_work);
	struct sock_mapping *map = container_of(ioworker, struct sock_mapping,
		ioworker);

	while (atomic_read(&map->io) > 0) {
		if (atomic_read(&map->release) > 0) {
			atomic_set(&map->release, 0);
			return;
		}

		if (atomic_read(&map->read) > 0)
			pvcalls_conn_back_read(map);
		if (atomic_read(&map->write) > 0)
			pvcalls_conn_back_write(map);

		atomic_dec(&map->io);
	}
}
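
/*
 * Note on the atomic counters: map->read and map->write are raised by
 * the socket callbacks and the connection event handler whenever there
 * may be work to do, map->io counts outstanding reasons to keep this
 * loop running, and map->release is the stop signal set by
 * pvcalls_back_release_active() so the worker exits before the mapping
 * is torn down.
 */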

static int pvcalls_back_socket(struct xenbus_device *dev,
		struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	int ret;
	struct xen_pvcalls_response *rsp;

	fedata = dev_get_drvdata(&dev->dev);

	if (req->u.socket.domain != AF_INET ||
	    req->u.socket.type != SOCK_STREAM ||
	    (req->u.socket.protocol != IPPROTO_IP &&
	     req->u.socket.protocol != AF_INET))
		ret = -EAFNOSUPPORT;
	else
		ret = 0;

	/* leave the actual socket allocation for later */

	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.socket.id = req->u.socket.id;
	rsp->ret = ret;

	return 0;
}

static void pvcalls_sk_state_change(struct sock *sock)
{
	struct sock_mapping *map = sock->sk_user_data;

	if (map == NULL)
		return;

	atomic_inc(&map->read);
	notify_remote_via_irq(map->irq);
}

static void pvcalls_sk_data_ready(struct sock *sock)
{
	struct sock_mapping *map = sock->sk_user_data;
	struct pvcalls_ioworker *iow;

	if (map == NULL)
		return;

	iow = &map->ioworker;
	atomic_inc(&map->read);
	atomic_inc(&map->io);
	queue_work(iow->wq, &iow->register_work);
}

static struct sock_mapping *pvcalls_new_active_socket(
		struct pvcalls_fedata *fedata,
		uint64_t id,
		grant_ref_t ref,
		uint32_t evtchn,
		struct socket *sock)
{
	int ret;
	struct sock_mapping *map;
	void *page;

	map = kzalloc(sizeof(*map), GFP_KERNEL);
	if (map == NULL)
		return NULL;

	map->fedata = fedata;
	map->sock = sock;
	map->id = id;
	map->ref = ref;

	ret = xenbus_map_ring_valloc(fedata->dev, &ref, 1, &page);
	if (ret < 0)
		goto out;
	map->ring = page;
	map->ring_order = map->ring->ring_order;
	/* first read the order, then map the data ring */
	virt_rmb();
	if (map->ring_order > MAX_RING_ORDER) {
		pr_warn("%s frontend requested ring_order %u, which is > MAX (%u)\n",
				__func__, map->ring_order, MAX_RING_ORDER);
		goto out;
	}
	ret = xenbus_map_ring_valloc(fedata->dev, map->ring->ref,
				     (1 << map->ring_order), &page);
	if (ret < 0)
		goto out;
	map->bytes = page;

	ret = bind_interdomain_evtchn_to_irqhandler(fedata->dev->otherend_id,
						    evtchn,
						    pvcalls_back_conn_event,
						    0,
						    "pvcalls-backend",
						    map);
	if (ret < 0)
		goto out;
	map->irq = ret;

	map->data.in = map->bytes;
	map->data.out = map->bytes + XEN_FLEX_RING_SIZE(map->ring_order);

	map->ioworker.wq = alloc_workqueue("pvcalls_io", WQ_UNBOUND, 1);
	if (!map->ioworker.wq)
		goto out;
	atomic_set(&map->io, 1);
	INIT_WORK(&map->ioworker.register_work, pvcalls_back_ioworker);

	down(&fedata->socket_lock);
	list_add_tail(&map->list, &fedata->socket_mappings);
	up(&fedata->socket_lock);

	write_lock_bh(&map->sock->sk->sk_callback_lock);
	map->saved_data_ready = map->sock->sk->sk_data_ready;
	map->sock->sk->sk_user_data = map;
	map->sock->sk->sk_data_ready = pvcalls_sk_data_ready;
	map->sock->sk->sk_state_change = pvcalls_sk_state_change;
	write_unlock_bh(&map->sock->sk->sk_callback_lock);

	return map;
out:
	down(&fedata->socket_lock);
	list_del(&map->list);
	pvcalls_back_release_active(fedata->dev, fedata, map);
	up(&fedata->socket_lock);
	return NULL;
}
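
/*
 * Summary: pvcalls_new_active_socket() maps the indexes page (ref),
 * reads ring_order from it, maps the (1 << ring_order) data pages,
 * binds the per-connection event channel, creates the "pvcalls_io"
 * workqueue and finally installs sk_data_ready/sk_state_change so that
 * incoming data or a state change wakes the ioworker.
 */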

static int pvcalls_back_connect(struct xenbus_device *dev,
				struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	int ret = -EINVAL;
	struct socket *sock;
	struct sock_mapping *map;
	struct xen_pvcalls_response *rsp;
	struct sockaddr *sa = (struct sockaddr *)&req->u.connect.addr;

	fedata = dev_get_drvdata(&dev->dev);

	if (req->u.connect.len < sizeof(sa->sa_family) ||
	    req->u.connect.len > sizeof(req->u.connect.addr) ||
	    sa->sa_family != AF_INET)
		goto out;

	ret = sock_create(AF_INET, SOCK_STREAM, 0, &sock);
	if (ret < 0)
		goto out;
	ret = inet_stream_connect(sock, sa, req->u.connect.len, 0);
	if (ret < 0) {
		sock_release(sock);
		goto out;
	}

	map = pvcalls_new_active_socket(fedata,
					req->u.connect.id,
					req->u.connect.ref,
					req->u.connect.evtchn,
					sock);
	if (!map) {
		ret = -EFAULT;
		sock_release(sock);
	}

out:
	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.connect.id = req->u.connect.id;
	rsp->ret = ret;

	return 0;
}

static int pvcalls_back_release_active(struct xenbus_device *dev,
				       struct pvcalls_fedata *fedata,
				       struct sock_mapping *map)
{
	disable_irq(map->irq);
	if (map->sock->sk != NULL) {
		write_lock_bh(&map->sock->sk->sk_callback_lock);
		map->sock->sk->sk_user_data = NULL;
		map->sock->sk->sk_data_ready = map->saved_data_ready;
		write_unlock_bh(&map->sock->sk->sk_callback_lock);
	}

	atomic_set(&map->release, 1);
	flush_work(&map->ioworker.register_work);

	xenbus_unmap_ring_vfree(dev, map->bytes);
	xenbus_unmap_ring_vfree(dev, (void *)map->ring);
	unbind_from_irqhandler(map->irq, map);

	sock_release(map->sock);
	kfree(map);

	return 0;
}

static int pvcalls_back_release_passive(struct xenbus_device *dev,
					struct pvcalls_fedata *fedata,
					struct sockpass_mapping *mappass)
{
	if (mappass->sock->sk != NULL) {
		write_lock_bh(&mappass->sock->sk->sk_callback_lock);
		mappass->sock->sk->sk_user_data = NULL;
		mappass->sock->sk->sk_data_ready = mappass->saved_data_ready;
		write_unlock_bh(&mappass->sock->sk->sk_callback_lock);
	}
	sock_release(mappass->sock);
	flush_workqueue(mappass->wq);
	destroy_workqueue(mappass->wq);
	kfree(mappass);

	return 0;
}

static int pvcalls_back_release(struct xenbus_device *dev,
				struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	struct sock_mapping *map, *n;
	struct sockpass_mapping *mappass;
	int ret = 0;
	struct xen_pvcalls_response *rsp;

	fedata = dev_get_drvdata(&dev->dev);

	down(&fedata->socket_lock);
	list_for_each_entry_safe(map, n, &fedata->socket_mappings, list) {
		if (map->id == req->u.release.id) {
			list_del(&map->list);
			up(&fedata->socket_lock);
			ret = pvcalls_back_release_active(dev, fedata, map);
			goto out;
		}
	}
	mappass = radix_tree_lookup(&fedata->socketpass_mappings,
				    req->u.release.id);
	if (mappass != NULL) {
		radix_tree_delete(&fedata->socketpass_mappings, mappass->id);
		up(&fedata->socket_lock);
		ret = pvcalls_back_release_passive(dev, fedata, mappass);
	} else
		up(&fedata->socket_lock);

out:
	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->u.release.id = req->u.release.id;
	rsp->cmd = req->cmd;
	rsp->ret = ret;
	return 0;
}

static void __pvcalls_back_accept(struct work_struct *work)
{
	struct sockpass_mapping *mappass = container_of(
		work, struct sockpass_mapping, register_work);
	struct sock_mapping *map;
	struct pvcalls_ioworker *iow;
	struct pvcalls_fedata *fedata;
	struct socket *sock;
	struct xen_pvcalls_response *rsp;
	struct xen_pvcalls_request *req;
	int notify;
	int ret = -EINVAL;
	unsigned long flags;

	fedata = mappass->fedata;
	/*
	 * __pvcalls_back_accept can race against pvcalls_back_accept.
	 * We only need to check the value of "cmd" on read. It could be
	 * done atomically, but to simplify the code on the write side, we
	 * use a spinlock.
	 */
	spin_lock_irqsave(&mappass->copy_lock, flags);
	req = &mappass->reqcopy;
	if (req->cmd != PVCALLS_ACCEPT) {
		spin_unlock_irqrestore(&mappass->copy_lock, flags);
		return;
	}
	spin_unlock_irqrestore(&mappass->copy_lock, flags);

	sock = sock_alloc();
	if (sock == NULL)
		goto out_error;
	sock->type = mappass->sock->type;
	sock->ops = mappass->sock->ops;

	ret = inet_accept(mappass->sock, sock, O_NONBLOCK, true);
	if (ret == -EAGAIN) {
		sock_release(sock);
		return;
	}

	map = pvcalls_new_active_socket(fedata,
					req->u.accept.id_new,
					req->u.accept.ref,
					req->u.accept.evtchn,
					sock);
	if (!map) {
		ret = -EFAULT;
		sock_release(sock);
		goto out_error;
	}

	map->sockpass = mappass;
	iow = &map->ioworker;
	atomic_inc(&map->read);
	atomic_inc(&map->io);
	queue_work(iow->wq, &iow->register_work);

out_error:
	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.accept.id = req->u.accept.id;
	rsp->ret = ret;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&fedata->ring, notify);
	if (notify)
		notify_remote_via_irq(fedata->irq);

	mappass->reqcopy.cmd = 0;
}

static void pvcalls_pass_sk_data_ready(struct sock *sock)
{
	struct sockpass_mapping *mappass = sock->sk_user_data;
	struct pvcalls_fedata *fedata;
	struct xen_pvcalls_response *rsp;
	unsigned long flags;
	int notify;

	if (mappass == NULL)
		return;

	fedata = mappass->fedata;
	spin_lock_irqsave(&mappass->copy_lock, flags);
	if (mappass->reqcopy.cmd == PVCALLS_POLL) {
		rsp = RING_GET_RESPONSE(&fedata->ring,
					fedata->ring.rsp_prod_pvt++);
		rsp->req_id = mappass->reqcopy.req_id;
		rsp->u.poll.id = mappass->reqcopy.u.poll.id;
		rsp->cmd = mappass->reqcopy.cmd;
		rsp->ret = 0;

		mappass->reqcopy.cmd = 0;
		spin_unlock_irqrestore(&mappass->copy_lock, flags);

		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&fedata->ring, notify);
		if (notify)
			notify_remote_via_irq(mappass->fedata->irq);
	} else {
		spin_unlock_irqrestore(&mappass->copy_lock, flags);
		queue_work(mappass->wq, &mappass->register_work);
	}
}

static int pvcalls_back_bind(struct xenbus_device *dev,
			     struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	int ret;
	struct sockpass_mapping *map;
	struct xen_pvcalls_response *rsp;

	fedata = dev_get_drvdata(&dev->dev);

	map = kzalloc(sizeof(*map), GFP_KERNEL);
	if (map == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_WORK(&map->register_work, __pvcalls_back_accept);
	spin_lock_init(&map->copy_lock);
	map->wq = alloc_workqueue("pvcalls_wq", WQ_UNBOUND, 1);
	if (!map->wq) {
		ret = -ENOMEM;
		goto out;
	}

	ret = sock_create(AF_INET, SOCK_STREAM, 0, &map->sock);
	if (ret < 0)
		goto out;

	ret = inet_bind(map->sock, (struct sockaddr *)&req->u.bind.addr,
			req->u.bind.len);
	if (ret < 0)
		goto out;

	map->fedata = fedata;
	map->id = req->u.bind.id;

	down(&fedata->socket_lock);
	ret = radix_tree_insert(&fedata->socketpass_mappings, map->id,
				map);
	up(&fedata->socket_lock);
	if (ret)
		goto out;

	write_lock_bh(&map->sock->sk->sk_callback_lock);
	map->saved_data_ready = map->sock->sk->sk_data_ready;
	map->sock->sk->sk_user_data = map;
	map->sock->sk->sk_data_ready = pvcalls_pass_sk_data_ready;
	write_unlock_bh(&map->sock->sk->sk_callback_lock);

out:
	if (ret) {
		if (map && map->sock)
			sock_release(map->sock);
		if (map && map->wq)
			destroy_workqueue(map->wq);
		kfree(map);
	}
	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.bind.id = req->u.bind.id;
	rsp->ret = ret;
	return 0;
}

static int pvcalls_back_listen(struct xenbus_device *dev,
			       struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	int ret = -EINVAL;
	struct sockpass_mapping *map;
	struct xen_pvcalls_response *rsp;

	fedata = dev_get_drvdata(&dev->dev);

	down(&fedata->socket_lock);
	map = radix_tree_lookup(&fedata->socketpass_mappings, req->u.listen.id);
	up(&fedata->socket_lock);
	if (map == NULL)
		goto out;

	ret = inet_listen(map->sock, req->u.listen.backlog);

out:
	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.listen.id = req->u.listen.id;
	rsp->ret = ret;
	return 0;
}

static int pvcalls_back_accept(struct xenbus_device *dev,
			       struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	struct sockpass_mapping *mappass;
	int ret = -EINVAL;
	struct xen_pvcalls_response *rsp;
	unsigned long flags;

	fedata = dev_get_drvdata(&dev->dev);

	down(&fedata->socket_lock);
	mappass = radix_tree_lookup(&fedata->socketpass_mappings,
		req->u.accept.id);
	up(&fedata->socket_lock);
	if (mappass == NULL)
		goto out_error;

	/*
	 * Limitation of the current implementation: only support one
	 * concurrent accept or poll call on one socket.
	 */
	spin_lock_irqsave(&mappass->copy_lock, flags);
	if (mappass->reqcopy.cmd != 0) {
		spin_unlock_irqrestore(&mappass->copy_lock, flags);
		ret = -EINTR;
		goto out_error;
	}

	mappass->reqcopy = *req;
	spin_unlock_irqrestore(&mappass->copy_lock, flags);
	queue_work(mappass->wq, &mappass->register_work);

	/* Tell the caller we don't need to send back a notification yet */
	return -1;

out_error:
	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.accept.id = req->u.accept.id;
	rsp->ret = ret;
	return 0;
}

static int pvcalls_back_poll(struct xenbus_device *dev,
			     struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	struct sockpass_mapping *mappass;
	struct xen_pvcalls_response *rsp;
	struct inet_connection_sock *icsk;
	struct request_sock_queue *queue;
	unsigned long flags;
	int ret;
	bool data;

	fedata = dev_get_drvdata(&dev->dev);

	down(&fedata->socket_lock);
	mappass = radix_tree_lookup(&fedata->socketpass_mappings,
				    req->u.poll.id);
	up(&fedata->socket_lock);
	if (mappass == NULL)
		return -EINVAL;

	/*
	 * Limitation of the current implementation: only support one
	 * concurrent accept or poll call on one socket.
	 */
	spin_lock_irqsave(&mappass->copy_lock, flags);
	if (mappass->reqcopy.cmd != 0) {
		ret = -EINTR;
		goto out;
	}

	mappass->reqcopy = *req;
	icsk = inet_csk(mappass->sock->sk);
	queue = &icsk->icsk_accept_queue;
	data = queue->rskq_accept_head != NULL;
	if (data) {
		mappass->reqcopy.cmd = 0;
		ret = 0;
		goto out;
	}
	spin_unlock_irqrestore(&mappass->copy_lock, flags);

	/* Tell the caller we don't need to send back a notification yet */
	return -1;

out:
	spin_unlock_irqrestore(&mappass->copy_lock, flags);

	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.poll.id = req->u.poll.id;
	rsp->ret = ret;
	return 0;
}

static int pvcalls_back_handle_cmd(struct xenbus_device *dev,
				   struct xen_pvcalls_request *req)
{
	int ret = 0;

	switch (req->cmd) {
	case PVCALLS_SOCKET:
		ret = pvcalls_back_socket(dev, req);
		break;
	case PVCALLS_CONNECT:
		ret = pvcalls_back_connect(dev, req);
		break;
	case PVCALLS_RELEASE:
		ret = pvcalls_back_release(dev, req);
		break;
	case PVCALLS_BIND:
		ret = pvcalls_back_bind(dev, req);
		break;
	case PVCALLS_LISTEN:
		ret = pvcalls_back_listen(dev, req);
		break;
	case PVCALLS_ACCEPT:
		ret = pvcalls_back_accept(dev, req);
		break;
	case PVCALLS_POLL:
		ret = pvcalls_back_poll(dev, req);
		break;
	default:
	{
		struct pvcalls_fedata *fedata;
		struct xen_pvcalls_response *rsp;

		fedata = dev_get_drvdata(&dev->dev);
		rsp = RING_GET_RESPONSE(
				&fedata->ring, fedata->ring.rsp_prod_pvt++);
		rsp->req_id = req->req_id;
		rsp->cmd = req->cmd;
		rsp->ret = -ENOTSUPP;
		break;
	}
	}
	return ret;
}

static void pvcalls_back_work(struct pvcalls_fedata *fedata)
{
	int notify, notify_all = 0, more = 1;
	struct xen_pvcalls_request req;
	struct xenbus_device *dev = fedata->dev;

	while (more) {
		while (RING_HAS_UNCONSUMED_REQUESTS(&fedata->ring)) {
			RING_COPY_REQUEST(&fedata->ring,
					  fedata->ring.req_cons++,
					  &req);

			if (!pvcalls_back_handle_cmd(dev, &req)) {
				RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(
					&fedata->ring, notify);
				notify_all += notify;
			}
		}

		if (notify_all) {
			notify_remote_via_irq(fedata->irq);
			notify_all = 0;
		}

		RING_FINAL_CHECK_FOR_REQUESTS(&fedata->ring, more);
	}
}
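
/*
 * Note: RING_COPY_REQUEST() snapshots each request into a local copy,
 * so while a command is being handled the backend never re-reads fields
 * from the shared ring page, which the frontend could be changing
 * underneath us.
 */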

static irqreturn_t pvcalls_back_event(int irq, void *dev_id)
{
	struct xenbus_device *dev = dev_id;
	struct pvcalls_fedata *fedata = NULL;

	if (dev == NULL)
		return IRQ_HANDLED;

	fedata = dev_get_drvdata(&dev->dev);
	if (fedata == NULL)
		return IRQ_HANDLED;

	pvcalls_back_work(fedata);
	return IRQ_HANDLED;
}

static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map)
{
	struct sock_mapping *map = sock_map;
	struct pvcalls_ioworker *iow;

	if (map == NULL || map->sock == NULL || map->sock->sk == NULL ||
		map->sock->sk->sk_user_data != map)
		return IRQ_HANDLED;

	iow = &map->ioworker;

	atomic_inc(&map->write);
	atomic_inc(&map->io);
	queue_work(iow->wq, &iow->register_work);

	return IRQ_HANDLED;
}

static int backend_connect(struct xenbus_device *dev)
{
	int err, evtchn;
	grant_ref_t ring_ref;
	struct pvcalls_fedata *fedata = NULL;

	fedata = kzalloc(sizeof(struct pvcalls_fedata), GFP_KERNEL);
	if (!fedata)
		return -ENOMEM;

	fedata->irq = -1;
	err = xenbus_scanf(XBT_NIL, dev->otherend, "port", "%u",
			   &evtchn);
	if (err != 1) {
		err = -EINVAL;
		xenbus_dev_fatal(dev, err, "reading %s/event-channel",
				 dev->otherend);
		goto error;
	}

	err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", "%u", &ring_ref);
	if (err != 1) {
		err = -EINVAL;
		xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
				 dev->otherend);
		goto error;
	}

	err = bind_interdomain_evtchn_to_irq(dev->otherend_id, evtchn);
	if (err < 0)
		goto error;
	fedata->irq = err;

	err = request_threaded_irq(fedata->irq, NULL, pvcalls_back_event,
				   IRQF_ONESHOT, "pvcalls-back", dev);
	if (err < 0)
		goto error;

	err = xenbus_map_ring_valloc(dev, &ring_ref, 1,
				     (void **)&fedata->sring);
	if (err < 0)
		goto error;

	BACK_RING_INIT(&fedata->ring, fedata->sring, XEN_PAGE_SIZE * 1);
	fedata->dev = dev;

	INIT_LIST_HEAD(&fedata->socket_mappings);
	INIT_RADIX_TREE(&fedata->socketpass_mappings, GFP_KERNEL);
	sema_init(&fedata->socket_lock, 1);
	dev_set_drvdata(&dev->dev, fedata);

	down(&pvcalls_back_global.frontends_lock);
	list_add_tail(&fedata->list, &pvcalls_back_global.frontends);
	up(&pvcalls_back_global.frontends_lock);

	return 0;

 error:
	if (fedata->irq >= 0)
		unbind_from_irqhandler(fedata->irq, dev);
	if (fedata->sring != NULL)
		xenbus_unmap_ring_vfree(dev, fedata->sring);
	kfree(fedata);
	return err;
}
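
/*
 * Note: the frontend is expected to have published "port" (its event
 * channel) and "ring-ref" (a single grant for the command ring) in its
 * own xenstore directory; the exact path (for example something like
 * /local/domain/<domid>/device/pvcalls/0/port) depends on the toolstack
 * and is only an illustration.
 */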

static int backend_disconnect(struct xenbus_device *dev)
{
	struct pvcalls_fedata *fedata;
	struct sock_mapping *map, *n;
	struct sockpass_mapping *mappass;
	struct radix_tree_iter iter;
	void **slot;

	fedata = dev_get_drvdata(&dev->dev);

	down(&fedata->socket_lock);
	list_for_each_entry_safe(map, n, &fedata->socket_mappings, list) {
		list_del(&map->list);
		pvcalls_back_release_active(dev, fedata, map);
	}

	radix_tree_for_each_slot(slot, &fedata->socketpass_mappings, &iter, 0) {
		mappass = radix_tree_deref_slot(slot);
		if (!mappass)
			continue;
		if (radix_tree_exception(mappass)) {
			if (radix_tree_deref_retry(mappass))
				slot = radix_tree_iter_retry(&iter);
		} else {
			radix_tree_delete(&fedata->socketpass_mappings,
					  mappass->id);
			pvcalls_back_release_passive(dev, fedata, mappass);
		}
	}
	up(&fedata->socket_lock);

	unbind_from_irqhandler(fedata->irq, dev);
	xenbus_unmap_ring_vfree(dev, fedata->sring);

	list_del(&fedata->list);
	kfree(fedata);
	dev_set_drvdata(&dev->dev, NULL);

	return 0;
}

static int pvcalls_back_probe(struct xenbus_device *dev,
			      const struct xenbus_device_id *id)
{
	int err, abort;
	struct xenbus_transaction xbt;

again:
	abort = 1;

	err = xenbus_transaction_start(&xbt);
	if (err) {
		pr_warn("%s cannot create xenstore transaction\n", __func__);
		return err;
	}

	err = xenbus_printf(xbt, dev->nodename, "versions", "%s",
			    PVCALLS_VERSIONS);
	if (err) {
		pr_warn("%s write out 'versions' failed\n", __func__);
		goto abort;
	}

	err = xenbus_printf(xbt, dev->nodename, "max-page-order", "%u",
			    MAX_RING_ORDER);
	if (err) {
		pr_warn("%s write out 'max-page-order' failed\n", __func__);
		goto abort;
	}

	err = xenbus_printf(xbt, dev->nodename, "function-calls",
			    XENBUS_FUNCTIONS_CALLS);
	if (err) {
		pr_warn("%s write out 'function-calls' failed\n", __func__);
		goto abort;
	}

	abort = 0;
abort:
	err = xenbus_transaction_end(xbt, abort);
	if (err) {
		if (err == -EAGAIN && !abort)
			goto again;
		pr_warn("%s cannot complete xenstore transaction\n", __func__);
		return err;
	}

	if (abort)
		return -EFAULT;

	xenbus_switch_state(dev, XenbusStateInitWait);

	return 0;
}
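
/*
 * Note: after a successful probe the backend node advertises
 * versions = "1" (PVCALLS_VERSIONS), max-page-order = MAX_RING_ORDER
 * and function-calls = XENBUS_FUNCTIONS_CALLS, all written in a single
 * xenstore transaction, and the device switches to InitWait to wait for
 * the frontend.
 */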

static void set_backend_state(struct xenbus_device *dev,
			      enum xenbus_state state)
{
	while (dev->state != state) {
		switch (dev->state) {
		case XenbusStateClosed:
			switch (state) {
			case XenbusStateInitWait:
			case XenbusStateConnected:
				xenbus_switch_state(dev, XenbusStateInitWait);
				break;
			case XenbusStateClosing:
				xenbus_switch_state(dev, XenbusStateClosing);
				break;
			default:
				WARN_ON(1);
			}
			break;
		case XenbusStateInitWait:
		case XenbusStateInitialised:
			switch (state) {
			case XenbusStateConnected:
				backend_connect(dev);
				xenbus_switch_state(dev, XenbusStateConnected);
				break;
			case XenbusStateClosing:
			case XenbusStateClosed:
				xenbus_switch_state(dev, XenbusStateClosing);
				break;
			default:
				WARN_ON(1);
			}
			break;
		case XenbusStateConnected:
			switch (state) {
			case XenbusStateInitWait:
			case XenbusStateClosing:
			case XenbusStateClosed:
				down(&pvcalls_back_global.frontends_lock);
				backend_disconnect(dev);
				up(&pvcalls_back_global.frontends_lock);
				xenbus_switch_state(dev, XenbusStateClosing);
				break;
			default:
				WARN_ON(1);
			}
			break;
		case XenbusStateClosing:
			switch (state) {
			case XenbusStateInitWait:
			case XenbusStateConnected:
			case XenbusStateClosed:
				xenbus_switch_state(dev, XenbusStateClosed);
				break;
			default:
				WARN_ON(1);
			}
			break;
		default:
			WARN_ON(1);
		}
	}
}

static void pvcalls_back_changed(struct xenbus_device *dev,
				 enum xenbus_state frontend_state)
{
	switch (frontend_state) {
	case XenbusStateInitialising:
		set_backend_state(dev, XenbusStateInitWait);
		break;

	case XenbusStateInitialised:
	case XenbusStateConnected:
		set_backend_state(dev, XenbusStateConnected);
		break;

	case XenbusStateClosing:
		set_backend_state(dev, XenbusStateClosing);
		break;

	case XenbusStateClosed:
		set_backend_state(dev, XenbusStateClosed);
		if (xenbus_dev_is_online(dev))
			break;
		device_unregister(&dev->dev);
		break;
	case XenbusStateUnknown:
		set_backend_state(dev, XenbusStateClosed);
		device_unregister(&dev->dev);
		break;

	default:
		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
				 frontend_state);
		break;
	}
}

static int pvcalls_back_remove(struct xenbus_device *dev)
{
	return 0;
}

static int pvcalls_back_uevent(struct xenbus_device *xdev,
			       struct kobj_uevent_env *env)
{
	return 0;
}

static const struct xenbus_device_id pvcalls_back_ids[] = {
	{ "pvcalls" },
	{ "" }
};

static struct xenbus_driver pvcalls_back_driver = {
	.ids = pvcalls_back_ids,
	.probe = pvcalls_back_probe,
	.remove = pvcalls_back_remove,
	.uevent = pvcalls_back_uevent,
	.otherend_changed = pvcalls_back_changed,
};

static int __init pvcalls_back_init(void)
{
	int ret;

	if (!xen_domain())
		return -ENODEV;

	ret = xenbus_register_backend(&pvcalls_back_driver);
	if (ret < 0)
		return ret;

	sema_init(&pvcalls_back_global.frontends_lock, 1);
	INIT_LIST_HEAD(&pvcalls_back_global.frontends);
	return 0;
}
module_init(pvcalls_back_init);

static void __exit pvcalls_back_fin(void)
{
	struct pvcalls_fedata *fedata, *nfedata;

	down(&pvcalls_back_global.frontends_lock);
	list_for_each_entry_safe(fedata, nfedata,
				 &pvcalls_back_global.frontends, list) {
		backend_disconnect(fedata->dev);
	}
	up(&pvcalls_back_global.frontends_lock);

	xenbus_unregister_driver(&pvcalls_back_driver);
}

module_exit(pvcalls_back_fin);

MODULE_DESCRIPTION("Xen PV Calls backend driver");
MODULE_AUTHOR("Stefano Stabellini <sstabellini@kernel.org>");
MODULE_LICENSE("GPL");