Blame - ap/os/linux/linux-3.4.x/drivers/block/nbd.c - T106_DC

blob: eac37055456c5d0188246d89558e13ed43e477e0 [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame^]	1	/*
				2	* Network block device - make block devices work over TCP
				3	*
				4	* Note that you can not swap over this thing, yet. Seems to work but
				5	* deadlocks sometimes - you can not swap over TCP in general.
				6	*
				7	* Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz>
				8	* Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
				9	*
				10	* This file is released under GPLv2 or later.
				11	*
				12	* (part of code stolen from loop.c)
				13	*/
				14
				15	#include <linux/major.h>
				16
				17	#include <linux/blkdev.h>
				18	#include <linux/module.h>
				19	#include <linux/init.h>
				20	#include <linux/sched.h>
				21	#include <linux/fs.h>
				22	#include <linux/bio.h>
				23	#include <linux/stat.h>
				24	#include <linux/errno.h>
				25	#include <linux/file.h>
				26	#include <linux/ioctl.h>
				27	#include <linux/mutex.h>
				28	#include <linux/compiler.h>
				29	#include <linux/err.h>
				30	#include <linux/kernel.h>
				31	#include <linux/slab.h>
				32	#include <net/sock.h>
				33	#include <linux/net.h>
				34	#include <linux/kthread.h>
				35
				36	#include <asm/uaccess.h>
				37	#include <asm/types.h>
				38
				39	#include <linux/nbd.h>
				40
				/* Tag stored in nbd_device.magic by nbd_init(); verified with BUG_ON(). */
				#define NBD_MAGIC 0x68797548

				/*
				 * Debug tracing.  With NDEBUG defined dprintk() expands to nothing;
				 * otherwise it prints a KERN_DEBUG message when at least one of the
				 * requested DBG_* bits is set in debugflags.
				 */
				#ifndef NDEBUG
				#define DBG_IOCTL 0x0004
				#define DBG_INIT 0x0010
				#define DBG_EXIT 0x0020
				#define DBG_BLKDEV 0x0100
				#define DBG_RX 0x0200
				#define DBG_TX 0x0400
				static unsigned int debugflags;
				#define dprintk(flags, fmt...) do { \
					if (debugflags & (flags)) \
						printk(KERN_DEBUG fmt); \
				} while (0)
				#else /* NDEBUG */
				#define dprintk(flags, fmt...)
				#endif /* NDEBUG */

				/* Module parameter: how many devices nbd_init() sets up. */
				static unsigned int nbds_max = 16;
				/* Array of nbds_max devices, allocated in nbd_init(). */
				static struct nbd_device *nbd_dev;
				/* Module parameter: partitions per device (0 means none). */
				static int max_part;

				/*
				 * Use just one lock (or at most 1 per NIC). Two arguments for this:
				 * 1. Each NIC is essentially a synchronization point for all servers
				 *    accessed through that NIC so there's no need to have more locks
				 *    than NICs anyway.
				 * 2. More locks lead to more "Dirty cache line bouncing" which will slow
				 *    down each lock to the point where they're actually slower than just
				 *    a single lock.
				 * Thanks go to Jens Axboe and Al Viro for their LKML emails explaining this!
				 *
				 * nbd_init() hands this lock to blk_init_queue() for every disk.
				 */
				static DEFINE_SPINLOCK(nbd_lock);
				73
				74	#ifndef NDEBUG
				75	static const char *ioctl_cmd_to_ascii(int cmd)
				76	{
				77	switch (cmd) {
				78	case NBD_SET_SOCK: return "set-sock";
				79	case NBD_SET_BLKSIZE: return "set-blksize";
				80	case NBD_SET_SIZE: return "set-size";
				81	case NBD_DO_IT: return "do-it";
				82	case NBD_CLEAR_SOCK: return "clear-sock";
				83	case NBD_CLEAR_QUE: return "clear-que";
				84	case NBD_PRINT_DEBUG: return "print-debug";
				85	case NBD_SET_SIZE_BLOCKS: return "set-size-blocks";
				86	case NBD_DISCONNECT: return "disconnect";
				87	case BLKROSET: return "set-read-only";
				88	case BLKFLSBUF: return "flush-buffer-cache";
				89	}
				90	return "unknown";
				91	}
				92
				93	static const char *nbdcmd_to_ascii(int cmd)
				94	{
				95	switch (cmd) {
				96	case NBD_CMD_READ: return "read";
				97	case NBD_CMD_WRITE: return "write";
				98	case NBD_CMD_DISC: return "disconnect";
				99	}
				100	return "invalid";
				101	}
				102	#endif /* NDEBUG */
				103
				104	static void nbd_end_request(struct request *req)
				105	{
				106	int error = req->errors ? -EIO : 0;
				107	struct request_queue *q = req->q;
				108	unsigned long flags;
				109
				110	dprintk(DBG_BLKDEV, "%s: request %p: %s\n", req->rq_disk->disk_name,
				111	req, error ? "failed" : "done");
				112
				113	spin_lock_irqsave(q->queue_lock, flags);
				114	__blk_end_request_all(req, error);
				115	spin_unlock_irqrestore(q->queue_lock, flags);
				116	}
				117
				118	static void sock_shutdown(struct nbd_device *nbd, int lock)
				119	{
				120	/* Forcibly shutdown the socket causing all listeners
				121	* to error
				122	*
				123	* FIXME: This code is duplicated from sys_shutdown, but
				124	* there should be a more generic interface rather than
				125	* calling socket ops directly here */
				126	if (lock)
				127	mutex_lock(&nbd->tx_lock);
				128	if (nbd->sock) {
				129	dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
				130	kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
				131	nbd->sock = NULL;
				132	}
				133	if (lock)
				134	mutex_unlock(&nbd->tx_lock);
				135	}
				136
				137	static void nbd_xmit_timeout(unsigned long arg)
				138	{
				139	struct task_struct task = (struct task_struct )arg;
				140
				141	printk(KERN_WARNING "nbd: killing hung xmit (%s, pid: %d)\n",
				142	task->comm, task->pid);
				143	force_sig(SIGKILL, task);
				144	}
				145
				146	/*
				147	* Send or receive packet.
				148	*/
				149	static int sock_xmit(struct nbd_device nbd, int send, void buf, int size,
				150	int msg_flags)
				151	{
				152	struct socket *sock = nbd->sock;
				153	int result;
				154	struct msghdr msg;
				155	struct kvec iov;
				156	sigset_t blocked, oldset;
				157
				158	if (unlikely(!sock)) {
				159	dev_err(disk_to_dev(nbd->disk),
				160	"Attempted %s on closed socket in sock_xmit\n",
				161	(send ? "send" : "recv"));
				162	return -EINVAL;
				163	}
				164
				165	/* Allow interception of SIGKILL only
				166	* Don't allow other signals to interrupt the transmission */
				167	siginitsetinv(&blocked, sigmask(SIGKILL));
				168	sigprocmask(SIG_SETMASK, &blocked, &oldset);
				169
				170	do {
				171	sock->sk->sk_allocation = GFP_NOIO;
				172	iov.iov_base = buf;
				173	iov.iov_len = size;
				174	msg.msg_name = NULL;
				175	msg.msg_namelen = 0;
				176	msg.msg_control = NULL;
				177	msg.msg_controllen = 0;
				178	msg.msg_flags = msg_flags \| MSG_NOSIGNAL;
				179
				180	if (send) {
				181	struct timer_list ti;
				182
				183	if (nbd->xmit_timeout) {
				184	init_timer(&ti);
				185	ti.function = nbd_xmit_timeout;
				186	ti.data = (unsigned long)current;
				187	ti.expires = jiffies + nbd->xmit_timeout;
				188	add_timer(&ti);
				189	}
				190	result = kernel_sendmsg(sock, &msg, &iov, 1, size);
				191	if (nbd->xmit_timeout)
				192	del_timer_sync(&ti);
				193	} else
				194	result = kernel_recvmsg(sock, &msg, &iov, 1, size,
				195	msg.msg_flags);
				196
				197	if (signal_pending(current)) {
				198	siginfo_t info;
				199	printk(KERN_WARNING "nbd (pid %d: %s) got signal %d\n",
				200	task_pid_nr(current), current->comm,
				201	dequeue_signal_lock(current, &current->blocked, &info));
				202	result = -EINTR;
				203	sock_shutdown(nbd, !send);
				204	break;
				205	}
				206
				207	if (result <= 0) {
				208	if (result == 0)
				209	result = -EPIPE; /* short read */
				210	break;
				211	}
				212	size -= result;
				213	buf += result;
				214	} while (size > 0);
				215
				216	sigprocmask(SIG_SETMASK, &oldset, NULL);
				217
				218	return result;
				219	}
				220
				221	static inline int sock_send_bvec(struct nbd_device nbd, struct bio_vec bvec,
				222	int flags)
				223	{
				224	int result;
				225	void *kaddr = kmap(bvec->bv_page);
				226	result = sock_xmit(nbd, 1, kaddr + bvec->bv_offset,
				227	bvec->bv_len, flags);
				228	kunmap(bvec->bv_page);
				229	return result;
				230	}
				231
				232	/* always call with the tx_lock held */
				233	static int nbd_send_req(struct nbd_device nbd, struct request req)
				234	{
				235	int result, flags;
				236	struct nbd_request request;
				237	unsigned long size = blk_rq_bytes(req);
				238
				239	request.magic = htonl(NBD_REQUEST_MAGIC);
				240	request.type = htonl(nbd_cmd(req));
				241	request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
				242	request.len = htonl(size);
				243	memcpy(request.handle, &req, sizeof(req));
				244
				245	dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n",
				246	nbd->disk->disk_name, req,
				247	nbdcmd_to_ascii(nbd_cmd(req)),
				248	(unsigned long long)blk_rq_pos(req) << 9,
				249	blk_rq_bytes(req));
				250	result = sock_xmit(nbd, 1, &request, sizeof(request),
				251	(nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
				252	if (result <= 0) {
				253	dev_err(disk_to_dev(nbd->disk),
				254	"Send control failed (result %d)\n", result);
				255	goto error_out;
				256	}
				257
				258	if (nbd_cmd(req) == NBD_CMD_WRITE) {
				259	struct req_iterator iter;
				260	struct bio_vec *bvec;
				261	/*
				262	* we are really probing at internals to determine
				263	* whether to set MSG_MORE or not...
				264	*/
				265	rq_for_each_segment(bvec, req, iter) {
				266	flags = 0;
				267	if (!rq_iter_last(req, iter))
				268	flags = MSG_MORE;
				269	dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n",
				270	nbd->disk->disk_name, req, bvec->bv_len);
				271	result = sock_send_bvec(nbd, bvec, flags);
				272	if (result <= 0) {
				273	dev_err(disk_to_dev(nbd->disk),
				274	"Send data failed (result %d)\n",
				275	result);
				276	goto error_out;
				277	}
				278	}
				279	}
				280	return 0;
				281
				282	error_out:
				283	return -EIO;
				284	}
				285
				286	static struct request nbd_find_request(struct nbd_device nbd,
				287	struct request *xreq)
				288	{
				289	struct request req, tmp;
				290	int err;
				291
				292	err = wait_event_interruptible(nbd->active_wq, nbd->active_req != xreq);
				293	if (unlikely(err))
				294	goto out;
				295
				296	spin_lock(&nbd->queue_lock);
				297	list_for_each_entry_safe(req, tmp, &nbd->queue_head, queuelist) {
				298	if (req != xreq)
				299	continue;
				300	list_del_init(&req->queuelist);
				301	spin_unlock(&nbd->queue_lock);
				302	return req;
				303	}
				304	spin_unlock(&nbd->queue_lock);
				305
				306	err = -ENOENT;
				307
				308	out:
				309	return ERR_PTR(err);
				310	}
				311
				312	static inline int sock_recv_bvec(struct nbd_device nbd, struct bio_vec bvec)
				313	{
				314	int result;
				315	void *kaddr = kmap(bvec->bv_page);
				316	result = sock_xmit(nbd, 0, kaddr + bvec->bv_offset, bvec->bv_len,
				317	MSG_WAITALL);
				318	kunmap(bvec->bv_page);
				319	return result;
				320	}
				321
				322	/* NULL returned = something went wrong, inform userspace */
				323	static struct request nbd_read_stat(struct nbd_device nbd)
				324	{
				325	int result;
				326	struct nbd_reply reply;
				327	struct request *req;
				328
				329	reply.magic = 0;
				330	result = sock_xmit(nbd, 0, &reply, sizeof(reply), MSG_WAITALL);
				331	if (result <= 0) {
				332	dev_err(disk_to_dev(nbd->disk),
				333	"Receive control failed (result %d)\n", result);
				334	goto harderror;
				335	}
				336
				337	if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
				338	dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
				339	(unsigned long)ntohl(reply.magic));
				340	result = -EPROTO;
				341	goto harderror;
				342	}
				343
				344	req = nbd_find_request(nbd, (struct request *)reply.handle);
				345	if (IS_ERR(req)) {
				346	result = PTR_ERR(req);
				347	if (result != -ENOENT)
				348	goto harderror;
				349
				350	dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%p)\n",
				351	reply.handle);
				352	result = -EBADR;
				353	goto harderror;
				354	}
				355
				356	if (ntohl(reply.error)) {
				357	dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
				358	ntohl(reply.error));
				359	req->errors++;
				360	return req;
				361	}
				362
				363	dprintk(DBG_RX, "%s: request %p: got reply\n",
				364	nbd->disk->disk_name, req);
				365	if (nbd_cmd(req) == NBD_CMD_READ) {
				366	struct req_iterator iter;
				367	struct bio_vec *bvec;
				368
				369	rq_for_each_segment(bvec, req, iter) {
				370	result = sock_recv_bvec(nbd, bvec);
				371	if (result <= 0) {
				372	dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
				373	result);
				374	req->errors++;
				375	return req;
				376	}
				377	dprintk(DBG_RX, "%s: request %p: got %d bytes data\n",
				378	nbd->disk->disk_name, req, bvec->bv_len);
				379	}
				380	}
				381	return req;
				382	harderror:
				383	nbd->harderror = result;
				384	return NULL;
				385	}
				386
				387	static ssize_t pid_show(struct device *dev,
				388	struct device_attribute attr, char buf)
				389	{
				390	struct gendisk *disk = dev_to_disk(dev);
				391
				392	return sprintf(buf, "%ld\n",
				393	(long) ((struct nbd_device *)disk->private_data)->pid);
				394	}
				395
				396	static struct device_attribute pid_attr = {
				397	.attr = { .name = "pid", .mode = S_IRUGO},
				398	.show = pid_show,
				399	};
				400
				401	static int nbd_do_it(struct nbd_device *nbd)
				402	{
				403	struct request *req;
				404	int ret;
				405
				406	BUG_ON(nbd->magic != NBD_MAGIC);
				407
				408	nbd->pid = task_pid_nr(current);
				409	ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
				410	if (ret) {
				411	dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
				412	nbd->pid = 0;
				413	return ret;
				414	}
				415
				416	while ((req = nbd_read_stat(nbd)) != NULL)
				417	nbd_end_request(req);
				418
				419	device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
				420	nbd->pid = 0;
				421	return 0;
				422	}
				423
				424	static void nbd_clear_que(struct nbd_device *nbd)
				425	{
				426	struct request *req;
				427
				428	BUG_ON(nbd->magic != NBD_MAGIC);
				429
				430	/*
				431	* Because we have set nbd->sock to NULL under the tx_lock, all
				432	* modifications to the list must have completed by now. For
				433	* the same reason, the active_req must be NULL.
				434	*
				435	* As a consequence, we don't need to take the spin lock while
				436	* purging the list here.
				437	*/
				438	BUG_ON(nbd->sock);
				439	BUG_ON(nbd->active_req);
				440
				441	while (!list_empty(&nbd->queue_head)) {
				442	req = list_entry(nbd->queue_head.next, struct request,
				443	queuelist);
				444	list_del_init(&req->queuelist);
				445	req->errors++;
				446	nbd_end_request(req);
				447	}
				448
				449	while (!list_empty(&nbd->waiting_queue)) {
				450	req = list_entry(nbd->waiting_queue.next, struct request,
				451	queuelist);
				452	list_del_init(&req->queuelist);
				453	req->errors++;
				454	nbd_end_request(req);
				455	}
				456	}
				457
				458
				459	static void nbd_handle_req(struct nbd_device nbd, struct request req)
				460	{
				461	if (req->cmd_type != REQ_TYPE_FS)
				462	goto error_out;
				463
				464	nbd_cmd(req) = NBD_CMD_READ;
				465	if (rq_data_dir(req) == WRITE) {
				466	nbd_cmd(req) = NBD_CMD_WRITE;
				467	if (nbd->flags & NBD_READ_ONLY) {
				468	dev_err(disk_to_dev(nbd->disk),
				469	"Write on read-only\n");
				470	goto error_out;
				471	}
				472	}
				473
				474	req->errors = 0;
				475
				476	mutex_lock(&nbd->tx_lock);
				477	if (unlikely(!nbd->sock)) {
				478	mutex_unlock(&nbd->tx_lock);
				479	dev_err(disk_to_dev(nbd->disk),
				480	"Attempted send on closed socket\n");
				481	goto error_out;
				482	}
				483
				484	nbd->active_req = req;
				485
				486	if (nbd_send_req(nbd, req) != 0) {
				487	dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
				488	req->errors++;
				489	nbd_end_request(req);
				490	} else {
				491	spin_lock(&nbd->queue_lock);
				492	list_add(&req->queuelist, &nbd->queue_head);
				493	spin_unlock(&nbd->queue_lock);
				494	}
				495
				496	nbd->active_req = NULL;
				497	mutex_unlock(&nbd->tx_lock);
				498	wake_up_all(&nbd->active_wq);
				499
				500	return;
				501
				502	error_out:
				503	req->errors++;
				504	nbd_end_request(req);
				505	}
				506
				507	static int nbd_thread(void *data)
				508	{
				509	struct nbd_device *nbd = data;
				510	struct request *req;
				511
				512	set_user_nice(current, -20);
				513	while (!kthread_should_stop() \|\| !list_empty(&nbd->waiting_queue)) {
				514	/* wait for something to do */
				515	wait_event_interruptible(nbd->waiting_wq,
				516	kthread_should_stop() \|\|
				517	!list_empty(&nbd->waiting_queue));
				518
				519	/* extract request */
				520	if (list_empty(&nbd->waiting_queue))
				521	continue;
				522
				523	spin_lock_irq(&nbd->queue_lock);
				524	req = list_entry(nbd->waiting_queue.next, struct request,
				525	queuelist);
				526	list_del_init(&req->queuelist);
				527	spin_unlock_irq(&nbd->queue_lock);
				528
				529	/* handle request */
				530	nbd_handle_req(nbd, req);
				531	}
				532	return 0;
				533	}
				534
				535	/*
				536	* We always wait for result of write, for now. It would be nice to make it optional
				537	* in future
				538	* if ((rq_data_dir(req) == WRITE) && (nbd->flags & NBD_WRITE_NOCHK))
				539	* { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
				540	*/
				541
				542	static void do_nbd_request(struct request_queue *q)
				543	{
				544	struct request *req;
				545
				546	while ((req = blk_fetch_request(q)) != NULL) {
				547	struct nbd_device *nbd;
				548
				549	spin_unlock_irq(q->queue_lock);
				550
				551	dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n",
				552	req->rq_disk->disk_name, req, req->cmd_type);
				553
				554	nbd = req->rq_disk->private_data;
				555
				556	BUG_ON(nbd->magic != NBD_MAGIC);
				557
				558	if (unlikely(!nbd->sock)) {
				559	dev_err(disk_to_dev(nbd->disk),
				560	"Attempted send on closed socket\n");
				561	req->errors++;
				562	nbd_end_request(req);
				563	spin_lock_irq(q->queue_lock);
				564	continue;
				565	}
				566
				567	spin_lock_irq(&nbd->queue_lock);
				568	list_add_tail(&req->queuelist, &nbd->waiting_queue);
				569	spin_unlock_irq(&nbd->queue_lock);
				570
				571	wake_up(&nbd->waiting_wq);
				572
				573	spin_lock_irq(q->queue_lock);
				574	}
				575	}
				576
				577	/* Must be called with tx_lock held */
				578
				579	static int __nbd_ioctl(struct block_device bdev, struct nbd_device nbd,
				580	unsigned int cmd, unsigned long arg)
				581	{
				582	switch (cmd) {
				583	case NBD_DISCONNECT: {
				584	struct request sreq;
				585
				586	dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
				587	if (!nbd->sock)
				588	return -EINVAL;
				589
				590	mutex_unlock(&nbd->tx_lock);
				591	fsync_bdev(bdev);
				592	mutex_lock(&nbd->tx_lock);
				593	blk_rq_init(NULL, &sreq);
				594	sreq.cmd_type = REQ_TYPE_SPECIAL;
				595	nbd_cmd(&sreq) = NBD_CMD_DISC;
				596
				597	/* Check again after getting mutex back. */
				598	if (!nbd->sock)
				599	return -EINVAL;
				600
				601	nbd->disconnect = 1;
				602
				603	nbd_send_req(nbd, &sreq);
				604	return 0;
				605	}
				606
				607	case NBD_CLEAR_SOCK: {
				608	struct file *file;
				609
				610	nbd->sock = NULL;
				611	file = nbd->file;
				612	nbd->file = NULL;
				613	nbd_clear_que(nbd);
				614	BUG_ON(!list_empty(&nbd->queue_head));
				615	BUG_ON(!list_empty(&nbd->waiting_queue));
				616	kill_bdev(bdev);
				617	if (file)
				618	fput(file);
				619	return 0;
				620	}
				621
				622	case NBD_SET_SOCK: {
				623	struct file *file;
				624	if (nbd->file)
				625	return -EBUSY;
				626	file = fget(arg);
				627	if (file) {
				628	struct inode *inode = file->f_path.dentry->d_inode;
				629	if (S_ISSOCK(inode->i_mode)) {
				630	nbd->file = file;
				631	nbd->sock = SOCKET_I(inode);
				632	if (max_part > 0)
				633	bdev->bd_invalidated = 1;
				634	nbd->disconnect = 0; /* we're connected now */
				635	return 0;
				636	} else {
				637	fput(file);
				638	}
				639	}
				640	return -EINVAL;
				641	}
				642
				643	case NBD_SET_BLKSIZE:
				644	nbd->blksize = arg;
				645	nbd->bytesize &= ~(nbd->blksize-1);
				646	bdev->bd_inode->i_size = nbd->bytesize;
				647	set_blocksize(bdev, nbd->blksize);
				648	set_capacity(nbd->disk, nbd->bytesize >> 9);
				649	return 0;
				650
				651	case NBD_SET_SIZE:
				652	nbd->bytesize = arg & ~(nbd->blksize-1);
				653	bdev->bd_inode->i_size = nbd->bytesize;
				654	set_blocksize(bdev, nbd->blksize);
				655	set_capacity(nbd->disk, nbd->bytesize >> 9);
				656	return 0;
				657
				658	case NBD_SET_TIMEOUT:
				659	nbd->xmit_timeout = arg * HZ;
				660	return 0;
				661
				662	case NBD_SET_SIZE_BLOCKS:
				663	nbd->bytesize = ((u64) arg) * nbd->blksize;
				664	bdev->bd_inode->i_size = nbd->bytesize;
				665	set_blocksize(bdev, nbd->blksize);
				666	set_capacity(nbd->disk, nbd->bytesize >> 9);
				667	return 0;
				668
				669	case NBD_DO_IT: {
				670	struct task_struct *thread;
				671	struct file *file;
				672	int error;
				673
				674	if (nbd->pid)
				675	return -EBUSY;
				676	if (!nbd->file)
				677	return -EINVAL;
				678
				679	mutex_unlock(&nbd->tx_lock);
				680
				681	thread = kthread_create(nbd_thread, nbd, "%s",
				682	nbd->disk->disk_name);
				683	if (IS_ERR(thread)) {
				684	mutex_lock(&nbd->tx_lock);
				685	return PTR_ERR(thread);
				686	}
				687	wake_up_process(thread);
				688	error = nbd_do_it(nbd);
				689	kthread_stop(thread);
				690
				691	mutex_lock(&nbd->tx_lock);
				692	if (error)
				693	return error;
				694	sock_shutdown(nbd, 0);
				695	file = nbd->file;
				696	nbd->file = NULL;
				697	nbd_clear_que(nbd);
				698	dev_warn(disk_to_dev(nbd->disk), "queue cleared\n");
				699	kill_bdev(bdev);
				700	if (file)
				701	fput(file);
				702	nbd->bytesize = 0;
				703	bdev->bd_inode->i_size = 0;
				704	set_capacity(nbd->disk, 0);
				705	if (max_part > 0)
				706	ioctl_by_bdev(bdev, BLKRRPART, 0);
				707	if (nbd->disconnect) /* user requested, ignore socket errors */
				708	return 0;
				709	return nbd->harderror;
				710	}
				711
				712	case NBD_CLEAR_QUE:
				713	/*
				714	* This is for compatibility only. The queue is always cleared
				715	* by NBD_DO_IT or NBD_CLEAR_SOCK.
				716	*/
				717	BUG_ON(!nbd->sock && !list_empty(&nbd->queue_head));
				718	return 0;
				719
				720	case NBD_PRINT_DEBUG:
				721	dev_info(disk_to_dev(nbd->disk),
				722	"next = %p, prev = %p, head = %p\n",
				723	nbd->queue_head.next, nbd->queue_head.prev,
				724	&nbd->queue_head);
				725	return 0;
				726	}
				727	return -ENOTTY;
				728	}
				729
				730	static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
				731	unsigned int cmd, unsigned long arg)
				732	{
				733	struct nbd_device *nbd = bdev->bd_disk->private_data;
				734	int error;
				735
				736	if (!capable(CAP_SYS_ADMIN))
				737	return -EPERM;
				738
				739	BUG_ON(nbd->magic != NBD_MAGIC);
				740
				741	/* Anyone capable of this syscall can do real bad things */
				742	dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n",
				743	nbd->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg);
				744
				745	mutex_lock(&nbd->tx_lock);
				746	error = __nbd_ioctl(bdev, nbd, cmd, arg);
				747	mutex_unlock(&nbd->tx_lock);
				748
				749	return error;
				750	}
				751
				752	static const struct block_device_operations nbd_fops =
				753	{
				754	.owner = THIS_MODULE,
				755	.ioctl = nbd_ioctl,
				756	};
				757
				758	/*
				759	* And here should be modules and kernel interface
				760	* (Just smiley confuses emacs :-)
				761	*/
				762
				763	static int __init nbd_init(void)
				764	{
				765	int err = -ENOMEM;
				766	int i;
				767	int part_shift;
				768
				769	BUILD_BUG_ON(sizeof(struct nbd_request) != 28);
				770
				771	if (max_part < 0) {
				772	printk(KERN_ERR "nbd: max_part must be >= 0\n");
				773	return -EINVAL;
				774	}
				775
				776	part_shift = 0;
				777	if (max_part > 0) {
				778	part_shift = fls(max_part);
				779
				780	/*
				781	* Adjust max_part according to part_shift as it is exported
				782	* to user space so that user can know the max number of
				783	* partition kernel should be able to manage.
				784	*
				785	* Note that -1 is required because partition 0 is reserved
				786	* for the whole disk.
				787	*/
				788	max_part = (1UL << part_shift) - 1;
				789	}
				790
				791	if ((1UL << part_shift) > DISK_MAX_PARTS)
				792	return -EINVAL;
				793
				794	if (nbds_max > 1UL << (MINORBITS - part_shift))
				795	return -EINVAL;
				796
				797	nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL);
				798	if (!nbd_dev)
				799	return -ENOMEM;
				800
				801	for (i = 0; i < nbds_max; i++) {
				802	struct gendisk *disk = alloc_disk(1 << part_shift);
				803	if (!disk)
				804	goto out;
				805	nbd_dev[i].disk = disk;
				806	/*
				807	* The new linux 2.5 block layer implementation requires
				808	* every gendisk to have its very own request_queue struct.
				809	* These structs are big so we dynamically allocate them.
				810	*/
				811	disk->queue = blk_init_queue(do_nbd_request, &nbd_lock);
				812	if (!disk->queue) {
				813	put_disk(disk);
				814	goto out;
				815	}
				816	/*
				817	* Tell the block layer that we are not a rotational device
				818	*/
				819	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
				820	}
				821
				822	if (register_blkdev(NBD_MAJOR, "nbd")) {
				823	err = -EIO;
				824	goto out;
				825	}
				826
				827	printk(KERN_INFO "nbd: registered device at major %d\n", NBD_MAJOR);
				828	dprintk(DBG_INIT, "nbd: debugflags=0x%x\n", debugflags);
				829
				830	for (i = 0; i < nbds_max; i++) {
				831	struct gendisk *disk = nbd_dev[i].disk;
				832	nbd_dev[i].file = NULL;
				833	nbd_dev[i].magic = NBD_MAGIC;
				834	nbd_dev[i].flags = 0;
				835	INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
				836	spin_lock_init(&nbd_dev[i].queue_lock);
				837	INIT_LIST_HEAD(&nbd_dev[i].queue_head);
				838	mutex_init(&nbd_dev[i].tx_lock);
				839	init_waitqueue_head(&nbd_dev[i].active_wq);
				840	init_waitqueue_head(&nbd_dev[i].waiting_wq);
				841	nbd_dev[i].blksize = 1024;
				842	nbd_dev[i].bytesize = 0;
				843	disk->major = NBD_MAJOR;
				844	disk->first_minor = i << part_shift;
				845	disk->fops = &nbd_fops;
				846	disk->private_data = &nbd_dev[i];
				847	sprintf(disk->disk_name, "nbd%d", i);
				848	set_capacity(disk, 0);
				849	add_disk(disk);
				850	}
				851
				852	return 0;
				853	out:
				854	while (i--) {
				855	blk_cleanup_queue(nbd_dev[i].disk->queue);
				856	put_disk(nbd_dev[i].disk);
				857	}
				858	kfree(nbd_dev);
				859	return err;
				860	}
				861
				862	static void __exit nbd_cleanup(void)
				863	{
				864	int i;
				865	for (i = 0; i < nbds_max; i++) {
				866	struct gendisk *disk = nbd_dev[i].disk;
				867	nbd_dev[i].magic = 0;
				868	if (disk) {
				869	del_gendisk(disk);
				870	blk_cleanup_queue(disk->queue);
				871	put_disk(disk);
				872	}
				873	}
				874	unregister_blkdev(NBD_MAJOR, "nbd");
				875	kfree(nbd_dev);
				876	printk(KERN_INFO "nbd: unregistered device at major %d\n", NBD_MAJOR);
				877	}
				878
				879	module_init(nbd_init);
				880	module_exit(nbd_cleanup);
				881
				882	MODULE_DESCRIPTION("Network Block Device");
				883	MODULE_LICENSE("GPL");
				884
				885	module_param(nbds_max, int, 0444);
				886	MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
				887	module_param(max_part, int, 0444);
				888	MODULE_PARM_DESC(max_part, "number of partitions per device (default: 0)");
				889	#ifndef NDEBUG
				890	module_param(debugflags, int, 0644);
				891	MODULE_PARM_DESC(debugflags, "flags for controlling debug output");
				892	#endif