/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect
 *					algorithm. Lots of bug fixes.
 *	    Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	    Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge amount
 *					of socks hashed (this is for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *	    Artur Skawina	:	Hash function optimizations
 *	    Alexey Kuznetsov	:	Full scale SMP. Lot of bugs are introduced 8)
 *	    Malcolm Beattie	:	Set peercred for socketpair
 *	    Michal Ostrowski	:	Module initialization cleanup.
 *	    Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with 0, so that this name space does not intersect
 *		  with BSD names.
 */
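
/*
 * For illustration, the two kinds of bindings differ only in the first
 * byte of sun_path. A minimal userspace sketch (assumes <sys/un.h> and
 * <stddef.h>; not code from this file):
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *
 *	strcpy(a.sun_path, "/tmp/sock");          (filesystem name)
 *	bind(fd, (struct sockaddr *)&a, sizeof(a));
 *
 *	a.sun_path[0] = 0;                        (abstract name "\0name")
 *	memcpy(a.sun_path + 1, "name", 4);
 *	bind(fd, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 1 + 4);
 */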

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>

struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;

#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */

/*
 * SMP locking strategy:
 *    the hash table is protected with the spinlock unix_table_lock
 *    each socket state is protected by a separate spin lock.
 */

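/*
 * Fold the 32-bit checksum of the name down to a hash bucket index by
 * xor-ing the high bits in before masking with UNIX_HASH_SIZE-1, so
 * that entropy from every byte of the checksum reaches the low bits.
 */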
static inline unsigned unix_hash_fold(__wsum n)
{
	unsigned hash = (__force unsigned)n;
	hash ^= hash>>16;
	hash ^= hash>>8;
	return hash&(UNIX_HASH_SIZE-1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (atomic_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check a unix socket name:
 *		- it should not be zero length.
 *		- if it does not start with a zero byte, it should be
 *		  NUL-terminated (an FS object)
 *		- if it starts with a zero byte, it is an abstract name.
 */

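/*
 * For example, binding the pathname "/tmp/x" with addr_len ==
 * sizeof(struct sockaddr_un) is trimmed to strlen("/tmp/x") + 1 +
 * sizeof(short), while an abstract name keeps the caller's length
 * unchanged and additionally has *hashp computed over its bytes.
 */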
static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist. However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned hash)
{
	struct sock *s;
	struct hlist_node *node;

	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;
	struct hlist_node *node;

	spin_lock(&unix_table_lock);
	sk_for_each(s, node,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && dentry->d_inode == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

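/*
 * A unix socket counts as writable while no more than a quarter of its
 * send buffer is consumed by in-flight skbs (note the << 2 below).
 */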
static inline int unix_writable(struct sock *sk)
{
	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				POLLOUT | POLLWRNORM | POLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows us
 * to do flow control based only on wmem_alloc; second, an sk connected to a
 * peer may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal an error. Messages are lost. Do not do this
		 * when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What does the above comment talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}

static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct pid *old_pid = NULL;
	const struct cred *old_cred = NULL;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	put_pid(old_pid);
	if (old_cred)
		put_cred(old_cred);
out:
	return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t, int);

static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->readlock))
		return -EINTR;

	sk->sk_peek_off = val;
	mutex_unlock(&u->readlock);

	return 0;
}


static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};

/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;

static struct sock *unix_create1(struct net *net, struct socket *sock)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);
	lockdep_set_class(&sk->sk_receive_queue.lock,
				&af_unix_sk_receive_queue_lock_key);

	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->readlock); /* single task reading lock */
	init_waitqueue_head(&u->peer_wait);
	unix_insert_socket(unix_sockets_unbound, sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 * Believe it or not, BSD has AF_UNIX, SOCK_RAW though
		 * nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock) ? 0 : -ENOMEM;
}

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}

static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->readlock);
	if (err)
		return err;

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->readlock);
	return err;
}

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = path.dentry->d_inode;
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}


static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	struct dentry *dentry = NULL;
	struct path path;
	int err;
	unsigned hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	err = mutex_lock_interruptible(&u->readlock);
	if (err)
		goto out;

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		umode_t mode;
		err = 0;
		/*
		 * Get the parent directory and calculate the hash for the
		 * last component.
		 */
		dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
		err = PTR_ERR(dentry);
		if (IS_ERR(dentry))
			goto out_mknod_parent;

		/*
		 * All right, let's create it.
		 */
		mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = mnt_want_write(path.mnt);
		if (err)
			goto out_mknod_dput;
		err = security_path_mknod(&path, dentry, mode, 0);
		if (err)
			goto out_mknod_drop_write;
		err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
out_mknod_drop_write:
		mnt_drop_write(path.mnt);
		if (err)
			goto out_mknod_dput;
		mutex_unlock(&path.dentry->d_inode->i_mutex);
		dput(path.dentry);
		path.dentry = dentry;

		addr->hash = UNIX_HASH_SIZE;
	}

	spin_lock(&unix_table_lock);

	if (!sun_path[0]) {
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	} else {
		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
		u->path = path;
	}

	err = 0;
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;

out_mknod_dput:
	dput(dentry);
	mutex_unlock(&path.dentry->d_inode->i_mutex);
	path_put(&path);
out_mknod_parent:
	if (err == -EEXIST)
		err = -EADDRINUSE;
	unix_release_addr(addr);
	goto out_up;
}

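/*
 * Take both peers' state locks in a fixed (pointer) order so that two
 * tasks locking the same pair concurrently cannot deadlock; sk1 == sk2
 * (a socket connecting to itself) and sk2 == NULL degenerate to taking
 * a single lock.
 */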
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 * 1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

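/*
 * Park the caller exclusively on the peer's peer_wait queue until the
 * peer's receive queue has room, the peer dies or shuts down, or the
 * timeout elapses. Called with the peer's state lock held; returns
 * with it dropped, handing back the remaining timeout.
 */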
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we do this after the state is locked,
	   we will have to recheck everything again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/* Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   This is a tricky place. We need to grab our state lock and cannot
	   drop the lock on the peer. It is dangerous because deadlock is
	   possible. The connect-to-self case and simultaneous
	   attempts to connect are eliminated by checking the socket
	   state. other is TCP_LISTEN; if sk is TCP_LISTEN we
	   check this before attempting to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock */
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}

static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	u = unix_sk(sk);
	unix_state_lock(sk);
	if (!u->addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		struct unix_address *addr = u->addr;

		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	unix_state_unlock(sk);
	sock_put(sk);
out:
	return err;
}

static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	scm->fp = UNIXCB(skb).fp;
	UNIXCB(skb).fp = NULL;

	for (i = scm->fp->count-1; i >= 0; i--)
		unix_notinflight(scm->fp->fp[i]);
}

static void unix_destruct_scm(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	scm.pid  = UNIXCB(skb).pid;
	scm.cred = UNIXCB(skb).cred;
	if (UNIXCB(skb).fp)
		unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() had been SMP-safe since the last Summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}

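/*
 * SCM_RIGHTS may carry AF_UNIX sockets that themselves hold in-flight
 * AF_UNIX sockets; the nesting depth is capped so the garbage
 * collector never has to chase arbitrarily deep chains (senders get
 * -ETOOMANYREFS beyond this).
 */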
#define MAX_RECURSION_LEVEL 4

static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;
	unsigned char max_level = 0;
	int unix_sock_count = 0;

	for (i = scm->fp->count - 1; i >= 0; i--) {
		struct sock *sk = unix_get_socket(scm->fp->fp[i]);

		if (sk) {
			unix_sock_count++;
			max_level = max(max_level,
					unix_sk(sk)->recursion_level);
		}
	}
	if (unlikely(max_level > MAX_RECURSION_LEVEL))
		return -ETOOMANYREFS;

	/*
	 * Need to duplicate file references for the sake of garbage
	 * collection. Otherwise a socket in the fps might become a
	 * candidate for GC while the skb is not yet queued.
	 */
	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
	if (!UNIXCB(skb).fp)
		return -ENOMEM;

	if (unix_sock_count) {
		for (i = scm->fp->count - 1; i >= 0; i--)
			unix_inflight(scm->fp->fp[i]);
	}
	return max_level;
}

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid = get_pid(scm->pid);
	if (scm->cred)
		UNIXCB(skb).cred = get_cred(scm->cred);
	UNIXCB(skb).fp = NULL;
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if the source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).cred)
		return;
	if (test_bit(SOCK_PASSCRED, &sock->flags) ||
	    !other->sk_socket ||
	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
		UNIXCB(skb).pid  = get_pid(task_tgid(current));
		UNIXCB(skb).cred = get_current_cred();
	}
}

/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
			      struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie tmp_scm;
	int max_level;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(siocb->scm, skb, true);
	if (err < 0)
		goto out_free;
	max_level = err + 1;
	unix_get_secdata(siocb->scm, skb);

	skb_reset_transport_header(skb);
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	unix_state_lock(other);
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (sock_flag(other, SOCK_DEAD)) {
		/*
		 * Check with 1003.1g - what should
		 * datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		err = 0;
		unix_state_lock(sk);
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	if (unix_peer(other) != sk && unix_recvq_full(other)) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out_free;

		goto restart;
	}

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	skb_queue_tail(&other->sk_receive_queue, skb);
	if (max_level > unix_sk(other)->recursion_level)
		unix_sk(other)->recursion_level = max_level;
	unix_state_unlock(other);
	other->sk_data_ready(other, len);
	sock_put(other);
	scm_destroy(siocb->scm);
	return len;

out_unlock:
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	return err;
}


static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie tmp_scm;
	bool fds_sent = false;
	int max_level;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		/*
		 * Optimisation for the fact that under 0.01% of X
		 * messages typically need breaking up.
		 */

		size = len-sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > ((sk->sk_sndbuf >> 1) - 64))
			size = (sk->sk_sndbuf >> 1) - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 * Grab a buffer
		 */

		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
					  &err);

		if (skb == NULL)
			goto out_err;

		/*
		 * If you pass two values to sock_alloc_send_skb
		 * it tries to grab the large buffer with GFP_NOFS
		 * (which can fail easily), and if it fails grabs the
		 * fallback size buffer which is under a page and will
		 * succeed. [Alan]
		 */
		size = min_t(int, size, skb_tailroom(skb));


		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		max_level = err + 1;
		fds_sent = true;

		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		skb_queue_tail(&other->sk_receive_queue, skb);
		if (max_level > unix_sk(other)->recursion_level)
			unix_sk(other)->recursion_level = max_level;
		unix_state_unlock(other);
		other->sk_data_ready(other, size);
		sent += size;
	}

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	return sent ? : err;
}

static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
				  struct msghdr *msg, size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(kiocb, sock, msg, len);
}

static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
				  struct msghdr *msg, size_t size,
				  int flags)
{
	struct sock *sk = sock->sk;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
}

static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	if (u->addr) {
		msg->msg_namelen = u->addr->len;
		memcpy(msg->msg_name, u->addr->name, u->addr->len);
	}
}

static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
			      struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err;
	int peeked, skip;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	err = mutex_lock_interruptible(&u->readlock);
	if (unlikely(err)) {
		/* recvmsg() in non blocking mode is supposed to return -EAGAIN;
		 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
		 */
		err = noblock ? -EAGAIN : -ERESTARTSYS;
		goto out;
	}

	skip = sk_peek_offset(sk, flags);

	skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
	if (!skb) {
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out_unlock;
	}

	wake_up_interruptible_sync_poll(&u->peer_wait,
					POLLOUT | POLLWRNORM | POLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, skip, msg->msg_iov, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}
	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
	unix_set_secdata(siocb->scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(siocb->scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		   - not return fds - good, but too simple 8)
		   - return fds, and not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose this for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/

		sk_peek_offset_fwd(sk, size);
		if (UNIXCB(skb).fp)
			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, siocb->scm, flags);

out_free:
	skb_free_datagram(sk, skb);
out_unlock:
	mutex_unlock(&u->readlock);
out:
	return err;
}

/*
 * Sleep until data has arrived. But check for races..
 */

static long unix_stream_data_wait(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		if (!skb_queue_empty(&sk->sk_receive_queue) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
		unix_state_unlock(sk);
		timeo = schedule_timeout(timeo);
		unix_state_lock(sk);
		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}

static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t size,
			       int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	int copied = 0;
	int noblock = flags & MSG_DONTWAIT;
	int check_creds = 0;
	int target;
	int err = 0;
	long timeo;
	int skip;

	err = -EINVAL;
	if (sk->sk_state != TCP_ESTABLISHED)
		goto out;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	/* Lock the socket to prevent queue disordering
	 * while sleeping in memcpy_tomsg
	 */

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}

	err = mutex_lock_interruptible(&u->readlock);
	if (unlikely(err)) {
		/* recvmsg() in non blocking mode is supposed to return -EAGAIN;
		 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
		 */
		err = noblock ? -EAGAIN : -ERESTARTSYS;
		goto out;
	}

	skip = sk_peek_offset(sk, flags);

	do {
		int chunk;
		struct sk_buff *skb;

		unix_state_lock(sk);
		skb = skb_peek(&sk->sk_receive_queue);
again:
		if (skb == NULL) {
			unix_sk(sk)->recursion_level = 0;
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			err = -EAGAIN;
			if (!timeo)
				break;
			mutex_unlock(&u->readlock);

			timeo = unix_stream_data_wait(sk, timeo);

			if (signal_pending(current)
			    ||  mutex_lock_interruptible(&u->readlock)) {
				err = sock_intr_errno(timeo);
				goto out;
			}

			continue;
unlock:
			unix_state_unlock(sk);
			break;
		}

		if (skip >= skb->len) {
			skip -= skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
			    (UNIXCB(skb).cred != siocb->scm->cred))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
			check_creds = 1;
		}

		/* Copy address just once */
		if (sunaddr) {
			unix_copy_addr(msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, skb->len - skip, size);
		if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			skb_pull(skb, chunk);

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(siocb->scm, skb);

			if (skb->len)
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			if (siocb->scm->fp)
				break;
		} else {
			/* It is questionable, see the note in
			 * unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);

			sk_peek_offset_fwd(sk, chunk);

			break;
		}
	} while (size);

	mutex_unlock(&u->readlock);
	scm_recv(sock, msg, siocb->scm, flags);
out:
	return copied ? : err;
}

static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

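	/* SHUT_RD/SHUT_WR/SHUT_RDWR arrive as 0/1/2; "+1" maps them onto
	 * the RCV_SHUTDOWN/SEND_SHUTDOWN bit masks 1/2/3 before masking.
	 */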
	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);

	if (!mode)
		return 0;

	unix_state_lock(sk);
	sk->sk_shutdown |= mode;
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

		int peer_mode = 0;

		if (mode&RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode&SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		other->sk_shutdown |= peer_mode;
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}

long unix_inq_len(struct sock *sk)
{
	struct sk_buff *skb;
	long amount = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -EINVAL;

	spin_lock(&sk->sk_receive_queue.lock);
	if (sk->sk_type == SOCK_STREAM ||
	    sk->sk_type == SOCK_SEQPACKET) {
		skb_queue_walk(&sk->sk_receive_queue, skb)
			amount += skb->len;
	} else {
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);

long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);

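/*
 * unix_inq_len()/unix_outq_len() back the SIOCINQ/SIOCOUTQ ioctls
 * handled below; a userspace sketch:
 *
 *	int pending;
 *	if (ioctl(fd, SIOCINQ, &pending) == 0)
 *		(pending now holds the queued byte count)
 */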
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = unix_outq_len(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		amount = unix_inq_len(sk);
		if (amount < 0)
			err = amount;
		else
			err = put_user(amount, (int __user *)arg);
		break;
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}

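/*
 * poll() for SOCK_STREAM and SOCK_SEQPACKET sockets: build the event
 * mask from pending errors, shutdown state, queued data and write
 * space.
 */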
static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err)
		mask |= POLLERR;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= POLLHUP;

	/*
	 * We set writable also when the other side has shut down the
	 * connection.  This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;

	return mask;
}

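/*
 * poll() for datagram sockets.  The expensive part is the writability
 * check: a connected sender must also poll the peer's receive queue,
 * since a full peer queue blocks further sends.  That check is skipped
 * entirely when the caller did not ask for any POLLOUT-style events.
 */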
static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int mask, writable;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (POLLWRBAND | POLLWRNORM | POLLOUT)))
		return mask;

	writable = unix_writable(sk);
	other = unix_peer_get(sk);
	if (other) {
		if (unix_peer(other) != sk) {
			sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
			if (unix_recvq_full(other))
				writable = 0;
		}
		sock_put(other);
	}

	if (writable)
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}

#ifdef CONFIG_PROC_FS
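/*
 * /proc/net/unix support: walk the global unix_socket_table hash
 * chains in order.  first_unix_socket()/next_unix_socket() provide a
 * flat iteration over all chains; the seq_file callbacks below layer
 * per-netns filtering and locking (unix_table_lock) on top of them.
 */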
static struct sock *first_unix_socket(int *i)
{
	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
		if (!hlist_empty(&unix_socket_table[*i]))
			return __sk_head(&unix_socket_table[*i]);
	}
	return NULL;
}

static struct sock *next_unix_socket(int *i, struct sock *s)
{
	struct sock *next = sk_next(s);
	/* More in this chain? */
	if (next)
		return next;
	/* Look for next non-empty chain. */
	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
		if (!hlist_empty(&unix_socket_table[*i]))
			return __sk_head(&unix_socket_table[*i]);
	}
	return NULL;
}

struct unix_iter_state {
	struct seq_net_private p;
	int i;
};

static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
{
	struct unix_iter_state *iter = seq->private;
	loff_t off = 0;
	struct sock *s;

	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
		if (sock_net(s) != seq_file_net(seq))
			continue;
		if (off == pos)
			return s;
		++off;
	}
	return NULL;
}

static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);
	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct unix_iter_state *iter = seq->private;
	struct sock *sk = v;
	++*pos;

	if (v == SEQ_START_TOKEN)
		sk = first_unix_socket(&iter->i);
	else
		sk = next_unix_socket(&iter->i, sk);
	while (sk && (sock_net(sk) != seq_file_net(seq)))
		sk = next_unix_socket(&iter->i, sk);
	return sk;
}

static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}

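/*
 * Emit one line of /proc/net/unix per socket.  A typical entry looks
 * roughly like (illustrative values only):
 *
 *	ffff8800b05cb240: 00000002 00000000 00010000 0001 01 17890 /run/x.sock
 *
 * i.e. kernel address, refcount, protocol (always 0), flags, type,
 * socket state and inode, followed by the bound path; abstract names
 * are printed with a leading '@'.
 */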
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			atomic_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i]);
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}

static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};

static int unix_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &unix_seq_ops,
			    sizeof(struct unix_iter_state));
}

static const struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif

static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};

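/*
 * Per-namespace setup: give each netns its own dgram backlog sysctl
 * (default 10) and its own /proc/net/unix listing.  If the proc entry
 * cannot be created, the sysctl registration is rolled back.
 */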
static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}

static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};

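/*
 * Module init: unix_skb_parms lives in skb->cb, so fail the build if
 * it ever outgrows that scratch space, then register the protocol,
 * the PF_UNIX address family and the per-namespace operations.
 */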
static int __init af_unix_init(void)
{
	int rc = -1;
	struct sk_buff *dummy_skb;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
		       __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
out:
	return rc;
}

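/* Module exit: unregister the family, the protocol and the pernet ops. */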
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}

/* Earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries
 * to use a UNIX socket.  But later than subsys_initcall() because
 * we depend on stuff initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);