Blame - marvell/linux/net/unix/af_unix.c - T108

blob: c47a734e1f2da79063f4cff2ac9c847b2b9f774d [file] [log] [blame]

b.liu	e958203	2025-04-17 19:18:16 +0800	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0-or-later
				2	/*
				3	* NET4: Implementation of BSD Unix domain sockets.
				4	*
				5	* Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
				6	*
				7	* Fixes:
				8	* Linus Torvalds : Assorted bug cures.
				9	* Niibe Yutaka : async I/O support.
				10	* Carsten Paeth : PF_UNIX check, address fixes.
				11	* Alan Cox : Limit size of allocated blocks.
				12	* Alan Cox : Fixed the stupid socketpair bug.
				13	* Alan Cox : BSD compatibility fine tuning.
				14	* Alan Cox : Fixed a bug in connect when interrupted.
				15	* Alan Cox : Sorted out a proper draft version of
				16	* file descriptor passing hacked up from
				17	* Mike Shaver's work.
				18	* Marty Leisner : Fixes to fd passing
				19	* Nick Nevin : recvmsg bugfix.
				20	* Alan Cox : Started proper garbage collector
				21	* Heiko EiBfeldt : Missing verify_area check
				22	* Alan Cox : Started POSIXisms
				23	* Andreas Schwab : Replace inode by dentry for proper
				24	* reference counting
				25	* Kirk Petersen : Made this a module
				26	* Christoph Rohland : Elegant non-blocking accept/connect algorithm.
				27	* Lots of bug fixes.
				28	* Alexey Kuznetsov : Repaired (I hope) bugs introduced
				29	* by above two patches.
				30	* Andrea Arcangeli : If possible we block in connect(2)
				31	* if the max backlog of the listen socket
				32	* has been reached. This won't break
				33	* old apps and it will avoid a huge number
				34	* of socks hashed (this is for unix_gc()
				35	* performance reasons).
				36	* Security fix that limits the max
				37	* number of socks to 2*max_files and
				38	* the number of skb queueable in the
				39	* dgram receiver.
				40	* Artur Skawina : Hash function optimizations
				41	* Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
				42	* Malcolm Beattie : Set peercred for socketpair
				43	* Michal Ostrowski : Module initialization cleanup.
				44	* Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
				45	* the core infrastructure is doing that
				46	* for all net proto families now (2.5.69+)
				47	*
				48	* Known differences from reference BSD that was tested:
				49	*
				50	* [TO FIX]
				51	* ECONNREFUSED is not returned from one end of a connected() socket to the
				52	* other the moment one end closes.
				53	* fstat() doesn't return st_dev=0, and give the blksize as high water mark
				54	* and a fake inode identifier (nor the BSD first socket fstat twice bug).
				55	* [NOT TO FIX]
				56	* accept() returns a path name even if the connecting socket has closed
				57	* in the meantime (BSD loses the path and gives up).
				58	* accept() returns 0 length path for an unbound connector. BSD returns 16
				59	* and a null first byte in the path (but not for gethost/peername - BSD bug ??)
				60	* socketpair(...SOCK_RAW..) doesn't panic the kernel.
				61	* BSD af_unix apparently has connect forgetting to block properly.
				62	* (need to check this with the POSIX spec in detail)
				63	*
				64	* Differences from 2.0.0-11-... (ANK)
				65	* Bug fixes and improvements.
				66	* - client shutdown killed server socket.
				67	* - removed all useless cli/sti pairs.
				68	*
				69	* Semantic changes/extensions.
				70	* - generic control message passing.
				71	* - SCM_CREDENTIALS control message.
				72	* - "Abstract" (not FS based) socket bindings.
				73	* Abstract names are sequences of bytes (not zero terminated)
				74	* started by 0, so that this name space does not intersect
				75	* with BSD names.
				76	*/
				77
				78	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
				79
				80	#include <linux/module.h>
				81	#include <linux/kernel.h>
				82	#include <linux/signal.h>
				83	#include <linux/sched/signal.h>
				84	#include <linux/errno.h>
				85	#include <linux/string.h>
				86	#include <linux/stat.h>
				87	#include <linux/dcache.h>
				88	#include <linux/namei.h>
				89	#include <linux/socket.h>
				90	#include <linux/un.h>
				91	#include <linux/fcntl.h>
				92	#include <linux/termios.h>
				93	#include <linux/sockios.h>
				94	#include <linux/net.h>
				95	#include <linux/in.h>
				96	#include <linux/fs.h>
				97	#include <linux/slab.h>
				98	#include <linux/uaccess.h>
				99	#include <linux/skbuff.h>
				100	#include <linux/netdevice.h>
				101	#include <net/net_namespace.h>
				102	#include <net/sock.h>
				103	#include <net/tcp_states.h>
				104	#include <net/af_unix.h>
				105	#include <linux/proc_fs.h>
				106	#include <linux/seq_file.h>
				107	#include <net/scm.h>
				108	#include <linux/init.h>
				109	#include <linux/poll.h>
				110	#include <linux/rtnetlink.h>
				111	#include <linux/mount.h>
				112	#include <net/checksum.h>
				113	#include <linux/security.h>
				114	#include <linux/freezer.h>
				115	#include <linux/file.h>
				116
				117	#include "scm.h"
				118
				119	struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
				120	EXPORT_SYMBOL_GPL(unix_socket_table);
				121	DEFINE_SPINLOCK(unix_table_lock);
				122	EXPORT_SYMBOL_GPL(unix_table_lock);
				123	static atomic_long_t unix_nr_socks;
				124
				125
				126	static struct hlist_head unix_sockets_unbound(void addr)
				127	{
				128	unsigned long hash = (unsigned long)addr;
				129
				130	hash ^= hash >> 16;
				131	hash ^= hash >> 8;
				132	hash %= UNIX_HASH_SIZE;
				133	return &unix_socket_table[UNIX_HASH_SIZE + hash];
				134	}
				135
				/*
				 * Non-zero when the socket's address hash is below UNIX_HASH_SIZE.
				 * Dereferences unix_sk(sk)->addr, so the socket must have an address.
				 */
				#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
				137
				138	#ifdef CONFIG_SECURITY_NETWORK
				139	static void unix_get_secdata(struct scm_cookie scm, struct sk_buff skb)
				140	{
				141	UNIXCB(skb).secid = scm->secid;
				142	}
				143
				144	static inline void unix_set_secdata(struct scm_cookie scm, struct sk_buff skb)
				145	{
				146	scm->secid = UNIXCB(skb).secid;
				147	}
				148
				149	static inline bool unix_secdata_eq(struct scm_cookie scm, struct sk_buff skb)
				150	{
				151	return (scm->secid == UNIXCB(skb).secid);
				152	}
				153	#else
				154	static inline void unix_get_secdata(struct scm_cookie scm, struct sk_buff skb)
				155	{ }
				156
				157	static inline void unix_set_secdata(struct scm_cookie scm, struct sk_buff skb)
				158	{ }
				159
				160	static inline bool unix_secdata_eq(struct scm_cookie scm, struct sk_buff skb)
				161	{
				162	return true;
				163	}
				164	#endif /* CONFIG_SECURITY_NETWORK */
				165
				166	/*
				167	* SMP locking strategy:
				168	* hash table is protected with spinlock unix_table_lock
				169	* each socket state is protected by separate spin lock.
				170	*/
				171
				172	static inline unsigned int unix_hash_fold(__wsum n)
				173	{
				174	unsigned int hash = (__force unsigned int)csum_fold(n);
				175
				176	hash ^= hash>>8;
				177	return hash&(UNIX_HASH_SIZE-1);
				178	}
				179
				/* Lvalue accessor for the peer pointer of an AF_UNIX socket. */
				#define unix_peer(sk)	(unix_sk(sk)->peer)
				181
				/* Non-zero when @osk's peer pointer refers to @sk. */
				static inline int unix_our_peer(struct sock *sk, struct sock *osk)
				{
					return unix_peer(osk) == sk;
				}
				186
				187	static inline int unix_may_send(struct sock sk, struct sock osk)
				188	{
				189	return unix_peer(osk) == NULL \|\| unix_our_peer(sk, osk);
				190	}
				191
				192	static inline int unix_recvq_full_lockless(const struct sock *sk)
				193	{
				194	return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
				195	}
				196
				/*
				 * Return the peer of @s with a reference taken, or NULL when @s has no
				 * peer. The peer pointer is read and the reference acquired under the
				 * state lock of @s; the caller is responsible for the matching sock_put().
				 */
				struct sock *unix_peer_get(struct sock *s)
				{
					struct sock *peer;

					unix_state_lock(s);
					peer = unix_peer(s);
					if (peer)
						sock_hold(peer);
					unix_state_unlock(s);
					return peer;
				}
				EXPORT_SYMBOL_GPL(unix_peer_get);
				209
				210	static inline void unix_release_addr(struct unix_address *addr)
				211	{
				212	if (refcount_dec_and_test(&addr->refcnt))
				213	kfree(addr);
				214	}
				215
				216	/*
				217	* Check unix socket name:
				218	* - should be not zero length.
				219	* - if started by not zero, should be NULL terminated (FS object)
				220	* - if started by zero, it is abstract name.
				221	*/
				222
				223	static int unix_mkname(struct sockaddr_un sunaddr, int len, unsigned int hashp)
				224	{
				225	*hashp = 0;
				226
				227	if (len <= sizeof(short) \|\| len > sizeof(*sunaddr))
				228	return -EINVAL;
				229	if (!sunaddr \|\| sunaddr->sun_family != AF_UNIX)
				230	return -EINVAL;
				231	if (sunaddr->sun_path[0]) {
				232	/*
				233	* This may look like an off by one error but it is a bit more
				234	* subtle. 108 is the longest valid AF_UNIX path for a binding.
				235	* sun_path[108] doesn't as such exist. However in kernel space
				236	* we are guaranteed that it is a valid memory location in our
				237	* kernel address buffer.
				238	*/
				239	((char *)sunaddr)[len] = 0;
				240	len = strlen(sunaddr->sun_path)+1+sizeof(short);
				241	return len;
				242	}
				243
				244	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
				245	return len;
				246	}
				247
				/*
				 * Unlink @sk from its hash chain. Takes no lock itself; the locked
				 * variant unix_remove_socket() wraps this call in unix_table_lock.
				 */
				static void __unix_remove_socket(struct sock *sk)
				{
					sk_del_node_init(sk);
				}
				252
				/*
				 * Link @sk into the chain headed by @list. Warns if @sk is still hashed
				 * somewhere. Takes no lock itself; unix_insert_socket() is the variant
				 * that acquires unix_table_lock.
				 */
				static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
				{
					WARN_ON(!sk_unhashed(sk));
					sk_add_node(sk, list);
				}
				258
				259	static inline void unix_remove_socket(struct sock *sk)
				260	{
				261	spin_lock(&unix_table_lock);
				262	__unix_remove_socket(sk);
				263	spin_unlock(&unix_table_lock);
				264	}
				265
				266	static inline void unix_insert_socket(struct hlist_head list, struct sock sk)
				267	{
				268	spin_lock(&unix_table_lock);
				269	__unix_insert_socket(list, sk);
				270	spin_unlock(&unix_table_lock);
				271	}
				272
				273	static struct sock __unix_find_socket_byname(struct net net,
				274	struct sockaddr_un *sunname,
				275	int len, int type, unsigned int hash)
				276	{
				277	struct sock *s;
				278
				279	sk_for_each(s, &unix_socket_table[hash ^ type]) {
				280	struct unix_sock *u = unix_sk(s);
				281
				282	if (!net_eq(sock_net(s), net))
				283	continue;
				284
				285	if (u->addr->len == len &&
				286	!memcmp(u->addr->name, sunname, len))
				287	goto found;
				288	}
				289	s = NULL;
				290	found:
				291	return s;
				292	}
				293
				294	static inline struct sock unix_find_socket_byname(struct net net,
				295	struct sockaddr_un *sunname,
				296	int len, int type,
				297	unsigned int hash)
				298	{
				299	struct sock *s;
				300
				301	spin_lock(&unix_table_lock);
				302	s = __unix_find_socket_byname(net, sunname, len, type, hash);
				303	if (s)
				304	sock_hold(s);
				305	spin_unlock(&unix_table_lock);
				306	return s;
				307	}
				308
				309	static struct sock unix_find_socket_byinode(struct inode i)
				310	{
				311	struct sock *s;
				312
				313	spin_lock(&unix_table_lock);
				314	sk_for_each(s,
				315	&unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
				316	struct dentry *dentry = unix_sk(s)->path.dentry;
				317
				318	if (dentry && d_backing_inode(dentry) == i) {
				319	sock_hold(s);
				320	goto found;
				321	}
				322	}
				323	s = NULL;
				324	found:
				325	spin_unlock(&unix_table_lock);
				326	return s;
				327	}
				328
				329	/* Support code for asymmetrically connected dgram sockets
				330	*
				331	* If a datagram socket is connected to a socket not itself connected
				332	* to the first socket (eg, /dev/log), clients may only enqueue more
				333	* messages if the present receive queue of the server socket is not
				334	* "too large". This means there's a second writeability condition
				335	* poll and sendmsg need to test. The dgram recv code will do a wake
				336	* up on the peer_wait wait queue of a socket upon reception of a
				337	* datagram which needs to be propagated to sleeping would-be writers
				338	* since these might not have sent anything so far. This can't be
				339	* accomplished via poll_wait because the lifetime of the server
				340	* socket might be less than that of its clients if these break their
				341	* association with it or if the server socket is closed while clients
				342	* are still connected to it and there's no way to inform "a polling
				343	* implementation" that it should let go of a certain wait queue
				344	*
				345	* In order to propagate a wake up, a wait_queue_entry_t of the client
				346	* socket is enqueued on the peer_wait queue of the server socket
				347	* whose wake function does a wake_up on the ordinary client socket
				348	* wait queue. This connection is established whenever a write (or
				349	* poll for write) hit the flow control condition and broken when the
				350	* association to the server socket is dissolved or after a wake up
				351	* was relayed.
				352	*/
				353
				354	static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				355	void *key)
				356	{
				357	struct unix_sock *u;
				358	wait_queue_head_t *u_sleep;
				359
				360	u = container_of(q, struct unix_sock, peer_wake);
				361
				362	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
				363	q);
				364	u->peer_wake.private = NULL;
				365
				366	/* relaying can only happen while the wq still exists */
				367	u_sleep = sk_sleep(&u->sk);
				368	if (u_sleep)
				369	wake_up_interruptible_poll(u_sleep, key_to_poll(key));
				370
				371	return 0;
				372	}
				373
				374	static int unix_dgram_peer_wake_connect(struct sock sk, struct sock other)
				375	{
				376	struct unix_sock u, u_other;
				377	int rc;
				378
				379	u = unix_sk(sk);
				380	u_other = unix_sk(other);
				381	rc = 0;
				382	spin_lock(&u_other->peer_wait.lock);
				383
				384	if (!u->peer_wake.private) {
				385	u->peer_wake.private = other;
				386	__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
				387
				388	rc = 1;
				389	}
				390
				391	spin_unlock(&u_other->peer_wait.lock);
				392	return rc;
				393	}
				394
				395	static void unix_dgram_peer_wake_disconnect(struct sock *sk,
				396	struct sock *other)
				397	{
				398	struct unix_sock u, u_other;
				399
				400	u = unix_sk(sk);
				401	u_other = unix_sk(other);
				402	spin_lock(&u_other->peer_wait.lock);
				403
				404	if (u->peer_wake.private == other) {
				405	__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
				406	u->peer_wake.private = NULL;
				407	}
				408
				409	spin_unlock(&u_other->peer_wait.lock);
				410	}
				411
				412	static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
				413	struct sock *other)
				414	{
				415	unix_dgram_peer_wake_disconnect(sk, other);
				416	wake_up_interruptible_poll(sk_sleep(sk),
				417	EPOLLOUT \|
				418	EPOLLWRNORM \|
				419	EPOLLWRBAND);
				420	}
				421
				422	/* preconditions:
				423	* - unix_peer(sk) == other
				424	* - association is stable
				425	*/
				426	static int unix_dgram_peer_wake_me(struct sock sk, struct sock other)
				427	{
				428	int connected;
				429
				430	connected = unix_dgram_peer_wake_connect(sk, other);
				431
				432	/* If other is SOCK_DEAD, we want to make sure we signal
				433	* POLLOUT, such that a subsequent write() can get a
				434	* -ECONNREFUSED. Otherwise, if we haven't queued any skbs
				435	* to other and its full, we will hang waiting for POLLOUT.
				436	*/
				437	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
				438	return 1;
				439
				440	if (connected)
				441	unix_dgram_peer_wake_disconnect(sk, other);
				442
				443	return 0;
				444	}
				445
				446	static int unix_writable(const struct sock *sk, unsigned char state)
				447	{
				448	return state != TCP_LISTEN &&
				449	(refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
				450	}
				451
				452	static void unix_write_space(struct sock *sk)
				453	{
				454	struct socket_wq *wq;
				455
				456	rcu_read_lock();
				457	if (unix_writable(sk, READ_ONCE(sk->sk_state))) {
				458	wq = rcu_dereference(sk->sk_wq);
				459	if (skwq_has_sleeper(wq))
				460	wake_up_interruptible_sync_poll(&wq->wait,
				461	EPOLLOUT \| EPOLLWRNORM \| EPOLLWRBAND);
				462	sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
				463	}
				464	rcu_read_unlock();
				465	}
				466
				467	/* When dgram socket disconnects (or changes its peer), we clear its receive
				468	* queue of packets arrived from previous peer. First, it allows to do
				469	* flow control based only on wmem_alloc; second, sk connected to peer
				470	* may receive messages only from that peer. */
				471	static void unix_dgram_disconnected(struct sock sk, struct sock other)
				472	{
				473	if (!skb_queue_empty(&sk->sk_receive_queue)) {
				474	skb_queue_purge(&sk->sk_receive_queue);
				475	wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
				476
				477	/* If one link of bidirectional dgram pipe is disconnected,
				478	* we signal error. Messages are lost. Do not make this,
				479	* when peer was not connected to us.
				480	*/
				481	if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
				482	other->sk_err = ECONNRESET;
				483	other->sk_error_report(other);
				484	}
				485	}
				486	}
				487
				488	static void unix_sock_destructor(struct sock *sk)
				489	{
				490	struct unix_sock *u = unix_sk(sk);
				491
				492	skb_queue_purge(&sk->sk_receive_queue);
				493
				494	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
				495	WARN_ON(!sk_unhashed(sk));
				496	WARN_ON(sk->sk_socket);
				497	if (!sock_flag(sk, SOCK_DEAD)) {
				498	pr_info("Attempt to release alive unix socket: %p\n", sk);
				499	return;
				500	}
				501
				502	if (u->addr)
				503	unix_release_addr(u->addr);
				504
				505	atomic_long_dec(&unix_nr_socks);
				506	local_bh_disable();
				507	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
				508	local_bh_enable();
				509	#ifdef UNIX_REFCNT_DEBUG
				510	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
				511	atomic_long_read(&unix_nr_socks));
				512	#endif
				513	}
				514
				515	static void unix_release_sock(struct sock *sk, int embrion)
				516	{
				517	struct unix_sock *u = unix_sk(sk);
				518	struct path path;
				519	struct sock *skpair;
				520	struct sk_buff *skb;
				521	int state;
				522
				523	unix_remove_socket(sk);
				524
				525	/* Clear state */
				526	unix_state_lock(sk);
				527	sock_orphan(sk);
				528	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
				529	path = u->path;
				530	u->path.dentry = NULL;
				531	u->path.mnt = NULL;
				532	state = sk->sk_state;
				533	sk->sk_state = TCP_CLOSE;
				534
				535	skpair = unix_peer(sk);
				536	unix_peer(sk) = NULL;
				537
				538	unix_state_unlock(sk);
				539
				540	wake_up_interruptible_all(&u->peer_wait);
				541
				542	if (skpair != NULL) {
				543	if (sk->sk_type == SOCK_STREAM \|\| sk->sk_type == SOCK_SEQPACKET) {
				544	unix_state_lock(skpair);
				545	/* No more writes */
				546	WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
				547	if (!skb_queue_empty(&sk->sk_receive_queue) \|\| embrion)
				548	skpair->sk_err = ECONNRESET;
				549	unix_state_unlock(skpair);
				550	skpair->sk_state_change(skpair);
				551	sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
				552	}
				553
				554	unix_dgram_peer_wake_disconnect(sk, skpair);
				555	sock_put(skpair); /* It may now die */
				556	}
				557
				558	/* Try to flush out this socket. Throw out buffers at least */
				559
				560	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
				561	if (state == TCP_LISTEN)
				562	unix_release_sock(skb->sk, 1);
				563	/* passed fds are erased in the kfree_skb hook */
				564	UNIXCB(skb).consumed = skb->len;
				565	kfree_skb(skb);
				566	}
				567
				568	if (path.dentry)
				569	path_put(&path);
				570
				571	sock_put(sk);
				572
				573	/* ---- Socket is dead now and most probably destroyed ---- */
				574
				575	/*
				576	* Fixme: BSD difference: In BSD all sockets connected to us get
				577	* ECONNRESET and we die on the spot. In Linux we behave
				578	* like files and pipes do and wait for the last
				579	* dereference.
				580	*
				581	* Can't we simply set sock->err?
				582	*
				583	* What the above comment does talk about? --ANK(980817)
				584	*/
				585
				586	if (READ_ONCE(unix_tot_inflight))
				587	unix_gc(); /* Garbage collect fds */
				588	}
				589
				590	static void init_peercred(struct sock *sk)
				591	{
				592	const struct cred *old_cred;
				593	struct pid *old_pid;
				594
				595	spin_lock(&sk->sk_peer_lock);
				596	old_pid = sk->sk_peer_pid;
				597	old_cred = sk->sk_peer_cred;
				598	sk->sk_peer_pid = get_pid(task_tgid(current));
				599	sk->sk_peer_cred = get_current_cred();
				600	spin_unlock(&sk->sk_peer_lock);
				601
				602	put_pid(old_pid);
				603	put_cred(old_cred);
				604	}
				605
				606	static void copy_peercred(struct sock sk, struct sock peersk)
				607	{
				608	if (sk < peersk) {
				609	spin_lock(&sk->sk_peer_lock);
				610	spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
				611	} else {
				612	spin_lock(&peersk->sk_peer_lock);
				613	spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
				614	}
				615
				616	sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
				617	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
				618
				619	spin_unlock(&sk->sk_peer_lock);
				620	spin_unlock(&peersk->sk_peer_lock);
				621	}
				622
				623	static int unix_listen(struct socket *sock, int backlog)
				624	{
				625	int err;
				626	struct sock *sk = sock->sk;
				627	struct unix_sock *u = unix_sk(sk);
				628	struct pid *old_pid = NULL;
				629
				630	err = -EOPNOTSUPP;
				631	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
				632	goto out; /* Only stream/seqpacket sockets accept */
				633	err = -EINVAL;
				634	if (!u->addr)
				635	goto out; /* No listens on an unbound socket */
				636	unix_state_lock(sk);
				637	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
				638	goto out_unlock;
				639	if (backlog > sk->sk_max_ack_backlog)
				640	wake_up_interruptible_all(&u->peer_wait);
				641	sk->sk_max_ack_backlog = backlog;
				642	sk->sk_state = TCP_LISTEN;
				643	/* set credentials so connect can copy them */
				644	init_peercred(sk);
				645	err = 0;
				646
				647	out_unlock:
				648	unix_state_unlock(sk);
				649	put_pid(old_pid);
				650	out:
				651	return err;
				652	}
				653
				654	static int unix_release(struct socket *);
				655	static int unix_bind(struct socket , struct sockaddr , int);
				656	static int unix_stream_connect(struct socket , struct sockaddr ,
				657	int addr_len, int flags);
				658	static int unix_socketpair(struct socket , struct socket );
				659	static int unix_accept(struct socket , struct socket , int, bool);
				660	static int unix_getname(struct socket , struct sockaddr , int);
				661	static __poll_t unix_poll(struct file , struct socket , poll_table *);
				662	static __poll_t unix_dgram_poll(struct file , struct socket ,
				663	poll_table *);
				664	static int unix_ioctl(struct socket *, unsigned int, unsigned long);
				665	#ifdef CONFIG_COMPAT
				666	static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
				667	#endif
				668	static int unix_shutdown(struct socket *, int);
				669	static int unix_stream_sendmsg(struct socket , struct msghdr , size_t);
				670	static int unix_stream_recvmsg(struct socket , struct msghdr , size_t, int);
				671	static ssize_t unix_stream_sendpage(struct socket , struct page , int offset,
				672	size_t size, int flags);
				673	static ssize_t unix_stream_splice_read(struct socket , loff_t ppos,
				674	struct pipe_inode_info *, size_t size,
				675	unsigned int flags);
				676	static int unix_dgram_sendmsg(struct socket , struct msghdr , size_t);
				677	static int unix_dgram_recvmsg(struct socket , struct msghdr , size_t, int);
				678	static int unix_dgram_connect(struct socket , struct sockaddr ,
				679	int, int);
				680	static int unix_seqpacket_sendmsg(struct socket , struct msghdr , size_t);
				681	static int unix_seqpacket_recvmsg(struct socket , struct msghdr , size_t,
				682	int);
				683
				684	static int unix_set_peek_off(struct sock *sk, int val)
				685	{
				686	struct unix_sock *u = unix_sk(sk);
				687
				688	if (mutex_lock_interruptible(&u->iolock))
				689	return -EINTR;
				690
				691	WRITE_ONCE(sk->sk_peek_off, val);
				692	mutex_unlock(&u->iolock);
				693
				694	return 0;
				695	}
				696
				697
				698	static const struct proto_ops unix_stream_ops = {
				699	.family = PF_UNIX,
				700	.owner = THIS_MODULE,
				701	.release = unix_release,
				702	.bind = unix_bind,
				703	.connect = unix_stream_connect,
				704	.socketpair = unix_socketpair,
				705	.accept = unix_accept,
				706	.getname = unix_getname,
				707	.poll = unix_poll,
				708	.ioctl = unix_ioctl,
				709	#ifdef CONFIG_COMPAT
				710	.compat_ioctl = unix_compat_ioctl,
				711	#endif
				712	.listen = unix_listen,
				713	.shutdown = unix_shutdown,
				714	.setsockopt = sock_no_setsockopt,
				715	.getsockopt = sock_no_getsockopt,
				716	.sendmsg = unix_stream_sendmsg,
				717	.recvmsg = unix_stream_recvmsg,
				718	.mmap = sock_no_mmap,
				719	.sendpage = unix_stream_sendpage,
				720	.splice_read = unix_stream_splice_read,
				721	.set_peek_off = unix_set_peek_off,
				722	};
				723
				724	static const struct proto_ops unix_dgram_ops = {
				725	.family = PF_UNIX,
				726	.owner = THIS_MODULE,
				727	.release = unix_release,
				728	.bind = unix_bind,
				729	.connect = unix_dgram_connect,
				730	.socketpair = unix_socketpair,
				731	.accept = sock_no_accept,
				732	.getname = unix_getname,
				733	.poll = unix_dgram_poll,
				734	.ioctl = unix_ioctl,
				735	#ifdef CONFIG_COMPAT
				736	.compat_ioctl = unix_compat_ioctl,
				737	#endif
				738	.listen = sock_no_listen,
				739	.shutdown = unix_shutdown,
				740	.setsockopt = sock_no_setsockopt,
				741	.getsockopt = sock_no_getsockopt,
				742	.sendmsg = unix_dgram_sendmsg,
				743	.recvmsg = unix_dgram_recvmsg,
				744	.mmap = sock_no_mmap,
				745	.sendpage = sock_no_sendpage,
				746	.set_peek_off = unix_set_peek_off,
				747	};
				748
				749	static const struct proto_ops unix_seqpacket_ops = {
				750	.family = PF_UNIX,
				751	.owner = THIS_MODULE,
				752	.release = unix_release,
				753	.bind = unix_bind,
				754	.connect = unix_stream_connect,
				755	.socketpair = unix_socketpair,
				756	.accept = unix_accept,
				757	.getname = unix_getname,
				758	.poll = unix_dgram_poll,
				759	.ioctl = unix_ioctl,
				760	#ifdef CONFIG_COMPAT
				761	.compat_ioctl = unix_compat_ioctl,
				762	#endif
				763	.listen = unix_listen,
				764	.shutdown = unix_shutdown,
				765	.setsockopt = sock_no_setsockopt,
				766	.getsockopt = sock_no_getsockopt,
				767	.sendmsg = unix_seqpacket_sendmsg,
				768	.recvmsg = unix_seqpacket_recvmsg,
				769	.mmap = sock_no_mmap,
				770	.sendpage = sock_no_sendpage,
				771	.set_peek_off = unix_set_peek_off,
				772	};
				773
				774	static struct proto unix_proto = {
				775	.name = "UNIX",
				776	.owner = THIS_MODULE,
				777	.obj_size = sizeof(struct unix_sock),
				778	};
				779
				780	static struct sock unix_create1(struct net net, struct socket *sock, int kern)
				781	{
				782	struct sock *sk = NULL;
				783	struct unix_sock *u;
				784
				785	atomic_long_inc(&unix_nr_socks);
				786	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
				787	goto out;
				788
				789	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
				790	if (!sk)
				791	goto out;
				792
				793	sock_init_data(sock, sk);
				794
				795	sk->sk_allocation = GFP_KERNEL_ACCOUNT;
				796	sk->sk_write_space = unix_write_space;
				797	sk->sk_max_ack_backlog = READ_ONCE(net->unx.sysctl_max_dgram_qlen);
				798	sk->sk_destruct = unix_sock_destructor;
				799	u = unix_sk(sk);
				800	u->inflight = 0;
				801	u->path.dentry = NULL;
				802	u->path.mnt = NULL;
				803	spin_lock_init(&u->lock);
				804	INIT_LIST_HEAD(&u->link);
				805	mutex_init(&u->iolock); /* single task reading lock */
				806	mutex_init(&u->bindlock); /* single task binding lock */
				807	init_waitqueue_head(&u->peer_wait);
				808	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
				809	unix_insert_socket(unix_sockets_unbound(sk), sk);
				810	out:
				811	if (sk == NULL)
				812	atomic_long_dec(&unix_nr_socks);
				813	else {
				814	local_bh_disable();
				815	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
				816	local_bh_enable();
				817	}
				818	return sk;
				819	}
				820
				821	static int unix_create(struct net net, struct socket sock, int protocol,
				822	int kern)
				823	{
				824	if (protocol && protocol != PF_UNIX)
				825	return -EPROTONOSUPPORT;
				826
				827	sock->state = SS_UNCONNECTED;
				828
				829	switch (sock->type) {
				830	case SOCK_STREAM:
				831	sock->ops = &unix_stream_ops;
				832	break;
				833	/*
				834	* Believe it or not BSD has AF_UNIX, SOCK_RAW though
				835	* nothing uses it.
				836	*/
				837	case SOCK_RAW:
				838	sock->type = SOCK_DGRAM;
				839	/* fall through */
				840	case SOCK_DGRAM:
				841	sock->ops = &unix_dgram_ops;
				842	break;
				843	case SOCK_SEQPACKET:
				844	sock->ops = &unix_seqpacket_ops;
				845	break;
				846	default:
				847	return -ESOCKTNOSUPPORT;
				848	}
				849
				850	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
				851	}
				852
				853	static int unix_release(struct socket *sock)
				854	{
				855	struct sock *sk = sock->sk;
				856
				857	if (!sk)
				858	return 0;
				859
				860	unix_release_sock(sk, 0);
				861	sock->sk = NULL;
				862
				863	return 0;
				864	}
				865
				866	static int unix_autobind(struct socket *sock)
				867	{
				868	struct sock *sk = sock->sk;
				869	struct net *net = sock_net(sk);
				870	struct unix_sock *u = unix_sk(sk);
				871	static u32 ordernum = 1;
				872	struct unix_address *addr;
				873	int err;
				874	unsigned int retries = 0;
				875
				876	err = mutex_lock_interruptible(&u->bindlock);
				877	if (err)
				878	return err;
				879
				880	err = 0;
				881	if (u->addr)
				882	goto out;
				883
				884	err = -ENOMEM;
				885	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
				886	if (!addr)
				887	goto out;
				888
				889	addr->name->sun_family = AF_UNIX;
				890	refcount_set(&addr->refcnt, 1);
				891
				892	retry:
				893	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
				894	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
				895
				896	spin_lock(&unix_table_lock);
				897	ordernum = (ordernum+1)&0xFFFFF;
				898
				899	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				900	addr->hash)) {
				901	spin_unlock(&unix_table_lock);
				902	/*
				903	* __unix_find_socket_byname() may take long time if many names
				904	* are already in use.
				905	*/
				906	cond_resched();
				907	/* Give up if all names seems to be in use. */
				908	if (retries++ == 0xFFFFF) {
				909	err = -ENOSPC;
				910	kfree(addr);
				911	goto out;
				912	}
				913	goto retry;
				914	}
				915	addr->hash ^= sk->sk_type;
				916
				917	__unix_remove_socket(sk);
				918	smp_store_release(&u->addr, addr);
				919	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
				920	spin_unlock(&unix_table_lock);
				921	err = 0;
				922
				923	out: mutex_unlock(&u->bindlock);
				924	return err;
				925	}
				926
				/*
				 * Resolve a destination address to a socket.
				 *
				 * @net:     namespace used for name lookups.
				 * @sunname: destination address.
				 * @len:     address length.
				 * @type:    socket type the destination must have.
				 * @hash:    name hash as computed by unix_mkname().
				 * @error:   receives the negative errno on failure.
				 *
				 * A name whose first byte is non-zero is looked up in the filesystem:
				 * the path must resolve, be writable by the caller and be a socket
				 * inode with a matching socket type. Any other name is looked up by
				 * name in the hash table.
				 *
				 * Returns the socket with a reference held, or NULL with *@error set.
				 */
				static struct sock *unix_find_other(struct net *net,
								    struct sockaddr_un *sunname, int len,
								    int type, unsigned int hash, int *error)
				{
					struct sock *u;
					struct path path;
					int err = 0;

					if (sunname->sun_path[0]) {
						struct inode *inode;
						err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
						if (err)
							goto fail;
						inode = d_backing_inode(path.dentry);
						err = inode_permission(inode, MAY_WRITE);
						if (err)
							goto put_fail;

						err = -ECONNREFUSED;
						if (!S_ISSOCK(inode->i_mode))
							goto put_fail;
						u = unix_find_socket_byinode(inode);
						if (!u)
							goto put_fail;

						if (u->sk_type == type)
							touch_atime(&path);

						path_put(&path);

						err = -EPROTOTYPE;
						if (u->sk_type != type) {
							sock_put(u);
							goto fail;
						}
					} else {
						err = -ECONNREFUSED;
						u = unix_find_socket_byname(net, sunname, len, type, hash);
						if (u) {
							struct dentry *dentry;
							dentry = unix_sk(u)->path.dentry;
							if (dentry)
								touch_atime(&unix_sk(u)->path);
						} else
							goto fail;
					}
					return u;

				put_fail:
					path_put(&path);
				fail:
					*error = err;
					return NULL;
				}
				981
				982	static int unix_mknod(const char sun_path, umode_t mode, struct path res)
				983	{
				984	struct dentry *dentry;
				985	struct path path;
				986	int err = 0;
				987	/*
				988	* Get the parent directory, calculate the hash for last
				989	* component.
				990	*/
				991	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
				992	err = PTR_ERR(dentry);
				993	if (IS_ERR(dentry))
				994	return err;
				995
				996	/*
				997	* All right, let's create it.
				998	*/
				999	err = security_path_mknod(&path, dentry, mode, 0);
				1000	if (!err) {
				1001	err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
				1002	if (!err) {
				1003	res->mnt = mntget(path.mnt);
				1004	res->dentry = dget(dentry);
				1005	}
				1006	}
				1007	done_path_create(&path, dentry);
				1008	return err;
				1009	}
				1010
				1011	static int unix_bind(struct socket sock, struct sockaddr uaddr, int addr_len)
				1012	{
				1013	struct sock *sk = sock->sk;
				1014	struct net *net = sock_net(sk);
				1015	struct unix_sock *u = unix_sk(sk);
				1016	struct sockaddr_un sunaddr = (struct sockaddr_un )uaddr;
				1017	char *sun_path = sunaddr->sun_path;
				1018	int err;
				1019	unsigned int hash;
				1020	struct unix_address *addr;
				1021	struct hlist_head *list;
				1022	struct path path = { };
				1023
				1024	err = -EINVAL;
				1025	if (addr_len < offsetofend(struct sockaddr_un, sun_family) \|\|
				1026	sunaddr->sun_family != AF_UNIX)
				1027	goto out;
				1028
				1029	if (addr_len == sizeof(short)) {
				1030	err = unix_autobind(sock);
				1031	goto out;
				1032	}
				1033
				1034	err = unix_mkname(sunaddr, addr_len, &hash);
				1035	if (err < 0)
				1036	goto out;
				1037	addr_len = err;
				1038
				1039	if (sun_path[0]) {
				1040	umode_t mode = S_IFSOCK \|
				1041	(SOCK_INODE(sock)->i_mode & ~current_umask());
				1042	err = unix_mknod(sun_path, mode, &path);
				1043	if (err) {
				1044	if (err == -EEXIST)
				1045	err = -EADDRINUSE;
				1046	goto out;
				1047	}
				1048	}
				1049
				1050	err = mutex_lock_interruptible(&u->bindlock);
				1051	if (err)
				1052	goto out_put;
				1053
				1054	err = -EINVAL;
				1055	if (u->addr)
				1056	goto out_up;
				1057
				1058	err = -ENOMEM;
				1059	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
				1060	if (!addr)
				1061	goto out_up;
				1062
				1063	memcpy(addr->name, sunaddr, addr_len);
				1064	addr->len = addr_len;
				1065	addr->hash = hash ^ sk->sk_type;
				1066	refcount_set(&addr->refcnt, 1);
				1067
				1068	if (sun_path[0]) {
				1069	addr->hash = UNIX_HASH_SIZE;
				1070	hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
				1071	spin_lock(&unix_table_lock);
				1072	u->path = path;
				1073	list = &unix_socket_table[hash];
				1074	} else {
				1075	spin_lock(&unix_table_lock);
				1076	err = -EADDRINUSE;
				1077	if (__unix_find_socket_byname(net, sunaddr, addr_len,
				1078	sk->sk_type, hash)) {
				1079	unix_release_addr(addr);
				1080	goto out_unlock;
				1081	}
				1082
				1083	list = &unix_socket_table[addr->hash];
				1084	}
				1085
				1086	err = 0;
				1087	__unix_remove_socket(sk);
				1088	smp_store_release(&u->addr, addr);
				1089	__unix_insert_socket(list, sk);
				1090
				1091	out_unlock:
				1092	spin_unlock(&unix_table_lock);
				1093	out_up:
				1094	mutex_unlock(&u->bindlock);
				1095	out_put:
				1096	if (err)
				1097	path_put(&path);
				1098	out:
				1099	return err;
				1100	}
				1101
				1102	static void unix_state_double_lock(struct sock sk1, struct sock sk2)
				1103	{
				1104	if (unlikely(sk1 == sk2) \|\| !sk2) {
				1105	unix_state_lock(sk1);
				1106	return;
				1107	}
				1108	if (sk1 > sk2)
				1109	swap(sk1, sk2);
				1110
				1111	unix_state_lock(sk1);
				1112	unix_state_lock_nested(sk2, U_LOCK_SECOND);
				1113	}
				1114
				1115	static void unix_state_double_unlock(struct sock sk1, struct sock sk2)
				1116	{
				1117	if (unlikely(sk1 == sk2) \|\| !sk2) {
				1118	unix_state_unlock(sk1);
				1119	return;
				1120	}
				1121	unix_state_unlock(sk1);
				1122	unix_state_unlock(sk2);
				1123	}
				1124
				1125	static int unix_dgram_connect(struct socket sock, struct sockaddr addr,
				1126	int alen, int flags)
				1127	{
				1128	struct sock *sk = sock->sk;
				1129	struct net *net = sock_net(sk);
				1130	struct sockaddr_un sunaddr = (struct sockaddr_un )addr;
				1131	struct sock *other;
				1132	unsigned int hash;
				1133	int err;
				1134
				1135	err = -EINVAL;
				1136	if (alen < offsetofend(struct sockaddr, sa_family))
				1137	goto out;
				1138
				1139	if (addr->sa_family != AF_UNSPEC) {
				1140	err = unix_mkname(sunaddr, alen, &hash);
				1141	if (err < 0)
				1142	goto out;
				1143	alen = err;
				1144
				1145	if (test_bit(SOCK_PASSCRED, &sock->flags) &&
				1146	!unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
				1147	goto out;
				1148
				1149	restart:
				1150	other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
				1151	if (!other)
				1152	goto out;
				1153
				1154	unix_state_double_lock(sk, other);
				1155
				1156	/* Apparently VFS overslept socket death. Retry. */
				1157	if (sock_flag(other, SOCK_DEAD)) {
				1158	unix_state_double_unlock(sk, other);
				1159	sock_put(other);
				1160	goto restart;
				1161	}
				1162
				1163	err = -EPERM;
				1164	if (!unix_may_send(sk, other))
				1165	goto out_unlock;
				1166
				1167	err = security_unix_may_send(sk->sk_socket, other->sk_socket);
				1168	if (err)
				1169	goto out_unlock;
				1170
				1171	} else {
				1172	/*
				1173	* 1003.1g breaking connected state with AF_UNSPEC
				1174	*/
				1175	other = NULL;
				1176	unix_state_double_lock(sk, other);
				1177	}
				1178
				1179	/*
				1180	* If it was connected, reconnect.
				1181	*/
				1182	if (unix_peer(sk)) {
				1183	struct sock *old_peer = unix_peer(sk);
				1184	unix_peer(sk) = other;
				1185	unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
				1186
				1187	unix_state_double_unlock(sk, other);
				1188
				1189	if (other != old_peer)
				1190	unix_dgram_disconnected(sk, old_peer);
				1191	sock_put(old_peer);
				1192	} else {
				1193	unix_peer(sk) = other;
				1194	unix_state_double_unlock(sk, other);
				1195	}
				1196	return 0;
				1197
				1198	out_unlock:
				1199	unix_state_double_unlock(sk, other);
				1200	sock_put(other);
				1201	out:
				1202	return err;
				1203	}
				1204
				1205	static long unix_wait_for_peer(struct sock *other, long timeo)
				1206	{
				1207	struct unix_sock *u = unix_sk(other);
				1208	int sched;
				1209	DEFINE_WAIT(wait);
				1210
				1211	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
				1212
				1213	sched = !sock_flag(other, SOCK_DEAD) &&
				1214	!(other->sk_shutdown & RCV_SHUTDOWN) &&
				1215	unix_recvq_full_lockless(other);
				1216
				1217	unix_state_unlock(other);
				1218
				1219	if (sched)
				1220	timeo = schedule_timeout(timeo);
				1221
				1222	finish_wait(&u->peer_wait, &wait);
				1223	return timeo;
				1224	}
				1225
				1226	static int unix_stream_connect(struct socket sock, struct sockaddr uaddr,
				1227	int addr_len, int flags)
				1228	{
				1229	struct sockaddr_un sunaddr = (struct sockaddr_un )uaddr;
				1230	struct sock *sk = sock->sk;
				1231	struct net *net = sock_net(sk);
				1232	struct unix_sock u = unix_sk(sk), newu, *otheru;
				1233	struct sock *newsk = NULL;
				1234	struct sock *other = NULL;
				1235	struct sk_buff *skb = NULL;
				1236	unsigned int hash;
				1237	int st;
				1238	int err;
				1239	long timeo;
				1240
				1241	err = unix_mkname(sunaddr, addr_len, &hash);
				1242	if (err < 0)
				1243	goto out;
				1244	addr_len = err;
				1245
				1246	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
				1247	(err = unix_autobind(sock)) != 0)
				1248	goto out;
				1249
				1250	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
				1251
				1252	/* First of all allocate resources.
				1253	If we will make it after state is locked,
				1254	we will have to recheck all again in any case.
				1255	*/
				1256
				1257	err = -ENOMEM;
				1258
				1259	/* create new sock for complete connection */
				1260	newsk = unix_create1(sock_net(sk), NULL, 0);
				1261	if (newsk == NULL)
				1262	goto out;
				1263
				1264	/* Allocate skb for sending to listening sock */
				1265	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
				1266	if (skb == NULL)
				1267	goto out;
				1268
				1269	restart:
				1270	/* Find listening sock. */
				1271	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
				1272	if (!other)
				1273	goto out;
				1274
				1275	/* Latch state of peer */
				1276	unix_state_lock(other);
				1277
				1278	/* Apparently VFS overslept socket death. Retry. */
				1279	if (sock_flag(other, SOCK_DEAD)) {
				1280	unix_state_unlock(other);
				1281	sock_put(other);
				1282	goto restart;
				1283	}
				1284
				1285	err = -ECONNREFUSED;
				1286	if (other->sk_state != TCP_LISTEN)
				1287	goto out_unlock;
				1288	if (other->sk_shutdown & RCV_SHUTDOWN)
				1289	goto out_unlock;
				1290
				1291	if (unix_recvq_full_lockless(other)) {
				1292	err = -EAGAIN;
				1293	if (!timeo)
				1294	goto out_unlock;
				1295
				1296	timeo = unix_wait_for_peer(other, timeo);
				1297
				1298	err = sock_intr_errno(timeo);
				1299	if (signal_pending(current))
				1300	goto out;
				1301	sock_put(other);
				1302	goto restart;
				1303	}
				1304
				1305	/* Latch our state.
				1306
				1307	It is tricky place. We need to grab our state lock and cannot
				1308	drop lock on peer. It is dangerous because deadlock is
				1309	possible. Connect to self case and simultaneous
				1310	attempt to connect are eliminated by checking socket
				1311	state. other is TCP_LISTEN, if sk is TCP_LISTEN we
				1312	check this before attempt to grab lock.
				1313
				1314	Well, and we have to recheck the state after socket locked.
				1315	*/
				1316	st = sk->sk_state;
				1317
				1318	switch (st) {
				1319	case TCP_CLOSE:
				1320	/* This is ok... continue with connect */
				1321	break;
				1322	case TCP_ESTABLISHED:
				1323	/* Socket is already connected */
				1324	err = -EISCONN;
				1325	goto out_unlock;
				1326	default:
				1327	err = -EINVAL;
				1328	goto out_unlock;
				1329	}
				1330
				1331	unix_state_lock_nested(sk, U_LOCK_SECOND);
				1332
				1333	if (sk->sk_state != st) {
				1334	unix_state_unlock(sk);
				1335	unix_state_unlock(other);
				1336	sock_put(other);
				1337	goto restart;
				1338	}
				1339
				1340	err = security_unix_stream_connect(sk, other, newsk);
				1341	if (err) {
				1342	unix_state_unlock(sk);
				1343	goto out_unlock;
				1344	}
				1345
				1346	/* The way is open! Fastly set all the necessary fields... */
				1347
				1348	sock_hold(sk);
				1349	unix_peer(newsk) = sk;
				1350	newsk->sk_state = TCP_ESTABLISHED;
				1351	newsk->sk_type = sk->sk_type;
				1352	init_peercred(newsk);
				1353	newu = unix_sk(newsk);
				1354	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
				1355	otheru = unix_sk(other);
				1356
				1357	/* copy address information from listening to new sock
				1358	*
				1359	* The contents of *(otheru->addr) and otheru->path
				1360	* are seen fully set up here, since we have found
				1361	* otheru in hash under unix_table_lock. Insertion
				1362	* into the hash chain we'd found it in had been done
				1363	* in an earlier critical area protected by unix_table_lock,
				1364	* the same one where we'd set *(otheru->addr) contents,
				1365	* as well as otheru->path and otheru->addr itself.
				1366	*
				1367	* Using smp_store_release() here to set newu->addr
				1368	* is enough to make those stores, as well as stores
				1369	* to newu->path visible to anyone who gets newu->addr
				1370	* by smp_load_acquire(). IOW, the same warranties
				1371	* as for unix_sock instances bound in unix_bind() or
				1372	* in unix_autobind().
				1373	*/
				1374	if (otheru->path.dentry) {
				1375	path_get(&otheru->path);
				1376	newu->path = otheru->path;
				1377	}
				1378	refcount_inc(&otheru->addr->refcnt);
				1379	smp_store_release(&newu->addr, otheru->addr);
				1380
				1381	/* Set credentials */
				1382	copy_peercred(sk, other);
				1383
				1384	sock->state = SS_CONNECTED;
				1385	sk->sk_state = TCP_ESTABLISHED;
				1386	sock_hold(newsk);
				1387
				1388	smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
				1389	unix_peer(sk) = newsk;
				1390
				1391	unix_state_unlock(sk);
				1392
				1393	/* take ten and and send info to listening sock */
				1394	spin_lock(&other->sk_receive_queue.lock);
				1395	__skb_queue_tail(&other->sk_receive_queue, skb);
				1396	spin_unlock(&other->sk_receive_queue.lock);
				1397	unix_state_unlock(other);
				1398	other->sk_data_ready(other);
				1399	sock_put(other);
				1400	return 0;
				1401
				1402	out_unlock:
				1403	if (other)
				1404	unix_state_unlock(other);
				1405
				1406	out:
				1407	kfree_skb(skb);
				1408	if (newsk)
				1409	unix_release_sock(newsk, 0);
				1410	if (other)
				1411	sock_put(other);
				1412	return err;
				1413	}
				1414
				1415	static int unix_socketpair(struct socket socka, struct socket sockb)
				1416	{
				1417	struct sock ska = socka->sk, skb = sockb->sk;
				1418
				1419	/* Join our sockets back to back */
				1420	sock_hold(ska);
				1421	sock_hold(skb);
				1422	unix_peer(ska) = skb;
				1423	unix_peer(skb) = ska;
				1424	init_peercred(ska);
				1425	init_peercred(skb);
				1426
				1427	if (ska->sk_type != SOCK_DGRAM) {
				1428	ska->sk_state = TCP_ESTABLISHED;
				1429	skb->sk_state = TCP_ESTABLISHED;
				1430	socka->state = SS_CONNECTED;
				1431	sockb->state = SS_CONNECTED;
				1432	}
				1433	return 0;
				1434	}
				1435
				1436	static void unix_sock_inherit_flags(const struct socket *old,
				1437	struct socket *new)
				1438	{
				1439	if (test_bit(SOCK_PASSCRED, &old->flags))
				1440	set_bit(SOCK_PASSCRED, &new->flags);
				1441	if (test_bit(SOCK_PASSSEC, &old->flags))
				1442	set_bit(SOCK_PASSSEC, &new->flags);
				1443	}
				1444
				1445	static int unix_accept(struct socket sock, struct socket newsock, int flags,
				1446	bool kern)
				1447	{
				1448	struct sock *sk = sock->sk;
				1449	struct sock *tsk;
				1450	struct sk_buff *skb;
				1451	int err;
				1452
				1453	err = -EOPNOTSUPP;
				1454	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
				1455	goto out;
				1456
				1457	err = -EINVAL;
				1458	if (sk->sk_state != TCP_LISTEN)
				1459	goto out;
				1460
				1461	/* If socket state is TCP_LISTEN it cannot change (for now...),
				1462	* so that no locks are necessary.
				1463	*/
				1464
				1465	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
				1466	if (!skb) {
				1467	/* This means receive shutdown. */
				1468	if (err == 0)
				1469	err = -EINVAL;
				1470	goto out;
				1471	}
				1472
				1473	tsk = skb->sk;
				1474	skb_free_datagram(sk, skb);
				1475	wake_up_interruptible(&unix_sk(sk)->peer_wait);
				1476
				1477	/* attach accepted sock to socket */
				1478	unix_state_lock(tsk);
				1479	newsock->state = SS_CONNECTED;
				1480	unix_sock_inherit_flags(sock, newsock);
				1481	sock_graft(tsk, newsock);
				1482	unix_state_unlock(tsk);
				1483	return 0;
				1484
				1485	out:
				1486	return err;
				1487	}
				1488
				1489
				1490	static int unix_getname(struct socket sock, struct sockaddr uaddr, int peer)
				1491	{
				1492	struct sock *sk = sock->sk;
				1493	struct unix_address *addr;
				1494	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
				1495	int err = 0;
				1496
				1497	if (peer) {
				1498	sk = unix_peer_get(sk);
				1499
				1500	err = -ENOTCONN;
				1501	if (!sk)
				1502	goto out;
				1503	err = 0;
				1504	} else {
				1505	sock_hold(sk);
				1506	}
				1507
				1508	addr = smp_load_acquire(&unix_sk(sk)->addr);
				1509	if (!addr) {
				1510	sunaddr->sun_family = AF_UNIX;
				1511	sunaddr->sun_path[0] = 0;
				1512	err = sizeof(short);
				1513	} else {
				1514	err = addr->len;
				1515	memcpy(sunaddr, addr->name, addr->len);
				1516	}
				1517	sock_put(sk);
				1518	out:
				1519	return err;
				1520	}
				1521
				1522	static void unix_peek_fds(struct scm_cookie scm, struct sk_buff skb)
				1523	{
				1524	scm->fp = scm_fp_dup(UNIXCB(skb).fp);
				1525
				1526	/*
				1527	* Garbage collection of unix sockets starts by selecting a set of
				1528	* candidate sockets which have reference only from being in flight
				1529	* (total_refs == inflight_refs). This condition is checked once during
				1530	* the candidate collection phase, and candidates are marked as such, so
				1531	* that non-candidates can later be ignored. While inflight_refs is
				1532	* protected by unix_gc_lock, total_refs (file count) is not, hence this
				1533	* is an instantaneous decision.
				1534	*
				1535	* Once a candidate, however, the socket must not be reinstalled into a
				1536	* file descriptor while the garbage collection is in progress.
				1537	*
				1538	* If the above conditions are met, then the directed graph of
				1539	* candidates (*) does not change while unix_gc_lock is held.
				1540	*
				1541	* Any operations that changes the file count through file descriptors
				1542	* (dup, close, sendmsg) does not change the graph since candidates are
				1543	* not installed in fds.
				1544	*
				1545	* Dequeing a candidate via recvmsg would install it into an fd, but
				1546	* that takes unix_gc_lock to decrement the inflight count, so it's
				1547	* serialized with garbage collection.
				1548	*
				1549	* MSG_PEEK is special in that it does not change the inflight count,
				1550	* yet does install the socket into an fd. The following lock/unlock
				1551	* pair is to ensure serialization with garbage collection. It must be
				1552	* done between incrementing the file count and installing the file into
				1553	* an fd.
				1554	*
				1555	* If garbage collection starts after the barrier provided by the
				1556	* lock/unlock, then it will see the elevated refcount and not mark this
				1557	* as a candidate. If a garbage collection is already in progress
				1558	* before the file count was incremented, then the lock/unlock pair will
				1559	* ensure that garbage collection is finished before progressing to
				1560	* installing the fd.
				1561	*
				1562	* (*) A -> B where B is on the queue of A or B is on the queue of C
				1563	* which is on the queue of listening socket A.
				1564	*/
				1565	spin_lock(&unix_gc_lock);
				1566	spin_unlock(&unix_gc_lock);
				1567	}
				1568
				1569	static int unix_scm_to_skb(struct scm_cookie scm, struct sk_buff skb, bool send_fds)
				1570	{
				1571	int err = 0;
				1572
				1573	UNIXCB(skb).pid = get_pid(scm->pid);
				1574	UNIXCB(skb).uid = scm->creds.uid;
				1575	UNIXCB(skb).gid = scm->creds.gid;
				1576	UNIXCB(skb).fp = NULL;
				1577	unix_get_secdata(scm, skb);
				1578	if (scm->fp && send_fds)
				1579	err = unix_attach_fds(scm, skb);
				1580
				1581	skb->destructor = unix_destruct_scm;
				1582	return err;
				1583	}
				1584
				1585	static bool unix_passcred_enabled(const struct socket *sock,
				1586	const struct sock *other)
				1587	{
				1588	return test_bit(SOCK_PASSCRED, &sock->flags) \|\|
				1589	!other->sk_socket \|\|
				1590	test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
				1591	}
				1592
				1593	/*
				1594	* Some apps rely on write() giving SCM_CREDENTIALS
				1595	* We include credentials if source or destination socket
				1596	* asserted SOCK_PASSCRED.
				1597	*/
				1598	static void maybe_add_creds(struct sk_buff skb, const struct socket sock,
				1599	const struct sock *other)
				1600	{
				1601	if (UNIXCB(skb).pid)
				1602	return;
				1603	if (unix_passcred_enabled(sock, other)) {
				1604	UNIXCB(skb).pid = get_pid(task_tgid(current));
				1605	current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
				1606	}
				1607	}
				1608
				1609	static int maybe_init_creds(struct scm_cookie *scm,
				1610	struct socket *socket,
				1611	const struct sock *other)
				1612	{
				1613	int err;
				1614	struct msghdr msg = { .msg_controllen = 0 };
				1615
				1616	err = scm_send(socket, &msg, scm, false);
				1617	if (err)
				1618	return err;
				1619
				1620	if (unix_passcred_enabled(socket, other)) {
				1621	scm->pid = get_pid(task_tgid(current));
				1622	current_uid_gid(&scm->creds.uid, &scm->creds.gid);
				1623	}
				1624	return err;
				1625	}
				1626
				1627	static bool unix_skb_scm_eq(struct sk_buff *skb,
				1628	struct scm_cookie *scm)
				1629	{
				1630	const struct unix_skb_parms *u = &UNIXCB(skb);
				1631
				1632	return u->pid == scm->pid &&
				1633	uid_eq(u->uid, scm->creds.uid) &&
				1634	gid_eq(u->gid, scm->creds.gid) &&
				1635	unix_secdata_eq(scm, skb);
				1636	}
				1637
				1638	/*
				1639	* Send AF_UNIX data.
				1640	*/
				1641
				1642	static int unix_dgram_sendmsg(struct socket sock, struct msghdr msg,
				1643	size_t len)
				1644	{
				1645	struct sock *sk = sock->sk;
				1646	struct net *net = sock_net(sk);
				1647	struct unix_sock *u = unix_sk(sk);
				1648	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
				1649	struct sock *other = NULL;
				1650	int namelen = 0; /* fake GCC */
				1651	int err;
				1652	unsigned int hash;
				1653	struct sk_buff *skb;
				1654	long timeo;
				1655	struct scm_cookie scm;
				1656	int data_len = 0;
				1657	int sk_locked;
				1658
				1659	wait_for_unix_gc();
				1660	err = scm_send(sock, msg, &scm, false);
				1661	if (err < 0)
				1662	return err;
				1663
				1664	err = -EOPNOTSUPP;
				1665	if (msg->msg_flags&MSG_OOB)
				1666	goto out;
				1667
				1668	if (msg->msg_namelen) {
				1669	err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
				1670	if (err < 0)
				1671	goto out;
				1672	namelen = err;
				1673	} else {
				1674	sunaddr = NULL;
				1675	err = -ENOTCONN;
				1676	other = unix_peer_get(sk);
				1677	if (!other)
				1678	goto out;
				1679	}
				1680
				1681	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
				1682	&& (err = unix_autobind(sock)) != 0)
				1683	goto out;
				1684
				1685	err = -EMSGSIZE;
				1686	if (len > sk->sk_sndbuf - 32)
				1687	goto out;
				1688
				1689	if (len > SKB_MAX_ALLOC) {
				1690	data_len = min_t(size_t,
				1691	len - SKB_MAX_ALLOC,
				1692	MAX_SKB_FRAGS * PAGE_SIZE);
				1693	data_len = PAGE_ALIGN(data_len);
				1694
				1695	BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
				1696	}
				1697
				1698	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				1699	msg->msg_flags & MSG_DONTWAIT, &err,
				1700	PAGE_ALLOC_COSTLY_ORDER);
				1701	if (skb == NULL)
				1702	goto out;
				1703
				1704	err = unix_scm_to_skb(&scm, skb, true);
				1705	if (err < 0)
				1706	goto out_free;
				1707
				1708	skb_put(skb, len - data_len);
				1709	skb->data_len = data_len;
				1710	skb->len = len;
				1711	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
				1712	if (err)
				1713	goto out_free;
				1714
				1715	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
				1716
				1717	restart:
				1718	if (!other) {
				1719	err = -ECONNRESET;
				1720	if (sunaddr == NULL)
				1721	goto out_free;
				1722
				1723	other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
				1724	hash, &err);
				1725	if (other == NULL)
				1726	goto out_free;
				1727	}
				1728
				1729	if (sk_filter(other, skb) < 0) {
				1730	/* Toss the packet but do not return any error to the sender */
				1731	err = len;
				1732	goto out_free;
				1733	}
				1734
				1735	sk_locked = 0;
				1736	unix_state_lock(other);
				1737	restart_locked:
				1738	err = -EPERM;
				1739	if (!unix_may_send(sk, other))
				1740	goto out_unlock;
				1741
				1742	if (unlikely(sock_flag(other, SOCK_DEAD))) {
				1743	/*
				1744	* Check with 1003.1g - what should
				1745	* datagram error
				1746	*/
				1747	unix_state_unlock(other);
				1748	sock_put(other);
				1749
				1750	if (!sk_locked)
				1751	unix_state_lock(sk);
				1752
				1753	err = 0;
				1754	if (unix_peer(sk) == other) {
				1755	unix_peer(sk) = NULL;
				1756	unix_dgram_peer_wake_disconnect_wakeup(sk, other);
				1757
				1758	unix_state_unlock(sk);
				1759
				1760	unix_dgram_disconnected(sk, other);
				1761	sock_put(other);
				1762	err = -ECONNREFUSED;
				1763	} else {
				1764	unix_state_unlock(sk);
				1765	}
				1766
				1767	other = NULL;
				1768	if (err)
				1769	goto out_free;
				1770	goto restart;
				1771	}
				1772
				1773	err = -EPIPE;
				1774	if (other->sk_shutdown & RCV_SHUTDOWN)
				1775	goto out_unlock;
				1776
				1777	if (sk->sk_type != SOCK_SEQPACKET) {
				1778	err = security_unix_may_send(sk->sk_socket, other->sk_socket);
				1779	if (err)
				1780	goto out_unlock;
				1781	}
				1782
				1783	/* other == sk && unix_peer(other) != sk if
				1784	* - unix_peer(sk) == NULL, destination address bound to sk
				1785	* - unix_peer(sk) == sk by time of get but disconnected before lock
				1786	*/
				1787	if (other != sk &&
				1788	unlikely(unix_peer(other) != sk &&
				1789	unix_recvq_full_lockless(other))) {
				1790	if (timeo) {
				1791	timeo = unix_wait_for_peer(other, timeo);
				1792
				1793	err = sock_intr_errno(timeo);
				1794	if (signal_pending(current))
				1795	goto out_free;
				1796
				1797	goto restart;
				1798	}
				1799
				1800	if (!sk_locked) {
				1801	unix_state_unlock(other);
				1802	unix_state_double_lock(sk, other);
				1803	}
				1804
				1805	if (unix_peer(sk) != other \|\|
				1806	unix_dgram_peer_wake_me(sk, other)) {
				1807	err = -EAGAIN;
				1808	sk_locked = 1;
				1809	goto out_unlock;
				1810	}
				1811
				1812	if (!sk_locked) {
				1813	sk_locked = 1;
				1814	goto restart_locked;
				1815	}
				1816	}
				1817
				1818	if (unlikely(sk_locked))
				1819	unix_state_unlock(sk);
				1820
				1821	if (sock_flag(other, SOCK_RCVTSTAMP))
				1822	__net_timestamp(skb);
				1823	maybe_add_creds(skb, sock, other);
				1824	skb_queue_tail(&other->sk_receive_queue, skb);
				1825	unix_state_unlock(other);
				1826	other->sk_data_ready(other);
				1827	sock_put(other);
				1828	scm_destroy(&scm);
				1829	return len;
				1830
				1831	out_unlock:
				1832	if (sk_locked)
				1833	unix_state_unlock(sk);
				1834	unix_state_unlock(other);
				1835	out_free:
				1836	kfree_skb(skb);
				1837	out:
				1838	if (other)
				1839	sock_put(other);
				1840	scm_destroy(&scm);
				1841	return err;
				1842	}
				1843
				1844	/* We use paged skbs for stream sockets, and limit occupancy to 32768
				1845	* bytes, and a minimum of a full page.
				1846	*/
				1847	#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
				1848
				1849	static int unix_stream_sendmsg(struct socket sock, struct msghdr msg,
				1850	size_t len)
				1851	{
				1852	struct sock *sk = sock->sk;
				1853	struct sock *other = NULL;
				1854	int err, size;
				1855	struct sk_buff *skb;
				1856	int sent = 0;
				1857	struct scm_cookie scm;
				1858	bool fds_sent = false;
				1859	int data_len;
				1860
				1861	wait_for_unix_gc();
				1862	err = scm_send(sock, msg, &scm, false);
				1863	if (err < 0)
				1864	return err;
				1865
				1866	err = -EOPNOTSUPP;
				1867	if (msg->msg_flags&MSG_OOB)
				1868	goto out_err;
				1869
				1870	if (msg->msg_namelen) {
				1871	err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
				1872	goto out_err;
				1873	} else {
				1874	err = -ENOTCONN;
				1875	other = unix_peer(sk);
				1876	if (!other)
				1877	goto out_err;
				1878	}
				1879
				1880	if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
				1881	goto pipe_err;
				1882
				1883	while (sent < len) {
				1884	size = len - sent;
				1885
				1886	/* Keep two messages in the pipe so it schedules better */
				1887	size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
				1888
				1889	/* allow fallback to order-0 allocations */
				1890	size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
				1891
				1892	data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
				1893
				1894	data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
				1895
				1896	skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
				1897	msg->msg_flags & MSG_DONTWAIT, &err,
				1898	get_order(UNIX_SKB_FRAGS_SZ));
				1899	if (!skb)
				1900	goto out_err;
				1901
				1902	/* Only send the fds in the first buffer */
				1903	err = unix_scm_to_skb(&scm, skb, !fds_sent);
				1904	if (err < 0) {
				1905	kfree_skb(skb);
				1906	goto out_err;
				1907	}
				1908	fds_sent = true;
				1909
				1910	skb_put(skb, size - data_len);
				1911	skb->data_len = data_len;
				1912	skb->len = size;
				1913	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
				1914	if (err) {
				1915	kfree_skb(skb);
				1916	goto out_err;
				1917	}
				1918
				1919	unix_state_lock(other);
				1920
				1921	if (sock_flag(other, SOCK_DEAD) \|\|
				1922	(other->sk_shutdown & RCV_SHUTDOWN))
				1923	goto pipe_err_free;
				1924
				1925	maybe_add_creds(skb, sock, other);
				1926	skb_queue_tail(&other->sk_receive_queue, skb);
				1927	unix_state_unlock(other);
				1928	other->sk_data_ready(other);
				1929	sent += size;
				1930	}
				1931
				1932	scm_destroy(&scm);
				1933
				1934	return sent;
				1935
				1936	pipe_err_free:
				1937	unix_state_unlock(other);
				1938	kfree_skb(skb);
				1939	pipe_err:
				1940	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
				1941	send_sig(SIGPIPE, current, 0);
				1942	err = -EPIPE;
				1943	out_err:
				1944	scm_destroy(&scm);
				1945	return sent ? : err;
				1946	}
				1947
				/*
				 * unix_stream_sendpage - append one page fragment to the peer's receive queue.
				 *
				 * Rejects MSG_OOB with -EOPNOTSUPP and a socket without a peer or not in
				 * TCP_ESTABLISHED with -ENOTCONN.  On success returns size; on failure
				 * returns a negative errno.  SIGPIPE is raised for the -EPIPE cases unless
				 * MSG_NOSIGNAL is set in flags.
				 *
				 * NOTE(review): this copy of the file appears to have lost its pointer
				 * declarators (e.g. "struct socket socket" while the body uses socket->sk)
				 * - confirm against the pristine tree.
				 */
				1948	static ssize_t unix_stream_sendpage(struct socket socket, struct page page,
				1949	int offset, size_t size, int flags)
				1950	{
				1951	int err;
				1952	bool send_sigpipe = false;
				1953	bool init_scm = true;
				1954	struct scm_cookie scm;
				1955	struct sock other, sk = socket->sk;
				1956	struct sk_buff skb, newskb = NULL, *tail = NULL;
				1957
				1958	if (flags & MSG_OOB)
				1959	return -EOPNOTSUPP;
				1960
				1961	other = unix_peer(sk);
				1962	if (!other \|\| sk->sk_state != TCP_ESTABLISHED)
				1963	return -ENOTCONN;
				1964
				/* Never entered from the top: the body is reached only through
				 * "goto alloc_skb" below.  It drops the receive-queue lock, the peer
				 * state lock and the iolock (all held at the goto sites), allocates a
				 * fresh skb, and then falls through to retake the locks.
				 */
				1965	if (false) {
				1966	alloc_skb:
				1967	spin_unlock(&other->sk_receive_queue.lock);
				1968	unix_state_unlock(other);
				1969	mutex_unlock(&unix_sk(other)->iolock);
				1970	newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
				1971	&err, 0);
				1972	if (!newskb)
				1973	goto err;
				1974	}
				1975
				1976	/* we must acquire iolock as we modify already present
				1977	* skbs in the sk_receive_queue and mess with skb->len
				1978	*/
				1979	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
				1980	if (err) {
				/* Interrupted lock attempt: -EAGAIN for MSG_DONTWAIT, else -ERESTARTSYS. */
				1981	err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
				1982	goto err;
				1983	}
				1984
				1985	if (sk->sk_shutdown & SEND_SHUTDOWN) {
				1986	err = -EPIPE;
				1987	send_sigpipe = true;
				1988	goto err_unlock;
				1989	}
				1990
				1991	unix_state_lock(other);
				1992
				1993	if (sock_flag(other, SOCK_DEAD) \|\|
				1994	other->sk_shutdown & RCV_SHUTDOWN) {
				1995	err = -EPIPE;
				1996	send_sigpipe = true;
				1997	goto err_state_unlock;
				1998	}
				1999
				/* Credentials are set up on the first pass only.  init_scm == false
				 * afterwards tells the error path that scm has to be destroyed.
				 */
				2000	if (init_scm) {
				2001	err = maybe_init_creds(&scm, socket, other);
				2002	if (err)
				2003	goto err_state_unlock;
				2004	init_scm = false;
				2005	}
				2006
				/* Pick the skb to extend.  "tail" remembers the queue tail seen before
				 * an allocation pass:
				 *  - tail unchanged since then: use the new skb;
				 *  - queue empty or tail has different scm data: use the new skb if
				 *    one exists, otherwise remember the tail and go allocate one;
				 *  - tail is usable: extend it and release an unneeded new skb.
				 */
				2007	spin_lock(&other->sk_receive_queue.lock);
				2008	skb = skb_peek_tail(&other->sk_receive_queue);
				2009	if (tail && tail == skb) {
				2010	skb = newskb;
				2011	} else if (!skb \|\| !unix_skb_scm_eq(skb, &scm)) {
				2012	if (newskb) {
				2013	skb = newskb;
				2014	} else {
				2015	tail = skb;
				2016	goto alloc_skb;
				2017	}
				2018	} else if (newskb) {
				2019	/* this is fast path, we don't necessarily need to
				2020	* call to kfree_skb even though with newskb == NULL
				2021	* this - does no harm
				2022	*/
				2023	consume_skb(newskb);
				2024	newskb = NULL;
				2025	}
				2026
				/* A non-zero return means the fragment could not be appended to this
				 * skb; remember it as the tail and retry with a newly allocated skb.
				 */
				2027	if (skb_append_pagefrags(skb, page, offset, size)) {
				2028	tail = skb;
				2029	goto alloc_skb;
				2030	}
				2031
				/* Account the added bytes on the skb and on the sender's sk_wmem_alloc. */
				2032	skb->len += size;
				2033	skb->data_len += size;
				2034	skb->truesize += size;
				2035	refcount_add(size, &sk->sk_wmem_alloc);
				2036
				/* A freshly allocated skb still has to receive the scm data and be
				 * linked at the tail of the peer's receive queue.
				 */
				2037	if (newskb) {
				2038	unix_scm_to_skb(&scm, skb, false);
				2039	__skb_queue_tail(&other->sk_receive_queue, newskb);
				2040	}
				2041
				2042	spin_unlock(&other->sk_receive_queue.lock);
				2043	unix_state_unlock(other);
				2044	mutex_unlock(&unix_sk(other)->iolock);
				2045
				2046	other->sk_data_ready(other);
				2047	scm_destroy(&scm);
				2048	return size;
				2049
				/* Error unwinding: each label releases one more lock than the next. */
				2050	err_state_unlock:
				2051	unix_state_unlock(other);
				2052	err_unlock:
				2053	mutex_unlock(&unix_sk(other)->iolock);
				2054	err:
				2055	kfree_skb(newskb);
				2056	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
				2057	send_sig(SIGPIPE, current, 0);
				2058	if (!init_scm)
				2059	scm_destroy(&scm);
				2060	return err;
				2061	}
				2062
				/*
				 * unix_seqpacket_sendmsg - send on a SOCK_SEQPACKET socket.
				 *
				 * Returns a pending socket error if there is one, -ENOTCONN when the
				 * socket is not in TCP_ESTABLISHED, and otherwise the result of
				 * unix_dgram_sendmsg() after clearing any destination address length.
				 */
				2063	static int unix_seqpacket_sendmsg(struct socket sock, struct msghdr msg,
				2064	size_t len)
				2065	{
				2066	int err;
				2067	struct sock *sk = sock->sk;
				2068
				2069	err = sock_error(sk);
				2070	if (err)
				2071	return err;
				2072
				2073	if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
				2074	return -ENOTCONN;
				2075
				/* Zero msg_namelen so the datagram path is not handed a destination name. */
				2076	if (msg->msg_namelen)
				2077	msg->msg_namelen = 0;
				2078
				2079	return unix_dgram_sendmsg(sock, msg, len);
				2080	}
				2081
				/*
				 * unix_seqpacket_recvmsg - receive on a SOCK_SEQPACKET socket.
				 *
				 * Returns -ENOTCONN unless the socket is in TCP_ESTABLISHED; otherwise
				 * forwards to unix_dgram_recvmsg() with the same arguments.
				 */
				2082	static int unix_seqpacket_recvmsg(struct socket sock, struct msghdr msg,
				2083	size_t size, int flags)
				2084	{
				2085	struct sock *sk = sock->sk;
				2086
				2087	if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
				2088	return -ENOTCONN;
				2089
				2090	return unix_dgram_recvmsg(sock, msg, size, flags);
				2091	}
				2092
				/*
				 * unix_copy_addr - copy sk's bound address into msg.
				 *
				 * When sk has an address, sets msg->msg_namelen to its length and copies
				 * that many bytes into msg->msg_name.  Without an address msg is left
				 * untouched.  The caller must supply a non-NULL msg->msg_name.
				 */
				2093	static void unix_copy_addr(struct msghdr msg, struct sock sk)
				2094	{
				/* Acquire-load of the address pointer before reading its fields. */
				2095	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
				2096
				2097	if (addr) {
				2098	msg->msg_namelen = addr->len;
				2099	memcpy(msg->msg_name, addr->name, addr->len);
				2100	}
				2101	}
				2102
				/*
				 * unix_dgram_recvmsg - receive (or peek at) one datagram.
				 *
				 * MSG_OOB is rejected with -EOPNOTSUPP.  On success returns the number of
				 * bytes copied, or the datagram length past the peek offset when MSG_TRUNC
				 * is set in flags.  On failure returns a negative errno.  A SOCK_SEQPACKET
				 * socket with RCV_SHUTDOWN set returns 0 instead of -EAGAIN when nothing
				 * is queued.
				 */
				2103	static int unix_dgram_recvmsg(struct socket sock, struct msghdr msg,
				2104	size_t size, int flags)
				2105	{
				2106	struct scm_cookie scm;
				2107	struct sock *sk = sock->sk;
				2108	struct unix_sock *u = unix_sk(sk);
				2109	struct sk_buff skb, last;
				2110	long timeo;
				2111	int skip;
				2112	int err;
				2113
				2114	err = -EOPNOTSUPP;
				2115	if (flags&MSG_OOB)
				2116	goto out;
				2117
				2118	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
				2119
				/* Try to dequeue under iolock.  The loop is left with iolock held only
				 * when an skb was obtained; otherwise it retries while the error is
				 * -EAGAIN, timeout remains and the wait helper returns zero.
				 */
				2120	do {
				2121	mutex_lock(&u->iolock);
				2122
				2123	skip = sk_peek_offset(sk, flags);
				2124	skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
				2125	&last);
				2126	if (skb)
				2127	break;
				2128
				2129	mutex_unlock(&u->iolock);
				2130
				2131	if (err != -EAGAIN)
				2132	break;
				2133	} while (timeo &&
				2134	!__skb_wait_for_more_packets(sk, &err, &timeo, last));
				2135
				2136	if (!skb) { /* implies iolock unlocked */
				2137	unix_state_lock(sk);
				2138	/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
				2139	if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
				2140	(sk->sk_shutdown & RCV_SHUTDOWN))
				2141	err = 0;
				2142	unix_state_unlock(sk);
				2143	goto out;
				2144	}
				2145
				/* Notify anything sleeping on this socket's peer_wait queue with the
				 * write-ready poll events.
				 */
				2146	if (wq_has_sleeper(&u->peer_wait))
				2147	wake_up_interruptible_sync_poll(&u->peer_wait,
				2148	EPOLLOUT \| EPOLLWRNORM \|
				2149	EPOLLWRBAND);
				2150
				/* Report the address of the socket recorded in skb->sk, if requested. */
				2151	if (msg->msg_name)
				2152	unix_copy_addr(msg, skb->sk);
				2153
				/* Clamp to the bytes available past the peek offset; a smaller user
				 * buffer is flagged as truncated.
				 */
				2154	if (size > skb->len - skip)
				2155	size = skb->len - skip;
				2156	else if (size < skb->len - skip)
				2157	msg->msg_flags \|= MSG_TRUNC;
				2158
				2159	err = skb_copy_datagram_msg(skb, skip, msg, size);
				2160	if (err)
				2161	goto out_free;
				2162
				2163	if (sock_flag(sk, SOCK_RCVTSTAMP))
				2164	__sock_recv_timestamp(msg, sk, skb);
				2165
				2166	memset(&scm, 0, sizeof(scm));
				2167
				/* Fill the control data from the credentials stored in the skb. */
				2168	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
				2169	unix_set_secdata(&scm, skb);
				2170
				2171	if (!(flags & MSG_PEEK)) {
				/* Real read: move any passed fds into scm and move the peek
				 * offset back by the length of this datagram.
				 */
				2172	if (UNIXCB(skb).fp)
				2173	unix_detach_fds(&scm, skb);
				2174
				2175	sk_peek_offset_bwd(sk, skb->len);
				2176	} else {
				2177	/* It is questionable: on PEEK we could:
				2178	- do not return fds - good, but too simple 8)
				2179	- return fds, and do not return them on read (old strategy,
				2180	apparently wrong)
				2181	- clone fds (I chose it for now, it is the most universal
				2182	solution)
				2183
				2184	POSIX 1003.1g does not actually define this clearly
				2185	at all. POSIX 1003.1g doesn't define a lot of things
				2186	clearly however!
				2187
				2188	*/
				2189
				2190	sk_peek_offset_fwd(sk, size);
				2191
				2192	if (UNIXCB(skb).fp)
				2193	unix_peek_fds(&scm, skb);
				2194	}
				/* With MSG_TRUNC in flags, report the datagram length past the peek
				 * offset rather than the copied byte count.
				 */
				2195	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
				2196
				2197	scm_recv(sock, msg, &scm, flags);
				2198
				2199	out_free:
				2200	skb_free_datagram(sk, skb);
				2201	mutex_unlock(&u->iolock);
				2202	out:
				2203	return err;
				2204	}
				2205
				2206	/*
				2207	* Sleep until more data has arrived. But check for races..
				2208	*/
				/*
				 * unix_stream_data_wait - block until the receive queue tail changes.
				 *
				 * @sk:        socket to wait on
				 * @timeo:     remaining timeout; 0 ends the wait immediately
				 * @last:      queue tail the caller last observed (may be NULL)
				 * @last_len:  length of @last when it was observed
				 * @freezable: use freezable_schedule_timeout() instead of schedule_timeout()
				 *
				 * Returns the timeout left after waiting.
				 */
				2209	static long unix_stream_data_wait(struct sock *sk, long timeo,
				2210	struct sk_buff *last, unsigned int last_len,
				2211	bool freezable)
				2212	{
				2213	struct sk_buff *tail;
				2214	DEFINE_WAIT(wait);
				2215
				2216	unix_state_lock(sk);
				2217
				2218	for (;;) {
				2219	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
				2220
				/* Stop waiting when the tail is a different skb or has a different
				 * length, or on socket error, receive shutdown, pending signal or
				 * an exhausted timeout.
				 */
				2221	tail = skb_peek_tail(&sk->sk_receive_queue);
				2222	if (tail != last \|\|
				2223	(tail && tail->len != last_len) \|\|
				2224	sk->sk_err \|\|
				2225	(sk->sk_shutdown & RCV_SHUTDOWN) \|\|
				2226	signal_pending(current) \|\|
				2227	!timeo)
				2228	break;
				2229
				/* The state lock is dropped for the duration of the sleep. */
				2230	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
				2231	unix_state_unlock(sk);
				2232	if (freezable)
				2233	timeo = freezable_schedule_timeout(timeo);
				2234	else
				2235	timeo = schedule_timeout(timeo);
				2236	unix_state_lock(sk);
				2237
				/* A dead socket leaves the loop before the WAITDATA bit is cleared. */
				2238	if (sock_flag(sk, SOCK_DEAD))
				2239	break;
				2240
				2241	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
				2242	}
				2243
				2244	finish_wait(sk_sleep(sk), &wait);
				2245	unix_state_unlock(sk);
				2246	return timeo;
				2247	}
				2248
				/* Return the bytes of skb not yet consumed: skb->len minus UNIXCB(skb).consumed. */
				2249	static unsigned int unix_skb_len(const struct sk_buff *skb)
				2250	{
				2251	return skb->len - UNIXCB(skb).consumed;
				2252	}
				2253
				/*
				 * Parameters shared by the stream read paths (recvmsg and splice) and
				 * passed to unix_stream_read_generic().
				 */
				2254	struct unix_stream_read_state {
				/* Callback invoked as recv_actor(skb, skip, chunk, state); a negative
				 * return is treated as failure, otherwise it is the byte count moved.
				 */
				2255	int (recv_actor)(struct sk_buff , int, int,
				2256	struct unix_stream_read_state *);
				/* Socket being read from. */
				2257	struct socket *socket;
				/* Destination message; left unset by the splice path. */
				2258	struct msghdr *msg;
				/* Destination pipe, used by the splice actor. */
				2259	struct pipe_inode_info *pipe;
				/* Maximum number of bytes to read. */
				2260	size_t size;
				/* MSG_* flags for the read. */
				2261	int flags;
				/* Flags handed to skb_splice_bits() by the splice actor. */
				2262	unsigned int splice_flags;
				2263	};
				2264
				/*
				 * unix_stream_read_generic - common read loop for stream sockets.
				 *
				 * @state:     socket, destination, size, flags and the actor callback
				 * @freezable: forwarded to unix_stream_data_wait()
				 *
				 * Returns the number of bytes moved when that is non-zero, otherwise the
				 * value of err (0 or a negative errno).  Fails with -EINVAL when the socket
				 * is not in TCP_ESTABLISHED and -EOPNOTSUPP for MSG_OOB.  When the actor
				 * fails before anything was copied the result is -EFAULT.
				 */
				2265	static int unix_stream_read_generic(struct unix_stream_read_state *state,
				2266	bool freezable)
				2267	{
				2268	struct scm_cookie scm;
				2269	struct socket *sock = state->socket;
				2270	struct sock *sk = sock->sk;
				2271	struct unix_sock *u = unix_sk(sk);
				2272	int copied = 0;
				2273	int flags = state->flags;
				2274	int noblock = flags & MSG_DONTWAIT;
				2275	bool check_creds = false;
				2276	int target;
				2277	int err = 0;
				2278	long timeo;
				2279	int skip;
				2280	size_t size = state->size;
				2281	unsigned int last_len;
				2282
				2283	if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) {
				2284	err = -EINVAL;
				2285	goto out;
				2286	}
				2287
				2288	if (unlikely(flags & MSG_OOB)) {
				2289	err = -EOPNOTSUPP;
				2290	goto out;
				2291	}
				2292
				/* target: bytes that must be copied before an empty queue ends the
				 * read; timeo: receive timeout (as returned for the noblock setting).
				 */
				2293	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
				2294	timeo = sock_rcvtimeo(sk, noblock);
				2295
				2296	memset(&scm, 0, sizeof(scm));
				2297
				2298	/* Lock the socket to prevent queue disordering
				2299	* while sleeps in memcpy_tomsg
				2300	*/
				2301	mutex_lock(&u->iolock);
				2302
				/* Start at the current peek offset, never below zero. */
				2303	skip = max(sk_peek_offset(sk, flags), 0);
				2304
				2305	do {
				2306	int chunk;
				2307	bool drop_skb;
				2308	struct sk_buff skb, last;
				2309
				2310	redo:
				2311	unix_state_lock(sk);
				2312	if (sock_flag(sk, SOCK_DEAD)) {
				2313	err = -ECONNRESET;
				2314	goto unlock;
				2315	}
				2316	last = skb = skb_peek(&sk->sk_receive_queue);
				2317	last_len = last ? last->len : 0;
				2318	again:
				/* Nothing (more) to read: finish if the target is met, report a
				 * pending error or shutdown, or wait for more data.
				 */
				2319	if (skb == NULL) {
				2320	if (copied >= target)
				2321	goto unlock;
				2322
				2323	/*
				2324	* POSIX 1003.1g mandates this order.
				2325	*/
				2326
				2327	err = sock_error(sk);
				2328	if (err)
				2329	goto unlock;
				2330	if (sk->sk_shutdown & RCV_SHUTDOWN)
				2331	goto unlock;
				2332
				2333	unix_state_unlock(sk);
				/* No timeout left: leave the loop with iolock still held; it is
				 * released right after the loop.
				 */
				2334	if (!timeo) {
				2335	err = -EAGAIN;
				2336	break;
				2337	}
				2338
				2339	mutex_unlock(&u->iolock);
				2340
				2341	timeo = unix_stream_data_wait(sk, timeo, last,
				2342	last_len, freezable);
				2343
				/* iolock is not held here, so this path destroys scm itself and
				 * jumps past the unlock/scm_recv code.
				 */
				2344	if (signal_pending(current)) {
				2345	err = sock_intr_errno(timeo);
				2346	scm_destroy(&scm);
				2347	goto out;
				2348	}
				2349
				2350	mutex_lock(&u->iolock);
				2351	goto redo;
				2352	unlock:
				2353	unix_state_unlock(sk);
				2354	break;
				2355	}
				2356
				/* Walk past skbs that lie entirely within the peek offset. */
				2357	while (skip >= unix_skb_len(skb)) {
				2358	skip -= unix_skb_len(skb);
				2359	last = skb;
				2360	last_len = skb->len;
				2361	skb = skb_peek_next(skb, &sk->sk_receive_queue);
				2362	if (!skb)
				2363	goto again;
				2364	}
				2365
				2366	unix_state_unlock(sk);
				2367
				2368	if (check_creds) {
				2369	/* Never glue messages from different writers */
				2370	if (!unix_skb_scm_eq(skb, &scm))
				2371	break;
				2372	} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
				2373	/* Copy credentials */
				2374	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
				2375	unix_set_secdata(&scm, skb);
				2376	check_creds = true;
				2377	}
				2378
				2379	/* Copy address just once */
				2380	if (state->msg && state->msg->msg_name) {
				2381	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
				2382	state->msg->msg_name);
				2383	unix_copy_addr(state->msg, skb->sk);
				2384	sunaddr = NULL;
				2385	}
				2386
				/* Hand at most the unread remainder of this skb to the actor.  An
				 * extra reference is held across the call and dropped right after.
				 */
				2387	chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
				2388	skb_get(skb);
				2389	chunk = state->recv_actor(skb, skip, chunk, state);
				2390	drop_skb = !unix_skb_len(skb);
				2391	/* skb is only safe to use if !drop_skb */
				2392	consume_skb(skb);
				/* Actor failure: report -EFAULT only when nothing was copied yet. */
				2393	if (chunk < 0) {
				2394	if (copied == 0)
				2395	copied = -EFAULT;
				2396	break;
				2397	}
				2398	copied += chunk;
				2399	size -= chunk;
				2400
				2401	if (drop_skb) {
				2402	/* the skb was touched by a concurrent reader;
				2403	* we should not expect anything from this skb
				2404	* anymore and assume it invalid - we can be
				2405	* sure it was dropped from the socket queue
				2406	*
				2407	* let's report a short read
				2408	*/
				2409	err = 0;
				2410	break;
				2411	}
				2412
				2413	/* Mark read part of skb as used */
				2414	if (!(flags & MSG_PEEK)) {
				2415	UNIXCB(skb).consumed += chunk;
				2416
				2417	sk_peek_offset_bwd(sk, chunk);
				2418
				2419	if (UNIXCB(skb).fp)
				2420	unix_detach_fds(&scm, skb);
				2421
				/* Data left in this skb: stop here and keep it queued. */
				2422	if (unix_skb_len(skb))
				2423	break;
				2424
				/* Fully consumed: remove it from the queue and release it. */
				2425	skb_unlink(skb, &sk->sk_receive_queue);
				2426	consume_skb(skb);
				2427
				/* Stop once file descriptors have been collected in scm. */
				2428	if (scm.fp)
				2429	break;
				2430	} else {
				2431	/* It is questionable, see note in unix_dgram_recvmsg.
				2432	*/
				2433	if (UNIXCB(skb).fp)
				2434	unix_peek_fds(&scm, skb);
				2435
				2436	sk_peek_offset_fwd(sk, chunk);
				2437
				/* A peek stops at an skb that carries file descriptors. */
				2438	if (UNIXCB(skb).fp)
				2439	break;
				2440
				/* Continue peeking with the next queued skb, if there is one. */
				2441	skip = 0;
				2442	last = skb;
				2443	last_len = skb->len;
				2444	unix_state_lock(sk);
				2445	skb = skb_peek_next(skb, &sk->sk_receive_queue);
				2446	if (skb)
				2447	goto again;
				2448	unix_state_unlock(sk);
				2449	break;
				2450	}
				2451	} while (size);
				2452
				2453	mutex_unlock(&u->iolock);
				/* Deliver control data when there is a msghdr; otherwise release it. */
				2454	if (state->msg)
				2455	scm_recv(sock, state->msg, &scm, flags);
				2456	else
				2457	scm_destroy(&scm);
				2458	out:
				2459	return copied ? : err;
				2460	}
				2461
				/*
				 * unix_stream_read_actor - recv_actor for the recvmsg path.
				 *
				 * Copies chunk bytes from skb, starting at the consumed offset plus skip,
				 * into state->msg.  Returns chunk on success or the non-zero result of
				 * skb_copy_datagram_msg() on failure.
				 */
				2462	static int unix_stream_read_actor(struct sk_buff *skb,
				2463	int skip, int chunk,
				2464	struct unix_stream_read_state *state)
				2465	{
				2466	int ret;
				2467
				2468	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
				2469	state->msg, chunk);
				2470	return ret ?: chunk;
				2471	}
				2472
				/*
				 * unix_stream_recvmsg - recvmsg entry point for stream sockets.
				 *
				 * Packs the arguments into a unix_stream_read_state that uses
				 * unix_stream_read_actor and runs unix_stream_read_generic() with
				 * freezable waits enabled.  Returns whatever the generic reader returns.
				 */
				2473	static int unix_stream_recvmsg(struct socket sock, struct msghdr msg,
				2474	size_t size, int flags)
				2475	{
				2476	struct unix_stream_read_state state = {
				2477	.recv_actor = unix_stream_read_actor,
				2478	.socket = sock,
				2479	.msg = msg,
				2480	.size = size,
				2481	.flags = flags
				2482	};
				2483
				2484	return unix_stream_read_generic(&state, true);
				2485	}
				2486
				/*
				 * unix_stream_splice_actor - recv_actor for the splice path.
				 *
				 * Passes chunk bytes of skb, starting at the consumed offset plus skip, to
				 * skb_splice_bits() for state->pipe and returns that call's result.
				 */
				2487	static int unix_stream_splice_actor(struct sk_buff *skb,
				2488	int skip, int chunk,
				2489	struct unix_stream_read_state *state)
				2490	{
				2491	return skb_splice_bits(skb, state->socket->sk,
				2492	UNIXCB(skb).consumed + skip,
				2493	state->pipe, chunk, state->splice_flags);
				2494	}
				2495
				/*
				 * unix_stream_splice_read - splice data from a stream socket into a pipe.
				 *
				 * Returns -ESPIPE when *ppos is non-zero.  The read is non-blocking
				 * (MSG_DONTWAIT) when the file has O_NONBLOCK or flags contains
				 * SPLICE_F_NONBLOCK.  Otherwise returns the result of
				 * unix_stream_read_generic() run without freezable waits.
				 */
				2496	static ssize_t unix_stream_splice_read(struct socket sock, loff_t ppos,
				2497	struct pipe_inode_info *pipe,
				2498	size_t size, unsigned int flags)
				2499	{
				/* .msg and .flags are not named here and so start out zeroed. */
				2500	struct unix_stream_read_state state = {
				2501	.recv_actor = unix_stream_splice_actor,
				2502	.socket = sock,
				2503	.pipe = pipe,
				2504	.size = size,
				2505	.splice_flags = flags,
				2506	};
				2507
				2508	if (unlikely(*ppos))
				2509	return -ESPIPE;
				2510
				2511	if (sock->file->f_flags & O_NONBLOCK \|\|
				2512	flags & SPLICE_F_NONBLOCK)
				2513	state.flags = MSG_DONTWAIT;
				2514
				2515	return unix_stream_read_generic(&state, false);
				2516	}
				2517
				/*
				 * unix_shutdown - shut down one or both directions of a socket.
				 *
				 * @mode is SHUT_RD, SHUT_WR or SHUT_RDWR; anything else yields -EINVAL.
				 * The local shutdown bits are set and, for SOCK_STREAM and SOCK_SEQPACKET
				 * sockets with a peer, the mirrored bits are set on the peer as well.
				 * Returns 0 on success.
				 */
				2518	static int unix_shutdown(struct socket *sock, int mode)
				2519	{
				2520	struct sock *sk = sock->sk;
				2521	struct sock *other;
				2522
				2523	if (mode < SHUT_RD \|\| mode > SHUT_RDWR)
				2524	return -EINVAL;
				2525	/* This maps:
				2526	* SHUT_RD (0) -> RCV_SHUTDOWN (1)
				2527	* SHUT_WR (1) -> SEND_SHUTDOWN (2)
				2528	* SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
				2529	*/
				2530	++mode;
				2531
				2532	unix_state_lock(sk);
				2533	WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown \| mode);
				/* Take a reference on the peer while the state lock is held; it is
				 * released by the sock_put() at the end of the function.
				 */
				2534	other = unix_peer(sk);
				2535	if (other)
				2536	sock_hold(other);
				2537	unix_state_unlock(sk);
				2538	sk->sk_state_change(sk);
				2539
				2540	if (other &&
				2541	(sk->sk_type == SOCK_STREAM \|\| sk->sk_type == SOCK_SEQPACKET)) {
				2542
				2543	int peer_mode = 0;
				2544
				/* Mirror the directions: our receive side is the peer's send side
				 * and vice versa.
				 */
				2545	if (mode&RCV_SHUTDOWN)
				2546	peer_mode \|= SEND_SHUTDOWN;
				2547	if (mode&SEND_SHUTDOWN)
				2548	peer_mode \|= RCV_SHUTDOWN;
				2549	unix_state_lock(other);
				2550	WRITE_ONCE(other->sk_shutdown, other->sk_shutdown \| peer_mode);
				2551	unix_state_unlock(other);
				2552	other->sk_state_change(other);
				/* Async notification on the peer: POLL_HUP when both directions
				 * are now shut, POLL_IN when only its receive side is.
				 */
				2553	if (peer_mode == SHUTDOWN_MASK)
				2554	sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
				2555	else if (peer_mode & RCV_SHUTDOWN)
				2556	sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
				2557	}
				2558	if (other)
				2559	sock_put(other);
				2560
				2561	return 0;
				2562	}
				2563
				/*
				 * unix_inq_len - amount of data waiting in sk's receive queue.
				 *
				 * Returns -EINVAL for a socket in TCP_LISTEN.  For SOCK_STREAM and
				 * SOCK_SEQPACKET the result is the sum of unconsumed bytes over all queued
				 * skbs; for other types it is the length of the first queued skb, or 0
				 * when the queue is empty.
				 */
				2564	long unix_inq_len(struct sock *sk)
				2565	{
				2566	struct sk_buff *skb;
				2567	long amount = 0;
				2568
				2569	if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
				2570	return -EINVAL;
				2571
				/* The queue is inspected under its own spinlock. */
				2572	spin_lock(&sk->sk_receive_queue.lock);
				2573	if (sk->sk_type == SOCK_STREAM \|\|
				2574	sk->sk_type == SOCK_SEQPACKET) {
				2575	skb_queue_walk(&sk->sk_receive_queue, skb)
				2576	amount += unix_skb_len(skb);
				2577	} else {
				2578	skb = skb_peek(&sk->sk_receive_queue);
				2579	if (skb)
				2580	amount = skb->len;
				2581	}
				2582	spin_unlock(&sk->sk_receive_queue.lock);
				2583
				2584	return amount;
				2585	}
				2586	EXPORT_SYMBOL_GPL(unix_inq_len);
				2587
				/* unix_outq_len - return sk_wmem_alloc_get(sk), the socket's write-memory count. */
				2588	long unix_outq_len(struct sock *sk)
				2589	{
				2590	return sk_wmem_alloc_get(sk);
				2591	}
				2592	EXPORT_SYMBOL_GPL(unix_outq_len);
				2593
				/*
				 * unix_open_file - open the path stored in a socket as a new descriptor.
				 *
				 * Requires CAP_NET_ADMIN in the user namespace of the socket's network
				 * namespace (-EPERM otherwise).  Returns -ENOENT when the socket has no
				 * address or its stored path has no dentry.  On success returns a new
				 * O_CLOEXEC file descriptor opened with O_PATH; otherwise the negative
				 * value from get_unused_fd_flags() or dentry_open().
				 */
				2594	static int unix_open_file(struct sock *sk)
				2595	{
				2596	struct path path;
				2597	struct file *f;
				2598	int fd;
				2599
				2600	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
				2601	return -EPERM;
				2602
				2603	if (!smp_load_acquire(&unix_sk(sk)->addr))
				2604	return -ENOENT;
				2605
				2606	path = unix_sk(sk)->path;
				2607	if (!path.dentry)
				2608	return -ENOENT;
				2609
				/* Reference on the local copy of the path; dropped at "out" on every
				 * path from here on.
				 */
				2610	path_get(&path);
				2611
				2612	fd = get_unused_fd_flags(O_CLOEXEC);
				2613	if (fd < 0)
				2614	goto out;
				2615
				2616	f = dentry_open(&path, O_PATH, current_cred());
				2617	if (IS_ERR(f)) {
				/* Give the reserved descriptor back and return the open error. */
				2618	put_unused_fd(fd);
				2619	fd = PTR_ERR(f);
				2620	goto out;
				2621	}
				2622
				2623	fd_install(fd, f);
				2624	out:
				2625	path_put(&path);
				2626
				2627	return fd;
				2628	}
				2629
				/*
				 * unix_ioctl - ioctl handler for AF_UNIX sockets.
				 *
				 * SIOCOUTQ:     store unix_outq_len() through the user pointer in arg.
				 * SIOCINQ:      store unix_inq_len() through arg, or return its error.
				 * SIOCUNIXFILE: return the result of unix_open_file().
				 * Any other command yields -ENOIOCTLCMD.
				 */
				2630	static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
				2631	{
				2632	struct sock *sk = sock->sk;
				2633	long amount = 0;
				2634	int err;
				2635
				2636	switch (cmd) {
				2637	case SIOCOUTQ:
				2638	amount = unix_outq_len(sk);
				/* The value is written to user space as an int. */
				2639	err = put_user(amount, (int __user *)arg);
				2640	break;
				2641	case SIOCINQ:
				2642	amount = unix_inq_len(sk);
				2643	if (amount < 0)
				2644	err = amount;
				2645	else
				2646	err = put_user(amount, (int __user *)arg);
				2647	break;
				2648	case SIOCUNIXFILE:
				2649	err = unix_open_file(sk);
				2650	break;
				2651	default:
				2652	err = -ENOIOCTLCMD;
				2653	break;
				2654	}
				2655	return err;
				2656	}
				2657
				2658	#ifdef CONFIG_COMPAT
				/* Compat ioctl entry: convert arg with compat_ptr() and forward to unix_ioctl(). */
				2659	static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
				2660	{
				2661	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
				2662	}
				2663	#endif
				2664
				/*
				 * unix_poll - compute the poll event mask for a socket.
				 *
				 * Registers on the socket's wait queue via sock_poll_wait() and returns a
				 * mask built from the pending error, the shutdown bits, the receive queue
				 * and unix_writable().
				 */
				2665	static __poll_t unix_poll(struct file file, struct socket sock, poll_table *wait)
				2666	{
				2667	struct sock *sk = sock->sk;
				2668	unsigned char state;
				2669	__poll_t mask;
				2670	u8 shutdown;
				2671
				2672	sock_poll_wait(file, sock, wait);
				2673	mask = 0;
				/* Shutdown bits and state are read once, without taking the state lock. */
				2674	shutdown = READ_ONCE(sk->sk_shutdown);
				2675	state = READ_ONCE(sk->sk_state);
				2676
				2677	/* exceptional events? */
				2678	if (sk->sk_err)
				2679	mask \|= EPOLLERR;
				2680	if (shutdown == SHUTDOWN_MASK)
				2681	mask \|= EPOLLHUP;
				2682	if (shutdown & RCV_SHUTDOWN)
				2683	mask \|= EPOLLRDHUP \| EPOLLIN \| EPOLLRDNORM;
				2684
				2685	/* readable? */
				2686	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
				2687	mask \|= EPOLLIN \| EPOLLRDNORM;
				2688
				2689	/* Connection-based need to check for termination and startup */
				2690	if ((sk->sk_type == SOCK_STREAM \|\| sk->sk_type == SOCK_SEQPACKET) &&
				2691	state == TCP_CLOSE)
				2692	mask \|= EPOLLHUP;
				2693
				2694	/*
				2695	* we set writable also when the other side has shut down the
				2696	* connection. This prevents stuck sockets.
				2697	*/
				2698	if (unix_writable(sk, state))
				2699	mask \|= EPOLLOUT \| EPOLLWRNORM \| EPOLLWRBAND;
				2700
				2701	return mask;
				2702	}
				2703
				/*
				 * unix_dgram_poll - poll handler that also checks the peer's queue.
				 *
				 * Builds the error/hang-up/read part of the mask like unix_poll(), also
				 * counting a non-empty error queue as an error.  The write part is only
				 * evaluated when the caller asked for write events.
				 */
				2704	static __poll_t unix_dgram_poll(struct file file, struct socket sock,
				2705	poll_table *wait)
				2706	{
				2707	struct sock sk = sock->sk, other;
				2708	unsigned int writable;
				2709	unsigned char state;
				2710	__poll_t mask;
				2711	u8 shutdown;
				2712
				2713	sock_poll_wait(file, sock, wait);
				2714	mask = 0;
				2715	shutdown = READ_ONCE(sk->sk_shutdown);
				2716	state = READ_ONCE(sk->sk_state);
				2717
				2718	/* exceptional events? */
				2719	if (sk->sk_err \|\| !skb_queue_empty_lockless(&sk->sk_error_queue))
				2720	mask \|= EPOLLERR \|
				2721	(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
				2722
				2723	if (shutdown & RCV_SHUTDOWN)
				2724	mask \|= EPOLLRDHUP \| EPOLLIN \| EPOLLRDNORM;
				2725	if (shutdown == SHUTDOWN_MASK)
				2726	mask \|= EPOLLHUP;
				2727
				2728	/* readable? */
				2729	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
				2730	mask \|= EPOLLIN \| EPOLLRDNORM;
				2731
				2732	/* Connection-based need to check for termination and startup */
				2733	if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE)
				2734	mask \|= EPOLLHUP;
				2735
				2736	/* No write status requested, avoid expensive OUT tests. */
				2737	if (!(poll_requested_events(wait) & (EPOLLWRBAND\|EPOLLWRNORM\|EPOLLOUT)))
				2738	return mask;
				2739
				2740	writable = unix_writable(sk, state);
				2741	if (writable) {
				2742	unix_state_lock(sk);
				2743
				/* Not writable when there is a peer that is not connected back to
				 * us, its receive queue is full and unix_dgram_peer_wake_me()
				 * returns non-zero.
				 * NOTE(review): that helper presumably arms a wake-up on the peer
				 * - it is defined outside this section; confirm.
				 */
				2744	other = unix_peer(sk);
				2745	if (other && unix_peer(other) != sk &&
				2746	unix_recvq_full_lockless(other) &&
				2747	unix_dgram_peer_wake_me(sk, other))
				2748	writable = 0;
				2749
				2750	unix_state_unlock(sk);
				2751	}
				2752
				2753	if (writable)
				2754	mask \|= EPOLLOUT \| EPOLLWRNORM \| EPOLLWRBAND;
				2755	else
				2756	sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				2757
				2758	return mask;
				2759	}
				2760
				2761	#ifdef CONFIG_PROC_FS
				2762
				/*
				 * A seq_file position packs a hash bucket index and an offset within the
				 * bucket into one long: the low BUCKET_SPACE bits hold the offset and the
				 * bits above them hold the bucket.
				 */
				2763	#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
				2764
				/* get_bucket(): bucket part; get_offset(): offset part;
				 * set_bucket_offset(): combine bucket b and offset o into a position.
				 */
				2765	#define get_bucket(x) ((x) >> BUCKET_SPACE)
				2766	#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
				2767	#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE \| (o))
				2768
				/*
				 * unix_from_bucket - find the socket addressed by *pos.
				 *
				 * Walks the bucket encoded in *pos, counting only sockets whose network
				 * namespace matches the seq file's, and returns the one whose 1-based
				 * count equals the offset encoded in *pos.  Returns NULL when the chain
				 * ends first.
				 */
				2769	static struct sock unix_from_bucket(struct seq_file seq, loff_t *pos)
				2770	{
				2771	unsigned long offset = get_offset(*pos);
				2772	unsigned long bucket = get_bucket(*pos);
				2773	struct sock *sk;
				2774	unsigned long count = 0;
				2775
				2776	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
				2777	if (sock_net(sk) != seq_file_net(seq))
				2778	continue;
				2779	if (++count == offset)
				2780	break;
				2781	}
				2782
				2783	return sk;
				2784	}
				2785
				/*
				 * unix_next_socket - return the next socket to show, or NULL at the end.
				 *
				 * When sk is a real socket pointer (it compares greater than
				 * SEQ_START_TOKEN), the rest of its chain is scanned first for a socket in
				 * the seq file's network namespace.  Otherwise, or once that chain is
				 * exhausted, the position in *pos is looked up and *pos is moved to
				 * offset 1 of each following bucket until a socket is found or all
				 * buckets have been visited.
				 */
				2786	static struct sock unix_next_socket(struct seq_file seq,
				2787	struct sock *sk,
				2788	loff_t *pos)
				2789	{
				2790	unsigned long bucket;
				2791
				2792	while (sk > (struct sock *)SEQ_START_TOKEN) {
				2793	sk = sk_next(sk);
				2794	if (!sk)
				2795	goto next_bucket;
				2796	if (sock_net(sk) == seq_file_net(seq))
				2797	return sk;
				2798	}
				2799
				2800	do {
				2801	sk = unix_from_bucket(seq, pos);
				2802	if (sk)
				2803	return sk;
				2804
				/* Also entered from the loop above when a chain runs out. */
				2805	next_bucket:
				2806	bucket = get_bucket(*pos) + 1;
				2807	*pos = set_bucket_offset(bucket, 1);
				2808	} while (bucket < ARRAY_SIZE(unix_socket_table));
				2809
				2810	return NULL;
				2811	}
				2812
				/*
				 * unix_seq_start - seq_file start callback.
				 *
				 * Takes unix_table_lock, which unix_seq_stop() releases.  Returns
				 * SEQ_START_TOKEN at position 0, NULL when the bucket encoded in *pos is
				 * out of range, and otherwise the socket found for *pos.
				 */
				2813	static void unix_seq_start(struct seq_file seq, loff_t *pos)
				2814	__acquires(unix_table_lock)
				2815	{
				2816	spin_lock(&unix_table_lock);
				2817
				2818	if (!*pos)
				2819	return SEQ_START_TOKEN;
				2820
				2821	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
				2822	return NULL;
				2823
				2824	return unix_next_socket(seq, NULL, pos);
				2825	}
				2826
				/* seq_file next callback: advance *pos by one and return the following socket. */
				2827	static void unix_seq_next(struct seq_file seq, void v, loff_t pos)
				2828	{
				2829	++*pos;
				2830	return unix_next_socket(seq, v, pos);
				2831	}
				2832
				/* seq_file stop callback: release unix_table_lock taken in unix_seq_start(). */
				2833	static void unix_seq_stop(struct seq_file seq, void v)
				2834	__releases(unix_table_lock)
				2835	{
				2836	spin_unlock(&unix_table_lock);
				2837	}
				2838
				/*
				 * unix_seq_show - seq_file show callback; prints one line of output.
				 *
				 * For SEQ_START_TOKEN the column header is printed.  For a socket the line
				 * holds its pointer, reference count, a constant 0 protocol, the listening
				 * flag, the type, a connection state value and the inode number, followed
				 * by the bound name if any.  Always returns 0.
				 */
				2839	static int unix_seq_show(struct seq_file seq, void v)
				2840	{
				2841
				2842	if (v == SEQ_START_TOKEN)
				2843	seq_puts(seq, "Num RefCount Protocol Flags Type St "
				2844	"Inode Path\n");
				2845	else {
				2846	struct sock *s = v;
				2847	struct unix_sock *u = unix_sk(s);
				2848	unix_state_lock(s);
				2849
				/* The state column depends on whether s->sk_socket is set and on
				 * whether the socket is in TCP_ESTABLISHED.
				 */
				2850	seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
				2851	s,
				2852	refcount_read(&s->sk_refcnt),
				2853	0,
				2854	s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
				2855	s->sk_type,
				2856	s->sk_socket ?
				2857	(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
				2858	(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
				2859	sock_i_ino(s));
				2860
				2861	if (u->addr) { // under unix_table_lock here
				2862	int i, len;
				2863	seq_putc(seq, ' ');
				2864
				2865	i = 0;
				/* Path length is the address length minus sizeof(short).  A
				 * non-abstract name drops one more byte (presumably the
				 * trailing NUL - confirm); an abstract name gets '@' printed
				 * in place of its first byte.
				 */
				2866	len = u->addr->len - sizeof(short);
				2867	if (!UNIX_ABSTRACT(s))
				2868	len--;
				2869	else {
				2870	seq_putc(seq, '@');
				2871	i++;
				2872	}
				/* Zero bytes inside the name are printed as '@'. */
				2873	for ( ; i < len; i++)
				2874	seq_putc(seq, u->addr->name->sun_path[i] ?:
				2875	'@');
				2876	}
				2877	unix_state_unlock(s);
				2878	seq_putc(seq, '\n');
				2879	}
				2880
				2881	return 0;
				2882	}
				2883
				/* seq_file callbacks for the "unix" proc entry created in unix_net_init(). */
				2884	static const struct seq_operations unix_seq_ops = {
				2885	.start = unix_seq_start,
				2886	.next = unix_seq_next,
				2887	.stop = unix_seq_stop,
				2888	.show = unix_seq_show,
				2889	};
				2890	#endif
				2891
				/* Protocol family descriptor for PF_UNIX; sockets are created by unix_create(). */
				2892	static const struct net_proto_family unix_family_ops = {
				2893	.family = PF_UNIX,
				2894	.create = unix_create,
				2895	.owner = THIS_MODULE,
				2896	};
				2897
				2898
				/*
				 * unix_net_init - per-network-namespace setup.
				 *
				 * Sets the default max_dgram_qlen sysctl value to 10, registers the
				 * sysctls and, with CONFIG_PROC_FS, creates the "unix" entry under
				 * net->proc_net.  Returns 0 on success or -ENOMEM when either step fails.
				 */
				2899	static int __net_init unix_net_init(struct net *net)
				2900	{
				2901	int error = -ENOMEM;
				2902
				2903	net->unx.sysctl_max_dgram_qlen = 10;
				2904	if (unix_sysctl_register(net))
				2905	goto out;
				2906
				2907	#ifdef CONFIG_PROC_FS
				2908	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
				2909	sizeof(struct seq_net_private))) {
				/* Undo the sysctl registration before failing. */
				2910	unix_sysctl_unregister(net);
				2911	goto out;
				2912	}
				2913	#endif
				2914	error = 0;
				2915	out:
				2916	return error;
				2917	}
				2918
				/* Per-namespace teardown: unregister the sysctls and remove the "unix" proc entry. */
				2919	static void __net_exit unix_net_exit(struct net *net)
				2920	{
				2921	unix_sysctl_unregister(net);
				2922	remove_proc_entry("unix", net->proc_net);
				2923	}
				2924
				/* Per-network-namespace init/exit hooks registered by af_unix_init(). */
				2925	static struct pernet_operations unix_net_ops = {
				2926	.init = unix_net_init,
				2927	.exit = unix_net_exit,
				2928	};
				2929
				/*
				 * af_unix_init - register the AF_UNIX protocol, family and pernet hooks.
				 *
				 * Returns the result of proto_register(); on failure it logs a critical
				 * message and skips the remaining registrations.
				 */
				2930	static int __init af_unix_init(void)
				2931	{
				2932	int rc = -1;
				2933
				/* Compile-time check that unix_skb_parms fits in the skb cb[] area. */
				2934	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
				2935
				2936	rc = proto_register(&unix_proto, 1);
				2937	if (rc != 0) {
				2938	pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
				2939	goto out;
				2940	}
				2941
				/* NOTE(review): the return values of these two calls are not checked. */
				2942	sock_register(&unix_family_ops);
				2943	register_pernet_subsys(&unix_net_ops);
				2944	out:
				2945	return rc;
				2946	}
				2947
				/* Module exit: unregister the PF_UNIX family, the protocol and the pernet hooks. */
				2948	static void __exit af_unix_exit(void)
				2949	{
				2950	sock_unregister(PF_UNIX);
				2951	proto_unregister(&unix_proto);
				2952	unregister_pernet_subsys(&unix_net_ops);
				2953	}
				2954
				2955	/* Earlier than device_initcall() so that other drivers invoking
				2956	request_module() don't end up in a loop when modprobe tries
				2957	to use a UNIX socket. But later than subsys_initcall() because
				2958	we depend on stuff initialised there */
				2959	fs_initcall(af_unix_init);
				/* af_unix_exit() is the module's exit handler. */
				2960	module_exit(af_unix_exit);
				2961
				/* Module metadata: GPL licence and the network-protocol alias for PF_UNIX. */
				2962	MODULE_LICENSE("GPL");
				2963	MODULE_ALIAS_NETPROTO(PF_UNIX);