/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect
 *					algorithm. Lots of bug fixes.
 *	    Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	    Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge amount
 *					of socks hashed (this is for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *	    Artur Skawina	:	Hash function optimizations
 *	    Alexey Kuznetsov	:	Full scale SMP. Lot of bugs are introduced 8)
 *	    Malcolm Beattie	:	Set peercred for socketpair
 *	    Michal Ostrowski	:	Module initialization cleanup.
 *	    Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with 0, so that this name space does not intersect
 *		  with BSD names.
 */
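
/*
 * For illustration, the two kinds of bindings differ only in the first
 * byte of sun_path. A minimal userspace sketch (assumes <sys/un.h> and
 * <stddef.h>; not code from this file):
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *
 *	strcpy(a.sun_path, "/tmp/sock");          (filesystem name)
 *	bind(fd, (struct sockaddr *)&a, sizeof(a));
 *
 *	a.sun_path[0] = 0;                        (abstract name "\0name")
 *	memcpy(a.sun_path + 1, "name", 4);
 *	bind(fd, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 1 + 4);
 */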

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>

struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;

#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */

/*
 * SMP locking strategy:
 *    the hash table is protected with the spinlock unix_table_lock
 *    each socket state is protected by a separate spin lock.
 */

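/*
 * Fold the 32-bit checksum of the name down to a hash bucket index by
 * xor-ing the high bits in before masking with UNIX_HASH_SIZE-1, so
 * that entropy from every byte of the checksum reaches the low bits.
 */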
static inline unsigned unix_hash_fold(__wsum n)
{
	unsigned hash = (__force unsigned)n;
	hash ^= hash>>16;
	hash ^= hash>>8;
	return hash&(UNIX_HASH_SIZE-1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (atomic_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check a unix socket name:
 *		- it should not be zero length.
 *		- if it does not start with a zero byte, it should be
 *		  NUL-terminated (an FS object)
 *		- if it starts with a zero byte, it is an abstract name.
 */

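/*
 * For example, binding the pathname "/tmp/x" with addr_len ==
 * sizeof(struct sockaddr_un) is trimmed to strlen("/tmp/x") + 1 +
 * sizeof(short), while an abstract name keeps the caller's length
 * unchanged and additionally has *hashp computed over its bytes.
 */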
static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist. However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned hash)
{
	struct sock *s;
	struct hlist_node *node;

	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;
	struct hlist_node *node;

	spin_lock(&unix_table_lock);
	sk_for_each(s, node,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && dentry->d_inode == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

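/*
 * A unix socket counts as writable while no more than a quarter of its
 * send buffer is consumed by in-flight skbs (note the << 2 below).
 */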
static inline int unix_writable(struct sock *sk)
{
	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				POLLOUT | POLLWRNORM | POLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows us
 * to do flow control based only on wmem_alloc; second, an sk connected to a
 * peer may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal an error. Messages are lost. Do not do this
		 * when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What does the above comment talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}

static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct pid *old_pid = NULL;
	const struct cred *old_cred = NULL;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	put_pid(old_pid);
	if (old_cred)
		put_cred(old_cred);
out:
	return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t, int);

static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->readlock))
		return -EINTR;

	sk->sk_peek_off = val;
	mutex_unlock(&u->readlock);

	return 0;
}


static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};

/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;

static struct sock *unix_create1(struct net *net, struct socket *sock)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);
	lockdep_set_class(&sk->sk_receive_queue.lock,
				&af_unix_sk_receive_queue_lock_key);

	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->readlock); /* single task reading lock */
	init_waitqueue_head(&u->peer_wait);
	unix_insert_socket(unix_sockets_unbound, sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 * Believe it or not, BSD has AF_UNIX, SOCK_RAW though
		 * nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock) ? 0 : -ENOMEM;
}

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}

static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->readlock);
	if (err)
		return err;

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->readlock);
	return err;
}

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = path.dentry->d_inode;
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}


static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	struct dentry *dentry = NULL;
	struct path path;
	int err;
	unsigned hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	err = mutex_lock_interruptible(&u->readlock);
	if (err)
		goto out;

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		umode_t mode;
		err = 0;
		/*
		 * Get the parent directory and calculate the hash for the
		 * last component.
		 */
		dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
		err = PTR_ERR(dentry);
		if (IS_ERR(dentry))
			goto out_mknod_parent;

		/*
		 * All right, let's create it.
		 */
		mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = mnt_want_write(path.mnt);
		if (err)
			goto out_mknod_dput;
		err = security_path_mknod(&path, dentry, mode, 0);
		if (err)
			goto out_mknod_drop_write;
		err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
out_mknod_drop_write:
		mnt_drop_write(path.mnt);
		if (err)
			goto out_mknod_dput;
		mutex_unlock(&path.dentry->d_inode->i_mutex);
		dput(path.dentry);
		path.dentry = dentry;

		addr->hash = UNIX_HASH_SIZE;
	}

	spin_lock(&unix_table_lock);

	if (!sun_path[0]) {
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	} else {
		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
		u->path = path;
	}

	err = 0;
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;

out_mknod_dput:
	dput(dentry);
	mutex_unlock(&path.dentry->d_inode->i_mutex);
	path_put(&path);
out_mknod_parent:
	if (err == -EEXIST)
		err = -EADDRINUSE;
	unix_release_addr(addr);
	goto out_up;
}

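/*
 * Take both peers' state locks in a fixed (pointer) order so that two
 * tasks locking the same pair concurrently cannot deadlock; sk1 == sk2
 * (a socket connecting to itself) and sk2 == NULL degenerate to taking
 * a single lock.
 */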
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 * 1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

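/*
 * Park the caller exclusively on the peer's peer_wait queue until the
 * peer's receive queue has room, the peer dies or shuts down, or the
 * timeout elapses. Called with the peer's state lock held; returns
 * with it dropped, handing back the remaining timeout.
 */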
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we do this after the state is locked,
	   we will have to recheck everything again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/* Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   This is a tricky place. We need to grab our state lock and cannot
	   drop the lock on the peer. It is dangerous because deadlock is
	   possible. The connect-to-self case and simultaneous
	   attempts to connect are eliminated by checking the socket
	   state. other is TCP_LISTEN; if sk is TCP_LISTEN we
	   check this before attempting to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock */
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}

static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	u = unix_sk(sk);
	unix_state_lock(sk);
	if (!u->addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		struct unix_address *addr = u->addr;

		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	unix_state_unlock(sk);
	sock_put(sk);
out:
	return err;
}

static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	scm->fp = UNIXCB(skb).fp;
	UNIXCB(skb).fp = NULL;

	for (i = scm->fp->count-1; i >= 0; i--)
		unix_notinflight(scm->fp->fp[i]);
}

static void unix_destruct_scm(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	scm.pid  = UNIXCB(skb).pid;
	scm.cred = UNIXCB(skb).cred;
	if (UNIXCB(skb).fp)
		unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() had been SMP-safe since the last Summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}

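/*
 * SCM_RIGHTS may carry AF_UNIX sockets that themselves hold in-flight
 * AF_UNIX sockets; the nesting depth is capped so the garbage
 * collector never has to chase arbitrarily deep chains (senders get
 * -ETOOMANYREFS beyond this).
 */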
#define MAX_RECURSION_LEVEL 4

static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;
	unsigned char max_level = 0;
	int unix_sock_count = 0;

	for (i = scm->fp->count - 1; i >= 0; i--) {
		struct sock *sk = unix_get_socket(scm->fp->fp[i]);

		if (sk) {
			unix_sock_count++;
			max_level = max(max_level,
					unix_sk(sk)->recursion_level);
		}
	}
	if (unlikely(max_level > MAX_RECURSION_LEVEL))
		return -ETOOMANYREFS;

	/*
	 * Need to duplicate file references for the sake of garbage
	 * collection. Otherwise a socket in the fps might become a
	 * candidate for GC while the skb is not yet queued.
	 */
	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
	if (!UNIXCB(skb).fp)
		return -ENOMEM;

	if (unix_sock_count) {
		for (i = scm->fp->count - 1; i >= 0; i--)
			unix_inflight(scm->fp->fp[i]);
	}
	return max_level;
}

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid = get_pid(scm->pid);
	if (scm->cred)
		UNIXCB(skb).cred = get_cred(scm->cred);
	UNIXCB(skb).fp = NULL;
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if the source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).cred)
		return;
	if (test_bit(SOCK_PASSCRED, &sock->flags) ||
	    !other->sk_socket ||
	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
		UNIXCB(skb).pid  = get_pid(task_tgid(current));
		UNIXCB(skb).cred = get_current_cred();
	}
}

/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
			      struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie tmp_scm;
	int max_level;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(siocb->scm, skb, true);
	if (err < 0)
		goto out_free;
	max_level = err + 1;
	unix_get_secdata(siocb->scm, skb);

	skb_reset_transport_header(skb);
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	unix_state_lock(other);
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (sock_flag(other, SOCK_DEAD)) {
		/*
		 * Check with 1003.1g - what should
		 * datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		err = 0;
		unix_state_lock(sk);
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	if (unix_peer(other) != sk && unix_recvq_full(other)) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out_free;

		goto restart;
	}

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	skb_queue_tail(&other->sk_receive_queue, skb);
	if (max_level > unix_sk(other)->recursion_level)
		unix_sk(other)->recursion_level = max_level;
	unix_state_unlock(other);
	other->sk_data_ready(other, len);
	sock_put(other);
	scm_destroy(siocb->scm);
	return len;

out_unlock:
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	return err;
}


static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie tmp_scm;
	bool fds_sent = false;
	int max_level;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		/*
		 * Optimisation for the fact that under 0.01% of X
		 * messages typically need breaking up.
		 */

		size = len-sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > ((sk->sk_sndbuf >> 1) - 64))
			size = (sk->sk_sndbuf >> 1) - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 * Grab a buffer
		 */

		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
					  &err);

		if (skb == NULL)
			goto out_err;

		/*
		 * If you pass two values to sock_alloc_send_skb
		 * it tries to grab the large buffer with GFP_NOFS
		 * (which can fail easily), and if it fails grabs the
		 * fallback size buffer which is under a page and will
		 * succeed. [Alan]
		 */
		size = min_t(int, size, skb_tailroom(skb));


		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		max_level = err + 1;
		fds_sent = true;

		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		skb_queue_tail(&other->sk_receive_queue, skb);
		if (max_level > unix_sk(other)->recursion_level)
			unix_sk(other)->recursion_level = max_level;
		unix_state_unlock(other);
		other->sk_data_ready(other, size);
		sent += size;
	}

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	return sent ? : err;
}

static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
				  struct msghdr *msg, size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(kiocb, sock, msg, len);
}

static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
				  struct msghdr *msg, size_t size,
				  int flags)
{
	struct sock *sk = sock->sk;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
}

static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	if (u->addr) {
		msg->msg_namelen = u->addr->len;
		memcpy(msg->msg_name, u->addr->name, u->addr->len);
	}
}

static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
			      struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err;
	int peeked, skip;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	err = mutex_lock_interruptible(&u->readlock);
	if (unlikely(err)) {
		/* recvmsg() in non blocking mode is supposed to return -EAGAIN;
		 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
		 */
		err = noblock ? -EAGAIN : -ERESTARTSYS;
		goto out;
	}

	skip = sk_peek_offset(sk, flags);

	skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
	if (!skb) {
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out_unlock;
	}

	wake_up_interruptible_sync_poll(&u->peer_wait,
					POLLOUT | POLLWRNORM | POLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, skip, msg->msg_iov, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}
	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
	unix_set_secdata(siocb->scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(siocb->scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		   - not return fds - good, but too simple 8)
		   - return fds, and not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose this for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/

		sk_peek_offset_fwd(sk, size);
		if (UNIXCB(skb).fp)
			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, siocb->scm, flags);

out_free:
	skb_free_datagram(sk, skb);
out_unlock:
	mutex_unlock(&u->readlock);
out:
	return err;
}

/*
 * Sleep until data has arrived. But check for races..
 */

static long unix_stream_data_wait(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		if (!skb_queue_empty(&sk->sk_receive_queue) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
		unix_state_unlock(sk);
		timeo = schedule_timeout(timeo);
		unix_state_lock(sk);
		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}

static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t size,
			       int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	int copied = 0;
	int noblock = flags & MSG_DONTWAIT;
	int check_creds = 0;
	int target;
	int err = 0;
	long timeo;
	int skip;

	err = -EINVAL;
	if (sk->sk_state != TCP_ESTABLISHED)
		goto out;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	/* Lock the socket to prevent queue disordering
	 * while sleeping in memcpy_tomsg
	 */

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}

	err = mutex_lock_interruptible(&u->readlock);
	if (unlikely(err)) {
		/* recvmsg() in non blocking mode is supposed to return -EAGAIN;
		 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
		 */
		err = noblock ? -EAGAIN : -ERESTARTSYS;
		goto out;
	}

	skip = sk_peek_offset(sk, flags);

	do {
		int chunk;
		struct sk_buff *skb;

		unix_state_lock(sk);
		skb = skb_peek(&sk->sk_receive_queue);
again:
		if (skb == NULL) {
			unix_sk(sk)->recursion_level = 0;
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			err = -EAGAIN;
			if (!timeo)
				break;
			mutex_unlock(&u->readlock);

			timeo = unix_stream_data_wait(sk, timeo);

			if (signal_pending(current)
			    ||  mutex_lock_interruptible(&u->readlock)) {
				err = sock_intr_errno(timeo);
				goto out;
			}

			continue;
unlock:
			unix_state_unlock(sk);
			break;
		}

		if (skip >= skb->len) {
			skip -= skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
			    (UNIXCB(skb).cred != siocb->scm->cred))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
			check_creds = 1;
		}

		/* Copy address just once */
		if (sunaddr) {
			unix_copy_addr(msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, skb->len - skip, size);
		if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			skb_pull(skb, chunk);

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(siocb->scm, skb);

			if (skb->len)
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			if (siocb->scm->fp)
				break;
		} else {
			/* It is questionable, see the note in
			 * unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);

			sk_peek_offset_fwd(sk, chunk);

			break;
		}
	} while (size);

	mutex_unlock(&u->readlock);
	scm_recv(sock, msg, siocb->scm, flags);
out:
	return copied ? : err;
}

static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

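	/* SHUT_RD/SHUT_WR/SHUT_RDWR arrive as 0/1/2; "+1" maps them onto
	 * the RCV_SHUTDOWN/SEND_SHUTDOWN bit masks 1/2/3 before masking.
	 */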
	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);

	if (!mode)
		return 0;

	unix_state_lock(sk);
	sk->sk_shutdown |= mode;
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

		int peer_mode = 0;

		if (mode&RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode&SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		other->sk_shutdown |= peer_mode;
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}

long unix_inq_len(struct sock *sk)
{
	struct sk_buff *skb;
	long amount = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -EINVAL;

	spin_lock(&sk->sk_receive_queue.lock);
	if (sk->sk_type == SOCK_STREAM ||
	    sk->sk_type == SOCK_SEQPACKET) {
		skb_queue_walk(&sk->sk_receive_queue, skb)
			amount += skb->len;
	} else {
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);

long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);

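/*
 * unix_inq_len()/unix_outq_len() back the SIOCINQ/SIOCOUTQ ioctls
 * handled below; a userspace sketch:
 *
 *	int pending;
 *	if (ioctl(fd, SIOCINQ, &pending) == 0)
 *		(pending now holds the queued byte count)
 */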
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = unix_outq_len(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		amount = unix_inq_len(sk);
		if (amount < 0)
			err = amount;
		else
			err = put_user(amount, (int __user *)arg);
		break;
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}

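/*
 * poll() for SOCK_STREAM and SOCK_SEQPACKET sockets: build the event
 * mask from pending errors, shutdown state, queued data and write
 * space.
 */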
static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err)
		mask |= POLLERR;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= POLLHUP;

	/*
	 * We set writable also when the other side has shut down the
	 * connection.  This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;

	return mask;
}

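/*
 * poll() for datagram sockets.  The expensive part is the writability
 * check: a connected sender must also poll the peer's receive queue,
 * since a full peer queue blocks further sends.  That check is skipped
 * entirely when the caller did not ask for any POLLOUT-style events.
 */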
static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int mask, writable;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (POLLWRBAND | POLLWRNORM | POLLOUT)))
		return mask;

	writable = unix_writable(sk);
	other = unix_peer_get(sk);
	if (other) {
		if (unix_peer(other) != sk) {
			sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
			if (unix_recvq_full(other))
				writable = 0;
		}
		sock_put(other);
	}

	if (writable)
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}

#ifdef CONFIG_PROC_FS
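/*
 * /proc/net/unix support: walk the global unix_socket_table hash
 * chains in order.  first_unix_socket()/next_unix_socket() provide a
 * flat iteration over all chains; the seq_file callbacks below layer
 * per-netns filtering and locking (unix_table_lock) on top of them.
 */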
static struct sock *first_unix_socket(int *i)
{
	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
		if (!hlist_empty(&unix_socket_table[*i]))
			return __sk_head(&unix_socket_table[*i]);
	}
	return NULL;
}

static struct sock *next_unix_socket(int *i, struct sock *s)
{
	struct sock *next = sk_next(s);
	/* More in this chain? */
	if (next)
		return next;
	/* Look for next non-empty chain. */
	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
		if (!hlist_empty(&unix_socket_table[*i]))
			return __sk_head(&unix_socket_table[*i]);
	}
	return NULL;
}

struct unix_iter_state {
	struct seq_net_private p;
	int i;
};

static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
{
	struct unix_iter_state *iter = seq->private;
	loff_t off = 0;
	struct sock *s;

	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
		if (sock_net(s) != seq_file_net(seq))
			continue;
		if (off == pos)
			return s;
		++off;
	}
	return NULL;
}

static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);
	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct unix_iter_state *iter = seq->private;
	struct sock *sk = v;
	++*pos;

	if (v == SEQ_START_TOKEN)
		sk = first_unix_socket(&iter->i);
	else
		sk = next_unix_socket(&iter->i, sk);
	while (sk && (sock_net(sk) != seq_file_net(seq)))
		sk = next_unix_socket(&iter->i, sk);
	return sk;
}

static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}

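/*
 * Emit one line of /proc/net/unix per socket.  A typical entry looks
 * roughly like (illustrative values only):
 *
 *	ffff8800b05cb240: 00000002 00000000 00010000 0001 01 17890 /run/x.sock
 *
 * i.e. kernel address, refcount, protocol (always 0), flags, type,
 * socket state and inode, followed by the bound path; abstract names
 * are printed with a leading '@'.
 */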
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			atomic_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i]);
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}

static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};

static int unix_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &unix_seq_ops,
			    sizeof(struct unix_iter_state));
}

static const struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif

static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};

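/*
 * Per-namespace setup: give each netns its own dgram backlog sysctl
 * (default 10) and its own /proc/net/unix listing.  If the proc entry
 * cannot be created, the sysctl registration is rolled back.
 */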
static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}

static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};

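/*
 * Module init: unix_skb_parms lives in skb->cb, so fail the build if
 * it ever outgrows that scratch space, then register the protocol,
 * the PF_UNIX address family and the per-namespace operations.
 */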
static int __init af_unix_init(void)
{
	int rc = -1;
	struct sk_buff *dummy_skb;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
		       __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
out:
	return rc;
}

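/* Module exit: unregister the family, the protocol and the pernet ops. */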
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}

/* Earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries
 * to use a UNIX socket.  But later than subsys_initcall() because
 * we depend on stuff initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);