1/*
2 * NETLINK Kernel-user communication protocol.
3 *
4 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>
5 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
13 * added netlink_proto_exit
14 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
15 * use nlk_sk, as sk->protinfo is on a diet 8)
16 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
17 * - inc module use count of module that owns
18 * the kernel socket in case userspace opens
19 * socket of same protocol
20 * - remove all module support, since netlink is
21 * mandatory if CONFIG_NET=y these days
22 */
23
24#include <linux/module.h>
25
26#include <linux/capability.h>
27#include <linux/kernel.h>
28#include <linux/init.h>
29#include <linux/signal.h>
30#include <linux/sched.h>
31#include <linux/errno.h>
32#include <linux/string.h>
33#include <linux/stat.h>
34#include <linux/socket.h>
35#include <linux/un.h>
36#include <linux/fcntl.h>
37#include <linux/termios.h>
38#include <linux/sockios.h>
39#include <linux/net.h>
40#include <linux/fs.h>
41#include <linux/slab.h>
42#include <asm/uaccess.h>
43#include <linux/skbuff.h>
44#include <linux/netdevice.h>
45#include <linux/rtnetlink.h>
46#include <linux/proc_fs.h>
47#include <linux/seq_file.h>
48#include <linux/notifier.h>
49#include <linux/security.h>
50#include <linux/jhash.h>
51#include <linux/jiffies.h>
52#include <linux/random.h>
53#include <linux/bitops.h>
54#include <linux/mm.h>
55#include <linux/types.h>
56#include <linux/audit.h>
57#include <linux/mutex.h>
58
59#include <net/net_namespace.h>
60#include <net/sock.h>
61#include <net/scm.h>
62#include <net/netlink.h>
63
64#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
65#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))
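
/*
 * Worked example of the group bitmap sizing macros above: with 32 groups on
 * a 64-bit build (sizeof(unsigned long) * 8 == 64), NLGRPSZ(32) rounds up to
 * 64 bits and yields 8 bytes, so NLGRPLONGS(32) is 1 long. On a 32-bit build
 * the same request yields 4 bytes and again 1 long.
 */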
66
67struct netlink_sock {
68 /* struct sock has to be the first member of netlink_sock */
69 struct sock sk;
70 u32 pid;
71 u32 dst_pid;
72 u32 dst_group;
73 u32 flags;
74 u32 subscriptions;
75 u32 ngroups;
76 unsigned long *groups;
77 unsigned long state;
78 wait_queue_head_t wait;
79 struct netlink_callback *cb;
80 struct mutex *cb_mutex;
81 struct mutex cb_def_mutex;
82 void (*netlink_rcv)(struct sk_buff *skb);
83 struct module *module;
84};
85
86struct listeners {
87 struct rcu_head rcu;
88 unsigned long masks[0];
89};
90
91#define NETLINK_KERNEL_SOCKET 0x1
92#define NETLINK_RECV_PKTINFO 0x2
93#define NETLINK_BROADCAST_SEND_ERROR 0x4
94#define NETLINK_RECV_NO_ENOBUFS 0x8
95
96/* diagnostic codes for tracking netlink uevent skb delivery */
97#define NETLINK_ERR_SK (1)
98#define NETLINK_ERR_PID (2)
99#define NETLINK_ERR_NETEQ (3)
100#define NETLINK_ERR_PFAIL (4)
101#define NETLINK_ERR_SKB2 (5)
102#define NETLINK_ERR_TXFILTER (6)
103#define NETLINK_ERR_SKFILTER (7)
104#define NETLINK_ERR_DELIVER (8)
105
106volatile int g_test_netlink = 0;
107
108static inline struct netlink_sock *nlk_sk(struct sock *sk)
109{
110 return container_of(sk, struct netlink_sock, sk);
111}
112
113u32 mnl_get_pid(struct sock *sk)
114{
115 return nlk_sk(sk)->pid;
116}
117
118static inline int netlink_is_kernel(struct sock *sk)
119{
120 return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
121}
122
123struct nl_pid_hash {
124 struct hlist_head *table;
125 unsigned long rehash_time;
126
127 unsigned int mask;
128 unsigned int shift;
129
130 unsigned int entries;
131 unsigned int max_shift;
132
133 u32 rnd;
134};
135
136struct netlink_table {
137 struct nl_pid_hash hash;
138 struct hlist_head mc_list;
139 struct listeners __rcu *listeners;
140 unsigned int nl_nonroot;
141 unsigned int groups;
142 struct mutex *cb_mutex;
143 struct module *module;
144 int registered;
145};
146
147static struct netlink_table *nl_table;
148
149static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
150
151static int netlink_dump(struct sock *sk);
152static void netlink_destroy_callback(struct netlink_callback *cb);
153
154static DEFINE_RWLOCK(nl_table_lock);
155static atomic_t nl_table_users = ATOMIC_INIT(0);
156
157#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock))
158
159static ATOMIC_NOTIFIER_HEAD(netlink_chain);
160
161static inline u32 netlink_group_mask(u32 group)
162{
163 return group ? 1 << (group - 1) : 0;
164}
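
/*
 * Example: netlink_group_mask(1) == 0x1 and netlink_group_mask(5) == 0x10,
 * while group 0 (no group) maps to an empty mask. This is how the 1-based
 * group numbers carried in sockaddr_nl relate to the listener bitmaps below.
 */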
165
166static inline struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
167{
168 return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask];
169}
170
171static void netlink_sock_destruct(struct sock *sk)
172{
173 struct netlink_sock *nlk = nlk_sk(sk);
174
175 if (nlk->cb) {
176 if (nlk->cb->done)
177 nlk->cb->done(nlk->cb);
178
179 module_put(nlk->cb->module);
180 netlink_destroy_callback(nlk->cb);
181 }
182
183 skb_queue_purge(&sk->sk_receive_queue);
184
185 if (!sock_flag(sk, SOCK_DEAD)) {
186 printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
187 return;
188 }
189
190 WARN_ON(atomic_read(&sk->sk_rmem_alloc));
191 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
192 WARN_ON(nlk_sk(sk)->groups);
193}
194
195/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
196 * SMP. Look, when several writers sleep and reader wakes them up, all but one
197 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
198 * this, _but_ remember, it adds useless work on UP machines.
199 */
200
201void netlink_table_grab(void)
202 __acquires(nl_table_lock)
203{
204 might_sleep();
205
206 write_lock_irq(&nl_table_lock);
207
208 if (atomic_read(&nl_table_users)) {
209 DECLARE_WAITQUEUE(wait, current);
210
211 add_wait_queue_exclusive(&nl_table_wait, &wait);
212 for (;;) {
213 set_current_state(TASK_UNINTERRUPTIBLE);
214 if (atomic_read(&nl_table_users) == 0)
215 break;
216 write_unlock_irq(&nl_table_lock);
217 schedule();
218 write_lock_irq(&nl_table_lock);
219 }
220
221 __set_current_state(TASK_RUNNING);
222 remove_wait_queue(&nl_table_wait, &wait);
223 }
224}
225
226void netlink_table_ungrab(void)
227 __releases(nl_table_lock)
228{
229 write_unlock_irq(&nl_table_lock);
230 wake_up(&nl_table_wait);
231}
232
233static inline void
234netlink_lock_table(void)
235{
236 /* read_lock() synchronizes us to netlink_table_grab */
237
238 read_lock(&nl_table_lock);
239 atomic_inc(&nl_table_users);
240 read_unlock(&nl_table_lock);
241}
242
243static inline void
244netlink_unlock_table(void)
245{
246 if (atomic_dec_and_test(&nl_table_users))
247 wake_up(&nl_table_wait);
248}
249
250static struct sock *netlink_lookup(struct net *net, int protocol, u32 pid)
251{
252 struct nl_pid_hash *hash = &nl_table[protocol].hash;
253 struct hlist_head *head;
254 struct sock *sk;
255 struct hlist_node *node;
256
257 read_lock(&nl_table_lock);
258 head = nl_pid_hashfn(hash, pid);
259 sk_for_each(sk, node, head) {
260 if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->pid == pid)) {
261 sock_hold(sk);
262 goto found;
263 }
264 }
265 sk = NULL;
266found:
267 read_unlock(&nl_table_lock);
268 return sk;
269}
270
271static struct hlist_head *nl_pid_hash_zalloc(size_t size)
272{
273 if (size <= PAGE_SIZE)
274 return kzalloc(size, GFP_ATOMIC);
275 else
276 return (struct hlist_head *)
277 __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
278 get_order(size));
279}
280
281static void nl_pid_hash_free(struct hlist_head *table, size_t size)
282{
283 if (size <= PAGE_SIZE)
284 kfree(table);
285 else
286 free_pages((unsigned long)table, get_order(size));
287}
288
289static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
290{
291 unsigned int omask, mask, shift;
292 size_t osize, size;
293 struct hlist_head *otable, *table;
294 int i;
295
296 omask = mask = hash->mask;
297 osize = size = (mask + 1) * sizeof(*table);
298 shift = hash->shift;
299
300 if (grow) {
301 if (++shift > hash->max_shift)
302 return 0;
303 mask = mask * 2 + 1;
304 size *= 2;
305 }
306
307 table = nl_pid_hash_zalloc(size);
308 if (!table)
309 return 0;
310
311 otable = hash->table;
312 hash->table = table;
313 hash->mask = mask;
314 hash->shift = shift;
315 get_random_bytes(&hash->rnd, sizeof(hash->rnd));
316
317 for (i = 0; i <= omask; i++) {
318 struct sock *sk;
319 struct hlist_node *node, *tmp;
320
321 sk_for_each_safe(sk, node, tmp, &otable[i])
322 __sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid));
323 }
324
325 nl_pid_hash_free(otable, osize);
326 hash->rehash_time = jiffies + 10 * 60 * HZ;
327 return 1;
328}
329
330static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len)
331{
332 int avg = hash->entries >> hash->shift;
333
334 if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1))
335 return 1;
336
337 if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
338 nl_pid_hash_rehash(hash, 0);
339 return 1;
340 }
341
342 return 0;
343}
344
345static const struct proto_ops netlink_ops;
346
347static void
348netlink_update_listeners(struct sock *sk)
349{
350 struct netlink_table *tbl = &nl_table[sk->sk_protocol];
351 struct hlist_node *node;
352 unsigned long mask;
353 unsigned int i;
354 struct listeners *listeners;
355
356 listeners = nl_deref_protected(tbl->listeners);
357 if (!listeners)
358 return;
359
360 for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
361 mask = 0;
362 sk_for_each_bound(sk, node, &tbl->mc_list) {
363 if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
364 mask |= nlk_sk(sk)->groups[i];
365 }
366 listeners->masks[i] = mask;
367 }
368 /* this function is only called with the netlink table "grabbed", which
369 * makes sure updates are visible before bind or setsockopt return. */
370}
371
372static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
373{
374 struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
375 struct hlist_head *head;
376 int err = -EADDRINUSE;
377 struct sock *osk;
378 struct hlist_node *node;
379 int len;
380
381 netlink_table_grab();
382 head = nl_pid_hashfn(hash, pid);
383 len = 0;
384 sk_for_each(osk, node, head) {
385 if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->pid == pid))
386 break;
387 len++;
388 }
389 if (node)
390 goto err;
391
392 err = -EBUSY;
393 if (nlk_sk(sk)->pid)
394 goto err;
395
396 err = -ENOMEM;
397 if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
398 goto err;
399
400 if (len && nl_pid_hash_dilute(hash, len))
401 head = nl_pid_hashfn(hash, pid);
402 hash->entries++;
403 nlk_sk(sk)->pid = pid;
404 sk_add_node(sk, head);
405 err = 0;
406
407err:
408 netlink_table_ungrab();
409 return err;
410}
411
412static void netlink_remove(struct sock *sk)
413{
414 netlink_table_grab();
415 if (sk_del_node_init(sk))
416 nl_table[sk->sk_protocol].hash.entries--;
417 if (nlk_sk(sk)->subscriptions)
418 __sk_del_bind_node(sk);
419 netlink_table_ungrab();
420}
421
422static struct proto netlink_proto = {
423 .name = "NETLINK",
424 .owner = THIS_MODULE,
425 .obj_size = sizeof(struct netlink_sock),
426};
427
428static int __netlink_create(struct net *net, struct socket *sock,
429 struct mutex *cb_mutex, int protocol)
430{
431 struct sock *sk;
432 struct netlink_sock *nlk;
433
434 sock->ops = &netlink_ops;
435
436 sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
437 if (!sk)
438 return -ENOMEM;
439
440 sock_init_data(sock, sk);
441
442 nlk = nlk_sk(sk);
443 if (cb_mutex)
444 nlk->cb_mutex = cb_mutex;
445 else {
446 nlk->cb_mutex = &nlk->cb_def_mutex;
447 mutex_init(nlk->cb_mutex);
448 }
449 init_waitqueue_head(&nlk->wait);
450
451 sk->sk_destruct = netlink_sock_destruct;
452 sk->sk_protocol = protocol;
453 return 0;
454}
455
456static int netlink_create(struct net *net, struct socket *sock, int protocol,
457 int kern)
458{
459 struct module *module = NULL;
460 struct mutex *cb_mutex;
461 struct netlink_sock *nlk;
462 int err = 0;
463
464 sock->state = SS_UNCONNECTED;
465
466 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
467 return -ESOCKTNOSUPPORT;
468
469 if (protocol < 0 || protocol >= MAX_LINKS)
470 return -EPROTONOSUPPORT;
471
472 netlink_lock_table();
473#ifdef CONFIG_MODULES
474 if (!nl_table[protocol].registered) {
475 netlink_unlock_table();
476 request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
477 netlink_lock_table();
478 }
479#endif
480 if (nl_table[protocol].registered &&
481 try_module_get(nl_table[protocol].module))
482 module = nl_table[protocol].module;
483 else
484 err = -EPROTONOSUPPORT;
485 cb_mutex = nl_table[protocol].cb_mutex;
486 netlink_unlock_table();
487
488 if (err < 0)
489 goto out;
490
491 err = __netlink_create(net, sock, cb_mutex, protocol);
492 if (err < 0)
493 goto out_module;
494
495 local_bh_disable();
496 sock_prot_inuse_add(net, &netlink_proto, 1);
497 local_bh_enable();
498
499 nlk = nlk_sk(sock->sk);
500 nlk->module = module;
501out:
502 return err;
503
504out_module:
505 module_put(module);
506 goto out;
507}
508
509static int netlink_release(struct socket *sock)
510{
511 struct sock *sk = sock->sk;
512 struct netlink_sock *nlk;
513
514 if (!sk)
515 return 0;
516
517 netlink_remove(sk);
518 sock_orphan(sk);
519 nlk = nlk_sk(sk);
520
521 /*
522 * OK. Socket is unlinked, any packets that arrive now
523 * will be purged.
524 */
525
526 sock->sk = NULL;
527 wake_up_interruptible_all(&nlk->wait);
528
529 skb_queue_purge(&sk->sk_write_queue);
530
531 if (nlk->pid) {
532 struct netlink_notify n = {
533 .net = sock_net(sk),
534 .protocol = sk->sk_protocol,
535 .pid = nlk->pid,
536 };
537 atomic_notifier_call_chain(&netlink_chain,
538 NETLINK_URELEASE, &n);
539 }
540
541 module_put(nlk->module);
542
543 netlink_table_grab();
544 if (netlink_is_kernel(sk)) {
545 BUG_ON(nl_table[sk->sk_protocol].registered == 0);
546 if (--nl_table[sk->sk_protocol].registered == 0) {
547 struct listeners *old;
548
549 old = nl_deref_protected(nl_table[sk->sk_protocol].listeners);
550 RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL);
551 kfree_rcu(old, rcu);
552 nl_table[sk->sk_protocol].module = NULL;
553 nl_table[sk->sk_protocol].registered = 0;
554 }
555 } else if (nlk->subscriptions)
556 netlink_update_listeners(sk);
557 netlink_table_ungrab();
558
559 kfree(nlk->groups);
560 nlk->groups = NULL;
561
562 local_bh_disable();
563 sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
564 local_bh_enable();
565 sock_put(sk);
566 return 0;
567}
568
569static int netlink_autobind(struct socket *sock)
570{
571 struct sock *sk = sock->sk;
572 struct net *net = sock_net(sk);
573 struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
574 struct hlist_head *head;
575 struct sock *osk;
576 struct hlist_node *node;
577 s32 pid = task_tgid_vnr(current);
578 int err;
579 static s32 rover = -4097;
580
581retry:
582 cond_resched();
583 netlink_table_grab();
584 head = nl_pid_hashfn(hash, pid);
585 sk_for_each(osk, node, head) {
586 if (!net_eq(sock_net(osk), net))
587 continue;
588 if (nlk_sk(osk)->pid == pid) {
589 /* Bind collision, search negative pid values. */
590 pid = rover--;
591 if (rover > -4097)
592 rover = -4097;
593 netlink_table_ungrab();
594 goto retry;
595 }
596 }
597 netlink_table_ungrab();
598
599 err = netlink_insert(sk, net, pid);
600 if (err == -EADDRINUSE)
601 goto retry;
602
603 /* If 2 threads race to autobind, that is fine. */
604 if (err == -EBUSY)
605 err = 0;
606
607 return err;
608}
609
610static inline int netlink_capable(const struct socket *sock, unsigned int flag)
611{
612 return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
613 capable(CAP_NET_ADMIN);
614}
615
616static void
617netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
618{
619 struct netlink_sock *nlk = nlk_sk(sk);
620
621 if (nlk->subscriptions && !subscriptions)
622 __sk_del_bind_node(sk);
623 else if (!nlk->subscriptions && subscriptions)
624 sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
625 nlk->subscriptions = subscriptions;
626}
627
628static int netlink_realloc_groups(struct sock *sk)
629{
630 struct netlink_sock *nlk = nlk_sk(sk);
631 unsigned int groups;
632 unsigned long *new_groups;
633 int err = 0;
634
635 netlink_table_grab();
636
637 groups = nl_table[sk->sk_protocol].groups;
638 if (!nl_table[sk->sk_protocol].registered) {
639 err = -ENOENT;
640 goto out_unlock;
641 }
642
643 if (nlk->ngroups >= groups)
644 goto out_unlock;
645
646 new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
647 if (new_groups == NULL) {
648 err = -ENOMEM;
649 goto out_unlock;
650 }
651 memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
652 NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));
653
654 nlk->groups = new_groups;
655 nlk->ngroups = groups;
656 out_unlock:
657 netlink_table_ungrab();
658 return err;
659}
660
661static int netlink_bind(struct socket *sock, struct sockaddr *addr,
662 int addr_len)
663{
664 struct sock *sk = sock->sk;
665 struct net *net = sock_net(sk);
666 struct netlink_sock *nlk = nlk_sk(sk);
667 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
668 int err;
669
670 if (nladdr->nl_family != AF_NETLINK)
671 return -EINVAL;
672
673	/* Only the superuser is allowed to listen to multicasts */
674 if (nladdr->nl_groups) {
675 if (!netlink_capable(sock, NL_NONROOT_RECV))
676 return -EPERM;
677 err = netlink_realloc_groups(sk);
678 if (err)
679 return err;
680 }
681
682 if (nlk->pid) {
683 if (nladdr->nl_pid != nlk->pid)
684 return -EINVAL;
685 } else {
686 err = nladdr->nl_pid ?
687 netlink_insert(sk, net, nladdr->nl_pid) :
688 netlink_autobind(sock);
689 if (err)
690 return err;
691 }
692
693 if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
694 return 0;
695
696 netlink_table_grab();
697 netlink_update_subscriptions(sk, nlk->subscriptions +
698 hweight32(nladdr->nl_groups) -
699 hweight32(nlk->groups[0]));
700 nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
701 netlink_update_listeners(sk);
702 netlink_table_ungrab();
703
704 return 0;
705}
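
/*
 * Illustrative userspace counterpart of the bind path above (not part of
 * this file): a process usually binds with nl_pid == 0 so the kernel picks
 * a unique id via netlink_autobind(), and sets nl_groups to subscribe to
 * groups 1-32, for example:
 *
 *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *	struct sockaddr_nl sa = { .nl_family = AF_NETLINK,
 *				  .nl_groups = RTMGRP_LINK };
 *	bind(fd, (struct sockaddr *)&sa, sizeof(sa));
 */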
706
707static int netlink_connect(struct socket *sock, struct sockaddr *addr,
708 int alen, int flags)
709{
710 int err = 0;
711 struct sock *sk = sock->sk;
712 struct netlink_sock *nlk = nlk_sk(sk);
713 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
714
715 if (alen < sizeof(addr->sa_family))
716 return -EINVAL;
717
718 if (addr->sa_family == AF_UNSPEC) {
719 sk->sk_state = NETLINK_UNCONNECTED;
720 nlk->dst_pid = 0;
721 nlk->dst_group = 0;
722 return 0;
723 }
724 if (addr->sa_family != AF_NETLINK)
725 return -EINVAL;
726
727 /* Only superuser is allowed to send multicasts */
728 if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
729 return -EPERM;
730
731 if (!nlk->pid)
732 err = netlink_autobind(sock);
733
734 if (err == 0) {
735 sk->sk_state = NETLINK_CONNECTED;
736 nlk->dst_pid = nladdr->nl_pid;
737 nlk->dst_group = ffs(nladdr->nl_groups);
738 }
739
740 return err;
741}
742
743static int netlink_getname(struct socket *sock, struct sockaddr *addr,
744 int *addr_len, int peer)
745{
746 struct sock *sk = sock->sk;
747 struct netlink_sock *nlk = nlk_sk(sk);
748 DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);
749
750 nladdr->nl_family = AF_NETLINK;
751 nladdr->nl_pad = 0;
752 *addr_len = sizeof(*nladdr);
753
754 if (peer) {
755 nladdr->nl_pid = nlk->dst_pid;
756 nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
757 } else {
758 nladdr->nl_pid = nlk->pid;
759 nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
760 }
761 return 0;
762}
763
764static void netlink_overrun(struct sock *sk)
765{
766 struct netlink_sock *nlk = nlk_sk(sk);
767
768 if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
769 if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
770 sk->sk_err = ENOBUFS;
771 sk->sk_error_report(sk);
772 }
773 }
774 atomic_inc(&sk->sk_drops);
775}
776
777static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
778{
779 struct sock *sock;
780 struct netlink_sock *nlk;
781
782 sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, pid);
783 if (!sock)
784 return ERR_PTR(-ECONNREFUSED);
785
786 /* Don't bother queuing skb if kernel socket has no input function */
787 nlk = nlk_sk(sock);
788 if (sock->sk_state == NETLINK_CONNECTED &&
789 nlk->dst_pid != nlk_sk(ssk)->pid) {
790 sock_put(sock);
791 return ERR_PTR(-ECONNREFUSED);
792 }
793 return sock;
794}
795
796struct sock *netlink_getsockbyfilp(struct file *filp)
797{
798 struct inode *inode = filp->f_path.dentry->d_inode;
799 struct sock *sock;
800
801 if (!S_ISSOCK(inode->i_mode))
802 return ERR_PTR(-ENOTSOCK);
803
804 sock = SOCKET_I(inode)->sk;
805 if (sock->sk_family != AF_NETLINK)
806 return ERR_PTR(-EINVAL);
807
808 sock_hold(sock);
809 return sock;
810}
811
812/*
813 * Attach a skb to a netlink socket.
814 * The caller must hold a reference to the destination socket. On error, the
815 * reference is dropped. The skb is not sent to the destination; only the
816 * error checks are performed and memory in the queue is reserved.
817 * Return values:
818 * < 0: error. skb freed, reference to sock dropped.
819 * 0: continue
820 * 1: repeat lookup - reference dropped while waiting for socket memory.
821 */
822int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
823 long *timeo, struct sock *ssk)
824{
825 struct netlink_sock *nlk;
826
827 nlk = nlk_sk(sk);
828
829 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
830 test_bit(0, &nlk->state)) {
831 DECLARE_WAITQUEUE(wait, current);
832 if (!*timeo) {
833 if (!ssk || netlink_is_kernel(ssk))
834 netlink_overrun(sk);
835 sock_put(sk);
836 kfree_skb(skb);
837 return -EAGAIN;
838 }
839
840 __set_current_state(TASK_INTERRUPTIBLE);
841 add_wait_queue(&nlk->wait, &wait);
842
843 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
844 test_bit(0, &nlk->state)) &&
845 !sock_flag(sk, SOCK_DEAD))
846 *timeo = schedule_timeout(*timeo);
847
848 __set_current_state(TASK_RUNNING);
849 remove_wait_queue(&nlk->wait, &wait);
850 sock_put(sk);
851
852 if (signal_pending(current)) {
853 kfree_skb(skb);
854 return sock_intr_errno(*timeo);
855 }
856 return 1;
857 }
858 skb_set_owner_r(skb, sk);
859 return 0;
860}
861
862static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
863{
864 int len = skb->len;
865
866 skb_queue_tail(&sk->sk_receive_queue, skb);
867 sk->sk_data_ready(sk, len);
868 return len;
869}
870
871int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
872{
873 int len = __netlink_sendskb(sk, skb);
874
875 sock_put(sk);
876 return len;
877}
878
879void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
880{
881 kfree_skb(skb);
882 sock_put(sk);
883}
884
885static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
886{
887 int delta;
888
889 skb_orphan(skb);
890
891 delta = skb->end - skb->tail;
892 if (delta * 2 < skb->truesize)
893 return skb;
894
895 if (skb_shared(skb)) {
896 struct sk_buff *nskb = skb_clone(skb, allocation);
897 if (!nskb)
898 return skb;
899 kfree_skb(skb);
900 skb = nskb;
901 }
902
903 if (!pskb_expand_head(skb, 0, -delta, allocation))
904 skb->truesize -= delta;
905
906 return skb;
907}
908
909static void netlink_rcv_wake(struct sock *sk)
910{
911 struct netlink_sock *nlk = nlk_sk(sk);
912
913 if (skb_queue_empty(&sk->sk_receive_queue))
914 clear_bit(0, &nlk->state);
915 if (!test_bit(0, &nlk->state))
916 wake_up_interruptible(&nlk->wait);
917}
918
919static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb)
920{
921 int ret;
922 struct netlink_sock *nlk = nlk_sk(sk);
923
924 ret = -ECONNREFUSED;
925 if (nlk->netlink_rcv != NULL) {
926 ret = skb->len;
927 skb_set_owner_r(skb, sk);
928 nlk->netlink_rcv(skb);
929 }
930 kfree_skb(skb);
931 sock_put(sk);
932 return ret;
933}
934
935int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
936 u32 pid, int nonblock)
937{
938 struct sock *sk;
939 int err;
940 long timeo;
941	track_netlink(skb, 0);
942 skb = netlink_trim(skb, gfp_any());
943
944 timeo = sock_sndtimeo(ssk, nonblock);
945retry:
946 sk = netlink_getsockbypid(ssk, pid);
947 if (IS_ERR(sk)) {
948 kfree_skb(skb);
949 return PTR_ERR(sk);
950 }
951 if (netlink_is_kernel(sk))
952 return netlink_unicast_kernel(sk, skb);
953
954 if (sk_filter(sk, skb)) {
955 err = skb->len;
956 kfree_skb(skb);
957 sock_put(sk);
958 return err;
959 }
960
961 err = netlink_attachskb(sk, skb, &timeo, ssk);
962 if (err == 1)
963 goto retry;
964 if (err)
965 return err;
966
967 return netlink_sendskb(sk, skb);
968}
969EXPORT_SYMBOL(netlink_unicast);
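
/*
 * Minimal in-kernel usage sketch for netlink_unicast() (illustrative only;
 * my_sk, dst_pid and MY_MSG_TYPE are hypothetical):
 *
 *	struct sk_buff *skb = nlmsg_new(payload_len, GFP_KERNEL);
 *	struct nlmsghdr *nlh;
 *
 *	if (!skb)
 *		return -ENOMEM;
 *	nlh = nlmsg_put(skb, 0, 0, MY_MSG_TYPE, payload_len, 0);
 *	memcpy(nlmsg_data(nlh), payload, payload_len);
 *	netlink_unicast(my_sk, skb, dst_pid, MSG_DONTWAIT);
 *
 * netlink_unicast() consumes the skb on every path, so the caller must not
 * free it again after the call.
 */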
970
971int netlink_has_listeners(struct sock *sk, unsigned int group)
972{
973 int res = 0;
974 struct listeners *listeners;
975
976 BUG_ON(!netlink_is_kernel(sk));
977
978 rcu_read_lock();
979 listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);
980
981 if (listeners && group - 1 < nl_table[sk->sk_protocol].groups)
982 res = test_bit(group - 1, listeners->masks);
983
984 rcu_read_unlock();
985
986 return res;
987}
988EXPORT_SYMBOL_GPL(netlink_has_listeners);
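
/*
 * Typical pattern (illustrative): a kernel-side producer checks for
 * listeners before building a potentially expensive notification, e.g.
 *
 *	if (!netlink_has_listeners(my_kernel_sk, MY_MCAST_GROUP))
 *		return;
 *
 * where my_kernel_sk is a socket returned by netlink_kernel_create() and
 * MY_MCAST_GROUP is a hypothetical 1-based group number.
 */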
989
990static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
991{
992 struct netlink_sock *nlk = nlk_sk(sk);
993
994 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
995 !test_bit(0, &nlk->state)) {
996 skb_set_owner_r(skb, sk);
997 __netlink_sendskb(sk, skb);
998 return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
999 }
1000 return -1;
1001}
1002
1003struct netlink_broadcast_data {
1004 struct sock *exclude_sk;
1005 struct net *net;
1006 u32 pid;
1007 u32 group;
1008 int failure;
1009 int delivery_failure;
1010 int congested;
1011 int delivered;
1012 gfp_t allocation;
1013 struct sk_buff *skb, *skb2;
1014 int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
1015 void *tx_data;
1016};
1017
1018static int do_one_broadcast(struct sock *sk,
1019 struct netlink_broadcast_data *p)
1020{
1021 struct netlink_sock *nlk = nlk_sk(sk);
1022 int val;
1023
1024 if (p->exclude_sk == sk) {
1025 g_test_netlink = NETLINK_ERR_SK;
1026 goto out;
1027
1028 }
1029 if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
1030 !test_bit(p->group - 1, nlk->groups)) {
1031 g_test_netlink = NETLINK_ERR_PID;
1032 goto out;
1033 }
1034 if (!net_eq(sock_net(sk), p->net)) {
1035 g_test_netlink = NETLINK_ERR_NETEQ;
1036 goto out;
1037 }
1038 if (p->failure) {
1039 netlink_overrun(sk);
1040 g_test_netlink = NETLINK_ERR_PFAIL;
1041 goto out;
1042 }
1043
1044 sock_hold(sk);
1045 if (p->skb2 == NULL) {
1046 if (skb_shared(p->skb)) {
1047 p->skb2 = skb_clone(p->skb, p->allocation);
1048 } else {
1049 p->skb2 = skb_get(p->skb);
1050 /*
1051 * skb ownership may have been set when
1052 * delivered to a previous socket.
1053 */
1054 skb_orphan(p->skb2);
1055 }
1056 }
1057 if (p->skb2 == NULL) {
1058 netlink_overrun(sk);
1059 /* Clone failed. Notify ALL listeners. */
1060 p->failure = 1;
1061 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1062 p->delivery_failure = 1;
1063 g_test_netlink = NETLINK_ERR_SKB2;
1064 } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
1065 kfree_skb(p->skb2);
1066 p->skb2 = NULL;
1067 g_test_netlink = NETLINK_ERR_TXFILTER;
1068 } else if (sk_filter(sk, p->skb2)) {
1069 kfree_skb(p->skb2);
1070 p->skb2 = NULL;
1071 g_test_netlink = NETLINK_ERR_SKFILTER;
1072 } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
1073 netlink_overrun(sk);
1074 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1075 p->delivery_failure = 1;
1076		g_test_netlink = NETLINK_ERR_DELIVER;
1077 } else {
1078 p->congested |= val;
1079 p->delivered = 1;
1080 p->skb2 = NULL;
1081 }
1082 sock_put(sk);
1083
1084out:
1085 return 0;
1086}
1087
1088int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid,
1089 u32 group, gfp_t allocation,
1090 int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
1091 void *filter_data)
1092{
1093 struct net *net = sock_net(ssk);
1094 struct netlink_broadcast_data info;
1095 struct hlist_node *node;
1096 struct sock *sk;
1097
1098	track_netlink(skb, group);
1099 skb = netlink_trim(skb, allocation);
1100
1101 info.exclude_sk = ssk;
1102 info.net = net;
1103 info.pid = pid;
1104 info.group = group;
1105 info.failure = 0;
1106 info.delivery_failure = 0;
1107 info.congested = 0;
1108 info.delivered = 0;
1109 info.allocation = allocation;
1110 info.skb = skb;
1111 info.skb2 = NULL;
1112 info.tx_filter = filter;
1113 info.tx_data = filter_data;
1114
1115	/* While we sleep in clone, do not allow the socket list to change */
1116
1117 netlink_lock_table();
1118
1119 sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
1120 do_one_broadcast(sk, &info);
1121
1122 consume_skb(skb);
1123
1124 netlink_unlock_table();
1125
1126 if (info.delivery_failure) {
1127 kfree_skb(info.skb2);
1128 return -ENOBUFS;
1129 } else
1130 consume_skb(info.skb2);
1131
1132 if (info.delivered) {
1133 if (info.congested && (allocation & __GFP_WAIT))
1134 yield();
1135 return 0;
1136 }
1137 return -ESRCH;
1138}
1139EXPORT_SYMBOL(netlink_broadcast_filtered);
1140
1141int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
1142 u32 group, gfp_t allocation)
1143{
1144 return netlink_broadcast_filtered(ssk, skb, pid, group, allocation,
1145 NULL, NULL);
1146}
1147EXPORT_SYMBOL(netlink_broadcast);
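
/*
 * Broadcast usage sketch (illustrative; names are hypothetical): after
 * building a notification skb, a subsystem multicasts it to every socket
 * bound to a group, excluding nobody (pid 0):
 *
 *	netlink_broadcast(my_kernel_sk, skb, 0, MY_MCAST_GROUP, GFP_KERNEL);
 *
 * The skb is consumed in all cases; -ESRCH merely means nobody was
 * listening, which callers often ignore.
 */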
1148
1149struct netlink_set_err_data {
1150 struct sock *exclude_sk;
1151 u32 pid;
1152 u32 group;
1153 int code;
1154};
1155
1156static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
1157{
1158 struct netlink_sock *nlk = nlk_sk(sk);
1159 int ret = 0;
1160
1161 if (sk == p->exclude_sk)
1162 goto out;
1163
1164 if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
1165 goto out;
1166
1167 if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
1168 !test_bit(p->group - 1, nlk->groups))
1169 goto out;
1170
1171 if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
1172 ret = 1;
1173 goto out;
1174 }
1175
1176 sk->sk_err = p->code;
1177 sk->sk_error_report(sk);
1178out:
1179 return ret;
1180}
1181
1182/**
1183 * netlink_set_err - report error to broadcast listeners
1184 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
1185 * @pid: the PID of a process that we want to skip (if any)
1186 * @group: the broadcast group that will notice the error
1187 * @code: error code, must be negative (as usual in kernelspace)
1188 *
1189 * This function returns the number of broadcast listeners that have set the
1190 * NETLINK_RECV_NO_ENOBUFS socket option.
1191 */
1192int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
1193{
1194 struct netlink_set_err_data info;
1195 struct hlist_node *node;
1196 struct sock *sk;
1197 int ret = 0;
1198
1199 info.exclude_sk = ssk;
1200 info.pid = pid;
1201 info.group = group;
1202 /* sk->sk_err wants a positive error value */
1203 info.code = -code;
1204
1205 read_lock(&nl_table_lock);
1206
1207 sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
1208 ret += do_one_set_err(sk, &info);
1209
1210 read_unlock(&nl_table_lock);
1211 return ret;
1212}
1213EXPORT_SYMBOL(netlink_set_err);
1214
1215/* must be called with netlink table grabbed */
1216static void netlink_update_socket_mc(struct netlink_sock *nlk,
1217 unsigned int group,
1218 int is_new)
1219{
1220 int old, new = !!is_new, subscriptions;
1221
1222 old = test_bit(group - 1, nlk->groups);
1223 subscriptions = nlk->subscriptions - old + new;
1224 if (new)
1225 __set_bit(group - 1, nlk->groups);
1226 else
1227 __clear_bit(group - 1, nlk->groups);
1228 netlink_update_subscriptions(&nlk->sk, subscriptions);
1229 netlink_update_listeners(&nlk->sk);
1230}
1231
1232static int netlink_setsockopt(struct socket *sock, int level, int optname,
1233 char __user *optval, unsigned int optlen)
1234{
1235 struct sock *sk = sock->sk;
1236 struct netlink_sock *nlk = nlk_sk(sk);
1237 unsigned int val = 0;
1238 int err;
1239
1240 if (level != SOL_NETLINK)
1241 return -ENOPROTOOPT;
1242
1243 if (optlen >= sizeof(int) &&
1244 get_user(val, (unsigned int __user *)optval))
1245 return -EFAULT;
1246
1247 switch (optname) {
1248 case NETLINK_PKTINFO:
1249 if (val)
1250 nlk->flags |= NETLINK_RECV_PKTINFO;
1251 else
1252 nlk->flags &= ~NETLINK_RECV_PKTINFO;
1253 err = 0;
1254 break;
1255 case NETLINK_ADD_MEMBERSHIP:
1256 case NETLINK_DROP_MEMBERSHIP: {
1257 if (!netlink_capable(sock, NL_NONROOT_RECV))
1258 return -EPERM;
1259 err = netlink_realloc_groups(sk);
1260 if (err)
1261 return err;
1262 if (!val || val - 1 >= nlk->ngroups)
1263 return -EINVAL;
1264 netlink_table_grab();
1265 netlink_update_socket_mc(nlk, val,
1266 optname == NETLINK_ADD_MEMBERSHIP);
1267 netlink_table_ungrab();
1268 err = 0;
1269 break;
1270 }
1271 case NETLINK_BROADCAST_ERROR:
1272 if (val)
1273 nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
1274 else
1275 nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
1276 err = 0;
1277 break;
1278 case NETLINK_NO_ENOBUFS:
1279 if (val) {
1280 nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
1281 clear_bit(0, &nlk->state);
1282 wake_up_interruptible(&nlk->wait);
1283 } else
1284 nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
1285 err = 0;
1286 break;
1287 default:
1288 err = -ENOPROTOOPT;
1289 }
1290 return err;
1291}
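
/*
 * Userspace counterpart of the NETLINK_ADD_MEMBERSHIP handling above
 * (illustrative only): groups above 32 cannot be expressed in
 * sockaddr_nl.nl_groups, so listeners join them with setsockopt():
 *
 *	unsigned int grp = 33;
 *	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &grp, sizeof(grp));
 */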
1292
1293static int netlink_getsockopt(struct socket *sock, int level, int optname,
1294 char __user *optval, int __user *optlen)
1295{
1296 struct sock *sk = sock->sk;
1297 struct netlink_sock *nlk = nlk_sk(sk);
1298 int len, val, err;
1299
1300 if (level != SOL_NETLINK)
1301 return -ENOPROTOOPT;
1302
1303 if (get_user(len, optlen))
1304 return -EFAULT;
1305 if (len < 0)
1306 return -EINVAL;
1307
1308 switch (optname) {
1309 case NETLINK_PKTINFO:
1310 if (len < sizeof(int))
1311 return -EINVAL;
1312 len = sizeof(int);
1313 val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
1314 if (put_user(len, optlen) ||
1315 put_user(val, optval))
1316 return -EFAULT;
1317 err = 0;
1318 break;
1319 case NETLINK_BROADCAST_ERROR:
1320 if (len < sizeof(int))
1321 return -EINVAL;
1322 len = sizeof(int);
1323 val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
1324 if (put_user(len, optlen) ||
1325 put_user(val, optval))
1326 return -EFAULT;
1327 err = 0;
1328 break;
1329 case NETLINK_NO_ENOBUFS:
1330 if (len < sizeof(int))
1331 return -EINVAL;
1332 len = sizeof(int);
1333 val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
1334 if (put_user(len, optlen) ||
1335 put_user(val, optval))
1336 return -EFAULT;
1337 err = 0;
1338 break;
1339 default:
1340 err = -ENOPROTOOPT;
1341 }
1342 return err;
1343}
1344
1345static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
1346{
1347 struct nl_pktinfo info;
1348
1349 info.group = NETLINK_CB(skb).dst_group;
1350 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
1351}
1352
1353static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1354 struct msghdr *msg, size_t len)
1355{
1356 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1357 struct sock *sk = sock->sk;
1358 struct netlink_sock *nlk = nlk_sk(sk);
1359 struct sockaddr_nl *addr = msg->msg_name;
1360 u32 dst_pid;
1361 u32 dst_group;
1362 struct sk_buff *skb;
1363 int err;
1364 struct scm_cookie scm;
1365
1366 if (msg->msg_flags&MSG_OOB)
1367 return -EOPNOTSUPP;
1368
1369 if (NULL == siocb->scm)
1370 siocb->scm = &scm;
1371
1372 err = scm_send(sock, msg, siocb->scm, true);
1373 if (err < 0)
1374 return err;
1375
1376 if (msg->msg_namelen) {
1377 err = -EINVAL;
1378 if (addr->nl_family != AF_NETLINK)
1379 goto out;
1380 dst_pid = addr->nl_pid;
1381 dst_group = ffs(addr->nl_groups);
1382 err = -EPERM;
1383 if ((dst_group || dst_pid) &&
1384 !netlink_capable(sock, NL_NONROOT_SEND))
1385 goto out;
1386 } else {
1387 dst_pid = nlk->dst_pid;
1388 dst_group = nlk->dst_group;
1389 }
1390
1391 if (!nlk->pid) {
1392 err = netlink_autobind(sock);
1393 if (err)
1394 goto out;
1395 }
1396
1397 err = -EMSGSIZE;
1398 if (len > sk->sk_sndbuf - 32)
1399 goto out;
1400 err = -ENOBUFS;
1401 skb = alloc_skb(len, GFP_KERNEL);
1402 if (skb == NULL)
1403 goto out;
1404
1405 NETLINK_CB(skb).pid = nlk->pid;
1406 NETLINK_CB(skb).dst_group = dst_group;
1407 memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1408
1409 err = -EFAULT;
1410 if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
1411 kfree_skb(skb);
1412 goto out;
1413 }
1414
1415 err = security_netlink_send(sk, skb);
1416 if (err) {
1417 kfree_skb(skb);
1418 goto out;
1419 }
1420
1421 if (dst_group) {
1422 atomic_inc(&skb->users);
1423 track_add(skb, 0, USER_INFO, 0);
1424 netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
1425 }
1426 err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
1427
1428out:
1429 scm_destroy(siocb->scm);
1430 return err;
1431}
1432
1433static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
1434 struct msghdr *msg, size_t len,
1435 int flags)
1436{
1437 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1438 struct scm_cookie scm;
1439 struct sock *sk = sock->sk;
1440 struct netlink_sock *nlk = nlk_sk(sk);
1441 int noblock = flags&MSG_DONTWAIT;
1442 size_t copied;
1443 struct sk_buff *skb, *data_skb;
1444 int err, ret;
1445
1446 if (flags&MSG_OOB)
1447 return -EOPNOTSUPP;
1448
1449 copied = 0;
1450
1451 skb = skb_recv_datagram(sk, flags, noblock, &err);
1452 if (skb == NULL)
1453 goto out;
1454
1455 data_skb = skb;
1456
1457#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
1458 if (unlikely(skb_shinfo(skb)->frag_list)) {
1459 /*
1460 * If this skb has a frag_list, then here that means that we
1461 * will have to use the frag_list skb's data for compat tasks
1462 * and the regular skb's data for normal (non-compat) tasks.
1463 *
1464 * If we need to send the compat skb, assign it to the
1465 * 'data_skb' variable so that it will be used below for data
1466 * copying. We keep 'skb' for everything else, including
1467 * freeing both later.
1468 */
1469 if (flags & MSG_CMSG_COMPAT)
1470 data_skb = skb_shinfo(skb)->frag_list;
1471 }
1472#endif
1473
1474 copied = data_skb->len;
1475 if (len < copied) {
1476 msg->msg_flags |= MSG_TRUNC;
1477 copied = len;
1478 }
1479
1480 skb_reset_transport_header(data_skb);
1481 err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied);
1482
1483 if (msg->msg_name) {
1484 struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
1485 addr->nl_family = AF_NETLINK;
1486 addr->nl_pad = 0;
1487 addr->nl_pid = NETLINK_CB(skb).pid;
1488 addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
1489 msg->msg_namelen = sizeof(*addr);
1490 }
1491
1492 if (nlk->flags & NETLINK_RECV_PKTINFO)
1493 netlink_cmsg_recv_pktinfo(msg, skb);
1494
1495 if (NULL == siocb->scm) {
1496 memset(&scm, 0, sizeof(scm));
1497 siocb->scm = &scm;
1498 }
1499 siocb->scm->creds = *NETLINK_CREDS(skb);
1500 if (flags & MSG_TRUNC)
1501 copied = data_skb->len;
1502
1503 skb_free_datagram(sk, skb);
1504
1505 if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
1506 ret = netlink_dump(sk);
1507 if (ret) {
1508 sk->sk_err = ret;
1509 sk->sk_error_report(sk);
1510 }
1511 }
1512
1513 scm_recv(sock, msg, siocb->scm, flags);
1514out:
1515 netlink_rcv_wake(sk);
1516 return err ? : copied;
1517}
1518
1519static void netlink_data_ready(struct sock *sk, int len)
1520{
1521 BUG();
1522}
1523
1524/*
1525 * We export these functions to other modules. They provide a
1526 * complete set of kernel non-blocking support for message
1527 * queueing.
1528 */
1529
1530struct sock *
1531netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1532 void (*input)(struct sk_buff *skb),
1533 struct mutex *cb_mutex, struct module *module)
1534{
1535 struct socket *sock;
1536 struct sock *sk;
1537 struct netlink_sock *nlk;
1538 struct listeners *listeners = NULL;
1539
1540 BUG_ON(!nl_table);
1541
1542 if (unit < 0 || unit >= MAX_LINKS)
1543 return NULL;
1544
1545 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
1546 return NULL;
1547
1548 /*
1549	 * We only need a reference on the net from sk and must not get_net() it;
1550	 * besides, we cannot get and then put the net here. So we create the
1551	 * socket inside init_net and then move it to net.
1552 */
1553
1554 if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
1555 goto out_sock_release_nosk;
1556
1557 sk = sock->sk;
1558 sk_change_net(sk, net);
1559
1560 if (groups < 32)
1561 groups = 32;
1562
1563 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
1564 if (!listeners)
1565 goto out_sock_release;
1566
1567 sk->sk_data_ready = netlink_data_ready;
1568 if (input)
1569 nlk_sk(sk)->netlink_rcv = input;
1570
1571 if (netlink_insert(sk, net, 0))
1572 goto out_sock_release;
1573
1574 nlk = nlk_sk(sk);
1575 nlk->flags |= NETLINK_KERNEL_SOCKET;
1576
1577 netlink_table_grab();
1578 if (!nl_table[unit].registered) {
1579 nl_table[unit].groups = groups;
1580 rcu_assign_pointer(nl_table[unit].listeners, listeners);
1581 nl_table[unit].cb_mutex = cb_mutex;
1582 nl_table[unit].module = module;
1583 nl_table[unit].registered = 1;
1584 } else {
1585 kfree(listeners);
1586 nl_table[unit].registered++;
1587 }
1588 netlink_table_ungrab();
1589 return sk;
1590
1591out_sock_release:
1592 kfree(listeners);
1593 netlink_kernel_release(sk);
1594 return NULL;
1595
1596out_sock_release_nosk:
1597 sock_release(sock);
1598 return NULL;
1599}
1600EXPORT_SYMBOL(netlink_kernel_create);
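
/*
 * Usage sketch (illustrative; NETLINK_MY_PROTO and my_input are
 * hypothetical): a subsystem creates its kernel-side socket once at init
 * time and keeps the returned struct sock around for later sends:
 *
 *	nl_sk = netlink_kernel_create(&init_net, NETLINK_MY_PROTO, 0,
 *				      my_input, NULL, THIS_MODULE);
 *	if (!nl_sk)
 *		return -ENOMEM;
 *
 * my_input() is then called with the queued skb whenever userspace sends to
 * pid 0 on this protocol.
 */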
1601
1602
1603void
1604netlink_kernel_release(struct sock *sk)
1605{
1606 sk_release_kernel(sk);
1607}
1608EXPORT_SYMBOL(netlink_kernel_release);
1609
1610int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
1611{
1612 struct listeners *new, *old;
1613 struct netlink_table *tbl = &nl_table[sk->sk_protocol];
1614
1615 if (groups < 32)
1616 groups = 32;
1617
1618 if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
1619 new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
1620 if (!new)
1621 return -ENOMEM;
1622 old = nl_deref_protected(tbl->listeners);
1623 memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
1624 rcu_assign_pointer(tbl->listeners, new);
1625
1626 kfree_rcu(old, rcu);
1627 }
1628 tbl->groups = groups;
1629
1630 return 0;
1631}
1632
1633/**
1634 * netlink_change_ngroups - change number of multicast groups
1635 *
1636 * This changes the number of multicast groups that are available
1637 * on a certain netlink family. Note that it is not possible to
1638 * change the number of groups to below 32. Also note that it does
1639 * not implicitly call netlink_clear_multicast_users() when the
1640 * number of groups is reduced.
1641 *
1642 * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
1643 * @groups: The new number of groups.
1644 */
1645int netlink_change_ngroups(struct sock *sk, unsigned int groups)
1646{
1647 int err;
1648
1649 netlink_table_grab();
1650 err = __netlink_change_ngroups(sk, groups);
1651 netlink_table_ungrab();
1652
1653 return err;
1654}
1655
1656void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
1657{
1658 struct sock *sk;
1659 struct hlist_node *node;
1660 struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
1661
1662 sk_for_each_bound(sk, node, &tbl->mc_list)
1663 netlink_update_socket_mc(nlk_sk(sk), group, 0);
1664}
1665
1666/**
1667 * netlink_clear_multicast_users - kick off multicast listeners
1668 *
1669 * This function removes all listeners from the given group.
1670 * @ksk: The kernel netlink socket, as returned by
1671 * netlink_kernel_create().
1672 * @group: The multicast group to clear.
1673 */
1674void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
1675{
1676 netlink_table_grab();
1677 __netlink_clear_multicast_users(ksk, group);
1678 netlink_table_ungrab();
1679}
1680
1681void netlink_set_nonroot(int protocol, unsigned int flags)
1682{
1683 if ((unsigned int)protocol < MAX_LINKS)
1684 nl_table[protocol].nl_nonroot = flags;
1685}
1686EXPORT_SYMBOL(netlink_set_nonroot);
1687
1688static void netlink_destroy_callback(struct netlink_callback *cb)
1689{
1690 kfree_skb(cb->skb);
1691 kfree(cb);
1692}
1693
1694struct nlmsghdr *
1695__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags)
1696{
1697 struct nlmsghdr *nlh;
1698 int size = NLMSG_LENGTH(len);
1699
1700 nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size));
1701 nlh->nlmsg_type = type;
1702 nlh->nlmsg_len = size;
1703 nlh->nlmsg_flags = flags;
1704 nlh->nlmsg_pid = pid;
1705 nlh->nlmsg_seq = seq;
1706 if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
1707 memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size);
1708 return nlh;
1709}
1710EXPORT_SYMBOL(__nlmsg_put);
1711
1712/*
1713 * It looks a bit ugly.
1714 * It would be better to create a kernel thread.
1715 */
1716
1717static int netlink_dump(struct sock *sk)
1718{
1719 struct netlink_sock *nlk = nlk_sk(sk);
1720 struct netlink_callback *cb;
1721 struct sk_buff *skb = NULL;
1722 struct nlmsghdr *nlh;
1723 int len, err = -ENOBUFS;
1724 int alloc_size;
1725 struct module *module;
1726 struct sk_buff *skb2;
1727
1728 mutex_lock(nlk->cb_mutex);
1729
1730 cb = nlk->cb;
1731 if (cb == NULL) {
1732 err = -EINVAL;
1733 goto errout_skb;
1734 }
1735
1736 alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
1737
1738 skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL);
1739 if (!skb)
1740 goto errout_skb;
1741
1742 len = cb->dump(skb, cb);
1743
1744 if (len > 0) {
1745 mutex_unlock(nlk->cb_mutex);
1746
1747 if (sk_filter(sk, skb))
1748 kfree_skb(skb);
1749 else
1750 __netlink_sendskb(sk, skb);
1751 return 0;
1752 }
1753
1754 nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
1755 if (!nlh)
1756 goto errout_skb;
1757
1758 nl_dump_check_consistent(cb, nlh);
1759
1760 memcpy(nlmsg_data(nlh), &len, sizeof(len));
1761
1762 if (sk_filter(sk, skb))
1763 kfree_skb(skb);
1764 else
1765 __netlink_sendskb(sk, skb);
1766
1767 if (cb->done)
1768 cb->done(cb);
1769 nlk->cb = NULL;
1770
1771 module = cb->module;
1772 skb2 = cb->skb;
1773 mutex_unlock(nlk->cb_mutex);
1774
1775 module_put(module);
1776 kfree_skb(skb2);
1777 kfree(cb);
1778 return 0;
1779
1780errout_skb:
1781 mutex_unlock(nlk->cb_mutex);
1782 kfree_skb(skb);
1783 return err;
1784}
1785
1786int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1787 const struct nlmsghdr *nlh,
1788 struct netlink_dump_control *control)
1789{
1790 struct netlink_callback *cb;
1791 struct sock *sk;
1792 struct netlink_sock *nlk;
1793 int ret;
1794
1795 cb = kzalloc(sizeof(*cb), GFP_KERNEL);
1796 if (cb == NULL)
1797 return -ENOBUFS;
1798
1799 cb->dump = control->dump;
1800 cb->done = control->done;
1801 cb->nlh = nlh;
1802 cb->data = control->data;
1803 cb->module = control->module;
1804 cb->min_dump_alloc = control->min_dump_alloc;
1805 atomic_inc(&skb->users);
1806 track_add(skb, 0, USER_INFO, 0);
1807 cb->skb = skb;
1808
1809 sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).pid);
1810 if (sk == NULL) {
1811 netlink_destroy_callback(cb);
1812 return -ECONNREFUSED;
1813 }
1814 nlk = nlk_sk(sk);
1815
1816 mutex_lock(nlk->cb_mutex);
1817 /* A dump is in progress... */
1818 if (nlk->cb) {
1819 mutex_unlock(nlk->cb_mutex);
1820 netlink_destroy_callback(cb);
1821 ret = -EBUSY;
1822 goto out;
1823 }
1824	/* take a reference on the module that cb->dump belongs to */
1825 if (!try_module_get(cb->module)) {
1826 mutex_unlock(nlk->cb_mutex);
1827 netlink_destroy_callback(cb);
1828 ret = -EPROTONOSUPPORT;
1829 goto out;
1830 }
1831
1832 nlk->cb = cb;
1833 mutex_unlock(nlk->cb_mutex);
1834
1835 ret = netlink_dump(sk);
1836out:
1837 sock_put(sk);
1838
1839 if (ret)
1840 return ret;
1841
1842	/* We successfully started a dump; by returning -EINTR we signal the
1843	 * caller not to send an ACK even if one was requested.
1844 */
1845 return -EINTR;
1846}
1847EXPORT_SYMBOL(__netlink_dump_start);
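
/*
 * Dump-start sketch (illustrative; my_dump is hypothetical): request
 * handlers usually go through the netlink_dump_start() wrapper from
 * <linux/netlink.h>, which ends up here:
 *
 *	struct netlink_dump_control c = {
 *		.dump   = my_dump,
 *		.module = THIS_MODULE,
 *	};
 *	return netlink_dump_start(nl_sk, skb, nlh, &c);
 *
 * The -EINTR return is expected and tells netlink_rcv_skb() not to ACK the
 * request, since replies come from netlink_dump() instead.
 */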
1848
1849void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1850{
1851 struct sk_buff *skb;
1852 struct nlmsghdr *rep;
1853 struct nlmsgerr *errmsg;
1854 size_t payload = sizeof(*errmsg);
1855
1856	/* error messages get the original request appended */
1857 if (err)
1858 payload += nlmsg_len(nlh);
1859
1860 skb = nlmsg_new(payload, GFP_KERNEL);
1861 if (!skb) {
1862 struct sock *sk;
1863
1864 sk = netlink_lookup(sock_net(in_skb->sk),
1865 in_skb->sk->sk_protocol,
1866 NETLINK_CB(in_skb).pid);
1867 if (sk) {
1868 sk->sk_err = ENOBUFS;
1869 sk->sk_error_report(sk);
1870 sock_put(sk);
1871 }
1872 return;
1873 }
1874
1875 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
1876 NLMSG_ERROR, payload, 0);
1877 errmsg = nlmsg_data(rep);
1878 errmsg->error = err;
1879 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
1880 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1881}
1882EXPORT_SYMBOL(netlink_ack);
1883
1884int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1885 struct nlmsghdr *))
1886{
1887 struct nlmsghdr *nlh;
1888 int err;
1889
1890 while (skb->len >= nlmsg_total_size(0)) {
1891 int msglen;
1892
1893 nlh = nlmsg_hdr(skb);
1894 err = 0;
1895
1896 if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
1897 return 0;
1898
1899 /* Only requests are handled by the kernel */
1900 if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
1901 goto ack;
1902
1903 /* Skip control messages */
1904 if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
1905 goto ack;
1906
1907 err = cb(skb, nlh);
1908 if (err == -EINTR)
1909 goto skip;
1910
1911ack:
1912 if (nlh->nlmsg_flags & NLM_F_ACK || err)
1913 netlink_ack(skb, nlh, err);
1914
1915skip:
1916 msglen = NLMSG_ALIGN(nlh->nlmsg_len);
1917 if (msglen > skb->len)
1918 msglen = skb->len;
1919 skb_pull(skb, msglen);
1920 }
1921
1922 return 0;
1923}
1924EXPORT_SYMBOL(netlink_rcv_skb);
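
/*
 * Handler sketch for netlink_rcv_skb() (illustrative; my_msg_handler is
 * hypothetical): the kernel socket's input callback typically just
 * delegates per-message processing:
 *
 *	static int my_msg_handler(struct sk_buff *skb, struct nlmsghdr *nlh)
 *	{
 *		return 0;
 *	}
 *
 *	static void my_input(struct sk_buff *skb)
 *	{
 *		netlink_rcv_skb(skb, &my_msg_handler);
 *	}
 *
 * netlink_rcv_skb() walks every message in the skb, skips non-requests and
 * control messages, and sends an ACK when NLM_F_ACK is set or the handler
 * fails.
 */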
1925
1926/**
1927 * nlmsg_notify - send a notification netlink message
1928 * @sk: netlink socket to use
1929 * @skb: notification message
1930 * @pid: destination netlink pid for reports or 0
1931 * @group: destination multicast group or 0
1932 * @report: 1 to report back, 0 to disable
1933 * @flags: allocation flags
1934 */
1935int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
1936 unsigned int group, int report, gfp_t flags)
1937{
1938 int err = 0;
1939
1940 if (group) {
1941 int exclude_pid = 0;
1942
1943 if (report) {
1944 track_add(skb, 0, USER_INFO, 0);
1945 atomic_inc(&skb->users);
1946 exclude_pid = pid;
1947 }
1948
1949		/* errors are reported via the destination sk->sk_err, but delivery
1950		 * errors are propagated if the NETLINK_BROADCAST_ERROR flag is set */
1951 err = nlmsg_multicast(sk, skb, exclude_pid, group, flags);
1952 }
1953
1954 if (report) {
1955 int err2;
1956
1957 err2 = nlmsg_unicast(sk, skb, pid);
1958 if (!err || err == -ESRCH)
1959 err = err2;
1960 }
1961
1962 return err;
1963}
1964EXPORT_SYMBOL(nlmsg_notify);
1965
1966#ifdef CONFIG_PROC_FS
1967struct nl_seq_iter {
1968 struct seq_net_private p;
1969 int link;
1970 int hash_idx;
1971};
1972
1973static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
1974{
1975 struct nl_seq_iter *iter = seq->private;
1976 int i, j;
1977 struct sock *s;
1978 struct hlist_node *node;
1979 loff_t off = 0;
1980
1981 for (i = 0; i < MAX_LINKS; i++) {
1982 struct nl_pid_hash *hash = &nl_table[i].hash;
1983
1984 for (j = 0; j <= hash->mask; j++) {
1985 sk_for_each(s, node, &hash->table[j]) {
1986 if (sock_net(s) != seq_file_net(seq))
1987 continue;
1988 if (off == pos) {
1989 iter->link = i;
1990 iter->hash_idx = j;
1991 return s;
1992 }
1993 ++off;
1994 }
1995 }
1996 }
1997 return NULL;
1998}
1999
2000static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
2001 __acquires(nl_table_lock)
2002{
2003 read_lock(&nl_table_lock);
2004 return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2005}
2006
2007static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2008{
2009 struct sock *s;
2010 struct nl_seq_iter *iter;
2011 int i, j;
2012
2013 ++*pos;
2014
2015 if (v == SEQ_START_TOKEN)
2016 return netlink_seq_socket_idx(seq, 0);
2017
2018 iter = seq->private;
2019 s = v;
2020 do {
2021 s = sk_next(s);
2022 } while (s && sock_net(s) != seq_file_net(seq));
2023 if (s)
2024 return s;
2025
2026 i = iter->link;
2027 j = iter->hash_idx + 1;
2028
2029 do {
2030 struct nl_pid_hash *hash = &nl_table[i].hash;
2031
2032 for (; j <= hash->mask; j++) {
2033 s = sk_head(&hash->table[j]);
2034 while (s && sock_net(s) != seq_file_net(seq))
2035 s = sk_next(s);
2036 if (s) {
2037 iter->link = i;
2038 iter->hash_idx = j;
2039 return s;
2040 }
2041 }
2042
2043 j = 0;
2044 } while (++i < MAX_LINKS);
2045
2046 return NULL;
2047}
2048
2049static void netlink_seq_stop(struct seq_file *seq, void *v)
2050 __releases(nl_table_lock)
2051{
2052 read_unlock(&nl_table_lock);
2053}
2054
2055
2056static int netlink_seq_show(struct seq_file *seq, void *v)
2057{
2058 if (v == SEQ_START_TOKEN)
2059 seq_puts(seq,
2060 "sk Eth Pid Groups "
2061 "Rmem Wmem Dump Locks Drops Inode\n");
2062 else {
2063 struct sock *s = v;
2064 struct netlink_sock *nlk = nlk_sk(s);
2065
2066 seq_printf(seq, "%pK %-3d %-6d %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
2067 s,
2068 s->sk_protocol,
2069 nlk->pid,
2070 nlk->groups ? (u32)nlk->groups[0] : 0,
2071 sk_rmem_alloc_get(s),
2072 sk_wmem_alloc_get(s),
2073 nlk->cb,
2074 atomic_read(&s->sk_refcnt),
2075 atomic_read(&s->sk_drops),
2076 sock_i_ino(s)
2077 );
2078
2079 }
2080 return 0;
2081}
2082
2083static const struct seq_operations netlink_seq_ops = {
2084 .start = netlink_seq_start,
2085 .next = netlink_seq_next,
2086 .stop = netlink_seq_stop,
2087 .show = netlink_seq_show,
2088};
2089
2090
2091static int netlink_seq_open(struct inode *inode, struct file *file)
2092{
2093 return seq_open_net(inode, file, &netlink_seq_ops,
2094 sizeof(struct nl_seq_iter));
2095}
2096
2097static const struct file_operations netlink_seq_fops = {
2098 .owner = THIS_MODULE,
2099 .open = netlink_seq_open,
2100 .read = seq_read,
2101 .llseek = seq_lseek,
2102 .release = seq_release_net,
2103};
2104
2105#endif
2106
2107int netlink_register_notifier(struct notifier_block *nb)
2108{
2109 return atomic_notifier_chain_register(&netlink_chain, nb);
2110}
2111EXPORT_SYMBOL(netlink_register_notifier);
2112
2113int netlink_unregister_notifier(struct notifier_block *nb)
2114{
2115 return atomic_notifier_chain_unregister(&netlink_chain, nb);
2116}
2117EXPORT_SYMBOL(netlink_unregister_notifier);
2118
2119static const struct proto_ops netlink_ops = {
2120 .family = PF_NETLINK,
2121 .owner = THIS_MODULE,
2122 .release = netlink_release,
2123 .bind = netlink_bind,
2124 .connect = netlink_connect,
2125 .socketpair = sock_no_socketpair,
2126 .accept = sock_no_accept,
2127 .getname = netlink_getname,
2128 .poll = datagram_poll,
2129 .ioctl = sock_no_ioctl,
2130 .listen = sock_no_listen,
2131 .shutdown = sock_no_shutdown,
2132 .setsockopt = netlink_setsockopt,
2133 .getsockopt = netlink_getsockopt,
2134 .sendmsg = netlink_sendmsg,
2135 .recvmsg = netlink_recvmsg,
2136 .mmap = sock_no_mmap,
2137 .sendpage = sock_no_sendpage,
2138};
2139
2140static const struct net_proto_family netlink_family_ops = {
2141 .family = PF_NETLINK,
2142 .create = netlink_create,
2143 .owner = THIS_MODULE, /* for consistency 8) */
2144};
2145
2146static int __net_init netlink_net_init(struct net *net)
2147{
2148#ifdef CONFIG_PROC_FS
2149 if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops))
2150 return -ENOMEM;
2151#endif
2152 return 0;
2153}
2154
2155static void __net_exit netlink_net_exit(struct net *net)
2156{
2157#ifdef CONFIG_PROC_FS
2158 proc_net_remove(net, "netlink");
2159#endif
2160}
2161
2162static void __init netlink_add_usersock_entry(void)
2163{
2164 struct listeners *listeners;
2165 int groups = 32;
2166
2167 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
2168 if (!listeners)
2169 panic("netlink_add_usersock_entry: Cannot allocate listeners\n");
2170
2171 netlink_table_grab();
2172
2173 nl_table[NETLINK_USERSOCK].groups = groups;
2174 rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
2175 nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
2176 nl_table[NETLINK_USERSOCK].registered = 1;
2177 nl_table[NETLINK_USERSOCK].nl_nonroot = NL_NONROOT_SEND;
2178
2179 netlink_table_ungrab();
2180}
2181
2182static struct pernet_operations __net_initdata netlink_net_ops = {
2183 .init = netlink_net_init,
2184 .exit = netlink_net_exit,
2185};
2186
2187static int __init netlink_proto_init(void)
2188{
2189 struct sk_buff *dummy_skb;
2190 int i;
2191 unsigned long limit;
2192 unsigned int order;
2193 int err = proto_register(&netlink_proto, 0);
2194
2195 if (err != 0)
2196 goto out;
2197
2198 BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb));
2199
2200 nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
2201 if (!nl_table)
2202 goto panic;
2203
2204 if (totalram_pages >= (128 * 1024))
2205 limit = totalram_pages >> (21 - PAGE_SHIFT);
2206 else
2207 limit = totalram_pages >> (23 - PAGE_SHIFT);
2208
2209 order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
2210 limit = (1UL << order) / sizeof(struct hlist_head);
2211 order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;
2212
2213 for (i = 0; i < MAX_LINKS; i++) {
2214 struct nl_pid_hash *hash = &nl_table[i].hash;
2215
2216 hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table));
2217 if (!hash->table) {
2218 while (i-- > 0)
2219 nl_pid_hash_free(nl_table[i].hash.table,
2220 1 * sizeof(*hash->table));
2221 kfree(nl_table);
2222 goto panic;
2223 }
2224 hash->max_shift = order;
2225 hash->shift = 0;
2226 hash->mask = 0;
2227 hash->rehash_time = jiffies;
2228 }
2229
2230 netlink_add_usersock_entry();
2231
2232 sock_register(&netlink_family_ops);
2233 register_pernet_subsys(&netlink_net_ops);
2234 /* The netlink device handler may be needed early. */
2235 rtnetlink_init();
2236out:
2237 return err;
2238panic:
2239 panic("netlink_init: Cannot allocate nl_table\n");
2240}
2241
2242core_initcall(netlink_proto_init);