blob: 7da85ffc222a93219ba4ef3a134f0c0e75fdabd1 [file] [log] [blame]
lh9ed821d2023-04-07 01:36:19 -07001/*
2 * NET3 IP device support routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Derived from the IP parts of dev.c 1.0.19
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 * Additional Authors:
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 * Changes:
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
20 * lists.
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
25 * if no match found.
26 */
27
28
29#include <asm/uaccess.h>
30#include <linux/bitops.h>
31#include <linux/capability.h>
32#include <linux/module.h>
33#include <linux/types.h>
34#include <linux/kernel.h>
35#include <linux/string.h>
36#include <linux/mm.h>
37#include <linux/socket.h>
38#include <linux/sockios.h>
39#include <linux/in.h>
40#include <linux/errno.h>
41#include <linux/interrupt.h>
42#include <linux/if_addr.h>
43#include <linux/if_ether.h>
44#include <linux/inet.h>
45#include <linux/netdevice.h>
46#include <linux/etherdevice.h>
47#include <linux/skbuff.h>
48#include <linux/init.h>
49#include <linux/notifier.h>
50#include <linux/inetdevice.h>
51#include <linux/igmp.h>
52#include <linux/slab.h>
53#include <linux/hash.h>
54#ifdef CONFIG_SYSCTL
55#include <linux/sysctl.h>
56#endif
57#include <linux/kmod.h>
58
59#include <net/arp.h>
60#include <net/ip.h>
61#include <net/tcp.h>
62#include <net/route.h>
63#include <net/ip_fib.h>
64#include <net/rtnetlink.h>
65#include <net/net_namespace.h>
66
67#include "fib_lookup.h"
68
69static struct ipv4_devconf ipv4_devconf = {
70 .data = {
71 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75 },
76};
77
78static struct ipv4_devconf ipv4_devconf_dflt = {
79 .data = {
80 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
81 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
82 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
83 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
84 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
85 },
86};
87
/*
 * Read attribute @attr from the devconf_dflt block that @net points to
 * (net->ipv4.devconf_dflt).  @net is expanded unparenthesised, so pass a
 * plain pointer expression.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
90
/*
 * Netlink attribute policy handed to nlmsg_parse() by the address
 * add/delete handlers in this file: the three address attributes are
 * 32-bit values, the label is a string of at most IFNAMSIZ - 1 bytes.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL] = { .type = NLA_U32 },
	[IFA_ADDRESS] = { .type = NLA_U32 },
	[IFA_BROADCAST] = { .type = NLA_U32 },
	[IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
};
97
/* The shifts in inet_addr_hash() depend on this IN4_ADDR_HSIZE value
 * (the hash folds the 32-bit input in 8-bit steps and masks the result
 * with IN4_ADDR_HSIZE - 1).  If you change this define, make the
 * appropriate changes to inet_addr_hash() as well.
 *
 * inet_addr_lst is the hash table of configured addresses; writers take
 * inet_addr_hash_lock, while __ip_dev_find() walks a bucket under
 * rcu_read_lock().
 */
#define IN4_ADDR_HSIZE 256
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
static DEFINE_SPINLOCK(inet_addr_hash_lock);
105
106static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
107{
108 u32 val = (__force u32) addr ^ hash_ptr(net, 8);
109
110 return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
111 (IN4_ADDR_HSIZE - 1));
112}
113
114static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
115{
116 unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
117
118 spin_lock(&inet_addr_hash_lock);
119 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
120 spin_unlock(&inet_addr_hash_lock);
121 net_run_track(PRT_IFA,"insert ifa");
122}
123
124static void inet_hash_remove(struct in_ifaddr *ifa)
125{
126 spin_lock(&inet_addr_hash_lock);
127 hlist_del_init_rcu(&ifa->hash);
128 spin_unlock(&inet_addr_hash_lock);
129 net_run_track(PRT_IFA,"insert ifa");
130}
131
132/**
133 * __ip_dev_find - find the first device with a given source address.
134 * @net: the net namespace
135 * @addr: the source address
136 * @devref: if true, take a reference on the found device
137 *
138 * If a caller uses devref=false, it should be protected by RCU, or RTNL
139 */
140struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
141{
142 unsigned int hash = inet_addr_hash(net, addr);
143 struct net_device *result = NULL;
144 struct in_ifaddr *ifa;
145 struct hlist_node *node;
146
147 rcu_read_lock();
148 hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
149 struct net_device *dev = ifa->ifa_dev->dev;
150
151 if (!net_eq(dev_net(dev), net))
152 continue;
153 if (ifa->ifa_local == addr) {
154 result = dev;
155 break;
156 }
157 }
158 if (!result) {
159 struct flowi4 fl4 = { .daddr = addr };
160 struct fib_result res = { 0 };
161 struct fib_table *local;
162
163 /* Fallback to FIB local table so that communication
164 * over loopback subnets work.
165 */
166 local = fib_get_table(net, RT_TABLE_LOCAL);
167 if (local &&
168 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
169 res.type == RTN_LOCAL)
170 result = FIB_RES_DEV(res);
171 }
172 if (result && devref)
173 dev_hold(result);
174 rcu_read_unlock();
175 net_run_track(PRT_IFA,"find ifa");
176 return result;
177}
178EXPORT_SYMBOL(__ip_dev_find);
179
/* Forward declarations for helpers that are used before they are defined. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/*
 * Notifier chain that the add/delete paths in this file call with
 * NETDEV_UP / NETDEV_DOWN and the affected in_ifaddr.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL the per-device sysctl hooks compile to nothing. */
static inline void devinet_sysctl_register(struct in_device *idev)
{
}
static inline void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
196
197/* Locks all the inet devices. */
198
199static struct in_ifaddr *inet_alloc_ifa(void)
200{
201 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
202}
203
204static void inet_rcu_free_ifa(struct rcu_head *head)
205{
206 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
207 if (ifa->ifa_dev)
208 in_dev_put(ifa->ifa_dev);
209 kfree(ifa);
210}
211
/*
 * Release @ifa through call_rcu(): the memory is not freed here but in
 * the inet_rcu_free_ifa() callback, which also drops the ifa_dev
 * reference.
 */
static inline void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
216
217void in_dev_finish_destroy(struct in_device *idev)
218{
219 struct net_device *dev = idev->dev;
220
221 WARN_ON(idev->ifa_list);
222 WARN_ON(idev->mc_list);
223#ifdef NET_REFCNT_DEBUG
224 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
225 idev, dev ? dev->name : "NIL");
226#endif
227 dev_put(dev);
228 if (!idev->dead)
229 pr_err("Freeing alive in_device %p\n", idev);
230 else
231 kfree(idev);
232}
233EXPORT_SYMBOL(in_dev_finish_destroy);
234
235static struct in_device *inetdev_init(struct net_device *dev)
236{
237 struct in_device *in_dev;
238
239 ASSERT_RTNL();
240
241 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
242 if (!in_dev)
243 goto out;
244 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
245 sizeof(in_dev->cnf));
246 in_dev->cnf.sysctl = NULL;
247 in_dev->dev = dev;
248 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
249 if (!in_dev->arp_parms)
250 goto out_kfree;
251 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
252 dev_disable_lro(dev);
253 /* Reference in_dev->dev */
254 dev_hold(dev);
255 /* Account for reference dev->ip_ptr (below) */
256 in_dev_hold(in_dev);
257
258 devinet_sysctl_register(in_dev);
259 ip_mc_init_dev(in_dev);
260 if (dev->flags & IFF_UP)
261 ip_mc_up(in_dev);
262
263 /* we can receive as soon as ip_ptr is set -- do this last */
264 rcu_assign_pointer(dev->ip_ptr, in_dev);
265out:
266 return in_dev;
267out_kfree:
268 kfree(in_dev);
269 in_dev = NULL;
270 goto out;
271}
272
273static void in_dev_rcu_put(struct rcu_head *head)
274{
275 struct in_device *idev = container_of(head, struct in_device, rcu_head);
276 in_dev_put(idev);
277}
278
279static void inetdev_destroy(struct in_device *in_dev)
280{
281 struct in_ifaddr *ifa;
282 struct net_device *dev;
283
284 ASSERT_RTNL();
285
286 dev = in_dev->dev;
287
288 in_dev->dead = 1;
289
290 ip_mc_destroy_dev(in_dev);
291
292 while ((ifa = in_dev->ifa_list) != NULL) {
293 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
294 inet_free_ifa(ifa);
295 }
296
297 RCU_INIT_POINTER(dev->ip_ptr, NULL);
298
299 devinet_sysctl_unregister(in_dev);
300 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
301 arp_ifdown(dev);
302
303 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
304}
305
306int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
307{
308 rcu_read_lock();
309 for_primary_ifa(in_dev) {
310 if (inet_ifa_match(a, ifa)) {
311 if (!b || inet_ifa_match(b, ifa)) {
312 rcu_read_unlock();
313 return 1;
314 }
315 }
316 } endfor_ifa(in_dev);
317 rcu_read_unlock();
318 return 0;
319}
320
/*
 * Remove one address from a device's address list.  Must be called with
 * RTNL held.
 *
 * @in_dev:  device state that owns the list
 * @ifap:    the link (list head or some ->ifa_next) that currently points
 *           at the address to delete
 * @destroy: when non-zero the deleted address is also freed here
 * @nlh:     netlink request header passed through to rtmsg_ifa(), or NULL
 * @pid:     netlink pid passed through to rtmsg_ifa()
 *
 * Deleting a primary address also affects the secondaries of its subnet:
 * they are either all deleted, or -- when IN_DEV_PROMOTE_SECONDARIES() is
 * set for the device -- the first one is promoted to primary.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 pid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	/* last primary seen whose scope is not narrower than ifa1's */
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	/* list entry just before the promotion candidate, if any was skipped */
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting a primary ifaddr forces deletion of all secondaries
	 * unless alias promotion is set
	 */

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip primaries and secondaries of other subnets. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* Unlink, announce and free this secondary. */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary becomes the new primary. */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from the subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later add them back with the new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		/* Remember the successor before promote is relinked. */
		struct in_ifaddr *next_sec = promote->ifa_next;

		if (prev_prom) {
			/* Move promote to just after the last primary. */
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add routes of the remaining secondaries of this subnet. */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
420
/*
 * Delete the address that *@ifap points at, with no originating netlink
 * request: __inet_del_ifa() is called with a NULL header and pid 0.
 * The address is freed only when @destroy is non-zero.
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
426
/*
 * Link @ifa into the address list of ifa->ifa_dev and announce it.  Must
 * be called with RTNL held.
 *
 * @ifa: fully initialised address; this function takes it over and frees
 *       it on every path that does not insert it
 * @nlh: netlink request header passed through to rtmsg_ifa(), or NULL
 * @pid: netlink pid passed through to rtmsg_ifa()
 *
 * Returns 0 on success (also when ifa_local is zero, in which case @ifa
 * is simply freed), -EEXIST if the same local address already exists in
 * the same subnet, or -EINVAL if the subnet already exists with a
 * different scope.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
			     u32 pid)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_ifaddr *ifa1, **ifap, **last_primary;

	ASSERT_RTNL();

	if (!ifa->ifa_local) {
		inet_free_ifa(ifa);
		return 0;
	}

	/* Start out as primary; demoted below if the subnet already exists. */
	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
	     ifap = &ifa1->ifa_next) {
		/* Track the slot after the last primary of equal or wider scope value. */
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}
	}

	/*
	 * A primary goes to the slot recorded in last_primary; a secondary
	 * stays with ifap, which the loop left at the end of the list.
	 */
	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
		net_srandom(ifa->ifa_local);
		ifap = last_primary;
	}

	ifa->ifa_next = *ifap;
	*ifap = ifa;

	inet_hash_insert(dev_net(in_dev->dev), ifa);

	/* Send message first, then call notifier.
	   Notifier will trigger FIB update, so that
	   listeners of netlink will know about new ifaddr */
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
	net_run_track(PRT_IFA,"insert ifa");
	return 0;
}
480
/*
 * Insert @ifa with no originating netlink request: __inet_insert_ifa() is
 * called with a NULL header and pid 0.  Returns its result unchanged.
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}
485
486static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
487{
488 struct in_device *in_dev = __in_dev_get_rtnl(dev);
489
490 ASSERT_RTNL();
491
492 if (!in_dev) {
493 inet_free_ifa(ifa);
494 return -ENOBUFS;
495 }
496 ipv4_devconf_setall(in_dev);
497 if (ifa->ifa_dev != in_dev) {
498 WARN_ON(ifa->ifa_dev);
499 in_dev_hold(in_dev);
500 ifa->ifa_dev = in_dev;
501 }
502 if (ipv4_is_loopback(ifa->ifa_local))
503 ifa->ifa_scope = RT_SCOPE_HOST;
504
505 return inet_insert_ifa(ifa);
506}
507
508/* Caller must hold RCU or RTNL :
509 * We dont take a reference on found in_device
510 */
511struct in_device *inetdev_by_index(struct net *net, int ifindex)
512{
513 struct net_device *dev;
514 struct in_device *in_dev = NULL;
515
516 rcu_read_lock();
517 dev = dev_get_by_index_rcu(net, ifindex);
518 if (dev)
519 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
520 rcu_read_unlock();
521 return in_dev;
522}
523EXPORT_SYMBOL(inetdev_by_index);
524
525/* Called only from RTNL semaphored context. No locks. */
526
527struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
528 __be32 mask)
529{
530 ASSERT_RTNL();
531
532 for_primary_ifa(in_dev) {
533 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
534 return ifa;
535 } endfor_ifa(in_dev);
536 return NULL;
537}
538
539static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
540{
541 struct net *net = sock_net(skb->sk);
542 struct nlattr *tb[IFA_MAX+1];
543 struct in_device *in_dev;
544 struct ifaddrmsg *ifm;
545 struct in_ifaddr *ifa, **ifap;
546 int err = -EINVAL;
547
548 ASSERT_RTNL();
549
550 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
551 if (err < 0)
552 goto errout;
553
554 ifm = nlmsg_data(nlh);
555 in_dev = inetdev_by_index(net, ifm->ifa_index);
556 if (in_dev == NULL) {
557 err = -ENODEV;
558 goto errout;
559 }
560
561 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
562 ifap = &ifa->ifa_next) {
563 if (tb[IFA_LOCAL] &&
564 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
565 continue;
566
567 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
568 continue;
569
570 if (tb[IFA_ADDRESS] &&
571 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
572 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
573 continue;
574
575 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
576 return 0;
577 }
578
579 err = -EADDRNOTAVAIL;
580errout:
581 return err;
582}
583
584static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
585{
586 struct nlattr *tb[IFA_MAX+1];
587 struct in_ifaddr *ifa;
588 struct ifaddrmsg *ifm;
589 struct net_device *dev;
590 struct in_device *in_dev;
591 int err;
592
593 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
594 if (err < 0)
595 goto errout;
596
597 ifm = nlmsg_data(nlh);
598 err = -EINVAL;
599 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
600 goto errout;
601
602 dev = __dev_get_by_index(net, ifm->ifa_index);
603 err = -ENODEV;
604 if (dev == NULL)
605 goto errout;
606
607 in_dev = __in_dev_get_rtnl(dev);
608 err = -ENOBUFS;
609 if (in_dev == NULL)
610 goto errout;
611
612 ifa = inet_alloc_ifa();
613 if (ifa == NULL)
614 /*
615 * A potential indev allocation can be left alive, it stays
616 * assigned to its device and is destroy with it.
617 */
618 goto errout;
619
620 ipv4_devconf_setall(in_dev);
621 in_dev_hold(in_dev);
622
623 if (tb[IFA_ADDRESS] == NULL)
624 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
625
626 INIT_HLIST_NODE(&ifa->hash);
627 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
628 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
629 ifa->ifa_flags = ifm->ifa_flags;
630 ifa->ifa_scope = ifm->ifa_scope;
631 ifa->ifa_dev = in_dev;
632
633 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
634 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
635
636 if (tb[IFA_BROADCAST])
637 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
638
639 if (tb[IFA_LABEL])
640 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
641 else
642 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
643
644 return ifa;
645
646errout:
647 return ERR_PTR(err);
648}
649
650static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
651{
652 struct net *net = sock_net(skb->sk);
653 struct in_ifaddr *ifa;
654
655 ASSERT_RTNL();
656
657 ifa = rtm_to_ifaddr(net, nlh);
658 if (IS_ERR(ifa))
659 return PTR_ERR(ifa);
660
661 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
662}
663
664/*
665 * Determine a default network mask, based on the IP address.
666 */
667
668static inline int inet_abc_len(__be32 addr)
669{
670 int rc = -1; /* Something else, probably a multicast. */
671
672 if (ipv4_is_zeronet(addr))
673 rc = 0;
674 else {
675 __u32 haddr = ntohl(addr);
676
677 if (IN_CLASSA(haddr))
678 rc = 8;
679 else if (IN_CLASSB(haddr))
680 rc = 16;
681 else if (IN_CLASSC(haddr))
682 rc = 24;
683 }
684
685 return rc;
686}
687
688unsigned char br_ipchange_flag = 0;
689int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
690{
691 struct ifreq ifr;
692 struct sockaddr_in sin_orig;
693 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
694 struct in_device *in_dev;
695 struct in_ifaddr **ifap = NULL;
696 struct in_ifaddr *ifa = NULL;
697 struct net_device *dev;
698 char *colon;
699 int ret = -EFAULT;
700 int tryaddrmatch = 0;
701 unsigned int lx_addr;//add by linxu
702
703 /*
704 * Fetch the caller's info block into kernel space
705 */
706
707 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
708 goto out;
709 ifr.ifr_name[IFNAMSIZ - 1] = 0;
710
711 /* save original address for comparison */
712 memcpy(&sin_orig, sin, sizeof(*sin));
713
714 colon = strchr(ifr.ifr_name, ':');
715 if (colon)
716 *colon = 0;
717
718 dev_load(net, ifr.ifr_name);
719
720 switch (cmd) {
721 case SIOCGIFADDR: /* Get interface address */
722 case SIOCGIFBRDADDR: /* Get the broadcast address */
723 case SIOCGIFDSTADDR: /* Get the destination address */
724 case SIOCGIFNETMASK: /* Get the netmask for the interface */
725 /* Note that these ioctls will not sleep,
726 so that we do not impose a lock.
727 One day we will be forced to put shlock here (I mean SMP)
728 */
729 tryaddrmatch = (sin_orig.sin_family == AF_INET);
730 memset(sin, 0, sizeof(*sin));
731 sin->sin_family = AF_INET;
732 break;
733
734 case SIOCSIFFLAGS:
735 ret = -EACCES;
736 if (!capable(CAP_NET_ADMIN))
737 goto out;
738 break;
739 case SIOCSIFADDR: /* Set interface address (and family) */
740 case SIOCSIFBRDADDR: /* Set the broadcast address */
741 case SIOCSIFDSTADDR: /* Set the destination address */
742 case SIOCSIFNETMASK: /* Set the netmask for the interface */
743 case SIOCKILLADDR: /* Nuke all sockets on this address */
744 ret = -EACCES;
745 if (!capable(CAP_NET_ADMIN))
746 goto out;
747 ret = -EINVAL;
748 if (sin->sin_family != AF_INET)
749 goto out;
750 break;
751 default:
752 ret = -EINVAL;
753 goto out;
754 }
755
756 rtnl_lock();
757
758 ret = -ENODEV;
759 dev = __dev_get_by_name(net, ifr.ifr_name);
760 if (!dev)
761 goto done;
762
763 if (colon)
764 *colon = ':';
765
766 in_dev = __in_dev_get_rtnl(dev);
767 if (in_dev) {
768 if (tryaddrmatch) {
769 /* Matthias Andree */
770 /* compare label and address (4.4BSD style) */
771 /* note: we only do this for a limited set of ioctls
772 and only if the original address family was AF_INET.
773 This is checked above. */
774 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
775 ifap = &ifa->ifa_next) {
776 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
777 sin_orig.sin_addr.s_addr ==
778 ifa->ifa_local) {
779 break; /* found */
780 }
781 }
782 }
783 /* we didn't get a match, maybe the application is
784 4.3BSD-style and passed in junk so we fall back to
785 comparing just the label */
786 if (!ifa) {
787 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
788 ifap = &ifa->ifa_next)
789 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
790 break;
791 }
792 }
793
794 ret = -EADDRNOTAVAIL;
795 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS
796 && cmd != SIOCKILLADDR)
797 goto done;
798
799 switch (cmd) {
800 case SIOCGIFADDR: /* Get interface address */
801 sin->sin_addr.s_addr = ifa->ifa_local;
802 goto rarok;
803
804 case SIOCGIFBRDADDR: /* Get the broadcast address */
805 sin->sin_addr.s_addr = ifa->ifa_broadcast;
806 goto rarok;
807
808 case SIOCGIFDSTADDR: /* Get the destination address */
809 sin->sin_addr.s_addr = ifa->ifa_address;
810 goto rarok;
811
812 case SIOCGIFNETMASK: /* Get the netmask for the interface */
813 sin->sin_addr.s_addr = ifa->ifa_mask;
814 goto rarok;
815
816 case SIOCSIFFLAGS:
817 if (colon) {
818 ret = -EADDRNOTAVAIL;
819 if (!ifa)
820 break;
821 ret = 0;
822 if (!(ifr.ifr_flags & IFF_UP))
823 inet_del_ifa(in_dev, ifap, 1);
824 break;
825 }
826 ret = dev_change_flags(dev, ifr.ifr_flags);
827 break;
828
829 case SIOCSIFADDR: /* Set interface address (and family) */
830 /*¼à¿Øbr0µØÖ·±»ÄªÃû¸Ä±ästart*/
831 if(br_ipchange_flag)
832 {
833 lx_addr = (sin->sin_addr.s_addr)>>24;
834 if (strcmp(dev->name,"br0") == 0 && lx_addr != 1)
835 panic("!!!!!!!!!br0 ipaddr should not change \n");
836 }
837 /*¼à¿Øbr0µØÖ·±»ÄªÃû¸Ä±äend*/
838 ret = -EINVAL;
839 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
840 break;
841
842 if (!ifa) {
843 ret = -ENOBUFS;
844 ifa = inet_alloc_ifa();
845 INIT_HLIST_NODE(&ifa->hash);
846 if (!ifa)
847 break;
848 if (colon)
849 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
850 else
851 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
852 } else {
853 ret = 0;
854 if (ifa->ifa_local == sin->sin_addr.s_addr)
855 break;
856 inet_del_ifa(in_dev, ifap, 0);
857 ifa->ifa_broadcast = 0;
858 ifa->ifa_scope = 0;
859 }
860
861 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
862
863 if (!(dev->flags & IFF_POINTOPOINT)) {
864 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
865 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
866 if ((dev->flags & IFF_BROADCAST) &&
867 ifa->ifa_prefixlen < 31)
868 ifa->ifa_broadcast = ifa->ifa_address |
869 ~ifa->ifa_mask;
870 } else {
871 ifa->ifa_prefixlen = 32;
872 ifa->ifa_mask = inet_make_mask(32);
873 }
874 ret = inet_set_ifa(dev, ifa);
875 break;
876
877 case SIOCSIFBRDADDR: /* Set the broadcast address */
878 ret = 0;
879 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
880 inet_del_ifa(in_dev, ifap, 0);
881 ifa->ifa_broadcast = sin->sin_addr.s_addr;
882 inet_insert_ifa(ifa);
883 }
884 break;
885
886 case SIOCSIFDSTADDR: /* Set the destination address */
887 ret = 0;
888 if (ifa->ifa_address == sin->sin_addr.s_addr)
889 break;
890 ret = -EINVAL;
891 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
892 break;
893 ret = 0;
894 inet_del_ifa(in_dev, ifap, 0);
895 ifa->ifa_address = sin->sin_addr.s_addr;
896 inet_insert_ifa(ifa);
897 break;
898
899 case SIOCSIFNETMASK: /* Set the netmask for the interface */
900
901 /*
902 * The mask we set must be legal.
903 */
904 ret = -EINVAL;
905 if (bad_mask(sin->sin_addr.s_addr, 0))
906 break;
907 ret = 0;
908 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
909 __be32 old_mask = ifa->ifa_mask;
910 inet_del_ifa(in_dev, ifap, 0);
911 ifa->ifa_mask = sin->sin_addr.s_addr;
912 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
913
914 /* See if current broadcast address matches
915 * with current netmask, then recalculate
916 * the broadcast address. Otherwise it's a
917 * funny address, so don't touch it since
918 * the user seems to know what (s)he's doing...
919 */
920 if ((dev->flags & IFF_BROADCAST) &&
921 (ifa->ifa_prefixlen < 31) &&
922 (ifa->ifa_broadcast ==
923 (ifa->ifa_local|~old_mask))) {
924 ifa->ifa_broadcast = (ifa->ifa_local |
925 ~sin->sin_addr.s_addr);
926 }
927 inet_insert_ifa(ifa);
928 }
929 break;
930 case SIOCKILLADDR: /* Nuke all connections on this address */
931 ret = tcp_nuke_addr(net, (struct sockaddr *) sin);
932 break;
933 }
934done:
935 rtnl_unlock();
936out:
937 return ret;
938rarok:
939 rtnl_unlock();
940 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
941 goto out;
942}
943
944static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
945{
946 struct in_device *in_dev = __in_dev_get_rtnl(dev);
947 struct in_ifaddr *ifa;
948 struct ifreq ifr;
949 int done = 0;
950
951 if (!in_dev)
952 goto out;
953
954 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
955 if (!buf) {
956 done += sizeof(ifr);
957 continue;
958 }
959 if (len < (int) sizeof(ifr))
960 break;
961 memset(&ifr, 0, sizeof(struct ifreq));
962 if (ifa->ifa_label)
963 strcpy(ifr.ifr_name, ifa->ifa_label);
964 else
965 strcpy(ifr.ifr_name, dev->name);
966
967 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
968 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
969 ifa->ifa_local;
970
971 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
972 done = -EFAULT;
973 break;
974 }
975 buf += sizeof(struct ifreq);
976 len -= sizeof(struct ifreq);
977 done += sizeof(struct ifreq);
978 }
979out:
980 return done;
981}
982
983__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
984{
985 __be32 addr = 0;
986 struct in_device *in_dev;
987 struct net *net = dev_net(dev);
988
989 rcu_read_lock();
990 in_dev = __in_dev_get_rcu(dev);
991 if (!in_dev)
992 goto no_in_dev;
993
994 for_primary_ifa(in_dev) {
995 if (ifa->ifa_scope > scope)
996 continue;
997 if (!dst || inet_ifa_match(dst, ifa)) {
998 addr = ifa->ifa_local;
999 break;
1000 }
1001 if (!addr)
1002 addr = ifa->ifa_local;
1003 } endfor_ifa(in_dev);
1004
1005 if (addr)
1006 goto out_unlock;
1007no_in_dev:
1008
1009 /* Not loopback addresses on loopback should be preferred
1010 in this case. It is importnat that lo is the first interface
1011 in dev_base list.
1012 */
1013 for_each_netdev_rcu(net, dev) {
1014 in_dev = __in_dev_get_rcu(dev);
1015 if (!in_dev)
1016 continue;
1017
1018 for_primary_ifa(in_dev) {
1019 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1020 ifa->ifa_scope <= scope) {
1021 addr = ifa->ifa_local;
1022 goto out_unlock;
1023 }
1024 } endfor_ifa(in_dev);
1025 }
1026out_unlock:
1027 rcu_read_unlock();
1028 return addr;
1029}
1030EXPORT_SYMBOL(inet_select_addr);
1031
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks all addresses of @in_dev tracking two things: "addr", the first
 * address that equals @local (or any address when @local is zero) with
 * ifa_scope <= @scope, and "same", whether some address's subnet contains
 * @local and @dst (a zero value acts as a wildcard for either).
 *
 * Returns the selected address if a matching subnet was found, else 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		/* Pick the candidate source address once. */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1068
1069/*
1070 * Confirm that local IP address exists using wildcards:
1071 * - in_dev: only on this interface, 0=any interface
1072 * - dst: only in the same subnet as dst, 0=any dst
1073 * - local: address, 0=autoselect the local address
1074 * - scope: maximum allowed scope value for the local address
1075 */
1076__be32 inet_confirm_addr(struct in_device *in_dev,
1077 __be32 dst, __be32 local, int scope)
1078{
1079 __be32 addr = 0;
1080 struct net_device *dev;
1081 struct net *net;
1082
1083 if (scope != RT_SCOPE_LINK)
1084 return confirm_addr_indev(in_dev, dst, local, scope);
1085
1086 net = dev_net(in_dev->dev);
1087 rcu_read_lock();
1088 for_each_netdev_rcu(net, dev) {
1089 in_dev = __in_dev_get_rcu(dev);
1090 if (in_dev) {
1091 addr = confirm_addr_indev(in_dev, dst, local, scope);
1092 if (addr)
1093 break;
1094 }
1095 }
1096 rcu_read_unlock();
1097
1098 return addr;
1099}
1100EXPORT_SYMBOL(inet_confirm_addr);
1101
1102/*
1103 * Device notifier
1104 */
1105
/*
 * Add @nb to the inetaddr notifier chain, which this file calls with
 * NETDEV_UP / NETDEV_DOWN when an address is inserted or deleted.
 * Returns the result of blocking_notifier_chain_register().
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1111
/*
 * Remove @nb from the inetaddr notifier chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1117
1118/* Rename ifa_labels for a device name change. Make some effort to preserve
1119 * existing alias numbering and to create unique labels if possible.
1120*/
1121static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1122{
1123 struct in_ifaddr *ifa;
1124 int named = 0;
1125
1126 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1127 char old[IFNAMSIZ], *dot;
1128
1129 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1130 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1131 if (named++ == 0)
1132 goto skip;
1133 dot = strchr(old, ':');
1134 if (dot == NULL) {
1135 sprintf(old, ":%d", named);
1136 dot = old;
1137 }
1138 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1139 strcat(ifa->ifa_label, dot);
1140 else
1141 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1142skip:
1143 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1144 }
1145}
1146
/*
 * Tell whether @mtu is large enough for IPv4 to run on the device:
 * 68 bytes is the smallest acceptable value.
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1151
1152static void inetdev_send_gratuitous_arp(struct net_device *dev,
1153 struct in_device *in_dev)
1154
1155{
1156 struct in_ifaddr *ifa;
1157
1158 for (ifa = in_dev->ifa_list; ifa;
1159 ifa = ifa->ifa_next) {
1160 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1161 ifa->ifa_local, dev,
1162 ifa->ifa_local, NULL,
1163 dev->dev_addr, NULL);
1164 }
1165}
1166
1167/* Called only under RTNL semaphore */
1168
1169static int inetdev_event(struct notifier_block *this, unsigned long event,
1170 void *ptr)
1171{
1172 struct net_device *dev = ptr;
1173 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1174
1175 ASSERT_RTNL();
1176
1177 if (!in_dev) {
1178 if (event == NETDEV_REGISTER) {
1179 in_dev = inetdev_init(dev);
1180 if (!in_dev)
1181 return notifier_from_errno(-ENOMEM);
1182 if (dev->flags & IFF_LOOPBACK) {
1183 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1184 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1185 }
1186 } else if (event == NETDEV_CHANGEMTU) {
1187 /* Re-enabling IP */
1188 if (inetdev_valid_mtu(dev->mtu))
1189 in_dev = inetdev_init(dev);
1190 }
1191 goto out;
1192 }
1193
1194 switch (event) {
1195 case NETDEV_REGISTER:
1196 //print_sun(SUN_LEARN,"dev:%s--notify:event=%d : NETDEV_REGISTER",dev->name,event);
1197 printk(KERN_DEBUG "inetdev_event: bug\n");
1198 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1199 break;
1200 case NETDEV_UP:
1201 //print_sun(SUN_LEARN,"dev:%s--notify:event=%d : NETDEV_UP",dev->name,event);
1202 if (!inetdev_valid_mtu(dev->mtu))
1203 break;
1204 if (dev->flags & IFF_LOOPBACK) {
1205 struct in_ifaddr *ifa = inet_alloc_ifa();
1206
1207 if (ifa) {
1208 INIT_HLIST_NODE(&ifa->hash);
1209 ifa->ifa_local =
1210 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1211 ifa->ifa_prefixlen = 8;
1212 ifa->ifa_mask = inet_make_mask(8);
1213 in_dev_hold(in_dev);
1214 ifa->ifa_dev = in_dev;
1215 ifa->ifa_scope = RT_SCOPE_HOST;
1216 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1217 inet_insert_ifa(ifa);
1218 }
1219 }
1220 ip_mc_up(in_dev);
1221 /* fall through */
1222 case NETDEV_CHANGEADDR:
1223 //print_sun(SUN_LEARN,"dev:%s--notify:event=%d : NETDEV_CHANGEADDR",dev->name,event);
1224 if (!IN_DEV_ARP_NOTIFY(in_dev))
1225 break;
1226 /* fall through */
1227 case NETDEV_NOTIFY_PEERS:
1228 //print_sun(SUN_LEARN,"dev:%s--notify:event=%d : NETDEV_NOTIFY_PEERS",dev->name,event);
1229 /* Send gratuitous ARP to notify of link change */
1230 inetdev_send_gratuitous_arp(dev, in_dev);
1231 break;
1232 case NETDEV_DOWN:
1233 //print_sun(SUN_LEARN,"dev:%s--notify:event=%d : NETDEV_DOWN",dev->name,event);
1234 ip_mc_down(in_dev);
1235 break;
1236 case NETDEV_PRE_TYPE_CHANGE:
1237 //print_sun(SUN_LEARN,"dev:%s--notify:event=%d : NETDEV_PRE_TYPE_CHANGE",dev->name,event);
1238 ip_mc_unmap(in_dev);
1239 break;
1240 case NETDEV_POST_TYPE_CHANGE:
1241 //print_sun(SUN_LEARN,"dev:%s--notify:event=%d : NETDEV_POST_TYPE_CHANGE",dev->name,event);
1242 ip_mc_remap(in_dev);
1243 break;
1244 case NETDEV_CHANGEMTU:
1245 //print_sun(SUN_LEARN,"dev:%s--notify:event=%d : NETDEV_CHANGEMTU",dev->name,event);
1246 if (inetdev_valid_mtu(dev->mtu))
1247 break;
1248 /* disable IP when MTU is not enough */
1249 case NETDEV_UNREGISTER:
1250 //print_sun(SUN_LEARN,"dev:%s--notify:event=%d : NETDEV_UNREGISTER",dev->name,event);
1251 inetdev_destroy(in_dev);
1252 break;
1253 case NETDEV_CHANGENAME:
1254 //print_sun(SUN_LEARN,"dev:%s--notify:event=%d : NETDEV_CHANGENAME",dev->name,event);
1255 /* Do not notify about label change, this event is
1256 * not interesting to applications using netlink.
1257 */
1258 inetdev_changename(dev, in_dev);
1259
1260 devinet_sysctl_unregister(in_dev);
1261 devinet_sysctl_register(in_dev);
1262 break;
1263 }
1264out:
1265 return NOTIFY_DONE;
1266}
1267
/*
 * Notifier block that delivers netdevice events to inetdev_event();
 * registered from devinet_init().
 */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1271
1272static inline size_t inet_nlmsg_size(void)
1273{
1274 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1275 + nla_total_size(4) /* IFA_ADDRESS */
1276 + nla_total_size(4) /* IFA_LOCAL */
1277 + nla_total_size(4) /* IFA_BROADCAST */
1278 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1279}
1280
1281static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1282 u32 pid, u32 seq, int event, unsigned int flags)
1283{
1284 struct ifaddrmsg *ifm;
1285 struct nlmsghdr *nlh;
1286
1287 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1288 if (nlh == NULL)
1289 return -EMSGSIZE;
1290
1291 ifm = nlmsg_data(nlh);
1292 ifm->ifa_family = AF_INET;
1293 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1294 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1295 ifm->ifa_scope = ifa->ifa_scope;
1296 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1297
1298 if (ifa->ifa_address)
1299 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1300
1301 if (ifa->ifa_local)
1302 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1303
1304 if (ifa->ifa_broadcast)
1305 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1306
1307 if (ifa->ifa_label[0])
1308 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1309
1310 return nlmsg_end(skb, nlh);
1311
1312nla_put_failure:
1313 nlmsg_cancel(skb, nlh);
1314 return -EMSGSIZE;
1315}
1316
1317static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1318{
1319 struct net *net = sock_net(skb->sk);
1320 int h, s_h;
1321 int idx, s_idx;
1322 int ip_idx, s_ip_idx;
1323 struct net_device *dev;
1324 struct in_device *in_dev;
1325 struct in_ifaddr *ifa;
1326 struct hlist_head *head;
1327 struct hlist_node *node;
1328
1329 s_h = cb->args[0];
1330 s_idx = idx = cb->args[1];
1331 s_ip_idx = ip_idx = cb->args[2];
1332
1333 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1334 idx = 0;
1335 head = &net->dev_index_head[h];
1336 rcu_read_lock();
1337 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1338 if (idx < s_idx)
1339 goto cont;
1340 if (h > s_h || idx > s_idx)
1341 s_ip_idx = 0;
1342 in_dev = __in_dev_get_rcu(dev);
1343 if (!in_dev)
1344 goto cont;
1345
1346 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1347 ifa = ifa->ifa_next, ip_idx++) {
1348 if (ip_idx < s_ip_idx)
1349 continue;
1350 if (inet_fill_ifaddr(skb, ifa,
1351 NETLINK_CB(cb->skb).pid,
1352 cb->nlh->nlmsg_seq,
1353 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1354 rcu_read_unlock();
1355 goto done;
1356 }
1357 }
1358cont:
1359 idx++;
1360 }
1361 rcu_read_unlock();
1362 }
1363
1364done:
1365 cb->args[0] = h;
1366 cb->args[1] = idx;
1367 cb->args[2] = ip_idx;
1368
1369 return skb->len;
1370}
1371
1372static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1373 u32 pid)
1374{
1375 struct sk_buff *skb;
1376 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1377 int err = -ENOBUFS;
1378 struct net *net;
1379
1380 net = dev_net(ifa->ifa_dev->dev);
1381 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1382 if (skb == NULL)
1383 goto errout;
1384
1385 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1386 if (err < 0) {
1387 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1388 WARN_ON(err == -EMSGSIZE);
1389 kfree_skb(skb);
1390 goto errout;
1391 }
1392 //print_sun(SUN_LEARN,"dev:%s,rtmsg_ifa::rtnl_notify;type=%d,for example RTM_NEWNEIGH",ifa->ifa_dev->dev->name,event);
1393 rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1394 return;
1395errout:
1396 if (err < 0)
1397 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1398}
1399
1400static size_t inet_get_link_af_size(const struct net_device *dev)
1401{
1402 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1403
1404 if (!in_dev)
1405 return 0;
1406
1407 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1408}
1409
1410static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1411{
1412 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1413 struct nlattr *nla;
1414 int i;
1415
1416 if (!in_dev)
1417 return -ENODATA;
1418
1419 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1420 if (nla == NULL)
1421 return -EMSGSIZE;
1422
1423 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1424 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1425
1426 return 0;
1427}
1428
/*
 * Attribute policy for the AF_INET link attributes: IFLA_INET_CONF must be
 * a nested attribute.  Applied by inet_validate_link_af().
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1432
1433static int inet_validate_link_af(const struct net_device *dev,
1434 const struct nlattr *nla)
1435{
1436 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1437 int err, rem;
1438
1439 if (dev && !__in_dev_get_rtnl(dev))
1440 return -EAFNOSUPPORT;
1441
1442 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1443 if (err < 0)
1444 return err;
1445
1446 if (tb[IFLA_INET_CONF]) {
1447 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1448 int cfgid = nla_type(a);
1449
1450 if (nla_len(a) < 4)
1451 return -EINVAL;
1452
1453 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1454 return -EINVAL;
1455 }
1456 }
1457
1458 return 0;
1459}
1460
1461static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1462{
1463 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1464 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1465 int rem;
1466
1467 if (!in_dev)
1468 return -EAFNOSUPPORT;
1469
1470 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1471 BUG();
1472
1473 if (tb[IFLA_INET_CONF]) {
1474 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1475 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1476 }
1477
1478 return 0;
1479}
1480
1481#ifdef CONFIG_SYSCTL
1482
1483static void devinet_copy_dflt_conf(struct net *net, int i)
1484{
1485 struct net_device *dev;
1486
1487 rcu_read_lock();
1488 for_each_netdev_rcu(net, dev) {
1489 struct in_device *in_dev;
1490
1491 in_dev = __in_dev_get_rcu(dev);
1492 if (in_dev && !test_bit(i, in_dev->cnf.state))
1493 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1494 }
1495 rcu_read_unlock();
1496}
1497
1498/* called with RTNL locked */
1499static void inet_forward_change(struct net *net)
1500{
1501 struct net_device *dev;
1502 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1503
1504 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1505 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1506
1507 for_each_netdev(net, dev) {
1508 struct in_device *in_dev;
1509 if (on)
1510 dev_disable_lro(dev);
1511 rcu_read_lock();
1512 in_dev = __in_dev_get_rcu(dev);
1513 if (in_dev)
1514 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1515 rcu_read_unlock();
1516 }
1517}
1518
1519static int devinet_conf_proc(ctl_table *ctl, int write,
1520 void __user *buffer,
1521 size_t *lenp, loff_t *ppos)
1522{
1523 int old_value = *(int *)ctl->data;
1524 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1525 int new_value = *(int *)ctl->data;
1526
1527 if (write) {
1528 struct ipv4_devconf *cnf = ctl->extra1;
1529 struct net *net = ctl->extra2;
1530 int i = (int *)ctl->data - cnf->data;
1531
1532 set_bit(i, cnf->state);
1533
1534 if (cnf == net->ipv4.devconf_dflt)
1535 devinet_copy_dflt_conf(net, i);
1536 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1)
1537 if ((new_value == 0) && (old_value != 0))
1538 rt_cache_flush(net, 0);
1539 }
1540
1541 return ret;
1542}
1543
1544static int devinet_sysctl_forward(ctl_table *ctl, int write,
1545 void __user *buffer,
1546 size_t *lenp, loff_t *ppos)
1547{
1548 int *valp = ctl->data;
1549 int val = *valp;
1550 loff_t pos = *ppos;
1551 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1552
1553 if (write && *valp != val) {
1554 struct net *net = ctl->extra2;
1555
1556 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1557 if (!rtnl_trylock()) {
1558 /* Restore the original values before restarting */
1559 *valp = val;
1560 *ppos = pos;
1561 return restart_syscall();
1562 }
1563 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1564 inet_forward_change(net);
1565 } else if (*valp) {
1566 struct ipv4_devconf *cnf = ctl->extra1;
1567 struct in_device *idev =
1568 container_of(cnf, struct in_device, cnf);
1569 dev_disable_lro(idev->dev);
1570 }
1571 rtnl_unlock();
1572 rt_cache_flush(net, 0);
1573 }
1574 }
1575
1576 return ret;
1577}
1578
1579static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1580 void __user *buffer,
1581 size_t *lenp, loff_t *ppos)
1582{
1583 int *valp = ctl->data;
1584 int val = *valp;
1585 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1586 struct net *net = ctl->extra2;
1587
1588 if (write && *valp != val)
1589 rt_cache_flush(net, 0);
1590
1591 return ret;
1592}
1593
/*
 * Initializer for one ctl_table entry of the devinet sysctl template.
 *
 * @attr selects the slot (IPV4_DEVCONF_<attr> - 1) of the global
 * ipv4_devconf.data[] that .data points at, @name is the procname, @mval
 * the file mode and @proc the handler.  .extra1 points at ipv4_devconf.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write (0644) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only (0444) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write (0644) entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write (0644) entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1616
/*
 * Template for one devconf's sysctl directory.
 *
 * __devinet_sysctl_register() duplicates this structure for each devconf
 * it registers and rebases the entries, which here all point into the
 * global ipv4_devconf, onto that devconf.
 */
static struct devinet_sysctl_table {
	/* Registration handle returned by register_net_sysctl_table() */
	struct ctl_table_header *sysctl_header;
	/* The sysctl entries themselves */
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
	/* Private copy of the directory name */
	char *dev_name;
} devinet_sysctl = {
	.devinet_vars = {
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
					      "force_igmp_version"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
	},
};
1656
1657static int __devinet_sysctl_register(struct net *net, char *dev_name,
1658 struct ipv4_devconf *p)
1659{
1660 int i;
1661 struct devinet_sysctl_table *t;
1662
1663#define DEVINET_CTL_PATH_DEV 3
1664
1665 struct ctl_path devinet_ctl_path[] = {
1666 { .procname = "net", },
1667 { .procname = "ipv4", },
1668 { .procname = "conf", },
1669 { /* to be set */ },
1670 { },
1671 };
1672
1673 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1674 if (!t)
1675 goto out;
1676
1677 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1678 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1679 t->devinet_vars[i].extra1 = p;
1680 t->devinet_vars[i].extra2 = net;
1681 }
1682
1683 /*
1684 * Make a copy of dev_name, because '.procname' is regarded as const
1685 * by sysctl and we wouldn't want anyone to change it under our feet
1686 * (see SIOCSIFNAME).
1687 */
1688 t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1689 if (!t->dev_name)
1690 goto free;
1691
1692 devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1693
1694 t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1695 t->devinet_vars);
1696 if (!t->sysctl_header)
1697 goto free_procname;
1698
1699 p->sysctl = t;
1700 return 0;
1701
1702free_procname:
1703 kfree(t->dev_name);
1704free:
1705 kfree(t);
1706out:
1707 return -ENOBUFS;
1708}
1709
1710static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1711{
1712 struct devinet_sysctl_table *t = cnf->sysctl;
1713
1714 if (t == NULL)
1715 return;
1716
1717 cnf->sysctl = NULL;
1718 unregister_net_sysctl_table(t->sysctl_header);
1719 kfree(t->dev_name);
1720 kfree(t);
1721}
1722
1723static void devinet_sysctl_register(struct in_device *idev)
1724{
1725 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1726 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1727 &idev->cnf);
1728}
1729
1730static void devinet_sysctl_unregister(struct in_device *idev)
1731{
1732 __devinet_sysctl_unregister(&idev->cnf);
1733 neigh_sysctl_unregister(idev->arp_parms);
1734}
1735
/*
 * The "ip_forward" sysctl, registered under net_ipv4_path by
 * devinet_init_net().  This static table is bound to the forwarding slot
 * of the global ipv4_devconf and to init_net; other namespaces get a
 * duplicate with data/extra1/extra2 rewritten.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },
};
1749
/* Sysctl path "net/ipv4", under which ctl_forward_entry is registered. */
static __net_initdata struct ctl_path net_ipv4_path[] = {
	{ .procname = "net", },
	{ .procname = "ipv4", },
	{ },
};
1755#endif
1756
1757static __net_init int devinet_init_net(struct net *net)
1758{
1759 int err;
1760 struct ipv4_devconf *all, *dflt;
1761#ifdef CONFIG_SYSCTL
1762 struct ctl_table *tbl = ctl_forward_entry;
1763 struct ctl_table_header *forw_hdr;
1764#endif
1765
1766 err = -ENOMEM;
1767 all = &ipv4_devconf;
1768 dflt = &ipv4_devconf_dflt;
1769
1770 if (!net_eq(net, &init_net)) {
1771 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1772 if (all == NULL)
1773 goto err_alloc_all;
1774
1775 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1776 if (dflt == NULL)
1777 goto err_alloc_dflt;
1778
1779#ifdef CONFIG_SYSCTL
1780 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1781 if (tbl == NULL)
1782 goto err_alloc_ctl;
1783
1784 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1785 tbl[0].extra1 = all;
1786 tbl[0].extra2 = net;
1787#endif
1788 }
1789
1790#ifdef CONFIG_SYSCTL
1791 err = __devinet_sysctl_register(net, "all", all);
1792 if (err < 0)
1793 goto err_reg_all;
1794
1795 err = __devinet_sysctl_register(net, "default", dflt);
1796 if (err < 0)
1797 goto err_reg_dflt;
1798
1799 err = -ENOMEM;
1800 forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1801 if (forw_hdr == NULL)
1802 goto err_reg_ctl;
1803 net->ipv4.forw_hdr = forw_hdr;
1804#endif
1805
1806 net->ipv4.devconf_all = all;
1807 net->ipv4.devconf_dflt = dflt;
1808 return 0;
1809
1810#ifdef CONFIG_SYSCTL
1811err_reg_ctl:
1812 __devinet_sysctl_unregister(dflt);
1813err_reg_dflt:
1814 __devinet_sysctl_unregister(all);
1815err_reg_all:
1816 if (tbl != ctl_forward_entry)
1817 kfree(tbl);
1818err_alloc_ctl:
1819#endif
1820 if (dflt != &ipv4_devconf_dflt)
1821 kfree(dflt);
1822err_alloc_dflt:
1823 if (all != &ipv4_devconf)
1824 kfree(all);
1825err_alloc_all:
1826 return err;
1827}
1828
1829static __net_exit void devinet_exit_net(struct net *net)
1830{
1831#ifdef CONFIG_SYSCTL
1832 struct ctl_table *tbl;
1833
1834 tbl = net->ipv4.forw_hdr->ctl_table_arg;
1835 unregister_net_sysctl_table(net->ipv4.forw_hdr);
1836 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1837 __devinet_sysctl_unregister(net->ipv4.devconf_all);
1838 kfree(tbl);
1839#endif
1840 kfree(net->ipv4.devconf_dflt);
1841 kfree(net->ipv4.devconf_all);
1842}
1843
/*
 * Per-namespace hooks, registered from devinet_init(): devinet_init_net()
 * sets a namespace up and devinet_exit_net() tears it down.
 */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
1848
/*
 * AF_INET handlers for the address-family specific part of link messages,
 * registered from devinet_init() with rtnl_af_register().
 */
static struct rtnl_af_ops inet_af_ops = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
1856
1857void __init devinet_init(void)
1858{
1859 int i;
1860
1861 for (i = 0; i < IN4_ADDR_HSIZE; i++)
1862 INIT_HLIST_HEAD(&inet_addr_lst[i]);
1863
1864 register_pernet_subsys(&devinet_ops);
1865
1866 register_gifconf(PF_INET, inet_gifconf);
1867 register_netdevice_notifier(&ip_netdev_notifier);
1868
1869 rtnl_af_register(&inet_af_ops);
1870
1871 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1872 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1873 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1874}
1875