/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
27#include <linux/capability.h>
28#include <linux/errno.h>
29#include <linux/export.h>
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
38#include <linux/mroute6.h>
39#include <linux/init.h>
40#include <linux/if_arp.h>
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
43#include <linux/nsproxy.h>
44#include <linux/slab.h>
45#include <net/net_namespace.h>
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
56#include <net/netevent.h>
57#include <net/netlink.h>
58
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
/* Forward declarations for the dst_ops callbacks and helpers defined below. */
static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
				    const struct in6_addr *dest);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);

#ifdef CONFIG_IPV6_ROUTE_INFO
/* Handlers for RA Route Information options (RFC 4191). */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex);
#endif
90
/*
 * Copy-on-write the metrics of a host route into its inet_peer so all
 * routes to the same destination share one writable metrics array.
 * Non-host (non-DST_HOST) routes fall back to the generic per-dst copy.
 *
 * Returns a writable metrics array, or NULL when no writable array
 * could be installed (no peer, or another CPU installed a read-only
 * pointer first).
 */
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct inet_peer *peer;
	u32 *p = NULL;

	if (!(rt->dst.flags & DST_HOST))
		return dst_cow_metrics_generic(dst, old);

	/* Lazily attach an inet_peer; may be NULL under memory pressure. */
	if (!rt->rt6i_peer)
		rt6_bind_peer(rt, 1);

	peer = rt->rt6i_peer;
	if (peer) {
		u32 *old_p = __DST_METRICS_PTR(old);
		unsigned long prev, new;

		p = peer->metrics;
		/* First binder seeds the peer's metrics from the old array. */
		if (inet_metrics_new(peer))
			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);

		new = (unsigned long) p;
		prev = cmpxchg(&dst->_metrics, old, new);

		if (prev != old) {
			/* Lost the race: use whatever is installed now,
			 * unless that pointer is marked read-only. */
			p = __DST_METRICS_PTR(prev);
			if (prev & DST_METRICS_READ_ONLY)
				p = NULL;
		}
	}
	return p;
}
123
124static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
125{
126 struct in6_addr *p = &rt->rt6i_gateway;
127
128 if (!ipv6_addr_any(p))
129 return (const void *) p;
130 return daddr;
131}
132
133static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
134{
135 struct rt6_info *rt = (struct rt6_info *) dst;
136 struct neighbour *n;
137
138 daddr = choose_neigh_daddr(rt, daddr);
139 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
140 if (n)
141 return n;
142 return neigh_create(&nd_tbl, daddr, dst->dev);
143}
144
145static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
146{
147 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
148 if (!n) {
149 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
150 if (IS_ERR(n))
151 return PTR_ERR(n);
152 }
153 dst_set_neighbour(&rt->dst, n);
154
155 return 0;
156}
157
/* Template for the per-netns IPv6 dst_ops; copied into each struct net
 * at namespace init.  Wires the routing cache callbacks defined in
 * this file into the generic dst layer. */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};
175
176static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
177{
178 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
179
180 return mtu ? : dst->dev->mtu;
181}
182
/* Blackhole routes must never change state: PMTU updates are ignored. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}

/* Blackhole routes never get writable metrics. */
static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
					 unsigned long old)
{
	return NULL;
}

/* dst_ops for routes returned by ip6_blackhole_route(): read-only
 * clones used after xfrm bundle invalidation; all mutators are no-ops. */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ip6_neigh_lookup,
};
204
/* Default metrics for the template routes below; hop limit 0 means
 * "use the per-device default" (see ip6_dst_hoplimit()). */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};

/* Per-netns "no route" sentinel: matched lookups that found nothing
 * end up here and elicit -ENETUNREACH / packet discard. */
static struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
223
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* "prohibit" sentinel for fib rules: reject with -EACCES and send
 * an ICMPv6 administratively-prohibited error. */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* "blackhole" sentinel for fib rules: silently discard, no ICMP. */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
260
/* allocate dst with ip6_dst_ops */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
					     struct net_device *dev,
					     int flags)
{
	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);

	/* Zero everything after the embedded dst_entry.  Relies on
	 * rt6i_table being the first member after rt->dst in
	 * struct rt6_info — keep in sync with the struct layout. */
	if (rt)
		memset(&rt->rt6i_table, 0,
		       sizeof(*rt) - sizeof(struct dst_entry));

	return rt;
}
274
/* dst_ops::destroy — drop everything an rt6_info pins: generic metrics
 * (host routes share the peer's array instead), the inet6_dev, the
 * "from" parent dst of an uncloned copy, and the inet_peer. */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct inet_peer *peer = rt->rt6i_peer;

	/* Host-route metrics live in the inet_peer, freed with it below. */
	if (!(rt->dst.flags & DST_HOST))
		dst_destroy_metrics_generic(dst);

	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	/* dst->from is only held while we inherit its expiry
	 * (i.e. while RTF_EXPIRES is clear). */
	if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
		dst_release(dst->from);

	if (peer) {
		rt->rt6i_peer = NULL;
		inet_putpeer(peer);
	}
}
297
298static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
299
300u32 rt6_peer_genid(void)
301{
302 return atomic_read(&__rt6_peer_genid);
303}
304
/* Bind an inet_peer for rt's destination.  Lockless: publish via
 * cmpxchg so a concurrent binder wins cleanly; the loser drops its
 * extra peer reference.  @create is forwarded to inet_getpeer_v6(). */
void rt6_bind_peer(struct rt6_info *rt, int create)
{
	struct inet_peer *peer;

	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
		inet_putpeer(peer);
	else
		rt->rt6i_peer_genid = rt6_peer_genid();
}
315
/* dst_ops::ifdown — a device this route pinned is going away: retarget
 * the rt6_info's inet6_dev reference at the namespace loopback device
 * so the dying device can be released. */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev && idev && idev->dev == dev) {
		struct inet6_dev *loopback_idev =
			in6_dev_get(loopback_dev);
		if (loopback_idev) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}
333
334static __inline__ int rt6_check_expired(const struct rt6_info *rt)
335{
336 struct rt6_info *ort = NULL;
337
338 if (rt->rt6i_flags & RTF_EXPIRES) {
339 if (time_after(jiffies, rt->dst.expires))
340 return 1;
341 } else if (rt->dst.from) {
342 ort = (struct rt6_info *) rt->dst.from;
343 return (ort->rt6i_flags & RTF_EXPIRES) &&
344 time_after(jiffies, ort->dst.expires);
345 }
346 return 0;
347}
348
349static inline int rt6_need_strict(const struct in6_addr *daddr)
350{
351 return ipv6_addr_type(daddr) &
352 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
353}
354
355/*
356 * Route lookup. Any table->tb6_lock is implied.
357 */
358
/* Walk the routes sharing this fib6 node and pick the one matching the
 * requested outgoing interface (oif) and/or source address.  Loopback
 * devices may stand in for the requested oif; such a candidate is
 * remembered in 'local' as a fallback.  Returns the head route when no
 * constraint applies, or the null entry when strict interface matching
 * was requested and nothing matched. */
static inline struct rt6_info *rt6_device_match(struct net *net,
						struct rt6_info *rt,
						const struct in6_addr *saddr,
						int oif,
						int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					/* NOTE: '&& oif' / '!oif' below are
					 * redundant inside this if (oif)
					 * branch; kept for fidelity. */
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		} else {
			/* No oif: match on source address ownership. */
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
405
406#ifdef CONFIG_IPV6_ROUTER_PREF
/* Kick off a neighbour-unreachability probe for a router whose
 * reachability is unknown, rate-limited by rtr_probe_interval.
 * Takes neigh->lock only long enough to sample/refresh 'updated';
 * the NS is sent after the lock is dropped. */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;
	read_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/* Stamp first so concurrent callers observe the rate limit. */
		neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		read_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
440#else
441static inline void rt6_probe(struct rt6_info *rt)
442{
443}
444#endif
445
446/*
447 * Default Router Selection (RFC 2461 6.3.6)
448 */
449static inline int rt6_check_dev(struct rt6_info *rt, int oif)
450{
451 struct net_device *dev = rt->dst.dev;
452 if (!oif || dev->ifindex == oif)
453 return 2;
454 if ((dev->flags & IFF_LOOPBACK) &&
455 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
456 return 1;
457 return 0;
458}
459
/* Score next-hop reachability for router selection:
 * 2 = neighbour known reachable (NUD_VALID),
 * 1 = no gateway needed, or state unknown,
 * 0 = no neighbour (or NUD_FAILED with router preferences on). */
static inline int rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	int m;

	rcu_read_lock();
	neigh = dst_get_neighbour_noref(&rt->dst);
	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		m = 1;
	else if (neigh) {
		read_lock_bh(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			m = 2;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (neigh->nud_state & NUD_FAILED)
			m = 0;
#endif
		else
			m = 1;
		read_unlock_bh(&neigh->lock);
	} else
		m = 0;
	rcu_read_unlock();
	return m;
}
486
487static int rt6_score_route(struct rt6_info *rt, int oif,
488 int strict)
489{
490 int m, n;
491
492 m = rt6_check_dev(rt, oif);
493 if (!m && (strict & RT6_LOOKUP_F_IFACE))
494 return -1;
495#ifdef CONFIG_IPV6_ROUTER_PREF
496 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
497#endif
498 n = rt6_check_neigh(rt);
499 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
500 return -1;
501 return m;
502}
503
/* Compare rt against the current best 'match'.  Keeps the highest
 * score in *mpri and returns the (possibly new) best route.  When
 * reachability is required, the loser is probed so it can become a
 * viable candidate later. */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match)
{
	int m;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m < 0)
		goto out;

	if (m > *mpri) {
		if (strict & RT6_LOOKUP_F_REACHABLE)
			rt6_probe(match);	/* probe the displaced route */
		*mpri = m;
		match = rt;
	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
		rt6_probe(rt);
	}

out:
	return match;
}
528
/* Scan all routes of the same metric on this fib6 node, starting at
 * the round-robin head and wrapping back to fn->leaf, and return the
 * best-scoring one (NULL if none is acceptable). */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict)
{
	struct rt6_info *rt, *match;
	int mpri = -1;

	match = NULL;
	/* From the round-robin head to the end of the equal-metric run. */
	for (rt = rr_head; rt && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);
	/* Wrap: from the leaf up to (not including) the head. */
	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);

	return match;
}
546
/* Default router selection (RFC 2461 6.3.6): choose the best route on
 * this node, round-robining among equal candidates when nothing is
 * known reachable.  Caller holds the table lock. */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);

	if (!match &&
	    (strict & RT6_LOOKUP_F_REACHABLE)) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
573
574#ifdef CONFIG_IPV6_ROUTE_INFO
/* Process a Route Information option from a Router Advertisement
 * (RFC 4191): validate the option, then add, update, or delete the
 * corresponding RTF_ROUTEINFO route.  Returns 0 or -EINVAL. */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		/* prefixes longer than 64 bits need >= 2 units of prefix data */
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* A zero-length prefix is the default route. */
	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex);

	/* Zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		dst_release(&rt->dst);
	}
	return 0;
}
648#endif
649
/* If the lookup landed on the null entry, climb the fib6 tree toward
 * the root (descending into source-address subtrees on the way) and
 * jump back to the caller's 'restart' label at the first node that
 * carries routes; 'goto out' once the root is reached.  Requires the
 * caller to define fn, rt, and the restart/out labels. */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
667
/* Fast, non-cloning route lookup used by rt6_lookup()/ip6_route_lookup():
 * find the fib6 node, pick the device match, backtrack if necessary,
 * and return the route with its use counters bumped. */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	BACKTRACK(net, &fl6->saddr);
out:
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}
687
688struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
689 int flags)
690{
691 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
692}
693EXPORT_SYMBOL_GPL(ip6_route_lookup);
694
695struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
696 const struct in6_addr *saddr, int oif, int strict)
697{
698 struct flowi6 fl6 = {
699 .flowi6_oif = oif,
700 .daddr = *daddr,
701 };
702 struct dst_entry *dst;
703 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
704
705 if (saddr) {
706 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
707 flags |= RT6_LOOKUP_F_HAS_SADDR;
708 }
709
710 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
711 if (dst->error == 0)
712 return (struct rt6_info *) dst;
713
714 dst_release(dst);
715
716 return NULL;
717}
718
719EXPORT_SYMBOL(rt6_lookup);
720
/* ip6_ins_rt is called with FREE table->tb6_lock.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed.  In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
726
727static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
728{
729 int err;
730 struct fib6_table *table;
731
732 table = rt->rt6i_table;
733 write_lock_bh(&table->tb6_lock);
734 err = fib6_add(&table->tb6_root, rt, info);
735 write_unlock_bh(&table->tb6_lock);
736
737 return err;
738}
739
740int ip6_ins_rt(struct rt6_info *rt)
741{
742 struct nl_info info = {
743 .nl_net = dev_net(rt->dst.dev),
744 };
745 return __ip6_ins_rt(rt, &info);
746}
747
/* Clone ort into a host (/128) RTF_CACHE route for daddr/saddr and bind
 * its neighbour entry.  If the neighbour table is full, temporarily
 * relax the GC sysctls, force a GC pass, and retry (only once when
 * called from softirq context).  Returns NULL on failure. */
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
				      const struct in6_addr *daddr,
				      const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort, daddr);

	if (rt) {
		/* Avoid blocking GC retries in softirq context. */
		int attempts = !in_softirq();

		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
			if (ort->rt6i_dst.plen != 128 &&
			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
				rt->rt6i_flags |= RTF_ANYCAST;
			/* On-link route: the destination is its own next hop. */
			rt->rt6i_gateway = *daddr;
		}

		rt->rt6i_flags |= RTF_CACHE;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif

	retry:
		if (rt6_bind_neighbour(rt, rt->dst.dev)) {
			struct net *net = dev_net(rt->dst.dev);
			int saved_rt_min_interval =
				net->ipv6.sysctl.ip6_rt_gc_min_interval;
			int saved_rt_elasticity =
				net->ipv6.sysctl.ip6_rt_gc_elasticity;

			if (attempts-- > 0) {
				/* Make GC maximally aggressive for one pass,
				 * then restore the tunables. */
				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;

				ip6_dst_gc(&net->ipv6.ip6_dst_ops);

				net->ipv6.sysctl.ip6_rt_gc_elasticity =
					saved_rt_elasticity;
				net->ipv6.sysctl.ip6_rt_gc_min_interval =
					saved_rt_min_interval;
				goto retry;
			}

			if (net_ratelimit())
				printk(KERN_WARNING
				       "ipv6: Neighbour table overflow.\n");
			dst_free(&rt->dst);
			return NULL;
		}
	}

	return rt;
}
810
/* Clone ort into an RTF_CACHE route for daddr, sharing (a clone of)
 * the original's neighbour entry instead of binding a new one. */
static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
					const struct in6_addr *daddr)
{
	struct rt6_info *rt = ip6_rt_copy(ort, daddr);

	if (rt) {
		rt->rt6i_flags |= RTF_CACHE;
		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
	}
	return rt;
}
822
/* Core resolver shared by the input and output paths.  Looks up the
 * best route, and when it is not already a per-destination cache
 * entry, clones it (COW or plain clone) and inserts the clone —
 * dropping the table lock around the insertion and relooking up on
 * races.  A first pass requires reachable routers; if that finds
 * nothing, a second pass relaxes the requirement.  Always returns a
 * held dst (possibly the null entry). */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags, bool input)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;
	int err;
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
	int local = RTF_NONEXTHOP;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	/* On input, local routes also need no COW clone. */
	if (input)
		local |= RTF_LOCAL;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);

restart:
	rt = rt6_select(fn, oif, strict | reachable);

	BACKTRACK(net, &fl6->saddr);
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	/* Hold rt across the unlock; we will clone and insert. */
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);

	if (!dst_get_neighbour_noref_raw(&rt->dst) &&
	    !(rt->rt6i_flags & local))
		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
	else if (!(rt->dst.flags & DST_HOST))
		nrt = rt6_alloc_clone(rt, &fl6->daddr);
	else
		goto out2;

	dst_release(&rt->dst);
	rt = nrt ? : net->ipv6.ip6_null_entry;

	dst_hold(&rt->dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->dst);
	goto relookup;

out:
	if (reachable) {
		/* Nothing reachable: retry without the requirement. */
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

	return rt;
}
896
897static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
898 struct flowi6 *fl6, int flags)
899{
900 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags, true);
901}
902
903static struct dst_entry *ip6_route_input_lookup(struct net *net,
904 struct net_device *dev,
905 struct flowi6 *fl6, int flags)
906{
907 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
908 flags |= RT6_LOOKUP_F_IFACE;
909
910 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
911}
912
/* Attach a route to an incoming skb: build the flow key from the IPv6
 * header and skb metadata, then set skb's dst from the lookup. */
void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		/* First 32 bits of the header minus version = flow info. */
		.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
929
930static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
931 struct flowi6 *fl6, int flags)
932{
933 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags, false);
934}
935
936struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
937 struct flowi6 *fl6)
938{
939 int flags = 0;
940
941 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
942 flags |= RT6_LOOKUP_F_IFACE;
943
944 if (!ipv6_addr_any(&fl6->saddr))
945 flags |= RT6_LOOKUP_F_HAS_SADDR;
946 else if (sk)
947 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
948
949 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
950}
951
952EXPORT_SYMBOL(ip6_route_output);
953
/* Build an inert copy of dst_orig using the blackhole dst_ops: same
 * addressing state, but input/output discard packets.  Used when an
 * xfrm lookup must return something safe.  Consumes dst_orig. */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
	if (rt) {
		/* Same layout trick as ip6_dst_alloc(): zero the tail. */
		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));

		new = &rt->dst;

		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard;

		if (dst_metrics_read_only(&ort->dst))
			new->_metrics = ort->dst._metrics;
		else
			dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags;
		rt6_clean_expires(rt);
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		/* Intentional: mark the (still referenced) dst as not
		 * hashed so it is freed once the last ref drops. */
		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
993
994/*
995 * Destination cache support functions
996 */
997
/* dst_ops::check — a cached dst is still valid while its fib6 node's
 * serial number matches the cookie taken at lookup time.  Also
 * refreshes a stale inet_peer binding while validating. */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
			if (!rt->rt6i_peer)
				rt6_bind_peer(rt, 0);
			rt->rt6i_peer_genid = rt6_peer_genid();
		}
		return dst;
	}
	return NULL;
}
1014
1015static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1016{
1017 struct rt6_info *rt = (struct rt6_info *) dst;
1018
1019 if (rt) {
1020 if (rt->rt6i_flags & RTF_CACHE) {
1021 if (rt6_check_expired(rt)) {
1022 ip6_del_rt(rt);
1023 dst = NULL;
1024 }
1025 } else {
1026 dst_release(dst);
1027 dst = NULL;
1028 }
1029 }
1030 return dst;
1031}
1032
/* dst_ops::link_failure — the next hop is unreachable: tell the sender
 * via ICMPv6, delete the cached clone (or invalidate the default
 * route's node so cached dsts are re-checked). */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* Extra hold so a failed delete can free safely. */
			dst_hold(&rt->dst);
			if (ip6_del_rt(rt))
				dst_free(&rt->dst);
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
			/* Force ip6_dst_check() to fail for cached users. */
			rt->rt6i_node->fn_sernum = -1;
		}
	}
}
1050
/* dst_ops::update_pmtu — record a smaller path MTU on a host route.
 * MTUs below the IPv6 minimum are clamped to IPV6_MIN_MTU and the
 * ALLFRAG feature is set so every packet carries a fragment header. */
static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info*)dst;

	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
		rt6->rt6i_flags |= RTF_MODIFIED;
		if (mtu < IPV6_MIN_MTU) {
			u32 features = dst_metric(dst, RTAX_FEATURES);
			mtu = IPV6_MIN_MTU;
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(dst, RTAX_FEATURES, features);
		}
		dst_metric_set(dst, RTAX_MTU, mtu);
	}
}
1066
/* dst_ops::default_advmss — advertised MSS derived from the path MTU,
 * clamped below by the ip6_rt_min_advmss sysctl and above by the
 * maximal non-jumbo payload. */
static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	unsigned int mtu = dst_mtu(dst);
	struct net *net = dev_net(dev);

	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;

	/*
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
	 * rely only on pmtu discovery"
	 */
	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
		mtu = IPV6_MAXPLEN;
	return mtu;
}
1088
1089static unsigned int ip6_mtu(const struct dst_entry *dst)
1090{
1091 struct inet6_dev *idev;
1092 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1093
1094 if (mtu)
1095 goto out;
1096
1097 mtu = IPV6_MIN_MTU;
1098
1099 rcu_read_lock();
1100 idev = __in6_dev_get(dst->dev);
1101 if (idev)
1102 mtu = idev->cnf.mtu6;
1103 rcu_read_unlock();
1104
1105out:
1106 return min_t(unsigned int, mtu, IP6_MAX_MTU);
1107}
1108
1109static struct dst_entry *icmp6_dst_gc_list;
1110static DEFINE_SPINLOCK(icmp6_dst_lock);
1111
/* Allocate a standalone host dst for an outgoing ICMPv6/ND packet.
 * The entry is not inserted in the fib; instead it is chained on
 * icmp6_dst_gc_list and reaped by icmp6_dst_gc() once unreferenced.
 * Returns the (xfrm-wrapped) dst or an ERR_PTR. */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct neighbour *neigh,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	/* Use the caller's neighbour if supplied, else look one up. */
	if (neigh)
		neigh_hold(neigh);
	else {
		neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
		if (IS_ERR(neigh)) {
			in6_dev_put(idev);
			dst_free(&rt->dst);
			return ERR_CAST(neigh);
		}
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output  = ip6_output;
	dst_set_neighbour(&rt->dst, neigh);
	atomic_set(&rt->dst.__refcnt, 1);
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	/* Chain on the ICMP dst GC list instead of the fib. */
	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	fib6_force_start_gc(net);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}
1163
1164int icmp6_dst_gc(void)
1165{
1166 struct dst_entry *dst, **pprev;
1167 int more = 0;
1168
1169 spin_lock_bh(&icmp6_dst_lock);
1170 pprev = &icmp6_dst_gc_list;
1171
1172 while ((dst = *pprev) != NULL) {
1173 if (!atomic_read(&dst->__refcnt)) {
1174 *pprev = dst->next;
1175 dst_free(dst);
1176 } else {
1177 pprev = &dst->next;
1178 ++more;
1179 }
1180 }
1181
1182 spin_unlock_bh(&icmp6_dst_lock);
1183
1184 return more;
1185}
1186
/* Walk the ICMP dst list and free every entry for which func() returns
 * nonzero (e.g. routes on a departing device). */
static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
			    void *arg)
{
	struct dst_entry *dst, **pprev;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;
	while ((dst = *pprev) != NULL) {
		struct rt6_info *rt = (struct rt6_info *) dst;
		if (func(rt, arg)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
		}
	}
	spin_unlock_bh(&icmp6_dst_lock);
}
1205
/* dst_ops::gc — run fib6 garbage collection when the cache is over
 * budget, rate-limited by ip6_rt_gc_min_interval.  The adaptive
 * ip6_rt_gc_expire grows each pass and decays by the elasticity
 * sysctl.  Returns nonzero while the cache is still over rt_max_size. */
static int ip6_dst_gc(struct dst_ops *ops)
{
	unsigned long now = jiffies;
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, now) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
	net->ipv6.ip6_rt_last_gc = now;
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	/* Exponential decay of the GC aggressiveness. */
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
1232
1233/* Clean host part of a prefix. Not necessary in radix tree,
1234 but results in cleaner routing tables.
1235
1236 Remove it only when all the things will work!
1237 */
1238
1239int ip6_dst_hoplimit(struct dst_entry *dst)
1240{
1241 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1242 if (hoplimit == 0) {
1243 struct net_device *dev = dst->dev;
1244 struct inet6_dev *idev;
1245
1246 rcu_read_lock();
1247 idev = __in6_dev_get(dev);
1248 if (idev)
1249 hoplimit = idev->cnf.hop_limit;
1250 else
1251 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1252 rcu_read_unlock();
1253 }
1254 return hoplimit;
1255}
1256EXPORT_SYMBOL(ip6_dst_hoplimit);
1257
1258/*
1259 *
1260 */
1261
/*
 * ip6_route_add - add an IPv6 route described by @cfg to the FIB.
 *
 * Validates the configuration, resolves the output device and routing
 * table, allocates and fills an rt6_info and inserts it.  Returns 0 on
 * success or a negative errno.  On failure every reference taken here
 * (dev, idev, rt) is dropped before returning.
 */
int ip6_route_add(struct fib6_config *cfg)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	/* Prefix lengths beyond 128 bits are malformed. */
	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-prefix routing needs subtree support compiled in. */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	/* Without NLM_F_CREATE only an existing table may be used;
	 * creating one anyway is tolerated with a warning. */
	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	/* Only addrconf-originated routes count against the dst gc. */
	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	rt->dst.obsolete = -1;

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination address class. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

	/* Non-host routes with metrics need a private metrics array
	 * (host routes can use the dst's inline storage). */
	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
		if (!metrics) {
			err = -ENOMEM;
			goto out;
		}
		dst_init_metrics(&rt->dst, metrics, 0);
	}
#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
		rt->dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		rt->rt6i_gateway = *gw_addr;
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			/* The gateway itself must be reachable by a
			 * non-gatewayed route on the chosen interface. */
			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					dst_release(&grt->dst);
					goto out;
				}
			} else {
				/* No device given: inherit it from the
				 * route towards the gateway. */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			dst_release(&grt->dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	/* A requested preferred source must be configured on the device. */
	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
		err = rt6_bind_neighbour(rt, dev);
		if (err)
			goto out;
	}

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* Apply any netlink-supplied metrics before insertion. */
	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla_type(nla);

			if (type) {
				if (type > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}

				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
			}
		}
	}

	/* Transfer the dev/idev references taken above into the route. */
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
	/* Error path: drop everything acquired so far. */
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);
	return err;
}
1492
1493static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1494{
1495 int err;
1496 struct fib6_table *table;
1497 struct net *net = dev_net(rt->dst.dev);
1498
1499 if (rt == net->ipv6.ip6_null_entry) {
1500 err = -ENOENT;
1501 goto out;
1502 }
1503
1504 table = rt->rt6i_table;
1505 write_lock_bh(&table->tb6_lock);
1506 err = fib6_del(rt, info);
1507 write_unlock_bh(&table->tb6_lock);
1508
1509out:
1510 dst_release(&rt->dst);
1511 return err;
1512}
1513
1514int ip6_del_rt(struct rt6_info *rt)
1515{
1516 struct nl_info info = {
1517 .nl_net = dev_net(rt->dst.dev),
1518 };
1519 return __ip6_del_rt(rt, &info);
1520}
1521
1522static int ip6_route_del(struct fib6_config *cfg)
1523{
1524 struct fib6_table *table;
1525 struct fib6_node *fn;
1526 struct rt6_info *rt;
1527 int err = -ESRCH;
1528
1529 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1530 if (!table)
1531 return err;
1532
1533 read_lock_bh(&table->tb6_lock);
1534
1535 fn = fib6_locate(&table->tb6_root,
1536 &cfg->fc_dst, cfg->fc_dst_len,
1537 &cfg->fc_src, cfg->fc_src_len);
1538
1539 if (fn) {
1540 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1541 if (cfg->fc_ifindex &&
1542 (!rt->dst.dev ||
1543 rt->dst.dev->ifindex != cfg->fc_ifindex))
1544 continue;
1545 if (cfg->fc_flags & RTF_GATEWAY &&
1546 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1547 continue;
1548 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1549 continue;
1550 dst_hold(&rt->dst);
1551 read_unlock_bh(&table->tb6_lock);
1552
1553 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1554 }
1555 }
1556 read_unlock_bh(&table->tb6_lock);
1557
1558 return err;
1559}
1560
1561/*
1562 * Handle redirects
1563 */
/* Extended flow key for redirect validation: the normal flowi6 plus
 * the address of the router that sent the redirect.  Passed to
 * __ip6_route_redirect() via a cast of the embedded fl6. */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
1568
/*
 * Table-lookup backend for redirect processing.  @fl6 is really the
 * fl6 member of a struct ip6rd_flowi carrying the redirecting router's
 * address.  Returns the matching route (held), or the held null entry
 * when no acceptable route exists.
 */
static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/*
	 * Get the "current" route for this destination and
	 * check if the redirect has come from the appropriate router.
	 *
	 * RFC 2461 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		/*
		 * Current route is on-link; redirect is always invalid.
		 *
		 * Seems, previous statement is not true. It could
		 * be node, which looks for us as on-link (f.e. proxy ndisc)
		 * But then router serving it might decide, that we should
		 * know truth 8)8) --ANK (980726).
		 */
		if (rt6_check_expired(rt))
			continue;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	/* NOTE: BACKTRACK() below may jump back to "restart" to retry
	 * a less specific node, or fall through to "out" when done. */
	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	BACKTRACK(net, &fl6->saddr);
out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	return rt;
};
1622
1623static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1624 const struct in6_addr *src,
1625 const struct in6_addr *gateway,
1626 struct net_device *dev)
1627{
1628 int flags = RT6_LOOKUP_F_HAS_SADDR;
1629 struct net *net = dev_net(dev);
1630 struct ip6rd_flowi rdfl = {
1631 .fl6 = {
1632 .flowi6_oif = dev->ifindex,
1633 .daddr = *dest,
1634 .saddr = *src,
1635 },
1636 };
1637
1638 rdfl.gateway = *gateway;
1639
1640 if (rt6_need_strict(dest))
1641 flags |= RT6_LOOKUP_F_IFACE;
1642
1643 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1644 flags, __ip6_route_redirect);
1645}
1646
/*
 * rt6_redirect - process a validated ICMPv6 redirect.
 *
 * Verifies that the redirect came from a plausible nexthop, updates
 * the neighbour cache from @lladdr, and installs a cloned cache route
 * towards @dest via the new gateway, replacing any old cache entry.
 */
void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
		  const struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct rt6_info *rt, *nrt = NULL;
	struct netevent_redirect netevent;
	struct net *net = dev_net(neigh->dev);

	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);

	if (rt == net->ipv6.ip6_null_entry) {
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
			       "for redirect target\n");
		goto out;
	}

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	if (rt == net->ipv6.ip6_blk_hole_entry ||
	    rt == net->ipv6.ip6_prohibit_entry) {
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid" \
			       " nexthop for redirect target " \
			       "(blackhole or prohibited)\n");
		goto out;
	}
#endif

	/*
	 * We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/*
	 * Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Duplicate redirect: silently ignore. */
	if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
		goto out;

	nrt = ip6_rt_copy(rt, dest);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));

	if (ip6_ins_rt(nrt))
		goto out;

	/* Let interested parties (e.g. hardware offload) know the
	 * nexthop changed. */
	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* The superseded cache entry is removed; ip6_del_rt drops our
	 * reference, so return instead of falling through to out. */
	if (rt->rt6i_flags & RTF_CACHE) {
		ip6_del_rt(rt);
		return;
	}

out:
	dst_release(&rt->dst);
}
1723
1724/*
1725 * Handle ICMP "packet too big" messages
1726 * i.e. Path MTU discovery
1727 */
1728
/*
 * Apply a reported path MTU to the route towards @daddr.  With
 * @ifindex == 0 the lookup is unrestricted; otherwise it is bound to
 * that interface.  Existing cache routes are updated in place; for
 * network routes a cloned/COWed cache entry carries the new MTU so
 * the original route recovers automatically when the entry expires.
 */
static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
			     struct net *net, u32 pmtu, int ifindex)
{
	struct rt6_info *rt, *nrt;
	int allfrag = 0;
again:
	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
	if (!rt)
		return;

	/* Expired entries are purged and the lookup retried. */
	if (rt6_check_expired(rt)) {
		ip6_del_rt(rt);
		goto again;
	}

	/* Only ever shrink the path MTU here. */
	if (pmtu >= dst_mtu(&rt->dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		/*
		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
		 * MTU (1280) and a fragment header should always be included
		 * after a node receiving Too Big message reporting PMTU is
		 * less than the IPv6 Minimum Link MTU.
		 */
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Host route. If it is static, it would be better
	   not to override it, but add new one, so that
	   when cache entry will expire old pmtu
	   would return automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
		}
		rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is connected route. Action: COW
	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
	 */
	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, daddr, saddr);
	else
		nrt = rt6_alloc_clone(rt, daddr);

	if (nrt) {
		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
		}

		/* According to RFC 1981, detecting PMTU increase shouldn't be
		 * happened within 5 mins, the recommended timer is 10 mins.
		 * Here this route expiration time is set to ip6_rt_mtu_expires
		 * which is 10 mins. After 10 mins the decreased pmtu is expired
		 * and detecting PMTU increase will be automatically happened.
		 */
		rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC;
		ip6_ins_rt(nrt);
	}
out:
	dst_release(&rt->dst);
}
1812
1813void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1814 struct net_device *dev, u32 pmtu)
1815{
1816 struct net *net = dev_net(dev);
1817
1818 /*
1819 * RFC 1981 states that a node "MUST reduce the size of the packets it
1820 * is sending along the path" that caused the Packet Too Big message.
1821 * Since it's not possible in the general case to determine which
1822 * interface was used to send the original packet, we update the MTU
1823 * on the interface that will be used to send future packets. We also
1824 * update the MTU on the interface that received the Packet Too Big in
1825 * case the original packet was forced out that interface with
1826 * SO_BINDTODEVICE or similar. This is the next best thing to the
1827 * correct behaviour, which would be to update the MTU on all
1828 * interfaces.
1829 */
1830 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1831 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1832}
1833
1834/*
1835 * Misc support functions
1836 */
1837
1838static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1839 const struct in6_addr *dest)
1840{
1841 struct net *net = dev_net(ort->dst.dev);
1842 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1843 ort->dst.dev, 0);
1844
1845 if (rt) {
1846 rt->dst.input = ort->dst.input;
1847 rt->dst.output = ort->dst.output;
1848 rt->dst.flags |= DST_HOST;
1849
1850 rt->rt6i_dst.addr = *dest;
1851 rt->rt6i_dst.plen = 128;
1852 dst_copy_metrics(&rt->dst, &ort->dst);
1853 rt->dst.error = ort->dst.error;
1854 rt->rt6i_idev = ort->rt6i_idev;
1855 if (rt->rt6i_idev)
1856 in6_dev_hold(rt->rt6i_idev);
1857 rt->dst.lastuse = jiffies;
1858
1859 rt->rt6i_gateway = ort->rt6i_gateway;
1860 rt->rt6i_flags = ort->rt6i_flags;
1861 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1862 (RTF_DEFAULT | RTF_ADDRCONF))
1863 rt6_set_from(rt, ort);
1864 else
1865 rt6_clean_expires(rt);
1866 rt->rt6i_metric = 0;
1867
1868#ifdef CONFIG_IPV6_SUBTREES
1869 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1870#endif
1871 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1872 rt->rt6i_table = ort->rt6i_table;
1873 }
1874 return rt;
1875}
1876
1877#ifdef CONFIG_IPV6_ROUTE_INFO
1878static struct rt6_info *rt6_get_route_info(struct net *net,
1879 const struct in6_addr *prefix, int prefixlen,
1880 const struct in6_addr *gwaddr, int ifindex)
1881{
1882 struct fib6_node *fn;
1883 struct rt6_info *rt = NULL;
1884 struct fib6_table *table;
1885
1886 table = fib6_get_table(net, RT6_TABLE_INFO);
1887 if (!table)
1888 return NULL;
1889
1890 write_lock_bh(&table->tb6_lock);
1891 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1892 if (!fn)
1893 goto out;
1894
1895 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1896 if (rt->dst.dev->ifindex != ifindex)
1897 continue;
1898 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1899 continue;
1900 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1901 continue;
1902 dst_hold(&rt->dst);
1903 break;
1904 }
1905out:
1906 write_unlock_bh(&table->tb6_lock);
1907 return rt;
1908}
1909
1910static struct rt6_info *rt6_add_route_info(struct net *net,
1911 const struct in6_addr *prefix, int prefixlen,
1912 const struct in6_addr *gwaddr, int ifindex,
1913 unsigned pref)
1914{
1915 struct fib6_config cfg = {
1916 .fc_table = RT6_TABLE_INFO,
1917 .fc_metric = IP6_RT_PRIO_USER,
1918 .fc_ifindex = ifindex,
1919 .fc_dst_len = prefixlen,
1920 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1921 RTF_UP | RTF_PREF(pref),
1922 .fc_nlinfo.pid = 0,
1923 .fc_nlinfo.nlh = NULL,
1924 .fc_nlinfo.nl_net = net,
1925 };
1926
1927 cfg.fc_dst = *prefix;
1928 cfg.fc_gateway = *gwaddr;
1929
1930 /* We should treat it as a default route if prefix length is 0. */
1931 if (!prefixlen)
1932 cfg.fc_flags |= RTF_DEFAULT;
1933
1934 ip6_route_add(&cfg);
1935
1936 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1937}
1938#endif
1939
1940struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1941{
1942 struct rt6_info *rt;
1943 struct fib6_table *table;
1944
1945 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1946 if (!table)
1947 return NULL;
1948
1949 write_lock_bh(&table->tb6_lock);
1950 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1951 if (dev == rt->dst.dev &&
1952 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1953 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1954 break;
1955 }
1956 if (rt)
1957 dst_hold(&rt->dst);
1958 write_unlock_bh(&table->tb6_lock);
1959 return rt;
1960}
1961
1962struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1963 struct net_device *dev,
1964 unsigned int pref)
1965{
1966 struct fib6_config cfg = {
1967 .fc_table = RT6_TABLE_DFLT,
1968 .fc_metric = IP6_RT_PRIO_USER,
1969 .fc_ifindex = dev->ifindex,
1970 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1971 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1972 .fc_nlinfo.pid = 0,
1973 .fc_nlinfo.nlh = NULL,
1974 .fc_nlinfo.nl_net = dev_net(dev),
1975 };
1976
1977 cfg.fc_gateway = *gwaddr;
1978
1979 ip6_route_add(&cfg);
1980
1981 return rt6_get_dflt_router(gwaddr, dev);
1982}
1983
1984void rt6_purge_dflt_routers(struct net *net)
1985{
1986 struct rt6_info *rt;
1987 struct fib6_table *table;
1988
1989 /* NOTE: Keep consistent with rt6_get_dflt_router */
1990 table = fib6_get_table(net, RT6_TABLE_DFLT);
1991 if (!table)
1992 return;
1993
1994restart:
1995 read_lock_bh(&table->tb6_lock);
1996 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1997 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
1998 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
1999 dst_hold(&rt->dst);
2000 read_unlock_bh(&table->tb6_lock);
2001 ip6_del_rt(rt);
2002 goto restart;
2003 }
2004 }
2005 read_unlock_bh(&table->tb6_lock);
2006}
2007
2008static void rtmsg_to_fib6_config(struct net *net,
2009 struct in6_rtmsg *rtmsg,
2010 struct fib6_config *cfg)
2011{
2012 memset(cfg, 0, sizeof(*cfg));
2013
2014 cfg->fc_table = RT6_TABLE_MAIN;
2015 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2016 cfg->fc_metric = rtmsg->rtmsg_metric;
2017 cfg->fc_expires = rtmsg->rtmsg_info;
2018 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2019 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2020 cfg->fc_flags = rtmsg->rtmsg_flags;
2021
2022 cfg->fc_nlinfo.nl_net = net;
2023
2024 cfg->fc_dst = rtmsg->rtmsg_dst;
2025 cfg->fc_src = rtmsg->rtmsg_src;
2026 cfg->fc_gateway = rtmsg->rtmsg_gateway;
2027}
2028
2029int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2030{
2031 struct fib6_config cfg;
2032 struct in6_rtmsg rtmsg;
2033 int err;
2034
2035 switch(cmd) {
2036 case SIOCADDRT: /* Add a route */
2037 case SIOCDELRT: /* Delete a route */
2038 if (!capable(CAP_NET_ADMIN))
2039 return -EPERM;
2040 err = copy_from_user(&rtmsg, arg,
2041 sizeof(struct in6_rtmsg));
2042 if (err)
2043 return -EFAULT;
2044
2045 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2046
2047 rtnl_lock();
2048 switch (cmd) {
2049 case SIOCADDRT:
2050 err = ip6_route_add(&cfg);
2051 break;
2052 case SIOCDELRT:
2053 err = ip6_route_del(&cfg);
2054 break;
2055 default:
2056 err = -EINVAL;
2057 }
2058 rtnl_unlock();
2059
2060 return err;
2061 }
2062
2063 return -EINVAL;
2064}
2065
2066/*
2067 * Drop the packet on the floor
2068 */
2069
2070static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2071{
2072 int type;
2073 struct dst_entry *dst = skb_dst(skb);
2074 switch (ipstats_mib_noroutes) {
2075 case IPSTATS_MIB_INNOROUTES:
2076 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2077 if (type == IPV6_ADDR_ANY) {
2078 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2079 IPSTATS_MIB_INADDRERRORS);
2080 break;
2081 }
2082 /* FALLTHROUGH */
2083 case IPSTATS_MIB_OUTNOROUTES:
2084 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2085 ipstats_mib_noroutes);
2086 break;
2087 }
2088 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2089 kfree_skb(skb);
2090 return 0;
2091}
2092
/* dst.input handler for reject routes: drop, counting "in no route". */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2097
/* dst.output handler for reject routes: drop, counting "out no route". */
static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
2103
2104#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2105
/* dst.input handler for prohibit routes: drop with "administratively
 * prohibited", counting "in no route". */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2110
/* dst.output handler for prohibit routes: drop with "administratively
 * prohibited", counting "out no route". */
static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
2116
2117#endif
2118
2119/*
2120 * Allocate a dst for local (unicast / anycast) address.
2121 */
2122
2123struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2124 const struct in6_addr *addr,
2125 bool anycast)
2126{
2127 struct net *net = dev_net(idev->dev);
2128 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2129 net->loopback_dev, DST_NOCOUNT);
2130 int err;
2131
2132 if (!rt)
2133 return ERR_PTR(-ENOMEM);
2134
2135 in6_dev_hold(idev);
2136
2137 rt->dst.flags |= DST_HOST;
2138 rt->dst.input = ip6_input;
2139 rt->dst.output = ip6_output;
2140 rt->rt6i_idev = idev;
2141 rt->dst.obsolete = -1;
2142
2143 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2144 if (anycast)
2145 rt->rt6i_flags |= RTF_ANYCAST;
2146 else
2147 rt->rt6i_flags |= RTF_LOCAL;
2148 err = rt6_bind_neighbour(rt, rt->dst.dev);
2149 if (err) {
2150 dst_free(&rt->dst);
2151 return ERR_PTR(err);
2152 }
2153
2154 rt->rt6i_dst.addr = *addr;
2155 rt->rt6i_dst.plen = 128;
2156 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2157
2158 atomic_set(&rt->dst.__refcnt, 1);
2159
2160 return rt;
2161}
2162
2163int ip6_route_get_saddr(struct net *net,
2164 struct rt6_info *rt,
2165 const struct in6_addr *daddr,
2166 unsigned int prefs,
2167 struct in6_addr *saddr)
2168{
2169 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2170 int err = 0;
2171 if (rt->rt6i_prefsrc.plen)
2172 *saddr = rt->rt6i_prefsrc.addr;
2173 else
2174 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2175 daddr, prefs, saddr);
2176 return err;
2177}
2178
2179/* remove deleted ip from prefsrc entries */
/* Callback argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* limit to this device; NULL matches all */
	struct net *net;	/* namespace whose null entry is skipped */
	struct in6_addr *addr;	/* address being removed */
};
2185
2186static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2187{
2188 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2189 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2190 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2191
2192 if (((void *)rt->dst.dev == dev || !dev) &&
2193 rt != net->ipv6.ip6_null_entry &&
2194 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2195 /* remove prefsrc entry */
2196 rt->rt6i_prefsrc.plen = 0;
2197 }
2198 return 0;
2199}
2200
2201void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2202{
2203 struct net *net = dev_net(ifp->idev->dev);
2204 struct arg_dev_net_ip adni = {
2205 .dev = ifp->idev->dev,
2206 .net = net,
2207 .addr = &ifp->addr,
2208 };
2209 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2210}
2211
/* Callback argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* dying device; NULL matches every device */
	struct net *net;	/* namespace whose null entry is preserved */
};
2216
2217static int fib6_ifdown(struct rt6_info *rt, void *arg)
2218{
2219 const struct arg_dev_net *adn = arg;
2220 const struct net_device *dev = adn->dev;
2221
2222 if ((rt->dst.dev == dev || !dev) &&
2223 rt != adn->net->ipv6.ip6_null_entry)
2224 return -1;
2225
2226 return 0;
2227}
2228
2229void rt6_ifdown(struct net *net, struct net_device *dev)
2230{
2231 struct arg_dev_net adn = {
2232 .dev = dev,
2233 .net = net,
2234 };
2235
2236 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2237 icmp6_clean_all(fib6_ifdown, &adn);
2238}
2239
/* Callback argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new device MTU */
};
2245
2246static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2247{
2248 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2249 struct inet6_dev *idev;
2250
2251 /* In IPv6 pmtu discovery is not optional,
2252 so that RTAX_MTU lock cannot disable it.
2253 We still use this lock to block changes
2254 caused by addrconf/ndisc.
2255 */
2256
2257 idev = __in6_dev_get(arg->dev);
2258 if (!idev)
2259 return 0;
2260
2261 /* For administrative MTU increase, there is no way to discover
2262 IPv6 PMTU increase, so PMTU increase should be updated here.
2263 Since RFC 1981 doesn't include administrative MTU increase
2264 update PMTU increase is a MUST. (i.e. jumbo frame)
2265 */
2266 /*
2267 If new MTU is less than route PMTU, this new MTU will be the
2268 lowest MTU in the path, update the route PMTU to reflect PMTU
2269 decreases; if new MTU is greater than route PMTU, and the
2270 old MTU is the lowest MTU in the path, update the route PMTU
2271 to reflect the increase. In this case if the other nodes' MTU
2272 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2273 PMTU discouvery.
2274 */
2275 if (rt->dst.dev == arg->dev &&
2276 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2277 (dst_mtu(&rt->dst) >= arg->mtu ||
2278 (dst_mtu(&rt->dst) < arg->mtu &&
2279 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2280 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2281 }
2282 return 0;
2283}
2284
2285void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2286{
2287 struct rt6_mtu_change_arg arg = {
2288 .dev = dev,
2289 .mtu = mtu,
2290 };
2291
2292 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2293}
2294
/* Netlink attribute validation policy for RTM_{NEW,DEL}ROUTE requests;
 * unlisted attributes are accepted unvalidated. */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]               = { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
};
2302
/*
 * Parse an RTM_{NEW,DEL}ROUTE netlink message into a fib6_config.
 * Returns 0 on success or a negative errno on malformed input.
 * Note: fc_mx/fc_prefsrc etc. point into the message; the caller must
 * not use @cfg beyond the lifetime of @skb.
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;

	if (rtm->rtm_type == RTN_UNREACHABLE)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	/* Address attributes carry only the prefix-length-rounded-up
	 * number of bytes; reject ones shorter than the stated plen. */
	if (tb[RTA_DST]) {
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	/* RTA_TABLE, when present, overrides the header's table id. */
	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	err = 0;
errout:
	return err;
}
2378
2379static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2380{
2381 struct fib6_config cfg;
2382 int err;
2383
2384 err = rtm_to_fib6_config(skb, nlh, &cfg);
2385 if (err < 0)
2386 return err;
2387
2388 return ip6_route_del(&cfg);
2389}
2390
2391static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2392{
2393 struct fib6_config cfg;
2394 int err;
2395
2396 err = rtm_to_fib6_config(skb, nlh, &cfg);
2397 if (err < 0)
2398 return err;
2399
2400 return ip6_route_add(&cfg);
2401}
2402
2403static inline size_t rt6_nlmsg_size(void)
2404{
2405 return NLMSG_ALIGN(sizeof(struct rtmsg))
2406 + nla_total_size(16) /* RTA_SRC */
2407 + nla_total_size(16) /* RTA_DST */
2408 + nla_total_size(16) /* RTA_GATEWAY */
2409 + nla_total_size(16) /* RTA_PREFSRC */
2410 + nla_total_size(4) /* RTA_TABLE */
2411 + nla_total_size(4) /* RTA_IIF */
2412 + nla_total_size(4) /* RTA_OIF */
2413 + nla_total_size(4) /* RTA_PRIORITY */
2414 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2415 + nla_total_size(sizeof(struct rta_cacheinfo));
2416}
2417
/*
 * Build one RTM_NEWROUTE-style netlink message describing @rt into @skb.
 *
 * @dst/@src: when non-NULL (RTM_GETROUTE answers), report these exact
 *            addresses with /128 prefix lengths instead of the route's
 *            own prefixes.
 * @iif:      input interface of the original request; 0 for dumps.
 * @prefix:   non-zero means the dumper wants RTF_PREFIX_RT routes only;
 *            other routes are skipped with return value 1.
 * @nowait:   forwarded to ip6mr_get_route() for multicast destinations.
 *
 * Returns nlmsg_end()'s result on success, 1 when the route was
 * filtered out, 0 (mroute "queued") or -EMSGSIZE on failure.
 *
 * NOTE: the NLA_PUT*() macros jump to nla_put_failure on overflow, so
 * most attribute emissions below have a hidden error path.
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	const struct inet_peer *peer;
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;
	struct neighbour *n;
	u32 ts, tsage;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	/* Fixed rtmsg header first, then the variable attributes. */
	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	NLA_PUT_U32(skb, RTA_TABLE, table);
	/* Route type: reject/local flags win; loopback device implies local. */
	if (rt->rt6i_flags & RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	/* NOTE(review): vendor-specific tracking hook; semantics not
	 * visible in this file — confirm against net_run_track(). */
	net_run_track(PRT_ROUTE,"route");
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags & RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* Destination: explicit query address (/128) or the route prefix. */
	if (dst) {
		NLA_PUT(skb, RTA_DST, 16, dst);
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		NLA_PUT(skb, RTA_SRC, 16, src);
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		/* Multicast destinations are resolved by the mroute code,
		 * which may fill the reply itself (or queue it if !nowait). */
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			NLA_PUT_U32(skb, RTA_IIF, iif);
	} else if (dst) {
		/* Output query: report the source address we would pick. */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	/* Gateway comes from the cached neighbour; must hold RCU. */
	rcu_read_lock();
	n = dst_get_neighbour_noref(&rt->dst);
	if (n) {
		if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
			rcu_read_unlock();
			goto nla_put_failure;
		}
	}
	rcu_read_unlock();

	if (rt->dst.dev)
		NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);

	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);

	/* Remaining lifetime in jiffies, clamped to INT_MAX; 0 = permanent. */
	if (!(rt->rt6i_flags & RTF_EXPIRES))
		expires = 0;
	else if (rt->dst.expires - jiffies < INT_MAX)
		expires = rt->dst.expires - jiffies;
	else
		expires = INT_MAX;

	/* TCP timestamp info from the inet_peer cache, if any. */
	peer = rt->rt6i_peer;
	ts = tsage = 0;
	if (peer && peer->tcp_ts_stamp) {
		ts = peer->tcp_ts;
		tsage = get_seconds() - peer->tcp_ts_stamp;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
			       expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2559
2560int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2561{
2562 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2563 int prefix;
2564
2565 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2566 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2567 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2568 } else
2569 prefix = 0;
2570
2571 return rt6_fill_node(arg->net,
2572 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2573 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2574 prefix, 0, NLM_F_MULTI);
2575}
2576
2577static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2578{
2579 struct net *net = sock_net(in_skb->sk);
2580 struct nlattr *tb[RTA_MAX+1];
2581 struct rt6_info *rt;
2582 struct sk_buff *skb;
2583 struct rtmsg *rtm;
2584 struct flowi6 fl6;
2585 int err, iif = 0, oif = 0;
2586
2587 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2588 if (err < 0)
2589 goto errout;
2590
2591 err = -EINVAL;
2592 memset(&fl6, 0, sizeof(fl6));
2593
2594 if (tb[RTA_SRC]) {
2595 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2596 goto errout;
2597
2598 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2599 }
2600
2601 if (tb[RTA_DST]) {
2602 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2603 goto errout;
2604
2605 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2606 }
2607
2608 if (tb[RTA_IIF])
2609 iif = nla_get_u32(tb[RTA_IIF]);
2610
2611 if (tb[RTA_OIF])
2612 oif = nla_get_u32(tb[RTA_OIF]);
2613
2614 if (iif) {
2615 struct net_device *dev;
2616 int flags = 0;
2617
2618 dev = __dev_get_by_index(net, iif);
2619 if (!dev) {
2620 err = -ENODEV;
2621 goto errout;
2622 }
2623
2624 fl6.flowi6_iif = iif;
2625
2626 if (!ipv6_addr_any(&fl6.saddr))
2627 flags |= RT6_LOOKUP_F_HAS_SADDR;
2628
2629 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2630 flags);
2631 } else {
2632 fl6.flowi6_oif = oif;
2633
2634 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2635 }
2636
2637 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2638 if (!skb) {
2639 err = -ENOBUFS;
2640 goto errout;
2641 }
2642
2643 /* Reserve room for dummy headers, this skb can pass
2644 through good chunk of routing engine.
2645 */
2646 skb_reset_mac_header(skb);
2647 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2648
2649 skb_dst_set(skb, &rt->dst);
2650
2651 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2652 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2653 nlh->nlmsg_seq, 0, 0, 0);
2654 if (err < 0) {
2655 kfree_skb(skb);
2656 goto errout;
2657 }
2658
2659 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2660errout:
2661 return err;
2662}
2663
/*
 * Broadcast an RTM_NEWROUTE/RTM_DELROUTE (@event) notification for @rt
 * to the RTNLGRP_IPV6_ROUTE multicast group.  On failure the error is
 * recorded on the group via rtnl_set_sk_err().
 */
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	/* Echo the requester's sequence number when netlink-triggered. */
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;

	/* rt6_nlmsg_size() is the worst case, so fill cannot overflow. */
	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
	if (!skb)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
			    event, info->pid, seq, 0, 0, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
2694
/*
 * Netdevice notifier: when a netns's loopback device registers, point
 * the per-netns special routes (null / prohibit / blackhole) at it and
 * take idev references for them.
 */
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *data)
{
	struct net_device *dev = (struct net_device *)data;
	struct net *net = dev_net(dev);

	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	}
	return NOTIFY_OK;
}
2714
2715/*
2716 * /proc
2717 */
2718
2719#ifdef CONFIG_PROC_FS
2720
/*
 * Cursor for the legacy read_proc-style /proc interface.
 * NOTE(review): nothing in the visible code uses this struct — the
 * seq_file interface below replaced it; candidate for removal.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2729
2730static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2731{
2732 struct seq_file *m = p_arg;
2733 struct neighbour *n;
2734
2735 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2736
2737#ifdef CONFIG_IPV6_SUBTREES
2738 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2739#else
2740 seq_puts(m, "00000000000000000000000000000000 00 ");
2741#endif
2742 rcu_read_lock();
2743 n = dst_get_neighbour_noref(&rt->dst);
2744 if (n) {
2745 seq_printf(m, "%pi6", n->primary_key);
2746 } else {
2747 seq_puts(m, "00000000000000000000000000000000");
2748 }
2749 rcu_read_unlock();
2750 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2751 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2752 rt->dst.__use, rt->rt6i_flags,
2753 rt->dst.dev ? rt->dst.dev->name : "");
2754 return 0;
2755}
2756
2757static int ipv6_route_show(struct seq_file *m, void *v)
2758{
2759 struct net *net = (struct net *)m->private;
2760 fib6_clean_all_ro(net, rt6_info_route, 0, m);
2761 return 0;
2762}
2763
/* open() for /proc/net/ipv6_route: netns-aware single-shot seq_file. */
static int ipv6_route_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ipv6_route_show);
}
2768
/* File operations for /proc/net/ipv6_route. */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release_net,
};
2776
/*
 * Dump /proc/net/rt6_stats: seven space-separated hex counters
 * (fixed format, userspace-visible).
 */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}
2791
/* open() for /proc/net/rt6_stats: netns-aware single-shot seq_file. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
2796
/* File operations for /proc/net/rt6_stats. */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
2804#endif /* CONFIG_PROC_FS */
2805
2806#ifdef CONFIG_SYSCTL
2807
2808static
2809int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2810 void __user *buffer, size_t *lenp, loff_t *ppos)
2811{
2812 struct net *net;
2813 int delay;
2814 if (!write)
2815 return -EINVAL;
2816
2817 net = (struct net *)ctl->extra1;
2818 delay = net->ipv6.sysctl.flush_delay;
2819 proc_dointvec(ctl, write, buffer, lenp, ppos);
2820 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2821 return 0;
2822}
2823
/*
 * Template for the per-netns net.ipv6.route.* sysctl table.  The .data
 * pointers here reference init_net and are rewired to the target netns
 * in ipv6_route_sysctl_init(), which indexes this array BY POSITION —
 * do not reorder entries without updating that function.
 */
ctl_table ipv6_route_table_template[] = {
	{
		/* Write-only trigger handled by ipv6_sysctl_rtcache_flush(). */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* Same variable as gc_min_interval, exposed in milliseconds. */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
2897
/*
 * Clone ipv6_route_table_template for @net and repoint each entry's
 * .data at the netns's own variables.  The indices below are coupled
 * to the template's entry order.  Returns NULL on allocation failure
 * (caller tolerates a missing table); the copy is freed by the caller
 * on netns teardown.
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[0].extra1 = net;	/* handler recovers the netns from extra1 */
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		/* gc_min_interval_ms shares gc_min_interval's variable. */
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
	}

	return table;
}
2922#endif
2923
/*
 * Per-netns init for the IPv6 routing engine: set up dst ops, clone
 * the special route templates (null, and under MULTIPLE_TABLES also
 * prohibit/blackhole), and seed the sysctl defaults.  On failure each
 * goto label unwinds exactly what was allocated before it.
 * Returns 0 or -ENOMEM.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	/* Per-netns copy of the "null" (unreachable) route. */
	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* Defaults for the tunables exposed via ipv6_route_sysctl_init(). */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

	/* Unwind in reverse allocation order. */
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
2995
/*
 * Per-netns teardown: free the special routes allocated by
 * ip6_route_net_init() and release the dst entry counter.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
3005
/*
 * Late per-netns init: create the /proc entries once the core routing
 * state exists.  Creation failures are tolerated (entries are simply
 * absent), matching upstream behaviour.
 */
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif
	return 0;
}
3014
/* Late per-netns teardown: remove the /proc entries created above. */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ipv6_route");
	proc_net_remove(net, "rt6_stats");
#endif
}
3022
/* Core per-netns init/exit for the IPv6 routing engine. */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};

/* Late per-netns hooks (/proc), registered after the core ops. */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};

/* Hooks loopback registration to attach the special routes. */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
3037
/*
 * Boot-time initialisation of the IPv6 routing subsystem: dst cache,
 * pernet state, FIB, xfrm, policy rules, netlink handlers and the
 * netdevice notifier.  The goto ladder at the end unwinds each step
 * in reverse order on failure.
 */
int __init ip6_route_init(void)
{
	int ret;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_dst_entries;

	/* Blackhole dsts share the same slab cache as regular ones. */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	#endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

out:
	return ret;

	/* Error unwinding, strictly in reverse of the setup order above. */
out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
3115
/*
 * Module/subsystem teardown: mirrors ip6_route_init() exactly, in
 * reverse registration order.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}