/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/compat.h>
#include <linux/rhashtable.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
#include <net/ip6_checksum.h>
#include <linux/netconf.h>
#include <net/ip_tunnels.h>

#include <linux/nospec.h>

struct ip6mr_rule {
	struct fib_rule		common;
};

struct ip6mr_result {
	struct mr_table	*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);
/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and is protected by
   the weak lock mrt_lock. The queue of unresolved entries is
   protected by the strong spinlock mfc_unres_lock.

   This way the data path is entirely free of exclusive locks.
 */
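
/* Concretely (a descriptive sketch of the conventions used below, not a
 * new interface): control-path writers update the vif table under
 * write_lock_bh(&mrt_lock), data-path readers take read_lock(&mrt_lock),
 * resolved MFC entries are looked up under RCU, and only the unresolved
 * queue is serialized with spin_lock_bh(&mfc_unres_lock).
 */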

static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)

static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	struct mr_table *ret;

	if (!mrt)
		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
				     struct mr_table, list);
	else
		ret = list_entry_rcu(mrt->list.next,
				     struct mr_table, list);

	if (&ret->list == &net->ipv6.mr6_tables)
		return NULL;
	return ret;
}

static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_POLICY_FAILED:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ip6mr_get_table(rule->fr_net, rule->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb,
				struct netlink_ext_ack *extack)
{
	return 0;
}

static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}

static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (IS_ERR(mrt)) {
		err = PTR_ERR(mrt);
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	ip6mr_free_table(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
	rtnl_unlock();
}

static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
}

static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
}

bool ip6mr_rule_default(const struct fib_rule *rule)
{
	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
}
EXPORT_SYMBOL(ip6mr_rule_default);
#else
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	if (!mrt)
		return net->ipv6.mrt6;
	return NULL;
}

static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct mr_table *mrt;

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (IS_ERR(mrt))
		return PTR_ERR(mrt);
	net->ipv6.mrt6 = mrt;
	return 0;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}

static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
	return 0;
}

static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return 0;
}
#endif

static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
	struct mfc6_cache *c = (struct mfc6_cache *)ptr;

	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
}

static const struct rhashtable_params ip6mr_rht_params = {
	.head_offset = offsetof(struct mr_mfc, mnode),
	.key_offset = offsetof(struct mfc6_cache, cmparg),
	.key_len = sizeof(struct mfc6_cache_cmp_arg),
	.nelem_hint = 3,
	.locks_mul = 1,
	.obj_cmpfn = ip6mr_hash_cmp,
	.automatic_shrinking = true,
};

static void ip6mr_new_table_set(struct mr_table *mrt,
				struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
}

static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
	.mf6c_origin = IN6ADDR_ANY_INIT,
	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};

static struct mr_table_ops ip6mr_mr_table_ops = {
	.rht_params = &ip6mr_rht_params,
	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
};

static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, id);
	if (mrt)
		return mrt;

	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
			      ipmr_expire_process, ip6mr_new_table_set);
}

static void ip6mr_free_table(struct mr_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, true);
	rhltable_destroy(&mrt->mfc_hash);
	kfree(mrt);
}

#ifdef CONFIG_PROC_FS
/* The /proc interfaces to multicast routing
 * /proc/ip6_mr_cache /proc/ip6_mr_vif
 */
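
/* Example ip6_mr_vif output, as formatted by ip6mr_vif_seq_show() below
 * (interface name and counter values illustrative only):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0           9136      87         0       0 00000
 */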

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct mr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return mr_vif_seq_start(seq, pos);
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct mr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct mr_mfc_iter *it = seq->private;
		struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->_c.mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->_c.mfc_un.res.pkt,
				   mfc->_c.mfc_un.res.bytes,
				   mfc->_c.mfc_un.res.wrong_if);
			for (n = mfc->_c.mfc_un.res.minvif;
			     n < mfc->_c.mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->_c.mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d", n,
						   mfc->_c.mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = mr_mfc_seq_next,
	.stop  = mr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
#endif

#ifdef CONFIG_IPV6_PIMSM_V2

static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (!reg_dev)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}

static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};

	if (!pskb_inet_may_pull(skb))
		goto tx_err;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto tx_err;

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;

tx_err:
	dev->stats.tx_errors++;
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
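	/* 1500 is the standard Ethernet MTU; subtract the IPv6 header and
	 * the 8-byte PIM register header prepended to tunneled packets.
	 */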
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}
#endif

static int call_ip6mr_vif_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct vif_device *vif,
					  mifi_t vif_index, u32 tb_id)
{
	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     vif, vif_index, tb_id,
				     &net->ipv6.ipmr_seq);
}

static int call_ip6mr_mfc_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct mfc6_cache *mfc, u32 tb_id)
{
	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
}

/* Delete a VIF entry */
static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
		       struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	if (VIF_EXISTS(mrt, vifi))
		call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
					       FIB_EVENT_VIF_DEL, v, vifi,
					       mrt->id);

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if ((v->flags & MIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
{
	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);

	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
}

static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}


/* Timer process for all the unresolved queue. */

static void ipmr_do_expire_process(struct mr_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mr_mfc *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(struct timer_list *t)
{
	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ip6mr_update_thresholds(struct mr_table *mrt,
				    struct mr_mfc *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	cache->mfc_un.res.lastuse = jiffies;
}

static int mif6_add(struct net *net, struct mr_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding++;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
			MIFF_REGISTER);

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
				       v, vifi, mrt->id);
	return 0;
}

static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find(mrt, &arg);
}

/* Look for a (*,G) entry */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = in6addr_any,
		.mf6c_mcastgrp = *mcastgrp,
	};

	if (ipv6_addr_any(mcastgrp))
		return mr_mfc_find_any_parent(mrt, mifi);
	return mr_mfc_find_any(mrt, mifi, &arg);
}

/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc6_cache *
ip6mr_cache_find_parent(struct mr_table *mrt,
			const struct in6_addr *origin,
			const struct in6_addr *mcastgrp,
			int parent)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find_parent(mrt, &arg, parent);
}

/* Allocate a multicast cache entry */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (!c)
		return NULL;
	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
	c->_c.mfc_un.res.minvif = MAXMIFS;
	c->_c.free = ip6mr_cache_free_rcu;
	refcount_set(&c->_c.mfc_un.res.refcount, 1);
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (!c)
		return NULL;
	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));

			if (mr_fill_mroute(mrt, skb, &c->_c,
					   nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd and netlink.
 *
 *	Called under mrt_lock.
 */

static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sock *mroute6_sk;
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	rcu_read_lock();
	mroute6_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute6_sk) {
		rcu_read_unlock();
		kfree_skb(skb);
		return -EINVAL;
	}

	mrt6msg_netlink_event(mrt, skb);

	/* Deliver to user space multicast routing algorithms */
	ret = sock_queue_rcv_skb(mroute6_sk, skb);
	rcu_read_unlock();
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}

/* Queue a packet for resolution. It gets locked cache entry! */
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
				  struct sk_buff *skb)
{
	struct mfc6_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		ipmr_do_expire_process(mrt);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	struct mfc6_cache *c;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
	list_del_rcu(&c->_c.list);

	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_DELROUTE);
	mr_cache_put(&c->_c);
	return 0;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				mif6_delete(mrt, ct, 1, NULL);
		}
	}

	return NOTIFY_DONE;
}

static unsigned int ip6mr_seq_read(struct net *net)
{
	ASSERT_RTNL();

	return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
}

static int ip6mr_dump(struct net *net, struct notifier_block *nb)
{
	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
		       ip6mr_mr_table_iter, &mrt_lock);
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.fib_seq_read	= ip6mr_seq_read,
	.fib_dump	= ip6mr_dump,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_notifier_init(struct net *net)
{
	struct fib_notifier_ops *ops;

	net->ipv6.ipmr_seq = 0;

	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	net->ipv6.ip6mr_notifier_ops = ops;

	return 0;
}

static void __net_exit ip6mr_notifier_exit(struct net *net)
{
	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
	net->ipv6.ip6mr_notifier_ops = NULL;
}

/* Setup for IP multicast routing */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_notifier_init(net);
	if (err)
		return err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto ip6mr_rules_fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
			sizeof(struct mr_vif_iter)))
		goto proc_vif_fail;
	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
			sizeof(struct mr_mfc_iter)))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
ip6mr_rules_fail:
	ip6mr_notifier_exit(net);
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
	ip6mr_notifier_exit(net);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};

int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
				   NULL, ip6mr_rtm_dumproute, 0);
	if (err == 0)
		return 0;

#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}

static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	unsigned char ttls[MAXMIFS];
	struct mfc6_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int i, err;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (c) {
		write_lock_bh(&mrt_lock);
		c->_c.mfc_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, &c->_c, ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
					       c, mrt->id);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->_c.mfc_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, &c->_c, ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ip6mr_rht_params);
	if (err) {
		pr_err("ip6mr: rhtable insert error %d\n", err);
		ip6mr_cache_free(c);
		return err;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);

	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc6_cache *)_uc;
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
				       c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
	struct mr_mfc *c, *tmp;
	LIST_HEAD(list);
	int i;

	/* Shut down all active vif entries */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
			continue;
		mif6_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/* Wipe the cache */
	list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
		if (!all && (c->mfc_flags & MFC_STATIC))
			continue;
		rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
		list_del_rcu(&c->list);
		call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
					       FIB_EVENT_ENTRY_DEL,
					       (struct mfc6_cache *)c, mrt->id);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		mr_cache_put(c);
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			mr6_netlink_event(mrt, (struct mfc6_cache *)c,
					  RTM_DELROUTE);
			ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (rtnl_dereference(mrt->mroute_sk)) {
		err = -EADDRINUSE;
	} else {
		rcu_assign_pointer(mrt->mroute_sk, sk);
		sock_set_flag(sk, SOCK_RCU_FREE);
		net->ipv6.devconf_all->mc_forwarding++;
	}
	write_unlock_bh(&mrt_lock);

	if (!err)
		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return err;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			write_lock_bh(&mrt_lock);
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
			/* Note that mroute_sk had SOCK_RCU_FREE set,
			 * so the RCU grace period before sk freeing
			 * is guaranteed by sk_destruct()
			 */
			net->ipv6.devconf_all->mc_forwarding--;
			write_unlock_bh(&mrt_lock);
			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);

			mroute_clean_tables(mrt, false);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}

bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_oif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		return false;

	return rcu_access_pointer(mrt->mroute_sk);
}
EXPORT_SYMBOL(mroute6_is_socket);

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */
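
/* Illustrative userspace usage (a sketch, not part of this file): a
 * daemon such as pim6sd opens a raw ICMPv6 socket and enables multicast
 * routing before adding MIFs and MFC entries:
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int v = 1;
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &v, sizeof(v));
 *
 * MRT6_ADD_MIF then takes a struct mif6ctl and MRT6_ADD_MFC a struct
 * mf6cctl, matching the copy_from_user() sizes checked below.
 */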

int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret, parent = 0;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif,
			       sk == rtnl_dereference(mrt->mroute_sk));
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, 0, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		parent = -1;
		/* fall through */
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		if (parent == 0)
			parent = mfc.mf6cc_parent;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk ==
					    rtnl_dereference(mrt->mroute_sk),
					    parent);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (activating PIM also activates assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		if (sk == rcu_access_pointer(mrt->mroute_sk))
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		mrt = ip6mr_new_table(net, v);
		if (IS_ERR(mrt))
			ret = PTR_ERR(mrt);
		else
			raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

1769/*
1770 * Getsock opt support for the multicast routing system.
1771 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */
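
/* Illustrative userspace usage (a sketch): per-(S,G) counters can be read
 * on the multicast routing socket via SIOCGETSGCNT_IN6, e.g.
 *
 *	struct sioc_sg_req6 sr;
 *	memset(&sr, 0, sizeof(sr));
 *	sr.src.sin6_addr = source;	// hypothetical in6_addr values
 *	sr.grp.sin6_addr = group;
 *	ioctl(s, SIOCGETSGCNT_IN6, &sr);
 *	// sr.pktcnt, sr.bytecnt and sr.wrong_if are filled in below
 *
 * SIOCGETMIFCNT_IN6 works the same way with a struct sioc_mif_req6.
 */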

int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

#ifdef CONFIG_COMPAT
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_mif_req6 {
	mifi_t mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif

static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_OUTOCTETS, skb->len);
	return dst_output(net, sk, skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!vif->dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding on
	 * all output interfaces. Clearly, if an mrouter runs a multicast
	 * program, it should receive packets regardless of which interface
	 * the program joined on.
	 * If we did not do this, the program would have to join on all
	 * interfaces. On the other hand, a multihomed host (or a router,
	 * but not an mrouter) cannot join on more than one interface, as
	 * that would result in receiving duplicate packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *c)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, skb->dev);

	vif = c->_c.mfc_parent;
	c->_c.mfc_un.res.pkt++;
	c->_c.mfc_un.res.bytes += skb->len;
	c->_c.mfc_un.res.lastuse = jiffies;

	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		rcu_read_lock();
		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
			rcu_read_unlock();
			goto forward;
		}
		rcu_read_unlock();
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif_table[vif].dev != skb->dev) {
		c->_c.mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts when switching from RPT to SPT,
		       so we cannot check that the packet arrived on an oif.
		       That is bad, but otherwise we would need to move a
		       pretty large chunk of pimd into the kernel. Ough... --ANK
		     */
2095 (mrt->mroute_do_pim ||
2096 c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2097 time_after(jiffies,
2098 c->_c.mfc_un.res.last_assert +
2099 MFC_ASSERT_THRESH)) {
2100 c->_c.mfc_un.res.last_assert = jiffies;
2101 ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2102 }
2103 goto dont_forward;
2104 }
2105
2106forward:
2107 mrt->vif_table[vif].pkt_in++;
2108 mrt->vif_table[vif].bytes_in += skb->len;
2109
2110 /*
2111 * Forward the frame
2112 */
2113 if (ipv6_addr_any(&c->mf6c_origin) &&
2114 ipv6_addr_any(&c->mf6c_mcastgrp)) {
2115 if (true_vifi >= 0 &&
2116 true_vifi != c->_c.mfc_parent &&
2117 ipv6_hdr(skb)->hop_limit >
2118 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2119 /* It's an (*,*) entry and the packet is not coming from
2120 * the upstream: forward the packet to the upstream
2121 * only.
2122 */
2123 psend = c->_c.mfc_parent;
2124 goto last_forward;
2125 }
2126 goto dont_forward;
2127 }
	for (ct = c->_c.mfc_un.res.maxvif - 1;
	     ct >= c->_c.mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2,
						       c, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, c, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

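	/* Try an exact (S,G) match first; if that fails and the packet
	 * arrived on a known vif, fall back to a wildcard lookup.
	 */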
	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
	if (!cache) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt,
						     &ipv6_hdr(skb)->daddr,
						     vif);
	}

	/*
	 *	No usable cache entry
	 */
	if (!cache) {
		int vif;

		vif = ip6mr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}

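/* Fill @rtm with the multicast forwarding state for the flow described
 * by the rt6_info attached to @skb. If no cache entry exists yet, a
 * skeleton packet is queued as unresolved so the routing daemon is
 * asked to resolve the flow.
 */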
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
		    u32 portid)
{
	int err;
	struct mr_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		dev = skb->dev;
		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

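		/* No cache entry: build a minimal skb carrying only an
		 * IPv6 header with the flow's source and destination
		 * addresses, so it can be queued as an unresolved entry
		 * and reported to the daemon.
		 */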
		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		NETLINK_CB(skb2).portid = portid;
		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
	read_unlock(&mrt_lock);
	return err;
}

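/* Build a netlink route message of type @cmd describing cache entry @c.
 * An unresolved entry (-ENOENT from mr_fill_mroute()) is not treated as
 * an error, so a dump can continue past it.
 */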
static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
			     int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int err;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = RTNL_FAMILY_IP6MR;
	rtm->rtm_dst_len = 128;
	rtm->rtm_src_len = 128;
	rtm->rtm_tos = 0;
	rtm->rtm_table = mrt->id;
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto nla_put_failure;
	rtm->rtm_type = RTN_MULTICAST;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	if (c->_c.mfc_flags & MFC_STATIC)
		rtm->rtm_protocol = RTPROT_STATIC;
	else
		rtm->rtm_protocol = RTPROT_MROUTED;
	rtm->rtm_flags = 0;

	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
		goto nla_put_failure;
	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
	/* do not break the dump if cache is unresolved */
	if (err < 0 && err != -ENOENT)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

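/* Adapter so the generic mr_rtm_dumproute() helper, which deals in
 * struct mr_mfc, can call the IPv6-specific fill routine.
 */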
static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      u32 portid, u32 seq, struct mr_mfc *c,
			      int cmd, int flags)
{
	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
				 cmd, flags);
}

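/* Worst-case netlink message size for a route notification. Unresolved
 * entries carry only the table id and the address pair; resolved ones
 * also carry the input interface, one nexthop per vif and the counter
 * statistics.
 */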
static int mr6_msgsize(bool unresolved, int maxvif)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4)	/* RTA_TABLE */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
		;

	if (!unresolved)
		len = len
		      + nla_total_size(4)	/* RTA_IIF */
		      + nla_total_size(0)	/* RTA_MULTIPATH */
		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
						/* RTA_MFC_STATS */
		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
		;

	return len;
}

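/* Notify userspace listeners on RTNLGRP_IPV6_MROUTE that a cache entry
 * changed; @cmd selects the route message type.
 */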
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
			GFP_ATOMIC);
	if (!skb)
		goto errout;

	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	kfree_skb(skb);
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
}

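/* Size of an RTM_NEWCACHEREPORT message carrying @payloadlen bytes of
 * the original packet.
 */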
static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtgenmsg))
		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
					/* IP6MRA_CREPORT_SRC_ADDR */
		+ nla_total_size(sizeof(struct in6_addr))
					/* IP6MRA_CREPORT_DST_ADDR */
		+ nla_total_size(sizeof(struct in6_addr))
					/* IP6MRA_CREPORT_PKT */
		+ nla_total_size(payloadlen)
		;

	return len;
}

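/* Mirror a cache report (struct mrt6msg at @pkt's transport header) out
 * to RTNLGRP_IPV6_MROUTE_R listeners as an RTM_NEWCACHEREPORT message,
 * with the triggering packet attached as IP6MRA_CREPORT_PKT.
 */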
static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
	struct net *net = read_pnet(&mrt->net);
	struct nlmsghdr *nlh;
	struct rtgenmsg *rtgenm;
	struct mrt6msg *msg;
	struct sk_buff *skb;
	struct nlattr *nla;
	int payloadlen;

	payloadlen = pkt->len - sizeof(struct mrt6msg);
	msg = (struct mrt6msg *)skb_transport_header(pkt);

	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
	if (!skb)
		goto errout;

	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
			sizeof(struct rtgenmsg), 0);
	if (!nlh)
		goto errout;
	rtgenm = nlmsg_data(nlh);
	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
			     &msg->im6_src) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
			     &msg->im6_dst))
		goto nla_put_failure;

	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
				  nla_data(nla), payloadlen))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
	return;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
errout:
	kfree_skb(skb);
	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
}

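/* RTM_GETROUTE dump callback: walk every mroute table and emit one
 * route message per cache entry via _ip6mr_fill_mroute().
 */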
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
				_ip6mr_fill_mroute, &mfc_unres_lock);
}