blob: 6f470fee01616f6af9de84734ae72de5ae1b66ac [file] [log] [blame]
/*
2 * Linux IPv6 multicast routing support for BSD pim6sd
3 * Based on net/ipv4/ipmr.c.
4 *
5 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6 * LSIIT Laboratory, Strasbourg, France
7 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8 * 6WIND, Paris, France
9 * Copyright (C)2007,2008 USAGI/WIDE Project
10 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 */
18
19#include <asm/uaccess.h>
20#include <linux/types.h>
21#include <linux/sched.h>
22#include <linux/errno.h>
23#include <linux/timer.h>
24#include <linux/mm.h>
25#include <linux/kernel.h>
26#include <linux/fcntl.h>
27#include <linux/stat.h>
28#include <linux/socket.h>
29#include <linux/inet.h>
30#include <linux/netdevice.h>
31#include <linux/inetdevice.h>
32#include <linux/proc_fs.h>
33#include <linux/seq_file.h>
34#include <linux/init.h>
35#include <linux/slab.h>
36#include <linux/compat.h>
37#include <net/protocol.h>
38#include <linux/skbuff.h>
39#include <net/sock.h>
40#include <net/raw.h>
41#include <linux/notifier.h>
42#include <linux/if_arp.h>
43#include <net/checksum.h>
44#include <net/netlink.h>
45#include <net/fib_rules.h>
46
47#include <net/ipv6.h>
48#include <net/ip6_route.h>
49#include <linux/mroute6.h>
50#include <linux/pim.h>
51#include <net/addrconf.h>
52#include <linux/netfilter_ipv6.h>
53#include <linux/export.h>
54#include <net/ip6_checksum.h>
55
/* Per-netns IPv6 multicast routing table state (one instance per table id). */
struct mr6_table {
	struct list_head list;		/* link in net->ipv6.mr6_tables */
#ifdef CONFIG_NET_NS
	struct net *net;		/* owning namespace */
#endif
	u32 id;				/* table id (RT6_TABLE_*) */
	struct sock *mroute6_sk;	/* control socket of the routing daemon */
	struct timer_list ipmr_expire_timer;	/* ages out unresolved entries */
	struct list_head mfc6_unres_queue;	/* queued, not-yet-resolved (S,G) */
	struct list_head mfc6_cache_array[MFC6_LINES];	/* resolved cache hash */
	struct mif_device vif6_table[MAXMIFS];	/* multicast interfaces */
	int maxvif;			/* highest in-use vif index + 1 */
	atomic_t cache_resolve_queue_len;	/* # entries on mfc6_unres_queue */
	int mroute_do_assert;		/* report wrong-iif events to the daemon */
	int mroute_do_pim;		/* PIM mode enabled by the daemon */
#ifdef CONFIG_IPV6_PIMSM_V2
	int mroute_reg_vif_num;		/* index of the PIM register vif, -1 if none */
#endif
};
75
/* fib_rule wrapper used by the ip6mr fib-rules engine (no extra fields). */
struct ip6mr_rule {
	struct fib_rule common;
};
79
/* Result slot filled by ip6mr_rule_action() during a fib-rules lookup. */
struct ip6mr_result {
	struct mr6_table *mrt;
};
83
84/* Big lock, protecting vif table, mrt cache and mroute socket state.
85 Note that the changes are semaphored via rtnl_lock.
86 */
87
88static DEFINE_RWLOCK(mrt_lock);
89
90/*
91 * Multicast router control variables
92 */
93
94#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
95
96/* Special spinlock for queue of unresolved entries */
97static DEFINE_SPINLOCK(mfc_unres_lock);
98
99/* We return to original Alan's scheme. Hash table of resolved
100 entries is changed only in process context and protected
101 with weak lock mrt_lock. Queue of unresolved entries is protected
102 with strong spinlock mfc_unres_lock.
103
104 In this case data path is free of exclusive locks at all.
105 */
106
107static struct kmem_cache *mrt_cachep __read_mostly;
108
109static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
110static void ip6mr_free_table(struct mr6_table *mrt);
111
112static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
113 struct sk_buff *skb, struct mfc6_cache *cache);
114static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
115 mifi_t mifi, int assert);
116static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
117 struct mfc6_cache *c, struct rtmsg *rtm);
118static int ip6mr_rtm_dumproute(struct sk_buff *skb,
119 struct netlink_callback *cb);
120static void mroute_clean_tables(struct mr6_table *mrt);
121static void ipmr_expire_process(unsigned long arg);
122
123#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
124#define ip6mr_for_each_table(mrt, net) \
125 list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
126
/* Find the table with the given id on the namespace's RCU list, or NULL. */
static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}
137
/* Resolve which mr6_table handles a flow via the fib-rules engine.
 * Returns 0 with *mrt set, or a negative errno propagated from
 * fib_rules_lookup() / the rule actions (e.g. -ENETUNREACH, -EACCES).
 */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}
155
/* fib_rules .action callback: map a matched rule onto its mr6_table.
 * Returns 0 with res->mrt filled in, or a negative errno that either
 * aborts the walk or (-EAGAIN) makes the core try the next rule.
 */
static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr6_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	/* table named by the rule may not exist yet: -EAGAIN = try next rule */
	mrt = ip6mr_get_table(rule->fr_net, rule->table);
	if (mrt == NULL)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}
180
/* Every flow matches every rule; selection is purely by rule priority. */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}
185
/* Only the generic fib-rule netlink attributes are accepted. */
static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};
189
/* No ip6mr-specific rule attributes to parse. */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}
195
/* No private attributes, so rules with equal generic fields compare equal. */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}
201
/* Dump callback: ip6mr rules carry no src/dst/tos selectors. */
static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}
210
/* fib_rules ops for the RTNL_FAMILY_IP6MR family; cloned per namespace. */
static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};
225
226static int __net_init ip6mr_rules_init(struct net *net)
227{
228 struct fib_rules_ops *ops;
229 struct mr6_table *mrt;
230 int err;
231
232 ops = fib_rules_register(&ip6mr_rules_ops_template, net);
233 if (IS_ERR(ops))
234 return PTR_ERR(ops);
235
236 INIT_LIST_HEAD(&net->ipv6.mr6_tables);
237
238 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
239 if (mrt == NULL) {
240 err = -ENOMEM;
241 goto err1;
242 }
243
244 err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
245 if (err < 0)
246 goto err2;
247
248 net->ipv6.mr6_rules_ops = ops;
249 return 0;
250
251err2:
252 kfree(mrt);
253err1:
254 fib_rules_unregister(ops);
255 return err;
256}
257
258static void __net_exit ip6mr_rules_exit(struct net *net)
259{
260 struct mr6_table *mrt, *next;
261
262 rtnl_lock();
263 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
264 list_del(&mrt->list);
265 ip6mr_free_table(mrt);
266 }
267 rtnl_unlock();
268 fib_rules_unregister(net->ipv6.mr6_rules_ops);
269}
270#else
271#define ip6mr_for_each_table(mrt, net) \
272 for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
273
/* Single-table build: the namespace has exactly one table; 'id' is ignored. */
static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}
278
/* Single-table build: every flow maps to the one per-netns table. */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}
285
286static int __net_init ip6mr_rules_init(struct net *net)
287{
288 net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
289 return net->ipv6.mrt6 ? 0 : -ENOMEM;
290}
291
/* Single-table build: free the one table under RTNL. */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}
299#endif
300
/* Look up the table with this id, creating and initialising it if it
 * does not exist yet.  Returns NULL on allocation failure.
 */
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;
	unsigned int i;

	mrt = ip6mr_get_table(net, id);
	if (mrt != NULL)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (mrt == NULL)
		return NULL;
	mrt->id = id;
	write_pnet(&mrt->net, net);

	/* Forwarding cache */
	for (i = 0; i < MFC6_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IPV6_PIMSM_V2
	/* no PIM register vif allocated yet */
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
	return mrt;
}
333
334static void ip6mr_free_table(struct mr6_table *mrt)
335{
336 del_timer(&mrt->ipmr_expire_timer);
337 mroute_clean_tables(mrt);
338 kfree(mrt);
339}
340
341#ifdef CONFIG_PROC_FS
342
/* Cursor for the /proc/net/ip6_mr_cache seq_file walk. */
struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;		/* table being walked */
	struct list_head *cache;	/* current hash chain or unres queue */
	int ct;				/* current hash bucket index */
};
349
350
/* Position the /proc iterator at entry number 'pos'.
 *
 * Lock handoff: returns with mrt_lock read-held when the entry comes from
 * the resolved hash, or with mfc_unres_lock held when it comes from the
 * unresolved queue; seq next/stop drop the matching lock.  Returns NULL
 * with no locks held when pos is past the end.
 */
static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					   struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr6_table *mrt = it->mrt;
	struct mfc6_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc6_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}
376
377/*
378 * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
379 */
380
/* Cursor for the /proc/net/ip6_mr_vif seq_file walk. */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;	/* table being walked */
	int ct;			/* current vif index */
};
386
387static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
388 struct ipmr_vif_iter *iter,
389 loff_t pos)
390{
391 struct mr6_table *mrt = iter->mrt;
392
393 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
394 if (!MIF_EXISTS(mrt, iter->ct))
395 continue;
396 if (pos-- == 0)
397 return &mrt->vif6_table[iter->ct];
398 }
399 return NULL;
400}
401
/* seq_file ->start(): take mrt_lock for the duration of the dump and
 * position at *pos (SEQ_START_TOKEN emits the header row).
 */
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL)
		/* NOTE(review): ->stop() drops mrt_lock unconditionally, but
		 * this error return happens before the lock is taken — confirm
		 * the seq_file core's stop-after-error behaviour here.
		 */
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}
419
/* seq_file ->next(): advance to the next in-use vif; after the header
 * token, restart the scan from index 0.
 */
static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!MIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif6_table[iter->ct];
	}
	return NULL;
}
437
/* seq_file ->stop(): release mrt_lock taken by ->start(). */
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
443
/* seq_file ->show(): header row for SEQ_START_TOKEN, else one line of
 * counters for the vif at 'v'.
 */
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr6_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		/* a vif whose device has gone shows as "none" */
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - mrt->vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}
465
/* seq_file ops for /proc/net/ip6_mr_vif. */
static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next = ip6mr_vif_seq_next,
	.stop = ip6mr_vif_seq_stop,
	.show = ip6mr_vif_seq_show,
};
472
/* open() for /proc/net/ip6_mr_vif: netns-aware seq_file with a vif iterator. */
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}
478
/* file_operations for /proc/net/ip6_mr_vif. */
static const struct file_operations ip6mr_vif_fops = {
	.owner = THIS_MODULE,
	.open = ip6mr_vif_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_net,
};
486
/* seq_file ->start() for the mfc cache dump.  Locks are taken inside
 * ipmr_mfc_seq_idx() and tracked via it->cache (see ipmr_mfc_seq_stop()).
 */
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}
501
/* seq_file ->next(): advance within the current chain, then across hash
 * buckets, then switch from the resolved cache (mrt_lock) to the
 * unresolved queue (mfc_unres_lock).  The currently held lock follows
 * it->cache, exactly as in ipmr_mfc_seq_idx().
 */
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	/* more entries on the current chain? */
	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc6_cache, list);

	if (it->cache == &mrt->mfc6_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);

	while (++it->ct < MFC6_LINES) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc6_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc6_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc6_cache, list);

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}
544
/* seq_file ->stop(): release whichever lock the iterator still holds,
 * as indicated by it->cache (unres queue vs resolved hash).
 */
static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr6_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc6_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == mrt->mfc6_cache_array)
		read_unlock(&mrt_lock);
}
555
/* seq_file ->show(): header row for SEQ_START_TOKEN, else one line per
 * (group, origin) cache entry; unresolved entries print zero counters.
 */
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		struct mr6_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->mf6c_parent);

		if (it->cache != &mrt->mfc6_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			/* one "vif:ttl" pair per forwarding interface */
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}
597
/* seq_file ops for /proc/net/ip6_mr_cache. */
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next = ipmr_mfc_seq_next,
	.stop = ipmr_mfc_seq_stop,
	.show = ipmr_mfc_seq_show,
};
604
/* open() for /proc/net/ip6_mr_cache: netns-aware seq_file with an mfc iterator. */
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}
610
/* file_operations for /proc/net/ip6_mr_cache. */
static const struct file_operations ip6mr_mfc_fops = {
	.owner = THIS_MODULE,
	.open = ipmr_mfc_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_net,
};
618#endif
619
620#ifdef CONFIG_IPV6_PIMSM_V2
621
/* Receive handler for IPPROTO_PIM: validate a PIM v2 REGISTER message
 * and re-inject the encapsulated IPv6 multicast packet as if it arrived
 * on the table's register device.  Always consumes the skb; returns 0.
 */
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr *encap;
	struct net_device *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.flowi6_mark = skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	/* must be a v2 REGISTER, not a null-register, with a valid checksum
	 * (either over the PIM header alone or over the whole packet)
	 */
	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	/* NOTE(review): mroute_reg_vif_num is sampled before mrt_lock is
	 * taken below — confirm this unlocked read is intentional.
	 */
	reg_vif_num = mrt->mroute_reg_vif_num;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	/* strip the outer IPv6 + PIM headers and hand the inner packet
	 * to the stack on the register device
	 */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;
	skb->pkt_type = PACKET_HOST;

	skb_tunnel_rx(skb, reg_dev);

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
687
/* inet6 protocol hook for IPPROTO_PIM. */
static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};
691
692/* Service routines creating virtual interfaces: PIMREG */
693
/* Transmit hook of the pim6reg device: packets routed to the register
 * vif are not sent on the wire — they are reported to the daemon as
 * MRT6MSG_WHOLEPKT and then freed.
 */
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		/* NOTE(review): returns a negative errno where a netdev_tx_t
		 * is expected — verify callers tolerate this value.
		 */
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}
720
/* net_device ops of the pim6reg pseudo-device (xmit only). */
static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};
724
/* alloc_netdev() setup callback for the pim6reg pseudo-device. */
static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	/* leave room for an outer IPv6 header + PIM register header */
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
734
735static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
736{
737 struct net_device *dev;
738 char name[IFNAMSIZ];
739
740 if (mrt->id == RT6_TABLE_DFLT)
741 sprintf(name, "pim6reg");
742 else
743 sprintf(name, "pim6reg%u", mrt->id);
744
745 dev = alloc_netdev(0, name, reg_vif_setup);
746 if (dev == NULL)
747 return NULL;
748
749 dev_net_set(dev, net);
750
751 if (register_netdevice(dev)) {
752 free_netdev(dev);
753 return NULL;
754 }
755 dev->iflink = 0;
756
757 if (dev_open(dev))
758 goto failure;
759
760 dev_hold(dev);
761 return dev;
762
763failure:
764 /* allow the register to be completed before unregistering. */
765 rtnl_unlock();
766 rtnl_lock();
767
768 unregister_netdevice(dev);
769 return NULL;
770}
771#endif
772
773/*
774 * Delete a VIF entry
775 */
776
/*
 * Delete a VIF entry.
 *
 * Clears the vif slot under mrt_lock, shrinks maxvif when the highest
 * slot was removed, rolls back the device's allmulti count and
 * mc_forwarding flag, and queues a register vif's device on 'head' for
 * batched unregistration.  Returns 0 or -EADDRNOTAVAIL.
 */
static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
{
	struct mif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi + 1 == mrt->maxvif) {
		/* removed the highest slot: shrink maxvif to the next
		 * highest vif still in use
		 */
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev)
		in6_dev->cnf.mc_forwarding--;

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}
825
/* Return a (resolved or unresolved) cache entry to the slab cache. */
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}
830
831/* Destroy an unresolved cache entry, killing queued skbs
832 and reporting error to netlink readers.
833 */
834
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		/* version == 0 marks a queued netlink route request rather
		 * than a real packet: answer it with -ETIMEDOUT
		 */
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}
856
857
858/* Timer process for all the unresolved queue. */
859
/* Walk the unresolved queue, destroying expired entries and re-arming
 * the expiry timer for the earliest remaining deadline.
 * Called with mfc_unres_lock held.
 */
static void ipmr_do_expire_process(struct mr6_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;	/* upper bound for the re-arm delay */
	struct mfc6_cache *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		ip6mr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}
882
/* Expiry timer handler: runs in softirq context, so it must not sleep
 * on mfc_unres_lock — on contention it retries one jiffy later.
 */
static void ipmr_expire_process(unsigned long arg)
{
	struct mr6_table *mrt = (struct mr6_table *)arg;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}
897
898/* Fill oifs list. It is called under write locked mrt_lock. */
899
900static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
901 unsigned char *ttls)
902{
903 int vifi;
904
905 cache->mfc_un.res.minvif = MAXMIFS;
906 cache->mfc_un.res.maxvif = 0;
907 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
908
909 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
910 if (MIF_EXISTS(mrt, vifi) &&
911 ttls[vifi] && ttls[vifi] < 255) {
912 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
913 if (cache->mfc_un.res.minvif > vifi)
914 cache->mfc_un.res.minvif = vifi;
915 if (cache->mfc_un.res.maxvif <= vifi)
916 cache->mfc_un.res.maxvif = vifi + 1;
917 }
918 }
919}
920
/* Add a multicast interface (vif) to a table as instructed by the
 * daemon.  'mrtsock' is true when the request came via the mroute
 * control socket (such vifs are removed when the socket closes; others
 * are marked VIFF_STATIC).  Returns 0 or a negative errno.
 */
static int mif6_add(struct net *net, struct mr6_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (MIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		/* ordinary vif bound to an existing device */
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev)
		in6_dev->cnf.mc_forwarding++;

	/*
	 * Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;	/* publishing dev makes the vif visible (MIF_EXISTS) */
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
1000
1001static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
1002 const struct in6_addr *origin,
1003 const struct in6_addr *mcastgrp)
1004{
1005 int line = MFC6_HASH(mcastgrp, origin);
1006 struct mfc6_cache *c;
1007
1008 list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1009 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1010 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1011 return c;
1012 }
1013 return NULL;
1014}
1015
1016/*
1017 * Allocate a multicast cache entry
1018 */
1019static struct mfc6_cache *ip6mr_cache_alloc(void)
1020{
1021 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1022 if (c == NULL)
1023 return NULL;
1024 c->mfc_un.res.minvif = MAXMIFS;
1025 return c;
1026}
1027
1028static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1029{
1030 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1031 if (c == NULL)
1032 return NULL;
1033 skb_queue_head_init(&c->mfc_un.unres.unresolved);
1034 c->mfc_un.unres.expires = jiffies + 10 * HZ;
1035 return c;
1036}
1037
1038/*
1039 * A cache entry has gone into a resolved state from queued
1040 */
1041
/*
 * A cache entry has gone into a resolved state from queued: replay every
 * skb that was parked on the unresolved entry 'uc' through the resolved
 * entry 'c' — real packets are forwarded, queued netlink route requests
 * (version == 0 marker) are answered.
 */

static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 * Play the pending entries through our router
	 */

	while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}
1068
1069/*
1070 * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1071 * expects the following bizarre scheme.
1072 *
1073 * Called under mrt_lock.
1074 */
1075
/*
 * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 * expects the following bizarre scheme.
 *
 * Called under mrt_lock.
 *
 * Builds an mrt6msg (either wrapping the whole packet for
 * MRT6MSG_WHOLEPKT or just copying the IPv6 header otherwise) and queues
 * it on the daemon's socket.  Returns 0 on success, -EINVAL when no
 * daemon socket is registered, or -ENOBUFS on allocation/queue failure.
 */

static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	if (mrt->mroute6_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
1165
1166/*
1167 * Queue a packet for resolution. It gets locked cache entry!
1168 */
1169
/*
 * Queue a packet for resolution. It gets locked cache entry!
 *
 * Finds or creates an unresolved (S,G) entry for the packet's addresses,
 * reports the first packet of a new flow to pim6sd (MRT6MSG_NOCACHE) and
 * parks the skb on the entry (at most 4 pending skbs per entry).
 * Consumes the skb on failure.  Returns 0 or a negative errno.
 */

static int
ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		/* at most 10 unresolved entries per table */
		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc6_unres_queue);

		/* (re)arm the expiry timer for the new entry */
		ipmr_do_expire_process(mrt);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
1241
/*
 *	MFC6 cache manipulation by user space
 */

/* Remove the (origin, group) entry named by mfc.  Returns 0 on success,
 * -ENOENT when no matching cache entry exists. */
static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
{
	int line;
	struct mfc6_cache *c, *next;

	/* Hash chain for this (group, origin) pair. */
	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
			/* Unlink under mrt_lock so readers never see a
			 * half-removed entry; free outside the lock. */
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ip6mr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}
1266
1267static int ip6mr_device_event(struct notifier_block *this,
1268 unsigned long event, void *ptr)
1269{
1270 struct net_device *dev = ptr;
1271 struct net *net = dev_net(dev);
1272 struct mr6_table *mrt;
1273 struct mif_device *v;
1274 int ct;
1275 LIST_HEAD(list);
1276
1277 if (event != NETDEV_UNREGISTER)
1278 return NOTIFY_DONE;
1279 //print_sun(SUN_LEARN,"dev:%s--notify:event=%d : NETDEV_UNREGISTER",dev->name,event);
1280 ip6mr_for_each_table(mrt, net) {
1281 v = &mrt->vif6_table[0];
1282 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1283 if (v->dev == dev)
1284 mif6_delete(mrt, ct, &list);
1285 }
1286 }
1287 unregister_netdevice_many(&list);
1288
1289 return NOTIFY_DONE;
1290}
1291
/* Hooked into the netdevice notifier chain to tear down mifs whose
 * underlying device is being unregistered. */
static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
1295
/*
 *	Setup for IP multicast routing
 */

/* Per-namespace init: create routing tables/rules and the /proc/net
 * diagnostic entries.  Unwinds in reverse order on failure. */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	/* /proc/net/ip6_mr_vif and /proc/net/ip6_mr_cache. */
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip6_mr_vif");
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
fail:
	return err;
}
1327
/* Per-namespace teardown: reverse of ip6mr_net_init(). */
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip6_mr_cache");
	proc_net_remove(net, "ip6_mr_vif");
#endif
	ip6mr_rules_exit(net);
}
1336
/* Per-network-namespace lifecycle hooks for IPv6 multicast routing. */
static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};
1341
/* One-time initialisation of the IPv6 multicast routing subsystem:
 * slab cache, pernet state, netdevice notifier, optional PIM protocol
 * handler, and the rtnetlink route-dump callback. */
int __init ip6_mr_init(void)
{
	int err;

	/* Slab cache backing mfc6_cache entries (resolved and unresolved). */
	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	/* PIM register-packet decapsulation (pim6_rcv). */
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
		      ip6mr_rtm_dumproute, NULL);
	return 0;
	/* Unwind in reverse registration order. */
#ifdef CONFIG_IPV6_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}
1380
/* Tear down what ip6_mr_init() registered.
 * NOTE(review): the RTNL_FAMILY_IP6MR dump handler and (when enabled) the
 * IPPROTO_PIM protocol registered in ip6_mr_init() are not unregistered
 * here — confirm this is intentional (built-in, never unloaded). */
void ip6_mr_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
1387
/*
 *	Add or update a (origin, group) MFC entry from user space.  When a
 *	matching unresolved entry exists, its queued packets are released
 *	through the freshly resolved entry.  mrtsock is nonzero when the
 *	caller is the mrouter socket; otherwise the entry is marked static.
 */
static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
			 struct mf6cctl *mfc, int mrtsock)
{
	bool found = false;
	int line;
	struct mfc6_cache *uc, *c;
	unsigned char ttls[MAXMIFS];
	int i;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	/* ttls[i] == 1: forward on mif i; 255: never forward there. */
	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;

	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
			found = true;
			break;
		}
	}

	if (found) {
		/* Update the existing resolved entry in place. */
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;	/* survives mroute_clean_tables() */

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc6_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	/* No unresolved entries left: the expiry timer has nothing to do. */
	if (list_empty(&mrt->mfc6_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		/* Flush packets that were waiting for this (S,G). */
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	return 0;
}
1470
/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr6_table *mrt)
{
	int i;
	LIST_HEAD(list);
	struct mfc6_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 *	(VIFF_STATIC mifs are deliberately kept)
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(mrt, i, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 *	(MFC_STATIC entries are deliberately kept)
	 */
	for (i = 0; i < MFC6_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
			if (c->mfc_flags & MFC_STATIC)
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ip6mr_cache_free(c);
		}
	}

	/* Drop still-unresolved entries together with their queued skbs. */
	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
			list_del(&c->list);
			ip6mr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
1514
1515static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1516{
1517 int err = 0;
1518 struct net *net = sock_net(sk);
1519
1520 rtnl_lock();
1521 write_lock_bh(&mrt_lock);
1522 if (likely(mrt->mroute6_sk == NULL)) {
1523 mrt->mroute6_sk = sk;
1524 net->ipv6.devconf_all->mc_forwarding++;
1525 }
1526 else
1527 err = -EADDRINUSE;
1528 write_unlock_bh(&mrt_lock);
1529
1530 rtnl_unlock();
1531
1532 return err;
1533}
1534
/* Release sk's role as mrouter socket: find the owning table, clear its
 * socket pointer, decrement mc_forwarding, and wipe the table's
 * non-static state.  Returns -EACCES when sk owns no table. */
int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == mrt->mroute6_sk) {
			write_lock_bh(&mrt_lock);
			mrt->mroute6_sk = NULL;
			net->ipv6.devconf_all->mc_forwarding--;
			write_unlock_bh(&mrt_lock);

			mroute_clean_tables(mrt);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}
1558
1559struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1560{
1561 struct mr6_table *mrt;
1562 struct flowi6 fl6 = {
1563 .flowi6_iif = skb->skb_iif,
1564 .flowi6_oif = skb->dev->ifindex,
1565 .flowi6_mark = skb->mark,
1566 };
1567
1568 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1569 return NULL;
1570
1571 return mrt->mroute6_sk;
1572}
1573
1574/*
1575 * Socket options and virtual interface manipulation. The whole
1576 * virtual interface system is a complete heap, but unfortunately
1577 * that's how BSD mrouted happens to think. Maybe one day with a proper
1578 * MOSPF/PIM router set up we can clean this up.
1579 */
1580
1581int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1582{
1583 int ret;
1584 struct mif6ctl vif;
1585 struct mf6cctl mfc;
1586 mifi_t mifi;
1587 struct net *net = sock_net(sk);
1588 struct mr6_table *mrt;
1589
1590 /*CVE-2017-18509*/
1591 if (sk->sk_type != SOCK_RAW ||
1592 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1593 return -EOPNOTSUPP;
1594
1595 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1596 if (mrt == NULL)
1597 return -ENOENT;
1598
1599 if (optname != MRT6_INIT) {
1600 if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
1601 return -EACCES;
1602 }
1603
1604 switch (optname) {
1605 case MRT6_INIT:
1606 /*CVE-2017-18509,
1607 delete the following contents:
1608 if (sk->sk_type != SOCK_RAW ||
1609 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1610 return -EOPNOTSUPP;)*/
1611
1612 if (optlen < sizeof(int))
1613 return -EINVAL;
1614
1615 return ip6mr_sk_init(mrt, sk);
1616
1617 case MRT6_DONE:
1618 return ip6mr_sk_done(sk);
1619
1620 case MRT6_ADD_MIF:
1621 if (optlen < sizeof(vif))
1622 return -EINVAL;
1623 if (copy_from_user(&vif, optval, sizeof(vif)))
1624 return -EFAULT;
1625 if (vif.mif6c_mifi >= MAXMIFS)
1626 return -ENFILE;
1627 rtnl_lock();
1628 ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1629 rtnl_unlock();
1630 return ret;
1631
1632 case MRT6_DEL_MIF:
1633 if (optlen < sizeof(mifi_t))
1634 return -EINVAL;
1635 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1636 return -EFAULT;
1637 rtnl_lock();
1638 ret = mif6_delete(mrt, mifi, NULL);
1639 rtnl_unlock();
1640 return ret;
1641
1642 /*
1643 * Manipulate the forwarding caches. These live
1644 * in a sort of kernel/user symbiosis.
1645 */
1646 case MRT6_ADD_MFC:
1647 case MRT6_DEL_MFC:
1648 if (optlen < sizeof(mfc))
1649 return -EINVAL;
1650 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1651 return -EFAULT;
1652 rtnl_lock();
1653 if (optname == MRT6_DEL_MFC)
1654 ret = ip6mr_mfc_delete(mrt, &mfc);
1655 else
1656 ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
1657 rtnl_unlock();
1658 return ret;
1659
1660 /*
1661 * Control PIM assert (to activate pim will activate assert)
1662 */
1663 case MRT6_ASSERT:
1664 {
1665 int v;
1666 if (get_user(v, (int __user *)optval))
1667 return -EFAULT;
1668 mrt->mroute_do_assert = !!v;
1669 return 0;
1670 }
1671
1672#ifdef CONFIG_IPV6_PIMSM_V2
1673 case MRT6_PIM:
1674 {
1675 int v;
1676 if (get_user(v, (int __user *)optval))
1677 return -EFAULT;
1678 v = !!v;
1679 rtnl_lock();
1680 ret = 0;
1681 if (v != mrt->mroute_do_pim) {
1682 mrt->mroute_do_pim = v;
1683 mrt->mroute_do_assert = v;
1684 }
1685 rtnl_unlock();
1686 return ret;
1687 }
1688
1689#endif
1690#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1691 case MRT6_TABLE:
1692 {
1693 u32 v;
1694
1695 if (optlen != sizeof(u32))
1696 return -EINVAL;
1697 if (get_user(v, (u32 __user *)optval))
1698 return -EFAULT;
1699 if (sk == mrt->mroute6_sk)
1700 return -EBUSY;
1701
1702 rtnl_lock();
1703 ret = 0;
1704 if (!ip6mr_new_table(net, v))
1705 ret = -ENOMEM;
1706 raw6_sk(sk)->ip6mr_table = v;
1707 rtnl_unlock();
1708 return ret;
1709 }
1710#endif
1711 /*
1712 * Spurious command, or MRT6_VERSION which you cannot
1713 * set.
1714 */
1715 default:
1716 return -ENOPROTOOPT;
1717 }
1718}
1719
/*
 *	Getsock opt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	/* CVE-2017-18509: only ICMPv6 raw sockets may query these options. */
	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	/* Copy back at most sizeof(int); a negative user length stays
	 * negative through min_t and is rejected below. */
	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}
1770
/*
 *	The IP multicast ioctl support routines.
 */

/* SIOCGETMIFCNT_IN6 / SIOCGETSGCNT_IN6: export per-mif and per-(S,G)
 * packet/byte counters to user space. */
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (MIF_EXISTS(mrt, vr.mifi)) {
			/* Snapshot counters under mrt_lock, copy out after
			 * dropping it. */
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
1831
1832#ifdef CONFIG_COMPAT
/* 32-bit user-space layout of struct sioc_sg_req6 (SIOCGETSGCNT_IN6). */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};
1840
/* 32-bit user-space layout of struct sioc_mif_req6 (SIOCGETMIFCNT_IN6). */
struct compat_sioc_mif_req6 {
	mifi_t mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};
1848
/* 32-bit compat variant of ip6mr_ioctl(), using the compat_* request
 * layouts above so counter fields are compat_ulong_t sized. */
int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (MIF_EXISTS(mrt, vr.mifi)) {
			/* Snapshot under mrt_lock, copy out after unlock. */
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
1905#endif
1906
1907static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1908{
1909 IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1910 IPSTATS_MIB_OUTFORWDATAGRAMS);
1911 return dst_output(skb);
1912}
1913
/*
 *	Processing handlers for ip6mr_forward
 */

/* Transmit one copy of skb on mif vifi.  Consumes skb on every path
 * (either handed to NF_HOOK or freed). */
static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		/* Register mif: account the packet and deliver the whole
		 * thing to pim6sd instead of transmitting it. */
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	/* Route the copy out through the mif's link. */
	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	/* skb_cow may have reallocated the header: re-fetch, then decrement
	 * hop limit for the forwarded copy. */
	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
1990
1991static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
1992{
1993 int ct;
1994
1995 for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
1996 if (mrt->vif6_table[ct].dev == dev)
1997 break;
1998 }
1999 return ct;
2000}
2001
/* Forward skb according to the resolved cache entry: drop (and maybe
 * assert) on wrong input interface, otherwise replicate to every mif
 * whose TTL threshold the packet's hop limit exceeds.  Consumes skb. */
static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(mrt, skb->dev);

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			/* Rate-limited MRT6MSG_WRONGMIF report to pim6sd. */
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	mrt->vif6_table[vif].pkt_in++;
	mrt->vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 *	(clone for all targets but the last, which gets skb itself)
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}
2062
2063
/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.flowi6_mark = skb->mark,
	};
	int err;

	/* Pick the mr6_table responsible for this flow. */
	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);

	/*
	 *	No usable cache entry: queue the packet as unresolved and
	 *	notify pim6sd (ip6mr_cache_unresolved consumes the skb).
	 */
	if (cache == NULL) {
		int vif;

		vif = ip6mr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}
2113
2114
/* Fill IIF and the RTA_MULTIPATH next-hop list of an rtnetlink route
 * message from cache entry c.  Returns 1 on success, -ENOENT for an
 * unresolved entry, -EMSGSIZE when skb runs out of tailroom.
 * Note: RTA_PUT jumps to rtattr_failure on overflow. */
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mf6c_parent >= MAXMIFS)
		return -ENOENT;

	if (MIF_EXISTS(mrt, c->mf6c_parent))
		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	/* One rtnexthop per mif that would forward (ttl threshold < 255). */
	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	/* Patch the multipath attribute length now that all hops are in. */
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
2152
/* RTM_GETROUTE helper: fill rtm with multicast forwarding info for the
 * (src, dst) taken from skb's attached rt6_info.  With no cache entry
 * and !nowait, a minimal dummy IPv6 header is queued as unresolved to
 * trigger resolution by pim6sd. */
int ip6mr_get_route(struct net *net,
		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr6_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		/* Dummy header: only saddr/daddr matter for the unresolved
		 * queue lookup; everything else is zeroed. */
		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
2222
2223static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2224 u32 pid, u32 seq, struct mfc6_cache *c)
2225{
2226 struct nlmsghdr *nlh;
2227 struct rtmsg *rtm;
2228
2229 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2230 if (nlh == NULL)
2231 return -EMSGSIZE;
2232
2233 rtm = nlmsg_data(nlh);
2234 rtm->rtm_family = RTNL_FAMILY_IPMR;
2235 rtm->rtm_dst_len = 128;
2236 rtm->rtm_src_len = 128;
2237 rtm->rtm_tos = 0;
2238 rtm->rtm_table = mrt->id;
2239 NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2240 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2241 rtm->rtm_protocol = RTPROT_UNSPEC;
2242 rtm->rtm_flags = 0;
2243
2244 NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin);
2245 NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp);
2246
2247 if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
2248 goto nla_put_failure;
2249
2250 return nlmsg_end(skb, nlh);
2251
2252nla_put_failure:
2253 nlmsg_cancel(skb, nlh);
2254 return -EMSGSIZE;
2255}
2256
/* Netlink dump callback for RTNL_FAMILY_IP6MR/RTM_GETROUTE: walk every
 * table, hash line, and entry, resuming from cb->args[] = {table, line,
 * entry} saved on the previous partial dump. */
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr6_table *mrt;
	struct mfc6_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	read_lock(&mrt_lock);
	ip6mr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC6_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
				if (e < s_e)
					goto next_entry;
				/* skb full: stop and record resume point. */
				if (ip6mr_fill_mroute(mrt, skb,
						      NETLINK_CB(cb->skb).pid,
						      cb->nlh->nlmsg_seq,
						      mfc) < 0)
					goto done;
next_entry:
				e++;
			}
			e = s_e = 0;
		}
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	/* Save position so the next invocation resumes here. */
	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}