/*
 *	IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT)
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	Roger Venning <r.venning@telstra.com>:	6to4 support
 *	Nate Thompson <nate@thebog.net>:	6to4 support
 *	Fred Templin <fred.l.templin@boeing.com>:	isatap support
 */
19
20#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21
22#include <linux/module.h>
23#include <linux/capability.h>
24#include <linux/errno.h>
25#include <linux/types.h>
26#include <linux/socket.h>
27#include <linux/sockios.h>
28#include <linux/net.h>
29#include <linux/in6.h>
30#include <linux/netdevice.h>
31#include <linux/if_arp.h>
32#include <linux/icmp.h>
33#include <linux/slab.h>
34#include <linux/uaccess.h>
35#include <linux/init.h>
36#include <linux/netfilter_ipv4.h>
37#include <linux/if_ether.h>
38
39#include <net/sock.h>
40#include <net/snmp.h>
41
42#include <net/ipv6.h>
43#include <net/protocol.h>
44#include <net/transp_v6.h>
45#include <net/ip6_fib.h>
46#include <net/ip6_route.h>
47#include <net/ndisc.h>
48#include <net/addrconf.h>
49#include <net/ip.h>
50#include <net/udp.h>
51#include <net/icmp.h>
52#include <net/ip_tunnels.h>
53#include <net/inet_ecn.h>
54#include <net/xfrm.h>
55#include <net/dsfield.h>
56#include <net/net_namespace.h>
57#include <net/netns/generic.h>
58
/*
   This version of net/ipv6/sit.c is cloned from net/ipv4/ip_gre.c

   For comments look at net/ipv4/ip_gre.c --ANK
 */
64
/* Number of buckets in each hashed tunnel table of struct sit_net. */
#define IP6_SIT_HASH_SIZE	16

/*
 * Fold an IPv4 address into a bucket index in [0, IP6_SIT_HASH_SIZE).
 * The argument is parenthesised so that the casts and the shift apply to
 * the whole argument even when it is a compound expression.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & \
	 (IP6_SIT_HASH_SIZE - 1))
67
68static bool log_ecn_error = true;
69module_param(log_ecn_error, bool, 0644);
70MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
71
72static int ipip6_tunnel_init(struct net_device *dev);
73static void ipip6_tunnel_setup(struct net_device *dev);
74static void ipip6_dev_free(struct net_device *dev);
75static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
76 __be32 *v4dst);
77static struct rtnl_link_ops sit_link_ops __read_mostly;
78
79static unsigned int sit_net_id __read_mostly;
80struct sit_net {
81 struct ip_tunnel __rcu *tunnels_r_l[IP6_SIT_HASH_SIZE];
82 struct ip_tunnel __rcu *tunnels_r[IP6_SIT_HASH_SIZE];
83 struct ip_tunnel __rcu *tunnels_l[IP6_SIT_HASH_SIZE];
84 struct ip_tunnel __rcu *tunnels_wc[1];
85 struct ip_tunnel __rcu **tunnels[4];
86
87 struct net_device *fb_tunnel_dev;
88};
89
90/*
91 * Must be invoked with rcu_read_lock
92 */
93static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
94 struct net_device *dev, __be32 remote, __be32 local)
95{
96 unsigned int h0 = HASH(remote);
97 unsigned int h1 = HASH(local);
98 struct ip_tunnel *t;
99 struct sit_net *sitn = net_generic(net, sit_net_id);
100
101 for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) {
102 if (local == t->parms.iph.saddr &&
103 remote == t->parms.iph.daddr &&
104 (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
105 (t->dev->flags & IFF_UP))
106 return t;
107 }
108 for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) {
109 if (remote == t->parms.iph.daddr &&
110 (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
111 (t->dev->flags & IFF_UP))
112 return t;
113 }
114 for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) {
115 if (local == t->parms.iph.saddr &&
116 (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
117 (t->dev->flags & IFF_UP))
118 return t;
119 }
120 t = rcu_dereference(sitn->tunnels_wc[0]);
121 if (t && (t->dev->flags & IFF_UP))
122 return t;
123 return NULL;
124}
125
126static struct ip_tunnel __rcu **__ipip6_bucket(struct sit_net *sitn,
127 struct ip_tunnel_parm *parms)
128{
129 __be32 remote = parms->iph.daddr;
130 __be32 local = parms->iph.saddr;
131 unsigned int h = 0;
132 int prio = 0;
133
134 if (remote) {
135 prio |= 2;
136 h ^= HASH(remote);
137 }
138 if (local) {
139 prio |= 1;
140 h ^= HASH(local);
141 }
142 return &sitn->tunnels[prio][h];
143}
144
145static inline struct ip_tunnel __rcu **ipip6_bucket(struct sit_net *sitn,
146 struct ip_tunnel *t)
147{
148 return __ipip6_bucket(sitn, &t->parms);
149}
150
151static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
152{
153 struct ip_tunnel __rcu **tp;
154 struct ip_tunnel *iter;
155
156 for (tp = ipip6_bucket(sitn, t);
157 (iter = rtnl_dereference(*tp)) != NULL;
158 tp = &iter->next) {
159 if (t == iter) {
160 rcu_assign_pointer(*tp, t->next);
161 break;
162 }
163 }
164}
165
166static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t)
167{
168 struct ip_tunnel __rcu **tp = ipip6_bucket(sitn, t);
169
170 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
171 rcu_assign_pointer(*tp, t);
172}
173
/*
 * Initialise the 6rd parameters of @dev (no-op without CONFIG_IPV6_SIT_6RD).
 *
 * The fallback device gets the 2002::/16 prefix with no relay prefix;
 * every other device copies the fallback device's current 6rd settings.
 */
static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
{
#ifdef CONFIG_IPV6_SIT_6RD
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel *fb;

	if (dev != sitn->fb_tunnel_dev) {
		fb = netdev_priv(sitn->fb_tunnel_dev);
		memcpy(&t->ip6rd, &fb->ip6rd, sizeof(t->ip6rd));
		return;
	}

	ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
	t->ip6rd.relay_prefix = 0;
	t->ip6rd.prefixlen = 16;
	t->ip6rd.relay_prefixlen = 0;
#endif
}
190
191static int ipip6_tunnel_create(struct net_device *dev)
192{
193 struct ip_tunnel *t = netdev_priv(dev);
194 struct net *net = dev_net(dev);
195 struct sit_net *sitn = net_generic(net, sit_net_id);
196 int err;
197
198 memcpy(dev->dev_addr, &t->parms.iph.saddr, 4);
199 memcpy(dev->broadcast, &t->parms.iph.daddr, 4);
200
201 if ((__force u16)t->parms.i_flags & SIT_ISATAP)
202 dev->priv_flags |= IFF_ISATAP;
203
204 dev->rtnl_link_ops = &sit_link_ops;
205
206 err = register_netdevice(dev);
207 if (err < 0)
208 goto out;
209
210 ipip6_tunnel_clone_6rd(dev, sitn);
211
212 dev_hold(dev);
213
214 ipip6_tunnel_link(sitn, t);
215 return 0;
216
217out:
218 return err;
219}
220
221static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
222 struct ip_tunnel_parm *parms, int create)
223{
224 __be32 remote = parms->iph.daddr;
225 __be32 local = parms->iph.saddr;
226 struct ip_tunnel *t, *nt;
227 struct ip_tunnel __rcu **tp;
228 struct net_device *dev;
229 char name[IFNAMSIZ];
230 struct sit_net *sitn = net_generic(net, sit_net_id);
231
232 for (tp = __ipip6_bucket(sitn, parms);
233 (t = rtnl_dereference(*tp)) != NULL;
234 tp = &t->next) {
235 if (local == t->parms.iph.saddr &&
236 remote == t->parms.iph.daddr &&
237 parms->link == t->parms.link) {
238 if (create)
239 return NULL;
240 else
241 return t;
242 }
243 }
244 if (!create)
245 goto failed;
246
247 if (parms->name[0]) {
248 if (!dev_valid_name(parms->name))
249 goto failed;
250 strlcpy(name, parms->name, IFNAMSIZ);
251 } else {
252 strcpy(name, "sit%d");
253 }
254 dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
255 ipip6_tunnel_setup);
256 if (!dev)
257 return NULL;
258
259 dev_net_set(dev, net);
260
261 nt = netdev_priv(dev);
262
263 nt->parms = *parms;
264 if (ipip6_tunnel_create(dev) < 0)
265 goto failed_free;
266
267 return nt;
268
269failed_free:
270 free_netdev(dev);
271failed:
272 return NULL;
273}
274
/*
 * Walk a PRL list under RCU, starting at @start.
 * The iterator is a variable named "prl" that the enclosing scope must
 * declare; it is NULL once the loop runs to completion.
 */
#define for_each_prl_rcu(start)				\
	for (prl = rcu_dereference(start);		\
	     prl;					\
	     prl = rcu_dereference(prl->next))
279
280static struct ip_tunnel_prl_entry *
281__ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
282{
283 struct ip_tunnel_prl_entry *prl;
284
285 for_each_prl_rcu(t->prl)
286 if (prl->addr == addr)
287 break;
288 return prl;
289
290}
291
292static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
293 struct ip_tunnel_prl __user *a)
294{
295 struct ip_tunnel_prl kprl, *kp;
296 struct ip_tunnel_prl_entry *prl;
297 unsigned int cmax, c = 0, ca, len;
298 int ret = 0;
299
300 if (copy_from_user(&kprl, a, sizeof(kprl)))
301 return -EFAULT;
302 cmax = kprl.datalen / sizeof(kprl);
303 if (cmax > 1 && kprl.addr != htonl(INADDR_ANY))
304 cmax = 1;
305
306 /* For simple GET or for root users,
307 * we try harder to allocate.
308 */
309 kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
310 kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) :
311 NULL;
312
313 rcu_read_lock();
314
315 ca = t->prl_count < cmax ? t->prl_count : cmax;
316
317 if (!kp) {
318 /* We don't try hard to allocate much memory for
319 * non-root users.
320 * For root users, retry allocating enough memory for
321 * the answer.
322 */
323 kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC);
324 if (!kp) {
325 ret = -ENOMEM;
326 goto out;
327 }
328 }
329
330 c = 0;
331 for_each_prl_rcu(t->prl) {
332 if (c >= cmax)
333 break;
334 if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr)
335 continue;
336 kp[c].addr = prl->addr;
337 kp[c].flags = prl->flags;
338 c++;
339 if (kprl.addr != htonl(INADDR_ANY))
340 break;
341 }
342out:
343 rcu_read_unlock();
344
345 len = sizeof(*kp) * c;
346 ret = 0;
347 if ((len && copy_to_user(a + 1, kp, len)) || put_user(len, &a->datalen))
348 ret = -EFAULT;
349
350 kfree(kp);
351
352 return ret;
353}
354
355static int
356ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
357{
358 struct ip_tunnel_prl_entry *p;
359 int err = 0;
360
361 if (a->addr == htonl(INADDR_ANY))
362 return -EINVAL;
363
364 ASSERT_RTNL();
365
366 for (p = rtnl_dereference(t->prl); p; p = rtnl_dereference(p->next)) {
367 if (p->addr == a->addr) {
368 if (chg) {
369 p->flags = a->flags;
370 goto out;
371 }
372 err = -EEXIST;
373 goto out;
374 }
375 }
376
377 if (chg) {
378 err = -ENXIO;
379 goto out;
380 }
381
382 p = kzalloc(sizeof(struct ip_tunnel_prl_entry), GFP_KERNEL);
383 if (!p) {
384 err = -ENOBUFS;
385 goto out;
386 }
387
388 p->next = t->prl;
389 p->addr = a->addr;
390 p->flags = a->flags;
391 t->prl_count++;
392 rcu_assign_pointer(t->prl, p);
393out:
394 return err;
395}
396
397static void prl_list_destroy_rcu(struct rcu_head *head)
398{
399 struct ip_tunnel_prl_entry *p, *n;
400
401 p = container_of(head, struct ip_tunnel_prl_entry, rcu_head);
402 do {
403 n = rcu_dereference_protected(p->next, 1);
404 kfree(p);
405 p = n;
406 } while (p);
407}
408
409static int
410ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
411{
412 struct ip_tunnel_prl_entry *x;
413 struct ip_tunnel_prl_entry __rcu **p;
414 int err = 0;
415
416 ASSERT_RTNL();
417
418 if (a && a->addr != htonl(INADDR_ANY)) {
419 for (p = &t->prl;
420 (x = rtnl_dereference(*p)) != NULL;
421 p = &x->next) {
422 if (x->addr == a->addr) {
423 *p = x->next;
424 kfree_rcu(x, rcu_head);
425 t->prl_count--;
426 goto out;
427 }
428 }
429 err = -ENXIO;
430 } else {
431 x = rtnl_dereference(t->prl);
432 if (x) {
433 t->prl_count = 0;
434 call_rcu(&x->rcu_head, prl_list_destroy_rcu);
435 t->prl = NULL;
436 }
437 }
438out:
439 return err;
440}
441
442static int
443isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t)
444{
445 struct ip_tunnel_prl_entry *p;
446 int ok = 1;
447
448 rcu_read_lock();
449 p = __ipip6_tunnel_locate_prl(t, iph->saddr);
450 if (p) {
451 if (p->flags & PRL_DEFAULT)
452 skb->ndisc_nodetype = NDISC_NODETYPE_DEFAULT;
453 else
454 skb->ndisc_nodetype = NDISC_NODETYPE_NODEFAULT;
455 } else {
456 const struct in6_addr *addr6 = &ipv6_hdr(skb)->saddr;
457
458 if (ipv6_addr_is_isatap(addr6) &&
459 (addr6->s6_addr32[3] == iph->saddr) &&
460 ipv6_chk_prefix(addr6, t->dev))
461 skb->ndisc_nodetype = NDISC_NODETYPE_HOST;
462 else
463 ok = 0;
464 }
465 rcu_read_unlock();
466 return ok;
467}
468
469static void ipip6_tunnel_uninit(struct net_device *dev)
470{
471 struct ip_tunnel *tunnel = netdev_priv(dev);
472 struct sit_net *sitn = net_generic(tunnel->net, sit_net_id);
473
474 if (dev == sitn->fb_tunnel_dev) {
475 RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL);
476 } else {
477 ipip6_tunnel_unlink(sitn, tunnel);
478 ipip6_tunnel_del_prl(tunnel, NULL);
479 }
480 dst_cache_reset(&tunnel->dst_cache);
481 dev_put(dev);
482}
483
484static int ipip6_err(struct sk_buff *skb, u32 info)
485{
486 const struct iphdr *iph = (const struct iphdr *)skb->data;
487 const int type = icmp_hdr(skb)->type;
488 const int code = icmp_hdr(skb)->code;
489 unsigned int data_len = 0;
490 struct ip_tunnel *t;
491 int err;
492
493 switch (type) {
494 default:
495 case ICMP_PARAMETERPROB:
496 return 0;
497
498 case ICMP_DEST_UNREACH:
499 switch (code) {
500 case ICMP_SR_FAILED:
501 /* Impossible event. */
502 return 0;
503 default:
504 /* All others are translated to HOST_UNREACH.
505 rfc2003 contains "deep thoughts" about NET_UNREACH,
506 I believe they are just ether pollution. --ANK
507 */
508 break;
509 }
510 break;
511 case ICMP_TIME_EXCEEDED:
512 if (code != ICMP_EXC_TTL)
513 return 0;
514 data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
515 break;
516 case ICMP_REDIRECT:
517 break;
518 }
519
520 err = -ENOENT;
521
522 t = ipip6_tunnel_lookup(dev_net(skb->dev),
523 skb->dev,
524 iph->daddr,
525 iph->saddr);
526 if (!t)
527 goto out;
528
529 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
530 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
531 t->parms.link, 0, iph->protocol, 0);
532 err = 0;
533 goto out;
534 }
535 if (type == ICMP_REDIRECT) {
536 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
537 iph->protocol, 0);
538 err = 0;
539 goto out;
540 }
541
542 err = 0;
543 if (__in6_dev_get(skb->dev) &&
544 !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len))
545 goto out;
546
547 if (t->parms.iph.daddr == 0)
548 goto out;
549
550 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
551 goto out;
552
553 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
554 t->err_count++;
555 else
556 t->err_count = 1;
557 t->err_time = jiffies;
558out:
559 return err;
560}
561
562static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr,
563 const struct in6_addr *v6addr)
564{
565 __be32 v4embed = 0;
566 if (check_6rd(tunnel, v6addr, &v4embed) && v4addr != v4embed)
567 return true;
568 return false;
569}
570
571/* Checks if an address matches an address on the tunnel interface.
572 * Used to detect the NAT of proto 41 packets and let them pass spoofing test.
573 * Long story:
574 * This function is called after we considered the packet as spoofed
575 * in is_spoofed_6rd.
576 * We may have a router that is doing NAT for proto 41 packets
577 * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb
578 * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd
579 * function will return true, dropping the packet.
580 * But, we can still check if is spoofed against the IP
581 * addresses associated with the interface.
582 */
583static bool only_dnatted(const struct ip_tunnel *tunnel,
584 const struct in6_addr *v6dst)
585{
586 int prefix_len;
587
588#ifdef CONFIG_IPV6_SIT_6RD
589 prefix_len = tunnel->ip6rd.prefixlen + 32
590 - tunnel->ip6rd.relay_prefixlen;
591#else
592 prefix_len = 48;
593#endif
594 return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev);
595}
596
597/* Returns true if a packet is spoofed */
598static bool packet_is_spoofed(struct sk_buff *skb,
599 const struct iphdr *iph,
600 struct ip_tunnel *tunnel)
601{
602 const struct ipv6hdr *ipv6h;
603
604 if (tunnel->dev->priv_flags & IFF_ISATAP) {
605 if (!isatap_chksrc(skb, iph, tunnel))
606 return true;
607
608 return false;
609 }
610
611 if (tunnel->dev->flags & IFF_POINTOPOINT)
612 return false;
613
614 ipv6h = ipv6_hdr(skb);
615
616 if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) {
617 net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
618 &iph->saddr, &ipv6h->saddr,
619 &iph->daddr, &ipv6h->daddr);
620 return true;
621 }
622
623 if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr)))
624 return false;
625
626 if (only_dnatted(tunnel, &ipv6h->daddr))
627 return false;
628
629 net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
630 &iph->saddr, &ipv6h->saddr,
631 &iph->daddr, &ipv6h->daddr);
632 return true;
633}
634
635static int ipip6_rcv(struct sk_buff *skb)
636{
637 const struct iphdr *iph = ip_hdr(skb);
638 struct ip_tunnel *tunnel;
639 int err;
640
641 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
642 iph->saddr, iph->daddr);
643 if (tunnel) {
644 struct pcpu_sw_netstats *tstats;
645
646 if (tunnel->parms.iph.protocol != IPPROTO_IPV6 &&
647 tunnel->parms.iph.protocol != 0)
648 goto out;
649
650 skb->mac_header = skb->network_header;
651 skb_reset_network_header(skb);
652 IPCB(skb)->flags = 0;
653 skb->dev = tunnel->dev;
654
655 if (packet_is_spoofed(skb, iph, tunnel)) {
656 tunnel->dev->stats.rx_errors++;
657 goto out;
658 }
659
660 if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6),
661 !net_eq(tunnel->net, dev_net(tunnel->dev))))
662 goto out;
663
664 /* skb can be uncloned in iptunnel_pull_header, so
665 * old iph is no longer valid
666 */
667 iph = (const struct iphdr *)skb_mac_header(skb);
668 err = IP_ECN_decapsulate(iph, skb);
669 if (unlikely(err)) {
670 if (log_ecn_error)
671 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
672 &iph->saddr, iph->tos);
673 if (err > 1) {
674 ++tunnel->dev->stats.rx_frame_errors;
675 ++tunnel->dev->stats.rx_errors;
676 goto out;
677 }
678 }
679
680 tstats = this_cpu_ptr(tunnel->dev->tstats);
681 u64_stats_update_begin(&tstats->syncp);
682 tstats->rx_packets++;
683 tstats->rx_bytes += skb->len;
684 u64_stats_update_end(&tstats->syncp);
685
686 netif_rx(skb);
687
688 return 0;
689 }
690
691 /* no tunnel matched, let upstream know, ipsec may handle it */
692 return 1;
693out:
694 kfree_skb(skb);
695 return 0;
696}
697
698static const struct tnl_ptk_info ipip_tpi = {
699 /* no tunnel info required for ipip. */
700 .proto = htons(ETH_P_IP),
701};
702
703#if IS_ENABLED(CONFIG_MPLS)
704static const struct tnl_ptk_info mplsip_tpi = {
705 /* no tunnel info required for mplsip. */
706 .proto = htons(ETH_P_MPLS_UC),
707};
708#endif
709
710static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
711{
712 const struct iphdr *iph;
713 struct ip_tunnel *tunnel;
714
715 iph = ip_hdr(skb);
716 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
717 iph->saddr, iph->daddr);
718 if (tunnel) {
719 const struct tnl_ptk_info *tpi;
720
721 if (tunnel->parms.iph.protocol != ipproto &&
722 tunnel->parms.iph.protocol != 0)
723 goto drop;
724
725 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
726 goto drop;
727#if IS_ENABLED(CONFIG_MPLS)
728 if (ipproto == IPPROTO_MPLS)
729 tpi = &mplsip_tpi;
730 else
731#endif
732 tpi = &ipip_tpi;
733 if (iptunnel_pull_header(skb, 0, tpi->proto, false))
734 goto drop;
735 return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
736 }
737
738 return 1;
739
740drop:
741 kfree_skb(skb);
742 return 0;
743}
744
745static int ipip_rcv(struct sk_buff *skb)
746{
747 return sit_tunnel_rcv(skb, IPPROTO_IPIP);
748}
749
750#if IS_ENABLED(CONFIG_MPLS)
751static int mplsip_rcv(struct sk_buff *skb)
752{
753 return sit_tunnel_rcv(skb, IPPROTO_MPLS);
754}
755#endif
756
757/*
758 * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function
759 * stores the embedded IPv4 address in v4dst and returns true.
760 */
761static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
762 __be32 *v4dst)
763{
764#ifdef CONFIG_IPV6_SIT_6RD
765 if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix,
766 tunnel->ip6rd.prefixlen)) {
767 unsigned int pbw0, pbi0;
768 int pbi1;
769 u32 d;
770
771 pbw0 = tunnel->ip6rd.prefixlen >> 5;
772 pbi0 = tunnel->ip6rd.prefixlen & 0x1f;
773
774 d = tunnel->ip6rd.relay_prefixlen < 32 ?
775 (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >>
776 tunnel->ip6rd.relay_prefixlen : 0;
777
778 pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen;
779 if (pbi1 > 0)
780 d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >>
781 (32 - pbi1);
782
783 *v4dst = tunnel->ip6rd.relay_prefix | htonl(d);
784 return true;
785 }
786#else
787 if (v6dst->s6_addr16[0] == htons(0x2002)) {
788 /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */
789 memcpy(v4dst, &v6dst->s6_addr16[1], 4);
790 return true;
791 }
792#endif
793 return false;
794}
795
796static inline __be32 try_6rd(struct ip_tunnel *tunnel,
797 const struct in6_addr *v6dst)
798{
799 __be32 dst = 0;
800 check_6rd(tunnel, v6dst, &dst);
801 return dst;
802}
803
804/*
805 * This function assumes it is being called from dev_queue_xmit()
806 * and that skb is filled properly by that function.
807 */
808
809static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
810 struct net_device *dev)
811{
812 struct ip_tunnel *tunnel = netdev_priv(dev);
813 const struct iphdr *tiph = &tunnel->parms.iph;
814 const struct ipv6hdr *iph6 = ipv6_hdr(skb);
815 u8 tos = tunnel->parms.iph.tos;
816 __be16 df = tiph->frag_off;
817 struct rtable *rt; /* Route to the other host */
818 struct net_device *tdev; /* Device to other host */
819 unsigned int max_headroom; /* The extra header space needed */
820 __be32 dst = tiph->daddr;
821 struct flowi4 fl4;
822 int mtu;
823 const struct in6_addr *addr6;
824 int addr_type;
825 u8 ttl;
826 u8 protocol = IPPROTO_IPV6;
827 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
828
829 if (tos == 1)
830 tos = ipv6_get_dsfield(iph6);
831
832 /* ISATAP (RFC4214) - must come before 6to4 */
833 if (dev->priv_flags & IFF_ISATAP) {
834 struct neighbour *neigh = NULL;
835 bool do_tx_error = false;
836
837 if (skb_dst(skb))
838 neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
839
840 if (!neigh) {
841 net_dbg_ratelimited("nexthop == NULL\n");
842 goto tx_error;
843 }
844
845 addr6 = (const struct in6_addr *)&neigh->primary_key;
846 addr_type = ipv6_addr_type(addr6);
847
848 if ((addr_type & IPV6_ADDR_UNICAST) &&
849 ipv6_addr_is_isatap(addr6))
850 dst = addr6->s6_addr32[3];
851 else
852 do_tx_error = true;
853
854 neigh_release(neigh);
855 if (do_tx_error)
856 goto tx_error;
857 }
858
859 if (!dst)
860 dst = try_6rd(tunnel, &iph6->daddr);
861
862 if (!dst) {
863 struct neighbour *neigh = NULL;
864 bool do_tx_error = false;
865
866 if (skb_dst(skb))
867 neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
868
869 if (!neigh) {
870 net_dbg_ratelimited("nexthop == NULL\n");
871 goto tx_error;
872 }
873
874 addr6 = (const struct in6_addr *)&neigh->primary_key;
875 addr_type = ipv6_addr_type(addr6);
876
877 if (addr_type == IPV6_ADDR_ANY) {
878 addr6 = &ipv6_hdr(skb)->daddr;
879 addr_type = ipv6_addr_type(addr6);
880 }
881
882 if ((addr_type & IPV6_ADDR_COMPATv4) != 0)
883 dst = addr6->s6_addr32[3];
884 else
885 do_tx_error = true;
886
887 neigh_release(neigh);
888 if (do_tx_error)
889 goto tx_error;
890 }
891
892 flowi4_init_output(&fl4, tunnel->parms.link, tunnel->fwmark,
893 RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPV6,
894 0, dst, tiph->saddr, 0, 0,
895 sock_net_uid(tunnel->net, NULL));
896 rt = ip_route_output_flow(tunnel->net, &fl4, NULL);
897
898 if (IS_ERR(rt)) {
899 dev->stats.tx_carrier_errors++;
900 goto tx_error_icmp;
901 }
902 if (rt->rt_type != RTN_UNICAST) {
903 ip_rt_put(rt);
904 dev->stats.tx_carrier_errors++;
905 goto tx_error_icmp;
906 }
907 tdev = rt->dst.dev;
908
909 if (tdev == dev) {
910 ip_rt_put(rt);
911 dev->stats.collisions++;
912 goto tx_error;
913 }
914
915 if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4)) {
916 ip_rt_put(rt);
917 goto tx_error;
918 }
919
920 if (df) {
921 mtu = dst_mtu(&rt->dst) - t_hlen;
922
923 if (mtu < 68) {
924 dev->stats.collisions++;
925 ip_rt_put(rt);
926 goto tx_error;
927 }
928
929 if (mtu < IPV6_MIN_MTU) {
930 mtu = IPV6_MIN_MTU;
931 df = 0;
932 }
933
934 if (tunnel->parms.iph.daddr)
935 skb_dst_update_pmtu_no_confirm(skb, mtu);
936
937 if (skb->len > mtu && !skb_is_gso(skb)) {
938 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
939 ip_rt_put(rt);
940 goto tx_error;
941 }
942 }
943
944 if (tunnel->err_count > 0) {
945 if (time_before(jiffies,
946 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
947 tunnel->err_count--;
948 dst_link_failure(skb);
949 } else
950 tunnel->err_count = 0;
951 }
952
953 /*
954 * Okay, now see if we can stuff it in the buffer as-is.
955 */
956 max_headroom = LL_RESERVED_SPACE(tdev) + t_hlen;
957
958 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
959 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
960 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
961 if (!new_skb) {
962 ip_rt_put(rt);
963 dev->stats.tx_dropped++;
964 kfree_skb(skb);
965 return NETDEV_TX_OK;
966 }
967 if (skb->sk)
968 skb_set_owner_w(new_skb, skb->sk);
969 dev_kfree_skb(skb);
970 skb = new_skb;
971 iph6 = ipv6_hdr(skb);
972 }
973 ttl = tiph->ttl;
974 if (ttl == 0)
975 ttl = iph6->hop_limit;
976 tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
977
978 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) {
979 ip_rt_put(rt);
980 goto tx_error;
981 }
982
983 skb_set_inner_ipproto(skb, IPPROTO_IPV6);
984
985 iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
986 df, !net_eq(tunnel->net, dev_net(dev)));
987 return NETDEV_TX_OK;
988
989tx_error_icmp:
990 dst_link_failure(skb);
991tx_error:
992 kfree_skb(skb);
993 dev->stats.tx_errors++;
994 return NETDEV_TX_OK;
995}
996
997static netdev_tx_t sit_tunnel_xmit__(struct sk_buff *skb,
998 struct net_device *dev, u8 ipproto)
999{
1000 struct ip_tunnel *tunnel = netdev_priv(dev);
1001 const struct iphdr *tiph = &tunnel->parms.iph;
1002
1003 if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
1004 goto tx_error;
1005
1006 skb_set_inner_ipproto(skb, ipproto);
1007
1008 ip_tunnel_xmit(skb, dev, tiph, ipproto);
1009 return NETDEV_TX_OK;
1010tx_error:
1011 kfree_skb(skb);
1012 dev->stats.tx_errors++;
1013 return NETDEV_TX_OK;
1014}
1015
1016static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
1017 struct net_device *dev)
1018{
1019 switch (skb->protocol) {
1020 case htons(ETH_P_IP):
1021 sit_tunnel_xmit__(skb, dev, IPPROTO_IPIP);
1022 break;
1023 case htons(ETH_P_IPV6):
1024 ipip6_tunnel_xmit(skb, dev);
1025 break;
1026#if IS_ENABLED(CONFIG_MPLS)
1027 case htons(ETH_P_MPLS_UC):
1028 sit_tunnel_xmit__(skb, dev, IPPROTO_MPLS);
1029 break;
1030#endif
1031 default:
1032 goto tx_err;
1033 }
1034
1035 return NETDEV_TX_OK;
1036
1037tx_err:
1038 dev->stats.tx_errors++;
1039 kfree_skb(skb);
1040 return NETDEV_TX_OK;
1041
1042}
1043
1044static void ipip6_tunnel_bind_dev(struct net_device *dev)
1045{
1046 struct net_device *tdev = NULL;
1047 struct ip_tunnel *tunnel;
1048 const struct iphdr *iph;
1049 struct flowi4 fl4;
1050
1051 tunnel = netdev_priv(dev);
1052 iph = &tunnel->parms.iph;
1053
1054 if (iph->daddr) {
1055 struct rtable *rt = ip_route_output_ports(tunnel->net, &fl4,
1056 NULL,
1057 iph->daddr, iph->saddr,
1058 0, 0,
1059 IPPROTO_IPV6,
1060 RT_TOS(iph->tos),
1061 tunnel->parms.link);
1062
1063 if (!IS_ERR(rt)) {
1064 tdev = rt->dst.dev;
1065 ip_rt_put(rt);
1066 }
1067 dev->flags |= IFF_POINTOPOINT;
1068 }
1069
1070 if (!tdev && tunnel->parms.link)
1071 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
1072
1073 if (tdev && !netif_is_l3_master(tdev)) {
1074 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
1075
1076 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
1077 dev->mtu = tdev->mtu - t_hlen;
1078 if (dev->mtu < IPV6_MIN_MTU)
1079 dev->mtu = IPV6_MIN_MTU;
1080 }
1081}
1082
1083static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p,
1084 __u32 fwmark)
1085{
1086 struct net *net = t->net;
1087 struct sit_net *sitn = net_generic(net, sit_net_id);
1088
1089 ipip6_tunnel_unlink(sitn, t);
1090 synchronize_net();
1091 t->parms.iph.saddr = p->iph.saddr;
1092 t->parms.iph.daddr = p->iph.daddr;
1093 memcpy(t->dev->dev_addr, &p->iph.saddr, 4);
1094 memcpy(t->dev->broadcast, &p->iph.daddr, 4);
1095 ipip6_tunnel_link(sitn, t);
1096 t->parms.iph.ttl = p->iph.ttl;
1097 t->parms.iph.tos = p->iph.tos;
1098 t->parms.iph.frag_off = p->iph.frag_off;
1099 if (t->parms.link != p->link || t->fwmark != fwmark) {
1100 t->parms.link = p->link;
1101 t->fwmark = fwmark;
1102 ipip6_tunnel_bind_dev(t->dev);
1103 }
1104 dst_cache_reset(&t->dst_cache);
1105 netdev_state_change(t->dev);
1106}
1107
#ifdef CONFIG_IPV6_SIT_6RD
/*
 * Validate @ip6rd and install it as the 6rd configuration of tunnel @t.
 *
 * Rejected with -EINVAL: a relay prefix length above 32, a 6rd prefix
 * plus embedded IPv4 bits exceeding 64 bits, or a prefix / relay prefix
 * with bits set beyond its stated length. On success the dst cache is
 * reset, a state change is signalled and 0 is returned.
 */
static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
				   struct ip_tunnel_6rd *ip6rd)
{
	struct in6_addr prefix;
	__be32 relay_prefix = 0;

	if (ip6rd->relay_prefixlen > 32 ||
	    ip6rd->prefixlen + (32 - ip6rd->relay_prefixlen) > 64)
		return -EINVAL;

	/* The prefix must equal itself truncated to prefixlen bits. */
	ipv6_addr_prefix(&prefix, &ip6rd->prefix, ip6rd->prefixlen);
	if (!ipv6_addr_equal(&prefix, &ip6rd->prefix))
		return -EINVAL;

	/* Same for the relay prefix; a zero length means an all-zero mask
	 * (and avoids shifting by 32).
	 */
	if (ip6rd->relay_prefixlen)
		relay_prefix = ip6rd->relay_prefix &
			       htonl(0xffffffffUL <<
				     (32 - ip6rd->relay_prefixlen));
	if (relay_prefix != ip6rd->relay_prefix)
		return -EINVAL;

	t->ip6rd.prefix = prefix;
	t->ip6rd.relay_prefix = relay_prefix;
	t->ip6rd.prefixlen = ip6rd->prefixlen;
	t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen;

	dst_cache_reset(&t->dst_cache);
	netdev_state_change(t->dev);

	return 0;
}
#endif
1140
1141static bool ipip6_valid_ip_proto(u8 ipproto)
1142{
1143 return ipproto == IPPROTO_IPV6 ||
1144 ipproto == IPPROTO_IPIP ||
1145#if IS_ENABLED(CONFIG_MPLS)
1146 ipproto == IPPROTO_MPLS ||
1147#endif
1148 ipproto == 0;
1149}
1150
1151static int
1152ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1153{
1154 int err = 0;
1155 struct ip_tunnel_parm p;
1156 struct ip_tunnel_prl prl;
1157 struct ip_tunnel *t = netdev_priv(dev);
1158 struct net *net = t->net;
1159 struct sit_net *sitn = net_generic(net, sit_net_id);
1160#ifdef CONFIG_IPV6_SIT_6RD
1161 struct ip_tunnel_6rd ip6rd;
1162#endif
1163
1164 switch (cmd) {
1165 case SIOCGETTUNNEL:
1166#ifdef CONFIG_IPV6_SIT_6RD
1167 case SIOCGET6RD:
1168#endif
1169 if (dev == sitn->fb_tunnel_dev) {
1170 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1171 err = -EFAULT;
1172 break;
1173 }
1174 t = ipip6_tunnel_locate(net, &p, 0);
1175 if (!t)
1176 t = netdev_priv(dev);
1177 }
1178
1179 err = -EFAULT;
1180 if (cmd == SIOCGETTUNNEL) {
1181 memcpy(&p, &t->parms, sizeof(p));
1182 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p,
1183 sizeof(p)))
1184 goto done;
1185#ifdef CONFIG_IPV6_SIT_6RD
1186 } else {
1187 ip6rd.prefix = t->ip6rd.prefix;
1188 ip6rd.relay_prefix = t->ip6rd.relay_prefix;
1189 ip6rd.prefixlen = t->ip6rd.prefixlen;
1190 ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
1191 if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd,
1192 sizeof(ip6rd)))
1193 goto done;
1194#endif
1195 }
1196 err = 0;
1197 break;
1198
1199 case SIOCADDTUNNEL:
1200 case SIOCCHGTUNNEL:
1201 err = -EPERM;
1202 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1203 goto done;
1204
1205 err = -EFAULT;
1206 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1207 goto done;
1208
1209 err = -EINVAL;
1210 if (!ipip6_valid_ip_proto(p.iph.protocol))
1211 goto done;
1212 if (p.iph.version != 4 ||
1213 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
1214 goto done;
1215 if (p.iph.ttl)
1216 p.iph.frag_off |= htons(IP_DF);
1217
1218 t = ipip6_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1219
1220 if (dev != sitn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1221 if (t) {
1222 if (t->dev != dev) {
1223 err = -EEXIST;
1224 break;
1225 }
1226 } else {
1227 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
1228 (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
1229 err = -EINVAL;
1230 break;
1231 }
1232 t = netdev_priv(dev);
1233 }
1234
1235 ipip6_tunnel_update(t, &p, t->fwmark);
1236 }
1237
1238 if (t) {
1239 err = 0;
1240 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1241 err = -EFAULT;
1242 } else
1243 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1244 break;
1245
1246 case SIOCDELTUNNEL:
1247 err = -EPERM;
1248 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1249 goto done;
1250
1251 if (dev == sitn->fb_tunnel_dev) {
1252 err = -EFAULT;
1253 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1254 goto done;
1255 err = -ENOENT;
1256 t = ipip6_tunnel_locate(net, &p, 0);
1257 if (!t)
1258 goto done;
1259 err = -EPERM;
1260 if (t == netdev_priv(sitn->fb_tunnel_dev))
1261 goto done;
1262 dev = t->dev;
1263 }
1264 unregister_netdevice(dev);
1265 err = 0;
1266 break;
1267
1268 case SIOCGETPRL:
1269 err = -EINVAL;
1270 if (dev == sitn->fb_tunnel_dev)
1271 goto done;
1272 err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data);
1273 break;
1274
1275 case SIOCADDPRL:
1276 case SIOCDELPRL:
1277 case SIOCCHGPRL:
1278 err = -EPERM;
1279 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1280 goto done;
1281 err = -EINVAL;
1282 if (dev == sitn->fb_tunnel_dev)
1283 goto done;
1284 err = -EFAULT;
1285 if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
1286 goto done;
1287
1288 switch (cmd) {
1289 case SIOCDELPRL:
1290 err = ipip6_tunnel_del_prl(t, &prl);
1291 break;
1292 case SIOCADDPRL:
1293 case SIOCCHGPRL:
1294 err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
1295 break;
1296 }
1297 dst_cache_reset(&t->dst_cache);
1298 netdev_state_change(dev);
1299 break;
1300
1301#ifdef CONFIG_IPV6_SIT_6RD
1302 case SIOCADD6RD:
1303 case SIOCCHG6RD:
1304 case SIOCDEL6RD:
1305 err = -EPERM;
1306 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1307 goto done;
1308
1309 err = -EFAULT;
1310 if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data,
1311 sizeof(ip6rd)))
1312 goto done;
1313
1314 if (cmd != SIOCDEL6RD) {
1315 err = ipip6_tunnel_update_6rd(t, &ip6rd);
1316 if (err < 0)
1317 goto done;
1318 } else
1319 ipip6_tunnel_clone_6rd(dev, sitn);
1320
1321 err = 0;
1322 break;
1323#endif
1324
1325 default:
1326 err = -EINVAL;
1327 }
1328
1329done:
1330 return err;
1331}
1332
1333static const struct net_device_ops ipip6_netdev_ops = {
1334 .ndo_init = ipip6_tunnel_init,
1335 .ndo_uninit = ipip6_tunnel_uninit,
1336 .ndo_start_xmit = sit_tunnel_xmit,
1337 .ndo_do_ioctl = ipip6_tunnel_ioctl,
1338 .ndo_get_stats64 = ip_tunnel_get_stats64,
1339 .ndo_get_iflink = ip_tunnel_get_iflink,
1340};
1341
1342static void ipip6_dev_free(struct net_device *dev)
1343{
1344 struct ip_tunnel *tunnel = netdev_priv(dev);
1345
1346 dst_cache_destroy(&tunnel->dst_cache);
1347 free_percpu(dev->tstats);
1348}
1349
/* Feature bits set in both dev->features and dev->hw_features at setup time. */
#define SIT_FEATURES	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \
			 NETIF_F_GSO_SOFTWARE | NETIF_F_HW_CSUM)
1355
1356static void ipip6_tunnel_setup(struct net_device *dev)
1357{
1358 struct ip_tunnel *tunnel = netdev_priv(dev);
1359 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
1360
1361 dev->netdev_ops = &ipip6_netdev_ops;
1362 dev->needs_free_netdev = true;
1363 dev->priv_destructor = ipip6_dev_free;
1364
1365 dev->type = ARPHRD_SIT;
1366 dev->hard_header_len = LL_MAX_HEADER + t_hlen;
1367 dev->mtu = ETH_DATA_LEN - t_hlen;
1368 dev->min_mtu = IPV6_MIN_MTU;
1369 dev->max_mtu = IP6_MAX_MTU - t_hlen;
1370 dev->flags = IFF_NOARP;
1371 netif_keep_dst(dev);
1372 dev->addr_len = 4;
1373 dev->features |= NETIF_F_LLTX;
1374 dev->features |= SIT_FEATURES;
1375 dev->hw_features |= SIT_FEATURES;
1376}
1377
1378static int ipip6_tunnel_init(struct net_device *dev)
1379{
1380 struct ip_tunnel *tunnel = netdev_priv(dev);
1381 int err;
1382
1383 tunnel->dev = dev;
1384 tunnel->net = dev_net(dev);
1385 strcpy(tunnel->parms.name, dev->name);
1386
1387 ipip6_tunnel_bind_dev(dev);
1388 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1389 if (!dev->tstats)
1390 return -ENOMEM;
1391
1392 err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1393 if (err) {
1394 free_percpu(dev->tstats);
1395 dev->tstats = NULL;
1396 return err;
1397 }
1398
1399 return 0;
1400}
1401
1402static void __net_init ipip6_fb_tunnel_init(struct net_device *dev)
1403{
1404 struct ip_tunnel *tunnel = netdev_priv(dev);
1405 struct iphdr *iph = &tunnel->parms.iph;
1406 struct net *net = dev_net(dev);
1407 struct sit_net *sitn = net_generic(net, sit_net_id);
1408
1409 iph->version = 4;
1410 iph->protocol = IPPROTO_IPV6;
1411 iph->ihl = 5;
1412 iph->ttl = 64;
1413
1414 dev_hold(dev);
1415 rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
1416}
1417
1418static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[],
1419 struct netlink_ext_ack *extack)
1420{
1421 u8 proto;
1422
1423 if (!data || !data[IFLA_IPTUN_PROTO])
1424 return 0;
1425
1426 proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
1427 if (!ipip6_valid_ip_proto(proto))
1428 return -EINVAL;
1429
1430 return 0;
1431}
1432
1433static void ipip6_netlink_parms(struct nlattr *data[],
1434 struct ip_tunnel_parm *parms,
1435 __u32 *fwmark)
1436{
1437 memset(parms, 0, sizeof(*parms));
1438
1439 parms->iph.version = 4;
1440 parms->iph.protocol = IPPROTO_IPV6;
1441 parms->iph.ihl = 5;
1442 parms->iph.ttl = 64;
1443
1444 if (!data)
1445 return;
1446
1447 if (data[IFLA_IPTUN_LINK])
1448 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
1449
1450 if (data[IFLA_IPTUN_LOCAL])
1451 parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
1452
1453 if (data[IFLA_IPTUN_REMOTE])
1454 parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
1455
1456 if (data[IFLA_IPTUN_TTL]) {
1457 parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
1458 if (parms->iph.ttl)
1459 parms->iph.frag_off = htons(IP_DF);
1460 }
1461
1462 if (data[IFLA_IPTUN_TOS])
1463 parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
1464
1465 if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
1466 parms->iph.frag_off = htons(IP_DF);
1467
1468 if (data[IFLA_IPTUN_FLAGS])
1469 parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]);
1470
1471 if (data[IFLA_IPTUN_PROTO])
1472 parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
1473
1474 if (data[IFLA_IPTUN_FWMARK])
1475 *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
1476}
1477
1478/* This function returns true when ENCAP attributes are present in the nl msg */
1479static bool ipip6_netlink_encap_parms(struct nlattr *data[],
1480 struct ip_tunnel_encap *ipencap)
1481{
1482 bool ret = false;
1483
1484 memset(ipencap, 0, sizeof(*ipencap));
1485
1486 if (!data)
1487 return ret;
1488
1489 if (data[IFLA_IPTUN_ENCAP_TYPE]) {
1490 ret = true;
1491 ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
1492 }
1493
1494 if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
1495 ret = true;
1496 ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
1497 }
1498
1499 if (data[IFLA_IPTUN_ENCAP_SPORT]) {
1500 ret = true;
1501 ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
1502 }
1503
1504 if (data[IFLA_IPTUN_ENCAP_DPORT]) {
1505 ret = true;
1506 ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
1507 }
1508
1509 return ret;
1510}
1511
#ifdef CONFIG_IPV6_SIT_6RD
/*
 * Extract the IFLA_IPTUN_6RD_* attributes into @ip6rd.
 *
 * @ip6rd is always zeroed first. Returns true when at least one 6RD
 * attribute is present in the netlink message, false otherwise (including
 * when @data is NULL).
 */
static bool ipip6_netlink_6rd_parms(struct nlattr *data[],
				    struct ip_tunnel_6rd *ip6rd)
{
	bool found = false;

	memset(ip6rd, 0, sizeof(*ip6rd));

	if (!data)
		return false;

	if (data[IFLA_IPTUN_6RD_PREFIX]) {
		ip6rd->prefix = nla_get_in6_addr(data[IFLA_IPTUN_6RD_PREFIX]);
		found = true;
	}

	if (data[IFLA_IPTUN_6RD_RELAY_PREFIX]) {
		ip6rd->relay_prefix =
			nla_get_be32(data[IFLA_IPTUN_6RD_RELAY_PREFIX]);
		found = true;
	}

	if (data[IFLA_IPTUN_6RD_PREFIXLEN]) {
		ip6rd->prefixlen = nla_get_u16(data[IFLA_IPTUN_6RD_PREFIXLEN]);
		found = true;
	}

	if (data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]) {
		ip6rd->relay_prefixlen =
			nla_get_u16(data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]);
		found = true;
	}

	return found;
}
#endif
1548
1549static int ipip6_newlink(struct net *src_net, struct net_device *dev,
1550 struct nlattr *tb[], struct nlattr *data[],
1551 struct netlink_ext_ack *extack)
1552{
1553 struct net *net = dev_net(dev);
1554 struct ip_tunnel *nt;
1555 struct ip_tunnel_encap ipencap;
1556#ifdef CONFIG_IPV6_SIT_6RD
1557 struct ip_tunnel_6rd ip6rd;
1558#endif
1559 int err;
1560
1561 nt = netdev_priv(dev);
1562
1563 if (ipip6_netlink_encap_parms(data, &ipencap)) {
1564 err = ip_tunnel_encap_setup(nt, &ipencap);
1565 if (err < 0)
1566 return err;
1567 }
1568
1569 ipip6_netlink_parms(data, &nt->parms, &nt->fwmark);
1570
1571 if (ipip6_tunnel_locate(net, &nt->parms, 0))
1572 return -EEXIST;
1573
1574 err = ipip6_tunnel_create(dev);
1575 if (err < 0)
1576 return err;
1577
1578 if (tb[IFLA_MTU]) {
1579 u32 mtu = nla_get_u32(tb[IFLA_MTU]);
1580
1581 if (mtu >= IPV6_MIN_MTU &&
1582 mtu <= IP6_MAX_MTU - dev->hard_header_len)
1583 dev->mtu = mtu;
1584 }
1585
1586#ifdef CONFIG_IPV6_SIT_6RD
1587 if (ipip6_netlink_6rd_parms(data, &ip6rd))
1588 err = ipip6_tunnel_update_6rd(nt, &ip6rd);
1589#endif
1590
1591 return err;
1592}
1593
1594static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
1595 struct nlattr *data[],
1596 struct netlink_ext_ack *extack)
1597{
1598 struct ip_tunnel *t = netdev_priv(dev);
1599 struct ip_tunnel_parm p;
1600 struct ip_tunnel_encap ipencap;
1601 struct net *net = t->net;
1602 struct sit_net *sitn = net_generic(net, sit_net_id);
1603#ifdef CONFIG_IPV6_SIT_6RD
1604 struct ip_tunnel_6rd ip6rd;
1605#endif
1606 __u32 fwmark = t->fwmark;
1607 int err;
1608
1609 if (dev == sitn->fb_tunnel_dev)
1610 return -EINVAL;
1611
1612 if (ipip6_netlink_encap_parms(data, &ipencap)) {
1613 err = ip_tunnel_encap_setup(t, &ipencap);
1614 if (err < 0)
1615 return err;
1616 }
1617
1618 ipip6_netlink_parms(data, &p, &fwmark);
1619
1620 if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
1621 (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
1622 return -EINVAL;
1623
1624 t = ipip6_tunnel_locate(net, &p, 0);
1625
1626 if (t) {
1627 if (t->dev != dev)
1628 return -EEXIST;
1629 } else
1630 t = netdev_priv(dev);
1631
1632 ipip6_tunnel_update(t, &p, fwmark);
1633
1634#ifdef CONFIG_IPV6_SIT_6RD
1635 if (ipip6_netlink_6rd_parms(data, &ip6rd))
1636 return ipip6_tunnel_update_6rd(t, &ip6rd);
1637#endif
1638
1639 return 0;
1640}
1641
1642static size_t ipip6_get_size(const struct net_device *dev)
1643{
1644 return
1645 /* IFLA_IPTUN_LINK */
1646 nla_total_size(4) +
1647 /* IFLA_IPTUN_LOCAL */
1648 nla_total_size(4) +
1649 /* IFLA_IPTUN_REMOTE */
1650 nla_total_size(4) +
1651 /* IFLA_IPTUN_TTL */
1652 nla_total_size(1) +
1653 /* IFLA_IPTUN_TOS */
1654 nla_total_size(1) +
1655 /* IFLA_IPTUN_PMTUDISC */
1656 nla_total_size(1) +
1657 /* IFLA_IPTUN_FLAGS */
1658 nla_total_size(2) +
1659 /* IFLA_IPTUN_PROTO */
1660 nla_total_size(1) +
1661#ifdef CONFIG_IPV6_SIT_6RD
1662 /* IFLA_IPTUN_6RD_PREFIX */
1663 nla_total_size(sizeof(struct in6_addr)) +
1664 /* IFLA_IPTUN_6RD_RELAY_PREFIX */
1665 nla_total_size(4) +
1666 /* IFLA_IPTUN_6RD_PREFIXLEN */
1667 nla_total_size(2) +
1668 /* IFLA_IPTUN_6RD_RELAY_PREFIXLEN */
1669 nla_total_size(2) +
1670#endif
1671 /* IFLA_IPTUN_ENCAP_TYPE */
1672 nla_total_size(2) +
1673 /* IFLA_IPTUN_ENCAP_FLAGS */
1674 nla_total_size(2) +
1675 /* IFLA_IPTUN_ENCAP_SPORT */
1676 nla_total_size(2) +
1677 /* IFLA_IPTUN_ENCAP_DPORT */
1678 nla_total_size(2) +
1679 /* IFLA_IPTUN_FWMARK */
1680 nla_total_size(4) +
1681 0;
1682}
1683
1684static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
1685{
1686 struct ip_tunnel *tunnel = netdev_priv(dev);
1687 struct ip_tunnel_parm *parm = &tunnel->parms;
1688
1689 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
1690 nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
1691 nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
1692 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
1693 nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
1694 nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
1695 !!(parm->iph.frag_off & htons(IP_DF))) ||
1696 nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
1697 nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags) ||
1698 nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
1699 goto nla_put_failure;
1700
1701#ifdef CONFIG_IPV6_SIT_6RD
1702 if (nla_put_in6_addr(skb, IFLA_IPTUN_6RD_PREFIX,
1703 &tunnel->ip6rd.prefix) ||
1704 nla_put_in_addr(skb, IFLA_IPTUN_6RD_RELAY_PREFIX,
1705 tunnel->ip6rd.relay_prefix) ||
1706 nla_put_u16(skb, IFLA_IPTUN_6RD_PREFIXLEN,
1707 tunnel->ip6rd.prefixlen) ||
1708 nla_put_u16(skb, IFLA_IPTUN_6RD_RELAY_PREFIXLEN,
1709 tunnel->ip6rd.relay_prefixlen))
1710 goto nla_put_failure;
1711#endif
1712
1713 if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
1714 tunnel->encap.type) ||
1715 nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
1716 tunnel->encap.sport) ||
1717 nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
1718 tunnel->encap.dport) ||
1719 nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
1720 tunnel->encap.flags))
1721 goto nla_put_failure;
1722
1723 return 0;
1724
1725nla_put_failure:
1726 return -EMSGSIZE;
1727}
1728
1729static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {
1730 [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
1731 [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 },
1732 [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
1733 [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
1734 [IFLA_IPTUN_TOS] = { .type = NLA_U8 },
1735 [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
1736 [IFLA_IPTUN_FLAGS] = { .type = NLA_U16 },
1737 [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
1738#ifdef CONFIG_IPV6_SIT_6RD
1739 [IFLA_IPTUN_6RD_PREFIX] = { .len = sizeof(struct in6_addr) },
1740 [IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NLA_U32 },
1741 [IFLA_IPTUN_6RD_PREFIXLEN] = { .type = NLA_U16 },
1742 [IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NLA_U16 },
1743#endif
1744 [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
1745 [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
1746 [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
1747 [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
1748 [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 },
1749};
1750
1751static void ipip6_dellink(struct net_device *dev, struct list_head *head)
1752{
1753 struct net *net = dev_net(dev);
1754 struct sit_net *sitn = net_generic(net, sit_net_id);
1755
1756 if (dev != sitn->fb_tunnel_dev)
1757 unregister_netdevice_queue(dev, head);
1758}
1759
1760static struct rtnl_link_ops sit_link_ops __read_mostly = {
1761 .kind = "sit",
1762 .maxtype = IFLA_IPTUN_MAX,
1763 .policy = ipip6_policy,
1764 .priv_size = sizeof(struct ip_tunnel),
1765 .setup = ipip6_tunnel_setup,
1766 .validate = ipip6_validate,
1767 .newlink = ipip6_newlink,
1768 .changelink = ipip6_changelink,
1769 .get_size = ipip6_get_size,
1770 .fill_info = ipip6_fill_info,
1771 .dellink = ipip6_dellink,
1772 .get_link_net = ip_tunnel_get_link_net,
1773};
1774
1775static struct xfrm_tunnel sit_handler __read_mostly = {
1776 .handler = ipip6_rcv,
1777 .err_handler = ipip6_err,
1778 .priority = 1,
1779};
1780
1781static struct xfrm_tunnel ipip_handler __read_mostly = {
1782 .handler = ipip_rcv,
1783 .err_handler = ipip6_err,
1784 .priority = 2,
1785};
1786
1787#if IS_ENABLED(CONFIG_MPLS)
1788static struct xfrm_tunnel mplsip_handler __read_mostly = {
1789 .handler = mplsip_rcv,
1790 .err_handler = ipip6_err,
1791 .priority = 2,
1792};
1793#endif
1794
1795static void __net_exit sit_destroy_tunnels(struct net *net,
1796 struct list_head *head)
1797{
1798 struct sit_net *sitn = net_generic(net, sit_net_id);
1799 struct net_device *dev, *aux;
1800 int prio;
1801
1802 for_each_netdev_safe(net, dev, aux)
1803 if (dev->rtnl_link_ops == &sit_link_ops)
1804 unregister_netdevice_queue(dev, head);
1805
1806 for (prio = 1; prio < 4; prio++) {
1807 int h;
1808 for (h = 0; h < IP6_SIT_HASH_SIZE; h++) {
1809 struct ip_tunnel *t;
1810
1811 t = rtnl_dereference(sitn->tunnels[prio][h]);
1812 while (t) {
1813 /* If dev is in the same netns, it has already
1814 * been added to the list by the previous loop.
1815 */
1816 if (!net_eq(dev_net(t->dev), net))
1817 unregister_netdevice_queue(t->dev,
1818 head);
1819 t = rtnl_dereference(t->next);
1820 }
1821 }
1822 }
1823}
1824
1825static int __net_init sit_init_net(struct net *net)
1826{
1827 struct sit_net *sitn = net_generic(net, sit_net_id);
1828 struct ip_tunnel *t;
1829 int err;
1830
1831 sitn->tunnels[0] = sitn->tunnels_wc;
1832 sitn->tunnels[1] = sitn->tunnels_l;
1833 sitn->tunnels[2] = sitn->tunnels_r;
1834 sitn->tunnels[3] = sitn->tunnels_r_l;
1835
1836 sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
1837 NET_NAME_UNKNOWN,
1838 ipip6_tunnel_setup);
1839 if (!sitn->fb_tunnel_dev) {
1840 err = -ENOMEM;
1841 goto err_alloc_dev;
1842 }
1843 dev_net_set(sitn->fb_tunnel_dev, net);
1844 sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops;
1845 /* FB netdevice is special: we have one, and only one per netns.
1846 * Allowing to move it to another netns is clearly unsafe.
1847 */
1848 sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1849
1850 err = register_netdev(sitn->fb_tunnel_dev);
1851 if (err)
1852 goto err_reg_dev;
1853
1854 ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);
1855 ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
1856
1857 t = netdev_priv(sitn->fb_tunnel_dev);
1858
1859 strcpy(t->parms.name, sitn->fb_tunnel_dev->name);
1860 return 0;
1861
1862err_reg_dev:
1863 ipip6_dev_free(sitn->fb_tunnel_dev);
1864 free_netdev(sitn->fb_tunnel_dev);
1865err_alloc_dev:
1866 return err;
1867}
1868
1869static void __net_exit sit_exit_net(struct net *net)
1870{
1871 LIST_HEAD(list);
1872
1873 rtnl_lock();
1874 sit_destroy_tunnels(net, &list);
1875 unregister_netdevice_many(&list);
1876 rtnl_unlock();
1877}
1878
1879static struct pernet_operations sit_net_ops = {
1880 .init = sit_init_net,
1881 .exit = sit_exit_net,
1882 .id = &sit_net_id,
1883 .size = sizeof(struct sit_net),
1884};
1885
1886static void __exit sit_cleanup(void)
1887{
1888 rtnl_link_unregister(&sit_link_ops);
1889 xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
1890 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
1891#if IS_ENABLED(CONFIG_MPLS)
1892 xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS);
1893#endif
1894
1895 unregister_pernet_device(&sit_net_ops);
1896 rcu_barrier(); /* Wait for completion of call_rcu()'s */
1897}
1898
1899static int __init sit_init(void)
1900{
1901 int err;
1902
1903 pr_info("IPv6, IPv4 and MPLS over IPv4 tunneling driver\n");
1904
1905 err = register_pernet_device(&sit_net_ops);
1906 if (err < 0)
1907 return err;
1908 err = xfrm4_tunnel_register(&sit_handler, AF_INET6);
1909 if (err < 0) {
1910 pr_info("%s: can't register ip6ip4\n", __func__);
1911 goto xfrm_tunnel_failed;
1912 }
1913 err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
1914 if (err < 0) {
1915 pr_info("%s: can't register ip4ip4\n", __func__);
1916 goto xfrm_tunnel4_failed;
1917 }
1918#if IS_ENABLED(CONFIG_MPLS)
1919 err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
1920 if (err < 0) {
1921 pr_info("%s: can't register mplsip\n", __func__);
1922 goto xfrm_tunnel_mpls_failed;
1923 }
1924#endif
1925 err = rtnl_link_register(&sit_link_ops);
1926 if (err < 0)
1927 goto rtnl_link_failed;
1928
1929out:
1930 return err;
1931
1932rtnl_link_failed:
1933#if IS_ENABLED(CONFIG_MPLS)
1934 xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS);
1935xfrm_tunnel_mpls_failed:
1936#endif
1937 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
1938xfrm_tunnel4_failed:
1939 xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
1940xfrm_tunnel_failed:
1941 unregister_pernet_device(&sit_net_ops);
1942 goto out;
1943}
1944
1945module_init(sit_init);
1946module_exit(sit_cleanup);
1947MODULE_LICENSE("GPL");
1948MODULE_ALIAS_RTNL_LINK("sit");
1949MODULE_ALIAS_NETDEV("sit0");