/*
 * Linux NET3: GRE over IP protocol decoder.
 *
 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause a complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter; since cpu migration is forbidden once we enter the first
   ndo_xmit(), we force an exit if this counter reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but they would
   really kill the network. The IP hop limit plays the role of
   "t->recursion" in this case, if we copy it from the packet being
   encapsulated to the upper header. It is a very good solution, but
   it introduces two problems:

   - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output would be
     even more informative. This idea turned out to be wrong: only Linux
     complies with rfc1812 now (yes, guys, Linux is the only true router
     now :-)); all other routers (at least those in my neighbourhood)
     return only 8 bytes of payload. That is the end of it.

   Hence, if we want OSPF to work or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect an inner
   encapsulation made by our node. This is difficult or even impossible,
   especially taking fragmentation into account. To be short, ttl is not
   a solution at all.

   Current solution: the solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed the pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when the encapsulating packets have
   DF set. But it is not our problem! Nobody could accuse us; we did
   all that we could. Even if it was your gated that injected the
   fatal route into the network, even if it was you who configured the
   fatal static route: you are innocent. :-)

   Alexey Kuznetsov.
 */

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
				__be32 id, u32 index, bool truncate);

static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;

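/* Handle an ICMP error that quotes a GRE packet we transmitted: look up
 * the tunnel the quoted inner IP header belongs to and update its soft
 * error state (err_count/err_time), rate-limited by IPTUNNEL_ERR_TIMEO.
 */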
static void ipgre_err(struct sk_buff *skb, u32 info,
		      const struct tnl_ptk_info *tpi)
{

	/* All the routers (except for Linux) return only
	   8 bytes of packet payload. It means that precise relaying of
	   ICMP in the real Internet is absolutely infeasible.

	   Moreover, Cisco "wise men" put the GRE key in the third word
	   of the GRE header. That makes it impossible to maintain even
	   soft state for keyed GRE tunnels with checksums enabled. Tell
	   them "thank you".

	   Well, I wonder: rfc1812 was written by a Cisco employee, so
	   why the hell do these idiots break the standards established
	   by themselves???
	 */
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	unsigned int data_len = 0;
	struct ip_tunnel *t;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;

	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
		break;

	case ICMP_REDIRECT:
		break;
	}

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else if (tpi->proto == htons(ETH_P_ERSPAN))
		itn = net_generic(net, erspan_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
			     iph->daddr, iph->saddr, tpi->key);

	if (!t)
		return;

#if IS_ENABLED(CONFIG_IPV6)
	if (tpi->proto == htons(ETH_P_IPV6) &&
	    !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
					type, data_len))
		return;
#endif

	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
}

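/* GRE protocol err_handler entry point: parse the GRE header quoted in
 * the ICMP payload, handle PMTU updates and redirects directly, and
 * defer everything else to ipgre_err().
 */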
static void gre_err(struct sk_buff *skb, u32 info)
{
	/* All the routers (except for Linux) return only
	 * 8 bytes of packet payload. It means that precise relaying of
	 * ICMP in the real Internet is absolutely infeasible.
	 *
	 * Moreover, Cisco "wise men" put the GRE key in the third word
	 * of the GRE header. That makes it impossible to maintain even
	 * soft state for keyed GRE tunnels with checksums enabled. Tell
	 * them "thank you".
	 *
	 * Well, I wonder: rfc1812 was written by a Cisco employee, so
	 * why the hell do these idiots break the standards established
	 * by themselves???
	 */

	const struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct tnl_ptk_info tpi;

	if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
			     iph->ihl * 4) < 0)
		return;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
		return;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
			      IPPROTO_GRE, 0);
		return;
	}

	ipgre_err(skb, info, &tpi);
}

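/* Receive path for ERSPAN-in-GRE: validate and strip the outer headers,
 * recover the session ID and index from the ERSPAN header, and deliver
 * the mirrored frame to the matching tunnel (attaching metadata in
 * collect_md mode).
 */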
static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
		      int gre_hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct metadata_dst *tun_dst = NULL;
	struct ip_tunnel_net *itn;
	struct ip_tunnel *tunnel;
	struct erspanhdr *ershdr;
	const struct iphdr *iph;
	__be32 index;
	int len;

	itn = net_generic(net, erspan_net_id);
	len = gre_hdr_len + sizeof(*ershdr);

	if (unlikely(!pskb_may_pull(skb, len)))
		return -ENOMEM;

	iph = ip_hdr(skb);
	ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);

	/* The original GRE header does not have a key field;
	 * use the ERSPAN 10-bit session ID as the key.
	 */
	tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
	index = ershdr->md.index;
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
				  tpi->flags | TUNNEL_KEY,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		if (__iptunnel_pull_header(skb,
					   gre_hdr_len + sizeof(*ershdr),
					   htons(ETH_P_TEB),
					   false, false) < 0)
			goto drop;

		if (tunnel->collect_md) {
			struct ip_tunnel_info *info;
			struct erspan_metadata *md;
			__be64 tun_id;
			__be16 flags;

			tpi->flags |= TUNNEL_KEY;
			flags = tpi->flags;
			tun_id = key32_to_tunnel_id(tpi->key);

			tun_dst = ip_tun_rx_dst(skb, flags,
						tun_id, sizeof(*md));
			if (!tun_dst)
				return PACKET_REJECT;

			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
			if (!md) {
				dst_release((struct dst_entry *)tun_dst);
				return PACKET_REJECT;
			}

			md->index = index;
			info = &tun_dst->u.tun_info;
			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
			info->options_len = sizeof(*md);
		} else {
			tunnel->index = ntohl(index);
		}

		skb_reset_mac_header(skb);
		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_REJECT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

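/* Common receive path for ipgre and gretap: find the tunnel matching the
 * outer addresses, key and flags, strip the tunnel headers, and hand the
 * skb to ip_tunnel_rcv(). Returns PACKET_NEXT when no tunnel matched so
 * the caller can retry against another tunnel table.
 */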
static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
	struct metadata_dst *tun_dst = NULL;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;

	iph = ip_hdr(skb);
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
					   raw_proto, false) < 0)
			goto drop;

		if (tunnel->dev->type != ARPHRD_NONE)
			skb_pop_mac_header(skb);
		else
			skb_reset_mac_header(skb);
		if (tunnel->collect_md) {
			__be16 flags;
			__be64 tun_id;

			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
			tun_id = key32_to_tunnel_id(tpi->key);
			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
			if (!tun_dst)
				return PACKET_REJECT;
		}

		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_NEXT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		     int hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	int res;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
		/* ipgre tunnels in collect metadata mode should also
		 * receive ETH_P_TEB traffic.
		 */
		itn = net_generic(net, ipgre_net_id);
		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
	}
	return res;
}

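/* Top-level GRE receive handler registered via gre_add_protocol():
 * parses the GRE header and dispatches to the ERSPAN or ipgre/gretap
 * receive paths, generating an ICMP port-unreachable when nobody
 * claims the packet.
 */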
static int gre_rcv(struct sk_buff *skb)
{
	struct tnl_ptk_info tpi;
	bool csum_err = false;
	int hdr_len;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
	}
#endif

	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
	if (hdr_len < 0)
		goto drop;

	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
			return 0;
		goto out;
	}

	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
		return 0;

out:
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
	kfree_skb(skb);
	return 0;
}

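/* Shared transmit tail for ipgre, gretap and erspan devices: bump the
 * output sequence number if TUNNEL_SEQ is set, push the GRE header, and
 * let ip_tunnel_xmit() build and send the outer IP header.
 */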
static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->parms.o_flags & TUNNEL_SEQ)
		tunnel->o_seqno++;

	/* Push GRE header. */
	gre_build_header(skb, tunnel->tun_hlen,
			 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
			 htonl(tunnel->o_seqno));

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}

static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}

static struct rtable *gre_get_rt(struct sk_buff *skb,
				 struct net_device *dev,
				 struct flowi4 *fl,
				 const struct ip_tunnel_key *key)
{
	struct net *net = dev_net(dev);

	memset(fl, 0, sizeof(*fl));
	fl->daddr = key->u.ipv4.dst;
	fl->saddr = key->u.ipv4.src;
	fl->flowi4_tos = RT_TOS(key->tos);
	fl->flowi4_mark = skb->mark;
	fl->flowi4_proto = IPPROTO_GRE;

	return ip_route_output_key(net, fl);
}

static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
				      struct net_device *dev,
				      struct flowi4 *fl,
				      int tunnel_hlen)
{
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	int min_headroom;
	bool use_cache;
	int err;

	tun_info = skb_tunnel_info(skb);
	key = &tun_info->key;
	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);

	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
	if (!rt) {
		rt = gre_get_rt(skb, dev, fl, key);
		if (IS_ERR(rt))
			goto err_free_skb;
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl->saddr);
	}

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
			+ tunnel_hlen + sizeof(struct iphdr);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) +
						16);
		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
				       0, GFP_ATOMIC);
		if (unlikely(err))
			goto err_free_rt;
	}
	return rt;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NULL;
}

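/* Flow-based (collect_md) transmit: route and encapsulate using the
 * per-packet tunnel metadata carried in the skb instead of the
 * device's configured parameters.
 */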
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			__be16 proto)
{
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	struct flowi4 fl;
	int tunnel_hlen;
	__be16 df, flags;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	tunnel_hlen = gre_calc_hlen(key->tun_flags);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	/* Push Tunnel header. */
	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
		goto err_free_rt;

	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
	gre_build_header(skb, tunnel_hlen, flags, proto,
			 tunnel_id_to_key32(tun_info->key.tun_id), 0);

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}

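/* Flow-based ERSPAN transmit: like gre_fb_xmit(), but inserts the ERSPAN
 * header (built from the metadata options) between the GRE header and
 * the mirrored frame, truncating frames that exceed the device MTU.
 */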
static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			   __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct erspan_metadata *md;
	struct rtable *rt = NULL;
	bool truncate = false;
	struct flowi4 fl;
	int tunnel_hlen;
	__be16 df;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;

	/* ERSPAN has a fixed 8-byte GRE header */
	tunnel_hlen = 8 + sizeof(struct erspanhdr);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	if (gre_handle_offloads(skb, false))
		goto err_free_rt;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	if (tun_info->options_len < sizeof(*md))
		goto err_free_rt;

	md = ip_tunnel_info_opts(tun_info);
	if (!md)
		goto err_free_rt;

	erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
			    ntohl(md->index), truncate);

	gre_build_header(skb, 8, TUNNEL_SEQ,
			 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}

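/* ndo_fill_metadata_dst: resolve the route the packet would take and
 * record the chosen source address in the tunnel metadata, so callers
 * such as openvswitch can see the complete flow key.
 */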
static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	struct rtable *rt;
	struct flowi4 fl4;

	if (ip_tunnel_info_af(info) != AF_INET)
		return -EINVAL;

	rt = gre_get_rt(skb, dev, &fl4, &info->key);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	ip_rt_put(rt);
	info->key.u.ipv4.src = fl4.saddr;
	return 0;
}

static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (dev->header_ops) {
		/* Need space for new headers */
		if (skb_cow_head(skb, dev->needed_headroom -
				      (tunnel->hlen + sizeof(struct iphdr))))
			goto free_skb;

		tnl_params = (const struct iphdr *)skb->data;

		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
		 * to gre header.
		 */
		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
		skb_reset_mac_header(skb);
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))
			goto free_skb;

		tnl_params = &tunnel->parms.iph;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	__gre_xmit(skb, dev, tnl_params, skb->protocol);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

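/* Map the IP TOS byte to an 802.1 CoS value for the ERSPAN header:
 * drop the two ECN bits to get the 6-bit DSCP, then keep its top three
 * bits (the legacy IP precedence). For example, tos 0xb8 (DSCP 46, EF)
 * yields cos 5.
 */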
static inline u8 tos_to_cos(u8 tos)
{
	u8 dscp, cos;

	dscp = tos >> 2;
	cos = dscp >> 3;
	return cos;
}

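/* Build the ERSPAN header in front of the mirrored frame: version and
 * VLAN tci, session ID, CoS derived from the outer TOS, encapsulation
 * type, truncation bit, and the port index.
 */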
static void erspan_build_header(struct sk_buff *skb,
				__be32 id, u32 index, bool truncate)
{
	struct iphdr *iphdr = ip_hdr(skb);
	struct ethhdr *eth = (struct ethhdr *)skb->data;
	enum erspan_encap_type enc_type;
	struct erspanhdr *ershdr;
	struct qtag_prefix {
		__be16 eth_type;
		__be16 tci;
	} *qp;
	u16 vlan_tci = 0;

	enc_type = ERSPAN_ENCAP_NOVLAN;

	/* If the mirrored packet has a vlan tag, extract the tci and
	 * preserve the vlan header in the mirrored frame.
	 */
	if (eth->h_proto == htons(ETH_P_8021Q)) {
		qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
		vlan_tci = ntohs(qp->tci);
		enc_type = ERSPAN_ENCAP_INFRAME;
	}

	skb_push(skb, sizeof(*ershdr));
	ershdr = (struct erspanhdr *)skb->data;
	memset(ershdr, 0, sizeof(*ershdr));

	ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
				 (ERSPAN_VERSION << VER_OFFSET));
	ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
			   ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) |
			   (enc_type << EN_OFFSET & EN_MASK) |
			   ((truncate << T_OFFSET) & T_MASK));
	ershdr->md.index = htonl(index & INDEX_MASK);
}

static netdev_tx_t erspan_xmit(struct sk_buff *skb,
			       struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	bool truncate = false;

	if (tunnel->collect_md) {
		erspan_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, false))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	/* Push ERSPAN header */
	erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate);
	tunnel->parms.o_flags &= ~TUNNEL_KEY;
	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static int ipgre_tunnel_ioctl(struct net_device *dev,
			      struct ifreq *ifr, int cmd)
{
	int err;
	struct ip_tunnel_parm p;

	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
		return -EFAULT;
	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
		    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
			return -EINVAL;
	}
	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);

	err = ip_tunnel_ioctl(dev, &p, cmd);
	if (err)
		return err;

	p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
	p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);

	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
		return -EFAULT;
	return 0;
}

/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could have made something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph + 1);
	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen + sizeof(*iph);

	return -(t->hlen + sizeof(*iph));
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create = ipgre_header,
	.parse = ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(t->net, &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (!__in_dev_get_rtnl(dev))
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;

		in_dev = inetdev_by_index(t->net, t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}
#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init = ipgre_tunnel_init,
	.ndo_uninit = ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open = ipgre_open,
	.ndo_stop = ipgre_close,
#endif
	.ndo_start_xmit = ipgre_xmit,
	.ndo_do_ioctl = ipgre_tunnel_ioctl,
	.ndo_change_mtu = ip_tunnel_change_mtu,
	.ndo_get_stats64 = ip_tunnel_get_stats64,
	.ndo_get_iflink = ip_tunnel_get_iflink,
};

#define GRE_FEATURES	(NETIF_F_SG |		\
			 NETIF_F_FRAGLIST |	\
			 NETIF_F_HIGHDMA |	\
			 NETIF_F_HW_CSUM)

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops = &ipgre_netdev_ops;
	dev->type = ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);
}

static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	int t_hlen;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;

	t_hlen = tunnel->hlen + sizeof(struct iphdr);

	dev->features |= GRE_FEATURES;
	dev->hw_features |= GRE_FEATURES;

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		/* TCP offload with GRE SEQ is not supported, nor
		 * can we support 2 levels of outer headers requiring
		 * an update.
		 */
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
			dev->features |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		}

		/* Can use a lockless transmit, unless we generate
		 * output sequences
		 */
		dev->features |= NETIF_F_LLTX;
	}
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	memcpy(dev->dev_addr, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags = IFF_NOARP;
	netif_keep_dst(dev);
	dev->addr_len = 4;

	if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else if (!tunnel->collect_md) {
		dev->header_ops = &ipgre_header_ops;
	}

	return ip_tunnel_init(dev);
}

static const struct gre_protocol ipgre_protocol = {
	.handler = gre_rcv,
	.err_handler = gre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);

	ip_tunnel_delete_net(itn, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
				 struct netlink_ext_ack *extack)
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION | GRE_ROUTING))
		return -EINVAL;

	if (data[IFLA_GRE_COLLECT_METADATA] &&
	    data[IFLA_GRE_ENCAP_TYPE] &&
	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
			      struct netlink_ext_ack *extack)
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data, extack);
}

static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
	__be16 flags = 0;
	int ret;

	if (!data)
		return 0;

	ret = ipgre_tap_validate(tb, data, extack);
	if (ret)
		return ret;

	/* ERSPAN should only have the GRE sequence and key flags */
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (!data[IFLA_GRE_COLLECT_METADATA] &&
	    flags != (GRE_SEQ | GRE_KEY))
		return -EINVAL;

	/* The ERSPAN session ID is only 10 bits. Since we reuse the
	 * 32-bit key field as the ID, check its range.
	 */
	if (data[IFLA_GRE_IKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
		return -EINVAL;

	if (data[IFLA_GRE_OKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
		return -EINVAL;

	return 0;
}

static int ipgre_netlink_parms(struct net_device *dev,
			       struct nlattr *data[],
			       struct nlattr *tb[],
			       struct ip_tunnel_parm *parms,
			       __u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);

	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return 0;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
		if (t->ignore_df)
			return -EINVAL;
		parms->iph.frag_off = htons(IP_DF);
	}

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		t->collect_md = true;
		if (dev->type == ARPHRD_IPGRE)
			dev->type = ARPHRD_NONE;
	}

	if (data[IFLA_GRE_IGNORE_DF]) {
		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
		    && (parms->iph.frag_off & htons(IP_DF)))
			return -EINVAL;
		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
	}

	if (data[IFLA_GRE_FWMARK])
		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);

	if (data[IFLA_GRE_ERSPAN_INDEX]) {
		t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);

		if (t->index & ~INDEX_MASK)
			return -EINVAL;
	}

	return 0;
}

/* This function returns true when ENCAP attributes are present in the nl msg */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
				      struct ip_tunnel_encap *ipencap)
{
	bool ret = false;

	memset(ipencap, 0, sizeof(*ipencap));

	if (!data)
		return ret;

	if (data[IFLA_GRE_ENCAP_TYPE]) {
		ret = true;
		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
	}

	if (data[IFLA_GRE_ENCAP_FLAGS]) {
		ret = true;
		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
	}

	if (data[IFLA_GRE_ENCAP_SPORT]) {
		ret = true;
		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
	}

	if (data[IFLA_GRE_ENCAP_DPORT]) {
		ret = true;
		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
	}

	return ret;
}

static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init = gre_tap_init,
	.ndo_uninit = ip_tunnel_uninit,
	.ndo_start_xmit = gre_tap_xmit,
	.ndo_set_mac_address = eth_mac_addr,
	.ndo_validate_addr = eth_validate_addr,
	.ndo_change_mtu = ip_tunnel_change_mtu,
	.ndo_get_stats64 = ip_tunnel_get_stats64,
	.ndo_get_iflink = ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst = gre_fill_metadata_dst,
};

static int erspan_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen;

	tunnel->tun_hlen = 8;
	tunnel->parms.iph.protocol = IPPROTO_GRE;
	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
		       sizeof(struct erspanhdr);
	t_hlen = tunnel->hlen + sizeof(struct iphdr);

	dev->features |= GRE_FEATURES;
	dev->hw_features |= GRE_FEATURES;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops erspan_netdev_ops = {
	.ndo_init = erspan_tunnel_init,
	.ndo_uninit = ip_tunnel_uninit,
	.ndo_start_xmit = erspan_xmit,
	.ndo_set_mac_address = eth_mac_addr,
	.ndo_validate_addr = eth_validate_addr,
	.ndo_change_mtu = ip_tunnel_change_mtu,
	.ndo_get_stats64 = ip_tunnel_get_stats64,
	.ndo_get_iflink = ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst = gre_fill_metadata_dst,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->max_mtu = 0;
	dev->netdev_ops = &gre_tap_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);
}

static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
{
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;
	__u32 fwmark = 0;
	int err;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);

		err = ip_tunnel_encap_setup(t, &ipencap);
		if (err < 0)
			return err;
	}

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;
	return ip_tunnel_newlink(dev, tb, &p, fwmark);
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[],
			    struct netlink_ext_ack *extack)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;
	__u32 fwmark = t->fwmark;
	int err;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		err = ip_tunnel_encap_setup(t, &ipencap);
		if (err < 0)
			return err;
	}

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;
	return ip_tunnel_changelink(dev, tb, &p, fwmark);
}

static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		/* IFLA_GRE_ENCAP_TYPE */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_SPORT */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_DPORT */
		nla_total_size(2) +
		/* IFLA_GRE_COLLECT_METADATA */
		nla_total_size(0) +
		/* IFLA_GRE_IGNORE_DF */
		nla_total_size(1) +
		/* IFLA_GRE_FWMARK */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_INDEX */
		nla_total_size(4) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
			 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))) ||
	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
			t->encap.type) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
			 t->encap.sport) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
			 t->encap.dport) ||
	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
			t->encap.flags))
		goto nla_put_failure;

	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
		goto nla_put_failure;

	if (t->collect_md) {
		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
			goto nla_put_failure;
	}

	if (t->index)
		if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
			goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static void erspan_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->max_mtu = 0;
	dev->netdev_ops = &erspan_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, erspan_net_id);
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK] = { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
	[IFLA_GRE_IKEY] = { .type = NLA_U32 },
	[IFLA_GRE_OKEY] = { .type = NLA_U32 },
	[IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL] = { .type = NLA_U8 },
	[IFLA_GRE_TOS] = { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
	[IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
	[IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
	[IFLA_GRE_FWMARK] = { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
};

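/* The three rtnl_link_ops below expose these device types to userspace.
 * A sketch of matching iproute2 usage (assuming a reasonably recent
 * iproute2; exact option spelling may differ by version):
 *
 *   ip link add gre1 type gre local 10.0.0.1 remote 10.0.0.2 ttl 64
 *   ip link add gretap1 type gretap local 10.0.0.1 remote 10.0.0.2
 *   ip link add erspan1 type erspan local 10.0.0.1 remote 10.0.0.2 \
 *	seq key 100 erspan 123
 */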
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind = "gre",
	.maxtype = IFLA_GRE_MAX,
	.policy = ipgre_policy,
	.priv_size = sizeof(struct ip_tunnel),
	.setup = ipgre_tunnel_setup,
	.validate = ipgre_tunnel_validate,
	.newlink = ipgre_newlink,
	.changelink = ipgre_changelink,
	.dellink = ip_tunnel_dellink,
	.get_size = ipgre_get_size,
	.fill_info = ipgre_fill_info,
	.get_link_net = ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind = "gretap",
	.maxtype = IFLA_GRE_MAX,
	.policy = ipgre_policy,
	.priv_size = sizeof(struct ip_tunnel),
	.setup = ipgre_tap_setup,
	.validate = ipgre_tap_validate,
	.newlink = ipgre_newlink,
	.changelink = ipgre_changelink,
	.dellink = ip_tunnel_dellink,
	.get_size = ipgre_get_size,
	.fill_info = ipgre_fill_info,
	.get_link_net = ip_tunnel_get_link_net,
};

static struct rtnl_link_ops erspan_link_ops __read_mostly = {
	.kind = "erspan",
	.maxtype = IFLA_GRE_MAX,
	.policy = ipgre_policy,
	.priv_size = sizeof(struct ip_tunnel),
	.setup = erspan_setup,
	.validate = erspan_validate,
	.newlink = ipgre_newlink,
	.changelink = ipgre_changelink,
	.dellink = ip_tunnel_dellink,
	.get_size = ipgre_get_size,
	.fill_info = ipgre_fill_info,
	.get_link_net = ip_tunnel_get_link_net,
};

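/* Create a flow-based (collect_md) gretap device from inside the kernel;
 * used by callers such as openvswitch that need a fully metadata-driven
 * GRE port.
 */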
struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
					u8 name_assign_type)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	struct ip_tunnel *t;
	int err;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, name, name_assign_type,
			       &ipgre_tap_ops, tb);
	if (IS_ERR(dev))
		return dev;

	/* Configure flow-based GRE device. */
	t = netdev_priv(dev);
	t->collect_md = true;

	err = ipgre_newlink(net, dev, tb, NULL, NULL);
	if (err < 0) {
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto out;

	err = rtnl_configure_link(dev, NULL);
	if (err < 0)
		goto out;

	return dev;
out:
	ip_tunnel_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);

static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);

	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit = ipgre_tap_exit_net,
	.id = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __net_init erspan_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, erspan_net_id,
				  &erspan_link_ops, "erspan0");
}

static void __net_exit erspan_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, erspan_net_id);

	ip_tunnel_delete_net(itn, &erspan_link_ops);
}

static struct pernet_operations erspan_net_ops = {
	.init = erspan_init_net,
	.exit = erspan_exit_net,
	.id = &erspan_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0)
		goto pnet_tap_failed;

	err = register_pernet_device(&erspan_net_ops);
	if (err < 0)
		goto pnet_erspan_failed;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

	err = rtnl_link_register(&erspan_link_ops);
	if (err < 0)
		goto erspan_link_failed;

	return 0;

erspan_link_failed:
	rtnl_link_unregister(&ipgre_tap_ops);
tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
	unregister_pernet_device(&erspan_net_ops);
pnet_erspan_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	unregister_pernet_device(&ipgre_net_ops);
	return err;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	rtnl_link_unregister(&erspan_link_ops);
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
	unregister_pernet_device(&erspan_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
MODULE_ALIAS_NETDEV("erspan0");