/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause a complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl),
   and silently drop the packet when it expires. It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter, since when we enter the first ndo_xmit(), cpu migration is
   forbidden. We force an exit if this counter reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion"
   in this case, if we copy it from the packet being encapsulated to
   the upper header. It is a very good solution, but it introduces
   two problems:

   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output would be
     even more informative. This idea appeared to be wrong: only Linux
     complies with rfc1812 now (yes, guys, Linux is the only true router
     now :-)); all other routers (at least in my neighbourhood) return
     only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work, or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect an inner
   encapsulation made by our node. It is difficult or even impossible,
   especially taking fragmentation into account. To be short, ttl is
   not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when encapsulating packets have DF set.
   But it is not our problem! Nobody could accuse us; we did
   all that we could do. Even if it is your gated that injected the
   fatal route into the network, even if it were you who configured the
   fatal static route: you are innocent. :-)

   Alexey Kuznetsov.
 */
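
/* A minimal sketch (not kernel code) of the percpu recursion guard that the
 * comment above describes. The names example_xmit_recursion,
 * example_xmit_enter/exit and EXAMPLE_RECURSION_LIMIT are illustrative
 * assumptions; the real counter lives in the core transmit path, not in this
 * file. Guarded by #if 0 so it stays documentation only.
 */
#if 0
static DEFINE_PER_CPU(unsigned int, example_xmit_recursion);
#define EXAMPLE_RECURSION_LIMIT 4

static bool example_xmit_enter(void)
{
	/* cpu migration is forbidden inside ndo_start_xmit(), so a plain
	 * per-cpu counter is race-free here.
	 */
	if (__this_cpu_read(example_xmit_recursion) > EXAMPLE_RECURSION_LIMIT)
		return false;	/* dead loop detected: caller must drop skb */
	__this_cpu_inc(example_xmit_recursion);
	return true;
}

static void example_xmit_exit(void)
{
	__this_cpu_dec(example_xmit_recursion);
}
#endif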

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
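
/* Runtime note: with the 0644 permissions above, when this driver is built
 * as a module the parameter is expected to appear under
 * /sys/module/ip_gre/parameters/log_ecn_error (path inferred from how
 * module_param() exposes parameters, not stated in this file).
 */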

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
				u32 id, u32 index,
				bool truncate, bool is_ipv4);

static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;

static void ipgre_err(struct sk_buff *skb, u32 info,
		      const struct tnl_ptk_info *tpi)
{

	/* All the routers (except for Linux) return only
	 * 8 bytes of packet payload. This means that precise relaying of
	 * ICMP in the real Internet is absolutely infeasible.
	 *
	 * Moreover, Cisco "wise men" put the GRE key in the third word
	 * of the GRE header. That makes it impossible to maintain even
	 * soft state for keyed GRE tunnels with checksums enabled.
	 * Tell them "thank you".
	 *
	 * Well, I wonder: rfc1812 was written by a Cisco employee,
	 * so why the hell do these idiots break standards established
	 * by themselves?
	 */
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	unsigned int data_len = 0;
	struct ip_tunnel *t;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			 * rfc2003 contains "deep thoughts" about NET_UNREACH,
			 * I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;

	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
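		/* Per RFC 4884, this octet of the ICMP header carries the
		 * length of the original datagram in 32-bit words, hence
		 * the multiplication by 4 below.
		 */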
		data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
		break;

	case ICMP_REDIRECT:
		break;
	}

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else if (tpi->proto == htons(ETH_P_ERSPAN) ||
		 tpi->proto == htons(ETH_P_ERSPAN2))
		itn = net_generic(net, erspan_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
			     iph->daddr, iph->saddr, tpi->key);

	if (!t)
		return;

#if IS_ENABLED(CONFIG_IPV6)
	if (tpi->proto == htons(ETH_P_IPV6) &&
	    !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
					type, data_len))
		return;
#endif

	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return;

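	/* Error-burst bookkeeping: keep counting while errors arrive within
	 * IPTUNNEL_ERR_TIMEO of each other, otherwise start a new burst.
	 */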
	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
}

static void gre_err(struct sk_buff *skb, u32 info)
{
	/* All the routers (except for Linux) return only
	 * 8 bytes of packet payload. This means that precise relaying of
	 * ICMP in the real Internet is absolutely infeasible.
	 *
	 * Moreover, Cisco "wise men" put the GRE key in the third word
	 * of the GRE header. That makes it impossible to maintain even
	 * soft state for keyed GRE tunnels with checksums enabled.
	 * Tell them "thank you".
	 *
	 * Well, I wonder: rfc1812 was written by a Cisco employee,
	 * so why the hell do these idiots break standards established
	 * by themselves?
	 */

	const struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct tnl_ptk_info tpi;

	if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
			     iph->ihl * 4) < 0)
		return;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
		return;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
			      IPPROTO_GRE, 0);
		return;
	}

	ipgre_err(skb, info, &tpi);
}

static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
		      int gre_hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct metadata_dst *tun_dst = NULL;
	struct erspan_base_hdr *ershdr;
	struct ip_tunnel_net *itn;
	struct ip_tunnel *tunnel;
	const struct iphdr *iph;
	struct erspan_md2 *md2;
	int ver;
	int len;

	itn = net_generic(net, erspan_net_id);

	iph = ip_hdr(skb);
	ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
	ver = ershdr->ver;

	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
				  tpi->flags | TUNNEL_KEY,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		len = gre_hdr_len + erspan_hdr_len(ver);
		if (unlikely(!pskb_may_pull(skb, len)))
			return PACKET_REJECT;

		if (__iptunnel_pull_header(skb,
					   len,
					   htons(ETH_P_TEB),
					   false, false) < 0)
			goto drop;

		if (tunnel->collect_md) {
			struct erspan_metadata *pkt_md, *md;
			struct ip_tunnel_info *info;
			unsigned char *gh;
			__be64 tun_id;
			__be16 flags;

			tpi->flags |= TUNNEL_KEY;
			flags = tpi->flags;
			tun_id = key32_to_tunnel_id(tpi->key);

			tun_dst = ip_tun_rx_dst(skb, flags,
						tun_id, sizeof(*md));
			if (!tun_dst)
				return PACKET_REJECT;

			/* skb can be uncloned in __iptunnel_pull_header, so
			 * old pkt_md is no longer valid and we need to reset
			 * it
			 */
			gh = skb_network_header(skb) +
			     skb_network_header_len(skb);
			pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
							    sizeof(*ershdr));
			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
			md->version = ver;
			md2 = &md->u.md2;
			memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
						       ERSPAN_V2_MDSIZE);

			info = &tun_dst->u.tun_info;
			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
			info->options_len = sizeof(*md);
		}

		skb_reset_mac_header(skb);
		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_REJECT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
	struct metadata_dst *tun_dst = NULL;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;

	iph = ip_hdr(skb);
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
					   raw_proto, false) < 0)
			goto drop;

		if (tunnel->dev->type != ARPHRD_NONE)
			skb_pop_mac_header(skb);
		else
			skb_reset_mac_header(skb);
		if (tunnel->collect_md) {
			__be16 flags;
			__be64 tun_id;

			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
			tun_id = key32_to_tunnel_id(tpi->key);
			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
			if (!tun_dst)
				return PACKET_REJECT;
		}

		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_NEXT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		     int hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	int res;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
		/* ipgre tunnels in collect metadata mode should also
		 * receive ETH_P_TEB traffic.
		 */
		itn = net_generic(net, ipgre_net_id);
		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
	}
	return res;
}

static int gre_rcv(struct sk_buff *skb)
{
	struct tnl_ptk_info tpi;
	bool csum_err = false;
	int hdr_len;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
	}
#endif

	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
	if (hdr_len < 0)
		goto drop;

	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
		     tpi.proto == htons(ETH_P_ERSPAN2))) {
		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
			return 0;
		goto out;
	}

	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
		return 0;

out:
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
	kfree_skb(skb);
	return 0;
}

static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->parms.o_flags & TUNNEL_SEQ)
		tunnel->o_seqno++;

	/* Push GRE header. */
	gre_build_header(skb, tunnel->tun_hlen,
			 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
			 htonl(tunnel->o_seqno));

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}

static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}

static struct rtable *gre_get_rt(struct sk_buff *skb,
				 struct net_device *dev,
				 struct flowi4 *fl,
				 const struct ip_tunnel_key *key)
{
	struct net *net = dev_net(dev);

	memset(fl, 0, sizeof(*fl));
	fl->daddr = key->u.ipv4.dst;
	fl->saddr = key->u.ipv4.src;
	fl->flowi4_tos = RT_TOS(key->tos);
	fl->flowi4_mark = skb->mark;
	fl->flowi4_proto = IPPROTO_GRE;

	return ip_route_output_key(net, fl);
}

static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
				      struct net_device *dev,
				      struct flowi4 *fl,
				      int tunnel_hlen)
{
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	int min_headroom;
	bool use_cache;
	int err;

	tun_info = skb_tunnel_info(skb);
	key = &tun_info->key;
	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);

	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
	if (!rt) {
		rt = gre_get_rt(skb, dev, fl, key);
		if (IS_ERR(rt))
			goto err_free_skb;
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl->saddr);
	}

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
			+ tunnel_hlen + sizeof(struct iphdr);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) +
						16);
		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
				       0, GFP_ATOMIC);
		if (unlikely(err))
			goto err_free_rt;
	}
	return rt;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NULL;
}

static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			__be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	struct flowi4 fl;
	int tunnel_hlen;
	__be16 df, flags;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	tunnel_hlen = gre_calc_hlen(key->tun_flags);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	/* Push Tunnel header. */
	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
		goto err_free_rt;

	flags = tun_info->key.tun_flags &
		(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
	gre_build_header(skb, tunnel_hlen, flags, proto,
			 tunnel_id_to_key32(tun_info->key.tun_id),
			 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}

static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct erspan_metadata *md;
	struct rtable *rt = NULL;
	bool truncate = false;
	__be16 df, proto;
	struct flowi4 fl;
	int tunnel_hlen;
	int version;
	int nhoff;
	int thoff;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
		goto err_free_rt;
	if (tun_info->options_len < sizeof(*md))
		goto err_free_rt;
	md = ip_tunnel_info_opts(tun_info);

	/* ERSPAN has a fixed 8-byte GRE header */
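	/* The 8 bytes are the 4-byte GRE base header plus a 4-byte sequence
	 * number (TUNNEL_SEQ, see gre_build_header() below); the ERSPAN
	 * session ID travels in the ERSPAN header itself rather than in a
	 * GRE key.
	 */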
	version = md->version;
	tunnel_hlen = 8 + erspan_hdr_len(version);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	if (gre_handle_offloads(skb, false))
		goto err_free_rt;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	nhoff = skb_network_header(skb) - skb_mac_header(skb);
	if (skb->protocol == htons(ETH_P_IP) &&
	    (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
		truncate = true;

	thoff = skb_transport_header(skb) - skb_mac_header(skb);
	if (skb->protocol == htons(ETH_P_IPV6) &&
	    (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
		truncate = true;

	if (version == 1) {
		erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
				    ntohl(md->u.index), truncate, true);
		proto = htons(ETH_P_ERSPAN);
	} else if (version == 2) {
		erspan_build_header_v2(skb,
				       ntohl(tunnel_id_to_key32(key->tun_id)),
				       md->u.md2.dir,
				       get_hwid(&md->u.md2),
				       truncate, true);
		proto = htons(ETH_P_ERSPAN2);
	} else {
		goto err_free_rt;
	}

	gre_build_header(skb, 8, TUNNEL_SEQ,
			 proto, 0, htonl(tunnel->o_seqno++));

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}

static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	struct rtable *rt;
	struct flowi4 fl4;

	if (ip_tunnel_info_af(info) != AF_INET)
		return -EINVAL;

	rt = gre_get_rt(skb, dev, &fl4, &info->key);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	ip_rt_put(rt);
	info->key.u.ipv4.src = fl4.saddr;
	return 0;
}

static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (dev->header_ops) {
		/* Need space for new headers */
		if (skb_cow_head(skb, dev->needed_headroom -
				      (tunnel->hlen + sizeof(struct iphdr))))
			goto free_skb;

		tnl_params = (const struct iphdr *)skb->data;

		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
		 * to gre header.
		 */
		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
		skb_reset_mac_header(skb);
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))
			goto free_skb;

		tnl_params = &tunnel->parms.iph;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	__gre_xmit(skb, dev, tnl_params, skb->protocol);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t erspan_xmit(struct sk_buff *skb,
			       struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	bool truncate = false;
	__be16 proto;

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		erspan_fb_xmit(skb, dev);
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, false))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	/* Push ERSPAN header */
	if (tunnel->erspan_ver == 1) {
		erspan_build_header(skb, ntohl(tunnel->parms.o_key),
				    tunnel->index,
				    truncate, true);
		proto = htons(ETH_P_ERSPAN);
	} else if (tunnel->erspan_ver == 2) {
		erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
				       tunnel->dir, tunnel->hwid,
				       truncate, true);
		proto = htons(ETH_P_ERSPAN2);
	} else {
		goto free_skb;
	}

	tunnel->parms.o_flags &= ~TUNNEL_KEY;
	__gre_xmit(skb, dev, &tunnel->parms.iph, proto);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static void ipgre_link_update(struct net_device *dev, bool set_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int len;

	len = tunnel->tun_hlen;
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	len = tunnel->tun_hlen - len;
	tunnel->hlen = tunnel->hlen + len;

	dev->needed_headroom = dev->needed_headroom + len;
	if (set_mtu)
		dev->mtu = max_t(int, dev->mtu - len, 68);

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    tunnel->encap.type == TUNNEL_ENCAP_NONE) {
			dev->features |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		} else {
			dev->features &= ~NETIF_F_GSO_SOFTWARE;
			dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
		}
		dev->features |= NETIF_F_LLTX;
	} else {
		dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
		dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
	}
}

static int ipgre_tunnel_ioctl(struct net_device *dev,
			      struct ifreq *ifr, int cmd)
{
	struct ip_tunnel_parm p;
	int err;

	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
		return -EFAULT;

	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
		    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
			return -EINVAL;
	}

	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);

	err = ip_tunnel_ioctl(dev, &p, cmd);
	if (err)
		return err;

	if (cmd == SIOCCHGTUNNEL) {
		struct ip_tunnel *t = netdev_priv(dev);

		t->parms.i_flags = p.i_flags;
		t->parms.o_flags = p.o_flags;

		if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
			ipgre_link_update(dev, true);
	}

	p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
	p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);

	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
		return -EFAULT;

	return 0;
}

/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.

   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could have made something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
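
   (A rough iproute2-only equivalent of the ifconfig lines above, added as
    an aside; the placeholder addresses are unchanged:

   ip link set Universe up
   ip addr add fe80::<Your_real_addr>/10 dev Universe
   ip addr add fec0:6666:6666::<Your_real_addr>/96 dev Universe
   )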
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph + 1);
	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen + sizeof(*iph);

	return -(t->hlen + sizeof(*iph));
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(t->net, &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (!__in_dev_get_rtnl(dev))
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;

		in_dev = inetdev_by_index(t->net, t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}
#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
};

#define GRE_FEATURES	(NETIF_F_SG |		\
			 NETIF_F_FRAGLIST |	\
			 NETIF_F_HIGHDMA |	\
			 NETIF_F_HW_CSUM)

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops = &ipgre_netdev_ops;
	dev->type = ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);
}

static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;

	dev->features |= GRE_FEATURES;
	dev->hw_features |= GRE_FEATURES;

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		/* TCP offload with GRE SEQ is not supported, nor
		 * can we support 2 levels of outer headers requiring
		 * an update.
		 */
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
			dev->features |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		}

		/* Can use a lockless transmit, unless we generate
		 * output sequences
		 */
		dev->features |= NETIF_F_LLTX;
	}
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	memcpy(dev->dev_addr, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags = IFF_NOARP;
	netif_keep_dst(dev);
	dev->addr_len = 4;

	if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else if (!tunnel->collect_md) {
		dev->header_ops = &ipgre_header_ops;
	}

	return ip_tunnel_init(dev);
}

static const struct gre_protocol ipgre_protocol = {
	.handler     = gre_rcv,
	.err_handler = gre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit_batch = ipgre_exit_batch_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
				 struct netlink_ext_ack *extack)
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION | GRE_ROUTING))
		return -EINVAL;

	if (data[IFLA_GRE_COLLECT_METADATA] &&
	    data[IFLA_GRE_ENCAP_TYPE] &&
	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
			      struct netlink_ext_ack *extack)
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data, extack);
}

static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
	__be16 flags = 0;
	int ret;

	if (!data)
		return 0;

	ret = ipgre_tap_validate(tb, data, extack);
	if (ret)
		return ret;

	/* ERSPAN should only have the GRE sequence and key flags */
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (!data[IFLA_GRE_COLLECT_METADATA] &&
	    flags != (GRE_SEQ | GRE_KEY))
		return -EINVAL;

	/* The ERSPAN session ID is only 10 bits wide. Since we reuse the
	 * 32-bit key field as the ID, check its range.
	 */
	if (data[IFLA_GRE_IKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
		return -EINVAL;

	if (data[IFLA_GRE_OKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
		return -EINVAL;

	return 0;
}

static int ipgre_netlink_parms(struct net_device *dev,
			       struct nlattr *data[],
			       struct nlattr *tb[],
			       struct ip_tunnel_parm *parms,
			       __u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);

	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return 0;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
		if (t->ignore_df)
			return -EINVAL;
		parms->iph.frag_off = htons(IP_DF);
	}

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		t->collect_md = true;
		if (dev->type == ARPHRD_IPGRE)
			dev->type = ARPHRD_NONE;
	}

	if (data[IFLA_GRE_IGNORE_DF]) {
		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
		    && (parms->iph.frag_off & htons(IP_DF)))
			return -EINVAL;
		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
	}

	if (data[IFLA_GRE_FWMARK])
		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);

	if (data[IFLA_GRE_ERSPAN_VER]) {
		t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);

		if (t->erspan_ver != 1 && t->erspan_ver != 2)
			return -EINVAL;
	}

	if (t->erspan_ver == 1) {
		if (data[IFLA_GRE_ERSPAN_INDEX]) {
			t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
			if (t->index & ~INDEX_MASK)
				return -EINVAL;
		}
	} else if (t->erspan_ver == 2) {
		if (data[IFLA_GRE_ERSPAN_DIR]) {
			t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
			if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
				return -EINVAL;
		}
		if (data[IFLA_GRE_ERSPAN_HWID]) {
			t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
			if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
				return -EINVAL;
		}
	}

	return 0;
}

/* This function returns true when ENCAP attributes are present in the nl msg */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
				      struct ip_tunnel_encap *ipencap)
{
	bool ret = false;

	memset(ipencap, 0, sizeof(*ipencap));

	if (!data)
		return ret;

	if (data[IFLA_GRE_ENCAP_TYPE]) {
		ret = true;
		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
	}

	if (data[IFLA_GRE_ENCAP_FLAGS]) {
		ret = true;
		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
	}

	if (data[IFLA_GRE_ENCAP_SPORT]) {
		ret = true;
		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
	}

	if (data[IFLA_GRE_ENCAP_DPORT]) {
		ret = true;
		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
	}

	return ret;
}

static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static int erspan_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->tun_hlen = 8;
	tunnel->parms.iph.protocol = IPPROTO_GRE;
	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
		       erspan_hdr_len(tunnel->erspan_ver);

	dev->features |= GRE_FEATURES;
	dev->hw_features |= GRE_FEATURES;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops erspan_netdev_ops = {
	.ndo_init		= erspan_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= erspan_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->max_mtu = 0;
	dev->netdev_ops = &gre_tap_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);
}

bool is_gretap_dev(const struct net_device *dev)
{
	return dev->netdev_ops == &gre_tap_netdev_ops;
}
EXPORT_SYMBOL_GPL(is_gretap_dev);

static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
{
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;
	__u32 fwmark = 0;
	int err;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);

		err = ip_tunnel_encap_setup(t, &ipencap);
		if (err < 0)
			return err;
	}

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;
	return ip_tunnel_newlink(dev, tb, &p, fwmark);
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[],
			    struct netlink_ext_ack *extack)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_encap ipencap;
	__u32 fwmark = t->fwmark;
	struct ip_tunnel_parm p;
	int err;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		err = ip_tunnel_encap_setup(t, &ipencap);
		if (err < 0)
			return err;
	}

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;

	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
	if (err < 0)
		return err;

	t->parms.i_flags = p.i_flags;
	t->parms.o_flags = p.o_flags;

	if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
		ipgre_link_update(dev, !tb[IFLA_MTU]);

	return 0;
}

static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		/* IFLA_GRE_ENCAP_TYPE */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_SPORT */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_DPORT */
		nla_total_size(2) +
		/* IFLA_GRE_COLLECT_METADATA */
		nla_total_size(0) +
		/* IFLA_GRE_IGNORE_DF */
		nla_total_size(1) +
		/* IFLA_GRE_FWMARK */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_INDEX */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_VER */
		nla_total_size(1) +
		/* IFLA_GRE_ERSPAN_DIR */
		nla_total_size(1) +
		/* IFLA_GRE_ERSPAN_HWID */
		nla_total_size(2) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;
	__be16 o_flags = p->o_flags;

	if (t->erspan_ver == 1 || t->erspan_ver == 2) {
		if (!t->collect_md)
			o_flags |= TUNNEL_KEY;

		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
			goto nla_put_failure;

		if (t->erspan_ver == 1) {
			if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
				goto nla_put_failure;
		} else {
			if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
				goto nla_put_failure;
			if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
				goto nla_put_failure;
		}
	}

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
			 gre_tnl_flags_to_gre_flags(o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))) ||
	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
			t->encap.type) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
			 t->encap.sport) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
			 t->encap.dport) ||
	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
			t->encap.flags))
		goto nla_put_failure;

	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
		goto nla_put_failure;

	if (t->collect_md) {
		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
			goto nla_put_failure;
	}

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static void erspan_setup(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	ether_setup(dev);
	dev->max_mtu = 0;
	dev->netdev_ops = &erspan_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, erspan_net_id);
	t->erspan_ver = 1;
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
};
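
/* For reference, a typical iproute2 command that exercises several of the
 * attributes validated above (the addresses and key are illustrative, not
 * taken from this file):
 *
 *	ip link add gre1 type gre local 192.0.2.1 remote 198.51.100.1 \
 *		ttl 64 key 42
 */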

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops erspan_link_ops __read_mostly = {
	.kind		= "erspan",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= erspan_setup,
	.validate	= erspan_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
					u8 name_assign_type)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	struct ip_tunnel *t;
	int err;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, name, name_assign_type,
			       &ipgre_tap_ops, tb);
	if (IS_ERR(dev))
		return dev;

	/* Configure flow based GRE device. */
	t = netdev_priv(dev);
	t->collect_md = true;

	err = ipgre_newlink(net, dev, tb, NULL, NULL);
	if (err < 0) {
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto out;

	err = rtnl_configure_link(dev, NULL);
	if (err < 0)
		goto out;

	return dev;
out:
	ip_tunnel_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);

static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit_batch = ipgre_tap_exit_batch_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __net_init erspan_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, erspan_net_id,
				  &erspan_link_ops, "erspan0");
}

static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
{
	ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
}

static struct pernet_operations erspan_net_ops = {
	.init = erspan_init_net,
	.exit_batch = erspan_exit_batch_net,
	.id   = &erspan_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0)
		goto pnet_tap_failed;

	err = register_pernet_device(&erspan_net_ops);
	if (err < 0)
		goto pnet_erspan_failed;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

	err = rtnl_link_register(&erspan_link_ops);
	if (err < 0)
		goto erspan_link_failed;

	return 0;

erspan_link_failed:
	rtnl_link_unregister(&ipgre_tap_ops);
tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
	unregister_pernet_device(&erspan_net_ops);
pnet_erspan_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	unregister_pernet_device(&ipgre_net_ops);
	return err;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	rtnl_link_unregister(&erspan_link_ops);
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
	unregister_pernet_device(&erspan_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
MODULE_ALIAS_NETDEV("erspan0");