1/*
2 * IPv6 output functions
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
 16 * A.N.Kuznetsov : arithmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
29#include <linux/errno.h>
30#include <linux/kernel.h>
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
39#include <linux/module.h>
40#include <linux/slab.h>
41
42#include <linux/netfilter.h>
43#include <linux/netfilter_ipv6.h>
44
45#include <net/sock.h>
46#include <net/snmp.h>
47
48#include <net/ipv6.h>
49#include <net/ndisc.h>
50#include <net/protocol.h>
51#include <net/ip6_route.h>
52#include <net/addrconf.h>
53#include <net/rawv6.h>
54#include <net/icmp.h>
55#include <net/xfrm.h>
56#include <net/checksum.h>
57#include <linux/mroute6.h>
58
59#include <net/SI/fast_common.h>
60
61
62int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
63
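/*
 * Fill in payload_len (0 when the payload exceeds IPV6_MAXPLEN, as for a
 * jumbogram) and run the NF_INET_LOCAL_OUT hook.  ip6_local_out() calls
 * dst_output() itself when the hook verdict is 1, i.e. the packet was
 * accepted and not stolen or queued by netfilter.
 */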
64int __ip6_local_out(struct sk_buff *skb)
65{
66 int len;
67
68 len = skb->len - sizeof(struct ipv6hdr);
69 if (len > IPV6_MAXPLEN)
70 len = 0;
71 ipv6_hdr(skb)->payload_len = htons(len);
72
73 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
74 skb_dst(skb)->dev, dst_output);
75}
76
77int ip6_local_out(struct sk_buff *skb)
78{
79 int err;
80
81 err = __ip6_local_out(skb);
82 if (likely(err == 1))
83 err = dst_output(skb);
84
85 return err;
86}
87EXPORT_SYMBOL_GPL(ip6_local_out);
88
89/* dev_loopback_xmit for use with netfilter. */
90static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
91{
92 skb_reset_mac_header(newskb);
93 __skb_pull(newskb, skb_network_offset(newskb));
94 newskb->pkt_type = PACKET_LOOPBACK;
95 newskb->ip_summed = CHECKSUM_UNNECESSARY;
96 WARN_ON(!skb_dst(newskb));
97
98 netif_rx_ni(newskb);
99 return 0;
100}
101
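/*
 * Final transmit step: hand the packet to the neighbour layer.  For
 * multicast destinations a clone may first be looped back through
 * POST_ROUTING (a listener on this host or pending mroute forwarding),
 * OUTMCAST statistics are updated, and a hop_limit of 0 discards the
 * packet.  Without a neighbour entry the packet is counted as
 * OUTNOROUTES and dropped.
 */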
102static int ip6_finish_output2(struct sk_buff *skb)
103{
104 struct dst_entry *dst = skb_dst(skb);
105 struct net_device *dev = dst->dev;
106 struct neighbour *neigh;
107
108 skb->protocol = htons(ETH_P_IPV6);
109 skb->dev = dev;
110
111 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
112 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
113
114 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
115 ((mroute6_socket(dev_net(dev), skb) &&
116 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
117 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
118 &ipv6_hdr(skb)->saddr))) {
119 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
120
121 /* Do not check for IFF_ALLMULTI; multicast routing
122 is not supported in any case.
123 */
124 if (newskb)
125 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
126 newskb, NULL, newskb->dev,
127 ip6_dev_loopback_xmit);
128
129 if (ipv6_hdr(skb)->hop_limit == 0) {
130 IP6_INC_STATS(dev_net(dev), idev,
131 IPSTATS_MIB_OUTDISCARDS);
132 kfree_skb(skb);
133 return 0;
134 }
135 }
136
137 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
138 skb->len);
139 }
140
141 rcu_read_lock();
142 neigh = dst_get_neighbour_noref(dst);
143 if (neigh) {
144 int res = neigh_output(neigh, skb);
145
146 rcu_read_unlock();
147 return res;
148 }
149 rcu_read_unlock();
150 IP6_INC_STATS(dev_net(dst->dev),
151 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
152 kfree_skb(skb);
153 return -EINVAL;
154}
155
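/*
 * Fragment before transmission when the packet exceeds the path MTU and
 * is not GSO, or when the destination requires fragmenting every packet
 * (dst_allfrag); otherwise go straight to ip6_finish_output2().
 */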
156static int ip6_finish_output(struct sk_buff *skb)
157{
158 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
159 dst_allfrag(skb_dst(skb)))
160 return ip6_fragment(skb, ip6_finish_output2);
161 else
162 return ip6_finish_output2(skb);
163}
164
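/*
 * dst_output() entry point: drop the packet if IPv6 is disabled on the
 * outgoing device, otherwise run NF_INET_POST_ROUTING (skipped for skbs
 * flagged IP6SKB_REROUTED) and finish with ip6_finish_output().
 */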
165int ip6_output(struct sk_buff *skb)
166{
167 struct net_device *dev = skb_dst(skb)->dev;
168 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
169 if (unlikely(idev->cnf.disable_ipv6)) {
170 IP6_INC_STATS(dev_net(dev), idev,
171 IPSTATS_MIB_OUTDISCARDS);
172 kfree_skb(skb);
173 return 0;
174 }
175
176 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
177 ip6_finish_output,
178 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
179}
180
181/*
182 * xmit an sk_buff (used by TCP, SCTP and DCCP)
183 */
184
185int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
186 struct ipv6_txoptions *opt, int tclass)
187{
188 struct net *net = sock_net(sk);
189 struct ipv6_pinfo *np = inet6_sk(sk);
190 struct in6_addr *first_hop = &fl6->daddr;
191 struct dst_entry *dst = skb_dst(skb);
192 struct ipv6hdr *hdr;
193 u8 proto = fl6->flowi6_proto;
194 int seg_len = skb->len;
195 int hlimit = -1;
196 u32 mtu;
197
198 if (opt) {
199 unsigned int head_room;
200
201 /* First: exthdrs may take lots of space (~8K for now)
202 MAX_HEADER is not enough.
203 */
204 head_room = opt->opt_nflen + opt->opt_flen;
205 seg_len += head_room;
206 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
207
208 if (skb_headroom(skb) < head_room) {
209 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
210 if (skb2 == NULL) {
211 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
212 IPSTATS_MIB_OUTDISCARDS);
213 kfree_skb(skb);
214 return -ENOBUFS;
215 }
216 kfree_skb(skb);
217 skb = skb2;
218 skb_set_owner_w(skb, sk);
219 }
220 if (opt->opt_flen)
221 ipv6_push_frag_opts(skb, opt, &proto);
222 if (opt->opt_nflen)
223 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
224 }
225
226 skb_push(skb, sizeof(struct ipv6hdr));
227 skb_reset_network_header(skb);
228 hdr = ipv6_hdr(skb);
229
230 /*
231 * Fill in the IPv6 header
232 */
233 if (np)
234 hlimit = np->hop_limit;
235 if (hlimit < 0)
236 hlimit = ip6_dst_hoplimit(dst);
237
238 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;
239
240 hdr->payload_len = htons(seg_len);
241 hdr->nexthdr = proto;
242 hdr->hop_limit = hlimit;
243
244 hdr->saddr = fl6->saddr;
245 hdr->daddr = *first_hop;
246
247 skb->priority = sk->sk_priority;
248 skb->mark = sk->sk_mark;
249
250 mtu = dst_mtu(dst);
251 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
252 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
253 IPSTATS_MIB_OUT, skb->len);
254 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
255 dst->dev, dst_output);
256 }
257
258 if (net_ratelimit())
259 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
260 skb->dev = dst->dev;
261 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
262 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
263 kfree_skb(skb);
264 return -EMSGSIZE;
265}
266
267EXPORT_SYMBOL(ip6_xmit);
268
269/*
 270 * To avoid extra problems, ND packets are sent through this
 271 * routine. It is code duplication, but I really want to avoid
 272 * extra checks since ipv6_build_header is used by TCP (which
 273 * is performance critical for us)
274 */
275
276int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
277 const struct in6_addr *saddr, const struct in6_addr *daddr,
278 int proto, int len)
279{
280 struct ipv6_pinfo *np = inet6_sk(sk);
281 struct ipv6hdr *hdr;
282
283 skb->protocol = htons(ETH_P_IPV6);
284 skb->dev = dev;
285
286 skb_reset_network_header(skb);
287 skb_put(skb, sizeof(struct ipv6hdr));
288 hdr = ipv6_hdr(skb);
289
290 *(__be32*)hdr = htonl(0x60000000);
291
292 hdr->payload_len = htons(len);
293 hdr->nexthdr = proto;
294 hdr->hop_limit = np->hop_limit;
295
296 hdr->saddr = *saddr;
297 hdr->daddr = *daddr;
298
299 return 0;
300}
301
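/*
 * Deliver a copy of a Router Alert packet to every raw socket registered
 * on ip6_ra_chain with a matching RA value (and matching bound device,
 * if any).  Returns 1 if at least one socket consumed the packet.
 */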
302static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
303{
304 struct ip6_ra_chain *ra;
305 struct sock *last = NULL;
306
307 read_lock(&ip6_ra_lock);
308 for (ra = ip6_ra_chain; ra; ra = ra->next) {
309 struct sock *sk = ra->sk;
310 if (sk && ra->sel == sel &&
311 (!sk->sk_bound_dev_if ||
312 sk->sk_bound_dev_if == skb->dev->ifindex)) {
313 if (last) {
314 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
315 if (skb2)
316 rawv6_rcv(last, skb2);
317 }
318 last = sk;
319 }
320 }
321
322 if (last) {
323 rawv6_rcv(last, skb);
324 read_unlock(&ip6_ra_lock);
325 return 1;
326 }
327 read_unlock(&ip6_ra_lock);
328 return 0;
329}
330
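/*
 * Called for packets whose destination we answer proxy NDP for: return 1
 * to deliver NDISC messages locally, -1 to report a link failure for
 * link-local destinations, 0 to forward the packet normally.
 */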
331static int ip6_forward_proxy_check(struct sk_buff *skb)
332{
333 struct ipv6hdr *hdr = ipv6_hdr(skb);
334 u8 nexthdr = hdr->nexthdr;
335 __be16 frag_off;
336 int offset;
337
338 if (ipv6_ext_hdr(nexthdr)) {
339 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
340 if (offset < 0)
341 return 0;
342 } else
343 offset = sizeof(struct ipv6hdr);
344
345 if (nexthdr == IPPROTO_ICMPV6) {
346 struct icmp6hdr *icmp6;
347
348 if (!pskb_may_pull(skb, (skb_network_header(skb) +
349 offset + 1 - skb->data)))
350 return 0;
351
352 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
353
354 switch (icmp6->icmp6_type) {
355 case NDISC_ROUTER_SOLICITATION:
356 case NDISC_ROUTER_ADVERTISEMENT:
357 case NDISC_NEIGHBOUR_SOLICITATION:
358 case NDISC_NEIGHBOUR_ADVERTISEMENT:
359 case NDISC_REDIRECT:
 360 /* For a reaction involving a unicast neighbour discovery
 361 * message destined to the proxied address, pass it to the
 362 * input function.
363 */
364 return 1;
365 default:
366 break;
367 }
368 }
369
370 /*
371 * The proxying router can't forward traffic sent to a link-local
372 * address, so signal the sender and discard the packet. This
373 * behavior is clarified by the MIPv6 specification.
374 */
375 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
376 dst_link_failure(skb);
377 return -1;
378 }
379
380 return 0;
381}
382
383static inline int ip6_forward_finish(struct sk_buff *skb)
384{
385 return dst_output(skb);
386}
387
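/*
 * Forwarding path: check that forwarding is enabled and the xfrm policy
 * allows it, hand Router Alert packets to ip6_call_ra_chain(), honour
 * proxy NDP, send Time Exceeded when hop_limit would reach zero and
 * Packet Too Big when the frame exceeds the outgoing MTU, emit redirects
 * where appropriate, then decrement hop_limit and pass the packet
 * through NF_INET_FORWARD to dst_output().
 */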
388int ip6_forward(struct sk_buff *skb)
389{
390 struct dst_entry *dst = skb_dst(skb);
391 struct ipv6hdr *hdr = ipv6_hdr(skb);
392 struct inet6_skb_parm *opt = IP6CB(skb);
393 struct net *net = dev_net(dst->dev);
394 u32 mtu;
395
396 if (net->ipv6.devconf_all->forwarding == 0)
397 goto error;
398
399 if (skb_warn_if_lro(skb))
400 goto drop;
401
402 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
403 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
404 goto drop;
405 }
406
407 if (skb->pkt_type != PACKET_HOST)
408 goto drop;
409
410 skb_forward_csum(skb);
411
412 /*
 413 * We DO NOT do any processing on
 414 * RA packets, pushing them to user level AS IS
 415 * without any warranty that the application will be able
 416 * to interpret them. The reason is that we
 417 * cannot do anything clever here.
 418 *
 419 * We are not the end node, so if the packet contains
 420 * AH/ESP, we cannot do anything.
 421 * Defragmentation would also be a mistake: RA packets
 422 * cannot be fragmented, because there is no guarantee
 423 * that different fragments will go along one path. --ANK
424 */
425 if (opt->ra) {
426 u8 *ptr = skb_network_header(skb) + opt->ra;
427 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
428 return 0;
429 }
430
431 /*
432 * check and decrement ttl
433 */
434 if (hdr->hop_limit <= 1) {
435 /* Force OUTPUT device used as source address */
436 skb->dev = dst->dev;
437 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
438 IP6_INC_STATS_BH(net,
439 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
440
441 kfree_skb(skb);
442 return -ETIMEDOUT;
443 }
444
445 /* XXX: idev->cnf.proxy_ndp? */
446 if (net->ipv6.devconf_all->proxy_ndp &&
447 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
448 int proxied = ip6_forward_proxy_check(skb);
449 if (proxied > 0)
450 return ip6_input(skb);
451 else if (proxied < 0) {
452 IP6_INC_STATS(net, ip6_dst_idev(dst),
453 IPSTATS_MIB_INDISCARDS);
454 goto drop;
455 }
456 }
457
458 if (!xfrm6_route_forward(skb)) {
459 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
460 goto drop;
461 }
462 dst = skb_dst(skb);
463
464 /* IPv6 specs say nothing about it, but it is clear that we cannot
465 send redirects to source routed frames.
466 We don't send redirects to frames decapsulated from IPsec.
467 */
468 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
469 struct in6_addr *target = NULL;
470 struct rt6_info *rt;
471
472 /*
473 * incoming and outgoing devices are the same
474 * send a redirect.
475 */
476
477 rt = (struct rt6_info *) dst;
478 if (rt->rt6i_flags & RTF_GATEWAY)
479 target = &rt->rt6i_gateway;
480 else
481 target = &hdr->daddr;
482
483 if (!rt->rt6i_peer)
484 rt6_bind_peer(rt, 1);
485
486 /* Limit redirects both by destination (here)
487 and by source (inside ndisc_send_redirect)
488 */
489 if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
490 ndisc_send_redirect(skb, target);
491 } else {
492 int addrtype = ipv6_addr_type(&hdr->saddr);
493
494 /* This check is security critical. */
495 if (addrtype == IPV6_ADDR_ANY ||
496 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
497 goto error;
498 if (addrtype & IPV6_ADDR_LINKLOCAL) {
499 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
500 ICMPV6_NOT_NEIGHBOUR, 0);
501 goto error;
502 }
503 }
504
505 mtu = dst_mtu(dst);
506 if (mtu < IPV6_MIN_MTU)
507 mtu = IPV6_MIN_MTU;
508
509 if (skb->len > mtu && !skb_is_gso(skb)) {
510 /* Again, force OUTPUT device used as source address */
511 skb->dev = dst->dev;
512 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
513 IP6_INC_STATS_BH(net,
514 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
515 IP6_INC_STATS_BH(net,
516 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
517 kfree_skb(skb);
518 return -EMSGSIZE;
519 }
520
521 if (skb_cow(skb, dst->dev->hard_header_len)) {
522 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
523 goto drop;
524 }
525
526 hdr = ipv6_hdr(skb);
527
528 /* Mangling hops number delayed to point after skb COW */
529
530 hdr->hop_limit--;
531
532 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
533 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
534 ip6_forward_finish);
535
536error:
537 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
538drop:
539 kfree_skb(skb);
540 return -EINVAL;
541}
542
543static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
544{
545 to->pkt_type = from->pkt_type;
546 to->priority = from->priority;
547 to->protocol = from->protocol;
548 skb_dst_drop(to);
549 skb_dst_set(to, dst_clone(skb_dst(from)));
550 to->dev = from->dev;
551 to->mark = from->mark;
552
553#ifdef CONFIG_NET_SCHED
554 to->tc_index = from->tc_index;
555#endif
556 nf_copy(to, from);
557#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
558 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
559 to->nf_trace = from->nf_trace;
560#endif
561 skb_copy_secmark(to, from);
562}
563
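/*
 * Walk the extension header chain to find the offset at which a Fragment
 * header has to be inserted; *nexthdr is left pointing at the nexthdr
 * field that the caller rewrites to NEXTHDR_FRAGMENT.  The bounds checks
 * below are the CVE-2017-9074 fix: return -EINVAL rather than reading
 * past the end of the packet.
 */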
564int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
565{
566 u16 offset = sizeof(struct ipv6hdr);
567 //struct ipv6_opt_hdr *exthdr =//CVE-2017-9074
568 //(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
569 unsigned int packet_len = skb->tail - skb->network_header;
570 int found_rhdr = 0;
571 *nexthdr = &ipv6_hdr(skb)->nexthdr;
572
573 //while (offset + 1 <= packet_len) {//CVE-2017-9074
574 while (offset <= packet_len) {
575 struct ipv6_opt_hdr *exthdr;//CVE-2017-9074
576 switch (**nexthdr) {
577
578 case NEXTHDR_HOP:
579 break;
580 case NEXTHDR_ROUTING:
581 found_rhdr = 1;
582 break;
583 case NEXTHDR_DEST:
584#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
585 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
586 break;
587#endif
588 if (found_rhdr)
589 return offset;
590 break;
591 default :
592 return offset;
593 }
594
595 //offset += ipv6_optlen(exthdr);//CVE-2017-9074
596 //*nexthdr = &exthdr->nexthdr;
597 if (offset + sizeof(struct ipv6_opt_hdr) > packet_len)
598 return -EINVAL;//CVE-2017-9074
599 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
600 offset);
601 offset += ipv6_optlen(exthdr);
602 *nexthdr = &exthdr->nexthdr;//CVE-2017-9074
603 }
604
605 //return offset;
606 return -EINVAL;//CVE-2017-9074
607}
608
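/*
 * Choose the 32-bit Identification value for a Fragment header, derived
 * from a hash of the route's source and destination prefixes and a
 * random seed.
 */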
609void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
610{
611 static u32 ip6_idents_hashrnd __read_mostly;
612 static bool hashrnd_initialized = false;
613 u32 hash, id;
614
615 if (unlikely(!hashrnd_initialized)) {
616 hashrnd_initialized = true;
617 get_random_bytes(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
618 }
619 hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd);
620 hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash);
621
622 id = ip_idents_reserve(hash, 1);
623 fhdr->identification = htonl(id);
624}
625
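/*
 * IPv6 fragmentation.  The fast path reuses an existing frag_list when
 * every fragment already has the right size, alignment and headroom;
 * otherwise the slow path allocates a new skb per fragment and copies
 * the data.  'output' is invoked for each resulting fragment.
 */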
626int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
627{
628 struct sk_buff *frag;
629 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
630 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
631 struct ipv6hdr *tmp_hdr;
632 struct frag_hdr *fh;
633 unsigned int mtu, hlen, left, len;
634 int hroom, troom;
635 __be32 frag_id = 0;
636 int ptr, offset = 0, err=0;
637 u8 *prevhdr, nexthdr = 0;
638 struct net *net = dev_net(skb_dst(skb)->dev);
639
640 hlen = ip6_find_1stfragopt(skb, &prevhdr);
641 if (hlen < 0) {//CVE-2017-9074
642 err = hlen;
643 goto fail;
644 }
645 nexthdr = *prevhdr;
646
647 mtu = ip6_skb_dst_mtu(skb);
648
649 /* We must not fragment if the socket is set to force MTU discovery
 650 * or if the skb is not generated by a local socket.
651 */
652 if (!skb->local_df && skb->len > mtu) {
653 skb->dev = skb_dst(skb)->dev;
654 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
655 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
656 IPSTATS_MIB_FRAGFAILS);
657 kfree_skb(skb);
658 return -EMSGSIZE;
659 }
660
661 if (np && np->frag_size < mtu) {
662 if (np->frag_size)
663 mtu = np->frag_size;
664 }
665 mtu -= hlen + sizeof(struct frag_hdr);
666
667 if (skb_has_frag_list(skb)) {
668 int first_len = skb_pagelen(skb);
669 struct sk_buff *frag2;
670
671 if (first_len - hlen > mtu ||
672 ((first_len - hlen) & 7) ||
673 skb_cloned(skb))
674 goto slow_path;
675
676 skb_walk_frags(skb, frag) {
677 /* Correct geometry. */
678 if (frag->len > mtu ||
679 ((frag->len & 7) && frag->next) ||
680 skb_headroom(frag) < hlen)
681 goto slow_path_clean;
682
683 /* Partially cloned skb? */
684 if (skb_shared(frag))
685 goto slow_path_clean;
686
687 BUG_ON(frag->sk);
688 if (skb->sk) {
689 frag->sk = skb->sk;
690 frag->destructor = sock_wfree;
691 }
692 skb->truesize -= frag->truesize;
693 }
694
695 err = 0;
696 offset = 0;
697 frag = skb_shinfo(skb)->frag_list;
698 skb_frag_list_init(skb);
699 /* BUILD HEADER */
700
701 *prevhdr = NEXTHDR_FRAGMENT;
702 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
703 if (!tmp_hdr) {
704 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
705 IPSTATS_MIB_FRAGFAILS);
706 return -ENOMEM;
707 }
708
709 __skb_pull(skb, hlen);
710 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
711 __skb_push(skb, hlen);
712 skb_reset_network_header(skb);
713 memcpy(skb_network_header(skb), tmp_hdr, hlen);
714
715 ipv6_select_ident(fh, rt);
716 fh->nexthdr = nexthdr;
717 fh->reserved = 0;
718 fh->frag_off = htons(IP6_MF);
719 frag_id = fh->identification;
720
721 first_len = skb_pagelen(skb);
722 skb->data_len = first_len - skb_headlen(skb);
723 skb->len = first_len;
724 ipv6_hdr(skb)->payload_len = htons(first_len -
725 sizeof(struct ipv6hdr));
726
727 dst_hold(&rt->dst);
728
729 for (;;) {
 730 /* Prepare the header of the next frame,
 731 * before the previous one goes down. */
732 if (frag) {
733 frag->ip_summed = CHECKSUM_NONE;
734 skb_reset_transport_header(frag);
735 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
736 __skb_push(frag, hlen);
737 skb_reset_network_header(frag);
738 memcpy(skb_network_header(frag), tmp_hdr,
739 hlen);
740 offset += skb->len - hlen - sizeof(struct frag_hdr);
741 fh->nexthdr = nexthdr;
742 fh->reserved = 0;
743 fh->frag_off = htons(offset);
744 if (frag->next != NULL)
745 fh->frag_off |= htons(IP6_MF);
746 fh->identification = frag_id;
747 ipv6_hdr(frag)->payload_len =
748 htons(frag->len -
749 sizeof(struct ipv6hdr));
750 ip6_copy_metadata(frag, skb);
751 }
752
753 err = output(skb);
754 if(!err)
755 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
756 IPSTATS_MIB_FRAGCREATES);
757
758 if (err || !frag)
759 break;
760
761 skb = frag;
762 frag = skb->next;
763 skb->next = NULL;
764 }
765
766 kfree(tmp_hdr);
767
768 if (err == 0) {
769 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
770 IPSTATS_MIB_FRAGOKS);
771 dst_release(&rt->dst);
772 return 0;
773 }
774
775 while (frag) {
776 skb = frag->next;
777 kfree_skb(frag);
778 frag = skb;
779 }
780
781 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
782 IPSTATS_MIB_FRAGFAILS);
783 dst_release(&rt->dst);
784 return err;
785
786slow_path_clean:
787 skb_walk_frags(skb, frag2) {
788 if (frag2 == frag)
789 break;
790 frag2->sk = NULL;
791 frag2->destructor = NULL;
792 skb->truesize += frag2->truesize;
793 }
794 }
795
796slow_path:
797 left = skb->len - hlen; /* Space per frame */
798 ptr = hlen; /* Where to start from */
799
800 /*
801 * Fragment the datagram.
802 */
803
804 *prevhdr = NEXTHDR_FRAGMENT;
805 hroom = LL_RESERVED_SPACE(rt->dst.dev);
806 troom = rt->dst.dev->needed_tailroom;
807
808 /*
809 * Keep copying data until we run out.
810 */
811 while(left > 0) {
812 len = left;
813 /* IF: it doesn't fit, use 'mtu' - the data space left */
814 if (len > mtu)
815 len = mtu;
816 /* IF: we are not sending up to and including the packet end
817 then align the next start on an eight byte boundary */
818 if (len < left) {
819 len &= ~7;
820 }
821 /*
822 * Allocate buffer.
823 */
824
825 if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
826 hroom + troom, GFP_ATOMIC)) == NULL) {
827 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
828 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
829 IPSTATS_MIB_FRAGFAILS);
830 err = -ENOMEM;
831 goto fail;
832 }
833
834 /*
835 * Set up data on packet
836 */
837
838 ip6_copy_metadata(frag, skb);
839 skb_reserve(frag, hroom);
840 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
841 skb_reset_network_header(frag);
842 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
843 frag->transport_header = (frag->network_header + hlen +
844 sizeof(struct frag_hdr));
845
846 /*
847 * Charge the memory for the fragment to any owner
848 * it might possess
849 */
850 if (skb->sk)
851 skb_set_owner_w(frag, skb->sk);
852
853 /*
854 * Copy the packet header into the new buffer.
855 */
856 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
857
858 /*
859 * Build fragment header.
860 */
861 fh->nexthdr = nexthdr;
862 fh->reserved = 0;
863 if (!frag_id) {
864 ipv6_select_ident(fh, rt);
865 frag_id = fh->identification;
866 } else
867 fh->identification = frag_id;
868
869 /*
870 * Copy a block of the IP datagram.
871 */
872 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
873 BUG();
874 left -= len;
875
876 fh->frag_off = htons(offset);
877 if (left > 0)
878 fh->frag_off |= htons(IP6_MF);
879 ipv6_hdr(frag)->payload_len = htons(frag->len -
880 sizeof(struct ipv6hdr));
881
882 ptr += len;
883 offset += len;
884
885 /*
886 * Put this fragment into the sending queue.
887 */
888 err = output(frag);
889 if (err)
890 goto fail;
891
892 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
893 IPSTATS_MIB_FRAGCREATES);
894 }
895 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
896 IPSTATS_MIB_FRAGOKS);
897 kfree_skb(skb);
898 return err;
899
900fail:
901 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
902 IPSTATS_MIB_FRAGFAILS);
903 kfree_skb(skb);
904 return err;
905}
906
907static inline int ip6_rt_check(const struct rt6key *rt_key,
908 const struct in6_addr *fl_addr,
909 const struct in6_addr *addr_cache)
910{
911 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
912 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
913}
914
915static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
916 struct dst_entry *dst,
917 const struct flowi6 *fl6)
918{
919 struct ipv6_pinfo *np = inet6_sk(sk);
920 struct rt6_info *rt;
921
922 if (!dst)
923 goto out;
924
925 if (dst->ops->family != AF_INET6) {
926 dst_release(dst);
927 return NULL;
928 }
929
930 rt = (struct rt6_info *)dst;
 931 /* Yes, checking route validity in the not-connected
 932 * case is not very simple. Take into account
933 * that we do not support routing by source, TOS,
934 * and MSG_DONTROUTE --ANK (980726)
935 *
936 * 1. ip6_rt_check(): If route was host route,
937 * check that cached destination is current.
938 * If it is network route, we still may
939 * check its validity using saved pointer
940 * to the last used address: daddr_cache.
941 * We do not want to save whole address now,
 942 * (because the main consumer of this service
 943 * is TCP, which does not have this problem),
944 * so that the last trick works only on connected
945 * sockets.
946 * 2. oif also should be the same.
947 */
948 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
949#ifdef CONFIG_IPV6_SUBTREES
950 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
951#endif
952 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
953 dst_release(dst);
954 dst = NULL;
955 }
956
957out:
958 return dst;
959}
960
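/*
 * Common tail of the dst lookup helpers: perform the route lookup if the
 * caller did not supply a dst, pick a source address when the flow has
 * none, and (with optimistic DAD) retry the lookup towards the default
 * router while the next hop's neighbour entry is still unresolved.
 */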
961static int ip6_dst_lookup_tail(struct sock *sk,
962 struct dst_entry **dst, struct flowi6 *fl6)
963{
964 struct net *net = sock_net(sk);
965#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
966 struct neighbour *n;
967#endif
968 int err;
969
970 if (*dst == NULL)
971 *dst = ip6_route_output(net, sk, fl6);
972
973 if ((err = (*dst)->error))
974 goto out_err_release;
975
976 if (ipv6_addr_any(&fl6->saddr)) {
977 struct rt6_info *rt = (struct rt6_info *) *dst;
978 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
979 sk ? inet6_sk(sk)->srcprefs : 0,
980 &fl6->saddr);
981 if (err)
982 goto out_err_release;
983 }
984
985#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
986 /*
987 * Here if the dst entry we've looked up
988 * has a neighbour entry that is in the INCOMPLETE
989 * state and the src address from the flow is
990 * marked as OPTIMISTIC, we release the found
991 * dst entry and replace it instead with the
992 * dst entry of the nexthop router
993 */
994 rcu_read_lock();
995 n = dst_get_neighbour_noref(*dst);
996 if (n && !(n->nud_state & NUD_VALID)) {
997 struct inet6_ifaddr *ifp;
998 struct flowi6 fl_gw6;
999 int redirect;
1000
1001 rcu_read_unlock();
1002 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1003 (*dst)->dev, 1);
1004
1005 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1006 if (ifp)
1007 in6_ifa_put(ifp);
1008
1009 if (redirect) {
1010 /*
1011 * We need to get the dst entry for the
1012 * default router instead
1013 */
1014 dst_release(*dst);
1015 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1016 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1017 *dst = ip6_route_output(net, sk, &fl_gw6);
1018 if ((err = (*dst)->error))
1019 goto out_err_release;
1020 }
1021 } else {
1022 rcu_read_unlock();
1023 }
1024#endif
1025
1026 return 0;
1027
1028out_err_release:
1029 if (err == -ENETUNREACH)
1030 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1031 dst_release(*dst);
1032 *dst = NULL;
1033 return err;
1034}
1035
1036/**
1037 * ip6_dst_lookup - perform route lookup on flow
1038 * @sk: socket which provides route info
1039 * @dst: pointer to dst_entry * for result
1040 * @fl6: flow to lookup
1041 *
1042 * This function performs a route lookup on the given flow.
1043 *
1044 * It returns zero on success, or a standard errno code on error.
1045 */
1046int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
1047{
1048 *dst = NULL;
1049 return ip6_dst_lookup_tail(sk, dst, fl6);
1050}
1051EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1052
1053/**
1054 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1055 * @sk: socket which provides route info
1056 * @fl6: flow to lookup
1057 * @final_dst: final destination address for ipsec lookup
1058 * @can_sleep: we are in a sleepable context
1059 *
1060 * This function performs a route lookup on the given flow.
1061 *
1062 * It returns a valid dst pointer on success, or a pointer encoded
1063 * error code.
1064 */
1065struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1066 const struct in6_addr *final_dst,
1067 bool can_sleep)
1068{
1069 struct dst_entry *dst = NULL;
1070 int err;
1071
1072 err = ip6_dst_lookup_tail(sk, &dst, fl6);
1073 if (err)
1074 return ERR_PTR(err);
1075 if (final_dst)
1076 fl6->daddr = *final_dst;
1077 if (can_sleep)
1078 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1079
1080 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1081}
1082EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1083
1084/**
1085 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1086 * @sk: socket which provides the dst cache and route info
1087 * @fl6: flow to lookup
1088 * @final_dst: final destination address for ipsec lookup
1089 * @can_sleep: we are in a sleepable context
1090 *
1091 * This function performs a route lookup on the given flow with the
1092 * possibility of using the cached route in the socket if it is valid.
1093 * It will take the socket dst lock when operating on the dst cache.
1094 * As a result, this function can only be used in process context.
1095 *
1096 * It returns a valid dst pointer on success, or a pointer encoded
1097 * error code.
1098 */
1099struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1100 const struct in6_addr *final_dst,
1101 bool can_sleep)
1102{
1103 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1104 int err;
1105
1106 dst = ip6_sk_dst_check(sk, dst, fl6);
1107
1108 err = ip6_dst_lookup_tail(sk, &dst, fl6);
1109 if (err)
1110 return ERR_PTR(err);
1111 if (final_dst)
1112 fl6->daddr = *final_dst;
1113 if (can_sleep)
1114 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1115
1116 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1117}
1118EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1119
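/*
 * UDP fragmentation offload path of ip6_append_data(): build (or extend)
 * a single large GSO skb and let the device segment it, recording the
 * fragment id that all segments will share.
 */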
1120static inline int ip6_ufo_append_data(struct sock *sk,
1121 int getfrag(void *from, char *to, int offset, int len,
1122 int odd, struct sk_buff *skb),
1123 void *from, int length, int hh_len, int fragheaderlen,
1124 int transhdrlen, int mtu,unsigned int flags,
1125 struct rt6_info *rt)
1126
1127{
1128 struct sk_buff *skb;
1129 int err;
1130
 1131 /* The network device supports UDP large send offload, so
 1132 * create one single skb packet containing the complete
 1133 * UDP datagram.
1134 */
1135 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1136 struct frag_hdr fhdr;
1137
1138 skb = sock_alloc_send_skb(sk,
1139 hh_len + fragheaderlen + transhdrlen + 20,
1140 (flags & MSG_DONTWAIT), &err);
1141 if (skb == NULL)
1142 return err;
1143
1144 /* reserve space for Hardware header */
1145 skb_reserve(skb, hh_len);
1146
1147 /* create space for UDP/IP header */
1148 skb_put(skb,fragheaderlen + transhdrlen);
1149
1150 /* initialize network header pointer */
1151 skb_reset_network_header(skb);
1152
1153 /* initialize protocol header pointer */
1154 skb->transport_header = skb->network_header + fragheaderlen;
1155
1156 skb->ip_summed = CHECKSUM_PARTIAL;
1157 skb->csum = 0;
1158
1159 /* Specify the length of each IPv6 datagram fragment.
1160 * It has to be a multiple of 8.
1161 */
1162 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1163 sizeof(struct frag_hdr)) & ~7;
1164 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1165 ipv6_select_ident(&fhdr, rt);
1166 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1167 __skb_queue_tail(&sk->sk_write_queue, skb);
1168 }
1169
1170 return skb_append_datato_frags(sk, skb, getfrag, from,
1171 (length - transhdrlen));
1172}
1173
1174static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1175 gfp_t gfp)
1176{
1177 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1178}
1179
1180static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1181 gfp_t gfp)
1182{
1183 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1184}
1185
1186static void ip6_append_data_mtu(unsigned int *mtu,
1187 int *maxfraglen,
1188 unsigned int fragheaderlen,
1189 struct sk_buff *skb,
1190 struct rt6_info *rt,
1191 unsigned int orig_mtu)
1192{
1193 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1194 if (skb == NULL) {
1195 /* first fragment, reserve header_len */
1196 *mtu = orig_mtu - rt->dst.header_len;
1197
1198 } else {
1199 /*
 1200 * this fragment is not the first; the header
 1201 * space is regarded as data space.
1202 */
1203 *mtu = orig_mtu;
1204 }
1205 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1206 + fragheaderlen - sizeof(struct frag_hdr);
1207 }
1208}
1209
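/*
 * Append user data to the socket's write queue ("corking"), splitting it
 * into MTU-sized buffers with room reserved for the IPv6 header,
 * extension headers and a Fragment header.  The first call sets up the
 * cork state (options, route, mtu); ip6_push_pending_frames() later
 * builds the final header and transmits the queue.
 */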
1210int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1211 int offset, int len, int odd, struct sk_buff *skb),
1212 void *from, int length, int transhdrlen,
1213 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1214 struct rt6_info *rt, unsigned int flags, int dontfrag)
1215{
1216 struct inet_sock *inet = inet_sk(sk);
1217 struct ipv6_pinfo *np = inet6_sk(sk);
1218 struct inet_cork *cork;
1219 struct sk_buff *skb, *skb_prev = NULL;
1220 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
1221 int exthdrlen;
1222 int dst_exthdrlen;
1223 int hh_len;
1224 int copy;
1225 int err;
1226 int offset = 0;
1227 int csummode = CHECKSUM_NONE;
1228 __u8 tx_flags = 0;
1229
1230 if (flags&MSG_PROBE)
1231 return 0;
1232 cork = &inet->cork.base;
1233 if (skb_queue_empty(&sk->sk_write_queue)) {
1234 /*
1235 * setup for corking
1236 */
1237 if (opt) {
1238 if (WARN_ON(np->cork.opt))
1239 return -EINVAL;
1240
1241 np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
1242 if (unlikely(np->cork.opt == NULL))
1243 return -ENOBUFS;
1244
1245 np->cork.opt->tot_len = opt->tot_len;
1246 np->cork.opt->opt_flen = opt->opt_flen;
1247 np->cork.opt->opt_nflen = opt->opt_nflen;
1248
1249 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1250 sk->sk_allocation);
1251 if (opt->dst0opt && !np->cork.opt->dst0opt)
1252 return -ENOBUFS;
1253
1254 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1255 sk->sk_allocation);
1256 if (opt->dst1opt && !np->cork.opt->dst1opt)
1257 return -ENOBUFS;
1258
1259 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1260 sk->sk_allocation);
1261 if (opt->hopopt && !np->cork.opt->hopopt)
1262 return -ENOBUFS;
1263
1264 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1265 sk->sk_allocation);
1266 if (opt->srcrt && !np->cork.opt->srcrt)
1267 return -ENOBUFS;
1268
1269 /* need source address above miyazawa*/
1270 }
1271 dst_hold(&rt->dst);
1272 cork->dst = &rt->dst;
1273 inet->cork.fl.u.ip6 = *fl6;
1274 np->cork.hop_limit = hlimit;
1275 np->cork.tclass = tclass;
1276 if (rt->dst.flags & DST_XFRM_TUNNEL)
1277 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1278 rt->dst.dev->mtu : dst_mtu(&rt->dst);
1279 else
1280 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1281 rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1282 if (np->frag_size < mtu) {
1283 if (np->frag_size)
1284 mtu = np->frag_size;
1285 }
1286 cork->fragsize = mtu;
1287 if (dst_allfrag(rt->dst.path))
1288 cork->flags |= IPCORK_ALLFRAG;
1289 cork->length = 0;
1290 sk->sk_sndmsg_page = NULL;
1291 sk->sk_sndmsg_off = 0;
1292 exthdrlen = (opt ? opt->opt_flen : 0);
1293 length += exthdrlen;
1294 transhdrlen += exthdrlen;
1295 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1296 } else {
1297 rt = (struct rt6_info *)cork->dst;
1298 fl6 = &inet->cork.fl.u.ip6;
1299 opt = np->cork.opt;
1300 transhdrlen = 0;
1301 exthdrlen = 0;
1302 dst_exthdrlen = 0;
1303 mtu = cork->fragsize;
1304 }
1305 orig_mtu = mtu;
1306
1307 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1308
1309 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1310 (opt ? opt->opt_nflen : 0);
1311 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1312
1313 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1314 if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1315 ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
1316 return -EMSGSIZE;
1317 }
1318 }
1319
1320 /* For UDP, check if TX timestamp is enabled */
1321 if (sk->sk_type == SOCK_DGRAM) {
1322 err = sock_tx_timestamp(sk, &tx_flags);
1323 if (err)
1324 goto error;
1325 }
1326
1327 /*
1328 * Let's try using as much space as possible.
1329 * Use MTU if total length of the message fits into the MTU.
1330 * Otherwise, we need to reserve fragment header and
 1331 * fragment alignment (= 8-15 octets, in total).
1332 *
 1333 * Note that we may need to "move" the data from the tail
 1334 * of the buffer to the new fragment when we split
1335 * the message.
1336 *
1337 * FIXME: It may be fragmented into multiple chunks
1338 * at once if non-fragmentable extension headers
1339 * are too large.
1340 * --yoshfuji
1341 */
1342
1343 if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP ||
1344 sk->sk_protocol == IPPROTO_RAW)) {
1345 ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
1346 return -EMSGSIZE;
1347 }
1348
1349 skb = skb_peek_tail(&sk->sk_write_queue);
1350 cork->length += length;
1351/*
1352 if (((length > mtu) ||
1353 (skb && skb_has_frags(skb))) &&
1354 (sk->sk_protocol == IPPROTO_UDP) &&
1355 (rt->dst.dev->features & NETIF_F_UFO)) {
1356*/
1357 if ((skb && skb_is_gso(skb)) ||//CVE-2017-1000112
1358 (((length > mtu) ||
1359 (skb && skb_has_frags(skb))) &&
1360 (sk->sk_protocol == IPPROTO_UDP) &&
1361 (rt->dst.dev->features & NETIF_F_UFO))) {
1362 err = ip6_ufo_append_data(sk, getfrag, from, length,
1363 hh_len, fragheaderlen,
1364 transhdrlen, mtu, flags, rt);
1365 if (err)
1366 goto error;
1367 return 0;
1368 }
1369
1370 if (!skb)
1371 goto alloc_new_skb;
1372
1373 while (length > 0) {
1374 /* Check if the remaining data fits into current packet. */
1375 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1376 if (copy < length)
1377 copy = maxfraglen - skb->len;
1378
1379 if (copy <= 0) {
1380 char *data;
1381 unsigned int datalen;
1382 unsigned int fraglen;
1383 unsigned int fraggap;
1384 unsigned int alloclen;
1385alloc_new_skb:
1386 /* There's no room in the current skb */
1387 if (skb)
1388 fraggap = skb->len - maxfraglen;
1389 else
1390 fraggap = 0;
1391 /* update mtu and maxfraglen if necessary */
1392 if (skb == NULL || skb_prev == NULL)
1393 ip6_append_data_mtu(&mtu, &maxfraglen,
1394 fragheaderlen, skb, rt,
1395 orig_mtu);
1396
1397 skb_prev = skb;
1398
1399 /*
1400 * If remaining data exceeds the mtu,
1401 * we know we need more fragment(s).
1402 */
1403 datalen = length + fraggap;
1404
1405 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1406 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1407 if ((flags & MSG_MORE) &&
1408 !(rt->dst.dev->features&NETIF_F_SG))
1409 alloclen = mtu;
1410 else
1411 alloclen = datalen + fragheaderlen;
1412
1413 alloclen += dst_exthdrlen;
1414
1415 if (datalen != length + fraggap) {
1416 /*
 1417 * this is not the last fragment; the trailer
1418 * space is regarded as data space.
1419 */
1420 datalen += rt->dst.trailer_len;
1421 }
1422
1423 alloclen += rt->dst.trailer_len;
1424 fraglen = datalen + fragheaderlen;
1425
1426 /*
1427 * We just reserve space for fragment header.
1428 * Note: this may be overallocation if the message
1429 * (without MSG_MORE) fits into the MTU.
1430 */
1431 alloclen += sizeof(struct frag_hdr);
1432
1433 if (transhdrlen) {
1434 skb = sock_alloc_send_skb(sk,
1435 alloclen + hh_len,
1436 (flags & MSG_DONTWAIT), &err);
1437 } else {
1438 skb = NULL;
1439 if (atomic_read(&sk->sk_wmem_alloc) <=
1440 2 * sk->sk_sndbuf)
1441 skb = sock_wmalloc(sk,
1442 alloclen + hh_len, 1,
1443 sk->sk_allocation);
1444 if (unlikely(skb == NULL))
1445 err = -ENOBUFS;
1446 else {
1447 /* Only the initial fragment
1448 * is time stamped.
1449 */
1450 tx_flags = 0;
1451 }
1452 }
1453 if (skb == NULL)
1454 goto error;
1455 /*
1456 * Fill in the control structures
1457 */
1458 skb->ip_summed = csummode;
1459 skb->csum = 0;
1460 /* reserve for fragmentation and ipsec header */
1461 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1462 dst_exthdrlen);
1463
1464 if (sk->sk_type == SOCK_DGRAM)
1465 skb_shinfo(skb)->tx_flags = tx_flags;
1466
1467 /*
1468 * Find where to start putting bytes
1469 */
1470 data = skb_put(skb, fraglen);
1471 skb_set_network_header(skb, exthdrlen);
1472 data += fragheaderlen;
1473 skb->transport_header = (skb->network_header +
1474 fragheaderlen);
1475 if (fraggap) {
1476 skb->csum = skb_copy_and_csum_bits(
1477 skb_prev, maxfraglen,
1478 data + transhdrlen, fraggap, 0);
1479 skb_prev->csum = csum_sub(skb_prev->csum,
1480 skb->csum);
1481 data += fraggap;
1482 pskb_trim_unique(skb_prev, maxfraglen);
1483 }
1484 copy = datalen - transhdrlen - fraggap;
1485
1486 if (copy < 0) {
1487 err = -EINVAL;
1488 kfree_skb(skb);
1489 goto error;
1490 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1491 err = -EFAULT;
1492 kfree_skb(skb);
1493 goto error;
1494 }
1495
1496 offset += copy;
1497 length -= datalen - fraggap;
1498 transhdrlen = 0;
1499 exthdrlen = 0;
1500 dst_exthdrlen = 0;
1501 csummode = CHECKSUM_NONE;
1502
1503 /*
1504 * Put the packet on the pending queue
1505 */
1506 __skb_queue_tail(&sk->sk_write_queue, skb);
1507 continue;
1508 }
1509
1510 if (copy > length)
1511 copy = length;
1512
1513 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1514 unsigned int off;
1515
1516 off = skb->len;
1517 if (getfrag(from, skb_put(skb, copy),
1518 offset, copy, off, skb) < 0) {
1519 __skb_trim(skb, off);
1520 err = -EFAULT;
1521 goto error;
1522 }
1523 } else {
1524 int i = skb_shinfo(skb)->nr_frags;
1525 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1526 struct page *page = sk->sk_sndmsg_page;
1527 int off = sk->sk_sndmsg_off;
1528 unsigned int left;
1529
1530 if (page && (left = PAGE_SIZE - off) > 0) {
1531 if (copy >= left)
1532 copy = left;
1533 if (page != skb_frag_page(frag)) {
1534 if (i == MAX_SKB_FRAGS) {
1535 err = -EMSGSIZE;
1536 goto error;
1537 }
1538 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1539 skb_frag_ref(skb, i);
1540 frag = &skb_shinfo(skb)->frags[i];
1541 }
1542 } else if(i < MAX_SKB_FRAGS) {
1543 if (copy > PAGE_SIZE)
1544 copy = PAGE_SIZE;
1545 page = alloc_pages(sk->sk_allocation, 0);
1546 if (page == NULL) {
1547 err = -ENOMEM;
1548 goto error;
1549 }
1550 netslab_inc(IP6_OUTPUT_ALLOC_PAGES);
1551 sk->sk_sndmsg_page = page;
1552 sk->sk_sndmsg_off = 0;
1553
1554 skb_fill_page_desc(skb, i, page, 0, 0);
1555 frag = &skb_shinfo(skb)->frags[i];
1556 } else {
1557 err = -EMSGSIZE;
1558 goto error;
1559 }
1560 if (getfrag(from,
1561 skb_frag_address(frag) + skb_frag_size(frag),
1562 offset, copy, skb->len, skb) < 0) {
1563 err = -EFAULT;
1564 goto error;
1565 }
1566 sk->sk_sndmsg_off += copy;
1567 skb_frag_size_add(frag, copy);
1568 skb->len += copy;
1569 skb->data_len += copy;
1570 skb->truesize += copy;
1571 atomic_add(copy, &sk->sk_wmem_alloc);
1572 }
1573 offset += copy;
1574 length -= copy;
1575 }
1576 return 0;
1577error:
1578 cork->length -= length;
1579 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1580 return err;
1581}
1582
1583extern int fast_local6_output_num;
1584static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1585{
1586 if (np->cork.opt) {
1587 kfree(np->cork.opt->dst0opt);
1588 kfree(np->cork.opt->dst1opt);
1589 kfree(np->cork.opt->hopopt);
1590 kfree(np->cork.opt->srcrt);
1591 kfree(np->cork.opt);
1592 np->cork.opt = NULL;
1593 }
1594
1595 if (inet->cork.base.dst) {
1596 dst_release(inet->cork.base.dst);
1597 inet->cork.base.dst = NULL;
1598 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1599 }
1600 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1601}
1602
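/*
 * Coalesce the corked write queue into a single skb (trailing skbs become
 * its frag_list), prepend the IPv6 header and any queued extension
 * headers, and transmit.  fast_local6_output_proc() is a vendor hook from
 * net/SI/fast_common.h; judging from its use here it bypasses the
 * LOCAL_OUT and POST_ROUTING netfilter hooks once a flow exceeds a
 * packet threshold (see the comment above the call).
 */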
1603int ip6_push_pending_frames(struct sock *sk)
1604{
1605 struct sk_buff *skb, *tmp_skb;
1606 struct sk_buff **tail_skb;
1607 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1608 struct inet_sock *inet = inet_sk(sk);
1609 struct ipv6_pinfo *np = inet6_sk(sk);
1610 struct net *net = sock_net(sk);
1611 struct ipv6hdr *hdr;
1612 struct ipv6_txoptions *opt = np->cork.opt;
1613 struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
1614 struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
1615 unsigned char proto = fl6->flowi6_proto;
1616 int err = 0;
1617
1618 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1619 goto out;
1620 tail_skb = &(skb_shinfo(skb)->frag_list);
1621
1622 /* move skb->data to ip header from ext header */
1623 if (skb->data < skb_network_header(skb))
1624 __skb_pull(skb, skb_network_offset(skb));
1625 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1626 __skb_pull(tmp_skb, skb_network_header_len(skb));
1627 *tail_skb = tmp_skb;
1628 tail_skb = &(tmp_skb->next);
1629 skb->len += tmp_skb->len;
1630 skb->data_len += tmp_skb->len;
1631 skb->truesize += tmp_skb->truesize;
1632 tmp_skb->destructor = NULL;
1633 tmp_skb->sk = NULL;
1634 }
1635
1636 /* Allow local fragmentation. */
1637 if (np->pmtudisc < IPV6_PMTUDISC_DO)
1638 skb->local_df = 1;
1639
1640 *final_dst = fl6->daddr;
1641 __skb_pull(skb, skb_network_header_len(skb));
1642 if (opt && opt->opt_flen)
1643 ipv6_push_frag_opts(skb, opt, &proto);
1644 if (opt && opt->opt_nflen)
1645 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1646
1647 skb_push(skb, sizeof(struct ipv6hdr));
1648 skb_reset_network_header(skb);
1649 hdr = ipv6_hdr(skb);
1650
1651 *(__be32*)hdr = fl6->flowlabel |
1652 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1653
1654 hdr->hop_limit = np->cork.hop_limit;
1655 hdr->nexthdr = proto;
1656 hdr->saddr = fl6->saddr;
1657 hdr->daddr = *final_dst;
1658
1659 skb->priority = sk->sk_priority;
1660 skb->mark = sk->sk_mark;
1661
1662 skb_dst_set(skb, dst_clone(&rt->dst));
1663 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1664 if (proto == IPPROTO_ICMPV6) {
1665 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1666
1667 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1668 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1669 }
1670
 1671	/* If we have already sent more than the threshold, add the MAC header
 1672	 * directly on the fast path and skip all IP-layer netfilter hooks. */
 1673	if (fast_local6_output_proc && fast_local6_output_proc(skb)) {
1674 fast_local6_output_num++;
1675 err = ip6_finish_output(skb);
1676 } else {
1677 err = ip6_local_out(skb);
1678 }
1679 if (err) {
1680 if (err > 0)
1681 err = net_xmit_errno(err);
1682 if (err)
1683 goto error;
1684 }
1685
1686out:
1687 ip6_cork_release(inet, np);
1688 return err;
1689error:
1690 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1691 goto out;
1692}
1693
1694void ip6_flush_pending_frames(struct sock *sk)
1695{
1696 struct sk_buff *skb;
1697
1698 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1699 if (skb_dst(skb))
1700 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1701 IPSTATS_MIB_OUTDISCARDS);
1702 kfree_skb(skb);
1703 }
1704
1705 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1706}