1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * IPv6 output functions
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on linux/net/ipv4/ip_output.c
10 *
11 * Changes:
12 * A.N.Kuznetsov : arithmetic in fragmentation.
13 * extension headers are implemented.
14 * route changes now work.
15 * ip6_forward does not confuse sniffers.
16 * etc.
17 *
18 * H. von Brand : Added missing #include <linux/string.h>
19 * Imran Patel : frag id should be in NBO
20 * Kazunori MIYAZAWA @USAGI
21 * : add ip6_append_data and related functions
22 * for datagram xmit
23 */
24
25#include <linux/errno.h>
26#include <linux/kernel.h>
27#include <linux/string.h>
28#include <linux/socket.h>
29#include <linux/net.h>
30#include <linux/netdevice.h>
31#include <linux/if_arp.h>
32#include <linux/in6.h>
33#include <linux/tcp.h>
34#include <linux/route.h>
35#include <linux/module.h>
36#include <linux/slab.h>
37
38#include <linux/bpf-cgroup.h>
39#include <linux/netfilter.h>
40#include <linux/netfilter_ipv6.h>
41
42#include <net/sock.h>
43#include <net/snmp.h>
44
45#include <net/ipv6.h>
46#include <net/ndisc.h>
47#include <net/protocol.h>
48#include <net/ip6_route.h>
49#include <net/addrconf.h>
50#include <net/rawv6.h>
51#include <net/icmp.h>
52#include <net/xfrm.h>
53#include <net/checksum.h>
54#include <linux/mroute6.h>
55#include <net/l3mdev.h>
56#include <net/lwtunnel.h>
57
58static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
59{
60 struct dst_entry *dst = skb_dst(skb);
61 struct net_device *dev = dst->dev;
62 struct inet6_dev *idev = ip6_dst_idev(dst);
63 unsigned int hh_len = LL_RESERVED_SPACE(dev);
64 const struct in6_addr *daddr, *nexthop;
65 struct ipv6hdr *hdr;
66 struct neighbour *neigh;
67 int ret;
68
69 /* Be paranoid, rather than too clever. */
70 if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
71 /* Make sure idev stays alive */
72 rcu_read_lock();
73 skb = skb_expand_head(skb, hh_len);
74 if (!skb) {
75 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
76 rcu_read_unlock();
77 return -ENOMEM;
78 }
79 rcu_read_unlock();
80 }
81
82 hdr = ipv6_hdr(skb);
83 daddr = &hdr->daddr;
84 if (ipv6_addr_is_multicast(daddr)) {
85 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
86 ((mroute6_is_socket(net, skb) &&
87 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
88 ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
89 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
90
91 /* Do not check for IFF_ALLMULTI; multicast routing
92 is not supported in any case.
93 */
94 if (newskb)
95 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
96 net, sk, newskb, NULL, newskb->dev,
97 dev_loopback_xmit);
98
99 if (hdr->hop_limit == 0) {
100 IP6_INC_STATS(net, idev,
101 IPSTATS_MIB_OUTDISCARDS);
102 kfree_skb(skb);
103 return 0;
104 }
105 }
106
107 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
108 if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
109 !(dev->flags & IFF_LOOPBACK)) {
110 kfree_skb(skb);
111 return 0;
112 }
113 }
114
115 if (lwtunnel_xmit_redirect(dst->lwtstate)) {
116 int res = lwtunnel_xmit(skb);
117
118 if (res != LWTUNNEL_XMIT_CONTINUE)
119 return res;
120 }
121
122 rcu_read_lock_bh();
123 nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
124 neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
125 if (unlikely(!neigh))
126 neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
127 if (!IS_ERR(neigh)) {
128 sock_confirm_neigh(skb, neigh);
129 ret = neigh_output(neigh, skb, false);
130 rcu_read_unlock_bh();
131 return ret;
132 }
133 rcu_read_unlock_bh();
134
135 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
136 kfree_skb(skb);
137 return -EINVAL;
138}
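
/*
 * Illustrative sketch, not part of the original file: the same "be paranoid"
 * headroom check used in ip6_finish_output2() above, written as a stand-alone
 * helper. The helper name is hypothetical; it only shows the pattern of
 * growing headroom to LL_RESERVED_SPACE(dev) with skb_expand_head() before a
 * link-layer header is prepended.
 */
static struct sk_buff *__maybe_unused
ip6_example_ensure_headroom(struct sk_buff *skb, struct net_device *dev)
{
	unsigned int hh_len = LL_RESERVED_SPACE(dev);

	if (likely(skb_headroom(skb) >= hh_len) || !dev->header_ops)
		return skb;

	/* skb_expand_head() frees the original skb and returns NULL on failure */
	return skb_expand_head(skb, hh_len);
}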
139
140static int
141ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
142 struct sk_buff *skb, unsigned int mtu)
143{
144 struct sk_buff *segs, *nskb;
145 netdev_features_t features;
146 int ret = 0;
147
148 /* Please see corresponding comment in ip_finish_output_gso
149 * describing the cases where GSO segment length exceeds the
150 * egress MTU.
151 */
152 features = netif_skb_features(skb);
153 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
154 if (IS_ERR_OR_NULL(segs)) {
155 kfree_skb(skb);
156 return -ENOMEM;
157 }
158
159 consume_skb(skb);
160
161 skb_list_walk_safe(segs, segs, nskb) {
162 int err;
163
164 skb_mark_not_on_list(segs);
165 /* Last GSO segment can be smaller than gso_size (and MTU).
166 * Adding a fragment header would produce an "atomic fragment",
167 * which is considered harmful (RFC 8021). Avoid that.
168 */
169 err = segs->len > mtu ?
170 ip6_fragment(net, sk, segs, ip6_finish_output2) :
171 ip6_finish_output2(net, sk, segs);
172 if (err && ret == 0)
173 ret = err;
174 }
175
176 return ret;
177}
178
179static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
180{
181 unsigned int mtu;
182
183#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
184 /* Policy lookup after SNAT yielded a new policy */
185 if (skb_dst(skb)->xfrm) {
186 IP6CB(skb)->flags |= IP6SKB_REROUTED;
187 return dst_output(net, sk, skb);
188 }
189#endif
190
191 mtu = ip6_skb_dst_mtu(skb);
192 if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
193 return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
194
195 if ((skb->len > mtu && !skb_is_gso(skb)) ||
196 dst_allfrag(skb_dst(skb)) ||
197 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
198 return ip6_fragment(net, sk, skb, ip6_finish_output2);
199 else
200 return ip6_finish_output2(net, sk, skb);
201}
202
203static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
204{
205 int ret;
206
207 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
208 switch (ret) {
209 case NET_XMIT_SUCCESS:
210 return __ip6_finish_output(net, sk, skb);
211 case NET_XMIT_CN:
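		/* The GNU "a ? : b" form evaluates to a when a is non-zero and to
		 * b otherwise: the cgroup's NET_XMIT_CN verdict is reported unless
		 * the transmission itself failed, in which case that error wins.
		 */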
212 return __ip6_finish_output(net, sk, skb) ? : ret;
213 default:
214 kfree_skb(skb);
215 return ret;
216 }
217}
218
219int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
220{
221 struct net_device *dev = skb_dst(skb)->dev;
222 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
223
224 skb->protocol = htons(ETH_P_IPV6);
225 skb->dev = dev;
226
227 if (unlikely(idev->cnf.disable_ipv6)) {
228 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
229 kfree_skb(skb);
230 return 0;
231 }
232
233 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
234 net, sk, skb, NULL, dev,
235 ip6_finish_output,
236 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
237}
238
239bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
240{
241 if (!np->autoflowlabel_set)
242 return ip6_default_np_autolabel(net);
243 else
244 return np->autoflowlabel;
245}
246
247/*
248 * xmit an sk_buff (used by TCP, SCTP and DCCP)
249 * Note: the socket lock is not held for SYNACK packets, but the socket
250 * might still be modified by calls to skb_set_owner_w() and ipv6_local_error(),
251 * which use proper atomic operations or spinlocks.
252 */
253int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
254 __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
255{
256 struct net *net = sock_net(sk);
257 const struct ipv6_pinfo *np = inet6_sk(sk);
258 struct in6_addr *first_hop = &fl6->daddr;
259 struct dst_entry *dst = skb_dst(skb);
260 struct net_device *dev = dst->dev;
261 struct inet6_dev *idev = ip6_dst_idev(dst);
262 unsigned int head_room;
263 struct ipv6hdr *hdr;
264 u8 proto = fl6->flowi6_proto;
265 int seg_len = skb->len;
266 int hlimit = -1;
267 u32 mtu;
268
269 head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev);
270 if (opt)
271 head_room += opt->opt_nflen + opt->opt_flen;
272
273 if (unlikely(head_room > skb_headroom(skb))) {
274 /* Make sure idev stays alive */
275 rcu_read_lock();
276 skb = skb_expand_head(skb, head_room);
277 if (!skb) {
278 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
279 rcu_read_unlock();
280 return -ENOBUFS;
281 }
282 rcu_read_unlock();
283 }
284
285 if (opt) {
286 seg_len += opt->opt_nflen + opt->opt_flen;
287
288 if (opt->opt_flen)
289 ipv6_push_frag_opts(skb, opt, &proto);
290
291 if (opt->opt_nflen)
292 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
293 &fl6->saddr);
294 }
295
296 skb_push(skb, sizeof(struct ipv6hdr));
297 skb_reset_network_header(skb);
298 hdr = ipv6_hdr(skb);
299
300 /*
301 * Fill in the IPv6 header
302 */
303 if (np)
304 hlimit = np->hop_limit;
305 if (hlimit < 0)
306 hlimit = ip6_dst_hoplimit(dst);
307
308 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
309 ip6_autoflowlabel(net, np), fl6));
310
311 hdr->payload_len = htons(seg_len);
312 hdr->nexthdr = proto;
313 hdr->hop_limit = hlimit;
314
315 hdr->saddr = fl6->saddr;
316 hdr->daddr = *first_hop;
317
318 skb->protocol = htons(ETH_P_IPV6);
319 skb->priority = priority;
320 skb->mark = mark;
321
322 mtu = dst_mtu(dst);
323 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
324 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
325
326 /* if egress device is enslaved to an L3 master device pass the
327 * skb to its handler for processing
328 */
329 skb = l3mdev_ip6_out((struct sock *)sk, skb);
330 if (unlikely(!skb))
331 return 0;
332
333 /* hooks should never assume socket lock is held.
334 * we promote our socket to non const
335 */
336 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
337 net, (struct sock *)sk, skb, NULL, dev,
338 dst_output);
339 }
340
341 skb->dev = dev;
342 /* ipv6_local_error() does not require socket lock,
343 * we promote our socket to non const
344 */
345 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
346
347 IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
348 kfree_skb(skb);
349 return -EMSGSIZE;
350}
351EXPORT_SYMBOL(ip6_xmit);
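
/*
 * Illustrative sketch, not part of the original file: a minimal caller of
 * ip6_xmit(), loosely modelled on what a connection-oriented transport does
 * once the packet is built and a dst has been attached with skb_dst_set().
 * The helper name is hypothetical; extension headers are omitted here, while
 * real callers usually pass the socket's ipv6_txoptions obtained under
 * rcu_read_lock().
 */
static int __maybe_unused ip6_example_xmit(struct sock *sk, struct sk_buff *skb,
					   struct flowi6 *fl6)
{
	return ip6_xmit(sk, skb, fl6, sk->sk_mark, NULL,
			inet6_sk(sk)->tclass, sk->sk_priority);
}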
352
353static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
354{
355 struct ip6_ra_chain *ra;
356 struct sock *last = NULL;
357
358 read_lock(&ip6_ra_lock);
359 for (ra = ip6_ra_chain; ra; ra = ra->next) {
360 struct sock *sk = ra->sk;
361 if (sk && ra->sel == sel &&
362 (!sk->sk_bound_dev_if ||
363 sk->sk_bound_dev_if == skb->dev->ifindex)) {
364 struct ipv6_pinfo *np = inet6_sk(sk);
365
366 if (np && np->rtalert_isolate &&
367 !net_eq(sock_net(sk), dev_net(skb->dev))) {
368 continue;
369 }
370 if (last) {
371 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
372 if (skb2)
373 rawv6_rcv(last, skb2);
374 }
375 last = sk;
376 }
377 }
378
379 if (last) {
380 rawv6_rcv(last, skb);
381 read_unlock(&ip6_ra_lock);
382 return 1;
383 }
384 read_unlock(&ip6_ra_lock);
385 return 0;
386}
387
388static int ip6_forward_proxy_check(struct sk_buff *skb)
389{
390 struct ipv6hdr *hdr = ipv6_hdr(skb);
391 u8 nexthdr = hdr->nexthdr;
392 __be16 frag_off;
393 int offset;
394
395 if (ipv6_ext_hdr(nexthdr)) {
396 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
397 if (offset < 0)
398 return 0;
399 } else
400 offset = sizeof(struct ipv6hdr);
401
402 if (nexthdr == IPPROTO_ICMPV6) {
403 struct icmp6hdr *icmp6;
404
405 if (!pskb_may_pull(skb, (skb_network_header(skb) +
406 offset + 1 - skb->data)))
407 return 0;
408
409 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
410
411 switch (icmp6->icmp6_type) {
412 case NDISC_ROUTER_SOLICITATION:
413 case NDISC_ROUTER_ADVERTISEMENT:
414 case NDISC_NEIGHBOUR_SOLICITATION:
415 case NDISC_NEIGHBOUR_ADVERTISEMENT:
416 case NDISC_REDIRECT:
417 /* For reactions involving a unicast neighbor discovery
418 * message destined to the proxied address, pass it to
419 * the input function.
420 */
421 return 1;
422 default:
423 break;
424 }
425 }
426
427 /*
428 * The proxying router can't forward traffic sent to a link-local
429 * address, so signal the sender and discard the packet. This
430 * behavior is clarified by the MIPv6 specification.
431 */
432 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
433 dst_link_failure(skb);
434 return -1;
435 }
436
437 return 0;
438}
439
440static inline int ip6_forward_finish(struct net *net, struct sock *sk,
441 struct sk_buff *skb)
442{
443 struct dst_entry *dst = skb_dst(skb);
444
445 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
446 __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
447
448#ifdef CONFIG_NET_SWITCHDEV
449 if (skb->offload_l3_fwd_mark) {
450 consume_skb(skb);
451 return 0;
452 }
453#endif
454
455 skb->tstamp = 0;
456 return dst_output(net, sk, skb);
457}
458
459static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
460{
461 if (skb->len <= mtu)
462 return false;
463
464 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
465 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
466 return true;
467
468 if (skb->ignore_df)
469 return false;
470
471 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
472 return false;
473
474 return true;
475}
476
477int ip6_forward(struct sk_buff *skb)
478{
479 struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
480 struct dst_entry *dst = skb_dst(skb);
481 struct ipv6hdr *hdr = ipv6_hdr(skb);
482 struct inet6_skb_parm *opt = IP6CB(skb);
483 struct net *net = dev_net(dst->dev);
484 u32 mtu;
485
486 if (net->ipv6.devconf_all->forwarding == 0)
487 goto error;
488
489 if (skb->pkt_type != PACKET_HOST)
490 goto drop;
491
492 if (unlikely(skb->sk))
493 goto drop;
494
495 if (skb_warn_if_lro(skb))
496 goto drop;
497
498 if (!net->ipv6.devconf_all->disable_policy &&
499 (!idev || !idev->cnf.disable_policy) &&
500 !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
501 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
502 goto drop;
503 }
504
505 skb_forward_csum(skb);
506
507 /*
508 * We DO NOT do any processing on
509 * RA packets; we push them to user level AS IS
510 * without any warranty that the application will be able
511 * to interpret them. The reason is that we
512 * cannot do anything clever here.
513 *
514 * We are not an end node, so if a packet contains
515 * AH/ESP, we cannot do anything with it.
516 * Defragmentation would also be a mistake; RA packets
517 * cannot be fragmented, because there is no guarantee
518 * that different fragments will follow the same path. --ANK
519 */
520 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
521 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
522 return 0;
523 }
524
525 /*
526 * check and decrement ttl
527 */
528 if (hdr->hop_limit <= 1) {
529 /* Force OUTPUT device used as source address */
530 skb->dev = dst->dev;
531 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
532 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
533
534 kfree_skb(skb);
535 return -ETIMEDOUT;
536 }
537
538 /* XXX: idev->cnf.proxy_ndp? */
539 if (net->ipv6.devconf_all->proxy_ndp &&
540 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
541 int proxied = ip6_forward_proxy_check(skb);
542 if (proxied > 0)
543 return ip6_input(skb);
544 else if (proxied < 0) {
545 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
546 goto drop;
547 }
548 }
549
550 if (!xfrm6_route_forward(skb)) {
551 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
552 goto drop;
553 }
554 dst = skb_dst(skb);
555
556 /* IPv6 specs say nothing about it, but it is clear that we cannot
557 send redirects to source routed frames.
558 We don't send redirects to frames decapsulated from IPsec.
559 */
560 if (IP6CB(skb)->iif == dst->dev->ifindex &&
561 opt->srcrt == 0 && !skb_sec_path(skb)) {
562 struct in6_addr *target = NULL;
563 struct inet_peer *peer;
564 struct rt6_info *rt;
565
566 /*
567 * incoming and outgoing devices are the same
568 * send a redirect.
569 */
570
571 rt = (struct rt6_info *) dst;
572 if (rt->rt6i_flags & RTF_GATEWAY)
573 target = &rt->rt6i_gateway;
574 else
575 target = &hdr->daddr;
576
577 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
578
579 /* Limit redirects both by destination (here)
580 and by source (inside ndisc_send_redirect)
581 */
582 if (inet_peer_xrlim_allow(peer, 1*HZ))
583 ndisc_send_redirect(skb, target);
584 if (peer)
585 inet_putpeer(peer);
586 } else {
587 int addrtype = ipv6_addr_type(&hdr->saddr);
588
589 /* This check is security critical. */
590 if (addrtype == IPV6_ADDR_ANY ||
591 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
592 goto error;
593 if (addrtype & IPV6_ADDR_LINKLOCAL) {
594 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
595 ICMPV6_NOT_NEIGHBOUR, 0);
596 goto error;
597 }
598 }
599
600 mtu = ip6_dst_mtu_forward(dst);
601 if (mtu < IPV6_MIN_MTU)
602 mtu = IPV6_MIN_MTU;
603
604 if (ip6_pkt_too_big(skb, mtu)) {
605 /* Again, force OUTPUT device used as source address */
606 skb->dev = dst->dev;
607 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
608 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
609 __IP6_INC_STATS(net, ip6_dst_idev(dst),
610 IPSTATS_MIB_FRAGFAILS);
611 kfree_skb(skb);
612 return -EMSGSIZE;
613 }
614
615 if (skb_cow(skb, dst->dev->hard_header_len)) {
616 __IP6_INC_STATS(net, ip6_dst_idev(dst),
617 IPSTATS_MIB_OUTDISCARDS);
618 goto drop;
619 }
620
621 hdr = ipv6_hdr(skb);
622
623 /* Mangling hops number delayed to point after skb COW */
624
625 hdr->hop_limit--;
626
627 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
628 net, NULL, skb, skb->dev, dst->dev,
629 ip6_forward_finish);
630
631error:
632 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
633drop:
634 kfree_skb(skb);
635 return -EINVAL;
636}
637
638static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
639{
640 to->pkt_type = from->pkt_type;
641 to->priority = from->priority;
642 to->protocol = from->protocol;
643 skb_dst_drop(to);
644 skb_dst_set(to, dst_clone(skb_dst(from)));
645 to->dev = from->dev;
646 to->mark = from->mark;
647
648 skb_copy_hash(to, from);
649
650#ifdef CONFIG_NET_SCHED
651 to->tc_index = from->tc_index;
652#endif
653 nf_copy(to, from);
654 skb_ext_copy(to, from);
655 skb_copy_secmark(to, from);
656}
657
658int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
659 u8 nexthdr, __be32 frag_id,
660 struct ip6_fraglist_iter *iter)
661{
662 unsigned int first_len;
663 struct frag_hdr *fh;
664
665 /* BUILD HEADER */
666 *prevhdr = NEXTHDR_FRAGMENT;
667 iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
668 if (!iter->tmp_hdr)
669 return -ENOMEM;
670
671 iter->frag = skb_shinfo(skb)->frag_list;
672 skb_frag_list_init(skb);
673
674 iter->offset = 0;
675 iter->hlen = hlen;
676 iter->frag_id = frag_id;
677 iter->nexthdr = nexthdr;
678
679 __skb_pull(skb, hlen);
680 fh = __skb_push(skb, sizeof(struct frag_hdr));
681 __skb_push(skb, hlen);
682 skb_reset_network_header(skb);
683 memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
684
685 fh->nexthdr = nexthdr;
686 fh->reserved = 0;
687 fh->frag_off = htons(IP6_MF);
688 fh->identification = frag_id;
689
690 first_len = skb_pagelen(skb);
691 skb->data_len = first_len - skb_headlen(skb);
692 skb->len = first_len;
693 ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
694
695 return 0;
696}
697EXPORT_SYMBOL(ip6_fraglist_init);
698
699void ip6_fraglist_prepare(struct sk_buff *skb,
700 struct ip6_fraglist_iter *iter)
701{
702 struct sk_buff *frag = iter->frag;
703 unsigned int hlen = iter->hlen;
704 struct frag_hdr *fh;
705
706 frag->ip_summed = CHECKSUM_NONE;
707 skb_reset_transport_header(frag);
708 fh = __skb_push(frag, sizeof(struct frag_hdr));
709 __skb_push(frag, hlen);
710 skb_reset_network_header(frag);
711 memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
712 iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
713 fh->nexthdr = iter->nexthdr;
714 fh->reserved = 0;
715 fh->frag_off = htons(iter->offset);
716 if (frag->next)
717 fh->frag_off |= htons(IP6_MF);
718 fh->identification = iter->frag_id;
719 ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
720 ip6_copy_metadata(frag, skb);
721}
722EXPORT_SYMBOL(ip6_fraglist_prepare);
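
/*
 * Illustrative sketch, not part of the original file: how the fraglist
 * iterator above is meant to be driven, modelled on the fast path of
 * ip6_fragment() further below. The helper name is hypothetical; it assumes
 * the caller has already validated the frag-list geometry the way
 * ip6_fragment() does, and ip6_fraglist_next() comes from <net/ipv6.h>.
 */
static int __maybe_unused
ip6_example_fraglist_xmit(struct net *net, struct sock *sk, struct sk_buff *skb,
			  unsigned int hlen, u8 *prevhdr, u8 nexthdr,
			  __be32 frag_id,
			  int (*output)(struct net *, struct sock *,
					struct sk_buff *))
{
	struct ip6_fraglist_iter iter;
	int err;

	err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id, &iter);
	if (err < 0)
		return err;

	for (;;) {
		/* fix up the next fragment before the current one goes out */
		if (iter.frag)
			ip6_fraglist_prepare(skb, &iter);

		err = output(net, sk, skb);
		if (err || !iter.frag)
			break;

		skb = ip6_fraglist_next(&iter);
	}

	kfree(iter.tmp_hdr);
	if (err)
		kfree_skb_list(iter.frag);
	return err;
}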
723
724void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
725 unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
726 u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
727{
728 state->prevhdr = prevhdr;
729 state->nexthdr = nexthdr;
730 state->frag_id = frag_id;
731
732 state->hlen = hlen;
733 state->mtu = mtu;
734
735 state->left = skb->len - hlen; /* Space per frame */
736 state->ptr = hlen; /* Where to start from */
737
738 state->hroom = hdr_room;
739 state->troom = needed_tailroom;
740
741 state->offset = 0;
742}
743EXPORT_SYMBOL(ip6_frag_init);
744
745struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
746{
747 u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
748 struct sk_buff *frag;
749 struct frag_hdr *fh;
750 unsigned int len;
751
752 len = state->left;
753 /* IF: it doesn't fit, use 'mtu' - the data space left */
754 if (len > state->mtu)
755 len = state->mtu;
756 /* IF: we are not sending up to and including the packet end
757 then align the next start on an eight byte boundary */
758 if (len < state->left)
759 len &= ~7;
760
761 /* Allocate buffer */
762 frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
763 state->hroom + state->troom, GFP_ATOMIC);
764 if (!frag)
765 return ERR_PTR(-ENOMEM);
766
767 /*
768 * Set up data on packet
769 */
770
771 ip6_copy_metadata(frag, skb);
772 skb_reserve(frag, state->hroom);
773 skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
774 skb_reset_network_header(frag);
775 fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
776 frag->transport_header = (frag->network_header + state->hlen +
777 sizeof(struct frag_hdr));
778
779 /*
780 * Charge the memory for the fragment to any owner
781 * it might possess
782 */
783 if (skb->sk)
784 skb_set_owner_w(frag, skb->sk);
785
786 /*
787 * Copy the packet header into the new buffer.
788 */
789 skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
790
791 fragnexthdr_offset = skb_network_header(frag);
792 fragnexthdr_offset += prevhdr - skb_network_header(skb);
793 *fragnexthdr_offset = NEXTHDR_FRAGMENT;
794
795 /*
796 * Build fragment header.
797 */
798 fh->nexthdr = state->nexthdr;
799 fh->reserved = 0;
800 fh->identification = state->frag_id;
801
802 /*
803 * Copy a block of the IP datagram.
804 */
805 BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
806 len));
807 state->left -= len;
808
809 fh->frag_off = htons(state->offset);
810 if (state->left > 0)
811 fh->frag_off |= htons(IP6_MF);
812 ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
813
814 state->ptr += len;
815 state->offset += len;
816
817 return frag;
818}
819EXPORT_SYMBOL(ip6_frag_next);
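
/*
 * Illustrative sketch, not part of the original file: the slow-path
 * counterpart, carving fragments out of a linear skb with ip6_frag_init()
 * and ip6_frag_next(), modelled on the loop in ip6_fragment() below. The
 * helper name is hypothetical, and "mtu" here is the per-fragment data
 * budget (link MTU minus the unfragmentable headers and the fragment
 * header), exactly as ip6_fragment() computes it before ip6_frag_init().
 */
static int __maybe_unused
ip6_example_slow_frag(struct net *net, struct sock *sk, struct sk_buff *skb,
		      unsigned int hlen, unsigned int mtu, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      int (*output)(struct net *, struct sock *,
				    struct sk_buff *))
{
	struct dst_entry *dst = skb_dst(skb);
	struct ip6_frag_state state;
	struct sk_buff *frag;
	int err = 0;

	ip6_frag_init(skb, hlen, mtu, dst->dev->needed_tailroom,
		      LL_RESERVED_SPACE(dst->dev), prevhdr, nexthdr, frag_id,
		      &state);

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		err = output(net, sk, frag);
		if (err)
			goto fail;
	}

	consume_skb(skb);
	return 0;

fail:
	kfree_skb(skb);
	return err;
}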
820
821int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
822 int (*output)(struct net *, struct sock *, struct sk_buff *))
823{
824 struct sk_buff *frag;
825 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
826 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
827 inet6_sk(skb->sk) : NULL;
828 struct ip6_frag_state state;
829 unsigned int mtu, hlen, nexthdr_offset;
830 ktime_t tstamp = skb->tstamp;
831 int hroom, err = 0;
832 __be32 frag_id;
833 u8 *prevhdr, nexthdr = 0;
834
835 err = ip6_find_1stfragopt(skb, &prevhdr);
836 if (err < 0)
837 goto fail;
838 hlen = err;
839 nexthdr = *prevhdr;
840 nexthdr_offset = prevhdr - skb_network_header(skb);
841
842 mtu = ip6_skb_dst_mtu(skb);
843
844 /* We must not fragment if the socket is set to force MTU discovery
845 * or if the skb is not generated by a local socket.
846 */
847 if (unlikely(!skb->ignore_df && skb->len > mtu))
848 goto fail_toobig;
849
850 if (IP6CB(skb)->frag_max_size) {
851 if (IP6CB(skb)->frag_max_size > mtu)
852 goto fail_toobig;
853
854 /* don't send fragments larger than what we received */
855 mtu = IP6CB(skb)->frag_max_size;
856 if (mtu < IPV6_MIN_MTU)
857 mtu = IPV6_MIN_MTU;
858 }
859
860 if (np && np->frag_size < mtu) {
861 if (np->frag_size)
862 mtu = np->frag_size;
863 }
864 if (mtu < hlen + sizeof(struct frag_hdr) + 8)
865 goto fail_toobig;
866 mtu -= hlen + sizeof(struct frag_hdr);
867
868 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
869 &ipv6_hdr(skb)->saddr);
870
871 if (skb->ip_summed == CHECKSUM_PARTIAL &&
872 (err = skb_checksum_help(skb)))
873 goto fail;
874
875 prevhdr = skb_network_header(skb) + nexthdr_offset;
876 hroom = LL_RESERVED_SPACE(rt->dst.dev);
877 if (skb_has_frag_list(skb)) {
878 unsigned int first_len = skb_pagelen(skb);
879 struct ip6_fraglist_iter iter;
880 struct sk_buff *frag2;
881
882 if (first_len - hlen > mtu ||
883 ((first_len - hlen) & 7) ||
884 skb_cloned(skb) ||
885 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
886 goto slow_path;
887
888 skb_walk_frags(skb, frag) {
889 /* Correct geometry. */
890 if (frag->len > mtu ||
891 ((frag->len & 7) && frag->next) ||
892 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
893 goto slow_path_clean;
894
895 /* Partially cloned skb? */
896 if (skb_shared(frag))
897 goto slow_path_clean;
898
899 BUG_ON(frag->sk);
900 if (skb->sk) {
901 frag->sk = skb->sk;
902 frag->destructor = sock_wfree;
903 }
904 skb->truesize -= frag->truesize;
905 }
906
907 err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
908 &iter);
909 if (err < 0)
910 goto fail;
911
912 /* We prevent @rt from being freed. */
913 rcu_read_lock();
914
915 for (;;) {
916 /* Prepare header of the next frame,
917 * before previous one went down. */
918 if (iter.frag)
919 ip6_fraglist_prepare(skb, &iter);
920
921 skb->tstamp = tstamp;
922 err = output(net, sk, skb);
923 if (!err)
924 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
925 IPSTATS_MIB_FRAGCREATES);
926
927 if (err || !iter.frag)
928 break;
929
930 skb = ip6_fraglist_next(&iter);
931 }
932
933 kfree(iter.tmp_hdr);
934
935 if (err == 0) {
936 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
937 IPSTATS_MIB_FRAGOKS);
938 rcu_read_unlock();
939 return 0;
940 }
941
942 kfree_skb_list(iter.frag);
943
944 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
945 IPSTATS_MIB_FRAGFAILS);
946 rcu_read_unlock();
947 return err;
948
949slow_path_clean:
950 skb_walk_frags(skb, frag2) {
951 if (frag2 == frag)
952 break;
953 frag2->sk = NULL;
954 frag2->destructor = NULL;
955 skb->truesize += frag2->truesize;
956 }
957 }
958
959slow_path:
960 /*
961 * Fragment the datagram.
962 */
963
964 ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
965 LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
966 &state);
967
968 /*
969 * Keep copying data until we run out.
970 */
971
972 while (state.left > 0) {
973 frag = ip6_frag_next(skb, &state);
974 if (IS_ERR(frag)) {
975 err = PTR_ERR(frag);
976 goto fail;
977 }
978
979 /*
980 * Put this fragment into the sending queue.
981 */
982 frag->tstamp = tstamp;
983 err = output(net, sk, frag);
984 if (err)
985 goto fail;
986
987 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
988 IPSTATS_MIB_FRAGCREATES);
989 }
990 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
991 IPSTATS_MIB_FRAGOKS);
992 consume_skb(skb);
993 return err;
994
995fail_toobig:
996 if (skb->sk && dst_allfrag(skb_dst(skb)))
997 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
998
999 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1000 err = -EMSGSIZE;
1001
1002fail:
1003 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1004 IPSTATS_MIB_FRAGFAILS);
1005 kfree_skb(skb);
1006 return err;
1007}
1008
1009static inline int ip6_rt_check(const struct rt6key *rt_key,
1010 const struct in6_addr *fl_addr,
1011 const struct in6_addr *addr_cache)
1012{
1013 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
1014 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
1015}
1016
1017static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
1018 struct dst_entry *dst,
1019 const struct flowi6 *fl6)
1020{
1021 struct ipv6_pinfo *np = inet6_sk(sk);
1022 struct rt6_info *rt;
1023
1024 if (!dst)
1025 goto out;
1026
1027 if (dst->ops->family != AF_INET6) {
1028 dst_release(dst);
1029 return NULL;
1030 }
1031
1032 rt = (struct rt6_info *)dst;
1033 /* Yes, checking route validity in the not-connected
1034 * case is not very simple. Take into account,
1035 * that we do not support routing by source, TOS,
1036 * and MSG_DONTROUTE --ANK (980726)
1037 *
1038 * 1. ip6_rt_check(): If route was host route,
1039 * check that cached destination is current.
1040 * If it is network route, we still may
1041 * check its validity using saved pointer
1042 * to the last used address: daddr_cache.
1043 * We do not want to save whole address now,
1044 * (because the main consumer of this service
1045 * is TCP, which does not have this problem),
1046 * so that the last trick works only on connected
1047 * sockets.
1048 * 2. oif also should be the same.
1049 */
1050 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
1051#ifdef CONFIG_IPV6_SUBTREES
1052 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
1053#endif
1054 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
1055 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
1056 dst_release(dst);
1057 dst = NULL;
1058 }
1059
1060out:
1061 return dst;
1062}
1063
1064static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
1065 struct dst_entry **dst, struct flowi6 *fl6)
1066{
1067#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1068 struct neighbour *n;
1069 struct rt6_info *rt;
1070#endif
1071 int err;
1072 int flags = 0;
1073
1074 /* The correct way to handle this would be to do
1075 * ip6_route_get_saddr, and then ip6_route_output; however,
1076 * the route-specific preferred source forces the
1077 * ip6_route_output call _before_ ip6_route_get_saddr.
1078 *
1079 * In source specific routing (no src=any default route),
1080 * ip6_route_output will fail given src=any saddr, though, so
1081 * that's why we try it again later.
1082 */
1083 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
1084 struct fib6_info *from;
1085 struct rt6_info *rt;
1086 bool had_dst = *dst != NULL;
1087
1088 if (!had_dst)
1089 *dst = ip6_route_output(net, sk, fl6);
1090 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
1091
1092 rcu_read_lock();
1093 from = rt ? rcu_dereference(rt->from) : NULL;
1094 err = ip6_route_get_saddr(net, from, &fl6->daddr,
1095 sk ? inet6_sk(sk)->srcprefs : 0,
1096 &fl6->saddr);
1097 rcu_read_unlock();
1098
1099 if (err)
1100 goto out_err_release;
1101
1102 /* If we had an erroneous initial result, pretend it
1103 * never existed and let the SA-enabled version take
1104 * over.
1105 */
1106 if (!had_dst && (*dst)->error) {
1107 dst_release(*dst);
1108 *dst = NULL;
1109 }
1110
1111 if (fl6->flowi6_oif)
1112 flags |= RT6_LOOKUP_F_IFACE;
1113 }
1114
1115 if (!*dst)
1116 *dst = ip6_route_output_flags(net, sk, fl6, flags);
1117
1118 err = (*dst)->error;
1119 if (err)
1120 goto out_err_release;
1121
1122#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1123 /*
1124 * Here if the dst entry we've looked up
1125 * has a neighbour entry that is in the INCOMPLETE
1126 * state and the src address from the flow is
1127 * marked as OPTIMISTIC, we release the found
1128 * dst entry and replace it instead with the
1129 * dst entry of the nexthop router
1130 */
1131 rt = (struct rt6_info *) *dst;
1132 rcu_read_lock_bh();
1133 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1134 rt6_nexthop(rt, &fl6->daddr));
1135 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1136 rcu_read_unlock_bh();
1137
1138 if (err) {
1139 struct inet6_ifaddr *ifp;
1140 struct flowi6 fl_gw6;
1141 int redirect;
1142
1143 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1144 (*dst)->dev, 1);
1145
1146 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1147 if (ifp)
1148 in6_ifa_put(ifp);
1149
1150 if (redirect) {
1151 /*
1152 * We need to get the dst entry for the
1153 * default router instead
1154 */
1155 dst_release(*dst);
1156 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1157 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1158 *dst = ip6_route_output(net, sk, &fl_gw6);
1159 err = (*dst)->error;
1160 if (err)
1161 goto out_err_release;
1162 }
1163 }
1164#endif
1165 if (ipv6_addr_v4mapped(&fl6->saddr) &&
1166 !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1167 err = -EAFNOSUPPORT;
1168 goto out_err_release;
1169 }
1170
1171 return 0;
1172
1173out_err_release:
1174 dst_release(*dst);
1175 *dst = NULL;
1176
1177 if (err == -ENETUNREACH)
1178 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1179 return err;
1180}
1181
1182/**
1183 * ip6_dst_lookup - perform route lookup on flow
1184 * @sk: socket which provides route info
1185 * @dst: pointer to dst_entry * for result
1186 * @fl6: flow to lookup
1187 *
1188 * This function performs a route lookup on the given flow.
1189 *
1190 * It returns zero on success, or a standard errno code on error.
1191 */
1192int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1193 struct flowi6 *fl6)
1194{
1195 *dst = NULL;
1196 return ip6_dst_lookup_tail(net, sk, dst, fl6);
1197}
1198EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1199
1200/**
1201 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1202 * @sk: socket which provides route info
1203 * @fl6: flow to lookup
1204 * @final_dst: final destination address for ipsec lookup
1205 *
1206 * This function performs a route lookup on the given flow.
1207 *
1208 * It returns a valid dst pointer on success, or a pointer encoded
1209 * error code.
1210 */
1211struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1212 const struct in6_addr *final_dst)
1213{
1214 struct dst_entry *dst = NULL;
1215 int err;
1216
1217 err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1218 if (err)
1219 return ERR_PTR(err);
1220 if (final_dst)
1221 fl6->daddr = *final_dst;
1222
1223 return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1224}
1225EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
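
/*
 * Illustrative sketch, not part of the original file: a minimal datagram-style
 * caller of ip6_dst_lookup_flow(). The helper name and the flow fields chosen
 * here are assumptions for the example; the returned dst is either valid or an
 * ERR_PTR(), and the caller owns a reference that must be dropped with
 * dst_release().
 */
static struct dst_entry *__maybe_unused
ip6_example_route_output(struct net *net, struct sock *sk,
			 const struct in6_addr *daddr, int oif)
{
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_UDP;
	fl6.flowi6_oif = oif;
	fl6.daddr = *daddr;

	return ip6_dst_lookup_flow(net, sk, &fl6, NULL);
}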
1226
1227/**
1228 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1229 * @sk: socket which provides the dst cache and route info
1230 * @fl6: flow to lookup
1231 * @final_dst: final destination address for ipsec lookup
1232 * @connected: whether @sk is connected or not
1233 *
1234 * This function performs a route lookup on the given flow with the
1235 * possibility of using the cached route in the socket if it is valid.
1236 * It will take the socket dst lock when operating on the dst cache.
1237 * As a result, this function can only be used in process context.
1238 *
1239 * In addition, for a connected socket, cache the dst in the socket
1240 * if the current cache is not valid.
1241 *
1242 * It returns a valid dst pointer on success, or a pointer encoded
1243 * error code.
1244 */
1245struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1246 const struct in6_addr *final_dst,
1247 bool connected)
1248{
1249 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1250
1251 dst = ip6_sk_dst_check(sk, dst, fl6);
1252 if (dst)
1253 return dst;
1254
1255 dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
1256 if (connected && !IS_ERR(dst))
1257 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1258
1259 return dst;
1260}
1261EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1262
1263static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1264 gfp_t gfp)
1265{
1266 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1267}
1268
1269static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1270 gfp_t gfp)
1271{
1272 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1273}
1274
1275static void ip6_append_data_mtu(unsigned int *mtu,
1276 int *maxfraglen,
1277 unsigned int fragheaderlen,
1278 struct sk_buff *skb,
1279 struct rt6_info *rt,
1280 unsigned int orig_mtu)
1281{
1282 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1283 if (!skb) {
1284 /* first fragment, reserve header_len */
1285 *mtu = orig_mtu - rt->dst.header_len;
1286
1287 } else {
1288 /*
1289 * this fragment is not the first; the header
1290 * space is regarded as data space.
1291 */
1292 *mtu = orig_mtu;
1293 }
1294 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1295 + fragheaderlen - sizeof(struct frag_hdr);
1296 }
1297}
1298
1299static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1300 struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1301 struct rt6_info *rt, struct flowi6 *fl6)
1302{
1303 struct ipv6_pinfo *np = inet6_sk(sk);
1304 unsigned int mtu;
1305 struct ipv6_txoptions *opt = ipc6->opt;
1306
1307 /*
1308 * setup for corking
1309 */
1310 if (opt) {
1311 if (WARN_ON(v6_cork->opt))
1312 return -EINVAL;
1313
1314 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1315 if (unlikely(!v6_cork->opt))
1316 return -ENOBUFS;
1317
1318 v6_cork->opt->tot_len = sizeof(*opt);
1319 v6_cork->opt->opt_flen = opt->opt_flen;
1320 v6_cork->opt->opt_nflen = opt->opt_nflen;
1321
1322 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1323 sk->sk_allocation);
1324 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1325 return -ENOBUFS;
1326
1327 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1328 sk->sk_allocation);
1329 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1330 return -ENOBUFS;
1331
1332 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1333 sk->sk_allocation);
1334 if (opt->hopopt && !v6_cork->opt->hopopt)
1335 return -ENOBUFS;
1336
1337 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1338 sk->sk_allocation);
1339 if (opt->srcrt && !v6_cork->opt->srcrt)
1340 return -ENOBUFS;
1341
1342 /* need source address above --miyazawa */
1343 }
1344 dst_hold(&rt->dst);
1345 cork->base.dst = &rt->dst;
1346 cork->fl.u.ip6 = *fl6;
1347 v6_cork->hop_limit = ipc6->hlimit;
1348 v6_cork->tclass = ipc6->tclass;
1349 if (rt->dst.flags & DST_XFRM_TUNNEL)
1350 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1351 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1352 else
1353 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1354 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1355 if (np->frag_size < mtu) {
1356 if (np->frag_size)
1357 mtu = np->frag_size;
1358 }
1359 cork->base.fragsize = mtu;
1360 cork->base.gso_size = ipc6->gso_size;
1361 cork->base.tx_flags = 0;
1362 cork->base.mark = ipc6->sockc.mark;
1363 sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1364
1365 if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1366 cork->base.flags |= IPCORK_ALLFRAG;
1367 cork->base.length = 0;
1368
1369 cork->base.transmit_time = ipc6->sockc.transmit_time;
1370
1371 return 0;
1372}
1373
1374static int __ip6_append_data(struct sock *sk,
1375 struct flowi6 *fl6,
1376 struct sk_buff_head *queue,
1377 struct inet_cork *cork,
1378 struct inet6_cork *v6_cork,
1379 struct page_frag *pfrag,
1380 int getfrag(void *from, char *to, int offset,
1381 int len, int odd, struct sk_buff *skb),
1382 void *from, int length, int transhdrlen,
1383 unsigned int flags, struct ipcm6_cookie *ipc6)
1384{
1385 struct sk_buff *skb, *skb_prev = NULL;
1386 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1387 struct ubuf_info *uarg = NULL;
1388 int exthdrlen = 0;
1389 int dst_exthdrlen = 0;
1390 int hh_len;
1391 int copy;
1392 int err;
1393 int offset = 0;
1394 u32 tskey = 0;
1395 struct rt6_info *rt = (struct rt6_info *)cork->dst;
1396 struct ipv6_txoptions *opt = v6_cork->opt;
1397 int csummode = CHECKSUM_NONE;
1398 unsigned int maxnonfragsize, headersize;
1399 unsigned int wmem_alloc_delta = 0;
1400 bool paged, extra_uref = false;
1401
1402 skb = skb_peek_tail(queue);
1403 if (!skb) {
1404 exthdrlen = opt ? opt->opt_flen : 0;
1405 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1406 }
1407
1408 paged = !!cork->gso_size;
1409 mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1410 orig_mtu = mtu;
1411
1412 if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
1413 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1414 tskey = sk->sk_tskey++;
1415
1416 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1417
1418 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1419 (opt ? opt->opt_nflen : 0);
1420
1421 headersize = sizeof(struct ipv6hdr) +
1422 (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1423 (dst_allfrag(&rt->dst) ?
1424 sizeof(struct frag_hdr) : 0) +
1425 rt->rt6i_nfheader_len;
1426
1427 if (mtu <= fragheaderlen ||
1428 ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
1429 goto emsgsize;
1430
1431 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1432 sizeof(struct frag_hdr);
1433
1434 /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1435 * within the first fragment
1436 */
1437 if (headersize + transhdrlen > mtu)
1438 goto emsgsize;
1439
1440 if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1441 (sk->sk_protocol == IPPROTO_UDP ||
1442 sk->sk_protocol == IPPROTO_RAW)) {
1443 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1444 sizeof(struct ipv6hdr));
1445 goto emsgsize;
1446 }
1447
1448 if (ip6_sk_ignore_df(sk))
1449 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1450 else
1451 maxnonfragsize = mtu;
1452
1453 if (cork->length + length > maxnonfragsize - headersize) {
1454emsgsize:
1455 pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1456 ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1457 return -EMSGSIZE;
1458 }
1459
1460 /* CHECKSUM_PARTIAL only with no extension headers and when
1461 * we are not going to fragment
1462 */
1463 if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1464 headersize == sizeof(struct ipv6hdr) &&
1465 length <= mtu - headersize &&
1466 (!(flags & MSG_MORE) || cork->gso_size) &&
1467 rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1468 csummode = CHECKSUM_PARTIAL;
1469
1470 if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
1471 uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
1472 if (!uarg)
1473 return -ENOBUFS;
1474 extra_uref = !skb_zcopy(skb); /* only ref on new uarg */
1475 if (rt->dst.dev->features & NETIF_F_SG &&
1476 csummode == CHECKSUM_PARTIAL) {
1477 paged = true;
1478 } else {
1479 uarg->zerocopy = 0;
1480 skb_zcopy_set(skb, uarg, &extra_uref);
1481 }
1482 }
1483
1484 /*
1485 * Let's try using as much space as possible.
1486 * Use MTU if total length of the message fits into the MTU.
1487 * Otherwise, we need to reserve fragment header and
1488 * fragment alignment (= 8-15 octets, in total).
1489 *
1490 * Note that we may need to "move" the data from the tail
1491 * of the buffer to the new fragment when we split
1492 * the message.
1493 *
1494 * FIXME: It may be fragmented into multiple chunks
1495 * at once if non-fragmentable extension headers
1496 * are too large.
1497 * --yoshfuji
1498 */
1499
1500 cork->length += length;
1501 if (!skb)
1502 goto alloc_new_skb;
1503
1504 while (length > 0) {
1505 /* Check if the remaining data fits into current packet. */
1506 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1507 if (copy < length)
1508 copy = maxfraglen - skb->len;
1509
1510 if (copy <= 0) {
1511 char *data;
1512 unsigned int datalen;
1513 unsigned int fraglen;
1514 unsigned int fraggap;
1515 unsigned int alloclen, alloc_extra;
1516 unsigned int pagedlen;
1517alloc_new_skb:
1518 /* There's no room in the current skb */
1519 if (skb)
1520 fraggap = skb->len - maxfraglen;
1521 else
1522 fraggap = 0;
1523 /* update mtu and maxfraglen if necessary */
1524 if (!skb || !skb_prev)
1525 ip6_append_data_mtu(&mtu, &maxfraglen,
1526 fragheaderlen, skb, rt,
1527 orig_mtu);
1528
1529 skb_prev = skb;
1530
1531 /*
1532 * If remaining data exceeds the mtu,
1533 * we know we need more fragment(s).
1534 */
1535 datalen = length + fraggap;
1536
1537 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1538 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1539 fraglen = datalen + fragheaderlen;
1540 pagedlen = 0;
1541
1542 alloc_extra = hh_len;
1543 alloc_extra += dst_exthdrlen;
1544 alloc_extra += rt->dst.trailer_len;
1545
1546 /* We just reserve space for the fragment header.
1547 * Note: this may be an overallocation if the message
1548 * (without MSG_MORE) fits into the MTU.
1549 */
1550 alloc_extra += sizeof(struct frag_hdr);
1551
1552 if ((flags & MSG_MORE) &&
1553 !(rt->dst.dev->features&NETIF_F_SG))
1554 alloclen = mtu;
1555 else if (!paged &&
1556 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
1557 !(rt->dst.dev->features & NETIF_F_SG)))
1558 alloclen = fraglen;
1559 else {
1560 alloclen = min_t(int, fraglen, MAX_HEADER);
1561 pagedlen = fraglen - alloclen;
1562 }
1563 alloclen += alloc_extra;
1564
1565 if (datalen != length + fraggap) {
1566 /*
1567 * this is not the last fragment, the trailer
1568 * space is regarded as data space.
1569 */
1570 datalen += rt->dst.trailer_len;
1571 }
1572
1573 fraglen = datalen + fragheaderlen;
1574
1575 copy = datalen - transhdrlen - fraggap - pagedlen;
1576 if (copy < 0) {
1577 err = -EINVAL;
1578 goto error;
1579 }
1580 if (transhdrlen) {
1581 skb = sock_alloc_send_skb(sk, alloclen,
1582 (flags & MSG_DONTWAIT), &err);
1583 } else {
1584 skb = NULL;
1585 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1586 2 * sk->sk_sndbuf)
1587 skb = alloc_skb(alloclen,
1588 sk->sk_allocation);
1589 if (unlikely(!skb))
1590 err = -ENOBUFS;
1591 }
1592 if (!skb)
1593 goto error;
1594 /*
1595 * Fill in the control structures
1596 */
1597 skb->protocol = htons(ETH_P_IPV6);
1598 skb->ip_summed = csummode;
1599 skb->csum = 0;
1600 /* reserve for fragmentation and ipsec header */
1601 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1602 dst_exthdrlen);
1603
1604 /*
1605 * Find where to start putting bytes
1606 */
1607 data = skb_put(skb, fraglen - pagedlen);
1608 skb_set_network_header(skb, exthdrlen);
1609 data += fragheaderlen;
1610 skb->transport_header = (skb->network_header +
1611 fragheaderlen);
1612 if (fraggap) {
1613 skb->csum = skb_copy_and_csum_bits(
1614 skb_prev, maxfraglen,
1615 data + transhdrlen, fraggap, 0);
1616 skb_prev->csum = csum_sub(skb_prev->csum,
1617 skb->csum);
1618 data += fraggap;
1619 pskb_trim_unique(skb_prev, maxfraglen);
1620 }
1621 if (copy > 0 &&
1622 getfrag(from, data + transhdrlen, offset,
1623 copy, fraggap, skb) < 0) {
1624 err = -EFAULT;
1625 kfree_skb(skb);
1626 goto error;
1627 }
1628
1629 offset += copy;
1630 length -= copy + transhdrlen;
1631 transhdrlen = 0;
1632 exthdrlen = 0;
1633 dst_exthdrlen = 0;
1634
1635 /* Only the initial fragment is time stamped */
1636 skb_shinfo(skb)->tx_flags = cork->tx_flags;
1637 cork->tx_flags = 0;
1638 skb_shinfo(skb)->tskey = tskey;
1639 tskey = 0;
1640 skb_zcopy_set(skb, uarg, &extra_uref);
1641
1642 if ((flags & MSG_CONFIRM) && !skb_prev)
1643 skb_set_dst_pending_confirm(skb, 1);
1644
1645 /*
1646 * Put the packet on the pending queue
1647 */
1648 if (!skb->destructor) {
1649 skb->destructor = sock_wfree;
1650 skb->sk = sk;
1651 wmem_alloc_delta += skb->truesize;
1652 }
1653 __skb_queue_tail(queue, skb);
1654 continue;
1655 }
1656
1657 if (copy > length)
1658 copy = length;
1659
1660 if (!(rt->dst.dev->features&NETIF_F_SG) &&
1661 skb_tailroom(skb) >= copy) {
1662 unsigned int off;
1663
1664 off = skb->len;
1665 if (getfrag(from, skb_put(skb, copy),
1666 offset, copy, off, skb) < 0) {
1667 __skb_trim(skb, off);
1668 err = -EFAULT;
1669 goto error;
1670 }
1671 } else if (!uarg || !uarg->zerocopy) {
1672 int i = skb_shinfo(skb)->nr_frags;
1673
1674 err = -ENOMEM;
1675 if (!sk_page_frag_refill(sk, pfrag))
1676 goto error;
1677
1678 if (!skb_can_coalesce(skb, i, pfrag->page,
1679 pfrag->offset)) {
1680 err = -EMSGSIZE;
1681 if (i == MAX_SKB_FRAGS)
1682 goto error;
1683
1684 __skb_fill_page_desc(skb, i, pfrag->page,
1685 pfrag->offset, 0);
1686 skb_shinfo(skb)->nr_frags = ++i;
1687 get_page(pfrag->page);
1688 }
1689 copy = min_t(int, copy, pfrag->size - pfrag->offset);
1690 if (getfrag(from,
1691 page_address(pfrag->page) + pfrag->offset,
1692 offset, copy, skb->len, skb) < 0)
1693 goto error_efault;
1694
1695 pfrag->offset += copy;
1696 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1697 skb->len += copy;
1698 skb->data_len += copy;
1699 skb->truesize += copy;
1700 wmem_alloc_delta += copy;
1701 } else {
1702 err = skb_zerocopy_iter_dgram(skb, from, copy);
1703 if (err < 0)
1704 goto error;
1705 }
1706 offset += copy;
1707 length -= copy;
1708 }
1709
1710 if (wmem_alloc_delta)
1711 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1712 return 0;
1713
1714error_efault:
1715 err = -EFAULT;
1716error:
1717 if (uarg)
1718 sock_zerocopy_put_abort(uarg, extra_uref);
1719 cork->length -= length;
1720 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1721 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1722 return err;
1723}
1724
1725int ip6_append_data(struct sock *sk,
1726 int getfrag(void *from, char *to, int offset, int len,
1727 int odd, struct sk_buff *skb),
1728 void *from, int length, int transhdrlen,
1729 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1730 struct rt6_info *rt, unsigned int flags)
1731{
1732 struct inet_sock *inet = inet_sk(sk);
1733 struct ipv6_pinfo *np = inet6_sk(sk);
1734 int exthdrlen;
1735 int err;
1736
1737 if (flags&MSG_PROBE)
1738 return 0;
1739 if (skb_queue_empty(&sk->sk_write_queue)) {
1740 /*
1741 * setup for corking
1742 */
1743 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1744 ipc6, rt, fl6);
1745 if (err)
1746 return err;
1747
1748 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1749 length += exthdrlen;
1750 transhdrlen += exthdrlen;
1751 } else {
1752 fl6 = &inet->cork.fl.u.ip6;
1753 transhdrlen = 0;
1754 }
1755
1756 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1757 &np->cork, sk_page_frag(sk), getfrag,
1758 from, length, transhdrlen, flags, ipc6);
1759}
1760EXPORT_SYMBOL_GPL(ip6_append_data);
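
/*
 * Illustrative sketch, not part of the original file: the corked send pattern
 * that ip6_append_data()/ip6_push_pending_frames() exist for, loosely modelled
 * on the UDPv6 and ICMPv6 senders. The helper name is hypothetical; the caller
 * is assumed to hold the socket lock and to have prepared the getfrag
 * callback, the ipcm6 cookie, the flow and the route.
 */
static int __maybe_unused
ip6_example_corked_send(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
				    int odd, struct sk_buff *skb),
			void *from, int length, int transhdrlen,
			struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			struct rt6_info *rt, unsigned int flags)
{
	int err;

	err = ip6_append_data(sk, getfrag, from, length, transhdrlen,
			      ipc6, fl6, rt, flags);
	if (err) {
		/* anything already queued must be dropped on error */
		ip6_flush_pending_frames(sk);
		return err;
	}

	if (!(flags & MSG_MORE))
		err = ip6_push_pending_frames(sk);

	return err;
}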
1761
1762static void ip6_cork_release(struct inet_cork_full *cork,
1763 struct inet6_cork *v6_cork)
1764{
1765 if (v6_cork->opt) {
1766 kfree(v6_cork->opt->dst0opt);
1767 kfree(v6_cork->opt->dst1opt);
1768 kfree(v6_cork->opt->hopopt);
1769 kfree(v6_cork->opt->srcrt);
1770 kfree(v6_cork->opt);
1771 v6_cork->opt = NULL;
1772 }
1773
1774 if (cork->base.dst) {
1775 dst_release(cork->base.dst);
1776 cork->base.dst = NULL;
1777 cork->base.flags &= ~IPCORK_ALLFRAG;
1778 }
1779 memset(&cork->fl, 0, sizeof(cork->fl));
1780}
1781
1782struct sk_buff *__ip6_make_skb(struct sock *sk,
1783 struct sk_buff_head *queue,
1784 struct inet_cork_full *cork,
1785 struct inet6_cork *v6_cork)
1786{
1787 struct sk_buff *skb, *tmp_skb;
1788 struct sk_buff **tail_skb;
1789 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1790 struct ipv6_pinfo *np = inet6_sk(sk);
1791 struct net *net = sock_net(sk);
1792 struct ipv6hdr *hdr;
1793 struct ipv6_txoptions *opt = v6_cork->opt;
1794 struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1795 struct flowi6 *fl6 = &cork->fl.u.ip6;
1796 unsigned char proto = fl6->flowi6_proto;
1797
1798 skb = __skb_dequeue(queue);
1799 if (!skb)
1800 goto out;
1801 tail_skb = &(skb_shinfo(skb)->frag_list);
1802
1803 /* move skb->data to ip header from ext header */
1804 if (skb->data < skb_network_header(skb))
1805 __skb_pull(skb, skb_network_offset(skb));
1806 while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1807 __skb_pull(tmp_skb, skb_network_header_len(skb));
1808 *tail_skb = tmp_skb;
1809 tail_skb = &(tmp_skb->next);
1810 skb->len += tmp_skb->len;
1811 skb->data_len += tmp_skb->len;
1812 skb->truesize += tmp_skb->truesize;
1813 tmp_skb->destructor = NULL;
1814 tmp_skb->sk = NULL;
1815 }
1816
1817 /* Allow local fragmentation. */
1818 skb->ignore_df = ip6_sk_ignore_df(sk);
1819
1820 *final_dst = fl6->daddr;
1821 __skb_pull(skb, skb_network_header_len(skb));
1822 if (opt && opt->opt_flen)
1823 ipv6_push_frag_opts(skb, opt, &proto);
1824 if (opt && opt->opt_nflen)
1825 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1826
1827 skb_push(skb, sizeof(struct ipv6hdr));
1828 skb_reset_network_header(skb);
1829 hdr = ipv6_hdr(skb);
1830
1831 ip6_flow_hdr(hdr, v6_cork->tclass,
1832 ip6_make_flowlabel(net, skb, fl6->flowlabel,
1833 ip6_autoflowlabel(net, np), fl6));
1834 hdr->hop_limit = v6_cork->hop_limit;
1835 hdr->nexthdr = proto;
1836 hdr->saddr = fl6->saddr;
1837 hdr->daddr = *final_dst;
1838
1839 skb->priority = sk->sk_priority;
1840 skb->mark = cork->base.mark;
1841
1842 skb->tstamp = cork->base.transmit_time;
1843
1844 skb_dst_set(skb, dst_clone(&rt->dst));
1845 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1846 if (proto == IPPROTO_ICMPV6) {
1847 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1848 u8 icmp6_type;
1849
1850 if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl)
1851 icmp6_type = fl6->fl6_icmp_type;
1852 else
1853 icmp6_type = icmp6_hdr(skb)->icmp6_type;
1854 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
1855 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1856 }
1857
1858 ip6_cork_release(cork, v6_cork);
1859out:
1860 return skb;
1861}
1862
1863int ip6_send_skb(struct sk_buff *skb)
1864{
1865 struct net *net = sock_net(skb->sk);
1866 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1867 int err;
1868
1869 rcu_read_lock();
1870 err = ip6_local_out(net, skb->sk, skb);
1871 if (err) {
1872 if (err > 0)
1873 err = net_xmit_errno(err);
1874 if (err)
1875 IP6_INC_STATS(net, rt->rt6i_idev,
1876 IPSTATS_MIB_OUTDISCARDS);
1877 }
1878
1879 rcu_read_unlock();
1880 return err;
1881}
1882
1883int ip6_push_pending_frames(struct sock *sk)
1884{
1885 struct sk_buff *skb;
1886
1887 skb = ip6_finish_skb(sk);
1888 if (!skb)
1889 return 0;
1890
1891 return ip6_send_skb(skb);
1892}
1893EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1894
1895static void __ip6_flush_pending_frames(struct sock *sk,
1896 struct sk_buff_head *queue,
1897 struct inet_cork_full *cork,
1898 struct inet6_cork *v6_cork)
1899{
1900 struct sk_buff *skb;
1901
1902 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1903 if (skb_dst(skb))
1904 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1905 IPSTATS_MIB_OUTDISCARDS);
1906 kfree_skb(skb);
1907 }
1908
1909 ip6_cork_release(cork, v6_cork);
1910}
1911
1912void ip6_flush_pending_frames(struct sock *sk)
1913{
1914 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1915 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1916}
1917EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1918
1919struct sk_buff *ip6_make_skb(struct sock *sk,
1920 int getfrag(void *from, char *to, int offset,
1921 int len, int odd, struct sk_buff *skb),
1922 void *from, int length, int transhdrlen,
1923 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1924 struct rt6_info *rt, unsigned int flags,
1925 struct inet_cork_full *cork)
1926{
1927 struct inet6_cork v6_cork;
1928 struct sk_buff_head queue;
1929 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1930 int err;
1931
1932 if (flags & MSG_PROBE)
1933 return NULL;
1934
1935 __skb_queue_head_init(&queue);
1936
1937 cork->base.flags = 0;
1938 cork->base.addr = 0;
1939 cork->base.opt = NULL;
1940 cork->base.dst = NULL;
1941 v6_cork.opt = NULL;
1942 err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1943 if (err) {
1944 ip6_cork_release(cork, &v6_cork);
1945 return ERR_PTR(err);
1946 }
1947 if (ipc6->dontfrag < 0)
1948 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1949
1950 err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1951 &current->task_frag, getfrag, from,
1952 length + exthdrlen, transhdrlen + exthdrlen,
1953 flags, ipc6);
1954 if (err) {
1955 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1956 return ERR_PTR(err);
1957 }
1958
1959 return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1960}
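
/*
 * Illustrative sketch, not part of the original file: the single-shot,
 * lockless path built on ip6_make_skb(), roughly what a datagram protocol
 * uses when the message is not corked. The helper name is hypothetical; the
 * cork storage lives on the caller's stack and the parameters are assumed to
 * be prepared exactly as for ip6_append_data().
 */
static int __maybe_unused
ip6_example_make_and_send(struct sock *sk,
			  int getfrag(void *from, char *to, int offset,
				      int len, int odd, struct sk_buff *skb),
			  void *from, int length, int transhdrlen,
			  struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			  struct rt6_info *rt, unsigned int flags)
{
	struct inet_cork_full cork;
	struct sk_buff *skb;

	skb = ip6_make_skb(sk, getfrag, from, length, transhdrlen,
			   ipc6, fl6, rt, flags, &cork);
	if (IS_ERR_OR_NULL(skb))
		return PTR_ERR(skb); /* NULL (MSG_PROBE) maps to 0: nothing to send */

	return ip6_send_skb(skb);
}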