1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * IPv6 output functions
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on linux/net/ipv4/ip_output.c
10 *
11 * Changes:
12 * A.N.Kuznetsov : arithmetic in fragmentation.
13 * extension headers are implemented.
14 * route changes now work.
15 * ip6_forward does not confuse sniffers.
16 * etc.
17 *
18 * H. von Brand : Added missing #include <linux/string.h>
19 * Imran Patel : frag id should be in NBO
20 * Kazunori MIYAZAWA @USAGI
21 * : add ip6_append_data and related functions
22 * for datagram xmit
23 */
24
25#include <linux/errno.h>
26#include <linux/kernel.h>
27#include <linux/string.h>
28#include <linux/socket.h>
29#include <linux/net.h>
30#include <linux/netdevice.h>
31#include <linux/if_arp.h>
32#include <linux/in6.h>
33#include <linux/tcp.h>
34#include <linux/route.h>
35#include <linux/module.h>
36#include <linux/slab.h>
37
38#include <linux/bpf-cgroup.h>
39#include <linux/netfilter.h>
40#include <linux/netfilter_ipv6.h>
41
42#include <net/sock.h>
43#include <net/snmp.h>
44
45#include <net/ipv6.h>
46#include <net/ndisc.h>
47#include <net/protocol.h>
48#include <net/ip6_route.h>
49#include <net/addrconf.h>
50#include <net/rawv6.h>
51#include <net/icmp.h>
52#include <net/xfrm.h>
53#include <net/checksum.h>
54#include <linux/mroute6.h>
55#include <net/l3mdev.h>
56#include <net/lwtunnel.h>
57
58static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
59{
60 struct dst_entry *dst = skb_dst(skb);
61 struct net_device *dev = dst->dev;
62 struct inet6_dev *idev = ip6_dst_idev(dst);
63 unsigned int hh_len = LL_RESERVED_SPACE(dev);
64 const struct in6_addr *daddr, *nexthop;
65 struct ipv6hdr *hdr;
66 struct neighbour *neigh;
67 int ret;
68
69 /* Be paranoid, rather than too clever. */
70 if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
71 /* Make sure idev stays alive */
72 rcu_read_lock();
73 skb = skb_expand_head(skb, hh_len);
74 if (!skb) {
75 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
76 rcu_read_unlock();
77 return -ENOMEM;
78 }
79 rcu_read_unlock();
80 }
81
82 hdr = ipv6_hdr(skb);
83 daddr = &hdr->daddr;
84 if (ipv6_addr_is_multicast(daddr)) {
85 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
86 ((mroute6_is_socket(net, skb) &&
87 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
88 ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
89 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
90
91 /* Do not check for IFF_ALLMULTI; multicast routing
92 is not supported in any case.
93 */
94 if (newskb)
95 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
96 net, sk, newskb, NULL, newskb->dev,
97 dev_loopback_xmit);
98
99 if (hdr->hop_limit == 0) {
100 IP6_INC_STATS(net, idev,
101 IPSTATS_MIB_OUTDISCARDS);
102 kfree_skb(skb);
103 return 0;
104 }
105 }
106
107 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
108 if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
109 !(dev->flags & IFF_LOOPBACK)) {
110 kfree_skb(skb);
111 return 0;
112 }
113 }
114
115 if (lwtunnel_xmit_redirect(dst->lwtstate)) {
116 int res = lwtunnel_xmit(skb);
117
118 if (res != LWTUNNEL_XMIT_CONTINUE)
119 return res;
120 }
121
122 rcu_read_lock_bh();
123 nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
124 neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
125 if (unlikely(!neigh))
126 neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
127 if (!IS_ERR(neigh)) {
128 sock_confirm_neigh(skb, neigh);
129 ret = neigh_output(neigh, skb, false);
130 rcu_read_unlock_bh();
131 return ret;
132 }
133 rcu_read_unlock_bh();
134
135 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
136 kfree_skb(skb);
137 return -EINVAL;
138}
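
/*
 * Illustrative sketch, not part of the original file: the same "be paranoid"
 * headroom check used in ip6_finish_output2() above, written as a stand-alone
 * helper. The helper name is hypothetical; it only shows the pattern of
 * growing headroom to LL_RESERVED_SPACE(dev) with skb_expand_head() before a
 * link-layer header is prepended.
 */
static struct sk_buff *__maybe_unused
ip6_example_ensure_headroom(struct sk_buff *skb, struct net_device *dev)
{
	unsigned int hh_len = LL_RESERVED_SPACE(dev);

	if (likely(skb_headroom(skb) >= hh_len) || !dev->header_ops)
		return skb;

	/* skb_expand_head() frees the original skb and returns NULL on failure */
	return skb_expand_head(skb, hh_len);
}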
139
140static int
141ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
142 struct sk_buff *skb, unsigned int mtu)
143{
144 struct sk_buff *segs, *nskb;
145 netdev_features_t features;
146 int ret = 0;
147
148 /* Please see corresponding comment in ip_finish_output_gso
149 * describing the cases where GSO segment length exceeds the
150 * egress MTU.
151 */
152 features = netif_skb_features(skb);
153 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
154 if (IS_ERR_OR_NULL(segs)) {
155 kfree_skb(skb);
156 return -ENOMEM;
157 }
158
159 consume_skb(skb);
160
161 skb_list_walk_safe(segs, segs, nskb) {
162 int err;
163
164 skb_mark_not_on_list(segs);
165 /* Last GSO segment can be smaller than gso_size (and MTU).
166 * Adding a fragment header would produce an "atomic fragment",
167 * which is considered harmful (RFC 8021). Avoid that.
168 */
169 err = segs->len > mtu ?
170 ip6_fragment(net, sk, segs, ip6_finish_output2) :
171 ip6_finish_output2(net, sk, segs);
172 if (err && ret == 0)
173 ret = err;
174 }
175
176 return ret;
177}
178
179static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
180{
181 unsigned int mtu;
182
183#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
184 /* Policy lookup after SNAT yielded a new policy */
185 if (skb_dst(skb)->xfrm) {
186 IP6CB(skb)->flags |= IP6SKB_REROUTED;
187 return dst_output(net, sk, skb);
188 }
189#endif
190
191 mtu = ip6_skb_dst_mtu(skb);
192 if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
193 return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
194
195 if ((skb->len > mtu && !skb_is_gso(skb)) ||
196 dst_allfrag(skb_dst(skb)) ||
197 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
198 return ip6_fragment(net, sk, skb, ip6_finish_output2);
199 else
200 return ip6_finish_output2(net, sk, skb);
201}
202
203static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
204{
205 int ret;
206
207 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
208 switch (ret) {
209 case NET_XMIT_SUCCESS:
210 return __ip6_finish_output(net, sk, skb);
211 case NET_XMIT_CN:
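		/* The GNU "a ? : b" form evaluates to a when a is non-zero and to
		 * b otherwise: the cgroup's NET_XMIT_CN verdict is reported unless
		 * the transmission itself failed, in which case that error wins.
		 */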
212 return __ip6_finish_output(net, sk, skb) ? : ret;
213 default:
214 kfree_skb(skb);
215 return ret;
216 }
217}
218
219int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
220{
221 struct net_device *dev = skb_dst(skb)->dev;
222 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
223
224 skb->protocol = htons(ETH_P_IPV6);
225 skb->dev = dev;
226
227 if (unlikely(idev->cnf.disable_ipv6)) {
228 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
229 kfree_skb(skb);
230 return 0;
231 }
232
233 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
234 net, sk, skb, NULL, dev,
235 ip6_finish_output,
236 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
237}
238
239bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
240{
241 if (!np->autoflowlabel_set)
242 return ip6_default_np_autolabel(net);
243 else
244 return np->autoflowlabel;
245}
246
247/*
248 * xmit an sk_buff (used by TCP, SCTP and DCCP)
249 * Note: the socket lock is not held for SYNACK packets, but the socket
250 * might still be modified by calls to skb_set_owner_w() and ipv6_local_error(),
251 * which use proper atomic operations or spinlocks.
252 */
253int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
254 __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
255{
256 struct net *net = sock_net(sk);
257 const struct ipv6_pinfo *np = inet6_sk(sk);
258 struct in6_addr *first_hop = &fl6->daddr;
259 struct dst_entry *dst = skb_dst(skb);
260 struct net_device *dev = dst->dev;
261 struct inet6_dev *idev = ip6_dst_idev(dst);
262 unsigned int head_room;
263 struct ipv6hdr *hdr;
264 u8 proto = fl6->flowi6_proto;
265 int seg_len = skb->len;
266 int hlimit = -1;
267 u32 mtu;
268
269 head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev);
270 if (opt)
271 head_room += opt->opt_nflen + opt->opt_flen;
272
273 if (unlikely(head_room > skb_headroom(skb))) {
274 /* Make sure idev stays alive */
275 rcu_read_lock();
276 skb = skb_expand_head(skb, head_room);
277 if (!skb) {
278 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
279 rcu_read_unlock();
280 return -ENOBUFS;
281 }
282 rcu_read_unlock();
283 }
284
285 if (opt) {
286 seg_len += opt->opt_nflen + opt->opt_flen;
287
288 if (opt->opt_flen)
289 ipv6_push_frag_opts(skb, opt, &proto);
290
291 if (opt->opt_nflen)
292 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
293 &fl6->saddr);
294 }
295
296 skb_push(skb, sizeof(struct ipv6hdr));
297 skb_reset_network_header(skb);
298 hdr = ipv6_hdr(skb);
299
300 /*
301 * Fill in the IPv6 header
302 */
303 if (np)
304 hlimit = np->hop_limit;
305 if (hlimit < 0)
306 hlimit = ip6_dst_hoplimit(dst);
307
308 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
309 ip6_autoflowlabel(net, np), fl6));
310
311 hdr->payload_len = htons(seg_len);
312 hdr->nexthdr = proto;
313 hdr->hop_limit = hlimit;
314
315 hdr->saddr = fl6->saddr;
316 hdr->daddr = *first_hop;
317
318 skb->protocol = htons(ETH_P_IPV6);
319 skb->priority = priority;
320 skb->mark = mark;
321
322 mtu = dst_mtu(dst);
323 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
324 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
325
326 /* if egress device is enslaved to an L3 master device pass the
327 * skb to its handler for processing
328 */
329 skb = l3mdev_ip6_out((struct sock *)sk, skb);
330 if (unlikely(!skb))
331 return 0;
332
333 /* hooks should never assume socket lock is held.
334 * we promote our socket to non const
335 */
336 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
337 net, (struct sock *)sk, skb, NULL, dev,
338 dst_output);
339 }
340
341 skb->dev = dev;
342 /* ipv6_local_error() does not require socket lock,
343 * we promote our socket to non const
344 */
345 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
346
347 IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
348 kfree_skb(skb);
349 return -EMSGSIZE;
350}
351EXPORT_SYMBOL(ip6_xmit);
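
/*
 * Illustrative sketch, not part of the original file: a minimal caller of
 * ip6_xmit(), loosely modelled on what a connection-oriented transport does
 * once the packet is built and a dst has been attached with skb_dst_set().
 * The helper name is hypothetical; extension headers are omitted here, while
 * real callers usually pass the socket's ipv6_txoptions obtained under
 * rcu_read_lock().
 */
static int __maybe_unused ip6_example_xmit(struct sock *sk, struct sk_buff *skb,
					   struct flowi6 *fl6)
{
	return ip6_xmit(sk, skb, fl6, sk->sk_mark, NULL,
			inet6_sk(sk)->tclass, sk->sk_priority);
}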
352
353static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
354{
355 struct ip6_ra_chain *ra;
356 struct sock *last = NULL;
357
358 read_lock(&ip6_ra_lock);
359 for (ra = ip6_ra_chain; ra; ra = ra->next) {
360 struct sock *sk = ra->sk;
361 if (sk && ra->sel == sel &&
362 (!sk->sk_bound_dev_if ||
363 sk->sk_bound_dev_if == skb->dev->ifindex)) {
364 struct ipv6_pinfo *np = inet6_sk(sk);
365
366 if (np && np->rtalert_isolate &&
367 !net_eq(sock_net(sk), dev_net(skb->dev))) {
368 continue;
369 }
370 if (last) {
371 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
372 if (skb2)
373 rawv6_rcv(last, skb2);
374 }
375 last = sk;
376 }
377 }
378
379 if (last) {
380 rawv6_rcv(last, skb);
381 read_unlock(&ip6_ra_lock);
382 return 1;
383 }
384 read_unlock(&ip6_ra_lock);
385 return 0;
386}
387
388static int ip6_forward_proxy_check(struct sk_buff *skb)
389{
390 struct ipv6hdr *hdr = ipv6_hdr(skb);
391 u8 nexthdr = hdr->nexthdr;
392 __be16 frag_off;
393 int offset;
394
395 if (ipv6_ext_hdr(nexthdr)) {
396 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
397 if (offset < 0)
398 return 0;
399 } else
400 offset = sizeof(struct ipv6hdr);
401
402 if (nexthdr == IPPROTO_ICMPV6) {
403 struct icmp6hdr *icmp6;
404
405 if (!pskb_may_pull(skb, (skb_network_header(skb) +
406 offset + 1 - skb->data)))
407 return 0;
408
409 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
410
411 switch (icmp6->icmp6_type) {
412 case NDISC_ROUTER_SOLICITATION:
413 case NDISC_ROUTER_ADVERTISEMENT:
414 case NDISC_NEIGHBOUR_SOLICITATION:
415 case NDISC_NEIGHBOUR_ADVERTISEMENT:
416 case NDISC_REDIRECT:
417 /* For reactions involving a unicast neighbor discovery
418 * message destined to the proxied address, pass it to
419 * the input function.
420 */
421 return 1;
422 default:
423 break;
424 }
425 }
426
427 /*
428 * The proxying router can't forward traffic sent to a link-local
429 * address, so signal the sender and discard the packet. This
430 * behavior is clarified by the MIPv6 specification.
431 */
432 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
433 dst_link_failure(skb);
434 return -1;
435 }
436
437 return 0;
438}
439
440static inline int ip6_forward_finish(struct net *net, struct sock *sk,
441 struct sk_buff *skb)
442{
443 struct dst_entry *dst = skb_dst(skb);
444
445 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
446 __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
447
448#ifdef CONFIG_NET_SWITCHDEV
449 if (skb->offload_l3_fwd_mark) {
450 consume_skb(skb);
451 return 0;
452 }
453#endif
454
455 skb->tstamp = 0;
456 return dst_output(net, sk, skb);
457}
458
459static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
460{
461 if (skb->len <= mtu)
462 return false;
463
464 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
465 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
466 return true;
467
468 if (skb->ignore_df)
469 return false;
470
471 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
472 return false;
473
474 return true;
475}
476
477int ip6_forward(struct sk_buff *skb)
478{
479 struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
480 struct dst_entry *dst = skb_dst(skb);
481 struct ipv6hdr *hdr = ipv6_hdr(skb);
482 struct inet6_skb_parm *opt = IP6CB(skb);
483 struct net *net = dev_net(dst->dev);
484 u32 mtu;
485
486 if (net->ipv6.devconf_all->forwarding == 0)
487 goto error;
488
489 if (skb->pkt_type != PACKET_HOST)
490 goto drop;
491
492 if (unlikely(skb->sk))
493 goto drop;
494
495 if (skb_warn_if_lro(skb))
496 goto drop;
497
498 if (!net->ipv6.devconf_all->disable_policy &&
499 (!idev || !idev->cnf.disable_policy) &&
500 !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
501 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
502 goto drop;
503 }
504
505 skb_forward_csum(skb);
506
507 /*
508 * We DO NOT do any processing on
509 * RA packets; we push them to user level AS IS
510 * without any warranty that the application will be able
511 * to interpret them. The reason is that we
512 * cannot do anything clever here.
513 *
514 * We are not an end node, so if a packet contains
515 * AH/ESP, we cannot do anything with it.
516 * Defragmentation would also be a mistake; RA packets
517 * cannot be fragmented, because there is no guarantee
518 * that different fragments will follow the same path. --ANK
519 */
520 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
521 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
522 return 0;
523 }
524
525 /*
526 * check and decrement ttl
527 */
528 if (hdr->hop_limit <= 1) {
529 /* Force OUTPUT device used as source address */
530 skb->dev = dst->dev;
531 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
532 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
533
534 kfree_skb(skb);
535 return -ETIMEDOUT;
536 }
537
538 /* XXX: idev->cnf.proxy_ndp? */
539 if (net->ipv6.devconf_all->proxy_ndp &&
540 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
541 int proxied = ip6_forward_proxy_check(skb);
542 if (proxied > 0)
543 return ip6_input(skb);
544 else if (proxied < 0) {
545 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
546 goto drop;
547 }
548 }
549
550 if (!xfrm6_route_forward(skb)) {
551 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
552 goto drop;
553 }
554 dst = skb_dst(skb);
555
556 /* IPv6 specs say nothing about it, but it is clear that we cannot
557 send redirects to source routed frames.
558 We don't send redirects to frames decapsulated from IPsec.
559 */
560 if (IP6CB(skb)->iif == dst->dev->ifindex &&
561 opt->srcrt == 0 && !skb_sec_path(skb)) {
562 struct in6_addr *target = NULL;
563 struct inet_peer *peer;
564 struct rt6_info *rt;
565
566 /*
567 * incoming and outgoing devices are the same
568 * send a redirect.
569 */
570
571 rt = (struct rt6_info *) dst;
572 if (rt->rt6i_flags & RTF_GATEWAY)
573 target = &rt->rt6i_gateway;
574 else
575 target = &hdr->daddr;
576
577 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
578
579 /* Limit redirects both by destination (here)
580 and by source (inside ndisc_send_redirect)
581 */
582 if (inet_peer_xrlim_allow(peer, 1*HZ))
583 ndisc_send_redirect(skb, target);
584 if (peer)
585 inet_putpeer(peer);
586 } else {
587 int addrtype = ipv6_addr_type(&hdr->saddr);
588
589 /* This check is security critical. */
590 if (addrtype == IPV6_ADDR_ANY ||
591 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
592 goto error;
593 if (addrtype & IPV6_ADDR_LINKLOCAL) {
594 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
595 ICMPV6_NOT_NEIGHBOUR, 0);
596 goto error;
597 }
598 }
599
600 mtu = ip6_dst_mtu_forward(dst);
601 if (mtu < IPV6_MIN_MTU)
602 mtu = IPV6_MIN_MTU;
603
604 if (ip6_pkt_too_big(skb, mtu)) {
605 /* Again, force OUTPUT device used as source address */
606 skb->dev = dst->dev;
607 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
608 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
609 __IP6_INC_STATS(net, ip6_dst_idev(dst),
610 IPSTATS_MIB_FRAGFAILS);
611 kfree_skb(skb);
612 return -EMSGSIZE;
613 }
614
615 if (skb_cow(skb, dst->dev->hard_header_len)) {
616 __IP6_INC_STATS(net, ip6_dst_idev(dst),
617 IPSTATS_MIB_OUTDISCARDS);
618 goto drop;
619 }
620
621 hdr = ipv6_hdr(skb);
622
623 /* Mangling hops number delayed to point after skb COW */
624
625 hdr->hop_limit--;
626
627 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
628 net, NULL, skb, skb->dev, dst->dev,
629 ip6_forward_finish);
630
631error:
632 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
633drop:
634 kfree_skb(skb);
635 return -EINVAL;
636}
637
638static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
639{
640 to->pkt_type = from->pkt_type;
641 to->priority = from->priority;
642 to->protocol = from->protocol;
643 skb_dst_drop(to);
644 skb_dst_set(to, dst_clone(skb_dst(from)));
645 to->dev = from->dev;
646 to->mark = from->mark;
647
648 skb_copy_hash(to, from);
649
650#ifdef CONFIG_NET_SCHED
651 to->tc_index = from->tc_index;
652#endif
653 nf_copy(to, from);
654 skb_ext_copy(to, from);
655 skb_copy_secmark(to, from);
656}
657
658int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
659 u8 nexthdr, __be32 frag_id,
660 struct ip6_fraglist_iter *iter)
661{
662 unsigned int first_len;
663 struct frag_hdr *fh;
664
665 /* BUILD HEADER */
666 *prevhdr = NEXTHDR_FRAGMENT;
667 iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
668 if (!iter->tmp_hdr)
669 return -ENOMEM;
670
671 iter->frag = skb_shinfo(skb)->frag_list;
672 skb_frag_list_init(skb);
673
674 iter->offset = 0;
675 iter->hlen = hlen;
676 iter->frag_id = frag_id;
677 iter->nexthdr = nexthdr;
678
679 __skb_pull(skb, hlen);
680 fh = __skb_push(skb, sizeof(struct frag_hdr));
681 __skb_push(skb, hlen);
682 skb_reset_network_header(skb);
683 memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
684
685 fh->nexthdr = nexthdr;
686 fh->reserved = 0;
687 fh->frag_off = htons(IP6_MF);
688 fh->identification = frag_id;
689
690 first_len = skb_pagelen(skb);
691 skb->data_len = first_len - skb_headlen(skb);
692 skb->len = first_len;
693 ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
694
695 return 0;
696}
697EXPORT_SYMBOL(ip6_fraglist_init);
698
699void ip6_fraglist_prepare(struct sk_buff *skb,
700 struct ip6_fraglist_iter *iter)
701{
702 struct sk_buff *frag = iter->frag;
703 unsigned int hlen = iter->hlen;
704 struct frag_hdr *fh;
705
706 frag->ip_summed = CHECKSUM_NONE;
707 skb_reset_transport_header(frag);
708 fh = __skb_push(frag, sizeof(struct frag_hdr));
709 __skb_push(frag, hlen);
710 skb_reset_network_header(frag);
711 memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
712 iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
713 fh->nexthdr = iter->nexthdr;
714 fh->reserved = 0;
715 fh->frag_off = htons(iter->offset);
716 if (frag->next)
717 fh->frag_off |= htons(IP6_MF);
718 fh->identification = iter->frag_id;
719 ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
720 ip6_copy_metadata(frag, skb);
721}
722EXPORT_SYMBOL(ip6_fraglist_prepare);
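
/*
 * Illustrative sketch, not part of the original file: how the fraglist
 * iterator above is meant to be driven, modelled on the fast path of
 * ip6_fragment() further below. The helper name is hypothetical; it assumes
 * the caller has already validated the frag-list geometry the way
 * ip6_fragment() does, and ip6_fraglist_next() comes from <net/ipv6.h>.
 */
static int __maybe_unused
ip6_example_fraglist_xmit(struct net *net, struct sock *sk, struct sk_buff *skb,
			  unsigned int hlen, u8 *prevhdr, u8 nexthdr,
			  __be32 frag_id,
			  int (*output)(struct net *, struct sock *,
					struct sk_buff *))
{
	struct ip6_fraglist_iter iter;
	int err;

	err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id, &iter);
	if (err < 0)
		return err;

	for (;;) {
		/* fix up the next fragment before the current one goes out */
		if (iter.frag)
			ip6_fraglist_prepare(skb, &iter);

		err = output(net, sk, skb);
		if (err || !iter.frag)
			break;

		skb = ip6_fraglist_next(&iter);
	}

	kfree(iter.tmp_hdr);
	if (err)
		kfree_skb_list(iter.frag);
	return err;
}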
723
724void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
725 unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
726 u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
727{
728 state->prevhdr = prevhdr;
729 state->nexthdr = nexthdr;
730 state->frag_id = frag_id;
731
732 state->hlen = hlen;
733 state->mtu = mtu;
734
735 state->left = skb->len - hlen; /* Space per frame */
736 state->ptr = hlen; /* Where to start from */
737
738 state->hroom = hdr_room;
739 state->troom = needed_tailroom;
740
741 state->offset = 0;
742}
743EXPORT_SYMBOL(ip6_frag_init);
744
745struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
746{
747 u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
748 struct sk_buff *frag;
749 struct frag_hdr *fh;
750 unsigned int len;
751
752 len = state->left;
753 /* IF: it doesn't fit, use 'mtu' - the data space left */
754 if (len > state->mtu)
755 len = state->mtu;
756 /* IF: we are not sending up to and including the packet end
757 then align the next start on an eight byte boundary */
758 if (len < state->left)
759 len &= ~7;
760
761 /* Allocate buffer */
762 frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
763 state->hroom + state->troom, GFP_ATOMIC);
764 if (!frag)
765 return ERR_PTR(-ENOMEM);
766
767 /*
768 * Set up data on packet
769 */
770
771 ip6_copy_metadata(frag, skb);
772 skb_reserve(frag, state->hroom);
773 skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
774 skb_reset_network_header(frag);
775 fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
776 frag->transport_header = (frag->network_header + state->hlen +
777 sizeof(struct frag_hdr));
778
779 /*
780 * Charge the memory for the fragment to any owner
781 * it might possess
782 */
783 if (skb->sk)
784 skb_set_owner_w(frag, skb->sk);
785
786 /*
787 * Copy the packet header into the new buffer.
788 */
789 skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
790
791 fragnexthdr_offset = skb_network_header(frag);
792 fragnexthdr_offset += prevhdr - skb_network_header(skb);
793 *fragnexthdr_offset = NEXTHDR_FRAGMENT;
794
795 /*
796 * Build fragment header.
797 */
798 fh->nexthdr = state->nexthdr;
799 fh->reserved = 0;
800 fh->identification = state->frag_id;
801
802 /*
803 * Copy a block of the IP datagram.
804 */
805 BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
806 len));
807 state->left -= len;
808
809 fh->frag_off = htons(state->offset);
810 if (state->left > 0)
811 fh->frag_off |= htons(IP6_MF);
812 ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
813
814 state->ptr += len;
815 state->offset += len;
816
817 return frag;
818}
819EXPORT_SYMBOL(ip6_frag_next);
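
/*
 * Illustrative sketch, not part of the original file: the slow-path
 * counterpart, carving fragments out of a linear skb with ip6_frag_init()
 * and ip6_frag_next(), modelled on the loop in ip6_fragment() below. The
 * helper name is hypothetical, and "mtu" here is the per-fragment data
 * budget (link MTU minus the unfragmentable headers and the fragment
 * header), exactly as ip6_fragment() computes it before ip6_frag_init().
 */
static int __maybe_unused
ip6_example_slow_frag(struct net *net, struct sock *sk, struct sk_buff *skb,
		      unsigned int hlen, unsigned int mtu, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      int (*output)(struct net *, struct sock *,
				    struct sk_buff *))
{
	struct dst_entry *dst = skb_dst(skb);
	struct ip6_frag_state state;
	struct sk_buff *frag;
	int err = 0;

	ip6_frag_init(skb, hlen, mtu, dst->dev->needed_tailroom,
		      LL_RESERVED_SPACE(dst->dev), prevhdr, nexthdr, frag_id,
		      &state);

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		err = output(net, sk, frag);
		if (err)
			goto fail;
	}

	consume_skb(skb);
	return 0;

fail:
	kfree_skb(skb);
	return err;
}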
820
821int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
822 int (*output)(struct net *, struct sock *, struct sk_buff *))
823{
824 struct sk_buff *frag;
825 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
826 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
827 inet6_sk(skb->sk) : NULL;
828 struct ip6_frag_state state;
829 unsigned int mtu, hlen, nexthdr_offset;
830 ktime_t tstamp = skb->tstamp;
831 int hroom, err = 0;
832 __be32 frag_id;
833 u8 *prevhdr, nexthdr = 0;
834
835 err = ip6_find_1stfragopt(skb, &prevhdr);
836 if (err < 0)
837 goto fail;
838 hlen = err;
839 nexthdr = *prevhdr;
840 nexthdr_offset = prevhdr - skb_network_header(skb);
841
842 mtu = ip6_skb_dst_mtu(skb);
843
844 /* We must not fragment if the socket is set to force MTU discovery
845 * or if the skb is not generated by a local socket.
846 */
847 if (unlikely(!skb->ignore_df && skb->len > mtu))
848 goto fail_toobig;
849
850 if (IP6CB(skb)->frag_max_size) {
851 if (IP6CB(skb)->frag_max_size > mtu)
852 goto fail_toobig;
853
854 /* don't send fragments larger than what we received */
855 mtu = IP6CB(skb)->frag_max_size;
856 if (mtu < IPV6_MIN_MTU)
857 mtu = IPV6_MIN_MTU;
858 }
859
860 if (np && np->frag_size < mtu) {
861 if (np->frag_size)
862 mtu = np->frag_size;
863 }
864 if (mtu < hlen + sizeof(struct frag_hdr) + 8)
865 goto fail_toobig;
866 mtu -= hlen + sizeof(struct frag_hdr);
867
868 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
869 &ipv6_hdr(skb)->saddr);
870
871 if (skb->ip_summed == CHECKSUM_PARTIAL &&
872 (err = skb_checksum_help(skb)))
873 goto fail;
874
875 prevhdr = skb_network_header(skb) + nexthdr_offset;
876 hroom = LL_RESERVED_SPACE(rt->dst.dev);
877 if (skb_has_frag_list(skb)) {
878 unsigned int first_len = skb_pagelen(skb);
879 struct ip6_fraglist_iter iter;
880 struct sk_buff *frag2;
881
882 if (first_len - hlen > mtu ||
883 ((first_len - hlen) & 7) ||
884 skb_cloned(skb) ||
885 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
886 goto slow_path;
887
888 skb_walk_frags(skb, frag) {
889 /* Correct geometry. */
890 if (frag->len > mtu ||
891 ((frag->len & 7) && frag->next) ||
892 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
893 goto slow_path_clean;
894
895 /* Partially cloned skb? */
896 if (skb_shared(frag))
897 goto slow_path_clean;
898
899 BUG_ON(frag->sk);
900 if (skb->sk) {
901 frag->sk = skb->sk;
902 frag->destructor = sock_wfree;
903 }
904 skb->truesize -= frag->truesize;
905 }
906
907 err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
908 &iter);
909 if (err < 0)
910 goto fail;
911
912 /* We prevent @rt from being freed. */
913 rcu_read_lock();
914
915 for (;;) {
916 /* Prepare header of the next frame,
917 * before previous one went down. */
918 if (iter.frag)
919 ip6_fraglist_prepare(skb, &iter);
920
921 skb->tstamp = tstamp;
922 err = output(net, sk, skb);
923 if (!err)
924 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
925 IPSTATS_MIB_FRAGCREATES);
926
927 if (err || !iter.frag)
928 break;
929
930 skb = ip6_fraglist_next(&iter);
931 }
932
933 kfree(iter.tmp_hdr);
934
935 if (err == 0) {
936 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
937 IPSTATS_MIB_FRAGOKS);
938 rcu_read_unlock();
939 return 0;
940 }
941
942 kfree_skb_list(iter.frag);
943
944 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
945 IPSTATS_MIB_FRAGFAILS);
946 rcu_read_unlock();
947 return err;
948
949slow_path_clean:
950 skb_walk_frags(skb, frag2) {
951 if (frag2 == frag)
952 break;
953 frag2->sk = NULL;
954 frag2->destructor = NULL;
955 skb->truesize += frag2->truesize;
956 }
957 }
958
959slow_path:
960 /*
961 * Fragment the datagram.
962 */
963
964 ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
965 LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
966 &state);
967
968 /*
969 * Keep copying data until we run out.
970 */
971
972 while (state.left > 0) {
973 frag = ip6_frag_next(skb, &state);
974 if (IS_ERR(frag)) {
975 err = PTR_ERR(frag);
976 goto fail;
977 }
978
979 /*
980 * Put this fragment into the sending queue.
981 */
982 frag->tstamp = tstamp;
983 err = output(net, sk, frag);
984 if (err)
985 goto fail;
986
987 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
988 IPSTATS_MIB_FRAGCREATES);
989 }
990 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
991 IPSTATS_MIB_FRAGOKS);
992 consume_skb(skb);
993 return err;
994
995fail_toobig:
996 if (skb->sk && dst_allfrag(skb_dst(skb)))
997 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
998
999 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1000 err = -EMSGSIZE;
1001
1002fail:
1003 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1004 IPSTATS_MIB_FRAGFAILS);
1005 kfree_skb(skb);
1006 return err;
1007}
1008
1009static inline int ip6_rt_check(const struct rt6key *rt_key,
1010 const struct in6_addr *fl_addr,
1011 const struct in6_addr *addr_cache)
1012{
1013 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
1014 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
1015}
1016
1017static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
1018 struct dst_entry *dst,
1019 const struct flowi6 *fl6)
1020{
1021 struct ipv6_pinfo *np = inet6_sk(sk);
1022 struct rt6_info *rt;
1023
1024 if (!dst)
1025 goto out;
1026
1027 if (dst->ops->family != AF_INET6) {
1028 dst_release(dst);
1029 return NULL;
1030 }
1031
1032 rt = (struct rt6_info *)dst;
1033 /* Yes, checking route validity in the not-connected
1034 * case is not very simple. Take into account,
1035 * that we do not support routing by source, TOS,
1036 * and MSG_DONTROUTE --ANK (980726)
1037 *
1038 * 1. ip6_rt_check(): If route was host route,
1039 * check that cached destination is current.
1040 * If it is network route, we still may
1041 * check its validity using saved pointer
1042 * to the last used address: daddr_cache.
1043 * We do not want to save whole address now,
1044 * (because the main consumer of this service
1045 * is TCP, which does not have this problem),
1046 * so that the last trick works only on connected
1047 * sockets.
1048 * 2. oif also should be the same.
1049 */
1050 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
1051#ifdef CONFIG_IPV6_SUBTREES
1052 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
1053#endif
1054 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
1055 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
1056 dst_release(dst);
1057 dst = NULL;
1058 }
1059
1060out:
1061 return dst;
1062}
1063
1064static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
1065 struct dst_entry **dst, struct flowi6 *fl6)
1066{
1067#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1068 struct neighbour *n;
1069 struct rt6_info *rt;
1070#endif
1071 int err;
1072 int flags = 0;
1073
1074 /* The correct way to handle this would be to do
1075 * ip6_route_get_saddr, and then ip6_route_output; however,
1076 * the route-specific preferred source forces the
1077 * ip6_route_output call _before_ ip6_route_get_saddr.
1078 *
1079 * In source specific routing (no src=any default route),
1080 * ip6_route_output will fail given src=any saddr, though, so
1081 * that's why we try it again later.
1082 */
1083 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
1084 struct fib6_info *from;
1085 struct rt6_info *rt;
1086 bool had_dst = *dst != NULL;
1087
1088 if (!had_dst)
1089 *dst = ip6_route_output(net, sk, fl6);
1090 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
1091
1092 rcu_read_lock();
1093 from = rt ? rcu_dereference(rt->from) : NULL;
1094 err = ip6_route_get_saddr(net, from, &fl6->daddr,
1095 sk ? inet6_sk(sk)->srcprefs : 0,
1096 &fl6->saddr);
1097 rcu_read_unlock();
1098
1099 if (err)
1100 goto out_err_release;
1101
1102 /* If we had an erroneous initial result, pretend it
1103 * never existed and let the SA-enabled version take
1104 * over.
1105 */
1106 if (!had_dst && (*dst)->error) {
1107 dst_release(*dst);
1108 *dst = NULL;
1109 }
1110
1111 if (fl6->flowi6_oif)
1112 flags |= RT6_LOOKUP_F_IFACE;
1113 }
1114
1115 if (!*dst)
1116 *dst = ip6_route_output_flags(net, sk, fl6, flags);
1117
1118 err = (*dst)->error;
1119 if (err)
1120 goto out_err_release;
1121
1122#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1123 /*
1124 * Here if the dst entry we've looked up
1125 * has a neighbour entry that is in the INCOMPLETE
1126 * state and the src address from the flow is
1127 * marked as OPTIMISTIC, we release the found
1128 * dst entry and replace it instead with the
1129 * dst entry of the nexthop router
1130 */
1131 rt = (struct rt6_info *) *dst;
1132 rcu_read_lock_bh();
1133 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1134 rt6_nexthop(rt, &fl6->daddr));
1135 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1136 rcu_read_unlock_bh();
1137
1138 if (err) {
1139 struct inet6_ifaddr *ifp;
1140 struct flowi6 fl_gw6;
1141 int redirect;
1142
1143 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1144 (*dst)->dev, 1);
1145
1146 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1147 if (ifp)
1148 in6_ifa_put(ifp);
1149
1150 if (redirect) {
1151 /*
1152 * We need to get the dst entry for the
1153 * default router instead
1154 */
1155 dst_release(*dst);
1156 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1157 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1158 *dst = ip6_route_output(net, sk, &fl_gw6);
1159 err = (*dst)->error;
1160 if (err)
1161 goto out_err_release;
1162 }
1163 }
1164#endif
1165 if (ipv6_addr_v4mapped(&fl6->saddr) &&
1166 !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1167 err = -EAFNOSUPPORT;
1168 goto out_err_release;
1169 }
1170
1171 return 0;
1172
1173out_err_release:
1174 dst_release(*dst);
1175 *dst = NULL;
1176
1177 if (err == -ENETUNREACH)
1178 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1179 return err;
1180}
1181
1182/**
1183 * ip6_dst_lookup - perform route lookup on flow
1184 * @sk: socket which provides route info
1185 * @dst: pointer to dst_entry * for result
1186 * @fl6: flow to lookup
1187 *
1188 * This function performs a route lookup on the given flow.
1189 *
1190 * It returns zero on success, or a standard errno code on error.
1191 */
1192int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1193 struct flowi6 *fl6)
1194{
1195 *dst = NULL;
1196 return ip6_dst_lookup_tail(net, sk, dst, fl6);
1197}
1198EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1199
1200/**
1201 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1202 * @sk: socket which provides route info
1203 * @fl6: flow to lookup
1204 * @final_dst: final destination address for ipsec lookup
1205 *
1206 * This function performs a route lookup on the given flow.
1207 *
1208 * It returns a valid dst pointer on success, or a pointer encoded
1209 * error code.
1210 */
1211struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1212 const struct in6_addr *final_dst)
1213{
1214 struct dst_entry *dst = NULL;
1215 int err;
1216
1217 err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1218 if (err)
1219 return ERR_PTR(err);
1220 if (final_dst)
1221 fl6->daddr = *final_dst;
1222
1223 return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1224}
1225EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
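
/*
 * Illustrative sketch, not part of the original file: a minimal datagram-style
 * caller of ip6_dst_lookup_flow(). The helper name and the flow fields chosen
 * here are assumptions for the example; the returned dst is either valid or an
 * ERR_PTR(), and the caller owns a reference that must be dropped with
 * dst_release().
 */
static struct dst_entry *__maybe_unused
ip6_example_route_output(struct net *net, struct sock *sk,
			 const struct in6_addr *daddr, int oif)
{
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_UDP;
	fl6.flowi6_oif = oif;
	fl6.daddr = *daddr;

	return ip6_dst_lookup_flow(net, sk, &fl6, NULL);
}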
1226
1227/**
1228 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1229 * @sk: socket which provides the dst cache and route info
1230 * @fl6: flow to lookup
1231 * @final_dst: final destination address for ipsec lookup
1232 * @connected: whether @sk is connected or not
1233 *
1234 * This function performs a route lookup on the given flow with the
1235 * possibility of using the cached route in the socket if it is valid.
1236 * It will take the socket dst lock when operating on the dst cache.
1237 * As a result, this function can only be used in process context.
1238 *
1239 * In addition, for a connected socket, cache the dst in the socket
1240 * if the current cache is not valid.
1241 *
1242 * It returns a valid dst pointer on success, or a pointer encoded
1243 * error code.
1244 */
1245struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1246 const struct in6_addr *final_dst,
1247 bool connected)
1248{
1249 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1250
1251 dst = ip6_sk_dst_check(sk, dst, fl6);
1252 if (dst)
1253 return dst;
1254
1255 dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
1256 if (connected && !IS_ERR(dst))
1257 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1258
1259 return dst;
1260}
1261EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1262
1263static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1264 gfp_t gfp)
1265{
1266 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1267}
1268
1269static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1270 gfp_t gfp)
1271{
1272 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1273}
1274
1275static void ip6_append_data_mtu(unsigned int *mtu,
1276 int *maxfraglen,
1277 unsigned int fragheaderlen,
1278 struct sk_buff *skb,
1279 struct rt6_info *rt,
1280 unsigned int orig_mtu)
1281{
1282 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1283 if (!skb) {
1284 /* first fragment, reserve header_len */
1285 *mtu = orig_mtu - rt->dst.header_len;
1286
1287 } else {
1288 /*
1289 * this fragment is not the first; the header
1290 * space is regarded as data space.
1291 */
1292 *mtu = orig_mtu;
1293 }
1294 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1295 + fragheaderlen - sizeof(struct frag_hdr);
1296 }
1297}
1298
1299static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1300 struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1301 struct rt6_info *rt, struct flowi6 *fl6)
1302{
1303 struct ipv6_pinfo *np = inet6_sk(sk);
1304 unsigned int mtu;
1305 struct ipv6_txoptions *opt = ipc6->opt;
1306
1307 /*
1308 * setup for corking
1309 */
1310 if (opt) {
1311 if (WARN_ON(v6_cork->opt))
1312 return -EINVAL;
1313
1314 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1315 if (unlikely(!v6_cork->opt))
1316 return -ENOBUFS;
1317
1318 v6_cork->opt->tot_len = sizeof(*opt);
1319 v6_cork->opt->opt_flen = opt->opt_flen;
1320 v6_cork->opt->opt_nflen = opt->opt_nflen;
1321
1322 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1323 sk->sk_allocation);
1324 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1325 return -ENOBUFS;
1326
1327 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1328 sk->sk_allocation);
1329 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1330 return -ENOBUFS;
1331
1332 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1333 sk->sk_allocation);
1334 if (opt->hopopt && !v6_cork->opt->hopopt)
1335 return -ENOBUFS;
1336
1337 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1338 sk->sk_allocation);
1339 if (opt->srcrt && !v6_cork->opt->srcrt)
1340 return -ENOBUFS;
1341
1342 /* need source address above --miyazawa */
1343 }
1344 dst_hold(&rt->dst);
1345 cork->base.dst = &rt->dst;
1346 cork->fl.u.ip6 = *fl6;
1347 v6_cork->hop_limit = ipc6->hlimit;
1348 v6_cork->tclass = ipc6->tclass;
1349 if (rt->dst.flags & DST_XFRM_TUNNEL)
1350 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1351 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1352 else
1353 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1354 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1355 if (np->frag_size < mtu) {
1356 if (np->frag_size)
1357 mtu = np->frag_size;
1358 }
1359 cork->base.fragsize = mtu;
1360 cork->base.gso_size = ipc6->gso_size;
1361 cork->base.tx_flags = 0;
1362 cork->base.mark = ipc6->sockc.mark;
1363 sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1364
1365 if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1366 cork->base.flags |= IPCORK_ALLFRAG;
1367 cork->base.length = 0;
1368
1369 cork->base.transmit_time = ipc6->sockc.transmit_time;
1370
1371 return 0;
1372}
1373
1374static int __ip6_append_data(struct sock *sk,
1375 struct flowi6 *fl6,
1376 struct sk_buff_head *queue,
1377 struct inet_cork *cork,
1378 struct inet6_cork *v6_cork,
1379 struct page_frag *pfrag,
1380 int getfrag(void *from, char *to, int offset,
1381 int len, int odd, struct sk_buff *skb),
1382 void *from, int length, int transhdrlen,
1383 unsigned int flags, struct ipcm6_cookie *ipc6)
1384{
1385 struct sk_buff *skb, *skb_prev = NULL;
1386 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1387 struct ubuf_info *uarg = NULL;
1388 int exthdrlen = 0;
1389 int dst_exthdrlen = 0;
1390 int hh_len;
1391 int copy;
1392 int err;
1393 int offset = 0;
1394 u32 tskey = 0;
1395 struct rt6_info *rt = (struct rt6_info *)cork->dst;
1396 struct ipv6_txoptions *opt = v6_cork->opt;
1397 int csummode = CHECKSUM_NONE;
1398 unsigned int maxnonfragsize, headersize;
1399 unsigned int wmem_alloc_delta = 0;
1400 bool paged, extra_uref = false;
1401
1402 skb = skb_peek_tail(queue);
1403 if (!skb) {
1404 exthdrlen = opt ? opt->opt_flen : 0;
1405 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1406 }
1407
1408 paged = !!cork->gso_size;
1409 mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1410 orig_mtu = mtu;
1411
1412 if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
1413 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1414 tskey = sk->sk_tskey++;
1415
1416 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1417
1418 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1419 (opt ? opt->opt_nflen : 0);
1420
1421 headersize = sizeof(struct ipv6hdr) +
1422 (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1423 (dst_allfrag(&rt->dst) ?
1424 sizeof(struct frag_hdr) : 0) +
1425 rt->rt6i_nfheader_len;
1426
1427 if (mtu <= fragheaderlen ||
1428 ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
1429 goto emsgsize;
1430
1431 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1432 sizeof(struct frag_hdr);
1433
1434 /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1435 * within the first fragment
1436 */
1437 if (headersize + transhdrlen > mtu)
1438 goto emsgsize;
1439
1440 if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1441 (sk->sk_protocol == IPPROTO_UDP ||
1442 sk->sk_protocol == IPPROTO_RAW)) {
1443 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1444 sizeof(struct ipv6hdr));
1445 goto emsgsize;
1446 }
1447
1448 if (ip6_sk_ignore_df(sk))
1449 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1450 else
1451 maxnonfragsize = mtu;
1452
1453 if (cork->length + length > maxnonfragsize - headersize) {
1454emsgsize:
1455 pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1456 ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1457 return -EMSGSIZE;
1458 }
1459
1460 /* CHECKSUM_PARTIAL only with no extension headers and when
1461 * we are not going to fragment
1462 */
1463 if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1464 headersize == sizeof(struct ipv6hdr) &&
1465 length <= mtu - headersize &&
1466 (!(flags & MSG_MORE) || cork->gso_size) &&
1467 rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1468 csummode = CHECKSUM_PARTIAL;
1469
1470 if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
1471 uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
1472 if (!uarg)
1473 return -ENOBUFS;
1474 extra_uref = !skb_zcopy(skb); /* only ref on new uarg */
1475 if (rt->dst.dev->features & NETIF_F_SG &&
1476 csummode == CHECKSUM_PARTIAL) {
1477 paged = true;
1478 } else {
1479 uarg->zerocopy = 0;
1480 skb_zcopy_set(skb, uarg, &extra_uref);
1481 }
1482 }
1483
1484 /*
1485 * Let's try using as much space as possible.
1486 * Use MTU if total length of the message fits into the MTU.
1487 * Otherwise, we need to reserve fragment header and
1488 * fragment alignment (= 8-15 octets, in total).
1489 *
1490 * Note that we may need to "move" the data from the tail
1491 * of the buffer to the new fragment when we split
1492 * the message.
1493 *
1494 * FIXME: It may be fragmented into multiple chunks
1495 * at once if non-fragmentable extension headers
1496 * are too large.
1497 * --yoshfuji
1498 */
1499
1500 cork->length += length;
1501 if (!skb)
1502 goto alloc_new_skb;
1503
1504 while (length > 0) {
1505 /* Check if the remaining data fits into current packet. */
1506 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1507 if (copy < length)
1508 copy = maxfraglen - skb->len;
1509
1510 if (copy <= 0) {
1511 char *data;
1512 unsigned int datalen;
1513 unsigned int fraglen;
1514 unsigned int fraggap;
1515 unsigned int alloclen, alloc_extra;
1516 unsigned int pagedlen;
1517alloc_new_skb:
1518 /* There's no room in the current skb */
1519 if (skb)
1520 fraggap = skb->len - maxfraglen;
1521 else
1522 fraggap = 0;
1523 /* update mtu and maxfraglen if necessary */
1524 if (!skb || !skb_prev)
1525 ip6_append_data_mtu(&mtu, &maxfraglen,
1526 fragheaderlen, skb, rt,
1527 orig_mtu);
1528
1529 skb_prev = skb;
1530
1531 /*
1532 * If remaining data exceeds the mtu,
1533 * we know we need more fragment(s).
1534 */
1535 datalen = length + fraggap;
1536
1537 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1538 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1539 fraglen = datalen + fragheaderlen;
1540 pagedlen = 0;
1541
1542 alloc_extra = hh_len;
1543 alloc_extra += dst_exthdrlen;
1544 alloc_extra += rt->dst.trailer_len;
1545
1546 /* We just reserve space for the fragment header.
1547 * Note: this may be an overallocation if the message
1548 * (without MSG_MORE) fits into the MTU.
1549 */
1550 alloc_extra += sizeof(struct frag_hdr);
1551
1552 if ((flags & MSG_MORE) &&
1553 !(rt->dst.dev->features&NETIF_F_SG))
1554 alloclen = mtu;
1555 else if (!paged &&
1556 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
1557 !(rt->dst.dev->features & NETIF_F_SG)))
1558 alloclen = fraglen;
1559 else {
1560 alloclen = min_t(int, fraglen, MAX_HEADER);
1561 pagedlen = fraglen - alloclen;
1562 }
1563 alloclen += alloc_extra;
1564
1565 if (datalen != length + fraggap) {
1566 /*
1567 * this is not the last fragment, the trailer
1568 * space is regarded as data space.
1569 */
1570 datalen += rt->dst.trailer_len;
1571 }
1572
1573 fraglen = datalen + fragheaderlen;
1574
1575 copy = datalen - transhdrlen - fraggap - pagedlen;
1576 if (copy < 0) {
1577 err = -EINVAL;
1578 goto error;
1579 }
1580 if (transhdrlen) {
1581 skb = sock_alloc_send_skb(sk, alloclen,
1582 (flags & MSG_DONTWAIT), &err);
1583 } else {
1584 skb = NULL;
1585 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1586 2 * sk->sk_sndbuf)
1587 skb = alloc_skb(alloclen,
1588 sk->sk_allocation);
1589 if (unlikely(!skb))
1590 err = -ENOBUFS;
1591 }
1592 if (!skb)
1593 goto error;
1594 /*
1595 * Fill in the control structures
1596 */
1597 skb->protocol = htons(ETH_P_IPV6);
1598 skb->ip_summed = csummode;
1599 skb->csum = 0;
1600 /* reserve for fragmentation and ipsec header */
1601 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1602 dst_exthdrlen);
1603
1604 /*
1605 * Find where to start putting bytes
1606 */
1607 data = skb_put(skb, fraglen - pagedlen);
1608 skb_set_network_header(skb, exthdrlen);
1609 data += fragheaderlen;
1610 skb->transport_header = (skb->network_header +
1611 fragheaderlen);
1612 if (fraggap) {
1613 skb->csum = skb_copy_and_csum_bits(
1614 skb_prev, maxfraglen,
1615 data + transhdrlen, fraggap, 0);
1616 skb_prev->csum = csum_sub(skb_prev->csum,
1617 skb->csum);
1618 data += fraggap;
1619 pskb_trim_unique(skb_prev, maxfraglen);
1620 }
1621 if (copy > 0 &&
1622 getfrag(from, data + transhdrlen, offset,
1623 copy, fraggap, skb) < 0) {
1624 err = -EFAULT;
1625 kfree_skb(skb);
1626 goto error;
1627 }
1628
1629 offset += copy;
1630 length -= copy + transhdrlen;
1631 transhdrlen = 0;
1632 exthdrlen = 0;
1633 dst_exthdrlen = 0;
1634
1635 /* Only the initial fragment is time stamped */
1636 skb_shinfo(skb)->tx_flags = cork->tx_flags;
1637 cork->tx_flags = 0;
1638 skb_shinfo(skb)->tskey = tskey;
1639 tskey = 0;
1640 skb_zcopy_set(skb, uarg, &extra_uref);
1641
1642 if ((flags & MSG_CONFIRM) && !skb_prev)
1643 skb_set_dst_pending_confirm(skb, 1);
1644
1645 /*
1646 * Put the packet on the pending queue
1647 */
1648 if (!skb->destructor) {
1649 skb->destructor = sock_wfree;
1650 skb->sk = sk;
1651 wmem_alloc_delta += skb->truesize;
1652 }
1653 __skb_queue_tail(queue, skb);
1654 continue;
1655 }
1656
1657 if (copy > length)
1658 copy = length;
1659
1660 if (!(rt->dst.dev->features&NETIF_F_SG) &&
1661 skb_tailroom(skb) >= copy) {
1662 unsigned int off;
1663
1664 off = skb->len;
1665 if (getfrag(from, skb_put(skb, copy),
1666 offset, copy, off, skb) < 0) {
1667 __skb_trim(skb, off);
1668 err = -EFAULT;
1669 goto error;
1670 }
1671 } else if (!uarg || !uarg->zerocopy) {
1672 int i = skb_shinfo(skb)->nr_frags;
1673
1674 err = -ENOMEM;
1675 if (!sk_page_frag_refill(sk, pfrag))
1676 goto error;
1677
1678 if (!skb_can_coalesce(skb, i, pfrag->page,
1679 pfrag->offset)) {
1680 err = -EMSGSIZE;
1681 if (i == MAX_SKB_FRAGS)
1682 goto error;
1683
1684 __skb_fill_page_desc(skb, i, pfrag->page,
1685 pfrag->offset, 0);
1686 skb_shinfo(skb)->nr_frags = ++i;
1687 get_page(pfrag->page);
1688 }
1689 copy = min_t(int, copy, pfrag->size - pfrag->offset);
1690 if (getfrag(from,
1691 page_address(pfrag->page) + pfrag->offset,
1692 offset, copy, skb->len, skb) < 0)
1693 goto error_efault;
1694
1695 pfrag->offset += copy;
1696 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1697 skb->len += copy;
1698 skb->data_len += copy;
1699 skb->truesize += copy;
1700 wmem_alloc_delta += copy;
1701 } else {
1702 err = skb_zerocopy_iter_dgram(skb, from, copy);
1703 if (err < 0)
1704 goto error;
1705 }
1706 offset += copy;
1707 length -= copy;
1708 }
1709
1710 if (wmem_alloc_delta)
1711 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1712 return 0;
1713
1714error_efault:
1715 err = -EFAULT;
1716error:
1717 if (uarg)
1718 sock_zerocopy_put_abort(uarg, extra_uref);
1719 cork->length -= length;
1720 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1721 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1722 return err;
1723}
1724
1725int ip6_append_data(struct sock *sk,
1726 int getfrag(void *from, char *to, int offset, int len,
1727 int odd, struct sk_buff *skb),
1728 void *from, int length, int transhdrlen,
1729 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1730 struct rt6_info *rt, unsigned int flags)
1731{
1732 struct inet_sock *inet = inet_sk(sk);
1733 struct ipv6_pinfo *np = inet6_sk(sk);
1734 int exthdrlen;
1735 int err;
1736
1737 if (flags&MSG_PROBE)
1738 return 0;
1739 if (skb_queue_empty(&sk->sk_write_queue)) {
1740 /*
1741 * setup for corking
1742 */
1743 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1744 ipc6, rt, fl6);
1745 if (err)
1746 return err;
1747
1748 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1749 length += exthdrlen;
1750 transhdrlen += exthdrlen;
1751 } else {
1752 fl6 = &inet->cork.fl.u.ip6;
1753 transhdrlen = 0;
1754 }
1755
1756 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1757 &np->cork, sk_page_frag(sk), getfrag,
1758 from, length, transhdrlen, flags, ipc6);
1759}
1760EXPORT_SYMBOL_GPL(ip6_append_data);
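
/*
 * Illustrative sketch, not part of the original file: the corked send pattern
 * that ip6_append_data()/ip6_push_pending_frames() exist for, loosely modelled
 * on the UDPv6 and ICMPv6 senders. The helper name is hypothetical; the caller
 * is assumed to hold the socket lock and to have prepared the getfrag
 * callback, the ipcm6 cookie, the flow and the route.
 */
static int __maybe_unused
ip6_example_corked_send(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
				    int odd, struct sk_buff *skb),
			void *from, int length, int transhdrlen,
			struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			struct rt6_info *rt, unsigned int flags)
{
	int err;

	err = ip6_append_data(sk, getfrag, from, length, transhdrlen,
			      ipc6, fl6, rt, flags);
	if (err) {
		/* anything already queued must be dropped on error */
		ip6_flush_pending_frames(sk);
		return err;
	}

	if (!(flags & MSG_MORE))
		err = ip6_push_pending_frames(sk);

	return err;
}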
1761
1762static void ip6_cork_release(struct inet_cork_full *cork,
1763 struct inet6_cork *v6_cork)
1764{
1765 if (v6_cork->opt) {
1766 kfree(v6_cork->opt->dst0opt);
1767 kfree(v6_cork->opt->dst1opt);
1768 kfree(v6_cork->opt->hopopt);
1769 kfree(v6_cork->opt->srcrt);
1770 kfree(v6_cork->opt);
1771 v6_cork->opt = NULL;
1772 }
1773
1774 if (cork->base.dst) {
1775 dst_release(cork->base.dst);
1776 cork->base.dst = NULL;
1777 cork->base.flags &= ~IPCORK_ALLFRAG;
1778 }
1779 memset(&cork->fl, 0, sizeof(cork->fl));
1780}
1781
1782struct sk_buff *__ip6_make_skb(struct sock *sk,
1783 struct sk_buff_head *queue,
1784 struct inet_cork_full *cork,
1785 struct inet6_cork *v6_cork)
1786{
1787 struct sk_buff *skb, *tmp_skb;
1788 struct sk_buff **tail_skb;
1789 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1790 struct ipv6_pinfo *np = inet6_sk(sk);
1791 struct net *net = sock_net(sk);
1792 struct ipv6hdr *hdr;
1793 struct ipv6_txoptions *opt = v6_cork->opt;
1794 struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1795 struct flowi6 *fl6 = &cork->fl.u.ip6;
1796 unsigned char proto = fl6->flowi6_proto;
1797
1798 skb = __skb_dequeue(queue);
1799 if (!skb)
1800 goto out;
1801 tail_skb = &(skb_shinfo(skb)->frag_list);
1802
1803 /* move skb->data to ip header from ext header */
1804 if (skb->data < skb_network_header(skb))
1805 __skb_pull(skb, skb_network_offset(skb));
1806 while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1807 __skb_pull(tmp_skb, skb_network_header_len(skb));
1808 *tail_skb = tmp_skb;
1809 tail_skb = &(tmp_skb->next);
1810 skb->len += tmp_skb->len;
1811 skb->data_len += tmp_skb->len;
1812 skb->truesize += tmp_skb->truesize;
1813 tmp_skb->destructor = NULL;
1814 tmp_skb->sk = NULL;
1815 }
1816
1817 /* Allow local fragmentation. */
1818 skb->ignore_df = ip6_sk_ignore_df(sk);
1819
1820 *final_dst = fl6->daddr;
1821 __skb_pull(skb, skb_network_header_len(skb));
1822 if (opt && opt->opt_flen)
1823 ipv6_push_frag_opts(skb, opt, &proto);
1824 if (opt && opt->opt_nflen)
1825 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1826
1827 skb_push(skb, sizeof(struct ipv6hdr));
1828 skb_reset_network_header(skb);
1829 hdr = ipv6_hdr(skb);
1830
1831 ip6_flow_hdr(hdr, v6_cork->tclass,
1832 ip6_make_flowlabel(net, skb, fl6->flowlabel,
1833 ip6_autoflowlabel(net, np), fl6));
1834 hdr->hop_limit = v6_cork->hop_limit;
1835 hdr->nexthdr = proto;
1836 hdr->saddr = fl6->saddr;
1837 hdr->daddr = *final_dst;
1838
1839 skb->priority = sk->sk_priority;
1840 skb->mark = cork->base.mark;
1841
1842 skb->tstamp = cork->base.transmit_time;
1843
1844 skb_dst_set(skb, dst_clone(&rt->dst));
1845 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1846 if (proto == IPPROTO_ICMPV6) {
1847 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1848 u8 icmp6_type;
1849
1850 if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl)
1851 icmp6_type = fl6->fl6_icmp_type;
1852 else
1853 icmp6_type = icmp6_hdr(skb)->icmp6_type;
1854 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
1855 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1856 }
1857
1858 ip6_cork_release(cork, v6_cork);
1859out:
1860 return skb;
1861}
1862
1863int ip6_send_skb(struct sk_buff *skb)
1864{
1865 struct net *net = sock_net(skb->sk);
1866 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1867 int err;
1868
1869 rcu_read_lock();
1870 err = ip6_local_out(net, skb->sk, skb);
1871 if (err) {
1872 if (err > 0)
1873 err = net_xmit_errno(err);
1874 if (err)
1875 IP6_INC_STATS(net, rt->rt6i_idev,
1876 IPSTATS_MIB_OUTDISCARDS);
1877 }
1878
1879 rcu_read_unlock();
1880 return err;
1881}
1882
1883int ip6_push_pending_frames(struct sock *sk)
1884{
1885 struct sk_buff *skb;
1886
1887 skb = ip6_finish_skb(sk);
1888 if (!skb)
1889 return 0;
1890
1891 return ip6_send_skb(skb);
1892}
1893EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1894
1895static void __ip6_flush_pending_frames(struct sock *sk,
1896 struct sk_buff_head *queue,
1897 struct inet_cork_full *cork,
1898 struct inet6_cork *v6_cork)
1899{
1900 struct sk_buff *skb;
1901
1902 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1903 if (skb_dst(skb))
1904 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1905 IPSTATS_MIB_OUTDISCARDS);
1906 kfree_skb(skb);
1907 }
1908
1909 ip6_cork_release(cork, v6_cork);
1910}
1911
1912void ip6_flush_pending_frames(struct sock *sk)
1913{
1914 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1915 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1916}
1917EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1918
1919struct sk_buff *ip6_make_skb(struct sock *sk,
1920 int getfrag(void *from, char *to, int offset,
1921 int len, int odd, struct sk_buff *skb),
1922 void *from, int length, int transhdrlen,
1923 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1924 struct rt6_info *rt, unsigned int flags,
1925 struct inet_cork_full *cork)
1926{
1927 struct inet6_cork v6_cork;
1928 struct sk_buff_head queue;
1929 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1930 int err;
1931
1932 if (flags & MSG_PROBE)
1933 return NULL;
1934
1935 __skb_queue_head_init(&queue);
1936
1937 cork->base.flags = 0;
1938 cork->base.addr = 0;
1939 cork->base.opt = NULL;
1940 cork->base.dst = NULL;
1941 v6_cork.opt = NULL;
1942 err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1943 if (err) {
1944 ip6_cork_release(cork, &v6_cork);
1945 return ERR_PTR(err);
1946 }
1947 if (ipc6->dontfrag < 0)
1948 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1949
1950 err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1951 &current->task_frag, getfrag, from,
1952 length + exthdrlen, transhdrlen + exthdrlen,
1953 flags, ipc6);
1954 if (err) {
1955 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1956 return ERR_PTR(err);
1957 }
1958
1959 return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1960}
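
/*
 * Illustrative sketch, not part of the original file: the single-shot,
 * lockless path built on ip6_make_skb(), roughly what a datagram protocol
 * uses when the message is not corked. The helper name is hypothetical; the
 * cork storage lives on the caller's stack and the parameters are assumed to
 * be prepared exactly as for ip6_append_data().
 */
static int __maybe_unused
ip6_example_make_and_send(struct sock *sk,
			  int getfrag(void *from, char *to, int offset,
				      int len, int odd, struct sk_buff *skb),
			  void *from, int length, int transhdrlen,
			  struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			  struct rt6_info *rt, unsigned int flags)
{
	struct inet_cork_full cork;
	struct sk_buff *skb;

	skb = ip6_make_skb(sk, getfrag, from, length, transhdrlen,
			   ipc6, fl6, rt, flags, &cork);
	if (IS_ERR_OR_NULL(skb))
		return PTR_ERR(skb); /* NULL (MSG_PROBE) maps to 0: nothing to send */

	return ip6_send_skb(skb);
}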