1/*
2 * IPv6 output functions
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
 16 * A.N.Kuznetsov : arithmetic in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
29#include <linux/errno.h>
30#include <linux/kernel.h>
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
39#include <linux/module.h>
40#include <linux/slab.h>
41
42#include <linux/bpf-cgroup.h>
43#include <linux/netfilter.h>
44#include <linux/netfilter_ipv6.h>
45
46#include <net/sock.h>
47#include <net/snmp.h>
48
49#include <net/ipv6.h>
50#include <net/ndisc.h>
51#include <net/protocol.h>
52#include <net/ip6_route.h>
53#include <net/addrconf.h>
54#include <net/rawv6.h>
55#include <net/icmp.h>
56#include <net/xfrm.h>
57#include <net/checksum.h>
58#include <linux/mroute6.h>
59#include <net/l3mdev.h>
60#include <net/lwtunnel.h>
61
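/* Resolve the nexthop neighbour for the skb's dst and hand the packet to
 * neigh_output().  Multicast destinations are first looped back to local
 * listeners (and to the multicast router socket) when required, and
 * node-local scoped packets are only allowed out on the loopback device.
 */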
62static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
63{
64 struct dst_entry *dst = skb_dst(skb);
65 struct net_device *dev = dst->dev;
66 struct neighbour *neigh;
67 struct in6_addr *nexthop;
68 int ret;
69
70 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
71 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
72
73 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
74 ((mroute6_is_socket(net, skb) &&
75 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
76 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
77 &ipv6_hdr(skb)->saddr))) {
78 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
79
80 /* Do not check for IFF_ALLMULTI; multicast routing
81 is not supported in any case.
82 */
83 if (newskb)
84 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
85 net, sk, newskb, NULL, newskb->dev,
86 dev_loopback_xmit);
87
88 if (ipv6_hdr(skb)->hop_limit == 0) {
89 IP6_INC_STATS(net, idev,
90 IPSTATS_MIB_OUTDISCARDS);
91 kfree_skb(skb);
92 return 0;
93 }
94 }
95
96 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
97
98 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
99 IPV6_ADDR_SCOPE_NODELOCAL &&
100 !(dev->flags & IFF_LOOPBACK)) {
101 kfree_skb(skb);
102 return 0;
103 }
104 }
105
106 if (lwtunnel_xmit_redirect(dst->lwtstate)) {
107 int res = lwtunnel_xmit(skb);
108
109 if (res < 0 || res == LWTUNNEL_XMIT_DONE)
110 return res;
111 }
112
113 rcu_read_lock_bh();
114 nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
115 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
116 if (unlikely(!neigh))
117 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
118 if (!IS_ERR(neigh)) {
119 sock_confirm_neigh(skb, neigh);
120 ret = neigh_output(neigh, skb);
121 rcu_read_unlock_bh();
122 return ret;
123 }
124 rcu_read_unlock_bh();
125
126 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
127 kfree_skb(skb);
128 return -EINVAL;
129}
130
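/* Post-routing finish step: drop the packet if the egress cgroup BPF
 * program rejects it, restart via dst_output() when SNAT attached a new
 * xfrm policy, and fragment when the packet exceeds the path MTU (or the
 * recorded frag_max_size) and is not GSO.
 */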
131static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
132{
133 int ret;
134
135 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
136 if (ret) {
137 kfree_skb(skb);
138 return ret;
139 }
140
141#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
142 /* Policy lookup after SNAT yielded a new policy */
143 if (skb_dst(skb)->xfrm) {
144 IPCB(skb)->flags |= IPSKB_REROUTED;
145 return dst_output(net, sk, skb);
146 }
147#endif
148
149 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
150 dst_allfrag(skb_dst(skb)) ||
151 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
152 return ip6_fragment(net, sk, skb, ip6_finish_output2);
153 else
154 return ip6_finish_output2(net, sk, skb);
155}
156
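/* Main IPv6 output entry point invoked through dst_output().  Drops the
 * packet when IPv6 is administratively disabled on the egress device,
 * otherwise runs the NF_INET_POST_ROUTING hook (skipped for skbs flagged
 * IP6SKB_REROUTED) before ip6_finish_output().
 */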
157int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
158{
159 struct net_device *dev = skb_dst(skb)->dev;
160 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
161
162 skb->protocol = htons(ETH_P_IPV6);
163 skb->dev = dev;
164
165 if (unlikely(idev->cnf.disable_ipv6)) {
166 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
167 kfree_skb(skb);
168 return 0;
169 }
170
171 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
172 net, sk, skb, NULL, dev,
173 ip6_finish_output,
174 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
175}
176
177bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
178{
179 if (!np->autoflowlabel_set)
180 return ip6_default_np_autolabel(net);
181 else
182 return np->autoflowlabel;
183}
184
185/*
186 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 187 * Note: the socket lock is not held for SYNACK packets, but the skb may
 188 * be modified by calls to skb_set_owner_w() and ipv6_local_error(),
 189 * which use proper atomic operations or spinlocks.
190 */
191int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
192 __u32 mark, struct ipv6_txoptions *opt, int tclass)
193{
194 struct net *net = sock_net(sk);
195 const struct ipv6_pinfo *np = inet6_sk(sk);
196 struct in6_addr *first_hop = &fl6->daddr;
197 struct dst_entry *dst = skb_dst(skb);
198 unsigned int head_room;
199 struct ipv6hdr *hdr;
200 u8 proto = fl6->flowi6_proto;
201 int seg_len = skb->len;
202 int hlimit = -1;
203 u32 mtu;
204
205 head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
206 if (opt)
207 head_room += opt->opt_nflen + opt->opt_flen;
208
209 if (unlikely(skb_headroom(skb) < head_room)) {
210 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
211 if (!skb2) {
212 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
213 IPSTATS_MIB_OUTDISCARDS);
214 kfree_skb(skb);
215 return -ENOBUFS;
216 }
217 if (skb->sk)
218 skb_set_owner_w(skb2, skb->sk);
219 consume_skb(skb);
220 skb = skb2;
221 }
222
223 if (opt) {
224 seg_len += opt->opt_nflen + opt->opt_flen;
225
226 if (opt->opt_flen)
227 ipv6_push_frag_opts(skb, opt, &proto);
228
229 if (opt->opt_nflen)
230 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
231 &fl6->saddr);
232 }
233
234 skb_push(skb, sizeof(struct ipv6hdr));
235 skb_reset_network_header(skb);
236 hdr = ipv6_hdr(skb);
237
238 /*
239 * Fill in the IPv6 header
240 */
241 if (np)
242 hlimit = np->hop_limit;
243 if (hlimit < 0)
244 hlimit = ip6_dst_hoplimit(dst);
245
246 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
247 ip6_autoflowlabel(net, np), fl6));
248
249 hdr->payload_len = htons(seg_len);
250 hdr->nexthdr = proto;
251 hdr->hop_limit = hlimit;
252
253 hdr->saddr = fl6->saddr;
254 hdr->daddr = *first_hop;
255
256 skb->protocol = htons(ETH_P_IPV6);
257 skb->priority = sk->sk_priority;
258 skb->mark = mark;
259
260 mtu = dst_mtu(dst);
261 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
262 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
263 IPSTATS_MIB_OUT, skb->len);
264
265 /* if egress device is enslaved to an L3 master device pass the
266 * skb to its handler for processing
267 */
268 skb = l3mdev_ip6_out((struct sock *)sk, skb);
269 if (unlikely(!skb))
270 return 0;
271
272 /* hooks should never assume socket lock is held.
273 * we promote our socket to non const
274 */
275 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
276 net, (struct sock *)sk, skb, NULL, dst->dev,
277 dst_output);
278 }
279
280 skb->dev = dst->dev;
281 /* ipv6_local_error() does not require socket lock,
282 * we promote our socket to non const
283 */
284 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
285
286 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
287 kfree_skb(skb);
288 return -EMSGSIZE;
289}
290EXPORT_SYMBOL(ip6_xmit);
291
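/* Deliver a Router Alert packet to every raw socket that registered for
 * this alert value via IPV6_ROUTER_ALERT (and, if bound, on the right
 * device).  Returns 1 if at least one socket consumed the skb, 0 if the
 * caller still owns it.
 */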
292static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
293{
294 struct ip6_ra_chain *ra;
295 struct sock *last = NULL;
296
297 read_lock(&ip6_ra_lock);
298 for (ra = ip6_ra_chain; ra; ra = ra->next) {
299 struct sock *sk = ra->sk;
300 if (sk && ra->sel == sel &&
301 (!sk->sk_bound_dev_if ||
302 sk->sk_bound_dev_if == skb->dev->ifindex)) {
303 if (last) {
304 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
305 if (skb2)
306 rawv6_rcv(last, skb2);
307 }
308 last = sk;
309 }
310 }
311
312 if (last) {
313 rawv6_rcv(last, skb);
314 read_unlock(&ip6_ra_lock);
315 return 1;
316 }
317 read_unlock(&ip6_ra_lock);
318 return 0;
319}
320
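/* Decide what to do with a packet whose destination address is being
 * proxied: return 1 for NDISC messages that must be handed to local
 * input, 0 to forward normally, and -1 (after signalling a link failure)
 * for link-local destinations that cannot be forwarded.
 */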
321static int ip6_forward_proxy_check(struct sk_buff *skb)
322{
323 struct ipv6hdr *hdr = ipv6_hdr(skb);
324 u8 nexthdr = hdr->nexthdr;
325 __be16 frag_off;
326 int offset;
327
328 if (ipv6_ext_hdr(nexthdr)) {
329 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
330 if (offset < 0)
331 return 0;
332 } else
333 offset = sizeof(struct ipv6hdr);
334
335 if (nexthdr == IPPROTO_ICMPV6) {
336 struct icmp6hdr *icmp6;
337
338 if (!pskb_may_pull(skb, (skb_network_header(skb) +
339 offset + 1 - skb->data)))
340 return 0;
341
342 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
343
344 switch (icmp6->icmp6_type) {
345 case NDISC_ROUTER_SOLICITATION:
346 case NDISC_ROUTER_ADVERTISEMENT:
347 case NDISC_NEIGHBOUR_SOLICITATION:
348 case NDISC_NEIGHBOUR_ADVERTISEMENT:
349 case NDISC_REDIRECT:
 350 /* When a unicast neighbour discovery message is
 351 * destined to the proxied address, pass it to
 352 * the input function.
353 */
354 return 1;
355 default:
356 break;
357 }
358 }
359
360 /*
361 * The proxying router can't forward traffic sent to a link-local
362 * address, so signal the sender and discard the packet. This
363 * behavior is clarified by the MIPv6 specification.
364 */
365 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
366 dst_link_failure(skb);
367 return -1;
368 }
369
370 return 0;
371}
372
373static inline int ip6_forward_finish(struct net *net, struct sock *sk,
374 struct sk_buff *skb)
375{
376 struct dst_entry *dst = skb_dst(skb);
377
378 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
379 __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
380
381 skb->tstamp = 0;
382 return dst_output(net, sk, skb);
383}
384
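/* Decide whether a forwarded packet must trigger an ICMPV6_PKT_TOOBIG:
 * true when skb->len exceeds the MTU, unless the conntrack-defrag
 * frag_max_size still fits the MTU, ignore_df is set, or the GSO
 * segments individually fit the MTU.
 */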
385static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
386{
387 if (skb->len <= mtu)
388 return false;
389
390 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
391 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
392 return true;
393
394 if (skb->ignore_df)
395 return false;
396
397 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
398 return false;
399
400 return true;
401}
402
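/* Forward a packet on behalf of another host: enforce the forwarding
 * sysctl, hop limit and xfrm policy, honour NDP proxy and Router Alert
 * handling, emit redirects or PKT_TOOBIG where appropriate, decrement
 * the hop limit and run the NF_INET_FORWARD hook.
 */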
403int ip6_forward(struct sk_buff *skb)
404{
405 struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
406 struct dst_entry *dst = skb_dst(skb);
407 struct ipv6hdr *hdr = ipv6_hdr(skb);
408 struct inet6_skb_parm *opt = IP6CB(skb);
409 struct net *net = dev_net(dst->dev);
410 u32 mtu;
411
412 if (net->ipv6.devconf_all->forwarding == 0)
413 goto error;
414
415 if (skb->pkt_type != PACKET_HOST)
416 goto drop;
417
418 if (unlikely(skb->sk))
419 goto drop;
420
421 if (skb_warn_if_lro(skb))
422 goto drop;
423
424 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
425 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
426 goto drop;
427 }
428
429 skb_forward_csum(skb);
430
431 /*
 432 * We DO NOT do any processing on
 433 * RA packets, pushing them to user level AS IS
 434 * without any WARRANTY that the application will be able
 435 * to interpret them. The reason is that we
 436 * cannot make anything clever here.
 437 *
 438 * We are not an end node, so if the packet contains
 439 * AH/ESP, we cannot do anything with it.
 440 * Defragmentation would also be a mistake; RA packets
 441 * cannot be fragmented, because there is no guarantee
 442 * that different fragments will go along one path. --ANK
443 */
444 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
445 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
446 return 0;
447 }
448
449 /*
450 * check and decrement ttl
451 */
452 if (hdr->hop_limit <= 1) {
453 /* Force OUTPUT device used as source address */
454 skb->dev = dst->dev;
455 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
456 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
457
458 kfree_skb(skb);
459 return -ETIMEDOUT;
460 }
461
462 /* XXX: idev->cnf.proxy_ndp? */
463 if (net->ipv6.devconf_all->proxy_ndp &&
464 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
465 int proxied = ip6_forward_proxy_check(skb);
466 if (proxied > 0)
467 return ip6_input(skb);
468 else if (proxied < 0) {
469 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
470 goto drop;
471 }
472 }
473
474 if (!xfrm6_route_forward(skb)) {
475 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
476 goto drop;
477 }
478 dst = skb_dst(skb);
479
480 /* IPv6 specs say nothing about it, but it is clear that we cannot
481 send redirects to source routed frames.
482 We don't send redirects to frames decapsulated from IPsec.
483 */
484 if (IP6CB(skb)->iif == dst->dev->ifindex &&
485 opt->srcrt == 0 && !skb_sec_path(skb)) {
486 struct in6_addr *target = NULL;
487 struct inet_peer *peer;
488 struct rt6_info *rt;
489
490 /*
 491 * incoming and outgoing devices are the same,
 492 * so send a redirect.
493 */
494
495 rt = (struct rt6_info *) dst;
496 if (rt->rt6i_flags & RTF_GATEWAY)
497 target = &rt->rt6i_gateway;
498 else
499 target = &hdr->daddr;
500
501 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
502
503 /* Limit redirects both by destination (here)
504 and by source (inside ndisc_send_redirect)
505 */
506 if (inet_peer_xrlim_allow(peer, 1*HZ))
507 ndisc_send_redirect(skb, target);
508 if (peer)
509 inet_putpeer(peer);
510 } else {
511 int addrtype = ipv6_addr_type(&hdr->saddr);
512
513 /* This check is security critical. */
514 if (addrtype == IPV6_ADDR_ANY ||
515 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
516 goto error;
517 if (addrtype & IPV6_ADDR_LINKLOCAL) {
518 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
519 ICMPV6_NOT_NEIGHBOUR, 0);
520 goto error;
521 }
522 }
523
524 mtu = ip6_dst_mtu_forward(dst);
525 if (mtu < IPV6_MIN_MTU)
526 mtu = IPV6_MIN_MTU;
527
528 if (ip6_pkt_too_big(skb, mtu)) {
529 /* Again, force OUTPUT device used as source address */
530 skb->dev = dst->dev;
531 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
532 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
533 __IP6_INC_STATS(net, ip6_dst_idev(dst),
534 IPSTATS_MIB_FRAGFAILS);
535 kfree_skb(skb);
536 return -EMSGSIZE;
537 }
538
539 if (skb_cow(skb, dst->dev->hard_header_len)) {
540 __IP6_INC_STATS(net, ip6_dst_idev(dst),
541 IPSTATS_MIB_OUTDISCARDS);
542 goto drop;
543 }
544
545 hdr = ipv6_hdr(skb);
546
547 /* Mangling hops number delayed to point after skb COW */
548
549 hdr->hop_limit--;
550
551 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
552 net, NULL, skb, skb->dev, dst->dev,
553 ip6_forward_finish);
554
555error:
556 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
557drop:
558 kfree_skb(skb);
559 return -EINVAL;
560}
561
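/* Copy the per-packet metadata a newly built fragment must inherit from
 * the original skb: packet type, priority, protocol, dst, device, mark,
 * hash, tc index, netfilter state and security mark.
 */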
562static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
563{
564 to->pkt_type = from->pkt_type;
565 to->priority = from->priority;
566 to->protocol = from->protocol;
567 skb_dst_drop(to);
568 skb_dst_set(to, dst_clone(skb_dst(from)));
569 to->dev = from->dev;
570 to->mark = from->mark;
571
572 skb_copy_hash(to, from);
573
574#ifdef CONFIG_NET_SCHED
575 to->tc_index = from->tc_index;
576#endif
577 nf_copy(to, from);
578 skb_copy_secmark(to, from);
579}
580
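/* Vendor (mtk_net) helper: returns 1 when the packet already starts with
 * a Fragment header whose next header is ESP, so ip6_fragment() can pass
 * it through unchanged instead of fragmenting it a second time.
 */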
581static int ignore_double_fragment(struct sk_buff *skb)
582{
583 struct frag_hdr *fh;
584 u8 prevhdr = ipv6_hdr(skb)->nexthdr;
585
586 if (prevhdr != NEXTHDR_FRAGMENT)
587 return 0;
588 fh = (struct frag_hdr *)(skb->data + sizeof(struct ipv6hdr));
589 if (fh->nexthdr == NEXTHDR_ESP)
590 return 1;
591 return 0;
592}
593
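/* Split an oversized packet into fragments and feed each one to @output.
 * A fast path reuses the existing frag_list members as fragments when
 * their geometry already matches; otherwise the slow path copies the
 * payload into freshly allocated skbs, up to mtu bytes at a time
 * (rounded down to a multiple of 8 for all but the last fragment).
 */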
594int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
595 int (*output)(struct net *, struct sock *, struct sk_buff *))
596{
597 struct sk_buff *frag;
598 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
599 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
600 inet6_sk(skb->sk) : NULL;
601 struct ipv6hdr *tmp_hdr;
602 struct frag_hdr *fh;
603 unsigned int mtu, hlen, left, len, nexthdr_offset;
604 int hroom, troom;
605 __be32 frag_id;
606 int ptr, offset = 0, err = 0;
607 u8 *prevhdr, nexthdr = 0;
608
609 err = ip6_find_1stfragopt(skb, &prevhdr);
610 if (err < 0)
611 goto fail;
612 hlen = err;
613 nexthdr = *prevhdr;
614 nexthdr_offset = prevhdr - skb_network_header(skb);
615
616 mtu = ip6_skb_dst_mtu(skb);
617 if (ignore_double_fragment(skb) && skb->len > mtu) {
618 pr_info_ratelimited("[mtk_net] %s ignore to avoid double fragment\n",
619 __func__);
620 err = output(net, sk, skb);
621 return err;
622 }
623
624 /* We must not fragment if the socket is set to force MTU discovery
 625 * or if the skb is not generated by a local socket.
626 */
627 if (unlikely(!skb->ignore_df && skb->len > mtu)) {
628 if (ipv6_hdr(skb)->nexthdr != NEXTHDR_ESP)
629 goto fail_toobig;
630 pr_info_ratelimited("[mtk_net] fix tcp packet_too_big\n");
631 }
632
633 if (IP6CB(skb)->frag_max_size) {
634 if (IP6CB(skb)->frag_max_size > mtu)
635 goto fail_toobig;
636
637 /* don't send fragments larger than what we received */
638 mtu = IP6CB(skb)->frag_max_size;
639 if (mtu < IPV6_MIN_MTU)
640 mtu = IPV6_MIN_MTU;
641 }
642
643 if (np && np->frag_size < mtu) {
644 if (np->frag_size)
645 mtu = np->frag_size;
646 }
647 if (mtu < hlen + sizeof(struct frag_hdr) + 8)
648 goto fail_toobig;
649 mtu -= hlen + sizeof(struct frag_hdr);
650
651 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
652 &ipv6_hdr(skb)->saddr);
653
654 if (skb->ip_summed == CHECKSUM_PARTIAL &&
655 (err = skb_checksum_help(skb)))
656 goto fail;
657
658 prevhdr = skb_network_header(skb) + nexthdr_offset;
659 hroom = LL_RESERVED_SPACE(rt->dst.dev);
660 if (skb_has_frag_list(skb)) {
661 unsigned int first_len = skb_pagelen(skb);
662 struct sk_buff *frag2;
663
664 if (first_len - hlen > mtu ||
665 ((first_len - hlen) & 7) ||
666 skb_cloned(skb) ||
667 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
668 goto slow_path;
669
670 skb_walk_frags(skb, frag) {
671 /* Correct geometry. */
672 if (frag->len > mtu ||
673 ((frag->len & 7) && frag->next) ||
674 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
675 goto slow_path_clean;
676
677 /* Partially cloned skb? */
678 if (skb_shared(frag))
679 goto slow_path_clean;
680
681 BUG_ON(frag->sk);
682 if (skb->sk) {
683 frag->sk = skb->sk;
684 frag->destructor = sock_wfree;
685 }
686 skb->truesize -= frag->truesize;
687 }
688
689 err = 0;
690 offset = 0;
691 /* BUILD HEADER */
692
693 *prevhdr = NEXTHDR_FRAGMENT;
694 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
695 if (!tmp_hdr) {
696 err = -ENOMEM;
697 goto fail;
698 }
699 frag = skb_shinfo(skb)->frag_list;
700 skb_frag_list_init(skb);
701
702 __skb_pull(skb, hlen);
703 fh = __skb_push(skb, sizeof(struct frag_hdr));
704 __skb_push(skb, hlen);
705 skb_reset_network_header(skb);
706 memcpy(skb_network_header(skb), tmp_hdr, hlen);
707
708 fh->nexthdr = nexthdr;
709 fh->reserved = 0;
710 fh->frag_off = htons(IP6_MF);
711 fh->identification = frag_id;
712
713 first_len = skb_pagelen(skb);
714 skb->data_len = first_len - skb_headlen(skb);
715 skb->len = first_len;
716 ipv6_hdr(skb)->payload_len = htons(first_len -
717 sizeof(struct ipv6hdr));
718
719 for (;;) {
 720 /* Prepare the header of the next frame,
 721 * before the previous one goes down. */
722 if (frag) {
723 frag->ip_summed = CHECKSUM_NONE;
724 skb_reset_transport_header(frag);
725 fh = __skb_push(frag, sizeof(struct frag_hdr));
726 __skb_push(frag, hlen);
727 skb_reset_network_header(frag);
728 memcpy(skb_network_header(frag), tmp_hdr,
729 hlen);
730 offset += skb->len - hlen - sizeof(struct frag_hdr);
731 fh->nexthdr = nexthdr;
732 fh->reserved = 0;
733 fh->frag_off = htons(offset);
734 if (frag->next)
735 fh->frag_off |= htons(IP6_MF);
736 fh->identification = frag_id;
737 ipv6_hdr(frag)->payload_len =
738 htons(frag->len -
739 sizeof(struct ipv6hdr));
740 ip6_copy_metadata(frag, skb);
741 }
742
743 err = output(net, sk, skb);
744 if (!err)
745 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
746 IPSTATS_MIB_FRAGCREATES);
747
748 if (err || !frag)
749 break;
750
751 skb = frag;
752 frag = skb->next;
753 skb->next = NULL;
754 }
755
756 kfree(tmp_hdr);
757
758 if (err == 0) {
759 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
760 IPSTATS_MIB_FRAGOKS);
761 return 0;
762 }
763
764 kfree_skb_list(frag);
765
766 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
767 IPSTATS_MIB_FRAGFAILS);
768 return err;
769
770slow_path_clean:
771 skb_walk_frags(skb, frag2) {
772 if (frag2 == frag)
773 break;
774 frag2->sk = NULL;
775 frag2->destructor = NULL;
776 skb->truesize += frag2->truesize;
777 }
778 }
779
780slow_path:
781 left = skb->len - hlen; /* Space per frame */
782 ptr = hlen; /* Where to start from */
783
784 /*
785 * Fragment the datagram.
786 */
787
788 troom = rt->dst.dev->needed_tailroom;
789
790 /*
791 * Keep copying data until we run out.
792 */
793 while (left > 0) {
794 u8 *fragnexthdr_offset;
795
796 len = left;
797 /* IF: it doesn't fit, use 'mtu' - the data space left */
798 if (len > mtu)
799 len = mtu;
 800 /* IF: we are not sending up to and including the packet end,
 801 then align the next start on an eight-byte boundary */
802 if (len < left) {
803 len &= ~7;
804 }
805
806 /* Allocate buffer */
807 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
808 hroom + troom, GFP_ATOMIC);
809 if (!frag) {
810 err = -ENOMEM;
811 goto fail;
812 }
813
814 /*
815 * Set up data on packet
816 */
817
818 ip6_copy_metadata(frag, skb);
819 skb_reserve(frag, hroom);
820 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
821 skb_reset_network_header(frag);
822 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
823 frag->transport_header = (frag->network_header + hlen +
824 sizeof(struct frag_hdr));
825
826 /*
827 * Charge the memory for the fragment to any owner
828 * it might possess
829 */
830 if (skb->sk)
831 skb_set_owner_w(frag, skb->sk);
832
833 /*
834 * Copy the packet header into the new buffer.
835 */
836 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
837
838 fragnexthdr_offset = skb_network_header(frag);
839 fragnexthdr_offset += prevhdr - skb_network_header(skb);
840 *fragnexthdr_offset = NEXTHDR_FRAGMENT;
841
842 /*
843 * Build fragment header.
844 */
845 fh->nexthdr = nexthdr;
846 fh->reserved = 0;
847 fh->identification = frag_id;
848
849 /*
850 * Copy a block of the IP datagram.
851 */
852 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
853 len));
854 left -= len;
855
856 fh->frag_off = htons(offset);
857 if (left > 0)
858 fh->frag_off |= htons(IP6_MF);
859 ipv6_hdr(frag)->payload_len = htons(frag->len -
860 sizeof(struct ipv6hdr));
861
862 ptr += len;
863 offset += len;
864
865 /*
866 * Put this fragment into the sending queue.
867 */
868 err = output(net, sk, frag);
869 if (err)
870 goto fail;
871
872 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
873 IPSTATS_MIB_FRAGCREATES);
874 }
875 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
876 IPSTATS_MIB_FRAGOKS);
877 consume_skb(skb);
878 return err;
879
880fail_toobig:
881 if (skb->sk && dst_allfrag(skb_dst(skb)))
882 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
883
884 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
885 err = -EMSGSIZE;
886
887fail:
888 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
889 IPSTATS_MIB_FRAGFAILS);
890 kfree_skb(skb);
891 return err;
892}
893
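/* Returns non-zero when the cached route can no longer be trusted for
 * @fl_addr: it is neither a /128 host route for that address nor
 * validated by the socket's cached destination address.
 */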
894static inline int ip6_rt_check(const struct rt6key *rt_key,
895 const struct in6_addr *fl_addr,
896 const struct in6_addr *addr_cache)
897{
898 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
899 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
900}
901
902static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
903 struct dst_entry *dst,
904 const struct flowi6 *fl6)
905{
906 struct ipv6_pinfo *np = inet6_sk(sk);
907 struct rt6_info *rt;
908
909 if (!dst)
910 goto out;
911
912 if (dst->ops->family != AF_INET6) {
913 dst_release(dst);
914 return NULL;
915 }
916
917 rt = (struct rt6_info *)dst;
 918 /* Yes, checking route validity in the not-connected
 919 * case is not very simple. Take into account
 920 * that we do not support routing by source, TOS,
 921 * and MSG_DONTROUTE --ANK (980726)
 922 *
 923 * 1. ip6_rt_check(): If the route was a host route,
 924 * check that the cached destination is current.
 925 * If it is a network route, we may still
 926 * check its validity using a saved pointer
 927 * to the last used address: daddr_cache.
 928 * We do not want to save the whole address now
 929 * (because the main consumer of this service
 930 * is TCP, which does not have this problem),
 931 * so the last trick works only on connected
 932 * sockets.
 933 * 2. oif should also be the same.
934 */
935 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
936#ifdef CONFIG_IPV6_SUBTREES
937 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
938#endif
939 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
940 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
941 dst_release(dst);
942 dst = NULL;
943 }
944
945out:
946 return dst;
947}
948
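/* Common tail of the ip6_dst_lookup*() helpers: pick a source address
 * when the flow still has saddr == ::, redo the route lookup with the
 * chosen source, and (with CONFIG_IPV6_OPTIMISTIC_DAD) fall back to the
 * default router's dst when the nexthop neighbour is unresolved and the
 * source address is optimistic.
 */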
949static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
950 struct dst_entry **dst, struct flowi6 *fl6)
951{
952#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
953 struct neighbour *n;
954 struct rt6_info *rt;
955#endif
956 int err;
957 int flags = 0;
958
959 /* The correct way to handle this would be to do
960 * ip6_route_get_saddr, and then ip6_route_output; however,
961 * the route-specific preferred source forces the
962 * ip6_route_output call _before_ ip6_route_get_saddr.
963 *
964 * In source specific routing (no src=any default route),
965 * ip6_route_output will fail given src=any saddr, though, so
966 * that's why we try it again later.
967 */
968 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
969 struct fib6_info *from;
970 struct rt6_info *rt;
971 bool had_dst = *dst != NULL;
972
973 if (!had_dst)
974 *dst = ip6_route_output(net, sk, fl6);
975 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
976
977 rcu_read_lock();
978 from = rt ? rcu_dereference(rt->from) : NULL;
979 err = ip6_route_get_saddr(net, from, &fl6->daddr,
980 sk ? inet6_sk(sk)->srcprefs : 0,
981 &fl6->saddr);
982 rcu_read_unlock();
983
984 if (err)
985 goto out_err_release;
986
987 /* If we had an erroneous initial result, pretend it
988 * never existed and let the SA-enabled version take
989 * over.
990 */
991 if (!had_dst && (*dst)->error) {
992 dst_release(*dst);
993 *dst = NULL;
994 }
995
996 if (fl6->flowi6_oif)
997 flags |= RT6_LOOKUP_F_IFACE;
998 }
999
1000 if (!*dst)
1001 *dst = ip6_route_output_flags(net, sk, fl6, flags);
1002
1003 err = (*dst)->error;
1004 if (err)
1005 goto out_err_release;
1006
1007#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1008 /*
 1009 * If the dst entry we've looked up
 1010 * has a neighbour entry that is in the INCOMPLETE
 1011 * state and the src address from the flow is
 1012 * marked as OPTIMISTIC, we release the found
 1013 * dst entry and replace it with the
 1014 * dst entry of the nexthop router.
1015 */
1016 rt = (struct rt6_info *) *dst;
1017 rcu_read_lock_bh();
1018 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1019 rt6_nexthop(rt, &fl6->daddr));
1020 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1021 rcu_read_unlock_bh();
1022
1023 if (err) {
1024 struct inet6_ifaddr *ifp;
1025 struct flowi6 fl_gw6;
1026 int redirect;
1027
1028 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1029 (*dst)->dev, 1);
1030
1031 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1032 if (ifp)
1033 in6_ifa_put(ifp);
1034
1035 if (redirect) {
1036 /*
1037 * We need to get the dst entry for the
1038 * default router instead
1039 */
1040 dst_release(*dst);
1041 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1042 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1043 *dst = ip6_route_output(net, sk, &fl_gw6);
1044 err = (*dst)->error;
1045 if (err)
1046 goto out_err_release;
1047 }
1048 }
1049#endif
1050 if (ipv6_addr_v4mapped(&fl6->saddr) &&
1051 !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1052 err = -EAFNOSUPPORT;
1053 goto out_err_release;
1054 }
1055
1056 return 0;
1057
1058out_err_release:
1059 dst_release(*dst);
1060 *dst = NULL;
1061
1062 if (err == -ENETUNREACH)
1063 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1064 return err;
1065}
1066
1067/**
1068 * ip6_dst_lookup - perform route lookup on flow
1069 * @sk: socket which provides route info
1070 * @dst: pointer to dst_entry * for result
1071 * @fl6: flow to lookup
1072 *
1073 * This function performs a route lookup on the given flow.
1074 *
1075 * It returns zero on success, or a standard errno code on error.
1076 */
1077int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1078 struct flowi6 *fl6)
1079{
1080 *dst = NULL;
1081 return ip6_dst_lookup_tail(net, sk, dst, fl6);
1082}
1083EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1084
1085/**
1086 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1087 * @sk: socket which provides route info
1088 * @fl6: flow to lookup
1089 * @final_dst: final destination address for ipsec lookup
1090 *
1091 * This function performs a route lookup on the given flow.
1092 *
1093 * It returns a valid dst pointer on success, or a pointer encoded
1094 * error code.
1095 */
1096struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
1097 const struct in6_addr *final_dst)
1098{
1099 struct dst_entry *dst = NULL;
1100 int err;
1101
1102 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
1103 if (err)
1104 return ERR_PTR(err);
1105 if (final_dst)
1106 fl6->daddr = *final_dst;
1107
1108 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1109}
1110EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1111
1112/**
1113 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1114 * @sk: socket which provides the dst cache and route info
1115 * @fl6: flow to lookup
1116 * @final_dst: final destination address for ipsec lookup
1117 * @connected: whether @sk is connected or not
1118 *
1119 * This function performs a route lookup on the given flow with the
1120 * possibility of using the cached route in the socket if it is valid.
1121 * It will take the socket dst lock when operating on the dst cache.
1122 * As a result, this function can only be used in process context.
1123 *
1124 * In addition, for a connected socket, cache the dst in the socket
1125 * if the current cache is not valid.
1126 *
1127 * It returns a valid dst pointer on success, or a pointer encoded
1128 * error code.
1129 */
1130struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1131 const struct in6_addr *final_dst,
1132 bool connected)
1133{
1134 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1135
1136 dst = ip6_sk_dst_check(sk, dst, fl6);
1137 if (dst)
1138 return dst;
1139
1140 dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
1141 if (connected && !IS_ERR(dst))
1142 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1143
1144 return dst;
1145}
1146EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1147
1148static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1149 gfp_t gfp)
1150{
1151 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1152}
1153
1154static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1155 gfp_t gfp)
1156{
1157 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1158}
1159
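/* Recompute mtu/maxfraglen for appends past the first fragment: outside
 * an xfrm tunnel the dst header_len is only reserved on the first
 * fragment; afterwards that space counts as data space.
 */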
1160static void ip6_append_data_mtu(unsigned int *mtu,
1161 int *maxfraglen,
1162 unsigned int fragheaderlen,
1163 struct sk_buff *skb,
1164 struct rt6_info *rt,
1165 unsigned int orig_mtu)
1166{
1167 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1168 if (!skb) {
1169 /* first fragment, reserve header_len */
1170 *mtu = orig_mtu - rt->dst.header_len;
1171
1172 } else {
1173 /*
 1174 * this fragment is not the first, so the header
 1175 * space is regarded as data space.
1176 */
1177 *mtu = orig_mtu;
1178 }
1179 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1180 + fragheaderlen - sizeof(struct frag_hdr);
1181 }
1182}
1183
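/* Prepare the per-socket cork state for ip6_append_data(): duplicate the
 * supplied tx options, take a reference on the route, and record the hop
 * limit, traffic class, fragment size (MTU or socket frag_size), GSO
 * size and timestamping flags that later appends and __ip6_make_skb()
 * will use.
 */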
1184static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1185 struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1186 struct rt6_info *rt, struct flowi6 *fl6)
1187{
1188 struct ipv6_pinfo *np = inet6_sk(sk);
1189 unsigned int mtu;
1190 struct ipv6_txoptions *opt = ipc6->opt;
1191
1192 /*
1193 * setup for corking
1194 */
1195 if (opt) {
1196 if (WARN_ON(v6_cork->opt))
1197 return -EINVAL;
1198
1199 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1200 if (unlikely(!v6_cork->opt))
1201 return -ENOBUFS;
1202
1203 v6_cork->opt->tot_len = sizeof(*opt);
1204 v6_cork->opt->opt_flen = opt->opt_flen;
1205 v6_cork->opt->opt_nflen = opt->opt_nflen;
1206
1207 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1208 sk->sk_allocation);
1209 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1210 return -ENOBUFS;
1211
1212 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1213 sk->sk_allocation);
1214 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1215 return -ENOBUFS;
1216
1217 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1218 sk->sk_allocation);
1219 if (opt->hopopt && !v6_cork->opt->hopopt)
1220 return -ENOBUFS;
1221
1222 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1223 sk->sk_allocation);
1224 if (opt->srcrt && !v6_cork->opt->srcrt)
1225 return -ENOBUFS;
1226
 1227 /* need source address above --miyazawa */
1228 }
1229 dst_hold(&rt->dst);
1230 cork->base.dst = &rt->dst;
1231 cork->fl.u.ip6 = *fl6;
1232 v6_cork->hop_limit = ipc6->hlimit;
1233 v6_cork->tclass = ipc6->tclass;
1234 if (rt->dst.flags & DST_XFRM_TUNNEL)
1235 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1236 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1237 else
1238 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1239 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1240 if (np->frag_size < mtu) {
1241 if (np->frag_size)
1242 mtu = np->frag_size;
1243 }
1244 if (mtu < IPV6_MIN_MTU)
1245 return -EINVAL;
1246 cork->base.fragsize = mtu;
1247 cork->base.gso_size = ipc6->gso_size;
1248 cork->base.tx_flags = 0;
1249 sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1250
1251 if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1252 cork->base.flags |= IPCORK_ALLFRAG;
1253 cork->base.length = 0;
1254
1255 cork->base.transmit_time = ipc6->sockc.transmit_time;
1256
1257 return 0;
1258}
1259
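/* Core of ip6_append_data()/ip6_make_skb(): append @length bytes fetched
 * through @getfrag to the queued packet, growing the tail skb, adding
 * page frags, or starting a new skb whenever the current one would
 * exceed the (possibly GSO-sized) fragment length.
 */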
1260static int __ip6_append_data(struct sock *sk,
1261 struct flowi6 *fl6,
1262 struct sk_buff_head *queue,
1263 struct inet_cork *cork,
1264 struct inet6_cork *v6_cork,
1265 struct page_frag *pfrag,
1266 int getfrag(void *from, char *to, int offset,
1267 int len, int odd, struct sk_buff *skb),
1268 void *from, int length, int transhdrlen,
1269 unsigned int flags, struct ipcm6_cookie *ipc6)
1270{
1271 struct sk_buff *skb, *skb_prev = NULL;
1272 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1273 int exthdrlen = 0;
1274 int dst_exthdrlen = 0;
1275 int hh_len;
1276 int copy;
1277 int err;
1278 int offset = 0;
1279 u32 tskey = 0;
1280 struct rt6_info *rt = (struct rt6_info *)cork->dst;
1281 struct ipv6_txoptions *opt = v6_cork->opt;
1282 int csummode = CHECKSUM_NONE;
1283 unsigned int maxnonfragsize, headersize;
1284 unsigned int wmem_alloc_delta = 0;
1285 bool paged;
1286
1287 skb = skb_peek_tail(queue);
1288 if (!skb) {
1289 exthdrlen = opt ? opt->opt_flen : 0;
1290 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1291 }
1292
1293 paged = !!cork->gso_size;
1294 mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1295 orig_mtu = mtu;
1296
1297 if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1298 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1299 tskey = sk->sk_tskey++;
1300
1301 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1302
1303 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1304 (opt ? opt->opt_nflen : 0);
1305 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1306 sizeof(struct frag_hdr);
1307
1308 headersize = sizeof(struct ipv6hdr) +
1309 (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1310 (dst_allfrag(&rt->dst) ?
1311 sizeof(struct frag_hdr) : 0) +
1312 rt->rt6i_nfheader_len;
1313
1314 /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
 1315 * in the first fragment
1316 */
1317 if (headersize + transhdrlen > mtu)
1318 goto emsgsize;
1319
1320 if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1321 (sk->sk_protocol == IPPROTO_UDP ||
1322 sk->sk_protocol == IPPROTO_RAW)) {
1323 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1324 sizeof(struct ipv6hdr));
1325 goto emsgsize;
1326 }
1327
1328 if (ip6_sk_ignore_df(sk))
1329 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1330 else
1331 maxnonfragsize = mtu;
1332
1333 if (cork->length + length > maxnonfragsize - headersize) {
1334emsgsize:
1335 pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1336 ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1337 return -EMSGSIZE;
1338 }
1339
1340 /* CHECKSUM_PARTIAL only with no extension headers and when
1341 * we are not going to fragment
1342 */
1343 if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1344 headersize == sizeof(struct ipv6hdr) &&
1345 length <= mtu - headersize &&
1346 (!(flags & MSG_MORE) || cork->gso_size) &&
1347 rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1348 csummode = CHECKSUM_PARTIAL;
1349
1350 /*
1351 * Let's try using as much space as possible.
1352 * Use MTU if total length of the message fits into the MTU.
1353 * Otherwise, we need to reserve fragment header and
 1354 * fragment alignment (= 8-15 octets, in total).
 1355 *
 1356 * Note that we may need to "move" the data from the tail
 1357 * of the buffer to the new fragment when we split
1358 * the message.
1359 *
1360 * FIXME: It may be fragmented into multiple chunks
1361 * at once if non-fragmentable extension headers
1362 * are too large.
1363 * --yoshfuji
1364 */
1365
1366 cork->length += length;
1367 if (!skb)
1368 goto alloc_new_skb;
1369
1370 while (length > 0) {
1371 /* Check if the remaining data fits into current packet. */
1372 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1373 if (copy < length)
1374 copy = maxfraglen - skb->len;
1375
1376 if (copy <= 0) {
1377 char *data;
1378 unsigned int datalen;
1379 unsigned int fraglen;
1380 unsigned int fraggap;
1381 unsigned int alloclen;
1382 unsigned int pagedlen = 0;
1383alloc_new_skb:
1384 /* There's no room in the current skb */
1385 if (skb)
1386 fraggap = skb->len - maxfraglen;
1387 else
1388 fraggap = 0;
1389 /* update mtu and maxfraglen if necessary */
1390 if (!skb || !skb_prev)
1391 ip6_append_data_mtu(&mtu, &maxfraglen,
1392 fragheaderlen, skb, rt,
1393 orig_mtu);
1394
1395 skb_prev = skb;
1396
1397 /*
1398 * If remaining data exceeds the mtu,
1399 * we know we need more fragment(s).
1400 */
1401 datalen = length + fraggap;
1402
1403 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1404 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1405 fraglen = datalen + fragheaderlen;
1406
1407 if ((flags & MSG_MORE) &&
1408 !(rt->dst.dev->features&NETIF_F_SG))
1409 alloclen = mtu;
1410 else if (!paged)
1411 alloclen = fraglen;
1412 else {
1413 alloclen = min_t(int, fraglen, MAX_HEADER);
1414 pagedlen = fraglen - alloclen;
1415 }
1416
1417 alloclen += dst_exthdrlen;
1418
1419 if (datalen != length + fraggap) {
1420 /*
1421 * this is not the last fragment, the trailer
1422 * space is regarded as data space.
1423 */
1424 datalen += rt->dst.trailer_len;
1425 }
1426
1427 alloclen += rt->dst.trailer_len;
1428 fraglen = datalen + fragheaderlen;
1429
1430 /*
1431 * We just reserve space for fragment header.
1432 * Note: this may be overallocation if the message
1433 * (without MSG_MORE) fits into the MTU.
1434 */
1435 alloclen += sizeof(struct frag_hdr);
1436
1437 copy = datalen - transhdrlen - fraggap - pagedlen;
1438 if (copy < 0) {
1439 err = -EINVAL;
1440 goto error;
1441 }
1442 if (transhdrlen) {
1443 skb = sock_alloc_send_skb(sk,
1444 alloclen + hh_len,
1445 (flags & MSG_DONTWAIT), &err);
1446 } else {
1447 skb = NULL;
1448 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1449 2 * sk->sk_sndbuf)
1450 skb = alloc_skb(alloclen + hh_len,
1451 sk->sk_allocation);
1452 if (unlikely(!skb))
1453 err = -ENOBUFS;
1454 }
1455 if (!skb)
1456 goto error;
1457 /*
1458 * Fill in the control structures
1459 */
1460 skb->protocol = htons(ETH_P_IPV6);
1461 skb->ip_summed = csummode;
1462 skb->csum = 0;
1463 /* reserve for fragmentation and ipsec header */
1464 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1465 dst_exthdrlen);
1466
1467 /* Only the initial fragment is time stamped */
1468 skb_shinfo(skb)->tx_flags = cork->tx_flags;
1469 cork->tx_flags = 0;
1470 skb_shinfo(skb)->tskey = tskey;
1471 tskey = 0;
1472
1473 /*
1474 * Find where to start putting bytes
1475 */
1476 data = skb_put(skb, fraglen - pagedlen);
1477 skb_set_network_header(skb, exthdrlen);
1478 data += fragheaderlen;
1479 skb->transport_header = (skb->network_header +
1480 fragheaderlen);
1481 if (fraggap) {
1482 skb->csum = skb_copy_and_csum_bits(
1483 skb_prev, maxfraglen,
1484 data + transhdrlen, fraggap, 0);
1485 skb_prev->csum = csum_sub(skb_prev->csum,
1486 skb->csum);
1487 data += fraggap;
1488 pskb_trim_unique(skb_prev, maxfraglen);
1489 }
1490 if (copy > 0 &&
1491 getfrag(from, data + transhdrlen, offset,
1492 copy, fraggap, skb) < 0) {
1493 err = -EFAULT;
1494 kfree_skb(skb);
1495 goto error;
1496 }
1497
1498 offset += copy;
1499 length -= copy + transhdrlen;
1500 transhdrlen = 0;
1501 exthdrlen = 0;
1502 dst_exthdrlen = 0;
1503
1504 if ((flags & MSG_CONFIRM) && !skb_prev)
1505 skb_set_dst_pending_confirm(skb, 1);
1506
1507 /*
1508 * Put the packet on the pending queue
1509 */
1510 if (!skb->destructor) {
1511 skb->destructor = sock_wfree;
1512 skb->sk = sk;
1513 wmem_alloc_delta += skb->truesize;
1514 }
1515 __skb_queue_tail(queue, skb);
1516 continue;
1517 }
1518
1519 if (copy > length)
1520 copy = length;
1521
1522 if (!(rt->dst.dev->features&NETIF_F_SG) &&
1523 skb_tailroom(skb) >= copy) {
1524 unsigned int off;
1525
1526 off = skb->len;
1527 if (getfrag(from, skb_put(skb, copy),
1528 offset, copy, off, skb) < 0) {
1529 __skb_trim(skb, off);
1530 err = -EFAULT;
1531 goto error;
1532 }
1533 } else {
1534 int i = skb_shinfo(skb)->nr_frags;
1535
1536 err = -ENOMEM;
1537 if (!sk_page_frag_refill(sk, pfrag))
1538 goto error;
1539
1540 if (!skb_can_coalesce(skb, i, pfrag->page,
1541 pfrag->offset)) {
1542 err = -EMSGSIZE;
1543 if (i == MAX_SKB_FRAGS)
1544 goto error;
1545
1546 __skb_fill_page_desc(skb, i, pfrag->page,
1547 pfrag->offset, 0);
1548 skb_shinfo(skb)->nr_frags = ++i;
1549 get_page(pfrag->page);
1550 }
1551 copy = min_t(int, copy, pfrag->size - pfrag->offset);
1552 if (getfrag(from,
1553 page_address(pfrag->page) + pfrag->offset,
1554 offset, copy, skb->len, skb) < 0)
1555 goto error_efault;
1556
1557 pfrag->offset += copy;
1558 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1559 skb->len += copy;
1560 skb->data_len += copy;
1561 skb->truesize += copy;
1562 wmem_alloc_delta += copy;
1563 }
1564 offset += copy;
1565 length -= copy;
1566 }
1567
1568 if (wmem_alloc_delta)
1569 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1570 return 0;
1571
1572error_efault:
1573 err = -EFAULT;
1574error:
1575 cork->length -= length;
1576 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1577 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1578 return err;
1579}
1580
1581int ip6_append_data(struct sock *sk,
1582 int getfrag(void *from, char *to, int offset, int len,
1583 int odd, struct sk_buff *skb),
1584 void *from, int length, int transhdrlen,
1585 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1586 struct rt6_info *rt, unsigned int flags)
1587{
1588 struct inet_sock *inet = inet_sk(sk);
1589 struct ipv6_pinfo *np = inet6_sk(sk);
1590 int exthdrlen;
1591 int err;
1592
1593 if (flags&MSG_PROBE)
1594 return 0;
1595 if (skb_queue_empty(&sk->sk_write_queue)) {
1596 /*
1597 * setup for corking
1598 */
1599 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1600 ipc6, rt, fl6);
1601 if (err)
1602 return err;
1603
1604 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1605 length += exthdrlen;
1606 transhdrlen += exthdrlen;
1607 } else {
1608 fl6 = &inet->cork.fl.u.ip6;
1609 transhdrlen = 0;
1610 }
1611
1612 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1613 &np->cork, sk_page_frag(sk), getfrag,
1614 from, length, transhdrlen, flags, ipc6);
1615}
1616EXPORT_SYMBOL_GPL(ip6_append_data);
1617
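/* Release everything ip6_setup_cork() pinned: the duplicated tx options,
 * the route reference and the cached flow/ALLFRAG state.
 */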
1618static void ip6_cork_release(struct inet_cork_full *cork,
1619 struct inet6_cork *v6_cork)
1620{
1621 if (v6_cork->opt) {
1622 kfree(v6_cork->opt->dst0opt);
1623 kfree(v6_cork->opt->dst1opt);
1624 kfree(v6_cork->opt->hopopt);
1625 kfree(v6_cork->opt->srcrt);
1626 kfree(v6_cork->opt);
1627 v6_cork->opt = NULL;
1628 }
1629
1630 if (cork->base.dst) {
1631 dst_release(cork->base.dst);
1632 cork->base.dst = NULL;
1633 cork->base.flags &= ~IPCORK_ALLFRAG;
1634 }
1635 memset(&cork->fl, 0, sizeof(cork->fl));
1636}
1637
1638struct sk_buff *__ip6_make_skb(struct sock *sk,
1639 struct sk_buff_head *queue,
1640 struct inet_cork_full *cork,
1641 struct inet6_cork *v6_cork)
1642{
1643 struct sk_buff *skb, *tmp_skb;
1644 struct sk_buff **tail_skb;
1645 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1646 struct ipv6_pinfo *np = inet6_sk(sk);
1647 struct net *net = sock_net(sk);
1648 struct ipv6hdr *hdr;
1649 struct ipv6_txoptions *opt = v6_cork->opt;
1650 struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1651 struct flowi6 *fl6 = &cork->fl.u.ip6;
1652 unsigned char proto = fl6->flowi6_proto;
1653
1654 skb = __skb_dequeue(queue);
1655 if (!skb)
1656 goto out;
1657 tail_skb = &(skb_shinfo(skb)->frag_list);
1658
1659 /* move skb->data to ip header from ext header */
1660 if (skb->data < skb_network_header(skb))
1661 __skb_pull(skb, skb_network_offset(skb));
1662 while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1663 __skb_pull(tmp_skb, skb_network_header_len(skb));
1664 *tail_skb = tmp_skb;
1665 tail_skb = &(tmp_skb->next);
1666 skb->len += tmp_skb->len;
1667 skb->data_len += tmp_skb->len;
1668 skb->truesize += tmp_skb->truesize;
1669 tmp_skb->destructor = NULL;
1670 tmp_skb->sk = NULL;
1671 }
1672
1673 /* Allow local fragmentation. */
1674 skb->ignore_df = ip6_sk_ignore_df(sk);
1675
1676 *final_dst = fl6->daddr;
1677 __skb_pull(skb, skb_network_header_len(skb));
1678 if (opt && opt->opt_flen)
1679 ipv6_push_frag_opts(skb, opt, &proto);
1680 if (opt && opt->opt_nflen)
1681 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1682
1683 skb_push(skb, sizeof(struct ipv6hdr));
1684 skb_reset_network_header(skb);
1685 hdr = ipv6_hdr(skb);
1686
1687 ip6_flow_hdr(hdr, v6_cork->tclass,
1688 ip6_make_flowlabel(net, skb, fl6->flowlabel,
1689 ip6_autoflowlabel(net, np), fl6));
1690 hdr->hop_limit = v6_cork->hop_limit;
1691 hdr->nexthdr = proto;
1692 hdr->saddr = fl6->saddr;
1693 hdr->daddr = *final_dst;
1694
1695 skb->priority = sk->sk_priority;
1696 skb->mark = sk->sk_mark;
1697
1698 skb->tstamp = cork->base.transmit_time;
1699
1700 skb_dst_set(skb, dst_clone(&rt->dst));
1701 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1702 if (proto == IPPROTO_ICMPV6) {
1703 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1704
1705 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1706 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1707 }
1708
1709 ip6_cork_release(cork, v6_cork);
1710out:
1711 return skb;
1712}
1713
1714int ip6_send_skb(struct sk_buff *skb)
1715{
1716 struct net *net = sock_net(skb->sk);
1717 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1718 int err;
1719
1720 err = ip6_local_out(net, skb->sk, skb);
1721 if (err) {
1722 if (err > 0)
1723 err = net_xmit_errno(err);
1724 if (err)
1725 IP6_INC_STATS(net, rt->rt6i_idev,
1726 IPSTATS_MIB_OUTDISCARDS);
1727 }
1728
1729 return err;
1730}
1731
1732int ip6_push_pending_frames(struct sock *sk)
1733{
1734 struct sk_buff *skb;
1735
1736 skb = ip6_finish_skb(sk);
1737 if (!skb)
1738 return 0;
1739
1740 return ip6_send_skb(skb);
1741}
1742EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1743
1744static void __ip6_flush_pending_frames(struct sock *sk,
1745 struct sk_buff_head *queue,
1746 struct inet_cork_full *cork,
1747 struct inet6_cork *v6_cork)
1748{
1749 struct sk_buff *skb;
1750
1751 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1752 if (skb_dst(skb))
1753 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1754 IPSTATS_MIB_OUTDISCARDS);
1755 kfree_skb(skb);
1756 }
1757
1758 ip6_cork_release(cork, v6_cork);
1759}
1760
1761void ip6_flush_pending_frames(struct sock *sk)
1762{
1763 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1764 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1765}
1766EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1767
1768struct sk_buff *ip6_make_skb(struct sock *sk,
1769 int getfrag(void *from, char *to, int offset,
1770 int len, int odd, struct sk_buff *skb),
1771 void *from, int length, int transhdrlen,
1772 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1773 struct rt6_info *rt, unsigned int flags,
1774 struct inet_cork_full *cork)
1775{
1776 struct inet6_cork v6_cork;
1777 struct sk_buff_head queue;
1778 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1779 int err;
1780
1781 if (flags & MSG_PROBE)
1782 return NULL;
1783
1784 __skb_queue_head_init(&queue);
1785
1786 cork->base.flags = 0;
1787 cork->base.addr = 0;
1788 cork->base.opt = NULL;
1789 cork->base.dst = NULL;
1790 v6_cork.opt = NULL;
1791 err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1792 if (err) {
1793 ip6_cork_release(cork, &v6_cork);
1794 return ERR_PTR(err);
1795 }
1796 if (ipc6->dontfrag < 0)
1797 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1798
1799 err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1800 &current->task_frag, getfrag, from,
1801 length + exthdrlen, transhdrlen + exthdrlen,
1802 flags, ipc6);
1803 if (err) {
1804 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1805 return ERR_PTR(err);
1806 }
1807
1808 return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1809}