Blame - marvell/linux/net/core/skbuff.c - T108

blob: 10314950654b1a5841df17aac82d6265db43b00f [file] [log] [blame]

b.liu	e958203	2025-04-17 19:18:16 +0800	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0-or-later
				2	/*
				3	* Routines having to do with the 'struct sk_buff' memory handlers.
				4	*
				5	* Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>
				6	* Florian La Roche <rzsfl@rz.uni-sb.de>
				7	*
				8	* Fixes:
				9	* Alan Cox : Fixed the worst of the load
				10	* balancer bugs.
				11	* Dave Platt : Interrupt stacking fix.
				12	* Richard Kooijman : Timestamp fixes.
				13	* Alan Cox : Changed buffer format.
				14	* Alan Cox : destructor hook for AF_UNIX etc.
				15	* Linus Torvalds : Better skb_clone.
				16	* Alan Cox : Added skb_copy.
				17	* Alan Cox : Added all the changed routines Linus
				18	* only put in the headers
				19	* Ray VanTassle : Fixed --skb->lock in free
				20	* Alan Cox : skb_copy copy arp field
				21	* Andi Kleen : slabified it.
				22	* Robert Olsson : Removed skb_head_pool
				23	*
				24	* NOTE:
				25	* The __skb_ routines should be called with interrupts
				26	* disabled, or you better be real sure that the operation is atomic
				27	* with respect to whatever list is being frobbed (e.g. via lock_sock()
				28	* or via disabling bottom half handlers, etc).
				29	*/
				30
				31	/*
				32	* The functions in this file will not compile correctly with gcc 2.4.x
				33	*/
				34
				35	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
				36
				37	#include <linux/module.h>
				38	#include <linux/types.h>
				39	#include <linux/kernel.h>
				40	#include <linux/mm.h>
				41	#include <linux/interrupt.h>
				42	#include <linux/in.h>
				43	#include <linux/inet.h>
				44	#include <linux/slab.h>
				45	#include <linux/tcp.h>
				46	#include <linux/udp.h>
				47	#include <linux/sctp.h>
				48	#include <linux/netdevice.h>
				49	#ifdef CONFIG_NET_CLS_ACT
				50	#include <net/pkt_sched.h>
				51	#endif
				52	#include <linux/string.h>
				53	#include <linux/skbuff.h>
				54	#include <linux/splice.h>
				55	#include <linux/cache.h>
				56	#include <linux/rtnetlink.h>
				57	#include <linux/init.h>
				58	#include <linux/scatterlist.h>
				59	#include <linux/errqueue.h>
				60	#include <linux/prefetch.h>
				61	#include <linux/if_vlan.h>
				62	#include <linux/mpls.h>
				63	#include <linux/if.h>
				64
				65	#include <net/protocol.h>
				66	#include <net/dst.h>
				67	#include <net/sock.h>
				68	#include <net/checksum.h>
				69	#include <net/ip6_checksum.h>
				70	#include <net/xfrm.h>
				71	#include <net/mpls.h>
				72
				73	#include <linux/uaccess.h>
				74	#include <trace/events/skb.h>
				75	#include <linux/highmem.h>
				76	#include <linux/capability.h>
				77	#include <linux/user_namespace.h>
				78	#include <linux/indirect_call_wrapper.h>
				79	#include <trace/hooks/net.h>
				80	#ifdef CONFIG_ASR_BM
				81	#include <linux/asrbm.h>
				82	#endif
				83	#include <linux/icmp.h>
				84	#include "datagram.h"
				85	#include "sock_destructor.h"
				86
				87	struct kmem_cache *skbuff_head_cache __ro_after_init;
				88	static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
				89	#ifdef CONFIG_SKB_EXTENSIONS
				90	static struct kmem_cache *skbuff_ext_cache __ro_after_init;
				91	#endif
				92	int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
				93	EXPORT_SYMBOL(sysctl_max_skb_frags);
				94
				95	static void skb_p_revert(struct sk_buff *skb);
				96
				97	/**
				98	* skb_panic - private function for out-of-line support
				99	* @skb: buffer
				100	* @sz: size
				101	* @addr: address
				102	* @msg: skb_over_panic or skb_under_panic
				103	*
				104	* Out-of-line support for skb_put() and skb_push().
				105	* Called via the wrapper skb_over_panic() or skb_under_panic().
				106	* Keep out of line to prevent kernel bloat.
				107	* __builtin_return_address is not used because it is not always reliable.
				108	*/
				109	static void skb_panic(struct sk_buff skb, unsigned int sz, void addr,
				110	const char msg[])
				111	{
				112	pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
				113	msg, addr, skb->len, sz, skb->head, skb->data,
				114	(unsigned long)skb->tail, (unsigned long)skb->end,
				115	skb->dev ? skb->dev->name : "<NULL>");
				116	BUG();
				117	}
				118
				119	static void skb_over_panic(struct sk_buff skb, unsigned int sz, void addr)
				120	{
				121	skb_panic(skb, sz, addr, __func__);
				122	}
				123
				124	static void skb_under_panic(struct sk_buff skb, unsigned int sz, void addr)
				125	{
				126	skb_panic(skb, sz, addr, __func__);
				127	}
				128
				129	void netdev_pkt_dump(struct iphdr iph, const char func)
				130	{
				131	struct icmphdr *icmph;
				132
				133	if (iph->protocol == IPPROTO_ICMP) {
				134	icmph = (struct icmphdr )((u8 )iph + (iph->ihl << 2));
				135	if (icmph->type == ICMP_ECHO)
				136	printk(KERN_DEBUG "%s: ICMP request: From %pI4 to %pI4, "
				137	"code=%d sequence=%d\n",
				138	func, &iph->saddr, &iph->daddr, icmph->code,
				139	be16_to_cpu(icmph->un.echo.sequence));
				140	else if (icmph->type == ICMP_ECHOREPLY)
				141	printk(KERN_DEBUG "%s: ICMP reply: From %pI4 to %pI4 "
				142	"code=%d sequence=%d\n",
				143	func, &iph->saddr, &iph->daddr, icmph->code,
				144	be16_to_cpu(icmph->un.echo.sequence));
				145	} else if (iph->protocol == IPPROTO_TCP) {
				146	struct tcphdr th = (struct tcphdr )(iph + 1);
				147
				148	printk(KERN_DEBUG "%s: TCP: From%pI4:%u to dst=%pI4:%u "
				149	"ID=%u seq=%u ack=%u\r\n",
				150	func, &iph->saddr, ntohs(th->source),
				151	&iph->daddr, ntohs(th->dest),
				152	ntohs(iph->id), ntohl(th->seq), ntohl(th->ack_seq));
				153	}
				154	}
				155
				156	/*
				157	* kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
				158	* the caller if emergency pfmemalloc reserves are being used. If it is and
				159	* the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
				160	* may be used. Otherwise, the packet data may be discarded until enough
				161	* memory is free
				162	*/
				163	#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
				164	__kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
				165
				166	static void *__kmalloc_reserve(size_t size, gfp_t flags, int node,
				167	unsigned long ip, bool *pfmemalloc)
				168	{
				169	void *obj;
				170	bool ret_pfmemalloc = false;
				171
				172	/*
				173	* Try a regular allocation, when that fails and we're not entitled
				174	* to the reserves, fail.
				175	*/
				176	obj = kmalloc_node_track_caller(size,
				177	flags \| __GFP_NOMEMALLOC \| __GFP_NOWARN,
				178	node);
				179	if (obj \|\| !(gfp_pfmemalloc_allowed(flags)))
				180	goto out;
				181
				182	/* Try again but now we are using pfmemalloc reserves */
				183	ret_pfmemalloc = true;
				184	obj = kmalloc_node_track_caller(size, flags, node);
				185
				186	out:
				187	if (pfmemalloc)
				188	*pfmemalloc = ret_pfmemalloc;
				189
				190	return obj;
				191	}
				192
				193	/* Allocate a new skbuff. We do this ourselves so we can fill in a few
				194	* 'private' fields and also do memory statistics to find all the
				195	* [BEEP] leaks.
				196	*
				197	*/
				198
				199	/**
				200	* __alloc_skb - allocate a network buffer
				201	* @size: size to allocate
				202	* @gfp_mask: allocation mask
				203	* @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
				204	* instead of head cache and allocate a cloned (child) skb.
				205	* If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
				206	* allocations in case the data is required for writeback
				207	* @node: numa node to allocate memory on
				208	*
				209	* Allocate a new &sk_buff. The returned buffer has no headroom and a
				210	* tail room of at least size bytes. The object has a reference count
				211	* of one. The return is the buffer. On a failure the return is %NULL.
				212	*
				213	* Buffers may only be allocated from interrupts using a @gfp_mask of
				214	* %GFP_ATOMIC.
				215	*/
				216	struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
				217	int flags, int node)
				218	{
				219	struct kmem_cache *cache;
				220	struct skb_shared_info *shinfo;
				221	struct sk_buff *skb;
				222	u8 *data;
				223	bool pfmemalloc;
				224
				225	cache = (flags & SKB_ALLOC_FCLONE)
				226	? skbuff_fclone_cache : skbuff_head_cache;
				227
				228	if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
				229	gfp_mask \|= __GFP_MEMALLOC;
				230
				231	/* Get the HEAD */
				232	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
				233	if (!skb)
				234	goto out;
				235	prefetchw(skb);
				236
				237	/* We do our best to align skb_shared_info on a separate cache
				238	* line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
				239	* aligned memory blocks, unless SLUB/SLAB debug is enabled.
				240	* Both skb->head and skb_shared_info are cache line aligned.
				241	*/
				242	size = SKB_DATA_ALIGN(size);
				243	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
				244	data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
				245	if (!data)
				246	goto nodata;
				247	/* kmalloc(size) might give us more room than requested.
				248	* Put skb_shared_info exactly at the end of allocated zone,
				249	* to allow max possible filling before reallocation.
				250	*/
				251	size = SKB_WITH_OVERHEAD(ksize(data));
				252	prefetchw(data + size);
				253
				254	/*
				255	* Only clear those fields we need to clear, not those that we will
				256	* actually initialise below. Hence, don't put any more fields after
				257	* the tail pointer in struct sk_buff!
				258	*/
				259	memset(skb, 0, offsetof(struct sk_buff, tail));
				260	/* Account for allocated memory : skb + skb->head */
				261	skb->truesize = SKB_TRUESIZE(size);
				262	skb->pfmemalloc = pfmemalloc;
				263	refcount_set(&skb->users, 1);
				264	skb->head = data;
				265	skb->data = data;
				266	skb_reset_tail_pointer(skb);
				267	skb->end = skb->tail + size;
				268	skb->mac_header = (typeof(skb->mac_header))~0U;
				269	skb->transport_header = (typeof(skb->transport_header))~0U;
				270
				271	/* make sure we initialize shinfo sequentially */
				272	shinfo = skb_shinfo(skb);
				273	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
				274	atomic_set(&shinfo->dataref, 1);
				275
				276	if (flags & SKB_ALLOC_FCLONE) {
				277	struct sk_buff_fclones *fclones;
				278
				279	fclones = container_of(skb, struct sk_buff_fclones, skb1);
				280
				281	skb->fclone = SKB_FCLONE_ORIG;
				282	refcount_set(&fclones->fclone_ref, 1);
				283
				284	fclones->skb2.fclone = SKB_FCLONE_CLONE;
				285	}
				286	out:
				287	return skb;
				288	nodata:
				289	kmem_cache_free(cache, skb);
				290	skb = NULL;
				291	goto out;
				292	}
				293	EXPORT_SYMBOL(__alloc_skb);
				294
				295	/* Caller must provide SKB that is memset cleared */
				296	static struct sk_buff __build_skb_around(struct sk_buff skb,
				297	void *data, unsigned int frag_size)
				298	{
				299	struct skb_shared_info *shinfo;
				300	unsigned int size = frag_size ? : ksize(data);
				301
				302	size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
				303
				304	/* Assumes caller memset cleared SKB */
				305	skb->truesize = SKB_TRUESIZE(size);
				306	refcount_set(&skb->users, 1);
				307	skb->head = data;
				308	skb->data = data;
				309	skb_reset_tail_pointer(skb);
				310	skb->end = skb->tail + size;
				311	skb->mac_header = (typeof(skb->mac_header))~0U;
				312	skb->transport_header = (typeof(skb->transport_header))~0U;
				313
				314	/* make sure we initialize shinfo sequentially */
				315	shinfo = skb_shinfo(skb);
				316	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
				317	atomic_set(&shinfo->dataref, 1);
				318
				319	return skb;
				320	}
				321
				322	/**
				323	* __build_skb - build a network buffer
				324	* @data: data buffer provided by caller
				325	* @frag_size: size of data, or 0 if head was kmalloced
				326	*
				327	* Allocate a new &sk_buff. Caller provides space holding head and
				328	* skb_shared_info. @data must have been allocated by kmalloc() only if
				329	* @frag_size is 0, otherwise data should come from the page allocator
				330	* or vmalloc()
				331	* The return is the new skb buffer.
				332	* On a failure the return is %NULL, and @data is not freed.
				333	* Notes :
				334	* Before IO, driver allocates only data buffer where NIC put incoming frame
				335	* Driver should add room at head (NET_SKB_PAD) and
				336	* MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info))
				337	* After IO, driver calls build_skb(), to allocate sk_buff and populate it
				338	* before giving packet to stack.
				339	* RX rings only contain data buffers, not full skbs.
				340	*/
				341	struct sk_buff __build_skb(void data, unsigned int frag_size)
				342	{
				343	struct sk_buff *skb;
				344
				345	skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
				346	if (unlikely(!skb))
				347	return NULL;
				348
				349	memset(skb, 0, offsetof(struct sk_buff, tail));
				350
				351	return __build_skb_around(skb, data, frag_size);
				352	}
				353
				354	/* build_skb() is wrapper over __build_skb(), that specifically
				355	* takes care of skb->head and skb->pfmemalloc
				356	* This means that if @frag_size is not zero, then @data must be backed
				357	* by a page fragment, not kmalloc() or vmalloc()
				358	*/
				359	struct sk_buff build_skb(void data, unsigned int frag_size)
				360	{
				361	struct sk_buff *skb = __build_skb(data, frag_size);
				362
				363	if (skb && frag_size) {
				364	skb->head_frag = 1;
				365	if (page_is_pfmemalloc(virt_to_head_page(data)))
				366	skb->pfmemalloc = 1;
				367	}
				368	return skb;
				369	}
				370	EXPORT_SYMBOL(build_skb);
				371
				372	/**
				373	* build_skb_around - build a network buffer around provided skb
				374	* @skb: sk_buff provided by caller, must be memset cleared
				375	* @data: data buffer provided by caller
				376	* @frag_size: size of data, or 0 if head was kmalloced
				377	*/
				378	struct sk_buff build_skb_around(struct sk_buff skb,
				379	void *data, unsigned int frag_size)
				380	{
				381	if (unlikely(!skb))
				382	return NULL;
				383
				384	skb = __build_skb_around(skb, data, frag_size);
				385
				386	if (skb && frag_size) {
				387	skb->head_frag = 1;
				388	if (page_is_pfmemalloc(virt_to_head_page(data)))
				389	skb->pfmemalloc = 1;
				390	}
				391	return skb;
				392	}
				393	EXPORT_SYMBOL(build_skb_around);
				394
				395	#define NAPI_SKB_CACHE_SIZE 64
				396
				397	struct napi_alloc_cache {
				398	struct page_frag_cache page;
				399	unsigned int skb_count;
				400	void *skb_cache[NAPI_SKB_CACHE_SIZE];
				401	};
				402
				403	static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
				404	static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
				405
				406	static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
				407	{
				408	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
				409
				410	return page_frag_alloc(&nc->page, fragsz, gfp_mask);
				411	}
				412
				413	void *napi_alloc_frag(unsigned int fragsz)
				414	{
				415	fragsz = SKB_DATA_ALIGN(fragsz);
				416
				417	return __napi_alloc_frag(fragsz, GFP_ATOMIC);
				418	}
				419	EXPORT_SYMBOL(napi_alloc_frag);
				420
				421	/**
				422	* netdev_alloc_frag - allocate a page fragment
				423	* @fragsz: fragment size
				424	*
				425	* Allocates a frag from a page for receive buffer.
				426	* Uses GFP_ATOMIC allocations.
				427	*/
				428	void *netdev_alloc_frag(unsigned int fragsz)
				429	{
				430	struct page_frag_cache *nc;
				431	void *data;
				432
				433	fragsz = SKB_DATA_ALIGN(fragsz);
				434	if (in_irq() \|\| irqs_disabled()) {
				435	nc = this_cpu_ptr(&netdev_alloc_cache);
				436	data = page_frag_alloc(nc, fragsz, GFP_ATOMIC);
				437	} else {
				438	local_bh_disable();
				439	data = __napi_alloc_frag(fragsz, GFP_ATOMIC);
				440	local_bh_enable();
				441	}
				442	return data;
				443	}
				444	EXPORT_SYMBOL(netdev_alloc_frag);
				445
				446	/**
				447	* __netdev_alloc_skb - allocate an skbuff for rx on a specific device
				448	* @dev: network device to receive on
				449	* @len: length to allocate
				450	* @gfp_mask: get_free_pages mask, passed to alloc_skb
				451	*
				452	* Allocate a new &sk_buff and assign it a usage count of one. The
				453	* buffer has NET_SKB_PAD headroom built in. Users should allocate
				454	* the headroom they think they need without accounting for the
				455	* built in space. The built in space is used for optimisations.
				456	*
				457	* %NULL is returned if there is no free memory.
				458	*/
				459	struct sk_buff __netdev_alloc_skb(struct net_device dev, unsigned int len,
				460	gfp_t gfp_mask)
				461	{
				462	struct page_frag_cache *nc;
				463	struct sk_buff *skb;
				464	bool pfmemalloc;
				465	void *data;
				466
				467	len += NET_SKB_PAD;
				468
				469	/* If requested length is either too small or too big,
				470	* we use kmalloc() for skb->head allocation.
				471	*/
				472	if (len <= SKB_WITH_OVERHEAD(1024) \|\|
				473	len > SKB_WITH_OVERHEAD(PAGE_SIZE) \|\|
				474	(gfp_mask & (__GFP_DIRECT_RECLAIM \| GFP_DMA))) {
				475	skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
				476	if (!skb)
				477	goto skb_fail;
				478	goto skb_success;
				479	}
				480
				481	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
				482	len = SKB_DATA_ALIGN(len);
				483
				484	if (sk_memalloc_socks())
				485	gfp_mask \|= __GFP_MEMALLOC;
				486
				487	if (in_irq() \|\| irqs_disabled()) {
				488	nc = this_cpu_ptr(&netdev_alloc_cache);
				489	data = page_frag_alloc(nc, len, gfp_mask);
				490	pfmemalloc = nc->pfmemalloc;
				491	} else {
				492	local_bh_disable();
				493	nc = this_cpu_ptr(&napi_alloc_cache.page);
				494	data = page_frag_alloc(nc, len, gfp_mask);
				495	pfmemalloc = nc->pfmemalloc;
				496	local_bh_enable();
				497	}
				498
				499	if (unlikely(!data))
				500	return NULL;
				501
				502	skb = __build_skb(data, len);
				503	if (unlikely(!skb)) {
				504	skb_free_frag(data);
				505	return NULL;
				506	}
				507
				508	/* use OR instead of assignment to avoid clearing of bits in mask */
				509	if (pfmemalloc)
				510	skb->pfmemalloc = 1;
				511	skb->head_frag = 1;
				512
				513	skb_success:
				514	skb_reserve(skb, NET_SKB_PAD);
				515	skb->dev = dev;
				516
				517	skb_fail:
				518	return skb;
				519	}
				520	EXPORT_SYMBOL(__netdev_alloc_skb);
				521
				522	/**
				523	* __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
				524	* @napi: napi instance this buffer was allocated for
				525	* @len: length to allocate
				526	* @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages
				527	*
				528	* Allocate a new sk_buff for use in NAPI receive. This buffer will
				529	* attempt to allocate the head from a special reserved region used
				530	* only for NAPI Rx allocation. By doing this we can save several
				531	* CPU cycles by avoiding having to disable and re-enable IRQs.
				532	*
				533	* %NULL is returned if there is no free memory.
				534	*/
				535	struct sk_buff __napi_alloc_skb(struct napi_struct napi, unsigned int len,
				536	gfp_t gfp_mask)
				537	{
				538	struct napi_alloc_cache *nc;
				539	struct sk_buff *skb;
				540	void *data;
				541
				542	len += NET_SKB_PAD + NET_IP_ALIGN;
				543
				544	/* If requested length is either too small or too big,
				545	* we use kmalloc() for skb->head allocation.
				546	*/
				547	if (len <= SKB_WITH_OVERHEAD(1024) \|\|
				548	len > SKB_WITH_OVERHEAD(PAGE_SIZE) \|\|
				549	(gfp_mask & (__GFP_DIRECT_RECLAIM \| GFP_DMA))) {
				550	skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
				551	if (!skb)
				552	goto skb_fail;
				553	goto skb_success;
				554	}
				555
				556	nc = this_cpu_ptr(&napi_alloc_cache);
				557	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
				558	len = SKB_DATA_ALIGN(len);
				559
				560	if (sk_memalloc_socks())
				561	gfp_mask \|= __GFP_MEMALLOC;
				562
				563	data = page_frag_alloc(&nc->page, len, gfp_mask);
				564	if (unlikely(!data))
				565	return NULL;
				566
				567	skb = __build_skb(data, len);
				568	if (unlikely(!skb)) {
				569	skb_free_frag(data);
				570	return NULL;
				571	}
				572
				573	/* use OR instead of assignment to avoid clearing of bits in mask */
				574	if (nc->page.pfmemalloc)
				575	skb->pfmemalloc = 1;
				576	skb->head_frag = 1;
				577
				578	skb_success:
				579	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
				580	skb->dev = napi->dev;
				581
				582	skb_fail:
				583	return skb;
				584	}
				585	EXPORT_SYMBOL(__napi_alloc_skb);
				586
				587	struct sk_buff __netdev_alloc_skb_ip_align(struct net_device dev,
				588	unsigned int length, gfp_t gfp)
				589	{
				590	struct sk_buff *skb = __netdev_alloc_skb(dev, length + NET_IP_ALIGN, gfp);
				591
				592	#ifdef CONFIG_ETHERNET_PACKET_MANGLE
				593	if (dev && (dev->priv_flags & IFF_NO_IP_ALIGN))
				594	return skb;
				595	#endif
				596
				597	if (NET_IP_ALIGN && skb)
				598	skb_reserve(skb, NET_IP_ALIGN);
				599	return skb;
				600	}
				601	EXPORT_SYMBOL(__netdev_alloc_skb_ip_align);
				602
				603	void skb_add_rx_frag(struct sk_buff skb, int i, struct page page, int off,
				604	int size, unsigned int truesize)
				605	{
				606	skb_fill_page_desc(skb, i, page, off, size);
				607	skb->len += size;
				608	skb->data_len += size;
				609	skb->truesize += truesize;
				610	}
				611	EXPORT_SYMBOL(skb_add_rx_frag);
				612
				613	void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
				614	unsigned int truesize)
				615	{
				616	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
				617
				618	skb_frag_size_add(frag, size);
				619	skb->len += size;
				620	skb->data_len += size;
				621	skb->truesize += truesize;
				622	}
				623	EXPORT_SYMBOL(skb_coalesce_rx_frag);
				624
				625	static void skb_drop_list(struct sk_buff **listp)
				626	{
				627	kfree_skb_list(*listp);
				628	*listp = NULL;
				629	}
				630
				631	static inline void skb_drop_fraglist(struct sk_buff *skb)
				632	{
				633	skb_drop_list(&skb_shinfo(skb)->frag_list);
				634	}
				635
				636	static void skb_clone_fraglist(struct sk_buff *skb)
				637	{
				638	struct sk_buff *list;
				639
				640	skb_walk_frags(skb, list)
				641	skb_get(list);
				642	}
				643
				644	static void skb_free_head(struct sk_buff *skb)
				645	{
				646	unsigned char *head;
				647	#ifdef CONFIG_ASR_BM
				648	bool cached_head = false;
				649	#endif
				650
				651	if (skb_shinfo_is_ptr(skb)) {
				652	#ifdef CONFIG_ASR_BM
				653	/* in case of normal case & skb clone */
				654	if (skb->cached_head)
				655	cached_head = true;
				656	#endif
				657
				658	skb_p_revert(skb);
				659	}
				660
				661	#ifdef CONFIG_ASR_BM
				662	if (cached_head)
				663	return;
				664	#endif
				665
				666	head = skb->head;
				667	if (skb->head_frag)
				668	skb_free_frag(head);
				669	else
				670	kfree(head);
				671	}
				672
				673	static void skb_release_data(struct sk_buff *skb)
				674	{
				675	struct skb_shared_info *shinfo = skb_shinfo(skb);
				676	int i;
				677
				678	if (skb->cloned &&
				679	atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
				680	&shinfo->dataref))
				681	return;
				682
				683	for (i = 0; i < shinfo->nr_frags; i++)
				684	__skb_frag_unref(&shinfo->frags[i]);
				685
				686	if (shinfo->frag_list)
				687	kfree_skb_list(shinfo->frag_list);
				688
				689	skb_zcopy_clear(skb, true);
				690	skb_free_head(skb);
				691	}
				692
				693	/*
				694	* Free an skbuff by memory without cleaning the state.
				695	*/
				696	static void kfree_skbmem(struct sk_buff *skb)
				697	{
				698	struct sk_buff_fclones *fclones;
				699
				700	#ifdef CONFIG_ASR_BM
				701	if (skb->cached_skb) {
				702	bm_cache_skb_free(skb);
				703	return;
				704	}
				705	#endif
				706
				707	switch (skb->fclone) {
				708	case SKB_FCLONE_UNAVAILABLE:
				709	kmem_cache_free(skbuff_head_cache, skb);
				710	return;
				711
				712	case SKB_FCLONE_ORIG:
				713	fclones = container_of(skb, struct sk_buff_fclones, skb1);
				714
				715	/* We usually free the clone (TX completion) before original skb
				716	* This test would have no chance to be true for the clone,
				717	* while here, branch prediction will be good.
				718	*/
				719	if (refcount_read(&fclones->fclone_ref) == 1)
				720	goto fastpath;
				721	break;
				722
				723	default: /* SKB_FCLONE_CLONE */
				724	fclones = container_of(skb, struct sk_buff_fclones, skb2);
				725	break;
				726	}
				727	if (!refcount_dec_and_test(&fclones->fclone_ref))
				728	return;
				729	fastpath:
				730	kmem_cache_free(skbuff_fclone_cache, fclones);
				731	}
				732
				733	void skb_release_head_state(struct sk_buff *skb)
				734	{
				735	skb_dst_drop(skb);
				736	if (skb->destructor) {
				737	WARN_ON(in_irq());
				738	skb->destructor(skb);
				739	}
				740	#if IS_ENABLED(CONFIG_NF_CONNTRACK)
				741	nf_conntrack_put(skb_nfct(skb));
				742	#endif
				743	skb_ext_put(skb);
				744	}
				745
				746	/* Free everything but the sk_buff shell. */
				747	static void skb_release_all(struct sk_buff *skb)
				748	{
				749	skb_release_head_state(skb);
				750	if (likely(skb->head))
				751	skb_release_data(skb);
				752	}
				753
				754	/**
				755	* __kfree_skb - private function
				756	* @skb: buffer
				757	*
				758	* Free an sk_buff. Release anything attached to the buffer.
				759	* Clean the state. This is an internal helper function. Users should
				760	* always call kfree_skb
				761	*/
				762
				763	void __kfree_skb(struct sk_buff *skb)
				764	{
				765	skb_release_all(skb);
				766	kfree_skbmem(skb);
				767	}
				768	EXPORT_SYMBOL(__kfree_skb);
				769
				770	/**
				771	* kfree_skb - free an sk_buff
				772	* @skb: buffer to free
				773	*
				774	* Drop a reference to the buffer and free it if the usage count has
				775	* hit zero.
				776	*/
				777	void kfree_skb(struct sk_buff *skb)
				778	{
				779	if (!skb_unref(skb))
				780	return;
				781
				782	trace_android_vh_kfree_skb(skb);
				783	trace_kfree_skb(skb, __builtin_return_address(0));
				784	__kfree_skb(skb);
				785	}
				786	EXPORT_SYMBOL(kfree_skb);
				787
				788	void kfree_skb_list(struct sk_buff *segs)
				789	{
				790	while (segs) {
				791	struct sk_buff *next = segs->next;
				792
				793	kfree_skb(segs);
				794	segs = next;
				795	}
				796	}
				797	EXPORT_SYMBOL(kfree_skb_list);
				798
				799	/* Dump skb information and contents.
				800	*
				801	* Must only be called from net_ratelimit()-ed paths.
				802	*
				803	* Dumps up to can_dump_full whole packets if full_pkt, headers otherwise.
				804	*/
				805	void skb_dump(const char level, const struct sk_buff skb, bool full_pkt)
				806	{
				807	static atomic_t can_dump_full = ATOMIC_INIT(5);
				808	struct skb_shared_info *sh = skb_shinfo(skb);
				809	struct net_device *dev = skb->dev;
				810	struct sock *sk = skb->sk;
				811	struct sk_buff *list_skb;
				812	bool has_mac, has_trans;
				813	int headroom, tailroom;
				814	int i, len, seg_len;
				815
				816	if (full_pkt)
				817	full_pkt = atomic_dec_if_positive(&can_dump_full) >= 0;
				818
				819	if (full_pkt)
				820	len = skb->len;
				821	else
				822	len = min_t(int, skb->len, MAX_HEADER + 128);
				823
				824	headroom = skb_headroom(skb);
				825	tailroom = skb_tailroom(skb);
				826
				827	has_mac = skb_mac_header_was_set(skb);
				828	has_trans = skb_transport_header_was_set(skb);
				829
				830	printk("%sskb len=%u headroom=%u headlen=%u tailroom=%u\n"
				831	"mac=(%d,%d) net=(%d,%d) trans=%d\n"
				832	"shinfo(txflags=%u nr_frags=%u gso(size=%hu type=%u segs=%hu))\n"
				833	"csum(0x%x ip_summed=%u complete_sw=%u valid=%u level=%u)\n"
				834	"hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n",
				835	level, skb->len, headroom, skb_headlen(skb), tailroom,
				836	has_mac ? skb->mac_header : -1,
				837	has_mac ? skb_mac_header_len(skb) : -1,
				838	skb->network_header,
				839	has_trans ? skb_network_header_len(skb) : -1,
				840	has_trans ? skb->transport_header : -1,
				841	sh->tx_flags, sh->nr_frags,
				842	sh->gso_size, sh->gso_type, sh->gso_segs,
				843	skb->csum, skb->ip_summed, skb->csum_complete_sw,
				844	skb->csum_valid, skb->csum_level,
				845	skb->hash, skb->sw_hash, skb->l4_hash,
				846	ntohs(skb->protocol), skb->pkt_type, skb->skb_iif);
				847
				848	if (dev)
				849	printk("%sdev name=%s feat=%pNF\n",
				850	level, dev->name, &dev->features);
				851	if (sk)
				852	printk("%ssk family=%hu type=%u proto=%u\n",
				853	level, sk->sk_family, sk->sk_type, sk->sk_protocol);
				854
				855	if (full_pkt && headroom)
				856	print_hex_dump(level, "skb headroom: ", DUMP_PREFIX_OFFSET,
				857	16, 1, skb->head, headroom, false);
				858
				859	seg_len = min_t(int, skb_headlen(skb), len);
				860	if (seg_len)
				861	print_hex_dump(level, "skb linear: ", DUMP_PREFIX_OFFSET,
				862	16, 1, skb->data, seg_len, false);
				863	len -= seg_len;
				864
				865	if (full_pkt && tailroom)
				866	print_hex_dump(level, "skb tailroom: ", DUMP_PREFIX_OFFSET,
				867	16, 1, skb_tail_pointer(skb), tailroom, false);
				868
				869	for (i = 0; len && i < skb_shinfo(skb)->nr_frags; i++) {
				870	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
				871	u32 p_off, p_len, copied;
				872	struct page *p;
				873	u8 *vaddr;
				874
				875	skb_frag_foreach_page(frag, skb_frag_off(frag),
				876	skb_frag_size(frag), p, p_off, p_len,
				877	copied) {
				878	seg_len = min_t(int, p_len, len);
				879	vaddr = kmap_atomic(p);
				880	print_hex_dump(level, "skb frag: ",
				881	DUMP_PREFIX_OFFSET,
				882	16, 1, vaddr + p_off, seg_len, false);
				883	kunmap_atomic(vaddr);
				884	len -= seg_len;
				885	if (!len)
				886	break;
				887	}
				888	}
				889
				890	if (full_pkt && skb_has_frag_list(skb)) {
				891	printk("skb fraglist:\n");
				892	skb_walk_frags(skb, list_skb)
				893	skb_dump(level, list_skb, true);
				894	}
				895	}
				896	EXPORT_SYMBOL(skb_dump);
				897
				898	/**
				899	* skb_tx_error - report an sk_buff xmit error
				900	* @skb: buffer that triggered an error
				901	*
				902	* Report xmit error if a device callback is tracking this skb.
				903	* skb must be freed afterwards.
				904	*/
				905	void skb_tx_error(struct sk_buff *skb)
				906	{
				907	skb_zcopy_clear(skb, true);
				908	}
				909	EXPORT_SYMBOL(skb_tx_error);
				910
				911	/**
				912	* consume_skb - free an skbuff
				913	* @skb: buffer to free
				914	*
				915	* Drop a ref to the buffer and free it if the usage count has hit zero
				916	* Functions identically to kfree_skb, but kfree_skb assumes that the frame
				917	* is being dropped after a failure and notes that
				918	*/
				919	void consume_skb(struct sk_buff *skb)
				920	{
				921	if (!skb_unref(skb))
				922	return;
				923
				924	trace_consume_skb(skb);
				925	__kfree_skb(skb);
				926	}
				927	EXPORT_SYMBOL(consume_skb);
				928
				929	/**
				930	* __consume_stateless_skb - free an skbuff, assuming it is stateless
				931	* @skb: buffer to free
				932	*
				933	* Like consume_skb(), but this variant assumes that this is the last
				934	* skb reference and all the head states have been already dropped
				935	*/
				936	void __consume_stateless_skb(struct sk_buff *skb)
				937	{
				938	trace_consume_skb(skb);
				939	skb_release_data(skb);
				940	kfree_skbmem(skb);
				941	}
				942
				943	void __kfree_skb_flush(void)
				944	{
				945	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
				946
				947	/* flush skb_cache if containing objects */
				948	if (nc->skb_count) {
				949	kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,
				950	nc->skb_cache);
				951	nc->skb_count = 0;
				952	}
				953	}
				954
				955	static inline void _kfree_skb_defer(struct sk_buff *skb)
				956	{
				957	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
				958
				959	/* drop skb->head and call any destructors for packet */
				960	skb_release_all(skb);
				961
				962	#ifdef CONFIG_ASR_BM
				963	if (skb->cached_skb) {
				964	bm_cache_skb_free(skb);
				965	return;
				966	}
				967	#endif
				968
				969	/* record skb to CPU local list */
				970	nc->skb_cache[nc->skb_count++] = skb;
				971
				972	#ifdef CONFIG_SLUB
				973	/* SLUB writes into objects when freeing */
				974	prefetchw(skb);
				975	#endif
				976
				977	/* flush skb_cache if it is filled */
				978	if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
				979	kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_SIZE,
				980	nc->skb_cache);
				981	nc->skb_count = 0;
				982	}
				983	}
				984	void __kfree_skb_defer(struct sk_buff *skb)
				985	{
				986	_kfree_skb_defer(skb);
				987	}
				988
				989	void napi_consume_skb(struct sk_buff *skb, int budget)
				990	{
				991	if (unlikely(!skb))
				992	return;
				993
				994	/* Zero budget indicate non-NAPI context called us, like netpoll */
				995	if (unlikely(!budget)) {
				996	dev_consume_skb_any(skb);
				997	return;
				998	}
				999
				1000	if (!skb_unref(skb))
				1001	return;
				1002
				1003	/* if reaching here SKB is ready to free */
				1004	trace_consume_skb(skb);
				1005
				1006	/* if SKB is a clone, don't handle this case */
				1007	if (skb->fclone != SKB_FCLONE_UNAVAILABLE) {
				1008	__kfree_skb(skb);
				1009	return;
				1010	}
				1011
				1012	_kfree_skb_defer(skb);
				1013	}
				1014	EXPORT_SYMBOL(napi_consume_skb);
				1015
				1016	/* Make sure a field is enclosed inside headers_start/headers_end section */
				1017	#define CHECK_SKB_FIELD(field) \
				1018	BUILD_BUG_ON(offsetof(struct sk_buff, field) < \
				1019	offsetof(struct sk_buff, headers_start)); \
				1020	BUILD_BUG_ON(offsetof(struct sk_buff, field) > \
				1021	offsetof(struct sk_buff, headers_end)); \
				1022
				1023	static void __copy_skb_header(struct sk_buff new, const struct sk_buff old)
				1024	{
				1025	new->tstamp = old->tstamp;
				1026	/* We do not copy old->sk */
				1027	new->dev = old->dev;
				1028	memcpy(new->cb, old->cb, sizeof(old->cb));
				1029	skb_dst_copy(new, old);
				1030	__skb_ext_copy(new, old);
				1031	__nf_copy(new, old, false);
				1032
				1033	/* Note : this field could be in headers_start/headers_end section
				1034	* It is not yet because we do not want to have a 16 bit hole
				1035	*/
				1036	new->queue_mapping = old->queue_mapping;
				1037
				1038	memcpy(&new->headers_start, &old->headers_start,
				1039	offsetof(struct sk_buff, headers_end) -
				1040	offsetof(struct sk_buff, headers_start));
				1041	CHECK_SKB_FIELD(protocol);
				1042	CHECK_SKB_FIELD(csum);
				1043	CHECK_SKB_FIELD(hash);
				1044	CHECK_SKB_FIELD(priority);
				1045	CHECK_SKB_FIELD(skb_iif);
				1046	CHECK_SKB_FIELD(vlan_proto);
				1047	CHECK_SKB_FIELD(vlan_tci);
				1048	CHECK_SKB_FIELD(transport_header);
				1049	CHECK_SKB_FIELD(network_header);
				1050	CHECK_SKB_FIELD(mac_header);
				1051	CHECK_SKB_FIELD(inner_protocol);
				1052	CHECK_SKB_FIELD(inner_transport_header);
				1053	CHECK_SKB_FIELD(inner_network_header);
				1054	CHECK_SKB_FIELD(inner_mac_header);
				1055	CHECK_SKB_FIELD(mark);
				1056	#ifdef CONFIG_NETWORK_SECMARK
				1057	CHECK_SKB_FIELD(secmark);
				1058	#endif
				1059	#ifdef CONFIG_NET_RX_BUSY_POLL
				1060	CHECK_SKB_FIELD(napi_id);
				1061	#endif
				1062	#ifdef CONFIG_XPS
				1063	CHECK_SKB_FIELD(sender_cpu);
				1064	#endif
				1065	#ifdef CONFIG_NET_SCHED
				1066	CHECK_SKB_FIELD(tc_index);
				1067	#endif
				1068
				1069	}
				1070
				1071	/*
				1072	* You should not add any new code to this function. Add it to
				1073	* __copy_skb_header above instead.
				1074	*/
				1075	static struct sk_buff __skb_clone(struct sk_buff n, struct sk_buff *skb)
				1076	{
				1077	#define C(x) n->x = skb->x
				1078
				1079	n->next = n->prev = NULL;
				1080	n->sk = NULL;
				1081	__copy_skb_header(n, skb);
				1082
				1083	C(len);
				1084	C(data_len);
				1085	C(mac_len);
				1086	n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
				1087	n->cloned = 1;
				1088	n->nohdr = 0;
				1089	n->peeked = 0;
				1090	C(pfmemalloc);
				1091	n->destructor = NULL;
				1092	C(tail);
				1093	C(end);
				1094	C(head);
				1095	C(head_frag);
				1096	C(data);
				1097	C(truesize);
				1098	C(shared_info_ptr);
				1099	#ifdef CONFIG_ASR_BM
				1100	C(cached_head);
				1101	n->cached_skb = 0;
				1102	n->in_use = 0;
				1103	n->rsvd = 0;
				1104	#endif
				1105	refcount_set(&n->users, 1);
				1106
				1107	atomic_inc(&(skb_shinfo(skb)->dataref));
				1108	skb->cloned = 1;
				1109
				1110	return n;
				1111	#undef C
				1112	}
				1113
				1114	/**
				1115	* alloc_skb_for_msg() - allocate sk_buff to wrap frag list forming a msg
				1116	* @first: first sk_buff of the msg
				1117	*/
				1118	struct sk_buff alloc_skb_for_msg(struct sk_buff first)
				1119	{
				1120	struct sk_buff *n;
				1121
				1122	n = alloc_skb(0, GFP_ATOMIC);
				1123	if (!n)
				1124	return NULL;
				1125
				1126	n->len = first->len;
				1127	n->data_len = first->len;
				1128	n->truesize = first->truesize;
				1129
				1130	skb_shinfo(n)->frag_list = first;
				1131
				1132	__copy_skb_header(n, first);
				1133	n->destructor = NULL;
				1134
				1135	return n;
				1136	}
				1137	EXPORT_SYMBOL_GPL(alloc_skb_for_msg);
				1138
				1139	/**
				1140	* skb_morph - morph one skb into another
				1141	* @dst: the skb to receive the contents
				1142	* @src: the skb to supply the contents
				1143	*
				1144	* This is identical to skb_clone except that the target skb is
				1145	* supplied by the user.
				1146	*
				1147	* The target skb is returned upon exit.
				1148	*/
				1149	struct sk_buff skb_morph(struct sk_buff dst, struct sk_buff *src)
				1150	{
				1151	skb_release_all(dst);
				1152	return __skb_clone(dst, src);
				1153	}
				1154	EXPORT_SYMBOL_GPL(skb_morph);
				1155
				1156	int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
				1157	{
				1158	unsigned long max_pg, num_pg, new_pg, old_pg;
				1159	struct user_struct *user;
				1160
				1161	if (capable(CAP_IPC_LOCK) \|\| !size)
				1162	return 0;
				1163
				1164	num_pg = (size >> PAGE_SHIFT) + 2; /* worst case */
				1165	max_pg = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
				1166	user = mmp->user ? : current_user();
				1167
				1168	do {
				1169	old_pg = atomic_long_read(&user->locked_vm);
				1170	new_pg = old_pg + num_pg;
				1171	if (new_pg > max_pg)
				1172	return -ENOBUFS;
				1173	} while (atomic_long_cmpxchg(&user->locked_vm, old_pg, new_pg) !=
				1174	old_pg);
				1175
				1176	if (!mmp->user) {
				1177	mmp->user = get_uid(user);
				1178	mmp->num_pg = num_pg;
				1179	} else {
				1180	mmp->num_pg += num_pg;
				1181	}
				1182
				1183	return 0;
				1184	}
				1185	EXPORT_SYMBOL_GPL(mm_account_pinned_pages);
				1186
				1187	void mm_unaccount_pinned_pages(struct mmpin *mmp)
				1188	{
				1189	if (mmp->user) {
				1190	atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm);
				1191	free_uid(mmp->user);
				1192	}
				1193	}
				1194	EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
				1195
				1196	struct ubuf_info sock_zerocopy_alloc(struct sock sk, size_t size)
				1197	{
				1198	struct ubuf_info *uarg;
				1199	struct sk_buff *skb;
				1200
				1201	WARN_ON_ONCE(!in_task());
				1202
				1203	skb = sock_omalloc(sk, 0, GFP_KERNEL);
				1204	if (!skb)
				1205	return NULL;
				1206
				1207	BUILD_BUG_ON(sizeof(*uarg) > sizeof(skb->cb));
				1208	uarg = (void *)skb->cb;
				1209	uarg->mmp.user = NULL;
				1210
				1211	if (mm_account_pinned_pages(&uarg->mmp, size)) {
				1212	kfree_skb(skb);
				1213	return NULL;
				1214	}
				1215
				1216	uarg->callback = sock_zerocopy_callback;
				1217	uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1;
				1218	uarg->len = 1;
				1219	uarg->bytelen = size;
				1220	uarg->zerocopy = 1;
				1221	refcount_set(&uarg->refcnt, 1);
				1222	sock_hold(sk);
				1223
				1224	return uarg;
				1225	}
				1226	EXPORT_SYMBOL_GPL(sock_zerocopy_alloc);
				1227
				1228	static inline struct sk_buff skb_from_uarg(struct ubuf_info uarg)
				1229	{
				1230	return container_of((void *)uarg, struct sk_buff, cb);
				1231	}
				1232
				1233	struct ubuf_info sock_zerocopy_realloc(struct sock sk, size_t size,
				1234	struct ubuf_info *uarg)
				1235	{
				1236	if (uarg) {
				1237	const u32 byte_limit = 1 << 19; /* limit to a few TSO */
				1238	u32 bytelen, next;
				1239
				1240	/* realloc only when socket is locked (TCP, UDP cork),
				1241	* so uarg->len and sk_zckey access is serialized
				1242	*/
				1243	if (!sock_owned_by_user(sk)) {
				1244	WARN_ON_ONCE(1);
				1245	return NULL;
				1246	}
				1247
				1248	bytelen = uarg->bytelen + size;
				1249	if (uarg->len == USHRT_MAX - 1 \|\| bytelen > byte_limit) {
				1250	/* TCP can create new skb to attach new uarg */
				1251	if (sk->sk_type == SOCK_STREAM)
				1252	goto new_alloc;
				1253	return NULL;
				1254	}
				1255
				1256	next = (u32)atomic_read(&sk->sk_zckey);
				1257	if ((u32)(uarg->id + uarg->len) == next) {
				1258	if (mm_account_pinned_pages(&uarg->mmp, size))
				1259	return NULL;
				1260	uarg->len++;
				1261	uarg->bytelen = bytelen;
				1262	atomic_set(&sk->sk_zckey, ++next);
				1263
				1264	/* no extra ref when appending to datagram (MSG_MORE) */
				1265	if (sk->sk_type == SOCK_STREAM)
				1266	sock_zerocopy_get(uarg);
				1267
				1268	return uarg;
				1269	}
				1270	}
				1271
				1272	new_alloc:
				1273	return sock_zerocopy_alloc(sk, size);
				1274	}
				1275	EXPORT_SYMBOL_GPL(sock_zerocopy_realloc);
				1276
				1277	static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len)
				1278	{
				1279	struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
				1280	u32 old_lo, old_hi;
				1281	u64 sum_len;
				1282
				1283	old_lo = serr->ee.ee_info;
				1284	old_hi = serr->ee.ee_data;
				1285	sum_len = old_hi - old_lo + 1ULL + len;
				1286
				1287	if (sum_len >= (1ULL << 32))
				1288	return false;
				1289
				1290	if (lo != old_hi + 1)
				1291	return false;
				1292
				1293	serr->ee.ee_data += len;
				1294	return true;
				1295	}
				1296
				1297	void sock_zerocopy_callback(struct ubuf_info *uarg, bool success)
				1298	{
				1299	struct sk_buff tail, skb = skb_from_uarg(uarg);
				1300	struct sock_exterr_skb *serr;
				1301	struct sock *sk = skb->sk;
				1302	struct sk_buff_head *q;
				1303	unsigned long flags;
				1304	u32 lo, hi;
				1305	u16 len;
				1306
				1307	mm_unaccount_pinned_pages(&uarg->mmp);
				1308
				1309	/* if !len, there was only 1 call, and it was aborted
				1310	* so do not queue a completion notification
				1311	*/
				1312	if (!uarg->len \|\| sock_flag(sk, SOCK_DEAD))
				1313	goto release;
				1314
				1315	len = uarg->len;
				1316	lo = uarg->id;
				1317	hi = uarg->id + len - 1;
				1318
				1319	serr = SKB_EXT_ERR(skb);
				1320	memset(serr, 0, sizeof(*serr));
				1321	serr->ee.ee_errno = 0;
				1322	serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
				1323	serr->ee.ee_data = hi;
				1324	serr->ee.ee_info = lo;
				1325	if (!success)
				1326	serr->ee.ee_code \|= SO_EE_CODE_ZEROCOPY_COPIED;
				1327
				1328	q = &sk->sk_error_queue;
				1329	spin_lock_irqsave(&q->lock, flags);
				1330	tail = skb_peek_tail(q);
				1331	if (!tail \|\| SKB_EXT_ERR(tail)->ee.ee_origin != SO_EE_ORIGIN_ZEROCOPY \|\|
				1332	!skb_zerocopy_notify_extend(tail, lo, len)) {
				1333	__skb_queue_tail(q, skb);
				1334	skb = NULL;
				1335	}
				1336	spin_unlock_irqrestore(&q->lock, flags);
				1337
				1338	sk->sk_error_report(sk);
				1339
				1340	release:
				1341	consume_skb(skb);
				1342	sock_put(sk);
				1343	}
				1344	EXPORT_SYMBOL_GPL(sock_zerocopy_callback);
				1345
				1346	void sock_zerocopy_put(struct ubuf_info *uarg)
				1347	{
				1348	if (uarg && refcount_dec_and_test(&uarg->refcnt)) {
				1349	if (uarg->callback)
				1350	uarg->callback(uarg, uarg->zerocopy);
				1351	else
				1352	consume_skb(skb_from_uarg(uarg));
				1353	}
				1354	}
				1355	EXPORT_SYMBOL_GPL(sock_zerocopy_put);
				1356
				1357	void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
				1358	{
				1359	if (uarg) {
				1360	struct sock *sk = skb_from_uarg(uarg)->sk;
				1361
				1362	atomic_dec(&sk->sk_zckey);
				1363	uarg->len--;
				1364
				1365	if (have_uref)
				1366	sock_zerocopy_put(uarg);
				1367	}
				1368	}
				1369	EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);
				1370
				1371	int skb_zerocopy_iter_dgram(struct sk_buff skb, struct msghdr msg, int len)
				1372	{
				1373	return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len);
				1374	}
				1375	EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram);
				1376
				1377	int skb_zerocopy_iter_stream(struct sock sk, struct sk_buff skb,
				1378	struct msghdr *msg, int len,
				1379	struct ubuf_info *uarg)
				1380	{
				1381	struct ubuf_info *orig_uarg = skb_zcopy(skb);
				1382	struct iov_iter orig_iter = msg->msg_iter;
				1383	int err, orig_len = skb->len;
				1384
				1385	/* An skb can only point to one uarg. This edge case happens when
				1386	* TCP appends to an skb, but zerocopy_realloc triggered a new alloc.
				1387	*/
				1388	if (orig_uarg && uarg != orig_uarg)
				1389	return -EEXIST;
				1390
				1391	err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);
				1392	if (err == -EFAULT \|\| (err == -EMSGSIZE && skb->len == orig_len)) {
				1393	struct sock *save_sk = skb->sk;
				1394
				1395	/* Streams do not free skb on error. Reset to prev state. */
				1396	msg->msg_iter = orig_iter;
				1397	skb->sk = sk;
				1398	___pskb_trim(skb, orig_len);
				1399	skb->sk = save_sk;
				1400	return err;
				1401	}
				1402
				1403	skb_zcopy_set(skb, uarg, NULL);
				1404	return skb->len - orig_len;
				1405	}
				1406	EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
				1407
				1408	static int skb_zerocopy_clone(struct sk_buff nskb, struct sk_buff orig,
				1409	gfp_t gfp_mask)
				1410	{
				1411	if (skb_zcopy(orig)) {
				1412	if (skb_zcopy(nskb)) {
				1413	/* !gfp_mask callers are verified to !skb_zcopy(nskb) */
				1414	if (!gfp_mask) {
				1415	WARN_ON_ONCE(1);
				1416	return -ENOMEM;
				1417	}
				1418	if (skb_uarg(nskb) == skb_uarg(orig))
				1419	return 0;
				1420	if (skb_copy_ubufs(nskb, GFP_ATOMIC))
				1421	return -EIO;
				1422	}
				1423	skb_zcopy_set(nskb, skb_uarg(orig), NULL);
				1424	}
				1425	return 0;
				1426	}
				1427
				1428	/**
				1429	* skb_copy_ubufs - copy userspace skb frags buffers to kernel
				1430	* @skb: the skb to modify
				1431	* @gfp_mask: allocation priority
				1432	*
				1433	* This must be called on SKBTX_DEV_ZEROCOPY skb.
				1434	* It will copy all frags into kernel and drop the reference
				1435	* to userspace pages.
				1436	*
				1437	* If this function is called from an interrupt gfp_mask() must be
				1438	* %GFP_ATOMIC.
				1439	*
				1440	* Returns 0 on success or a negative error code on failure
				1441	* to allocate kernel memory to copy to.
				1442	*/
				1443	int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
				1444	{
				1445	int num_frags = skb_shinfo(skb)->nr_frags;
				1446	struct page page, head = NULL;
				1447	int i, new_frags;
				1448	u32 d_off;
				1449
				1450	if (skb_shared(skb) \|\| skb_unclone(skb, gfp_mask))
				1451	return -EINVAL;
				1452
				1453	if (!num_frags)
				1454	goto release;
				1455
				1456	new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1457	for (i = 0; i < new_frags; i++) {
				1458	page = alloc_page(gfp_mask);
				1459	if (!page) {
				1460	while (head) {
				1461	struct page next = (struct page )page_private(head);
				1462	put_page(head);
				1463	head = next;
				1464	}
				1465	return -ENOMEM;
				1466	}
				1467	set_page_private(page, (unsigned long)head);
				1468	head = page;
				1469	}
				1470
				1471	page = head;
				1472	d_off = 0;
				1473	for (i = 0; i < num_frags; i++) {
				1474	skb_frag_t *f = &skb_shinfo(skb)->frags[i];
				1475	u32 p_off, p_len, copied;
				1476	struct page *p;
				1477	u8 *vaddr;
				1478
				1479	skb_frag_foreach_page(f, skb_frag_off(f), skb_frag_size(f),
				1480	p, p_off, p_len, copied) {
				1481	u32 copy, done = 0;
				1482	vaddr = kmap_atomic(p);
				1483
				1484	while (done < p_len) {
				1485	if (d_off == PAGE_SIZE) {
				1486	d_off = 0;
				1487	page = (struct page *)page_private(page);
				1488	}
				1489	copy = min_t(u32, PAGE_SIZE - d_off, p_len - done);
				1490	memcpy(page_address(page) + d_off,
				1491	vaddr + p_off + done, copy);
				1492	done += copy;
				1493	d_off += copy;
				1494	}
				1495	kunmap_atomic(vaddr);
				1496	}
				1497	}
				1498
				1499	/* skb frags release userspace buffers */
				1500	for (i = 0; i < num_frags; i++)
				1501	skb_frag_unref(skb, i);
				1502
				1503	/* skb frags point to kernel buffers */
				1504	for (i = 0; i < new_frags - 1; i++) {
				1505	__skb_fill_page_desc(skb, i, head, 0, PAGE_SIZE);
				1506	head = (struct page *)page_private(head);
				1507	}
				1508	__skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
				1509	skb_shinfo(skb)->nr_frags = new_frags;
				1510
				1511	release:
				1512	skb_zcopy_clear(skb, false);
				1513	return 0;
				1514	}
				1515	EXPORT_SYMBOL_GPL(skb_copy_ubufs);
				1516
				1517	/**
				1518	* skb_clone - duplicate an sk_buff
				1519	* @skb: buffer to clone
				1520	* @gfp_mask: allocation priority
				1521	*
				1522	* Duplicate an &sk_buff. The new one is not owned by a socket. Both
				1523	* copies share the same packet data but not structure. The new
				1524	* buffer has a reference count of 1. If the allocation fails the
				1525	* function returns %NULL otherwise the new buffer is returned.
				1526	*
				1527	* If this function is called from an interrupt gfp_mask() must be
				1528	* %GFP_ATOMIC.
				1529	*/
				1530
				1531	struct sk_buff skb_clone(struct sk_buff skb, gfp_t gfp_mask)
				1532	{
				1533	struct sk_buff_fclones *fclones = container_of(skb,
				1534	struct sk_buff_fclones,
				1535	skb1);
				1536	struct sk_buff *n;
				1537
				1538	if (skb_orphan_frags(skb, gfp_mask))
				1539	return NULL;
				1540
				1541	if (skb->fclone == SKB_FCLONE_ORIG &&
				1542	refcount_read(&fclones->fclone_ref) == 1) {
				1543	n = &fclones->skb2;
				1544	refcount_set(&fclones->fclone_ref, 2);
				1545	} else {
				1546	if (skb_pfmemalloc(skb))
				1547	gfp_mask \|= __GFP_MEMALLOC;
				1548
				1549	n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
				1550	if (!n)
				1551	return NULL;
				1552
				1553	n->fclone = SKB_FCLONE_UNAVAILABLE;
				1554	}
				1555
				1556	return __skb_clone(n, skb);
				1557	}
				1558	EXPORT_SYMBOL(skb_clone);
				1559
				1560	void skb_headers_offset_update(struct sk_buff *skb, int off)
				1561	{
				1562	/* Only adjust this if it actually is csum_start rather than csum */
				1563	if (skb->ip_summed == CHECKSUM_PARTIAL)
				1564	skb->csum_start += off;
				1565	/* {transport,network,mac}_header and tail are relative to skb->head */
				1566	skb->transport_header += off;
				1567	skb->network_header += off;
				1568	if (skb_mac_header_was_set(skb))
				1569	skb->mac_header += off;
				1570	skb->inner_transport_header += off;
				1571	skb->inner_network_header += off;
				1572	skb->inner_mac_header += off;
				1573	}
				1574	EXPORT_SYMBOL(skb_headers_offset_update);
				1575
				1576	static void skb_p_revert(struct sk_buff *skb)
				1577	{
				1578	long off;
				1579
				1580	if (skb_shinfo(skb)->priv_free_func) {
				1581	skb_shinfo(skb)->priv_free_func(
				1582	skb_shinfo(skb)->priv_data,
				1583	skb->head, skb_end_offset(skb));
				1584	skb_shinfo(skb)->priv_free_func = NULL;
				1585	skb_shinfo(skb)->priv_data = NULL;
				1586	}
				1587
				1588	skb->truesize -= skb_end_offset(skb);
				1589
				1590	if (skb_shinfo(skb)->priv_head) {
				1591	off = skb_shinfo(skb)->priv_head - skb->head;
				1592	skb->head += off;
				1593	skb->data += off;
				1594	#ifndef NET_SKBUFF_DATA_USES_OFFSET
				1595	skb->end += off;
				1596	skb->tail += off;
				1597	#endif
				1598	skb_shinfo(skb)->priv_head = NULL;
				1599	}
				1600
				1601	skb->shared_info_ptr = NULL;
				1602	}
				1603
				1604	void skb_copy_header(struct sk_buff new, const struct sk_buff old)
				1605	{
				1606	__copy_skb_header(new, old);
				1607
				1608	skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
				1609	skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
				1610	skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
				1611	}
				1612	EXPORT_SYMBOL(skb_copy_header);
				1613
				1614	static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
				1615	{
				1616	if (skb_pfmemalloc(skb))
				1617	return SKB_ALLOC_RX;
				1618	return 0;
				1619	}
				1620
				1621	/**
				1622	* skb_copy - create private copy of an sk_buff
				1623	* @skb: buffer to copy
				1624	* @gfp_mask: allocation priority
				1625	*
				1626	* Make a copy of both an &sk_buff and its data. This is used when the
				1627	* caller wishes to modify the data and needs a private copy of the
				1628	* data to alter. Returns %NULL on failure or the pointer to the buffer
				1629	* on success. The returned buffer has a reference count of 1.
				1630	*
				1631	* As by-product this function converts non-linear &sk_buff to linear
				1632	* one, so that &sk_buff becomes completely private and caller is allowed
				1633	* to modify all the data of returned buffer. This means that this
				1634	* function is not recommended for use in circumstances when only
				1635	* header is going to be modified. Use pskb_copy() instead.
				1636	*/
				1637
				1638	struct sk_buff skb_copy(const struct sk_buff skb, gfp_t gfp_mask)
				1639	{
				1640	int headerlen = skb_headroom(skb);
				1641	unsigned int size = skb_end_offset(skb) + skb->data_len;
				1642	struct sk_buff *n = __alloc_skb(size, gfp_mask,
				1643	skb_alloc_rx_flag(skb), NUMA_NO_NODE);
				1644
				1645	if (!n)
				1646	return NULL;
				1647
				1648	/* Set the data pointer */
				1649	skb_reserve(n, headerlen);
				1650	/* Set the tail pointer and length */
				1651	skb_put(n, skb->len);
				1652
				1653	BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
				1654
				1655	skb_copy_header(n, skb);
				1656	return n;
				1657	}
				1658	EXPORT_SYMBOL(skb_copy);
				1659
				1660	/**
				1661	* __pskb_copy_fclone - create copy of an sk_buff with private head.
				1662	* @skb: buffer to copy
				1663	* @headroom: headroom of new skb
				1664	* @gfp_mask: allocation priority
				1665	* @fclone: if true allocate the copy of the skb from the fclone
				1666	* cache instead of the head cache; it is recommended to set this
				1667	* to true for the cases where the copy will likely be cloned
				1668	*
				1669	* Make a copy of both an &sk_buff and part of its data, located
				1670	* in header. Fragmented data remain shared. This is used when
				1671	* the caller wishes to modify only header of &sk_buff and needs
				1672	* private copy of the header to alter. Returns %NULL on failure
				1673	* or the pointer to the buffer on success.
				1674	* The returned buffer has a reference count of 1.
				1675	*/
				1676
				1677	struct sk_buff __pskb_copy_fclone(struct sk_buff skb, int headroom,
				1678	gfp_t gfp_mask, bool fclone)
				1679	{
				1680	unsigned int size = skb_headlen(skb) + headroom;
				1681	int flags;
				1682	struct sk_buff *n;
				1683
				1684	flags = skb_alloc_rx_flag(skb) \| (fclone ? SKB_ALLOC_FCLONE : 0);
				1685	n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE);
				1686	if (!n)
				1687	goto out;
				1688
				1689	/* Set the data pointer */
				1690	skb_reserve(n, headroom);
				1691	/* Set the tail pointer and length */
				1692	skb_put(n, skb_headlen(skb));
				1693	/* Copy the bytes */
				1694	skb_copy_from_linear_data(skb, n->data, n->len);
				1695
				1696	n->truesize += skb->data_len;
				1697	n->data_len = skb->data_len;
				1698	n->len = skb->len;
				1699
				1700	if (skb_shinfo(skb)->nr_frags) {
				1701	int i;
				1702
				1703	if (skb_orphan_frags(skb, gfp_mask) \|\|
				1704	skb_zerocopy_clone(n, skb, gfp_mask)) {
				1705	kfree_skb(n);
				1706	n = NULL;
				1707	goto out;
				1708	}
				1709	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
				1710	skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
				1711	skb_frag_ref(skb, i);
				1712	}
				1713	skb_shinfo(n)->nr_frags = i;
				1714	}
				1715
				1716	if (skb_has_frag_list(skb)) {
				1717	skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
				1718	skb_clone_fraglist(n);
				1719	}
				1720
				1721	skb_copy_header(n, skb);
				1722	out:
				1723	return n;
				1724	}
				1725	EXPORT_SYMBOL(__pskb_copy_fclone);
				1726
				1727	/**
				1728	* pskb_expand_head - reallocate header of &sk_buff
				1729	* @skb: buffer to reallocate
				1730	* @nhead: room to add at head
				1731	* @ntail: room to add at tail
				1732	* @gfp_mask: allocation priority
				1733	*
				1734	* Expands (or creates identical copy, if @nhead and @ntail are zero)
				1735	* header of @skb. &sk_buff itself is not changed. &sk_buff MUST have
				1736	* reference count of 1. Returns zero in the case of success or error,
				1737	* if expansion failed. In the last case, &sk_buff is not changed.
				1738	*
				1739	* All the pointers pointing into skb header may change and must be
				1740	* reloaded after call to this function.
				1741	*/
				1742
				1743	int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
				1744	gfp_t gfp_mask)
				1745	{
				1746	int i, osize = skb_end_offset(skb);
				1747	int size = osize + nhead + ntail;
				1748	long off;
				1749	u8 *data;
				1750
				1751	BUG_ON(nhead < 0);
				1752
				1753	BUG_ON(skb_shared(skb));
				1754
				1755	size = SKB_DATA_ALIGN(size);
				1756
				1757	if (skb_pfmemalloc(skb))
				1758	gfp_mask \|= __GFP_MEMALLOC;
				1759	data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
				1760	gfp_mask, NUMA_NO_NODE, NULL);
				1761	if (!data)
				1762	goto nodata;
				1763	size = SKB_WITH_OVERHEAD(ksize(data));
				1764
				1765	/* Copy only real data... and, alas, header. This should be
				1766	* optimized for the cases when header is void.
				1767	*/
				1768	memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);
				1769
				1770	memcpy((struct skb_shared_info *)(data + size),
				1771	skb_shinfo(skb),
				1772	offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
				1773
				1774	/*
				1775	* if shinfo is shared we must drop the old head gracefully, but if it
				1776	* is not we can just drop the old head and let the existing refcount
				1777	* be since all we did is relocate the values
				1778	*/
				1779	if (skb_cloned(skb)) {
				1780	if (skb_orphan_frags(skb, gfp_mask))
				1781	goto nofrags;
				1782	if (skb_zcopy(skb))
				1783	refcount_inc(&skb_uarg(skb)->refcnt);
				1784	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
				1785	skb_frag_ref(skb, i);
				1786
				1787	if (skb_has_frag_list(skb))
				1788	skb_clone_fraglist(skb);
				1789
				1790	skb_release_data(skb);
				1791	} else {
				1792	skb_free_head(skb);
				1793	}
				1794	off = (data + nhead) - skb->head;
				1795
				1796	skb->head = data;
				1797	skb->head_frag = 0;
				1798	skb->data += off;
				1799	#ifdef NET_SKBUFF_DATA_USES_OFFSET
				1800	skb->end = size;
				1801	off = nhead;
				1802	#else
				1803	skb->end = skb->head + size;
				1804	#endif
				1805	skb->tail += off;
				1806	skb_headers_offset_update(skb, nhead);
				1807	skb->cloned = 0;
				1808	skb->hdr_len = 0;
				1809	skb->nohdr = 0;
				1810	skb->shared_info_ptr = NULL;
				1811
				1812	atomic_set(&skb_shinfo(skb)->dataref, 1);
				1813
				1814	skb_metadata_clear(skb);
				1815
				1816	/* It is not generally safe to change skb->truesize.
				1817	* For the moment, we really care of rx path, or
				1818	* when skb is orphaned (not attached to a socket).
				1819	*/
				1820	if (!skb->sk \|\| skb->destructor == sock_edemux)
				1821	skb->truesize += size - osize;
				1822
				1823	return 0;
				1824
				1825	nofrags:
				1826	kfree(data);
				1827	nodata:
				1828	return -ENOMEM;
				1829	}
				1830	EXPORT_SYMBOL(pskb_expand_head);
				1831
				1832	/* Make private copy of skb with writable head and some headroom */
				1833
				1834	struct sk_buff skb_realloc_headroom(struct sk_buff skb, unsigned int headroom)
				1835	{
				1836	struct sk_buff *skb2;
				1837	int delta;
				1838
				1839	delta = headroom - skb_headroom(skb);
				1840	if (delta <= 0)
				1841	skb2 = pskb_copy(skb, GFP_ATOMIC);
				1842	else {
				1843	skb2 = skb_clone(skb, GFP_ATOMIC);
				1844	if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
				1845	GFP_ATOMIC)) {
				1846	kfree_skb(skb2);
				1847	skb2 = NULL;
				1848	}
				1849	}
				1850	return skb2;
				1851	}
				1852	EXPORT_SYMBOL(skb_realloc_headroom);
				1853
				1854	/**
				1855	* skb_expand_head - reallocate header of &sk_buff
				1856	* @skb: buffer to reallocate
				1857	* @headroom: needed headroom
				1858	*
				1859	* Unlike skb_realloc_headroom, this one does not allocate a new skb
				1860	* if possible; copies skb->sk to new skb as needed
				1861	* and frees original skb in case of failures.
				1862	*
				1863	* It expect increased headroom and generates warning otherwise.
				1864	*/
				1865
				1866	struct sk_buff skb_expand_head(struct sk_buff skb, unsigned int headroom)
				1867	{
				1868	int delta = headroom - skb_headroom(skb);
				1869	int osize = skb_end_offset(skb);
				1870	struct sock *sk = skb->sk;
				1871
				1872	if (WARN_ONCE(delta <= 0,
				1873	"%s is expecting an increase in the headroom", __func__))
				1874	return skb;
				1875
				1876	delta = SKB_DATA_ALIGN(delta);
				1877	/* pskb_expand_head() might crash, if skb is shared. */
				1878	if (skb_shared(skb) \|\| !is_skb_wmem(skb)) {
				1879	struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
				1880
				1881	if (unlikely(!nskb))
				1882	goto fail;
				1883
				1884	if (sk)
				1885	skb_set_owner_w(nskb, sk);
				1886	consume_skb(skb);
				1887	skb = nskb;
				1888	}
				1889	if (pskb_expand_head(skb, delta, 0, GFP_ATOMIC))
				1890	goto fail;
				1891
				1892	if (sk && is_skb_wmem(skb)) {
				1893	delta = skb_end_offset(skb) - osize;
				1894	refcount_add(delta, &sk->sk_wmem_alloc);
				1895	skb->truesize += delta;
				1896	}
				1897	return skb;
				1898
				1899	fail:
				1900	kfree_skb(skb);
				1901	return NULL;
				1902	}
				1903	EXPORT_SYMBOL(skb_expand_head);
				1904
				1905	/**
				1906	* skb_copy_expand - copy and expand sk_buff
				1907	* @skb: buffer to copy
				1908	* @newheadroom: new free bytes at head
				1909	* @newtailroom: new free bytes at tail
				1910	* @gfp_mask: allocation priority
				1911	*
				1912	* Make a copy of both an &sk_buff and its data and while doing so
				1913	* allocate additional space.
				1914	*
				1915	* This is used when the caller wishes to modify the data and needs a
				1916	* private copy of the data to alter as well as more space for new fields.
				1917	* Returns %NULL on failure or the pointer to the buffer
				1918	* on success. The returned buffer has a reference count of 1.
				1919	*
				1920	* You must pass %GFP_ATOMIC as the allocation priority if this function
				1921	* is called from an interrupt.
				1922	*/
				1923	struct sk_buff skb_copy_expand(const struct sk_buff skb,
				1924	int newheadroom, int newtailroom,
				1925	gfp_t gfp_mask)
				1926	{
				1927	/*
				1928	* Allocate the copy buffer
				1929	*/
				1930	struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
				1931	gfp_mask, skb_alloc_rx_flag(skb),
				1932	NUMA_NO_NODE);
				1933	int oldheadroom = skb_headroom(skb);
				1934	int head_copy_len, head_copy_off;
				1935
				1936	if (!n)
				1937	return NULL;
				1938
				1939	skb_reserve(n, newheadroom);
				1940
				1941	/* Set the tail pointer and length */
				1942	skb_put(n, skb->len);
				1943
				1944	head_copy_len = oldheadroom;
				1945	head_copy_off = 0;
				1946	if (newheadroom <= head_copy_len)
				1947	head_copy_len = newheadroom;
				1948	else
				1949	head_copy_off = newheadroom - head_copy_len;
				1950
				1951	/* Copy the linear header and data. */
				1952	BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
				1953	skb->len + head_copy_len));
				1954
				1955	skb_copy_header(n, skb);
				1956
				1957	skb_headers_offset_update(n, newheadroom - oldheadroom);
				1958
				1959	return n;
				1960	}
				1961	EXPORT_SYMBOL(skb_copy_expand);
				1962
				1963	/**
				1964	* __skb_pad - zero pad the tail of an skb
				1965	* @skb: buffer to pad
				1966	* @pad: space to pad
				1967	* @free_on_error: free buffer on error
				1968	*
				1969	* Ensure that a buffer is followed by a padding area that is zero
				1970	* filled. Used by network drivers which may DMA or transfer data
				1971	* beyond the buffer end onto the wire.
				1972	*
				1973	* May return error in out of memory cases. The skb is freed on error
				1974	* if @free_on_error is true.
				1975	*/
				1976
				1977	int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error)
				1978	{
				1979	int err;
				1980	int ntail;
				1981
				1982	/* If the skbuff is non linear tailroom is always zero.. */
				1983	if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
				1984	memset(skb->data+skb->len, 0, pad);
				1985	return 0;
				1986	}
				1987
				1988	ntail = skb->data_len + pad - (skb->end - skb->tail);
				1989	if (likely(skb_cloned(skb) \|\| ntail > 0)) {
				1990	err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
				1991	if (unlikely(err))
				1992	goto free_skb;
				1993	}
				1994
				1995	/* FIXME: The use of this function with non-linear skb's really needs
				1996	* to be audited.
				1997	*/
				1998	err = skb_linearize(skb);
				1999	if (unlikely(err))
				2000	goto free_skb;
				2001
				2002	memset(skb->data + skb->len, 0, pad);
				2003	return 0;
				2004
				2005	free_skb:
				2006	if (free_on_error)
				2007	kfree_skb(skb);
				2008	return err;
				2009	}
				2010	EXPORT_SYMBOL(__skb_pad);
				2011
				2012	/**
				2013	* pskb_put - add data to the tail of a potentially fragmented buffer
				2014	* @skb: start of the buffer to use
				2015	* @tail: tail fragment of the buffer to use
				2016	* @len: amount of data to add
				2017	*
				2018	* This function extends the used data area of the potentially
				2019	* fragmented buffer. @tail must be the last fragment of @skb -- or
				2020	* @skb itself. If this would exceed the total buffer size the kernel
				2021	* will panic. A pointer to the first byte of the extra data is
				2022	* returned.
				2023	*/
				2024
				2025	void pskb_put(struct sk_buff skb, struct sk_buff *tail, int len)
				2026	{
				2027	if (tail != skb) {
				2028	skb->data_len += len;
				2029	skb->len += len;
				2030	}
				2031	return skb_put(tail, len);
				2032	}
				2033	EXPORT_SYMBOL_GPL(pskb_put);
				2034
				2035	/**
				2036	* skb_put - add data to a buffer
				2037	* @skb: buffer to use
				2038	* @len: amount of data to add
				2039	*
				2040	* This function extends the used data area of the buffer. If this would
				2041	* exceed the total buffer size the kernel will panic. A pointer to the
				2042	* first byte of the extra data is returned.
				2043	*/
				2044	void skb_put(struct sk_buff skb, unsigned int len)
				2045	{
				2046	void *tmp = skb_tail_pointer(skb);
				2047	SKB_LINEAR_ASSERT(skb);
				2048	skb->tail += len;
				2049	skb->len += len;
				2050	if (unlikely(skb->tail > skb->end))
				2051	skb_over_panic(skb, len, __builtin_return_address(0));
				2052	return tmp;
				2053	}
				2054	EXPORT_SYMBOL(skb_put);
				2055
				2056	/**
				2057	* skb_push - add data to the start of a buffer
				2058	* @skb: buffer to use
				2059	* @len: amount of data to add
				2060	*
				2061	* This function extends the used data area of the buffer at the buffer
				2062	* start. If this would exceed the total buffer headroom the kernel will
				2063	* panic. A pointer to the first byte of the extra data is returned.
				2064	*/
				2065	void skb_push(struct sk_buff skb, unsigned int len)
				2066	{
				2067	skb->data -= len;
				2068	skb->len += len;
				2069	if (unlikely(skb->data < skb->head))
				2070	skb_under_panic(skb, len, __builtin_return_address(0));
				2071	return skb->data;
				2072	}
				2073	EXPORT_SYMBOL(skb_push);
				2074
				2075	/**
				2076	* skb_pull - remove data from the start of a buffer
				2077	* @skb: buffer to use
				2078	* @len: amount of data to remove
				2079	*
				2080	* This function removes data from the start of a buffer, returning
				2081	* the memory to the headroom. A pointer to the next data in the buffer
				2082	* is returned. Once the data has been pulled future pushes will overwrite
				2083	* the old data.
				2084	*/
				2085	void skb_pull(struct sk_buff skb, unsigned int len)
				2086	{
				2087	return skb_pull_inline(skb, len);
				2088	}
				2089	EXPORT_SYMBOL(skb_pull);
				2090
				2091	/**
				2092	* skb_trim - remove end from a buffer
				2093	* @skb: buffer to alter
				2094	* @len: new length
				2095	*
				2096	* Cut the length of a buffer down by removing data from the tail. If
				2097	* the buffer is already under the length specified it is not modified.
				2098	* The skb must be linear.
				2099	*/
				2100	void skb_trim(struct sk_buff *skb, unsigned int len)
				2101	{
				2102	if (skb->len > len)
				2103	__skb_trim(skb, len);
				2104	}
				2105	EXPORT_SYMBOL(skb_trim);
				2106
				2107	/* Trims skb to length len. It can change skb pointers.
				2108	*/
				2109
				2110	int ___pskb_trim(struct sk_buff *skb, unsigned int len)
				2111	{
				2112	struct sk_buff **fragp;
				2113	struct sk_buff *frag;
				2114	int offset = skb_headlen(skb);
				2115	int nfrags = skb_shinfo(skb)->nr_frags;
				2116	int i;
				2117	int err;
				2118
				2119	if (skb_cloned(skb) &&
				2120	unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))))
				2121	return err;
				2122
				2123	i = 0;
				2124	if (offset >= len)
				2125	goto drop_pages;
				2126
				2127	for (; i < nfrags; i++) {
				2128	int end = offset + skb_frag_size(&skb_shinfo(skb)->frags[i]);
				2129
				2130	if (end < len) {
				2131	offset = end;
				2132	continue;
				2133	}
				2134
				2135	skb_frag_size_set(&skb_shinfo(skb)->frags[i++], len - offset);
				2136
				2137	drop_pages:
				2138	skb_shinfo(skb)->nr_frags = i;
				2139
				2140	for (; i < nfrags; i++)
				2141	skb_frag_unref(skb, i);
				2142
				2143	if (skb_has_frag_list(skb))
				2144	skb_drop_fraglist(skb);
				2145	goto done;
				2146	}
				2147
				2148	for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
				2149	fragp = &frag->next) {
				2150	int end = offset + frag->len;
				2151
				2152	if (skb_shared(frag)) {
				2153	struct sk_buff *nfrag;
				2154
				2155	nfrag = skb_clone(frag, GFP_ATOMIC);
				2156	if (unlikely(!nfrag))
				2157	return -ENOMEM;
				2158
				2159	nfrag->next = frag->next;
				2160	consume_skb(frag);
				2161	frag = nfrag;
				2162	*fragp = frag;
				2163	}
				2164
				2165	if (end < len) {
				2166	offset = end;
				2167	continue;
				2168	}
				2169
				2170	if (end > len &&
				2171	unlikely((err = pskb_trim(frag, len - offset))))
				2172	return err;
				2173
				2174	if (frag->next)
				2175	skb_drop_list(&frag->next);
				2176	break;
				2177	}
				2178
				2179	done:
				2180	if (len > skb_headlen(skb)) {
				2181	skb->data_len -= skb->len - len;
				2182	skb->len = len;
				2183	} else {
				2184	skb->len = len;
				2185	skb->data_len = 0;
				2186	skb_set_tail_pointer(skb, len);
				2187	}
				2188
				2189	if (!skb->sk \|\| skb->destructor == sock_edemux)
				2190	skb_condense(skb);
				2191	return 0;
				2192	}
				2193	EXPORT_SYMBOL(___pskb_trim);
				2194
				2195	/* Note : use pskb_trim_rcsum() instead of calling this directly
				2196	*/
				2197	int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len)
				2198	{
				2199	if (skb->ip_summed == CHECKSUM_COMPLETE) {
				2200	int delta = skb->len - len;
				2201
				2202	skb->csum = csum_block_sub(skb->csum,
				2203	skb_checksum(skb, len, delta, 0),
				2204	len);
				2205	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
				2206	int hdlen = (len > skb_headlen(skb)) ? skb_headlen(skb) : len;
				2207	int offset = skb_checksum_start_offset(skb) + skb->csum_offset;
				2208
				2209	if (offset + sizeof(__sum16) > hdlen)
				2210	return -EINVAL;
				2211	}
				2212	return __pskb_trim(skb, len);
				2213	}
				2214	EXPORT_SYMBOL(pskb_trim_rcsum_slow);
				2215
				2216	/**
				2217	* __pskb_pull_tail - advance tail of skb header
				2218	* @skb: buffer to reallocate
				2219	* @delta: number of bytes to advance tail
				2220	*
				2221	* The function makes a sense only on a fragmented &sk_buff,
				2222	* it expands header moving its tail forward and copying necessary
				2223	* data from fragmented part.
				2224	*
				2225	* &sk_buff MUST have reference count of 1.
				2226	*
				2227	* Returns %NULL (and &sk_buff does not change) if pull failed
				2228	* or value of new tail of skb in the case of success.
				2229	*
				2230	* All the pointers pointing into skb header may change and must be
				2231	* reloaded after call to this function.
				2232	*/
				2233
				2234	/* Moves tail of skb head forward, copying data from fragmented part,
				2235	* when it is necessary.
				2236	* 1. It may fail due to malloc failure.
				2237	* 2. It may change skb pointers.
				2238	*
				2239	* It is pretty complicated. Luckily, it is called only in exceptional cases.
				2240	*/
				2241	void __pskb_pull_tail(struct sk_buff skb, int delta)
				2242	{
				2243	/* If skb has not enough free space at tail, get new one
				2244	* plus 128 bytes for future expansions. If we have enough
				2245	* room at tail, reallocate without expansion only if skb is cloned.
				2246	*/
				2247	int i, k, eat = (skb->tail + delta) - skb->end;
				2248
				2249	if (eat > 0 \|\| skb_cloned(skb)) {
				2250	if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
				2251	GFP_ATOMIC))
				2252	return NULL;
				2253	}
				2254
				2255	BUG_ON(skb_copy_bits(skb, skb_headlen(skb),
				2256	skb_tail_pointer(skb), delta));
				2257
				2258	/* Optimization: no fragments, no reasons to preestimate
				2259	* size of pulled pages. Superb.
				2260	*/
				2261	if (!skb_has_frag_list(skb))
				2262	goto pull_pages;
				2263
				2264	/* Estimate size of pulled pages. */
				2265	eat = delta;
				2266	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
				2267	int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
				2268
				2269	if (size >= eat)
				2270	goto pull_pages;
				2271	eat -= size;
				2272	}
				2273
				2274	/* If we need update frag list, we are in troubles.
				2275	* Certainly, it is possible to add an offset to skb data,
				2276	* but taking into account that pulling is expected to
				2277	* be very rare operation, it is worth to fight against
				2278	* further bloating skb head and crucify ourselves here instead.
				2279	* Pure masohism, indeed. 8)8)
				2280	*/
				2281	if (eat) {
				2282	struct sk_buff *list = skb_shinfo(skb)->frag_list;
				2283	struct sk_buff *clone = NULL;
				2284	struct sk_buff *insp = NULL;
				2285
				2286	do {
				2287	if (list->len <= eat) {
				2288	/* Eaten as whole. */
				2289	eat -= list->len;
				2290	list = list->next;
				2291	insp = list;
				2292	} else {
				2293	/* Eaten partially. */
				2294	if (skb_is_gso(skb) && !list->head_frag &&
				2295	skb_headlen(list))
				2296	skb_shinfo(skb)->gso_type \|= SKB_GSO_DODGY;
				2297
				2298	if (skb_shared(list)) {
				2299	/* Sucks! We need to fork list. :-( */
				2300	clone = skb_clone(list, GFP_ATOMIC);
				2301	if (!clone)
				2302	return NULL;
				2303	insp = list->next;
				2304	list = clone;
				2305	} else {
				2306	/* This may be pulled without
				2307	* problems. */
				2308	insp = list;
				2309	}
				2310	if (!pskb_pull(list, eat)) {
				2311	kfree_skb(clone);
				2312	return NULL;
				2313	}
				2314	break;
				2315	}
				2316	} while (eat);
				2317
				2318	/* Free pulled out fragments. */
				2319	while ((list = skb_shinfo(skb)->frag_list) != insp) {
				2320	skb_shinfo(skb)->frag_list = list->next;
				2321	consume_skb(list);
				2322	}
				2323	/* And insert new clone at head. */
				2324	if (clone) {
				2325	clone->next = list;
				2326	skb_shinfo(skb)->frag_list = clone;
				2327	}
				2328	}
				2329	/* Success! Now we may commit changes to skb data. */
				2330
				2331	pull_pages:
				2332	eat = delta;
				2333	k = 0;
				2334	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
				2335	int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
				2336
				2337	if (size <= eat) {
				2338	skb_frag_unref(skb, i);
				2339	eat -= size;
				2340	} else {
				2341	skb_frag_t *frag = &skb_shinfo(skb)->frags[k];
				2342
				2343	*frag = skb_shinfo(skb)->frags[i];
				2344	if (eat) {
				2345	skb_frag_off_add(frag, eat);
				2346	skb_frag_size_sub(frag, eat);
				2347	if (!i)
				2348	goto end;
				2349	eat = 0;
				2350	}
				2351	k++;
				2352	}
				2353	}
				2354	skb_shinfo(skb)->nr_frags = k;
				2355
				2356	end:
				2357	skb->tail += delta;
				2358	skb->data_len -= delta;
				2359
				2360	if (!skb->data_len)
				2361	skb_zcopy_clear(skb, false);
				2362
				2363	return skb_tail_pointer(skb);
				2364	}
				2365	EXPORT_SYMBOL(__pskb_pull_tail);
				2366
				2367	/**
				2368	* skb_copy_bits - copy bits from skb to kernel buffer
				2369	* @skb: source skb
				2370	* @offset: offset in source
				2371	* @to: destination buffer
				2372	* @len: number of bytes to copy
				2373	*
				2374	* Copy the specified number of bytes from the source skb to the
				2375	* destination buffer.
				2376	*
				2377	* CAUTION ! :
				2378	* If its prototype is ever changed,
				2379	* check arch/{}/net/{}.S files,
				2380	* since it is called from BPF assembly code.
				2381	*/
				2382	int skb_copy_bits(const struct sk_buff skb, int offset, void to, int len)
				2383	{
				2384	int start = skb_headlen(skb);
				2385	struct sk_buff *frag_iter;
				2386	int i, copy;
				2387
				2388	if (offset > (int)skb->len - len)
				2389	goto fault;
				2390
				2391	/* Copy header. */
				2392	if ((copy = start - offset) > 0) {
				2393	if (copy > len)
				2394	copy = len;
				2395	skb_copy_from_linear_data_offset(skb, offset, to, copy);
				2396	if ((len -= copy) == 0)
				2397	return 0;
				2398	offset += copy;
				2399	to += copy;
				2400	}
				2401
				2402	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
				2403	int end;
				2404	skb_frag_t *f = &skb_shinfo(skb)->frags[i];
				2405
				2406	WARN_ON(start > offset + len);
				2407
				2408	end = start + skb_frag_size(f);
				2409	if ((copy = end - offset) > 0) {
				2410	u32 p_off, p_len, copied;
				2411	struct page *p;
				2412	u8 *vaddr;
				2413
				2414	if (copy > len)
				2415	copy = len;
				2416
				2417	skb_frag_foreach_page(f,
				2418	skb_frag_off(f) + offset - start,
				2419	copy, p, p_off, p_len, copied) {
				2420	vaddr = kmap_atomic(p);
				2421	memcpy(to + copied, vaddr + p_off, p_len);
				2422	kunmap_atomic(vaddr);
				2423	}
				2424
				2425	if ((len -= copy) == 0)
				2426	return 0;
				2427	offset += copy;
				2428	to += copy;
				2429	}
				2430	start = end;
				2431	}
				2432
				2433	skb_walk_frags(skb, frag_iter) {
				2434	int end;
				2435
				2436	WARN_ON(start > offset + len);
				2437
				2438	end = start + frag_iter->len;
				2439	if ((copy = end - offset) > 0) {
				2440	if (copy > len)
				2441	copy = len;
				2442	if (skb_copy_bits(frag_iter, offset - start, to, copy))
				2443	goto fault;
				2444	if ((len -= copy) == 0)
				2445	return 0;
				2446	offset += copy;
				2447	to += copy;
				2448	}
				2449	start = end;
				2450	}
				2451
				2452	if (!len)
				2453	return 0;
				2454
				2455	fault:
				2456	return -EFAULT;
				2457	}
				2458	EXPORT_SYMBOL(skb_copy_bits);
				2459
				2460	/*
				2461	* Callback from splice_to_pipe(), if we need to release some pages
				2462	* at the end of the spd in case we error'ed out in filling the pipe.
				2463	*/
				2464	static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
				2465	{
				2466	put_page(spd->pages[i]);
				2467	}
				2468
				2469	static struct page linear_to_page(struct page page, unsigned int *len,
				2470	unsigned int *offset,
				2471	struct sock *sk)
				2472	{
				2473	struct page_frag *pfrag = sk_page_frag(sk);
				2474
				2475	if (!sk_page_frag_refill(sk, pfrag))
				2476	return NULL;
				2477
				2478	len = min_t(unsigned int, len, pfrag->size - pfrag->offset);
				2479
				2480	memcpy(page_address(pfrag->page) + pfrag->offset,
				2481	page_address(page) + offset, len);
				2482	*offset = pfrag->offset;
				2483	pfrag->offset += *len;
				2484
				2485	return pfrag->page;
				2486	}
				2487
				2488	static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
				2489	struct page *page,
				2490	unsigned int offset)
				2491	{
				2492	return spd->nr_pages &&
				2493	spd->pages[spd->nr_pages - 1] == page &&
				2494	(spd->partial[spd->nr_pages - 1].offset +
				2495	spd->partial[spd->nr_pages - 1].len == offset);
				2496	}
				2497
				2498	/*
				2499	* Fill page/offset/length into spd, if it can hold more pages.
				2500	*/
				2501	static bool spd_fill_page(struct splice_pipe_desc *spd,
				2502	struct pipe_inode_info pipe, struct page page,
				2503	unsigned int *len, unsigned int offset,
				2504	bool linear,
				2505	struct sock *sk)
				2506	{
				2507	if (unlikely(spd->nr_pages == MAX_SKB_FRAGS))
				2508	return true;
				2509
				2510	if (linear) {
				2511	page = linear_to_page(page, len, &offset, sk);
				2512	if (!page)
				2513	return true;
				2514	}
				2515	if (spd_can_coalesce(spd, page, offset)) {
				2516	spd->partial[spd->nr_pages - 1].len += *len;
				2517	return false;
				2518	}
				2519	get_page(page);
				2520	spd->pages[spd->nr_pages] = page;
				2521	spd->partial[spd->nr_pages].len = *len;
				2522	spd->partial[spd->nr_pages].offset = offset;
				2523	spd->nr_pages++;
				2524
				2525	return false;
				2526	}
				2527
				2528	static bool __splice_segment(struct page *page, unsigned int poff,
				2529	unsigned int plen, unsigned int *off,
				2530	unsigned int *len,
				2531	struct splice_pipe_desc *spd, bool linear,
				2532	struct sock *sk,
				2533	struct pipe_inode_info *pipe)
				2534	{
				2535	if (!*len)
				2536	return true;
				2537
				2538	/* skip this segment if already processed */
				2539	if (*off >= plen) {
				2540	*off -= plen;
				2541	return false;
				2542	}
				2543
				2544	/* ignore any bits we already processed */
				2545	poff += *off;
				2546	plen -= *off;
				2547	*off = 0;
				2548
				2549	do {
				2550	unsigned int flen = min(*len, plen);
				2551
				2552	if (spd_fill_page(spd, pipe, page, &flen, poff,
				2553	linear, sk))
				2554	return true;
				2555	poff += flen;
				2556	plen -= flen;
				2557	*len -= flen;
				2558	} while (*len && plen);
				2559
				2560	return false;
				2561	}
				2562
				2563	/*
				2564	* Map linear and fragment data from the skb to spd. It reports true if the
				2565	* pipe is full or if we already spliced the requested length.
				2566	*/
				2567	static bool __skb_splice_bits(struct sk_buff skb, struct pipe_inode_info pipe,
				2568	unsigned int offset, unsigned int len,
				2569	struct splice_pipe_desc spd, struct sock sk)
				2570	{
				2571	int seg;
				2572	struct sk_buff *iter;
				2573
				2574	/* map the linear part :
				2575	* If skb->head_frag is set, this 'linear' part is backed by a
				2576	* fragment, and if the head is not shared with any clones then
				2577	* we can avoid a copy since we own the head portion of this page.
				2578	*/
				2579	if (__splice_segment(virt_to_page(skb->data),
				2580	(unsigned long) skb->data & (PAGE_SIZE - 1),
				2581	skb_headlen(skb),
				2582	offset, len, spd,
				2583	skb_head_is_locked(skb),
				2584	sk, pipe))
				2585	return true;
				2586
				2587	/*
				2588	* then map the fragments
				2589	*/
				2590	for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
				2591	const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
				2592
				2593	if (__splice_segment(skb_frag_page(f),
				2594	skb_frag_off(f), skb_frag_size(f),
				2595	offset, len, spd, false, sk, pipe))
				2596	return true;
				2597	}
				2598
				2599	skb_walk_frags(skb, iter) {
				2600	if (*offset >= iter->len) {
				2601	*offset -= iter->len;
				2602	continue;
				2603	}
				2604	/* __skb_splice_bits() only fails if the output has no room
				2605	* left, so no point in going over the frag_list for the error
				2606	* case.
				2607	*/
				2608	if (__skb_splice_bits(iter, pipe, offset, len, spd, sk))
				2609	return true;
				2610	}
				2611
				2612	return false;
				2613	}
				2614
				2615	/*
				2616	* Map data from the skb to a pipe. Should handle both the linear part,
				2617	* the fragments, and the frag list.
				2618	*/
				2619	int skb_splice_bits(struct sk_buff skb, struct sock sk, unsigned int offset,
				2620	struct pipe_inode_info *pipe, unsigned int tlen,
				2621	unsigned int flags)
				2622	{
				2623	struct partial_page partial[MAX_SKB_FRAGS];
				2624	struct page *pages[MAX_SKB_FRAGS];
				2625	struct splice_pipe_desc spd = {
				2626	.pages = pages,
				2627	.partial = partial,
				2628	.nr_pages_max = MAX_SKB_FRAGS,
				2629	.ops = &nosteal_pipe_buf_ops,
				2630	.spd_release = sock_spd_release,
				2631	};
				2632	int ret = 0;
				2633
				2634	__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk);
				2635
				2636	if (spd.nr_pages)
				2637	ret = splice_to_pipe(pipe, &spd);
				2638
				2639	return ret;
				2640	}
				2641	EXPORT_SYMBOL_GPL(skb_splice_bits);
				2642
				2643	/* Send skb data on a socket. Socket must be locked. */
				2644	int skb_send_sock_locked(struct sock sk, struct sk_buff skb, int offset,
				2645	int len)
				2646	{
				2647	unsigned int orig_len = len;
				2648	struct sk_buff *head = skb;
				2649	unsigned short fragidx;
				2650	int slen, ret;
				2651
				2652	do_frag_list:
				2653
				2654	/* Deal with head data */
				2655	while (offset < skb_headlen(skb) && len) {
				2656	struct kvec kv;
				2657	struct msghdr msg;
				2658
				2659	slen = min_t(int, len, skb_headlen(skb) - offset);
				2660	kv.iov_base = skb->data + offset;
				2661	kv.iov_len = slen;
				2662	memset(&msg, 0, sizeof(msg));
				2663	msg.msg_flags = MSG_DONTWAIT;
				2664
				2665	ret = kernel_sendmsg_locked(sk, &msg, &kv, 1, slen);
				2666	if (ret <= 0)
				2667	goto error;
				2668
				2669	offset += ret;
				2670	len -= ret;
				2671	}
				2672
				2673	/* All the data was skb head? */
				2674	if (!len)
				2675	goto out;
				2676
				2677	/* Make offset relative to start of frags */
				2678	offset -= skb_headlen(skb);
				2679
				2680	/* Find where we are in frag list */
				2681	for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
				2682	skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
				2683
				2684	if (offset < skb_frag_size(frag))
				2685	break;
				2686
				2687	offset -= skb_frag_size(frag);
				2688	}
				2689
				2690	for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
				2691	skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
				2692
				2693	slen = min_t(size_t, len, skb_frag_size(frag) - offset);
				2694
				2695	while (slen) {
				2696	ret = kernel_sendpage_locked(sk, skb_frag_page(frag),
				2697	skb_frag_off(frag) + offset,
				2698	slen, MSG_DONTWAIT);
				2699	if (ret <= 0)
				2700	goto error;
				2701
				2702	len -= ret;
				2703	offset += ret;
				2704	slen -= ret;
				2705	}
				2706
				2707	offset = 0;
				2708	}
				2709
				2710	if (len) {
				2711	/* Process any frag lists */
				2712
				2713	if (skb == head) {
				2714	if (skb_has_frag_list(skb)) {
				2715	skb = skb_shinfo(skb)->frag_list;
				2716	goto do_frag_list;
				2717	}
				2718	} else if (skb->next) {
				2719	skb = skb->next;
				2720	goto do_frag_list;
				2721	}
				2722	}
				2723
				2724	out:
				2725	return orig_len - len;
				2726
				2727	error:
				2728	return orig_len == len ? ret : orig_len - len;
				2729	}
				2730	EXPORT_SYMBOL_GPL(skb_send_sock_locked);
				2731
				2732	/**
				2733	* skb_store_bits - store bits from kernel buffer to skb
				2734	* @skb: destination buffer
				2735	* @offset: offset in destination
				2736	* @from: source buffer
				2737	* @len: number of bytes to copy
				2738	*
				2739	* Copy the specified number of bytes from the source buffer to the
				2740	* destination skb. This function handles all the messy bits of
				2741	* traversing fragment lists and such.
				2742	*/
				2743
				2744	int skb_store_bits(struct sk_buff skb, int offset, const void from, int len)
				2745	{
				2746	int start = skb_headlen(skb);
				2747	struct sk_buff *frag_iter;
				2748	int i, copy;
				2749
				2750	if (offset > (int)skb->len - len)
				2751	goto fault;
				2752
				2753	if ((copy = start - offset) > 0) {
				2754	if (copy > len)
				2755	copy = len;
				2756	skb_copy_to_linear_data_offset(skb, offset, from, copy);
				2757	if ((len -= copy) == 0)
				2758	return 0;
				2759	offset += copy;
				2760	from += copy;
				2761	}
				2762
				2763	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
				2764	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
				2765	int end;
				2766
				2767	WARN_ON(start > offset + len);
				2768
				2769	end = start + skb_frag_size(frag);
				2770	if ((copy = end - offset) > 0) {
				2771	u32 p_off, p_len, copied;
				2772	struct page *p;
				2773	u8 *vaddr;
				2774
				2775	if (copy > len)
				2776	copy = len;
				2777
				2778	skb_frag_foreach_page(frag,
				2779	skb_frag_off(frag) + offset - start,
				2780	copy, p, p_off, p_len, copied) {
				2781	vaddr = kmap_atomic(p);
				2782	memcpy(vaddr + p_off, from + copied, p_len);
				2783	kunmap_atomic(vaddr);
				2784	}
				2785
				2786	if ((len -= copy) == 0)
				2787	return 0;
				2788	offset += copy;
				2789	from += copy;
				2790	}
				2791	start = end;
				2792	}
				2793
				2794	skb_walk_frags(skb, frag_iter) {
				2795	int end;
				2796
				2797	WARN_ON(start > offset + len);
				2798
				2799	end = start + frag_iter->len;
				2800	if ((copy = end - offset) > 0) {
				2801	if (copy > len)
				2802	copy = len;
				2803	if (skb_store_bits(frag_iter, offset - start,
				2804	from, copy))
				2805	goto fault;
				2806	if ((len -= copy) == 0)
				2807	return 0;
				2808	offset += copy;
				2809	from += copy;
				2810	}
				2811	start = end;
				2812	}
				2813	if (!len)
				2814	return 0;
				2815
				2816	fault:
				2817	return -EFAULT;
				2818	}
				2819	EXPORT_SYMBOL(skb_store_bits);
				2820
				2821	/* Checksum skb data. */
				2822	__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
				2823	__wsum csum, const struct skb_checksum_ops *ops)
				2824	{
				2825	int start = skb_headlen(skb);
				2826	int i, copy = start - offset;
				2827	struct sk_buff *frag_iter;
				2828	int pos = 0;
				2829
				2830	/* Checksum header. */
				2831	if (copy > 0) {
				2832	if (copy > len)
				2833	copy = len;
				2834	csum = INDIRECT_CALL_1(ops->update, csum_partial_ext,
				2835	skb->data + offset, copy, csum);
				2836	if ((len -= copy) == 0)
				2837	return csum;
				2838	offset += copy;
				2839	pos = copy;
				2840	}
				2841
				2842	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
				2843	int end;
				2844	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
				2845
				2846	WARN_ON(start > offset + len);
				2847
				2848	end = start + skb_frag_size(frag);
				2849	if ((copy = end - offset) > 0) {
				2850	u32 p_off, p_len, copied;
				2851	struct page *p;
				2852	__wsum csum2;
				2853	u8 *vaddr;
				2854
				2855	if (copy > len)
				2856	copy = len;
				2857
				2858	skb_frag_foreach_page(frag,
				2859	skb_frag_off(frag) + offset - start,
				2860	copy, p, p_off, p_len, copied) {
				2861	vaddr = kmap_atomic(p);
				2862	csum2 = INDIRECT_CALL_1(ops->update,
				2863	csum_partial_ext,
				2864	vaddr + p_off, p_len, 0);
				2865	kunmap_atomic(vaddr);
				2866	csum = INDIRECT_CALL_1(ops->combine,
				2867	csum_block_add_ext, csum,
				2868	csum2, pos, p_len);
				2869	pos += p_len;
				2870	}
				2871
				2872	if (!(len -= copy))
				2873	return csum;
				2874	offset += copy;
				2875	}
				2876	start = end;
				2877	}
				2878
				2879	skb_walk_frags(skb, frag_iter) {
				2880	int end;
				2881
				2882	WARN_ON(start > offset + len);
				2883
				2884	end = start + frag_iter->len;
				2885	if ((copy = end - offset) > 0) {
				2886	__wsum csum2;
				2887	if (copy > len)
				2888	copy = len;
				2889	csum2 = __skb_checksum(frag_iter, offset - start,
				2890	copy, 0, ops);
				2891	csum = INDIRECT_CALL_1(ops->combine, csum_block_add_ext,
				2892	csum, csum2, pos, copy);
				2893	if ((len -= copy) == 0)
				2894	return csum;
				2895	offset += copy;
				2896	pos += copy;
				2897	}
				2898	start = end;
				2899	}
				2900	BUG_ON(len);
				2901
				2902	return csum;
				2903	}
				2904	EXPORT_SYMBOL(__skb_checksum);
				2905
				2906	__wsum skb_checksum(const struct sk_buff *skb, int offset,
				2907	int len, __wsum csum)
				2908	{
				2909	const struct skb_checksum_ops ops = {
				2910	.update = csum_partial_ext,
				2911	.combine = csum_block_add_ext,
				2912	};
				2913
				2914	return __skb_checksum(skb, offset, len, csum, &ops);
				2915	}
				2916	EXPORT_SYMBOL(skb_checksum);
				2917
				2918	/* Both of above in one bottle. */
				2919
				2920	__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
				2921	u8 *to, int len, __wsum csum)
				2922	{
				2923	int start = skb_headlen(skb);
				2924	int i, copy = start - offset;
				2925	struct sk_buff *frag_iter;
				2926	int pos = 0;
				2927
				2928	/* Copy header. */
				2929	if (copy > 0) {
				2930	if (copy > len)
				2931	copy = len;
				2932	csum = csum_partial_copy_nocheck(skb->data + offset, to,
				2933	copy, csum);
				2934	if ((len -= copy) == 0)
				2935	return csum;
				2936	offset += copy;
				2937	to += copy;
				2938	pos = copy;
				2939	}
				2940
				2941	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
				2942	int end;
				2943
				2944	WARN_ON(start > offset + len);
				2945
				2946	end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
				2947	if ((copy = end - offset) > 0) {
				2948	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
				2949	u32 p_off, p_len, copied;
				2950	struct page *p;
				2951	__wsum csum2;
				2952	u8 *vaddr;
				2953
				2954	if (copy > len)
				2955	copy = len;
				2956
				2957	skb_frag_foreach_page(frag,
				2958	skb_frag_off(frag) + offset - start,
				2959	copy, p, p_off, p_len, copied) {
				2960	vaddr = kmap_atomic(p);
				2961	csum2 = csum_partial_copy_nocheck(vaddr + p_off,
				2962	to + copied,
				2963	p_len, 0);
				2964	kunmap_atomic(vaddr);
				2965	csum = csum_block_add(csum, csum2, pos);
				2966	pos += p_len;
				2967	}
				2968
				2969	if (!(len -= copy))
				2970	return csum;
				2971	offset += copy;
				2972	to += copy;
				2973	}
				2974	start = end;
				2975	}
				2976
				2977	skb_walk_frags(skb, frag_iter) {
				2978	__wsum csum2;
				2979	int end;
				2980
				2981	WARN_ON(start > offset + len);
				2982
				2983	end = start + frag_iter->len;
				2984	if ((copy = end - offset) > 0) {
				2985	if (copy > len)
				2986	copy = len;
				2987	csum2 = skb_copy_and_csum_bits(frag_iter,
				2988	offset - start,
				2989	to, copy, 0);
				2990	csum = csum_block_add(csum, csum2, pos);
				2991	if ((len -= copy) == 0)
				2992	return csum;
				2993	offset += copy;
				2994	to += copy;
				2995	pos += copy;
				2996	}
				2997	start = end;
				2998	}
				2999	BUG_ON(len);
				3000	return csum;
				3001	}
				3002	EXPORT_SYMBOL(skb_copy_and_csum_bits);
				3003
				3004	__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
				3005	{
				3006	__sum16 sum;
				3007
				3008	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
				3009	/* See comments in __skb_checksum_complete(). */
				3010	if (likely(!sum)) {
				3011	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
				3012	!skb->csum_complete_sw)
				3013	netdev_rx_csum_fault(skb->dev, skb);
				3014	}
				3015	if (!skb_shared(skb))
				3016	skb->csum_valid = !sum;
				3017	return sum;
				3018	}
				3019	EXPORT_SYMBOL(__skb_checksum_complete_head);
				3020
				3021	/* This function assumes skb->csum already holds pseudo header's checksum,
				3022	* which has been changed from the hardware checksum, for example, by
				3023	* __skb_checksum_validate_complete(). And, the original skb->csum must
				3024	* have been validated unsuccessfully for CHECKSUM_COMPLETE case.
				3025	*
				3026	* It returns non-zero if the recomputed checksum is still invalid, otherwise
				3027	* zero. The new checksum is stored back into skb->csum unless the skb is
				3028	* shared.
				3029	*/
				3030	__sum16 __skb_checksum_complete(struct sk_buff *skb)
				3031	{
				3032	__wsum csum;
				3033	__sum16 sum;
				3034
				3035	csum = skb_checksum(skb, 0, skb->len, 0);
				3036
				3037	sum = csum_fold(csum_add(skb->csum, csum));
				3038	/* This check is inverted, because we already knew the hardware
				3039	* checksum is invalid before calling this function. So, if the
				3040	* re-computed checksum is valid instead, then we have a mismatch
				3041	* between the original skb->csum and skb_checksum(). This means either
				3042	* the original hardware checksum is incorrect or we screw up skb->csum
				3043	* when moving skb->data around.
				3044	*/
				3045	if (likely(!sum)) {
				3046	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
				3047	!skb->csum_complete_sw)
				3048	netdev_rx_csum_fault(skb->dev, skb);
				3049	}
				3050
				3051	if (!skb_shared(skb)) {
				3052	/* Save full packet checksum */
				3053	skb->csum = csum;
				3054	skb->ip_summed = CHECKSUM_COMPLETE;
				3055	skb->csum_complete_sw = 1;
				3056	skb->csum_valid = !sum;
				3057	}
				3058
				3059	return sum;
				3060	}
				3061	EXPORT_SYMBOL(__skb_checksum_complete);
				3062
				3063	static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum)
				3064	{
				3065	net_warn_ratelimited(
				3066	"%s: attempt to compute crc32c without libcrc32c.ko\n",
				3067	__func__);
				3068	return 0;
				3069	}
				3070
				3071	static __wsum warn_crc32c_csum_combine(__wsum csum, __wsum csum2,
				3072	int offset, int len)
				3073	{
				3074	net_warn_ratelimited(
				3075	"%s: attempt to compute crc32c without libcrc32c.ko\n",
				3076	__func__);
				3077	return 0;
				3078	}
				3079
				3080	static const struct skb_checksum_ops default_crc32c_ops = {
				3081	.update = warn_crc32c_csum_update,
				3082	.combine = warn_crc32c_csum_combine,
				3083	};
				3084
				3085	const struct skb_checksum_ops *crc32c_csum_stub __read_mostly =
				3086	&default_crc32c_ops;
				3087	EXPORT_SYMBOL(crc32c_csum_stub);
				3088
				3089	/**
				3090	* skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy()
				3091	* @from: source buffer
				3092	*
				3093	* Calculates the amount of linear headroom needed in the 'to' skb passed
				3094	* into skb_zerocopy().
				3095	*/
				3096	unsigned int
				3097	skb_zerocopy_headlen(const struct sk_buff *from)
				3098	{
				3099	unsigned int hlen = 0;
				3100
				3101	if (!from->head_frag \|\|
				3102	skb_headlen(from) < L1_CACHE_BYTES \|\|
				3103	skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) {
				3104	hlen = skb_headlen(from);
				3105	if (!hlen)
				3106	hlen = from->len;
				3107	}
				3108
				3109	if (skb_has_frag_list(from))
				3110	hlen = from->len;
				3111
				3112	return hlen;
				3113	}
				3114	EXPORT_SYMBOL_GPL(skb_zerocopy_headlen);
				3115
				3116	/**
				3117	* skb_zerocopy - Zero copy skb to skb
				3118	* @to: destination buffer
				3119	* @from: source buffer
				3120	* @len: number of bytes to copy from source buffer
				3121	* @hlen: size of linear headroom in destination buffer
				3122	*
				3123	* Copies up to `len` bytes from `from` to `to` by creating references
				3124	* to the frags in the source buffer.
				3125	*
				3126	* The `hlen` as calculated by skb_zerocopy_headlen() specifies the
				3127	* headroom in the `to` buffer.
				3128	*
				3129	* Return value:
				3130	* 0: everything is OK
				3131	* -ENOMEM: couldn't orphan frags of @from due to lack of memory
				3132	* -EFAULT: skb_copy_bits() found some problem with skb geometry
				3133	*/
				3134	int
				3135	skb_zerocopy(struct sk_buff to, struct sk_buff from, int len, int hlen)
				3136	{
				3137	int i, j = 0;
				3138	int plen = 0; /* length of skb->head fragment */
				3139	int ret;
				3140	struct page *page;
				3141	unsigned int offset;
				3142
				3143	BUG_ON(!from->head_frag && !hlen);
				3144
				3145	/* dont bother with small payloads */
				3146	if (len <= skb_tailroom(to))
				3147	return skb_copy_bits(from, 0, skb_put(to, len), len);
				3148
				3149	if (hlen) {
				3150	ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
				3151	if (unlikely(ret))
				3152	return ret;
				3153	len -= hlen;
				3154	} else {
				3155	plen = min_t(int, skb_headlen(from), len);
				3156	if (plen) {
				3157	page = virt_to_head_page(from->head);
				3158	offset = from->data - (unsigned char *)page_address(page);
				3159	__skb_fill_page_desc(to, 0, page, offset, plen);
				3160	get_page(page);
				3161	j = 1;
				3162	len -= plen;
				3163	}
				3164	}
				3165
				3166	to->truesize += len + plen;
				3167	to->len += len + plen;
				3168	to->data_len += len + plen;
				3169
				3170	if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) {
				3171	skb_tx_error(from);
				3172	return -ENOMEM;
				3173	}
				3174	skb_zerocopy_clone(to, from, GFP_ATOMIC);
				3175
				3176	for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
				3177	int size;
				3178
				3179	if (!len)
				3180	break;
				3181	skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
				3182	size = min_t(int, skb_frag_size(&skb_shinfo(to)->frags[j]),
				3183	len);
				3184	skb_frag_size_set(&skb_shinfo(to)->frags[j], size);
				3185	len -= size;
				3186	skb_frag_ref(to, j);
				3187	j++;
				3188	}
				3189	skb_shinfo(to)->nr_frags = j;
				3190
				3191	return 0;
				3192	}
				3193	EXPORT_SYMBOL_GPL(skb_zerocopy);
				3194
				3195	void skb_copy_and_csum_dev(const struct sk_buff skb, u8 to)
				3196	{
				3197	__wsum csum;
				3198	long csstart;
				3199
				3200	if (skb->ip_summed == CHECKSUM_PARTIAL)
				3201	csstart = skb_checksum_start_offset(skb);
				3202	else
				3203	csstart = skb_headlen(skb);
				3204
				3205	BUG_ON(csstart > skb_headlen(skb));
				3206
				3207	skb_copy_from_linear_data(skb, to, csstart);
				3208
				3209	csum = 0;
				3210	if (csstart != skb->len)
				3211	csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
				3212	skb->len - csstart, 0);
				3213
				3214	if (skb->ip_summed == CHECKSUM_PARTIAL) {
				3215	long csstuff = csstart + skb->csum_offset;
				3216
				3217	((__sum16 )(to + csstuff)) = csum_fold(csum);
				3218	}
				3219	}
				3220	EXPORT_SYMBOL(skb_copy_and_csum_dev);
				3221
				3222	/**
				3223	* skb_dequeue - remove from the head of the queue
				3224	* @list: list to dequeue from
				3225	*
				3226	* Remove the head of the list. The list lock is taken so the function
				3227	* may be used safely with other locking list functions. The head item is
				3228	* returned or %NULL if the list is empty.
				3229	*/
				3230
				3231	struct sk_buff skb_dequeue(struct sk_buff_head list)
				3232	{
				3233	unsigned long flags;
				3234	struct sk_buff *result;
				3235
				3236	spin_lock_irqsave(&list->lock, flags);
				3237	result = __skb_dequeue(list);
				3238	spin_unlock_irqrestore(&list->lock, flags);
				3239	return result;
				3240	}
				3241	EXPORT_SYMBOL(skb_dequeue);
				3242
				3243	/**
				3244	* skb_dequeue_tail - remove from the tail of the queue
				3245	* @list: list to dequeue from
				3246	*
				3247	* Remove the tail of the list. The list lock is taken so the function
				3248	* may be used safely with other locking list functions. The tail item is
				3249	* returned or %NULL if the list is empty.
				3250	*/
				3251	struct sk_buff skb_dequeue_tail(struct sk_buff_head list)
				3252	{
				3253	unsigned long flags;
				3254	struct sk_buff *result;
				3255
				3256	spin_lock_irqsave(&list->lock, flags);
				3257	result = __skb_dequeue_tail(list);
				3258	spin_unlock_irqrestore(&list->lock, flags);
				3259	return result;
				3260	}
				3261	EXPORT_SYMBOL(skb_dequeue_tail);
				3262
				3263	/**
				3264	* skb_queue_purge - empty a list
				3265	* @list: list to empty
				3266	*
				3267	* Delete all buffers on an &sk_buff list. Each buffer is removed from
				3268	* the list and one reference dropped. This function takes the list
				3269	* lock and is atomic with respect to other list locking functions.
				3270	*/
				3271	void skb_queue_purge(struct sk_buff_head *list)
				3272	{
				3273	struct sk_buff *skb;
				3274	while ((skb = skb_dequeue(list)) != NULL)
				3275	kfree_skb(skb);
				3276	}
				3277	EXPORT_SYMBOL(skb_queue_purge);
				3278
				3279	/**
				3280	* skb_rbtree_purge - empty a skb rbtree
				3281	* @root: root of the rbtree to empty
				3282	* Return value: the sum of truesizes of all purged skbs.
				3283	*
				3284	* Delete all buffers on an &sk_buff rbtree. Each buffer is removed from
				3285	* the list and one reference dropped. This function does not take
				3286	* any lock. Synchronization should be handled by the caller (e.g., TCP
				3287	* out-of-order queue is protected by the socket lock).
				3288	*/
				3289	unsigned int skb_rbtree_purge(struct rb_root *root)
				3290	{
				3291	struct rb_node *p = rb_first(root);
				3292	unsigned int sum = 0;
				3293
				3294	while (p) {
				3295	struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
				3296
				3297	p = rb_next(p);
				3298	rb_erase(&skb->rbnode, root);
				3299	sum += skb->truesize;
				3300	kfree_skb(skb);
				3301	}
				3302	return sum;
				3303	}
				3304
				3305	/**
				3306	* skb_queue_head - queue a buffer at the list head
				3307	* @list: list to use
				3308	* @newsk: buffer to queue
				3309	*
				3310	* Queue a buffer at the start of the list. This function takes the
				3311	* list lock and can be used safely with other locking &sk_buff functions
				3312	* safely.
				3313	*
				3314	* A buffer cannot be placed on two lists at the same time.
				3315	*/
				3316	void skb_queue_head(struct sk_buff_head list, struct sk_buff newsk)
				3317	{
				3318	unsigned long flags;
				3319
				3320	spin_lock_irqsave(&list->lock, flags);
				3321	__skb_queue_head(list, newsk);
				3322	spin_unlock_irqrestore(&list->lock, flags);
				3323	}
				3324	EXPORT_SYMBOL(skb_queue_head);
				3325
				3326	/**
				3327	* skb_queue_tail - queue a buffer at the list tail
				3328	* @list: list to use
				3329	* @newsk: buffer to queue
				3330	*
				3331	* Queue a buffer at the tail of the list. This function takes the
				3332	* list lock and can be used safely with other locking &sk_buff functions
				3333	* safely.
				3334	*
				3335	* A buffer cannot be placed on two lists at the same time.
				3336	*/
				3337	void skb_queue_tail(struct sk_buff_head list, struct sk_buff newsk)
				3338	{
				3339	unsigned long flags;
				3340
				3341	spin_lock_irqsave(&list->lock, flags);
				3342	__skb_queue_tail(list, newsk);
				3343	spin_unlock_irqrestore(&list->lock, flags);
				3344	}
				3345	EXPORT_SYMBOL(skb_queue_tail);
				3346
				3347	/**
				3348	* skb_unlink - remove a buffer from a list
				3349	* @skb: buffer to remove
				3350	* @list: list to use
				3351	*
				3352	* Remove a packet from a list. The list locks are taken and this
				3353	* function is atomic with respect to other list locked calls
				3354	*
				3355	* You must know what list the SKB is on.
				3356	*/
				3357	void skb_unlink(struct sk_buff skb, struct sk_buff_head list)
				3358	{
				3359	unsigned long flags;
				3360
				3361	spin_lock_irqsave(&list->lock, flags);
				3362	__skb_unlink(skb, list);
				3363	spin_unlock_irqrestore(&list->lock, flags);
				3364	}
				3365	EXPORT_SYMBOL(skb_unlink);
				3366
				3367	/**
				3368	* skb_append - append a buffer
				3369	* @old: buffer to insert after
				3370	* @newsk: buffer to insert
				3371	* @list: list to use
				3372	*
				3373	* Place a packet after a given packet in a list. The list locks are taken
				3374	* and this function is atomic with respect to other list locked calls.
				3375	* A buffer cannot be placed on two lists at the same time.
				3376	*/
				3377	void skb_append(struct sk_buff old, struct sk_buff newsk, struct sk_buff_head *list)
				3378	{
				3379	unsigned long flags;
				3380
				3381	spin_lock_irqsave(&list->lock, flags);
				3382	__skb_queue_after(list, old, newsk);
				3383	spin_unlock_irqrestore(&list->lock, flags);
				3384	}
				3385	EXPORT_SYMBOL(skb_append);
				3386
				3387	static inline void skb_split_inside_header(struct sk_buff *skb,
				3388	struct sk_buff* skb1,
				3389	const u32 len, const int pos)
				3390	{
				3391	int i;
				3392
				3393	skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len),
				3394	pos - len);
				3395	/* And move data appendix as is. */
				3396	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
				3397	skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
				3398
				3399	skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
				3400	skb_shinfo(skb)->nr_frags = 0;
				3401	skb1->data_len = skb->data_len;
				3402	skb1->len += skb1->data_len;
				3403	skb->data_len = 0;
				3404	skb->len = len;
				3405	skb_set_tail_pointer(skb, len);
				3406	}
				3407
				3408	static inline void skb_split_no_header(struct sk_buff *skb,
				3409	struct sk_buff* skb1,
				3410	const u32 len, int pos)
				3411	{
				3412	int i, k = 0;
				3413	const int nfrags = skb_shinfo(skb)->nr_frags;
				3414
				3415	skb_shinfo(skb)->nr_frags = 0;
				3416	skb1->len = skb1->data_len = skb->len - len;
				3417	skb->len = len;
				3418	skb->data_len = len - pos;
				3419
				3420	for (i = 0; i < nfrags; i++) {
				3421	int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
				3422
				3423	if (pos + size > len) {
				3424	skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
				3425
				3426	if (pos < len) {
				3427	/* Split frag.
				3428	* We have two variants in this case:
				3429	* 1. Move all the frag to the second
				3430	* part, if it is possible. F.e.
				3431	* this approach is mandatory for TUX,
				3432	* where splitting is expensive.
				3433	* 2. Split is accurately. We make this.
				3434	*/
				3435	skb_frag_ref(skb, i);
				3436	skb_frag_off_add(&skb_shinfo(skb1)->frags[0], len - pos);
				3437	skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos);
				3438	skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos);
				3439	skb_shinfo(skb)->nr_frags++;
				3440	}
				3441	k++;
				3442	} else
				3443	skb_shinfo(skb)->nr_frags++;
				3444	pos += size;
				3445	}
				3446	skb_shinfo(skb1)->nr_frags = k;
				3447	}
				3448
				3449	/**
				3450	* skb_split - Split fragmented skb to two parts at length len.
				3451	* @skb: the buffer to split
				3452	* @skb1: the buffer to receive the second part
				3453	* @len: new length for skb
				3454	*/
				3455	void skb_split(struct sk_buff skb, struct sk_buff skb1, const u32 len)
				3456	{
				3457	int pos = skb_headlen(skb);
				3458
				3459	skb_shinfo(skb1)->tx_flags \|= skb_shinfo(skb)->tx_flags &
				3460	SKBTX_SHARED_FRAG;
				3461	skb_zerocopy_clone(skb1, skb, 0);
				3462	if (len < pos) /* Split line is inside header. */
				3463	skb_split_inside_header(skb, skb1, len, pos);
				3464	else /* Second chunk has no header, nothing to copy. */
				3465	skb_split_no_header(skb, skb1, len, pos);
				3466	}
				3467	EXPORT_SYMBOL(skb_split);
				3468
				3469	/* Shifting from/to a cloned skb is a no-go.
				3470	*
				3471	* Caller cannot keep skb_shinfo related pointers past calling here!
				3472	*/
				3473	static int skb_prepare_for_shift(struct sk_buff *skb)
				3474	{
				3475	int ret = 0;
				3476
				3477	if (skb_cloned(skb)) {
				3478	/* Save and restore truesize: pskb_expand_head() may reallocate
				3479	* memory where ksize(kmalloc(S)) != ksize(kmalloc(S)), but we
				3480	* cannot change truesize at this point.
				3481	*/
				3482	unsigned int save_truesize = skb->truesize;
				3483
				3484	ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
				3485	skb->truesize = save_truesize;
				3486	}
				3487	return ret;
				3488	}
				3489
				3490	/**
				3491	* skb_shift - Shifts paged data partially from skb to another
				3492	* @tgt: buffer into which tail data gets added
				3493	* @skb: buffer from which the paged data comes from
				3494	* @shiftlen: shift up to this many bytes
				3495	*
				3496	* Attempts to shift up to shiftlen worth of bytes, which may be less than
				3497	* the length of the skb, from skb to tgt. Returns number bytes shifted.
				3498	* It's up to caller to free skb if everything was shifted.
				3499	*
				3500	* If @tgt runs out of frags, the whole operation is aborted.
				3501	*
				3502	* Skb cannot include anything else but paged data while tgt is allowed
				3503	* to have non-paged data as well.
				3504	*
				3505	* TODO: full sized shift could be optimized but that would need
				3506	* specialized skb free'er to handle frags without up-to-date nr_frags.
				3507	*/
				3508	int skb_shift(struct sk_buff tgt, struct sk_buff skb, int shiftlen)
				3509	{
				3510	int from, to, merge, todo;
				3511	skb_frag_t fragfrom, fragto;
				3512
				3513	BUG_ON(shiftlen > skb->len);
				3514
				3515	if (skb_headlen(skb))
				3516	return 0;
				3517	if (skb_zcopy(tgt) \|\| skb_zcopy(skb))
				3518	return 0;
				3519
				3520	todo = shiftlen;
				3521	from = 0;
				3522	to = skb_shinfo(tgt)->nr_frags;
				3523	fragfrom = &skb_shinfo(skb)->frags[from];
				3524
				3525	/* Actual merge is delayed until the point when we know we can
				3526	* commit all, so that we don't have to undo partial changes
				3527	*/
				3528	if (!to \|\|
				3529	!skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
				3530	skb_frag_off(fragfrom))) {
				3531	merge = -1;
				3532	} else {
				3533	merge = to - 1;
				3534
				3535	todo -= skb_frag_size(fragfrom);
				3536	if (todo < 0) {
				3537	if (skb_prepare_for_shift(skb) \|\|
				3538	skb_prepare_for_shift(tgt))
				3539	return 0;
				3540
				3541	/* All previous frag pointers might be stale! */
				3542	fragfrom = &skb_shinfo(skb)->frags[from];
				3543	fragto = &skb_shinfo(tgt)->frags[merge];
				3544
				3545	skb_frag_size_add(fragto, shiftlen);
				3546	skb_frag_size_sub(fragfrom, shiftlen);
				3547	skb_frag_off_add(fragfrom, shiftlen);
				3548
				3549	goto onlymerged;
				3550	}
				3551
				3552	from++;
				3553	}
				3554
				3555	/* Skip full, not-fitting skb to avoid expensive operations */
				3556	if ((shiftlen == skb->len) &&
				3557	(skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to))
				3558	return 0;
				3559
				3560	if (skb_prepare_for_shift(skb) \|\| skb_prepare_for_shift(tgt))
				3561	return 0;
				3562
				3563	while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) {
				3564	if (to == MAX_SKB_FRAGS)
				3565	return 0;
				3566
				3567	fragfrom = &skb_shinfo(skb)->frags[from];
				3568	fragto = &skb_shinfo(tgt)->frags[to];
				3569
				3570	if (todo >= skb_frag_size(fragfrom)) {
				3571	fragto = fragfrom;
				3572	todo -= skb_frag_size(fragfrom);
				3573	from++;
				3574	to++;
				3575
				3576	} else {
				3577	__skb_frag_ref(fragfrom);
				3578	skb_frag_page_copy(fragto, fragfrom);
				3579	skb_frag_off_copy(fragto, fragfrom);
				3580	skb_frag_size_set(fragto, todo);
				3581
				3582	skb_frag_off_add(fragfrom, todo);
				3583	skb_frag_size_sub(fragfrom, todo);
				3584	todo = 0;
				3585
				3586	to++;
				3587	break;
				3588	}
				3589	}
				3590
				3591	/* Ready to "commit" this state change to tgt */
				3592	skb_shinfo(tgt)->nr_frags = to;
				3593
				3594	if (merge >= 0) {
				3595	fragfrom = &skb_shinfo(skb)->frags[0];
				3596	fragto = &skb_shinfo(tgt)->frags[merge];
				3597
				3598	skb_frag_size_add(fragto, skb_frag_size(fragfrom));
				3599	__skb_frag_unref(fragfrom);
				3600	}
				3601
				3602	/* Reposition in the original skb */
				3603	to = 0;
				3604	while (from < skb_shinfo(skb)->nr_frags)
				3605	skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++];
				3606	skb_shinfo(skb)->nr_frags = to;
				3607
				3608	BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags);
				3609
				3610	onlymerged:
				3611	/* Most likely the tgt won't ever need its checksum anymore, skb on
				3612	* the other hand might need it if it needs to be resent
				3613	*/
				3614	tgt->ip_summed = CHECKSUM_PARTIAL;
				3615	skb->ip_summed = CHECKSUM_PARTIAL;
				3616
				3617	/* Yak, is it really working this way? Some helper please? */
				3618	skb->len -= shiftlen;
				3619	skb->data_len -= shiftlen;
				3620	skb->truesize -= shiftlen;
				3621	tgt->len += shiftlen;
				3622	tgt->data_len += shiftlen;
				3623	tgt->truesize += shiftlen;
				3624
				3625	return shiftlen;
				3626	}
				3627
				3628	/**
				3629	* skb_prepare_seq_read - Prepare a sequential read of skb data
				3630	* @skb: the buffer to read
				3631	* @from: lower offset of data to be read
				3632	* @to: upper offset of data to be read
				3633	* @st: state variable
				3634	*
				3635	* Initializes the specified state variable. Must be called before
				3636	* invoking skb_seq_read() for the first time.
				3637	*/
				3638	void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
				3639	unsigned int to, struct skb_seq_state *st)
				3640	{
				3641	st->lower_offset = from;
				3642	st->upper_offset = to;
				3643	st->root_skb = st->cur_skb = skb;
				3644	st->frag_idx = st->stepped_offset = 0;
				3645	st->frag_data = NULL;
				3646	}
				3647	EXPORT_SYMBOL(skb_prepare_seq_read);
				3648
				3649	/**
				3650	* skb_seq_read - Sequentially read skb data
				3651	* @consumed: number of bytes consumed by the caller so far
				3652	* @data: destination pointer for data to be returned
				3653	* @st: state variable
				3654	*
				3655	* Reads a block of skb data at @consumed relative to the
				3656	* lower offset specified to skb_prepare_seq_read(). Assigns
				3657	* the head of the data block to @data and returns the length
				3658	* of the block or 0 if the end of the skb data or the upper
				3659	* offset has been reached.
				3660	*
				3661	* The caller is not required to consume all of the data
				3662	* returned, i.e. @consumed is typically set to the number
				3663	* of bytes already consumed and the next call to
				3664	* skb_seq_read() will return the remaining part of the block.
				3665	*
				3666	* Note 1: The size of each block of data returned can be arbitrary,
				3667	* this limitation is the cost for zerocopy sequential
				3668	* reads of potentially non linear data.
				3669	*
				3670	* Note 2: Fragment lists within fragments are not implemented
				3671	* at the moment, state->root_skb could be replaced with
				3672	* a stack for this purpose.
				3673	*/
				3674	unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
				3675	struct skb_seq_state *st)
				3676	{
				3677	unsigned int block_limit, abs_offset = consumed + st->lower_offset;
				3678	skb_frag_t *frag;
				3679
				3680	if (unlikely(abs_offset >= st->upper_offset)) {
				3681	if (st->frag_data) {
				3682	kunmap_atomic(st->frag_data);
				3683	st->frag_data = NULL;
				3684	}
				3685	return 0;
				3686	}
				3687
				3688	next_skb:
				3689	block_limit = skb_headlen(st->cur_skb) + st->stepped_offset;
				3690
				3691	if (abs_offset < block_limit && !st->frag_data) {
				3692	*data = st->cur_skb->data + (abs_offset - st->stepped_offset);
				3693	return block_limit - abs_offset;
				3694	}
				3695
				3696	if (st->frag_idx == 0 && !st->frag_data)
				3697	st->stepped_offset += skb_headlen(st->cur_skb);
				3698
				3699	while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
				3700	frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
				3701	block_limit = skb_frag_size(frag) + st->stepped_offset;
				3702
				3703	if (abs_offset < block_limit) {
				3704	if (!st->frag_data)
				3705	st->frag_data = kmap_atomic(skb_frag_page(frag));
				3706
				3707	data = (u8 ) st->frag_data + skb_frag_off(frag) +
				3708	(abs_offset - st->stepped_offset);
				3709
				3710	return block_limit - abs_offset;
				3711	}
				3712
				3713	if (st->frag_data) {
				3714	kunmap_atomic(st->frag_data);
				3715	st->frag_data = NULL;
				3716	}
				3717
				3718	st->frag_idx++;
				3719	st->stepped_offset += skb_frag_size(frag);
				3720	}
				3721
				3722	if (st->frag_data) {
				3723	kunmap_atomic(st->frag_data);
				3724	st->frag_data = NULL;
				3725	}
				3726
				3727	if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) {
				3728	st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
				3729	st->frag_idx = 0;
				3730	goto next_skb;
				3731	} else if (st->cur_skb->next) {
				3732	st->cur_skb = st->cur_skb->next;
				3733	st->frag_idx = 0;
				3734	goto next_skb;
				3735	}
				3736
				3737	return 0;
				3738	}
				3739	EXPORT_SYMBOL(skb_seq_read);
				3740
				3741	/**
				3742	* skb_abort_seq_read - Abort a sequential read of skb data
				3743	* @st: state variable
				3744	*
				3745	* Must be called if skb_seq_read() was not called until it
				3746	* returned 0.
				3747	*/
				3748	void skb_abort_seq_read(struct skb_seq_state *st)
				3749	{
				3750	if (st->frag_data)
				3751	kunmap_atomic(st->frag_data);
				3752	}
				3753	EXPORT_SYMBOL(skb_abort_seq_read);
				3754
				3755	#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb))
				3756
				3757	static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
				3758	struct ts_config *conf,
				3759	struct ts_state *state)
				3760	{
				3761	return skb_seq_read(offset, text, TS_SKB_CB(state));
				3762	}
				3763
				/*
				 * finish hook for textsearch: abort the sequential read so that a
				 * fragment mapping still held in the state is released.  @conf is not
				 * used.
				 */
				static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
				{
					skb_abort_seq_read(TS_SKB_CB(state));
				}
				3768
				3769	/**
				3770	* skb_find_text - Find a text pattern in skb data
				3771	* @skb: the buffer to look in
				3772	* @from: search offset
				3773	* @to: search limit
				3774	* @config: textsearch configuration
				3775	*
				3776	* Finds a pattern in the skb data according to the specified
				3777	* textsearch configuration. Use textsearch_next() to retrieve
				3778	* subsequent occurrences of the pattern. Returns the offset
				3779	* to the first occurrence or UINT_MAX if no match was found.
				3780	*/
				3781	unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
				3782	unsigned int to, struct ts_config *config)
				3783	{
				3784	struct ts_state state;
				3785	unsigned int ret;
				3786
				3787	config->get_next_block = skb_ts_get_next_block;
				3788	config->finish = skb_ts_finish;
				3789
				3790	skb_prepare_seq_read(skb, from, to, TS_SKB_CB(&state));
				3791
				3792	ret = textsearch_find(config, &state);
				3793	return (ret <= to - from ? ret : UINT_MAX);
				3794	}
				3795	EXPORT_SYMBOL(skb_find_text);
				3796
				3797	int skb_append_pagefrags(struct sk_buff skb, struct page page,
				3798	int offset, size_t size)
				3799	{
				3800	int i = skb_shinfo(skb)->nr_frags;
				3801
				3802	if (skb_can_coalesce(skb, i, page, offset)) {
				3803	skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
				3804	} else if (i < MAX_SKB_FRAGS) {
				3805	get_page(page);
				3806	skb_fill_page_desc(skb, i, page, offset, size);
				3807	} else {
				3808	return -EMSGSIZE;
				3809	}
				3810
				3811	return 0;
				3812	}
				3813	EXPORT_SYMBOL_GPL(skb_append_pagefrags);
				3814
				3815	/**
				3816	* skb_pull_rcsum - pull skb and update receive checksum
				3817	* @skb: buffer to update
				3818	* @len: length of data pulled
				3819	*
				3820	* This function performs an skb_pull on the packet and updates
				3821	* the CHECKSUM_COMPLETE checksum. It should be used on
				3822	* receive path processing instead of skb_pull unless you know
				3823	* that the checksum difference is zero (e.g., a valid IP header)
				3824	* or you are setting ip_summed to CHECKSUM_NONE.
				3825	*/
				3826	void skb_pull_rcsum(struct sk_buff skb, unsigned int len)
				3827	{
				3828	unsigned char *data = skb->data;
				3829
				3830	BUG_ON(len > skb->len);
				3831	__skb_pull(skb, len);
				3832	skb_postpull_rcsum(skb, data, len);
				3833	return skb->data;
				3834	}
				3835	EXPORT_SYMBOL_GPL(skb_pull_rcsum);
				3836
				3837	static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
				3838	{
				3839	skb_frag_t head_frag;
				3840	struct page *page;
				3841
				3842	page = virt_to_head_page(frag_skb->head);
				3843	__skb_frag_set_page(&head_frag, page);
				3844	skb_frag_off_set(&head_frag, frag_skb->data -
				3845	(unsigned char *)page_address(page));
				3846	skb_frag_size_set(&head_frag, skb_headlen(frag_skb));
				3847	return head_frag;
				3848	}
				3849
				3850	struct sk_buff skb_segment_list(struct sk_buff skb,
				3851	netdev_features_t features,
				3852	unsigned int offset)
				3853	{
				3854	struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
				3855	unsigned int tnl_hlen = skb_tnl_header_len(skb);
				3856	unsigned int delta_truesize = 0;
				3857	unsigned int delta_len = 0;
				3858	struct sk_buff *tail = NULL;
				3859	struct sk_buff nskb, tmp;
				3860	int len_diff, err;
				3861
				3862	skb_push(skb, -skb_network_offset(skb) + offset);
				3863
				3864	skb_shinfo(skb)->frag_list = NULL;
				3865
				3866	do {
				3867	nskb = list_skb;
				3868	list_skb = list_skb->next;
				3869
				3870	err = 0;
				3871	if (skb_shared(nskb)) {
				3872	tmp = skb_clone(nskb, GFP_ATOMIC);
				3873	if (tmp) {
				3874	consume_skb(nskb);
				3875	nskb = tmp;
				3876	err = skb_unclone(nskb, GFP_ATOMIC);
				3877	} else {
				3878	err = -ENOMEM;
				3879	}
				3880	}
				3881
				3882	if (!tail)
				3883	skb->next = nskb;
				3884	else
				3885	tail->next = nskb;
				3886
				3887	if (unlikely(err)) {
				3888	nskb->next = list_skb;
				3889	goto err_linearize;
				3890	}
				3891
				3892	tail = nskb;
				3893
				3894	delta_len += nskb->len;
				3895	delta_truesize += nskb->truesize;
				3896
				3897	skb_push(nskb, -skb_network_offset(nskb) + offset);
				3898
				3899	skb_release_head_state(nskb);
				3900	len_diff = skb_network_header_len(nskb) - skb_network_header_len(skb);
				3901	__copy_skb_header(nskb, skb);
				3902
				3903	skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
				3904	nskb->transport_header += len_diff;
				3905	skb_copy_from_linear_data_offset(skb, -tnl_hlen,
				3906	nskb->data - tnl_hlen,
				3907	offset + tnl_hlen);
				3908
				3909	if (skb_needs_linearize(nskb, features) &&
				3910	__skb_linearize(nskb))
				3911	goto err_linearize;
				3912
				3913	} while (list_skb);
				3914
				3915	skb->truesize = skb->truesize - delta_truesize;
				3916	skb->data_len = skb->data_len - delta_len;
				3917	skb->len = skb->len - delta_len;
				3918
				3919	skb_gso_reset(skb);
				3920
				3921	skb->prev = tail;
				3922
				3923	if (skb_needs_linearize(skb, features) &&
				3924	__skb_linearize(skb))
				3925	goto err_linearize;
				3926
				3927	skb_get(skb);
				3928
				3929	return skb;
				3930
				3931	err_linearize:
				3932	kfree_skb_list(skb->next);
				3933	skb->next = NULL;
				3934	return ERR_PTR(-ENOMEM);
				3935	}
				3936	EXPORT_SYMBOL_GPL(skb_segment_list);
				3937
				3938	int skb_gro_receive_list(struct sk_buff p, struct sk_buff skb)
				3939	{
				3940	if (unlikely(p->len + skb->len >= 65536))
				3941	return -E2BIG;
				3942
				3943	if (skb_shinfo_is_ptr(skb))
				3944	skb_shinfo(p)->gso_type \|= SKB_GSO_DODGY;
				3945
				3946	if (NAPI_GRO_CB(p)->last == p)
				3947	skb_shinfo(p)->frag_list = skb;
				3948	else
				3949	NAPI_GRO_CB(p)->last->next = skb;
				3950
				3951	skb_pull(skb, skb_gro_offset(skb));
				3952
				3953	NAPI_GRO_CB(p)->last = skb;
				3954	NAPI_GRO_CB(p)->count++;
				3955	p->data_len += skb->len;
				3956	p->truesize += skb->truesize;
				3957	p->len += skb->len;
				3958
				3959	NAPI_GRO_CB(skb)->same_flow = 1;
				3960
				3961	return 0;
				3962	}
				3963	EXPORT_SYMBOL_GPL(skb_gro_receive_list);
				3964
				3965	/**
				3966	* skb_segment - Perform protocol segmentation on skb.
				3967	* @head_skb: buffer to segment
				3968	* @features: features for the output path (see dev->features)
				3969	*
				3970	* This function performs segmentation on the given skb. It returns
				3971	* a pointer to the first in a list of new skbs for the segments.
				3972	* In case of error it returns ERR_PTR(err).
				3973	*/
				3974	struct sk_buff skb_segment(struct sk_buff head_skb,
				3975	netdev_features_t features)
				3976	{
				3977	struct sk_buff *segs = NULL;
				3978	struct sk_buff *tail = NULL;
				3979	struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
				3980	unsigned int mss = skb_shinfo(head_skb)->gso_size;
				3981	unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
				3982	unsigned int offset = doffset;
				3983	unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
				3984	unsigned int partial_segs = 0;
				3985	unsigned int headroom;
				3986	unsigned int len = head_skb->len;
				3987	struct sk_buff *frag_skb;
				3988	skb_frag_t *frag;
				3989	__be16 proto;
				3990	bool csum, sg;
				3991	int err = -ENOMEM;
				3992	int i = 0;
				3993	int nfrags, pos;
				3994	int dummy;
				3995
				3996	if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
				3997	mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
				3998	struct sk_buff *check_skb;
				3999
				4000	for (check_skb = list_skb; check_skb; check_skb = check_skb->next) {
				4001	if (skb_headlen(check_skb) && !check_skb->head_frag) {
				4002	/* gso_size is untrusted, and we have a frag_list with
				4003	* a linear non head_frag item.
				4004	*
				4005	* If head_skb's headlen does not fit requested gso_size,
				4006	* it means that the frag_list members do NOT terminate
				4007	* on exact gso_size boundaries. Hence we cannot perform
				4008	* skb_frag_t page sharing. Therefore we must fallback to
				4009	* copying the frag_list skbs; we do so by disabling SG.
				4010	*/
				4011	features &= ~NETIF_F_SG;
				4012	break;
				4013	}
				4014	}
				4015	}
				4016
				4017	__skb_push(head_skb, doffset);
				4018	proto = skb_network_protocol(head_skb, &dummy);
				4019	if (unlikely(!proto))
				4020	return ERR_PTR(-EINVAL);
				4021
				4022	sg = !!(features & NETIF_F_SG);
				4023	csum = !!can_checksum_protocol(features, proto);
				4024
				4025	if (sg && csum && (mss != GSO_BY_FRAGS)) {
				4026	if (!(features & NETIF_F_GSO_PARTIAL)) {
				4027	struct sk_buff *iter;
				4028	unsigned int frag_len;
				4029
				4030	if (!list_skb \|\|
				4031	!net_gso_ok(features, skb_shinfo(head_skb)->gso_type))
				4032	goto normal;
				4033
				4034	/* If we get here then all the required
				4035	* GSO features except frag_list are supported.
				4036	* Try to split the SKB to multiple GSO SKBs
				4037	* with no frag_list.
				4038	* Currently we can do that only when the buffers don't
				4039	* have a linear part and all the buffers except
				4040	* the last are of the same length.
				4041	*/
				4042	frag_len = list_skb->len;
				4043	skb_walk_frags(head_skb, iter) {
				4044	if (frag_len != iter->len && iter->next)
				4045	goto normal;
				4046	if (skb_headlen(iter) && !iter->head_frag)
				4047	goto normal;
				4048
				4049	len -= iter->len;
				4050	}
				4051
				4052	if (len != frag_len)
				4053	goto normal;
				4054	}
				4055
				4056	/* GSO partial only requires that we trim off any excess that
				4057	* doesn't fit into an MSS sized block, so take care of that
				4058	* now.
				4059	* Cap len to not accidentally hit GSO_BY_FRAGS.
				4060	*/
				4061	partial_segs = min(len, GSO_BY_FRAGS - 1U) / mss;
				4062	if (partial_segs > 1)
				4063	mss *= partial_segs;
				4064	else
				4065	partial_segs = 0;
				4066	}
				4067
				4068	normal:
				4069	headroom = skb_headroom(head_skb);
				4070	pos = skb_headlen(head_skb);
				4071
				4072	if (skb_orphan_frags(head_skb, GFP_ATOMIC))
				4073	return ERR_PTR(-ENOMEM);
				4074
				4075	nfrags = skb_shinfo(head_skb)->nr_frags;
				4076	frag = skb_shinfo(head_skb)->frags;
				4077	frag_skb = head_skb;
				4078
				4079	do {
				4080	struct sk_buff *nskb;
				4081	skb_frag_t *nskb_frag;
				4082	int hsize;
				4083	int size;
				4084
				4085	if (unlikely(mss == GSO_BY_FRAGS)) {
				4086	len = list_skb->len;
				4087	} else {
				4088	len = head_skb->len - offset;
				4089	if (len > mss)
				4090	len = mss;
				4091	}
				4092
				4093	hsize = skb_headlen(head_skb) - offset;
				4094	if (hsize < 0)
				4095	hsize = 0;
				4096	if (hsize > len \|\| !sg)
				4097	hsize = len;
				4098
				4099	if (!hsize && i >= nfrags && skb_headlen(list_skb) &&
				4100	(skb_headlen(list_skb) == len \|\| sg)) {
				4101	BUG_ON(skb_headlen(list_skb) > len);
				4102
				4103	nskb = skb_clone(list_skb, GFP_ATOMIC);
				4104	if (unlikely(!nskb))
				4105	goto err;
				4106
				4107	i = 0;
				4108	nfrags = skb_shinfo(list_skb)->nr_frags;
				4109	frag = skb_shinfo(list_skb)->frags;
				4110	frag_skb = list_skb;
				4111	pos += skb_headlen(list_skb);
				4112
				4113	while (pos < offset + len) {
				4114	BUG_ON(i >= nfrags);
				4115
				4116	size = skb_frag_size(frag);
				4117	if (pos + size > offset + len)
				4118	break;
				4119
				4120	i++;
				4121	pos += size;
				4122	frag++;
				4123	}
				4124
				4125	list_skb = list_skb->next;
				4126
				4127	if (unlikely(pskb_trim(nskb, len))) {
				4128	kfree_skb(nskb);
				4129	goto err;
				4130	}
				4131
				4132	hsize = skb_end_offset(nskb);
				4133	if (skb_cow_head(nskb, doffset + headroom)) {
				4134	kfree_skb(nskb);
				4135	goto err;
				4136	}
				4137
				4138	nskb->truesize += skb_end_offset(nskb) - hsize;
				4139	skb_release_head_state(nskb);
				4140	__skb_push(nskb, doffset);
				4141	} else {
				4142	nskb = __alloc_skb(hsize + doffset + headroom,
				4143	GFP_ATOMIC, skb_alloc_rx_flag(head_skb),
				4144	NUMA_NO_NODE);
				4145
				4146	if (unlikely(!nskb))
				4147	goto err;
				4148
				4149	skb_reserve(nskb, headroom);
				4150	__skb_put(nskb, doffset);
				4151	}
				4152
				4153	if (segs)
				4154	tail->next = nskb;
				4155	else
				4156	segs = nskb;
				4157	tail = nskb;
				4158
				4159	__copy_skb_header(nskb, head_skb);
				4160
				4161	skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom);
				4162	skb_reset_mac_len(nskb);
				4163
				4164	skb_copy_from_linear_data_offset(head_skb, -tnl_hlen,
				4165	nskb->data - tnl_hlen,
				4166	doffset + tnl_hlen);
				4167
				4168	if (nskb->len == len + doffset)
				4169	goto perform_csum_check;
				4170
				4171	if (!sg) {
				4172	if (!nskb->remcsum_offload)
				4173	nskb->ip_summed = CHECKSUM_NONE;
				4174	SKB_GSO_CB(nskb)->csum =
				4175	skb_copy_and_csum_bits(head_skb, offset,
				4176	skb_put(nskb, len),
				4177	len, 0);
				4178	SKB_GSO_CB(nskb)->csum_start =
				4179	skb_headroom(nskb) + doffset;
				4180	continue;
				4181	}
				4182
				4183	nskb_frag = skb_shinfo(nskb)->frags;
				4184
				4185	skb_copy_from_linear_data_offset(head_skb, offset,
				4186	skb_put(nskb, hsize), hsize);
				4187
				4188	skb_shinfo(nskb)->tx_flags \|= skb_shinfo(head_skb)->tx_flags &
				4189	SKBTX_SHARED_FRAG;
				4190
				4191	if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
				4192	goto err;
				4193
				4194	while (pos < offset + len) {
				4195	if (i >= nfrags) {
				4196	if (skb_orphan_frags(list_skb, GFP_ATOMIC) \|\|
				4197	skb_zerocopy_clone(nskb, list_skb,
				4198	GFP_ATOMIC))
				4199	goto err;
				4200
				4201	i = 0;
				4202	nfrags = skb_shinfo(list_skb)->nr_frags;
				4203	frag = skb_shinfo(list_skb)->frags;
				4204	frag_skb = list_skb;
				4205	if (!skb_headlen(list_skb)) {
				4206	BUG_ON(!nfrags);
				4207	} else {
				4208	BUG_ON(!list_skb->head_frag);
				4209
				4210	/* to make room for head_frag. */
				4211	i--;
				4212	frag--;
				4213	}
				4214
				4215	list_skb = list_skb->next;
				4216	}
				4217
				4218	if (unlikely(skb_shinfo(nskb)->nr_frags >=
				4219	MAX_SKB_FRAGS)) {
				4220	net_warn_ratelimited(
				4221	"skb_segment: too many frags: %u %u\n",
				4222	pos, mss);
				4223	err = -EINVAL;
				4224	goto err;
				4225	}
				4226
				4227	nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : frag;
				4228	__skb_frag_ref(nskb_frag);
				4229	size = skb_frag_size(nskb_frag);
				4230
				4231	if (pos < offset) {
				4232	skb_frag_off_add(nskb_frag, offset - pos);
				4233	skb_frag_size_sub(nskb_frag, offset - pos);
				4234	}
				4235
				4236	skb_shinfo(nskb)->nr_frags++;
				4237
				4238	if (pos + size <= offset + len) {
				4239	i++;
				4240	frag++;
				4241	pos += size;
				4242	} else {
				4243	skb_frag_size_sub(nskb_frag, pos + size - (offset + len));
				4244	goto skip_fraglist;
				4245	}
				4246
				4247	nskb_frag++;
				4248	}
				4249
				4250	skip_fraglist:
				4251	nskb->data_len = len - hsize;
				4252	nskb->len += nskb->data_len;
				4253	nskb->truesize += nskb->data_len;
				4254
				4255	perform_csum_check:
				4256	if (!csum) {
				4257	if (skb_has_shared_frag(nskb) &&
				4258	__skb_linearize(nskb))
				4259	goto err;
				4260
				4261	if (!nskb->remcsum_offload)
				4262	nskb->ip_summed = CHECKSUM_NONE;
				4263	SKB_GSO_CB(nskb)->csum =
				4264	skb_checksum(nskb, doffset,
				4265	nskb->len - doffset, 0);
				4266	SKB_GSO_CB(nskb)->csum_start =
				4267	skb_headroom(nskb) + doffset;
				4268	}
				4269	} while ((offset += len) < head_skb->len);
				4270
				4271	/* Some callers want to get the end of the list.
				4272	* Put it in segs->prev to avoid walking the list.
				4273	* (see validate_xmit_skb_list() for example)
				4274	*/
				4275	segs->prev = tail;
				4276
				4277	if (partial_segs) {
				4278	struct sk_buff *iter;
				4279	int type = skb_shinfo(head_skb)->gso_type;
				4280	unsigned short gso_size = skb_shinfo(head_skb)->gso_size;
				4281
				4282	/* Update type to add partial and then remove dodgy if set */
				4283	type \|= (features & NETIF_F_GSO_PARTIAL) / NETIF_F_GSO_PARTIAL * SKB_GSO_PARTIAL;
				4284	type &= ~SKB_GSO_DODGY;
				4285
				4286	/* Update GSO info and prepare to start updating headers on
				4287	* our way back down the stack of protocols.
				4288	*/
				4289	for (iter = segs; iter; iter = iter->next) {
				4290	skb_shinfo(iter)->gso_size = gso_size;
				4291	skb_shinfo(iter)->gso_segs = partial_segs;
				4292	skb_shinfo(iter)->gso_type = type;
				4293	SKB_GSO_CB(iter)->data_offset = skb_headroom(iter) + doffset;
				4294	}
				4295
				4296	if (tail->len - doffset <= gso_size)
				4297	skb_shinfo(tail)->gso_size = 0;
				4298	else if (tail != segs)
				4299	skb_shinfo(tail)->gso_segs = DIV_ROUND_UP(tail->len - doffset, gso_size);
				4300	}
				4301
				4302	/* Following permits correct backpressure, for protocols
				4303	* using skb_set_owner_w().
				4304	* Idea is to tranfert ownership from head_skb to last segment.
				4305	*/
				4306	if (head_skb->destructor == sock_wfree) {
				4307	swap(tail->truesize, head_skb->truesize);
				4308	swap(tail->destructor, head_skb->destructor);
				4309	swap(tail->sk, head_skb->sk);
				4310	}
				4311	return segs;
				4312
				4313	err:
				4314	kfree_skb_list(segs);
				4315	return ERR_PTR(err);
				4316	}
				4317	EXPORT_SYMBOL_GPL(skb_segment);
				4318
				4319	int skb_gro_receive(struct sk_buff p, struct sk_buff skb)
				4320	{
				4321	struct skb_shared_info pinfo, skbinfo = skb_shinfo(skb);
				4322	unsigned int offset = skb_gro_offset(skb);
				4323	unsigned int headlen = skb_headlen(skb);
				4324	unsigned int len = skb_gro_len(skb);
				4325	unsigned int delta_truesize;
				4326	struct sk_buff *lp;
				4327
				4328	if (unlikely(p->len + len >= 65536 \|\| NAPI_GRO_CB(skb)->flush))
				4329	return -E2BIG;
				4330
				4331	lp = NAPI_GRO_CB(p)->last;
				4332	pinfo = skb_shinfo(lp);
				4333
				4334	if (headlen <= offset) {
				4335	skb_frag_t *frag;
				4336	skb_frag_t *frag2;
				4337	int i = skbinfo->nr_frags;
				4338	int nr_frags = pinfo->nr_frags + i;
				4339
				4340	if (nr_frags > MAX_SKB_FRAGS)
				4341	goto merge;
				4342
				4343	offset -= headlen;
				4344	pinfo->nr_frags = nr_frags;
				4345	skbinfo->nr_frags = 0;
				4346
				4347	frag = pinfo->frags + nr_frags;
				4348	frag2 = skbinfo->frags + i;
				4349	do {
				4350	--frag = --frag2;
				4351	} while (--i);
				4352
				4353	skb_frag_off_add(frag, offset);
				4354	skb_frag_size_sub(frag, offset);
				4355
				4356	/* all fragments truesize : remove (head size + sk_buff) */
				4357	delta_truesize = skb->truesize -
				4358	SKB_TRUESIZE(skb_end_offset(skb));
				4359
				4360	skb->truesize -= skb->data_len;
				4361	skb->len -= skb->data_len;
				4362	skb->data_len = 0;
				4363
				4364	NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE;
				4365	goto done;
				4366	} else if (skb->head_frag) {
				4367	int nr_frags = pinfo->nr_frags;
				4368	skb_frag_t *frag = pinfo->frags + nr_frags;
				4369	struct page *page = virt_to_head_page(skb->head);
				4370	unsigned int first_size = headlen - offset;
				4371	unsigned int first_offset;
				4372
				4373	if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS)
				4374	goto merge;
				4375
				4376	first_offset = skb->data -
				4377	(unsigned char *)page_address(page) +
				4378	offset;
				4379
				4380	pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags;
				4381
				4382	__skb_frag_set_page(frag, page);
				4383	skb_frag_off_set(frag, first_offset);
				4384	skb_frag_size_set(frag, first_size);
				4385
				4386	memcpy(frag + 1, skbinfo->frags, sizeof(frag) skbinfo->nr_frags);
				4387	/* We dont need to clear skbinfo->nr_frags here */
				4388
				4389	delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
				4390	NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
				4391	goto done;
				4392	}
				4393
				4394	merge:
				4395	delta_truesize = skb->truesize;
				4396	if (offset > headlen) {
				4397	unsigned int eat = offset - headlen;
				4398
				4399	skb_frag_off_add(&skbinfo->frags[0], eat);
				4400	skb_frag_size_sub(&skbinfo->frags[0], eat);
				4401	skb->data_len -= eat;
				4402	skb->len -= eat;
				4403	offset = headlen;
				4404	}
				4405
				4406	__skb_pull(skb, offset);
				4407
				4408	if (skb_shinfo_is_ptr(skb))
				4409	skb_shinfo(p)->gso_type \|= SKB_GSO_DODGY;
				4410
				4411	if (NAPI_GRO_CB(p)->last == p)
				4412	skb_shinfo(p)->frag_list = skb;
				4413	else
				4414	NAPI_GRO_CB(p)->last->next = skb;
				4415	NAPI_GRO_CB(p)->last = skb;
				4416	__skb_header_release(skb);
				4417	lp = p;
				4418
				4419	done:
				4420	NAPI_GRO_CB(p)->count++;
				4421	p->data_len += len;
				4422	p->truesize += delta_truesize;
				4423	p->len += len;
				4424	if (lp != p) {
				4425	lp->data_len += len;
				4426	lp->truesize += delta_truesize;
				4427	lp->len += len;
				4428	}
				4429	NAPI_GRO_CB(skb)->same_flow = 1;
				4430	return 0;
				4431	}
				4432	EXPORT_SYMBOL_GPL(skb_gro_receive);
				4433
				#ifdef CONFIG_SKB_EXTENSIONS
				/* Extension sizes are stored in units of SKB_EXT_ALIGN_VALUE bytes */
				#define SKB_EXT_ALIGN_VALUE	8
				#define SKB_EXT_CHUNKSIZEOF(x)	(ALIGN((sizeof(x)), SKB_EXT_ALIGN_VALUE) / SKB_EXT_ALIGN_VALUE)

				/* Size, in chunks, of each extension type that is configured in */
				static const u8 skb_ext_type_len[] = {
				#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
					[SKB_EXT_BRIDGE_NF] = SKB_EXT_CHUNKSIZEOF(struct nf_bridge_info),
				#endif
				#ifdef CONFIG_XFRM
					[SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
				#endif
				#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
					[TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
				#endif
				};

				/* Sum, in chunks, of the struct skb_ext header plus every enabled
				 * extension type.
				 */
				static __always_inline unsigned int skb_ext_total_length(void)
				{
					return SKB_EXT_CHUNKSIZEOF(struct skb_ext) +
				#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
						skb_ext_type_len[SKB_EXT_BRIDGE_NF] +
				#endif
				#ifdef CONFIG_XFRM
						skb_ext_type_len[SKB_EXT_SEC_PATH] +
				#endif
				#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
						skb_ext_type_len[TC_SKB_EXT] +
				#endif
						0;
				}

				/* Create the slab cache for skb extensions, sized for the header plus
				 * all enabled extension types.
				 */
				static void skb_extensions_init(void)
				{
					BUILD_BUG_ON(SKB_EXT_NUM >= 8);
					BUILD_BUG_ON(skb_ext_total_length() > 255);

					skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache",
									     SKB_EXT_ALIGN_VALUE * skb_ext_total_length(),
									     0,
									     SLAB_HWCACHE_ALIGN|SLAB_PANIC,
									     NULL);
				}
				#else
				static void skb_extensions_init(void) {}
				#endif
				4479
				4480	void __init skb_init(void)
				4481	{
				4482	skbuff_head_cache = kmem_cache_create_usercopy("skbuff_head_cache",
				4483	sizeof(struct sk_buff),
				4484	0,
				4485	SLAB_HWCACHE_ALIGN\|SLAB_PANIC,
				4486	offsetof(struct sk_buff, cb),
				4487	sizeof_field(struct sk_buff, cb),
				4488	NULL);
				4489	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
				4490	sizeof(struct sk_buff_fclones),
				4491	0,
				4492	SLAB_HWCACHE_ALIGN\|SLAB_PANIC,
				4493	NULL);
				4494	skb_extensions_init();
				4495	}
				4496
				4497	static int
				4498	__skb_to_sgvec(struct sk_buff skb, struct scatterlist sg, int offset, int len,
				4499	unsigned int recursion_level)
				4500	{
				4501	int start = skb_headlen(skb);
				4502	int i, copy = start - offset;
				4503	struct sk_buff *frag_iter;
				4504	int elt = 0;
				4505
				4506	if (unlikely(recursion_level >= 24))
				4507	return -EMSGSIZE;
				4508
				4509	if (copy > 0) {
				4510	if (copy > len)
				4511	copy = len;
				4512	sg_set_buf(sg, skb->data + offset, copy);
				4513	elt++;
				4514	if ((len -= copy) == 0)
				4515	return elt;
				4516	offset += copy;
				4517	}
				4518
				4519	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
				4520	int end;
				4521
				4522	WARN_ON(start > offset + len);
				4523
				4524	end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
				4525	if ((copy = end - offset) > 0) {
				4526	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
				4527	if (unlikely(elt && sg_is_last(&sg[elt - 1])))
				4528	return -EMSGSIZE;
				4529
				4530	if (copy > len)
				4531	copy = len;
				4532	sg_set_page(&sg[elt], skb_frag_page(frag), copy,
				4533	skb_frag_off(frag) + offset - start);
				4534	elt++;
				4535	if (!(len -= copy))
				4536	return elt;
				4537	offset += copy;
				4538	}
				4539	start = end;
				4540	}
				4541
				4542	skb_walk_frags(skb, frag_iter) {
				4543	int end, ret;
				4544
				4545	WARN_ON(start > offset + len);
				4546
				4547	end = start + frag_iter->len;
				4548	if ((copy = end - offset) > 0) {
				4549	if (unlikely(elt && sg_is_last(&sg[elt - 1])))
				4550	return -EMSGSIZE;
				4551
				4552	if (copy > len)
				4553	copy = len;
				4554	ret = __skb_to_sgvec(frag_iter, sg+elt, offset - start,
				4555	copy, recursion_level + 1);
				4556	if (unlikely(ret < 0))
				4557	return ret;
				4558	elt += ret;
				4559	if ((len -= copy) == 0)
				4560	return elt;
				4561	offset += copy;
				4562	}
				4563	start = end;
				4564	}
				4565	BUG_ON(len);
				4566	return elt;
				4567	}
				4568
				4569	/**
				4570	* skb_to_sgvec - Fill a scatter-gather list from a socket buffer
				4571	* @skb: Socket buffer containing the buffers to be mapped
				4572	* @sg: The scatter-gather list to map into
				4573	* @offset: The offset into the buffer's contents to start mapping
				4574	* @len: Length of buffer space to be mapped
				4575	*
				4576	* Fill the specified scatter-gather list with mappings/pointers into a
				4577	* region of the buffer space attached to a socket buffer. Returns either
				4578	* the number of scatterlist items used, or -EMSGSIZE if the contents
				4579	* could not fit.
				4580	*/
				4581	int skb_to_sgvec(struct sk_buff skb, struct scatterlist sg, int offset, int len)
				4582	{
				4583	int nsg = __skb_to_sgvec(skb, sg, offset, len, 0);
				4584
				4585	if (nsg <= 0)
				4586	return nsg;
				4587
				4588	sg_mark_end(&sg[nsg - 1]);
				4589
				4590	return nsg;
				4591	}
				4592	EXPORT_SYMBOL_GPL(skb_to_sgvec);
				4593
				/* Compared with skb_to_sgvec, skb_to_sgvec_nomark only maps the skb to the
				 * given sglist without marking the sg entry that contains the last skb data
				 * as the end. So the caller can manipulate the sg list at will when padding
				 * new data after the first call, without calling sg_unmark_end to extend
				 * the sg list.
				 *
				 * Scenario to use skb_to_sgvec_nomark:
				 * 1. sg_init_table
				 * 2. skb_to_sgvec_nomark(payload1)
				 * 3. skb_to_sgvec_nomark(payload2)
				 *
				 * This is equivalent to:
				 * 1. sg_init_table
				 * 2. skb_to_sgvec(payload1)
				 * 3. sg_unmark_end
				 * 4. skb_to_sgvec(payload2)
				 *
				 * When mapping multiple payloads conditionally, skb_to_sgvec_nomark
				 * is preferable.
				 */
				int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
							int offset, int len)
				{
					return __skb_to_sgvec(skb, sg, offset, len, 0);
				}
				EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark);
				4619
				4620
				4621
				4622	/**
				4623	* skb_cow_data - Check that a socket buffer's data buffers are writable
				4624	* @skb: The socket buffer to check.
				4625	* @tailbits: Amount of trailing space to be added
				4626	* @trailer: Returned pointer to the skb where the @tailbits space begins
				4627	*
				4628	* Make sure that the data buffers attached to a socket buffer are
				4629	* writable. If they are not, private copies are made of the data buffers
				4630	* and the socket buffer is set to use these instead.
				4631	*
				4632	* If @tailbits is given, make sure that there is space to write @tailbits
				4633	* bytes of data beyond current end of socket buffer. @trailer will be
				4634	* set to point to the skb in which this space begins.
				4635	*
				4636	* The number of scatterlist elements required to completely map the
				4637	* COW'd and extended socket buffer will be returned.
				4638	*/
				4639	int skb_cow_data(struct sk_buff skb, int tailbits, struct sk_buff *trailer)
				4640	{
				4641	int copyflag;
				4642	int elt;
				4643	struct sk_buff skb1, *skb_p;
				4644
				4645	/* If skb is cloned or its head is paged, reallocate
				4646	* head pulling out all the pages (pages are considered not writable
				4647	* at the moment even if they are anonymous).
				4648	*/
				4649	if ((skb_cloned(skb) \|\| skb_shinfo(skb)->nr_frags) &&
				4650	__pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
				4651	return -ENOMEM;
				4652
				4653	/* Easy case. Most of packets will go this way. */
				4654	if (!skb_has_frag_list(skb)) {
				4655	/* A little of trouble, not enough of space for trailer.
				4656	* This should not happen, when stack is tuned to generate
				4657	* good frames. OK, on miss we reallocate and reserve even more
				4658	* space, 128 bytes is fair. */
				4659
				4660	if (skb_tailroom(skb) < tailbits &&
				4661	pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
				4662	return -ENOMEM;
				4663
				4664	/* Voila! */
				4665	*trailer = skb;
				4666	return 1;
				4667	}
				4668
				4669	/* Misery. We are in troubles, going to mincer fragments... */
				4670
				4671	elt = 1;
				4672	skb_p = &skb_shinfo(skb)->frag_list;
				4673	copyflag = 0;
				4674
				4675	while ((skb1 = *skb_p) != NULL) {
				4676	int ntail = 0;
				4677
				4678	/* The fragment is partially pulled by someone,
				4679	* this can happen on input. Copy it and everything
				4680	* after it. */
				4681
				4682	if (skb_shared(skb1))
				4683	copyflag = 1;
				4684
				4685	/* If the skb is the last, worry about trailer. */
				4686
				4687	if (skb1->next == NULL && tailbits) {
				4688	if (skb_shinfo(skb1)->nr_frags \|\|
				4689	skb_has_frag_list(skb1) \|\|
				4690	skb_tailroom(skb1) < tailbits)
				4691	ntail = tailbits + 128;
				4692	}
				4693
				4694	if (copyflag \|\|
				4695	skb_cloned(skb1) \|\|
				4696	ntail \|\|
				4697	skb_shinfo(skb1)->nr_frags \|\|
				4698	skb_has_frag_list(skb1)) {
				4699	struct sk_buff *skb2;
				4700
				4701	/* Fuck, we are miserable poor guys... */
				4702	if (ntail == 0)
				4703	skb2 = skb_copy(skb1, GFP_ATOMIC);
				4704	else
				4705	skb2 = skb_copy_expand(skb1,
				4706	skb_headroom(skb1),
				4707	ntail,
				4708	GFP_ATOMIC);
				4709	if (unlikely(skb2 == NULL))
				4710	return -ENOMEM;
				4711
				4712	if (skb1->sk)
				4713	skb_set_owner_w(skb2, skb1->sk);
				4714
				4715	/* Looking around. Are we still alive?
				4716	* OK, link new skb, drop old one */
				4717
				4718	skb2->next = skb1->next;
				4719	*skb_p = skb2;
				4720	kfree_skb(skb1);
				4721	skb1 = skb2;
				4722	}
				4723	elt++;
				4724	*trailer = skb1;
				4725	skb_p = &skb1->next;
				4726	}
				4727
				4728	return elt;
				4729	}
				4730	EXPORT_SYMBOL_GPL(skb_cow_data);
				4731
				4732	static void sock_rmem_free(struct sk_buff *skb)
				4733	{
				4734	struct sock *sk = skb->sk;
				4735
				4736	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
				4737	}
				4738
				4739	static void skb_set_err_queue(struct sk_buff *skb)
				4740	{
				4741	/* pkt_type of skbs received on local sockets is never PACKET_OUTGOING.
				4742	* So, it is safe to (mis)use it to mark skbs on the error queue.
				4743	*/
				4744	skb->pkt_type = PACKET_OUTGOING;
				4745	BUILD_BUG_ON(PACKET_OUTGOING == 0);
				4746	}
				4747
				4748	/*
				4749	* Note: We dont mem charge error packets (no sk_forward_alloc changes)
				4750	*/
				4751	int sock_queue_err_skb(struct sock sk, struct sk_buff skb)
				4752	{
				4753	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
				4754	(unsigned int)READ_ONCE(sk->sk_rcvbuf))
				4755	return -ENOMEM;
				4756
				4757	skb_orphan(skb);
				4758	skb->sk = sk;
				4759	skb->destructor = sock_rmem_free;
				4760	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
				4761	skb_set_err_queue(skb);
				4762
				4763	/* before exiting rcu section, make sure dst is refcounted */
				4764	skb_dst_force(skb);
				4765
				4766	skb_queue_tail(&sk->sk_error_queue, skb);
				4767	if (!sock_flag(sk, SOCK_DEAD))
				4768	sk->sk_error_report(sk);
				4769	return 0;
				4770	}
				4771	EXPORT_SYMBOL(sock_queue_err_skb);
				4772
				4773	static bool is_icmp_err_skb(const struct sk_buff *skb)
				4774	{
				4775	return skb && (SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP \|\|
				4776	SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP6);
				4777	}
				4778
				4779	struct sk_buff sock_dequeue_err_skb(struct sock sk)
				4780	{
				4781	struct sk_buff_head *q = &sk->sk_error_queue;
				4782	struct sk_buff skb, skb_next = NULL;
				4783	bool icmp_next = false;
				4784	unsigned long flags;
				4785
				4786	spin_lock_irqsave(&q->lock, flags);
				4787	skb = __skb_dequeue(q);
				4788	if (skb && (skb_next = skb_peek(q))) {
				4789	icmp_next = is_icmp_err_skb(skb_next);
				4790	if (icmp_next)
				4791	sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_errno;
				4792	}
				4793	spin_unlock_irqrestore(&q->lock, flags);
				4794
				4795	if (is_icmp_err_skb(skb) && !icmp_next)
				4796	sk->sk_err = 0;
				4797
				4798	if (skb_next)
				4799	sk->sk_error_report(sk);
				4800
				4801	return skb;
				4802	}
				4803	EXPORT_SYMBOL(sock_dequeue_err_skb);
				4804
				4805	/**
				4806	* skb_clone_sk - create clone of skb, and take reference to socket
				4807	* @skb: the skb to clone
				4808	*
				4809	* This function creates a clone of a buffer that holds a reference on
				4810	* sk_refcnt. Buffers created via this function are meant to be
				4811	* returned using sock_queue_err_skb, or free via kfree_skb.
				4812	*
				4813	* When passing buffers allocated with this function to sock_queue_err_skb
				4814	* it is necessary to wrap the call with sock_hold/sock_put in order to
				4815	* prevent the socket from being released prior to being enqueued on
				4816	* the sk_error_queue.
				4817	*/
				4818	struct sk_buff skb_clone_sk(struct sk_buff skb)
				4819	{
				4820	struct sock *sk = skb->sk;
				4821	struct sk_buff *clone;
				4822
				4823	if (!sk \|\| !refcount_inc_not_zero(&sk->sk_refcnt))
				4824	return NULL;
				4825
				4826	clone = skb_clone(skb, GFP_ATOMIC);
				4827	if (!clone) {
				4828	sock_put(sk);
				4829	return NULL;
				4830	}
				4831
				4832	clone->sk = sk;
				4833	clone->destructor = sock_efree;
				4834
				4835	return clone;
				4836	}
				4837	EXPORT_SYMBOL(skb_clone_sk);
				4838
				4839	static void __skb_complete_tx_timestamp(struct sk_buff *skb,
				4840	struct sock *sk,
				4841	int tstype,
				4842	bool opt_stats)
				4843	{
				4844	struct sock_exterr_skb *serr;
				4845	int err;
				4846
				4847	BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb));
				4848
				4849	serr = SKB_EXT_ERR(skb);
				4850	memset(serr, 0, sizeof(*serr));
				4851	serr->ee.ee_errno = ENOMSG;
				4852	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
				4853	serr->ee.ee_info = tstype;
				4854	serr->opt_stats = opt_stats;
				4855	serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
				4856	if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
				4857	serr->ee.ee_data = skb_shinfo(skb)->tskey;
				4858	if (sk->sk_protocol == IPPROTO_TCP &&
				4859	sk->sk_type == SOCK_STREAM)
				4860	serr->ee.ee_data -= sk->sk_tskey;
				4861	}
				4862
				4863	err = sock_queue_err_skb(sk, skb);
				4864
				4865	if (err)
				4866	kfree_skb(skb);
				4867	}
				4868
				4869	static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
				4870	{
				4871	bool ret;
				4872
				4873	if (likely(READ_ONCE(sysctl_tstamp_allow_data) \|\| tsonly))
				4874	return true;
				4875
				4876	read_lock_bh(&sk->sk_callback_lock);
				4877	ret = sk->sk_socket && sk->sk_socket->file &&
				4878	file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW);
				4879	read_unlock_bh(&sk->sk_callback_lock);
				4880	return ret;
				4881	}
				4882
				4883	void skb_complete_tx_timestamp(struct sk_buff *skb,
				4884	struct skb_shared_hwtstamps *hwtstamps)
				4885	{
				4886	struct sock *sk = skb->sk;
				4887
				4888	if (!skb_may_tx_timestamp(sk, false))
				4889	goto err;
				4890
				4891	/* Take a reference to prevent skb_orphan() from freeing the socket,
				4892	* but only if the socket refcount is not zero.
				4893	*/
				4894	if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) {
				4895	skb_hwtstamps(skb) = hwtstamps;
				4896	__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
				4897	sock_put(sk);
				4898	return;
				4899	}
				4900
				4901	err:
				4902	kfree_skb(skb);
				4903	}
				4904	EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
				4905
				4906	void __skb_tstamp_tx(struct sk_buff *orig_skb,
				4907	struct skb_shared_hwtstamps *hwtstamps,
				4908	struct sock *sk, int tstype)
				4909	{
				4910	struct sk_buff *skb;
				4911	bool tsonly, opt_stats = false;
				4912
				4913	if (!sk)
				4914	return;
				4915
				4916	if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
				4917	skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
				4918	return;
				4919
				4920	tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
				4921	if (!skb_may_tx_timestamp(sk, tsonly))
				4922	return;
				4923
				4924	if (tsonly) {
				4925	#ifdef CONFIG_INET
				4926	if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
				4927	sk->sk_protocol == IPPROTO_TCP &&
				4928	sk->sk_type == SOCK_STREAM) {
				4929	skb = tcp_get_timestamping_opt_stats(sk);
				4930	opt_stats = true;
				4931	} else
				4932	#endif
				4933	skb = alloc_skb(0, GFP_ATOMIC);
				4934	} else {
				4935	skb = skb_clone(orig_skb, GFP_ATOMIC);
				4936
				4937	if (skb_orphan_frags_rx(skb, GFP_ATOMIC)) {
				4938	kfree_skb(skb);
				4939	return;
				4940	}
				4941	}
				4942	if (!skb)
				4943	return;
				4944
				4945	if (tsonly) {
				4946	skb_shinfo(skb)->tx_flags \|= skb_shinfo(orig_skb)->tx_flags &
				4947	SKBTX_ANY_TSTAMP;
				4948	skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey;
				4949	}
				4950
				4951	if (hwtstamps)
				4952	skb_hwtstamps(skb) = hwtstamps;
				4953	else
				4954	skb->tstamp = ktime_get_real();
				4955
				4956	__skb_complete_tx_timestamp(skb, sk, tstype, opt_stats);
				4957	}
				4958	EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
				4959
				4960	void skb_tstamp_tx(struct sk_buff *orig_skb,
				4961	struct skb_shared_hwtstamps *hwtstamps)
				4962	{
				4963	return __skb_tstamp_tx(orig_skb, hwtstamps, orig_skb->sk,
				4964	SCM_TSTAMP_SND);
				4965	}
				4966	EXPORT_SYMBOL_GPL(skb_tstamp_tx);
				4967
				4968	void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
				4969	{
				4970	struct sock *sk = skb->sk;
				4971	struct sock_exterr_skb *serr;
				4972	int err = 1;
				4973
				4974	skb->wifi_acked_valid = 1;
				4975	skb->wifi_acked = acked;
				4976
				4977	serr = SKB_EXT_ERR(skb);
				4978	memset(serr, 0, sizeof(*serr));
				4979	serr->ee.ee_errno = ENOMSG;
				4980	serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
				4981
				4982	/* Take a reference to prevent skb_orphan() from freeing the socket,
				4983	* but only if the socket refcount is not zero.
				4984	*/
				4985	if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) {
				4986	err = sock_queue_err_skb(sk, skb);
				4987	sock_put(sk);
				4988	}
				4989	if (err)
				4990	kfree_skb(skb);
				4991	}
				4992	EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
				4993
				4994	/**
				4995	* skb_partial_csum_set - set up and verify partial csum values for packet
				4996	* @skb: the skb to set
				4997	* @start: the number of bytes after skb->data to start checksumming.
				4998	* @off: the offset from start to place the checksum.
				4999	*
				5000	* For untrusted partially-checksummed packets, we need to make sure the values
				5001	* for skb->csum_start and skb->csum_offset are valid so we don't oops.
				5002	*
				5003	* This function checks and sets those values and skb->ip_summed: if this
				5004	* returns false you should drop the packet.
				5005	*/
				5006	bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
				5007	{
				5008	u32 csum_end = (u32)start + (u32)off + sizeof(__sum16);
				5009	u32 csum_start = skb_headroom(skb) + (u32)start;
				5010
				5011	if (unlikely(csum_start > U16_MAX \|\| csum_end > skb_headlen(skb))) {
				5012	net_warn_ratelimited("bad partial csum: csum=%u/%u headroom=%u headlen=%u\n",
				5013	start, off, skb_headroom(skb), skb_headlen(skb));
				5014	return false;
				5015	}
				5016	skb->ip_summed = CHECKSUM_PARTIAL;
				5017	skb->csum_start = csum_start;
				5018	skb->csum_offset = off;
				5019	skb_set_transport_header(skb, start);
				5020	return true;
				5021	}
				5022	EXPORT_SYMBOL_GPL(skb_partial_csum_set);
				5023
				5024	static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len,
				5025	unsigned int max)
				5026	{
				5027	if (skb_headlen(skb) >= len)
				5028	return 0;
				5029
				5030	/* If we need to pullup then pullup to the max, so we
				5031	* won't need to do it again.
				5032	*/
				5033	if (max > skb->len)
				5034	max = skb->len;
				5035
				5036	if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
				5037	return -ENOMEM;
				5038
				5039	if (skb_headlen(skb) < len)
				5040	return -EPROTO;
				5041
				5042	return 0;
				5043	}
				5044
				5045	#define MAX_TCP_HDR_LEN (15 * 4)
				5046
				5047	static __sum16 skb_checksum_setup_ip(struct sk_buff skb,
				5048	typeof(IPPROTO_IP) proto,
				5049	unsigned int off)
				5050	{
				5051	switch (proto) {
				5052	int err;
				5053
				5054	case IPPROTO_TCP:
				5055	err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr),
				5056	off + MAX_TCP_HDR_LEN);
				5057	if (!err && !skb_partial_csum_set(skb, off,
				5058	offsetof(struct tcphdr,
				5059	check)))
				5060	err = -EPROTO;
				5061	return err ? ERR_PTR(err) : &tcp_hdr(skb)->check;
				5062
				5063	case IPPROTO_UDP:
				5064	err = skb_maybe_pull_tail(skb, off + sizeof(struct udphdr),
				5065	off + sizeof(struct udphdr));
				5066	if (!err && !skb_partial_csum_set(skb, off,
				5067	offsetof(struct udphdr,
				5068	check)))
				5069	err = -EPROTO;
				5070	return err ? ERR_PTR(err) : &udp_hdr(skb)->check;
				5071	}
				5072
				5073	return ERR_PTR(-EPROTO);
				5074	}
				5075
				5076	/* This value should be large enough to cover a tagged ethernet header plus
				5077	* maximally sized IP and TCP or UDP headers.
				5078	*/
				5079	#define MAX_IP_HDR_LEN 128
				5080
				5081	static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate)
				5082	{
				5083	unsigned int off;
				5084	bool fragment;
				5085	__sum16 *csum;
				5086	int err;
				5087
				5088	fragment = false;
				5089
				5090	err = skb_maybe_pull_tail(skb,
				5091	sizeof(struct iphdr),
				5092	MAX_IP_HDR_LEN);
				5093	if (err < 0)
				5094	goto out;
				5095
				5096	if (ip_hdr(skb)->frag_off & htons(IP_OFFSET \| IP_MF))
				5097	fragment = true;
				5098
				5099	off = ip_hdrlen(skb);
				5100
				5101	err = -EPROTO;
				5102
				5103	if (fragment)
				5104	goto out;
				5105
				5106	csum = skb_checksum_setup_ip(skb, ip_hdr(skb)->protocol, off);
				5107	if (IS_ERR(csum))
				5108	return PTR_ERR(csum);
				5109
				5110	if (recalculate)
				5111	*csum = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
				5112	ip_hdr(skb)->daddr,
				5113	skb->len - off,
				5114	ip_hdr(skb)->protocol, 0);
				5115	err = 0;
				5116
				5117	out:
				5118	return err;
				5119	}
				5120
				5121	/* This value should be large enough to cover a tagged ethernet header plus
				5122	* an IPv6 header, all options, and a maximal TCP or UDP header.
				5123	*/
				5124	#define MAX_IPV6_HDR_LEN 256
				5125
				5126	#define OPT_HDR(type, skb, off) \
				5127	(type *)(skb_network_header(skb) + (off))
				5128
				5129	static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
				5130	{
				5131	int err;
				5132	u8 nexthdr;
				5133	unsigned int off;
				5134	unsigned int len;
				5135	bool fragment;
				5136	bool done;
				5137	__sum16 *csum;
				5138
				5139	fragment = false;
				5140	done = false;
				5141
				5142	off = sizeof(struct ipv6hdr);
				5143
				5144	err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN);
				5145	if (err < 0)
				5146	goto out;
				5147
				5148	nexthdr = ipv6_hdr(skb)->nexthdr;
				5149
				5150	len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
				5151	while (off <= len && !done) {
				5152	switch (nexthdr) {
				5153	case IPPROTO_DSTOPTS:
				5154	case IPPROTO_HOPOPTS:
				5155	case IPPROTO_ROUTING: {
				5156	struct ipv6_opt_hdr *hp;
				5157
				5158	err = skb_maybe_pull_tail(skb,
				5159	off +
				5160	sizeof(struct ipv6_opt_hdr),
				5161	MAX_IPV6_HDR_LEN);
				5162	if (err < 0)
				5163	goto out;
				5164
				5165	hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
				5166	nexthdr = hp->nexthdr;
				5167	off += ipv6_optlen(hp);
				5168	break;
				5169	}
				5170	case IPPROTO_AH: {
				5171	struct ip_auth_hdr *hp;
				5172
				5173	err = skb_maybe_pull_tail(skb,
				5174	off +
				5175	sizeof(struct ip_auth_hdr),
				5176	MAX_IPV6_HDR_LEN);
				5177	if (err < 0)
				5178	goto out;
				5179
				5180	hp = OPT_HDR(struct ip_auth_hdr, skb, off);
				5181	nexthdr = hp->nexthdr;
				5182	off += ipv6_authlen(hp);
				5183	break;
				5184	}
				5185	case IPPROTO_FRAGMENT: {
				5186	struct frag_hdr *hp;
				5187
				5188	err = skb_maybe_pull_tail(skb,
				5189	off +
				5190	sizeof(struct frag_hdr),
				5191	MAX_IPV6_HDR_LEN);
				5192	if (err < 0)
				5193	goto out;
				5194
				5195	hp = OPT_HDR(struct frag_hdr, skb, off);
				5196
				5197	if (hp->frag_off & htons(IP6_OFFSET \| IP6_MF))
				5198	fragment = true;
				5199
				5200	nexthdr = hp->nexthdr;
				5201	off += sizeof(struct frag_hdr);
				5202	break;
				5203	}
				5204	default:
				5205	done = true;
				5206	break;
				5207	}
				5208	}
				5209
				5210	err = -EPROTO;
				5211
				5212	if (!done \|\| fragment)
				5213	goto out;
				5214
				5215	csum = skb_checksum_setup_ip(skb, nexthdr, off);
				5216	if (IS_ERR(csum))
				5217	return PTR_ERR(csum);
				5218
				5219	if (recalculate)
				5220	*csum = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
				5221	&ipv6_hdr(skb)->daddr,
				5222	skb->len - off, nexthdr, 0);
				5223	err = 0;
				5224
				5225	out:
				5226	return err;
				5227	}
				5228
				5229	/**
				5230	* skb_checksum_setup - set up partial checksum offset
				5231	* @skb: the skb to set up
				5232	* @recalculate: if true the pseudo-header checksum will be recalculated
				5233	*/
				5234	int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
				5235	{
				5236	int err;
				5237
				5238	switch (skb->protocol) {
				5239	case htons(ETH_P_IP):
				5240	err = skb_checksum_setup_ipv4(skb, recalculate);
				5241	break;
				5242
				5243	case htons(ETH_P_IPV6):
				5244	err = skb_checksum_setup_ipv6(skb, recalculate);
				5245	break;
				5246
				5247	default:
				5248	err = -EPROTO;
				5249	break;
				5250	}
				5251
				5252	return err;
				5253	}
				5254	EXPORT_SYMBOL(skb_checksum_setup);
				5255
				5256	/**
				5257	* skb_checksum_maybe_trim - maybe trims the given skb
				5258	* @skb: the skb to check
				5259	* @transport_len: the data length beyond the network header
				5260	*
				5261	* Checks whether the given skb has data beyond the given transport length.
				5262	* If so, returns a cloned skb trimmed to this transport length.
				5263	* Otherwise returns the provided skb. Returns NULL in error cases
				5264	* (e.g. transport_len exceeds skb length or out-of-memory).
				5265	*
				5266	* Caller needs to set the skb transport header and free any returned skb if it
				5267	* differs from the provided skb.
				5268	*/
				5269	static struct sk_buff skb_checksum_maybe_trim(struct sk_buff skb,
				5270	unsigned int transport_len)
				5271	{
				5272	struct sk_buff *skb_chk;
				5273	unsigned int len = skb_transport_offset(skb) + transport_len;
				5274	int ret;
				5275
				5276	if (skb->len < len)
				5277	return NULL;
				5278	else if (skb->len == len)
				5279	return skb;
				5280
				5281	skb_chk = skb_clone(skb, GFP_ATOMIC);
				5282	if (!skb_chk)
				5283	return NULL;
				5284
				5285	ret = pskb_trim_rcsum(skb_chk, len);
				5286	if (ret) {
				5287	kfree_skb(skb_chk);
				5288	return NULL;
				5289	}
				5290
				5291	return skb_chk;
				5292	}
				5293
				5294	/**
				5295	* skb_checksum_trimmed - validate checksum of an skb
				5296	* @skb: the skb to check
				5297	* @transport_len: the data length beyond the network header
				5298	* @skb_chkf: checksum function to use
				5299	*
				5300	* Applies the given checksum function skb_chkf to the provided skb.
				5301	* Returns a checked and maybe trimmed skb. Returns NULL on error.
				5302	*
				5303	* If the skb has data beyond the given transport length, then a
				5304	* trimmed & cloned skb is checked and returned.
				5305	*
				5306	* Caller needs to set the skb transport header and free any returned skb if it
				5307	* differs from the provided skb.
				5308	*/
				5309	struct sk_buff skb_checksum_trimmed(struct sk_buff skb,
				5310	unsigned int transport_len,
				5311	__sum16(skb_chkf)(struct sk_buff skb))
				5312	{
				5313	struct sk_buff *skb_chk;
				5314	unsigned int offset = skb_transport_offset(skb);
				5315	__sum16 ret;
				5316
				5317	skb_chk = skb_checksum_maybe_trim(skb, transport_len);
				5318	if (!skb_chk)
				5319	goto err;
				5320
				5321	if (!pskb_may_pull(skb_chk, offset))
				5322	goto err;
				5323
				5324	skb_pull_rcsum(skb_chk, offset);
				5325	ret = skb_chkf(skb_chk);
				5326	skb_push_rcsum(skb_chk, offset);
				5327
				5328	if (ret)
				5329	goto err;
				5330
				5331	return skb_chk;
				5332
				5333	err:
				5334	if (skb_chk && skb_chk != skb)
				5335	kfree_skb(skb_chk);
				5336
				5337	return NULL;
				5338
				5339	}
				5340	EXPORT_SYMBOL(skb_checksum_trimmed);
				5341
				5342	void __skb_warn_lro_forwarding(const struct sk_buff *skb)
				5343	{
				5344	net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n",
				5345	skb->dev->name);
				5346	}
				5347	EXPORT_SYMBOL(__skb_warn_lro_forwarding);
				5348
				5349	void kfree_skb_partial(struct sk_buff *skb, bool head_stolen)
				5350	{
				5351	if (head_stolen) {
				5352	skb_release_head_state(skb);
				5353	kmem_cache_free(skbuff_head_cache, skb);
				5354	} else {
				5355	__kfree_skb(skb);
				5356	}
				5357	}
				5358	EXPORT_SYMBOL(kfree_skb_partial);
				5359
				5360	/**
				5361	* skb_try_coalesce - try to merge skb to prior one
				5362	* @to: prior buffer
				5363	* @from: buffer to add
				5364	* @fragstolen: pointer to boolean
				5365	* @delta_truesize: how much more was allocated than was requested
				5366	*/
				5367	bool skb_try_coalesce(struct sk_buff to, struct sk_buff from,
				5368	bool fragstolen, int delta_truesize)
				5369	{
				5370	struct skb_shared_info to_shinfo, from_shinfo;
				5371	int i, delta, len = from->len;
				5372
				5373	*fragstolen = false;
				5374
				5375	if (skb_cloned(to))
				5376	return false;
				5377
				5378	if (len <= skb_tailroom(to)) {
				5379	if (len)
				5380	BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
				5381	*delta_truesize = 0;
				5382	return true;
				5383	}
				5384
				5385	to_shinfo = skb_shinfo(to);
				5386	from_shinfo = skb_shinfo(from);
				5387	if (to_shinfo->frag_list \|\| from_shinfo->frag_list)
				5388	return false;
				5389	if (skb_zcopy(to) \|\| skb_zcopy(from))
				5390	return false;
				5391	if(skb_shinfo_is_ptr(to) \|\| skb_shinfo_is_ptr(from))
				5392	return false;
				5393
				5394	if (skb_headlen(from) != 0) {
				5395	struct page *page;
				5396	unsigned int offset;
				5397
				5398	if (to_shinfo->nr_frags +
				5399	from_shinfo->nr_frags >= MAX_SKB_FRAGS)
				5400	return false;
				5401
				5402	if (skb_head_is_locked(from))
				5403	return false;
				5404
				5405	delta = from->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
				5406
				5407	page = virt_to_head_page(from->head);
				5408	offset = from->data - (unsigned char *)page_address(page);
				5409
				5410	skb_fill_page_desc(to, to_shinfo->nr_frags,
				5411	page, offset, skb_headlen(from));
				5412	*fragstolen = true;
				5413	} else {
				5414	if (to_shinfo->nr_frags +
				5415	from_shinfo->nr_frags > MAX_SKB_FRAGS)
				5416	return false;
				5417
				5418	delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
				5419	}
				5420
				5421	WARN_ON_ONCE(delta < len);
				5422
				5423	memcpy(to_shinfo->frags + to_shinfo->nr_frags,
				5424	from_shinfo->frags,
				5425	from_shinfo->nr_frags * sizeof(skb_frag_t));
				5426	to_shinfo->nr_frags += from_shinfo->nr_frags;
				5427
				5428	if (!skb_cloned(from))
				5429	from_shinfo->nr_frags = 0;
				5430
				5431	/* if the skb is not cloned this does nothing
				5432	* since we set nr_frags to 0.
				5433	*/
				5434	for (i = 0; i < from_shinfo->nr_frags; i++)
				5435	__skb_frag_ref(&from_shinfo->frags[i]);
				5436
				5437	to->truesize += delta;
				5438	to->len += len;
				5439	to->data_len += len;
				5440
				5441	*delta_truesize = delta;
				5442	return true;
				5443	}
				5444	EXPORT_SYMBOL(skb_try_coalesce);
				5445
				5446	/**
				5447	* skb_scrub_packet - scrub an skb
				5448	*
				5449	* @skb: buffer to clean
				5450	* @xnet: packet is crossing netns
				5451	*
				5452	* skb_scrub_packet can be used after encapsulating or decapsulting a packet
				5453	* into/from a tunnel. Some information have to be cleared during these
				5454	* operations.
				5455	* skb_scrub_packet can also be used to clean a skb before injecting it in
				5456	* another namespace (@xnet == true). We have to clear all information in the
				5457	* skb that could impact namespace isolation.
				5458	*/
				5459	void skb_scrub_packet(struct sk_buff *skb, bool xnet)
				5460	{
				5461	skb->pkt_type = PACKET_HOST;
				5462	skb->skb_iif = 0;
				5463	skb->ignore_df = 0;
				5464	skb_dst_drop(skb);
				5465	skb_ext_reset(skb);
				5466	nf_reset_ct(skb);
				5467	nf_reset_trace(skb);
				5468
				5469	#ifdef CONFIG_NET_SWITCHDEV
				5470	skb->offload_fwd_mark = 0;
				5471	skb->offload_l3_fwd_mark = 0;
				5472	#endif
				5473
				5474	if (!xnet)
				5475	return;
				5476
				5477	ipvs_reset(skb);
				5478	skb->mark = 0;
				5479	skb->tstamp = 0;
				5480	}
				5481	EXPORT_SYMBOL_GPL(skb_scrub_packet);
				5482
				5483	/**
				5484	* skb_gso_transport_seglen - Return length of individual segments of a gso packet
				5485	*
				5486	* @skb: GSO skb
				5487	*
				5488	* skb_gso_transport_seglen is used to determine the real size of the
				5489	* individual segments, including Layer4 headers (TCP/UDP).
				5490	*
				5491	* The MAC/L2 or network (IP, IPv6) headers are not accounted for.
				5492	*/
				5493	static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
				5494	{
				5495	const struct skb_shared_info *shinfo = skb_shinfo(skb);
				5496	unsigned int thlen = 0;
				5497
				5498	if (skb->encapsulation) {
				5499	thlen = skb_inner_transport_header(skb) -
				5500	skb_transport_header(skb);
				5501
				5502	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 \| SKB_GSO_TCPV6)))
				5503	thlen += inner_tcp_hdrlen(skb);
				5504	} else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 \| SKB_GSO_TCPV6))) {
				5505	thlen = tcp_hdrlen(skb);
				5506	} else if (unlikely(skb_is_gso_sctp(skb))) {
				5507	thlen = sizeof(struct sctphdr);
				5508	} else if (shinfo->gso_type & SKB_GSO_UDP_L4) {
				5509	thlen = sizeof(struct udphdr);
				5510	}
				5511	/* UFO sets gso_size to the size of the fragmentation
				5512	* payload, i.e. the size of the L4 (UDP) header is already
				5513	* accounted for.
				5514	*/
				5515	return thlen + shinfo->gso_size;
				5516	}
				5517
				5518	/**
				5519	* skb_gso_network_seglen - Return length of individual segments of a gso packet
				5520	*
				5521	* @skb: GSO skb
				5522	*
				5523	* skb_gso_network_seglen is used to determine the real size of the
				5524	* individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP).
				5525	*
				5526	* The MAC/L2 header is not accounted for.
				5527	*/
				5528	static unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
				5529	{
				5530	unsigned int hdr_len = skb_transport_header(skb) -
				5531	skb_network_header(skb);
				5532
				5533	return hdr_len + skb_gso_transport_seglen(skb);
				5534	}
				5535
				5536	/**
				5537	* skb_gso_mac_seglen - Return length of individual segments of a gso packet
				5538	*
				5539	* @skb: GSO skb
				5540	*
				5541	* skb_gso_mac_seglen is used to determine the real size of the
				5542	* individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4
				5543	* headers (TCP/UDP).
				5544	*/
				5545	static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
				5546	{
				5547	unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
				5548
				5549	return hdr_len + skb_gso_transport_seglen(skb);
				5550	}
				5551
				5552	/**
				5553	* skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS
				5554	*
				5555	* There are a couple of instances where we have a GSO skb, and we
				5556	* want to determine what size it would be after it is segmented.
				5557	*
				5558	* We might want to check:
				5559	* - L3+L4+payload size (e.g. IP forwarding)
				5560	* - L2+L3+L4+payload size (e.g. sanity check before passing to driver)
				5561	*
				5562	* This is a helper to do that correctly considering GSO_BY_FRAGS.
				5563	*
				5564	* @skb: GSO skb
				5565	*
				5566	* @seg_len: The segmented length (from skb_gso_*_seglen). In the
				5567	* GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS].
				5568	*
				5569	* @max_len: The maximum permissible length.
				5570	*
				5571	* Returns true if the segmented length <= max length.
				5572	*/
				5573	static inline bool skb_gso_size_check(const struct sk_buff *skb,
				5574	unsigned int seg_len,
				5575	unsigned int max_len) {
				5576	const struct skb_shared_info *shinfo = skb_shinfo(skb);
				5577	const struct sk_buff *iter;
				5578
				5579	if (shinfo->gso_size != GSO_BY_FRAGS)
				5580	return seg_len <= max_len;
				5581
				5582	/* Undo this so we can re-use header sizes */
				5583	seg_len -= GSO_BY_FRAGS;
				5584
				5585	skb_walk_frags(skb, iter) {
				5586	if (seg_len + skb_headlen(iter) > max_len)
				5587	return false;
				5588	}
				5589
				5590	return true;
				5591	}
				5592
				5593	/**
				5594	* skb_gso_validate_network_len - Will a split GSO skb fit into a given MTU?
				5595	*
				5596	* @skb: GSO skb
				5597	* @mtu: MTU to validate against
				5598	*
				5599	* skb_gso_validate_network_len validates if a given skb will fit a
				5600	* wanted MTU once split. It considers L3 headers, L4 headers, and the
				5601	* payload.
				5602	*/
				5603	bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu)
				5604	{
				5605	return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu);
				5606	}
				5607	EXPORT_SYMBOL_GPL(skb_gso_validate_network_len);
				5608
				5609	/**
				5610	* skb_gso_validate_mac_len - Will a split GSO skb fit in a given length?
				5611	*
				5612	* @skb: GSO skb
				5613	* @len: length to validate against
				5614	*
				5615	* skb_gso_validate_mac_len validates if a given skb will fit a wanted
				5616	* length once split, including L2, L3 and L4 headers and the payload.
				5617	*/
				5618	bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len)
				5619	{
				5620	return skb_gso_size_check(skb, skb_gso_mac_seglen(skb), len);
				5621	}
				5622	EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len);
				5623
				5624	static struct sk_buff skb_reorder_vlan_header(struct sk_buff skb)
				5625	{
				5626	int mac_len, meta_len;
				5627	void *meta;
				5628
				5629	if (skb_cow(skb, skb_headroom(skb)) < 0) {
				5630	kfree_skb(skb);
				5631	return NULL;
				5632	}
				5633
				5634	mac_len = skb->data - skb_mac_header(skb);
				5635	if (likely(mac_len > VLAN_HLEN + ETH_TLEN)) {
				5636	memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
				5637	mac_len - VLAN_HLEN - ETH_TLEN);
				5638	}
				5639
				5640	meta_len = skb_metadata_len(skb);
				5641	if (meta_len) {
				5642	meta = skb_metadata_end(skb) - meta_len;
				5643	memmove(meta + VLAN_HLEN, meta, meta_len);
				5644	}
				5645
				5646	skb->mac_header += VLAN_HLEN;
				5647	return skb;
				5648	}
				5649
				5650	struct sk_buff skb_vlan_untag(struct sk_buff skb)
				5651	{
				5652	struct vlan_hdr *vhdr;
				5653	u16 vlan_tci;
				5654
				5655	if (unlikely(skb_vlan_tag_present(skb))) {
				5656	/* vlan_tci is already set-up so leave this for another time */
				5657	return skb;
				5658	}
				5659
				5660	skb = skb_share_check(skb, GFP_ATOMIC);
				5661	if (unlikely(!skb))
				5662	goto err_free;
				5663	/* We may access the two bytes after vlan_hdr in vlan_set_encap_proto(). */
				5664	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN + sizeof(unsigned short))))
				5665	goto err_free;
				5666
				5667	vhdr = (struct vlan_hdr *)skb->data;
				5668	vlan_tci = ntohs(vhdr->h_vlan_TCI);
				5669	__vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci);
				5670
				5671	skb_pull_rcsum(skb, VLAN_HLEN);
				5672	vlan_set_encap_proto(skb, vhdr);
				5673
				5674	skb = skb_reorder_vlan_header(skb);
				5675	if (unlikely(!skb))
				5676	goto err_free;
				5677
				5678	skb_reset_network_header(skb);
				5679	skb_reset_transport_header(skb);
				5680	skb_reset_mac_len(skb);
				5681
				5682	return skb;
				5683
				5684	err_free:
				5685	kfree_skb(skb);
				5686	return NULL;
				5687	}
				5688	EXPORT_SYMBOL(skb_vlan_untag);
				5689
				5690	int skb_ensure_writable(struct sk_buff *skb, int write_len)
				5691	{
				5692	if (!pskb_may_pull(skb, write_len))
				5693	return -ENOMEM;
				5694
				5695	if (!skb_cloned(skb) \|\| skb_clone_writable(skb, write_len))
				5696	return 0;
				5697
				5698	return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
				5699	}
				5700	EXPORT_SYMBOL(skb_ensure_writable);
				5701
				5702	/* remove VLAN header from packet and update csum accordingly.
				5703	* expects a non skb_vlan_tag_present skb with a vlan tag payload
				5704	*/
				5705	int __skb_vlan_pop(struct sk_buff skb, u16 vlan_tci)
				5706	{
				5707	struct vlan_hdr *vhdr;
				5708	int offset = skb->data - skb_mac_header(skb);
				5709	int err;
				5710
				5711	if (WARN_ONCE(offset,
				5712	"__skb_vlan_pop got skb with skb->data not at mac header (offset %d)\n",
				5713	offset)) {
				5714	return -EINVAL;
				5715	}
				5716
				5717	err = skb_ensure_writable(skb, VLAN_ETH_HLEN);
				5718	if (unlikely(err))
				5719	return err;
				5720
				5721	skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
				5722
				5723	vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
				5724	*vlan_tci = ntohs(vhdr->h_vlan_TCI);
				5725
				5726	memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
				5727	__skb_pull(skb, VLAN_HLEN);
				5728
				5729	vlan_set_encap_proto(skb, vhdr);
				5730	skb->mac_header += VLAN_HLEN;
				5731
				5732	if (skb_network_offset(skb) < ETH_HLEN)
				5733	skb_set_network_header(skb, ETH_HLEN);
				5734
				5735	skb_reset_mac_len(skb);
				5736
				5737	return err;
				5738	}
				5739	EXPORT_SYMBOL(__skb_vlan_pop);
				5740
				5741	/* Pop a vlan tag either from hwaccel or from payload.
				5742	* Expects skb->data at mac header.
				5743	*/
				5744	int skb_vlan_pop(struct sk_buff *skb)
				5745	{
				5746	u16 vlan_tci;
				5747	__be16 vlan_proto;
				5748	int err;
				5749
				5750	if (likely(skb_vlan_tag_present(skb))) {
				5751	__vlan_hwaccel_clear_tag(skb);
				5752	} else {
				5753	if (unlikely(!eth_type_vlan(skb->protocol)))
				5754	return 0;
				5755
				5756	err = __skb_vlan_pop(skb, &vlan_tci);
				5757	if (err)
				5758	return err;
				5759	}
				5760	/* move next vlan tag to hw accel tag */
				5761	if (likely(!eth_type_vlan(skb->protocol)))
				5762	return 0;
				5763
				5764	vlan_proto = skb->protocol;
				5765	err = __skb_vlan_pop(skb, &vlan_tci);
				5766	if (unlikely(err))
				5767	return err;
				5768
				5769	__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
				5770	return 0;
				5771	}
				5772	EXPORT_SYMBOL(skb_vlan_pop);
				5773
				5774	/* Push a vlan tag either into hwaccel or into payload (if hwaccel tag present).
				5775	* Expects skb->data at mac header.
				5776	*/
				5777	int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
				5778	{
				5779	if (skb_vlan_tag_present(skb)) {
				5780	int offset = skb->data - skb_mac_header(skb);
				5781	int err;
				5782
				5783	if (WARN_ONCE(offset,
				5784	"skb_vlan_push got skb with skb->data not at mac header (offset %d)\n",
				5785	offset)) {
				5786	return -EINVAL;
				5787	}
				5788
				5789	err = __vlan_insert_tag(skb, skb->vlan_proto,
				5790	skb_vlan_tag_get(skb));
				5791	if (err)
				5792	return err;
				5793
				5794	skb->protocol = skb->vlan_proto;
				5795	skb->mac_len += VLAN_HLEN;
				5796
				5797	skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
				5798	}
				5799	__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
				5800	return 0;
				5801	}
				5802	EXPORT_SYMBOL(skb_vlan_push);
				5803
				5804	/* Update the ethertype of hdr and the skb csum value if required. */
				5805	static void skb_mod_eth_type(struct sk_buff skb, struct ethhdr hdr,
				5806	__be16 ethertype)
				5807	{
				5808	if (skb->ip_summed == CHECKSUM_COMPLETE) {
				5809	__be16 diff[] = { ~hdr->h_proto, ethertype };
				5810
				5811	skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
				5812	}
				5813
				5814	hdr->h_proto = ethertype;
				5815	}
				5816
				5817	/**
				5818	* skb_mpls_push() - push a new MPLS header after the mac header
				5819	*
				5820	* @skb: buffer
				5821	* @mpls_lse: MPLS label stack entry to push
				5822	* @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848)
				5823	* @mac_len: length of the MAC header
				5824	*
				5825	* Expects skb->data at mac header.
				5826	*
				5827	* Returns 0 on success, -errno otherwise.
				5828	*/
				5829	int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
				5830	int mac_len, bool ethernet)
				5831	{
				5832	struct mpls_shim_hdr *lse;
				5833	int err;
				5834
				5835	if (unlikely(!eth_p_mpls(mpls_proto)))
				5836	return -EINVAL;
				5837
				5838	/* Networking stack does not allow simultaneous Tunnel and MPLS GSO. */
				5839	if (skb->encapsulation)
				5840	return -EINVAL;
				5841
				5842	err = skb_cow_head(skb, MPLS_HLEN);
				5843	if (unlikely(err))
				5844	return err;
				5845
				5846	if (!skb->inner_protocol) {
				5847	skb_set_inner_network_header(skb, mac_len);
				5848	skb_set_inner_protocol(skb, skb->protocol);
				5849	}
				5850
				5851	skb_push(skb, MPLS_HLEN);
				5852	memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
				5853	mac_len);
				5854	skb_reset_mac_header(skb);
				5855	skb_set_network_header(skb, mac_len);
				5856
				5857	lse = mpls_hdr(skb);
				5858	lse->label_stack_entry = mpls_lse;
				5859	skb_postpush_rcsum(skb, lse, MPLS_HLEN);
				5860
				5861	if (ethernet && mac_len >= ETH_HLEN)
				5862	skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto);
				5863	skb->protocol = mpls_proto;
				5864
				5865	return 0;
				5866	}
				5867	EXPORT_SYMBOL_GPL(skb_mpls_push);
				5868
				5869	/**
				5870	* skb_mpls_pop() - pop the outermost MPLS header
				5871	*
				5872	* @skb: buffer
				5873	* @next_proto: ethertype of header after popped MPLS header
				5874	* @mac_len: length of the MAC header
				5875	* @ethernet: flag to indicate if ethernet header is present in packet
				5876	*
				5877	* Expects skb->data at mac header.
				5878	*
				5879	* Returns 0 on success, -errno otherwise.
				5880	*/
				5881	int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len,
				5882	bool ethernet)
				5883	{
				5884	int err;
				5885
				5886	if (unlikely(!eth_p_mpls(skb->protocol)))
				5887	return 0;
				5888
				5889	err = skb_ensure_writable(skb, mac_len + MPLS_HLEN);
				5890	if (unlikely(err))
				5891	return err;
				5892
				5893	skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);
				5894	memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
				5895	mac_len);
				5896
				5897	__skb_pull(skb, MPLS_HLEN);
				5898	skb_reset_mac_header(skb);
				5899	skb_set_network_header(skb, mac_len);
				5900
				5901	if (ethernet && mac_len >= ETH_HLEN) {
				5902	struct ethhdr *hdr;
				5903
				5904	/* use mpls_hdr() to get ethertype to account for VLANs. */
				5905	hdr = (struct ethhdr )((void )mpls_hdr(skb) - ETH_HLEN);
				5906	skb_mod_eth_type(skb, hdr, next_proto);
				5907	}
				5908	skb->protocol = next_proto;
				5909
				5910	return 0;
				5911	}
				5912	EXPORT_SYMBOL_GPL(skb_mpls_pop);
				5913
				5914	/**
				5915	* skb_mpls_update_lse() - modify outermost MPLS header and update csum
				5916	*
				5917	* @skb: buffer
				5918	* @mpls_lse: new MPLS label stack entry to update to
				5919	*
				5920	* Expects skb->data at mac header.
				5921	*
				5922	* Returns 0 on success, -errno otherwise.
				5923	*/
				5924	int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse)
				5925	{
				5926	int err;
				5927
				5928	if (unlikely(!eth_p_mpls(skb->protocol)))
				5929	return -EINVAL;
				5930
				5931	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
				5932	if (unlikely(err))
				5933	return err;
				5934
				5935	if (skb->ip_summed == CHECKSUM_COMPLETE) {
				5936	__be32 diff[] = { ~mpls_hdr(skb)->label_stack_entry, mpls_lse };
				5937
				5938	skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
				5939	}
				5940
				5941	mpls_hdr(skb)->label_stack_entry = mpls_lse;
				5942
				5943	return 0;
				5944	}
				5945	EXPORT_SYMBOL_GPL(skb_mpls_update_lse);
				5946
				5947	/**
				5948	* skb_mpls_dec_ttl() - decrement the TTL of the outermost MPLS header
				5949	*
				5950	* @skb: buffer
				5951	*
				5952	* Expects skb->data at mac header.
				5953	*
				5954	* Returns 0 on success, -errno otherwise.
				5955	*/
				5956	int skb_mpls_dec_ttl(struct sk_buff *skb)
				5957	{
				5958	u32 lse;
				5959	u8 ttl;
				5960
				5961	if (unlikely(!eth_p_mpls(skb->protocol)))
				5962	return -EINVAL;
				5963
				5964	if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN))
				5965	return -ENOMEM;
				5966
				5967	lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry);
				5968	ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
				5969	if (!--ttl)
				5970	return -EINVAL;
				5971
				5972	lse &= ~MPLS_LS_TTL_MASK;
				5973	lse \|= ttl << MPLS_LS_TTL_SHIFT;
				5974
				5975	return skb_mpls_update_lse(skb, cpu_to_be32(lse));
				5976	}
				5977	EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl);
				5978
				5979	/**
				5980	* alloc_skb_with_frags - allocate skb with page frags
				5981	*
				5982	* @header_len: size of linear part
				5983	* @data_len: needed length in frags
				5984	* @max_page_order: max page order desired.
				5985	* @errcode: pointer to error code if any
				5986	* @gfp_mask: allocation mask
				5987	*
				5988	* This can be used to allocate a paged skb, given a maximal order for frags.
				5989	*/
				5990	struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
				5991	unsigned long data_len,
				5992	int max_page_order,
				5993	int *errcode,
				5994	gfp_t gfp_mask)
				5995	{
				5996	int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
				5997	unsigned long chunk;
				5998	struct sk_buff *skb;
				5999	struct page *page;
				6000	int i;
				6001
				6002	*errcode = -EMSGSIZE;
				6003	/* Note this test could be relaxed, if we succeed to allocate
				6004	* high order pages...
				6005	*/
				6006	if (npages > MAX_SKB_FRAGS)
				6007	return NULL;
				6008
				6009	*errcode = -ENOBUFS;
				6010	skb = alloc_skb(header_len, gfp_mask);
				6011	if (!skb)
				6012	return NULL;
				6013
				6014	skb->truesize += npages << PAGE_SHIFT;
				6015
				6016	for (i = 0; npages > 0; i++) {
				6017	int order = max_page_order;
				6018
				6019	while (order) {
				6020	if (npages >= 1 << order) {
				6021	page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) \|
				6022	__GFP_COMP \|
				6023	__GFP_NOWARN,
				6024	order);
				6025	if (page)
				6026	goto fill_page;
				6027	/* Do not retry other high order allocations */
				6028	order = 1;
				6029	max_page_order = 0;
				6030	}
				6031	order--;
				6032	}
				6033	page = alloc_page(gfp_mask);
				6034	if (!page)
				6035	goto failure;
				6036	fill_page:
				6037	chunk = min_t(unsigned long, data_len,
				6038	PAGE_SIZE << order);
				6039	skb_fill_page_desc(skb, i, page, 0, chunk);
				6040	data_len -= chunk;
				6041	npages -= 1 << order;
				6042	}
				6043	return skb;
				6044
				6045	failure:
				6046	kfree_skb(skb);
				6047	return NULL;
				6048	}
				6049	EXPORT_SYMBOL(alloc_skb_with_frags);
				6050
				6051	/* carve out the first off bytes from skb when off < headlen */
				6052	static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
				6053	const int headlen, gfp_t gfp_mask)
				6054	{
				6055	int i;
				6056	int size = skb_end_offset(skb);
				6057	int new_hlen = headlen - off;
				6058	u8 *data;
				6059
				6060	size = SKB_DATA_ALIGN(size);
				6061
				6062	if (skb_pfmemalloc(skb))
				6063	gfp_mask \|= __GFP_MEMALLOC;
				6064	data = kmalloc_reserve(size +
				6065	SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
				6066	gfp_mask, NUMA_NO_NODE, NULL);
				6067	if (!data)
				6068	return -ENOMEM;
				6069
				6070	size = SKB_WITH_OVERHEAD(ksize(data));
				6071
				6072	/* Copy real data, and all frags */
				6073	skb_copy_from_linear_data_offset(skb, off, data, new_hlen);
				6074	skb->len -= off;
				6075
				6076	memcpy((struct skb_shared_info *)(data + size),
				6077	skb_shinfo(skb),
				6078	offsetof(struct skb_shared_info,
				6079	frags[skb_shinfo(skb)->nr_frags]));
				6080	if (skb_cloned(skb)) {
				6081	/* drop the old head gracefully */
				6082	if (skb_orphan_frags(skb, gfp_mask)) {
				6083	kfree(data);
				6084	return -ENOMEM;
				6085	}
				6086	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
				6087	skb_frag_ref(skb, i);
				6088	if (skb_has_frag_list(skb))
				6089	skb_clone_fraglist(skb);
				6090	skb_release_data(skb);
				6091	} else {
				6092	/* we can reuse existing recount- all we did was
				6093	* relocate values
				6094	*/
				6095	skb_free_head(skb);
				6096	}
				6097
				6098	skb->head = data;
				6099	skb->data = data;
				6100	skb->head_frag = 0;
				6101	#ifdef NET_SKBUFF_DATA_USES_OFFSET
				6102	skb->end = size;
				6103	#else
				6104	skb->end = skb->head + size;
				6105	#endif
				6106	skb_set_tail_pointer(skb, skb_headlen(skb));
				6107	skb_headers_offset_update(skb, 0);
				6108	skb->cloned = 0;
				6109	skb->hdr_len = 0;
				6110	skb->nohdr = 0;
				6111	skb->shared_info_ptr = NULL;
				6112	atomic_set(&skb_shinfo(skb)->dataref, 1);
				6113
				6114	return 0;
				6115	}
				6116
				6117	static int pskb_carve(struct sk_buff *skb, const u32 off, gfp_t gfp);
				6118
				6119	/* carve out the first eat bytes from skb's frag_list. May recurse into
				6120	* pskb_carve()
				6121	*/
				6122	static int pskb_carve_frag_list(struct sk_buff *skb,
				6123	struct skb_shared_info *shinfo, int eat,
				6124	gfp_t gfp_mask)
				6125	{
				6126	struct sk_buff *list = shinfo->frag_list;
				6127	struct sk_buff *clone = NULL;
				6128	struct sk_buff *insp = NULL;
				6129
				6130	do {
				6131	if (!list) {
				6132	pr_err("Not enough bytes to eat. Want %d\n", eat);
				6133	return -EFAULT;
				6134	}
				6135	if (list->len <= eat) {
				6136	/* Eaten as whole. */
				6137	eat -= list->len;
				6138	list = list->next;
				6139	insp = list;
				6140	} else {
				6141	/* Eaten partially. */
				6142	if (skb_shared(list)) {
				6143	clone = skb_clone(list, gfp_mask);
				6144	if (!clone)
				6145	return -ENOMEM;
				6146	insp = list->next;
				6147	list = clone;
				6148	} else {
				6149	/* This may be pulled without problems. */
				6150	insp = list;
				6151	}
				6152	if (pskb_carve(list, eat, gfp_mask) < 0) {
				6153	kfree_skb(clone);
				6154	return -ENOMEM;
				6155	}
				6156	break;
				6157	}
				6158	} while (eat);
				6159
				6160	/* Free pulled out fragments. */
				6161	while ((list = shinfo->frag_list) != insp) {
				6162	shinfo->frag_list = list->next;
				6163	consume_skb(list);
				6164	}
				6165	/* And insert new clone at head. */
				6166	if (clone) {
				6167	clone->next = list;
				6168	shinfo->frag_list = clone;
				6169	}
				6170	return 0;
				6171	}
				6172
				6173	/* carve off first len bytes from skb. Split line (off) is in the
				6174	* non-linear part of skb
				6175	*/
				6176	static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
				6177	int pos, gfp_t gfp_mask)
				6178	{
				6179	int i, k = 0;
				6180	int size = skb_end_offset(skb);
				6181	u8 *data;
				6182	const int nfrags = skb_shinfo(skb)->nr_frags;
				6183	struct skb_shared_info *shinfo;
				6184
				6185	size = SKB_DATA_ALIGN(size);
				6186
				6187	if (skb_pfmemalloc(skb))
				6188	gfp_mask \|= __GFP_MEMALLOC;
				6189	data = kmalloc_reserve(size +
				6190	SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
				6191	gfp_mask, NUMA_NO_NODE, NULL);
				6192	if (!data)
				6193	return -ENOMEM;
				6194
				6195	size = SKB_WITH_OVERHEAD(ksize(data));
				6196
				6197	memcpy((struct skb_shared_info *)(data + size),
				6198	skb_shinfo(skb), offsetof(struct skb_shared_info,
				6199	frags[skb_shinfo(skb)->nr_frags]));
				6200	if (skb_orphan_frags(skb, gfp_mask)) {
				6201	kfree(data);
				6202	return -ENOMEM;
				6203	}
				6204	shinfo = (struct skb_shared_info *)(data + size);
				6205	for (i = 0; i < nfrags; i++) {
				6206	int fsize = skb_frag_size(&skb_shinfo(skb)->frags[i]);
				6207
				6208	if (pos + fsize > off) {
				6209	shinfo->frags[k] = skb_shinfo(skb)->frags[i];
				6210
				6211	if (pos < off) {
				6212	/* Split frag.
				6213	* We have two variants in this case:
				6214	* 1. Move all the frag to the second
				6215	* part, if it is possible. F.e.
				6216	* this approach is mandatory for TUX,
				6217	* where splitting is expensive.
				6218	* 2. Split is accurately. We make this.
				6219	*/
				6220	skb_frag_off_add(&shinfo->frags[0], off - pos);
				6221	skb_frag_size_sub(&shinfo->frags[0], off - pos);
				6222	}
				6223	skb_frag_ref(skb, i);
				6224	k++;
				6225	}
				6226	pos += fsize;
				6227	}
				6228	shinfo->nr_frags = k;
				6229	if (skb_has_frag_list(skb))
				6230	skb_clone_fraglist(skb);
				6231
				6232	/* split line is in frag list */
				6233	if (k == 0 && pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask)) {
				6234	/* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */
				6235	if (skb_has_frag_list(skb))
				6236	kfree_skb_list(skb_shinfo(skb)->frag_list);
				6237	kfree(data);
				6238	return -ENOMEM;
				6239	}
				6240	skb_release_data(skb);
				6241
				6242	skb->head = data;
				6243	skb->head_frag = 0;
				6244	skb->data = data;
				6245	#ifdef NET_SKBUFF_DATA_USES_OFFSET
				6246	skb->end = size;
				6247	#else
				6248	skb->end = skb->head + size;
				6249	#endif
				6250	skb_reset_tail_pointer(skb);
				6251	skb_headers_offset_update(skb, 0);
				6252	skb->cloned = 0;
				6253	skb->hdr_len = 0;
				6254	skb->nohdr = 0;
				6255	skb->len -= off;
				6256	skb->data_len = skb->len;
				6257	skb->shared_info_ptr = NULL;
				6258	atomic_set(&skb_shinfo(skb)->dataref, 1);
				6259	return 0;
				6260	}
				6261
				6262	/* remove len bytes from the beginning of the skb */
				6263	static int pskb_carve(struct sk_buff *skb, const u32 len, gfp_t gfp)
				6264	{
				6265	int headlen = skb_headlen(skb);
				6266
				6267	if (len < headlen)
				6268	return pskb_carve_inside_header(skb, len, headlen, gfp);
				6269	else
				6270	return pskb_carve_inside_nonlinear(skb, len, headlen, gfp);
				6271	}
				6272
				6273	/* Extract to_copy bytes starting at off from skb, and return this in
				6274	* a new skb
				6275	*/
				6276	struct sk_buff pskb_extract(struct sk_buff skb, int off,
				6277	int to_copy, gfp_t gfp)
				6278	{
				6279	struct sk_buff *clone = skb_clone(skb, gfp);
				6280
				6281	if (!clone)
				6282	return NULL;
				6283
				6284	if (pskb_carve(clone, off, gfp) < 0 \|\|
				6285	pskb_trim(clone, to_copy)) {
				6286	kfree_skb(clone);
				6287	return NULL;
				6288	}
				6289	return clone;
				6290	}
				6291	EXPORT_SYMBOL(pskb_extract);
				6292
				6293	/**
				6294	* skb_condense - try to get rid of fragments/frag_list if possible
				6295	* @skb: buffer
				6296	*
				6297	* Can be used to save memory before skb is added to a busy queue.
				6298	* If packet has bytes in frags and enough tail room in skb->head,
				6299	* pull all of them, so that we can free the frags right now and adjust
				6300	* truesize.
				6301	* Notes:
				6302	* We do not reallocate skb->head thus can not fail.
				6303	* Caller must re-evaluate skb->truesize if needed.
				6304	*/
				6305	void skb_condense(struct sk_buff *skb)
				6306	{
				6307	if (skb->data_len) {
				6308	if (skb->data_len > skb->end - skb->tail \|\|
				6309	skb_cloned(skb))
				6310	return;
				6311
				6312	/* Nice, we can free page frag(s) right now */
				6313	__pskb_pull_tail(skb, skb->data_len);
				6314	}
				6315	/* At this point, skb->truesize might be over estimated,
				6316	* because skb had a fragment, and fragments do not tell
				6317	* their truesize.
				6318	* When we pulled its content into skb->head, fragment
				6319	* was freed, but __pskb_pull_tail() could not possibly
				6320	* adjust skb->truesize, not knowing the frag truesize.
				6321	*/
				6322	skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
				6323	}
				6324
				6325	#ifdef CONFIG_SKB_EXTENSIONS
				6326	static void skb_ext_get_ptr(struct skb_ext ext, enum skb_ext_id id)
				6327	{
				6328	return (void )ext + (ext->offset[id] SKB_EXT_ALIGN_VALUE);
				6329	}
				6330
				6331	static struct skb_ext *skb_ext_alloc(void)
				6332	{
				6333	struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
				6334
				6335	if (new) {
				6336	memset(new->offset, 0, sizeof(new->offset));
				6337	refcount_set(&new->refcnt, 1);
				6338	}
				6339
				6340	return new;
				6341	}
				6342
				6343	static struct skb_ext skb_ext_maybe_cow(struct skb_ext old,
				6344	unsigned int old_active)
				6345	{
				6346	struct skb_ext *new;
				6347
				6348	if (refcount_read(&old->refcnt) == 1)
				6349	return old;
				6350
				6351	new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
				6352	if (!new)
				6353	return NULL;
				6354
				6355	memcpy(new, old, old->chunks * SKB_EXT_ALIGN_VALUE);
				6356	refcount_set(&new->refcnt, 1);
				6357
				6358	#ifdef CONFIG_XFRM
				6359	if (old_active & (1 << SKB_EXT_SEC_PATH)) {
				6360	struct sec_path *sp = skb_ext_get_ptr(old, SKB_EXT_SEC_PATH);
				6361	unsigned int i;
				6362
				6363	for (i = 0; i < sp->len; i++)
				6364	xfrm_state_hold(sp->xvec[i]);
				6365	}
				6366	#endif
				6367	__skb_ext_put(old);
				6368	return new;
				6369	}
				6370
				6371	/**
				6372	* skb_ext_add - allocate space for given extension, COW if needed
				6373	* @skb: buffer
				6374	* @id: extension to allocate space for
				6375	*
				6376	* Allocates enough space for the given extension.
				6377	* If the extension is already present, a pointer to that extension
				6378	* is returned.
				6379	*
				6380	* If the skb was cloned, COW applies and the returned memory can be
				6381	* modified without changing the extension space of clones buffers.
				6382	*
				6383	* Returns pointer to the extension or NULL on allocation failure.
				6384	*/
				6385	void skb_ext_add(struct sk_buff skb, enum skb_ext_id id)
				6386	{
				6387	struct skb_ext new, old = NULL;
				6388	unsigned int newlen, newoff;
				6389
				6390	if (skb->active_extensions) {
				6391	old = skb->extensions;
				6392
				6393	new = skb_ext_maybe_cow(old, skb->active_extensions);
				6394	if (!new)
				6395	return NULL;
				6396
				6397	if (__skb_ext_exist(new, id))
				6398	goto set_active;
				6399
				6400	newoff = new->chunks;
				6401	} else {
				6402	newoff = SKB_EXT_CHUNKSIZEOF(*new);
				6403
				6404	new = skb_ext_alloc();
				6405	if (!new)
				6406	return NULL;
				6407	}
				6408
				6409	newlen = newoff + skb_ext_type_len[id];
				6410	new->chunks = newlen;
				6411	new->offset[id] = newoff;
				6412	set_active:
				6413	skb->extensions = new;
				6414	skb->active_extensions \|= 1 << id;
				6415	return skb_ext_get_ptr(new, id);
				6416	}
				6417	EXPORT_SYMBOL(skb_ext_add);
				6418
				6419	#ifdef CONFIG_XFRM
				6420	static void skb_ext_put_sp(struct sec_path *sp)
				6421	{
				6422	unsigned int i;
				6423
				6424	for (i = 0; i < sp->len; i++)
				6425	xfrm_state_put(sp->xvec[i]);
				6426	}
				6427	#endif
				6428
				6429	void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id)
				6430	{
				6431	struct skb_ext *ext = skb->extensions;
				6432
				6433	skb->active_extensions &= ~(1 << id);
				6434	if (skb->active_extensions == 0) {
				6435	skb->extensions = NULL;
				6436	__skb_ext_put(ext);
				6437	#ifdef CONFIG_XFRM
				6438	} else if (id == SKB_EXT_SEC_PATH &&
				6439	refcount_read(&ext->refcnt) == 1) {
				6440	struct sec_path *sp = skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH);
				6441
				6442	skb_ext_put_sp(sp);
				6443	sp->len = 0;
				6444	#endif
				6445	}
				6446	}
				6447	EXPORT_SYMBOL(__skb_ext_del);
				6448
				6449	void __skb_ext_put(struct skb_ext *ext)
				6450	{
				6451	/* If this is last clone, nothing can increment
				6452	* it after check passes. Avoids one atomic op.
				6453	*/
				6454	if (refcount_read(&ext->refcnt) == 1)
				6455	goto free_now;
				6456
				6457	if (!refcount_dec_and_test(&ext->refcnt))
				6458	return;
				6459	free_now:
				6460	#ifdef CONFIG_XFRM
				6461	if (__skb_ext_exist(ext, SKB_EXT_SEC_PATH))
				6462	skb_ext_put_sp(skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH));
				6463	#endif
				6464
				6465	kmem_cache_free(skbuff_ext_cache, ext);
				6466	}
				6467	EXPORT_SYMBOL(__skb_ext_put);
				6468	#endif /* CONFIG_SKB_EXTENSIONS */