   1/*
2 * Back-end of the driver for virtual network devices. This portion of the
3 * driver exports a 'unified' network-device interface that can be accessed
4 * by any operating system that implements a compatible front end. A
5 * reference front-end implementation can be found in:
6 * drivers/net/xen-netfront.c
7 *
8 * Copyright (c) 2002-2005, K A Fraser
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation; or, when distributed
13 * separately from the Linux kernel or incorporated into other
14 * software packages, subject to the following license:
15 *
16 * Permission is hereby granted, free of charge, to any person obtaining a copy
17 * of this source file (the "Software"), to deal in the Software without
18 * restriction, including without limitation the rights to use, copy, modify,
19 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
20 * and to permit persons to whom the Software is furnished to do so, subject to
21 * the following conditions:
22 *
23 * The above copyright notice and this permission notice shall be included in
24 * all copies or substantial portions of the Software.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
32 * IN THE SOFTWARE.
33 */
34
35#include "common.h"
36
37#include <linux/kthread.h>
38#include <linux/if_vlan.h>
39#include <linux/udp.h>
40#include <linux/highmem.h>
41
42#include <net/tcp.h>
43
44#include <xen/xen.h>
45#include <xen/events.h>
46#include <xen/interface/memory.h>
47#include <xen/page.h>
48
49#include <asm/xen/hypercall.h>
50
51/* Provide an option to disable split event channels at load time as
   52 * event channels are a limited resource. Split event channels are
53 * enabled by default.
54 */
55bool separate_tx_rx_irq = true;
56module_param(separate_tx_rx_irq, bool, 0644);
57
58/* The time that packets can stay on the guest Rx internal queue
59 * before they are dropped.
60 */
61unsigned int rx_drain_timeout_msecs = 10000;
62module_param(rx_drain_timeout_msecs, uint, 0444);
63
64/* The length of time before the frontend is considered unresponsive
65 * because it isn't providing Rx slots.
66 */
67unsigned int rx_stall_timeout_msecs = 60000;
68module_param(rx_stall_timeout_msecs, uint, 0444);
69
70#define MAX_QUEUES_DEFAULT 8
71unsigned int xenvif_max_queues;
72module_param_named(max_queues, xenvif_max_queues, uint, 0644);
73MODULE_PARM_DESC(max_queues,
74 "Maximum number of queues per virtual interface");
75
76/*
   77 * This is the maximum number of slots a skb can have. If a guest sends
   78 * a skb which exceeds this limit, it is considered malicious.
79 */
80#define FATAL_SKB_SLOTS_DEFAULT 20
81static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
82module_param(fatal_skb_slots, uint, 0444);
83
84/* The amount to copy out of the first guest Tx slot into the skb's
85 * linear area. If the first slot has more data, it will be mapped
86 * and put into the first frag.
87 *
88 * This is sized to avoid pulling headers from the frags for most
89 * TCP/IP packets.
90 */
91#define XEN_NETBACK_TX_COPY_LEN 128
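/* For reference: a VLAN-tagged Ethernet + IPv4 + TCP header without options
 * is 18 + 20 + 20 = 58 bytes, and even with typical TCP options (or IPv6)
 * it stays well below 128 bytes, so the protocol headers normally end up in
 * the linear area and never need to be pulled out of the frags.
 */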
92
93/* This is the maximum number of flows in the hash cache. */
94#define XENVIF_HASH_CACHE_SIZE_DEFAULT 64
95unsigned int xenvif_hash_cache_size = XENVIF_HASH_CACHE_SIZE_DEFAULT;
96module_param_named(hash_cache_size, xenvif_hash_cache_size, uint, 0644);
97MODULE_PARM_DESC(hash_cache_size, "Number of flows in the hash cache");
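/* For example, assuming the driver is loaded as the xen-netback module, the
 * parameters above can be given at load time:
 *
 *   modprobe xen-netback max_queues=4 hash_cache_size=128 separate_tx_rx_irq=0
 *
 * and the 0644 ones can also be changed at runtime via
 * /sys/module/xen_netback/parameters/.
 */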
98
99static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
100 s8 status);
101
102static void make_tx_response(struct xenvif_queue *queue,
103 const struct xen_netif_tx_request *txp,
104 unsigned int extra_count,
105 s8 status);
106
107static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx);
108
109static inline int tx_work_todo(struct xenvif_queue *queue);
110
111static inline unsigned long idx_to_pfn(struct xenvif_queue *queue,
112 u16 idx)
113{
114 return page_to_pfn(queue->mmap_pages[idx]);
115}
116
117static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,
118 u16 idx)
119{
120 return (unsigned long)pfn_to_kaddr(idx_to_pfn(queue, idx));
121}
122
123#define callback_param(vif, pending_idx) \
124 (vif->pending_tx_info[pending_idx].callback_struct)
125
126/* Find the containing VIF's structure from a pointer in pending_tx_info array
127 */
128static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf)
129{
130 u16 pending_idx = ubuf->desc;
131 struct pending_tx_info *temp =
132 container_of(ubuf, struct pending_tx_info, callback_struct);
133 return container_of(temp - pending_idx,
134 struct xenvif_queue,
135 pending_tx_info[0]);
136}
137
138static u16 frag_get_pending_idx(skb_frag_t *frag)
139{
140 return (u16)skb_frag_off(frag);
141}
142
143static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
144{
145 skb_frag_off_set(frag, pending_idx);
146}
147
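/* Reduce a free-running ring counter to a slot number. This relies on
 * MAX_PENDING_REQS being a power of two: e.g. with 256 slots, an index of
 * 259 masks down to slot 3.
 */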
148static inline pending_ring_idx_t pending_index(unsigned i)
149{
150 return i & (MAX_PENDING_REQS-1);
151}
152
153void xenvif_kick_thread(struct xenvif_queue *queue)
154{
155 wake_up(&queue->wq);
156}
157
158void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue)
159{
160 int more_to_do;
161
162 RING_FINAL_CHECK_FOR_REQUESTS(&queue->tx, more_to_do);
163
164 if (more_to_do)
165 napi_schedule(&queue->napi);
166 else if (atomic_fetch_andnot(NETBK_TX_EOI | NETBK_COMMON_EOI,
167 &queue->eoi_pending) &
168 (NETBK_TX_EOI | NETBK_COMMON_EOI))
169 xen_irq_lateeoi(queue->tx_irq, 0);
170}
171
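/* Top up the queue's transmit credit. A worked example, assuming
 * credit_bytes = 65536 and remaining_credit = 1000: max_credit becomes
 * 66536, max_burst is max(131072, 65536) = 131072, so the new
 * remaining_credit is min(66536, 131072) = 66536.
 */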
172static void tx_add_credit(struct xenvif_queue *queue)
173{
174 unsigned long max_burst, max_credit;
175
176 /*
177 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
178 * Otherwise the interface can seize up due to insufficient credit.
179 */
180 max_burst = max(131072UL, queue->credit_bytes);
181
182 /* Take care that adding a new chunk of credit doesn't wrap to zero. */
183 max_credit = queue->remaining_credit + queue->credit_bytes;
184 if (max_credit < queue->remaining_credit)
185 max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
186
187 queue->remaining_credit = min(max_credit, max_burst);
188 queue->rate_limited = false;
189}
190
191void xenvif_tx_credit_callback(struct timer_list *t)
192{
193 struct xenvif_queue *queue = from_timer(queue, t, credit_timeout);
194 tx_add_credit(queue);
195 xenvif_napi_schedule_or_enable_events(queue);
196}
197
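/* Fail the remainder of a packet: send an error response for the current
 * request and for every following request up to 'end', consuming them from
 * the ring as we go. Only the first request can carry extra info slots.
 */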
198static void xenvif_tx_err(struct xenvif_queue *queue,
199 struct xen_netif_tx_request *txp,
200 unsigned int extra_count, RING_IDX end)
201{
202 RING_IDX cons = queue->tx.req_cons;
203
204 do {
205 make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR);
206 if (cons == end)
207 break;
208 RING_COPY_REQUEST(&queue->tx, cons++, txp);
209 extra_count = 0; /* only the first frag can have extras */
210 } while (1);
211 queue->tx.req_cons = cons;
212}
213
214static void xenvif_fatal_tx_err(struct xenvif *vif)
215{
216 netdev_err(vif->dev, "fatal error; disabling device\n");
217 vif->disabled = true;
218 /* Disable the vif from queue 0's kthread */
219 if (vif->num_queues)
220 xenvif_kick_thread(&vif->queues[0]);
221}
222
223static int xenvif_count_requests(struct xenvif_queue *queue,
224 struct xen_netif_tx_request *first,
225 unsigned int extra_count,
226 struct xen_netif_tx_request *txp,
227 int work_to_do)
228{
229 RING_IDX cons = queue->tx.req_cons;
230 int slots = 0;
231 int drop_err = 0;
232 int more_data;
233
234 if (!(first->flags & XEN_NETTXF_more_data))
235 return 0;
236
237 do {
238 struct xen_netif_tx_request dropped_tx = { 0 };
239
240 if (slots >= work_to_do) {
241 netdev_err(queue->vif->dev,
242 "Asked for %d slots but exceeds this limit\n",
243 work_to_do);
244 xenvif_fatal_tx_err(queue->vif);
245 return -ENODATA;
246 }
247
248 /* This guest is really using too many slots and
  249		 * is considered malicious.
250 */
251 if (unlikely(slots >= fatal_skb_slots)) {
252 netdev_err(queue->vif->dev,
253 "Malicious frontend using %d slots, threshold %u\n",
254 slots, fatal_skb_slots);
255 xenvif_fatal_tx_err(queue->vif);
256 return -E2BIG;
257 }
258
  259		/* The Xen network protocol had an implicit dependency on
  260		 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
  261		 * the historical MAX_SKB_FRAGS value 18 to honor the
  262		 * same behavior as before. Any packet using more than
  263		 * 18 slots but fewer than fatal_skb_slots slots is
  264		 * dropped.
265 */
266 if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
267 if (net_ratelimit())
268 netdev_dbg(queue->vif->dev,
269 "Too many slots (%d) exceeding limit (%d), dropping packet\n",
270 slots, XEN_NETBK_LEGACY_SLOTS_MAX);
271 drop_err = -E2BIG;
272 }
273
274 if (drop_err)
275 txp = &dropped_tx;
276
277 RING_COPY_REQUEST(&queue->tx, cons + slots, txp);
278
279 /* If the guest submitted a frame >= 64 KiB then
280 * first->size overflowed and following slots will
281 * appear to be larger than the frame.
282 *
  283		 * This cannot be a fatal error as there are buggy
284 * frontends that do this.
285 *
286 * Consume all slots and drop the packet.
287 */
288 if (!drop_err && txp->size > first->size) {
289 if (net_ratelimit())
290 netdev_dbg(queue->vif->dev,
291 "Invalid tx request, slot size %u > remaining size %u\n",
292 txp->size, first->size);
293 drop_err = -EIO;
294 }
295
296 first->size -= txp->size;
297 slots++;
298
299 if (unlikely((txp->offset + txp->size) > XEN_PAGE_SIZE)) {
300 netdev_err(queue->vif->dev, "Cross page boundary, txp->offset: %u, size: %u\n",
301 txp->offset, txp->size);
302 xenvif_fatal_tx_err(queue->vif);
303 return -EINVAL;
304 }
305
306 more_data = txp->flags & XEN_NETTXF_more_data;
307
308 if (!drop_err)
309 txp++;
310
311 } while (more_data);
312
313 if (drop_err) {
314 xenvif_tx_err(queue, first, extra_count, cons + slots);
315 return drop_err;
316 }
317
318 return slots;
319}
320
321
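/* Per-skb control data kept in skb->cb while a packet is being built:
 * the pending ring indices of the slots grant-copied into the linear area,
 * how many such copies there are, and a bitmask flagging copies that had to
 * be split across a local page boundary.
 */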
322struct xenvif_tx_cb {
323 u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + 1];
324 u8 copy_count;
325 u32 split_mask;
326};
327
328#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
329#define copy_pending_idx(skb, i) (XENVIF_TX_CB(skb)->copy_pending_idx[i])
330#define copy_count(skb) (XENVIF_TX_CB(skb)->copy_count)
331
332static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue,
333 u16 pending_idx,
334 struct xen_netif_tx_request *txp,
335 unsigned int extra_count,
336 struct gnttab_map_grant_ref *mop)
337{
338 queue->pages_to_map[mop-queue->tx_map_ops] = queue->mmap_pages[pending_idx];
339 gnttab_set_map_op(mop, idx_to_kaddr(queue, pending_idx),
340 GNTMAP_host_map | GNTMAP_readonly,
341 txp->gref, queue->vif->domid);
342
343 memcpy(&queue->pending_tx_info[pending_idx].req, txp,
344 sizeof(*txp));
345 queue->pending_tx_info[pending_idx].extra_count = extra_count;
346}
347
348static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
349{
350 struct sk_buff *skb =
351 alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN,
352 GFP_ATOMIC | __GFP_NOWARN);
353
354 BUILD_BUG_ON(sizeof(*XENVIF_TX_CB(skb)) > sizeof(skb->cb));
355 if (unlikely(skb == NULL))
356 return NULL;
357
358 /* Packets passed to netif_rx() must have some headroom. */
359 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
360
361 /* Initialize it here to avoid later surprises */
362 skb_shinfo(skb)->destructor_arg = NULL;
363
364 return skb;
365}
366
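/* Turn one frontend packet (the first request plus its fragment requests)
 * into grant operations: copy ops for the data_len bytes that go into the
 * skb's linear area, then map ops for the remaining slots, which become
 * frags. Slots that do not fit in MAX_SKB_FRAGS spill over into nskb, which
 * is linked in as a frag_list (or freed again if it turns out to be unneeded).
 */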
367static void xenvif_get_requests(struct xenvif_queue *queue,
368 struct sk_buff *skb,
369 struct xen_netif_tx_request *first,
370 struct xen_netif_tx_request *txfrags,
371 unsigned *copy_ops,
372 unsigned *map_ops,
373 unsigned int frag_overflow,
374 struct sk_buff *nskb,
375 unsigned int extra_count,
376 unsigned int data_len)
377{
378 struct skb_shared_info *shinfo = skb_shinfo(skb);
379 skb_frag_t *frags = shinfo->frags;
380 u16 pending_idx;
381 pending_ring_idx_t index;
382 unsigned int nr_slots;
383 struct gnttab_copy *cop = queue->tx_copy_ops + *copy_ops;
384 struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops;
385 struct xen_netif_tx_request *txp = first;
386
387 nr_slots = shinfo->nr_frags + frag_overflow + 1;
388
389 copy_count(skb) = 0;
390 XENVIF_TX_CB(skb)->split_mask = 0;
391
392 /* Create copy ops for exactly data_len bytes into the skb head. */
393 __skb_put(skb, data_len);
394 while (data_len > 0) {
395 int amount = data_len > txp->size ? txp->size : data_len;
396 bool split = false;
397
398 cop->source.u.ref = txp->gref;
399 cop->source.domid = queue->vif->domid;
400 cop->source.offset = txp->offset;
401
402 cop->dest.domid = DOMID_SELF;
403 cop->dest.offset = (offset_in_page(skb->data +
404 skb_headlen(skb) -
405 data_len)) & ~XEN_PAGE_MASK;
406 cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb)
407 - data_len);
408
409 /* Don't cross local page boundary! */
410 if (cop->dest.offset + amount > XEN_PAGE_SIZE) {
411 amount = XEN_PAGE_SIZE - cop->dest.offset;
412 XENVIF_TX_CB(skb)->split_mask |= 1U << copy_count(skb);
413 split = true;
414 }
415
416 cop->len = amount;
417 cop->flags = GNTCOPY_source_gref;
418
419 index = pending_index(queue->pending_cons);
420 pending_idx = queue->pending_ring[index];
421 callback_param(queue, pending_idx).ctx = NULL;
422 copy_pending_idx(skb, copy_count(skb)) = pending_idx;
423 if (!split)
424 copy_count(skb)++;
425
426 cop++;
427 data_len -= amount;
428
429 if (amount == txp->size) {
430 /* The copy op covered the full tx_request */
431
432 memcpy(&queue->pending_tx_info[pending_idx].req,
433 txp, sizeof(*txp));
434 queue->pending_tx_info[pending_idx].extra_count =
435 (txp == first) ? extra_count : 0;
436
437 if (txp == first)
438 txp = txfrags;
439 else
440 txp++;
441 queue->pending_cons++;
442 nr_slots--;
443 } else {
444 /* The copy op partially covered the tx_request.
445 * The remainder will be mapped or copied in the next
446 * iteration.
447 */
448 txp->offset += amount;
449 txp->size -= amount;
450 }
451 }
452
453 for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS;
454 nr_slots--) {
455 if (unlikely(!txp->size)) {
456 make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY);
457 ++txp;
458 continue;
459 }
460
461 index = pending_index(queue->pending_cons++);
462 pending_idx = queue->pending_ring[index];
463 xenvif_tx_create_map_op(queue, pending_idx, txp,
464 txp == first ? extra_count : 0, gop);
465 frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
466 ++shinfo->nr_frags;
467 ++gop;
468
469 if (txp == first)
470 txp = txfrags;
471 else
472 txp++;
473 }
474
475 if (nr_slots > 0) {
476
477 shinfo = skb_shinfo(nskb);
478 frags = shinfo->frags;
479
480 for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; ++txp) {
481 if (unlikely(!txp->size)) {
482 make_tx_response(queue, txp, 0,
483 XEN_NETIF_RSP_OKAY);
484 continue;
485 }
486
487 index = pending_index(queue->pending_cons++);
488 pending_idx = queue->pending_ring[index];
489 xenvif_tx_create_map_op(queue, pending_idx, txp, 0,
490 gop);
491 frag_set_pending_idx(&frags[shinfo->nr_frags],
492 pending_idx);
493 ++shinfo->nr_frags;
494 ++gop;
495 }
496
497 if (shinfo->nr_frags) {
498 skb_shinfo(skb)->frag_list = nskb;
499 nskb = NULL;
500 }
501 }
502
503 if (nskb) {
504 /* A frag_list skb was allocated but it is no longer needed
505 * because enough slots were converted to copy ops above or some
506 * were empty.
507 */
508 kfree_skb(nskb);
509 }
510
511 (*copy_ops) = cop - queue->tx_copy_ops;
512 (*map_ops) = gop - queue->tx_map_ops;
513}
514
515static inline void xenvif_grant_handle_set(struct xenvif_queue *queue,
516 u16 pending_idx,
517 grant_handle_t handle)
518{
519 if (unlikely(queue->grant_tx_handle[pending_idx] !=
520 NETBACK_INVALID_HANDLE)) {
521 netdev_err(queue->vif->dev,
522 "Trying to overwrite active handle! pending_idx: 0x%x\n",
523 pending_idx);
524 BUG();
525 }
526 queue->grant_tx_handle[pending_idx] = handle;
527}
528
529static inline void xenvif_grant_handle_reset(struct xenvif_queue *queue,
530 u16 pending_idx)
531{
532 if (unlikely(queue->grant_tx_handle[pending_idx] ==
533 NETBACK_INVALID_HANDLE)) {
534 netdev_err(queue->vif->dev,
535 "Trying to unmap invalid handle! pending_idx: 0x%x\n",
536 pending_idx);
537 BUG();
538 }
539 queue->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
540}
541
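/* Check the status of the grant copy and map operations issued for this skb
 * (and for its frag_list skb, if there is one). On the first failure the
 * offending slot gets an error response and all other slots of the packet
 * are unmapped and released, so nothing is left dangling. Returns 0 on
 * success or the first error status seen.
 */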
542static int xenvif_tx_check_gop(struct xenvif_queue *queue,
543 struct sk_buff *skb,
544 struct gnttab_map_grant_ref **gopp_map,
545 struct gnttab_copy **gopp_copy)
546{
547 struct gnttab_map_grant_ref *gop_map = *gopp_map;
548 u16 pending_idx;
549 /* This always points to the shinfo of the skb being checked, which
550 * could be either the first or the one on the frag_list
551 */
552 struct skb_shared_info *shinfo = skb_shinfo(skb);
553 /* If this is non-NULL, we are currently checking the frag_list skb, and
554 * this points to the shinfo of the first one
555 */
556 struct skb_shared_info *first_shinfo = NULL;
557 int nr_frags = shinfo->nr_frags;
558 const bool sharedslot = nr_frags &&
559 frag_get_pending_idx(&shinfo->frags[0]) ==
560 copy_pending_idx(skb, copy_count(skb) - 1);
561 int i, err = 0;
562
563 for (i = 0; i < copy_count(skb); i++) {
564 int newerr;
565
566 /* Check status of header. */
567 pending_idx = copy_pending_idx(skb, i);
568
569 newerr = (*gopp_copy)->status;
570
571 /* Split copies need to be handled together. */
572 if (XENVIF_TX_CB(skb)->split_mask & (1U << i)) {
573 (*gopp_copy)++;
574 if (!newerr)
575 newerr = (*gopp_copy)->status;
576 }
577 if (likely(!newerr)) {
578 /* The first frag might still have this slot mapped */
579 if (i < copy_count(skb) - 1 || !sharedslot)
580 xenvif_idx_release(queue, pending_idx,
581 XEN_NETIF_RSP_OKAY);
582 } else {
583 err = newerr;
584 if (net_ratelimit())
585 netdev_dbg(queue->vif->dev,
586 "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
587 (*gopp_copy)->status,
588 pending_idx,
589 (*gopp_copy)->source.u.ref);
590 /* The first frag might still have this slot mapped */
591 if (i < copy_count(skb) - 1 || !sharedslot)
592 xenvif_idx_release(queue, pending_idx,
593 XEN_NETIF_RSP_ERROR);
594 }
595 (*gopp_copy)++;
596 }
597
598check_frags:
599 for (i = 0; i < nr_frags; i++, gop_map++) {
600 int j, newerr;
601
602 pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
603
604 /* Check error status: if okay then remember grant handle. */
605 newerr = gop_map->status;
606
607 if (likely(!newerr)) {
608 xenvif_grant_handle_set(queue,
609 pending_idx,
610 gop_map->handle);
611 /* Had a previous error? Invalidate this fragment. */
612 if (unlikely(err)) {
613 xenvif_idx_unmap(queue, pending_idx);
614 /* If the mapping of the first frag was OK, but
615 * the header's copy failed, and they are
  616				 * sharing a slot, send an error.
617 */
618 if (i == 0 && !first_shinfo && sharedslot)
619 xenvif_idx_release(queue, pending_idx,
620 XEN_NETIF_RSP_ERROR);
621 else
622 xenvif_idx_release(queue, pending_idx,
623 XEN_NETIF_RSP_OKAY);
624 }
625 continue;
626 }
627
628 /* Error on this fragment: respond to client with an error. */
629 if (net_ratelimit())
630 netdev_dbg(queue->vif->dev,
631 "Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n",
632 i,
633 gop_map->status,
634 pending_idx,
635 gop_map->ref);
636
637 xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);
638
639 /* Not the first error? Preceding frags already invalidated. */
640 if (err)
641 continue;
642
643 /* Invalidate preceding fragments of this skb. */
644 for (j = 0; j < i; j++) {
645 pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
646 xenvif_idx_unmap(queue, pending_idx);
647 xenvif_idx_release(queue, pending_idx,
648 XEN_NETIF_RSP_OKAY);
649 }
650
651 /* And if we found the error while checking the frag_list, unmap
652 * the first skb's frags
653 */
654 if (first_shinfo) {
655 for (j = 0; j < first_shinfo->nr_frags; j++) {
656 pending_idx = frag_get_pending_idx(&first_shinfo->frags[j]);
657 xenvif_idx_unmap(queue, pending_idx);
658 xenvif_idx_release(queue, pending_idx,
659 XEN_NETIF_RSP_OKAY);
660 }
661 }
662
663 /* Remember the error: invalidate all subsequent fragments. */
664 err = newerr;
665 }
666
667 if (skb_has_frag_list(skb) && !first_shinfo) {
668 first_shinfo = skb_shinfo(skb);
669 shinfo = skb_shinfo(skb_shinfo(skb)->frag_list);
670 nr_frags = shinfo->nr_frags;
671
672 goto check_frags;
673 }
674
675 *gopp_map = gop_map;
676 return err;
677}
678
679static void xenvif_fill_frags(struct xenvif_queue *queue, struct sk_buff *skb)
680{
681 struct skb_shared_info *shinfo = skb_shinfo(skb);
682 int nr_frags = shinfo->nr_frags;
683 int i;
684 u16 prev_pending_idx = INVALID_PENDING_IDX;
685
686 for (i = 0; i < nr_frags; i++) {
687 skb_frag_t *frag = shinfo->frags + i;
688 struct xen_netif_tx_request *txp;
689 struct page *page;
690 u16 pending_idx;
691
692 pending_idx = frag_get_pending_idx(frag);
693
  694		/* If this is not the first frag, chain it to the previous one */
695 if (prev_pending_idx == INVALID_PENDING_IDX)
696 skb_shinfo(skb)->destructor_arg =
697 &callback_param(queue, pending_idx);
698 else
699 callback_param(queue, prev_pending_idx).ctx =
700 &callback_param(queue, pending_idx);
701
702 callback_param(queue, pending_idx).ctx = NULL;
703 prev_pending_idx = pending_idx;
704
705 txp = &queue->pending_tx_info[pending_idx].req;
706 page = virt_to_page(idx_to_kaddr(queue, pending_idx));
707 __skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
708 skb->len += txp->size;
709 skb->data_len += txp->size;
710 skb->truesize += txp->size;
711
712 /* Take an extra reference to offset network stack's put_page */
713 get_page(queue->mmap_pages[pending_idx]);
714 }
715}
716
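/* Consume the chain of extra-info slots that follows a tx request, storing
 * each one in extras[] indexed by its type. A truncated chain or an unknown
 * type is treated as a fatal frontend error. Returns the remaining
 * work_to_do on success or a negative errno.
 */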
717static int xenvif_get_extras(struct xenvif_queue *queue,
718 struct xen_netif_extra_info *extras,
719 unsigned int *extra_count,
720 int work_to_do)
721{
722 struct xen_netif_extra_info extra;
723 RING_IDX cons = queue->tx.req_cons;
724
725 do {
726 if (unlikely(work_to_do-- <= 0)) {
727 netdev_err(queue->vif->dev, "Missing extra info\n");
728 xenvif_fatal_tx_err(queue->vif);
729 return -EBADR;
730 }
731
732 RING_COPY_REQUEST(&queue->tx, cons, &extra);
733
734 queue->tx.req_cons = ++cons;
735 (*extra_count)++;
736
737 if (unlikely(!extra.type ||
738 extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
739 netdev_err(queue->vif->dev,
740 "Invalid extra type: %d\n", extra.type);
741 xenvif_fatal_tx_err(queue->vif);
742 return -EINVAL;
743 }
744
745 memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
746 } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
747
748 return work_to_do;
749}
750
751static int xenvif_set_skb_gso(struct xenvif *vif,
752 struct sk_buff *skb,
753 struct xen_netif_extra_info *gso)
754{
755 if (!gso->u.gso.size) {
756 netdev_err(vif->dev, "GSO size must not be zero.\n");
757 xenvif_fatal_tx_err(vif);
758 return -EINVAL;
759 }
760
761 switch (gso->u.gso.type) {
762 case XEN_NETIF_GSO_TYPE_TCPV4:
763 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
764 break;
765 case XEN_NETIF_GSO_TYPE_TCPV6:
766 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
767 break;
768 default:
769 netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
770 xenvif_fatal_tx_err(vif);
771 return -EINVAL;
772 }
773
774 skb_shinfo(skb)->gso_size = gso->u.gso.size;
775 /* gso_segs will be calculated later */
776
777 return 0;
778}
779
780static int checksum_setup(struct xenvif_queue *queue, struct sk_buff *skb)
781{
782 bool recalculate_partial_csum = false;
783
  784	/* A GSO SKB must be CHECKSUM_PARTIAL. However, some buggy
785 * peers can fail to set NETRXF_csum_blank when sending a GSO
786 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
787 * recalculate the partial checksum.
788 */
789 if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
790 queue->stats.rx_gso_checksum_fixup++;
791 skb->ip_summed = CHECKSUM_PARTIAL;
792 recalculate_partial_csum = true;
793 }
794
795 /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
796 if (skb->ip_summed != CHECKSUM_PARTIAL)
797 return 0;
798
799 return skb_checksum_setup(skb, recalculate_partial_csum);
800}
801
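/* Credit-based rate limiting. Credit is replenished at most once per credit
 * window (credit_usec long); if the packet is still larger than the
 * remaining credit, arm the timer for the start of the next window and tell
 * the caller to hold off. E.g. credit_bytes = 10000 with credit_usec =
 * 1000000 works out to roughly 10 kB/s of sustained credit, with bursts up
 * to the 128 kB floor applied in tx_add_credit().
 */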
802static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
803{
804 u64 now = get_jiffies_64();
805 u64 next_credit = queue->credit_window_start +
806 msecs_to_jiffies(queue->credit_usec / 1000);
807
808 /* Timer could already be pending in rare cases. */
809 if (timer_pending(&queue->credit_timeout)) {
810 queue->rate_limited = true;
811 return true;
812 }
813
814 /* Passed the point where we can replenish credit? */
815 if (time_after_eq64(now, next_credit)) {
816 queue->credit_window_start = now;
817 tx_add_credit(queue);
818 }
819
820 /* Still too big to send right now? Set a callback. */
821 if (size > queue->remaining_credit) {
822 mod_timer(&queue->credit_timeout,
823 next_credit);
824 queue->credit_window_start = next_credit;
825 queue->rate_limited = true;
826
827 return true;
828 }
829
830 return false;
831}
832
833/* No locking is required in xenvif_mcast_add/del() as they are
834 * only ever invoked from NAPI poll. An RCU list is used because
835 * xenvif_mcast_match() is called asynchronously, during start_xmit.
836 */
837
838static int xenvif_mcast_add(struct xenvif *vif, const u8 *addr)
839{
840 struct xenvif_mcast_addr *mcast;
841
842 if (vif->fe_mcast_count == XEN_NETBK_MCAST_MAX) {
843 if (net_ratelimit())
844 netdev_err(vif->dev,
845 "Too many multicast addresses\n");
846 return -ENOSPC;
847 }
848
849 mcast = kzalloc(sizeof(*mcast), GFP_ATOMIC);
850 if (!mcast)
851 return -ENOMEM;
852
853 ether_addr_copy(mcast->addr, addr);
854 list_add_tail_rcu(&mcast->entry, &vif->fe_mcast_addr);
855 vif->fe_mcast_count++;
856
857 return 0;
858}
859
860static void xenvif_mcast_del(struct xenvif *vif, const u8 *addr)
861{
862 struct xenvif_mcast_addr *mcast;
863
864 list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) {
865 if (ether_addr_equal(addr, mcast->addr)) {
866 --vif->fe_mcast_count;
867 list_del_rcu(&mcast->entry);
868 kfree_rcu(mcast, rcu);
869 break;
870 }
871 }
872}
873
874bool xenvif_mcast_match(struct xenvif *vif, const u8 *addr)
875{
876 struct xenvif_mcast_addr *mcast;
877
878 rcu_read_lock();
879 list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) {
880 if (ether_addr_equal(addr, mcast->addr)) {
881 rcu_read_unlock();
882 return true;
883 }
884 }
885 rcu_read_unlock();
886
887 return false;
888}
889
890void xenvif_mcast_addr_list_free(struct xenvif *vif)
891{
892 /* No need for locking or RCU here. NAPI poll and TX queue
893 * are stopped.
894 */
895 while (!list_empty(&vif->fe_mcast_addr)) {
896 struct xenvif_mcast_addr *mcast;
897
898 mcast = list_first_entry(&vif->fe_mcast_addr,
899 struct xenvif_mcast_addr,
900 entry);
901 --vif->fe_mcast_count;
902 list_del(&mcast->entry);
903 kfree(mcast);
904 }
905}
906
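/* Main TX gather loop: pull requests off the shared ring, apply credit
 * scheduling and sanity checks, handle extra-info slots (GSO, hash,
 * multicast add/del), and convert each accepted packet into an skb plus the
 * grant copy/map operations that will pull its data into the backend. Stops
 * when the budget, the ring, or the operation arrays run out.
 */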
907static void xenvif_tx_build_gops(struct xenvif_queue *queue,
908 int budget,
909 unsigned *copy_ops,
910 unsigned *map_ops)
911{
912 struct sk_buff *skb, *nskb;
913 int ret;
914 unsigned int frag_overflow;
915
916 while (skb_queue_len(&queue->tx_queue) < budget) {
917 struct xen_netif_tx_request txreq;
918 struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
919 struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
920 unsigned int extra_count;
921 u16 pending_idx;
922 RING_IDX idx;
923 int work_to_do;
924 unsigned int data_len;
925 pending_ring_idx_t index;
926
927 if (queue->tx.sring->req_prod - queue->tx.req_cons >
928 XEN_NETIF_TX_RING_SIZE) {
929 netdev_err(queue->vif->dev,
930 "Impossible number of requests. "
931 "req_prod %d, req_cons %d, size %ld\n",
932 queue->tx.sring->req_prod, queue->tx.req_cons,
933 XEN_NETIF_TX_RING_SIZE);
934 xenvif_fatal_tx_err(queue->vif);
935 break;
936 }
937
938 work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx);
939 if (!work_to_do)
940 break;
941
942 idx = queue->tx.req_cons;
943 rmb(); /* Ensure that we see the request before we copy it. */
944 RING_COPY_REQUEST(&queue->tx, idx, &txreq);
945
946 /* Credit-based scheduling. */
947 if (txreq.size > queue->remaining_credit &&
948 tx_credit_exceeded(queue, txreq.size))
949 break;
950
951 queue->remaining_credit -= txreq.size;
952
953 work_to_do--;
954 queue->tx.req_cons = ++idx;
955
956 memset(extras, 0, sizeof(extras));
957 extra_count = 0;
958 if (txreq.flags & XEN_NETTXF_extra_info) {
959 work_to_do = xenvif_get_extras(queue, extras,
960 &extra_count,
961 work_to_do);
962 idx = queue->tx.req_cons;
963 if (unlikely(work_to_do < 0))
964 break;
965 }
966
967 if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - 1].type) {
968 struct xen_netif_extra_info *extra;
969
970 extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - 1];
971 ret = xenvif_mcast_add(queue->vif, extra->u.mcast.addr);
972
973 make_tx_response(queue, &txreq, extra_count,
974 (ret == 0) ?
975 XEN_NETIF_RSP_OKAY :
976 XEN_NETIF_RSP_ERROR);
977 continue;
978 }
979
980 if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - 1].type) {
981 struct xen_netif_extra_info *extra;
982
983 extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - 1];
984 xenvif_mcast_del(queue->vif, extra->u.mcast.addr);
985
986 make_tx_response(queue, &txreq, extra_count,
987 XEN_NETIF_RSP_OKAY);
988 continue;
989 }
990
991 data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN) ?
992 XEN_NETBACK_TX_COPY_LEN : txreq.size;
993
994 ret = xenvif_count_requests(queue, &txreq, extra_count,
995 txfrags, work_to_do);
996
997 if (unlikely(ret < 0))
998 break;
999
1000 idx += ret;
1001
1002 if (unlikely(txreq.size < ETH_HLEN)) {
1003 netdev_dbg(queue->vif->dev,
1004 "Bad packet size: %d\n", txreq.size);
1005 xenvif_tx_err(queue, &txreq, extra_count, idx);
1006 break;
1007 }
1008
 1009		/* No crossing a page boundary, as the payload mustn't be fragmented. */
1010 if (unlikely((txreq.offset + txreq.size) > XEN_PAGE_SIZE)) {
1011 netdev_err(queue->vif->dev, "Cross page boundary, txreq.offset: %u, size: %u\n",
1012 txreq.offset, txreq.size);
1013 xenvif_fatal_tx_err(queue->vif);
1014 break;
1015 }
1016
1017 index = pending_index(queue->pending_cons);
1018 pending_idx = queue->pending_ring[index];
1019
1020 if (ret >= XEN_NETBK_LEGACY_SLOTS_MAX - 1 && data_len < txreq.size)
1021 data_len = txreq.size;
1022
1023 skb = xenvif_alloc_skb(data_len);
1024 if (unlikely(skb == NULL)) {
1025 netdev_dbg(queue->vif->dev,
1026 "Can't allocate a skb in start_xmit.\n");
1027 xenvif_tx_err(queue, &txreq, extra_count, idx);
1028 break;
1029 }
1030
1031 skb_shinfo(skb)->nr_frags = ret;
1032 /* At this point shinfo->nr_frags is in fact the number of
1033 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
1034 */
1035 frag_overflow = 0;
1036 nskb = NULL;
1037 if (skb_shinfo(skb)->nr_frags > MAX_SKB_FRAGS) {
1038 frag_overflow = skb_shinfo(skb)->nr_frags - MAX_SKB_FRAGS;
1039 BUG_ON(frag_overflow > MAX_SKB_FRAGS);
1040 skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS;
1041 nskb = xenvif_alloc_skb(0);
1042 if (unlikely(nskb == NULL)) {
1043 skb_shinfo(skb)->nr_frags = 0;
1044 kfree_skb(skb);
1045 xenvif_tx_err(queue, &txreq, extra_count, idx);
1046 if (net_ratelimit())
1047 netdev_err(queue->vif->dev,
1048 "Can't allocate the frag_list skb.\n");
1049 break;
1050 }
1051 }
1052
1053 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1054 struct xen_netif_extra_info *gso;
1055 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1056
1057 if (xenvif_set_skb_gso(queue->vif, skb, gso)) {
1058 /* Failure in xenvif_set_skb_gso is fatal. */
1059 skb_shinfo(skb)->nr_frags = 0;
1060 kfree_skb(skb);
1061 kfree_skb(nskb);
1062 break;
1063 }
1064 }
1065
1066 if (extras[XEN_NETIF_EXTRA_TYPE_HASH - 1].type) {
1067 struct xen_netif_extra_info *extra;
1068 enum pkt_hash_types type = PKT_HASH_TYPE_NONE;
1069
1070 extra = &extras[XEN_NETIF_EXTRA_TYPE_HASH - 1];
1071
1072 switch (extra->u.hash.type) {
1073 case _XEN_NETIF_CTRL_HASH_TYPE_IPV4:
1074 case _XEN_NETIF_CTRL_HASH_TYPE_IPV6:
1075 type = PKT_HASH_TYPE_L3;
1076 break;
1077
1078 case _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP:
1079 case _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP:
1080 type = PKT_HASH_TYPE_L4;
1081 break;
1082
1083 default:
1084 break;
1085 }
1086
1087 if (type != PKT_HASH_TYPE_NONE)
1088 skb_set_hash(skb,
1089 *(u32 *)extra->u.hash.value,
1090 type);
1091 }
1092
1093 xenvif_get_requests(queue, skb, &txreq, txfrags, copy_ops,
1094 map_ops, frag_overflow, nskb, extra_count,
1095 data_len);
1096
1097 __skb_queue_tail(&queue->tx_queue, skb);
1098
1099 queue->tx.req_cons = idx;
1100
1101 if ((*map_ops >= ARRAY_SIZE(queue->tx_map_ops)) ||
1102 (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops)))
1103 break;
1104 }
1105
1106 return;
1107}
1108
 1109/* Consolidate an skb with a frag_list into a brand new one with local pages
 1110 * in its frags. Returns 0, or -ENOMEM if new pages can't be allocated.
1111 */
1112static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *skb)
1113{
1114 unsigned int offset = skb_headlen(skb);
1115 skb_frag_t frags[MAX_SKB_FRAGS];
1116 int i, f;
1117 struct ubuf_info *uarg;
1118 struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
1119
1120 queue->stats.tx_zerocopy_sent += 2;
1121 queue->stats.tx_frag_overflow++;
1122
1123 xenvif_fill_frags(queue, nskb);
 1124	/* Subtract the frags' size; we will correct it later */
1125 skb->truesize -= skb->data_len;
1126 skb->len += nskb->len;
1127 skb->data_len += nskb->len;
1128
1129 /* create a brand new frags array and coalesce there */
1130 for (i = 0; offset < skb->len; i++) {
1131 struct page *page;
1132 unsigned int len;
1133
1134 BUG_ON(i >= MAX_SKB_FRAGS);
1135 page = alloc_page(GFP_ATOMIC);
1136 if (!page) {
1137 int j;
1138 skb->truesize += skb->data_len;
1139 for (j = 0; j < i; j++)
1140 put_page(skb_frag_page(&frags[j]));
1141 return -ENOMEM;
1142 }
1143
1144 if (offset + PAGE_SIZE < skb->len)
1145 len = PAGE_SIZE;
1146 else
1147 len = skb->len - offset;
1148 if (skb_copy_bits(skb, offset, page_address(page), len))
1149 BUG();
1150
1151 offset += len;
1152 __skb_frag_set_page(&frags[i], page);
1153 skb_frag_off_set(&frags[i], 0);
1154 skb_frag_size_set(&frags[i], len);
1155 }
1156
1157 /* Release all the original (foreign) frags. */
1158 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
1159 skb_frag_unref(skb, f);
1160 uarg = skb_shinfo(skb)->destructor_arg;
1161 /* increase inflight counter to offset decrement in callback */
1162 atomic_inc(&queue->inflight_packets);
1163 uarg->callback(uarg, true);
1164 skb_shinfo(skb)->destructor_arg = NULL;
1165
1166 /* Fill the skb with the new (local) frags. */
1167 memcpy(skb_shinfo(skb)->frags, frags, i * sizeof(skb_frag_t));
1168 skb_shinfo(skb)->nr_frags = i;
1169 skb->truesize += i * PAGE_SIZE;
1170
1171 return 0;
1172}
1173
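/* Second half of TX processing: for every skb queued by
 * xenvif_tx_build_gops(), check the outcome of its grant operations, attach
 * the mapped frags, flatten any frag_list, fix up checksum and GSO metadata,
 * and finally hand the packet to the network stack. Returns the number of
 * packets delivered.
 */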
1174static int xenvif_tx_submit(struct xenvif_queue *queue)
1175{
1176 struct gnttab_map_grant_ref *gop_map = queue->tx_map_ops;
1177 struct gnttab_copy *gop_copy = queue->tx_copy_ops;
1178 struct sk_buff *skb;
1179 int work_done = 0;
1180
1181 while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) {
1182 struct xen_netif_tx_request *txp;
1183 u16 pending_idx;
1184
1185 pending_idx = copy_pending_idx(skb, 0);
1186 txp = &queue->pending_tx_info[pending_idx].req;
1187
1188 /* Check the remap error code. */
1189 if (unlikely(xenvif_tx_check_gop(queue, skb, &gop_map, &gop_copy))) {
1190 /* If there was an error, xenvif_tx_check_gop is
1191 * expected to release all the frags which were mapped,
1192 * so kfree_skb shouldn't do it again
1193 */
1194 skb_shinfo(skb)->nr_frags = 0;
1195 if (skb_has_frag_list(skb)) {
1196 struct sk_buff *nskb =
1197 skb_shinfo(skb)->frag_list;
1198 skb_shinfo(nskb)->nr_frags = 0;
1199 }
1200 kfree_skb(skb);
1201 continue;
1202 }
1203
1204 if (txp->flags & XEN_NETTXF_csum_blank)
1205 skb->ip_summed = CHECKSUM_PARTIAL;
1206 else if (txp->flags & XEN_NETTXF_data_validated)
1207 skb->ip_summed = CHECKSUM_UNNECESSARY;
1208
1209 xenvif_fill_frags(queue, skb);
1210
1211 if (unlikely(skb_has_frag_list(skb))) {
1212 struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
1213 xenvif_skb_zerocopy_prepare(queue, nskb);
1214 if (xenvif_handle_frag_list(queue, skb)) {
1215 if (net_ratelimit())
1216 netdev_err(queue->vif->dev,
1217 "Not enough memory to consolidate frag_list!\n");
1218 xenvif_skb_zerocopy_prepare(queue, skb);
1219 kfree_skb(skb);
1220 continue;
1221 }
1222 /* Copied all the bits from the frag list -- free it. */
1223 skb_frag_list_init(skb);
1224 kfree_skb(nskb);
1225 }
1226
1227 skb->dev = queue->vif->dev;
1228 skb->protocol = eth_type_trans(skb, skb->dev);
1229 skb_reset_network_header(skb);
1230
1231 if (checksum_setup(queue, skb)) {
1232 netdev_dbg(queue->vif->dev,
1233 "Can't setup checksum in net_tx_action\n");
1234 /* We have to set this flag to trigger the callback */
1235 if (skb_shinfo(skb)->destructor_arg)
1236 xenvif_skb_zerocopy_prepare(queue, skb);
1237 kfree_skb(skb);
1238 continue;
1239 }
1240
1241 skb_probe_transport_header(skb);
1242
1243 /* If the packet is GSO then we will have just set up the
1244 * transport header offset in checksum_setup so it's now
1245 * straightforward to calculate gso_segs.
1246 */
1247 if (skb_is_gso(skb)) {
1248 int mss, hdrlen;
1249
1250 /* GSO implies having the L4 header. */
1251 WARN_ON_ONCE(!skb_transport_header_was_set(skb));
1252 if (unlikely(!skb_transport_header_was_set(skb))) {
1253 kfree_skb(skb);
1254 continue;
1255 }
1256
1257 mss = skb_shinfo(skb)->gso_size;
1258 hdrlen = skb_transport_header(skb) -
1259 skb_mac_header(skb) +
1260 tcp_hdrlen(skb);
1261
1262 skb_shinfo(skb)->gso_segs =
1263 DIV_ROUND_UP(skb->len - hdrlen, mss);
1264 }
1265
1266 queue->stats.rx_bytes += skb->len;
1267 queue->stats.rx_packets++;
1268
1269 work_done++;
1270
1271 /* Set this flag right before netif_receive_skb, otherwise
1272 * someone might think this packet already left netback, and
1273 * do a skb_copy_ubufs while we are still in control of the
 1274		 * skb. E.g. the __pskb_pull_tail earlier can do such a thing.
1275 */
1276 if (skb_shinfo(skb)->destructor_arg) {
1277 xenvif_skb_zerocopy_prepare(queue, skb);
1278 queue->stats.tx_zerocopy_sent++;
1279 }
1280
1281 netif_receive_skb(skb);
1282 }
1283
1284 return work_done;
1285}
1286
1287void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
1288{
1289 unsigned long flags;
1290 pending_ring_idx_t index;
1291 struct xenvif_queue *queue = ubuf_to_queue(ubuf);
1292
1293 /* This is the only place where we grab this lock, to protect callbacks
1294 * from each other.
1295 */
1296 spin_lock_irqsave(&queue->callback_lock, flags);
1297 do {
1298 u16 pending_idx = ubuf->desc;
1299 ubuf = (struct ubuf_info *) ubuf->ctx;
1300 BUG_ON(queue->dealloc_prod - queue->dealloc_cons >=
1301 MAX_PENDING_REQS);
1302 index = pending_index(queue->dealloc_prod);
1303 queue->dealloc_ring[index] = pending_idx;
1304 /* Sync with xenvif_tx_dealloc_action:
1305 * insert idx then incr producer.
1306 */
1307 smp_wmb();
1308 queue->dealloc_prod++;
1309 } while (ubuf);
1310 spin_unlock_irqrestore(&queue->callback_lock, flags);
1311
1312 if (likely(zerocopy_success))
1313 queue->stats.tx_zerocopy_success++;
1314 else
1315 queue->stats.tx_zerocopy_fail++;
1316 xenvif_skb_zerocopy_complete(queue);
1317}
1318
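/* Drain the dealloc ring filled by xenvif_zerocopy_callback(): batch-unmap
 * the corresponding grants and release their pending slots back to the
 * frontend with an OKAY response.
 */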
1319static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
1320{
1321 struct gnttab_unmap_grant_ref *gop;
1322 pending_ring_idx_t dc, dp;
1323 u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
1324 unsigned int i = 0;
1325
1326 dc = queue->dealloc_cons;
1327 gop = queue->tx_unmap_ops;
1328
1329 /* Free up any grants we have finished using */
1330 do {
1331 dp = queue->dealloc_prod;
1332
1333 /* Ensure we see all indices enqueued by all
1334 * xenvif_zerocopy_callback().
1335 */
1336 smp_rmb();
1337
1338 while (dc != dp) {
1339 BUG_ON(gop - queue->tx_unmap_ops >= MAX_PENDING_REQS);
1340 pending_idx =
1341 queue->dealloc_ring[pending_index(dc++)];
1342
1343 pending_idx_release[gop - queue->tx_unmap_ops] =
1344 pending_idx;
1345 queue->pages_to_unmap[gop - queue->tx_unmap_ops] =
1346 queue->mmap_pages[pending_idx];
1347 gnttab_set_unmap_op(gop,
1348 idx_to_kaddr(queue, pending_idx),
1349 GNTMAP_host_map,
1350 queue->grant_tx_handle[pending_idx]);
1351 xenvif_grant_handle_reset(queue, pending_idx);
1352 ++gop;
1353 }
1354
1355 } while (dp != queue->dealloc_prod);
1356
1357 queue->dealloc_cons = dc;
1358
1359 if (gop - queue->tx_unmap_ops > 0) {
1360 int ret;
1361 ret = gnttab_unmap_refs(queue->tx_unmap_ops,
1362 NULL,
1363 queue->pages_to_unmap,
1364 gop - queue->tx_unmap_ops);
1365 if (ret) {
1366 netdev_err(queue->vif->dev, "Unmap fail: nr_ops %tu ret %d\n",
1367 gop - queue->tx_unmap_ops, ret);
1368 for (i = 0; i < gop - queue->tx_unmap_ops; ++i) {
1369 if (gop[i].status != GNTST_okay)
1370 netdev_err(queue->vif->dev,
1371 " host_addr: 0x%llx handle: 0x%x status: %d\n",
1372 gop[i].host_addr,
1373 gop[i].handle,
1374 gop[i].status);
1375 }
1376 BUG();
1377 }
1378 }
1379
1380 for (i = 0; i < gop - queue->tx_unmap_ops; ++i)
1381 xenvif_idx_release(queue, pending_idx_release[i],
1382 XEN_NETIF_RSP_OKAY);
1383}
1384
1385
1386/* Called after netfront has transmitted */
1387int xenvif_tx_action(struct xenvif_queue *queue, int budget)
1388{
1389 unsigned nr_mops = 0, nr_cops = 0;
1390 int work_done, ret;
1391
1392 if (unlikely(!tx_work_todo(queue)))
1393 return 0;
1394
1395 xenvif_tx_build_gops(queue, budget, &nr_cops, &nr_mops);
1396
1397 if (nr_cops == 0)
1398 return 0;
1399
1400 gnttab_batch_copy(queue->tx_copy_ops, nr_cops);
1401 if (nr_mops != 0) {
1402 ret = gnttab_map_refs(queue->tx_map_ops,
1403 NULL,
1404 queue->pages_to_map,
1405 nr_mops);
1406 if (ret) {
1407 unsigned int i;
1408
1409 netdev_err(queue->vif->dev, "Map fail: nr %u ret %d\n",
1410 nr_mops, ret);
1411 for (i = 0; i < nr_mops; ++i)
1412 WARN_ON_ONCE(queue->tx_map_ops[i].status ==
1413 GNTST_okay);
1414 }
1415 }
1416
1417 work_done = xenvif_tx_submit(queue);
1418
1419 return work_done;
1420}
1421
1422static void _make_tx_response(struct xenvif_queue *queue,
1423 const struct xen_netif_tx_request *txp,
1424 unsigned int extra_count,
1425 s8 status)
1426{
1427 RING_IDX i = queue->tx.rsp_prod_pvt;
1428 struct xen_netif_tx_response *resp;
1429
1430 resp = RING_GET_RESPONSE(&queue->tx, i);
1431 resp->id = txp->id;
1432 resp->status = status;
1433
1434 while (extra_count-- != 0)
1435 RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL;
1436
1437 queue->tx.rsp_prod_pvt = ++i;
1438}
1439
1440static void push_tx_responses(struct xenvif_queue *queue)
1441{
1442 int notify;
1443
1444 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
1445 if (notify)
1446 notify_remote_via_irq(queue->tx_irq);
1447}
1448
1449static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
1450 s8 status)
1451{
1452 struct pending_tx_info *pending_tx_info;
1453 pending_ring_idx_t index;
1454 unsigned long flags;
1455
1456 pending_tx_info = &queue->pending_tx_info[pending_idx];
1457
1458 spin_lock_irqsave(&queue->response_lock, flags);
1459
1460 _make_tx_response(queue, &pending_tx_info->req,
1461 pending_tx_info->extra_count, status);
1462
 1463	/* Release the pending index before pushing the Tx response so
 1464	 * it's available before a new Tx request is pushed by the
1465 * frontend.
1466 */
1467 index = pending_index(queue->pending_prod++);
1468 queue->pending_ring[index] = pending_idx;
1469
1470 push_tx_responses(queue);
1471
1472 spin_unlock_irqrestore(&queue->response_lock, flags);
1473}
1474
1475static void make_tx_response(struct xenvif_queue *queue,
1476 const struct xen_netif_tx_request *txp,
1477 unsigned int extra_count,
1478 s8 status)
1479{
1480 unsigned long flags;
1481
1482 spin_lock_irqsave(&queue->response_lock, flags);
1483
1484 _make_tx_response(queue, txp, extra_count, status);
1485 push_tx_responses(queue);
1486
1487 spin_unlock_irqrestore(&queue->response_lock, flags);
1488}
1489
1490static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
1491{
1492 int ret;
1493 struct gnttab_unmap_grant_ref tx_unmap_op;
1494
1495 gnttab_set_unmap_op(&tx_unmap_op,
1496 idx_to_kaddr(queue, pending_idx),
1497 GNTMAP_host_map,
1498 queue->grant_tx_handle[pending_idx]);
1499 xenvif_grant_handle_reset(queue, pending_idx);
1500
1501 ret = gnttab_unmap_refs(&tx_unmap_op, NULL,
1502 &queue->mmap_pages[pending_idx], 1);
1503 if (ret) {
1504 netdev_err(queue->vif->dev,
1505 "Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: 0x%x status: %d\n",
1506 ret,
1507 pending_idx,
1508 tx_unmap_op.host_addr,
1509 tx_unmap_op.handle,
1510 tx_unmap_op.status);
1511 BUG();
1512 }
1513}
1514
1515static inline int tx_work_todo(struct xenvif_queue *queue)
1516{
1517 if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)))
1518 return 1;
1519
1520 return 0;
1521}
1522
1523static inline bool tx_dealloc_work_todo(struct xenvif_queue *queue)
1524{
1525 return queue->dealloc_cons != queue->dealloc_prod;
1526}
1527
1528void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue)
1529{
1530 if (queue->tx.sring)
1531 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
1532 queue->tx.sring);
1533 if (queue->rx.sring)
1534 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
1535 queue->rx.sring);
1536}
1537
1538int xenvif_map_frontend_data_rings(struct xenvif_queue *queue,
1539 grant_ref_t tx_ring_ref,
1540 grant_ref_t rx_ring_ref)
1541{
1542 void *addr;
1543 struct xen_netif_tx_sring *txs;
1544 struct xen_netif_rx_sring *rxs;
1545
1546 int err = -ENOMEM;
1547
1548 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
1549 &tx_ring_ref, 1, &addr);
1550 if (err)
1551 goto err;
1552
1553 txs = (struct xen_netif_tx_sring *)addr;
1554 BACK_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE);
1555
1556 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
1557 &rx_ring_ref, 1, &addr);
1558 if (err)
1559 goto err;
1560
1561 rxs = (struct xen_netif_rx_sring *)addr;
1562 BACK_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);
1563
1564 return 0;
1565
1566err:
1567 xenvif_unmap_frontend_data_rings(queue);
1568 return err;
1569}
1570
1571static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue)
1572{
1573 /* Dealloc thread must remain running until all inflight
1574 * packets complete.
1575 */
1576 return kthread_should_stop() &&
1577 !atomic_read(&queue->inflight_packets);
1578}
1579
1580int xenvif_dealloc_kthread(void *data)
1581{
1582 struct xenvif_queue *queue = data;
1583
1584 for (;;) {
1585 wait_event_interruptible(queue->dealloc_wq,
1586 tx_dealloc_work_todo(queue) ||
1587 xenvif_dealloc_kthread_should_stop(queue));
1588 if (xenvif_dealloc_kthread_should_stop(queue))
1589 break;
1590
1591 xenvif_tx_dealloc_action(queue);
1592 cond_resched();
1593 }
1594
 1595	/* Unmap anything remaining */
1596 if (tx_dealloc_work_todo(queue))
1597 xenvif_tx_dealloc_action(queue);
1598
1599 return 0;
1600}
1601
1602static void make_ctrl_response(struct xenvif *vif,
1603 const struct xen_netif_ctrl_request *req,
1604 u32 status, u32 data)
1605{
1606 RING_IDX idx = vif->ctrl.rsp_prod_pvt;
1607 struct xen_netif_ctrl_response rsp = {
1608 .id = req->id,
1609 .type = req->type,
1610 .status = status,
1611 .data = data,
1612 };
1613
1614 *RING_GET_RESPONSE(&vif->ctrl, idx) = rsp;
1615 vif->ctrl.rsp_prod_pvt = ++idx;
1616}
1617
1618static void push_ctrl_response(struct xenvif *vif)
1619{
1620 int notify;
1621
1622 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->ctrl, notify);
1623 if (notify)
1624 notify_remote_via_irq(vif->ctrl_irq);
1625}
1626
1627static void process_ctrl_request(struct xenvif *vif,
1628 const struct xen_netif_ctrl_request *req)
1629{
1630 u32 status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED;
1631 u32 data = 0;
1632
1633 switch (req->type) {
1634 case XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM:
1635 status = xenvif_set_hash_alg(vif, req->data[0]);
1636 break;
1637
1638 case XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS:
1639 status = xenvif_get_hash_flags(vif, &data);
1640 break;
1641
1642 case XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS:
1643 status = xenvif_set_hash_flags(vif, req->data[0]);
1644 break;
1645
1646 case XEN_NETIF_CTRL_TYPE_SET_HASH_KEY:
1647 status = xenvif_set_hash_key(vif, req->data[0],
1648 req->data[1]);
1649 break;
1650
1651 case XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE:
1652 status = XEN_NETIF_CTRL_STATUS_SUCCESS;
1653 data = XEN_NETBK_MAX_HASH_MAPPING_SIZE;
1654 break;
1655
1656 case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE:
1657 status = xenvif_set_hash_mapping_size(vif,
1658 req->data[0]);
1659 break;
1660
1661 case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING:
1662 status = xenvif_set_hash_mapping(vif, req->data[0],
1663 req->data[1],
1664 req->data[2]);
1665 break;
1666
1667 default:
1668 break;
1669 }
1670
1671 make_ctrl_response(vif, req, status, data);
1672 push_ctrl_response(vif);
1673}
1674
1675static void xenvif_ctrl_action(struct xenvif *vif)
1676{
1677 for (;;) {
1678 RING_IDX req_prod, req_cons;
1679
1680 req_prod = vif->ctrl.sring->req_prod;
1681 req_cons = vif->ctrl.req_cons;
1682
1683 /* Make sure we can see requests before we process them. */
1684 rmb();
1685
1686 if (req_cons == req_prod)
1687 break;
1688
1689 while (req_cons != req_prod) {
1690 struct xen_netif_ctrl_request req;
1691
1692 RING_COPY_REQUEST(&vif->ctrl, req_cons, &req);
1693 req_cons++;
1694
1695 process_ctrl_request(vif, &req);
1696 }
1697
1698 vif->ctrl.req_cons = req_cons;
1699 vif->ctrl.sring->req_event = req_cons + 1;
1700 }
1701}
1702
1703static bool xenvif_ctrl_work_todo(struct xenvif *vif)
1704{
1705 if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->ctrl)))
1706 return true;
1707
1708 return false;
1709}
1710
1711irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data)
1712{
1713 struct xenvif *vif = data;
1714 unsigned int eoi_flag = XEN_EOI_FLAG_SPURIOUS;
1715
1716 while (xenvif_ctrl_work_todo(vif)) {
1717 xenvif_ctrl_action(vif);
1718 eoi_flag = 0;
1719 }
1720
1721 xen_irq_lateeoi(irq, eoi_flag);
1722
1723 return IRQ_HANDLED;
1724}
1725
1726static int __init netback_init(void)
1727{
1728 int rc = 0;
1729
1730 if (!xen_domain())
1731 return -ENODEV;
1732
 1733	/* If the user has not specified a value, allow as many queues as there
 1734	 * are CPUs, up to a maximum of 8.
1735 */
1736 if (xenvif_max_queues == 0)
1737 xenvif_max_queues = min_t(unsigned int, MAX_QUEUES_DEFAULT,
1738 num_online_cpus());
1739
1740 if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
1741 pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
1742 fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
1743 fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
1744 }
1745
1746 rc = xenvif_xenbus_init();
1747 if (rc)
1748 goto failed_init;
1749
1750#ifdef CONFIG_DEBUG_FS
1751 xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
1752#endif /* CONFIG_DEBUG_FS */
1753
1754 return 0;
1755
1756failed_init:
1757 return rc;
1758}
1759
1760module_init(netback_init);
1761
1762static void __exit netback_fini(void)
1763{
1764#ifdef CONFIG_DEBUG_FS
1765 debugfs_remove_recursive(xen_netback_dbg_root);
1766#endif /* CONFIG_DEBUG_FS */
1767 xenvif_xenbus_fini();
1768}
1769module_exit(netback_fini);
1770
1771MODULE_LICENSE("Dual BSD/GPL");
1772MODULE_ALIAS("xen-backend:vif");