Blame - ap/os/linux/linux-3.4.x/net/packet/af_packet.c - R306

blob: dc845ff4b9780bd6fdf41045318e1372dcce2ba9 [file] [log] [blame]

yuezonghe	824eb0c	2024-06-27 02:32:26 -0700	[diff] [blame]	1	/*
				2	* INET An implementation of the TCP/IP protocol suite for the LINUX
				3	* operating system. INET is implemented using the BSD Socket
				4	* interface as the means of communication with the user level.
				5	*
				6	* PACKET - implements raw packet sockets.
				7	*
				8	* Authors: Ross Biro
				9	* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
				10	* Alan Cox, <gw4pts@gw4pts.ampr.org>
				11	*
				12	* Fixes:
				13	* Alan Cox : verify_area() now used correctly
				14	* Alan Cox : new skbuff lists, look ma no backlogs!
				15	* Alan Cox : tidied skbuff lists.
				16	* Alan Cox : Now uses generic datagram routines I
				17	* added. Also fixed the peek/read crash
				18	* from all old Linux datagram code.
				19	* Alan Cox : Uses the improved datagram code.
				20	* Alan Cox : Added NULL's for socket options.
				21	* Alan Cox : Re-commented the code.
				22	* Alan Cox : Use new kernel side addressing
				23	* Rob Janssen : Correct MTU usage.
				24	* Dave Platt : Counter leaks caused by incorrect
				25	* interrupt locking and some slightly
				26	* dubious gcc output. Can you read
				27	* compiler: it said _VOLATILE_
				28	* Richard Kooijman : Timestamp fixes.
				29	* Alan Cox : New buffers. Use sk->mac.raw.
				30	* Alan Cox : sendmsg/recvmsg support.
				31	* Alan Cox : Protocol setting support
				32	* Alexey Kuznetsov : Untied from IPv4 stack.
				33	* Cyrus Durgin : Fixed kerneld for kmod.
				34	* Michal Ostrowski : Module initialization cleanup.
				35	* Ulises Alonso : Frame number limit removal and
				36	* packet_set_ring memory leak.
				37	* Eric Biederman : Allow for > 8 byte hardware addresses.
				38	* The convention is that longer addresses
				39	* will simply extend the hardware address
				40	* byte arrays at the end of sockaddr_ll
				41	* and packet_mreq.
				42	* Johann Baudy : Added TX RING.
				43	* Chetan Loke : Implemented TPACKET_V3 block abstraction
				44	* layer.
				45	* Copyright (C) 2011, <lokec@ccs.neu.edu>
				46	*
				47	*
				48	* This program is free software; you can redistribute it and/or
				49	* modify it under the terms of the GNU General Public License
				50	* as published by the Free Software Foundation; either version
				51	* 2 of the License, or (at your option) any later version.
				52	*
				53	*/
				54
				55	#include <linux/types.h>
				56	#include <linux/mm.h>
				57	#include <linux/capability.h>
				58	#include <linux/fcntl.h>
				59	#include <linux/socket.h>
				60	#include <linux/in.h>
				61	#include <linux/inet.h>
				62	#include <linux/netdevice.h>
				63	#include <linux/if_packet.h>
				64	#include <linux/wireless.h>
				65	#include <linux/kernel.h>
				66	#include <linux/kmod.h>
				67	#include <linux/slab.h>
				68	#include <linux/vmalloc.h>
				69	#include <net/net_namespace.h>
				70	#include <net/ip.h>
				71	#include <net/protocol.h>
				72	#include <linux/skbuff.h>
				73	#include <net/sock.h>
				74	#include <linux/errno.h>
				75	#include <linux/timer.h>
				76	#include <asm/uaccess.h>
				77	#include <asm/ioctls.h>
				78	#include <asm/page.h>
				79	#include <asm/cacheflush.h>
				80	#include <asm/io.h>
				81	#include <linux/proc_fs.h>
				82	#include <linux/seq_file.h>
				83	#include <linux/poll.h>
				84	#include <linux/module.h>
				85	#include <linux/init.h>
				86	#include <linux/mutex.h>
				87	#include <linux/if_vlan.h>
				88	#include <linux/virtio_net.h>
				89	#include <linux/errqueue.h>
				90	#include <linux/net_tstamp.h>
				91	#include <linux/delay.h>
				92
				93	#ifdef CONFIG_INET
				94	#include <net/inet_common.h>
				95	#endif
				96
				97	/*
				98	Assumptions:
				99	- if device has no dev->hard_header routine, it adds and removes ll header
				100	inside itself. In this case ll header is invisible outside of device,
				101	but higher levels still should reserve dev->hard_header_len.
				102	Some devices are enough clever to reallocate skb, when header
				103	will not fit to reserved space (tunnel), another ones are silly
				104	(PPP).
				105	- packet socket receives packets with pulled ll header,
				106	so that SOCK_RAW should push it back.
				107
				108	On receive:
				109	-----------
				110
				111	Incoming, dev->hard_header!=NULL
				112	mac_header -> ll header
				113	data -> data
				114
				115	Outgoing, dev->hard_header!=NULL
				116	mac_header -> ll header
				117	data -> ll header
				118
				119	Incoming, dev->hard_header==NULL
				120	mac_header -> UNKNOWN position. It is very likely, that it points to ll
				121	header. PPP makes it, that is wrong, because introduce
				122	asymmetry between rx and tx paths.
				123	data -> data
				124
				125	Outgoing, dev->hard_header==NULL
				126	mac_header -> data. ll header is still not built!
				127	data -> data
				128
				129	Resume
				130	If dev->hard_header==NULL we are unlikely to restore sensible ll header.
				131
				132
				133	On transmit:
				134	------------
				135
				136	dev->hard_header != NULL
				137	mac_header -> ll header
				138	data -> ll header
				139
				140	dev->hard_header == NULL (ll header is added by device, we cannot control it)
				141	mac_header -> data
				142	data -> data
				143
				144	We should set nh.raw on output to the correct position,
				145	packet classifier depends on it.
				146	*/
				147
				148	/* Private packet socket structures. */
				149
				150	struct packet_mclist {
				151	struct packet_mclist *next;
				152	int ifindex;
				153	int count;
				154	unsigned short type;
				155	unsigned short alen;
				156	unsigned char addr[MAX_ADDR_LEN];
				157	};
				158	/* identical to struct packet_mreq except it has
				159	* a longer address field.
				160	*/
				161	struct packet_mreq_max {
				162	int mr_ifindex;
				163	unsigned short mr_type;
				164	unsigned short mr_alen;
				165	unsigned char mr_address[MAX_ADDR_LEN];
				166	};
				167
				168	static int packet_set_ring(struct sock sk, union tpacket_req_u req_u,
				169	int closing, int tx_ring);
				170
				171
				172	#define V3_ALIGNMENT (8)
				173
				174	#define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))
				175
				176	#define BLK_PLUS_PRIV(sz_of_priv) \
				177	(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))
				178
				179	/* kbdq - kernel block descriptor queue */
				180	struct tpacket_kbdq_core {
				181	struct pgv *pkbdq;
				182	unsigned int feature_req_word;
				183	unsigned int hdrlen;
				184	unsigned char reset_pending_on_curr_blk;
				185	unsigned char delete_blk_timer;
				186	unsigned short kactive_blk_num;
				187	unsigned short blk_sizeof_priv;
				188
				189	/* last_kactive_blk_num:
				190	* trick to see if user-space has caught up
				191	* in order to avoid refreshing timer when every single pkt arrives.
				192	*/
				193	unsigned short last_kactive_blk_num;
				194
				195	char *pkblk_start;
				196	char *pkblk_end;
				197	int kblk_size;
				198	unsigned int knum_blocks;
				199	uint64_t knxt_seq_num;
				200	char *prev;
				201	char *nxt_offset;
				202	struct sk_buff *skb;
				203
				204	atomic_t blk_fill_in_prog;
				205
				206	/* Default is set to 8ms */
				207	#define DEFAULT_PRB_RETIRE_TOV (8)
				208
				209	unsigned short retire_blk_tov;
				210	unsigned short version;
				211	unsigned long tov_in_jiffies;
				212
				213	/* timer to retire an outstanding block */
				214	struct timer_list retire_blk_timer;
				215	};
				216
				217	#define PGV_FROM_VMALLOC 1
				218	struct pgv {
				219	char *buffer;
				220	};
				221
				222	struct packet_ring_buffer {
				223	struct pgv *pg_vec;
				224	unsigned int head;
				225	unsigned int frames_per_block;
				226	unsigned int frame_size;
				227	unsigned int frame_max;
				228
				229	unsigned int pg_vec_order;
				230	unsigned int pg_vec_pages;
				231	unsigned int pg_vec_len;
				232
				233	struct tpacket_kbdq_core prb_bdqc;
				234	atomic_t pending;
				235	};
				236
				237	#define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status)
				238	#define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts)
				239	#define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt)
				240	#define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len)
				241	#define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num)
				242	#define BLOCK_O2PRIV(x) ((x)->offset_to_priv)
				243	#define BLOCK_PRIV(x) ((void )((char )(x) + BLOCK_O2PRIV(x)))
				244
				245	struct packet_sock;
				246	static int tpacket_snd(struct packet_sock po, struct msghdr msg);
				247
				248	static void packet_previous_frame(struct packet_sock po,
				249	struct packet_ring_buffer *rb,
				250	int status);
				251	static void packet_increment_head(struct packet_ring_buffer *buff);
				252	static int prb_curr_blk_in_use(struct tpacket_kbdq_core *,
				253	struct tpacket_block_desc *);
				254	static void prb_dispatch_next_block(struct tpacket_kbdq_core ,
				255	struct packet_sock *);
				256	static void prb_retire_current_block(struct tpacket_kbdq_core *,
				257	struct packet_sock *, unsigned int status);
				258	static int prb_queue_frozen(struct tpacket_kbdq_core *);
				259	static void prb_open_block(struct tpacket_kbdq_core *,
				260	struct tpacket_block_desc *);
				261	static void prb_retire_rx_blk_timer_expired(unsigned long);
				262	static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
				263	static void prb_init_blk_timer(struct packet_sock *,
				264	struct tpacket_kbdq_core *,
				265	void (*func) (unsigned long));
				266	static void prb_fill_rxhash(struct tpacket_kbdq_core , struct tpacket3_hdr );
				267	static void prb_clear_rxhash(struct tpacket_kbdq_core *,
				268	struct tpacket3_hdr *);
				269	static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
				270	struct tpacket3_hdr *);
				271	static void packet_flush_mclist(struct sock *sk);
				272
				273	struct packet_fanout;
				274	struct packet_sock {
				275	/* struct sock has to be the first member of packet_sock */
				276	struct sock sk;
				277	struct packet_fanout *fanout;
				278	struct tpacket_stats stats;
				279	union tpacket_stats_u stats_u;
				280	struct packet_ring_buffer rx_ring;
				281	struct packet_ring_buffer tx_ring;
				282	int copy_thresh;
				283	spinlock_t bind_lock;
				284	struct mutex pg_vec_lock;
				285	unsigned int running:1, /* prot_hook is attached*/
				286	auxdata:1,
				287	origdev:1,
				288	has_vnet_hdr:1;
				289	int ifindex; /* bound device */
				290	__be16 num;
				291	struct packet_mclist *mclist;
				292	atomic_t mapped;
				293	enum tpacket_versions tp_version;
				294	unsigned int tp_hdrlen;
				295	unsigned int tp_reserve;
				296	unsigned int tp_loss:1;
				297	unsigned int tp_tstamp;
				298	struct net_device __rcu *cached_dev;
				299	struct packet_type prot_hook ____cacheline_aligned_in_smp;
				300	};
				301
				302	#define PACKET_FANOUT_MAX 256
				303
				304	struct packet_fanout {
				305	#ifdef CONFIG_NET_NS
				306	struct net *net;
				307	#endif
				308	unsigned int num_members;
				309	u16 id;
				310	u8 type;
				311	u8 defrag;
				312	atomic_t rr_cur;
				313	struct list_head list;
				314	struct sock *arr[PACKET_FANOUT_MAX];
				315	spinlock_t lock;
				316	atomic_t sk_ref;
				317	struct packet_type prot_hook ____cacheline_aligned_in_smp;
				318	};
				319
				320	struct packet_skb_cb {
				321	unsigned int origlen;
				322	union {
				323	struct sockaddr_pkt pkt;
				324	struct sockaddr_ll ll;
				325	} sa;
				326	};
				327
				328	#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
				329
				330	#define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))
				331	#define GET_PBLOCK_DESC(x, bid) \
				332	((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer))
				333	#define GET_CURR_PBLOCK_DESC_FROM_CORE(x) \
				334	((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))
				335	#define GET_NEXT_PRB_BLK_NUM(x) \
				336	(((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
				337	((x)->kactive_blk_num+1) : 0)
				338
				339	static struct packet_sock pkt_sk(struct sock sk)
				340	{
				341	return (struct packet_sock *)sk;
				342	}
				343
				344	static void __fanout_unlink(struct sock sk, struct packet_sock po);
				345	static void __fanout_link(struct sock sk, struct packet_sock po);
				346
				347	/* register_prot_hook must be invoked with the po->bind_lock held,
				348	* or from a context in which asynchronous accesses to the packet
				349	* socket is not possible (packet_create()).
				350	*/
				351	static void register_prot_hook(struct sock *sk)
				352	{
				353	struct packet_sock *po = pkt_sk(sk);
				354
				355	if (!po->running) {
				356	if (po->fanout) {
				357	__fanout_link(sk, po);
				358	} else {
				359	dev_add_pack(&po->prot_hook);
				360	rcu_assign_pointer(po->cached_dev, po->prot_hook.dev);
				361	}
				362
				363	sock_hold(sk);
				364	po->running = 1;
				365	}
				366	}
				367
				368	/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock
				369	* held. If the sync parameter is true, we will temporarily drop
				370	* the po->bind_lock and do a synchronize_net to make sure no
				371	* asynchronous packet processing paths still refer to the elements
				372	* of po->prot_hook. If the sync parameter is false, it is the
				373	* callers responsibility to take care of this.
				374	*/
				375	static void __unregister_prot_hook(struct sock *sk, bool sync)
				376	{
				377	struct packet_sock *po = pkt_sk(sk);
				378
				379	po->running = 0;
				380	if (po->fanout) {
				381	__fanout_unlink(sk, po);
				382	} else {
				383	__dev_remove_pack(&po->prot_hook);
				384	RCU_INIT_POINTER(po->cached_dev, NULL);
				385	}
				386
				387	__sock_put(sk);
				388
				389	if (sync) {
				390	spin_unlock(&po->bind_lock);
				391	synchronize_net();
				392	spin_lock(&po->bind_lock);
				393	}
				394	}
				395
				396	static void unregister_prot_hook(struct sock *sk, bool sync)
				397	{
				398	struct packet_sock *po = pkt_sk(sk);
				399
				400	if (po->running)
				401	__unregister_prot_hook(sk, sync);
				402	}
				403
				404	static inline __pure struct page pgv_to_page(void addr)
				405	{
				406	if (is_vmalloc_addr(addr))
				407	return vmalloc_to_page(addr);
				408	return virt_to_page(addr);
				409	}
				410
				411	static void __packet_set_status(struct packet_sock po, void frame, int status)
				412	{
				413	union {
				414	struct tpacket_hdr *h1;
				415	struct tpacket2_hdr *h2;
				416	void *raw;
				417	} h;
				418
				419	h.raw = frame;
				420	switch (po->tp_version) {
				421	case TPACKET_V1:
				422	h.h1->tp_status = status;
				423	flush_dcache_page(pgv_to_page(&h.h1->tp_status));
				424	break;
				425	case TPACKET_V2:
				426	h.h2->tp_status = status;
				427	flush_dcache_page(pgv_to_page(&h.h2->tp_status));
				428	break;
				429	case TPACKET_V3:
				430	default:
				431	WARN(1, "TPACKET version not supported.\n");
				432	BUG();
				433	}
				434
				435	smp_wmb();
				436	}
				437
				438	static int __packet_get_status(struct packet_sock po, void frame)
				439	{
				440	union {
				441	struct tpacket_hdr *h1;
				442	struct tpacket2_hdr *h2;
				443	void *raw;
				444	} h;
				445
				446	smp_rmb();
				447
				448	h.raw = frame;
				449	switch (po->tp_version) {
				450	case TPACKET_V1:
				451	flush_dcache_page(pgv_to_page(&h.h1->tp_status));
				452	return h.h1->tp_status;
				453	case TPACKET_V2:
				454	flush_dcache_page(pgv_to_page(&h.h2->tp_status));
				455	return h.h2->tp_status;
				456	case TPACKET_V3:
				457	default:
				458	WARN(1, "TPACKET version not supported.\n");
				459	BUG();
				460	return 0;
				461	}
				462	}
				463
				464	static void packet_lookup_frame(struct packet_sock po,
				465	struct packet_ring_buffer *rb,
				466	unsigned int position,
				467	int status)
				468	{
				469	unsigned int pg_vec_pos, frame_offset;
				470	union {
				471	struct tpacket_hdr *h1;
				472	struct tpacket2_hdr *h2;
				473	void *raw;
				474	} h;
				475
				476	pg_vec_pos = position / rb->frames_per_block;
				477	frame_offset = position % rb->frames_per_block;
				478
				479	h.raw = rb->pg_vec[pg_vec_pos].buffer +
				480	(frame_offset * rb->frame_size);
				481
				482	if (status != __packet_get_status(po, h.raw))
				483	return NULL;
				484
				485	return h.raw;
				486	}
				487
				488	static void packet_current_frame(struct packet_sock po,
				489	struct packet_ring_buffer *rb,
				490	int status)
				491	{
				492	return packet_lookup_frame(po, rb, rb->head, status);
				493	}
				494
				495	static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
				496	{
				497	del_timer_sync(&pkc->retire_blk_timer);
				498	}
				499
				500	static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
				501	int tx_ring,
				502	struct sk_buff_head *rb_queue)
				503	{
				504	struct tpacket_kbdq_core *pkc;
				505
				506	pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
				507
				508	spin_lock_bh(&rb_queue->lock);
				509	pkc->delete_blk_timer = 1;
				510	spin_unlock_bh(&rb_queue->lock);
				511
				512	prb_del_retire_blk_timer(pkc);
				513	}
				514
				515	static void prb_init_blk_timer(struct packet_sock *po,
				516	struct tpacket_kbdq_core *pkc,
				517	void (*func) (unsigned long))
				518	{
				519	init_timer(&pkc->retire_blk_timer);
				520	pkc->retire_blk_timer.data = (long)po;
				521	pkc->retire_blk_timer.function = func;
				522	pkc->retire_blk_timer.expires = jiffies;
				523	}
				524
				525	static void prb_setup_retire_blk_timer(struct packet_sock *po, int tx_ring)
				526	{
				527	struct tpacket_kbdq_core *pkc;
				528
				529	if (tx_ring)
				530	BUG();
				531
				532	pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
				533	prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired);
				534	}
				535
				536	static int prb_calc_retire_blk_tmo(struct packet_sock *po,
				537	int blk_size_in_bytes)
				538	{
				539	struct net_device *dev;
				540	unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
				541	struct ethtool_cmd ecmd;
				542	int err;
				543
				544	rtnl_lock();
				545	dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
				546	if (unlikely(!dev)) {
				547	rtnl_unlock();
				548	return DEFAULT_PRB_RETIRE_TOV;
				549	}
				550	err = __ethtool_get_settings(dev, &ecmd);
				551	rtnl_unlock();
				552	if (!err) {
				553	switch (ecmd.speed) {
				554	case SPEED_10000:
				555	msec = 1;
				556	div = 10000/1000;
				557	break;
				558	case SPEED_1000:
				559	msec = 1;
				560	div = 1000/1000;
				561	break;
				562	/*
				563	* If the link speed is so slow you don't really
				564	* need to worry about perf anyways
				565	*/
				566	case SPEED_100:
				567	case SPEED_10:
				568	default:
				569	return DEFAULT_PRB_RETIRE_TOV;
				570	}
				571	}
				572
				573	mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
				574
				575	if (div)
				576	mbits /= div;
				577
				578	tmo = mbits * msec;
				579
				580	if (div)
				581	return tmo+1;
				582	return tmo;
				583	}
				584
				585	static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
				586	union tpacket_req_u *req_u)
				587	{
				588	p1->feature_req_word = req_u->req3.tp_feature_req_word;
				589	}
				590
				591	static void init_prb_bdqc(struct packet_sock *po,
				592	struct packet_ring_buffer *rb,
				593	struct pgv *pg_vec,
				594	union tpacket_req_u *req_u, int tx_ring)
				595	{
				596	struct tpacket_kbdq_core *p1 = &rb->prb_bdqc;
				597	struct tpacket_block_desc *pbd;
				598
				599	memset(p1, 0x0, sizeof(*p1));
				600
				601	p1->knxt_seq_num = 1;
				602	p1->pkbdq = pg_vec;
				603	pbd = (struct tpacket_block_desc *)pg_vec[0].buffer;
				604	p1->pkblk_start = (char *)pg_vec[0].buffer;
				605	p1->kblk_size = req_u->req3.tp_block_size;
				606	p1->knum_blocks = req_u->req3.tp_block_nr;
				607	p1->hdrlen = po->tp_hdrlen;
				608	p1->version = po->tp_version;
				609	p1->last_kactive_blk_num = 0;
				610	po->stats_u.stats3.tp_freeze_q_cnt = 0;
				611	if (req_u->req3.tp_retire_blk_tov)
				612	p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
				613	else
				614	p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
				615	req_u->req3.tp_block_size);
				616	p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
				617	p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
				618
				619	prb_init_ft_ops(p1, req_u);
				620	prb_setup_retire_blk_timer(po, tx_ring);
				621	prb_open_block(p1, pbd);
				622	}
				623
				624	/* Do NOT update the last_blk_num first.
				625	* Assumes sk_buff_head lock is held.
				626	*/
				627	static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
				628	{
				629	mod_timer(&pkc->retire_blk_timer,
				630	jiffies + pkc->tov_in_jiffies);
				631	pkc->last_kactive_blk_num = pkc->kactive_blk_num;
				632	}
				633
				634	/*
				635	* Timer logic:
				636	* 1) We refresh the timer only when we open a block.
				637	* By doing this we don't waste cycles refreshing the timer
				638	* on packet-by-packet basis.
				639	*
				640	* With a 1MB block-size, on a 1Gbps line, it will take
				641	* i) ~8 ms to fill a block + ii) memcpy etc.
				642	* In this cut we are not accounting for the memcpy time.
				643	*
				644	* So, if the user sets the 'tmo' to 10ms then the timer
				645	* will never fire while the block is still getting filled
				646	* (which is what we want). However, the user could choose
				647	* to close a block early and that's fine.
				648	*
				649	* But when the timer does fire, we check whether or not to refresh it.
				650	* Since the tmo granularity is in msecs, it is not too expensive
				651	* to refresh the timer, lets say every '8' msecs.
				652	* Either the user can set the 'tmo' or we can derive it based on
				653	* a) line-speed and b) block-size.
				654	* prb_calc_retire_blk_tmo() calculates the tmo.
				655	*
				656	*/
				657	static void prb_retire_rx_blk_timer_expired(unsigned long data)
				658	{
				659	struct packet_sock po = (struct packet_sock )data;
				660	struct tpacket_kbdq_core *pkc = &po->rx_ring.prb_bdqc;
				661	unsigned int frozen;
				662	struct tpacket_block_desc *pbd;
				663
				664	spin_lock(&po->sk.sk_receive_queue.lock);
				665
				666	frozen = prb_queue_frozen(pkc);
				667	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
				668
				669	if (unlikely(pkc->delete_blk_timer))
				670	goto out;
				671
				672	/* We only need to plug the race when the block is partially filled.
				673	* tpacket_rcv:
				674	* lock(); increment BLOCK_NUM_PKTS; unlock()
				675	* copy_bits() is in progress ...
				676	* timer fires on other cpu:
				677	* we can't retire the current block because copy_bits
				678	* is in progress.
				679	*
				680	*/
				681	if (BLOCK_NUM_PKTS(pbd)) {
				682	while (atomic_read(&pkc->blk_fill_in_prog)) {
				683	/* Waiting for skb_copy_bits to finish... */
				684	cpu_chill();
				685	}
				686	}
				687
				688	if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
				689	if (!frozen) {
				690	prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
				691	if (!prb_dispatch_next_block(pkc, po))
				692	goto refresh_timer;
				693	else
				694	goto out;
				695	} else {
				696	/* Case 1. Queue was frozen because user-space was
				697	* lagging behind.
				698	*/
				699	if (prb_curr_blk_in_use(pkc, pbd)) {
				700	/*
				701	* Ok, user-space is still behind.
				702	* So just refresh the timer.
				703	*/
				704	goto refresh_timer;
				705	} else {
				706	/* Case 2. queue was frozen,user-space caught up,
				707	* now the link went idle && the timer fired.
				708	* We don't have a block to close.So we open this
				709	* block and restart the timer.
				710	* opening a block thaws the queue,restarts timer
				711	* Thawing/timer-refresh is a side effect.
				712	*/
				713	prb_open_block(pkc, pbd);
				714	goto out;
				715	}
				716	}
				717	}
				718
				719	refresh_timer:
				720	_prb_refresh_rx_retire_blk_timer(pkc);
				721
				722	out:
				723	spin_unlock(&po->sk.sk_receive_queue.lock);
				724	}
				725
				726	static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
				727	struct tpacket_block_desc *pbd1, __u32 status)
				728	{
				729	/* Flush everything minus the block header */
				730
				731	#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
				732	u8 start, end;
				733
				734	start = (u8 *)pbd1;
				735
				736	/* Skip the block header(we know header WILL fit in 4K) */
				737	start += PAGE_SIZE;
				738
				739	end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);
				740	for (; start < end; start += PAGE_SIZE)
				741	flush_dcache_page(pgv_to_page(start));
				742
				743	smp_wmb();
				744	#endif
				745
				746	/* Now update the block status. */
				747
				748	BLOCK_STATUS(pbd1) = status;
				749
				750	/* Flush the block header */
				751
				752	#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
				753	start = (u8 *)pbd1;
				754	flush_dcache_page(pgv_to_page(start));
				755
				756	smp_wmb();
				757	#endif
				758	}
				759
				760	/*
				761	* Side effect:
				762	*
				763	* 1) flush the block
				764	* 2) Increment active_blk_num
				765	*
				766	* Note:We DONT refresh the timer on purpose.
				767	* Because almost always the next block will be opened.
				768	*/
				769	static void prb_close_block(struct tpacket_kbdq_core *pkc1,
				770	struct tpacket_block_desc *pbd1,
				771	struct packet_sock *po, unsigned int stat)
				772	{
				773	__u32 status = TP_STATUS_USER \| stat;
				774
				775	struct tpacket3_hdr *last_pkt;
				776	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
				777
				778	if (po->stats.tp_drops)
				779	status \|= TP_STATUS_LOSING;
				780
				781	last_pkt = (struct tpacket3_hdr *)pkc1->prev;
				782	last_pkt->tp_next_offset = 0;
				783
				784	/* Get the ts of the last pkt */
				785	if (BLOCK_NUM_PKTS(pbd1)) {
				786	h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
				787	h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec;
				788	} else {
				789	/* Ok, we tmo'd - so get the current time */
				790	struct timespec ts;
				791	getnstimeofday(&ts);
				792	h1->ts_last_pkt.ts_sec = ts.tv_sec;
				793	h1->ts_last_pkt.ts_nsec = ts.tv_nsec;
				794	}
				795
				796	smp_wmb();
				797
				798	/* Flush the block */
				799	prb_flush_block(pkc1, pbd1, status);
				800
				801	pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
				802	}
				803
				804	static void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
				805	{
				806	pkc->reset_pending_on_curr_blk = 0;
				807	}
				808
				809	/*
				810	* Side effect of opening a block:
				811	*
				812	* 1) prb_queue is thawed.
				813	* 2) retire_blk_timer is refreshed.
				814	*
				815	*/
				816	static void prb_open_block(struct tpacket_kbdq_core *pkc1,
				817	struct tpacket_block_desc *pbd1)
				818	{
				819	struct timespec ts;
				820	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
				821
				822	smp_rmb();
				823
				824	/* We could have just memset this but we will lose the
				825	* flexibility of making the priv area sticky
				826	*/
				827	BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
				828	BLOCK_NUM_PKTS(pbd1) = 0;
				829	BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
				830	getnstimeofday(&ts);
				831	h1->ts_first_pkt.ts_sec = ts.tv_sec;
				832	h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
				833	pkc1->pkblk_start = (char *)pbd1;
				834	pkc1->nxt_offset = (char *)(pkc1->pkblk_start +
				835	BLK_PLUS_PRIV(pkc1->blk_sizeof_priv));
				836	BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
				837	BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
				838	pbd1->version = pkc1->version;
				839	pkc1->prev = pkc1->nxt_offset;
				840	pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
				841	prb_thaw_queue(pkc1);
				842	_prb_refresh_rx_retire_blk_timer(pkc1);
				843
				844	smp_wmb();
				845	}
				846
				847	/*
				848	* Queue freeze logic:
				849	* 1) Assume tp_block_nr = 8 blocks.
				850	* 2) At time 't0', user opens Rx ring.
				851	* 3) Some time past 't0', kernel starts filling blocks starting from 0 .. 7
				852	* 4) user-space is either sleeping or processing block '0'.
				853	* 5) tpacket_rcv is currently filling block '7', since there is no space left,
				854	* it will close block-7,loop around and try to fill block '0'.
				855	* call-flow:
				856	* __packet_lookup_frame_in_block
				857	* prb_retire_current_block()
				858	* prb_dispatch_next_block()
				859	* \|->(BLOCK_STATUS == USER) evaluates to true
				860	* 5.1) Since block-0 is currently in-use, we just freeze the queue.
				861	* 6) Now there are two cases:
				862	* 6.1) Link goes idle right after the queue is frozen.
				863	* But remember, the last open_block() refreshed the timer.
				864	* When this timer expires,it will refresh itself so that we can
				865	* re-open block-0 in near future.
				866	* 6.2) Link is busy and keeps on receiving packets. This is a simple
				867	* case and __packet_lookup_frame_in_block will check if block-0
				868	* is free and can now be re-used.
				869	*/
				870	static void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
				871	struct packet_sock *po)
				872	{
				873	pkc->reset_pending_on_curr_blk = 1;
				874	po->stats_u.stats3.tp_freeze_q_cnt++;
				875	}
				876
				877	#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT))
				878
				879	/*
				880	* If the next block is free then we will dispatch it
				881	* and return a good offset.
				882	* Else, we will freeze the queue.
				883	* So, caller must check the return value.
				884	*/
				885	static void prb_dispatch_next_block(struct tpacket_kbdq_core pkc,
				886	struct packet_sock *po)
				887	{
				888	struct tpacket_block_desc *pbd;
				889
				890	smp_rmb();
				891
				892	/* 1. Get current block num */
				893	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
				894
				895	/* 2. If this block is currently in_use then freeze the queue */
				896	if (TP_STATUS_USER & BLOCK_STATUS(pbd)) {
				897	prb_freeze_queue(pkc, po);
				898	return NULL;
				899	}
				900
				901	/*
				902	* 3.
				903	* open this block and return the offset where the first packet
				904	* needs to get stored.
				905	*/
				906	prb_open_block(pkc, pbd);
				907	return (void *)pkc->nxt_offset;
				908	}
				909
				910	static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
				911	struct packet_sock *po, unsigned int status)
				912	{
				913	struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
				914
				915	/* retire/close the current block */
				916	if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) {
				917	/*
				918	* Plug the case where copy_bits() is in progress on
				919	* cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't
				920	* have space to copy the pkt in the current block and
				921	* called prb_retire_current_block()
				922	*
				923	* We don't need to worry about the TMO case because
				924	* the timer-handler already handled this case.
				925	*/
				926	if (!(status & TP_STATUS_BLK_TMO)) {
				927	while (atomic_read(&pkc->blk_fill_in_prog)) {
				928	/* Waiting for skb_copy_bits to finish... */
				929	cpu_chill();
				930	}
				931	}
				932	prb_close_block(pkc, pbd, po, status);
				933	return;
				934	}
				935	}
				936
				937	static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc,
				938	struct tpacket_block_desc *pbd)
				939	{
				940	return TP_STATUS_USER & BLOCK_STATUS(pbd);
				941	}
				942
				943	static int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
				944	{
				945	return pkc->reset_pending_on_curr_blk;
				946	}
				947
				948	static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
				949	{
				950	struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
				951	atomic_dec(&pkc->blk_fill_in_prog);
				952	}
				953
				954	static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
				955	struct tpacket3_hdr *ppd)
				956	{
				957	ppd->hv1.tp_rxhash = skb_get_rxhash(pkc->skb);
				958	}
				959
				960	static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
				961	struct tpacket3_hdr *ppd)
				962	{
				963	ppd->hv1.tp_rxhash = 0;
				964	}
				965
				966	static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
				967	struct tpacket3_hdr *ppd)
				968	{
				969	if (vlan_tx_tag_present(pkc->skb)) {
				970	ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb);
				971	ppd->tp_status = TP_STATUS_VLAN_VALID;
				972	} else {
				973	ppd->hv1.tp_vlan_tci = ppd->tp_status = 0;
				974	}
				975	}
				976
				977	static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc,
				978	struct tpacket3_hdr *ppd)
				979	{
				980	prb_fill_vlan_info(pkc, ppd);
				981
				982	if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH)
				983	prb_fill_rxhash(pkc, ppd);
				984	else
				985	prb_clear_rxhash(pkc, ppd);
				986	}
				987
				988	static void prb_fill_curr_block(char *curr,
				989	struct tpacket_kbdq_core *pkc,
				990	struct tpacket_block_desc *pbd,
				991	unsigned int len)
				992	{
				993	struct tpacket3_hdr *ppd;
				994
				995	ppd = (struct tpacket3_hdr *)curr;
				996	ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len);
				997	pkc->prev = curr;
				998	pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
				999	BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
				1000	BLOCK_NUM_PKTS(pbd) += 1;
				1001	atomic_inc(&pkc->blk_fill_in_prog);
				1002	prb_run_all_ft_ops(pkc, ppd);
				1003	}
				1004
				1005	/* Assumes caller has the sk->rx_queue.lock */
				1006	static void __packet_lookup_frame_in_block(struct packet_sock po,
				1007	struct sk_buff *skb,
				1008	int status,
				1009	unsigned int len
				1010	)
				1011	{
				1012	struct tpacket_kbdq_core *pkc;
				1013	struct tpacket_block_desc *pbd;
				1014	char curr, end;
				1015
				1016	pkc = GET_PBDQC_FROM_RB(((struct packet_ring_buffer *)&po->rx_ring));
				1017	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
				1018
				1019	/* Queue is frozen when user space is lagging behind */
				1020	if (prb_queue_frozen(pkc)) {
				1021	/*
				1022	* Check if that last block which caused the queue to freeze,
				1023	* is still in_use by user-space.
				1024	*/
				1025	if (prb_curr_blk_in_use(pkc, pbd)) {
				1026	/* Can't record this packet */
				1027	return NULL;
				1028	} else {
				1029	/*
				1030	* Ok, the block was released by user-space.
				1031	* Now let's open that block.
				1032	* opening a block also thaws the queue.
				1033	* Thawing is a side effect.
				1034	*/
				1035	prb_open_block(pkc, pbd);
				1036	}
				1037	}
				1038
				1039	smp_mb();
				1040	curr = pkc->nxt_offset;
				1041	pkc->skb = skb;
				1042	end = (char ) ((char )pbd + pkc->kblk_size);
				1043
				1044	/* first try the current block */
				1045	if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) {
				1046	prb_fill_curr_block(curr, pkc, pbd, len);
				1047	return (void *)curr;
				1048	}
				1049
				1050	/* Ok, close the current block */
				1051	prb_retire_current_block(pkc, po, 0);
				1052
				1053	/* Now, try to dispatch the next block */
				1054	curr = (char *)prb_dispatch_next_block(pkc, po);
				1055	if (curr) {
				1056	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
				1057	prb_fill_curr_block(curr, pkc, pbd, len);
				1058	return (void *)curr;
				1059	}
				1060
				1061	/*
				1062	* No free blocks are available.user_space hasn't caught up yet.
				1063	* Queue was just frozen and now this packet will get dropped.
				1064	*/
				1065	return NULL;
				1066	}
				1067
				1068	static void packet_current_rx_frame(struct packet_sock po,
				1069	struct sk_buff *skb,
				1070	int status, unsigned int len)
				1071	{
				1072	char *curr = NULL;
				1073	switch (po->tp_version) {
				1074	case TPACKET_V1:
				1075	case TPACKET_V2:
				1076	curr = packet_lookup_frame(po, &po->rx_ring,
				1077	po->rx_ring.head, status);
				1078	return curr;
				1079	case TPACKET_V3:
				1080	return __packet_lookup_frame_in_block(po, skb, status, len);
				1081	default:
				1082	WARN(1, "TPACKET version not supported\n");
				1083	BUG();
				1084	return 0;
				1085	}
				1086	}
				1087
				1088	static void prb_lookup_block(struct packet_sock po,
				1089	struct packet_ring_buffer *rb,
				1090	unsigned int previous,
				1091	int status)
				1092	{
				1093	struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
				1094	struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous);
				1095
				1096	if (status != BLOCK_STATUS(pbd))
				1097	return NULL;
				1098	return pbd;
				1099	}
				1100
				1101	static int prb_previous_blk_num(struct packet_ring_buffer *rb)
				1102	{
				1103	unsigned int prev;
				1104	if (rb->prb_bdqc.kactive_blk_num)
				1105	prev = rb->prb_bdqc.kactive_blk_num-1;
				1106	else
				1107	prev = rb->prb_bdqc.knum_blocks-1;
				1108	return prev;
				1109	}
				1110
				/*
				 * Look up the block preceding the active one in @rb; returns its
				 * descriptor when the block status equals @status, NULL otherwise.
				 *
				 * Assumes the caller holds the receive queue lock.
				 */
				static void *__prb_previous_block(struct packet_sock *po,
						struct packet_ring_buffer *rb,
						int status)
				{
					unsigned int previous = prb_previous_blk_num(rb);

					return prb_lookup_block(po, rb, previous, status);
				}
				1119
				1120	static void packet_previous_rx_frame(struct packet_sock po,
				1121	struct packet_ring_buffer *rb,
				1122	int status)
				1123	{
				1124	if (po->tp_version <= TPACKET_V2)
				1125	return packet_previous_frame(po, rb, status);
				1126
				1127	return __prb_previous_block(po, rb, status);
				1128	}
				1129
				1130	static void packet_increment_rx_head(struct packet_sock *po,
				1131	struct packet_ring_buffer *rb)
				1132	{
				1133	switch (po->tp_version) {
				1134	case TPACKET_V1:
				1135	case TPACKET_V2:
				1136	return packet_increment_head(rb);
				1137	case TPACKET_V3:
				1138	default:
				1139	WARN(1, "TPACKET version not supported.\n");
				1140	BUG();
				1141	return;
				1142	}
				1143	}
				1144
				1145	static void packet_previous_frame(struct packet_sock po,
				1146	struct packet_ring_buffer *rb,
				1147	int status)
				1148	{
				1149	unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
				1150	return packet_lookup_frame(po, rb, previous, status);
				1151	}
				1152
				1153	static void packet_increment_head(struct packet_ring_buffer *buff)
				1154	{
				1155	buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
				1156	}
				1157
				1158	static void packet_sock_destruct(struct sock *sk)
				1159	{
				1160	skb_queue_purge(&sk->sk_error_queue);
				1161
				1162	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
				1163	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
				1164
				1165	if (!sock_flag(sk, SOCK_DEAD)) {
				1166	pr_err("Attempt to release alive packet socket: %p\n", sk);
				1167	return;
				1168	}
				1169
				1170	sk_refcnt_debug_dec(sk);
				1171	}
				1172
				1173	static struct sock fanout_demux_hash(struct packet_fanout f, struct sk_buff *skb, unsigned int num)
				1174	{
				1175	u32 idx, hash = skb->rxhash;
				1176
				1177	idx = ((u64)hash * num) >> 32;
				1178
				1179	return f->arr[idx];
				1180	}
				1181
				1182	static struct sock fanout_demux_lb(struct packet_fanout f, struct sk_buff *skb, unsigned int num)
				1183	{
				1184	unsigned int val = atomic_inc_return(&f->rr_cur);
				1185
				1186	return f->arr[val % num];
				1187	}
				1188
				1189	static struct sock fanout_demux_cpu(struct packet_fanout f, struct sk_buff *skb, unsigned int num)
				1190	{
				1191	unsigned int cpu = smp_processor_id();
				1192
				1193	return f->arr[cpu % num];
				1194	}
				1195
				1196	static int packet_rcv_fanout(struct sk_buff skb, struct net_device dev,
				1197	struct packet_type pt, struct net_device orig_dev)
				1198	{
				1199	struct packet_fanout *f = pt->af_packet_priv;
				1200	unsigned int num = ACCESS_ONCE(f->num_members);
				1201	struct packet_sock *po;
				1202	struct sock *sk;
				1203
				1204	if (!net_eq(dev_net(dev), read_pnet(&f->net)) \|\|
				1205	!num) {
				1206	kfree_skb(skb);
				1207	return 0;
				1208	}
				1209
				1210	switch (f->type) {
				1211	case PACKET_FANOUT_HASH:
				1212	default:
				1213	if (f->defrag) {
				1214	skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET);
				1215	if (!skb)
				1216	return 0;
				1217	}
				1218	skb_get_rxhash(skb);
				1219	sk = fanout_demux_hash(f, skb, num);
				1220	break;
				1221	case PACKET_FANOUT_LB:
				1222	sk = fanout_demux_lb(f, skb, num);
				1223	break;
				1224	case PACKET_FANOUT_CPU:
				1225	sk = fanout_demux_cpu(f, skb, num);
				1226	break;
				1227	}
				1228
				1229	po = pkt_sk(sk);
				1230
				1231	return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
				1232	}
				1233
				/* Taken by fanout_add() and fanout_release() around their updates of fanout_list. */
				1234	static DEFINE_MUTEX(fanout_mutex);
				/* List of fanout groups; entries are linked through packet_fanout.list. */
				1235	static LIST_HEAD(fanout_list);
				1236
				1237	static void __fanout_link(struct sock sk, struct packet_sock po)
				1238	{
				1239	struct packet_fanout *f = po->fanout;
				1240
				1241	spin_lock(&f->lock);
				1242	f->arr[f->num_members] = sk;
				1243	smp_wmb();
				1244	f->num_members++;
				1245	spin_unlock(&f->lock);
				1246	}
				1247
				1248	static void __fanout_unlink(struct sock sk, struct packet_sock po)
				1249	{
				1250	struct packet_fanout *f = po->fanout;
				1251	int i;
				1252
				1253	spin_lock(&f->lock);
				1254	for (i = 0; i < f->num_members; i++) {
				1255	if (f->arr[i] == sk)
				1256	break;
				1257	}
				1258	BUG_ON(i >= f->num_members);
				1259	f->arr[i] = f->arr[f->num_members - 1];
				1260	f->num_members--;
				1261	spin_unlock(&f->lock);
				1262	}
				1263
				1264	bool match_fanout_group(struct packet_type ptype, struct sock sk)
				1265	{
				1266	if (ptype->af_packet_priv == (void)((struct packet_sock )sk)->fanout)
				1267	return true;
				1268
				1269	return false;
				1270	}
				1271
				1272	static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
				1273	{
				1274	struct packet_sock *po = pkt_sk(sk);
				1275	struct packet_fanout f, match;
				1276	u8 type = type_flags & 0xff;
				1277	u8 defrag = (type_flags & PACKET_FANOUT_FLAG_DEFRAG) ? 1 : 0;
				1278	int err;
				1279
				1280	switch (type) {
				1281	case PACKET_FANOUT_HASH:
				1282	case PACKET_FANOUT_LB:
				1283	case PACKET_FANOUT_CPU:
				1284	break;
				1285	default:
				1286	return -EINVAL;
				1287	}
				1288	mutex_lock(&fanout_mutex);//CVE-2017-15649
				1289	//if (!po->running)
				1290	//return -EINVAL;
				1291
				1292	err = -EALREADY;
				1293	if (po->fanout)
				1294	goto out;
				1295	//return -EALREADY;
				1296
				1297	//mutex_lock(&fanout_mutex);//CVE-2017-15649
				1298	match = NULL;
				1299	list_for_each_entry(f, &fanout_list, list) {
				1300	if (f->id == id &&
				1301	read_pnet(&f->net) == sock_net(sk)) {
				1302	match = f;
				1303	break;
				1304	}
				1305	}
				1306	err = -EINVAL;
				1307	if (match && match->defrag != defrag)
				1308	goto out;
				1309	if (!match) {
				1310	err = -ENOMEM;
				1311	match = kzalloc(sizeof(*match), GFP_KERNEL);
				1312	if (!match)
				1313	goto out;
				1314	write_pnet(&match->net, sock_net(sk));
				1315	match->id = id;
				1316	match->type = type;
				1317	match->defrag = defrag;
				1318	atomic_set(&match->rr_cur, 0);
				1319	INIT_LIST_HEAD(&match->list);
				1320	spin_lock_init(&match->lock);
				1321	atomic_set(&match->sk_ref, 0);
				1322	match->prot_hook.type = po->prot_hook.type;
				1323	match->prot_hook.dev = po->prot_hook.dev;
				1324	match->prot_hook.func = packet_rcv_fanout;
				1325	match->prot_hook.af_packet_priv = match;
				1326	match->prot_hook.id_match = match_fanout_group;
				1327	dev_add_pack(&match->prot_hook);
				1328	list_add(&match->list, &fanout_list);
				1329	}
				1330	err = -EINVAL;
				1331	spin_lock(&po->bind_lock);//CVE-2017-15649
				1332	if (po->running &&
				1333	match->type == type &&
				1334	//if (match->type == type &&
				1335	match->prot_hook.type == po->prot_hook.type &&
				1336	match->prot_hook.dev == po->prot_hook.dev) {
				1337	err = -ENOSPC;
				1338	if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
				1339	__dev_remove_pack(&po->prot_hook);
				1340	po->fanout = match;
				1341	atomic_inc(&match->sk_ref);
				1342	__fanout_link(sk, po);
				1343	err = 0;
				1344	}
				1345	}
				1346	spin_unlock(&po->bind_lock);//CVE-2017-15649
				1347
				1348	if (err && !atomic_read(&match->sk_ref)) {
				1349	list_del(&match->list);
				1350	kfree(match);
				1351	}//CVE-2017-15649
				1352
				1353	out:
				1354	mutex_unlock(&fanout_mutex);
				1355	return err;
				1356	}
				1357
				1358	static void fanout_release(struct sock *sk)
				1359	{
				1360	struct packet_sock *po = pkt_sk(sk);
				1361	struct packet_fanout *f;
				1362
				1363	f = po->fanout;
				1364	if (!f)
				1365	return;
				1366
				1367	po->fanout = NULL;
				1368
				1369	mutex_lock(&fanout_mutex);
				1370	if (atomic_dec_and_test(&f->sk_ref)) {
				1371	list_del(&f->list);
				1372	dev_remove_pack(&f->prot_hook);
				1373	kfree(f);
				1374	}
				1375	mutex_unlock(&fanout_mutex);
				1376	}
				1377
				/*
				 * Forward declarations of the two proto_ops tables.  Both are static, so
				 * their initialisers have to appear elsewhere in this file.
				 */
				1378	static const struct proto_ops packet_ops;
				1379	
				1380	static const struct proto_ops packet_ops_spkt;
				1381
				1382	static int packet_rcv_spkt(struct sk_buff skb, struct net_device dev,
				1383	struct packet_type pt, struct net_device orig_dev)
				1384	{
				1385	struct sock *sk;
				1386	struct sockaddr_pkt *spkt;
				1387
				1388	/*
				1389	* When we registered the protocol we saved the socket in the data
				1390	* field for just this event.
				1391	*/
				1392
				1393	sk = pt->af_packet_priv;
				1394
				1395	/*
				1396	* Yank back the headers [hope the device set this
				1397	* right or kerboom...]
				1398	*
				1399	* Incoming packets have ll header pulled,
				1400	* push it back.
				1401	*
				1402	* For outgoing ones skb->data == skb_mac_header(skb)
				1403	* so that this procedure is noop.
				1404	*/
				1405
				1406	if (skb->pkt_type == PACKET_LOOPBACK)
				1407	goto out;
				1408
				1409	if (!net_eq(dev_net(dev), sock_net(sk)))
				1410	goto out;
				1411
				1412	skb = skb_share_check(skb, GFP_ATOMIC);
				1413	if (skb == NULL)
				1414	goto oom;
				1415
				1416	/* drop any routing info */
				1417	skb_dst_drop(skb);
				1418
				1419	/* drop conntrack reference */
				1420	nf_reset(skb);
				1421
				1422	spkt = &PACKET_SKB_CB(skb)->sa.pkt;
				1423
				1424	skb_push(skb, skb->data - skb_mac_header(skb));
				1425
				1426	/*
				1427	* The SOCK_PACKET socket receives _all_ frames.
				1428	*/
				1429
				1430	spkt->spkt_family = dev->type;
				1431	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
				1432	spkt->spkt_protocol = skb->protocol;
				1433
				1434	/*
				1435	* Charge the memory to the socket. This is done specifically
				1436	* to prevent sockets using all the memory up.
				1437	*/
				1438
				1439	if (sock_queue_rcv_skb(sk, skb) == 0)
				1440	return 0;
				1441
				1442	out:
				1443	kfree_skb(skb);
				1444	oom:
				1445	return 0;
				1446	}
				1447
				1448
				1449	/*
				1450	* Output a raw packet to a device layer. This bypasses all the other
				1451	* protocol layers and you must therefore supply it with a complete frame
				1452	*/
				1453
				1454	static int packet_sendmsg_spkt(struct kiocb iocb, struct socket sock,
				1455	struct msghdr *msg, size_t len)
				1456	{
				1457	struct sock *sk = sock->sk;
				1458	struct sockaddr_pkt saddr = (struct sockaddr_pkt )msg->msg_name;
				1459	struct sk_buff *skb = NULL;
				1460	struct net_device *dev;
				1461	__be16 proto = 0;
				1462	int err;
				1463	int extra_len = 0;
				1464
				1465	/*
				1466	* Get and verify the address.
				1467	*/
				1468
				1469	if (saddr) {
				1470	if (msg->msg_namelen < sizeof(struct sockaddr))
				1471	return -EINVAL;
				1472	if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
				1473	proto = saddr->spkt_protocol;
				1474	} else
				1475	return -ENOTCONN; /* SOCK_PACKET must be sent giving an address */
				1476
				1477	/*
				1478	* Find the device first to size check it
				1479	*/
				1480
				1481	saddr->spkt_device[13] = 0;
				1482	retry:
				1483	rcu_read_lock();
				1484	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
				1485	err = -ENODEV;
				1486	if (dev == NULL)
				1487	goto out_unlock;
				1488
				1489	err = -ENETDOWN;
				1490	if (!(dev->flags & IFF_UP))
				1491	goto out_unlock;
				1492
				1493	/*
				1494	* You may not queue a frame bigger than the mtu. This is the lowest level
				1495	* raw protocol and you must do your own fragmentation at this level.
				1496	*/
				1497
				1498	if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
				1499	if (!netif_supports_nofcs(dev)) {
				1500	err = -EPROTONOSUPPORT;
				1501	goto out_unlock;
				1502	}
				1503	extra_len = 4; /* We're doing our own CRC */
				1504	}
				1505
				1506	err = -EMSGSIZE;
				1507	if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len)
				1508	goto out_unlock;
				1509
				1510	if (!skb) {
				1511	size_t reserved = LL_RESERVED_SPACE(dev);
				1512	int tlen = dev->needed_tailroom;
				1513	unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
				1514
				1515	rcu_read_unlock();
				1516	skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL);
				1517	if (skb == NULL)
				1518	return -ENOBUFS;
				1519	/* FIXME: Save some space for broken drivers that write a hard
				1520	* header at transmission time by themselves. PPP is the notable
				1521	* one here. This should really be fixed at the driver level.
				1522	*/
				1523	skb_reserve(skb, reserved);
				1524	skb_reset_network_header(skb);
				1525
				1526	/* Try to align data part correctly */
				1527	if (hhlen) {
				1528	skb->data -= hhlen;
				1529	skb->tail -= hhlen;
				1530	if (len < hhlen)
				1531	skb_reset_network_header(skb);
				1532	}
				1533	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
				1534	if (err)
				1535	goto out_free;
				1536	goto retry;
				1537	}
				1538
				1539	if (len > (dev->mtu + dev->hard_header_len + extra_len)) {
				1540	/* Earlier code assumed this would be a VLAN pkt,
				1541	* double-check this now that we have the actual
				1542	* packet in hand.
				1543	*/
				1544	struct ethhdr *ehdr;
				1545	skb_reset_mac_header(skb);
				1546	ehdr = eth_hdr(skb);
				1547	if (ehdr->h_proto != htons(ETH_P_8021Q)) {
				1548	err = -EMSGSIZE;
				1549	goto out_unlock;
				1550	}
				1551	}
				1552
				1553	skb->protocol = proto;
				1554	skb->dev = dev;
				1555	skb->priority = sk->sk_priority;
				1556	skb->mark = sk->sk_mark;
				1557	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
				1558	if (err < 0)
				1559	goto out_unlock;
				1560
				1561	if (unlikely(extra_len == 4))
				1562	skb->no_fcs = 1;
				1563
				1564	dev_queue_xmit(skb);
				1565	rcu_read_unlock();
				1566	return len;
				1567
				1568	out_unlock:
				1569	rcu_read_unlock();
				1570	out_free:
				1571	kfree_skb(skb);
				1572	return err;
				1573	}
				1574
				1575	static unsigned int run_filter(const struct sk_buff *skb,
				1576	const struct sock *sk,
				1577	unsigned int res)
				1578	{
				1579	struct sk_filter *filter;
				1580
				1581	rcu_read_lock();
				1582	filter = rcu_dereference(sk->sk_filter);
				1583	if (filter != NULL)
				1584	res = SK_RUN_FILTER(filter, skb);
				1585	rcu_read_unlock();
				1586	net_run_track(PRT_TCPDUMP,"tcpdump");
				1587	return res;
				1588	}
				1589
				1590	/*
				1591	* This function makes lazy skb cloning in hope that most of packets
				1592	* are discarded by BPF.
				1593	*
				1594	* Note tricky part: we DO mangle shared skb! skb->data, skb->len
				1595	* and skb->cb are mangled. It works because (and until) packets
				1596	* falling here are owned by current CPU. Output packets are cloned
				1597	* by dev_queue_xmit_nit(), input packets are processed by net_bh
				1598	* sequencially, so that if we return skb to original state on exit,
				1599	* we will not harm anyone.
				1600	*/
				1601
				1602	static int packet_rcv(struct sk_buff skb, struct net_device dev,
				1603	struct packet_type pt, struct net_device orig_dev)
				1604	{
				1605	struct sock *sk;
				1606	struct sockaddr_ll *sll;
				1607	struct packet_sock *po;
				1608	u8 *skb_head = skb->data;
				1609	int skb_len = skb->len;
				1610	unsigned int snaplen, res;
				1611
				1612	if (skb->pkt_type == PACKET_LOOPBACK)
				1613	goto drop;
				1614
				1615	sk = pt->af_packet_priv;
				1616	po = pkt_sk(sk);
				1617
				1618	if (!net_eq(dev_net(dev), sock_net(sk)))
				1619	goto drop;
				1620
				1621	skb->dev = dev;
				1622
				1623	if (dev->header_ops) {
				1624	/* The device has an explicit notion of ll header,
				1625	* exported to higher levels.
				1626	*
				1627	* Otherwise, the device hides details of its frame
				1628	* structure, so that corresponding packet head is
				1629	* never delivered to user.
				1630	*/
				1631	if (sk->sk_type != SOCK_DGRAM)
				1632	skb_push(skb, skb->data - skb_mac_header(skb));
				1633	else if (skb->pkt_type == PACKET_OUTGOING) {
				1634	/* Special case: outgoing packets have ll header at head */
				1635	skb_pull(skb, skb_network_offset(skb));
				1636	}
				1637	}
				1638
				1639	snaplen = skb->len;
				1640
				1641	res = run_filter(skb, sk, snaplen);
				1642	if (!res)
				1643	goto drop_n_restore;
				1644	if (snaplen > res)
				1645	snaplen = res;
				1646
				1647	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
				1648	goto drop_n_acct;
				1649
				1650	if (skb_shared(skb)) {
				1651	struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
				1652	if (nskb == NULL)
				1653	goto drop_n_acct;
				1654
				1655	if (skb_head != skb->data) {
				1656	skb->data = skb_head;
				1657	skb->len = skb_len;
				1658	}
				1659	kfree_skb(skb);
				1660	skb = nskb;
				1661	}
				1662
				1663	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
				1664	sizeof(skb->cb));
				1665
				1666	sll = &PACKET_SKB_CB(skb)->sa.ll;
				1667	sll->sll_family = AF_PACKET;
				1668	sll->sll_hatype = dev->type;
				1669	sll->sll_protocol = skb->protocol;
				1670	sll->sll_pkttype = skb->pkt_type;
				1671	if (unlikely(po->origdev))
				1672	sll->sll_ifindex = orig_dev->ifindex;
				1673	else
				1674	sll->sll_ifindex = dev->ifindex;
				1675
				1676	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
				1677
				1678	PACKET_SKB_CB(skb)->origlen = skb->len;
				1679
				1680	if (pskb_trim(skb, snaplen))
				1681	goto drop_n_acct;
				1682
				1683	skb_set_owner_r(skb, sk);
				1684	skb->dev = NULL;
				1685	skb_dst_drop(skb);
				1686
				1687	/* drop conntrack reference */
				1688	nf_reset(skb);
				1689
				1690	spin_lock(&sk->sk_receive_queue.lock);
				1691	po->stats.tp_packets++;
				1692	skb->dropcount = atomic_read(&sk->sk_drops);
				1693	__skb_queue_tail(&sk->sk_receive_queue, skb);
				1694	spin_unlock(&sk->sk_receive_queue.lock);
				1695	sk->sk_data_ready(sk, skb->len);
				1696	return 0;
				1697
				1698	drop_n_acct:
				1699	spin_lock(&sk->sk_receive_queue.lock);
				1700	po->stats.tp_drops++;
				1701	atomic_inc(&sk->sk_drops);
				1702	spin_unlock(&sk->sk_receive_queue.lock);
				1703
				1704	drop_n_restore:
				1705	if (skb_head != skb->data && skb_shared(skb)) {
				1706	skb->data = skb_head;
				1707	skb->len = skb_len;
				1708	}
				1709	drop:
				1710	consume_skb(skb);
				1711	return 0;
				1712	}
				1713
				1714	static int tpacket_rcv(struct sk_buff skb, struct net_device dev,
				1715	struct packet_type pt, struct net_device orig_dev)
				1716	{
				1717	struct sock *sk;
				1718	struct packet_sock *po;
				1719	struct sockaddr_ll *sll;
				1720	union {
				1721	struct tpacket_hdr *h1;
				1722	struct tpacket2_hdr *h2;
				1723	struct tpacket3_hdr *h3;
				1724	void *raw;
				1725	} h;
				1726	u8 *skb_head = skb->data;
				1727	int skb_len = skb->len;
				1728	unsigned int snaplen, res;
				1729	unsigned long status = TP_STATUS_USER;
				1730	unsigned short macoff, netoff, hdrlen;
				1731	struct sk_buff *copy_skb = NULL;
				1732	struct timeval tv;
				1733	struct timespec ts;
				1734	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
				1735
				1736	if (skb->pkt_type == PACKET_LOOPBACK)
				1737	goto drop;
				1738
				1739	sk = pt->af_packet_priv;
				1740	po = pkt_sk(sk);
				1741
				1742	if (!net_eq(dev_net(dev), sock_net(sk)))
				1743	goto drop;
				1744
				1745	if (dev->header_ops) {
				1746	if (sk->sk_type != SOCK_DGRAM)
				1747	skb_push(skb, skb->data - skb_mac_header(skb));
				1748	else if (skb->pkt_type == PACKET_OUTGOING) {
				1749	/* Special case: outgoing packets have ll header at head */
				1750	skb_pull(skb, skb_network_offset(skb));
				1751	}
				1752	}
				1753
				1754	if (skb->ip_summed == CHECKSUM_PARTIAL)
				1755	status \|= TP_STATUS_CSUMNOTREADY;
				1756
				1757	snaplen = skb->len;
				1758
				1759	res = run_filter(skb, sk, snaplen);
				1760	if (!res)
				1761	goto drop_n_restore;
				1762	if (snaplen > res)
				1763	snaplen = res;
				1764
				1765	if (sk->sk_type == SOCK_DGRAM) {
				1766	macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
				1767	po->tp_reserve;
				1768	} else {
				1769	unsigned maclen = skb_network_offset(skb);
				1770	netoff = TPACKET_ALIGN(po->tp_hdrlen +
				1771	(maclen < 16 ? 16 : maclen)) +
				1772	po->tp_reserve;
				1773	macoff = netoff - maclen;
				1774	}
				1775	if (po->tp_version <= TPACKET_V2) {
				1776	if (macoff + snaplen > po->rx_ring.frame_size) {
				1777	if (po->copy_thresh &&
				1778	atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
				1779	if (skb_shared(skb)) {
				1780	copy_skb = skb_clone(skb, GFP_ATOMIC);
				1781	} else {
				1782	copy_skb = skb_get(skb);
				1783	skb_head = skb->data;
				1784	}
				1785	if (copy_skb)
				1786	skb_set_owner_r(copy_skb, sk);
				1787	}
				1788	snaplen = po->rx_ring.frame_size - macoff;
				1789	if ((int)snaplen < 0)
				1790	snaplen = 0;
				1791	}
				1792	}
				1793	spin_lock(&sk->sk_receive_queue.lock);
				1794	h.raw = packet_current_rx_frame(po, skb,
				1795	TP_STATUS_KERNEL, (macoff+snaplen));
				1796	if (!h.raw)
				1797	goto ring_is_full;
				1798	if (po->tp_version <= TPACKET_V2) {
				1799	packet_increment_rx_head(po, &po->rx_ring);
				1800	/*
				1801	* LOSING will be reported till you read the stats,
				1802	* because it's COR - Clear On Read.
				1803	* Anyways, moving it for V1/V2 only as V3 doesn't need this
				1804	* at packet level.
				1805	*/
				1806	if (po->stats.tp_drops)
				1807	status \|= TP_STATUS_LOSING;
				1808	}
				1809	po->stats.tp_packets++;
				1810	if (copy_skb) {
				1811	status \|= TP_STATUS_COPY;
				1812	__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
				1813	}
				1814	spin_unlock(&sk->sk_receive_queue.lock);
				1815
				1816	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
				1817
				1818	switch (po->tp_version) {
				1819	case TPACKET_V1:
				1820	h.h1->tp_len = skb->len;
				1821	h.h1->tp_snaplen = snaplen;
				1822	h.h1->tp_mac = macoff;
				1823	h.h1->tp_net = netoff;
				1824	if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				1825	&& shhwtstamps->syststamp.tv64)
				1826	tv = ktime_to_timeval(shhwtstamps->syststamp);
				1827	else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				1828	&& shhwtstamps->hwtstamp.tv64)
				1829	tv = ktime_to_timeval(shhwtstamps->hwtstamp);
				1830	else if (skb->tstamp.tv64)
				1831	tv = ktime_to_timeval(skb->tstamp);
				1832	else
				1833	do_gettimeofday(&tv);
				1834	h.h1->tp_sec = tv.tv_sec;
				1835	h.h1->tp_usec = tv.tv_usec;
				1836	hdrlen = sizeof(*h.h1);
				1837	break;
				1838	case TPACKET_V2:
				1839	h.h2->tp_len = skb->len;
				1840	h.h2->tp_snaplen = snaplen;
				1841	h.h2->tp_mac = macoff;
				1842	h.h2->tp_net = netoff;
				1843	if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				1844	&& shhwtstamps->syststamp.tv64)
				1845	ts = ktime_to_timespec(shhwtstamps->syststamp);
				1846	else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				1847	&& shhwtstamps->hwtstamp.tv64)
				1848	ts = ktime_to_timespec(shhwtstamps->hwtstamp);
				1849	else if (skb->tstamp.tv64)
				1850	ts = ktime_to_timespec(skb->tstamp);
				1851	else
				1852	getnstimeofday(&ts);
				1853	h.h2->tp_sec = ts.tv_sec;
				1854	h.h2->tp_nsec = ts.tv_nsec;
				1855	if (vlan_tx_tag_present(skb)) {
				1856	h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
				1857	status \|= TP_STATUS_VLAN_VALID;
				1858	} else {
				1859	h.h2->tp_vlan_tci = 0;
				1860	}
				1861	h.h2->tp_padding = 0;
				1862	hdrlen = sizeof(*h.h2);
				1863	break;
				1864	case TPACKET_V3:
				1865	/* tp_nxt_offset,vlan are already populated above.
				1866	* So DONT clear those fields here
				1867	*/
				1868	h.h3->tp_status \|= status;
				1869	h.h3->tp_len = skb->len;
				1870	h.h3->tp_snaplen = snaplen;
				1871	h.h3->tp_mac = macoff;
				1872	h.h3->tp_net = netoff;
				1873	if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				1874	&& shhwtstamps->syststamp.tv64)
				1875	ts = ktime_to_timespec(shhwtstamps->syststamp);
				1876	else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				1877	&& shhwtstamps->hwtstamp.tv64)
				1878	ts = ktime_to_timespec(shhwtstamps->hwtstamp);
				1879	else if (skb->tstamp.tv64)
				1880	ts = ktime_to_timespec(skb->tstamp);
				1881	else
				1882	getnstimeofday(&ts);
				1883	h.h3->tp_sec = ts.tv_sec;
				1884	h.h3->tp_nsec = ts.tv_nsec;
				1885	hdrlen = sizeof(*h.h3);
				1886	break;
				1887	default:
				1888	BUG();
				1889	}
				1890
				1891	sll = h.raw + TPACKET_ALIGN(hdrlen);
				1892	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
				1893	sll->sll_family = AF_PACKET;
				1894	sll->sll_hatype = dev->type;
				1895	sll->sll_protocol = skb->protocol;
				1896	sll->sll_pkttype = skb->pkt_type;
				1897	if (unlikely(po->origdev))
				1898	sll->sll_ifindex = orig_dev->ifindex;
				1899	else
				1900	sll->sll_ifindex = dev->ifindex;
				1901
				1902	smp_mb();
				1903	#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
				1904	{
				1905	u8 start, end;
				1906
				1907	if (po->tp_version <= TPACKET_V2) {
				1908	end = (u8 *)PAGE_ALIGN((unsigned long)h.raw
				1909	+ macoff + snaplen);
				1910	for (start = h.raw; start < end; start += PAGE_SIZE)
				1911	flush_dcache_page(pgv_to_page(start));
				1912	}
				1913	smp_wmb();
				1914	}
				1915	#endif
				1916	if (po->tp_version <= TPACKET_V2)
				1917	__packet_set_status(po, h.raw, status);
				1918	else
				1919	prb_clear_blk_fill_status(&po->rx_ring);
				1920
				1921	sk->sk_data_ready(sk, 0);
				1922
				1923	drop_n_restore:
				1924	if (skb_head != skb->data && skb_shared(skb)) {
				1925	skb->data = skb_head;
				1926	skb->len = skb_len;
				1927	}
				1928	drop:
				1929	kfree_skb(skb);
				1930	return 0;
				1931
				1932	ring_is_full:
				1933	po->stats.tp_drops++;
				1934	spin_unlock(&sk->sk_receive_queue.lock);
				1935
				1936	sk->sk_data_ready(sk, 0);
				1937	kfree_skb(copy_skb);
				1938	goto drop_n_restore;
				1939	}
				1940
				1941	static void tpacket_destruct_skb(struct sk_buff *skb)
				1942	{
				1943	struct packet_sock *po = pkt_sk(skb->sk);
				1944	void *ph;
				1945
				1946	if (likely(po->tx_ring.pg_vec)) {
				1947	ph = skb_shinfo(skb)->destructor_arg;
				1948	BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
				1949	atomic_dec(&po->tx_ring.pending);
				1950	__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
				1951	}
				1952
				1953	sock_wfree(skb);
				1954	}
				1955
				1956	static int tpacket_fill_skb(struct packet_sock po, struct sk_buff skb,
				1957	void frame, struct net_device dev, int size_max,
				1958	__be16 proto, unsigned char *addr, int hlen)
				1959	{
				1960	union {
				1961	struct tpacket_hdr *h1;
				1962	struct tpacket2_hdr *h2;
				1963	void *raw;
				1964	} ph;
				1965	int to_write, offset, len, tp_len, nr_frags, len_max;
				1966	struct socket *sock = po->sk.sk_socket;
				1967	struct page *page;
				1968	void *data;
				1969	int err;
				1970
				1971	ph.raw = frame;
				1972
				1973	skb->protocol = proto;
				1974	skb->dev = dev;
				1975	skb->priority = po->sk.sk_priority;
				1976	skb->mark = po->sk.sk_mark;
				1977	skb_shinfo(skb)->destructor_arg = ph.raw;
				1978
				1979	switch (po->tp_version) {
				1980	case TPACKET_V2:
				1981	tp_len = ph.h2->tp_len;
				1982	break;
				1983	default:
				1984	tp_len = ph.h1->tp_len;
				1985	break;
				1986	}
				1987	if (unlikely(tp_len > size_max)) {
				1988	pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
				1989	return -EMSGSIZE;
				1990	}
				1991
				1992	skb_reserve(skb, hlen);
				1993	skb_reset_network_header(skb);
				1994
				1995	data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
				1996	to_write = tp_len;
				1997
				1998	if (sock->type == SOCK_DGRAM) {
				1999	err = dev_hard_header(skb, dev, ntohs(proto), addr,
				2000	NULL, tp_len);
				2001	if (unlikely(err < 0))
				2002	return -EINVAL;
				2003	} else if (dev->hard_header_len) {
				2004	/* net device doesn't like empty head */
				2005	if (unlikely(tp_len <= dev->hard_header_len)) {
				2006	pr_err("packet size is too short (%d < %d)\n",
				2007	tp_len, dev->hard_header_len);
				2008	return -EINVAL;
				2009	}
				2010
				2011	skb_push(skb, dev->hard_header_len);
				2012	err = skb_store_bits(skb, 0, data,
				2013	dev->hard_header_len);
				2014	if (unlikely(err))
				2015	return err;
				2016
				2017	data += dev->hard_header_len;
				2018	to_write -= dev->hard_header_len;
				2019	}
				2020
				2021	err = -EFAULT;
				2022	offset = offset_in_page(data);
				2023	len_max = PAGE_SIZE - offset;
				2024	len = ((to_write > len_max) ? len_max : to_write);
				2025
				2026	skb->data_len = to_write;
				2027	skb->len += to_write;
				2028	skb->truesize += to_write;
				2029	atomic_add(to_write, &po->sk.sk_wmem_alloc);
				2030
				2031	while (likely(to_write)) {
				2032	nr_frags = skb_shinfo(skb)->nr_frags;
				2033
				2034	if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
				2035	pr_err("Packet exceed the number of skb frags(%lu)\n",
				2036	MAX_SKB_FRAGS);
				2037	return -EFAULT;
				2038	}
				2039
				2040	page = pgv_to_page(data);
				2041	data += len;
				2042	flush_dcache_page(page);
				2043	get_page(page);
				2044	skb_fill_page_desc(skb, nr_frags, page, offset, len);
				2045	to_write -= len;
				2046	offset = 0;
				2047	len_max = PAGE_SIZE;
				2048	len = ((to_write > len_max) ? len_max : to_write);
				2049	}
				2050
				2051	return tp_len;
				2052	}
				2053
				2054	static struct net_device packet_cached_dev_get(struct packet_sock po)
				2055	{
				2056	struct net_device *dev;
				2057
				2058	rcu_read_lock();
				2059	dev = rcu_dereference(po->cached_dev);
				2060	if (dev)
				2061	dev_hold(dev);
				2062	rcu_read_unlock();
				2063
				2064	return dev;
				2065	}
				2066
				2067	static int tpacket_snd(struct packet_sock po, struct msghdr msg)
				2068	{
				2069	struct sk_buff *skb;
				2070	struct net_device *dev;
				2071	__be16 proto;
				2072	int err, reserve = 0;
				2073	void *ph;
				2074	struct sockaddr_ll saddr = (struct sockaddr_ll )msg->msg_name;
				2075	int tp_len, size_max;
				2076	unsigned char *addr;
				2077	int len_sum = 0;
				2078	int status = 0;
				2079	int hlen, tlen;
				2080
				2081	mutex_lock(&po->pg_vec_lock);
				2082
				2083	err = -EBUSY;
				2084	if (saddr == NULL) {
				2085	dev = packet_cached_dev_get(po);
				2086	proto = po->num;
				2087	addr = NULL;
				2088	} else {
				2089	err = -EINVAL;
				2090	if (msg->msg_namelen < sizeof(struct sockaddr_ll))
				2091	goto out;
				2092	if (msg->msg_namelen < (saddr->sll_halen
				2093	+ offsetof(struct sockaddr_ll,
				2094	sll_addr)))
				2095	goto out;
				2096	proto = saddr->sll_protocol;
				2097	addr = saddr->sll_addr;
				2098	dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
				2099	}
				2100
				2101	err = -ENXIO;
				2102	if (unlikely(dev == NULL))
				2103	goto out;
				2104	err = -ENETDOWN;
				2105	if (unlikely(!(dev->flags & IFF_UP)))
				2106	goto out_put;
				2107
				2108	reserve = dev->hard_header_len;
				2109
				2110	size_max = po->tx_ring.frame_size
				2111	- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
				2112
				2113	if (size_max > dev->mtu + reserve)
				2114	size_max = dev->mtu + reserve;
				2115
				2116	do {
				2117	ph = packet_current_frame(po, &po->tx_ring,
				2118	TP_STATUS_SEND_REQUEST);
				2119
				2120	if (unlikely(ph == NULL)) {
				2121	schedule();
				2122	continue;
				2123	}
				2124
				2125	status = TP_STATUS_SEND_REQUEST;
				2126	hlen = LL_RESERVED_SPACE(dev);
				2127	tlen = dev->needed_tailroom;
				2128	skb = sock_alloc_send_skb(&po->sk,
				2129	hlen + tlen + sizeof(struct sockaddr_ll),
				2130	0, &err);
				2131
				2132	if (unlikely(skb == NULL))
				2133	goto out_status;
				2134
				2135	tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
				2136	addr, hlen);
				2137
				2138	if (unlikely(tp_len < 0)) {
				2139	if (po->tp_loss) {
				2140	__packet_set_status(po, ph,
				2141	TP_STATUS_AVAILABLE);
				2142	packet_increment_head(&po->tx_ring);
				2143	kfree_skb(skb);
				2144	continue;
				2145	} else {
				2146	status = TP_STATUS_WRONG_FORMAT;
				2147	err = tp_len;
				2148	goto out_status;
				2149	}
				2150	}
				2151
				2152	skb->destructor = tpacket_destruct_skb;
				2153	__packet_set_status(po, ph, TP_STATUS_SENDING);
				2154	atomic_inc(&po->tx_ring.pending);
				2155
				2156	status = TP_STATUS_SEND_REQUEST;
				2157	err = dev_queue_xmit(skb);
				2158	if (unlikely(err > 0)) {
				2159	err = net_xmit_errno(err);
				2160	if (err && __packet_get_status(po, ph) ==
				2161	TP_STATUS_AVAILABLE) {
				2162	/* skb was destructed already */
				2163	skb = NULL;
				2164	goto out_status;
				2165	}
				2166	/*
				2167	* skb was dropped but not destructed yet;
				2168	* let's treat it like congestion or err < 0
				2169	*/
				2170	err = 0;
				2171	}
				2172	packet_increment_head(&po->tx_ring);
				2173	len_sum += tp_len;
				2174	} while (likely((ph != NULL) \|\|
				2175	((!(msg->msg_flags & MSG_DONTWAIT)) &&
				2176	(atomic_read(&po->tx_ring.pending))))
				2177	);
				2178
				2179	err = len_sum;
				2180	goto out_put;
				2181
				2182	out_status:
				2183	__packet_set_status(po, ph, status);
				2184	kfree_skb(skb);
				2185	out_put:
				2186	dev_put(dev);
				2187	out:
				2188	mutex_unlock(&po->pg_vec_lock);
				2189	return err;
				2190	}
				2191
				2192	static struct sk_buff packet_alloc_skb(struct sock sk, size_t prepad,
				2193	size_t reserve, size_t len,
				2194	size_t linear, int noblock,
				2195	int *err)
				2196	{
				2197	struct sk_buff *skb;
				2198
				2199	/* Under a page? Don't bother with paged skb. */
				2200	if (prepad + len < PAGE_SIZE \|\| !linear)
				2201	linear = len;
				2202
				2203	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
				2204	err);
				2205	if (!skb)
				2206	return NULL;
				2207
				2208	skb_reserve(skb, reserve);
				2209	skb_put(skb, linear);
				2210	skb->data_len = len - linear;
				2211	skb->len += len - linear;
				2212
				2213	return skb;
				2214	}
				2215
				2216	static int packet_snd(struct socket *sock,
				2217	struct msghdr *msg, size_t len)
				2218	{
				2219	struct sock *sk = sock->sk;
				2220	struct sockaddr_ll saddr = (struct sockaddr_ll )msg->msg_name;
				2221	struct sk_buff *skb;
				2222	struct net_device *dev;
				2223	__be16 proto;
				2224	unsigned char *addr;
				2225	int err, reserve = 0;
				2226	struct virtio_net_hdr vnet_hdr = { 0 };
				2227	int offset = 0;
				2228	int vnet_hdr_len;
				2229	struct packet_sock *po = pkt_sk(sk);
				2230	unsigned short gso_type = 0;
				2231	int hlen, tlen;
				2232	int extra_len = 0;
				2233
				2234	/*
				2235	* Get and verify the address.
				2236	*/
				2237
				2238	if (saddr == NULL) {
				2239	dev = packet_cached_dev_get(po);
				2240	proto = po->num;
				2241	addr = NULL;
				2242	} else {
				2243	err = -EINVAL;
				2244	if (msg->msg_namelen < sizeof(struct sockaddr_ll))
				2245	goto out;
				2246	if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
				2247	goto out;
				2248	proto = saddr->sll_protocol;
				2249	addr = saddr->sll_addr;
				2250	dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
				2251	}
				2252
				2253	err = -ENXIO;
				2254	if (unlikely(dev == NULL))
				2255	goto out_unlock;
				2256	err = -ENETDOWN;
				2257	if (unlikely(!(dev->flags & IFF_UP)))
				2258	goto out_unlock;
				2259
				2260	if (sock->type == SOCK_RAW)
				2261	reserve = dev->hard_header_len;
				2262	if (po->has_vnet_hdr) {
				2263	vnet_hdr_len = sizeof(vnet_hdr);
				2264
				2265	err = -EINVAL;
				2266	if (len < vnet_hdr_len)
				2267	goto out_unlock;
				2268
				2269	len -= vnet_hdr_len;
				2270
				2271	err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
				2272	vnet_hdr_len);
				2273	if (err < 0)
				2274	goto out_unlock;
				2275
				2276	if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
				2277	(vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
				2278	vnet_hdr.hdr_len))
				2279	vnet_hdr.hdr_len = vnet_hdr.csum_start +
				2280	vnet_hdr.csum_offset + 2;
				2281
				2282	err = -EINVAL;
				2283	if (vnet_hdr.hdr_len > len)
				2284	goto out_unlock;
				2285
				2286	if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
				2287	switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
				2288	case VIRTIO_NET_HDR_GSO_TCPV4:
				2289	gso_type = SKB_GSO_TCPV4;
				2290	break;
				2291	case VIRTIO_NET_HDR_GSO_TCPV6:
				2292	gso_type = SKB_GSO_TCPV6;
				2293	break;
				2294	case VIRTIO_NET_HDR_GSO_UDP:
				2295	gso_type = SKB_GSO_UDP;
				2296	break;
				2297	default:
				2298	goto out_unlock;
				2299	}
				2300
				2301	if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
				2302	gso_type \|= SKB_GSO_TCP_ECN;
				2303
				2304	if (vnet_hdr.gso_size == 0)
				2305	goto out_unlock;
				2306
				2307	}
				2308	}
				2309
				2310	if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
				2311	if (!netif_supports_nofcs(dev)) {
				2312	err = -EPROTONOSUPPORT;
				2313	goto out_unlock;
				2314	}
				2315	extra_len = 4; /* We're doing our own CRC */
				2316	}
				2317
				2318	err = -EMSGSIZE;
				2319	if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
				2320	goto out_unlock;
				2321
				2322	err = -ENOBUFS;
				2323	hlen = LL_RESERVED_SPACE(dev);
				2324	tlen = dev->needed_tailroom;
				2325	skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, vnet_hdr.hdr_len,
				2326	msg->msg_flags & MSG_DONTWAIT, &err);
				2327	if (skb == NULL)
				2328	goto out_unlock;
				2329
				2330	skb_set_network_header(skb, reserve);
				2331
				2332	err = -EINVAL;
				2333	if (sock->type == SOCK_DGRAM &&
				2334	(offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
				2335	goto out_free;
				2336
				2337	/* Returns -EFAULT on error */
				2338	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
				2339	if (err)
				2340	goto out_free;
				2341	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
				2342	if (err < 0)
				2343	goto out_free;
				2344
				2345	if (!gso_type && (len > dev->mtu + reserve + extra_len)) {
				2346	/* Earlier code assumed this would be a VLAN pkt,
				2347	* double-check this now that we have the actual
				2348	* packet in hand.
				2349	*/
				2350	struct ethhdr *ehdr;
				2351	skb_reset_mac_header(skb);
				2352	ehdr = eth_hdr(skb);
				2353	if (ehdr->h_proto != htons(ETH_P_8021Q)) {
				2354	err = -EMSGSIZE;
				2355	goto out_free;
				2356	}
				2357	}
				2358
				2359	skb->protocol = proto;
				2360	skb->dev = dev;
				2361	skb->priority = sk->sk_priority;
				2362	skb->mark = sk->sk_mark;
				2363
				2364	if (po->has_vnet_hdr) {
				2365	if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
				2366	if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
				2367	vnet_hdr.csum_offset)) {
				2368	err = -EINVAL;
				2369	goto out_free;
				2370	}
				2371	}
				2372
				2373	skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
				2374	skb_shinfo(skb)->gso_type = gso_type;
				2375
				2376	/* Header must be checked, and gso_segs computed. */
				2377	skb_shinfo(skb)->gso_type \|= SKB_GSO_DODGY;
				2378	skb_shinfo(skb)->gso_segs = 0;
				2379
				2380	len += vnet_hdr_len;
				2381	}
				2382
				2383	if (unlikely(extra_len == 4))
				2384	skb->no_fcs = 1;
				2385
				2386	/*
				2387	* Now send it
				2388	*/
				2389
				2390	err = dev_queue_xmit(skb);
				2391	if (err > 0 && (err = net_xmit_errno(err)) != 0)
				2392	goto out_unlock;
				2393
				2394	dev_put(dev);
				2395
				2396	return len;
				2397
				2398	out_free:
				2399	kfree_skb(skb);
				2400	out_unlock:
				2401	if (dev)
				2402	dev_put(dev);
				2403	out:
				2404	return err;
				2405	}
				2406
				2407	static int packet_sendmsg(struct kiocb iocb, struct socket sock,
				2408	struct msghdr *msg, size_t len)
				2409	{
				2410	struct sock *sk = sock->sk;
				2411	struct packet_sock *po = pkt_sk(sk);
				2412	if (po->tx_ring.pg_vec)
				2413	return tpacket_snd(po, msg);
				2414	else
				2415	return packet_snd(sock, msg, len);
				2416	}
				2417
				2418	/*
				2419	* Close a PACKET socket. This is fairly simple. We immediately go
				2420	* to 'closed' state and remove our protocol entry in the device list.
				2421	*/
				2422
				2423	static int packet_release(struct socket *sock)
				2424	{
				2425	struct sock *sk = sock->sk;
				2426	struct packet_sock *po;
				2427	struct net *net;
				2428	union tpacket_req_u req_u;
				2429
				2430	if (!sk)
				2431	return 0;
				2432
				2433	net = sock_net(sk);
				2434	po = pkt_sk(sk);
				2435
				2436	spin_lock_bh(&net->packet.sklist_lock);
				2437	sk_del_node_init_rcu(sk);
				2438	sock_prot_inuse_add(net, sk->sk_prot, -1);
				2439	spin_unlock_bh(&net->packet.sklist_lock);
				2440
				2441	spin_lock(&po->bind_lock);
				2442	unregister_prot_hook(sk, false);
				2443	if (po->prot_hook.dev) {
				2444	dev_put(po->prot_hook.dev);
				2445	po->prot_hook.dev = NULL;
				2446	}
				2447	spin_unlock(&po->bind_lock);
				2448
				2449	packet_flush_mclist(sk);
				2450
				2451	if (po->rx_ring.pg_vec) {
				2452	memset(&req_u, 0, sizeof(req_u));
				2453	packet_set_ring(sk, &req_u, 1, 0);
				2454	}
				2455
				2456	if (po->tx_ring.pg_vec) {
				2457	memset(&req_u, 0, sizeof(req_u));
				2458	packet_set_ring(sk, &req_u, 1, 1);
				2459	}
				2460
				2461	fanout_release(sk);
				2462
				2463	synchronize_net();
				2464	/*
				2465	* Now the socket is dead. No more input will appear.
				2466	*/
				2467	sock_orphan(sk);
				2468	sock->sk = NULL;
				2469
				2470	/* Purge queues */
				2471
				2472	skb_queue_purge(&sk->sk_receive_queue);
				2473	sk_refcnt_debug_release(sk);
				2474
				2475	sock_put(sk);
				2476	return 0;
				2477	}
				2478
				2479	/*
				2480	* Attach a packet hook.
				2481	*/
				2482
				2483	static int packet_do_bind(struct sock sk, struct net_device dev, __be16 protocol)
				2484	{
				2485	struct packet_sock *po = pkt_sk(sk);
				2486	int ret = 0;
				2487	//CVE-2017-15649
				2488	lock_sock(sk);
				2489	spin_lock(&po->bind_lock);
				2490
				2491	if (po->fanout) {
				2492	if (dev)
				2493	dev_put(dev);
				2494
				2495	//return -EINVAL;
				2496	ret = -EINVAL;
				2497	goto out_unlock;//CVE-2017-15649
				2498
				2499	}
				2500
				2501	//lock_sock(sk);
				2502
				2503	//spin_lock(&po->bind_lock);//CVE-2017-15649
				2504	unregister_prot_hook(sk, true);
				2505	po->num = protocol;
				2506	po->prot_hook.type = protocol;
				2507	if (po->prot_hook.dev)
				2508	dev_put(po->prot_hook.dev);
				2509	po->prot_hook.dev = dev;
				2510
				2511	po->ifindex = dev ? dev->ifindex : 0;
				2512
				2513	if (protocol == 0)
				2514	goto out_unlock;
				2515
				2516	if (!dev \|\| (dev->flags & IFF_UP)) {
				2517	register_prot_hook(sk);
				2518	} else {
				2519	sk->sk_err = ENETDOWN;
				2520	if (!sock_flag(sk, SOCK_DEAD))
				2521	sk->sk_error_report(sk);
				2522	}
				2523
				2524	out_unlock:
				2525	spin_unlock(&po->bind_lock);
				2526	release_sock(sk);
				2527	return ret;//CVE-2017-15649
				2528	}
				2529
				2530	/*
				2531	* Bind a packet socket to a device
				2532	*/
				2533
				2534	static int packet_bind_spkt(struct socket sock, struct sockaddr uaddr,
				2535	int addr_len)
				2536	{
				2537	struct sock *sk = sock->sk;
				2538	char name[15];
				2539	struct net_device *dev;
				2540	int err = -ENODEV;
				2541
				2542	/*
				2543	* Check legality
				2544	*/
				2545
				2546	if (addr_len != sizeof(struct sockaddr))
				2547	return -EINVAL;
				2548	strlcpy(name, uaddr->sa_data, sizeof(name));
				2549
				2550	dev = dev_get_by_name(sock_net(sk), name);
				2551	if (dev)
				2552	err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
				2553	return err;
				2554	}
				2555
				2556	static int packet_bind(struct socket sock, struct sockaddr uaddr, int addr_len)
				2557	{
				2558	struct sockaddr_ll sll = (struct sockaddr_ll )uaddr;
				2559	struct sock *sk = sock->sk;
				2560	struct net_device *dev = NULL;
				2561	int err;
				2562
				2563
				2564	/*
				2565	* Check legality
				2566	*/
				2567
				2568	if (addr_len < sizeof(struct sockaddr_ll))
				2569	return -EINVAL;
				2570	if (sll->sll_family != AF_PACKET)
				2571	return -EINVAL;
				2572
				2573	if (sll->sll_ifindex) {
				2574	err = -ENODEV;
				2575	dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
				2576	if (dev == NULL)
				2577	goto out;
				2578	}
				2579	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
				2580
				2581	out:
				2582	return err;
				2583	}
				2584
				2585	static struct proto packet_proto = {
				2586	.name = "PACKET",
				2587	.owner = THIS_MODULE,
				2588	.obj_size = sizeof(struct packet_sock),
				2589	};
				2590
				2591	/*
				2592	* Create a packet of type SOCK_PACKET.
				2593	*/
				2594
				2595	static int packet_create(struct net net, struct socket sock, int protocol,
				2596	int kern)
				2597	{
				2598	struct sock *sk;
				2599	struct packet_sock *po;
				2600	__be16 proto = (__force __be16)protocol; /* weird, but documented */
				2601	int err;
				2602
				2603	if (!capable(CAP_NET_RAW))
				2604	return -EPERM;
				2605	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
				2606	sock->type != SOCK_PACKET)
				2607	return -ESOCKTNOSUPPORT;
				2608
				2609	sock->state = SS_UNCONNECTED;
				2610
				2611	err = -ENOBUFS;
				2612	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
				2613	if (sk == NULL)
				2614	goto out;
				2615
				2616	sock->ops = &packet_ops;
				2617	if (sock->type == SOCK_PACKET)
				2618	sock->ops = &packet_ops_spkt;
				2619
				2620	sock_init_data(sock, sk);
				2621
				2622	po = pkt_sk(sk);
				2623	sk->sk_family = PF_PACKET;
				2624	po->num = proto;
				2625	RCU_INIT_POINTER(po->cached_dev, NULL);
				2626
				2627	sk->sk_destruct = packet_sock_destruct;
				2628	sk_refcnt_debug_inc(sk);
				2629
				2630	/*
				2631	* Attach a protocol block
				2632	*/
				2633
				2634	spin_lock_init(&po->bind_lock);
				2635	mutex_init(&po->pg_vec_lock);
				2636	po->prot_hook.func = packet_rcv;
				2637
				2638	if (sock->type == SOCK_PACKET)
				2639	po->prot_hook.func = packet_rcv_spkt;
				2640
				2641	po->prot_hook.af_packet_priv = sk;
				2642
				2643	if (proto) {
				2644	po->prot_hook.type = proto;
				2645	register_prot_hook(sk);
				2646	}
				2647
				2648	spin_lock_bh(&net->packet.sklist_lock);
				2649	sk_add_node_rcu(sk, &net->packet.sklist);
				2650	sock_prot_inuse_add(net, &packet_proto, 1);
				2651	spin_unlock_bh(&net->packet.sklist_lock);
				2652
				2653	return 0;
				2654	out:
				2655	return err;
				2656	}
				2657
				2658	static int packet_recv_error(struct sock sk, struct msghdr msg, int len)
				2659	{
				2660	struct sock_exterr_skb *serr;
				2661	struct sk_buff skb, skb2;
				2662	int copied, err;
				2663
				2664	err = -EAGAIN;
				2665	skb = skb_dequeue(&sk->sk_error_queue);
				2666	if (skb == NULL)
				2667	goto out;
				2668
				2669	copied = skb->len;
				2670	if (copied > len) {
				2671	msg->msg_flags \|= MSG_TRUNC;
				2672	copied = len;
				2673	}
				2674	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
				2675	if (err)
				2676	goto out_free_skb;
				2677
				2678	sock_recv_timestamp(msg, sk, skb);
				2679
				2680	serr = SKB_EXT_ERR(skb);
				2681	put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
				2682	sizeof(serr->ee), &serr->ee);
				2683
				2684	msg->msg_flags \|= MSG_ERRQUEUE;
				2685	err = copied;
				2686
				2687	/* Reset and regenerate socket error */
				2688	spin_lock_bh(&sk->sk_error_queue.lock);
				2689	sk->sk_err = 0;
				2690	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
				2691	sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
				2692	spin_unlock_bh(&sk->sk_error_queue.lock);
				2693	sk->sk_error_report(sk);
				2694	} else
				2695	spin_unlock_bh(&sk->sk_error_queue.lock);
				2696
				2697	out_free_skb:
				2698	kfree_skb(skb);
				2699	out:
				2700	return err;
				2701	}
				2702
				2703	/*
				2704	* Pull a packet from our receive queue and hand it to the user.
				2705	* If necessary we block.
				2706	*/
				2707
				2708	static int packet_recvmsg(struct kiocb iocb, struct socket sock,
				2709	struct msghdr *msg, size_t len, int flags)
				2710	{
				2711	struct sock *sk = sock->sk;
				2712	struct sk_buff *skb;
				2713	int copied, err;
				2714	int vnet_hdr_len = 0;
				2715
				2716	err = -EINVAL;
				2717	if (flags & ~(MSG_PEEK\|MSG_DONTWAIT\|MSG_TRUNC\|MSG_CMSG_COMPAT\|MSG_ERRQUEUE))
				2718	goto out;
				2719
				2720	#if 0
				2721	/* What error should we return now? EUNATTACH? */
				2722	if (pkt_sk(sk)->ifindex < 0)
				2723	return -ENODEV;
				2724	#endif
				2725
				2726	if (flags & MSG_ERRQUEUE) {
				2727	err = packet_recv_error(sk, msg, len);
				2728	goto out;
				2729	}
				2730
				2731	/*
				2732	* Call the generic datagram receiver. This handles all sorts
				2733	* of horrible races and re-entrancy so we can forget about it
				2734	* in the protocol layers.
				2735	*
				2736	* Now it will return ENETDOWN, if device have just gone down,
				2737	* but then it will block.
				2738	*/
				2739
				2740	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
				2741
				2742	/*
				2743	* An error occurred so return it. Because skb_recv_datagram()
				2744	* handles the blocking we don't see and worry about blocking
				2745	* retries.
				2746	*/
				2747
				2748	if (skb == NULL)
				2749	goto out;
				2750
				2751	if (pkt_sk(sk)->has_vnet_hdr) {
				2752	struct virtio_net_hdr vnet_hdr = { 0 };
				2753
				2754	err = -EINVAL;
				2755	vnet_hdr_len = sizeof(vnet_hdr);
				2756	if (len < vnet_hdr_len)
				2757	goto out_free;
				2758
				2759	len -= vnet_hdr_len;
				2760
				2761	if (skb_is_gso(skb)) {
				2762	struct skb_shared_info *sinfo = skb_shinfo(skb);
				2763
				2764	/* This is a hint as to how much should be linear. */
				2765	vnet_hdr.hdr_len = skb_headlen(skb);
				2766	vnet_hdr.gso_size = sinfo->gso_size;
				2767	if (sinfo->gso_type & SKB_GSO_TCPV4)
				2768	vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
				2769	else if (sinfo->gso_type & SKB_GSO_TCPV6)
				2770	vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
				2771	else if (sinfo->gso_type & SKB_GSO_UDP)
				2772	vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
				2773	else if (sinfo->gso_type & SKB_GSO_FCOE)
				2774	goto out_free;
				2775	else
				2776	BUG();
				2777	if (sinfo->gso_type & SKB_GSO_TCP_ECN)
				2778	vnet_hdr.gso_type \|= VIRTIO_NET_HDR_GSO_ECN;
				2779	} else
				2780	vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
				2781
				2782	if (skb->ip_summed == CHECKSUM_PARTIAL) {
				2783	vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
				2784	vnet_hdr.csum_start = skb_checksum_start_offset(skb);
				2785	vnet_hdr.csum_offset = skb->csum_offset;
				2786	} else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
				2787	vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID;
				2788	} /* else everything is zero */
				2789
				2790	err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
				2791	vnet_hdr_len);
				2792	if (err < 0)
				2793	goto out_free;
				2794	}
				2795
				2796	/* You lose any data beyond the buffer you gave. If it worries
				2797	* a user program they can ask the device for its MTU
				2798	* anyway.
				2799	*/
				2800	copied = skb->len;
				2801	if (copied > len) {
				2802	copied = len;
				2803	msg->msg_flags \|= MSG_TRUNC;
				2804	}
				2805
				2806	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
				2807	if (err)
				2808	goto out_free;
				2809
				2810	sock_recv_ts_and_drops(msg, sk, skb);
				2811
				2812	if (msg->msg_name) {
				2813	/* If the address length field is there to be filled
				2814	* in, we fill it in now.
				2815	*/
				2816	if (sock->type == SOCK_PACKET) {
				2817	msg->msg_namelen = sizeof(struct sockaddr_pkt);
				2818	} else {
				2819	struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
				2820	msg->msg_namelen = sll->sll_halen +
				2821	offsetof(struct sockaddr_ll, sll_addr);
				2822	}
				2823	memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
				2824	msg->msg_namelen);
				2825	}
				2826
				2827	if (pkt_sk(sk)->auxdata) {
				2828	struct tpacket_auxdata aux;
				2829
				2830	aux.tp_status = TP_STATUS_USER;
				2831	if (skb->ip_summed == CHECKSUM_PARTIAL)
				2832	aux.tp_status \|= TP_STATUS_CSUMNOTREADY;
				2833	aux.tp_len = PACKET_SKB_CB(skb)->origlen;
				2834	aux.tp_snaplen = skb->len;
				2835	aux.tp_mac = 0;
				2836	aux.tp_net = skb_network_offset(skb);
				2837	if (vlan_tx_tag_present(skb)) {
				2838	aux.tp_vlan_tci = vlan_tx_tag_get(skb);
				2839	aux.tp_status \|= TP_STATUS_VLAN_VALID;
				2840	} else {
				2841	aux.tp_vlan_tci = 0;
				2842	}
				2843	aux.tp_padding = 0;
				2844	put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
				2845	}
				2846
				2847	/*
				2848	* Free or return the buffer as appropriate. Again this
				2849	* hides all the races and re-entrancy issues from us.
				2850	*/
				2851	err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);
				2852
				2853	out_free:
				2854	skb_free_datagram(sk, skb);
				2855	out:
				2856	return err;
				2857	}
				2858
				2859	static int packet_getname_spkt(struct socket sock, struct sockaddr uaddr,
				2860	int *uaddr_len, int peer)
				2861	{
				2862	struct net_device *dev;
				2863	struct sock *sk = sock->sk;
				2864
				2865	if (peer)
				2866	return -EOPNOTSUPP;
				2867
				2868	uaddr->sa_family = AF_PACKET;
				2869	memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
				2870	rcu_read_lock();
				2871	dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
				2872	if (dev)
				2873	strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
				2874	rcu_read_unlock();
				2875	uaddr_len = sizeof(uaddr);
				2876
				2877	return 0;
				2878	}
				2879
				2880	static int packet_getname(struct socket sock, struct sockaddr uaddr,
				2881	int *uaddr_len, int peer)
				2882	{
				2883	struct net_device *dev;
				2884	struct sock *sk = sock->sk;
				2885	struct packet_sock *po = pkt_sk(sk);
				2886	DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
				2887
				2888	if (peer)
				2889	return -EOPNOTSUPP;
				2890
				2891	sll->sll_family = AF_PACKET;
				2892	sll->sll_ifindex = po->ifindex;
				2893	sll->sll_protocol = po->num;
				2894	sll->sll_pkttype = 0;
				2895	rcu_read_lock();
				2896	dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
				2897	if (dev) {
				2898	sll->sll_hatype = dev->type;
				2899	sll->sll_halen = dev->addr_len;
				2900	memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
				2901	} else {
				2902	sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
				2903	sll->sll_halen = 0;
				2904	}
				2905	rcu_read_unlock();
				2906	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
				2907
				2908	return 0;
				2909	}
				2910
				2911	static int packet_dev_mc(struct net_device dev, struct packet_mclist i,
				2912	int what)
				2913	{
				2914	switch (i->type) {
				2915	case PACKET_MR_MULTICAST:
				2916	if (i->alen != dev->addr_len)
				2917	return -EINVAL;
				2918	if (what > 0)
				2919	return dev_mc_add(dev, i->addr);
				2920	else
				2921	return dev_mc_del(dev, i->addr);
				2922	break;
				2923	case PACKET_MR_PROMISC:
				2924	return dev_set_promiscuity(dev, what);
				2925	break;
				2926	case PACKET_MR_ALLMULTI:
				2927	return dev_set_allmulti(dev, what);
				2928	break;
				2929	case PACKET_MR_UNICAST:
				2930	if (i->alen != dev->addr_len)
				2931	return -EINVAL;
				2932	if (what > 0)
				2933	return dev_uc_add(dev, i->addr);
				2934	else
				2935	return dev_uc_del(dev, i->addr);
				2936	break;
				2937	default:
				2938	break;
				2939	}
				2940	return 0;
				2941	}
				2942
				2943	static void packet_dev_mclist(struct net_device dev, struct packet_mclist i, int what)
				2944	{
				2945	for ( ; i; i = i->next) {
				2946	if (i->ifindex == dev->ifindex)
				2947	packet_dev_mc(dev, i, what);
				2948	}
				2949	}
				2950
				2951	static int packet_mc_add(struct sock sk, struct packet_mreq_max mreq)
				2952	{
				2953	struct packet_sock *po = pkt_sk(sk);
				2954	struct packet_mclist ml, i;
				2955	struct net_device *dev;
				2956	int err;
				2957
				2958	rtnl_lock();
				2959
				2960	err = -ENODEV;
				2961	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
				2962	if (!dev)
				2963	goto done;
				2964
				2965	err = -EINVAL;
				2966	if (mreq->mr_alen > dev->addr_len)
				2967	goto done;
				2968
				2969	err = -ENOBUFS;
				2970	i = kmalloc(sizeof(*i), GFP_KERNEL);
				2971	if (i == NULL)
				2972	goto done;
				2973
				2974	err = 0;
				2975	for (ml = po->mclist; ml; ml = ml->next) {
				2976	if (ml->ifindex == mreq->mr_ifindex &&
				2977	ml->type == mreq->mr_type &&
				2978	ml->alen == mreq->mr_alen &&
				2979	memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
				2980	ml->count++;
				2981	/* Free the new element ... */
				2982	kfree(i);
				2983	goto done;
				2984	}
				2985	}
				2986
				2987	i->type = mreq->mr_type;
				2988	i->ifindex = mreq->mr_ifindex;
				2989	i->alen = mreq->mr_alen;
				2990	memcpy(i->addr, mreq->mr_address, i->alen);
				2991	i->count = 1;
				2992	i->next = po->mclist;
				2993	po->mclist = i;
				2994	err = packet_dev_mc(dev, i, 1);
				2995	if (err) {
				2996	po->mclist = i->next;
				2997	kfree(i);
				2998	}
				2999
				3000	done:
				3001	rtnl_unlock();
				3002	return err;
				3003	}
				3004
				3005	static int packet_mc_drop(struct sock sk, struct packet_mreq_max mreq)
				3006	{
				3007	struct packet_mclist ml, *mlp;
				3008
				3009	rtnl_lock();
				3010
				3011	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
				3012	if (ml->ifindex == mreq->mr_ifindex &&
				3013	ml->type == mreq->mr_type &&
				3014	ml->alen == mreq->mr_alen &&
				3015	memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
				3016	if (--ml->count == 0) {
				3017	struct net_device *dev;
				3018	*mlp = ml->next;
				3019	dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
				3020	if (dev)
				3021	packet_dev_mc(dev, ml, -1);
				3022	kfree(ml);
				3023	}
				3024	rtnl_unlock();
				3025	return 0;
				3026	}
				3027	}
				3028	rtnl_unlock();
				3029	return -EADDRNOTAVAIL;
				3030	}
				3031
				3032	static void packet_flush_mclist(struct sock *sk)
				3033	{
				3034	struct packet_sock *po = pkt_sk(sk);
				3035	struct packet_mclist *ml;
				3036
				3037	if (!po->mclist)
				3038	return;
				3039
				3040	rtnl_lock();
				3041	while ((ml = po->mclist) != NULL) {
				3042	struct net_device *dev;
				3043
				3044	po->mclist = ml->next;
				3045	dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
				3046	if (dev != NULL)
				3047	packet_dev_mc(dev, ml, -1);
				3048	kfree(ml);
				3049	}
				3050	rtnl_unlock();
				3051	}
				3052
				3053	static int
				3054	packet_setsockopt(struct socket sock, int level, int optname, char __user optval, unsigned int optlen)
				3055	{
				3056	struct sock *sk = sock->sk;
				3057	struct packet_sock *po = pkt_sk(sk);
				3058	int ret;
				3059
				3060	if (level != SOL_PACKET)
				3061	return -ENOPROTOOPT;
				3062
				3063	switch (optname) {
				3064	case PACKET_ADD_MEMBERSHIP:
				3065	case PACKET_DROP_MEMBERSHIP:
				3066	{
				3067	struct packet_mreq_max mreq;
				3068	int len = optlen;
				3069	memset(&mreq, 0, sizeof(mreq));
				3070	if (len < sizeof(struct packet_mreq))
				3071	return -EINVAL;
				3072	if (len > sizeof(mreq))
				3073	len = sizeof(mreq);
				3074	if (copy_from_user(&mreq, optval, len))
				3075	return -EFAULT;
				3076	if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
				3077	return -EINVAL;
				3078	if (optname == PACKET_ADD_MEMBERSHIP)
				3079	ret = packet_mc_add(sk, &mreq);
				3080	else
				3081	ret = packet_mc_drop(sk, &mreq);
				3082	return ret;
				3083	}
				3084
				3085	case PACKET_RX_RING:
				3086	case PACKET_TX_RING:
				3087	{
				3088	union tpacket_req_u req_u;
				3089	int len;
				3090
				3091	switch (po->tp_version) {
				3092	case TPACKET_V1:
				3093	case TPACKET_V2:
				3094	len = sizeof(req_u.req);
				3095	break;
				3096	case TPACKET_V3:
				3097	default:
				3098	len = sizeof(req_u.req3);
				3099	break;
				3100	}
				3101	if (optlen < len)
				3102	return -EINVAL;
				3103	if (pkt_sk(sk)->has_vnet_hdr)
				3104	return -EINVAL;
				3105	if (copy_from_user(&req_u.req, optval, len))
				3106	return -EFAULT;
				3107	return packet_set_ring(sk, &req_u, 0,
				3108	optname == PACKET_TX_RING);
				3109	}
				3110	case PACKET_COPY_THRESH:
				3111	{
				3112	int val;
				3113
				3114	if (optlen != sizeof(val))
				3115	return -EINVAL;
				3116	if (copy_from_user(&val, optval, sizeof(val)))
				3117	return -EFAULT;
				3118
				3119	pkt_sk(sk)->copy_thresh = val;
				3120	return 0;
				3121	}
				3122	case PACKET_VERSION:
				3123	{
				3124	int val;
				3125
				3126	if (optlen != sizeof(val))
				3127	return -EINVAL;
				3128
				3129	/*CVE-2016-8655
				3130	delete the following contents:
				3131	if (po->rx_ring.pg_vec \|\| po->tx_ring.pg_vec)
				3132	return -EBUSY;
				3133	*/
				3134
				3135	if (copy_from_user(&val, optval, sizeof(val)))
				3136	return -EFAULT;
				3137	switch (val) {
				3138	case TPACKET_V1:
				3139	case TPACKET_V2:
				3140	case TPACKET_V3:
				3141
				3142	/*CVE-2016-8655
				3143	delete the following contents:
				3144	po->tp_version = val;
				3145	return 0;
				3146	add the following contents:
				3147	break;
				3148	*/
				3149	break;
				3150	default:
				3151	return -EINVAL;
				3152	}
				3153
				3154	/CVE-2016-8655/
				3155	lock_sock(sk);
				3156	if (po->rx_ring.pg_vec \|\| po->tx_ring.pg_vec) {
				3157	ret = -EBUSY;
				3158	} else {
				3159	po->tp_version = val;
				3160	ret = 0;
				3161	}
				3162	release_sock(sk);
				3163	return ret;
				3164
				3165	}
				3166	case PACKET_RESERVE:
				3167	{
				3168	unsigned int val;
				3169
				3170	if (optlen != sizeof(val))
				3171	return -EINVAL;
				3172	if (po->rx_ring.pg_vec \|\| po->tx_ring.pg_vec)
				3173	return -EBUSY;
				3174	if (copy_from_user(&val, optval, sizeof(val)))
				3175	return -EFAULT;
				3176
				3177	//CVE-2017-7308
				3178	if (val > INT_MAX)
				3179	return -EINVAL;
				3180
				3181	po->tp_reserve = val;
				3182	return 0;
				3183	}
				3184	case PACKET_LOSS:
				3185	{
				3186	unsigned int val;
				3187
				3188	if (optlen != sizeof(val))
				3189	return -EINVAL;
				3190	if (po->rx_ring.pg_vec \|\| po->tx_ring.pg_vec)
				3191	return -EBUSY;
				3192	if (copy_from_user(&val, optval, sizeof(val)))
				3193	return -EFAULT;
				3194	po->tp_loss = !!val;
				3195	return 0;
				3196	}
				3197	case PACKET_AUXDATA:
				3198	{
				3199	int val;
				3200
				3201	if (optlen < sizeof(val))
				3202	return -EINVAL;
				3203	if (copy_from_user(&val, optval, sizeof(val)))
				3204	return -EFAULT;
				3205
				3206	po->auxdata = !!val;
				3207	return 0;
				3208	}
				3209	case PACKET_ORIGDEV:
				3210	{
				3211	int val;
				3212
				3213	if (optlen < sizeof(val))
				3214	return -EINVAL;
				3215	if (copy_from_user(&val, optval, sizeof(val)))
				3216	return -EFAULT;
				3217
				3218	po->origdev = !!val;
				3219	return 0;
				3220	}
				3221	case PACKET_VNET_HDR:
				3222	{
				3223	int val;
				3224
				3225	if (sock->type != SOCK_RAW)
				3226	return -EINVAL;
				3227	if (po->rx_ring.pg_vec \|\| po->tx_ring.pg_vec)
				3228	return -EBUSY;
				3229	if (optlen < sizeof(val))
				3230	return -EINVAL;
				3231	if (copy_from_user(&val, optval, sizeof(val)))
				3232	return -EFAULT;
				3233
				3234	po->has_vnet_hdr = !!val;
				3235	return 0;
				3236	}
				3237	case PACKET_TIMESTAMP:
				3238	{
				3239	int val;
				3240
				3241	if (optlen != sizeof(val))
				3242	return -EINVAL;
				3243	if (copy_from_user(&val, optval, sizeof(val)))
				3244	return -EFAULT;
				3245
				3246	po->tp_tstamp = val;
				3247	return 0;
				3248	}
				3249	case PACKET_FANOUT:
				3250	{
				3251	int val;
				3252
				3253	if (optlen != sizeof(val))
				3254	return -EINVAL;
				3255	if (copy_from_user(&val, optval, sizeof(val)))
				3256	return -EFAULT;
				3257
				3258	return fanout_add(sk, val & 0xffff, val >> 16);
				3259	}
				3260	default:
				3261	return -ENOPROTOOPT;
				3262	}
				3263	}
				3264
				3265	static int packet_getsockopt(struct socket *sock, int level, int optname,
				3266	char __user optval, int __user optlen)
				3267	{
				3268	int len;
				3269	int val;
				3270	struct sock *sk = sock->sk;
				3271	struct packet_sock *po = pkt_sk(sk);
				3272	void *data;
				3273	struct tpacket_stats st;
				3274	union tpacket_stats_u st_u;
				3275
				3276	if (level != SOL_PACKET)
				3277	return -ENOPROTOOPT;
				3278
				3279	if (get_user(len, optlen))
				3280	return -EFAULT;
				3281
				3282	if (len < 0)
				3283	return -EINVAL;
				3284
				3285	switch (optname) {
				3286	case PACKET_STATISTICS:
				3287	if (po->tp_version == TPACKET_V3) {
				3288	len = sizeof(struct tpacket_stats_v3);
				3289	} else {
				3290	if (len > sizeof(struct tpacket_stats))
				3291	len = sizeof(struct tpacket_stats);
				3292	}
				3293	spin_lock_bh(&sk->sk_receive_queue.lock);
				3294	if (po->tp_version == TPACKET_V3) {
				3295	memcpy(&st_u.stats3, &po->stats,
				3296	sizeof(struct tpacket_stats));
				3297	st_u.stats3.tp_freeze_q_cnt =
				3298	po->stats_u.stats3.tp_freeze_q_cnt;
				3299	st_u.stats3.tp_packets += po->stats.tp_drops;
				3300	data = &st_u.stats3;
				3301	} else {
				3302	st = po->stats;
				3303	st.tp_packets += st.tp_drops;
				3304	data = &st;
				3305	}
				3306	memset(&po->stats, 0, sizeof(st));
				3307	spin_unlock_bh(&sk->sk_receive_queue.lock);
				3308	break;
				3309	case PACKET_AUXDATA:
				3310	if (len > sizeof(int))
				3311	len = sizeof(int);
				3312	val = po->auxdata;
				3313
				3314	data = &val;
				3315	break;
				3316	case PACKET_ORIGDEV:
				3317	if (len > sizeof(int))
				3318	len = sizeof(int);
				3319	val = po->origdev;
				3320
				3321	data = &val;
				3322	break;
				3323	case PACKET_VNET_HDR:
				3324	if (len > sizeof(int))
				3325	len = sizeof(int);
				3326	val = po->has_vnet_hdr;
				3327
				3328	data = &val;
				3329	break;
				3330	case PACKET_VERSION:
				3331	if (len > sizeof(int))
				3332	len = sizeof(int);
				3333	val = po->tp_version;
				3334	data = &val;
				3335	break;
				3336	case PACKET_HDRLEN:
				3337	if (len > sizeof(int))
				3338	len = sizeof(int);
				3339	if (copy_from_user(&val, optval, len))
				3340	return -EFAULT;
				3341	switch (val) {
				3342	case TPACKET_V1:
				3343	val = sizeof(struct tpacket_hdr);
				3344	break;
				3345	case TPACKET_V2:
				3346	val = sizeof(struct tpacket2_hdr);
				3347	break;
				3348	case TPACKET_V3:
				3349	val = sizeof(struct tpacket3_hdr);
				3350	break;
				3351	default:
				3352	return -EINVAL;
				3353	}
				3354	data = &val;
				3355	break;
				3356	case PACKET_RESERVE:
				3357	if (len > sizeof(unsigned int))
				3358	len = sizeof(unsigned int);
				3359	val = po->tp_reserve;
				3360	data = &val;
				3361	break;
				3362	case PACKET_LOSS:
				3363	if (len > sizeof(unsigned int))
				3364	len = sizeof(unsigned int);
				3365	val = po->tp_loss;
				3366	data = &val;
				3367	break;
				3368	case PACKET_TIMESTAMP:
				3369	if (len > sizeof(int))
				3370	len = sizeof(int);
				3371	val = po->tp_tstamp;
				3372	data = &val;
				3373	break;
				3374	case PACKET_FANOUT:
				3375	if (len > sizeof(int))
				3376	len = sizeof(int);
				3377	val = (po->fanout ?
				3378	((u32)po->fanout->id \|
				3379	((u32)po->fanout->type << 16)) :
				3380	0);
				3381	data = &val;
				3382	break;
				3383	default:
				3384	return -ENOPROTOOPT;
				3385	}
				3386
				3387	if (put_user(len, optlen))
				3388	return -EFAULT;
				3389	if (copy_to_user(optval, data, len))
				3390	return -EFAULT;
				3391	return 0;
				3392	}
				3393
				3394
				3395	static int packet_notifier(struct notifier_block this, unsigned long msg, void data)
				3396	{
				3397	struct sock *sk;
				3398	struct hlist_node *node;
				3399	struct net_device *dev = data;
				3400	struct net *net = dev_net(dev);
				3401
				3402	rcu_read_lock();
				3403	sk_for_each_rcu(sk, node, &net->packet.sklist) {
				3404	struct packet_sock *po = pkt_sk(sk);
				3405
				3406	switch (msg) {
				3407	case NETDEV_UNREGISTER:
				3408	if (po->mclist)
				3409	packet_dev_mclist(dev, po->mclist, -1);
				3410	/* fallthrough */
				3411
				3412	case NETDEV_DOWN:
				3413	if (dev->ifindex == po->ifindex) {
				3414	spin_lock(&po->bind_lock);
				3415	if (po->running) {
				3416	__unregister_prot_hook(sk, false);
				3417	sk->sk_err = ENETDOWN;
				3418	if (!sock_flag(sk, SOCK_DEAD))
				3419	sk->sk_error_report(sk);
				3420	}
				3421	if (msg == NETDEV_UNREGISTER) {
				3422	po->ifindex = -1;
				3423	if (po->prot_hook.dev)
				3424	dev_put(po->prot_hook.dev);
				3425	po->prot_hook.dev = NULL;
				3426	}
				3427	spin_unlock(&po->bind_lock);
				3428	}
				3429	break;
				3430	case NETDEV_UP:
				3431	if (dev->ifindex == po->ifindex) {
				3432	spin_lock(&po->bind_lock);
				3433	if (po->num)
				3434	register_prot_hook(sk);
				3435	spin_unlock(&po->bind_lock);
				3436	}
				3437	break;
				3438	}
				3439	}
				3440	rcu_read_unlock();
				3441	return NOTIFY_DONE;
				3442	}
				3443
				3444
				3445	static int packet_ioctl(struct socket *sock, unsigned int cmd,
				3446	unsigned long arg)
				3447	{
				3448	struct sock *sk = sock->sk;
				3449
				3450	switch (cmd) {
				3451	case SIOCOUTQ:
				3452	{
				3453	int amount = sk_wmem_alloc_get(sk);
				3454
				3455	return put_user(amount, (int __user *)arg);
				3456	}
				3457	case SIOCINQ:
				3458	{
				3459	struct sk_buff *skb;
				3460	int amount = 0;
				3461
				3462	spin_lock_bh(&sk->sk_receive_queue.lock);
				3463	skb = skb_peek(&sk->sk_receive_queue);
				3464	if (skb)
				3465	amount = skb->len;
				3466	spin_unlock_bh(&sk->sk_receive_queue.lock);
				3467	return put_user(amount, (int __user *)arg);
				3468	}
				3469	case SIOCGSTAMP:
				3470	return sock_get_timestamp(sk, (struct timeval __user *)arg);
				3471	case SIOCGSTAMPNS:
				3472	return sock_get_timestampns(sk, (struct timespec __user *)arg);
				3473
				3474	#ifdef CONFIG_INET
				3475	case SIOCADDRT:
				3476	case SIOCDELRT:
				3477	case SIOCDARP:
				3478	case SIOCGARP:
				3479	case SIOCSARP:
				3480	case SIOCGIFADDR:
				3481	case SIOCSIFADDR:
				3482	case SIOCGIFBRDADDR:
				3483	case SIOCSIFBRDADDR:
				3484	case SIOCGIFNETMASK:
				3485	case SIOCSIFNETMASK:
				3486	case SIOCGIFDSTADDR:
				3487	case SIOCSIFDSTADDR:
				3488	case SIOCSIFFLAGS:
				3489	return inet_dgram_ops.ioctl(sock, cmd, arg);
				3490	#endif
				3491
				3492	default:
				3493	return -ENOIOCTLCMD;
				3494	}
				3495	return 0;
				3496	}
				3497
				3498	static unsigned int packet_poll(struct file file, struct socket sock,
				3499	poll_table *wait)
				3500	{
				3501	struct sock *sk = sock->sk;
				3502	struct packet_sock *po = pkt_sk(sk);
				3503	unsigned int mask = datagram_poll(file, sock, wait);
				3504
				3505	spin_lock_bh(&sk->sk_receive_queue.lock);
				3506	if (po->rx_ring.pg_vec) {
				3507	if (!packet_previous_rx_frame(po, &po->rx_ring,
				3508	TP_STATUS_KERNEL))
				3509	mask \|= POLLIN \| POLLRDNORM;
				3510	}
				3511	spin_unlock_bh(&sk->sk_receive_queue.lock);
				3512	spin_lock_bh(&sk->sk_write_queue.lock);
				3513	if (po->tx_ring.pg_vec) {
				3514	if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
				3515	mask \|= POLLOUT \| POLLWRNORM;
				3516	}
				3517	spin_unlock_bh(&sk->sk_write_queue.lock);
				3518	return mask;
				3519	}
				3520
				3521
				3522	/* Dirty? Well, I still did not learn better way to account
				3523	* for user mmaps.
				3524	*/
				3525
				3526	static void packet_mm_open(struct vm_area_struct *vma)
				3527	{
				3528	struct file *file = vma->vm_file;
				3529	struct socket *sock = file->private_data;
				3530	struct sock *sk = sock->sk;
				3531
				3532	if (sk)
				3533	atomic_inc(&pkt_sk(sk)->mapped);
				3534	}
				3535
				3536	static void packet_mm_close(struct vm_area_struct *vma)
				3537	{
				3538	struct file *file = vma->vm_file;
				3539	struct socket *sock = file->private_data;
				3540	struct sock *sk = sock->sk;
				3541
				3542	if (sk)
				3543	atomic_dec(&pkt_sk(sk)->mapped);
				3544	}
				3545
				3546	static const struct vm_operations_struct packet_mmap_ops = {
				3547	.open = packet_mm_open,
				3548	.close = packet_mm_close,
				3549	};
				3550
				3551	static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
				3552	unsigned int len)
				3553	{
				3554	int i;
				3555
				3556	for (i = 0; i < len; i++) {
				3557	if (likely(pg_vec[i].buffer)) {
				3558	if (is_vmalloc_addr(pg_vec[i].buffer))
				3559	vfree(pg_vec[i].buffer);
				3560	else
				3561	free_pages((unsigned long)pg_vec[i].buffer,
				3562	order);
				3563	pg_vec[i].buffer = NULL;
				3564	}
				3565	}
				3566	kfree(pg_vec);
				3567	}
				3568
				3569	static char *alloc_one_pg_vec_page(unsigned long order)
				3570	{
				3571	char *buffer = NULL;
				3572	gfp_t gfp_flags = GFP_KERNEL \| __GFP_COMP \|
				3573	__GFP_ZERO \| __GFP_NOWARN \| __GFP_NORETRY;
				3574
				3575	buffer = (char *) __get_free_pages(gfp_flags, order);
				3576
				3577	if (buffer)
				3578	return buffer;
				3579
				3580	/*
				3581	* __get_free_pages failed, fall back to vmalloc
				3582	*/
				3583	buffer = vzalloc((1 << order) * PAGE_SIZE);
				3584
				3585	if (buffer)
				3586	return buffer;
				3587
				3588	/*
				3589	* vmalloc failed, lets dig into swap here
				3590	*/
				3591	gfp_flags &= ~__GFP_NORETRY;
				3592	buffer = (char *)__get_free_pages(gfp_flags, order);
				3593	if (buffer)
				3594	return buffer;
				3595
				3596	/*
				3597	* complete and utter failure
				3598	*/
				3599	return NULL;
				3600	}
				3601
				3602	static struct pgv alloc_pg_vec(struct tpacket_req req, int order)
				3603	{
				3604	unsigned int block_nr = req->tp_block_nr;
				3605	struct pgv *pg_vec;
				3606	int i;
				3607
				3608	pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
				3609	if (unlikely(!pg_vec))
				3610	goto out;
				3611
				3612	for (i = 0; i < block_nr; i++) {
				3613	pg_vec[i].buffer = alloc_one_pg_vec_page(order);
				3614	if (unlikely(!pg_vec[i].buffer))
				3615	goto out_free_pgvec;
				3616	}
				3617
				3618	out:
				3619	return pg_vec;
				3620
				3621	out_free_pgvec:
				3622	free_pg_vec(pg_vec, order, block_nr);
				3623	pg_vec = NULL;
				3624	goto out;
				3625	}
				3626
				3627	static int packet_set_ring(struct sock sk, union tpacket_req_u req_u,
				3628	int closing, int tx_ring)
				3629	{
				3630	struct pgv *pg_vec = NULL;
				3631	struct packet_sock *po = pkt_sk(sk);
				3632	int was_running, order = 0;
				3633	struct packet_ring_buffer *rb;
				3634	struct sk_buff_head *rb_queue;
				3635	__be16 num;
				3636	int err = -EINVAL;
				3637	/* Added to avoid minimal code churn */
				3638	struct tpacket_req *req = &req_u->req;
				3639
				3640	/CVE-2016-8655/
				3641	lock_sock(sk);
				3642
				3643	/* Opening a Tx-ring is NOT supported in TPACKET_V3 */
				3644	if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
				3645	WARN(1, "Tx-ring is not supported.\n");
				3646	goto out;
				3647	}
				3648
				3649	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
				3650	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
				3651
				3652	err = -EBUSY;
				3653	if (!closing) {
				3654	if (atomic_read(&po->mapped))
				3655	goto out;
				3656	if (atomic_read(&rb->pending))
				3657	goto out;
				3658	}
				3659
				3660	if (req->tp_block_nr) {
				3661	/* Sanity tests and some calculations */
				3662	err = -EBUSY;
				3663	if (unlikely(rb->pg_vec))
				3664	goto out;
				3665
				3666	switch (po->tp_version) {
				3667	case TPACKET_V1:
				3668	po->tp_hdrlen = TPACKET_HDRLEN;
				3669	break;
				3670	case TPACKET_V2:
				3671	po->tp_hdrlen = TPACKET2_HDRLEN;
				3672	break;
				3673	case TPACKET_V3:
				3674	po->tp_hdrlen = TPACKET3_HDRLEN;
				3675	break;
				3676	}
				3677
				3678	err = -EINVAL;
				3679	if (unlikely((int)req->tp_block_size <= 0))
				3680	goto out;
				3681	if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
				3682	goto out;
				3683	if (unlikely(req->tp_frame_size < po->tp_hdrlen +
				3684	po->tp_reserve))
				3685	goto out;
				3686	if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
				3687	goto out;
				3688
				3689	rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
				3690	if (unlikely(rb->frames_per_block <= 0))
				3691	goto out;
				3692	if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
				3693	req->tp_frame_nr))
				3694	goto out;
				3695
				3696	err = -ENOMEM;
				3697	order = get_order(req->tp_block_size);
				3698	pg_vec = alloc_pg_vec(req, order);
				3699	if (unlikely(!pg_vec))
				3700	goto out;
				3701	switch (po->tp_version) {
				3702	case TPACKET_V3:
				3703	/* Transmit path is not supported. We checked
				3704	* it above but just being paranoid
				3705	*/
				3706	if (!tx_ring)
				3707	init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring);
				3708	break;
				3709	default:
				3710	break;
				3711	}
				3712	}
				3713	/* Done */
				3714	else {
				3715	err = -EINVAL;
				3716	if (unlikely(req->tp_frame_nr))
				3717	goto out;
				3718	}
				3719
				3720	/*CVE-2016-8655
				3721	delete the following contents:
				3722	lock_sock(sk);
				3723	*/
				3724
				3725	/* Detach socket from network */
				3726	spin_lock(&po->bind_lock);
				3727	was_running = po->running;
				3728	num = po->num;
				3729	if (was_running) {
				3730	po->num = 0;
				3731	__unregister_prot_hook(sk, false);
				3732	}
				3733	spin_unlock(&po->bind_lock);
				3734
				3735	synchronize_net();
				3736
				3737	err = -EBUSY;
				3738	mutex_lock(&po->pg_vec_lock);
				3739	if (closing \|\| atomic_read(&po->mapped) == 0) {
				3740	err = 0;
				3741	spin_lock_bh(&rb_queue->lock);
				3742	swap(rb->pg_vec, pg_vec);
				3743	rb->frame_max = (req->tp_frame_nr - 1);
				3744	rb->head = 0;
				3745	rb->frame_size = req->tp_frame_size;
				3746	spin_unlock_bh(&rb_queue->lock);
				3747
				3748	swap(rb->pg_vec_order, order);
				3749	swap(rb->pg_vec_len, req->tp_block_nr);
				3750
				3751	rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
				3752	po->prot_hook.func = (po->rx_ring.pg_vec) ?
				3753	tpacket_rcv : packet_rcv;
				3754	skb_queue_purge(rb_queue);
				3755	if (atomic_read(&po->mapped))
				3756	pr_err("packet_mmap: vma is busy: %d\n",
				3757	atomic_read(&po->mapped));
				3758	}
				3759	mutex_unlock(&po->pg_vec_lock);
				3760
				3761	spin_lock(&po->bind_lock);
				3762	if (was_running) {
				3763	po->num = num;
				3764	register_prot_hook(sk);
				3765	}
				3766	spin_unlock(&po->bind_lock);
				3767	if (closing && (po->tp_version > TPACKET_V2)) {
				3768	/* Because we don't support block-based V3 on tx-ring */
				3769	if (!tx_ring)
				3770	prb_shutdown_retire_blk_timer(po, tx_ring, rb_queue);
				3771	}
				3772
				3773	/*CVE-2016-8655
				3774	delete the following contents:
				3775	release_sock(sk);
				3776	*/
				3777
				3778	if (pg_vec)
				3779	free_pg_vec(pg_vec, order, req->tp_block_nr);
				3780	out:
				3781	release_sock(sk); //CVE-2016-8655
				3782	return err;
				3783	}
				3784
				3785	static int packet_mmap(struct file file, struct socket sock,
				3786	struct vm_area_struct *vma)
				3787	{
				3788	struct sock *sk = sock->sk;
				3789	struct packet_sock *po = pkt_sk(sk);
				3790	unsigned long size, expected_size;
				3791	struct packet_ring_buffer *rb;
				3792	unsigned long start;
				3793	int err = -EINVAL;
				3794	int i;
				3795
				3796	if (vma->vm_pgoff)
				3797	return -EINVAL;
				3798
				3799	mutex_lock(&po->pg_vec_lock);
				3800
				3801	expected_size = 0;
				3802	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
				3803	if (rb->pg_vec) {
				3804	expected_size += rb->pg_vec_len
				3805	* rb->pg_vec_pages
				3806	* PAGE_SIZE;
				3807	}
				3808	}
				3809
				3810	if (expected_size == 0)
				3811	goto out;
				3812
				3813	size = vma->vm_end - vma->vm_start;
				3814	if (size != expected_size)
				3815	goto out;
				3816
				3817	start = vma->vm_start;
				3818	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
				3819	if (rb->pg_vec == NULL)
				3820	continue;
				3821
				3822	for (i = 0; i < rb->pg_vec_len; i++) {
				3823	struct page *page;
				3824	void *kaddr = rb->pg_vec[i].buffer;
				3825	int pg_num;
				3826
				3827	for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
				3828	page = pgv_to_page(kaddr);
				3829	err = vm_insert_page(vma, start, page);
				3830	if (unlikely(err))
				3831	goto out;
				3832	start += PAGE_SIZE;
				3833	kaddr += PAGE_SIZE;
				3834	}
				3835	}
				3836	}
				3837
				3838	atomic_inc(&po->mapped);
				3839	vma->vm_ops = &packet_mmap_ops;
				3840	err = 0;
				3841
				3842	out:
				3843	mutex_unlock(&po->pg_vec_lock);
				3844	return err;
				3845	}
				3846
				3847	static const struct proto_ops packet_ops_spkt = {
				3848	.family = PF_PACKET,
				3849	.owner = THIS_MODULE,
				3850	.release = packet_release,
				3851	.bind = packet_bind_spkt,
				3852	.connect = sock_no_connect,
				3853	.socketpair = sock_no_socketpair,
				3854	.accept = sock_no_accept,
				3855	.getname = packet_getname_spkt,
				3856	.poll = datagram_poll,
				3857	.ioctl = packet_ioctl,
				3858	.listen = sock_no_listen,
				3859	.shutdown = sock_no_shutdown,
				3860	.setsockopt = sock_no_setsockopt,
				3861	.getsockopt = sock_no_getsockopt,
				3862	.sendmsg = packet_sendmsg_spkt,
				3863	.recvmsg = packet_recvmsg,
				3864	.mmap = sock_no_mmap,
				3865	.sendpage = sock_no_sendpage,
				3866	};
				3867
				3868	static const struct proto_ops packet_ops = {
				3869	.family = PF_PACKET,
				3870	.owner = THIS_MODULE,
				3871	.release = packet_release,
				3872	.bind = packet_bind,
				3873	.connect = sock_no_connect,
				3874	.socketpair = sock_no_socketpair,
				3875	.accept = sock_no_accept,
				3876	.getname = packet_getname,
				3877	.poll = packet_poll,
				3878	.ioctl = packet_ioctl,
				3879	.listen = sock_no_listen,
				3880	.shutdown = sock_no_shutdown,
				3881	.setsockopt = packet_setsockopt,
				3882	.getsockopt = packet_getsockopt,
				3883	.sendmsg = packet_sendmsg,
				3884	.recvmsg = packet_recvmsg,
				3885	.mmap = packet_mmap,
				3886	.sendpage = sock_no_sendpage,
				3887	};
				3888
				3889	static const struct net_proto_family packet_family_ops = {
				3890	.family = PF_PACKET,
				3891	.create = packet_create,
				3892	.owner = THIS_MODULE,
				3893	};
				3894
				3895	static struct notifier_block packet_netdev_notifier = {
				3896	.notifier_call = packet_notifier,
				3897	};
				3898
				3899	#ifdef CONFIG_PROC_FS
				3900
				3901	static void packet_seq_start(struct seq_file seq, loff_t *pos)
				3902	__acquires(RCU)
				3903	{
				3904	struct net *net = seq_file_net(seq);
				3905
				3906	rcu_read_lock();
				3907	return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
				3908	}
				3909
				3910	static void packet_seq_next(struct seq_file seq, void v, loff_t pos)
				3911	{
				3912	struct net *net = seq_file_net(seq);
				3913	return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
				3914	}
				3915
				3916	static void packet_seq_stop(struct seq_file seq, void v)
				3917	__releases(RCU)
				3918	{
				3919	rcu_read_unlock();
				3920	}
				3921
				3922	static int packet_seq_show(struct seq_file seq, void v)
				3923	{
				3924	if (v == SEQ_START_TOKEN)
				3925	seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
				3926	else {
				3927	struct sock *s = sk_entry(v);
				3928	const struct packet_sock *po = pkt_sk(s);
				3929
				3930	seq_printf(seq,
				3931	"%pK %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
				3932	s,
				3933	atomic_read(&s->sk_refcnt),
				3934	s->sk_type,
				3935	ntohs(po->num),
				3936	po->ifindex,
				3937	po->running,
				3938	atomic_read(&s->sk_rmem_alloc),
				3939	sock_i_uid(s),
				3940	sock_i_ino(s));
				3941	}
				3942
				3943	return 0;
				3944	}
				3945
				3946	static const struct seq_operations packet_seq_ops = {
				3947	.start = packet_seq_start,
				3948	.next = packet_seq_next,
				3949	.stop = packet_seq_stop,
				3950	.show = packet_seq_show,
				3951	};
				3952
				3953	static int packet_seq_open(struct inode inode, struct file file)
				3954	{
				3955	return seq_open_net(inode, file, &packet_seq_ops,
				3956	sizeof(struct seq_net_private));
				3957	}
				3958
				3959	static const struct file_operations packet_seq_fops = {
				3960	.owner = THIS_MODULE,
				3961	.open = packet_seq_open,
				3962	.read = seq_read,
				3963	.llseek = seq_lseek,
				3964	.release = seq_release_net,
				3965	};
				3966
				3967	#endif
				3968
				3969	static int __net_init packet_net_init(struct net *net)
				3970	{
				3971	spin_lock_init(&net->packet.sklist_lock);
				3972	INIT_HLIST_HEAD(&net->packet.sklist);
				3973
				3974	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
				3975	return -ENOMEM;
				3976
				3977	return 0;
				3978	}
				3979
				3980	static void __net_exit packet_net_exit(struct net *net)
				3981	{
				3982	proc_net_remove(net, "packet");
				3983	}
				3984
				3985	static struct pernet_operations packet_net_ops = {
				3986	.init = packet_net_init,
				3987	.exit = packet_net_exit,
				3988	};
				3989
				3990
				3991	static void __exit packet_exit(void)
				3992	{
				3993	unregister_netdevice_notifier(&packet_netdev_notifier);
				3994	unregister_pernet_subsys(&packet_net_ops);
				3995	sock_unregister(PF_PACKET);
				3996	proto_unregister(&packet_proto);
				3997	}
				3998
				3999	static int __init packet_init(void)
				4000	{
				4001	int rc = proto_register(&packet_proto, 0);
				4002
				4003	if (rc != 0)
				4004	goto out;
				4005
				4006	sock_register(&packet_family_ops);
				4007	register_pernet_subsys(&packet_net_ops);
				4008	register_netdevice_notifier(&packet_netdev_notifier);
				4009	out:
				4010	return rc;
				4011	}
				4012
				4013	module_init(packet_init);
				4014	module_exit(packet_exit);
				4015	MODULE_LICENSE("GPL");
				4016	MODULE_ALIAS_NETPROTO(PF_PACKET);