Blame - src/kernel/linux/v4.19/net/ipv4/udp.c - T800

blob: 8099d035dff549a64f3ce5cf2495b94bb1154aee [file] [log] [blame]

xj	b04a402	2021-11-25 15:01:52 +0800	[diff] [blame]	1	/*
				2	* INET An implementation of the TCP/IP protocol suite for the LINUX
				3	* operating system. INET is implemented using the BSD Socket
				4	* interface as the means of communication with the user level.
				5	*
				6	* The User Datagram Protocol (UDP).
				7	*
				8	* Authors: Ross Biro
				9	* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
				10	* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
				11	* Alan Cox, <alan@lxorguk.ukuu.org.uk>
				12	* Hirokazu Takahashi, <taka@valinux.co.jp>
				13	*
				14	* Fixes:
				15	* Alan Cox : verify_area() calls
				16	* Alan Cox : stopped close while in use off icmp
				17	* messages. Not a fix but a botch that
				18	* for udp at least is 'valid'.
				19	* Alan Cox : Fixed icmp handling properly
				20	* Alan Cox : Correct error for oversized datagrams
				21	* Alan Cox : Tidied select() semantics.
				22	* Alan Cox : udp_err() fixed properly, also now
				23	* select and read wake correctly on errors
				24	* Alan Cox : udp_send verify_area moved to avoid mem leak
				25	* Alan Cox : UDP can count its memory
				26	* Alan Cox : send to an unknown connection causes
				27	* an ECONNREFUSED off the icmp, but
				28	* does NOT close.
				29	* Alan Cox : Switched to new sk_buff handlers. No more backlog!
				30	* Alan Cox : Using generic datagram code. Even smaller and the PEEK
				31	* bug no longer crashes it.
				32	* Fred Van Kempen : Net2e support for sk->broadcast.
				33	* Alan Cox : Uses skb_free_datagram
				34	* Alan Cox : Added get/set sockopt support.
				35	* Alan Cox : Broadcasting without option set returns EACCES.
				36	* Alan Cox : No wakeup calls. Instead we now use the callbacks.
				37	* Alan Cox : Use ip_tos and ip_ttl
				38	* Alan Cox : SNMP Mibs
				39	* Alan Cox : MSG_DONTROUTE, and 0.0.0.0 support.
				40	* Matt Dillon : UDP length checks.
				41	* Alan Cox : Smarter af_inet used properly.
				42	* Alan Cox : Use new kernel side addressing.
				43	* Alan Cox : Incorrect return on truncated datagram receive.
				44	* Arnt Gulbrandsen : New udp_send and stuff
				45	* Alan Cox : Cache last socket
				46	* Alan Cox : Route cache
				47	* Jon Peatfield : Minor efficiency fix to sendto().
				48	* Mike Shaver : RFC1122 checks.
				49	* Alan Cox : Nonblocking error fix.
				50	* Willy Konynenberg : Transparent proxying support.
				51	* Mike McLagan : Routing by source
				52	* David S. Miller : New socket lookup architecture.
				53	* Last socket cache retained as it
				54	* does have a high hit rate.
				55	* Olaf Kirch : Don't linearise iovec on sendmsg.
				56	* Andi Kleen : Some cleanups, cache destination entry
				57	* for connect.
				58	* Vitaly E. Lavrov : Transparent proxy revived after year coma.
				59	* Melvin Smith : Check msg_name not msg_namelen in sendto(),
				60	* return ENOTCONN for unconnected sockets (POSIX)
				61	* Janos Farkas : don't deliver multi/broadcasts to a different
				62	* bound-to-device socket
				63	* Hirokazu Takahashi : HW checksumming for outgoing UDP
				64	* datagrams.
				65	* Hirokazu Takahashi : sendfile() on UDP works now.
				66	* Arnaldo C. Melo : convert /proc/net/udp to seq_file
				67	* YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
				68	* Alexey Kuznetsov: allow both IPv4 and IPv6 sockets to bind
				69	* a single port at the same time.
				70	* Derek Atkins <derek@ihtfp.com>: Add Encapsulation Support
				71	* James Chapman : Add L2TP encapsulation type.
				72	*
				73	*
				74	* This program is free software; you can redistribute it and/or
				75	* modify it under the terms of the GNU General Public License
				76	* as published by the Free Software Foundation; either version
				77	* 2 of the License, or (at your option) any later version.
				78	*/
				79
				80	#define pr_fmt(fmt) "UDP: " fmt
				81
				82	#include <linux/uaccess.h>
				83	#include <asm/ioctls.h>
				84	#include <linux/bootmem.h>
				85	#include <linux/highmem.h>
				86	#include <linux/swap.h>
				87	#include <linux/types.h>
				88	#include <linux/fcntl.h>
				89	#include <linux/module.h>
				90	#include <linux/socket.h>
				91	#include <linux/sockios.h>
				92	#include <linux/igmp.h>
				93	#include <linux/inetdevice.h>
				94	#include <linux/in.h>
				95	#include <linux/errno.h>
				96	#include <linux/timer.h>
				97	#include <linux/mm.h>
				98	#include <linux/inet.h>
				99	#include <linux/netdevice.h>
				100	#include <linux/slab.h>
				101	#include <net/tcp_states.h>
				102	#include <linux/skbuff.h>
				103	#include <linux/proc_fs.h>
				104	#include <linux/seq_file.h>
				105	#include <net/net_namespace.h>
				106	#include <net/icmp.h>
				107	#include <net/inet_hashtables.h>
				108	#include <net/route.h>
				109	#include <net/checksum.h>
				110	#include <net/xfrm.h>
				111	#include <trace/events/udp.h>
				112	#include <linux/static_key.h>
				113	#include <trace/events/skb.h>
				114	#include <net/busy_poll.h>
				115	#include "udp_impl.h"
				116	#include <net/sock_reuseport.h>
				117	#include <net/addrconf.h>
				118	#include <net/ra_nat.h>
				119
				120	struct udp_table udp_table __read_mostly;
				121	EXPORT_SYMBOL(udp_table);
				122
				123	long sysctl_udp_mem[3] __read_mostly;
				124	EXPORT_SYMBOL(sysctl_udp_mem);
				125
				126	atomic_long_t udp_memory_allocated;
				127	EXPORT_SYMBOL(udp_memory_allocated);
				128
				129	#define MAX_UDP_PORTS 65536
				130	#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
				131
				132	/* IPCB reference means this can not be used from early demux */
				133	static bool udp_lib_exact_dif_match(struct net net, struct sk_buff skb)
				134	{
				135	#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
				136	if (!net->ipv4.sysctl_udp_l3mdev_accept &&
				137	skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
				138	return true;
				139	#endif
				140	return false;
				141	}
				142
				143	static int udp_lib_lport_inuse(struct net *net, __u16 num,
				144	const struct udp_hslot *hslot,
				145	unsigned long *bitmap,
				146	struct sock *sk, unsigned int log)
				147	{
				148	struct sock *sk2;
				149	kuid_t uid = sock_i_uid(sk);
				150
				151	sk_for_each(sk2, &hslot->head) {
				152	if (net_eq(sock_net(sk2), net) &&
				153	sk2 != sk &&
				154	(bitmap \|\| udp_sk(sk2)->udp_port_hash == num) &&
				155	(!sk2->sk_reuse \|\| !sk->sk_reuse) &&
				156	(!sk2->sk_bound_dev_if \|\| !sk->sk_bound_dev_if \|\|
				157	sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
				158	inet_rcv_saddr_equal(sk, sk2, true)) {
				159	if (sk2->sk_reuseport && sk->sk_reuseport &&
				160	!rcu_access_pointer(sk->sk_reuseport_cb) &&
				161	uid_eq(uid, sock_i_uid(sk2))) {
				162	if (!bitmap)
				163	return 0;
				164	} else {
				165	if (!bitmap)
				166	return 1;
				167	__set_bit(udp_sk(sk2)->udp_port_hash >> log,
				168	bitmap);
				169	}
				170	}
				171	}
				172	return 0;
				173	}
				174
				175	/*
				176	* Note: we still hold spinlock of primary hash chain, so no other writer
				177	* can insert/delete a socket with local_port == num
				178	*/
				179	static int udp_lib_lport_inuse2(struct net *net, __u16 num,
				180	struct udp_hslot *hslot2,
				181	struct sock *sk)
				182	{
				183	struct sock *sk2;
				184	kuid_t uid = sock_i_uid(sk);
				185	int res = 0;
				186
				187	spin_lock(&hslot2->lock);
				188	udp_portaddr_for_each_entry(sk2, &hslot2->head) {
				189	if (net_eq(sock_net(sk2), net) &&
				190	sk2 != sk &&
				191	(udp_sk(sk2)->udp_port_hash == num) &&
				192	(!sk2->sk_reuse \|\| !sk->sk_reuse) &&
				193	(!sk2->sk_bound_dev_if \|\| !sk->sk_bound_dev_if \|\|
				194	sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
				195	inet_rcv_saddr_equal(sk, sk2, true)) {
				196	if (sk2->sk_reuseport && sk->sk_reuseport &&
				197	!rcu_access_pointer(sk->sk_reuseport_cb) &&
				198	uid_eq(uid, sock_i_uid(sk2))) {
				199	res = 0;
				200	} else {
				201	res = 1;
				202	}
				203	break;
				204	}
				205	}
				206	spin_unlock(&hslot2->lock);
				207	return res;
				208	}
				209
				210	static int udp_reuseport_add_sock(struct sock sk, struct udp_hslot hslot)
				211	{
				212	struct net *net = sock_net(sk);
				213	kuid_t uid = sock_i_uid(sk);
				214	struct sock *sk2;
				215
				216	sk_for_each(sk2, &hslot->head) {
				217	if (net_eq(sock_net(sk2), net) &&
				218	sk2 != sk &&
				219	sk2->sk_family == sk->sk_family &&
				220	ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
				221	(udp_sk(sk2)->udp_port_hash == udp_sk(sk)->udp_port_hash) &&
				222	(sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
				223	sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
				224	inet_rcv_saddr_equal(sk, sk2, false)) {
				225	return reuseport_add_sock(sk, sk2,
				226	inet_rcv_saddr_any(sk));
				227	}
				228	}
				229
				230	return reuseport_alloc(sk, inet_rcv_saddr_any(sk));
				231	}
				232
				233	/**
				234	* udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6
				235	*
				236	* @sk: socket struct in question
				237	* @snum: port number to look up
				238	* @hash2_nulladdr: AF-dependent hash value in secondary hash chains,
				239	* with NULL address
				240	*/
				241	int udp_lib_get_port(struct sock *sk, unsigned short snum,
				242	unsigned int hash2_nulladdr)
				243	{
				244	struct udp_hslot hslot, hslot2;
				245	struct udp_table *udptable = sk->sk_prot->h.udp_table;
				246	int error = 1;
				247	struct net *net = sock_net(sk);
				248
				249	if (!snum) {
				250	int low, high, remaining;
				251	unsigned int rand;
				252	unsigned short first, last;
				253	DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);
				254
				255	inet_get_local_port_range(net, &low, &high);
				256	remaining = (high - low) + 1;
				257
				258	rand = prandom_u32();
				259	first = reciprocal_scale(rand, remaining) + low;
				260	/*
				261	* force rand to be an odd multiple of UDP_HTABLE_SIZE
				262	*/
				263	rand = (rand \| 1) * (udptable->mask + 1);
				264	last = first + udptable->mask + 1;
				265	do {
				266	hslot = udp_hashslot(udptable, net, first);
				267	bitmap_zero(bitmap, PORTS_PER_CHAIN);
				268	spin_lock_bh(&hslot->lock);
				269	udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
				270	udptable->log);
				271
				272	snum = first;
				273	/*
				274	* Iterate on all possible values of snum for this hash.
				275	* Using steps of an odd multiple of UDP_HTABLE_SIZE
				276	* give us randomization and full range coverage.
				277	*/
				278	do {
				279	if (low <= snum && snum <= high &&
				280	!test_bit(snum >> udptable->log, bitmap) &&
				281	!inet_is_local_reserved_port(net, snum))
				282	goto found;
				283	snum += rand;
				284	} while (snum != first);
				285	spin_unlock_bh(&hslot->lock);
				286	cond_resched();
				287	} while (++first != last);
				288	goto fail;
				289	} else {
				290	hslot = udp_hashslot(udptable, net, snum);
				291	spin_lock_bh(&hslot->lock);
				292	if (hslot->count > 10) {
				293	int exist;
				294	unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum;
				295
				296	slot2 &= udptable->mask;
				297	hash2_nulladdr &= udptable->mask;
				298
				299	hslot2 = udp_hashslot2(udptable, slot2);
				300	if (hslot->count < hslot2->count)
				301	goto scan_primary_hash;
				302
				303	exist = udp_lib_lport_inuse2(net, snum, hslot2, sk);
				304	if (!exist && (hash2_nulladdr != slot2)) {
				305	hslot2 = udp_hashslot2(udptable, hash2_nulladdr);
				306	exist = udp_lib_lport_inuse2(net, snum, hslot2,
				307	sk);
				308	}
				309	if (exist)
				310	goto fail_unlock;
				311	else
				312	goto found;
				313	}
				314	scan_primary_hash:
				315	if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, 0))
				316	goto fail_unlock;
				317	}
				318	found:
				319	inet_sk(sk)->inet_num = snum;
				320	udp_sk(sk)->udp_port_hash = snum;
				321	udp_sk(sk)->udp_portaddr_hash ^= snum;
				322	if (sk_unhashed(sk)) {
				323	if (sk->sk_reuseport &&
				324	udp_reuseport_add_sock(sk, hslot)) {
				325	inet_sk(sk)->inet_num = 0;
				326	udp_sk(sk)->udp_port_hash = 0;
				327	udp_sk(sk)->udp_portaddr_hash ^= snum;
				328	goto fail_unlock;
				329	}
				330
				331	sk_add_node_rcu(sk, &hslot->head);
				332	hslot->count++;
				333	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
				334
				335	hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
				336	spin_lock(&hslot2->lock);
				337	if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
				338	sk->sk_family == AF_INET6)
				339	hlist_add_tail_rcu(&udp_sk(sk)->udp_portaddr_node,
				340	&hslot2->head);
				341	else
				342	hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
				343	&hslot2->head);
				344	hslot2->count++;
				345	spin_unlock(&hslot2->lock);
				346	}
				347	sock_set_flag(sk, SOCK_RCU_FREE);
				348	error = 0;
				349	fail_unlock:
				350	spin_unlock_bh(&hslot->lock);
				351	fail:
				352	return error;
				353	}
				354	EXPORT_SYMBOL(udp_lib_get_port);
				355
				356	int udp_v4_get_port(struct sock *sk, unsigned short snum)
				357	{
				358	unsigned int hash2_nulladdr =
				359	ipv4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
				360	unsigned int hash2_partial =
				361	ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
				362
				363	/* precompute partial secondary hash */
				364	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
				365	return udp_lib_get_port(sk, snum, hash2_nulladdr);
				366	}
				367
				368	static int compute_score(struct sock sk, struct net net,
				369	__be32 saddr, __be16 sport,
				370	__be32 daddr, unsigned short hnum,
				371	int dif, int sdif, bool exact_dif)
				372	{
				373	int score;
				374	struct inet_sock *inet;
				375
				376	if (!net_eq(sock_net(sk), net) \|\|
				377	udp_sk(sk)->udp_port_hash != hnum \|\|
				378	ipv6_only_sock(sk))
				379	return -1;
				380
				381	score = (sk->sk_family == PF_INET) ? 2 : 1;
				382	inet = inet_sk(sk);
				383
				384	if (inet->inet_rcv_saddr) {
				385	if (inet->inet_rcv_saddr != daddr)
				386	return -1;
				387	score += 4;
				388	}
				389
				390	if (inet->inet_daddr) {
				391	if (inet->inet_daddr != saddr)
				392	return -1;
				393	score += 4;
				394	}
				395
				396	if (inet->inet_dport) {
				397	if (inet->inet_dport != sport)
				398	return -1;
				399	score += 4;
				400	}
				401
				402	if (sk->sk_bound_dev_if \|\| exact_dif) {
				403	bool dev_match = (sk->sk_bound_dev_if == dif \|\|
				404	sk->sk_bound_dev_if == sdif);
				405
				406	if (!dev_match)
				407	return -1;
				408	if (sk->sk_bound_dev_if)
				409	score += 4;
				410	}
				411
				412	if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
				413	score++;
				414	return score;
				415	}
				416
				417	static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
				418	const __u16 lport, const __be32 faddr,
				419	const __be16 fport)
				420	{
				421	static u32 udp_ehash_secret __read_mostly;
				422
				423	net_get_random_once(&udp_ehash_secret, sizeof(udp_ehash_secret));
				424
				425	return __inet_ehashfn(laddr, lport, faddr, fport,
				426	udp_ehash_secret + net_hash_mix(net));
				427	}
				428
				429	/* called with rcu_read_lock() */
				430	static struct sock udp4_lib_lookup2(struct net net,
				431	__be32 saddr, __be16 sport,
				432	__be32 daddr, unsigned int hnum,
				433	int dif, int sdif, bool exact_dif,
				434	struct udp_hslot *hslot2,
				435	struct sk_buff *skb)
				436	{
				437	struct sock sk, result;
				438	int score, badness;
				439	u32 hash = 0;
				440
				441	result = NULL;
				442	badness = 0;
				443	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
				444	score = compute_score(sk, net, saddr, sport,
				445	daddr, hnum, dif, sdif, exact_dif);
				446	if (score > badness) {
				447	if (sk->sk_reuseport &&
				448	sk->sk_state != TCP_ESTABLISHED) {
				449	hash = udp_ehashfn(net, daddr, hnum,
				450	saddr, sport);
				451	result = reuseport_select_sock(sk, hash, skb,
				452	sizeof(struct udphdr));
				453	if (result && !reuseport_has_conns(sk, false))
				454	return result;
				455	}
				456	badness = score;
				457	result = sk;
				458	}
				459	}
				460	return result;
				461	}
				462
				463	/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
				464	* harder than this. -DaveM
				465	*/
				466	struct sock __udp4_lib_lookup(struct net net, __be32 saddr,
				467	__be16 sport, __be32 daddr, __be16 dport, int dif,
				468	int sdif, struct udp_table udptable, struct sk_buff skb)
				469	{
				470	struct sock sk, result;
				471	unsigned short hnum = ntohs(dport);
				472	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
				473	struct udp_hslot hslot2, hslot = &udptable->hash[slot];
				474	bool exact_dif = udp_lib_exact_dif_match(net, skb);
				475	int score, badness;
				476	u32 hash = 0;
				477
				478	if (hslot->count > 10) {
				479	hash2 = ipv4_portaddr_hash(net, daddr, hnum);
				480	slot2 = hash2 & udptable->mask;
				481	hslot2 = &udptable->hash2[slot2];
				482	if (hslot->count < hslot2->count)
				483	goto begin;
				484
				485	result = udp4_lib_lookup2(net, saddr, sport,
				486	daddr, hnum, dif, sdif,
				487	exact_dif, hslot2, skb);
				488	if (!result) {
				489	unsigned int old_slot2 = slot2;
				490	hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
				491	slot2 = hash2 & udptable->mask;
				492	/* avoid searching the same slot again. */
				493	if (unlikely(slot2 == old_slot2))
				494	return result;
				495
				496	hslot2 = &udptable->hash2[slot2];
				497	if (hslot->count < hslot2->count)
				498	goto begin;
				499
				500	result = udp4_lib_lookup2(net, saddr, sport,
				501	daddr, hnum, dif, sdif,
				502	exact_dif, hslot2, skb);
				503	}
				504	if (unlikely(IS_ERR(result)))
				505	return NULL;
				506	return result;
				507	}
				508	begin:
				509	result = NULL;
				510	badness = 0;
				511	sk_for_each_rcu(sk, &hslot->head) {
				512	score = compute_score(sk, net, saddr, sport,
				513	daddr, hnum, dif, sdif, exact_dif);
				514	if (score > badness) {
				515	if (sk->sk_reuseport) {
				516	hash = udp_ehashfn(net, daddr, hnum,
				517	saddr, sport);
				518	result = reuseport_select_sock(sk, hash, skb,
				519	sizeof(struct udphdr));
				520	if (unlikely(IS_ERR(result)))
				521	return NULL;
				522	if (result)
				523	return result;
				524	}
				525	result = sk;
				526	badness = score;
				527	}
				528	}
				529	return result;
				530	}
				531	EXPORT_SYMBOL_GPL(__udp4_lib_lookup);
				532
				533	static inline struct sock __udp4_lib_lookup_skb(struct sk_buff skb,
				534	__be16 sport, __be16 dport,
				535	struct udp_table *udptable)
				536	{
				537	const struct iphdr *iph = ip_hdr(skb);
				538
				539	return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
				540	iph->daddr, dport, inet_iif(skb),
				541	inet_sdif(skb), udptable, skb);
				542	}
				543
				544	struct sock udp4_lib_lookup_skb(struct sk_buff skb,
				545	__be16 sport, __be16 dport)
				546	{
				547	const struct iphdr *iph = ip_hdr(skb);
				548
				549	return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
				550	iph->daddr, dport, inet_iif(skb),
				551	inet_sdif(skb), &udp_table, NULL);
				552	}
				553	EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb);
				554
				555	/* Must be called under rcu_read_lock().
				556	* Does increment socket refcount.
				557	*/
				558	#if IS_ENABLED(CONFIG_NF_TPROXY_IPV4) \|\| IS_ENABLED(CONFIG_NF_SOCKET_IPV4)
				559	struct sock udp4_lib_lookup(struct net net, __be32 saddr, __be16 sport,
				560	__be32 daddr, __be16 dport, int dif)
				561	{
				562	struct sock *sk;
				563
				564	sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport,
				565	dif, 0, &udp_table, NULL);
				566	if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
				567	sk = NULL;
				568	return sk;
				569	}
				570	EXPORT_SYMBOL_GPL(udp4_lib_lookup);
				571	#endif
				572
				573	static inline bool __udp_is_mcast_sock(struct net net, struct sock sk,
				574	__be16 loc_port, __be32 loc_addr,
				575	__be16 rmt_port, __be32 rmt_addr,
				576	int dif, int sdif, unsigned short hnum)
				577	{
				578	struct inet_sock *inet = inet_sk(sk);
				579
				580	if (!net_eq(sock_net(sk), net) \|\|
				581	udp_sk(sk)->udp_port_hash != hnum \|\|
				582	(inet->inet_daddr && inet->inet_daddr != rmt_addr) \|\|
				583	(inet->inet_dport != rmt_port && inet->inet_dport) \|\|
				584	(inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) \|\|
				585	ipv6_only_sock(sk) \|\|
				586	(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
				587	sk->sk_bound_dev_if != sdif))
				588	return false;
				589	if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif, sdif))
				590	return false;
				591	return true;
				592	}
				593
				594	/*
				595	* This routine is called by the ICMP module when it gets some
				596	* sort of error condition. If err < 0 then the socket should
				597	* be closed and the error returned to the user. If err > 0
				598	* it's just the icmp type << 8 \| icmp code.
				599	* Header points to the ip header of the error packet. We move
				600	* on past this. Then (as it used to claim before adjustment)
				601	* header points to the first 8 bytes of the udp header. We need
				602	* to find the appropriate port.
				603	*/
				604
				605	void __udp4_lib_err(struct sk_buff skb, u32 info, struct udp_table udptable)
				606	{
				607	struct inet_sock *inet;
				608	const struct iphdr iph = (const struct iphdr )skb->data;
				609	struct udphdr uh = (struct udphdr )(skb->data+(iph->ihl<<2));
				610	const int type = icmp_hdr(skb)->type;
				611	const int code = icmp_hdr(skb)->code;
				612	struct sock *sk;
				613	int harderr;
				614	int err;
				615	struct net *net = dev_net(skb->dev);
				616
				617	sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
				618	iph->saddr, uh->source, skb->dev->ifindex, 0,
				619	udptable, NULL);
				620	if (!sk) {
				621	__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
				622	return; /* No socket for error */
				623	}
				624
				625	err = 0;
				626	harderr = 0;
				627	inet = inet_sk(sk);
				628
				629	switch (type) {
				630	default:
				631	case ICMP_TIME_EXCEEDED:
				632	err = EHOSTUNREACH;
				633	break;
				634	case ICMP_SOURCE_QUENCH:
				635	goto out;
				636	case ICMP_PARAMETERPROB:
				637	err = EPROTO;
				638	harderr = 1;
				639	break;
				640	case ICMP_DEST_UNREACH:
				641	if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
				642	ipv4_sk_update_pmtu(skb, sk, info);
				643	if (inet->pmtudisc != IP_PMTUDISC_DONT) {
				644	err = EMSGSIZE;
				645	harderr = 1;
				646	break;
				647	}
				648	goto out;
				649	}
				650	err = EHOSTUNREACH;
				651	if (code <= NR_ICMP_UNREACH) {
				652	harderr = icmp_err_convert[code].fatal;
				653	err = icmp_err_convert[code].errno;
				654	}
				655	break;
				656	case ICMP_REDIRECT:
				657	ipv4_sk_redirect(skb, sk);
				658	goto out;
				659	}
				660
				661	/*
				662	* RFC1122: OK. Passes ICMP errors back to application, as per
				663	* 4.1.3.3.
				664	*/
				665	if (!inet->recverr) {
				666	if (!harderr \|\| sk->sk_state != TCP_ESTABLISHED)
				667	goto out;
				668	} else
				669	ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1));
				670
				671	sk->sk_err = err;
				672	sk->sk_error_report(sk);
				673	out:
				674	return;
				675	}
				676
				677	void udp_err(struct sk_buff *skb, u32 info)
				678	{
				679	__udp4_lib_err(skb, info, &udp_table);
				680	}
				681
				682	/*
				683	* Throw away all pending data and cancel the corking. Socket is locked.
				684	*/
				685	void udp_flush_pending_frames(struct sock *sk)
				686	{
				687	struct udp_sock *up = udp_sk(sk);
				688
				689	if (up->pending) {
				690	up->len = 0;
				691	up->pending = 0;
				692	ip_flush_pending_frames(sk);
				693	}
				694	}
				695	EXPORT_SYMBOL(udp_flush_pending_frames);
				696
				697	/**
				698	* udp4_hwcsum - handle outgoing HW checksumming
				699	* @skb: sk_buff containing the filled-in UDP header
				700	* (checksum field must be zeroed out)
				701	* @src: source IP address
				702	* @dst: destination IP address
				703	*/
				704	void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
				705	{
				706	struct udphdr *uh = udp_hdr(skb);
				707	int offset = skb_transport_offset(skb);
				708	int len = skb->len - offset;
				709	int hlen = len;
				710	__wsum csum = 0;
				711
				712	if (!skb_has_frag_list(skb)) {
				713	/*
				714	* Only one fragment on the socket.
				715	*/
				716	skb->csum_start = skb_transport_header(skb) - skb->head;
				717	skb->csum_offset = offsetof(struct udphdr, check);
				718	uh->check = ~csum_tcpudp_magic(src, dst, len,
				719	IPPROTO_UDP, 0);
				720	} else {
				721	struct sk_buff *frags;
				722
				723	/*
				724	* HW-checksum won't work as there are two or more
				725	* fragments on the socket so that all csums of sk_buffs
				726	* should be together
				727	*/
				728	skb_walk_frags(skb, frags) {
				729	csum = csum_add(csum, frags->csum);
				730	hlen -= frags->len;
				731	}
				732
				733	csum = skb_checksum(skb, offset, hlen, csum);
				734	skb->ip_summed = CHECKSUM_NONE;
				735
				736	uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
				737	if (uh->check == 0)
				738	uh->check = CSUM_MANGLED_0;
				739	}
				740	}
				741	EXPORT_SYMBOL_GPL(udp4_hwcsum);
				742
				743	/* Function to set UDP checksum for an IPv4 UDP packet. This is intended
				744	* for the simple case like when setting the checksum for a UDP tunnel.
				745	*/
				746	void udp_set_csum(bool nocheck, struct sk_buff *skb,
				747	__be32 saddr, __be32 daddr, int len)
				748	{
				749	struct udphdr *uh = udp_hdr(skb);
				750
				751	if (nocheck) {
				752	uh->check = 0;
				753	} else if (skb_is_gso(skb)) {
				754	uh->check = ~udp_v4_check(len, saddr, daddr, 0);
				755	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
				756	uh->check = 0;
				757	uh->check = udp_v4_check(len, saddr, daddr, lco_csum(skb));
				758	if (uh->check == 0)
				759	uh->check = CSUM_MANGLED_0;
				760	} else {
				761	skb->ip_summed = CHECKSUM_PARTIAL;
				762	skb->csum_start = skb_transport_header(skb) - skb->head;
				763	skb->csum_offset = offsetof(struct udphdr, check);
				764	uh->check = ~udp_v4_check(len, saddr, daddr, 0);
				765	}
				766	}
				767	EXPORT_SYMBOL(udp_set_csum);
				768
				769	static int udp_send_skb(struct sk_buff skb, struct flowi4 fl4,
				770	struct inet_cork *cork)
				771	{
				772	struct sock *sk = skb->sk;
				773	struct inet_sock *inet = inet_sk(sk);
				774	struct udphdr *uh;
				775	int err = 0;
				776	int is_udplite = IS_UDPLITE(sk);
				777	int offset = skb_transport_offset(skb);
				778	int len = skb->len - offset;
				779	int datalen = len - sizeof(*uh);
				780	__wsum csum = 0;
				781
				782	/*
				783	* Create a UDP header
				784	*/
				785	uh = udp_hdr(skb);
				786	uh->source = inet->inet_sport;
				787	uh->dest = fl4->fl4_dport;
				788	uh->len = htons(len);
				789	uh->check = 0;
				790
				791	if (cork->gso_size) {
				792	const int hlen = skb_network_header_len(skb) +
				793	sizeof(struct udphdr);
				794
				795	if (hlen + cork->gso_size > cork->fragsize) {
				796	kfree_skb(skb);
				797	return -EINVAL;
				798	}
				799	if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) {
				800	kfree_skb(skb);
				801	return -EINVAL;
				802	}
				803	if (sk->sk_no_check_tx) {
				804	kfree_skb(skb);
				805	return -EINVAL;
				806	}
				807	if (skb->ip_summed != CHECKSUM_PARTIAL \|\| is_udplite \|\|
				808	dst_xfrm(skb_dst(skb))) {
				809	kfree_skb(skb);
				810	return -EIO;
				811	}
				812
				813	if (datalen > cork->gso_size) {
				814	skb_shinfo(skb)->gso_size = cork->gso_size;
				815	skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
				816	skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen,
				817	cork->gso_size);
				818	}
				819	goto csum_partial;
				820	}
				821
				822	if (is_udplite) /* UDP-Lite */
				823	csum = udplite_csum(skb);
				824
				825	else if (sk->sk_no_check_tx) { /* UDP csum off */
				826
				827	skb->ip_summed = CHECKSUM_NONE;
				828	goto send;
				829
				830	} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
				831	csum_partial:
				832
				833	udp4_hwcsum(skb, fl4->saddr, fl4->daddr);
				834	goto send;
				835
				836	} else
				837	csum = udp_csum(skb);
				838
				839	/* add protocol-dependent pseudo-header */
				840	uh->check = csum_tcpudp_magic(fl4->saddr, fl4->daddr, len,
				841	sk->sk_protocol, csum);
				842	if (uh->check == 0)
				843	uh->check = CSUM_MANGLED_0;
				844
				845	send:
				846	err = ip_send_skb(sock_net(sk), skb);
				847	if (err) {
				848	if (err == -ENOBUFS && !inet->recverr) {
				849	UDP_INC_STATS(sock_net(sk),
				850	UDP_MIB_SNDBUFERRORS, is_udplite);
				851	err = 0;
				852	}
				853	} else
				854	UDP_INC_STATS(sock_net(sk),
				855	UDP_MIB_OUTDATAGRAMS, is_udplite);
				856	return err;
				857	}
				858
				859	/*
				860	* Push out all pending data as one UDP datagram. Socket is locked.
				861	*/
				862	int udp_push_pending_frames(struct sock *sk)
				863	{
				864	struct udp_sock *up = udp_sk(sk);
				865	struct inet_sock *inet = inet_sk(sk);
				866	struct flowi4 *fl4 = &inet->cork.fl.u.ip4;
				867	struct sk_buff *skb;
				868	int err = 0;
				869
				870	skb = ip_finish_skb(sk, fl4);
				871	if (!skb)
				872	goto out;
				873
				874	err = udp_send_skb(skb, fl4, &inet->cork.base);
				875
				876	out:
				877	up->len = 0;
				878	up->pending = 0;
				879	return err;
				880	}
				881	EXPORT_SYMBOL(udp_push_pending_frames);
				882
				883	static int __udp_cmsg_send(struct cmsghdr cmsg, u16 gso_size)
				884	{
				885	switch (cmsg->cmsg_type) {
				886	case UDP_SEGMENT:
				887	if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u16)))
				888	return -EINVAL;
				889	gso_size = (__u16 *)CMSG_DATA(cmsg);
				890	return 0;
				891	default:
				892	return -EINVAL;
				893	}
				894	}
				895
				896	int udp_cmsg_send(struct sock sk, struct msghdr msg, u16 *gso_size)
				897	{
				898	struct cmsghdr *cmsg;
				899	bool need_ip = false;
				900	int err;
				901
				902	for_each_cmsghdr(cmsg, msg) {
				903	if (!CMSG_OK(msg, cmsg))
				904	return -EINVAL;
				905
				906	if (cmsg->cmsg_level != SOL_UDP) {
				907	need_ip = true;
				908	continue;
				909	}
				910
				911	err = __udp_cmsg_send(cmsg, gso_size);
				912	if (err)
				913	return err;
				914	}
				915
				916	return need_ip;
				917	}
				918	EXPORT_SYMBOL_GPL(udp_cmsg_send);
				919
				920	int udp_sendmsg(struct sock sk, struct msghdr msg, size_t len)
				921	{
				922	struct inet_sock *inet = inet_sk(sk);
				923	struct udp_sock *up = udp_sk(sk);
				924	DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
				925	struct flowi4 fl4_stack;
				926	struct flowi4 *fl4;
				927	int ulen = len;
				928	struct ipcm_cookie ipc;
				929	struct rtable *rt = NULL;
				930	int free = 0;
				931	int connected = 0;
				932	__be32 daddr, faddr, saddr;
				933	__be16 dport;
				934	u8 tos;
				935	int err, is_udplite = IS_UDPLITE(sk);
				936	int corkreq = up->corkflag \|\| msg->msg_flags&MSG_MORE;
				937	int (getfrag)(void , char , int, int, int, struct sk_buff );
				938	struct sk_buff *skb;
				939	struct ip_options_data opt_copy;
				940
				941	if (len > 0xFFFF)
				942	return -EMSGSIZE;
				943
				944	/*
				945	* Check the flags.
				946	*/
				947
				948	if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */
				949	return -EOPNOTSUPP;
				950
				951	getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
				952
				953	fl4 = &inet->cork.fl.u.ip4;
				954	if (up->pending) {
				955	/*
				956	* There are pending frames.
				957	* The socket lock must be held while it's corked.
				958	*/
				959	lock_sock(sk);
				960	if (likely(up->pending)) {
				961	if (unlikely(up->pending != AF_INET)) {
				962	release_sock(sk);
				963	return -EINVAL;
				964	}
				965	goto do_append_data;
				966	}
				967	release_sock(sk);
				968	}
				969	ulen += sizeof(struct udphdr);
				970
				971	/*
				972	* Get and verify the address.
				973	*/
				974	if (usin) {
				975	if (msg->msg_namelen < sizeof(*usin))
				976	return -EINVAL;
				977	if (usin->sin_family != AF_INET) {
				978	if (usin->sin_family != AF_UNSPEC)
				979	return -EAFNOSUPPORT;
				980	}
				981
				982	daddr = usin->sin_addr.s_addr;
				983	dport = usin->sin_port;
				984	if (dport == 0)
				985	return -EINVAL;
				986	} else {
				987	if (sk->sk_state != TCP_ESTABLISHED)
				988	return -EDESTADDRREQ;
				989	daddr = inet->inet_daddr;
				990	dport = inet->inet_dport;
				991	/* Open fast path for connected socket.
				992	Route will not be used, if at least one option is set.
				993	*/
				994	connected = 1;
				995	}
				996
				997	ipcm_init_sk(&ipc, inet);
				998	ipc.gso_size = up->gso_size;
				999
				1000	if (msg->msg_controllen) {
				1001	err = udp_cmsg_send(sk, msg, &ipc.gso_size);
				1002	if (err > 0)
				1003	err = ip_cmsg_send(sk, msg, &ipc,
				1004	sk->sk_family == AF_INET6);
				1005	if (unlikely(err < 0)) {
				1006	kfree(ipc.opt);
				1007	return err;
				1008	}
				1009	if (ipc.opt)
				1010	free = 1;
				1011	connected = 0;
				1012	}
				1013	if (!ipc.opt) {
				1014	struct ip_options_rcu *inet_opt;
				1015
				1016	rcu_read_lock();
				1017	inet_opt = rcu_dereference(inet->inet_opt);
				1018	if (inet_opt) {
				1019	memcpy(&opt_copy, inet_opt,
				1020	sizeof(*inet_opt) + inet_opt->opt.optlen);
				1021	ipc.opt = &opt_copy.opt;
				1022	}
				1023	rcu_read_unlock();
				1024	}
				1025
				1026	if (cgroup_bpf_enabled && !connected) {
				1027	err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk,
				1028	(struct sockaddr *)usin, &ipc.addr);
				1029	if (err)
				1030	goto out_free;
				1031	if (usin) {
				1032	if (usin->sin_port == 0) {
				1033	/* BPF program set invalid port. Reject it. */
				1034	err = -EINVAL;
				1035	goto out_free;
				1036	}
				1037	daddr = usin->sin_addr.s_addr;
				1038	dport = usin->sin_port;
				1039	}
				1040	}
				1041
				1042	saddr = ipc.addr;
				1043	ipc.addr = faddr = daddr;
				1044
				1045	if (ipc.opt && ipc.opt->opt.srr) {
				1046	if (!daddr) {
				1047	err = -EINVAL;
				1048	goto out_free;
				1049	}
				1050	faddr = ipc.opt->opt.faddr;
				1051	connected = 0;
				1052	}
				1053	tos = get_rttos(&ipc, inet);
				1054	if (sock_flag(sk, SOCK_LOCALROUTE) \|\|
				1055	(msg->msg_flags & MSG_DONTROUTE) \|\|
				1056	(ipc.opt && ipc.opt->opt.is_strictroute)) {
				1057	tos \|= RTO_ONLINK;
				1058	connected = 0;
				1059	}
				1060
				1061	if (ipv4_is_multicast(daddr)) {
				1062	if (!ipc.oif)
				1063	ipc.oif = inet->mc_index;
				1064	if (!saddr)
				1065	saddr = inet->mc_addr;
				1066	connected = 0;
				1067	} else if (!ipc.oif) {
				1068	ipc.oif = inet->uc_index;
				1069	} else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
				1070	/* oif is set, packet is to local broadcast and
				1071	* and uc_index is set. oif is most likely set
				1072	* by sk_bound_dev_if. If uc_index != oif check if the
				1073	* oif is an L3 master and uc_index is an L3 slave.
				1074	* If so, we want to allow the send using the uc_index.
				1075	*/
				1076	if (ipc.oif != inet->uc_index &&
				1077	ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk),
				1078	inet->uc_index)) {
				1079	ipc.oif = inet->uc_index;
				1080	}
				1081	}
				1082
				1083	if (connected)
				1084	rt = (struct rtable *)sk_dst_check(sk, 0);
				1085
				1086	if (!rt) {
				1087	struct net *net = sock_net(sk);
				1088	__u8 flow_flags = inet_sk_flowi_flags(sk);
				1089
				1090	fl4 = &fl4_stack;
				1091
				1092	flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
				1093	RT_SCOPE_UNIVERSE, sk->sk_protocol,
				1094	flow_flags,
				1095	faddr, saddr, dport, inet->inet_sport,
				1096	sk->sk_uid);
				1097
				1098	security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
				1099	rt = ip_route_output_flow(net, fl4, sk);
				1100	if (IS_ERR(rt)) {
				1101	err = PTR_ERR(rt);
				1102	rt = NULL;
				1103	if (err == -ENETUNREACH)
				1104	IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
				1105	goto out;
				1106	}
				1107
				1108	err = -EACCES;
				1109	if ((rt->rt_flags & RTCF_BROADCAST) &&
				1110	!sock_flag(sk, SOCK_BROADCAST))
				1111	goto out;
				1112	if (connected)
				1113	sk_dst_set(sk, dst_clone(&rt->dst));
				1114	}
				1115
				1116	if (msg->msg_flags&MSG_CONFIRM)
				1117	goto do_confirm;
				1118	back_from_confirm:
				1119
				1120	saddr = fl4->saddr;
				1121	if (!ipc.addr)
				1122	daddr = ipc.addr = fl4->daddr;
				1123
				1124	/* Lockless fast path for the non-corking case. */
				1125	if (!corkreq) {
				1126	struct inet_cork cork;
				1127
				1128	skb = ip_make_skb(sk, fl4, getfrag, msg, ulen,
				1129	sizeof(struct udphdr), &ipc, &rt,
				1130	&cork, msg->msg_flags);
				1131	err = PTR_ERR(skb);
				1132	if (!IS_ERR_OR_NULL(skb))
				1133	err = udp_send_skb(skb, fl4, &cork);
				1134	goto out;
				1135	}
				1136
				1137	lock_sock(sk);
				1138	if (unlikely(up->pending)) {
				1139	/* The socket is already corked while preparing it. */
				1140	/* ... which is an evident application bug. --ANK */
				1141	release_sock(sk);
				1142
				1143	net_dbg_ratelimited("socket already corked\n");
				1144	err = -EINVAL;
				1145	goto out;
				1146	}
				1147	/*
				1148	* Now cork the socket to pend data.
				1149	*/
				1150	fl4 = &inet->cork.fl.u.ip4;
				1151	fl4->daddr = daddr;
				1152	fl4->saddr = saddr;
				1153	fl4->fl4_dport = dport;
				1154	fl4->fl4_sport = inet->inet_sport;
				1155	up->pending = AF_INET;
				1156
				1157	do_append_data:
				1158	up->len += ulen;
				1159	err = ip_append_data(sk, fl4, getfrag, msg, ulen,
				1160	sizeof(struct udphdr), &ipc, &rt,
				1161	corkreq ? msg->msg_flags\|MSG_MORE : msg->msg_flags);
				1162	if (err)
				1163	udp_flush_pending_frames(sk);
				1164	else if (!corkreq)
				1165	err = udp_push_pending_frames(sk);
				1166	else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
				1167	up->pending = 0;
				1168	release_sock(sk);
				1169
				1170	out:
				1171	ip_rt_put(rt);
				1172	out_free:
				1173	if (free)
				1174	kfree(ipc.opt);
				1175	if (!err)
				1176	return len;
				1177	/*
				1178	* ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
				1179	* ENOBUFS might not be good (it's not tunable per se), but otherwise
				1180	* we don't have a good statistic (IpOutDiscards but it can be too many
				1181	* things). We could add another new stat but at least for now that
				1182	* seems like overkill.
				1183	*/
				1184	if (err == -ENOBUFS \|\| test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
				1185	UDP_INC_STATS(sock_net(sk),
				1186	UDP_MIB_SNDBUFERRORS, is_udplite);
				1187	}
				1188	return err;
				1189
				1190	do_confirm:
				1191	if (msg->msg_flags & MSG_PROBE)
				1192	dst_confirm_neigh(&rt->dst, &fl4->daddr);
				1193	if (!(msg->msg_flags&MSG_PROBE) \|\| len)
				1194	goto back_from_confirm;
				1195	err = 0;
				1196	goto out;
				1197	}
				1198	EXPORT_SYMBOL(udp_sendmsg);
				1199
				1200	int udp_sendpage(struct sock sk, struct page page, int offset,
				1201	size_t size, int flags)
				1202	{
				1203	struct inet_sock *inet = inet_sk(sk);
				1204	struct udp_sock *up = udp_sk(sk);
				1205	int ret;
				1206
				1207	if (flags & MSG_SENDPAGE_NOTLAST)
				1208	flags \|= MSG_MORE;
				1209
				1210	if (!up->pending) {
				1211	struct msghdr msg = { .msg_flags = flags\|MSG_MORE };
				1212
				1213	/* Call udp_sendmsg to specify destination address which
				1214	* sendpage interface can't pass.
				1215	* This will succeed only when the socket is connected.
				1216	*/
				1217	ret = udp_sendmsg(sk, &msg, 0);
				1218	if (ret < 0)
				1219	return ret;
				1220	}
				1221
				1222	lock_sock(sk);
				1223
				1224	if (unlikely(!up->pending)) {
				1225	release_sock(sk);
				1226
				1227	net_dbg_ratelimited("cork failed\n");
				1228	return -EINVAL;
				1229	}
				1230
				1231	ret = ip_append_page(sk, &inet->cork.fl.u.ip4,
				1232	page, offset, size, flags);
				1233	if (ret == -EOPNOTSUPP) {
				1234	release_sock(sk);
				1235	return sock_no_sendpage(sk->sk_socket, page, offset,
				1236	size, flags);
				1237	}
				1238	if (ret < 0) {
				1239	udp_flush_pending_frames(sk);
				1240	goto out;
				1241	}
				1242
				1243	up->len += size;
				1244	if (!(up->corkflag \|\| (flags&MSG_MORE)))
				1245	ret = udp_push_pending_frames(sk);
				1246	if (!ret)
				1247	ret = size;
				1248	out:
				1249	release_sock(sk);
				1250	return ret;
				1251	}
				1252
				1253	#define UDP_SKB_IS_STATELESS 0x80000000
				1254
				1255	static void udp_set_dev_scratch(struct sk_buff *skb)
				1256	{
				1257	struct udp_dev_scratch *scratch = udp_skb_scratch(skb);
				1258
				1259	BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long));
				1260	scratch->_tsize_state = skb->truesize;
				1261	#if BITS_PER_LONG == 64
				1262	scratch->len = skb->len;
				1263	scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
				1264	scratch->is_linear = !skb_is_nonlinear(skb);
				1265	#endif
				1266	/* all head states execept sp (dst, sk, nf) are always cleared by
				1267	* udp_rcv() and we need to preserve secpath, if present, to eventually
				1268	* process IP_CMSG_PASSSEC at recvmsg() time
				1269	*/
				1270	if (likely(!skb_sec_path(skb)))
				1271	scratch->_tsize_state \|= UDP_SKB_IS_STATELESS;
				1272	}
				1273
				1274	static void udp_skb_csum_unnecessary_set(struct sk_buff *skb)
				1275	{
				1276	/* We come here after udp_lib_checksum_complete() returned 0.
				1277	* This means that __skb_checksum_complete() might have
				1278	* set skb->csum_valid to 1.
				1279	* On 64bit platforms, we can set csum_unnecessary
				1280	* to true, but only if the skb is not shared.
				1281	*/
				1282	#if BITS_PER_LONG == 64
				1283	if (!skb_shared(skb))
				1284	udp_skb_scratch(skb)->csum_unnecessary = true;
				1285	#endif
				1286	}
				1287
				1288	static int udp_skb_truesize(struct sk_buff *skb)
				1289	{
				1290	return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS;
				1291	}
				1292
				1293	static bool udp_skb_has_head_state(struct sk_buff *skb)
				1294	{
				1295	return !(udp_skb_scratch(skb)->_tsize_state & UDP_SKB_IS_STATELESS);
				1296	}
				1297
				1298	/* fully reclaim rmem/fwd memory allocated for skb */
				1299	static void udp_rmem_release(struct sock *sk, int size, int partial,
				1300	bool rx_queue_lock_held)
				1301	{
				1302	struct udp_sock *up = udp_sk(sk);
				1303	struct sk_buff_head *sk_queue;
				1304	int amt;
				1305
				1306	if (likely(partial)) {
				1307	up->forward_deficit += size;
				1308	size = up->forward_deficit;
				1309	if (size < (sk->sk_rcvbuf >> 2))
				1310	return;
				1311	} else {
				1312	size += up->forward_deficit;
				1313	}
				1314	up->forward_deficit = 0;
				1315
				1316	/* acquire the sk_receive_queue for fwd allocated memory scheduling,
				1317	* if the called don't held it already
				1318	*/
				1319	sk_queue = &sk->sk_receive_queue;
				1320	if (!rx_queue_lock_held)
				1321	spin_lock(&sk_queue->lock);
				1322
				1323
				1324	sk->sk_forward_alloc += size;
				1325	amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1);
				1326	sk->sk_forward_alloc -= amt;
				1327
				1328	if (amt)
				1329	__sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT);
				1330
				1331	atomic_sub(size, &sk->sk_rmem_alloc);
				1332
				1333	/* this can save us from acquiring the rx queue lock on next receive */
				1334	skb_queue_splice_tail_init(sk_queue, &up->reader_queue);
				1335
				1336	if (!rx_queue_lock_held)
				1337	spin_unlock(&sk_queue->lock);
				1338	}
				1339
				1340	/* Note: called with reader_queue.lock held.
				1341	* Instead of using skb->truesize here, find a copy of it in skb->dev_scratch
				1342	* This avoids a cache line miss while receive_queue lock is held.
				1343	* Look at __udp_enqueue_schedule_skb() to find where this copy is done.
				1344	*/
				1345	void udp_skb_destructor(struct sock sk, struct sk_buff skb)
				1346	{
				1347	prefetch(&skb->data);
				1348	udp_rmem_release(sk, udp_skb_truesize(skb), 1, false);
				1349	}
				1350	EXPORT_SYMBOL(udp_skb_destructor);
				1351
				1352	/* as above, but the caller held the rx queue lock, too */
				1353	static void udp_skb_dtor_locked(struct sock sk, struct sk_buff skb)
				1354	{
				1355	prefetch(&skb->data);
				1356	udp_rmem_release(sk, udp_skb_truesize(skb), 1, true);
				1357	}
				1358
				1359	/* Idea of busylocks is to let producers grab an extra spinlock
				1360	* to relieve pressure on the receive_queue spinlock shared by consumer.
				1361	* Under flood, this means that only one producer can be in line
				1362	* trying to acquire the receive_queue spinlock.
				1363	* These busylock can be allocated on a per cpu manner, instead of a
				1364	* per socket one (that would consume a cache line per socket)
				1365	*/
				1366	static int udp_busylocks_log __read_mostly;
				1367	static spinlock_t *udp_busylocks __read_mostly;
				1368
				1369	static spinlock_t busylock_acquire(void ptr)
				1370	{
				1371	spinlock_t *busy;
				1372
				1373	busy = udp_busylocks + hash_ptr(ptr, udp_busylocks_log);
				1374	spin_lock(busy);
				1375	return busy;
				1376	}
				1377
				1378	static void busylock_release(spinlock_t *busy)
				1379	{
				1380	if (busy)
				1381	spin_unlock(busy);
				1382	}
				1383
				1384	int __udp_enqueue_schedule_skb(struct sock sk, struct sk_buff skb)
				1385	{
				1386	struct sk_buff_head *list = &sk->sk_receive_queue;
				1387	int rmem, delta, amt, err = -ENOMEM;
				1388	spinlock_t *busy = NULL;
				1389	int size;
				1390
				1391	/* try to avoid the costly atomic add/sub pair when the receive
				1392	* queue is full; always allow at least a packet
				1393	*/
				1394	rmem = atomic_read(&sk->sk_rmem_alloc);
				1395	if (rmem > sk->sk_rcvbuf)
				1396	goto drop;
				1397
				1398	/* Under mem pressure, it might be helpful to help udp_recvmsg()
				1399	* having linear skbs :
				1400	* - Reduce memory overhead and thus increase receive queue capacity
				1401	* - Less cache line misses at copyout() time
				1402	* - Less work at consume_skb() (less alien page frag freeing)
				1403	*/
				1404	if (rmem > (sk->sk_rcvbuf >> 1)) {
				1405	skb_condense(skb);
				1406
				1407	busy = busylock_acquire(sk);
				1408	}
				1409	size = skb->truesize;
				1410	udp_set_dev_scratch(skb);
				1411
				1412	/* we drop only if the receive buf is full and the receive
				1413	* queue contains some other skb
				1414	*/
				1415	rmem = atomic_add_return(size, &sk->sk_rmem_alloc);
				1416	if (rmem > (size + (unsigned int)sk->sk_rcvbuf))
				1417	goto uncharge_drop;
				1418
				1419	spin_lock(&list->lock);
				1420	if (size >= sk->sk_forward_alloc) {
				1421	amt = sk_mem_pages(size);
				1422	delta = amt << SK_MEM_QUANTUM_SHIFT;
				1423	if (!__sk_mem_raise_allocated(sk, delta, amt, SK_MEM_RECV)) {
				1424	err = -ENOBUFS;
				1425	spin_unlock(&list->lock);
				1426	goto uncharge_drop;
				1427	}
				1428
				1429	sk->sk_forward_alloc += delta;
				1430	}
				1431
				1432	sk->sk_forward_alloc -= size;
				1433
				1434	/* no need to setup a destructor, we will explicitly release the
				1435	* forward allocated memory on dequeue
				1436	*/
				1437	sock_skb_set_dropcount(sk, skb);
				1438
				1439	__skb_queue_tail(list, skb);
				1440	spin_unlock(&list->lock);
				1441
				1442	if (!sock_flag(sk, SOCK_DEAD))
				1443	sk->sk_data_ready(sk);
				1444
				1445	busylock_release(busy);
				1446	return 0;
				1447
				1448	uncharge_drop:
				1449	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
				1450
				1451	drop:
				1452	atomic_inc(&sk->sk_drops);
				1453	busylock_release(busy);
				1454	return err;
				1455	}
				1456	EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
				1457
				1458	void udp_destruct_sock(struct sock *sk)
				1459	{
				1460	/* reclaim completely the forward allocated memory */
				1461	struct udp_sock *up = udp_sk(sk);
				1462	unsigned int total = 0;
				1463	struct sk_buff *skb;
				1464
				1465	skb_queue_splice_tail_init(&sk->sk_receive_queue, &up->reader_queue);
				1466	while ((skb = __skb_dequeue(&up->reader_queue)) != NULL) {
				1467	total += skb->truesize;
				1468	kfree_skb(skb);
				1469	}
				1470	udp_rmem_release(sk, total, 0, true);
				1471
				1472	inet_sock_destruct(sk);
				1473	}
				1474	EXPORT_SYMBOL_GPL(udp_destruct_sock);
				1475
				1476	int udp_init_sock(struct sock *sk)
				1477	{
				1478	skb_queue_head_init(&udp_sk(sk)->reader_queue);
				1479	sk->sk_destruct = udp_destruct_sock;
				1480	return 0;
				1481	}
				1482	EXPORT_SYMBOL_GPL(udp_init_sock);
				1483
				1484	void skb_consume_udp(struct sock sk, struct sk_buff skb, int len)
				1485	{
				1486	if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) {
				1487	bool slow = lock_sock_fast(sk);
				1488
				1489	sk_peek_offset_bwd(sk, len);
				1490	unlock_sock_fast(sk, slow);
				1491	}
				1492
				1493	if (!skb_unref(skb))
				1494	return;
				1495
				1496	/* In the more common cases we cleared the head states previously,
				1497	* see __udp_queue_rcv_skb().
				1498	*/
				1499	if (unlikely(udp_skb_has_head_state(skb)))
				1500	skb_release_head_state(skb);
				1501	__consume_stateless_skb(skb);
				1502	}
				1503	EXPORT_SYMBOL_GPL(skb_consume_udp);
				1504
				1505	static struct sk_buff __first_packet_length(struct sock sk,
				1506	struct sk_buff_head *rcvq,
				1507	int *total)
				1508	{
				1509	struct sk_buff *skb;
				1510
				1511	while ((skb = skb_peek(rcvq)) != NULL) {
				1512	if (udp_lib_checksum_complete(skb)) {
				1513	__UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
				1514	IS_UDPLITE(sk));
				1515	__UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
				1516	IS_UDPLITE(sk));
				1517	atomic_inc(&sk->sk_drops);
				1518	__skb_unlink(skb, rcvq);
				1519	*total += skb->truesize;
				1520	kfree_skb(skb);
				1521	} else {
				1522	udp_skb_csum_unnecessary_set(skb);
				1523	break;
				1524	}
				1525	}
				1526	return skb;
				1527	}
				1528
				1529	/**
				1530	* first_packet_length - return length of first packet in receive queue
				1531	* @sk: socket
				1532	*
				1533	* Drops all bad checksum frames, until a valid one is found.
				1534	* Returns the length of found skb, or -1 if none is found.
				1535	*/
				1536	static int first_packet_length(struct sock *sk)
				1537	{
				1538	struct sk_buff_head *rcvq = &udp_sk(sk)->reader_queue;
				1539	struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
				1540	struct sk_buff *skb;
				1541	int total = 0;
				1542	int res;
				1543
				1544	spin_lock_bh(&rcvq->lock);
				1545	skb = __first_packet_length(sk, rcvq, &total);
				1546	if (!skb && !skb_queue_empty_lockless(sk_queue)) {
				1547	spin_lock(&sk_queue->lock);
				1548	skb_queue_splice_tail_init(sk_queue, rcvq);
				1549	spin_unlock(&sk_queue->lock);
				1550
				1551	skb = __first_packet_length(sk, rcvq, &total);
				1552	}
				1553	res = skb ? skb->len : -1;
				1554	if (total)
				1555	udp_rmem_release(sk, total, 1, false);
				1556	spin_unlock_bh(&rcvq->lock);
				1557	return res;
				1558	}
				1559
				1560	/*
				1561	* IOCTL requests applicable to the UDP protocol
				1562	*/
				1563
				1564	int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
				1565	{
				1566	switch (cmd) {
				1567	case SIOCOUTQ:
				1568	{
				1569	int amount = sk_wmem_alloc_get(sk);
				1570
				1571	return put_user(amount, (int __user *)arg);
				1572	}
				1573
				1574	case SIOCINQ:
				1575	{
				1576	int amount = max_t(int, 0, first_packet_length(sk));
				1577
				1578	return put_user(amount, (int __user *)arg);
				1579	}
				1580
				1581	default:
				1582	return -ENOIOCTLCMD;
				1583	}
				1584
				1585	return 0;
				1586	}
				1587	EXPORT_SYMBOL(udp_ioctl);
				1588
				1589	struct sk_buff __skb_recv_udp(struct sock sk, unsigned int flags,
				1590	int noblock, int peeked, int off, int *err)
				1591	{
				1592	struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
				1593	struct sk_buff_head *queue;
				1594	struct sk_buff *last;
				1595	long timeo;
				1596	int error;
				1597
				1598	queue = &udp_sk(sk)->reader_queue;
				1599	flags \|= noblock ? MSG_DONTWAIT : 0;
				1600	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
				1601	do {
				1602	struct sk_buff *skb;
				1603
				1604	error = sock_error(sk);
				1605	if (error)
				1606	break;
				1607
				1608	error = -EAGAIN;
				1609	*peeked = 0;
				1610	do {
				1611	spin_lock_bh(&queue->lock);
				1612	skb = __skb_try_recv_from_queue(sk, queue, flags,
				1613	udp_skb_destructor,
				1614	peeked, off, err,
				1615	&last);
				1616	if (skb) {
				1617	spin_unlock_bh(&queue->lock);
				1618	return skb;
				1619	}
				1620
				1621	if (skb_queue_empty_lockless(sk_queue)) {
				1622	spin_unlock_bh(&queue->lock);
				1623	goto busy_check;
				1624	}
				1625
				1626	/* refill the reader queue and walk it again
				1627	* keep both queues locked to avoid re-acquiring
				1628	* the sk_receive_queue lock if fwd memory scheduling
				1629	* is needed.
				1630	*/
				1631	spin_lock(&sk_queue->lock);
				1632	skb_queue_splice_tail_init(sk_queue, queue);
				1633
				1634	skb = __skb_try_recv_from_queue(sk, queue, flags,
				1635	udp_skb_dtor_locked,
				1636	peeked, off, err,
				1637	&last);
				1638	spin_unlock(&sk_queue->lock);
				1639	spin_unlock_bh(&queue->lock);
				1640	if (skb)
				1641	return skb;
				1642
				1643	busy_check:
				1644	if (!sk_can_busy_loop(sk))
				1645	break;
				1646
				1647	sk_busy_loop(sk, flags & MSG_DONTWAIT);
				1648	} while (!skb_queue_empty_lockless(sk_queue));
				1649
				1650	/* sk_queue is empty, reader_queue may contain peeked packets */
				1651	} while (timeo &&
				1652	!__skb_wait_for_more_packets(sk, &error, &timeo,
				1653	(struct sk_buff *)sk_queue));
				1654
				1655	*err = error;
				1656	return NULL;
				1657	}
				1658	EXPORT_SYMBOL(__skb_recv_udp);
				1659
				1660	/*
				1661	* This should be easy, if there is something there we
				1662	* return it, otherwise we block.
				1663	*/
				1664
				1665	int udp_recvmsg(struct sock sk, struct msghdr msg, size_t len, int noblock,
				1666	int flags, int *addr_len)
				1667	{
				1668	struct inet_sock *inet = inet_sk(sk);
				1669	DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
				1670	struct sk_buff *skb;
				1671	unsigned int ulen, copied;
				1672	int peeked, peeking, off;
				1673	int err;
				1674	int is_udplite = IS_UDPLITE(sk);
				1675	bool checksum_valid = false;
				1676
				1677	if (flags & MSG_ERRQUEUE)
				1678	return ip_recv_error(sk, msg, len, addr_len);
				1679
				1680	try_again:
				1681	peeking = flags & MSG_PEEK;
				1682	off = sk_peek_offset(sk, flags);
				1683	skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
				1684	if (!skb)
				1685	return err;
				1686
				1687	ulen = udp_skb_len(skb);
				1688	copied = len;
				1689	if (copied > ulen - off)
				1690	copied = ulen - off;
				1691	else if (copied < ulen)
				1692	msg->msg_flags \|= MSG_TRUNC;
				1693
				1694	/*
				1695	* If checksum is needed at all, try to do it while copying the
				1696	* data. If the data is truncated, or if we only want a partial
				1697	* coverage checksum (UDP-Lite), do it before the copy.
				1698	*/
				1699
				1700	if (copied < ulen \|\| peeking \|\|
				1701	(is_udplite && UDP_SKB_CB(skb)->partial_cov)) {
				1702	checksum_valid = udp_skb_csum_unnecessary(skb) \|\|
				1703	!__udp_lib_checksum_complete(skb);
				1704	if (!checksum_valid)
				1705	goto csum_copy_err;
				1706	}
				1707
				1708	if (checksum_valid \|\| udp_skb_csum_unnecessary(skb)) {
				1709	if (udp_skb_is_linear(skb))
				1710	err = copy_linear_skb(skb, copied, off, &msg->msg_iter);
				1711	else
				1712	err = skb_copy_datagram_msg(skb, off, msg, copied);
				1713	} else {
				1714	err = skb_copy_and_csum_datagram_msg(skb, off, msg);
				1715
				1716	if (err == -EINVAL)
				1717	goto csum_copy_err;
				1718	}
				1719
				1720	if (unlikely(err)) {
				1721	if (!peeked) {
				1722	atomic_inc(&sk->sk_drops);
				1723	UDP_INC_STATS(sock_net(sk),
				1724	UDP_MIB_INERRORS, is_udplite);
				1725	}
				1726	kfree_skb(skb);
				1727	return err;
				1728	}
				1729
				1730	if (!peeked)
				1731	UDP_INC_STATS(sock_net(sk),
				1732	UDP_MIB_INDATAGRAMS, is_udplite);
				1733
				1734	sock_recv_ts_and_drops(msg, sk, skb);
				1735
				1736	/* Copy the address. */
				1737	if (sin) {
				1738	sin->sin_family = AF_INET;
				1739	sin->sin_port = udp_hdr(skb)->source;
				1740	sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
				1741	memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
				1742	addr_len = sizeof(sin);
				1743
				1744	if (cgroup_bpf_enabled)
				1745	BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk,
				1746	(struct sockaddr *)sin);
				1747	}
				1748	if (inet->cmsg_flags)
				1749	ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off);
				1750
				1751	err = copied;
				1752	if (flags & MSG_TRUNC)
				1753	err = ulen;
				1754
				1755	skb_consume_udp(sk, skb, peeking ? -err : err);
				1756	return err;
				1757
				1758	csum_copy_err:
				1759	if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
				1760	udp_skb_destructor)) {
				1761	UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
				1762	UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
				1763	}
				1764	kfree_skb(skb);
				1765
				1766	/* starting over for a new packet, but check if we need to yield */
				1767	cond_resched();
				1768	msg->msg_flags &= ~MSG_TRUNC;
				1769	goto try_again;
				1770	}
				1771
				1772	int udp_pre_connect(struct sock sk, struct sockaddr uaddr, int addr_len)
				1773	{
				1774	/* This check is replicated from __ip4_datagram_connect() and
				1775	* intended to prevent BPF program called below from accessing bytes
				1776	* that are out of the bound specified by user in addr_len.
				1777	*/
				1778	if (addr_len < sizeof(struct sockaddr_in))
				1779	return -EINVAL;
				1780
				1781	return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);
				1782	}
				1783	EXPORT_SYMBOL(udp_pre_connect);
				1784
				1785	int __udp_disconnect(struct sock *sk, int flags)
				1786	{
				1787	struct inet_sock *inet = inet_sk(sk);
				1788	/*
				1789	* 1003.1g - break association.
				1790	*/
				1791
				1792	sk->sk_state = TCP_CLOSE;
				1793	inet->inet_daddr = 0;
				1794	inet->inet_dport = 0;
				1795	sock_rps_reset_rxhash(sk);
				1796	sk->sk_bound_dev_if = 0;
				1797	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
				1798	inet_reset_saddr(sk);
				1799
				1800	if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
				1801	sk->sk_prot->unhash(sk);
				1802	inet->inet_sport = 0;
				1803	}
				1804	sk_dst_reset(sk);
				1805	return 0;
				1806	}
				1807	EXPORT_SYMBOL(__udp_disconnect);
				1808
				1809	int udp_disconnect(struct sock *sk, int flags)
				1810	{
				1811	lock_sock(sk);
				1812	__udp_disconnect(sk, flags);
				1813	release_sock(sk);
				1814	return 0;
				1815	}
				1816	EXPORT_SYMBOL(udp_disconnect);
				1817
				1818	void udp_lib_unhash(struct sock *sk)
				1819	{
				1820	if (sk_hashed(sk)) {
				1821	struct udp_table *udptable = sk->sk_prot->h.udp_table;
				1822	struct udp_hslot hslot, hslot2;
				1823
				1824	hslot = udp_hashslot(udptable, sock_net(sk),
				1825	udp_sk(sk)->udp_port_hash);
				1826	hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
				1827
				1828	spin_lock_bh(&hslot->lock);
				1829	if (rcu_access_pointer(sk->sk_reuseport_cb))
				1830	reuseport_detach_sock(sk);
				1831	if (sk_del_node_init_rcu(sk)) {
				1832	hslot->count--;
				1833	inet_sk(sk)->inet_num = 0;
				1834	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
				1835
				1836	spin_lock(&hslot2->lock);
				1837	hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
				1838	hslot2->count--;
				1839	spin_unlock(&hslot2->lock);
				1840	}
				1841	spin_unlock_bh(&hslot->lock);
				1842	}
				1843	}
				1844	EXPORT_SYMBOL(udp_lib_unhash);
				1845
				1846	/*
				1847	* inet_rcv_saddr was changed, we must rehash secondary hash
				1848	*/
				1849	void udp_lib_rehash(struct sock *sk, u16 newhash)
				1850	{
				1851	if (sk_hashed(sk)) {
				1852	struct udp_table *udptable = sk->sk_prot->h.udp_table;
				1853	struct udp_hslot hslot, hslot2, *nhslot2;
				1854
				1855	hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
				1856	nhslot2 = udp_hashslot2(udptable, newhash);
				1857	udp_sk(sk)->udp_portaddr_hash = newhash;
				1858
				1859	if (hslot2 != nhslot2 \|\|
				1860	rcu_access_pointer(sk->sk_reuseport_cb)) {
				1861	hslot = udp_hashslot(udptable, sock_net(sk),
				1862	udp_sk(sk)->udp_port_hash);
				1863	/* we must lock primary chain too */
				1864	spin_lock_bh(&hslot->lock);
				1865	if (rcu_access_pointer(sk->sk_reuseport_cb))
				1866	reuseport_detach_sock(sk);
				1867
				1868	if (hslot2 != nhslot2) {
				1869	spin_lock(&hslot2->lock);
				1870	hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
				1871	hslot2->count--;
				1872	spin_unlock(&hslot2->lock);
				1873
				1874	spin_lock(&nhslot2->lock);
				1875	hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
				1876	&nhslot2->head);
				1877	nhslot2->count++;
				1878	spin_unlock(&nhslot2->lock);
				1879	}
				1880
				1881	spin_unlock_bh(&hslot->lock);
				1882	}
				1883	}
				1884	}
				1885	EXPORT_SYMBOL(udp_lib_rehash);
				1886
				1887	static void udp_v4_rehash(struct sock *sk)
				1888	{
				1889	u16 new_hash = ipv4_portaddr_hash(sock_net(sk),
				1890	inet_sk(sk)->inet_rcv_saddr,
				1891	inet_sk(sk)->inet_num);
				1892	udp_lib_rehash(sk, new_hash);
				1893	}
				1894
				1895	static int __udp_queue_rcv_skb(struct sock sk, struct sk_buff skb)
				1896	{
				1897	int rc;
				1898
				1899	if (inet_sk(sk)->inet_daddr) {
				1900	sock_rps_save_rxhash(sk, skb);
				1901	sk_mark_napi_id(sk, skb);
				1902	sk_incoming_cpu_update(sk);
				1903	} else {
				1904	sk_mark_napi_id_once(sk, skb);
				1905	}
				1906
				1907	rc = __udp_enqueue_schedule_skb(sk, skb);
				1908	if (rc < 0) {
				1909	int is_udplite = IS_UDPLITE(sk);
				1910
				1911	/* Note that an ENOMEM error is charged twice */
				1912	if (rc == -ENOMEM)
				1913	UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
				1914	is_udplite);
				1915	UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
				1916	kfree_skb(skb);
				1917	trace_udp_fail_queue_rcv_skb(rc, sk);
				1918	return -1;
				1919	}
				1920
				1921	return 0;
				1922	}
				1923
				1924	static DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
				1925	void udp_encap_enable(void)
				1926	{
				1927	static_branch_enable(&udp_encap_needed_key);
				1928	}
				1929	EXPORT_SYMBOL(udp_encap_enable);
				1930
				1931	/* returns:
				1932	* -1: error
				1933	* 0: success
				1934	* >0: "udp encap" protocol resubmission
				1935	*
				1936	* Note that in the success and error cases, the skb is assumed to
				1937	* have either been requeued or freed.
				1938	*/
				1939	static int udp_queue_rcv_skb(struct sock sk, struct sk_buff skb)
				1940	{
				1941	struct udp_sock *up = udp_sk(sk);
				1942	int is_udplite = IS_UDPLITE(sk);
				1943
				1944	/*
				1945	* Charge it to the socket, dropping if the queue is full.
				1946	*/
				1947	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
				1948	goto drop;
				1949	nf_reset(skb);
				1950
				1951	if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
				1952	int (encap_rcv)(struct sock sk, struct sk_buff *skb);
				1953
				1954	/*
				1955	* This is an encapsulation socket so pass the skb to
				1956	* the socket's udp_encap_rcv() hook. Otherwise, just
				1957	* fall through and pass this up the UDP socket.
				1958	* up->encap_rcv() returns the following value:
				1959	* =0 if skb was successfully passed to the encap
				1960	* handler or was discarded by it.
				1961	* >0 if skb should be passed on to UDP.
				1962	* <0 if skb should be resubmitted as proto -N
				1963	*/
				1964
				1965	/* if we're overly short, let UDP handle it */
				1966	encap_rcv = READ_ONCE(up->encap_rcv);
				1967	if (encap_rcv) {
				1968	int ret;
				1969
				1970	/* Verify checksum before giving to encap */
				1971	if (udp_lib_checksum_complete(skb))
				1972	goto csum_error;
				1973
				1974	ret = encap_rcv(sk, skb);
				1975	if (ret <= 0) {
				1976	__UDP_INC_STATS(sock_net(sk),
				1977	UDP_MIB_INDATAGRAMS,
				1978	is_udplite);
				1979	return -ret;
				1980	}
				1981	}
				1982
				1983	/* FALLTHROUGH -- it's a UDP Packet */
				1984	}
				1985
				1986	/*
				1987	* UDP-Lite specific tests, ignored on UDP sockets
				1988	*/
				1989	if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
				1990
				1991	/*
				1992	* MIB statistics other than incrementing the error count are
				1993	* disabled for the following two types of errors: these depend
				1994	* on the application settings, not on the functioning of the
				1995	* protocol stack as such.
				1996	*
				1997	* RFC 3828 here recommends (sec 3.3): "There should also be a
				1998	* way ... to ... at least let the receiving application block
				1999	* delivery of packets with coverage values less than a value
				2000	* provided by the application."
				2001	*/
				2002	if (up->pcrlen == 0) { /* full coverage was set */
				2003	net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n",
				2004	UDP_SKB_CB(skb)->cscov, skb->len);
				2005	goto drop;
				2006	}
				2007	/* The next case involves violating the min. coverage requested
				2008	* by the receiver. This is subtle: if receiver wants x and x is
				2009	* greater than the buffersize/MTU then receiver will complain
				2010	* that it wants x while sender emits packets of smaller size y.
				2011	* Therefore the above ...()->partial_cov statement is essential.
				2012	*/
				2013	if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
				2014	net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n",
				2015	UDP_SKB_CB(skb)->cscov, up->pcrlen);
				2016	goto drop;
				2017	}
				2018	}
				2019
				2020	prefetch(&sk->sk_rmem_alloc);
				2021	if (rcu_access_pointer(sk->sk_filter) &&
				2022	udp_lib_checksum_complete(skb))
				2023	goto csum_error;
				2024
				2025	if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr)))
				2026	goto drop;
				2027
				2028	udp_csum_pull_header(skb);
				2029
				2030	ipv4_pktinfo_prepare(sk, skb);
				2031	return __udp_queue_rcv_skb(sk, skb);
				2032
				2033	csum_error:
				2034	__UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
				2035	drop:
				2036	__UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
				2037	atomic_inc(&sk->sk_drops);
				2038	kfree_skb(skb);
				2039	return -1;
				2040	}
				2041
				2042	/* For TCP sockets, sk_rx_dst is protected by socket lock
				2043	* For UDP, we use xchg() to guard against concurrent changes.
				2044	*/
				2045	bool udp_sk_rx_dst_set(struct sock sk, struct dst_entry dst)
				2046	{
				2047	struct dst_entry *old;
				2048
				2049	if (dst_hold_safe(dst)) {
				2050	old = xchg(&sk->sk_rx_dst, dst);
				2051	dst_release(old);
				2052	return old != dst;
				2053	}
				2054	return false;
				2055	}
				2056	EXPORT_SYMBOL(udp_sk_rx_dst_set);
				2057
				2058	/*
				2059	* Multicasts and broadcasts go to each listener.
				2060	*
				2061	* Note: called only from the BH handler context.
				2062	*/
				2063	static int __udp4_lib_mcast_deliver(struct net net, struct sk_buff skb,
				2064	struct udphdr *uh,
				2065	__be32 saddr, __be32 daddr,
				2066	struct udp_table *udptable,
				2067	int proto)
				2068	{
				2069	struct sock sk, first = NULL;
				2070	unsigned short hnum = ntohs(uh->dest);
				2071	struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
				2072	unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
				2073	unsigned int offset = offsetof(typeof(*sk), sk_node);
				2074	int dif = skb->dev->ifindex;
				2075	int sdif = inet_sdif(skb);
				2076	struct hlist_node *node;
				2077	struct sk_buff *nskb;
				2078
				2079	if (use_hash2) {
				2080	hash2_any = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
				2081	udptable->mask;
				2082	hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask;
				2083	start_lookup:
				2084	hslot = &udptable->hash2[hash2];
				2085	offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
				2086	}
				2087
				2088	sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
				2089	if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr,
				2090	uh->source, saddr, dif, sdif, hnum))
				2091	continue;
				2092
				2093	if (!first) {
				2094	first = sk;
				2095	continue;
				2096	}
				2097	nskb = skb_clone(skb, GFP_ATOMIC);
				2098
				2099	if (unlikely(!nskb)) {
				2100	atomic_inc(&sk->sk_drops);
				2101	__UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS,
				2102	IS_UDPLITE(sk));
				2103	__UDP_INC_STATS(net, UDP_MIB_INERRORS,
				2104	IS_UDPLITE(sk));
				2105	continue;
				2106	}
				2107	if (udp_queue_rcv_skb(sk, nskb) > 0)
				2108	consume_skb(nskb);
				2109	}
				2110
				2111	/* Also lookup :port if we are using hash2 and haven't done so yet. /
				2112	if (use_hash2 && hash2 != hash2_any) {
				2113	hash2 = hash2_any;
				2114	goto start_lookup;
				2115	}
				2116
				2117	if (first) {
				2118	if (udp_queue_rcv_skb(first, skb) > 0)
				2119	consume_skb(skb);
				2120	} else {
				2121	kfree_skb(skb);
				2122	__UDP_INC_STATS(net, UDP_MIB_IGNOREDMULTI,
				2123	proto == IPPROTO_UDPLITE);
				2124	}
				2125	return 0;
				2126	}
				2127
				2128	/* Initialize UDP checksum. If exited with zero value (success),
				2129	* CHECKSUM_UNNECESSARY means, that no more checks are required.
				2130	* Otherwise, csum completion requires chacksumming packet body,
				2131	* including udp header and folding it to skb->csum.
				2132	*/
				2133	static inline int udp4_csum_init(struct sk_buff skb, struct udphdr uh,
				2134	int proto)
				2135	{
				2136	int err;
				2137
				2138	UDP_SKB_CB(skb)->partial_cov = 0;
				2139	UDP_SKB_CB(skb)->cscov = skb->len;
				2140
				2141	if (proto == IPPROTO_UDPLITE) {
				2142	err = udplite_checksum_init(skb, uh);
				2143	if (err)
				2144	return err;
				2145
				2146	if (UDP_SKB_CB(skb)->partial_cov) {
				2147	skb->csum = inet_compute_pseudo(skb, proto);
				2148	return 0;
				2149	}
				2150	}
				2151
				2152	/* Note, we are only interested in != 0 or == 0, thus the
				2153	* force to int.
				2154	*/
				2155	err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
				2156	inet_compute_pseudo);
				2157	if (err)
				2158	return err;
				2159
				2160	if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) {
				2161	/* If SW calculated the value, we know it's bad */
				2162	if (skb->csum_complete_sw)
				2163	return 1;
				2164
				2165	/* HW says the value is bad. Let's validate that.
				2166	* skb->csum is no longer the full packet checksum,
				2167	* so don't treat it as such.
				2168	*/
				2169	skb_checksum_complete_unset(skb);
				2170	}
				2171
				2172	return 0;
				2173	}
				2174
				2175	/* wrapper for udp_queue_rcv_skb tacking care of csum conversion and
				2176	* return code conversion for ip layer consumption
				2177	*/
				2178	static int udp_unicast_rcv_skb(struct sock sk, struct sk_buff skb,
				2179	struct udphdr *uh)
				2180	{
				2181	int ret;
				2182
				2183	if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
				2184	skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
				2185	inet_compute_pseudo);
				2186
				2187	ret = udp_queue_rcv_skb(sk, skb);
				2188
				2189	/* a return value > 0 means to resubmit the input, but
				2190	* it wants the return to be -protocol, or 0
				2191	*/
				2192	if (ret > 0)
				2193	return -ret;
				2194	return 0;
				2195	}
				2196
				2197	/*
				2198	* All we need to do is get the socket, and then do a checksum.
				2199	*/
				2200
				2201	int __udp4_lib_rcv(struct sk_buff skb, struct udp_table udptable,
				2202	int proto)
				2203	{
				2204	struct sock *sk;
				2205	struct udphdr *uh;
				2206	unsigned short ulen;
				2207	struct rtable *rt = skb_rtable(skb);
				2208	__be32 saddr, daddr;
				2209	struct net *net = dev_net(skb->dev);
				2210
				2211	/*
				2212	* Validate the packet.
				2213	*/
				2214	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
				2215	goto drop; /* No space for header. */
				2216
				2217	uh = udp_hdr(skb);
				2218	ulen = ntohs(uh->len);
				2219	saddr = ip_hdr(skb)->saddr;
				2220	daddr = ip_hdr(skb)->daddr;
				2221
				2222	if (ulen > skb->len)
				2223	goto short_packet;
				2224
				2225	if (proto == IPPROTO_UDP) {
				2226	/* UDP validates ulen. */
				2227	if (ulen < sizeof(*uh) \|\| pskb_trim_rcsum(skb, ulen))
				2228	goto short_packet;
				2229	uh = udp_hdr(skb);
				2230	}
				2231
				2232	if (udp4_csum_init(skb, uh, proto))
				2233	goto csum_error;
				2234
				2235	sk = skb_steal_sock(skb);
				2236	if (sk) {
				2237	struct dst_entry *dst = skb_dst(skb);
				2238	int ret;
				2239
				2240	if (unlikely(sk->sk_rx_dst != dst))
				2241	udp_sk_rx_dst_set(sk, dst);
				2242
				2243	ret = udp_unicast_rcv_skb(sk, skb, uh);
				2244	sock_put(sk);
				2245	return ret;
				2246	}
				2247
				2248	if (rt->rt_flags & (RTCF_BROADCAST\|RTCF_MULTICAST))
				2249	return __udp4_lib_mcast_deliver(net, skb, uh,
				2250	saddr, daddr, udptable, proto);
				2251
				2252	sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
				2253	if (sk)
				2254	return udp_unicast_rcv_skb(sk, skb, uh);
				2255
				2256	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
				2257	goto drop;
				2258	nf_reset(skb);
				2259
				2260	/* No socket. Drop packet silently, if checksum is wrong */
				2261	if (udp_lib_checksum_complete(skb))
				2262	goto csum_error;
				2263
				2264	__UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
				2265	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
				2266
				2267	/*
				2268	* Hmm. We got an UDP packet to a port to which we
				2269	* don't wanna listen. Ignore it.
				2270	*/
				2271	kfree_skb(skb);
				2272	return 0;
				2273
				2274	short_packet:
				2275	net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n",
				2276	proto == IPPROTO_UDPLITE ? "Lite" : "",
				2277	&saddr, ntohs(uh->source),
				2278	ulen, skb->len,
				2279	&daddr, ntohs(uh->dest));
				2280	goto drop;
				2281
				2282	csum_error:
				2283	/*
				2284	* RFC1122: OK. Discards the bad packet silently (as far as
				2285	* the network is concerned, anyway) as per 4.1.3.4 (MUST).
				2286	*/
				2287	net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n",
				2288	proto == IPPROTO_UDPLITE ? "Lite" : "",
				2289	&saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
				2290	ulen);
				2291	__UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
				2292	drop:
				2293	__UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
				2294	kfree_skb(skb);
				2295	return 0;
				2296	}
				2297
				2298	/* We can only early demux multicast if there is a single matching socket.
				2299	* If more than one socket found returns NULL
				2300	*/
				2301	static struct sock __udp4_lib_mcast_demux_lookup(struct net net,
				2302	__be16 loc_port, __be32 loc_addr,
				2303	__be16 rmt_port, __be32 rmt_addr,
				2304	int dif, int sdif)
				2305	{
				2306	struct sock sk, result;
				2307	unsigned short hnum = ntohs(loc_port);
				2308	unsigned int slot = udp_hashfn(net, hnum, udp_table.mask);
				2309	struct udp_hslot *hslot = &udp_table.hash[slot];
				2310
				2311	/* Do not bother scanning a too big list */
				2312	if (hslot->count > 10)
				2313	return NULL;
				2314
				2315	result = NULL;
				2316	sk_for_each_rcu(sk, &hslot->head) {
				2317	if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr,
				2318	rmt_port, rmt_addr, dif, sdif, hnum)) {
				2319	if (result)
				2320	return NULL;
				2321	result = sk;
				2322	}
				2323	}
				2324
				2325	return result;
				2326	}
				2327
				2328	/* For unicast we should only early demux connected sockets or we can
				2329	* break forwarding setups. The chains here can be long so only check
				2330	* if the first socket is an exact match and if not move on.
				2331	*/
				2332	static struct sock __udp4_lib_demux_lookup(struct net net,
				2333	__be16 loc_port, __be32 loc_addr,
				2334	__be16 rmt_port, __be32 rmt_addr,
				2335	int dif, int sdif)
				2336	{
				2337	unsigned short hnum = ntohs(loc_port);
				2338	unsigned int hash2 = ipv4_portaddr_hash(net, loc_addr, hnum);
				2339	unsigned int slot2 = hash2 & udp_table.mask;
				2340	struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
				2341	INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
				2342	const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
				2343	struct sock *sk;
				2344
				2345	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
				2346	if (INET_MATCH(sk, net, acookie, rmt_addr,
				2347	loc_addr, ports, dif, sdif))
				2348	return sk;
				2349	/* Only check first socket in chain */
				2350	break;
				2351	}
				2352	return NULL;
				2353	}
				2354
				2355	int udp_v4_early_demux(struct sk_buff *skb)
				2356	{
				2357	struct net *net = dev_net(skb->dev);
				2358	struct in_device *in_dev = NULL;
				2359	const struct iphdr *iph;
				2360	const struct udphdr *uh;
				2361	struct sock *sk = NULL;
				2362	struct dst_entry *dst;
				2363	int dif = skb->dev->ifindex;
				2364	int sdif = inet_sdif(skb);
				2365	int ours;
				2366
				2367	/* validate the packet */
				2368	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
				2369	return 0;
				2370
				2371	iph = ip_hdr(skb);
				2372	uh = udp_hdr(skb);
				2373
				2374	if (skb->pkt_type == PACKET_MULTICAST) {
				2375	in_dev = __in_dev_get_rcu(skb->dev);
				2376
				2377	if (!in_dev)
				2378	return 0;
				2379
				2380	ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
				2381	iph->protocol);
				2382	if (!ours)
				2383	return 0;
				2384
				2385	sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
				2386	uh->source, iph->saddr,
				2387	dif, sdif);
				2388	} else if (skb->pkt_type == PACKET_HOST) {
				2389	sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
				2390	uh->source, iph->saddr, dif, sdif);
				2391	}
				2392
				2393	if (!sk \|\| !refcount_inc_not_zero(&sk->sk_refcnt))
				2394	return 0;
				2395
				2396	skb->sk = sk;
				2397	skb->destructor = sock_efree;
				2398	dst = READ_ONCE(sk->sk_rx_dst);
				2399
				2400	if (dst)
				2401	dst = dst_check(dst, 0);
				2402	if (dst) {
				2403	u32 itag = 0;
				2404
				2405	/* set noref for now.
				2406	* any place which wants to hold dst has to call
				2407	* dst_hold_safe()
				2408	*/
				2409	skb_dst_set_noref(skb, dst);
				2410
				2411	/* for unconnected multicast sockets we need to validate
				2412	* the source on each packet
				2413	*/
				2414	if (!inet_sk(sk)->inet_daddr && in_dev)
				2415	return ip_mc_validate_source(skb, iph->daddr,
				2416	iph->saddr, iph->tos,
				2417	skb->dev, in_dev, &itag);
				2418	}
				2419	return 0;
				2420	}
				2421
				2422	int udp_rcv(struct sk_buff *skb)
				2423	{
				2424	hwnat_magic_tag_set_zero(skb);
				2425	return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP);
				2426	}
				2427
				2428	void udp_destroy_sock(struct sock *sk)
				2429	{
				2430	struct udp_sock *up = udp_sk(sk);
				2431	bool slow = lock_sock_fast(sk);
				2432	udp_flush_pending_frames(sk);
				2433	unlock_sock_fast(sk, slow);
				2434	if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
				2435	void (encap_destroy)(struct sock sk);
				2436	encap_destroy = READ_ONCE(up->encap_destroy);
				2437	if (encap_destroy)
				2438	encap_destroy(sk);
				2439	}
				2440	}
				2441
				2442	/*
				2443	* Socket option code for UDP
				2444	*/
				2445	int udp_lib_setsockopt(struct sock *sk, int level, int optname,
				2446	char __user *optval, unsigned int optlen,
				2447	int (push_pending_frames)(struct sock ))
				2448	{
				2449	struct udp_sock *up = udp_sk(sk);
				2450	int val, valbool;
				2451	int err = 0;
				2452	int is_udplite = IS_UDPLITE(sk);
				2453
				2454	if (optlen < sizeof(int))
				2455	return -EINVAL;
				2456
				2457	if (get_user(val, (int __user *)optval))
				2458	return -EFAULT;
				2459
				2460	valbool = val ? 1 : 0;
				2461
				2462	switch (optname) {
				2463	case UDP_CORK:
				2464	if (val != 0) {
				2465	up->corkflag = 1;
				2466	} else {
				2467	up->corkflag = 0;
				2468	lock_sock(sk);
				2469	push_pending_frames(sk);
				2470	release_sock(sk);
				2471	}
				2472	break;
				2473
				2474	case UDP_ENCAP:
				2475	switch (val) {
				2476	case 0:
				2477	case UDP_ENCAP_ESPINUDP:
				2478	case UDP_ENCAP_ESPINUDP_NON_IKE:
				2479	up->encap_rcv = xfrm4_udp_encap_rcv;
				2480	/* FALLTHROUGH */
				2481	case UDP_ENCAP_L2TPINUDP:
				2482	up->encap_type = val;
				2483	udp_encap_enable();
				2484	break;
				2485	default:
				2486	err = -ENOPROTOOPT;
				2487	break;
				2488	}
				2489	break;
				2490
				2491	case UDP_NO_CHECK6_TX:
				2492	up->no_check6_tx = valbool;
				2493	break;
				2494
				2495	case UDP_NO_CHECK6_RX:
				2496	up->no_check6_rx = valbool;
				2497	break;
				2498
				2499	case UDP_SEGMENT:
				2500	if (val < 0 \|\| val > USHRT_MAX)
				2501	return -EINVAL;
				2502	up->gso_size = val;
				2503	break;
				2504
				2505	/*
				2506	* UDP-Lite's partial checksum coverage (RFC 3828).
				2507	*/
				2508	/* The sender sets actual checksum coverage length via this option.
				2509	* The case coverage > packet length is handled by send module. */
				2510	case UDPLITE_SEND_CSCOV:
				2511	if (!is_udplite) /* Disable the option on UDP sockets */
				2512	return -ENOPROTOOPT;
				2513	if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
				2514	val = 8;
				2515	else if (val > USHRT_MAX)
				2516	val = USHRT_MAX;
				2517	up->pcslen = val;
				2518	up->pcflag \|= UDPLITE_SEND_CC;
				2519	break;
				2520
				2521	/* The receiver specifies a minimum checksum coverage value. To make
				2522	* sense, this should be set to at least 8 (as done below). If zero is
				2523	* used, this again means full checksum coverage. */
				2524	case UDPLITE_RECV_CSCOV:
				2525	if (!is_udplite) /* Disable the option on UDP sockets */
				2526	return -ENOPROTOOPT;
				2527	if (val != 0 && val < 8) /* Avoid silly minimal values. */
				2528	val = 8;
				2529	else if (val > USHRT_MAX)
				2530	val = USHRT_MAX;
				2531	up->pcrlen = val;
				2532	up->pcflag \|= UDPLITE_RECV_CC;
				2533	break;
				2534
				2535	default:
				2536	err = -ENOPROTOOPT;
				2537	break;
				2538	}
				2539
				2540	return err;
				2541	}
				2542	EXPORT_SYMBOL(udp_lib_setsockopt);
				2543
				2544	int udp_setsockopt(struct sock *sk, int level, int optname,
				2545	char __user *optval, unsigned int optlen)
				2546	{
				2547	if (level == SOL_UDP \|\| level == SOL_UDPLITE)
				2548	return udp_lib_setsockopt(sk, level, optname, optval, optlen,
				2549	udp_push_pending_frames);
				2550	return ip_setsockopt(sk, level, optname, optval, optlen);
				2551	}
				2552
				#ifdef CONFIG_COMPAT
				/* Compat variant of udp_setsockopt(): identical for the UDP levels,
				 * falls back to compat_ip_setsockopt() for everything else.
				 */
				int compat_udp_setsockopt(struct sock *sk, int level, int optname,
							  char __user *optval, unsigned int optlen)
				{
					if (level == SOL_UDP  ||  level == SOL_UDPLITE)
						return udp_lib_setsockopt(sk, level, optname, optval, optlen,
									  udp_push_pending_frames);
					return compat_ip_setsockopt(sk, level, optname, optval, optlen);
				}
				#endif
				2563
				2564	int udp_lib_getsockopt(struct sock *sk, int level, int optname,
				2565	char __user optval, int __user optlen)
				2566	{
				2567	struct udp_sock *up = udp_sk(sk);
				2568	int val, len;
				2569
				2570	if (get_user(len, optlen))
				2571	return -EFAULT;
				2572
				2573	len = min_t(unsigned int, len, sizeof(int));
				2574
				2575	if (len < 0)
				2576	return -EINVAL;
				2577
				2578	switch (optname) {
				2579	case UDP_CORK:
				2580	val = up->corkflag;
				2581	break;
				2582
				2583	case UDP_ENCAP:
				2584	val = up->encap_type;
				2585	break;
				2586
				2587	case UDP_NO_CHECK6_TX:
				2588	val = up->no_check6_tx;
				2589	break;
				2590
				2591	case UDP_NO_CHECK6_RX:
				2592	val = up->no_check6_rx;
				2593	break;
				2594
				2595	case UDP_SEGMENT:
				2596	val = up->gso_size;
				2597	break;
				2598
				2599	/* The following two cannot be changed on UDP sockets, the return is
				2600	* always 0 (which corresponds to the full checksum coverage of UDP). */
				2601	case UDPLITE_SEND_CSCOV:
				2602	val = up->pcslen;
				2603	break;
				2604
				2605	case UDPLITE_RECV_CSCOV:
				2606	val = up->pcrlen;
				2607	break;
				2608
				2609	default:
				2610	return -ENOPROTOOPT;
				2611	}
				2612
				2613	if (put_user(len, optlen))
				2614	return -EFAULT;
				2615	if (copy_to_user(optval, &val, len))
				2616	return -EFAULT;
				2617	return 0;
				2618	}
				2619	EXPORT_SYMBOL(udp_lib_getsockopt);
				2620
				2621	int udp_getsockopt(struct sock *sk, int level, int optname,
				2622	char __user optval, int __user optlen)
				2623	{
				2624	if (level == SOL_UDP \|\| level == SOL_UDPLITE)
				2625	return udp_lib_getsockopt(sk, level, optname, optval, optlen);
				2626	return ip_getsockopt(sk, level, optname, optval, optlen);
				2627	}
				2628
				#ifdef CONFIG_COMPAT
				/* Compat variant of udp_getsockopt(): identical for the UDP levels,
				 * falls back to compat_ip_getsockopt() for everything else.
				 */
				int compat_udp_getsockopt(struct sock *sk, int level, int optname,
							  char __user *optval, int __user *optlen)
				{
					if (level == SOL_UDP  ||  level == SOL_UDPLITE)
						return udp_lib_getsockopt(sk, level, optname, optval, optlen);
					return compat_ip_getsockopt(sk, level, optname, optval, optlen);
				}
				#endif
				2638	/**
				2639	* udp_poll - wait for a UDP event.
				2640	* @file - file struct
				2641	* @sock - socket
				2642	* @wait - poll table
				2643	*
				2644	* This is same as datagram poll, except for the special case of
				2645	* blocking sockets. If application is using a blocking fd
				2646	* and a packet with checksum error is in the queue;
				2647	* then it could get return from select indicating data available
				2648	* but then block when reading it. Add special case code
				2649	* to work around these arguably broken applications.
				2650	*/
				2651	__poll_t udp_poll(struct file file, struct socket sock, poll_table *wait)
				2652	{
				2653	__poll_t mask = datagram_poll(file, sock, wait);
				2654	struct sock *sk = sock->sk;
				2655
				2656	if (!skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
				2657	mask \|= EPOLLIN \| EPOLLRDNORM;
				2658
				2659	/* Check for false positives due to checksum errors */
				2660	if ((mask & EPOLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
				2661	!(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
				2662	mask &= ~(EPOLLIN \| EPOLLRDNORM);
				2663
				2664	return mask;
				2665
				2666	}
				2667	EXPORT_SYMBOL(udp_poll);
				2668
				2669	int udp_abort(struct sock *sk, int err)
				2670	{
				2671	lock_sock(sk);
				2672
				2673	sk->sk_err = err;
				2674	sk->sk_error_report(sk);
				2675	__udp_disconnect(sk, 0);
				2676
				2677	release_sock(sk);
				2678
				2679	return 0;
				2680	}
				2681	EXPORT_SYMBOL_GPL(udp_abort);
				2682
				2683	struct proto udp_prot = {
				2684	.name = "UDP",
				2685	.owner = THIS_MODULE,
				2686	.close = udp_lib_close,
				2687	.pre_connect = udp_pre_connect,
				2688	.connect = ip4_datagram_connect,
				2689	.disconnect = udp_disconnect,
				2690	.ioctl = udp_ioctl,
				2691	.init = udp_init_sock,
				2692	.destroy = udp_destroy_sock,
				2693	.setsockopt = udp_setsockopt,
				2694	.getsockopt = udp_getsockopt,
				2695	.sendmsg = udp_sendmsg,
				2696	.recvmsg = udp_recvmsg,
				2697	.sendpage = udp_sendpage,
				2698	.release_cb = ip4_datagram_release_cb,
				2699	.hash = udp_lib_hash,
				2700	.unhash = udp_lib_unhash,
				2701	.rehash = udp_v4_rehash,
				2702	.get_port = udp_v4_get_port,
				2703	.memory_allocated = &udp_memory_allocated,
				2704	.sysctl_mem = sysctl_udp_mem,
				2705	.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
				2706	.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
				2707	.obj_size = sizeof(struct udp_sock),
				2708	.h.udp_table = &udp_table,
				2709	#ifdef CONFIG_COMPAT
				2710	.compat_setsockopt = compat_udp_setsockopt,
				2711	.compat_getsockopt = compat_udp_getsockopt,
				2712	#endif
				2713	.diag_destroy = udp_abort,
				2714	};
				2715	EXPORT_SYMBOL(udp_prot);
				2716
				2717	/* ------------------------------------------------------------------------ */
				2718	#ifdef CONFIG_PROC_FS
				2719
				2720	static struct sock udp_get_first(struct seq_file seq, int start)
				2721	{
				2722	struct sock *sk;
				2723	struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
				2724	struct udp_iter_state *state = seq->private;
				2725	struct net *net = seq_file_net(seq);
				2726
				2727	for (state->bucket = start; state->bucket <= afinfo->udp_table->mask;
				2728	++state->bucket) {
				2729	struct udp_hslot *hslot = &afinfo->udp_table->hash[state->bucket];
				2730
				2731	if (hlist_empty(&hslot->head))
				2732	continue;
				2733
				2734	spin_lock_bh(&hslot->lock);
				2735	sk_for_each(sk, &hslot->head) {
				2736	if (!net_eq(sock_net(sk), net))
				2737	continue;
				2738	if (sk->sk_family == afinfo->family)
				2739	goto found;
				2740	}
				2741	spin_unlock_bh(&hslot->lock);
				2742	}
				2743	sk = NULL;
				2744	found:
				2745	return sk;
				2746	}
				2747
				2748	static struct sock udp_get_next(struct seq_file seq, struct sock *sk)
				2749	{
				2750	struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
				2751	struct udp_iter_state *state = seq->private;
				2752	struct net *net = seq_file_net(seq);
				2753
				2754	do {
				2755	sk = sk_next(sk);
				2756	} while (sk && (!net_eq(sock_net(sk), net) \|\| sk->sk_family != afinfo->family));
				2757
				2758	if (!sk) {
				2759	if (state->bucket <= afinfo->udp_table->mask)
				2760	spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
				2761	return udp_get_first(seq, state->bucket + 1);
				2762	}
				2763	return sk;
				2764	}
				2765
				2766	static struct sock udp_get_idx(struct seq_file seq, loff_t pos)
				2767	{
				2768	struct sock *sk = udp_get_first(seq, 0);
				2769
				2770	if (sk)
				2771	while (pos && (sk = udp_get_next(seq, sk)) != NULL)
				2772	--pos;
				2773	return pos ? NULL : sk;
				2774	}
				2775
				2776	void udp_seq_start(struct seq_file seq, loff_t *pos)
				2777	{
				2778	struct udp_iter_state *state = seq->private;
				2779	state->bucket = MAX_UDP_PORTS;
				2780
				2781	return pos ? udp_get_idx(seq, pos-1) : SEQ_START_TOKEN;
				2782	}
				2783	EXPORT_SYMBOL(udp_seq_start);
				2784
				2785	void udp_seq_next(struct seq_file seq, void v, loff_t pos)
				2786	{
				2787	struct sock *sk;
				2788
				2789	if (v == SEQ_START_TOKEN)
				2790	sk = udp_get_idx(seq, 0);
				2791	else
				2792	sk = udp_get_next(seq, v);
				2793
				2794	++*pos;
				2795	return sk;
				2796	}
				2797	EXPORT_SYMBOL(udp_seq_next);
				2798
				2799	void udp_seq_stop(struct seq_file seq, void v)
				2800	{
				2801	struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
				2802	struct udp_iter_state *state = seq->private;
				2803
				2804	if (state->bucket <= afinfo->udp_table->mask)
				2805	spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
				2806	}
				2807	EXPORT_SYMBOL(udp_seq_stop);
				2808
				2809	/* ------------------------------------------------------------------------ */
				2810	static void udp4_format_sock(struct sock sp, struct seq_file f,
				2811	int bucket)
				2812	{
				2813	struct inet_sock *inet = inet_sk(sp);
				2814	__be32 dest = inet->inet_daddr;
				2815	__be32 src = inet->inet_rcv_saddr;
				2816	__u16 destp = ntohs(inet->inet_dport);
				2817	__u16 srcp = ntohs(inet->inet_sport);
				2818
				2819	seq_printf(f, "%5d: %08X:%04X %08X:%04X"
				2820	" %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d",
				2821	bucket, src, srcp, dest, destp, sp->sk_state,
				2822	sk_wmem_alloc_get(sp),
				2823	udp_rqueue_get(sp),
				2824	0, 0L, 0,
				2825	from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)),
				2826	0, sock_i_ino(sp),
				2827	refcount_read(&sp->sk_refcnt), sp,
				2828	atomic_read(&sp->sk_drops));
				2829	}
				2830
				2831	int udp4_seq_show(struct seq_file seq, void v)
				2832	{
				2833	seq_setwidth(seq, 127);
				2834	if (v == SEQ_START_TOKEN)
				2835	seq_puts(seq, " sl local_address rem_address st tx_queue "
				2836	"rx_queue tr tm->when retrnsmt uid timeout "
				2837	"inode ref pointer drops");
				2838	else {
				2839	struct udp_iter_state *state = seq->private;
				2840
				2841	udp4_format_sock(v, seq, state->bucket);
				2842	}
				2843	seq_pad(seq, '\n');
				2844	return 0;
				2845	}
				2846
				2847	const struct seq_operations udp_seq_ops = {
				2848	.start = udp_seq_start,
				2849	.next = udp_seq_next,
				2850	.stop = udp_seq_stop,
				2851	.show = udp4_seq_show,
				2852	};
				2853	EXPORT_SYMBOL(udp_seq_ops);
				2854
				2855	static struct udp_seq_afinfo udp4_seq_afinfo = {
				2856	.family = AF_INET,
				2857	.udp_table = &udp_table,
				2858	};
				2859
				2860	static int __net_init udp4_proc_init_net(struct net *net)
				2861	{
				2862	if (!proc_create_net_data("udp", 0444, net->proc_net, &udp_seq_ops,
				2863	sizeof(struct udp_iter_state), &udp4_seq_afinfo))
				2864	return -ENOMEM;
				2865	return 0;
				2866	}
				2867
				2868	static void __net_exit udp4_proc_exit_net(struct net *net)
				2869	{
				2870	remove_proc_entry("udp", net->proc_net);
				2871	}
				2872
				2873	static struct pernet_operations udp4_net_ops = {
				2874	.init = udp4_proc_init_net,
				2875	.exit = udp4_proc_exit_net,
				2876	};
				2877
				2878	int __init udp4_proc_init(void)
				2879	{
				2880	return register_pernet_subsys(&udp4_net_ops);
				2881	}
				2882
				2883	void udp4_proc_exit(void)
				2884	{
				2885	unregister_pernet_subsys(&udp4_net_ops);
				2886	}
				2887	#endif /* CONFIG_PROC_FS */
				2888
				2889	static __initdata unsigned long uhash_entries;
				2890	static int __init set_uhash_entries(char *str)
				2891	{
				2892	ssize_t ret;
				2893
				2894	if (!str)
				2895	return 0;
				2896
				2897	ret = kstrtoul(str, 0, &uhash_entries);
				2898	if (ret)
				2899	return 0;
				2900
				2901	if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN)
				2902	uhash_entries = UDP_HTABLE_SIZE_MIN;
				2903	return 1;
				2904	}
				2905	__setup("uhash_entries=", set_uhash_entries);
				2906
				2907	void __init udp_table_init(struct udp_table table, const char name)
				2908	{
				2909	unsigned int i;
				2910
				2911	table->hash = alloc_large_system_hash(name,
				2912	2 * sizeof(struct udp_hslot),
				2913	uhash_entries,
				2914	21, /* one slot per 2 MB */
				2915	0,
				2916	&table->log,
				2917	&table->mask,
				2918	UDP_HTABLE_SIZE_MIN,
				2919	64 * 1024);
				2920
				2921	table->hash2 = table->hash + (table->mask + 1);
				2922	for (i = 0; i <= table->mask; i++) {
				2923	INIT_HLIST_HEAD(&table->hash[i].head);
				2924	table->hash[i].count = 0;
				2925	spin_lock_init(&table->hash[i].lock);
				2926	}
				2927	for (i = 0; i <= table->mask; i++) {
				2928	INIT_HLIST_HEAD(&table->hash2[i].head);
				2929	table->hash2[i].count = 0;
				2930	spin_lock_init(&table->hash2[i].lock);
				2931	}
				2932	}
				2933
				2934	u32 udp_flow_hashrnd(void)
				2935	{
				2936	static u32 hashrnd __read_mostly;
				2937
				2938	net_get_random_once(&hashrnd, sizeof(hashrnd));
				2939
				2940	return hashrnd;
				2941	}
				2942	EXPORT_SYMBOL(udp_flow_hashrnd);
				2943
				2944	static void __udp_sysctl_init(struct net *net)
				2945	{
				2946	net->ipv4.sysctl_udp_rmem_min = SK_MEM_QUANTUM;
				2947	net->ipv4.sysctl_udp_wmem_min = SK_MEM_QUANTUM;
				2948
				2949	#ifdef CONFIG_NET_L3_MASTER_DEV
				2950	net->ipv4.sysctl_udp_l3mdev_accept = 0;
				2951	#endif
				2952	}
				2953
				2954	static int __net_init udp_sysctl_init(struct net *net)
				2955	{
				2956	__udp_sysctl_init(net);
				2957	return 0;
				2958	}
				2959
				2960	static struct pernet_operations __net_initdata udp_sysctl_ops = {
				2961	.init = udp_sysctl_init,
				2962	};
				2963
				2964	void __init udp_init(void)
				2965	{
				2966	unsigned long limit;
				2967	unsigned int i;
				2968
				2969	udp_table_init(&udp_table, "UDP");
				2970	limit = nr_free_buffer_pages() / 8;
				2971	limit = max(limit, 128UL);
				2972	sysctl_udp_mem[0] = limit / 4 * 3;
				2973	sysctl_udp_mem[1] = limit;
				2974	sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
				2975
				2976	__udp_sysctl_init(&init_net);
				2977
				2978	/* 16 spinlocks per cpu */
				2979	udp_busylocks_log = ilog2(nr_cpu_ids) + 4;
				2980	udp_busylocks = kmalloc(sizeof(spinlock_t) << udp_busylocks_log,
				2981	GFP_KERNEL);
				2982	if (!udp_busylocks)
				2983	panic("UDP: failed to alloc udp_busylocks\n");
				2984	for (i = 0; i < (1U << udp_busylocks_log); i++)
				2985	spin_lock_init(udp_busylocks + i);
				2986
				2987	if (register_pernet_subsys(&udp_sysctl_ops))
				2988	panic("UDP: failed to init sysctl parameters.\n");
				2989	}