Blame - marvell/linux/net/ipv4/udp.c - T108

blob: 59f158f2727388a4ce9aaa69ca8fd7e21189621d [file] [log] [blame]

b.liu	e958203	2025-04-17 19:18:16 +0800	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0-or-later
				2	/*
				3	* INET An implementation of the TCP/IP protocol suite for the LINUX
				4	* operating system. INET is implemented using the BSD Socket
				5	* interface as the means of communication with the user level.
				6	*
				7	* The User Datagram Protocol (UDP).
				8	*
				9	* Authors: Ross Biro
				10	* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
				11	* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
				12	* Alan Cox, <alan@lxorguk.ukuu.org.uk>
				13	* Hirokazu Takahashi, <taka@valinux.co.jp>
				14	*
				15	* Fixes:
				16	* Alan Cox : verify_area() calls
				17	* Alan Cox : stopped close while in use off icmp
				18	* messages. Not a fix but a botch that
				19	* for udp at least is 'valid'.
				20	* Alan Cox : Fixed icmp handling properly
				21	* Alan Cox : Correct error for oversized datagrams
				22	* Alan Cox : Tidied select() semantics.
				23	* Alan Cox : udp_err() fixed properly, also now
				24	* select and read wake correctly on errors
				25	* Alan Cox : udp_send verify_area moved to avoid mem leak
				26	* Alan Cox : UDP can count its memory
				27	* Alan Cox : send to an unknown connection causes
				28	* an ECONNREFUSED off the icmp, but
				29	* does NOT close.
				30	* Alan Cox : Switched to new sk_buff handlers. No more backlog!
				31	* Alan Cox : Using generic datagram code. Even smaller and the PEEK
				32	* bug no longer crashes it.
				33	* Fred Van Kempen : Net2e support for sk->broadcast.
				34	* Alan Cox : Uses skb_free_datagram
				35	* Alan Cox : Added get/set sockopt support.
				36	* Alan Cox : Broadcasting without option set returns EACCES.
				37	* Alan Cox : No wakeup calls. Instead we now use the callbacks.
				38	* Alan Cox : Use ip_tos and ip_ttl
				39	* Alan Cox : SNMP Mibs
				40	* Alan Cox : MSG_DONTROUTE, and 0.0.0.0 support.
				41	* Matt Dillon : UDP length checks.
				42	* Alan Cox : Smarter af_inet used properly.
				43	* Alan Cox : Use new kernel side addressing.
				44	* Alan Cox : Incorrect return on truncated datagram receive.
				45	* Arnt Gulbrandsen : New udp_send and stuff
				46	* Alan Cox : Cache last socket
				47	* Alan Cox : Route cache
				48	* Jon Peatfield : Minor efficiency fix to sendto().
				49	* Mike Shaver : RFC1122 checks.
				50	* Alan Cox : Nonblocking error fix.
				51	* Willy Konynenberg : Transparent proxying support.
				52	* Mike McLagan : Routing by source
				53	* David S. Miller : New socket lookup architecture.
				54	* Last socket cache retained as it
				55	* does have a high hit rate.
				56	* Olaf Kirch : Don't linearise iovec on sendmsg.
				57	* Andi Kleen : Some cleanups, cache destination entry
				58	* for connect.
				59	* Vitaly E. Lavrov : Transparent proxy revived after year coma.
				60	* Melvin Smith : Check msg_name not msg_namelen in sendto(),
				61	* return ENOTCONN for unconnected sockets (POSIX)
				62	* Janos Farkas : don't deliver multi/broadcasts to a different
				63	* bound-to-device socket
				64	* Hirokazu Takahashi : HW checksumming for outgoing UDP
				65	* datagrams.
				66	* Hirokazu Takahashi : sendfile() on UDP works now.
				67	* Arnaldo C. Melo : convert /proc/net/udp to seq_file
				68	* YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
				69	* Alexey Kuznetsov: allow both IPv4 and IPv6 sockets to bind
				70	* a single port at the same time.
				71	* Derek Atkins <derek@ihtfp.com>: Add Encapsulation Support
				72	* James Chapman : Add L2TP encapsulation type.
				73	*/
				74
				75	#define pr_fmt(fmt) "UDP: " fmt
				76
				77	#include <linux/uaccess.h>
				78	#include <asm/ioctls.h>
				79	#include <linux/memblock.h>
				80	#include <linux/highmem.h>
				81	#include <linux/swap.h>
				82	#include <linux/types.h>
				83	#include <linux/fcntl.h>
				84	#include <linux/module.h>
				85	#include <linux/socket.h>
				86	#include <linux/sockios.h>
				87	#include <linux/igmp.h>
				88	#include <linux/inetdevice.h>
				89	#include <linux/in.h>
				90	#include <linux/errno.h>
				91	#include <linux/timer.h>
				92	#include <linux/mm.h>
				93	#include <linux/inet.h>
				94	#include <linux/netdevice.h>
				95	#include <linux/slab.h>
				96	#include <net/tcp_states.h>
				97	#include <linux/skbuff.h>
				98	#include <linux/proc_fs.h>
				99	#include <linux/seq_file.h>
				100	#include <net/net_namespace.h>
				101	#include <net/icmp.h>
				102	#include <net/inet_hashtables.h>
				103	#include <net/ip_tunnels.h>
				104	#include <net/route.h>
				105	#include <net/checksum.h>
				106	#include <net/xfrm.h>
				107	#include <trace/events/udp.h>
				108	#include <linux/static_key.h>
				109	#include <trace/events/skb.h>
				110	#include <net/busy_poll.h>
				111	#include "udp_impl.h"
				112	#include <net/sock_reuseport.h>
				113	#include <net/addrconf.h>
				114	#include <net/udp_tunnel.h>
				115
				116	struct udp_table udp_table __read_mostly;
				117	EXPORT_SYMBOL(udp_table);
				118
				119	long sysctl_udp_mem[3] __read_mostly;
				120	EXPORT_SYMBOL(sysctl_udp_mem);
				121
				122	atomic_long_t udp_memory_allocated;
				123	EXPORT_SYMBOL(udp_memory_allocated);
				124
				125	#define MAX_UDP_PORTS 65536
				126	#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
				127
				128	static int udp_lib_lport_inuse(struct net *net, __u16 num,
				129	const struct udp_hslot *hslot,
				130	unsigned long *bitmap,
				131	struct sock *sk, unsigned int log)
				132	{
				133	struct sock *sk2;
				134	kuid_t uid = sock_i_uid(sk);
				135
				136	sk_for_each(sk2, &hslot->head) {
				137	if (net_eq(sock_net(sk2), net) &&
				138	sk2 != sk &&
				139	(bitmap \|\| udp_sk(sk2)->udp_port_hash == num) &&
				140	(!sk2->sk_reuse \|\| !sk->sk_reuse) &&
				141	(!sk2->sk_bound_dev_if \|\| !sk->sk_bound_dev_if \|\|
				142	sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
				143	inet_rcv_saddr_equal(sk, sk2, true)) {
				144	if (sk2->sk_reuseport && sk->sk_reuseport &&
				145	!rcu_access_pointer(sk->sk_reuseport_cb) &&
				146	uid_eq(uid, sock_i_uid(sk2))) {
				147	if (!bitmap)
				148	return 0;
				149	} else {
				150	if (!bitmap)
				151	return 1;
				152	__set_bit(udp_sk(sk2)->udp_port_hash >> log,
				153	bitmap);
				154	}
				155	}
				156	}
				157	return 0;
				158	}
				159
				160	/*
				161	* Note: we still hold spinlock of primary hash chain, so no other writer
				162	* can insert/delete a socket with local_port == num
				163	*/
				164	static int udp_lib_lport_inuse2(struct net *net, __u16 num,
				165	struct udp_hslot *hslot2,
				166	struct sock *sk)
				167	{
				168	struct sock *sk2;
				169	kuid_t uid = sock_i_uid(sk);
				170	int res = 0;
				171
				172	spin_lock(&hslot2->lock);
				173	udp_portaddr_for_each_entry(sk2, &hslot2->head) {
				174	if (net_eq(sock_net(sk2), net) &&
				175	sk2 != sk &&
				176	(udp_sk(sk2)->udp_port_hash == num) &&
				177	(!sk2->sk_reuse \|\| !sk->sk_reuse) &&
				178	(!sk2->sk_bound_dev_if \|\| !sk->sk_bound_dev_if \|\|
				179	sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
				180	inet_rcv_saddr_equal(sk, sk2, true)) {
				181	if (sk2->sk_reuseport && sk->sk_reuseport &&
				182	!rcu_access_pointer(sk->sk_reuseport_cb) &&
				183	uid_eq(uid, sock_i_uid(sk2))) {
				184	res = 0;
				185	} else {
				186	res = 1;
				187	}
				188	break;
				189	}
				190	}
				191	spin_unlock(&hslot2->lock);
				192	return res;
				193	}
				194
				195	static int udp_reuseport_add_sock(struct sock sk, struct udp_hslot hslot)
				196	{
				197	struct net *net = sock_net(sk);
				198	kuid_t uid = sock_i_uid(sk);
				199	struct sock *sk2;
				200
				201	sk_for_each(sk2, &hslot->head) {
				202	if (net_eq(sock_net(sk2), net) &&
				203	sk2 != sk &&
				204	sk2->sk_family == sk->sk_family &&
				205	ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
				206	(udp_sk(sk2)->udp_port_hash == udp_sk(sk)->udp_port_hash) &&
				207	(sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
				208	sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
				209	inet_rcv_saddr_equal(sk, sk2, false)) {
				210	return reuseport_add_sock(sk, sk2,
				211	inet_rcv_saddr_any(sk));
				212	}
				213	}
				214
				215	return reuseport_alloc(sk, inet_rcv_saddr_any(sk));
				216	}
				217
				218	/**
				219	* udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6
				220	*
				221	* @sk: socket struct in question
				222	* @snum: port number to look up
				223	* @hash2_nulladdr: AF-dependent hash value in secondary hash chains,
				224	* with NULL address
				225	*/
				226	int udp_lib_get_port(struct sock *sk, unsigned short snum,
				227	unsigned int hash2_nulladdr)
				228	{
				229	struct udp_hslot hslot, hslot2;
				230	struct udp_table *udptable = sk->sk_prot->h.udp_table;
				231	int error = 1;
				232	struct net *net = sock_net(sk);
				233
				234	if (!snum) {
				235	int low, high, remaining;
				236	unsigned int rand;
				237	unsigned short first, last;
				238	DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);
				239
				240	inet_get_local_port_range(net, &low, &high);
				241	remaining = (high - low) + 1;
				242
				243	rand = prandom_u32();
				244	first = reciprocal_scale(rand, remaining) + low;
				245	/*
				246	* force rand to be an odd multiple of UDP_HTABLE_SIZE
				247	*/
				248	rand = (rand \| 1) * (udptable->mask + 1);
				249	last = first + udptable->mask + 1;
				250	do {
				251	hslot = udp_hashslot(udptable, net, first);
				252	bitmap_zero(bitmap, PORTS_PER_CHAIN);
				253	spin_lock_bh(&hslot->lock);
				254	udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
				255	udptable->log);
				256
				257	snum = first;
				258	/*
				259	* Iterate on all possible values of snum for this hash.
				260	* Using steps of an odd multiple of UDP_HTABLE_SIZE
				261	* give us randomization and full range coverage.
				262	*/
				263	do {
				264	if (low <= snum && snum <= high &&
				265	!test_bit(snum >> udptable->log, bitmap) &&
				266	!inet_is_local_reserved_port(net, snum))
				267	goto found;
				268	snum += rand;
				269	} while (snum != first);
				270	spin_unlock_bh(&hslot->lock);
				271	cond_resched();
				272	} while (++first != last);
				273	goto fail;
				274	} else {
				275	hslot = udp_hashslot(udptable, net, snum);
				276	spin_lock_bh(&hslot->lock);
				277	if (hslot->count > 10) {
				278	int exist;
				279	unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum;
				280
				281	slot2 &= udptable->mask;
				282	hash2_nulladdr &= udptable->mask;
				283
				284	hslot2 = udp_hashslot2(udptable, slot2);
				285	if (hslot->count < hslot2->count)
				286	goto scan_primary_hash;
				287
				288	exist = udp_lib_lport_inuse2(net, snum, hslot2, sk);
				289	if (!exist && (hash2_nulladdr != slot2)) {
				290	hslot2 = udp_hashslot2(udptable, hash2_nulladdr);
				291	exist = udp_lib_lport_inuse2(net, snum, hslot2,
				292	sk);
				293	}
				294	if (exist)
				295	goto fail_unlock;
				296	else
				297	goto found;
				298	}
				299	scan_primary_hash:
				300	if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, 0))
				301	goto fail_unlock;
				302	}
				303	found:
				304	inet_sk(sk)->inet_num = snum;
				305	udp_sk(sk)->udp_port_hash = snum;
				306	udp_sk(sk)->udp_portaddr_hash ^= snum;
				307	if (sk_unhashed(sk)) {
				308	if (sk->sk_reuseport &&
				309	udp_reuseport_add_sock(sk, hslot)) {
				310	inet_sk(sk)->inet_num = 0;
				311	udp_sk(sk)->udp_port_hash = 0;
				312	udp_sk(sk)->udp_portaddr_hash ^= snum;
				313	goto fail_unlock;
				314	}
				315
				316	sock_set_flag(sk, SOCK_RCU_FREE);
				317
				318	sk_add_node_rcu(sk, &hslot->head);
				319	hslot->count++;
				320	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
				321
				322	hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
				323	spin_lock(&hslot2->lock);
				324	if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
				325	sk->sk_family == AF_INET6)
				326	hlist_add_tail_rcu(&udp_sk(sk)->udp_portaddr_node,
				327	&hslot2->head);
				328	else
				329	hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
				330	&hslot2->head);
				331	hslot2->count++;
				332	spin_unlock(&hslot2->lock);
				333	}
				334
				335	error = 0;
				336	fail_unlock:
				337	spin_unlock_bh(&hslot->lock);
				338	fail:
				339	return error;
				340	}
				341	EXPORT_SYMBOL(udp_lib_get_port);
				342
				343	int udp_v4_get_port(struct sock *sk, unsigned short snum)
				344	{
				345	unsigned int hash2_nulladdr =
				346	ipv4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
				347	unsigned int hash2_partial =
				348	ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
				349
				350	/* precompute partial secondary hash */
				351	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
				352	return udp_lib_get_port(sk, snum, hash2_nulladdr);
				353	}
				354
				355	static int compute_score(struct sock sk, struct net net,
				356	__be32 saddr, __be16 sport,
				357	__be32 daddr, unsigned short hnum,
				358	int dif, int sdif)
				359	{
				360	int score;
				361	struct inet_sock *inet;
				362	bool dev_match;
				363
				364	if (!net_eq(sock_net(sk), net) \|\|
				365	udp_sk(sk)->udp_port_hash != hnum \|\|
				366	ipv6_only_sock(sk))
				367	return -1;
				368
				369	if (sk->sk_rcv_saddr != daddr)
				370	return -1;
				371
				372	score = (sk->sk_family == PF_INET) ? 2 : 1;
				373
				374	inet = inet_sk(sk);
				375	if (inet->inet_daddr) {
				376	if (inet->inet_daddr != saddr)
				377	return -1;
				378	score += 4;
				379	}
				380
				381	if (inet->inet_dport) {
				382	if (inet->inet_dport != sport)
				383	return -1;
				384	score += 4;
				385	}
				386
				387	dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
				388	dif, sdif);
				389	if (!dev_match)
				390	return -1;
				391	if (sk->sk_bound_dev_if)
				392	score += 4;
				393
				394	if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
				395	score++;
				396	return score;
				397	}
				398
				399	static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
				400	const __u16 lport, const __be32 faddr,
				401	const __be16 fport)
				402	{
				403	static u32 udp_ehash_secret __read_mostly;
				404
				405	net_get_random_once(&udp_ehash_secret, sizeof(udp_ehash_secret));
				406
				407	return __inet_ehashfn(laddr, lport, faddr, fport,
				408	udp_ehash_secret + net_hash_mix(net));
				409	}
				410
				411	/* called with rcu_read_lock() */
				412	static struct sock udp4_lib_lookup2(struct net net,
				413	__be32 saddr, __be16 sport,
				414	__be32 daddr, unsigned int hnum,
				415	int dif, int sdif,
				416	struct udp_hslot *hslot2,
				417	struct sk_buff *skb)
				418	{
				419	struct sock sk, result, *reuseport_result;
				420	int score, badness;
				421	u32 hash = 0;
				422
				423	result = NULL;
				424	badness = 0;
				425	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
				426	score = compute_score(sk, net, saddr, sport,
				427	daddr, hnum, dif, sdif);
				428	if (score > badness) {
				429	reuseport_result = NULL;
				430
				431	if (sk->sk_reuseport &&
				432	sk->sk_state != TCP_ESTABLISHED) {
				433	hash = udp_ehashfn(net, daddr, hnum,
				434	saddr, sport);
				435	reuseport_result = reuseport_select_sock(sk, hash, skb,
				436	sizeof(struct udphdr));
				437	if (reuseport_result && !reuseport_has_conns(sk, false))
				438	return reuseport_result;
				439	}
				440
				441	result = reuseport_result ? : sk;
				442	badness = score;
				443	}
				444	}
				445	return result;
				446	}
				447
				448	/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
				449	* harder than this. -DaveM
				450	*/
				451	struct sock __udp4_lib_lookup(struct net net, __be32 saddr,
				452	__be16 sport, __be32 daddr, __be16 dport, int dif,
				453	int sdif, struct udp_table udptable, struct sk_buff skb)
				454	{
				455	struct sock *result;
				456	unsigned short hnum = ntohs(dport);
				457	unsigned int hash2, slot2;
				458	struct udp_hslot *hslot2;
				459
				460	hash2 = ipv4_portaddr_hash(net, daddr, hnum);
				461	slot2 = hash2 & udptable->mask;
				462	hslot2 = &udptable->hash2[slot2];
				463
				464	result = udp4_lib_lookup2(net, saddr, sport,
				465	daddr, hnum, dif, sdif,
				466	hslot2, skb);
				467	if (!result) {
				468	hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
				469	slot2 = hash2 & udptable->mask;
				470	hslot2 = &udptable->hash2[slot2];
				471
				472	result = udp4_lib_lookup2(net, saddr, sport,
				473	htonl(INADDR_ANY), hnum, dif, sdif,
				474	hslot2, skb);
				475	}
				476	if (IS_ERR(result))
				477	return NULL;
				478	return result;
				479	}
				480	EXPORT_SYMBOL_GPL(__udp4_lib_lookup);
				481
				482	static inline struct sock __udp4_lib_lookup_skb(struct sk_buff skb,
				483	__be16 sport, __be16 dport,
				484	struct udp_table *udptable)
				485	{
				486	const struct iphdr *iph = ip_hdr(skb);
				487
				488	return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
				489	iph->daddr, dport, inet_iif(skb),
				490	inet_sdif(skb), udptable, skb);
				491	}
				492
				493	struct sock udp4_lib_lookup_skb(struct sk_buff skb,
				494	__be16 sport, __be16 dport)
				495	{
				496	const struct iphdr *iph = ip_hdr(skb);
				497
				498	return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
				499	iph->daddr, dport, inet_iif(skb),
				500	inet_sdif(skb), &udp_table, NULL);
				501	}
				502	EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb);
				503
				504	/* Must be called under rcu_read_lock().
				505	* Does increment socket refcount.
				506	*/
				507	#if IS_ENABLED(CONFIG_NF_TPROXY_IPV4) \|\| IS_ENABLED(CONFIG_NF_SOCKET_IPV4)
				508	struct sock udp4_lib_lookup(struct net net, __be32 saddr, __be16 sport,
				509	__be32 daddr, __be16 dport, int dif)
				510	{
				511	struct sock *sk;
				512
				513	sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport,
				514	dif, 0, &udp_table, NULL);
				515	if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
				516	sk = NULL;
				517	return sk;
				518	}
				519	EXPORT_SYMBOL_GPL(udp4_lib_lookup);
				520	#endif
				521
				522	static inline bool __udp_is_mcast_sock(struct net net, struct sock sk,
				523	__be16 loc_port, __be32 loc_addr,
				524	__be16 rmt_port, __be32 rmt_addr,
				525	int dif, int sdif, unsigned short hnum)
				526	{
				527	struct inet_sock *inet = inet_sk(sk);
				528
				529	if (!net_eq(sock_net(sk), net) \|\|
				530	udp_sk(sk)->udp_port_hash != hnum \|\|
				531	(inet->inet_daddr && inet->inet_daddr != rmt_addr) \|\|
				532	(inet->inet_dport != rmt_port && inet->inet_dport) \|\|
				533	(inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) \|\|
				534	ipv6_only_sock(sk) \|\|
				535	!udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
				536	return false;
				537	if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif, sdif))
				538	return false;
				539	return true;
				540	}
				541
				542	DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
				543	void udp_encap_enable(void)
				544	{
				545	static_branch_inc(&udp_encap_needed_key);
				546	}
				547	EXPORT_SYMBOL(udp_encap_enable);
				548
				549	void udp_encap_disable(void)
				550	{
				551	static_branch_dec(&udp_encap_needed_key);
				552	}
				553	EXPORT_SYMBOL(udp_encap_disable);
				554
				555	/* Handler for tunnels with arbitrary destination ports: no socket lookup, go
				556	* through error handlers in encapsulations looking for a match.
				557	*/
				558	static int __udp4_lib_err_encap_no_sk(struct sk_buff *skb, u32 info)
				559	{
				560	int i;
				561
				562	for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) {
				563	int (handler)(struct sk_buff skb, u32 info);
				564	const struct ip_tunnel_encap_ops *encap;
				565
				566	encap = rcu_dereference(iptun_encaps[i]);
				567	if (!encap)
				568	continue;
				569	handler = encap->err_handler;
				570	if (handler && !handler(skb, info))
				571	return 0;
				572	}
				573
				574	return -ENOENT;
				575	}
				576
				577	/* Try to match ICMP errors to UDP tunnels by looking up a socket without
				578	* reversing source and destination port: this will match tunnels that force the
				579	* same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that
				580	* lwtunnels might actually break this assumption by being configured with
				581	* different destination ports on endpoints, in this case we won't be able to
				582	* trace ICMP messages back to them.
				583	*
				584	* If this doesn't match any socket, probe tunnels with arbitrary destination
				585	* ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port
				586	* we've sent packets to won't necessarily match the local destination port.
				587	*
				588	* Then ask the tunnel implementation to match the error against a valid
				589	* association.
				590	*
				591	* Return an error if we can't find a match, the socket if we need further
				592	* processing, zero otherwise.
				593	*/
				594	static struct sock __udp4_lib_err_encap(struct net net,
				595	const struct iphdr *iph,
				596	struct udphdr *uh,
				597	struct udp_table *udptable,
				598	struct sk_buff *skb, u32 info)
				599	{
				600	int network_offset, transport_offset;
				601	struct sock *sk;
				602
				603	network_offset = skb_network_offset(skb);
				604	transport_offset = skb_transport_offset(skb);
				605
				606	/* Network header needs to point to the outer IPv4 header inside ICMP */
				607	skb_reset_network_header(skb);
				608
				609	/* Transport header needs to point to the UDP header */
				610	skb_set_transport_header(skb, iph->ihl << 2);
				611
				612	sk = __udp4_lib_lookup(net, iph->daddr, uh->source,
				613	iph->saddr, uh->dest, skb->dev->ifindex, 0,
				614	udptable, NULL);
				615	if (sk) {
				616	int (lookup)(struct sock sk, struct sk_buff *skb);
				617	struct udp_sock *up = udp_sk(sk);
				618
				619	lookup = READ_ONCE(up->encap_err_lookup);
				620	if (!lookup \|\| lookup(sk, skb))
				621	sk = NULL;
				622	}
				623
				624	if (!sk)
				625	sk = ERR_PTR(__udp4_lib_err_encap_no_sk(skb, info));
				626
				627	skb_set_transport_header(skb, transport_offset);
				628	skb_set_network_header(skb, network_offset);
				629
				630	return sk;
				631	}
				632
				633	/*
				634	* This routine is called by the ICMP module when it gets some
				635	* sort of error condition. If err < 0 then the socket should
				636	* be closed and the error returned to the user. If err > 0
				637	* it's just the icmp type << 8 \| icmp code.
				638	* Header points to the ip header of the error packet. We move
				639	* on past this. Then (as it used to claim before adjustment)
				640	* header points to the first 8 bytes of the udp header. We need
				641	* to find the appropriate port.
				642	*/
				643
				644	int __udp4_lib_err(struct sk_buff skb, u32 info, struct udp_table udptable)
				645	{
				646	struct inet_sock *inet;
				647	const struct iphdr iph = (const struct iphdr )skb->data;
				648	struct udphdr uh = (struct udphdr )(skb->data+(iph->ihl<<2));
				649	const int type = icmp_hdr(skb)->type;
				650	const int code = icmp_hdr(skb)->code;
				651	bool tunnel = false;
				652	struct sock *sk;
				653	int harderr;
				654	int err;
				655	struct net *net = dev_net(skb->dev);
				656
				657	sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
				658	iph->saddr, uh->source, skb->dev->ifindex,
				659	inet_sdif(skb), udptable, NULL);
				660	if (!sk) {
				661	/* No socket for error: try tunnels before discarding */
				662	sk = ERR_PTR(-ENOENT);
				663	if (static_branch_unlikely(&udp_encap_needed_key)) {
				664	sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb,
				665	info);
				666	if (!sk)
				667	return 0;
				668	}
				669
				670	if (IS_ERR(sk)) {
				671	__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
				672	return PTR_ERR(sk);
				673	}
				674
				675	tunnel = true;
				676	}
				677
				678	err = 0;
				679	harderr = 0;
				680	inet = inet_sk(sk);
				681
				682	switch (type) {
				683	default:
				684	case ICMP_TIME_EXCEEDED:
				685	err = EHOSTUNREACH;
				686	break;
				687	case ICMP_SOURCE_QUENCH:
				688	goto out;
				689	case ICMP_PARAMETERPROB:
				690	err = EPROTO;
				691	harderr = 1;
				692	break;
				693	case ICMP_DEST_UNREACH:
				694	if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
				695	ipv4_sk_update_pmtu(skb, sk, info);
				696	if (inet->pmtudisc != IP_PMTUDISC_DONT) {
				697	err = EMSGSIZE;
				698	harderr = 1;
				699	break;
				700	}
				701	goto out;
				702	}
				703	err = EHOSTUNREACH;
				704	if (code <= NR_ICMP_UNREACH) {
				705	harderr = icmp_err_convert[code].fatal;
				706	err = icmp_err_convert[code].errno;
				707	}
				708	break;
				709	case ICMP_REDIRECT:
				710	ipv4_sk_redirect(skb, sk);
				711	goto out;
				712	}
				713
				714	/*
				715	* RFC1122: OK. Passes ICMP errors back to application, as per
				716	* 4.1.3.3.
				717	*/
				718	if (tunnel) {
				719	/* ...not for tunnels though: we don't have a sending socket */
				720	goto out;
				721	}
				722	if (!inet->recverr) {
				723	if (!harderr \|\| sk->sk_state != TCP_ESTABLISHED)
				724	goto out;
				725	} else
				726	ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1));
				727
				728	sk->sk_err = err;
				729	sk->sk_error_report(sk);
				730	out:
				731	return 0;
				732	}
				733
				734	int udp_err(struct sk_buff *skb, u32 info)
				735	{
				736	return __udp4_lib_err(skb, info, &udp_table);
				737	}
				738
				739	/*
				740	* Throw away all pending data and cancel the corking. Socket is locked.
				741	*/
				742	void udp_flush_pending_frames(struct sock *sk)
				743	{
				744	struct udp_sock *up = udp_sk(sk);
				745
				746	if (up->pending) {
				747	up->len = 0;
				748	up->pending = 0;
				749	ip_flush_pending_frames(sk);
				750	}
				751	}
				752	EXPORT_SYMBOL(udp_flush_pending_frames);
				753
				754	/**
				755	* udp4_hwcsum - handle outgoing HW checksumming
				756	* @skb: sk_buff containing the filled-in UDP header
				757	* (checksum field must be zeroed out)
				758	* @src: source IP address
				759	* @dst: destination IP address
				760	*/
				761	void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
				762	{
				763	struct udphdr *uh = udp_hdr(skb);
				764	int offset = skb_transport_offset(skb);
				765	int len = skb->len - offset;
				766	int hlen = len;
				767	__wsum csum = 0;
				768
				769	if (!skb_has_frag_list(skb)) {
				770	/*
				771	* Only one fragment on the socket.
				772	*/
				773	skb->csum_start = skb_transport_header(skb) - skb->head;
				774	skb->csum_offset = offsetof(struct udphdr, check);
				775	uh->check = ~csum_tcpudp_magic(src, dst, len,
				776	IPPROTO_UDP, 0);
				777	} else {
				778	struct sk_buff *frags;
				779
				780	/*
				781	* HW-checksum won't work as there are two or more
				782	* fragments on the socket so that all csums of sk_buffs
				783	* should be together
				784	*/
				785	skb_walk_frags(skb, frags) {
				786	csum = csum_add(csum, frags->csum);
				787	hlen -= frags->len;
				788	}
				789
				790	csum = skb_checksum(skb, offset, hlen, csum);
				791	skb->ip_summed = CHECKSUM_NONE;
				792
				793	uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
				794	if (uh->check == 0)
				795	uh->check = CSUM_MANGLED_0;
				796	}
				797	}
				798	EXPORT_SYMBOL_GPL(udp4_hwcsum);
				799
				800	/* Function to set UDP checksum for an IPv4 UDP packet. This is intended
				801	* for the simple case like when setting the checksum for a UDP tunnel.
				802	*/
				803	void udp_set_csum(bool nocheck, struct sk_buff *skb,
				804	__be32 saddr, __be32 daddr, int len)
				805	{
				806	struct udphdr *uh = udp_hdr(skb);
				807
				808	if (nocheck) {
				809	uh->check = 0;
				810	} else if (skb_is_gso(skb)) {
				811	uh->check = ~udp_v4_check(len, saddr, daddr, 0);
				812	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
				813	uh->check = 0;
				814	uh->check = udp_v4_check(len, saddr, daddr, lco_csum(skb));
				815	if (uh->check == 0)
				816	uh->check = CSUM_MANGLED_0;
				817	} else {
				818	skb->ip_summed = CHECKSUM_PARTIAL;
				819	skb->csum_start = skb_transport_header(skb) - skb->head;
				820	skb->csum_offset = offsetof(struct udphdr, check);
				821	uh->check = ~udp_v4_check(len, saddr, daddr, 0);
				822	}
				823	}
				824	EXPORT_SYMBOL(udp_set_csum);
				825
				826	static int udp_send_skb(struct sk_buff skb, struct flowi4 fl4,
				827	struct inet_cork *cork)
				828	{
				829	struct sock *sk = skb->sk;
				830	struct inet_sock *inet = inet_sk(sk);
				831	struct udphdr *uh;
				832	int err = 0;
				833	int is_udplite = IS_UDPLITE(sk);
				834	int offset = skb_transport_offset(skb);
				835	int len = skb->len - offset;
				836	int datalen = len - sizeof(*uh);
				837	__wsum csum = 0;
				838
				839	/*
				840	* Create a UDP header
				841	*/
				842	uh = udp_hdr(skb);
				843	uh->source = inet->inet_sport;
				844	uh->dest = fl4->fl4_dport;
				845	uh->len = htons(len);
				846	uh->check = 0;
				847
				848	if (cork->gso_size) {
				849	const int hlen = skb_network_header_len(skb) +
				850	sizeof(struct udphdr);
				851
				852	if (hlen + cork->gso_size > cork->fragsize) {
				853	kfree_skb(skb);
				854	return -EINVAL;
				855	}
				856	if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
				857	kfree_skb(skb);
				858	return -EINVAL;
				859	}
				860	if (sk->sk_no_check_tx) {
				861	kfree_skb(skb);
				862	return -EINVAL;
				863	}
				864	if (skb->ip_summed != CHECKSUM_PARTIAL \|\| is_udplite \|\|
				865	dst_xfrm(skb_dst(skb))) {
				866	kfree_skb(skb);
				867	return -EIO;
				868	}
				869
				870	if (datalen > cork->gso_size) {
				871	skb_shinfo(skb)->gso_size = cork->gso_size;
				872	skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
				873	skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen,
				874	cork->gso_size);
				875	}
				876	goto csum_partial;
				877	}
				878
				879	if (is_udplite) /* UDP-Lite */
				880	csum = udplite_csum(skb);
				881
				882	else if (sk->sk_no_check_tx) { /* UDP csum off */
				883
				884	skb->ip_summed = CHECKSUM_NONE;
				885	goto send;
				886
				887	} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
				888	csum_partial:
				889
				890	udp4_hwcsum(skb, fl4->saddr, fl4->daddr);
				891	goto send;
				892
				893	} else
				894	csum = udp_csum(skb);
				895
				896	/* add protocol-dependent pseudo-header */
				897	uh->check = csum_tcpudp_magic(fl4->saddr, fl4->daddr, len,
				898	sk->sk_protocol, csum);
				899	if (uh->check == 0)
				900	uh->check = CSUM_MANGLED_0;
				901
				902	send:
				903	err = ip_send_skb(sock_net(sk), skb);
				904	if (err) {
				905	if (err == -ENOBUFS && !inet->recverr) {
				906	UDP_INC_STATS(sock_net(sk),
				907	UDP_MIB_SNDBUFERRORS, is_udplite);
				908	err = 0;
				909	}
				910	} else
				911	UDP_INC_STATS(sock_net(sk),
				912	UDP_MIB_OUTDATAGRAMS, is_udplite);
				913	return err;
				914	}
				915
				916	/*
				917	* Push out all pending data as one UDP datagram. Socket is locked.
				918	*/
				919	int udp_push_pending_frames(struct sock *sk)
				920	{
				921	struct udp_sock *up = udp_sk(sk);
				922	struct inet_sock *inet = inet_sk(sk);
				923	struct flowi4 *fl4 = &inet->cork.fl.u.ip4;
				924	struct sk_buff *skb;
				925	int err = 0;
				926
				927	skb = ip_finish_skb(sk, fl4);
				928	if (!skb)
				929	goto out;
				930
				931	err = udp_send_skb(skb, fl4, &inet->cork.base);
				932
				933	out:
				934	up->len = 0;
				935	up->pending = 0;
				936	return err;
				937	}
				938	EXPORT_SYMBOL(udp_push_pending_frames);
				939
				940	static int __udp_cmsg_send(struct cmsghdr cmsg, u16 gso_size)
				941	{
				942	switch (cmsg->cmsg_type) {
				943	case UDP_SEGMENT:
				944	if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u16)))
				945	return -EINVAL;
				946	gso_size = (__u16 *)CMSG_DATA(cmsg);
				947	return 0;
				948	default:
				949	return -EINVAL;
				950	}
				951	}
				952
				953	int udp_cmsg_send(struct sock sk, struct msghdr msg, u16 *gso_size)
				954	{
				955	struct cmsghdr *cmsg;
				956	bool need_ip = false;
				957	int err;
				958
				959	for_each_cmsghdr(cmsg, msg) {
				960	if (!CMSG_OK(msg, cmsg))
				961	return -EINVAL;
				962
				963	if (cmsg->cmsg_level != SOL_UDP) {
				964	need_ip = true;
				965	continue;
				966	}
				967
				968	err = __udp_cmsg_send(cmsg, gso_size);
				969	if (err)
				970	return err;
				971	}
				972
				973	return need_ip;
				974	}
				975	EXPORT_SYMBOL_GPL(udp_cmsg_send);
				976
				977	int udp_sendmsg(struct sock sk, struct msghdr msg, size_t len)
				978	{
				979	struct inet_sock *inet = inet_sk(sk);
				980	struct udp_sock *up = udp_sk(sk);
				981	DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
				982	struct flowi4 fl4_stack;
				983	struct flowi4 *fl4;
				984	int ulen = len;
				985	struct ipcm_cookie ipc;
				986	struct rtable *rt = NULL;
				987	int free = 0;
				988	int connected = 0;
				989	__be32 daddr, faddr, saddr;
				990	__be16 dport;
				991	u8 tos;
				992	int err, is_udplite = IS_UDPLITE(sk);
				993	int corkreq = READ_ONCE(up->corkflag) \|\| msg->msg_flags&MSG_MORE;
				994	int (getfrag)(void , char , int, int, int, struct sk_buff );
				995	struct sk_buff *skb;
				996	struct ip_options_data opt_copy;
				997
				998	if (len > 0xFFFF)
				999	return -EMSGSIZE;
				1000
				1001	/*
				1002	* Check the flags.
				1003	*/
				1004
				1005	if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */
				1006	return -EOPNOTSUPP;
				1007
				1008	getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
				1009
				1010	fl4 = &inet->cork.fl.u.ip4;
				1011	if (up->pending) {
				1012	/*
				1013	* There are pending frames.
				1014	* The socket lock must be held while it's corked.
				1015	*/
				1016	lock_sock(sk);
				1017	if (likely(up->pending)) {
				1018	if (unlikely(up->pending != AF_INET)) {
				1019	release_sock(sk);
				1020	return -EINVAL;
				1021	}
				1022	goto do_append_data;
				1023	}
				1024	release_sock(sk);
				1025	}
				1026	ulen += sizeof(struct udphdr);
				1027
				1028	/*
				1029	* Get and verify the address.
				1030	*/
				1031	if (usin) {
				1032	if (msg->msg_namelen < sizeof(*usin))
				1033	return -EINVAL;
				1034	if (usin->sin_family != AF_INET) {
				1035	if (usin->sin_family != AF_UNSPEC)
				1036	return -EAFNOSUPPORT;
				1037	}
				1038
				1039	daddr = usin->sin_addr.s_addr;
				1040	dport = usin->sin_port;
				1041	if (dport == 0)
				1042	return -EINVAL;
				1043	} else {
				1044	if (sk->sk_state != TCP_ESTABLISHED)
				1045	return -EDESTADDRREQ;
				1046	daddr = inet->inet_daddr;
				1047	dport = inet->inet_dport;
				1048	/* Open fast path for connected socket.
				1049	Route will not be used, if at least one option is set.
				1050	*/
				1051	connected = 1;
				1052	}
				1053
				1054	ipcm_init_sk(&ipc, inet);
				1055	ipc.gso_size = READ_ONCE(up->gso_size);
				1056
				1057	if (msg->msg_controllen) {
				1058	err = udp_cmsg_send(sk, msg, &ipc.gso_size);
				1059	if (err > 0) {
				1060	err = ip_cmsg_send(sk, msg, &ipc,
				1061	sk->sk_family == AF_INET6);
				1062	connected = 0;
				1063	}
				1064	if (unlikely(err < 0)) {
				1065	kfree(ipc.opt);
				1066	return err;
				1067	}
				1068	if (ipc.opt)
				1069	free = 1;
				1070	}
				1071	if (!ipc.opt) {
				1072	struct ip_options_rcu *inet_opt;
				1073
				1074	rcu_read_lock();
				1075	inet_opt = rcu_dereference(inet->inet_opt);
				1076	if (inet_opt) {
				1077	memcpy(&opt_copy, inet_opt,
				1078	sizeof(*inet_opt) + inet_opt->opt.optlen);
				1079	ipc.opt = &opt_copy.opt;
				1080	}
				1081	rcu_read_unlock();
				1082	}
				1083
				1084	if (cgroup_bpf_enabled && !connected) {
				1085	err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk,
				1086	(struct sockaddr *)usin, &ipc.addr);
				1087	if (err)
				1088	goto out_free;
				1089	if (usin) {
				1090	if (usin->sin_port == 0) {
				1091	/* BPF program set invalid port. Reject it. */
				1092	err = -EINVAL;
				1093	goto out_free;
				1094	}
				1095	daddr = usin->sin_addr.s_addr;
				1096	dport = usin->sin_port;
				1097	}
				1098	}
				1099
				1100	saddr = ipc.addr;
				1101	ipc.addr = faddr = daddr;
				1102
				1103	if (ipc.opt && ipc.opt->opt.srr) {
				1104	if (!daddr) {
				1105	err = -EINVAL;
				1106	goto out_free;
				1107	}
				1108	faddr = ipc.opt->opt.faddr;
				1109	connected = 0;
				1110	}
				1111	tos = get_rttos(&ipc, inet);
				1112	if (sock_flag(sk, SOCK_LOCALROUTE) \|\|
				1113	(msg->msg_flags & MSG_DONTROUTE) \|\|
				1114	(ipc.opt && ipc.opt->opt.is_strictroute)) {
				1115	tos \|= RTO_ONLINK;
				1116	connected = 0;
				1117	}
				1118
				1119	if (ipv4_is_multicast(daddr)) {
				1120	if (!ipc.oif \|\| netif_index_is_l3_master(sock_net(sk), ipc.oif))
				1121	ipc.oif = inet->mc_index;
				1122	if (!saddr)
				1123	saddr = inet->mc_addr;
				1124	connected = 0;
				1125	} else if (!ipc.oif) {
				1126	ipc.oif = inet->uc_index;
				1127	} else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
				1128	/* oif is set, packet is to local broadcast and
				1129	* and uc_index is set. oif is most likely set
				1130	* by sk_bound_dev_if. If uc_index != oif check if the
				1131	* oif is an L3 master and uc_index is an L3 slave.
				1132	* If so, we want to allow the send using the uc_index.
				1133	*/
				1134	if (ipc.oif != inet->uc_index &&
				1135	ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk),
				1136	inet->uc_index)) {
				1137	ipc.oif = inet->uc_index;
				1138	}
				1139	}
				1140
				1141	if (connected)
				1142	rt = (struct rtable *)sk_dst_check(sk, 0);
				1143
				1144	if (!rt) {
				1145	struct net *net = sock_net(sk);
				1146	__u8 flow_flags = inet_sk_flowi_flags(sk);
				1147
				1148	fl4 = &fl4_stack;
				1149
				1150	flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos,
				1151	RT_SCOPE_UNIVERSE, sk->sk_protocol,
				1152	flow_flags,
				1153	faddr, saddr, dport, inet->inet_sport,
				1154	sk->sk_uid);
				1155
				1156	security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
				1157	rt = ip_route_output_flow(net, fl4, sk);
				1158	if (IS_ERR(rt)) {
				1159	err = PTR_ERR(rt);
				1160	rt = NULL;
				1161	if (err == -ENETUNREACH)
				1162	IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
				1163	goto out;
				1164	}
				1165
				1166	err = -EACCES;
				1167	if ((rt->rt_flags & RTCF_BROADCAST) &&
				1168	!sock_flag(sk, SOCK_BROADCAST))
				1169	goto out;
				1170	if (connected)
				1171	sk_dst_set(sk, dst_clone(&rt->dst));
				1172	}
				1173
				1174	if (msg->msg_flags&MSG_CONFIRM)
				1175	goto do_confirm;
				1176	back_from_confirm:
				1177
				1178	saddr = fl4->saddr;
				1179	if (!ipc.addr)
				1180	daddr = ipc.addr = fl4->daddr;
				1181
				1182	/* Lockless fast path for the non-corking case. */
				1183	if (!corkreq) {
				1184	struct inet_cork cork;
				1185
				1186	skb = ip_make_skb(sk, fl4, getfrag, msg, ulen,
				1187	sizeof(struct udphdr), &ipc, &rt,
				1188	&cork, msg->msg_flags);
				1189	err = PTR_ERR(skb);
				1190	if (!IS_ERR_OR_NULL(skb))
				1191	err = udp_send_skb(skb, fl4, &cork);
				1192	goto out;
				1193	}
				1194
				1195	lock_sock(sk);
				1196	if (unlikely(up->pending)) {
				1197	/* The socket is already corked while preparing it. */
				1198	/* ... which is an evident application bug. --ANK */
				1199	release_sock(sk);
				1200
				1201	net_dbg_ratelimited("socket already corked\n");
				1202	err = -EINVAL;
				1203	goto out;
				1204	}
				1205	/*
				1206	* Now cork the socket to pend data.
				1207	*/
				1208	fl4 = &inet->cork.fl.u.ip4;
				1209	fl4->daddr = daddr;
				1210	fl4->saddr = saddr;
				1211	fl4->fl4_dport = dport;
				1212	fl4->fl4_sport = inet->inet_sport;
				1213	up->pending = AF_INET;
				1214
				1215	do_append_data:
				1216	up->len += ulen;
				1217	err = ip_append_data(sk, fl4, getfrag, msg, ulen,
				1218	sizeof(struct udphdr), &ipc, &rt,
				1219	corkreq ? msg->msg_flags\|MSG_MORE : msg->msg_flags);
				1220	if (err)
				1221	udp_flush_pending_frames(sk);
				1222	else if (!corkreq)
				1223	err = udp_push_pending_frames(sk);
				1224	else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
				1225	up->pending = 0;
				1226	release_sock(sk);
				1227
				1228	out:
				1229	ip_rt_put(rt);
				1230	out_free:
				1231	if (free)
				1232	kfree(ipc.opt);
				1233	if (!err)
				1234	return len;
				1235	/*
				1236	* ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
				1237	* ENOBUFS might not be good (it's not tunable per se), but otherwise
				1238	* we don't have a good statistic (IpOutDiscards but it can be too many
				1239	* things). We could add another new stat but at least for now that
				1240	* seems like overkill.
				1241	*/
				1242	if (err == -ENOBUFS \|\| test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
				1243	UDP_INC_STATS(sock_net(sk),
				1244	UDP_MIB_SNDBUFERRORS, is_udplite);
				1245	}
				1246	return err;
				1247
				1248	do_confirm:
				1249	if (msg->msg_flags & MSG_PROBE)
				1250	dst_confirm_neigh(&rt->dst, &fl4->daddr);
				1251	if (!(msg->msg_flags&MSG_PROBE) \|\| len)
				1252	goto back_from_confirm;
				1253	err = 0;
				1254	goto out;
				1255	}
				1256	EXPORT_SYMBOL(udp_sendmsg);
				1257
				1258	int udp_sendpage(struct sock sk, struct page page, int offset,
				1259	size_t size, int flags)
				1260	{
				1261	struct inet_sock *inet = inet_sk(sk);
				1262	struct udp_sock *up = udp_sk(sk);
				1263	int ret;
				1264
				1265	if (flags & MSG_SENDPAGE_NOTLAST)
				1266	flags \|= MSG_MORE;
				1267
				1268	if (!up->pending) {
				1269	struct msghdr msg = { .msg_flags = flags\|MSG_MORE };
				1270
				1271	/* Call udp_sendmsg to specify destination address which
				1272	* sendpage interface can't pass.
				1273	* This will succeed only when the socket is connected.
				1274	*/
				1275	ret = udp_sendmsg(sk, &msg, 0);
				1276	if (ret < 0)
				1277	return ret;
				1278	}
				1279
				1280	lock_sock(sk);
				1281
				1282	if (unlikely(!up->pending)) {
				1283	release_sock(sk);
				1284
				1285	net_dbg_ratelimited("cork failed\n");
				1286	return -EINVAL;
				1287	}
				1288
				1289	ret = ip_append_page(sk, &inet->cork.fl.u.ip4,
				1290	page, offset, size, flags);
				1291	if (ret == -EOPNOTSUPP) {
				1292	release_sock(sk);
				1293	return sock_no_sendpage(sk->sk_socket, page, offset,
				1294	size, flags);
				1295	}
				1296	if (ret < 0) {
				1297	udp_flush_pending_frames(sk);
				1298	goto out;
				1299	}
				1300
				1301	up->len += size;
				1302	if (!(READ_ONCE(up->corkflag) \|\| (flags&MSG_MORE)))
				1303	ret = udp_push_pending_frames(sk);
				1304	if (!ret)
				1305	ret = size;
				1306	out:
				1307	release_sock(sk);
				1308	return ret;
				1309	}
				1310
				1311	#define UDP_SKB_IS_STATELESS 0x80000000
				1312
				1313	/* all head states (dst, sk, nf conntrack) except skb extensions are
				1314	* cleared by udp_rcv().
				1315	*
				1316	* We need to preserve secpath, if present, to eventually process
				1317	* IP_CMSG_PASSSEC at recvmsg() time.
				1318	*
				1319	* Other extensions can be cleared.
				1320	*/
				1321	static bool udp_try_make_stateless(struct sk_buff *skb)
				1322	{
				1323	if (!skb_has_extensions(skb))
				1324	return true;
				1325
				1326	if (!secpath_exists(skb)) {
				1327	skb_ext_reset(skb);
				1328	return true;
				1329	}
				1330
				1331	return false;
				1332	}
				1333
				1334	static void udp_set_dev_scratch(struct sk_buff *skb)
				1335	{
				1336	struct udp_dev_scratch *scratch = udp_skb_scratch(skb);
				1337
				1338	BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long));
				1339	scratch->_tsize_state = skb->truesize;
				1340	#if BITS_PER_LONG == 64
				1341	scratch->len = skb->len;
				1342	scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
				1343	scratch->is_linear = !skb_is_nonlinear(skb);
				1344	#endif
				1345	if (udp_try_make_stateless(skb))
				1346	scratch->_tsize_state \|= UDP_SKB_IS_STATELESS;
				1347	}
				1348
				1349	static void udp_skb_csum_unnecessary_set(struct sk_buff *skb)
				1350	{
				1351	/* We come here after udp_lib_checksum_complete() returned 0.
				1352	* This means that __skb_checksum_complete() might have
				1353	* set skb->csum_valid to 1.
				1354	* On 64bit platforms, we can set csum_unnecessary
				1355	* to true, but only if the skb is not shared.
				1356	*/
				1357	#if BITS_PER_LONG == 64
				1358	if (!skb_shared(skb))
				1359	udp_skb_scratch(skb)->csum_unnecessary = true;
				1360	#endif
				1361	}
				1362
				1363	static int udp_skb_truesize(struct sk_buff *skb)
				1364	{
				1365	return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS;
				1366	}
				1367
				1368	static bool udp_skb_has_head_state(struct sk_buff *skb)
				1369	{
				1370	return !(udp_skb_scratch(skb)->_tsize_state & UDP_SKB_IS_STATELESS);
				1371	}
				1372
				1373	/* fully reclaim rmem/fwd memory allocated for skb */
				1374	static void udp_rmem_release(struct sock *sk, int size, int partial,
				1375	bool rx_queue_lock_held)
				1376	{
				1377	struct udp_sock *up = udp_sk(sk);
				1378	struct sk_buff_head *sk_queue;
				1379	int amt;
				1380
				1381	if (likely(partial)) {
				1382	up->forward_deficit += size;
				1383	size = up->forward_deficit;
				1384	if (size < (sk->sk_rcvbuf >> 2) &&
				1385	!skb_queue_empty(&up->reader_queue))
				1386	return;
				1387	} else {
				1388	size += up->forward_deficit;
				1389	}
				1390	up->forward_deficit = 0;
				1391
				1392	/* acquire the sk_receive_queue for fwd allocated memory scheduling,
				1393	* if the called don't held it already
				1394	*/
				1395	sk_queue = &sk->sk_receive_queue;
				1396	if (!rx_queue_lock_held)
				1397	spin_lock(&sk_queue->lock);
				1398
				1399
				1400	sk->sk_forward_alloc += size;
				1401	amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1);
				1402	sk->sk_forward_alloc -= amt;
				1403
				1404	if (amt)
				1405	__sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT);
				1406
				1407	atomic_sub(size, &sk->sk_rmem_alloc);
				1408
				1409	/* this can save us from acquiring the rx queue lock on next receive */
				1410	skb_queue_splice_tail_init(sk_queue, &up->reader_queue);
				1411
				1412	if (!rx_queue_lock_held)
				1413	spin_unlock(&sk_queue->lock);
				1414	}
				1415
				1416	/* Note: called with reader_queue.lock held.
				1417	* Instead of using skb->truesize here, find a copy of it in skb->dev_scratch
				1418	* This avoids a cache line miss while receive_queue lock is held.
				1419	* Look at __udp_enqueue_schedule_skb() to find where this copy is done.
				1420	*/
				1421	void udp_skb_destructor(struct sock sk, struct sk_buff skb)
				1422	{
				1423	prefetch(&skb->data);
				1424	udp_rmem_release(sk, udp_skb_truesize(skb), 1, false);
				1425	}
				1426	EXPORT_SYMBOL(udp_skb_destructor);
				1427
				1428	/* as above, but the caller held the rx queue lock, too */
				1429	static void udp_skb_dtor_locked(struct sock sk, struct sk_buff skb)
				1430	{
				1431	prefetch(&skb->data);
				1432	udp_rmem_release(sk, udp_skb_truesize(skb), 1, true);
				1433	}
				1434
				1435	/* Idea of busylocks is to let producers grab an extra spinlock
				1436	* to relieve pressure on the receive_queue spinlock shared by consumer.
				1437	* Under flood, this means that only one producer can be in line
				1438	* trying to acquire the receive_queue spinlock.
				1439	* These busylock can be allocated on a per cpu manner, instead of a
				1440	* per socket one (that would consume a cache line per socket)
				1441	*/
				1442	static int udp_busylocks_log __read_mostly;
				1443	static spinlock_t *udp_busylocks __read_mostly;
				1444
				1445	static spinlock_t busylock_acquire(void ptr)
				1446	{
				1447	spinlock_t *busy;
				1448
				1449	busy = udp_busylocks + hash_ptr(ptr, udp_busylocks_log);
				1450	spin_lock(busy);
				1451	return busy;
				1452	}
				1453
				1454	static void busylock_release(spinlock_t *busy)
				1455	{
				1456	if (busy)
				1457	spin_unlock(busy);
				1458	}
				1459
				1460	int __udp_enqueue_schedule_skb(struct sock sk, struct sk_buff skb)
				1461	{
				1462	struct sk_buff_head *list = &sk->sk_receive_queue;
				1463	int rmem, delta, amt, err = -ENOMEM;
				1464	spinlock_t *busy = NULL;
				1465	int size;
				1466
				1467	/* try to avoid the costly atomic add/sub pair when the receive
				1468	* queue is full; always allow at least a packet
				1469	*/
				1470	rmem = atomic_read(&sk->sk_rmem_alloc);
				1471	if (rmem > sk->sk_rcvbuf)
				1472	goto drop;
				1473
				1474	/* Under mem pressure, it might be helpful to help udp_recvmsg()
				1475	* having linear skbs :
				1476	* - Reduce memory overhead and thus increase receive queue capacity
				1477	* - Less cache line misses at copyout() time
				1478	* - Less work at consume_skb() (less alien page frag freeing)
				1479	*/
				1480	if (rmem > (sk->sk_rcvbuf >> 1)) {
				1481	skb_condense(skb);
				1482
				1483	busy = busylock_acquire(sk);
				1484	}
				1485	size = skb->truesize;
				1486	udp_set_dev_scratch(skb);
				1487
				1488	/* we drop only if the receive buf is full and the receive
				1489	* queue contains some other skb
				1490	*/
				1491	rmem = atomic_add_return(size, &sk->sk_rmem_alloc);
				1492	if (rmem > (size + (unsigned int)sk->sk_rcvbuf))
				1493	goto uncharge_drop;
				1494
				1495	spin_lock(&list->lock);
				1496	if (size >= sk->sk_forward_alloc) {
				1497	amt = sk_mem_pages(size);
				1498	delta = amt << SK_MEM_QUANTUM_SHIFT;
				1499	if (!__sk_mem_raise_allocated(sk, delta, amt, SK_MEM_RECV)) {
				1500	err = -ENOBUFS;
				1501	spin_unlock(&list->lock);
				1502	goto uncharge_drop;
				1503	}
				1504
				1505	sk->sk_forward_alloc += delta;
				1506	}
				1507
				1508	sk->sk_forward_alloc -= size;
				1509
				1510	/* no need to setup a destructor, we will explicitly release the
				1511	* forward allocated memory on dequeue
				1512	*/
				1513	sock_skb_set_dropcount(sk, skb);
				1514
				1515	__skb_queue_tail(list, skb);
				1516	spin_unlock(&list->lock);
				1517
				1518	if (!sock_flag(sk, SOCK_DEAD))
				1519	sk->sk_data_ready(sk);
				1520
				1521	busylock_release(busy);
				1522	return 0;
				1523
				1524	uncharge_drop:
				1525	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
				1526
				1527	drop:
				1528	atomic_inc(&sk->sk_drops);
				1529	busylock_release(busy);
				1530	return err;
				1531	}
				1532	EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
				1533
				1534	void udp_destruct_common(struct sock *sk)
				1535	{
				1536	/* reclaim completely the forward allocated memory */
				1537	struct udp_sock *up = udp_sk(sk);
				1538	unsigned int total = 0;
				1539	struct sk_buff *skb;
				1540
				1541	skb_queue_splice_tail_init(&sk->sk_receive_queue, &up->reader_queue);
				1542	while ((skb = __skb_dequeue(&up->reader_queue)) != NULL) {
				1543	total += skb->truesize;
				1544	kfree_skb(skb);
				1545	}
				1546	udp_rmem_release(sk, total, 0, true);
				1547	}
				1548	EXPORT_SYMBOL_GPL(udp_destruct_common);
				1549
				1550	static void udp_destruct_sock(struct sock *sk)
				1551	{
				1552	udp_destruct_common(sk);
				1553	inet_sock_destruct(sk);
				1554	}
				1555
				1556	int udp_init_sock(struct sock *sk)
				1557	{
				1558	skb_queue_head_init(&udp_sk(sk)->reader_queue);
				1559	sk->sk_destruct = udp_destruct_sock;
				1560	return 0;
				1561	}
				1562
				1563	void skb_consume_udp(struct sock sk, struct sk_buff skb, int len)
				1564	{
				1565	if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) {
				1566	bool slow = lock_sock_fast(sk);
				1567
				1568	sk_peek_offset_bwd(sk, len);
				1569	unlock_sock_fast(sk, slow);
				1570	}
				1571
				1572	if (!skb_unref(skb))
				1573	return;
				1574
				1575	/* In the more common cases we cleared the head states previously,
				1576	* see __udp_queue_rcv_skb().
				1577	*/
				1578	if (unlikely(udp_skb_has_head_state(skb)))
				1579	skb_release_head_state(skb);
				1580	__consume_stateless_skb(skb);
				1581	}
				1582	EXPORT_SYMBOL_GPL(skb_consume_udp);
				1583
				1584	static struct sk_buff __first_packet_length(struct sock sk,
				1585	struct sk_buff_head *rcvq,
				1586	int *total)
				1587	{
				1588	struct sk_buff *skb;
				1589
				1590	while ((skb = skb_peek(rcvq)) != NULL) {
				1591	if (udp_lib_checksum_complete(skb)) {
				1592	__UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
				1593	IS_UDPLITE(sk));
				1594	__UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
				1595	IS_UDPLITE(sk));
				1596	atomic_inc(&sk->sk_drops);
				1597	__skb_unlink(skb, rcvq);
				1598	*total += skb->truesize;
				1599	kfree_skb(skb);
				1600	} else {
				1601	udp_skb_csum_unnecessary_set(skb);
				1602	break;
				1603	}
				1604	}
				1605	return skb;
				1606	}
				1607
				1608	/**
				1609	* first_packet_length - return length of first packet in receive queue
				1610	* @sk: socket
				1611	*
				1612	* Drops all bad checksum frames, until a valid one is found.
				1613	* Returns the length of found skb, or -1 if none is found.
				1614	*/
				1615	static int first_packet_length(struct sock *sk)
				1616	{
				1617	struct sk_buff_head *rcvq = &udp_sk(sk)->reader_queue;
				1618	struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
				1619	struct sk_buff *skb;
				1620	int total = 0;
				1621	int res;
				1622
				1623	spin_lock_bh(&rcvq->lock);
				1624	skb = __first_packet_length(sk, rcvq, &total);
				1625	if (!skb && !skb_queue_empty_lockless(sk_queue)) {
				1626	spin_lock(&sk_queue->lock);
				1627	skb_queue_splice_tail_init(sk_queue, rcvq);
				1628	spin_unlock(&sk_queue->lock);
				1629
				1630	skb = __first_packet_length(sk, rcvq, &total);
				1631	}
				1632	res = skb ? skb->len : -1;
				1633	if (total)
				1634	udp_rmem_release(sk, total, 1, false);
				1635	spin_unlock_bh(&rcvq->lock);
				1636	return res;
				1637	}
				1638
				1639	/*
				1640	* IOCTL requests applicable to the UDP protocol
				1641	*/
				1642
				1643	int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
				1644	{
				1645	switch (cmd) {
				1646	case SIOCOUTQ:
				1647	{
				1648	int amount = sk_wmem_alloc_get(sk);
				1649
				1650	return put_user(amount, (int __user *)arg);
				1651	}
				1652
				1653	case SIOCINQ:
				1654	{
				1655	int amount = max_t(int, 0, first_packet_length(sk));
				1656
				1657	return put_user(amount, (int __user *)arg);
				1658	}
				1659
				1660	default:
				1661	return -ENOIOCTLCMD;
				1662	}
				1663
				1664	return 0;
				1665	}
				1666	EXPORT_SYMBOL(udp_ioctl);
				1667
				1668	struct sk_buff __skb_recv_udp(struct sock sk, unsigned int flags,
				1669	int noblock, int off, int err)
				1670	{
				1671	struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
				1672	struct sk_buff_head *queue;
				1673	struct sk_buff *last;
				1674	long timeo;
				1675	int error;
				1676
				1677	queue = &udp_sk(sk)->reader_queue;
				1678	flags \|= noblock ? MSG_DONTWAIT : 0;
				1679	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
				1680	do {
				1681	struct sk_buff *skb;
				1682
				1683	error = sock_error(sk);
				1684	if (error)
				1685	break;
				1686
				1687	error = -EAGAIN;
				1688	do {
				1689	spin_lock_bh(&queue->lock);
				1690	skb = __skb_try_recv_from_queue(sk, queue, flags,
				1691	udp_skb_destructor,
				1692	off, err, &last);
				1693	if (skb) {
				1694	spin_unlock_bh(&queue->lock);
				1695	return skb;
				1696	}
				1697
				1698	if (skb_queue_empty_lockless(sk_queue)) {
				1699	spin_unlock_bh(&queue->lock);
				1700	goto busy_check;
				1701	}
				1702
				1703	/* refill the reader queue and walk it again
				1704	* keep both queues locked to avoid re-acquiring
				1705	* the sk_receive_queue lock if fwd memory scheduling
				1706	* is needed.
				1707	*/
				1708	spin_lock(&sk_queue->lock);
				1709	skb_queue_splice_tail_init(sk_queue, queue);
				1710
				1711	skb = __skb_try_recv_from_queue(sk, queue, flags,
				1712	udp_skb_dtor_locked,
				1713	off, err, &last);
				1714	spin_unlock(&sk_queue->lock);
				1715	spin_unlock_bh(&queue->lock);
				1716	if (skb)
				1717	return skb;
				1718
				1719	busy_check:
				1720	if (!sk_can_busy_loop(sk))
				1721	break;
				1722
				1723	sk_busy_loop(sk, flags & MSG_DONTWAIT);
				1724	} while (!skb_queue_empty_lockless(sk_queue));
				1725
				1726	/* sk_queue is empty, reader_queue may contain peeked packets */
				1727	} while (timeo &&
				1728	!__skb_wait_for_more_packets(sk, &error, &timeo,
				1729	(struct sk_buff *)sk_queue));
				1730
				1731	*err = error;
				1732	return NULL;
				1733	}
				1734	EXPORT_SYMBOL(__skb_recv_udp);
				1735
				1736	/*
				1737	* This should be easy, if there is something there we
				1738	* return it, otherwise we block.
				1739	*/
				1740
				1741	int udp_recvmsg(struct sock sk, struct msghdr msg, size_t len, int noblock,
				1742	int flags, int *addr_len)
				1743	{
				1744	struct inet_sock *inet = inet_sk(sk);
				1745	DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
				1746	struct sk_buff *skb;
				1747	unsigned int ulen, copied;
				1748	int off, err, peeking = flags & MSG_PEEK;
				1749	int is_udplite = IS_UDPLITE(sk);
				1750	bool checksum_valid = false;
				1751
				1752	if (flags & MSG_ERRQUEUE)
				1753	return ip_recv_error(sk, msg, len, addr_len);
				1754
				1755	try_again:
				1756	off = sk_peek_offset(sk, flags);
				1757	skb = __skb_recv_udp(sk, flags, noblock, &off, &err);
				1758	if (!skb)
				1759	return err;
				1760
				1761	ulen = udp_skb_len(skb);
				1762	copied = len;
				1763	if (copied > ulen - off)
				1764	copied = ulen - off;
				1765	else if (copied < ulen)
				1766	msg->msg_flags \|= MSG_TRUNC;
				1767
				1768	/*
				1769	* If checksum is needed at all, try to do it while copying the
				1770	* data. If the data is truncated, or if we only want a partial
				1771	* coverage checksum (UDP-Lite), do it before the copy.
				1772	*/
				1773
				1774	if (copied < ulen \|\| peeking \|\|
				1775	(is_udplite && UDP_SKB_CB(skb)->partial_cov)) {
				1776	checksum_valid = udp_skb_csum_unnecessary(skb) \|\|
				1777	!__udp_lib_checksum_complete(skb);
				1778	if (!checksum_valid)
				1779	goto csum_copy_err;
				1780	}
				1781
				1782	if (checksum_valid \|\| udp_skb_csum_unnecessary(skb)) {
				1783	if (udp_skb_is_linear(skb))
				1784	err = copy_linear_skb(skb, copied, off, &msg->msg_iter);
				1785	else
				1786	err = skb_copy_datagram_msg(skb, off, msg, copied);
				1787	} else {
				1788	err = skb_copy_and_csum_datagram_msg(skb, off, msg);
				1789
				1790	if (err == -EINVAL)
				1791	goto csum_copy_err;
				1792	}
				1793
				1794	if (unlikely(err)) {
				1795	if (!peeking) {
				1796	atomic_inc(&sk->sk_drops);
				1797	UDP_INC_STATS(sock_net(sk),
				1798	UDP_MIB_INERRORS, is_udplite);
				1799	}
				1800	kfree_skb(skb);
				1801	return err;
				1802	}
				1803
				1804	if (!peeking)
				1805	UDP_INC_STATS(sock_net(sk),
				1806	UDP_MIB_INDATAGRAMS, is_udplite);
				1807
				1808	sock_recv_ts_and_drops(msg, sk, skb);
				1809
				1810	/* Copy the address. */
				1811	if (sin) {
				1812	sin->sin_family = AF_INET;
				1813	sin->sin_port = udp_hdr(skb)->source;
				1814	sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
				1815	memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
				1816	addr_len = sizeof(sin);
				1817
				1818	if (cgroup_bpf_enabled)
				1819	BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk,
				1820	(struct sockaddr *)sin);
				1821	}
				1822
				1823	if (udp_sk(sk)->gro_enabled)
				1824	udp_cmsg_recv(msg, sk, skb);
				1825
				1826	if (inet->cmsg_flags)
				1827	ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off);
				1828
				1829	err = copied;
				1830	if (flags & MSG_TRUNC)
				1831	err = ulen;
				1832
				1833	skb_consume_udp(sk, skb, peeking ? -err : err);
				1834	return err;
				1835
				1836	csum_copy_err:
				1837	if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
				1838	udp_skb_destructor)) {
				1839	UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
				1840	UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
				1841	}
				1842	kfree_skb(skb);
				1843
				1844	/* starting over for a new packet, but check if we need to yield */
				1845	cond_resched();
				1846	msg->msg_flags &= ~MSG_TRUNC;
				1847	goto try_again;
				1848	}
				1849
				1850	int udp_pre_connect(struct sock sk, struct sockaddr uaddr, int addr_len)
				1851	{
				1852	/* This check is replicated from __ip4_datagram_connect() and
				1853	* intended to prevent BPF program called below from accessing bytes
				1854	* that are out of the bound specified by user in addr_len.
				1855	*/
				1856	if (addr_len < sizeof(struct sockaddr_in))
				1857	return -EINVAL;
				1858
				1859	return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);
				1860	}
				1861	EXPORT_SYMBOL(udp_pre_connect);
				1862
				1863	int __udp_disconnect(struct sock *sk, int flags)
				1864	{
				1865	struct inet_sock *inet = inet_sk(sk);
				1866	/*
				1867	* 1003.1g - break association.
				1868	*/
				1869
				1870	sk->sk_state = TCP_CLOSE;
				1871	inet->inet_daddr = 0;
				1872	inet->inet_dport = 0;
				1873	sock_rps_reset_rxhash(sk);
				1874	sk->sk_bound_dev_if = 0;
				1875	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) {
				1876	inet_reset_saddr(sk);
				1877	if (sk->sk_prot->rehash &&
				1878	(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
				1879	sk->sk_prot->rehash(sk);
				1880	}
				1881
				1882	if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
				1883	sk->sk_prot->unhash(sk);
				1884	inet->inet_sport = 0;
				1885	}
				1886	sk_dst_reset(sk);
				1887	return 0;
				1888	}
				1889	EXPORT_SYMBOL(__udp_disconnect);
				1890
				1891	int udp_disconnect(struct sock *sk, int flags)
				1892	{
				1893	lock_sock(sk);
				1894	__udp_disconnect(sk, flags);
				1895	release_sock(sk);
				1896	return 0;
				1897	}
				1898	EXPORT_SYMBOL(udp_disconnect);
				1899
				1900	void udp_lib_unhash(struct sock *sk)
				1901	{
				1902	if (sk_hashed(sk)) {
				1903	struct udp_table *udptable = sk->sk_prot->h.udp_table;
				1904	struct udp_hslot hslot, hslot2;
				1905
				1906	hslot = udp_hashslot(udptable, sock_net(sk),
				1907	udp_sk(sk)->udp_port_hash);
				1908	hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
				1909
				1910	spin_lock_bh(&hslot->lock);
				1911	if (rcu_access_pointer(sk->sk_reuseport_cb))
				1912	reuseport_detach_sock(sk);
				1913	if (sk_del_node_init_rcu(sk)) {
				1914	hslot->count--;
				1915	inet_sk(sk)->inet_num = 0;
				1916	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
				1917
				1918	spin_lock(&hslot2->lock);
				1919	hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
				1920	hslot2->count--;
				1921	spin_unlock(&hslot2->lock);
				1922	}
				1923	spin_unlock_bh(&hslot->lock);
				1924	}
				1925	}
				1926	EXPORT_SYMBOL(udp_lib_unhash);
				1927
				1928	/*
				1929	* inet_rcv_saddr was changed, we must rehash secondary hash
				1930	*/
				1931	void udp_lib_rehash(struct sock *sk, u16 newhash)
				1932	{
				1933	if (sk_hashed(sk)) {
				1934	struct udp_table *udptable = sk->sk_prot->h.udp_table;
				1935	struct udp_hslot hslot, hslot2, *nhslot2;
				1936
				1937	hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
				1938	nhslot2 = udp_hashslot2(udptable, newhash);
				1939	udp_sk(sk)->udp_portaddr_hash = newhash;
				1940
				1941	if (hslot2 != nhslot2 \|\|
				1942	rcu_access_pointer(sk->sk_reuseport_cb)) {
				1943	hslot = udp_hashslot(udptable, sock_net(sk),
				1944	udp_sk(sk)->udp_port_hash);
				1945	/* we must lock primary chain too */
				1946	spin_lock_bh(&hslot->lock);
				1947	if (rcu_access_pointer(sk->sk_reuseport_cb))
				1948	reuseport_detach_sock(sk);
				1949
				1950	if (hslot2 != nhslot2) {
				1951	spin_lock(&hslot2->lock);
				1952	hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
				1953	hslot2->count--;
				1954	spin_unlock(&hslot2->lock);
				1955
				1956	spin_lock(&nhslot2->lock);
				1957	hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
				1958	&nhslot2->head);
				1959	nhslot2->count++;
				1960	spin_unlock(&nhslot2->lock);
				1961	}
				1962
				1963	spin_unlock_bh(&hslot->lock);
				1964	}
				1965	}
				1966	}
				1967	EXPORT_SYMBOL(udp_lib_rehash);
				1968
				1969	void udp_v4_rehash(struct sock *sk)
				1970	{
				1971	u16 new_hash = ipv4_portaddr_hash(sock_net(sk),
				1972	inet_sk(sk)->inet_rcv_saddr,
				1973	inet_sk(sk)->inet_num);
				1974	udp_lib_rehash(sk, new_hash);
				1975	}
				1976
				1977	static int __udp_queue_rcv_skb(struct sock sk, struct sk_buff skb)
				1978	{
				1979	int rc;
				1980
				1981	if (inet_sk(sk)->inet_daddr) {
				1982	sock_rps_save_rxhash(sk, skb);
				1983	sk_mark_napi_id(sk, skb);
				1984	sk_incoming_cpu_update(sk);
				1985	} else {
				1986	sk_mark_napi_id_once(sk, skb);
				1987	}
				1988
				1989	rc = __udp_enqueue_schedule_skb(sk, skb);
				1990	if (rc < 0) {
				1991	int is_udplite = IS_UDPLITE(sk);
				1992
				1993	/* Note that an ENOMEM error is charged twice */
				1994	if (rc == -ENOMEM)
				1995	UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
				1996	is_udplite);
				1997	UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
				1998	kfree_skb(skb);
				1999	trace_udp_fail_queue_rcv_skb(rc, sk);
				2000	return -1;
				2001	}
				2002
				2003	return 0;
				2004	}
				2005
				2006	/* returns:
				2007	* -1: error
				2008	* 0: success
				2009	* >0: "udp encap" protocol resubmission
				2010	*
				2011	* Note that in the success and error cases, the skb is assumed to
				2012	* have either been requeued or freed.
				2013	*/
				2014	static int udp_queue_rcv_one_skb(struct sock sk, struct sk_buff skb)
				2015	{
				2016	struct udp_sock *up = udp_sk(sk);
				2017	int is_udplite = IS_UDPLITE(sk);
				2018
				2019	/*
				2020	* Charge it to the socket, dropping if the queue is full.
				2021	*/
				2022	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
				2023	goto drop;
				2024	nf_reset_ct(skb);
				2025
				2026	if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
				2027	int (encap_rcv)(struct sock sk, struct sk_buff *skb);
				2028
				2029	/*
				2030	* This is an encapsulation socket so pass the skb to
				2031	* the socket's udp_encap_rcv() hook. Otherwise, just
				2032	* fall through and pass this up the UDP socket.
				2033	* up->encap_rcv() returns the following value:
				2034	* =0 if skb was successfully passed to the encap
				2035	* handler or was discarded by it.
				2036	* >0 if skb should be passed on to UDP.
				2037	* <0 if skb should be resubmitted as proto -N
				2038	*/
				2039
				2040	/* if we're overly short, let UDP handle it */
				2041	encap_rcv = READ_ONCE(up->encap_rcv);
				2042	if (encap_rcv) {
				2043	int ret;
				2044
				2045	/* Verify checksum before giving to encap */
				2046	if (udp_lib_checksum_complete(skb))
				2047	goto csum_error;
				2048
				2049	ret = encap_rcv(sk, skb);
				2050	if (ret <= 0) {
				2051	__UDP_INC_STATS(sock_net(sk),
				2052	UDP_MIB_INDATAGRAMS,
				2053	is_udplite);
				2054	return -ret;
				2055	}
				2056	}
				2057
				2058	/* FALLTHROUGH -- it's a UDP Packet */
				2059	}
				2060
				2061	/*
				2062	* UDP-Lite specific tests, ignored on UDP sockets
				2063	*/
				2064	if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
				2065
				2066	/*
				2067	* MIB statistics other than incrementing the error count are
				2068	* disabled for the following two types of errors: these depend
				2069	* on the application settings, not on the functioning of the
				2070	* protocol stack as such.
				2071	*
				2072	* RFC 3828 here recommends (sec 3.3): "There should also be a
				2073	* way ... to ... at least let the receiving application block
				2074	* delivery of packets with coverage values less than a value
				2075	* provided by the application."
				2076	*/
				2077	if (up->pcrlen == 0) { /* full coverage was set */
				2078	net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n",
				2079	UDP_SKB_CB(skb)->cscov, skb->len);
				2080	goto drop;
				2081	}
				2082	/* The next case involves violating the min. coverage requested
				2083	* by the receiver. This is subtle: if receiver wants x and x is
				2084	* greater than the buffersize/MTU then receiver will complain
				2085	* that it wants x while sender emits packets of smaller size y.
				2086	* Therefore the above ...()->partial_cov statement is essential.
				2087	*/
				2088	if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
				2089	net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n",
				2090	UDP_SKB_CB(skb)->cscov, up->pcrlen);
				2091	goto drop;
				2092	}
				2093	}
				2094
				2095	prefetch(&sk->sk_rmem_alloc);
				2096	if (rcu_access_pointer(sk->sk_filter) &&
				2097	udp_lib_checksum_complete(skb))
				2098	goto csum_error;
				2099
				2100	if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr)))
				2101	goto drop;
				2102
				2103	udp_csum_pull_header(skb);
				2104
				2105	ipv4_pktinfo_prepare(sk, skb);
				2106	return __udp_queue_rcv_skb(sk, skb);
				2107
				2108	csum_error:
				2109	__UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
				2110	drop:
				2111	__UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
				2112	atomic_inc(&sk->sk_drops);
				2113	kfree_skb(skb);
				2114	return -1;
				2115	}
				2116
				2117	static int udp_queue_rcv_skb(struct sock sk, struct sk_buff skb)
				2118	{
				2119	struct sk_buff next, segs;
				2120	int ret;
				2121
				2122	if (likely(!udp_unexpected_gso(sk, skb)))
				2123	return udp_queue_rcv_one_skb(sk, skb);
				2124
				2125	BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_SGO_CB_OFFSET);
				2126	__skb_push(skb, -skb_mac_offset(skb));
				2127	segs = udp_rcv_segment(sk, skb, true);
				2128	for (skb = segs; skb; skb = next) {
				2129	next = skb->next;
				2130	__skb_pull(skb, skb_transport_offset(skb));
				2131	ret = udp_queue_rcv_one_skb(sk, skb);
				2132	if (ret > 0)
				2133	ip_protocol_deliver_rcu(dev_net(skb->dev), skb, ret);
				2134	}
				2135	return 0;
				2136	}
				2137
				2138	/* For TCP sockets, sk_rx_dst is protected by socket lock
				2139	* For UDP, we use xchg() to guard against concurrent changes.
				2140	*/
				2141	bool udp_sk_rx_dst_set(struct sock sk, struct dst_entry dst)
				2142	{
				2143	struct dst_entry *old;
				2144
				2145	if (dst_hold_safe(dst)) {
				2146	old = xchg((__force struct dst_entry **)&sk->sk_rx_dst, dst);
				2147	dst_release(old);
				2148	return old != dst;
				2149	}
				2150	return false;
				2151	}
				2152	EXPORT_SYMBOL(udp_sk_rx_dst_set);
				2153
				2154	/*
				2155	* Multicasts and broadcasts go to each listener.
				2156	*
				2157	* Note: called only from the BH handler context.
				2158	*/
				2159	static int __udp4_lib_mcast_deliver(struct net net, struct sk_buff skb,
				2160	struct udphdr *uh,
				2161	__be32 saddr, __be32 daddr,
				2162	struct udp_table *udptable,
				2163	int proto)
				2164	{
				2165	struct sock sk, first = NULL;
				2166	unsigned short hnum = ntohs(uh->dest);
				2167	struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
				2168	unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
				2169	unsigned int offset = offsetof(typeof(*sk), sk_node);
				2170	int dif = skb->dev->ifindex;
				2171	int sdif = inet_sdif(skb);
				2172	struct hlist_node *node;
				2173	struct sk_buff *nskb;
				2174
				2175	if (use_hash2) {
				2176	hash2_any = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
				2177	udptable->mask;
				2178	hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask;
				2179	start_lookup:
				2180	hslot = &udptable->hash2[hash2];
				2181	offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
				2182	}
				2183
				2184	sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
				2185	if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr,
				2186	uh->source, saddr, dif, sdif, hnum))
				2187	continue;
				2188
				2189	if (!first) {
				2190	first = sk;
				2191	continue;
				2192	}
				2193	nskb = skb_clone(skb, GFP_ATOMIC);
				2194
				2195	if (unlikely(!nskb)) {
				2196	atomic_inc(&sk->sk_drops);
				2197	__UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS,
				2198	IS_UDPLITE(sk));
				2199	__UDP_INC_STATS(net, UDP_MIB_INERRORS,
				2200	IS_UDPLITE(sk));
				2201	continue;
				2202	}
				2203	if (udp_queue_rcv_skb(sk, nskb) > 0)
				2204	consume_skb(nskb);
				2205	}
				2206
				2207	/* Also lookup :port if we are using hash2 and haven't done so yet. /
				2208	if (use_hash2 && hash2 != hash2_any) {
				2209	hash2 = hash2_any;
				2210	goto start_lookup;
				2211	}
				2212
				2213	if (first) {
				2214	if (udp_queue_rcv_skb(first, skb) > 0)
				2215	consume_skb(skb);
				2216	} else {
				2217	kfree_skb(skb);
				2218	__UDP_INC_STATS(net, UDP_MIB_IGNOREDMULTI,
				2219	proto == IPPROTO_UDPLITE);
				2220	}
				2221	return 0;
				2222	}
				2223
				2224	/* Initialize UDP checksum. If exited with zero value (success),
				2225	* CHECKSUM_UNNECESSARY means, that no more checks are required.
				2226	* Otherwise, csum completion requires checksumming packet body,
				2227	* including udp header and folding it to skb->csum.
				2228	*/
				2229	static inline int udp4_csum_init(struct sk_buff skb, struct udphdr uh,
				2230	int proto)
				2231	{
				2232	int err;
				2233
				2234	UDP_SKB_CB(skb)->partial_cov = 0;
				2235	UDP_SKB_CB(skb)->cscov = skb->len;
				2236
				2237	if (proto == IPPROTO_UDPLITE) {
				2238	err = udplite_checksum_init(skb, uh);
				2239	if (err)
				2240	return err;
				2241
				2242	if (UDP_SKB_CB(skb)->partial_cov) {
				2243	skb->csum = inet_compute_pseudo(skb, proto);
				2244	return 0;
				2245	}
				2246	}
				2247
				2248	/* Note, we are only interested in != 0 or == 0, thus the
				2249	* force to int.
				2250	*/
				2251	err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
				2252	inet_compute_pseudo);
				2253	if (err)
				2254	return err;
				2255
				2256	if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) {
				2257	/* If SW calculated the value, we know it's bad */
				2258	if (skb->csum_complete_sw)
				2259	return 1;
				2260
				2261	/* HW says the value is bad. Let's validate that.
				2262	* skb->csum is no longer the full packet checksum,
				2263	* so don't treat it as such.
				2264	*/
				2265	skb_checksum_complete_unset(skb);
				2266	}
				2267
				2268	return 0;
				2269	}
				2270
				2271	/* wrapper for udp_queue_rcv_skb tacking care of csum conversion and
				2272	* return code conversion for ip layer consumption
				2273	*/
				2274	static int udp_unicast_rcv_skb(struct sock sk, struct sk_buff skb,
				2275	struct udphdr *uh)
				2276	{
				2277	int ret;
				2278
				2279	if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
				2280	skb_checksum_try_convert(skb, IPPROTO_UDP, inet_compute_pseudo);
				2281
				2282	ret = udp_queue_rcv_skb(sk, skb);
				2283
				2284	/* a return value > 0 means to resubmit the input, but
				2285	* it wants the return to be -protocol, or 0
				2286	*/
				2287	if (ret > 0)
				2288	return -ret;
				2289	return 0;
				2290	}
				2291
				2292	/*
				2293	* All we need to do is get the socket, and then do a checksum.
				2294	*/
				2295
				2296	int __udp4_lib_rcv(struct sk_buff skb, struct udp_table udptable,
				2297	int proto)
				2298	{
				2299	struct sock *sk;
				2300	struct udphdr *uh;
				2301	unsigned short ulen;
				2302	struct rtable *rt = skb_rtable(skb);
				2303	__be32 saddr, daddr;
				2304	struct net *net = dev_net(skb->dev);
				2305
				2306	/*
				2307	* Validate the packet.
				2308	*/
				2309	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
				2310	goto drop; /* No space for header. */
				2311
				2312	uh = udp_hdr(skb);
				2313	ulen = ntohs(uh->len);
				2314	saddr = ip_hdr(skb)->saddr;
				2315	daddr = ip_hdr(skb)->daddr;
				2316
				2317	if (ulen > skb->len)
				2318	goto short_packet;
				2319
				2320	if (proto == IPPROTO_UDP) {
				2321	/* UDP validates ulen. */
				2322	if (ulen < sizeof(*uh) \|\| pskb_trim_rcsum(skb, ulen))
				2323	goto short_packet;
				2324	uh = udp_hdr(skb);
				2325	}
				2326
				2327	if (udp4_csum_init(skb, uh, proto))
				2328	goto csum_error;
				2329
				2330	sk = skb_steal_sock(skb);
				2331	if (sk) {
				2332	struct dst_entry *dst = skb_dst(skb);
				2333	int ret;
				2334
				2335	if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst))
				2336	udp_sk_rx_dst_set(sk, dst);
				2337
				2338	ret = udp_unicast_rcv_skb(sk, skb, uh);
				2339	sock_put(sk);
				2340	return ret;
				2341	}
				2342
				2343	if (rt->rt_flags & (RTCF_BROADCAST\|RTCF_MULTICAST))
				2344	return __udp4_lib_mcast_deliver(net, skb, uh,
				2345	saddr, daddr, udptable, proto);
				2346
				2347	sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
				2348	if (sk)
				2349	return udp_unicast_rcv_skb(sk, skb, uh);
				2350
				2351	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
				2352	goto drop;
				2353	nf_reset_ct(skb);
				2354
				2355	/* No socket. Drop packet silently, if checksum is wrong */
				2356	if (udp_lib_checksum_complete(skb))
				2357	goto csum_error;
				2358
				2359	__UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
				2360	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
				2361
				2362	/*
				2363	* Hmm. We got an UDP packet to a port to which we
				2364	* don't wanna listen. Ignore it.
				2365	*/
				2366	kfree_skb(skb);
				2367	return 0;
				2368
				2369	short_packet:
				2370	net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n",
				2371	proto == IPPROTO_UDPLITE ? "Lite" : "",
				2372	&saddr, ntohs(uh->source),
				2373	ulen, skb->len,
				2374	&daddr, ntohs(uh->dest));
				2375	goto drop;
				2376
				2377	csum_error:
				2378	/*
				2379	* RFC1122: OK. Discards the bad packet silently (as far as
				2380	* the network is concerned, anyway) as per 4.1.3.4 (MUST).
				2381	*/
				2382	net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n",
				2383	proto == IPPROTO_UDPLITE ? "Lite" : "",
				2384	&saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
				2385	ulen);
				2386	__UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
				2387	drop:
				2388	__UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
				2389	kfree_skb(skb);
				2390	return 0;
				2391	}
				2392
				2393	/* We can only early demux multicast if there is a single matching socket.
				2394	* If more than one socket found returns NULL
				2395	*/
				2396	static struct sock __udp4_lib_mcast_demux_lookup(struct net net,
				2397	__be16 loc_port, __be32 loc_addr,
				2398	__be16 rmt_port, __be32 rmt_addr,
				2399	int dif, int sdif)
				2400	{
				2401	struct sock sk, result;
				2402	unsigned short hnum = ntohs(loc_port);
				2403	unsigned int slot = udp_hashfn(net, hnum, udp_table.mask);
				2404	struct udp_hslot *hslot = &udp_table.hash[slot];
				2405
				2406	/* Do not bother scanning a too big list */
				2407	if (hslot->count > 10)
				2408	return NULL;
				2409
				2410	result = NULL;
				2411	sk_for_each_rcu(sk, &hslot->head) {
				2412	if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr,
				2413	rmt_port, rmt_addr, dif, sdif, hnum)) {
				2414	if (result)
				2415	return NULL;
				2416	result = sk;
				2417	}
				2418	}
				2419
				2420	return result;
				2421	}
				2422
				2423	/* For unicast we should only early demux connected sockets or we can
				2424	* break forwarding setups. The chains here can be long so only check
				2425	* if the first socket is an exact match and if not move on.
				2426	*/
				2427	static struct sock __udp4_lib_demux_lookup(struct net net,
				2428	__be16 loc_port, __be32 loc_addr,
				2429	__be16 rmt_port, __be32 rmt_addr,
				2430	int dif, int sdif)
				2431	{
				2432	unsigned short hnum = ntohs(loc_port);
				2433	unsigned int hash2 = ipv4_portaddr_hash(net, loc_addr, hnum);
				2434	unsigned int slot2 = hash2 & udp_table.mask;
				2435	struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
				2436	INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
				2437	const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
				2438	struct sock *sk;
				2439
				2440	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
				2441	if (INET_MATCH(sk, net, acookie, rmt_addr,
				2442	loc_addr, ports, dif, sdif))
				2443	return sk;
				2444	/* Only check first socket in chain */
				2445	break;
				2446	}
				2447	return NULL;
				2448	}
				2449
				2450	int udp_v4_early_demux(struct sk_buff *skb)
				2451	{
				2452	struct net *net = dev_net(skb->dev);
				2453	struct in_device *in_dev = NULL;
				2454	const struct iphdr *iph;
				2455	const struct udphdr *uh;
				2456	struct sock *sk = NULL;
				2457	struct dst_entry *dst;
				2458	int dif = skb->dev->ifindex;
				2459	int sdif = inet_sdif(skb);
				2460	int ours;
				2461
				2462	/* validate the packet */
				2463	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
				2464	return 0;
				2465
				2466	iph = ip_hdr(skb);
				2467	uh = udp_hdr(skb);
				2468
				2469	if (skb->pkt_type == PACKET_MULTICAST) {
				2470	in_dev = __in_dev_get_rcu(skb->dev);
				2471
				2472	if (!in_dev)
				2473	return 0;
				2474
				2475	ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
				2476	iph->protocol);
				2477	if (!ours)
				2478	return 0;
				2479
				2480	sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
				2481	uh->source, iph->saddr,
				2482	dif, sdif);
				2483	} else if (skb->pkt_type == PACKET_HOST) {
				2484	sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
				2485	uh->source, iph->saddr, dif, sdif);
				2486	}
				2487
				2488	if (!sk \|\| !refcount_inc_not_zero(&sk->sk_refcnt))
				2489	return 0;
				2490
				2491	skb->sk = sk;
				2492	skb->destructor = sock_efree;
				2493	dst = rcu_dereference(sk->sk_rx_dst);
				2494
				2495	if (dst)
				2496	dst = dst_check(dst, 0);
				2497	if (dst) {
				2498	u32 itag = 0;
				2499
				2500	/* set noref for now.
				2501	* any place which wants to hold dst has to call
				2502	* dst_hold_safe()
				2503	*/
				2504	skb_dst_set_noref(skb, dst);
				2505
				2506	/* for unconnected multicast sockets we need to validate
				2507	* the source on each packet
				2508	*/
				2509	if (!inet_sk(sk)->inet_daddr && in_dev)
				2510	return ip_mc_validate_source(skb, iph->daddr,
				2511	iph->saddr,
				2512	iph->tos & IPTOS_RT_MASK,
				2513	skb->dev, in_dev, &itag);
				2514	}
				2515	return 0;
				2516	}
				2517
				2518	int udp_rcv(struct sk_buff *skb)
				2519	{
				2520	return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP);
				2521	}
				2522
				2523	void udp_destroy_sock(struct sock *sk)
				2524	{
				2525	struct udp_sock *up = udp_sk(sk);
				2526	bool slow = lock_sock_fast(sk);
				2527
				2528	/* protects from races with udp_abort() */
				2529	sock_set_flag(sk, SOCK_DEAD);
				2530	udp_flush_pending_frames(sk);
				2531	unlock_sock_fast(sk, slow);
				2532	if (static_branch_unlikely(&udp_encap_needed_key)) {
				2533	if (up->encap_type) {
				2534	void (encap_destroy)(struct sock sk);
				2535	encap_destroy = READ_ONCE(up->encap_destroy);
				2536	if (encap_destroy)
				2537	encap_destroy(sk);
				2538	}
				2539	if (up->encap_enabled)
				2540	static_branch_dec(&udp_encap_needed_key);
				2541	}
				2542	}
				2543
				2544	/*
				2545	* Socket option code for UDP
				2546	*/
				2547	int udp_lib_setsockopt(struct sock *sk, int level, int optname,
				2548	char __user *optval, unsigned int optlen,
				2549	int (push_pending_frames)(struct sock ))
				2550	{
				2551	struct udp_sock *up = udp_sk(sk);
				2552	int val, valbool;
				2553	int err = 0;
				2554	int is_udplite = IS_UDPLITE(sk);
				2555
				2556	if (optlen < sizeof(int))
				2557	return -EINVAL;
				2558
				2559	if (get_user(val, (int __user *)optval))
				2560	return -EFAULT;
				2561
				2562	valbool = val ? 1 : 0;
				2563
				2564	switch (optname) {
				2565	case UDP_CORK:
				2566	if (val != 0) {
				2567	WRITE_ONCE(up->corkflag, 1);
				2568	} else {
				2569	WRITE_ONCE(up->corkflag, 0);
				2570	lock_sock(sk);
				2571	push_pending_frames(sk);
				2572	release_sock(sk);
				2573	}
				2574	break;
				2575
				2576	case UDP_ENCAP:
				2577	switch (val) {
				2578	case 0:
				2579	case UDP_ENCAP_ESPINUDP:
				2580	case UDP_ENCAP_ESPINUDP_NON_IKE:
				2581	up->encap_rcv = xfrm4_udp_encap_rcv;
				2582	/* FALLTHROUGH */
				2583	case UDP_ENCAP_L2TPINUDP:
				2584	up->encap_type = val;
				2585	lock_sock(sk);
				2586	udp_tunnel_encap_enable(sk->sk_socket);
				2587	release_sock(sk);
				2588	break;
				2589	default:
				2590	err = -ENOPROTOOPT;
				2591	break;
				2592	}
				2593	break;
				2594
				2595	case UDP_NO_CHECK6_TX:
				2596	up->no_check6_tx = valbool;
				2597	break;
				2598
				2599	case UDP_NO_CHECK6_RX:
				2600	up->no_check6_rx = valbool;
				2601	break;
				2602
				2603	case UDP_SEGMENT:
				2604	if (val < 0 \|\| val > USHRT_MAX)
				2605	return -EINVAL;
				2606	WRITE_ONCE(up->gso_size, val);
				2607	break;
				2608
				2609	case UDP_GRO:
				2610	lock_sock(sk);
				2611
				2612	/* when enabling GRO, accept the related GSO packet type */
				2613	if (valbool)
				2614	udp_tunnel_encap_enable(sk->sk_socket);
				2615	up->gro_enabled = valbool;
				2616	up->accept_udp_l4 = valbool;
				2617	release_sock(sk);
				2618	break;
				2619
				2620	/*
				2621	* UDP-Lite's partial checksum coverage (RFC 3828).
				2622	*/
				2623	/* The sender sets actual checksum coverage length via this option.
				2624	* The case coverage > packet length is handled by send module. */
				2625	case UDPLITE_SEND_CSCOV:
				2626	if (!is_udplite) /* Disable the option on UDP sockets */
				2627	return -ENOPROTOOPT;
				2628	if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
				2629	val = 8;
				2630	else if (val > USHRT_MAX)
				2631	val = USHRT_MAX;
				2632	up->pcslen = val;
				2633	up->pcflag \|= UDPLITE_SEND_CC;
				2634	break;
				2635
				2636	/* The receiver specifies a minimum checksum coverage value. To make
				2637	* sense, this should be set to at least 8 (as done below). If zero is
				2638	* used, this again means full checksum coverage. */
				2639	case UDPLITE_RECV_CSCOV:
				2640	if (!is_udplite) /* Disable the option on UDP sockets */
				2641	return -ENOPROTOOPT;
				2642	if (val != 0 && val < 8) /* Avoid silly minimal values. */
				2643	val = 8;
				2644	else if (val > USHRT_MAX)
				2645	val = USHRT_MAX;
				2646	up->pcrlen = val;
				2647	up->pcflag \|= UDPLITE_RECV_CC;
				2648	break;
				2649
				2650	default:
				2651	err = -ENOPROTOOPT;
				2652	break;
				2653	}
				2654
				2655	return err;
				2656	}
				2657	EXPORT_SYMBOL(udp_lib_setsockopt);
				2658
				2659	int udp_setsockopt(struct sock *sk, int level, int optname,
				2660	char __user *optval, unsigned int optlen)
				2661	{
				2662	if (level == SOL_UDP \|\| level == SOL_UDPLITE)
				2663	return udp_lib_setsockopt(sk, level, optname, optval, optlen,
				2664	udp_push_pending_frames);
				2665	return ip_setsockopt(sk, level, optname, optval, optlen);
				2666	}
				2667
				2668	#ifdef CONFIG_COMPAT
				2669	int compat_udp_setsockopt(struct sock *sk, int level, int optname,
				2670	char __user *optval, unsigned int optlen)
				2671	{
				2672	if (level == SOL_UDP \|\| level == SOL_UDPLITE)
				2673	return udp_lib_setsockopt(sk, level, optname, optval, optlen,
				2674	udp_push_pending_frames);
				2675	return compat_ip_setsockopt(sk, level, optname, optval, optlen);
				2676	}
				2677	#endif
				2678
				2679	int udp_lib_getsockopt(struct sock *sk, int level, int optname,
				2680	char __user optval, int __user optlen)
				2681	{
				2682	struct udp_sock *up = udp_sk(sk);
				2683	int val, len;
				2684
				2685	if (get_user(len, optlen))
				2686	return -EFAULT;
				2687
				2688	if (len < 0)
				2689	return -EINVAL;
				2690
				2691	len = min_t(unsigned int, len, sizeof(int));
				2692
				2693	switch (optname) {
				2694	case UDP_CORK:
				2695	val = READ_ONCE(up->corkflag);
				2696	break;
				2697
				2698	case UDP_ENCAP:
				2699	val = up->encap_type;
				2700	break;
				2701
				2702	case UDP_NO_CHECK6_TX:
				2703	val = up->no_check6_tx;
				2704	break;
				2705
				2706	case UDP_NO_CHECK6_RX:
				2707	val = up->no_check6_rx;
				2708	break;
				2709
				2710	case UDP_SEGMENT:
				2711	val = READ_ONCE(up->gso_size);
				2712	break;
				2713
				2714	case UDP_GRO:
				2715	val = up->gro_enabled;
				2716	break;
				2717
				2718	/* The following two cannot be changed on UDP sockets, the return is
				2719	* always 0 (which corresponds to the full checksum coverage of UDP). */
				2720	case UDPLITE_SEND_CSCOV:
				2721	val = up->pcslen;
				2722	break;
				2723
				2724	case UDPLITE_RECV_CSCOV:
				2725	val = up->pcrlen;
				2726	break;
				2727
				2728	default:
				2729	return -ENOPROTOOPT;
				2730	}
				2731
				2732	if (put_user(len, optlen))
				2733	return -EFAULT;
				2734	if (copy_to_user(optval, &val, len))
				2735	return -EFAULT;
				2736	return 0;
				2737	}
				2738	EXPORT_SYMBOL(udp_lib_getsockopt);
				2739
				2740	int udp_getsockopt(struct sock *sk, int level, int optname,
				2741	char __user optval, int __user optlen)
				2742	{
				2743	if (level == SOL_UDP \|\| level == SOL_UDPLITE)
				2744	return udp_lib_getsockopt(sk, level, optname, optval, optlen);
				2745	return ip_getsockopt(sk, level, optname, optval, optlen);
				2746	}
				2747
				2748	#ifdef CONFIG_COMPAT
				2749	int compat_udp_getsockopt(struct sock *sk, int level, int optname,
				2750	char __user optval, int __user optlen)
				2751	{
				2752	if (level == SOL_UDP \|\| level == SOL_UDPLITE)
				2753	return udp_lib_getsockopt(sk, level, optname, optval, optlen);
				2754	return compat_ip_getsockopt(sk, level, optname, optval, optlen);
				2755	}
				2756	#endif
				2757	/**
				2758	* udp_poll - wait for a UDP event.
				2759	* @file - file struct
				2760	* @sock - socket
				2761	* @wait - poll table
				2762	*
				2763	* This is same as datagram poll, except for the special case of
				2764	* blocking sockets. If application is using a blocking fd
				2765	* and a packet with checksum error is in the queue;
				2766	* then it could get return from select indicating data available
				2767	* but then block when reading it. Add special case code
				2768	* to work around these arguably broken applications.
				2769	*/
				2770	__poll_t udp_poll(struct file file, struct socket sock, poll_table *wait)
				2771	{
				2772	__poll_t mask = datagram_poll(file, sock, wait);
				2773	struct sock *sk = sock->sk;
				2774
				2775	if (!skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
				2776	mask \|= EPOLLIN \| EPOLLRDNORM;
				2777
				2778	/* Check for false positives due to checksum errors */
				2779	if ((mask & EPOLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
				2780	!(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
				2781	mask &= ~(EPOLLIN \| EPOLLRDNORM);
				2782
				2783	return mask;
				2784
				2785	}
				2786	EXPORT_SYMBOL(udp_poll);
				2787
				2788	int udp_abort(struct sock *sk, int err)
				2789	{
				2790	lock_sock(sk);
				2791
				2792	/* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing
				2793	* with close()
				2794	*/
				2795	if (sock_flag(sk, SOCK_DEAD))
				2796	goto out;
				2797
				2798	sk->sk_err = err;
				2799	sk->sk_error_report(sk);
				2800	__udp_disconnect(sk, 0);
				2801
				2802	out:
				2803	release_sock(sk);
				2804
				2805	return 0;
				2806	}
				2807	EXPORT_SYMBOL_GPL(udp_abort);
				2808
				2809	struct proto udp_prot = {
				2810	.name = "UDP",
				2811	.owner = THIS_MODULE,
				2812	.close = udp_lib_close,
				2813	.pre_connect = udp_pre_connect,
				2814	.connect = ip4_datagram_connect,
				2815	.disconnect = udp_disconnect,
				2816	.ioctl = udp_ioctl,
				2817	.init = udp_init_sock,
				2818	.destroy = udp_destroy_sock,
				2819	.setsockopt = udp_setsockopt,
				2820	.getsockopt = udp_getsockopt,
				2821	.sendmsg = udp_sendmsg,
				2822	.recvmsg = udp_recvmsg,
				2823	.sendpage = udp_sendpage,
				2824	.release_cb = ip4_datagram_release_cb,
				2825	.hash = udp_lib_hash,
				2826	.unhash = udp_lib_unhash,
				2827	.rehash = udp_v4_rehash,
				2828	.get_port = udp_v4_get_port,
				2829	.memory_allocated = &udp_memory_allocated,
				2830	.sysctl_mem = sysctl_udp_mem,
				2831	.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
				2832	.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
				2833	.obj_size = sizeof(struct udp_sock),
				2834	.h.udp_table = &udp_table,
				2835	#ifdef CONFIG_COMPAT
				2836	.compat_setsockopt = compat_udp_setsockopt,
				2837	.compat_getsockopt = compat_udp_getsockopt,
				2838	#endif
				2839	.diag_destroy = udp_abort,
				2840	};
				2841	EXPORT_SYMBOL(udp_prot);
				2842
				2843	/* ------------------------------------------------------------------------ */
				2844	#ifdef CONFIG_PROC_FS
				2845
				2846	static struct sock udp_get_first(struct seq_file seq, int start)
				2847	{
				2848	struct sock *sk;
				2849	struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
				2850	struct udp_iter_state *state = seq->private;
				2851	struct net *net = seq_file_net(seq);
				2852
				2853	for (state->bucket = start; state->bucket <= afinfo->udp_table->mask;
				2854	++state->bucket) {
				2855	struct udp_hslot *hslot = &afinfo->udp_table->hash[state->bucket];
				2856
				2857	if (hlist_empty(&hslot->head))
				2858	continue;
				2859
				2860	spin_lock_bh(&hslot->lock);
				2861	sk_for_each(sk, &hslot->head) {
				2862	if (!net_eq(sock_net(sk), net))
				2863	continue;
				2864	if (sk->sk_family == afinfo->family)
				2865	goto found;
				2866	}
				2867	spin_unlock_bh(&hslot->lock);
				2868	}
				2869	sk = NULL;
				2870	found:
				2871	return sk;
				2872	}
				2873
				2874	static struct sock udp_get_next(struct seq_file seq, struct sock *sk)
				2875	{
				2876	struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
				2877	struct udp_iter_state *state = seq->private;
				2878	struct net *net = seq_file_net(seq);
				2879
				2880	do {
				2881	sk = sk_next(sk);
				2882	} while (sk && (!net_eq(sock_net(sk), net) \|\| sk->sk_family != afinfo->family));
				2883
				2884	if (!sk) {
				2885	if (state->bucket <= afinfo->udp_table->mask)
				2886	spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
				2887	return udp_get_first(seq, state->bucket + 1);
				2888	}
				2889	return sk;
				2890	}
				2891
				2892	static struct sock udp_get_idx(struct seq_file seq, loff_t pos)
				2893	{
				2894	struct sock *sk = udp_get_first(seq, 0);
				2895
				2896	if (sk)
				2897	while (pos && (sk = udp_get_next(seq, sk)) != NULL)
				2898	--pos;
				2899	return pos ? NULL : sk;
				2900	}
				2901
				2902	void udp_seq_start(struct seq_file seq, loff_t *pos)
				2903	{
				2904	struct udp_iter_state *state = seq->private;
				2905	state->bucket = MAX_UDP_PORTS;
				2906
				2907	return pos ? udp_get_idx(seq, pos-1) : SEQ_START_TOKEN;
				2908	}
				2909	EXPORT_SYMBOL(udp_seq_start);
				2910
				2911	void udp_seq_next(struct seq_file seq, void v, loff_t pos)
				2912	{
				2913	struct sock *sk;
				2914
				2915	if (v == SEQ_START_TOKEN)
				2916	sk = udp_get_idx(seq, 0);
				2917	else
				2918	sk = udp_get_next(seq, v);
				2919
				2920	++*pos;
				2921	return sk;
				2922	}
				2923	EXPORT_SYMBOL(udp_seq_next);
				2924
				2925	void udp_seq_stop(struct seq_file seq, void v)
				2926	{
				2927	struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
				2928	struct udp_iter_state *state = seq->private;
				2929
				2930	if (state->bucket <= afinfo->udp_table->mask)
				2931	spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
				2932	}
				2933	EXPORT_SYMBOL(udp_seq_stop);
				2934
				2935	/* ------------------------------------------------------------------------ */
				2936	static void udp4_format_sock(struct sock sp, struct seq_file f,
				2937	int bucket)
				2938	{
				2939	struct inet_sock *inet = inet_sk(sp);
				2940	__be32 dest = inet->inet_daddr;
				2941	__be32 src = inet->inet_rcv_saddr;
				2942	__u16 destp = ntohs(inet->inet_dport);
				2943	__u16 srcp = ntohs(inet->inet_sport);
				2944
				2945	seq_printf(f, "%5d: %08X:%04X %08X:%04X"
				2946	" %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u",
				2947	bucket, src, srcp, dest, destp, sp->sk_state,
				2948	sk_wmem_alloc_get(sp),
				2949	udp_rqueue_get(sp),
				2950	0, 0L, 0,
				2951	from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)),
				2952	0, sock_i_ino(sp),
				2953	refcount_read(&sp->sk_refcnt), sp,
				2954	atomic_read(&sp->sk_drops));
				2955	}
				2956
				2957	int udp4_seq_show(struct seq_file seq, void v)
				2958	{
				2959	seq_setwidth(seq, 127);
				2960	if (v == SEQ_START_TOKEN)
				2961	seq_puts(seq, " sl local_address rem_address st tx_queue "
				2962	"rx_queue tr tm->when retrnsmt uid timeout "
				2963	"inode ref pointer drops");
				2964	else {
				2965	struct udp_iter_state *state = seq->private;
				2966
				2967	udp4_format_sock(v, seq, state->bucket);
				2968	}
				2969	seq_pad(seq, '\n');
				2970	return 0;
				2971	}
				2972
				2973	const struct seq_operations udp_seq_ops = {
				2974	.start = udp_seq_start,
				2975	.next = udp_seq_next,
				2976	.stop = udp_seq_stop,
				2977	.show = udp4_seq_show,
				2978	};
				2979	EXPORT_SYMBOL(udp_seq_ops);
				2980
				2981	static struct udp_seq_afinfo udp4_seq_afinfo = {
				2982	.family = AF_INET,
				2983	.udp_table = &udp_table,
				2984	};
				2985
				2986	static int __net_init udp4_proc_init_net(struct net *net)
				2987	{
				2988	if (!proc_create_net_data("udp", 0444, net->proc_net, &udp_seq_ops,
				2989	sizeof(struct udp_iter_state), &udp4_seq_afinfo))
				2990	return -ENOMEM;
				2991	return 0;
				2992	}
				2993
				2994	static void __net_exit udp4_proc_exit_net(struct net *net)
				2995	{
				2996	remove_proc_entry("udp", net->proc_net);
				2997	}
				2998
				2999	static struct pernet_operations udp4_net_ops = {
				3000	.init = udp4_proc_init_net,
				3001	.exit = udp4_proc_exit_net,
				3002	};
				3003
				3004	int __init udp4_proc_init(void)
				3005	{
				3006	return register_pernet_subsys(&udp4_net_ops);
				3007	}
				3008
				3009	void udp4_proc_exit(void)
				3010	{
				3011	unregister_pernet_subsys(&udp4_net_ops);
				3012	}
				3013	#endif /* CONFIG_PROC_FS */
				3014
				3015	static __initdata unsigned long uhash_entries;
				3016	static int __init set_uhash_entries(char *str)
				3017	{
				3018	ssize_t ret;
				3019
				3020	if (!str)
				3021	return 0;
				3022
				3023	ret = kstrtoul(str, 0, &uhash_entries);
				3024	if (ret)
				3025	return 0;
				3026
				3027	if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN)
				3028	uhash_entries = UDP_HTABLE_SIZE_MIN;
				3029	return 1;
				3030	}
				3031	__setup("uhash_entries=", set_uhash_entries);
				3032
				3033	void __init udp_table_init(struct udp_table table, const char name)
				3034	{
				3035	unsigned int i;
				3036
				3037	table->hash = alloc_large_system_hash(name,
				3038	2 * sizeof(struct udp_hslot),
				3039	uhash_entries,
				3040	21, /* one slot per 2 MB */
				3041	0,
				3042	&table->log,
				3043	&table->mask,
				3044	UDP_HTABLE_SIZE_MIN,
				3045	64 * 1024);
				3046
				3047	table->hash2 = table->hash + (table->mask + 1);
				3048	for (i = 0; i <= table->mask; i++) {
				3049	INIT_HLIST_HEAD(&table->hash[i].head);
				3050	table->hash[i].count = 0;
				3051	spin_lock_init(&table->hash[i].lock);
				3052	}
				3053	for (i = 0; i <= table->mask; i++) {
				3054	INIT_HLIST_HEAD(&table->hash2[i].head);
				3055	table->hash2[i].count = 0;
				3056	spin_lock_init(&table->hash2[i].lock);
				3057	}
				3058	}
				3059
				3060	u32 udp_flow_hashrnd(void)
				3061	{
				3062	static u32 hashrnd __read_mostly;
				3063
				3064	net_get_random_once(&hashrnd, sizeof(hashrnd));
				3065
				3066	return hashrnd;
				3067	}
				3068	EXPORT_SYMBOL(udp_flow_hashrnd);
				3069
				3070	static void __udp_sysctl_init(struct net *net)
				3071	{
				3072	net->ipv4.sysctl_udp_rmem_min = SK_MEM_QUANTUM;
				3073	net->ipv4.sysctl_udp_wmem_min = SK_MEM_QUANTUM;
				3074
				3075	#ifdef CONFIG_NET_L3_MASTER_DEV
				3076	net->ipv4.sysctl_udp_l3mdev_accept = 0;
				3077	#endif
				3078	}
				3079
				3080	static int __net_init udp_sysctl_init(struct net *net)
				3081	{
				3082	__udp_sysctl_init(net);
				3083	return 0;
				3084	}
				3085
				3086	static struct pernet_operations __net_initdata udp_sysctl_ops = {
				3087	.init = udp_sysctl_init,
				3088	};
				3089
				3090	void __init udp_init(void)
				3091	{
				3092	unsigned long limit;
				3093	unsigned int i;
				3094
				3095	udp_table_init(&udp_table, "UDP");
				3096	limit = nr_free_buffer_pages() / 8;
				3097	limit = max(limit, 128UL);
				3098	sysctl_udp_mem[0] = limit / 4 * 3;
				3099	sysctl_udp_mem[1] = limit;
				3100	sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
				3101
				3102	__udp_sysctl_init(&init_net);
				3103
				3104	/* 16 spinlocks per cpu */
				3105	udp_busylocks_log = ilog2(nr_cpu_ids) + 4;
				3106	udp_busylocks = kmalloc(sizeof(spinlock_t) << udp_busylocks_log,
				3107	GFP_KERNEL);
				3108	if (!udp_busylocks)
				3109	panic("UDP: failed to alloc udp_busylocks\n");
				3110	for (i = 0; i < (1U << udp_busylocks_log); i++)
				3111	spin_lock_init(udp_busylocks + i);
				3112
				3113	if (register_pernet_subsys(&udp_sysctl_ops))
				3114	panic("UDP: failed to init sysctl parameters.\n");
				3115	}