Blame - marvell/linux/include/net/tcp.h - T108

blob: 199853b007abfd81e3ac1f1b95fb64181a0d55a3 [file] [log] [blame]

b.liu	e958203	2025-04-17 19:18:16 +0800	[diff] [blame^]	1	/* SPDX-License-Identifier: GPL-2.0-or-later */
				2	/*
				3	* INET An implementation of the TCP/IP protocol suite for the LINUX
				4	* operating system. INET is implemented using the BSD Socket
				5	* interface as the means of communication with the user level.
				6	*
				7	* Definitions for the TCP module.
				8	*
				9	* Version: @(#)tcp.h 1.0.5 05/23/93
				10	*
				11	* Authors: Ross Biro
				12	* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
				13	*/
				14	#ifndef _TCP_H
				15	#define _TCP_H
				16
				17	#define FASTRETRANS_DEBUG 1
				18
				19	#include <linux/list.h>
				20	#include <linux/tcp.h>
				21	#include <linux/bug.h>
				22	#include <linux/slab.h>
				23	#include <linux/cache.h>
				24	#include <linux/percpu.h>
				25	#include <linux/skbuff.h>
				26	#include <linux/cryptohash.h>
				27	#include <linux/kref.h>
				28	#include <linux/ktime.h>
				29
				30	#include <net/inet_connection_sock.h>
				31	#include <net/inet_timewait_sock.h>
				32	#include <net/inet_hashtables.h>
				33	#include <net/checksum.h>
				34	#include <net/request_sock.h>
				35	#include <net/sock_reuseport.h>
				36	#include <net/sock.h>
				37	#include <net/snmp.h>
				38	#include <net/ip.h>
				39	#include <net/tcp_states.h>
				40	#include <net/inet_ecn.h>
				41	#include <net/dst.h>
				42
				43	#include <linux/seq_file.h>
				44	#include <linux/memcontrol.h>
				45	#include <linux/bpf-cgroup.h>
				46	#include <linux/siphash.h>
				47
				/* Hash-table state used by TCP; the definition lives outside this header. */
				extern struct inet_hashinfo tcp_hashinfo;

				/* Per-CPU counter (by its name, of orphaned TCP sockets); defined elsewhere. */
				extern struct percpu_counter tcp_orphan_count;

				/* Time-wait entry point taking the socket, a state and a timeout. */
				void tcp_time_wait(struct sock *sk, int state, int timeo);
				52
				/* Header sizing. */
				#define MAX_TCP_HEADER		L1_CACHE_ALIGN(128 + MAX_HEADER)
				#define MAX_TCP_OPTION_SPACE	40
				#define TCP_MIN_SND_MSS		48
				#define TCP_MIN_GSO_SIZE	(TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)

				/*
				 * Without window scaling, never advertise more than 32767: some
				 * broken stacks treat the 16-bit window as a signed quantity.
				 */
				#define MAX_TCP_WINDOW		32767U

				/* Smallest MSS we accept: (60+60+8) - (20+20). */
				#define TCP_MIN_MSS		88U

				/* Starting point for MTU probing. */
				#define TCP_BASE_MSS		1024

				/* Probing interval; RFC4821 suggests 10 minutes. */
				#define TCP_PROBE_INTERVAL	600

				/* Interval at which TCP MTU probing stops. */
				#define TCP_PROBE_THRESHOLD	8

				/* Number of duplicate ACKs that triggers fast retransmit. */
				#define TCP_FASTRETRANS_THRESH	3

				/* Upper bound on ACKs sent quickly to speed up slow-start. */
				#define TCP_MAX_QUICKACKS	16U

				/* Largest window scale allowed by RFC1323. */
				#define TCP_MAX_WSCALE		14U

				/* States kept in urg_data. */
				#define TCP_URG_VALID		0x0100
				#define TCP_URG_NOTYET		0x0200
				#define TCP_URG_READ		0x0400
				89
				/*
				 * Retries attempted before trying to work out whether the gateway
				 * is down. The minimal RFC value is 3, which corresponds to
				 * ~3sec-8min depending on RTO.
				 */
				#define TCP_RETR1		3

				/*
				 * This should take at least 90 minutes to time out.
				 * RFC1122 says that the limit is 100 sec.
				 * 15 is ~13-30min depending on RTO.
				 */
				#define TCP_RETR2		15

				/*
				 * Retries performed when actively opening a connection.
				 * RFC1122 says the minimum retry MUST be at least 180secs.
				 * Nevertheless this value corresponds to 63secs of
				 * retransmission with the current initial RTO.
				 */
				#define TCP_SYN_RETRIES		6

				/*
				 * Retries performed when passively opening a connection.
				 * This corresponds to 31secs of retransmission with the
				 * current initial RTO.
				 */
				#define TCP_SYNACK_RETRIES	5
				119
				/* How long to wait before destroying TIME-WAIT state, about 60 seconds. */
				#define TCP_TIMEWAIT_LEN	(60*HZ)

				/*
				 * BSD style FIN_WAIT2 deadlock breaker.
				 * It used to be 3min, new value is 60sec,
				 * to combine FIN-WAIT-2 timeout with
				 * TIME-WAIT timer.
				 */
				#define TCP_FIN_TIMEOUT		TCP_TIMEWAIT_LEN
				#define TCP_FIN_TIMEOUT_MAX	(120 * HZ) /* max TCP_LINGER2 value (two minutes) */

				#define TCP_DELACK_MAX	((unsigned)(HZ/5))	/* maximal time to delay before sending an ACK */
				#if HZ >= 100
				#define TCP_DELACK_MIN	((unsigned)(HZ/25))	/* minimal time to delay before sending an ACK */
				#define TCP_ATO_MIN	((unsigned)(HZ/25))
				#else
				#define TCP_DELACK_MIN	4U
				#define TCP_ATO_MIN	4U
				#endif
				#define TCP_RTO_MAX	((unsigned)(120*HZ))
				#define TCP_RTO_MIN	((unsigned)(HZ/5))
				#define TCP_TIMEOUT_MIN	(2U) /* Min timeout for TCP timers in jiffies */

				#define TCP_TIMEOUT_MIN_US (2*USEC_PER_MSEC) /* Min TCP timeout in microsecs */

				#define TCP_TIMEOUT_INIT ((unsigned)(1*HZ))	/* RFC6298 2.1 initial RTO value */

				/*
				 * RFC 1122 initial RTO value, now used as a fallback RTO for the
				 * initial data transmission if no valid RTT sample has been
				 * acquired, most likely due to retrans in 3WHS.
				 */
				#define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ))

				/* Maximal interval between probes for local resources. */
				#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U))

				#define TCP_KEEPALIVE_TIME	(120*60*HZ)	/* two hours */
				#define TCP_KEEPALIVE_PROBES	9		/* Max of 9 keepalive probes */
				#define TCP_KEEPALIVE_INTVL	(75*HZ)
				158
				/* Upper bounds for the keepalive and SYN-count settings. */
				#define MAX_TCP_KEEPIDLE	32767
				#define MAX_TCP_KEEPINTVL	32767
				#define MAX_TCP_KEEPCNT		127
				#define MAX_TCP_SYNCNT		127

				/* Period of the SYNACK timer. */
				#define TCP_SYNQ_INTERVAL	(HZ/5)

				#define TCP_PAWS_24DAYS		(60 * 60 * 24 * 24)

				/*
				 * Per-host timestamps are invalidated after this time. It should
				 * be equal to (or greater than) TCP_TIMEWAIT_LEN to provide
				 * reliability equal to the one provided by timewait state.
				 */
				#define TCP_PAWS_MSL		60

				/*
				 * Replay window for per-host timestamps. It must be less than
				 * the minimal timewait lifetime.
				 */
				#define TCP_PAWS_WINDOW		1
				/*
				 * TCP option kinds.
				 */

				#define TCPOPT_NOP		1	/* Padding */
				#define TCPOPT_EOL		0	/* End of options */
				#define TCPOPT_MSS		2	/* Segment size negotiating */
				#define TCPOPT_WINDOW		3	/* Window scaling */
				#define TCPOPT_SACK_PERM	4	/* SACK Permitted */
				#define TCPOPT_SACK		5	/* SACK Block */
				#define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
				#define TCPOPT_MD5SIG		19	/* MD5 Signature (RFC2385) */
				#define TCPOPT_FASTOPEN		34	/* Fast open (RFC7413) */
				#define TCPOPT_EXP		254	/* Experimental */

				/*
				 * Magic numbers that follow the option value when sharing TCP
				 * experimental options.
				 * See draft-ietf-tcpm-experimental-options-00.txt
				 */
				#define TCPOPT_FASTOPEN_MAGIC	0xF989
				#define TCPOPT_SMC_MAGIC	0xE2D4C3D9

				/*
				 * TCP option lengths, as specified.
				 */

				#define TCPOLEN_MSS		4
				#define TCPOLEN_WINDOW		3
				#define TCPOLEN_SACK_PERM	2
				#define TCPOLEN_TIMESTAMP	10
				#define TCPOLEN_MD5SIG		18
				#define TCPOLEN_FASTOPEN_BASE	2
				#define TCPOLEN_EXP_FASTOPEN_BASE	4
				#define TCPOLEN_EXP_SMC_BASE	6

				/* ... and the lengths stacks really send out. */
				#define TCPOLEN_TSTAMP_ALIGNED		12
				#define TCPOLEN_WSCALE_ALIGNED		4
				#define TCPOLEN_SACKPERM_ALIGNED	4
				#define TCPOLEN_SACK_BASE		2
				#define TCPOLEN_SACK_BASE_ALIGNED	4
				#define TCPOLEN_SACK_PERBLOCK		8
				#define TCPOLEN_MD5SIG_ALIGNED		20
				#define TCPOLEN_MSS_ALIGNED		4
				#define TCPOLEN_EXP_SMC_BASE_ALIGNED	8
				220
				/* Bits stored in tp->nonagle. */
				#define TCP_NAGLE_OFF		1	/* Nagle's algo is disabled */
				#define TCP_NAGLE_CORK		2	/* Socket is corked */
				#define TCP_NAGLE_PUSH		4	/* Cork is overridden for already queued data */

				/* Thin-stream limit: after 6 linear retries, do exp. backoff. */
				#define TCP_THIN_LINEAR_RETRIES	6

				/* Initial congestion window, per rfc6928. */
				#define TCP_INIT_CWND		10

				/* Bit flags for sysctl_tcp_fastopen. */
				#define TFO_CLIENT_ENABLE	1
				#define TFO_SERVER_ENABLE	2
				#define TFO_CLIENT_NO_COOKIE	4	/* Data in SYN w/o cookie option */

				/* Accept SYN data w/o any cookie option. */
				#define TFO_SERVER_COOKIE_NOT_REQD	0x200

				/*
				 * Force enable TFO on all listeners, i.e., not requiring the
				 * TCP_FASTOPEN socket option.
				 */
				#define TFO_SERVER_WO_SOCKOPT1	0x400
				244
				245
				246	/* sysctl variables for tcp */
				247	extern int sysctl_tcp_max_orphans;
				248	extern long sysctl_tcp_mem[3];
				249
				250	#define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */
				251	#define TCP_RACK_STATIC_REO_WND 0x2 /* Use static RACK reo wnd */
				252	#define TCP_RACK_NO_DUPTHRESH 0x4 /* Do not use DUPACK threshold in RACK */
				253
				254	extern atomic_long_t tcp_memory_allocated;
				255	extern struct percpu_counter tcp_sockets_allocated;
				256	extern unsigned long tcp_memory_pressure;
				257
				258	/* optimized version of sk_under_memory_pressure() for TCP sockets */
				259	static inline bool tcp_under_memory_pressure(const struct sock *sk)
				260	{
				261	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
				262	mem_cgroup_under_socket_pressure(sk->sk_memcg))
				263	return true;
				264
				265	return READ_ONCE(tcp_memory_pressure);
				266	}
				/*
				 * Sequence-number comparison helpers for 32 bit unsigned values.
				 * Wraparound is handled by the unsigned subtraction followed by a
				 * signed interpretation of the difference.
				 */

				/* True when @seq1 precedes @seq2 in sequence space. */
				static inline bool before(__u32 seq1, __u32 seq2)
				{
					const __s32 delta = (__s32)(seq1 - seq2);

					return delta < 0;
				}
				#define after(seq2, seq1) 	before(seq1, seq2)
				277
				/*
				 * Is seq2 <= seq1 <= seq3 ? Both distances are measured from @seq2
				 * with unsigned arithmetic, so the test is wraparound-safe.
				 */
				static inline bool between(__u32 seq1, __u32 seq2, __u32 seq3)
				{
					const __u32 span = seq3 - seq2;
					const __u32 offset = seq1 - seq2;

					return span >= offset;
				}
				283
				284	static inline bool tcp_out_of_memory(struct sock *sk)
				285	{
				286	if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
				287	sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2))
				288	return true;
				289	return false;
				290	}
				291
				292	void sk_forced_mem_schedule(struct sock *sk, int size);
				293
				294	static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
				295	{
				296	struct percpu_counter *ocp = sk->sk_prot->orphan_count;
				297	int orphans = percpu_counter_read_positive(ocp);
				298
				299	if (orphans << shift > sysctl_tcp_max_orphans) {
				300	orphans = percpu_counter_sum_positive(ocp);
				301	if (orphans << shift > sysctl_tcp_max_orphans)
				302	return true;
				303	}
				304	return false;
				305	}
				306
				bool tcp_check_oom(struct sock *sk, int shift);


				/* The TCP struct proto instance, defined elsewhere. */
				extern struct proto tcp_prot;

				/* SNMP counter helpers applied to the tcp_statistics MIB of @net. */
				#define TCP_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.tcp_statistics, field)
				#define __TCP_INC_STATS(net, field)	__SNMP_INC_STATS((net)->mib.tcp_statistics, field)
				#define TCP_DEC_STATS(net, field)	SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
				#define TCP_ADD_STATS(net, field, val)	SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
				316
				317	void tcp_tasklet_init(void);
				318
				319	int tcp_v4_err(struct sk_buff *skb, u32);
				320
				321	void tcp_shutdown(struct sock *sk, int how);
				322
				323	int tcp_v4_early_demux(struct sk_buff *skb);
				324	int tcp_v4_rcv(struct sk_buff *skb);
				325
				326	int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
				327	int tcp_sendmsg(struct sock sk, struct msghdr msg, size_t size);
				328	int tcp_sendmsg_locked(struct sock sk, struct msghdr msg, size_t size);
				329	int tcp_sendpage(struct sock sk, struct page page, int offset, size_t size,
				330	int flags);
				331	int tcp_sendpage_locked(struct sock sk, struct page page, int offset,
				332	size_t size, int flags);
				333	ssize_t do_tcp_sendpages(struct sock sk, struct page page, int offset,
				334	size_t size, int flags);
				335	void tcp_release_cb(struct sock *sk);
				336	void tcp_wfree(struct sk_buff *skb);
				337	void tcp_write_timer_handler(struct sock *sk);
				338	void tcp_delack_timer_handler(struct sock *sk);
				339	int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
				340	int tcp_rcv_state_process(struct sock sk, struct sk_buff skb);
				341	void tcp_rcv_established(struct sock sk, struct sk_buff skb);
				342	void tcp_rcv_space_adjust(struct sock *sk);
				343	int tcp_twsk_unique(struct sock sk, struct sock sktw, void *twp);
				344	void tcp_twsk_destructor(struct sock *sk);
				345	ssize_t tcp_splice_read(struct socket sk, loff_t ppos,
				346	struct pipe_inode_info *pipe, size_t len,
				347	unsigned int flags);
				348
				349	static inline void tcp_dec_quickack_mode(struct sock *sk)
				350	{
				351	struct inet_connection_sock *icsk = inet_csk(sk);
				352
				353	if (icsk->icsk_ack.quick) {
				354	/* How many ACKs S/ACKing new data have we sent? */
				355	const unsigned int pkts = inet_csk_ack_scheduled(sk) ? 1 : 0;
				356
				357	if (pkts >= icsk->icsk_ack.quick) {
				358	icsk->icsk_ack.quick = 0;
				359	/* Leaving quickack mode we deflate ATO. */
				360	icsk->icsk_ack.ato = TCP_ATO_MIN;
				361	} else
				362	icsk->icsk_ack.quick -= pkts;
				363	}
				364	}
				365
				/* ECN state bits. */
				#define	TCP_ECN_OK		1
				#define	TCP_ECN_QUEUE_CWR	2
				#define	TCP_ECN_DEMAND_CWR	4
				#define	TCP_ECN_SEEN		8

				/* Result codes returned by tcp_timewait_state_process(). */
				enum tcp_tw_status {
					TCP_TW_SUCCESS	= 0,
					TCP_TW_RST	= 1,
					TCP_TW_ACK	= 2,
					TCP_TW_SYN	= 3
				};
				377
				378
				379	enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
				380	struct sk_buff *skb,
				381	const struct tcphdr *th);
				382	struct sock tcp_check_req(struct sock sk, struct sk_buff *skb,
				383	struct request_sock *req, bool fastopen,
				384	bool *lost_race);
				385	int tcp_child_process(struct sock parent, struct sock child,
				386	struct sk_buff *skb);
				387	void tcp_enter_loss(struct sock *sk);
				388	void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag);
				389	void tcp_clear_retrans(struct tcp_sock *tp);
				390	void tcp_update_metrics(struct sock *sk);
				391	void tcp_init_metrics(struct sock *sk);
				392	void tcp_metrics_init(void);
				393	bool tcp_peer_is_proven(struct request_sock req, struct dst_entry dst);
				394	void __tcp_close(struct sock *sk, long timeout);
				395	void tcp_close(struct sock *sk, long timeout);
				396	void tcp_init_sock(struct sock *sk);
				397	void tcp_init_transfer(struct sock *sk, int bpf_op);
				398	__poll_t tcp_poll(struct file file, struct socket sock,
				399	struct poll_table_struct *wait);
				400	int tcp_getsockopt(struct sock *sk, int level, int optname,
				401	char __user optval, int __user optlen);
				402	int tcp_setsockopt(struct sock *sk, int level, int optname,
				403	char __user *optval, unsigned int optlen);
				404	int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
				405	char __user optval, int __user optlen);
				406	int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
				407	char __user *optval, unsigned int optlen);
				408	void tcp_set_keepalive(struct sock *sk, int val);
				409	void tcp_syn_ack_timeout(const struct request_sock *req);
				410	int tcp_recvmsg(struct sock sk, struct msghdr msg, size_t len, int nonblock,
				411	int flags, int *addr_len);
				412	int tcp_set_rcvlowat(struct sock *sk, int val);
				413	void tcp_data_ready(struct sock *sk);
				414	#ifdef CONFIG_MMU
				415	int tcp_mmap(struct file file, struct socket sock,
				416	struct vm_area_struct *vma);
				417	#endif
				418	void tcp_parse_options(const struct net net, const struct sk_buff skb,
				419	struct tcp_options_received *opt_rx,
				420	int estab, struct tcp_fastopen_cookie *foc);
				421	const u8 tcp_parse_md5sig_option(const struct tcphdr th);
				422
				423	/*
				424	* BPF SKB-less helpers
				425	*/
				426	u16 tcp_v4_get_syncookie(struct sock sk, struct iphdr iph,
				427	struct tcphdr th, u32 cookie);
				428	u16 tcp_v6_get_syncookie(struct sock sk, struct ipv6hdr iph,
				429	struct tcphdr th, u32 cookie);
				430	u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
				431	const struct tcp_request_sock_ops *af_ops,
				432	struct sock sk, struct tcphdr th);
				433	/*
				434	* TCP v4 functions exported for the inet6 API
				435	*/
				436
				437	void tcp_v4_send_check(struct sock sk, struct sk_buff skb);
				438	void tcp_v4_mtu_reduced(struct sock *sk);
				439	void tcp_req_err(struct sock *sk, u32 seq, bool abort);
				440	int tcp_v4_conn_request(struct sock sk, struct sk_buff skb);
				441	struct sock tcp_create_openreq_child(const struct sock sk,
				442	struct request_sock *req,
				443	struct sk_buff *skb);
				444	void tcp_ca_openreq_child(struct sock sk, const struct dst_entry dst);
				445	struct sock tcp_v4_syn_recv_sock(const struct sock sk, struct sk_buff *skb,
				446	struct request_sock *req,
				447	struct dst_entry *dst,
				448	struct request_sock *req_unhash,
				449	bool *own_req);
				450	int tcp_v4_do_rcv(struct sock sk, struct sk_buff skb);
				451	int tcp_v4_connect(struct sock sk, struct sockaddr uaddr, int addr_len);
				452	int tcp_connect(struct sock *sk);
				453	enum tcp_synack_type {
				454	TCP_SYNACK_NORMAL,
				455	TCP_SYNACK_FASTOPEN,
				456	TCP_SYNACK_COOKIE,
				457	};
				458	struct sk_buff tcp_make_synack(const struct sock sk, struct dst_entry *dst,
				459	struct request_sock *req,
				460	struct tcp_fastopen_cookie *foc,
				461	enum tcp_synack_type synack_type);
				462	int tcp_disconnect(struct sock *sk, int flags);
				463
				464	void tcp_finish_connect(struct sock sk, struct sk_buff skb);
				465	int tcp_send_rcvq(struct sock sk, struct msghdr msg, size_t size);
				466	void inet_sk_rx_dst_set(struct sock sk, const struct sk_buff skb);
				467
				468	/* From syncookies.c */
				469	struct sock tcp_get_cookie_sock(struct sock sk, struct sk_buff *skb,
				470	struct request_sock *req,
				471	struct dst_entry *dst, u32 tsoff);
				472	int __cookie_v4_check(const struct iphdr iph, const struct tcphdr th,
				473	u32 cookie);
				474	struct sock cookie_v4_check(struct sock sk, struct sk_buff *skb);
				475	#ifdef CONFIG_SYN_COOKIES
				476
				/*
				 * Syncookies rely on a monotonic timer that ticks once every 60
				 * seconds. The counter feeds the hash and is partially encoded into
				 * the cookie value. Further validation of a cookie only happens when
				 * the delta between the current counter and the encoded one is below
				 * MAX_SYNCOOKIE_AGE, i.e. a sent cookie is valid for at most 2*60
				 * seconds (or less if the counter advances immediately after a
				 * cookie is generated).
				 */
				#define MAX_SYNCOOKIE_AGE	2
				#define TCP_SYNCOOKIE_PERIOD	(60 * HZ)
				#define TCP_SYNCOOKIE_VALID	(MAX_SYNCOOKIE_AGE * TCP_SYNCOOKIE_PERIOD)
				487
				488	/* syncookies: remember time of last synqueue overflow
				489	* But do not dirty this field too often (once per second is enough)
				490	* It is racy as we do not hold a lock, but race is very minor.
				491	*/
				492	static inline void tcp_synq_overflow(const struct sock *sk)
				493	{
				494	unsigned int last_overflow;
				495	unsigned int now = jiffies;
				496
				497	if (sk->sk_reuseport) {
				498	struct sock_reuseport *reuse;
				499
				500	reuse = rcu_dereference(sk->sk_reuseport_cb);
				501	if (likely(reuse)) {
				502	last_overflow = READ_ONCE(reuse->synq_overflow_ts);
				503	if (!time_between32(now, last_overflow,
				504	last_overflow + HZ))
				505	WRITE_ONCE(reuse->synq_overflow_ts, now);
				506	return;
				507	}
				508	}
				509
				510	last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp);
				511	if (!time_between32(now, last_overflow, last_overflow + HZ))
				512	WRITE_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp, now);
				513	}
				514
				515	/* syncookies: no recent synqueue overflow on this listening socket? */
				516	static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
				517	{
				518	unsigned int last_overflow;
				519	unsigned int now = jiffies;
				520
				521	if (sk->sk_reuseport) {
				522	struct sock_reuseport *reuse;
				523
				524	reuse = rcu_dereference(sk->sk_reuseport_cb);
				525	if (likely(reuse)) {
				526	last_overflow = READ_ONCE(reuse->synq_overflow_ts);
				527	return !time_between32(now, last_overflow - HZ,
				528	last_overflow +
				529	TCP_SYNCOOKIE_VALID);
				530	}
				531	}
				532
				533	last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp);
				534
				535	/* If last_overflow <= jiffies <= last_overflow + TCP_SYNCOOKIE_VALID,
				536	* then we're under synflood. However, we have to use
				537	* 'last_overflow - HZ' as lower bound. That's because a concurrent
				538	* tcp_synq_overflow() could update .ts_recent_stamp after we read
				539	* jiffies but before we store .ts_recent_stamp into last_overflow,
				540	* which could lead to rejecting a valid syncookie.
				541	*/
				542	return !time_between32(now, last_overflow - HZ,
				543	last_overflow + TCP_SYNCOOKIE_VALID);
				544	}
				545
				546	static inline u32 tcp_cookie_time(void)
				547	{
				548	u64 val = get_jiffies_64();
				549
				550	do_div(val, TCP_SYNCOOKIE_PERIOD);
				551	return val;
				552	}
				553
				554	u32 __cookie_v4_init_sequence(const struct iphdr iph, const struct tcphdr th,
				555	u16 *mssp);
				556	__u32 cookie_v4_init_sequence(const struct sk_buff skb, __u16 mss);
				557	u64 cookie_init_timestamp(struct request_sock *req, u64 now);
				558	bool cookie_timestamp_decode(const struct net *net,
				559	struct tcp_options_received *opt);
				560	bool cookie_ecn_ok(const struct tcp_options_received *opt,
				561	const struct net net, const struct dst_entry dst);
				562
				563	/* From net/ipv6/syncookies.c */
				564	int __cookie_v6_check(const struct ipv6hdr iph, const struct tcphdr th,
				565	u32 cookie);
				566	struct sock cookie_v6_check(struct sock sk, struct sk_buff *skb);
				567
				568	u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
				569	const struct tcphdr th, u16 mssp);
				570	__u32 cookie_v6_init_sequence(const struct sk_buff skb, __u16 mss);
				571	#endif
				572	/* tcp_output.c */
				573
				574	void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
				575	int nonagle);
				576	int __tcp_retransmit_skb(struct sock sk, struct sk_buff skb, int segs);
				577	int tcp_retransmit_skb(struct sock sk, struct sk_buff skb, int segs);
				578	void tcp_retransmit_timer(struct sock *sk);
				579	void tcp_xmit_retransmit_queue(struct sock *);
				580	void tcp_simple_retransmit(struct sock *);
				581	void tcp_enter_recovery(struct sock *sk, bool ece_ack);
				582	int tcp_trim_head(struct sock , struct sk_buff , u32);
				583	enum tcp_queue {
				584	TCP_FRAG_IN_WRITE_QUEUE,
				585	TCP_FRAG_IN_RTX_QUEUE,
				586	};
				587	int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
				588	struct sk_buff *skb, u32 len,
				589	unsigned int mss_now, gfp_t gfp);
				590
				591	void tcp_send_probe0(struct sock *);
				592	void tcp_send_partial(struct sock *);
				593	int tcp_write_wakeup(struct sock *, int mib);
				594	void tcp_send_fin(struct sock *sk);
				595	void tcp_send_active_reset(struct sock *sk, gfp_t priority);
				596	int tcp_send_synack(struct sock *);
				597	void tcp_push_one(struct sock *, unsigned int mss_now);
				598	void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
				599	void tcp_send_ack(struct sock *sk);
				600	void tcp_send_delayed_ack(struct sock *sk);
				601	void tcp_send_loss_probe(struct sock *sk);
				602	bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto);
				603	void tcp_skb_collapse_tstamp(struct sk_buff *skb,
				604	const struct sk_buff *next_skb);
				605
				/* tcp_input.c */
				void tcp_rearm_rto(struct sock *sk);
				void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
				void tcp_reset(struct sock *sk);
				void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
				void tcp_fin(struct sock *sk);
				void tcp_check_space(struct sock *sk);
				613
				614	/* tcp_timer.c */
				615	void tcp_init_xmit_timers(struct sock *);
				616	static inline void tcp_clear_xmit_timers(struct sock *sk)
				617	{
				618	if (hrtimer_try_to_cancel(&tcp_sk(sk)->pacing_timer) == 1)
				619	__sock_put(sk);
				620
				621	if (hrtimer_try_to_cancel(&tcp_sk(sk)->compressed_ack_timer) == 1)
				622	__sock_put(sk);
				623
				624	inet_csk_clear_xmit_timers(sk);
				625	}
				626
				627	unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
				628	unsigned int tcp_current_mss(struct sock *sk);
				629	u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when);
				630
				631	/* Bound MSS / TSO packet size with the half of the window */
				632	static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
				633	{
				634	int cutoff;
				635
				636	/* When peer uses tiny windows, there is no use in packetizing
				637	* to sub-MSS pieces for the sake of SWS or making sure there
				638	* are enough packets in the pipe for fast recovery.
				639	*
				640	* On the other hand, for extremely large MSS devices, handling
				641	* smaller than MSS windows in this way does make sense.
				642	*/
				643	if (tp->max_window > TCP_MSS_DEFAULT)
				644	cutoff = (tp->max_window >> 1);
				645	else
				646	cutoff = tp->max_window;
				647
				648	if (cutoff && pktsize > cutoff)
				649	return max_t(int, cutoff, 68U - tp->tcp_header_len);
				650	else
				651	return pktsize;
				652	}
				653
				654	/* tcp.c */
				655	void tcp_get_info(struct sock , struct tcp_info );
				656
				657	/* Read 'sendfile()'-style from a TCP socket */
				658	int tcp_read_sock(struct sock sk, read_descriptor_t desc,
				659	sk_read_actor_t recv_actor);
				660
				661	void tcp_initialize_rcv_mss(struct sock *sk);
				662
				663	int tcp_mtu_to_mss(struct sock *sk, int pmtu);
				664	int tcp_mss_to_mtu(struct sock *sk, int mss);
				665	void tcp_mtup_init(struct sock *sk);
				666	void tcp_init_buffer_space(struct sock *sk);
				667
				668	static inline void tcp_bound_rto(const struct sock *sk)
				669	{
				670	if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
				671	inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
				672	}
				673
				674	static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
				675	{
				676	return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us);
				677	}
				678
				679	static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
				680	{
				681	tp->pred_flags = htonl((tp->tcp_header_len << 26) \|
				682	ntohl(TCP_FLAG_ACK) \|
				683	snd_wnd);
				684	}
				685
				686	static inline void tcp_fast_path_on(struct tcp_sock *tp)
				687	{
				688	__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
				689	}
				690
				691	static inline void tcp_fast_path_check(struct sock *sk)
				692	{
				693	struct tcp_sock *tp = tcp_sk(sk);
				694
				695	if (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&
				696	tp->rcv_wnd &&
				697	atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
				698	!tp->urg_data)
				699	tcp_fast_path_on(tp);
				700	}
				701
				702	/* Compute the actual rto_min value */
				703	static inline u32 tcp_rto_min(struct sock *sk)
				704	{
				705	const struct dst_entry *dst = __sk_dst_get(sk);
				706	u32 rto_min = TCP_RTO_MIN;
				707
				708	if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
				709	rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
				710	return rto_min;
				711	}
				712
				713	static inline u32 tcp_rto_min_us(struct sock *sk)
				714	{
				715	return jiffies_to_usecs(tcp_rto_min(sk));
				716	}
				717
				718	static inline bool tcp_ca_dst_locked(const struct dst_entry *dst)
				719	{
				720	return dst_metric_locked(dst, RTAX_CC_ALGO);
				721	}
				722
				723	/* Minimum RTT in usec. ~0 means not available. */
				724	static inline u32 tcp_min_rtt(const struct tcp_sock *tp)
				725	{
				726	return minmax_get(&tp->rtt_min);
				727	}
				728
				729	/* Compute the actual receive window we are currently advertising.
				730	* Rcv_nxt can be after the window if our peer push more data
				731	* than the offered window.
				732	*/
				733	static inline u32 tcp_receive_window(const struct tcp_sock *tp)
				734	{
				735	s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;
				736
				737	if (win < 0)
				738	win = 0;
				739	return (u32) win;
				740	}
				741
				742	/* Choose a new window, without checks for shrinking, and without
				743	* scaling applied to the result. The caller does these things
				744	* if necessary. This is a "raw" window selection.
				745	*/
				746	u32 __tcp_select_window(struct sock *sk);
				747
				748	void tcp_send_window_probe(struct sock *sk);
				749
				/*
				 * To save some space TCP works with jiffies truncated to 32 bits.
				 * Note that this is different from tcp_time_stamp, which
				 * historically has been the same until linux-4.13.
				 */
				#define tcp_jiffies32 ((u32)jiffies)

				/*
				 * Clock rate of the 32bit value delivered for the TCP timestamp
				 * option (RFC 7323). It is no longer tied to jiffies, but to a
				 * 1 ms clock.
				 * Note: double check if you want to use tcp_jiffies32 instead of this.
				 */
				#define TCP_TS_HZ	1000
				762
				763	static inline u64 tcp_clock_ns(void)
				764	{
				765	return ktime_get_ns();
				766	}
				767
				768	static inline u64 tcp_clock_us(void)
				769	{
				770	return div_u64(tcp_clock_ns(), NSEC_PER_USEC);
				771	}
				772
				773	/* This should only be used in contexts where tp->tcp_mstamp is up to date */
				774	static inline u32 tcp_time_stamp(const struct tcp_sock *tp)
				775	{
				776	return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ);
				777	}
				778
				779	/* Convert a nsec timestamp into TCP TSval timestamp (ms based currently) */
				780	static inline u64 tcp_ns_to_ts(u64 ns)
				781	{
				782	return div_u64(ns, NSEC_PER_SEC / TCP_TS_HZ);
				783	}
				784
				785	/* Could use tcp_clock_us() / 1000, but this version uses a single divide */
				786	static inline u32 tcp_time_stamp_raw(void)
				787	{
				788	return tcp_ns_to_ts(tcp_clock_ns());
				789	}
				790
				791	void tcp_mstamp_refresh(struct tcp_sock *tp);
				792
				793	static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
				794	{
				795	return max_t(s64, t1 - t0, 0);
				796	}
				797
				798	static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
				799	{
				800	return tcp_ns_to_ts(skb->skb_mstamp_ns);
				801	}
				802
				803	/* provide the departure time in us unit */
				804	static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
				805	{
				806	return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC);
				807	}
				808
				809
				/*
				 * Byte 13 of the TCP header at @th, i.e. the flags byte. The
				 * argument is parenthesized so that pointer expressions such as
				 * "p + 1" are evaluated before the cast to a byte pointer.
				 */
				#define tcp_flag_byte(th) (((u_int8_t *)(th))[13])

				/* Flag bits as they appear in the TCP header flags byte. */
				#define TCPHDR_FIN 0x01
				#define TCPHDR_SYN 0x02
				#define TCPHDR_RST 0x04
				#define TCPHDR_PSH 0x08
				#define TCPHDR_ACK 0x10
				#define TCPHDR_URG 0x20
				#define TCPHDR_ECE 0x40
				#define TCPHDR_CWR 0x80

				/* SYN combined with both ECN bits. */
				#define TCPHDR_SYN_ECN	(TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR)
				822
				823	/* This is what the send packet queuing engine uses to pass
				824	* TCP per-packet control information to the transmission code.
				825	* We also store the host-order sequence numbers in here too.
				826	* This is 44 bytes if IPV6 is enabled.
				827	* If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately.
				828	*/
				829	struct tcp_skb_cb {
				830	__u32 seq; /* Starting sequence number */
				831	__u32 end_seq; /* SEQ + FIN + SYN + datalen */
				832	union {
				833	/* Note : tcp_tw_isn is used in input path only
				834	* (isn chosen by tcp_timewait_state_process())
				835	*
				836	* tcp_gso_segs/size are used in write queue only,
				837	* cf tcp_skb_pcount()/tcp_skb_mss()
				838	*/
				839	__u32 tcp_tw_isn;
				840	struct {
				841	u16 tcp_gso_segs;
				842	u16 tcp_gso_size;
				843	};
				844	};
				845	__u8 tcp_flags; /* TCP header flags. (tcp[13]) */
				846
				847	__u8 sacked; /* State flags for SACK. */
				848	#define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */
				849	#define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */
				850	#define TCPCB_LOST 0x04 /* SKB is lost */
				851	#define TCPCB_TAGBITS 0x07 /* All tag bits */
				852	#define TCPCB_REPAIRED 0x10 /* SKB repaired (no skb_mstamp_ns) */
				853	#define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */
				854	#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS\|TCPCB_EVER_RETRANS\| \
				855	TCPCB_REPAIRED)
				856
				857	__u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */
				858	__u8 txstamp_ack:1, /* Record TX timestamp for ack? */
				859	eor:1, /* Is skb MSG_EOR marked? */
				860	has_rxtstamp:1, /* SKB has a RX timestamp */
				861	unused:5;
				862	__u32 ack_seq; /* Sequence number ACK'd */
				863	union {
				864	struct {
				865	/* There is space for up to 24 bytes */
				866	__u32 in_flight:30,/* Bytes in flight at transmit */
				867	is_app_limited:1, /* cwnd not fully used? */
				868	unused:1;
				869	/* pkts S/ACKed so far upon tx of skb, incl retrans: */
				870	__u32 delivered;
				871	/* start of send pipeline phase */
				872	u64 first_tx_mstamp;
				873	/* when we reached the "delivered" count */
				874	u64 delivered_mstamp;
				875	} tx; /* only used for outgoing skbs */
				876	union {
				877	struct inet_skb_parm h4;
				878	#if IS_ENABLED(CONFIG_IPV6)
				879	struct inet6_skb_parm h6;
				880	#endif
				881	} header; /* For incoming skbs */
				882	struct {
				883	__u32 flags;
				884	struct sock *sk_redir;
				885	void *data_end;
				886	} bpf;
				887	};
				888	};
				889
				890	#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
				891
				892	static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
				893	{
				894	TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
				895	}
				896
				897	static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb)
				898	{
				899	return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS;
				900	}
				901
				902	static inline struct sock tcp_skb_bpf_redirect_fetch(struct sk_buff skb)
				903	{
				904	return TCP_SKB_CB(skb)->bpf.sk_redir;
				905	}
				906
				907	static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb)
				908	{
				909	TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
				910	}
				911
				912	#if IS_ENABLED(CONFIG_IPV6)
				913	/* This is the variant of inet6_iif() that must be used by TCP,
				914	* as TCP moves IP6CB into a different location in skb->cb[]
				915	*/
				916	static inline int tcp_v6_iif(const struct sk_buff *skb)
				917	{
				918	return TCP_SKB_CB(skb)->header.h6.iif;
				919	}
				920
				921	static inline int tcp_v6_iif_l3_slave(const struct sk_buff *skb)
				922	{
				923	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
				924
				925	return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif;
				926	}
				927
				928	/* TCP_SKB_CB reference means this can not be used from early demux */
				929	static inline int tcp_v6_sdif(const struct sk_buff *skb)
				930	{
				931	#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
				932	if (skb && ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags))
				933	return TCP_SKB_CB(skb)->header.h6.iif;
				934	#endif
				935	return 0;
				936	}
				937
				938	void tcp_v6_early_demux(struct sk_buff *skb);
				939	#endif
				940
				941	static inline bool inet_exact_dif_match(struct net net, struct sk_buff skb)
				942	{
				943	#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
				944	if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
				945	skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
				946	return true;
				947	#endif
				948	return false;
				949	}
				950
				951	/* TCP_SKB_CB reference means this can not be used from early demux */
				952	static inline int tcp_v4_sdif(struct sk_buff *skb)
				953	{
				954	#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
				955	if (skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
				956	return TCP_SKB_CB(skb)->header.h4.iif;
				957	#endif
				958	return 0;
				959	}
				960
				961	/* Due to TSO, an SKB can be composed of multiple actual
				962	* packets. To keep these tracked properly, we use this.
				963	*/
				964	static inline int tcp_skb_pcount(const struct sk_buff *skb)
				965	{
				966	return TCP_SKB_CB(skb)->tcp_gso_segs;
				967	}
				968
				969	static inline void tcp_skb_pcount_set(struct sk_buff *skb, int segs)
				970	{
				971	TCP_SKB_CB(skb)->tcp_gso_segs = segs;
				972	}
				973
				974	static inline void tcp_skb_pcount_add(struct sk_buff *skb, int segs)
				975	{
				976	TCP_SKB_CB(skb)->tcp_gso_segs += segs;
				977	}
				978
				979	/* This is valid iff skb is in write queue and tcp_skb_pcount() > 1. */
				980	static inline int tcp_skb_mss(const struct sk_buff *skb)
				981	{
				982	return TCP_SKB_CB(skb)->tcp_gso_size;
				983	}
				984
				985	static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb)
				986	{
				987	return likely(!TCP_SKB_CB(skb)->eor);
				988	}
				989
				990	/* Events passed to congestion control interface */
				991	enum tcp_ca_event {
				992	CA_EVENT_TX_START, /* first transmit when no packets in flight */
				993	CA_EVENT_CWND_RESTART, /* congestion window restart */
				994	CA_EVENT_COMPLETE_CWR, /* end of congestion recovery */
				995	CA_EVENT_LOSS, /* loss timeout */
				996	CA_EVENT_ECN_NO_CE, /* ECT set, but not CE marked */
				997	CA_EVENT_ECN_IS_CE, /* received CE marked IP packet */
				998	};
				999
				1000	/* Information about inbound ACK, passed to cong_ops->in_ack_event() */
				1001	enum tcp_ca_ack_event_flags {
				1002	CA_ACK_SLOWPATH = (1 << 0), /* In slow path processing */
				1003	CA_ACK_WIN_UPDATE = (1 << 1), /* ACK updated window */
				1004	CA_ACK_ECE = (1 << 2), /* ECE bit is set on ack */
				1005	};
				1006
				1007	/*
				1008	* Interface for adding new TCP congestion control handlers
				1009	*/
				1010	#define TCP_CA_NAME_MAX 16
				1011	#define TCP_CA_MAX 128
				1012	#define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX)
				1013
				1014	#define TCP_CA_UNSPEC 0
				1015
				1016	/* Algorithm can be set on socket without CAP_NET_ADMIN privileges */
				1017	#define TCP_CONG_NON_RESTRICTED 0x1
				1018	/* Requires ECN/ECT set on all packets */
				1019	#define TCP_CONG_NEEDS_ECN 0x2
				1020
				1021	union tcp_cc_info;
				1022
				1023	struct ack_sample {
				1024	u32 pkts_acked;
				1025	s32 rtt_us;
				1026	u32 in_flight;
				1027	};
				1028
				1029	/* A rate sample measures the number of (original/retransmitted) data
				1030	* packets delivered "delivered" over an interval of time "interval_us".
				1031	* The tcp_rate.c code fills in the rate sample, and congestion
				1032	* control modules that define a cong_control function to run at the end
				1033	* of ACK processing can optionally chose to consult this sample when
				1034	* setting cwnd and pacing rate.
				1035	* A sample is invalid if "delivered" or "interval_us" is negative.
				1036	*/
				1037	struct rate_sample {
				1038	u64 prior_mstamp; /* starting timestamp for interval */
				1039	u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
				1040	s32 delivered; /* number of packets delivered over interval */
				1041	long interval_us; /* time for tp->delivered to incr "delivered" */
				1042	u32 snd_interval_us; /* snd interval for delivered packets */
				1043	u32 rcv_interval_us; /* rcv interval for delivered packets */
				1044	long rtt_us; /* RTT of last (S)ACKed packet (or -1) */
				1045	int losses; /* number of packets marked lost upon ACK */
				1046	u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */
				1047	u32 prior_in_flight; /* in flight before this ACK */
				1048	u32 last_end_seq; /* end_seq of most recently ACKed packet */
				1049	bool is_app_limited; /* is sample from packet with bubble in pipe? */
				1050	bool is_retrans; /* is sample from retransmission? */
				1051	bool is_ack_delayed; /* is this (likely) a delayed ACK? */
				1052	};
				1053
				1054	struct tcp_congestion_ops {
				1055	struct list_head list;
				1056	u32 key;
				1057	u32 flags;
				1058
				1059	/* initialize private data (optional) */
				1060	void (init)(struct sock sk);
				1061	/* cleanup private data (optional) */
				1062	void (release)(struct sock sk);
				1063
				1064	/* return slow start threshold (required) */
				1065	u32 (ssthresh)(struct sock sk);
				1066	/* do new cwnd calculation (required) */
				1067	void (cong_avoid)(struct sock sk, u32 ack, u32 acked);
				1068	/* call before changing ca_state (optional) */
				1069	void (set_state)(struct sock sk, u8 new_state);
				1070	/* call when cwnd event occurs (optional) */
				1071	void (cwnd_event)(struct sock sk, enum tcp_ca_event ev);
				1072	/* call when ack arrives (optional) */
				1073	void (in_ack_event)(struct sock sk, u32 flags);
				1074	/* new value of cwnd after loss (required) */
				1075	u32 (undo_cwnd)(struct sock sk);
				1076	/* hook for packet ack accounting (optional) */
				1077	void (pkts_acked)(struct sock sk, const struct ack_sample *sample);
				1078	/* override sysctl_tcp_min_tso_segs */
				1079	u32 (min_tso_segs)(struct sock sk);
				1080	/* returns the multiplier used in tcp_sndbuf_expand (optional) */
				1081	u32 (sndbuf_expand)(struct sock sk);
				1082	/* call when packets are delivered to update cwnd and pacing rate,
				1083	* after all the ca_state processing. (optional)
				1084	*/
				1085	void (cong_control)(struct sock sk, const struct rate_sample *rs);
				1086	/* get info for inet_diag (optional) */
				1087	size_t (get_info)(struct sock sk, u32 ext, int *attr,
				1088	union tcp_cc_info *info);
				1089
				1090	char name[TCP_CA_NAME_MAX];
				1091	struct module *owner;
				1092	};
				1093
				1094	int tcp_register_congestion_control(struct tcp_congestion_ops *type);
				1095	void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
				1096
				1097	void tcp_assign_congestion_control(struct sock *sk);
				1098	void tcp_init_congestion_control(struct sock *sk);
				1099	void tcp_cleanup_congestion_control(struct sock *sk);
				1100	int tcp_set_default_congestion_control(struct net net, const char name);
				1101	void tcp_get_default_congestion_control(struct net net, char name);
				1102	void tcp_get_available_congestion_control(char *buf, size_t len);
				1103	void tcp_get_allowed_congestion_control(char *buf, size_t len);
				1104	int tcp_set_allowed_congestion_control(char *allowed);
				1105	int tcp_set_congestion_control(struct sock sk, const char name, bool load,
				1106	bool reinit, bool cap_net_admin);
				1107	u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
				1108	void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
				1109
				1110	u32 tcp_reno_ssthresh(struct sock *sk);
				1111	u32 tcp_reno_undo_cwnd(struct sock *sk);
				1112	void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
				1113	extern struct tcp_congestion_ops tcp_reno;
				1114
				1115	struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
				1116	u32 tcp_ca_get_key_by_name(struct net net, const char name, bool *ecn_ca);
				1117	#ifdef CONFIG_INET
				1118	char tcp_ca_get_name_by_key(u32 key, char buffer);
				1119	#else
				1120	static inline char tcp_ca_get_name_by_key(u32 key, char buffer)
				1121	{
				1122	return NULL;
				1123	}
				1124	#endif
				1125
				1126	static inline bool tcp_ca_needs_ecn(const struct sock *sk)
				1127	{
				1128	const struct inet_connection_sock *icsk = inet_csk(sk);
				1129
				1130	return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN;
				1131	}
				1132
				1133	static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
				1134	{
				1135	struct inet_connection_sock *icsk = inet_csk(sk);
				1136
				1137	if (icsk->icsk_ca_ops->set_state)
				1138	icsk->icsk_ca_ops->set_state(sk, ca_state);
				1139	icsk->icsk_ca_state = ca_state;
				1140	}
				1141
				1142	static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
				1143	{
				1144	const struct inet_connection_sock *icsk = inet_csk(sk);
				1145
				1146	if (icsk->icsk_ca_ops->cwnd_event)
				1147	icsk->icsk_ca_ops->cwnd_event(sk, event);
				1148	}
				1149
				1150	/* From tcp_rate.c */
				1151	void tcp_rate_skb_sent(struct sock sk, struct sk_buff skb);
				1152	void tcp_rate_skb_delivered(struct sock sk, struct sk_buff skb,
				1153	struct rate_sample *rs);
				1154	void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
				1155	bool is_sack_reneg, struct rate_sample *rs);
				1156	void tcp_rate_check_app_limited(struct sock *sk);
				1157
				1158	static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
				1159	{
				1160	return t1 > t2 \|\| (t1 == t2 && after(seq1, seq2));
				1161	}
				1162
				1163	/* These functions determine how the current flow behaves in respect of SACK
				1164	* handling. SACK is negotiated with the peer, and therefore it can vary
				1165	* between different flows.
				1166	*
				1167	* tcp_is_sack - SACK enabled
				1168	* tcp_is_reno - No SACK
				1169	*/
				1170	static inline int tcp_is_sack(const struct tcp_sock *tp)
				1171	{
				1172	return likely(tp->rx_opt.sack_ok);
				1173	}
				1174
				1175	static inline bool tcp_is_reno(const struct tcp_sock *tp)
				1176	{
				1177	return !tcp_is_sack(tp);
				1178	}
				1179
				1180	static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
				1181	{
				1182	return tp->sacked_out + tp->lost_out;
				1183	}
				1184
				1185	/* This determines how many packets are "in the network" to the best
				1186	* of our knowledge. In many cases it is conservative, but where
				1187	* detailed information is available from the receiver (via SACK
				1188	* blocks etc.) we can make more aggressive calculations.
				1189	*
				1190	* Use this for decisions involving congestion control, use just
				1191	* tp->packets_out to determine if the send queue is empty or not.
				1192	*
				1193	* Read this equation as:
				1194	*
				1195	* "Packets sent once on transmission queue" MINUS
				1196	* "Packets left network, but not honestly ACKed yet" PLUS
				1197	* "Packets fast retransmitted"
				1198	*/
				1199	static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
				1200	{
				1201	return tp->packets_out - tcp_left_out(tp) + tp->retrans_out;
				1202	}
				1203
				1204	#define TCP_INFINITE_SSTHRESH 0x7fffffff
				1205
				1206	static inline bool tcp_in_slow_start(const struct tcp_sock *tp)
				1207	{
				1208	return tp->snd_cwnd < tp->snd_ssthresh;
				1209	}
				1210
				1211	static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
				1212	{
				1213	return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
				1214	}
				1215
				1216	static inline bool tcp_in_cwnd_reduction(const struct sock *sk)
				1217	{
				1218	return (TCPF_CA_CWR \| TCPF_CA_Recovery) &
				1219	(1 << inet_csk(sk)->icsk_ca_state);
				1220	}
				1221
				1222	/* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
				1223	* The exception is cwnd reduction phase, when cwnd is decreasing towards
				1224	* ssthresh.
				1225	*/
				1226	static inline __u32 tcp_current_ssthresh(const struct sock *sk)
				1227	{
				1228	const struct tcp_sock *tp = tcp_sk(sk);
				1229
				1230	if (tcp_in_cwnd_reduction(sk))
				1231	return tp->snd_ssthresh;
				1232	else
				1233	return max(tp->snd_ssthresh,
				1234	((tp->snd_cwnd >> 1) +
				1235	(tp->snd_cwnd >> 2)));
				1236	}
				1237
				1238	/* Use define here intentionally to get WARN_ON location shown at the caller */
				1239	#define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out)
				1240
				1241	void tcp_enter_cwr(struct sock *sk);
				1242	__u32 tcp_init_cwnd(const struct tcp_sock tp, const struct dst_entry dst);
				1243
				1244	/* The maximum number of MSS of available cwnd for which TSO defers
				1245	* sending if not using sysctl_tcp_tso_win_divisor.
				1246	*/
				1247	static inline __u32 tcp_max_tso_deferred_mss(const struct tcp_sock *tp)
				1248	{
				1249	return 3;
				1250	}
				1251
				1252	/* Returns end sequence number of the receiver's advertised window */
				1253	static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
				1254	{
				1255	return tp->snd_una + tp->snd_wnd;
				1256	}
				1257
				1258	/* We follow the spirit of RFC2861 to validate cwnd but implement a more
				1259	* flexible approach. The RFC suggests cwnd should not be raised unless
				1260	* it was fully used previously. And that's exactly what we do in
				1261	* congestion avoidance mode. But in slow start we allow cwnd to grow
				1262	* as long as the application has used half the cwnd.
				1263	* Example :
				1264	* cwnd is 10 (IW10), but application sends 9 frames.
				1265	* We allow cwnd to reach 18 when all frames are ACKed.
				1266	* This check is safe because it's as aggressive as slow start which already
				1267	* risks 100% overshoot. The advantage is that we discourage application to
				1268	* either send more filler packets or data to artificially blow up the cwnd
				1269	* usage, and allow application-limited process to probe bw more aggressively.
				1270	*/
				1271	static inline bool tcp_is_cwnd_limited(const struct sock *sk)
				1272	{
				1273	const struct tcp_sock *tp = tcp_sk(sk);
				1274
				1275	if (tp->is_cwnd_limited)
				1276	return true;
				1277
				1278	/* If in slow start, ensure cwnd grows to twice what was ACKed. */
				1279	if (tcp_in_slow_start(tp))
				1280	return tp->snd_cwnd < 2 * tp->max_packets_out;
				1281
				1282	return false;
				1283	}
				1284
				1285	/* BBR congestion control needs pacing.
				1286	* Same remark for SO_MAX_PACING_RATE.
				1287	* sch_fq packet scheduler is efficiently handling pacing,
				1288	* but is not always installed/used.
				1289	* Return true if TCP stack should pace packets itself.
				1290	*/
				1291	static inline bool tcp_needs_internal_pacing(const struct sock *sk)
				1292	{
				1293	return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
				1294	}
				1295
				1296	/* Return in jiffies the delay before one skb is sent.
				1297	* If @skb is NULL, we look at EDT for next packet being sent on the socket.
				1298	*/
				1299	static inline unsigned long tcp_pacing_delay(const struct sock *sk,
				1300	const struct sk_buff *skb)
				1301	{
				1302	s64 pacing_delay = skb ? skb->tstamp : tcp_sk(sk)->tcp_wstamp_ns;
				1303
				1304	pacing_delay -= tcp_sk(sk)->tcp_clock_cache;
				1305
				1306	return pacing_delay > 0 ? nsecs_to_jiffies(pacing_delay) : 0;
				1307	}
				1308
				1309	static inline void tcp_reset_xmit_timer(struct sock *sk,
				1310	const int what,
				1311	unsigned long when,
				1312	const unsigned long max_when,
				1313	const struct sk_buff *skb)
				1314	{
				1315	inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk, skb),
				1316	max_when);
				1317	}
				1318
				1319	/* Something is really bad, we could not queue an additional packet,
				1320	* because qdisc is full or receiver sent a 0 window, or we are paced.
				1321	* We do not want to add fuel to the fire, or abort too early,
				1322	* so make sure the timer we arm now is at least 200ms in the future,
				1323	* regardless of current icsk_rto value (as it could be ~2ms)
				1324	*/
				1325	static inline unsigned long tcp_probe0_base(const struct sock *sk)
				1326	{
				1327	return max_t(unsigned long, inet_csk(sk)->icsk_rto, TCP_RTO_MIN);
				1328	}
				1329
				1330	/* Variant of inet_csk_rto_backoff() used for zero window probes */
				1331	static inline unsigned long tcp_probe0_when(const struct sock *sk,
				1332	unsigned long max_when)
				1333	{
				1334	u64 when = (u64)tcp_probe0_base(sk) << inet_csk(sk)->icsk_backoff;
				1335
				1336	return (unsigned long)min_t(u64, when, max_when);
				1337	}
				1338
				1339	static inline void tcp_check_probe_timer(struct sock *sk)
				1340	{
				1341	if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending)
				1342	tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
				1343	tcp_probe0_base(sk), TCP_RTO_MAX,
				1344	NULL);
				1345	}
				1346
				1347	static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
				1348	{
				1349	tp->snd_wl1 = seq;
				1350	}
				1351
				1352	static inline void tcp_update_wl(struct tcp_sock *tp, u32 seq)
				1353	{
				1354	tp->snd_wl1 = seq;
				1355	}
				1356
				1357	/*
				1358	* Calculate(/check) TCP checksum
				1359	*/
				1360	static inline __sum16 tcp_v4_check(int len, __be32 saddr,
				1361	__be32 daddr, __wsum base)
				1362	{
				1363	return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_TCP, base);
				1364	}
				1365
				1366	static inline bool tcp_checksum_complete(struct sk_buff *skb)
				1367	{
				1368	return !skb_csum_unnecessary(skb) &&
				1369	__skb_checksum_complete(skb);
				1370	}
				1371
				1372	bool tcp_add_backlog(struct sock sk, struct sk_buff skb);
				1373	int tcp_filter(struct sock sk, struct sk_buff skb);
				1374	void tcp_set_state(struct sock *sk, int state);
				1375	void tcp_done(struct sock *sk);
				1376	int tcp_abort(struct sock *sk, int err);
				1377
				1378	static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
				1379	{
				1380	rx_opt->dsack = 0;
				1381	rx_opt->num_sacks = 0;
				1382	}
				1383
				1384	u32 tcp_default_init_rwnd(u32 mss);
				1385	void tcp_cwnd_restart(struct sock *sk, s32 delta);
				1386
				1387	static inline void tcp_slow_start_after_idle_check(struct sock *sk)
				1388	{
				1389	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
				1390	struct tcp_sock *tp = tcp_sk(sk);
				1391	s32 delta;
				1392
				1393	if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) \|\|
				1394	tp->packets_out \|\| ca_ops->cong_control)
				1395	return;
				1396	delta = tcp_jiffies32 - tp->lsndtime;
				1397	if (delta > inet_csk(sk)->icsk_rto)
				1398	tcp_cwnd_restart(sk, delta);
				1399	}
				1400
				1401	/* Determine a window scaling and initial window to offer. */
				1402	void tcp_select_initial_window(const struct sock *sk, int __space,
				1403	__u32 mss, __u32 *rcv_wnd,
				1404	__u32 *window_clamp, int wscale_ok,
				1405	__u8 *rcv_wscale, __u32 init_rcv_wnd);
				1406
				1407	static inline int tcp_win_from_space(const struct sock *sk, int space)
				1408	{
				1409	int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);
				1410
				1411	return tcp_adv_win_scale <= 0 ?
				1412	(space>>(-tcp_adv_win_scale)) :
				1413	space - (space>>tcp_adv_win_scale);
				1414	}
				1415
				1416	/* Note: caller must be prepared to deal with negative returns */
				1417	static inline int tcp_space(const struct sock *sk)
				1418	{
				1419	return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) -
				1420	READ_ONCE(sk->sk_backlog.len) -
				1421	atomic_read(&sk->sk_rmem_alloc));
				1422	}
				1423
				1424	static inline int tcp_full_space(const struct sock *sk)
				1425	{
				1426	return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
				1427	}
				1428
				1429	/* We provision sk_rcvbuf around 200% of sk_rcvlowat.
				1430	* If 87.5 % (7/8) of the space has been consumed, we want to override
				1431	* SO_RCVLOWAT constraint, since we are receiving skbs with too small
				1432	* len/truesize ratio.
				1433	*/
				1434	static inline bool tcp_rmem_pressure(const struct sock *sk)
				1435	{
				1436	int rcvbuf, threshold;
				1437
				1438	if (tcp_under_memory_pressure(sk))
				1439	return true;
				1440
				1441	rcvbuf = READ_ONCE(sk->sk_rcvbuf);
				1442	threshold = rcvbuf - (rcvbuf >> 3);
				1443
				1444	return atomic_read(&sk->sk_rmem_alloc) > threshold;
				1445	}
				1446
				1447	extern void tcp_openreq_init_rwin(struct request_sock *req,
				1448	const struct sock *sk_listener,
				1449	const struct dst_entry *dst);
				1450
				1451	void tcp_enter_memory_pressure(struct sock *sk);
				1452	void tcp_leave_memory_pressure(struct sock *sk);
				1453
				1454	static inline int keepalive_intvl_when(const struct tcp_sock *tp)
				1455	{
				1456	struct net net = sock_net((struct sock )tp);
				1457
				1458	return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl;
				1459	}
				1460
				1461	static inline int keepalive_time_when(const struct tcp_sock *tp)
				1462	{
				1463	struct net net = sock_net((struct sock )tp);
				1464
				1465	return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time;
				1466	}
				1467
				1468	static inline int keepalive_probes(const struct tcp_sock *tp)
				1469	{
				1470	struct net net = sock_net((struct sock )tp);
				1471
				1472	return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes;
				1473	}
				1474
				1475	static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
				1476	{
				1477	const struct inet_connection_sock *icsk = &tp->inet_conn;
				1478
				1479	return min_t(u32, tcp_jiffies32 - icsk->icsk_ack.lrcvtime,
				1480	tcp_jiffies32 - tp->rcv_tstamp);
				1481	}
				1482
				1483	static inline int tcp_fin_time(const struct sock *sk)
				1484	{
				1485	int fin_timeout = tcp_sk(sk)->linger2 ? :
				1486	READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout);
				1487	const int rto = inet_csk(sk)->icsk_rto;
				1488
				1489	if (fin_timeout < (rto << 2) - (rto >> 1))
				1490	fin_timeout = (rto << 2) - (rto >> 1);
				1491
				1492	return fin_timeout;
				1493	}
				1494
				1495	static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt,
				1496	int paws_win)
				1497	{
				1498	if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
				1499	return true;
				1500	if (unlikely(!time_before32(ktime_get_seconds(),
				1501	rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)))
				1502	return true;
				1503	/*
				1504	* Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0,
				1505	* then following tcp messages have valid values. Ignore 0 value,
				1506	* or else 'negative' tsval might forbid us to accept their packets.
				1507	*/
				1508	if (!rx_opt->ts_recent)
				1509	return true;
				1510	return false;
				1511	}
				1512
				1513	static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
				1514	int rst)
				1515	{
				1516	if (tcp_paws_check(rx_opt, 0))
				1517	return false;
				1518
				1519	/* RST segments are not recommended to carry timestamp,
				1520	and, if they do, it is recommended to ignore PAWS because
				1521	"their cleanup function should take precedence over timestamps."
				1522	Certainly, it is mistake. It is necessary to understand the reasons
				1523	of this constraint to relax it: if peer reboots, clock may go
				1524	out-of-sync and half-open connections will not be reset.
				1525	Actually, the problem would be not existing if all
				1526	the implementations followed draft about maintaining clock
				1527	via reboots. Linux-2.2 DOES NOT!
				1528
				1529	However, we can relax time bounds for RST segments to MSL.
				1530	*/
				1531	if (rst && !time_before32(ktime_get_seconds(),
				1532	rx_opt->ts_recent_stamp + TCP_PAWS_MSL))
				1533	return false;
				1534	return true;
				1535	}
				1536
				1537	bool tcp_oow_rate_limited(struct net net, const struct sk_buff skb,
				1538	int mib_idx, u32 *last_oow_ack_time);
				1539
				1540	static inline void tcp_mib_init(struct net *net)
				1541	{
				1542	/* See RFC 2012 */
				1543	TCP_ADD_STATS(net, TCP_MIB_RTOALGORITHM, 1);
				1544	TCP_ADD_STATS(net, TCP_MIB_RTOMIN, TCP_RTO_MIN*1000/HZ);
				1545	TCP_ADD_STATS(net, TCP_MIB_RTOMAX, TCP_RTO_MAX*1000/HZ);
				1546	TCP_ADD_STATS(net, TCP_MIB_MAXCONN, -1);
				1547	}
				1548
				1549	/* from STCP */
				1550	static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp)
				1551	{
				1552	tp->lost_skb_hint = NULL;
				1553	}
				1554
				1555	static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp)
				1556	{
				1557	tcp_clear_retrans_hints_partial(tp);
				1558	tp->retransmit_skb_hint = NULL;
				1559	}
				1560
				1561	union tcp_md5_addr {
				1562	struct in_addr a4;
				1563	#if IS_ENABLED(CONFIG_IPV6)
				1564	struct in6_addr a6;
				1565	#endif
				1566	};
				1567
				1568	/* - key database */
				1569	struct tcp_md5sig_key {
				1570	struct hlist_node node;
				1571	u8 keylen;
				1572	u8 family; /* AF_INET or AF_INET6 */
				1573	union tcp_md5_addr addr;
				1574	u8 prefixlen;
				1575	u8 key[TCP_MD5SIG_MAXKEYLEN];
				1576	struct rcu_head rcu;
				1577	};
				1578
				1579	/* - sock block */
				1580	struct tcp_md5sig_info {
				1581	struct hlist_head head;
				1582	struct rcu_head rcu;
				1583	};
				1584
				1585	/* - pseudo header */
				1586	struct tcp4_pseudohdr {
				1587	__be32 saddr;
				1588	__be32 daddr;
				1589	__u8 pad;
				1590	__u8 protocol;
				1591	__be16 len;
				1592	};
				1593
				1594	struct tcp6_pseudohdr {
				1595	struct in6_addr saddr;
				1596	struct in6_addr daddr;
				1597	__be32 len;
				1598	__be32 protocol; /* including padding */
				1599	};
				1600
				1601	union tcp_md5sum_block {
				1602	struct tcp4_pseudohdr ip4;
				1603	#if IS_ENABLED(CONFIG_IPV6)
				1604	struct tcp6_pseudohdr ip6;
				1605	#endif
				1606	};
				1607
				1608	/* - pool: digest algorithm, hash description and scratch buffer */
				1609	struct tcp_md5sig_pool {
				1610	struct ahash_request *md5_req;
				1611	void *scratch;
				1612	};
				1613
				1614	/* - functions */
				1615	int tcp_v4_md5_hash_skb(char md5_hash, const struct tcp_md5sig_key key,
				1616	const struct sock sk, const struct sk_buff skb);
				1617	int tcp_md5_do_add(struct sock sk, const union tcp_md5_addr addr,
				1618	int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
				1619	gfp_t gfp);
				1620	int tcp_md5_do_del(struct sock sk, const union tcp_md5_addr addr,
				1621	int family, u8 prefixlen);
				1622	struct tcp_md5sig_key tcp_v4_md5_lookup(const struct sock sk,
				1623	const struct sock *addr_sk);
				1624
				1625	#ifdef CONFIG_TCP_MD5SIG
				1626	#include <linux/jump_label.h>
				1627	extern struct static_key_false tcp_md5_needed;
				1628	struct tcp_md5sig_key __tcp_md5_do_lookup(const struct sock sk,
				1629	const union tcp_md5_addr *addr,
				1630	int family);
				1631	static inline struct tcp_md5sig_key *
				1632	tcp_md5_do_lookup(const struct sock *sk,
				1633	const union tcp_md5_addr *addr,
				1634	int family)
				1635	{
				1636	if (!static_branch_unlikely(&tcp_md5_needed))
				1637	return NULL;
				1638	return __tcp_md5_do_lookup(sk, addr, family);
				1639	}
				1640
				1641	#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key)
				1642	#else
				1643	static inline struct tcp_md5sig_key tcp_md5_do_lookup(const struct sock sk,
				1644	const union tcp_md5_addr *addr,
				1645	int family)
				1646	{
				1647	return NULL;
				1648	}
				1649	#define tcp_twsk_md5_key(twsk) NULL
				1650	#endif
				1651
				1652	bool tcp_alloc_md5sig_pool(void);
				1653
				1654	struct tcp_md5sig_pool *tcp_get_md5sig_pool(void);
				1655	static inline void tcp_put_md5sig_pool(void)
				1656	{
				1657	local_bh_enable();
				1658	}
				1659
				1660	int tcp_md5_hash_skb_data(struct tcp_md5sig_pool , const struct sk_buff ,
				1661	unsigned int header_len);
				1662	int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
				1663	const struct tcp_md5sig_key *key);
				1664
				1665	/* From tcp_fastopen.c */
				1666	void tcp_fastopen_cache_get(struct sock sk, u16 mss,
				1667	struct tcp_fastopen_cookie *cookie);
				1668	void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
				1669	struct tcp_fastopen_cookie *cookie, bool syn_lost,
				1670	u16 try_exp);
				1671	struct tcp_fastopen_request {
				1672	/* Fast Open cookie. Size 0 means a cookie request */
				1673	struct tcp_fastopen_cookie cookie;
				1674	struct msghdr data; / data in MSG_FASTOPEN */
				1675	size_t size;
				1676	int copied; /* queued in tcp_connect() */
				1677	struct ubuf_info *uarg;
				1678	};
				1679	void tcp_free_fastopen_req(struct tcp_sock *tp);
				1680	void tcp_fastopen_destroy_cipher(struct sock *sk);
				1681	void tcp_fastopen_ctx_destroy(struct net *net);
				1682	int tcp_fastopen_reset_cipher(struct net net, struct sock sk,
				1683	void primary_key, void backup_key);
				1684	int tcp_fastopen_get_cipher(struct net net, struct inet_connection_sock icsk,
				1685	u64 *key);
				1686	void tcp_fastopen_add_skb(struct sock sk, struct sk_buff skb);
				1687	struct sock tcp_try_fastopen(struct sock sk, struct sk_buff *skb,
				1688	struct request_sock *req,
				1689	struct tcp_fastopen_cookie *foc,
				1690	const struct dst_entry *dst);
				1691	void tcp_fastopen_init_key_once(struct net *net);
				1692	bool tcp_fastopen_cookie_check(struct sock sk, u16 mss,
				1693	struct tcp_fastopen_cookie *cookie);
				1694	bool tcp_fastopen_defer_connect(struct sock sk, int err);
				1695	#define TCP_FASTOPEN_KEY_LENGTH sizeof(siphash_key_t)
				1696	#define TCP_FASTOPEN_KEY_MAX 2
				1697	#define TCP_FASTOPEN_KEY_BUF_LENGTH \
				1698	(TCP_FASTOPEN_KEY_LENGTH * TCP_FASTOPEN_KEY_MAX)
				1699
				1700	/* Fastopen key context */
				1701	struct tcp_fastopen_context {
				1702	siphash_key_t key[TCP_FASTOPEN_KEY_MAX];
				1703	int num;
				1704	struct rcu_head rcu;
				1705	};
				1706
				1707	extern unsigned int sysctl_tcp_fastopen_blackhole_timeout;
				1708	void tcp_fastopen_active_disable(struct sock *sk);
				1709	bool tcp_fastopen_active_should_disable(struct sock *sk);
				1710	void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
				1711	void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired);
				1712
				1713	/* Caller needs to wrap with rcu_read_(un)lock() */
				1714	static inline
				1715	struct tcp_fastopen_context tcp_fastopen_get_ctx(const struct sock sk)
				1716	{
				1717	struct tcp_fastopen_context *ctx;
				1718
				1719	ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
				1720	if (!ctx)
				1721	ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
				1722	return ctx;
				1723	}
				1724
				1725	static inline
				1726	bool tcp_fastopen_cookie_match(const struct tcp_fastopen_cookie *foc,
				1727	const struct tcp_fastopen_cookie *orig)
				1728	{
				1729	if (orig->len == TCP_FASTOPEN_COOKIE_SIZE &&
				1730	orig->len == foc->len &&
				1731	!memcmp(orig->val, foc->val, foc->len))
				1732	return true;
				1733	return false;
				1734	}
				1735
				1736	static inline
				1737	int tcp_fastopen_context_len(const struct tcp_fastopen_context *ctx)
				1738	{
				1739	return ctx->num;
				1740	}
				1741
				1742	/* Latencies incurred by various limits for a sender. They are
				1743	* chronograph-like stats that are mutually exclusive.
				1744	*/
				1745	enum tcp_chrono {
				1746	TCP_CHRONO_UNSPEC,
				1747	TCP_CHRONO_BUSY, /* Actively sending data (non-empty write queue) */
				1748	TCP_CHRONO_RWND_LIMITED, /* Stalled by insufficient receive window */
				1749	TCP_CHRONO_SNDBUF_LIMITED, /* Stalled by insufficient send buffer */
				1750	__TCP_CHRONO_MAX,
				1751	};
				1752
				1753	void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type);
				1754	void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type);
				1755
				1756	/* This helper is needed, because skb->tcp_tsorted_anchor uses
				1757	* the same memory storage than skb->destructor/_skb_refdst
				1758	*/
				1759	static inline void tcp_skb_tsorted_anchor_cleanup(struct sk_buff *skb)
				1760	{
				1761	skb->destructor = NULL;
				1762	skb->_skb_refdst = 0UL;
				1763	}
				1764
				1765	#define tcp_skb_tsorted_save(skb) { \
				1766	unsigned long _save = skb->_skb_refdst; \
				1767	skb->_skb_refdst = 0UL;
				1768
				1769	#define tcp_skb_tsorted_restore(skb) \
				1770	skb->_skb_refdst = _save; \
				1771	}
				1772
				1773	void tcp_write_queue_purge(struct sock *sk);
				1774
				1775	static inline struct sk_buff tcp_rtx_queue_head(const struct sock sk)
				1776	{
				1777	return skb_rb_first(&sk->tcp_rtx_queue);
				1778	}
				1779
				1780	static inline struct sk_buff tcp_rtx_queue_tail(const struct sock sk)
				1781	{
				1782	return skb_rb_last(&sk->tcp_rtx_queue);
				1783	}
				1784
				1785	static inline struct sk_buff tcp_write_queue_head(const struct sock sk)
				1786	{
				1787	return skb_peek(&sk->sk_write_queue);
				1788	}
				1789
				1790	static inline struct sk_buff tcp_write_queue_tail(const struct sock sk)
				1791	{
				1792	return skb_peek_tail(&sk->sk_write_queue);
				1793	}
				1794
				1795	#define tcp_for_write_queue_from_safe(skb, tmp, sk) \
				1796	skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp)
				1797
				1798	static inline struct sk_buff tcp_send_head(const struct sock sk)
				1799	{
				1800	return skb_peek(&sk->sk_write_queue);
				1801	}
				1802
				1803	static inline bool tcp_skb_is_last(const struct sock *sk,
				1804	const struct sk_buff *skb)
				1805	{
				1806	return skb_queue_is_last(&sk->sk_write_queue, skb);
				1807	}
				1808
				1809	static inline bool tcp_write_queue_empty(const struct sock *sk)
				1810	{
				1811	return skb_queue_empty(&sk->sk_write_queue);
				1812	}
				1813
				1814	static inline bool tcp_rtx_queue_empty(const struct sock *sk)
				1815	{
				1816	return RB_EMPTY_ROOT(&sk->tcp_rtx_queue);
				1817	}
				1818
				1819	static inline bool tcp_rtx_and_write_queues_empty(const struct sock *sk)
				1820	{
				1821	return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk);
				1822	}
				1823
				1824	static inline void tcp_add_write_queue_tail(struct sock sk, struct sk_buff skb)
				1825	{
				1826	__skb_queue_tail(&sk->sk_write_queue, skb);
				1827
				1828	/* Queue it, remembering where we must start sending. */
				1829	if (sk->sk_write_queue.next == skb)
				1830	tcp_chrono_start(sk, TCP_CHRONO_BUSY);
				1831	}
				1832
				1833	/* Insert new before skb on the write queue of sk. */
				1834	static inline void tcp_insert_write_queue_before(struct sk_buff *new,
				1835	struct sk_buff *skb,
				1836	struct sock *sk)
				1837	{
				1838	__skb_queue_before(&sk->sk_write_queue, skb, new);
				1839	}
				1840
				1841	static inline void tcp_unlink_write_queue(struct sk_buff skb, struct sock sk)
				1842	{
				1843	tcp_skb_tsorted_anchor_cleanup(skb);
				1844	__skb_unlink(skb, &sk->sk_write_queue);
				1845	}
				1846
				1847	void tcp_rbtree_insert(struct rb_root root, struct sk_buff skb);
				1848
				1849	static inline void tcp_rtx_queue_unlink(struct sk_buff skb, struct sock sk)
				1850	{
				1851	tcp_skb_tsorted_anchor_cleanup(skb);
				1852	rb_erase(&skb->rbnode, &sk->tcp_rtx_queue);
				1853	}
				1854
				1855	static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff skb, struct sock sk)
				1856	{
				1857	list_del(&skb->tcp_tsorted_anchor);
				1858	tcp_rtx_queue_unlink(skb, sk);
				1859	sk_wmem_free_skb(sk, skb);
				1860	}
				1861
				1862	static inline void tcp_push_pending_frames(struct sock *sk)
				1863	{
				1864	if (tcp_send_head(sk)) {
				1865	struct tcp_sock *tp = tcp_sk(sk);
				1866
				1867	__tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle);
				1868	}
				1869	}
				1870
				1871	/* Start sequence of the skb just after the highest skb with SACKed
				1872	* bit, valid only if sacked_out > 0 or when the caller has ensured
				1873	* validity by itself.
				1874	*/
				1875	static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp)
				1876	{
				1877	if (!tp->sacked_out)
				1878	return tp->snd_una;
				1879
				1880	if (tp->highest_sack == NULL)
				1881	return tp->snd_nxt;
				1882
				1883	return TCP_SKB_CB(tp->highest_sack)->seq;
				1884	}
				1885
				1886	static inline void tcp_advance_highest_sack(struct sock sk, struct sk_buff skb)
				1887	{
				1888	tcp_sk(sk)->highest_sack = skb_rb_next(skb);
				1889	}
				1890
				1891	static inline struct sk_buff tcp_highest_sack(struct sock sk)
				1892	{
				1893	return tcp_sk(sk)->highest_sack;
				1894	}
				1895
				1896	static inline void tcp_highest_sack_reset(struct sock *sk)
				1897	{
				1898	tcp_sk(sk)->highest_sack = tcp_rtx_queue_head(sk);
				1899	}
				1900
				1901	/* Called when old skb is about to be deleted and replaced by new skb */
				1902	static inline void tcp_highest_sack_replace(struct sock *sk,
				1903	struct sk_buff *old,
				1904	struct sk_buff *new)
				1905	{
				1906	if (old == tcp_highest_sack(sk))
				1907	tcp_sk(sk)->highest_sack = new;
				1908	}
				1909
				1910	/* This helper checks if socket has IP_TRANSPARENT set */
				1911	static inline bool inet_sk_transparent(const struct sock *sk)
				1912	{
				1913	switch (sk->sk_state) {
				1914	case TCP_TIME_WAIT:
				1915	return inet_twsk(sk)->tw_transparent;
				1916	case TCP_NEW_SYN_RECV:
				1917	return inet_rsk(inet_reqsk(sk))->no_srccheck;
				1918	}
				1919	return inet_sk(sk)->transparent;
				1920	}
				1921
				1922	/* Determines whether this is a thin stream (which may suffer from
				1923	* increased latency). Used to trigger latency-reducing mechanisms.
				1924	*/
				1925	static inline bool tcp_stream_is_thin(struct tcp_sock *tp)
				1926	{
				1927	return tp->packets_out < 4 && !tcp_in_initial_slowstart(tp);
				1928	}
				1929
				1930	/* /proc */
				1931	enum tcp_seq_states {
				1932	TCP_SEQ_STATE_LISTENING,
				1933	TCP_SEQ_STATE_ESTABLISHED,
				1934	};
				1935
				1936	void tcp_seq_start(struct seq_file seq, loff_t *pos);
				1937	void tcp_seq_next(struct seq_file seq, void v, loff_t pos);
				1938	void tcp_seq_stop(struct seq_file seq, void v);
				1939
				1940	struct tcp_seq_afinfo {
				1941	sa_family_t family;
				1942	};
				1943
				1944	struct tcp_iter_state {
				1945	struct seq_net_private p;
				1946	enum tcp_seq_states state;
				1947	struct sock *syn_wait_sk;
				1948	int bucket, offset, sbucket, num;
				1949	loff_t last_pos;
				1950	};
				1951
				1952	extern struct request_sock_ops tcp_request_sock_ops;
				1953	extern struct request_sock_ops tcp6_request_sock_ops;
				1954
				1955	void tcp_v4_destroy_sock(struct sock *sk);
				1956
				1957	struct sk_buff tcp_gso_segment(struct sk_buff skb,
				1958	netdev_features_t features);
				1959	struct sk_buff tcp_gro_receive(struct list_head head, struct sk_buff *skb);
				1960	int tcp_gro_complete(struct sk_buff *skb);
				1961
				1962	void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
				1963
				1964	static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
				1965	{
				1966	struct net net = sock_net((struct sock )tp);
				1967	u32 val;
				1968
				1969	val = READ_ONCE(tp->notsent_lowat);
				1970
				1971	return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
				1972	}
				1973
				1974	/* @wake is one when sk_stream_write_space() calls us.
				1975	* This sends EPOLLOUT only if notsent_bytes is half the limit.
				1976	* This mimics the strategy used in sock_def_write_space().
				1977	*/
				1978	static inline bool tcp_stream_memory_free(const struct sock *sk, int wake)
				1979	{
				1980	const struct tcp_sock *tp = tcp_sk(sk);
				1981	u32 notsent_bytes = READ_ONCE(tp->write_seq) -
				1982	READ_ONCE(tp->snd_nxt);
				1983
				1984	return (notsent_bytes << wake) < tcp_notsent_lowat(tp);
				1985	}
				1986
				1987	#ifdef CONFIG_PROC_FS
				1988	int tcp4_proc_init(void);
				1989	void tcp4_proc_exit(void);
				1990	#endif
				1991
				1992	int tcp_rtx_synack(const struct sock sk, struct request_sock req);
				1993	int tcp_conn_request(struct request_sock_ops *rsk_ops,
				1994	const struct tcp_request_sock_ops *af_ops,
				1995	struct sock sk, struct sk_buff skb);
				1996
				1997	/* TCP af-specific functions */
				1998	struct tcp_sock_af_ops {
				1999	#ifdef CONFIG_TCP_MD5SIG
				2000	struct tcp_md5sig_key (md5_lookup) (const struct sock *sk,
				2001	const struct sock *addr_sk);
				2002	int (calc_md5_hash)(char location,
				2003	const struct tcp_md5sig_key *md5,
				2004	const struct sock *sk,
				2005	const struct sk_buff *skb);
				2006	int (md5_parse)(struct sock sk,
				2007	int optname,
				2008	char __user *optval,
				2009	int optlen);
				2010	#endif
				2011	};
				2012
				2013	struct tcp_request_sock_ops {
				2014	u16 mss_clamp;
				2015	#ifdef CONFIG_TCP_MD5SIG
				2016	struct tcp_md5sig_key (req_md5_lookup)(const struct sock *sk,
				2017	const struct sock *addr_sk);
				2018	int (calc_md5_hash) (char location,
				2019	const struct tcp_md5sig_key *md5,
				2020	const struct sock *sk,
				2021	const struct sk_buff *skb);
				2022	#endif
				2023	void (init_req)(struct request_sock req,
				2024	const struct sock *sk_listener,
				2025	struct sk_buff *skb);
				2026	#ifdef CONFIG_SYN_COOKIES
				2027	__u32 (cookie_init_seq)(const struct sk_buff skb,
				2028	__u16 *mss);
				2029	#endif
				2030	struct dst_entry (route_req)(const struct sock sk, struct flowi fl,
				2031	const struct request_sock *req);
				2032	u32 (init_seq)(const struct sk_buff skb);
				2033	u32 (init_ts_off)(const struct net net, const struct sk_buff *skb);
				2034	int (send_synack)(const struct sock sk, struct dst_entry *dst,
				2035	struct flowi fl, struct request_sock req,
				2036	struct tcp_fastopen_cookie *foc,
				2037	enum tcp_synack_type synack_type);
				2038	};
				2039
				2040	extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops;
				2041	#if IS_ENABLED(CONFIG_IPV6)
				2042	extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops;
				2043	#endif
				2044
				2045	#ifdef CONFIG_SYN_COOKIES
				2046	static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
				2047	const struct sock sk, struct sk_buff skb,
				2048	__u16 *mss)
				2049	{
				2050	tcp_synq_overflow(sk);
				2051	__NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
				2052	return ops->cookie_init_seq(skb, mss);
				2053	}
				2054	#else
				2055	static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
				2056	const struct sock sk, struct sk_buff skb,
				2057	__u16 *mss)
				2058	{
				2059	return 0;
				2060	}
				2061	#endif
				2062
				2063	int tcpv4_offload_init(void);
				2064
				2065	void tcp_v4_init(void);
				2066	void tcp_init(void);
				2067
				2068	/* tcp_recovery.c */
				2069	void tcp_mark_skb_lost(struct sock sk, struct sk_buff skb);
				2070	void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced);
				2071	extern s32 tcp_rack_skb_timeout(struct tcp_sock tp, struct sk_buff skb,
				2072	u32 reo_wnd);
				2073	extern bool tcp_rack_mark_lost(struct sock *sk);
				2074	extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
				2075	u64 xmit_time);
				2076	extern void tcp_rack_reo_timeout(struct sock *sk);
				2077	extern void tcp_rack_update_reo_wnd(struct sock sk, struct rate_sample rs);
				2078
				2079	/* At how many usecs into the future should the RTO fire? */
				2080	static inline s64 tcp_rto_delta_us(const struct sock *sk)
				2081	{
				2082	const struct sk_buff *skb = tcp_rtx_queue_head(sk);
				2083	u32 rto = inet_csk(sk)->icsk_rto;
				2084
				2085	if (likely(skb)) {
				2086	u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto);
				2087
				2088	return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
				2089	} else {
				2090	WARN_ONCE(1,
				2091	"rtx queue emtpy: "
				2092	"out:%u sacked:%u lost:%u retrans:%u "
				2093	"tlp_high_seq:%u sk_state:%u ca_state:%u "
				2094	"advmss:%u mss_cache:%u pmtu:%u\n",
				2095	tcp_sk(sk)->packets_out, tcp_sk(sk)->sacked_out,
				2096	tcp_sk(sk)->lost_out, tcp_sk(sk)->retrans_out,
				2097	tcp_sk(sk)->tlp_high_seq, sk->sk_state,
				2098	inet_csk(sk)->icsk_ca_state,
				2099	tcp_sk(sk)->advmss, tcp_sk(sk)->mss_cache,
				2100	inet_csk(sk)->icsk_pmtu_cookie);
				2101	return jiffies_to_usecs(rto);
				2102	}
				2103
				2104	}
				2105
				2106	/*
				2107	* Save and compile IPv4 options, return a pointer to it
				2108	*/
				2109	static inline struct ip_options_rcu tcp_v4_save_options(struct net net,
				2110	struct sk_buff *skb)
				2111	{
				2112	const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
				2113	struct ip_options_rcu *dopt = NULL;
				2114
				2115	if (opt->optlen) {
				2116	int opt_size = sizeof(*dopt) + opt->optlen;
				2117
				2118	dopt = kmalloc(opt_size, GFP_ATOMIC);
				2119	if (dopt && __ip_options_echo(net, &dopt->opt, skb, opt)) {
				2120	kfree(dopt);
				2121	dopt = NULL;
				2122	}
				2123	}
				2124	return dopt;
				2125	}
				2126
				2127	/* locally generated TCP pure ACKs have skb->truesize == 2
				2128	* (check tcp_send_ack() in net/ipv4/tcp_output.c )
				2129	* This is much faster than dissecting the packet to find out.
				2130	* (Think of GRE encapsulations, IPv4, IPv6, ...)
				2131	*/
				2132	static inline bool skb_is_tcp_pure_ack(const struct sk_buff *skb)
				2133	{
				2134	return skb->truesize == 2;
				2135	}
				2136
				2137	static inline void skb_set_tcp_pure_ack(struct sk_buff *skb)
				2138	{
				2139	skb->truesize = 2;
				2140	}
				2141
				2142	static inline int tcp_inq(struct sock *sk)
				2143	{
				2144	struct tcp_sock *tp = tcp_sk(sk);
				2145	int answ;
				2146
				2147	if ((1 << sk->sk_state) & (TCPF_SYN_SENT \| TCPF_SYN_RECV)) {
				2148	answ = 0;
				2149	} else if (sock_flag(sk, SOCK_URGINLINE) \|\|
				2150	!tp->urg_data \|\|
				2151	before(tp->urg_seq, tp->copied_seq) \|\|
				2152	!before(tp->urg_seq, tp->rcv_nxt)) {
				2153
				2154	answ = tp->rcv_nxt - tp->copied_seq;
				2155
				2156	/* Subtract 1, if FIN was received */
				2157	if (answ && sock_flag(sk, SOCK_DONE))
				2158	answ--;
				2159	} else {
				2160	answ = tp->urg_seq - tp->copied_seq;
				2161	}
				2162
				2163	return answ;
				2164	}
				2165
				2166	int tcp_peek_len(struct socket *sock);
				2167
				2168	static inline void tcp_segs_in(struct tcp_sock tp, const struct sk_buff skb)
				2169	{
				2170	u16 segs_in;
				2171
				2172	segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
				2173	tp->segs_in += segs_in;
				2174	if (skb->len > tcp_hdrlen(skb))
				2175	tp->data_segs_in += segs_in;
				2176	}
				2177
				2178	/*
				2179	* TCP listen path runs lockless.
				2180	* We forced "struct sock" to be const qualified to make sure
				2181	* we don't modify one of its field by mistake.
				2182	* Here, we increment sk_drops which is an atomic_t, so we can safely
				2183	* make sock writable again.
				2184	*/
				2185	static inline void tcp_listendrop(const struct sock *sk)
				2186	{
				2187	atomic_inc(&((struct sock *)sk)->sk_drops);
				2188	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
				2189	}
				2190
				2191	enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer);
				2192
				2193	/*
				2194	* Interface for adding Upper Level Protocols over TCP
				2195	*/
				2196
				2197	#define TCP_ULP_NAME_MAX 16
				2198	#define TCP_ULP_MAX 128
				2199	#define TCP_ULP_BUF_MAX (TCP_ULP_NAME_MAX*TCP_ULP_MAX)
				2200
				2201	struct tcp_ulp_ops {
				2202	struct list_head list;
				2203
				2204	/* initialize ulp */
				2205	int (init)(struct sock sk);
				2206	/* update ulp */
				2207	void (update)(struct sock sk, struct proto *p,
				2208	void (write_space)(struct sock sk));
				2209	/* cleanup ulp */
				2210	void (release)(struct sock sk);
				2211	/* diagnostic */
				2212	int (get_info)(const struct sock sk, struct sk_buff *skb);
				2213	size_t (get_info_size)(const struct sock sk);
				2214
				2215	char name[TCP_ULP_NAME_MAX];
				2216	struct module *owner;
				2217	};
				2218	int tcp_register_ulp(struct tcp_ulp_ops *type);
				2219	void tcp_unregister_ulp(struct tcp_ulp_ops *type);
				2220	int tcp_set_ulp(struct sock sk, const char name);
				2221	void tcp_get_available_ulp(char *buf, size_t len);
				2222	void tcp_cleanup_ulp(struct sock *sk);
				2223	void tcp_update_ulp(struct sock sk, struct proto p,
				2224	void (write_space)(struct sock sk));
				2225
				2226	#define MODULE_ALIAS_TCP_ULP(name) \
				2227	__MODULE_INFO(alias, alias_userspace, name); \
				2228	__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
				2229
				2230	struct sk_msg;
				2231	struct sk_psock;
				2232
				2233	int tcp_bpf_init(struct sock *sk);
				2234	void tcp_bpf_reinit(struct sock *sk);
				2235	int tcp_bpf_sendmsg_redir(struct sock sk, struct sk_msg msg, u32 bytes,
				2236	int flags);
				2237	int tcp_bpf_recvmsg(struct sock sk, struct msghdr msg, size_t len,
				2238	int nonblock, int flags, int *addr_len);
				2239	int __tcp_bpf_recvmsg(struct sock sk, struct sk_psock psock,
				2240	struct msghdr *msg, int len, int flags);
				2241
				2242	/* Call BPF_SOCK_OPS program that returns an int. If the return value
				2243	* is < 0, then the BPF op failed (for example if the loaded BPF
				2244	* program does not support the chosen operation or there is no BPF
				2245	* program loaded).
				2246	*/
				2247	#ifdef CONFIG_BPF
				2248	static inline int tcp_call_bpf(struct sock sk, int op, u32 nargs, u32 args)
				2249	{
				2250	struct bpf_sock_ops_kern sock_ops;
				2251	int ret;
				2252
				2253	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
				2254	if (sk_fullsock(sk)) {
				2255	sock_ops.is_fullsock = 1;
				2256	sock_owned_by_me(sk);
				2257	}
				2258
				2259	sock_ops.sk = sk;
				2260	sock_ops.op = op;
				2261	if (nargs > 0)
				2262	memcpy(sock_ops.args, args, nargs * sizeof(*args));
				2263
				2264	ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
				2265	if (ret == 0)
				2266	ret = sock_ops.reply;
				2267	else
				2268	ret = -1;
				2269	return ret;
				2270	}
				2271
				2272	static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2)
				2273	{
				2274	u32 args[2] = {arg1, arg2};
				2275
				2276	return tcp_call_bpf(sk, op, 2, args);
				2277	}
				2278
				2279	static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2,
				2280	u32 arg3)
				2281	{
				2282	u32 args[3] = {arg1, arg2, arg3};
				2283
				2284	return tcp_call_bpf(sk, op, 3, args);
				2285	}
				2286
				2287	#else
				2288	static inline int tcp_call_bpf(struct sock sk, int op, u32 nargs, u32 args)
				2289	{
				2290	return -EPERM;
				2291	}
				2292
				2293	static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2)
				2294	{
				2295	return -EPERM;
				2296	}
				2297
				2298	static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2,
				2299	u32 arg3)
				2300	{
				2301	return -EPERM;
				2302	}
				2303
				2304	#endif
				2305
				2306	static inline u32 tcp_timeout_init(struct sock *sk)
				2307	{
				2308	int timeout;
				2309
				2310	timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT, 0, NULL);
				2311
				2312	if (timeout <= 0)
				2313	timeout = TCP_TIMEOUT_INIT;
				2314	return timeout;
				2315	}
				2316
				2317	static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
				2318	{
				2319	int rwnd;
				2320
				2321	rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT, 0, NULL);
				2322
				2323	if (rwnd < 0)
				2324	rwnd = 0;
				2325	return rwnd;
				2326	}
				2327
				2328	static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
				2329	{
				2330	return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
				2331	}
				2332
				2333	static inline void tcp_bpf_rtt(struct sock *sk)
				2334	{
				2335	if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_RTT_CB_FLAG))
				2336	tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL);
				2337	}
				2338
				2339	#if IS_ENABLED(CONFIG_SMC)
				2340	extern struct static_key_false tcp_have_smc;
				2341	#endif
				2342
				2343	#if IS_ENABLED(CONFIG_TLS_DEVICE)
				2344	void clean_acked_data_enable(struct inet_connection_sock *icsk,
				2345	void (cad)(struct sock sk, u32 ack_seq));
				2346	void clean_acked_data_disable(struct inet_connection_sock *icsk);
				2347	void clean_acked_data_flush(void);
				2348	#endif
				2349
				2350	DECLARE_STATIC_KEY_FALSE(tcp_tx_delay_enabled);
				2351	static inline void tcp_add_tx_delay(struct sk_buff *skb,
				2352	const struct tcp_sock *tp)
				2353	{
				2354	if (static_branch_unlikely(&tcp_tx_delay_enabled))
				2355	skb->skb_mstamp_ns += (u64)tp->tcp_tx_delay * NSEC_PER_USEC;
				2356	}
				2357
				2358	/* Compute Earliest Departure Time for some control packets
				2359	* like ACK or RST for TIME_WAIT or non ESTABLISHED sockets.
				2360	*/
				2361	static inline u64 tcp_transmit_time(const struct sock *sk)
				2362	{
				2363	if (static_branch_unlikely(&tcp_tx_delay_enabled)) {
				2364	u32 delay = (sk->sk_state == TCP_TIME_WAIT) ?
				2365	tcp_twsk(sk)->tw_tx_delay : tcp_sk(sk)->tcp_tx_delay;
				2366
				2367	return tcp_clock_ns() + (u64)delay * NSEC_PER_USEC;
				2368	}
				2369	return 0;
				2370	}
				2371
				2372	#endif /* _TCP_H */