Blame - marvell/linux/net/ipv4/tcp_timer.c - T108

blob: 9740f2989f284b650834aa5ac8e2f51884b779d0 [file] [log] [blame]

b.liu	e958203	2025-04-17 19:18:16 +0800	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0-only
				2	/*
				3	* INET An implementation of the TCP/IP protocol suite for the LINUX
				4	* operating system. INET is implemented using the BSD Socket
				5	* interface as the means of communication with the user level.
				6	*
				7	* Implementation of the Transmission Control Protocol(TCP).
				8	*
				9	* Authors: Ross Biro
				10	* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
				11	* Mark Evans, <evansmp@uhura.aston.ac.uk>
				12	* Corey Minyard <wf-rch!minyard@relay.EU.net>
				13	* Florian La Roche, <flla@stud.uni-sb.de>
				14	* Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
				15	* Linus Torvalds, <torvalds@cs.helsinki.fi>
				16	* Alan Cox, <gw4pts@gw4pts.ampr.org>
				17	* Matthew Dillon, <dillon@apollo.west.oic.com>
				18	* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
				19	* Jorge Cwik, <jorge@laser.satlink.net>
				20	*/
				21
				22	#include <linux/module.h>
				23	#include <linux/gfp.h>
				24	#include <net/tcp.h>
				25
				26	static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
				27	{
				28	struct inet_connection_sock *icsk = inet_csk(sk);
				29	u32 elapsed, start_ts;
				30	s32 remaining;
				31
				32	start_ts = tcp_sk(sk)->retrans_stamp;
				33	if (!icsk->icsk_user_timeout)
				34	return icsk->icsk_rto;
				35	elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
				36	remaining = icsk->icsk_user_timeout - elapsed;
				37	if (remaining <= 0)
				38	return 1; /* user timeout has passed; fire ASAP */
				39
				40	return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining));
				41	}
				42
				43	u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
				44	{
				45	struct inet_connection_sock *icsk = inet_csk(sk);
				46	u32 remaining;
				47	s32 elapsed;
				48
				49	if (!icsk->icsk_user_timeout \|\| !icsk->icsk_probes_tstamp)
				50	return when;
				51
				52	elapsed = tcp_jiffies32 - icsk->icsk_probes_tstamp;
				53	if (unlikely(elapsed < 0))
				54	elapsed = 0;
				55	remaining = msecs_to_jiffies(icsk->icsk_user_timeout) - elapsed;
				56	remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN);
				57
				58	return min_t(u32, remaining, when);
				59	}
				60
				61	/**
				62	* tcp_write_err() - close socket and save error info
				63	* @sk: The socket the error has appeared on.
				64	*
				65	* Returns: Nothing (void)
				66	*/
				67
				68	static void tcp_write_err(struct sock *sk)
				69	{
				70	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
				71	sk->sk_error_report(sk);
				72
				73	tcp_write_queue_purge(sk);
				74	tcp_done(sk);
				75	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
				76	}
				77
				78	/**
				79	* tcp_out_of_resources() - Close socket if out of resources
				80	* @sk: pointer to current socket
				81	* @do_reset: send a last packet with reset flag
				82	*
				83	* Do not allow orphaned sockets to eat all our resources.
				84	* This is direct violation of TCP specs, but it is required
				85	* to prevent DoS attacks. It is called when a retransmission timeout
				86	* or zero probe timeout occurs on orphaned socket.
				87	*
				88	* Also close if our net namespace is exiting; in that case there is no
				89	* hope of ever communicating again since all netns interfaces are already
				90	* down (or about to be down), and we need to release our dst references,
				91	* which have been moved to the netns loopback interface, so the namespace
				92	* can finish exiting. This condition is only possible if we are a kernel
				93	* socket, as those do not hold references to the namespace.
				94	*
				95	* Criteria is still not confirmed experimentally and may change.
				96	* We kill the socket, if:
				97	* 1. If number of orphaned sockets exceeds an administratively configured
				98	* limit.
				99	* 2. If we have strong memory pressure.
				100	* 3. If our net namespace is exiting.
				101	*/
				102	static int tcp_out_of_resources(struct sock *sk, bool do_reset)
				103	{
				104	struct tcp_sock *tp = tcp_sk(sk);
				105	int shift = 0;
				106
				107	/* If peer does not open window for long time, or did not transmit
				108	* anything for long time, penalize it. */
				109	if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*TCP_RTO_MAX \|\| !do_reset)
				110	shift++;
				111
				112	/* If some dubious ICMP arrived, penalize even more. */
				113	if (sk->sk_err_soft)
				114	shift++;
				115
				116	if (tcp_check_oom(sk, shift)) {
				117	/* Catch exceptional cases, when connection requires reset.
				118	* 1. Last segment was sent recently. */
				119	if ((s32)(tcp_jiffies32 - tp->lsndtime) <= TCP_TIMEWAIT_LEN \|\|
				120	/* 2. Window is closed. */
				121	(!tp->snd_wnd && !tp->packets_out))
				122	do_reset = true;
				123	if (do_reset)
				124	tcp_send_active_reset(sk, GFP_ATOMIC);
				125	tcp_done(sk);
				126	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
				127	return 1;
				128	}
				129
				130	if (!check_net(sock_net(sk))) {
				131	/* Not possible to send reset; just close */
				132	tcp_done(sk);
				133	return 1;
				134	}
				135
				136	return 0;
				137	}
				138
				139	/**
				140	* tcp_orphan_retries() - Returns maximal number of retries on an orphaned socket
				141	* @sk: Pointer to the current socket.
				142	* @alive: bool, socket alive state
				143	*/
				144	static int tcp_orphan_retries(struct sock *sk, bool alive)
				145	{
				146	int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */
				147
				148	/* We know from an ICMP that something is wrong. */
				149	if (sk->sk_err_soft && !alive)
				150	retries = 0;
				151
				152	/* However, if socket sent something recently, select some safe
				153	* number of retries. 8 corresponds to >100 seconds with minimal
				154	* RTO of 200msec. */
				155	if (retries == 0 && alive)
				156	retries = 8;
				157	return retries;
				158	}
				159
				160	static void tcp_mtu_probing(struct inet_connection_sock icsk, struct sock sk)
				161	{
				162	const struct net *net = sock_net(sk);
				163	int mss;
				164
				165	/* Black hole detection */
				166	if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing))
				167	return;
				168
				169	if (!icsk->icsk_mtup.enabled) {
				170	icsk->icsk_mtup.enabled = 1;
				171	icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
				172	} else {
				173	mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
				174	mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss);
				175	mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor));
				176	mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss));
				177	icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
				178	}
				179	tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
				180	}
				181
				182	static unsigned int tcp_model_timeout(struct sock *sk,
				183	unsigned int boundary,
				184	unsigned int rto_base)
				185	{
				186	unsigned int linear_backoff_thresh, timeout;
				187
				188	linear_backoff_thresh = ilog2(TCP_RTO_MAX / rto_base);
				189	if (boundary <= linear_backoff_thresh)
				190	timeout = ((2 << boundary) - 1) * rto_base;
				191	else
				192	timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
				193	(boundary - linear_backoff_thresh) * TCP_RTO_MAX;
				194	return jiffies_to_msecs(timeout);
				195	}
				196	/**
				197	* retransmits_timed_out() - returns true if this connection has timed out
				198	* @sk: The current socket
				199	* @boundary: max number of retransmissions
				200	* @timeout: A custom timeout value.
				201	* If set to 0 the default timeout is calculated and used.
				202	* Using TCP_RTO_MIN and the number of unsuccessful retransmits.
				203	*
				204	* The default "timeout" value this function can calculate and use
				205	* is equivalent to the timeout of a TCP Connection
				206	* after "boundary" unsuccessful, exponentially backed-off
				207	* retransmissions with an initial RTO of TCP_RTO_MIN.
				208	*/
				209	static bool retransmits_timed_out(struct sock *sk,
				210	unsigned int boundary,
				211	unsigned int timeout)
				212	{
				213	unsigned int start_ts;
				214
				215	if (!inet_csk(sk)->icsk_retransmits)
				216	return false;
				217
				218	start_ts = tcp_sk(sk)->retrans_stamp;
				219	if (likely(timeout == 0)) {
				220	unsigned int rto_base = TCP_RTO_MIN;
				221
				222	if ((1 << sk->sk_state) & (TCPF_SYN_SENT \| TCPF_SYN_RECV))
				223	rto_base = tcp_timeout_init(sk);
				224	timeout = tcp_model_timeout(sk, boundary, rto_base);
				225	}
				226
				227	return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
				228	}
				229
				230	/* A write timeout has occurred. Process the after effects. */
				231	static int tcp_write_timeout(struct sock *sk)
				232	{
				233	struct inet_connection_sock *icsk = inet_csk(sk);
				234	struct tcp_sock *tp = tcp_sk(sk);
				235	struct net *net = sock_net(sk);
				236	bool expired = false, do_reset;
				237	int retry_until;
				238
				239	if ((1 << sk->sk_state) & (TCPF_SYN_SENT \| TCPF_SYN_RECV)) {
				240	if (icsk->icsk_retransmits) {
				241	dst_negative_advice(sk);
				242	} else {
				243	sk_rethink_txhash(sk);
				244	}
				245	retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
				246	expired = icsk->icsk_retransmits >= retry_until;
				247	} else {
				248	if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) {
				249	/* Black hole detection */
				250	tcp_mtu_probing(icsk, sk);
				251
				252	dst_negative_advice(sk);
				253	} else {
				254	sk_rethink_txhash(sk);
				255	}
				256
				257	retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2);
				258	if (sock_flag(sk, SOCK_DEAD)) {
				259	const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
				260
				261	retry_until = tcp_orphan_retries(sk, alive);
				262	do_reset = alive \|\|
				263	!retransmits_timed_out(sk, retry_until, 0);
				264
				265	if (tcp_out_of_resources(sk, do_reset))
				266	return 1;
				267	}
				268	}
				269	if (!expired)
				270	expired = retransmits_timed_out(sk, retry_until,
				271	icsk->icsk_user_timeout);
				272	tcp_fastopen_active_detect_blackhole(sk, expired);
				273
				274	if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
				275	tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RTO_CB,
				276	icsk->icsk_retransmits,
				277	icsk->icsk_rto, (int)expired);
				278
				279	if (expired) {
				280	/* Has it gone just too far? */
				281	tcp_write_err(sk);
				282	return 1;
				283	}
				284
				285	return 0;
				286	}
				287
				288	/* Called with BH disabled */
				289	void tcp_delack_timer_handler(struct sock *sk)
				290	{
				291	struct inet_connection_sock *icsk = inet_csk(sk);
				292
				293	sk_mem_reclaim_partial(sk);
				294
				295	if (((1 << sk->sk_state) & (TCPF_CLOSE \| TCPF_LISTEN)) \|\|
				296	!(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
				297	goto out;
				298
				299	if (time_after(icsk->icsk_ack.timeout, jiffies)) {
				300	sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
				301	goto out;
				302	}
				303	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
				304
				305	if (inet_csk_ack_scheduled(sk)) {
				306	if (!inet_csk_in_pingpong_mode(sk)) {
				307	/* Delayed ACK missed: inflate ATO. */
				308	icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
				309	} else {
				310	/* Delayed ACK missed: leave pingpong mode and
				311	* deflate ATO.
				312	*/
				313	inet_csk_exit_pingpong_mode(sk);
				314	icsk->icsk_ack.ato = TCP_ATO_MIN;
				315	}
				316	tcp_mstamp_refresh(tcp_sk(sk));
				317	tcp_send_ack(sk);
				318	__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
				319	}
				320
				321	out:
				322	if (tcp_under_memory_pressure(sk))
				323	sk_mem_reclaim(sk);
				324	}
				325
				326
				327	/**
				328	* tcp_delack_timer() - The TCP delayed ACK timeout handler
				329	* @data: Pointer to the current socket. (gets casted to struct sock *)
				330	*
				331	* This function gets (indirectly) called when the kernel timer for a TCP packet
				332	* of this socket expires. Calls tcp_delack_timer_handler() to do the actual work.
				333	*
				334	* Returns: Nothing (void)
				335	*/
				336	static void tcp_delack_timer(struct timer_list *t)
				337	{
				338	struct inet_connection_sock *icsk =
				339	from_timer(icsk, t, icsk_delack_timer);
				340	struct sock *sk = &icsk->icsk_inet.sk;
				341
				342	bh_lock_sock(sk);
				343	if (!sock_owned_by_user(sk)) {
				344	tcp_delack_timer_handler(sk);
				345	} else {
				346	icsk->icsk_ack.blocked = 1;
				347	__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
				348	/* deleguate our work to tcp_release_cb() */
				349	if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
				350	sock_hold(sk);
				351	}
				352	bh_unlock_sock(sk);
				353	sock_put(sk);
				354	}
				355
				356	static void tcp_probe_timer(struct sock *sk)
				357	{
				358	struct inet_connection_sock *icsk = inet_csk(sk);
				359	struct sk_buff *skb = tcp_send_head(sk);
				360	struct tcp_sock *tp = tcp_sk(sk);
				361	int max_probes;
				362
				363	if (tp->packets_out \|\| !skb) {
				364	icsk->icsk_probes_out = 0;
				365	icsk->icsk_probes_tstamp = 0;
				366	return;
				367	}
				368
				369	/* RFC 1122 4.2.2.17 requires the sender to stay open indefinitely as
				370	* long as the receiver continues to respond probes. We support this by
				371	* default and reset icsk_probes_out with incoming ACKs. But if the
				372	* socket is orphaned or the user specifies TCP_USER_TIMEOUT, we
				373	* kill the socket when the retry count and the time exceeds the
				374	* corresponding system limit. We also implement similar policy when
				375	* we use RTO to probe window in tcp_retransmit_timer().
				376	*/
				377	if (!icsk->icsk_probes_tstamp)
				378	icsk->icsk_probes_tstamp = tcp_jiffies32;
				379	else if (icsk->icsk_user_timeout &&
				380	(s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
				381	msecs_to_jiffies(icsk->icsk_user_timeout))
				382	goto abort;
				383
				384	max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
				385	if (sock_flag(sk, SOCK_DEAD)) {
				386	const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
				387
				388	max_probes = tcp_orphan_retries(sk, alive);
				389	if (!alive && icsk->icsk_backoff >= max_probes)
				390	goto abort;
				391	if (tcp_out_of_resources(sk, true))
				392	return;
				393	}
				394
				395	if (icsk->icsk_probes_out >= max_probes) {
				396	abort: tcp_write_err(sk);
				397	} else {
				398	/* Only send another probe if we didn't close things up. */
				399	tcp_send_probe0(sk);
				400	}
				401	}
				402
				403	/*
				404	* Timer for Fast Open socket to retransmit SYNACK. Note that the
				405	* sk here is the child socket, not the parent (listener) socket.
				406	*/
				407	static void tcp_fastopen_synack_timer(struct sock sk, struct request_sock req)
				408	{
				409	struct inet_connection_sock *icsk = inet_csk(sk);
				410	int max_retries = icsk->icsk_syn_retries ? :
				411	sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
				412	struct tcp_sock *tp = tcp_sk(sk);
				413
				414	req->rsk_ops->syn_ack_timeout(req);
				415
				416	if (req->num_timeout >= max_retries) {
				417	tcp_write_err(sk);
				418	return;
				419	}
				420	/* Lower cwnd after certain SYNACK timeout like tcp_init_transfer() */
				421	if (icsk->icsk_retransmits == 1)
				422	tcp_enter_loss(sk);
				423	/* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error
				424	* returned from rtx_syn_ack() to make it more persistent like
				425	* regular retransmit because if the child socket has been accepted
				426	* it's not good to give up too easily.
				427	*/
				428	inet_rtx_syn_ack(sk, req);
				429	req->num_timeout++;
				430	icsk->icsk_retransmits++;
				431	if (!tp->retrans_stamp)
				432	tp->retrans_stamp = tcp_time_stamp(tp);
				433	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				434	TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
				435	}
				436
				437	static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
				438	const struct sk_buff *skb)
				439	{
				440	const struct inet_connection_sock *icsk = inet_csk(sk);
				441	u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
				442	const struct tcp_sock *tp = tcp_sk(sk);
				443	int timeout = TCP_RTO_MAX * 2;
				444	u32 rtx_delta;
				445	s32 rcv_delta;
				446
				447	rtx_delta = (u32)msecs_to_jiffies(tcp_time_stamp(tp) -
				448	(tp->retrans_stamp ?: tcp_skb_timestamp(skb)));
				449
				450	if (user_timeout) {
				451	/* If user application specified a TCP_USER_TIMEOUT,
				452	* it does not want win 0 packets to 'reset the timer'
				453	* while retransmits are not making progress.
				454	*/
				455	if (rtx_delta > user_timeout)
				456	return true;
				457	timeout = min_t(u32, timeout, msecs_to_jiffies(user_timeout));
				458	}
				459
				460	/* Note: timer interrupt might have been delayed by at least one jiffy,
				461	* and tp->rcv_tstamp might very well have been written recently.
				462	* rcv_delta can thus be negative.
				463	*/
				464	rcv_delta = icsk->icsk_timeout - tp->rcv_tstamp;
				465	if (rcv_delta <= timeout)
				466	return false;
				467
				468	return rtx_delta > timeout;
				469	}
				470
				471	/**
				472	* tcp_retransmit_timer() - The TCP retransmit timeout handler
				473	* @sk: Pointer to the current socket.
				474	*
				475	* This function gets called when the kernel timer for a TCP packet
				476	* of this socket expires.
				477	*
				478	* It handles retransmission, timer adjustment and other necesarry measures.
				479	*
				480	* Returns: Nothing (void)
				481	*/
				482	void tcp_retransmit_timer(struct sock *sk)
				483	{
				484	struct tcp_sock *tp = tcp_sk(sk);
				485	struct net *net = sock_net(sk);
				486	struct inet_connection_sock *icsk = inet_csk(sk);
				487	struct request_sock *req;
				488	struct sk_buff *skb;
				489
				490	req = rcu_dereference_protected(tp->fastopen_rsk,
				491	lockdep_sock_is_held(sk));
				492	if (req) {
				493	WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
				494	sk->sk_state != TCP_FIN_WAIT1);
				495	tcp_fastopen_synack_timer(sk, req);
				496	/* Before we receive ACK to our SYN-ACK don't retransmit
				497	* anything else (e.g., data or FIN segments).
				498	*/
				499	return;
				500	}
				501
				502	if (!tp->packets_out)
				503	return;
				504
				505	skb = tcp_rtx_queue_head(sk);
				506	if (WARN_ON_ONCE(!skb))
				507	return;
				508
				509	if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
				510	!((1 << sk->sk_state) & (TCPF_SYN_SENT \| TCPF_SYN_RECV))) {
				511	/* Receiver dastardly shrinks window. Our retransmits
				512	* become zero probes, but we should not timeout this
				513	* connection. If the socket is an orphan, time it out,
				514	* we cannot allow such beasts to hang infinitely.
				515	*/
				516	struct inet_sock *inet = inet_sk(sk);
				517	if (sk->sk_family == AF_INET) {
				518	net_dbg_ratelimited("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
				519	&inet->inet_daddr,
				520	ntohs(inet->inet_dport),
				521	inet->inet_num,
				522	tp->snd_una, tp->snd_nxt);
				523	}
				524	#if IS_ENABLED(CONFIG_IPV6)
				525	else if (sk->sk_family == AF_INET6) {
				526	net_dbg_ratelimited("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
				527	&sk->sk_v6_daddr,
				528	ntohs(inet->inet_dport),
				529	inet->inet_num,
				530	tp->snd_una, tp->snd_nxt);
				531	}
				532	#endif
				533	if (tcp_rtx_probe0_timed_out(sk, skb)) {
				534	tcp_write_err(sk);
				535	goto out;
				536	}
				537	tcp_enter_loss(sk);
				538	tcp_retransmit_skb(sk, skb, 1);
				539	__sk_dst_reset(sk);
				540	goto out_reset_timer;
				541	}
				542
				543	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTS);
				544	if (tcp_write_timeout(sk))
				545	goto out;
				546
				547	if (icsk->icsk_retransmits == 0) {
				548	int mib_idx = 0;
				549
				550	if (icsk->icsk_ca_state == TCP_CA_Recovery) {
				551	if (tcp_is_sack(tp))
				552	mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL;
				553	else
				554	mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL;
				555	} else if (icsk->icsk_ca_state == TCP_CA_Loss) {
				556	mib_idx = LINUX_MIB_TCPLOSSFAILURES;
				557	} else if ((icsk->icsk_ca_state == TCP_CA_Disorder) \|\|
				558	tp->sacked_out) {
				559	if (tcp_is_sack(tp))
				560	mib_idx = LINUX_MIB_TCPSACKFAILURES;
				561	else
				562	mib_idx = LINUX_MIB_TCPRENOFAILURES;
				563	}
				564	if (mib_idx)
				565	__NET_INC_STATS(sock_net(sk), mib_idx);
				566	}
				567
				568	tcp_enter_loss(sk);
				569
				570	icsk->icsk_retransmits++;
				571	if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) {
				572	/* Retransmission failed because of local congestion,
				573	* Let senders fight for local resources conservatively.
				574	*/
				575	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				576	TCP_RESOURCE_PROBE_INTERVAL,
				577	TCP_RTO_MAX);
				578	goto out;
				579	}
				580
				581	/* Increase the timeout each time we retransmit. Note that
				582	* we do not increase the rtt estimate. rto is initialized
				583	* from rtt, but increases here. Jacobson (SIGCOMM 88) suggests
				584	* that doubling rto each time is the least we can get away with.
				585	* In KA9Q, Karn uses this for the first few times, and then
				586	* goes to quadratic. netBSD doubles, but only goes up to *64,
				587	* and clamps at 1 to 64 sec afterwards. Note that 120 sec is
				588	* defined in the protocol as the maximum possible RTT. I guess
				589	* we'll have to use something other than TCP to talk to the
				590	* University of Mars.
				591	*
				592	* PAWS allows us longer timeouts and large windows, so once
				593	* implemented ftp to mars will work nicely. We will have to fix
				594	* the 120 second clamps though!
				595	*/
				596	icsk->icsk_backoff++;
				597
				598	out_reset_timer:
				599	/* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
				600	* used to reset timer, set to 0. Recalculate 'icsk_rto' as this
				601	* might be increased if the stream oscillates between thin and thick,
				602	* thus the old value might already be too high compared to the value
				603	* set by 'tcp_set_rto' in tcp_input.c which resets the rto without
				604	* backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
				605	* exponential backoff behaviour to avoid continue hammering
				606	* linear-timeout retransmissions into a black hole
				607	*/
				608	if (sk->sk_state == TCP_ESTABLISHED &&
				609	(tp->thin_lto \|\| READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) &&
				610	tcp_stream_is_thin(tp) &&
				611	icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
				612	icsk->icsk_backoff = 0;
				613	icsk->icsk_rto = clamp(__tcp_set_rto(tp),
				614	tcp_rto_min(sk),
				615	TCP_RTO_MAX);
				616	} else {
				617	/* Use normal (exponential) backoff */
				618	icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
				619	}
				620	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				621	tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX);
				622	if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0))
				623	__sk_dst_reset(sk);
				624
				625	out:;
				626	}
				627
				628	/* Called with bottom-half processing disabled.
				629	Called by tcp_write_timer() */
				630	void tcp_write_timer_handler(struct sock *sk)
				631	{
				632	struct inet_connection_sock *icsk = inet_csk(sk);
				633	int event;
				634
				635	if (((1 << sk->sk_state) & (TCPF_CLOSE \| TCPF_LISTEN)) \|\|
				636	!icsk->icsk_pending)
				637	goto out;
				638
				639	if (time_after(icsk->icsk_timeout, jiffies)) {
				640	sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
				641	goto out;
				642	}
				643
				644	tcp_mstamp_refresh(tcp_sk(sk));
				645	event = icsk->icsk_pending;
				646
				647	switch (event) {
				648	case ICSK_TIME_REO_TIMEOUT:
				649	tcp_rack_reo_timeout(sk);
				650	break;
				651	case ICSK_TIME_LOSS_PROBE:
				652	tcp_send_loss_probe(sk);
				653	break;
				654	case ICSK_TIME_RETRANS:
				655	icsk->icsk_pending = 0;
				656	tcp_retransmit_timer(sk);
				657	break;
				658	case ICSK_TIME_PROBE0:
				659	icsk->icsk_pending = 0;
				660	tcp_probe_timer(sk);
				661	break;
				662	}
				663
				664	out:
				665	sk_mem_reclaim(sk);
				666	}
				667
				668	static void tcp_write_timer(struct timer_list *t)
				669	{
				670	struct inet_connection_sock *icsk =
				671	from_timer(icsk, t, icsk_retransmit_timer);
				672	struct sock *sk = &icsk->icsk_inet.sk;
				673
				674	bh_lock_sock(sk);
				675	if (!sock_owned_by_user(sk)) {
				676	tcp_write_timer_handler(sk);
				677	} else {
				678	/* delegate our work to tcp_release_cb() */
				679	if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags))
				680	sock_hold(sk);
				681	}
				682	bh_unlock_sock(sk);
				683	sock_put(sk);
				684	}
				685
				686	void tcp_syn_ack_timeout(const struct request_sock *req)
				687	{
				688	struct net *net = read_pnet(&inet_rsk(req)->ireq_net);
				689
				690	__NET_INC_STATS(net, LINUX_MIB_TCPTIMEOUTS);
				691	}
				692	EXPORT_SYMBOL(tcp_syn_ack_timeout);
				693
				694	void tcp_set_keepalive(struct sock *sk, int val)
				695	{
				696	if ((1 << sk->sk_state) & (TCPF_CLOSE \| TCPF_LISTEN))
				697	return;
				698
				699	if (val && !sock_flag(sk, SOCK_KEEPOPEN))
				700	inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
				701	else if (!val)
				702	inet_csk_delete_keepalive_timer(sk);
				703	}
				704	EXPORT_SYMBOL_GPL(tcp_set_keepalive);
				705
				706
				707	static void tcp_keepalive_timer (struct timer_list *t)
				708	{
				709	struct sock *sk = from_timer(sk, t, sk_timer);
				710	struct inet_connection_sock *icsk = inet_csk(sk);
				711	struct tcp_sock *tp = tcp_sk(sk);
				712	u32 elapsed;
				713
				714	/* Only process if socket is not in use. */
				715	bh_lock_sock(sk);
				716	if (sock_owned_by_user(sk)) {
				717	/* Try again later. */
				718	inet_csk_reset_keepalive_timer (sk, HZ/20);
				719	goto out;
				720	}
				721
				722	if (sk->sk_state == TCP_LISTEN) {
				723	pr_err("Hmm... keepalive on a LISTEN ???\n");
				724	goto out;
				725	}
				726
				727	tcp_mstamp_refresh(tp);
				728	if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
				729	if (tp->linger2 >= 0) {
				730	const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
				731
				732	if (tmo > 0) {
				733	tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				734	goto out;
				735	}
				736	}
				737	tcp_send_active_reset(sk, GFP_ATOMIC);
				738	goto death;
				739	}
				740
				741	if (!sock_flag(sk, SOCK_KEEPOPEN) \|\|
				742	((1 << sk->sk_state) & (TCPF_CLOSE \| TCPF_SYN_SENT)))
				743	goto out;
				744
				745	elapsed = keepalive_time_when(tp);
				746
				747	/* It is alive without keepalive 8) */
				748	if (tp->packets_out \|\| !tcp_write_queue_empty(sk))
				749	goto resched;
				750
				751	elapsed = keepalive_time_elapsed(tp);
				752
				753	if (elapsed >= keepalive_time_when(tp)) {
				754	/* If the TCP_USER_TIMEOUT option is enabled, use that
				755	* to determine when to timeout instead.
				756	*/
				757	if ((icsk->icsk_user_timeout != 0 &&
				758	elapsed >= msecs_to_jiffies(icsk->icsk_user_timeout) &&
				759	icsk->icsk_probes_out > 0) \|\|
				760	(icsk->icsk_user_timeout == 0 &&
				761	icsk->icsk_probes_out >= keepalive_probes(tp))) {
				762	tcp_send_active_reset(sk, GFP_ATOMIC);
				763	tcp_write_err(sk);
				764	goto out;
				765	}
				766	if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
				767	icsk->icsk_probes_out++;
				768	elapsed = keepalive_intvl_when(tp);
				769	} else {
				770	/* If keepalive was lost due to local congestion,
				771	* try harder.
				772	*/
				773	elapsed = TCP_RESOURCE_PROBE_INTERVAL;
				774	}
				775	} else {
				776	/* It is tp->rcv_tstamp + keepalive_time_when(tp) */
				777	elapsed = keepalive_time_when(tp) - elapsed;
				778	}
				779
				780	sk_mem_reclaim(sk);
				781
				782	resched:
				783	inet_csk_reset_keepalive_timer (sk, elapsed);
				784	goto out;
				785
				786	death:
				787	tcp_done(sk);
				788
				789	out:
				790	bh_unlock_sock(sk);
				791	sock_put(sk);
				792	}
				793
				794	static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
				795	{
				796	struct tcp_sock *tp = container_of(timer, struct tcp_sock, compressed_ack_timer);
				797	struct sock sk = (struct sock )tp;
				798
				799	bh_lock_sock(sk);
				800	if (!sock_owned_by_user(sk)) {
				801	if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
				802	tcp_send_ack(sk);
				803	} else {
				804	if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
				805	&sk->sk_tsq_flags))
				806	sock_hold(sk);
				807	}
				808	bh_unlock_sock(sk);
				809
				810	sock_put(sk);
				811
				812	return HRTIMER_NORESTART;
				813	}
				814
				815	void tcp_init_xmit_timers(struct sock *sk)
				816	{
				817	inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
				818	&tcp_keepalive_timer);
				819	hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC,
				820	HRTIMER_MODE_ABS_PINNED_SOFT);
				821	tcp_sk(sk)->pacing_timer.function = tcp_pace_kick;
				822
				823	hrtimer_init(&tcp_sk(sk)->compressed_ack_timer, CLOCK_MONOTONIC,
				824	HRTIMER_MODE_REL_PINNED_SOFT);
				825	tcp_sk(sk)->compressed_ack_timer.function = tcp_compressed_ack_kick;
				826	}