Blame - src/kernel/linux/v4.14/net/ipv4/tcp_ipv4.c - T103

blob: d01c34e95016f78e936a4880e0aa93ff9ca1ed0a [file] [log] [blame]

rjw	1f88458	2022-01-06 17:20:42 +0800	[diff] [blame^]	1	/*
				2	* INET An implementation of the TCP/IP protocol suite for the LINUX
				3	* operating system. INET is implemented using the BSD Socket
				4	* interface as the means of communication with the user level.
				5	*
				6	* Implementation of the Transmission Control Protocol(TCP).
				7	*
				8	* IPv4 specific functions
				9	*
				10	*
				11	* code split from:
				12	* linux/ipv4/tcp.c
				13	* linux/ipv4/tcp_input.c
				14	* linux/ipv4/tcp_output.c
				15	*
				16	* See tcp.c for author information
				17	*
				18	* This program is free software; you can redistribute it and/or
				19	* modify it under the terms of the GNU General Public License
				20	* as published by the Free Software Foundation; either version
				21	* 2 of the License, or (at your option) any later version.
				22	*/
				23
				24	/*
				25	* Changes:
				26	* David S. Miller : New socket lookup architecture.
				27	* This code is dedicated to John Dyson.
				28	* David S. Miller : Change semantics of established hash,
				29	* half is devoted to TIME_WAIT sockets
				30	* and the rest go in the other half.
				31	* Andi Kleen : Add support for syncookies and fixed
				32	* some bugs: ip options weren't passed to
				33	* the TCP layer, missed a check for an
				34	* ACK bit.
				35	* Andi Kleen : Implemented fast path mtu discovery.
				36	* Fixed many serious bugs in the
				37	* request_sock handling and moved
				38	* most of it into the af independent code.
				39	* Added tail drop and some other bugfixes.
				40	* Added new listen semantics.
				41	* Mike McLagan : Routing by source
				42	* Juan Jose Ciarlante: ip_dynaddr bits
				43	* Andi Kleen: various fixes.
				44	* Vitaly E. Lavrov : Transparent proxy revived after year
				45	* coma.
				46	* Andi Kleen : Fix new listen.
				47	* Andi Kleen : Fix accept error reporting.
				48	* YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
				49	* Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
				50	* a single port at the same time.
				51	*/
				52
				53	#define pr_fmt(fmt) "TCP: " fmt
				54
				55	#include <linux/bottom_half.h>
				56	#include <linux/types.h>
				57	#include <linux/fcntl.h>
				58	#include <linux/module.h>
				59	#include <linux/random.h>
				60	#include <linux/cache.h>
				61	#include <linux/jhash.h>
				62	#include <linux/init.h>
				63	#include <linux/times.h>
				64	#include <linux/slab.h>
				65
				66	#include <net/net_namespace.h>
				67	#include <net/icmp.h>
				68	#include <net/inet_hashtables.h>
				69	#include <net/tcp.h>
				70	#include <net/transp_v6.h>
				71	#include <net/ipv6.h>
				72	#include <net/inet_common.h>
				73	#include <net/timewait_sock.h>
				74	#include <net/xfrm.h>
				75	#include <net/secure_seq.h>
				76	#include <net/busy_poll.h>
				77
				78	#include <linux/inet.h>
				79	#include <linux/ipv6.h>
				80	#include <linux/stddef.h>
				81	#include <linux/proc_fs.h>
				82	#include <linux/seq_file.h>
				83	#include <linux/inetdevice.h>
				84
				85	#include <crypto/hash.h>
				86	#include <linux/scatterlist.h>
				87
				88	#ifdef CONFIG_TCP_MD5SIG
				89	static int tcp_v4_md5_hash_hdr(char md5_hash, const struct tcp_md5sig_key key,
				90	__be32 daddr, __be32 saddr, const struct tcphdr *th);
				91	#endif
				92
				93	struct inet_hashinfo tcp_hashinfo;
				94	EXPORT_SYMBOL(tcp_hashinfo);
				95
				96	static u32 tcp_v4_init_seq(const struct sk_buff *skb)
				97	{
				98	return secure_tcp_seq(ip_hdr(skb)->daddr,
				99	ip_hdr(skb)->saddr,
				100	tcp_hdr(skb)->dest,
				101	tcp_hdr(skb)->source);
				102	}
				103
				104	static u32 tcp_v4_init_ts_off(const struct net net, const struct sk_buff skb)
				105	{
				106	return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
				107	}
				108
				109	int tcp_twsk_unique(struct sock sk, struct sock sktw, void *twp)
				110	{
				111	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
				112	struct tcp_sock *tp = tcp_sk(sk);
				113
				114	/* With PAWS, it is safe from the viewpoint
				115	of data integrity. Even without PAWS it is safe provided sequence
				116	spaces do not overlap i.e. at data rates <= 80Mbit/sec.
				117
				118	Actually, the idea is close to VJ's one, only timestamp cache is
				119	held not per host, but per port pair and TW bucket is used as state
				120	holder.
				121
				122	If TW bucket has been already destroyed we fall back to VJ's scheme
				123	and use initial timestamp retrieved from peer table.
				124	*/
				125	if (tcptw->tw_ts_recent_stamp &&
				126	(!twp \|\| (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse &&
				127	get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
				128	tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
				129	if (tp->write_seq == 0)
				130	tp->write_seq = 1;
				131	tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
				132	tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
				133	sock_hold(sktw);
				134	return 1;
				135	}
				136
				137	return 0;
				138	}
				139	EXPORT_SYMBOL_GPL(tcp_twsk_unique);
				140
				141	/* This will initiate an outgoing connection. */
				142	int tcp_v4_connect(struct sock sk, struct sockaddr uaddr, int addr_len)
				143	{
				144	struct sockaddr_in usin = (struct sockaddr_in )uaddr;
				145	struct inet_sock *inet = inet_sk(sk);
				146	struct tcp_sock *tp = tcp_sk(sk);
				147	__be16 orig_sport, orig_dport;
				148	__be32 daddr, nexthop;
				149	struct flowi4 *fl4;
				150	struct rtable *rt;
				151	int err;
				152	struct ip_options_rcu *inet_opt;
				153	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
				154
				155	if (addr_len < sizeof(struct sockaddr_in))
				156	return -EINVAL;
				157
				158	if (usin->sin_family != AF_INET)
				159	return -EAFNOSUPPORT;
				160
				161	nexthop = daddr = usin->sin_addr.s_addr;
				162	inet_opt = rcu_dereference_protected(inet->inet_opt,
				163	lockdep_sock_is_held(sk));
				164	if (inet_opt && inet_opt->opt.srr) {
				165	if (!daddr)
				166	return -EINVAL;
				167	nexthop = inet_opt->opt.faddr;
				168	}
				169
				170	orig_sport = inet->inet_sport;
				171	orig_dport = usin->sin_port;
				172	fl4 = &inet->cork.fl.u.ip4;
				173	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
				174	RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
				175	IPPROTO_TCP,
				176	orig_sport, orig_dport, sk);
				177	if (IS_ERR(rt)) {
				178	err = PTR_ERR(rt);
				179	if (err == -ENETUNREACH)
				180	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
				181	return err;
				182	}
				183
				184	if (rt->rt_flags & (RTCF_MULTICAST \| RTCF_BROADCAST)) {
				185	ip_rt_put(rt);
				186	return -ENETUNREACH;
				187	}
				188
				189	if (!inet_opt \|\| !inet_opt->opt.srr)
				190	daddr = fl4->daddr;
				191
				192	if (!inet->inet_saddr)
				193	inet->inet_saddr = fl4->saddr;
				194	sk_rcv_saddr_set(sk, inet->inet_saddr);
				195
				196	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
				197	/* Reset inherited state */
				198	tp->rx_opt.ts_recent = 0;
				199	tp->rx_opt.ts_recent_stamp = 0;
				200	if (likely(!tp->repair))
				201	tp->write_seq = 0;
				202	}
				203
				204	inet->inet_dport = usin->sin_port;
				205	sk_daddr_set(sk, daddr);
				206
				207	inet_csk(sk)->icsk_ext_hdr_len = 0;
				208	if (inet_opt)
				209	inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
				210
				211	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
				212
				213	/* Socket identity is still unknown (sport may be zero).
				214	* However we set state to SYN-SENT and not releasing socket
				215	* lock select source port, enter ourselves into the hash tables and
				216	* complete initialization after this.
				217	*/
				218	tcp_set_state(sk, TCP_SYN_SENT);
				219	err = inet_hash_connect(tcp_death_row, sk);
				220	if (err)
				221	goto failure;
				222
				223	sk_set_txhash(sk);
				224
				225	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
				226	inet->inet_sport, inet->inet_dport, sk);
				227	if (IS_ERR(rt)) {
				228	err = PTR_ERR(rt);
				229	rt = NULL;
				230	goto failure;
				231	}
				232	/* OK, now commit destination to socket. */
				233	sk->sk_gso_type = SKB_GSO_TCPV4;
				234	sk_setup_caps(sk, &rt->dst);
				235	rt = NULL;
				236
				237	if (likely(!tp->repair)) {
				238	if (!tp->write_seq)
				239	tp->write_seq = secure_tcp_seq(inet->inet_saddr,
				240	inet->inet_daddr,
				241	inet->inet_sport,
				242	usin->sin_port);
				243	tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
				244	inet->inet_saddr,
				245	inet->inet_daddr);
				246	}
				247
				248	inet->inet_id = prandom_u32();
				249
				250	if (tcp_fastopen_defer_connect(sk, &err))
				251	return err;
				252	if (err)
				253	goto failure;
				254
				255	err = tcp_connect(sk);
				256
				257	if (err)
				258	goto failure;
				259
				260	return 0;
				261
				262	failure:
				263	/*
				264	* This unhashes the socket and releases the local port,
				265	* if necessary.
				266	*/
				267	tcp_set_state(sk, TCP_CLOSE);
				268	ip_rt_put(rt);
				269	sk->sk_route_caps = 0;
				270	inet->inet_dport = 0;
				271	return err;
				272	}
				273	EXPORT_SYMBOL(tcp_v4_connect);
				274
				275	/*
				276	* This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
				277	* It can be called through tcp_release_cb() if socket was owned by user
				278	* at the time tcp_v4_err() was called to handle ICMP message.
				279	*/
				280	void tcp_v4_mtu_reduced(struct sock *sk)
				281	{
				282	struct inet_sock *inet = inet_sk(sk);
				283	struct dst_entry *dst;
				284	u32 mtu;
				285
				286	if ((1 << sk->sk_state) & (TCPF_LISTEN \| TCPF_CLOSE))
				287	return;
				288	mtu = tcp_sk(sk)->mtu_info;
				289	dst = inet_csk_update_pmtu(sk, mtu);
				290	if (!dst)
				291	return;
				292
				293	/* Something is about to be wrong... Remember soft error
				294	* for the case, if this connection will not able to recover.
				295	*/
				296	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
				297	sk->sk_err_soft = EMSGSIZE;
				298
				299	mtu = dst_mtu(dst);
				300
				301	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
				302	ip_sk_accept_pmtu(sk) &&
				303	inet_csk(sk)->icsk_pmtu_cookie > mtu) {
				304	tcp_sync_mss(sk, mtu);
				305
				306	/* Resend the TCP packet because it's
				307	* clear that the old packet has been
				308	* dropped. This is the new "fast" path mtu
				309	* discovery.
				310	*/
				311	tcp_simple_retransmit(sk);
				312	} /* else let the usual retransmit timer handle it */
				313	}
				314	EXPORT_SYMBOL(tcp_v4_mtu_reduced);
				315
				316	static void do_redirect(struct sk_buff skb, struct sock sk)
				317	{
				318	struct dst_entry *dst = __sk_dst_check(sk, 0);
				319
				320	if (dst)
				321	dst->ops->redirect(dst, sk, skb);
				322	}
				323
				324
				325	/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
				326	void tcp_req_err(struct sock *sk, u32 seq, bool abort)
				327	{
				328	struct request_sock *req = inet_reqsk(sk);
				329	struct net *net = sock_net(sk);
				330
				331	/* ICMPs are not backlogged, hence we cannot get
				332	* an established socket here.
				333	*/
				334	if (seq != tcp_rsk(req)->snt_isn) {
				335	__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
				336	} else if (abort) {
				337	/*
				338	* Still in SYN_RECV, just remove it silently.
				339	* There is no good way to pass the error to the newly
				340	* created socket, and POSIX does not want network
				341	* errors returned from accept().
				342	*/
				343	inet_csk_reqsk_queue_drop(req->rsk_listener, req);
				344	tcp_listendrop(req->rsk_listener);
				345	}
				346	reqsk_put(req);
				347	}
				348	EXPORT_SYMBOL(tcp_req_err);
				349
				350	/*
				351	* This routine is called by the ICMP module when it gets some
				352	* sort of error condition. If err < 0 then the socket should
				353	* be closed and the error returned to the user. If err > 0
				354	* it's just the icmp type << 8 \| icmp code. After adjustment
				355	* header points to the first 8 bytes of the tcp header. We need
				356	* to find the appropriate port.
				357	*
				358	* The locking strategy used here is very "optimistic". When
				359	* someone else accesses the socket the ICMP is just dropped
				360	* and for some paths there is no check at all.
				361	* A more general error queue to queue errors for later handling
				362	* is probably better.
				363	*
				364	*/
				365
				366	void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
				367	{
				368	const struct iphdr iph = (const struct iphdr )icmp_skb->data;
				369	struct tcphdr th = (struct tcphdr )(icmp_skb->data + (iph->ihl << 2));
				370	struct inet_connection_sock *icsk;
				371	struct tcp_sock *tp;
				372	struct inet_sock *inet;
				373	const int type = icmp_hdr(icmp_skb)->type;
				374	const int code = icmp_hdr(icmp_skb)->code;
				375	struct sock *sk;
				376	struct sk_buff *skb;
				377	struct request_sock *fastopen;
				378	u32 seq, snd_una;
				379	s32 remaining;
				380	u32 delta_us;
				381	int err;
				382	struct net *net = dev_net(icmp_skb->dev);
				383
				384	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
				385	th->dest, iph->saddr, ntohs(th->source),
				386	inet_iif(icmp_skb), 0);
				387	if (!sk) {
				388	__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
				389	return;
				390	}
				391	if (sk->sk_state == TCP_TIME_WAIT) {
				392	inet_twsk_put(inet_twsk(sk));
				393	return;
				394	}
				395	seq = ntohl(th->seq);
				396	if (sk->sk_state == TCP_NEW_SYN_RECV)
				397	return tcp_req_err(sk, seq,
				398	type == ICMP_PARAMETERPROB \|\|
				399	type == ICMP_TIME_EXCEEDED \|\|
				400	(type == ICMP_DEST_UNREACH &&
				401	(code == ICMP_NET_UNREACH \|\|
				402	code == ICMP_HOST_UNREACH)));
				403
				404	bh_lock_sock(sk);
				405	/* If too many ICMPs get dropped on busy
				406	* servers this needs to be solved differently.
				407	* We do take care of PMTU discovery (RFC1191) special case :
				408	* we can receive locally generated ICMP messages while socket is held.
				409	*/
				410	if (sock_owned_by_user(sk)) {
				411	if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
				412	__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
				413	}
				414	if (sk->sk_state == TCP_CLOSE)
				415	goto out;
				416
				417	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
				418	__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
				419	goto out;
				420	}
				421
				422	icsk = inet_csk(sk);
				423	tp = tcp_sk(sk);
				424	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
				425	fastopen = tp->fastopen_rsk;
				426	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
				427	if (sk->sk_state != TCP_LISTEN &&
				428	!between(seq, snd_una, tp->snd_nxt)) {
				429	__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
				430	goto out;
				431	}
				432
				433	switch (type) {
				434	case ICMP_REDIRECT:
				435	if (!sock_owned_by_user(sk))
				436	do_redirect(icmp_skb, sk);
				437	goto out;
				438	case ICMP_SOURCE_QUENCH:
				439	/* Just silently ignore these. */
				440	goto out;
				441	case ICMP_PARAMETERPROB:
				442	err = EPROTO;
				443	break;
				444	case ICMP_DEST_UNREACH:
				445	if (code > NR_ICMP_UNREACH)
				446	goto out;
				447
				448	if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
				449	/* We are not interested in TCP_LISTEN and open_requests
				450	* (SYN-ACKs send out by Linux are always <576bytes so
				451	* they should go through unfragmented).
				452	*/
				453	if (sk->sk_state == TCP_LISTEN)
				454	goto out;
				455
				456	tp->mtu_info = info;
				457	if (!sock_owned_by_user(sk)) {
				458	tcp_v4_mtu_reduced(sk);
				459	} else {
				460	if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
				461	sock_hold(sk);
				462	}
				463	goto out;
				464	}
				465
				466	err = icmp_err_convert[code].errno;
				467	/* check if icmp_skb allows revert of backoff
				468	* (see draft-zimmermann-tcp-lcd) */
				469	if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
				470	break;
				471	if (seq != tp->snd_una \|\| !icsk->icsk_retransmits \|\|
				472	!icsk->icsk_backoff \|\| fastopen)
				473	break;
				474
				475	if (sock_owned_by_user(sk))
				476	break;
				477
				478	skb = tcp_write_queue_head(sk);
				479	if (WARN_ON_ONCE(!skb))
				480	break;
				481
				482	icsk->icsk_backoff--;
				483	icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
				484	TCP_TIMEOUT_INIT;
				485	icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
				486
				487	tcp_mstamp_refresh(tp);
				488	delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp);
				489	remaining = icsk->icsk_rto -
				490	usecs_to_jiffies(delta_us);
				491
				492	if (remaining > 0) {
				493	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				494	remaining, TCP_RTO_MAX);
				495	} else {
				496	/* RTO revert clocked out retransmission.
				497	* Will retransmit now */
				498	tcp_retransmit_timer(sk);
				499	}
				500
				501	break;
				502	case ICMP_TIME_EXCEEDED:
				503	err = EHOSTUNREACH;
				504	break;
				505	default:
				506	goto out;
				507	}
				508
				509	switch (sk->sk_state) {
				510	case TCP_SYN_SENT:
				511	case TCP_SYN_RECV:
				512	/* Only in fast or simultaneous open. If a fast open socket is
				513	* is already accepted it is treated as a connected one below.
				514	*/
				515	if (fastopen && !fastopen->sk)
				516	break;
				517
				518	if (!sock_owned_by_user(sk)) {
				519	sk->sk_err = err;
				520
				521	sk->sk_error_report(sk);
				522
				523	tcp_done(sk);
				524	} else {
				525	sk->sk_err_soft = err;
				526	}
				527	goto out;
				528	}
				529
				530	/* If we've already connected we will keep trying
				531	* until we time out, or the user gives up.
				532	*
				533	* rfc1122 4.2.3.9 allows to consider as hard errors
				534	* only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
				535	* but it is obsoleted by pmtu discovery).
				536	*
				537	* Note, that in modern internet, where routing is unreliable
				538	* and in each dark corner broken firewalls sit, sending random
				539	* errors ordered by their masters even this two messages finally lose
				540	* their original sense (even Linux sends invalid PORT_UNREACHs)
				541	*
				542	* Now we are in compliance with RFCs.
				543	* --ANK (980905)
				544	*/
				545
				546	inet = inet_sk(sk);
				547	if (!sock_owned_by_user(sk) && inet->recverr) {
				548	sk->sk_err = err;
				549	sk->sk_error_report(sk);
				550	} else { /* Only an error on timeout */
				551	sk->sk_err_soft = err;
				552	}
				553
				554	out:
				555	bh_unlock_sock(sk);
				556	sock_put(sk);
				557	}
				558
				559	void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
				560	{
				561	struct tcphdr *th = tcp_hdr(skb);
				562
				563	if (skb->ip_summed == CHECKSUM_PARTIAL) {
				564	th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
				565	skb->csum_start = skb_transport_header(skb) - skb->head;
				566	skb->csum_offset = offsetof(struct tcphdr, check);
				567	} else {
				568	th->check = tcp_v4_check(skb->len, saddr, daddr,
				569	csum_partial(th,
				570	th->doff << 2,
				571	skb->csum));
				572	}
				573	}
				574
				575	/* This routine computes an IPv4 TCP checksum. */
				576	void tcp_v4_send_check(struct sock sk, struct sk_buff skb)
				577	{
				578	const struct inet_sock *inet = inet_sk(sk);
				579
				580	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
				581	}
				582	EXPORT_SYMBOL(tcp_v4_send_check);
				583
				584	/*
				585	* This routine will send an RST to the other tcp.
				586	*
				587	* Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
				588	* for reset.
				589	* Answer: if a packet caused RST, it is not for a socket
				590	* existing in our system, if it is matched to a socket,
				591	* it is just duplicate segment or bug in other side's TCP.
				592	* So that we build reply only basing on parameters
				593	* arrived with segment.
				594	* Exception: precedence violation. We do not implement it in any case.
				595	*/
				596
				597	static void tcp_v4_send_reset(const struct sock sk, struct sk_buff skb)
				598	{
				599	const struct tcphdr *th = tcp_hdr(skb);
				600	struct {
				601	struct tcphdr th;
				602	#ifdef CONFIG_TCP_MD5SIG
				603	__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
				604	#endif
				605	} rep;
				606	struct ip_reply_arg arg;
				607	#ifdef CONFIG_TCP_MD5SIG
				608	struct tcp_md5sig_key *key = NULL;
				609	const __u8 *hash_location = NULL;
				610	unsigned char newhash[16];
				611	int genhash;
				612	struct sock *sk1 = NULL;
				613	#endif
				614	struct net *net;
				615
				616	/* Never send a reset in response to a reset. */
				617	if (th->rst)
				618	return;
				619
				620	/* If sk not NULL, it means we did a successful lookup and incoming
				621	* route had to be correct. prequeue might have dropped our dst.
				622	*/
				623	if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
				624	return;
				625
				626	/* Swap the send and the receive. */
				627	memset(&rep, 0, sizeof(rep));
				628	rep.th.dest = th->source;
				629	rep.th.source = th->dest;
				630	rep.th.doff = sizeof(struct tcphdr) / 4;
				631	rep.th.rst = 1;
				632
				633	if (th->ack) {
				634	rep.th.seq = th->ack_seq;
				635	} else {
				636	rep.th.ack = 1;
				637	rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				638	skb->len - (th->doff << 2));
				639	}
				640
				641	memset(&arg, 0, sizeof(arg));
				642	arg.iov[0].iov_base = (unsigned char *)&rep;
				643	arg.iov[0].iov_len = sizeof(rep.th);
				644
				645	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
				646	#ifdef CONFIG_TCP_MD5SIG
				647	rcu_read_lock();
				648	hash_location = tcp_parse_md5sig_option(th);
				649	if (sk && sk_fullsock(sk)) {
				650	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
				651	&ip_hdr(skb)->saddr, AF_INET);
				652	} else if (hash_location) {
				653	/*
				654	* active side is lost. Try to find listening socket through
				655	* source port, and then find md5 key through listening socket.
				656	* we are not loose security here:
				657	* Incoming packet is checked with md5 hash with finding key,
				658	* no RST generated if md5 hash doesn't match.
				659	*/
				660	sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
				661	ip_hdr(skb)->saddr,
				662	th->source, ip_hdr(skb)->daddr,
				663	ntohs(th->source), inet_iif(skb),
				664	tcp_v4_sdif(skb));
				665	/* don't send rst if it can't find key */
				666	if (!sk1)
				667	goto out;
				668
				669	key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
				670	&ip_hdr(skb)->saddr, AF_INET);
				671	if (!key)
				672	goto out;
				673
				674
				675	genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
				676	if (genhash \|\| memcmp(hash_location, newhash, 16) != 0)
				677	goto out;
				678
				679	}
				680
				681	if (key) {
				682	rep.opt[0] = htonl((TCPOPT_NOP << 24) \|
				683	(TCPOPT_NOP << 16) \|
				684	(TCPOPT_MD5SIG << 8) \|
				685	TCPOLEN_MD5SIG);
				686	/* Update length and the length the header thinks exists */
				687	arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
				688	rep.th.doff = arg.iov[0].iov_len / 4;
				689
				690	tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				691	key, ip_hdr(skb)->saddr,
				692	ip_hdr(skb)->daddr, &rep.th);
				693	}
				694	#endif
				695	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				696	ip_hdr(skb)->saddr, /* XXX */
				697	arg.iov[0].iov_len, IPPROTO_TCP, 0);
				698	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
				699	arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;
				700
				701	/* When socket is gone, all binding information is lost.
				702	* routing might fail in this case. No choice here, if we choose to force
				703	* input interface, we will misroute in case of asymmetric route.
				704	*/
				705	if (sk)
				706	arg.bound_dev_if = sk->sk_bound_dev_if;
				707
				708	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
				709	offsetof(struct inet_timewait_sock, tw_bound_dev_if));
				710
				711	arg.tos = ip_hdr(skb)->tos;
				712	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
				713	local_bh_disable();
				714	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
				715	skb, &TCP_SKB_CB(skb)->header.h4.opt,
				716	ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
				717	&arg, arg.iov[0].iov_len);
				718
				719	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
				720	__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
				721	local_bh_enable();
				722
				723	#ifdef CONFIG_TCP_MD5SIG
				724	out:
				725	rcu_read_unlock();
				726	#endif
				727	}
				728
				729	/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
				730	outside socket context is ugly, certainly. What can I do?
				731	*/
				732
				733	static void tcp_v4_send_ack(const struct sock *sk,
				734	struct sk_buff *skb, u32 seq, u32 ack,
				735	u32 win, u32 tsval, u32 tsecr, int oif,
				736	struct tcp_md5sig_key *key,
				737	int reply_flags, u8 tos)
				738	{
				739	const struct tcphdr *th = tcp_hdr(skb);
				740	struct {
				741	struct tcphdr th;
				742	__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
				743	#ifdef CONFIG_TCP_MD5SIG
				744	+ (TCPOLEN_MD5SIG_ALIGNED >> 2)
				745	#endif
				746	];
				747	} rep;
				748	struct net *net = sock_net(sk);
				749	struct ip_reply_arg arg;
				750
				751	memset(&rep.th, 0, sizeof(struct tcphdr));
				752	memset(&arg, 0, sizeof(arg));
				753
				754	arg.iov[0].iov_base = (unsigned char *)&rep;
				755	arg.iov[0].iov_len = sizeof(rep.th);
				756	if (tsecr) {
				757	rep.opt[0] = htonl((TCPOPT_NOP << 24) \| (TCPOPT_NOP << 16) \|
				758	(TCPOPT_TIMESTAMP << 8) \|
				759	TCPOLEN_TIMESTAMP);
				760	rep.opt[1] = htonl(tsval);
				761	rep.opt[2] = htonl(tsecr);
				762	arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
				763	}
				764
				765	/* Swap the send and the receive. */
				766	rep.th.dest = th->source;
				767	rep.th.source = th->dest;
				768	rep.th.doff = arg.iov[0].iov_len / 4;
				769	rep.th.seq = htonl(seq);
				770	rep.th.ack_seq = htonl(ack);
				771	rep.th.ack = 1;
				772	rep.th.window = htons(win);
				773
				774	#ifdef CONFIG_TCP_MD5SIG
				775	if (key) {
				776	int offset = (tsecr) ? 3 : 0;
				777
				778	rep.opt[offset++] = htonl((TCPOPT_NOP << 24) \|
				779	(TCPOPT_NOP << 16) \|
				780	(TCPOPT_MD5SIG << 8) \|
				781	TCPOLEN_MD5SIG);
				782	arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
				783	rep.th.doff = arg.iov[0].iov_len/4;
				784
				785	tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				786	key, ip_hdr(skb)->saddr,
				787	ip_hdr(skb)->daddr, &rep.th);
				788	}
				789	#endif
				790	arg.flags = reply_flags;
				791	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				792	ip_hdr(skb)->saddr, /* XXX */
				793	arg.iov[0].iov_len, IPPROTO_TCP, 0);
				794	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
				795	if (oif)
				796	arg.bound_dev_if = oif;
				797	arg.tos = tos;
				798	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
				799	local_bh_disable();
				800	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
				801	skb, &TCP_SKB_CB(skb)->header.h4.opt,
				802	ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
				803	&arg, arg.iov[0].iov_len);
				804
				805	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
				806	local_bh_enable();
				807	}
				808
				809	static void tcp_v4_timewait_ack(struct sock sk, struct sk_buff skb)
				810	{
				811	struct inet_timewait_sock *tw = inet_twsk(sk);
				812	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
				813
				814	tcp_v4_send_ack(sk, skb,
				815	tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
				816	tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
				817	tcp_time_stamp_raw() + tcptw->tw_ts_offset,
				818	tcptw->tw_ts_recent,
				819	tw->tw_bound_dev_if,
				820	tcp_twsk_md5_key(tcptw),
				821	tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
				822	tw->tw_tos
				823	);
				824
				825	inet_twsk_put(tw);
				826	}
				827
				828	static void tcp_v4_reqsk_send_ack(const struct sock sk, struct sk_buff skb,
				829	struct request_sock *req)
				830	{
				831	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
				832	* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
				833	*/
				834	u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
				835	tcp_sk(sk)->snd_nxt;
				836
				837	/* RFC 7323 2.3
				838	* The window field (SEG.WND) of every outgoing segment, with the
				839	* exception of <SYN> segments, MUST be right-shifted by
				840	* Rcv.Wind.Shift bits:
				841	*/
				842	tcp_v4_send_ack(sk, skb, seq,
				843	tcp_rsk(req)->rcv_nxt,
				844	req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
				845	tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
				846	req->ts_recent,
				847	0,
				848	tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
				849	AF_INET),
				850	inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
				851	ip_hdr(skb)->tos);
				852	}
				853
				854	/*
				855	* Send a SYN-ACK after having received a SYN.
				856	* This still operates on a request_sock only, not on a big
				857	* socket.
				858	*/
				859	static int tcp_v4_send_synack(const struct sock sk, struct dst_entry dst,
				860	struct flowi *fl,
				861	struct request_sock *req,
				862	struct tcp_fastopen_cookie *foc,
				863	enum tcp_synack_type synack_type)
				864	{
				865	const struct inet_request_sock *ireq = inet_rsk(req);
				866	struct flowi4 fl4;
				867	int err = -1;
				868	struct sk_buff *skb;
				869
				870	/* First, grab a route. */
				871	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
				872	return -1;
				873
				874	skb = tcp_make_synack(sk, dst, req, foc, synack_type);
				875
				876	if (skb) {
				877	__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
				878
				879	rcu_read_lock();
				880	err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
				881	ireq->ir_rmt_addr,
				882	rcu_dereference(ireq->ireq_opt));
				883	rcu_read_unlock();
				884	err = net_xmit_eval(err);
				885	}
				886
				887	return err;
				888	}
				889
				890	/*
				891	* IPv4 request_sock destructor.
				892	*/
				893	static void tcp_v4_reqsk_destructor(struct request_sock *req)
				894	{
				895	kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
				896	}
				897
				898	#ifdef CONFIG_TCP_MD5SIG
				899	/*
				900	* RFC2385 MD5 checksumming requires a mapping of
				901	* IP address->MD5 Key.
				902	* We need to maintain these in the sk structure.
				903	*/
				904
				905	/* Find the Key structure for an address. */
				906	struct tcp_md5sig_key tcp_md5_do_lookup(const struct sock sk,
				907	const union tcp_md5_addr *addr,
				908	int family)
				909	{
				910	const struct tcp_sock *tp = tcp_sk(sk);
				911	struct tcp_md5sig_key *key;
				912	const struct tcp_md5sig_info *md5sig;
				913	__be32 mask;
				914	struct tcp_md5sig_key *best_match = NULL;
				915	bool match;
				916
				917	/* caller either holds rcu_read_lock() or socket lock */
				918	md5sig = rcu_dereference_check(tp->md5sig_info,
				919	lockdep_sock_is_held(sk));
				920	if (!md5sig)
				921	return NULL;
				922
				923	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
				924	if (key->family != family)
				925	continue;
				926
				927	if (family == AF_INET) {
				928	mask = inet_make_mask(key->prefixlen);
				929	match = (key->addr.a4.s_addr & mask) ==
				930	(addr->a4.s_addr & mask);
				931	#if IS_ENABLED(CONFIG_IPV6)
				932	} else if (family == AF_INET6) {
				933	match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
				934	key->prefixlen);
				935	#endif
				936	} else {
				937	match = false;
				938	}
				939
				940	if (match && (!best_match \|\|
				941	key->prefixlen > best_match->prefixlen))
				942	best_match = key;
				943	}
				944	return best_match;
				945	}
				946	EXPORT_SYMBOL(tcp_md5_do_lookup);
				947
				948	static struct tcp_md5sig_key tcp_md5_do_lookup_exact(const struct sock sk,
				949	const union tcp_md5_addr *addr,
				950	int family, u8 prefixlen)
				951	{
				952	const struct tcp_sock *tp = tcp_sk(sk);
				953	struct tcp_md5sig_key *key;
				954	unsigned int size = sizeof(struct in_addr);
				955	const struct tcp_md5sig_info *md5sig;
				956
				957	/* caller either holds rcu_read_lock() or socket lock */
				958	md5sig = rcu_dereference_check(tp->md5sig_info,
				959	lockdep_sock_is_held(sk));
				960	if (!md5sig)
				961	return NULL;
				962	#if IS_ENABLED(CONFIG_IPV6)
				963	if (family == AF_INET6)
				964	size = sizeof(struct in6_addr);
				965	#endif
				966	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
				967	if (key->family != family)
				968	continue;
				969	if (!memcmp(&key->addr, addr, size) &&
				970	key->prefixlen == prefixlen)
				971	return key;
				972	}
				973	return NULL;
				974	}
				975
				976	struct tcp_md5sig_key tcp_v4_md5_lookup(const struct sock sk,
				977	const struct sock *addr_sk)
				978	{
				979	const union tcp_md5_addr *addr;
				980
				981	addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
				982	return tcp_md5_do_lookup(sk, addr, AF_INET);
				983	}
				984	EXPORT_SYMBOL(tcp_v4_md5_lookup);
				985
				986	/* This can be called on a newly created socket, from other files */
				987	int tcp_md5_do_add(struct sock sk, const union tcp_md5_addr addr,
				988	int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
				989	gfp_t gfp)
				990	{
				991	/* Add Key to the list */
				992	struct tcp_md5sig_key *key;
				993	struct tcp_sock *tp = tcp_sk(sk);
				994	struct tcp_md5sig_info *md5sig;
				995
				996	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
				997	if (key) {
				998	/* Pre-existing entry - just update that one.
				999	* Note that the key might be used concurrently.
				1000	*/
				1001	memcpy(key->key, newkey, newkeylen);
				1002
				1003	/* Pairs with READ_ONCE() in tcp_md5_hash_key().
				1004	* Also note that a reader could catch new key->keylen value
				1005	* but old key->key[], this is the reason we use __GFP_ZERO
				1006	* at sock_kmalloc() time below these lines.
				1007	*/
				1008	WRITE_ONCE(key->keylen, newkeylen);
				1009
				1010	return 0;
				1011	}
				1012
				1013	md5sig = rcu_dereference_protected(tp->md5sig_info,
				1014	lockdep_sock_is_held(sk));
				1015	if (!md5sig) {
				1016	md5sig = kmalloc(sizeof(*md5sig), gfp);
				1017	if (!md5sig)
				1018	return -ENOMEM;
				1019
				1020	sk_nocaps_add(sk, NETIF_F_GSO_MASK);
				1021	INIT_HLIST_HEAD(&md5sig->head);
				1022	rcu_assign_pointer(tp->md5sig_info, md5sig);
				1023	}
				1024
				1025	key = sock_kmalloc(sk, sizeof(*key), gfp \| __GFP_ZERO);
				1026	if (!key)
				1027	return -ENOMEM;
				1028	if (!tcp_alloc_md5sig_pool()) {
				1029	sock_kfree_s(sk, key, sizeof(*key));
				1030	return -ENOMEM;
				1031	}
				1032
				1033	memcpy(key->key, newkey, newkeylen);
				1034	key->keylen = newkeylen;
				1035	key->family = family;
				1036	key->prefixlen = prefixlen;
				1037	memcpy(&key->addr, addr,
				1038	(family == AF_INET6) ? sizeof(struct in6_addr) :
				1039	sizeof(struct in_addr));
				1040	hlist_add_head_rcu(&key->node, &md5sig->head);
				1041	return 0;
				1042	}
				1043	EXPORT_SYMBOL(tcp_md5_do_add);
				1044
				1045	int tcp_md5_do_del(struct sock sk, const union tcp_md5_addr addr, int family,
				1046	u8 prefixlen)
				1047	{
				1048	struct tcp_md5sig_key *key;
				1049
				1050	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
				1051	if (!key)
				1052	return -ENOENT;
				1053	hlist_del_rcu(&key->node);
				1054	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
				1055	kfree_rcu(key, rcu);
				1056	return 0;
				1057	}
				1058	EXPORT_SYMBOL(tcp_md5_do_del);
				1059
				1060	static void tcp_clear_md5_list(struct sock *sk)
				1061	{
				1062	struct tcp_sock *tp = tcp_sk(sk);
				1063	struct tcp_md5sig_key *key;
				1064	struct hlist_node *n;
				1065	struct tcp_md5sig_info *md5sig;
				1066
				1067	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
				1068
				1069	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
				1070	hlist_del_rcu(&key->node);
				1071	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
				1072	kfree_rcu(key, rcu);
				1073	}
				1074	}
				1075
				1076	static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
				1077	char __user *optval, int optlen)
				1078	{
				1079	struct tcp_md5sig cmd;
				1080	struct sockaddr_in sin = (struct sockaddr_in )&cmd.tcpm_addr;
				1081	u8 prefixlen = 32;
				1082
				1083	if (optlen < sizeof(cmd))
				1084	return -EINVAL;
				1085
				1086	if (copy_from_user(&cmd, optval, sizeof(cmd)))
				1087	return -EFAULT;
				1088
				1089	if (sin->sin_family != AF_INET)
				1090	return -EINVAL;
				1091
				1092	if (optname == TCP_MD5SIG_EXT &&
				1093	cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
				1094	prefixlen = cmd.tcpm_prefixlen;
				1095	if (prefixlen > 32)
				1096	return -EINVAL;
				1097	}
				1098
				1099	if (!cmd.tcpm_keylen)
				1100	return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				1101	AF_INET, prefixlen);
				1102
				1103	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
				1104	return -EINVAL;
				1105
				1106	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				1107	AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
				1108	GFP_KERNEL);
				1109	}
				1110
				1111	static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
				1112	__be32 daddr, __be32 saddr,
				1113	const struct tcphdr *th, int nbytes)
				1114	{
				1115	struct tcp4_pseudohdr *bp;
				1116	struct scatterlist sg;
				1117	struct tcphdr *_th;
				1118
				1119	bp = hp->scratch;
				1120	bp->saddr = saddr;
				1121	bp->daddr = daddr;
				1122	bp->pad = 0;
				1123	bp->protocol = IPPROTO_TCP;
				1124	bp->len = cpu_to_be16(nbytes);
				1125
				1126	_th = (struct tcphdr *)(bp + 1);
				1127	memcpy(_th, th, sizeof(*th));
				1128	_th->check = 0;
				1129
				1130	sg_init_one(&sg, bp, sizeof(bp) + sizeof(th));
				1131	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				1132	sizeof(bp) + sizeof(th));
				1133	return crypto_ahash_update(hp->md5_req);
				1134	}
				1135
				1136	static int tcp_v4_md5_hash_hdr(char md5_hash, const struct tcp_md5sig_key key,
				1137	__be32 daddr, __be32 saddr, const struct tcphdr *th)
				1138	{
				1139	struct tcp_md5sig_pool *hp;
				1140	struct ahash_request *req;
				1141
				1142	hp = tcp_get_md5sig_pool();
				1143	if (!hp)
				1144	goto clear_hash_noput;
				1145	req = hp->md5_req;
				1146
				1147	if (crypto_ahash_init(req))
				1148	goto clear_hash;
				1149	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
				1150	goto clear_hash;
				1151	if (tcp_md5_hash_key(hp, key))
				1152	goto clear_hash;
				1153	ahash_request_set_crypt(req, NULL, md5_hash, 0);
				1154	if (crypto_ahash_final(req))
				1155	goto clear_hash;
				1156
				1157	tcp_put_md5sig_pool();
				1158	return 0;
				1159
				1160	clear_hash:
				1161	tcp_put_md5sig_pool();
				1162	clear_hash_noput:
				1163	memset(md5_hash, 0, 16);
				1164	return 1;
				1165	}
				1166
				1167	int tcp_v4_md5_hash_skb(char md5_hash, const struct tcp_md5sig_key key,
				1168	const struct sock *sk,
				1169	const struct sk_buff *skb)
				1170	{
				1171	struct tcp_md5sig_pool *hp;
				1172	struct ahash_request *req;
				1173	const struct tcphdr *th = tcp_hdr(skb);
				1174	__be32 saddr, daddr;
				1175
				1176	if (sk) { /* valid for establish/request sockets */
				1177	saddr = sk->sk_rcv_saddr;
				1178	daddr = sk->sk_daddr;
				1179	} else {
				1180	const struct iphdr *iph = ip_hdr(skb);
				1181	saddr = iph->saddr;
				1182	daddr = iph->daddr;
				1183	}
				1184
				1185	hp = tcp_get_md5sig_pool();
				1186	if (!hp)
				1187	goto clear_hash_noput;
				1188	req = hp->md5_req;
				1189
				1190	if (crypto_ahash_init(req))
				1191	goto clear_hash;
				1192
				1193	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
				1194	goto clear_hash;
				1195	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
				1196	goto clear_hash;
				1197	if (tcp_md5_hash_key(hp, key))
				1198	goto clear_hash;
				1199	ahash_request_set_crypt(req, NULL, md5_hash, 0);
				1200	if (crypto_ahash_final(req))
				1201	goto clear_hash;
				1202
				1203	tcp_put_md5sig_pool();
				1204	return 0;
				1205
				1206	clear_hash:
				1207	tcp_put_md5sig_pool();
				1208	clear_hash_noput:
				1209	memset(md5_hash, 0, 16);
				1210	return 1;
				1211	}
				1212	EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
				1213
				1214	#endif
				1215
				1216	/* Called with rcu_read_lock() */
				1217	static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
				1218	const struct sk_buff *skb)
				1219	{
				1220	#ifdef CONFIG_TCP_MD5SIG
				1221	/*
				1222	* This gets called for each TCP segment that arrives
				1223	* so we want to be efficient.
				1224	* We have 3 drop cases:
				1225	* o No MD5 hash and one expected.
				1226	* o MD5 hash and we're not expecting one.
				1227	* o MD5 hash and its wrong.
				1228	*/
				1229	const __u8 *hash_location = NULL;
				1230	struct tcp_md5sig_key *hash_expected;
				1231	const struct iphdr *iph = ip_hdr(skb);
				1232	const struct tcphdr *th = tcp_hdr(skb);
				1233	int genhash;
				1234	unsigned char newhash[16];
				1235
				1236	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
				1237	AF_INET);
				1238	hash_location = tcp_parse_md5sig_option(th);
				1239
				1240	/* We've parsed the options - do we have a hash? */
				1241	if (!hash_expected && !hash_location)
				1242	return false;
				1243
				1244	if (hash_expected && !hash_location) {
				1245	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
				1246	return true;
				1247	}
				1248
				1249	if (!hash_expected && hash_location) {
				1250	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
				1251	return true;
				1252	}
				1253
				1254	/* Okay, so this is hash_expected and hash_location -
				1255	* so we need to calculate the checksum.
				1256	*/
				1257	genhash = tcp_v4_md5_hash_skb(newhash,
				1258	hash_expected,
				1259	NULL, skb);
				1260
				1261	if (genhash \|\| memcmp(hash_location, newhash, 16) != 0) {
				1262	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
				1263	net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				1264	&iph->saddr, ntohs(th->source),
				1265	&iph->daddr, ntohs(th->dest),
				1266	genhash ? " tcp_v4_calc_md5_hash failed"
				1267	: "");
				1268	return true;
				1269	}
				1270	return false;
				1271	#endif
				1272	return false;
				1273	}
				1274
				1275	static void tcp_v4_init_req(struct request_sock *req,
				1276	const struct sock *sk_listener,
				1277	struct sk_buff *skb)
				1278	{
				1279	struct inet_request_sock *ireq = inet_rsk(req);
				1280	struct net *net = sock_net(sk_listener);
				1281
				1282	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
				1283	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
				1284	RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
				1285	}
				1286
				1287	static struct dst_entry tcp_v4_route_req(const struct sock sk,
				1288	struct flowi *fl,
				1289	const struct request_sock *req)
				1290	{
				1291	return inet_csk_route_req(sk, &fl->u.ip4, req);
				1292	}
				1293
				1294	struct request_sock_ops tcp_request_sock_ops __read_mostly = {
				1295	.family = PF_INET,
				1296	.obj_size = sizeof(struct tcp_request_sock),
				1297	.rtx_syn_ack = tcp_rtx_synack,
				1298	.send_ack = tcp_v4_reqsk_send_ack,
				1299	.destructor = tcp_v4_reqsk_destructor,
				1300	.send_reset = tcp_v4_send_reset,
				1301	.syn_ack_timeout = tcp_syn_ack_timeout,
				1302	};
				1303
				1304	static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
				1305	.mss_clamp = TCP_MSS_DEFAULT,
				1306	#ifdef CONFIG_TCP_MD5SIG
				1307	.req_md5_lookup = tcp_v4_md5_lookup,
				1308	.calc_md5_hash = tcp_v4_md5_hash_skb,
				1309	#endif
				1310	.init_req = tcp_v4_init_req,
				1311	#ifdef CONFIG_SYN_COOKIES
				1312	.cookie_init_seq = cookie_v4_init_sequence,
				1313	#endif
				1314	.route_req = tcp_v4_route_req,
				1315	.init_seq = tcp_v4_init_seq,
				1316	.init_ts_off = tcp_v4_init_ts_off,
				1317	.send_synack = tcp_v4_send_synack,
				1318	};
				1319
				1320	int tcp_v4_conn_request(struct sock sk, struct sk_buff skb)
				1321	{
				1322	/* Never answer to SYNs send to broadcast or multicast */
				1323	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST \| RTCF_MULTICAST))
				1324	goto drop;
				1325
				1326	return tcp_conn_request(&tcp_request_sock_ops,
				1327	&tcp_request_sock_ipv4_ops, sk, skb);
				1328
				1329	drop:
				1330	tcp_listendrop(sk);
				1331	return 0;
				1332	}
				1333	EXPORT_SYMBOL(tcp_v4_conn_request);
				1334
				1335
				1336	/*
				1337	* The three way handshake has completed - we got a valid synack -
				1338	* now create the new socket.
				1339	*/
				1340	struct sock tcp_v4_syn_recv_sock(const struct sock sk, struct sk_buff *skb,
				1341	struct request_sock *req,
				1342	struct dst_entry *dst,
				1343	struct request_sock *req_unhash,
				1344	bool *own_req)
				1345	{
				1346	struct inet_request_sock *ireq;
				1347	struct inet_sock *newinet;
				1348	struct tcp_sock *newtp;
				1349	struct sock *newsk;
				1350	#ifdef CONFIG_TCP_MD5SIG
				1351	struct tcp_md5sig_key *key;
				1352	#endif
				1353	struct ip_options_rcu *inet_opt;
				1354
				1355	if (sk_acceptq_is_full(sk))
				1356	goto exit_overflow;
				1357
				1358	newsk = tcp_create_openreq_child(sk, req, skb);
				1359	if (!newsk)
				1360	goto exit_nonewsk;
				1361
				1362	newsk->sk_gso_type = SKB_GSO_TCPV4;
				1363	inet_sk_rx_dst_set(newsk, skb);
				1364
				1365	newtp = tcp_sk(newsk);
				1366	newinet = inet_sk(newsk);
				1367	ireq = inet_rsk(req);
				1368	sk_daddr_set(newsk, ireq->ir_rmt_addr);
				1369	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
				1370	newsk->sk_bound_dev_if = ireq->ir_iif;
				1371	newinet->inet_saddr = ireq->ir_loc_addr;
				1372	inet_opt = rcu_dereference(ireq->ireq_opt);
				1373	RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
				1374	newinet->mc_index = inet_iif(skb);
				1375	newinet->mc_ttl = ip_hdr(skb)->ttl;
				1376	newinet->rcv_tos = ip_hdr(skb)->tos;
				1377	inet_csk(newsk)->icsk_ext_hdr_len = 0;
				1378	if (inet_opt)
				1379	inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
				1380	newinet->inet_id = prandom_u32();
				1381
				1382	if (!dst) {
				1383	dst = inet_csk_route_child_sock(sk, newsk, req);
				1384	if (!dst)
				1385	goto put_and_exit;
				1386	} else {
				1387	/* syncookie case : see end of cookie_v4_check() */
				1388	}
				1389	sk_setup_caps(newsk, dst);
				1390
				1391	tcp_ca_openreq_child(newsk, dst);
				1392
				1393	tcp_sync_mss(newsk, dst_mtu(dst));
				1394	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
				1395
				1396	tcp_initialize_rcv_mss(newsk);
				1397
				1398	#ifdef CONFIG_TCP_MD5SIG
				1399	/* Copy over the MD5 key from the original socket */
				1400	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				1401	AF_INET);
				1402	if (key) {
				1403	/*
				1404	* We're using one, so create a matching key
				1405	* on the newsk structure. If we fail to get
				1406	* memory, then we end up not copying the key
				1407	* across. Shucks.
				1408	*/
				1409	tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
				1410	AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
				1411	sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
				1412	}
				1413	#endif
				1414
				1415	if (__inet_inherit_port(sk, newsk) < 0)
				1416	goto put_and_exit;
				1417	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
				1418	if (likely(*own_req)) {
				1419	tcp_move_syn(newtp, req);
				1420	ireq->ireq_opt = NULL;
				1421	} else {
				1422	newinet->inet_opt = NULL;
				1423	}
				1424	return newsk;
				1425
				1426	exit_overflow:
				1427	NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
				1428	exit_nonewsk:
				1429	dst_release(dst);
				1430	exit:
				1431	tcp_listendrop(sk);
				1432	return NULL;
				1433	put_and_exit:
				1434	newinet->inet_opt = NULL;
				1435	inet_csk_prepare_forced_close(newsk);
				1436	tcp_done(newsk);
				1437	goto exit;
				1438	}
				1439	EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
				1440
				1441	static struct sock tcp_v4_cookie_check(struct sock sk, struct sk_buff *skb)
				1442	{
				1443	#ifdef CONFIG_SYN_COOKIES
				1444	const struct tcphdr *th = tcp_hdr(skb);
				1445
				1446	if (!th->syn)
				1447	sk = cookie_v4_check(sk, skb);
				1448	#endif
				1449	return sk;
				1450	}
				1451
				1452	/* The socket must have it's spinlock held when we get
				1453	* here, unless it is a TCP_LISTEN socket.
				1454	*
				1455	* We have a potential double-lock case here, so even when
				1456	* doing backlog processing we use the BH locking scheme.
				1457	* This is because we cannot sleep with the original spinlock
				1458	* held.
				1459	*/
				1460	int tcp_v4_do_rcv(struct sock sk, struct sk_buff skb)
				1461	{
				1462	struct sock *rsk;
				1463
				1464	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
				1465	struct dst_entry *dst = sk->sk_rx_dst;
				1466
				1467	sock_rps_save_rxhash(sk, skb);
				1468	sk_mark_napi_id(sk, skb);
				1469	if (dst) {
				1470	if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif \|\|
				1471	!dst->ops->check(dst, 0)) {
				1472	dst_release(dst);
				1473	sk->sk_rx_dst = NULL;
				1474	}
				1475	}
				1476	tcp_rcv_established(sk, skb, tcp_hdr(skb));
				1477	return 0;
				1478	}
				1479
				1480	if (tcp_checksum_complete(skb))
				1481	goto csum_err;
				1482
				1483	if (sk->sk_state == TCP_LISTEN) {
				1484	struct sock *nsk = tcp_v4_cookie_check(sk, skb);
				1485
				1486	if (!nsk)
				1487	goto discard;
				1488	if (nsk != sk) {
				1489	if (tcp_child_process(sk, nsk, skb)) {
				1490	rsk = nsk;
				1491	goto reset;
				1492	}
				1493	return 0;
				1494	}
				1495	} else
				1496	sock_rps_save_rxhash(sk, skb);
				1497
				1498	if (tcp_rcv_state_process(sk, skb)) {
				1499	rsk = sk;
				1500	goto reset;
				1501	}
				1502	return 0;
				1503
				1504	reset:
				1505	tcp_v4_send_reset(rsk, skb);
				1506	discard:
				1507	kfree_skb(skb);
				1508	/* Be careful here. If this function gets more complicated and
				1509	* gcc suffers from register pressure on the x86, sk (in %ebx)
				1510	* might be destroyed here. This current version compiles correctly,
				1511	* but you have been warned.
				1512	*/
				1513	return 0;
				1514
				1515	csum_err:
				1516	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
				1517	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
				1518	goto discard;
				1519	}
				1520	EXPORT_SYMBOL(tcp_v4_do_rcv);
				1521
				1522	int tcp_v4_early_demux(struct sk_buff *skb)
				1523	{
				1524	const struct iphdr *iph;
				1525	const struct tcphdr *th;
				1526	struct sock *sk;
				1527
				1528	if (skb->pkt_type != PACKET_HOST)
				1529	return 0;
				1530
				1531	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
				1532	return 0;
				1533
				1534	iph = ip_hdr(skb);
				1535	th = tcp_hdr(skb);
				1536
				1537	if (th->doff < sizeof(struct tcphdr) / 4)
				1538	return 0;
				1539
				1540	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
				1541	iph->saddr, th->source,
				1542	iph->daddr, ntohs(th->dest),
				1543	skb->skb_iif, inet_sdif(skb));
				1544	if (sk) {
				1545	skb->sk = sk;
				1546	skb->destructor = sock_edemux;
				1547	if (sk_fullsock(sk)) {
				1548	struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
				1549
				1550	if (dst)
				1551	dst = dst_check(dst, 0);
				1552	if (dst &&
				1553	inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				1554	skb_dst_set_noref(skb, dst);
				1555	}
				1556	}
				1557	return 0;
				1558	}
				1559
				1560	bool tcp_add_backlog(struct sock sk, struct sk_buff skb)
				1561	{
				1562	u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
				1563
				1564	/* Only socket owner can try to collapse/prune rx queues
				1565	* to reduce memory overhead, so add a little headroom here.
				1566	* Few sockets backlog are possibly concurrently non empty.
				1567	*/
				1568	limit += 64*1024;
				1569
				1570	/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
				1571	* we can fix skb->truesize to its real value to avoid future drops.
				1572	* This is valid because skb is not yet charged to the socket.
				1573	* It has been noticed pure SACK packets were sometimes dropped
				1574	* (if cooked by drivers without copybreak feature).
				1575	*/
				1576	skb_condense(skb);
				1577
				1578	if (unlikely(sk_add_backlog(sk, skb, limit))) {
				1579	bh_unlock_sock(sk);
				1580	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
				1581	return true;
				1582	}
				1583	return false;
				1584	}
				1585	EXPORT_SYMBOL(tcp_add_backlog);
				1586
				1587	int tcp_filter(struct sock sk, struct sk_buff skb)
				1588	{
				1589	struct tcphdr th = (struct tcphdr )skb->data;
				1590
				1591	return sk_filter_trim_cap(sk, skb, th->doff * 4);
				1592	}
				1593	EXPORT_SYMBOL(tcp_filter);
				1594
				1595	static void tcp_v4_restore_cb(struct sk_buff *skb)
				1596	{
				1597	memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
				1598	sizeof(struct inet_skb_parm));
				1599	}
				1600
				1601	static void tcp_v4_fill_cb(struct sk_buff skb, const struct iphdr iph,
				1602	const struct tcphdr *th)
				1603	{
				1604	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
				1605	* barrier() makes sure compiler wont play fool^Waliasing games.
				1606	*/
				1607	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
				1608	sizeof(struct inet_skb_parm));
				1609	barrier();
				1610
				1611	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
				1612	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				1613	skb->len - th->doff * 4);
				1614	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
				1615	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
				1616	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
				1617	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
				1618	TCP_SKB_CB(skb)->sacked = 0;
				1619	TCP_SKB_CB(skb)->has_rxtstamp =
				1620	skb->tstamp \|\| skb_hwtstamps(skb)->hwtstamp;
				1621	}
				1622
				1623	/*
				1624	* From tcp_input.c
				1625	*/
				1626
				1627	int tcp_v4_rcv(struct sk_buff *skb)
				1628	{
				1629	struct net *net = dev_net(skb->dev);
				1630	int sdif = inet_sdif(skb);
				1631	const struct iphdr *iph;
				1632	const struct tcphdr *th;
				1633	bool refcounted;
				1634	struct sock *sk;
				1635	int ret;
				1636
				1637	if (skb->pkt_type != PACKET_HOST)
				1638	goto discard_it;
				1639
				1640	/* Count it even if it's bad */
				1641	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
				1642
				1643	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
				1644	goto discard_it;
				1645
				1646	th = (const struct tcphdr *)skb->data;
				1647
				1648	if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
				1649	goto bad_packet;
				1650	if (!pskb_may_pull(skb, th->doff * 4))
				1651	goto discard_it;
				1652
				1653	/* An explanation is required here, I think.
				1654	* Packet length and doff are validated by header prediction,
				1655	* provided case of th->doff==0 is eliminated.
				1656	* So, we defer the checks. */
				1657
				1658	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
				1659	goto csum_error;
				1660
				1661	th = (const struct tcphdr *)skb->data;
				1662	iph = ip_hdr(skb);
				1663	lookup:
				1664	sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
				1665	th->dest, sdif, &refcounted);
				1666	if (!sk)
				1667	goto no_tcp_socket;
				1668
				1669	process:
				1670	if (sk->sk_state == TCP_TIME_WAIT)
				1671	goto do_time_wait;
				1672
				1673	if (sk->sk_state == TCP_NEW_SYN_RECV) {
				1674	struct request_sock *req = inet_reqsk(sk);
				1675	struct sock *nsk;
				1676
				1677	sk = req->rsk_listener;
				1678	if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
				1679	sk_drops_add(sk, skb);
				1680	reqsk_put(req);
				1681	goto discard_it;
				1682	}
				1683	if (tcp_checksum_complete(skb)) {
				1684	reqsk_put(req);
				1685	goto csum_error;
				1686	}
				1687	if (unlikely(sk->sk_state != TCP_LISTEN)) {
				1688	inet_csk_reqsk_queue_drop_and_put(sk, req);
				1689	goto lookup;
				1690	}
				1691	/* We own a reference on the listener, increase it again
				1692	* as we might lose it too soon.
				1693	*/
				1694	sock_hold(sk);
				1695	refcounted = true;
				1696	nsk = NULL;
				1697	if (!tcp_filter(sk, skb)) {
				1698	th = (const struct tcphdr *)skb->data;
				1699	iph = ip_hdr(skb);
				1700	tcp_v4_fill_cb(skb, iph, th);
				1701	nsk = tcp_check_req(sk, skb, req, false);
				1702	}
				1703	if (!nsk) {
				1704	reqsk_put(req);
				1705	goto discard_and_relse;
				1706	}
				1707	if (nsk == sk) {
				1708	reqsk_put(req);
				1709	tcp_v4_restore_cb(skb);
				1710	} else if (tcp_child_process(sk, nsk, skb)) {
				1711	tcp_v4_send_reset(nsk, skb);
				1712	goto discard_and_relse;
				1713	} else {
				1714	sock_put(sk);
				1715	return 0;
				1716	}
				1717	}
				1718	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
				1719	__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
				1720	goto discard_and_relse;
				1721	}
				1722
				1723	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
				1724	goto discard_and_relse;
				1725
				1726	if (tcp_v4_inbound_md5_hash(sk, skb))
				1727	goto discard_and_relse;
				1728
				1729	nf_reset(skb);
				1730
				1731	if (tcp_filter(sk, skb))
				1732	goto discard_and_relse;
				1733	th = (const struct tcphdr *)skb->data;
				1734	iph = ip_hdr(skb);
				1735	tcp_v4_fill_cb(skb, iph, th);
				1736
				1737	skb->dev = NULL;
				1738
				1739	if (sk->sk_state == TCP_LISTEN) {
				1740	ret = tcp_v4_do_rcv(sk, skb);
				1741	goto put_and_return;
				1742	}
				1743
				1744	sk_incoming_cpu_update(sk);
				1745
				1746	bh_lock_sock_nested(sk);
				1747	tcp_segs_in(tcp_sk(sk), skb);
				1748	ret = 0;
				1749	if (!sock_owned_by_user(sk)) {
				1750	ret = tcp_v4_do_rcv(sk, skb);
				1751	} else if (tcp_add_backlog(sk, skb)) {
				1752	goto discard_and_relse;
				1753	}
				1754	bh_unlock_sock(sk);
				1755
				1756	put_and_return:
				1757	if (refcounted)
				1758	sock_put(sk);
				1759
				1760	return ret;
				1761
				1762	no_tcp_socket:
				1763	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
				1764	goto discard_it;
				1765
				1766	tcp_v4_fill_cb(skb, iph, th);
				1767
				1768	if (tcp_checksum_complete(skb)) {
				1769	csum_error:
				1770	__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
				1771	bad_packet:
				1772	__TCP_INC_STATS(net, TCP_MIB_INERRS);
				1773	} else {
				1774	tcp_v4_send_reset(NULL, skb);
				1775	}
				1776
				1777	discard_it:
				1778	/* Discard frame. */
				1779	kfree_skb(skb);
				1780	return 0;
				1781
				1782	discard_and_relse:
				1783	sk_drops_add(sk, skb);
				1784	if (refcounted)
				1785	sock_put(sk);
				1786	goto discard_it;
				1787
				1788	do_time_wait:
				1789	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
				1790	inet_twsk_put(inet_twsk(sk));
				1791	goto discard_it;
				1792	}
				1793
				1794	tcp_v4_fill_cb(skb, iph, th);
				1795
				1796	if (tcp_checksum_complete(skb)) {
				1797	inet_twsk_put(inet_twsk(sk));
				1798	goto csum_error;
				1799	}
				1800	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
				1801	case TCP_TW_SYN: {
				1802	struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
				1803	&tcp_hashinfo, skb,
				1804	__tcp_hdrlen(th),
				1805	iph->saddr, th->source,
				1806	iph->daddr, th->dest,
				1807	inet_iif(skb),
				1808	sdif);
				1809	if (sk2) {
				1810	inet_twsk_deschedule_put(inet_twsk(sk));
				1811	sk = sk2;
				1812	tcp_v4_restore_cb(skb);
				1813	refcounted = false;
				1814	goto process;
				1815	}
				1816	/* Fall through to ACK */
				1817	}
				1818	case TCP_TW_ACK:
				1819	tcp_v4_timewait_ack(sk, skb);
				1820	break;
				1821	case TCP_TW_RST:
				1822	tcp_v4_send_reset(sk, skb);
				1823	inet_twsk_deschedule_put(inet_twsk(sk));
				1824	goto discard_it;
				1825	case TCP_TW_SUCCESS:;
				1826	}
				1827	goto discard_it;
				1828	}
				1829
				1830	static struct timewait_sock_ops tcp_timewait_sock_ops = {
				1831	.twsk_obj_size = sizeof(struct tcp_timewait_sock),
				1832	.twsk_unique = tcp_twsk_unique,
				1833	.twsk_destructor= tcp_twsk_destructor,
				1834	};
				1835
				1836	void inet_sk_rx_dst_set(struct sock sk, const struct sk_buff skb)
				1837	{
				1838	struct dst_entry *dst = skb_dst(skb);
				1839
				1840	if (dst && dst_hold_safe(dst)) {
				1841	sk->sk_rx_dst = dst;
				1842	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
				1843	}
				1844	}
				1845	EXPORT_SYMBOL(inet_sk_rx_dst_set);
				1846
				1847	const struct inet_connection_sock_af_ops ipv4_specific = {
				1848	.queue_xmit = ip_queue_xmit,
				1849	.send_check = tcp_v4_send_check,
				1850	.rebuild_header = inet_sk_rebuild_header,
				1851	.sk_rx_dst_set = inet_sk_rx_dst_set,
				1852	.conn_request = tcp_v4_conn_request,
				1853	.syn_recv_sock = tcp_v4_syn_recv_sock,
				1854	.net_header_len = sizeof(struct iphdr),
				1855	.setsockopt = ip_setsockopt,
				1856	.getsockopt = ip_getsockopt,
				1857	.addr2sockaddr = inet_csk_addr2sockaddr,
				1858	.sockaddr_len = sizeof(struct sockaddr_in),
				1859	#ifdef CONFIG_COMPAT
				1860	.compat_setsockopt = compat_ip_setsockopt,
				1861	.compat_getsockopt = compat_ip_getsockopt,
				1862	#endif
				1863	.mtu_reduced = tcp_v4_mtu_reduced,
				1864	};
				1865	EXPORT_SYMBOL(ipv4_specific);
				1866
				1867	#ifdef CONFIG_TCP_MD5SIG
				1868	static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
				1869	.md5_lookup = tcp_v4_md5_lookup,
				1870	.calc_md5_hash = tcp_v4_md5_hash_skb,
				1871	.md5_parse = tcp_v4_parse_md5_keys,
				1872	};
				1873	#endif
				1874
				1875	/* NOTE: A lot of things set to zero explicitly by call to
				1876	* sk_alloc() so need not be done here.
				1877	*/
				1878	static int tcp_v4_init_sock(struct sock *sk)
				1879	{
				1880	struct inet_connection_sock *icsk = inet_csk(sk);
				1881
				1882	tcp_init_sock(sk);
				1883
				1884	icsk->icsk_af_ops = &ipv4_specific;
				1885
				1886	#ifdef CONFIG_TCP_MD5SIG
				1887	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
				1888	#endif
				1889
				1890	return 0;
				1891	}
				1892
				1893	void tcp_v4_destroy_sock(struct sock *sk)
				1894	{
				1895	struct tcp_sock *tp = tcp_sk(sk);
				1896
				1897	tcp_clear_xmit_timers(sk);
				1898
				1899	tcp_cleanup_congestion_control(sk);
				1900
				1901	tcp_cleanup_ulp(sk);
				1902
				1903	/* Cleanup up the write buffer. */
				1904	tcp_write_queue_purge(sk);
				1905
				1906	/* Check if we want to disable active TFO */
				1907	tcp_fastopen_active_disable_ofo_check(sk);
				1908
				1909	/* Cleans up our, hopefully empty, out_of_order_queue. */
				1910	skb_rbtree_purge(&tp->out_of_order_queue);
				1911
				1912	#ifdef CONFIG_TCP_MD5SIG
				1913	/* Clean up the MD5 key list, if any */
				1914	if (tp->md5sig_info) {
				1915	tcp_clear_md5_list(sk);
				1916	kfree_rcu(tp->md5sig_info, rcu);
				1917	tp->md5sig_info = NULL;
				1918	}
				1919	#endif
				1920
				1921	/* Clean up a referenced TCP bind bucket. */
				1922	if (inet_csk(sk)->icsk_bind_hash)
				1923	inet_put_port(sk);
				1924
				1925	BUG_ON(tp->fastopen_rsk);
				1926
				1927	/* If socket is aborted during connect operation */
				1928	tcp_free_fastopen_req(tp);
				1929	tcp_saved_syn_free(tp);
				1930
				1931	sk_sockets_allocated_dec(sk);
				1932	}
				1933	EXPORT_SYMBOL(tcp_v4_destroy_sock);
				1934
				1935	#ifdef CONFIG_PROC_FS
				1936	/* Proc filesystem TCP sock list dumping. */
				1937
				1938	/*
				1939	* Get next listener socket follow cur. If cur is NULL, get first socket
				1940	* starting from bucket given in st->bucket; when st->bucket is zero the
				1941	* very first socket in the hash table is returned.
				1942	*/
				1943	static void listening_get_next(struct seq_file seq, void *cur)
				1944	{
				1945	struct tcp_iter_state *st = seq->private;
				1946	struct net *net = seq_file_net(seq);
				1947	struct inet_listen_hashbucket *ilb;
				1948	struct hlist_nulls_node *node;
				1949	struct sock *sk = cur;
				1950
				1951	if (!sk) {
				1952	get_head:
				1953	ilb = &tcp_hashinfo.listening_hash[st->bucket];
				1954	spin_lock(&ilb->lock);
				1955	sk = sk_nulls_head(&ilb->nulls_head);
				1956	st->offset = 0;
				1957	goto get_sk;
				1958	}
				1959	ilb = &tcp_hashinfo.listening_hash[st->bucket];
				1960	++st->num;
				1961	++st->offset;
				1962
				1963	sk = sk_nulls_next(sk);
				1964	get_sk:
				1965	sk_nulls_for_each_from(sk, node) {
				1966	if (!net_eq(sock_net(sk), net))
				1967	continue;
				1968	if (sk->sk_family == st->family)
				1969	return sk;
				1970	}
				1971	spin_unlock(&ilb->lock);
				1972	st->offset = 0;
				1973	if (++st->bucket < INET_LHTABLE_SIZE)
				1974	goto get_head;
				1975	return NULL;
				1976	}
				1977
				1978	static void listening_get_idx(struct seq_file seq, loff_t *pos)
				1979	{
				1980	struct tcp_iter_state *st = seq->private;
				1981	void *rc;
				1982
				1983	st->bucket = 0;
				1984	st->offset = 0;
				1985	rc = listening_get_next(seq, NULL);
				1986
				1987	while (rc && *pos) {
				1988	rc = listening_get_next(seq, rc);
				1989	--*pos;
				1990	}
				1991	return rc;
				1992	}
				1993
				1994	static inline bool empty_bucket(const struct tcp_iter_state *st)
				1995	{
				1996	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
				1997	}
				1998
				1999	/*
				2000	* Get first established socket starting from bucket given in st->bucket.
				2001	* If st->bucket is zero, the very first socket in the hash is returned.
				2002	*/
				2003	static void established_get_first(struct seq_file seq)
				2004	{
				2005	struct tcp_iter_state *st = seq->private;
				2006	struct net *net = seq_file_net(seq);
				2007	void *rc = NULL;
				2008
				2009	st->offset = 0;
				2010	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
				2011	struct sock *sk;
				2012	struct hlist_nulls_node *node;
				2013	spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
				2014
				2015	/* Lockless fast path for the common case of empty buckets */
				2016	if (empty_bucket(st))
				2017	continue;
				2018
				2019	spin_lock_bh(lock);
				2020	sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
				2021	if (sk->sk_family != st->family \|\|
				2022	!net_eq(sock_net(sk), net)) {
				2023	continue;
				2024	}
				2025	rc = sk;
				2026	goto out;
				2027	}
				2028	spin_unlock_bh(lock);
				2029	}
				2030	out:
				2031	return rc;
				2032	}
				2033
				2034	static void established_get_next(struct seq_file seq, void *cur)
				2035	{
				2036	struct sock *sk = cur;
				2037	struct hlist_nulls_node *node;
				2038	struct tcp_iter_state *st = seq->private;
				2039	struct net *net = seq_file_net(seq);
				2040
				2041	++st->num;
				2042	++st->offset;
				2043
				2044	sk = sk_nulls_next(sk);
				2045
				2046	sk_nulls_for_each_from(sk, node) {
				2047	if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
				2048	return sk;
				2049	}
				2050
				2051	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
				2052	++st->bucket;
				2053	return established_get_first(seq);
				2054	}
				2055
				2056	static void established_get_idx(struct seq_file seq, loff_t pos)
				2057	{
				2058	struct tcp_iter_state *st = seq->private;
				2059	void *rc;
				2060
				2061	st->bucket = 0;
				2062	rc = established_get_first(seq);
				2063
				2064	while (rc && pos) {
				2065	rc = established_get_next(seq, rc);
				2066	--pos;
				2067	}
				2068	return rc;
				2069	}
				2070
				2071	static void tcp_get_idx(struct seq_file seq, loff_t pos)
				2072	{
				2073	void *rc;
				2074	struct tcp_iter_state *st = seq->private;
				2075
				2076	st->state = TCP_SEQ_STATE_LISTENING;
				2077	rc = listening_get_idx(seq, &pos);
				2078
				2079	if (!rc) {
				2080	st->state = TCP_SEQ_STATE_ESTABLISHED;
				2081	rc = established_get_idx(seq, pos);
				2082	}
				2083
				2084	return rc;
				2085	}
				2086
				2087	static void tcp_seek_last_pos(struct seq_file seq)
				2088	{
				2089	struct tcp_iter_state *st = seq->private;
				2090	int offset = st->offset;
				2091	int orig_num = st->num;
				2092	void *rc = NULL;
				2093
				2094	switch (st->state) {
				2095	case TCP_SEQ_STATE_LISTENING:
				2096	if (st->bucket >= INET_LHTABLE_SIZE)
				2097	break;
				2098	st->state = TCP_SEQ_STATE_LISTENING;
				2099	rc = listening_get_next(seq, NULL);
				2100	while (offset-- && rc)
				2101	rc = listening_get_next(seq, rc);
				2102	if (rc)
				2103	break;
				2104	st->bucket = 0;
				2105	st->state = TCP_SEQ_STATE_ESTABLISHED;
				2106	/* Fallthrough */
				2107	case TCP_SEQ_STATE_ESTABLISHED:
				2108	if (st->bucket > tcp_hashinfo.ehash_mask)
				2109	break;
				2110	rc = established_get_first(seq);
				2111	while (offset-- && rc)
				2112	rc = established_get_next(seq, rc);
				2113	}
				2114
				2115	st->num = orig_num;
				2116
				2117	return rc;
				2118	}
				2119
				2120	static void tcp_seq_start(struct seq_file seq, loff_t *pos)
				2121	{
				2122	struct tcp_iter_state *st = seq->private;
				2123	void *rc;
				2124
				2125	if (pos && pos == st->last_pos) {
				2126	rc = tcp_seek_last_pos(seq);
				2127	if (rc)
				2128	goto out;
				2129	}
				2130
				2131	st->state = TCP_SEQ_STATE_LISTENING;
				2132	st->num = 0;
				2133	st->bucket = 0;
				2134	st->offset = 0;
				2135	rc = pos ? tcp_get_idx(seq, pos - 1) : SEQ_START_TOKEN;
				2136
				2137	out:
				2138	st->last_pos = *pos;
				2139	return rc;
				2140	}
				2141
				2142	static void tcp_seq_next(struct seq_file seq, void v, loff_t pos)
				2143	{
				2144	struct tcp_iter_state *st = seq->private;
				2145	void *rc = NULL;
				2146
				2147	if (v == SEQ_START_TOKEN) {
				2148	rc = tcp_get_idx(seq, 0);
				2149	goto out;
				2150	}
				2151
				2152	switch (st->state) {
				2153	case TCP_SEQ_STATE_LISTENING:
				2154	rc = listening_get_next(seq, v);
				2155	if (!rc) {
				2156	st->state = TCP_SEQ_STATE_ESTABLISHED;
				2157	st->bucket = 0;
				2158	st->offset = 0;
				2159	rc = established_get_first(seq);
				2160	}
				2161	break;
				2162	case TCP_SEQ_STATE_ESTABLISHED:
				2163	rc = established_get_next(seq, v);
				2164	break;
				2165	}
				2166	out:
				2167	++*pos;
				2168	st->last_pos = *pos;
				2169	return rc;
				2170	}
				2171
				2172	static void tcp_seq_stop(struct seq_file seq, void v)
				2173	{
				2174	struct tcp_iter_state *st = seq->private;
				2175
				2176	switch (st->state) {
				2177	case TCP_SEQ_STATE_LISTENING:
				2178	if (v != SEQ_START_TOKEN)
				2179	spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
				2180	break;
				2181	case TCP_SEQ_STATE_ESTABLISHED:
				2182	if (v)
				2183	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
				2184	break;
				2185	}
				2186	}
				2187
				2188	int tcp_seq_open(struct inode inode, struct file file)
				2189	{
				2190	struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
				2191	struct tcp_iter_state *s;
				2192	int err;
				2193
				2194	err = seq_open_net(inode, file, &afinfo->seq_ops,
				2195	sizeof(struct tcp_iter_state));
				2196	if (err < 0)
				2197	return err;
				2198
				2199	s = ((struct seq_file *)file->private_data)->private;
				2200	s->family = afinfo->family;
				2201	s->last_pos = 0;
				2202	return 0;
				2203	}
				2204	EXPORT_SYMBOL(tcp_seq_open);
				2205
				2206	int tcp_proc_register(struct net net, struct tcp_seq_afinfo afinfo)
				2207	{
				2208	int rc = 0;
				2209	struct proc_dir_entry *p;
				2210
				2211	afinfo->seq_ops.start = tcp_seq_start;
				2212	afinfo->seq_ops.next = tcp_seq_next;
				2213	afinfo->seq_ops.stop = tcp_seq_stop;
				2214
				2215	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
				2216	afinfo->seq_fops, afinfo);
				2217	if (!p)
				2218	rc = -ENOMEM;
				2219	return rc;
				2220	}
				2221	EXPORT_SYMBOL(tcp_proc_register);
				2222
				2223	void tcp_proc_unregister(struct net net, struct tcp_seq_afinfo afinfo)
				2224	{
				2225	remove_proc_entry(afinfo->name, net->proc_net);
				2226	}
				2227	EXPORT_SYMBOL(tcp_proc_unregister);
				2228
				2229	static void get_openreq4(const struct request_sock *req,
				2230	struct seq_file *f, int i)
				2231	{
				2232	const struct inet_request_sock *ireq = inet_rsk(req);
				2233	long delta = req->rsk_timer.expires - jiffies;
				2234
				2235	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
				2236	" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
				2237	i,
				2238	ireq->ir_loc_addr,
				2239	ireq->ir_num,
				2240	ireq->ir_rmt_addr,
				2241	ntohs(ireq->ir_rmt_port),
				2242	TCP_SYN_RECV,
				2243	0, 0, /* could print option size, but that is af dependent. */
				2244	1, /* timers active (only the expire timer) */
				2245	jiffies_delta_to_clock_t(delta),
				2246	req->num_timeout,
				2247	from_kuid_munged(seq_user_ns(f),
				2248	sock_i_uid(req->rsk_listener)),
				2249	0, /* non standard timer */
				2250	0, /* open_requests have no inode */
				2251	0,
				2252	req);
				2253	}
				2254
				2255	static void get_tcp4_sock(struct sock sk, struct seq_file f, int i)
				2256	{
				2257	int timer_active;
				2258	unsigned long timer_expires;
				2259	const struct tcp_sock *tp = tcp_sk(sk);
				2260	const struct inet_connection_sock *icsk = inet_csk(sk);
				2261	const struct inet_sock *inet = inet_sk(sk);
				2262	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
				2263	__be32 dest = inet->inet_daddr;
				2264	__be32 src = inet->inet_rcv_saddr;
				2265	__u16 destp = ntohs(inet->inet_dport);
				2266	__u16 srcp = ntohs(inet->inet_sport);
				2267	int rx_queue;
				2268	int state;
				2269
				2270	if (icsk->icsk_pending == ICSK_TIME_RETRANS \|\|
				2271	icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT \|\|
				2272	icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
				2273	timer_active = 1;
				2274	timer_expires = icsk->icsk_timeout;
				2275	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
				2276	timer_active = 4;
				2277	timer_expires = icsk->icsk_timeout;
				2278	} else if (timer_pending(&sk->sk_timer)) {
				2279	timer_active = 2;
				2280	timer_expires = sk->sk_timer.expires;
				2281	} else {
				2282	timer_active = 0;
				2283	timer_expires = jiffies;
				2284	}
				2285
				2286	state = sk_state_load(sk);
				2287	if (state == TCP_LISTEN)
				2288	rx_queue = sk->sk_ack_backlog;
				2289	else
				2290	/* Because we don't lock the socket,
				2291	* we might find a transient negative value.
				2292	*/
				2293	rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
				2294
				2295	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
				2296	"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
				2297	i, src, srcp, dest, destp, state,
				2298	tp->write_seq - tp->snd_una,
				2299	rx_queue,
				2300	timer_active,
				2301	jiffies_delta_to_clock_t(timer_expires - jiffies),
				2302	icsk->icsk_retransmits,
				2303	from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
				2304	icsk->icsk_probes_out,
				2305	sock_i_ino(sk),
				2306	refcount_read(&sk->sk_refcnt), sk,
				2307	jiffies_to_clock_t(icsk->icsk_rto),
				2308	jiffies_to_clock_t(icsk->icsk_ack.ato),
				2309	(icsk->icsk_ack.quick << 1) \| icsk->icsk_ack.pingpong,
				2310	tp->snd_cwnd,
				2311	state == TCP_LISTEN ?
				2312	fastopenq->max_qlen :
				2313	(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
				2314	}
				2315
				2316	static void get_timewait4_sock(const struct inet_timewait_sock *tw,
				2317	struct seq_file *f, int i)
				2318	{
				2319	long delta = tw->tw_timer.expires - jiffies;
				2320	__be32 dest, src;
				2321	__u16 destp, srcp;
				2322
				2323	dest = tw->tw_daddr;
				2324	src = tw->tw_rcv_saddr;
				2325	destp = ntohs(tw->tw_dport);
				2326	srcp = ntohs(tw->tw_sport);
				2327
				2328	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
				2329	" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
				2330	i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
				2331	3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
				2332	refcount_read(&tw->tw_refcnt), tw);
				2333	}
				2334
				2335	#define TMPSZ 150
				2336
				2337	static int tcp4_seq_show(struct seq_file seq, void v)
				2338	{
				2339	struct tcp_iter_state *st;
				2340	struct sock *sk = v;
				2341
				2342	seq_setwidth(seq, TMPSZ - 1);
				2343	if (v == SEQ_START_TOKEN) {
				2344	seq_puts(seq, " sl local_address rem_address st tx_queue "
				2345	"rx_queue tr tm->when retrnsmt uid timeout "
				2346	"inode");
				2347	goto out;
				2348	}
				2349	st = seq->private;
				2350
				2351	if (sk->sk_state == TCP_TIME_WAIT)
				2352	get_timewait4_sock(v, seq, st->num);
				2353	else if (sk->sk_state == TCP_NEW_SYN_RECV)
				2354	get_openreq4(v, seq, st->num);
				2355	else
				2356	get_tcp4_sock(v, seq, st->num);
				2357	out:
				2358	seq_pad(seq, '\n');
				2359	return 0;
				2360	}
				2361
				2362	static const struct file_operations tcp_afinfo_seq_fops = {
				2363	.owner = THIS_MODULE,
				2364	.open = tcp_seq_open,
				2365	.read = seq_read,
				2366	.llseek = seq_lseek,
				2367	.release = seq_release_net
				2368	};
				2369
				2370	static struct tcp_seq_afinfo tcp4_seq_afinfo = {
				2371	.name = "tcp",
				2372	.family = AF_INET,
				2373	.seq_fops = &tcp_afinfo_seq_fops,
				2374	.seq_ops = {
				2375	.show = tcp4_seq_show,
				2376	},
				2377	};
				2378
				2379	static int __net_init tcp4_proc_init_net(struct net *net)
				2380	{
				2381	return tcp_proc_register(net, &tcp4_seq_afinfo);
				2382	}
				2383
				2384	static void __net_exit tcp4_proc_exit_net(struct net *net)
				2385	{
				2386	tcp_proc_unregister(net, &tcp4_seq_afinfo);
				2387	}
				2388
				2389	static struct pernet_operations tcp4_net_ops = {
				2390	.init = tcp4_proc_init_net,
				2391	.exit = tcp4_proc_exit_net,
				2392	};
				2393
				2394	int __init tcp4_proc_init(void)
				2395	{
				2396	return register_pernet_subsys(&tcp4_net_ops);
				2397	}
				2398
				2399	void tcp4_proc_exit(void)
				2400	{
				2401	unregister_pernet_subsys(&tcp4_net_ops);
				2402	}
				2403	#endif /* CONFIG_PROC_FS */
				2404
				2405	struct proto tcp_prot = {
				2406	.name = "TCP",
				2407	.owner = THIS_MODULE,
				2408	.close = tcp_close,
				2409	.connect = tcp_v4_connect,
				2410	.disconnect = tcp_disconnect,
				2411	.accept = inet_csk_accept,
				2412	.ioctl = tcp_ioctl,
				2413	.init = tcp_v4_init_sock,
				2414	.destroy = tcp_v4_destroy_sock,
				2415	.shutdown = tcp_shutdown,
				2416	.setsockopt = tcp_setsockopt,
				2417	.getsockopt = tcp_getsockopt,
				2418	.keepalive = tcp_set_keepalive,
				2419	.recvmsg = tcp_recvmsg,
				2420	.sendmsg = tcp_sendmsg,
				2421	.sendpage = tcp_sendpage,
				2422	.backlog_rcv = tcp_v4_do_rcv,
				2423	.release_cb = tcp_release_cb,
				2424	.hash = inet_hash,
				2425	.unhash = inet_unhash,
				2426	.get_port = inet_csk_get_port,
				2427	.enter_memory_pressure = tcp_enter_memory_pressure,
				2428	.leave_memory_pressure = tcp_leave_memory_pressure,
				2429	.stream_memory_free = tcp_stream_memory_free,
				2430	.sockets_allocated = &tcp_sockets_allocated,
				2431	.orphan_count = &tcp_orphan_count,
				2432	.memory_allocated = &tcp_memory_allocated,
				2433	.memory_pressure = &tcp_memory_pressure,
				2434	.sysctl_mem = sysctl_tcp_mem,
				2435	.sysctl_wmem = sysctl_tcp_wmem,
				2436	.sysctl_rmem = sysctl_tcp_rmem,
				2437	.max_header = MAX_TCP_HEADER,
				2438	.obj_size = sizeof(struct tcp_sock),
				2439	.slab_flags = SLAB_TYPESAFE_BY_RCU,
				2440	.twsk_prot = &tcp_timewait_sock_ops,
				2441	.rsk_prot = &tcp_request_sock_ops,
				2442	.h.hashinfo = &tcp_hashinfo,
				2443	.no_autobind = true,
				2444	#ifdef CONFIG_COMPAT
				2445	.compat_setsockopt = compat_tcp_setsockopt,
				2446	.compat_getsockopt = compat_tcp_getsockopt,
				2447	#endif
				2448	.diag_destroy = tcp_abort,
				2449	};
				2450	EXPORT_SYMBOL(tcp_prot);
				2451
				2452	static void __net_exit tcp_sk_exit(struct net *net)
				2453	{
				2454	int cpu;
				2455
				2456	for_each_possible_cpu(cpu)
				2457	inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
				2458	free_percpu(net->ipv4.tcp_sk);
				2459	}
				2460
				2461	static int __net_init tcp_sk_init(struct net *net)
				2462	{
				2463	int res, cpu, cnt;
				2464
				2465	net->ipv4.tcp_sk = alloc_percpu(struct sock *);
				2466	if (!net->ipv4.tcp_sk)
				2467	return -ENOMEM;
				2468
				2469	for_each_possible_cpu(cpu) {
				2470	struct sock *sk;
				2471
				2472	res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
				2473	IPPROTO_TCP, net);
				2474	if (res)
				2475	goto fail;
				2476	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
				2477
				2478	/* Please enforce IP_DF and IPID==0 for RST and
				2479	* ACK sent in SYN-RECV and TIME-WAIT state.
				2480	*/
				2481	inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
				2482
				2483	*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
				2484	}
				2485
				2486	net->ipv4.sysctl_tcp_ecn = 2;
				2487	net->ipv4.sysctl_tcp_ecn_fallback = 1;
				2488
				2489	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
				2490	net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
				2491	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
				2492	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
				2493
				2494	net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
				2495	net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
				2496	net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
				2497
				2498	net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
				2499	net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
				2500	net->ipv4.sysctl_tcp_syncookies = 1;
				2501	net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
				2502	net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
				2503	net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
				2504	net->ipv4.sysctl_tcp_orphan_retries = 0;
				2505	net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
				2506	net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
				2507	net->ipv4.sysctl_tcp_tw_reuse = 0;
				2508
				2509	cnt = tcp_hashinfo.ehash_mask + 1;
				2510	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
				2511	net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
				2512
				2513	net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);
				2514	net->ipv4.sysctl_tcp_sack = 1;
				2515	net->ipv4.sysctl_tcp_window_scaling = 1;
				2516	net->ipv4.sysctl_tcp_timestamps = 1;
				2517
				2518	return 0;
				2519	fail:
				2520	tcp_sk_exit(net);
				2521
				2522	return res;
				2523	}
				2524
				2525	static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
				2526	{
				2527	inet_twsk_purge(&tcp_hashinfo, AF_INET);
				2528	}
				2529
				2530	static struct pernet_operations __net_initdata tcp_sk_ops = {
				2531	.init = tcp_sk_init,
				2532	.exit = tcp_sk_exit,
				2533	.exit_batch = tcp_sk_exit_batch,
				2534	};
				2535
				2536	void __init tcp_v4_init(void)
				2537	{
				2538	if (register_pernet_subsys(&tcp_sk_ops))
				2539	panic("Failed to create the TCP control socket.\n");
				2540	}