Blame - ap/os/linux/linux-3.4.x/net/ipv4/tcp_ipv4.c - R306

blob: a699559a30004105b0bb057af78ff83f0625d462 [file] [log] [blame]

yuezonghe	824eb0c	2024-06-27 02:32:26 -0700	[diff] [blame]	1	/*
				2	* INET An implementation of the TCP/IP protocol suite for the LINUX
				3	* operating system. INET is implemented using the BSD Socket
				4	* interface as the means of communication with the user level.
				5	*
				6	* Implementation of the Transmission Control Protocol(TCP).
				7	*
				8	* IPv4 specific functions
				9	*
				10	*
				11	* code split from:
				12	* linux/ipv4/tcp.c
				13	* linux/ipv4/tcp_input.c
				14	* linux/ipv4/tcp_output.c
				15	*
				16	* See tcp.c for author information
				17	*
				18	* This program is free software; you can redistribute it and/or
				19	* modify it under the terms of the GNU General Public License
				20	* as published by the Free Software Foundation; either version
				21	* 2 of the License, or (at your option) any later version.
				22	*/
				23
				24	/*
				25	* Changes:
				26	* David S. Miller : New socket lookup architecture.
				27	* This code is dedicated to John Dyson.
				28	* David S. Miller : Change semantics of established hash,
				29	* half is devoted to TIME_WAIT sockets
				30	* and the rest go in the other half.
				31	* Andi Kleen : Add support for syncookies and fixed
				32	* some bugs: ip options weren't passed to
				33	* the TCP layer, missed a check for an
				34	* ACK bit.
				35	* Andi Kleen : Implemented fast path mtu discovery.
				36	* Fixed many serious bugs in the
				37	* request_sock handling and moved
				38	* most of it into the af independent code.
				39	* Added tail drop and some other bugfixes.
				40	* Added new listen semantics.
				41	* Mike McLagan : Routing by source
				42	* Juan Jose Ciarlante: ip_dynaddr bits
				43	* Andi Kleen: various fixes.
				44	* Vitaly E. Lavrov : Transparent proxy revived after year
				45	* coma.
				46	* Andi Kleen : Fix new listen.
				47	* Andi Kleen : Fix accept error reporting.
				48	* YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
				49	* Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
				50	* a single port at the same time.
				51	*/
				52
				53	#define pr_fmt(fmt) "TCP: " fmt
				54
				55	#include <linux/bottom_half.h>
				56	#include <linux/types.h>
				57	#include <linux/fcntl.h>
				58	#include <linux/module.h>
				59	#include <linux/random.h>
				60	#include <linux/cache.h>
				61	#include <linux/jhash.h>
				62	#include <linux/init.h>
				63	#include <linux/times.h>
				64	#include <linux/slab.h>
				65
				66	#include <net/net_namespace.h>
				67	#include <net/icmp.h>
				68	#include <net/inet_hashtables.h>
				69	#include <net/tcp.h>
				70	#include <net/transp_v6.h>
				71	#include <net/ipv6.h>
				72	#include <net/inet_common.h>
				73	#include <net/timewait_sock.h>
				74	#include <net/xfrm.h>
				75	#include <net/netdma.h>
				76	#include <net/secure_seq.h>
				77	#include <net/tcp_memcontrol.h>
				78
				79	#include <linux/inet.h>
				80	#include <linux/ipv6.h>
				81	#include <linux/stddef.h>
				82	#include <linux/proc_fs.h>
				83	#include <linux/seq_file.h>
				84
				85	#include <linux/crypto.h>
				86	#include <linux/scatterlist.h>
				87
				88	#include <net/netfilter/nf_conntrack.h>
				89	#include <net/SI/fast_common.h>
				90
				91	#include <net/SI/sock_track.h>
				92
				93	int sysctl_tcp_tw_reuse __read_mostly;
				94	int sysctl_tcp_low_latency __read_mostly;
				95	EXPORT_SYMBOL(sysctl_tcp_low_latency);
				96
				97
				98	#ifdef CONFIG_TCP_MD5SIG
				99	static int tcp_v4_md5_hash_hdr(char md5_hash, const struct tcp_md5sig_key key,
				100	__be32 daddr, __be32 saddr, const struct tcphdr *th);
				101	#endif
				102
				103	struct inet_hashinfo tcp_hashinfo;
				104	EXPORT_SYMBOL(tcp_hashinfo);
				105
				106	static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
				107	{
				108	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
				109	ip_hdr(skb)->saddr,
				110	tcp_hdr(skb)->dest,
				111	tcp_hdr(skb)->source);
				112	}
				113
				114	int tcp_twsk_unique(struct sock sk, struct sock sktw, void *twp)
				115	{
				116	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
				117	struct tcp_sock *tp = tcp_sk(sk);
				118
				119	/* With PAWS, it is safe from the viewpoint
				120	of data integrity. Even without PAWS it is safe provided sequence
				121	spaces do not overlap i.e. at data rates <= 80Mbit/sec.
				122
				123	Actually, the idea is close to VJ's one, only timestamp cache is
				124	held not per host, but per port pair and TW bucket is used as state
				125	holder.
				126
				127	If TW bucket has been already destroyed we fall back to VJ's scheme
				128	and use initial timestamp retrieved from peer table.
				129	*/
				130	if (tcptw->tw_ts_recent_stamp &&
				131	(twp == NULL \|\| (sysctl_tcp_tw_reuse &&
				132	get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
				133	tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
				134	if (tp->write_seq == 0)
				135	tp->write_seq = 1;
				136	tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
				137	tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
				138	sock_hold(sktw);
				139	return 1;
				140	}
				141
				142	return 0;
				143	}
				144	EXPORT_SYMBOL_GPL(tcp_twsk_unique);
				145
				146	/* This will initiate an outgoing connection. */
				147	int tcp_v4_connect(struct sock sk, struct sockaddr uaddr, int addr_len)
				148	{
				149	struct sockaddr_in usin = (struct sockaddr_in )uaddr;
				150	struct inet_sock *inet = inet_sk(sk);
				151	struct tcp_sock *tp = tcp_sk(sk);
				152	__be16 orig_sport, orig_dport;
				153	__be32 daddr, nexthop;
				154	struct flowi4 *fl4;
				155	struct rtable *rt;
				156	int err;
				157	struct ip_options_rcu *inet_opt;
				158
				159	if (addr_len < sizeof(struct sockaddr_in))
				160	return -EINVAL;
				161
				162	if (usin->sin_family != AF_INET)
				163	return -EAFNOSUPPORT;
				164
				165	nexthop = daddr = usin->sin_addr.s_addr;
				166	inet_opt = rcu_dereference_protected(inet->inet_opt,
				167	sock_owned_by_user(sk));
				168	if (inet_opt && inet_opt->opt.srr) {
				169	if (!daddr)
				170	return -EINVAL;
				171	nexthop = inet_opt->opt.faddr;
				172	}
				173
				174	orig_sport = inet->inet_sport;
				175	orig_dport = usin->sin_port;
				176	fl4 = &inet->cork.fl.u.ip4;
				177	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
				178	RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
				179	IPPROTO_TCP,
				180	orig_sport, orig_dport, sk, true);
				181	if (IS_ERR(rt)) {
				182	err = PTR_ERR(rt);
				183	if (err == -ENETUNREACH)
				184	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
				185	return err;
				186	}
				187
				188	if (rt->rt_flags & (RTCF_MULTICAST \| RTCF_BROADCAST)) {
				189	ip_rt_put(rt);
				190	return -ENETUNREACH;
				191	}
				192
				193	if (!inet_opt \|\| !inet_opt->opt.srr)
				194	daddr = fl4->daddr;
				195
				196	if (!inet->inet_saddr)
				197	inet->inet_saddr = fl4->saddr;
				198	inet->inet_rcv_saddr = inet->inet_saddr;
				199
				200	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
				201	/* Reset inherited state */
				202	tp->rx_opt.ts_recent = 0;
				203	tp->rx_opt.ts_recent_stamp = 0;
				204	tp->write_seq = 0;
				205	}
				206
				207	if (tcp_death_row.sysctl_tw_recycle &&
				208	!tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
				209	struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
				210	/*
				211	* VJ's idea. We save last timestamp seen from
				212	* the destination in peer table, when entering state
				213	* TIME-WAIT * and initialize rx_opt.ts_recent from it,
				214	* when trying new connection.
				215	*/
				216	if (peer) {
				217	inet_peer_refcheck(peer);
				218	if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
				219	tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
				220	tp->rx_opt.ts_recent = peer->tcp_ts;
				221	}
				222	}
				223	}
				224
				225	inet->inet_dport = usin->sin_port;
				226	inet->inet_daddr = daddr;
				227
				228	inet_csk(sk)->icsk_ext_hdr_len = 0;
				229	if (inet_opt)
				230	inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
				231
				232	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
				233
				234	/* Socket identity is still unknown (sport may be zero).
				235	* However we set state to SYN-SENT and not releasing socket
				236	* lock select source port, enter ourselves into the hash tables and
				237	* complete initialization after this.
				238	*/
				239	tcp_set_state(sk, TCP_SYN_SENT);
				240	err = inet_hash_connect(&tcp_death_row, sk);
				241	if (err)
				242	goto failure;
				243
				244	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
				245	inet->inet_sport, inet->inet_dport, sk);
				246	if (IS_ERR(rt)) {
				247	err = PTR_ERR(rt);
				248	rt = NULL;
				249	goto failure;
				250	}
				251	/* OK, now commit destination to socket. */
				252	sk->sk_gso_type = SKB_GSO_TCPV4;
				253	sk_setup_caps(sk, &rt->dst);
				254
				255	if (!tp->write_seq)
				256	tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
				257	inet->inet_daddr,
				258	inet->inet_sport,
				259	usin->sin_port);
				260
				261	inet->inet_id = tp->write_seq ^ jiffies;
				262
				263	err = tcp_connect(sk);
				264	rt = NULL;
				265	if (err)
				266	goto failure;
				267
				268	return 0;
				269
				270	failure:
				271	/*
				272	* This unhashes the socket and releases the local port,
				273	* if necessary.
				274	*/
				275	tcp_set_state(sk, TCP_CLOSE);
				276	ip_rt_put(rt);
				277	sk->sk_route_caps = 0;
				278	inet->inet_dport = 0;
				279	return err;
				280	}
				281	EXPORT_SYMBOL(tcp_v4_connect);
				282
				283	/*
				284	* This routine does path mtu discovery as defined in RFC1191.
				285	*/
				286	static void do_pmtu_discovery(struct sock sk, const struct iphdr iph, u32 mtu)
				287	{
				288	struct dst_entry *dst;
				289	struct inet_sock *inet = inet_sk(sk);
				290
				291	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
				292	* send out by Linux are always <576bytes so they should go through
				293	* unfragmented).
				294	*/
				295	if (sk->sk_state == TCP_LISTEN)
				296	return;
				297
				298	/* We don't check in the destentry if pmtu discovery is forbidden
				299	* on this route. We just assume that no packet_to_big packets
				300	* are send back when pmtu discovery is not active.
				301	* There is a small race when the user changes this flag in the
				302	* route, but I think that's acceptable.
				303	*/
				304	if ((dst = __sk_dst_check(sk, 0)) == NULL)
				305	return;
				306
				307	dst->ops->update_pmtu(dst, mtu);
				308
				309	/* Something is about to be wrong... Remember soft error
				310	* for the case, if this connection will not able to recover.
				311	*/
				312	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
				313	sk->sk_err_soft = EMSGSIZE;
				314
				315	mtu = dst_mtu(dst);
				316
				317	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
				318	inet_csk(sk)->icsk_pmtu_cookie > mtu) {
				319	tcp_sync_mss(sk, mtu);
				320
				321	/* Resend the TCP packet because it's
				322	* clear that the old packet has been
				323	* dropped. This is the new "fast" path mtu
				324	* discovery.
				325	*/
				326	tcp_simple_retransmit(sk);
				327	} /* else let the usual retransmit timer handle it */
				328	}
				329
				330	/*
				331	* This routine is called by the ICMP module when it gets some
				332	* sort of error condition. If err < 0 then the socket should
				333	* be closed and the error returned to the user. If err > 0
				334	* it's just the icmp type << 8 \| icmp code. After adjustment
				335	* header points to the first 8 bytes of the tcp header. We need
				336	* to find the appropriate port.
				337	*
				338	* The locking strategy used here is very "optimistic". When
				339	* someone else accesses the socket the ICMP is just dropped
				340	* and for some paths there is no check at all.
				341	* A more general error queue to queue errors for later handling
				342	* is probably better.
				343	*
				344	*/
				345
				346	void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
				347	{
				348	const struct iphdr iph = (const struct iphdr )icmp_skb->data;
				349	struct tcphdr th = (struct tcphdr )(icmp_skb->data + (iph->ihl << 2));
				350	struct inet_connection_sock *icsk;
				351	struct tcp_sock *tp;
				352	struct inet_sock *inet;
				353	const int type = icmp_hdr(icmp_skb)->type;
				354	const int code = icmp_hdr(icmp_skb)->code;
				355	struct sock *sk;
				356	struct sk_buff *skb;
				357	__u32 seq;
				358	__u32 remaining;
				359	int err;
				360	struct net *net = dev_net(icmp_skb->dev);
				361
				362	if (icmp_skb->len < (iph->ihl << 2) + 8) {
				363	ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
				364	return;
				365	}
				366
				367	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
				368	iph->saddr, th->source, inet_iif(icmp_skb));
				369	if (!sk) {
				370	ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
				371	return;
				372	}
				373	if (sk->sk_state == TCP_TIME_WAIT) {
				374	inet_twsk_put(inet_twsk(sk));
				375	return;
				376	}
				377
				378	bh_lock_sock(sk);
				379	/* If too many ICMPs get dropped on busy
				380	* servers this needs to be solved differently.
				381	*/
				382	if (sock_owned_by_user(sk))
				383	NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
				384
				385	if (sk->sk_state == TCP_CLOSE)
				386	goto out;
				387
				388	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
				389	NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
				390	goto out;
				391	}
				392
				393	icsk = inet_csk(sk);
				394	tp = tcp_sk(sk);
				395	seq = ntohl(th->seq);
				396	if (sk->sk_state != TCP_LISTEN &&
				397	!between(seq, tp->snd_una, tp->snd_nxt)) {
				398	NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
				399	goto out;
				400	}
				401
				402	switch (type) {
				403	case ICMP_SOURCE_QUENCH:
				404	/* Just silently ignore these. */
				405	goto out;
				406	case ICMP_PARAMETERPROB:
				407	err = EPROTO;
				408	break;
				409	case ICMP_DEST_UNREACH:
				410	if (code > NR_ICMP_UNREACH)
				411	goto out;
				412
				413	if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
				414	if (!sock_owned_by_user(sk))
				415	do_pmtu_discovery(sk, iph, info);
				416	goto out;
				417	}
				418
				419	err = icmp_err_convert[code].errno;
				420	/* check if icmp_skb allows revert of backoff
				421	* (see draft-zimmermann-tcp-lcd) */
				422	if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
				423	break;
				424	if (seq != tp->snd_una \|\| !icsk->icsk_retransmits \|\|
				425	!icsk->icsk_backoff)
				426	break;
				427
				428	if (sock_owned_by_user(sk))
				429	break;
				430
				431	icsk->icsk_backoff--;
				432	inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
				433	TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
				434	tcp_bound_rto(sk);
				435
				436	skb = tcp_write_queue_head(sk);
				437	BUG_ON(!skb);
				438
				439	remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				440	tcp_time_stamp - TCP_SKB_CB(skb)->when);
				441
				442	if (remaining) {
				443	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				444	remaining, TCP_RTO_MAX);
				445	} else {
				446	/* RTO revert clocked out retransmission.
				447	* Will retransmit now */
				448	tcp_retransmit_timer(sk);
				449	}
				450
				451	break;
				452	case ICMP_TIME_EXCEEDED:
				453	err = EHOSTUNREACH;
				454	break;
				455	default:
				456	goto out;
				457	}
				458
				459	switch (sk->sk_state) {
				460	struct request_sock req, *prev;
				461	case TCP_LISTEN:
				462	if (sock_owned_by_user(sk))
				463	goto out;
				464
				465	req = inet_csk_search_req(sk, &prev, th->dest,
				466	iph->daddr, iph->saddr);
				467	if (!req)
				468	goto out;
				469
				470	/* ICMPs are not backlogged, hence we cannot get
				471	an established socket here.
				472	*/
				473	WARN_ON(req->sk);
				474
				475	if (seq != tcp_rsk(req)->snt_isn) {
				476	NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
				477	goto out;
				478	}
				479
				480	/*
				481	* Still in SYN_RECV, just remove it silently.
				482	* There is no good way to pass the error to the newly
				483	* created socket, and POSIX does not want network
				484	* errors returned from accept().
				485	*/
				486	inet_csk_reqsk_queue_drop(sk, req, prev);
				487	goto out;
				488
				489	case TCP_SYN_SENT:
				490	case TCP_SYN_RECV: /* Cannot happen.
				491	It can f.e. if SYNs crossed.
				492	*/
				493	if (!sock_owned_by_user(sk)) {
				494	sk->sk_err = err;
				495
				496	sk->sk_error_report(sk);
				497
				498	tcp_done(sk);
				499	} else {
				500	sk->sk_err_soft = err;
				501	}
				502	goto out;
				503	}
				504
				505	/* If we've already connected we will keep trying
				506	* until we time out, or the user gives up.
				507	*
				508	* rfc1122 4.2.3.9 allows to consider as hard errors
				509	* only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
				510	* but it is obsoleted by pmtu discovery).
				511	*
				512	* Note, that in modern internet, where routing is unreliable
				513	* and in each dark corner broken firewalls sit, sending random
				514	* errors ordered by their masters even this two messages finally lose
				515	* their original sense (even Linux sends invalid PORT_UNREACHs)
				516	*
				517	* Now we are in compliance with RFCs.
				518	* --ANK (980905)
				519	*/
				520
				521	inet = inet_sk(sk);
				522	if (!sock_owned_by_user(sk) && inet->recverr) {
				523	sk->sk_err = err;
				524	sk->sk_error_report(sk);
				525	} else { /* Only an error on timeout */
				526	sk->sk_err_soft = err;
				527	}
				528
				529	out:
				530	bh_unlock_sock(sk);
				531	sock_put(sk);
				532	}
				533
				534	static void __tcp_v4_send_check(struct sk_buff *skb,
				535	__be32 saddr, __be32 daddr)
				536	{
				537	struct tcphdr *th = tcp_hdr(skb);
				538
				539	if (skb->ip_summed == CHECKSUM_PARTIAL) {
				540	th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
				541	skb->csum_start = skb_transport_header(skb) - skb->head;
				542	skb->csum_offset = offsetof(struct tcphdr, check);
				543	} else {
				544	th->check = tcp_v4_check(skb->len, saddr, daddr,
				545	csum_partial(th,
				546	th->doff << 2,
				547	skb->csum));
				548	}
				549	}
				550
				551	/* This routine computes an IPv4 TCP checksum. */
				552	void tcp_v4_send_check(struct sock sk, struct sk_buff skb)
				553	{
				554	const struct inet_sock *inet = inet_sk(sk);
				555
				556	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
				557	}
				558	EXPORT_SYMBOL(tcp_v4_send_check);
				559
				560	int tcp_v4_gso_send_check(struct sk_buff *skb)
				561	{
				562	const struct iphdr *iph;
				563	struct tcphdr *th;
				564
				565	if (!pskb_may_pull(skb, sizeof(*th)))
				566	return -EINVAL;
				567
				568	iph = ip_hdr(skb);
				569	th = tcp_hdr(skb);
				570
				571	th->check = 0;
				572	skb->ip_summed = CHECKSUM_PARTIAL;
				573	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
				574	return 0;
				575	}
				576
				577	/*
				578	* This routine will send an RST to the other tcp.
				579	*
				580	* Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
				581	* for reset.
				582	* Answer: if a packet caused RST, it is not for a socket
				583	* existing in our system, if it is matched to a socket,
				584	* it is just duplicate segment or bug in other side's TCP.
				585	* So that we build reply only basing on parameters
				586	* arrived with segment.
				587	* Exception: precedence violation. We do not implement it in any case.
				588	*/
				589
				590	static void tcp_v4_send_reset(struct sock sk, struct sk_buff skb)
				591	{
				592	const struct tcphdr *th = tcp_hdr(skb);
				593	struct {
				594	struct tcphdr th;
				595	#ifdef CONFIG_TCP_MD5SIG
				596	__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
				597	#endif
				598	} rep;
				599	struct ip_reply_arg arg;
				600	#ifdef CONFIG_TCP_MD5SIG
				601	struct tcp_md5sig_key *key;
				602	const __u8 *hash_location = NULL;
				603	unsigned char newhash[16];
				604	int genhash;
				605	struct sock *sk1 = NULL;
				606	#endif
				607	struct net *net;
				608
				609	/* Never send a reset in response to a reset. */
				610	if (th->rst)
				611	return;
				612
				613	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
				614	return;
				615
				616	/* Swap the send and the receive. */
				617	memset(&rep, 0, sizeof(rep));
				618	rep.th.dest = th->source;
				619	rep.th.source = th->dest;
				620	rep.th.doff = sizeof(struct tcphdr) / 4;
				621	rep.th.rst = 1;
				622
				623	if (th->ack) {
				624	rep.th.seq = th->ack_seq;
				625	} else {
				626	rep.th.ack = 1;
				627	rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				628	skb->len - (th->doff << 2));
				629	}
				630
				631	memset(&arg, 0, sizeof(arg));
				632	arg.iov[0].iov_base = (unsigned char *)&rep;
				633	arg.iov[0].iov_len = sizeof(rep.th);
				634
				635	#ifdef CONFIG_TCP_MD5SIG
				636	hash_location = tcp_parse_md5sig_option(th);
				637	if (!sk && hash_location) {
				638	/*
				639	* active side is lost. Try to find listening socket through
				640	* source port, and then find md5 key through listening socket.
				641	* we are not loose security here:
				642	* Incoming packet is checked with md5 hash with finding key,
				643	* no RST generated if md5 hash doesn't match.
				644	*/
				645	sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
				646	&tcp_hashinfo, ip_hdr(skb)->daddr,
				647	ntohs(th->source), inet_iif(skb));
				648	/* don't send rst if it can't find key */
				649	if (!sk1)
				650	return;
				651	rcu_read_lock();
				652	key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
				653	&ip_hdr(skb)->saddr, AF_INET);
				654	if (!key)
				655	goto release_sk1;
				656
				657	genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
				658	if (genhash \|\| memcmp(hash_location, newhash, 16) != 0)
				659	goto release_sk1;
				660	} else {
				661	key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
				662	&ip_hdr(skb)->saddr,
				663	AF_INET) : NULL;
				664	}
				665
				666	if (key) {
				667	rep.opt[0] = htonl((TCPOPT_NOP << 24) \|
				668	(TCPOPT_NOP << 16) \|
				669	(TCPOPT_MD5SIG << 8) \|
				670	TCPOLEN_MD5SIG);
				671	/* Update length and the length the header thinks exists */
				672	arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
				673	rep.th.doff = arg.iov[0].iov_len / 4;
				674
				675	tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				676	key, ip_hdr(skb)->saddr,
				677	ip_hdr(skb)->daddr, &rep.th);
				678	}
				679	#endif
				680	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				681	ip_hdr(skb)->saddr, /* XXX */
				682	arg.iov[0].iov_len, IPPROTO_TCP, 0);
				683	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
				684	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
				685	/* When socket is gone, all binding information is lost.
				686	* routing might fail in this case. No choice here, if we choose to force
				687	* input interface, we will misroute in case of asymmetric route.
				688	*/
				689	if (sk)
				690	arg.bound_dev_if = sk->sk_bound_dev_if;
				691
				692	net = dev_net(skb_dst(skb)->dev);
				693	arg.tos = ip_hdr(skb)->tos;
				694	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
				695	&arg, arg.iov[0].iov_len);
				696
				697	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
				698	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
				699
				700	TCP_PKT_STATS_INC(TCP_SEND_PKTS);
				701	TCP_PKT_STATS_INC(TCP_RST_SEND_NUM);
				702
				703	TCP_SOCK_TRACK(sk, TCP_RST_SEND);
				704
				705	#ifdef CONFIG_TCP_MD5SIG
				706	release_sk1:
				707	if (sk1) {
				708	rcu_read_unlock();
				709	sock_put(sk1);
				710	}
				711	#endif
				712	}
				713
				714	/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
				715	outside socket context is ugly, certainly. What can I do?
				716	*/
				717
				718	static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
				719	u32 win, u32 ts, int oif,
				720	struct tcp_md5sig_key *key,
				721	int reply_flags, u8 tos)
				722	{
				723	const struct tcphdr *th = tcp_hdr(skb);
				724	struct {
				725	struct tcphdr th;
				726	__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
				727	#ifdef CONFIG_TCP_MD5SIG
				728	+ (TCPOLEN_MD5SIG_ALIGNED >> 2)
				729	#endif
				730	];
				731	} rep;
				732	struct ip_reply_arg arg;
				733	struct net *net = dev_net(skb_dst(skb)->dev);
				734
				735	memset(&rep.th, 0, sizeof(struct tcphdr));
				736	memset(&arg, 0, sizeof(arg));
				737
				738	arg.iov[0].iov_base = (unsigned char *)&rep;
				739	arg.iov[0].iov_len = sizeof(rep.th);
				740	if (ts) {
				741	rep.opt[0] = htonl((TCPOPT_NOP << 24) \| (TCPOPT_NOP << 16) \|
				742	(TCPOPT_TIMESTAMP << 8) \|
				743	TCPOLEN_TIMESTAMP);
				744	rep.opt[1] = htonl(tcp_time_stamp);
				745	rep.opt[2] = htonl(ts);
				746	arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
				747	}
				748
				749	/* Swap the send and the receive. */
				750	rep.th.dest = th->source;
				751	rep.th.source = th->dest;
				752	rep.th.doff = arg.iov[0].iov_len / 4;
				753	rep.th.seq = htonl(seq);
				754	rep.th.ack_seq = htonl(ack);
				755	rep.th.ack = 1;
				756	rep.th.window = htons(win);
				757
				758	#ifdef CONFIG_TCP_MD5SIG
				759	if (key) {
				760	int offset = (ts) ? 3 : 0;
				761
				762	rep.opt[offset++] = htonl((TCPOPT_NOP << 24) \|
				763	(TCPOPT_NOP << 16) \|
				764	(TCPOPT_MD5SIG << 8) \|
				765	TCPOLEN_MD5SIG);
				766	arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
				767	rep.th.doff = arg.iov[0].iov_len/4;
				768
				769	tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				770	key, ip_hdr(skb)->saddr,
				771	ip_hdr(skb)->daddr, &rep.th);
				772	}
				773	#endif
				774	arg.flags = reply_flags;
				775	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				776	ip_hdr(skb)->saddr, /* XXX */
				777	arg.iov[0].iov_len, IPPROTO_TCP, 0);
				778	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
				779	if (oif)
				780	arg.bound_dev_if = oif;
				781	arg.tos = tos;
				782	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
				783	&arg, arg.iov[0].iov_len);
				784
				785	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
				786	TCP_PKT_STATS_INC(TCP_SEND_PKTS);
				787	}
				788
				789	static void tcp_v4_timewait_ack(struct sock sk, struct sk_buff skb)
				790	{
				791	struct inet_timewait_sock *tw = inet_twsk(sk);
				792	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
				793
				794	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
				795	tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
				796	tcptw->tw_ts_recent,
				797	tw->tw_bound_dev_if,
				798	tcp_twsk_md5_key(tcptw),
				799	tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
				800	tw->tw_tos
				801	);
				802
				803	inet_twsk_put(tw);
				804	}
				805
				806	static void tcp_v4_reqsk_send_ack(struct sock sk, struct sk_buff skb,
				807	struct request_sock *req)
				808	{
				809	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
				810	tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
				811	req->ts_recent,
				812	0,
				813	tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
				814	AF_INET),
				815	inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
				816	ip_hdr(skb)->tos);
				817	}
				818
				819	/*
				820	* Send a SYN-ACK after having received a SYN.
				821	* This still operates on a request_sock only, not on a big
				822	* socket.
				823	*/
				824	static int tcp_v4_send_synack(struct sock sk, struct dst_entry dst,
				825	struct request_sock *req,
				826	struct request_values *rvp)
				827	{
				828	const struct inet_request_sock *ireq = inet_rsk(req);
				829	struct flowi4 fl4;
				830	int err = -1;
				831	struct sk_buff * skb;
				832
				833	/* First, grab a route. */
				834	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
				835	return -1;
				836
				837	skb = tcp_make_synack(sk, dst, req, rvp);
				838
				839	if (skb) {
				840	__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
				841
				842	err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
				843	ireq->rmt_addr,
				844	ireq->opt);
				845	err = net_xmit_eval(err);
				846	}
				847
				848	dst_release(dst);
				849	return err;
				850	}
				851
				852	static int tcp_v4_rtx_synack(struct sock sk, struct request_sock req,
				853	struct request_values *rvp)
				854	{
				855	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
				856	TCP_PKT_STATS_INC(TCP_RETRANS_PKTS);
				857	TCP_PKT_STATS_INC(TCP_SEND_DROPS);
				858	return tcp_v4_send_synack(sk, NULL, req, rvp);
				859	}
				860
				861	/*
				862	* IPv4 request_sock destructor.
				863	*/
				864	static void tcp_v4_reqsk_destructor(struct request_sock *req)
				865	{
				866	kfree(inet_rsk(req)->opt);
				867	}
				868
				869	/*
				870	* Return 1 if a syncookie should be sent
				871	*/
				872	int tcp_syn_flood_action(struct sock *sk,
				873	const struct sk_buff *skb,
				874	const char *proto)
				875	{
				876	const char *msg = "Dropping request";
				877	int want_cookie = 0;
				878	struct listen_sock *lopt;
				879
				880
				881
				882	#ifdef CONFIG_SYN_COOKIES
				883	if (sysctl_tcp_syncookies) {
				884	msg = "Sending cookies";
				885	want_cookie = 1;
				886	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
				887	} else
				888	#endif
				889	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
				890
				891	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
				892	if (!lopt->synflood_warned) {
				893	lopt->synflood_warned = 1;
				894	pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
				895	proto, ntohs(tcp_hdr(skb)->dest), msg);
				896	}
				897	return want_cookie;
				898	}
				899	EXPORT_SYMBOL(tcp_syn_flood_action);
				900
				901	/*
				902	* Save and compile IPv4 options into the request_sock if needed.
				903	*/
				904	static struct ip_options_rcu tcp_v4_save_options(struct sock sk,
				905	struct sk_buff *skb)
				906	{
				907	const struct ip_options *opt = &(IPCB(skb)->opt);
				908	struct ip_options_rcu *dopt = NULL;
				909
				910	if (opt && opt->optlen) {
				911	int opt_size = sizeof(*dopt) + opt->optlen;
				912
				913	dopt = kmalloc(opt_size, GFP_ATOMIC);
				914	if (dopt) {
				915	if (ip_options_echo(&dopt->opt, skb)) {
				916	kfree(dopt);
				917	dopt = NULL;
				918	}
				919	}
				920	}
				921	return dopt;
				922	}
				923
				924	#ifdef CONFIG_TCP_MD5SIG
				925	/*
				926	* RFC2385 MD5 checksumming requires a mapping of
				927	* IP address->MD5 Key.
				928	* We need to maintain these in the sk structure.
				929	*/
				930
				931	/* Find the Key structure for an address. */
				932	struct tcp_md5sig_key tcp_md5_do_lookup(struct sock sk,
				933	const union tcp_md5_addr *addr,
				934	int family)
				935	{
				936	struct tcp_sock *tp = tcp_sk(sk);
				937	struct tcp_md5sig_key *key;
				938	struct hlist_node *pos;
				939	unsigned int size = sizeof(struct in_addr);
				940	struct tcp_md5sig_info *md5sig;
				941
				942	/* caller either holds rcu_read_lock() or socket lock */
				943	md5sig = rcu_dereference_check(tp->md5sig_info,
				944	sock_owned_by_user(sk) \|\|
				945	lockdep_is_held(&sk->sk_lock.slock));
				946	if (!md5sig)
				947	return NULL;
				948	#if IS_ENABLED(CONFIG_IPV6)
				949	if (family == AF_INET6)
				950	size = sizeof(struct in6_addr);
				951	#endif
				952	hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
				953	if (key->family != family)
				954	continue;
				955	if (!memcmp(&key->addr, addr, size))
				956	return key;
				957	}
				958	return NULL;
				959	}
				960	EXPORT_SYMBOL(tcp_md5_do_lookup);
				961
				962	struct tcp_md5sig_key tcp_v4_md5_lookup(struct sock sk,
				963	struct sock *addr_sk)
				964	{
				965	union tcp_md5_addr *addr;
				966
				967	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
				968	return tcp_md5_do_lookup(sk, addr, AF_INET);
				969	}
				970	EXPORT_SYMBOL(tcp_v4_md5_lookup);
				971
				972	static struct tcp_md5sig_key tcp_v4_reqsk_md5_lookup(struct sock sk,
				973	struct request_sock *req)
				974	{
				975	union tcp_md5_addr *addr;
				976
				977	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
				978	return tcp_md5_do_lookup(sk, addr, AF_INET);
				979	}
				980
				981	/* This can be called on a newly created socket, from other files */
				982	int tcp_md5_do_add(struct sock sk, const union tcp_md5_addr addr,
				983	int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
				984	{
				985	/* Add Key to the list */
				986	struct tcp_md5sig_key *key;
				987	struct tcp_sock *tp = tcp_sk(sk);
				988	struct tcp_md5sig_info *md5sig;
				989
				990	key = tcp_md5_do_lookup(sk, addr, family);
				991	if (key) {
				992	/* Pre-existing entry - just update that one. */
				993	memcpy(key->key, newkey, newkeylen);
				994	key->keylen = newkeylen;
				995	return 0;
				996	}
				997
				998	md5sig = rcu_dereference_protected(tp->md5sig_info,
				999	sock_owned_by_user(sk));
				1000	if (!md5sig) {
				1001	md5sig = kmalloc(sizeof(*md5sig), gfp);
				1002	if (!md5sig)
				1003	return -ENOMEM;
				1004
				1005	sk_nocaps_add(sk, NETIF_F_GSO_MASK);
				1006	INIT_HLIST_HEAD(&md5sig->head);
				1007	rcu_assign_pointer(tp->md5sig_info, md5sig);
				1008	}
				1009
				1010	key = sock_kmalloc(sk, sizeof(*key), gfp);
				1011	if (!key)
				1012	return -ENOMEM;
				1013	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
				1014	sock_kfree_s(sk, key, sizeof(*key));
				1015	return -ENOMEM;
				1016	}
				1017
				1018	memcpy(key->key, newkey, newkeylen);
				1019	key->keylen = newkeylen;
				1020	key->family = family;
				1021	memcpy(&key->addr, addr,
				1022	(family == AF_INET6) ? sizeof(struct in6_addr) :
				1023	sizeof(struct in_addr));
				1024	hlist_add_head_rcu(&key->node, &md5sig->head);
				1025	return 0;
				1026	}
				1027	EXPORT_SYMBOL(tcp_md5_do_add);
				1028
				1029	int tcp_md5_do_del(struct sock sk, const union tcp_md5_addr addr, int family)
				1030	{
				1031	struct tcp_sock *tp = tcp_sk(sk);
				1032	struct tcp_md5sig_key *key;
				1033	struct tcp_md5sig_info *md5sig;
				1034
				1035	key = tcp_md5_do_lookup(sk, addr, family);
				1036	if (!key)
				1037	return -ENOENT;
				1038	hlist_del_rcu(&key->node);
				1039	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
				1040	kfree_rcu(key, rcu);
				1041	md5sig = rcu_dereference_protected(tp->md5sig_info,
				1042	sock_owned_by_user(sk));
				1043	if (hlist_empty(&md5sig->head))
				1044	tcp_free_md5sig_pool();
				1045	return 0;
				1046	}
				1047	EXPORT_SYMBOL(tcp_md5_do_del);
				1048
				1049	void tcp_clear_md5_list(struct sock *sk)
				1050	{
				1051	struct tcp_sock *tp = tcp_sk(sk);
				1052	struct tcp_md5sig_key *key;
				1053	struct hlist_node pos, n;
				1054	struct tcp_md5sig_info *md5sig;
				1055
				1056	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
				1057
				1058	if (!hlist_empty(&md5sig->head))
				1059	tcp_free_md5sig_pool();
				1060	hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
				1061	hlist_del_rcu(&key->node);
				1062	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
				1063	kfree_rcu(key, rcu);
				1064	}
				1065	}
				1066
				1067	static int tcp_v4_parse_md5_keys(struct sock sk, char __user optval,
				1068	int optlen)
				1069	{
				1070	struct tcp_md5sig cmd;
				1071	struct sockaddr_in sin = (struct sockaddr_in )&cmd.tcpm_addr;
				1072
				1073	if (optlen < sizeof(cmd))
				1074	return -EINVAL;
				1075
				1076	if (copy_from_user(&cmd, optval, sizeof(cmd)))
				1077	return -EFAULT;
				1078
				1079	if (sin->sin_family != AF_INET)
				1080	return -EINVAL;
				1081
				1082	if (!cmd.tcpm_key \|\| !cmd.tcpm_keylen)
				1083	return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				1084	AF_INET);
				1085
				1086	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
				1087	return -EINVAL;
				1088
				1089	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				1090	AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
				1091	GFP_KERNEL);
				1092	}
				1093
				1094	static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
				1095	__be32 daddr, __be32 saddr, int nbytes)
				1096	{
				1097	struct tcp4_pseudohdr *bp;
				1098	struct scatterlist sg;
				1099
				1100	bp = &hp->md5_blk.ip4;
				1101
				1102	/*
				1103	* 1. the TCP pseudo-header (in the order: source IP address,
				1104	* destination IP address, zero-padded protocol number, and
				1105	* segment length)
				1106	*/
				1107	bp->saddr = saddr;
				1108	bp->daddr = daddr;
				1109	bp->pad = 0;
				1110	bp->protocol = IPPROTO_TCP;
				1111	bp->len = cpu_to_be16(nbytes);
				1112
				1113	sg_init_one(&sg, bp, sizeof(*bp));
				1114	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
				1115	}
				1116
				1117	static int tcp_v4_md5_hash_hdr(char md5_hash, const struct tcp_md5sig_key key,
				1118	__be32 daddr, __be32 saddr, const struct tcphdr *th)
				1119	{
				1120	struct tcp_md5sig_pool *hp;
				1121	struct hash_desc *desc;
				1122
				1123	hp = tcp_get_md5sig_pool();
				1124	if (!hp)
				1125	goto clear_hash_noput;
				1126	desc = &hp->md5_desc;
				1127
				1128	if (crypto_hash_init(desc))
				1129	goto clear_hash;
				1130	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
				1131	goto clear_hash;
				1132	if (tcp_md5_hash_header(hp, th))
				1133	goto clear_hash;
				1134	if (tcp_md5_hash_key(hp, key))
				1135	goto clear_hash;
				1136	if (crypto_hash_final(desc, md5_hash))
				1137	goto clear_hash;
				1138
				1139	tcp_put_md5sig_pool();
				1140	return 0;
				1141
				1142	clear_hash:
				1143	tcp_put_md5sig_pool();
				1144	clear_hash_noput:
				1145	memset(md5_hash, 0, 16);
				1146	return 1;
				1147	}
				1148
				1149	int tcp_v4_md5_hash_skb(char md5_hash, struct tcp_md5sig_key key,
				1150	const struct sock sk, const struct request_sock req,
				1151	const struct sk_buff *skb)
				1152	{
				1153	struct tcp_md5sig_pool *hp;
				1154	struct hash_desc *desc;
				1155	const struct tcphdr *th = tcp_hdr(skb);
				1156	__be32 saddr, daddr;
				1157
				1158	if (sk) {
				1159	saddr = inet_sk(sk)->inet_saddr;
				1160	daddr = inet_sk(sk)->inet_daddr;
				1161	} else if (req) {
				1162	saddr = inet_rsk(req)->loc_addr;
				1163	daddr = inet_rsk(req)->rmt_addr;
				1164	} else {
				1165	const struct iphdr *iph = ip_hdr(skb);
				1166	saddr = iph->saddr;
				1167	daddr = iph->daddr;
				1168	}
				1169
				1170	hp = tcp_get_md5sig_pool();
				1171	if (!hp)
				1172	goto clear_hash_noput;
				1173	desc = &hp->md5_desc;
				1174
				1175	if (crypto_hash_init(desc))
				1176	goto clear_hash;
				1177
				1178	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
				1179	goto clear_hash;
				1180	if (tcp_md5_hash_header(hp, th))
				1181	goto clear_hash;
				1182	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
				1183	goto clear_hash;
				1184	if (tcp_md5_hash_key(hp, key))
				1185	goto clear_hash;
				1186	if (crypto_hash_final(desc, md5_hash))
				1187	goto clear_hash;
				1188
				1189	tcp_put_md5sig_pool();
				1190	return 0;
				1191
				1192	clear_hash:
				1193	tcp_put_md5sig_pool();
				1194	clear_hash_noput:
				1195	memset(md5_hash, 0, 16);
				1196	return 1;
				1197	}
				1198	EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
				1199
				1200	static int tcp_v4_inbound_md5_hash(struct sock sk, const struct sk_buff skb)
				1201	{
				1202	/*
				1203	* This gets called for each TCP segment that arrives
				1204	* so we want to be efficient.
				1205	* We have 3 drop cases:
				1206	* o No MD5 hash and one expected.
				1207	* o MD5 hash and we're not expecting one.
				1208	* o MD5 hash and its wrong.
				1209	*/
				1210	const __u8 *hash_location = NULL;
				1211	struct tcp_md5sig_key *hash_expected;
				1212	const struct iphdr *iph = ip_hdr(skb);
				1213	const struct tcphdr *th = tcp_hdr(skb);
				1214	int genhash;
				1215	unsigned char newhash[16];
				1216
				1217	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
				1218	AF_INET);
				1219	hash_location = tcp_parse_md5sig_option(th);
				1220
				1221	/* We've parsed the options - do we have a hash? */
				1222	if (!hash_expected && !hash_location)
				1223	return 0;
				1224
				1225	if (hash_expected && !hash_location) {
				1226	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
				1227	return 1;
				1228	}
				1229
				1230	if (!hash_expected && hash_location) {
				1231	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
				1232	return 1;
				1233	}
				1234
				1235	/* Okay, so this is hash_expected and hash_location -
				1236	* so we need to calculate the checksum.
				1237	*/
				1238	genhash = tcp_v4_md5_hash_skb(newhash,
				1239	hash_expected,
				1240	NULL, NULL, skb);
				1241
				1242	if (genhash \|\| memcmp(hash_location, newhash, 16) != 0) {
				1243	if (net_ratelimit()) {
				1244	pr_info("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				1245	&iph->saddr, ntohs(th->source),
				1246	&iph->daddr, ntohs(th->dest),
				1247	genhash ? " tcp_v4_calc_md5_hash failed" : "");
				1248	}
				1249	return 1;
				1250	}
				1251	return 0;
				1252	}
				1253
				1254	#endif
				1255
				1256	struct request_sock_ops tcp_request_sock_ops __read_mostly = {
				1257	.family = PF_INET,
				1258	.obj_size = sizeof(struct tcp_request_sock),
				1259	.rtx_syn_ack = tcp_v4_rtx_synack,
				1260	.send_ack = tcp_v4_reqsk_send_ack,
				1261	.destructor = tcp_v4_reqsk_destructor,
				1262	.send_reset = tcp_v4_send_reset,
				1263	.syn_ack_timeout = tcp_syn_ack_timeout,
				1264	};
				1265
				1266	#ifdef CONFIG_TCP_MD5SIG
				1267	static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
				1268	.md5_lookup = tcp_v4_reqsk_md5_lookup,
				1269	.calc_md5_hash = tcp_v4_md5_hash_skb,
				1270	};
				1271	#endif
				1272
				1273	int tcp_v4_conn_request(struct sock sk, struct sk_buff skb)
				1274	{
				1275	struct tcp_extend_values tmp_ext;
				1276	struct tcp_options_received tmp_opt;
				1277	const u8 *hash_location;
				1278	struct request_sock *req;
				1279	struct inet_request_sock *ireq;
				1280	struct tcp_sock *tp = tcp_sk(sk);
				1281	struct dst_entry *dst = NULL;
				1282	__be32 saddr = ip_hdr(skb)->saddr;
				1283	__be32 daddr = ip_hdr(skb)->daddr;
				1284	__u32 isn = TCP_SKB_CB(skb)->when;
				1285	int want_cookie = 0;
				1286
				1287	/* Never answer to SYNs send to broadcast or multicast */
				1288	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST \| RTCF_MULTICAST))
				1289	goto drop;
				1290
				1291	/* TW buckets are converted to open requests without
				1292	* limitations, they conserve resources and peer is
				1293	* evidently real one.
				1294	*/
				1295	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
				1296	TCP_SOCK_TRACK(sk, TCP_REQ_QUEUE_FULL);
				1297	want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
				1298	if (!want_cookie)
				1299	goto drop;
				1300	}
				1301
				1302	/* Accept backlog is full. If we have already queued enough
				1303	* of warm entries in syn queue, drop request. It is better than
				1304	* clogging syn queue with openreqs with exponentially increasing
				1305	* timeout.
				1306	*/
				1307	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
				1308	{
				1309	TCP_SOCK_TRACK(sk, TCP_ACCEPT_QUEUE_FULL);
				1310	goto drop;
				1311	}
				1312
				1313	req = inet_reqsk_alloc(&tcp_request_sock_ops);
				1314	if (!req)
				1315	goto drop;
				1316
				1317	#ifdef CONFIG_TCP_MD5SIG
				1318	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
				1319	#endif
				1320
				1321	tcp_clear_options(&tmp_opt);
				1322	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
				1323	tmp_opt.user_mss = tp->rx_opt.user_mss;
				1324	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
				1325
				1326	if (tmp_opt.cookie_plus > 0 &&
				1327	tmp_opt.saw_tstamp &&
				1328	!tp->rx_opt.cookie_out_never &&
				1329	(sysctl_tcp_cookie_size > 0 \|\|
				1330	(tp->cookie_values != NULL &&
				1331	tp->cookie_values->cookie_desired > 0))) {
				1332	u8 *c;
				1333	u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
				1334	int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
				1335
				1336	if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
				1337	goto drop_and_release;
				1338
				1339	/* Secret recipe starts with IP addresses */
				1340	*mess++ ^= (__force u32)daddr;
				1341	*mess++ ^= (__force u32)saddr;
				1342
				1343	/* plus variable length Initiator Cookie */
				1344	c = (u8 *)mess;
				1345	while (l-- > 0)
				1346	c++ ^= hash_location++;
				1347
				1348	want_cookie = 0; /* not our kind of cookie */
				1349	tmp_ext.cookie_out_never = 0; /* false */
				1350	tmp_ext.cookie_plus = tmp_opt.cookie_plus;
				1351	} else if (!tp->rx_opt.cookie_in_always) {
				1352	/* redundant indications, but ensure initialization. */
				1353	tmp_ext.cookie_out_never = 1; /* true */
				1354	tmp_ext.cookie_plus = 0;
				1355	} else {
				1356	goto drop_and_release;
				1357	}
				1358	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
				1359
				1360	if (want_cookie && !tmp_opt.saw_tstamp)
				1361	tcp_clear_options(&tmp_opt);
				1362
				1363	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
				1364	tcp_openreq_init(req, &tmp_opt, skb);
				1365
				1366	ireq = inet_rsk(req);
				1367	ireq->loc_addr = daddr;
				1368	ireq->rmt_addr = saddr;
				1369	ireq->no_srccheck = inet_sk(sk)->transparent;
				1370	ireq->opt = tcp_v4_save_options(sk, skb);
				1371
				1372	if (security_inet_conn_request(sk, skb, req))
				1373	goto drop_and_free;
				1374
				1375	if (!want_cookie \|\| tmp_opt.tstamp_ok)
				1376	TCP_ECN_create_request(req, tcp_hdr(skb));
				1377
				1378	if (want_cookie) {
				1379	isn = cookie_v4_init_sequence(sk, skb, &req->mss);
				1380	req->cookie_ts = tmp_opt.tstamp_ok;
				1381	} else if (!isn) {
				1382	struct inet_peer *peer = NULL;
				1383	struct flowi4 fl4;
				1384
				1385	/* VJ's idea. We save last timestamp seen
				1386	* from the destination in peer table, when entering
				1387	* state TIME-WAIT, and check against it before
				1388	* accepting new connection request.
				1389	*
				1390	* If "isn" is not zero, this request hit alive
				1391	* timewait bucket, so that all the necessary checks
				1392	* are made in the function processing timewait state.
				1393	*/
				1394	if (tmp_opt.saw_tstamp &&
				1395	tcp_death_row.sysctl_tw_recycle &&
				1396	(dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
				1397	fl4.daddr == saddr &&
				1398	(peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
				1399	inet_peer_refcheck(peer);
				1400	if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
				1401	(s32)(peer->tcp_ts - req->ts_recent) >
				1402	TCP_PAWS_WINDOW) {
				1403	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				1404	goto drop_and_release;
				1405	}
				1406	}
				1407	/* Kill the following clause, if you dislike this way. */
				1408	else if (!sysctl_tcp_syncookies &&
				1409	(sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
				1410	(sysctl_max_syn_backlog >> 2)) &&
				1411	(!peer \|\| !peer->tcp_ts_stamp) &&
				1412	(!dst \|\| !dst_metric(dst, RTAX_RTT))) {
				1413	/* Without syncookies last quarter of
				1414	* backlog is filled with destinations,
				1415	* proven to be alive.
				1416	* It means that we continue to communicate
				1417	* to destinations, already remembered
				1418	* to the moment of synflood.
				1419	*/
				1420	LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
				1421	&saddr, ntohs(tcp_hdr(skb)->source));
				1422	goto drop_and_release;
				1423	}
				1424
				1425	isn = tcp_v4_init_sequence(skb);
				1426	}
				1427	tcp_rsk(req)->snt_isn = isn;
				1428	tcp_rsk(req)->snt_synack = tcp_time_stamp;
				1429
				1430	if (tcp_v4_send_synack(sk, dst, req,
				1431	(struct request_values *)&tmp_ext) \|\|
				1432	want_cookie)
				1433	goto drop_and_free;
				1434
				1435	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
				1436	return 0;
				1437
				1438	drop_and_release:
				1439	dst_release(dst);
				1440	drop_and_free:
				1441	reqsk_free(req);
				1442	drop:
				1443	return 0;
				1444	}
				1445	EXPORT_SYMBOL(tcp_v4_conn_request);
				1446
				1447
				1448	/*
				1449	* The three way handshake has completed - we got a valid synack -
				1450	* now create the new socket.
				1451	*/
				1452	struct sock tcp_v4_syn_recv_sock(struct sock sk, struct sk_buff *skb,
				1453	struct request_sock *req,
				1454	struct dst_entry *dst)
				1455	{
				1456	struct inet_request_sock *ireq;
				1457	struct inet_sock *newinet;
				1458	struct tcp_sock *newtp;
				1459	struct sock *newsk;
				1460	#ifdef CONFIG_TCP_MD5SIG
				1461	struct tcp_md5sig_key *key;
				1462	#endif
				1463	struct ip_options_rcu *inet_opt;
				1464
				1465	if (sk_acceptq_is_full(sk))
				1466	{
				1467	TCP_SOCK_TRACK(sk, TCP_ACCEPT_QUEUE_FULL);
				1468	goto exit_overflow;
				1469	}
				1470
				1471	newsk = tcp_create_openreq_child(sk, req, skb);
				1472	if (!newsk)
				1473	goto exit_nonewsk;
				1474
				1475	newsk->sk_gso_type = SKB_GSO_TCPV4;
				1476
				1477	newtp = tcp_sk(newsk);
				1478	newinet = inet_sk(newsk);
				1479	ireq = inet_rsk(req);
				1480	newinet->inet_daddr = ireq->rmt_addr;
				1481	newinet->inet_rcv_saddr = ireq->loc_addr;
				1482	newinet->inet_saddr = ireq->loc_addr;
				1483	inet_opt = ireq->opt;
				1484	rcu_assign_pointer(newinet->inet_opt, inet_opt);
				1485	ireq->opt = NULL;
				1486	newinet->mc_index = inet_iif(skb);
				1487	newinet->mc_ttl = ip_hdr(skb)->ttl;
				1488	newinet->rcv_tos = ip_hdr(skb)->tos;
				1489	inet_csk(newsk)->icsk_ext_hdr_len = 0;
				1490	if (inet_opt)
				1491	inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
				1492	newinet->inet_id = newtp->write_seq ^ jiffies;
				1493
				1494	if (!dst) {
				1495	dst = inet_csk_route_child_sock(sk, newsk, req);
				1496	if (!dst)
				1497	goto put_and_exit;
				1498	} else {
				1499	/* syncookie case : see end of cookie_v4_check() */
				1500	}
				1501	sk_setup_caps(newsk, dst);
				1502
				1503	tcp_mtup_init(newsk);
				1504	tcp_sync_mss(newsk, dst_mtu(dst));
				1505	newtp->advmss = dst_metric_advmss(dst);
				1506	if (tcp_sk(sk)->rx_opt.user_mss &&
				1507	tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
				1508	newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
				1509
				1510	tcp_initialize_rcv_mss(newsk);
				1511	if (tcp_rsk(req)->snt_synack)
				1512	tcp_valid_rtt_meas(newsk,
				1513	tcp_time_stamp - tcp_rsk(req)->snt_synack);
				1514	newtp->total_retrans = req->retrans;
				1515
				1516	#ifdef CONFIG_TCP_MD5SIG
				1517	/* Copy over the MD5 key from the original socket */
				1518	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				1519	AF_INET);
				1520	if (key != NULL) {
				1521	/*
				1522	* We're using one, so create a matching key
				1523	* on the newsk structure. If we fail to get
				1524	* memory, then we end up not copying the key
				1525	* across. Shucks.
				1526	*/
				1527	tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
				1528	AF_INET, key->key, key->keylen, GFP_ATOMIC);
				1529	sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
				1530	}
				1531	#endif
				1532
				1533	if (__inet_inherit_port(sk, newsk) < 0)
				1534	goto put_and_exit;
				1535	__inet_hash_nolisten(newsk, NULL);
				1536
				1537	return newsk;
				1538
				1539	exit_overflow:
				1540	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
				1541	exit_nonewsk:
				1542	dst_release(dst);
				1543	exit:
				1544	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
				1545	return NULL;
				1546	put_and_exit:
				1547	inet_csk_prepare_forced_close(newsk);
				1548	tcp_done(newsk);
				1549	goto exit;
				1550	}
				1551	EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
				1552
				1553	static struct sock tcp_v4_hnd_req(struct sock sk, struct sk_buff *skb)
				1554	{
				1555	struct tcphdr *th = tcp_hdr(skb);
				1556	const struct iphdr *iph = ip_hdr(skb);
				1557	struct sock *nsk;
				1558	struct request_sock **prev;
				1559	/* Find possible connection requests. */
				1560	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
				1561	iph->saddr, iph->daddr);
				1562	if (req)
				1563	return tcp_check_req(sk, skb, req, prev);
				1564
				1565	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
				1566	th->source, iph->daddr, th->dest, inet_iif(skb));
				1567
				1568	if (nsk) {
				1569	if (nsk->sk_state != TCP_TIME_WAIT) {
				1570	bh_lock_sock(nsk);
				1571	return nsk;
				1572	}
				1573	inet_twsk_put(inet_twsk(nsk));
				1574	return NULL;
				1575	}
				1576
				1577	#ifdef CONFIG_SYN_COOKIES
				1578	if (!th->syn)
				1579	sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
				1580	#endif
				1581	return sk;
				1582	}
				1583
				1584	static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
				1585	{
				1586	const struct iphdr *iph = ip_hdr(skb);
				1587
				1588	if (skb->ip_summed == CHECKSUM_COMPLETE) {
				1589	if (!tcp_v4_check(skb->len, iph->saddr,
				1590	iph->daddr, skb->csum)) {
				1591	skb->ip_summed = CHECKSUM_UNNECESSARY;
				1592	return 0;
				1593	}
				1594	}
				1595
				1596	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
				1597	skb->len, IPPROTO_TCP, 0);
				1598
				1599	if (skb->len <= 76) {
				1600	return __skb_checksum_complete(skb);
				1601	}
				1602	return 0;
				1603	}
				1604
				1605
				1606	/* The socket must have it's spinlock held when we get
				1607	* here.
				1608	*
				1609	* We have a potential double-lock case here, so even when
				1610	* doing backlog processing we use the BH locking scheme.
				1611	* This is because we cannot sleep with the original spinlock
				1612	* held.
				1613	*/
				1614	int tcp_v4_do_rcv(struct sock sk, struct sk_buff skb)
				1615	{
				1616	struct sock *rsk;
				1617	#ifdef CONFIG_TCP_MD5SIG
				1618	/*
				1619	* We really want to reject the packet as early as possible
				1620	* if:
				1621	* o We're expecting an MD5'd packet and this is no MD5 tcp option
				1622	* o There is an MD5 option and we're not expecting one
				1623	*/
				1624	if (tcp_v4_inbound_md5_hash(sk, skb))
				1625	goto discard;
				1626	#endif
				1627
				1628	//Èç¹û×´Ì¬ÊÇTCP_ESTABLISHED£¬±íÃ÷Á¬½ÓÒÑ¾½¨Á¢
				1629	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
				1630	sock_rps_save_rxhash(sk, skb);
				1631	if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
				1632	rsk = sk;
				1633	goto reset;
				1634	}
				1635	return 0;
				1636	}
				1637
				1638	//Ð£ÑéºÍ¼ì²é
				1639	if (skb->len < tcp_hdrlen(skb) \|\| tcp_checksum_complete(skb))
				1640	goto csum_err;
				1641
				1642	//Èç¹û×´Ì¬ÊÇTCP_LISTEN£¬ÐÂ½¨Ò»¸ösockÓÃÓÚ´«Êä
				1643	if (sk->sk_state == TCP_LISTEN) {
				1644	struct sock *nsk = tcp_v4_hnd_req(sk, skb);
				1645	if (!nsk)
				1646	goto discard;
				1647
				1648	if (nsk != sk) {
				1649	sock_rps_save_rxhash(nsk, skb);
				1650	if (tcp_child_process(sk, nsk, skb)) {
				1651	rsk = nsk;
				1652	goto reset;
				1653	}
				1654	return 0;
				1655	}
				1656	} else
				1657	sock_rps_save_rxhash(sk, skb);
				1658
				1659	//Î¬»¤×´Ì¬
				1660	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
				1661	rsk = sk;
				1662	goto reset;
				1663	}
				1664	return 0;
				1665
				1666	reset:
				1667	tcp_v4_send_reset(rsk, skb);
				1668	discard:
				1669	kfree_skb(skb);
				1670	/* Be careful here. If this function gets more complicated and
				1671	* gcc suffers from register pressure on the x86, sk (in %ebx)
				1672	* might be destroyed here. This current version compiles correctly,
				1673	* but you have been warned.
				1674	*/
				1675	return 0;
				1676
				1677	csum_err:
				1678	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
				1679	goto discard;
				1680	}
				1681	EXPORT_SYMBOL(tcp_v4_do_rcv);
				1682
				1683	/*
				1684	* From tcp_input.c
				1685	*/
				1686	extern void fast_sk_add_ct(struct sk_buff skb,struct sock sk);
				1687	int tcp_v4_rcv(struct sk_buff *skb)
				1688	{
				1689	const struct iphdr *iph;
				1690	const struct tcphdr *th;
				1691	struct sock *sk;
				1692	int ret;
				1693	struct net *net = dev_net(skb->dev);
				1694	struct nf_conn ct = (struct nf_conn )skb->nfct;
				1695
				1696	//·Ç±¾µØ
				1697	if (skb->pkt_type != PACKET_HOST)
				1698	goto discard_it;
				1699
				1700	/* Count it even if it's bad */
				1701	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
				1702
				1703	TCP_PKT_STATS_INC(TCP_RECV_PKTS);
				1704
				1705	//°ü³¤¶È±ØÐë´óÓÚtcpÍ·
				1706	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
				1707	goto discard_it;
				1708
				1709	th = tcp_hdr(skb);
				1710
				1711	//tcpÍ·³¤¶ÈºÍdoffÊÇ·ñÆ¥Åä
				1712	if (th->doff < sizeof(struct tcphdr) / 4)
				1713	goto bad_packet;
				1714	//Ê×²¿µ½Êý¾Ý¶ÎÆ«ÒÆ¼ì²â
				1715	if (!pskb_may_pull(skb, th->doff * 4))
				1716	goto discard_it;
				1717
				1718	/* An explanation is required here, I think.
				1719	* Packet length and doff are validated by header prediction,
				1720	* provided case of th->doff==0 is eliminated.
				1721	* So, we defer the checks. */
				1722	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
				1723	goto bad_packet;
				1724
				1725	//¼ÆËãend_seq,end_seqÊÇÊý¾Ý°üµÄ½áÊøÐòÁÐºÅ£¬Êµ¼ÊÉÏÊÇÆÚ´ýTCPÈ·ÈÏ°üÖÐACKµÄÊýÖµ£¬ÔÚÊý¾Ý´«Êä¹ý³ÌÖÐ£¬È·ÈÏ°üACKµÄÊýÖµµÈÓÚ±¾´ÎÊý¾Ý°üSEQ
				1726
				1727	th = tcp_hdr(skb);
				1728	iph = ip_hdr(skb);
				1729	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
				1730	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				1731	skb->len - th->doff * 4);
				1732	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
				1733	TCP_SKB_CB(skb)->when = 0;
				1734	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
				1735	TCP_SKB_CB(skb)->sacked = 0;
				1736
				1737	//¸ù¾ÝËÄÔª×é²éÕÒÏàÓ¦Á¬½ÓµÄsock½á¹¹£¬´óÌåÓÐÁ½¸ö²½Öè
				1738	//Ê×ÏÈÓÃ__inet_lookup_establishedº¯Êý²éÕÒÒÑ¾´¦ÓÚestablish×´Ì¬µÄÁ¬½Ó
				1739	//Èç¹û²éÕÒ²»µ½µÄ»°£¬¾Íµ÷ÓÃ__inet_lookup_listenerº¯Êý²éÕÒÊÇ·ñ´æÔÚËÄÔª×éÏà
				1740	//Æ¥ÅäµÄ´¦ÓÚlisten×´Ì¬µÄsock,Õâ¸öÊ±ºòÊµ¼ÊÉÏÊÇ±»¶¯µÄ½ÓÊÕÀ´×ÔÆäËûÖ÷»úµÄÁ¬½ÓÇëÇó
				1741
				1742	//Èç¹û²éÕÒ²»µ½Æ¥ÅäµÄsock,ÔòÖ±½Ó¶ªÆúÊý¾Ý°ü
				1743	if (skb->isFastlocal && ct && ct->fast_ct.isFast == FAST_CT_LOCAL4)
				1744	{
				1745	sk = ct->fast_ct.sk;
				1746	}
				1747	else
				1748	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
				1749	if (!sk)
				1750	goto no_tcp_socket;
				1751
				1752	process:
				1753	if (sk->sk_state == TCP_TIME_WAIT)
				1754	goto do_time_wait;
				1755
				1756	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
				1757	NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
				1758	goto discard_and_relse;
				1759	}
				1760
				1761	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
				1762	goto discard_and_relse;
				1763
				1764	if (skb->isFastlocal == 0)
				1765	fast_sk_add_ct(skb, sk);
				1766
				1767	nf_reset(skb);
				1768
				1769	if (sk_filter(sk, skb))
				1770	goto discard_and_relse;
				1771
				1772	skb->dev = NULL;
				1773
				1774	bh_lock_sock_nested(sk);
				1775	ret = 0;
				1776	if (!sock_owned_by_user(sk)) {
				1777	#ifdef CONFIG_NET_DMA
				1778	struct tcp_sock *tp = tcp_sk(sk);
				1779	if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
				1780	tp->ucopy.dma_chan = net_dma_find_channel();
				1781	if (tp->ucopy.dma_chan)
				1782	ret = tcp_v4_do_rcv(sk, skb);
				1783	else
				1784	#endif
				1785	{
				1786	//½øÈëÔ¤±¸´¦Àí¶ÓÁÐ
				1787	if (!tcp_prequeue(sk, skb))
				1788	ret = tcp_v4_do_rcv(sk, skb);
				1789	}
				1790	}
				1791
				1792	//Èç¹ûÊý¾Ý°ü±»ÓÃ»§½ø³ÌËø¶¨£¬ÔòÊý¾Ý°ü½øÈëºó±¸´¦Àí¶ÓÁÐ
				1793	else if (unlikely(sk_add_backlog(sk, skb))) {
				1794	bh_unlock_sock(sk);
				1795	NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
				1796	goto discard_and_relse;
				1797	}
				1798	bh_unlock_sock(sk);
				1799
				1800	sock_put(sk);
				1801
				1802	return ret;
				1803
				1804	no_tcp_socket:
				1805	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
				1806	goto discard_it;
				1807
				1808	if (skb->len < (th->doff << 2) \|\| tcp_checksum_complete(skb)) {
				1809	bad_packet:
				1810	TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
				1811	} else {
				1812	tcp_v4_send_reset(NULL, skb);
				1813	}
				1814
				1815	discard_it:
				1816	/* Discard frame. */
				1817	kfree_skb(skb);
				1818	return 0;
				1819
				1820	discard_and_relse:
				1821	sock_put(sk);
				1822	goto discard_it;
				1823
				1824	do_time_wait:
				1825	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
				1826	inet_twsk_put(inet_twsk(sk));
				1827	goto discard_it;
				1828	}
				1829
				1830	if (skb->len < (th->doff << 2) \|\| tcp_checksum_complete(skb)) {
				1831	TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
				1832	inet_twsk_put(inet_twsk(sk));
				1833	goto discard_it;
				1834	}
				1835	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
				1836	case TCP_TW_SYN: {
				1837	struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
				1838	&tcp_hashinfo,
				1839	iph->daddr, th->dest,
				1840	inet_iif(skb));
				1841	if (sk2) {
				1842	inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
				1843	inet_twsk_put(inet_twsk(sk));
				1844	sk = sk2;
				1845	goto process;
				1846	}
				1847	/* Fall through to ACK */
				1848	}
				1849	case TCP_TW_ACK:
				1850	tcp_v4_timewait_ack(sk, skb);
				1851	break;
				1852	case TCP_TW_RST:
				1853	goto no_tcp_socket;
				1854	case TCP_TW_SUCCESS:;
				1855	}
				1856	goto discard_it;
				1857	}
				1858
				1859	struct inet_peer tcp_v4_get_peer(struct sock sk, bool *release_it)
				1860	{
				1861	struct rtable rt = (struct rtable ) __sk_dst_get(sk);
				1862	struct inet_sock *inet = inet_sk(sk);
				1863	struct inet_peer *peer;
				1864
				1865	if (!rt \|\|
				1866	inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
				1867	peer = inet_getpeer_v4(inet->inet_daddr, 1);
				1868	*release_it = true;
				1869	} else {
				1870	if (!rt->peer)
				1871	rt_bind_peer(rt, inet->inet_daddr, 1);
				1872	peer = rt->peer;
				1873	*release_it = false;
				1874	}
				1875
				1876	return peer;
				1877	}
				1878	EXPORT_SYMBOL(tcp_v4_get_peer);
				1879
				1880	void tcp_v4_tw_get_peer(struct sock sk)
				1881	{
				1882	const struct inet_timewait_sock *tw = inet_twsk(sk);
				1883
				1884	return inet_getpeer_v4(tw->tw_daddr, 1);
				1885	}
				1886	EXPORT_SYMBOL(tcp_v4_tw_get_peer);
				1887
				1888	static struct timewait_sock_ops tcp_timewait_sock_ops = {
				1889	.twsk_obj_size = sizeof(struct tcp_timewait_sock),
				1890	.twsk_unique = tcp_twsk_unique,
				1891	.twsk_destructor= tcp_twsk_destructor,
				1892	.twsk_getpeer = tcp_v4_tw_get_peer,
				1893	};
				1894
				1895	const struct inet_connection_sock_af_ops ipv4_specific = {
				1896	.queue_xmit = ip_queue_xmit,
				1897	.send_check = tcp_v4_send_check,
				1898	.rebuild_header = inet_sk_rebuild_header,
				1899	.conn_request = tcp_v4_conn_request,
				1900	.syn_recv_sock = tcp_v4_syn_recv_sock,
				1901	.get_peer = tcp_v4_get_peer,
				1902	.net_header_len = sizeof(struct iphdr),
				1903	.setsockopt = ip_setsockopt,
				1904	.getsockopt = ip_getsockopt,
				1905	.addr2sockaddr = inet_csk_addr2sockaddr,
				1906	.sockaddr_len = sizeof(struct sockaddr_in),
				1907	.bind_conflict = inet_csk_bind_conflict,
				1908	#ifdef CONFIG_COMPAT
				1909	.compat_setsockopt = compat_ip_setsockopt,
				1910	.compat_getsockopt = compat_ip_getsockopt,
				1911	#endif
				1912	};
				1913	EXPORT_SYMBOL(ipv4_specific);
				1914
				1915	#ifdef CONFIG_TCP_MD5SIG
				1916	static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
				1917	.md5_lookup = tcp_v4_md5_lookup,
				1918	.calc_md5_hash = tcp_v4_md5_hash_skb,
				1919	.md5_parse = tcp_v4_parse_md5_keys,
				1920	};
				1921	#endif
				1922
				1923	/* NOTE: A lot of things set to zero explicitly by call to
				1924	* sk_alloc() so need not be done here.
				1925	*/
				1926	static int tcp_v4_init_sock(struct sock *sk)
				1927	{
				1928	struct inet_connection_sock *icsk = inet_csk(sk);
				1929	struct tcp_sock *tp = tcp_sk(sk);
				1930
				1931	skb_queue_head_init(&tp->out_of_order_queue);
				1932	tcp_init_xmit_timers(sk);
				1933	tcp_prequeue_init(tp);
				1934
				1935	icsk->icsk_rto = TCP_TIMEOUT_INIT;
				1936	tp->mdev = TCP_TIMEOUT_INIT;
				1937
				1938	/* So many TCP implementations out there (incorrectly) count the
				1939	* initial SYN frame in their delayed-ACK and congestion control
				1940	* algorithms that we must have the following bandaid to talk
				1941	* efficiently to them. -DaveM
				1942	*/
				1943	tp->snd_cwnd = TCP_INIT_CWND;
				1944
				1945	/* See draft-stevens-tcpca-spec-01 for discussion of the
				1946	* initialization of these values.
				1947	*/
				1948	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
				1949	tp->snd_cwnd_clamp = ~0;
				1950	tp->mss_cache = TCP_MSS_DEFAULT;
				1951
				1952	tp->reordering = sysctl_tcp_reordering;
				1953	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
				1954
				1955	sk->sk_state = TCP_CLOSE;
				1956
				1957	sk->sk_write_space = sk_stream_write_space;
				1958	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
				1959
				1960	icsk->icsk_af_ops = &ipv4_specific;
				1961	icsk->icsk_sync_mss = tcp_sync_mss;
				1962	#ifdef CONFIG_TCP_MD5SIG
				1963	tp->af_specific = &tcp_sock_ipv4_specific;
				1964	#endif
				1965
				1966	/* TCP Cookie Transactions */
				1967	if (sysctl_tcp_cookie_size > 0) {
				1968	/* Default, cookies without s_data_payload. */
				1969	tp->cookie_values =
				1970	kzalloc(sizeof(*tp->cookie_values),
				1971	sk->sk_allocation);
				1972	if (tp->cookie_values != NULL)
				1973	kref_init(&tp->cookie_values->kref);
				1974	}
				1975	/* Presumed zeroed, in order of appearance:
				1976	* cookie_in_always, cookie_out_never,
				1977	* s_data_constant, s_data_in, s_data_out
				1978	*/
				1979	sk->sk_sndbuf = sysctl_tcp_wmem[1];
				1980	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
				1981
				1982	local_bh_disable();
				1983	sock_update_memcg(sk);
				1984	sk_sockets_allocated_inc(sk);
				1985	local_bh_enable();
				1986
				1987	return 0;
				1988	}
				1989
				1990	void tcp_v4_destroy_sock(struct sock *sk)
				1991	{
				1992	struct tcp_sock *tp = tcp_sk(sk);
				1993
				1994	tcp_clear_xmit_timers(sk);
				1995
				1996	tcp_cleanup_congestion_control(sk);
				1997
				1998	/* Cleanup up the write buffer. */
				1999	tcp_write_queue_purge(sk);
				2000
				2001	/* Cleans up our, hopefully empty, out_of_order_queue. */
				2002	__skb_queue_purge(&tp->out_of_order_queue);
				2003
				2004	#ifdef CONFIG_TCP_MD5SIG
				2005	/* Clean up the MD5 key list, if any */
				2006	if (tp->md5sig_info) {
				2007	tcp_clear_md5_list(sk);
				2008	kfree_rcu(tp->md5sig_info, rcu);
				2009	tp->md5sig_info = NULL;
				2010	}
				2011	#endif
				2012
				2013	#ifdef CONFIG_NET_DMA
				2014	/* Cleans up our sk_async_wait_queue */
				2015	__skb_queue_purge(&sk->sk_async_wait_queue);
				2016	#endif
				2017
				2018	/* Clean prequeue, it must be empty really */
				2019	__skb_queue_purge(&tp->ucopy.prequeue);
				2020
				2021	/* Clean up a referenced TCP bind bucket. */
				2022	if (inet_csk(sk)->icsk_bind_hash)
				2023	inet_put_port(sk);
				2024
				2025	/*
				2026	* If sendmsg cached page exists, toss it.
				2027	*/
				2028	if (sk->sk_sndmsg_page) {
				2029	__free_page(sk->sk_sndmsg_page);
				2030	sk->sk_sndmsg_page = NULL;
				2031	}
				2032
				2033	/* TCP Cookie Transactions */
				2034	if (tp->cookie_values != NULL) {
				2035	kref_put(&tp->cookie_values->kref,
				2036	tcp_cookie_values_release);
				2037	tp->cookie_values = NULL;
				2038	}
				2039
				2040	sk_sockets_allocated_dec(sk);
				2041	sock_release_memcg(sk);
				2042	}
				2043	EXPORT_SYMBOL(tcp_v4_destroy_sock);
				2044
				2045	#ifdef CONFIG_PROC_FS
				2046	/* Proc filesystem TCP sock list dumping. */
				2047
				2048	static inline struct inet_timewait_sock tw_head(struct hlist_nulls_head head)
				2049	{
				2050	return hlist_nulls_empty(head) ? NULL :
				2051	list_entry(head->first, struct inet_timewait_sock, tw_node);
				2052	}
				2053
				2054	static inline struct inet_timewait_sock tw_next(struct inet_timewait_sock tw)
				2055	{
				2056	return !is_a_nulls(tw->tw_node.next) ?
				2057	hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
				2058	}
				2059
				2060	/*
				2061	* Get next listener socket follow cur. If cur is NULL, get first socket
				2062	* starting from bucket given in st->bucket; when st->bucket is zero the
				2063	* very first socket in the hash table is returned.
				2064	*/
				2065	static void listening_get_next(struct seq_file seq, void *cur)
				2066	{
				2067	struct inet_connection_sock *icsk;
				2068	struct hlist_nulls_node *node;
				2069	struct sock *sk = cur;
				2070	struct inet_listen_hashbucket *ilb;
				2071	struct tcp_iter_state *st = seq->private;
				2072	struct net *net = seq_file_net(seq);
				2073
				2074	if (!sk) {
				2075	ilb = &tcp_hashinfo.listening_hash[st->bucket];
				2076	spin_lock_bh(&ilb->lock);
				2077	sk = sk_nulls_head(&ilb->head);
				2078	st->offset = 0;
				2079	goto get_sk;
				2080	}
				2081	ilb = &tcp_hashinfo.listening_hash[st->bucket];
				2082	++st->num;
				2083	++st->offset;
				2084
				2085	if (st->state == TCP_SEQ_STATE_OPENREQ) {
				2086	struct request_sock *req = cur;
				2087
				2088	icsk = inet_csk(st->syn_wait_sk);
				2089	req = req->dl_next;
				2090	while (1) {
				2091	while (req) {
				2092	if (req->rsk_ops->family == st->family) {
				2093	cur = req;
				2094	goto out;
				2095	}
				2096	req = req->dl_next;
				2097	}
				2098	if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
				2099	break;
				2100	get_req:
				2101	req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
				2102	}
				2103	sk = sk_nulls_next(st->syn_wait_sk);
				2104	st->state = TCP_SEQ_STATE_LISTENING;
				2105	read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
				2106	} else {
				2107	icsk = inet_csk(sk);
				2108	read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
				2109	if (reqsk_queue_len(&icsk->icsk_accept_queue))
				2110	goto start_req;
				2111	read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
				2112	sk = sk_nulls_next(sk);
				2113	}
				2114	get_sk:
				2115	sk_nulls_for_each_from(sk, node) {
				2116	if (!net_eq(sock_net(sk), net))
				2117	continue;
				2118	if (sk->sk_family == st->family) {
				2119	cur = sk;
				2120	goto out;
				2121	}
				2122	icsk = inet_csk(sk);
				2123	read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
				2124	if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
				2125	start_req:
				2126	st->uid = sock_i_uid(sk);
				2127	st->syn_wait_sk = sk;
				2128	st->state = TCP_SEQ_STATE_OPENREQ;
				2129	st->sbucket = 0;
				2130	goto get_req;
				2131	}
				2132	read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
				2133	}
				2134	spin_unlock_bh(&ilb->lock);
				2135	st->offset = 0;
				2136	if (++st->bucket < INET_LHTABLE_SIZE) {
				2137	ilb = &tcp_hashinfo.listening_hash[st->bucket];
				2138	spin_lock_bh(&ilb->lock);
				2139	sk = sk_nulls_head(&ilb->head);
				2140	goto get_sk;
				2141	}
				2142	cur = NULL;
				2143	out:
				2144	return cur;
				2145	}
				2146
				2147	static void listening_get_idx(struct seq_file seq, loff_t *pos)
				2148	{
				2149	struct tcp_iter_state *st = seq->private;
				2150	void *rc;
				2151
				2152	st->bucket = 0;
				2153	st->offset = 0;
				2154	rc = listening_get_next(seq, NULL);
				2155
				2156	while (rc && *pos) {
				2157	rc = listening_get_next(seq, rc);
				2158	--*pos;
				2159	}
				2160	return rc;
				2161	}
				2162
				2163	static inline int empty_bucket(struct tcp_iter_state *st)
				2164	{
				2165	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
				2166	hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
				2167	}
				2168
				2169	/*
				2170	* Get first established socket starting from bucket given in st->bucket.
				2171	* If st->bucket is zero, the very first socket in the hash is returned.
				2172	*/
				2173	static void established_get_first(struct seq_file seq)
				2174	{
				2175	struct tcp_iter_state *st = seq->private;
				2176	struct net *net = seq_file_net(seq);
				2177	void *rc = NULL;
				2178
				2179	st->offset = 0;
				2180	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
				2181	struct sock *sk;
				2182	struct hlist_nulls_node *node;
				2183	struct inet_timewait_sock *tw;
				2184	spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
				2185
				2186	/* Lockless fast path for the common case of empty buckets */
				2187	if (empty_bucket(st))
				2188	continue;
				2189
				2190	spin_lock_bh(lock);
				2191	sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
				2192	if (sk->sk_family != st->family \|\|
				2193	!net_eq(sock_net(sk), net)) {
				2194	continue;
				2195	}
				2196	rc = sk;
				2197	goto out;
				2198	}
				2199	st->state = TCP_SEQ_STATE_TIME_WAIT;
				2200	inet_twsk_for_each(tw, node,
				2201	&tcp_hashinfo.ehash[st->bucket].twchain) {
				2202	if (tw->tw_family != st->family \|\|
				2203	!net_eq(twsk_net(tw), net)) {
				2204	continue;
				2205	}
				2206	rc = tw;
				2207	goto out;
				2208	}
				2209	spin_unlock_bh(lock);
				2210	st->state = TCP_SEQ_STATE_ESTABLISHED;
				2211	}
				2212	out:
				2213	return rc;
				2214	}
				2215
				2216	static void established_get_next(struct seq_file seq, void *cur)
				2217	{
				2218	struct sock *sk = cur;
				2219	struct inet_timewait_sock *tw;
				2220	struct hlist_nulls_node *node;
				2221	struct tcp_iter_state *st = seq->private;
				2222	struct net *net = seq_file_net(seq);
				2223
				2224	++st->num;
				2225	++st->offset;
				2226
				2227	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
				2228	tw = cur;
				2229	tw = tw_next(tw);
				2230	get_tw:
				2231	while (tw && (tw->tw_family != st->family \|\| !net_eq(twsk_net(tw), net))) {
				2232	tw = tw_next(tw);
				2233	}
				2234	if (tw) {
				2235	cur = tw;
				2236	goto out;
				2237	}
				2238	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
				2239	st->state = TCP_SEQ_STATE_ESTABLISHED;
				2240
				2241	/* Look for next non empty bucket */
				2242	st->offset = 0;
				2243	while (++st->bucket <= tcp_hashinfo.ehash_mask &&
				2244	empty_bucket(st))
				2245	;
				2246	if (st->bucket > tcp_hashinfo.ehash_mask)
				2247	return NULL;
				2248
				2249	spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
				2250	sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
				2251	} else
				2252	sk = sk_nulls_next(sk);
				2253
				2254	sk_nulls_for_each_from(sk, node) {
				2255	if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
				2256	goto found;
				2257	}
				2258
				2259	st->state = TCP_SEQ_STATE_TIME_WAIT;
				2260	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
				2261	goto get_tw;
				2262	found:
				2263	cur = sk;
				2264	out:
				2265	return cur;
				2266	}
				2267
				2268	static void established_get_idx(struct seq_file seq, loff_t pos)
				2269	{
				2270	struct tcp_iter_state *st = seq->private;
				2271	void *rc;
				2272
				2273	st->bucket = 0;
				2274	rc = established_get_first(seq);
				2275
				2276	while (rc && pos) {
				2277	rc = established_get_next(seq, rc);
				2278	--pos;
				2279	}
				2280	return rc;
				2281	}
				2282
				2283	static void tcp_get_idx(struct seq_file seq, loff_t pos)
				2284	{
				2285	void *rc;
				2286	struct tcp_iter_state *st = seq->private;
				2287
				2288	st->state = TCP_SEQ_STATE_LISTENING;
				2289	rc = listening_get_idx(seq, &pos);
				2290
				2291	if (!rc) {
				2292	st->state = TCP_SEQ_STATE_ESTABLISHED;
				2293	rc = established_get_idx(seq, pos);
				2294	}
				2295
				2296	return rc;
				2297	}
				2298
				2299	static void tcp_seek_last_pos(struct seq_file seq)
				2300	{
				2301	struct tcp_iter_state *st = seq->private;
				2302	int offset = st->offset;
				2303	int orig_num = st->num;
				2304	void *rc = NULL;
				2305
				2306	switch (st->state) {
				2307	case TCP_SEQ_STATE_OPENREQ:
				2308	case TCP_SEQ_STATE_LISTENING:
				2309	if (st->bucket >= INET_LHTABLE_SIZE)
				2310	break;
				2311	st->state = TCP_SEQ_STATE_LISTENING;
				2312	rc = listening_get_next(seq, NULL);
				2313	while (offset-- && rc)
				2314	rc = listening_get_next(seq, rc);
				2315	if (rc)
				2316	break;
				2317	st->bucket = 0;
				2318	/* Fallthrough */
				2319	case TCP_SEQ_STATE_ESTABLISHED:
				2320	case TCP_SEQ_STATE_TIME_WAIT:
				2321	st->state = TCP_SEQ_STATE_ESTABLISHED;
				2322	if (st->bucket > tcp_hashinfo.ehash_mask)
				2323	break;
				2324	rc = established_get_first(seq);
				2325	while (offset-- && rc)
				2326	rc = established_get_next(seq, rc);
				2327	}
				2328
				2329	st->num = orig_num;
				2330
				2331	return rc;
				2332	}
				2333
				2334	static void tcp_seq_start(struct seq_file seq, loff_t *pos)
				2335	{
				2336	struct tcp_iter_state *st = seq->private;
				2337	void *rc;
				2338
				2339	if (pos && pos == st->last_pos) {
				2340	rc = tcp_seek_last_pos(seq);
				2341	if (rc)
				2342	goto out;
				2343	}
				2344
				2345	st->state = TCP_SEQ_STATE_LISTENING;
				2346	st->num = 0;
				2347	st->bucket = 0;
				2348	st->offset = 0;
				2349	rc = pos ? tcp_get_idx(seq, pos - 1) : SEQ_START_TOKEN;
				2350
				2351	out:
				2352	st->last_pos = *pos;
				2353	return rc;
				2354	}
				2355
				2356	static void tcp_seq_next(struct seq_file seq, void v, loff_t pos)
				2357	{
				2358	struct tcp_iter_state *st = seq->private;
				2359	void *rc = NULL;
				2360
				2361	if (v == SEQ_START_TOKEN) {
				2362	rc = tcp_get_idx(seq, 0);
				2363	goto out;
				2364	}
				2365
				2366	switch (st->state) {
				2367	case TCP_SEQ_STATE_OPENREQ:
				2368	case TCP_SEQ_STATE_LISTENING:
				2369	rc = listening_get_next(seq, v);
				2370	if (!rc) {
				2371	st->state = TCP_SEQ_STATE_ESTABLISHED;
				2372	st->bucket = 0;
				2373	st->offset = 0;
				2374	rc = established_get_first(seq);
				2375	}
				2376	break;
				2377	case TCP_SEQ_STATE_ESTABLISHED:
				2378	case TCP_SEQ_STATE_TIME_WAIT:
				2379	rc = established_get_next(seq, v);
				2380	break;
				2381	}
				2382	out:
				2383	++*pos;
				2384	st->last_pos = *pos;
				2385	return rc;
				2386	}
				2387
				2388	static void tcp_seq_stop(struct seq_file seq, void v)
				2389	{
				2390	struct tcp_iter_state *st = seq->private;
				2391
				2392	switch (st->state) {
				2393	case TCP_SEQ_STATE_OPENREQ:
				2394	if (v) {
				2395	struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
				2396	read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
				2397	}
				2398	case TCP_SEQ_STATE_LISTENING:
				2399	if (v != SEQ_START_TOKEN)
				2400	spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
				2401	break;
				2402	case TCP_SEQ_STATE_TIME_WAIT:
				2403	case TCP_SEQ_STATE_ESTABLISHED:
				2404	if (v)
				2405	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
				2406	break;
				2407	}
				2408	}
				2409
				2410	int tcp_seq_open(struct inode inode, struct file file)
				2411	{
				2412	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
				2413	struct tcp_iter_state *s;
				2414	int err;
				2415
				2416	err = seq_open_net(inode, file, &afinfo->seq_ops,
				2417	sizeof(struct tcp_iter_state));
				2418	if (err < 0)
				2419	return err;
				2420
				2421	s = ((struct seq_file *)file->private_data)->private;
				2422	s->family = afinfo->family;
				2423	s->last_pos = 0;
				2424	return 0;
				2425	}
				2426	EXPORT_SYMBOL(tcp_seq_open);
				2427
				2428	int tcp_proc_register(struct net net, struct tcp_seq_afinfo afinfo)
				2429	{
				2430	int rc = 0;
				2431	struct proc_dir_entry *p;
				2432
				2433	afinfo->seq_ops.start = tcp_seq_start;
				2434	afinfo->seq_ops.next = tcp_seq_next;
				2435	afinfo->seq_ops.stop = tcp_seq_stop;
				2436
				2437	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
				2438	afinfo->seq_fops, afinfo);
				2439	if (!p)
				2440	rc = -ENOMEM;
				2441	return rc;
				2442	}
				2443	EXPORT_SYMBOL(tcp_proc_register);
				2444
				2445	void tcp_proc_unregister(struct net net, struct tcp_seq_afinfo afinfo)
				2446	{
				2447	proc_net_remove(net, afinfo->name);
				2448	}
				2449	EXPORT_SYMBOL(tcp_proc_unregister);
				2450
				2451	static void get_openreq4(const struct sock sk, const struct request_sock req,
				2452	struct seq_file f, int i, int uid, int len)
				2453	{
				2454	const struct inet_request_sock *ireq = inet_rsk(req);
				2455	int ttd = req->expires - jiffies;
				2456
				2457	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
				2458	" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
				2459	i,
				2460	ireq->loc_addr,
				2461	ntohs(inet_sk(sk)->inet_sport),
				2462	ireq->rmt_addr,
				2463	ntohs(ireq->rmt_port),
				2464	TCP_SYN_RECV,
				2465	0, 0, /* could print option size, but that is af dependent. */
				2466	1, /* timers active (only the expire timer) */
				2467	jiffies_to_clock_t(ttd),
				2468	req->retrans,
				2469	uid,
				2470	0, /* non standard timer */
				2471	0, /* open_requests have no inode */
				2472	atomic_read(&sk->sk_refcnt),
				2473	req,
				2474	len);
				2475	}
				2476
				2477	static void get_tcp4_sock(struct sock sk, struct seq_file f, int i, int *len)
				2478	{
				2479	int timer_active;
				2480	unsigned long timer_expires;
				2481	const struct tcp_sock *tp = tcp_sk(sk);
				2482	const struct inet_connection_sock *icsk = inet_csk(sk);
				2483	const struct inet_sock *inet = inet_sk(sk);
				2484	__be32 dest = inet->inet_daddr;
				2485	__be32 src = inet->inet_rcv_saddr;
				2486	__u16 destp = ntohs(inet->inet_dport);
				2487	__u16 srcp = ntohs(inet->inet_sport);
				2488	int rx_queue;
				2489
				2490	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
				2491	timer_active = 1;
				2492	timer_expires = icsk->icsk_timeout;
				2493	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
				2494	timer_active = 4;
				2495	timer_expires = icsk->icsk_timeout;
				2496	} else if (timer_pending(&sk->sk_timer)) {
				2497	timer_active = 2;
				2498	timer_expires = sk->sk_timer.expires;
				2499	} else {
				2500	timer_active = 0;
				2501	timer_expires = jiffies;
				2502	}
				2503
				2504	if (sk->sk_state == TCP_LISTEN)
				2505	rx_queue = sk->sk_ack_backlog;
				2506	else
				2507	/*
				2508	* because we dont lock socket, we might find a transient negative value
				2509	*/
				2510	rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
				2511
				2512	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
				2513	"%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
				2514	i, src, srcp, dest, destp, sk->sk_state,
				2515	tp->write_seq - tp->snd_una,
				2516	rx_queue,
				2517	timer_active,
				2518	jiffies_to_clock_t(timer_expires - jiffies),
				2519	icsk->icsk_retransmits,
				2520	sock_i_uid(sk),
				2521	icsk->icsk_probes_out,
				2522	sock_i_ino(sk),
				2523	atomic_read(&sk->sk_refcnt), sk,
				2524	jiffies_to_clock_t(icsk->icsk_rto),
				2525	jiffies_to_clock_t(icsk->icsk_ack.ato),
				2526	(icsk->icsk_ack.quick << 1) \| icsk->icsk_ack.pingpong,
				2527	tp->snd_cwnd,
				2528	tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
				2529	len);
				2530	}
				2531
				2532	static void get_timewait4_sock(const struct inet_timewait_sock *tw,
				2533	struct seq_file f, int i, int len)
				2534	{
				2535	__be32 dest, src;
				2536	__u16 destp, srcp;
				2537	int ttd = tw->tw_ttd - jiffies;
				2538
				2539	if (ttd < 0)
				2540	ttd = 0;
				2541
				2542	dest = tw->tw_daddr;
				2543	src = tw->tw_rcv_saddr;
				2544	destp = ntohs(tw->tw_dport);
				2545	srcp = ntohs(tw->tw_sport);
				2546
				2547	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
				2548	" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
				2549	i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
				2550	3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
				2551	atomic_read(&tw->tw_refcnt), tw, len);
				2552	}
				2553
				2554	#define TMPSZ 150
				2555
				2556	static int tcp4_seq_show(struct seq_file seq, void v)
				2557	{
				2558	struct tcp_iter_state *st;
				2559	int len;
				2560
				2561	if (v == SEQ_START_TOKEN) {
				2562	seq_printf(seq, "%-*s\n", TMPSZ - 1,
				2563	" sl local_address rem_address st tx_queue "
				2564	"rx_queue tr tm->when retrnsmt uid timeout "
				2565	"inode");
				2566	goto out;
				2567	}
				2568	st = seq->private;
				2569
				2570	switch (st->state) {
				2571	case TCP_SEQ_STATE_LISTENING:
				2572	case TCP_SEQ_STATE_ESTABLISHED:
				2573	get_tcp4_sock(v, seq, st->num, &len);
				2574	break;
				2575	case TCP_SEQ_STATE_OPENREQ:
				2576	get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
				2577	break;
				2578	case TCP_SEQ_STATE_TIME_WAIT:
				2579	get_timewait4_sock(v, seq, st->num, &len);
				2580	break;
				2581	}
				2582	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
				2583	out:
				2584	return 0;
				2585	}
				2586
				2587	static const struct file_operations tcp_afinfo_seq_fops = {
				2588	.owner = THIS_MODULE,
				2589	.open = tcp_seq_open,
				2590	.read = seq_read,
				2591	.llseek = seq_lseek,
				2592	.release = seq_release_net
				2593	};
				2594
				2595	static struct tcp_seq_afinfo tcp4_seq_afinfo = {
				2596	.name = "tcp",
				2597	.family = AF_INET,
				2598	.seq_fops = &tcp_afinfo_seq_fops,
				2599	.seq_ops = {
				2600	.show = tcp4_seq_show,
				2601	},
				2602	};
				2603
				2604	static int __net_init tcp4_proc_init_net(struct net *net)
				2605	{
				2606	return tcp_proc_register(net, &tcp4_seq_afinfo);
				2607	}
				2608
				2609	static void __net_exit tcp4_proc_exit_net(struct net *net)
				2610	{
				2611	tcp_proc_unregister(net, &tcp4_seq_afinfo);
				2612	}
				2613
				2614	static struct pernet_operations tcp4_net_ops = {
				2615	.init = tcp4_proc_init_net,
				2616	.exit = tcp4_proc_exit_net,
				2617	};
				2618
				2619	int __init tcp4_proc_init(void)
				2620	{
				2621	return register_pernet_subsys(&tcp4_net_ops);
				2622	}
				2623
				2624	void tcp4_proc_exit(void)
				2625	{
				2626	unregister_pernet_subsys(&tcp4_net_ops);
				2627	}
				2628	#endif /* CONFIG_PROC_FS */
				2629
				2630	struct sk_buff tcp4_gro_receive(struct sk_buff head, struct sk_buff *skb)
				2631	{
				2632	const struct iphdr *iph = skb_gro_network_header(skb);
				2633
				2634	switch (skb->ip_summed) {
				2635	case CHECKSUM_COMPLETE:
				2636	if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
				2637	skb->csum)) {
				2638	skb->ip_summed = CHECKSUM_UNNECESSARY;
				2639	break;
				2640	}
				2641
				2642	/* fall through */
				2643	case CHECKSUM_NONE:
				2644	NAPI_GRO_CB(skb)->flush = 1;
				2645	return NULL;
				2646	}
				2647
				2648	return tcp_gro_receive(head, skb);
				2649	}
				2650
				2651	int tcp4_gro_complete(struct sk_buff *skb)
				2652	{
				2653	const struct iphdr *iph = ip_hdr(skb);
				2654	struct tcphdr *th = tcp_hdr(skb);
				2655
				2656	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
				2657	iph->saddr, iph->daddr, 0);
				2658	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
				2659
				2660	return tcp_gro_complete(skb);
				2661	}
				2662
				2663	struct proto tcp_prot = {
				2664	.name = "TCP",
				2665	.owner = THIS_MODULE,
				2666	.close = tcp_close,
				2667	.connect = tcp_v4_connect,
				2668	.disconnect = tcp_disconnect,
				2669	.accept = inet_csk_accept,
				2670	.ioctl = tcp_ioctl,
				2671	.init = tcp_v4_init_sock,
				2672	.destroy = tcp_v4_destroy_sock,
				2673	.shutdown = tcp_shutdown,
				2674	.setsockopt = tcp_setsockopt,
				2675	.getsockopt = tcp_getsockopt,
				2676	.recvmsg = tcp_recvmsg,
				2677	.sendmsg = tcp_sendmsg,
				2678	.sendpage = tcp_sendpage,
				2679	.backlog_rcv = tcp_v4_do_rcv,
				2680	.hash = inet_hash,
				2681	.unhash = inet_unhash,
				2682	.get_port = inet_csk_get_port,
				2683	.enter_memory_pressure = tcp_enter_memory_pressure,
				2684	.sockets_allocated = &tcp_sockets_allocated,
				2685	.orphan_count = &tcp_orphan_count,
				2686	.memory_allocated = &tcp_memory_allocated,
				2687	.memory_pressure = &tcp_memory_pressure,
				2688	.sysctl_wmem = sysctl_tcp_wmem,
				2689	.sysctl_rmem = sysctl_tcp_rmem,
				2690	.max_header = MAX_TCP_HEADER,
				2691	.obj_size = sizeof(struct tcp_sock),
				2692	.slab_flags = SLAB_DESTROY_BY_RCU,
				2693	.twsk_prot = &tcp_timewait_sock_ops,
				2694	.rsk_prot = &tcp_request_sock_ops,
				2695	.h.hashinfo = &tcp_hashinfo,
				2696	.no_autobind = true,
				2697	#ifdef CONFIG_COMPAT
				2698	.compat_setsockopt = compat_tcp_setsockopt,
				2699	.compat_getsockopt = compat_tcp_getsockopt,
				2700	#endif
				2701	#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
				2702	.init_cgroup = tcp_init_cgroup,
				2703	.destroy_cgroup = tcp_destroy_cgroup,
				2704	.proto_cgroup = tcp_proto_cgroup,
				2705	#endif
				2706	};
				2707	EXPORT_SYMBOL(tcp_prot);
				2708
				2709	static int __net_init tcp_sk_init(struct net *net)
				2710	{
				2711	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
				2712	PF_INET, SOCK_RAW, IPPROTO_TCP, net);
				2713	}
				2714
				2715	static void __net_exit tcp_sk_exit(struct net *net)
				2716	{
				2717	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
				2718	}
				2719
				2720	static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
				2721	{
				2722	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
				2723	}
				2724
				2725	static struct pernet_operations __net_initdata tcp_sk_ops = {
				2726	.init = tcp_sk_init,
				2727	.exit = tcp_sk_exit,
				2728	.exit_batch = tcp_sk_exit_batch,
				2729	};
				2730
				2731	void __init tcp_v4_init(void)
				2732	{
				2733	inet_hashinfo_init(&tcp_hashinfo);
				2734	if (register_pernet_subsys(&tcp_sk_ops))
				2735	panic("Failed to create the TCP control socket.\n");
				2736	}