// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req);

static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr)
{
	return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		rcu_assign_pointer(sk->sk_rx_dst, dst);
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
	}
}

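/* Derive the initial sequence number and the timestamp offset for a new
 * connection from the flow's addresses and ports via a keyed secure
 * hash, so that they are not predictable by off-path attackers.
 */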
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent the BPF program called below from accessing bytes that
	 * are outside the bound specified by the user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

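/* Active open: validate the destination address, hand v4-mapped
 * destinations off to tcp_v4_connect(), otherwise route the flow, pick
 * a source address, bind a local port and send the SYN.
 */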
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 * connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 * TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (!saddr) {
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
						   np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

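/* Apply a PMTU reduction previously noted by tcp_v6_err(): shrink the
 * MSS and retransmit if the updated path MTU is smaller than what the
 * connection currently uses.
 */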
static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

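/* ICMPv6 error handler: look up the socket the error refers to, then
 * dispatch on the ICMP type - redirects update the cached route,
 * PKT_TOOBIG records the new path MTU, fatal errors are reported to
 * the socket.
 */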
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always <576 bytes, so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}

	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket
		 * is already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}

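/* Transmit a SYN-ACK for the given request: route it if no dst was
 * supplied, build the segment with tcp_make_synack() and send it via
 * ip6_xmit() with the listener's IPv6 transmit options.
 */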
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
			       np->tclass, sk->sk_priority);
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	kfree_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr)
{
	return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
}

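/* setsockopt(TCP_MD5SIG/TCP_MD5SIG_EXT) handler: validate the request,
 * derive the address family and prefix length (v4-mapped addresses are
 * stored as IPv4 keys), then add or delete the key.
 */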
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 char __user *optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	u8 prefixlen;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, prefixlen, cmd.tcpm_key,
				      cmd.tcpm_keylen, GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, prefixlen, cmd.tcpm_key,
			      cmd.tcpm_keylen, GFP_KERNEL);
}

static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	/* 1. TCP pseudo-header (RFC2460) */
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
			       const struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

#endif

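/* Check an incoming segment against the socket's MD5 configuration.
 * Returns true when the segment must be dropped: an expected signature
 * is missing, an unexpected one is present, or the digest mismatches.
 */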
static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	u8 newhash[16];

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
				     genhash ? "failed" : "mismatch",
				     &ip6h->saddr, ntohs(th->source),
				     &ip6h->daddr, ntohs(th->dest));
		return true;
	}
#endif
	return false;
}

static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct flowi *fl,
					  const struct request_sock *req)
{
	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
	.init_req	=	tcp_v6_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};

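/* Build and send a stateless control segment (RST or bare ACK) in reply
 * to @skb on the per-netns control socket, echoing the addresses and
 * flow label and, when configured, adding timestamp and MD5 options.
 */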
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label, u32 priority)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	struct dst_entry *dst;
	__be32 *topt;
	__u32 mark = 0;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;
	buff->csum = 0;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		if (sk->sk_state == TCP_TIME_WAIT) {
			mark = inet_twsk(sk)->tw_mark;
			/* autoflowlabel relies on buff->hash */
			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
				     PKT_HASH_TYPE_L4);
		} else {
			mark = sk->sk_mark;
		}
		buff->tstamp = tcp_transmit_time(sk);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	/* Pass a socket to ip6_dst_lookup_flow even when this is for a RST;
	 * the underlying function will use it to retrieve the network
	 * namespace.
	 */
	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
			 priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	int oif = 0;

	if (th->rst)
		return;

	/* If sk is not NULL, it means we did a successful lookup and the
	 * incoming route had to be correct. prequeue might have dropped
	 * our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
	} else if (hash_location) {
		/* The active side is lost. Try to find the listening socket
		 * through the source port, and then find the md5 key through
		 * the listening socket.
		 * We do not lose security here:
		 * the incoming packet is checked against the md5 hash with
		 * the found key; no RST is generated if the md5 hash doesn't
		 * match.
		 */
		sk1 = inet6_lookup_listener(net,
					    &tcp_hashinfo, NULL, 0,
					    &ipv6h->saddr,
					    th->source, &ipv6h->daddr,
					    ntohs(th->source),
					    tcp_v6_iif_l3_slave(skb),
					    tcp_v6_sdif(skb));
		if (!sk1)
			goto out;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
		if (!key)
			goto out;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

			trace_tcp_send_reset(sk, skb);
			if (np->repflow)
				label = ip6_flowlabel(ipv6h);
			priority = sk->sk_priority;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
		}
	} else {
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
			     label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label, u32 priority)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
			     tclass, label, priority);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent, sk->sk_bound_dev_if,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
			0, 0, sk->sk_priority);
}

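/* When SYN cookies are enabled, validate a non-SYN segment arriving at
 * a listener: cookie_v6_check() verifies the cookie-encoded ACK and, if
 * it is valid, creates the child socket.
 */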
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

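/* Create the child socket once the handshake (or Fast Open) completes:
 * the v4-mapped path delegates to tcp_v4_syn_recv_sock(); the native
 * path clones the listener, stores the route and IPv6 options, copies
 * any MD5 key and inserts the child into the established hash.
 */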
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 * v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

		newinet = inet_sk(newsk);
		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions = NULL;
		newnp->opt = NULL;
		newnp->mcast_oif = inet_iif(skb);
		newnp->mcast_hops = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	inet6_sk_rx_dst_set(newsk, skb);

	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ip6_dst_store(newsk, dst, NULL, NULL);

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt = NULL;
	newnp->mcast_oif = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Clone native IPv6 options from the listening socket (if any).

	   Yes, keeping a reference count would be much more clever, but we
	   do one more thing here: reattach optmem to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions)
				tcp_v6_restore_cb(newnp->pktoptions);
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}

/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 * socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like the idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought out. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose a better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all && sk->sk_state != TCP_LISTEN)
		opt_skb = skb_clone_and_charge_r(skb, sk);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;

ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	kfree_skb(opt_skb);
	return 0;
}

static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}

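/* Main IPv6 receive entry point, called from the protocol handler:
 * validate and checksum the segment, look up the owning socket, then
 * either process it directly, queue it to the backlog, or answer
 * statelessly (RST, time-wait ACK, new connection from time-wait).
 */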
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	struct sk_buff *skb_to_free;
	int sdif = inet6_sdif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	bool refcounted;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 * Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		if (tcp_v6_inbound_md5_hash(sk, skb)) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			inet_csk_reqsk_queue_drop_and_put(sk, req);
			goto lookup;
		}
		sock_hold(sk);
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v6_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}
	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (tcp_v6_inbound_md5_hash(sk, skb))
		goto discard_and_relse;

	if (tcp_filter(sk, skb))
		goto discard_and_relse;
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		skb_to_free = sk->sk_rx_skb_cache;
		sk->sk_rx_skb_cache = NULL;
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb))
			goto discard_and_relse;
		skb_to_free = NULL;
	}
	bh_unlock_sock(sk);
	if (skb_to_free)
		__kfree_skb(skb_to_free);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* to ACK */
		/* fall through */
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}

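/* Early demux: before the routing decision, try to match the packet to
 * an already established socket so that socket's cached receive dst can
 * be reused for this skb.
 */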
void tcp_v6_early_demux(struct sk_buff *skb)
{
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

static const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *	 sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest = &sp->sk_v6_daddr;
	src = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tp->snd_cwnd,
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
	.diag_destroy		= tcp_abort,
};

static const struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;
out:
	return ret;

out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}
2149}