b.liu | e958203 | 2025-04-17 19:18:16 +0800 | [diff] [blame^] | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE |
| 4 | * |
| 5 | * Socket Closing - normal and abnormal |
| 6 | * |
| 7 | * Copyright IBM Corp. 2016 |
| 8 | * |
| 9 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> |
| 10 | */ |
| 11 | |
| 12 | #include <linux/workqueue.h> |
| 13 | #include <linux/sched/signal.h> |
| 14 | |
| 15 | #include <net/sock.h> |
| 16 | |
| 17 | #include "smc.h" |
| 18 | #include "smc_tx.h" |
| 19 | #include "smc_cdc.h" |
| 20 | #include "smc_close.h" |
| 21 | |
| 22 | #define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME (5 * HZ) |
| 23 | |
| 24 | /* release the clcsock that is assigned to the smc_sock */ |
| 25 | void smc_clcsock_release(struct smc_sock *smc) |
| 26 | { |
| 27 | struct socket *tcp; |
| 28 | |
| 29 | if (smc->listen_smc && current_work() != &smc->smc_listen_work) |
| 30 | cancel_work_sync(&smc->smc_listen_work); |
| 31 | mutex_lock(&smc->clcsock_release_lock); |
| 32 | if (smc->clcsock) { |
| 33 | tcp = smc->clcsock; |
| 34 | smc->clcsock = NULL; |
| 35 | sock_release(tcp); |
| 36 | } |
| 37 | mutex_unlock(&smc->clcsock_release_lock); |
| 38 | } |
| 39 | |
| 40 | static void smc_close_cleanup_listen(struct sock *parent) |
| 41 | { |
| 42 | struct sock *sk; |
| 43 | |
| 44 | /* Close non-accepted connections */ |
| 45 | while ((sk = smc_accept_dequeue(parent, NULL))) |
| 46 | smc_close_non_accepted(sk); |
| 47 | } |
| 48 | |
| 49 | /* wait for sndbuf data being transmitted */ |
| 50 | static void smc_close_stream_wait(struct smc_sock *smc, long timeout) |
| 51 | { |
| 52 | DEFINE_WAIT_FUNC(wait, woken_wake_function); |
| 53 | struct sock *sk = &smc->sk; |
| 54 | |
| 55 | if (!timeout) |
| 56 | return; |
| 57 | |
| 58 | if (!smc_tx_prepared_sends(&smc->conn)) |
| 59 | return; |
| 60 | |
| 61 | smc->wait_close_tx_prepared = 1; |
| 62 | add_wait_queue(sk_sleep(sk), &wait); |
| 63 | while (!signal_pending(current) && timeout) { |
| 64 | int rc; |
| 65 | |
| 66 | rc = sk_wait_event(sk, &timeout, |
| 67 | !smc_tx_prepared_sends(&smc->conn) || |
| 68 | (sk->sk_err == ECONNABORTED) || |
| 69 | (sk->sk_err == ECONNRESET), |
| 70 | &wait); |
| 71 | if (rc) |
| 72 | break; |
| 73 | } |
| 74 | remove_wait_queue(sk_sleep(sk), &wait); |
| 75 | smc->wait_close_tx_prepared = 0; |
| 76 | } |
| 77 | |
| 78 | void smc_close_wake_tx_prepared(struct smc_sock *smc) |
| 79 | { |
| 80 | if (smc->wait_close_tx_prepared) |
| 81 | /* wake up socket closing */ |
| 82 | smc->sk.sk_state_change(&smc->sk); |
| 83 | } |
| 84 | |
| 85 | static int smc_close_wr(struct smc_connection *conn) |
| 86 | { |
| 87 | conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1; |
| 88 | |
| 89 | return smc_cdc_get_slot_and_msg_send(conn); |
| 90 | } |
| 91 | |
| 92 | static int smc_close_final(struct smc_connection *conn) |
| 93 | { |
| 94 | if (atomic_read(&conn->bytes_to_rcv)) |
| 95 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; |
| 96 | else |
| 97 | conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1; |
| 98 | |
| 99 | return smc_cdc_get_slot_and_msg_send(conn); |
| 100 | } |
| 101 | |
| 102 | static int smc_close_abort(struct smc_connection *conn) |
| 103 | { |
| 104 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; |
| 105 | |
| 106 | return smc_cdc_get_slot_and_msg_send(conn); |
| 107 | } |
| 108 | |
/* terminate smc socket abnormally - active abort
 * link group is terminated, i.e. RDMA communication no longer possible
 *
 * Steps, in order:
 *  - unless the socket is in SMC_INIT or has no clcsock, set ECONNABORTED on
 *    the smc socket and on the clcsock and signal the clcsock state change;
 *  - move sk_state to SMC_PEERABORTWAIT or SMC_CLOSED depending on the
 *    current state, dropping the "passive closing" reference where noted;
 *  - mark the socket SOCK_DEAD and signal the state change.
 *
 * The only caller in this file, smc_close_passive_work(), holds the sock lock.
 * The lock is released and re-acquired around the tx_work cancellation in the
 * SMC_ACTIVE and SMC_APPCLOSEWAIT1/2 cases.
 */
static void smc_close_active_abort(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;

	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;

	if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) {
		sk->sk_err = ECONNABORTED;
		/* NOTE(review): this repeats the clcsock test of the enclosing
		 * if - looks redundant, confirm before removing
		 */
		if (smc->clcsock && smc->clcsock->sk) {
			smc->clcsock->sk->sk_err = ECONNABORTED;
			smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
		}
	}
	switch (sk->sk_state) {
	case SMC_ACTIVE:
		sk->sk_state = SMC_PEERABORTWAIT;
		/* sock lock is not held while waiting for tx_work to be
		 * cancelled
		 */
		release_sock(sk);
		cancel_delayed_work_sync(&smc->conn.tx_work);
		lock_sock(sk);
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		/* closed only if a close/abort was already received from the
		 * peer, otherwise wait for the peer's abort
		 */
		if (!smc_cdc_rxed_any_close(&smc->conn))
			sk->sk_state = SMC_PEERABORTWAIT;
		else
			sk->sk_state = SMC_CLOSED;
		release_sock(sk);
		cancel_delayed_work_sync(&smc->conn.tx_work);
		lock_sock(sk);
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (!txflags->peer_conn_closed) {
			/* just SHUTDOWN_SEND done */
			sk->sk_state = SMC_PEERABORTWAIT;
		} else {
			sk->sk_state = SMC_CLOSED;
		}
		sock_put(sk); /* passive closing */
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* state is left unchanged, only the reference is dropped */
		sock_put(sk); /* passive closing */
		break;
	case SMC_INIT:
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}

	smc_sock_set_flag(sk, SOCK_DEAD);
	sk->sk_state_change(sk);
}
| 170 | |
| 171 | static inline bool smc_close_sent_any_close(struct smc_connection *conn) |
| 172 | { |
| 173 | return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort || |
| 174 | conn->local_tx_ctrl.conn_state_flags.peer_conn_closed; |
| 175 | } |
| 176 | |
/* Locally initiated close of an smc socket.
 *
 * Drives sk_state forward from its current value. Depending on the state,
 * this waits for prepared sends to drain, cancels tx_work, sends a closing
 * indication via smc_close_final() or smc_close_abort(), and shuts down the
 * clcsock.
 *
 * The sock lock is released and re-acquired inside several cases, so the
 * caller presumably holds it on entry - TODO confirm against callers.
 *
 * Returns 0, or the error reported by smc_close_final() or
 * kernel_sock_shutdown().
 */
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;
	int rc1 = 0;

	/* no waiting for an exiting task; otherwise the linger time if
	 * SOCK_LINGER is set, else SMC_MAX_STREAM_WAIT_TIMEOUT
	 */
	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	/* captured once: a later "goto again" still compares against the
	 * state seen on entry
	 */
	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
			/* wake up kernel_accept of smc_tcp_listen_worker */
			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
		}
		smc_close_cleanup_listen(sk);
		/* sock lock is not held while waiting for tcp_listen_work */
		release_sock(sk);
		flush_work(&smc->tcp_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		/* sock lock is not held while tx_work is cancelled; the state
		 * is re-checked afterwards
		 */
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			if (rc)
				break;
			sk->sk_state = SMC_PEERCLOSEWAIT1;

			/* actively shutdown clcsock before peer close it,
			 * prevent peer from entering TIME_WAIT state.
			 */
			if (smc->clcsock && smc->clcsock->sk) {
				rc1 = kernel_sock_shutdown(smc->clcsock,
							   SHUT_RDWR);
				/* rc is 0 here (non-zero broke out above), so
				 * this yields rc1
				 */
				rc = rc ? rc : rc1;
			}
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* state changed while the sock lock was dropped: start over */
		if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
		    sk->sk_state != SMC_APPCLOSEWAIT2)
			goto again;
		/* confirm close from peer */
		rc = smc_close_final(conn);
		if (rc)
			break;
		if (smc_cdc_rxed_any_close(conn)) {
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* postponed passive closing */
		} else {
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		}
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		/* return value of smc_close_abort() is not propagated here */
		smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}
| 298 | |
| 299 | static void smc_close_passive_abort_received(struct smc_sock *smc) |
| 300 | { |
| 301 | struct smc_cdc_conn_state_flags *txflags = |
| 302 | &smc->conn.local_tx_ctrl.conn_state_flags; |
| 303 | struct sock *sk = &smc->sk; |
| 304 | |
| 305 | switch (sk->sk_state) { |
| 306 | case SMC_INIT: |
| 307 | case SMC_ACTIVE: |
| 308 | case SMC_APPCLOSEWAIT1: |
| 309 | sk->sk_state = SMC_PROCESSABORT; |
| 310 | sock_put(sk); /* passive closing */ |
| 311 | break; |
| 312 | case SMC_APPFINCLOSEWAIT: |
| 313 | sk->sk_state = SMC_PROCESSABORT; |
| 314 | break; |
| 315 | case SMC_PEERCLOSEWAIT1: |
| 316 | case SMC_PEERCLOSEWAIT2: |
| 317 | if (txflags->peer_done_writing && |
| 318 | !smc_close_sent_any_close(&smc->conn)) |
| 319 | /* just shutdown, but not yet closed locally */ |
| 320 | sk->sk_state = SMC_PROCESSABORT; |
| 321 | else |
| 322 | sk->sk_state = SMC_CLOSED; |
| 323 | sock_put(sk); /* passive closing */ |
| 324 | break; |
| 325 | case SMC_APPCLOSEWAIT2: |
| 326 | case SMC_PEERFINCLOSEWAIT: |
| 327 | sk->sk_state = SMC_CLOSED; |
| 328 | sock_put(sk); /* passive closing */ |
| 329 | break; |
| 330 | case SMC_PEERABORTWAIT: |
| 331 | sk->sk_state = SMC_CLOSED; |
| 332 | break; |
| 333 | case SMC_PROCESSABORT: |
| 334 | /* nothing to do, add tracing in future patch */ |
| 335 | break; |
| 336 | } |
| 337 | } |
| 338 | |
/* Either some kind of closing has been received: peer_conn_closed,
 * peer_conn_abort, or peer_done_writing
 * or the link group of the connection terminates abnormally.
 *
 * Work handler for conn->close_work (registered in smc_close_init()).
 * Runs under the sock lock. It updates sk_state, wakes up readers, writers
 * and state waiters, and finally drops the reference taken by whoever
 * scheduled the work. If the socket ends up SMC_CLOSED and is either
 * SOCK_DEAD or has no sk_socket, the connection is freed and the clcsock is
 * released.
 */
static void smc_close_passive_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   close_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_cdc_conn_state_flags *rxflags;
	bool release_clcsock = false;
	struct sock *sk = &smc->sk;
	int old_state;

	lock_sock(sk);
	old_state = sk->sk_state;

	if (!conn->alert_token_local) {
		/* abnormal termination */
		smc_close_active_abort(smc);
		goto wakeup;
	}

	rxflags = &conn->local_rx_ctrl.conn_state_flags;
	if (rxflags->peer_conn_abort) {
		/* peer has not received all data */
		smc_close_passive_abort_received(smc);
		/* sock lock is not held while tx_work is cancelled */
		release_sock(&smc->sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(&smc->sk);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		/* fall through */
		/* to check for closing */
	case SMC_PEERCLOSEWAIT2:
		/* nothing more to do until a close/abort has been received */
		if (!smc_cdc_rxed_any_close(conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    smc_close_sent_any_close(conn)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERFINCLOSEWAIT:
		if (smc_cdc_rxed_any_close(conn)) {
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* passive closing */
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if (old_state != sk->sk_state) {
		sk->sk_state_change(sk);
		if ((sk->sk_state == SMC_CLOSED) &&
		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
			smc_conn_free(conn);
			/* the release itself happens after release_sock() */
			if (smc->clcsock)
				release_clcsock = true;
		}
	}
	release_sock(sk);
	if (release_clcsock)
		smc_clcsock_release(smc);
	sock_put(sk); /* sock_hold done by schedulers of close_work */
}
| 439 | |
/* Shut down the write direction of an smc socket.
 *
 * In SMC_ACTIVE and SMC_APPCLOSEWAIT1 this waits for prepared sends to drain,
 * cancels tx_work, sends the "done writing" indication via smc_close_wr()
 * and moves sk_state to SMC_PEERCLOSEWAIT1 or SMC_APPCLOSEWAIT2. The other
 * states handled by the switch are left untouched.
 *
 * The sock lock is released and re-acquired inside both active cases, so the
 * caller presumably holds it on entry - TODO confirm against callers.
 *
 * Returns 0, or the error reported by smc_close_wr().
 */
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	/* no waiting for an exiting task; otherwise the linger time if
	 * SOCK_LINGER is set, else SMC_MAX_STREAM_WAIT_TIMEOUT
	 */
	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	/* captured once: a later "goto again" still compares against the
	 * state seen on entry
	 */
	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		/* sock lock is not held while tx_work is cancelled; the state
		 * is re-checked afterwards
		 */
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_ACTIVE)
			goto again;
		/* send close wr request */
		rc = smc_close_wr(conn);
		if (rc)
			break;
		sk->sk_state = SMC_PEERCLOSEWAIT1;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_APPCLOSEWAIT1)
			goto again;
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		if (rc)
			break;
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}
| 498 | |
/* Initialize close properties on connection establishment.
 *
 * Sets up the connection's close_work item with smc_close_passive_work() as
 * its handler; the work is not queued here.
 */
void smc_close_init(struct smc_sock *smc)
{
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}