| xj | b04a402 | 2021-11-25 15:01:52 +0800 | [diff] [blame] | 1 | /* | 
|  | 2 | * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. | 
|  | 3 | * | 
|  | 4 | * This software is available to you under a choice of one of two | 
|  | 5 | * licenses.  You may choose to be licensed under the terms of the GNU | 
|  | 6 | * General Public License (GPL) Version 2, available from the file | 
|  | 7 | * COPYING in the main directory of this source tree, or the | 
|  | 8 | * OpenIB.org BSD license below: | 
|  | 9 | * | 
|  | 10 | *     Redistribution and use in source and binary forms, with or | 
|  | 11 | *     without modification, are permitted provided that the following | 
|  | 12 | *     conditions are met: | 
|  | 13 | * | 
|  | 14 | *      - Redistributions of source code must retain the above | 
|  | 15 | *        copyright notice, this list of conditions and the following | 
|  | 16 | *        disclaimer. | 
|  | 17 | * | 
|  | 18 | *      - Redistributions in binary form must reproduce the above | 
|  | 19 | *        copyright notice, this list of conditions and the following | 
|  | 20 | *        disclaimer in the documentation and/or other materials | 
|  | 21 | *        provided with the distribution. | 
|  | 22 | * | 
|  | 23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | 
|  | 24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | 
|  | 25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | 
|  | 26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | 
|  | 27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | 
|  | 28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | 
|  | 29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | 
|  | 30 | * SOFTWARE. | 
|  | 31 | * | 
|  | 32 | */ | 
|  | 33 | #include <linux/kernel.h> | 
|  | 34 | #include <linux/slab.h> | 
|  | 35 | #include <linux/in.h> | 
|  | 36 | #include <net/net_namespace.h> | 
|  | 37 | #include <net/netns/generic.h> | 
|  | 38 | #include <linux/ipv6.h> | 
|  | 39 |  | 
|  | 40 | #include "rds_single_path.h" | 
|  | 41 | #include "rds.h" | 
|  | 42 | #include "loop.h" | 
|  | 43 |  | 
|  | 44 | static DEFINE_SPINLOCK(loop_conns_lock); | 
|  | 45 | static LIST_HEAD(loop_conns); | 
|  | 46 | static atomic_t rds_loop_unloading = ATOMIC_INIT(0); | 
|  | 47 |  | 
|  | 48 | static void rds_loop_set_unloading(void) | 
|  | 49 | { | 
|  | 50 | atomic_set(&rds_loop_unloading, 1); | 
|  | 51 | } | 
|  | 52 |  | 
|  | 53 | static bool rds_loop_is_unloading(struct rds_connection *conn) | 
|  | 54 | { | 
|  | 55 | return atomic_read(&rds_loop_unloading) != 0; | 
|  | 56 | } | 
|  | 57 |  | 
|  | 58 | /* | 
|  | 59 | * This 'loopback' transport is a special case for flows that originate | 
|  | 60 | * and terminate on the same machine. | 
|  | 61 | * | 
|  | 62 | * Connection build-up notices if the destination address is thought of | 
|  | 63 | * as a local address by a transport.  At that time it decides to use the | 
|  | 64 | * loopback transport instead of the bound transport of the sending socket. | 
|  | 65 | * | 
|  | 66 | * The loopback transport's sending path just hands the sent rds_message | 
|  | 67 | * straight to the receiving path via an embedded rds_incoming. | 
|  | 68 | */ | 
|  | 69 |  | 
|  | 70 | /* | 
|  | 71 | * Usually a message transits both the sender and receiver's conns as it | 
|  | 72 | * flows to the receiver.  In the loopback case, though, the receive path | 
|  | 73 | * is handed the sending conn so the sense of the addresses is reversed. | 
|  | 74 | */ | 
|  | 75 | static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm, | 
|  | 76 | unsigned int hdr_off, unsigned int sg, | 
|  | 77 | unsigned int off) | 
|  | 78 | { | 
|  | 79 | struct scatterlist *sgp = &rm->data.op_sg[sg]; | 
|  | 80 | int ret = sizeof(struct rds_header) + | 
|  | 81 | be32_to_cpu(rm->m_inc.i_hdr.h_len); | 
|  | 82 |  | 
|  | 83 | /* Do not send cong updates to loopback */ | 
|  | 84 | if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { | 
|  | 85 | rds_cong_map_updated(conn->c_fcong, ~(u64) 0); | 
|  | 86 | ret = min_t(int, ret, sgp->length - conn->c_xmit_data_off); | 
|  | 87 | goto out; | 
|  | 88 | } | 
|  | 89 |  | 
|  | 90 | BUG_ON(hdr_off || sg || off); | 
|  | 91 |  | 
|  | 92 | rds_inc_init(&rm->m_inc, conn, &conn->c_laddr); | 
|  | 93 | /* For the embedded inc. Matching put is in loop_inc_free() */ | 
|  | 94 | rds_message_addref(rm); | 
|  | 95 |  | 
|  | 96 | rds_recv_incoming(conn, &conn->c_laddr, &conn->c_faddr, &rm->m_inc, | 
|  | 97 | GFP_KERNEL); | 
|  | 98 |  | 
|  | 99 | rds_send_drop_acked(conn, be64_to_cpu(rm->m_inc.i_hdr.h_sequence), | 
|  | 100 | NULL); | 
|  | 101 |  | 
|  | 102 | rds_inc_put(&rm->m_inc); | 
|  | 103 | out: | 
|  | 104 | return ret; | 
|  | 105 | } | 
|  | 106 |  | 
|  | 107 | /* | 
|  | 108 | * See rds_loop_xmit(). Since our inc is embedded in the rm, we | 
|  | 109 | * make sure the rm lives at least until the inc is done. | 
|  | 110 | */ | 
|  | 111 | static void rds_loop_inc_free(struct rds_incoming *inc) | 
|  | 112 | { | 
|  | 113 | struct rds_message *rm = container_of(inc, struct rds_message, m_inc); | 
|  | 114 |  | 
|  | 115 | rds_message_put(rm); | 
|  | 116 | } | 
|  | 117 |  | 
/*
 * .recv_path hook.  There is nothing to receive here, but the receive
 * thread still needs a hook that succeeds, so @cp is ignored and 0 is
 * always returned.
 */
static int rds_loop_recv_path(struct rds_conn_path *cp)
{
	return 0;
}
|  | 123 |  | 
|  | 124 | struct rds_loop_connection { | 
|  | 125 | struct list_head loop_node; | 
|  | 126 | struct rds_connection *conn; | 
|  | 127 | }; | 
|  | 128 |  | 
|  | 129 | /* | 
|  | 130 | * Even the loopback transport needs to keep track of its connections, | 
|  | 131 | * so it can call rds_conn_destroy() on them on exit. N.B. there are | 
|  | 132 | * 1+ loopback addresses (127.*.*.*) so it's not a bug to have | 
|  | 133 | * multiple loopback conns allocated, although rather useless. | 
|  | 134 | */ | 
|  | 135 | static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp) | 
|  | 136 | { | 
|  | 137 | struct rds_loop_connection *lc; | 
|  | 138 | unsigned long flags; | 
|  | 139 |  | 
|  | 140 | lc = kzalloc(sizeof(struct rds_loop_connection), gfp); | 
|  | 141 | if (!lc) | 
|  | 142 | return -ENOMEM; | 
|  | 143 |  | 
|  | 144 | INIT_LIST_HEAD(&lc->loop_node); | 
|  | 145 | lc->conn = conn; | 
|  | 146 | conn->c_transport_data = lc; | 
|  | 147 |  | 
|  | 148 | spin_lock_irqsave(&loop_conns_lock, flags); | 
|  | 149 | list_add_tail(&lc->loop_node, &loop_conns); | 
|  | 150 | spin_unlock_irqrestore(&loop_conns_lock, flags); | 
|  | 151 |  | 
|  | 152 | return 0; | 
|  | 153 | } | 
|  | 154 |  | 
|  | 155 | static void rds_loop_conn_free(void *arg) | 
|  | 156 | { | 
|  | 157 | struct rds_loop_connection *lc = arg; | 
|  | 158 | unsigned long flags; | 
|  | 159 |  | 
|  | 160 | rdsdebug("lc %p\n", lc); | 
|  | 161 | spin_lock_irqsave(&loop_conns_lock, flags); | 
|  | 162 | list_del(&lc->loop_node); | 
|  | 163 | spin_unlock_irqrestore(&loop_conns_lock, flags); | 
|  | 164 | kfree(lc); | 
|  | 165 | } | 
|  | 166 |  | 
|  | 167 | static int rds_loop_conn_path_connect(struct rds_conn_path *cp) | 
|  | 168 | { | 
|  | 169 | rds_connect_complete(cp->cp_conn); | 
|  | 170 | return 0; | 
|  | 171 | } | 
|  | 172 |  | 
/* .conn_path_shutdown hook: intentionally empty, @cp is left untouched. */
static void rds_loop_conn_path_shutdown(struct rds_conn_path *cp)
{
	/* nothing to do */
}
|  | 176 |  | 
|  | 177 | void rds_loop_exit(void) | 
|  | 178 | { | 
|  | 179 | struct rds_loop_connection *lc, *_lc; | 
|  | 180 | LIST_HEAD(tmp_list); | 
|  | 181 |  | 
|  | 182 | rds_loop_set_unloading(); | 
|  | 183 | synchronize_rcu(); | 
|  | 184 | /* avoid calling conn_destroy with irqs off */ | 
|  | 185 | spin_lock_irq(&loop_conns_lock); | 
|  | 186 | list_splice(&loop_conns, &tmp_list); | 
|  | 187 | INIT_LIST_HEAD(&loop_conns); | 
|  | 188 | spin_unlock_irq(&loop_conns_lock); | 
|  | 189 |  | 
|  | 190 | list_for_each_entry_safe(lc, _lc, &tmp_list, loop_node) { | 
|  | 191 | WARN_ON(lc->conn->c_passive); | 
|  | 192 | rds_conn_destroy(lc->conn); | 
|  | 193 | } | 
|  | 194 | } | 
|  | 195 |  | 
|  | 196 | static void rds_loop_kill_conns(struct net *net) | 
|  | 197 | { | 
|  | 198 | struct rds_loop_connection *lc, *_lc; | 
|  | 199 | LIST_HEAD(tmp_list); | 
|  | 200 |  | 
|  | 201 | spin_lock_irq(&loop_conns_lock); | 
|  | 202 | list_for_each_entry_safe(lc, _lc, &loop_conns, loop_node)  { | 
|  | 203 | struct net *c_net = read_pnet(&lc->conn->c_net); | 
|  | 204 |  | 
|  | 205 | if (net != c_net) | 
|  | 206 | continue; | 
|  | 207 | list_move_tail(&lc->loop_node, &tmp_list); | 
|  | 208 | } | 
|  | 209 | spin_unlock_irq(&loop_conns_lock); | 
|  | 210 |  | 
|  | 211 | list_for_each_entry_safe(lc, _lc, &tmp_list, loop_node) { | 
|  | 212 | WARN_ON(lc->conn->c_passive); | 
|  | 213 | rds_conn_destroy(lc->conn); | 
|  | 214 | } | 
|  | 215 | } | 
|  | 216 |  | 
|  | 217 | static void __net_exit rds_loop_exit_net(struct net *net) | 
|  | 218 | { | 
|  | 219 | rds_loop_kill_conns(net); | 
|  | 220 | } | 
|  | 221 |  | 
|  | 222 | static struct pernet_operations rds_loop_net_ops = { | 
|  | 223 | .exit = rds_loop_exit_net, | 
|  | 224 | }; | 
|  | 225 |  | 
|  | 226 | int rds_loop_net_init(void) | 
|  | 227 | { | 
|  | 228 | return register_pernet_device(&rds_loop_net_ops); | 
|  | 229 | } | 
|  | 230 |  | 
|  | 231 | void rds_loop_net_exit(void) | 
|  | 232 | { | 
|  | 233 | unregister_pernet_device(&rds_loop_net_ops); | 
|  | 234 | } | 
|  | 235 |  | 
/*
 * Only the plain .xmit hook is provided: rds_loop_xmit() delivers each
 * message directly to the receive side via rds_recv_incoming(), and no
 * other .xmit_* hooks are set here.  .listen_stop and .laddr_check are
 * missing because transport.c doesn't iterate over rds_loop_transport.
 */
|  | 242 | struct rds_transport rds_loop_transport = { | 
|  | 243 | .xmit			= rds_loop_xmit, | 
|  | 244 | .recv_path		= rds_loop_recv_path, | 
|  | 245 | .conn_alloc		= rds_loop_conn_alloc, | 
|  | 246 | .conn_free		= rds_loop_conn_free, | 
|  | 247 | .conn_path_connect	= rds_loop_conn_path_connect, | 
|  | 248 | .conn_path_shutdown	= rds_loop_conn_path_shutdown, | 
|  | 249 | .inc_copy_to_user	= rds_message_inc_copy_to_user, | 
|  | 250 | .inc_free		= rds_loop_inc_free, | 
|  | 251 | .t_name			= "loopback", | 
|  | 252 | .t_type			= RDS_TRANS_LOOP, | 
|  | 253 | .t_unloading		= rds_loop_is_unloading, | 
|  | 254 | }; |