/*
 * net/sched/act_mirred.c	packet mirroring and redirect actions
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Jamal Hadi Salim (2002-4)
 *
 * TODO: Add ingress support (and socket redirect support)
 *
 */
| 14 | |
| 15 | #include <linux/types.h> |
| 16 | #include <linux/kernel.h> |
| 17 | #include <linux/string.h> |
| 18 | #include <linux/errno.h> |
| 19 | #include <linux/skbuff.h> |
| 20 | #include <linux/rtnetlink.h> |
| 21 | #include <linux/module.h> |
| 22 | #include <linux/init.h> |
| 23 | #include <linux/gfp.h> |
| 24 | #include <linux/if_arp.h> |
| 25 | #include <net/net_namespace.h> |
| 26 | #include <net/netlink.h> |
| 27 | #include <net/pkt_sched.h> |
| 28 | #include <net/pkt_cls.h> |
| 29 | #include <linux/tc_act/tc_mirred.h> |
| 30 | #include <net/tc_act/tc_mirred.h> |
| 31 | |
/* Guards every insertion into, removal from and walk over mirred_list. */
static DEFINE_SPINLOCK(mirred_list_lock);
/* All mirred actions created by this module, linked through tcfm_list. */
static LIST_HEAD(mirred_list);
| 34 | |
| 35 | static bool tcf_mirred_is_act_redirect(int action) |
| 36 | { |
| 37 | return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR; |
| 38 | } |
| 39 | |
| 40 | static bool tcf_mirred_act_wants_ingress(int action) |
| 41 | { |
| 42 | switch (action) { |
| 43 | case TCA_EGRESS_REDIR: |
| 44 | case TCA_EGRESS_MIRROR: |
| 45 | return false; |
| 46 | case TCA_INGRESS_REDIR: |
| 47 | case TCA_INGRESS_MIRROR: |
| 48 | return true; |
| 49 | default: |
| 50 | BUG(); |
| 51 | } |
| 52 | } |
| 53 | |
| 54 | static bool tcf_mirred_can_reinsert(int action) |
| 55 | { |
| 56 | switch (action) { |
| 57 | case TC_ACT_SHOT: |
| 58 | case TC_ACT_STOLEN: |
| 59 | case TC_ACT_QUEUED: |
| 60 | case TC_ACT_TRAP: |
| 61 | return true; |
| 62 | } |
| 63 | return false; |
| 64 | } |
| 65 | |
| 66 | static struct net_device *tcf_mirred_dev_dereference(struct tcf_mirred *m) |
| 67 | { |
| 68 | return rcu_dereference_protected(m->tcfm_dev, |
| 69 | lockdep_is_held(&m->tcf_lock)); |
| 70 | } |
| 71 | |
| 72 | static void tcf_mirred_release(struct tc_action *a) |
| 73 | { |
| 74 | struct tcf_mirred *m = to_mirred(a); |
| 75 | struct net_device *dev; |
| 76 | |
| 77 | spin_lock(&mirred_list_lock); |
| 78 | list_del(&m->tcfm_list); |
| 79 | spin_unlock(&mirred_list_lock); |
| 80 | |
| 81 | /* last reference to action, no need to lock */ |
| 82 | dev = rcu_dereference_protected(m->tcfm_dev, 1); |
| 83 | if (dev) |
| 84 | dev_put(dev); |
| 85 | } |
| 86 | |
| 87 | static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { |
| 88 | [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) }, |
| 89 | }; |
| 90 | |
| 91 | static unsigned int mirred_net_id; |
| 92 | static struct tc_action_ops act_mirred_ops; |
| 93 | |
| 94 | static int tcf_mirred_init(struct net *net, struct nlattr *nla, |
| 95 | struct nlattr *est, struct tc_action **a, |
| 96 | int ovr, int bind, bool rtnl_held, |
| 97 | struct netlink_ext_ack *extack) |
| 98 | { |
| 99 | struct tc_action_net *tn = net_generic(net, mirred_net_id); |
| 100 | struct nlattr *tb[TCA_MIRRED_MAX + 1]; |
| 101 | bool mac_header_xmit = false; |
| 102 | struct tc_mirred *parm; |
| 103 | struct tcf_mirred *m; |
| 104 | struct net_device *dev; |
| 105 | bool exists = false; |
| 106 | int ret, err; |
| 107 | u32 index; |
| 108 | |
| 109 | if (!nla) { |
| 110 | NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed"); |
| 111 | return -EINVAL; |
| 112 | } |
| 113 | ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, extack); |
| 114 | if (ret < 0) |
| 115 | return ret; |
| 116 | if (!tb[TCA_MIRRED_PARMS]) { |
| 117 | NL_SET_ERR_MSG_MOD(extack, "Missing required mirred parameters"); |
| 118 | return -EINVAL; |
| 119 | } |
| 120 | parm = nla_data(tb[TCA_MIRRED_PARMS]); |
| 121 | index = parm->index; |
| 122 | err = tcf_idr_check_alloc(tn, &index, a, bind); |
| 123 | if (err < 0) |
| 124 | return err; |
| 125 | exists = err; |
| 126 | if (exists && bind) |
| 127 | return 0; |
| 128 | |
| 129 | switch (parm->eaction) { |
| 130 | case TCA_EGRESS_MIRROR: |
| 131 | case TCA_EGRESS_REDIR: |
| 132 | case TCA_INGRESS_REDIR: |
| 133 | case TCA_INGRESS_MIRROR: |
| 134 | break; |
| 135 | default: |
| 136 | if (exists) |
| 137 | tcf_idr_release(*a, bind); |
| 138 | else |
| 139 | tcf_idr_cleanup(tn, index); |
| 140 | NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option"); |
| 141 | return -EINVAL; |
| 142 | } |
| 143 | |
| 144 | if (!exists) { |
| 145 | if (!parm->ifindex) { |
| 146 | tcf_idr_cleanup(tn, index); |
| 147 | NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist"); |
| 148 | return -EINVAL; |
| 149 | } |
| 150 | ret = tcf_idr_create(tn, index, est, a, |
| 151 | &act_mirred_ops, bind, true); |
| 152 | if (ret) { |
| 153 | tcf_idr_cleanup(tn, index); |
| 154 | return ret; |
| 155 | } |
| 156 | ret = ACT_P_CREATED; |
| 157 | } else if (!ovr) { |
| 158 | tcf_idr_release(*a, bind); |
| 159 | return -EEXIST; |
| 160 | } |
| 161 | m = to_mirred(*a); |
| 162 | |
| 163 | if (ret == ACT_P_CREATED) |
| 164 | INIT_LIST_HEAD(&m->tcfm_list); |
| 165 | |
| 166 | spin_lock_bh(&m->tcf_lock); |
| 167 | m->tcf_action = parm->action; |
| 168 | m->tcfm_eaction = parm->eaction; |
| 169 | |
| 170 | if (parm->ifindex) { |
| 171 | dev = dev_get_by_index(net, parm->ifindex); |
| 172 | if (!dev) { |
| 173 | spin_unlock_bh(&m->tcf_lock); |
| 174 | tcf_idr_release(*a, bind); |
| 175 | return -ENODEV; |
| 176 | } |
| 177 | mac_header_xmit = dev_is_mac_header_xmit(dev); |
| 178 | rcu_swap_protected(m->tcfm_dev, dev, |
| 179 | lockdep_is_held(&m->tcf_lock)); |
| 180 | if (dev) |
| 181 | dev_put(dev); |
| 182 | m->tcfm_mac_header_xmit = mac_header_xmit; |
| 183 | } |
| 184 | spin_unlock_bh(&m->tcf_lock); |
| 185 | |
| 186 | if (ret == ACT_P_CREATED) { |
| 187 | spin_lock(&mirred_list_lock); |
| 188 | list_add(&m->tcfm_list, &mirred_list); |
| 189 | spin_unlock(&mirred_list_lock); |
| 190 | |
| 191 | tcf_idr_insert(tn, *a); |
| 192 | } |
| 193 | |
| 194 | return ret; |
| 195 | } |
| 196 | |
| 197 | static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, |
| 198 | struct tcf_result *res) |
| 199 | { |
| 200 | struct tcf_mirred *m = to_mirred(a); |
| 201 | struct sk_buff *skb2 = skb; |
| 202 | bool m_mac_header_xmit; |
| 203 | struct net_device *dev; |
| 204 | int retval, err = 0; |
| 205 | bool use_reinsert; |
| 206 | bool want_ingress; |
| 207 | bool is_redirect; |
| 208 | int m_eaction; |
| 209 | int mac_len; |
| 210 | |
| 211 | tcf_lastuse_update(&m->tcf_tm); |
| 212 | bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb); |
| 213 | |
| 214 | m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); |
| 215 | m_eaction = READ_ONCE(m->tcfm_eaction); |
| 216 | retval = READ_ONCE(m->tcf_action); |
| 217 | dev = rcu_dereference_bh(m->tcfm_dev); |
| 218 | if (unlikely(!dev)) { |
| 219 | pr_notice_once("tc mirred: target device is gone\n"); |
| 220 | goto out; |
| 221 | } |
| 222 | |
| 223 | if (unlikely(!(dev->flags & IFF_UP))) { |
| 224 | net_notice_ratelimited("tc mirred to Houston: device %s is down\n", |
| 225 | dev->name); |
| 226 | goto out; |
| 227 | } |
| 228 | |
| 229 | /* we could easily avoid the clone only if called by ingress and clsact; |
| 230 | * since we can't easily detect the clsact caller, skip clone only for |
| 231 | * ingress - that covers the TC S/W datapath. |
| 232 | */ |
| 233 | is_redirect = tcf_mirred_is_act_redirect(m_eaction); |
| 234 | use_reinsert = skb_at_tc_ingress(skb) && is_redirect && |
| 235 | tcf_mirred_can_reinsert(retval); |
| 236 | if (!use_reinsert) { |
| 237 | skb2 = skb_clone(skb, GFP_ATOMIC); |
| 238 | if (!skb2) |
| 239 | goto out; |
| 240 | } |
| 241 | |
| 242 | /* If action's target direction differs than filter's direction, |
| 243 | * and devices expect a mac header on xmit, then mac push/pull is |
| 244 | * needed. |
| 245 | */ |
| 246 | want_ingress = tcf_mirred_act_wants_ingress(m_eaction); |
| 247 | if (skb_at_tc_ingress(skb) != want_ingress && m_mac_header_xmit) { |
| 248 | if (!skb_at_tc_ingress(skb)) { |
| 249 | /* caught at egress, act ingress: pull mac */ |
| 250 | mac_len = skb_network_header(skb) - skb_mac_header(skb); |
| 251 | skb_pull_rcsum(skb2, mac_len); |
| 252 | } else { |
| 253 | /* caught at ingress, act egress: push mac */ |
| 254 | skb_push_rcsum(skb2, skb->mac_len); |
| 255 | } |
| 256 | } |
| 257 | |
| 258 | skb2->skb_iif = skb->dev->ifindex; |
| 259 | skb2->dev = dev; |
| 260 | |
| 261 | /* mirror is always swallowed */ |
| 262 | if (is_redirect) { |
| 263 | skb2->tc_redirected = 1; |
| 264 | skb2->tc_from_ingress = skb2->tc_at_ingress; |
| 265 | |
| 266 | /* let's the caller reinsert the packet, if possible */ |
| 267 | if (use_reinsert) { |
| 268 | res->ingress = want_ingress; |
| 269 | res->qstats = this_cpu_ptr(m->common.cpu_qstats); |
| 270 | return TC_ACT_REINSERT; |
| 271 | } |
| 272 | } |
| 273 | |
| 274 | if (!want_ingress) |
| 275 | err = dev_queue_xmit(skb2); |
| 276 | else |
| 277 | err = netif_receive_skb(skb2); |
| 278 | |
| 279 | if (err) { |
| 280 | out: |
| 281 | qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats)); |
| 282 | if (tcf_mirred_is_act_redirect(m_eaction)) |
| 283 | retval = TC_ACT_SHOT; |
| 284 | } |
| 285 | |
| 286 | return retval; |
| 287 | } |
| 288 | |
| 289 | static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, |
| 290 | u64 lastuse) |
| 291 | { |
| 292 | struct tcf_mirred *m = to_mirred(a); |
| 293 | struct tcf_t *tm = &m->tcf_tm; |
| 294 | |
| 295 | _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); |
| 296 | tm->lastuse = max_t(u64, tm->lastuse, lastuse); |
| 297 | } |
| 298 | |
| 299 | static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, |
| 300 | int ref) |
| 301 | { |
| 302 | unsigned char *b = skb_tail_pointer(skb); |
| 303 | struct tcf_mirred *m = to_mirred(a); |
| 304 | struct tc_mirred opt = { |
| 305 | .index = m->tcf_index, |
| 306 | .refcnt = refcount_read(&m->tcf_refcnt) - ref, |
| 307 | .bindcnt = atomic_read(&m->tcf_bindcnt) - bind, |
| 308 | }; |
| 309 | struct net_device *dev; |
| 310 | struct tcf_t t; |
| 311 | |
| 312 | spin_lock_bh(&m->tcf_lock); |
| 313 | opt.action = m->tcf_action; |
| 314 | opt.eaction = m->tcfm_eaction; |
| 315 | dev = tcf_mirred_dev_dereference(m); |
| 316 | if (dev) |
| 317 | opt.ifindex = dev->ifindex; |
| 318 | |
| 319 | if (nla_put(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt)) |
| 320 | goto nla_put_failure; |
| 321 | |
| 322 | tcf_tm_dump(&t, &m->tcf_tm); |
| 323 | if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD)) |
| 324 | goto nla_put_failure; |
| 325 | spin_unlock_bh(&m->tcf_lock); |
| 326 | |
| 327 | return skb->len; |
| 328 | |
| 329 | nla_put_failure: |
| 330 | spin_unlock_bh(&m->tcf_lock); |
| 331 | nlmsg_trim(skb, b); |
| 332 | return -1; |
| 333 | } |
| 334 | |
| 335 | static int tcf_mirred_walker(struct net *net, struct sk_buff *skb, |
| 336 | struct netlink_callback *cb, int type, |
| 337 | const struct tc_action_ops *ops, |
| 338 | struct netlink_ext_ack *extack) |
| 339 | { |
| 340 | struct tc_action_net *tn = net_generic(net, mirred_net_id); |
| 341 | |
| 342 | return tcf_generic_walker(tn, skb, cb, type, ops, extack); |
| 343 | } |
| 344 | |
| 345 | static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index, |
| 346 | struct netlink_ext_ack *extack) |
| 347 | { |
| 348 | struct tc_action_net *tn = net_generic(net, mirred_net_id); |
| 349 | |
| 350 | return tcf_idr_search(tn, a, index); |
| 351 | } |
| 352 | |
| 353 | static int mirred_device_event(struct notifier_block *unused, |
| 354 | unsigned long event, void *ptr) |
| 355 | { |
| 356 | struct net_device *dev = netdev_notifier_info_to_dev(ptr); |
| 357 | struct tcf_mirred *m; |
| 358 | |
| 359 | ASSERT_RTNL(); |
| 360 | if (event == NETDEV_UNREGISTER) { |
| 361 | spin_lock(&mirred_list_lock); |
| 362 | list_for_each_entry(m, &mirred_list, tcfm_list) { |
| 363 | spin_lock_bh(&m->tcf_lock); |
| 364 | if (tcf_mirred_dev_dereference(m) == dev) { |
| 365 | dev_put(dev); |
| 366 | /* Note : no rcu grace period necessary, as |
| 367 | * net_device are already rcu protected. |
| 368 | */ |
| 369 | RCU_INIT_POINTER(m->tcfm_dev, NULL); |
| 370 | } |
| 371 | spin_unlock_bh(&m->tcf_lock); |
| 372 | } |
| 373 | spin_unlock(&mirred_list_lock); |
| 374 | } |
| 375 | |
| 376 | return NOTIFY_DONE; |
| 377 | } |
| 378 | |
| 379 | static struct notifier_block mirred_device_notifier = { |
| 380 | .notifier_call = mirred_device_event, |
| 381 | }; |
| 382 | |
| 383 | static struct net_device *tcf_mirred_get_dev(const struct tc_action *a) |
| 384 | { |
| 385 | struct tcf_mirred *m = to_mirred(a); |
| 386 | struct net_device *dev; |
| 387 | |
| 388 | rcu_read_lock(); |
| 389 | dev = rcu_dereference(m->tcfm_dev); |
| 390 | if (dev) |
| 391 | dev_hold(dev); |
| 392 | rcu_read_unlock(); |
| 393 | |
| 394 | return dev; |
| 395 | } |
| 396 | |
/* ->put_dev hook: drop a device reference with dev_put(). */
static void tcf_mirred_put_dev(struct net_device *dev)
{
	dev_put(dev);
}
| 401 | |
| 402 | static struct tc_action_ops act_mirred_ops = { |
| 403 | .kind = "mirred", |
| 404 | .type = TCA_ACT_MIRRED, |
| 405 | .owner = THIS_MODULE, |
| 406 | .act = tcf_mirred_act, |
| 407 | .stats_update = tcf_stats_update, |
| 408 | .dump = tcf_mirred_dump, |
| 409 | .cleanup = tcf_mirred_release, |
| 410 | .init = tcf_mirred_init, |
| 411 | .walk = tcf_mirred_walker, |
| 412 | .lookup = tcf_mirred_search, |
| 413 | .size = sizeof(struct tcf_mirred), |
| 414 | .get_dev = tcf_mirred_get_dev, |
| 415 | .put_dev = tcf_mirred_put_dev, |
| 416 | }; |
| 417 | |
| 418 | static __net_init int mirred_init_net(struct net *net) |
| 419 | { |
| 420 | struct tc_action_net *tn = net_generic(net, mirred_net_id); |
| 421 | |
| 422 | return tc_action_net_init(net, tn, &act_mirred_ops); |
| 423 | } |
| 424 | |
| 425 | static void __net_exit mirred_exit_net(struct list_head *net_list) |
| 426 | { |
| 427 | tc_action_net_exit(net_list, mirred_net_id); |
| 428 | } |
| 429 | |
| 430 | static struct pernet_operations mirred_net_ops = { |
| 431 | .init = mirred_init_net, |
| 432 | .exit_batch = mirred_exit_net, |
| 433 | .id = &mirred_net_id, |
| 434 | .size = sizeof(struct tc_action_net), |
| 435 | }; |
| 436 | |
| 437 | MODULE_AUTHOR("Jamal Hadi Salim(2002)"); |
| 438 | MODULE_DESCRIPTION("Device Mirror/redirect actions"); |
| 439 | MODULE_LICENSE("GPL"); |
| 440 | |
| 441 | static int __init mirred_init_module(void) |
| 442 | { |
| 443 | int err = register_netdevice_notifier(&mirred_device_notifier); |
| 444 | if (err) |
| 445 | return err; |
| 446 | |
| 447 | pr_info("Mirror/redirect action on\n"); |
| 448 | return tcf_register_action(&act_mirred_ops, &mirred_net_ops); |
| 449 | } |
| 450 | |
| 451 | static void __exit mirred_cleanup_module(void) |
| 452 | { |
| 453 | tcf_unregister_action(&act_mirred_ops, &mirred_net_ops); |
| 454 | unregister_netdevice_notifier(&mirred_device_notifier); |
| 455 | } |
| 456 | |
/* Hook the load and unload handlers into the module machinery. */
module_exit(mirred_cleanup_module);
module_init(mirred_init_module);