/*
 * net/sched/cls_api.c	Packet classifier API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 *
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];

/* The list of all installed classifier types */
static LIST_HEAD(tcf_proto_base);

/* Protects the list of registered TC modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);

/* Find classifier type by string name */

static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind)
{
	const struct tcf_proto_ops *t, *res = NULL;

	if (kind) {
		read_lock(&cls_mod_lock);
		list_for_each_entry(t, &tcf_proto_base, head) {
			if (strcmp(kind, t->kind) == 0) {
				if (try_module_get(t->owner))
					res = t;
				break;
			}
		}
		read_unlock(&cls_mod_lock);
	}
	return res;
}

/* Register (unregister) a new classifier type */

int register_tcf_proto_ops(struct tcf_proto_ops *ops)
{
	struct tcf_proto_ops *t;
	int rc = -EEXIST;

	write_lock(&cls_mod_lock);
	list_for_each_entry(t, &tcf_proto_base, head)
		if (!strcmp(ops->kind, t->kind))
			goto out;

	list_add_tail(&ops->head, &tcf_proto_base);
	rc = 0;
out:
	write_unlock(&cls_mod_lock);
	return rc;
}
EXPORT_SYMBOL(register_tcf_proto_ops);
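
/* A classifier module typically registers its ops from module init and
 * unregisters them on exit, roughly like this (sketch only, names
 * illustrative):
 *
 *	static struct tcf_proto_ops cls_foo_ops = {
 *		.kind		= "foo",
 *		.owner		= THIS_MODULE,
 *		.classify	= foo_classify,
 *		.init		= foo_init,
 *		.destroy	= foo_destroy,
 *	};
 *
 *	static int __init cls_foo_init(void)
 *	{
 *		return register_tcf_proto_ops(&cls_foo_ops);
 *	}
 *
 * The kind string is what tcf_proto_lookup_ops() matches against, and is
 * also used to autoload "cls_<kind>" in tcf_proto_create() below.
 */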

static struct workqueue_struct *tc_filter_wq;

int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
	struct tcf_proto_ops *t;
	int rc = -ENOENT;

	/* Wait for outstanding call_rcu()s, if any, from a
	 * tcf_proto_ops's destroy() handler.
	 */
	rcu_barrier();
	flush_workqueue(tc_filter_wq);

	write_lock(&cls_mod_lock);
	list_for_each_entry(t, &tcf_proto_base, head) {
		if (t == ops) {
			list_del(&t->head);
			rc = 0;
			break;
		}
	}
	write_unlock(&cls_mod_lock);
	return rc;
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);

bool tcf_queue_work(struct work_struct *work)
{
	return queue_work(tc_filter_wq, work);
}
EXPORT_SYMBOL(tcf_queue_work);

/* Select a new prio value from the range managed by the kernel. */

static inline u32 tcf_auto_prio(struct tcf_proto *tp)
{
	u32 first = TC_H_MAKE(0xC0000000U, 0U);

	if (tp)
		first = tp->prio - 1;

	return TC_H_MAJ(first);
}

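/* Allocate and initialize a classifier instance. If the requested kind is
 * not registered, try to autoload the "cls_<kind>" module; because that
 * means dropping the RTNL lock, a successful load is reported as -EAGAIN
 * so the caller replays the whole request under a fresh RTNL section.
 */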
static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
					  u32 prio, u32 parent, struct Qdisc *q,
					  struct tcf_chain *chain)
{
	struct tcf_proto *tp;
	int err;

	tp = kzalloc(sizeof(*tp), GFP_KERNEL);
	if (!tp)
		return ERR_PTR(-ENOBUFS);

	err = -ENOENT;
	tp->ops = tcf_proto_lookup_ops(kind);
	if (!tp->ops) {
#ifdef CONFIG_MODULES
		rtnl_unlock();
		request_module("cls_%s", kind);
		rtnl_lock();
		tp->ops = tcf_proto_lookup_ops(kind);
		/* We dropped the RTNL semaphore in order to perform
		 * the module load. So, even if we succeeded in loading
		 * the module we have to replay the request. We indicate
		 * this using -EAGAIN.
		 */
		if (tp->ops) {
			module_put(tp->ops->owner);
			err = -EAGAIN;
		} else {
			err = -ENOENT;
		}
#endif
		goto errout;
	}
	tp->classify = tp->ops->classify;
	tp->protocol = protocol;
	tp->prio = prio;
	tp->classid = parent;
	tp->q = q;
	tp->chain = chain;

	err = tp->ops->init(tp);
	if (err) {
		module_put(tp->ops->owner);
		goto errout;
	}
	return tp;

errout:
	kfree(tp);
	return ERR_PTR(err);
}

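/* Tear down a classifier instance: let the classifier release its internal
 * state, drop the module reference taken at lookup time, and free the
 * tcf_proto itself after an RCU grace period.
 */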
static void tcf_proto_destroy(struct tcf_proto *tp)
{
	tp->ops->destroy(tp);
	module_put(tp->ops->owner);
	kfree_rcu(tp, rcu);
}

static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
					  u32 chain_index)
{
	struct tcf_chain *chain;

	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
	if (!chain)
		return NULL;
	list_add_tail(&chain->list, &block->chain_list);
	chain->block = block;
	chain->index = chain_index;
	chain->refcnt = 1;
	return chain;
}

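/* Remove every filter from the chain. Each filter holds a reference on its
 * chain (taken in tcf_chain_tp_insert()), so the chain reference is dropped
 * as each filter is destroyed.
 */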
static void tcf_chain_flush(struct tcf_chain *chain)
{
	struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);

	if (chain->p_filter_chain)
		RCU_INIT_POINTER(*chain->p_filter_chain, NULL);
	while (tp) {
		RCU_INIT_POINTER(chain->filter_chain, tp->next);
		tcf_proto_destroy(tp);
		tp = rtnl_dereference(chain->filter_chain);
		tcf_chain_put(chain);
	}
}

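/* Free a chain once its last reference is gone; the block itself is freed
 * when the final chain disappears from its list (see tcf_block_put()).
 */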
static void tcf_chain_destroy(struct tcf_chain *chain)
{
	struct tcf_block *block = chain->block;

	list_del(&chain->list);
	kfree(chain);
	if (list_empty(&block->chain_list))
		kfree(block);
}

static void tcf_chain_hold(struct tcf_chain *chain)
{
	++chain->refcnt;
}

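/* Look up a chain by index inside a block and take a reference on it.
 * When @create is true and no such chain exists, a new one is created
 * with an initial reference held by the caller.
 */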
struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
				bool create)
{
	struct tcf_chain *chain;

	list_for_each_entry(chain, &block->chain_list, list) {
		if (chain->index == chain_index) {
			tcf_chain_hold(chain);
			return chain;
		}
	}

	return create ? tcf_chain_create(block, chain_index) : NULL;
}
EXPORT_SYMBOL(tcf_chain_get);

void tcf_chain_put(struct tcf_chain *chain)
{
	if (--chain->refcnt == 0)
		tcf_chain_destroy(chain);
}
EXPORT_SYMBOL(tcf_chain_put);

static void
tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain,
			       struct tcf_proto __rcu **p_filter_chain)
{
	chain->p_filter_chain = p_filter_chain;
}

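/* A qdisc that supports filters typically creates its block in ->init()
 * and releases it in ->destroy(), roughly (sketch only, field names
 * illustrative):
 *
 *	err = tcf_block_get(&q->block, &q->filter_list);
 *	if (err)
 *		return err;
 *	...
 *	tcf_block_put(q->block);
 *
 * The p_filter_chain pointer is where chain 0 publishes the head of its
 * filter list, so the qdisc's datapath can hand it to tcf_classify().
 */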
int tcf_block_get(struct tcf_block **p_block,
		  struct tcf_proto __rcu **p_filter_chain)
{
	struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
	struct tcf_chain *chain;
	int err;

	if (!block)
		return -ENOMEM;
	INIT_LIST_HEAD(&block->chain_list);
	/* Create chain 0 by default, it has to be always present. */
	chain = tcf_chain_create(block, 0);
	if (!chain) {
		err = -ENOMEM;
		goto err_chain_create;
	}
	tcf_chain_filter_chain_ptr_set(chain, p_filter_chain);
	*p_block = block;
	return 0;

err_chain_create:
	kfree(block);
	return err;
}
EXPORT_SYMBOL(tcf_block_get);

/* XXX: Standalone actions are not allowed to jump to any chain, and bound
 * actions should be all removed after flushing.
 */
void tcf_block_put(struct tcf_block *block)
{
	struct tcf_chain *chain, *tmp;

	if (!block)
		return;

	/* Hold a refcnt for all chains, so that they don't disappear
	 * while we are iterating.
	 */
	list_for_each_entry(chain, &block->chain_list, list)
		tcf_chain_hold(chain);

	list_for_each_entry(chain, &block->chain_list, list)
		tcf_chain_flush(chain);

	/* At this point, all the chains should have refcnt >= 1. */
	list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
		tcf_chain_put(chain);

	/* Finally, put chain 0 and allow block to be freed. */
	chain = list_first_entry(&block->chain_list, struct tcf_chain, list);
	tcf_chain_put(chain);
}
EXPORT_SYMBOL(tcf_block_put);

/* Main classifier routine: scans classifier chain attached
 * to this qdisc, (optionally) tests for protocol and asks
 * specific classifiers.
 */
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		 struct tcf_result *res, bool compat_mode)
{
#ifdef CONFIG_NET_CLS_ACT
	const int max_reclassify_loop = 4;
	const struct tcf_proto *orig_tp = tp;
	const struct tcf_proto *first_tp;
	int limit = 0;

reclassify:
#endif
	for (; tp; tp = rcu_dereference_bh(tp->next)) {
		__be16 protocol = tc_skb_protocol(skb);
		int err;

		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;

		err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
			first_tp = orig_tp;
			goto reset;
		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
			first_tp = res->goto_tp;
			goto reset;
		}
#endif
		if (err >= 0)
			return err;
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	if (unlikely(limit++ >= max_reclassify_loop)) {
		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->q->ops->id, tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	tp = first_tp;
	goto reclassify;
#endif
}
EXPORT_SYMBOL(tcf_classify);

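/* Cursor into a chain's singly linked filter list: pprev points at the link
 * preceding the filter found by tcf_chain_tp_find(), next at the filter
 * following it. tcf_chain_tp_insert()/_remove() splice through these.
 */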
struct tcf_chain_info {
	struct tcf_proto __rcu **pprev;
	struct tcf_proto __rcu *next;
};

static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
{
	return rtnl_dereference(*chain_info->pprev);
}

static void tcf_chain_tp_insert(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
{
	if (chain->p_filter_chain &&
	    *chain_info->pprev == chain->filter_chain)
		rcu_assign_pointer(*chain->p_filter_chain, tp);
	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
	rcu_assign_pointer(*chain_info->pprev, tp);
	tcf_chain_hold(chain);
}

static void tcf_chain_tp_remove(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
{
	struct tcf_proto *next = rtnl_dereference(chain_info->next);

	if (chain->p_filter_chain && tp == chain->filter_chain)
		RCU_INIT_POINTER(*chain->p_filter_chain, next);
	RCU_INIT_POINTER(*chain_info->pprev, next);
	tcf_chain_put(chain);
}

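/* Find a filter by priority (and protocol) on the chain. Returns NULL plus
 * a valid insertion point in @chain_info when no filter with that priority
 * exists, or ERR_PTR(-EINVAL) when the priority is already taken by a
 * different protocol or when an auto-allocated priority would collide.
 */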
static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info,
					   u32 protocol, u32 prio,
					   bool prio_allocate)
{
	struct tcf_proto **pprev;
	struct tcf_proto *tp;

	/* Check the chain for existence of proto-tcf with this priority */
	for (pprev = &chain->filter_chain;
	     (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
		if (tp->prio >= prio) {
			if (tp->prio == prio) {
				if (prio_allocate ||
				    (tp->protocol != protocol && protocol))
					return ERR_PTR(-EINVAL);
			} else {
				tp = NULL;
			}
			break;
		}
	}
	chain_info->pprev = pprev;
	chain_info->next = tp ? tp->next : NULL;
	return tp;
}

static int tcf_fill_node(struct net *net, struct sk_buff *skb,
			 struct tcf_proto *tp, void *fh, u32 portid,
			 u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
	tcm->tcm_parent = tp->classid;
	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
	if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
		goto nla_put_failure;
	if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
		goto nla_put_failure;
	if (!fh) {
		tcm->tcm_handle = 0;
	} else {
		if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
			goto nla_put_failure;
	}
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int tfilter_notify(struct net *net, struct sk_buff *oskb,
			  struct nlmsghdr *n, struct tcf_proto *tp,
			  void *fh, int event, bool unicast)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
			  n->nlmsg_flags, event) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
			      struct nlmsghdr *n, struct tcf_proto *tp,
			      void *fh, bool unicast, bool *last)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	int err;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
			  n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	err = tp->ops->delete(tp, fh, last);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	if (unicast)
		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
				 struct nlmsghdr *n,
				 struct tcf_chain *chain, int event)
{
	struct tcf_proto *tp;

	for (tp = rtnl_dereference(chain->filter_chain);
	     tp; tp = rtnl_dereference(tp->next))
		tfilter_notify(net, oskb, n, tp, NULL, event, false);
}

/* Add/change/delete/get a filter node */

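/* Common handler for RTM_NEWTFILTER, RTM_DELTFILTER and RTM_GETTFILTER:
 * parse the request, resolve device -> qdisc -> class -> block -> chain,
 * locate (or create) the tcf_proto for the given priority/protocol, then
 * dispatch to the classifier's change/delete/get operation. An -EAGAIN
 * from tcf_proto_create() (module autoload dropped RTNL) makes us replay
 * the whole request.
 */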
static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	bool prio_allocate;
	u32 parent;
	u32 chain_index;
	struct net_device *dev;
	struct Qdisc *q;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block;
	struct tcf_proto *tp;
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	void *fh;
	int err;
	int tp_created;

	if ((n->nlmsg_type != RTM_GETTFILTER) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	tp_created = 0;

	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	prio_allocate = false;
	parent = t->tcm_parent;
	cl = 0;

	if (prio == 0) {
		switch (n->nlmsg_type) {
		case RTM_DELTFILTER:
			if (protocol || t->tcm_handle || tca[TCA_KIND])
				return -ENOENT;
			break;
		case RTM_NEWTFILTER:
			/* If no priority is provided by the user,
			 * we allocate one.
			 */
			if (n->nlmsg_flags & NLM_F_CREATE) {
				prio = TC_H_MAKE(0x80000000U, 0U);
				prio_allocate = true;
				break;
			}
			/* fall-through */
		default:
			return -ENOENT;
		}
	}

	/* Find head of filter chain. */

	/* Find link */
	dev = __dev_get_by_index(net, t->tcm_ifindex);
	if (dev == NULL)
		return -ENODEV;

	/* Find qdisc */
	if (!parent) {
		q = dev->qdisc;
		parent = q->handle;
	} else {
		q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
		if (q == NULL)
			return -EINVAL;
	}

	/* Is it classful? */
	cops = q->ops->cl_ops;
	if (!cops)
		return -EINVAL;

	if (!cops->tcf_block)
		return -EOPNOTSUPP;

	/* Are we searching for a filter attached to a class? */
	if (TC_H_MIN(parent)) {
		cl = cops->find(q, parent);
		if (cl == 0)
			return -ENOENT;
	}

	/* And the last stroke */
	block = cops->tcf_block(q, cl);
	if (!block) {
		err = -EINVAL;
		goto errout;
	}

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		err = -EINVAL;
		goto errout;
	}
	chain = tcf_chain_get(block, chain_index,
			      n->nlmsg_type == RTM_NEWTFILTER);
	if (!chain) {
		err = n->nlmsg_type == RTM_NEWTFILTER ? -ENOMEM : -EINVAL;
		goto errout;
	}

	if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
		tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER);
		tcf_chain_flush(chain);
		err = 0;
		goto errout;
	}

	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, prio_allocate);
	if (IS_ERR(tp)) {
		err = PTR_ERR(tp);
		goto errout;
	}

	if (tp == NULL) {
		/* Proto-tcf does not exist, create new one */

		if (tca[TCA_KIND] == NULL || !protocol) {
			err = -EINVAL;
			goto errout;
		}

		if (n->nlmsg_type != RTM_NEWTFILTER ||
		    !(n->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto errout;
		}

		if (prio_allocate)
			prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));

		tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
				      protocol, prio, parent, q, chain);
		if (IS_ERR(tp)) {
			err = PTR_ERR(tp);
			goto errout;
		}
		tp_created = 1;
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		err = -EINVAL;
		goto errout;
	}

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
			tcf_chain_tp_remove(chain, &chain_info, tp);
			tfilter_notify(net, skb, n, tp, fh,
				       RTM_DELTFILTER, false);
			tcf_proto_destroy(tp);
			err = 0;
			goto errout;
		}

		if (n->nlmsg_type != RTM_NEWTFILTER ||
		    !(n->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto errout;
		}
	} else {
		bool last;

		switch (n->nlmsg_type) {
		case RTM_NEWTFILTER:
			if (n->nlmsg_flags & NLM_F_EXCL) {
				if (tp_created)
					tcf_proto_destroy(tp);
				err = -EEXIST;
				goto errout;
			}
			break;
		case RTM_DELTFILTER:
			err = tfilter_del_notify(net, skb, n, tp, fh, false,
						 &last);
			if (err)
				goto errout;
			if (last) {
				tcf_chain_tp_remove(chain, &chain_info, tp);
				tcf_proto_destroy(tp);
			}
			goto errout;
		case RTM_GETTFILTER:
			err = tfilter_notify(net, skb, n, tp, fh,
					     RTM_NEWTFILTER, true);
			goto errout;
		default:
			err = -EINVAL;
			goto errout;
		}
	}

	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE :
							      TCA_ACT_REPLACE);
	if (err == 0) {
		if (tp_created)
			tcf_chain_tp_insert(chain, &chain_info, tp);
		tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false);
	} else {
		if (tp_created)
			tcf_proto_destroy(tp);
	}

errout:
	if (chain)
		tcf_chain_put(chain);
	if (err == -EAGAIN)
		/* Replay the request. */
		goto replay;
	return err;
}

struct tcf_dump_args {
	struct tcf_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
};

static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_dump_args *a = (void *)arg;
	struct net *net = sock_net(a->skb->sk);

	return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid,
			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			     RTM_NEWTFILTER);
}

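/* Dump all filters on one chain. cb->args[0] carries the filter index at
 * which the previous dump call stopped (counted across all chains);
 * cb->args[1] is the per-filter node cursor handed to the classifier's
 * walk() callback as the skip count.
 */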
static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
			   struct netlink_callback *cb,
			   long index_start, long *p_index)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct tcf_dump_args arg;
	struct tcf_proto *tp;

	for (tp = rtnl_dereference(chain->filter_chain);
	     tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
		if (*p_index < index_start)
			continue;
		if (TC_H_MAJ(tcm->tcm_info) &&
		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
			continue;
		if (TC_H_MIN(tcm->tcm_info) &&
		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
			continue;
		if (*p_index > index_start)
			memset(&cb->args[1], 0,
			       sizeof(cb->args) - sizeof(cb->args[0]));
		if (cb->args[1] == 0) {
			if (tcf_fill_node(net, skb, tp, NULL,
					  NETLINK_CB(cb->skb).portid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
					  RTM_NEWTFILTER) <= 0)
				return false;

			cb->args[1] = 1;
		}
		if (!tp->ops->walk)
			continue;
		arg.w.fn = tcf_node_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.w.stop = 0;
		arg.w.skip = cb->args[1] - 1;
		arg.w.count = 0;
		tp->ops->walk(tp, &arg.w);
		cb->args[1] = arg.w.count + 1;
		if (arg.w.stop)
			return false;
	}
	return true;
}

/* called with RTNL */
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q;
	struct tcf_block *block;
	struct tcf_chain *chain;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	unsigned long cl = 0;
	const struct Qdisc_class_ops *cops;
	long index_start;
	long index;
	int err;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return skb->len;

	err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
			  NULL);
	if (err)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return skb->len;

	if (!tcm->tcm_parent)
		q = dev->qdisc;
	else
		q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
	if (!q)
		goto out;
	cops = q->ops->cl_ops;
	if (!cops)
		goto out;
	if (!cops->tcf_block)
		goto out;
	if (TC_H_MIN(tcm->tcm_parent)) {
		cl = cops->find(q, tcm->tcm_parent);
		if (cl == 0)
			goto out;
	}
	block = cops->tcf_block(q, cl);
	if (!block)
		goto out;

	index_start = cb->args[0];
	index = 0;

	list_for_each_entry(chain, &block->chain_list, list) {
		if (tca[TCA_CHAIN] &&
		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
			continue;
		if (!tcf_chain_dump(chain, skb, cb, index_start, &index)) {
			err = -EMSGSIZE;
			break;
		}
	}

	cb->args[0] = index;

out:
	/* If we made no progress, the error (EMSGSIZE) is real */
	if (skb->len == 0 && err)
		return err;
	return skb->len;
}

void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	LIST_HEAD(actions);

	ASSERT_RTNL();
	tcf_exts_to_list(exts, &actions);
	tcf_action_destroy(&actions, TCA_ACT_UNBIND);
	kfree(exts->actions);
	exts->nr_actions = 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);

int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
		      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr)
{
#ifdef CONFIG_NET_CLS_ACT
	{
		struct tc_action *act;

		if (exts->police && tb[exts->police]) {
			act = tcf_action_init_1(net, tp, tb[exts->police],
						rate_tlv, "police", ovr,
						TCA_ACT_BIND);
			if (IS_ERR(act))
				return PTR_ERR(act);

			act->type = exts->type = TCA_OLD_COMPAT;
			exts->actions[0] = act;
			exts->nr_actions = 1;
		} else if (exts->action && tb[exts->action]) {
			LIST_HEAD(actions);
			int err, i = 0;

			err = tcf_action_init(net, tp, tb[exts->action],
					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
					      &actions);
			if (err)
				return err;
			list_for_each_entry(act, &actions, list)
				exts->actions[i++] = act;
			exts->nr_actions = i;
		}
		exts->net = net;
	}
#else
	if ((exts->action && tb[exts->action]) ||
	    (exts->police && tb[exts->police]))
		return -EOPNOTSUPP;
#endif

	return 0;
}
EXPORT_SYMBOL(tcf_exts_validate);

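/* Commit a new action list: swap @dst with the exts prepared in @src and
 * release whatever @dst previously held. Classifiers typically validate a
 * temporary exts first and then commit it here under RTNL.
 */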
void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_exts old = *dst;

	*dst = *src;
	tcf_exts_destroy(&old);
#endif
}
EXPORT_SYMBOL(tcf_exts_change);

#ifdef CONFIG_NET_CLS_ACT
static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
{
	if (exts->nr_actions == 0)
		return NULL;
	else
		return exts->actions[0];
}
#endif

int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct nlattr *nest;

	if (exts->action && tcf_exts_has_actions(exts)) {
		/*
		 * again for backward compatible mode - we want
		 * to work with both old and new modes of entering
		 * tc data even if iproute2 was newer - jhs
		 */
		if (exts->type != TCA_OLD_COMPAT) {
			LIST_HEAD(actions);

			nest = nla_nest_start(skb, exts->action);
			if (nest == NULL)
				goto nla_put_failure;

			tcf_exts_to_list(exts, &actions);
			if (tcf_action_dump(skb, &actions, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		} else if (exts->police) {
			struct tc_action *act = tcf_exts_first_act(exts);

			nest = nla_nest_start(skb, exts->police);
			if (nest == NULL || !act)
				goto nla_put_failure;
			if (tcf_action_dump_old(skb, act, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		}
	}
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_dump);

int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tc_action *a = tcf_exts_first_act(exts);

	if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
		return -1;
#endif
	return 0;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);

int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
		     struct net_device **hw_dev)
{
#ifdef CONFIG_NET_CLS_ACT
	const struct tc_action *a;
	LIST_HEAD(actions);

	if (!tcf_exts_has_actions(exts))
		return -EINVAL;

	tcf_exts_to_list(exts, &actions);
	list_for_each_entry(a, &actions, list) {
		if (a->ops->get_dev) {
			a->ops->get_dev(a, dev_net(dev), hw_dev);
			break;
		}
	}
	if (*hw_dev)
		return 0;
#endif
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(tcf_exts_get_dev);

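/* Subsystem init: create the ordered workqueue used by tcf_queue_work() and
 * hook the RTM_*TFILTER message types into rtnetlink. Filter dumps go
 * through tc_dump_tfilter(); everything else lands in tc_ctl_tfilter().
 */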
static int __init tc_filter_init(void)
{
	tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
	if (!tc_filter_wq)
		return -ENOMEM;

	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
		      tc_dump_tfilter, 0);

	return 0;
}

subsys_initcall(tc_filter_init);