|  | /* | 
|  | * net/sched/ematch.c		Extended Match API | 
|  | * | 
|  | *		This program is free software; you can redistribute it and/or | 
|  | *		modify it under the terms of the GNU General Public License | 
|  | *		as published by the Free Software Foundation; either version | 
|  | *		2 of the License, or (at your option) any later version. | 
|  | * | 
|  | * Authors:	Thomas Graf <tgraf@suug.ch> | 
|  | * | 
|  | * ========================================================================== | 
|  | * | 
|  | * An extended match (ematch) is a small classification tool not worth | 
|  | * writing a full classifier for. Ematches can be interconnected to form | 
|  | * a logic expression and get attached to classifiers to extend their | 
|  | * functionality. | 
|  | * | 
|  | * The userspace part transforms the logic expressions into an array | 
|  | * consisting of multiple sequences of interconnected ematches separated | 
|  | * by markers. Precedence is implemented by a special ematch kind | 
|  | * referencing a sequence beyond the marker of the current sequence | 
|  | * causing the current position in the sequence to be pushed onto a stack | 
|  | * to allow the current position to be overwritten by the position referenced | 
|  | * in the special ematch. Matching continues in the new sequence until a | 
|  | * marker is reached causing the position to be restored from the stack. | 
|  | * | 
|  | * Example: | 
|  | *          A AND (B1 OR B2) AND C AND D | 
|  | * | 
|  | *              ------->-PUSH------- | 
|  | *    -->--    /         -->--      \   -->-- | 
|  | *   /     \  /         /     \      \ /     \ | 
|  | * +-------+-------+-------+-------+-------+--------+ | 
|  | * | A AND | B AND | C AND | D END | B1 OR | B2 END | | 
|  | * +-------+-------+-------+-------+-------+--------+ | 
|  | *                    \                      / | 
|  | *                     --------<-POP--------- | 
|  | * | 
|  | * where B is a virtual ematch referencing the sequence starting with B1. | 
|  | * | 
|  | * ========================================================================== | 
|  | * | 
|  | * How to write an ematch in 60 seconds | 
|  | * ------------------------------------ | 
|  | * | 
|  | *   1) Provide a matcher function: | 
|  | *      static int my_match(struct sk_buff *skb, struct tcf_ematch *m, | 
|  | *                          struct tcf_pkt_info *info) | 
|  | *      { | 
|  | *      	struct mydata *d = (struct mydata *) m->data; | 
|  | * | 
|  | *      	if (...matching goes here...) | 
|  | *      		return 1; | 
|  | *      	else | 
|  | *      		return 0; | 
|  | *      } | 
|  | * | 
|  | *   2) Fill out a struct tcf_ematch_ops: | 
|  | *      static struct tcf_ematch_ops my_ops = { | 
|  | *      	.kind = unique id, | 
|  | *      	.datalen = sizeof(struct mydata), | 
|  | *      	.match = my_match, | 
|  | *      	.owner = THIS_MODULE, | 
|  | *      }; | 
|  | * | 
|  | *   3) Register/Unregister your ematch: | 
|  | *      static int __init init_my_ematch(void) | 
|  | *      { | 
|  | *      	return tcf_em_register(&my_ops); | 
|  | *      } | 
|  | * | 
|  | *      static void __exit exit_my_ematch(void) | 
|  | *      { | 
|  | *      	tcf_em_unregister(&my_ops); | 
|  | *      } | 
|  | * | 
|  | *      module_init(init_my_ematch); | 
|  | *      module_exit(exit_my_ematch); | 
|  | * | 
|  | *   4) By now you should have two more seconds left, barely enough to | 
|  | *      open up a beer to watch the compilation going. | 
|  | */ | 
|  |  | 
|  | #include <linux/module.h> | 
|  | #include <linux/slab.h> | 
|  | #include <linux/types.h> | 
|  | #include <linux/kernel.h> | 
|  | #include <linux/errno.h> | 
|  | #include <linux/rtnetlink.h> | 
|  | #include <linux/skbuff.h> | 
|  | #include <net/pkt_cls.h> | 
|  |  | 
/* Serialises every walk and modification of the ematch_ops list. */
static DEFINE_RWLOCK(ematch_mod_lock);
/* Registered ematch kinds, chained through the link member of their ops. */
static LIST_HEAD(ematch_ops);
|  |  | 
|  | static struct tcf_ematch_ops *tcf_em_lookup(u16 kind) | 
|  | { | 
|  | struct tcf_ematch_ops *e = NULL; | 
|  |  | 
|  | read_lock(&ematch_mod_lock); | 
|  | list_for_each_entry(e, &ematch_ops, link) { | 
|  | if (kind == e->kind) { | 
|  | if (!try_module_get(e->owner)) | 
|  | e = NULL; | 
|  | read_unlock(&ematch_mod_lock); | 
|  | return e; | 
|  | } | 
|  | } | 
|  | read_unlock(&ematch_mod_lock); | 
|  |  | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * tcf_em_register - register an extended match | 
|  | * | 
|  | * @ops: ematch operations lookup table | 
|  | * | 
|  | * This function must be called by ematches to announce their presence. | 
|  | * The given @ops must have kind set to a unique identifier and the | 
|  | * callback match() must be implemented. All other callbacks are optional | 
|  | * and a fallback implementation is used instead. | 
|  | * | 
|  | * Returns -EEXISTS if an ematch of the same kind has already registered. | 
|  | */ | 
|  | int tcf_em_register(struct tcf_ematch_ops *ops) | 
|  | { | 
|  | int err = -EEXIST; | 
|  | struct tcf_ematch_ops *e; | 
|  |  | 
|  | if (ops->match == NULL) | 
|  | return -EINVAL; | 
|  |  | 
|  | write_lock(&ematch_mod_lock); | 
|  | list_for_each_entry(e, &ematch_ops, link) | 
|  | if (ops->kind == e->kind) | 
|  | goto errout; | 
|  |  | 
|  | list_add_tail(&ops->link, &ematch_ops); | 
|  | err = 0; | 
|  | errout: | 
|  | write_unlock(&ematch_mod_lock); | 
|  | return err; | 
|  | } | 
|  | EXPORT_SYMBOL(tcf_em_register); | 
|  |  | 
|  | /** | 
|  | * tcf_em_unregister - unregster and extended match | 
|  | * | 
|  | * @ops: ematch operations lookup table | 
|  | * | 
|  | * This function must be called by ematches to announce their disappearance | 
|  | * for examples when the module gets unloaded. The @ops parameter must be | 
|  | * the same as the one used for registration. | 
|  | * | 
|  | * Returns -ENOENT if no matching ematch was found. | 
|  | */ | 
|  | void tcf_em_unregister(struct tcf_ematch_ops *ops) | 
|  | { | 
|  | write_lock(&ematch_mod_lock); | 
|  | list_del(&ops->link); | 
|  | write_unlock(&ematch_mod_lock); | 
|  | } | 
|  | EXPORT_SYMBOL(tcf_em_unregister); | 
|  |  | 
|  | static inline struct tcf_ematch *tcf_em_get_match(struct tcf_ematch_tree *tree, | 
|  | int index) | 
|  | { | 
|  | return &tree->matches[index]; | 
|  | } | 
|  |  | 
|  |  | 
|  | static int tcf_em_validate(struct tcf_proto *tp, | 
|  | struct tcf_ematch_tree_hdr *tree_hdr, | 
|  | struct tcf_ematch *em, struct nlattr *nla, int idx) | 
|  | { | 
|  | int err = -EINVAL; | 
|  | struct tcf_ematch_hdr *em_hdr = nla_data(nla); | 
|  | int data_len = nla_len(nla) - sizeof(*em_hdr); | 
|  | void *data = (void *) em_hdr + sizeof(*em_hdr); | 
|  |  | 
|  | if (!TCF_EM_REL_VALID(em_hdr->flags)) | 
|  | goto errout; | 
|  |  | 
|  | if (em_hdr->kind == TCF_EM_CONTAINER) { | 
|  | /* Special ematch called "container", carries an index | 
|  | * referencing an external ematch sequence. | 
|  | */ | 
|  | u32 ref; | 
|  |  | 
|  | if (data_len < sizeof(ref)) | 
|  | goto errout; | 
|  | ref = *(u32 *) data; | 
|  |  | 
|  | if (ref >= tree_hdr->nmatches) | 
|  | goto errout; | 
|  |  | 
|  | /* We do not allow backward jumps to avoid loops and jumps | 
|  | * to our own position are of course illegal. | 
|  | */ | 
|  | if (ref <= idx) | 
|  | goto errout; | 
|  |  | 
|  |  | 
|  | em->data = ref; | 
|  | } else { | 
|  | /* Note: This lookup will increase the module refcnt | 
|  | * of the ematch module referenced. In case of a failure, | 
|  | * a destroy function is called by the underlying layer | 
|  | * which automatically releases the reference again, therefore | 
|  | * the module MUST not be given back under any circumstances | 
|  | * here. Be aware, the destroy function assumes that the | 
|  | * module is held if the ops field is non zero. | 
|  | */ | 
|  | em->ops = tcf_em_lookup(em_hdr->kind); | 
|  |  | 
|  | if (em->ops == NULL) { | 
|  | err = -ENOENT; | 
|  | #ifdef CONFIG_MODULES | 
|  | __rtnl_unlock(); | 
|  | request_module("ematch-kind-%u", em_hdr->kind); | 
|  | rtnl_lock(); | 
|  | em->ops = tcf_em_lookup(em_hdr->kind); | 
|  | if (em->ops) { | 
|  | /* We dropped the RTNL mutex in order to | 
|  | * perform the module load. Tell the caller | 
|  | * to replay the request. | 
|  | */ | 
|  | module_put(em->ops->owner); | 
|  | err = -EAGAIN; | 
|  | } | 
|  | #endif | 
|  | goto errout; | 
|  | } | 
|  |  | 
|  | /* ematch module provides expected length of data, so we | 
|  | * can do a basic sanity check. | 
|  | */ | 
|  | if (em->ops->datalen && data_len < em->ops->datalen) | 
|  | goto errout; | 
|  |  | 
|  | if (em->ops->change) { | 
|  | err = em->ops->change(tp, data, data_len, em); | 
|  | if (err < 0) | 
|  | goto errout; | 
|  | } else if (data_len > 0) { | 
|  | /* ematch module doesn't provide an own change | 
|  | * procedure and expects us to allocate and copy | 
|  | * the ematch data. | 
|  | * | 
|  | * TCF_EM_SIMPLE may be specified stating that the | 
|  | * data only consists of a u32 integer and the module | 
|  | * does not expected a memory reference but rather | 
|  | * the value carried. | 
|  | */ | 
|  | if (em_hdr->flags & TCF_EM_SIMPLE) { | 
|  | if (data_len < sizeof(u32)) | 
|  | goto errout; | 
|  | em->data = *(u32 *) data; | 
|  | } else { | 
|  | void *v = kmemdup(data, data_len, GFP_KERNEL); | 
|  | if (v == NULL) { | 
|  | err = -ENOBUFS; | 
|  | goto errout; | 
|  | } | 
|  | em->data = (unsigned long) v; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | em->matchid = em_hdr->matchid; | 
|  | em->flags = em_hdr->flags; | 
|  | em->datalen = data_len; | 
|  |  | 
|  | err = 0; | 
|  | errout: | 
|  | return err; | 
|  | } | 
|  |  | 
|  | static const struct nla_policy em_policy[TCA_EMATCH_TREE_MAX + 1] = { | 
|  | [TCA_EMATCH_TREE_HDR]	= { .len = sizeof(struct tcf_ematch_tree_hdr) }, | 
|  | [TCA_EMATCH_TREE_LIST]	= { .type = NLA_NESTED }, | 
|  | }; | 
|  |  | 
|  | /** | 
|  | * tcf_em_tree_validate - validate ematch config TLV and build ematch tree | 
|  | * | 
|  | * @tp: classifier kind handle | 
|  | * @nla: ematch tree configuration TLV | 
|  | * @tree: destination ematch tree variable to store the resulting | 
|  | *        ematch tree. | 
|  | * | 
|  | * This function validates the given configuration TLV @nla and builds an | 
|  | * ematch tree in @tree. The resulting tree must later be copied into | 
|  | * the private classifier data using tcf_em_tree_change(). You MUST NOT | 
|  | * provide the ematch tree variable of the private classifier data directly, | 
|  | * the changes would not be locked properly. | 
|  | * | 
|  | * Returns a negative error code if the configuration TLV contains errors. | 
|  | */ | 
|  | int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla, | 
|  | struct tcf_ematch_tree *tree) | 
|  | { | 
|  | int idx, list_len, matches_len, err; | 
|  | struct nlattr *tb[TCA_EMATCH_TREE_MAX + 1]; | 
|  | struct nlattr *rt_match, *rt_hdr, *rt_list; | 
|  | struct tcf_ematch_tree_hdr *tree_hdr; | 
|  | struct tcf_ematch *em; | 
|  |  | 
|  | memset(tree, 0, sizeof(*tree)); | 
|  | if (!nla) | 
|  | return 0; | 
|  |  | 
|  | err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, em_policy); | 
|  | if (err < 0) | 
|  | goto errout; | 
|  |  | 
|  | err = -EINVAL; | 
|  | rt_hdr = tb[TCA_EMATCH_TREE_HDR]; | 
|  | rt_list = tb[TCA_EMATCH_TREE_LIST]; | 
|  |  | 
|  | if (rt_hdr == NULL || rt_list == NULL) | 
|  | goto errout; | 
|  |  | 
|  | tree_hdr = nla_data(rt_hdr); | 
|  | memcpy(&tree->hdr, tree_hdr, sizeof(*tree_hdr)); | 
|  |  | 
|  | rt_match = nla_data(rt_list); | 
|  | list_len = nla_len(rt_list); | 
|  | matches_len = tree_hdr->nmatches * sizeof(*em); | 
|  |  | 
|  | tree->matches = kzalloc(matches_len, GFP_KERNEL); | 
|  | if (tree->matches == NULL) | 
|  | goto errout; | 
|  |  | 
|  | /* We do not use nla_parse_nested here because the maximum | 
|  | * number of attributes is unknown. This saves us the allocation | 
|  | * for a tb buffer which would serve no purpose at all. | 
|  | * | 
|  | * The array of rt attributes is parsed in the order as they are | 
|  | * provided, their type must be incremental from 1 to n. Even | 
|  | * if it does not serve any real purpose, a failure of sticking | 
|  | * to this policy will result in parsing failure. | 
|  | */ | 
|  | for (idx = 0; nla_ok(rt_match, list_len); idx++) { | 
|  | err = -EINVAL; | 
|  |  | 
|  | if (rt_match->nla_type != (idx + 1)) | 
|  | goto errout_abort; | 
|  |  | 
|  | if (idx >= tree_hdr->nmatches) | 
|  | goto errout_abort; | 
|  |  | 
|  | if (nla_len(rt_match) < sizeof(struct tcf_ematch_hdr)) | 
|  | goto errout_abort; | 
|  |  | 
|  | em = tcf_em_get_match(tree, idx); | 
|  |  | 
|  | err = tcf_em_validate(tp, tree_hdr, em, rt_match, idx); | 
|  | if (err < 0) | 
|  | goto errout_abort; | 
|  |  | 
|  | rt_match = nla_next(rt_match, &list_len); | 
|  | } | 
|  |  | 
|  | /* Check if the number of matches provided by userspace actually | 
|  | * complies with the array of matches. The number was used for | 
|  | * the validation of references and a mismatch could lead to | 
|  | * undefined references during the matching process. | 
|  | */ | 
|  | if (idx != tree_hdr->nmatches) { | 
|  | err = -EINVAL; | 
|  | goto errout_abort; | 
|  | } | 
|  |  | 
|  | err = 0; | 
|  | errout: | 
|  | return err; | 
|  |  | 
|  | errout_abort: | 
|  | tcf_em_tree_destroy(tp, tree); | 
|  | return err; | 
|  | } | 
|  | EXPORT_SYMBOL(tcf_em_tree_validate); | 
|  |  | 
|  | /** | 
|  | * tcf_em_tree_destroy - destroy an ematch tree | 
|  | * | 
|  | * @tp: classifier kind handle | 
|  | * @tree: ematch tree to be deleted | 
|  | * | 
|  | * This functions destroys an ematch tree previously created by | 
|  | * tcf_em_tree_validate()/tcf_em_tree_change(). You must ensure that | 
|  | * the ematch tree is not in use before calling this function. | 
|  | */ | 
|  | void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree) | 
|  | { | 
|  | int i; | 
|  |  | 
|  | if (tree->matches == NULL) | 
|  | return; | 
|  |  | 
|  | for (i = 0; i < tree->hdr.nmatches; i++) { | 
|  | struct tcf_ematch *em = tcf_em_get_match(tree, i); | 
|  |  | 
|  | if (em->ops) { | 
|  | if (em->ops->destroy) | 
|  | em->ops->destroy(tp, em); | 
|  | else if (!tcf_em_is_simple(em)) | 
|  | kfree((void *) em->data); | 
|  | module_put(em->ops->owner); | 
|  | } | 
|  | } | 
|  |  | 
|  | tree->hdr.nmatches = 0; | 
|  | kfree(tree->matches); | 
|  | tree->matches = NULL; | 
|  | } | 
|  | EXPORT_SYMBOL(tcf_em_tree_destroy); | 
|  |  | 
|  | /** | 
|  | * tcf_em_tree_dump - dump ematch tree into a rtnl message | 
|  | * | 
|  | * @skb: skb holding the rtnl message | 
|  | * @t: ematch tree to be dumped | 
|  | * @tlv: TLV type to be used to encapsulate the tree | 
|  | * | 
|  | * This function dumps a ematch tree into a rtnl message. It is valid to | 
|  | * call this function while the ematch tree is in use. | 
|  | * | 
|  | * Returns -1 if the skb tailroom is insufficient. | 
|  | */ | 
|  | int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv) | 
|  | { | 
|  | int i; | 
|  | u8 *tail; | 
|  | struct nlattr *top_start; | 
|  | struct nlattr *list_start; | 
|  |  | 
|  | top_start = nla_nest_start(skb, tlv); | 
|  | if (top_start == NULL) | 
|  | goto nla_put_failure; | 
|  |  | 
|  | NLA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr); | 
|  |  | 
|  | list_start = nla_nest_start(skb, TCA_EMATCH_TREE_LIST); | 
|  | if (list_start == NULL) | 
|  | goto nla_put_failure; | 
|  |  | 
|  | tail = skb_tail_pointer(skb); | 
|  | for (i = 0; i < tree->hdr.nmatches; i++) { | 
|  | struct nlattr *match_start = (struct nlattr *)tail; | 
|  | struct tcf_ematch *em = tcf_em_get_match(tree, i); | 
|  | struct tcf_ematch_hdr em_hdr = { | 
|  | .kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER, | 
|  | .matchid = em->matchid, | 
|  | .flags = em->flags | 
|  | }; | 
|  |  | 
|  | NLA_PUT(skb, i + 1, sizeof(em_hdr), &em_hdr); | 
|  |  | 
|  | if (em->ops && em->ops->dump) { | 
|  | if (em->ops->dump(skb, em) < 0) | 
|  | goto nla_put_failure; | 
|  | } else if (tcf_em_is_container(em) || tcf_em_is_simple(em)) { | 
|  | u32 u = em->data; | 
|  | nla_put_nohdr(skb, sizeof(u), &u); | 
|  | } else if (em->datalen > 0) | 
|  | nla_put_nohdr(skb, em->datalen, (void *) em->data); | 
|  |  | 
|  | tail = skb_tail_pointer(skb); | 
|  | match_start->nla_len = tail - (u8 *)match_start; | 
|  | } | 
|  |  | 
|  | nla_nest_end(skb, list_start); | 
|  | nla_nest_end(skb, top_start); | 
|  |  | 
|  | return 0; | 
|  |  | 
|  | nla_put_failure: | 
|  | return -1; | 
|  | } | 
|  | EXPORT_SYMBOL(tcf_em_tree_dump); | 
|  |  | 
|  | static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em, | 
|  | struct tcf_pkt_info *info) | 
|  | { | 
|  | int r = em->ops->match(skb, em, info); | 
|  |  | 
|  | return tcf_em_is_inverted(em) ? !r : r; | 
|  | } | 
|  |  | 
|  | /* Do not use this function directly, use tcf_em_tree_match instead */ | 
|  | int __tcf_em_tree_match(struct sk_buff *skb, struct tcf_ematch_tree *tree, | 
|  | struct tcf_pkt_info *info) | 
|  | { | 
|  | int stackp = 0, match_idx = 0, res = 0; | 
|  | struct tcf_ematch *cur_match; | 
|  | int stack[CONFIG_NET_EMATCH_STACK]; | 
|  |  | 
|  | proceed: | 
|  | while (match_idx < tree->hdr.nmatches) { | 
|  | cur_match = tcf_em_get_match(tree, match_idx); | 
|  |  | 
|  | if (tcf_em_is_container(cur_match)) { | 
|  | if (unlikely(stackp >= CONFIG_NET_EMATCH_STACK)) | 
|  | goto stack_overflow; | 
|  |  | 
|  | stack[stackp++] = match_idx; | 
|  | match_idx = cur_match->data; | 
|  | goto proceed; | 
|  | } | 
|  |  | 
|  | res = tcf_em_match(skb, cur_match, info); | 
|  |  | 
|  | if (tcf_em_early_end(cur_match, res)) | 
|  | break; | 
|  |  | 
|  | match_idx++; | 
|  | } | 
|  |  | 
|  | pop_stack: | 
|  | if (stackp > 0) { | 
|  | match_idx = stack[--stackp]; | 
|  | cur_match = tcf_em_get_match(tree, match_idx); | 
|  |  | 
|  | if (tcf_em_early_end(cur_match, res)) | 
|  | goto pop_stack; | 
|  | else { | 
|  | match_idx++; | 
|  | goto proceed; | 
|  | } | 
|  | } | 
|  |  | 
|  | return res; | 
|  |  | 
|  | stack_overflow: | 
|  | if (net_ratelimit()) | 
|  | pr_warning("tc ematch: local stack overflow," | 
|  | " increase NET_EMATCH_STACK\n"); | 
|  | return -1; | 
|  | } | 
|  | EXPORT_SYMBOL(__tcf_em_tree_match); |