blob: 1f9c014c8903bab39e819b94da57380df145e444 [file] [log] [blame]
/*
2 * Fastpath Learner
3 *
4 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9#define pr_fmt(fmt) "mfp" " learner:%s:%d: " fmt, __func__, __LINE__
10
11#include <br_private.h>
12#include <net/addrconf.h>
13#include <linux/inetdevice.h>
14#include "fp_common.h"
15#include "fp_database.h"
16#include "fp_device.h"
17#include "fp_core.h"
18#include "fp_netlink.h"
19
20#define RTMGRP_IPV4_ROUTE 0x40
21#define RTMGRP_IPV4_RULE 0x80
22#define RTMGRP_IPV6_ROUTE 0x400
23#define RTNETLINK_GRP (RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE | RTMGRP_IPV6_ROUTE)
24
25#define NFLGRP2MASK(group) ((((group) > NFNLGRP_NONE) && \
26 ((group) < __NFNLGRP_MAX)) ? \
27 (0x1UL << ((group) - 1)) : 0)
28
29#define NFNETLINK_GRP \
30 NFLGRP2MASK(NFNLGRP_CONNTRACK_NEW) | \
31 NFLGRP2MASK(NFNLGRP_CONNTRACK_UPDATE) | \
32 NFLGRP2MASK(NFNLGRP_CONNTRACK_DESTROY) | \
33 NFLGRP2MASK(NFNLGRP_CONNTRACK_EXP_NEW) | \
34 NFLGRP2MASK(NFNLGRP_CONNTRACK_EXP_UPDATE) | \
35 NFLGRP2MASK(NFNLGRP_CONNTRACK_EXP_DESTROY)
36
37/* ipv6 special flags always rejected (RTF values > 64K) */
38#define RT6_REJECT_MASK ~(RTF_UP | RTF_GATEWAY | RTF_HOST | \
39 RTF_REINSTATE | RTF_DYNAMIC | RTF_MODIFIED | \
40 RTF_DEFAULT | RTF_ADDRCONF | RTF_CACHE)
41
42#define DEFAULT_LOOKUPS_DELAY_MS (5)
43#define DEFAULT_LOOKUPS_RETRIES (10)
44
45#define NETIF_INVALID(x) (!(x) || !netif_device_present(x) || \
46 !netif_running(x) || !netif_carrier_ok(x))
47
/* resolve the real egress port behind a bridge device (defined below) */
static inline struct net_device *
get_netdev_from_br(struct net_device *br, struct nf_conntrack_tuple *tuple);

/* when true, new connections are learned from workqueue context (may sleep) */
static bool fp_learner_wq = FP_LEARNER_WQ_DEFAULT;
52
/* one run-time configured port that must never be fast-pathed */
struct policy_entry {
	struct list_head list;	/* linked into fp_learner.policy_list */
	unsigned int port;	/* host byte order (compared against ntohs()) */
};
57
/* per-module learner state */
struct fp_learner {
	spinlock_t lock;			/* protects work_items_list and policy_list */
	struct list_head work_items_list;	/* pending struct learner_work items */
	struct list_head policy_list;		/* dynamic struct policy_entry deny list */
	struct workqueue_struct *wq;		/* runs deferred connection adds */
	struct work_struct update_work;		/* re-validates all fpdb entries */
	struct socket *rt_nl_sock;		/* routing netlink socket (RTNETLINK_GRP) */
	struct socket *nf_nl_sock;		/* nfnetlink socket (NFNETLINK_GRP) */
	struct notifier_block netdev_notifier;
	struct notifier_block netevent_notifier;
	struct notifier_block inet6addr_notifier;

	unsigned int lookups_retries;	/* bridge-port lookup retries */
	unsigned int lookups_delay;	/* ms slept between retries */
	unsigned int fp_rmmoding;	/* non-zero while the module is being removed */
};
74
/* one deferred "learn this connection" request */
struct learner_work {
	struct list_head list;		/* linked into fp_learner.work_items_list */
	struct fp_learner *priv;	/* owning learner */
	struct delayed_work work;	/* executes new_connection_work() */
	/* add new connection data */
	struct nf_conn *ct;		/* connection to learn */
};
82
/* implemented elsewhere (presumably the fp_netlink glue — confirm);
 * declared here for the nfnetlink receive path */
struct nf_conn *
__get_conntrack_from_nlmsg(struct sk_buff *skb, struct nlmsghdr *nlh);
struct nf_conntrack_expect *
__get_expect_from_nlmsg(struct sk_buff *skb, struct nlmsghdr *nlh);
87
88void learner_nc_dump_conntrack_tuple(char *msg, struct nf_conn *ct)
89{
90 struct nf_conntrack_tuple *orig_tuple =
91 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
92
93 struct nf_conntrack_tuple *reply_tuple =
94 &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
95
96 char buf[MAX_DEBUG_PRINT_SIZE];
97 int len = 0;
98
99 if (msg)
100 len = sprintf(buf, "%s", msg);
101
102 len += sprintf(buf + len, "tuple orig:\n");
103 len += fp_dump_tuple(buf + len, orig_tuple);
104 len += sprintf(buf + len, "\ntuple reply:\n");
105 len += fp_dump_tuple(buf + len, reply_tuple);
106
107 pr_err("%s\n", buf);
108}
109
/*
 * policy_check_port() - static port policy filter.
 * @protocol: IPPROTO_UDP selects the UDP deny list; anything else is
 *            checked against the TCP list (matching historical behaviour)
 * @port:     port in network byte order
 *
 * Returns false when the port belongs to a control/signalling protocol
 * that must stay on the slow path (DNS, DHCP, TFTP, NetBIOS, SNMP, SIP,
 * PPTP, H.323, mDNS, IRC, ...), true when the flow may be fast-pathed.
 */
static inline bool policy_check_port(u_int8_t protocol, __be16 port)
{
	static const u_int16_t udp_deny[] = {
		53, 67, 68, 69, 135, 137, 138, 139, 161, 162, 199,
		517, 518, 546, 547, 953, 1719, 1723, 5060, 5353,
		6566, 20480,
	};
	static const u_int16_t tcp_deny[] = {
		21, 53, 135, 137, 138, 139, 162, 199, 546, 547,
		953, 1720, 1723, 5060, 6566, 6667, 20480,
	};
	const u_int16_t *deny;
	size_t i, count;
	u_int16_t host_port = ntohs(port);

	if (protocol == IPPROTO_UDP) {
		deny = udp_deny;
		count = sizeof(udp_deny) / sizeof(udp_deny[0]);
	} else { /* TCP and anything else */
		deny = tcp_deny;
		count = sizeof(tcp_deny) / sizeof(tcp_deny[0]);
	}

	for (i = 0; i < count; i++) {
		if (deny[i] == host_port)
			return false;
	}

	return true;
}
163
164
165
166static bool learner_policy_check(struct fp_learner *priv, struct nf_conn *ct)
167{
168 const struct nf_conntrack_l4proto *l4proto;
169 struct policy_entry *itr;
170 struct nf_conntrack_tuple *orig_tuple;
171
172 orig_tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
173
174 l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
175 NF_CT_ASSERT(l4proto);
176
177 if (!l4proto->l4proto)
178 goto fail;
179
180 /* check protocol is UDP/TCP */
181 if (l4proto->l4proto != IPPROTO_UDP &&
182 l4proto->l4proto != IPPROTO_TCP)
183 goto fail;
184
185 if (!policy_check_port(l4proto->l4proto, orig_tuple->dst.u.all))
186 goto fail;
187
188 if (!policy_check_port(l4proto->l4proto, orig_tuple->src.u.all))
189 goto fail;
190
191 /* Check dynamic policy */
192 spin_lock_bh(&priv->lock);
193 list_for_each_entry(itr, &priv->policy_list, list)
194 if (itr && ((itr->port == ntohs(orig_tuple->dst.u.all)) ||
195 (itr->port == ntohs(orig_tuple->src.u.all)))) {
196 spin_unlock_bh(&priv->lock);
197 goto fail;
198 }
199 spin_unlock_bh(&priv->lock);
200
201 return true;
202fail:
203 pr_debug("connection %p failed police check\n", ct);
204 return false;
205}
206
/*
 * flowi_init() - fill a struct flowi for an IPv4 route lookup.
 *
 * The kernel-version split only selects the field layout; both branches
 * set the same logical fields.  @mark is applied only when conntrack
 * marking is compiled in (and only exists on the >= 2.6.39 layout here).
 */
static inline void flowi_init(struct flowi *fl, int iif,
			      __u8 scope, __u8 proto,
			      __be32 daddr, __be32 saddr,
			      __be16 dport, __be16 sport,
			      __u32 mark)
{
	memset(fl, 0, sizeof(*fl));

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 39)
	fl->flowi_iif = iif;
	fl->flowi_scope = scope;
	fl->flowi_proto = proto;
	fl->u.ip4.daddr = daddr;
	fl->u.ip4.saddr = saddr;
	fl->u.ip4.fl4_dport = dport;
	fl->u.ip4.fl4_sport = sport;
#ifdef CONFIG_NF_CONNTRACK_MARK
	fl->flowi_mark = mark;
#endif
#else
	fl->iif = iif;
	fl->fl4_scope = scope;
	fl->proto = proto;
	fl->fl4_dst = daddr;
	fl->fl4_src = saddr;
	fl->fl_ip_dport = dport;
	fl->fl_ip_sport = sport;
#endif
}
236
/* thin wrapper around nf_ct_invert_tuple(); returns false on failure */
static inline bool invert_tuple(struct nf_conntrack_tuple *inverse,
				struct nf_conntrack_tuple *orig)
{
	return nf_ct_invert_tuple(inverse, orig);
}
242
/* true for IPv6 addresses that must never be fast-pathed */
static inline bool ipv6_check_special_addr(const struct in6_addr *addr)
{
	int addr_type = ipv6_addr_type(addr);
	/* TODO: check if we need to filter other types - such as Link Local */
	return ((addr_type & IPV6_ADDR_MULTICAST) ||
		(addr_type & IPV6_ADDR_LOOPBACK) ||
		(addr_type & IPV6_ADDR_ANY));
}
251
252static struct net_device *fp_get_route_ipv6(struct nf_conn *ct,
253 struct nf_conntrack_tuple *tuple,
254 unsigned int *route)
255{
256 struct net_device *dev = NULL;
257 struct flowi6 fl6 = {
258 .flowi6_oif = 0,
259 .daddr = tuple->dst.u3.in6,
260 };
261 int flags = RT6_LOOKUP_F_IFACE;
262 struct fib6_result res = {};
263 int ret = 0;
264
265 if (ipv6_check_special_addr(&tuple->dst.u3.in6) ||
266 ipv6_check_special_addr(&tuple->src.u3.in6)) {
267 pr_debug("Filter special address (saddr=%pI6c, daddr=%pI6c)\n",
268 &tuple->src.u3.in6, &tuple->dst.u3.in6);
269 return NULL;
270 }
271
272 //if (&tuple->src.u3.in6) {
273 memcpy(&fl6.saddr, &tuple->src.u3.in6, sizeof(tuple->src.u3.in6));
274 flags |= RT6_LOOKUP_F_HAS_SADDR;
275 //}
276
277#ifdef CONFIG_NF_CONNTRACK_MARK
278 fl6.flowi6_mark = ct->mark;
279#endif
280
281 ret = ip6_route_lookup_fastpath(nf_ct_net(ct), &fl6, &res, flags);
282 if (ret){
283 pr_debug("rt6_lookup failed\n");
284 goto out;
285 }
286
287 /* check if route is usable*/
288 if (res.fib6_flags & RTF_UP) {
289 if (res.fib6_flags & RT6_REJECT_MASK) {
290 pr_debug("route rejected (rt6i_flags = 0x%08x)\n", res.fib6_flags);
291 goto out;
292 }
293 /* accepted in fastpath */
294 dev = res.nh->fib_nh_dev;
295 *route = res.fib6_flags;
296 }
297
298out:
299 return dev;
300}
301
/* true for IPv4 addresses that must never be fast-pathed */
static inline bool ipv4_check_special_addr(const __be32 addr)
{
	/* Filter multicast, broadcast, loopback and zero net */
	return (ipv4_is_loopback(addr) || ipv4_is_multicast(addr) ||
		ipv4_is_lbcast(addr) || ipv4_is_zeronet(addr));
}
308
/*
 * fp_get_dev_by_ipaddr() - find the local device owning the tuple's
 * source address.
 *
 * Walks every network namespace and device; caller must hold RCU (the
 * IPv4 route path calls this under rcu_read_lock_bh()).
 * NOTE(review): no reference is taken on the returned device — callers
 * rely on RCU for its lifetime; confirm this matches fpdev_get_if usage.
 */
static inline struct net_device *fp_get_dev_by_ipaddr(struct nf_conntrack_tuple *tuple)
{
	struct net *net;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;

	for_each_net(net) {
		for_each_netdev(net, dev) {
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				continue;

			in_dev_for_each_ifa_rcu(ifa, in_dev) {
				if (tuple->src.u3.ip == ifa->ifa_local)
					return dev;
			}
		}
	}

	return NULL;
}
331
332static struct net_device *fp_get_route_ipv4(struct nf_conn *ct,
333 struct nf_conntrack_tuple *tuple,
334 unsigned int *route)
335{
336 struct fib_result res;
337 struct flowi flp;
338 struct net_device *dev = NULL;
339
340 if (ipv4_check_special_addr(tuple->dst.u3.ip) ||
341 ipv4_check_special_addr(tuple->src.u3.ip)) {
342 pr_debug("Filter special address (saddr=%pI4, daddr=%pI4)\n",
343 &tuple->src.u3.ip, &tuple->dst.u3.ip);
344 return NULL;
345 }
346
347#ifdef CONFIG_NF_CONNTRACK_MARK
348 flowi_init(&flp, 0, 0, tuple->dst.protonum, tuple->dst.u3.ip,
349 tuple->src.u3.ip, tuple->src.u.all, tuple->dst.u.all, ct->mark);
350#else
351 flowi_init(&flp, 0, 0, tuple->dst.protonum, tuple->dst.u3.ip,
352 tuple->src.u3.ip, tuple->src.u.all, tuple->dst.u.all, 0);
353#endif
354
355 rcu_read_lock_bh();
356 if (rt4_lookup(nf_ct_net(ct), &flp, &res) < 0) {
357 pr_debug("Getting route failed\n");
358 rcu_read_unlock_bh();
359 return NULL;
360 }
361
362 if (res.type == RTN_BROADCAST) {
363 pr_err("Route = RTN_BROADCAST\n");
364 goto out;
365 }
366
367 if (res.type == RTN_MULTICAST) {
368 pr_err("Route = RTN_MULTICAST\n");
369 goto out;
370 }
371
372 if (res.type == RTN_LOCAL) {
373 pr_debug("Route = RTN_LOCAL\n");
374 goto out;
375 }
376
377 *route = res.type;
378 dev = res.fi->fib_nh->fib_nh_dev;
379
380 if (NF_CT_NAT(ct))
381 dev = fp_get_dev_by_ipaddr(tuple) ? fp_get_dev_by_ipaddr(tuple) : dev;
382out:
383 ip4_rt_put(&res);
384 rcu_read_unlock_bh();
385 return dev;
386}
387
/*
 * fp_get_route() - resolve the fastpath egress device for @tuple.
 * @ct:      owning conntrack
 * @tuple:   tuple to route
 * @route:   out parameter: RTN_* type (IPv4) or fib6 flags (IPv6)
 * @retries: bridge-port lookup retries (0 in atomic context)
 * @delay:   ms to sleep between retries (0 in atomic context)
 *
 * When the route points at a bridge, the real port is resolved through
 * the bridge fdb, retrying while neighbour/fdb entries settle.  Returns
 * a referenced fp_net_device (with ->br set to the bridge master, or
 * NULL when not bridged) or NULL on failure.
 */
static struct fp_net_device *fp_get_route(struct nf_conn *ct,
					  struct nf_conntrack_tuple *tuple,
					  u32 *route, int retries, int delay)
{
	struct fp_net_device *fdev;
	struct net_device *dev, *br = NULL;

	dev = (tuple->src.l3num == AF_INET6) ?
		fp_get_route_ipv6(ct, tuple, route) :
		fp_get_route_ipv4(ct, tuple, route);
	if (!dev)
		return NULL;

	if (dev->priv_flags & IFF_EBRIDGE) {
		br = dev;
		do {
			dev = get_netdev_from_br(br, tuple);
			if (dev)
				break;
			if (delay)
				msleep(delay);
		} while (retries--);

		if (!dev) {
			pr_debug("Unable to get net device from bridge IP\n");
			return NULL;
		}
	}

	if (dev->reg_state != NETREG_REGISTERED) {
		pr_debug("device %s not registred (reg_state=%d)\n", dev->name,
			 dev->reg_state);
		return NULL;
	}

	if (unlikely(NETIF_INVALID(dev)) || !(dev->flags & IFF_UP)) {
		pr_debug("dev (%s) state invalid (state: %lu) or is not up (flags: 0x%x)\n", dev->name, dev->state, dev->flags);
		return NULL;
	}

	fdev = fpdev_get_if(dev);
	if (!fdev) {
		pr_err("no fastpath device for %s\n", dev->name);
		return NULL;
	}

	fdev->br = br;
	return fdev;
}
437
/*
 * ipv4_gw_addr() - find the IPv4 gateway toward @daddr on @dev.
 *
 * Returns 0 with the gateway stored in @gw, a negative rt4_lookup()
 * error, or -EFAULT for broadcast/multicast/local routes.
 * NOTE(review): for an on-link route fib_nh_gw4 is presumably 0 —
 * confirm callers handle a zero gateway.
 */
static inline int ipv4_gw_addr(struct nf_conn *ct, struct net_device *dev,
			       __be32 saddr, __be32 daddr, __be32 *gw)
{
	struct fib_result res;
	int ret = 0;
	struct flowi flp;

#ifdef CONFIG_NF_CONNTRACK_MARK
	flowi_init(&flp, dev->ifindex, RT_SCOPE_UNIVERSE, 0, daddr, saddr, 0, 0, ct->mark);
#else
	flowi_init(&flp, dev->ifindex, RT_SCOPE_UNIVERSE, 0, daddr, saddr, 0, 0, 0);
#endif
	rcu_read_lock_bh();
	ret = rt4_lookup(dev_net(dev), &flp, &res);
	if (ret != 0) {
		pr_err("rt4_lookup failed, ret = %d\n", ret);
		rcu_read_unlock_bh();
		return ret;
	}

	if (res.type == RTN_BROADCAST || res.type == RTN_MULTICAST ||
	    res.type == RTN_LOCAL) {
		pr_debug("gw not found - res.type = %d\n", res.type);
		ret = -EFAULT;
	} else {
		*gw = res.fi->fib_nh->fib_nh_gw4;
		pr_debug("gw found (%pI4)\n", gw);
	}

	ip4_rt_put(&res);
	rcu_read_unlock_bh();
	return ret;
}
471
/*
 * ipv6_gw_addr() - find the IPv6 gateway toward @daddr on @dev.
 *
 * Returns 0 on success, -ENETUNREACH when the route lookup fails, or
 * -EFAULT when a route exists but is not up.
 * NOTE: when the route is up but has no gateway (on-link,
 * fib_nh_gw_family == 0), *gw is left untouched and 0 is returned —
 * callers must pre-initialize *gw.
 */
static inline int ipv6_gw_addr(struct nf_conn *ct, struct net_device *dev, struct in6_addr *saddr,
			       struct in6_addr *daddr, struct in6_addr *gw)
{
	int ret = 0;
	struct flowi6 fl6 = {
		.flowi6_oif = 0,
		.daddr = *daddr,
	};
	int flags = RT6_LOOKUP_F_IFACE;
	struct fib6_result res = {};

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

#ifdef CONFIG_NF_CONNTRACK_MARK
	fl6.flowi6_mark = ct->mark;
#endif

	ret = ip6_route_lookup_fastpath(dev_net(dev), &fl6, &res, flags);
	if (ret) {
		pr_err("rt6_lookup failed\n");
		ret = -ENETUNREACH;
		goto out;
	}

	/* check if route is usable */
	if (res.fib6_flags & RTF_UP) {
		if (res.nh->fib_nh_gw_family)
			*gw = res.nh->fib_nh_gw6;
	} else {
		pr_debug("gw found but route is not up\n");
		ret = -EFAULT;
	}

out:
	return ret;
}
511
/* copied from br_fdb.c */
/*
 * fp_br_fdb_find() - linear RCU search of the bridge fdb list for @addr.
 * Caller must hold RCU.  Returns the matching entry or NULL.
 */
static inline
struct net_bridge_fdb_entry *fp_br_fdb_find(struct hlist_head *head,
					    const unsigned char *addr)
{
	struct net_bridge_fdb_entry *fdb;

	hlist_for_each_entry_rcu(fdb, head, fdb_node) {
		if (ether_addr_equal(fdb->key.addr.addr, addr))
			return fdb;
	}

	return NULL;
}
526
527static inline
528struct net_device *fp_br_get_netdev_by_mac(struct net_bridge *br,
529 const unsigned char *mac)
530{
531 unsigned int i;
532 struct net_bridge_fdb_entry *fdb;
533
534 BUG_ON(!br);
535
536 rcu_read_lock_bh();
537 for (i = 0; i < BR_HASH_SIZE; i++) {
538 fdb = fp_br_fdb_find(&br->fdb_list, mac);
539 if (fdb) {
540 pr_debug("br: %s fdb[%u]: %pIM , port:%s\n",
541 br->dev->name, i, fdb->key.addr.addr,
542 fdb->dst->dev->name);
543 rcu_read_unlock_bh();
544 return fdb->dst->dev;
545 }
546 }
547 rcu_read_unlock_bh();
548 pr_debug("no match found in fdb (%pM)\n", mac);
549
550 return NULL;
551}
552
/*
 * get_netdev_from_br() - map a bridged flow to its real egress port.
 *
 * Looks up the neighbour of the tuple's destination on the bridge (ARP
 * table for IPv4, ND table for IPv6), then resolves the neighbour's MAC
 * through the bridge fdb.  Returns the port net_device or NULL.
 */
static inline
struct net_device *get_netdev_from_br(struct net_device *br,
				      struct nf_conntrack_tuple *tuple)
{
	struct neighbour *neigh;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;

	BUG_ON(!tuple);

	tbl = (tuple->src.l3num == AF_INET6) ? &nd_tbl : &arp_tbl;

	neigh = neigh_lookup(tbl, tuple->dst.u3.all, br);
	if (neigh) {
		dev = fp_br_get_netdev_by_mac(netdev_priv(br), neigh->ha);
		neigh_release(neigh);
	}

	return dev;
}
573
574static int fp_hh_init(struct nf_conn *ct, struct nf_conntrack_tuple *t,
575 struct fp_net_device *dst, struct hh_cache *hh)
576{
577 struct net_device *dev = dst->br ? dst->br : dst->dev;
578 __be16 prot;
579 struct neighbour *n;
580 const struct header_ops *header_ops;
581
582 if (is_vlan_dev(dev))
583 header_ops = vlan_dev_real_dev(dev)->header_ops;
584 else
585 header_ops = dev->header_ops;
586
587 memset(hh, 0, sizeof(*hh));
588
589 if (!header_ops) {
590 pr_debug("device %s has no header ops\n", dev->name);
591 return 0; /* device does not have L2 header*/
592 }
593
594 if (!header_ops->cache || !header_ops->cache_update) {
595 pr_debug("device %s has no header cache ops\n", dev->name);
596 return -ENOTSUPP;
597 }
598
599 if (t->src.l3num == AF_INET) {
600 __be32 gw;
601 prot = htons(ETH_P_IP);
602
603 n = __ipv4_neigh_lookup(dev, t->dst.u3.ip);
604 if (!n) {
605 if (ipv4_gw_addr(ct, dev, t->src.u3.ip, t->dst.u3.ip, &gw))
606 goto not_found;
607 n = __ipv4_neigh_lookup(dev, gw);
608 if (!n)
609 goto not_found;
610 }
611 } else if (t->src.l3num == AF_INET6) {
612 struct in6_addr gw6;
613 prot = htons(ETH_P_IPV6);
614
615 n = __ipv6_neigh_lookup(dev, &t->dst.u3.in6);
616 if (!n) {
617 if (ipv6_gw_addr(ct, dev, &t->src.u3.in6, &t->dst.u3.in6, &gw6))
618 goto not_found;
619 n = __ipv6_neigh_lookup(dev, &t->dst.u3.in6);
620 if (!n)
621 goto not_found;
622 }
623 } else {
624 BUG();
625 }
626
627 if (n->nud_state & NUD_VALID) {
628 int err = header_ops->cache(n, hh, prot);
629 neigh_release(n);
630 pr_debug("device %s hh_cache initialized: hh_len=%d, hh_data=%pM\n",
631 dev->name, hh->hh_len, hh->hh_data);
632 return err;
633 }
634
635 pr_debug("neighbour state invalid (%02x)\n", n->nud_state);
636 neigh_release(n);
637not_found:
638 /* we get here in 2 cases, both are NOT considered as error:
639 * 1. Neighbour lookup failed - we will be notified when the neighbour
640 * will be finally created
641 * 2. Neighbour state not valid - we will be notified when the neighbour
642 * state changes
643 * Both are handled by netdev_event - where the entry's hh_cache will be
644 * updated. Untill this happens, all packets matching this entry will be
645 * classified as slow by the fp_classifier.
646 */
647 pr_debug("No neighbour found or neighbour state invalid\n");
648 return 0;
649}
650
/*
 * connection_to_entry() - build a fastpath database entry from a conntrack.
 * @priv:  learner context
 * @ct:    connection to translate
 * @dir:   which conntrack direction this entry describes
 * @flags: allocation flags; anything but GFP_ATOMIC also enables the
 *         bridge-port lookup retry/delay loop
 *
 * Resolves the egress route for the (possibly NAT-inverted) original
 * tuple and the ingress route via the reply tuple, primes the L2 header
 * cache and fills a freshly allocated fpdb entry.  Returns the entry
 * (holding both fp_net_device references) or NULL on failure, in which
 * case any acquired references are dropped.
 */
static struct fpdb_entry *connection_to_entry(struct fp_learner *priv,
					      struct nf_conn *ct,
					      enum ip_conntrack_dir dir,
					      gfp_t flags)
{
	struct fp_net_device *dst = NULL, *src = NULL;
	struct nf_conntrack_tuple *orig_tuple, *reply_tuple;
	struct nf_conntrack_tuple orig_tuple_inverse, reply_tuple_inverse;
	struct fpdb_entry *entry;
	struct hh_cache hh;
	unsigned int in_route_type, out_route_type;
	int retries = flags != GFP_ATOMIC ? priv->lookups_retries : 0;
	int delay = flags != GFP_ATOMIC ? priv->lookups_delay : 0;

	if (unlikely(priv->fp_rmmoding))
		goto failed;

	/* For reply connections -> switch tuples */
	if (dir == IP_CT_DIR_REPLY) {
		orig_tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
		reply_tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
	} else {
		orig_tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
		reply_tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
	}

	/* NATed connections are routed by the inverted tuples */
	if (NF_CT_NAT(ct)) {
		if (!invert_tuple(&orig_tuple_inverse, orig_tuple)) {
			pr_err("Inverting tuple failed\n");
			goto failed;
		}

		if (!invert_tuple(&reply_tuple_inverse, reply_tuple)) {
			pr_err("Inverting tuple failed\n");
			goto failed;
		}

		orig_tuple = &reply_tuple_inverse;
		reply_tuple = &orig_tuple_inverse;
		pr_debug( "NAT connection was detected\n");
	}

	/* Check destination route */
	dst = fp_get_route(ct, orig_tuple, &in_route_type, retries, delay);
	if (!dst) {
		pr_debug("Connection routing failed\n");
		goto failed;
	}

	/* Check source route */
	src = fp_get_route(ct, reply_tuple, &out_route_type, retries, delay);
	if (!src) {
		pr_debug("Connection routing failed (local)\n");
		goto failed;
	}

	/* pre-compute the L2 header for the egress device */
	if (fp_hh_init(ct, orig_tuple, dst, &hh)) {
		pr_debug("fp_hh_init failed \n");
		goto failed;
	}

	entry = fpdb_alloc(flags);
	if (!entry) {
		pr_debug("Allocating entry failed\n");
		goto failed;
	}

	/* Restore the original tuples */
	if (dir == IP_CT_DIR_REPLY) {
		orig_tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
		reply_tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
	} else {
		orig_tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
		reply_tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
	}

	/*
	 * if interface is going down, and we are updating an entry referring
	 * to this interface, we might accidentally route the connection to its
	 * source device. Block this entry until it is updated again.
	 */
	if (src->dev == dst->dev) {
		pr_debug("Connection created with src == dst for (%s)\n",
			 src->dev->name);
		entry->block = 1;
	}

	/* Fill in entry */
	entry->dir = dir;
	entry->in_tuple = *orig_tuple;
	entry->out_tuple = *reply_tuple;
	entry->ct = ct;
	entry->out_dev = dst;
	entry->in_dev = src;
	entry->hit_counter = 0;
	entry->debug.in_route_type = in_route_type;
	entry->debug.out_route_type = out_route_type;
	entry->hh = hh;

	/* Succeed */
	pr_debug("connection added (ct=%p, dir=%d)\n", ct, dir);
	FP_DEBUG_DUMP_CONTRACK(NULL, ct);
	return entry;

failed:
	/* Failed - drop whatever device references were taken */
	fpdev_put(src);
	fpdev_put(dst);
	pr_debug("connection refused (ct=%p, dir=%d)\n", ct, dir);
	FP_DEBUG_DUMP_CONTRACK(NULL, ct);
	return NULL;
}
763
764
/*
 * __add_new_connection() - learn @ct into the fastpath database.
 * @priv:  learner context
 * @ct:    connection to learn
 * @flags: GFP_KERNEL from the workqueue path, GFP_ATOMIC otherwise
 *
 * The first call for a conntrack creates the ORIGINAL-direction entry and
 * attaches the fastpath conntrack extension; a later call (once a reply
 * has been seen) adds the REPLY-direction entry.  Returns 0 on success or
 * when nothing had to be done, -EINVAL when entry creation fails.
 */
static inline int __add_new_connection(struct fp_learner *priv,
				       struct nf_conn *ct, gfp_t flags)
{
	struct nf_conn_fastpath *fastpath = nfct_fastpath(ct);
	struct fpdb_entry *e;

	rcu_read_lock_bh();
	/* original fastpath connection */
	if (!fastpath) {
		e = connection_to_entry(priv, ct, IP_CT_DIR_ORIGINAL, flags);
		if (!e) {
			rcu_read_unlock_bh();
			return -EINVAL;
		}

		set_bit(IPS_FASTPATH_BIT, &ct->status);
		fastpath = nf_ct_ext_add(ct, NF_CT_EXT_FASTPATH, flags);
		BUG_ON(!fastpath);

		fastpath->fpd_el[IP_CT_DIR_ORIGINAL] = e;
		fastpath->fpd_el[IP_CT_DIR_REPLY] = NULL;
		fpdb_add(e);
		goto del_entry;
	}

	/* reply fastpath connection */
	BUG_ON(!test_bit(IPS_FASTPATH_BIT, &ct->status));
	if (fastpath->fpd_el[IP_CT_DIR_REPLY] == NULL &&
	    test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
		e = connection_to_entry(priv, ct, IP_CT_DIR_REPLY, flags);
		if (!e) {
			rcu_read_unlock_bh();
			return -EINVAL;
		}

		fastpath->fpd_el[IP_CT_DIR_REPLY] = e;
		fpdb_add(e);
		goto del_entry;
	}
	rcu_read_unlock_bh();
	return 0;

del_entry:
	/* a device may have gone down (or the module may be unloading)
	 * while the entry was built - purge entries on invalid devices */
	if (unlikely((NETIF_INVALID(e->in_dev->dev)) ||
		     !(e->in_dev->dev->flags & IFF_UP) || priv->fp_rmmoding)) {
		pr_err("in_dev (%s) state invalid or is rmmoding, del entry!\n", e->in_dev->dev->name);
		fpdb_del_by_dev(e->in_dev->dev);
	}

	if (unlikely((NETIF_INVALID(e->out_dev->dev)) ||
		     !(e->out_dev->dev->flags & IFF_UP) || priv->fp_rmmoding)) {
		pr_err("out_dev (%s) state invalid or is rmmoding, del entry!\n", e->out_dev->dev->name);
		fpdb_del_by_dev(e->out_dev->dev);
	}
	rcu_read_unlock_bh();
	return 0;
}
822
823static void new_connection_work(struct work_struct *w)
824{
825 struct learner_work *work;
826
827 work = container_of(w, struct learner_work, work.work);
828 BUG_ON(!work);
829
830 __add_new_connection(work->priv, work->ct, GFP_KERNEL);
831
832 /* release work */
833 spin_lock_bh(&work->priv->lock);
834 list_del(&work->list);
835 spin_unlock_bh(&work->priv->lock);
836 kfree(work);
837}
838
/*
 * add_new_connection_work() - queue deferred learning of @ct.
 *
 * Returns 0 on success, -EINVAL when the policy rejects the connection,
 * -ENOMEM when the work item cannot be allocated.  The item is freed by
 * new_connection_work() or by new_connection_cancle().
 */
static inline int add_new_connection_work(struct fp_learner *priv,
					  struct nf_conn *ct)
{
	struct learner_work *work;

	if (!learner_policy_check(priv, ct))
		return -EINVAL;

	work = kzalloc(sizeof(*work), GFP_ATOMIC);
	if (!work)
		return -ENOMEM;

	work->ct = ct;
	work->priv = priv;
	INIT_LIST_HEAD(&work->list);
	INIT_DELAYED_WORK(&work->work, new_connection_work);

	spin_lock_bh(&priv->lock);
	list_add_tail(&work->list, &priv->work_items_list);
	spin_unlock_bh(&priv->lock);

	queue_delayed_work(priv->wq, &work->work, 0);

	return 0;
}
864
865
866static inline int add_new_connection_noblock(struct fp_learner *priv,
867 struct nf_conn *ct)
868{
869 if (!learner_policy_check(priv, ct))
870 return -EINVAL;
871 return __add_new_connection(priv, ct, GFP_ATOMIC);
872}
873
874static inline int add_new_connection(struct fp_learner *priv,
875 struct nf_conn *ct)
876{
877 if (fp_learner_wq)
878 return add_new_connection_work(priv, ct);
879 else
880 return add_new_connection_noblock(priv, ct);
881}
882
/*
 * new_connection_cancle() - cancel a pending learn request for @ct.
 *
 * Called on conntrack delete; if the connection is still waiting in the
 * workqueue its work item is cancelled and freed.  Always returns 0.
 */
static inline int
new_connection_cancle(struct fp_learner *priv, struct nf_conn *ct)
{
	struct learner_work *work;

	if (!fp_learner_wq)
		return 0;

	spin_lock_bh(&priv->lock);
	list_for_each_entry(work, &priv->work_items_list, list) {
		if (work->ct == ct) {
			/* free only if the work had not started running */
			if (cancel_delayed_work(&work->work)) {
				pr_debug("cancle connection add %p\n", ct);
				list_del(&work->list);
				kfree(work);
			}
			break;
		}
	}
	spin_unlock_bh(&priv->lock);

	return 0;
}
909
910static int learner_ct_event(struct fp_learner *priv, struct nf_conn *ct,
911 unsigned int type, unsigned int flags)
912{
913 if (type == IPCTNL_MSG_CT_DELETE) {
914 pr_debug("delete connection (%p)\n", ct);
915 return new_connection_cancle(priv, ct);
916 } else if (type == IPCTNL_MSG_CT_NEW) {
917 pr_debug("new connection (%p)\n", ct);
918 return add_new_connection(priv, ct);
919 }
920
921 pr_debug("Unhandled type=%u\n", type);
922 FP_DEBUG_DUMP_CONTRACK(NULL, ct);
923
924 return -ENOTSUPP;
925}
926
/*
 * fpdev_del_gb6() - clear cached IPv6 state when a device goes down.
 *
 * Resets the cached link-local/global addresses, prefix length and MTU
 * on the fastpath device.  Always returns 0.
 */
static int fpdev_del_gb6(struct net_device *dev)
{
	struct fp_net_device *fpdev;

	fpdev = fpdev_get_if(dev);
	if (unlikely(!fpdev))
		return 0;

	memset(&fpdev->ll6addr, 0, sizeof(struct in6_addr));
	memset(&fpdev->gb6addr, 0, sizeof(struct in6_addr));
	fpdev->prefixlen = 0;
	fpdev->mtu = 0;
	fpdev_clear_ll6(fpdev);
	fpdev_clear_gb6(fpdev);
	fpdev_clear_mtu(fpdev);

	fpdev_put(fpdev);

	return 0;
}
947
948/**
949 * handle netdevice events.
950 *
951 * NETDEV_REGISTER
952 * new net_device is registered. A fastpath device is created
953 * and associated to it.
954 *
955 * NETDEV_UNREGISTER
956 * net_device unregistered, delete the associated fastpath device. In
957 * addition, remove all conntracks related to this device - this will
958 * cause all the related fastpath database entries to be deleted thus allowing
959 * the device to be safely removed.
960 *
961 * @note We can safely ignore NETDEV_UP / NETDEV_DOWN since it is
962 * checked in the classifier anyway. Regarding other events -
963 * will be added in the future if needed.
964 * @param dev
965 * @param event
966 *
967 * @return NOTIFY_DONE
968 */
969static int
970__learner_netdev_event(struct net_device *dev, unsigned long event)
971{
972 switch (event) {
973 case NETDEV_REGISTER:
974 pr_debug("received netdev (%s) register, event %lu, state: 0x%lx, flags: 0x%x, invalid: %d\n",
975 dev->name, event, dev->state, dev->flags, NETIF_INVALID(dev));
976 fpdev_add_if(dev);
977 break;
978 case NETDEV_UNREGISTER:
979 printk(KERN_DEBUG "received netdev (%s) unregister, event %lu, state: 0x%lx, flags: 0x%x, invalid: %d\n",
980 dev->name, event, dev->state, dev->flags, NETIF_INVALID(dev));
981 fpdb_del_by_dev(dev);
982 fpdb_iterate(&fpdb_del_block_entry_by_dev, (void *)dev);
983 fpdev_del_if(dev);
984 break;
985 case NETDEV_DOWN:
986 fpdev_del_gb6(dev);
987 break;
988 default:
989 pr_debug("ignoring netdev %s event %lu, state: 0x%lx, flags: 0x%x, invalid: %d\n",
990 dev->name, event, dev->state, dev->flags, NETIF_INVALID(dev));
991 }
992
993 return NOTIFY_DONE;
994}
995
/* main dispatcher for netdev events - bridge and loopback ignored */
static int learner_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev;

	/* notifier payload changed in 3.11: raw dev vs. netdev_notifier_info */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0)
	dev = ptr;
#else
	dev = ((struct netdev_notifier_info*)ptr)->dev;
#endif

	if ((dev->priv_flags & IFF_EBRIDGE) || (dev->flags & IFF_LOOPBACK))
		return NOTIFY_DONE;

	return __learner_netdev_event(dev, event);
}
1013
/*
 * learner_netdev_cleanup() - replay DOWN/UNREGISTER events at teardown.
 *
 * Compiled in only for pre-3.4 kernels; a no-op otherwise.
 * NOTE(review): presumably newer kernels handle this replay inside
 * unregister_netdevice_notifier() - confirm.
 */
static void learner_netdev_cleanup(struct notifier_block *nb)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
	struct net_device *dev;
	struct net *net;

	rtnl_lock();
	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev->flags & IFF_UP) {
				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
				nb->notifier_call(nb, NETDEV_DOWN, dev);
			}
			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
		}
	}
	rtnl_unlock();
#endif
}
1034
/*
 * fp_inet6addr_event() - inet6addr notifier: cache the first link-local
 * address assigned to a device into its fastpath device.
 */
static int fp_inet6addr_event(struct notifier_block *nb,
			      unsigned long event, void *ptr)
{
	struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
	struct net_device *dev = ifa->idev->dev;
	struct fp_net_device *fpdev;
	int addr_type;

	if (event != NETDEV_UP)
		return NOTIFY_DONE;

	addr_type = ipv6_addr_type(&ifa->addr);
	if (!(addr_type & IPV6_ADDR_LINKLOCAL))
		return NOTIFY_DONE;

	fpdev = fpdev_get_if(dev);
	if (unlikely(!fpdev))
		return NOTIFY_DONE;

	/* keep only the first link-local address seen */
	if (!fpdev_is_ll6_set(fpdev)) {
		memcpy(&fpdev->ll6addr, &ifa->addr, sizeof(ifa->addr));
		fpdev_set_ll6(fpdev);
	}

	fpdev_put(fpdev);

	return NOTIFY_DONE;
}
1063
1064static int update_entry(struct fpdb_entry *e, void *data)
1065{
1066 struct fpdb_entry *ne = NULL;
1067 struct fp_learner *fpl = (struct fp_learner *)data;
1068 struct nf_conn_fastpath *fastpath;
1069
1070 spin_lock_bh(&e->lock);
1071 fpdb_lock_bh();
1072
1073 /* Exit if CT destroied, in case fpdb get a wrong ct info */
1074 if (e->state == ENTRY_DYING)
1075 goto done;
1076
1077 fastpath = nfct_fastpath(e->ct);
1078 if (unlikely(!fastpath))
1079 goto done;
1080
1081 ne = connection_to_entry(fpl, e->ct, e->dir, GFP_ATOMIC);
1082 if (!ne) {
1083 /* The connection may become local but we do not want
1084 to remove it from STACK so just block it */
1085 e->block = 1;
1086 } else {
1087 if (ne->out_dev == e->out_dev &&
1088 ne->in_dev == e->in_dev &&
1089 nf_ct_tuple_equal(&ne->in_tuple, &e->in_tuple) &&
1090 nf_ct_tuple_equal(&ne->out_tuple, &e->out_tuple) &&
1091 !memcmp(&ne->hh, &e->hh, sizeof(struct hh_cache))) {
1092 pr_debug("new fp entry equal old,no update\n");
1093 fpdb_free(ne);
1094 goto done;
1095 }
1096
1097 /* if the old connection is blocked keep it blocked */
1098 /* if ne->block is 1 and e->block is 0, there will be issue --yhuang 20160617*/
1099 if (ne->block != 1)
1100 ne->block = e->block;
1101
1102 if (ne->dir == IP_CT_DIR_REPLY)
1103 fastpath->fpd_el[IP_CT_DIR_REPLY] = ne;
1104 else
1105 fastpath->fpd_el[IP_CT_DIR_ORIGINAL] = ne;
1106
1107 fpdb_replace(e, ne);
1108 }
1109
1110done:
1111 fpdb_unlock_bh();
1112 spin_unlock_bh(&e->lock);
1113
1114 if (ne) {
1115 if (unlikely((NETIF_INVALID(ne->in_dev->dev)) ||
1116 !(ne->in_dev->dev->flags & IFF_UP) || fpl->fp_rmmoding)) {
1117 pr_err("in_dev (%s) state invalid or rmmoding, del!\n",
1118 ne->in_dev->dev->name);
1119 fpdb_del_by_dev(ne->in_dev->dev);
1120 }
1121
1122 if (unlikely((NETIF_INVALID(ne->out_dev->dev)) ||
1123 !(ne->out_dev->dev->flags & IFF_UP) || fpl->fp_rmmoding)) {
1124 pr_err("out_dev (%s) state invalid or rmmoding, del!\n",
1125 ne->out_dev->dev->name);
1126 fpdb_del_by_dev(ne->out_dev->dev);
1127 }
1128 }
1129 return 0;
1130}
1131
1132static int block_entry(struct fpdb_entry *e, void *ptr)
1133{
1134 spin_lock_bh(&e->lock);
1135 e->block = 1;
1136 spin_unlock_bh(&e->lock);
1137
1138 return 0;
1139}
1140
1141static void learner_ct_update_work(struct work_struct *work)
1142{
1143 struct fp_learner *fpl = container_of(work,
1144 struct fp_learner, update_work);
1145
1146 fpdb_iterate(&update_entry, (void *)fpl);
1147}
1148
1149void __learner_ct_update_all(struct fp_learner *fpl)
1150{
1151 schedule_work(&fpl->update_work);
1152}
1153
1154static int learner_netevent(struct notifier_block *nb, unsigned long event, void *ctx)
1155{
1156 struct fp_learner *fpl = container_of(nb, struct fp_learner, netevent_notifier);
1157
1158 BUG_ON(!fpl);
1159
1160 if (event == NETEVENT_NEIGH_UPDATE) {
1161 struct neighbour *n = ctx;
1162
1163 pr_debug("neighbor update received (state=%d, dev=%s)\n",
1164 n->nud_state, n->dev->name);
1165 __learner_ct_update_all(fpl);
1166 } else if (event == NETEVENT_REDIRECT) {
1167
1168 __learner_ct_update_all(fpl);
1169 pr_debug("neighbor redirect received\n");
1170 } else {
1171 pr_debug("mfp received netevent %lu, which no need to update ct\n", event);
1172 WARN_ON(1);
1173 }
1174
1175 return 0;
1176}
1177
1178static void learner_rtnetlink_rcv(struct nlmsghdr *nlh, void *ptr)
1179{
1180 struct fp_learner *fpl = (struct fp_learner *)ptr;
1181 switch (nlh->nlmsg_type) {
1182 case RTM_NEWROUTE:
1183 case RTM_DELROUTE:
1184 pr_debug( "%s\n",
1185 nlh->nlmsg_type == RTM_NEWROUTE ? "RTM_NEWROUTE" :
1186 "RTM_DELROUTE");
1187 __learner_ct_update_all(fpl);
1188 break;
1189 case RTM_NEWTFILTER:
1190 pr_debug( "RTM_NEWTFILTER\n");
1191 /* TODO: check if we need update in this case*/
1192 break;
1193 case RTM_DELTFILTER:
1194 pr_debug( "RTM_DELTFILTER\n");
1195 /* TODO: check if we need update in this case*/
1196 break;
1197 case RTM_GETTFILTER:
1198 pr_debug( "RTM_GETTFILTER\n");
1199 break;
1200 }
1201 pr_debug("handle routing netlink message, type=%d\n", nlh->nlmsg_type);
1202 //TODO - add support for update_all_connections
1203}
1204
1205static void learner_nfnetlink_rcv(struct sk_buff *skb,
1206 struct nlmsghdr *nlh, void *ptr)
1207{
1208 struct fp_learner *priv = (struct fp_learner *)ptr;
1209 unsigned int type = NFNL_MSG_TYPE(nlh->nlmsg_type);
1210 unsigned int ssid = NFNL_SUBSYS_ID(nlh->nlmsg_type);
1211 struct nf_conn *ct;
1212 struct nf_conntrack_expect *exp;
1213 unsigned int flags = nlh->nlmsg_flags;
1214 int ret;
1215
1216 if (ssid == NFNL_SUBSYS_CTNETLINK) {
1217 ct = __get_conntrack_from_nlmsg(skb, nlh);
1218 if (ct == NULL) {
1219 pr_debug("can't get nf conn type=%u, ssid=%u\n", type, ssid);
1220 return;
1221 }
1222 pr_debug("found CTNETLINK connection %p, type=%u, ssid=%u\n", ct, type, ssid);
1223 } else if (ssid == NFNL_SUBSYS_CTNETLINK_EXP) {
1224 exp = __get_expect_from_nlmsg(skb, nlh);
1225 if (exp == NULL) {
1226 pr_err("can't get expect\n");
1227 return;
1228 }
1229 ct = exp->master;
1230 pr_debug("found CTNETLINK_EXP exp %p, master connection %p, type=%u, ssid=%u\n", exp, ct, type, ssid);
1231 } else {
1232 pr_err("unexpected ssid (%d)\n", ssid);
1233 return;
1234 }
1235
1236 /* dispatch events */
1237 ret = learner_ct_event(priv, ct, type, flags);
1238 if (ret < 0)
1239 pr_debug("learner_ct_event failed with error code %d\n"
1240 "ct=%p, type=%u, flags=%u\n", ret, ct, type, flags);
1241}
1242
1243/* Receive message from netlink and pass information to relevant function. */
1244static void learner_nl_data_ready(struct sock *sk)
1245{
1246 int ret = 0;
1247 int len;
1248 struct sk_buff *skb;
1249 struct nlmsghdr *nlh;
1250
1251 BUG_ON(!sk);
1252 pr_debug("got a message (socket protocol=%d)\n", sk->sk_protocol);
1253
1254 while ((skb = skb_recv_datagram(sk, 0, 1, &ret)) == NULL) {
1255 if (ret == -EAGAIN || ret == -ENOBUFS) {
1256 pr_err("recvfrom() error %d\n", -ret);
1257 return;
1258 }
1259 }
1260
1261 len = skb->len;
1262 for (nlh = (struct nlmsghdr *)skb->data; NLMSG_OK(nlh, len);
1263 nlh = NLMSG_NEXT(nlh, len)) {
1264 pr_debug("nlmsg_len %u, nlmsg_type %u\n", nlh->nlmsg_len, nlh->nlmsg_type);
1265
1266 /* Finish of reading. */
1267 if (nlh->nlmsg_type == NFNL_MSG_TYPE(NLMSG_DONE))
1268 goto out;
1269
1270 /* Error handling. */
1271 if (nlh->nlmsg_type == NFNL_MSG_TYPE(NLMSG_ERROR)) {
1272 pr_err("nl message error\n");
1273 goto out;
1274 }
1275
1276 if (sk->sk_protocol == NETLINK_ROUTE) {
1277 learner_rtnetlink_rcv(nlh, sk->sk_user_data);
1278 } else if (sk->sk_protocol == NETLINK_NETFILTER) {
1279 learner_nfnetlink_rcv(skb, nlh, sk->sk_user_data);
1280 } else {
1281 pr_err("unrecognized sk_protocol (%u)\n", sk->sk_protocol);
1282 goto out;
1283 }
1284 }
1285out:
1286 skb_orphan(skb);
1287 kfree_skb(skb);
1288 return;
1289}
1290
1291static int learner_nl_open(void *priv, struct socket **s, int proto, int groups)
1292{
1293 struct socket *sock;
1294 struct sockaddr_nl addr;
1295 int rc, val = 1;
1296
1297 rc = sock_create_kern(&init_net, AF_NETLINK , SOCK_RAW, proto, &sock);
1298 if (rc < 0) {
1299 pr_err("create err (rc=%d)\n", rc);
1300 return rc;
1301 }
1302
1303 memset((void *)&addr, 0, sizeof(addr));
1304 addr.nl_family = AF_NETLINK;
1305 addr.nl_pid = 0;
1306 addr.nl_groups = groups;
1307 sock->sk->sk_user_data = priv;
1308 sock->sk->sk_data_ready = learner_nl_data_ready;
1309 sock->sk->sk_allocation = GFP_ATOMIC;
1310
1311 rc = kernel_bind(sock, (struct sockaddr *)&addr, sizeof(addr));
1312 if (rc < 0) {
1313 pr_err("bind err (rc=%d)\n", rc);
1314 goto sock_err;
1315 }
1316
1317 rc = kernel_setsockopt(sock, SOL_NETLINK, NETLINK_NO_ENOBUFS, (char *)&val, sizeof(val));
1318 if (rc < 0) {
1319 pr_err("setsockopt err (rc=%d)", rc);
1320 goto sock_err;
1321 }
1322
1323 pr_debug("netlink socket opened (proto=%u, groups=%u)\n", proto, groups);
1324 *s = sock;
1325 return 0;
1326
1327sock_err:
1328 kernel_sock_shutdown(sock, SHUT_RDWR);
1329 sock_release(sock);
1330 return rc;
1331}
1332
1333static void learner_nl_close(struct socket *sk)
1334{
1335 BUG_ON(!sk);
1336 kernel_sock_shutdown(sk, SHUT_RDWR);
1337 sock_release(sk);
1338}
1339
1340static unsigned int fp_learner_nf_hook(void *priv,
1341 struct sk_buff *skb,
1342 const struct nf_hook_state *state)
1343{
1344 enum ip_conntrack_info ctinfo;
1345 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
1346 struct nf_conn_fastpath *f;
1347 struct fpdb_entry *e;
1348 struct net_device *el_src, *el_dst;
1349
1350 if (!ct)
1351 goto out;
1352
1353 f = nfct_fastpath(ct);
1354 if (!f)
1355 goto out;
1356
1357 rcu_read_lock_bh();
1358 e = rcu_dereference(f->fpd_el[CTINFO2DIR(ctinfo)]);
1359 /* Here we can not clear block simply. We need to check */
1360 /* whether src dev equals to dst dev yhuang 20160622*/
1361 if (unlikely(e && (e->block == true))) {
1362 if ((e->state != ENTRY_ALIVE)
1363 || (e->dir >= IP_CT_DIR_MAX)) {
1364 rcu_read_unlock_bh();
1365 goto out;
1366 }
1367
1368 /* skb->dev already set to destiniation device */
1369 el_src = e->in_dev->dev;
1370 el_dst = e->out_dev->dev;
1371 if(unlikely((el_src == NULL) || (el_dst == NULL))) {
1372 rcu_read_unlock_bh();
1373 goto out;
1374 }
1375 if (unlikely(NETIF_INVALID(el_src) || NETIF_INVALID(el_dst))) {
1376 rcu_read_unlock_bh();
1377 goto out;
1378 }
1379
1380 if (likely(el_src != el_dst)) {
1381 spin_lock_bh(&e->lock);
1382 e->block = 0;
1383 spin_unlock_bh(&e->lock);
1384 pr_debug("unblock entry:ct=%x dir=%d bucket=%x %x %x\n",
1385 (unsigned int)e->ct, e->dir, e->bucket,
1386 (unsigned int)(&e->in_tuple),
1387 (unsigned int)(&e->out_tuple));
1388 }
1389 }
1390 rcu_read_unlock_bh();
1391out:
1392 return NF_ACCEPT;
1393}
1394
/** learner's netfilter hook: observe forwarded traffic at POST_ROUTING
 * (after routing decided the output device) for both IPv4 and IPv6;
 * NF_IP_PRI_LAST so it runs after all other hooks have had their say.
 */
static struct nf_hook_ops nf_learner_hook_data[] __read_mostly = {
	{
		.hook = fp_learner_nf_hook,
		.pf = NFPROTO_IPV4,
		.hooknum = NF_INET_POST_ROUTING,
		.priority = NF_IP_PRI_LAST,
	},
	{
		.hook = fp_learner_nf_hook,
		.pf = NFPROTO_IPV6,
		.hooknum = NF_INET_POST_ROUTING,
		.priority = NF_IP_PRI_LAST,
	},
};
1410
1411void __learner_ct_block_all(struct fastpath_module *m)
1412{
1413 fpdb_iterate(&block_entry, m->priv);
1414}
1415
1416static ssize_t
1417learner_ct_update_all(struct fastpath_module *m, const char *buf, size_t count)
1418{
1419 __learner_ct_update_all((struct fp_learner *)m->priv);
1420 return count;
1421}
1422
1423static ssize_t
1424learner_ct_block_all(struct fastpath_module *m, const char *buf, size_t count)
1425{
1426 __learner_ct_block_all(m);
1427 return count;
1428}
1429
1430static ssize_t learner_policy_show(struct fastpath_module *m, char *buf)
1431{
1432 struct policy_entry *itr;
1433 struct fp_learner *priv = m->priv;
1434 int len;
1435
1436 len = scnprintf(buf, PAGE_SIZE, "dynamic policy restricted ports:\n");
1437
1438 spin_lock_bh(&priv->lock);
1439 list_for_each_entry(itr, &priv->policy_list, list)
1440 len += scnprintf(buf + len, PAGE_SIZE - len, "%d, ", itr->port);
1441 spin_unlock_bh(&priv->lock);
1442
1443 len += scnprintf(buf + len, PAGE_SIZE - len, "\n");
1444
1445 return len;
1446}
1447
1448static ssize_t learner_policy_store(struct fastpath_module *m,
1449 const char *buf, size_t count)
1450{
1451 char op;
1452 unsigned int port;
1453 struct policy_entry *entry, *tmp;
1454 struct fp_learner *priv = m->priv;
1455 if (sscanf(buf, "%c%u", &op, &port) != 2 || port > 0xFFFF)
1456 return -EINVAL;
1457 pr_err("Enter learner_policy_store:op=%c\n", op);
1458 if (op == '-') {
1459 /* remove port from the restricted list*/
1460 spin_lock_bh(&priv->lock);
1461 list_for_each_entry_safe(entry, tmp, &priv->policy_list, list)
1462 if (entry && entry->port == port) {
1463 list_del(&entry->list);
1464 kfree(entry);
1465 }
1466 spin_unlock_bh(&priv->lock);
1467 } else if (op == '+') {
1468 /* add port to the restricted list*/
1469 entry = kzalloc(sizeof(struct policy_entry), GFP_KERNEL);
1470 if (!entry)
1471 return -ENOMEM;
1472
1473 INIT_LIST_HEAD(&entry->list);
1474 entry->port = port;
1475
1476 spin_lock_bh(&priv->lock);
1477 list_add(&entry->list, &priv->policy_list);
1478 spin_unlock_bh(&priv->lock);
1479
1480 fpdb_del_by_port(port);
1481 } else {
1482 return -EINVAL;
1483 }
1484
1485 return count;
1486}
1487
1488static ssize_t learner_lookup_retries_store(struct fastpath_module *m,
1489 const char *buf, size_t count)
1490{
1491 unsigned int retries;
1492 struct fp_learner *priv = m->priv;
1493
1494 if (sscanf(buf, "%u", &retries) != 1)
1495 return -EINVAL;
1496
1497 priv->lookups_retries = retries;
1498
1499 return count;
1500}
1501
1502static ssize_t learner_lookup_retries_show(struct fastpath_module *m, char *buf)
1503{
1504 struct fp_learner *priv = m->priv;
1505 return scnprintf(buf, PAGE_SIZE, "%u\n", priv->lookups_retries);
1506}
1507
1508static ssize_t learner_lookup_delay_store(struct fastpath_module *m,
1509 const char *buf, size_t count)
1510{
1511 unsigned int delay;
1512 struct fp_learner *priv = m->priv;
1513
1514 if (sscanf(buf, "%u", &delay) != 1)
1515 return -EINVAL;
1516
1517 priv->lookups_delay = delay;
1518
1519 return count;
1520}
1521
1522static ssize_t learner_lookup_delay_show(struct fastpath_module *m, char *buf)
1523{
1524 struct fp_learner *priv = m->priv;
1525 return scnprintf(buf, PAGE_SIZE, "%u[ms]\n", priv->lookups_delay);
1526}
1527
1528static ssize_t learner_nfnl_groups_show(struct fastpath_module *m, char *buf)
1529{
1530 struct fp_learner *priv = m->priv;
1531 struct sockaddr addr;
1532 int rc;
1533
1534 rc = kernel_getsockname(priv->nf_nl_sock, &addr);
1535 if (rc < 0)
1536 return scnprintf(buf, PAGE_SIZE, "ERROR\n");
1537
1538 return scnprintf(buf, PAGE_SIZE, "0x%08x\n", ((struct sockaddr_nl *)&addr)->nl_groups);
1539}
1540
1541static ssize_t learner_rtnl_groups_show(struct fastpath_module *m, char *buf)
1542{
1543 struct fp_learner *priv = m->priv;
1544 struct sockaddr addr;
1545 int rc;
1546
1547 rc = kernel_getsockname(priv->rt_nl_sock, &addr);
1548 if (rc < 0)
1549 return scnprintf(buf, PAGE_SIZE, "ERROR\n");
1550
1551 return scnprintf(buf, PAGE_SIZE, "0x%08x\n", ((struct sockaddr_nl *)&addr)->nl_groups);
1552}
1553
/* sysfs attributes exposed by the learner module:
 * policy (rw), lookup_retries (rw), lookup_delay (rw) tune behavior;
 * nfnl_groups / rtnl_groups (ro) report the bound netlink groups;
 * update / block (wo) trigger a full refresh / block of all entries.
 */
static FP_ATTR(policy, S_IRUGO|S_IWUSR, learner_policy_show, learner_policy_store);
static FP_ATTR(lookup_retries, S_IRUGO|S_IWUSR, learner_lookup_retries_show, learner_lookup_retries_store);
static FP_ATTR(lookup_delay, S_IRUGO|S_IWUSR, learner_lookup_delay_show, learner_lookup_delay_store);
static FP_ATTR(nfnl_groups, S_IRUGO, learner_nfnl_groups_show, NULL);
static FP_ATTR(rtnl_groups, S_IRUGO, learner_rtnl_groups_show, NULL);
static FP_ATTR(update, S_IWUSR, NULL, learner_ct_update_all);
static FP_ATTR(block, S_IWUSR, NULL, learner_ct_block_all);

static struct attribute *fp_learner_attrs[] = {
	&fp_attr_policy.attr,
	&fp_attr_lookup_retries.attr,
	&fp_attr_lookup_delay.attr,
	&fp_attr_nfnl_groups.attr,
	&fp_attr_rtnl_groups.attr,
	&fp_attr_update.attr,
	&fp_attr_block.attr,
	NULL, /* need to NULL terminate the list of attributes */
};
1572
1573static int
1574fp_learner_ioctl(struct fastpath_module *m, unsigned int cmd, void *data)
1575{
1576 BUG_ON(!m);
1577
1578 switch (cmd) {
1579 case FASTPATH_NL_C_IPT_NOTIFY:
1580 __learner_ct_block_all(m);
1581 break;
1582 default:
1583 pr_err("unsupported command %u\n", cmd);
1584 return -ENOTSUPP;
1585 }
1586
1587 return 0;
1588}
1589
/*
 * kobject release: full teardown of the learner, run when the last
 * reference on module->kobj is dropped (fp_learner_remove or a failed
 * kobject_add in probe).  Order matters: stop new event sources first
 * (hooks, notifiers, sockets), then drain pending work, then free.
 */
static void fp_learner_release(struct kobject *kobj)
{
	struct fastpath_module *module = to_fpmod(kobj);
	struct fp_learner *priv = module->priv;

	/* tell in-flight callbacks (e.g. update_entry) we are going away */
	priv->fp_rmmoding = 1;
	nf_unregister_net_hooks(&init_net, nf_learner_hook_data, ARRAY_SIZE(nf_learner_hook_data));
	unregister_inet6addr_notifier(&priv->inet6addr_notifier);
	unregister_netevent_notifier(&priv->netevent_notifier);
	learner_netdev_cleanup(&priv->netdev_notifier);
	unregister_netdevice_notifier(&priv->netdev_notifier);
	learner_nl_close(priv->nf_nl_sock);
	learner_nl_close(priv->rt_nl_sock);

	/* no new work can be queued now; wait for what is already running */
	cancel_work_sync(&priv->update_work);
	if (fp_learner_wq) {
		flush_workqueue(priv->wq);
		destroy_workqueue(priv->wq);
	}

	/* frees both the private state and the module wrapper itself */
	kfree(priv);
	kfree(module);

	pr_debug("fp_learner released\n");
}
1615
/* kobject type for the learner: sysfs attrs above, teardown in release(). */
static struct kobj_type ktype_learner = {
	.sysfs_ops = &fp_sysfs_ops,
	.default_attrs = fp_learner_attrs,
	.release = fp_learner_release,
};
1621
1622static int fp_learner_probe(struct fastpath_module *module)
1623{
1624 struct fp_learner *priv;
1625 int ret;
1626
1627 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
1628 if (!priv) {
1629 pr_err("no memeory\n");
1630 return -ENOMEM;
1631 }
1632
1633 module->priv = priv;
1634 snprintf(module->name, sizeof(module->name),"fp_learner");
1635
1636 spin_lock_init(&priv->lock);
1637 INIT_LIST_HEAD(&priv->policy_list);
1638 priv->lookups_retries = DEFAULT_LOOKUPS_RETRIES;
1639 priv->fp_rmmoding = 0;
1640
1641 if (fp_learner_wq) {
1642 INIT_LIST_HEAD(&priv->work_items_list);
1643 priv->lookups_delay = DEFAULT_LOOKUPS_DELAY_MS;
1644 priv->wq = create_singlethread_workqueue(module->name);
1645 if (!priv->wq) {
1646 pr_err("create workqueue failed\n");
1647 ret = -EBUSY;
1648 goto priv_kfree;
1649 }
1650 }
1651
1652 INIT_WORK(&priv->update_work, (void *)learner_ct_update_work);
1653
1654 rtnl_lock();
1655 ret = learner_nl_open(priv, &priv->rt_nl_sock, NETLINK_ROUTE, RTNETLINK_GRP);
1656 rtnl_unlock();
1657 if (ret < 0) {
1658 pr_err("learner_nl_open(NETLINK_ROUTE) failed (%d)\n", ret);
1659 goto wq_destroy;
1660 }
1661
1662 nfnl_lock(NFNL_SUBSYS_CTNETLINK);
1663 ret = learner_nl_open(priv, &priv->nf_nl_sock, NETLINK_NETFILTER, NFNETLINK_GRP);
1664 nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
1665 if (ret < 0) {
1666 pr_err("learner_nl_open(NETLINK_NETFILTER) failed (%d)\n", ret);
1667 goto nl_close_rt;
1668 }
1669
1670 priv->netdev_notifier.notifier_call = learner_netdev_event;
1671 ret = register_netdevice_notifier(&priv->netdev_notifier);
1672 if (ret < 0) {
1673 pr_err("register_netdevice_notifier failed (%d)\n", ret);
1674 goto nl_close_nf;
1675 }
1676
1677 priv->netevent_notifier.notifier_call = learner_netevent;
1678 ret = register_netevent_notifier(&priv->netevent_notifier);
1679 if (ret < 0) {
1680 pr_err("register_netevent_notifier failed (%d)\n", ret);
1681 goto netdev_notifier_unreg;
1682 }
1683
1684 priv->inet6addr_notifier.notifier_call = fp_inet6addr_event;
1685 ret = register_inet6addr_notifier(&priv->inet6addr_notifier);
1686 if (ret < 0) {
1687 pr_err("register_inet6addr_notifier failed (%d)\n", ret);
1688 goto netdev_netevent_unreg;
1689 }
1690
1691 ret = nf_register_net_hooks(&init_net, nf_learner_hook_data, ARRAY_SIZE(nf_learner_hook_data));
1692 if (ret < 0) {
1693 pr_err("nf_register_hooks failed (%d)\n", ret);
1694 goto in6_notifier_err;
1695 }
1696
1697 kobject_init(&module->kobj, &ktype_learner);
1698 ret = kobject_add(&module->kobj, module->fastpath->kobj, "%s", module->name);
1699 if (ret < 0) {
1700 pr_err("kobject_add failed (%d)\n", ret);
1701 goto nf_hooks_unreg;
1702 }
1703
1704 kobject_uevent(&module->kobj, KOBJ_ADD);
1705
1706 pr_debug("fp_learner probed\n");
1707 return 0;
1708
1709nf_hooks_unreg:
1710 kobject_put(&module->kobj);
1711 nf_unregister_net_hooks(&init_net, nf_learner_hook_data, ARRAY_SIZE(nf_learner_hook_data));
1712in6_notifier_err:
1713 unregister_inet6addr_notifier(&priv->inet6addr_notifier);
1714netdev_netevent_unreg:
1715 unregister_netevent_notifier(&priv->netevent_notifier);
1716netdev_notifier_unreg:
1717 learner_netdev_cleanup(&priv->netdev_notifier);
1718 unregister_netdevice_notifier(&priv->netdev_notifier);
1719nl_close_nf:
1720 learner_nl_close(priv->nf_nl_sock);
1721nl_close_rt:
1722 learner_nl_close(priv->rt_nl_sock);
1723wq_destroy:
1724 if (fp_learner_wq) {
1725 flush_workqueue(priv->wq);
1726 destroy_workqueue(priv->wq);
1727 }
1728priv_kfree:
1729 kfree(priv);
1730
1731 return ret;
1732}
1733
1734static int fp_learner_remove(struct fastpath_module *module)
1735{
1736 kobject_put(&module->kobj);
1737
1738 pr_debug("fp_learner removed\n");
1739 return 0;
1740}
1741
/* Operations exported to the fastpath core for the learner module. */
struct fastpath_module_ops fp_learner_ops = {
	.probe = fp_learner_probe,
	.remove = fp_learner_remove,
	.ioctl = fp_learner_ioctl,
};
1747
1748module_param(fp_learner_wq, bool, S_IRUGO);
1749MODULE_PARM_DESC(fp_learner_wq, "fastpath learner worqueue mode (default="
1750 __MODULE_STRING(FP_LEARNER_WQ_DEFAULT) ")");