blob: a130630f6685e6bb4a933df60fb320aee6692bb6 [file] [log] [blame]
b.liue9582032025-04-17 19:18:16 +08001/*
2 * Fast path Forward
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#define pr_fmt(fmt) "mfp" " forward: %s:%d: " fmt, __func__, __LINE__
11
12#include <linux/if_vlan.h>
13#include <linux/if_ether.h>
14#include <linux/tcp.h>
15#include <net/ip.h>
16#include <net/udp.h>
17#include <net/ipv6.h>
18#include <net/icmp.h>
19#include <linux/relay.h>
20#include <linux/debugfs.h>
21#include <linux/skbrb.h>
22
23#include "fp_common.h"
24#include "fp_classifier.h"
25#include "fp_database.h"
26#include "fp_device.h"
27#include "fp_core.h"
28#include "fp_ndisc.h"
29
30/**
31 * For netif_rx called in interrupt or irq_disabled, direct hook can't use.
32 * So enable netfilter hook for this situation.
33 */
34//#define FP_RX_IN_INTR_TO_NETFILTER
35
36/* function prototypes */
37static int fp_forward_direct(struct sk_buff *skb);
38static int fp_forward_queue(struct sk_buff *skb);
39static unsigned int fp_forward_nf_hook(void *priv,
40 struct sk_buff *skb,
41 const struct nf_hook_state *state);
42
43static int fp_forward_output(struct sk_buff *skb);
44static int fp_forward_netif_rx(struct sk_buff *skb);
45
46#ifdef CONFIG_ASR_TOE
47extern int fp_cm_genl_send_tuple(struct nf_conntrack_tuple *tuple, struct fpdb_entry *el,
48 int add, int len);
49#endif
50
/* A selectable receive hook: how fastpath taps packets out of the stack. */
struct rx_hook_struct {
	const char *name;		/* human-readable name shown in sysfs */
	int (*connect)(void);		/* install the hook; <0 on failure */
	void (*disconnect)(void);	/* remove the hook */
};

/* A selectable transmit hook: how fastpath hands packets to the out device. */
struct tx_hook_struct {
	const char *name;		/* human-readable name shown in sysfs */
	int (*output)(struct sk_buff *);/* returns 1 if skb consumed by fastpath */
};

/* Per-module private state: the currently selected rx/tx hooks. */
struct fp_forward {
	spinlock_t lock;
	struct rx_hook_struct *rx_hook;
	struct tx_hook_struct *tx_hook;
};

static int (*output)(struct sk_buff *); /* global output function pointer */
static int drop_on_busy = 1; /* drop packets if output dev is busy */
static int bypass_fastpath = 0; /* 1 = force everything through slowpath */
static int reply_ra; /* 1 = answer IPv6 router solicitations locally */
static unsigned int pkt_debug_level; /* 0 off, 1 rx, 2 tx, 3 rx+tx dump */
static struct rchan *fp_chan; /* relay channel for packet debug dumps */
static struct dentry *fp_dir; /* debugfs dir holding the relay file */
75
76/*--------------------------------------*/
77/*-------------- RX HOOKS --------------*/
78/*--------------------------------------*/
79
80/** netif_rx hook */
/* Install the direct netif_rx tap; packets enter via fp_forward_netif_rx(). */
static int netif_rx_hook_connect(void)
{
	netif_rx_fastpath_register(&fp_forward_netif_rx);
	return 0;
}
/* Remove the direct netif_rx tap installed by netif_rx_hook_connect(). */
static void netif_rx_hook_disconnect(void)
{
	netif_rx_fastpath_unregister();
}
90
/**
 * netfilter rx_hook: tap IPv4 and IPv6 packets at PRE_ROUTING with the
 * highest (first) priority, before any other netfilter processing.
 */
static struct nf_hook_ops nf_rx_hook_data[] __read_mostly = {
	{
		.hook = fp_forward_nf_hook,
		.pf = NFPROTO_IPV4,
		.hooknum = NF_INET_PRE_ROUTING,
		.priority = NF_IP_PRI_FIRST,
	},
	{
		.hook = fp_forward_nf_hook,
		.pf = NFPROTO_IPV6,
		.hooknum = NF_INET_PRE_ROUTING,
		.priority = NF_IP6_PRI_FIRST,
	},
};
106
/* Register both (v4 + v6) PRE_ROUTING hooks in init_net; <0 on failure. */
static int nf_rx_hook_connect(void)
{
	return nf_register_net_hooks(&init_net, nf_rx_hook_data, ARRAY_SIZE(nf_rx_hook_data));
}
111
/* Unregister the PRE_ROUTING hooks registered by nf_rx_hook_connect(). */
static void nf_rx_hook_disconnect(void)
{
	nf_unregister_net_hooks(&init_net, nf_rx_hook_data, ARRAY_SIZE(nf_rx_hook_data));
}
116
/* Indices into rx_hooks[]; also valid values for the rx_hook sysfs attr. */
#define RX_HOOK_NETIF (0)
#define RX_HOOK_NETFILTER (1)
#define RX_HOOK_NONE (2)	/* sentinel: fastpath rx disconnected */

/* Table of the available receive hooks, indexed by RX_HOOK_*. */
static struct rx_hook_struct rx_hooks[] = {
	[RX_HOOK_NETIF].name = "direct (netif_rx)",
	[RX_HOOK_NETIF].connect = &netif_rx_hook_connect,
	[RX_HOOK_NETIF].disconnect = &netif_rx_hook_disconnect,
	[RX_HOOK_NETFILTER].name = "netfilter (NF_INET_PRE_ROUTING)",
	[RX_HOOK_NETFILTER].connect = &nf_rx_hook_connect,
	[RX_HOOK_NETFILTER].disconnect = &nf_rx_hook_disconnect,
	[RX_HOOK_NONE].name = "disconnected",	/* no connect/disconnect ops */
};
130
131/*--------------------------------------*/
132/*-------------- TX HOOKS --------------*/
133/*--------------------------------------*/
134
/* Indices into tx_hooks[]; also valid values for the tx_hook sysfs attr. */
#define TX_HOOK_NDO_START_XMIT (0)
#define TX_HOOK_DEV_QUEUE_XMIT (1)
#define TX_HOOK_NONE (2)	/* sentinel: fastpath tx disconnected */

/* Table of the available transmit hooks, indexed by TX_HOOK_*. */
static struct tx_hook_struct tx_hooks[] = {
	[TX_HOOK_NDO_START_XMIT].name = "direct (ndo_start_xmit)",
	[TX_HOOK_NDO_START_XMIT].output = &fp_forward_direct,
	[TX_HOOK_DEV_QUEUE_XMIT].name = "queue (dev_queue_xmit)",
	[TX_HOOK_DEV_QUEUE_XMIT].output = &fp_forward_queue,
	[TX_HOOK_NONE].name = "disconnected"	/* .output stays NULL */
};

/* Initial hook selection; overridable via module parameters (see bottom). */
static unsigned int fp_forward_rx_hook = FP_FORWARD_RX_HOOK_DEFAULT;
static unsigned int fp_forward_tx_hook = FP_FORWARD_TX_HOOK_DEFAULT;
149
150static void fp_print_pkt(char *buf, u32 buf_len)
151{
152 if (!fp_chan)
153 return;
154
155 relay_write(fp_chan, buf, buf_len);
156}
157
158static void fp_dump_input_pkt(struct sk_buff *skb, char *rx_tx)
159{
160 struct iphdr *iph = (struct iphdr *)(skb->data);
161 u8 version = iph->version;
162 char buf[512] = {0};
163 u32 len = 0;
164 u64 ts_nsec;
165 unsigned long rem_nsec;
166
167 ts_nsec = local_clock();
168 rem_nsec = do_div(ts_nsec, 1000000000);
169
170 if (version == 4) {
171 if (iph->protocol == IPPROTO_ICMP) {
172 struct icmphdr *icmph = (struct icmphdr *)(iph + 1);
173 int type = icmph->type;
174 if (type == 8) {
175 len = scnprintf(buf, 512, "[%5lu.%06lu] --->>%s, receive icmp request, src=%pI4 dst=%pI4 ID=%u SEQ=%u\n",
176 (unsigned long)ts_nsec, rem_nsec / 1000,
177 rx_tx, &iph->saddr, &iph->daddr, ntohs(icmph->un.echo.id),
178 ntohs(icmph->un.echo.sequence));
179 }else if (type == 0) {
180 len = scnprintf(buf, 512, "[%5lu.%06lu] --->>%s, receive icmp reply, src=%pI4 dst=%pI4 ID=%u SEQ=%u\n",
181 (unsigned long)ts_nsec, rem_nsec / 1000,
182 rx_tx, &iph->saddr, &iph->daddr, ntohs(icmph->un.echo.id),
183 ntohs(icmph->un.echo.sequence));
184 }
185 } else if (iph->protocol == IPPROTO_UDP) {
186 struct udphdr *uh = (struct udphdr *)(iph + 1);
187 len = scnprintf(buf, 512, "[%5lu.%06lu] --->>%s, receive UDP, src=%pI4 dst=%pI4 ID=%u sp=%u dp=%u\n",
188 (unsigned long)ts_nsec, rem_nsec / 1000,
189 rx_tx, &iph->saddr, &iph->daddr, ntohs(iph->id), ntohs(uh->source), ntohs(uh->dest));
190 } else if (iph->protocol == IPPROTO_TCP) {
191 struct tcphdr *th = (struct tcphdr *)(iph + 1);
192 len = scnprintf(buf, 512, "[%5lu.%06lu] --->>%s, receive TCP, src=%pI4 dst=%pI4 ID=%u sp=%u dp=%u seq=%u ack=%u\n",
193 (unsigned long)ts_nsec, rem_nsec / 1000,
194 rx_tx, &iph->saddr, &iph->daddr, ntohs(iph->id), ntohs(th->source), ntohs(th->dest),
195 ntohl(th->seq), ntohl(th->ack_seq));
196 } else {
197 len = scnprintf(buf, 512, "[%5lu.%06lu] --->>%s, receive pkt type %u, src=%pI4 dst=%pI4 ID=%u\n",
198 (unsigned long)ts_nsec, rem_nsec / 1000,
199 rx_tx, iph->protocol, &iph->saddr, &iph->daddr, ntohs(iph->id));
200 }
201 } else if (version == 6) {
202 struct ipv6hdr *ip6h;
203 __be16 frag_off;
204 int offset;
205 u8 nexthdr;
206
207 ip6h = (struct ipv6hdr *)(skb->data);
208 nexthdr = ip6h->nexthdr;
209 /* not support fragment pkt */
210 if (nexthdr == NEXTHDR_FRAGMENT)
211 return;
212 if (ipv6_ext_hdr(nexthdr)) {
213 offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off);
214 if (offset < 0)
215 return;
216 } else
217 offset = sizeof(struct ipv6hdr);
218
219 if (nexthdr == IPPROTO_ICMPV6) {
220 struct icmp6hdr *icmp6;
221 if (!pskb_may_pull(skb, ((unsigned char*)ip6h + offset + 6 - skb->data)))
222 return;
223 icmp6 = (struct icmp6hdr *)((unsigned char*)ip6h + offset);
224 len = scnprintf(buf, 512, "[%5lu.%06lu] --->>%s, receive icmp6, src=%pI6c dst=%pI6c type=%u code=%u id=%u\n",
225 (unsigned long)ts_nsec, rem_nsec / 1000,
226 rx_tx, &ip6h->saddr, &ip6h->daddr, icmp6->icmp6_type, icmp6->icmp6_type,
227 ntohs(icmp6->icmp6_identifier));
228 } else if (nexthdr == IPPROTO_UDP) {
229 struct udphdr *uh6 = (struct udphdr *)((unsigned char*)ip6h + offset);
230 len = scnprintf(buf, 512, "[%5lu.%06lu] --->>%s, receive UDP6, src=%pI6c dst=%pI6c sp=%u dp=%u\n",
231 (unsigned long)ts_nsec, rem_nsec / 1000,
232 rx_tx, &ip6h->saddr, &ip6h->daddr, ntohs(uh6->source), ntohs(uh6->dest));
233 } else if (nexthdr == IPPROTO_TCP) {
234 struct tcphdr *th6 = (struct tcphdr *)((unsigned char*)ip6h + offset);
235 len = scnprintf(buf, 512, "[%5lu.%06lu] --->>%s, receive TCP6, src=%pI6c dst=%pI6c sp=%u dp=%u seq=%u ack=%u\n",
236 (unsigned long)ts_nsec, rem_nsec / 1000,
237 rx_tx, &ip6h->saddr, &ip6h->daddr, ntohs(th6->source), ntohs(th6->dest),
238 ntohl(th6->seq), ntohl(th6->ack_seq));
239 } else {
240 len = scnprintf(buf, 512, "[%5lu.%06lu] --->>%s, receive pkt type %u, src=%pI6c dst=%pI6c\n",
241 (unsigned long)ts_nsec, rem_nsec / 1000,
242 rx_tx, nexthdr, &ip6h->saddr, &ip6h->daddr);
243 }
244 }else {
245 return;
246 }
247
248 fp_print_pkt(buf, len);
249}
250
251static void fp_dump_output_pkt(struct sk_buff *skb, char *rx_tx)
252{
253 struct iphdr *iph;
254 u8 version;
255 char buf[512] = {0};
256 u32 len = 0;
257 u64 ts_nsec;
258 unsigned long rem_nsec;
259
260 ts_nsec = local_clock();
261 rem_nsec = do_div(ts_nsec, 1000000000);
262
263 iph = (struct iphdr *)(skb->data);
264
265 version = iph->version;
266 if (version == 4) {
267 if (iph->protocol == IPPROTO_ICMP) {
268 struct icmphdr *icmph = (struct icmphdr *)(iph + 1);
269 int type = icmph->type;
270 if (type == 8) {
271 len = scnprintf(buf, 512, "[%5lu.%06lu] <<---%s, send icmp request, src=%pI4 dst=%pI4 ID=%u SEQ=%u\n",
272 (unsigned long)ts_nsec, rem_nsec / 1000,
273 rx_tx, &iph->saddr, &iph->daddr, ntohs(icmph->un.echo.id),
274 ntohs(icmph->un.echo.sequence));
275 }else if (type == 0) {
276 len = scnprintf(buf, 512, "[%5lu.%06lu] <<---%s, send icmp reply, src=%pI4 dst=%pI4 ID=%u SEQ=%u\n",
277 (unsigned long)ts_nsec, rem_nsec / 1000,
278 rx_tx, &iph->saddr, &iph->daddr, ntohs(icmph->un.echo.id),
279 ntohs(icmph->un.echo.sequence));
280 }
281 } else if (iph->protocol == IPPROTO_UDP) {
282 struct udphdr *uh = (struct udphdr *)(iph + 1);
283 len = scnprintf(buf, 512, "[%5lu.%06lu] <<---%s, send UDP, src=%pI4 dst=%pI4 ID=%u sp=%u dp=%u\n",
284 (unsigned long)ts_nsec, rem_nsec / 1000,
285 rx_tx, &iph->saddr, &iph->daddr, ntohs(iph->id), ntohs(uh->source), ntohs(uh->dest));
286 } else if (iph->protocol == IPPROTO_TCP) {
287 struct tcphdr *th = (struct tcphdr *)(iph + 1);
288 len = scnprintf(buf, 512, "[%5lu.%06lu] <<---%s, send TCP, src=%pI4 dst=%pI4 ID=%u sp=%u dp=%u seq=%u ack=%u\n",
289 (unsigned long)ts_nsec, rem_nsec / 1000,
290 rx_tx, &iph->saddr, &iph->daddr, ntohs(iph->id), ntohs(th->source), ntohs(th->dest),
291 ntohl(th->seq), ntohl(th->ack_seq));
292 } else {
293 len = scnprintf(buf, 512, "[%5lu.%06lu] <<---%s, send pkt type %u, src=%pI4 dst=%pI4 ID=%u\n",
294 (unsigned long)ts_nsec, rem_nsec / 1000,
295 rx_tx, iph->protocol, &iph->saddr, &iph->daddr, ntohs(iph->id));
296 }
297 } else if (version == 6) {
298 struct ipv6hdr *ip6h;
299 __be16 frag_off;
300 int offset;
301 u8 nexthdr;
302
303 ip6h = (struct ipv6hdr *)(skb->data);
304
305 nexthdr = ip6h->nexthdr;
306 /* not support fragment pkt */
307 if (nexthdr == NEXTHDR_FRAGMENT)
308 return;
309 if (ipv6_ext_hdr(nexthdr)) {
310 offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off);
311 if (offset < 0)
312 return;
313 } else
314 offset = sizeof(struct ipv6hdr);
315
316 if (nexthdr == IPPROTO_ICMPV6) {
317 struct icmp6hdr *icmp6;
318 if (!pskb_may_pull(skb, ((unsigned char*)ip6h + offset + 6 - skb->data)))
319 return;
320 icmp6 = (struct icmp6hdr *)((unsigned char*)ip6h + offset);
321 len = scnprintf(buf, 512, "[%5lu.%06lu] <<---%s, send icmp6, src=%pI6c dst=%pI6c type=%u code=%u id=%u\n",
322 (unsigned long)ts_nsec, rem_nsec / 1000,
323 rx_tx, &ip6h->saddr, &ip6h->daddr, icmp6->icmp6_type, icmp6->icmp6_type,
324 ntohs(icmp6->icmp6_identifier));
325 } else if (nexthdr == IPPROTO_UDP) {
326 struct udphdr *uh6 = (struct udphdr *)((unsigned char*)ip6h + offset);
327 len = scnprintf(buf, 512, "[%5lu.%06lu] <<---%s, send UDP6, src=%pI6c dst=%pI6c sp=%u dp=%u\n",
328 (unsigned long)ts_nsec, rem_nsec / 1000,
329 rx_tx, &ip6h->saddr, &ip6h->daddr, ntohs(uh6->source), ntohs(uh6->dest));
330 } else if (nexthdr == IPPROTO_TCP) {
331 struct tcphdr *th6 = (struct tcphdr *)((unsigned char*)ip6h + offset);
332 len = scnprintf(buf, 512, "[%5lu.%06lu] <<---%s, send TCP6, src=%pI6c dst=%pI6c sp=%u dp=%u seq=%u ack=%u\n",
333 (unsigned long)ts_nsec, rem_nsec / 1000,
334 rx_tx, &ip6h->saddr, &ip6h->daddr, ntohs(th6->source), ntohs(th6->dest),
335 ntohl(th6->seq), ntohl(th6->ack_seq));
336 } else {
337 len = scnprintf(buf, 512, "[%5lu.%06lu] <<---%s, send pkt type %u, src=%pI6c dst=%pI6c\n",
338 (unsigned long)ts_nsec, rem_nsec / 1000,
339 rx_tx, nexthdr, &ip6h->saddr, &ip6h->daddr);
340 }
341 } else {
342 return;
343 }
344
345 fp_print_pkt(buf, len);
346}
/**
 * Forward an skb directly to the output interface if classified as
 * fastpath. skb->dev must point to the src net_device (done in
 * eth_type_trans or in drivers)
 *
 * @todo 1. Add an option to enable/disable fastpath for a
 *       specific net_device from userspace (via
 *       ifconfig/ethtool)
 * @note If the source net_device doesn't have fastpath enabled,
 *       a packet from it can still traverse through fastpath if
 *       the output net_device supports it and there was a match
 *       in the fastpath database.
 * @param skb skb to forward through fastpath
 *
 * @return 1 if skb consumed by fastpath, 0 otherwise (should be
 *         sent through slowpath)
 */
static int fp_forward_direct(struct sk_buff *skb)
{
	int ret, len = skb->len; /* default is slowpath */
	struct fp_net_device *dst, *src;
	struct fpdb_entry *el;
	struct netdev_queue *txq;
	struct sk_buff *skb2 = skb;
	struct nf_conntrack_tuple tuple;
	const struct net_device_ops *ops;

	/*
	 * fastpath direct tx hook should be used only when no packets can
	 * arrive in irq/irq disable context, since fastpath only protects
	 * at soft-irq level. Otherwise this could possibly result in a deadlock.
	 */

	WARN_ONCE(in_irq() || irqs_disabled(),
		"fastpath direct tx called from irq, or irq disabled!\n");

	/* look up the connection in the fastpath database */
	el = fpc_classify_start(skb, &tuple);
	if (unlikely(!el))
		goto slowpath;
	/* in_dev/out_dev are RCU-protected; hold BH-RCU across their use */
	rcu_read_lock_bh();

	src = rcu_dereference_bh(el->in_dev);
	dst = rcu_dereference_bh(el->out_dev);

	/* tx packet dump enabled at level 2/3; "ccinet" devices are uplink */
	if (pkt_debug_level == 2 || pkt_debug_level == 3) {
		if (!strncasecmp(dst->dev->name, "ccinet", 6))
			fp_dump_output_pkt(skb, "F_UL");
		else
			fp_dump_output_pkt(skb, "F_DL");
	}

	ops = dst->dev->netdev_ops;
	/* header rewrite / NAT fixups; non-zero means fall back to slowpath */
	if (fpc_classify_finish(skb, el)) {
		rcu_read_unlock_bh();
		goto slowpath;
	}

	skb_reset_mac_header(skb);
	/* pick the tx queue and lock it as dev_hard_start_xmit() would */
	txq = netdev_core_pick_tx(dst->dev, skb, NULL);
	HARD_TX_LOCK(dst->dev, txq, smp_processor_id());
	if (unlikely(netif_xmit_frozen_or_stopped(txq))) {
		/* queue unavailable: skb2=NULL routes us to the BUSY path */
		skb2 = NULL;
		dst->stats.queue_stopped++;
	}

	if (skb2)
		skb2->dev = dst->dev;
	/* transmit directly through the driver, bypassing the qdisc layer */
	ret = skb2 ? ops->ndo_start_xmit(skb2, dst->dev) : NETDEV_TX_BUSY;

	switch (ret) {
	case NETDEV_TX_OK:
		/* sent through fastpath */
		txq_trans_update(txq);
		src->stats.rx_packets++;
		src->stats.rx_bytes += len;
		dst->stats.tx_packets++;
		/* account the L2 header the driver prepends, if any */
		if (dst->dev->header_ops)
			dst->stats.tx_bytes += len + ETH_HLEN;
		else
			dst->stats.tx_bytes += len;
		break;
	case NET_XMIT_CN:
		/* congestion: driver consumed (dropped) the skb */
		src->stats.rx_dropped++;
		dst->stats.tx_dropped++;
		break;
	case NET_XMIT_DROP:
	case NETDEV_TX_BUSY:
	default:
		if (unlikely(skb2)) {
			/* shouldn't happen since we check txq before trying to transmit */
			src->stats.rx_errors++;
			dst->stats.tx_errors++;
			printk(KERN_DEBUG "Failed to send through fastpath (ret=%d)\n", ret);
		}

		/* policy: either drop busy-path packets here, or let the
		 * caller push them through slowpath (return 0 below) */
		if (drop_on_busy) {
			src->stats.rx_dropped++;
			dst->stats.tx_dropped++;
			dev_kfree_skb_any(skb);
		}
	}

	HARD_TX_UNLOCK(dst->dev, txq);

#ifdef CONFIG_ASR_TOE
	/* first successful tx for this entry: publish the tuple via genl */
	if ((0 == el->nl_flag) && (ret == NETDEV_TX_OK)) {
		//fpdb_dump_entry("fp_cm_genl_send_tuple, entry dump:\n", el);
		fp_cm_genl_send_tuple(&tuple, el, 1, len);
	}
#endif
	rcu_read_unlock_bh();

	if (likely(skb2) || drop_on_busy)
		return 1;
slowpath:
	if (pkt_debug_level == 2 || pkt_debug_level == 3) {
		if (!strncasecmp(skb->dev->name, "ccinet", 6))
			fp_dump_output_pkt(skb, "S_DL");
		else
			fp_dump_output_pkt(skb, "S_UL");
	}

	/* DO NOT do skb copy if the skb is allocated from skbrb
	 * (skb ring buffer for bridge performace)
	 */
	if (!IS_SKBRB_SKB(skb) && FP_IS_SKB_P(skb))
		pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
	return 0;
}
476
477/**
478 * Forward an skb to the output device's queue if classified as fastpath.
479 *
480 * @param skb skb to forward
481 *
482 * @return 1 if consumed by fastpath, 0 otherwise (should be sent through slowpath)
483 */
484static int fp_forward_queue(struct sk_buff *skb)
485{
486 int ret, len = skb->len;
487 struct fp_net_device *dst, *src;
488 struct fpdb_entry *el;
489 struct vlan_hdr *vhdr;
490 struct nf_conntrack_tuple tuple;
491
492 if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
493 skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
494 vhdr = (struct vlan_hdr *) skb->data;
495 skb = skb_vlan_untag(skb);
496 if (unlikely(!skb))
497 return 0;
498 if (skb_vlan_tag_present(skb)) {
499 if (!vlan_do_receive(&skb)) {
500 if (unlikely(!skb))
501 return 1;
502 }
503 }
504 }
505
506 el = fpc_classify_start(skb, &tuple);
507 if (unlikely(!el)) {
508 /* DO NOT do skb copy if the skb is allocated from skbrb
509 * (skb ring buffer for bridge performace)
510 */
511 if (!IS_SKBRB_SKB(skb) && FP_IS_SKB_P(skb))
512 pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
513 return 0;
514 }
515
516#ifdef CONFIG_ASR_TOE
517 if (0 == el->nl_flag) {
518 //fpdb_dump_entry("fp_cm_genl_send_tuple, entry dump:\n", el);
519 fp_cm_genl_send_tuple(&tuple, el, 1, len);
520 }
521#endif
522
523 src = fpdev_hold(el->in_dev);
524 dst = fpdev_hold(el->out_dev);
525 if (fpc_classify_finish(skb, el))
526 return 0;
527
528 skb->dev = dst->dev;
529
530 ret = dev_queue_xmit(skb);
531 switch (ret) {
532 case NET_XMIT_SUCCESS:
533 src->stats.rx_bytes += len;
534 src->stats.rx_packets++;
535 dst->stats.tx_bytes += len + ETH_HLEN;
536 dst->stats.tx_packets++;
537 break;
538 case NET_XMIT_CN:
539 src->stats.rx_dropped++;
540 dst->stats.tx_dropped++;
541 dst->stats.queue_stopped++;
542 break;
543 case NET_XMIT_DROP:
544 default:
545 pr_info("unexpected return code from dev_queue_xmit (%d)\n", ret);
546 src->stats.rx_errors++;
547 dst->stats.tx_errors++;
548 }
549
550 fpdev_put(dst);
551 fpdev_put(src);
552
553 return 1;
554}
555
/*
 * Netfilter PRE_ROUTING entry point: steal the skb if fastpath consumes
 * it, otherwise let the normal stack continue.
 */
static unsigned int fp_forward_nf_hook(void *priv, struct sk_buff *skb,
				       const struct nf_hook_state *state)
{
	WARN_ON_ONCE(irqs_disabled());

	if (fp_forward_output(skb))
		return NF_STOLEN;	/* consumed by fastpath */

	return NF_ACCEPT;	/* continue through slowpath */
}
566
/*
 * netif_rx tap entry point: validate the IP header, trim trailing padding,
 * then try to forward through fastpath.  Returns 1 if the skb was consumed,
 * 0 if it should continue through the normal stack (slowpath).
 *
 * When the packet is not consumed, IPv6 RS/RA messages are additionally
 * snooped/handled by the fastpath ndisc helpers.
 */
static int fp_forward_netif_rx(struct sk_buff *skb)
{
	struct iphdr *iph;
	struct ipv6hdr *ipv6h;
	u32 len;
	struct vlan_hdr *vhdr;
	int ret;

	if (unlikely(bypass_fastpath == 1))
		goto slowpath;

#ifdef FP_RX_IN_INTR_TO_NETFILTER
	/* in irq context the netfilter hook handles these packets instead */
	if (in_irq() || irqs_disabled())
		goto slowpath;
#endif
	/* strip a VLAN tag in the payload into skb metadata first */
	if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
	    skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
		vhdr = (struct vlan_hdr *) skb->data;
		skb = skb_vlan_untag(skb);
		if (unlikely(!skb))
			goto slowpath;
		if (skb_vlan_tag_present(skb)) {
			if (!vlan_do_receive(&skb)) {
				if (unlikely(!skb))
					return 1;
			}
		}
	}

	/* rx packet dump enabled at level 1/3; "ccinet" devices are downlink */
	if (pkt_debug_level == 1 || pkt_debug_level == 3) {
		if (!strncasecmp(skb->dev->name, "ccinet", 6))
			fp_dump_input_pkt(skb, "DL");
		else
			fp_dump_input_pkt(skb, "UL");
	}

	iph = (struct iphdr *)skb->data;

	if (likely(iph->version == 4)) {

		/* IPv4 header must be at least 20 bytes (ihl >= 5 words) */
		if (iph->ihl < 5)
			goto slowpath_warn;

		len = ntohs(iph->tot_len);

		/* total length must fit in the skb and cover the header */
		if (skb->len < len || len < (iph->ihl * 4))
			goto slowpath_warn;

	} else if (likely(iph->version == 6)) {

		ipv6h = (struct ipv6hdr *)skb->data;

		len = ntohs(ipv6h->payload_len);

		/* payload_len==0 with hop-by-hop may be a jumbogram: skip trim */
		if (!len && ipv6h->nexthdr == NEXTHDR_HOP)
			goto done;

		if (len + sizeof(struct ipv6hdr) > skb->len)
			goto slowpath_warn;

		len = len + sizeof(struct ipv6hdr);
	} else {
		goto slowpath;
	}

	/* trim possible padding on skb*/
	if (pskb_trim_rcsum(skb, len))
		goto slowpath_warn;

done:
	ret = fp_forward_output(skb);
	if (!ret) {
		/* not consumed: optionally answer router solicitations */
		if (reply_ra && fpnd_is_rs(skb)) {
			struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->data;
			printk(KERN_DEBUG "received RS on dev (%s), saddr=%pI6c, daddr=%pI6c\n",
			       skb->dev->name, &ipv6h->saddr, &ipv6h->daddr);
			return fpnd_process_rs(skb);
		}

		/* snoop router advertisements for prefix/route learning */
		if (fpnd_is_ra(skb)) {
			struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->data;
			printk(KERN_DEBUG "received RA on dev (%s), saddr=%pI6c, daddr=%pI6c\n",
			       skb->dev->name, &ipv6h->saddr, &ipv6h->daddr);
			fpnd_process_ra(skb->dev, skb);
		}
	}
	return ret;
slowpath_warn:
	pr_debug_ratelimited("bad ip header received\n");
slowpath:
	return 0;
}
659
660static inline int fp_forward_output(struct sk_buff *skb)
661{
662 if (unlikely(bypass_fastpath == 1))
663 return 0;
664
665 if (output)
666 return output(skb);
667 return 0;
668}
669
/* Point the global output at the NONE hook (NULL output => slowpath). */
static inline void tx_hook_disconnect(struct fp_forward *priv)
{
	BUG_ON(!priv);
	priv->tx_hook = &tx_hooks[TX_HOOK_NONE];
	output = priv->tx_hook->output;
}
676
/* Activate priv->tx_hook by publishing its output fn to the global pointer. */
static inline void tx_hook_connect(struct fp_forward *priv)
{
	BUG_ON(!priv || !priv->tx_hook || !priv->tx_hook->output);
	output = priv->tx_hook->output;
}
682
/*
 * Tear down the active rx hook (if it has a disconnect op) and mark the
 * rx side as disconnected.  Note: resets priv->rx_hook to RX_HOOK_NONE.
 */
static inline void rx_hook_disconnect(struct fp_forward *priv)
{
	BUG_ON(!priv);

	if (priv->rx_hook->disconnect)
		priv->rx_hook->disconnect();
	priv->rx_hook = &rx_hooks[RX_HOOK_NONE];
}
691
/*
 * Install priv->rx_hook.  On failure the rx side is marked disconnected
 * and the (negative) error from the hook's connect op is returned.
 */
static inline int rx_hook_connect(struct fp_forward *priv)
{
	int ret;

	BUG_ON(!priv || !priv->rx_hook || !priv->rx_hook->connect);

	ret = priv->rx_hook->connect();
	if (ret < 0) {
		pr_err("rx_hook connect failed (%d)\n", ret);
		priv->rx_hook = &rx_hooks[RX_HOOK_NONE];
		return ret;
	}

	return 0;
}
707
/*
 * Disconnect both tx and rx hooks, plus the auxiliary netfilter hook that
 * fp_forward_connect() registers when the primary rx hook is not the
 * netfilter one.
 */
static inline void fp_forward_disconnect(struct fp_forward *priv)
{
#ifdef FP_RX_IN_INTR_TO_NETFILTER
	/* remember the active hook: rx_hook_disconnect() resets it to NONE */
	struct rx_hook_struct *old_rx_hook = priv->rx_hook;
#endif

	tx_hook_disconnect(priv);
	rx_hook_disconnect(priv);
#ifdef FP_RX_IN_INTR_TO_NETFILTER
	/*
	 * BUGFIX: test the hook that was active *before* the disconnect
	 * above.  The old code tested priv->rx_hook after it had already
	 * been reset to RX_HOOK_NONE, so when the primary hook was the
	 * netfilter one it got unregistered twice.
	 */
	if (old_rx_hook != &rx_hooks[RX_HOOK_NETFILTER])
		nf_rx_hook_disconnect();
#endif
}
717
/*
 * Connect the tx hook, then the rx hook; with FP_RX_IN_INTR_TO_NETFILTER
 * additionally register the netfilter rx hook (to catch packets arriving
 * in irq context) unless it already is the primary rx hook.
 * Returns 0 on success; on failure everything connected so far is undone.
 */
static inline int fp_forward_connect(struct fp_forward *priv)
{
	int ret;

	tx_hook_connect(priv);
	ret = rx_hook_connect(priv);
	if (ret < 0) {
		pr_err("rx_hook connect failed (%d)\n", ret);
		tx_hook_disconnect(priv);
		return ret;
	}

#ifdef FP_RX_IN_INTR_TO_NETFILTER
	if (priv->rx_hook != &rx_hooks[RX_HOOK_NETFILTER]) {
		ret = nf_rx_hook_connect();
		if (ret < 0) {
			pr_err("netfilter rx_hook connect failed (%d)\n", ret);
			/* BUGFIX: unwind the hooks connected above so the
			 * caller is left in a clean, disconnected state */
			rx_hook_disconnect(priv);
			tx_hook_disconnect(priv);
			return ret;
		}
		//pr_info("=== mfp: also enable netfilter hook for RX\n");
	}
#endif

	return 0;
}
743
744static ssize_t rx_hook_show(struct fastpath_module *m, char *buf)
745{
746 struct fp_forward *priv = m->priv;
747 int i, len = sprintf(buf, "fastpath forward rx hooks:\n");
748 char c;
749
750 for (i = 0; i < ARRAY_SIZE(rx_hooks); i++) {
751 c = (priv->rx_hook == &rx_hooks[i]) ? '*' : ' ';
752 len += sprintf(buf+len, "%c %s\n", c, rx_hooks[i].name);
753 }
754
755 return len;
756}
757
758
759static ssize_t rx_hook_store(struct fastpath_module *m, const char *buf,
760 size_t count)
761{
762 struct fp_forward *priv = m->priv;
763 struct rx_hook_struct *rx_hook;
764 unsigned int idx;
765 int ret;
766
767 sscanf(buf, "%u", &idx);
768
769 if (idx > ARRAY_SIZE(rx_hooks) - 1) {
770 pr_debug("Invalid rx hook=%d\n", idx);
771 return -EINVAL;
772 }
773
774 rx_hook = &rx_hooks[idx];
775 if (rx_hook == priv->rx_hook)
776 return count; /* no change */
777
778#ifdef FP_RX_IN_INTR_TO_NETFILTER
779 if (priv->rx_hook != &rx_hooks[RX_HOOK_NETFILTER])
780 rx_hook_disconnect(priv);
781#else
782 rx_hook_disconnect(priv);
783#endif
784 priv->rx_hook = rx_hook;
785
786#ifdef FP_RX_IN_INTR_TO_NETFILTER
787 if (rx_hook == &rx_hooks[RX_HOOK_NETFILTER])
788 return count;
789#endif
790 ret = rx_hook_connect(priv);
791 if (ret < 0)
792 return ret;
793
794 return count;
795}
796
797static ssize_t tx_hook_show(struct fastpath_module *m, char *buf)
798{
799 struct fp_forward *priv = m->priv;
800 int i, len = sprintf(buf, "fastpath forward tx hooks:\n");
801 char c;
802
803 for (i = 0; i < ARRAY_SIZE(tx_hooks); i++) {
804 c = (priv->tx_hook == &tx_hooks[i]) ? '*' : ' ';
805 len += sprintf(buf+len, "%c %s\n", c, tx_hooks[i].name);
806 }
807
808 return len;
809}
810
811
812static ssize_t tx_hook_store(struct fastpath_module *m, const char *buf,
813 size_t count)
814{
815 struct fp_forward *priv = m->priv;
816 struct tx_hook_struct *tx_hook;
817 unsigned int idx;
818
819 sscanf(buf, "%u", &idx);
820
821 if (idx > ARRAY_SIZE(tx_hooks) - 1) {
822 pr_debug("Invalid tx hook=%d\n", idx);
823 return -EINVAL;
824 }
825
826 tx_hook = &tx_hooks[idx];
827
828 if (tx_hook == priv->tx_hook)
829 return count; /* no change */
830
831 tx_hook_disconnect(priv);
832 priv->tx_hook = tx_hook;
833 tx_hook_connect(priv);
834
835 return count;
836}
837
/* sysfs: show the drop-on-busy policy flag. */
static ssize_t dob_show(struct fastpath_module *m, char *buf)
{
	return sprintf(buf, "fastpath forward drop on busy: %d\n", drop_on_busy);
}
842
843
844static ssize_t dob_store(struct fastpath_module *m, const char *buf,
845 size_t count)
846{
847 unsigned int dob;
848
849 sscanf(buf, "%u", &dob);
850
851 if (dob != 0 && dob != 1) {
852 pr_debug("Invalid value %d - should be 1/0 \n", dob);
853 return -EINVAL;
854 }
855
856 drop_on_busy = dob;
857
858 return count;
859}
860
/* sysfs: show the global fastpath bypass flag. */
static ssize_t bypass_show(struct fastpath_module *m, char *buf)
{
	return sprintf(buf, "fastpath bypass flag: %d\n", bypass_fastpath);
}
865
866
867static ssize_t bypass_store(struct fastpath_module *m, const char *buf,
868 size_t count)
869{
870 unsigned int bypass_fastpath_flag;
871
872 sscanf(buf, "%u", &bypass_fastpath_flag);
873
874 if (bypass_fastpath_flag != 0 && bypass_fastpath_flag != 1) {
875 pr_debug("bypass_store: Invalid value %d - should be 1/0 \n",
876 bypass_fastpath_flag);
877 return -EINVAL;
878 }
879 bypass_fastpath = bypass_fastpath_flag;
880 return count;
881}
882
883
/*
 * kobject release callback, invoked by the final kobject_put() on the
 * module's kobject.  Disconnects all hooks and frees BOTH priv and the
 * fastpath_module itself — callers must not touch either afterwards.
 */
static void fp_forward_release(struct kobject *kobj)
{
	struct fastpath_module *module = to_fpmod(kobj);
	struct fp_forward *priv = module->priv;

	fp_forward_disconnect(priv);

	pr_debug("fp_forward released\n");
	kfree(priv);
	kfree(module);
}
895
/* sysfs: show the reply-to-router-solicitation flag. */
static ssize_t reply_ra_show(struct fastpath_module *m, char *buf)
{
	return sprintf(buf, "fastpath reply_ra flag: %d\n", reply_ra);
}
900
901static ssize_t reply_ra_store(struct fastpath_module *m, const char *buf,
902 size_t count)
903{
904 unsigned int reply_ra_flag;
905
906 sscanf(buf, "%u", &reply_ra_flag);
907
908 if (reply_ra_flag != 0 && reply_ra_flag != 1) {
909 pr_debug("reply_ra_store: Invalid value %d - should be 1/0 \n",
910 reply_ra_flag);
911 return -EINVAL;
912 }
913 reply_ra = reply_ra_flag;
914 return count;
915}
916
/*
 * Relay callback: create the debugfs file backing a relay channel buffer.
 * is_global=1 requests a single buffer shared by all CPUs.
 */
static struct dentry *create_buf_file_handler(const char *filename,
					      struct dentry *parent,
					      umode_t mode,
					      struct rchan_buf *buf,
					      int *is_global)
{
	struct dentry *buf_file;

	buf_file = debugfs_create_file(filename, mode|S_IRUGO|S_IWUSR, parent, buf,
				       &relay_file_operations);
	*is_global = 1;
	return buf_file;
}
930
/* Relay callback: tear down the debugfs file created for a channel buffer. */
static int remove_buf_file_handler(struct dentry *dentry)
{
	debugfs_remove(dentry);

	return 0;
}
936
/* Relay channel callbacks used by the pkt_debug channel. */
static struct rchan_callbacks fp_relay_callbacks =
{
	.create_buf_file = create_buf_file_handler,
	.remove_buf_file = remove_buf_file_handler,
};
942
/* sysfs: show the current packet-dump level (0 off, 1 rx, 2 tx, 3 both). */
static ssize_t pkt_debug_level_show(struct fastpath_module *m, char *buf)
{
	return sprintf(buf, "%d\n", pkt_debug_level);
}
947
948static ssize_t set_pkt_debug_level(struct fastpath_module *m, const char *buf,
949 size_t count)
950{
951 unsigned int cmd;
952
953 sscanf(buf, "%u", &cmd);
954
955 if (cmd > 3) {
956 pr_debug("Invalid value for pkt_debug_level %d\n", cmd);
957 return -EINVAL;
958 }
959 /* 0: off
960 1: rx enable
961 2: tx enable
962 3: rx+tx enable
963 */
964 pkt_debug_level = cmd;
965
966 if (cmd > 0) {
967 fp_dir = debugfs_create_dir("fastpath", NULL);
968 if (!fp_dir) {
969 pr_err("debugfs_create_dir fastpath failed.\n");
970 return count;
971 }
972
973 fp_chan = relay_open("pkt_debug", fp_dir, 0x200000, 1, &fp_relay_callbacks, NULL);;
974 if(!fp_chan){
975 pr_err("relay_open pkt_debug failed.\n");
976 debugfs_remove(fp_dir);
977 }
978 } else if (cmd == 0) {
979 if (fp_chan) {
980 relay_close(fp_chan);
981 fp_chan = NULL;
982 debugfs_remove(fp_dir);
983 }
984 }
985
986 return count;
987}
988
/* sysfs attributes exposed under the module's kobject directory. */
static FP_ATTR(rx_hook, S_IRUGO|S_IWUSR, rx_hook_show, rx_hook_store);
static FP_ATTR(tx_hook, S_IRUGO|S_IWUSR, tx_hook_show, tx_hook_store);
static FP_ATTR(drop_on_busy, S_IRUGO|S_IWUSR, dob_show, dob_store);
static FP_ATTR(bypass_fastpath, S_IRUGO|S_IWUSR, bypass_show, bypass_store);
static FP_ATTR(reply_ra, S_IRUGO|S_IWUSR, reply_ra_show, reply_ra_store);
static FP_ATTR(pkt_debug, S_IRUGO|S_IWUSR, pkt_debug_level_show, set_pkt_debug_level);

static struct attribute *fp_forward_attrs[] = {
	&fp_attr_rx_hook.attr,
	&fp_attr_tx_hook.attr,
	&fp_attr_drop_on_busy.attr,
	&fp_attr_bypass_fastpath.attr,
	&fp_attr_reply_ra.attr,
	&fp_attr_pkt_debug.attr,
	NULL, /* need to NULL terminate the list of attributes */
};

/* kobject type: release() frees priv and the module (see fp_forward_release) */
static struct kobj_type ktype_forward = {
	.sysfs_ops = &fp_sysfs_ops,
	.default_attrs = fp_forward_attrs,
	.release = fp_forward_release,
};
1011
1012static int fp_forward_probe(struct fastpath_module *module)
1013{
1014 int ret;
1015 struct fp_forward *priv;
1016
1017 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
1018 if (!priv) {
1019 pr_err("no memeory\n");
1020 return -ENOMEM;
1021 }
1022
1023 module->priv = priv;
1024 snprintf(module->name, sizeof(module->name), "fp_forward");
1025 spin_lock_init(&priv->lock);
1026
1027 if ((fp_forward_rx_hook > ARRAY_SIZE(rx_hooks) - 1) ||
1028 (fp_forward_tx_hook > ARRAY_SIZE(tx_hooks) - 1)) {
1029 pr_err("Invalid hook (rx_hook=%d , tx_hook=%d)\n",
1030 fp_forward_rx_hook, fp_forward_tx_hook);
1031 ret = -EINVAL;
1032 goto priv_kfree;
1033 }
1034 priv->rx_hook = &rx_hooks[fp_forward_rx_hook];
1035 priv->tx_hook = &tx_hooks[fp_forward_tx_hook];
1036
1037 ret = fp_forward_connect(priv);
1038 if (ret < 0) {
1039 pr_err("rx connect failed\n");
1040 goto priv_kfree;
1041 }
1042
1043 kobject_init(&module->kobj, &ktype_forward);
1044 ret = kobject_add(&module->kobj, module->fastpath->kobj, "%s", module->name);
1045 if (ret < 0) {
1046 pr_err("kobject_add failed (%d)\n", ret);
1047 goto fp_forward_disconnect;
1048 }
1049 kobject_uevent(&module->kobj, KOBJ_ADD);
1050
1051 pr_debug("fp_forward probed\n");
1052 return 0;
1053
1054fp_forward_disconnect:
1055 kobject_put(&module->kobj);
1056 fp_forward_disconnect(priv);
1057priv_kfree:
1058 kfree(priv);
1059 return ret;
1060}
1061
/*
 * Module remove: shut down packet dumping, then drop the kobject
 * reference.  The final put triggers fp_forward_release(), which
 * disconnects the hooks and frees priv and the module.
 */
static int fp_forward_remove(struct fastpath_module *module)
{
	if (fp_chan) {
		relay_close(fp_chan);
		fp_chan = NULL;
		debugfs_remove(fp_dir);
	}

	kobject_put(&module->kobj);

	pr_debug("fp_forward removed\n");
	return 0;
}
1075
1076struct fastpath_module_ops fp_forward_ops = {
1077 .probe = fp_forward_probe,
1078 .remove = fp_forward_remove
1079};
1080
1081module_param(fp_forward_rx_hook, uint, 0);
1082MODULE_PARM_DESC(fp_forward_rx_hook, "fastpath forward rx hook (default="
1083 __MODULE_STRING(FP_FORWARD_RX_HOOK_DEFAULT) ")");
1084module_param(fp_forward_tx_hook, uint, 0);
1085MODULE_PARM_DESC(fp_forward_tx_hook, "fastpath forward rx hook (default="
1086 __MODULE_STRING(FP_FORWARD_TX_HOOK_DEFAULT) ")");