/*
 *	Fast path Forward
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) "mfp" " forward: %s:%d: " fmt, __func__, __LINE__

#include <linux/if_vlan.h>
#include <linux/if_ether.h>
#include <linux/tcp.h>
#include <net/ip.h>
#include <net/udp.h>
#include <net/ipv6.h>
#include <net/icmp.h>
#include <linux/relay.h>
#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/skbrb.h>

#include "fp_common.h"
#include "fp_classifier.h"
#include "fp_database.h"
#include "fp_device.h"
#include "fp_core.h"
#include "fp_ndisc.h"

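/**
 * The direct (netif_rx) hook cannot be used when netif_rx is called from
 * interrupt context or with IRQs disabled. Defining the macro below makes
 * such packets bypass the direct hook and additionally registers the
 * netfilter hook so that they are handled there instead.
 */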
//#define FP_RX_IN_INTR_TO_NETFILTER

/*
 * Forward declarations.
 * fp_forward_direct/fp_forward_queue are the selectable TX paths (see
 * tx_hooks[]), fp_forward_nf_hook/fp_forward_netif_rx are the RX entry
 * points, and fp_forward_output dispatches to the currently active TX path.
 */
static int fp_forward_direct(struct sk_buff *skb);
static int fp_forward_queue(struct sk_buff *skb);
static unsigned int fp_forward_nf_hook(void *priv,
			       struct sk_buff *skb,
			       const struct nf_hook_state *state);

static int fp_forward_output(struct sk_buff *skb);
static int fp_forward_netif_rx(struct sk_buff *skb);

#ifdef CONFIG_ASR_TOE
/* Defined outside this file; called here with add=1 for classified flows. */
extern int fp_cm_genl_send_tuple(struct nf_conntrack_tuple *tuple, struct fpdb_entry *el,
									 int add, int len);
#endif

/* One selectable way of feeding received packets into fastpath. */
struct rx_hook_struct {
	/* label printed by rx_hook_show() */
	const char *name;
	/* installs the hook; a negative return is treated as failure */
	int (*connect)(void);
	/* removes the hook; may be NULL (checked before it is called) */
	void (*disconnect)(void);
};

/* One selectable way of transmitting packets that fastpath has classified. */
struct tx_hook_struct {
	/* label printed by tx_hook_show() */
	const char *name;
	/* transmit routine copied into the global 'output' pointer when connected */
	int (*output)(struct sk_buff *);
};

/* Per-module private state: the currently selected RX and TX hooks. */
struct fp_forward {
	/* initialised in fp_forward_probe(); not taken anywhere in the code visible here */
	spinlock_t lock;
	/* points into rx_hooks[] */
	struct rx_hook_struct *rx_hook;
	/* points into tx_hooks[] */
	struct tx_hook_struct *tx_hook;
};

static int (*output)(struct sk_buff *); /* global output function pointer */
static int drop_on_busy = 1; /* drop packets if output dev is busy */
static int bypass_fastpath = 0;
static int reply_ra;
static unsigned int pkt_debug_level;
static struct rchan *fp_chan;
static struct dentry *fp_dir;

/*--------------------------------------*/
/*-------------- RX HOOKS --------------*/
/*--------------------------------------*/

/** netif_rx hook: register fp_forward_netif_rx as the netif_rx fastpath callback */
static int netif_rx_hook_connect(void)
{
	netif_rx_fastpath_register(fp_forward_netif_rx);

	/* registration reports no status, so this hook always succeeds */
	return 0;
}
/* Undo netif_rx_hook_connect(). */
static void netif_rx_hook_disconnect(void)
{
	netif_rx_fastpath_unregister();
}

/**
 * netfilter rx_hook: fp_forward_nf_hook is installed at PRE_ROUTING with
 * the "first" priority for both IPv4 and IPv6.
 */
static struct nf_hook_ops nf_rx_hook_data[] __read_mostly = {
	{	/* IPv4 */
		.pf = NFPROTO_IPV4,
		.hooknum = NF_INET_PRE_ROUTING,
		.priority = NF_IP_PRI_FIRST,
		.hook = fp_forward_nf_hook,
	},
	{	/* IPv6 */
		.pf = NFPROTO_IPV6,
		.hooknum = NF_INET_PRE_ROUTING,
		.priority = NF_IP6_PRI_FIRST,
		.hook = fp_forward_nf_hook,
	},
};

/* Register both netfilter hooks in init_net; returns the registration status. */
static int nf_rx_hook_connect(void)
{
	int err;

	err = nf_register_net_hooks(&init_net, nf_rx_hook_data,
				    ARRAY_SIZE(nf_rx_hook_data));
	return err;
}

/* Unregister the netfilter hooks installed by nf_rx_hook_connect(). */
static void nf_rx_hook_disconnect(void)
{
	nf_unregister_net_hooks(&init_net, nf_rx_hook_data,
				ARRAY_SIZE(nf_rx_hook_data));
}

/* Indices into rx_hooks[]; also the values accepted by the rx_hook attribute. */
#define RX_HOOK_NETIF		(0)
#define RX_HOOK_NETFILTER	(1)
#define RX_HOOK_NONE		(2)

static struct rx_hook_struct rx_hooks[] = {
	[RX_HOOK_NETIF] = {
		.name		= "direct (netif_rx)",
		.connect	= netif_rx_hook_connect,
		.disconnect	= netif_rx_hook_disconnect,
	},
	[RX_HOOK_NETFILTER] = {
		.name		= "netfilter (NF_INET_PRE_ROUTING)",
		.connect	= nf_rx_hook_connect,
		.disconnect	= nf_rx_hook_disconnect,
	},
	/* placeholder entry: no callbacks */
	[RX_HOOK_NONE] = {
		.name		= "disconnected",
	},
};

/*--------------------------------------*/
/*-------------- TX HOOKS --------------*/
/*--------------------------------------*/

/* Indices into tx_hooks[]; also the values accepted by the tx_hook attribute. */
#define TX_HOOK_NDO_START_XMIT		(0)
#define TX_HOOK_DEV_QUEUE_XMIT		(1)
#define TX_HOOK_NONE			(2)

static struct tx_hook_struct tx_hooks[] = {
	[TX_HOOK_NDO_START_XMIT] = {
		.name	= "direct (ndo_start_xmit)",
		.output	= fp_forward_direct,
	},
	[TX_HOOK_DEV_QUEUE_XMIT] = {
		.name	= "queue (dev_queue_xmit)",
		.output	= fp_forward_queue,
	},
	/* placeholder entry: output stays NULL */
	[TX_HOOK_NONE] = {
		.name	= "disconnected",
	},
};

/*
 * Initial hook selection, used as indices into rx_hooks[]/tx_hooks[] by
 * fp_forward_probe(). Exposed as module parameters at the end of the file.
 */
static unsigned int fp_forward_rx_hook = FP_FORWARD_RX_HOOK_DEFAULT;
static unsigned int fp_forward_tx_hook = FP_FORWARD_TX_HOOK_DEFAULT;

/*
 * Append buf_len bytes of trace text to the relay channel.
 * Silently does nothing while no channel is open.
 */
static void fp_print_pkt(char *buf, u32 buf_len)
{
	if (fp_chan)
		relay_write(fp_chan, buf, buf_len);
}

static void fp_dump_input_pkt(struct sk_buff *skb, char *rx_tx)
{
	struct iphdr *iph = (struct iphdr *)(skb->data);
	u8 version = iph->version;
	char buf[512] = {0};
	u32 len = 0;
	u64 ts_nsec;
	unsigned long rem_nsec;

	ts_nsec = local_clock();
	rem_nsec = do_div(ts_nsec, 1000000000);

	if (version == 4) {
		if (iph->protocol == IPPROTO_ICMP) {
			struct icmphdr *icmph = (struct icmphdr *)(iph + 1);
			int type = icmph->type;
			if (type == 8) {
				len = scnprintf(buf, 512, "[%5lu.%06lu] --->>%s, receive icmp request, src=%pI4 dst=%pI4 ID=%u SEQ=%u\n",
							(unsigned long)ts_nsec, rem_nsec / 1000,
							rx_tx, &iph->saddr, &iph->daddr, ntohs(icmph->un.echo.id),
							ntohs(icmph->un.echo.sequence));
			}else if (type == 0) {
				len = scnprintf(buf, 512, "[%5lu.%06lu] --->>%s, receive icmp reply, src=%pI4 dst=%pI4 ID=%u SEQ=%u\n",
							(unsigned long)ts_nsec, rem_nsec / 1000,
							rx_tx, &iph->saddr, &iph->daddr, ntohs(icmph->un.echo.id),
							ntohs(icmph->un.echo.sequence));
			}
		} else if (iph->protocol == IPPROTO_UDP) {
			struct udphdr *uh = (struct udphdr *)(iph + 1);
			len = scnprintf(buf, 512, "[%5lu.%06lu] --->>%s, receive UDP, src=%pI4 dst=%pI4 ID=%u sp=%u dp=%u\n",
							(unsigned long)ts_nsec, rem_nsec / 1000,
							rx_tx, &iph->saddr, &iph->daddr, ntohs(iph->id), ntohs(uh->source), ntohs(uh->dest));
		} else if (iph->protocol == IPPROTO_TCP) {
			struct tcphdr *th = (struct tcphdr *)(iph + 1);
			len = scnprintf(buf, 512, "[%5lu.%06lu] --->>%s, receive TCP, src=%pI4 dst=%pI4 ID=%u sp=%u dp=%u seq=%u ack=%u\n",
							(unsigned long)ts_nsec, rem_nsec / 1000,
							rx_tx, &iph->saddr, &iph->daddr, ntohs(iph->id), ntohs(th->source), ntohs(th->dest),
							ntohl(th->seq), ntohl(th->ack_seq));
		} else {
			len = scnprintf(buf, 512, "[%5lu.%06lu] --->>%s, receive pkt type %u, src=%pI4 dst=%pI4 ID=%u\n",
							(unsigned long)ts_nsec, rem_nsec / 1000,
							rx_tx, iph->protocol, &iph->saddr, &iph->daddr, ntohs(iph->id));
		}
	} else if (version == 6) {
		struct ipv6hdr *ip6h;
		__be16 frag_off;
		int offset;
		u8 nexthdr;

		ip6h = (struct ipv6hdr *)(skb->data);
		nexthdr = ip6h->nexthdr;
		/* not support fragment pkt */
		if (nexthdr == NEXTHDR_FRAGMENT)
			return;
		if (ipv6_ext_hdr(nexthdr)) {
			offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off);
			if (offset < 0)
				return;
		} else
			offset = sizeof(struct ipv6hdr);

		if (nexthdr == IPPROTO_ICMPV6) {
			struct icmp6hdr *icmp6;
			if (!pskb_may_pull(skb, ((unsigned char*)ip6h + offset + 6 - skb->data)))
				return;
			icmp6 = (struct icmp6hdr *)((unsigned char*)ip6h + offset);
			len = scnprintf(buf, 512, "[%5lu.%06lu] --->>%s, receive icmp6, src=%pI6c dst=%pI6c type=%u code=%u id=%u\n",
							(unsigned long)ts_nsec, rem_nsec / 1000,
							rx_tx, &ip6h->saddr, &ip6h->daddr, icmp6->icmp6_type, icmp6->icmp6_type, 
							ntohs(icmp6->icmp6_identifier));
		} else if (nexthdr == IPPROTO_UDP) {
			struct udphdr *uh6 = (struct udphdr *)((unsigned char*)ip6h + offset);
			len = scnprintf(buf, 512, "[%5lu.%06lu] --->>%s, receive UDP6, src=%pI6c dst=%pI6c sp=%u dp=%u\n",
							(unsigned long)ts_nsec, rem_nsec / 1000,
							rx_tx, &ip6h->saddr, &ip6h->daddr, ntohs(uh6->source), ntohs(uh6->dest));
		} else if (nexthdr == IPPROTO_TCP) {
			struct tcphdr *th6 = (struct tcphdr *)((unsigned char*)ip6h + offset);
			len = scnprintf(buf, 512, "[%5lu.%06lu] --->>%s, receive TCP6, src=%pI6c dst=%pI6c sp=%u dp=%u seq=%u ack=%u\n",
							(unsigned long)ts_nsec, rem_nsec / 1000,
							rx_tx, &ip6h->saddr, &ip6h->daddr, ntohs(th6->source), ntohs(th6->dest),
							ntohl(th6->seq), ntohl(th6->ack_seq));
		} else {
			len = scnprintf(buf, 512, "[%5lu.%06lu] --->>%s, receive pkt type %u, src=%pI6c dst=%pI6c\n",
							(unsigned long)ts_nsec, rem_nsec / 1000,
							rx_tx, nexthdr, &ip6h->saddr, &ip6h->daddr);
		}
	}else {
		return;
	}

	fp_print_pkt(buf, len);
}

static void fp_dump_output_pkt(struct sk_buff *skb, char *rx_tx)
{
	struct iphdr *iph;
	u8 version;
	char buf[512] = {0};
	u32 len = 0;
	u64 ts_nsec;
	unsigned long rem_nsec;

	ts_nsec = local_clock();
	rem_nsec = do_div(ts_nsec, 1000000000);

	iph = (struct iphdr *)(skb->data);

	version = iph->version;
	if (version == 4) {
		if (iph->protocol == IPPROTO_ICMP) {
			struct icmphdr *icmph = (struct icmphdr *)(iph + 1);
			int type = icmph->type;
			if (type == 8) {
				len = scnprintf(buf, 512, "[%5lu.%06lu] <<---%s, send icmp request, src=%pI4 dst=%pI4 ID=%u SEQ=%u\n",
							(unsigned long)ts_nsec, rem_nsec / 1000,
							rx_tx, &iph->saddr, &iph->daddr, ntohs(icmph->un.echo.id),
							ntohs(icmph->un.echo.sequence));
			}else if (type == 0) {
				len = scnprintf(buf, 512, "[%5lu.%06lu] <<---%s, send icmp reply, src=%pI4 dst=%pI4 ID=%u SEQ=%u\n",
							(unsigned long)ts_nsec, rem_nsec / 1000,
							rx_tx, &iph->saddr, &iph->daddr, ntohs(icmph->un.echo.id),
							ntohs(icmph->un.echo.sequence));
			}
		} else if (iph->protocol == IPPROTO_UDP) {
			struct udphdr *uh = (struct udphdr *)(iph + 1);
			len = scnprintf(buf, 512, "[%5lu.%06lu] <<---%s, send UDP, src=%pI4 dst=%pI4 ID=%u sp=%u dp=%u\n",
							(unsigned long)ts_nsec, rem_nsec / 1000,
							rx_tx, &iph->saddr, &iph->daddr, ntohs(iph->id), ntohs(uh->source), ntohs(uh->dest));
		} else if (iph->protocol == IPPROTO_TCP) {
			struct tcphdr *th = (struct tcphdr *)(iph + 1);
			len = scnprintf(buf, 512, "[%5lu.%06lu] <<---%s, send TCP, src=%pI4 dst=%pI4 ID=%u sp=%u dp=%u seq=%u ack=%u\n",
							(unsigned long)ts_nsec, rem_nsec / 1000,
							rx_tx, &iph->saddr, &iph->daddr, ntohs(iph->id), ntohs(th->source), ntohs(th->dest),
							ntohl(th->seq), ntohl(th->ack_seq));
		} else {
			len = scnprintf(buf, 512, "[%5lu.%06lu] <<---%s, send pkt type %u, src=%pI4 dst=%pI4 ID=%u\n",
							(unsigned long)ts_nsec, rem_nsec / 1000,
							rx_tx, iph->protocol, &iph->saddr, &iph->daddr, ntohs(iph->id));
		}
	} else if (version == 6) {
		struct ipv6hdr *ip6h;
		__be16 frag_off;
		int offset;
		u8 nexthdr;

		ip6h = (struct ipv6hdr *)(skb->data);

		nexthdr = ip6h->nexthdr;
		/* not support fragment pkt */
		if (nexthdr == NEXTHDR_FRAGMENT)
			return;
		if (ipv6_ext_hdr(nexthdr)) {
			offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off);
			if (offset < 0)
				return;
		} else
			offset = sizeof(struct ipv6hdr);

		if (nexthdr == IPPROTO_ICMPV6) {
			struct icmp6hdr *icmp6;
			if (!pskb_may_pull(skb, ((unsigned char*)ip6h + offset + 6 - skb->data)))
				return;
			icmp6 = (struct icmp6hdr *)((unsigned char*)ip6h + offset);
			len = scnprintf(buf, 512, "[%5lu.%06lu] <<---%s, send icmp6, src=%pI6c dst=%pI6c type=%u code=%u id=%u\n",
							(unsigned long)ts_nsec, rem_nsec / 1000,
							rx_tx, &ip6h->saddr, &ip6h->daddr, icmp6->icmp6_type, icmp6->icmp6_type,
							ntohs(icmp6->icmp6_identifier));
		} else if (nexthdr == IPPROTO_UDP) {
			struct udphdr *uh6 = (struct udphdr *)((unsigned char*)ip6h + offset);
			len = scnprintf(buf, 512, "[%5lu.%06lu] <<---%s, send UDP6, src=%pI6c dst=%pI6c sp=%u dp=%u\n",
							(unsigned long)ts_nsec, rem_nsec / 1000,
							rx_tx, &ip6h->saddr, &ip6h->daddr, ntohs(uh6->source), ntohs(uh6->dest));
		} else if (nexthdr == IPPROTO_TCP) {
			struct tcphdr *th6 = (struct tcphdr *)((unsigned char*)ip6h + offset);
			len = scnprintf(buf, 512, "[%5lu.%06lu] <<---%s, send TCP6, src=%pI6c dst=%pI6c sp=%u dp=%u seq=%u ack=%u\n",
							(unsigned long)ts_nsec, rem_nsec / 1000,
							rx_tx, &ip6h->saddr, &ip6h->daddr, ntohs(th6->source), ntohs(th6->dest),
							ntohl(th6->seq), ntohl(th6->ack_seq));
		} else {
			len = scnprintf(buf, 512, "[%5lu.%06lu] <<---%s, send pkt type %u, src=%pI6c dst=%pI6c\n",
							(unsigned long)ts_nsec, rem_nsec / 1000,
							rx_tx, nexthdr, &ip6h->saddr, &ip6h->daddr);
		}
	} else {
		return;
	}

	fp_print_pkt(buf, len);
}
/**
 * Forward an skb directly to the output interface if classified as
 * fastpath. skb->dev must point to the src net_device (done in
 * eth_type_trans or in drivers)
 *
 * The skb is classified with fpc_classify_start()/fpc_classify_finish(),
 * then handed to the output device's ndo_start_xmit() under HARD_TX_LOCK,
 * and the per-device fastpath statistics are updated from the return code.
 *
 * @todo 1. Add an option to enable/disable fastpath for a
 *       specific net_device from userspace (via
 *       ifconfig/ethtool)
 * @note If the source net_device doesn't have fastpath enabled,
 *       a packet from it can still traverse through fastpath if
 *       the output net_device supports it and there was a match
 *       in the fastpath database.
 * @param skb    skb to forward through fastpath
 *
 * @return 1 if skb consumed by fastpath, 0 otherwise (should be
 *         sent through slowpath)
 */
static int fp_forward_direct(struct sk_buff *skb)
{
	int ret, len = skb->len; /* len is sampled up front for the statistics below */
	struct fp_net_device *dst, *src;
	struct fpdb_entry *el;
	struct netdev_queue *txq;
	struct sk_buff *skb2 = skb;	/* set to NULL when the tx queue is stopped */
	struct nf_conntrack_tuple tuple;
	const struct net_device_ops *ops;

	/*
	 * fastpath direct tx hook should be used only when no packets can
	 * arrive in irq/irq disable context, since fastpath only protects
	 * at soft-irq level. Otherwise this could possibly result in a deadlock.
	 */

	WARN_ONCE(in_irq() || irqs_disabled(),
		  "fastpath direct tx called from irq, or irq disabled!\n");

	/* no database match: leave the packet to the regular stack */
	el = fpc_classify_start(skb, &tuple);
	if (unlikely(!el))
		goto slowpath;
	/* NOTE(review): el is obtained before the RCU read section starts - confirm its lifetime */
	rcu_read_lock_bh();

	src = rcu_dereference_bh(el->in_dev);
	dst = rcu_dereference_bh(el->out_dev);

	/* tx tracing is enabled for levels 2 and 3 */
	if (pkt_debug_level == 2 || pkt_debug_level == 3) {
		if (!strncasecmp(dst->dev->name, "ccinet", 6))
			fp_dump_output_pkt(skb, "F_UL");
		else
			fp_dump_output_pkt(skb, "F_DL");
	}

	ops = dst->dev->netdev_ops;
	if (fpc_classify_finish(skb, el)) {
		rcu_read_unlock_bh();
		goto slowpath;
	}

	skb_reset_mac_header(skb);
	txq = netdev_core_pick_tx(dst->dev, skb, NULL);
	HARD_TX_LOCK(dst->dev, txq, smp_processor_id());
	/* a stopped/frozen queue is handled like NETDEV_TX_BUSY without calling the driver */
	if (unlikely(netif_xmit_frozen_or_stopped(txq))) {
		skb2 = NULL;
		dst->stats.queue_stopped++;
	}

	if (skb2)
		skb2->dev = dst->dev;
	ret = skb2 ? ops->ndo_start_xmit(skb2, dst->dev) : NETDEV_TX_BUSY;

	/* NOTE(review): the cases below mix NETDEV_TX_* and NET_XMIT_* return codes */
	switch (ret) {
	case NETDEV_TX_OK:
		/* sent through fastpath */
		txq_trans_update(txq);
		src->stats.rx_packets++;
		src->stats.rx_bytes += len;
		dst->stats.tx_packets++;
		/* devices with header_ops are accounted with an extra ETH_HLEN */
		if (dst->dev->header_ops)
			dst->stats.tx_bytes += len + ETH_HLEN;
		else
			dst->stats.tx_bytes += len;
		break;
	case NET_XMIT_CN:
		src->stats.rx_dropped++;
		dst->stats.tx_dropped++;
		break;
	case NET_XMIT_DROP:
	case NETDEV_TX_BUSY:
	default:
		if (unlikely(skb2)) {
			/* shouldn't happen since we check txq before trying to transmit */
			src->stats.rx_errors++;
			dst->stats.tx_errors++;
			printk(KERN_DEBUG "Failed to send through fastpath (ret=%d)\n", ret);
		}

		/*
		 * NOTE(review): with drop_on_busy == 0 and skb2 set, the
		 * function still returns 1 below without freeing the skb
		 * here - confirm who owns it in that case.
		 */
		if (drop_on_busy) {
			src->stats.rx_dropped++;
			dst->stats.tx_dropped++;
			dev_kfree_skb_any(skb);
		}
	}

	HARD_TX_UNLOCK(dst->dev, txq);

#ifdef CONFIG_ASR_TOE
	/* report the flow only after a successful transmit and only while nl_flag is 0 */
	if ((0 == el->nl_flag) && (ret == NETDEV_TX_OK)) {
		//fpdb_dump_entry("fp_cm_genl_send_tuple, entry dump:\n", el);
		fp_cm_genl_send_tuple(&tuple, el, 1, len);
	}
#endif
	rcu_read_unlock_bh();

	/*
	 * The only way past this point is a stopped queue with
	 * drop_on_busy == 0; the skb then goes to slowpath although
	 * fpc_classify_finish() has already processed it.
	 */
	if (likely(skb2) || drop_on_busy)
		return 1;
slowpath:
	if (pkt_debug_level == 2 || pkt_debug_level == 3) {
		if (!strncasecmp(skb->dev->name, "ccinet", 6))
			fp_dump_output_pkt(skb, "S_DL");
		else
			fp_dump_output_pkt(skb, "S_UL");
	}

	/* DO NOT do skb copy if the skb is allocated from skbrb
	 * (skb ring buffer for bridge performance)
	 */
	/* NOTE(review): the return value of pskb_expand_head() is ignored */
	if (!IS_SKBRB_SKB(skb) && FP_IS_SKB_P(skb))
		pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
	return 0;
}

/**
 * Forward an skb to the output device's queue if classified as fastpath.
 *
 * VLAN-tagged frames are untagged first. A classified packet is retargeted
 * to the output device and sent with dev_queue_xmit(); the per-device
 * fastpath statistics are updated from its return code.
 *
 * @param skb    skb to forward
 *
 * @return 1 if consumed by fastpath (this includes the skb having been
 *         freed during VLAN processing), 0 otherwise (should be sent
 *         through slowpath)
 */
static int fp_forward_queue(struct sk_buff *skb)
{
	int ret, len = skb->len;
	struct fp_net_device *dst, *src;
	struct fpdb_entry *el;
	struct nf_conntrack_tuple tuple;

	if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
	    skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
		skb = skb_vlan_untag(skb);
		/*
		 * skb_vlan_untag() frees the skb when it fails, so the
		 * caller must not touch it again: report it as consumed.
		 */
		if (unlikely(!skb))
			return 1;
		if (skb_vlan_tag_present(skb) && !vlan_do_receive(&skb) &&
		    unlikely(!skb))
			return 1;
	}

	el = fpc_classify_start(skb, &tuple);
	if (unlikely(!el)) {
		/* DO NOT do skb copy if the skb is allocated from skbrb
		 * (skb ring buffer for bridge performance)
		 */
		if (!IS_SKBRB_SKB(skb) && FP_IS_SKB_P(skb))
			pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		return 0;
	}

#ifdef CONFIG_ASR_TOE
	if (0 == el->nl_flag) {
		//fpdb_dump_entry("fp_cm_genl_send_tuple, entry dump:\n", el);
		fp_cm_genl_send_tuple(&tuple, el, 1, len);
	}
#endif

	src = fpdev_hold(el->in_dev);
	dst = fpdev_hold(el->out_dev);
	if (fpc_classify_finish(skb, el)) {
		/* release the device references taken above before bailing out */
		fpdev_put(dst);
		fpdev_put(src);
		return 0;
	}

	skb->dev = dst->dev;

	ret = dev_queue_xmit(skb);
	switch (ret) {
	case NET_XMIT_SUCCESS:
		src->stats.rx_bytes += len;
		src->stats.rx_packets++;
		dst->stats.tx_bytes += len + ETH_HLEN;
		dst->stats.tx_packets++;
		break;
	case NET_XMIT_CN:
		src->stats.rx_dropped++;
		dst->stats.tx_dropped++;
		dst->stats.queue_stopped++;
		break;
	case NET_XMIT_DROP:
	default:
		pr_info("unexpected return code from dev_queue_xmit (%d)\n", ret);
		src->stats.rx_errors++;
		dst->stats.tx_errors++;
	}

	fpdev_put(dst);
	fpdev_put(src);

	return 1;
}

/*
 * Netfilter PRE_ROUTING callback: offer the skb to the active TX path.
 * Returns NF_STOLEN when fastpath consumed the skb, NF_ACCEPT otherwise.
 */
static unsigned int fp_forward_nf_hook(void *priv, struct sk_buff *skb,
	   const struct nf_hook_state *state)
{
	WARN_ON_ONCE(irqs_disabled());

	return fp_forward_output(skb) ? NF_STOLEN : NF_ACCEPT;
}

static int fp_forward_netif_rx(struct sk_buff *skb)
{
	struct iphdr *iph;
	struct ipv6hdr *ipv6h;
	u32 len;
	struct vlan_hdr *vhdr;
	int ret;

	if (unlikely(bypass_fastpath == 1))
		goto slowpath;

#ifdef FP_RX_IN_INTR_TO_NETFILTER
	if (in_irq() || irqs_disabled())
		goto slowpath;
#endif
	if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
			skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
		vhdr = (struct vlan_hdr *) skb->data;
		skb = skb_vlan_untag(skb);
		if (unlikely(!skb))
			goto slowpath;
		if (skb_vlan_tag_present(skb)) {
			if (!vlan_do_receive(&skb)) {
				if (unlikely(!skb))
					return 1;
			}
		}
	}

	if (pkt_debug_level == 1 || pkt_debug_level == 3) {
		if (!strncasecmp(skb->dev->name, "ccinet", 6))
			fp_dump_input_pkt(skb, "DL");
		else
			fp_dump_input_pkt(skb, "UL");
	}

	iph = (struct iphdr *)skb->data;

	if (likely(iph->version == 4)) {

		if (iph->ihl < 5)
			goto slowpath_warn;

		len = ntohs(iph->tot_len);

		if (skb->len < len || len < (iph->ihl * 4))
			goto slowpath_warn;

	} else if (likely(iph->version == 6)) {

		ipv6h = (struct ipv6hdr *)skb->data;

		len = ntohs(ipv6h->payload_len);

		if (!len && ipv6h->nexthdr == NEXTHDR_HOP)
			goto done;

		if (len + sizeof(struct ipv6hdr) > skb->len)
			goto slowpath_warn;

		len = len + sizeof(struct ipv6hdr);
	} else {
		goto slowpath;
	}

	/* trim possible padding on skb*/
	if (pskb_trim_rcsum(skb, len))
		goto slowpath_warn;

done:
	ret = fp_forward_output(skb);
	if (!ret) {
		if (reply_ra && fpnd_is_rs(skb)) {
			struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->data;
			printk(KERN_DEBUG "received RS on dev (%s), saddr=%pI6c, daddr=%pI6c\n",
				skb->dev->name, &ipv6h->saddr, &ipv6h->daddr);
			return fpnd_process_rs(skb);
		}

		if (fpnd_is_ra(skb)) {
			struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->data;
			printk(KERN_DEBUG "received RA on dev (%s), saddr=%pI6c, daddr=%pI6c\n",
				skb->dev->name, &ipv6h->saddr, &ipv6h->daddr);
			fpnd_process_ra(skb->dev, skb);
		}
	}
	return ret;
slowpath_warn:
	pr_debug_ratelimited("bad ip header received\n");
slowpath:
	return 0;
}

/*
 * Hand the skb to the active TX routine.
 * Returns 0 (slowpath) when fastpath is bypassed or no TX hook is
 * connected, otherwise whatever the TX routine returns.
 */
static inline int fp_forward_output(struct sk_buff *skb)
{
	if (unlikely(bypass_fastpath == 1) || !output)
		return 0;

	return output(skb);
}

static inline void tx_hook_disconnect(struct fp_forward *priv)
{
	BUG_ON(!priv);
	priv->tx_hook = &tx_hooks[TX_HOOK_NONE];
	output = priv->tx_hook->output;
}

/*
 * Publish the selected TX hook's routine through the global output pointer.
 * The selected hook must provide an output routine (BUG otherwise).
 */
static inline void tx_hook_connect(struct fp_forward *priv)
{
	BUG_ON(!(priv && priv->tx_hook && priv->tx_hook->output));

	output = priv->tx_hook->output;
}

/* Run the current RX hook's disconnect callback, if any, and select "disconnected". */
static inline void rx_hook_disconnect(struct fp_forward *priv)
{
	void (*disconnect)(void);

	BUG_ON(!priv);

	disconnect = priv->rx_hook->disconnect;
	if (disconnect)
		disconnect();

	priv->rx_hook = &rx_hooks[RX_HOOK_NONE];
}

/*
 * Run the selected RX hook's connect callback.
 * The selected hook must provide a connect callback (BUG otherwise).
 * Returns 0 on success; on failure the hook falls back to "disconnected"
 * and the negative error from the callback is returned.
 */
static inline int rx_hook_connect(struct fp_forward *priv)
{
	int err;

	BUG_ON(!(priv && priv->rx_hook && priv->rx_hook->connect));

	err = priv->rx_hook->connect();
	if (err >= 0)
		return 0;

	pr_err("rx_hook connect failed (%d)\n", err);
	priv->rx_hook =  &rx_hooks[RX_HOOK_NONE];
	return err;
}

/*
 * Tear down both hooks.
 *
 * With FP_RX_IN_INTR_TO_NETFILTER the netfilter hook is also registered as
 * a secondary hook whenever it is not the primary one, and has to be
 * removed here as well - but only once.
 */
static inline void fp_forward_disconnect(struct fp_forward *priv)
{
#ifdef FP_RX_IN_INTR_TO_NETFILTER
	/*
	 * Sample this before rx_hook_disconnect(), which resets
	 * priv->rx_hook to "disconnected"; testing afterwards would
	 * unregister the netfilter hooks a second time.
	 */
	bool nf_is_primary = (priv->rx_hook == &rx_hooks[RX_HOOK_NETFILTER]);
#endif

	tx_hook_disconnect(priv);
	rx_hook_disconnect(priv);
#ifdef FP_RX_IN_INTR_TO_NETFILTER
	if (!nf_is_primary)
		nf_rx_hook_disconnect();
#endif
}

/*
 * Connect the selected TX hook, then the selected RX hook.
 *
 * With FP_RX_IN_INTR_TO_NETFILTER the netfilter hook is additionally
 * registered when it is not already the primary RX hook.
 *
 * Returns 0 on success or a negative error; on error nothing is left
 * connected.
 */
static inline int fp_forward_connect(struct fp_forward *priv)
{
	int ret;

	tx_hook_connect(priv);
	ret = rx_hook_connect(priv);
	if (ret < 0) {
		pr_err("rx_hook connect failed (%d)\n", ret);
		tx_hook_disconnect(priv);
		return ret;
	}

#ifdef FP_RX_IN_INTR_TO_NETFILTER
	if (priv->rx_hook != &rx_hooks[RX_HOOK_NETFILTER]) {
		ret = nf_rx_hook_connect();
		if (ret < 0) {
			pr_err("netfilter rx_hook connect failed (%d)\n", ret);
			/* roll back the hooks connected above */
			rx_hook_disconnect(priv);
			tx_hook_disconnect(priv);
			return ret;
		}
		//pr_info("=== mfp: also enable netfilter hook for RX\n");
	}
#endif

	return 0;
}

static ssize_t rx_hook_show(struct fastpath_module *m, char *buf)
{
	struct fp_forward *priv = m->priv;
	int i, len = sprintf(buf, "fastpath forward rx hooks:\n");
	char c;

	for (i = 0; i < ARRAY_SIZE(rx_hooks); i++) {
		c = (priv->rx_hook == &rx_hooks[i]) ? '*' : ' ';
		len += sprintf(buf+len, "%c %s\n", c, rx_hooks[i].name);
	}

	return len;
}


/*
 * sysfs store: switch the RX hook to the rx_hooks[] index given in buf.
 *
 * Returns count on success (including "no change"), -EINVAL when buf holds
 * no number or the index is out of range, or the negative error from the
 * new hook's connect callback.
 */
static ssize_t rx_hook_store(struct fastpath_module *m, const char *buf,
			      size_t count)
{
	struct fp_forward *priv = m->priv;
	struct rx_hook_struct *rx_hook;
	unsigned int idx;
	int ret;

	/* reject input without a number instead of using idx uninitialised */
	if (sscanf(buf, "%u", &idx) != 1)
		return -EINVAL;

	if (idx >= ARRAY_SIZE(rx_hooks)) {
		pr_debug("Invalid rx hook=%d\n", idx);
		return -EINVAL;
	}

	rx_hook = &rx_hooks[idx];
	if (rx_hook == priv->rx_hook)
		return count; /* no change */

#ifdef FP_RX_IN_INTR_TO_NETFILTER
	/* the netfilter hook stays registered as the secondary hook */
	if (priv->rx_hook != &rx_hooks[RX_HOOK_NETFILTER])
		rx_hook_disconnect(priv);
#else
	rx_hook_disconnect(priv);
#endif
	priv->rx_hook = rx_hook;

#ifdef FP_RX_IN_INTR_TO_NETFILTER
	/* already registered as the secondary hook */
	if (rx_hook == &rx_hooks[RX_HOOK_NETFILTER])
		return count;
#endif
	/*
	 * "disconnected" has no connect callback; calling rx_hook_connect()
	 * for it would trip its BUG_ON().
	 */
	if (!rx_hook->connect)
		return count;

	ret = rx_hook_connect(priv);
	if (ret < 0)
		return ret;

	return count;
}

static ssize_t tx_hook_show(struct fastpath_module *m, char *buf)
{
	struct fp_forward *priv = m->priv;
	int i, len = sprintf(buf, "fastpath forward tx hooks:\n");
	char c;

	for (i = 0; i < ARRAY_SIZE(tx_hooks); i++) {
		c = (priv->tx_hook == &tx_hooks[i]) ? '*' : ' ';
		len += sprintf(buf+len, "%c %s\n", c, tx_hooks[i].name);
	}

	return len;
}


/*
 * sysfs store: switch the TX hook to the tx_hooks[] index given in buf.
 *
 * Returns count on success (including "no change") or -EINVAL when buf
 * holds no number or the index is out of range.
 */
static ssize_t tx_hook_store(struct fastpath_module *m, const char *buf,
			      size_t count)
{
	struct fp_forward *priv = m->priv;
	struct tx_hook_struct *tx_hook;
	unsigned int idx;

	/* reject input without a number instead of using idx uninitialised */
	if (sscanf(buf, "%u", &idx) != 1)
		return -EINVAL;

	if (idx >= ARRAY_SIZE(tx_hooks)) {
		pr_debug("Invalid tx hook=%d\n", idx);
		return -EINVAL;
	}

	tx_hook = &tx_hooks[idx];

	if (tx_hook == priv->tx_hook)
		return count; /* no change */

	tx_hook_disconnect(priv);

	/*
	 * "disconnected" has no output routine; tx_hook_disconnect() has
	 * already selected it, and tx_hook_connect() would trip its BUG_ON().
	 */
	if (!tx_hook->output)
		return count;

	priv->tx_hook = tx_hook;
	tx_hook_connect(priv);

	return count;
}

/* sysfs show: print the current drop_on_busy setting. */
static ssize_t dob_show(struct fastpath_module *m, char *buf)
{
	int written = sprintf(buf, "fastpath forward drop on busy: %d\n", drop_on_busy);

	return written;
}


/*
 * sysfs store: set drop_on_busy from buf.
 * Returns count on success, -EINVAL unless buf holds 0 or 1.
 */
static ssize_t dob_store(struct fastpath_module *m, const char *buf,
			      size_t count)
{
	unsigned int dob;

	/* reject input without a number instead of using dob uninitialised */
	if (sscanf(buf, "%u", &dob) != 1)
		return -EINVAL;

	if (dob != 0 && dob != 1) {
		pr_debug("Invalid value %d - should be 1/0 \n", dob);
		return -EINVAL;
	}

	drop_on_busy = dob;

	return count;
}

/* sysfs show: print the current bypass_fastpath flag. */
static ssize_t bypass_show(struct fastpath_module *m, char *buf)
{
	int written = sprintf(buf, "fastpath bypass flag: %d\n", bypass_fastpath);

	return written;
}


/*
 * sysfs store: set the bypass_fastpath flag from buf.
 * Returns count on success, -EINVAL unless buf holds 0 or 1.
 */
static ssize_t bypass_store(struct fastpath_module *m, const char *buf,
			      size_t count)
{
	unsigned int bypass_fastpath_flag;

	/* reject input without a number instead of using the flag uninitialised */
	if (sscanf(buf, "%u", &bypass_fastpath_flag) != 1)
		return -EINVAL;

	if (bypass_fastpath_flag != 0 && bypass_fastpath_flag != 1) {
		pr_debug("bypass_store: Invalid value %d - should be 1/0 \n",
			bypass_fastpath_flag);
		return -EINVAL;
	}
	bypass_fastpath = bypass_fastpath_flag;
	return count;
}


/*
 * kobject release callback: disconnect both hooks, then free the private
 * state and the fastpath_module that embeds the kobject.
 */
static void fp_forward_release(struct kobject *kobj)
{
	struct fastpath_module *fpmod = to_fpmod(kobj);
	struct fp_forward *fwd = fpmod->priv;

	fp_forward_disconnect(fwd);
	pr_debug("fp_forward released\n");

	kfree(fwd);
	kfree(fpmod);
}

/* sysfs show: print the current reply_ra flag. */
static ssize_t reply_ra_show(struct fastpath_module *m, char *buf)
{
	int written = sprintf(buf, "fastpath reply_ra flag: %d\n", reply_ra);

	return written;
}

/*
 * sysfs store: set the reply_ra flag from buf.
 * Returns count on success, -EINVAL unless buf holds 0 or 1.
 */
static ssize_t reply_ra_store(struct fastpath_module *m, const char *buf,
		size_t count)
{
	unsigned int reply_ra_flag;

	/* reject input without a number instead of using the flag uninitialised */
	if (sscanf(buf, "%u", &reply_ra_flag) != 1)
		return -EINVAL;

	if (reply_ra_flag != 0 && reply_ra_flag != 1) {
		pr_debug("reply_ra_store: Invalid value %d - should be 1/0 \n",
		  reply_ra_flag);
		return -EINVAL;
	}
	reply_ra = reply_ra_flag;
	return count;
}

/*
 * relay create_buf_file callback: back the channel buffer with a debugfs
 * file (owner read/write and world-readable bits are added to mode) and
 * flag the buffer as global.
 */
static struct dentry *create_buf_file_handler(const char *filename,
						 struct dentry *parent,
						 umode_t mode,
						 struct rchan_buf *buf,
						 int *is_global)
{
	*is_global = 1;

	return debugfs_create_file(filename, mode | S_IRUGO | S_IWUSR, parent,
				   buf, &relay_file_operations);
}

/* relay remove_buf_file callback: delete the debugfs file; always returns 0. */
static int remove_buf_file_handler(struct dentry *dentry)
{
	debugfs_remove(dentry);

	return 0;
}

/* Callbacks handed to relay_open() for the packet-trace channel. */
static struct rchan_callbacks fp_relay_callbacks =
{
	.create_buf_file = create_buf_file_handler,
	.remove_buf_file = remove_buf_file_handler,
};

/* sysfs show: print the current packet trace level. */
static ssize_t pkt_debug_level_show(struct fastpath_module *m, char *buf)
{
	int written = sprintf(buf, "%d\n", pkt_debug_level);

	return written;
}

/*
 * sysfs store: set the packet trace level.
 *
 *   0: off
 *   1: rx enable
 *   2: tx enable
 *   3: rx+tx enable
 *
 * The first non-zero level creates the "fastpath" debugfs directory and the
 * "pkt_debug" relay channel; level 0 tears both down. Changing between
 * non-zero levels keeps the existing channel.
 *
 * Returns count, or -EINVAL when buf holds no number or a value above 3.
 * A failure to create the debugfs/relay objects is logged but still
 * reported as success.
 *
 * NOTE(review): fp_print_pkt() reads fp_chan without synchronisation, so
 * closing the channel can race with packets being traced.
 */
static ssize_t set_pkt_debug_level(struct fastpath_module *m, const char *buf,
					  size_t count)
{
	unsigned int cmd;

	/* reject input without a number instead of using cmd uninitialised */
	if (sscanf(buf, "%u", &cmd) != 1)
		return -EINVAL;

	if (cmd > 3) {
		pr_debug("Invalid value for pkt_debug_level %d\n", cmd);
		return -EINVAL;
	}

	pkt_debug_level = cmd;

	if (cmd == 0) {
		if (fp_chan) {
			struct rchan *chan = fp_chan;

			/* unpublish first; this narrows the race noted above but does not close it */
			fp_chan = NULL;
			relay_close(chan);
			debugfs_remove(fp_dir);
			fp_dir = NULL;
		}
		return count;
	}

	/*
	 * Already tracing and only the level changed: keep the channel.
	 * Opening it again would leak the old one and lose the directory
	 * dentry.
	 */
	if (fp_chan)
		return count;

	fp_dir = debugfs_create_dir("fastpath", NULL);
	if (IS_ERR_OR_NULL(fp_dir)) {
		pr_err("debugfs_create_dir fastpath failed.\n");
		fp_dir = NULL;
		return count;
	}

	fp_chan = relay_open("pkt_debug", fp_dir, 0x200000, 1, &fp_relay_callbacks, NULL);
	if (!fp_chan) {
		pr_err("relay_open pkt_debug failed.\n");
		debugfs_remove(fp_dir);
		fp_dir = NULL;
	}

	return count;
}

/* sysfs attributes: world-readable, writable by owner; each pairs a show and a store handler. */
static FP_ATTR(rx_hook, S_IRUGO|S_IWUSR, rx_hook_show, rx_hook_store);
static FP_ATTR(tx_hook, S_IRUGO|S_IWUSR, tx_hook_show, tx_hook_store);
static FP_ATTR(drop_on_busy, S_IRUGO|S_IWUSR, dob_show, dob_store);
static FP_ATTR(bypass_fastpath, S_IRUGO|S_IWUSR, bypass_show, bypass_store);
static FP_ATTR(reply_ra, S_IRUGO|S_IWUSR, reply_ra_show, reply_ra_store);
static FP_ATTR(pkt_debug, S_IRUGO|S_IWUSR, pkt_debug_level_show, set_pkt_debug_level);

/* Default attribute list installed through ktype_forward. */
static struct attribute *fp_forward_attrs[] = {
	&fp_attr_rx_hook.attr,
	&fp_attr_tx_hook.attr,
	&fp_attr_drop_on_busy.attr,
	&fp_attr_bypass_fastpath.attr,
	&fp_attr_reply_ra.attr,
	&fp_attr_pkt_debug.attr,
	NULL, /* need to NULL terminate the list of attributes */
};

/*
 * kobject type of the fp_forward module object; fp_forward_release() runs
 * when the last reference to the kobject is dropped.
 */
static struct kobj_type ktype_forward = {
	.sysfs_ops	= &fp_sysfs_ops,
	.default_attrs	= fp_forward_attrs,
	.release	= fp_forward_release,
};

static int fp_forward_probe(struct fastpath_module *module)
{
	int ret;
	struct fp_forward *priv;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv) {
		pr_err("no memeory\n");
		return -ENOMEM;
	}

	module->priv = priv;
	snprintf(module->name, sizeof(module->name), "fp_forward");
	spin_lock_init(&priv->lock);

	if ((fp_forward_rx_hook > ARRAY_SIZE(rx_hooks) - 1) ||
	    (fp_forward_tx_hook > ARRAY_SIZE(tx_hooks) - 1)) {
		pr_err("Invalid hook (rx_hook=%d , tx_hook=%d)\n",
			fp_forward_rx_hook, fp_forward_tx_hook);
		ret = -EINVAL;
		goto priv_kfree;
	}
	priv->rx_hook = &rx_hooks[fp_forward_rx_hook];
	priv->tx_hook = &tx_hooks[fp_forward_tx_hook];

	ret = fp_forward_connect(priv);
	if (ret < 0) {
		pr_err("rx connect failed\n");
		goto priv_kfree;
	}

	kobject_init(&module->kobj, &ktype_forward);
	ret = kobject_add(&module->kobj, module->fastpath->kobj, "%s", module->name);
	if (ret < 0) {
		pr_err("kobject_add failed (%d)\n", ret);
		goto fp_forward_disconnect;
	}
	kobject_uevent(&module->kobj, KOBJ_ADD);

	pr_debug("fp_forward probed\n");
	return 0;

fp_forward_disconnect:
	kobject_put(&module->kobj);
	fp_forward_disconnect(priv);
priv_kfree:
	kfree(priv);
	return ret;
}

static int fp_forward_remove(struct fastpath_module *module)
{
	if (fp_chan) {
		relay_close(fp_chan);
		fp_chan = NULL;
		debugfs_remove(fp_dir);
	}

	kobject_put(&module->kobj);

	pr_debug("fp_forward removed\n");
	return 0;
}

/* Entry points of this module; non-static, so visible to other translation units. */
struct fastpath_module_ops fp_forward_ops = {
	.probe = fp_forward_probe,
	.remove = fp_forward_remove
};

/* Initial hook selection; permission 0 means no sysfs entry for the parameter. */
module_param(fp_forward_rx_hook, uint, 0);
MODULE_PARM_DESC(fp_forward_rx_hook, "fastpath forward rx hook (default="
			__MODULE_STRING(FP_FORWARD_RX_HOOK_DEFAULT) ")");
module_param(fp_forward_tx_hook, uint, 0);
/* the description used to say "rx hook" (copy-paste slip) */
MODULE_PARM_DESC(fp_forward_tx_hook, "fastpath forward tx hook (default="
			__MODULE_STRING(FP_FORWARD_TX_HOOK_DEFAULT) ")");
