b.liu | e958203 | 2025-04-17 19:18:16 +0800 | [diff] [blame^] | 1 | /* |
| 2 | * Fast path database hash implementation |
| 3 | * |
| 4 | * This program is free software; you can redistribute it and/or |
| 5 | * modify it under the terms of the GNU General Public License |
| 6 | * as published by the Free Software Foundation; either version |
| 7 | * 2 of the License, or (at your option) any later version. |
| 8 | * |
| 9 | * Notes: |
| 10 | * Implementation according to Documentation/RCU/rcuref.txt |
| 11 | */ |
| 12 | |
| 13 | #define pr_fmt(fmt) "mfp" " database:%s:%d: " fmt, __func__, __LINE__ |
| 14 | |
| 15 | #include "fp_common.h" |
| 16 | #include "fp_database.h" |
| 17 | #include "fp_device.h" |
| 18 | #include "fp_core.h" |
| 19 | #ifdef CONFIG_ASR_TOE |
| 20 | #include "../linux/drivers/marvell/toev2/toe.h" |
| 21 | #endif |
| 22 | |
| 23 | #define FP_ZONE (NF_CT_DEFAULT_ZONE_ID) |
| 24 | #define GUARD_TIMEOUT_SEC (10) |
| 25 | |
| 26 | static u32 hash_rnd __read_mostly; |
| 27 | |
| 28 | static inline const char *state_to_string(enum entry_state state) |
| 29 | { |
| 30 | return entry_state_names[state]; |
| 31 | } |
| 32 | |
| 33 | static inline int __fpdb_dump_entry(char *buf, struct fpdb_entry *el) |
| 34 | { |
| 35 | int len = sprintf(buf, "fpdb dump entry (0x%p):\n", el); |
| 36 | |
| 37 | len += fp_dump_tuple(buf + len, &el->in_tuple); |
| 38 | len += sprintf(buf + len, "\n"); |
| 39 | len += fp_dump_tuple(buf + len, &el->out_tuple); |
| 40 | if (el->hh.hh_len) { |
| 41 | struct ethhdr *eth = (struct ethhdr *)(((u8 *) el->hh.hh_data) + |
| 42 | (HH_DATA_OFF(sizeof(*eth)))); |
| 43 | len += sprintf(buf + len, "\nMAC header: src=%pM dst=%pM type=%04x\n", |
| 44 | eth->h_source, eth->h_dest, eth->h_proto); |
| 45 | } else { |
| 46 | len += sprintf(buf + len, "\nMAC header was not set\n"); |
| 47 | } |
| 48 | len += sprintf(buf + len, "Interfaces: in %p: %s, out %p: %s\n", |
| 49 | el->in_dev,el->in_dev->dev->name, el->out_dev,el->out_dev->dev->name); |
| 50 | len += sprintf(buf + len, "State: %s hits=%d pointer=%p\n", |
| 51 | state_to_string(el->state), el->hit_counter, el); |
| 52 | len += sprintf(buf + len, "ct info: ct=%p timeout: %x rc=%d\n", |
| 53 | el->ct, el->ct->timeout, atomic_read(&el->rc)); |
| 54 | |
| 55 | if (debug_level & DBG_INFO) |
| 56 | len += sprintf(buf + len, "DEBUG: (NAT=%s) (route: in=%d out=%d)\n", |
| 57 | NF_CT_NAT(el->ct) ? "YES" : "NO", |
| 58 | el->debug.in_route_type, el->debug.out_route_type); |
| 59 | |
| 60 | return len; |
| 61 | } |
| 62 | |
| 63 | void fpdb_dump_entry(char *msg, struct fpdb_entry *el) |
| 64 | { |
| 65 | char buf[MAX_DEBUG_PRINT_SIZE]; |
| 66 | int len = 0; |
| 67 | |
| 68 | BUG_ON(!el); |
| 69 | |
| 70 | if (msg) |
| 71 | len = sprintf(buf, "%s", msg); |
| 72 | |
| 73 | len += __fpdb_dump_entry(buf + len, el); |
| 74 | |
| 75 | pr_err("%s", buf); |
| 76 | } |
| 77 | |
| 78 | void fpdb_dump_tuple(char *msg, struct nf_conntrack_tuple *t) |
| 79 | { |
| 80 | char buf[MAX_DEBUG_PRINT_SIZE]; |
| 81 | int len = 0; |
| 82 | |
| 83 | BUG_ON(!t); |
| 84 | |
| 85 | if (msg) |
| 86 | len = sprintf(buf, "%s", msg); |
| 87 | |
| 88 | len += sprintf(buf + len, "fpdb dump tuple:\n"); |
| 89 | len += fp_dump_tuple(buf + len, t); |
| 90 | |
| 91 | pr_err("%s\n", buf); |
| 92 | } |
| 93 | |
| 94 | static int fpdb_print_entry(struct fpdb_entry *el, void *data) |
| 95 | { |
| 96 | char in[256], out[256]; |
| 97 | unsigned int state, use; |
| 98 | int *first_entry = data; |
| 99 | |
| 100 | if (atomic_inc_not_zero(&el->rc)) { |
| 101 | if (*first_entry == true) { |
| 102 | pr_err("l2 l3 l4 timeout\thash\thits\tstate in_dev out_dev tuple_in tuple_out ct block use refcnt\n"); |
| 103 | *first_entry = false; |
| 104 | } |
| 105 | __fp_dump_tuple(in, &el->in_tuple, 0); |
| 106 | __fp_dump_tuple(out, &el->out_tuple, 0); |
| 107 | state = el->ct->proto.tcp.state; |
| 108 | use = atomic_read(&el->ct->ct_general.use); |
| 109 | pr_err("%s %s %s %d\t%d\t%d\t%s %s %s %s %s %p %d %d %d\n", |
| 110 | el->hh.hh_len ? "eth" : "NA", |
| 111 | el->in_tuple.src.l3num == AF_INET6 ? |
| 112 | "ipv6" : "ipv4", |
| 113 | el->in_tuple.dst.protonum == IPPROTO_UDP ? |
| 114 | "udp" : "tcp", |
| 115 | jiffies_to_msecs(el->ct->timeout - jiffies) / 1000U, |
| 116 | el->bucket, el->hit_counter, |
| 117 | el->in_tuple.dst.protonum == IPPROTO_UDP ? |
| 118 | "N/A" : tcp_conntrack_names[state], |
| 119 | el->in_dev->dev->name, |
| 120 | el->out_dev->dev->name, |
| 121 | in, out, el->ct, el->block, use, atomic_read(&el->rc)); |
| 122 | fpdb_put(el); |
| 123 | } |
| 124 | return 0; |
| 125 | } |
| 126 | |
| 127 | void fpdb_dump_db(void) |
| 128 | { |
| 129 | int first_entry = true; |
| 130 | fpdb_iterate(fpdb_print_entry, &first_entry); |
| 131 | } |
| 132 | |
| 133 | /**************************************************************************** |
| 134 | * Fast Path Database prototypes |
| 135 | ****************************************************************************/ |
| 136 | |
/* Hash table backing the database. */
struct fpdb_htable {
	struct hlist_head *h;	/* array of bucket heads */
	unsigned int size;	/* number of buckets in @h */
	int vmalloced;		/* only passed to the pre-3.9 nf_ct hashtable helpers */
};
| 142 | |
| 143 | struct fp_database { |
| 144 | struct fpdb_stats stats; |
| 145 | volatile u32 num_entries; |
| 146 | struct fpdb_htable htable; |
| 147 | spinlock_t lock; |
| 148 | struct nf_ct_ext_type *nfct_ext; |
| 149 | struct kmem_cache *db_cache; |
| 150 | }; |
| 151 | |
| 152 | struct timeout_entry { |
| 153 | struct list_head list; |
| 154 | struct timer_list *timeout; |
| 155 | }; |
| 156 | /**************************************************************************** |
| 157 | * Fast Path Database globals |
| 158 | ****************************************************************************/ |
| 159 | |
| 160 | static struct fp_database *db; |
| 161 | /* TODO - do we need something else here?? |
| 162 | Or is there only one "net" in ESHEL? */ |
| 163 | struct net *net = &init_net; |
| 164 | |
| 165 | #ifdef CONFIG_ASR_TOE |
| 166 | extern int fp_cm_genl_send_tuple(struct nf_conntrack_tuple *tuple, struct fpdb_entry *el, |
| 167 | int add, int len); |
| 168 | static inline bool get_remote_mac_addr(struct fpdb_entry *el, char *mac) |
| 169 | { |
| 170 | struct neighbour *neigh; |
| 171 | struct neigh_table *tbl; |
| 172 | struct nf_conntrack_tuple *tuple; |
| 173 | struct net_device *br; |
| 174 | |
| 175 | if (el->in_dev->br != el->out_dev->br) |
| 176 | return false; |
| 177 | |
| 178 | tuple = &el->in_tuple; |
| 179 | br = el->out_dev->br; |
| 180 | tbl = (tuple->src.l3num == AF_INET6) ? &nd_tbl : &arp_tbl; |
| 181 | |
| 182 | neigh = neigh_lookup(tbl, tuple->dst.u3.all, br); |
| 183 | if (neigh) { |
| 184 | memcpy(mac, neigh->ha, ETH_ALEN); |
| 185 | neigh_release(neigh); |
| 186 | return true; |
| 187 | } |
| 188 | |
| 189 | return false; |
| 190 | } |
| 191 | |
| 192 | #endif |
| 193 | |
| 194 | static void guard_timer_timeout(struct timer_list *t) |
| 195 | { |
| 196 | struct fpdb_entry *el = from_timer(el, &t, guard_timer); |
| 197 | |
| 198 | pr_err("Entry was hold and could not be removed for %d sec. [%px][rc=%d] state=%d\n", |
| 199 | GUARD_TIMEOUT_SEC, el, atomic_read(&el->rc), el->state); |
| 200 | |
| 201 | /* BUG_ON(debug_level & DBG_WARN_AS_ERR);*/ |
| 202 | if (atomic_read(&el->rc) > 0) { |
| 203 | FP_ERR_DUMP_ENTRY(NULL, el); |
| 204 | pr_err("Extend the timer when rc is not 0!\n"); |
| 205 | mod_timer(el->guard_timer, jiffies + GUARD_TIMEOUT_SEC * HZ); |
| 206 | } |
| 207 | } |
| 208 | |
| 209 | #ifdef FP_USE_SRAM_POOL_OPT |
| 210 | static void *local_nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) |
| 211 | { |
| 212 | struct hlist_nulls_head *hash; |
| 213 | unsigned int nr_slots, i; |
| 214 | size_t sz; |
| 215 | |
| 216 | BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); |
| 217 | nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); |
| 218 | sz = nr_slots * sizeof(struct hlist_nulls_head); |
| 219 | hash = (void *)sram_pool_alloc(sz); |
| 220 | if (hash && nulls) |
| 221 | for (i = 0; i < nr_slots; i++) |
| 222 | INIT_HLIST_NULLS_HEAD(&hash[i], i); |
| 223 | return hash; |
| 224 | } |
| 225 | |
| 226 | static void local_nf_ct_free_hashtable(void *hash, unsigned int size) |
| 227 | { |
| 228 | sram_pool_free((unsigned long)hash, size * sizeof(struct hlist_nulls_head)); |
| 229 | } |
| 230 | #endif |
| 231 | |
| 232 | static inline int fpdb_alloc_hashtable(struct fpdb_htable *htable) |
| 233 | { |
| 234 | /* Currently use the same size used by others.. */ |
| 235 | htable->size = nf_conntrack_htable_size; |
| 236 | #ifdef FP_USE_SRAM_POOL_OPT |
| 237 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) |
| 238 | htable->h = local_nf_ct_alloc_hashtable(&htable->size, &htable->vmalloced, 0); |
| 239 | #else |
| 240 | htable->h = local_nf_ct_alloc_hashtable(&htable->size, 0); |
| 241 | #endif |
| 242 | #else |
| 243 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) |
| 244 | htable->h = nf_ct_alloc_hashtable(&htable->size, &htable->vmalloced, 0); |
| 245 | #else |
| 246 | htable->h = nf_ct_alloc_hashtable(&htable->size, 0); |
| 247 | #endif |
| 248 | #endif |
| 249 | if (!htable->h) |
| 250 | return -ENOMEM; |
| 251 | |
| 252 | pr_debug("allocated fpdb hashtable (size = %d)\n", htable->size); |
| 253 | |
| 254 | return 0; |
| 255 | } |
| 256 | |
| 257 | static inline void fpdb_free_hashtable(struct fpdb_htable *htable) |
| 258 | { |
| 259 | #ifdef FP_USE_SRAM_POOL_OPT |
| 260 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) |
| 261 | local_nf_ct_free_hashtable(htable->h, htable->vmalloced, htable->size); |
| 262 | #else |
| 263 | local_nf_ct_free_hashtable(htable->h, htable->size); |
| 264 | #endif |
| 265 | #else |
| 266 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) |
| 267 | nf_ct_free_hashtable(htable->h, htable->vmalloced, htable->size); |
| 268 | #else |
| 269 | kvfree(htable->h); |
| 270 | htable->h = NULL; |
| 271 | #endif |
| 272 | #endif |
| 273 | } |
| 274 | |
| 275 | /**************************************************************************** |
| 276 | * Fast Path Database API |
| 277 | ****************************************************************************/ |
| 278 | |
| 279 | /** |
| 280 | * Allocates and initializes a new database entry |
| 281 | * |
| 282 | * @param flags kmalloc flags |
| 283 | * |
| 284 | * @return new allocated and initialized database entry |
| 285 | */ |
| 286 | struct fpdb_entry *fpdb_alloc(gfp_t flags) |
| 287 | { |
| 288 | struct fpdb_entry *el; |
| 289 | |
| 290 | #ifdef FP_USE_SRAM_POOL_OPT |
| 291 | el = (struct fpdb_entry *)sram_pool_alloc(sizeof(struct fpdb_entry)); |
| 292 | #else |
| 293 | el = kmem_cache_zalloc(db->db_cache, flags); |
| 294 | #endif |
| 295 | if (!el) { |
| 296 | pr_err("no memory\n"); |
| 297 | return NULL; |
| 298 | } |
| 299 | |
| 300 | spin_lock_init(&el->lock); |
| 301 | INIT_HLIST_NODE(&el->hlist); |
| 302 | INIT_LIST_HEAD(&el->debug.trace.list); |
| 303 | |
| 304 | el->state = ENTRY_INITIALIZED; |
| 305 | #ifdef CONFIG_ASR_TOE |
| 306 | el->nl_flag = 0; |
| 307 | #endif |
| 308 | return el; |
| 309 | } |
| 310 | |
| 311 | |
| 312 | /** |
| 313 | * Free a database entry |
| 314 | * |
| 315 | * @param flags fpdb_entry * e |
| 316 | * |
| 317 | * @return void |
| 318 | */ |
| 319 | void fpdb_free(struct fpdb_entry * el) |
| 320 | { |
| 321 | fpdev_put(el->out_dev); |
| 322 | fpdev_put(el->in_dev); |
| 323 | |
| 324 | #ifdef FP_USE_SRAM_POOL_OPT |
| 325 | sram_pool_free((unsigned long)el, sizeof(struct fpdb_entry)); |
| 326 | #else |
| 327 | kmem_cache_free(db->db_cache, el); |
| 328 | #endif |
| 329 | return; |
| 330 | } |
| 331 | |
| 332 | |
| 333 | /** |
| 334 | * jenkins hash function using the source tuple |
| 335 | * |
| 336 | * @return hash key |
| 337 | */ |
| 338 | static inline unsigned int |
| 339 | fpdb_hash_by_src(const struct nf_conntrack_tuple *tuple) |
| 340 | { |
| 341 | unsigned int hash_src, hash_dst, hash; |
| 342 | |
| 343 | BUG_ON(!tuple); |
| 344 | |
| 345 | hash_src = jhash_3words((__force u32) tuple->src.u3.ip, |
| 346 | (__force u32) tuple->src.u.all ^ FP_ZONE, |
| 347 | tuple->src.l3num, hash_rnd); |
| 348 | hash_dst = jhash_3words((__force u32) tuple->dst.u3.ip, |
| 349 | (__force u32) tuple->dst.u.all ^ FP_ZONE, |
| 350 | tuple->dst.protonum, hash_rnd); |
| 351 | hash = jhash_2words(hash_src, hash_dst, hash_rnd); |
| 352 | |
| 353 | return ((u64)hash * db->htable.size) >> 32; |
| 354 | } |
| 355 | |
| 356 | /** |
| 357 | * rcu callback |
| 358 | * |
| 359 | * @param head |
| 360 | */ |
| 361 | static void fpdb_rcu_free(struct rcu_head *head) |
| 362 | { |
| 363 | struct fpdb_entry *el = container_of(rcu_dereference(head), |
| 364 | struct fpdb_entry, rcu); |
| 365 | |
| 366 | if (el == NULL) { |
| 367 | pr_err("fpdb_rcu_free el = NULL!\n"); |
| 368 | return; |
| 369 | } |
| 370 | |
| 371 | BUG_ON(!el || atomic_read(&el->rc) || el->state != ENTRY_DYING); |
| 372 | |
| 373 | FP_DEBUG_DUMP_ENTRY("fpdb_rcu_free: entry was deleted\n", el); |
| 374 | |
| 375 | if (el->guard_timer) { |
| 376 | del_timer_sync(el->guard_timer); |
| 377 | kfree(el->guard_timer); |
| 378 | el->guard_timer = NULL; |
| 379 | } |
| 380 | |
| 381 | spin_lock_bh(&db->lock); |
| 382 | db->num_entries--; |
| 383 | spin_unlock_bh(&db->lock); |
| 384 | |
| 385 | fpdev_put(el->out_dev); |
| 386 | fpdev_put(el->in_dev); |
| 387 | |
| 388 | #ifdef FP_USE_SRAM_POOL_OPT |
| 389 | sram_pool_free((unsigned long)el, sizeof(struct fpdb_entry)); |
| 390 | #else |
| 391 | kmem_cache_free(db->db_cache, el); |
| 392 | #endif |
| 393 | } |
| 394 | |
| 395 | |
| 396 | |
| 397 | /** |
| 398 | * decrement an entry's reference count and delete if 0 |
| 399 | * |
| 400 | * @param el pointer to a previously allocated fpdb_entry |
| 401 | */ |
| 402 | void fpdb_put(struct fpdb_entry *el) |
| 403 | { |
| 404 | if (atomic_dec_and_test(&el->rc)) |
| 405 | call_rcu(&el->rcu, fpdb_rcu_free); |
| 406 | } |
| 407 | |
| 408 | |
| 409 | #define FP_SMALL_MEM_LIMIT (64 * 1024 * 1204) |
| 410 | /** |
| 411 | * Adds a previously allocated entry to the database |
| 412 | * and updates its reference count to 1. |
| 413 | * |
| 414 | * @attention el must be allocated first with fpdb_alloc() |
| 415 | * Initial Implementation - Hash by input tuple only |
| 416 | * @param el pointer to a previously allocated fpdb_entry |
| 417 | * |
| 418 | */ |
| 419 | void fpdb_add(struct fpdb_entry *el) |
| 420 | { |
| 421 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0) |
| 422 | int pages = totalram_pages(); |
| 423 | #else |
| 424 | int pages = totalram_pages; |
| 425 | #endif |
| 426 | unsigned int hash; |
| 427 | u32 max_num; |
| 428 | |
| 429 | #ifdef CONFIG_ASR_TOE |
| 430 | char mac[ETH_ALEN]; |
| 431 | #endif |
| 432 | |
| 433 | spin_lock_bh(&el->lock); |
| 434 | spin_lock_bh(&db->lock); |
| 435 | BUG_ON(!el || !el->out_dev); |
| 436 | BUG_ON(el->state != ENTRY_INITIALIZED); |
| 437 | |
| 438 | hash = fpdb_hash_by_src(&el->in_tuple); |
| 439 | |
| 440 | atomic_set(&el->rc, 1); |
| 441 | el->state = ENTRY_ALIVE; |
| 442 | el->bucket = hash; |
| 443 | el->tstamp = jiffies; |
| 444 | if (!el->tstamp) |
| 445 | el->tstamp = 1; |
| 446 | |
| 447 | BUG_ON(in_irq()); |
| 448 | WARN_ON_ONCE(irqs_disabled()); |
| 449 | hlist_add_head_rcu(&el->hlist, &db->htable.h[hash]); |
| 450 | db->num_entries++; |
| 451 | |
| 452 | #ifdef CONFIG_ASR_TOE |
| 453 | if (get_remote_mac_addr(el, mac)) |
| 454 | mfp_toe_add_dmac(el->out_dev->dev, mac); |
| 455 | #endif |
| 456 | |
| 457 | spin_unlock_bh(&db->lock); |
| 458 | spin_unlock_bh(&el->lock); |
| 459 | |
| 460 | /* Normally Conntrack MAX is HashSize*8. So here is not suit to only check double*/ |
| 461 | /*we will modify the code to check 6 times of hash size --Yhuang 20160617*/ |
| 462 | |
| 463 | if (pages <= (FP_SMALL_MEM_LIMIT >> PAGE_SHIFT)) |
| 464 | max_num = 2 * db->htable.size; |
| 465 | else |
| 466 | max_num = 6 * db->htable.size; |
| 467 | |
| 468 | if (unlikely(db->num_entries > max_num)) { |
| 469 | pr_err_ratelimited("%s: database overloaded (%d entries, max=%d)\n", |
| 470 | __func__, db->num_entries, max_num); |
| 471 | /* |
| 472 | if (debug_level & DBG_WARN_AS_ERR) { |
| 473 | fpdb_dump_db(); |
| 474 | BUG(); |
| 475 | } |
| 476 | */ |
| 477 | fpdb_flush(); |
| 478 | } else if (unlikely(db->num_entries > ((max_num * 3) / 4))) { |
| 479 | fpdb_del_least_used_entry(max_num); |
| 480 | } |
| 481 | |
| 482 | if (db->stats.max_entries < db->num_entries) |
| 483 | db->stats.max_entries = db->num_entries; |
| 484 | FP_DEBUG_DUMP_ENTRY("fpdb_add: entry was added\n", el); |
| 485 | } |
| 486 | |
| 487 | /** |
| 488 | * Query the database for an entry matching the input tuple |
| 489 | * and increment the reference count for that entry if found. |
| 490 | * |
| 491 | * @attention The user MUST call fpdb_put() as soon as the entry |
| 492 | * is not used! |
| 493 | * |
| 494 | * @param tuple pointer to a nf_conntrack_tuple |
| 495 | * |
| 496 | * @return pointer to the matching entry, NULL if not found |
| 497 | */ |
| 498 | struct fpdb_entry *fpdb_get(struct nf_conntrack_tuple *tuple) |
| 499 | { |
| 500 | unsigned int hash, iterations = 0; |
| 501 | struct fpdb_entry *el; |
| 502 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) |
| 503 | struct hlist_node *h; |
| 504 | #endif |
| 505 | |
| 506 | BUG_ON(!tuple); |
| 507 | |
| 508 | db->stats.lookups++; |
| 509 | |
| 510 | hash = fpdb_hash_by_src(tuple); |
| 511 | |
| 512 | rcu_read_lock_bh(); |
| 513 | |
| 514 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) |
| 515 | hlist_for_each_entry_rcu(el, h, &db->htable.h[hash], hlist) { |
| 516 | #else |
| 517 | hlist_for_each_entry_rcu(el, &db->htable.h[hash], hlist) { |
| 518 | #endif |
| 519 | if (el && nf_ct_tuple_equal(&el->in_tuple, tuple)) { |
| 520 | if (!atomic_inc_not_zero(&el->rc)) |
| 521 | goto not_found; |
| 522 | rcu_read_unlock_bh(); |
| 523 | if (!iterations) |
| 524 | db->stats.hits++; |
| 525 | el->hit_counter++; |
| 526 | FP_DEBUG_DUMP_ENTRY("fpdb_get: entry was found:\n", el); |
| 527 | return el; |
| 528 | } |
| 529 | iterations++; |
| 530 | db->stats.iterations++; /* Total Iterations*/ |
| 531 | } |
| 532 | |
| 533 | not_found: |
| 534 | rcu_read_unlock_bh(); |
| 535 | FP_DEBUG_DUMP_TUPLE("fpdb_get: entry was not found:\n", tuple); |
| 536 | |
| 537 | return NULL; |
| 538 | } |
| 539 | |
| 540 | #ifdef CONFIG_ASR_TOE |
| 541 | static int fpdb_del_toe_tuple(struct fpdb_entry *el) |
| 542 | { |
| 543 | struct toe_tuple_buff toe_tuple; |
| 544 | struct fp_net_device *dst, *src; |
| 545 | u32 nat_ip = 0; |
| 546 | u16 nat_port = 0; |
| 547 | u8 proto = 0, in_pkt = 0, out_pkt = 0, fwd = 0, nat = 0; |
| 548 | u8 rx_tx; |
| 549 | |
| 550 | BUG_ON(!el); |
| 551 | |
| 552 | if (!el->nl_flag) |
| 553 | return 0; |
| 554 | |
| 555 | src = rcu_dereference_bh(el->in_dev); |
| 556 | dst = rcu_dereference_bh(el->out_dev); |
| 557 | if (!strncasecmp(src->dev->name, "ccinet", 6)) |
| 558 | in_pkt = PDU_PKT; |
| 559 | else if (!strncasecmp(src->dev->name, "usbnet", 6)) |
| 560 | in_pkt = USB_PKT; |
| 561 | else if (!strncasecmp(src->dev->name, "wlan", 4)) |
| 562 | in_pkt = WIFI_PKT; |
| 563 | else if (!strncasecmp(src->dev->name, "eth", 3)) |
| 564 | in_pkt = ETH_PKT; |
| 565 | else |
| 566 | in_pkt = AP_PKT; |
| 567 | |
| 568 | if (!strncasecmp(dst->dev->name, "ccinet", 6)) |
| 569 | out_pkt = PDU_PKT; |
| 570 | else if (!strncasecmp(dst->dev->name, "usbnet", 6)) |
| 571 | out_pkt = USB_PKT; |
| 572 | else if (!strncasecmp(dst->dev->name, "wlan", 4)) |
| 573 | out_pkt = WIFI_PKT; |
| 574 | else if (!strncasecmp(dst->dev->name, "eth", 3)) |
| 575 | out_pkt = ETH_PKT; |
| 576 | else |
| 577 | out_pkt = AP_PKT; |
| 578 | |
| 579 | fwd = (in_pkt != AP_PKT) && (out_pkt != AP_PKT); |
| 580 | if (fwd && (el->out_tuple.src.l3num == AF_INET)) { |
| 581 | if (in_pkt == PDU_PKT && (out_pkt == USB_PKT || out_pkt == WIFI_PKT || out_pkt == ETH_PKT)) { |
| 582 | nat = 1; |
| 583 | nat_ip = ntohl(el->out_tuple.src.u3.ip); |
| 584 | nat_port = ntohs(el->out_tuple.src.u.all); |
| 585 | } else if ((in_pkt == USB_PKT || in_pkt == WIFI_PKT || in_pkt == ETH_PKT) && out_pkt == PDU_PKT) { |
| 586 | nat = 1; |
| 587 | nat_ip = ntohl(el->out_tuple.dst.u3.ip); |
| 588 | nat_port = ntohs(el->out_tuple.dst.u.all); |
| 589 | } else |
| 590 | /* CP TOE WIFI/WIFI TOE CP no need nat */ |
| 591 | nat = 0; |
| 592 | } |
| 593 | |
| 594 | /* rx: cp -> ap, usb, wifi */ |
| 595 | if (in_pkt == PDU_PKT) |
| 596 | rx_tx = 1; |
| 597 | /* rx: ap -> usb, ap -> wifi */ |
| 598 | else if ((in_pkt == AP_PKT) && (out_pkt != PDU_PKT)) |
| 599 | rx_tx = 1; |
| 600 | /* |
| 601 | * tx: |
| 602 | * ap -> cp |
| 603 | * usb/wifi -> ap/cp */ |
| 604 | else |
| 605 | rx_tx = 0; |
| 606 | |
| 607 | if (el->in_tuple.src.l3num == AF_INET6) { |
| 608 | memcpy(toe_tuple.src_ip6, el->in_tuple.src.u3.all, sizeof(toe_tuple.src_ip6)); |
| 609 | memcpy(toe_tuple.dst_ip6, el->in_tuple.dst.u3.all, sizeof(toe_tuple.src_ip6)); |
| 610 | toe_tuple.ip6 = 1; |
| 611 | } else { |
| 612 | toe_tuple.src_ip = ntohl(el->in_tuple.src.u3.ip); |
| 613 | toe_tuple.dst_ip = ntohl(el->in_tuple.dst.u3.ip); |
| 614 | toe_tuple.ip6 = 0; |
| 615 | toe_tuple.nat = nat; |
| 616 | toe_tuple.nat_port = nat_port; |
| 617 | toe_tuple.nat_ip = nat_ip; |
| 618 | } |
| 619 | |
| 620 | if (el->in_tuple.dst.protonum == IPPROTO_UDP) |
| 621 | proto = TOE_UDP; |
| 622 | else if (el->in_tuple.dst.protonum == IPPROTO_TCP) |
| 623 | proto = TOE_TCP; |
| 624 | else |
| 625 | proto = TOE_MAX; |
| 626 | |
| 627 | toe_tuple.src_port = ntohs(el->in_tuple.src.u.all); |
| 628 | toe_tuple.dst_port = ntohs(el->in_tuple.dst.u.all); |
| 629 | toe_tuple.prot = proto; |
| 630 | toe_tuple.fwd = fwd; |
| 631 | toe_tuple.rxtx = rx_tx; |
| 632 | toe_tuple.out_pkt = out_pkt; |
| 633 | |
| 634 | return toe_del_connection(&toe_tuple); |
| 635 | } |
| 636 | #endif |
| 637 | |
| 638 | void __fpdb_del(struct fpdb_entry *entry, bool hlist_del) |
| 639 | { |
| 640 | BUG_ON(!entry); |
| 641 | if(entry->state != ENTRY_ALIVE) |
| 642 | return; |
| 643 | |
| 644 | entry->state = ENTRY_DYING; |
| 645 | |
| 646 | #ifdef CONFIG_ASR_TOE |
| 647 | if (entry->nl_flag) { |
| 648 | fp_cm_genl_send_tuple(&entry->in_tuple, entry, 0, 0); |
| 649 | if (fpdb_del_toe_tuple(entry)) |
| 650 | pr_debug("fpdb_del_toe_tuple failed!!!\r\n"); |
| 651 | entry->nl_flag = 0; |
| 652 | } |
| 653 | #endif |
| 654 | |
| 655 | BUG_ON(entry->guard_timer); |
| 656 | if (hlist_del) |
| 657 | hlist_del_rcu(&entry->hlist); |
| 658 | |
| 659 | if (atomic_dec_and_test(&entry->rc)) { |
| 660 | /* move start timer here to avoid rc is not zero yhuang 20160624*/ |
| 661 | entry->guard_timer = kmalloc(sizeof(*entry->guard_timer), GFP_ATOMIC); |
| 662 | if (entry->guard_timer) { |
| 663 | timer_setup(entry->guard_timer, guard_timer_timeout, 0); |
| 664 | mod_timer(entry->guard_timer, jiffies + GUARD_TIMEOUT_SEC * HZ); |
| 665 | } else { |
| 666 | pr_err("Guard timer allocation failed!"); |
| 667 | } |
| 668 | |
| 669 | /* prevent out of order so that guard timer can be stopped */ |
| 670 | mb(); |
| 671 | call_rcu(&entry->rcu, fpdb_rcu_free); |
| 672 | } else { |
| 673 | pr_err("__fpdb_del fail. entry:%p, rc=%d, state=%d\n", entry, |
| 674 | atomic_read(&entry->rc), entry->state); |
| 675 | } |
| 676 | } |
| 677 | |
| 678 | void fpdb_lock_bh(void) |
| 679 | { |
| 680 | return spin_lock_bh(&db->lock); |
| 681 | } |
| 682 | |
| 683 | void fpdb_unlock_bh(void) |
| 684 | { |
| 685 | return spin_unlock_bh(&db->lock); |
| 686 | } |
| 687 | |
| 688 | void fpdb_del(struct fpdb_entry *entry) |
| 689 | { |
| 690 | spin_lock_bh(&db->lock); |
| 691 | __fpdb_del(entry, true); |
| 692 | spin_unlock_bh(&db->lock); |
| 693 | } |
| 694 | |
| 695 | /** |
| 696 | * Replace a previously allocated entry with an prexisting one |
| 697 | * to the database. |
| 698 | * |
| 699 | * @attention nel must be allocated first with fpdb_alloc() |
| 700 | * el - must be already in the database/ |
| 701 | * @param el pointer to a previously added fpdb_entry |
| 702 | * @param nel pointer to a newely allocated fpdb_entry |
| 703 | * NOTE: must be called from softirq/lock_bh context |
| 704 | */ |
| 705 | void fpdb_replace(struct fpdb_entry *el, struct fpdb_entry *nel) |
| 706 | { |
| 707 | unsigned int hash; |
| 708 | |
| 709 | BUG_ON(!el || !el->out_dev); |
| 710 | BUG_ON(!nel || !nel->out_dev); |
| 711 | BUG_ON(nel->state != ENTRY_INITIALIZED); |
| 712 | |
| 713 | hash = fpdb_hash_by_src(&nel->in_tuple); |
| 714 | |
| 715 | atomic_set(&nel->rc, 1); |
| 716 | nel->state = ENTRY_ALIVE; |
| 717 | nel->bucket = hash; |
| 718 | |
| 719 | BUG_ON(el->bucket != nel->bucket); |
| 720 | |
| 721 | db->num_entries++; |
| 722 | hlist_replace_rcu(&el->hlist, &nel->hlist); |
| 723 | __fpdb_del(el, false); |
| 724 | } |
| 725 | |
| 726 | |
| 727 | static int device_cmp(struct nf_conn *ct, void *dev) |
| 728 | { |
| 729 | struct nf_conn_fastpath *fp = nfct_fastpath(ct); |
| 730 | struct fpdb_entry *orig, *reply; |
| 731 | struct net_device *net = (struct net_device *)dev; |
| 732 | |
| 733 | if (!fp) |
| 734 | return 0; |
| 735 | |
| 736 | orig = fp->fpd_el[IP_CT_DIR_ORIGINAL]; |
| 737 | reply = fp->fpd_el[IP_CT_DIR_REPLY]; |
| 738 | |
| 739 | if (orig && (fpdev_cmp_if(orig->in_dev, net) || |
| 740 | fpdev_cmp_if(orig->out_dev, net))) |
| 741 | return 1; |
| 742 | if (reply && (fpdev_cmp_if(reply->in_dev, net) || |
| 743 | fpdev_cmp_if(reply->out_dev, net))) |
| 744 | return 1; |
| 745 | |
| 746 | return 0; |
| 747 | } |
| 748 | |
| 749 | static inline bool |
| 750 | tuple_cmp_port(const struct nf_conntrack_tuple *t, unsigned int port) |
| 751 | { |
| 752 | return (ntohs(t->dst.u.all) == port || ntohs(t->src.u.all) == port); |
| 753 | } |
| 754 | |
| 755 | static int port_cmp(struct nf_conn *ct, void *ptr) |
| 756 | { |
| 757 | struct nf_conn_fastpath *fp = nfct_fastpath(ct); |
| 758 | struct fpdb_entry *orig, *reply; |
| 759 | unsigned int port = (unsigned int)(unsigned long)ptr; |
| 760 | |
| 761 | if (!fp) |
| 762 | return 0; |
| 763 | |
| 764 | orig = fp->fpd_el[IP_CT_DIR_ORIGINAL]; |
| 765 | reply = fp->fpd_el[IP_CT_DIR_REPLY]; |
| 766 | |
| 767 | if (orig && (tuple_cmp_port(&orig->in_tuple, port) || |
| 768 | tuple_cmp_port(&orig->out_tuple, port))) |
| 769 | return 1; |
| 770 | if (reply && (tuple_cmp_port(&reply->in_tuple, port) || |
| 771 | tuple_cmp_port(&reply->out_tuple, port))) |
| 772 | return 1; |
| 773 | |
| 774 | return 0; |
| 775 | } |
| 776 | |
| 777 | /* kill all fastpath related conntracks */ |
| 778 | static int nf_fp_remove(struct nf_conn *ct, void *data) |
| 779 | { |
| 780 | return test_bit(IPS_FASTPATH_BIT, &ct->status); |
| 781 | } |
| 782 | |
| 783 | int fpdb_del_block_entry_by_dev(struct fpdb_entry *el, void *data) |
| 784 | { |
| 785 | struct net_device *dev = (struct net_device *)data; |
| 786 | struct nf_conn_fastpath *ct_fp; |
| 787 | |
| 788 | if (fpdev_cmp_if(el->in_dev, dev) || |
| 789 | fpdev_cmp_if(el->out_dev, dev)) { |
| 790 | |
| 791 | spin_lock_bh(&db->lock); |
| 792 | ct_fp = nfct_fastpath(el->ct); |
| 793 | if (ct_fp) { |
| 794 | if (ct_fp->fpd_el[el->dir] == NULL) { |
| 795 | spin_unlock_bh(&db->lock); |
| 796 | return 0; |
| 797 | } |
| 798 | |
| 799 | ct_fp->fpd_el[el->dir] = NULL; |
| 800 | } |
| 801 | spin_unlock_bh(&db->lock); |
| 802 | |
| 803 | fpdb_del(el); |
| 804 | printk(KERN_DEBUG "delete a block entry related to %s\n", dev->name); |
| 805 | } |
| 806 | |
| 807 | return 0; |
| 808 | } |
| 809 | |
| 810 | static int nf_fpdb_del(struct nf_conn *ct, void *del) |
| 811 | { |
| 812 | struct nf_conn_fastpath *fp = nfct_fastpath(ct); |
| 813 | struct fpdb_entry *orig, *reply; |
| 814 | |
| 815 | if (!fp) |
| 816 | return 0; |
| 817 | |
| 818 | orig = fp->fpd_el[IP_CT_DIR_ORIGINAL]; |
| 819 | reply = fp->fpd_el[IP_CT_DIR_REPLY]; |
| 820 | |
| 821 | if (orig && orig == (struct fpdb_entry *)del) { |
| 822 | orig->tstamp = 0; |
| 823 | return 1; |
| 824 | } |
| 825 | |
| 826 | if (reply && reply == (struct fpdb_entry *)del) { |
| 827 | reply->tstamp = 0; |
| 828 | return 1; |
| 829 | } |
| 830 | |
| 831 | return 0; |
| 832 | } |
| 833 | |
| 834 | static int fpdb_find_lest_used_entry(struct fpdb_entry *el, void *data) |
| 835 | { |
| 836 | struct fpdb_entry **p_el = (struct fpdb_entry **)data; |
| 837 | |
| 838 | if (!*p_el) |
| 839 | *p_el = el; |
| 840 | else if (el->tstamp && time_before(el->tstamp, (*p_el)->tstamp)) |
| 841 | *p_el = el; |
| 842 | |
| 843 | return 0; |
| 844 | } |
| 845 | |
| 846 | void fpdb_del_least_used_entry(int max_num) |
| 847 | { |
| 848 | struct fpdb_entry *el = NULL; |
| 849 | |
| 850 | fpdb_iterate(fpdb_find_lest_used_entry, &el); |
| 851 | |
| 852 | if (!el) |
| 853 | return; |
| 854 | |
| 855 | pr_info_ratelimited("%s: el=0x%x (%d entries, max=%d)\n", |
| 856 | __func__, (unsigned)el, db->num_entries, max_num); |
| 857 | nf_ct_iterate_cleanup(&nf_fpdb_del, (void *)el, 0, 0); |
| 858 | } |
| 859 | |
| 860 | /** |
| 861 | * Remove all fastpath related connections with the specified network device |
| 862 | * |
| 863 | * caller should have rtnl locked |
| 864 | * |
| 865 | * @param dev |
| 866 | */ |
| 867 | void fpdb_del_by_dev(struct net_device *dev) |
| 868 | { |
| 869 | nf_ct_iterate_cleanup(&device_cmp, (void *)dev, 0, 0); |
| 870 | |
| 871 | printk(KERN_DEBUG "All entries related to %s deleted\n", dev->name); |
| 872 | } |
| 873 | |
| 874 | /** |
| 875 | * Remove all fastpath related connections with the specified port |
| 876 | * |
| 877 | * caller should have rtnl locked |
| 878 | * |
| 879 | * @param port |
| 880 | */ |
| 881 | void fpdb_del_by_port(unsigned int port) |
| 882 | { |
| 883 | nf_ct_iterate_cleanup(&port_cmp, (void *)(unsigned long)port, 0, 0); |
| 884 | |
| 885 | pr_debug("All entries with port=%d deleted\n", port); |
| 886 | } |
| 887 | |
| 888 | /** |
| 889 | * flush the entire database by cleaning all fastpath related |
| 890 | * conntracks |
| 891 | * |
| 892 | * MUST BE CALLED IN PROCESS CONTEXT |
| 893 | */ |
| 894 | void fpdb_flush(void) |
| 895 | { |
| 896 | nf_ct_iterate_cleanup(&nf_fp_remove, 0, 0, 0); |
| 897 | |
| 898 | pr_debug("All entries flushed\n"); |
| 899 | } |
| 900 | |
| 901 | /** |
| 902 | * Iterate through all fpdb entries |
| 903 | * MUST BE CALLED IN PROCESS CONTEXT |
| 904 | * |
| 905 | * @param iter callback function called per every entry |
| 906 | * If returns 0, iteration stops. |
| 907 | * @param data private data to be passed to the iter callback |
| 908 | */ |
| 909 | void fpdb_iterate(int (*iter)(struct fpdb_entry *e, void *data), void *data) |
| 910 | { |
| 911 | int i; |
| 912 | struct fpdb_entry *e; |
| 913 | |
| 914 | for (i = 0; i < db->htable.size; i++) { |
| 915 | rcu_read_lock_bh(); |
| 916 | hlist_for_each_entry_rcu(e, &db->htable.h[i], hlist) { |
| 917 | if (iter(e, data)) |
| 918 | break; |
| 919 | } |
| 920 | rcu_read_unlock_bh(); |
| 921 | } |
| 922 | } |
| 923 | |
| 924 | /** |
| 925 | * Add the current entry state to the entry's trace buffer when |
| 926 | * debug_level mask contains DBG_TRACE_LOG |
| 927 | * |
| 928 | * @param entry - entry to log |
| 929 | * @param tcph - NULL for UDP |
| 930 | */ |
| 931 | void fpdb_trace(struct fpdb_entry *entry, struct tcphdr *tcph) |
| 932 | { |
| 933 | if (debug_level & DBG_TRACE_LOG) { |
| 934 | struct fpdb_trace *trace = kzalloc(sizeof(struct fpdb_trace), GFP_ATOMIC); |
| 935 | |
| 936 | BUG_ON(!entry); |
| 937 | |
| 938 | trace->timeout = jiffies_to_msecs(entry->ct->timeout - jiffies) / 1000U; |
| 939 | trace->ct_status = entry->ct->status; |
| 940 | trace->hit_counter = entry->hit_counter; |
| 941 | |
| 942 | if (tcph) { |
| 943 | trace->tcp_state = entry->ct->proto.tcp.state; |
| 944 | trace->tcph = *tcph; |
| 945 | } |
| 946 | |
| 947 | list_add(&trace->list, &entry->debug.trace.list); |
| 948 | if (++entry->debug.trace.sz > 5) { |
| 949 | /* TODO - change to configurable param */ |
| 950 | trace = list_entry(entry->debug.trace.list.prev, struct fpdb_trace, list); |
| 951 | list_del(entry->debug.trace.list.prev); |
| 952 | kfree(trace); |
| 953 | entry->debug.trace.sz--; |
| 954 | } |
| 955 | } |
| 956 | } |
| 957 | |
| 958 | /**************************************************************************** |
| 959 | * Fast Path Database private |
| 960 | ****************************************************************************/ |
| 961 | |
| 962 | /* SYS FS and PROC FS */ |
| 963 | |
| 964 | static void fpdb_get_stats(void) |
| 965 | { |
| 966 | int i, count, max = 0; |
| 967 | struct fpdb_entry *el; |
| 968 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) |
| 969 | struct hlist_node *h; |
| 970 | #endif |
| 971 | |
| 972 | memset(db->stats.hist, 0, sizeof(db->stats.hist)); |
| 973 | db->stats.num_occupied = 0; |
| 974 | |
| 975 | for (i = 0; i < db->htable.size; i++) { |
| 976 | count = 0; |
| 977 | |
| 978 | rcu_read_lock_bh(); |
| 979 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) |
| 980 | hlist_for_each_entry_rcu(el, h, &db->htable.h[i], hlist) |
| 981 | #else |
| 982 | hlist_for_each_entry_rcu(el, &db->htable.h[i], hlist) |
| 983 | #endif |
| 984 | count++; |
| 985 | rcu_read_unlock_bh(); |
| 986 | |
| 987 | if (count) |
| 988 | db->stats.num_occupied++; |
| 989 | |
| 990 | if (count < HISTOGRAM_SIZE) { |
| 991 | db->stats.hist[count].buckets++; |
| 992 | db->stats.hist[count].entries += count; |
| 993 | } else { |
| 994 | db->stats.hist[HISTOGRAM_SIZE].buckets++; |
| 995 | db->stats.hist[HISTOGRAM_SIZE].entries += count; |
| 996 | } |
| 997 | |
| 998 | max = (count > max) ? count : max; |
| 999 | } |
| 1000 | |
| 1001 | db->stats.largest_bucket = max; |
| 1002 | } |
| 1003 | |
| 1004 | static ssize_t fpdb_sysfs_flush(struct fastpath_module *m, const char *buf, size_t count) |
| 1005 | { |
| 1006 | struct net_device *dev; |
| 1007 | |
| 1008 | if (count > 2) { |
| 1009 | char *str = kmalloc(sizeof(char)*count, GFP_KERNEL); |
| 1010 | sprintf(str, "%s", buf); |
| 1011 | str[count-1] = '\0'; |
| 1012 | dev = dev_get_by_name(&init_net, str); |
| 1013 | kfree(str); |
| 1014 | |
| 1015 | if (dev) { |
| 1016 | fpdb_del_by_dev(dev); |
| 1017 | dev_put(dev); |
| 1018 | return count; |
| 1019 | } |
| 1020 | } |
| 1021 | |
| 1022 | fpdb_flush(); |
| 1023 | return count; |
| 1024 | } |
| 1025 | |
| 1026 | static ssize_t fpdb_sysfs_stats_show(struct fastpath_module *m, char *buf) |
| 1027 | { |
| 1028 | int len, i; |
| 1029 | u32 sum_pct = 0; |
| 1030 | |
| 1031 | fpdb_get_stats(); |
| 1032 | |
| 1033 | len = sprintf(buf, "Fast Path Database (HASH) statistics:\n"); |
| 1034 | len += sprintf(buf + len, "Max number of entries: %d ", |
| 1035 | db->stats.max_entries); |
| 1036 | len += sprintf(buf + len, "Total lookups: %d, Total hits: %d, " |
| 1037 | "hit rate %d%%\n", db->stats.lookups, db->stats.hits, |
| 1038 | (100 * db->stats.hits) / (db->stats.lookups ? |
| 1039 | db->stats.lookups : 1)); |
| 1040 | len += sprintf(buf + len, "Database Size is %d Buckets\n", |
| 1041 | db->htable.size); |
| 1042 | len += sprintf(buf + len, "Number of occupied buckets: %d\n", |
| 1043 | db->stats.num_occupied); |
| 1044 | len += sprintf(buf + len, "Database contains %d entries\n", |
| 1045 | db->num_entries); |
| 1046 | len += sprintf(buf + len, "Largest bucket contains %d entries\n", |
| 1047 | db->stats.largest_bucket); |
| 1048 | len += sprintf(buf + len, "Load Factor is %d (%d/%d)\n", |
| 1049 | db->num_entries / |
| 1050 | (db->htable.size ? db->htable.size : 1), |
| 1051 | db->num_entries, db->htable.size); |
| 1052 | len += sprintf(buf + len, "find_entry() iterations/lookups: %d/%d\n", |
| 1053 | db->stats.iterations, db->stats.lookups); |
| 1054 | len += sprintf(buf + len, "Histogram:\n"); |
| 1055 | len += sprintf(buf + len, "Size buckets entries sum-pct\n"); |
| 1056 | for (i = 0; i < HISTOGRAM_SIZE; i++) { |
| 1057 | if (sum_pct < 100) |
| 1058 | sum_pct += (100 * db->stats.hist[i].entries) / |
| 1059 | (db->num_entries ? |
| 1060 | db->num_entries : 1); |
| 1061 | else |
| 1062 | sum_pct = 100; |
| 1063 | |
| 1064 | len += sprintf(buf + len, "%4d%10d%10d%10d\n", i, |
| 1065 | db->stats.hist[i].buckets, |
| 1066 | db->stats.hist[i].entries, sum_pct); |
| 1067 | } |
| 1068 | len += sprintf(buf + len, ">%3d%10d%10d%10d\n", i - 1, |
| 1069 | db->stats.hist[i].buckets, |
| 1070 | db->stats.hist[i].entries, 100); |
| 1071 | |
| 1072 | return len; |
| 1073 | } |
| 1074 | |
| 1075 | static ssize_t fpdb_sysfs_stats_clear(struct fastpath_module *m, const char *buf, |
| 1076 | size_t count) |
| 1077 | { |
| 1078 | pr_debug("reset stats...\n"); |
| 1079 | memset(&db->stats, 0, sizeof(db->stats)); |
| 1080 | return count; |
| 1081 | } |
| 1082 | |
| 1083 | static unsigned int dbg_hash; |
| 1084 | |
| 1085 | static ssize_t fpdb_sysfs_entry_debug_select(struct fastpath_module *m, const char *buf, |
| 1086 | size_t count) |
| 1087 | { |
| 1088 | sscanf(buf, "%u", &dbg_hash); |
| 1089 | return count; |
| 1090 | } |
| 1091 | |
| 1092 | static ssize_t fpdb_sysfs_entry_debug_show(struct fastpath_module *m, char *buf) |
| 1093 | { |
| 1094 | struct fpdb_entry *el; |
| 1095 | int i = 0, len; |
| 1096 | struct fpdb_trace *itr; |
| 1097 | struct nf_conn_fastpath *fp_ext; |
| 1098 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) |
| 1099 | struct hlist_node *h; |
| 1100 | #endif |
| 1101 | |
| 1102 | if (dbg_hash > db->htable.size) |
| 1103 | return sprintf(buf, "invalid hash (%d)\n", dbg_hash); |
| 1104 | |
| 1105 | len = sprintf(buf, "debug info for bucket%u:\n", dbg_hash); |
| 1106 | rcu_read_lock_bh(); |
| 1107 | |
| 1108 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) |
| 1109 | hlist_for_each_entry_rcu(el, h, &db->htable.h[dbg_hash], hlist) { |
| 1110 | #else |
| 1111 | hlist_for_each_entry_rcu(el, &db->htable.h[dbg_hash], hlist) { |
| 1112 | #endif |
| 1113 | len += __fpdb_dump_entry(buf+len, el); |
| 1114 | fp_ext = nf_ct_ext_find(el->ct, NF_CT_EXT_FASTPATH); |
| 1115 | BUG_ON(!fp_ext); |
| 1116 | len += sprintf(buf+len, "fastpath_ext orig:=%p reply=%p\n", |
| 1117 | fp_ext->fpd_el[IP_CT_DIR_ORIGINAL], |
| 1118 | fp_ext->fpd_el[IP_CT_DIR_REPLY]); |
| 1119 | if (el->in_tuple.dst.protonum == IPPROTO_UDP) |
| 1120 | continue; |
| 1121 | len += sprintf(buf+len, "%d: trace:\n", i++); |
| 1122 | len += sprintf(buf+len, "hits timeout tcp_state tcp_flags ct_status\n"); |
| 1123 | list_for_each_entry(itr, &el->debug.trace.list, list) |
| 1124 | len += sprintf(buf+len, "%d %d %s %c%c%c%c%c%c %lu\n", |
| 1125 | itr->hit_counter, itr->timeout, |
| 1126 | tcp_conntrack_names[itr->tcp_state], |
| 1127 | itr->tcph.urg ? 'U' : '-', |
| 1128 | itr->tcph.ack ? 'A' : '-', |
| 1129 | itr->tcph.psh ? 'P' : '-', |
| 1130 | itr->tcph.rst ? 'R' : '-', |
| 1131 | itr->tcph.syn ? 'S' : '-', |
| 1132 | itr->tcph.fin ? 'F' : '-', |
| 1133 | itr->ct_status); |
| 1134 | } |
| 1135 | rcu_read_unlock_bh(); |
| 1136 | |
| 1137 | return len; |
| 1138 | } |
| 1139 | |
| 1140 | |
| 1141 | static FP_ATTR(stats, S_IRUGO|S_IWUSR, fpdb_sysfs_stats_show, fpdb_sysfs_stats_clear); |
| 1142 | static FP_ATTR(flush, S_IWUSR, NULL, fpdb_sysfs_flush); |
| 1143 | static FP_ATTR(bucket, S_IRUGO|S_IWUSR, fpdb_sysfs_entry_debug_show, fpdb_sysfs_entry_debug_select); |
| 1144 | |
| 1145 | static struct attribute *fp_database_attrs[] = { |
| 1146 | &fp_attr_stats.attr, |
| 1147 | &fp_attr_flush.attr, |
| 1148 | &fp_attr_bucket.attr, |
| 1149 | NULL, /* need to NULL terminate the list of attributes */ |
| 1150 | }; |
| 1151 | |
| 1152 | #ifdef CONFIG_PROC_FS |
| 1153 | static bool first; |
| 1154 | struct fpdb_iter_state { |
| 1155 | struct seq_net_private p; |
| 1156 | unsigned int bucket; |
| 1157 | }; |
| 1158 | |
| 1159 | static struct hlist_node *fpdb_entries_get_first(struct seq_file *seq) |
| 1160 | { |
| 1161 | struct fpdb_iter_state *st = seq->private; |
| 1162 | struct hlist_node *n; |
| 1163 | |
| 1164 | for (st->bucket = 0; st->bucket < db->htable.size; st->bucket++) { |
| 1165 | n = rcu_dereference(db->htable.h[st->bucket].first); |
| 1166 | if (n) { |
| 1167 | first = true; |
| 1168 | return n; |
| 1169 | } |
| 1170 | } |
| 1171 | return NULL; |
| 1172 | } |
| 1173 | |
| 1174 | static struct hlist_node *fpdb_entries_get_next(struct seq_file *seq, |
| 1175 | struct hlist_node *head) |
| 1176 | { |
| 1177 | struct fpdb_iter_state *st = seq->private; |
| 1178 | |
| 1179 | first = false; |
| 1180 | head = rcu_dereference(head->next); |
| 1181 | |
| 1182 | while (head == NULL) { |
| 1183 | if (++st->bucket >= db->htable.size) |
| 1184 | return NULL; |
| 1185 | head = rcu_dereference(db->htable.h[st->bucket].first); |
| 1186 | } |
| 1187 | return head; |
| 1188 | } |
| 1189 | |
| 1190 | static struct hlist_node *fpdb_entries_get_idx(struct seq_file *seq, loff_t pos) |
| 1191 | { |
| 1192 | struct hlist_node *head = fpdb_entries_get_first(seq); |
| 1193 | |
| 1194 | if (head) |
| 1195 | while (pos && (head = fpdb_entries_get_next(seq, head))) |
| 1196 | pos--; |
| 1197 | return pos ? NULL : head; |
| 1198 | } |
| 1199 | |
| 1200 | static void *fpdb_seq_start(struct seq_file *seq, loff_t *pos) |
| 1201 | __acquires(RCU) |
| 1202 | { |
| 1203 | rcu_read_lock_bh(); |
| 1204 | return fpdb_entries_get_idx(seq, *pos); |
| 1205 | } |
| 1206 | |
| 1207 | static void *fpdb_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
| 1208 | { |
| 1209 | (*pos)++; |
| 1210 | return fpdb_entries_get_next(seq, v); |
| 1211 | } |
| 1212 | |
| 1213 | static void fpdb_seq_stop(struct seq_file *seq, void *v) |
| 1214 | __releases(RCU) |
| 1215 | { |
| 1216 | rcu_read_unlock_bh(); |
| 1217 | } |
| 1218 | |
| 1219 | static int fpdb_seq_show(struct seq_file *s, void *v) |
| 1220 | { |
| 1221 | struct hlist_node *n = v; |
| 1222 | struct fpdb_entry *el; |
| 1223 | char in[256], out[256]; |
| 1224 | unsigned int state, use; |
| 1225 | |
| 1226 | el = hlist_entry(n, struct fpdb_entry, hlist); |
| 1227 | if (atomic_inc_not_zero(&el->rc)) { |
| 1228 | if (first == true) { |
| 1229 | seq_printf(s, "l2 l3 l4 timeout\thash\thits\tstate in_dev out_dev tuple_in tuple_out ct block use refcnt\n"); |
| 1230 | } |
| 1231 | __fp_dump_tuple(in, &el->in_tuple, 0); |
| 1232 | __fp_dump_tuple(out, &el->out_tuple, 0); |
| 1233 | state = el->ct->proto.tcp.state; |
| 1234 | use = atomic_read(&el->ct->ct_general.use); |
| 1235 | seq_printf(s, "%s %s %s %d\t%d\t%d\t%s %s %s %s %s %p %d %d %d" |
| 1236 | #ifdef CONFIG_ASR_TOE |
| 1237 | " %dKbps" |
| 1238 | #endif |
| 1239 | "\n", |
| 1240 | el->hh.hh_len ? "eth" : "NA", |
| 1241 | el->in_tuple.src.l3num == AF_INET6 ? |
| 1242 | "ipv6" : "ipv4", |
| 1243 | el->in_tuple.dst.protonum == IPPROTO_UDP ? |
| 1244 | "udp" : "tcp", |
| 1245 | jiffies_to_msecs(el->ct->timeout - jiffies) / 1000U, |
| 1246 | el->bucket, el->hit_counter, |
| 1247 | el->in_tuple.dst.protonum == IPPROTO_UDP ? |
| 1248 | "N/A" : tcp_conntrack_names[state], |
| 1249 | el->in_dev->dev->name, |
| 1250 | el->out_dev->dev->name, |
| 1251 | in, out, el->ct, el->block, use, atomic_read(&el->rc) |
| 1252 | #ifdef CONFIG_ASR_TOE |
| 1253 | , el->speed |
| 1254 | #endif |
| 1255 | ); |
| 1256 | fpdb_put(el); |
| 1257 | } |
| 1258 | return 0; |
| 1259 | } |
| 1260 | |
| 1261 | static const struct seq_operations fpdb_seq_ops = { |
| 1262 | .start = fpdb_seq_start, |
| 1263 | .next = fpdb_seq_next, |
| 1264 | .stop = fpdb_seq_stop, |
| 1265 | .show = fpdb_seq_show |
| 1266 | }; |
| 1267 | |
| 1268 | #endif /* CONFIG_PROC_FS */ |
| 1269 | |
/*
 * Per-network-namespace init: create the read-only "fastpath" entry under
 * the namespace's proc_net directory.
 *
 * fpdb_seq_ops and struct fpdb_iter_state only exist when CONFIG_PROC_FS
 * is set, and without procfs the proc_create_net() stub evaluates to
 * NULL, so the creation (and its failure check) is compiled out in that
 * configuration instead of failing every namespace init with -ENOMEM.
 *
 * @return 0 on success, -ENOMEM if the proc entry cannot be created
 */
static int fpdb_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	if (!proc_create_net("fastpath", 0440, net->proc_net, &fpdb_seq_ops,
			     sizeof(struct fpdb_iter_state)))
		return -ENOMEM;
#endif
	return 0;
}
| 1278 | |
| 1279 | static void fpdb_net_exit(struct net *net) |
| 1280 | { |
| 1281 | remove_proc_entry("fastpath", net->proc_net); |
| 1282 | } |
| 1283 | |
| 1284 | static struct pernet_operations fpdb_net_ops = { |
| 1285 | .init = fpdb_net_init, |
| 1286 | .exit = fpdb_net_exit, |
| 1287 | }; |
| 1288 | |
| 1289 | static void fp_database_release(struct kobject *kobj) |
| 1290 | { |
| 1291 | struct fastpath_module *module = to_fpmod(kobj); |
| 1292 | int wait_time = 200; |
| 1293 | |
| 1294 | fpdb_flush(); |
| 1295 | do { |
| 1296 | /* wait all fpdb freed, then call kmem_cache_destroy */ |
| 1297 | synchronize_rcu(); |
| 1298 | msleep(10); |
| 1299 | if (--wait_time <= 0) |
| 1300 | break; |
| 1301 | } while (db->num_entries); |
| 1302 | |
| 1303 | pr_info("%d fpdb entry left\n", db->num_entries); |
| 1304 | nf_ct_extend_unregister(db->nfct_ext); |
| 1305 | unregister_pernet_subsys(&fpdb_net_ops); |
| 1306 | fpdb_free_hashtable(&db->htable); |
| 1307 | kmem_cache_destroy(db->db_cache); |
| 1308 | #ifdef FP_USE_SRAM_POOL_OPT |
| 1309 | sram_pool_free((unsigned long)db, sizeof(struct fp_database)); |
| 1310 | #else |
| 1311 | kfree(db); |
| 1312 | #endif |
| 1313 | kfree(module); |
| 1314 | |
| 1315 | pr_debug("fp_database released\n"); |
| 1316 | } |
| 1317 | |
| 1318 | static struct kobj_type ktype_database = { |
| 1319 | .sysfs_ops = &fp_sysfs_ops, |
| 1320 | .default_attrs = fp_database_attrs, |
| 1321 | .release = fp_database_release, |
| 1322 | }; |
| 1323 | |
| 1324 | static void fpdb_destroy_ext(struct nf_conn *ct) |
| 1325 | { |
| 1326 | struct nf_conn_fastpath *ct_fp; |
| 1327 | struct fpdb_entry *orig, *reply; |
| 1328 | BUG_ON(!ct); |
| 1329 | |
| 1330 | spin_lock_bh(&db->lock); |
| 1331 | ct_fp = nfct_fastpath(ct); |
| 1332 | if (ct_fp) { |
| 1333 | orig = ct_fp->fpd_el[IP_CT_DIR_ORIGINAL]; |
| 1334 | reply = ct_fp->fpd_el[IP_CT_DIR_REPLY]; |
| 1335 | } else { |
| 1336 | orig = NULL; |
| 1337 | reply = NULL; |
| 1338 | } |
| 1339 | |
| 1340 | if (orig == NULL && reply == NULL) { |
| 1341 | spin_unlock_bh(&db->lock); |
| 1342 | return; |
| 1343 | } |
| 1344 | |
| 1345 | ct_fp->fpd_el[IP_CT_DIR_ORIGINAL] = NULL; |
| 1346 | ct_fp->fpd_el[IP_CT_DIR_REPLY] = NULL; |
| 1347 | if (orig) { |
| 1348 | FP_DEBUG_DUMP_ENTRY("Delete orig entry:\n", orig); |
| 1349 | __fpdb_del(orig, true); |
| 1350 | } |
| 1351 | |
| 1352 | if (reply) { |
| 1353 | FP_DEBUG_DUMP_ENTRY("Delete reply entry:\n", reply); |
| 1354 | __fpdb_del(reply, true); |
| 1355 | } |
| 1356 | spin_unlock_bh(&db->lock); |
| 1357 | } |
| 1358 | |
| 1359 | static struct nf_ct_ext_type fpdb_ct_extend = { |
| 1360 | .len = sizeof(struct nf_conn_fastpath), |
| 1361 | .align = __alignof__(struct nf_conn_fastpath), |
| 1362 | .id = NF_CT_EXT_FASTPATH, |
| 1363 | .destroy = fpdb_destroy_ext, |
| 1364 | }; |
| 1365 | |
| 1366 | static int fp_database_probe(struct fastpath_module *module) |
| 1367 | { |
| 1368 | struct fp_database *priv; |
| 1369 | int ret; |
| 1370 | |
| 1371 | #ifdef FP_USE_SRAM_POOL_OPT |
| 1372 | priv = (struct fp_database *)sram_pool_alloc(sizeof(struct fp_database)); |
| 1373 | #else |
| 1374 | priv = kzalloc(sizeof(struct fp_database), GFP_KERNEL); |
| 1375 | #endif |
| 1376 | if (!priv) { |
| 1377 | pr_err("no memory\n"); |
| 1378 | return -ENOMEM; |
| 1379 | } |
| 1380 | spin_lock_init(&priv->lock); |
| 1381 | get_random_bytes(&hash_rnd, sizeof(hash_rnd)); |
| 1382 | |
| 1383 | priv->db_cache = kmem_cache_create("fpdb_entry", |
| 1384 | sizeof(struct fpdb_entry), 0, SLAB_HWCACHE_ALIGN, NULL); |
| 1385 | if (!priv->db_cache) { |
| 1386 | pr_err("kmem_cache_create fpdb_entry failed\n"); |
| 1387 | ret = -ENOMEM; |
| 1388 | goto kfree_priv; |
| 1389 | } |
| 1390 | |
| 1391 | ret = fpdb_alloc_hashtable(&priv->htable); |
| 1392 | if (ret < 0) { |
| 1393 | pr_err("fpdb_alloc_hashtable failed (ret=%d)\n", ret); |
| 1394 | goto kfree_cache; |
| 1395 | } |
| 1396 | |
| 1397 | ret = register_pernet_subsys(&fpdb_net_ops); |
| 1398 | if (ret < 0) { |
| 1399 | pr_err("cannot register pernet operations (ret=%d)\n", ret); |
| 1400 | goto free_hashtable; |
| 1401 | } |
| 1402 | |
| 1403 | priv->nfct_ext = &fpdb_ct_extend; |
| 1404 | ret = nf_ct_extend_register(priv->nfct_ext); |
| 1405 | if (ret < 0) { |
| 1406 | pr_err("nf_ct_extend_register failed (%d)\n", ret); |
| 1407 | goto unreg_pernet; |
| 1408 | } |
| 1409 | |
| 1410 | db = module->priv = priv; |
| 1411 | snprintf(module->name, sizeof(module->name), "fp_database"); |
| 1412 | |
| 1413 | kobject_init(&module->kobj, &ktype_database); |
| 1414 | ret = kobject_add(&module->kobj, module->fastpath->kobj, "%s", module->name); |
| 1415 | if (ret < 0) { |
| 1416 | pr_err("kobject_add failed (%d)\n", ret); |
| 1417 | goto nf_ct_extend_unreg; |
| 1418 | } |
| 1419 | kobject_uevent(&module->kobj, KOBJ_ADD); |
| 1420 | |
| 1421 | pr_debug("fp_database probed\n"); |
| 1422 | return 0; |
| 1423 | |
| 1424 | nf_ct_extend_unreg: |
| 1425 | kobject_put(&module->kobj); |
| 1426 | nf_ct_extend_unregister(priv->nfct_ext); |
| 1427 | unreg_pernet: |
| 1428 | unregister_pernet_subsys(&fpdb_net_ops); |
| 1429 | free_hashtable: |
| 1430 | fpdb_free_hashtable(&priv->htable); |
| 1431 | kfree_cache: |
| 1432 | kmem_cache_destroy(priv->db_cache); |
| 1433 | kfree_priv: |
| 1434 | #ifdef FP_USE_SRAM_POOL_OPT |
| 1435 | sram_pool_free((unsigned long)priv, sizeof(struct fp_database)); |
| 1436 | #else |
| 1437 | kfree(priv); |
| 1438 | #endif |
| 1439 | return ret; |
| 1440 | } |
| 1441 | |
| 1442 | static int fp_database_remove(struct fastpath_module *module) |
| 1443 | { |
| 1444 | kobject_put(&module->kobj); |
| 1445 | |
| 1446 | pr_debug("fp_database removed\n"); |
| 1447 | return 0; |
| 1448 | } |
| 1449 | |
| 1450 | struct fastpath_module_ops fp_database_ops = { |
| 1451 | .probe = fp_database_probe, |
| 1452 | .remove = fp_database_remove, |
| 1453 | }; |
| 1454 | |