// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/kmemleak.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>

#include <trace/events/neigh.h>

#define DEBUG
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev);

#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
#endif
/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All scans/updates of the hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send anything to the network.
     Doing so will result in deadlocks if the backend/driver wants to
     use the neighbour cache.
   - If an entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
   - timer
   - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   dev->hard_header is assumed to be simplistic and not to make
   callbacks to neighbour tables.
 */
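
/*
 * Typical pattern implied by the rules above, for doing non-trivial
 * work on an entry found during a bucket scan (illustrative sketch
 * only; do_slow_work() is a hypothetical helper, not part of this
 * file):
 *
 *	write_lock_bh(&tbl->lock);
 *	n = ...;			// found in a hash bucket
 *	neigh_hold(n);			// pin the entry first
 *	write_unlock_bh(&tbl->lock);
 *	do_slow_work(n);		// may call drivers / send packets
 *	neigh_release(n);
 */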

static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	trace_neigh_cleanup_and_release(neigh, 0);
	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}

/*
 * Returns a random value in the interval (1/2)*base...(3/2)*base.
 * It corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (prandom_u32() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
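
/*
 * Worked example: with base = 30 * HZ (30 seconds),
 * prandom_u32() % base is uniform in [0, 30 s) and base >> 1 is 15 s,
 * so the result is uniform in [15 s, 45 s) -- exactly
 * [base/2, 3*base/2) as the comment above states.
 */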

static void neigh_mark_dead(struct neighbour *n)
{
	n->dead = 1;
	if (!list_empty(&n->gc_list)) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	}
}

static void neigh_update_gc_list(struct neighbour *n)
{
	bool on_gc_list, exempt_from_gc;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);

	if (n->dead)
		goto out;

	/* remove from the gc list if new state is permanent or if neighbor
	 * is externally learned; otherwise entry should be on the gc list
	 */
	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
			 n->flags & NTF_EXT_LEARNED;
	on_gc_list = !list_empty(&n->gc_list);

	if (exempt_from_gc && on_gc_list) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	} else if (!exempt_from_gc && !on_gc_list) {
		/* add entries to the tail; cleaning removes from the front */
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
		atomic_inc(&n->tbl->gc_entries);
	}

out:
	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}

static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
				     int *notify)
{
	bool rc = false;
	u8 ndm_flags;

	if (!(flags & NEIGH_UPDATE_F_ADMIN))
		return rc;

	ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
	if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
		if (ndm_flags & NTF_EXT_LEARNED)
			neigh->flags |= NTF_EXT_LEARNED;
		else
			neigh->flags &= ~NTF_EXT_LEARNED;
		rc = true;
		*notify = 1;
	}

	return rc;
}

static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
		      struct neigh_table *tbl)
{
	bool retval = false;

	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1) {
		struct neighbour *neigh;

		neigh = rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock));
		rcu_assign_pointer(*np, neigh);
		neigh_mark_dead(n);
		retval = true;
	}
	write_unlock(&n->lock);
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}

bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;
	void *pkey = ndel->primary_key;
	u32 hash_val;
	struct neighbour *n;
	struct neighbour __rcu **np;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
	hash_val = hash_val >> (32 - nht->hash_shift);

	np = &nht->hash_buckets[hash_val];
	while ((n = rcu_dereference_protected(*np,
					      lockdep_is_held(&tbl->lock)))) {
		if (n == ndel)
			return neigh_del(n, np, tbl);
		np = &n->next;
	}
	return false;
}

static int neigh_forced_gc(struct neigh_table *tbl)
{
	int max_clean = atomic_read(&tbl->gc_entries) -
			READ_ONCE(tbl->gc_thresh2);
	u64 tmax = ktime_get_ns() + NSEC_PER_MSEC;
	unsigned long tref = jiffies - 5 * HZ;
	struct neighbour *n, *tmp;
	int shrunk = 0;
	int loop = 0;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);

	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
		if (refcount_read(&n->refcnt) == 1) {
			bool remove = false;

			write_lock(&n->lock);
			if ((n->nud_state == NUD_FAILED) ||
			    (n->nud_state == NUD_NOARP) ||
			    (tbl->is_multicast &&
			     tbl->is_multicast(n->primary_key)) ||
			    !time_in_range(n->updated, tref, jiffies))
				remove = true;
			write_unlock(&n->lock);

			if (remove && neigh_remove_one(n, tbl))
				shrunk++;
			if (shrunk >= max_clean)
				break;
			if (++loop == 16) {
				if (ktime_get_ns() > tmax)
					goto unlock;
				loop = 0;
			}
		}
	}

	WRITE_ONCE(tbl->last_flush, jiffies);
unlock:
	write_unlock_bh(&tbl->lock);

	return shrunk;
}

static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	/* Use safe distance from the jiffies - LONG_MAX point while timer
	 * is running in DELAY/PROBE state but still show to user space
	 * large times in the past.
	 */
	unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);

	neigh_hold(n);
	if (!time_in_range(n->confirmed, mint, jiffies))
		n->confirmed = mint;
	if (time_before(n->used, n->confirmed))
		n->used = n->confirmed;
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}
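
/*
 * Example of the clamp above: time_in_range(n->confirmed, mint, jiffies)
 * holds as long as n->confirmed is no more than LONG_MAX - 86400 * HZ
 * ticks in the past.  Older stamps are pulled up to mint, which keeps
 * them a full day (86400 * HZ) clear of the jiffies - LONG_MAX wrap
 * point while still reading as "very long ago" to user space.
 */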

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net)
{
	struct sk_buff_head tmp;
	unsigned long flags;
	struct sk_buff *skb;

	skb_queue_head_init(&tmp);
	spin_lock_irqsave(&list->lock, flags);
	skb = skb_peek(list);
	while (skb != NULL) {
		struct sk_buff *skb_next = skb_peek_next(skb, list);

		if (net == NULL || net_eq(dev_net(skb->dev), net)) {
			__skb_unlink(skb, list);
			__skb_queue_tail(&tmp, skb);
		}
		skb = skb_next;
	}
	spin_unlock_irqrestore(&list->lock, flags);

	while ((skb = __skb_dequeue(&tmp))) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
			    bool skip_perm)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			if (skip_perm && n->nud_state & NUD_PERMANENT) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				rcu_dereference_protected(n->next,
					lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			neigh_mark_dead(n);
			if (refcount_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, false);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, skip_perm);
	pneigh_ifdown_and_unlock(tbl, dev);
	pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL);
	if (skb_queue_empty_lockless(&tbl->proxy_queue))
		del_timer_sync(&tbl->proxy_timer);
	return 0;
}

int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, true);
	return 0;
}
EXPORT_SYMBOL(neigh_carrier_down);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, false);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);

static struct neighbour *neigh_alloc(struct neigh_table *tbl,
				     struct net_device *dev,
				     u8 flags, bool exempt_from_gc)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries, gc_thresh3;

	if (exempt_from_gc)
		goto do_alloc;

	entries = atomic_inc_return(&tbl->gc_entries) - 1;
	gc_thresh3 = READ_ONCE(tbl->gc_thresh3);
	if (entries >= gc_thresh3 ||
	    (entries >= READ_ONCE(tbl->gc_thresh2) &&
	     time_after(now, READ_ONCE(tbl->last_flush) + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) && entries >= gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

do_alloc:
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated = n->used = now;
	n->nud_state = NUD_NONE;
	n->output = neigh_blackhole;
	n->flags = flags;
	seqlock_init(&n->hh.hh_lock);
	n->parms = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead = 1;
	INIT_LIST_HEAD(&n->gc_list);

	atomic_inc(&tbl->entries);
out:
	return n;

out_entries:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	goto out;
}
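
/*
 * Worked example of the threshold logic above, assuming the usual ARP
 * defaults gc_thresh2 = 512 and gc_thresh3 = 1024: an allocation that
 * brings gc_entries to 600 triggers neigh_forced_gc() only if the last
 * flush was more than 5 seconds ago; at 1024 and beyond, forced GC
 * always runs, and if it frees nothing the allocation fails with
 * "neighbor table overflow!".
 */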

static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE) {
		buckets = kzalloc(size, GFP_ATOMIC);
	} else {
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
	}
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}

static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE) {
		kfree(buckets);
	} else {
		kmemleak_free(buckets);
		free_pages((unsigned long)buckets, get_order(size));
	}
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);

static struct neighbour *
___neigh_create(struct neigh_table *tbl, const void *pkey,
		struct net_device *dev, u8 flags,
		bool exempt_from_gc, bool want_ref)
{
	u32 hash_val, key_len = tbl->key_len;
	struct neighbour *n1, *rc, *n;
	struct neigh_hash_table *nht;
	int error;

	n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
	trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
					    lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (!exempt_from_gc)
		list_add_tail(&n->gc_list, &n->tbl->gc_list);

	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	neigh_release(n);
	goto out;
}

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	return ___neigh_create(tbl, pkey, dev, 0, false, want_ref);
}
EXPORT_SYMBOL(__neigh_create);

static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);

	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}
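
/*
 * Worked example: for an IPv4 proxy key (key_len == 4), hash_val starts
 * as the 32-bit address itself.  The three XOR folds (>> 16, >> 8, >> 4)
 * mix all bytes into the low nibble, and masking with PNEIGH_HASHMASK
 * (0xF) then selects one of the 16 proxy hash buckets.
 */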

static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      unsigned int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
				     struct net *net, const void *pkey,
				     struct net_device *dev)
{
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);

struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
				   struct net *net, const void *pkey,
				   struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);


int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		if (n->dev)
			dev_put(n->dev);
		kfree(n);
	}
	return -ENOENT;
}

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}

/*
 *	neighbour must already be out of the table;
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);

/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}

static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */
	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;

		WRITE_ONCE(tbl->last_rand, jiffies);
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < READ_ONCE(tbl->gc_thresh1))
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags & NTF_EXT_LEARNED)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed) &&
			    time_is_before_eq_jiffies(n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     !time_in_range_open(jiffies, n->used,
						 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				neigh_mark_dead(n);
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;

	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
		NEIGH_VAR(p, MCAST_PROBES));
}
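
/*
 * Worked example, assuming the usual IPv4 defaults ucast_solicit = 3,
 * app_solicit = 0, mcast_solicit = 3 and mcast_resolicit = 0: an entry
 * outside NUD_PROBE may send up to 3 + 0 + 3 = 6 probes in total, while
 * one already in NUD_PROBE uses MCAST_REPROBES instead, i.e.
 * 3 + 0 + 0 = 3.
 */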

static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a delicate place.  report_unreachable() is a very
	   complicated routine; in particular, it can hit the same
	   neighbour entry!

	   So we try to be careful here and avoid an endless loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);

	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	consume_skb(skb);
}

/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	trace_neigh_timer_handler(neigh, 0);

	neigh_release(neigh);
}
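
/*
 * Sketch of the NUD transitions driven by the handler above:
 *
 *   REACHABLE --(reachable_time since confirm, not used recently)--> STALE
 *   REACHABLE --(used within delay_probe_time)--------------------> DELAY
 *   DELAY     --(confirmed within delay_probe_time)---------------> REACHABLE
 *   DELAY     --(delay_probe_time elapsed)------------------------> PROBE
 *   PROBE / INCOMPLETE --(neigh_max_probes() exhausted)-----------> FAILED
 *
 * STALE -> DELAY happens on an output attempt, in __neigh_event_send()
 * below rather than in the timer.
 */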

int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh_del_timer(neigh);
			neigh->nud_state = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh_del_timer(neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	trace_neigh_event_send_done(neigh, rc);
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	trace_neigh_event_send_dead(neigh, 1);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);

static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (READ_ONCE(hh->hh_len)) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}

/* Generic update routine.
   -- lladdr is the new lladdr or NULL, if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding the existing lladdr,
				if it differs.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect the existing "connected"
				lladdr instead of overriding it
				if it differs.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
	NEIGH_UPDATE_F_USE	means that the entry is user triggered.
	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding the existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates whether the neighbour is known
				as a router.

   Caller MUST hold a reference count on the entry.
 */
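
/*
 * Example: when an ARP reply arrives for an entry being resolved, the
 * IPv4 side ends up calling roughly
 *
 *	neigh_update(n, sha, NUD_REACHABLE,
 *		     NEIGH_UPDATE_F_OVERRIDE, 0);
 *
 * i.e. "here is a confirmed lladdr; replace whatever was cached".
 * (Sketch only; the exact state and flags depend on the caller.)
 */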

static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
			  u8 new, u32 flags, u32 nlmsg_pid,
			  struct netlink_ext_ack *extack)
{
	bool ext_learn_change = false;
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);

	write_lock_bh(&neigh->lock);

	dev = neigh->dev;
	old = neigh->nud_state;
	err = -EPERM;

	if (neigh->dead) {
		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
		new = old;
		goto out;
	}
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
	if (flags & NEIGH_UPDATE_F_USE) {
		new = old & ~NUD_PERMANENT;
		neigh->nud_state = new;
		err = 0;
		goto out;
	}

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare the new lladdr with the cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check the override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID)) {
			NL_SET_ERR_MSG(extack, "No link layer address given");
			goto out;
		}
		lladdr = neigh->ha;
	}

	/* Update the confirmed timestamp for the neighbour entry after we
	 * received an ARP packet, even if it doesn't change the IP to MAC
	 * binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If the entry was valid and the address has not changed,
	   do not change the entry state if the new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update the timestamp only once we know we will make a change to
	 * the neighbour entry.  Otherwise we risk moving the locktime window
	 * with noop updates and ignoring relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				(NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid an endless loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;

			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is? The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path. So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter)
		neigh_update_is_router(neigh, flags, &notify);
	write_unlock_bh(&neigh->lock);

	if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
		neigh_update_gc_list(neigh);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	trace_neigh_update_done(neigh, err);

	return err;
}

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
}
EXPORT_SYMBOL(neigh_update);

/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
}
EXPORT_SYMBOL(__neigh_set_probe_once);

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);

	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE, 0);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache *hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}

/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
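
/*
 * The loop above is the usual seqlock reader pattern (sketch):
 *
 *	do {
 *		seq = read_seqbegin(&neigh->ha_lock);
 *		... copy neigh->ha into the frame header ...
 *	} while (read_seqretry(&neigh->ha_lock, seq));
 *
 * If a concurrent lladdr change in __neigh_update() bumped ha_lock in
 * the meantime, the header is rebuilt, so a torn address is never used.
 */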

/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);

static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
	unsigned long sched_next = now + (prandom_u32() %
					  NEIGH_VAR(p, PROXY_DELAY));

	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);

static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						     struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl = tbl;
		refcount_set(&p->refcnt, 1);
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	if (parms->dev)
		dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;

static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;

void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	INIT_LIST_HEAD(&tbl->gc_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
		neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
				  &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   tbl->parms.reachable_time);
	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
				  &neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand = now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);

int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue, NULL);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);

static struct neigh_table *neigh_find_table(int family)
{
	struct neigh_table *tbl = NULL;

	switch (family) {
	case AF_INET:
		tbl = neigh_tables[NEIGH_ARP_TABLE];
		break;
	case AF_INET6:
		tbl = neigh_tables[NEIGH_ND_TABLE];
		break;
	}

	return tbl;
}

const struct nla_policy nda_policy[NDA_MAX+1] = {
	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
	[NDA_PROBES]		= { .type = NLA_U32 },
	[NDA_VLAN]		= { .type = NLA_U16 },
	[NDA_PORT]		= { .type = NLA_U16 },
	[NDA_VNI]		= { .type = NLA_U32 },
	[NDA_IFINDEX]		= { .type = NLA_U32 },
	[NDA_MASTER]		= { .type = NLA_U32 },
	[NDA_PROTOCOL]		= { .type = NLA_U8 },
};

static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (!dst_attr) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = __neigh_update(neigh, NULL, NUD_FAILED,
			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
			     NETLINK_CB(skb).portid, extack);
	write_lock_bh(&tbl->lock);
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}

static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	u8 protocol = 0;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
				     nda_policy, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (!tb[NDA_DST]) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
			NL_SET_ERR_MSG(extack, "Invalid link address");
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (tb[NDA_PROTOCOL])
		protocol = nla_get_u8(tb[NDA_PROTOCOL]);

	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			if (protocol)
				pn->protocol = protocol;
			err = 0;
		}
		goto out;
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "Device not specified");
		goto out;
	}

	if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
		err = -EINVAL;
		goto out;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		bool exempt_from_gc;

		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
				 ndm->ndm_flags & NTF_EXT_LEARNED;
		neigh = ___neigh_create(tbl, dst, dev,
					ndm->ndm_flags & NTF_EXT_LEARNED,
					exempt_from_gc, true);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
	}

	if (protocol)
		neigh->protocol = protocol;
	if (ndm->ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
	if (ndm->ndm_flags & NTF_ROUTER)
		flags |= NEIGH_UPDATE_F_ISROUTER;
	if (ndm->ndm_flags & NTF_USE)
		flags |= NEIGH_UPDATE_F_USE;

	err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
			     NETLINK_CB(skb).portid, extack);
	if (!err && ndm->ndm_flags & NTF_USE) {
		neigh_event_send(neigh, NULL);
		err = 0;
	}
	neigh_release(neigh);
out:
	return err;
}
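
/*
 * Example: the RTM_NEWNEIGH path above is what, e.g.,
 *
 *	ip neigh replace 192.0.2.1 lladdr 00:11:22:33:44:55 dev eth0
 *
 * exercises.  iproute2's "replace" sends NLM_F_CREATE|NLM_F_REPLACE,
 * so the OVERRIDE flags stay set and an existing entry's lladdr is
 * replaced rather than merely merged.
 */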

static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximate value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
			  NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}

static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1 = 0;
	ndtmsg->ndtm_pad2 = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, READ_ONCE(tbl->gc_interval),
			  NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, READ_ONCE(tbl->gc_thresh1)) ||
	    nla_put_u32(skb, NDTA_THRESH2, READ_ONCE(tbl->gc_thresh2)) ||
	    nla_put_u32(skb, NDTA_THRESH3, READ_ONCE(tbl->gc_thresh3)))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		long flush_delta = now - READ_ONCE(tbl->last_flush);
		long rand_delta = now - READ_ONCE(tbl->last_rand);
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= READ_ONCE(tbl->proxy_queue.qlen),
		};

		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics *st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= READ_ONCE(st->allocs);
			ndst.ndts_destroys		+= READ_ONCE(st->destroys);
			ndst.ndts_hash_grows		+= READ_ONCE(st->hash_grows);
			ndst.ndts_res_failed		+= READ_ONCE(st->res_failed);
			ndst.ndts_lookups		+= READ_ONCE(st->lookups);
			ndst.ndts_hits			+= READ_ONCE(st->hits);
			ndst.ndts_rcv_probes_mcast	+= READ_ONCE(st->rcv_probes_mcast);
			ndst.ndts_rcv_probes_ucast	+= READ_ONCE(st->rcv_probes_ucast);
			ndst.ndts_periodic_gc_runs	+= READ_ONCE(st->periodic_gc_runs);
			ndst.ndts_forced_gc_runs	+= READ_ONCE(st->forced_gc_runs);
			ndst.ndts_table_fulls		+= READ_ONCE(st->table_fulls);
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1 = 0;
	ndtmsg->ndtm_pad2 = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;
errout:
	read_unlock_bh(&tbl->lock);
2163 nlmsg_cancel(skb, nlh);
2164 return -EMSGSIZE;
2165}
2166
2167static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
2168 [NDTA_NAME] = { .type = NLA_STRING },
2169 [NDTA_THRESH1] = { .type = NLA_U32 },
2170 [NDTA_THRESH2] = { .type = NLA_U32 },
2171 [NDTA_THRESH3] = { .type = NLA_U32 },
2172 [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
2173 [NDTA_PARMS] = { .type = NLA_NESTED },
2174};
2175
2176static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
2177 [NDTPA_IFINDEX] = { .type = NLA_U32 },
2178 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
2179 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
2180 [NDTPA_APP_PROBES] = { .type = NLA_U32 },
2181 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
2182 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
2183 [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 },
2184 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
2185 [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
2186 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
2187 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
2188 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
2189 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
2190 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
2191};
2192
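/* RTM_SETNEIGHTBL handler: find the table by NDTA_NAME (and family),
 * then update per-device or default parms from the nested NDTA_PARMS
 * attribute. The gc thresholds and gc_interval are global state, so
 * they may only be changed from the initial netns. Typically reached
 * via iproute2's "ip ntable change" (illustrative; any RTM_SETNEIGHTBL
 * sender will do).
 */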
2193static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2194 struct netlink_ext_ack *extack)
2195{
2196 struct net *net = sock_net(skb->sk);
2197 struct neigh_table *tbl;
2198 struct ndtmsg *ndtmsg;
2199 struct nlattr *tb[NDTA_MAX+1];
2200 bool found = false;
2201 int err, tidx;
2202
2203 err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2204 nl_neightbl_policy, extack);
2205 if (err < 0)
2206 goto errout;
2207
2208 if (tb[NDTA_NAME] == NULL) {
2209 err = -EINVAL;
2210 goto errout;
2211 }
2212
2213 ndtmsg = nlmsg_data(nlh);
2214
2215 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2216 tbl = neigh_tables[tidx];
2217 if (!tbl)
2218 continue;
2219 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2220 continue;
2221 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2222 found = true;
2223 break;
2224 }
2225 }
2226
2227 if (!found)
2228 return -ENOENT;
2229
2230 /*
2231 * We acquire tbl->lock to be nice to the periodic timers and
2232 * make sure they always see a consistent set of values.
2233 */
2234 write_lock_bh(&tbl->lock);
2235
2236 if (tb[NDTA_PARMS]) {
2237 struct nlattr *tbp[NDTPA_MAX+1];
2238 struct neigh_parms *p;
2239 int i, ifindex = 0;
2240
2241 err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
2242 tb[NDTA_PARMS],
2243 nl_ntbl_parm_policy, extack);
2244 if (err < 0)
2245 goto errout_tbl_lock;
2246
2247 if (tbp[NDTPA_IFINDEX])
2248 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2249
2250 p = lookup_neigh_parms(tbl, net, ifindex);
2251 if (p == NULL) {
2252 err = -ENOENT;
2253 goto errout_tbl_lock;
2254 }
2255
2256 for (i = 1; i <= NDTPA_MAX; i++) {
2257 if (tbp[i] == NULL)
2258 continue;
2259
2260 switch (i) {
2261 case NDTPA_QUEUE_LEN:
2262 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2263 nla_get_u32(tbp[i]) *
2264 SKB_TRUESIZE(ETH_FRAME_LEN));
2265 break;
2266 case NDTPA_QUEUE_LENBYTES:
2267 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2268 nla_get_u32(tbp[i]));
2269 break;
2270 case NDTPA_PROXY_QLEN:
2271 NEIGH_VAR_SET(p, PROXY_QLEN,
2272 nla_get_u32(tbp[i]));
2273 break;
2274 case NDTPA_APP_PROBES:
2275 NEIGH_VAR_SET(p, APP_PROBES,
2276 nla_get_u32(tbp[i]));
2277 break;
2278 case NDTPA_UCAST_PROBES:
2279 NEIGH_VAR_SET(p, UCAST_PROBES,
2280 nla_get_u32(tbp[i]));
2281 break;
2282 case NDTPA_MCAST_PROBES:
2283 NEIGH_VAR_SET(p, MCAST_PROBES,
2284 nla_get_u32(tbp[i]));
2285 break;
2286 case NDTPA_MCAST_REPROBES:
2287 NEIGH_VAR_SET(p, MCAST_REPROBES,
2288 nla_get_u32(tbp[i]));
2289 break;
2290 case NDTPA_BASE_REACHABLE_TIME:
2291 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2292 nla_get_msecs(tbp[i]));
2293				/* update reachable_time as well; otherwise
2294				 * the change only takes effect after the next
2295				 * neigh_periodic_work recompute (which can be
2296				 * multiple minutes away)
2297				 */
2297 p->reachable_time =
2298 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2299 break;
2300 case NDTPA_GC_STALETIME:
2301 NEIGH_VAR_SET(p, GC_STALETIME,
2302 nla_get_msecs(tbp[i]));
2303 break;
2304 case NDTPA_DELAY_PROBE_TIME:
2305 NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2306 nla_get_msecs(tbp[i]));
2307 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2308 break;
2309 case NDTPA_RETRANS_TIME:
2310 NEIGH_VAR_SET(p, RETRANS_TIME,
2311 nla_get_msecs(tbp[i]));
2312 break;
2313 case NDTPA_ANYCAST_DELAY:
2314 NEIGH_VAR_SET(p, ANYCAST_DELAY,
2315 nla_get_msecs(tbp[i]));
2316 break;
2317 case NDTPA_PROXY_DELAY:
2318 NEIGH_VAR_SET(p, PROXY_DELAY,
2319 nla_get_msecs(tbp[i]));
2320 break;
2321 case NDTPA_LOCKTIME:
2322 NEIGH_VAR_SET(p, LOCKTIME,
2323 nla_get_msecs(tbp[i]));
2324 break;
2325 }
2326 }
2327 }
2328
2329 err = -ENOENT;
2330 if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2331 tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2332 !net_eq(net, &init_net))
2333 goto errout_tbl_lock;
2334
2335 if (tb[NDTA_THRESH1])
2336 WRITE_ONCE(tbl->gc_thresh1, nla_get_u32(tb[NDTA_THRESH1]));
2337
2338 if (tb[NDTA_THRESH2])
2339 WRITE_ONCE(tbl->gc_thresh2, nla_get_u32(tb[NDTA_THRESH2]));
2340
2341 if (tb[NDTA_THRESH3])
2342 WRITE_ONCE(tbl->gc_thresh3, nla_get_u32(tb[NDTA_THRESH3]));
2343
2344 if (tb[NDTA_GC_INTERVAL])
2345 WRITE_ONCE(tbl->gc_interval, nla_get_msecs(tb[NDTA_GC_INTERVAL]));
2346
2347 err = 0;
2348
2349errout_tbl_lock:
2350 write_unlock_bh(&tbl->lock);
2351errout:
2352 return err;
2353}
2354
2355static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2356 struct netlink_ext_ack *extack)
2357{
2358 struct ndtmsg *ndtm;
2359
2360 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
2361 NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2362 return -EINVAL;
2363 }
2364
2365 ndtm = nlmsg_data(nlh);
2366 if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
2367 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2368 return -EINVAL;
2369 }
2370
2371 if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2372 NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
2373 return -EINVAL;
2374 }
2375
2376 return 0;
2377}
2378
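/* Dump every neighbour table and its per-device parms. cb->args[0] and
 * cb->args[1] record how far the previous pass got, so a multi-part
 * dump resumes where the last skb filled up.
 */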
2379static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2380{
2381 const struct nlmsghdr *nlh = cb->nlh;
2382 struct net *net = sock_net(skb->sk);
2383 int family, tidx, nidx = 0;
2384 int tbl_skip = cb->args[0];
2385 int neigh_skip = cb->args[1];
2386 struct neigh_table *tbl;
2387
2388 if (cb->strict_check) {
2389 int err = neightbl_valid_dump_info(nlh, cb->extack);
2390
2391 if (err < 0)
2392 return err;
2393 }
2394
2395 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2396
2397 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2398 struct neigh_parms *p;
2399
2400 tbl = neigh_tables[tidx];
2401 if (!tbl)
2402 continue;
2403
2404 if (tidx < tbl_skip || (family && tbl->family != family))
2405 continue;
2406
2407 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2408 nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2409 NLM_F_MULTI) < 0)
2410 break;
2411
2412 nidx = 0;
2413 p = list_next_entry(&tbl->parms, list);
2414 list_for_each_entry_from(p, &tbl->parms_list, list) {
2415 if (!net_eq(neigh_parms_net(p), net))
2416 continue;
2417
2418 if (nidx < neigh_skip)
2419 goto next;
2420
2421 if (neightbl_fill_param_info(skb, tbl, p,
2422 NETLINK_CB(cb->skb).portid,
2423 nlh->nlmsg_seq,
2424 RTM_NEWNEIGHTBL,
2425 NLM_F_MULTI) < 0)
2426 goto out;
2427 next:
2428 nidx++;
2429 }
2430
2431 neigh_skip = 0;
2432 }
2433out:
2434 cb->args[0] = tidx;
2435 cb->args[1] = nidx;
2436
2437 return skb->len;
2438}
2439
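/* Serialize one neighbour into an ndmsg plus attributes: NDA_DST,
 * NDA_LLADDR (only while the entry is NUD_VALID), NDA_CACHEINFO with
 * ages converted to clock_t ticks, NDA_PROBES and, when set,
 * NDA_PROTOCOL. neigh->lock guards the state/lladdr/timestamp snapshot.
 */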
2440static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2441 u32 pid, u32 seq, int type, unsigned int flags)
2442{
2443 unsigned long now = jiffies;
2444 struct nda_cacheinfo ci;
2445 struct nlmsghdr *nlh;
2446 struct ndmsg *ndm;
2447
2448 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2449 if (nlh == NULL)
2450 return -EMSGSIZE;
2451
2452 ndm = nlmsg_data(nlh);
2453 ndm->ndm_family = neigh->ops->family;
2454 ndm->ndm_pad1 = 0;
2455 ndm->ndm_pad2 = 0;
2456 ndm->ndm_flags = neigh->flags;
2457 ndm->ndm_type = neigh->type;
2458 ndm->ndm_ifindex = neigh->dev->ifindex;
2459
2460 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2461 goto nla_put_failure;
2462
2463 read_lock_bh(&neigh->lock);
2464 ndm->ndm_state = neigh->nud_state;
2465 if (neigh->nud_state & NUD_VALID) {
2466 char haddr[MAX_ADDR_LEN];
2467
2468 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2469 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2470 read_unlock_bh(&neigh->lock);
2471 goto nla_put_failure;
2472 }
2473 }
2474
2475 ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
2476 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2477 ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
2478 ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1;
2479 read_unlock_bh(&neigh->lock);
2480
2481 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2482 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2483 goto nla_put_failure;
2484
2485 if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
2486 goto nla_put_failure;
2487
2488 nlmsg_end(skb, nlh);
2489 return 0;
2490
2491nla_put_failure:
2492 nlmsg_cancel(skb, nlh);
2493 return -EMSGSIZE;
2494}
2495
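/* Proxy entries carry no link-layer state of their own, so they are
 * reported with NTF_PROXY set, ndm_state = NUD_NONE and no NDA_LLADDR.
 */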
2496static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2497 u32 pid, u32 seq, int type, unsigned int flags,
2498 struct neigh_table *tbl)
2499{
2500 struct nlmsghdr *nlh;
2501 struct ndmsg *ndm;
2502
2503 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2504 if (nlh == NULL)
2505 return -EMSGSIZE;
2506
2507 ndm = nlmsg_data(nlh);
2508 ndm->ndm_family = tbl->family;
2509 ndm->ndm_pad1 = 0;
2510 ndm->ndm_pad2 = 0;
2511 ndm->ndm_flags = pn->flags | NTF_PROXY;
2512 ndm->ndm_type = RTN_UNICAST;
2513 ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2514 ndm->ndm_state = NUD_NONE;
2515
2516 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2517 goto nla_put_failure;
2518
2519 if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
2520 goto nla_put_failure;
2521
2522 nlmsg_end(skb, nlh);
2523 return 0;
2524
2525nla_put_failure:
2526 nlmsg_cancel(skb, nlh);
2527 return -EMSGSIZE;
2528}
2529
2530static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2531{
2532 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2533 __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
2534}
2535
2536static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2537{
2538 struct net_device *master;
2539
2540 if (!master_idx)
2541 return false;
2542
2543 master = dev ? netdev_master_upper_dev_get(dev) : NULL;
2544 if (!master || master->ifindex != master_idx)
2545 return true;
2546
2547 return false;
2548}
2549
2550static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2551{
2552 if (filter_idx && (!dev || dev->ifindex != filter_idx))
2553 return true;
2554
2555 return false;
2556}
2557
2558struct neigh_dump_filter {
2559 int master_idx;
2560 int dev_idx;
2561};
2562
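/* Walk one table's hash buckets under RCU and emit RTM_NEWNEIGH per
 * entry, honouring the ifindex/master filters. cb->args[1] and
 * cb->args[2] hold the bucket and in-bucket position reached so far.
 */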
2563static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2564 struct netlink_callback *cb,
2565 struct neigh_dump_filter *filter)
2566{
2567 struct net *net = sock_net(skb->sk);
2568 struct neighbour *n;
2569 int rc, h, s_h = cb->args[1];
2570 int idx, s_idx = idx = cb->args[2];
2571 struct neigh_hash_table *nht;
2572 unsigned int flags = NLM_F_MULTI;
2573
2574 if (filter->dev_idx || filter->master_idx)
2575 flags |= NLM_F_DUMP_FILTERED;
2576
2577 rcu_read_lock_bh();
2578 nht = rcu_dereference_bh(tbl->nht);
2579
2580 for (h = s_h; h < (1 << nht->hash_shift); h++) {
2581 if (h > s_h)
2582 s_idx = 0;
2583 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2584 n != NULL;
2585 n = rcu_dereference_bh(n->next)) {
2586 if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2587 goto next;
2588 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2589 neigh_master_filtered(n->dev, filter->master_idx))
2590 goto next;
2591 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2592 cb->nlh->nlmsg_seq,
2593 RTM_NEWNEIGH,
2594 flags) < 0) {
2595 rc = -1;
2596 goto out;
2597 }
2598next:
2599 idx++;
2600 }
2601 }
2602 rc = skb->len;
2603out:
2604 rcu_read_unlock_bh();
2605 cb->args[1] = h;
2606 cb->args[2] = idx;
2607 return rc;
2608}
2609
2610static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2611 struct netlink_callback *cb,
2612 struct neigh_dump_filter *filter)
2613{
2614 struct pneigh_entry *n;
2615 struct net *net = sock_net(skb->sk);
2616 int rc, h, s_h = cb->args[3];
2617 int idx, s_idx = idx = cb->args[4];
2618 unsigned int flags = NLM_F_MULTI;
2619
2620 if (filter->dev_idx || filter->master_idx)
2621 flags |= NLM_F_DUMP_FILTERED;
2622
2623 read_lock_bh(&tbl->lock);
2624
2625 for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2626 if (h > s_h)
2627 s_idx = 0;
2628 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2629 if (idx < s_idx || pneigh_net(n) != net)
2630 goto next;
2631 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2632 neigh_master_filtered(n->dev, filter->master_idx))
2633 goto next;
2634 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2635 cb->nlh->nlmsg_seq,
2636 RTM_NEWNEIGH, flags, tbl) < 0) {
2637 read_unlock_bh(&tbl->lock);
2638 rc = -1;
2639 goto out;
2640 }
2641 next:
2642 idx++;
2643 }
2644 }
2645
2646 read_unlock_bh(&tbl->lock);
2647 rc = skb->len;
2648out:
2649 cb->args[3] = h;
2650 cb->args[4] = idx;
2651	return rc;
2653}
2654
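/* Validate an RTM_GETNEIGH dump request and extract the NDA_IFINDEX and
 * NDA_MASTER filters. Under strict checking any other attribute, or any
 * non-zero header field beyond NTF_PROXY in ndm_flags, is rejected.
 */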
2655static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2656 bool strict_check,
2657 struct neigh_dump_filter *filter,
2658 struct netlink_ext_ack *extack)
2659{
2660 struct nlattr *tb[NDA_MAX + 1];
2661 int err, i;
2662
2663 if (strict_check) {
2664 struct ndmsg *ndm;
2665
2666 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2667 NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2668 return -EINVAL;
2669 }
2670
2671 ndm = nlmsg_data(nlh);
2672 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
2673 ndm->ndm_state || ndm->ndm_type) {
2674 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2675 return -EINVAL;
2676 }
2677
2678 if (ndm->ndm_flags & ~NTF_PROXY) {
2679 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
2680 return -EINVAL;
2681 }
2682
2683 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
2684 tb, NDA_MAX, nda_policy,
2685 extack);
2686 } else {
2687 err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
2688 NDA_MAX, nda_policy, extack);
2689 }
2690 if (err < 0)
2691 return err;
2692
2693 for (i = 0; i <= NDA_MAX; ++i) {
2694 if (!tb[i])
2695 continue;
2696
2697 /* all new attributes should require strict_check */
2698 switch (i) {
2699 case NDA_IFINDEX:
2700 filter->dev_idx = nla_get_u32(tb[i]);
2701 break;
2702 case NDA_MASTER:
2703 filter->master_idx = nla_get_u32(tb[i]);
2704 break;
2705 default:
2706 if (strict_check) {
2707 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
2708 return -EINVAL;
2709 }
2710 }
2711 }
2712
2713 return 0;
2714}
2715
2716static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2717{
2718 const struct nlmsghdr *nlh = cb->nlh;
2719 struct neigh_dump_filter filter = {};
2720 struct neigh_table *tbl;
2721 int t, family, s_t;
2722 int proxy = 0;
2723 int err;
2724
2725 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2726
2727	/* Check that a full ndmsg structure is present; the family member
2728	 * sits at the same offset in both ndmsg and rtgenmsg, so reading
2729	 * rtgen_family above is safe either way.
2730	 */
2730 if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2731 ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
2732 proxy = 1;
2733
2734 err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2735 if (err < 0 && cb->strict_check)
2736 return err;
2737 err = 0;
2738
2739 s_t = cb->args[0];
2740
2741 for (t = 0; t < NEIGH_NR_TABLES; t++) {
2742 tbl = neigh_tables[t];
2743
2744 if (!tbl)
2745 continue;
2746 if (t < s_t || (family && tbl->family != family))
2747 continue;
2748 if (t > s_t)
2749 memset(&cb->args[1], 0, sizeof(cb->args) -
2750 sizeof(cb->args[0]));
2751 if (proxy)
2752 err = pneigh_dump_table(tbl, skb, cb, &filter);
2753 else
2754 err = neigh_dump_table(tbl, skb, cb, &filter);
2755 if (err < 0)
2756 break;
2757 }
2758
2759 cb->args[0] = t;
2760 return skb->len;
2761}
2762
2763static int neigh_valid_get_req(const struct nlmsghdr *nlh,
2764 struct neigh_table **tbl,
2765 void **dst, int *dev_idx, u8 *ndm_flags,
2766 struct netlink_ext_ack *extack)
2767{
2768 struct nlattr *tb[NDA_MAX + 1];
2769 struct ndmsg *ndm;
2770 int err, i;
2771
2772 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2773 NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
2774 return -EINVAL;
2775 }
2776
2777 ndm = nlmsg_data(nlh);
2778 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
2779 ndm->ndm_type) {
2780 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
2781 return -EINVAL;
2782 }
2783
2784 if (ndm->ndm_flags & ~NTF_PROXY) {
2785 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
2786 return -EINVAL;
2787 }
2788
2789 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
2790 NDA_MAX, nda_policy, extack);
2791 if (err < 0)
2792 return err;
2793
2794 *ndm_flags = ndm->ndm_flags;
2795 *dev_idx = ndm->ndm_ifindex;
2796 *tbl = neigh_find_table(ndm->ndm_family);
2797 if (*tbl == NULL) {
2798 NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
2799 return -EAFNOSUPPORT;
2800 }
2801
2802 for (i = 0; i <= NDA_MAX; ++i) {
2803 if (!tb[i])
2804 continue;
2805
2806 switch (i) {
2807 case NDA_DST:
2808 if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
2809 NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
2810 return -EINVAL;
2811 }
2812 *dst = nla_data(tb[i]);
2813 break;
2814 default:
2815 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
2816 return -EINVAL;
2817 }
2818 }
2819
2820 return 0;
2821}
2822
2823static inline size_t neigh_nlmsg_size(void)
2824{
2825 return NLMSG_ALIGN(sizeof(struct ndmsg))
2826 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2827 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2828 + nla_total_size(sizeof(struct nda_cacheinfo))
2829 + nla_total_size(4) /* NDA_PROBES */
2830 + nla_total_size(1); /* NDA_PROTOCOL */
2831}
2832
2833static int neigh_get_reply(struct net *net, struct neighbour *neigh,
2834 u32 pid, u32 seq)
2835{
2836 struct sk_buff *skb;
2837 int err = 0;
2838
2839 skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
2840 if (!skb)
2841 return -ENOBUFS;
2842
2843 err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
2844 if (err) {
2845 kfree_skb(skb);
2846 goto errout;
2847 }
2848
2849 err = rtnl_unicast(skb, net, pid);
2850errout:
2851 return err;
2852}
2853
2854static inline size_t pneigh_nlmsg_size(void)
2855{
2856 return NLMSG_ALIGN(sizeof(struct ndmsg))
2857 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2858 + nla_total_size(1); /* NDA_PROTOCOL */
2859}
2860
2861static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
2862 u32 pid, u32 seq, struct neigh_table *tbl)
2863{
2864 struct sk_buff *skb;
2865 int err = 0;
2866
2867 skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
2868 if (!skb)
2869 return -ENOBUFS;
2870
2871 err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
2872 if (err) {
2873 kfree_skb(skb);
2874 goto errout;
2875 }
2876
2877 err = rtnl_unicast(skb, net, pid);
2878errout:
2879 return err;
2880}
2881
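/* RTM_GETNEIGH doit handler: validate the request, look up either a
 * proxy entry (NTF_PROXY) or a regular neighbour, and unicast the
 * reply to the requesting socket.
 */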
2882static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2883 struct netlink_ext_ack *extack)
2884{
2885 struct net *net = sock_net(in_skb->sk);
2886 struct net_device *dev = NULL;
2887 struct neigh_table *tbl = NULL;
2888 struct neighbour *neigh;
2889 void *dst = NULL;
2890 u8 ndm_flags = 0;
2891 int dev_idx = 0;
2892 int err;
2893
2894 err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
2895 extack);
2896 if (err < 0)
2897 return err;
2898
2899 if (dev_idx) {
2900 dev = __dev_get_by_index(net, dev_idx);
2901 if (!dev) {
2902 NL_SET_ERR_MSG(extack, "Unknown device ifindex");
2903 return -ENODEV;
2904 }
2905 }
2906
2907 if (!dst) {
2908 NL_SET_ERR_MSG(extack, "Network address not specified");
2909 return -EINVAL;
2910 }
2911
2912 if (ndm_flags & NTF_PROXY) {
2913 struct pneigh_entry *pn;
2914
2915 pn = pneigh_lookup(tbl, net, dst, dev, 0);
2916 if (!pn) {
2917 NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
2918 return -ENOENT;
2919 }
2920 return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
2921 nlh->nlmsg_seq, tbl);
2922 }
2923
2924 if (!dev) {
2925 NL_SET_ERR_MSG(extack, "No device specified");
2926 return -EINVAL;
2927 }
2928
2929 neigh = neigh_lookup(tbl, dst, dev);
2930 if (!neigh) {
2931 NL_SET_ERR_MSG(extack, "Neighbour entry not found");
2932 return -ENOENT;
2933 }
2934
2935 err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
2936 nlh->nlmsg_seq);
2937
2938 neigh_release(neigh);
2939
2940 return err;
2941}
2942
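/* Invoke cb() on every entry in the table. The read lock only keeps the
 * hash from being resized or rechained; individual entries may still
 * change, so cb() must take n->lock itself if it needs a stable view.
 */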
2943void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2944{
2945 int chain;
2946 struct neigh_hash_table *nht;
2947
2948 rcu_read_lock_bh();
2949 nht = rcu_dereference_bh(tbl->nht);
2950
2951 read_lock(&tbl->lock); /* avoid resizes */
2952 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2953 struct neighbour *n;
2954
2955 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2956 n != NULL;
2957 n = rcu_dereference_bh(n->next))
2958 cb(n, cookie);
2959 }
2960 read_unlock(&tbl->lock);
2961 rcu_read_unlock_bh();
2962}
2963EXPORT_SYMBOL(neigh_for_each);
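
#if 0
/* Illustrative sketch, not part of the original file: counting entries
 * with neigh_for_each(). arp_tbl is the IPv4 table declared in
 * <net/arp.h>; the callback just bumps a counter passed via cookie.
 */
static void example_count_neigh(struct neighbour *n, void *cookie)
{
	(*(unsigned int *)cookie)++;
}

static unsigned int example_count_arp_entries(void)
{
	unsigned int count = 0;

	neigh_for_each(&arp_tbl, example_count_neigh, &count);
	return count;
}
#endif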
2964
2965/* The tbl->lock must be held as a writer and BH disabled. */
2966void __neigh_for_each_release(struct neigh_table *tbl,
2967 int (*cb)(struct neighbour *))
2968{
2969 int chain;
2970 struct neigh_hash_table *nht;
2971
2972 nht = rcu_dereference_protected(tbl->nht,
2973 lockdep_is_held(&tbl->lock));
2974 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2975 struct neighbour *n;
2976 struct neighbour __rcu **np;
2977
2978 np = &nht->hash_buckets[chain];
2979 while ((n = rcu_dereference_protected(*np,
2980 lockdep_is_held(&tbl->lock))) != NULL) {
2981 int release;
2982
2983 write_lock(&n->lock);
2984 release = cb(n);
2985 if (release) {
2986 rcu_assign_pointer(*np,
2987 rcu_dereference_protected(n->next,
2988 lockdep_is_held(&tbl->lock)));
2989 neigh_mark_dead(n);
2990 } else
2991 np = &n->next;
2992 write_unlock(&n->lock);
2993 if (release)
2994 neigh_cleanup_and_release(n);
2995 }
2996 }
2997}
2998EXPORT_SYMBOL(__neigh_for_each_release);
2999
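/* Resolve and transmit skb through the table selected by index. For
 * NEIGH_ARP_TABLE the key is read as a 32-bit IPv4 address, and
 * NEIGH_LINK_TABLE bypasses neighbour resolution entirely, building
 * the link-layer header directly. On error the skb is freed.
 */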
3000int neigh_xmit(int index, struct net_device *dev,
3001 const void *addr, struct sk_buff *skb)
3002{
3003 int err = -EAFNOSUPPORT;
3004 if (likely(index < NEIGH_NR_TABLES)) {
3005 struct neigh_table *tbl;
3006 struct neighbour *neigh;
3007
3008 tbl = neigh_tables[index];
3009 if (!tbl)
3010 goto out;
3011 rcu_read_lock_bh();
3012 if (index == NEIGH_ARP_TABLE) {
3013 u32 key = *((u32 *)addr);
3014
3015 neigh = __ipv4_neigh_lookup_noref(dev, key);
3016 } else {
3017 neigh = __neigh_lookup_noref(tbl, addr, dev);
3018 }
3019 if (!neigh)
3020 neigh = __neigh_create(tbl, addr, dev, false);
3021 err = PTR_ERR(neigh);
3022 if (IS_ERR(neigh)) {
3023 rcu_read_unlock_bh();
3024 goto out_kfree_skb;
3025 }
3026 err = neigh->output(neigh, skb);
3027 rcu_read_unlock_bh();
3028	} else if (index == NEIGH_LINK_TABLE) {
3030 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
3031 addr, NULL, skb->len);
3032 if (err < 0)
3033 goto out_kfree_skb;
3034 err = dev_queue_xmit(skb);
3035 }
3036out:
3037 return err;
3038out_kfree_skb:
3039 kfree_skb(skb);
3040 goto out;
3041}
3042EXPORT_SYMBOL(neigh_xmit);
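
#if 0
/* Illustrative sketch, not part of the original file: an IPv4 caller
 * resolving through the ARP table. The key for NEIGH_ARP_TABLE is the
 * destination address in network byte order.
 */
static int example_xmit_via_arp(struct net_device *dev, __be32 daddr,
				struct sk_buff *skb)
{
	return neigh_xmit(NEIGH_ARP_TABLE, dev, &daddr, skb);
}
#endif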
3043
3044#ifdef CONFIG_PROC_FS
3045
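/* seq_file iteration helpers: walk the RCU-protected neighbour hash
 * first, then (unless NEIGH_SEQ_NEIGH_ONLY) the proxy hash.
 * state->bucket and the NEIGH_SEQ_IS_PNEIGH flag record where the walk
 * currently stands.
 */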
3046static struct neighbour *neigh_get_first(struct seq_file *seq)
3047{
3048 struct neigh_seq_state *state = seq->private;
3049 struct net *net = seq_file_net(seq);
3050 struct neigh_hash_table *nht = state->nht;
3051 struct neighbour *n = NULL;
3052 int bucket;
3053
3054 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
3055 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
3056 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
3057
3058 while (n) {
3059 if (!net_eq(dev_net(n->dev), net))
3060 goto next;
3061 if (state->neigh_sub_iter) {
3062 loff_t fakep = 0;
3063 void *v;
3064
3065 v = state->neigh_sub_iter(state, n, &fakep);
3066 if (!v)
3067 goto next;
3068 }
3069 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3070 break;
3071 if (n->nud_state & ~NUD_NOARP)
3072 break;
3073next:
3074 n = rcu_dereference_bh(n->next);
3075 }
3076
3077 if (n)
3078 break;
3079 }
3080 state->bucket = bucket;
3081
3082 return n;
3083}
3084
3085static struct neighbour *neigh_get_next(struct seq_file *seq,
3086 struct neighbour *n,
3087 loff_t *pos)
3088{
3089 struct neigh_seq_state *state = seq->private;
3090 struct net *net = seq_file_net(seq);
3091 struct neigh_hash_table *nht = state->nht;
3092
3093 if (state->neigh_sub_iter) {
3094 void *v = state->neigh_sub_iter(state, n, pos);
3095 if (v)
3096 return n;
3097 }
3098 n = rcu_dereference_bh(n->next);
3099
3100 while (1) {
3101 while (n) {
3102 if (!net_eq(dev_net(n->dev), net))
3103 goto next;
3104 if (state->neigh_sub_iter) {
3105 void *v = state->neigh_sub_iter(state, n, pos);
3106 if (v)
3107 return n;
3108 goto next;
3109 }
3110 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3111 break;
3112
3113 if (n->nud_state & ~NUD_NOARP)
3114 break;
3115next:
3116 n = rcu_dereference_bh(n->next);
3117 }
3118
3119 if (n)
3120 break;
3121
3122 if (++state->bucket >= (1 << nht->hash_shift))
3123 break;
3124
3125 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
3126 }
3127
3128 if (n && pos)
3129 --(*pos);
3130 return n;
3131}
3132
3133static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
3134{
3135 struct neighbour *n = neigh_get_first(seq);
3136
3137 if (n) {
3138 --(*pos);
3139 while (*pos) {
3140 n = neigh_get_next(seq, n, pos);
3141 if (!n)
3142 break;
3143 }
3144 }
3145 return *pos ? NULL : n;
3146}
3147
3148static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3149{
3150 struct neigh_seq_state *state = seq->private;
3151 struct net *net = seq_file_net(seq);
3152 struct neigh_table *tbl = state->tbl;
3153 struct pneigh_entry *pn = NULL;
3154 int bucket = state->bucket;
3155
3156 state->flags |= NEIGH_SEQ_IS_PNEIGH;
3157 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3158 pn = tbl->phash_buckets[bucket];
3159 while (pn && !net_eq(pneigh_net(pn), net))
3160 pn = pn->next;
3161 if (pn)
3162 break;
3163 }
3164 state->bucket = bucket;
3165
3166 return pn;
3167}
3168
3169static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
3170 struct pneigh_entry *pn,
3171 loff_t *pos)
3172{
3173 struct neigh_seq_state *state = seq->private;
3174 struct net *net = seq_file_net(seq);
3175 struct neigh_table *tbl = state->tbl;
3176
3177 do {
3178 pn = pn->next;
3179 } while (pn && !net_eq(pneigh_net(pn), net));
3180
3181 while (!pn) {
3182 if (++state->bucket > PNEIGH_HASHMASK)
3183 break;
3184 pn = tbl->phash_buckets[state->bucket];
3185 while (pn && !net_eq(pneigh_net(pn), net))
3186 pn = pn->next;
3187 if (pn)
3188 break;
3189 }
3190
3191 if (pn && pos)
3192 --(*pos);
3193
3194 return pn;
3195}
3196
3197static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
3198{
3199 struct pneigh_entry *pn = pneigh_get_first(seq);
3200
3201 if (pn) {
3202 --(*pos);
3203 while (*pos) {
3204 pn = pneigh_get_next(seq, pn, pos);
3205 if (!pn)
3206 break;
3207 }
3208 }
3209 return *pos ? NULL : pn;
3210}
3211
3212static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
3213{
3214 struct neigh_seq_state *state = seq->private;
3215 void *rc;
3216 loff_t idxpos = *pos;
3217
3218 rc = neigh_get_idx(seq, &idxpos);
3219 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3220 rc = pneigh_get_idx(seq, &idxpos);
3221
3222 return rc;
3223}
3224
3225void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
3226 __acquires(tbl->lock)
3227 __acquires(rcu_bh)
3228{
3229 struct neigh_seq_state *state = seq->private;
3230
3231 state->tbl = tbl;
3232 state->bucket = 0;
3233 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
3234
3235 rcu_read_lock_bh();
3236 state->nht = rcu_dereference_bh(tbl->nht);
3237 read_lock(&tbl->lock);
3238
3239 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
3240}
3241EXPORT_SYMBOL(neigh_seq_start);
3242
3243void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3244{
3245 struct neigh_seq_state *state;
3246 void *rc;
3247
3248 if (v == SEQ_START_TOKEN) {
3249 rc = neigh_get_first(seq);
3250 goto out;
3251 }
3252
3253 state = seq->private;
3254 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
3255 rc = neigh_get_next(seq, v, NULL);
3256 if (rc)
3257 goto out;
3258 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3259 rc = pneigh_get_first(seq);
3260 } else {
3261 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
3262 rc = pneigh_get_next(seq, v, NULL);
3263 }
3264out:
3265 ++(*pos);
3266 return rc;
3267}
3268EXPORT_SYMBOL(neigh_seq_next);
3269
3270void neigh_seq_stop(struct seq_file *seq, void *v)
3271 __releases(tbl->lock)
3272 __releases(rcu_bh)
3273{
3274 struct neigh_seq_state *state = seq->private;
3275 struct neigh_table *tbl = state->tbl;
3276
3277 read_unlock(&tbl->lock);
3278 rcu_read_unlock_bh();
3279}
3280EXPORT_SYMBOL(neigh_seq_stop);
3281
3282/* statistics via seq_file */
3283
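/* One row per possible CPU; the "entries" column repeats the global
 * table count on every row. For the IPv4 table this surfaces as
 * /proc/net/stat/arp_cache (the file is named after the table's id).
 */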
3284static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3285{
3286 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3287 int cpu;
3288
3289 if (*pos == 0)
3290 return SEQ_START_TOKEN;
3291
3292 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3293 if (!cpu_possible(cpu))
3294 continue;
3295 *pos = cpu+1;
3296 return per_cpu_ptr(tbl->stats, cpu);
3297 }
3298 return NULL;
3299}
3300
3301static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3302{
3303 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3304 int cpu;
3305
3306 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3307 if (!cpu_possible(cpu))
3308 continue;
3309 *pos = cpu+1;
3310 return per_cpu_ptr(tbl->stats, cpu);
3311 }
3312 (*pos)++;
3313 return NULL;
3314}
3315
3316static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
3317{
3319}
3320
3321static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3322{
3323 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3324 struct neigh_statistics *st = v;
3325
3326 if (v == SEQ_START_TOKEN) {
3327 seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
3328 return 0;
3329 }
3330
3331 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
3332 "%08lx %08lx %08lx %08lx %08lx %08lx\n",
3333 atomic_read(&tbl->entries),
3334
3335 st->allocs,
3336 st->destroys,
3337 st->hash_grows,
3338
3339 st->lookups,
3340 st->hits,
3341
3342 st->res_failed,
3343
3344 st->rcv_probes_mcast,
3345 st->rcv_probes_ucast,
3346
3347 st->periodic_gc_runs,
3348 st->forced_gc_runs,
3349 st->unres_discards,
3350 st->table_fulls
3351 );
3352
3353 return 0;
3354}
3355
3356static const struct seq_operations neigh_stat_seq_ops = {
3357 .start = neigh_stat_seq_start,
3358 .next = neigh_stat_seq_next,
3359 .stop = neigh_stat_seq_stop,
3360 .show = neigh_stat_seq_show,
3361};
3362#endif /* CONFIG_PROC_FS */
3363
3364static void __neigh_notify(struct neighbour *n, int type, int flags,
3365 u32 pid)
3366{
3367 struct net *net = dev_net(n->dev);
3368 struct sk_buff *skb;
3369 int err = -ENOBUFS;
3370
3371 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3372 if (skb == NULL)
3373 goto errout;
3374
3375 err = neigh_fill_info(skb, n, pid, 0, type, flags);
3376 if (err < 0) {
3377 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3378 WARN_ON(err == -EMSGSIZE);
3379 kfree_skb(skb);
3380 goto errout;
3381 }
3382 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3383 return;
3384errout:
3385 if (err < 0)
3386 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
3387}
3388
3389void neigh_app_ns(struct neighbour *n)
3390{
3391 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3392}
3393EXPORT_SYMBOL(neigh_app_ns);
3394
3395#ifdef CONFIG_SYSCTL
3396static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
3397
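/* unres_qlen is the legacy packet-count view of unres_qlen_bytes: reads
 * divide by SKB_TRUESIZE(ETH_FRAME_LEN), writes multiply by it, so both
 * sysctls stay coupled to the same underlying byte limit.
 */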
3398static int proc_unres_qlen(struct ctl_table *ctl, int write,
3399 void __user *buffer, size_t *lenp, loff_t *ppos)
3400{
3401 int size, ret;
3402 struct ctl_table tmp = *ctl;
3403
3404 tmp.extra1 = SYSCTL_ZERO;
3405 tmp.extra2 = &unres_qlen_max;
3406 tmp.data = &size;
3407
3408 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3409 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3410
3411 if (write && !ret)
3412 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
3413 return ret;
3414}
3415
3416static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
3417 int family)
3418{
3419 switch (family) {
3420 case AF_INET:
3421 return __in_dev_arp_parms_get_rcu(dev);
3422 case AF_INET6:
3423 return __in6_dev_nd_parms_get_rcu(dev);
3424 }
3425 return NULL;
3426}
3427
3428static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3429 int index)
3430{
3431 struct net_device *dev;
3432 int family = neigh_parms_family(p);
3433
3434 rcu_read_lock();
3435 for_each_netdev_rcu(net, dev) {
3436 struct neigh_parms *dst_p =
3437 neigh_get_dev_parms_rcu(dev, family);
3438
3439 if (dst_p && !test_bit(index, dst_p->data_state))
3440 dst_p->data[index] = p->data[index];
3441 }
3442 rcu_read_unlock();
3443}
3444
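/* Common post-write hook for the parms sysctl handlers: mark the value
 * as explicitly set on this device, fire the netevent for
 * DELAY_PROBE_TIME, and, when the default entry (dev == NULL) was
 * written, copy the new value to every device that has not overridden it.
 */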
3445static void neigh_proc_update(struct ctl_table *ctl, int write)
3446{
3447 struct net_device *dev = ctl->extra1;
3448 struct neigh_parms *p = ctl->extra2;
3449 struct net *net = neigh_parms_net(p);
3450 int index = (int *) ctl->data - p->data;
3451
3452 if (!write)
3453 return;
3454
3455 set_bit(index, p->data_state);
3456 if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3457 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3458 if (!dev) /* NULL dev means this is default value */
3459 neigh_copy_dflt_parms(net, p, index);
3460}
3461
3462static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3463 void __user *buffer,
3464 size_t *lenp, loff_t *ppos)
3465{
3466 struct ctl_table tmp = *ctl;
3467 int ret;
3468
3469 tmp.extra1 = SYSCTL_ZERO;
3470 tmp.extra2 = SYSCTL_INT_MAX;
3471
3472 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3473 neigh_proc_update(ctl, write);
3474 return ret;
3475}
3476
3477int neigh_proc_dointvec(struct ctl_table *ctl, int write,
3478 void __user *buffer, size_t *lenp, loff_t *ppos)
3479{
3480 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3481
3482 neigh_proc_update(ctl, write);
3483 return ret;
3484}
3485EXPORT_SYMBOL(neigh_proc_dointvec);
3486
3487int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3488 void __user *buffer,
3489 size_t *lenp, loff_t *ppos)
3490{
3491 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3492
3493 neigh_proc_update(ctl, write);
3494 return ret;
3495}
3496EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3497
3498static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3499 void __user *buffer,
3500 size_t *lenp, loff_t *ppos)
3501{
3502 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3503
3504 neigh_proc_update(ctl, write);
3505 return ret;
3506}
3507
3508int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3509 void __user *buffer,
3510 size_t *lenp, loff_t *ppos)
3511{
3512 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3513
3514 neigh_proc_update(ctl, write);
3515 return ret;
3516}
3517EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3518
3519static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3520 void __user *buffer,
3521 size_t *lenp, loff_t *ppos)
3522{
3523 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3524
3525 neigh_proc_update(ctl, write);
3526 return ret;
3527}
3528
3529static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3530 void __user *buffer,
3531 size_t *lenp, loff_t *ppos)
3532{
3533 struct neigh_parms *p = ctl->extra2;
3534 int ret;
3535
3536 if (strcmp(ctl->procname, "base_reachable_time") == 0)
3537 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3538 else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3539 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3540 else
3541 ret = -1;
3542
3543 if (write && ret == 0) {
3544		/* update reachable_time as well; otherwise the change only
3545		 * takes effect after the next neigh_periodic_work recompute
3546		 */
3548 p->reachable_time =
3549 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
3550 }
3551 return ret;
3552}
3553
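/* The template below stores the offset of each field within
 * neigh_parms->data as a fake pointer; neigh_sysctl_register() adds the
 * actual parms address to each .data to make it real.
 */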
3554#define NEIGH_PARMS_DATA_OFFSET(index) \
3555 (&((struct neigh_parms *) 0)->data[index])
3556
3557#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
3558 [NEIGH_VAR_ ## attr] = { \
3559 .procname = name, \
3560 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
3561 .maxlen = sizeof(int), \
3562 .mode = mval, \
3563 .proc_handler = proc, \
3564 }
3565
3566#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
3567 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
3568
3569#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
3570 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
3571
3572#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
3573 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
3574
3575#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
3576 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3577
3578#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
3579 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3580
3581#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
3582 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3583
3584static struct neigh_sysctl_table {
3585 struct ctl_table_header *sysctl_header;
3586 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3587} neigh_sysctl_template __read_mostly = {
3588 .neigh_vars = {
3589 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3590 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3591 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3592 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3593 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3594 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3595 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3596 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3597 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3598 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3599 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3600 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3601 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3602 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3603 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3604 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3605 [NEIGH_VAR_GC_INTERVAL] = {
3606 .procname = "gc_interval",
3607 .maxlen = sizeof(int),
3608 .mode = 0644,
3609 .proc_handler = proc_dointvec_jiffies,
3610 },
3611 [NEIGH_VAR_GC_THRESH1] = {
3612 .procname = "gc_thresh1",
3613 .maxlen = sizeof(int),
3614 .mode = 0644,
3615 .extra1 = SYSCTL_ZERO,
3616 .extra2 = SYSCTL_INT_MAX,
3617 .proc_handler = proc_dointvec_minmax,
3618 },
3619 [NEIGH_VAR_GC_THRESH2] = {
3620 .procname = "gc_thresh2",
3621 .maxlen = sizeof(int),
3622 .mode = 0644,
3623 .extra1 = SYSCTL_ZERO,
3624 .extra2 = SYSCTL_INT_MAX,
3625 .proc_handler = proc_dointvec_minmax,
3626 },
3627 [NEIGH_VAR_GC_THRESH3] = {
3628 .procname = "gc_thresh3",
3629 .maxlen = sizeof(int),
3630 .mode = 0644,
3631 .extra1 = SYSCTL_ZERO,
3632 .extra2 = SYSCTL_INT_MAX,
3633 .proc_handler = proc_dointvec_minmax,
3634 },
3635 {},
3636 },
3637};
3638
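/* Register the per-device (or per-family "default") neighbour sysctls
 * under net/<ipv4|ipv6>/neigh/. For real devices the table is cut off
 * before the gc_* entries, which are global and therefore only exposed
 * in the "default" directory.
 */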
3639int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3640 proc_handler *handler)
3641{
3642 int i;
3643 struct neigh_sysctl_table *t;
3644 const char *dev_name_source;
3645	char neigh_path[sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ];
3646 char *p_name;
3647
3648 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
3649 if (!t)
3650 goto err;
3651
3652 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3653 t->neigh_vars[i].data += (long) p;
3654 t->neigh_vars[i].extra1 = dev;
3655 t->neigh_vars[i].extra2 = p;
3656 }
3657
3658 if (dev) {
3659 dev_name_source = dev->name;
3660 /* Terminate the table early */
3661 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3662 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3663 } else {
3664 struct neigh_table *tbl = p->tbl;
3665 dev_name_source = "default";
3666 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3667 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3668 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3669 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3670 }
3671
3672 if (handler) {
3673 /* RetransTime */
3674 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3675 /* ReachableTime */
3676 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3677 /* RetransTime (in milliseconds)*/
3678 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3679 /* ReachableTime (in milliseconds) */
3680 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3681 } else {
3682		/* These handlers update p->reachable_time after
3683		 * base_reachable_time(_ms) is set, so the new interval takes
3684		 * effect on the next neighbour update instead of waiting for
3685		 * neigh_periodic_work to recompute it (which can take several
3686		 * minutes). Any handler that replaces them should do the same.
3687		 */
3688 /* ReachableTime */
3689 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3690 neigh_proc_base_reachable_time;
3691 /* ReachableTime (in milliseconds) */
3692 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3693 neigh_proc_base_reachable_time;
3694 }
3695
3696 /* Don't export sysctls to unprivileged users */
3697 if (neigh_parms_net(p)->user_ns != &init_user_ns)
3698 t->neigh_vars[0].procname = NULL;
3699
3700 switch (neigh_parms_family(p)) {
3701 case AF_INET:
3702 p_name = "ipv4";
3703 break;
3704 case AF_INET6:
3705 p_name = "ipv6";
3706 break;
3707 default:
3708 BUG();
3709 }
3710
3711 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3712 p_name, dev_name_source);
3713 t->sysctl_header =
3714 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3715 if (!t->sysctl_header)
3716 goto free;
3717
3718 p->sysctl_table = t;
3719 return 0;
3720
3721free:
3722 kfree(t);
3723err:
3724 return -ENOBUFS;
3725}
3726EXPORT_SYMBOL(neigh_sysctl_register);
3727
3728void neigh_sysctl_unregister(struct neigh_parms *p)
3729{
3730 if (p->sysctl_table) {
3731 struct neigh_sysctl_table *t = p->sysctl_table;
3732 p->sysctl_table = NULL;
3733 unregister_net_sysctl_table(t->sysctl_header);
3734 kfree(t);
3735 }
3736}
3737EXPORT_SYMBOL(neigh_sysctl_unregister);
3738
3739#endif /* CONFIG_SYSCTL */
3740
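/* Wire the RTM_*NEIGH and RTM_*NEIGHTBL message types into rtnetlink. */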
3741static int __init neigh_init(void)
3742{
3743 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3744 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3745 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);
3746
3747 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3748 0);
3749 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3750
3751 return 0;
3752}
3753
3754subsys_initcall(neigh_init);