/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NET_SCHED_GENERIC_H
#define __NET_SCHED_GENERIC_H

#include <linux/netdevice.h>
#include <linux/types.h>
#include <linux/rcupdate.h>
#include <linux/pkt_sched.h>
#include <linux/pkt_cls.h>
#include <linux/percpu.h>
#include <linux/dynamic_queue_limits.h>
#include <linux/list.h>
#include <linux/refcount.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>
#include <linux/hashtable.h>
#include <linux/android_kabi.h>
#include <net/gen_stats.h>
#include <net/rtnetlink.h>
#include <net/flow_offload.h>

struct Qdisc_ops;
struct qdisc_walker;
struct tcf_walker;
struct module;
struct bpf_flow_keys;

struct qdisc_rate_table {
	struct tc_ratespec	rate;
	u32			data[256];
	struct qdisc_rate_table	*next;
	int			refcnt;
};

enum qdisc_state_t {
	__QDISC_STATE_SCHED,
	__QDISC_STATE_DEACTIVATED,
	__QDISC_STATE_MISSED,
};

struct qdisc_size_table {
	struct rcu_head		rcu;
	struct list_head	list;
	struct tc_sizespec	szopts;
	int			refcnt;
	u16			data[];
};

/* similar to sk_buff_head, but skb->prev pointer is undefined. */
struct qdisc_skb_head {
	struct sk_buff	*head;
	struct sk_buff	*tail;
	__u32		qlen;
	spinlock_t	lock;
};

struct Qdisc {
	int 			(*enqueue)(struct sk_buff *skb,
					   struct Qdisc *sch,
					   struct sk_buff **to_free);
	struct sk_buff *	(*dequeue)(struct Qdisc *sch);
	unsigned int		flags;
#define TCQ_F_BUILTIN		1
#define TCQ_F_INGRESS		2
#define TCQ_F_CAN_BYPASS	4
#define TCQ_F_MQROOT		8
#define TCQ_F_ONETXQUEUE	0x10 /* dequeue_skb() can assume all skbs are for
				      * q->dev_queue : It can test
				      * netif_xmit_frozen_or_stopped() before
				      * dequeueing the next packet.
				      * It's true for MQ/MQPRIO slaves, or a
				      * non-multiqueue device.
				      */
#define TCQ_F_WARN_NONWC	(1 << 16)
#define TCQ_F_CPUSTATS		0x20 /* run using percpu statistics */
#define TCQ_F_NOPARENT		0x40 /* root of its hierarchy :
				      * qdisc_tree_decrease_qlen() should stop.
				      */
#define TCQ_F_INVISIBLE		0x80 /* invisible by default in dump */
#define TCQ_F_NOLOCK		0x100 /* qdisc does not require locking */
#define TCQ_F_OFFLOADED		0x200 /* qdisc is offloaded to HW */
	u32			limit;
	const struct Qdisc_ops	*ops;
	struct qdisc_size_table	__rcu *stab;
	struct hlist_node	hash;
	u32			handle;
	u32			parent;

	struct netdev_queue	*dev_queue;

	struct net_rate_estimator __rcu *rate_est;
	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
	struct gnet_stats_queue	__percpu *cpu_qstats;
	int			padded;
	refcount_t		refcnt;

	/*
	 * For performance sake on SMP, we put highly modified fields at the end
	 */
	struct sk_buff_head	gso_skb ____cacheline_aligned_in_smp;
	struct qdisc_skb_head	q;
	struct gnet_stats_basic_packed bstats;
	seqcount_t		running;
	struct gnet_stats_queue	qstats;
	unsigned long		state;
	struct Qdisc		*next_sched;
	struct sk_buff_head	skb_bad_txq;

	spinlock_t		busylock ____cacheline_aligned_in_smp;
	spinlock_t		seqlock;

	/* for NOLOCK qdisc, true if there are no enqueued skbs */
	bool			empty;
	struct rcu_head		rcu;

	ANDROID_KABI_RESERVE(1);
};

static inline void qdisc_refcount_inc(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_BUILTIN)
		return;
	refcount_inc(&qdisc->refcnt);
}

/* Intended to be used by unlocked users, when concurrent qdisc release is
 * possible.
 */

static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_BUILTIN)
		return qdisc;
	if (refcount_inc_not_zero(&qdisc->refcnt))
		return qdisc;
	return NULL;
}

static inline bool qdisc_is_running(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_NOLOCK)
		return spin_is_locked(&qdisc->seqlock);
	return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
}

static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
{
	return q->flags & TCQ_F_CPUSTATS;
}

static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
{
	if (qdisc_is_percpu_stats(qdisc))
		return READ_ONCE(qdisc->empty);
	return !READ_ONCE(qdisc->q.qlen);
}

static inline bool qdisc_run_begin(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_NOLOCK) {
		if (spin_trylock(&qdisc->seqlock))
			goto nolock_empty;

		/* No need to insist if the MISSED flag was already set.
		 * Note that test_and_set_bit() also gives us memory ordering
		 * guarantees wrt potential earlier enqueue() and below
		 * spin_trylock(), both of which are necessary to prevent races
		 */
		if (test_and_set_bit(__QDISC_STATE_MISSED, &qdisc->state))
			return false;

		/* Try to take the lock again to make sure that we will either
		 * grab it or the CPU that still has it will see MISSED set
		 * when testing it in qdisc_run_end()
		 */
		if (!spin_trylock(&qdisc->seqlock))
			return false;

nolock_empty:
		WRITE_ONCE(qdisc->empty, false);
	} else if (qdisc_is_running(qdisc)) {
		return false;
	}
	/* Variant of write_seqcount_begin() telling lockdep a trylock
	 * was attempted.
	 */
	raw_write_seqcount_begin(&qdisc->running);
	seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_);
	return true;
}

static inline void qdisc_run_end(struct Qdisc *qdisc)
{
	write_seqcount_end(&qdisc->running);
	if (qdisc->flags & TCQ_F_NOLOCK) {
		spin_unlock(&qdisc->seqlock);

		/* spin_unlock() only has store-release semantic. The unlock
		 * and test_bit() ordering is a store-load ordering, so a full
		 * memory barrier is needed here.
		 */
		smp_mb();

		if (unlikely(test_bit(__QDISC_STATE_MISSED,
				      &qdisc->state))) {
			clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
			__netif_schedule(qdisc);
		}
	}
}
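
/* Usage sketch (assumption, heavily simplified): the transmit path brackets
 * qdisc work with qdisc_run_begin()/qdisc_run_end() so that only one CPU
 * dequeues a given qdisc at a time. The real logic lives in __qdisc_run()
 * and sch_direct_xmit() in net/sched/sch_generic.c; the caller below is
 * hypothetical and drops packets instead of transmitting them.
 *
 *	static void example_run(struct Qdisc *q)
 *	{
 *		struct sk_buff *skb;
 *
 *		if (!qdisc_run_begin(q))
 *			return;		// another CPU already owns q
 *		while ((skb = q->dequeue(q)) != NULL)
 *			kfree_skb(skb);	// real callers transmit instead
 *		qdisc_run_end(q);	// reschedules q if MISSED was set
 *	}
 */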

static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
{
	return qdisc->flags & TCQ_F_ONETXQUEUE;
}

static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq)
{
#ifdef CONFIG_BQL
	/* Non-BQL migrated drivers will return 0, too. */
	return dql_avail(&txq->dql);
#else
	return 0;
#endif
}

struct Qdisc_class_ops {
	unsigned int		flags;
	/* Child qdisc manipulation */
	struct netdev_queue *	(*select_queue)(struct Qdisc *, struct tcmsg *);
	int			(*graft)(struct Qdisc *, unsigned long cl,
					 struct Qdisc *, struct Qdisc **,
					 struct netlink_ext_ack *extack);
	struct Qdisc *		(*leaf)(struct Qdisc *, unsigned long cl);
	void			(*qlen_notify)(struct Qdisc *, unsigned long);

	/* Class manipulation routines */
	unsigned long		(*find)(struct Qdisc *, u32 classid);
	int			(*change)(struct Qdisc *, u32, u32,
					  struct nlattr **, unsigned long *,
					  struct netlink_ext_ack *);
	int			(*delete)(struct Qdisc *, unsigned long);
	void			(*walk)(struct Qdisc *, struct qdisc_walker * arg);

	/* Filter manipulation */
	struct tcf_block *	(*tcf_block)(struct Qdisc *sch,
					     unsigned long arg,
					     struct netlink_ext_ack *extack);
	unsigned long		(*bind_tcf)(struct Qdisc *, unsigned long,
					    u32 classid);
	void			(*unbind_tcf)(struct Qdisc *, unsigned long);

	/* rtnetlink specific */
	int			(*dump)(struct Qdisc *, unsigned long,
					struct sk_buff *skb, struct tcmsg*);
	int			(*dump_stats)(struct Qdisc *, unsigned long,
					      struct gnet_dump *);

	ANDROID_KABI_RESERVE(1);
};

/* Qdisc_class_ops flag values */

/* Implements API that doesn't require rtnl lock */
enum qdisc_class_ops_flags {
	QDISC_CLASS_OPS_DOIT_UNLOCKED = 1,
};

struct Qdisc_ops {
	struct Qdisc_ops	*next;
	const struct Qdisc_class_ops	*cl_ops;
	char			id[IFNAMSIZ];
	int			priv_size;
	unsigned int		static_flags;

	int 			(*enqueue)(struct sk_buff *skb,
					   struct Qdisc *sch,
					   struct sk_buff **to_free);
	struct sk_buff *	(*dequeue)(struct Qdisc *);
	struct sk_buff *	(*peek)(struct Qdisc *);

	int			(*init)(struct Qdisc *sch, struct nlattr *arg,
					struct netlink_ext_ack *extack);
	void			(*reset)(struct Qdisc *);
	void			(*destroy)(struct Qdisc *);
	int			(*change)(struct Qdisc *sch,
					  struct nlattr *arg,
					  struct netlink_ext_ack *extack);
	void			(*attach)(struct Qdisc *sch);
	int			(*change_tx_queue_len)(struct Qdisc *, unsigned int);
	void			(*change_real_num_tx)(struct Qdisc *sch,
						      unsigned int new_real_tx);

	int			(*dump)(struct Qdisc *, struct sk_buff *);
	int			(*dump_stats)(struct Qdisc *, struct gnet_dump *);

	void			(*ingress_block_set)(struct Qdisc *sch,
						     u32 block_index);
	void			(*egress_block_set)(struct Qdisc *sch,
						     u32 block_index);
	u32			(*ingress_block_get)(struct Qdisc *sch);
	u32			(*egress_block_get)(struct Qdisc *sch);

	struct module		*owner;

	ANDROID_KABI_RESERVE(1);
};
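
/* Illustrative sketch (assumption, not part of this header): a minimal
 * Qdisc_ops for a hypothetical FIFO-style qdisc built from the generic
 * helpers declared later in this header; registration would go through
 * register_qdisc(), declared in net/pkt_sched.h.
 *
 *	static struct Qdisc_ops example_fifo_qdisc_ops __read_mostly = {
 *		.id		= "example_fifo",
 *		.priv_size	= 0,
 *		.enqueue	= example_fifo_enqueue,	// hypothetical, see the
 *							// qdisc_drop() sketch below
 *		.dequeue	= qdisc_dequeue_head,
 *		.peek		= qdisc_peek_head,
 *		.reset		= qdisc_reset_queue,
 *		.owner		= THIS_MODULE,
 *	};
 */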


struct tcf_result {
	union {
		struct {
			unsigned long	class;
			u32		classid;
		};
		const struct tcf_proto *goto_tp;

		/* used in the skb_tc_reinsert function */
		struct {
			bool		ingress;
			struct gnet_stats_queue *qstats;
		};
	};
};

struct tcf_chain;

struct tcf_proto_ops {
	struct list_head	head;
	char			kind[IFNAMSIZ];

	int			(*classify)(struct sk_buff *,
					    const struct tcf_proto *,
					    struct tcf_result *);
	int			(*init)(struct tcf_proto*);
	void			(*destroy)(struct tcf_proto *tp, bool rtnl_held,
					   struct netlink_ext_ack *extack);

	void*			(*get)(struct tcf_proto*, u32 handle);
	void			(*put)(struct tcf_proto *tp, void *f);
	int			(*change)(struct net *net, struct sk_buff *,
					  struct tcf_proto*, unsigned long,
					  u32 handle, struct nlattr **,
					  void **, bool, bool,
					  struct netlink_ext_ack *);
	int			(*delete)(struct tcf_proto *tp, void *arg,
					  bool *last, bool rtnl_held,
					  struct netlink_ext_ack *);
	bool			(*delete_empty)(struct tcf_proto *tp);
	void			(*walk)(struct tcf_proto *tp,
					struct tcf_walker *arg, bool rtnl_held);
	int			(*reoffload)(struct tcf_proto *tp, bool add,
					     flow_setup_cb_t *cb, void *cb_priv,
					     struct netlink_ext_ack *extack);
	void			(*hw_add)(struct tcf_proto *tp,
					  void *type_data);
	void			(*hw_del)(struct tcf_proto *tp,
					  void *type_data);
	void			(*bind_class)(void *, u32, unsigned long,
					      void *, unsigned long);
	void *			(*tmplt_create)(struct net *net,
						struct tcf_chain *chain,
						struct nlattr **tca,
						struct netlink_ext_ack *extack);
	void			(*tmplt_destroy)(void *tmplt_priv);

	/* rtnetlink specific */
	int			(*dump)(struct net*, struct tcf_proto*, void *,
					struct sk_buff *skb, struct tcmsg*,
					bool);
	int			(*tmplt_dump)(struct sk_buff *skb,
					      struct net *net,
					      void *tmplt_priv);

	struct module		*owner;
	int			flags;
};

/* Classifiers setting TCF_PROTO_OPS_DOIT_UNLOCKED in tcf_proto_ops->flags
 * are expected to implement tcf_proto_ops->delete_empty(), otherwise race
 * conditions can occur when filters are inserted/deleted simultaneously.
 */
enum tcf_proto_ops_flags {
	TCF_PROTO_OPS_DOIT_UNLOCKED = 1,
};

struct tcf_proto {
	/* Fast access part */
	struct tcf_proto __rcu	*next;
	void __rcu		*root;

	/* called under RCU BH lock */
	int			(*classify)(struct sk_buff *,
					    const struct tcf_proto *,
					    struct tcf_result *);
	__be16			protocol;

	/* All the rest */
	u32			prio;
	void			*data;
	const struct tcf_proto_ops	*ops;
	struct tcf_chain	*chain;
	/* Lock protects tcf_proto shared state and can be used by unlocked
	 * classifiers to protect their private data.
	 */
	spinlock_t		lock;
	bool			deleting;
	refcount_t		refcnt;
	struct rcu_head		rcu;
	struct hlist_node	destroy_ht_node;
};

struct qdisc_skb_cb {
	struct {
		unsigned int		pkt_len;
		u16			slave_dev_queue_mapping;
		u16			tc_classid;
	};
#define QDISC_CB_PRIV_LEN 20
	unsigned char		data[QDISC_CB_PRIV_LEN];
};

typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);

struct tcf_chain {
	/* Protects filter_chain. */
	struct mutex filter_chain_lock;
	struct tcf_proto __rcu *filter_chain;
	struct list_head list;
	struct tcf_block *block;
	u32 index; /* chain index */
	unsigned int refcnt;
	unsigned int action_refcnt;
	bool explicitly_created;
	bool flushing;
	const struct tcf_proto_ops *tmplt_ops;
	void *tmplt_priv;
	struct rcu_head rcu;
};

struct tcf_block {
	/* Lock protects tcf_block and lifetime-management data of chains
	 * attached to the block (refcnt, action_refcnt, explicitly_created).
	 */
	struct mutex lock;
	struct list_head chain_list;
	u32 index; /* block index for shared blocks */
	u32 classid; /* which class this block belongs to */
	refcount_t refcnt;
	struct net *net;
	struct Qdisc *q;
	struct rw_semaphore cb_lock; /* protects cb_list and offload counters */
	struct flow_block flow_block;
	struct list_head owner_list;
	bool keep_dst;
	atomic_t offloadcnt; /* Number of offloaded filters */
	unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
	unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */
	struct {
		struct tcf_chain *chain;
		struct list_head filter_chain_list;
	} chain0;
	struct rcu_head rcu;
	DECLARE_HASHTABLE(proto_destroy_ht, 7);
	struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */
};

#ifdef CONFIG_PROVE_LOCKING
static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain)
{
	return lockdep_is_held(&chain->filter_chain_lock);
}

static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp)
{
	return lockdep_is_held(&tp->lock);
}
#else
static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain)
{
	return true;
}

static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp)
{
	return true;
}
#endif /* #ifdef CONFIG_PROVE_LOCKING */

#define tcf_chain_dereference(p, chain)					\
	rcu_dereference_protected(p, lockdep_tcf_chain_is_locked(chain))

#define tcf_proto_dereference(p, tp)					\
	rcu_dereference_protected(p, lockdep_tcf_proto_is_locked(tp))

static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
{
	struct qdisc_skb_cb *qcb;

	BUILD_BUG_ON(sizeof(skb->cb) < offsetof(struct qdisc_skb_cb, data) + sz);
	BUILD_BUG_ON(sizeof(qcb->data) < sz);
}
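
/* Illustrative sketch (assumption): a qdisc that needs per-packet scratch
 * space keeps it in the qdisc_skb_cb private area and validates the size
 * before using it. The struct and accessor below are hypothetical.
 *
 *	struct example_skb_cb {
 *		u64 enqueue_time;	// hypothetical per-packet state
 *	};
 *
 *	static struct example_skb_cb *example_cb(const struct sk_buff *skb)
 *	{
 *		qdisc_cb_private_validate(skb, sizeof(struct example_skb_cb));
 *		return (struct example_skb_cb *)qdisc_skb_cb(skb)->data;
 *	}
 */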

static inline int qdisc_qlen_cpu(const struct Qdisc *q)
{
	return this_cpu_ptr(q->cpu_qstats)->qlen;
}

static inline int qdisc_qlen(const struct Qdisc *q)
{
	return q->q.qlen;
}

static inline int qdisc_qlen_sum(const struct Qdisc *q)
{
	__u32 qlen = q->qstats.qlen;
	int i;

	if (qdisc_is_percpu_stats(q)) {
		for_each_possible_cpu(i)
			qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen;
	} else {
		qlen += q->q.qlen;
	}

	return qlen;
}

static inline struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb)
{
	return (struct qdisc_skb_cb *)skb->cb;
}

static inline spinlock_t *qdisc_lock(struct Qdisc *qdisc)
{
	return &qdisc->q.lock;
}

static inline struct Qdisc *qdisc_root(const struct Qdisc *qdisc)
{
	struct Qdisc *q = rcu_dereference_rtnl(qdisc->dev_queue->qdisc);

	return q;
}

static inline struct Qdisc *qdisc_root_bh(const struct Qdisc *qdisc)
{
	return rcu_dereference_bh(qdisc->dev_queue->qdisc);
}

static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc)
{
	return qdisc->dev_queue->qdisc_sleeping;
}

/* The qdisc root lock is a mechanism by which the top level
 * of a qdisc tree can be locked from any qdisc node in the
 * forest. This allows changing the configuration of some
 * aspect of the qdisc tree while blocking out asynchronous
 * qdisc access in the packet processing paths.
 *
 * It is only legal to do this when the root will not change
 * on us. Otherwise we'll potentially lock the wrong qdisc
 * root. This is enforced by holding the RTNL semaphore, which
 * all users of this lock accessor must do.
 */
static inline spinlock_t *qdisc_root_lock(const struct Qdisc *qdisc)
{
	struct Qdisc *root = qdisc_root(qdisc);

	ASSERT_RTNL();
	return qdisc_lock(root);
}

static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
{
	struct Qdisc *root = qdisc_root_sleeping(qdisc);

	ASSERT_RTNL();
	return qdisc_lock(root);
}

static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
{
	struct Qdisc *root = qdisc_root_sleeping(qdisc);

	ASSERT_RTNL();
	return &root->running;
}

static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc)
{
	return qdisc->dev_queue->dev;
}

static inline void sch_tree_lock(const struct Qdisc *q)
{
	spin_lock_bh(qdisc_root_sleeping_lock(q));
}

static inline void sch_tree_unlock(const struct Qdisc *q)
{
	spin_unlock_bh(qdisc_root_sleeping_lock(q));
}
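
/* Usage sketch (assumption): a qdisc's ->change() handler typically updates
 * its configuration under the tree lock so the datapath never observes
 * half-updated state; RTNL is already held when it runs. The handler name
 * and the field being updated are illustrative only.
 *
 *	static int example_change(struct Qdisc *sch, struct nlattr *opt,
 *				  struct netlink_ext_ack *extack)
 *	{
 *		sch_tree_lock(sch);
 *		sch->limit = 1000;
 *		sch_tree_unlock(sch);
 *		return 0;
 *	}
 */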

extern struct Qdisc noop_qdisc;
extern struct Qdisc_ops noop_qdisc_ops;
extern struct Qdisc_ops pfifo_fast_ops;
extern struct Qdisc_ops mq_qdisc_ops;
extern struct Qdisc_ops noqueue_qdisc_ops;
extern struct Qdisc_ops fq_codel_qdisc_ops;
extern const struct Qdisc_ops *default_qdisc_ops;
static inline const struct Qdisc_ops *
get_default_qdisc_ops(const struct net_device *dev, int ntx)
{
	return ntx < dev->real_num_tx_queues ?
			default_qdisc_ops : &fq_codel_qdisc_ops;
}

struct Qdisc_class_common {
	u32			classid;
	struct hlist_node	hnode;
};

struct Qdisc_class_hash {
	struct hlist_head	*hash;
	unsigned int		hashsize;
	unsigned int		hashmask;
	unsigned int		hashelems;
};

static inline unsigned int qdisc_class_hash(u32 id, u32 mask)
{
	id ^= id >> 8;
	id ^= id >> 4;
	return id & mask;
}

static inline struct Qdisc_class_common *
qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id)
{
	struct Qdisc_class_common *cl;
	unsigned int h;

	if (!id)
		return NULL;

	h = qdisc_class_hash(id, hash->hashmask);
	hlist_for_each_entry(cl, &hash->hash[h], hnode) {
		if (cl->classid == id)
			return cl;
	}
	return NULL;
}
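
/* Illustrative sketch (assumption): a classful qdisc embeds
 * struct Qdisc_class_common at the start of its per-class state and builds
 * Qdisc_class_ops->find() on top of qdisc_class_find(). The structs below
 * are hypothetical; qdisc_priv() is the kernel helper returning the qdisc's
 * private area.
 *
 *	struct example_class {
 *		struct Qdisc_class_common common;
 *	};
 *
 *	struct example_sched_data {
 *		struct Qdisc_class_hash clhash;
 *	};
 *
 *	static unsigned long example_find(struct Qdisc *sch, u32 classid)
 *	{
 *		struct example_sched_data *q = qdisc_priv(sch);
 *		struct Qdisc_class_common *clc;
 *
 *		clc = qdisc_class_find(&q->clhash, classid);
 *		if (!clc)
 *			return 0;
 *		return (unsigned long)container_of(clc, struct example_class,
 *						   common);
 *	}
 */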

static inline int tc_classid_to_hwtc(struct net_device *dev, u32 classid)
{
	u32 hwtc = TC_H_MIN(classid) - TC_H_MIN_PRIORITY;

	return (hwtc < netdev_get_num_tc(dev)) ? hwtc : -EINVAL;
}

int qdisc_class_hash_init(struct Qdisc_class_hash *);
void qdisc_class_hash_insert(struct Qdisc_class_hash *,
			     struct Qdisc_class_common *);
void qdisc_class_hash_remove(struct Qdisc_class_hash *,
			     struct Qdisc_class_common *);
void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *);
void qdisc_class_hash_destroy(struct Qdisc_class_hash *);

int dev_qdisc_change_tx_queue_len(struct net_device *dev);
void dev_qdisc_change_real_num_tx(struct net_device *dev,
				  unsigned int new_real_tx);
void dev_init_scheduler(struct net_device *dev);
void dev_shutdown(struct net_device *dev);
void dev_activate(struct net_device *dev);
void dev_deactivate(struct net_device *dev);
void dev_deactivate_many(struct list_head *head);
struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
			      struct Qdisc *qdisc);
void qdisc_reset(struct Qdisc *qdisc);
void qdisc_put(struct Qdisc *qdisc);
void qdisc_put_unlocked(struct Qdisc *qdisc);
void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len);
#ifdef CONFIG_NET_SCHED
int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type,
			      void *type_data);
void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
				struct Qdisc *new, struct Qdisc *old,
				enum tc_setup_type type, void *type_data,
				struct netlink_ext_ack *extack);
#else
static inline int
qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type,
			  void *type_data)
{
	q->flags &= ~TCQ_F_OFFLOADED;
	return 0;
}

static inline void
qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
			   struct Qdisc *new, struct Qdisc *old,
			   enum tc_setup_type type, void *type_data,
			   struct netlink_ext_ack *extack)
{
}
#endif
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  const struct Qdisc_ops *ops,
			  struct netlink_ext_ack *extack);
void qdisc_free(struct Qdisc *qdisc);
struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
				const struct Qdisc_ops *ops, u32 parentid,
				struct netlink_ext_ack *extack);
void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab);
int skb_do_redirect(struct sk_buff *);

static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_CLS_ACT
	return skb->tc_at_ingress;
#else
	return false;
#endif
}

static inline bool skb_skip_tc_classify(struct sk_buff *skb)
{
#ifdef CONFIG_NET_CLS_ACT
	if (skb->tc_skip_classify) {
		skb->tc_skip_classify = 0;
		return true;
	}
#endif
	return false;
}

/* Reset all TX qdiscs greater than index of a device. */
static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
{
	struct Qdisc *qdisc;

	for (; i < dev->num_tx_queues; i++) {
		qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc);
		if (qdisc) {
			spin_lock_bh(qdisc_lock(qdisc));
			qdisc_reset(qdisc);
			spin_unlock_bh(qdisc_lock(qdisc));
		}
	}
}

static inline void qdisc_reset_all_tx(struct net_device *dev)
{
	qdisc_reset_all_tx_gt(dev, 0);
}

/* Are all TX queues of the device empty? */
static inline bool qdisc_all_tx_empty(const struct net_device *dev)
{
	unsigned int i;

	rcu_read_lock();
	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		const struct Qdisc *q = rcu_dereference(txq->qdisc);

		if (!qdisc_is_empty(q)) {
			rcu_read_unlock();
			return false;
		}
	}
	rcu_read_unlock();
	return true;
}

/* Are any of the TX qdiscs changing? */
static inline bool qdisc_tx_changing(const struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		if (rcu_access_pointer(txq->qdisc) != txq->qdisc_sleeping)
			return true;
	}
	return false;
}

/* Is the device using the noop qdisc on all queues? */
static inline bool qdisc_tx_is_noop(const struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		if (rcu_access_pointer(txq->qdisc) != &noop_qdisc)
			return false;
	}
	return true;
}

static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb)
{
	return qdisc_skb_cb(skb)->pkt_len;
}

/* additional qdisc xmit flags (NET_XMIT_MASK in linux/netdevice.h) */
enum net_xmit_qdisc_t {
	__NET_XMIT_STOLEN = 0x00010000,
	__NET_XMIT_BYPASS = 0x00020000,
};

#ifdef CONFIG_NET_CLS_ACT
#define net_xmit_drop_count(e)	((e) & __NET_XMIT_STOLEN ? 0 : 1)
#else
#define net_xmit_drop_count(e)	(1)
#endif

static inline void qdisc_calculate_pkt_len(struct sk_buff *skb,
					   const struct Qdisc *sch)
{
#ifdef CONFIG_NET_SCHED
	struct qdisc_size_table *stab = rcu_dereference_bh(sch->stab);

	if (stab)
		__qdisc_calculate_pkt_len(skb, stab);
#endif
}

static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
				struct sk_buff **to_free)
{
	return sch->enqueue(skb, sch, to_free);
}
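
/* Usage sketch (assumption, simplified): callers of qdisc_enqueue() pass a
 * local "to_free" list; skbs the qdisc decides to drop are chained onto it
 * via __qdisc_drop() and are freed by the caller once the qdisc lock has
 * been released, roughly:
 *
 *	struct sk_buff *to_free = NULL;
 *	int rc;
 *
 *	rc = qdisc_enqueue(skb, q, &to_free);
 *	if (unlikely(to_free))
 *		kfree_skb_list(to_free);	// free outside the locked section
 */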

static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
				  __u64 bytes, __u32 packets)
{
	bstats->bytes += bytes;
	bstats->packets += packets;
}

static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
				 const struct sk_buff *skb)
{
	_bstats_update(bstats,
		       qdisc_pkt_len(skb),
		       skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1);
}

static inline void _bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
				      __u64 bytes, __u32 packets)
{
	u64_stats_update_begin(&bstats->syncp);
	_bstats_update(&bstats->bstats, bytes, packets);
	u64_stats_update_end(&bstats->syncp);
}

static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
				     const struct sk_buff *skb)
{
	u64_stats_update_begin(&bstats->syncp);
	bstats_update(&bstats->bstats, skb);
	u64_stats_update_end(&bstats->syncp);
}

static inline void qdisc_bstats_cpu_update(struct Qdisc *sch,
					   const struct sk_buff *skb)
{
	bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb);
}

static inline void qdisc_bstats_update(struct Qdisc *sch,
				       const struct sk_buff *skb)
{
	bstats_update(&sch->bstats, skb);
}

static inline void qdisc_qstats_backlog_dec(struct Qdisc *sch,
					    const struct sk_buff *skb)
{
	sch->qstats.backlog -= qdisc_pkt_len(skb);
}

static inline void qdisc_qstats_cpu_backlog_dec(struct Qdisc *sch,
						const struct sk_buff *skb)
{
	this_cpu_sub(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
}

static inline void qdisc_qstats_backlog_inc(struct Qdisc *sch,
					    const struct sk_buff *skb)
{
	sch->qstats.backlog += qdisc_pkt_len(skb);
}

static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch,
						const struct sk_buff *skb)
{
	this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
}

static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch)
{
	this_cpu_inc(sch->cpu_qstats->qlen);
}

static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch)
{
	this_cpu_dec(sch->cpu_qstats->qlen);
}

static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch)
{
	this_cpu_inc(sch->cpu_qstats->requeues);
}

static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count)
{
	sch->qstats.drops += count;
}

static inline void qstats_drop_inc(struct gnet_stats_queue *qstats)
{
	qstats->drops++;
}

static inline void qstats_overlimit_inc(struct gnet_stats_queue *qstats)
{
	qstats->overlimits++;
}

static inline void qdisc_qstats_drop(struct Qdisc *sch)
{
	qstats_drop_inc(&sch->qstats);
}

static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch)
{
	this_cpu_inc(sch->cpu_qstats->drops);
}

static inline void qdisc_qstats_overlimit(struct Qdisc *sch)
{
	sch->qstats.overlimits++;
}

static inline int qdisc_qstats_copy(struct gnet_dump *d, struct Qdisc *sch)
{
	__u32 qlen = qdisc_qlen_sum(sch);

	return gnet_stats_copy_queue(d, sch->cpu_qstats, &sch->qstats, qlen);
}

static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen,
					     __u32 *backlog)
{
	struct gnet_stats_queue qstats = { 0 };
	__u32 len = qdisc_qlen_sum(sch);

	__gnet_stats_copy_queue(&qstats, sch->cpu_qstats, &sch->qstats, len);
	*qlen = qstats.qlen;
	*backlog = qstats.backlog;
}

static inline void qdisc_tree_flush_backlog(struct Qdisc *sch)
{
	__u32 qlen, backlog;

	qdisc_qstats_qlen_backlog(sch, &qlen, &backlog);
	qdisc_tree_reduce_backlog(sch, qlen, backlog);
}

static inline void qdisc_purge_queue(struct Qdisc *sch)
{
	__u32 qlen, backlog;

	qdisc_qstats_qlen_backlog(sch, &qlen, &backlog);
	qdisc_reset(sch);
	qdisc_tree_reduce_backlog(sch, qlen, backlog);
}

static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh)
{
	qh->head = NULL;
	qh->tail = NULL;
	qh->qlen = 0;
}

static inline void __qdisc_enqueue_tail(struct sk_buff *skb,
					struct qdisc_skb_head *qh)
{
	struct sk_buff *last = qh->tail;

	if (last) {
		skb->next = NULL;
		last->next = skb;
		qh->tail = skb;
	} else {
		qh->tail = skb;
		qh->head = skb;
	}
	qh->qlen++;
}

static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch)
{
	__qdisc_enqueue_tail(skb, &sch->q);
	qdisc_qstats_backlog_inc(sch, skb);
	return NET_XMIT_SUCCESS;
}

static inline void __qdisc_enqueue_head(struct sk_buff *skb,
					struct qdisc_skb_head *qh)
{
	skb->next = qh->head;

	if (!qh->head)
		qh->tail = skb;
	qh->head = skb;
	qh->qlen++;
}

static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh)
{
	struct sk_buff *skb = qh->head;

	if (likely(skb != NULL)) {
		qh->head = skb->next;
		qh->qlen--;
		if (qh->head == NULL)
			qh->tail = NULL;
		skb->next = NULL;
	}

	return skb;
}

static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch)
{
	struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);

	if (likely(skb != NULL)) {
		qdisc_qstats_backlog_dec(sch, skb);
		qdisc_bstats_update(sch, skb);
	}

	return skb;
}

/* Instead of calling kfree_skb() while root qdisc lock is held,
 * queue the skb for future freeing at end of __dev_xmit_skb()
 */
static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free)
{
	skb->next = *to_free;
	*to_free = skb;
}

static inline void __qdisc_drop_all(struct sk_buff *skb,
				    struct sk_buff **to_free)
{
	if (skb->prev)
		skb->prev->next = *to_free;
	else
		skb->next = *to_free;
	*to_free = skb;
}

static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch,
						   struct qdisc_skb_head *qh,
						   struct sk_buff **to_free)
{
	struct sk_buff *skb = __qdisc_dequeue_head(qh);

	if (likely(skb != NULL)) {
		unsigned int len = qdisc_pkt_len(skb);

		qdisc_qstats_backlog_dec(sch, skb);
		__qdisc_drop(skb, to_free);
		return len;
	}

	return 0;
}

static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch,
						 struct sk_buff **to_free)
{
	return __qdisc_queue_drop_head(sch, &sch->q, to_free);
}

static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch)
{
	const struct qdisc_skb_head *qh = &sch->q;

	return qh->head;
}

/* generic pseudo peek method for non-work-conserving qdisc */
static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch)
{
	struct sk_buff *skb = skb_peek(&sch->gso_skb);

	/* we can reuse ->gso_skb because peek isn't called for root qdiscs */
	if (!skb) {
		skb = sch->dequeue(sch);

		if (skb) {
			__skb_queue_head(&sch->gso_skb, skb);
			/* it's still part of the queue */
			qdisc_qstats_backlog_inc(sch, skb);
			sch->q.qlen++;
		}
	}

	return skb;
}

static inline void qdisc_update_stats_at_dequeue(struct Qdisc *sch,
						 struct sk_buff *skb)
{
	if (qdisc_is_percpu_stats(sch)) {
		qdisc_qstats_cpu_backlog_dec(sch, skb);
		qdisc_bstats_cpu_update(sch, skb);
		qdisc_qstats_cpu_qlen_dec(sch);
	} else {
		qdisc_qstats_backlog_dec(sch, skb);
		qdisc_bstats_update(sch, skb);
		sch->q.qlen--;
	}
}

static inline void qdisc_update_stats_at_enqueue(struct Qdisc *sch,
						 unsigned int pkt_len)
{
	if (qdisc_is_percpu_stats(sch)) {
		qdisc_qstats_cpu_qlen_inc(sch);
		this_cpu_add(sch->cpu_qstats->backlog, pkt_len);
	} else {
		sch->qstats.backlog += pkt_len;
		sch->q.qlen++;
	}
}

/* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */
static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
{
	struct sk_buff *skb = skb_peek(&sch->gso_skb);

	if (skb) {
		skb = __skb_dequeue(&sch->gso_skb);
		if (qdisc_is_percpu_stats(sch)) {
			qdisc_qstats_cpu_backlog_dec(sch, skb);
			qdisc_qstats_cpu_qlen_dec(sch);
		} else {
			qdisc_qstats_backlog_dec(sch, skb);
			sch->q.qlen--;
		}
	} else {
		skb = sch->dequeue(sch);
	}

	return skb;
}
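
/* Usage sketch (assumption): when a child qdisc's ->peek is
 * qdisc_peek_dequeued(), its owner must commit the dequeue with
 * qdisc_dequeue_peeked() rather than ->dequeue(), e.g. a shaping parent:
 *
 *	static struct sk_buff *example_shaped_dequeue(struct Qdisc *child,
 *						      bool may_send)
 *	{
 *		struct sk_buff *skb = child->ops->peek(child);
 *
 *		if (!skb || !may_send)
 *			return NULL;	// leave the skb parked in gso_skb
 *		return qdisc_dequeue_peeked(child);
 *	}
 */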

static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh)
{
	/*
	 * We do not know the backlog in bytes of this list, it
	 * is up to the caller to correct it
	 */
	ASSERT_RTNL();
	if (qh->qlen) {
		rtnl_kfree_skbs(qh->head, qh->tail);

		qh->head = NULL;
		qh->tail = NULL;
		qh->qlen = 0;
	}
}

static inline void qdisc_reset_queue(struct Qdisc *sch)
{
	__qdisc_reset_queue(&sch->q);
	sch->qstats.backlog = 0;
}

static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new,
					  struct Qdisc **pold)
{
	struct Qdisc *old;

	sch_tree_lock(sch);
	old = *pold;
	*pold = new;
	if (old != NULL)
		qdisc_purge_queue(old);
	sch_tree_unlock(sch);

	return old;
}
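
/* Usage sketch (assumption): a classful qdisc's ->graft() callback commonly
 * swaps in the new child with qdisc_replace() and hands back the old one.
 * The private struct is hypothetical; qdisc_priv() is the kernel helper
 * returning the qdisc's private area.
 *
 *	struct example_sched_data {
 *		struct Qdisc *child;
 *	};
 *
 *	static int example_graft(struct Qdisc *sch, unsigned long arg,
 *				 struct Qdisc *new, struct Qdisc **old,
 *				 struct netlink_ext_ack *extack)
 *	{
 *		struct example_sched_data *q = qdisc_priv(sch);
 *
 *		if (!new)
 *			new = &noop_qdisc;
 *		*old = qdisc_replace(sch, new, &q->child);
 *		return 0;
 *	}
 */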

static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)
{
	rtnl_kfree_skbs(skb, skb);
	qdisc_qstats_drop(sch);
}

static inline int qdisc_drop_cpu(struct sk_buff *skb, struct Qdisc *sch,
				 struct sk_buff **to_free)
{
	__qdisc_drop(skb, to_free);
	qdisc_qstats_cpu_drop(sch);

	return NET_XMIT_DROP;
}

static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch,
			     struct sk_buff **to_free)
{
	__qdisc_drop(skb, to_free);
	qdisc_qstats_drop(sch);

	return NET_XMIT_DROP;
}
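
/* Illustrative sketch (assumption): a tail-drop enqueue in the style of
 * pfifo, pairing qdisc_enqueue_tail() for the happy path with qdisc_drop()
 * once sch->limit is exceeded. The function name is hypothetical (it is the
 * one referenced in the Qdisc_ops sketch earlier in this header).
 *
 *	static int example_fifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 *					struct sk_buff **to_free)
 *	{
 *		if (likely(sch->q.qlen < sch->limit))
 *			return qdisc_enqueue_tail(skb, sch);
 *		return qdisc_drop(skb, sch, to_free);
 *	}
 */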

static inline int qdisc_drop_all(struct sk_buff *skb, struct Qdisc *sch,
				 struct sk_buff **to_free)
{
	__qdisc_drop_all(skb, to_free);
	qdisc_qstats_drop(sch);

	return NET_XMIT_DROP;
}

/* Length to Time (L2T) lookup in a qdisc_rate_table, to determine how
 * long it will take to send a packet given its size.
 */
static inline u32 qdisc_l2t(struct qdisc_rate_table* rtab, unsigned int pktlen)
{
	int slot = pktlen + rtab->rate.cell_align + rtab->rate.overhead;
	if (slot < 0)
		slot = 0;
	slot >>= rtab->rate.cell_log;
	if (slot > 255)
		return rtab->data[255]*(slot >> 8) + rtab->data[slot & 0xFF];
	return rtab->data[slot];
}

struct psched_ratecfg {
	u64	rate_bytes_ps; /* bytes per second */
	u32	mult;
	u16	overhead;
	u16	mpu;
	u8	linklayer;
	u8	shift;
};

static inline u64 psched_l2t_ns(const struct psched_ratecfg *r,
				unsigned int len)
{
	len += r->overhead;

	if (len < r->mpu)
		len = r->mpu;

	if (unlikely(r->linklayer == TC_LINKLAYER_ATM))
		return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift;

	return ((u64)len * r->mult) >> r->shift;
}
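
/* Worked example (illustrative): with psched_ratecfg_precompute() set up for
 * rate_bytes_ps = 125,000,000 (1 Gbit/s), mpu = 0 and overhead = 0,
 * psched_l2t_ns() answers "how long does this packet occupy the link":
 * a 1500 byte packet takes 1500 / 125e6 s = 12 us = 12,000 ns, which is what
 * ((u64)len * r->mult) >> r->shift approximates without a 64-bit division in
 * the fast path.
 */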

void psched_ratecfg_precompute(struct psched_ratecfg *r,
			       const struct tc_ratespec *conf,
			       u64 rate64);

static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
					  const struct psched_ratecfg *r)
{
	memset(res, 0, sizeof(*res));

	/* legacy struct tc_ratespec has a 32bit @rate field;
	 * Qdiscs using 64bit rates should add new attributes
	 * in order to maintain compatibility.
	 */
	res->rate = min_t(u64, r->rate_bytes_ps, ~0U);

	res->overhead = r->overhead;
	res->mpu = r->mpu;
	res->linklayer = (r->linklayer & TC_LINKLAYER_MASK);
}

/* Mini Qdisc serves the specific needs of the ingress/clsact Qdisc.
 * The fast path only needs to access the filter list and to update stats.
 */
struct mini_Qdisc {
	struct tcf_proto *filter_list;
	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
	struct gnet_stats_queue	__percpu *cpu_qstats;
	struct rcu_head rcu;
};

static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq,
						const struct sk_buff *skb)
{
	bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb);
}

static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq)
{
	this_cpu_inc(miniq->cpu_qstats->drops);
}

struct mini_Qdisc_pair {
	struct mini_Qdisc miniq1;
	struct mini_Qdisc miniq2;
	struct mini_Qdisc __rcu **p_miniq;
};

void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
			  struct tcf_proto *tp_head);
void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
			  struct mini_Qdisc __rcu **p_miniq);
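
/* Usage sketch (assumption, modelled on how an ingress-style qdisc would use
 * the pair): the qdisc embeds a mini_Qdisc_pair in its private data, calls
 * mini_qdisc_pair_init() from its ->init() with the device's RCU-visible
 * mini_Qdisc pointer, and swaps in the new filter list from its
 * tcf_chain_head_change_t callback. The callback name is hypothetical.
 *
 *	static void example_chain_head_change(struct tcf_proto *tp_head,
 *					      void *priv)
 *	{
 *		struct mini_Qdisc_pair *miniqp = priv;
 *
 *		mini_qdisc_pair_swap(miniqp, tp_head);
 *	}
 */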

static inline void skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res)
{
	struct gnet_stats_queue *stats = res->qstats;
	int ret;

	if (res->ingress)
		ret = netif_receive_skb(skb);
	else
		ret = dev_queue_xmit(skb);
	if (ret && stats)
		qstats_overlimit_inc(res->qstats);
}

/* Make sure qdisc is no longer in SCHED state. */
static inline void qdisc_synchronize(const struct Qdisc *q)
{
	while (test_bit(__QDISC_STATE_SCHED, &q->state))
		msleep(1);
}

#endif