#ifndef __LINUX_PKT_SCHED_H
#define __LINUX_PKT_SCHED_H

#include <linux/types.h>

/* Logical priority bands not depending on specific packet scheduler.
   Every scheduler will map them to real traffic classes, if it has
   no more precise mechanism to classify packets.

   These numbers have no special meaning, though their coincidence
   with obsolete IPv6 values is not accidental :-). Newer IPv6 drafts
   preferred full anarchy inspired by the diffserv group.

   Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy
   class; as a rule it will actually be handled with more care than
   filler or even bulk traffic.
 */

#define TC_PRIO_BESTEFFORT		0
#define TC_PRIO_FILLER			1
#define TC_PRIO_BULK			2
#define TC_PRIO_INTERACTIVE_BULK	4
#define TC_PRIO_INTERACTIVE		6
#define TC_PRIO_CONTROL			7

#define TC_PRIO_MAX			15
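
/*
 * Illustrative sketch (not part of this header): an application can place
 * its traffic into one of the logical bands above by setting the socket
 * priority; the scheduler then maps that priority through its priomap
 * (see struct tc_prio_qopt below).
 *
 *	int prio = TC_PRIO_INTERACTIVE;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio));
 */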

/* Generic queue statistics, available for all the elements.
   Particular schedulers may also have their own private records.
 */

struct tc_stats {
	__u64	bytes;		/* Number of enqueued bytes */
	__u32	packets;	/* Number of enqueued packets */
	__u32	drops;		/* Packets dropped because of lack of resources */
	__u32	overlimits;	/* Number of throttle events when this
				 * flow goes out of allocated bandwidth */
	__u32	bps;		/* Current flow byte rate */
	__u32	pps;		/* Current flow packet rate */
	__u32	qlen;		/* Current queue length (packets) */
	__u32	backlog;	/* Current backlog (bytes) */
};

struct tc_estimator {
	signed char	interval;
	unsigned char	ewma_log;
};

/* "Handles"
   ---------

   All the traffic control objects have 32bit identifiers, or "handles".

   From the user API viewpoint they can be considered opaque numbers,
   but they actually consist of two fields: a major and a minor number,
   which the kernel interprets specially. Applications may rely on this
   structure, though it is not recommended.

   E.g. qdisc handles always have a minor number equal to zero, while
   classes (or flows) have a major number equal to their parent qdisc's
   major and a minor number uniquely identifying the class inside that
   qdisc.

   Macros to manipulate handles:
 */

#define TC_H_MAJ_MASK (0xFFFF0000U)
#define TC_H_MIN_MASK (0x0000FFFFU)
#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK)
#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK)
#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK))

#define TC_H_UNSPEC	(0U)
#define TC_H_ROOT	(0xFFFFFFFFU)
#define TC_H_INGRESS	(0xFFFFFFF1U)
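
/*
 * A minimal usage sketch (illustrative only, not part of the ABI):
 * building and decomposing the class handle "1:10", i.e. major 0x0001,
 * minor 0x0010:
 *
 *	__u32 parent = TC_H_MAKE(0x00010000U, 0);	(the qdisc "1:")
 *	__u32 cls    = TC_H_MAKE(parent, 0x0010U);	(the class "1:10")
 *
 * Here TC_H_MAJ(cls) == 0x00010000U and TC_H_MIN(cls) == 0x00000010U.
 */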

struct tc_ratespec {
	unsigned char	cell_log;
	unsigned char	__reserved;
	unsigned short	overhead;
	short		cell_align;
	unsigned short	mpu;
	__u32		rate;
};

#define TC_RTAB_SIZE	1024
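
/*
 * A rate table ("rtab", see TCA_TBF_RTAB, TCA_CBQ_RTAB, ...) is an array
 * of TC_RTAB_SIZE/4 == 256 __u32 slots filled in by userspace; slot i
 * holds the time needed to transmit a packet whose size falls into cell i,
 * i.e. roughly (i + 1) << cell_log bytes at the given rate. A rough
 * sketch, assuming the time unit is microseconds (real tc additionally
 * scales to scheduler ticks and accounts for mpu/overhead):
 *
 *	for (i = 0; i < 256; i++) {
 *		unsigned int sz = (i + 1) << r.cell_log;
 *
 *		rtab[i] = (__u32)(1000000ULL * sz / r.rate);
 *	}
 */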

struct tc_sizespec {
	unsigned char	cell_log;
	unsigned char	size_log;
	short		cell_align;
	int		overhead;
	unsigned int	linklayer;
	unsigned int	mpu;
	unsigned int	mtu;
	unsigned int	tsize;
};

enum {
	TCA_STAB_UNSPEC,
	TCA_STAB_BASE,
	TCA_STAB_DATA,
	__TCA_STAB_MAX
};

#define TCA_STAB_MAX (__TCA_STAB_MAX - 1)

/* FIFO section */

struct tc_fifo_qopt {
	__u32	limit;	/* Queue length: bytes for bfifo, packets for pfifo */
};
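
/*
 * Illustrative only: the same field is interpreted in different units
 * depending on the qdisc, e.g.
 *
 *	struct tc_fifo_qopt pfifo = { .limit = 100 };	(100 packets)
 *	struct tc_fifo_qopt bfifo = { .limit = 10000 };	(about 10 kB)
 */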

/* PRIO section */

#define TCQ_PRIO_BANDS	16
#define TCQ_MIN_PRIO_BANDS 2

struct tc_prio_qopt {
	int	bands;			/* Number of bands */
	__u8	priomap[TC_PRIO_MAX+1];	/* Map: logical priority -> PRIO band */
};
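
/*
 * A configuration sketch (illustrative only): a three-band prio qdisc
 * that sends TC_PRIO_INTERACTIVE and TC_PRIO_CONTROL traffic to band 0,
 * best effort to band 1 and bulk/filler to band 2. Any mapping is valid
 * as long as every priomap entry is smaller than bands:
 *
 *	struct tc_prio_qopt opt = {
 *		.bands   = 3,
 *		.priomap = { 1, 2, 2, 2, 1, 2, 0, 0,
 *			     1, 1, 1, 1, 1, 1, 1, 1 },
 *	};
 */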

/* MULTIQ section */

struct tc_multiq_qopt {
	__u16	bands;		/* Number of bands */
	__u16	max_bands;	/* Maximum number of queues */
};

/* PLUG section */

#define TCQ_PLUG_BUFFER			0
#define TCQ_PLUG_RELEASE_ONE		1
#define TCQ_PLUG_RELEASE_INDEFINITE	2
#define TCQ_PLUG_LIMIT			3

struct tc_plug_qopt {
	/* TCQ_PLUG_BUFFER: Insert a plug into the queue and
	 *  buffer any incoming packets.
	 * TCQ_PLUG_RELEASE_ONE: Dequeue packets from queue head
	 *  to the beginning of the next plug.
	 * TCQ_PLUG_RELEASE_INDEFINITE: Dequeue all packets from queue.
	 *  Stop buffering packets until the next TCQ_PLUG_BUFFER
	 *  command is received (just act as a pass-through queue).
	 * TCQ_PLUG_LIMIT: Increase/decrease queue size.
	 */
	int	action;
	__u32	limit;
};
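
/*
 * A usage sketch (illustrative only), e.g. output buffering around a
 * checkpoint: plug the queue, let the checkpoint complete, then release
 * everything that was queued before the plug. Each structure is passed
 * to the qdisc as the TCA_OPTIONS payload of a qdisc-change request:
 *
 *	struct tc_plug_qopt plug    = { .action = TCQ_PLUG_BUFFER };
 *	struct tc_plug_qopt release = { .action = TCQ_PLUG_RELEASE_ONE };
 */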

/* TBF section */

struct tc_tbf_qopt {
	struct tc_ratespec	rate;
	struct tc_ratespec	peakrate;
	__u32			limit;
	__u32			buffer;
	__u32			mtu;
};
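
/*
 * Note that buffer and mtu carry time, not bytes: userspace converts the
 * configured burst into the time it takes to send that many bytes at the
 * given rate. A hedged sketch in microseconds (real tc further scales to
 * scheduler ticks), for a 1 Mbit/s rate with a 10 kB burst:
 *
 *	struct tc_tbf_qopt opt = { 0 };
 *
 *	opt.rate.rate = 125000;		(bytes per second)
 *	opt.limit     = 10000;		(queue limit in bytes)
 *	opt.buffer    = 1000000ULL * 10000 / opt.rate.rate;	(~80000 us)
 */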

enum {
	TCA_TBF_UNSPEC,
	TCA_TBF_PARMS,
	TCA_TBF_RTAB,
	TCA_TBF_PTAB,
	__TCA_TBF_MAX,
};

#define TCA_TBF_MAX (__TCA_TBF_MAX - 1)


/* TEQL section */

/* TEQL does not require any parameters */

/* SFQ section */

struct tc_sfq_qopt {
	unsigned	quantum;	/* Bytes per round allocated to flow */
	int		perturb_period;	/* Period of hash perturbation */
	__u32		limit;		/* Maximal packets in queue */
	unsigned	divisor;	/* Hash divisor */
	unsigned	flows;		/* Maximal number of flows */
};

struct tc_sfq_xstats {
	__s32		allot;
};

/*
 * NOTE: limit, divisor and flows are hardwired in the code at the moment.
 *
 * limit=flows=128, divisor=1024;
 *
 * The only reason for this is efficiency; it is possible
 * to change these parameters at compile time.
 */
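
/*
 * A configuration sketch (illustrative only): quantum is typically the
 * interface MTU plus link-layer header, and perturb_period (in seconds)
 * re-seeds the flow hash so that colliding flows do not stay colliding:
 *
 *	struct tc_sfq_qopt opt = {
 *		.quantum        = 1514,	(one full-size Ethernet frame per round)
 *		.perturb_period = 10,	(reshuffle the hash every 10 seconds)
 *	};
 */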

/* RED section */

enum {
	TCA_RED_UNSPEC,
	TCA_RED_PARMS,
	TCA_RED_STAB,
	__TCA_RED_MAX,
};

#define TCA_RED_MAX (__TCA_RED_MAX - 1)

struct tc_red_qopt {
	__u32		limit;		/* HARD maximal queue length (bytes) */
	__u32		qth_min;	/* Min average length threshold (bytes) */
	__u32		qth_max;	/* Max average length threshold (bytes) */
	unsigned char	Wlog;		/* log(W) */
	unsigned char	Plog;		/* log(P_max/(qth_max-qth_min)) */
	unsigned char	Scell_log;	/* cell size for idle damping */
	unsigned char	flags;
#define TC_RED_ECN	1
#define TC_RED_HARDDROP	2
};
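
/*
 * A worked sketch of how these parameters interact (fixed-point scaling
 * and idle-time handling omitted): the average queue length is an EWMA
 * with weight W = 2^-Wlog, roughly
 *
 *	avg += (backlog - avg) >> Wlog;
 *
 * Below qth_min every packet passes, above qth_max every packet is
 * marked or dropped, and in between the probability grows linearly:
 *
 *	Pb = max_P * (avg - qth_min) / (qth_max - qth_min)
 *
 * where Plog encodes max_P such that max_P / (qth_max - qth_min) == 2^-Plog.
 */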

struct tc_red_xstats {
	__u32	early;		/* Early drops */
	__u32	pdrop;		/* Drops due to queue limits */
	__u32	other;		/* Drops due to drop() calls */
	__u32	marked;		/* Marked packets */
};

/* GRED section */

#define MAX_DPs 16

enum {
	TCA_GRED_UNSPEC,
	TCA_GRED_PARMS,
	TCA_GRED_STAB,
	TCA_GRED_DPS,
	__TCA_GRED_MAX,
};

#define TCA_GRED_MAX (__TCA_GRED_MAX - 1)

struct tc_gred_qopt {
	__u32	limit;		/* HARD maximal queue length (bytes) */
	__u32	qth_min;	/* Min average length threshold (bytes) */
	__u32	qth_max;	/* Max average length threshold (bytes) */
	__u32	DP;		/* up to 2^32 DPs */
	__u32	backlog;
	__u32	qave;
	__u32	forced;
	__u32	early;
	__u32	other;
	__u32	pdrop;
	__u8	Wlog;		/* log(W) */
	__u8	Plog;		/* log(P_max/(qth_max-qth_min)) */
	__u8	Scell_log;	/* cell size for idle damping */
	__u8	prio;		/* prio of this VQ */
	__u32	packets;
	__u32	bytesin;
};

/* gred setup */
struct tc_gred_sopt {
	__u32	DPs;
	__u32	def_DP;
	__u8	grio;
	__u8	flags;
	__u16	pad1;
};

/* CHOKe section */

enum {
	TCA_CHOKE_UNSPEC,
	TCA_CHOKE_PARMS,
	TCA_CHOKE_STAB,
	__TCA_CHOKE_MAX,
};

#define TCA_CHOKE_MAX (__TCA_CHOKE_MAX - 1)

struct tc_choke_qopt {
	__u32		limit;		/* Hard queue length (packets) */
	__u32		qth_min;	/* Min average threshold (packets) */
	__u32		qth_max;	/* Max average threshold (packets) */
	unsigned char	Wlog;		/* log(W) */
	unsigned char	Plog;		/* log(P_max/(qth_max-qth_min)) */
	unsigned char	Scell_log;	/* cell size for idle damping */
	unsigned char	flags;		/* see RED flags */
};

struct tc_choke_xstats {
	__u32	early;		/* Early drops */
	__u32	pdrop;		/* Drops due to queue limits */
	__u32	other;		/* Drops due to drop() calls */
	__u32	marked;		/* Marked packets */
	__u32	matched;	/* Drops due to flow match */
};

/* HTB section */
#define TC_HTB_NUMPRIO		8
#define TC_HTB_MAXDEPTH		8
#define TC_HTB_PROTOVER		3 /* the same as HTB and TC's major */

struct tc_htb_opt {
	struct tc_ratespec	rate;
	struct tc_ratespec	ceil;
	__u32	buffer;
	__u32	cbuffer;
	__u32	quantum;
	__u32	level;		/* out only */
	__u32	prio;
};

struct tc_htb_glob {
	__u32 version;		/* to match HTB/TC */
	__u32 rate2quantum;	/* bps->quantum divisor */
	__u32 defcls;		/* default class number */
	__u32 debug;		/* debug flags */

	/* stats */
	__u32 direct_pkts;	/* count of non-shaped packets */
};
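
/*
 * A sketch of how rate2quantum is used (illustrative only): when a class
 * is created without an explicit quantum, HTB derives one from the class
 * rate, roughly quantum = rate / rate2quantum. With the common default
 * rate2quantum = 10, a class with rate = 125000 bytes/s (1 Mbit/s) would
 * borrow in quanta of about 12500 bytes per round.
 */
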
enum {
	TCA_HTB_UNSPEC,
	TCA_HTB_PARMS,
	TCA_HTB_INIT,
	TCA_HTB_CTAB,
	TCA_HTB_RTAB,
	__TCA_HTB_MAX,
};

#define TCA_HTB_MAX (__TCA_HTB_MAX - 1)

struct tc_htb_xstats {
	__u32 lends;
	__u32 borrows;
	__u32 giants;	/* too big packets (rate will not be accurate) */
	__u32 tokens;
	__u32 ctokens;
};

/* HFSC section */

struct tc_hfsc_qopt {
	__u16	defcls;		/* default class */
};

struct tc_service_curve {
	__u32	m1;		/* slope of the first segment in bps */
	__u32	d;		/* x-projection of the first segment in us */
	__u32	m2;		/* slope of the second segment in bps */
};
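
/*
 * A sketch of a two-segment service curve (illustrative only): serve at
 * m1 for the first d microseconds of a backlog period and at m2 after
 * that, e.g. a higher rate at the start of a burst followed by a lower
 * sustained rate:
 *
 *	struct tc_service_curve sc = {
 *		.m1 = 200000,	(200 kbit/s for the first segment)
 *		.d  = 50000,	(the first segment lasts 50 ms)
 *		.m2 = 100000,	(100 kbit/s afterwards)
 *	};
 */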

struct tc_hfsc_stats {
	__u64	work;		/* total work done */
	__u64	rtwork;		/* work done by real-time criteria */
	__u32	period;		/* current period */
	__u32	level;		/* class level in hierarchy */
};

enum {
	TCA_HFSC_UNSPEC,
	TCA_HFSC_RSC,
	TCA_HFSC_FSC,
	TCA_HFSC_USC,
	__TCA_HFSC_MAX,
};

#define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1)


/* CBQ section */

#define TC_CBQ_MAXPRIO		8
#define TC_CBQ_MAXLEVEL		8
#define TC_CBQ_DEF_EWMA		5

struct tc_cbq_lssopt {
	unsigned char	change;
	unsigned char	flags;
#define TCF_CBQ_LSS_BOUNDED	1
#define TCF_CBQ_LSS_ISOLATED	2
	unsigned char	ewma_log;
	unsigned char	level;
#define TCF_CBQ_LSS_FLAGS	1
#define TCF_CBQ_LSS_EWMA	2
#define TCF_CBQ_LSS_MAXIDLE	4
#define TCF_CBQ_LSS_MINIDLE	8
#define TCF_CBQ_LSS_OFFTIME	0x10
#define TCF_CBQ_LSS_AVPKT	0x20
	__u32		maxidle;
	__u32		minidle;
	__u32		offtime;
	__u32		avpkt;
};

struct tc_cbq_wrropt {
	unsigned char	flags;
	unsigned char	priority;
	unsigned char	cpriority;
	unsigned char	__reserved;
	__u32		allot;
	__u32		weight;
};

struct tc_cbq_ovl {
	unsigned char	strategy;
#define TC_CBQ_OVL_CLASSIC	0
#define TC_CBQ_OVL_DELAY	1
#define TC_CBQ_OVL_LOWPRIO	2
#define TC_CBQ_OVL_DROP		3
#define TC_CBQ_OVL_RCLASSIC	4
	unsigned char	priority2;
	__u16		pad;
	__u32		penalty;
};

struct tc_cbq_police {
	unsigned char	police;
	unsigned char	__res1;
	unsigned short	__res2;
};

struct tc_cbq_fopt {
	__u32		split;
	__u32		defmap;
	__u32		defchange;
};

struct tc_cbq_xstats {
	__u32		borrows;
	__u32		overactions;
	__s32		avgidle;
	__s32		undertime;
};

enum {
	TCA_CBQ_UNSPEC,
	TCA_CBQ_LSSOPT,
	TCA_CBQ_WRROPT,
	TCA_CBQ_FOPT,
	TCA_CBQ_OVL_STRATEGY,
	TCA_CBQ_RATE,
	TCA_CBQ_RTAB,
	TCA_CBQ_POLICE,
	__TCA_CBQ_MAX,
};

#define TCA_CBQ_MAX	(__TCA_CBQ_MAX - 1)

/* dsmark section */

enum {
	TCA_DSMARK_UNSPEC,
	TCA_DSMARK_INDICES,
	TCA_DSMARK_DEFAULT_INDEX,
	TCA_DSMARK_SET_TC_INDEX,
	TCA_DSMARK_MASK,
	TCA_DSMARK_VALUE,
	__TCA_DSMARK_MAX,
};

#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1)

/* fq_codel section */

enum {
	TCA_FQ_CODEL_UNSPEC,
	TCA_FQ_CODEL_TARGET,
	TCA_FQ_CODEL_LIMIT,
	TCA_FQ_CODEL_INTERVAL,
	TCA_FQ_CODEL_ECN,
	TCA_FQ_CODEL_FLOWS,
	TCA_FQ_CODEL_QUANTUM,
	__TCA_FQ_CODEL_MAX
};

#define TCA_FQ_CODEL_MAX (__TCA_FQ_CODEL_MAX - 1)

/* ATM section */

enum {
	TCA_ATM_UNSPEC,
	TCA_ATM_FD,		/* file/socket descriptor */
	TCA_ATM_PTR,		/* pointer to descriptor - later */
	TCA_ATM_HDR,		/* LL header */
	TCA_ATM_EXCESS,		/* excess traffic class (0 for CLP) */
	TCA_ATM_ADDR,		/* PVC address (for output only) */
	TCA_ATM_STATE,		/* VC state (ATM_VS_*; for output only) */
	__TCA_ATM_MAX,
};

#define TCA_ATM_MAX	(__TCA_ATM_MAX - 1)

/* Network emulator */

enum {
	TCA_NETEM_UNSPEC,
	TCA_NETEM_CORR,
	TCA_NETEM_DELAY_DIST,
	TCA_NETEM_REORDER,
	TCA_NETEM_CORRUPT,
	TCA_NETEM_LOSS,
	__TCA_NETEM_MAX,
};

#define TCA_NETEM_MAX (__TCA_NETEM_MAX - 1)

struct tc_netem_qopt {
	__u32	latency;	/* added delay (us) */
	__u32	limit;		/* fifo limit (packets) */
	__u32	loss;		/* random packet loss (0=none ~0=100%) */
	__u32	gap;		/* re-ordering gap (0 for none) */
	__u32	duplicate;	/* random packet dup (0=none ~0=100%) */
	__u32	jitter;		/* random jitter in latency (us) */
};
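
/*
 * A configuration sketch (illustrative only), following the units given
 * in the comments above (tc converts the time values to scheduler ticks
 * before handing them to the kernel): 100 ms delay, 10 ms jitter and 1%
 * random loss, with loss expressed as a fraction of the full 32-bit range:
 *
 *	struct tc_netem_qopt opt = { 0 };
 *
 *	opt.latency = 100000;				(100 ms)
 *	opt.jitter  = 10000;				(10 ms)
 *	opt.limit   = 1000;				(packets)
 *	opt.loss    = (__u32)(0.01 * 0xFFFFFFFFu);	(1% loss)
 */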

struct tc_netem_corr {
	__u32	delay_corr;	/* delay correlation */
	__u32	loss_corr;	/* packet loss correlation */
	__u32	dup_corr;	/* duplicate correlation */
};

struct tc_netem_reorder {
	__u32	probability;
	__u32	correlation;
};

struct tc_netem_corrupt {
	__u32	probability;
	__u32	correlation;
};

enum {
	NETEM_LOSS_UNSPEC,
	NETEM_LOSS_GI,		/* General Intuitive - 4 state model */
	NETEM_LOSS_GE,		/* Gilbert-Elliott models */
	__NETEM_LOSS_MAX
};
#define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1)

/* State transition probabilities for 4 state model */
struct tc_netem_gimodel {
	__u32	p13;
	__u32	p31;
	__u32	p32;
	__u32	p14;
	__u32	p23;
};

/* Gilbert-Elliott models */
struct tc_netem_gemodel {
	__u32	p;
	__u32	r;
	__u32	h;
	__u32	k1;
};

#define NETEM_DIST_SCALE	8192
#define NETEM_DIST_MAX		16384

/* DRR */

enum {
	TCA_DRR_UNSPEC,
	TCA_DRR_QUANTUM,
	__TCA_DRR_MAX
};

#define TCA_DRR_MAX	(__TCA_DRR_MAX - 1)

struct tc_drr_stats {
	__u32	deficit;
};

/* MQPRIO */
#define TC_QOPT_BITMASK 15
#define TC_QOPT_MAX_QUEUE 16

struct tc_mqprio_qopt {
	__u8	num_tc;
	__u8	prio_tc_map[TC_QOPT_BITMASK + 1];
	__u8	hw;
	__u16	count[TC_QOPT_MAX_QUEUE];
	__u16	offset[TC_QOPT_MAX_QUEUE];
};
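
/*
 * A configuration sketch (illustrative only): two traffic classes on a
 * four-queue NIC, skb priorities 0-3 mapped to TC 0 and 4-15 to TC 1,
 * with hardware queues 0-1 serving TC 0 and queues 2-3 serving TC 1:
 *
 *	struct tc_mqprio_qopt opt = {
 *		.num_tc      = 2,
 *		.prio_tc_map = { 0, 0, 0, 0, 1, 1, 1, 1,
 *				 1, 1, 1, 1, 1, 1, 1, 1 },
 *		.hw          = 1,	(let the NIC apply the mapping)
 *		.count       = { 2, 2 },
 *		.offset      = { 0, 2 },
 *	};
 */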

/* SFB */

enum {
	TCA_SFB_UNSPEC,
	TCA_SFB_PARMS,
	__TCA_SFB_MAX,
};

#define TCA_SFB_MAX (__TCA_SFB_MAX - 1)

/*
 * Note: increment, decrement are Q0.16 fixed-point values.
 */
struct tc_sfb_qopt {
	__u32	rehash_interval;	/* delay between hash moves, in ms */
	__u32	warmup_time;	/* double buffering warmup time in ms (warmup_time < rehash_interval) */
	__u32	max;		/* max len of qlen_min */
	__u32	bin_size;	/* maximum queue length per bin */
	__u32	increment;	/* probability increment, (d1 in Blue) */
	__u32	decrement;	/* probability decrement, (d2 in Blue) */
	__u32	limit;		/* max SFB queue length */
	__u32	penalty_rate;	/* in pps: inelastic flows are rate limited to this rate */
	__u32	penalty_burst;
};
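
/*
 * A worked example of the Q0.16 encoding (illustrative only): a
 * probability step of 0.0005 is stored as
 *
 *	increment = (__u32)(0.0005 * 65536);	(== 32, i.e. 32/65536)
 *
 * so each bin's marking probability moves up by increment/65536 on
 * congestion and down by decrement/65536 when the bin is found empty.
 */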

struct tc_sfb_xstats {
	__u32	earlydrop;
	__u32	penaltydrop;
	__u32	bucketdrop;
	__u32	queuedrop;
	__u32	childdrop;	/* drops in child qdisc */
	__u32	marked;
	__u32	maxqlen;
	__u32	maxprob;
	__u32	avgprob;
};

#define SFB_MAX_PROB 0xFFFF

/* QFQ */
enum {
	TCA_QFQ_UNSPEC,
	TCA_QFQ_WEIGHT,
	TCA_QFQ_LMAX,
	__TCA_QFQ_MAX
};

#define TCA_QFQ_MAX	(__TCA_QFQ_MAX - 1)

struct tc_qfq_stats {
	__u32	weight;
	__u32	lmax;
};

#endif