/*
2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/slab.h>
15#include <linux/types.h>
16#include <linux/kernel.h>
17#include <linux/string.h>
18#include <linux/errno.h>
19#include <linux/skbuff.h>
20#include <net/netlink.h>
21#include <net/pkt_sched.h>
22
23
24/* Class-Based Queueing (CBQ) algorithm.
25 =======================================
26
27 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
28 Management Models for Packet Networks",
29 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
30
31 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
32
33 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
34 Parameters", 1996
35
36 [4] Sally Floyd and Michael Speer, "Experimental Results
37 for Class-Based Queueing", 1998, not published.
38
39 -----------------------------------------------------------------------
40
41 Algorithm skeleton was taken from NS simulator cbq.cc.
   If someone wants to check this code against the LBL version,
   they should take into account that ONLY the skeleton was borrowed;
   the implementation is different. In particular:
45
46 --- The WRR algorithm is different. Our version looks more
47 reasonable (I hope) and works when quanta are allowed to be
       less than MTU, which is always the case when real-time classes
       have small rates.  Note that the statement of [3] is
       incomplete; the delay may actually be estimated even if the class
51 per-round allotment is less than MTU. Namely, if per-round
52 allotment is W*r_i, and r_1+...+r_k = r < 1
53
54 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
55
56 In the worst case we have IntServ estimate with D = W*r+k*MTU
57 and C = MTU*r. The proof (if correct at all) is trivial.
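
       For illustration, with hypothetical numbers: MTU = 1500 bytes,
       B = 125000 bytes/sec (1 Mbit/s), k = 2 classes with r_1 = r_2 = 0.05
       (so r = 0.1), and W = 2000 bytes.  The per-round allotment
       W*r_i = 100 bytes is then less than MTU, and reading [x] as the
       ceiling of x, delay_i <= (15*200 + 200 + 2*1500)/125000 sec,
       i.e. roughly 50 ms.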
58
59
60 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
61 interpret some places, which look like wrong translations
62 from NS. Anyone is advised to find these differences
       and explain to me why I am wrong 8).
64
   --- Linux has no EOI event, so we cannot estimate true class
       idle time.  The workaround is to treat the next dequeue event
       as a sign that the previous packet has finished.  This is wrong
       because of internal device queueing, but on a permanently loaded
       link it is true.  Moreover, combined with the clock integrator,
       this scheme looks very close to an ideal solution. */
71
72struct cbq_sched_data;
73
74
75struct cbq_class {
76 struct Qdisc_class_common common;
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
78
79/* Parameters */
80 unsigned char priority; /* class priority */
81 unsigned char priority2; /* priority to be used after overlimit */
82 unsigned char ewma_log; /* time constant for idle time calculation */
83 unsigned char ovl_strategy;
84#ifdef CONFIG_NET_CLS_ACT
85 unsigned char police;
86#endif
87
88 u32 defmap;
89
90 /* Link-sharing scheduler parameters */
91 long maxidle; /* Class parameters: see below. */
92 long offtime;
93 long minidle;
94 u32 avpkt;
95 struct qdisc_rate_table *R_tab;
96
97 /* Overlimit strategy parameters */
98 void (*overlimit)(struct cbq_class *cl);
99 psched_tdiff_t penalty;
100
101 /* General scheduler (WRR) parameters */
102 long allot;
103 long quantum; /* Allotment per WRR round */
104 long weight; /* Relative allotment: see below */
105
106 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
107 struct cbq_class *split; /* Ptr to split node */
108 struct cbq_class *share; /* Ptr to LS parent in the class tree */
109 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
110 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
111 parent otherwise */
112 struct cbq_class *sibling; /* Sibling chain */
113 struct cbq_class *children; /* Pointer to children chain */
114
115 struct Qdisc *q; /* Elementary queueing discipline */
116
117
118/* Variables */
119 unsigned char cpriority; /* Effective priority */
120 unsigned char delayed;
121 unsigned char level; /* level of the class in hierarchy:
122 0 for leaf classes, and maximal
123 level of children + 1 for nodes.
124 */
125
126 psched_time_t last; /* Last end of service */
127 psched_time_t undertime;
128 long avgidle;
129 long deficit; /* Saved deficit for WRR */
130 psched_time_t penalized;
131 struct gnet_stats_basic_packed bstats;
132 struct gnet_stats_queue qstats;
133 struct gnet_stats_rate_est rate_est;
134 struct tc_cbq_xstats xstats;
135
136 struct tcf_proto *filter_list;
137
138 int refcnt;
139 int filters;
140
141 struct cbq_class *defaults[TC_PRIO_MAX + 1];
142};
143
144struct cbq_sched_data {
145 struct Qdisc_class_hash clhash; /* Hash table of all classes */
146 int nclasses[TC_CBQ_MAXPRIO + 1];
147 unsigned int quanta[TC_CBQ_MAXPRIO + 1];
148
149 struct cbq_class link;
150
151 unsigned int activemask;
152 struct cbq_class *active[TC_CBQ_MAXPRIO + 1]; /* List of all classes
153 with backlog */
154
155#ifdef CONFIG_NET_CLS_ACT
156 struct cbq_class *rx_class;
157#endif
158 struct cbq_class *tx_class;
159 struct cbq_class *tx_borrowed;
160 int tx_len;
161 psched_time_t now; /* Cached timestamp */
162 psched_time_t now_rt; /* Cached real time */
163 unsigned int pmask;
164
165 struct hrtimer delay_timer;
166 struct qdisc_watchdog watchdog; /* Watchdog timer,
167 started when CBQ has
168 backlog, but cannot
169 transmit just now */
170 psched_tdiff_t wd_expires;
171 int toplevel;
172 u32 hgenerator;
173};
174
175
176#define L2T(cl, len) qdisc_l2t((cl)->R_tab, len)
177
178static inline struct cbq_class *
179cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
180{
181 struct Qdisc_class_common *clc;
182
183 clc = qdisc_class_find(&q->clhash, classid);
184 if (clc == NULL)
185 return NULL;
186 return container_of(clc, struct cbq_class, common);
187}
188
189#ifdef CONFIG_NET_CLS_ACT
190
191static struct cbq_class *
192cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
193{
194 struct cbq_class *cl;
195
196 for (cl = this->tparent; cl; cl = cl->tparent) {
197 struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
198
199 if (new != NULL && new != this)
200 return new;
201 }
202 return NULL;
203}
204
205#endif
206
207/* Classify packet. The procedure is pretty complicated, but
208 * it allows us to combine link sharing and priority scheduling
209 * transparently.
210 *
 * Namely, you can put link-sharing rules (e.g. route based) at the root of CBQ,
 * so that it resolves to split nodes. Then packets are classified
 * by logical priority, or a more specific classifier may be attached
 * to the split node.
215 */
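/* For illustration, with hypothetical handles: if the qdisc handle is 1: and
 * skb->priority is set to classid 1:20, step 1 maps the packet straight to
 * class 1:20.  Otherwise the filters attached to the root (and then to any
 * split node the result points at) are consulted; when a filter returns no
 * class, the split node's defaults[] table is indexed by the low bits of the
 * returned classid, falling back to the TC_PRIO_BESTEFFORT default.
 */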
216
217static struct cbq_class *
218cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
219{
220 struct cbq_sched_data *q = qdisc_priv(sch);
221 struct cbq_class *head = &q->link;
222 struct cbq_class **defmap;
223 struct cbq_class *cl = NULL;
224 u32 prio = skb->priority;
225 struct tcf_result res;
226
227 /*
228 * Step 1. If skb->priority points to one of our classes, use it.
229 */
230 if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
231 (cl = cbq_class_lookup(q, prio)) != NULL)
232 return cl;
233
234 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
235 for (;;) {
236 int result = 0;
237 defmap = head->defaults;
238
239 /*
240 * Step 2+n. Apply classifier.
241 */
242 if (!head->filter_list ||
243 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
244 goto fallback;
245
246 cl = (void *)res.class;
247 if (!cl) {
248 if (TC_H_MAJ(res.classid))
249 cl = cbq_class_lookup(q, res.classid);
250 else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
251 cl = defmap[TC_PRIO_BESTEFFORT];
252
253 if (cl == NULL)
254 goto fallback;
255 }
256 if (cl->level >= head->level)
257 goto fallback;
258#ifdef CONFIG_NET_CLS_ACT
259 switch (result) {
260 case TC_ACT_QUEUED:
261 case TC_ACT_STOLEN:
262 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
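			/* fall through */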
263 case TC_ACT_SHOT:
264 return NULL;
265 case TC_ACT_RECLASSIFY:
266 return cbq_reclassify(skb, cl);
267 }
268#endif
269 if (cl->level == 0)
270 return cl;
271
272 /*
		 * Step 3+n. If the classifier selected a link-sharing class,
		 * apply its agency-specific classifier.
		 * Repeat this procedure until we hit a leaf node.
276 */
277 head = cl;
278 }
279
280fallback:
281 cl = head;
282
283 /*
284 * Step 4. No success...
285 */
286 if (TC_H_MAJ(prio) == 0 &&
287 !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
288 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
289 return head;
290
291 return cl;
292}
293
294/*
 * A packet has just been enqueued into an empty class.
 * cbq_activate_class adds the class to the tail of the active class list
 * of its priority band.
298 */
299
300static inline void cbq_activate_class(struct cbq_class *cl)
301{
302 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
303 int prio = cl->cpriority;
304 struct cbq_class *cl_tail;
305
306 cl_tail = q->active[prio];
307 q->active[prio] = cl;
308
309 if (cl_tail != NULL) {
310 cl->next_alive = cl_tail->next_alive;
311 cl_tail->next_alive = cl;
312 } else {
313 cl->next_alive = cl;
314 q->activemask |= (1<<prio);
315 }
316}
317
318/*
 * Unlink class from the active chain.
 * Note that the same unlinking is also done directly in cbq_dequeue_prio()
 * during the round-robin pass.
322 */
323
324static void cbq_deactivate_class(struct cbq_class *this)
325{
326 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
327 int prio = this->cpriority;
328 struct cbq_class *cl;
329 struct cbq_class *cl_prev = q->active[prio];
330
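	/* Walk the circular list of active classes in this band, starting
	 * just after the tail pointer q->active[prio].  When the target class
	 * is found, splice it out; if it was the tail, move the tail back to
	 * its predecessor, and if it was the only member, clear the band from
	 * activemask.
	 */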
331 do {
332 cl = cl_prev->next_alive;
333 if (cl == this) {
334 cl_prev->next_alive = cl->next_alive;
335 cl->next_alive = NULL;
336
337 if (cl == q->active[prio]) {
338 q->active[prio] = cl_prev;
339 if (cl == q->active[prio]) {
340 q->active[prio] = NULL;
341 q->activemask &= ~(1<<prio);
342 return;
343 }
344 }
345 return;
346 }
347 } while ((cl_prev = cl) != q->active[prio]);
348}
349
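/* A packet has just been enqueued to a class that is not currently throttled.
 * If the class itself, or the first ancestor in its borrow chain whose
 * undertime has passed, is no longer overlimit, lower the toplevel cutoff to
 * that level so borrowing from it is permitted again.
 */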
350static void
351cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
352{
353 int toplevel = q->toplevel;
354
355 if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
356 psched_time_t now;
357 psched_tdiff_t incr;
358
359 now = psched_get_time();
360 incr = now - q->now_rt;
361 now = q->now + incr;
362
363 do {
364 if (cl->undertime < now) {
365 q->toplevel = cl->level;
366 return;
367 }
368 } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
369 }
370}
371
372static int
373cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
374{
375 struct cbq_sched_data *q = qdisc_priv(sch);
376 int uninitialized_var(ret);
377 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
378
379#ifdef CONFIG_NET_CLS_ACT
380 q->rx_class = cl;
381#endif
382 if (cl == NULL) {
383 if (ret & __NET_XMIT_BYPASS)
384 sch->qstats.drops++;
385 kfree_skb(skb);
386 return ret;
387 }
388
389#ifdef CONFIG_NET_CLS_ACT
390 cl->q->__parent = sch;
391#endif
392 ret = qdisc_enqueue(skb, cl->q);
393 if (ret == NET_XMIT_SUCCESS) {
394 sch->q.qlen++;
395 cbq_mark_toplevel(q, cl);
396 if (!cl->next_alive)
397 cbq_activate_class(cl);
398 return ret;
399 }
400
401 if (net_xmit_drop_count(ret)) {
402 sch->qstats.drops++;
403 cbq_mark_toplevel(q, cl);
404 cl->qstats.drops++;
405 }
406 return ret;
407}
408
409/* Overlimit actions */
410
411/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
412
413static void cbq_ovl_classic(struct cbq_class *cl)
414{
415 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
416 psched_tdiff_t delay = cl->undertime - q->now;
417
418 if (!cl->delayed) {
419 delay += cl->offtime;
420
421 /*
422 * Class goes to sleep, so that it will have no
423 * chance to work avgidle. Let's forgive it 8)
424 *
		 * BTW cbq-2.0 has a bug in this
		 * place: apparently they forgot to shift it by cl->ewma_log.
427 */
428 if (cl->avgidle < 0)
429 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
430 if (cl->avgidle < cl->minidle)
431 cl->avgidle = cl->minidle;
432 if (delay <= 0)
433 delay = 1;
434 cl->undertime = q->now + delay;
435
436 cl->xstats.overactions++;
437 cl->delayed = 1;
438 }
439 if (q->wd_expires == 0 || q->wd_expires > delay)
440 q->wd_expires = delay;
441
442 /* Dirty work! We must schedule wakeups based on
443 * real available rate, rather than leaf rate,
444 * which may be tiny (even zero).
445 */
446 if (q->toplevel == TC_CBQ_MAXLEVEL) {
447 struct cbq_class *b;
448 psched_tdiff_t base_delay = q->wd_expires;
449
450 for (b = cl->borrow; b; b = b->borrow) {
451 delay = b->undertime - q->now;
452 if (delay < base_delay) {
453 if (delay <= 0)
454 delay = 1;
455 base_delay = delay;
456 }
457 }
458
459 q->wd_expires = base_delay;
460 }
461}
462
/* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime when
 * they go overlimit
465 */
466
467static void cbq_ovl_rclassic(struct cbq_class *cl)
468{
469 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
470 struct cbq_class *this = cl;
471
472 do {
473 if (cl->level > q->toplevel) {
474 cl = NULL;
475 break;
476 }
477 } while ((cl = cl->borrow) != NULL);
478
479 if (cl == NULL)
480 cl = this;
481 cbq_ovl_classic(cl);
482}
483
/* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */
485
486static void cbq_ovl_delay(struct cbq_class *cl)
487{
488 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
489 psched_tdiff_t delay = cl->undertime - q->now;
490
491 if (test_bit(__QDISC_STATE_DEACTIVATED,
492 &qdisc_root_sleeping(cl->qdisc)->state))
493 return;
494
495 if (!cl->delayed) {
496 psched_time_t sched = q->now;
497 ktime_t expires;
498
499 delay += cl->offtime;
500 if (cl->avgidle < 0)
501 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
502 if (cl->avgidle < cl->minidle)
503 cl->avgidle = cl->minidle;
504 cl->undertime = q->now + delay;
505
506 if (delay > 0) {
507 sched += delay + cl->penalty;
508 cl->penalized = sched;
509 cl->cpriority = TC_CBQ_MAXPRIO;
510 q->pmask |= (1<<TC_CBQ_MAXPRIO);
511
512 expires = ktime_set(0, 0);
513 expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
514 if (hrtimer_try_to_cancel(&q->delay_timer) &&
515 ktime_to_ns(ktime_sub(
516 hrtimer_get_expires(&q->delay_timer),
517 expires)) > 0)
518 hrtimer_set_expires(&q->delay_timer, expires);
519 hrtimer_restart(&q->delay_timer);
520 cl->delayed = 1;
521 cl->xstats.overactions++;
522 return;
523 }
524 delay = 1;
525 }
526 if (q->wd_expires == 0 || q->wd_expires > delay)
527 q->wd_expires = delay;
528}
529
530/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
531
532static void cbq_ovl_lowprio(struct cbq_class *cl)
533{
534 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
535
536 cl->penalized = q->now + cl->penalty;
537
538 if (cl->cpriority != cl->priority2) {
539 cl->cpriority = cl->priority2;
540 q->pmask |= (1<<cl->cpriority);
541 cl->xstats.overactions++;
542 }
543 cbq_ovl_classic(cl);
544}
545
546/* TC_CBQ_OVL_DROP: penalize class by dropping */
547
548static void cbq_ovl_drop(struct cbq_class *cl)
549{
550 if (cl->q->ops->drop)
551 if (cl->q->ops->drop(cl->q))
552 cl->qdisc->q.qlen--;
553 cl->xstats.overactions++;
554 cbq_ovl_classic(cl);
555}
556
557static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
558 psched_time_t now)
559{
560 struct cbq_class *cl;
561 struct cbq_class *cl_prev = q->active[prio];
562 psched_time_t sched = now;
563
564 if (cl_prev == NULL)
565 return 0;
566
567 do {
568 cl = cl_prev->next_alive;
569 if (now - cl->penalized > 0) {
570 cl_prev->next_alive = cl->next_alive;
571 cl->next_alive = NULL;
572 cl->cpriority = cl->priority;
573 cl->delayed = 0;
574 cbq_activate_class(cl);
575
576 if (cl == q->active[prio]) {
577 q->active[prio] = cl_prev;
578 if (cl == q->active[prio]) {
579 q->active[prio] = NULL;
580 return 0;
581 }
582 }
583
584 cl = cl_prev->next_alive;
585 } else if (sched - cl->penalized > 0)
586 sched = cl->penalized;
587 } while ((cl_prev = cl) != q->active[prio]);
588
589 return sched - now;
590}
591
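/* hrtimer callback: restore the normal priority of classes penalized by the
 * delay/lowprio overlimit strategies once their penalty interval has passed,
 * re-arm the timer for those still penalized, and reschedule the qdisc so
 * dequeueing can resume.
 */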
592static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
593{
594 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
595 delay_timer);
596 struct Qdisc *sch = q->watchdog.qdisc;
597 psched_time_t now;
598 psched_tdiff_t delay = 0;
599 unsigned int pmask;
600
601 now = psched_get_time();
602
603 pmask = q->pmask;
604 q->pmask = 0;
605
606 while (pmask) {
607 int prio = ffz(~pmask);
608 psched_tdiff_t tmp;
609
610 pmask &= ~(1<<prio);
611
612 tmp = cbq_undelay_prio(q, prio, now);
613 if (tmp > 0) {
614 q->pmask |= 1<<prio;
615 if (tmp < delay || delay == 0)
616 delay = tmp;
617 }
618 }
619
620 if (delay) {
621 ktime_t time;
622
623 time = ktime_set(0, 0);
624 time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
625 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
626 }
627
628 qdisc_unthrottled(sch);
629 __netif_schedule(qdisc_root(sch));
630 return HRTIMER_NORESTART;
631}
632
633#ifdef CONFIG_NET_CLS_ACT
634static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
635{
636 struct Qdisc *sch = child->__parent;
637 struct cbq_sched_data *q = qdisc_priv(sch);
638 struct cbq_class *cl = q->rx_class;
639
640 q->rx_class = NULL;
641
642 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
643 int ret;
644
645 cbq_mark_toplevel(q, cl);
646
647 q->rx_class = cl;
648 cl->q->__parent = sch;
649
650 ret = qdisc_enqueue(skb, cl->q);
651 if (ret == NET_XMIT_SUCCESS) {
652 sch->q.qlen++;
653 if (!cl->next_alive)
654 cbq_activate_class(cl);
655 return 0;
656 }
657 if (net_xmit_drop_count(ret))
658 sch->qstats.drops++;
659 return 0;
660 }
661
662 sch->qstats.drops++;
663 return -1;
664}
665#endif
666
667/*
 * This is a mission-critical procedure.
 *
 * We "regenerate" the toplevel cutoff if the transmitting class
 * has backlog and is not regulated. It is not part of the
 * original CBQ description, but looks more reasonable.
 * It may be wrong; this question needs further investigation.
674 */
675
676static inline void
677cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
678 struct cbq_class *borrowed)
679{
680 if (cl && q->toplevel >= borrowed->level) {
681 if (cl->q->q.qlen > 1) {
682 do {
683 if (borrowed->undertime == PSCHED_PASTPERFECT) {
684 q->toplevel = borrowed->level;
685 return;
686 }
687 } while ((borrowed = borrowed->borrow) != NULL);
688 }
689#if 0
690 /* It is not necessary now. Uncommenting it
691 will save CPU cycles, but decrease fairness.
692 */
693 q->toplevel = TC_CBQ_MAXLEVEL;
694#endif
695 }
696}
697
698static void
699cbq_update(struct cbq_sched_data *q)
700{
701 struct cbq_class *this = q->tx_class;
702 struct cbq_class *cl = this;
703 int len = q->tx_len;
704
705 q->tx_class = NULL;
706
707 for ( ; cl; cl = cl->share) {
708 long avgidle = cl->avgidle;
709 long idle;
710
711 cl->bstats.packets++;
712 cl->bstats.bytes += len;
713
714 /*
715 * (now - last) is total time between packet right edges.
716 * (last_pktlen/rate) is "virtual" busy time, so that
717 *
718 * idle = (now - last) - last_pktlen/rate
719 */
720
721 idle = q->now - cl->last;
722 if ((unsigned long)idle > 128*1024*1024) {
723 avgidle = cl->maxidle;
724 } else {
725 idle -= L2T(cl, len);
726
727 /* true_avgidle := (1-W)*true_avgidle + W*idle,
728 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
729 * cl->avgidle == true_avgidle/W,
730 * hence:
731 */
732 avgidle += idle - (avgidle>>cl->ewma_log);
733 }
734
735 if (avgidle <= 0) {
736 /* Overlimit or at-limit */
737
738 if (avgidle < cl->minidle)
739 avgidle = cl->minidle;
740
741 cl->avgidle = avgidle;
742
			/* Calculate the expected time when this class
			 * will be allowed to send.
			 * It will occur when:
			 * (1-W)*true_avgidle + W*delay = 0, i.e.
747 * idle = (1/W - 1)*(-true_avgidle)
748 * or
749 * idle = (1 - W)*(-cl->avgidle);
750 */
751 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
752
753 /*
			 * That is not all.
			 * To maintain the rate allocated to the class,
			 * we add to undertime the virtual clock time
			 * needed to complete the transmitted packet.
			 * (len/phys_bandwidth has already elapsed
			 * by the time cbq_update is called.)
760 */
761
762 idle -= L2T(&q->link, len);
763 idle += L2T(cl, len);
764
765 cl->undertime = q->now + idle;
766 } else {
767 /* Underlimit */
768
769 cl->undertime = PSCHED_PASTPERFECT;
770 if (avgidle > cl->maxidle)
771 cl->avgidle = cl->maxidle;
772 else
773 cl->avgidle = avgidle;
774 }
775 cl->last = q->now;
776 }
777
778 cbq_update_toplevel(q, this, q->tx_borrowed);
779}
780
781static inline struct cbq_class *
782cbq_under_limit(struct cbq_class *cl)
783{
784 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
785 struct cbq_class *this_cl = cl;
786
787 if (cl->tparent == NULL)
788 return cl;
789
790 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
791 cl->delayed = 0;
792 return cl;
793 }
794
795 do {
		/* This is a very suspicious place. Now the overlimit
		 * action is generated for non-bounded classes
		 * only if the link is completely congested.
		 * Though this agrees with the ancestor-only paradigm,
		 * it looks very stupid. In particular,
		 * it means that this chunk of code will either
		 * never be called or result in strong amplification
		 * of burstiness. Dangerous, silly, and, however,
		 * no other solution exists.
805 */
806 cl = cl->borrow;
807 if (!cl) {
808 this_cl->qstats.overlimits++;
809 this_cl->overlimit(this_cl);
810 return NULL;
811 }
812 if (cl->level > q->toplevel)
813 return NULL;
814 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
815
816 cl->delayed = 0;
817 return cl;
818}
819
820static inline struct sk_buff *
821cbq_dequeue_prio(struct Qdisc *sch, int prio)
822{
823 struct cbq_sched_data *q = qdisc_priv(sch);
824 struct cbq_class *cl_tail, *cl_prev, *cl;
825 struct sk_buff *skb;
826 int deficit;
827
828 cl_tail = cl_prev = q->active[prio];
829 cl = cl_prev->next_alive;
830
831 do {
832 deficit = 0;
833
834 /* Start round */
835 do {
836 struct cbq_class *borrow = cl;
837
838 if (cl->q->q.qlen &&
839 (borrow = cbq_under_limit(cl)) == NULL)
840 goto skip_class;
841
842 if (cl->deficit <= 0) {
843 /* Class exhausted its allotment per
844 * this round. Switch to the next one.
845 */
846 deficit = 1;
847 cl->deficit += cl->quantum;
848 goto next_class;
849 }
850
851 skb = cl->q->dequeue(cl->q);
852
853 /* Class did not give us any skb :-(
854 * It could occur even if cl->q->q.qlen != 0
			 * e.g. if cl->q == "tbf"
856 */
857 if (skb == NULL)
858 goto skip_class;
859
860 cl->deficit -= qdisc_pkt_len(skb);
861 q->tx_class = cl;
862 q->tx_borrowed = borrow;
863 if (borrow != cl) {
864#ifndef CBQ_XSTATS_BORROWS_BYTES
865 borrow->xstats.borrows++;
866 cl->xstats.borrows++;
867#else
868 borrow->xstats.borrows += qdisc_pkt_len(skb);
869 cl->xstats.borrows += qdisc_pkt_len(skb);
870#endif
871 }
872 q->tx_len = qdisc_pkt_len(skb);
873
874 if (cl->deficit <= 0) {
875 q->active[prio] = cl;
876 cl = cl->next_alive;
877 cl->deficit += cl->quantum;
878 }
879 return skb;
880
881skip_class:
882 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
883 /* Class is empty or penalized.
884 * Unlink it from active chain.
885 */
886 cl_prev->next_alive = cl->next_alive;
887 cl->next_alive = NULL;
888
889 /* Did cl_tail point to it? */
890 if (cl == cl_tail) {
891 /* Repair it! */
892 cl_tail = cl_prev;
893
894 /* Was it the last class in this band? */
895 if (cl == cl_tail) {
896 /* Kill the band! */
897 q->active[prio] = NULL;
898 q->activemask &= ~(1<<prio);
899 if (cl->q->q.qlen)
900 cbq_activate_class(cl);
901 return NULL;
902 }
903
904 q->active[prio] = cl_tail;
905 }
906 if (cl->q->q.qlen)
907 cbq_activate_class(cl);
908
909 cl = cl_prev;
910 }
911
912next_class:
913 cl_prev = cl;
914 cl = cl->next_alive;
915 } while (cl_prev != cl_tail);
916 } while (deficit);
917
918 q->active[prio] = cl_prev;
919
920 return NULL;
921}
922
923static inline struct sk_buff *
924cbq_dequeue_1(struct Qdisc *sch)
925{
926 struct cbq_sched_data *q = qdisc_priv(sch);
927 struct sk_buff *skb;
928 unsigned int activemask;
929
930 activemask = q->activemask & 0xFF;
931 while (activemask) {
932 int prio = ffz(~activemask);
933 activemask &= ~(1<<prio);
934 skb = cbq_dequeue_prio(sch, prio);
935 if (skb)
936 return skb;
937 }
938 return NULL;
939}
940
941static struct sk_buff *
942cbq_dequeue(struct Qdisc *sch)
943{
944 struct sk_buff *skb;
945 struct cbq_sched_data *q = qdisc_priv(sch);
946 psched_time_t now;
947 psched_tdiff_t incr;
948
949 now = psched_get_time();
950 incr = now - q->now_rt;
951
952 if (q->tx_class) {
953 psched_tdiff_t incr2;
954 /* Time integrator. We calculate EOS time
955 * by adding expected packet transmission time.
		 * If real time is greater, we warp the artificial clock,
957 * so that:
958 *
959 * cbq_time = max(real_time, work);
960 */
961 incr2 = L2T(&q->link, q->tx_len);
962 q->now += incr2;
963 cbq_update(q);
964 if ((incr -= incr2) < 0)
965 incr = 0;
966 q->now += incr;
967 } else {
968 if (now > q->now)
969 q->now = now;
970 }
971 q->now_rt = now;
972
973 for (;;) {
974 q->wd_expires = 0;
975
976 skb = cbq_dequeue_1(sch);
977 if (skb) {
978 qdisc_bstats_update(sch, skb);
979 sch->q.qlen--;
980 qdisc_unthrottled(sch);
981 return skb;
982 }
983
984 /* All the classes are overlimit.
985 *
		 * This is possible if:
		 *
		 * 1. Scheduler is empty.
		 * 2. Toplevel cutoff inhibited borrowing.
		 * 3. Root class is overlimit.
		 *
		 * Reset the 2nd and 3rd conditions and retry.
		 *
		 * Note that NS and cbq-2.0 are buggy: peeking
		 * an arbitrary class is appropriate for ancestor-only
		 * sharing, but not for the toplevel algorithm.
		 *
		 * Our version is better but slower, because it requires
		 * two passes; this is unavoidable with top-level sharing.
1000 */
1001
1002 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1003 q->link.undertime == PSCHED_PASTPERFECT)
1004 break;
1005
1006 q->toplevel = TC_CBQ_MAXLEVEL;
1007 q->link.undertime = PSCHED_PASTPERFECT;
1008 }
1009
1010 /* No packets in scheduler or nobody wants to give them to us :-(
	 * Sigh... start the watchdog timer in the latter case.
1012 */
1013
1014 if (sch->q.qlen) {
1015 sch->qstats.overlimits++;
1016 if (q->wd_expires)
1017 qdisc_watchdog_schedule(&q->watchdog,
1018 now + q->wd_expires);
1019 }
1020 return NULL;
1021}
1022
/* CBQ class maintenance routines */
1024
1025static void cbq_adjust_levels(struct cbq_class *this)
1026{
1027 if (this == NULL)
1028 return;
1029
1030 do {
1031 int level = 0;
1032 struct cbq_class *cl;
1033
1034 cl = this->children;
1035 if (cl) {
1036 do {
1037 if (cl->level > level)
1038 level = cl->level;
1039 } while ((cl = cl->sibling) != this->children);
1040 }
1041 this->level = level + 1;
1042 } while ((this = this->tparent) != NULL);
1043}
1044
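/* Rescale the per-round quantum of every class in a priority band so that a
 * class of average weight gets a quantum of one allot.  With hypothetical
 * numbers: two classes with allot 1514 and weights 1 and 3 give
 * quanta[prio] = 4 and nclasses[prio] = 2, hence quanta of
 * 1514*1*2/4 = 757 and 1514*3*2/4 = 2271 bytes respectively.
 */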
1045static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1046{
1047 struct cbq_class *cl;
1048 struct hlist_node *n;
1049 unsigned int h;
1050
1051 if (q->quanta[prio] == 0)
1052 return;
1053
1054 for (h = 0; h < q->clhash.hashsize; h++) {
1055 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			/* BUGGGG... Beware! This expression suffers from
			 * arithmetic overflows!
1058 */
1059 if (cl->priority == prio) {
1060 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1061 q->quanta[prio];
1062 }
1063 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1064 pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n",
1065 cl->common.classid, cl->quantum);
1066 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1067 }
1068 }
1069 }
1070}
1071
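/* Rebuild the defaults[] table of the split node this class is attached to:
 * first drop entries that no longer match the class's defmap, then, for each
 * priority still unset, pick the lowest-level class bound to the same split
 * node whose defmap covers that priority.
 */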
1072static void cbq_sync_defmap(struct cbq_class *cl)
1073{
1074 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1075 struct cbq_class *split = cl->split;
1076 unsigned int h;
1077 int i;
1078
1079 if (split == NULL)
1080 return;
1081
1082 for (i = 0; i <= TC_PRIO_MAX; i++) {
1083 if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
1084 split->defaults[i] = NULL;
1085 }
1086
1087 for (i = 0; i <= TC_PRIO_MAX; i++) {
1088 int level = split->level;
1089
1090 if (split->defaults[i])
1091 continue;
1092
1093 for (h = 0; h < q->clhash.hashsize; h++) {
1094 struct hlist_node *n;
1095 struct cbq_class *c;
1096
1097 hlist_for_each_entry(c, n, &q->clhash.hash[h],
1098 common.hnode) {
1099 if (c->split == split && c->level < level &&
1100 c->defmap & (1<<i)) {
1101 split->defaults[i] = c;
1102 level = c->level;
1103 }
1104 }
1105 }
1106 }
1107}
1108
1109static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1110{
1111 struct cbq_class *split = NULL;
1112
1113 if (splitid == 0) {
1114 split = cl->split;
1115 if (!split)
1116 return;
1117 splitid = split->common.classid;
1118 }
1119
1120 if (split == NULL || split->common.classid != splitid) {
1121 for (split = cl->tparent; split; split = split->tparent)
1122 if (split->common.classid == splitid)
1123 break;
1124 }
1125
1126 if (split == NULL)
1127 return;
1128
1129 if (cl->split != split) {
1130 cl->defmap = 0;
1131 cbq_sync_defmap(cl);
1132 cl->split = split;
1133 cl->defmap = def & mask;
1134 } else
1135 cl->defmap = (cl->defmap & ~mask) | (def & mask);
1136
1137 cbq_sync_defmap(cl);
1138}
1139
1140static void cbq_unlink_class(struct cbq_class *this)
1141{
1142 struct cbq_class *cl, **clp;
1143 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1144
1145 qdisc_class_hash_remove(&q->clhash, &this->common);
1146
1147 if (this->tparent) {
1148 clp = &this->sibling;
1149 cl = *clp;
1150 do {
1151 if (cl == this) {
1152 *clp = cl->sibling;
1153 break;
1154 }
1155 clp = &cl->sibling;
1156 } while ((cl = *clp) != this->sibling);
1157
1158 if (this->tparent->children == this) {
1159 this->tparent->children = this->sibling;
1160 if (this->sibling == this)
1161 this->tparent->children = NULL;
1162 }
1163 } else {
1164 WARN_ON(this->sibling != this);
1165 }
1166}
1167
1168static void cbq_link_class(struct cbq_class *this)
1169{
1170 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1171 struct cbq_class *parent = this->tparent;
1172
1173 this->sibling = this;
1174 qdisc_class_hash_insert(&q->clhash, &this->common);
1175
1176 if (parent == NULL)
1177 return;
1178
1179 if (parent->children == NULL) {
1180 parent->children = this;
1181 } else {
1182 this->sibling = parent->children->sibling;
1183 parent->children->sibling = this;
1184 }
1185}
1186
1187static unsigned int cbq_drop(struct Qdisc *sch)
1188{
1189 struct cbq_sched_data *q = qdisc_priv(sch);
1190 struct cbq_class *cl, *cl_head;
1191 int prio;
1192 unsigned int len;
1193
1194 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1195 cl_head = q->active[prio];
1196 if (!cl_head)
1197 continue;
1198
1199 cl = cl_head;
1200 do {
1201 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1202 sch->q.qlen--;
1203 if (!cl->q->q.qlen)
1204 cbq_deactivate_class(cl);
1205 return len;
1206 }
1207 } while ((cl = cl->next_alive) != cl_head);
1208 }
1209 return 0;
1210}
1211
1212static void
1213cbq_reset(struct Qdisc *sch)
1214{
1215 struct cbq_sched_data *q = qdisc_priv(sch);
1216 struct cbq_class *cl;
1217 struct hlist_node *n;
1218 int prio;
1219 unsigned int h;
1220
1221 q->activemask = 0;
1222 q->pmask = 0;
1223 q->tx_class = NULL;
1224 q->tx_borrowed = NULL;
1225 qdisc_watchdog_cancel(&q->watchdog);
1226 hrtimer_cancel(&q->delay_timer);
1227 q->toplevel = TC_CBQ_MAXLEVEL;
1228 q->now = psched_get_time();
1229 q->now_rt = q->now;
1230
1231 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1232 q->active[prio] = NULL;
1233
1234 for (h = 0; h < q->clhash.hashsize; h++) {
1235 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1236 qdisc_reset(cl->q);
1237
1238 cl->next_alive = NULL;
1239 cl->undertime = PSCHED_PASTPERFECT;
1240 cl->avgidle = cl->maxidle;
1241 cl->deficit = cl->quantum;
1242 cl->cpriority = cl->priority;
1243 }
1244 }
1245 sch->q.qlen = 0;
1246}
1247
1248
1249static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1250{
1251 if (lss->change & TCF_CBQ_LSS_FLAGS) {
1252 cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1253 cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1254 }
1255 if (lss->change & TCF_CBQ_LSS_EWMA)
1256 cl->ewma_log = lss->ewma_log;
1257 if (lss->change & TCF_CBQ_LSS_AVPKT)
1258 cl->avpkt = lss->avpkt;
1259 if (lss->change & TCF_CBQ_LSS_MINIDLE)
1260 cl->minidle = -(long)lss->minidle;
1261 if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
1262 cl->maxidle = lss->maxidle;
1263 cl->avgidle = lss->maxidle;
1264 }
1265 if (lss->change & TCF_CBQ_LSS_OFFTIME)
1266 cl->offtime = lss->offtime;
1267 return 0;
1268}
1269
1270static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1271{
1272 q->nclasses[cl->priority]--;
1273 q->quanta[cl->priority] -= cl->weight;
1274 cbq_normalize_quanta(q, cl->priority);
1275}
1276
1277static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1278{
1279 q->nclasses[cl->priority]++;
1280 q->quanta[cl->priority] += cl->weight;
1281 cbq_normalize_quanta(q, cl->priority);
1282}
1283
1284static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1285{
1286 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1287
1288 if (wrr->allot)
1289 cl->allot = wrr->allot;
1290 if (wrr->weight)
1291 cl->weight = wrr->weight;
1292 if (wrr->priority) {
1293 cl->priority = wrr->priority - 1;
1294 cl->cpriority = cl->priority;
1295 if (cl->priority >= cl->priority2)
1296 cl->priority2 = TC_CBQ_MAXPRIO - 1;
1297 }
1298
1299 cbq_addprio(q, cl);
1300 return 0;
1301}
1302
1303static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1304{
1305 switch (ovl->strategy) {
1306 case TC_CBQ_OVL_CLASSIC:
1307 cl->overlimit = cbq_ovl_classic;
1308 break;
1309 case TC_CBQ_OVL_DELAY:
1310 cl->overlimit = cbq_ovl_delay;
1311 break;
1312 case TC_CBQ_OVL_LOWPRIO:
1313 if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
1314 ovl->priority2 - 1 <= cl->priority)
1315 return -EINVAL;
1316 cl->priority2 = ovl->priority2 - 1;
1317 cl->overlimit = cbq_ovl_lowprio;
1318 break;
1319 case TC_CBQ_OVL_DROP:
1320 cl->overlimit = cbq_ovl_drop;
1321 break;
1322 case TC_CBQ_OVL_RCLASSIC:
1323 cl->overlimit = cbq_ovl_rclassic;
1324 break;
1325 default:
1326 return -EINVAL;
1327 }
1328 cl->penalty = ovl->penalty;
1329 return 0;
1330}
1331
1332#ifdef CONFIG_NET_CLS_ACT
1333static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1334{
1335 cl->police = p->police;
1336
1337 if (cl->q->handle) {
1338 if (p->police == TC_POLICE_RECLASSIFY)
1339 cl->q->reshape_fail = cbq_reshape_fail;
1340 else
1341 cl->q->reshape_fail = NULL;
1342 }
1343 return 0;
1344}
1345#endif
1346
1347static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1348{
1349 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1350 return 0;
1351}
1352
1353static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1354 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1355 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1356 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1357 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1358 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1359 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1360 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1361};
1362
1363static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1364{
1365 struct cbq_sched_data *q = qdisc_priv(sch);
1366 struct nlattr *tb[TCA_CBQ_MAX + 1];
1367 struct tc_ratespec *r;
1368 int err;
1369
1370 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1371 if (err < 0)
1372 return err;
1373
1374 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1375 return -EINVAL;
1376
1377 r = nla_data(tb[TCA_CBQ_RATE]);
1378
1379 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1380 return -EINVAL;
1381
1382 err = qdisc_class_hash_init(&q->clhash);
1383 if (err < 0)
1384 goto put_rtab;
1385
1386 q->link.refcnt = 1;
1387 q->link.sibling = &q->link;
1388 q->link.common.classid = sch->handle;
1389 q->link.qdisc = sch;
1390 q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1391 sch->handle);
1392 if (!q->link.q)
1393 q->link.q = &noop_qdisc;
1394
1395 q->link.priority = TC_CBQ_MAXPRIO - 1;
1396 q->link.priority2 = TC_CBQ_MAXPRIO - 1;
1397 q->link.cpriority = TC_CBQ_MAXPRIO - 1;
1398 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1399 q->link.overlimit = cbq_ovl_classic;
1400 q->link.allot = psched_mtu(qdisc_dev(sch));
1401 q->link.quantum = q->link.allot;
1402 q->link.weight = q->link.R_tab->rate.rate;
1403
1404 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1405 q->link.avpkt = q->link.allot/2;
1406 q->link.minidle = -0x7FFFFFFF;
1407
1408 qdisc_watchdog_init(&q->watchdog, sch);
1409 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1410 q->delay_timer.function = cbq_undelay;
1411 q->toplevel = TC_CBQ_MAXLEVEL;
1412 q->now = psched_get_time();
1413 q->now_rt = q->now;
1414
1415 cbq_link_class(&q->link);
1416
1417 if (tb[TCA_CBQ_LSSOPT])
1418 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1419
1420 cbq_addprio(q, &q->link);
1421 return 0;
1422
1423put_rtab:
1424 qdisc_put_rtab(q->link.R_tab);
1425 return err;
1426}
1427
1428static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1429{
1430 unsigned char *b = skb_tail_pointer(skb);
1431
1432 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1433 return skb->len;
1434
1435nla_put_failure:
1436 nlmsg_trim(skb, b);
1437 return -1;
1438}
1439
1440static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1441{
1442 unsigned char *b = skb_tail_pointer(skb);
1443 struct tc_cbq_lssopt opt;
1444
1445 opt.flags = 0;
1446 if (cl->borrow == NULL)
1447 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1448 if (cl->share == NULL)
1449 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1450 opt.ewma_log = cl->ewma_log;
1451 opt.level = cl->level;
1452 opt.avpkt = cl->avpkt;
1453 opt.maxidle = cl->maxidle;
1454 opt.minidle = (u32)(-cl->minidle);
1455 opt.offtime = cl->offtime;
1456 opt.change = ~0;
1457 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1458 return skb->len;
1459
1460nla_put_failure:
1461 nlmsg_trim(skb, b);
1462 return -1;
1463}
1464
1465static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1466{
1467 unsigned char *b = skb_tail_pointer(skb);
1468 struct tc_cbq_wrropt opt;
1469
1470 memset(&opt, 0, sizeof(opt));
1471 opt.flags = 0;
1472 opt.allot = cl->allot;
1473 opt.priority = cl->priority + 1;
1474 opt.cpriority = cl->cpriority + 1;
1475 opt.weight = cl->weight;
1476 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1477 return skb->len;
1478
1479nla_put_failure:
1480 nlmsg_trim(skb, b);
1481 return -1;
1482}
1483
1484static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1485{
1486 unsigned char *b = skb_tail_pointer(skb);
1487 struct tc_cbq_ovl opt;
1488
1489 opt.strategy = cl->ovl_strategy;
1490 opt.priority2 = cl->priority2 + 1;
1491 opt.pad = 0;
1492 opt.penalty = cl->penalty;
1493 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1494 return skb->len;
1495
1496nla_put_failure:
1497 nlmsg_trim(skb, b);
1498 return -1;
1499}
1500
1501static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1502{
1503 unsigned char *b = skb_tail_pointer(skb);
1504 struct tc_cbq_fopt opt;
1505
1506 if (cl->split || cl->defmap) {
1507 opt.split = cl->split ? cl->split->common.classid : 0;
1508 opt.defmap = cl->defmap;
1509 opt.defchange = ~0;
1510 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1511 }
1512 return skb->len;
1513
1514nla_put_failure:
1515 nlmsg_trim(skb, b);
1516 return -1;
1517}
1518
1519#ifdef CONFIG_NET_CLS_ACT
1520static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1521{
1522 unsigned char *b = skb_tail_pointer(skb);
1523 struct tc_cbq_police opt;
1524
1525 if (cl->police) {
1526 opt.police = cl->police;
1527 opt.__res1 = 0;
1528 opt.__res2 = 0;
1529 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1530 }
1531 return skb->len;
1532
1533nla_put_failure:
1534 nlmsg_trim(skb, b);
1535 return -1;
1536}
1537#endif
1538
1539static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1540{
1541 if (cbq_dump_lss(skb, cl) < 0 ||
1542 cbq_dump_rate(skb, cl) < 0 ||
1543 cbq_dump_wrr(skb, cl) < 0 ||
1544 cbq_dump_ovl(skb, cl) < 0 ||
1545#ifdef CONFIG_NET_CLS_ACT
1546 cbq_dump_police(skb, cl) < 0 ||
1547#endif
1548 cbq_dump_fopt(skb, cl) < 0)
1549 return -1;
1550 return 0;
1551}
1552
1553static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1554{
1555 struct cbq_sched_data *q = qdisc_priv(sch);
1556 struct nlattr *nest;
1557
1558 nest = nla_nest_start(skb, TCA_OPTIONS);
1559 if (nest == NULL)
1560 goto nla_put_failure;
1561 if (cbq_dump_attr(skb, &q->link) < 0)
1562 goto nla_put_failure;
1563 nla_nest_end(skb, nest);
1564 return skb->len;
1565
1566nla_put_failure:
1567 nla_nest_cancel(skb, nest);
1568 return -1;
1569}
1570
1571static int
1572cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1573{
1574 struct cbq_sched_data *q = qdisc_priv(sch);
1575
1576 q->link.xstats.avgidle = q->link.avgidle;
1577 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1578}
1579
1580static int
1581cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1582 struct sk_buff *skb, struct tcmsg *tcm)
1583{
1584 struct cbq_class *cl = (struct cbq_class *)arg;
1585 struct nlattr *nest;
1586
1587 if (cl->tparent)
1588 tcm->tcm_parent = cl->tparent->common.classid;
1589 else
1590 tcm->tcm_parent = TC_H_ROOT;
1591 tcm->tcm_handle = cl->common.classid;
1592 tcm->tcm_info = cl->q->handle;
1593
1594 nest = nla_nest_start(skb, TCA_OPTIONS);
1595 if (nest == NULL)
1596 goto nla_put_failure;
1597 if (cbq_dump_attr(skb, cl) < 0)
1598 goto nla_put_failure;
1599 nla_nest_end(skb, nest);
1600 return skb->len;
1601
1602nla_put_failure:
1603 nla_nest_cancel(skb, nest);
1604 return -1;
1605}
1606
1607static int
1608cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1609 struct gnet_dump *d)
1610{
1611 struct cbq_sched_data *q = qdisc_priv(sch);
1612 struct cbq_class *cl = (struct cbq_class *)arg;
1613
1614 cl->qstats.qlen = cl->q->q.qlen;
1615 cl->xstats.avgidle = cl->avgidle;
1616 cl->xstats.undertime = 0;
1617
1618 if (cl->undertime != PSCHED_PASTPERFECT)
1619 cl->xstats.undertime = cl->undertime - q->now;
1620
1621 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1622 gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
1623 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1624 return -1;
1625
1626 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1627}
1628
1629static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1630 struct Qdisc **old)
1631{
1632 struct cbq_class *cl = (struct cbq_class *)arg;
1633
1634 if (new == NULL) {
1635 new = qdisc_create_dflt(sch->dev_queue,
1636 &pfifo_qdisc_ops, cl->common.classid);
1637 if (new == NULL)
1638 return -ENOBUFS;
1639 } else {
1640#ifdef CONFIG_NET_CLS_ACT
1641 if (cl->police == TC_POLICE_RECLASSIFY)
1642 new->reshape_fail = cbq_reshape_fail;
1643#endif
1644 }
1645 sch_tree_lock(sch);
1646 *old = cl->q;
1647 cl->q = new;
1648 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1649 qdisc_reset(*old);
1650 sch_tree_unlock(sch);
1651
1652 return 0;
1653}
1654
1655static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
1656{
1657 struct cbq_class *cl = (struct cbq_class *)arg;
1658
1659 return cl->q;
1660}
1661
1662static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1663{
1664 struct cbq_class *cl = (struct cbq_class *)arg;
1665
1666 if (cl->q->q.qlen == 0)
1667 cbq_deactivate_class(cl);
1668}
1669
1670static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1671{
1672 struct cbq_sched_data *q = qdisc_priv(sch);
1673 struct cbq_class *cl = cbq_class_lookup(q, classid);
1674
1675 if (cl) {
1676 cl->refcnt++;
1677 return (unsigned long)cl;
1678 }
1679 return 0;
1680}
1681
1682static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1683{
1684 struct cbq_sched_data *q = qdisc_priv(sch);
1685
1686 WARN_ON(cl->filters);
1687
1688 tcf_destroy_chain(&cl->filter_list);
1689 qdisc_destroy(cl->q);
1690 qdisc_put_rtab(cl->R_tab);
1691 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1692 if (cl != &q->link)
1693 kfree(cl);
1694}
1695
1696static void cbq_destroy(struct Qdisc *sch)
1697{
1698 struct cbq_sched_data *q = qdisc_priv(sch);
1699 struct hlist_node *n, *next;
1700 struct cbq_class *cl;
1701 unsigned int h;
1702
1703#ifdef CONFIG_NET_CLS_ACT
1704 q->rx_class = NULL;
1705#endif
1706 /*
1707 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leaves, which means that filters can still
1709 * be bound to classes which have been destroyed already. --TGR '04
1710 */
1711 for (h = 0; h < q->clhash.hashsize; h++) {
1712 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1713 tcf_destroy_chain(&cl->filter_list);
1714 }
1715 for (h = 0; h < q->clhash.hashsize; h++) {
1716 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1717 common.hnode)
1718 cbq_destroy_class(sch, cl);
1719 }
1720 qdisc_class_hash_destroy(&q->clhash);
1721}
1722
1723static void cbq_put(struct Qdisc *sch, unsigned long arg)
1724{
1725 struct cbq_class *cl = (struct cbq_class *)arg;
1726
1727 if (--cl->refcnt == 0) {
1728#ifdef CONFIG_NET_CLS_ACT
1729 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1730 struct cbq_sched_data *q = qdisc_priv(sch);
1731
1732 spin_lock_bh(root_lock);
1733 if (q->rx_class == cl)
1734 q->rx_class = NULL;
1735 spin_unlock_bh(root_lock);
1736#endif
1737
1738 cbq_destroy_class(sch, cl);
1739 }
1740}
1741
1742static int
1743cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1744 unsigned long *arg)
1745{
1746 int err;
1747 struct cbq_sched_data *q = qdisc_priv(sch);
1748 struct cbq_class *cl = (struct cbq_class *)*arg;
1749 struct nlattr *opt = tca[TCA_OPTIONS];
1750 struct nlattr *tb[TCA_CBQ_MAX + 1];
1751 struct cbq_class *parent;
1752 struct qdisc_rate_table *rtab = NULL;
1753
1754 if (opt == NULL)
1755 return -EINVAL;
1756
1757 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1758 if (err < 0)
1759 return err;
1760
1761 if (cl) {
1762 /* Check parent */
1763 if (parentid) {
1764 if (cl->tparent &&
1765 cl->tparent->common.classid != parentid)
1766 return -EINVAL;
1767 if (!cl->tparent && parentid != TC_H_ROOT)
1768 return -EINVAL;
1769 }
1770
1771 if (tb[TCA_CBQ_RATE]) {
1772 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1773 tb[TCA_CBQ_RTAB]);
1774 if (rtab == NULL)
1775 return -EINVAL;
1776 }
1777
1778 if (tca[TCA_RATE]) {
1779 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1780 qdisc_root_sleeping_lock(sch),
1781 tca[TCA_RATE]);
1782 if (err) {
1783 if (rtab)
1784 qdisc_put_rtab(rtab);
1785 return err;
1786 }
1787 }
1788
1789 /* Change class parameters */
1790 sch_tree_lock(sch);
1791
1792 if (cl->next_alive != NULL)
1793 cbq_deactivate_class(cl);
1794
1795 if (rtab) {
1796 qdisc_put_rtab(cl->R_tab);
1797 cl->R_tab = rtab;
1798 }
1799
1800 if (tb[TCA_CBQ_LSSOPT])
1801 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1802
1803 if (tb[TCA_CBQ_WRROPT]) {
1804 cbq_rmprio(q, cl);
1805 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1806 }
1807
1808 if (tb[TCA_CBQ_OVL_STRATEGY])
1809 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1810
1811#ifdef CONFIG_NET_CLS_ACT
1812 if (tb[TCA_CBQ_POLICE])
1813 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1814#endif
1815
1816 if (tb[TCA_CBQ_FOPT])
1817 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1818
1819 if (cl->q->q.qlen)
1820 cbq_activate_class(cl);
1821
1822 sch_tree_unlock(sch);
1823
1824 return 0;
1825 }
1826
1827 if (parentid == TC_H_ROOT)
1828 return -EINVAL;
1829
1830 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1831 tb[TCA_CBQ_LSSOPT] == NULL)
1832 return -EINVAL;
1833
1834 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1835 if (rtab == NULL)
1836 return -EINVAL;
1837
1838 if (classid) {
1839 err = -EINVAL;
1840 if (TC_H_MAJ(classid ^ sch->handle) ||
1841 cbq_class_lookup(q, classid))
1842 goto failure;
1843 } else {
1844 int i;
1845 classid = TC_H_MAKE(sch->handle, 0x8000);
1846
1847 for (i = 0; i < 0x8000; i++) {
1848 if (++q->hgenerator >= 0x8000)
1849 q->hgenerator = 1;
1850 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1851 break;
1852 }
1853 err = -ENOSR;
1854 if (i >= 0x8000)
1855 goto failure;
1856 classid = classid|q->hgenerator;
1857 }
1858
1859 parent = &q->link;
1860 if (parentid) {
1861 parent = cbq_class_lookup(q, parentid);
1862 err = -EINVAL;
1863 if (parent == NULL)
1864 goto failure;
1865 }
1866
1867 err = -ENOBUFS;
1868 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1869 if (cl == NULL)
1870 goto failure;
1871
1872 if (tca[TCA_RATE]) {
1873 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1874 qdisc_root_sleeping_lock(sch),
1875 tca[TCA_RATE]);
1876 if (err) {
1877 kfree(cl);
1878 goto failure;
1879 }
1880 }
1881
1882 cl->R_tab = rtab;
1883 rtab = NULL;
1884 cl->refcnt = 1;
1885 cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1886 if (!cl->q)
1887 cl->q = &noop_qdisc;
1888 cl->common.classid = classid;
1889 cl->tparent = parent;
1890 cl->qdisc = sch;
1891 cl->allot = parent->allot;
1892 cl->quantum = cl->allot;
1893 cl->weight = cl->R_tab->rate.rate;
1894
1895 sch_tree_lock(sch);
1896 cbq_link_class(cl);
1897 cl->borrow = cl->tparent;
1898 if (cl->tparent != &q->link)
1899 cl->share = cl->tparent;
1900 cbq_adjust_levels(parent);
1901 cl->minidle = -0x7FFFFFFF;
1902 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1903 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1904 if (cl->ewma_log == 0)
1905 cl->ewma_log = q->link.ewma_log;
1906 if (cl->maxidle == 0)
1907 cl->maxidle = q->link.maxidle;
1908 if (cl->avpkt == 0)
1909 cl->avpkt = q->link.avpkt;
1910 cl->overlimit = cbq_ovl_classic;
1911 if (tb[TCA_CBQ_OVL_STRATEGY])
1912 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1913#ifdef CONFIG_NET_CLS_ACT
1914 if (tb[TCA_CBQ_POLICE])
1915 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1916#endif
1917 if (tb[TCA_CBQ_FOPT])
1918 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1919 sch_tree_unlock(sch);
1920
1921 qdisc_class_hash_grow(sch, &q->clhash);
1922
1923 *arg = (unsigned long)cl;
1924 return 0;
1925
1926failure:
1927 qdisc_put_rtab(rtab);
1928 return err;
1929}
1930
1931static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1932{
1933 struct cbq_sched_data *q = qdisc_priv(sch);
1934 struct cbq_class *cl = (struct cbq_class *)arg;
1935 unsigned int qlen;
1936
1937 if (cl->filters || cl->children || cl == &q->link)
1938 return -EBUSY;
1939
1940 sch_tree_lock(sch);
1941
1942 qlen = cl->q->q.qlen;
1943 qdisc_reset(cl->q);
1944 qdisc_tree_decrease_qlen(cl->q, qlen);
1945
1946 if (cl->next_alive)
1947 cbq_deactivate_class(cl);
1948
1949 if (q->tx_borrowed == cl)
1950 q->tx_borrowed = q->tx_class;
1951 if (q->tx_class == cl) {
1952 q->tx_class = NULL;
1953 q->tx_borrowed = NULL;
1954 }
1955#ifdef CONFIG_NET_CLS_ACT
1956 if (q->rx_class == cl)
1957 q->rx_class = NULL;
1958#endif
1959
1960 cbq_unlink_class(cl);
1961 cbq_adjust_levels(cl->tparent);
1962 cl->defmap = 0;
1963 cbq_sync_defmap(cl);
1964
1965 cbq_rmprio(q, cl);
1966 sch_tree_unlock(sch);
1967
1968 BUG_ON(--cl->refcnt == 0);
1969 /*
1970 * This shouldn't happen: we "hold" one cops->get() when called
1971 * from tc_ctl_tclass; the destroy method is done from cops->put().
1972 */
1973
1974 return 0;
1975}
1976
1977static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1978{
1979 struct cbq_sched_data *q = qdisc_priv(sch);
1980 struct cbq_class *cl = (struct cbq_class *)arg;
1981
1982 if (cl == NULL)
1983 cl = &q->link;
1984
1985 return &cl->filter_list;
1986}
1987
1988static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1989 u32 classid)
1990{
1991 struct cbq_sched_data *q = qdisc_priv(sch);
1992 struct cbq_class *p = (struct cbq_class *)parent;
1993 struct cbq_class *cl = cbq_class_lookup(q, classid);
1994
1995 if (cl) {
1996 if (p && p->level <= cl->level)
1997 return 0;
1998 cl->filters++;
1999 return (unsigned long)cl;
2000 }
2001 return 0;
2002}
2003
2004static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2005{
2006 struct cbq_class *cl = (struct cbq_class *)arg;
2007
2008 cl->filters--;
2009}
2010
2011static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2012{
2013 struct cbq_sched_data *q = qdisc_priv(sch);
2014 struct cbq_class *cl;
2015 struct hlist_node *n;
2016 unsigned int h;
2017
2018 if (arg->stop)
2019 return;
2020
2021 for (h = 0; h < q->clhash.hashsize; h++) {
2022 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2023 if (arg->count < arg->skip) {
2024 arg->count++;
2025 continue;
2026 }
2027 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2028 arg->stop = 1;
2029 return;
2030 }
2031 arg->count++;
2032 }
2033 }
2034}
2035
2036static const struct Qdisc_class_ops cbq_class_ops = {
2037 .graft = cbq_graft,
2038 .leaf = cbq_leaf,
2039 .qlen_notify = cbq_qlen_notify,
2040 .get = cbq_get,
2041 .put = cbq_put,
2042 .change = cbq_change_class,
2043 .delete = cbq_delete,
2044 .walk = cbq_walk,
2045 .tcf_chain = cbq_find_tcf,
2046 .bind_tcf = cbq_bind_filter,
2047 .unbind_tcf = cbq_unbind_filter,
2048 .dump = cbq_dump_class,
2049 .dump_stats = cbq_dump_class_stats,
2050};
2051
2052static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2053 .next = NULL,
2054 .cl_ops = &cbq_class_ops,
2055 .id = "cbq",
2056 .priv_size = sizeof(struct cbq_sched_data),
2057 .enqueue = cbq_enqueue,
2058 .dequeue = cbq_dequeue,
2059 .peek = qdisc_peek_dequeued,
2060 .drop = cbq_drop,
2061 .init = cbq_init,
2062 .reset = cbq_reset,
2063 .destroy = cbq_destroy,
2064 .change = NULL,
2065 .dump = cbq_dump,
2066 .dump_stats = cbq_dump_stats,
2067 .owner = THIS_MODULE,
2068};
2069
2070static int __init cbq_module_init(void)
2071{
2072 return register_qdisc(&cbq_qdisc_ops);
2073}
2074static void __exit cbq_module_exit(void)
2075{
2076 unregister_qdisc(&cbq_qdisc_ops);
2077}
2078module_init(cbq_module_init)
2079module_exit(cbq_module_exit)
2080MODULE_LICENSE("GPL");