Blame - ap/os/linux/linux-3.4.x/net/sched/sch_generic.c - R306

blob: ad0ca207ed002c0e9468922ff0a11120ff0a336f [file] [log] [blame]

yuezonghe	824eb0c	2024-06-27 02:32:26 -0700	[diff] [blame]	1	/*
				2	* net/sched/sch_generic.c Generic packet scheduler routines.
				3	*
				4	* This program is free software; you can redistribute it and/or
				5	* modify it under the terms of the GNU General Public License
				6	* as published by the Free Software Foundation; either version
				7	* 2 of the License, or (at your option) any later version.
				8	*
				9	* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
				10	* Jamal Hadi Salim, <hadi@cyberus.ca> 990601
				11	* - Ingress support
				12	*/
				13
				14	#include <linux/bitops.h>
				15	#include <linux/module.h>
				16	#include <linux/types.h>
				17	#include <linux/kernel.h>
				18	#include <linux/sched.h>
				19	#include <linux/string.h>
				20	#include <linux/errno.h>
				21	#include <linux/netdevice.h>
				22	#include <linux/skbuff.h>
				23	#include <linux/rtnetlink.h>
				24	#include <linux/init.h>
				25	#include <linux/rcupdate.h>
				26	#include <linux/list.h>
				27	#include <linux/slab.h>
				28	#include <net/pkt_sched.h>
				29	#include <net/dst.h>
				30
				31	/* Main transmission queue. */
				32
				33	/* Modifications to data participating in scheduling must be protected with
				34	* qdisc_lock(qdisc) spinlock.
				35	*
				36	* The idea is the following:
				37	* - enqueue, dequeue are serialized via qdisc root lock
				38	* - ingress filtering is also serialized via qdisc root lock
				39	* - updates to tree and tree walking are only done under the rtnl mutex.
				40	*/
				41
				42	static inline int dev_requeue_skb(struct sk_buff skb, struct Qdisc q)
				43	{
				44	skb_dst_force(skb);
				45	q->gso_skb = skb;
				46	q->qstats.requeues++;
				47	q->q.qlen++; /* it's still part of the queue */
				48	__netif_schedule(q);
				49
				50	return 0;
				51	}
				52
				53	static inline struct sk_buff dequeue_skb(struct Qdisc q)
				54	{
				55	struct sk_buff *skb = q->gso_skb;
				56
				57	if (unlikely(skb)) {
				58	struct net_device *dev = qdisc_dev(q);
				59	struct netdev_queue *txq;
				60
				61	/* check the reason of requeuing without tx lock first */
				62	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
				63	if (!netif_xmit_frozen_or_stopped(txq)) {
				64	q->gso_skb = NULL;
				65	q->q.qlen--;
				66	} else
				67	skb = NULL;
				68	} else {
				69	skb = q->dequeue(q);
				70	}
				71
				72	return skb;
				73	}
				74
				75	static inline int handle_dev_cpu_collision(struct sk_buff *skb,
				76	struct netdev_queue *dev_queue,
				77	struct Qdisc *q)
				78	{
				79	int ret;
				80
				81	if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
				82	/*
				83	* Same CPU holding the lock. It may be a transient
				84	* configuration error, when hard_start_xmit() recurses. We
				85	* detect it by checking xmit owner and drop the packet when
				86	* deadloop is detected. Return OK to try the next skb.
				87	*/
				88	kfree_skb(skb);
				89	if (net_ratelimit())
				90	pr_warning("Dead loop on netdevice %s, fix it urgently!\n",
				91	dev_queue->dev->name);
				92	ret = qdisc_qlen(q);
				93	} else {
				94	/*
				95	* Another cpu is holding lock, requeue & delay xmits for
				96	* some time.
				97	*/
				98	__this_cpu_inc(softnet_data.cpu_collision);
				99	ret = dev_requeue_skb(skb, q);
				100	}
				101
				102	return ret;
				103	}
				104
				105	/*
				106	* Transmit one skb, and handle the return status as required. Holding the
				107	* __QDISC_STATE_RUNNING bit guarantees that only one CPU can execute this
				108	* function.
				109	*
				110	* Returns to the caller:
				111	* 0 - queue is empty or throttled.
				112	* >0 - queue is not empty.
				113	*/
				114	int sch_direct_xmit(struct sk_buff skb, struct Qdisc q,
				115	struct net_device dev, struct netdev_queue txq,
				116	spinlock_t *root_lock)
				117	{
				118	int ret = NETDEV_TX_BUSY;
				119
				120	/* And release qdisc */
				121	spin_unlock(root_lock);
				122
				123	HARD_TX_LOCK(dev, txq, smp_processor_id());
				124	if (!netif_xmit_frozen_or_stopped(txq))
				125	ret = dev_hard_start_xmit(skb, dev, txq);
				126
				127	HARD_TX_UNLOCK(dev, txq);
				128
				129	spin_lock(root_lock);
				130
				131	if (dev_xmit_complete(ret)) {
				132	/* Driver sent out skb successfully or skb was consumed */
				133	ret = qdisc_qlen(q);
				134	} else if (ret == NETDEV_TX_LOCKED) {
				135	/* Driver try lock failed */
				136	ret = handle_dev_cpu_collision(skb, txq, q);
				137	} else {
				138	/* Driver returned NETDEV_TX_BUSY - requeue skb */
				139	if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
				140	pr_warning("BUG %s code %d qlen %d\n",
				141	dev->name, ret, q->q.qlen);
				142
				143	ret = dev_requeue_skb(skb, q);
				144	}
				145
				146	if (ret && netif_xmit_frozen_or_stopped(txq))
				147	ret = 0;
				148
				149	return ret;
				150	}
				151
				152	/*
				153	* NOTE: Called under qdisc_lock(q) with locally disabled BH.
				154	*
				155	* __QDISC_STATE_RUNNING guarantees only one CPU can process
				156	* this qdisc at a time. qdisc_lock(q) serializes queue accesses for
				157	* this queue.
				158	*
				159	* netif_tx_lock serializes accesses to device driver.
				160	*
				161	* qdisc_lock(q) and netif_tx_lock are mutually exclusive,
				162	* if one is grabbed, another must be free.
				163	*
				164	* Note, that this procedure can be called by a watchdog timer
				165	*
				166	* Returns to the caller:
				167	* 0 - queue is empty or throttled.
				168	* >0 - queue is not empty.
				169	*
				170	*/
				171	static inline int qdisc_restart(struct Qdisc *q)
				172	{
				173	struct netdev_queue *txq;
				174	struct net_device *dev;
				175	spinlock_t *root_lock;
				176	struct sk_buff *skb;
				177
				178	/* Dequeue packet */
				179	skb = dequeue_skb(q);
				180	if (unlikely(!skb))
				181	return 0;
				182	WARN_ON_ONCE(skb_dst_is_noref(skb));
				183	root_lock = qdisc_lock(q);
				184	dev = qdisc_dev(q);
				185	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
				186
				187	return sch_direct_xmit(skb, q, dev, txq, root_lock);
				188	}
				189
				190	void __qdisc_run(struct Qdisc *q)
				191	{
				192	int quota = weight_p;
				193
				194	while (qdisc_restart(q)) {
				195	/*
				196	* Ordered by possible occurrence: Postpone processing if
				197	* 1. we've exceeded packet quota
				198	* 2. another process needs the CPU;
				199	*/
				200	if (--quota <= 0 \|\| need_resched()) {
				201	__netif_schedule(q);
				202	break;
				203	}
				204	}
				205
				206	qdisc_run_end(q);
				207	}
				208
				209	unsigned long dev_trans_start(struct net_device *dev)
				210	{
				211	unsigned long val, res = dev->trans_start;
				212	unsigned int i;
				213
				214	for (i = 0; i < dev->num_tx_queues; i++) {
				215	val = netdev_get_tx_queue(dev, i)->trans_start;
				216	if (val && time_after(val, res))
				217	res = val;
				218	}
				219	dev->trans_start = res;
				220	return res;
				221	}
				222	EXPORT_SYMBOL(dev_trans_start);
				223
				224	static void dev_watchdog(unsigned long arg)
				225	{
				226	struct net_device dev = (struct net_device )arg;
				227
				228	netif_tx_lock(dev);
				229	if (!qdisc_tx_is_noop(dev)) {
				230	if (netif_device_present(dev) &&
				231	netif_running(dev) &&
				232	netif_carrier_ok(dev)) {
				233	int some_queue_timedout = 0;
				234	unsigned int i;
				235	unsigned long trans_start;
				236
				237	for (i = 0; i < dev->num_tx_queues; i++) {
				238	struct netdev_queue *txq;
				239
				240	txq = netdev_get_tx_queue(dev, i);
				241	/*
				242	* old device drivers set dev->trans_start
				243	*/
				244	trans_start = txq->trans_start ? : dev->trans_start;
				245	if (netif_xmit_stopped(txq) &&
				246	time_after(jiffies, (trans_start +
				247	dev->watchdog_timeo))) {
				248	some_queue_timedout = 1;
				249	txq->trans_timeout++;
				250	break;
				251	}
				252	}
				253
				254	if (some_queue_timedout) {
				255	WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
				256	dev->name, netdev_drivername(dev), i);
				257	dev->netdev_ops->ndo_tx_timeout(dev);
				258	}
				259	if (!mod_timer(&dev->watchdog_timer,
				260	round_jiffies(jiffies +
				261	dev->watchdog_timeo)))
				262	dev_hold(dev);
				263	}
				264	}
				265	netif_tx_unlock(dev);
				266
				267	dev_put(dev);
				268	}
				269
				270	void __netdev_watchdog_up(struct net_device *dev)
				271	{
				272	if (dev->netdev_ops->ndo_tx_timeout) {
				273	if (dev->watchdog_timeo <= 0)
				274	dev->watchdog_timeo = 5*HZ;
				275	if (!mod_timer(&dev->watchdog_timer,
				276	round_jiffies(jiffies + dev->watchdog_timeo)))
				277	dev_hold(dev);
				278	}
				279	}
				280
				281	static void dev_watchdog_up(struct net_device *dev)
				282	{
				283	__netdev_watchdog_up(dev);
				284	}
				285
				286	static void dev_watchdog_down(struct net_device *dev)
				287	{
				288	netif_tx_lock_bh(dev);
				289	if (del_timer(&dev->watchdog_timer))
				290	dev_put(dev);
				291	netif_tx_unlock_bh(dev);
				292	}
				293
				294	/**
				295	* netif_carrier_on - set carrier
				296	* @dev: network device
				297	*
				298	* Device has detected that carrier.
				299	*/
				300	void netif_carrier_on(struct net_device *dev)
				301	{
				302	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
				303	if (dev->reg_state == NETREG_UNINITIALIZED)
				304	return;
				305	linkwatch_fire_event(dev);
				306	if (netif_running(dev))
				307	__netdev_watchdog_up(dev);
				308	}
				309	}
				310	EXPORT_SYMBOL(netif_carrier_on);
				311
				312	/**
				313	* netif_carrier_off - clear carrier
				314	* @dev: network device
				315	*
				316	* Device has detected loss of carrier.
				317	*/
				318	void netif_carrier_off(struct net_device *dev)
				319	{
				320	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
				321	if (dev->reg_state == NETREG_UNINITIALIZED)
				322	return;
				323	linkwatch_fire_event(dev);
				324	}
				325	}
				326	EXPORT_SYMBOL(netif_carrier_off);
				327
				328	/**
				329	* netif_notify_peers - notify network peers about existence of @dev
				330	* @dev: network device
				331	*
				332	* Generate traffic such that interested network peers are aware of
				333	* @dev, such as by generating a gratuitous ARP. This may be used when
				334	* a device wants to inform the rest of the network about some sort of
				335	* reconfiguration such as a failover event or virtual machine
				336	* migration.
				337	*/
				338	void netif_notify_peers(struct net_device *dev)
				339	{
				340	rtnl_lock();
				341	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
				342	rtnl_unlock();
				343	}
				344	EXPORT_SYMBOL(netif_notify_peers);
				345
				346	/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
				347	under all circumstances. It is difficult to invent anything faster or
				348	cheaper.
				349	*/
				350
				351	static int noop_enqueue(struct sk_buff skb, struct Qdisc qdisc)
				352	{
				353	kfree_skb(skb);
				354	return NET_XMIT_CN;
				355	}
				356
				357	static struct sk_buff noop_dequeue(struct Qdisc qdisc)
				358	{
				359	return NULL;
				360	}
				361
				362	struct Qdisc_ops noop_qdisc_ops __read_mostly = {
				363	.id = "noop",
				364	.priv_size = 0,
				365	.enqueue = noop_enqueue,
				366	.dequeue = noop_dequeue,
				367	.peek = noop_dequeue,
				368	.owner = THIS_MODULE,
				369	};
				370
				371	static struct netdev_queue noop_netdev_queue = {
				372	.qdisc = &noop_qdisc,
				373	.qdisc_sleeping = &noop_qdisc,
				374	};
				375
				376	struct Qdisc noop_qdisc = {
				377	.enqueue = noop_enqueue,
				378	.dequeue = noop_dequeue,
				379	.flags = TCQ_F_BUILTIN,
				380	.ops = &noop_qdisc_ops,
				381	.list = LIST_HEAD_INIT(noop_qdisc.list),
				382	.q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
				383	.dev_queue = &noop_netdev_queue,
				384	.busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
				385	};
				386	EXPORT_SYMBOL(noop_qdisc);
				387
				388	static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
				389	.id = "noqueue",
				390	.priv_size = 0,
				391	.enqueue = noop_enqueue,
				392	.dequeue = noop_dequeue,
				393	.peek = noop_dequeue,
				394	.owner = THIS_MODULE,
				395	};
				396
				397	static struct Qdisc noqueue_qdisc;
				398	static struct netdev_queue noqueue_netdev_queue = {
				399	.qdisc = &noqueue_qdisc,
				400	.qdisc_sleeping = &noqueue_qdisc,
				401	};
				402
				403	static struct Qdisc noqueue_qdisc = {
				404	.enqueue = NULL,
				405	.dequeue = noop_dequeue,
				406	.flags = TCQ_F_BUILTIN,
				407	.ops = &noqueue_qdisc_ops,
				408	.list = LIST_HEAD_INIT(noqueue_qdisc.list),
				409	.q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
				410	.dev_queue = &noqueue_netdev_queue,
				411	.busylock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.busylock),
				412	};
				413
				414
				415	static const u8 prio2band[TC_PRIO_MAX + 1] = {
				416	1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
				417	};
				418
				419	/* 3-band FIFO queue: old style, but should be a bit faster than
				420	generic prio+fifo combination.
				421	*/
				422
				423	#define PFIFO_FAST_BANDS 3
				424
				425	/*
				426	* Private data for a pfifo_fast scheduler containing:
				427	* - queues for the three band
				428	* - bitmap indicating which of the bands contain skbs
				429	*/
				430	struct pfifo_fast_priv {
				431	u32 bitmap;
				432	struct sk_buff_head q[PFIFO_FAST_BANDS];
				433	};
				434
				435	/*
				436	* Convert a bitmap to the first band number where an skb is queued, where:
				437	* bitmap=0 means there are no skbs on any band.
				438	* bitmap=1 means there is an skb on band 0.
				439	* bitmap=7 means there are skbs on all 3 bands, etc.
				440	*/
				441	static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0};
				442
				443	static inline struct sk_buff_head band2list(struct pfifo_fast_priv priv,
				444	int band)
				445	{
				446	return priv->q + band;
				447	}
				448
				449	static int pfifo_fast_enqueue(struct sk_buff skb, struct Qdisc qdisc)
				450	{
				451	net_run_track(PRT_TC, "pfifo_fast_enqueue \n");
				452	if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
				453	int band = prio2band[skb->priority & TC_PRIO_MAX];
				454	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
				455	struct sk_buff_head *list = band2list(priv, band);
				456
				457	priv->bitmap \|= (1 << band);
				458	qdisc->q.qlen++;
				459	return __qdisc_enqueue_tail(skb, qdisc, list);
				460	}
				461
				462	return qdisc_drop(skb, qdisc);
				463	}
				464
				465	static struct sk_buff pfifo_fast_dequeue(struct Qdisc qdisc)
				466	{
				467	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
				468	int band = bitmap2band[priv->bitmap];
				469
				470	net_run_track(PRT_TC, "pfifo_fast_dequeue \n");
				471	if (likely(band >= 0)) {
				472	struct sk_buff_head *list = band2list(priv, band);
				473	struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list);
				474
				475	qdisc->q.qlen--;
				476	if (skb_queue_empty(list))
				477	priv->bitmap &= ~(1 << band);
				478
				479	return skb;
				480	}
				481
				482	return NULL;
				483	}
				484
				485	static struct sk_buff pfifo_fast_peek(struct Qdisc qdisc)
				486	{
				487	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
				488	int band = bitmap2band[priv->bitmap];
				489
				490	if (band >= 0) {
				491	struct sk_buff_head *list = band2list(priv, band);
				492
				493	return skb_peek(list);
				494	}
				495
				496	return NULL;
				497	}
				498
				499	static void pfifo_fast_reset(struct Qdisc *qdisc)
				500	{
				501	int prio;
				502	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
				503
				504	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
				505	__qdisc_reset_queue(qdisc, band2list(priv, prio));
				506
				507	priv->bitmap = 0;
				508	qdisc->qstats.backlog = 0;
				509	qdisc->q.qlen = 0;
				510	}
				511
				512	static int pfifo_fast_dump(struct Qdisc qdisc, struct sk_buff skb)
				513	{
				514	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
				515
				516	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
				517	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
				518	return skb->len;
				519
				520	nla_put_failure:
				521	return -1;
				522	}
				523
				524	static int pfifo_fast_init(struct Qdisc qdisc, struct nlattr opt)
				525	{
				526	int prio;
				527	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
				528
				529	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
				530	skb_queue_head_init(band2list(priv, prio));
				531
				532	/* Can by-pass the queue discipline */
				533	qdisc->flags \|= TCQ_F_CAN_BYPASS;
				534	return 0;
				535	}
				536
				537	struct Qdisc_ops pfifo_fast_ops __read_mostly = {
				538	.id = "pfifo_fast",
				539	.priv_size = sizeof(struct pfifo_fast_priv),
				540	.enqueue = pfifo_fast_enqueue,
				541	.dequeue = pfifo_fast_dequeue,
				542	.peek = pfifo_fast_peek,
				543	.init = pfifo_fast_init,
				544	.reset = pfifo_fast_reset,
				545	.dump = pfifo_fast_dump,
				546	.owner = THIS_MODULE,
				547	};
				548	EXPORT_SYMBOL(pfifo_fast_ops);
				549
				550	struct Qdisc qdisc_alloc(struct netdev_queue dev_queue,
				551	struct Qdisc_ops *ops)
				552	{
				553	void *p;
				554	struct Qdisc *sch;
				555	unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
				556	int err = -ENOBUFS;
				557
				558	p = kzalloc_node(size, GFP_KERNEL,
				559	netdev_queue_numa_node_read(dev_queue));
				560
				561	if (!p)
				562	goto errout;
				563	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
				564	/* if we got non aligned memory, ask more and do alignment ourself */
				565	if (sch != p) {
				566	kfree(p);
				567	p = kzalloc_node(size + QDISC_ALIGNTO - 1, GFP_KERNEL,
				568	netdev_queue_numa_node_read(dev_queue));
				569	if (!p)
				570	goto errout;
				571	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
				572	sch->padded = (char ) sch - (char ) p;
				573	}
				574	INIT_LIST_HEAD(&sch->list);
				575	skb_queue_head_init(&sch->q);
				576	spin_lock_init(&sch->busylock);
				577	sch->ops = ops;
				578	sch->enqueue = ops->enqueue;
				579	sch->dequeue = ops->dequeue;
				580	sch->dev_queue = dev_queue;
				581	dev_hold(qdisc_dev(sch));
				582	atomic_set(&sch->refcnt, 1);
				583
				584	return sch;
				585	errout:
				586	return ERR_PTR(err);
				587	}
				588
				589	struct Qdisc qdisc_create_dflt(struct netdev_queue dev_queue,
				590	struct Qdisc_ops *ops, unsigned int parentid)
				591	{
				592	struct Qdisc *sch;
				593
				594	sch = qdisc_alloc(dev_queue, ops);
				595	if (IS_ERR(sch))
				596	goto errout;
				597	sch->parent = parentid;
				598
				599	if (!ops->init \|\| ops->init(sch, NULL) == 0)
				600	return sch;
				601
				602	qdisc_destroy(sch);
				603	errout:
				604	return NULL;
				605	}
				606	EXPORT_SYMBOL(qdisc_create_dflt);
				607
				608	/* Under qdisc_lock(qdisc) and BH! */
				609
				610	void qdisc_reset(struct Qdisc *qdisc)
				611	{
				612	const struct Qdisc_ops *ops = qdisc->ops;
				613
				614	if (ops->reset)
				615	ops->reset(qdisc);
				616
				617	if (qdisc->gso_skb) {
				618	kfree_skb(qdisc->gso_skb);
				619	qdisc->gso_skb = NULL;
				620	qdisc->q.qlen = 0;
				621	}
				622	}
				623	EXPORT_SYMBOL(qdisc_reset);
				624
				625	static void qdisc_rcu_free(struct rcu_head *head)
				626	{
				627	struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
				628
				629	kfree((char *) qdisc - qdisc->padded);
				630	}
				631
				632	void qdisc_destroy(struct Qdisc *qdisc)
				633	{
				634	const struct Qdisc_ops *ops = qdisc->ops;
				635
				636	if (qdisc->flags & TCQ_F_BUILTIN \|\|
				637	!atomic_dec_and_test(&qdisc->refcnt))
				638	return;
				639
				640	#ifdef CONFIG_NET_SCHED
				641	qdisc_list_del(qdisc);
				642
				643	qdisc_put_stab(rtnl_dereference(qdisc->stab));
				644	#endif
				645	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
				646	if (ops->reset)
				647	ops->reset(qdisc);
				648	if (ops->destroy)
				649	ops->destroy(qdisc);
				650
				651	module_put(ops->owner);
				652	dev_put(qdisc_dev(qdisc));
				653
				654	kfree_skb(qdisc->gso_skb);
				655	/*
				656	* gen_estimator est_timer() might access qdisc->q.lock,
				657	* wait a RCU grace period before freeing qdisc.
				658	*/
				659	call_rcu(&qdisc->rcu_head, qdisc_rcu_free);
				660	}
				661	EXPORT_SYMBOL(qdisc_destroy);
				662
				663	/* Attach toplevel qdisc to device queue. */
				664	struct Qdisc dev_graft_qdisc(struct netdev_queue dev_queue,
				665	struct Qdisc *qdisc)
				666	{
				667	struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
				668	spinlock_t *root_lock;
				669
				670	root_lock = qdisc_lock(oqdisc);
				671	spin_lock_bh(root_lock);
				672
				673	/* Prune old scheduler */
				674	if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
				675	qdisc_reset(oqdisc);
				676
				677	/* ... and graft new one */
				678	if (qdisc == NULL)
				679	qdisc = &noop_qdisc;
				680	dev_queue->qdisc_sleeping = qdisc;
				681	rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);
				682
				683	spin_unlock_bh(root_lock);
				684
				685	return oqdisc;
				686	}
				687	EXPORT_SYMBOL(dev_graft_qdisc);
				688
				689	static void attach_one_default_qdisc(struct net_device *dev,
				690	struct netdev_queue *dev_queue,
				691	void *_unused)
				692	{
				693	struct Qdisc *qdisc = &noqueue_qdisc;
				694
				695	net_run_track(PRT_TC, "attach_one_default_qdisc\n");
				696	if (dev->tx_queue_len) {
				697	qdisc = qdisc_create_dflt(dev_queue,
				698	&pfifo_fast_ops, TC_H_ROOT);
				699	if (!qdisc) {
				700	netdev_info(dev, "activation failed\n");
				701	return;
				702	}
				703	}
				704	dev_queue->qdisc_sleeping = qdisc;
				705	}
				706
				707	static void attach_default_qdiscs(struct net_device *dev)
				708	{
				709	struct netdev_queue *txq;
				710	struct Qdisc *qdisc;
				711
				712	txq = netdev_get_tx_queue(dev, 0);
				713
				714	if (!netif_is_multiqueue(dev) \|\| dev->tx_queue_len == 0) {
				715	netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
				716	dev->qdisc = txq->qdisc_sleeping;
				717	atomic_inc(&dev->qdisc->refcnt);
				718	} else {
				719	net_run_track(PRT_TC, "attach_default_qdiscs mq_qdisc_ops\n");
				720	qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT);
				721	if (qdisc) {
				722	qdisc->ops->attach(qdisc);
				723	dev->qdisc = qdisc;
				724	}
				725	}
				726	}
				727
				728	static void transition_one_qdisc(struct net_device *dev,
				729	struct netdev_queue *dev_queue,
				730	void *_need_watchdog)
				731	{
				732	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
				733	int *need_watchdog_p = _need_watchdog;
				734
				735	if (!(new_qdisc->flags & TCQ_F_BUILTIN))
				736	clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);
				737
				738	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
				739	if (need_watchdog_p && new_qdisc != &noqueue_qdisc) {
				740	dev_queue->trans_start = 0;
				741	*need_watchdog_p = 1;
				742	}
				743	}
				744
				745	void dev_activate(struct net_device *dev)
				746	{
				747	int need_watchdog;
				748
				749	/* No queueing discipline is attached to device;
				750	create default one i.e. pfifo_fast for devices,
				751	which need queueing and noqueue_qdisc for
				752	virtual interfaces
				753	*/
				754
				755	if (dev->qdisc == &noop_qdisc)
				756	attach_default_qdiscs(dev);
				757
				758	if (!netif_carrier_ok(dev))
				759	/* Delay activation until next carrier-on event */
				760	return;
				761
				762	need_watchdog = 0;
				763	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
				764	if (dev_ingress_queue(dev))
				765	transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);
				766
				767	if (need_watchdog) {
				768	dev->trans_start = jiffies;
				769	dev_watchdog_up(dev);
				770	}
				771	}
				772	EXPORT_SYMBOL(dev_activate);
				773
				774	static void dev_deactivate_queue(struct net_device *dev,
				775	struct netdev_queue *dev_queue,
				776	void *_qdisc_default)
				777	{
				778	struct Qdisc *qdisc_default = _qdisc_default;
				779	struct Qdisc *qdisc;
				780
				781	qdisc = dev_queue->qdisc;
				782	if (qdisc) {
				783	spin_lock_bh(qdisc_lock(qdisc));
				784
				785	if (!(qdisc->flags & TCQ_F_BUILTIN))
				786	set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
				787
				788	rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
				789	qdisc_reset(qdisc);
				790
				791	spin_unlock_bh(qdisc_lock(qdisc));
				792	}
				793	}
				794
				795	static bool some_qdisc_is_busy(struct net_device *dev)
				796	{
				797	unsigned int i;
				798
				799	for (i = 0; i < dev->num_tx_queues; i++) {
				800	struct netdev_queue *dev_queue;
				801	spinlock_t *root_lock;
				802	struct Qdisc *q;
				803	int val;
				804
				805	dev_queue = netdev_get_tx_queue(dev, i);
				806	q = dev_queue->qdisc_sleeping;
				807	root_lock = qdisc_lock(q);
				808
				809	spin_lock_bh(root_lock);
				810
				811	val = (qdisc_is_running(q) \|\|
				812	test_bit(__QDISC_STATE_SCHED, &q->state));
				813
				814	spin_unlock_bh(root_lock);
				815
				816	if (val)
				817	return true;
				818	}
				819	return false;
				820	}
				821
				822	/**
				823	* dev_deactivate_many - deactivate transmissions on several devices
				824	* @head: list of devices to deactivate
				825	*
				826	* This function returns only when all outstanding transmissions
				827	* have completed, unless all devices are in dismantle phase.
				828	*/
				829	void dev_deactivate_many(struct list_head *head)
				830	{
				831	struct net_device *dev;
				832	bool sync_needed = false;
				833
				834	list_for_each_entry(dev, head, unreg_list) {
				835	netdev_for_each_tx_queue(dev, dev_deactivate_queue,
				836	&noop_qdisc);
				837	if (dev_ingress_queue(dev))
				838	dev_deactivate_queue(dev, dev_ingress_queue(dev),
				839	&noop_qdisc);
				840
				841	dev_watchdog_down(dev);
				842	sync_needed \|= !dev->dismantle;
				843	}
				844
				845	/* Wait for outstanding qdisc-less dev_queue_xmit calls.
				846	* This is avoided if all devices are in dismantle phase :
				847	* Caller will call synchronize_net() for us
				848	*/
				849	if (sync_needed)
				850	synchronize_net();
				851
				852	/* Wait for outstanding qdisc_run calls. */
				853	list_for_each_entry(dev, head, unreg_list)
				854	while (some_qdisc_is_busy(dev))
				855	msleep(1);
				856	}
				857
				858	void dev_deactivate(struct net_device *dev)
				859	{
				860	LIST_HEAD(single);
				861
				862	list_add(&dev->unreg_list, &single);
				863	dev_deactivate_many(&single);
				864	list_del(&single);
				865	}
				866	EXPORT_SYMBOL(dev_deactivate);
				867
				868	static void dev_init_scheduler_queue(struct net_device *dev,
				869	struct netdev_queue *dev_queue,
				870	void *_qdisc)
				871	{
				872	struct Qdisc *qdisc = _qdisc;
				873
				874	dev_queue->qdisc = qdisc;
				875	dev_queue->qdisc_sleeping = qdisc;
				876
				877	net_run_track(PRT_TC, "dev_init_scheduler_queue \n");
				878	}
				879
				880	void dev_init_scheduler(struct net_device *dev)
				881	{
				882	dev->qdisc = &noop_qdisc;
				883	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
				884	if (dev_ingress_queue(dev))
				885	dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
				886
				887	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
				888	}
				889
				890	static void shutdown_scheduler_queue(struct net_device *dev,
				891	struct netdev_queue *dev_queue,
				892	void *_qdisc_default)
				893	{
				894	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
				895	struct Qdisc *qdisc_default = _qdisc_default;
				896
				897	if (qdisc) {
				898	rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
				899	dev_queue->qdisc_sleeping = qdisc_default;
				900
				901	qdisc_destroy(qdisc);
				902	}
				903	}
				904
				905	void dev_shutdown(struct net_device *dev)
				906	{
				907	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
				908	if (dev_ingress_queue(dev))
				909	shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
				910	qdisc_destroy(dev->qdisc);
				911	dev->qdisc = &noop_qdisc;
				912
				913	WARN_ON(timer_pending(&dev->watchdog_timer));
				914	}