1/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
2 *
3 *		This program is free software; you can redistribute it and/or
4 *		modify it under the terms of the GNU General Public License
5 *		as published by the Free Software Foundation; either version
6 *		2 of the License, or (at your option) any later version.
7 *
8 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 */
10
11#include <linux/module.h>
12#include <linux/types.h>
13#include <linux/kernel.h>
14#include <linux/slab.h>
15#include <linux/string.h>
16#include <linux/errno.h>
17#include <linux/if_arp.h>
18#include <linux/netdevice.h>
19#include <linux/init.h>
20#include <linux/skbuff.h>
21#include <linux/moduleparam.h>
22#include <net/dst.h>
23#include <net/neighbour.h>
24#include <net/pkt_sched.h>
25
/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer
   you should just set this qdisc on a device, e.g.

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate an upper useful difference as ~10 times.
   3. If the slave requires address resolution, only protocols using
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */
55
56struct teql_master {
57	struct Qdisc_ops qops;
58	struct net_device *dev;
59	struct Qdisc *slaves;
60	struct list_head master_list;
61	unsigned long	tx_bytes;
62	unsigned long	tx_packets;
63	unsigned long	tx_errors;
64	unsigned long	tx_dropped;
65};
66
67struct teql_sched_data {
68	struct Qdisc *next;
69	struct teql_master *m;
70	struct sk_buff_head q;
71};
72
/*
 * Successor of slave qdisc @q in its master's ring.  Expands to the ->next
 * member itself, so it is used both for reading and for assignment.
 */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
74
/* Link-type flags that the master device copies from its slaves. */
#define FMASK (IFF_POINTOPOINT | IFF_BROADCAST)
76
77/* "teql*" qdisc routines */
78
79static int
80teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
81{
82	struct net_device *dev = qdisc_dev(sch);
83	struct teql_sched_data *q = qdisc_priv(sch);
84
85	if (q->q.qlen < dev->tx_queue_len) {
86		__skb_queue_tail(&q->q, skb);
87		return NET_XMIT_SUCCESS;
88	}
89
90	return qdisc_drop(skb, sch);
91}
92
93static struct sk_buff *
94teql_dequeue(struct Qdisc *sch)
95{
96	struct teql_sched_data *dat = qdisc_priv(sch);
97	struct netdev_queue *dat_queue;
98	struct sk_buff *skb;
99	struct Qdisc *q;
100
101	skb = __skb_dequeue(&dat->q);
102	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
103	q = rcu_dereference_bh(dat_queue->qdisc);
104
105	if (skb == NULL) {
106		struct net_device *m = qdisc_dev(q);
107		if (m) {
108			dat->m->slaves = sch;
109			netif_wake_queue(m);
110		}
111	} else {
112		qdisc_bstats_update(sch, skb);
113	}
114	sch->q.qlen = dat->q.qlen + q->q.qlen;
115	return skb;
116}
117
/*
 * Peek operation of the teql qdisc.
 *
 * teql is meant to be used as a root qdisc, so there is nothing to
 * preview: this always returns NULL and ignores @sch.
 */
static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
	return NULL;
}
124
125static void
126teql_reset(struct Qdisc *sch)
127{
128	struct teql_sched_data *dat = qdisc_priv(sch);
129
130	skb_queue_purge(&dat->q);
131	sch->q.qlen = 0;
132}
133
134static void
135teql_destroy(struct Qdisc *sch)
136{
137	struct Qdisc *q, *prev;
138	struct teql_sched_data *dat = qdisc_priv(sch);
139	struct teql_master *master = dat->m;
140
141	prev = master->slaves;
142	if (prev) {
143		do {
144			q = NEXT_SLAVE(prev);
145			if (q == sch) {
146				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
147				if (q == master->slaves) {
148					master->slaves = NEXT_SLAVE(q);
149					if (q == master->slaves) {
150						struct netdev_queue *txq;
151						spinlock_t *root_lock;
152
153						txq = netdev_get_tx_queue(master->dev, 0);
154						master->slaves = NULL;
155
156						root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
157						spin_lock_bh(root_lock);
158						qdisc_reset(rtnl_dereference(txq->qdisc));
159						spin_unlock_bh(root_lock);
160					}
161				}
162				skb_queue_purge(&dat->q);
163				break;
164			}
165
166		} while ((prev = q) != master->slaves);
167	}
168}
169
170static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
171{
172	struct net_device *dev = qdisc_dev(sch);
173	struct teql_master *m = (struct teql_master *)sch->ops;
174	struct teql_sched_data *q = qdisc_priv(sch);
175
176	if (dev->hard_header_len > m->dev->hard_header_len)
177		return -EINVAL;
178
179	if (m->dev == dev)
180		return -ELOOP;
181
182	q->m = m;
183
184	skb_queue_head_init(&q->q);
185
186	if (m->slaves) {
187		if (m->dev->flags & IFF_UP) {
188			if ((m->dev->flags & IFF_POINTOPOINT &&
189			     !(dev->flags & IFF_POINTOPOINT)) ||
190			    (m->dev->flags & IFF_BROADCAST &&
191			     !(dev->flags & IFF_BROADCAST)) ||
192			    (m->dev->flags & IFF_MULTICAST &&
193			     !(dev->flags & IFF_MULTICAST)) ||
194			    dev->mtu < m->dev->mtu)
195				return -EINVAL;
196		} else {
197			if (!(dev->flags&IFF_POINTOPOINT))
198				m->dev->flags &= ~IFF_POINTOPOINT;
199			if (!(dev->flags&IFF_BROADCAST))
200				m->dev->flags &= ~IFF_BROADCAST;
201			if (!(dev->flags&IFF_MULTICAST))
202				m->dev->flags &= ~IFF_MULTICAST;
203			if (dev->mtu < m->dev->mtu)
204				m->dev->mtu = dev->mtu;
205		}
206		q->next = NEXT_SLAVE(m->slaves);
207		NEXT_SLAVE(m->slaves) = sch;
208	} else {
209		q->next = sch;
210		m->slaves = sch;
211		m->dev->mtu = dev->mtu;
212		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
213	}
214	return 0;
215}
216
217
218static int
219__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
220	       struct net_device *dev, struct netdev_queue *txq,
221	       struct dst_entry *dst)
222{
223	struct neighbour *n;
224	int err = 0;
225
226	n = dst_neigh_lookup_skb(dst, skb);
227	if (!n)
228		return -ENOENT;
229
230	if (dst->dev != dev) {
231		struct neighbour *mn;
232
233		mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
234		neigh_release(n);
235		if (IS_ERR(mn))
236			return PTR_ERR(mn);
237		n = mn;
238	}
239
240	if (neigh_event_send(n, skb_res) == 0) {
241		int err;
242		char haddr[MAX_ADDR_LEN];
243
244		neigh_ha_snapshot(haddr, n, dev);
245		err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)),
246				      haddr, NULL, skb->len);
247
248		if (err < 0)
249			err = -EINVAL;
250	} else {
251		err = (skb_res == NULL) ? -EAGAIN : 1;
252	}
253	neigh_release(n);
254	return err;
255}
256
257static inline int teql_resolve(struct sk_buff *skb,
258			       struct sk_buff *skb_res,
259			       struct net_device *dev,
260			       struct netdev_queue *txq)
261{
262	struct dst_entry *dst = skb_dst(skb);
263	int res;
264
265	if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
266		return -ENODEV;
267
268	if (!dev->header_ops || !dst)
269		return 0;
270
271	rcu_read_lock();
272	res = __teql_resolve(skb, skb_res, dev, txq, dst);
273	rcu_read_unlock();
274
275	return res;
276}
277
278static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
279{
280	struct teql_master *master = netdev_priv(dev);
281	struct Qdisc *start, *q;
282	int busy;
283	int nores;
284	int subq = skb_get_queue_mapping(skb);
285	struct sk_buff *skb_res = NULL;
286
287	start = master->slaves;
288
289restart:
290	nores = 0;
291	busy = 0;
292
293	q = start;
294	if (!q)
295		goto drop;
296
297	do {
298		struct net_device *slave = qdisc_dev(q);
299		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
300
301		if (slave_txq->qdisc_sleeping != q)
302			continue;
303		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
304		    !netif_running(slave)) {
305			busy = 1;
306			continue;
307		}
308
309		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
310		case 0:
311			if (__netif_tx_trylock(slave_txq)) {
312				unsigned int length = qdisc_pkt_len(skb);
313
314				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
315				    netdev_start_xmit(skb, slave, slave_txq, false) ==
316				    NETDEV_TX_OK) {
317					__netif_tx_unlock(slave_txq);
318					master->slaves = NEXT_SLAVE(q);
319					netif_wake_queue(dev);
320					master->tx_packets++;
321					master->tx_bytes += length;
322					return NETDEV_TX_OK;
323				}
324				__netif_tx_unlock(slave_txq);
325			}
326			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
327				busy = 1;
328			break;
329		case 1:
330			master->slaves = NEXT_SLAVE(q);
331			return NETDEV_TX_OK;
332		default:
333			nores = 1;
334			break;
335		}
336		__skb_pull(skb, skb_network_offset(skb));
337	} while ((q = NEXT_SLAVE(q)) != start);
338
339	if (nores && skb_res == NULL) {
340		skb_res = skb;
341		goto restart;
342	}
343
344	if (busy) {
345		netif_stop_queue(dev);
346		return NETDEV_TX_BUSY;
347	}
348	master->tx_errors++;
349
350drop:
351	master->tx_dropped++;
352	dev_kfree_skb(skb);
353	return NETDEV_TX_OK;
354}
355
356static int teql_master_open(struct net_device *dev)
357{
358	struct Qdisc *q;
359	struct teql_master *m = netdev_priv(dev);
360	int mtu = 0xFFFE;
361	unsigned int flags = IFF_NOARP | IFF_MULTICAST;
362
363	if (m->slaves == NULL)
364		return -EUNATCH;
365
366	flags = FMASK;
367
368	q = m->slaves;
369	do {
370		struct net_device *slave = qdisc_dev(q);
371
372		if (slave == NULL)
373			return -EUNATCH;
374
375		if (slave->mtu < mtu)
376			mtu = slave->mtu;
377		if (slave->hard_header_len > LL_MAX_HEADER)
378			return -EINVAL;
379
380		/* If all the slaves are BROADCAST, master is BROADCAST
381		   If all the slaves are PtP, master is PtP
382		   Otherwise, master is NBMA.
383		 */
384		if (!(slave->flags&IFF_POINTOPOINT))
385			flags &= ~IFF_POINTOPOINT;
386		if (!(slave->flags&IFF_BROADCAST))
387			flags &= ~IFF_BROADCAST;
388		if (!(slave->flags&IFF_MULTICAST))
389			flags &= ~IFF_MULTICAST;
390	} while ((q = NEXT_SLAVE(q)) != m->slaves);
391
392	m->dev->mtu = mtu;
393	m->dev->flags = (m->dev->flags&~FMASK) | flags;
394	netif_start_queue(m->dev);
395	return 0;
396}
397
/* Take the master device down: stop its transmit queue.  Always returns 0. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);

	return 0;
}
403
404static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
405						     struct rtnl_link_stats64 *stats)
406{
407	struct teql_master *m = netdev_priv(dev);
408
409	stats->tx_packets	= m->tx_packets;
410	stats->tx_bytes		= m->tx_bytes;
411	stats->tx_errors	= m->tx_errors;
412	stats->tx_dropped	= m->tx_dropped;
413	return stats;
414}
415
416static int teql_master_mtu(struct net_device *dev, int new_mtu)
417{
418	struct teql_master *m = netdev_priv(dev);
419	struct Qdisc *q;
420
421	if (new_mtu < 68)
422		return -EINVAL;
423
424	q = m->slaves;
425	if (q) {
426		do {
427			if (new_mtu > qdisc_dev(q)->mtu)
428				return -EINVAL;
429		} while ((q = NEXT_SLAVE(q)) != m->slaves);
430	}
431
432	dev->mtu = new_mtu;
433	return 0;
434}
435
436static const struct net_device_ops teql_netdev_ops = {
437	.ndo_open	= teql_master_open,
438	.ndo_stop	= teql_master_close,
439	.ndo_start_xmit	= teql_master_xmit,
440	.ndo_get_stats64 = teql_master_stats64,
441	.ndo_change_mtu	= teql_master_mtu,
442};
443
444static __init void teql_master_setup(struct net_device *dev)
445{
446	struct teql_master *master = netdev_priv(dev);
447	struct Qdisc_ops *ops = &master->qops;
448
449	master->dev	= dev;
450	ops->priv_size  = sizeof(struct teql_sched_data);
451
452	ops->enqueue	=	teql_enqueue;
453	ops->dequeue	=	teql_dequeue;
454	ops->peek	=	teql_peek;
455	ops->init	=	teql_qdisc_init;
456	ops->reset	=	teql_reset;
457	ops->destroy	=	teql_destroy;
458	ops->owner	=	THIS_MODULE;
459
460	dev->netdev_ops =       &teql_netdev_ops;
461	dev->type		= ARPHRD_VOID;
462	dev->mtu		= 1500;
463	dev->tx_queue_len	= 100;
464	dev->flags		= IFF_NOARP;
465	dev->hard_header_len	= LL_MAX_HEADER;
466	netif_keep_dst(dev);
467}
468
469static LIST_HEAD(master_dev_list);
470static int max_equalizers = 1;
471module_param(max_equalizers, int, 0);
472MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
473
474static int __init teql_init(void)
475{
476	int i;
477	int err = -ENODEV;
478
479	for (i = 0; i < max_equalizers; i++) {
480		struct net_device *dev;
481		struct teql_master *master;
482
483		dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
484				   NET_NAME_UNKNOWN, teql_master_setup);
485		if (!dev) {
486			err = -ENOMEM;
487			break;
488		}
489
490		if ((err = register_netdev(dev))) {
491			free_netdev(dev);
492			break;
493		}
494
495		master = netdev_priv(dev);
496
497		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
498		err = register_qdisc(&master->qops);
499
500		if (err) {
501			unregister_netdev(dev);
502			free_netdev(dev);
503			break;
504		}
505
506		list_add_tail(&master->master_list, &master_dev_list);
507	}
508	return i ? 0 : err;
509}
510
511static void __exit teql_exit(void)
512{
513	struct teql_master *master, *nxt;
514
515	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
516
517		list_del(&master->master_list);
518
519		unregister_qdisc(&master->qops);
520		unregister_netdev(master->dev);
521		free_netdev(master->dev);
522	}
523}
524
525module_init(teql_init);
526module_exit(teql_exit);
527
528MODULE_LICENSE("GPL");
529