1/*******************************************************************************
2
3  Intel 10 Gigabit PCI Express Linux driver
4  Copyright(c) 1999 - 2013 Intel Corporation.
5
6  This program is free software; you can redistribute it and/or modify it
7  under the terms and conditions of the GNU General Public License,
8  version 2, as published by the Free Software Foundation.
9
10  This program is distributed in the hope it will be useful, but WITHOUT
11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  more details.
14
15  You should have received a copy of the GNU General Public License along with
16  this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19  The full GNU General Public License is included in this distribution in
20  the file called "COPYING".
21
22  Contact Information:
23  Linux NICS <linux.nics@intel.com>
24  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
25  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
26
27*******************************************************************************/
28
29#include "ixgbe.h"
30#include "ixgbe_sriov.h"
31
32#ifdef CONFIG_IXGBE_DCB
33/**
34 * ixgbe_cache_ring_dcb_sriov - Descriptor ring to register mapping for SR-IOV
35 * @adapter: board private structure to initialize
36 *
37 * Cache the descriptor ring offsets for SR-IOV to the assigned rings.  It
38 * will also try to cache the proper offsets if RSS/FCoE are enabled along
39 * with VMDq.
40 *
41 **/
static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter *adapter)
{
#ifdef IXGBE_FCOE
	struct ixgbe_ring_feature *fcoe = &adapter->ring_feature[RING_F_FCOE];
#endif /* IXGBE_FCOE */
	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
	int i;
	u16 reg_idx;
	u8 tcs = netdev_get_num_tc(adapter->netdev);

	/* verify we have DCB queueing enabled before proceeding */
	if (tcs <= 1)
		return false;

	/* verify we have VMDq enabled before proceeding */
	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
		return false;

	/* start at VMDq register offset for SR-IOV enabled setups;
	 * __ALIGN_MASK(1, ~vmdq->mask) is the number of hardware queues
	 * per VMDq pool, so this is the first queue of pool vmdq->offset.
	 */
	reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
	for (i = 0; i < adapter->num_rx_queues; i++, reg_idx++) {
		/* If we are greater than indices move to next pool:
		 * (reg_idx & ~vmdq->mask) is the queue index within the
		 * current pool; once it reaches the TC count, round
		 * reg_idx up to the first queue of the next pool.
		 */
		if ((reg_idx & ~vmdq->mask) >= tcs)
			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
		adapter->rx_ring[i]->reg_idx = reg_idx;
	}

	/* repeat the same pool-walk for the Tx rings */
	reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
	for (i = 0; i < adapter->num_tx_queues; i++, reg_idx++) {
		/* If we are greater than indices move to next pool */
		if ((reg_idx & ~vmdq->mask) >= tcs)
			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
		adapter->tx_ring[i]->reg_idx = reg_idx;
	}

#ifdef IXGBE_FCOE
	/* nothing to do if FCoE is disabled */
	if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
		return true;

	/* The work is already done if the FCoE ring is shared */
	if (fcoe->offset < tcs)
		return true;

	/* The FCoE rings exist separately, we need to move their reg_idx */
	if (fcoe->indices) {
		u16 queues_per_pool = __ALIGN_MASK(1, ~vmdq->mask);
		u8 fcoe_tc = ixgbe_fcoe_get_tc(adapter);

		/* FCoE queues start in the first pool past the VMDq range
		 * and each ring sits at the FCoE TC's queue of its pool.
		 */
		reg_idx = (vmdq->offset + vmdq->indices) * queues_per_pool;
		for (i = fcoe->offset; i < adapter->num_rx_queues; i++) {
			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask) + fcoe_tc;
			adapter->rx_ring[i]->reg_idx = reg_idx;
			reg_idx++;
		}

		reg_idx = (vmdq->offset + vmdq->indices) * queues_per_pool;
		for (i = fcoe->offset; i < adapter->num_tx_queues; i++) {
			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask) + fcoe_tc;
			adapter->tx_ring[i]->reg_idx = reg_idx;
			reg_idx++;
		}
	}

#endif /* IXGBE_FCOE */
	return true;
}
109
110/* ixgbe_get_first_reg_idx - Return first register index associated with ring */
111static void ixgbe_get_first_reg_idx(struct ixgbe_adapter *adapter, u8 tc,
112				    unsigned int *tx, unsigned int *rx)
113{
114	struct net_device *dev = adapter->netdev;
115	struct ixgbe_hw *hw = &adapter->hw;
116	u8 num_tcs = netdev_get_num_tc(dev);
117
118	*tx = 0;
119	*rx = 0;
120
121	switch (hw->mac.type) {
122	case ixgbe_mac_82598EB:
123		/* TxQs/TC: 4	RxQs/TC: 8 */
124		*tx = tc << 2; /* 0, 4,  8, 12, 16, 20, 24, 28 */
125		*rx = tc << 3; /* 0, 8, 16, 24, 32, 40, 48, 56 */
126		break;
127	case ixgbe_mac_82599EB:
128	case ixgbe_mac_X540:
129	case ixgbe_mac_X550:
130	case ixgbe_mac_X550EM_x:
131		if (num_tcs > 4) {
132			/*
133			 * TCs    : TC0/1 TC2/3 TC4-7
134			 * TxQs/TC:    32    16     8
135			 * RxQs/TC:    16    16    16
136			 */
137			*rx = tc << 4;
138			if (tc < 3)
139				*tx = tc << 5;		/*   0,  32,  64 */
140			else if (tc < 5)
141				*tx = (tc + 2) << 4;	/*  80,  96 */
142			else
143				*tx = (tc + 8) << 3;	/* 104, 112, 120 */
144		} else {
145			/*
146			 * TCs    : TC0 TC1 TC2/3
147			 * TxQs/TC:  64  32    16
148			 * RxQs/TC:  32  32    32
149			 */
150			*rx = tc << 5;
151			if (tc < 2)
152				*tx = tc << 6;		/*  0,  64 */
153			else
154				*tx = (tc + 4) << 4;	/* 96, 112 */
155		}
156	default:
157		break;
158	}
159}
160
161/**
162 * ixgbe_cache_ring_dcb - Descriptor ring to register mapping for DCB
163 * @adapter: board private structure to initialize
164 *
165 * Cache the descriptor ring offsets for DCB to the assigned rings.
166 *
167 **/
168static bool ixgbe_cache_ring_dcb(struct ixgbe_adapter *adapter)
169{
170	struct net_device *dev = adapter->netdev;
171	unsigned int tx_idx, rx_idx;
172	int tc, offset, rss_i, i;
173	u8 num_tcs = netdev_get_num_tc(dev);
174
175	/* verify we have DCB queueing enabled before proceeding */
176	if (num_tcs <= 1)
177		return false;
178
179	rss_i = adapter->ring_feature[RING_F_RSS].indices;
180
181	for (tc = 0, offset = 0; tc < num_tcs; tc++, offset += rss_i) {
182		ixgbe_get_first_reg_idx(adapter, tc, &tx_idx, &rx_idx);
183		for (i = 0; i < rss_i; i++, tx_idx++, rx_idx++) {
184			adapter->tx_ring[offset + i]->reg_idx = tx_idx;
185			adapter->rx_ring[offset + i]->reg_idx = rx_idx;
186			adapter->tx_ring[offset + i]->dcb_tc = tc;
187			adapter->rx_ring[offset + i]->dcb_tc = tc;
188		}
189	}
190
191	return true;
192}
193
194#endif
195/**
196 * ixgbe_cache_ring_sriov - Descriptor ring to register mapping for sriov
197 * @adapter: board private structure to initialize
198 *
199 * SR-IOV doesn't use any descriptor rings but changes the default if
200 * no other mapping is used.
201 *
202 */
static bool ixgbe_cache_ring_sriov(struct ixgbe_adapter *adapter)
{
#ifdef IXGBE_FCOE
	struct ixgbe_ring_feature *fcoe = &adapter->ring_feature[RING_F_FCOE];
#endif /* IXGBE_FCOE */
	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
	struct ixgbe_ring_feature *rss = &adapter->ring_feature[RING_F_RSS];
	int i;
	u16 reg_idx;

	/* only proceed if VMDq is enabled */
	if (!(adapter->flags & IXGBE_FLAG_VMDQ_ENABLED))
		return false;

	/* start at VMDq register offset for SR-IOV enabled setups;
	 * __ALIGN_MASK(1, ~vmdq->mask) is the queue count per VMDq pool
	 */
	reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
	for (i = 0; i < adapter->num_rx_queues; i++, reg_idx++) {
#ifdef IXGBE_FCOE
		/* Allow first FCoE queue to be mapped as RSS */
		if (fcoe->offset && (i > fcoe->offset))
			break;
#endif
		/* If we are greater than indices move to next pool */
		if ((reg_idx & ~vmdq->mask) >= rss->indices)
			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
		adapter->rx_ring[i]->reg_idx = reg_idx;
	}

#ifdef IXGBE_FCOE
	/* FCoE uses a linear block of queues so just assigning 1:1 */
	for (; i < adapter->num_rx_queues; i++, reg_idx++)
		adapter->rx_ring[i]->reg_idx = reg_idx;

#endif
	reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
	for (i = 0; i < adapter->num_tx_queues; i++, reg_idx++) {
#ifdef IXGBE_FCOE
		/* Allow first FCoE queue to be mapped as RSS */
		if (fcoe->offset && (i > fcoe->offset))
			break;
#endif
		/* If we are greater than indices move to next pool.
		 * NOTE(review): the Rx loop above masks with ~vmdq->mask
		 * while this Tx loop masks with rss->mask — confirm the
		 * asymmetry is intentional (it matters only when the two
		 * masks disagree).
		 */
		if ((reg_idx & rss->mask) >= rss->indices)
			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
		adapter->tx_ring[i]->reg_idx = reg_idx;
	}

#ifdef IXGBE_FCOE
	/* FCoE uses a linear block of queues so just assigning 1:1 */
	for (; i < adapter->num_tx_queues; i++, reg_idx++)
		adapter->tx_ring[i]->reg_idx = reg_idx;

#endif

	return true;
}
259
260/**
261 * ixgbe_cache_ring_rss - Descriptor ring to register mapping for RSS
262 * @adapter: board private structure to initialize
263 *
264 * Cache the descriptor ring offsets for RSS to the assigned rings.
265 *
266 **/
267static bool ixgbe_cache_ring_rss(struct ixgbe_adapter *adapter)
268{
269	int i;
270
271	for (i = 0; i < adapter->num_rx_queues; i++)
272		adapter->rx_ring[i]->reg_idx = i;
273	for (i = 0; i < adapter->num_tx_queues; i++)
274		adapter->tx_ring[i]->reg_idx = i;
275
276	return true;
277}
278
279/**
280 * ixgbe_cache_ring_register - Descriptor ring to register mapping
281 * @adapter: board private structure to initialize
282 *
283 * Once we know the feature-set enabled for the device, we'll cache
284 * the register offset the descriptor ring is assigned to.
285 *
286 * Note, the order the various feature calls is important.  It must start with
287 * the "most" features enabled at the same time, then trickle down to the
288 * least amount of features turned on at once.
289 **/
290static void ixgbe_cache_ring_register(struct ixgbe_adapter *adapter)
291{
292	/* start with default case */
293	adapter->rx_ring[0]->reg_idx = 0;
294	adapter->tx_ring[0]->reg_idx = 0;
295
296#ifdef CONFIG_IXGBE_DCB
297	if (ixgbe_cache_ring_dcb_sriov(adapter))
298		return;
299
300	if (ixgbe_cache_ring_dcb(adapter))
301		return;
302
303#endif
304	if (ixgbe_cache_ring_sriov(adapter))
305		return;
306
307	ixgbe_cache_ring_rss(adapter);
308}
309
310#define IXGBE_RSS_16Q_MASK	0xF
311#define IXGBE_RSS_8Q_MASK	0x7
312#define IXGBE_RSS_4Q_MASK	0x3
313#define IXGBE_RSS_2Q_MASK	0x1
314#define IXGBE_RSS_DISABLED_MASK	0x0
315
316#ifdef CONFIG_IXGBE_DCB
317/**
318 * ixgbe_set_dcb_sriov_queues: Allocate queues for SR-IOV devices w/ DCB
319 * @adapter: board private structure to initialize
320 *
321 * When SR-IOV (Single Root IO Virtualiztion) is enabled, allocate queues
322 * and VM pools where appropriate.  Also assign queues based on DCB
323 * priorities and map accordingly..
324 *
325 **/
static bool ixgbe_set_dcb_sriov_queues(struct ixgbe_adapter *adapter)
{
	int i;
	u16 vmdq_i = adapter->ring_feature[RING_F_VMDQ].limit;
	u16 vmdq_m = 0;
#ifdef IXGBE_FCOE
	u16 fcoe_i = 0;
#endif
	u8 tcs = netdev_get_num_tc(adapter->netdev);

	/* verify we have DCB queueing enabled before proceeding */
	if (tcs <= 1)
		return false;

	/* verify we have VMDq enabled before proceeding */
	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
		return false;

	/* Add starting offset to total pool count */
	vmdq_i += adapter->ring_feature[RING_F_VMDQ].offset;

	/* 16 pools w/ 8 TC per pool */
	if (tcs > 4) {
		vmdq_i = min_t(u16, vmdq_i, 16);
		vmdq_m = IXGBE_82599_VMDQ_8Q_MASK;
	/* 32 pools w/ 4 TC per pool */
	} else {
		vmdq_i = min_t(u16, vmdq_i, 32);
		vmdq_m = IXGBE_82599_VMDQ_4Q_MASK;
	}

#ifdef IXGBE_FCOE
	/* queues in the remaining pools are available for FCoE:
	 * 128 hardware queues / queues-per-pool = total pools; anything
	 * past the vmdq_i pools in use can host FCoE rings
	 */
	fcoe_i = (128 / __ALIGN_MASK(1, ~vmdq_m)) - vmdq_i;

#endif
	/* remove the starting offset from the pool count */
	vmdq_i -= adapter->ring_feature[RING_F_VMDQ].offset;

	/* save features for later use */
	adapter->ring_feature[RING_F_VMDQ].indices = vmdq_i;
	adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m;

	/*
	 * We do not support DCB, VMDq, and RSS all simultaneously
	 * so we will disable RSS since it is the lowest priority
	 */
	adapter->ring_feature[RING_F_RSS].indices = 1;
	adapter->ring_feature[RING_F_RSS].mask = IXGBE_RSS_DISABLED_MASK;

	/* disable ATR as it is not supported when VMDq is enabled */
	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;

	/* one queue per TC in every pool */
	adapter->num_rx_pools = vmdq_i;
	adapter->num_rx_queues_per_pool = tcs;

	adapter->num_tx_queues = vmdq_i * tcs;
	adapter->num_rx_queues = vmdq_i * tcs;

#ifdef IXGBE_FCOE
	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
		struct ixgbe_ring_feature *fcoe;

		fcoe = &adapter->ring_feature[RING_F_FCOE];

		/* limit ourselves based on feature limits */
		fcoe_i = min_t(u16, fcoe_i, fcoe->limit);

		if (fcoe_i) {
			/* alloc queues for FCoE separately */
			fcoe->indices = fcoe_i;
			fcoe->offset = vmdq_i * tcs;

			/* add queues to adapter */
			adapter->num_tx_queues += fcoe_i;
			adapter->num_rx_queues += fcoe_i;
		} else if (tcs > 1) {
			/* use queue belonging to FcoE TC */
			fcoe->indices = 1;
			fcoe->offset = ixgbe_fcoe_get_tc(adapter);
		} else {
			/* no spare queues and no FCoE TC: give up on FCoE */
			adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;

			fcoe->indices = 0;
			fcoe->offset = 0;
		}
	}

#endif /* IXGBE_FCOE */
	/* configure TC to queue mapping: one netdev queue per TC */
	for (i = 0; i < tcs; i++)
		netdev_set_tc_queue(adapter->netdev, i, 1, i);

	return true;
}
421
/**
 * ixgbe_set_dcb_queues - Allocate queues for a DCB-enabled device
 * @adapter: board private structure to initialize
 *
 * Derive the per-TC RSS width from the number of Tx queues the netdev was
 * registered with, cap it per the hardware DCB mode, then program the
 * netdev TC-to-queue mapping and the adapter queue counts.  Returns false
 * (leaving the adapter untouched) when fewer than two traffic classes are
 * enabled.
 **/
static bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter)
{
	struct net_device *dev = adapter->netdev;
	struct ixgbe_ring_feature *f;
	int rss_i, rss_m, i;
	int tcs;

	/* Map queue offset and counts onto allocated tx queues */
	tcs = netdev_get_num_tc(dev);

	/* verify we have DCB queueing enabled before proceeding */
	if (tcs <= 1)
		return false;

	/* determine the upper limit for our current DCB mode */
	rss_i = dev->num_tx_queues / tcs;
	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
		/* 8 TC w/ 4 queues per TC */
		rss_i = min_t(u16, rss_i, 4);
		rss_m = IXGBE_RSS_4Q_MASK;
	} else if (tcs > 4) {
		/* 8 TC w/ 8 queues per TC */
		rss_i = min_t(u16, rss_i, 8);
		rss_m = IXGBE_RSS_8Q_MASK;
	} else {
		/* 4 TC w/ 16 queues per TC */
		rss_i = min_t(u16, rss_i, 16);
		rss_m = IXGBE_RSS_16Q_MASK;
	}

	/* set RSS mask and indices, honoring the user-configured limit */
	f = &adapter->ring_feature[RING_F_RSS];
	rss_i = min_t(int, rss_i, f->limit);
	f->indices = rss_i;
	f->mask = rss_m;

	/* disable ATR as it is not supported when multiple TCs are enabled */
	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;

#ifdef IXGBE_FCOE
	/* FCoE enabled queues require special configuration indexed
	 * by feature specific indices and offset. Here we map FCoE
	 * indices onto the DCB queue pairs allowing FCoE to own
	 * configuration later.
	 */
	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
		u8 tc = ixgbe_fcoe_get_tc(adapter);

		f = &adapter->ring_feature[RING_F_FCOE];
		f->indices = min_t(u16, rss_i, f->limit);
		f->offset = rss_i * tc;
	}

#endif /* IXGBE_FCOE */
	/* each TC owns a contiguous run of rss_i netdev queues */
	for (i = 0; i < tcs; i++)
		netdev_set_tc_queue(dev, i, rss_i, rss_i * i);

	adapter->num_tx_queues = rss_i * tcs;
	adapter->num_rx_queues = rss_i * tcs;

	return true;
}
484
485#endif
486/**
487 * ixgbe_set_sriov_queues - Allocate queues for SR-IOV devices
488 * @adapter: board private structure to initialize
489 *
490 * When SR-IOV (Single Root IO Virtualiztion) is enabled, allocate queues
491 * and VM pools where appropriate.  If RSS is available, then also try and
492 * enable RSS and map accordingly.
493 *
494 **/
static bool ixgbe_set_sriov_queues(struct ixgbe_adapter *adapter)
{
	u16 vmdq_i = adapter->ring_feature[RING_F_VMDQ].limit;
	u16 vmdq_m = 0;
	u16 rss_i = adapter->ring_feature[RING_F_RSS].limit;
	u16 rss_m = IXGBE_RSS_DISABLED_MASK;
#ifdef IXGBE_FCOE
	u16 fcoe_i = 0;
#endif
	/* true when more than one pool bit is set in fwd_bitmask —
	 * presumably indicating additional macvlan offload pools are in
	 * use; TODO confirm against the fwd_bitmask users
	 */
	bool pools = (find_first_zero_bit(&adapter->fwd_bitmask, 32) > 1);

	/* only proceed if SR-IOV is enabled */
	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
		return false;

	/* Add starting offset to total pool count */
	vmdq_i += adapter->ring_feature[RING_F_VMDQ].offset;

	/* double check we are limited to maximum pools */
	vmdq_i = min_t(u16, IXGBE_MAX_VMDQ_INDICES, vmdq_i);

	/* 64 pool mode with 2 queues per pool */
	if ((vmdq_i > 32) || (rss_i < 4) || (vmdq_i > 16 && pools)) {
		vmdq_m = IXGBE_82599_VMDQ_2Q_MASK;
		rss_m = IXGBE_RSS_2Q_MASK;
		rss_i = min_t(u16, rss_i, 2);
	/* 32 pool mode with 4 queues per pool */
	} else {
		vmdq_m = IXGBE_82599_VMDQ_4Q_MASK;
		rss_m = IXGBE_RSS_4Q_MASK;
		rss_i = 4;
	}

#ifdef IXGBE_FCOE
	/* queues in the remaining pools are available for FCoE:
	 * whatever is left of the 128 hardware queues after the VMDq
	 * pools have taken their share
	 */
	fcoe_i = 128 - (vmdq_i * __ALIGN_MASK(1, ~vmdq_m));

#endif
	/* remove the starting offset from the pool count */
	vmdq_i -= adapter->ring_feature[RING_F_VMDQ].offset;

	/* save features for later use */
	adapter->ring_feature[RING_F_VMDQ].indices = vmdq_i;
	adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m;

	/* limit RSS based on user input and save for later use */
	adapter->ring_feature[RING_F_RSS].indices = rss_i;
	adapter->ring_feature[RING_F_RSS].mask = rss_m;

	adapter->num_rx_pools = vmdq_i;
	adapter->num_rx_queues_per_pool = rss_i;

	adapter->num_rx_queues = vmdq_i * rss_i;
	adapter->num_tx_queues = vmdq_i * rss_i;

	/* disable ATR as it is not supported when VMDq is enabled */
	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;

#ifdef IXGBE_FCOE
	/*
	 * FCoE can use rings from adjacent buffers to allow RSS
	 * like behavior.  To account for this we need to add the
	 * FCoE indices to the total ring count.
	 */
	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
		struct ixgbe_ring_feature *fcoe;

		fcoe = &adapter->ring_feature[RING_F_FCOE];

		/* limit ourselves based on feature limits */
		fcoe_i = min_t(u16, fcoe_i, fcoe->limit);

		if (vmdq_i > 1 && fcoe_i) {
			/* alloc queues for FCoE separately */
			fcoe->indices = fcoe_i;
			fcoe->offset = vmdq_i * rss_i;
		} else {
			/* merge FCoE queues with RSS queues */
			fcoe_i = min_t(u16, fcoe_i + rss_i, num_online_cpus());

			/* limit indices to rss_i if MSI-X is disabled */
			if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
				fcoe_i = rss_i;

			/* attempt to reserve some queues for just FCoE */
			fcoe->indices = min_t(u16, fcoe_i, fcoe->limit);
			fcoe->offset = fcoe_i - fcoe->indices;

			/* only the rings past the shared RSS span are new */
			fcoe_i -= rss_i;
		}

		/* add queues to adapter */
		adapter->num_tx_queues += fcoe_i;
		adapter->num_rx_queues += fcoe_i;
	}

#endif
	return true;
}
594
595/**
596 * ixgbe_set_rss_queues - Allocate queues for RSS
597 * @adapter: board private structure to initialize
598 *
599 * This is our "base" multiqueue mode.  RSS (Receive Side Scaling) will try
600 * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU.
601 *
602 **/
static bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter)
{
	struct ixgbe_ring_feature *f;
	u16 rss_i;

	/* set mask for 16 queue limit of RSS */
	f = &adapter->ring_feature[RING_F_RSS];
	rss_i = f->limit;

	f->indices = rss_i;
	f->mask = IXGBE_RSS_16Q_MASK;

	/* disable ATR by default, it will be configured below */
	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;

	/*
	 * Use Flow Director in addition to RSS to ensure the best
	 * distribution of flows across cores, even when an FDIR flow
	 * isn't matched.
	 */
	if (rss_i > 1 && adapter->atr_sample_rate) {
		f = &adapter->ring_feature[RING_F_FDIR];

		/* FDIR can spread across more rings than plain RSS */
		rss_i = f->indices = f->limit;

		if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
			adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
	}

#ifdef IXGBE_FCOE
	/*
	 * FCoE can exist on the same rings as standard network traffic
	 * however it is preferred to avoid that if possible.  In order
	 * to get the best performance we allocate as many FCoE queues
	 * as we can and we place them at the end of the ring array to
	 * avoid sharing queues with standard RSS on systems with 24 or
	 * more CPUs.
	 */
	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
		struct net_device *dev = adapter->netdev;
		u16 fcoe_i;

		f = &adapter->ring_feature[RING_F_FCOE];

		/* merge FCoE queues with RSS queues */
		fcoe_i = min_t(u16, f->limit + rss_i, num_online_cpus());
		fcoe_i = min_t(u16, fcoe_i, dev->num_tx_queues);

		/* limit indices to rss_i if MSI-X is disabled */
		if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
			fcoe_i = rss_i;

		/* attempt to reserve some queues for just FCoE */
		f->indices = min_t(u16, fcoe_i, f->limit);
		f->offset = fcoe_i - f->indices;
		/* the total ring count must cover the FCoE span too */
		rss_i = max_t(u16, fcoe_i, rss_i);
	}

#endif /* IXGBE_FCOE */
	adapter->num_rx_queues = rss_i;
	adapter->num_tx_queues = rss_i;

	return true;
}
667
668/**
669 * ixgbe_set_num_queues - Allocate queues for device, feature dependent
670 * @adapter: board private structure to initialize
671 *
672 * This is the top level queue allocation routine.  The order here is very
673 * important, starting with the "most" number of features turned on at once,
674 * and ending with the smallest set of features.  This way large combinations
675 * can be allocated if they're turned on, and smaller combinations are the
676 * fallthrough conditions.
677 *
678 **/
679static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
680{
681	/* Start with base case */
682	adapter->num_rx_queues = 1;
683	adapter->num_tx_queues = 1;
684	adapter->num_rx_pools = adapter->num_rx_queues;
685	adapter->num_rx_queues_per_pool = 1;
686
687#ifdef CONFIG_IXGBE_DCB
688	if (ixgbe_set_dcb_sriov_queues(adapter))
689		return;
690
691	if (ixgbe_set_dcb_queues(adapter))
692		return;
693
694#endif
695	if (ixgbe_set_sriov_queues(adapter))
696		return;
697
698	ixgbe_set_rss_queues(adapter);
699}
700
701/**
702 * ixgbe_acquire_msix_vectors - acquire MSI-X vectors
703 * @adapter: board private structure
704 *
705 * Attempts to acquire a suitable range of MSI-X vector interrupts. Will
706 * return a negative error code if unable to acquire MSI-X vectors for any
707 * reason.
708 */
static int ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;
	int i, vectors, vector_threshold;

	/* We start by asking for one vector per queue pair */
	vectors = max(adapter->num_rx_queues, adapter->num_tx_queues);

	/* It is easy to be greedy for MSI-X vectors. However, it really
	 * doesn't do much good if we have a lot more vectors than CPUs. We'll
	 * be somewhat conservative and only ask for (roughly) the same number
	 * of vectors as there are CPUs.
	 */
	vectors = min_t(int, vectors, num_online_cpus());

	/* Some vectors are necessary for non-queue interrupts */
	vectors += NON_Q_VECTORS;

	/* Hardware can only support a maximum of hw.mac->max_msix_vectors.
	 * With features such as RSS and VMDq, we can easily surpass the
	 * number of Rx and Tx descriptor queues supported by our device.
	 * Thus, we cap the maximum in the rare cases where the CPU count also
	 * exceeds our vector limit
	 */
	vectors = min_t(int, vectors, hw->mac.max_msix_vectors);

	/* We want a minimum of two MSI-X vectors for (1) a TxQ[0] + RxQ[0]
	 * handler, and (2) an Other (Link Status Change, etc.) handler.
	 */
	vector_threshold = MIN_MSIX_COUNT;

	adapter->msix_entries = kcalloc(vectors,
					sizeof(struct msix_entry),
					GFP_KERNEL);
	if (!adapter->msix_entries)
		return -ENOMEM;

	/* the entry field tells the PCI core which vector numbers we want */
	for (i = 0; i < vectors; i++)
		adapter->msix_entries[i].entry = i;

	/* ask for anything between vector_threshold and vectors; on success
	 * the return value is the number actually granted
	 */
	vectors = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
					vector_threshold, vectors);

	if (vectors < 0) {
		/* A negative count of allocated vectors indicates an error in
		 * acquiring within the specified range of MSI-X vectors
		 */
		e_dev_warn("Failed to allocate MSI-X interrupts. Err: %d\n",
			   vectors);

		/* roll back: clear the flag and release the entry table so
		 * the caller can fall back to MSI / legacy interrupts
		 */
		adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;

		return vectors;
	}

	/* we successfully allocated some number of vectors within our
	 * requested range.
	 */
	adapter->flags |= IXGBE_FLAG_MSIX_ENABLED;

	/* Adjust for only the vectors we'll use, which is minimum
	 * of max_q_vectors, or the number of vectors we were allocated.
	 */
	vectors -= NON_Q_VECTORS;
	adapter->num_q_vectors = min_t(int, vectors, adapter->max_q_vectors);

	return 0;
}
779
780static void ixgbe_add_ring(struct ixgbe_ring *ring,
781			   struct ixgbe_ring_container *head)
782{
783	ring->next = head->ring;
784	head->ring = ring;
785	head->count++;
786}
787
788/**
789 * ixgbe_alloc_q_vector - Allocate memory for a single interrupt vector
790 * @adapter: board private structure to initialize
791 * @v_count: q_vectors allocated on adapter, used for ring interleaving
792 * @v_idx: index of vector in adapter struct
793 * @txr_count: total number of Tx rings to allocate
794 * @txr_idx: index of first Tx ring to allocate
795 * @rxr_count: total number of Rx rings to allocate
796 * @rxr_idx: index of first Rx ring to allocate
797 *
798 * We allocate one q_vector.  If allocation fails we return -ENOMEM.
799 **/
static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
				int v_count, int v_idx,
				int txr_count, int txr_idx,
				int rxr_count, int rxr_idx)
{
	struct ixgbe_q_vector *q_vector;
	struct ixgbe_ring *ring;
	int node = NUMA_NO_NODE;
	int cpu = -1;
	int ring_count, size;
	u8 tcs = netdev_get_num_tc(adapter->netdev);

	/* the rings are allocated as a flexible tail on the q_vector */
	ring_count = txr_count + rxr_count;
	size = sizeof(struct ixgbe_q_vector) +
	       (sizeof(struct ixgbe_ring) * ring_count);

	/* customize cpu for Flow Director mapping: pin vector v_idx to
	 * CPU v_idx (and its NUMA node) when ATR is in use without
	 * DCB/SR-IOV
	 */
	if ((tcs <= 1) && !(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) {
		u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
		if (rss_i > 1 && adapter->atr_sample_rate) {
			if (cpu_online(v_idx)) {
				cpu = v_idx;
				node = cpu_to_node(cpu);
			}
		}
	}

	/* allocate q_vector and rings; retry without the node hint if the
	 * node-local allocation fails
	 */
	q_vector = kzalloc_node(size, GFP_KERNEL, node);
	if (!q_vector)
		q_vector = kzalloc(size, GFP_KERNEL);
	if (!q_vector)
		return -ENOMEM;

	/* setup affinity mask and node */
	if (cpu != -1)
		cpumask_set_cpu(cpu, &q_vector->affinity_mask);
	q_vector->numa_node = node;

#ifdef CONFIG_IXGBE_DCA
	/* initialize CPU for DCA */
	q_vector->cpu = -1;

#endif
	/* initialize NAPI with the default 64-packet budget */
	netif_napi_add(adapter->netdev, &q_vector->napi,
		       ixgbe_poll, 64);
	napi_hash_add(&q_vector->napi);

#ifdef CONFIG_NET_RX_BUSY_POLL
	/* initialize busy poll */
	atomic_set(&q_vector->state, IXGBE_QV_STATE_DISABLE);

#endif
	/* tie q_vector and adapter together */
	adapter->q_vector[v_idx] = q_vector;
	q_vector->adapter = adapter;
	q_vector->v_idx = v_idx;

	/* initialize work limits */
	q_vector->tx.work_limit = adapter->tx_work_limit;

	/* initialize pointer to rings */
	ring = q_vector->ring;

	/* intialize ITR: a "1" setting means "use the driver default" */
	if (txr_count && !rxr_count) {
		/* tx only vector */
		if (adapter->tx_itr_setting == 1)
			q_vector->itr = IXGBE_12K_ITR;
		else
			q_vector->itr = adapter->tx_itr_setting;
	} else {
		/* rx or rx/tx vector */
		if (adapter->rx_itr_setting == 1)
			q_vector->itr = IXGBE_20K_ITR;
		else
			q_vector->itr = adapter->rx_itr_setting;
	}

	while (txr_count) {
		/* assign generic ring traits */
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;

		/* configure backlink on ring */
		ring->q_vector = q_vector;

		/* update q_vector Tx values */
		ixgbe_add_ring(ring, &q_vector->tx);

		/* apply Tx specific ring traits */
		ring->count = adapter->tx_ring_count;
		if (adapter->num_rx_pools > 1)
			ring->queue_index =
				txr_idx % adapter->num_rx_queues_per_pool;
		else
			ring->queue_index = txr_idx;

		/* assign ring to adapter */
		adapter->tx_ring[txr_idx] = ring;

		/* update count and index; rings interleave across vectors,
		 * hence the stride of v_count
		 */
		txr_count--;
		txr_idx += v_count;

		/* push pointer to next ring */
		ring++;
	}

	while (rxr_count) {
		/* assign generic ring traits */
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;

		/* configure backlink on ring */
		ring->q_vector = q_vector;

		/* update q_vector Rx values */
		ixgbe_add_ring(ring, &q_vector->rx);

		/*
		 * 82599 errata, UDP frames with a 0 checksum
		 * can be marked as checksum errors.
		 */
		if (adapter->hw.mac.type == ixgbe_mac_82599EB)
			set_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, &ring->state);

#ifdef IXGBE_FCOE
		/* flag rings that fall within the FCoE feature span */
		if (adapter->netdev->features & NETIF_F_FCOE_MTU) {
			struct ixgbe_ring_feature *f;
			f = &adapter->ring_feature[RING_F_FCOE];
			if ((rxr_idx >= f->offset) &&
			    (rxr_idx < f->offset + f->indices))
				set_bit(__IXGBE_RX_FCOE, &ring->state);
		}

#endif /* IXGBE_FCOE */
		/* apply Rx specific ring traits */
		ring->count = adapter->rx_ring_count;
		if (adapter->num_rx_pools > 1)
			ring->queue_index =
				rxr_idx % adapter->num_rx_queues_per_pool;
		else
			ring->queue_index = rxr_idx;

		/* assign ring to adapter */
		adapter->rx_ring[rxr_idx] = ring;

		/* update count and index */
		rxr_count--;
		rxr_idx += v_count;

		/* push pointer to next ring */
		ring++;
	}

	return 0;
}
959
960/**
961 * ixgbe_free_q_vector - Free memory allocated for specific interrupt vector
962 * @adapter: board private structure to initialize
963 * @v_idx: Index of vector to be freed
964 *
965 * This function frees the memory allocated to the q_vector.  In addition if
966 * NAPI is enabled it will delete any references to the NAPI struct prior
967 * to freeing the q_vector.
968 **/
969static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx)
970{
971	struct ixgbe_q_vector *q_vector = adapter->q_vector[v_idx];
972	struct ixgbe_ring *ring;
973
974	ixgbe_for_each_ring(ring, q_vector->tx)
975		adapter->tx_ring[ring->queue_index] = NULL;
976
977	ixgbe_for_each_ring(ring, q_vector->rx)
978		adapter->rx_ring[ring->queue_index] = NULL;
979
980	adapter->q_vector[v_idx] = NULL;
981	napi_hash_del(&q_vector->napi);
982	netif_napi_del(&q_vector->napi);
983
984	/*
985	 * ixgbe_get_stats64() might access the rings on this vector,
986	 * we must wait a grace period before freeing it.
987	 */
988	kfree_rcu(q_vector, rcu);
989}
990
991/**
992 * ixgbe_alloc_q_vectors - Allocate memory for interrupt vectors
993 * @adapter: board private structure to initialize
994 *
995 * We allocate one q_vector per queue interrupt.  If allocation fails we
996 * return -ENOMEM.
997 **/
static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
{
	int q_vectors = adapter->num_q_vectors;
	int rxr_remaining = adapter->num_rx_queues;
	int txr_remaining = adapter->num_tx_queues;
	int rxr_idx = 0, txr_idx = 0, v_idx = 0;
	int err;

	/* only one q_vector if MSI-X is disabled. */
	if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
		q_vectors = 1;

	/* when vectors outnumber total queues, give every Rx queue a
	 * dedicated (Rx-only) vector; Tx queues are handled by the
	 * second loop below
	 */
	if (q_vectors >= (rxr_remaining + txr_remaining)) {
		for (; rxr_remaining; v_idx++) {
			err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
						   0, 0, 1, rxr_idx);

			if (err)
				goto err_out;

			/* update counts and index */
			rxr_remaining--;
			rxr_idx++;
		}
	}

	/* distribute the remaining rings evenly over the remaining
	 * vectors; DIV_ROUND_UP keeps the per-vector load balanced as
	 * the remainders shrink
	 */
	for (; v_idx < q_vectors; v_idx++) {
		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
		err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
					   tqpv, txr_idx,
					   rqpv, rxr_idx);

		if (err)
			goto err_out;

		/* update counts and index */
		rxr_remaining -= rqpv;
		txr_remaining -= tqpv;
		rxr_idx++;
		txr_idx++;
	}

	return 0;

err_out:
	/* unwind: zero the queue/vector counts and free every vector
	 * allocated so far
	 */
	adapter->num_tx_queues = 0;
	adapter->num_rx_queues = 0;
	adapter->num_q_vectors = 0;

	while (v_idx--)
		ixgbe_free_q_vector(adapter, v_idx);

	return -ENOMEM;
}
1053
1054/**
1055 * ixgbe_free_q_vectors - Free memory allocated for interrupt vectors
1056 * @adapter: board private structure to initialize
1057 *
1058 * This function frees the memory allocated to the q_vectors.  In addition if
1059 * NAPI is enabled it will delete any references to the NAPI struct prior
1060 * to freeing the q_vector.
1061 **/
1062static void ixgbe_free_q_vectors(struct ixgbe_adapter *adapter)
1063{
1064	int v_idx = adapter->num_q_vectors;
1065
1066	adapter->num_tx_queues = 0;
1067	adapter->num_rx_queues = 0;
1068	adapter->num_q_vectors = 0;
1069
1070	while (v_idx--)
1071		ixgbe_free_q_vector(adapter, v_idx);
1072}
1073
1074static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter)
1075{
1076	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) {
1077		adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
1078		pci_disable_msix(adapter->pdev);
1079		kfree(adapter->msix_entries);
1080		adapter->msix_entries = NULL;
1081	} else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) {
1082		adapter->flags &= ~IXGBE_FLAG_MSI_ENABLED;
1083		pci_disable_msi(adapter->pdev);
1084	}
1085}
1086
1087/**
1088 * ixgbe_set_interrupt_capability - set MSI-X or MSI if supported
1089 * @adapter: board private structure to initialize
1090 *
1091 * Attempt to configure the interrupts using the best available
1092 * capabilities of the hardware and the kernel.
1093 **/
static void ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter)
{
	int err;

	/* We will try to get MSI-X interrupts first */
	if (!ixgbe_acquire_msix_vectors(adapter))
		return;

	/* At this point, we do not have MSI-X capabilities. We need to
	 * reconfigure or disable various features which require MSI-X
	 * capability.
	 */

	/* Disable DCB unless we only have a single traffic class */
	if (netdev_get_num_tc(adapter->netdev) > 1) {
		e_dev_warn("Number of DCB TCs exceeds number of available queues. Disabling DCB support.\n");
		netdev_reset_tc(adapter->netdev);

		/* 82598 has no per-TC flow control; restore the last
		 * link flow control mode that was requested
		 */
		if (adapter->hw.mac.type == ixgbe_mac_82598EB)
			adapter->hw.fc.requested_mode = adapter->last_lfc_mode;

		adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
		adapter->temp_dcb_cfg.pfc_mode_enable = false;
		adapter->dcb_cfg.pfc_mode_enable = false;
	}

	/* collapse DCB config to a single traffic class either way */
	adapter->dcb_cfg.num_tcs.pg_tcs = 1;
	adapter->dcb_cfg.num_tcs.pfc_tcs = 1;

	/* Disable SR-IOV support */
	e_dev_warn("Disabling SR-IOV support\n");
	ixgbe_disable_sriov(adapter);

	/* Disable RSS */
	e_dev_warn("Disabling RSS support\n");
	adapter->ring_feature[RING_F_RSS].limit = 1;

	/* recalculate number of queues now that many features have been
	 * changed or disabled.
	 */
	ixgbe_set_num_queues(adapter);
	adapter->num_q_vectors = 1;

	/* fall back to MSI, and to legacy INTx if even that fails */
	err = pci_enable_msi(adapter->pdev);
	if (err)
		e_dev_warn("Failed to allocate MSI interrupt, falling back to legacy. Error: %d\n",
			   err);
	else
		adapter->flags |= IXGBE_FLAG_MSI_ENABLED;
}
1144
1145/**
1146 * ixgbe_init_interrupt_scheme - Determine proper interrupt scheme
1147 * @adapter: board private structure to initialize
1148 *
1149 * We determine which interrupt scheme to use based on...
1150 * - Kernel support (MSI, MSI-X)
1151 *   - which can be user-defined (via MODULE_PARAM)
1152 * - Hardware queue count (num_*_queues)
1153 *   - defined by miscellaneous hardware support/features (RSS, etc.)
1154 **/
1155int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
1156{
1157	int err;
1158
1159	/* Number of supported queues */
1160	ixgbe_set_num_queues(adapter);
1161
1162	/* Set interrupt mode */
1163	ixgbe_set_interrupt_capability(adapter);
1164
1165	err = ixgbe_alloc_q_vectors(adapter);
1166	if (err) {
1167		e_dev_err("Unable to allocate memory for queue vectors\n");
1168		goto err_alloc_q_vectors;
1169	}
1170
1171	ixgbe_cache_ring_register(adapter);
1172
1173	e_dev_info("Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n",
1174		   (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled",
1175		   adapter->num_rx_queues, adapter->num_tx_queues);
1176
1177	set_bit(__IXGBE_DOWN, &adapter->state);
1178
1179	return 0;
1180
1181err_alloc_q_vectors:
1182	ixgbe_reset_interrupt_capability(adapter);
1183	return err;
1184}
1185
1186/**
1187 * ixgbe_clear_interrupt_scheme - Clear the current interrupt scheme settings
1188 * @adapter: board private structure to clear interrupt scheme on
1189 *
1190 * We go through and clear interrupt specific resources and reset the structure
1191 * to pre-load conditions
1192 **/
1193void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter)
1194{
1195	adapter->num_tx_queues = 0;
1196	adapter->num_rx_queues = 0;
1197
1198	ixgbe_free_q_vectors(adapter);
1199	ixgbe_reset_interrupt_capability(adapter);
1200}
1201
1202void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens,
1203		       u32 fcoe_sof_eof, u32 type_tucmd, u32 mss_l4len_idx)
1204{
1205	struct ixgbe_adv_tx_context_desc *context_desc;
1206	u16 i = tx_ring->next_to_use;
1207
1208	context_desc = IXGBE_TX_CTXTDESC(tx_ring, i);
1209
1210	i++;
1211	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
1212
1213	/* set bits to identify this as an advanced context descriptor */
1214	type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1215
1216	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
1217	context_desc->seqnum_seed	= cpu_to_le32(fcoe_sof_eof);
1218	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
1219	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
1220}
1221
1222