1/*
2 * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
3 * driver for Linux.
4 *
5 * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses.  You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 *     Redistribution and use in source and binary forms, with or
14 *     without modification, are permitted provided that the following
15 *     conditions are met:
16 *
17 *      - Redistributions of source code must retain the above
18 *        copyright notice, this list of conditions and the following
19 *        disclaimer.
20 *
21 *      - Redistributions in binary form must reproduce the above
22 *        copyright notice, this list of conditions and the following
23 *        disclaimer in the documentation and/or other materials
24 *        provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
37
38#include <linux/module.h>
39#include <linux/moduleparam.h>
40#include <linux/init.h>
41#include <linux/pci.h>
42#include <linux/dma-mapping.h>
43#include <linux/netdevice.h>
44#include <linux/etherdevice.h>
45#include <linux/debugfs.h>
46#include <linux/ethtool.h>
47#include <linux/mdio.h>
48
49#include "t4vf_common.h"
50#include "t4vf_defs.h"
51
52#include "../cxgb4/t4_regs.h"
53#include "../cxgb4/t4_msg.h"
54
55/*
56 * Generic information about the driver.
57 */
58#define DRV_VERSION "2.0.0-ko"
59#define DRV_DESC "Chelsio T4/T5 Virtual Function (VF) Network Driver"
60
61/*
62 * Module Parameters.
63 * ==================
64 */
65
66/*
67 * Default ethtool "message level" for adapters.
68 */
69#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
70			 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
71			 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
72
73static int dflt_msg_enable = DFLT_MSG_ENABLE;
74
75module_param(dflt_msg_enable, int, 0644);
76MODULE_PARM_DESC(dflt_msg_enable,
77		 "default adapter ethtool message level bitmap");
78
79/*
80 * The driver uses the best interrupt scheme available on a platform in the
81 * order MSI-X then MSI.  This parameter determines which of these schemes the
82 * driver may consider as follows:
83 *
84 *     msi = 2: choose from among MSI-X and MSI
85 *     msi = 1: only consider MSI interrupts
86 *
87 * Note that unlike the Physical Function driver, this Virtual Function driver
88 * does _not_ support legacy INTx interrupts (this limitation is mandated by
89 * the PCI-E SR-IOV standard).
90 */
91#define MSI_MSIX	2
92#define MSI_MSI		1
93#define MSI_DEFAULT	MSI_MSIX
94
95static int msi = MSI_DEFAULT;
96
97module_param(msi, int, 0644);
98MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
99
100/*
101 * Fundamental constants.
102 * ======================
103 */
104
105enum {
106	MAX_TXQ_ENTRIES		= 16384,
107	MAX_RSPQ_ENTRIES	= 16384,
108	MAX_RX_BUFFERS		= 16384,
109
110	MIN_TXQ_ENTRIES		= 32,
111	MIN_RSPQ_ENTRIES	= 128,
112	MIN_FL_ENTRIES		= 16,
113
114	/*
115	 * For purposes of manipulating the Free List size we need to
116	 * recognize that Free Lists are actually Egress Queues (the host
117	 * produces free buffers which the hardware consumes), Egress Queues
118	 * indices are all in units of Egress Context Units bytes, and free
119	 * list entries are 64-bit PCI DMA addresses.  And since the state of
120	 * the Producer Index == the Consumer Index implies an EMPTY list, we
121	 * always have at least one Egress Unit's worth of Free List entries
122	 * unused.  See sge.c for more details ...
123	 */
124	EQ_UNIT = SGE_EQ_IDXSIZE,
125	FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
126	MIN_FL_RESID = FL_PER_EQ_UNIT,
127};
128
129/*
130 * Global driver state.
131 * ====================
132 */
133
134static struct dentry *cxgb4vf_debugfs_root;
135
136/*
137 * OS "Callback" functions.
138 * ========================
139 */
140
141/*
142 * The link status has changed on the indicated "port" (Virtual Interface).
143 */
144void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
145{
146	struct net_device *dev = adapter->port[pidx];
147
148	/*
149	 * If the port is disabled or the current recorded "link up"
150	 * status matches the new status, just return.
151	 */
152	if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
153		return;
154
155	/*
156	 * Tell the OS that the link status has changed and print a short
157	 * informative message on the console about the event.
158	 */
159	if (link_ok) {
160		const char *s;
161		const char *fc;
162		const struct port_info *pi = netdev_priv(dev);
163
164		netif_carrier_on(dev);
165
166		switch (pi->link_cfg.speed) {
167		case 40000:
168			s = "40Gbps";
169			break;
170
171		case 10000:
172			s = "10Gbps";
173			break;
174
175		case 1000:
176			s = "1000Mbps";
177			break;
178
179		case 100:
180			s = "100Mbps";
181			break;
182
183		default:
184			s = "unknown";
185			break;
186		}
187
188		switch (pi->link_cfg.fc) {
189		case PAUSE_RX:
190			fc = "RX";
191			break;
192
193		case PAUSE_TX:
194			fc = "TX";
195			break;
196
197		case PAUSE_RX|PAUSE_TX:
198			fc = "RX/TX";
199			break;
200
201		default:
202			fc = "no";
203			break;
204		}
205
206		netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
207	} else {
208		netif_carrier_off(dev);
209		netdev_info(dev, "link down\n");
210	}
211}
212
213/*
214 * THe port module type has changed on the indicated "port" (Virtual
215 * Interface).
216 */
217void t4vf_os_portmod_changed(struct adapter *adapter, int pidx)
218{
219	static const char * const mod_str[] = {
220		NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
221	};
222	const struct net_device *dev = adapter->port[pidx];
223	const struct port_info *pi = netdev_priv(dev);
224
225	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
226		dev_info(adapter->pdev_dev, "%s: port module unplugged\n",
227			 dev->name);
228	else if (pi->mod_type < ARRAY_SIZE(mod_str))
229		dev_info(adapter->pdev_dev, "%s: %s port module inserted\n",
230			 dev->name, mod_str[pi->mod_type]);
231	else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
232		dev_info(adapter->pdev_dev, "%s: unsupported optical port "
233			 "module inserted\n", dev->name);
234	else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
235		dev_info(adapter->pdev_dev, "%s: unknown port module inserted,"
236			 "forcing TWINAX\n", dev->name);
237	else if (pi->mod_type == FW_PORT_MOD_TYPE_ERROR)
238		dev_info(adapter->pdev_dev, "%s: transceiver module error\n",
239			 dev->name);
240	else
241		dev_info(adapter->pdev_dev, "%s: unknown module type %d "
242			 "inserted\n", dev->name, pi->mod_type);
243}
244
245/*
246 * Net device operations.
247 * ======================
248 */
249
250
251
252
253/*
254 * Perform the MAC and PHY actions needed to enable a "port" (Virtual
255 * Interface).
256 */
257static int link_start(struct net_device *dev)
258{
259	int ret;
260	struct port_info *pi = netdev_priv(dev);
261
262	/*
263	 * We do not set address filters and promiscuity here, the stack does
264	 * that step explicitly. Enable vlan accel.
265	 */
266	ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
267			      true);
268	if (ret == 0) {
269		ret = t4vf_change_mac(pi->adapter, pi->viid,
270				      pi->xact_addr_filt, dev->dev_addr, true);
271		if (ret >= 0) {
272			pi->xact_addr_filt = ret;
273			ret = 0;
274		}
275	}
276
277	/*
278	 * We don't need to actually "start the link" itself since the
279	 * firmware will do that for us when the first Virtual Interface
280	 * is enabled on a port.
281	 */
282	if (ret == 0)
283		ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
284	return ret;
285}
286
287/*
288 * Name the MSI-X interrupts.
289 */
290static void name_msix_vecs(struct adapter *adapter)
291{
292	int namelen = sizeof(adapter->msix_info[0].desc) - 1;
293	int pidx;
294
295	/*
296	 * Firmware events.
297	 */
298	snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
299		 "%s-FWeventq", adapter->name);
300	adapter->msix_info[MSIX_FW].desc[namelen] = 0;
301
302	/*
303	 * Ethernet queues.
304	 */
305	for_each_port(adapter, pidx) {
306		struct net_device *dev = adapter->port[pidx];
307		const struct port_info *pi = netdev_priv(dev);
308		int qs, msi;
309
310		for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
311			snprintf(adapter->msix_info[msi].desc, namelen,
312				 "%s-%d", dev->name, qs);
313			adapter->msix_info[msi].desc[namelen] = 0;
314		}
315	}
316}
317
318/*
319 * Request all of our MSI-X resources.
320 */
321static int request_msix_queue_irqs(struct adapter *adapter)
322{
323	struct sge *s = &adapter->sge;
324	int rxq, msi, err;
325
326	/*
327	 * Firmware events.
328	 */
329	err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
330			  0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
331	if (err)
332		return err;
333
334	/*
335	 * Ethernet queues.
336	 */
337	msi = MSIX_IQFLINT;
338	for_each_ethrxq(s, rxq) {
339		err = request_irq(adapter->msix_info[msi].vec,
340				  t4vf_sge_intr_msix, 0,
341				  adapter->msix_info[msi].desc,
342				  &s->ethrxq[rxq].rspq);
343		if (err)
344			goto err_free_irqs;
345		msi++;
346	}
347	return 0;
348
349err_free_irqs:
350	while (--rxq >= 0)
351		free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
352	free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
353	return err;
354}
355
356/*
357 * Free our MSI-X resources.
358 */
359static void free_msix_queue_irqs(struct adapter *adapter)
360{
361	struct sge *s = &adapter->sge;
362	int rxq, msi;
363
364	free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
365	msi = MSIX_IQFLINT;
366	for_each_ethrxq(s, rxq)
367		free_irq(adapter->msix_info[msi++].vec,
368			 &s->ethrxq[rxq].rspq);
369}
370
371/*
372 * Turn on NAPI and start up interrupts on a response queue.
373 */
374static void qenable(struct sge_rspq *rspq)
375{
376	napi_enable(&rspq->napi);
377
378	/*
379	 * 0-increment the Going To Sleep register to start the timer and
380	 * enable interrupts.
381	 */
382	t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
383		     CIDXINC_V(0) |
384		     SEINTARM_V(rspq->intr_params) |
385		     INGRESSQID_V(rspq->cntxt_id));
386}
387
388/*
389 * Enable NAPI scheduling and interrupt generation for all Receive Queues.
390 */
391static void enable_rx(struct adapter *adapter)
392{
393	int rxq;
394	struct sge *s = &adapter->sge;
395
396	for_each_ethrxq(s, rxq)
397		qenable(&s->ethrxq[rxq].rspq);
398	qenable(&s->fw_evtq);
399
400	/*
401	 * The interrupt queue doesn't use NAPI so we do the 0-increment of
402	 * its Going To Sleep register here to get it started.
403	 */
404	if (adapter->flags & USING_MSI)
405		t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
406			     CIDXINC_V(0) |
407			     SEINTARM_V(s->intrq.intr_params) |
408			     INGRESSQID_V(s->intrq.cntxt_id));
409
410}
411
412/*
413 * Wait until all NAPI handlers are descheduled.
414 */
415static void quiesce_rx(struct adapter *adapter)
416{
417	struct sge *s = &adapter->sge;
418	int rxq;
419
420	for_each_ethrxq(s, rxq)
421		napi_disable(&s->ethrxq[rxq].rspq.napi);
422	napi_disable(&s->fw_evtq.napi);
423}
424
425/*
426 * Response queue handler for the firmware event queue.
427 */
428static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
429			  const struct pkt_gl *gl)
430{
431	/*
432	 * Extract response opcode and get pointer to CPL message body.
433	 */
434	struct adapter *adapter = rspq->adapter;
435	u8 opcode = ((const struct rss_header *)rsp)->opcode;
436	void *cpl = (void *)(rsp + 1);
437
438	switch (opcode) {
439	case CPL_FW6_MSG: {
440		/*
441		 * We've received an asynchronous message from the firmware.
442		 */
443		const struct cpl_fw6_msg *fw_msg = cpl;
444		if (fw_msg->type == FW6_TYPE_CMD_RPL)
445			t4vf_handle_fw_rpl(adapter, fw_msg->data);
446		break;
447	}
448
449	case CPL_FW4_MSG: {
450		/* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
451		 */
452		const struct cpl_sge_egr_update *p = (void *)(rsp + 3);
453		opcode = CPL_OPCODE_G(ntohl(p->opcode_qid));
454		if (opcode != CPL_SGE_EGR_UPDATE) {
455			dev_err(adapter->pdev_dev, "unexpected FW4/CPL %#x on FW event queue\n"
456				, opcode);
457			break;
458		}
459		cpl = (void *)p;
460		/*FALLTHROUGH*/
461	}
462
463	case CPL_SGE_EGR_UPDATE: {
464		/*
465		 * We've received an Egress Queue Status Update message.  We
466		 * get these, if the SGE is configured to send these when the
467		 * firmware passes certain points in processing our TX
468		 * Ethernet Queue or if we make an explicit request for one.
469		 * We use these updates to determine when we may need to
470		 * restart a TX Ethernet Queue which was stopped for lack of
471		 * free TX Queue Descriptors ...
472		 */
473		const struct cpl_sge_egr_update *p = cpl;
474		unsigned int qid = EGR_QID_G(be32_to_cpu(p->opcode_qid));
475		struct sge *s = &adapter->sge;
476		struct sge_txq *tq;
477		struct sge_eth_txq *txq;
478		unsigned int eq_idx;
479
480		/*
481		 * Perform sanity checking on the Queue ID to make sure it
482		 * really refers to one of our TX Ethernet Egress Queues which
483		 * is active and matches the queue's ID.  None of these error
484		 * conditions should ever happen so we may want to either make
485		 * them fatal and/or conditionalized under DEBUG.
486		 */
487		eq_idx = EQ_IDX(s, qid);
488		if (unlikely(eq_idx >= MAX_EGRQ)) {
489			dev_err(adapter->pdev_dev,
490				"Egress Update QID %d out of range\n", qid);
491			break;
492		}
493		tq = s->egr_map[eq_idx];
494		if (unlikely(tq == NULL)) {
495			dev_err(adapter->pdev_dev,
496				"Egress Update QID %d TXQ=NULL\n", qid);
497			break;
498		}
499		txq = container_of(tq, struct sge_eth_txq, q);
500		if (unlikely(tq->abs_id != qid)) {
501			dev_err(adapter->pdev_dev,
502				"Egress Update QID %d refers to TXQ %d\n",
503				qid, tq->abs_id);
504			break;
505		}
506
507		/*
508		 * Restart a stopped TX Queue which has less than half of its
509		 * TX ring in use ...
510		 */
511		txq->q.restarts++;
512		netif_tx_wake_queue(txq->txq);
513		break;
514	}
515
516	default:
517		dev_err(adapter->pdev_dev,
518			"unexpected CPL %#x on FW event queue\n", opcode);
519	}
520
521	return 0;
522}
523
524/*
525 * Allocate SGE TX/RX response queues.  Determine how many sets of SGE queues
526 * to use and initializes them.  We support multiple "Queue Sets" per port if
527 * we have MSI-X, otherwise just one queue set per port.
528 */
529static int setup_sge_queues(struct adapter *adapter)
530{
531	struct sge *s = &adapter->sge;
532	int err, pidx, msix;
533
534	/*
535	 * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
536	 * state.
537	 */
538	bitmap_zero(s->starving_fl, MAX_EGRQ);
539
540	/*
541	 * If we're using MSI interrupt mode we need to set up a "forwarded
542	 * interrupt" queue which we'll set up with our MSI vector.  The rest
543	 * of the ingress queues will be set up to forward their interrupts to
544	 * this queue ...  This must be first since t4vf_sge_alloc_rxq() uses
545	 * the intrq's queue ID as the interrupt forwarding queue for the
546	 * subsequent calls ...
547	 */
548	if (adapter->flags & USING_MSI) {
549		err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
550					 adapter->port[0], 0, NULL, NULL);
551		if (err)
552			goto err_free_queues;
553	}
554
555	/*
556	 * Allocate our ingress queue for asynchronous firmware messages.
557	 */
558	err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
559				 MSIX_FW, NULL, fwevtq_handler);
560	if (err)
561		goto err_free_queues;
562
563	/*
564	 * Allocate each "port"'s initial Queue Sets.  These can be changed
565	 * later on ... up to the point where any interface on the adapter is
566	 * brought up at which point lots of things get nailed down
567	 * permanently ...
568	 */
569	msix = MSIX_IQFLINT;
570	for_each_port(adapter, pidx) {
571		struct net_device *dev = adapter->port[pidx];
572		struct port_info *pi = netdev_priv(dev);
573		struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
574		struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
575		int qs;
576
577		for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
578			err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
579						 dev, msix++,
580						 &rxq->fl, t4vf_ethrx_handler);
581			if (err)
582				goto err_free_queues;
583
584			err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
585					     netdev_get_tx_queue(dev, qs),
586					     s->fw_evtq.cntxt_id);
587			if (err)
588				goto err_free_queues;
589
590			rxq->rspq.idx = qs;
591			memset(&rxq->stats, 0, sizeof(rxq->stats));
592		}
593	}
594
595	/*
596	 * Create the reverse mappings for the queues.
597	 */
598	s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
599	s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
600	IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
601	for_each_port(adapter, pidx) {
602		struct net_device *dev = adapter->port[pidx];
603		struct port_info *pi = netdev_priv(dev);
604		struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
605		struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
606		int qs;
607
608		for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
609			IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
610			EQ_MAP(s, txq->q.abs_id) = &txq->q;
611
612			/*
613			 * The FW_IQ_CMD doesn't return the Absolute Queue IDs
614			 * for Free Lists but since all of the Egress Queues
615			 * (including Free Lists) have Relative Queue IDs
616			 * which are computed as Absolute - Base Queue ID, we
617			 * can synthesize the Absolute Queue IDs for the Free
618			 * Lists.  This is useful for debugging purposes when
619			 * we want to dump Queue Contexts via the PF Driver.
620			 */
621			rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
622			EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
623		}
624	}
625	return 0;
626
627err_free_queues:
628	t4vf_free_sge_resources(adapter);
629	return err;
630}
631
632/*
633 * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
634 * queues.  We configure the RSS CPU lookup table to distribute to the number
635 * of HW receive queues, and the response queue lookup table to narrow that
636 * down to the response queues actually configured for each "port" (Virtual
637 * Interface).  We always configure the RSS mapping for all ports since the
638 * mapping table has plenty of entries.
639 */
640static int setup_rss(struct adapter *adapter)
641{
642	int pidx;
643
644	for_each_port(adapter, pidx) {
645		struct port_info *pi = adap2pinfo(adapter, pidx);
646		struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
647		u16 rss[MAX_PORT_QSETS];
648		int qs, err;
649
650		for (qs = 0; qs < pi->nqsets; qs++)
651			rss[qs] = rxq[qs].rspq.abs_id;
652
653		err = t4vf_config_rss_range(adapter, pi->viid,
654					    0, pi->rss_size, rss, pi->nqsets);
655		if (err)
656			return err;
657
658		/*
659		 * Perform Global RSS Mode-specific initialization.
660		 */
661		switch (adapter->params.rss.mode) {
662		case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
663			/*
664			 * If Tunnel All Lookup isn't specified in the global
665			 * RSS Configuration, then we need to specify a
666			 * default Ingress Queue for any ingress packets which
667			 * aren't hashed.  We'll use our first ingress queue
668			 * ...
669			 */
670			if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
671				union rss_vi_config config;
672				err = t4vf_read_rss_vi_config(adapter,
673							      pi->viid,
674							      &config);
675				if (err)
676					return err;
677				config.basicvirtual.defaultq =
678					rxq[0].rspq.abs_id;
679				err = t4vf_write_rss_vi_config(adapter,
680							       pi->viid,
681							       &config);
682				if (err)
683					return err;
684			}
685			break;
686		}
687	}
688
689	return 0;
690}
691
692/*
693 * Bring the adapter up.  Called whenever we go from no "ports" open to having
694 * one open.  This function performs the actions necessary to make an adapter
695 * operational, such as completing the initialization of HW modules, and
696 * enabling interrupts.  Must be called with the rtnl lock held.  (Note that
697 * this is called "cxgb_up" in the PF Driver.)
698 */
699static int adapter_up(struct adapter *adapter)
700{
701	int err;
702
703	/*
704	 * If this is the first time we've been called, perform basic
705	 * adapter setup.  Once we've done this, many of our adapter
706	 * parameters can no longer be changed ...
707	 */
708	if ((adapter->flags & FULL_INIT_DONE) == 0) {
709		err = setup_sge_queues(adapter);
710		if (err)
711			return err;
712		err = setup_rss(adapter);
713		if (err) {
714			t4vf_free_sge_resources(adapter);
715			return err;
716		}
717
718		if (adapter->flags & USING_MSIX)
719			name_msix_vecs(adapter);
720		adapter->flags |= FULL_INIT_DONE;
721	}
722
723	/*
724	 * Acquire our interrupt resources.  We only support MSI-X and MSI.
725	 */
726	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
727	if (adapter->flags & USING_MSIX)
728		err = request_msix_queue_irqs(adapter);
729	else
730		err = request_irq(adapter->pdev->irq,
731				  t4vf_intr_handler(adapter), 0,
732				  adapter->name, adapter);
733	if (err) {
734		dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
735			err);
736		return err;
737	}
738
739	/*
740	 * Enable NAPI ingress processing and return success.
741	 */
742	enable_rx(adapter);
743	t4vf_sge_start(adapter);
744	return 0;
745}
746
747/*
748 * Bring the adapter down.  Called whenever the last "port" (Virtual
749 * Interface) closed.  (Note that this routine is called "cxgb_down" in the PF
750 * Driver.)
751 */
752static void adapter_down(struct adapter *adapter)
753{
754	/*
755	 * Free interrupt resources.
756	 */
757	if (adapter->flags & USING_MSIX)
758		free_msix_queue_irqs(adapter);
759	else
760		free_irq(adapter->pdev->irq, adapter);
761
762	/*
763	 * Wait for NAPI handlers to finish.
764	 */
765	quiesce_rx(adapter);
766}
767
768/*
769 * Start up a net device.
770 */
771static int cxgb4vf_open(struct net_device *dev)
772{
773	int err;
774	struct port_info *pi = netdev_priv(dev);
775	struct adapter *adapter = pi->adapter;
776
777	/*
778	 * If this is the first interface that we're opening on the "adapter",
779	 * bring the "adapter" up now.
780	 */
781	if (adapter->open_device_map == 0) {
782		err = adapter_up(adapter);
783		if (err)
784			return err;
785	}
786
787	/*
788	 * Note that this interface is up and start everything up ...
789	 */
790	netif_set_real_num_tx_queues(dev, pi->nqsets);
791	err = netif_set_real_num_rx_queues(dev, pi->nqsets);
792	if (err)
793		goto err_unwind;
794	err = link_start(dev);
795	if (err)
796		goto err_unwind;
797
798	netif_tx_start_all_queues(dev);
799	set_bit(pi->port_id, &adapter->open_device_map);
800	return 0;
801
802err_unwind:
803	if (adapter->open_device_map == 0)
804		adapter_down(adapter);
805	return err;
806}
807
808/*
809 * Shut down a net device.  This routine is called "cxgb_close" in the PF
810 * Driver ...
811 */
812static int cxgb4vf_stop(struct net_device *dev)
813{
814	struct port_info *pi = netdev_priv(dev);
815	struct adapter *adapter = pi->adapter;
816
817	netif_tx_stop_all_queues(dev);
818	netif_carrier_off(dev);
819	t4vf_enable_vi(adapter, pi->viid, false, false);
820	pi->link_cfg.link_ok = 0;
821
822	clear_bit(pi->port_id, &adapter->open_device_map);
823	if (adapter->open_device_map == 0)
824		adapter_down(adapter);
825	return 0;
826}
827
828/*
829 * Translate our basic statistics into the standard "ifconfig" statistics.
830 */
831static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
832{
833	struct t4vf_port_stats stats;
834	struct port_info *pi = netdev2pinfo(dev);
835	struct adapter *adapter = pi->adapter;
836	struct net_device_stats *ns = &dev->stats;
837	int err;
838
839	spin_lock(&adapter->stats_lock);
840	err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
841	spin_unlock(&adapter->stats_lock);
842
843	memset(ns, 0, sizeof(*ns));
844	if (err)
845		return ns;
846
847	ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
848			stats.tx_ucast_bytes + stats.tx_offload_bytes);
849	ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
850			  stats.tx_ucast_frames + stats.tx_offload_frames);
851	ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
852			stats.rx_ucast_bytes);
853	ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
854			  stats.rx_ucast_frames);
855	ns->multicast = stats.rx_mcast_frames;
856	ns->tx_errors = stats.tx_drop_frames;
857	ns->rx_errors = stats.rx_err_frames;
858
859	return ns;
860}
861
862/*
863 * Collect up to maxaddrs worth of a netdevice's unicast addresses, starting
864 * at a specified offset within the list, into an array of addrss pointers and
865 * return the number collected.
866 */
867static inline unsigned int collect_netdev_uc_list_addrs(const struct net_device *dev,
868							const u8 **addr,
869							unsigned int offset,
870							unsigned int maxaddrs)
871{
872	unsigned int index = 0;
873	unsigned int naddr = 0;
874	const struct netdev_hw_addr *ha;
875
876	for_each_dev_addr(dev, ha)
877		if (index++ >= offset) {
878			addr[naddr++] = ha->addr;
879			if (naddr >= maxaddrs)
880				break;
881		}
882	return naddr;
883}
884
885/*
886 * Collect up to maxaddrs worth of a netdevice's multicast addresses, starting
887 * at a specified offset within the list, into an array of addrss pointers and
888 * return the number collected.
889 */
890static inline unsigned int collect_netdev_mc_list_addrs(const struct net_device *dev,
891							const u8 **addr,
892							unsigned int offset,
893							unsigned int maxaddrs)
894{
895	unsigned int index = 0;
896	unsigned int naddr = 0;
897	const struct netdev_hw_addr *ha;
898
899	netdev_for_each_mc_addr(ha, dev)
900		if (index++ >= offset) {
901			addr[naddr++] = ha->addr;
902			if (naddr >= maxaddrs)
903				break;
904		}
905	return naddr;
906}
907
908/*
909 * Configure the exact and hash address filters to handle a port's multicast
910 * and secondary unicast MAC addresses.
911 */
912static int set_addr_filters(const struct net_device *dev, bool sleep)
913{
914	u64 mhash = 0;
915	u64 uhash = 0;
916	bool free = true;
917	unsigned int offset, naddr;
918	const u8 *addr[7];
919	int ret;
920	const struct port_info *pi = netdev_priv(dev);
921
922	/* first do the secondary unicast addresses */
923	for (offset = 0; ; offset += naddr) {
924		naddr = collect_netdev_uc_list_addrs(dev, addr, offset,
925						     ARRAY_SIZE(addr));
926		if (naddr == 0)
927			break;
928
929		ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
930					  naddr, addr, NULL, &uhash, sleep);
931		if (ret < 0)
932			return ret;
933
934		free = false;
935	}
936
937	/* next set up the multicast addresses */
938	for (offset = 0; ; offset += naddr) {
939		naddr = collect_netdev_mc_list_addrs(dev, addr, offset,
940						     ARRAY_SIZE(addr));
941		if (naddr == 0)
942			break;
943
944		ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
945					  naddr, addr, NULL, &mhash, sleep);
946		if (ret < 0)
947			return ret;
948		free = false;
949	}
950
951	return t4vf_set_addr_hash(pi->adapter, pi->viid, uhash != 0,
952				  uhash | mhash, sleep);
953}
954
955/*
956 * Set RX properties of a port, such as promiscruity, address filters, and MTU.
957 * If @mtu is -1 it is left unchanged.
958 */
959static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
960{
961	int ret;
962	struct port_info *pi = netdev_priv(dev);
963
964	ret = set_addr_filters(dev, sleep_ok);
965	if (ret == 0)
966		ret = t4vf_set_rxmode(pi->adapter, pi->viid, -1,
967				      (dev->flags & IFF_PROMISC) != 0,
968				      (dev->flags & IFF_ALLMULTI) != 0,
969				      1, -1, sleep_ok);
970	return ret;
971}
972
973/*
974 * Set the current receive modes on the device.
975 */
976static void cxgb4vf_set_rxmode(struct net_device *dev)
977{
978	/* unfortunately we can't return errors to the stack */
979	set_rxmode(dev, -1, false);
980}
981
982/*
983 * Find the entry in the interrupt holdoff timer value array which comes
984 * closest to the specified interrupt holdoff value.
985 */
986static int closest_timer(const struct sge *s, int us)
987{
988	int i, timer_idx = 0, min_delta = INT_MAX;
989
990	for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
991		int delta = us - s->timer_val[i];
992		if (delta < 0)
993			delta = -delta;
994		if (delta < min_delta) {
995			min_delta = delta;
996			timer_idx = i;
997		}
998	}
999	return timer_idx;
1000}
1001
1002static int closest_thres(const struct sge *s, int thres)
1003{
1004	int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;
1005
1006	for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
1007		delta = thres - s->counter_val[i];
1008		if (delta < 0)
1009			delta = -delta;
1010		if (delta < min_delta) {
1011			min_delta = delta;
1012			pktcnt_idx = i;
1013		}
1014	}
1015	return pktcnt_idx;
1016}
1017
1018/*
1019 * Return a queue's interrupt hold-off time in us.  0 means no timer.
1020 */
1021static unsigned int qtimer_val(const struct adapter *adapter,
1022			       const struct sge_rspq *rspq)
1023{
1024	unsigned int timer_idx = QINTR_TIMER_IDX_GET(rspq->intr_params);
1025
1026	return timer_idx < SGE_NTIMERS
1027		? adapter->sge.timer_val[timer_idx]
1028		: 0;
1029}
1030
1031/**
1032 *	set_rxq_intr_params - set a queue's interrupt holdoff parameters
1033 *	@adapter: the adapter
1034 *	@rspq: the RX response queue
1035 *	@us: the hold-off time in us, or 0 to disable timer
1036 *	@cnt: the hold-off packet count, or 0 to disable counter
1037 *
1038 *	Sets an RX response queue's interrupt hold-off time and packet count.
1039 *	At least one of the two needs to be enabled for the queue to generate
1040 *	interrupts.
1041 */
1042static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
1043			       unsigned int us, unsigned int cnt)
1044{
1045	unsigned int timer_idx;
1046
1047	/*
1048	 * If both the interrupt holdoff timer and count are specified as
1049	 * zero, default to a holdoff count of 1 ...
1050	 */
1051	if ((us | cnt) == 0)
1052		cnt = 1;
1053
1054	/*
1055	 * If an interrupt holdoff count has been specified, then find the
1056	 * closest configured holdoff count and use that.  If the response
1057	 * queue has already been created, then update its queue context
1058	 * parameters ...
1059	 */
1060	if (cnt) {
1061		int err;
1062		u32 v, pktcnt_idx;
1063
1064		pktcnt_idx = closest_thres(&adapter->sge, cnt);
1065		if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
1066			v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
1067			    FW_PARAMS_PARAM_X_V(
1068					FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1069			    FW_PARAMS_PARAM_YZ_V(rspq->cntxt_id);
1070			err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
1071			if (err)
1072				return err;
1073		}
1074		rspq->pktcnt_idx = pktcnt_idx;
1075	}
1076
1077	/*
1078	 * Compute the closest holdoff timer index from the supplied holdoff
1079	 * timer value.
1080	 */
1081	timer_idx = (us == 0
1082		     ? SGE_TIMER_RSTRT_CNTR
1083		     : closest_timer(&adapter->sge, us));
1084
1085	/*
1086	 * Update the response queue's interrupt coalescing parameters and
1087	 * return success.
1088	 */
1089	rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) |
1090			     (cnt > 0 ? QINTR_CNT_EN : 0));
1091	return 0;
1092}
1093
1094/*
1095 * Return a version number to identify the type of adapter.  The scheme is:
1096 * - bits 0..9: chip version
1097 * - bits 10..15: chip revision
1098 */
1099static inline unsigned int mk_adap_vers(const struct adapter *adapter)
1100{
1101	/*
1102	 * Chip version 4, revision 0x3f (cxgb4vf).
1103	 */
1104	return CHELSIO_CHIP_VERSION(adapter->params.chip) | (0x3f << 10);
1105}
1106
1107/*
1108 * Execute the specified ioctl command.
1109 */
1110static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1111{
1112	int ret = 0;
1113
1114	switch (cmd) {
1115	    /*
1116	     * The VF Driver doesn't have access to any of the other
1117	     * common Ethernet device ioctl()'s (like reading/writing
1118	     * PHY registers, etc.
1119	     */
1120
1121	default:
1122		ret = -EOPNOTSUPP;
1123		break;
1124	}
1125	return ret;
1126}
1127
1128/*
1129 * Change the device's MTU.
1130 */
1131static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
1132{
1133	int ret;
1134	struct port_info *pi = netdev_priv(dev);
1135
1136	/* accommodate SACK */
1137	if (new_mtu < 81)
1138		return -EINVAL;
1139
1140	ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
1141			      -1, -1, -1, -1, true);
1142	if (!ret)
1143		dev->mtu = new_mtu;
1144	return ret;
1145}
1146
1147static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
1148	netdev_features_t features)
1149{
1150	/*
1151	 * Since there is no support for separate rx/tx vlan accel
1152	 * enable/disable make sure tx flag is always in same state as rx.
1153	 */
1154	if (features & NETIF_F_HW_VLAN_CTAG_RX)
1155		features |= NETIF_F_HW_VLAN_CTAG_TX;
1156	else
1157		features &= ~NETIF_F_HW_VLAN_CTAG_TX;
1158
1159	return features;
1160}
1161
1162static int cxgb4vf_set_features(struct net_device *dev,
1163	netdev_features_t features)
1164{
1165	struct port_info *pi = netdev_priv(dev);
1166	netdev_features_t changed = dev->features ^ features;
1167
1168	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
1169		t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
1170				features & NETIF_F_HW_VLAN_CTAG_TX, 0);
1171
1172	return 0;
1173}
1174
1175/*
1176 * Change the devices MAC address.
1177 */
1178static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
1179{
1180	int ret;
1181	struct sockaddr *addr = _addr;
1182	struct port_info *pi = netdev_priv(dev);
1183
1184	if (!is_valid_ether_addr(addr->sa_data))
1185		return -EADDRNOTAVAIL;
1186
1187	ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt,
1188			      addr->sa_data, true);
1189	if (ret < 0)
1190		return ret;
1191
1192	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1193	pi->xact_addr_filt = ret;
1194	return 0;
1195}
1196
1197#ifdef CONFIG_NET_POLL_CONTROLLER
1198/*
1199 * Poll all of our receive queues.  This is called outside of normal interrupt
1200 * context.
1201 */
1202static void cxgb4vf_poll_controller(struct net_device *dev)
1203{
1204	struct port_info *pi = netdev_priv(dev);
1205	struct adapter *adapter = pi->adapter;
1206
1207	if (adapter->flags & USING_MSIX) {
1208		struct sge_eth_rxq *rxq;
1209		int nqsets;
1210
1211		rxq = &adapter->sge.ethrxq[pi->first_qset];
1212		for (nqsets = pi->nqsets; nqsets; nqsets--) {
1213			t4vf_sge_intr_msix(0, &rxq->rspq);
1214			rxq++;
1215		}
1216	} else
1217		t4vf_intr_handler(adapter)(0, adapter);
1218}
1219#endif
1220
1221/*
1222 * Ethtool operations.
1223 * ===================
1224 *
1225 * Note that we don't support any ethtool operations which change the physical
1226 * state of the port to which we're linked.
1227 */
1228
1229static unsigned int t4vf_from_fw_linkcaps(enum fw_port_type type,
1230					  unsigned int caps)
1231{
1232	unsigned int v = 0;
1233
1234	if (type == FW_PORT_TYPE_BT_SGMII || type == FW_PORT_TYPE_BT_XFI ||
1235	    type == FW_PORT_TYPE_BT_XAUI) {
1236		v |= SUPPORTED_TP;
1237		if (caps & FW_PORT_CAP_SPEED_100M)
1238			v |= SUPPORTED_100baseT_Full;
1239		if (caps & FW_PORT_CAP_SPEED_1G)
1240			v |= SUPPORTED_1000baseT_Full;
1241		if (caps & FW_PORT_CAP_SPEED_10G)
1242			v |= SUPPORTED_10000baseT_Full;
1243	} else if (type == FW_PORT_TYPE_KX4 || type == FW_PORT_TYPE_KX) {
1244		v |= SUPPORTED_Backplane;
1245		if (caps & FW_PORT_CAP_SPEED_1G)
1246			v |= SUPPORTED_1000baseKX_Full;
1247		if (caps & FW_PORT_CAP_SPEED_10G)
1248			v |= SUPPORTED_10000baseKX4_Full;
1249	} else if (type == FW_PORT_TYPE_KR)
1250		v |= SUPPORTED_Backplane | SUPPORTED_10000baseKR_Full;
1251	else if (type == FW_PORT_TYPE_BP_AP)
1252		v |= SUPPORTED_Backplane | SUPPORTED_10000baseR_FEC |
1253		     SUPPORTED_10000baseKR_Full | SUPPORTED_1000baseKX_Full;
1254	else if (type == FW_PORT_TYPE_BP4_AP)
1255		v |= SUPPORTED_Backplane | SUPPORTED_10000baseR_FEC |
1256		     SUPPORTED_10000baseKR_Full | SUPPORTED_1000baseKX_Full |
1257		     SUPPORTED_10000baseKX4_Full;
1258	else if (type == FW_PORT_TYPE_FIBER_XFI ||
1259		 type == FW_PORT_TYPE_FIBER_XAUI ||
1260		 type == FW_PORT_TYPE_SFP ||
1261		 type == FW_PORT_TYPE_QSFP_10G ||
1262		 type == FW_PORT_TYPE_QSA) {
1263		v |= SUPPORTED_FIBRE;
1264		if (caps & FW_PORT_CAP_SPEED_1G)
1265			v |= SUPPORTED_1000baseT_Full;
1266		if (caps & FW_PORT_CAP_SPEED_10G)
1267			v |= SUPPORTED_10000baseT_Full;
1268	} else if (type == FW_PORT_TYPE_BP40_BA ||
1269		   type == FW_PORT_TYPE_QSFP) {
1270		v |= SUPPORTED_40000baseSR4_Full;
1271		v |= SUPPORTED_FIBRE;
1272	}
1273
1274	if (caps & FW_PORT_CAP_ANEG)
1275		v |= SUPPORTED_Autoneg;
1276	return v;
1277}
1278
1279static int cxgb4vf_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
1280{
1281	const struct port_info *p = netdev_priv(dev);
1282
1283	if (p->port_type == FW_PORT_TYPE_BT_SGMII ||
1284	    p->port_type == FW_PORT_TYPE_BT_XFI ||
1285	    p->port_type == FW_PORT_TYPE_BT_XAUI)
1286		cmd->port = PORT_TP;
1287	else if (p->port_type == FW_PORT_TYPE_FIBER_XFI ||
1288		 p->port_type == FW_PORT_TYPE_FIBER_XAUI)
1289		cmd->port = PORT_FIBRE;
1290	else if (p->port_type == FW_PORT_TYPE_SFP ||
1291		 p->port_type == FW_PORT_TYPE_QSFP_10G ||
1292		 p->port_type == FW_PORT_TYPE_QSA ||
1293		 p->port_type == FW_PORT_TYPE_QSFP) {
1294		if (p->mod_type == FW_PORT_MOD_TYPE_LR ||
1295		    p->mod_type == FW_PORT_MOD_TYPE_SR ||
1296		    p->mod_type == FW_PORT_MOD_TYPE_ER ||
1297		    p->mod_type == FW_PORT_MOD_TYPE_LRM)
1298			cmd->port = PORT_FIBRE;
1299		else if (p->mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
1300			 p->mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
1301			cmd->port = PORT_DA;
1302		else
1303			cmd->port = PORT_OTHER;
1304	} else
1305		cmd->port = PORT_OTHER;
1306
1307	if (p->mdio_addr >= 0) {
1308		cmd->phy_address = p->mdio_addr;
1309		cmd->transceiver = XCVR_EXTERNAL;
1310		cmd->mdio_support = p->port_type == FW_PORT_TYPE_BT_SGMII ?
1311			MDIO_SUPPORTS_C22 : MDIO_SUPPORTS_C45;
1312	} else {
1313		cmd->phy_address = 0;  /* not really, but no better option */
1314		cmd->transceiver = XCVR_INTERNAL;
1315		cmd->mdio_support = 0;
1316	}
1317
1318	cmd->supported = t4vf_from_fw_linkcaps(p->port_type,
1319					       p->link_cfg.supported);
1320	cmd->advertising = t4vf_from_fw_linkcaps(p->port_type,
1321					    p->link_cfg.advertising);
1322	ethtool_cmd_speed_set(cmd,
1323			      netif_carrier_ok(dev) ? p->link_cfg.speed : 0);
1324	cmd->duplex = DUPLEX_FULL;
1325	cmd->autoneg = p->link_cfg.autoneg;
1326	cmd->maxtxpkt = 0;
1327	cmd->maxrxpkt = 0;
1328	return 0;
1329}
1330
1331/*
1332 * Return our driver information.
1333 */
1334static void cxgb4vf_get_drvinfo(struct net_device *dev,
1335				struct ethtool_drvinfo *drvinfo)
1336{
1337	struct adapter *adapter = netdev2adap(dev);
1338
1339	strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
1340	strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
1341	strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)),
1342		sizeof(drvinfo->bus_info));
1343	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
1344		 "%u.%u.%u.%u, TP %u.%u.%u.%u",
1345		 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.fwrev),
1346		 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.fwrev),
1347		 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.fwrev),
1348		 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.fwrev),
1349		 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.tprev),
1350		 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.tprev),
1351		 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.tprev),
1352		 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.tprev));
1353}
1354
1355/*
1356 * Return current adapter message level.
1357 */
1358static u32 cxgb4vf_get_msglevel(struct net_device *dev)
1359{
1360	return netdev2adap(dev)->msg_enable;
1361}
1362
1363/*
1364 * Set current adapter message level.
1365 */
1366static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
1367{
1368	netdev2adap(dev)->msg_enable = msglevel;
1369}
1370
1371/*
1372 * Return the device's current Queue Set ring size parameters along with the
1373 * allowed maximum values.  Since ethtool doesn't understand the concept of
1374 * multi-queue devices, we just return the current values associated with the
1375 * first Queue Set.
1376 */
1377static void cxgb4vf_get_ringparam(struct net_device *dev,
1378				  struct ethtool_ringparam *rp)
1379{
1380	const struct port_info *pi = netdev_priv(dev);
1381	const struct sge *s = &pi->adapter->sge;
1382
1383	rp->rx_max_pending = MAX_RX_BUFFERS;
1384	rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1385	rp->rx_jumbo_max_pending = 0;
1386	rp->tx_max_pending = MAX_TXQ_ENTRIES;
1387
1388	rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
1389	rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1390	rp->rx_jumbo_pending = 0;
1391	rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
1392}
1393
1394/*
1395 * Set the Queue Set ring size parameters for the device.  Again, since
1396 * ethtool doesn't allow for the concept of multiple queues per device, we'll
1397 * apply these new values across all of the Queue Sets associated with the
1398 * device -- after vetting them of course!
1399 */
1400static int cxgb4vf_set_ringparam(struct net_device *dev,
1401				 struct ethtool_ringparam *rp)
1402{
1403	const struct port_info *pi = netdev_priv(dev);
1404	struct adapter *adapter = pi->adapter;
1405	struct sge *s = &adapter->sge;
1406	int qs;
1407
1408	if (rp->rx_pending > MAX_RX_BUFFERS ||
1409	    rp->rx_jumbo_pending ||
1410	    rp->tx_pending > MAX_TXQ_ENTRIES ||
1411	    rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1412	    rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1413	    rp->rx_pending < MIN_FL_ENTRIES ||
1414	    rp->tx_pending < MIN_TXQ_ENTRIES)
1415		return -EINVAL;
1416
1417	if (adapter->flags & FULL_INIT_DONE)
1418		return -EBUSY;
1419
1420	for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
1421		s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
1422		s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
1423		s->ethtxq[qs].q.size = rp->tx_pending;
1424	}
1425	return 0;
1426}
1427
1428/*
1429 * Return the interrupt holdoff timer and count for the first Queue Set on the
1430 * device.  Our extension ioctl() (the cxgbtool interface) allows the
1431 * interrupt holdoff timer to be read on all of the device's Queue Sets.
1432 */
1433static int cxgb4vf_get_coalesce(struct net_device *dev,
1434				struct ethtool_coalesce *coalesce)
1435{
1436	const struct port_info *pi = netdev_priv(dev);
1437	const struct adapter *adapter = pi->adapter;
1438	const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
1439
1440	coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
1441	coalesce->rx_max_coalesced_frames =
1442		((rspq->intr_params & QINTR_CNT_EN)
1443		 ? adapter->sge.counter_val[rspq->pktcnt_idx]
1444		 : 0);
1445	return 0;
1446}
1447
1448/*
1449 * Set the RX interrupt holdoff timer and count for the first Queue Set on the
1450 * interface.  Our extension ioctl() (the cxgbtool interface) allows us to set
1451 * the interrupt holdoff timer on any of the device's Queue Sets.
1452 */
1453static int cxgb4vf_set_coalesce(struct net_device *dev,
1454				struct ethtool_coalesce *coalesce)
1455{
1456	const struct port_info *pi = netdev_priv(dev);
1457	struct adapter *adapter = pi->adapter;
1458
1459	return set_rxq_intr_params(adapter,
1460				   &adapter->sge.ethrxq[pi->first_qset].rspq,
1461				   coalesce->rx_coalesce_usecs,
1462				   coalesce->rx_max_coalesced_frames);
1463}
1464
1465/*
1466 * Report current port link pause parameter settings.
1467 */
1468static void cxgb4vf_get_pauseparam(struct net_device *dev,
1469				   struct ethtool_pauseparam *pauseparam)
1470{
1471	struct port_info *pi = netdev_priv(dev);
1472
1473	pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1474	pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
1475	pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
1476}
1477
1478/*
1479 * Identify the port by blinking the port's LED.
1480 */
1481static int cxgb4vf_phys_id(struct net_device *dev,
1482			   enum ethtool_phys_id_state state)
1483{
1484	unsigned int val;
1485	struct port_info *pi = netdev_priv(dev);
1486
1487	if (state == ETHTOOL_ID_ACTIVE)
1488		val = 0xffff;
1489	else if (state == ETHTOOL_ID_INACTIVE)
1490		val = 0;
1491	else
1492		return -EINVAL;
1493
1494	return t4vf_identify_port(pi->adapter, pi->viid, val);
1495}
1496
1497/*
1498 * Port stats maintained per queue of the port.
1499 */
1500struct queue_port_stats {
1501	u64 tso;
1502	u64 tx_csum;
1503	u64 rx_csum;
1504	u64 vlan_ex;
1505	u64 vlan_ins;
1506	u64 lro_pkts;
1507	u64 lro_merged;
1508};
1509
1510/*
1511 * Strings for the ETH_SS_STATS statistics set ("ethtool -S").  Note that
1512 * these need to match the order of statistics returned by
1513 * t4vf_get_port_stats().
1514 */
1515static const char stats_strings[][ETH_GSTRING_LEN] = {
1516	/*
1517	 * These must match the layout of the t4vf_port_stats structure.
1518	 */
1519	"TxBroadcastBytes  ",
1520	"TxBroadcastFrames ",
1521	"TxMulticastBytes  ",
1522	"TxMulticastFrames ",
1523	"TxUnicastBytes    ",
1524	"TxUnicastFrames   ",
1525	"TxDroppedFrames   ",
1526	"TxOffloadBytes    ",
1527	"TxOffloadFrames   ",
1528	"RxBroadcastBytes  ",
1529	"RxBroadcastFrames ",
1530	"RxMulticastBytes  ",
1531	"RxMulticastFrames ",
1532	"RxUnicastBytes    ",
1533	"RxUnicastFrames   ",
1534	"RxErrorFrames     ",
1535
1536	/*
1537	 * These are accumulated per-queue statistics and must match the
1538	 * order of the fields in the queue_port_stats structure.
1539	 */
1540	"TSO               ",
1541	"TxCsumOffload     ",
1542	"RxCsumGood        ",
1543	"VLANextractions   ",
1544	"VLANinsertions    ",
1545	"GROPackets        ",
1546	"GROMerged         ",
1547};
1548
1549/*
1550 * Return the number of statistics in the specified statistics set.
1551 */
1552static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
1553{
1554	switch (sset) {
1555	case ETH_SS_STATS:
1556		return ARRAY_SIZE(stats_strings);
1557	default:
1558		return -EOPNOTSUPP;
1559	}
1560	/*NOTREACHED*/
1561}
1562
1563/*
1564 * Return the strings for the specified statistics set.
1565 */
1566static void cxgb4vf_get_strings(struct net_device *dev,
1567				u32 sset,
1568				u8 *data)
1569{
1570	switch (sset) {
1571	case ETH_SS_STATS:
1572		memcpy(data, stats_strings, sizeof(stats_strings));
1573		break;
1574	}
1575}
1576
1577/*
1578 * Small utility routine to accumulate queue statistics across the queues of
1579 * a "port".
1580 */
1581static void collect_sge_port_stats(const struct adapter *adapter,
1582				   const struct port_info *pi,
1583				   struct queue_port_stats *stats)
1584{
1585	const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
1586	const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
1587	int qs;
1588
1589	memset(stats, 0, sizeof(*stats));
1590	for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
1591		stats->tso += txq->tso;
1592		stats->tx_csum += txq->tx_cso;
1593		stats->rx_csum += rxq->stats.rx_cso;
1594		stats->vlan_ex += rxq->stats.vlan_ex;
1595		stats->vlan_ins += txq->vlan_ins;
1596		stats->lro_pkts += rxq->stats.lro_pkts;
1597		stats->lro_merged += rxq->stats.lro_merged;
1598	}
1599}
1600
1601/*
1602 * Return the ETH_SS_STATS statistics set.
1603 */
1604static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
1605				      struct ethtool_stats *stats,
1606				      u64 *data)
1607{
1608	struct port_info *pi = netdev2pinfo(dev);
1609	struct adapter *adapter = pi->adapter;
1610	int err = t4vf_get_port_stats(adapter, pi->pidx,
1611				      (struct t4vf_port_stats *)data);
1612	if (err)
1613		memset(data, 0, sizeof(struct t4vf_port_stats));
1614
1615	data += sizeof(struct t4vf_port_stats) / sizeof(u64);
1616	collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1617}
1618
1619/*
1620 * Return the size of our register map.
1621 */
1622static int cxgb4vf_get_regs_len(struct net_device *dev)
1623{
1624	return T4VF_REGMAP_SIZE;
1625}
1626
1627/*
1628 * Dump a block of registers, start to end inclusive, into a buffer.
1629 */
1630static void reg_block_dump(struct adapter *adapter, void *regbuf,
1631			   unsigned int start, unsigned int end)
1632{
1633	u32 *bp = regbuf + start - T4VF_REGMAP_START;
1634
1635	for ( ; start <= end; start += sizeof(u32)) {
1636		/*
1637		 * Avoid reading the Mailbox Control register since that
1638		 * can trigger a Mailbox Ownership Arbitration cycle and
1639		 * interfere with communication with the firmware.
1640		 */
1641		if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
1642			*bp++ = 0xffff;
1643		else
1644			*bp++ = t4_read_reg(adapter, start);
1645	}
1646}
1647
1648/*
1649 * Copy our entire register map into the provided buffer.
1650 */
1651static void cxgb4vf_get_regs(struct net_device *dev,
1652			     struct ethtool_regs *regs,
1653			     void *regbuf)
1654{
1655	struct adapter *adapter = netdev2adap(dev);
1656
1657	regs->version = mk_adap_vers(adapter);
1658
1659	/*
1660	 * Fill in register buffer with our register map.
1661	 */
1662	memset(regbuf, 0, T4VF_REGMAP_SIZE);
1663
1664	reg_block_dump(adapter, regbuf,
1665		       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
1666		       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
1667	reg_block_dump(adapter, regbuf,
1668		       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
1669		       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);
1670
1671	/* T5 adds new registers in the PL Register map.
1672	 */
1673	reg_block_dump(adapter, regbuf,
1674		       T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
1675		       T4VF_PL_BASE_ADDR + (is_t4(adapter->params.chip)
1676		       ? PL_VF_WHOAMI_A : PL_VF_REVISION_A));
1677	reg_block_dump(adapter, regbuf,
1678		       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
1679		       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);
1680
1681	reg_block_dump(adapter, regbuf,
1682		       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
1683		       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
1684}
1685
1686/*
1687 * Report current Wake On LAN settings.
1688 */
1689static void cxgb4vf_get_wol(struct net_device *dev,
1690			    struct ethtool_wolinfo *wol)
1691{
1692	wol->supported = 0;
1693	wol->wolopts = 0;
1694	memset(&wol->sopass, 0, sizeof(wol->sopass));
1695}
1696
1697/*
1698 * TCP Segmentation Offload flags which we support.
1699 */
1700#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
1701
1702static const struct ethtool_ops cxgb4vf_ethtool_ops = {
1703	.get_settings		= cxgb4vf_get_settings,
1704	.get_drvinfo		= cxgb4vf_get_drvinfo,
1705	.get_msglevel		= cxgb4vf_get_msglevel,
1706	.set_msglevel		= cxgb4vf_set_msglevel,
1707	.get_ringparam		= cxgb4vf_get_ringparam,
1708	.set_ringparam		= cxgb4vf_set_ringparam,
1709	.get_coalesce		= cxgb4vf_get_coalesce,
1710	.set_coalesce		= cxgb4vf_set_coalesce,
1711	.get_pauseparam		= cxgb4vf_get_pauseparam,
1712	.get_link		= ethtool_op_get_link,
1713	.get_strings		= cxgb4vf_get_strings,
1714	.set_phys_id		= cxgb4vf_phys_id,
1715	.get_sset_count		= cxgb4vf_get_sset_count,
1716	.get_ethtool_stats	= cxgb4vf_get_ethtool_stats,
1717	.get_regs_len		= cxgb4vf_get_regs_len,
1718	.get_regs		= cxgb4vf_get_regs,
1719	.get_wol		= cxgb4vf_get_wol,
1720};
1721
1722/*
1723 * /sys/kernel/debug/cxgb4vf support code and data.
1724 * ================================================
1725 */
1726
1727/*
1728 * Show SGE Queue Set information.  We display QPL Queues Sets per line.
1729 */
1730#define QPL	4
1731
1732static int sge_qinfo_show(struct seq_file *seq, void *v)
1733{
1734	struct adapter *adapter = seq->private;
1735	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
1736	int qs, r = (uintptr_t)v - 1;
1737
1738	if (r)
1739		seq_putc(seq, '\n');
1740
1741	#define S3(fmt_spec, s, v) \
1742		do {\
1743			seq_printf(seq, "%-12s", s); \
1744			for (qs = 0; qs < n; ++qs) \
1745				seq_printf(seq, " %16" fmt_spec, v); \
1746			seq_putc(seq, '\n'); \
1747		} while (0)
1748	#define S(s, v)		S3("s", s, v)
1749	#define T(s, v)		S3("u", s, txq[qs].v)
1750	#define R(s, v)		S3("u", s, rxq[qs].v)
1751
1752	if (r < eth_entries) {
1753		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
1754		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
1755		int n = min(QPL, adapter->sge.ethqsets - QPL * r);
1756
1757		S("QType:", "Ethernet");
1758		S("Interface:",
1759		  (rxq[qs].rspq.netdev
1760		   ? rxq[qs].rspq.netdev->name
1761		   : "N/A"));
1762		S3("d", "Port:",
1763		   (rxq[qs].rspq.netdev
1764		    ? ((struct port_info *)
1765		       netdev_priv(rxq[qs].rspq.netdev))->port_id
1766		    : -1));
1767		T("TxQ ID:", q.abs_id);
1768		T("TxQ size:", q.size);
1769		T("TxQ inuse:", q.in_use);
1770		T("TxQ PIdx:", q.pidx);
1771		T("TxQ CIdx:", q.cidx);
1772		R("RspQ ID:", rspq.abs_id);
1773		R("RspQ size:", rspq.size);
1774		R("RspQE size:", rspq.iqe_len);
1775		S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
1776		S3("u", "Intr pktcnt:",
1777		   adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
1778		R("RspQ CIdx:", rspq.cidx);
1779		R("RspQ Gen:", rspq.gen);
1780		R("FL ID:", fl.abs_id);
1781		R("FL size:", fl.size - MIN_FL_RESID);
1782		R("FL avail:", fl.avail);
1783		R("FL PIdx:", fl.pidx);
1784		R("FL CIdx:", fl.cidx);
1785		return 0;
1786	}
1787
1788	r -= eth_entries;
1789	if (r == 0) {
1790		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
1791
1792		seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
1793		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
1794		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
1795			   qtimer_val(adapter, evtq));
1796		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
1797			   adapter->sge.counter_val[evtq->pktcnt_idx]);
1798		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
1799		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
1800	} else if (r == 1) {
1801		const struct sge_rspq *intrq = &adapter->sge.intrq;
1802
1803		seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
1804		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
1805		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
1806			   qtimer_val(adapter, intrq));
1807		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
1808			   adapter->sge.counter_val[intrq->pktcnt_idx]);
1809		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx);
1810		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
1811	}
1812
1813	#undef R
1814	#undef T
1815	#undef S
1816	#undef S3
1817
1818	return 0;
1819}
1820
1821/*
1822 * Return the number of "entries" in our "file".  We group the multi-Queue
1823 * sections with QPL Queue Sets per "entry".  The sections of the output are:
1824 *
1825 *     Ethernet RX/TX Queue Sets
1826 *     Firmware Event Queue
1827 *     Forwarded Interrupt Queue (if in MSI mode)
1828 */
1829static int sge_queue_entries(const struct adapter *adapter)
1830{
1831	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1832		((adapter->flags & USING_MSI) != 0);
1833}
1834
1835static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
1836{
1837	int entries = sge_queue_entries(seq->private);
1838
1839	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1840}
1841
1842static void sge_queue_stop(struct seq_file *seq, void *v)
1843{
1844}
1845
1846static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
1847{
1848	int entries = sge_queue_entries(seq->private);
1849
1850	++*pos;
1851	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1852}
1853
1854static const struct seq_operations sge_qinfo_seq_ops = {
1855	.start = sge_queue_start,
1856	.next  = sge_queue_next,
1857	.stop  = sge_queue_stop,
1858	.show  = sge_qinfo_show
1859};
1860
1861static int sge_qinfo_open(struct inode *inode, struct file *file)
1862{
1863	int res = seq_open(file, &sge_qinfo_seq_ops);
1864
1865	if (!res) {
1866		struct seq_file *seq = file->private_data;
1867		seq->private = inode->i_private;
1868	}
1869	return res;
1870}
1871
1872static const struct file_operations sge_qinfo_debugfs_fops = {
1873	.owner   = THIS_MODULE,
1874	.open    = sge_qinfo_open,
1875	.read    = seq_read,
1876	.llseek  = seq_lseek,
1877	.release = seq_release,
1878};
1879
1880/*
1881 * Show SGE Queue Set statistics.  We display QPL Queues Sets per line.
1882 */
1883#define QPL	4
1884
1885static int sge_qstats_show(struct seq_file *seq, void *v)
1886{
1887	struct adapter *adapter = seq->private;
1888	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
1889	int qs, r = (uintptr_t)v - 1;
1890
1891	if (r)
1892		seq_putc(seq, '\n');
1893
1894	#define S3(fmt, s, v) \
1895		do { \
1896			seq_printf(seq, "%-16s", s); \
1897			for (qs = 0; qs < n; ++qs) \
1898				seq_printf(seq, " %8" fmt, v); \
1899			seq_putc(seq, '\n'); \
1900		} while (0)
1901	#define S(s, v)		S3("s", s, v)
1902
1903	#define T3(fmt, s, v)	S3(fmt, s, txq[qs].v)
1904	#define T(s, v)		T3("lu", s, v)
1905
1906	#define R3(fmt, s, v)	S3(fmt, s, rxq[qs].v)
1907	#define R(s, v)		R3("lu", s, v)
1908
1909	if (r < eth_entries) {
1910		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
1911		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
1912		int n = min(QPL, adapter->sge.ethqsets - QPL * r);
1913
1914		S("QType:", "Ethernet");
1915		S("Interface:",
1916		  (rxq[qs].rspq.netdev
1917		   ? rxq[qs].rspq.netdev->name
1918		   : "N/A"));
1919		R3("u", "RspQNullInts:", rspq.unhandled_irqs);
1920		R("RxPackets:", stats.pkts);
1921		R("RxCSO:", stats.rx_cso);
1922		R("VLANxtract:", stats.vlan_ex);
1923		R("LROmerged:", stats.lro_merged);
1924		R("LROpackets:", stats.lro_pkts);
1925		R("RxDrops:", stats.rx_drops);
1926		T("TSO:", tso);
1927		T("TxCSO:", tx_cso);
1928		T("VLANins:", vlan_ins);
1929		T("TxQFull:", q.stops);
1930		T("TxQRestarts:", q.restarts);
1931		T("TxMapErr:", mapping_err);
1932		R("FLAllocErr:", fl.alloc_failed);
1933		R("FLLrgAlcErr:", fl.large_alloc_failed);
1934		R("FLStarving:", fl.starving);
1935		return 0;
1936	}
1937
1938	r -= eth_entries;
1939	if (r == 0) {
1940		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
1941
1942		seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue");
1943		seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
1944			   evtq->unhandled_irqs);
1945		seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
1946		seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
1947	} else if (r == 1) {
1948		const struct sge_rspq *intrq = &adapter->sge.intrq;
1949
1950		seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue");
1951		seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
1952			   intrq->unhandled_irqs);
1953		seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
1954		seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
1955	}
1956
1957	#undef R
1958	#undef T
1959	#undef S
1960	#undef R3
1961	#undef T3
1962	#undef S3
1963
1964	return 0;
1965}
1966
1967/*
1968 * Return the number of "entries" in our "file".  We group the multi-Queue
1969 * sections with QPL Queue Sets per "entry".  The sections of the output are:
1970 *
1971 *     Ethernet RX/TX Queue Sets
1972 *     Firmware Event Queue
1973 *     Forwarded Interrupt Queue (if in MSI mode)
1974 */
1975static int sge_qstats_entries(const struct adapter *adapter)
1976{
1977	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1978		((adapter->flags & USING_MSI) != 0);
1979}
1980
1981static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
1982{
1983	int entries = sge_qstats_entries(seq->private);
1984
1985	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1986}
1987
1988static void sge_qstats_stop(struct seq_file *seq, void *v)
1989{
1990}
1991
1992static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
1993{
1994	int entries = sge_qstats_entries(seq->private);
1995
1996	(*pos)++;
1997	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1998}
1999
2000static const struct seq_operations sge_qstats_seq_ops = {
2001	.start = sge_qstats_start,
2002	.next  = sge_qstats_next,
2003	.stop  = sge_qstats_stop,
2004	.show  = sge_qstats_show
2005};
2006
2007static int sge_qstats_open(struct inode *inode, struct file *file)
2008{
2009	int res = seq_open(file, &sge_qstats_seq_ops);
2010
2011	if (res == 0) {
2012		struct seq_file *seq = file->private_data;
2013		seq->private = inode->i_private;
2014	}
2015	return res;
2016}
2017
2018static const struct file_operations sge_qstats_proc_fops = {
2019	.owner   = THIS_MODULE,
2020	.open    = sge_qstats_open,
2021	.read    = seq_read,
2022	.llseek  = seq_lseek,
2023	.release = seq_release,
2024};
2025
2026/*
2027 * Show PCI-E SR-IOV Virtual Function Resource Limits.
2028 */
2029static int resources_show(struct seq_file *seq, void *v)
2030{
2031	struct adapter *adapter = seq->private;
2032	struct vf_resources *vfres = &adapter->params.vfres;
2033
2034	#define S(desc, fmt, var) \
2035		seq_printf(seq, "%-60s " fmt "\n", \
2036			   desc " (" #var "):", vfres->var)
2037
2038	S("Virtual Interfaces", "%d", nvi);
2039	S("Egress Queues", "%d", neq);
2040	S("Ethernet Control", "%d", nethctrl);
2041	S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
2042	S("Ingress Queues", "%d", niq);
2043	S("Traffic Class", "%d", tc);
2044	S("Port Access Rights Mask", "%#x", pmask);
2045	S("MAC Address Filters", "%d", nexactf);
2046	S("Firmware Command Read Capabilities", "%#x", r_caps);
2047	S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
2048
2049	#undef S
2050
2051	return 0;
2052}
2053
2054static int resources_open(struct inode *inode, struct file *file)
2055{
2056	return single_open(file, resources_show, inode->i_private);
2057}
2058
2059static const struct file_operations resources_proc_fops = {
2060	.owner   = THIS_MODULE,
2061	.open    = resources_open,
2062	.read    = seq_read,
2063	.llseek  = seq_lseek,
2064	.release = single_release,
2065};
2066
2067/*
2068 * Show Virtual Interfaces.
2069 */
2070static int interfaces_show(struct seq_file *seq, void *v)
2071{
2072	if (v == SEQ_START_TOKEN) {
2073		seq_puts(seq, "Interface  Port   VIID\n");
2074	} else {
2075		struct adapter *adapter = seq->private;
2076		int pidx = (uintptr_t)v - 2;
2077		struct net_device *dev = adapter->port[pidx];
2078		struct port_info *pi = netdev_priv(dev);
2079
2080		seq_printf(seq, "%9s  %4d  %#5x\n",
2081			   dev->name, pi->port_id, pi->viid);
2082	}
2083	return 0;
2084}
2085
2086static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
2087{
2088	return pos <= adapter->params.nports
2089		? (void *)(uintptr_t)(pos + 1)
2090		: NULL;
2091}
2092
2093static void *interfaces_start(struct seq_file *seq, loff_t *pos)
2094{
2095	return *pos
2096		? interfaces_get_idx(seq->private, *pos)
2097		: SEQ_START_TOKEN;
2098}
2099
2100static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
2101{
2102	(*pos)++;
2103	return interfaces_get_idx(seq->private, *pos);
2104}
2105
2106static void interfaces_stop(struct seq_file *seq, void *v)
2107{
2108}
2109
2110static const struct seq_operations interfaces_seq_ops = {
2111	.start = interfaces_start,
2112	.next  = interfaces_next,
2113	.stop  = interfaces_stop,
2114	.show  = interfaces_show
2115};
2116
2117static int interfaces_open(struct inode *inode, struct file *file)
2118{
2119	int res = seq_open(file, &interfaces_seq_ops);
2120
2121	if (res == 0) {
2122		struct seq_file *seq = file->private_data;
2123		seq->private = inode->i_private;
2124	}
2125	return res;
2126}
2127
2128static const struct file_operations interfaces_proc_fops = {
2129	.owner   = THIS_MODULE,
2130	.open    = interfaces_open,
2131	.read    = seq_read,
2132	.llseek  = seq_lseek,
2133	.release = seq_release,
2134};
2135
2136/*
2137 * /sys/kernel/debugfs/cxgb4vf/ files list.
2138 */
2139struct cxgb4vf_debugfs_entry {
2140	const char *name;		/* name of debugfs node */
2141	umode_t mode;			/* file system mode */
2142	const struct file_operations *fops;
2143};
2144
2145static struct cxgb4vf_debugfs_entry debugfs_files[] = {
2146	{ "sge_qinfo",  S_IRUGO, &sge_qinfo_debugfs_fops },
2147	{ "sge_qstats", S_IRUGO, &sge_qstats_proc_fops },
2148	{ "resources",  S_IRUGO, &resources_proc_fops },
2149	{ "interfaces", S_IRUGO, &interfaces_proc_fops },
2150};
2151
2152/*
2153 * Module and device initialization and cleanup code.
2154 * ==================================================
2155 */
2156
2157/*
2158 * Set up out /sys/kernel/debug/cxgb4vf sub-nodes.  We assume that the
2159 * directory (debugfs_root) has already been set up.
2160 */
2161static int setup_debugfs(struct adapter *adapter)
2162{
2163	int i;
2164
2165	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2166
2167	/*
2168	 * Debugfs support is best effort.
2169	 */
2170	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2171		(void)debugfs_create_file(debugfs_files[i].name,
2172				  debugfs_files[i].mode,
2173				  adapter->debugfs_root,
2174				  (void *)adapter,
2175				  debugfs_files[i].fops);
2176
2177	return 0;
2178}
2179
2180/*
2181 * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above.  We leave
2182 * it to our caller to tear down the directory (debugfs_root).
2183 */
2184static void cleanup_debugfs(struct adapter *adapter)
2185{
2186	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2187
2188	/*
2189	 * Unlike our sister routine cleanup_proc(), we don't need to remove
2190	 * individual entries because a call will be made to
2191	 * debugfs_remove_recursive().  We just need to clean up any ancillary
2192	 * persistent state.
2193	 */
2194	/* nothing to do */
2195}
2196
2197/*
2198 * Perform early "adapter" initialization.  This is where we discover what
2199 * adapter parameters we're going to be using and initialize basic adapter
2200 * hardware support.
2201 */
2202static int adap_init0(struct adapter *adapter)
2203{
2204	struct vf_resources *vfres = &adapter->params.vfres;
2205	struct sge_params *sge_params = &adapter->params.sge;
2206	struct sge *s = &adapter->sge;
2207	unsigned int ethqsets;
2208	int err;
2209	u32 param, val = 0;
2210
2211	/*
2212	 * Wait for the device to become ready before proceeding ...
2213	 */
2214	err = t4vf_wait_dev_ready(adapter);
2215	if (err) {
2216		dev_err(adapter->pdev_dev, "device didn't become ready:"
2217			" err=%d\n", err);
2218		return err;
2219	}
2220
2221	/*
2222	 * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
2223	 * 2.6.31 and later we can't call pci_reset_function() in order to
2224	 * issue an FLR because of a self- deadlock on the device semaphore.
2225	 * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
2226	 * cases where they're needed -- for instance, some versions of KVM
2227	 * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
2228	 * use the firmware based reset in order to reset any per function
2229	 * state.
2230	 */
2231	err = t4vf_fw_reset(adapter);
2232	if (err < 0) {
2233		dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
2234		return err;
2235	}
2236
2237	/*
2238	 * Grab basic operational parameters.  These will predominantly have
2239	 * been set up by the Physical Function Driver or will be hard coded
2240	 * into the adapter.  We just have to live with them ...  Note that
2241	 * we _must_ get our VPD parameters before our SGE parameters because
2242	 * we need to know the adapter's core clock from the VPD in order to
2243	 * properly decode the SGE Timer Values.
2244	 */
2245	err = t4vf_get_dev_params(adapter);
2246	if (err) {
2247		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2248			" device parameters: err=%d\n", err);
2249		return err;
2250	}
2251	err = t4vf_get_vpd_params(adapter);
2252	if (err) {
2253		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2254			" VPD parameters: err=%d\n", err);
2255		return err;
2256	}
2257	err = t4vf_get_sge_params(adapter);
2258	if (err) {
2259		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2260			" SGE parameters: err=%d\n", err);
2261		return err;
2262	}
2263	err = t4vf_get_rss_glb_config(adapter);
2264	if (err) {
2265		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2266			" RSS parameters: err=%d\n", err);
2267		return err;
2268	}
2269	if (adapter->params.rss.mode !=
2270	    FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2271		dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2272			" mode %d\n", adapter->params.rss.mode);
2273		return -EINVAL;
2274	}
2275	err = t4vf_sge_init(adapter);
2276	if (err) {
2277		dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2278			" err=%d\n", err);
2279		return err;
2280	}
2281
2282	/* If we're running on newer firmware, let it know that we're
2283	 * prepared to deal with encapsulated CPL messages.  Older
2284	 * firmware won't understand this and we'll just get
2285	 * unencapsulated messages ...
2286	 */
2287	param = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
2288		FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP);
2289	val = 1;
2290	(void) t4vf_set_params(adapter, 1, &param, &val);
2291
2292	/*
2293	 * Retrieve our RX interrupt holdoff timer values and counter
2294	 * threshold values from the SGE parameters.
2295	 */
2296	s->timer_val[0] = core_ticks_to_us(adapter,
2297		TIMERVALUE0_G(sge_params->sge_timer_value_0_and_1));
2298	s->timer_val[1] = core_ticks_to_us(adapter,
2299		TIMERVALUE1_G(sge_params->sge_timer_value_0_and_1));
2300	s->timer_val[2] = core_ticks_to_us(adapter,
2301		TIMERVALUE0_G(sge_params->sge_timer_value_2_and_3));
2302	s->timer_val[3] = core_ticks_to_us(adapter,
2303		TIMERVALUE1_G(sge_params->sge_timer_value_2_and_3));
2304	s->timer_val[4] = core_ticks_to_us(adapter,
2305		TIMERVALUE0_G(sge_params->sge_timer_value_4_and_5));
2306	s->timer_val[5] = core_ticks_to_us(adapter,
2307		TIMERVALUE1_G(sge_params->sge_timer_value_4_and_5));
2308
2309	s->counter_val[0] = THRESHOLD_0_G(sge_params->sge_ingress_rx_threshold);
2310	s->counter_val[1] = THRESHOLD_1_G(sge_params->sge_ingress_rx_threshold);
2311	s->counter_val[2] = THRESHOLD_2_G(sge_params->sge_ingress_rx_threshold);
2312	s->counter_val[3] = THRESHOLD_3_G(sge_params->sge_ingress_rx_threshold);
2313
2314	/*
2315	 * Grab our Virtual Interface resource allocation, extract the
2316	 * features that we're interested in and do a bit of sanity testing on
2317	 * what we discover.
2318	 */
2319	err = t4vf_get_vfres(adapter);
2320	if (err) {
2321		dev_err(adapter->pdev_dev, "unable to get virtual interface"
2322			" resources: err=%d\n", err);
2323		return err;
2324	}
2325
2326	/*
2327	 * The number of "ports" which we support is equal to the number of
2328	 * Virtual Interfaces with which we've been provisioned.
2329	 */
2330	adapter->params.nports = vfres->nvi;
2331	if (adapter->params.nports > MAX_NPORTS) {
2332		dev_warn(adapter->pdev_dev, "only using %d of %d allowed"
2333			 " virtual interfaces\n", MAX_NPORTS,
2334			 adapter->params.nports);
2335		adapter->params.nports = MAX_NPORTS;
2336	}
2337
2338	/*
2339	 * We need to reserve a number of the ingress queues with Free List
2340	 * and Interrupt capabilities for special interrupt purposes (like
2341	 * asynchronous firmware messages, or forwarded interrupts if we're
2342	 * using MSI).  The rest of the FL/Intr-capable ingress queues will be
2343	 * matched up one-for-one with Ethernet/Control egress queues in order
2344	 * to form "Queue Sets" which will be aportioned between the "ports".
2345	 * For each Queue Set, we'll need the ability to allocate two Egress
2346	 * Contexts -- one for the Ingress Queue Free List and one for the TX
2347	 * Ethernet Queue.
2348	 */
2349	ethqsets = vfres->niqflint - INGQ_EXTRAS;
2350	if (vfres->nethctrl != ethqsets) {
2351		dev_warn(adapter->pdev_dev, "unequal number of [available]"
2352			 " ingress/egress queues (%d/%d); using minimum for"
2353			 " number of Queue Sets\n", ethqsets, vfres->nethctrl);
2354		ethqsets = min(vfres->nethctrl, ethqsets);
2355	}
2356	if (vfres->neq < ethqsets*2) {
2357		dev_warn(adapter->pdev_dev, "Not enough Egress Contexts (%d)"
2358			 " to support Queue Sets (%d); reducing allowed Queue"
2359			 " Sets\n", vfres->neq, ethqsets);
2360		ethqsets = vfres->neq/2;
2361	}
2362	if (ethqsets > MAX_ETH_QSETS) {
2363		dev_warn(adapter->pdev_dev, "only using %d of %d allowed Queue"
2364			 " Sets\n", MAX_ETH_QSETS, adapter->sge.max_ethqsets);
2365		ethqsets = MAX_ETH_QSETS;
2366	}
2367	if (vfres->niq != 0 || vfres->neq > ethqsets*2) {
2368		dev_warn(adapter->pdev_dev, "unused resources niq/neq (%d/%d)"
2369			 " ignored\n", vfres->niq, vfres->neq - ethqsets*2);
2370	}
2371	adapter->sge.max_ethqsets = ethqsets;
2372
2373	/*
2374	 * Check for various parameter sanity issues.  Most checks simply
2375	 * result in us using fewer resources than our provissioning but we
2376	 * do need at least  one "port" with which to work ...
2377	 */
2378	if (adapter->sge.max_ethqsets < adapter->params.nports) {
2379		dev_warn(adapter->pdev_dev, "only using %d of %d available"
2380			 " virtual interfaces (too few Queue Sets)\n",
2381			 adapter->sge.max_ethqsets, adapter->params.nports);
2382		adapter->params.nports = adapter->sge.max_ethqsets;
2383	}
2384	if (adapter->params.nports == 0) {
2385		dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2386			"usable!\n");
2387		return -EINVAL;
2388	}
2389	return 0;
2390}
2391
2392static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2393			     u8 pkt_cnt_idx, unsigned int size,
2394			     unsigned int iqe_size)
2395{
2396	rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) |
2397			     (pkt_cnt_idx < SGE_NCOUNTERS ? QINTR_CNT_EN : 0));
2398	rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2399			    ? pkt_cnt_idx
2400			    : 0);
2401	rspq->iqe_len = iqe_size;
2402	rspq->size = size;
2403}
2404
2405/*
2406 * Perform default configuration of DMA queues depending on the number and
2407 * type of ports we found and the number of available CPUs.  Most settings can
2408 * be modified by the admin via ethtool and cxgbtool prior to the adapter
2409 * being brought up for the first time.
2410 */
2411static void cfg_queues(struct adapter *adapter)
2412{
2413	struct sge *s = &adapter->sge;
2414	int q10g, n10g, qidx, pidx, qs;
2415	size_t iqe_size;
2416
2417	/*
2418	 * We should not be called till we know how many Queue Sets we can
2419	 * support.  In particular, this means that we need to know what kind
2420	 * of interrupts we'll be using ...
2421	 */
2422	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
2423
2424	/*
2425	 * Count the number of 10GbE Virtual Interfaces that we have.
2426	 */
2427	n10g = 0;
2428	for_each_port(adapter, pidx)
2429		n10g += is_x_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
2430
2431	/*
2432	 * We default to 1 queue per non-10G port and up to # of cores queues
2433	 * per 10G port.
2434	 */
2435	if (n10g == 0)
2436		q10g = 0;
2437	else {
2438		int n1g = (adapter->params.nports - n10g);
2439		q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
2440		if (q10g > num_online_cpus())
2441			q10g = num_online_cpus();
2442	}
2443
2444	/*
2445	 * Allocate the "Queue Sets" to the various Virtual Interfaces.
2446	 * The layout will be established in setup_sge_queues() when the
2447	 * adapter is brough up for the first time.
2448	 */
2449	qidx = 0;
2450	for_each_port(adapter, pidx) {
2451		struct port_info *pi = adap2pinfo(adapter, pidx);
2452
2453		pi->first_qset = qidx;
2454		pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1;
2455		qidx += pi->nqsets;
2456	}
2457	s->ethqsets = qidx;
2458
2459	/*
2460	 * The Ingress Queue Entry Size for our various Response Queues needs
2461	 * to be big enough to accommodate the largest message we can receive
2462	 * from the chip/firmware; which is 64 bytes ...
2463	 */
2464	iqe_size = 64;
2465
2466	/*
2467	 * Set up default Queue Set parameters ...  Start off with the
2468	 * shortest interrupt holdoff timer.
2469	 */
2470	for (qs = 0; qs < s->max_ethqsets; qs++) {
2471		struct sge_eth_rxq *rxq = &s->ethrxq[qs];
2472		struct sge_eth_txq *txq = &s->ethtxq[qs];
2473
2474		init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
2475		rxq->fl.size = 72;
2476		txq->q.size = 1024;
2477	}
2478
2479	/*
2480	 * The firmware event queue is used for link state changes and
2481	 * notifications of TX DMA completions.
2482	 */
2483	init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
2484
2485	/*
2486	 * The forwarded interrupt queue is used when we're in MSI interrupt
2487	 * mode.  In this mode all interrupts associated with RX queues will
2488	 * be forwarded to a single queue which we'll associate with our MSI
2489	 * interrupt vector.  The messages dropped in the forwarded interrupt
2490	 * queue will indicate which ingress queue needs servicing ...  This
2491	 * queue needs to be large enough to accommodate all of the ingress
2492	 * queues which are forwarding their interrupt (+1 to prevent the PIDX
2493	 * from equalling the CIDX if every ingress queue has an outstanding
2494	 * interrupt).  The queue doesn't need to be any larger because no
2495	 * ingress queue will ever have more than one outstanding interrupt at
2496	 * any time ...
2497	 */
2498	init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
2499		  iqe_size);
2500}
2501
2502/*
2503 * Reduce the number of Ethernet queues across all ports to at most n.
2504 * n provides at least one queue per port.
2505 */
2506static void reduce_ethqs(struct adapter *adapter, int n)
2507{
2508	int i;
2509	struct port_info *pi;
2510
2511	/*
2512	 * While we have too many active Ether Queue Sets, interate across the
2513	 * "ports" and reduce their individual Queue Set allocations.
2514	 */
2515	BUG_ON(n < adapter->params.nports);
2516	while (n < adapter->sge.ethqsets)
2517		for_each_port(adapter, i) {
2518			pi = adap2pinfo(adapter, i);
2519			if (pi->nqsets > 1) {
2520				pi->nqsets--;
2521				adapter->sge.ethqsets--;
2522				if (adapter->sge.ethqsets <= n)
2523					break;
2524			}
2525		}
2526
2527	/*
2528	 * Reassign the starting Queue Sets for each of the "ports" ...
2529	 */
2530	n = 0;
2531	for_each_port(adapter, i) {
2532		pi = adap2pinfo(adapter, i);
2533		pi->first_qset = n;
2534		n += pi->nqsets;
2535	}
2536}
2537
2538/*
2539 * We need to grab enough MSI-X vectors to cover our interrupt needs.  Ideally
2540 * we get a separate MSI-X vector for every "Queue Set" plus any extras we
2541 * need.  Minimally we need one for every Virtual Interface plus those needed
2542 * for our "extras".  Note that this process may lower the maximum number of
2543 * allowed Queue Sets ...
2544 */
2545static int enable_msix(struct adapter *adapter)
2546{
2547	int i, want, need, nqsets;
2548	struct msix_entry entries[MSIX_ENTRIES];
2549	struct sge *s = &adapter->sge;
2550
2551	for (i = 0; i < MSIX_ENTRIES; ++i)
2552		entries[i].entry = i;
2553
2554	/*
2555	 * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
2556	 * plus those needed for our "extras" (for example, the firmware
2557	 * message queue).  We _need_ at least one "Queue Set" per Virtual
2558	 * Interface plus those needed for our "extras".  So now we get to see
2559	 * if the song is right ...
2560	 */
2561	want = s->max_ethqsets + MSIX_EXTRAS;
2562	need = adapter->params.nports + MSIX_EXTRAS;
2563
2564	want = pci_enable_msix_range(adapter->pdev, entries, need, want);
2565	if (want < 0)
2566		return want;
2567
2568	nqsets = want - MSIX_EXTRAS;
2569	if (nqsets < s->max_ethqsets) {
2570		dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
2571			 " for %d Queue Sets\n", nqsets);
2572		s->max_ethqsets = nqsets;
2573		if (nqsets < s->ethqsets)
2574			reduce_ethqs(adapter, nqsets);
2575	}
2576	for (i = 0; i < want; ++i)
2577		adapter->msix_info[i].vec = entries[i].vector;
2578
2579	return 0;
2580}
2581
2582static const struct net_device_ops cxgb4vf_netdev_ops	= {
2583	.ndo_open		= cxgb4vf_open,
2584	.ndo_stop		= cxgb4vf_stop,
2585	.ndo_start_xmit		= t4vf_eth_xmit,
2586	.ndo_get_stats		= cxgb4vf_get_stats,
2587	.ndo_set_rx_mode	= cxgb4vf_set_rxmode,
2588	.ndo_set_mac_address	= cxgb4vf_set_mac_addr,
2589	.ndo_validate_addr	= eth_validate_addr,
2590	.ndo_do_ioctl		= cxgb4vf_do_ioctl,
2591	.ndo_change_mtu		= cxgb4vf_change_mtu,
2592	.ndo_fix_features	= cxgb4vf_fix_features,
2593	.ndo_set_features	= cxgb4vf_set_features,
2594#ifdef CONFIG_NET_POLL_CONTROLLER
2595	.ndo_poll_controller	= cxgb4vf_poll_controller,
2596#endif
2597};
2598
2599/*
2600 * "Probe" a device: initialize a device and construct all kernel and driver
2601 * state needed to manage the device.  This routine is called "init_one" in
2602 * the PF Driver ...
2603 */
2604static int cxgb4vf_pci_probe(struct pci_dev *pdev,
2605			     const struct pci_device_id *ent)
2606{
2607	int pci_using_dac;
2608	int err, pidx;
2609	unsigned int pmask;
2610	struct adapter *adapter;
2611	struct port_info *pi;
2612	struct net_device *netdev;
2613
2614	/*
2615	 * Print our driver banner the first time we're called to initialize a
2616	 * device.
2617	 */
2618	pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION);
2619
2620	/*
2621	 * Initialize generic PCI device state.
2622	 */
2623	err = pci_enable_device(pdev);
2624	if (err) {
2625		dev_err(&pdev->dev, "cannot enable PCI device\n");
2626		return err;
2627	}
2628
2629	/*
2630	 * Reserve PCI resources for the device.  If we can't get them some
2631	 * other driver may have already claimed the device ...
2632	 */
2633	err = pci_request_regions(pdev, KBUILD_MODNAME);
2634	if (err) {
2635		dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2636		goto err_disable_device;
2637	}
2638
2639	/*
2640	 * Set up our DMA mask: try for 64-bit address masking first and
2641	 * fall back to 32-bit if we can't get 64 bits ...
2642	 */
2643	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2644	if (err == 0) {
2645		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2646		if (err) {
2647			dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
2648				" coherent allocations\n");
2649			goto err_release_regions;
2650		}
2651		pci_using_dac = 1;
2652	} else {
2653		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2654		if (err != 0) {
2655			dev_err(&pdev->dev, "no usable DMA configuration\n");
2656			goto err_release_regions;
2657		}
2658		pci_using_dac = 0;
2659	}
2660
2661	/*
2662	 * Enable bus mastering for the device ...
2663	 */
2664	pci_set_master(pdev);
2665
2666	/*
2667	 * Allocate our adapter data structure and attach it to the device.
2668	 */
2669	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
2670	if (!adapter) {
2671		err = -ENOMEM;
2672		goto err_release_regions;
2673	}
2674	pci_set_drvdata(pdev, adapter);
2675	adapter->pdev = pdev;
2676	adapter->pdev_dev = &pdev->dev;
2677
2678	/*
2679	 * Initialize SMP data synchronization resources.
2680	 */
2681	spin_lock_init(&adapter->stats_lock);
2682
2683	/*
2684	 * Map our I/O registers in BAR0.
2685	 */
2686	adapter->regs = pci_ioremap_bar(pdev, 0);
2687	if (!adapter->regs) {
2688		dev_err(&pdev->dev, "cannot map device registers\n");
2689		err = -ENOMEM;
2690		goto err_free_adapter;
2691	}
2692
2693	/* Wait for the device to become ready before proceeding ...
2694	 */
2695	err = t4vf_prep_adapter(adapter);
2696	if (err) {
2697		dev_err(adapter->pdev_dev, "device didn't become ready:"
2698			" err=%d\n", err);
2699		goto err_unmap_bar0;
2700	}
2701
2702	/* For T5 and later we want to use the new BAR-based User Doorbells,
2703	 * so we need to map BAR2 here ...
2704	 */
2705	if (!is_t4(adapter->params.chip)) {
2706		adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2),
2707					   pci_resource_len(pdev, 2));
2708		if (!adapter->bar2) {
2709			dev_err(adapter->pdev_dev, "cannot map BAR2 doorbells\n");
2710			err = -ENOMEM;
2711			goto err_unmap_bar0;
2712		}
2713	}
2714	/*
2715	 * Initialize adapter level features.
2716	 */
2717	adapter->name = pci_name(pdev);
2718	adapter->msg_enable = dflt_msg_enable;
2719	err = adap_init0(adapter);
2720	if (err)
2721		goto err_unmap_bar;
2722
2723	/*
2724	 * Allocate our "adapter ports" and stitch everything together.
2725	 */
2726	pmask = adapter->params.vfres.pmask;
2727	for_each_port(adapter, pidx) {
2728		int port_id, viid;
2729
2730		/*
2731		 * We simplistically allocate our virtual interfaces
2732		 * sequentially across the port numbers to which we have
2733		 * access rights.  This should be configurable in some manner
2734		 * ...
2735		 */
2736		if (pmask == 0)
2737			break;
2738		port_id = ffs(pmask) - 1;
2739		pmask &= ~(1 << port_id);
2740		viid = t4vf_alloc_vi(adapter, port_id);
2741		if (viid < 0) {
2742			dev_err(&pdev->dev, "cannot allocate VI for port %d:"
2743				" err=%d\n", port_id, viid);
2744			err = viid;
2745			goto err_free_dev;
2746		}
2747
2748		/*
2749		 * Allocate our network device and stitch things together.
2750		 */
2751		netdev = alloc_etherdev_mq(sizeof(struct port_info),
2752					   MAX_PORT_QSETS);
2753		if (netdev == NULL) {
2754			t4vf_free_vi(adapter, viid);
2755			err = -ENOMEM;
2756			goto err_free_dev;
2757		}
2758		adapter->port[pidx] = netdev;
2759		SET_NETDEV_DEV(netdev, &pdev->dev);
2760		pi = netdev_priv(netdev);
2761		pi->adapter = adapter;
2762		pi->pidx = pidx;
2763		pi->port_id = port_id;
2764		pi->viid = viid;
2765
2766		/*
2767		 * Initialize the starting state of our "port" and register
2768		 * it.
2769		 */
2770		pi->xact_addr_filt = -1;
2771		netif_carrier_off(netdev);
2772		netdev->irq = pdev->irq;
2773
2774		netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
2775			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2776			NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM;
2777		netdev->vlan_features = NETIF_F_SG | TSO_FLAGS |
2778			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2779			NETIF_F_HIGHDMA;
2780		netdev->features = netdev->hw_features |
2781				   NETIF_F_HW_VLAN_CTAG_TX;
2782		if (pci_using_dac)
2783			netdev->features |= NETIF_F_HIGHDMA;
2784
2785		netdev->priv_flags |= IFF_UNICAST_FLT;
2786
2787		netdev->netdev_ops = &cxgb4vf_netdev_ops;
2788		netdev->ethtool_ops = &cxgb4vf_ethtool_ops;
2789
2790		/*
2791		 * Initialize the hardware/software state for the port.
2792		 */
2793		err = t4vf_port_init(adapter, pidx);
2794		if (err) {
2795			dev_err(&pdev->dev, "cannot initialize port %d\n",
2796				pidx);
2797			goto err_free_dev;
2798		}
2799	}
2800
2801	/*
2802	 * The "card" is now ready to go.  If any errors occur during device
2803	 * registration we do not fail the whole "card" but rather proceed
2804	 * only with the ports we manage to register successfully.  However we
2805	 * must register at least one net device.
2806	 */
2807	for_each_port(adapter, pidx) {
2808		netdev = adapter->port[pidx];
2809		if (netdev == NULL)
2810			continue;
2811
2812		err = register_netdev(netdev);
2813		if (err) {
2814			dev_warn(&pdev->dev, "cannot register net device %s,"
2815				 " skipping\n", netdev->name);
2816			continue;
2817		}
2818
2819		set_bit(pidx, &adapter->registered_device_map);
2820	}
2821	if (adapter->registered_device_map == 0) {
2822		dev_err(&pdev->dev, "could not register any net devices\n");
2823		goto err_free_dev;
2824	}
2825
2826	/*
2827	 * Set up our debugfs entries.
2828	 */
2829	if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
2830		adapter->debugfs_root =
2831			debugfs_create_dir(pci_name(pdev),
2832					   cxgb4vf_debugfs_root);
2833		if (IS_ERR_OR_NULL(adapter->debugfs_root))
2834			dev_warn(&pdev->dev, "could not create debugfs"
2835				 " directory");
2836		else
2837			setup_debugfs(adapter);
2838	}
2839
2840	/*
2841	 * See what interrupts we'll be using.  If we've been configured to
2842	 * use MSI-X interrupts, try to enable them but fall back to using
2843	 * MSI interrupts if we can't enable MSI-X interrupts.  If we can't
2844	 * get MSI interrupts we bail with the error.
2845	 */
2846	if (msi == MSI_MSIX && enable_msix(adapter) == 0)
2847		adapter->flags |= USING_MSIX;
2848	else {
2849		err = pci_enable_msi(pdev);
2850		if (err) {
2851			dev_err(&pdev->dev, "Unable to allocate %s interrupts;"
2852				" err=%d\n",
2853				msi == MSI_MSIX ? "MSI-X or MSI" : "MSI", err);
2854			goto err_free_debugfs;
2855		}
2856		adapter->flags |= USING_MSI;
2857	}
2858
2859	/*
2860	 * Now that we know how many "ports" we have and what their types are,
2861	 * and how many Queue Sets we can support, we can configure our queue
2862	 * resources.
2863	 */
2864	cfg_queues(adapter);
2865
2866	/*
2867	 * Print a short notice on the existence and configuration of the new
2868	 * VF network device ...
2869	 */
2870	for_each_port(adapter, pidx) {
2871		dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
2872			 adapter->port[pidx]->name,
2873			 (adapter->flags & USING_MSIX) ? "MSI-X" :
2874			 (adapter->flags & USING_MSI)  ? "MSI" : "");
2875	}
2876
2877	/*
2878	 * Return success!
2879	 */
2880	return 0;
2881
2882	/*
2883	 * Error recovery and exit code.  Unwind state that's been created
2884	 * so far and return the error.
2885	 */
2886
2887err_free_debugfs:
2888	if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
2889		cleanup_debugfs(adapter);
2890		debugfs_remove_recursive(adapter->debugfs_root);
2891	}
2892
2893err_free_dev:
2894	for_each_port(adapter, pidx) {
2895		netdev = adapter->port[pidx];
2896		if (netdev == NULL)
2897			continue;
2898		pi = netdev_priv(netdev);
2899		t4vf_free_vi(adapter, pi->viid);
2900		if (test_bit(pidx, &adapter->registered_device_map))
2901			unregister_netdev(netdev);
2902		free_netdev(netdev);
2903	}
2904
2905err_unmap_bar:
2906	if (!is_t4(adapter->params.chip))
2907		iounmap(adapter->bar2);
2908
2909err_unmap_bar0:
2910	iounmap(adapter->regs);
2911
2912err_free_adapter:
2913	kfree(adapter);
2914
2915err_release_regions:
2916	pci_release_regions(pdev);
2917	pci_clear_master(pdev);
2918
2919err_disable_device:
2920	pci_disable_device(pdev);
2921
2922	return err;
2923}
2924
2925/*
2926 * "Remove" a device: tear down all kernel and driver state created in the
2927 * "probe" routine and quiesce the device (disable interrupts, etc.).  (Note
2928 * that this is called "remove_one" in the PF Driver.)
2929 */
2930static void cxgb4vf_pci_remove(struct pci_dev *pdev)
2931{
2932	struct adapter *adapter = pci_get_drvdata(pdev);
2933
2934	/*
2935	 * Tear down driver state associated with device.
2936	 */
2937	if (adapter) {
2938		int pidx;
2939
2940		/*
2941		 * Stop all of our activity.  Unregister network port,
2942		 * disable interrupts, etc.
2943		 */
2944		for_each_port(adapter, pidx)
2945			if (test_bit(pidx, &adapter->registered_device_map))
2946				unregister_netdev(adapter->port[pidx]);
2947		t4vf_sge_stop(adapter);
2948		if (adapter->flags & USING_MSIX) {
2949			pci_disable_msix(adapter->pdev);
2950			adapter->flags &= ~USING_MSIX;
2951		} else if (adapter->flags & USING_MSI) {
2952			pci_disable_msi(adapter->pdev);
2953			adapter->flags &= ~USING_MSI;
2954		}
2955
2956		/*
2957		 * Tear down our debugfs entries.
2958		 */
2959		if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
2960			cleanup_debugfs(adapter);
2961			debugfs_remove_recursive(adapter->debugfs_root);
2962		}
2963
2964		/*
2965		 * Free all of the various resources which we've acquired ...
2966		 */
2967		t4vf_free_sge_resources(adapter);
2968		for_each_port(adapter, pidx) {
2969			struct net_device *netdev = adapter->port[pidx];
2970			struct port_info *pi;
2971
2972			if (netdev == NULL)
2973				continue;
2974
2975			pi = netdev_priv(netdev);
2976			t4vf_free_vi(adapter, pi->viid);
2977			free_netdev(netdev);
2978		}
2979		iounmap(adapter->regs);
2980		if (!is_t4(adapter->params.chip))
2981			iounmap(adapter->bar2);
2982		kfree(adapter);
2983	}
2984
2985	/*
2986	 * Disable the device and release its PCI resources.
2987	 */
2988	pci_disable_device(pdev);
2989	pci_clear_master(pdev);
2990	pci_release_regions(pdev);
2991}
2992
2993/*
2994 * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt
2995 * delivery.
2996 */
2997static void cxgb4vf_pci_shutdown(struct pci_dev *pdev)
2998{
2999	struct adapter *adapter;
3000	int pidx;
3001
3002	adapter = pci_get_drvdata(pdev);
3003	if (!adapter)
3004		return;
3005
3006	/* Disable all Virtual Interfaces.  This will shut down the
3007	 * delivery of all ingress packets into the chip for these
3008	 * Virtual Interfaces.
3009	 */
3010	for_each_port(adapter, pidx)
3011		if (test_bit(pidx, &adapter->registered_device_map))
3012			unregister_netdev(adapter->port[pidx]);
3013
3014	/* Free up all Queues which will prevent further DMA and
3015	 * Interrupts allowing various internal pathways to drain.
3016	 */
3017	t4vf_sge_stop(adapter);
3018	if (adapter->flags & USING_MSIX) {
3019		pci_disable_msix(adapter->pdev);
3020		adapter->flags &= ~USING_MSIX;
3021	} else if (adapter->flags & USING_MSI) {
3022		pci_disable_msi(adapter->pdev);
3023		adapter->flags &= ~USING_MSI;
3024	}
3025
3026	/*
3027	 * Free up all Queues which will prevent further DMA and
3028	 * Interrupts allowing various internal pathways to drain.
3029	 */
3030	t4vf_free_sge_resources(adapter);
3031	pci_set_drvdata(pdev, NULL);
3032}
3033
3034/* Macros needed to support the PCI Device ID Table ...
3035 */
3036#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
3037	static const struct pci_device_id cxgb4vf_pci_tbl[] = {
3038#define CH_PCI_DEVICE_ID_FUNCTION	0x8
3039
3040#define CH_PCI_ID_TABLE_ENTRY(devid) \
3041		{ PCI_VDEVICE(CHELSIO, (devid)), 0 }
3042
3043#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END { 0, } }
3044
3045#include "../cxgb4/t4_pci_id_tbl.h"
3046
3047MODULE_DESCRIPTION(DRV_DESC);
3048MODULE_AUTHOR("Chelsio Communications");
3049MODULE_LICENSE("Dual BSD/GPL");
3050MODULE_VERSION(DRV_VERSION);
3051MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);
3052
3053static struct pci_driver cxgb4vf_driver = {
3054	.name		= KBUILD_MODNAME,
3055	.id_table	= cxgb4vf_pci_tbl,
3056	.probe		= cxgb4vf_pci_probe,
3057	.remove		= cxgb4vf_pci_remove,
3058	.shutdown	= cxgb4vf_pci_shutdown,
3059};
3060
3061/*
3062 * Initialize global driver state.
3063 */
3064static int __init cxgb4vf_module_init(void)
3065{
3066	int ret;
3067
3068	/*
3069	 * Vet our module parameters.
3070	 */
3071	if (msi != MSI_MSIX && msi != MSI_MSI) {
3072		pr_warn("bad module parameter msi=%d; must be %d (MSI-X or MSI) or %d (MSI)\n",
3073			msi, MSI_MSIX, MSI_MSI);
3074		return -EINVAL;
3075	}
3076
3077	/* Debugfs support is optional, just warn if this fails */
3078	cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
3079	if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3080		pr_warn("could not create debugfs entry, continuing\n");
3081
3082	ret = pci_register_driver(&cxgb4vf_driver);
3083	if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3084		debugfs_remove(cxgb4vf_debugfs_root);
3085	return ret;
3086}
3087
3088/*
3089 * Tear down global driver state.
3090 */
3091static void __exit cxgb4vf_module_exit(void)
3092{
3093	pci_unregister_driver(&cxgb4vf_driver);
3094	debugfs_remove(cxgb4vf_debugfs_root);
3095}
3096
3097module_init(cxgb4vf_module_init);
3098module_exit(cxgb4vf_module_exit);
3099