/*
 * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
 * driver for Linux.
 *
 * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/debugfs.h>
#include <linux/ethtool.h>
#include <linux/mdio.h>

#include "t4vf_common.h"
#include "t4vf_defs.h"

#include "../cxgb4/t4_regs.h"
#include "../cxgb4/t4_msg.h"

/*
 * Generic information about the driver.
 */
#define DRV_VERSION "2.0.0-ko"
#define DRV_DESC "Chelsio T4/T5/T6 Virtual Function (VF) Network Driver"

/*
 * Module Parameters.
 * ==================
 */

/*
 * Default ethtool "message level" for adapters.
 */
#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
			 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
			 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)

static int dflt_msg_enable = DFLT_MSG_ENABLE;

module_param(dflt_msg_enable, int, 0644);
MODULE_PARM_DESC(dflt_msg_enable,
		 "default adapter ethtool message level bitmap");

/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X then MSI.  This parameter determines which of these schemes the
 * driver may consider as follows:
 *
 *     msi = 2: choose from among MSI-X and MSI
 *     msi = 1: only consider MSI interrupts
 *
 * Note that unlike the Physical Function driver, this Virtual Function driver
 * does _not_ support legacy INTx interrupts (this limitation is mandated by
 * the PCI-E SR-IOV standard).
 */
#define MSI_MSIX	2
#define MSI_MSI		1
#define MSI_DEFAULT	MSI_MSIX

static int msi = MSI_DEFAULT;

module_param(msi, int, 0644);
MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
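
/*
 * Illustrative usage (not part of the driver): both knobs above are 0644
 * module parameters, so they can be supplied at load time, e.g.
 *
 *	modprobe cxgb4vf msi=1 dflt_msg_enable=0x3f
 *
 * where msi should be MSI_MSI (1) or MSI_MSIX (2) per the description
 * above, and the message-level value shown is just an example bitmap.
 */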

/*
 * Fundamental constants.
 * ======================
 */

enum {
	MAX_TXQ_ENTRIES		= 16384,
	MAX_RSPQ_ENTRIES	= 16384,
	MAX_RX_BUFFERS		= 16384,

	MIN_TXQ_ENTRIES		= 32,
	MIN_RSPQ_ENTRIES	= 128,
	MIN_FL_ENTRIES		= 16,
	/*
	 * For purposes of manipulating the Free List size we need to
	 * recognize that Free Lists are actually Egress Queues (the host
	 * produces free buffers which the hardware consumes), Egress Queue
	 * indices are all in units of Egress Context Units bytes, and free
	 * list entries are 64-bit PCI DMA addresses.  And since the state of
	 * the Producer Index == the Consumer Index implies an EMPTY list, we
	 * always have at least one Egress Unit's worth of Free List entries
	 * unused.  See sge.c for more details ...
	 */
	EQ_UNIT = SGE_EQ_IDXSIZE,
	FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
	MIN_FL_RESID = FL_PER_EQ_UNIT,
};
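
/*
 * Worked example (illustrative; SGE_EQ_IDXSIZE really comes from
 * t4vf_defs.h): if an Egress Context Unit is 64 bytes, then
 * FL_PER_EQ_UNIT = 64 / sizeof(__be64) = 8, so MIN_FL_RESID reserves 8
 * Free List entries of permanent slack so that a completely full Free
 * List never drives the Producer Index onto the Consumer Index (which
 * would read back as an empty list).
 */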

/*
 * Global driver state.
 * ====================
 */

static struct dentry *cxgb4vf_debugfs_root;

/*
 * OS "Callback" functions.
 * ========================
 */

/*
 * The link status has changed on the indicated "port" (Virtual Interface).
 */
void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
{
	struct net_device *dev = adapter->port[pidx];

	/*
	 * If the port is disabled or the current recorded "link up"
	 * status matches the new status, just return.
	 */
	if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
		return;

	/*
	 * Tell the OS that the link status has changed and print a short
	 * informative message on the console about the event.
	 */
	if (link_ok) {
		const char *s;
		const char *fc;
		const struct port_info *pi = netdev_priv(dev);

		netif_carrier_on(dev);

		switch (pi->link_cfg.speed) {
		case 40000:
			s = "40Gbps";
			break;

		case 10000:
			s = "10Gbps";
			break;

		case 1000:
			s = "1000Mbps";
			break;

		case 100:
			s = "100Mbps";
			break;

		default:
			s = "unknown";
			break;
		}

		switch (pi->link_cfg.fc) {
		case PAUSE_RX:
			fc = "RX";
			break;

		case PAUSE_TX:
			fc = "TX";
			break;

		case PAUSE_RX|PAUSE_TX:
			fc = "RX/TX";
			break;

		default:
			fc = "no";
			break;
		}

		netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
	} else {
		netif_carrier_off(dev);
		netdev_info(dev, "link down\n");
	}
}

/*
 * The port module type has changed on the indicated "port" (Virtual
 * Interface).
 */
void t4vf_os_portmod_changed(struct adapter *adapter, int pidx)
{
	static const char * const mod_str[] = {
		NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
	};
	const struct net_device *dev = adapter->port[pidx];
	const struct port_info *pi = netdev_priv(dev);

	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
		dev_info(adapter->pdev_dev, "%s: port module unplugged\n",
			 dev->name);
	else if (pi->mod_type < ARRAY_SIZE(mod_str))
		dev_info(adapter->pdev_dev, "%s: %s port module inserted\n",
			 dev->name, mod_str[pi->mod_type]);
	else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
		dev_info(adapter->pdev_dev, "%s: unsupported optical port "
			 "module inserted\n", dev->name);
	else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
		dev_info(adapter->pdev_dev, "%s: unknown port module inserted, "
			 "forcing TWINAX\n", dev->name);
	else if (pi->mod_type == FW_PORT_MOD_TYPE_ERROR)
		dev_info(adapter->pdev_dev, "%s: transceiver module error\n",
			 dev->name);
	else
		dev_info(adapter->pdev_dev, "%s: unknown module type %d "
			 "inserted\n", dev->name, pi->mod_type);
}

/*
 * Net device operations.
 * ======================
 */

/*
 * Perform the MAC and PHY actions needed to enable a "port" (Virtual
 * Interface).
 */
static int link_start(struct net_device *dev)
{
	int ret;
	struct port_info *pi = netdev_priv(dev);

	/*
	 * We do not set address filters and promiscuity here, the stack does
	 * that step explicitly. Enable vlan accel.
	 */
	ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
			      true);
	if (ret == 0) {
		ret = t4vf_change_mac(pi->adapter, pi->viid,
				      pi->xact_addr_filt, dev->dev_addr, true);
		if (ret >= 0) {
			pi->xact_addr_filt = ret;
			ret = 0;
		}
	}

	/*
	 * We don't need to actually "start the link" itself since the
	 * firmware will do that for us when the first Virtual Interface
	 * is enabled on a port.
	 */
	if (ret == 0)
		ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
	return ret;
}

/*
 * Name the MSI-X interrupts.
 */
static void name_msix_vecs(struct adapter *adapter)
{
	int namelen = sizeof(adapter->msix_info[0].desc) - 1;
	int pidx;

	/*
	 * Firmware events.
	 */
	snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
		 "%s-FWeventq", adapter->name);
	adapter->msix_info[MSIX_FW].desc[namelen] = 0;

	/*
	 * Ethernet queues.
	 */
	for_each_port(adapter, pidx) {
		struct net_device *dev = adapter->port[pidx];
		const struct port_info *pi = netdev_priv(dev);
		int qs, msi;

		for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
			snprintf(adapter->msix_info[msi].desc, namelen,
				 "%s-%d", dev->name, qs);
			adapter->msix_info[msi].desc[namelen] = 0;
		}
	}
}

/*
 * Request all of our MSI-X resources.
 */
static int request_msix_queue_irqs(struct adapter *adapter)
{
	struct sge *s = &adapter->sge;
	int rxq, msi, err;

	/*
	 * Firmware events.
	 */
	err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
			  0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
	if (err)
		return err;

	/*
	 * Ethernet queues.
	 */
	msi = MSIX_IQFLINT;
	for_each_ethrxq(s, rxq) {
		err = request_irq(adapter->msix_info[msi].vec,
				  t4vf_sge_intr_msix, 0,
				  adapter->msix_info[msi].desc,
				  &s->ethrxq[rxq].rspq);
		if (err)
			goto err_free_irqs;
		msi++;
	}
	return 0;

err_free_irqs:
	while (--rxq >= 0)
		free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
	free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
	return err;
}

/*
 * Free our MSI-X resources.
 */
static void free_msix_queue_irqs(struct adapter *adapter)
{
	struct sge *s = &adapter->sge;
	int rxq, msi;

	free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
	msi = MSIX_IQFLINT;
	for_each_ethrxq(s, rxq)
		free_irq(adapter->msix_info[msi++].vec,
			 &s->ethrxq[rxq].rspq);
}

/*
 * Turn on NAPI and start up interrupts on a response queue.
 */
static void qenable(struct sge_rspq *rspq)
{
	napi_enable(&rspq->napi);

	/*
	 * 0-increment the Going To Sleep register to start the timer and
	 * enable interrupts.
	 */
	t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
		     CIDXINC_V(0) |
		     SEINTARM_V(rspq->intr_params) |
		     INGRESSQID_V(rspq->cntxt_id));
}

/*
 * Enable NAPI scheduling and interrupt generation for all Receive Queues.
 */
static void enable_rx(struct adapter *adapter)
{
	int rxq;
	struct sge *s = &adapter->sge;

	for_each_ethrxq(s, rxq)
		qenable(&s->ethrxq[rxq].rspq);
	qenable(&s->fw_evtq);

	/*
	 * The interrupt queue doesn't use NAPI so we do the 0-increment of
	 * its Going To Sleep register here to get it started.
	 */
	if (adapter->flags & USING_MSI)
		t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
			     CIDXINC_V(0) |
			     SEINTARM_V(s->intrq.intr_params) |
			     INGRESSQID_V(s->intrq.cntxt_id));
}

/*
 * Wait until all NAPI handlers are descheduled.
 */
static void quiesce_rx(struct adapter *adapter)
{
	struct sge *s = &adapter->sge;
	int rxq;

	for_each_ethrxq(s, rxq)
		napi_disable(&s->ethrxq[rxq].rspq.napi);
	napi_disable(&s->fw_evtq.napi);
}

/*
 * Response queue handler for the firmware event queue.
 */
static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
			  const struct pkt_gl *gl)
{
	/*
	 * Extract response opcode and get pointer to CPL message body.
	 */
	struct adapter *adapter = rspq->adapter;
	u8 opcode = ((const struct rss_header *)rsp)->opcode;
	void *cpl = (void *)(rsp + 1);

	switch (opcode) {
	case CPL_FW6_MSG: {
		/*
		 * We've received an asynchronous message from the firmware.
		 */
		const struct cpl_fw6_msg *fw_msg = cpl;
		if (fw_msg->type == FW6_TYPE_CMD_RPL)
			t4vf_handle_fw_rpl(adapter, fw_msg->data);
		break;
	}

	case CPL_FW4_MSG: {
		/* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
		 */
		const struct cpl_sge_egr_update *p = (void *)(rsp + 3);
		opcode = CPL_OPCODE_G(ntohl(p->opcode_qid));
		if (opcode != CPL_SGE_EGR_UPDATE) {
			dev_err(adapter->pdev_dev, "unexpected FW4/CPL %#x on FW event queue\n",
				opcode);
			break;
		}
		cpl = (void *)p;
		/*FALLTHROUGH*/
	}

	case CPL_SGE_EGR_UPDATE: {
		/*
		 * We've received an Egress Queue Status Update message.  We
		 * get these if the SGE is configured to send them when the
		 * firmware passes certain points in processing our TX
		 * Ethernet Queue or if we make an explicit request for one.
		 * We use these updates to determine when we may need to
		 * restart a TX Ethernet Queue which was stopped for lack of
		 * free TX Queue Descriptors ...
		 */
		const struct cpl_sge_egr_update *p = cpl;
		unsigned int qid = EGR_QID_G(be32_to_cpu(p->opcode_qid));
		struct sge *s = &adapter->sge;
		struct sge_txq *tq;
		struct sge_eth_txq *txq;
		unsigned int eq_idx;

		/*
		 * Perform sanity checking on the Queue ID to make sure it
		 * really refers to one of our TX Ethernet Egress Queues which
		 * is active and matches the queue's ID.  None of these error
		 * conditions should ever happen so we may want to either make
		 * them fatal and/or conditionalized under DEBUG.
		 */
		eq_idx = EQ_IDX(s, qid);
		if (unlikely(eq_idx >= MAX_EGRQ)) {
			dev_err(adapter->pdev_dev,
				"Egress Update QID %d out of range\n", qid);
			break;
		}
		tq = s->egr_map[eq_idx];
		if (unlikely(tq == NULL)) {
			dev_err(adapter->pdev_dev,
				"Egress Update QID %d TXQ=NULL\n", qid);
			break;
		}
		txq = container_of(tq, struct sge_eth_txq, q);
		if (unlikely(tq->abs_id != qid)) {
			dev_err(adapter->pdev_dev,
				"Egress Update QID %d refers to TXQ %d\n",
				qid, tq->abs_id);
			break;
		}

		/*
		 * Restart a stopped TX Queue which has less than half of its
		 * TX ring in use ...
		 */
		txq->q.restarts++;
		netif_tx_wake_queue(txq->txq);
		break;
	}

	default:
		dev_err(adapter->pdev_dev,
			"unexpected CPL %#x on FW event queue\n", opcode);
	}

	return 0;
}

/*
 * Allocate SGE TX/RX response queues.  Determine how many sets of SGE queues
 * to use and initialize them.  We support multiple "Queue Sets" per port if
 * we have MSI-X, otherwise just one queue set per port.
 */
static int setup_sge_queues(struct adapter *adapter)
{
	struct sge *s = &adapter->sge;
	int err, pidx, msix;

	/*
	 * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
	 * state.
	 */
	bitmap_zero(s->starving_fl, MAX_EGRQ);

	/*
	 * If we're using MSI interrupt mode we need to set up a "forwarded
	 * interrupt" queue which we'll set up with our MSI vector.  The rest
	 * of the ingress queues will be set up to forward their interrupts to
	 * this queue ...  This must be first since t4vf_sge_alloc_rxq() uses
	 * the intrq's queue ID as the interrupt forwarding queue for the
	 * subsequent calls ...
	 */
	if (adapter->flags & USING_MSI) {
		err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
					 adapter->port[0], 0, NULL, NULL);
		if (err)
			goto err_free_queues;
	}

	/*
	 * Allocate our ingress queue for asynchronous firmware messages.
	 */
	err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
				 MSIX_FW, NULL, fwevtq_handler);
	if (err)
		goto err_free_queues;

	/*
	 * Allocate each "port"'s initial Queue Sets.  These can be changed
	 * later on ... up to the point where any interface on the adapter is
	 * brought up at which point lots of things get nailed down
	 * permanently ...
	 */
	msix = MSIX_IQFLINT;
	for_each_port(adapter, pidx) {
		struct net_device *dev = adapter->port[pidx];
		struct port_info *pi = netdev_priv(dev);
		struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
		struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
		int qs;

		for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
			err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
						 dev, msix++,
						 &rxq->fl, t4vf_ethrx_handler);
			if (err)
				goto err_free_queues;

			err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
					     netdev_get_tx_queue(dev, qs),
					     s->fw_evtq.cntxt_id);
			if (err)
				goto err_free_queues;

			rxq->rspq.idx = qs;
			memset(&rxq->stats, 0, sizeof(rxq->stats));
		}
	}

	/*
	 * Create the reverse mappings for the queues.
	 */
	s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
	s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
	IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
	for_each_port(adapter, pidx) {
		struct net_device *dev = adapter->port[pidx];
		struct port_info *pi = netdev_priv(dev);
		struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
		struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
		int qs;

		for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
			IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
			EQ_MAP(s, txq->q.abs_id) = &txq->q;

			/*
			 * The FW_IQ_CMD doesn't return the Absolute Queue IDs
			 * for Free Lists but since all of the Egress Queues
			 * (including Free Lists) have Relative Queue IDs
			 * which are computed as Absolute - Base Queue ID, we
			 * can synthesize the Absolute Queue IDs for the Free
			 * Lists.  This is useful for debugging purposes when
			 * we want to dump Queue Contexts via the PF Driver.
			 */
			rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
			EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
		}
	}
	return 0;

err_free_queues:
	t4vf_free_sge_resources(adapter);
	return err;
}

/*
 * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
 * queues.  We configure the RSS CPU lookup table to distribute to the number
 * of HW receive queues, and the response queue lookup table to narrow that
 * down to the response queues actually configured for each "port" (Virtual
 * Interface).  We always configure the RSS mapping for all ports since the
 * mapping table has plenty of entries.
 */
static int setup_rss(struct adapter *adapter)
{
	int pidx;

	for_each_port(adapter, pidx) {
		struct port_info *pi = adap2pinfo(adapter, pidx);
		struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
		u16 rss[MAX_PORT_QSETS];
		int qs, err;

		for (qs = 0; qs < pi->nqsets; qs++)
			rss[qs] = rxq[qs].rspq.abs_id;

		err = t4vf_config_rss_range(adapter, pi->viid,
					    0, pi->rss_size, rss, pi->nqsets);
		if (err)
			return err;

		/*
		 * Perform Global RSS Mode-specific initialization.
		 */
		switch (adapter->params.rss.mode) {
		case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
			/*
			 * If Tunnel All Lookup isn't specified in the global
			 * RSS Configuration, then we need to specify a
			 * default Ingress Queue for any ingress packets which
			 * aren't hashed.  We'll use our first ingress queue
			 * ...
			 */
			if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
				union rss_vi_config config;
				err = t4vf_read_rss_vi_config(adapter,
							      pi->viid,
							      &config);
				if (err)
					return err;
				config.basicvirtual.defaultq =
					rxq[0].rspq.abs_id;
				err = t4vf_write_rss_vi_config(adapter,
							       pi->viid,
							       &config);
				if (err)
					return err;
			}
			break;
		}
	}

	return 0;
}

/*
 * Bring the adapter up.  Called whenever we go from no "ports" open to having
 * one open.  This function performs the actions necessary to make an adapter
 * operational, such as completing the initialization of HW modules, and
 * enabling interrupts.  Must be called with the rtnl lock held.  (Note that
 * this is called "cxgb_up" in the PF Driver.)
 */
static int adapter_up(struct adapter *adapter)
{
	int err;

	/*
	 * If this is the first time we've been called, perform basic
	 * adapter setup.  Once we've done this, many of our adapter
	 * parameters can no longer be changed ...
	 */
	if ((adapter->flags & FULL_INIT_DONE) == 0) {
		err = setup_sge_queues(adapter);
		if (err)
			return err;
		err = setup_rss(adapter);
		if (err) {
			t4vf_free_sge_resources(adapter);
			return err;
		}

		if (adapter->flags & USING_MSIX)
			name_msix_vecs(adapter);
		adapter->flags |= FULL_INIT_DONE;
	}

	/*
	 * Acquire our interrupt resources.  We only support MSI-X and MSI.
	 */
	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
	if (adapter->flags & USING_MSIX)
		err = request_msix_queue_irqs(adapter);
	else
		err = request_irq(adapter->pdev->irq,
				  t4vf_intr_handler(adapter), 0,
				  adapter->name, adapter);
	if (err) {
		dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
			err);
		return err;
	}

	/*
	 * Enable NAPI ingress processing and return success.
	 */
	enable_rx(adapter);
	t4vf_sge_start(adapter);
	return 0;
}

/*
 * Bring the adapter down.  Called whenever the last "port" (Virtual
 * Interface) closed.  (Note that this routine is called "cxgb_down" in the PF
 * Driver.)
 */
static void adapter_down(struct adapter *adapter)
{
	/*
	 * Free interrupt resources.
	 */
	if (adapter->flags & USING_MSIX)
		free_msix_queue_irqs(adapter);
	else
		free_irq(adapter->pdev->irq, adapter);

	/*
	 * Wait for NAPI handlers to finish.
	 */
	quiesce_rx(adapter);
}

/*
 * Start up a net device.
 */
static int cxgb4vf_open(struct net_device *dev)
{
	int err;
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;

	/*
	 * If this is the first interface that we're opening on the "adapter",
	 * bring the "adapter" up now.
	 */
	if (adapter->open_device_map == 0) {
		err = adapter_up(adapter);
		if (err)
			return err;
	}

	/*
	 * Note that this interface is up and start everything up ...
	 */
	netif_set_real_num_tx_queues(dev, pi->nqsets);
	err = netif_set_real_num_rx_queues(dev, pi->nqsets);
	if (err)
		goto err_unwind;
	err = link_start(dev);
	if (err)
		goto err_unwind;

	netif_tx_start_all_queues(dev);
	set_bit(pi->port_id, &adapter->open_device_map);
	return 0;

err_unwind:
	if (adapter->open_device_map == 0)
		adapter_down(adapter);
	return err;
}

/*
 * Shut down a net device.  This routine is called "cxgb_close" in the PF
 * Driver ...
 */
static int cxgb4vf_stop(struct net_device *dev)
{
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;

	netif_tx_stop_all_queues(dev);
	netif_carrier_off(dev);
	t4vf_enable_vi(adapter, pi->viid, false, false);
	pi->link_cfg.link_ok = 0;

	clear_bit(pi->port_id, &adapter->open_device_map);
	if (adapter->open_device_map == 0)
		adapter_down(adapter);
	return 0;
}

/*
 * Translate our basic statistics into the standard "ifconfig" statistics.
 */
static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
{
	struct t4vf_port_stats stats;
	struct port_info *pi = netdev2pinfo(dev);
	struct adapter *adapter = pi->adapter;
	struct net_device_stats *ns = &dev->stats;
	int err;

	spin_lock(&adapter->stats_lock);
	err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
	spin_unlock(&adapter->stats_lock);

	memset(ns, 0, sizeof(*ns));
	if (err)
		return ns;

	ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
			stats.tx_ucast_bytes + stats.tx_offload_bytes);
	ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
			  stats.tx_ucast_frames + stats.tx_offload_frames);
	ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
			stats.rx_ucast_bytes);
	ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
			  stats.rx_ucast_frames);
	ns->multicast = stats.rx_mcast_frames;
	ns->tx_errors = stats.tx_drop_frames;
	ns->rx_errors = stats.rx_err_frames;

	return ns;
}

/*
 * Collect up to maxaddrs worth of a netdevice's unicast addresses, starting
 * at a specified offset within the list, into an array of address pointers
 * and return the number collected.
 */
static inline unsigned int collect_netdev_uc_list_addrs(const struct net_device *dev,
							const u8 **addr,
							unsigned int offset,
							unsigned int maxaddrs)
{
	unsigned int index = 0;
	unsigned int naddr = 0;
	const struct netdev_hw_addr *ha;

	for_each_dev_addr(dev, ha)
		if (index++ >= offset) {
			addr[naddr++] = ha->addr;
			if (naddr >= maxaddrs)
				break;
		}
	return naddr;
}

/*
 * Collect up to maxaddrs worth of a netdevice's multicast addresses, starting
 * at a specified offset within the list, into an array of address pointers
 * and return the number collected.
 */
static inline unsigned int collect_netdev_mc_list_addrs(const struct net_device *dev,
							const u8 **addr,
							unsigned int offset,
							unsigned int maxaddrs)
{
	unsigned int index = 0;
	unsigned int naddr = 0;
	const struct netdev_hw_addr *ha;

	netdev_for_each_mc_addr(ha, dev)
		if (index++ >= offset) {
			addr[naddr++] = ha->addr;
			if (naddr >= maxaddrs)
				break;
		}
	return naddr;
}

/*
 * Configure the exact and hash address filters to handle a port's multicast
 * and secondary unicast MAC addresses.
 */
static int set_addr_filters(const struct net_device *dev, bool sleep)
{
	u64 mhash = 0;
	u64 uhash = 0;
	bool free = true;
	unsigned int offset, naddr;
	const u8 *addr[7];
	int ret;
	const struct port_info *pi = netdev_priv(dev);

	/* first do the secondary unicast addresses */
	for (offset = 0; ; offset += naddr) {
		naddr = collect_netdev_uc_list_addrs(dev, addr, offset,
						     ARRAY_SIZE(addr));
		if (naddr == 0)
			break;

		ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
					  naddr, addr, NULL, &uhash, sleep);
		if (ret < 0)
			return ret;

		free = false;
	}

	/* next set up the multicast addresses */
	for (offset = 0; ; offset += naddr) {
		naddr = collect_netdev_mc_list_addrs(dev, addr, offset,
						     ARRAY_SIZE(addr));
		if (naddr == 0)
			break;

		ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
					  naddr, addr, NULL, &mhash, sleep);
		if (ret < 0)
			return ret;
		free = false;
	}

	return t4vf_set_addr_hash(pi->adapter, pi->viid, uhash != 0,
				  uhash | mhash, sleep);
}
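
/*
 * Illustrative walk-through (no new logic): addr[] in set_addr_filters()
 * holds at most 7 pointers, so address lists reach the firmware in chunks
 * of up to 7.  A netdevice with 10 secondary unicast addresses therefore
 * makes two t4vf_alloc_mac_filt() calls (7 then 3), with free == true
 * only on the first call so that filters from a previous pass are
 * replaced rather than accumulated.
 */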

/*
 * Set RX properties of a port, such as promiscuity, address filters, and MTU.
 * If @mtu is -1 it is left unchanged.
 */
static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
{
	int ret;
	struct port_info *pi = netdev_priv(dev);

	ret = set_addr_filters(dev, sleep_ok);
	if (ret == 0)
		ret = t4vf_set_rxmode(pi->adapter, pi->viid, -1,
				      (dev->flags & IFF_PROMISC) != 0,
				      (dev->flags & IFF_ALLMULTI) != 0,
				      1, -1, sleep_ok);
	return ret;
}

/*
 * Set the current receive modes on the device.
 */
static void cxgb4vf_set_rxmode(struct net_device *dev)
{
	/* unfortunately we can't return errors to the stack */
	set_rxmode(dev, -1, false);
}

/*
 * Find the entry in the interrupt holdoff timer value array which comes
 * closest to the specified interrupt holdoff value.
 */
static int closest_timer(const struct sge *s, int us)
{
	int i, timer_idx = 0, min_delta = INT_MAX;

	for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
		int delta = us - s->timer_val[i];
		if (delta < 0)
			delta = -delta;
		if (delta < min_delta) {
			min_delta = delta;
			timer_idx = i;
		}
	}
	return timer_idx;
}

static int closest_thres(const struct sge *s, int thres)
{
	int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;

	for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
		delta = thres - s->counter_val[i];
		if (delta < 0)
			delta = -delta;
		if (delta < min_delta) {
			min_delta = delta;
			pktcnt_idx = i;
		}
	}
	return pktcnt_idx;
}

/*
 * Return a queue's interrupt hold-off time in us.  0 means no timer.
 */
static unsigned int qtimer_val(const struct adapter *adapter,
			       const struct sge_rspq *rspq)
{
	unsigned int timer_idx = QINTR_TIMER_IDX_G(rspq->intr_params);

	return timer_idx < SGE_NTIMERS
		? adapter->sge.timer_val[timer_idx]
		: 0;
}

/**
 *	set_rxq_intr_params - set a queue's interrupt holdoff parameters
 *	@adapter: the adapter
 *	@rspq: the RX response queue
 *	@us: the hold-off time in us, or 0 to disable timer
 *	@cnt: the hold-off packet count, or 0 to disable counter
 *
 *	Sets an RX response queue's interrupt hold-off time and packet count.
 *	At least one of the two needs to be enabled for the queue to generate
 *	interrupts.
 */
static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
			       unsigned int us, unsigned int cnt)
{
	unsigned int timer_idx;

	/*
	 * If both the interrupt holdoff timer and count are specified as
	 * zero, default to a holdoff count of 1 ...
	 */
	if ((us | cnt) == 0)
		cnt = 1;

	/*
	 * If an interrupt holdoff count has been specified, then find the
	 * closest configured holdoff count and use that.  If the response
	 * queue has already been created, then update its queue context
	 * parameters ...
	 */
	if (cnt) {
		int err;
		u32 v, pktcnt_idx;

		pktcnt_idx = closest_thres(&adapter->sge, cnt);
		if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
			v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
			    FW_PARAMS_PARAM_X_V(
					FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
			    FW_PARAMS_PARAM_YZ_V(rspq->cntxt_id);
			err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
			if (err)
				return err;
		}
		rspq->pktcnt_idx = pktcnt_idx;
	}

	/*
	 * Compute the closest holdoff timer index from the supplied holdoff
	 * timer value.
	 */
	timer_idx = (us == 0
		     ? SGE_TIMER_RSTRT_CNTR
		     : closest_timer(&adapter->sge, us));

	/*
	 * Update the response queue's interrupt coalescing parameters and
	 * return success.
	 */
	rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
			     QINTR_CNT_EN_V(cnt > 0));
	return 0;
}
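
/*
 * Illustrative call path (no new logic): "ethtool -C ethX rx-usecs 5
 * rx-frames 8" arrives here via cxgb4vf_set_coalesce() below with us == 5
 * and cnt == 8; closest_timer()/closest_thres() then snap those to the
 * nearest entries of sge.timer_val[]/sge.counter_val[], so the effective
 * values may differ slightly from what was requested.
 */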

/*
 * Return a version number to identify the type of adapter.  The scheme is:
 * - bits 0..9: chip version
 * - bits 10..15: chip revision
 */
static inline unsigned int mk_adap_vers(const struct adapter *adapter)
{
	/*
	 * Chip version 4, revision 0x3f (cxgb4vf).
	 */
	return CHELSIO_CHIP_VERSION(adapter->params.chip) | (0x3f << 10);
}
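
/*
 * Worked example (illustrative): on a T4 part CHELSIO_CHIP_VERSION()
 * yields 4, so the version reported to ethtool is 4 | (0x3f << 10) =
 * 0xfc04, with 0x3f serving as a pseudo-revision marking the VF register
 * map rather than a real chip revision.
 */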

/*
 * Execute the specified ioctl command.
 */
static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int ret = 0;

	switch (cmd) {
	    /*
	     * The VF Driver doesn't have access to any of the other
	     * common Ethernet device ioctl()'s (like reading/writing
	     * PHY registers, etc.).
	     */

	default:
		ret = -EOPNOTSUPP;
		break;
	}
	return ret;
}

/*
 * Change the device's MTU.
 */
static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
{
	int ret;
	struct port_info *pi = netdev_priv(dev);

	/* accommodate SACK */
	if (new_mtu < 81)
		return -EINVAL;

	ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
			      -1, -1, -1, -1, true);
	if (!ret)
		dev->mtu = new_mtu;
	return ret;
}

static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
	netdev_features_t features)
{
	/*
	 * Since there is no support for separate rx/tx vlan accel
	 * enable/disable make sure tx flag is always in same state as rx.
	 */
	if (features & NETIF_F_HW_VLAN_CTAG_RX)
		features |= NETIF_F_HW_VLAN_CTAG_TX;
	else
		features &= ~NETIF_F_HW_VLAN_CTAG_TX;

	return features;
}

static int cxgb4vf_set_features(struct net_device *dev,
	netdev_features_t features)
{
	struct port_info *pi = netdev_priv(dev);
	netdev_features_t changed = dev->features ^ features;

	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
		t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
				features & NETIF_F_HW_VLAN_CTAG_TX, 0);

	return 0;
}

/*
 * Change the device's MAC address.
 */
static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
{
	int ret;
	struct sockaddr *addr = _addr;
	struct port_info *pi = netdev_priv(dev);

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt,
			      addr->sa_data, true);
	if (ret < 0)
		return ret;

	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
	pi->xact_addr_filt = ret;
	return 0;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Poll all of our receive queues.  This is called outside of normal interrupt
 * context.
 */
static void cxgb4vf_poll_controller(struct net_device *dev)
{
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;

	if (adapter->flags & USING_MSIX) {
		struct sge_eth_rxq *rxq;
		int nqsets;

		rxq = &adapter->sge.ethrxq[pi->first_qset];
		for (nqsets = pi->nqsets; nqsets; nqsets--) {
			t4vf_sge_intr_msix(0, &rxq->rspq);
			rxq++;
		}
	} else
		t4vf_intr_handler(adapter)(0, adapter);
}
#endif

/*
 * Ethtool operations.
 * ===================
 *
 * Note that we don't support any ethtool operations which change the physical
 * state of the port to which we're linked.
 */

static unsigned int t4vf_from_fw_linkcaps(enum fw_port_type type,
					  unsigned int caps)
{
	unsigned int v = 0;

	if (type == FW_PORT_TYPE_BT_SGMII || type == FW_PORT_TYPE_BT_XFI ||
	    type == FW_PORT_TYPE_BT_XAUI) {
		v |= SUPPORTED_TP;
		if (caps & FW_PORT_CAP_SPEED_100M)
			v |= SUPPORTED_100baseT_Full;
		if (caps & FW_PORT_CAP_SPEED_1G)
			v |= SUPPORTED_1000baseT_Full;
		if (caps & FW_PORT_CAP_SPEED_10G)
			v |= SUPPORTED_10000baseT_Full;
	} else if (type == FW_PORT_TYPE_KX4 || type == FW_PORT_TYPE_KX) {
		v |= SUPPORTED_Backplane;
		if (caps & FW_PORT_CAP_SPEED_1G)
			v |= SUPPORTED_1000baseKX_Full;
		if (caps & FW_PORT_CAP_SPEED_10G)
			v |= SUPPORTED_10000baseKX4_Full;
	} else if (type == FW_PORT_TYPE_KR)
		v |= SUPPORTED_Backplane | SUPPORTED_10000baseKR_Full;
	else if (type == FW_PORT_TYPE_BP_AP)
		v |= SUPPORTED_Backplane | SUPPORTED_10000baseR_FEC |
		     SUPPORTED_10000baseKR_Full | SUPPORTED_1000baseKX_Full;
	else if (type == FW_PORT_TYPE_BP4_AP)
		v |= SUPPORTED_Backplane | SUPPORTED_10000baseR_FEC |
		     SUPPORTED_10000baseKR_Full | SUPPORTED_1000baseKX_Full |
		     SUPPORTED_10000baseKX4_Full;
	else if (type == FW_PORT_TYPE_FIBER_XFI ||
		 type == FW_PORT_TYPE_FIBER_XAUI ||
		 type == FW_PORT_TYPE_SFP ||
		 type == FW_PORT_TYPE_QSFP_10G ||
		 type == FW_PORT_TYPE_QSA) {
		v |= SUPPORTED_FIBRE;
		if (caps & FW_PORT_CAP_SPEED_1G)
			v |= SUPPORTED_1000baseT_Full;
		if (caps & FW_PORT_CAP_SPEED_10G)
			v |= SUPPORTED_10000baseT_Full;
	} else if (type == FW_PORT_TYPE_BP40_BA ||
		   type == FW_PORT_TYPE_QSFP) {
		v |= SUPPORTED_40000baseSR4_Full;
		v |= SUPPORTED_FIBRE;
	}

	if (caps & FW_PORT_CAP_ANEG)
		v |= SUPPORTED_Autoneg;
	return v;
}
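
/*
 * Example mapping (illustrative): an SFP+ port (FW_PORT_TYPE_SFP) whose
 * capabilities include FW_PORT_CAP_SPEED_1G, FW_PORT_CAP_SPEED_10G and
 * FW_PORT_CAP_ANEG comes back as SUPPORTED_FIBRE |
 * SUPPORTED_1000baseT_Full | SUPPORTED_10000baseT_Full |
 * SUPPORTED_Autoneg per the decoding above.
 */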

static int cxgb4vf_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
	const struct port_info *p = netdev_priv(dev);

	if (p->port_type == FW_PORT_TYPE_BT_SGMII ||
	    p->port_type == FW_PORT_TYPE_BT_XFI ||
	    p->port_type == FW_PORT_TYPE_BT_XAUI)
		cmd->port = PORT_TP;
	else if (p->port_type == FW_PORT_TYPE_FIBER_XFI ||
		 p->port_type == FW_PORT_TYPE_FIBER_XAUI)
		cmd->port = PORT_FIBRE;
	else if (p->port_type == FW_PORT_TYPE_SFP ||
		 p->port_type == FW_PORT_TYPE_QSFP_10G ||
		 p->port_type == FW_PORT_TYPE_QSA ||
		 p->port_type == FW_PORT_TYPE_QSFP) {
		if (p->mod_type == FW_PORT_MOD_TYPE_LR ||
		    p->mod_type == FW_PORT_MOD_TYPE_SR ||
		    p->mod_type == FW_PORT_MOD_TYPE_ER ||
		    p->mod_type == FW_PORT_MOD_TYPE_LRM)
			cmd->port = PORT_FIBRE;
		else if (p->mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
			 p->mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
			cmd->port = PORT_DA;
		else
			cmd->port = PORT_OTHER;
	} else
		cmd->port = PORT_OTHER;

	if (p->mdio_addr >= 0) {
		cmd->phy_address = p->mdio_addr;
		cmd->transceiver = XCVR_EXTERNAL;
		cmd->mdio_support = p->port_type == FW_PORT_TYPE_BT_SGMII ?
			MDIO_SUPPORTS_C22 : MDIO_SUPPORTS_C45;
	} else {
		cmd->phy_address = 0;  /* not really, but no better option */
		cmd->transceiver = XCVR_INTERNAL;
		cmd->mdio_support = 0;
	}

	cmd->supported = t4vf_from_fw_linkcaps(p->port_type,
					       p->link_cfg.supported);
	cmd->advertising = t4vf_from_fw_linkcaps(p->port_type,
					    p->link_cfg.advertising);
	ethtool_cmd_speed_set(cmd,
			      netif_carrier_ok(dev) ? p->link_cfg.speed : 0);
	cmd->duplex = DUPLEX_FULL;
	cmd->autoneg = p->link_cfg.autoneg;
	cmd->maxtxpkt = 0;
	cmd->maxrxpkt = 0;
	return 0;
}

/*
 * Return our driver information.
 */
static void cxgb4vf_get_drvinfo(struct net_device *dev,
				struct ethtool_drvinfo *drvinfo)
{
	struct adapter *adapter = netdev2adap(dev);

	strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
	strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
	strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)),
		sizeof(drvinfo->bus_info));
	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
		 "%u.%u.%u.%u, TP %u.%u.%u.%u",
		 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.fwrev),
		 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.fwrev),
		 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.fwrev),
		 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.fwrev),
		 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.tprev),
		 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.tprev),
		 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.tprev),
		 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.tprev));
}

/*
 * Return current adapter message level.
 */
static u32 cxgb4vf_get_msglevel(struct net_device *dev)
{
	return netdev2adap(dev)->msg_enable;
}

/*
 * Set current adapter message level.
 */
static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
{
	netdev2adap(dev)->msg_enable = msglevel;
}

/*
 * Return the device's current Queue Set ring size parameters along with the
 * allowed maximum values.  Since ethtool doesn't understand the concept of
 * multi-queue devices, we just return the current values associated with the
 * first Queue Set.
 */
static void cxgb4vf_get_ringparam(struct net_device *dev,
				  struct ethtool_ringparam *rp)
{
	const struct port_info *pi = netdev_priv(dev);
	const struct sge *s = &pi->adapter->sge;

	rp->rx_max_pending = MAX_RX_BUFFERS;
	rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
	rp->rx_jumbo_max_pending = 0;
	rp->tx_max_pending = MAX_TXQ_ENTRIES;

	rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
	rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
	rp->rx_jumbo_pending = 0;
	rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
}

/*
 * Set the Queue Set ring size parameters for the device.  Again, since
 * ethtool doesn't allow for the concept of multiple queues per device, we'll
 * apply these new values across all of the Queue Sets associated with the
 * device -- after vetting them of course!
 */
static int cxgb4vf_set_ringparam(struct net_device *dev,
				 struct ethtool_ringparam *rp)
{
	const struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;
	struct sge *s = &adapter->sge;
	int qs;

	if (rp->rx_pending > MAX_RX_BUFFERS ||
	    rp->rx_jumbo_pending ||
	    rp->tx_pending > MAX_TXQ_ENTRIES ||
	    rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
	    rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
	    rp->rx_pending < MIN_FL_ENTRIES ||
	    rp->tx_pending < MIN_TXQ_ENTRIES)
		return -EINVAL;

	if (adapter->flags & FULL_INIT_DONE)
		return -EBUSY;

	for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
		s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
		s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
		s->ethtxq[qs].q.size = rp->tx_pending;
	}
	return 0;
}

/*
 * Return the interrupt holdoff timer and count for the first Queue Set on the
 * device.  Our extension ioctl() (the cxgbtool interface) allows the
 * interrupt holdoff timer to be read on all of the device's Queue Sets.
 */
static int cxgb4vf_get_coalesce(struct net_device *dev,
				struct ethtool_coalesce *coalesce)
{
	const struct port_info *pi = netdev_priv(dev);
	const struct adapter *adapter = pi->adapter;
	const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;

	coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
	coalesce->rx_max_coalesced_frames =
		((rspq->intr_params & QINTR_CNT_EN_F)
		 ? adapter->sge.counter_val[rspq->pktcnt_idx]
		 : 0);
	return 0;
}

/*
 * Set the RX interrupt holdoff timer and count for the first Queue Set on the
 * interface.  Our extension ioctl() (the cxgbtool interface) allows us to set
 * the interrupt holdoff timer on any of the device's Queue Sets.
 */
static int cxgb4vf_set_coalesce(struct net_device *dev,
				struct ethtool_coalesce *coalesce)
{
	const struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;

	return set_rxq_intr_params(adapter,
				   &adapter->sge.ethrxq[pi->first_qset].rspq,
				   coalesce->rx_coalesce_usecs,
				   coalesce->rx_max_coalesced_frames);
}

/*
 * Report current port link pause parameter settings.
 */
static void cxgb4vf_get_pauseparam(struct net_device *dev,
				   struct ethtool_pauseparam *pauseparam)
{
	struct port_info *pi = netdev_priv(dev);

	pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
	pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
	pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
}

/*
 * Identify the port by blinking the port's LED.
 */
static int cxgb4vf_phys_id(struct net_device *dev,
			   enum ethtool_phys_id_state state)
{
	unsigned int val;
	struct port_info *pi = netdev_priv(dev);

	if (state == ETHTOOL_ID_ACTIVE)
		val = 0xffff;
	else if (state == ETHTOOL_ID_INACTIVE)
		val = 0;
	else
		return -EINVAL;

	return t4vf_identify_port(pi->adapter, pi->viid, val);
}

/*
 * Port stats maintained per queue of the port.
 */
struct queue_port_stats {
	u64 tso;
	u64 tx_csum;
	u64 rx_csum;
	u64 vlan_ex;
	u64 vlan_ins;
	u64 lro_pkts;
	u64 lro_merged;
};

/*
 * Strings for the ETH_SS_STATS statistics set ("ethtool -S").  Note that
 * these need to match the order of statistics returned by
 * t4vf_get_port_stats().
 */
static const char stats_strings[][ETH_GSTRING_LEN] = {
	/*
	 * These must match the layout of the t4vf_port_stats structure.
	 */
	"TxBroadcastBytes  ",
	"TxBroadcastFrames ",
	"TxMulticastBytes  ",
	"TxMulticastFrames ",
	"TxUnicastBytes    ",
	"TxUnicastFrames   ",
	"TxDroppedFrames   ",
	"TxOffloadBytes    ",
	"TxOffloadFrames   ",
	"RxBroadcastBytes  ",
	"RxBroadcastFrames ",
	"RxMulticastBytes  ",
	"RxMulticastFrames ",
	"RxUnicastBytes    ",
	"RxUnicastFrames   ",
	"RxErrorFrames     ",

	/*
	 * These are accumulated per-queue statistics and must match the
	 * order of the fields in the queue_port_stats structure.
	 */
	"TSO               ",
	"TxCsumOffload     ",
	"RxCsumGood        ",
	"VLANextractions   ",
	"VLANinsertions    ",
	"GROPackets        ",
	"GROMerged         ",
};

/*
 * Return the number of statistics in the specified statistics set.
 */
static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
{
	switch (sset) {
	case ETH_SS_STATS:
		return ARRAY_SIZE(stats_strings);
	default:
		return -EOPNOTSUPP;
	}
	/*NOTREACHED*/
}

/*
 * Return the strings for the specified statistics set.
 */
static void cxgb4vf_get_strings(struct net_device *dev,
				u32 sset,
				u8 *data)
{
	switch (sset) {
	case ETH_SS_STATS:
		memcpy(data, stats_strings, sizeof(stats_strings));
		break;
	}
}

/*
 * Small utility routine to accumulate queue statistics across the queues of
 * a "port".
 */
static void collect_sge_port_stats(const struct adapter *adapter,
				   const struct port_info *pi,
				   struct queue_port_stats *stats)
{
	const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
	const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
	int qs;

	memset(stats, 0, sizeof(*stats));
	for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
		stats->tso += txq->tso;
		stats->tx_csum += txq->tx_cso;
		stats->rx_csum += rxq->stats.rx_cso;
		stats->vlan_ex += rxq->stats.vlan_ex;
		stats->vlan_ins += txq->vlan_ins;
		stats->lro_pkts += rxq->stats.lro_pkts;
		stats->lro_merged += rxq->stats.lro_merged;
	}
}

/*
 * Return the ETH_SS_STATS statistics set.
 */
static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
				      struct ethtool_stats *stats,
				      u64 *data)
{
	struct port_info *pi = netdev2pinfo(dev);
	struct adapter *adapter = pi->adapter;
	int err = t4vf_get_port_stats(adapter, pi->pidx,
				      (struct t4vf_port_stats *)data);
	if (err)
		memset(data, 0, sizeof(struct t4vf_port_stats));

	data += sizeof(struct t4vf_port_stats) / sizeof(u64);
	collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
}

/*
 * Return the size of our register map.
 */
static int cxgb4vf_get_regs_len(struct net_device *dev)
{
	return T4VF_REGMAP_SIZE;
}

/*
 * Dump a block of registers, start to end inclusive, into a buffer.
 */
static void reg_block_dump(struct adapter *adapter, void *regbuf,
			   unsigned int start, unsigned int end)
{
	u32 *bp = regbuf + start - T4VF_REGMAP_START;

	for ( ; start <= end; start += sizeof(u32)) {
		/*
		 * Avoid reading the Mailbox Control register since that
		 * can trigger a Mailbox Ownership Arbitration cycle and
		 * interfere with communication with the firmware.
		 */
		if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
			*bp++ = 0xffff;
		else
			*bp++ = t4_read_reg(adapter, start);
	}
}

/*
 * Copy our entire register map into the provided buffer.
 */
static void cxgb4vf_get_regs(struct net_device *dev,
			     struct ethtool_regs *regs,
			     void *regbuf)
{
	struct adapter *adapter = netdev2adap(dev);

	regs->version = mk_adap_vers(adapter);

	/*
	 * Fill in register buffer with our register map.
	 */
	memset(regbuf, 0, T4VF_REGMAP_SIZE);

	reg_block_dump(adapter, regbuf,
		       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
		       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
	reg_block_dump(adapter, regbuf,
		       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
		       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);

	/* T5 adds new registers in the PL Register map.
	 */
	reg_block_dump(adapter, regbuf,
		       T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
		       T4VF_PL_BASE_ADDR + (is_t4(adapter->params.chip)
		       ? PL_VF_WHOAMI_A : PL_VF_REVISION_A));
	reg_block_dump(adapter, regbuf,
		       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
		       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);

	reg_block_dump(adapter, regbuf,
		       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
		       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
}

/*
 * Report current Wake On LAN settings.
 */
static void cxgb4vf_get_wol(struct net_device *dev,
			    struct ethtool_wolinfo *wol)
{
	wol->supported = 0;
	wol->wolopts = 0;
	memset(&wol->sopass, 0, sizeof(wol->sopass));
}

/*
 * TCP Segmentation Offload flags which we support.
 */
#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)

static const struct ethtool_ops cxgb4vf_ethtool_ops = {
	.get_settings		= cxgb4vf_get_settings,
	.get_drvinfo		= cxgb4vf_get_drvinfo,
	.get_msglevel		= cxgb4vf_get_msglevel,
	.set_msglevel		= cxgb4vf_set_msglevel,
	.get_ringparam		= cxgb4vf_get_ringparam,
	.set_ringparam		= cxgb4vf_set_ringparam,
	.get_coalesce		= cxgb4vf_get_coalesce,
	.set_coalesce		= cxgb4vf_set_coalesce,
	.get_pauseparam		= cxgb4vf_get_pauseparam,
	.get_link		= ethtool_op_get_link,
	.get_strings		= cxgb4vf_get_strings,
	.set_phys_id		= cxgb4vf_phys_id,
	.get_sset_count		= cxgb4vf_get_sset_count,
	.get_ethtool_stats	= cxgb4vf_get_ethtool_stats,
	.get_regs_len		= cxgb4vf_get_regs_len,
	.get_regs		= cxgb4vf_get_regs,
	.get_wol		= cxgb4vf_get_wol,
};

/*
 * /sys/kernel/debug/cxgb4vf support code and data.
 * ================================================
 */

/*
 * Show SGE Queue Set information.  We display QPL Queue Sets per line.
 */
#define QPL	4

static int sge_qinfo_show(struct seq_file *seq, void *v)
{
	struct adapter *adapter = seq->private;
	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
	int qs, r = (uintptr_t)v - 1;

	if (r)
		seq_putc(seq, '\n');

	#define S3(fmt_spec, s, v) \
		do {\
			seq_printf(seq, "%-12s", s); \
			for (qs = 0; qs < n; ++qs) \
				seq_printf(seq, " %16" fmt_spec, v); \
			seq_putc(seq, '\n'); \
		} while (0)
	#define S(s, v)		S3("s", s, v)
	#define T(s, v)		S3("u", s, txq[qs].v)
	#define R(s, v)		S3("u", s, rxq[qs].v)

	if (r < eth_entries) {
		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
		int n = min(QPL, adapter->sge.ethqsets - QPL * r);

		S("QType:", "Ethernet");
		S("Interface:",
		  (rxq[qs].rspq.netdev
		   ? rxq[qs].rspq.netdev->name
		   : "N/A"));
		S3("d", "Port:",
		   (rxq[qs].rspq.netdev
		    ? ((struct port_info *)
		       netdev_priv(rxq[qs].rspq.netdev))->port_id
		    : -1));
		T("TxQ ID:", q.abs_id);
		T("TxQ size:", q.size);
		T("TxQ inuse:", q.in_use);
		T("TxQ PIdx:", q.pidx);
		T("TxQ CIdx:", q.cidx);
		R("RspQ ID:", rspq.abs_id);
		R("RspQ size:", rspq.size);
		R("RspQE size:", rspq.iqe_len);
		S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
		S3("u", "Intr pktcnt:",
		   adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
		R("RspQ CIdx:", rspq.cidx);
		R("RspQ Gen:", rspq.gen);
		R("FL ID:", fl.abs_id);
		R("FL size:", fl.size - MIN_FL_RESID);
		R("FL avail:", fl.avail);
		R("FL PIdx:", fl.pidx);
		R("FL CIdx:", fl.cidx);
		return 0;
	}

	r -= eth_entries;
	if (r == 0) {
		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;

		seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
			   qtimer_val(adapter, evtq));
		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
			   adapter->sge.counter_val[evtq->pktcnt_idx]);
		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
	} else if (r == 1) {
		const struct sge_rspq *intrq = &adapter->sge.intrq;

		seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
			   qtimer_val(adapter, intrq));
		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
			   adapter->sge.counter_val[intrq->pktcnt_idx]);
		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx);
		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
	}

	#undef R
	#undef T
	#undef S
	#undef S3

	return 0;
}

/*
 * Return the number of "entries" in our "file".  We group the multi-Queue
 * sections with QPL Queue Sets per "entry".  The sections of the output are:
 *
 *     Ethernet RX/TX Queue Sets
 *     Firmware Event Queue
 *     Forwarded Interrupt Queue (if in MSI mode)
 */
static int sge_queue_entries(const struct adapter *adapter)
{
	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
		((adapter->flags & USING_MSI) != 0);
}
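
/*
 * For instance (illustrative numbers): an adapter with 8 Ethernet Queue
 * Sets running in MSI mode yields DIV_ROUND_UP(8, 4) + 1 + 1 = 4 entries:
 * two screens of Ethernet Queue Sets, one Firmware Event Queue entry and
 * one Forwarded Interrupt Queue entry.
 */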
1834
1835static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
1836{
1837	int entries = sge_queue_entries(seq->private);
1838
1839	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1840}
1841
1842static void sge_queue_stop(struct seq_file *seq, void *v)
1843{
1844}
1845
1846static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
1847{
1848	int entries = sge_queue_entries(seq->private);
1849
1850	++*pos;
1851	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1852}

static const struct seq_operations sge_qinfo_seq_ops = {
	.start = sge_queue_start,
	.next  = sge_queue_next,
	.stop  = sge_queue_stop,
	.show  = sge_qinfo_show
};

static int sge_qinfo_open(struct inode *inode, struct file *file)
{
	int res = seq_open(file, &sge_qinfo_seq_ops);

	if (!res) {
		struct seq_file *seq = file->private_data;
		seq->private = inode->i_private;
	}
	return res;
}

static const struct file_operations sge_qinfo_debugfs_fops = {
	.owner   = THIS_MODULE,
	.open    = sge_qinfo_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

/*
 * Show SGE Queue Set statistics.  We display QPL Queue Sets per line.
 */
#define QPL	4

static int sge_qstats_show(struct seq_file *seq, void *v)
{
	struct adapter *adapter = seq->private;
	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
	int qs, r = (uintptr_t)v - 1;

	if (r)
		seq_putc(seq, '\n');

	#define S3(fmt, s, v) \
		do { \
			seq_printf(seq, "%-16s", s); \
			for (qs = 0; qs < n; ++qs) \
				seq_printf(seq, " %8" fmt, v); \
			seq_putc(seq, '\n'); \
		} while (0)
	#define S(s, v)		S3("s", s, v)

	#define T3(fmt, s, v)	S3(fmt, s, txq[qs].v)
	#define T(s, v)		T3("lu", s, v)

	#define R3(fmt, s, v)	S3(fmt, s, rxq[qs].v)
	#define R(s, v)		R3("lu", s, v)

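	/*
	 * For instance, R("RxPackets:", stats.pkts) below prints a single
	 * row labelled "RxPackets:" with one %8lu column per Queue Set in
	 * this group, each value read from rxq[qs].stats.pkts.
	 */
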
	if (r < eth_entries) {
		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
		int n = min(QPL, adapter->sge.ethqsets - QPL * r);

		S("QType:", "Ethernet");
		S("Interface:",
		  (rxq[qs].rspq.netdev
		   ? rxq[qs].rspq.netdev->name
		   : "N/A"));
		R3("u", "RspQNullInts:", rspq.unhandled_irqs);
		R("RxPackets:", stats.pkts);
		R("RxCSO:", stats.rx_cso);
		R("VLANxtract:", stats.vlan_ex);
		R("LROmerged:", stats.lro_merged);
		R("LROpackets:", stats.lro_pkts);
		R("RxDrops:", stats.rx_drops);
		T("TSO:", tso);
		T("TxCSO:", tx_cso);
		T("VLANins:", vlan_ins);
		T("TxQFull:", q.stops);
		T("TxQRestarts:", q.restarts);
		T("TxMapErr:", mapping_err);
		R("FLAllocErr:", fl.alloc_failed);
		R("FLLrgAlcErr:", fl.large_alloc_failed);
		R("FLStarving:", fl.starving);
		return 0;
	}

	r -= eth_entries;
	if (r == 0) {
		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;

		seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue");
		seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
			   evtq->unhandled_irqs);
		seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
		seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
	} else if (r == 1) {
		const struct sge_rspq *intrq = &adapter->sge.intrq;

		seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue");
		seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
			   intrq->unhandled_irqs);
		seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
		seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
	}

	#undef R
	#undef T
	#undef S
	#undef R3
	#undef T3
	#undef S3

	return 0;
}

/*
 * Return the number of "entries" in our "file".  We group the multi-Queue
 * sections with QPL Queue Sets per "entry".  The sections of the output are:
 *
 *     Ethernet RX/TX Queue Sets
 *     Firmware Event Queue
 *     Forwarded Interrupt Queue (if in MSI mode)
 */
static int sge_qstats_entries(const struct adapter *adapter)
{
	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
		((adapter->flags & USING_MSI) != 0);
}

static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
{
	int entries = sge_qstats_entries(seq->private);

	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
}

static void sge_qstats_stop(struct seq_file *seq, void *v)
{
}

static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
{
	int entries = sge_qstats_entries(seq->private);

	(*pos)++;
	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
}

static const struct seq_operations sge_qstats_seq_ops = {
	.start = sge_qstats_start,
	.next  = sge_qstats_next,
	.stop  = sge_qstats_stop,
	.show  = sge_qstats_show
};

static int sge_qstats_open(struct inode *inode, struct file *file)
{
	int res = seq_open(file, &sge_qstats_seq_ops);

	if (res == 0) {
		struct seq_file *seq = file->private_data;
		seq->private = inode->i_private;
	}
	return res;
}

static const struct file_operations sge_qstats_proc_fops = {
	.owner   = THIS_MODULE,
	.open    = sge_qstats_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

/*
 * Show PCI-E SR-IOV Virtual Function Resource Limits.
 */
static int resources_show(struct seq_file *seq, void *v)
{
	struct adapter *adapter = seq->private;
	struct vf_resources *vfres = &adapter->params.vfres;

	#define S(desc, fmt, var) \
		seq_printf(seq, "%-60s " fmt "\n", \
			   desc " (" #var "):", vfres->var)

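	/*
	 * E.g. S("Virtual Interfaces", "%d", nvi) prints one line of the
	 * form "Virtual Interfaces (nvi):  <value>", with the combined
	 * description and field name left-justified in a 60-character
	 * column.
	 */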
	S("Virtual Interfaces", "%d", nvi);
	S("Egress Queues", "%d", neq);
	S("Ethernet Control", "%d", nethctrl);
	S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
	S("Ingress Queues", "%d", niq);
	S("Traffic Class", "%d", tc);
	S("Port Access Rights Mask", "%#x", pmask);
	S("MAC Address Filters", "%d", nexactf);
	S("Firmware Command Read Capabilities", "%#x", r_caps);
	S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);

	#undef S

	return 0;
}

static int resources_open(struct inode *inode, struct file *file)
{
	return single_open(file, resources_show, inode->i_private);
}

static const struct file_operations resources_proc_fops = {
	.owner   = THIS_MODULE,
	.open    = resources_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

/*
 * Show Virtual Interfaces.
 */
static int interfaces_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "Interface  Port   VIID\n");
	} else {
		struct adapter *adapter = seq->private;
		int pidx = (uintptr_t)v - 2;
		struct net_device *dev = adapter->port[pidx];
		struct port_info *pi = netdev_priv(dev);

		seq_printf(seq, "%9s  %4d  %#5x\n",
			   dev->name, pi->port_id, pi->viid);
	}
	return 0;
}

static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
{
	return pos <= adapter->params.nports
		? (void *)(uintptr_t)(pos + 1)
		: NULL;
}
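
/*
 * As with the SGE queue iterators, the sequence position is biased before
 * being cast to a pointer so that a valid position is never NULL.  E.g.
 * (hypothetically) with two ports, positions 0..2 are valid: position 0
 * becomes SEQ_START_TOKEN (the header line), position 1 is returned as
 * (void *)2 and shown as port index 0, and position 2 as port index 1.
 */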

static void *interfaces_start(struct seq_file *seq, loff_t *pos)
{
	return *pos
		? interfaces_get_idx(seq->private, *pos)
		: SEQ_START_TOKEN;
}

static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return interfaces_get_idx(seq->private, *pos);
}

static void interfaces_stop(struct seq_file *seq, void *v)
{
}

static const struct seq_operations interfaces_seq_ops = {
	.start = interfaces_start,
	.next  = interfaces_next,
	.stop  = interfaces_stop,
	.show  = interfaces_show
};

static int interfaces_open(struct inode *inode, struct file *file)
{
	int res = seq_open(file, &interfaces_seq_ops);

	if (res == 0) {
		struct seq_file *seq = file->private_data;
		seq->private = inode->i_private;
	}
	return res;
}

static const struct file_operations interfaces_proc_fops = {
	.owner   = THIS_MODULE,
	.open    = interfaces_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

/*
 * /sys/kernel/debug/cxgb4vf/ files list.
 */
struct cxgb4vf_debugfs_entry {
	const char *name;		/* name of debugfs node */
	umode_t mode;			/* file system mode */
	const struct file_operations *fops;
};

static struct cxgb4vf_debugfs_entry debugfs_files[] = {
	{ "sge_qinfo",  S_IRUGO, &sge_qinfo_debugfs_fops },
	{ "sge_qstats", S_IRUGO, &sge_qstats_proc_fops },
	{ "resources",  S_IRUGO, &resources_proc_fops },
	{ "interfaces", S_IRUGO, &interfaces_proc_fops },
};

/*
 * Module and device initialization and cleanup code.
 * ==================================================
 */

/*
 * Set up our /sys/kernel/debug/cxgb4vf sub-nodes.  We assume that the
 * directory (debugfs_root) has already been set up.
 */
static int setup_debugfs(struct adapter *adapter)
{
	int i;

	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));

	/*
	 * Debugfs support is best effort.
	 */
	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
		(void)debugfs_create_file(debugfs_files[i].name,
				  debugfs_files[i].mode,
				  adapter->debugfs_root,
				  (void *)adapter,
				  debugfs_files[i].fops);

	return 0;
}

/*
 * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above.  We leave
 * it to our caller to tear down the directory (debugfs_root).
 */
static void cleanup_debugfs(struct adapter *adapter)
{
	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));

	/*
	 * Unlike our sister routine cleanup_proc(), we don't need to remove
	 * individual entries because a call will be made to
	 * debugfs_remove_recursive().  We just need to clean up any ancillary
	 * persistent state.
	 */
	/* nothing to do */
}

/*
 * Perform early "adapter" initialization.  This is where we discover what
 * adapter parameters we're going to be using and initialize basic adapter
 * hardware support.
 */
static int adap_init0(struct adapter *adapter)
{
	struct vf_resources *vfres = &adapter->params.vfres;
	struct sge_params *sge_params = &adapter->params.sge;
	struct sge *s = &adapter->sge;
	unsigned int ethqsets;
	int err;
	u32 param, val = 0;

	/*
	 * Wait for the device to become ready before proceeding ...
	 */
	err = t4vf_wait_dev_ready(adapter);
	if (err) {
		dev_err(adapter->pdev_dev, "device didn't become ready:"
			" err=%d\n", err);
		return err;
	}

	/*
	 * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
	 * 2.6.31 and later we can't call pci_reset_function() in order to
	 * issue an FLR because of a self-deadlock on the device semaphore.
	 * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
	 * cases where they're needed -- for instance, some versions of KVM
	 * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
	 * use the firmware based reset in order to reset any per function
	 * state.
	 */
	err = t4vf_fw_reset(adapter);
	if (err < 0) {
		dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
		return err;
	}

	/*
	 * Grab basic operational parameters.  These will predominantly have
	 * been set up by the Physical Function Driver or will be hard coded
	 * into the adapter.  We just have to live with them ...  Note that
	 * we _must_ get our VPD parameters before our SGE parameters because
	 * we need to know the adapter's core clock from the VPD in order to
	 * properly decode the SGE Timer Values.
	 */
	err = t4vf_get_dev_params(adapter);
	if (err) {
		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
			" device parameters: err=%d\n", err);
		return err;
	}
	err = t4vf_get_vpd_params(adapter);
	if (err) {
		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
			" VPD parameters: err=%d\n", err);
		return err;
	}
	err = t4vf_get_sge_params(adapter);
	if (err) {
		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
			" SGE parameters: err=%d\n", err);
		return err;
	}
	err = t4vf_get_rss_glb_config(adapter);
	if (err) {
		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
			" RSS parameters: err=%d\n", err);
		return err;
	}
	if (adapter->params.rss.mode !=
	    FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
		dev_err(adapter->pdev_dev, "unable to operate with global RSS"
			" mode %d\n", adapter->params.rss.mode);
		return -EINVAL;
	}
	err = t4vf_sge_init(adapter);
	if (err) {
		dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
			" err=%d\n", err);
		return err;
	}

	/* If we're running on newer firmware, let it know that we're
	 * prepared to deal with encapsulated CPL messages.  Older
	 * firmware won't understand this and we'll just get
	 * unencapsulated messages ...
	 */
	param = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
		FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP);
	val = 1;
	(void) t4vf_set_params(adapter, 1, &param, &val);

	/*
	 * Retrieve our RX interrupt holdoff timer values and counter
	 * threshold values from the SGE parameters.
	 */
	s->timer_val[0] = core_ticks_to_us(adapter,
		TIMERVALUE0_G(sge_params->sge_timer_value_0_and_1));
	s->timer_val[1] = core_ticks_to_us(adapter,
		TIMERVALUE1_G(sge_params->sge_timer_value_0_and_1));
	s->timer_val[2] = core_ticks_to_us(adapter,
		TIMERVALUE0_G(sge_params->sge_timer_value_2_and_3));
	s->timer_val[3] = core_ticks_to_us(adapter,
		TIMERVALUE1_G(sge_params->sge_timer_value_2_and_3));
	s->timer_val[4] = core_ticks_to_us(adapter,
		TIMERVALUE0_G(sge_params->sge_timer_value_4_and_5));
	s->timer_val[5] = core_ticks_to_us(adapter,
		TIMERVALUE1_G(sge_params->sge_timer_value_4_and_5));

	s->counter_val[0] = THRESHOLD_0_G(sge_params->sge_ingress_rx_threshold);
	s->counter_val[1] = THRESHOLD_1_G(sge_params->sge_ingress_rx_threshold);
	s->counter_val[2] = THRESHOLD_2_G(sge_params->sge_ingress_rx_threshold);
	s->counter_val[3] = THRESHOLD_3_G(sge_params->sge_ingress_rx_threshold);

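	/*
	 * As a purely illustrative example: with a (hypothetical) 200 MHz
	 * core clock from the VPD, a raw SGE TimerValue of 1000 core ticks
	 * would decode to 1000 / 200 = 5 microseconds of interrupt holdoff.
	 */
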
	/*
	 * Grab our Virtual Interface resource allocation, extract the
	 * features that we're interested in and do a bit of sanity testing on
	 * what we discover.
	 */
	err = t4vf_get_vfres(adapter);
	if (err) {
		dev_err(adapter->pdev_dev, "unable to get virtual interface"
			" resources: err=%d\n", err);
		return err;
	}

	/*
	 * The number of "ports" which we support is equal to the number of
	 * Virtual Interfaces with which we've been provisioned.
	 */
	adapter->params.nports = vfres->nvi;
	if (adapter->params.nports > MAX_NPORTS) {
		dev_warn(adapter->pdev_dev, "only using %d of %d allowed"
			 " virtual interfaces\n", MAX_NPORTS,
			 adapter->params.nports);
		adapter->params.nports = MAX_NPORTS;
	}

	/*
	 * We need to reserve a number of the ingress queues with Free List
	 * and Interrupt capabilities for special interrupt purposes (like
	 * asynchronous firmware messages, or forwarded interrupts if we're
	 * using MSI).  The rest of the FL/Intr-capable ingress queues will be
	 * matched up one-for-one with Ethernet/Control egress queues in order
	 * to form "Queue Sets" which will be apportioned between the "ports".
	 * For each Queue Set, we'll need the ability to allocate two Egress
	 * Contexts -- one for the Ingress Queue Free List and one for the TX
	 * Ethernet Queue.
	 */
	ethqsets = vfres->niqflint - INGQ_EXTRAS;
	if (vfres->nethctrl != ethqsets) {
		dev_warn(adapter->pdev_dev, "unequal number of [available]"
			 " ingress/egress queues (%d/%d); using minimum for"
			 " number of Queue Sets\n", ethqsets, vfres->nethctrl);
		ethqsets = min(vfres->nethctrl, ethqsets);
	}
	if (vfres->neq < ethqsets*2) {
		dev_warn(adapter->pdev_dev, "Not enough Egress Contexts (%d)"
			 " to support Queue Sets (%d); reducing allowed Queue"
			 " Sets\n", vfres->neq, ethqsets);
		ethqsets = vfres->neq/2;
	}
	if (ethqsets > MAX_ETH_QSETS) {
		dev_warn(adapter->pdev_dev, "only using %d of %d allowed Queue"
			 " Sets\n", MAX_ETH_QSETS, ethqsets);
		ethqsets = MAX_ETH_QSETS;
	}
	if (vfres->niq != 0 || vfres->neq > ethqsets*2) {
		dev_warn(adapter->pdev_dev, "unused resources niq/neq (%d/%d)"
			 " ignored\n", vfres->niq, vfres->neq - ethqsets*2);
	}
	adapter->sge.max_ethqsets = ethqsets;
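
	/*
	 * To illustrate with hypothetical provisioning (and taking
	 * INGQ_EXTRAS as 2): niqflint = 10, nethctrl = 8 and neq = 16 give
	 * ethqsets = 10 - 2 = 8, which matches nethctrl and needs exactly
	 * 8 * 2 = 16 Egress Contexts -- so all of the checks above pass and
	 * we end up with 8 Queue Sets.
	 */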

	/*
	 * Check for various parameter sanity issues.  Most checks simply
	 * result in us using fewer resources than our provisioning but we
	 * do need at least one "port" with which to work ...
	 */
	if (adapter->sge.max_ethqsets < adapter->params.nports) {
		dev_warn(adapter->pdev_dev, "only using %d of %d available"
			 " virtual interfaces (too few Queue Sets)\n",
			 adapter->sge.max_ethqsets, adapter->params.nports);
		adapter->params.nports = adapter->sge.max_ethqsets;
	}
	if (adapter->params.nports == 0) {
		dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
			"usable!\n");
		return -EINVAL;
	}
	return 0;
}

static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
			     u8 pkt_cnt_idx, unsigned int size,
			     unsigned int iqe_size)
{
	rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
			     (pkt_cnt_idx < SGE_NCOUNTERS ?
			      QINTR_CNT_EN_F : 0));
	rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
			    ? pkt_cnt_idx
			    : 0);
	rspq->iqe_len = iqe_size;
	rspq->size = size;
}
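
/*
 * So, for example, init_rspq(&rspq, SGE_TIMER_RSTRT_CNTR, 0, 512, 64)
 * selects holdoff timer index SGE_TIMER_RSTRT_CNTR, enables the interrupt
 * packet-count threshold at index 0 (since 0 < SGE_NCOUNTERS) and asks for
 * a 512-entry queue of 64-byte entries -- exactly the configuration used
 * for the firmware event queue below.
 */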

/*
 * Perform default configuration of DMA queues depending on the number and
 * type of ports we found and the number of available CPUs.  Most settings can
 * be modified by the admin via ethtool and cxgbtool prior to the adapter
 * being brought up for the first time.
 */
static void cfg_queues(struct adapter *adapter)
{
	struct sge *s = &adapter->sge;
	int q10g, n10g, qidx, pidx, qs;
	size_t iqe_size;

	/*
	 * We should not be called till we know how many Queue Sets we can
	 * support.  In particular, this means that we need to know what kind
	 * of interrupts we'll be using ...
	 */
	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);

	/*
	 * Count the number of 10GbE Virtual Interfaces that we have.
	 */
	n10g = 0;
	for_each_port(adapter, pidx)
		n10g += is_x_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);

	/*
	 * We default to 1 queue per non-10G port and up to # of cores queues
	 * per 10G port.
	 */
	if (n10g == 0)
		q10g = 0;
	else {
		int n1g = (adapter->params.nports - n10g);
		q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
		if (q10g > num_online_cpus())
			q10g = num_online_cpus();
	}
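
	/*
	 * E.g. (hypothetical topology): with max_ethqsets = 4, one 10G port
	 * and one 1G port, n1g = 1 and q10g = (4 - 1) / 1 = 3, capped at
	 * num_online_cpus() -- so the 10G port gets up to 3 Queue Sets and
	 * the 1G port gets 1.
	 */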

	/*
	 * Allocate the "Queue Sets" to the various Virtual Interfaces.
	 * The layout will be established in setup_sge_queues() when the
	 * adapter is brought up for the first time.
	 */
	qidx = 0;
	for_each_port(adapter, pidx) {
		struct port_info *pi = adap2pinfo(adapter, pidx);

		pi->first_qset = qidx;
		pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1;
		qidx += pi->nqsets;
	}
	s->ethqsets = qidx;

	/*
	 * The Ingress Queue Entry Size for our various Response Queues needs
	 * to be big enough to accommodate the largest message we can receive
	 * from the chip/firmware, which is 64 bytes ...
	 */
	iqe_size = 64;

	/*
	 * Set up default Queue Set parameters ...  Start off with the
	 * shortest interrupt holdoff timer.
	 */
	for (qs = 0; qs < s->max_ethqsets; qs++) {
		struct sge_eth_rxq *rxq = &s->ethrxq[qs];
		struct sge_eth_txq *txq = &s->ethtxq[qs];

		init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
		rxq->fl.size = 72;
		txq->q.size = 1024;
	}

	/*
	 * The firmware event queue is used for link state changes and
	 * notifications of TX DMA completions.
	 */
	init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);

	/*
	 * The forwarded interrupt queue is used when we're in MSI interrupt
	 * mode.  In this mode all interrupts associated with RX queues will
	 * be forwarded to a single queue which we'll associate with our MSI
	 * interrupt vector.  The messages dropped into the forwarded
	 * interrupt queue will indicate which ingress queue needs servicing
	 * ...  This queue needs to be large enough to accommodate all of the
	 * ingress queues which are forwarding their interrupt (+1 to prevent
	 * the PIDX from equalling the CIDX if every ingress queue has an
	 * outstanding interrupt).  The queue doesn't need to be any larger
	 * because no ingress queue will ever have more than one outstanding
	 * interrupt at any time ...
	 */
	init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
		  iqe_size);
}

/*
 * Reduce the number of Ethernet queues across all ports to at most n.
 * n provides at least one queue per port.
 */
static void reduce_ethqs(struct adapter *adapter, int n)
{
	int i;
	struct port_info *pi;

	/*
	 * While we have too many active Ethernet Queue Sets, iterate across
	 * the "ports" and reduce their individual Queue Set allocations.
	 */
	BUG_ON(n < adapter->params.nports);
	while (n < adapter->sge.ethqsets)
		for_each_port(adapter, i) {
			pi = adap2pinfo(adapter, i);
			if (pi->nqsets > 1) {
				pi->nqsets--;
				adapter->sge.ethqsets--;
				if (adapter->sge.ethqsets <= n)
					break;
			}
		}

	/*
	 * Reassign the starting Queue Sets for each of the "ports" ...
	 */
	n = 0;
	for_each_port(adapter, i) {
		pi = adap2pinfo(adapter, i);
		pi->first_qset = n;
		n += pi->nqsets;
	}
}
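
/*
 * To illustrate (hypothetical numbers): two ports provisioned with 4 + 4
 * Queue Sets reduced to n = 6 lose one Queue Set per port per pass of the
 * loop, ending at 3 + 3 with first_qset values of 0 and 3.
 */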

/*
 * We need to grab enough MSI-X vectors to cover our interrupt needs.  Ideally
 * we get a separate MSI-X vector for every "Queue Set" plus any extras we
 * need.  Minimally we need one for every Virtual Interface plus those needed
 * for our "extras".  Note that this process may lower the maximum number of
 * allowed Queue Sets ...
 */
static int enable_msix(struct adapter *adapter)
{
	int i, want, need, nqsets;
	struct msix_entry entries[MSIX_ENTRIES];
	struct sge *s = &adapter->sge;

	for (i = 0; i < MSIX_ENTRIES; ++i)
		entries[i].entry = i;

	/*
	 * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
	 * plus those needed for our "extras" (for example, the firmware
	 * message queue).  We _need_ at least one "Queue Set" per Virtual
	 * Interface plus those needed for our "extras".  So now we get to
	 * see how many vectors the platform will actually grant us ...
	 */
	want = s->max_ethqsets + MSIX_EXTRAS;
	need = adapter->params.nports + MSIX_EXTRAS;

	want = pci_enable_msix_range(adapter->pdev, entries, need, want);
	if (want < 0)
		return want;

	nqsets = want - MSIX_EXTRAS;
	if (nqsets < s->max_ethqsets) {
		dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
			 " for %d Queue Sets\n", nqsets);
		s->max_ethqsets = nqsets;
		if (nqsets < s->ethqsets)
			reduce_ethqs(adapter, nqsets);
	}
	for (i = 0; i < want; ++i)
		adapter->msix_info[i].vec = entries[i].vector;

	return 0;
}
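
/*
 * For example (hypothetical counts): with 8 maximum Queue Sets and 2 ports,
 * want = 8 + MSIX_EXTRAS and need = 2 + MSIX_EXTRAS.  If
 * pci_enable_msix_range() can only grant 5 + MSIX_EXTRAS vectors, we drop
 * to 5 Queue Sets and, if more than 5 were already allocated, call
 * reduce_ethqs() to rebalance them across the ports.
 */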

static const struct net_device_ops cxgb4vf_netdev_ops	= {
	.ndo_open		= cxgb4vf_open,
	.ndo_stop		= cxgb4vf_stop,
	.ndo_start_xmit		= t4vf_eth_xmit,
	.ndo_get_stats		= cxgb4vf_get_stats,
	.ndo_set_rx_mode	= cxgb4vf_set_rxmode,
	.ndo_set_mac_address	= cxgb4vf_set_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_do_ioctl		= cxgb4vf_do_ioctl,
	.ndo_change_mtu		= cxgb4vf_change_mtu,
	.ndo_fix_features	= cxgb4vf_fix_features,
	.ndo_set_features	= cxgb4vf_set_features,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= cxgb4vf_poll_controller,
#endif
};

/*
 * "Probe" a device: initialize a device and construct all kernel and driver
 * state needed to manage the device.  This routine is called "init_one" in
 * the PF Driver ...
 */
static int cxgb4vf_pci_probe(struct pci_dev *pdev,
			     const struct pci_device_id *ent)
{
	int pci_using_dac;
	int err, pidx;
	unsigned int pmask;
	struct adapter *adapter;
	struct port_info *pi;
	struct net_device *netdev;

	/*
	 * Print our driver banner the first time we're called to initialize a
	 * device.
	 */
	pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION);

	/*
	 * Initialize generic PCI device state.
	 */
	err = pci_enable_device(pdev);
	if (err) {
		dev_err(&pdev->dev, "cannot enable PCI device\n");
		return err;
	}

	/*
	 * Reserve PCI resources for the device.  If we can't get them, some
	 * other driver may have already claimed the device ...
	 */
	err = pci_request_regions(pdev, KBUILD_MODNAME);
	if (err) {
		dev_err(&pdev->dev, "cannot obtain PCI resources\n");
		goto err_disable_device;
	}

	/*
	 * Set up our DMA mask: try for 64-bit address masking first and
	 * fall back to 32-bit if we can't get 64 bits ...
	 */
	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err == 0) {
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
		if (err) {
			dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
				" coherent allocations\n");
			goto err_release_regions;
		}
		pci_using_dac = 1;
	} else {
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err != 0) {
			dev_err(&pdev->dev, "no usable DMA configuration\n");
			goto err_release_regions;
		}
		pci_using_dac = 0;
	}

	/*
	 * Enable bus mastering for the device ...
	 */
	pci_set_master(pdev);

	/*
	 * Allocate our adapter data structure and attach it to the device.
	 */
	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
	if (!adapter) {
		err = -ENOMEM;
		goto err_release_regions;
	}
	pci_set_drvdata(pdev, adapter);
	adapter->pdev = pdev;
	adapter->pdev_dev = &pdev->dev;

	/*
	 * Initialize SMP data synchronization resources.
	 */
	spin_lock_init(&adapter->stats_lock);

	/*
	 * Map our I/O registers in BAR0.
	 */
	adapter->regs = pci_ioremap_bar(pdev, 0);
	if (!adapter->regs) {
		dev_err(&pdev->dev, "cannot map device registers\n");
		err = -ENOMEM;
		goto err_free_adapter;
	}

	/* Wait for the device to become ready before proceeding ...
	 */
	err = t4vf_prep_adapter(adapter);
	if (err) {
		dev_err(adapter->pdev_dev, "device didn't become ready:"
			" err=%d\n", err);
		goto err_unmap_bar0;
	}

	/* For T5 and later we want to use the new BAR-based User Doorbells,
	 * so we need to map BAR2 here ...
	 */
	if (!is_t4(adapter->params.chip)) {
		adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2),
					   pci_resource_len(pdev, 2));
		if (!adapter->bar2) {
			dev_err(adapter->pdev_dev, "cannot map BAR2 doorbells\n");
			err = -ENOMEM;
			goto err_unmap_bar0;
		}
	}
	/*
	 * Initialize adapter level features.
	 */
	adapter->name = pci_name(pdev);
	adapter->msg_enable = dflt_msg_enable;
	err = adap_init0(adapter);
	if (err)
		goto err_unmap_bar;

	/*
	 * Allocate our "adapter ports" and stitch everything together.
	 */
	pmask = adapter->params.vfres.pmask;
	for_each_port(adapter, pidx) {
		int port_id, viid;

		/*
		 * We simplistically allocate our virtual interfaces
		 * sequentially across the port numbers to which we have
		 * access rights.  This should be configurable in some manner
		 * ...
		 */
		if (pmask == 0)
			break;
		port_id = ffs(pmask) - 1;
		pmask &= ~(1 << port_id);
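
		/*
		 * E.g. (hypothetically) with pmask = 0x5, the first pass
		 * picks port_id = ffs(0x5) - 1 = 0 and clears that bit,
		 * leaving pmask = 0x4; the second pass then picks
		 * port_id = 2.
		 */
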
		viid = t4vf_alloc_vi(adapter, port_id);
		if (viid < 0) {
			dev_err(&pdev->dev, "cannot allocate VI for port %d:"
				" err=%d\n", port_id, viid);
			err = viid;
			goto err_free_dev;
		}

		/*
		 * Allocate our network device and stitch things together.
		 */
		netdev = alloc_etherdev_mq(sizeof(struct port_info),
					   MAX_PORT_QSETS);
		if (netdev == NULL) {
			t4vf_free_vi(adapter, viid);
			err = -ENOMEM;
			goto err_free_dev;
		}
		adapter->port[pidx] = netdev;
		SET_NETDEV_DEV(netdev, &pdev->dev);
		pi = netdev_priv(netdev);
		pi->adapter = adapter;
		pi->pidx = pidx;
		pi->port_id = port_id;
		pi->viid = viid;

		/*
		 * Initialize the starting state of our "port" and register
		 * it.
		 */
		pi->xact_addr_filt = -1;
		netif_carrier_off(netdev);
		netdev->irq = pdev->irq;

		netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
			NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM;
		netdev->vlan_features = NETIF_F_SG | TSO_FLAGS |
			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
			NETIF_F_HIGHDMA;
		netdev->features = netdev->hw_features |
				   NETIF_F_HW_VLAN_CTAG_TX;
		if (pci_using_dac)
			netdev->features |= NETIF_F_HIGHDMA;

		netdev->priv_flags |= IFF_UNICAST_FLT;

		netdev->netdev_ops = &cxgb4vf_netdev_ops;
		netdev->ethtool_ops = &cxgb4vf_ethtool_ops;

		/*
		 * Initialize the hardware/software state for the port.
		 */
		err = t4vf_port_init(adapter, pidx);
		if (err) {
			dev_err(&pdev->dev, "cannot initialize port %d\n",
				pidx);
			goto err_free_dev;
		}
	}

	/*
	 * The "card" is now ready to go.  If any errors occur during device
	 * registration we do not fail the whole "card" but rather proceed
	 * only with the ports we manage to register successfully.  However we
	 * must register at least one net device.
	 */
	for_each_port(adapter, pidx) {
		netdev = adapter->port[pidx];
		if (netdev == NULL)
			continue;

		err = register_netdev(netdev);
		if (err) {
			dev_warn(&pdev->dev, "cannot register net device %s,"
				 " skipping\n", netdev->name);
			continue;
		}

		set_bit(pidx, &adapter->registered_device_map);
	}
	if (adapter->registered_device_map == 0) {
		dev_err(&pdev->dev, "could not register any net devices\n");
		goto err_free_dev;
	}

	/*
	 * Set up our debugfs entries.
	 */
	if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
		adapter->debugfs_root =
			debugfs_create_dir(pci_name(pdev),
					   cxgb4vf_debugfs_root);
		if (IS_ERR_OR_NULL(adapter->debugfs_root))
			dev_warn(&pdev->dev, "could not create debugfs"
				 " directory");
		else
			setup_debugfs(adapter);
	}

	/*
	 * See what interrupts we'll be using.  If we've been configured to
	 * use MSI-X interrupts, try to enable them but fall back to using
	 * MSI interrupts if we can't enable MSI-X interrupts.  If we can't
	 * get MSI interrupts we bail with the error.
	 */
	if (msi == MSI_MSIX && enable_msix(adapter) == 0)
		adapter->flags |= USING_MSIX;
	else {
		err = pci_enable_msi(pdev);
		if (err) {
			dev_err(&pdev->dev, "Unable to allocate %s interrupts;"
				" err=%d\n",
				msi == MSI_MSIX ? "MSI-X or MSI" : "MSI", err);
			goto err_free_debugfs;
		}
		adapter->flags |= USING_MSI;
	}

	/*
	 * Now that we know how many "ports" we have and what their types are,
	 * and how many Queue Sets we can support, we can configure our queue
	 * resources.
	 */
	cfg_queues(adapter);

	/*
	 * Print a short notice on the existence and configuration of the new
	 * VF network device ...
	 */
	for_each_port(adapter, pidx) {
		dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
			 adapter->port[pidx]->name,
			 (adapter->flags & USING_MSIX) ? "MSI-X" :
			 (adapter->flags & USING_MSI)  ? "MSI" : "");
	}

	/*
	 * Return success!
	 */
	return 0;

	/*
	 * Error recovery and exit code.  Unwind state that's been created
	 * so far and return the error.
	 */

err_free_debugfs:
	if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
		cleanup_debugfs(adapter);
		debugfs_remove_recursive(adapter->debugfs_root);
	}

err_free_dev:
	for_each_port(adapter, pidx) {
		netdev = adapter->port[pidx];
		if (netdev == NULL)
			continue;
		pi = netdev_priv(netdev);
		t4vf_free_vi(adapter, pi->viid);
		if (test_bit(pidx, &adapter->registered_device_map))
			unregister_netdev(netdev);
		free_netdev(netdev);
	}

err_unmap_bar:
	if (!is_t4(adapter->params.chip))
		iounmap(adapter->bar2);

err_unmap_bar0:
	iounmap(adapter->regs);

err_free_adapter:
	kfree(adapter);

err_release_regions:
	pci_release_regions(pdev);
	pci_clear_master(pdev);

err_disable_device:
	pci_disable_device(pdev);

	return err;
}

/*
 * "Remove" a device: tear down all kernel and driver state created in the
 * "probe" routine and quiesce the device (disable interrupts, etc.).  (Note
 * that this is called "remove_one" in the PF Driver.)
 */
static void cxgb4vf_pci_remove(struct pci_dev *pdev)
{
	struct adapter *adapter = pci_get_drvdata(pdev);

	/*
	 * Tear down driver state associated with device.
	 */
	if (adapter) {
		int pidx;

		/*
		 * Stop all of our activity.  Unregister the network ports,
		 * disable interrupts, etc.
		 */
		for_each_port(adapter, pidx)
			if (test_bit(pidx, &adapter->registered_device_map))
				unregister_netdev(adapter->port[pidx]);
		t4vf_sge_stop(adapter);
		if (adapter->flags & USING_MSIX) {
			pci_disable_msix(adapter->pdev);
			adapter->flags &= ~USING_MSIX;
		} else if (adapter->flags & USING_MSI) {
			pci_disable_msi(adapter->pdev);
			adapter->flags &= ~USING_MSI;
		}

		/*
		 * Tear down our debugfs entries.
		 */
		if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
			cleanup_debugfs(adapter);
			debugfs_remove_recursive(adapter->debugfs_root);
		}

		/*
		 * Free all of the various resources which we've acquired ...
		 */
		t4vf_free_sge_resources(adapter);
		for_each_port(adapter, pidx) {
			struct net_device *netdev = adapter->port[pidx];
			struct port_info *pi;

			if (netdev == NULL)
				continue;

			pi = netdev_priv(netdev);
			t4vf_free_vi(adapter, pi->viid);
			free_netdev(netdev);
		}
		iounmap(adapter->regs);
		if (!is_t4(adapter->params.chip))
			iounmap(adapter->bar2);
		kfree(adapter);
	}

	/*
	 * Disable the device and release its PCI resources.
	 */
	pci_disable_device(pdev);
	pci_clear_master(pdev);
	pci_release_regions(pdev);
}

/*
 * "Shutdown" quiesces the device, stopping ingress packet and interrupt
 * delivery.
 */
static void cxgb4vf_pci_shutdown(struct pci_dev *pdev)
{
	struct adapter *adapter;
	int pidx;

	adapter = pci_get_drvdata(pdev);
	if (!adapter)
		return;

	/* Disable all Virtual Interfaces.  This will shut down the
	 * delivery of all ingress packets into the chip for these
	 * Virtual Interfaces.
	 */
	for_each_port(adapter, pidx)
		if (test_bit(pidx, &adapter->registered_device_map))
			unregister_netdev(adapter->port[pidx]);

	/* Stop all Queues and disable our interrupts, which will prevent
	 * further DMA and interrupts, allowing various internal pathways
	 * to drain.
	 */
	t4vf_sge_stop(adapter);
	if (adapter->flags & USING_MSIX) {
		pci_disable_msix(adapter->pdev);
		adapter->flags &= ~USING_MSIX;
	} else if (adapter->flags & USING_MSI) {
		pci_disable_msi(adapter->pdev);
		adapter->flags &= ~USING_MSI;
	}

	/*
	 * Free up all of the Queue resources now that the device has been
	 * quiesced.
	 */
	t4vf_free_sge_resources(adapter);
	pci_set_drvdata(pdev, NULL);
}

/* Macros needed to support the PCI Device ID Table ...
 */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
	static const struct pci_device_id cxgb4vf_pci_tbl[] = {
#define CH_PCI_DEVICE_ID_FUNCTION	0x8

#define CH_PCI_ID_TABLE_ENTRY(devid) \
		{ PCI_VDEVICE(CHELSIO, (devid)), 0 }

#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END { 0, } }

#include "../cxgb4/t4_pci_id_tbl.h"

MODULE_DESCRIPTION(DRV_DESC);
MODULE_AUTHOR("Chelsio Communications");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);

static struct pci_driver cxgb4vf_driver = {
	.name		= KBUILD_MODNAME,
	.id_table	= cxgb4vf_pci_tbl,
	.probe		= cxgb4vf_pci_probe,
	.remove		= cxgb4vf_pci_remove,
	.shutdown	= cxgb4vf_pci_shutdown,
};

/*
 * Initialize global driver state.
 */
static int __init cxgb4vf_module_init(void)
{
	int ret;

	/*
	 * Vet our module parameters.
	 */
	if (msi != MSI_MSIX && msi != MSI_MSI) {
		pr_warn("bad module parameter msi=%d; must be %d (MSI-X or MSI) or %d (MSI)\n",
			msi, MSI_MSIX, MSI_MSI);
		return -EINVAL;
	}

	/* Debugfs support is optional, just warn if this fails */
	cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
	if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
		pr_warn("could not create debugfs entry, continuing\n");

	ret = pci_register_driver(&cxgb4vf_driver);
	if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
		debugfs_remove(cxgb4vf_debugfs_root);
	return ret;
}

/*
 * Tear down global driver state.
 */
static void __exit cxgb4vf_module_exit(void)
{
	pci_unregister_driver(&cxgb4vf_driver);
	debugfs_remove(cxgb4vf_debugfs_root);
}

module_init(cxgb4vf_module_init);
module_exit(cxgb4vf_module_exit);