1/*
2 *
3 * This file is provided under a dual BSD/GPLv2 license.  When using or
4 * redistributing this file, you may do so under either license.
5 *
6 * GPL LICENSE SUMMARY
7 *
8 * Copyright(c) 2015 Intel Corporation.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of version 2 of the GNU General Public License as
12 * published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 * General Public License for more details.
18 *
19 * BSD LICENSE
20 *
21 * Copyright(c) 2015 Intel Corporation.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 *
27 *  - Redistributions of source code must retain the above copyright
28 *    notice, this list of conditions and the following disclaimer.
29 *  - Redistributions in binary form must reproduce the above copyright
30 *    notice, this list of conditions and the following disclaimer in
31 *    the documentation and/or other materials provided with the
32 *    distribution.
33 *  - Neither the name of Intel Corporation nor the names of its
34 *    contributors may be used to endorse or promote products derived
35 *    from this software without specific prior written permission.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48 *
49 */
50
51/*
52 * This file contains all of the code that is specific to the HFI chip
53 */
54
55#include <linux/pci.h>
56#include <linux/delay.h>
57#include <linux/interrupt.h>
58#include <linux/module.h>
59
60#include "hfi.h"
61#include "trace.h"
62#include "mad.h"
63#include "pio.h"
64#include "sdma.h"
65#include "eprom.h"
66
67#define NUM_IB_PORTS 1
68
69uint kdeth_qp;
70module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
71MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
72
73uint num_vls = HFI1_MAX_VLS_SUPPORTED;
74module_param(num_vls, uint, S_IRUGO);
75MODULE_PARM_DESC(num_vls, "Set number of Virtual Lanes to use (1-8)");
76
77/*
78 * Default time to aggregate two 10K packets from the idle state
79 * (timer not running). The timer starts at the end of the first packet,
80 * so only the time for one 10K packet and header plus a bit extra is needed.
81 * 10 * 1024 + 64 header byte = 10304 byte
82 * 10304 byte / 12.5 GB/s = 824.32ns
83 */
84uint rcv_intr_timeout = (824 + 16); /* 16 is for coalescing interrupt */
85module_param(rcv_intr_timeout, uint, S_IRUGO);
86MODULE_PARM_DESC(rcv_intr_timeout, "Receive interrupt mitigation timeout in ns");
87
88uint rcv_intr_count = 16; /* same as qib */
89module_param(rcv_intr_count, uint, S_IRUGO);
90MODULE_PARM_DESC(rcv_intr_count, "Receive interrupt mitigation count");
91
92ushort link_crc_mask = SUPPORTED_CRCS;
93module_param(link_crc_mask, ushort, S_IRUGO);
94MODULE_PARM_DESC(link_crc_mask, "CRCs to use on the link");
95
96uint loopback;
97module_param_named(loopback, loopback, uint, S_IRUGO);
98MODULE_PARM_DESC(loopback, "Put into loopback mode (1 = serdes, 3 = external cable");
99
100/* Other driver tunables */
101uint rcv_intr_dynamic = 1; /* enable dynamic mode for rcv int mitigation*/
102static ushort crc_14b_sideband = 1;
103static uint use_flr = 1;
104uint quick_linkup; /* skip LNI */
105
106struct flag_table {
107	u64 flag;	/* the flag */
108	char *str;	/* description string */
109	u16 extra;	/* extra information */
110	u16 unused0;
111	u32 unused1;
112};
113
114/* str must be a string constant */
115#define FLAG_ENTRY(str, extra, flag) {flag, str, extra}
116#define FLAG_ENTRY0(str, flag) {flag, str, 0}
117
118/* Send Error Consequences */
119#define SEC_WRITE_DROPPED	0x1
120#define SEC_PACKET_DROPPED	0x2
121#define SEC_SC_HALTED		0x4	/* per-context only */
122#define SEC_SPC_FREEZE		0x8	/* per-HFI only */
123
124#define VL15CTXT                  1
125#define MIN_KERNEL_KCTXTS         2
126#define NUM_MAP_REGS             32
127
128/* Bit offset into the GUID which carries HFI id information */
129#define GUID_HFI_INDEX_SHIFT     39
130
131/* extract the emulation revision */
132#define emulator_rev(dd) ((dd)->irev >> 8)
133/* parallel and serial emulation versions are 3 and 4 respectively */
134#define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3)
135#define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4)
136
137/* RSM fields */
138
139/* packet type */
140#define IB_PACKET_TYPE         2ull
141#define QW_SHIFT               6ull
142/* QPN[7..1] */
143#define QPN_WIDTH              7ull
144
145/* LRH.BTH: QW 0, OFFSET 48 - for match */
146#define LRH_BTH_QW             0ull
147#define LRH_BTH_BIT_OFFSET     48ull
148#define LRH_BTH_OFFSET(off)    ((LRH_BTH_QW << QW_SHIFT) | (off))
149#define LRH_BTH_MATCH_OFFSET   LRH_BTH_OFFSET(LRH_BTH_BIT_OFFSET)
150#define LRH_BTH_SELECT
151#define LRH_BTH_MASK           3ull
152#define LRH_BTH_VALUE          2ull
153
154/* LRH.SC[3..0] QW 0, OFFSET 56 - for match */
155#define LRH_SC_QW              0ull
156#define LRH_SC_BIT_OFFSET      56ull
157#define LRH_SC_OFFSET(off)     ((LRH_SC_QW << QW_SHIFT) | (off))
158#define LRH_SC_MATCH_OFFSET    LRH_SC_OFFSET(LRH_SC_BIT_OFFSET)
159#define LRH_SC_MASK            128ull
160#define LRH_SC_VALUE           0ull
161
162/* SC[n..0] QW 0, OFFSET 60 - for select */
163#define LRH_SC_SELECT_OFFSET  ((LRH_SC_QW << QW_SHIFT) | (60ull))
164
165/* QPN[m+n:1] QW 1, OFFSET 1 */
166#define QPN_SELECT_OFFSET      ((1ull << QW_SHIFT) | (1ull))
167
168/* defines to build power on SC2VL table */
169#define SC2VL_VAL( \
170	num, \
171	sc0, sc0val, \
172	sc1, sc1val, \
173	sc2, sc2val, \
174	sc3, sc3val, \
175	sc4, sc4val, \
176	sc5, sc5val, \
177	sc6, sc6val, \
178	sc7, sc7val) \
179( \
180	((u64)(sc0val) << SEND_SC2VLT##num##_SC##sc0##_SHIFT) | \
181	((u64)(sc1val) << SEND_SC2VLT##num##_SC##sc1##_SHIFT) | \
182	((u64)(sc2val) << SEND_SC2VLT##num##_SC##sc2##_SHIFT) | \
183	((u64)(sc3val) << SEND_SC2VLT##num##_SC##sc3##_SHIFT) | \
184	((u64)(sc4val) << SEND_SC2VLT##num##_SC##sc4##_SHIFT) | \
185	((u64)(sc5val) << SEND_SC2VLT##num##_SC##sc5##_SHIFT) | \
186	((u64)(sc6val) << SEND_SC2VLT##num##_SC##sc6##_SHIFT) | \
187	((u64)(sc7val) << SEND_SC2VLT##num##_SC##sc7##_SHIFT)   \
188)
189
190#define DC_SC_VL_VAL( \
191	range, \
192	e0, e0val, \
193	e1, e1val, \
194	e2, e2val, \
195	e3, e3val, \
196	e4, e4val, \
197	e5, e5val, \
198	e6, e6val, \
199	e7, e7val, \
200	e8, e8val, \
201	e9, e9val, \
202	e10, e10val, \
203	e11, e11val, \
204	e12, e12val, \
205	e13, e13val, \
206	e14, e14val, \
207	e15, e15val) \
208( \
209	((u64)(e0val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e0##_SHIFT) | \
210	((u64)(e1val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e1##_SHIFT) | \
211	((u64)(e2val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e2##_SHIFT) | \
212	((u64)(e3val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e3##_SHIFT) | \
213	((u64)(e4val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e4##_SHIFT) | \
214	((u64)(e5val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e5##_SHIFT) | \
215	((u64)(e6val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e6##_SHIFT) | \
216	((u64)(e7val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e7##_SHIFT) | \
217	((u64)(e8val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e8##_SHIFT) | \
218	((u64)(e9val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e9##_SHIFT) | \
219	((u64)(e10val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e10##_SHIFT) | \
220	((u64)(e11val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e11##_SHIFT) | \
221	((u64)(e12val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e12##_SHIFT) | \
222	((u64)(e13val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e13##_SHIFT) | \
223	((u64)(e14val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e14##_SHIFT) | \
224	((u64)(e15val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e15##_SHIFT) \
225)
226
227/* all CceStatus sub-block freeze bits */
228#define ALL_FROZE (CCE_STATUS_SDMA_FROZE_SMASK \
229			| CCE_STATUS_RXE_FROZE_SMASK \
230			| CCE_STATUS_TXE_FROZE_SMASK \
231			| CCE_STATUS_TXE_PIO_FROZE_SMASK)
232/* all CceStatus sub-block TXE pause bits */
233#define ALL_TXE_PAUSE (CCE_STATUS_TXE_PIO_PAUSED_SMASK \
234			| CCE_STATUS_TXE_PAUSED_SMASK \
235			| CCE_STATUS_SDMA_PAUSED_SMASK)
236/* all CceStatus sub-block RXE pause bits */
237#define ALL_RXE_PAUSE CCE_STATUS_RXE_PAUSED_SMASK
238
239/*
240 * CCE Error flags.
241 */
242static struct flag_table cce_err_status_flags[] = {
243/* 0*/	FLAG_ENTRY0("CceCsrParityErr",
244		CCE_ERR_STATUS_CCE_CSR_PARITY_ERR_SMASK),
245/* 1*/	FLAG_ENTRY0("CceCsrReadBadAddrErr",
246		CCE_ERR_STATUS_CCE_CSR_READ_BAD_ADDR_ERR_SMASK),
247/* 2*/	FLAG_ENTRY0("CceCsrWriteBadAddrErr",
248		CCE_ERR_STATUS_CCE_CSR_WRITE_BAD_ADDR_ERR_SMASK),
249/* 3*/	FLAG_ENTRY0("CceTrgtAsyncFifoParityErr",
250		CCE_ERR_STATUS_CCE_TRGT_ASYNC_FIFO_PARITY_ERR_SMASK),
251/* 4*/	FLAG_ENTRY0("CceTrgtAccessErr",
252		CCE_ERR_STATUS_CCE_TRGT_ACCESS_ERR_SMASK),
253/* 5*/	FLAG_ENTRY0("CceRspdDataParityErr",
254		CCE_ERR_STATUS_CCE_RSPD_DATA_PARITY_ERR_SMASK),
255/* 6*/	FLAG_ENTRY0("CceCli0AsyncFifoParityErr",
256		CCE_ERR_STATUS_CCE_CLI0_ASYNC_FIFO_PARITY_ERR_SMASK),
257/* 7*/	FLAG_ENTRY0("CceCsrCfgBusParityErr",
258		CCE_ERR_STATUS_CCE_CSR_CFG_BUS_PARITY_ERR_SMASK),
259/* 8*/	FLAG_ENTRY0("CceCli2AsyncFifoParityErr",
260		CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK),
261/* 9*/	FLAG_ENTRY0("CceCli1AsyncFifoPioCrdtParityErr",
262	    CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_PIO_CRDT_PARITY_ERR_SMASK),
263/*10*/	FLAG_ENTRY0("CceCli1AsyncFifoPioCrdtParityErr",
264	    CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_SDMA_HD_PARITY_ERR_SMASK),
265/*11*/	FLAG_ENTRY0("CceCli1AsyncFifoRxdmaParityError",
266	    CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_RXDMA_PARITY_ERROR_SMASK),
267/*12*/	FLAG_ENTRY0("CceCli1AsyncFifoDbgParityError",
268		CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_DBG_PARITY_ERROR_SMASK),
269/*13*/	FLAG_ENTRY0("PcicRetryMemCorErr",
270		CCE_ERR_STATUS_PCIC_RETRY_MEM_COR_ERR_SMASK),
271/*14*/	FLAG_ENTRY0("PcicRetryMemCorErr",
272		CCE_ERR_STATUS_PCIC_RETRY_SOT_MEM_COR_ERR_SMASK),
273/*15*/	FLAG_ENTRY0("PcicPostHdQCorErr",
274		CCE_ERR_STATUS_PCIC_POST_HD_QCOR_ERR_SMASK),
275/*16*/	FLAG_ENTRY0("PcicPostHdQCorErr",
276		CCE_ERR_STATUS_PCIC_POST_DAT_QCOR_ERR_SMASK),
277/*17*/	FLAG_ENTRY0("PcicPostHdQCorErr",
278		CCE_ERR_STATUS_PCIC_CPL_HD_QCOR_ERR_SMASK),
279/*18*/	FLAG_ENTRY0("PcicCplDatQCorErr",
280		CCE_ERR_STATUS_PCIC_CPL_DAT_QCOR_ERR_SMASK),
281/*19*/	FLAG_ENTRY0("PcicNPostHQParityErr",
282		CCE_ERR_STATUS_PCIC_NPOST_HQ_PARITY_ERR_SMASK),
283/*20*/	FLAG_ENTRY0("PcicNPostDatQParityErr",
284		CCE_ERR_STATUS_PCIC_NPOST_DAT_QPARITY_ERR_SMASK),
285/*21*/	FLAG_ENTRY0("PcicRetryMemUncErr",
286		CCE_ERR_STATUS_PCIC_RETRY_MEM_UNC_ERR_SMASK),
287/*22*/	FLAG_ENTRY0("PcicRetrySotMemUncErr",
288		CCE_ERR_STATUS_PCIC_RETRY_SOT_MEM_UNC_ERR_SMASK),
289/*23*/	FLAG_ENTRY0("PcicPostHdQUncErr",
290		CCE_ERR_STATUS_PCIC_POST_HD_QUNC_ERR_SMASK),
291/*24*/	FLAG_ENTRY0("PcicPostDatQUncErr",
292		CCE_ERR_STATUS_PCIC_POST_DAT_QUNC_ERR_SMASK),
293/*25*/	FLAG_ENTRY0("PcicCplHdQUncErr",
294		CCE_ERR_STATUS_PCIC_CPL_HD_QUNC_ERR_SMASK),
295/*26*/	FLAG_ENTRY0("PcicCplDatQUncErr",
296		CCE_ERR_STATUS_PCIC_CPL_DAT_QUNC_ERR_SMASK),
297/*27*/	FLAG_ENTRY0("PcicTransmitFrontParityErr",
298		CCE_ERR_STATUS_PCIC_TRANSMIT_FRONT_PARITY_ERR_SMASK),
299/*28*/	FLAG_ENTRY0("PcicTransmitBackParityErr",
300		CCE_ERR_STATUS_PCIC_TRANSMIT_BACK_PARITY_ERR_SMASK),
301/*29*/	FLAG_ENTRY0("PcicReceiveParityErr",
302		CCE_ERR_STATUS_PCIC_RECEIVE_PARITY_ERR_SMASK),
303/*30*/	FLAG_ENTRY0("CceTrgtCplTimeoutErr",
304		CCE_ERR_STATUS_CCE_TRGT_CPL_TIMEOUT_ERR_SMASK),
305/*31*/	FLAG_ENTRY0("LATriggered",
306		CCE_ERR_STATUS_LA_TRIGGERED_SMASK),
307/*32*/	FLAG_ENTRY0("CceSegReadBadAddrErr",
308		CCE_ERR_STATUS_CCE_SEG_READ_BAD_ADDR_ERR_SMASK),
309/*33*/	FLAG_ENTRY0("CceSegWriteBadAddrErr",
310		CCE_ERR_STATUS_CCE_SEG_WRITE_BAD_ADDR_ERR_SMASK),
311/*34*/	FLAG_ENTRY0("CceRcplAsyncFifoParityErr",
312		CCE_ERR_STATUS_CCE_RCPL_ASYNC_FIFO_PARITY_ERR_SMASK),
313/*35*/	FLAG_ENTRY0("CceRxdmaConvFifoParityErr",
314		CCE_ERR_STATUS_CCE_RXDMA_CONV_FIFO_PARITY_ERR_SMASK),
315/*36*/	FLAG_ENTRY0("CceMsixTableCorErr",
316		CCE_ERR_STATUS_CCE_MSIX_TABLE_COR_ERR_SMASK),
317/*37*/	FLAG_ENTRY0("CceMsixTableUncErr",
318		CCE_ERR_STATUS_CCE_MSIX_TABLE_UNC_ERR_SMASK),
319/*38*/	FLAG_ENTRY0("CceIntMapCorErr",
320		CCE_ERR_STATUS_CCE_INT_MAP_COR_ERR_SMASK),
321/*39*/	FLAG_ENTRY0("CceIntMapUncErr",
322		CCE_ERR_STATUS_CCE_INT_MAP_UNC_ERR_SMASK),
323/*40*/	FLAG_ENTRY0("CceMsixCsrParityErr",
324		CCE_ERR_STATUS_CCE_MSIX_CSR_PARITY_ERR_SMASK),
325/*41-63 reserved*/
326};
327
328/*
329 * Misc Error flags
330 */
331#define MES(text) MISC_ERR_STATUS_MISC_##text##_ERR_SMASK
332static struct flag_table misc_err_status_flags[] = {
333/* 0*/	FLAG_ENTRY0("CSR_PARITY", MES(CSR_PARITY)),
334/* 1*/	FLAG_ENTRY0("CSR_READ_BAD_ADDR", MES(CSR_READ_BAD_ADDR)),
335/* 2*/	FLAG_ENTRY0("CSR_WRITE_BAD_ADDR", MES(CSR_WRITE_BAD_ADDR)),
336/* 3*/	FLAG_ENTRY0("SBUS_WRITE_FAILED", MES(SBUS_WRITE_FAILED)),
337/* 4*/	FLAG_ENTRY0("KEY_MISMATCH", MES(KEY_MISMATCH)),
338/* 5*/	FLAG_ENTRY0("FW_AUTH_FAILED", MES(FW_AUTH_FAILED)),
339/* 6*/	FLAG_ENTRY0("EFUSE_CSR_PARITY", MES(EFUSE_CSR_PARITY)),
340/* 7*/	FLAG_ENTRY0("EFUSE_READ_BAD_ADDR", MES(EFUSE_READ_BAD_ADDR)),
341/* 8*/	FLAG_ENTRY0("EFUSE_WRITE", MES(EFUSE_WRITE)),
342/* 9*/	FLAG_ENTRY0("EFUSE_DONE_PARITY", MES(EFUSE_DONE_PARITY)),
343/*10*/	FLAG_ENTRY0("INVALID_EEP_CMD", MES(INVALID_EEP_CMD)),
344/*11*/	FLAG_ENTRY0("MBIST_FAIL", MES(MBIST_FAIL)),
345/*12*/	FLAG_ENTRY0("PLL_LOCK_FAIL", MES(PLL_LOCK_FAIL))
346};
347
348/*
349 * TXE PIO Error flags and consequences
350 */
351static struct flag_table pio_err_status_flags[] = {
352/* 0*/	FLAG_ENTRY("PioWriteBadCtxt",
353	SEC_WRITE_DROPPED,
354	SEND_PIO_ERR_STATUS_PIO_WRITE_BAD_CTXT_ERR_SMASK),
355/* 1*/	FLAG_ENTRY("PioWriteAddrParity",
356	SEC_SPC_FREEZE,
357	SEND_PIO_ERR_STATUS_PIO_WRITE_ADDR_PARITY_ERR_SMASK),
358/* 2*/	FLAG_ENTRY("PioCsrParity",
359	SEC_SPC_FREEZE,
360	SEND_PIO_ERR_STATUS_PIO_CSR_PARITY_ERR_SMASK),
361/* 3*/	FLAG_ENTRY("PioSbMemFifo0",
362	SEC_SPC_FREEZE,
363	SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO0_ERR_SMASK),
364/* 4*/	FLAG_ENTRY("PioSbMemFifo1",
365	SEC_SPC_FREEZE,
366	SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO1_ERR_SMASK),
367/* 5*/	FLAG_ENTRY("PioPccFifoParity",
368	SEC_SPC_FREEZE,
369	SEND_PIO_ERR_STATUS_PIO_PCC_FIFO_PARITY_ERR_SMASK),
370/* 6*/	FLAG_ENTRY("PioPecFifoParity",
371	SEC_SPC_FREEZE,
372	SEND_PIO_ERR_STATUS_PIO_PEC_FIFO_PARITY_ERR_SMASK),
373/* 7*/	FLAG_ENTRY("PioSbrdctlCrrelParity",
374	SEC_SPC_FREEZE,
375	SEND_PIO_ERR_STATUS_PIO_SBRDCTL_CRREL_PARITY_ERR_SMASK),
376/* 8*/	FLAG_ENTRY("PioSbrdctrlCrrelFifoParity",
377	SEC_SPC_FREEZE,
378	SEND_PIO_ERR_STATUS_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR_SMASK),
379/* 9*/	FLAG_ENTRY("PioPktEvictFifoParityErr",
380	SEC_SPC_FREEZE,
381	SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_FIFO_PARITY_ERR_SMASK),
382/*10*/	FLAG_ENTRY("PioSmPktResetParity",
383	SEC_SPC_FREEZE,
384	SEND_PIO_ERR_STATUS_PIO_SM_PKT_RESET_PARITY_ERR_SMASK),
385/*11*/	FLAG_ENTRY("PioVlLenMemBank0Unc",
386	SEC_SPC_FREEZE,
387	SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_UNC_ERR_SMASK),
388/*12*/	FLAG_ENTRY("PioVlLenMemBank1Unc",
389	SEC_SPC_FREEZE,
390	SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_UNC_ERR_SMASK),
391/*13*/	FLAG_ENTRY("PioVlLenMemBank0Cor",
392	0,
393	SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_COR_ERR_SMASK),
394/*14*/	FLAG_ENTRY("PioVlLenMemBank1Cor",
395	0,
396	SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_COR_ERR_SMASK),
397/*15*/	FLAG_ENTRY("PioCreditRetFifoParity",
398	SEC_SPC_FREEZE,
399	SEND_PIO_ERR_STATUS_PIO_CREDIT_RET_FIFO_PARITY_ERR_SMASK),
400/*16*/	FLAG_ENTRY("PioPpmcPblFifo",
401	SEC_SPC_FREEZE,
402	SEND_PIO_ERR_STATUS_PIO_PPMC_PBL_FIFO_ERR_SMASK),
403/*17*/	FLAG_ENTRY("PioInitSmIn",
404	0,
405	SEND_PIO_ERR_STATUS_PIO_INIT_SM_IN_ERR_SMASK),
406/*18*/	FLAG_ENTRY("PioPktEvictSmOrArbSm",
407	SEC_SPC_FREEZE,
408	SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_SM_OR_ARB_SM_ERR_SMASK),
409/*19*/	FLAG_ENTRY("PioHostAddrMemUnc",
410	SEC_SPC_FREEZE,
411	SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_UNC_ERR_SMASK),
412/*20*/	FLAG_ENTRY("PioHostAddrMemCor",
413	0,
414	SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_COR_ERR_SMASK),
415/*21*/	FLAG_ENTRY("PioWriteDataParity",
416	SEC_SPC_FREEZE,
417	SEND_PIO_ERR_STATUS_PIO_WRITE_DATA_PARITY_ERR_SMASK),
418/*22*/	FLAG_ENTRY("PioStateMachine",
419	SEC_SPC_FREEZE,
420	SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK),
421/*23*/	FLAG_ENTRY("PioWriteQwValidParity",
422	SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
423	SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK),
424/*24*/	FLAG_ENTRY("PioBlockQwCountParity",
425	SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
426	SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK),
427/*25*/	FLAG_ENTRY("PioVlfVlLenParity",
428	SEC_SPC_FREEZE,
429	SEND_PIO_ERR_STATUS_PIO_VLF_VL_LEN_PARITY_ERR_SMASK),
430/*26*/	FLAG_ENTRY("PioVlfSopParity",
431	SEC_SPC_FREEZE,
432	SEND_PIO_ERR_STATUS_PIO_VLF_SOP_PARITY_ERR_SMASK),
433/*27*/	FLAG_ENTRY("PioVlFifoParity",
434	SEC_SPC_FREEZE,
435	SEND_PIO_ERR_STATUS_PIO_VL_FIFO_PARITY_ERR_SMASK),
436/*28*/	FLAG_ENTRY("PioPpmcBqcMemParity",
437	SEC_SPC_FREEZE,
438	SEND_PIO_ERR_STATUS_PIO_PPMC_BQC_MEM_PARITY_ERR_SMASK),
439/*29*/	FLAG_ENTRY("PioPpmcSopLen",
440	SEC_SPC_FREEZE,
441	SEND_PIO_ERR_STATUS_PIO_PPMC_SOP_LEN_ERR_SMASK),
442/*30-31 reserved*/
443/*32*/	FLAG_ENTRY("PioCurrentFreeCntParity",
444	SEC_SPC_FREEZE,
445	SEND_PIO_ERR_STATUS_PIO_CURRENT_FREE_CNT_PARITY_ERR_SMASK),
446/*33*/	FLAG_ENTRY("PioLastReturnedCntParity",
447	SEC_SPC_FREEZE,
448	SEND_PIO_ERR_STATUS_PIO_LAST_RETURNED_CNT_PARITY_ERR_SMASK),
449/*34*/	FLAG_ENTRY("PioPccSopHeadParity",
450	SEC_SPC_FREEZE,
451	SEND_PIO_ERR_STATUS_PIO_PCC_SOP_HEAD_PARITY_ERR_SMASK),
452/*35*/	FLAG_ENTRY("PioPecSopHeadParityErr",
453	SEC_SPC_FREEZE,
454	SEND_PIO_ERR_STATUS_PIO_PEC_SOP_HEAD_PARITY_ERR_SMASK),
455/*36-63 reserved*/
456};
457
458/* TXE PIO errors that cause an SPC freeze */
459#define ALL_PIO_FREEZE_ERR \
460	(SEND_PIO_ERR_STATUS_PIO_WRITE_ADDR_PARITY_ERR_SMASK \
461	| SEND_PIO_ERR_STATUS_PIO_CSR_PARITY_ERR_SMASK \
462	| SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO0_ERR_SMASK \
463	| SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO1_ERR_SMASK \
464	| SEND_PIO_ERR_STATUS_PIO_PCC_FIFO_PARITY_ERR_SMASK \
465	| SEND_PIO_ERR_STATUS_PIO_PEC_FIFO_PARITY_ERR_SMASK \
466	| SEND_PIO_ERR_STATUS_PIO_SBRDCTL_CRREL_PARITY_ERR_SMASK \
467	| SEND_PIO_ERR_STATUS_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR_SMASK \
468	| SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_FIFO_PARITY_ERR_SMASK \
469	| SEND_PIO_ERR_STATUS_PIO_SM_PKT_RESET_PARITY_ERR_SMASK \
470	| SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_UNC_ERR_SMASK \
471	| SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_UNC_ERR_SMASK \
472	| SEND_PIO_ERR_STATUS_PIO_CREDIT_RET_FIFO_PARITY_ERR_SMASK \
473	| SEND_PIO_ERR_STATUS_PIO_PPMC_PBL_FIFO_ERR_SMASK \
474	| SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_SM_OR_ARB_SM_ERR_SMASK \
475	| SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_UNC_ERR_SMASK \
476	| SEND_PIO_ERR_STATUS_PIO_WRITE_DATA_PARITY_ERR_SMASK \
477	| SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK \
478	| SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK \
479	| SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK \
480	| SEND_PIO_ERR_STATUS_PIO_VLF_VL_LEN_PARITY_ERR_SMASK \
481	| SEND_PIO_ERR_STATUS_PIO_VLF_SOP_PARITY_ERR_SMASK \
482	| SEND_PIO_ERR_STATUS_PIO_VL_FIFO_PARITY_ERR_SMASK \
483	| SEND_PIO_ERR_STATUS_PIO_PPMC_BQC_MEM_PARITY_ERR_SMASK \
484	| SEND_PIO_ERR_STATUS_PIO_PPMC_SOP_LEN_ERR_SMASK \
485	| SEND_PIO_ERR_STATUS_PIO_CURRENT_FREE_CNT_PARITY_ERR_SMASK \
486	| SEND_PIO_ERR_STATUS_PIO_LAST_RETURNED_CNT_PARITY_ERR_SMASK \
487	| SEND_PIO_ERR_STATUS_PIO_PCC_SOP_HEAD_PARITY_ERR_SMASK \
488	| SEND_PIO_ERR_STATUS_PIO_PEC_SOP_HEAD_PARITY_ERR_SMASK)
489
490/*
491 * TXE SDMA Error flags
492 */
493static struct flag_table sdma_err_status_flags[] = {
494/* 0*/	FLAG_ENTRY0("SDmaRpyTagErr",
495		SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK),
496/* 1*/	FLAG_ENTRY0("SDmaCsrParityErr",
497		SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK),
498/* 2*/	FLAG_ENTRY0("SDmaPcieReqTrackingUncErr",
499		SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK),
500/* 3*/	FLAG_ENTRY0("SDmaPcieReqTrackingCorErr",
501		SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_COR_ERR_SMASK),
502/*04-63 reserved*/
503};
504
505/* TXE SDMA errors that cause an SPC freeze */
506#define ALL_SDMA_FREEZE_ERR  \
507		(SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK \
508		| SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK \
509		| SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK)
510
511/*
512 * TXE Egress Error flags
513 */
514#define SEES(text) SEND_EGRESS_ERR_STATUS_##text##_ERR_SMASK
515static struct flag_table egress_err_status_flags[] = {
516/* 0*/	FLAG_ENTRY0("TxPktIntegrityMemCorErr", SEES(TX_PKT_INTEGRITY_MEM_COR)),
517/* 1*/	FLAG_ENTRY0("TxPktIntegrityMemUncErr", SEES(TX_PKT_INTEGRITY_MEM_UNC)),
518/* 2 reserved */
519/* 3*/	FLAG_ENTRY0("TxEgressFifoUnderrunOrParityErr",
520		SEES(TX_EGRESS_FIFO_UNDERRUN_OR_PARITY)),
521/* 4*/	FLAG_ENTRY0("TxLinkdownErr", SEES(TX_LINKDOWN)),
522/* 5*/	FLAG_ENTRY0("TxIncorrectLinkStateErr", SEES(TX_INCORRECT_LINK_STATE)),
523/* 6 reserved */
524/* 7*/	FLAG_ENTRY0("TxPioLaunchIntfParityErr",
525		SEES(TX_PIO_LAUNCH_INTF_PARITY)),
526/* 8*/	FLAG_ENTRY0("TxSdmaLaunchIntfParityErr",
527		SEES(TX_SDMA_LAUNCH_INTF_PARITY)),
528/* 9-10 reserved */
529/*11*/	FLAG_ENTRY0("TxSbrdCtlStateMachineParityErr",
530		SEES(TX_SBRD_CTL_STATE_MACHINE_PARITY)),
531/*12*/	FLAG_ENTRY0("TxIllegalVLErr", SEES(TX_ILLEGAL_VL)),
532/*13*/	FLAG_ENTRY0("TxLaunchCsrParityErr", SEES(TX_LAUNCH_CSR_PARITY)),
533/*14*/	FLAG_ENTRY0("TxSbrdCtlCsrParityErr", SEES(TX_SBRD_CTL_CSR_PARITY)),
534/*15*/	FLAG_ENTRY0("TxConfigParityErr", SEES(TX_CONFIG_PARITY)),
535/*16*/	FLAG_ENTRY0("TxSdma0DisallowedPacketErr",
536		SEES(TX_SDMA0_DISALLOWED_PACKET)),
537/*17*/	FLAG_ENTRY0("TxSdma1DisallowedPacketErr",
538		SEES(TX_SDMA1_DISALLOWED_PACKET)),
539/*18*/	FLAG_ENTRY0("TxSdma2DisallowedPacketErr",
540		SEES(TX_SDMA2_DISALLOWED_PACKET)),
541/*19*/	FLAG_ENTRY0("TxSdma3DisallowedPacketErr",
542		SEES(TX_SDMA3_DISALLOWED_PACKET)),
543/*20*/	FLAG_ENTRY0("TxSdma4DisallowedPacketErr",
544		SEES(TX_SDMA4_DISALLOWED_PACKET)),
545/*21*/	FLAG_ENTRY0("TxSdma5DisallowedPacketErr",
546		SEES(TX_SDMA5_DISALLOWED_PACKET)),
547/*22*/	FLAG_ENTRY0("TxSdma6DisallowedPacketErr",
548		SEES(TX_SDMA6_DISALLOWED_PACKET)),
549/*23*/	FLAG_ENTRY0("TxSdma7DisallowedPacketErr",
550		SEES(TX_SDMA7_DISALLOWED_PACKET)),
551/*24*/	FLAG_ENTRY0("TxSdma8DisallowedPacketErr",
552		SEES(TX_SDMA8_DISALLOWED_PACKET)),
553/*25*/	FLAG_ENTRY0("TxSdma9DisallowedPacketErr",
554		SEES(TX_SDMA9_DISALLOWED_PACKET)),
555/*26*/	FLAG_ENTRY0("TxSdma10DisallowedPacketErr",
556		SEES(TX_SDMA10_DISALLOWED_PACKET)),
557/*27*/	FLAG_ENTRY0("TxSdma11DisallowedPacketErr",
558		SEES(TX_SDMA11_DISALLOWED_PACKET)),
559/*28*/	FLAG_ENTRY0("TxSdma12DisallowedPacketErr",
560		SEES(TX_SDMA12_DISALLOWED_PACKET)),
561/*29*/	FLAG_ENTRY0("TxSdma13DisallowedPacketErr",
562		SEES(TX_SDMA13_DISALLOWED_PACKET)),
563/*30*/	FLAG_ENTRY0("TxSdma14DisallowedPacketErr",
564		SEES(TX_SDMA14_DISALLOWED_PACKET)),
565/*31*/	FLAG_ENTRY0("TxSdma15DisallowedPacketErr",
566		SEES(TX_SDMA15_DISALLOWED_PACKET)),
567/*32*/	FLAG_ENTRY0("TxLaunchFifo0UncOrParityErr",
568		SEES(TX_LAUNCH_FIFO0_UNC_OR_PARITY)),
569/*33*/	FLAG_ENTRY0("TxLaunchFifo1UncOrParityErr",
570		SEES(TX_LAUNCH_FIFO1_UNC_OR_PARITY)),
571/*34*/	FLAG_ENTRY0("TxLaunchFifo2UncOrParityErr",
572		SEES(TX_LAUNCH_FIFO2_UNC_OR_PARITY)),
573/*35*/	FLAG_ENTRY0("TxLaunchFifo3UncOrParityErr",
574		SEES(TX_LAUNCH_FIFO3_UNC_OR_PARITY)),
575/*36*/	FLAG_ENTRY0("TxLaunchFifo4UncOrParityErr",
576		SEES(TX_LAUNCH_FIFO4_UNC_OR_PARITY)),
577/*37*/	FLAG_ENTRY0("TxLaunchFifo5UncOrParityErr",
578		SEES(TX_LAUNCH_FIFO5_UNC_OR_PARITY)),
579/*38*/	FLAG_ENTRY0("TxLaunchFifo6UncOrParityErr",
580		SEES(TX_LAUNCH_FIFO6_UNC_OR_PARITY)),
581/*39*/	FLAG_ENTRY0("TxLaunchFifo7UncOrParityErr",
582		SEES(TX_LAUNCH_FIFO7_UNC_OR_PARITY)),
583/*40*/	FLAG_ENTRY0("TxLaunchFifo8UncOrParityErr",
584		SEES(TX_LAUNCH_FIFO8_UNC_OR_PARITY)),
585/*41*/	FLAG_ENTRY0("TxCreditReturnParityErr", SEES(TX_CREDIT_RETURN_PARITY)),
586/*42*/	FLAG_ENTRY0("TxSbHdrUncErr", SEES(TX_SB_HDR_UNC)),
587/*43*/	FLAG_ENTRY0("TxReadSdmaMemoryUncErr", SEES(TX_READ_SDMA_MEMORY_UNC)),
588/*44*/	FLAG_ENTRY0("TxReadPioMemoryUncErr", SEES(TX_READ_PIO_MEMORY_UNC)),
589/*45*/	FLAG_ENTRY0("TxEgressFifoUncErr", SEES(TX_EGRESS_FIFO_UNC)),
590/*46*/	FLAG_ENTRY0("TxHcrcInsertionErr", SEES(TX_HCRC_INSERTION)),
591/*47*/	FLAG_ENTRY0("TxCreditReturnVLErr", SEES(TX_CREDIT_RETURN_VL)),
592/*48*/	FLAG_ENTRY0("TxLaunchFifo0CorErr", SEES(TX_LAUNCH_FIFO0_COR)),
593/*49*/	FLAG_ENTRY0("TxLaunchFifo1CorErr", SEES(TX_LAUNCH_FIFO1_COR)),
594/*50*/	FLAG_ENTRY0("TxLaunchFifo2CorErr", SEES(TX_LAUNCH_FIFO2_COR)),
595/*51*/	FLAG_ENTRY0("TxLaunchFifo3CorErr", SEES(TX_LAUNCH_FIFO3_COR)),
596/*52*/	FLAG_ENTRY0("TxLaunchFifo4CorErr", SEES(TX_LAUNCH_FIFO4_COR)),
597/*53*/	FLAG_ENTRY0("TxLaunchFifo5CorErr", SEES(TX_LAUNCH_FIFO5_COR)),
598/*54*/	FLAG_ENTRY0("TxLaunchFifo6CorErr", SEES(TX_LAUNCH_FIFO6_COR)),
599/*55*/	FLAG_ENTRY0("TxLaunchFifo7CorErr", SEES(TX_LAUNCH_FIFO7_COR)),
600/*56*/	FLAG_ENTRY0("TxLaunchFifo8CorErr", SEES(TX_LAUNCH_FIFO8_COR)),
601/*57*/	FLAG_ENTRY0("TxCreditOverrunErr", SEES(TX_CREDIT_OVERRUN)),
602/*58*/	FLAG_ENTRY0("TxSbHdrCorErr", SEES(TX_SB_HDR_COR)),
603/*59*/	FLAG_ENTRY0("TxReadSdmaMemoryCorErr", SEES(TX_READ_SDMA_MEMORY_COR)),
604/*60*/	FLAG_ENTRY0("TxReadPioMemoryCorErr", SEES(TX_READ_PIO_MEMORY_COR)),
605/*61*/	FLAG_ENTRY0("TxEgressFifoCorErr", SEES(TX_EGRESS_FIFO_COR)),
606/*62*/	FLAG_ENTRY0("TxReadSdmaMemoryCsrUncErr",
607		SEES(TX_READ_SDMA_MEMORY_CSR_UNC)),
608/*63*/	FLAG_ENTRY0("TxReadPioMemoryCsrUncErr",
609		SEES(TX_READ_PIO_MEMORY_CSR_UNC)),
610};
611
612/*
613 * TXE Egress Error Info flags
614 */
615#define SEEI(text) SEND_EGRESS_ERR_INFO_##text##_ERR_SMASK
616static struct flag_table egress_err_info_flags[] = {
617/* 0*/	FLAG_ENTRY0("Reserved", 0ull),
618/* 1*/	FLAG_ENTRY0("VLErr", SEEI(VL)),
619/* 2*/	FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
620/* 3*/	FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
621/* 4*/	FLAG_ENTRY0("PartitionKeyErr", SEEI(PARTITION_KEY)),
622/* 5*/	FLAG_ENTRY0("SLIDErr", SEEI(SLID)),
623/* 6*/	FLAG_ENTRY0("OpcodeErr", SEEI(OPCODE)),
624/* 7*/	FLAG_ENTRY0("VLMappingErr", SEEI(VL_MAPPING)),
625/* 8*/	FLAG_ENTRY0("RawErr", SEEI(RAW)),
626/* 9*/	FLAG_ENTRY0("RawIPv6Err", SEEI(RAW_IPV6)),
627/*10*/	FLAG_ENTRY0("GRHErr", SEEI(GRH)),
628/*11*/	FLAG_ENTRY0("BypassErr", SEEI(BYPASS)),
629/*12*/	FLAG_ENTRY0("KDETHPacketsErr", SEEI(KDETH_PACKETS)),
630/*13*/	FLAG_ENTRY0("NonKDETHPacketsErr", SEEI(NON_KDETH_PACKETS)),
631/*14*/	FLAG_ENTRY0("TooSmallIBPacketsErr", SEEI(TOO_SMALL_IB_PACKETS)),
632/*15*/	FLAG_ENTRY0("TooSmallBypassPacketsErr", SEEI(TOO_SMALL_BYPASS_PACKETS)),
633/*16*/	FLAG_ENTRY0("PbcTestErr", SEEI(PBC_TEST)),
634/*17*/	FLAG_ENTRY0("BadPktLenErr", SEEI(BAD_PKT_LEN)),
635/*18*/	FLAG_ENTRY0("TooLongIBPacketErr", SEEI(TOO_LONG_IB_PACKET)),
636/*19*/	FLAG_ENTRY0("TooLongBypassPacketsErr", SEEI(TOO_LONG_BYPASS_PACKETS)),
637/*20*/	FLAG_ENTRY0("PbcStaticRateControlErr", SEEI(PBC_STATIC_RATE_CONTROL)),
638/*21*/	FLAG_ENTRY0("BypassBadPktLenErr", SEEI(BAD_PKT_LEN)),
639};
640
641/* TXE Egress errors that cause an SPC freeze */
642#define ALL_TXE_EGRESS_FREEZE_ERR \
643	(SEES(TX_EGRESS_FIFO_UNDERRUN_OR_PARITY) \
644	| SEES(TX_PIO_LAUNCH_INTF_PARITY) \
645	| SEES(TX_SDMA_LAUNCH_INTF_PARITY) \
646	| SEES(TX_SBRD_CTL_STATE_MACHINE_PARITY) \
647	| SEES(TX_LAUNCH_CSR_PARITY) \
648	| SEES(TX_SBRD_CTL_CSR_PARITY) \
649	| SEES(TX_CONFIG_PARITY) \
650	| SEES(TX_LAUNCH_FIFO0_UNC_OR_PARITY) \
651	| SEES(TX_LAUNCH_FIFO1_UNC_OR_PARITY) \
652	| SEES(TX_LAUNCH_FIFO2_UNC_OR_PARITY) \
653	| SEES(TX_LAUNCH_FIFO3_UNC_OR_PARITY) \
654	| SEES(TX_LAUNCH_FIFO4_UNC_OR_PARITY) \
655	| SEES(TX_LAUNCH_FIFO5_UNC_OR_PARITY) \
656	| SEES(TX_LAUNCH_FIFO6_UNC_OR_PARITY) \
657	| SEES(TX_LAUNCH_FIFO7_UNC_OR_PARITY) \
658	| SEES(TX_LAUNCH_FIFO8_UNC_OR_PARITY) \
659	| SEES(TX_CREDIT_RETURN_PARITY))
660
661/*
662 * TXE Send error flags
663 */
664#define SES(name) SEND_ERR_STATUS_SEND_##name##_ERR_SMASK
665static struct flag_table send_err_status_flags[] = {
666/* 0*/	FLAG_ENTRY0("SDmaRpyTagErr", SES(CSR_PARITY)),
667/* 1*/	FLAG_ENTRY0("SendCsrReadBadAddrErr", SES(CSR_READ_BAD_ADDR)),
668/* 2*/	FLAG_ENTRY0("SendCsrWriteBadAddrErr", SES(CSR_WRITE_BAD_ADDR))
669};
670
671/*
672 * TXE Send Context Error flags and consequences
673 */
674static struct flag_table sc_err_status_flags[] = {
675/* 0*/	FLAG_ENTRY("InconsistentSop",
676		SEC_PACKET_DROPPED | SEC_SC_HALTED,
677		SEND_CTXT_ERR_STATUS_PIO_INCONSISTENT_SOP_ERR_SMASK),
678/* 1*/	FLAG_ENTRY("DisallowedPacket",
679		SEC_PACKET_DROPPED | SEC_SC_HALTED,
680		SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK),
681/* 2*/	FLAG_ENTRY("WriteCrossesBoundary",
682		SEC_WRITE_DROPPED | SEC_SC_HALTED,
683		SEND_CTXT_ERR_STATUS_PIO_WRITE_CROSSES_BOUNDARY_ERR_SMASK),
684/* 3*/	FLAG_ENTRY("WriteOverflow",
685		SEC_WRITE_DROPPED | SEC_SC_HALTED,
686		SEND_CTXT_ERR_STATUS_PIO_WRITE_OVERFLOW_ERR_SMASK),
687/* 4*/	FLAG_ENTRY("WriteOutOfBounds",
688		SEC_WRITE_DROPPED | SEC_SC_HALTED,
689		SEND_CTXT_ERR_STATUS_PIO_WRITE_OUT_OF_BOUNDS_ERR_SMASK),
690/* 5-63 reserved*/
691};
692
693/*
694 * RXE Receive Error flags
695 */
696#define RXES(name) RCV_ERR_STATUS_RX_##name##_ERR_SMASK
697static struct flag_table rxe_err_status_flags[] = {
698/* 0*/	FLAG_ENTRY0("RxDmaCsrCorErr", RXES(DMA_CSR_COR)),
699/* 1*/	FLAG_ENTRY0("RxDcIntfParityErr", RXES(DC_INTF_PARITY)),
700/* 2*/	FLAG_ENTRY0("RxRcvHdrUncErr", RXES(RCV_HDR_UNC)),
701/* 3*/	FLAG_ENTRY0("RxRcvHdrCorErr", RXES(RCV_HDR_COR)),
702/* 4*/	FLAG_ENTRY0("RxRcvDataUncErr", RXES(RCV_DATA_UNC)),
703/* 5*/	FLAG_ENTRY0("RxRcvDataCorErr", RXES(RCV_DATA_COR)),
704/* 6*/	FLAG_ENTRY0("RxRcvQpMapTableUncErr", RXES(RCV_QP_MAP_TABLE_UNC)),
705/* 7*/	FLAG_ENTRY0("RxRcvQpMapTableCorErr", RXES(RCV_QP_MAP_TABLE_COR)),
706/* 8*/	FLAG_ENTRY0("RxRcvCsrParityErr", RXES(RCV_CSR_PARITY)),
707/* 9*/	FLAG_ENTRY0("RxDcSopEopParityErr", RXES(DC_SOP_EOP_PARITY)),
708/*10*/	FLAG_ENTRY0("RxDmaFlagUncErr", RXES(DMA_FLAG_UNC)),
709/*11*/	FLAG_ENTRY0("RxDmaFlagCorErr", RXES(DMA_FLAG_COR)),
710/*12*/	FLAG_ENTRY0("RxRcvFsmEncodingErr", RXES(RCV_FSM_ENCODING)),
711/*13*/	FLAG_ENTRY0("RxRbufFreeListUncErr", RXES(RBUF_FREE_LIST_UNC)),
712/*14*/	FLAG_ENTRY0("RxRbufFreeListCorErr", RXES(RBUF_FREE_LIST_COR)),
713/*15*/	FLAG_ENTRY0("RxRbufLookupDesRegUncErr", RXES(RBUF_LOOKUP_DES_REG_UNC)),
714/*16*/	FLAG_ENTRY0("RxRbufLookupDesRegUncCorErr",
715		RXES(RBUF_LOOKUP_DES_REG_UNC_COR)),
716/*17*/	FLAG_ENTRY0("RxRbufLookupDesUncErr", RXES(RBUF_LOOKUP_DES_UNC)),
717/*18*/	FLAG_ENTRY0("RxRbufLookupDesCorErr", RXES(RBUF_LOOKUP_DES_COR)),
718/*19*/	FLAG_ENTRY0("RxRbufBlockListReadUncErr",
719		RXES(RBUF_BLOCK_LIST_READ_UNC)),
720/*20*/	FLAG_ENTRY0("RxRbufBlockListReadCorErr",
721		RXES(RBUF_BLOCK_LIST_READ_COR)),
722/*21*/	FLAG_ENTRY0("RxRbufCsrQHeadBufNumParityErr",
723		RXES(RBUF_CSR_QHEAD_BUF_NUM_PARITY)),
724/*22*/	FLAG_ENTRY0("RxRbufCsrQEntCntParityErr",
725		RXES(RBUF_CSR_QENT_CNT_PARITY)),
726/*23*/	FLAG_ENTRY0("RxRbufCsrQNextBufParityErr",
727		RXES(RBUF_CSR_QNEXT_BUF_PARITY)),
728/*24*/	FLAG_ENTRY0("RxRbufCsrQVldBitParityErr",
729		RXES(RBUF_CSR_QVLD_BIT_PARITY)),
730/*25*/	FLAG_ENTRY0("RxRbufCsrQHdPtrParityErr", RXES(RBUF_CSR_QHD_PTR_PARITY)),
731/*26*/	FLAG_ENTRY0("RxRbufCsrQTlPtrParityErr", RXES(RBUF_CSR_QTL_PTR_PARITY)),
732/*27*/	FLAG_ENTRY0("RxRbufCsrQNumOfPktParityErr",
733		RXES(RBUF_CSR_QNUM_OF_PKT_PARITY)),
734/*28*/	FLAG_ENTRY0("RxRbufCsrQEOPDWParityErr", RXES(RBUF_CSR_QEOPDW_PARITY)),
735/*29*/	FLAG_ENTRY0("RxRbufCtxIdParityErr", RXES(RBUF_CTX_ID_PARITY)),
736/*30*/	FLAG_ENTRY0("RxRBufBadLookupErr", RXES(RBUF_BAD_LOOKUP)),
737/*31*/	FLAG_ENTRY0("RxRbufFullErr", RXES(RBUF_FULL)),
738/*32*/	FLAG_ENTRY0("RxRbufEmptyErr", RXES(RBUF_EMPTY)),
739/*33*/	FLAG_ENTRY0("RxRbufFlRdAddrParityErr", RXES(RBUF_FL_RD_ADDR_PARITY)),
740/*34*/	FLAG_ENTRY0("RxRbufFlWrAddrParityErr", RXES(RBUF_FL_WR_ADDR_PARITY)),
741/*35*/	FLAG_ENTRY0("RxRbufFlInitdoneParityErr",
742		RXES(RBUF_FL_INITDONE_PARITY)),
743/*36*/	FLAG_ENTRY0("RxRbufFlInitWrAddrParityErr",
744		RXES(RBUF_FL_INIT_WR_ADDR_PARITY)),
745/*37*/	FLAG_ENTRY0("RxRbufNextFreeBufUncErr", RXES(RBUF_NEXT_FREE_BUF_UNC)),
746/*38*/	FLAG_ENTRY0("RxRbufNextFreeBufCorErr", RXES(RBUF_NEXT_FREE_BUF_COR)),
747/*39*/	FLAG_ENTRY0("RxLookupDesPart1UncErr", RXES(LOOKUP_DES_PART1_UNC)),
748/*40*/	FLAG_ENTRY0("RxLookupDesPart1UncCorErr",
749		RXES(LOOKUP_DES_PART1_UNC_COR)),
750/*41*/	FLAG_ENTRY0("RxLookupDesPart2ParityErr",
751		RXES(LOOKUP_DES_PART2_PARITY)),
752/*42*/	FLAG_ENTRY0("RxLookupRcvArrayUncErr", RXES(LOOKUP_RCV_ARRAY_UNC)),
753/*43*/	FLAG_ENTRY0("RxLookupRcvArrayCorErr", RXES(LOOKUP_RCV_ARRAY_COR)),
754/*44*/	FLAG_ENTRY0("RxLookupCsrParityErr", RXES(LOOKUP_CSR_PARITY)),
755/*45*/	FLAG_ENTRY0("RxHqIntrCsrParityErr", RXES(HQ_INTR_CSR_PARITY)),
756/*46*/	FLAG_ENTRY0("RxHqIntrFsmErr", RXES(HQ_INTR_FSM)),
757/*47*/	FLAG_ENTRY0("RxRbufDescPart1UncErr", RXES(RBUF_DESC_PART1_UNC)),
758/*48*/	FLAG_ENTRY0("RxRbufDescPart1CorErr", RXES(RBUF_DESC_PART1_COR)),
759/*49*/	FLAG_ENTRY0("RxRbufDescPart2UncErr", RXES(RBUF_DESC_PART2_UNC)),
760/*50*/	FLAG_ENTRY0("RxRbufDescPart2CorErr", RXES(RBUF_DESC_PART2_COR)),
761/*51*/	FLAG_ENTRY0("RxDmaHdrFifoRdUncErr", RXES(DMA_HDR_FIFO_RD_UNC)),
762/*52*/	FLAG_ENTRY0("RxDmaHdrFifoRdCorErr", RXES(DMA_HDR_FIFO_RD_COR)),
763/*53*/	FLAG_ENTRY0("RxDmaDataFifoRdUncErr", RXES(DMA_DATA_FIFO_RD_UNC)),
764/*54*/	FLAG_ENTRY0("RxDmaDataFifoRdCorErr", RXES(DMA_DATA_FIFO_RD_COR)),
765/*55*/	FLAG_ENTRY0("RxRbufDataUncErr", RXES(RBUF_DATA_UNC)),
766/*56*/	FLAG_ENTRY0("RxRbufDataCorErr", RXES(RBUF_DATA_COR)),
767/*57*/	FLAG_ENTRY0("RxDmaCsrParityErr", RXES(DMA_CSR_PARITY)),
768/*58*/	FLAG_ENTRY0("RxDmaEqFsmEncodingErr", RXES(DMA_EQ_FSM_ENCODING)),
769/*59*/	FLAG_ENTRY0("RxDmaDqFsmEncodingErr", RXES(DMA_DQ_FSM_ENCODING)),
770/*60*/	FLAG_ENTRY0("RxDmaCsrUncErr", RXES(DMA_CSR_UNC)),
771/*61*/	FLAG_ENTRY0("RxCsrReadBadAddrErr", RXES(CSR_READ_BAD_ADDR)),
772/*62*/	FLAG_ENTRY0("RxCsrWriteBadAddrErr", RXES(CSR_WRITE_BAD_ADDR)),
773/*63*/	FLAG_ENTRY0("RxCsrParityErr", RXES(CSR_PARITY))
774};
775
776/* RXE errors that will trigger an SPC freeze */
777#define ALL_RXE_FREEZE_ERR  \
778	(RCV_ERR_STATUS_RX_RCV_QP_MAP_TABLE_UNC_ERR_SMASK \
779	| RCV_ERR_STATUS_RX_RCV_CSR_PARITY_ERR_SMASK \
780	| RCV_ERR_STATUS_RX_DMA_FLAG_UNC_ERR_SMASK \
781	| RCV_ERR_STATUS_RX_RCV_FSM_ENCODING_ERR_SMASK \
782	| RCV_ERR_STATUS_RX_RBUF_FREE_LIST_UNC_ERR_SMASK \
783	| RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_REG_UNC_ERR_SMASK \
784	| RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_REG_UNC_COR_ERR_SMASK \
785	| RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_UNC_ERR_SMASK \
786	| RCV_ERR_STATUS_RX_RBUF_BLOCK_LIST_READ_UNC_ERR_SMASK \
787	| RCV_ERR_STATUS_RX_RBUF_CSR_QHEAD_BUF_NUM_PARITY_ERR_SMASK \
788	| RCV_ERR_STATUS_RX_RBUF_CSR_QENT_CNT_PARITY_ERR_SMASK \
789	| RCV_ERR_STATUS_RX_RBUF_CSR_QNEXT_BUF_PARITY_ERR_SMASK \
790	| RCV_ERR_STATUS_RX_RBUF_CSR_QVLD_BIT_PARITY_ERR_SMASK \
791	| RCV_ERR_STATUS_RX_RBUF_CSR_QHD_PTR_PARITY_ERR_SMASK \
792	| RCV_ERR_STATUS_RX_RBUF_CSR_QTL_PTR_PARITY_ERR_SMASK \
793	| RCV_ERR_STATUS_RX_RBUF_CSR_QNUM_OF_PKT_PARITY_ERR_SMASK \
794	| RCV_ERR_STATUS_RX_RBUF_CSR_QEOPDW_PARITY_ERR_SMASK \
795	| RCV_ERR_STATUS_RX_RBUF_CTX_ID_PARITY_ERR_SMASK \
796	| RCV_ERR_STATUS_RX_RBUF_BAD_LOOKUP_ERR_SMASK \
797	| RCV_ERR_STATUS_RX_RBUF_FULL_ERR_SMASK \
798	| RCV_ERR_STATUS_RX_RBUF_EMPTY_ERR_SMASK \
799	| RCV_ERR_STATUS_RX_RBUF_FL_RD_ADDR_PARITY_ERR_SMASK \
800	| RCV_ERR_STATUS_RX_RBUF_FL_WR_ADDR_PARITY_ERR_SMASK \
801	| RCV_ERR_STATUS_RX_RBUF_FL_INITDONE_PARITY_ERR_SMASK \
802	| RCV_ERR_STATUS_RX_RBUF_FL_INIT_WR_ADDR_PARITY_ERR_SMASK \
803	| RCV_ERR_STATUS_RX_RBUF_NEXT_FREE_BUF_UNC_ERR_SMASK \
804	| RCV_ERR_STATUS_RX_LOOKUP_DES_PART1_UNC_ERR_SMASK \
805	| RCV_ERR_STATUS_RX_LOOKUP_DES_PART1_UNC_COR_ERR_SMASK \
806	| RCV_ERR_STATUS_RX_LOOKUP_DES_PART2_PARITY_ERR_SMASK \
807	| RCV_ERR_STATUS_RX_LOOKUP_RCV_ARRAY_UNC_ERR_SMASK \
808	| RCV_ERR_STATUS_RX_LOOKUP_CSR_PARITY_ERR_SMASK \
809	| RCV_ERR_STATUS_RX_HQ_INTR_CSR_PARITY_ERR_SMASK \
810	| RCV_ERR_STATUS_RX_HQ_INTR_FSM_ERR_SMASK \
811	| RCV_ERR_STATUS_RX_RBUF_DESC_PART1_UNC_ERR_SMASK \
812	| RCV_ERR_STATUS_RX_RBUF_DESC_PART1_COR_ERR_SMASK \
813	| RCV_ERR_STATUS_RX_RBUF_DESC_PART2_UNC_ERR_SMASK \
814	| RCV_ERR_STATUS_RX_DMA_HDR_FIFO_RD_UNC_ERR_SMASK \
815	| RCV_ERR_STATUS_RX_DMA_DATA_FIFO_RD_UNC_ERR_SMASK \
816	| RCV_ERR_STATUS_RX_RBUF_DATA_UNC_ERR_SMASK \
817	| RCV_ERR_STATUS_RX_DMA_CSR_PARITY_ERR_SMASK \
818	| RCV_ERR_STATUS_RX_DMA_EQ_FSM_ENCODING_ERR_SMASK \
819	| RCV_ERR_STATUS_RX_DMA_DQ_FSM_ENCODING_ERR_SMASK \
820	| RCV_ERR_STATUS_RX_DMA_CSR_UNC_ERR_SMASK \
821	| RCV_ERR_STATUS_RX_CSR_PARITY_ERR_SMASK)
822
823#define RXE_FREEZE_ABORT_MASK \
824	(RCV_ERR_STATUS_RX_DMA_CSR_UNC_ERR_SMASK | \
825	RCV_ERR_STATUS_RX_DMA_HDR_FIFO_RD_UNC_ERR_SMASK | \
826	RCV_ERR_STATUS_RX_DMA_DATA_FIFO_RD_UNC_ERR_SMASK)
827
828/*
829 * DCC Error Flags
830 */
831#define DCCE(name) DCC_ERR_FLG_##name##_SMASK
832static struct flag_table dcc_err_flags[] = {
833	FLAG_ENTRY0("bad_l2_err", DCCE(BAD_L2_ERR)),
834	FLAG_ENTRY0("bad_sc_err", DCCE(BAD_SC_ERR)),
835	FLAG_ENTRY0("bad_mid_tail_err", DCCE(BAD_MID_TAIL_ERR)),
836	FLAG_ENTRY0("bad_preemption_err", DCCE(BAD_PREEMPTION_ERR)),
837	FLAG_ENTRY0("preemption_err", DCCE(PREEMPTION_ERR)),
838	FLAG_ENTRY0("preemptionvl15_err", DCCE(PREEMPTIONVL15_ERR)),
839	FLAG_ENTRY0("bad_vl_marker_err", DCCE(BAD_VL_MARKER_ERR)),
840	FLAG_ENTRY0("bad_dlid_target_err", DCCE(BAD_DLID_TARGET_ERR)),
841	FLAG_ENTRY0("bad_lver_err", DCCE(BAD_LVER_ERR)),
842	FLAG_ENTRY0("uncorrectable_err", DCCE(UNCORRECTABLE_ERR)),
843	FLAG_ENTRY0("bad_crdt_ack_err", DCCE(BAD_CRDT_ACK_ERR)),
844	FLAG_ENTRY0("unsup_pkt_type", DCCE(UNSUP_PKT_TYPE)),
845	FLAG_ENTRY0("bad_ctrl_flit_err", DCCE(BAD_CTRL_FLIT_ERR)),
846	FLAG_ENTRY0("event_cntr_parity_err", DCCE(EVENT_CNTR_PARITY_ERR)),
847	FLAG_ENTRY0("event_cntr_rollover_err", DCCE(EVENT_CNTR_ROLLOVER_ERR)),
848	FLAG_ENTRY0("link_err", DCCE(LINK_ERR)),
849	FLAG_ENTRY0("misc_cntr_rollover_err", DCCE(MISC_CNTR_ROLLOVER_ERR)),
850	FLAG_ENTRY0("bad_ctrl_dist_err", DCCE(BAD_CTRL_DIST_ERR)),
851	FLAG_ENTRY0("bad_tail_dist_err", DCCE(BAD_TAIL_DIST_ERR)),
852	FLAG_ENTRY0("bad_head_dist_err", DCCE(BAD_HEAD_DIST_ERR)),
853	FLAG_ENTRY0("nonvl15_state_err", DCCE(NONVL15_STATE_ERR)),
854	FLAG_ENTRY0("vl15_multi_err", DCCE(VL15_MULTI_ERR)),
855	FLAG_ENTRY0("bad_pkt_length_err", DCCE(BAD_PKT_LENGTH_ERR)),
856	FLAG_ENTRY0("unsup_vl_err", DCCE(UNSUP_VL_ERR)),
857	FLAG_ENTRY0("perm_nvl15_err", DCCE(PERM_NVL15_ERR)),
858	FLAG_ENTRY0("slid_zero_err", DCCE(SLID_ZERO_ERR)),
859	FLAG_ENTRY0("dlid_zero_err", DCCE(DLID_ZERO_ERR)),
860	FLAG_ENTRY0("length_mtu_err", DCCE(LENGTH_MTU_ERR)),
861	FLAG_ENTRY0("rx_early_drop_err", DCCE(RX_EARLY_DROP_ERR)),
862	FLAG_ENTRY0("late_short_err", DCCE(LATE_SHORT_ERR)),
863	FLAG_ENTRY0("late_long_err", DCCE(LATE_LONG_ERR)),
864	FLAG_ENTRY0("late_ebp_err", DCCE(LATE_EBP_ERR)),
865	FLAG_ENTRY0("fpe_tx_fifo_ovflw_err", DCCE(FPE_TX_FIFO_OVFLW_ERR)),
866	FLAG_ENTRY0("fpe_tx_fifo_unflw_err", DCCE(FPE_TX_FIFO_UNFLW_ERR)),
867	FLAG_ENTRY0("csr_access_blocked_host", DCCE(CSR_ACCESS_BLOCKED_HOST)),
868	FLAG_ENTRY0("csr_access_blocked_uc", DCCE(CSR_ACCESS_BLOCKED_UC)),
869	FLAG_ENTRY0("tx_ctrl_parity_err", DCCE(TX_CTRL_PARITY_ERR)),
870	FLAG_ENTRY0("tx_ctrl_parity_mbe_err", DCCE(TX_CTRL_PARITY_MBE_ERR)),
871	FLAG_ENTRY0("tx_sc_parity_err", DCCE(TX_SC_PARITY_ERR)),
872	FLAG_ENTRY0("rx_ctrl_parity_mbe_err", DCCE(RX_CTRL_PARITY_MBE_ERR)),
873	FLAG_ENTRY0("csr_parity_err", DCCE(CSR_PARITY_ERR)),
874	FLAG_ENTRY0("csr_inval_addr", DCCE(CSR_INVAL_ADDR)),
875	FLAG_ENTRY0("tx_byte_shft_parity_err", DCCE(TX_BYTE_SHFT_PARITY_ERR)),
876	FLAG_ENTRY0("rx_byte_shft_parity_err", DCCE(RX_BYTE_SHFT_PARITY_ERR)),
877	FLAG_ENTRY0("fmconfig_err", DCCE(FMCONFIG_ERR)),
878	FLAG_ENTRY0("rcvport_err", DCCE(RCVPORT_ERR)),
879};
880
881/*
882 * LCB error flags
883 */
884#define LCBE(name) DC_LCB_ERR_FLG_##name##_SMASK
885static struct flag_table lcb_err_flags[] = {
886/* 0*/	FLAG_ENTRY0("CSR_PARITY_ERR", LCBE(CSR_PARITY_ERR)),
887/* 1*/	FLAG_ENTRY0("INVALID_CSR_ADDR", LCBE(INVALID_CSR_ADDR)),
888/* 2*/	FLAG_ENTRY0("RST_FOR_FAILED_DESKEW", LCBE(RST_FOR_FAILED_DESKEW)),
889/* 3*/	FLAG_ENTRY0("ALL_LNS_FAILED_REINIT_TEST",
890		LCBE(ALL_LNS_FAILED_REINIT_TEST)),
891/* 4*/	FLAG_ENTRY0("LOST_REINIT_STALL_OR_TOS", LCBE(LOST_REINIT_STALL_OR_TOS)),
892/* 5*/	FLAG_ENTRY0("TX_LESS_THAN_FOUR_LNS", LCBE(TX_LESS_THAN_FOUR_LNS)),
893/* 6*/	FLAG_ENTRY0("RX_LESS_THAN_FOUR_LNS", LCBE(RX_LESS_THAN_FOUR_LNS)),
894/* 7*/	FLAG_ENTRY0("SEQ_CRC_ERR", LCBE(SEQ_CRC_ERR)),
895/* 8*/	FLAG_ENTRY0("REINIT_FROM_PEER", LCBE(REINIT_FROM_PEER)),
896/* 9*/	FLAG_ENTRY0("REINIT_FOR_LN_DEGRADE", LCBE(REINIT_FOR_LN_DEGRADE)),
897/*10*/	FLAG_ENTRY0("CRC_ERR_CNT_HIT_LIMIT", LCBE(CRC_ERR_CNT_HIT_LIMIT)),
898/*11*/	FLAG_ENTRY0("RCLK_STOPPED", LCBE(RCLK_STOPPED)),
899/*12*/	FLAG_ENTRY0("UNEXPECTED_REPLAY_MARKER", LCBE(UNEXPECTED_REPLAY_MARKER)),
900/*13*/	FLAG_ENTRY0("UNEXPECTED_ROUND_TRIP_MARKER",
901		LCBE(UNEXPECTED_ROUND_TRIP_MARKER)),
902/*14*/	FLAG_ENTRY0("ILLEGAL_NULL_LTP", LCBE(ILLEGAL_NULL_LTP)),
903/*15*/	FLAG_ENTRY0("ILLEGAL_FLIT_ENCODING", LCBE(ILLEGAL_FLIT_ENCODING)),
904/*16*/	FLAG_ENTRY0("FLIT_INPUT_BUF_OFLW", LCBE(FLIT_INPUT_BUF_OFLW)),
905/*17*/	FLAG_ENTRY0("VL_ACK_INPUT_BUF_OFLW", LCBE(VL_ACK_INPUT_BUF_OFLW)),
906/*18*/	FLAG_ENTRY0("VL_ACK_INPUT_PARITY_ERR", LCBE(VL_ACK_INPUT_PARITY_ERR)),
907/*19*/	FLAG_ENTRY0("VL_ACK_INPUT_WRONG_CRC_MODE",
908		LCBE(VL_ACK_INPUT_WRONG_CRC_MODE)),
909/*20*/	FLAG_ENTRY0("FLIT_INPUT_BUF_MBE", LCBE(FLIT_INPUT_BUF_MBE)),
910/*21*/	FLAG_ENTRY0("FLIT_INPUT_BUF_SBE", LCBE(FLIT_INPUT_BUF_SBE)),
911/*22*/	FLAG_ENTRY0("REPLAY_BUF_MBE", LCBE(REPLAY_BUF_MBE)),
912/*23*/	FLAG_ENTRY0("REPLAY_BUF_SBE", LCBE(REPLAY_BUF_SBE)),
913/*24*/	FLAG_ENTRY0("CREDIT_RETURN_FLIT_MBE", LCBE(CREDIT_RETURN_FLIT_MBE)),
914/*25*/	FLAG_ENTRY0("RST_FOR_LINK_TIMEOUT", LCBE(RST_FOR_LINK_TIMEOUT)),
915/*26*/	FLAG_ENTRY0("RST_FOR_INCOMPLT_RND_TRIP",
916		LCBE(RST_FOR_INCOMPLT_RND_TRIP)),
917/*27*/	FLAG_ENTRY0("HOLD_REINIT", LCBE(HOLD_REINIT)),
918/*28*/	FLAG_ENTRY0("NEG_EDGE_LINK_TRANSFER_ACTIVE",
919		LCBE(NEG_EDGE_LINK_TRANSFER_ACTIVE)),
920/*29*/	FLAG_ENTRY0("REDUNDANT_FLIT_PARITY_ERR",
921		LCBE(REDUNDANT_FLIT_PARITY_ERR))
922};
923
924/*
925 * DC8051 Error Flags
926 */
927#define D8E(name) DC_DC8051_ERR_FLG_##name##_SMASK
928static struct flag_table dc8051_err_flags[] = {
929	FLAG_ENTRY0("SET_BY_8051", D8E(SET_BY_8051)),
930	FLAG_ENTRY0("LOST_8051_HEART_BEAT", D8E(LOST_8051_HEART_BEAT)),
931	FLAG_ENTRY0("CRAM_MBE", D8E(CRAM_MBE)),
932	FLAG_ENTRY0("CRAM_SBE", D8E(CRAM_SBE)),
933	FLAG_ENTRY0("DRAM_MBE", D8E(DRAM_MBE)),
934	FLAG_ENTRY0("DRAM_SBE", D8E(DRAM_SBE)),
935	FLAG_ENTRY0("IRAM_MBE", D8E(IRAM_MBE)),
936	FLAG_ENTRY0("IRAM_SBE", D8E(IRAM_SBE)),
937	FLAG_ENTRY0("UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES",
938		D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)),
939	FLAG_ENTRY0("INVALID_CSR_ADDR", D8E(INVALID_CSR_ADDR)),
940};
941
942/*
943 * DC8051 Information Error flags
944 *
945 * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.ERROR field.
946 */
947static struct flag_table dc8051_info_err_flags[] = {
948	FLAG_ENTRY0("Spico ROM check failed",  SPICO_ROM_FAILED),
949	FLAG_ENTRY0("Unknown frame received",  UNKNOWN_FRAME),
950	FLAG_ENTRY0("Target BER not met",      TARGET_BER_NOT_MET),
951	FLAG_ENTRY0("Serdes internal loopback failure",
952					FAILED_SERDES_INTERNAL_LOOPBACK),
953	FLAG_ENTRY0("Failed SerDes init",      FAILED_SERDES_INIT),
954	FLAG_ENTRY0("Failed LNI(Polling)",     FAILED_LNI_POLLING),
955	FLAG_ENTRY0("Failed LNI(Debounce)",    FAILED_LNI_DEBOUNCE),
956	FLAG_ENTRY0("Failed LNI(EstbComm)",    FAILED_LNI_ESTBCOMM),
957	FLAG_ENTRY0("Failed LNI(OptEq)",       FAILED_LNI_OPTEQ),
958	FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1),
959	FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2),
960	FLAG_ENTRY0("Failed LNI(ConfigLT)",    FAILED_LNI_CONFIGLT)
961};
962
963/*
964 * DC8051 Information Host Information flags
965 *
966 * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG field.
967 */
968static struct flag_table dc8051_info_host_msg_flags[] = {
969	FLAG_ENTRY0("Host request done", 0x0001),
970	FLAG_ENTRY0("BC SMA message", 0x0002),
971	FLAG_ENTRY0("BC PWR_MGM message", 0x0004),
972	FLAG_ENTRY0("BC Unknown message (BCC)", 0x0008),
973	FLAG_ENTRY0("BC Unknown message (LCB)", 0x0010),
974	FLAG_ENTRY0("External device config request", 0x0020),
975	FLAG_ENTRY0("VerifyCap all frames received", 0x0040),
976	FLAG_ENTRY0("LinkUp achieved", 0x0080),
977	FLAG_ENTRY0("Link going down", 0x0100),
978};
979
980
981static u32 encoded_size(u32 size);
982static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate);
983static int set_physical_link_state(struct hfi1_devdata *dd, u64 state);
984static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
985			       u8 *continuous);
986static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
987				  u8 *vcu, u16 *vl15buf, u8 *crc_sizes);
988static void read_vc_remote_link_width(struct hfi1_devdata *dd,
989				      u8 *remote_tx_rate, u16 *link_widths);
990static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
991				     u8 *flag_bits, u16 *link_widths);
992static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
993				  u8 *device_rev);
994static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed);
995static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx);
996static int read_tx_settings(struct hfi1_devdata *dd, u8 *enable_lane_tx,
997			    u8 *tx_polarity_inversion,
998			    u8 *rx_polarity_inversion, u8 *max_rate);
999static void handle_sdma_eng_err(struct hfi1_devdata *dd,
1000				unsigned int context, u64 err_status);
1001static void handle_qsfp_int(struct hfi1_devdata *dd, u32 source, u64 reg);
1002static void handle_dcc_err(struct hfi1_devdata *dd,
1003			   unsigned int context, u64 err_status);
1004static void handle_lcb_err(struct hfi1_devdata *dd,
1005			   unsigned int context, u64 err_status);
1006static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg);
1007static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1008static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1009static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1010static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1011static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1012static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1013static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1014static void set_partition_keys(struct hfi1_pportdata *);
1015static const char *link_state_name(u32 state);
1016static const char *link_state_reason_name(struct hfi1_pportdata *ppd,
1017					  u32 state);
1018static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
1019			   u64 *out_data);
1020static int read_idle_sma(struct hfi1_devdata *dd, u64 *data);
1021static int thermal_init(struct hfi1_devdata *dd);
1022
1023static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
1024				  int msecs);
1025static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
1026static void handle_temp_err(struct hfi1_devdata *);
1027static void dc_shutdown(struct hfi1_devdata *);
1028static void dc_start(struct hfi1_devdata *);
1029
1030/*
1031 * Error interrupt table entry.  This is used as input to the interrupt
1032 * "clear down" routine used for all second tier error interrupt register.
1033 * Second tier interrupt registers have a single bit representing them
1034 * in the top-level CceIntStatus.
1035 */
1036struct err_reg_info {
1037	u32 status;		/* status CSR offset */
1038	u32 clear;		/* clear CSR offset */
1039	u32 mask;		/* mask CSR offset */
1040	void (*handler)(struct hfi1_devdata *dd, u32 source, u64 reg);
1041	const char *desc;
1042};
1043
1044#define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START)
1045#define NUM_DC_ERRS (IS_DC_END - IS_DC_START)
1046#define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START)
1047
1048/*
1049 * Helpers for building HFI and DC error interrupt table entries.  Different
1050 * helpers are needed because of inconsistent register names.
1051 */
1052#define EE(reg, handler, desc) \
1053	{ reg##_STATUS, reg##_CLEAR, reg##_MASK, \
1054		handler, desc }
1055#define DC_EE1(reg, handler, desc) \
1056	{ reg##_FLG, reg##_FLG_CLR, reg##_FLG_EN, handler, desc }
1057#define DC_EE2(reg, handler, desc) \
1058	{ reg##_FLG, reg##_CLR, reg##_EN, handler, desc }
1059
1060/*
1061 * Table of the "misc" grouping of error interrupts.  Each entry refers to
1062 * another register containing more information.
1063 */
1064static const struct err_reg_info misc_errs[NUM_MISC_ERRS] = {
1065/* 0*/	EE(CCE_ERR,		handle_cce_err,    "CceErr"),
1066/* 1*/	EE(RCV_ERR,		handle_rxe_err,    "RxeErr"),
1067/* 2*/	EE(MISC_ERR,	handle_misc_err,   "MiscErr"),
1068/* 3*/	{ 0, 0, 0, NULL }, /* reserved */
1069/* 4*/	EE(SEND_PIO_ERR,    handle_pio_err,    "PioErr"),
1070/* 5*/	EE(SEND_DMA_ERR,    handle_sdma_err,   "SDmaErr"),
1071/* 6*/	EE(SEND_EGRESS_ERR, handle_egress_err, "EgressErr"),
1072/* 7*/	EE(SEND_ERR,	handle_txe_err,    "TxeErr")
1073	/* the rest are reserved */
1074};
1075
1076/*
1077 * Index into the Various section of the interrupt sources
1078 * corresponding to the Critical Temperature interrupt.
1079 */
1080#define TCRIT_INT_SOURCE 4
1081
1082/*
1083 * SDMA error interrupt entry - refers to another register containing more
1084 * information.
1085 */
1086static const struct err_reg_info sdma_eng_err =
1087	EE(SEND_DMA_ENG_ERR, handle_sdma_eng_err, "SDmaEngErr");
1088
1089static const struct err_reg_info various_err[NUM_VARIOUS] = {
1090/* 0*/	{ 0, 0, 0, NULL }, /* PbcInt */
1091/* 1*/	{ 0, 0, 0, NULL }, /* GpioAssertInt */
1092/* 2*/	EE(ASIC_QSFP1,	handle_qsfp_int,	"QSFP1"),
1093/* 3*/	EE(ASIC_QSFP2,	handle_qsfp_int,	"QSFP2"),
1094/* 4*/	{ 0, 0, 0, NULL }, /* TCritInt */
1095	/* rest are reserved */
1096};
1097
1098/*
1099 * The DC encoding of mtu_cap for 10K MTU in the DCC_CFG_PORT_CONFIG
1100 * register can not be derived from the MTU value because 10K is not
1101 * a power of 2. Therefore, we need a constant. Everything else can
1102 * be calculated.
1103 */
1104#define DCC_CFG_PORT_MTU_CAP_10240 7
1105
1106/*
1107 * Table of the DC grouping of error interrupts.  Each entry refers to
1108 * another register containing more information.
1109 */
1110static const struct err_reg_info dc_errs[NUM_DC_ERRS] = {
1111/* 0*/	DC_EE1(DCC_ERR,		handle_dcc_err,	       "DCC Err"),
1112/* 1*/	DC_EE2(DC_LCB_ERR,	handle_lcb_err,	       "LCB Err"),
1113/* 2*/	DC_EE2(DC_DC8051_ERR,	handle_8051_interrupt, "DC8051 Interrupt"),
1114/* 3*/	/* dc_lbm_int - special, see is_dc_int() */
1115	/* the rest are reserved */
1116};
1117
1118struct cntr_entry {
1119	/*
1120	 * counter name
1121	 */
1122	char *name;
1123
1124	/*
1125	 * csr to read for name (if applicable)
1126	 */
1127	u64 csr;
1128
1129	/*
1130	 * offset into dd or ppd to store the counter's value
1131	 */
1132	int offset;
1133
1134	/*
1135	 * flags
1136	 */
1137	u8 flags;
1138
1139	/*
1140	 * accessor for stat element, context either dd or ppd
1141	 */
1142	u64 (*rw_cntr)(const struct cntr_entry *,
1143			       void *context,
1144			       int vl,
1145			       int mode,
1146			       u64 data);
1147};
1148
1149#define C_RCV_HDR_OVF_FIRST C_RCV_HDR_OVF_0
1150#define C_RCV_HDR_OVF_LAST C_RCV_HDR_OVF_159
1151
1152#define CNTR_ELEM(name, csr, offset, flags, accessor) \
1153{ \
1154	name, \
1155	csr, \
1156	offset, \
1157	flags, \
1158	accessor \
1159}
1160
1161/* 32bit RXE */
1162#define RXE32_PORT_CNTR_ELEM(name, counter, flags) \
1163CNTR_ELEM(#name, \
1164	  (counter * 8 + RCV_COUNTER_ARRAY32), \
1165	  0, flags | CNTR_32BIT, \
1166	  port_access_u32_csr)
1167
1168#define RXE32_DEV_CNTR_ELEM(name, counter, flags) \
1169CNTR_ELEM(#name, \
1170	  (counter * 8 + RCV_COUNTER_ARRAY32), \
1171	  0, flags | CNTR_32BIT, \
1172	  dev_access_u32_csr)
1173
1174/* 64bit RXE */
1175#define RXE64_PORT_CNTR_ELEM(name, counter, flags) \
1176CNTR_ELEM(#name, \
1177	  (counter * 8 + RCV_COUNTER_ARRAY64), \
1178	  0, flags, \
1179	  port_access_u64_csr)
1180
1181#define RXE64_DEV_CNTR_ELEM(name, counter, flags) \
1182CNTR_ELEM(#name, \
1183	  (counter * 8 + RCV_COUNTER_ARRAY64), \
1184	  0, flags, \
1185	  dev_access_u64_csr)
1186
1187#define OVR_LBL(ctx) C_RCV_HDR_OVF_ ## ctx
1188#define OVR_ELM(ctx) \
1189CNTR_ELEM("RcvHdrOvr" #ctx, \
1190	  (RCV_HDR_OVFL_CNT + ctx*0x100), \
1191	  0, CNTR_NORMAL, port_access_u64_csr)
1192
1193/* 32bit TXE */
1194#define TXE32_PORT_CNTR_ELEM(name, counter, flags) \
1195CNTR_ELEM(#name, \
1196	  (counter * 8 + SEND_COUNTER_ARRAY32), \
1197	  0, flags | CNTR_32BIT, \
1198	  port_access_u32_csr)
1199
1200/* 64bit TXE */
1201#define TXE64_PORT_CNTR_ELEM(name, counter, flags) \
1202CNTR_ELEM(#name, \
1203	  (counter * 8 + SEND_COUNTER_ARRAY64), \
1204	  0, flags, \
1205	  port_access_u64_csr)
1206
1207# define TX64_DEV_CNTR_ELEM(name, counter, flags) \
1208CNTR_ELEM(#name,\
1209	  counter * 8 + SEND_COUNTER_ARRAY64, \
1210	  0, \
1211	  flags, \
1212	  dev_access_u64_csr)
1213
1214/* CCE */
1215#define CCE_PERF_DEV_CNTR_ELEM(name, counter, flags) \
1216CNTR_ELEM(#name, \
1217	  (counter * 8 + CCE_COUNTER_ARRAY32), \
1218	  0, flags | CNTR_32BIT, \
1219	  dev_access_u32_csr)
1220
1221#define CCE_INT_DEV_CNTR_ELEM(name, counter, flags) \
1222CNTR_ELEM(#name, \
1223	  (counter * 8 + CCE_INT_COUNTER_ARRAY32), \
1224	  0, flags | CNTR_32BIT, \
1225	  dev_access_u32_csr)
1226
1227/* DC */
1228#define DC_PERF_CNTR(name, counter, flags) \
1229CNTR_ELEM(#name, \
1230	  counter, \
1231	  0, \
1232	  flags, \
1233	  dev_access_u64_csr)
1234
1235#define DC_PERF_CNTR_LCB(name, counter, flags) \
1236CNTR_ELEM(#name, \
1237	  counter, \
1238	  0, \
1239	  flags, \
1240	  dc_access_lcb_cntr)
1241
1242/* ibp counters */
1243#define SW_IBP_CNTR(name, cntr) \
1244CNTR_ELEM(#name, \
1245	  0, \
1246	  0, \
1247	  CNTR_SYNTH, \
1248	  access_ibp_##cntr)
1249
1250u64 read_csr(const struct hfi1_devdata *dd, u32 offset)
1251{
1252	u64 val;
1253
1254	if (dd->flags & HFI1_PRESENT) {
1255		val = readq((void __iomem *)dd->kregbase + offset);
1256		return val;
1257	}
1258	return -1;
1259}
1260
1261void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value)
1262{
1263	if (dd->flags & HFI1_PRESENT)
1264		writeq(value, (void __iomem *)dd->kregbase + offset);
1265}
1266
1267void __iomem *get_csr_addr(
1268	struct hfi1_devdata *dd,
1269	u32 offset)
1270{
1271	return (void __iomem *)dd->kregbase + offset;
1272}
1273
1274static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr,
1275				 int mode, u64 value)
1276{
1277	u64 ret;
1278
1279
1280	if (mode == CNTR_MODE_R) {
1281		ret = read_csr(dd, csr);
1282	} else if (mode == CNTR_MODE_W) {
1283		write_csr(dd, csr, value);
1284		ret = value;
1285	} else {
1286		dd_dev_err(dd, "Invalid cntr register access mode");
1287		return 0;
1288	}
1289
1290	hfi1_cdbg(CNTR, "csr 0x%x val 0x%llx mode %d", csr, ret, mode);
1291	return ret;
1292}
1293
1294/* Dev Access */
1295static u64 dev_access_u32_csr(const struct cntr_entry *entry,
1296			    void *context, int vl, int mode, u64 data)
1297{
1298	struct hfi1_devdata *dd = context;
1299
1300	if (vl != CNTR_INVALID_VL)
1301		return 0;
1302	return read_write_csr(dd, entry->csr, mode, data);
1303}
1304
1305static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context,
1306			    int vl, int mode, u64 data)
1307{
1308	struct hfi1_devdata *dd = context;
1309
1310	u64 val = 0;
1311	u64 csr = entry->csr;
1312
1313	if (entry->flags & CNTR_VL) {
1314		if (vl == CNTR_INVALID_VL)
1315			return 0;
1316		csr += 8 * vl;
1317	} else {
1318		if (vl != CNTR_INVALID_VL)
1319			return 0;
1320	}
1321
1322	val = read_write_csr(dd, csr, mode, data);
1323	return val;
1324}
1325
1326static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context,
1327			    int vl, int mode, u64 data)
1328{
1329	struct hfi1_devdata *dd = context;
1330	u32 csr = entry->csr;
1331	int ret = 0;
1332
1333	if (vl != CNTR_INVALID_VL)
1334		return 0;
1335	if (mode == CNTR_MODE_R)
1336		ret = read_lcb_csr(dd, csr, &data);
1337	else if (mode == CNTR_MODE_W)
1338		ret = write_lcb_csr(dd, csr, data);
1339
1340	if (ret) {
1341		dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr);
1342		return 0;
1343	}
1344
1345	hfi1_cdbg(CNTR, "csr 0x%x val 0x%llx mode %d", csr, data, mode);
1346	return data;
1347}
1348
1349/* Port Access */
1350static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context,
1351			     int vl, int mode, u64 data)
1352{
1353	struct hfi1_pportdata *ppd = context;
1354
1355	if (vl != CNTR_INVALID_VL)
1356		return 0;
1357	return read_write_csr(ppd->dd, entry->csr, mode, data);
1358}
1359
1360static u64 port_access_u64_csr(const struct cntr_entry *entry,
1361			     void *context, int vl, int mode, u64 data)
1362{
1363	struct hfi1_pportdata *ppd = context;
1364	u64 val;
1365	u64 csr = entry->csr;
1366
1367	if (entry->flags & CNTR_VL) {
1368		if (vl == CNTR_INVALID_VL)
1369			return 0;
1370		csr += 8 * vl;
1371	} else {
1372		if (vl != CNTR_INVALID_VL)
1373			return 0;
1374	}
1375	val = read_write_csr(ppd->dd, csr, mode, data);
1376	return val;
1377}
1378
1379/* Software defined */
1380static inline u64 read_write_sw(struct hfi1_devdata *dd, u64 *cntr, int mode,
1381				u64 data)
1382{
1383	u64 ret;
1384
1385	if (mode == CNTR_MODE_R) {
1386		ret = *cntr;
1387	} else if (mode == CNTR_MODE_W) {
1388		*cntr = data;
1389		ret = data;
1390	} else {
1391		dd_dev_err(dd, "Invalid cntr sw access mode");
1392		return 0;
1393	}
1394
1395	hfi1_cdbg(CNTR, "val 0x%llx mode %d", ret, mode);
1396
1397	return ret;
1398}
1399
1400static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context,
1401			       int vl, int mode, u64 data)
1402{
1403	struct hfi1_pportdata *ppd = context;
1404
1405	if (vl != CNTR_INVALID_VL)
1406		return 0;
1407	return read_write_sw(ppd->dd, &ppd->link_downed, mode, data);
1408}
1409
1410static u64 access_sw_link_up_cnt(const struct cntr_entry *entry, void *context,
1411			       int vl, int mode, u64 data)
1412{
1413	struct hfi1_pportdata *ppd = context;
1414
1415	if (vl != CNTR_INVALID_VL)
1416		return 0;
1417	return read_write_sw(ppd->dd, &ppd->link_up, mode, data);
1418}
1419
1420static u64 access_sw_xmit_discards(const struct cntr_entry *entry,
1421				    void *context, int vl, int mode, u64 data)
1422{
1423	struct hfi1_pportdata *ppd = context;
1424
1425	if (vl != CNTR_INVALID_VL)
1426		return 0;
1427
1428	return read_write_sw(ppd->dd, &ppd->port_xmit_discards, mode, data);
1429}
1430
1431static u64 access_xmit_constraint_errs(const struct cntr_entry *entry,
1432				     void *context, int vl, int mode, u64 data)
1433{
1434	struct hfi1_pportdata *ppd = context;
1435
1436	if (vl != CNTR_INVALID_VL)
1437		return 0;
1438
1439	return read_write_sw(ppd->dd, &ppd->port_xmit_constraint_errors,
1440			     mode, data);
1441}
1442
1443static u64 access_rcv_constraint_errs(const struct cntr_entry *entry,
1444				     void *context, int vl, int mode, u64 data)
1445{
1446	struct hfi1_pportdata *ppd = context;
1447
1448	if (vl != CNTR_INVALID_VL)
1449		return 0;
1450
1451	return read_write_sw(ppd->dd, &ppd->port_rcv_constraint_errors,
1452			     mode, data);
1453}
1454
1455u64 get_all_cpu_total(u64 __percpu *cntr)
1456{
1457	int cpu;
1458	u64 counter = 0;
1459
1460	for_each_possible_cpu(cpu)
1461		counter += *per_cpu_ptr(cntr, cpu);
1462	return counter;
1463}
1464
1465static u64 read_write_cpu(struct hfi1_devdata *dd, u64 *z_val,
1466			  u64 __percpu *cntr,
1467			  int vl, int mode, u64 data)
1468{
1469
1470	u64 ret = 0;
1471
1472	if (vl != CNTR_INVALID_VL)
1473		return 0;
1474
1475	if (mode == CNTR_MODE_R) {
1476		ret = get_all_cpu_total(cntr) - *z_val;
1477	} else if (mode == CNTR_MODE_W) {
1478		/* A write can only zero the counter */
1479		if (data == 0)
1480			*z_val = get_all_cpu_total(cntr);
1481		else
1482			dd_dev_err(dd, "Per CPU cntrs can only be zeroed");
1483	} else {
1484		dd_dev_err(dd, "Invalid cntr sw cpu access mode");
1485		return 0;
1486	}
1487
1488	return ret;
1489}
1490
1491static u64 access_sw_cpu_intr(const struct cntr_entry *entry,
1492			      void *context, int vl, int mode, u64 data)
1493{
1494	struct hfi1_devdata *dd = context;
1495
1496	return read_write_cpu(dd, &dd->z_int_counter, dd->int_counter, vl,
1497			      mode, data);
1498}
1499
1500static u64 access_sw_cpu_rcv_limit(const struct cntr_entry *entry,
1501			      void *context, int vl, int mode, u64 data)
1502{
1503	struct hfi1_devdata *dd = context;
1504
1505	return read_write_cpu(dd, &dd->z_rcv_limit, dd->rcv_limit, vl,
1506			      mode, data);
1507}
1508
1509static u64 access_sw_pio_wait(const struct cntr_entry *entry,
1510			      void *context, int vl, int mode, u64 data)
1511{
1512	struct hfi1_devdata *dd = context;
1513
1514	return dd->verbs_dev.n_piowait;
1515}
1516
1517static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
1518			      void *context, int vl, int mode, u64 data)
1519{
1520	struct hfi1_devdata *dd = context;
1521
1522	return dd->verbs_dev.n_txwait;
1523}
1524
1525static u64 access_sw_kmem_wait(const struct cntr_entry *entry,
1526			       void *context, int vl, int mode, u64 data)
1527{
1528	struct hfi1_devdata *dd = context;
1529
1530	return dd->verbs_dev.n_kmem_wait;
1531}
1532
1533static u64 access_sw_send_schedule(const struct cntr_entry *entry,
1534			       void *context, int vl, int mode, u64 data)
1535{
1536	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1537
1538	return dd->verbs_dev.n_send_schedule;
1539}
1540
1541#define def_access_sw_cpu(cntr) \
1542static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry,		      \
1543			      void *context, int vl, int mode, u64 data)      \
1544{									      \
1545	struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;	      \
1546	return read_write_cpu(ppd->dd, &ppd->ibport_data.z_ ##cntr,	      \
1547			      ppd->ibport_data.cntr, vl,		      \
1548			      mode, data);				      \
1549}
1550
1551def_access_sw_cpu(rc_acks);
1552def_access_sw_cpu(rc_qacks);
1553def_access_sw_cpu(rc_delayed_comp);
1554
1555#define def_access_ibp_counter(cntr) \
1556static u64 access_ibp_##cntr(const struct cntr_entry *entry,		      \
1557				void *context, int vl, int mode, u64 data)    \
1558{									      \
1559	struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;	      \
1560									      \
1561	if (vl != CNTR_INVALID_VL)					      \
1562		return 0;						      \
1563									      \
1564	return read_write_sw(ppd->dd, &ppd->ibport_data.n_ ##cntr,	      \
1565			     mode, data);				      \
1566}
1567
1568def_access_ibp_counter(loop_pkts);
1569def_access_ibp_counter(rc_resends);
1570def_access_ibp_counter(rnr_naks);
1571def_access_ibp_counter(other_naks);
1572def_access_ibp_counter(rc_timeouts);
1573def_access_ibp_counter(pkt_drops);
1574def_access_ibp_counter(dmawait);
1575def_access_ibp_counter(rc_seqnak);
1576def_access_ibp_counter(rc_dupreq);
1577def_access_ibp_counter(rdma_seq);
1578def_access_ibp_counter(unaligned);
1579def_access_ibp_counter(seq_naks);
1580
1581static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
1582[C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH),
1583[C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT,
1584			CNTR_NORMAL),
1585[C_RX_TID_INVALID] = RXE32_DEV_CNTR_ELEM(RxTIDInvalid, RCV_TID_VALID_ERR_CNT,
1586			CNTR_NORMAL),
1587[C_RX_TID_FLGMS] = RXE32_DEV_CNTR_ELEM(RxTidFLGMs,
1588			RCV_TID_FLOW_GEN_MISMATCH_CNT,
1589			CNTR_NORMAL),
1590[C_RX_CTX_RHQS] = RXE32_DEV_CNTR_ELEM(RxCtxRHQS, RCV_CONTEXT_RHQ_STALL,
1591			CNTR_NORMAL),
1592[C_RX_CTX_EGRS] = RXE32_DEV_CNTR_ELEM(RxCtxEgrS, RCV_CONTEXT_EGR_STALL,
1593			CNTR_NORMAL),
1594[C_RCV_TID_FLSMS] = RXE32_DEV_CNTR_ELEM(RxTidFLSMs,
1595			RCV_TID_FLOW_SEQ_MISMATCH_CNT, CNTR_NORMAL),
1596[C_CCE_PCI_CR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePciCrSt,
1597			CCE_PCIE_POSTED_CRDT_STALL_CNT, CNTR_NORMAL),
1598[C_CCE_PCI_TR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePciTrSt, CCE_PCIE_TRGT_STALL_CNT,
1599			CNTR_NORMAL),
1600[C_CCE_PIO_WR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePioWrSt, CCE_PIO_WR_STALL_CNT,
1601			CNTR_NORMAL),
1602[C_CCE_ERR_INT] = CCE_INT_DEV_CNTR_ELEM(CceErrInt, CCE_ERR_INT_CNT,
1603			CNTR_NORMAL),
1604[C_CCE_SDMA_INT] = CCE_INT_DEV_CNTR_ELEM(CceSdmaInt, CCE_SDMA_INT_CNT,
1605			CNTR_NORMAL),
1606[C_CCE_MISC_INT] = CCE_INT_DEV_CNTR_ELEM(CceMiscInt, CCE_MISC_INT_CNT,
1607			CNTR_NORMAL),
1608[C_CCE_RCV_AV_INT] = CCE_INT_DEV_CNTR_ELEM(CceRcvAvInt, CCE_RCV_AVAIL_INT_CNT,
1609			CNTR_NORMAL),
1610[C_CCE_RCV_URG_INT] = CCE_INT_DEV_CNTR_ELEM(CceRcvUrgInt,
1611			CCE_RCV_URGENT_INT_CNT,	CNTR_NORMAL),
1612[C_CCE_SEND_CR_INT] = CCE_INT_DEV_CNTR_ELEM(CceSndCrInt,
1613			CCE_SEND_CREDIT_INT_CNT, CNTR_NORMAL),
1614[C_DC_UNC_ERR] = DC_PERF_CNTR(DcUnctblErr, DCC_ERR_UNCORRECTABLE_CNT,
1615			      CNTR_SYNTH),
1616[C_DC_RCV_ERR] = DC_PERF_CNTR(DcRecvErr, DCC_ERR_PORTRCV_ERR_CNT, CNTR_SYNTH),
1617[C_DC_FM_CFG_ERR] = DC_PERF_CNTR(DcFmCfgErr, DCC_ERR_FMCONFIG_ERR_CNT,
1618				 CNTR_SYNTH),
1619[C_DC_RMT_PHY_ERR] = DC_PERF_CNTR(DcRmtPhyErr, DCC_ERR_RCVREMOTE_PHY_ERR_CNT,
1620				  CNTR_SYNTH),
1621[C_DC_DROPPED_PKT] = DC_PERF_CNTR(DcDroppedPkt, DCC_ERR_DROPPED_PKT_CNT,
1622				  CNTR_SYNTH),
1623[C_DC_MC_XMIT_PKTS] = DC_PERF_CNTR(DcMcXmitPkts,
1624				   DCC_PRF_PORT_XMIT_MULTICAST_CNT, CNTR_SYNTH),
1625[C_DC_MC_RCV_PKTS] = DC_PERF_CNTR(DcMcRcvPkts,
1626				  DCC_PRF_PORT_RCV_MULTICAST_PKT_CNT,
1627				  CNTR_SYNTH),
1628[C_DC_XMIT_CERR] = DC_PERF_CNTR(DcXmitCorr,
1629				DCC_PRF_PORT_XMIT_CORRECTABLE_CNT, CNTR_SYNTH),
1630[C_DC_RCV_CERR] = DC_PERF_CNTR(DcRcvCorrCnt, DCC_PRF_PORT_RCV_CORRECTABLE_CNT,
1631			       CNTR_SYNTH),
1632[C_DC_RCV_FCC] = DC_PERF_CNTR(DcRxFCntl, DCC_PRF_RX_FLOW_CRTL_CNT,
1633			      CNTR_SYNTH),
1634[C_DC_XMIT_FCC] = DC_PERF_CNTR(DcXmitFCntl, DCC_PRF_TX_FLOW_CRTL_CNT,
1635			       CNTR_SYNTH),
1636[C_DC_XMIT_FLITS] = DC_PERF_CNTR(DcXmitFlits, DCC_PRF_PORT_XMIT_DATA_CNT,
1637				 CNTR_SYNTH),
1638[C_DC_RCV_FLITS] = DC_PERF_CNTR(DcRcvFlits, DCC_PRF_PORT_RCV_DATA_CNT,
1639				CNTR_SYNTH),
1640[C_DC_XMIT_PKTS] = DC_PERF_CNTR(DcXmitPkts, DCC_PRF_PORT_XMIT_PKTS_CNT,
1641				CNTR_SYNTH),
1642[C_DC_RCV_PKTS] = DC_PERF_CNTR(DcRcvPkts, DCC_PRF_PORT_RCV_PKTS_CNT,
1643			       CNTR_SYNTH),
1644[C_DC_RX_FLIT_VL] = DC_PERF_CNTR(DcRxFlitVl, DCC_PRF_PORT_VL_RCV_DATA_CNT,
1645				 CNTR_SYNTH | CNTR_VL),
1646[C_DC_RX_PKT_VL] = DC_PERF_CNTR(DcRxPktVl, DCC_PRF_PORT_VL_RCV_PKTS_CNT,
1647				CNTR_SYNTH | CNTR_VL),
1648[C_DC_RCV_FCN] = DC_PERF_CNTR(DcRcvFcn, DCC_PRF_PORT_RCV_FECN_CNT, CNTR_SYNTH),
1649[C_DC_RCV_FCN_VL] = DC_PERF_CNTR(DcRcvFcnVl, DCC_PRF_PORT_VL_RCV_FECN_CNT,
1650				 CNTR_SYNTH | CNTR_VL),
1651[C_DC_RCV_BCN] = DC_PERF_CNTR(DcRcvBcn, DCC_PRF_PORT_RCV_BECN_CNT, CNTR_SYNTH),
1652[C_DC_RCV_BCN_VL] = DC_PERF_CNTR(DcRcvBcnVl, DCC_PRF_PORT_VL_RCV_BECN_CNT,
1653				 CNTR_SYNTH | CNTR_VL),
1654[C_DC_RCV_BBL] = DC_PERF_CNTR(DcRcvBbl, DCC_PRF_PORT_RCV_BUBBLE_CNT,
1655			      CNTR_SYNTH),
1656[C_DC_RCV_BBL_VL] = DC_PERF_CNTR(DcRcvBblVl, DCC_PRF_PORT_VL_RCV_BUBBLE_CNT,
1657				 CNTR_SYNTH | CNTR_VL),
1658[C_DC_MARK_FECN] = DC_PERF_CNTR(DcMarkFcn, DCC_PRF_PORT_MARK_FECN_CNT,
1659				CNTR_SYNTH),
1660[C_DC_MARK_FECN_VL] = DC_PERF_CNTR(DcMarkFcnVl, DCC_PRF_PORT_VL_MARK_FECN_CNT,
1661				   CNTR_SYNTH | CNTR_VL),
1662[C_DC_TOTAL_CRC] =
1663	DC_PERF_CNTR_LCB(DcTotCrc, DC_LCB_ERR_INFO_TOTAL_CRC_ERR,
1664			 CNTR_SYNTH),
1665[C_DC_CRC_LN0] = DC_PERF_CNTR_LCB(DcCrcLn0, DC_LCB_ERR_INFO_CRC_ERR_LN0,
1666				  CNTR_SYNTH),
1667[C_DC_CRC_LN1] = DC_PERF_CNTR_LCB(DcCrcLn1, DC_LCB_ERR_INFO_CRC_ERR_LN1,
1668				  CNTR_SYNTH),
1669[C_DC_CRC_LN2] = DC_PERF_CNTR_LCB(DcCrcLn2, DC_LCB_ERR_INFO_CRC_ERR_LN2,
1670				  CNTR_SYNTH),
1671[C_DC_CRC_LN3] = DC_PERF_CNTR_LCB(DcCrcLn3, DC_LCB_ERR_INFO_CRC_ERR_LN3,
1672				  CNTR_SYNTH),
1673[C_DC_CRC_MULT_LN] =
1674	DC_PERF_CNTR_LCB(DcMultLn, DC_LCB_ERR_INFO_CRC_ERR_MULTI_LN,
1675			 CNTR_SYNTH),
1676[C_DC_TX_REPLAY] = DC_PERF_CNTR_LCB(DcTxReplay, DC_LCB_ERR_INFO_TX_REPLAY_CNT,
1677				    CNTR_SYNTH),
1678[C_DC_RX_REPLAY] = DC_PERF_CNTR_LCB(DcRxReplay, DC_LCB_ERR_INFO_RX_REPLAY_CNT,
1679				    CNTR_SYNTH),
1680[C_DC_SEQ_CRC_CNT] =
1681	DC_PERF_CNTR_LCB(DcLinkSeqCrc, DC_LCB_ERR_INFO_SEQ_CRC_CNT,
1682			 CNTR_SYNTH),
1683[C_DC_ESC0_ONLY_CNT] =
1684	DC_PERF_CNTR_LCB(DcEsc0, DC_LCB_ERR_INFO_ESCAPE_0_ONLY_CNT,
1685			 CNTR_SYNTH),
1686[C_DC_ESC0_PLUS1_CNT] =
1687	DC_PERF_CNTR_LCB(DcEsc1, DC_LCB_ERR_INFO_ESCAPE_0_PLUS1_CNT,
1688			 CNTR_SYNTH),
1689[C_DC_ESC0_PLUS2_CNT] =
1690	DC_PERF_CNTR_LCB(DcEsc0Plus2, DC_LCB_ERR_INFO_ESCAPE_0_PLUS2_CNT,
1691			 CNTR_SYNTH),
1692[C_DC_REINIT_FROM_PEER_CNT] =
1693	DC_PERF_CNTR_LCB(DcReinitPeer, DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT,
1694			 CNTR_SYNTH),
1695[C_DC_SBE_CNT] = DC_PERF_CNTR_LCB(DcSbe, DC_LCB_ERR_INFO_SBE_CNT,
1696				  CNTR_SYNTH),
1697[C_DC_MISC_FLG_CNT] =
1698	DC_PERF_CNTR_LCB(DcMiscFlg, DC_LCB_ERR_INFO_MISC_FLG_CNT,
1699			 CNTR_SYNTH),
1700[C_DC_PRF_GOOD_LTP_CNT] =
1701	DC_PERF_CNTR_LCB(DcGoodLTP, DC_LCB_PRF_GOOD_LTP_CNT, CNTR_SYNTH),
1702[C_DC_PRF_ACCEPTED_LTP_CNT] =
1703	DC_PERF_CNTR_LCB(DcAccLTP, DC_LCB_PRF_ACCEPTED_LTP_CNT,
1704			 CNTR_SYNTH),
1705[C_DC_PRF_RX_FLIT_CNT] =
1706	DC_PERF_CNTR_LCB(DcPrfRxFlit, DC_LCB_PRF_RX_FLIT_CNT, CNTR_SYNTH),
1707[C_DC_PRF_TX_FLIT_CNT] =
1708	DC_PERF_CNTR_LCB(DcPrfTxFlit, DC_LCB_PRF_TX_FLIT_CNT, CNTR_SYNTH),
1709[C_DC_PRF_CLK_CNTR] =
1710	DC_PERF_CNTR_LCB(DcPrfClk, DC_LCB_PRF_CLK_CNTR, CNTR_SYNTH),
1711[C_DC_PG_DBG_FLIT_CRDTS_CNT] =
1712	DC_PERF_CNTR_LCB(DcFltCrdts, DC_LCB_PG_DBG_FLIT_CRDTS_CNT, CNTR_SYNTH),
1713[C_DC_PG_STS_PAUSE_COMPLETE_CNT] =
1714	DC_PERF_CNTR_LCB(DcPauseComp, DC_LCB_PG_STS_PAUSE_COMPLETE_CNT,
1715			 CNTR_SYNTH),
1716[C_DC_PG_STS_TX_SBE_CNT] =
1717	DC_PERF_CNTR_LCB(DcStsTxSbe, DC_LCB_PG_STS_TX_SBE_CNT, CNTR_SYNTH),
1718[C_DC_PG_STS_TX_MBE_CNT] =
1719	DC_PERF_CNTR_LCB(DcStsTxMbe, DC_LCB_PG_STS_TX_MBE_CNT,
1720			 CNTR_SYNTH),
1721[C_SW_CPU_INTR] = CNTR_ELEM("Intr", 0, 0, CNTR_NORMAL,
1722			    access_sw_cpu_intr),
1723[C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL,
1724			    access_sw_cpu_rcv_limit),
1725[C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL,
1726			    access_sw_vtx_wait),
1727[C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
1728			    access_sw_pio_wait),
1729[C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
1730			    access_sw_kmem_wait),
1731[C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
1732			    access_sw_send_schedule),
1733};
1734
1735static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
1736[C_TX_UNSUP_VL] = TXE32_PORT_CNTR_ELEM(TxUnVLErr, SEND_UNSUP_VL_ERR_CNT,
1737			CNTR_NORMAL),
1738[C_TX_INVAL_LEN] = TXE32_PORT_CNTR_ELEM(TxInvalLen, SEND_LEN_ERR_CNT,
1739			CNTR_NORMAL),
1740[C_TX_MM_LEN_ERR] = TXE32_PORT_CNTR_ELEM(TxMMLenErr, SEND_MAX_MIN_LEN_ERR_CNT,
1741			CNTR_NORMAL),
1742[C_TX_UNDERRUN] = TXE32_PORT_CNTR_ELEM(TxUnderrun, SEND_UNDERRUN_CNT,
1743			CNTR_NORMAL),
1744[C_TX_FLOW_STALL] = TXE32_PORT_CNTR_ELEM(TxFlowStall, SEND_FLOW_STALL_CNT,
1745			CNTR_NORMAL),
1746[C_TX_DROPPED] = TXE32_PORT_CNTR_ELEM(TxDropped, SEND_DROPPED_PKT_CNT,
1747			CNTR_NORMAL),
1748[C_TX_HDR_ERR] = TXE32_PORT_CNTR_ELEM(TxHdrErr, SEND_HEADERS_ERR_CNT,
1749			CNTR_NORMAL),
1750[C_TX_PKT] = TXE64_PORT_CNTR_ELEM(TxPkt, SEND_DATA_PKT_CNT, CNTR_NORMAL),
1751[C_TX_WORDS] = TXE64_PORT_CNTR_ELEM(TxWords, SEND_DWORD_CNT, CNTR_NORMAL),
1752[C_TX_WAIT] = TXE64_PORT_CNTR_ELEM(TxWait, SEND_WAIT_CNT, CNTR_SYNTH),
1753[C_TX_FLIT_VL] = TXE64_PORT_CNTR_ELEM(TxFlitVL, SEND_DATA_VL0_CNT,
1754			CNTR_SYNTH | CNTR_VL),
1755[C_TX_PKT_VL] = TXE64_PORT_CNTR_ELEM(TxPktVL, SEND_DATA_PKT_VL0_CNT,
1756			CNTR_SYNTH | CNTR_VL),
1757[C_TX_WAIT_VL] = TXE64_PORT_CNTR_ELEM(TxWaitVL, SEND_WAIT_VL0_CNT,
1758			CNTR_SYNTH | CNTR_VL),
1759[C_RX_PKT] = RXE64_PORT_CNTR_ELEM(RxPkt, RCV_DATA_PKT_CNT, CNTR_NORMAL),
1760[C_RX_WORDS] = RXE64_PORT_CNTR_ELEM(RxWords, RCV_DWORD_CNT, CNTR_NORMAL),
1761[C_SW_LINK_DOWN] = CNTR_ELEM("SwLinkDown", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1762			access_sw_link_dn_cnt),
1763[C_SW_LINK_UP] = CNTR_ELEM("SwLinkUp", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1764			access_sw_link_up_cnt),
1765[C_SW_XMIT_DSCD] = CNTR_ELEM("XmitDscd", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1766			access_sw_xmit_discards),
1767[C_SW_XMIT_DSCD_VL] = CNTR_ELEM("XmitDscdVl", 0, 0,
1768			CNTR_SYNTH | CNTR_32BIT | CNTR_VL,
1769			access_sw_xmit_discards),
1770[C_SW_XMIT_CSTR_ERR] = CNTR_ELEM("XmitCstrErr", 0, 0, CNTR_SYNTH,
1771			access_xmit_constraint_errs),
1772[C_SW_RCV_CSTR_ERR] = CNTR_ELEM("RcvCstrErr", 0, 0, CNTR_SYNTH,
1773			access_rcv_constraint_errs),
1774[C_SW_IBP_LOOP_PKTS] = SW_IBP_CNTR(LoopPkts, loop_pkts),
1775[C_SW_IBP_RC_RESENDS] = SW_IBP_CNTR(RcResend, rc_resends),
1776[C_SW_IBP_RNR_NAKS] = SW_IBP_CNTR(RnrNak, rnr_naks),
1777[C_SW_IBP_OTHER_NAKS] = SW_IBP_CNTR(OtherNak, other_naks),
1778[C_SW_IBP_RC_TIMEOUTS] = SW_IBP_CNTR(RcTimeOut, rc_timeouts),
1779[C_SW_IBP_PKT_DROPS] = SW_IBP_CNTR(PktDrop, pkt_drops),
1780[C_SW_IBP_DMA_WAIT] = SW_IBP_CNTR(DmaWait, dmawait),
1781[C_SW_IBP_RC_SEQNAK] = SW_IBP_CNTR(RcSeqNak, rc_seqnak),
1782[C_SW_IBP_RC_DUPREQ] = SW_IBP_CNTR(RcDupRew, rc_dupreq),
1783[C_SW_IBP_RDMA_SEQ] = SW_IBP_CNTR(RdmaSeq, rdma_seq),
1784[C_SW_IBP_UNALIGNED] = SW_IBP_CNTR(Unaligned, unaligned),
1785[C_SW_IBP_SEQ_NAK] = SW_IBP_CNTR(SeqNak, seq_naks),
1786[C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
1787			       access_sw_cpu_rc_acks),
1788[C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
1789			       access_sw_cpu_rc_qacks),
1790[C_SW_CPU_RC_DELAYED_COMP] = CNTR_ELEM("RcDelayComp", 0, 0, CNTR_NORMAL,
1791			       access_sw_cpu_rc_delayed_comp),
1792[OVR_LBL(0)] = OVR_ELM(0), [OVR_LBL(1)] = OVR_ELM(1),
1793[OVR_LBL(2)] = OVR_ELM(2), [OVR_LBL(3)] = OVR_ELM(3),
1794[OVR_LBL(4)] = OVR_ELM(4), [OVR_LBL(5)] = OVR_ELM(5),
1795[OVR_LBL(6)] = OVR_ELM(6), [OVR_LBL(7)] = OVR_ELM(7),
1796[OVR_LBL(8)] = OVR_ELM(8), [OVR_LBL(9)] = OVR_ELM(9),
1797[OVR_LBL(10)] = OVR_ELM(10), [OVR_LBL(11)] = OVR_ELM(11),
1798[OVR_LBL(12)] = OVR_ELM(12), [OVR_LBL(13)] = OVR_ELM(13),
1799[OVR_LBL(14)] = OVR_ELM(14), [OVR_LBL(15)] = OVR_ELM(15),
1800[OVR_LBL(16)] = OVR_ELM(16), [OVR_LBL(17)] = OVR_ELM(17),
1801[OVR_LBL(18)] = OVR_ELM(18), [OVR_LBL(19)] = OVR_ELM(19),
1802[OVR_LBL(20)] = OVR_ELM(20), [OVR_LBL(21)] = OVR_ELM(21),
1803[OVR_LBL(22)] = OVR_ELM(22), [OVR_LBL(23)] = OVR_ELM(23),
1804[OVR_LBL(24)] = OVR_ELM(24), [OVR_LBL(25)] = OVR_ELM(25),
1805[OVR_LBL(26)] = OVR_ELM(26), [OVR_LBL(27)] = OVR_ELM(27),
1806[OVR_LBL(28)] = OVR_ELM(28), [OVR_LBL(29)] = OVR_ELM(29),
1807[OVR_LBL(30)] = OVR_ELM(30), [OVR_LBL(31)] = OVR_ELM(31),
1808[OVR_LBL(32)] = OVR_ELM(32), [OVR_LBL(33)] = OVR_ELM(33),
1809[OVR_LBL(34)] = OVR_ELM(34), [OVR_LBL(35)] = OVR_ELM(35),
1810[OVR_LBL(36)] = OVR_ELM(36), [OVR_LBL(37)] = OVR_ELM(37),
1811[OVR_LBL(38)] = OVR_ELM(38), [OVR_LBL(39)] = OVR_ELM(39),
1812[OVR_LBL(40)] = OVR_ELM(40), [OVR_LBL(41)] = OVR_ELM(41),
1813[OVR_LBL(42)] = OVR_ELM(42), [OVR_LBL(43)] = OVR_ELM(43),
1814[OVR_LBL(44)] = OVR_ELM(44), [OVR_LBL(45)] = OVR_ELM(45),
1815[OVR_LBL(46)] = OVR_ELM(46), [OVR_LBL(47)] = OVR_ELM(47),
1816[OVR_LBL(48)] = OVR_ELM(48), [OVR_LBL(49)] = OVR_ELM(49),
1817[OVR_LBL(50)] = OVR_ELM(50), [OVR_LBL(51)] = OVR_ELM(51),
1818[OVR_LBL(52)] = OVR_ELM(52), [OVR_LBL(53)] = OVR_ELM(53),
1819[OVR_LBL(54)] = OVR_ELM(54), [OVR_LBL(55)] = OVR_ELM(55),
1820[OVR_LBL(56)] = OVR_ELM(56), [OVR_LBL(57)] = OVR_ELM(57),
1821[OVR_LBL(58)] = OVR_ELM(58), [OVR_LBL(59)] = OVR_ELM(59),
1822[OVR_LBL(60)] = OVR_ELM(60), [OVR_LBL(61)] = OVR_ELM(61),
1823[OVR_LBL(62)] = OVR_ELM(62), [OVR_LBL(63)] = OVR_ELM(63),
1824[OVR_LBL(64)] = OVR_ELM(64), [OVR_LBL(65)] = OVR_ELM(65),
1825[OVR_LBL(66)] = OVR_ELM(66), [OVR_LBL(67)] = OVR_ELM(67),
1826[OVR_LBL(68)] = OVR_ELM(68), [OVR_LBL(69)] = OVR_ELM(69),
1827[OVR_LBL(70)] = OVR_ELM(70), [OVR_LBL(71)] = OVR_ELM(71),
1828[OVR_LBL(72)] = OVR_ELM(72), [OVR_LBL(73)] = OVR_ELM(73),
1829[OVR_LBL(74)] = OVR_ELM(74), [OVR_LBL(75)] = OVR_ELM(75),
1830[OVR_LBL(76)] = OVR_ELM(76), [OVR_LBL(77)] = OVR_ELM(77),
1831[OVR_LBL(78)] = OVR_ELM(78), [OVR_LBL(79)] = OVR_ELM(79),
1832[OVR_LBL(80)] = OVR_ELM(80), [OVR_LBL(81)] = OVR_ELM(81),
1833[OVR_LBL(82)] = OVR_ELM(82), [OVR_LBL(83)] = OVR_ELM(83),
1834[OVR_LBL(84)] = OVR_ELM(84), [OVR_LBL(85)] = OVR_ELM(85),
1835[OVR_LBL(86)] = OVR_ELM(86), [OVR_LBL(87)] = OVR_ELM(87),
1836[OVR_LBL(88)] = OVR_ELM(88), [OVR_LBL(89)] = OVR_ELM(89),
1837[OVR_LBL(90)] = OVR_ELM(90), [OVR_LBL(91)] = OVR_ELM(91),
1838[OVR_LBL(92)] = OVR_ELM(92), [OVR_LBL(93)] = OVR_ELM(93),
1839[OVR_LBL(94)] = OVR_ELM(94), [OVR_LBL(95)] = OVR_ELM(95),
1840[OVR_LBL(96)] = OVR_ELM(96), [OVR_LBL(97)] = OVR_ELM(97),
1841[OVR_LBL(98)] = OVR_ELM(98), [OVR_LBL(99)] = OVR_ELM(99),
1842[OVR_LBL(100)] = OVR_ELM(100), [OVR_LBL(101)] = OVR_ELM(101),
1843[OVR_LBL(102)] = OVR_ELM(102), [OVR_LBL(103)] = OVR_ELM(103),
1844[OVR_LBL(104)] = OVR_ELM(104), [OVR_LBL(105)] = OVR_ELM(105),
1845[OVR_LBL(106)] = OVR_ELM(106), [OVR_LBL(107)] = OVR_ELM(107),
1846[OVR_LBL(108)] = OVR_ELM(108), [OVR_LBL(109)] = OVR_ELM(109),
1847[OVR_LBL(110)] = OVR_ELM(110), [OVR_LBL(111)] = OVR_ELM(111),
1848[OVR_LBL(112)] = OVR_ELM(112), [OVR_LBL(113)] = OVR_ELM(113),
1849[OVR_LBL(114)] = OVR_ELM(114), [OVR_LBL(115)] = OVR_ELM(115),
1850[OVR_LBL(116)] = OVR_ELM(116), [OVR_LBL(117)] = OVR_ELM(117),
1851[OVR_LBL(118)] = OVR_ELM(118), [OVR_LBL(119)] = OVR_ELM(119),
1852[OVR_LBL(120)] = OVR_ELM(120), [OVR_LBL(121)] = OVR_ELM(121),
1853[OVR_LBL(122)] = OVR_ELM(122), [OVR_LBL(123)] = OVR_ELM(123),
1854[OVR_LBL(124)] = OVR_ELM(124), [OVR_LBL(125)] = OVR_ELM(125),
1855[OVR_LBL(126)] = OVR_ELM(126), [OVR_LBL(127)] = OVR_ELM(127),
1856[OVR_LBL(128)] = OVR_ELM(128), [OVR_LBL(129)] = OVR_ELM(129),
1857[OVR_LBL(130)] = OVR_ELM(130), [OVR_LBL(131)] = OVR_ELM(131),
1858[OVR_LBL(132)] = OVR_ELM(132), [OVR_LBL(133)] = OVR_ELM(133),
1859[OVR_LBL(134)] = OVR_ELM(134), [OVR_LBL(135)] = OVR_ELM(135),
1860[OVR_LBL(136)] = OVR_ELM(136), [OVR_LBL(137)] = OVR_ELM(137),
1861[OVR_LBL(138)] = OVR_ELM(138), [OVR_LBL(139)] = OVR_ELM(139),
1862[OVR_LBL(140)] = OVR_ELM(140), [OVR_LBL(141)] = OVR_ELM(141),
1863[OVR_LBL(142)] = OVR_ELM(142), [OVR_LBL(143)] = OVR_ELM(143),
1864[OVR_LBL(144)] = OVR_ELM(144), [OVR_LBL(145)] = OVR_ELM(145),
1865[OVR_LBL(146)] = OVR_ELM(146), [OVR_LBL(147)] = OVR_ELM(147),
1866[OVR_LBL(148)] = OVR_ELM(148), [OVR_LBL(149)] = OVR_ELM(149),
1867[OVR_LBL(150)] = OVR_ELM(150), [OVR_LBL(151)] = OVR_ELM(151),
1868[OVR_LBL(152)] = OVR_ELM(152), [OVR_LBL(153)] = OVR_ELM(153),
1869[OVR_LBL(154)] = OVR_ELM(154), [OVR_LBL(155)] = OVR_ELM(155),
1870[OVR_LBL(156)] = OVR_ELM(156), [OVR_LBL(157)] = OVR_ELM(157),
1871[OVR_LBL(158)] = OVR_ELM(158), [OVR_LBL(159)] = OVR_ELM(159),
1872};
1873
1874/* ======================================================================== */
1875
1876/* return true if this is chip revision revision a0 */
1877int is_a0(struct hfi1_devdata *dd)
1878{
1879	return ((dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
1880			& CCE_REVISION_CHIP_REV_MINOR_MASK) == 0;
1881}
1882
1883/* return true if this is chip revision revision a */
1884int is_ax(struct hfi1_devdata *dd)
1885{
1886	u8 chip_rev_minor =
1887		dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT
1888			& CCE_REVISION_CHIP_REV_MINOR_MASK;
1889	return (chip_rev_minor & 0xf0) == 0;
1890}
1891
1892/* return true if this is chip revision revision b */
1893int is_bx(struct hfi1_devdata *dd)
1894{
1895	u8 chip_rev_minor =
1896		dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT
1897			& CCE_REVISION_CHIP_REV_MINOR_MASK;
1898	return !!(chip_rev_minor & 0x10);
1899}
1900
1901/*
1902 * Append string s to buffer buf.  Arguments curp and len are the current
1903 * position and remaining length, respectively.
1904 *
1905 * return 0 on success, 1 on out of room
1906 */
1907static int append_str(char *buf, char **curp, int *lenp, const char *s)
1908{
1909	char *p = *curp;
1910	int len = *lenp;
1911	int result = 0; /* success */
1912	char c;
1913
1914	/* add a comma, if first in the buffer */
1915	if (p != buf) {
1916		if (len == 0) {
1917			result = 1; /* out of room */
1918			goto done;
1919		}
1920		*p++ = ',';
1921		len--;
1922	}
1923
1924	/* copy the string */
1925	while ((c = *s++) != 0) {
1926		if (len == 0) {
1927			result = 1; /* out of room */
1928			goto done;
1929		}
1930		*p++ = c;
1931		len--;
1932	}
1933
1934done:
1935	/* write return values */
1936	*curp = p;
1937	*lenp = len;
1938
1939	return result;
1940}
1941
1942/*
1943 * Using the given flag table, print a comma separated string into
1944 * the buffer.  End in '*' if the buffer is too short.
1945 */
1946static char *flag_string(char *buf, int buf_len, u64 flags,
1947				struct flag_table *table, int table_size)
1948{
1949	char extra[32];
1950	char *p = buf;
1951	int len = buf_len;
1952	int no_room = 0;
1953	int i;
1954
1955	/* make sure there is at least 2 so we can form "*" */
1956	if (len < 2)
1957		return "";
1958
1959	len--;	/* leave room for a nul */
1960	for (i = 0; i < table_size; i++) {
1961		if (flags & table[i].flag) {
1962			no_room = append_str(buf, &p, &len, table[i].str);
1963			if (no_room)
1964				break;
1965			flags &= ~table[i].flag;
1966		}
1967	}
1968
1969	/* any undocumented bits left? */
1970	if (!no_room && flags) {
1971		snprintf(extra, sizeof(extra), "bits 0x%llx", flags);
1972		no_room = append_str(buf, &p, &len, extra);
1973	}
1974
1975	/* add * if ran out of room */
1976	if (no_room) {
1977		/* may need to back up to add space for a '*' */
1978		if (len == 0)
1979			--p;
1980		*p++ = '*';
1981	}
1982
1983	/* add final nul - space already allocated above */
1984	*p = 0;
1985	return buf;
1986}
1987
1988/* first 8 CCE error interrupt source names */
1989static const char * const cce_misc_names[] = {
1990	"CceErrInt",		/* 0 */
1991	"RxeErrInt",		/* 1 */
1992	"MiscErrInt",		/* 2 */
1993	"Reserved3",		/* 3 */
1994	"PioErrInt",		/* 4 */
1995	"SDmaErrInt",		/* 5 */
1996	"EgressErrInt",		/* 6 */
1997	"TxeErrInt"		/* 7 */
1998};
1999
2000/*
2001 * Return the miscellaneous error interrupt name.
2002 */
2003static char *is_misc_err_name(char *buf, size_t bsize, unsigned int source)
2004{
2005	if (source < ARRAY_SIZE(cce_misc_names))
2006		strncpy(buf, cce_misc_names[source], bsize);
2007	else
2008		snprintf(buf,
2009			bsize,
2010			"Reserved%u",
2011			source + IS_GENERAL_ERR_START);
2012
2013	return buf;
2014}
2015
2016/*
2017 * Return the SDMA engine error interrupt name.
2018 */
2019static char *is_sdma_eng_err_name(char *buf, size_t bsize, unsigned int source)
2020{
2021	snprintf(buf, bsize, "SDmaEngErrInt%u", source);
2022	return buf;
2023}
2024
2025/*
2026 * Return the send context error interrupt name.
2027 */
2028static char *is_sendctxt_err_name(char *buf, size_t bsize, unsigned int source)
2029{
2030	snprintf(buf, bsize, "SendCtxtErrInt%u", source);
2031	return buf;
2032}
2033
2034static const char * const various_names[] = {
2035	"PbcInt",
2036	"GpioAssertInt",
2037	"Qsfp1Int",
2038	"Qsfp2Int",
2039	"TCritInt"
2040};
2041
2042/*
2043 * Return the various interrupt name.
2044 */
2045static char *is_various_name(char *buf, size_t bsize, unsigned int source)
2046{
2047	if (source < ARRAY_SIZE(various_names))
2048		strncpy(buf, various_names[source], bsize);
2049	else
2050		snprintf(buf, bsize, "Reserved%u", source+IS_VARIOUS_START);
2051	return buf;
2052}
2053
2054/*
2055 * Return the DC interrupt name.
2056 */
2057static char *is_dc_name(char *buf, size_t bsize, unsigned int source)
2058{
2059	static const char * const dc_int_names[] = {
2060		"common",
2061		"lcb",
2062		"8051",
2063		"lbm"	/* local block merge */
2064	};
2065
2066	if (source < ARRAY_SIZE(dc_int_names))
2067		snprintf(buf, bsize, "dc_%s_int", dc_int_names[source]);
2068	else
2069		snprintf(buf, bsize, "DCInt%u", source);
2070	return buf;
2071}
2072
2073static const char * const sdma_int_names[] = {
2074	"SDmaInt",
2075	"SdmaIdleInt",
2076	"SdmaProgressInt",
2077};
2078
2079/*
2080 * Return the SDMA engine interrupt name.
2081 */
2082static char *is_sdma_eng_name(char *buf, size_t bsize, unsigned int source)
2083{
2084	/* what interrupt */
2085	unsigned int what  = source / TXE_NUM_SDMA_ENGINES;
2086	/* which engine */
2087	unsigned int which = source % TXE_NUM_SDMA_ENGINES;
2088
2089	if (likely(what < 3))
2090		snprintf(buf, bsize, "%s%u", sdma_int_names[what], which);
2091	else
2092		snprintf(buf, bsize, "Invalid SDMA interrupt %u", source);
2093	return buf;
2094}
2095
2096/*
2097 * Return the receive available interrupt name.
2098 */
2099static char *is_rcv_avail_name(char *buf, size_t bsize, unsigned int source)
2100{
2101	snprintf(buf, bsize, "RcvAvailInt%u", source);
2102	return buf;
2103}
2104
2105/*
2106 * Return the receive urgent interrupt name.
2107 */
2108static char *is_rcv_urgent_name(char *buf, size_t bsize, unsigned int source)
2109{
2110	snprintf(buf, bsize, "RcvUrgentInt%u", source);
2111	return buf;
2112}
2113
2114/*
2115 * Return the send credit interrupt name.
2116 */
2117static char *is_send_credit_name(char *buf, size_t bsize, unsigned int source)
2118{
2119	snprintf(buf, bsize, "SendCreditInt%u", source);
2120	return buf;
2121}
2122
2123/*
2124 * Return the reserved interrupt name.
2125 */
2126static char *is_reserved_name(char *buf, size_t bsize, unsigned int source)
2127{
2128	snprintf(buf, bsize, "Reserved%u", source + IS_RESERVED_START);
2129	return buf;
2130}
2131
2132static char *cce_err_status_string(char *buf, int buf_len, u64 flags)
2133{
2134	return flag_string(buf, buf_len, flags,
2135			cce_err_status_flags, ARRAY_SIZE(cce_err_status_flags));
2136}
2137
2138static char *rxe_err_status_string(char *buf, int buf_len, u64 flags)
2139{
2140	return flag_string(buf, buf_len, flags,
2141			rxe_err_status_flags, ARRAY_SIZE(rxe_err_status_flags));
2142}
2143
2144static char *misc_err_status_string(char *buf, int buf_len, u64 flags)
2145{
2146	return flag_string(buf, buf_len, flags, misc_err_status_flags,
2147			ARRAY_SIZE(misc_err_status_flags));
2148}
2149
2150static char *pio_err_status_string(char *buf, int buf_len, u64 flags)
2151{
2152	return flag_string(buf, buf_len, flags,
2153			pio_err_status_flags, ARRAY_SIZE(pio_err_status_flags));
2154}
2155
2156static char *sdma_err_status_string(char *buf, int buf_len, u64 flags)
2157{
2158	return flag_string(buf, buf_len, flags,
2159			sdma_err_status_flags,
2160			ARRAY_SIZE(sdma_err_status_flags));
2161}
2162
2163static char *egress_err_status_string(char *buf, int buf_len, u64 flags)
2164{
2165	return flag_string(buf, buf_len, flags,
2166		egress_err_status_flags, ARRAY_SIZE(egress_err_status_flags));
2167}
2168
2169static char *egress_err_info_string(char *buf, int buf_len, u64 flags)
2170{
2171	return flag_string(buf, buf_len, flags,
2172		egress_err_info_flags, ARRAY_SIZE(egress_err_info_flags));
2173}
2174
2175static char *send_err_status_string(char *buf, int buf_len, u64 flags)
2176{
2177	return flag_string(buf, buf_len, flags,
2178			send_err_status_flags,
2179			ARRAY_SIZE(send_err_status_flags));
2180}
2181
2182static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2183{
2184	char buf[96];
2185
2186	/*
2187	 * For most these errors, there is nothing that can be done except
2188	 * report or record it.
2189	 */
2190	dd_dev_info(dd, "CCE Error: %s\n",
2191		cce_err_status_string(buf, sizeof(buf), reg));
2192
2193	if ((reg & CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK)
2194			&& is_a0(dd)
2195			&& (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) {
2196		/* this error requires a manual drop into SPC freeze mode */
2197		/* then a fix up */
2198		start_freeze_handling(dd->pport, FREEZE_SELF);
2199	}
2200}
2201
2202/*
2203 * Check counters for receive errors that do not have an interrupt
2204 * associated with them.
2205 */
2206#define RCVERR_CHECK_TIME 10
2207static void update_rcverr_timer(unsigned long opaque)
2208{
2209	struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
2210	struct hfi1_pportdata *ppd = dd->pport;
2211	u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2212
2213	if (dd->rcv_ovfl_cnt < cur_ovfl_cnt &&
2214		ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) {
2215		dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
2216		set_link_down_reason(ppd,
2217		  OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
2218			OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
2219		queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
2220	}
2221	dd->rcv_ovfl_cnt = (u32) cur_ovfl_cnt;
2222
2223	mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
2224}
2225
2226static int init_rcverr(struct hfi1_devdata *dd)
2227{
2228	setup_timer(&dd->rcverr_timer, update_rcverr_timer, (unsigned long)dd);
2229	/* Assume the hardware counter has been reset */
2230	dd->rcv_ovfl_cnt = 0;
2231	return mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
2232}
2233
2234static void free_rcverr(struct hfi1_devdata *dd)
2235{
2236	if (dd->rcverr_timer.data)
2237		del_timer_sync(&dd->rcverr_timer);
2238	dd->rcverr_timer.data = 0;
2239}
2240
2241static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2242{
2243	char buf[96];
2244
2245	dd_dev_info(dd, "Receive Error: %s\n",
2246		rxe_err_status_string(buf, sizeof(buf), reg));
2247
2248	if (reg & ALL_RXE_FREEZE_ERR) {
2249		int flags = 0;
2250
2251		/*
2252		 * Freeze mode recovery is disabled for the errors
2253		 * in RXE_FREEZE_ABORT_MASK
2254		 */
2255		if (is_a0(dd) && (reg & RXE_FREEZE_ABORT_MASK))
2256			flags = FREEZE_ABORT;
2257
2258		start_freeze_handling(dd->pport, flags);
2259	}
2260}
2261
2262static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2263{
2264	char buf[96];
2265
2266	dd_dev_info(dd, "Misc Error: %s",
2267		misc_err_status_string(buf, sizeof(buf), reg));
2268}
2269
2270static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2271{
2272	char buf[96];
2273
2274	dd_dev_info(dd, "PIO Error: %s\n",
2275		pio_err_status_string(buf, sizeof(buf), reg));
2276
2277	if (reg & ALL_PIO_FREEZE_ERR)
2278		start_freeze_handling(dd->pport, 0);
2279}
2280
2281static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2282{
2283	char buf[96];
2284
2285	dd_dev_info(dd, "SDMA Error: %s\n",
2286		sdma_err_status_string(buf, sizeof(buf), reg));
2287
2288	if (reg & ALL_SDMA_FREEZE_ERR)
2289		start_freeze_handling(dd->pport, 0);
2290}
2291
2292static void count_port_inactive(struct hfi1_devdata *dd)
2293{
2294	struct hfi1_pportdata *ppd = dd->pport;
2295
2296	if (ppd->port_xmit_discards < ~(u64)0)
2297		ppd->port_xmit_discards++;
2298}
2299
2300/*
2301 * We have had a "disallowed packet" error during egress. Determine the
2302 * integrity check which failed, and update relevant error counter, etc.
2303 *
2304 * Note that the SEND_EGRESS_ERR_INFO register has only a single
2305 * bit of state per integrity check, and so we can miss the reason for an
2306 * egress error if more than one packet fails the same integrity check
2307 * since we cleared the corresponding bit in SEND_EGRESS_ERR_INFO.
2308 */
2309static void handle_send_egress_err_info(struct hfi1_devdata *dd)
2310{
2311	struct hfi1_pportdata *ppd = dd->pport;
2312	u64 src = read_csr(dd, SEND_EGRESS_ERR_SOURCE); /* read first */
2313	u64 info = read_csr(dd, SEND_EGRESS_ERR_INFO);
2314	char buf[96];
2315
2316	/* clear down all observed info as quickly as possible after read */
2317	write_csr(dd, SEND_EGRESS_ERR_INFO, info);
2318
2319	dd_dev_info(dd,
2320		"Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n",
2321		info, egress_err_info_string(buf, sizeof(buf), info), src);
2322
2323	/* Eventually add other counters for each bit */
2324
2325	if (info & SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK) {
2326		if (ppd->port_xmit_discards < ~(u64)0)
2327			ppd->port_xmit_discards++;
2328	}
2329}
2330
2331/*
2332 * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
2333 * register. Does it represent a 'port inactive' error?
2334 */
2335static inline int port_inactive_err(u64 posn)
2336{
2337	return (posn >= SEES(TX_LINKDOWN) &&
2338		posn <= SEES(TX_INCORRECT_LINK_STATE));
2339}
2340
2341/*
2342 * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
2343 * register. Does it represent a 'disallowed packet' error?
2344 */
2345static inline int disallowed_pkt_err(u64 posn)
2346{
2347	return (posn >= SEES(TX_SDMA0_DISALLOWED_PACKET) &&
2348		posn <= SEES(TX_SDMA15_DISALLOWED_PACKET));
2349}
2350
2351static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2352{
2353	u64 reg_copy = reg, handled = 0;
2354	char buf[96];
2355
2356	if (reg & ALL_TXE_EGRESS_FREEZE_ERR)
2357		start_freeze_handling(dd->pport, 0);
2358	if (is_a0(dd) && (reg &
2359		    SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK)
2360		    && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR))
2361		start_freeze_handling(dd->pport, 0);
2362
2363	while (reg_copy) {
2364		int posn = fls64(reg_copy);
2365		/*
2366		 * fls64() returns a 1-based offset, but we generally
2367		 * want 0-based offsets.
2368		 */
2369		int shift = posn - 1;
2370
2371		if (port_inactive_err(shift)) {
2372			count_port_inactive(dd);
2373			handled |= (1ULL << shift);
2374		} else if (disallowed_pkt_err(shift)) {
2375			handle_send_egress_err_info(dd);
2376			handled |= (1ULL << shift);
2377		}
2378		clear_bit(shift, (unsigned long *)&reg_copy);
2379	}
2380
2381	reg &= ~handled;
2382
2383	if (reg)
2384		dd_dev_info(dd, "Egress Error: %s\n",
2385			egress_err_status_string(buf, sizeof(buf), reg));
2386}
2387
2388static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2389{
2390	char buf[96];
2391
2392	dd_dev_info(dd, "Send Error: %s\n",
2393		send_err_status_string(buf, sizeof(buf), reg));
2394
2395}
2396
2397/*
2398 * The maximum number of times the error clear down will loop before
2399 * blocking a repeating error.  This value is arbitrary.
2400 */
2401#define MAX_CLEAR_COUNT 20
2402
2403/*
2404 * Clear and handle an error register.  All error interrupts are funneled
2405 * through here to have a central location to correctly handle single-
2406 * or multi-shot errors.
2407 *
2408 * For non per-context registers, call this routine with a context value
2409 * of 0 so the per-context offset is zero.
2410 *
2411 * If the handler loops too many times, assume that something is wrong
2412 * and can't be fixed, so mask the error bits.
2413 */
2414static void interrupt_clear_down(struct hfi1_devdata *dd,
2415				 u32 context,
2416				 const struct err_reg_info *eri)
2417{
2418	u64 reg;
2419	u32 count;
2420
2421	/* read in a loop until no more errors are seen */
2422	count = 0;
2423	while (1) {
2424		reg = read_kctxt_csr(dd, context, eri->status);
2425		if (reg == 0)
2426			break;
2427		write_kctxt_csr(dd, context, eri->clear, reg);
2428		if (likely(eri->handler))
2429			eri->handler(dd, context, reg);
2430		count++;
2431		if (count > MAX_CLEAR_COUNT) {
2432			u64 mask;
2433
2434			dd_dev_err(dd, "Repeating %s bits 0x%llx - masking\n",
2435				eri->desc, reg);
2436			/*
2437			 * Read-modify-write so any other masked bits
2438			 * remain masked.
2439			 */
2440			mask = read_kctxt_csr(dd, context, eri->mask);
2441			mask &= ~reg;
2442			write_kctxt_csr(dd, context, eri->mask, mask);
2443			break;
2444		}
2445	}
2446}
2447
2448/*
2449 * CCE block "misc" interrupt.  Source is < 16.
2450 */
2451static void is_misc_err_int(struct hfi1_devdata *dd, unsigned int source)
2452{
2453	const struct err_reg_info *eri = &misc_errs[source];
2454
2455	if (eri->handler) {
2456		interrupt_clear_down(dd, 0, eri);
2457	} else {
2458		dd_dev_err(dd, "Unexpected misc interrupt (%u) - reserved\n",
2459			source);
2460	}
2461}
2462
2463static char *send_context_err_status_string(char *buf, int buf_len, u64 flags)
2464{
2465	return flag_string(buf, buf_len, flags,
2466			sc_err_status_flags, ARRAY_SIZE(sc_err_status_flags));
2467}
2468
2469/*
2470 * Send context error interrupt.  Source (hw_context) is < 160.
2471 *
2472 * All send context errors cause the send context to halt.  The normal
2473 * clear-down mechanism cannot be used because we cannot clear the
2474 * error bits until several other long-running items are done first.
2475 * This is OK because with the context halted, nothing else is going
2476 * to happen on it anyway.
2477 */
2478static void is_sendctxt_err_int(struct hfi1_devdata *dd,
2479				unsigned int hw_context)
2480{
2481	struct send_context_info *sci;
2482	struct send_context *sc;
2483	char flags[96];
2484	u64 status;
2485	u32 sw_index;
2486
2487	sw_index = dd->hw_to_sw[hw_context];
2488	if (sw_index >= dd->num_send_contexts) {
2489		dd_dev_err(dd,
2490			"out of range sw index %u for send context %u\n",
2491			sw_index, hw_context);
2492		return;
2493	}
2494	sci = &dd->send_contexts[sw_index];
2495	sc = sci->sc;
2496	if (!sc) {
2497		dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__,
2498			sw_index, hw_context);
2499		return;
2500	}
2501
2502	/* tell the software that a halt has begun */
2503	sc_stop(sc, SCF_HALTED);
2504
2505	status = read_kctxt_csr(dd, hw_context, SEND_CTXT_ERR_STATUS);
2506
2507	dd_dev_info(dd, "Send Context %u(%u) Error: %s\n", sw_index, hw_context,
2508		send_context_err_status_string(flags, sizeof(flags), status));
2509
2510	if (status & SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK)
2511		handle_send_egress_err_info(dd);
2512
2513	/*
2514	 * Automatically restart halted kernel contexts out of interrupt
2515	 * context.  User contexts must ask the driver to restart the context.
2516	 */
2517	if (sc->type != SC_USER)
2518		queue_work(dd->pport->hfi1_wq, &sc->halt_work);
2519}
2520
2521static void handle_sdma_eng_err(struct hfi1_devdata *dd,
2522				unsigned int source, u64 status)
2523{
2524	struct sdma_engine *sde;
2525
2526	sde = &dd->per_sdma[source];
2527#ifdef CONFIG_SDMA_VERBOSITY
2528	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
2529		   slashstrip(__FILE__), __LINE__, __func__);
2530	dd_dev_err(sde->dd, "CONFIG SDMA(%u) source: %u status 0x%llx\n",
2531		   sde->this_idx, source, (unsigned long long)status);
2532#endif
2533	sdma_engine_error(sde, status);
2534}
2535
2536/*
2537 * CCE block SDMA error interrupt.  Source is < 16.
2538 */
2539static void is_sdma_eng_err_int(struct hfi1_devdata *dd, unsigned int source)
2540{
2541#ifdef CONFIG_SDMA_VERBOSITY
2542	struct sdma_engine *sde = &dd->per_sdma[source];
2543
2544	dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
2545		   slashstrip(__FILE__), __LINE__, __func__);
2546	dd_dev_err(dd, "CONFIG SDMA(%u) source: %u\n", sde->this_idx,
2547		   source);
2548	sdma_dumpstate(sde);
2549#endif
2550	interrupt_clear_down(dd, source, &sdma_eng_err);
2551}
2552
2553/*
2554 * CCE block "various" interrupt.  Source is < 8.
2555 */
2556static void is_various_int(struct hfi1_devdata *dd, unsigned int source)
2557{
2558	const struct err_reg_info *eri = &various_err[source];
2559
2560	/*
2561	 * TCritInt cannot go through interrupt_clear_down()
2562	 * because it is not a second tier interrupt. The handler
2563	 * should be called directly.
2564	 */
2565	if (source == TCRIT_INT_SOURCE)
2566		handle_temp_err(dd);
2567	else if (eri->handler)
2568		interrupt_clear_down(dd, 0, eri);
2569	else
2570		dd_dev_info(dd,
2571			"%s: Unimplemented/reserved interrupt %d\n",
2572			__func__, source);
2573}
2574
2575static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
2576{
2577	/* source is always zero */
2578	struct hfi1_pportdata *ppd = dd->pport;
2579	unsigned long flags;
2580	u64 qsfp_int_mgmt = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
2581
2582	if (reg & QSFP_HFI0_MODPRST_N) {
2583
2584		dd_dev_info(dd, "%s: ModPresent triggered QSFP interrupt\n",
2585				__func__);
2586
2587		if (!qsfp_mod_present(ppd)) {
2588			ppd->driver_link_ready = 0;
2589			/*
2590			 * Cable removed, reset all our information about the
2591			 * cache and cable capabilities
2592			 */
2593
2594			spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2595			/*
2596			 * We don't set cache_refresh_required here as we expect
2597			 * an interrupt when a cable is inserted
2598			 */
2599			ppd->qsfp_info.cache_valid = 0;
2600			ppd->qsfp_info.qsfp_interrupt_functional = 0;
2601			spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
2602						flags);
2603			write_csr(dd,
2604					dd->hfi1_id ?
2605						ASIC_QSFP2_INVERT :
2606						ASIC_QSFP1_INVERT,
2607				qsfp_int_mgmt);
2608			if (ppd->host_link_state == HLS_DN_POLL) {
2609				/*
2610				 * The link is still in POLL. This means
2611				 * that the normal link down processing
2612				 * will not happen. We have to do it here
2613				 * before turning the DC off.
2614				 */
2615				queue_work(ppd->hfi1_wq, &ppd->link_down_work);
2616			}
2617		} else {
2618			spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2619			ppd->qsfp_info.cache_valid = 0;
2620			ppd->qsfp_info.cache_refresh_required = 1;
2621			spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
2622						flags);
2623
2624			qsfp_int_mgmt &= ~(u64)QSFP_HFI0_MODPRST_N;
2625			write_csr(dd,
2626					dd->hfi1_id ?
2627						ASIC_QSFP2_INVERT :
2628						ASIC_QSFP1_INVERT,
2629				qsfp_int_mgmt);
2630		}
2631	}
2632
2633	if (reg & QSFP_HFI0_INT_N) {
2634
2635		dd_dev_info(dd, "%s: IntN triggered QSFP interrupt\n",
2636				__func__);
2637		spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2638		ppd->qsfp_info.check_interrupt_flags = 1;
2639		ppd->qsfp_info.qsfp_interrupt_functional = 1;
2640		spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags);
2641	}
2642
2643	/* Schedule the QSFP work only if there is a cable attached. */
2644	if (qsfp_mod_present(ppd))
2645		queue_work(ppd->hfi1_wq, &ppd->qsfp_info.qsfp_work);
2646}
2647
2648static int request_host_lcb_access(struct hfi1_devdata *dd)
2649{
2650	int ret;
2651
2652	ret = do_8051_command(dd, HCMD_MISC,
2653		(u64)HCMD_MISC_REQUEST_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
2654		NULL);
2655	if (ret != HCMD_SUCCESS) {
2656		dd_dev_err(dd, "%s: command failed with error %d\n",
2657			__func__, ret);
2658	}
2659	return ret == HCMD_SUCCESS ? 0 : -EBUSY;
2660}
2661
2662static int request_8051_lcb_access(struct hfi1_devdata *dd)
2663{
2664	int ret;
2665
2666	ret = do_8051_command(dd, HCMD_MISC,
2667		(u64)HCMD_MISC_GRANT_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
2668		NULL);
2669	if (ret != HCMD_SUCCESS) {
2670		dd_dev_err(dd, "%s: command failed with error %d\n",
2671			__func__, ret);
2672	}
2673	return ret == HCMD_SUCCESS ? 0 : -EBUSY;
2674}
2675
2676/*
2677 * Set the LCB selector - allow host access.  The DCC selector always
2678 * points to the host.
2679 */
2680static inline void set_host_lcb_access(struct hfi1_devdata *dd)
2681{
2682	write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
2683				DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK
2684				| DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK);
2685}
2686
2687/*
2688 * Clear the LCB selector - allow 8051 access.  The DCC selector always
2689 * points to the host.
2690 */
2691static inline void set_8051_lcb_access(struct hfi1_devdata *dd)
2692{
2693	write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
2694				DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK);
2695}
2696
2697/*
2698 * Acquire LCB access from the 8051.  If the host already has access,
2699 * just increment a counter.  Otherwise, inform the 8051 that the
2700 * host is taking access.
2701 *
2702 * Returns:
2703 *	0 on success
2704 *	-EBUSY if the 8051 has control and cannot be disturbed
2705 *	-errno if unable to acquire access from the 8051
2706 */
2707int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
2708{
2709	struct hfi1_pportdata *ppd = dd->pport;
2710	int ret = 0;
2711
2712	/*
2713	 * Use the host link state lock so the operation of this routine
2714	 * { link state check, selector change, count increment } can occur
2715	 * as a unit against a link state change.  Otherwise there is a
2716	 * race between the state change and the count increment.
2717	 */
2718	if (sleep_ok) {
2719		mutex_lock(&ppd->hls_lock);
2720	} else {
2721		while (!mutex_trylock(&ppd->hls_lock))
2722			udelay(1);
2723	}
2724
2725	/* this access is valid only when the link is up */
2726	if ((ppd->host_link_state & HLS_UP) == 0) {
2727		dd_dev_info(dd, "%s: link state %s not up\n",
2728			__func__, link_state_name(ppd->host_link_state));
2729		ret = -EBUSY;
2730		goto done;
2731	}
2732
2733	if (dd->lcb_access_count == 0) {
2734		ret = request_host_lcb_access(dd);
2735		if (ret) {
2736			dd_dev_err(dd,
2737				"%s: unable to acquire LCB access, err %d\n",
2738				__func__, ret);
2739			goto done;
2740		}
2741		set_host_lcb_access(dd);
2742	}
2743	dd->lcb_access_count++;
2744done:
2745	mutex_unlock(&ppd->hls_lock);
2746	return ret;
2747}
2748
2749/*
2750 * Release LCB access by decrementing the use count.  If the count is moving
2751 * from 1 to 0, inform 8051 that it has control back.
2752 *
2753 * Returns:
2754 *	0 on success
2755 *	-errno if unable to release access to the 8051
2756 */
2757int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
2758{
2759	int ret = 0;
2760
2761	/*
2762	 * Use the host link state lock because the acquire needed it.
2763	 * Here, we only need to keep { selector change, count decrement }
2764	 * as a unit.
2765	 */
2766	if (sleep_ok) {
2767		mutex_lock(&dd->pport->hls_lock);
2768	} else {
2769		while (!mutex_trylock(&dd->pport->hls_lock))
2770			udelay(1);
2771	}
2772
2773	if (dd->lcb_access_count == 0) {
2774		dd_dev_err(dd, "%s: LCB access count is zero.  Skipping.\n",
2775			__func__);
2776		goto done;
2777	}
2778
2779	if (dd->lcb_access_count == 1) {
2780		set_8051_lcb_access(dd);
2781		ret = request_8051_lcb_access(dd);
2782		if (ret) {
2783			dd_dev_err(dd,
2784				"%s: unable to release LCB access, err %d\n",
2785				__func__, ret);
2786			/* restore host access if the grant didn't work */
2787			set_host_lcb_access(dd);
2788			goto done;
2789		}
2790	}
2791	dd->lcb_access_count--;
2792done:
2793	mutex_unlock(&dd->pport->hls_lock);
2794	return ret;
2795}
2796
2797/*
2798 * Initialize LCB access variables and state.  Called during driver load,
2799 * after most of the initialization is finished.
2800 *
2801 * The DC default is LCB access on for the host.  The driver defaults to
2802 * leaving access to the 8051.  Assign access now - this constrains the call
2803 * to this routine to be after all LCB set-up is done.  In particular, after
2804 * hf1_init_dd() -> set_up_interrupts() -> clear_all_interrupts()
2805 */
2806static void init_lcb_access(struct hfi1_devdata *dd)
2807{
2808	dd->lcb_access_count = 0;
2809}
2810
2811/*
2812 * Write a response back to a 8051 request.
2813 */
2814static void hreq_response(struct hfi1_devdata *dd, u8 return_code, u16 rsp_data)
2815{
2816	write_csr(dd, DC_DC8051_CFG_EXT_DEV_0,
2817		DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK
2818		| (u64)return_code << DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT
2819		| (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
2820}
2821
2822/*
2823 * Handle requests from the 8051.
2824 */
2825static void handle_8051_request(struct hfi1_devdata *dd)
2826{
2827	u64 reg;
2828	u16 data;
2829	u8 type;
2830
2831	reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_1);
2832	if ((reg & DC_DC8051_CFG_EXT_DEV_1_REQ_NEW_SMASK) == 0)
2833		return;	/* no request */
2834
2835	/* zero out COMPLETED so the response is seen */
2836	write_csr(dd, DC_DC8051_CFG_EXT_DEV_0, 0);
2837
2838	/* extract request details */
2839	type = (reg >> DC_DC8051_CFG_EXT_DEV_1_REQ_TYPE_SHIFT)
2840			& DC_DC8051_CFG_EXT_DEV_1_REQ_TYPE_MASK;
2841	data = (reg >> DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SHIFT)
2842			& DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_MASK;
2843
2844	switch (type) {
2845	case HREQ_LOAD_CONFIG:
2846	case HREQ_SAVE_CONFIG:
2847	case HREQ_READ_CONFIG:
2848	case HREQ_SET_TX_EQ_ABS:
2849	case HREQ_SET_TX_EQ_REL:
2850	case HREQ_ENABLE:
2851		dd_dev_info(dd, "8051 request: request 0x%x not supported\n",
2852			type);
2853		hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
2854		break;
2855
2856	case HREQ_CONFIG_DONE:
2857		hreq_response(dd, HREQ_SUCCESS, 0);
2858		break;
2859
2860	case HREQ_INTERFACE_TEST:
2861		hreq_response(dd, HREQ_SUCCESS, data);
2862		break;
2863
2864	default:
2865		dd_dev_err(dd, "8051 request: unknown request 0x%x\n", type);
2866		hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
2867		break;
2868	}
2869}
2870
2871static void write_global_credit(struct hfi1_devdata *dd,
2872				u8 vau, u16 total, u16 shared)
2873{
2874	write_csr(dd, SEND_CM_GLOBAL_CREDIT,
2875		((u64)total
2876			<< SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
2877		| ((u64)shared
2878			<< SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
2879		| ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
2880}
2881
2882/*
2883 * Set up initial VL15 credits of the remote.  Assumes the rest of
2884 * the CM credit registers are zero from a previous global or credit reset .
2885 */
2886void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf)
2887{
2888	/* leave shared count at zero for both global and VL15 */
2889	write_global_credit(dd, vau, vl15buf, 0);
2890
2891	/* We may need some credits for another VL when sending packets
2892	 * with the snoop interface. Dividing it down the middle for VL15
2893	 * and VL0 should suffice.
2894	 */
2895	if (unlikely(dd->hfi1_snoop.mode_flag == HFI1_PORT_SNOOP_MODE)) {
2896		write_csr(dd, SEND_CM_CREDIT_VL15, (u64)(vl15buf >> 1)
2897		    << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
2898		write_csr(dd, SEND_CM_CREDIT_VL, (u64)(vl15buf >> 1)
2899		    << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT);
2900	} else {
2901		write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
2902			<< SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
2903	}
2904}
2905
2906/*
2907 * Zero all credit details from the previous connection and
2908 * reset the CM manager's internal counters.
2909 */
2910void reset_link_credits(struct hfi1_devdata *dd)
2911{
2912	int i;
2913
2914	/* remove all previous VL credit limits */
2915	for (i = 0; i < TXE_NUM_DATA_VL; i++)
2916		write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
2917	write_csr(dd, SEND_CM_CREDIT_VL15, 0);
2918	write_global_credit(dd, 0, 0, 0);
2919	/* reset the CM block */
2920	pio_send_control(dd, PSC_CM_RESET);
2921}
2922
2923/* convert a vCU to a CU */
2924static u32 vcu_to_cu(u8 vcu)
2925{
2926	return 1 << vcu;
2927}
2928
2929/* convert a CU to a vCU */
2930static u8 cu_to_vcu(u32 cu)
2931{
2932	return ilog2(cu);
2933}
2934
2935/* convert a vAU to an AU */
2936static u32 vau_to_au(u8 vau)
2937{
2938	return 8 * (1 << vau);
2939}
2940
2941static void set_linkup_defaults(struct hfi1_pportdata *ppd)
2942{
2943	ppd->sm_trap_qp = 0x0;
2944	ppd->sa_qp = 0x1;
2945}
2946
2947/*
2948 * Graceful LCB shutdown.  This leaves the LCB FIFOs in reset.
2949 */
2950static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
2951{
2952	u64 reg;
2953
2954	/* clear lcb run: LCB_CFG_RUN.EN = 0 */
2955	write_csr(dd, DC_LCB_CFG_RUN, 0);
2956	/* set tx fifo reset: LCB_CFG_TX_FIFOS_RESET.VAL = 1 */
2957	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET,
2958		1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT);
2959	/* set dcc reset csr: DCC_CFG_RESET.{reset_lcb,reset_rx_fpe} = 1 */
2960	dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN);
2961	reg = read_csr(dd, DCC_CFG_RESET);
2962	write_csr(dd, DCC_CFG_RESET,
2963		reg
2964		| (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT)
2965		| (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
2966	(void) read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
2967	if (!abort) {
2968		udelay(1);    /* must hold for the longer of 16cclks or 20ns */
2969		write_csr(dd, DCC_CFG_RESET, reg);
2970		write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
2971	}
2972}
2973
2974/*
2975 * This routine should be called after the link has been transitioned to
2976 * OFFLINE (OFFLINE state has the side effect of putting the SerDes into
2977 * reset).
2978 *
2979 * The expectation is that the caller of this routine would have taken
2980 * care of properly transitioning the link into the correct state.
2981 */
2982static void dc_shutdown(struct hfi1_devdata *dd)
2983{
2984	unsigned long flags;
2985
2986	spin_lock_irqsave(&dd->dc8051_lock, flags);
2987	if (dd->dc_shutdown) {
2988		spin_unlock_irqrestore(&dd->dc8051_lock, flags);
2989		return;
2990	}
2991	dd->dc_shutdown = 1;
2992	spin_unlock_irqrestore(&dd->dc8051_lock, flags);
2993	/* Shutdown the LCB */
2994	lcb_shutdown(dd, 1);
2995	/* Going to OFFLINE would have causes the 8051 to put the
2996	 * SerDes into reset already. Just need to shut down the 8051,
2997	 * itself. */
2998	write_csr(dd, DC_DC8051_CFG_RST, 0x1);
2999}
3000
3001/* Calling this after the DC has been brought out of reset should not
3002 * do any damage. */
3003static void dc_start(struct hfi1_devdata *dd)
3004{
3005	unsigned long flags;
3006	int ret;
3007
3008	spin_lock_irqsave(&dd->dc8051_lock, flags);
3009	if (!dd->dc_shutdown)
3010		goto done;
3011	spin_unlock_irqrestore(&dd->dc8051_lock, flags);
3012	/* Take the 8051 out of reset */
3013	write_csr(dd, DC_DC8051_CFG_RST, 0ull);
3014	/* Wait until 8051 is ready */
3015	ret = wait_fm_ready(dd, TIMEOUT_8051_START);
3016	if (ret) {
3017		dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
3018			__func__);
3019	}
3020	/* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
3021	write_csr(dd, DCC_CFG_RESET, 0x10);
3022	/* lcb_shutdown() with abort=1 does not restore these */
3023	write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
3024	spin_lock_irqsave(&dd->dc8051_lock, flags);
3025	dd->dc_shutdown = 0;
3026done:
3027	spin_unlock_irqrestore(&dd->dc8051_lock, flags);
3028}
3029
3030/*
3031 * These LCB adjustments are for the Aurora SerDes core in the FPGA.
3032 */
3033static void adjust_lcb_for_fpga_serdes(struct hfi1_devdata *dd)
3034{
3035	u64 rx_radr, tx_radr;
3036	u32 version;
3037
3038	if (dd->icode != ICODE_FPGA_EMULATION)
3039		return;
3040
3041	/*
3042	 * These LCB defaults on emulator _s are good, nothing to do here:
3043	 *	LCB_CFG_TX_FIFOS_RADR
3044	 *	LCB_CFG_RX_FIFOS_RADR
3045	 *	LCB_CFG_LN_DCLK
3046	 *	LCB_CFG_IGNORE_LOST_RCLK
3047	 */
3048	if (is_emulator_s(dd))
3049		return;
3050	/* else this is _p */
3051
3052	version = emulator_rev(dd);
3053	if (!is_a0(dd))
3054		version = 0x2d;	/* all B0 use 0x2d or higher settings */
3055
3056	if (version <= 0x12) {
3057		/* release 0x12 and below */
3058
3059		/*
3060		 * LCB_CFG_RX_FIFOS_RADR.RST_VAL = 0x9
3061		 * LCB_CFG_RX_FIFOS_RADR.OK_TO_JUMP_VAL = 0x9
3062		 * LCB_CFG_RX_FIFOS_RADR.DO_NOT_JUMP_VAL = 0xa
3063		 */
3064		rx_radr =
3065		      0xaull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3066		    | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3067		    | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3068		/*
3069		 * LCB_CFG_TX_FIFOS_RADR.ON_REINIT = 0 (default)
3070		 * LCB_CFG_TX_FIFOS_RADR.RST_VAL = 6
3071		 */
3072		tx_radr = 6ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3073	} else if (version <= 0x18) {
3074		/* release 0x13 up to 0x18 */
3075		/* LCB_CFG_RX_FIFOS_RADR = 0x988 */
3076		rx_radr =
3077		      0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3078		    | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3079		    | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3080		tx_radr = 7ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3081	} else if (version == 0x19) {
3082		/* release 0x19 */
3083		/* LCB_CFG_RX_FIFOS_RADR = 0xa99 */
3084		rx_radr =
3085		      0xAull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3086		    | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3087		    | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3088		tx_radr = 3ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3089	} else if (version == 0x1a) {
3090		/* release 0x1a */
3091		/* LCB_CFG_RX_FIFOS_RADR = 0x988 */
3092		rx_radr =
3093		      0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3094		    | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3095		    | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3096		tx_radr = 7ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3097		write_csr(dd, DC_LCB_CFG_LN_DCLK, 1ull);
3098	} else {
3099		/* release 0x1b and higher */
3100		/* LCB_CFG_RX_FIFOS_RADR = 0x877 */
3101		rx_radr =
3102		      0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3103		    | 0x7ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3104		    | 0x7ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3105		tx_radr = 3ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3106	}
3107
3108	write_csr(dd, DC_LCB_CFG_RX_FIFOS_RADR, rx_radr);
3109	/* LCB_CFG_IGNORE_LOST_RCLK.EN = 1 */
3110	write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
3111		DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
3112	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RADR, tx_radr);
3113}
3114
3115/*
3116 * Handle a SMA idle message
3117 *
3118 * This is a work-queue function outside of the interrupt.
3119 */
3120void handle_sma_message(struct work_struct *work)
3121{
3122	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3123							sma_message_work);
3124	struct hfi1_devdata *dd = ppd->dd;
3125	u64 msg;
3126	int ret;
3127
3128	/* msg is bytes 1-4 of the 40-bit idle message - the command code
3129	   is stripped off */
3130	ret = read_idle_sma(dd, &msg);
3131	if (ret)
3132		return;
3133	dd_dev_info(dd, "%s: SMA message 0x%llx\n", __func__, msg);
3134	/*
3135	 * React to the SMA message.  Byte[1] (0 for us) is the command.
3136	 */
3137	switch (msg & 0xff) {
3138	case SMA_IDLE_ARM:
3139		/*
3140		 * See OPAv1 table 9-14 - HFI and External Switch Ports Key
3141		 * State Transitions
3142		 *
3143		 * Only expected in INIT or ARMED, discard otherwise.
3144		 */
3145		if (ppd->host_link_state & (HLS_UP_INIT | HLS_UP_ARMED))
3146			ppd->neighbor_normal = 1;
3147		break;
3148	case SMA_IDLE_ACTIVE:
3149		/*
3150		 * See OPAv1 table 9-14 - HFI and External Switch Ports Key
3151		 * State Transitions
3152		 *
3153		 * Can activate the node.  Discard otherwise.
3154		 */
3155		if (ppd->host_link_state == HLS_UP_ARMED
3156					&& ppd->is_active_optimize_enabled) {
3157			ppd->neighbor_normal = 1;
3158			ret = set_link_state(ppd, HLS_UP_ACTIVE);
3159			if (ret)
3160				dd_dev_err(
3161					dd,
3162					"%s: received Active SMA idle message, couldn't set link to Active\n",
3163					__func__);
3164		}
3165		break;
3166	default:
3167		dd_dev_err(dd,
3168			"%s: received unexpected SMA idle message 0x%llx\n",
3169			__func__, msg);
3170		break;
3171	}
3172}
3173
3174static void adjust_rcvctrl(struct hfi1_devdata *dd, u64 add, u64 clear)
3175{
3176	u64 rcvctrl;
3177	unsigned long flags;
3178
3179	spin_lock_irqsave(&dd->rcvctrl_lock, flags);
3180	rcvctrl = read_csr(dd, RCV_CTRL);
3181	rcvctrl |= add;
3182	rcvctrl &= ~clear;
3183	write_csr(dd, RCV_CTRL, rcvctrl);
3184	spin_unlock_irqrestore(&dd->rcvctrl_lock, flags);
3185}
3186
3187static inline void add_rcvctrl(struct hfi1_devdata *dd, u64 add)
3188{
3189	adjust_rcvctrl(dd, add, 0);
3190}
3191
3192static inline void clear_rcvctrl(struct hfi1_devdata *dd, u64 clear)
3193{
3194	adjust_rcvctrl(dd, 0, clear);
3195}
3196
3197/*
3198 * Called from all interrupt handlers to start handling an SPC freeze.
3199 */
3200void start_freeze_handling(struct hfi1_pportdata *ppd, int flags)
3201{
3202	struct hfi1_devdata *dd = ppd->dd;
3203	struct send_context *sc;
3204	int i;
3205
3206	if (flags & FREEZE_SELF)
3207		write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
3208
3209	/* enter frozen mode */
3210	dd->flags |= HFI1_FROZEN;
3211
3212	/* notify all SDMA engines that they are going into a freeze */
3213	sdma_freeze_notify(dd, !!(flags & FREEZE_LINK_DOWN));
3214
3215	/* do halt pre-handling on all enabled send contexts */
3216	for (i = 0; i < dd->num_send_contexts; i++) {
3217		sc = dd->send_contexts[i].sc;
3218		if (sc && (sc->flags & SCF_ENABLED))
3219			sc_stop(sc, SCF_FROZEN | SCF_HALTED);
3220	}
3221
3222	/* Send context are frozen. Notify user space */
3223	hfi1_set_uevent_bits(ppd, _HFI1_EVENT_FROZEN_BIT);
3224
3225	if (flags & FREEZE_ABORT) {
3226		dd_dev_err(dd,
3227			   "Aborted freeze recovery. Please REBOOT system\n");
3228		return;
3229	}
3230	/* queue non-interrupt handler */
3231	queue_work(ppd->hfi1_wq, &ppd->freeze_work);
3232}
3233
3234/*
3235 * Wait until all 4 sub-blocks indicate that they have frozen or unfrozen,
3236 * depending on the "freeze" parameter.
3237 *
3238 * No need to return an error if it times out, our only option
3239 * is to proceed anyway.
3240 */
3241static void wait_for_freeze_status(struct hfi1_devdata *dd, int freeze)
3242{
3243	unsigned long timeout;
3244	u64 reg;
3245
3246	timeout = jiffies + msecs_to_jiffies(FREEZE_STATUS_TIMEOUT);
3247	while (1) {
3248		reg = read_csr(dd, CCE_STATUS);
3249		if (freeze) {
3250			/* waiting until all indicators are set */
3251			if ((reg & ALL_FROZE) == ALL_FROZE)
3252				return;	/* all done */
3253		} else {
3254			/* waiting until all indicators are clear */
3255			if ((reg & ALL_FROZE) == 0)
3256				return; /* all done */
3257		}
3258
3259		if (time_after(jiffies, timeout)) {
3260			dd_dev_err(dd,
3261				"Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing",
3262				freeze ? "" : "un",
3263				reg & ALL_FROZE,
3264				freeze ? ALL_FROZE : 0ull);
3265			return;
3266		}
3267		usleep_range(80, 120);
3268	}
3269}
3270
3271/*
3272 * Do all freeze handling for the RXE block.
3273 */
3274static void rxe_freeze(struct hfi1_devdata *dd)
3275{
3276	int i;
3277
3278	/* disable port */
3279	clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3280
3281	/* disable all receive contexts */
3282	for (i = 0; i < dd->num_rcv_contexts; i++)
3283		hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, i);
3284}
3285
3286/*
3287 * Unfreeze handling for the RXE block - kernel contexts only.
3288 * This will also enable the port.  User contexts will do unfreeze
3289 * handling on a per-context basis as they call into the driver.
3290 *
3291 */
3292static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
3293{
3294	int i;
3295
3296	/* enable all kernel contexts */
3297	for (i = 0; i < dd->n_krcv_queues; i++)
3298		hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, i);
3299
3300	/* enable port */
3301	add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3302}
3303
3304/*
3305 * Non-interrupt SPC freeze handling.
3306 *
3307 * This is a work-queue function outside of the triggering interrupt.
3308 */
3309void handle_freeze(struct work_struct *work)
3310{
3311	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3312								freeze_work);
3313	struct hfi1_devdata *dd = ppd->dd;
3314
3315	/* wait for freeze indicators on all affected blocks */
3316	dd_dev_info(dd, "Entering SPC freeze\n");
3317	wait_for_freeze_status(dd, 1);
3318
3319	/* SPC is now frozen */
3320
3321	/* do send PIO freeze steps */
3322	pio_freeze(dd);
3323
3324	/* do send DMA freeze steps */
3325	sdma_freeze(dd);
3326
3327	/* do send egress freeze steps - nothing to do */
3328
3329	/* do receive freeze steps */
3330	rxe_freeze(dd);
3331
3332	/*
3333	 * Unfreeze the hardware - clear the freeze, wait for each
3334	 * block's frozen bit to clear, then clear the frozen flag.
3335	 */
3336	write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_UNFREEZE_SMASK);
3337	wait_for_freeze_status(dd, 0);
3338
3339	if (is_a0(dd)) {
3340		write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
3341		wait_for_freeze_status(dd, 1);
3342		write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_UNFREEZE_SMASK);
3343		wait_for_freeze_status(dd, 0);
3344	}
3345
3346	/* do send PIO unfreeze steps for kernel contexts */
3347	pio_kernel_unfreeze(dd);
3348
3349	/* do send DMA unfreeze steps */
3350	sdma_unfreeze(dd);
3351
3352	/* do send egress unfreeze steps - nothing to do */
3353
3354	/* do receive unfreeze steps for kernel contexts */
3355	rxe_kernel_unfreeze(dd);
3356
3357	/*
3358	 * The unfreeze procedure touches global device registers when
3359	 * it disables and re-enables RXE. Mark the device unfrozen
3360	 * after all that is done so other parts of the driver waiting
3361	 * for the device to unfreeze don't do things out of order.
3362	 *
3363	 * The above implies that the meaning of HFI1_FROZEN flag is
3364	 * "Device has gone into freeze mode and freeze mode handling
3365	 * is still in progress."
3366	 *
3367	 * The flag will be removed when freeze mode processing has
3368	 * completed.
3369	 */
3370	dd->flags &= ~HFI1_FROZEN;
3371	wake_up(&dd->event_queue);
3372
3373	/* no longer frozen */
3374	dd_dev_err(dd, "Exiting SPC freeze\n");
3375}
3376
3377/*
3378 * Handle a link up interrupt from the 8051.
3379 *
3380 * This is a work-queue function outside of the interrupt.
3381 */
3382void handle_link_up(struct work_struct *work)
3383{
3384	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3385								link_up_work);
3386	set_link_state(ppd, HLS_UP_INIT);
3387
3388	/* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
3389	read_ltp_rtt(ppd->dd);
3390	/*
3391	 * OPA specifies that certain counters are cleared on a transition
3392	 * to link up, so do that.
3393	 */
3394	clear_linkup_counters(ppd->dd);
3395	/*
3396	 * And (re)set link up default values.
3397	 */
3398	set_linkup_defaults(ppd);
3399
3400	/* enforce link speed enabled */
3401	if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) {
3402		/* oops - current speed is not enabled, bounce */
3403		dd_dev_err(ppd->dd,
3404			"Link speed active 0x%x is outside enabled 0x%x, downing link\n",
3405			ppd->link_speed_active, ppd->link_speed_enabled);
3406		set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
3407			OPA_LINKDOWN_REASON_SPEED_POLICY);
3408		set_link_state(ppd, HLS_DN_OFFLINE);
3409		start_link(ppd);
3410	}
3411}
3412
3413/* Several pieces of LNI information were cached for SMA in ppd.
3414 * Reset these on link down */
3415static void reset_neighbor_info(struct hfi1_pportdata *ppd)
3416{
3417	ppd->neighbor_guid = 0;
3418	ppd->neighbor_port_number = 0;
3419	ppd->neighbor_type = 0;
3420	ppd->neighbor_fm_security = 0;
3421}
3422
3423/*
3424 * Handle a link down interrupt from the 8051.
3425 *
3426 * This is a work-queue function outside of the interrupt.
3427 */
3428void handle_link_down(struct work_struct *work)
3429{
3430	u8 lcl_reason, neigh_reason = 0;
3431	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3432								link_down_work);
3433
3434	/* go offline first, then deal with reasons */
3435	set_link_state(ppd, HLS_DN_OFFLINE);
3436
3437	lcl_reason = 0;
3438	read_planned_down_reason_code(ppd->dd, &neigh_reason);
3439
3440	/*
3441	 * If no reason, assume peer-initiated but missed
3442	 * LinkGoingDown idle flits.
3443	 */
3444	if (neigh_reason == 0)
3445		lcl_reason = OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN;
3446
3447	set_link_down_reason(ppd, lcl_reason, neigh_reason, 0);
3448
3449	reset_neighbor_info(ppd);
3450
3451	/* disable the port */
3452	clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3453
3454	/* If there is no cable attached, turn the DC off. Otherwise,
3455	 * start the link bring up. */
3456	if (!qsfp_mod_present(ppd))
3457		dc_shutdown(ppd->dd);
3458	else
3459		start_link(ppd);
3460}
3461
3462void handle_link_bounce(struct work_struct *work)
3463{
3464	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3465							link_bounce_work);
3466
3467	/*
3468	 * Only do something if the link is currently up.
3469	 */
3470	if (ppd->host_link_state & HLS_UP) {
3471		set_link_state(ppd, HLS_DN_OFFLINE);
3472		start_link(ppd);
3473	} else {
3474		dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n",
3475			__func__, link_state_name(ppd->host_link_state));
3476	}
3477}
3478
3479/*
3480 * Mask conversion: Capability exchange to Port LTP.  The capability
3481 * exchange has an implicit 16b CRC that is mandatory.
3482 */
3483static int cap_to_port_ltp(int cap)
3484{
3485	int port_ltp = PORT_LTP_CRC_MODE_16; /* this mode is mandatory */
3486
3487	if (cap & CAP_CRC_14B)
3488		port_ltp |= PORT_LTP_CRC_MODE_14;
3489	if (cap & CAP_CRC_48B)
3490		port_ltp |= PORT_LTP_CRC_MODE_48;
3491	if (cap & CAP_CRC_12B_16B_PER_LANE)
3492		port_ltp |= PORT_LTP_CRC_MODE_PER_LANE;
3493
3494	return port_ltp;
3495}
3496
3497/*
3498 * Convert an OPA Port LTP mask to capability mask
3499 */
3500int port_ltp_to_cap(int port_ltp)
3501{
3502	int cap_mask = 0;
3503
3504	if (port_ltp & PORT_LTP_CRC_MODE_14)
3505		cap_mask |= CAP_CRC_14B;
3506	if (port_ltp & PORT_LTP_CRC_MODE_48)
3507		cap_mask |= CAP_CRC_48B;
3508	if (port_ltp & PORT_LTP_CRC_MODE_PER_LANE)
3509		cap_mask |= CAP_CRC_12B_16B_PER_LANE;
3510
3511	return cap_mask;
3512}
3513
3514/*
3515 * Convert a single DC LCB CRC mode to an OPA Port LTP mask.
3516 */
3517static int lcb_to_port_ltp(int lcb_crc)
3518{
3519	int port_ltp = 0;
3520
3521	if (lcb_crc == LCB_CRC_12B_16B_PER_LANE)
3522		port_ltp = PORT_LTP_CRC_MODE_PER_LANE;
3523	else if (lcb_crc == LCB_CRC_48B)
3524		port_ltp = PORT_LTP_CRC_MODE_48;
3525	else if (lcb_crc == LCB_CRC_14B)
3526		port_ltp = PORT_LTP_CRC_MODE_14;
3527	else
3528		port_ltp = PORT_LTP_CRC_MODE_16;
3529
3530	return port_ltp;
3531}
3532
3533/*
3534 * Our neighbor has indicated that we are allowed to act as a fabric
3535 * manager, so place the full management partition key in the second
3536 * (0-based) pkey array position (see OPAv1, section 20.2.2.6.8). Note
3537 * that we should already have the limited management partition key in
3538 * array element 1, and also that the port is not yet up when
3539 * add_full_mgmt_pkey() is invoked.
3540 */
3541static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
3542{
3543	struct hfi1_devdata *dd = ppd->dd;
3544
3545	/* Sanity check - ppd->pkeys[2] should be 0 */
3546	if (ppd->pkeys[2] != 0)
3547		dd_dev_err(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n",
3548			   __func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
3549	ppd->pkeys[2] = FULL_MGMT_P_KEY;
3550	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
3551}
3552
3553/*
3554 * Convert the given link width to the OPA link width bitmask.
3555 */
3556static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width)
3557{
3558	switch (width) {
3559	case 0:
3560		/*
3561		 * Simulator and quick linkup do not set the width.
3562		 * Just set it to 4x without complaint.
3563		 */
3564		if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR || quick_linkup)
3565			return OPA_LINK_WIDTH_4X;
3566		return 0; /* no lanes up */
3567	case 1: return OPA_LINK_WIDTH_1X;
3568	case 2: return OPA_LINK_WIDTH_2X;
3569	case 3: return OPA_LINK_WIDTH_3X;
3570	default:
3571		dd_dev_info(dd, "%s: invalid width %d, using 4\n",
3572			__func__, width);
3573		/* fall through */
3574	case 4: return OPA_LINK_WIDTH_4X;
3575	}
3576}
3577
3578/*
3579 * Do a population count on the bottom nibble.
3580 */
3581static const u8 bit_counts[16] = {
3582	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
3583};
3584static inline u8 nibble_to_count(u8 nibble)
3585{
3586	return bit_counts[nibble & 0xf];
3587}
3588
3589/*
3590 * Read the active lane information from the 8051 registers and return
3591 * their widths.
3592 *
3593 * Active lane information is found in these 8051 registers:
3594 *	enable_lane_tx
3595 *	enable_lane_rx
3596 */
3597static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
3598			    u16 *rx_width)
3599{
3600	u16 tx, rx;
3601	u8 enable_lane_rx;
3602	u8 enable_lane_tx;
3603	u8 tx_polarity_inversion;
3604	u8 rx_polarity_inversion;
3605	u8 max_rate;
3606
3607	/* read the active lanes */
3608	read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
3609				&rx_polarity_inversion, &max_rate);
3610	read_local_lni(dd, &enable_lane_rx);
3611
3612	/* convert to counts */
3613	tx = nibble_to_count(enable_lane_tx);
3614	rx = nibble_to_count(enable_lane_rx);
3615
3616	/*
3617	 * Set link_speed_active here, overriding what was set in
3618	 * handle_verify_cap().  The ASIC 8051 firmware does not correctly
3619	 * set the max_rate field in handle_verify_cap until v0.19.
3620	 */
3621	if ((dd->icode == ICODE_RTL_SILICON)
3622				&& (dd->dc8051_ver < dc8051_ver(0, 19))) {
3623		/* max_rate: 0 = 12.5G, 1 = 25G */
3624		switch (max_rate) {
3625		case 0:
3626			dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G;
3627			break;
3628		default:
3629			dd_dev_err(dd,
3630				"%s: unexpected max rate %d, using 25Gb\n",
3631				__func__, (int)max_rate);
3632			/* fall through */
3633		case 1:
3634			dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
3635			break;
3636		}
3637	}
3638
3639	dd_dev_info(dd,
3640		"Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n",
3641		enable_lane_tx, tx, enable_lane_rx, rx);
3642	*tx_width = link_width_to_bits(dd, tx);
3643	*rx_width = link_width_to_bits(dd, rx);
3644}
3645
3646/*
3647 * Read verify_cap_local_fm_link_width[1] to obtain the link widths.
3648 * Valid after the end of VerifyCap and during LinkUp.  Does not change
3649 * after link up.  I.e. look elsewhere for downgrade information.
3650 *
3651 * Bits are:
3652 *	+ bits [7:4] contain the number of active transmitters
3653 *	+ bits [3:0] contain the number of active receivers
3654 * These are numbers 1 through 4 and can be different values if the
3655 * link is asymmetric.
3656 *
3657 * verify_cap_local_fm_link_width[0] retains its original value.
3658 */
3659static void get_linkup_widths(struct hfi1_devdata *dd, u16 *tx_width,
3660			      u16 *rx_width)
3661{
3662	u16 widths, tx, rx;
3663	u8 misc_bits, local_flags;
3664	u16 active_tx, active_rx;
3665
3666	read_vc_local_link_width(dd, &misc_bits, &local_flags, &widths);
3667	tx = widths >> 12;
3668	rx = (widths >> 8) & 0xf;
3669
3670	*tx_width = link_width_to_bits(dd, tx);
3671	*rx_width = link_width_to_bits(dd, rx);
3672
3673	/* print the active widths */
3674	get_link_widths(dd, &active_tx, &active_rx);
3675}
3676
3677/*
3678 * Set ppd->link_width_active and ppd->link_width_downgrade_active using
3679 * hardware information when the link first comes up.
3680 *
3681 * The link width is not available until after VerifyCap.AllFramesReceived
3682 * (the trigger for handle_verify_cap), so this is outside that routine
3683 * and should be called when the 8051 signals linkup.
3684 */
3685void get_linkup_link_widths(struct hfi1_pportdata *ppd)
3686{
3687	u16 tx_width, rx_width;
3688
3689	/* get end-of-LNI link widths */
3690	get_linkup_widths(ppd->dd, &tx_width, &rx_width);
3691
3692	/* use tx_width as the link is supposed to be symmetric on link up */
3693	ppd->link_width_active = tx_width;
3694	/* link width downgrade active (LWD.A) starts out matching LW.A */
3695	ppd->link_width_downgrade_tx_active = ppd->link_width_active;
3696	ppd->link_width_downgrade_rx_active = ppd->link_width_active;
3697	/* per OPA spec, on link up LWD.E resets to LWD.S */
3698	ppd->link_width_downgrade_enabled = ppd->link_width_downgrade_supported;
3699	/* cache the active egress rate (units {10^6 bits/sec]) */
3700	ppd->current_egress_rate = active_egress_rate(ppd);
3701}
3702
3703/*
3704 * Handle a verify capabilities interrupt from the 8051.
3705 *
3706 * This is a work-queue function outside of the interrupt.
3707 */
3708void handle_verify_cap(struct work_struct *work)
3709{
3710	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3711								link_vc_work);
3712	struct hfi1_devdata *dd = ppd->dd;
3713	u64 reg;
3714	u8 power_management;
3715	u8 continious;
3716	u8 vcu;
3717	u8 vau;
3718	u8 z;
3719	u16 vl15buf;
3720	u16 link_widths;
3721	u16 crc_mask;
3722	u16 crc_val;
3723	u16 device_id;
3724	u16 active_tx, active_rx;
3725	u8 partner_supported_crc;
3726	u8 remote_tx_rate;
3727	u8 device_rev;
3728
3729	set_link_state(ppd, HLS_VERIFY_CAP);
3730
3731	lcb_shutdown(dd, 0);
3732	adjust_lcb_for_fpga_serdes(dd);
3733
3734	/*
3735	 * These are now valid:
3736	 *	remote VerifyCap fields in the general LNI config
3737	 *	CSR DC8051_STS_REMOTE_GUID
3738	 *	CSR DC8051_STS_REMOTE_NODE_TYPE
3739	 *	CSR DC8051_STS_REMOTE_FM_SECURITY
3740	 *	CSR DC8051_STS_REMOTE_PORT_NO
3741	 */
3742
3743	read_vc_remote_phy(dd, &power_management, &continious);
3744	read_vc_remote_fabric(
3745		dd,
3746		&vau,
3747		&z,
3748		&vcu,
3749		&vl15buf,
3750		&partner_supported_crc);
3751	read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths);
3752	read_remote_device_id(dd, &device_id, &device_rev);
3753	/*
3754	 * And the 'MgmtAllowed' information, which is exchanged during
3755	 * LNI, is also be available at this point.
3756	 */
3757	read_mgmt_allowed(dd, &ppd->mgmt_allowed);
3758	/* print the active widths */
3759	get_link_widths(dd, &active_tx, &active_rx);
3760	dd_dev_info(dd,
3761		"Peer PHY: power management 0x%x, continuous updates 0x%x\n",
3762		(int)power_management, (int)continious);
3763	dd_dev_info(dd,
3764		"Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
3765		(int)vau,
3766		(int)z,
3767		(int)vcu,
3768		(int)vl15buf,
3769		(int)partner_supported_crc);
3770	dd_dev_info(dd, "Peer Link Width: tx rate 0x%x, widths 0x%x\n",
3771		(u32)remote_tx_rate, (u32)link_widths);
3772	dd_dev_info(dd, "Peer Device ID: 0x%04x, Revision 0x%02x\n",
3773		(u32)device_id, (u32)device_rev);
3774	/*
3775	 * The peer vAU value just read is the peer receiver value.  HFI does
3776	 * not support a transmit vAU of 0 (AU == 8).  We advertised that
3777	 * with Z=1 in the fabric capabilities sent to the peer.  The peer
3778	 * will see our Z=1, and, if it advertised a vAU of 0, will move its
3779	 * receive to vAU of 1 (AU == 16).  Do the same here.  We do not care
3780	 * about the peer Z value - our sent vAU is 3 (hardwired) and is not
3781	 * subject to the Z value exception.
3782	 */
3783	if (vau == 0)
3784		vau = 1;
3785	set_up_vl15(dd, vau, vl15buf);
3786
3787	/* set up the LCB CRC mode */
3788	crc_mask = ppd->port_crc_mode_enabled & partner_supported_crc;
3789
3790	/* order is important: use the lowest bit in common */
3791	if (crc_mask & CAP_CRC_14B)
3792		crc_val = LCB_CRC_14B;
3793	else if (crc_mask & CAP_CRC_48B)
3794		crc_val = LCB_CRC_48B;
3795	else if (crc_mask & CAP_CRC_12B_16B_PER_LANE)
3796		crc_val = LCB_CRC_12B_16B_PER_LANE;
3797	else
3798		crc_val = LCB_CRC_16B;
3799
3800	dd_dev_info(dd, "Final LCB CRC mode: %d\n", (int)crc_val);
3801	write_csr(dd, DC_LCB_CFG_CRC_MODE,
3802		  (u64)crc_val << DC_LCB_CFG_CRC_MODE_TX_VAL_SHIFT);
3803
3804	/* set (14b only) or clear sideband credit */
3805	reg = read_csr(dd, SEND_CM_CTRL);
3806	if (crc_val == LCB_CRC_14B && crc_14b_sideband) {
3807		write_csr(dd, SEND_CM_CTRL,
3808			reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
3809	} else {
3810		write_csr(dd, SEND_CM_CTRL,
3811			reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
3812	}
3813
3814	ppd->link_speed_active = 0;	/* invalid value */
3815	if (dd->dc8051_ver < dc8051_ver(0, 20)) {
3816		/* remote_tx_rate: 0 = 12.5G, 1 = 25G */
3817		switch (remote_tx_rate) {
3818		case 0:
3819			ppd->link_speed_active = OPA_LINK_SPEED_12_5G;
3820			break;
3821		case 1:
3822			ppd->link_speed_active = OPA_LINK_SPEED_25G;
3823			break;
3824		}
3825	} else {
3826		/* actual rate is highest bit of the ANDed rates */
3827		u8 rate = remote_tx_rate & ppd->local_tx_rate;
3828
3829		if (rate & 2)
3830			ppd->link_speed_active = OPA_LINK_SPEED_25G;
3831		else if (rate & 1)
3832			ppd->link_speed_active = OPA_LINK_SPEED_12_5G;
3833	}
3834	if (ppd->link_speed_active == 0) {
3835		dd_dev_err(dd, "%s: unexpected remote tx rate %d, using 25Gb\n",
3836			__func__, (int)remote_tx_rate);
3837		ppd->link_speed_active = OPA_LINK_SPEED_25G;
3838	}
3839
3840	/*
3841	 * Cache the values of the supported, enabled, and active
3842	 * LTP CRC modes to return in 'portinfo' queries. But the bit
3843	 * flags that are returned in the portinfo query differ from
3844	 * what's in the link_crc_mask, crc_sizes, and crc_val
3845	 * variables. Convert these here.
3846	 */
3847	ppd->port_ltp_crc_mode = cap_to_port_ltp(link_crc_mask) << 8;
3848		/* supported crc modes */
3849	ppd->port_ltp_crc_mode |=
3850		cap_to_port_ltp(ppd->port_crc_mode_enabled) << 4;
3851		/* enabled crc modes */
3852	ppd->port_ltp_crc_mode |= lcb_to_port_ltp(crc_val);
3853		/* active crc mode */
3854
3855	/* set up the remote credit return table */
3856	assign_remote_cm_au_table(dd, vcu);
3857
3858	/*
3859	 * The LCB is reset on entry to handle_verify_cap(), so this must
3860	 * be applied on every link up.
3861	 *
3862	 * Adjust LCB error kill enable to kill the link if
3863	 * these RBUF errors are seen:
3864	 *	REPLAY_BUF_MBE_SMASK
3865	 *	FLIT_INPUT_BUF_MBE_SMASK
3866	 */
3867	if (is_a0(dd)) {			/* fixed in B0 */
3868		reg = read_csr(dd, DC_LCB_CFG_LINK_KILL_EN);
3869		reg |= DC_LCB_CFG_LINK_KILL_EN_REPLAY_BUF_MBE_SMASK
3870			| DC_LCB_CFG_LINK_KILL_EN_FLIT_INPUT_BUF_MBE_SMASK;
3871		write_csr(dd, DC_LCB_CFG_LINK_KILL_EN, reg);
3872	}
3873
3874	/* pull LCB fifos out of reset - all fifo clocks must be stable */
3875	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
3876
3877	/* give 8051 access to the LCB CSRs */
3878	write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
3879	set_8051_lcb_access(dd);
3880
3881	ppd->neighbor_guid =
3882		read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
3883	ppd->neighbor_port_number = read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
3884					DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
3885	ppd->neighbor_type =
3886		read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
3887		DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
3888	ppd->neighbor_fm_security =
3889		read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
3890		DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
3891	dd_dev_info(dd,
3892		"Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
3893		ppd->neighbor_guid, ppd->neighbor_type,
3894		ppd->mgmt_allowed, ppd->neighbor_fm_security);
3895	if (ppd->mgmt_allowed)
3896		add_full_mgmt_pkey(ppd);
3897
3898	/* tell the 8051 to go to LinkUp */
3899	set_link_state(ppd, HLS_GOING_UP);
3900}
3901
3902/*
3903 * Apply the link width downgrade enabled policy against the current active
3904 * link widths.
3905 *
3906 * Called when the enabled policy changes or the active link widths change.
3907 */
3908void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths)
3909{
3910	int skip = 1;
3911	int do_bounce = 0;
3912	u16 lwde = ppd->link_width_downgrade_enabled;
3913	u16 tx, rx;
3914
3915	mutex_lock(&ppd->hls_lock);
3916	/* only apply if the link is up */
3917	if (ppd->host_link_state & HLS_UP)
3918		skip = 0;
3919	mutex_unlock(&ppd->hls_lock);
3920	if (skip)
3921		return;
3922
3923	if (refresh_widths) {
3924		get_link_widths(ppd->dd, &tx, &rx);
3925		ppd->link_width_downgrade_tx_active = tx;
3926		ppd->link_width_downgrade_rx_active = rx;
3927	}
3928
3929	if (lwde == 0) {
3930		/* downgrade is disabled */
3931
3932		/* bounce if not at starting active width */
3933		if ((ppd->link_width_active !=
3934					ppd->link_width_downgrade_tx_active)
3935				|| (ppd->link_width_active !=
3936					ppd->link_width_downgrade_rx_active)) {
3937			dd_dev_err(ppd->dd,
3938				"Link downgrade is disabled and link has downgraded, downing link\n");
3939			dd_dev_err(ppd->dd,
3940				"  original 0x%x, tx active 0x%x, rx active 0x%x\n",
3941				ppd->link_width_active,
3942				ppd->link_width_downgrade_tx_active,
3943				ppd->link_width_downgrade_rx_active);
3944			do_bounce = 1;
3945		}
3946	} else if ((lwde & ppd->link_width_downgrade_tx_active) == 0
3947		|| (lwde & ppd->link_width_downgrade_rx_active) == 0) {
3948		/* Tx or Rx is outside the enabled policy */
3949		dd_dev_err(ppd->dd,
3950			"Link is outside of downgrade allowed, downing link\n");
3951		dd_dev_err(ppd->dd,
3952			"  enabled 0x%x, tx active 0x%x, rx active 0x%x\n",
3953			lwde,
3954			ppd->link_width_downgrade_tx_active,
3955			ppd->link_width_downgrade_rx_active);
3956		do_bounce = 1;
3957	}
3958
3959	if (do_bounce) {
3960		set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0,
3961		  OPA_LINKDOWN_REASON_WIDTH_POLICY);
3962		set_link_state(ppd, HLS_DN_OFFLINE);
3963		start_link(ppd);
3964	}
3965}
3966
3967/*
3968 * Handle a link downgrade interrupt from the 8051.
3969 *
3970 * This is a work-queue function outside of the interrupt.
3971 */
3972void handle_link_downgrade(struct work_struct *work)
3973{
3974	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3975							link_downgrade_work);
3976
3977	dd_dev_info(ppd->dd, "8051: Link width downgrade\n");
3978	apply_link_downgrade_policy(ppd, 1);
3979}
3980
3981static char *dcc_err_string(char *buf, int buf_len, u64 flags)
3982{
3983	return flag_string(buf, buf_len, flags, dcc_err_flags,
3984		ARRAY_SIZE(dcc_err_flags));
3985}
3986
3987static char *lcb_err_string(char *buf, int buf_len, u64 flags)
3988{
3989	return flag_string(buf, buf_len, flags, lcb_err_flags,
3990		ARRAY_SIZE(lcb_err_flags));
3991}
3992
3993static char *dc8051_err_string(char *buf, int buf_len, u64 flags)
3994{
3995	return flag_string(buf, buf_len, flags, dc8051_err_flags,
3996		ARRAY_SIZE(dc8051_err_flags));
3997}
3998
3999static char *dc8051_info_err_string(char *buf, int buf_len, u64 flags)
4000{
4001	return flag_string(buf, buf_len, flags, dc8051_info_err_flags,
4002		ARRAY_SIZE(dc8051_info_err_flags));
4003}
4004
4005static char *dc8051_info_host_msg_string(char *buf, int buf_len, u64 flags)
4006{
4007	return flag_string(buf, buf_len, flags, dc8051_info_host_msg_flags,
4008		ARRAY_SIZE(dc8051_info_host_msg_flags));
4009}
4010
4011static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
4012{
4013	struct hfi1_pportdata *ppd = dd->pport;
4014	u64 info, err, host_msg;
4015	int queue_link_down = 0;
4016	char buf[96];
4017
4018	/* look at the flags */
4019	if (reg & DC_DC8051_ERR_FLG_SET_BY_8051_SMASK) {
4020		/* 8051 information set by firmware */
4021		/* read DC8051_DBG_ERR_INFO_SET_BY_8051 for details */
4022		info = read_csr(dd, DC_DC8051_DBG_ERR_INFO_SET_BY_8051);
4023		err = (info >> DC_DC8051_DBG_ERR_INFO_SET_BY_8051_ERROR_SHIFT)
4024			& DC_DC8051_DBG_ERR_INFO_SET_BY_8051_ERROR_MASK;
4025		host_msg = (info >>
4026			DC_DC8051_DBG_ERR_INFO_SET_BY_8051_HOST_MSG_SHIFT)
4027			& DC_DC8051_DBG_ERR_INFO_SET_BY_8051_HOST_MSG_MASK;
4028
4029		/*
4030		 * Handle error flags.
4031		 */
4032		if (err & FAILED_LNI) {
4033			/*
4034			 * LNI error indications are cleared by the 8051
4035			 * only when starting polling.  Only pay attention
4036			 * to them when in the states that occur during
4037			 * LNI.
4038			 */
4039			if (ppd->host_link_state
4040			    & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
4041				queue_link_down = 1;
4042				dd_dev_info(dd, "Link error: %s\n",
4043					dc8051_info_err_string(buf,
4044						sizeof(buf),
4045						err & FAILED_LNI));
4046			}
4047			err &= ~(u64)FAILED_LNI;
4048		}
4049		if (err) {
4050			/* report remaining errors, but do not do anything */
4051			dd_dev_err(dd, "8051 info error: %s\n",
4052				dc8051_info_err_string(buf, sizeof(buf), err));
4053		}
4054
4055		/*
4056		 * Handle host message flags.
4057		 */
4058		if (host_msg & HOST_REQ_DONE) {
4059			/*
4060			 * Presently, the driver does a busy wait for
4061			 * host requests to complete.  This is only an
4062			 * informational message.
4063			 * NOTE: The 8051 clears the host message
4064			 * information *on the next 8051 command*.
4065			 * Therefore, when linkup is achieved,
4066			 * this flag will still be set.
4067			 */
4068			host_msg &= ~(u64)HOST_REQ_DONE;
4069		}
4070		if (host_msg & BC_SMA_MSG) {
4071			queue_work(ppd->hfi1_wq, &ppd->sma_message_work);
4072			host_msg &= ~(u64)BC_SMA_MSG;
4073		}
4074		if (host_msg & LINKUP_ACHIEVED) {
4075			dd_dev_info(dd, "8051: Link up\n");
4076			queue_work(ppd->hfi1_wq, &ppd->link_up_work);
4077			host_msg &= ~(u64)LINKUP_ACHIEVED;
4078		}
4079		if (host_msg & EXT_DEVICE_CFG_REQ) {
4080			handle_8051_request(dd);
4081			host_msg &= ~(u64)EXT_DEVICE_CFG_REQ;
4082		}
4083		if (host_msg & VERIFY_CAP_FRAME) {
4084			queue_work(ppd->hfi1_wq, &ppd->link_vc_work);
4085			host_msg &= ~(u64)VERIFY_CAP_FRAME;
4086		}
4087		if (host_msg & LINK_GOING_DOWN) {
4088			const char *extra = "";
4089			/* no downgrade action needed if going down */
4090			if (host_msg & LINK_WIDTH_DOWNGRADED) {
4091				host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
4092				extra = " (ignoring downgrade)";
4093			}
4094			dd_dev_info(dd, "8051: Link down%s\n", extra);
4095			queue_link_down = 1;
4096			host_msg &= ~(u64)LINK_GOING_DOWN;
4097		}
4098		if (host_msg & LINK_WIDTH_DOWNGRADED) {
4099			queue_work(ppd->hfi1_wq, &ppd->link_downgrade_work);
4100			host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
4101		}
4102		if (host_msg) {
4103			/* report remaining messages, but do not do anything */
4104			dd_dev_info(dd, "8051 info host message: %s\n",
4105				dc8051_info_host_msg_string(buf, sizeof(buf),
4106					host_msg));
4107		}
4108
4109		reg &= ~DC_DC8051_ERR_FLG_SET_BY_8051_SMASK;
4110	}
4111	if (reg & DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK) {
4112		/*
4113		 * Lost the 8051 heartbeat.  If this happens, we
4114		 * receive constant interrupts about it.  Disable
4115		 * the interrupt after the first.
4116		 */
4117		dd_dev_err(dd, "Lost 8051 heartbeat\n");
4118		write_csr(dd, DC_DC8051_ERR_EN,
4119			read_csr(dd, DC_DC8051_ERR_EN)
4120			  & ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK);
4121
4122		reg &= ~DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK;
4123	}
4124	if (reg) {
4125		/* report the error, but do not do anything */
4126		dd_dev_err(dd, "8051 error: %s\n",
4127			dc8051_err_string(buf, sizeof(buf), reg));
4128	}
4129
4130	if (queue_link_down) {
4131		/* if the link is already going down or disabled, do not
4132		 * queue another */
4133		if ((ppd->host_link_state
4134				    & (HLS_GOING_OFFLINE|HLS_LINK_COOLDOWN))
4135				|| ppd->link_enabled == 0) {
4136			dd_dev_info(dd, "%s: not queuing link down\n",
4137				__func__);
4138		} else {
4139			queue_work(ppd->hfi1_wq, &ppd->link_down_work);
4140		}
4141	}
4142}
4143
4144static const char * const fm_config_txt[] = {
4145[0] =
4146	"BadHeadDist: Distance violation between two head flits",
4147[1] =
4148	"BadTailDist: Distance violation between two tail flits",
4149[2] =
4150	"BadCtrlDist: Distance violation between two credit control flits",
4151[3] =
4152	"BadCrdAck: Credits return for unsupported VL",
4153[4] =
4154	"UnsupportedVLMarker: Received VL Marker",
4155[5] =
4156	"BadPreempt: Exceeded the preemption nesting level",
4157[6] =
4158	"BadControlFlit: Received unsupported control flit",
4159/* no 7 */
4160[8] =
4161	"UnsupportedVLMarker: Received VL Marker for unconfigured or disabled VL",
4162};
4163
4164static const char * const port_rcv_txt[] = {
4165[1] =
4166	"BadPktLen: Illegal PktLen",
4167[2] =
4168	"PktLenTooLong: Packet longer than PktLen",
4169[3] =
4170	"PktLenTooShort: Packet shorter than PktLen",
4171[4] =
4172	"BadSLID: Illegal SLID (0, using multicast as SLID, does not include security validation of SLID)",
4173[5] =
4174	"BadDLID: Illegal DLID (0, doesn't match HFI)",
4175[6] =
4176	"BadL2: Illegal L2 opcode",
4177[7] =
4178	"BadSC: Unsupported SC",
4179[9] =
4180	"BadRC: Illegal RC",
4181[11] =
4182	"PreemptError: Preempting with same VL",
4183[12] =
4184	"PreemptVL15: Preempting a VL15 packet",
4185};
4186
4187#define OPA_LDR_FMCONFIG_OFFSET 16
4188#define OPA_LDR_PORTRCV_OFFSET 0
4189static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
4190{
4191	u64 info, hdr0, hdr1;
4192	const char *extra;
4193	char buf[96];
4194	struct hfi1_pportdata *ppd = dd->pport;
4195	u8 lcl_reason = 0;
4196	int do_bounce = 0;
4197
4198	if (reg & DCC_ERR_FLG_UNCORRECTABLE_ERR_SMASK) {
4199		if (!(dd->err_info_uncorrectable & OPA_EI_STATUS_SMASK)) {
4200			info = read_csr(dd, DCC_ERR_INFO_UNCORRECTABLE);
4201			dd->err_info_uncorrectable = info & OPA_EI_CODE_SMASK;
4202			/* set status bit */
4203			dd->err_info_uncorrectable |= OPA_EI_STATUS_SMASK;
4204		}
4205		reg &= ~DCC_ERR_FLG_UNCORRECTABLE_ERR_SMASK;
4206	}
4207
4208	if (reg & DCC_ERR_FLG_LINK_ERR_SMASK) {
4209		struct hfi1_pportdata *ppd = dd->pport;
4210		/* this counter saturates at (2^32) - 1 */
4211		if (ppd->link_downed < (u32)UINT_MAX)
4212			ppd->link_downed++;
4213		reg &= ~DCC_ERR_FLG_LINK_ERR_SMASK;
4214	}
4215
4216	if (reg & DCC_ERR_FLG_FMCONFIG_ERR_SMASK) {
4217		u8 reason_valid = 1;
4218
4219		info = read_csr(dd, DCC_ERR_INFO_FMCONFIG);
4220		if (!(dd->err_info_fmconfig & OPA_EI_STATUS_SMASK)) {
4221			dd->err_info_fmconfig = info & OPA_EI_CODE_SMASK;
4222			/* set status bit */
4223			dd->err_info_fmconfig |= OPA_EI_STATUS_SMASK;
4224		}
4225		switch (info) {
4226		case 0:
4227		case 1:
4228		case 2:
4229		case 3:
4230		case 4:
4231		case 5:
4232		case 6:
4233			extra = fm_config_txt[info];
4234			break;
4235		case 8:
4236			extra = fm_config_txt[info];
4237			if (ppd->port_error_action &
4238			    OPA_PI_MASK_FM_CFG_UNSUPPORTED_VL_MARKER) {
4239				do_bounce = 1;
4240				/*
4241				 * lcl_reason cannot be derived from info
4242				 * for this error
4243				 */
4244				lcl_reason =
4245				  OPA_LINKDOWN_REASON_UNSUPPORTED_VL_MARKER;
4246			}
4247			break;
4248		default:
4249			reason_valid = 0;
4250			snprintf(buf, sizeof(buf), "reserved%lld", info);
4251			extra = buf;
4252			break;
4253		}
4254
4255		if (reason_valid && !do_bounce) {
4256			do_bounce = ppd->port_error_action &
4257					(1 << (OPA_LDR_FMCONFIG_OFFSET + info));
4258			lcl_reason = info + OPA_LINKDOWN_REASON_BAD_HEAD_DIST;
4259		}
4260
4261		/* just report this */
4262		dd_dev_info(dd, "DCC Error: fmconfig error: %s\n", extra);
4263		reg &= ~DCC_ERR_FLG_FMCONFIG_ERR_SMASK;
4264	}
4265
4266	if (reg & DCC_ERR_FLG_RCVPORT_ERR_SMASK) {
4267		u8 reason_valid = 1;
4268
4269		info = read_csr(dd, DCC_ERR_INFO_PORTRCV);
4270		hdr0 = read_csr(dd, DCC_ERR_INFO_PORTRCV_HDR0);
4271		hdr1 = read_csr(dd, DCC_ERR_INFO_PORTRCV_HDR1);
4272		if (!(dd->err_info_rcvport.status_and_code &
4273		      OPA_EI_STATUS_SMASK)) {
4274			dd->err_info_rcvport.status_and_code =
4275				info & OPA_EI_CODE_SMASK;
4276			/* set status bit */
4277			dd->err_info_rcvport.status_and_code |=
4278				OPA_EI_STATUS_SMASK;
4279			/* save first 2 flits in the packet that caused
4280			 * the error */
4281			 dd->err_info_rcvport.packet_flit1 = hdr0;
4282			 dd->err_info_rcvport.packet_flit2 = hdr1;
4283		}
4284		switch (info) {
4285		case 1:
4286		case 2:
4287		case 3:
4288		case 4:
4289		case 5:
4290		case 6:
4291		case 7:
4292		case 9:
4293		case 11:
4294		case 12:
4295			extra = port_rcv_txt[info];
4296			break;
4297		default:
4298			reason_valid = 0;
4299			snprintf(buf, sizeof(buf), "reserved%lld", info);
4300			extra = buf;
4301			break;
4302		}
4303
4304		if (reason_valid && !do_bounce) {
4305			do_bounce = ppd->port_error_action &
4306					(1 << (OPA_LDR_PORTRCV_OFFSET + info));
4307			lcl_reason = info + OPA_LINKDOWN_REASON_RCV_ERROR_0;
4308		}
4309
4310		/* just report this */
4311		dd_dev_info(dd, "DCC Error: PortRcv error: %s\n", extra);
4312		dd_dev_info(dd, "           hdr0 0x%llx, hdr1 0x%llx\n",
4313			hdr0, hdr1);
4314
4315		reg &= ~DCC_ERR_FLG_RCVPORT_ERR_SMASK;
4316	}
4317
4318	if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK) {
4319		/* informative only */
4320		dd_dev_info(dd, "8051 access to LCB blocked\n");
4321		reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK;
4322	}
4323	if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK) {
4324		/* informative only */
4325		dd_dev_info(dd, "host access to LCB blocked\n");
4326		reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
4327	}
4328
4329	/* report any remaining errors */
4330	if (reg)
4331		dd_dev_info(dd, "DCC Error: %s\n",
4332			dcc_err_string(buf, sizeof(buf), reg));
4333
4334	if (lcl_reason == 0)
4335		lcl_reason = OPA_LINKDOWN_REASON_UNKNOWN;
4336
4337	if (do_bounce) {
4338		dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
4339		set_link_down_reason(ppd, lcl_reason, 0, lcl_reason);
4340		queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
4341	}
4342}
4343
4344static void handle_lcb_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
4345{
4346	char buf[96];
4347
4348	dd_dev_info(dd, "LCB Error: %s\n",
4349		lcb_err_string(buf, sizeof(buf), reg));
4350}
4351
4352/*
4353 * CCE block DC interrupt.  Source is < 8.
4354 */
4355static void is_dc_int(struct hfi1_devdata *dd, unsigned int source)
4356{
4357	const struct err_reg_info *eri = &dc_errs[source];
4358
4359	if (eri->handler) {
4360		interrupt_clear_down(dd, 0, eri);
4361	} else if (source == 3 /* dc_lbm_int */) {
4362		/*
4363		 * This indicates that a parity error has occurred on the
4364		 * address/control lines presented to the LBM.  The error
4365		 * is a single pulse, there is no associated error flag,
4366		 * and it is non-maskable.  This is because if a parity
4367		 * error occurs on the request the request is dropped.
4368		 * This should never occur, but it is nice to know if it
4369		 * ever does.
4370		 */
4371		dd_dev_err(dd, "Parity error in DC LBM block\n");
4372	} else {
4373		dd_dev_err(dd, "Invalid DC interrupt %u\n", source);
4374	}
4375}
4376
4377/*
4378 * TX block send credit interrupt.  Source is < 160.
4379 */
4380static void is_send_credit_int(struct hfi1_devdata *dd, unsigned int source)
4381{
4382	sc_group_release_update(dd, source);
4383}
4384
4385/*
4386 * TX block SDMA interrupt.  Source is < 48.
4387 *
4388 * SDMA interrupts are grouped by type:
4389 *
4390 *	 0 -  N-1 = SDma
4391 *	 N - 2N-1 = SDmaProgress
4392 *	2N - 3N-1 = SDmaIdle
4393 */
4394static void is_sdma_eng_int(struct hfi1_devdata *dd, unsigned int source)
4395{
4396	/* what interrupt */
4397	unsigned int what  = source / TXE_NUM_SDMA_ENGINES;
4398	/* which engine */
4399	unsigned int which = source % TXE_NUM_SDMA_ENGINES;
4400
4401#ifdef CONFIG_SDMA_VERBOSITY
4402	dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", which,
4403		   slashstrip(__FILE__), __LINE__, __func__);
4404	sdma_dumpstate(&dd->per_sdma[which]);
4405#endif
4406
4407	if (likely(what < 3 && which < dd->num_sdma)) {
4408		sdma_engine_interrupt(&dd->per_sdma[which], 1ull << source);
4409	} else {
4410		/* should not happen */
4411		dd_dev_err(dd, "Invalid SDMA interrupt 0x%x\n", source);
4412	}
4413}
4414
4415/*
4416 * RX block receive available interrupt.  Source is < 160.
4417 */
4418static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
4419{
4420	struct hfi1_ctxtdata *rcd;
4421	char *err_detail;
4422
4423	if (likely(source < dd->num_rcv_contexts)) {
4424		rcd = dd->rcd[source];
4425		if (rcd) {
4426			if (source < dd->first_user_ctxt)
4427				rcd->do_interrupt(rcd, 0);
4428			else
4429				handle_user_interrupt(rcd);
4430			return;	/* OK */
4431		}
4432		/* received an interrupt, but no rcd */
4433		err_detail = "dataless";
4434	} else {
4435		/* received an interrupt, but are not using that context */
4436		err_detail = "out of range";
4437	}
4438	dd_dev_err(dd, "unexpected %s receive available context interrupt %u\n",
4439		err_detail, source);
4440}
4441
4442/*
4443 * RX block receive urgent interrupt.  Source is < 160.
4444 */
4445static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
4446{
4447	struct hfi1_ctxtdata *rcd;
4448	char *err_detail;
4449
4450	if (likely(source < dd->num_rcv_contexts)) {
4451		rcd = dd->rcd[source];
4452		if (rcd) {
4453			/* only pay attention to user urgent interrupts */
4454			if (source >= dd->first_user_ctxt)
4455				handle_user_interrupt(rcd);
4456			return;	/* OK */
4457		}
4458		/* received an interrupt, but no rcd */
4459		err_detail = "dataless";
4460	} else {
4461		/* received an interrupt, but are not using that context */
4462		err_detail = "out of range";
4463	}
4464	dd_dev_err(dd, "unexpected %s receive urgent context interrupt %u\n",
4465		err_detail, source);
4466}
4467
4468/*
4469 * Reserved range interrupt.  Should not be called in normal operation.
4470 */
4471static void is_reserved_int(struct hfi1_devdata *dd, unsigned int source)
4472{
4473	char name[64];
4474
4475	dd_dev_err(dd, "unexpected %s interrupt\n",
4476				is_reserved_name(name, sizeof(name), source));
4477}
4478
4479static const struct is_table is_table[] = {
4480/* start		     end
4481				name func		interrupt func */
4482{ IS_GENERAL_ERR_START,  IS_GENERAL_ERR_END,
4483				is_misc_err_name,	is_misc_err_int },
4484{ IS_SDMAENG_ERR_START,  IS_SDMAENG_ERR_END,
4485				is_sdma_eng_err_name,	is_sdma_eng_err_int },
4486{ IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END,
4487				is_sendctxt_err_name,	is_sendctxt_err_int },
4488{ IS_SDMA_START,	     IS_SDMA_END,
4489				is_sdma_eng_name,	is_sdma_eng_int },
4490{ IS_VARIOUS_START,	     IS_VARIOUS_END,
4491				is_various_name,	is_various_int },
4492{ IS_DC_START,	     IS_DC_END,
4493				is_dc_name,		is_dc_int },
4494{ IS_RCVAVAIL_START,     IS_RCVAVAIL_END,
4495				is_rcv_avail_name,	is_rcv_avail_int },
4496{ IS_RCVURGENT_START,    IS_RCVURGENT_END,
4497				is_rcv_urgent_name,	is_rcv_urgent_int },
4498{ IS_SENDCREDIT_START,   IS_SENDCREDIT_END,
4499				is_send_credit_name,	is_send_credit_int},
4500{ IS_RESERVED_START,     IS_RESERVED_END,
4501				is_reserved_name,	is_reserved_int},
4502};
4503
4504/*
4505 * Interrupt source interrupt - called when the given source has an interrupt.
4506 * Source is a bit index into an array of 64-bit integers.
4507 */
4508static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
4509{
4510	const struct is_table *entry;
4511
4512	/* avoids a double compare by walking the table in-order */
4513	for (entry = &is_table[0]; entry->is_name; entry++) {
4514		if (source < entry->end) {
4515			trace_hfi1_interrupt(dd, entry, source);
4516			entry->is_int(dd, source - entry->start);
4517			return;
4518		}
4519	}
4520	/* fell off the end */
4521	dd_dev_err(dd, "invalid interrupt source %u\n", source);
4522}
4523
4524/*
4525 * General interrupt handler.  This is able to correctly handle
4526 * all interrupts in case INTx is used.
4527 */
4528static irqreturn_t general_interrupt(int irq, void *data)
4529{
4530	struct hfi1_devdata *dd = data;
4531	u64 regs[CCE_NUM_INT_CSRS];
4532	u32 bit;
4533	int i;
4534
4535	this_cpu_inc(*dd->int_counter);
4536
4537	/* phase 1: scan and clear all handled interrupts */
4538	for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
4539		if (dd->gi_mask[i] == 0) {
4540			regs[i] = 0;	/* used later */
4541			continue;
4542		}
4543		regs[i] = read_csr(dd, CCE_INT_STATUS + (8 * i)) &
4544				dd->gi_mask[i];
4545		/* only clear if anything is set */
4546		if (regs[i])
4547			write_csr(dd, CCE_INT_CLEAR + (8 * i), regs[i]);
4548	}
4549
4550	/* phase 2: call the appropriate handler */
4551	for_each_set_bit(bit, (unsigned long *)&regs[0],
4552						CCE_NUM_INT_CSRS*64) {
4553		is_interrupt(dd, bit);
4554	}
4555
4556	return IRQ_HANDLED;
4557}
4558
4559static irqreturn_t sdma_interrupt(int irq, void *data)
4560{
4561	struct sdma_engine *sde = data;
4562	struct hfi1_devdata *dd = sde->dd;
4563	u64 status;
4564
4565#ifdef CONFIG_SDMA_VERBOSITY
4566	dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
4567		   slashstrip(__FILE__), __LINE__, __func__);
4568	sdma_dumpstate(sde);
4569#endif
4570
4571	this_cpu_inc(*dd->int_counter);
4572
4573	/* This read_csr is really bad in the hot path */
4574	status = read_csr(dd,
4575			CCE_INT_STATUS + (8*(IS_SDMA_START/64)))
4576			& sde->imask;
4577	if (likely(status)) {
4578		/* clear the interrupt(s) */
4579		write_csr(dd,
4580			CCE_INT_CLEAR + (8*(IS_SDMA_START/64)),
4581			status);
4582
4583		/* handle the interrupt(s) */
4584		sdma_engine_interrupt(sde, status);
4585	} else
4586		dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
4587			sde->this_idx);
4588
4589	return IRQ_HANDLED;
4590}
4591
4592/*
4593 * Clear the receive interrupt, forcing the write and making sure
4594 * we have data from the chip, pushing everything in front of it
4595 * back to the host.
4596 */
4597static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd)
4598{
4599	struct hfi1_devdata *dd = rcd->dd;
4600	u32 addr = CCE_INT_CLEAR + (8 * rcd->ireg);
4601
4602	mmiowb();	/* make sure everything before is written */
4603	write_csr(dd, addr, rcd->imask);
4604	/* force the above write on the chip and get a value back */
4605	(void)read_csr(dd, addr);
4606}
4607
4608/* force the receive interrupt */
4609static inline void force_recv_intr(struct hfi1_ctxtdata *rcd)
4610{
4611	write_csr(rcd->dd, CCE_INT_FORCE + (8 * rcd->ireg), rcd->imask);
4612}
4613
4614/* return non-zero if a packet is present */
4615static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
4616{
4617	if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
4618		return (rcd->seq_cnt ==
4619				rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
4620
4621	/* else is RDMA rtail */
4622	return (rcd->head != get_rcvhdrtail(rcd));
4623}
4624
4625/*
4626 * Receive packet IRQ handler.  This routine expects to be on its own IRQ.
4627 * This routine will try to handle packets immediately (latency), but if
4628 * it finds too many, it will invoke the thread handler (bandwitdh).  The
4629 * chip receive interupt is *not* cleared down until this or the thread (if
4630 * invoked) is finished.  The intent is to avoid extra interrupts while we
4631 * are processing packets anyway.
4632 */
4633static irqreturn_t receive_context_interrupt(int irq, void *data)
4634{
4635	struct hfi1_ctxtdata *rcd = data;
4636	struct hfi1_devdata *dd = rcd->dd;
4637	int disposition;
4638	int present;
4639
4640	trace_hfi1_receive_interrupt(dd, rcd->ctxt);
4641	this_cpu_inc(*dd->int_counter);
4642
4643	/* receive interrupt remains blocked while processing packets */
4644	disposition = rcd->do_interrupt(rcd, 0);
4645
4646	/*
4647	 * Too many packets were seen while processing packets in this
4648	 * IRQ handler.  Invoke the handler thread.  The receive interrupt
4649	 * remains blocked.
4650	 */
4651	if (disposition == RCV_PKT_LIMIT)
4652		return IRQ_WAKE_THREAD;
4653
4654	/*
4655	 * The packet processor detected no more packets.  Clear the receive
4656	 * interrupt and recheck for a packet packet that may have arrived
4657	 * after the previous check and interrupt clear.  If a packet arrived,
4658	 * force another interrupt.
4659	 */
4660	clear_recv_intr(rcd);
4661	present = check_packet_present(rcd);
4662	if (present)
4663		force_recv_intr(rcd);
4664
4665	return IRQ_HANDLED;
4666}
4667
4668/*
4669 * Receive packet thread handler.  This expects to be invoked with the
4670 * receive interrupt still blocked.
4671 */
4672static irqreturn_t receive_context_thread(int irq, void *data)
4673{
4674	struct hfi1_ctxtdata *rcd = data;
4675	int present;
4676
4677	/* receive interrupt is still blocked from the IRQ handler */
4678	(void)rcd->do_interrupt(rcd, 1);
4679
4680	/*
4681	 * The packet processor will only return if it detected no more
4682	 * packets.  Hold IRQs here so we can safely clear the interrupt and
4683	 * recheck for a packet that may have arrived after the previous
4684	 * check and the interrupt clear.  If a packet arrived, force another
4685	 * interrupt.
4686	 */
4687	local_irq_disable();
4688	clear_recv_intr(rcd);
4689	present = check_packet_present(rcd);
4690	if (present)
4691		force_recv_intr(rcd);
4692	local_irq_enable();
4693
4694	return IRQ_HANDLED;
4695}
4696
4697/* ========================================================================= */
4698
4699u32 read_physical_state(struct hfi1_devdata *dd)
4700{
4701	u64 reg;
4702
4703	reg = read_csr(dd, DC_DC8051_STS_CUR_STATE);
4704	return (reg >> DC_DC8051_STS_CUR_STATE_PORT_SHIFT)
4705				& DC_DC8051_STS_CUR_STATE_PORT_MASK;
4706}
4707
4708static u32 read_logical_state(struct hfi1_devdata *dd)
4709{
4710	u64 reg;
4711
4712	reg = read_csr(dd, DCC_CFG_PORT_CONFIG);
4713	return (reg >> DCC_CFG_PORT_CONFIG_LINK_STATE_SHIFT)
4714				& DCC_CFG_PORT_CONFIG_LINK_STATE_MASK;
4715}
4716
4717static void set_logical_state(struct hfi1_devdata *dd, u32 chip_lstate)
4718{
4719	u64 reg;
4720
4721	reg = read_csr(dd, DCC_CFG_PORT_CONFIG);
4722	/* clear current state, set new state */
4723	reg &= ~DCC_CFG_PORT_CONFIG_LINK_STATE_SMASK;
4724	reg |= (u64)chip_lstate << DCC_CFG_PORT_CONFIG_LINK_STATE_SHIFT;
4725	write_csr(dd, DCC_CFG_PORT_CONFIG, reg);
4726}
4727
4728/*
4729 * Use the 8051 to read a LCB CSR.
4730 */
4731static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data)
4732{
4733	u32 regno;
4734	int ret;
4735
4736	if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
4737		if (acquire_lcb_access(dd, 0) == 0) {
4738			*data = read_csr(dd, addr);
4739			release_lcb_access(dd, 0);
4740			return 0;
4741		}
4742		return -EBUSY;
4743	}
4744
4745	/* register is an index of LCB registers: (offset - base) / 8 */
4746	regno = (addr - DC_LCB_CFG_RUN) >> 3;
4747	ret = do_8051_command(dd, HCMD_READ_LCB_CSR, regno, data);
4748	if (ret != HCMD_SUCCESS)
4749		return -EBUSY;
4750	return 0;
4751}
4752
4753/*
4754 * Read an LCB CSR.  Access may not be in host control, so check.
4755 * Return 0 on success, -EBUSY on failure.
4756 */
4757int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data)
4758{
4759	struct hfi1_pportdata *ppd = dd->pport;
4760
4761	/* if up, go through the 8051 for the value */
4762	if (ppd->host_link_state & HLS_UP)
4763		return read_lcb_via_8051(dd, addr, data);
4764	/* if going up or down, no access */
4765	if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
4766		return -EBUSY;
4767	/* otherwise, host has access */
4768	*data = read_csr(dd, addr);
4769	return 0;
4770}
4771
4772/*
4773 * Use the 8051 to write a LCB CSR.
4774 */
4775static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data)
4776{
4777
4778	if (acquire_lcb_access(dd, 0) == 0) {
4779		write_csr(dd, addr, data);
4780		release_lcb_access(dd, 0);
4781		return 0;
4782	}
4783	return -EBUSY;
4784}
4785
4786/*
4787 * Write an LCB CSR.  Access may not be in host control, so check.
4788 * Return 0 on success, -EBUSY on failure.
4789 */
4790int write_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 data)
4791{
4792	struct hfi1_pportdata *ppd = dd->pport;
4793
4794	/* if up, go through the 8051 for the value */
4795	if (ppd->host_link_state & HLS_UP)
4796		return write_lcb_via_8051(dd, addr, data);
4797	/* if going up or down, no access */
4798	if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
4799		return -EBUSY;
4800	/* otherwise, host has access */
4801	write_csr(dd, addr, data);
4802	return 0;
4803}
4804
4805/*
4806 * Returns:
4807 *	< 0 = Linux error, not able to get access
4808 *	> 0 = 8051 command RETURN_CODE
4809 */
4810static int do_8051_command(
4811	struct hfi1_devdata *dd,
4812	u32 type,
4813	u64 in_data,
4814	u64 *out_data)
4815{
4816	u64 reg, completed;
4817	int return_code;
4818	unsigned long flags;
4819	unsigned long timeout;
4820
4821	hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
4822
4823	/*
4824	 * Alternative to holding the lock for a long time:
4825	 * - keep busy wait - have other users bounce off
4826	 */
4827	spin_lock_irqsave(&dd->dc8051_lock, flags);
4828
4829	/* We can't send any commands to the 8051 if it's in reset */
4830	if (dd->dc_shutdown) {
4831		return_code = -ENODEV;
4832		goto fail;
4833	}
4834
4835	/*
4836	 * If an 8051 host command timed out previously, then the 8051 is
4837	 * stuck.
4838	 *
4839	 * On first timeout, attempt to reset and restart the entire DC
4840	 * block (including 8051). (Is this too big of a hammer?)
4841	 *
4842	 * If the 8051 times out a second time, the reset did not bring it
4843	 * back to healthy life. In that case, fail any subsequent commands.
4844	 */
4845	if (dd->dc8051_timed_out) {
4846		if (dd->dc8051_timed_out > 1) {
4847			dd_dev_err(dd,
4848				   "Previous 8051 host command timed out, skipping command %u\n",
4849				   type);
4850			return_code = -ENXIO;
4851			goto fail;
4852		}
4853		spin_unlock_irqrestore(&dd->dc8051_lock, flags);
4854		dc_shutdown(dd);
4855		dc_start(dd);
4856		spin_lock_irqsave(&dd->dc8051_lock, flags);
4857	}
4858
4859	/*
4860	 * If there is no timeout, then the 8051 command interface is
4861	 * waiting for a command.
4862	 */
4863
4864	/*
4865	 * Do two writes: the first to stabilize the type and req_data, the
4866	 * second to activate.
4867	 */
4868	reg = ((u64)type & DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_MASK)
4869			<< DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_SHIFT
4870		| (in_data & DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_MASK)
4871			<< DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_SHIFT;
4872	write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, reg);
4873	reg |= DC_DC8051_CFG_HOST_CMD_0_REQ_NEW_SMASK;
4874	write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, reg);
4875
4876	/* wait for completion, alternate: interrupt */
4877	timeout = jiffies + msecs_to_jiffies(DC8051_COMMAND_TIMEOUT);
4878	while (1) {
4879		reg = read_csr(dd, DC_DC8051_CFG_HOST_CMD_1);
4880		completed = reg & DC_DC8051_CFG_HOST_CMD_1_COMPLETED_SMASK;
4881		if (completed)
4882			break;
4883		if (time_after(jiffies, timeout)) {
4884			dd->dc8051_timed_out++;
4885			dd_dev_err(dd, "8051 host command %u timeout\n", type);
4886			if (out_data)
4887				*out_data = 0;
4888			return_code = -ETIMEDOUT;
4889			goto fail;
4890		}
4891		udelay(2);
4892	}
4893
4894	if (out_data) {
4895		*out_data = (reg >> DC_DC8051_CFG_HOST_CMD_1_RSP_DATA_SHIFT)
4896				& DC_DC8051_CFG_HOST_CMD_1_RSP_DATA_MASK;
4897		if (type == HCMD_READ_LCB_CSR) {
4898			/* top 16 bits are in a different register */
4899			*out_data |= (read_csr(dd, DC_DC8051_CFG_EXT_DEV_1)
4900				& DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SMASK)
4901				<< (48
4902				    - DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SHIFT);
4903		}
4904	}
4905	return_code = (reg >> DC_DC8051_CFG_HOST_CMD_1_RETURN_CODE_SHIFT)
4906				& DC_DC8051_CFG_HOST_CMD_1_RETURN_CODE_MASK;
4907	dd->dc8051_timed_out = 0;
4908	/*
4909	 * Clear command for next user.
4910	 */
4911	write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
4912
4913fail:
4914	spin_unlock_irqrestore(&dd->dc8051_lock, flags);
4915
4916	return return_code;
4917}
4918
4919static int set_physical_link_state(struct hfi1_devdata *dd, u64 state)
4920{
4921	return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL);
4922}
4923
4924static int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
4925			    u8 lane_id, u32 config_data)
4926{
4927	u64 data;
4928	int ret;
4929
4930	data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT
4931		| (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT
4932		| (u64)config_data << LOAD_DATA_DATA_SHIFT;
4933	ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
4934	if (ret != HCMD_SUCCESS) {
4935		dd_dev_err(dd,
4936			"load 8051 config: field id %d, lane %d, err %d\n",
4937			(int)field_id, (int)lane_id, ret);
4938	}
4939	return ret;
4940}
4941
4942/*
4943 * Read the 8051 firmware "registers".  Use the RAM directly.  Always
4944 * set the result, even on error.
4945 * Return 0 on success, -errno on failure
4946 */
4947static int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
4948			    u32 *result)
4949{
4950	u64 big_data;
4951	u32 addr;
4952	int ret;
4953
4954	/* address start depends on the lane_id */
4955	if (lane_id < 4)
4956		addr = (4 * NUM_GENERAL_FIELDS)
4957			+ (lane_id * 4 * NUM_LANE_FIELDS);
4958	else
4959		addr = 0;
4960	addr += field_id * 4;
4961
4962	/* read is in 8-byte chunks, hardware will truncate the address down */
4963	ret = read_8051_data(dd, addr, 8, &big_data);
4964
4965	if (ret == 0) {
4966		/* extract the 4 bytes we want */
4967		if (addr & 0x4)
4968			*result = (u32)(big_data >> 32);
4969		else
4970			*result = (u32)big_data;
4971	} else {
4972		*result = 0;
4973		dd_dev_err(dd, "%s: direct read failed, lane %d, field %d!\n",
4974			__func__, lane_id, field_id);
4975	}
4976
4977	return ret;
4978}
4979
4980static int write_vc_local_phy(struct hfi1_devdata *dd, u8 power_management,
4981			      u8 continuous)
4982{
4983	u32 frame;
4984
4985	frame = continuous << CONTINIOUS_REMOTE_UPDATE_SUPPORT_SHIFT
4986		| power_management << POWER_MANAGEMENT_SHIFT;
4987	return load_8051_config(dd, VERIFY_CAP_LOCAL_PHY,
4988				GENERAL_CONFIG, frame);
4989}
4990
4991static int write_vc_local_fabric(struct hfi1_devdata *dd, u8 vau, u8 z, u8 vcu,
4992				 u16 vl15buf, u8 crc_sizes)
4993{
4994	u32 frame;
4995
4996	frame = (u32)vau << VAU_SHIFT
4997		| (u32)z << Z_SHIFT
4998		| (u32)vcu << VCU_SHIFT
4999		| (u32)vl15buf << VL15BUF_SHIFT
5000		| (u32)crc_sizes << CRC_SIZES_SHIFT;
5001	return load_8051_config(dd, VERIFY_CAP_LOCAL_FABRIC,
5002				GENERAL_CONFIG, frame);
5003}
5004
5005static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
5006				     u8 *flag_bits, u16 *link_widths)
5007{
5008	u32 frame;
5009
5010	read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
5011				&frame);
5012	*misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK;
5013	*flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK;
5014	*link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
5015}
5016
5017static int write_vc_local_link_width(struct hfi1_devdata *dd,
5018				     u8 misc_bits,
5019				     u8 flag_bits,
5020				     u16 link_widths)
5021{
5022	u32 frame;
5023
5024	frame = (u32)misc_bits << MISC_CONFIG_BITS_SHIFT
5025		| (u32)flag_bits << LOCAL_FLAG_BITS_SHIFT
5026		| (u32)link_widths << LINK_WIDTH_SHIFT;
5027	return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
5028		     frame);
5029}
5030
5031static int write_local_device_id(struct hfi1_devdata *dd, u16 device_id,
5032				 u8 device_rev)
5033{
5034	u32 frame;
5035
5036	frame = ((u32)device_id << LOCAL_DEVICE_ID_SHIFT)
5037		| ((u32)device_rev << LOCAL_DEVICE_REV_SHIFT);
5038	return load_8051_config(dd, LOCAL_DEVICE_ID, GENERAL_CONFIG, frame);
5039}
5040
5041static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
5042				  u8 *device_rev)
5043{
5044	u32 frame;
5045
5046	read_8051_config(dd, REMOTE_DEVICE_ID, GENERAL_CONFIG, &frame);
5047	*device_id = (frame >> REMOTE_DEVICE_ID_SHIFT) & REMOTE_DEVICE_ID_MASK;
5048	*device_rev = (frame >> REMOTE_DEVICE_REV_SHIFT)
5049			& REMOTE_DEVICE_REV_MASK;
5050}
5051
5052void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b)
5053{
5054	u32 frame;
5055
5056	read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame);
5057	*ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK;
5058	*ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK;
5059}
5060
5061static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
5062			       u8 *continuous)
5063{
5064	u32 frame;
5065
5066	read_8051_config(dd, VERIFY_CAP_REMOTE_PHY, GENERAL_CONFIG, &frame);
5067	*power_management = (frame >> POWER_MANAGEMENT_SHIFT)
5068					& POWER_MANAGEMENT_MASK;
5069	*continuous = (frame >> CONTINIOUS_REMOTE_UPDATE_SUPPORT_SHIFT)
5070					& CONTINIOUS_REMOTE_UPDATE_SUPPORT_MASK;
5071}
5072
5073static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
5074				  u8 *vcu, u16 *vl15buf, u8 *crc_sizes)
5075{
5076	u32 frame;
5077
5078	read_8051_config(dd, VERIFY_CAP_REMOTE_FABRIC, GENERAL_CONFIG, &frame);
5079	*vau = (frame >> VAU_SHIFT) & VAU_MASK;
5080	*z = (frame >> Z_SHIFT) & Z_MASK;
5081	*vcu = (frame >> VCU_SHIFT) & VCU_MASK;
5082	*vl15buf = (frame >> VL15BUF_SHIFT) & VL15BUF_MASK;
5083	*crc_sizes = (frame >> CRC_SIZES_SHIFT) & CRC_SIZES_MASK;
5084}
5085
5086static void read_vc_remote_link_width(struct hfi1_devdata *dd,
5087				      u8 *remote_tx_rate,
5088				      u16 *link_widths)
5089{
5090	u32 frame;
5091
5092	read_8051_config(dd, VERIFY_CAP_REMOTE_LINK_WIDTH, GENERAL_CONFIG,
5093				&frame);
5094	*remote_tx_rate = (frame >> REMOTE_TX_RATE_SHIFT)
5095				& REMOTE_TX_RATE_MASK;
5096	*link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
5097}
5098
5099static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx)
5100{
5101	u32 frame;
5102
5103	read_8051_config(dd, LOCAL_LNI_INFO, GENERAL_CONFIG, &frame);
5104	*enable_lane_rx = (frame >> ENABLE_LANE_RX_SHIFT) & ENABLE_LANE_RX_MASK;
5105}
5106
5107static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed)
5108{
5109	u32 frame;
5110
5111	read_8051_config(dd, REMOTE_LNI_INFO, GENERAL_CONFIG, &frame);
5112	*mgmt_allowed = (frame >> MGMT_ALLOWED_SHIFT) & MGMT_ALLOWED_MASK;
5113}
5114
5115static void read_last_local_state(struct hfi1_devdata *dd, u32 *lls)
5116{
5117	read_8051_config(dd, LAST_LOCAL_STATE_COMPLETE, GENERAL_CONFIG, lls);
5118}
5119
5120static void read_last_remote_state(struct hfi1_devdata *dd, u32 *lrs)
5121{
5122	read_8051_config(dd, LAST_REMOTE_STATE_COMPLETE, GENERAL_CONFIG, lrs);
5123}
5124
5125void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality)
5126{
5127	u32 frame;
5128	int ret;
5129
5130	*link_quality = 0;
5131	if (dd->pport->host_link_state & HLS_UP) {
5132		ret = read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG,
5133					&frame);
5134		if (ret == 0)
5135			*link_quality = (frame >> LINK_QUALITY_SHIFT)
5136						& LINK_QUALITY_MASK;
5137	}
5138}
5139
5140static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc)
5141{
5142	u32 frame;
5143
5144	read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG, &frame);
5145	*pdrrc = (frame >> DOWN_REMOTE_REASON_SHIFT) & DOWN_REMOTE_REASON_MASK;
5146}
5147
5148static int read_tx_settings(struct hfi1_devdata *dd,
5149			    u8 *enable_lane_tx,
5150			    u8 *tx_polarity_inversion,
5151			    u8 *rx_polarity_inversion,
5152			    u8 *max_rate)
5153{
5154	u32 frame;
5155	int ret;
5156
5157	ret = read_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, &frame);
5158	*enable_lane_tx = (frame >> ENABLE_LANE_TX_SHIFT)
5159				& ENABLE_LANE_TX_MASK;
5160	*tx_polarity_inversion = (frame >> TX_POLARITY_INVERSION_SHIFT)
5161				& TX_POLARITY_INVERSION_MASK;
5162	*rx_polarity_inversion = (frame >> RX_POLARITY_INVERSION_SHIFT)
5163				& RX_POLARITY_INVERSION_MASK;
5164	*max_rate = (frame >> MAX_RATE_SHIFT) & MAX_RATE_MASK;
5165	return ret;
5166}
5167
5168static int write_tx_settings(struct hfi1_devdata *dd,
5169			     u8 enable_lane_tx,
5170			     u8 tx_polarity_inversion,
5171			     u8 rx_polarity_inversion,
5172			     u8 max_rate)
5173{
5174	u32 frame;
5175
5176	/* no need to mask, all variable sizes match field widths */
5177	frame = enable_lane_tx << ENABLE_LANE_TX_SHIFT
5178		| tx_polarity_inversion << TX_POLARITY_INVERSION_SHIFT
5179		| rx_polarity_inversion << RX_POLARITY_INVERSION_SHIFT
5180		| max_rate << MAX_RATE_SHIFT;
5181	return load_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, frame);
5182}
5183
5184static void check_fabric_firmware_versions(struct hfi1_devdata *dd)
5185{
5186	u32 frame, version, prod_id;
5187	int ret, lane;
5188
5189	/* 4 lanes */
5190	for (lane = 0; lane < 4; lane++) {
5191		ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame);
5192		if (ret) {
5193			dd_dev_err(
5194				dd,
5195				"Unable to read lane %d firmware details\n",
5196				lane);
5197			continue;
5198		}
5199		version = (frame >> SPICO_ROM_VERSION_SHIFT)
5200					& SPICO_ROM_VERSION_MASK;
5201		prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT)
5202					& SPICO_ROM_PROD_ID_MASK;
5203		dd_dev_info(dd,
5204			"Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
5205			lane, version, prod_id);
5206	}
5207}
5208
5209/*
5210 * Read an idle LCB message.
5211 *
5212 * Returns 0 on success, -EINVAL on error
5213 */
5214static int read_idle_message(struct hfi1_devdata *dd, u64 type, u64 *data_out)
5215{
5216	int ret;
5217
5218	ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG,
5219		type, data_out);
5220	if (ret != HCMD_SUCCESS) {
5221		dd_dev_err(dd, "read idle message: type %d, err %d\n",
5222			(u32)type, ret);
5223		return -EINVAL;
5224	}
5225	dd_dev_info(dd, "%s: read idle message 0x%llx\n", __func__, *data_out);
5226	/* return only the payload as we already know the type */
5227	*data_out >>= IDLE_PAYLOAD_SHIFT;
5228	return 0;
5229}
5230
5231/*
5232 * Read an idle SMA message.  To be done in response to a notification from
5233 * the 8051.
5234 *
5235 * Returns 0 on success, -EINVAL on error
5236 */
5237static int read_idle_sma(struct hfi1_devdata *dd, u64 *data)
5238{
5239	return read_idle_message(dd,
5240			(u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT, data);
5241}
5242
5243/*
5244 * Send an idle LCB message.
5245 *
5246 * Returns 0 on success, -EINVAL on error
5247 */
5248static int send_idle_message(struct hfi1_devdata *dd, u64 data)
5249{
5250	int ret;
5251
5252	dd_dev_info(dd, "%s: sending idle message 0x%llx\n", __func__, data);
5253	ret = do_8051_command(dd, HCMD_SEND_LCB_IDLE_MSG, data, NULL);
5254	if (ret != HCMD_SUCCESS) {
5255		dd_dev_err(dd, "send idle message: data 0x%llx, err %d\n",
5256			data, ret);
5257		return -EINVAL;
5258	}
5259	return 0;
5260}
5261
5262/*
5263 * Send an idle SMA message.
5264 *
5265 * Returns 0 on success, -EINVAL on error
5266 */
5267int send_idle_sma(struct hfi1_devdata *dd, u64 message)
5268{
5269	u64 data;
5270
5271	data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT)
5272		| ((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT);
5273	return send_idle_message(dd, data);
5274}
5275
5276/*
5277 * Initialize the LCB then do a quick link up.  This may or may not be
5278 * in loopback.
5279 *
5280 * return 0 on success, -errno on error
5281 */
5282static int do_quick_linkup(struct hfi1_devdata *dd)
5283{
5284	u64 reg;
5285	unsigned long timeout;
5286	int ret;
5287
5288	lcb_shutdown(dd, 0);
5289
5290	if (loopback) {
5291		/* LCB_CFG_LOOPBACK.VAL = 2 */
5292		/* LCB_CFG_LANE_WIDTH.VAL = 0 */
5293		write_csr(dd, DC_LCB_CFG_LOOPBACK,
5294			IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT);
5295		write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
5296	}
5297
5298	/* start the LCBs */
5299	/* LCB_CFG_TX_FIFOS_RESET.VAL = 0 */
5300	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
5301
5302	/* simulator only loopback steps */
5303	if (loopback && dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
5304		/* LCB_CFG_RUN.EN = 1 */
5305		write_csr(dd, DC_LCB_CFG_RUN,
5306			1ull << DC_LCB_CFG_RUN_EN_SHIFT);
5307
5308		/* watch LCB_STS_LINK_TRANSFER_ACTIVE */
5309		timeout = jiffies + msecs_to_jiffies(10);
5310		while (1) {
5311			reg = read_csr(dd,
5312				DC_LCB_STS_LINK_TRANSFER_ACTIVE);
5313			if (reg)
5314				break;
5315			if (time_after(jiffies, timeout)) {
5316				dd_dev_err(dd,
5317					"timeout waiting for LINK_TRANSFER_ACTIVE\n");
5318				return -ETIMEDOUT;
5319			}
5320			udelay(2);
5321		}
5322
5323		write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
5324			1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
5325	}
5326
5327	if (!loopback) {
5328		/*
5329		 * When doing quick linkup and not in loopback, both
5330		 * sides must be done with LCB set-up before either
5331		 * starts the quick linkup.  Put a delay here so that
5332		 * both sides can be started and have a chance to be
5333		 * done with LCB set up before resuming.
5334		 */
5335		dd_dev_err(dd,
5336			"Pausing for peer to be finished with LCB set up\n");
5337		msleep(5000);
5338		dd_dev_err(dd,
5339			"Continuing with quick linkup\n");
5340	}
5341
5342	write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
5343	set_8051_lcb_access(dd);
5344
5345	/*
5346	 * State "quick" LinkUp request sets the physical link state to
5347	 * LinkUp without a verify capability sequence.
5348	 * This state is in simulator v37 and later.
5349	 */
5350	ret = set_physical_link_state(dd, PLS_QUICK_LINKUP);
5351	if (ret != HCMD_SUCCESS) {
5352		dd_dev_err(dd,
5353			"%s: set physical link state to quick LinkUp failed with return %d\n",
5354			__func__, ret);
5355
5356		set_host_lcb_access(dd);
5357		write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
5358
5359		if (ret >= 0)
5360			ret = -EINVAL;
5361		return ret;
5362	}
5363
5364	return 0; /* success */
5365}
5366
5367/*
5368 * Set the SerDes to internal loopback mode.
5369 * Returns 0 on success, -errno on error.
5370 */
5371static int set_serdes_loopback_mode(struct hfi1_devdata *dd)
5372{
5373	int ret;
5374
5375	ret = set_physical_link_state(dd, PLS_INTERNAL_SERDES_LOOPBACK);
5376	if (ret == HCMD_SUCCESS)
5377		return 0;
5378	dd_dev_err(dd,
5379		"Set physical link state to SerDes Loopback failed with return %d\n",
5380		ret);
5381	if (ret >= 0)
5382		ret = -EINVAL;
5383	return ret;
5384}
5385
5386/*
5387 * Do all special steps to set up loopback.
5388 */
5389static int init_loopback(struct hfi1_devdata *dd)
5390{
5391	dd_dev_info(dd, "Entering loopback mode\n");
5392
5393	/* all loopbacks should disable self GUID check */
5394	write_csr(dd, DC_DC8051_CFG_MODE,
5395		(read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK));
5396
5397	/*
5398	 * The simulator has only one loopback option - LCB.  Switch
5399	 * to that option, which includes quick link up.
5400	 *
5401	 * Accept all valid loopback values.
5402	 */
5403	if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
5404		&& (loopback == LOOPBACK_SERDES
5405			|| loopback == LOOPBACK_LCB
5406			|| loopback == LOOPBACK_CABLE)) {
5407		loopback = LOOPBACK_LCB;
5408		quick_linkup = 1;
5409		return 0;
5410	}
5411
5412	/* handle serdes loopback */
5413	if (loopback == LOOPBACK_SERDES) {
5414		/* internal serdes loopack needs quick linkup on RTL */
5415		if (dd->icode == ICODE_RTL_SILICON)
5416			quick_linkup = 1;
5417		return set_serdes_loopback_mode(dd);
5418	}
5419
5420	/* LCB loopback - handled at poll time */
5421	if (loopback == LOOPBACK_LCB) {
5422		quick_linkup = 1; /* LCB is always quick linkup */
5423
5424		/* not supported in emulation due to emulation RTL changes */
5425		if (dd->icode == ICODE_FPGA_EMULATION) {
5426			dd_dev_err(dd,
5427				"LCB loopback not supported in emulation\n");
5428			return -EINVAL;
5429		}
5430		return 0;
5431	}
5432
5433	/* external cable loopback requires no extra steps */
5434	if (loopback == LOOPBACK_CABLE)
5435		return 0;
5436
5437	dd_dev_err(dd, "Invalid loopback mode %d\n", loopback);
5438	return -EINVAL;
5439}
5440
5441/*
5442 * Translate from the OPA_LINK_WIDTH handed to us by the FM to bits
5443 * used in the Verify Capability link width attribute.
5444 */
5445static u16 opa_to_vc_link_widths(u16 opa_widths)
5446{
5447	int i;
5448	u16 result = 0;
5449
5450	static const struct link_bits {
5451		u16 from;
5452		u16 to;
5453	} opa_link_xlate[] = {
5454		{ OPA_LINK_WIDTH_1X, 1 << (1-1)  },
5455		{ OPA_LINK_WIDTH_2X, 1 << (2-1)  },
5456		{ OPA_LINK_WIDTH_3X, 1 << (3-1)  },
5457		{ OPA_LINK_WIDTH_4X, 1 << (4-1)  },
5458	};
5459
5460	for (i = 0; i < ARRAY_SIZE(opa_link_xlate); i++) {
5461		if (opa_widths & opa_link_xlate[i].from)
5462			result |= opa_link_xlate[i].to;
5463	}
5464	return result;
5465}
5466
5467/*
5468 * Set link attributes before moving to polling.
5469 */
5470static int set_local_link_attributes(struct hfi1_pportdata *ppd)
5471{
5472	struct hfi1_devdata *dd = ppd->dd;
5473	u8 enable_lane_tx;
5474	u8 tx_polarity_inversion;
5475	u8 rx_polarity_inversion;
5476	int ret;
5477
5478	/* reset our fabric serdes to clear any lingering problems */
5479	fabric_serdes_reset(dd);
5480
5481	/* set the local tx rate - need to read-modify-write */
5482	ret = read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
5483		&rx_polarity_inversion, &ppd->local_tx_rate);
5484	if (ret)
5485		goto set_local_link_attributes_fail;
5486
5487	if (dd->dc8051_ver < dc8051_ver(0, 20)) {
5488		/* set the tx rate to the fastest enabled */
5489		if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
5490			ppd->local_tx_rate = 1;
5491		else
5492			ppd->local_tx_rate = 0;
5493	} else {
5494		/* set the tx rate to all enabled */
5495		ppd->local_tx_rate = 0;
5496		if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
5497			ppd->local_tx_rate |= 2;
5498		if (ppd->link_speed_enabled & OPA_LINK_SPEED_12_5G)
5499			ppd->local_tx_rate |= 1;
5500	}
5501
5502	enable_lane_tx = 0xF; /* enable all four lanes */
5503	ret = write_tx_settings(dd, enable_lane_tx, tx_polarity_inversion,
5504		     rx_polarity_inversion, ppd->local_tx_rate);
5505	if (ret != HCMD_SUCCESS)
5506		goto set_local_link_attributes_fail;
5507
5508	/*
5509	 * DC supports continuous updates.
5510	 */
5511	ret = write_vc_local_phy(dd, 0 /* no power management */,
5512				     1 /* continuous updates */);
5513	if (ret != HCMD_SUCCESS)
5514		goto set_local_link_attributes_fail;
5515
5516	/* z=1 in the next call: AU of 0 is not supported by the hardware */
5517	ret = write_vc_local_fabric(dd, dd->vau, 1, dd->vcu, dd->vl15_init,
5518				    ppd->port_crc_mode_enabled);
5519	if (ret != HCMD_SUCCESS)
5520		goto set_local_link_attributes_fail;
5521
5522	ret = write_vc_local_link_width(dd, 0, 0,
5523		     opa_to_vc_link_widths(ppd->link_width_enabled));
5524	if (ret != HCMD_SUCCESS)
5525		goto set_local_link_attributes_fail;
5526
5527	/* let peer know who we are */
5528	ret = write_local_device_id(dd, dd->pcidev->device, dd->minrev);
5529	if (ret == HCMD_SUCCESS)
5530		return 0;
5531
5532set_local_link_attributes_fail:
5533	dd_dev_err(dd,
5534		"Failed to set local link attributes, return 0x%x\n",
5535		ret);
5536	return ret;
5537}
5538
5539/*
5540 * Call this to start the link.  Schedule a retry if the cable is not
5541 * present or if unable to start polling.  Do not do anything if the
5542 * link is disabled.  Returns 0 if link is disabled or moved to polling
5543 */
5544int start_link(struct hfi1_pportdata *ppd)
5545{
5546	if (!ppd->link_enabled) {
5547		dd_dev_info(ppd->dd,
5548			"%s: stopping link start because link is disabled\n",
5549			__func__);
5550		return 0;
5551	}
5552	if (!ppd->driver_link_ready) {
5553		dd_dev_info(ppd->dd,
5554			"%s: stopping link start because driver is not ready\n",
5555			__func__);
5556		return 0;
5557	}
5558
5559	if (qsfp_mod_present(ppd) || loopback == LOOPBACK_SERDES ||
5560			loopback == LOOPBACK_LCB ||
5561			ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
5562		return set_link_state(ppd, HLS_DN_POLL);
5563
5564	dd_dev_info(ppd->dd,
5565		"%s: stopping link start because no cable is present\n",
5566		__func__);
5567	return -EAGAIN;
5568}
5569
5570static void reset_qsfp(struct hfi1_pportdata *ppd)
5571{
5572	struct hfi1_devdata *dd = ppd->dd;
5573	u64 mask, qsfp_mask;
5574
5575	mask = (u64)QSFP_HFI0_RESET_N;
5576	qsfp_mask = read_csr(dd,
5577		dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
5578	qsfp_mask |= mask;
5579	write_csr(dd,
5580		dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE,
5581		qsfp_mask);
5582
5583	qsfp_mask = read_csr(dd,
5584		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
5585	qsfp_mask &= ~mask;
5586	write_csr(dd,
5587		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
5588		qsfp_mask);
5589
5590	udelay(10);
5591
5592	qsfp_mask |= mask;
5593	write_csr(dd,
5594		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
5595		qsfp_mask);
5596}
5597
5598static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
5599					u8 *qsfp_interrupt_status)
5600{
5601	struct hfi1_devdata *dd = ppd->dd;
5602
5603	if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
5604		(qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
5605		dd_dev_info(dd,
5606			"%s: QSFP cable on fire\n",
5607			__func__);
5608
5609	if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
5610		(qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
5611		dd_dev_info(dd,
5612			"%s: QSFP cable temperature too low\n",
5613			__func__);
5614
5615	if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
5616		(qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
5617		dd_dev_info(dd,
5618			"%s: QSFP supply voltage too high\n",
5619			__func__);
5620
5621	if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) ||
5622		(qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
5623		dd_dev_info(dd,
5624			"%s: QSFP supply voltage too low\n",
5625			__func__);
5626
5627	/* Byte 2 is vendor specific */
5628
5629	if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) ||
5630		(qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
5631		dd_dev_info(dd,
5632			"%s: Cable RX channel 1/2 power too high\n",
5633			__func__);
5634
5635	if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) ||
5636		(qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
5637		dd_dev_info(dd,
5638			"%s: Cable RX channel 1/2 power too low\n",
5639			__func__);
5640
5641	if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) ||
5642		(qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
5643		dd_dev_info(dd,
5644			"%s: Cable RX channel 3/4 power too high\n",
5645			__func__);
5646
5647	if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) ||
5648		(qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
5649		dd_dev_info(dd,
5650			"%s: Cable RX channel 3/4 power too low\n",
5651			__func__);
5652
5653	if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) ||
5654		(qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
5655		dd_dev_info(dd,
5656			"%s: Cable TX channel 1/2 bias too high\n",
5657			__func__);
5658
5659	if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) ||
5660		(qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
5661		dd_dev_info(dd,
5662			"%s: Cable TX channel 1/2 bias too low\n",
5663			__func__);
5664
5665	if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) ||
5666		(qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
5667		dd_dev_info(dd,
5668			"%s: Cable TX channel 3/4 bias too high\n",
5669			__func__);
5670
5671	if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) ||
5672		(qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
5673		dd_dev_info(dd,
5674			"%s: Cable TX channel 3/4 bias too low\n",
5675			__func__);
5676
5677	if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) ||
5678		(qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
5679		dd_dev_info(dd,
5680			"%s: Cable TX channel 1/2 power too high\n",
5681			__func__);
5682
5683	if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) ||
5684		(qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
5685		dd_dev_info(dd,
5686			"%s: Cable TX channel 1/2 power too low\n",
5687			__func__);
5688
5689	if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) ||
5690		(qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
5691		dd_dev_info(dd,
5692			"%s: Cable TX channel 3/4 power too high\n",
5693			__func__);
5694
5695	if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) ||
5696		(qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
5697		dd_dev_info(dd,
5698			"%s: Cable TX channel 3/4 power too low\n",
5699			__func__);
5700
5701	/* Bytes 9-10 and 11-12 are reserved */
5702	/* Bytes 13-15 are vendor specific */
5703
5704	return 0;
5705}
5706
5707static int do_pre_lni_host_behaviors(struct hfi1_pportdata *ppd)
5708{
5709	refresh_qsfp_cache(ppd, &ppd->qsfp_info);
5710
5711	return 0;
5712}
5713
5714static int do_qsfp_intr_fallback(struct hfi1_pportdata *ppd)
5715{
5716	struct hfi1_devdata *dd = ppd->dd;
5717	u8 qsfp_interrupt_status = 0;
5718
5719	if (qsfp_read(ppd, dd->hfi1_id, 2, &qsfp_interrupt_status, 1)
5720		!= 1) {
5721		dd_dev_info(dd,
5722			"%s: Failed to read status of QSFP module\n",
5723			__func__);
5724		return -EIO;
5725	}
5726
5727	/* We don't care about alarms & warnings with a non-functional INT_N */
5728	if (!(qsfp_interrupt_status & QSFP_DATA_NOT_READY))
5729		do_pre_lni_host_behaviors(ppd);
5730
5731	return 0;
5732}
5733
5734/* This routine will only be scheduled if the QSFP module is present */
5735static void qsfp_event(struct work_struct *work)
5736{
5737	struct qsfp_data *qd;
5738	struct hfi1_pportdata *ppd;
5739	struct hfi1_devdata *dd;
5740
5741	qd = container_of(work, struct qsfp_data, qsfp_work);
5742	ppd = qd->ppd;
5743	dd = ppd->dd;
5744
5745	/* Sanity check */
5746	if (!qsfp_mod_present(ppd))
5747		return;
5748
5749	/*
5750	 * Turn DC back on after cables has been
5751	 * re-inserted. Up until now, the DC has been in
5752	 * reset to save power.
5753	 */
5754	dc_start(dd);
5755
5756	if (qd->cache_refresh_required) {
5757		msleep(3000);
5758		reset_qsfp(ppd);
5759
5760		/* Check for QSFP interrupt after t_init (SFF 8679)
5761		 * + extra
5762		 */
5763		msleep(3000);
5764		if (!qd->qsfp_interrupt_functional) {
5765			if (do_qsfp_intr_fallback(ppd) < 0)
5766				dd_dev_info(dd, "%s: QSFP fallback failed\n",
5767					__func__);
5768			ppd->driver_link_ready = 1;
5769			start_link(ppd);
5770		}
5771	}
5772
5773	if (qd->check_interrupt_flags) {
5774		u8 qsfp_interrupt_status[16] = {0,};
5775
5776		if (qsfp_read(ppd, dd->hfi1_id, 6,
5777			      &qsfp_interrupt_status[0], 16) != 16) {
5778			dd_dev_info(dd,
5779				"%s: Failed to read status of QSFP module\n",
5780				__func__);
5781		} else {
5782			unsigned long flags;
5783			u8 data_status;
5784
5785			spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
5786			ppd->qsfp_info.check_interrupt_flags = 0;
5787			spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
5788								flags);
5789
5790			if (qsfp_read(ppd, dd->hfi1_id, 2, &data_status, 1)
5791				 != 1) {
5792				dd_dev_info(dd,
5793				"%s: Failed to read status of QSFP module\n",
5794					__func__);
5795			}
5796			if (!(data_status & QSFP_DATA_NOT_READY)) {
5797				do_pre_lni_host_behaviors(ppd);
5798				start_link(ppd);
5799			} else
5800				handle_qsfp_error_conditions(ppd,
5801						qsfp_interrupt_status);
5802		}
5803	}
5804}
5805
5806void init_qsfp(struct hfi1_pportdata *ppd)
5807{
5808	struct hfi1_devdata *dd = ppd->dd;
5809	u64 qsfp_mask;
5810
5811	if (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB ||
5812			ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
5813		ppd->driver_link_ready = 1;
5814		return;
5815	}
5816
5817	ppd->qsfp_info.ppd = ppd;
5818	INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
5819
5820	qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
5821	/* Clear current status to avoid spurious interrupts */
5822	write_csr(dd,
5823			dd->hfi1_id ?
5824				ASIC_QSFP2_CLEAR :
5825				ASIC_QSFP1_CLEAR,
5826		qsfp_mask);
5827
5828	/* Handle active low nature of INT_N and MODPRST_N pins */
5829	if (qsfp_mod_present(ppd))
5830		qsfp_mask &= ~(u64)QSFP_HFI0_MODPRST_N;
5831	write_csr(dd,
5832		  dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
5833		  qsfp_mask);
5834
5835	/* Allow only INT_N and MODPRST_N to trigger QSFP interrupts */
5836	qsfp_mask |= (u64)QSFP_HFI0_MODPRST_N;
5837	write_csr(dd,
5838		dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK,
5839		qsfp_mask);
5840
5841	if (qsfp_mod_present(ppd)) {
5842		msleep(3000);
5843		reset_qsfp(ppd);
5844
5845		/* Check for QSFP interrupt after t_init (SFF 8679)
5846		 * + extra
5847		 */
5848		msleep(3000);
5849		if (!ppd->qsfp_info.qsfp_interrupt_functional) {
5850			if (do_qsfp_intr_fallback(ppd) < 0)
5851				dd_dev_info(dd,
5852					"%s: QSFP fallback failed\n",
5853					__func__);
5854			ppd->driver_link_ready = 1;
5855		}
5856	}
5857}
5858
5859int bringup_serdes(struct hfi1_pportdata *ppd)
5860{
5861	struct hfi1_devdata *dd = ppd->dd;
5862	u64 guid;
5863	int ret;
5864
5865	if (HFI1_CAP_IS_KSET(EXTENDED_PSN))
5866		add_rcvctrl(dd, RCV_CTRL_RCV_EXTENDED_PSN_ENABLE_SMASK);
5867
5868	guid = ppd->guid;
5869	if (!guid) {
5870		if (dd->base_guid)
5871			guid = dd->base_guid + ppd->port - 1;
5872		ppd->guid = guid;
5873	}
5874
5875	/* the link defaults to enabled */
5876	ppd->link_enabled = 1;
5877	/* Set linkinit_reason on power up per OPA spec */
5878	ppd->linkinit_reason = OPA_LINKINIT_REASON_LINKUP;
5879
5880	if (loopback) {
5881		ret = init_loopback(dd);
5882		if (ret < 0)
5883			return ret;
5884	}
5885
5886	return start_link(ppd);
5887}
5888
5889void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
5890{
5891	struct hfi1_devdata *dd = ppd->dd;
5892
5893	/*
5894	 * Shut down the link and keep it down.   First turn off that the
5895	 * driver wants to allow the link to be up (driver_link_ready).
5896	 * Then make sure the link is not automatically restarted
5897	 * (link_enabled).  Cancel any pending restart.  And finally
5898	 * go offline.
5899	 */
5900	ppd->driver_link_ready = 0;
5901	ppd->link_enabled = 0;
5902
5903	set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
5904	  OPA_LINKDOWN_REASON_SMA_DISABLED);
5905	set_link_state(ppd, HLS_DN_OFFLINE);
5906
5907	/* disable the port */
5908	clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
5909}
5910
5911static inline int init_cpu_counters(struct hfi1_devdata *dd)
5912{
5913	struct hfi1_pportdata *ppd;
5914	int i;
5915
5916	ppd = (struct hfi1_pportdata *)(dd + 1);
5917	for (i = 0; i < dd->num_pports; i++, ppd++) {
5918		ppd->ibport_data.rc_acks = NULL;
5919		ppd->ibport_data.rc_qacks = NULL;
5920		ppd->ibport_data.rc_acks = alloc_percpu(u64);
5921		ppd->ibport_data.rc_qacks = alloc_percpu(u64);
5922		ppd->ibport_data.rc_delayed_comp = alloc_percpu(u64);
5923		if ((ppd->ibport_data.rc_acks == NULL) ||
5924		    (ppd->ibport_data.rc_delayed_comp == NULL) ||
5925		    (ppd->ibport_data.rc_qacks == NULL))
5926			return -ENOMEM;
5927	}
5928
5929	return 0;
5930}
5931
5932static const char * const pt_names[] = {
5933	"expected",
5934	"eager",
5935	"invalid"
5936};
5937
5938static const char *pt_name(u32 type)
5939{
5940	return type >= ARRAY_SIZE(pt_names) ? "unknown" : pt_names[type];
5941}
5942
5943/*
5944 * index is the index into the receive array
5945 */
5946void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
5947		  u32 type, unsigned long pa, u16 order)
5948{
5949	u64 reg;
5950	void __iomem *base = (dd->rcvarray_wc ? dd->rcvarray_wc :
5951			      (dd->kregbase + RCV_ARRAY));
5952
5953	if (!(dd->flags & HFI1_PRESENT))
5954		goto done;
5955
5956	if (type == PT_INVALID) {
5957		pa = 0;
5958	} else if (type > PT_INVALID) {
5959		dd_dev_err(dd,
5960			"unexpected receive array type %u for index %u, not handled\n",
5961			type, index);
5962		goto done;
5963	}
5964
5965	hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx",
5966		  pt_name(type), index, pa, (unsigned long)order);
5967
5968#define RT_ADDR_SHIFT 12	/* 4KB kernel address boundary */
5969	reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK
5970		| (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT
5971		| ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK)
5972					<< RCV_ARRAY_RT_ADDR_SHIFT;
5973	writeq(reg, base + (index * 8));
5974
5975	if (type == PT_EAGER)
5976		/*
5977		 * Eager entries are written one-by-one so we have to push them
5978		 * after we write the entry.
5979		 */
5980		flush_wc();
5981done:
5982	return;
5983}
5984
5985void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
5986{
5987	struct hfi1_devdata *dd = rcd->dd;
5988	u32 i;
5989
5990	/* this could be optimized */
5991	for (i = rcd->eager_base; i < rcd->eager_base +
5992		     rcd->egrbufs.alloced; i++)
5993		hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
5994
5995	for (i = rcd->expected_base;
5996			i < rcd->expected_base + rcd->expected_count; i++)
5997		hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
5998}
5999
6000int hfi1_get_base_kinfo(struct hfi1_ctxtdata *rcd,
6001			struct hfi1_ctxt_info *kinfo)
6002{
6003	kinfo->runtime_flags = (HFI1_MISC_GET() << HFI1_CAP_USER_SHIFT) |
6004		HFI1_CAP_UGET(MASK) | HFI1_CAP_KGET(K2U);
6005	return 0;
6006}
6007
6008struct hfi1_message_header *hfi1_get_msgheader(
6009				struct hfi1_devdata *dd, __le32 *rhf_addr)
6010{
6011	u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
6012
6013	return (struct hfi1_message_header *)
6014		(rhf_addr - dd->rhf_offset + offset);
6015}
6016
6017static const char * const ib_cfg_name_strings[] = {
6018	"HFI1_IB_CFG_LIDLMC",
6019	"HFI1_IB_CFG_LWID_DG_ENB",
6020	"HFI1_IB_CFG_LWID_ENB",
6021	"HFI1_IB_CFG_LWID",
6022	"HFI1_IB_CFG_SPD_ENB",
6023	"HFI1_IB_CFG_SPD",
6024	"HFI1_IB_CFG_RXPOL_ENB",
6025	"HFI1_IB_CFG_LREV_ENB",
6026	"HFI1_IB_CFG_LINKLATENCY",
6027	"HFI1_IB_CFG_HRTBT",
6028	"HFI1_IB_CFG_OP_VLS",
6029	"HFI1_IB_CFG_VL_HIGH_CAP",
6030	"HFI1_IB_CFG_VL_LOW_CAP",
6031	"HFI1_IB_CFG_OVERRUN_THRESH",
6032	"HFI1_IB_CFG_PHYERR_THRESH",
6033	"HFI1_IB_CFG_LINKDEFAULT",
6034	"HFI1_IB_CFG_PKEYS",
6035	"HFI1_IB_CFG_MTU",
6036	"HFI1_IB_CFG_LSTATE",
6037	"HFI1_IB_CFG_VL_HIGH_LIMIT",
6038	"HFI1_IB_CFG_PMA_TICKS",
6039	"HFI1_IB_CFG_PORT"
6040};
6041
6042static const char *ib_cfg_name(int which)
6043{
6044	if (which < 0 || which >= ARRAY_SIZE(ib_cfg_name_strings))
6045		return "invalid";
6046	return ib_cfg_name_strings[which];
6047}
6048
6049int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which)
6050{
6051	struct hfi1_devdata *dd = ppd->dd;
6052	int val = 0;
6053
6054	switch (which) {
6055	case HFI1_IB_CFG_LWID_ENB: /* allowed Link-width */
6056		val = ppd->link_width_enabled;
6057		break;
6058	case HFI1_IB_CFG_LWID: /* currently active Link-width */
6059		val = ppd->link_width_active;
6060		break;
6061	case HFI1_IB_CFG_SPD_ENB: /* allowed Link speeds */
6062		val = ppd->link_speed_enabled;
6063		break;
6064	case HFI1_IB_CFG_SPD: /* current Link speed */
6065		val = ppd->link_speed_active;
6066		break;
6067
6068	case HFI1_IB_CFG_RXPOL_ENB: /* Auto-RX-polarity enable */
6069	case HFI1_IB_CFG_LREV_ENB: /* Auto-Lane-reversal enable */
6070	case HFI1_IB_CFG_LINKLATENCY:
6071		goto unimplemented;
6072
6073	case HFI1_IB_CFG_OP_VLS:
6074		val = ppd->vls_operational;
6075		break;
6076	case HFI1_IB_CFG_VL_HIGH_CAP: /* VL arb high priority table size */
6077		val = VL_ARB_HIGH_PRIO_TABLE_SIZE;
6078		break;
6079	case HFI1_IB_CFG_VL_LOW_CAP: /* VL arb low priority table size */
6080		val = VL_ARB_LOW_PRIO_TABLE_SIZE;
6081		break;
6082	case HFI1_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
6083		val = ppd->overrun_threshold;
6084		break;
6085	case HFI1_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
6086		val = ppd->phy_error_threshold;
6087		break;
6088	case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
6089		val = dd->link_default;
6090		break;
6091
6092	case HFI1_IB_CFG_HRTBT: /* Heartbeat off/enable/auto */
6093	case HFI1_IB_CFG_PMA_TICKS:
6094	default:
6095unimplemented:
6096		if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
6097			dd_dev_info(
6098				dd,
6099				"%s: which %s: not implemented\n",
6100				__func__,
6101				ib_cfg_name(which));
6102		break;
6103	}
6104
6105	return val;
6106}
6107
6108/*
6109 * The largest MAD packet size.
6110 */
6111#define MAX_MAD_PACKET 2048
6112
6113/*
6114 * Return the maximum header bytes that can go on the _wire_
6115 * for this device. This count includes the ICRC which is
6116 * not part of the packet held in memory but it is appended
6117 * by the HW.
6118 * This is dependent on the device's receive header entry size.
6119 * HFI allows this to be set per-receive context, but the
6120 * driver presently enforces a global value.
6121 */
6122u32 lrh_max_header_bytes(struct hfi1_devdata *dd)
6123{
6124	/*
6125	 * The maximum non-payload (MTU) bytes in LRH.PktLen are
6126	 * the Receive Header Entry Size minus the PBC (or RHF) size
6127	 * plus one DW for the ICRC appended by HW.
6128	 *
6129	 * dd->rcd[0].rcvhdrqentsize is in DW.
6130	 * We use rcd[0] as all context will have the same value. Also,
6131	 * the first kernel context would have been allocated by now so
6132	 * we are guaranteed a valid value.
6133	 */
6134	return (dd->rcd[0]->rcvhdrqentsize - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
6135}
6136
6137/*
6138 * Set Send Length
6139 * @ppd - per port data
6140 *
6141 * Set the MTU by limiting how many DWs may be sent.  The SendLenCheck*
6142 * registers compare against LRH.PktLen, so use the max bytes included
6143 * in the LRH.
6144 *
6145 * This routine changes all VL values except VL15, which it maintains at
6146 * the same value.
6147 */
6148static void set_send_length(struct hfi1_pportdata *ppd)
6149{
6150	struct hfi1_devdata *dd = ppd->dd;
6151	u32 max_hb = lrh_max_header_bytes(dd), maxvlmtu = 0, dcmtu;
6152	u64 len1 = 0, len2 = (((dd->vld[15].mtu + max_hb) >> 2)
6153			      & SEND_LEN_CHECK1_LEN_VL15_MASK) <<
6154		SEND_LEN_CHECK1_LEN_VL15_SHIFT;
6155	int i;
6156
6157	for (i = 0; i < ppd->vls_supported; i++) {
6158		if (dd->vld[i].mtu > maxvlmtu)
6159			maxvlmtu = dd->vld[i].mtu;
6160		if (i <= 3)
6161			len1 |= (((dd->vld[i].mtu + max_hb) >> 2)
6162				 & SEND_LEN_CHECK0_LEN_VL0_MASK) <<
6163				((i % 4) * SEND_LEN_CHECK0_LEN_VL1_SHIFT);
6164		else
6165			len2 |= (((dd->vld[i].mtu + max_hb) >> 2)
6166				 & SEND_LEN_CHECK1_LEN_VL4_MASK) <<
6167				((i % 4) * SEND_LEN_CHECK1_LEN_VL5_SHIFT);
6168	}
6169	write_csr(dd, SEND_LEN_CHECK0, len1);
6170	write_csr(dd, SEND_LEN_CHECK1, len2);
6171	/* adjust kernel credit return thresholds based on new MTUs */
6172	/* all kernel receive contexts have the same hdrqentsize */
6173	for (i = 0; i < ppd->vls_supported; i++) {
6174		sc_set_cr_threshold(dd->vld[i].sc,
6175			sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu,
6176				dd->rcd[0]->rcvhdrqentsize));
6177	}
6178	sc_set_cr_threshold(dd->vld[15].sc,
6179		sc_mtu_to_threshold(dd->vld[15].sc, dd->vld[15].mtu,
6180			dd->rcd[0]->rcvhdrqentsize));
6181
6182	/* Adjust maximum MTU for the port in DC */
6183	dcmtu = maxvlmtu == 10240 ? DCC_CFG_PORT_MTU_CAP_10240 :
6184		(ilog2(maxvlmtu >> 8) + 1);
6185	len1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG);
6186	len1 &= ~DCC_CFG_PORT_CONFIG_MTU_CAP_SMASK;
6187	len1 |= ((u64)dcmtu & DCC_CFG_PORT_CONFIG_MTU_CAP_MASK) <<
6188		DCC_CFG_PORT_CONFIG_MTU_CAP_SHIFT;
6189	write_csr(ppd->dd, DCC_CFG_PORT_CONFIG, len1);
6190}
6191
6192static void set_lidlmc(struct hfi1_pportdata *ppd)
6193{
6194	int i;
6195	u64 sreg = 0;
6196	struct hfi1_devdata *dd = ppd->dd;
6197	u32 mask = ~((1U << ppd->lmc) - 1);
6198	u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1);
6199
6200	if (dd->hfi1_snoop.mode_flag)
6201		dd_dev_info(dd, "Set lid/lmc while snooping");
6202
6203	c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
6204		| DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
6205	c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
6206			<< DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT)|
6207	      ((mask & DCC_CFG_PORT_CONFIG1_DLID_MASK_MASK)
6208			<< DCC_CFG_PORT_CONFIG1_DLID_MASK_SHIFT);
6209	write_csr(ppd->dd, DCC_CFG_PORT_CONFIG1, c1);
6210
6211	/*
6212	 * Iterate over all the send contexts and set their SLID check
6213	 */
6214	sreg = ((mask & SEND_CTXT_CHECK_SLID_MASK_MASK) <<
6215			SEND_CTXT_CHECK_SLID_MASK_SHIFT) |
6216	       (((ppd->lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) <<
6217			SEND_CTXT_CHECK_SLID_VALUE_SHIFT);
6218
6219	for (i = 0; i < dd->chip_send_contexts; i++) {
6220		hfi1_cdbg(LINKVERB, "SendContext[%d].SLID_CHECK = 0x%x",
6221			  i, (u32)sreg);
6222		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, sreg);
6223	}
6224
6225	/* Now we have to do the same thing for the sdma engines */
6226	sdma_update_lmc(dd, mask, ppd->lid);
6227}
6228
6229static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs)
6230{
6231	unsigned long timeout;
6232	u32 curr_state;
6233
6234	timeout = jiffies + msecs_to_jiffies(msecs);
6235	while (1) {
6236		curr_state = read_physical_state(dd);
6237		if (curr_state == state)
6238			break;
6239		if (time_after(jiffies, timeout)) {
6240			dd_dev_err(dd,
6241				"timeout waiting for phy link state 0x%x, current state is 0x%x\n",
6242				state, curr_state);
6243			return -ETIMEDOUT;
6244		}
6245		usleep_range(1950, 2050); /* sleep 2ms-ish */
6246	}
6247
6248	return 0;
6249}
6250
6251/*
6252 * Helper for set_link_state().  Do not call except from that routine.
6253 * Expects ppd->hls_mutex to be held.
6254 *
6255 * @rem_reason value to be sent to the neighbor
6256 *
6257 * LinkDownReasons only set if transition succeeds.
6258 */
6259static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
6260{
6261	struct hfi1_devdata *dd = ppd->dd;
6262	u32 pstate, previous_state;
6263	u32 last_local_state;
6264	u32 last_remote_state;
6265	int ret;
6266	int do_transition;
6267	int do_wait;
6268
6269	previous_state = ppd->host_link_state;
6270	ppd->host_link_state = HLS_GOING_OFFLINE;
6271	pstate = read_physical_state(dd);
6272	if (pstate == PLS_OFFLINE) {
6273		do_transition = 0;	/* in right state */
6274		do_wait = 0;		/* ...no need to wait */
6275	} else if ((pstate & 0xff) == PLS_OFFLINE) {
6276		do_transition = 0;	/* in an offline transient state */
6277		do_wait = 1;		/* ...wait for it to settle */
6278	} else {
6279		do_transition = 1;	/* need to move to offline */
6280		do_wait = 1;		/* ...will need to wait */
6281	}
6282
6283	if (do_transition) {
6284		ret = set_physical_link_state(dd,
6285			PLS_OFFLINE | (rem_reason << 8));
6286
6287		if (ret != HCMD_SUCCESS) {
6288			dd_dev_err(dd,
6289				"Failed to transition to Offline link state, return %d\n",
6290				ret);
6291			return -EINVAL;
6292		}
6293		if (ppd->offline_disabled_reason == OPA_LINKDOWN_REASON_NONE)
6294			ppd->offline_disabled_reason =
6295			OPA_LINKDOWN_REASON_TRANSIENT;
6296	}
6297
6298	if (do_wait) {
6299		/* it can take a while for the link to go down */
6300		ret = wait_phy_linkstate(dd, PLS_OFFLINE, 10000);
6301		if (ret < 0)
6302			return ret;
6303	}
6304
6305	/* make sure the logical state is also down */
6306	wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
6307
6308	/*
6309	 * Now in charge of LCB - must be after the physical state is
6310	 * offline.quiet and before host_link_state is changed.
6311	 */
6312	set_host_lcb_access(dd);
6313	write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
6314	ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
6315
6316	/*
6317	 * The LNI has a mandatory wait time after the physical state
6318	 * moves to Offline.Quiet.  The wait time may be different
6319	 * depending on how the link went down.  The 8051 firmware
6320	 * will observe the needed wait time and only move to ready
6321	 * when that is completed.  The largest of the quiet timeouts
6322	 * is 2.5s, so wait that long and then a bit more.
6323	 */
6324	ret = wait_fm_ready(dd, 3000);
6325	if (ret) {
6326		dd_dev_err(dd,
6327			"After going offline, timed out waiting for the 8051 to become ready to accept host requests\n");
6328		/* state is really offline, so make it so */
6329		ppd->host_link_state = HLS_DN_OFFLINE;
6330		return ret;
6331	}
6332
6333	/*
6334	 * The state is now offline and the 8051 is ready to accept host
6335	 * requests.
6336	 *	- change our state
6337	 *	- notify others if we were previously in a linkup state
6338	 */
6339	ppd->host_link_state = HLS_DN_OFFLINE;
6340	if (previous_state & HLS_UP) {
6341		/* went down while link was up */
6342		handle_linkup_change(dd, 0);
6343	} else if (previous_state
6344			& (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
6345		/* went down while attempting link up */
6346		/* byte 1 of last_*_state is the failure reason */
6347		read_last_local_state(dd, &last_local_state);
6348		read_last_remote_state(dd, &last_remote_state);
6349		dd_dev_err(dd,
6350			"LNI failure last states: local 0x%08x, remote 0x%08x\n",
6351			last_local_state, last_remote_state);
6352	}
6353
6354	/* the active link width (downgrade) is 0 on link down */
6355	ppd->link_width_active = 0;
6356	ppd->link_width_downgrade_tx_active = 0;
6357	ppd->link_width_downgrade_rx_active = 0;
6358	ppd->current_egress_rate = 0;
6359	return 0;
6360}
6361
6362/* return the link state name */
6363static const char *link_state_name(u32 state)
6364{
6365	const char *name;
6366	int n = ilog2(state);
6367	static const char * const names[] = {
6368		[__HLS_UP_INIT_BP]	 = "INIT",
6369		[__HLS_UP_ARMED_BP]	 = "ARMED",
6370		[__HLS_UP_ACTIVE_BP]	 = "ACTIVE",
6371		[__HLS_DN_DOWNDEF_BP]	 = "DOWNDEF",
6372		[__HLS_DN_POLL_BP]	 = "POLL",
6373		[__HLS_DN_DISABLE_BP]	 = "DISABLE",
6374		[__HLS_DN_OFFLINE_BP]	 = "OFFLINE",
6375		[__HLS_VERIFY_CAP_BP]	 = "VERIFY_CAP",
6376		[__HLS_GOING_UP_BP]	 = "GOING_UP",
6377		[__HLS_GOING_OFFLINE_BP] = "GOING_OFFLINE",
6378		[__HLS_LINK_COOLDOWN_BP] = "LINK_COOLDOWN"
6379	};
6380
6381	name = n < ARRAY_SIZE(names) ? names[n] : NULL;
6382	return name ? name : "unknown";
6383}
6384
6385/* return the link state reason name */
6386static const char *link_state_reason_name(struct hfi1_pportdata *ppd, u32 state)
6387{
6388	if (state == HLS_UP_INIT) {
6389		switch (ppd->linkinit_reason) {
6390		case OPA_LINKINIT_REASON_LINKUP:
6391			return "(LINKUP)";
6392		case OPA_LINKINIT_REASON_FLAPPING:
6393			return "(FLAPPING)";
6394		case OPA_LINKINIT_OUTSIDE_POLICY:
6395			return "(OUTSIDE_POLICY)";
6396		case OPA_LINKINIT_QUARANTINED:
6397			return "(QUARANTINED)";
6398		case OPA_LINKINIT_INSUFIC_CAPABILITY:
6399			return "(INSUFIC_CAPABILITY)";
6400		default:
6401			break;
6402		}
6403	}
6404	return "";
6405}
6406
6407/*
6408 * driver_physical_state - convert the driver's notion of a port's
6409 * state (an HLS_*) into a physical state (a {IB,OPA}_PORTPHYSSTATE_*).
6410 * Return -1 (converted to a u32) to indicate error.
6411 */
6412u32 driver_physical_state(struct hfi1_pportdata *ppd)
6413{
6414	switch (ppd->host_link_state) {
6415	case HLS_UP_INIT:
6416	case HLS_UP_ARMED:
6417	case HLS_UP_ACTIVE:
6418		return IB_PORTPHYSSTATE_LINKUP;
6419	case HLS_DN_POLL:
6420		return IB_PORTPHYSSTATE_POLLING;
6421	case HLS_DN_DISABLE:
6422		return IB_PORTPHYSSTATE_DISABLED;
6423	case HLS_DN_OFFLINE:
6424		return OPA_PORTPHYSSTATE_OFFLINE;
6425	case HLS_VERIFY_CAP:
6426		return IB_PORTPHYSSTATE_POLLING;
6427	case HLS_GOING_UP:
6428		return IB_PORTPHYSSTATE_POLLING;
6429	case HLS_GOING_OFFLINE:
6430		return OPA_PORTPHYSSTATE_OFFLINE;
6431	case HLS_LINK_COOLDOWN:
6432		return OPA_PORTPHYSSTATE_OFFLINE;
6433	case HLS_DN_DOWNDEF:
6434	default:
6435		dd_dev_err(ppd->dd, "invalid host_link_state 0x%x\n",
6436			   ppd->host_link_state);
6437		return  -1;
6438	}
6439}
6440
6441/*
6442 * driver_logical_state - convert the driver's notion of a port's
6443 * state (an HLS_*) into a logical state (a IB_PORT_*). Return -1
6444 * (converted to a u32) to indicate error.
6445 */
6446u32 driver_logical_state(struct hfi1_pportdata *ppd)
6447{
6448	if (ppd->host_link_state && !(ppd->host_link_state & HLS_UP))
6449		return IB_PORT_DOWN;
6450
6451	switch (ppd->host_link_state & HLS_UP) {
6452	case HLS_UP_INIT:
6453		return IB_PORT_INIT;
6454	case HLS_UP_ARMED:
6455		return IB_PORT_ARMED;
6456	case HLS_UP_ACTIVE:
6457		return IB_PORT_ACTIVE;
6458	default:
6459		dd_dev_err(ppd->dd, "invalid host_link_state 0x%x\n",
6460			   ppd->host_link_state);
6461	return -1;
6462	}
6463}
6464
6465void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
6466			  u8 neigh_reason, u8 rem_reason)
6467{
6468	if (ppd->local_link_down_reason.latest == 0 &&
6469	    ppd->neigh_link_down_reason.latest == 0) {
6470		ppd->local_link_down_reason.latest = lcl_reason;
6471		ppd->neigh_link_down_reason.latest = neigh_reason;
6472		ppd->remote_link_down_reason = rem_reason;
6473	}
6474}
6475
6476/*
6477 * Change the physical and/or logical link state.
6478 *
6479 * Do not call this routine while inside an interrupt.  It contains
6480 * calls to routines that can take multiple seconds to finish.
6481 *
6482 * Returns 0 on success, -errno on failure.
6483 */
6484int set_link_state(struct hfi1_pportdata *ppd, u32 state)
6485{
6486	struct hfi1_devdata *dd = ppd->dd;
6487	struct ib_event event = {.device = NULL};
6488	int ret1, ret = 0;
6489	int was_up, is_down;
6490	int orig_new_state, poll_bounce;
6491
6492	mutex_lock(&ppd->hls_lock);
6493
6494	orig_new_state = state;
6495	if (state == HLS_DN_DOWNDEF)
6496		state = dd->link_default;
6497
6498	/* interpret poll -> poll as a link bounce */
6499	poll_bounce = ppd->host_link_state == HLS_DN_POLL
6500				&& state == HLS_DN_POLL;
6501
6502	dd_dev_info(dd, "%s: current %s, new %s %s%s\n", __func__,
6503		link_state_name(ppd->host_link_state),
6504		link_state_name(orig_new_state),
6505		poll_bounce ? "(bounce) " : "",
6506		link_state_reason_name(ppd, state));
6507
6508	was_up = !!(ppd->host_link_state & HLS_UP);
6509
6510	/*
6511	 * If we're going to a (HLS_*) link state that implies the logical
6512	 * link state is neither of (IB_PORT_ARMED, IB_PORT_ACTIVE), then
6513	 * reset is_sm_config_started to 0.
6514	 */
6515	if (!(state & (HLS_UP_ARMED | HLS_UP_ACTIVE)))
6516		ppd->is_sm_config_started = 0;
6517
6518	/*
6519	 * Do nothing if the states match.  Let a poll to poll link bounce
6520	 * go through.
6521	 */
6522	if (ppd->host_link_state == state && !poll_bounce)
6523		goto done;
6524
6525	switch (state) {
6526	case HLS_UP_INIT:
6527		if (ppd->host_link_state == HLS_DN_POLL && (quick_linkup
6528			    || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) {
6529			/*
6530			 * Quick link up jumps from polling to here.
6531			 *
6532			 * Whether in normal or loopback mode, the
6533			 * simulator jumps from polling to link up.
6534			 * Accept that here.
6535			 */
6536			/* OK */;
6537		} else if (ppd->host_link_state != HLS_GOING_UP) {
6538			goto unexpected;
6539		}
6540
6541		ppd->host_link_state = HLS_UP_INIT;
6542		ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
6543		if (ret) {
6544			/* logical state didn't change, stay at going_up */
6545			ppd->host_link_state = HLS_GOING_UP;
6546			dd_dev_err(dd,
6547				"%s: logical state did not change to INIT\n",
6548				__func__);
6549		} else {
6550			/* clear old transient LINKINIT_REASON code */
6551			if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
6552				ppd->linkinit_reason =
6553					OPA_LINKINIT_REASON_LINKUP;
6554
6555			/* enable the port */
6556			add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
6557
6558			handle_linkup_change(dd, 1);
6559		}
6560		break;
6561	case HLS_UP_ARMED:
6562		if (ppd->host_link_state != HLS_UP_INIT)
6563			goto unexpected;
6564
6565		ppd->host_link_state = HLS_UP_ARMED;
6566		set_logical_state(dd, LSTATE_ARMED);
6567		ret = wait_logical_linkstate(ppd, IB_PORT_ARMED, 1000);
6568		if (ret) {
6569			/* logical state didn't change, stay at init */
6570			ppd->host_link_state = HLS_UP_INIT;
6571			dd_dev_err(dd,
6572				"%s: logical state did not change to ARMED\n",
6573				__func__);
6574		}
6575		/*
6576		 * The simulator does not currently implement SMA messages,
6577		 * so neighbor_normal is not set.  Set it here when we first
6578		 * move to Armed.
6579		 */
6580		if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
6581			ppd->neighbor_normal = 1;
6582		break;
6583	case HLS_UP_ACTIVE:
6584		if (ppd->host_link_state != HLS_UP_ARMED)
6585			goto unexpected;
6586
6587		ppd->host_link_state = HLS_UP_ACTIVE;
6588		set_logical_state(dd, LSTATE_ACTIVE);
6589		ret = wait_logical_linkstate(ppd, IB_PORT_ACTIVE, 1000);
6590		if (ret) {
6591			/* logical state didn't change, stay at armed */
6592			ppd->host_link_state = HLS_UP_ARMED;
6593			dd_dev_err(dd,
6594				"%s: logical state did not change to ACTIVE\n",
6595				__func__);
6596		} else {
6597
6598			/* tell all engines to go running */
6599			sdma_all_running(dd);
6600
6601			/* Signal the IB layer that the port has went active */
6602			event.device = &dd->verbs_dev.ibdev;
6603			event.element.port_num = ppd->port;
6604			event.event = IB_EVENT_PORT_ACTIVE;
6605		}
6606		break;
6607	case HLS_DN_POLL:
6608		if ((ppd->host_link_state == HLS_DN_DISABLE ||
6609		     ppd->host_link_state == HLS_DN_OFFLINE) &&
6610		    dd->dc_shutdown)
6611			dc_start(dd);
6612		/* Hand LED control to the DC */
6613		write_csr(dd, DCC_CFG_LED_CNTRL, 0);
6614
6615		if (ppd->host_link_state != HLS_DN_OFFLINE) {
6616			u8 tmp = ppd->link_enabled;
6617
6618			ret = goto_offline(ppd, ppd->remote_link_down_reason);
6619			if (ret) {
6620				ppd->link_enabled = tmp;
6621				break;
6622			}
6623			ppd->remote_link_down_reason = 0;
6624
6625			if (ppd->driver_link_ready)
6626				ppd->link_enabled = 1;
6627		}
6628
6629		ret = set_local_link_attributes(ppd);
6630		if (ret)
6631			break;
6632
6633		ppd->port_error_action = 0;
6634		ppd->host_link_state = HLS_DN_POLL;
6635
6636		if (quick_linkup) {
6637			/* quick linkup does not go into polling */
6638			ret = do_quick_linkup(dd);
6639		} else {
6640			ret1 = set_physical_link_state(dd, PLS_POLLING);
6641			if (ret1 != HCMD_SUCCESS) {
6642				dd_dev_err(dd,
6643					"Failed to transition to Polling link state, return 0x%x\n",
6644					ret1);
6645				ret = -EINVAL;
6646			}
6647		}
6648		ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE;
6649		/*
6650		 * If an error occurred above, go back to offline.  The
6651		 * caller may reschedule another attempt.
6652		 */
6653		if (ret)
6654			goto_offline(ppd, 0);
6655		break;
6656	case HLS_DN_DISABLE:
6657		/* link is disabled */
6658		ppd->link_enabled = 0;
6659
6660		/* allow any state to transition to disabled */
6661
6662		/* must transition to offline first */
6663		if (ppd->host_link_state != HLS_DN_OFFLINE) {
6664			ret = goto_offline(ppd, ppd->remote_link_down_reason);
6665			if (ret)
6666				break;
6667			ppd->remote_link_down_reason = 0;
6668		}
6669
6670		ret1 = set_physical_link_state(dd, PLS_DISABLED);
6671		if (ret1 != HCMD_SUCCESS) {
6672			dd_dev_err(dd,
6673				"Failed to transition to Disabled link state, return 0x%x\n",
6674				ret1);
6675			ret = -EINVAL;
6676			break;
6677		}
6678		ppd->host_link_state = HLS_DN_DISABLE;
6679		dc_shutdown(dd);
6680		break;
6681	case HLS_DN_OFFLINE:
6682		if (ppd->host_link_state == HLS_DN_DISABLE)
6683			dc_start(dd);
6684
6685		/* allow any state to transition to offline */
6686		ret = goto_offline(ppd, ppd->remote_link_down_reason);
6687		if (!ret)
6688			ppd->remote_link_down_reason = 0;
6689		break;
6690	case HLS_VERIFY_CAP:
6691		if (ppd->host_link_state != HLS_DN_POLL)
6692			goto unexpected;
6693		ppd->host_link_state = HLS_VERIFY_CAP;
6694		break;
6695	case HLS_GOING_UP:
6696		if (ppd->host_link_state != HLS_VERIFY_CAP)
6697			goto unexpected;
6698
6699		ret1 = set_physical_link_state(dd, PLS_LINKUP);
6700		if (ret1 != HCMD_SUCCESS) {
6701			dd_dev_err(dd,
6702				"Failed to transition to link up state, return 0x%x\n",
6703				ret1);
6704			ret = -EINVAL;
6705			break;
6706		}
6707		ppd->host_link_state = HLS_GOING_UP;
6708		break;
6709
6710	case HLS_GOING_OFFLINE:		/* transient within goto_offline() */
6711	case HLS_LINK_COOLDOWN:		/* transient within goto_offline() */
6712	default:
6713		dd_dev_info(dd, "%s: state 0x%x: not supported\n",
6714			__func__, state);
6715		ret = -EINVAL;
6716		break;
6717	}
6718
6719	is_down = !!(ppd->host_link_state & (HLS_DN_POLL |
6720			HLS_DN_DISABLE | HLS_DN_OFFLINE));
6721
6722	if (was_up && is_down && ppd->local_link_down_reason.sma == 0 &&
6723	    ppd->neigh_link_down_reason.sma == 0) {
6724		ppd->local_link_down_reason.sma =
6725		  ppd->local_link_down_reason.latest;
6726		ppd->neigh_link_down_reason.sma =
6727		  ppd->neigh_link_down_reason.latest;
6728	}
6729
6730	goto done;
6731
6732unexpected:
6733	dd_dev_err(dd, "%s: unexpected state transition from %s to %s\n",
6734		__func__, link_state_name(ppd->host_link_state),
6735		link_state_name(state));
6736	ret = -EINVAL;
6737
6738done:
6739	mutex_unlock(&ppd->hls_lock);
6740
6741	if (event.device)
6742		ib_dispatch_event(&event);
6743
6744	return ret;
6745}
6746
6747int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val)
6748{
6749	u64 reg;
6750	int ret = 0;
6751
6752	switch (which) {
6753	case HFI1_IB_CFG_LIDLMC:
6754		set_lidlmc(ppd);
6755		break;
6756	case HFI1_IB_CFG_VL_HIGH_LIMIT:
6757		/*
6758		 * The VL Arbitrator high limit is sent in units of 4k
6759		 * bytes, while HFI stores it in units of 64 bytes.
6760		 */
6761		val *= 4096/64;
6762		reg = ((u64)val & SEND_HIGH_PRIORITY_LIMIT_LIMIT_MASK)
6763			<< SEND_HIGH_PRIORITY_LIMIT_LIMIT_SHIFT;
6764		write_csr(ppd->dd, SEND_HIGH_PRIORITY_LIMIT, reg);
6765		break;
6766	case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
6767		/* HFI only supports POLL as the default link down state */
6768		if (val != HLS_DN_POLL)
6769			ret = -EINVAL;
6770		break;
6771	case HFI1_IB_CFG_OP_VLS:
6772		if (ppd->vls_operational != val) {
6773			ppd->vls_operational = val;
6774			if (!ppd->port)
6775				ret = -EINVAL;
6776			else
6777				ret = sdma_map_init(
6778					ppd->dd,
6779					ppd->port - 1,
6780					val,
6781					NULL);
6782		}
6783		break;
6784	/*
6785	 * For link width, link width downgrade, and speed enable, always AND
6786	 * the setting with what is actually supported.  This has two benefits.
6787	 * First, enabled can't have unsupported values, no matter what the
6788	 * SM or FM might want.  Second, the ALL_SUPPORTED wildcards that mean
6789	 * "fill in with your supported value" have all the bits in the
6790	 * field set, so simply ANDing with supported has the desired result.
6791	 */
6792	case HFI1_IB_CFG_LWID_ENB: /* set allowed Link-width */
6793		ppd->link_width_enabled = val & ppd->link_width_supported;
6794		break;
6795	case HFI1_IB_CFG_LWID_DG_ENB: /* set allowed link width downgrade */
6796		ppd->link_width_downgrade_enabled =
6797				val & ppd->link_width_downgrade_supported;
6798		break;
6799	case HFI1_IB_CFG_SPD_ENB: /* allowed Link speeds */
6800		ppd->link_speed_enabled = val & ppd->link_speed_supported;
6801		break;
6802	case HFI1_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
6803		/*
6804		 * HFI does not follow IB specs, save this value
6805		 * so we can report it, if asked.
6806		 */
6807		ppd->overrun_threshold = val;
6808		break;
6809	case HFI1_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
6810		/*
6811		 * HFI does not follow IB specs, save this value
6812		 * so we can report it, if asked.
6813		 */
6814		ppd->phy_error_threshold = val;
6815		break;
6816
6817	case HFI1_IB_CFG_MTU:
6818		set_send_length(ppd);
6819		break;
6820
6821	case HFI1_IB_CFG_PKEYS:
6822		if (HFI1_CAP_IS_KSET(PKEY_CHECK))
6823			set_partition_keys(ppd);
6824		break;
6825
6826	default:
6827		if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
6828			dd_dev_info(ppd->dd,
6829			  "%s: which %s, val 0x%x: not implemented\n",
6830			  __func__, ib_cfg_name(which), val);
6831		break;
6832	}
6833	return ret;
6834}
6835
6836/* begin functions related to vl arbitration table caching */
6837static void init_vl_arb_caches(struct hfi1_pportdata *ppd)
6838{
6839	int i;
6840
6841	BUILD_BUG_ON(VL_ARB_TABLE_SIZE !=
6842			VL_ARB_LOW_PRIO_TABLE_SIZE);
6843	BUILD_BUG_ON(VL_ARB_TABLE_SIZE !=
6844			VL_ARB_HIGH_PRIO_TABLE_SIZE);
6845
6846	/*
6847	 * Note that we always return values directly from the
6848	 * 'vl_arb_cache' (and do no CSR reads) in response to a
6849	 * 'Get(VLArbTable)'. This is obviously correct after a
6850	 * 'Set(VLArbTable)', since the cache will then be up to
6851	 * date. But it's also correct prior to any 'Set(VLArbTable)'
6852	 * since then both the cache, and the relevant h/w registers
6853	 * will be zeroed.
6854	 */
6855
6856	for (i = 0; i < MAX_PRIO_TABLE; i++)
6857		spin_lock_init(&ppd->vl_arb_cache[i].lock);
6858}
6859
6860/*
6861 * vl_arb_lock_cache
6862 *
6863 * All other vl_arb_* functions should be called only after locking
6864 * the cache.
6865 */
6866static inline struct vl_arb_cache *
6867vl_arb_lock_cache(struct hfi1_pportdata *ppd, int idx)
6868{
6869	if (idx != LO_PRIO_TABLE && idx != HI_PRIO_TABLE)
6870		return NULL;
6871	spin_lock(&ppd->vl_arb_cache[idx].lock);
6872	return &ppd->vl_arb_cache[idx];
6873}
6874
6875static inline void vl_arb_unlock_cache(struct hfi1_pportdata *ppd, int idx)
6876{
6877	spin_unlock(&ppd->vl_arb_cache[idx].lock);
6878}
6879
6880static void vl_arb_get_cache(struct vl_arb_cache *cache,
6881			     struct ib_vl_weight_elem *vl)
6882{
6883	memcpy(vl, cache->table, VL_ARB_TABLE_SIZE * sizeof(*vl));
6884}
6885
6886static void vl_arb_set_cache(struct vl_arb_cache *cache,
6887			     struct ib_vl_weight_elem *vl)
6888{
6889	memcpy(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
6890}
6891
6892static int vl_arb_match_cache(struct vl_arb_cache *cache,
6893			      struct ib_vl_weight_elem *vl)
6894{
6895	return !memcmp(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
6896}
6897/* end functions related to vl arbitration table caching */
6898
6899static int set_vl_weights(struct hfi1_pportdata *ppd, u32 target,
6900			  u32 size, struct ib_vl_weight_elem *vl)
6901{
6902	struct hfi1_devdata *dd = ppd->dd;
6903	u64 reg;
6904	unsigned int i, is_up = 0;
6905	int drain, ret = 0;
6906
6907	mutex_lock(&ppd->hls_lock);
6908
6909	if (ppd->host_link_state & HLS_UP)
6910		is_up = 1;
6911
6912	drain = !is_ax(dd) && is_up;
6913
6914	if (drain)
6915		/*
6916		 * Before adjusting VL arbitration weights, empty per-VL
6917		 * FIFOs, otherwise a packet whose VL weight is being
6918		 * set to 0 could get stuck in a FIFO with no chance to
6919		 * egress.
6920		 */
6921		ret = stop_drain_data_vls(dd);
6922
6923	if (ret) {
6924		dd_dev_err(
6925			dd,
6926			"%s: cannot stop/drain VLs - refusing to change VL arbitration weights\n",
6927			__func__);
6928		goto err;
6929	}
6930
6931	for (i = 0; i < size; i++, vl++) {
6932		/*
6933		 * NOTE: The low priority shift and mask are used here, but
6934		 * they are the same for both the low and high registers.
6935		 */
6936		reg = (((u64)vl->vl & SEND_LOW_PRIORITY_LIST_VL_MASK)
6937				<< SEND_LOW_PRIORITY_LIST_VL_SHIFT)
6938		      | (((u64)vl->weight
6939				& SEND_LOW_PRIORITY_LIST_WEIGHT_MASK)
6940				<< SEND_LOW_PRIORITY_LIST_WEIGHT_SHIFT);
6941		write_csr(dd, target + (i * 8), reg);
6942	}
6943	pio_send_control(dd, PSC_GLOBAL_VLARB_ENABLE);
6944
6945	if (drain)
6946		open_fill_data_vls(dd); /* reopen all VLs */
6947
6948err:
6949	mutex_unlock(&ppd->hls_lock);
6950
6951	return ret;
6952}
6953
6954/*
6955 * Read one credit merge VL register.
6956 */
6957static void read_one_cm_vl(struct hfi1_devdata *dd, u32 csr,
6958			   struct vl_limit *vll)
6959{
6960	u64 reg = read_csr(dd, csr);
6961
6962	vll->dedicated = cpu_to_be16(
6963		(reg >> SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT)
6964		& SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_MASK);
6965	vll->shared = cpu_to_be16(
6966		(reg >> SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SHIFT)
6967		& SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_MASK);
6968}
6969
6970/*
6971 * Read the current credit merge limits.
6972 */
6973static int get_buffer_control(struct hfi1_devdata *dd,
6974			      struct buffer_control *bc, u16 *overall_limit)
6975{
6976	u64 reg;
6977	int i;
6978
6979	/* not all entries are filled in */
6980	memset(bc, 0, sizeof(*bc));
6981
6982	/* OPA and HFI have a 1-1 mapping */
6983	for (i = 0; i < TXE_NUM_DATA_VL; i++)
6984		read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8*i), &bc->vl[i]);
6985
6986	/* NOTE: assumes that VL* and VL15 CSRs are bit-wise identical */
6987	read_one_cm_vl(dd, SEND_CM_CREDIT_VL15, &bc->vl[15]);
6988
6989	reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
6990	bc->overall_shared_limit = cpu_to_be16(
6991		(reg >> SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
6992		& SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_MASK);
6993	if (overall_limit)
6994		*overall_limit = (reg
6995			>> SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
6996			& SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_MASK;
6997	return sizeof(struct buffer_control);
6998}
6999
7000static int get_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
7001{
7002	u64 reg;
7003	int i;
7004
7005	/* each register contains 16 SC->VLnt mappings, 4 bits each */
7006	reg = read_csr(dd, DCC_CFG_SC_VL_TABLE_15_0);
7007	for (i = 0; i < sizeof(u64); i++) {
7008		u8 byte = *(((u8 *)&reg) + i);
7009
7010		dp->vlnt[2 * i] = byte & 0xf;
7011		dp->vlnt[(2 * i) + 1] = (byte & 0xf0) >> 4;
7012	}
7013
7014	reg = read_csr(dd, DCC_CFG_SC_VL_TABLE_31_16);
7015	for (i = 0; i < sizeof(u64); i++) {
7016		u8 byte = *(((u8 *)&reg) + i);
7017
7018		dp->vlnt[16 + (2 * i)] = byte & 0xf;
7019		dp->vlnt[16 + (2 * i) + 1] = (byte & 0xf0) >> 4;
7020	}
7021	return sizeof(struct sc2vlnt);
7022}
7023
7024static void get_vlarb_preempt(struct hfi1_devdata *dd, u32 nelems,
7025			      struct ib_vl_weight_elem *vl)
7026{
7027	unsigned int i;
7028
7029	for (i = 0; i < nelems; i++, vl++) {
7030		vl->vl = 0xf;
7031		vl->weight = 0;
7032	}
7033}
7034
7035static void set_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
7036{
7037	write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0,
7038		DC_SC_VL_VAL(15_0,
7039		0, dp->vlnt[0] & 0xf,
7040		1, dp->vlnt[1] & 0xf,
7041		2, dp->vlnt[2] & 0xf,
7042		3, dp->vlnt[3] & 0xf,
7043		4, dp->vlnt[4] & 0xf,
7044		5, dp->vlnt[5] & 0xf,
7045		6, dp->vlnt[6] & 0xf,
7046		7, dp->vlnt[7] & 0xf,
7047		8, dp->vlnt[8] & 0xf,
7048		9, dp->vlnt[9] & 0xf,
7049		10, dp->vlnt[10] & 0xf,
7050		11, dp->vlnt[11] & 0xf,
7051		12, dp->vlnt[12] & 0xf,
7052		13, dp->vlnt[13] & 0xf,
7053		14, dp->vlnt[14] & 0xf,
7054		15, dp->vlnt[15] & 0xf));
7055	write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16,
7056		DC_SC_VL_VAL(31_16,
7057		16, dp->vlnt[16] & 0xf,
7058		17, dp->vlnt[17] & 0xf,
7059		18, dp->vlnt[18] & 0xf,
7060		19, dp->vlnt[19] & 0xf,
7061		20, dp->vlnt[20] & 0xf,
7062		21, dp->vlnt[21] & 0xf,
7063		22, dp->vlnt[22] & 0xf,
7064		23, dp->vlnt[23] & 0xf,
7065		24, dp->vlnt[24] & 0xf,
7066		25, dp->vlnt[25] & 0xf,
7067		26, dp->vlnt[26] & 0xf,
7068		27, dp->vlnt[27] & 0xf,
7069		28, dp->vlnt[28] & 0xf,
7070		29, dp->vlnt[29] & 0xf,
7071		30, dp->vlnt[30] & 0xf,
7072		31, dp->vlnt[31] & 0xf));
7073}
7074
7075static void nonzero_msg(struct hfi1_devdata *dd, int idx, const char *what,
7076			u16 limit)
7077{
7078	if (limit != 0)
7079		dd_dev_info(dd, "Invalid %s limit %d on VL %d, ignoring\n",
7080			what, (int)limit, idx);
7081}
7082
7083/* change only the shared limit portion of SendCmGLobalCredit */
7084static void set_global_shared(struct hfi1_devdata *dd, u16 limit)
7085{
7086	u64 reg;
7087
7088	reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
7089	reg &= ~SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SMASK;
7090	reg |= (u64)limit << SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT;
7091	write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
7092}
7093
7094/* change only the total credit limit portion of SendCmGLobalCredit */
7095static void set_global_limit(struct hfi1_devdata *dd, u16 limit)
7096{
7097	u64 reg;
7098
7099	reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
7100	reg &= ~SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SMASK;
7101	reg |= (u64)limit << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT;
7102	write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
7103}
7104
7105/* set the given per-VL shared limit */
7106static void set_vl_shared(struct hfi1_devdata *dd, int vl, u16 limit)
7107{
7108	u64 reg;
7109	u32 addr;
7110
7111	if (vl < TXE_NUM_DATA_VL)
7112		addr = SEND_CM_CREDIT_VL + (8 * vl);
7113	else
7114		addr = SEND_CM_CREDIT_VL15;
7115
7116	reg = read_csr(dd, addr);
7117	reg &= ~SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SMASK;
7118	reg |= (u64)limit << SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SHIFT;
7119	write_csr(dd, addr, reg);
7120}
7121
7122/* set the given per-VL dedicated limit */
7123static void set_vl_dedicated(struct hfi1_devdata *dd, int vl, u16 limit)
7124{
7125	u64 reg;
7126	u32 addr;
7127
7128	if (vl < TXE_NUM_DATA_VL)
7129		addr = SEND_CM_CREDIT_VL + (8 * vl);
7130	else
7131		addr = SEND_CM_CREDIT_VL15;
7132
7133	reg = read_csr(dd, addr);
7134	reg &= ~SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SMASK;
7135	reg |= (u64)limit << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT;
7136	write_csr(dd, addr, reg);
7137}
7138
7139/* spin until the given per-VL status mask bits clear */
7140static void wait_for_vl_status_clear(struct hfi1_devdata *dd, u64 mask,
7141				     const char *which)
7142{
7143	unsigned long timeout;
7144	u64 reg;
7145
7146	timeout = jiffies + msecs_to_jiffies(VL_STATUS_CLEAR_TIMEOUT);
7147	while (1) {
7148		reg = read_csr(dd, SEND_CM_CREDIT_USED_STATUS) & mask;
7149
7150		if (reg == 0)
7151			return;	/* success */
7152		if (time_after(jiffies, timeout))
7153			break;		/* timed out */
7154		udelay(1);
7155	}
7156
7157	dd_dev_err(dd,
7158		"%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n",
7159		which, VL_STATUS_CLEAR_TIMEOUT, mask, reg);
7160	/*
7161	 * If this occurs, it is likely there was a credit loss on the link.
7162	 * The only recovery from that is a link bounce.
7163	 */
7164	dd_dev_err(dd,
7165		"Continuing anyway.  A credit loss may occur.  Suggest a link bounce\n");
7166}
7167
7168/*
7169 * The number of credits on the VLs may be changed while everything
7170 * is "live", but the following algorithm must be followed due to
7171 * how the hardware is actually implemented.  In particular,
7172 * Return_Credit_Status[] is the only correct status check.
7173 *
7174 * if (reducing Global_Shared_Credit_Limit or any shared limit changing)
7175 *     set Global_Shared_Credit_Limit = 0
7176 *     use_all_vl = 1
7177 * mask0 = all VLs that are changing either dedicated or shared limits
7178 * set Shared_Limit[mask0] = 0
7179 * spin until Return_Credit_Status[use_all_vl ? all VL : mask0] == 0
7180 * if (changing any dedicated limit)
7181 *     mask1 = all VLs that are lowering dedicated limits
7182 *     lower Dedicated_Limit[mask1]
7183 *     spin until Return_Credit_Status[mask1] == 0
7184 *     raise Dedicated_Limits
7185 * raise Shared_Limits
7186 * raise Global_Shared_Credit_Limit
7187 *
7188 * lower = if the new limit is lower, set the limit to the new value
7189 * raise = if the new limit is higher than the current value (may be changed
7190 *	earlier in the algorithm), set the new limit to the new value
7191 */
7192static int set_buffer_control(struct hfi1_devdata *dd,
7193			      struct buffer_control *new_bc)
7194{
7195	u64 changing_mask, ld_mask, stat_mask;
7196	int change_count;
7197	int i, use_all_mask;
7198	int this_shared_changing;
7199	/*
7200	 * A0: add the variable any_shared_limit_changing below and in the
7201	 * algorithm above.  If removing A0 support, it can be removed.
7202	 */
7203	int any_shared_limit_changing;
7204	struct buffer_control cur_bc;
7205	u8 changing[OPA_MAX_VLS];
7206	u8 lowering_dedicated[OPA_MAX_VLS];
7207	u16 cur_total;
7208	u32 new_total = 0;
7209	const u64 all_mask =
7210	SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK
7211	 | SEND_CM_CREDIT_USED_STATUS_VL1_RETURN_CREDIT_STATUS_SMASK
7212	 | SEND_CM_CREDIT_USED_STATUS_VL2_RETURN_CREDIT_STATUS_SMASK
7213	 | SEND_CM_CREDIT_USED_STATUS_VL3_RETURN_CREDIT_STATUS_SMASK
7214	 | SEND_CM_CREDIT_USED_STATUS_VL4_RETURN_CREDIT_STATUS_SMASK
7215	 | SEND_CM_CREDIT_USED_STATUS_VL5_RETURN_CREDIT_STATUS_SMASK
7216	 | SEND_CM_CREDIT_USED_STATUS_VL6_RETURN_CREDIT_STATUS_SMASK
7217	 | SEND_CM_CREDIT_USED_STATUS_VL7_RETURN_CREDIT_STATUS_SMASK
7218	 | SEND_CM_CREDIT_USED_STATUS_VL15_RETURN_CREDIT_STATUS_SMASK;
7219
7220#define valid_vl(idx) ((idx) < TXE_NUM_DATA_VL || (idx) == 15)
7221#define NUM_USABLE_VLS 16	/* look at VL15 and less */
7222
7223
7224	/* find the new total credits, do sanity check on unused VLs */
7225	for (i = 0; i < OPA_MAX_VLS; i++) {
7226		if (valid_vl(i)) {
7227			new_total += be16_to_cpu(new_bc->vl[i].dedicated);
7228			continue;
7229		}
7230		nonzero_msg(dd, i, "dedicated",
7231			be16_to_cpu(new_bc->vl[i].dedicated));
7232		nonzero_msg(dd, i, "shared",
7233			be16_to_cpu(new_bc->vl[i].shared));
7234		new_bc->vl[i].dedicated = 0;
7235		new_bc->vl[i].shared = 0;
7236	}
7237	new_total += be16_to_cpu(new_bc->overall_shared_limit);
7238	if (new_total > (u32)dd->link_credits)
7239		return -EINVAL;
7240	/* fetch the current values */
7241	get_buffer_control(dd, &cur_bc, &cur_total);
7242
7243	/*
7244	 * Create the masks we will use.
7245	 */
7246	memset(changing, 0, sizeof(changing));
7247	memset(lowering_dedicated, 0, sizeof(lowering_dedicated));
7248	/* NOTE: Assumes that the individual VL bits are adjacent and in
7249	   increasing order */
7250	stat_mask =
7251		SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK;
7252	changing_mask = 0;
7253	ld_mask = 0;
7254	change_count = 0;
7255	any_shared_limit_changing = 0;
7256	for (i = 0; i < NUM_USABLE_VLS; i++, stat_mask <<= 1) {
7257		if (!valid_vl(i))
7258			continue;
7259		this_shared_changing = new_bc->vl[i].shared
7260						!= cur_bc.vl[i].shared;
7261		if (this_shared_changing)
7262			any_shared_limit_changing = 1;
7263		if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated
7264				|| this_shared_changing) {
7265			changing[i] = 1;
7266			changing_mask |= stat_mask;
7267			change_count++;
7268		}
7269		if (be16_to_cpu(new_bc->vl[i].dedicated) <
7270					be16_to_cpu(cur_bc.vl[i].dedicated)) {
7271			lowering_dedicated[i] = 1;
7272			ld_mask |= stat_mask;
7273		}
7274	}
7275
7276	/* bracket the credit change with a total adjustment */
7277	if (new_total > cur_total)
7278		set_global_limit(dd, new_total);
7279
7280	/*
7281	 * Start the credit change algorithm.
7282	 */
7283	use_all_mask = 0;
7284	if ((be16_to_cpu(new_bc->overall_shared_limit) <
7285				be16_to_cpu(cur_bc.overall_shared_limit))
7286			|| (is_a0(dd) && any_shared_limit_changing)) {
7287		set_global_shared(dd, 0);
7288		cur_bc.overall_shared_limit = 0;
7289		use_all_mask = 1;
7290	}
7291
7292	for (i = 0; i < NUM_USABLE_VLS; i++) {
7293		if (!valid_vl(i))
7294			continue;
7295
7296		if (changing[i]) {
7297			set_vl_shared(dd, i, 0);
7298			cur_bc.vl[i].shared = 0;
7299		}
7300	}
7301
7302	wait_for_vl_status_clear(dd, use_all_mask ? all_mask : changing_mask,
7303		"shared");
7304
7305	if (change_count > 0) {
7306		for (i = 0; i < NUM_USABLE_VLS; i++) {
7307			if (!valid_vl(i))
7308				continue;
7309
7310			if (lowering_dedicated[i]) {
7311				set_vl_dedicated(dd, i,
7312					be16_to_cpu(new_bc->vl[i].dedicated));
7313				cur_bc.vl[i].dedicated =
7314						new_bc->vl[i].dedicated;
7315			}
7316		}
7317
7318		wait_for_vl_status_clear(dd, ld_mask, "dedicated");
7319
7320		/* now raise all dedicated that are going up */
7321		for (i = 0; i < NUM_USABLE_VLS; i++) {
7322			if (!valid_vl(i))
7323				continue;
7324
7325			if (be16_to_cpu(new_bc->vl[i].dedicated) >
7326					be16_to_cpu(cur_bc.vl[i].dedicated))
7327				set_vl_dedicated(dd, i,
7328					be16_to_cpu(new_bc->vl[i].dedicated));
7329		}
7330	}
7331
7332	/* next raise all shared that are going up */
7333	for (i = 0; i < NUM_USABLE_VLS; i++) {
7334		if (!valid_vl(i))
7335			continue;
7336
7337		if (be16_to_cpu(new_bc->vl[i].shared) >
7338				be16_to_cpu(cur_bc.vl[i].shared))
7339			set_vl_shared(dd, i, be16_to_cpu(new_bc->vl[i].shared));
7340	}
7341
7342	/* finally raise the global shared */
7343	if (be16_to_cpu(new_bc->overall_shared_limit) >
7344			be16_to_cpu(cur_bc.overall_shared_limit))
7345		set_global_shared(dd,
7346			be16_to_cpu(new_bc->overall_shared_limit));
7347
7348	/* bracket the credit change with a total adjustment */
7349	if (new_total < cur_total)
7350		set_global_limit(dd, new_total);
7351	return 0;
7352}
7353
7354/*
7355 * Read the given fabric manager table. Return the size of the
7356 * table (in bytes) on success, and a negative error code on
7357 * failure.
7358 */
7359int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t)
7360
7361{
7362	int size;
7363	struct vl_arb_cache *vlc;
7364
7365	switch (which) {
7366	case FM_TBL_VL_HIGH_ARB:
7367		size = 256;
7368		/*
7369		 * OPA specifies 128 elements (of 2 bytes each), though
7370		 * HFI supports only 16 elements in h/w.
7371		 */
7372		vlc = vl_arb_lock_cache(ppd, HI_PRIO_TABLE);
7373		vl_arb_get_cache(vlc, t);
7374		vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7375		break;
7376	case FM_TBL_VL_LOW_ARB:
7377		size = 256;
7378		/*
7379		 * OPA specifies 128 elements (of 2 bytes each), though
7380		 * HFI supports only 16 elements in h/w.
7381		 */
7382		vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
7383		vl_arb_get_cache(vlc, t);
7384		vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7385		break;
7386	case FM_TBL_BUFFER_CONTROL:
7387		size = get_buffer_control(ppd->dd, t, NULL);
7388		break;
7389	case FM_TBL_SC2VLNT:
7390		size = get_sc2vlnt(ppd->dd, t);
7391		break;
7392	case FM_TBL_VL_PREEMPT_ELEMS:
7393		size = 256;
7394		/* OPA specifies 128 elements, of 2 bytes each */
7395		get_vlarb_preempt(ppd->dd, OPA_MAX_VLS, t);
7396		break;
7397	case FM_TBL_VL_PREEMPT_MATRIX:
7398		size = 256;
7399		/*
7400		 * OPA specifies that this is the same size as the VL
7401		 * arbitration tables (i.e., 256 bytes).
7402		 */
7403		break;
7404	default:
7405		return -EINVAL;
7406	}
7407	return size;
7408}
7409
7410/*
7411 * Write the given fabric manager table.
7412 */
7413int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t)
7414{
7415	int ret = 0;
7416	struct vl_arb_cache *vlc;
7417
7418	switch (which) {
7419	case FM_TBL_VL_HIGH_ARB:
7420		vlc = vl_arb_lock_cache(ppd, HI_PRIO_TABLE);
7421		if (vl_arb_match_cache(vlc, t)) {
7422			vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7423			break;
7424		}
7425		vl_arb_set_cache(vlc, t);
7426		vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7427		ret = set_vl_weights(ppd, SEND_HIGH_PRIORITY_LIST,
7428				     VL_ARB_HIGH_PRIO_TABLE_SIZE, t);
7429		break;
7430	case FM_TBL_VL_LOW_ARB:
7431		vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
7432		if (vl_arb_match_cache(vlc, t)) {
7433			vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7434			break;
7435		}
7436		vl_arb_set_cache(vlc, t);
7437		vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7438		ret = set_vl_weights(ppd, SEND_LOW_PRIORITY_LIST,
7439				     VL_ARB_LOW_PRIO_TABLE_SIZE, t);
7440		break;
7441	case FM_TBL_BUFFER_CONTROL:
7442		ret = set_buffer_control(ppd->dd, t);
7443		break;
7444	case FM_TBL_SC2VLNT:
7445		set_sc2vlnt(ppd->dd, t);
7446		break;
7447	default:
7448		ret = -EINVAL;
7449	}
7450	return ret;
7451}
7452
7453/*
7454 * Disable all data VLs.
7455 *
7456 * Return 0 if disabled, non-zero if the VLs cannot be disabled.
7457 */
7458static int disable_data_vls(struct hfi1_devdata *dd)
7459{
7460	if (is_a0(dd))
7461		return 1;
7462
7463	pio_send_control(dd, PSC_DATA_VL_DISABLE);
7464
7465	return 0;
7466}
7467
7468/*
7469 * open_fill_data_vls() - the counterpart to stop_drain_data_vls().
7470 * Just re-enables all data VLs (the "fill" part happens
7471 * automatically - the name was chosen for symmetry with
7472 * stop_drain_data_vls()).
7473 *
7474 * Return 0 if successful, non-zero if the VLs cannot be enabled.
7475 */
7476int open_fill_data_vls(struct hfi1_devdata *dd)
7477{
7478	if (is_a0(dd))
7479		return 1;
7480
7481	pio_send_control(dd, PSC_DATA_VL_ENABLE);
7482
7483	return 0;
7484}
7485
7486/*
7487 * drain_data_vls() - assumes that disable_data_vls() has been called,
7488 * wait for occupancy (of per-VL FIFOs) for all contexts, and SDMA
7489 * engines to drop to 0.
7490 */
7491static void drain_data_vls(struct hfi1_devdata *dd)
7492{
7493	sc_wait(dd);
7494	sdma_wait(dd);
7495	pause_for_credit_return(dd);
7496}
7497
7498/*
7499 * stop_drain_data_vls() - disable, then drain all per-VL fifos.
7500 *
7501 * Use open_fill_data_vls() to resume using data VLs.  This pair is
7502 * meant to be used like this:
7503 *
7504 * stop_drain_data_vls(dd);
7505 * // do things with per-VL resources
7506 * open_fill_data_vls(dd);
7507 */
7508int stop_drain_data_vls(struct hfi1_devdata *dd)
7509{
7510	int ret;
7511
7512	ret = disable_data_vls(dd);
7513	if (ret == 0)
7514		drain_data_vls(dd);
7515
7516	return ret;
7517}
7518
7519/*
7520 * Convert a nanosecond time to a cclock count.  No matter how slow
7521 * the cclock, a non-zero ns will always have a non-zero result.
7522 */
7523u32 ns_to_cclock(struct hfi1_devdata *dd, u32 ns)
7524{
7525	u32 cclocks;
7526
7527	if (dd->icode == ICODE_FPGA_EMULATION)
7528		cclocks = (ns * 1000) / FPGA_CCLOCK_PS;
7529	else  /* simulation pretends to be ASIC */
7530		cclocks = (ns * 1000) / ASIC_CCLOCK_PS;
7531	if (ns && !cclocks)	/* if ns nonzero, must be at least 1 */
7532		cclocks = 1;
7533	return cclocks;
7534}
7535
7536/*
7537 * Convert a cclock count to nanoseconds. Not matter how slow
7538 * the cclock, a non-zero cclocks will always have a non-zero result.
7539 */
7540u32 cclock_to_ns(struct hfi1_devdata *dd, u32 cclocks)
7541{
7542	u32 ns;
7543
7544	if (dd->icode == ICODE_FPGA_EMULATION)
7545		ns = (cclocks * FPGA_CCLOCK_PS) / 1000;
7546	else  /* simulation pretends to be ASIC */
7547		ns = (cclocks * ASIC_CCLOCK_PS) / 1000;
7548	if (cclocks && !ns)
7549		ns = 1;
7550	return ns;
7551}
7552
7553/*
7554 * Dynamically adjust the receive interrupt timeout for a context based on
7555 * incoming packet rate.
7556 *
7557 * NOTE: Dynamic adjustment does not allow rcv_intr_count to be zero.
7558 */
7559static void adjust_rcv_timeout(struct hfi1_ctxtdata *rcd, u32 npkts)
7560{
7561	struct hfi1_devdata *dd = rcd->dd;
7562	u32 timeout = rcd->rcvavail_timeout;
7563
7564	/*
7565	 * This algorithm doubles or halves the timeout depending on whether
7566	 * the number of packets received in this interrupt were less than or
7567	 * greater equal the interrupt count.
7568	 *
7569	 * The calculations below do not allow a steady state to be achieved.
7570	 * Only at the endpoints it is possible to have an unchanging
7571	 * timeout.
7572	 */
7573	if (npkts < rcv_intr_count) {
7574		/*
7575		 * Not enough packets arrived before the timeout, adjust
7576		 * timeout downward.
7577		 */
7578		if (timeout < 2) /* already at minimum? */
7579			return;
7580		timeout >>= 1;
7581	} else {
7582		/*
7583		 * More than enough packets arrived before the timeout, adjust
7584		 * timeout upward.
7585		 */
7586		if (timeout >= dd->rcv_intr_timeout_csr) /* already at max? */
7587			return;
7588		timeout = min(timeout << 1, dd->rcv_intr_timeout_csr);
7589	}
7590
7591	rcd->rcvavail_timeout = timeout;
7592	/* timeout cannot be larger than rcv_intr_timeout_csr which has already
7593	   been verified to be in range */
7594	write_kctxt_csr(dd, rcd->ctxt, RCV_AVAIL_TIME_OUT,
7595		(u64)timeout << RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
7596}
7597
7598void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
7599		    u32 intr_adjust, u32 npkts)
7600{
7601	struct hfi1_devdata *dd = rcd->dd;
7602	u64 reg;
7603	u32 ctxt = rcd->ctxt;
7604
7605	/*
7606	 * Need to write timeout register before updating RcvHdrHead to ensure
7607	 * that a new value is used when the HW decides to restart counting.
7608	 */
7609	if (intr_adjust)
7610		adjust_rcv_timeout(rcd, npkts);
7611	if (updegr) {
7612		reg = (egrhd & RCV_EGR_INDEX_HEAD_HEAD_MASK)
7613			<< RCV_EGR_INDEX_HEAD_HEAD_SHIFT;
7614		write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, reg);
7615	}
7616	mmiowb();
7617	reg = ((u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT) |
7618		(((u64)hd & RCV_HDR_HEAD_HEAD_MASK)
7619			<< RCV_HDR_HEAD_HEAD_SHIFT);
7620	write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
7621	mmiowb();
7622}
7623
7624u32 hdrqempty(struct hfi1_ctxtdata *rcd)
7625{
7626	u32 head, tail;
7627
7628	head = (read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD)
7629		& RCV_HDR_HEAD_HEAD_SMASK) >> RCV_HDR_HEAD_HEAD_SHIFT;
7630
7631	if (rcd->rcvhdrtail_kvaddr)
7632		tail = get_rcvhdrtail(rcd);
7633	else
7634		tail = read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
7635
7636	return head == tail;
7637}
7638
7639/*
7640 * Context Control and Receive Array encoding for buffer size:
7641 *	0x0 invalid
7642 *	0x1   4 KB
7643 *	0x2   8 KB
7644 *	0x3  16 KB
7645 *	0x4  32 KB
7646 *	0x5  64 KB
7647 *	0x6 128 KB
7648 *	0x7 256 KB
7649 *	0x8 512 KB (Receive Array only)
7650 *	0x9   1 MB (Receive Array only)
7651 *	0xa   2 MB (Receive Array only)
7652 *
7653 *	0xB-0xF - reserved (Receive Array only)
7654 *
7655 *
7656 * This routine assumes that the value has already been sanity checked.
7657 */
7658static u32 encoded_size(u32 size)
7659{
7660	switch (size) {
7661	case   4*1024: return 0x1;
7662	case   8*1024: return 0x2;
7663	case  16*1024: return 0x3;
7664	case  32*1024: return 0x4;
7665	case  64*1024: return 0x5;
7666	case 128*1024: return 0x6;
7667	case 256*1024: return 0x7;
7668	case 512*1024: return 0x8;
7669	case   1*1024*1024: return 0x9;
7670	case   2*1024*1024: return 0xa;
7671	}
7672	return 0x1;	/* if invalid, go with the minimum size */
7673}
7674
7675void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
7676{
7677	struct hfi1_ctxtdata *rcd;
7678	u64 rcvctrl, reg;
7679	int did_enable = 0;
7680
7681	rcd = dd->rcd[ctxt];
7682	if (!rcd)
7683		return;
7684
7685	hfi1_cdbg(RCVCTRL, "ctxt %d op 0x%x", ctxt, op);
7686
7687	rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL);
7688	/* if the context already enabled, don't do the extra steps */
7689	if ((op & HFI1_RCVCTRL_CTXT_ENB)
7690			&& !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
7691		/* reset the tail and hdr addresses, and sequence count */
7692		write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
7693				rcd->rcvhdrq_phys);
7694		if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
7695			write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
7696					rcd->rcvhdrqtailaddr_phys);
7697		rcd->seq_cnt = 1;
7698
7699		/* reset the cached receive header queue head value */
7700		rcd->head = 0;
7701
7702		/*
7703		 * Zero the receive header queue so we don't get false
7704		 * positives when checking the sequence number.  The
7705		 * sequence numbers could land exactly on the same spot.
7706		 * E.g. a rcd restart before the receive header wrapped.
7707		 */
7708		memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size);
7709
7710		/* starting timeout */
7711		rcd->rcvavail_timeout = dd->rcv_intr_timeout_csr;
7712
7713		/* enable the context */
7714		rcvctrl |= RCV_CTXT_CTRL_ENABLE_SMASK;
7715
7716		/* clean the egr buffer size first */
7717		rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
7718		rcvctrl |= ((u64)encoded_size(rcd->egrbufs.rcvtid_size)
7719				& RCV_CTXT_CTRL_EGR_BUF_SIZE_MASK)
7720					<< RCV_CTXT_CTRL_EGR_BUF_SIZE_SHIFT;
7721
7722		/* zero RcvHdrHead - set RcvHdrHead.Counter after enable */
7723		write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0);
7724		did_enable = 1;
7725
7726		/* zero RcvEgrIndexHead */
7727		write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, 0);
7728
7729		/* set eager count and base index */
7730		reg = (((u64)(rcd->egrbufs.alloced >> RCV_SHIFT)
7731			& RCV_EGR_CTRL_EGR_CNT_MASK)
7732		       << RCV_EGR_CTRL_EGR_CNT_SHIFT) |
7733			(((rcd->eager_base >> RCV_SHIFT)
7734			  & RCV_EGR_CTRL_EGR_BASE_INDEX_MASK)
7735			 << RCV_EGR_CTRL_EGR_BASE_INDEX_SHIFT);
7736		write_kctxt_csr(dd, ctxt, RCV_EGR_CTRL, reg);
7737
7738		/*
7739		 * Set TID (expected) count and base index.
7740		 * rcd->expected_count is set to individual RcvArray entries,
7741		 * not pairs, and the CSR takes a pair-count in groups of
7742		 * four, so divide by 8.
7743		 */
7744		reg = (((rcd->expected_count >> RCV_SHIFT)
7745					& RCV_TID_CTRL_TID_PAIR_CNT_MASK)
7746				<< RCV_TID_CTRL_TID_PAIR_CNT_SHIFT) |
7747		      (((rcd->expected_base >> RCV_SHIFT)
7748					& RCV_TID_CTRL_TID_BASE_INDEX_MASK)
7749				<< RCV_TID_CTRL_TID_BASE_INDEX_SHIFT);
7750		write_kctxt_csr(dd, ctxt, RCV_TID_CTRL, reg);
7751		if (ctxt == VL15CTXT)
7752			write_csr(dd, RCV_VL15, VL15CTXT);
7753	}
7754	if (op & HFI1_RCVCTRL_CTXT_DIS) {
7755		write_csr(dd, RCV_VL15, 0);
7756		rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
7757	}
7758	if (op & HFI1_RCVCTRL_INTRAVAIL_ENB)
7759		rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
7760	if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
7761		rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
7762	if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys)
7763		rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
7764	if (op & HFI1_RCVCTRL_TAILUPD_DIS)
7765		rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK;
7766	if (op & HFI1_RCVCTRL_TIDFLOW_ENB)
7767		rcvctrl |= RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
7768	if (op & HFI1_RCVCTRL_TIDFLOW_DIS)
7769		rcvctrl &= ~RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
7770	if (op & HFI1_RCVCTRL_ONE_PKT_EGR_ENB) {
7771		/* In one-packet-per-eager mode, the size comes from
7772		   the RcvArray entry. */
7773		rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
7774		rcvctrl |= RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
7775	}
7776	if (op & HFI1_RCVCTRL_ONE_PKT_EGR_DIS)
7777		rcvctrl &= ~RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
7778	if (op & HFI1_RCVCTRL_NO_RHQ_DROP_ENB)
7779		rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK;
7780	if (op & HFI1_RCVCTRL_NO_RHQ_DROP_DIS)
7781		rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK;
7782	if (op & HFI1_RCVCTRL_NO_EGR_DROP_ENB)
7783		rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
7784	if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
7785		rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
7786	rcd->rcvctrl = rcvctrl;
7787	hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
7788	write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl);
7789
7790	/* work around sticky RcvCtxtStatus.BlockedRHQFull */
7791	if (did_enable
7792	    && (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) {
7793		reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
7794		if (reg != 0) {
7795			dd_dev_info(dd, "ctxt %d status %lld (blocked)\n",
7796				ctxt, reg);
7797			read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
7798			write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x10);
7799			write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x00);
7800			read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
7801			reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
7802			dd_dev_info(dd, "ctxt %d status %lld (%s blocked)\n",
7803				ctxt, reg, reg == 0 ? "not" : "still");
7804		}
7805	}
7806
7807	if (did_enable) {
7808		/*
7809		 * The interrupt timeout and count must be set after
7810		 * the context is enabled to take effect.
7811		 */
7812		/* set interrupt timeout */
7813		write_kctxt_csr(dd, ctxt, RCV_AVAIL_TIME_OUT,
7814			(u64)rcd->rcvavail_timeout <<
7815				RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
7816
7817		/* set RcvHdrHead.Counter, zero RcvHdrHead.Head (again) */
7818		reg = (u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT;
7819		write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
7820	}
7821
7822	if (op & (HFI1_RCVCTRL_TAILUPD_DIS | HFI1_RCVCTRL_CTXT_DIS))
7823		/*
7824		 * If the context has been disabled and the Tail Update has
7825		 * been cleared, clear the RCV_HDR_TAIL_ADDR CSR so
7826		 * it doesn't contain an address that is invalid.
7827		 */
7828		write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, 0);
7829}
7830
7831u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
7832		    u64 **cntrp)
7833{
7834	int ret;
7835	u64 val = 0;
7836
7837	if (namep) {
7838		ret = dd->cntrnameslen;
7839		if (pos != 0) {
7840			dd_dev_err(dd, "read_cntrs does not support indexing");
7841			return 0;
7842		}
7843		*namep = dd->cntrnames;
7844	} else {
7845		const struct cntr_entry *entry;
7846		int i, j;
7847
7848		ret = (dd->ndevcntrs) * sizeof(u64);
7849		if (pos != 0) {
7850			dd_dev_err(dd, "read_cntrs does not support indexing");
7851			return 0;
7852		}
7853
7854		/* Get the start of the block of counters */
7855		*cntrp = dd->cntrs;
7856
7857		/*
7858		 * Now go and fill in each counter in the block.
7859		 */
7860		for (i = 0; i < DEV_CNTR_LAST; i++) {
7861			entry = &dev_cntrs[i];
7862			hfi1_cdbg(CNTR, "reading %s", entry->name);
7863			if (entry->flags & CNTR_DISABLED) {
7864				/* Nothing */
7865				hfi1_cdbg(CNTR, "\tDisabled\n");
7866			} else {
7867				if (entry->flags & CNTR_VL) {
7868					hfi1_cdbg(CNTR, "\tPer VL\n");
7869					for (j = 0; j < C_VL_COUNT; j++) {
7870						val = entry->rw_cntr(entry,
7871								  dd, j,
7872								  CNTR_MODE_R,
7873								  0);
7874						hfi1_cdbg(
7875						   CNTR,
7876						   "\t\tRead 0x%llx for %d\n",
7877						   val, j);
7878						dd->cntrs[entry->offset + j] =
7879									    val;
7880					}
7881				} else {
7882					val = entry->rw_cntr(entry, dd,
7883							CNTR_INVALID_VL,
7884							CNTR_MODE_R, 0);
7885					dd->cntrs[entry->offset] = val;
7886					hfi1_cdbg(CNTR, "\tRead 0x%llx", val);
7887				}
7888			}
7889		}
7890	}
7891	return ret;
7892}
7893
7894/*
7895 * Used by sysfs to create files for hfi stats to read
7896 */
7897u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port,
7898			char **namep, u64 **cntrp)
7899{
7900	int ret;
7901	u64 val = 0;
7902
7903	if (namep) {
7904		ret = dd->portcntrnameslen;
7905		if (pos != 0) {
7906			dd_dev_err(dd, "index not supported");
7907			return 0;
7908		}
7909		*namep = dd->portcntrnames;
7910	} else {
7911		const struct cntr_entry *entry;
7912		struct hfi1_pportdata *ppd;
7913		int i, j;
7914
7915		ret = (dd->nportcntrs) * sizeof(u64);
7916		if (pos != 0) {
7917			dd_dev_err(dd, "indexing not supported");
7918			return 0;
7919		}
7920		ppd = (struct hfi1_pportdata *)(dd + 1 + port);
7921		*cntrp = ppd->cntrs;
7922
7923		for (i = 0; i < PORT_CNTR_LAST; i++) {
7924			entry = &port_cntrs[i];
7925			hfi1_cdbg(CNTR, "reading %s", entry->name);
7926			if (entry->flags & CNTR_DISABLED) {
7927				/* Nothing */
7928				hfi1_cdbg(CNTR, "\tDisabled\n");
7929				continue;
7930			}
7931
7932			if (entry->flags & CNTR_VL) {
7933				hfi1_cdbg(CNTR, "\tPer VL");
7934				for (j = 0; j < C_VL_COUNT; j++) {
7935					val = entry->rw_cntr(entry, ppd, j,
7936							       CNTR_MODE_R,
7937							       0);
7938					hfi1_cdbg(
7939					   CNTR,
7940					   "\t\tRead 0x%llx for %d",
7941					   val, j);
7942					ppd->cntrs[entry->offset + j] = val;
7943				}
7944			} else {
7945				val = entry->rw_cntr(entry, ppd,
7946						       CNTR_INVALID_VL,
7947						       CNTR_MODE_R,
7948						       0);
7949				ppd->cntrs[entry->offset] = val;
7950				hfi1_cdbg(CNTR, "\tRead 0x%llx", val);
7951			}
7952		}
7953	}
7954	return ret;
7955}
7956
7957static void free_cntrs(struct hfi1_devdata *dd)
7958{
7959	struct hfi1_pportdata *ppd;
7960	int i;
7961
7962	if (dd->synth_stats_timer.data)
7963		del_timer_sync(&dd->synth_stats_timer);
7964	dd->synth_stats_timer.data = 0;
7965	ppd = (struct hfi1_pportdata *)(dd + 1);
7966	for (i = 0; i < dd->num_pports; i++, ppd++) {
7967		kfree(ppd->cntrs);
7968		kfree(ppd->scntrs);
7969		free_percpu(ppd->ibport_data.rc_acks);
7970		free_percpu(ppd->ibport_data.rc_qacks);
7971		free_percpu(ppd->ibport_data.rc_delayed_comp);
7972		ppd->cntrs = NULL;
7973		ppd->scntrs = NULL;
7974		ppd->ibport_data.rc_acks = NULL;
7975		ppd->ibport_data.rc_qacks = NULL;
7976		ppd->ibport_data.rc_delayed_comp = NULL;
7977	}
7978	kfree(dd->portcntrnames);
7979	dd->portcntrnames = NULL;
7980	kfree(dd->cntrs);
7981	dd->cntrs = NULL;
7982	kfree(dd->scntrs);
7983	dd->scntrs = NULL;
7984	kfree(dd->cntrnames);
7985	dd->cntrnames = NULL;
7986}
7987
7988#define CNTR_MAX 0xFFFFFFFFFFFFFFFFULL
7989#define CNTR_32BIT_MAX 0x00000000FFFFFFFF
7990
7991static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry,
7992			      u64 *psval, void *context, int vl)
7993{
7994	u64 val;
7995	u64 sval = *psval;
7996
7997	if (entry->flags & CNTR_DISABLED) {
7998		dd_dev_err(dd, "Counter %s not enabled", entry->name);
7999		return 0;
8000	}
8001
8002	hfi1_cdbg(CNTR, "cntr: %s vl %d psval 0x%llx", entry->name, vl, *psval);
8003
8004	val = entry->rw_cntr(entry, context, vl, CNTR_MODE_R, 0);
8005
8006	/* If its a synthetic counter there is more work we need to do */
8007	if (entry->flags & CNTR_SYNTH) {
8008		if (sval == CNTR_MAX) {
8009			/* No need to read already saturated */
8010			return CNTR_MAX;
8011		}
8012
8013		if (entry->flags & CNTR_32BIT) {
8014			/* 32bit counters can wrap multiple times */
8015			u64 upper = sval >> 32;
8016			u64 lower = (sval << 32) >> 32;
8017
8018			if (lower > val) { /* hw wrapped */
8019				if (upper == CNTR_32BIT_MAX)
8020					val = CNTR_MAX;
8021				else
8022					upper++;
8023			}
8024
8025			if (val != CNTR_MAX)
8026				val = (upper << 32) | val;
8027
8028		} else {
8029			/* If we rolled we are saturated */
8030			if ((val < sval) || (val > CNTR_MAX))
8031				val = CNTR_MAX;
8032		}
8033	}
8034
8035	*psval = val;
8036
8037	hfi1_cdbg(CNTR, "\tNew val=0x%llx", val);
8038
8039	return val;
8040}
8041
8042static u64 write_dev_port_cntr(struct hfi1_devdata *dd,
8043			       struct cntr_entry *entry,
8044			       u64 *psval, void *context, int vl, u64 data)
8045{
8046	u64 val;
8047
8048	if (entry->flags & CNTR_DISABLED) {
8049		dd_dev_err(dd, "Counter %s not enabled", entry->name);
8050		return 0;
8051	}
8052
8053	hfi1_cdbg(CNTR, "cntr: %s vl %d psval 0x%llx", entry->name, vl, *psval);
8054
8055	if (entry->flags & CNTR_SYNTH) {
8056		*psval = data;
8057		if (entry->flags & CNTR_32BIT) {
8058			val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W,
8059					     (data << 32) >> 32);
8060			val = data; /* return the full 64bit value */
8061		} else {
8062			val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W,
8063					     data);
8064		}
8065	} else {
8066		val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W, data);
8067	}
8068
8069	*psval = val;
8070
8071	hfi1_cdbg(CNTR, "\tNew val=0x%llx", val);
8072
8073	return val;
8074}
8075
8076u64 read_dev_cntr(struct hfi1_devdata *dd, int index, int vl)
8077{
8078	struct cntr_entry *entry;
8079	u64 *sval;
8080
8081	entry = &dev_cntrs[index];
8082	sval = dd->scntrs + entry->offset;
8083
8084	if (vl != CNTR_INVALID_VL)
8085		sval += vl;
8086
8087	return read_dev_port_cntr(dd, entry, sval, dd, vl);
8088}
8089
8090u64 write_dev_cntr(struct hfi1_devdata *dd, int index, int vl, u64 data)
8091{
8092	struct cntr_entry *entry;
8093	u64 *sval;
8094
8095	entry = &dev_cntrs[index];
8096	sval = dd->scntrs + entry->offset;
8097
8098	if (vl != CNTR_INVALID_VL)
8099		sval += vl;
8100
8101	return write_dev_port_cntr(dd, entry, sval, dd, vl, data);
8102}
8103
8104u64 read_port_cntr(struct hfi1_pportdata *ppd, int index, int vl)
8105{
8106	struct cntr_entry *entry;
8107	u64 *sval;
8108
8109	entry = &port_cntrs[index];
8110	sval = ppd->scntrs + entry->offset;
8111
8112	if (vl != CNTR_INVALID_VL)
8113		sval += vl;
8114
8115	if ((index >= C_RCV_HDR_OVF_FIRST + ppd->dd->num_rcv_contexts) &&
8116	    (index <= C_RCV_HDR_OVF_LAST)) {
8117		/* We do not want to bother for disabled contexts */
8118		return 0;
8119	}
8120
8121	return read_dev_port_cntr(ppd->dd, entry, sval, ppd, vl);
8122}
8123
8124u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data)
8125{
8126	struct cntr_entry *entry;
8127	u64 *sval;
8128
8129	entry = &port_cntrs[index];
8130	sval = ppd->scntrs + entry->offset;
8131
8132	if (vl != CNTR_INVALID_VL)
8133		sval += vl;
8134
8135	if ((index >= C_RCV_HDR_OVF_FIRST + ppd->dd->num_rcv_contexts) &&
8136	    (index <= C_RCV_HDR_OVF_LAST)) {
8137		/* We do not want to bother for disabled contexts */
8138		return 0;
8139	}
8140
8141	return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data);
8142}
8143
8144static void update_synth_timer(unsigned long opaque)
8145{
8146	u64 cur_tx;
8147	u64 cur_rx;
8148	u64 total_flits;
8149	u8 update = 0;
8150	int i, j, vl;
8151	struct hfi1_pportdata *ppd;
8152	struct cntr_entry *entry;
8153
8154	struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
8155
8156	/*
8157	 * Rather than keep beating on the CSRs pick a minimal set that we can
8158	 * check to watch for potential roll over. We can do this by looking at
8159	 * the number of flits sent/recv. If the total flits exceeds 32bits then
8160	 * we have to iterate all the counters and update.
8161	 */
8162	entry = &dev_cntrs[C_DC_RCV_FLITS];
8163	cur_rx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
8164
8165	entry = &dev_cntrs[C_DC_XMIT_FLITS];
8166	cur_tx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
8167
8168	hfi1_cdbg(
8169	    CNTR,
8170	    "[%d] curr tx=0x%llx rx=0x%llx :: last tx=0x%llx rx=0x%llx\n",
8171	    dd->unit, cur_tx, cur_rx, dd->last_tx, dd->last_rx);
8172
8173	if ((cur_tx < dd->last_tx) || (cur_rx < dd->last_rx)) {
8174		/*
8175		 * May not be strictly necessary to update but it won't hurt and
8176		 * simplifies the logic here.
8177		 */
8178		update = 1;
8179		hfi1_cdbg(CNTR, "[%d] Tripwire counter rolled, updating",
8180			  dd->unit);
8181	} else {
8182		total_flits = (cur_tx - dd->last_tx) + (cur_rx - dd->last_rx);
8183		hfi1_cdbg(CNTR,
8184			  "[%d] total flits 0x%llx limit 0x%llx\n", dd->unit,
8185			  total_flits, (u64)CNTR_32BIT_MAX);
8186		if (total_flits >= CNTR_32BIT_MAX) {
8187			hfi1_cdbg(CNTR, "[%d] 32bit limit hit, updating",
8188				  dd->unit);
8189			update = 1;
8190		}
8191	}
8192
8193	if (update) {
8194		hfi1_cdbg(CNTR, "[%d] Updating dd and ppd counters", dd->unit);
8195		for (i = 0; i < DEV_CNTR_LAST; i++) {
8196			entry = &dev_cntrs[i];
8197			if (entry->flags & CNTR_VL) {
8198				for (vl = 0; vl < C_VL_COUNT; vl++)
8199					read_dev_cntr(dd, i, vl);
8200			} else {
8201				read_dev_cntr(dd, i, CNTR_INVALID_VL);
8202			}
8203		}
8204		ppd = (struct hfi1_pportdata *)(dd + 1);
8205		for (i = 0; i < dd->num_pports; i++, ppd++) {
8206			for (j = 0; j < PORT_CNTR_LAST; j++) {
8207				entry = &port_cntrs[j];
8208				if (entry->flags & CNTR_VL) {
8209					for (vl = 0; vl < C_VL_COUNT; vl++)
8210						read_port_cntr(ppd, j, vl);
8211				} else {
8212					read_port_cntr(ppd, j, CNTR_INVALID_VL);
8213				}
8214			}
8215		}
8216
8217		/*
8218		 * We want the value in the register. The goal is to keep track
8219		 * of the number of "ticks" not the counter value. In other
8220		 * words if the register rolls we want to notice it and go ahead
8221		 * and force an update.
8222		 */
8223		entry = &dev_cntrs[C_DC_XMIT_FLITS];
8224		dd->last_tx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL,
8225						CNTR_MODE_R, 0);
8226
8227		entry = &dev_cntrs[C_DC_RCV_FLITS];
8228		dd->last_rx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL,
8229						CNTR_MODE_R, 0);
8230
8231		hfi1_cdbg(CNTR, "[%d] setting last tx/rx to 0x%llx 0x%llx",
8232			  dd->unit, dd->last_tx, dd->last_rx);
8233
8234	} else {
8235		hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit);
8236	}
8237
8238mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
8239}
8240
8241#define C_MAX_NAME 13 /* 12 chars + one for /0 */
8242static int init_cntrs(struct hfi1_devdata *dd)
8243{
8244	int i, rcv_ctxts, index, j;
8245	size_t sz;
8246	char *p;
8247	char name[C_MAX_NAME];
8248	struct hfi1_pportdata *ppd;
8249
8250	/* set up the stats timer; the add_timer is done at the end */
8251	setup_timer(&dd->synth_stats_timer, update_synth_timer,
8252		    (unsigned long)dd);
8253
8254	/***********************/
8255	/* per device counters */
8256	/***********************/
8257
8258	/* size names and determine how many we have*/
8259	dd->ndevcntrs = 0;
8260	sz = 0;
8261	index = 0;
8262
8263	for (i = 0; i < DEV_CNTR_LAST; i++) {
8264		hfi1_dbg_early("Init cntr %s\n", dev_cntrs[i].name);
8265		if (dev_cntrs[i].flags & CNTR_DISABLED) {
8266			hfi1_dbg_early("\tSkipping %s\n", dev_cntrs[i].name);
8267			continue;
8268		}
8269
8270		if (dev_cntrs[i].flags & CNTR_VL) {
8271			hfi1_dbg_early("\tProcessing VL cntr\n");
8272			dev_cntrs[i].offset = index;
8273			for (j = 0; j < C_VL_COUNT; j++) {
8274				memset(name, '\0', C_MAX_NAME);
8275				snprintf(name, C_MAX_NAME, "%s%d",
8276					dev_cntrs[i].name,
8277					vl_from_idx(j));
8278				sz += strlen(name);
8279				sz++;
8280				hfi1_dbg_early("\t\t%s\n", name);
8281				dd->ndevcntrs++;
8282				index++;
8283			}
8284		} else {
8285			/* +1 for newline  */
8286			sz += strlen(dev_cntrs[i].name) + 1;
8287			dd->ndevcntrs++;
8288			dev_cntrs[i].offset = index;
8289			index++;
8290			hfi1_dbg_early("\tAdding %s\n", dev_cntrs[i].name);
8291		}
8292	}
8293
8294	/* allocate space for the counter values */
8295	dd->cntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
8296	if (!dd->cntrs)
8297		goto bail;
8298
8299	dd->scntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
8300	if (!dd->scntrs)
8301		goto bail;
8302
8303
8304	/* allocate space for the counter names */
8305	dd->cntrnameslen = sz;
8306	dd->cntrnames = kmalloc(sz, GFP_KERNEL);
8307	if (!dd->cntrnames)
8308		goto bail;
8309
8310	/* fill in the names */
8311	for (p = dd->cntrnames, i = 0, index = 0; i < DEV_CNTR_LAST; i++) {
8312		if (dev_cntrs[i].flags & CNTR_DISABLED) {
8313			/* Nothing */
8314		} else {
8315			if (dev_cntrs[i].flags & CNTR_VL) {
8316				for (j = 0; j < C_VL_COUNT; j++) {
8317					memset(name, '\0', C_MAX_NAME);
8318					snprintf(name, C_MAX_NAME, "%s%d",
8319						dev_cntrs[i].name,
8320						vl_from_idx(j));
8321					memcpy(p, name, strlen(name));
8322					p += strlen(name);
8323					*p++ = '\n';
8324				}
8325			} else {
8326				memcpy(p, dev_cntrs[i].name,
8327				       strlen(dev_cntrs[i].name));
8328				p += strlen(dev_cntrs[i].name);
8329				*p++ = '\n';
8330			}
8331			index++;
8332		}
8333	}
8334
8335	/*********************/
8336	/* per port counters */
8337	/*********************/
8338
8339	/*
8340	 * Go through the counters for the overflows and disable the ones we
8341	 * don't need. This varies based on platform so we need to do it
8342	 * dynamically here.
8343	 */
8344	rcv_ctxts = dd->num_rcv_contexts;
8345	for (i = C_RCV_HDR_OVF_FIRST + rcv_ctxts;
8346	     i <= C_RCV_HDR_OVF_LAST; i++) {
8347		port_cntrs[i].flags |= CNTR_DISABLED;
8348	}
8349
8350	/* size port counter names and determine how many we have*/
8351	sz = 0;
8352	dd->nportcntrs = 0;
8353	for (i = 0; i < PORT_CNTR_LAST; i++) {
8354		hfi1_dbg_early("Init pcntr %s\n", port_cntrs[i].name);
8355		if (port_cntrs[i].flags & CNTR_DISABLED) {
8356			hfi1_dbg_early("\tSkipping %s\n", port_cntrs[i].name);
8357			continue;
8358		}
8359
8360		if (port_cntrs[i].flags & CNTR_VL) {
8361			hfi1_dbg_early("\tProcessing VL cntr\n");
8362			port_cntrs[i].offset = dd->nportcntrs;
8363			for (j = 0; j < C_VL_COUNT; j++) {
8364				memset(name, '\0', C_MAX_NAME);
8365				snprintf(name, C_MAX_NAME, "%s%d",
8366					port_cntrs[i].name,
8367					vl_from_idx(j));
8368				sz += strlen(name);
8369				sz++;
8370				hfi1_dbg_early("\t\t%s\n", name);
8371				dd->nportcntrs++;
8372			}
8373		} else {
8374			/* +1 for newline  */
8375			sz += strlen(port_cntrs[i].name) + 1;
8376			port_cntrs[i].offset = dd->nportcntrs;
8377			dd->nportcntrs++;
8378			hfi1_dbg_early("\tAdding %s\n", port_cntrs[i].name);
8379		}
8380	}
8381
8382	/* allocate space for the counter names */
8383	dd->portcntrnameslen = sz;
8384	dd->portcntrnames = kmalloc(sz, GFP_KERNEL);
8385	if (!dd->portcntrnames)
8386		goto bail;
8387
8388	/* fill in port cntr names */
8389	for (p = dd->portcntrnames, i = 0; i < PORT_CNTR_LAST; i++) {
8390		if (port_cntrs[i].flags & CNTR_DISABLED)
8391			continue;
8392
8393		if (port_cntrs[i].flags & CNTR_VL) {
8394			for (j = 0; j < C_VL_COUNT; j++) {
8395				memset(name, '\0', C_MAX_NAME);
8396				snprintf(name, C_MAX_NAME, "%s%d",
8397					port_cntrs[i].name,
8398					vl_from_idx(j));
8399				memcpy(p, name, strlen(name));
8400				p += strlen(name);
8401				*p++ = '\n';
8402			}
8403		} else {
8404			memcpy(p, port_cntrs[i].name,
8405			       strlen(port_cntrs[i].name));
8406			p += strlen(port_cntrs[i].name);
8407			*p++ = '\n';
8408		}
8409	}
8410
8411	/* allocate per port storage for counter values */
8412	ppd = (struct hfi1_pportdata *)(dd + 1);
8413	for (i = 0; i < dd->num_pports; i++, ppd++) {
8414		ppd->cntrs = kcalloc(dd->nportcntrs, sizeof(u64), GFP_KERNEL);
8415		if (!ppd->cntrs)
8416			goto bail;
8417
8418		ppd->scntrs = kcalloc(dd->nportcntrs, sizeof(u64), GFP_KERNEL);
8419		if (!ppd->scntrs)
8420			goto bail;
8421	}
8422
8423	/* CPU counters need to be allocated and zeroed */
8424	if (init_cpu_counters(dd))
8425		goto bail;
8426
8427	mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
8428	return 0;
8429bail:
8430	free_cntrs(dd);
8431	return -ENOMEM;
8432}
8433
8434
8435static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
8436{
8437	switch (chip_lstate) {
8438	default:
8439		dd_dev_err(dd,
8440			 "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
8441			 chip_lstate);
8442		/* fall through */
8443	case LSTATE_DOWN:
8444		return IB_PORT_DOWN;
8445	case LSTATE_INIT:
8446		return IB_PORT_INIT;
8447	case LSTATE_ARMED:
8448		return IB_PORT_ARMED;
8449	case LSTATE_ACTIVE:
8450		return IB_PORT_ACTIVE;
8451	}
8452}
8453
8454u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
8455{
8456	/* look at the HFI meta-states only */
8457	switch (chip_pstate & 0xf0) {
8458	default:
8459		dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
8460			chip_pstate);
8461		/* fall through */
8462	case PLS_DISABLED:
8463		return IB_PORTPHYSSTATE_DISABLED;
8464	case PLS_OFFLINE:
8465		return OPA_PORTPHYSSTATE_OFFLINE;
8466	case PLS_POLLING:
8467		return IB_PORTPHYSSTATE_POLLING;
8468	case PLS_CONFIGPHY:
8469		return IB_PORTPHYSSTATE_TRAINING;
8470	case PLS_LINKUP:
8471		return IB_PORTPHYSSTATE_LINKUP;
8472	case PLS_PHYTEST:
8473		return IB_PORTPHYSSTATE_PHY_TEST;
8474	}
8475}
8476
8477/* return the OPA port logical state name */
8478const char *opa_lstate_name(u32 lstate)
8479{
8480	static const char * const port_logical_names[] = {
8481		"PORT_NOP",
8482		"PORT_DOWN",
8483		"PORT_INIT",
8484		"PORT_ARMED",
8485		"PORT_ACTIVE",
8486		"PORT_ACTIVE_DEFER",
8487	};
8488	if (lstate < ARRAY_SIZE(port_logical_names))
8489		return port_logical_names[lstate];
8490	return "unknown";
8491}
8492
8493/* return the OPA port physical state name */
8494const char *opa_pstate_name(u32 pstate)
8495{
8496	static const char * const port_physical_names[] = {
8497		"PHYS_NOP",
8498		"reserved1",
8499		"PHYS_POLL",
8500		"PHYS_DISABLED",
8501		"PHYS_TRAINING",
8502		"PHYS_LINKUP",
8503		"PHYS_LINK_ERR_RECOVER",
8504		"PHYS_PHY_TEST",
8505		"reserved8",
8506		"PHYS_OFFLINE",
8507		"PHYS_GANGED",
8508		"PHYS_TEST",
8509	};
8510	if (pstate < ARRAY_SIZE(port_physical_names))
8511		return port_physical_names[pstate];
8512	return "unknown";
8513}
8514
8515/*
8516 * Read the hardware link state and set the driver's cached value of it.
8517 * Return the (new) current value.
8518 */
8519u32 get_logical_state(struct hfi1_pportdata *ppd)
8520{
8521	u32 new_state;
8522
8523	new_state = chip_to_opa_lstate(ppd->dd, read_logical_state(ppd->dd));
8524	if (new_state != ppd->lstate) {
8525		dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
8526			opa_lstate_name(new_state), new_state);
8527		ppd->lstate = new_state;
8528	}
8529	/*
8530	 * Set port status flags in the page mapped into userspace
8531	 * memory. Do it here to ensure a reliable state - this is
8532	 * the only function called by all state handling code.
8533	 * Always set the flags due to the fact that the cache value
8534	 * might have been changed explicitly outside of this
8535	 * function.
8536	 */
8537	if (ppd->statusp) {
8538		switch (ppd->lstate) {
8539		case IB_PORT_DOWN:
8540		case IB_PORT_INIT:
8541			*ppd->statusp &= ~(HFI1_STATUS_IB_CONF |
8542					   HFI1_STATUS_IB_READY);
8543			break;
8544		case IB_PORT_ARMED:
8545			*ppd->statusp |= HFI1_STATUS_IB_CONF;
8546			break;
8547		case IB_PORT_ACTIVE:
8548			*ppd->statusp |= HFI1_STATUS_IB_READY;
8549			break;
8550		}
8551	}
8552	return ppd->lstate;
8553}
8554
8555/**
8556 * wait_logical_linkstate - wait for an IB link state change to occur
8557 * @ppd: port device
8558 * @state: the state to wait for
8559 * @msecs: the number of milliseconds to wait
8560 *
8561 * Wait up to msecs milliseconds for IB link state change to occur.
8562 * For now, take the easy polling route.
8563 * Returns 0 if state reached, otherwise -ETIMEDOUT.
8564 */
8565static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
8566				  int msecs)
8567{
8568	unsigned long timeout;
8569
8570	timeout = jiffies + msecs_to_jiffies(msecs);
8571	while (1) {
8572		if (get_logical_state(ppd) == state)
8573			return 0;
8574		if (time_after(jiffies, timeout))
8575			break;
8576		msleep(20);
8577	}
8578	dd_dev_err(ppd->dd, "timeout waiting for link state 0x%x\n", state);
8579
8580	return -ETIMEDOUT;
8581}
8582
8583u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
8584{
8585	static u32 remembered_state = 0xff;
8586	u32 pstate;
8587	u32 ib_pstate;
8588
8589	pstate = read_physical_state(ppd->dd);
8590	ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
8591	if (remembered_state != ib_pstate) {
8592		dd_dev_info(ppd->dd,
8593			"%s: physical state changed to %s (0x%x), phy 0x%x\n",
8594			__func__, opa_pstate_name(ib_pstate), ib_pstate,
8595			pstate);
8596		remembered_state = ib_pstate;
8597	}
8598	return ib_pstate;
8599}
8600
8601/*
8602 * Read/modify/write ASIC_QSFP register bits as selected by mask
8603 * data: 0 or 1 in the positions depending on what needs to be written
8604 * dir: 0 for read, 1 for write
8605 * mask: select by setting
8606 *      I2CCLK  (bit 0)
8607 *      I2CDATA (bit 1)
8608 */
8609u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir,
8610		  u32 mask)
8611{
8612	u64 qsfp_oe, target_oe;
8613
8614	target_oe = target ? ASIC_QSFP2_OE : ASIC_QSFP1_OE;
8615	if (mask) {
8616		/* We are writing register bits, so lock access */
8617		dir &= mask;
8618		data &= mask;
8619
8620		qsfp_oe = read_csr(dd, target_oe);
8621		qsfp_oe = (qsfp_oe & ~(u64)mask) | (u64)dir;
8622		write_csr(dd, target_oe, qsfp_oe);
8623	}
8624	/* We are exclusively reading bits here, but it is unlikely
8625	 * we'll get valid data when we set the direction of the pin
8626	 * in the same call, so read should call this function again
8627	 * to get valid data
8628	 */
8629	return read_csr(dd, target ? ASIC_QSFP2_IN : ASIC_QSFP1_IN);
8630}
8631
8632#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
8633(r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
8634
8635#define SET_STATIC_RATE_CONTROL_SMASK(r) \
8636(r |= SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
8637
8638int hfi1_init_ctxt(struct send_context *sc)
8639{
8640	if (sc != NULL) {
8641		struct hfi1_devdata *dd = sc->dd;
8642		u64 reg;
8643		u8 set = (sc->type == SC_USER ?
8644			  HFI1_CAP_IS_USET(STATIC_RATE_CTRL) :
8645			  HFI1_CAP_IS_KSET(STATIC_RATE_CTRL));
8646		reg = read_kctxt_csr(dd, sc->hw_context,
8647				     SEND_CTXT_CHECK_ENABLE);
8648		if (set)
8649			CLEAR_STATIC_RATE_CONTROL_SMASK(reg);
8650		else
8651			SET_STATIC_RATE_CONTROL_SMASK(reg);
8652		write_kctxt_csr(dd, sc->hw_context,
8653				SEND_CTXT_CHECK_ENABLE, reg);
8654	}
8655	return 0;
8656}
8657
8658int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
8659{
8660	int ret = 0;
8661	u64 reg;
8662
8663	if (dd->icode != ICODE_RTL_SILICON) {
8664		if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
8665			dd_dev_info(dd, "%s: tempsense not supported by HW\n",
8666				    __func__);
8667		return -EINVAL;
8668	}
8669	reg = read_csr(dd, ASIC_STS_THERM);
8670	temp->curr = ((reg >> ASIC_STS_THERM_CURR_TEMP_SHIFT) &
8671		      ASIC_STS_THERM_CURR_TEMP_MASK);
8672	temp->lo_lim = ((reg >> ASIC_STS_THERM_LO_TEMP_SHIFT) &
8673			ASIC_STS_THERM_LO_TEMP_MASK);
8674	temp->hi_lim = ((reg >> ASIC_STS_THERM_HI_TEMP_SHIFT) &
8675			ASIC_STS_THERM_HI_TEMP_MASK);
8676	temp->crit_lim = ((reg >> ASIC_STS_THERM_CRIT_TEMP_SHIFT) &
8677			  ASIC_STS_THERM_CRIT_TEMP_MASK);
8678	/* triggers is a 3-bit value - 1 bit per trigger. */
8679	temp->triggers = (u8)((reg >> ASIC_STS_THERM_LOW_SHIFT) & 0x7);
8680
8681	return ret;
8682}
8683
8684/* ========================================================================= */
8685
8686/*
8687 * Enable/disable chip from delivering interrupts.
8688 */
8689void set_intr_state(struct hfi1_devdata *dd, u32 enable)
8690{
8691	int i;
8692
8693	/*
8694	 * In HFI, the mask needs to be 1 to allow interrupts.
8695	 */
8696	if (enable) {
8697		u64 cce_int_mask;
8698		const int qsfp1_int_smask = QSFP1_INT % 64;
8699		const int qsfp2_int_smask = QSFP2_INT % 64;
8700
8701		/* enable all interrupts */
8702		for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8703			write_csr(dd, CCE_INT_MASK + (8*i), ~(u64)0);
8704
8705		/*
8706		 * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
8707		 * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
8708		 * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
8709		 * the index of the appropriate CSR in the CCEIntMask CSR array
8710		 */
8711		cce_int_mask = read_csr(dd, CCE_INT_MASK +
8712						(8*(QSFP1_INT/64)));
8713		if (dd->hfi1_id) {
8714			cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
8715			write_csr(dd, CCE_INT_MASK + (8*(QSFP1_INT/64)),
8716					cce_int_mask);
8717		} else {
8718			cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
8719			write_csr(dd, CCE_INT_MASK + (8*(QSFP2_INT/64)),
8720					cce_int_mask);
8721		}
8722	} else {
8723		for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8724			write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
8725	}
8726}
8727
8728/*
8729 * Clear all interrupt sources on the chip.
8730 */
8731static void clear_all_interrupts(struct hfi1_devdata *dd)
8732{
8733	int i;
8734
8735	for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8736		write_csr(dd, CCE_INT_CLEAR + (8*i), ~(u64)0);
8737
8738	write_csr(dd, CCE_ERR_CLEAR, ~(u64)0);
8739	write_csr(dd, MISC_ERR_CLEAR, ~(u64)0);
8740	write_csr(dd, RCV_ERR_CLEAR, ~(u64)0);
8741	write_csr(dd, SEND_ERR_CLEAR, ~(u64)0);
8742	write_csr(dd, SEND_PIO_ERR_CLEAR, ~(u64)0);
8743	write_csr(dd, SEND_DMA_ERR_CLEAR, ~(u64)0);
8744	write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~(u64)0);
8745	for (i = 0; i < dd->chip_send_contexts; i++)
8746		write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~(u64)0);
8747	for (i = 0; i < dd->chip_sdma_engines; i++)
8748		write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~(u64)0);
8749
8750	write_csr(dd, DCC_ERR_FLG_CLR, ~(u64)0);
8751	write_csr(dd, DC_LCB_ERR_CLR, ~(u64)0);
8752	write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
8753}
8754
8755/* Move to pcie.c? */
8756static void disable_intx(struct pci_dev *pdev)
8757{
8758	pci_intx(pdev, 0);
8759}
8760
8761static void clean_up_interrupts(struct hfi1_devdata *dd)
8762{
8763	int i;
8764
8765	/* remove irqs - must happen before disabling/turning off */
8766	if (dd->num_msix_entries) {
8767		/* MSI-X */
8768		struct hfi1_msix_entry *me = dd->msix_entries;
8769
8770		for (i = 0; i < dd->num_msix_entries; i++, me++) {
8771			if (me->arg == NULL) /* => no irq, no affinity */
8772				break;
8773			irq_set_affinity_hint(dd->msix_entries[i].msix.vector,
8774					NULL);
8775			free_irq(me->msix.vector, me->arg);
8776		}
8777	} else {
8778		/* INTx */
8779		if (dd->requested_intx_irq) {
8780			free_irq(dd->pcidev->irq, dd);
8781			dd->requested_intx_irq = 0;
8782		}
8783	}
8784
8785	/* turn off interrupts */
8786	if (dd->num_msix_entries) {
8787		/* MSI-X */
8788		hfi1_nomsix(dd);
8789	} else {
8790		/* INTx */
8791		disable_intx(dd->pcidev);
8792	}
8793
8794	/* clean structures */
8795	for (i = 0; i < dd->num_msix_entries; i++)
8796		free_cpumask_var(dd->msix_entries[i].mask);
8797	kfree(dd->msix_entries);
8798	dd->msix_entries = NULL;
8799	dd->num_msix_entries = 0;
8800}
8801
8802/*
8803 * Remap the interrupt source from the general handler to the given MSI-X
8804 * interrupt.
8805 */
8806static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
8807{
8808	u64 reg;
8809	int m, n;
8810
8811	/* clear from the handled mask of the general interrupt */
8812	m = isrc / 64;
8813	n = isrc % 64;
8814	dd->gi_mask[m] &= ~((u64)1 << n);
8815
8816	/* direct the chip source to the given MSI-X interrupt */
8817	m = isrc / 8;
8818	n = isrc % 8;
8819	reg = read_csr(dd, CCE_INT_MAP + (8*m));
8820	reg &= ~((u64)0xff << (8*n));
8821	reg |= ((u64)msix_intr & 0xff) << (8*n);
8822	write_csr(dd, CCE_INT_MAP + (8*m), reg);
8823}
8824
8825static void remap_sdma_interrupts(struct hfi1_devdata *dd,
8826				  int engine, int msix_intr)
8827{
8828	/*
8829	 * SDMA engine interrupt sources grouped by type, rather than
8830	 * engine.  Per-engine interrupts are as follows:
8831	 *	SDMA
8832	 *	SDMAProgress
8833	 *	SDMAIdle
8834	 */
8835	remap_intr(dd, IS_SDMA_START + 0*TXE_NUM_SDMA_ENGINES + engine,
8836		msix_intr);
8837	remap_intr(dd, IS_SDMA_START + 1*TXE_NUM_SDMA_ENGINES + engine,
8838		msix_intr);
8839	remap_intr(dd, IS_SDMA_START + 2*TXE_NUM_SDMA_ENGINES + engine,
8840		msix_intr);
8841}
8842
8843static void remap_receive_available_interrupt(struct hfi1_devdata *dd,
8844					      int rx, int msix_intr)
8845{
8846	remap_intr(dd, IS_RCVAVAIL_START + rx, msix_intr);
8847}
8848
8849static int request_intx_irq(struct hfi1_devdata *dd)
8850{
8851	int ret;
8852
8853	snprintf(dd->intx_name, sizeof(dd->intx_name), DRIVER_NAME"_%d",
8854		dd->unit);
8855	ret = request_irq(dd->pcidev->irq, general_interrupt,
8856				  IRQF_SHARED, dd->intx_name, dd);
8857	if (ret)
8858		dd_dev_err(dd, "unable to request INTx interrupt, err %d\n",
8859				ret);
8860	else
8861		dd->requested_intx_irq = 1;
8862	return ret;
8863}
8864
8865static int request_msix_irqs(struct hfi1_devdata *dd)
8866{
8867	const struct cpumask *local_mask;
8868	cpumask_var_t def, rcv;
8869	bool def_ret, rcv_ret;
8870	int first_general, last_general;
8871	int first_sdma, last_sdma;
8872	int first_rx, last_rx;
8873	int first_cpu, restart_cpu, curr_cpu;
8874	int rcv_cpu, sdma_cpu;
8875	int i, ret = 0, possible;
8876	int ht;
8877
8878	/* calculate the ranges we are going to use */
8879	first_general = 0;
8880	first_sdma = last_general = first_general + 1;
8881	first_rx = last_sdma = first_sdma + dd->num_sdma;
8882	last_rx = first_rx + dd->n_krcv_queues;
8883
8884	/*
8885	 * Interrupt affinity.
8886	 *
8887	 * non-rcv avail gets a default mask that
8888	 * starts as possible cpus with threads reset
8889	 * and each rcv avail reset.
8890	 *
8891	 * rcv avail gets node relative 1 wrapping back
8892	 * to the node relative 1 as necessary.
8893	 *
8894	 */
8895	local_mask = cpumask_of_pcibus(dd->pcidev->bus);
8896	/* if first cpu is invalid, use NUMA 0 */
8897	if (cpumask_first(local_mask) >= nr_cpu_ids)
8898		local_mask = topology_core_cpumask(0);
8899
8900	def_ret = zalloc_cpumask_var(&def, GFP_KERNEL);
8901	rcv_ret = zalloc_cpumask_var(&rcv, GFP_KERNEL);
8902	if (!def_ret || !rcv_ret)
8903		goto bail;
8904	/* use local mask as default */
8905	cpumask_copy(def, local_mask);
8906	possible = cpumask_weight(def);
8907	/* disarm threads from default */
8908	ht = cpumask_weight(
8909			topology_sibling_cpumask(cpumask_first(local_mask)));
8910	for (i = possible/ht; i < possible; i++)
8911		cpumask_clear_cpu(i, def);
8912	/* reset possible */
8913	possible = cpumask_weight(def);
8914	/* def now has full cores on chosen node*/
8915	first_cpu = cpumask_first(def);
8916	if (nr_cpu_ids >= first_cpu)
8917		first_cpu++;
8918	restart_cpu = first_cpu;
8919	curr_cpu = restart_cpu;
8920
8921	for (i = first_cpu; i < dd->n_krcv_queues + first_cpu; i++) {
8922		cpumask_clear_cpu(curr_cpu, def);
8923		cpumask_set_cpu(curr_cpu, rcv);
8924		if (curr_cpu >= possible)
8925			curr_cpu = restart_cpu;
8926		else
8927			curr_cpu++;
8928	}
8929	/* def mask has non-rcv, rcv has recv mask */
8930	rcv_cpu = cpumask_first(rcv);
8931	sdma_cpu = cpumask_first(def);
8932
8933	/*
8934	 * Sanity check - the code expects all SDMA chip source
8935	 * interrupts to be in the same CSR, starting at bit 0.  Verify
8936	 * that this is true by checking the bit location of the start.
8937	 */
8938	BUILD_BUG_ON(IS_SDMA_START % 64);
8939
8940	for (i = 0; i < dd->num_msix_entries; i++) {
8941		struct hfi1_msix_entry *me = &dd->msix_entries[i];
8942		const char *err_info;
8943		irq_handler_t handler;
8944		irq_handler_t thread = NULL;
8945		void *arg;
8946		int idx;
8947		struct hfi1_ctxtdata *rcd = NULL;
8948		struct sdma_engine *sde = NULL;
8949
8950		/* obtain the arguments to request_irq */
8951		if (first_general <= i && i < last_general) {
8952			idx = i - first_general;
8953			handler = general_interrupt;
8954			arg = dd;
8955			snprintf(me->name, sizeof(me->name),
8956				DRIVER_NAME"_%d", dd->unit);
8957			err_info = "general";
8958		} else if (first_sdma <= i && i < last_sdma) {
8959			idx = i - first_sdma;
8960			sde = &dd->per_sdma[idx];
8961			handler = sdma_interrupt;
8962			arg = sde;
8963			snprintf(me->name, sizeof(me->name),
8964				DRIVER_NAME"_%d sdma%d", dd->unit, idx);
8965			err_info = "sdma";
8966			remap_sdma_interrupts(dd, idx, i);
8967		} else if (first_rx <= i && i < last_rx) {
8968			idx = i - first_rx;
8969			rcd = dd->rcd[idx];
8970			/* no interrupt if no rcd */
8971			if (!rcd)
8972				continue;
8973			/*
8974			 * Set the interrupt register and mask for this
8975			 * context's interrupt.
8976			 */
8977			rcd->ireg = (IS_RCVAVAIL_START+idx) / 64;
8978			rcd->imask = ((u64)1) <<
8979					((IS_RCVAVAIL_START+idx) % 64);
8980			handler = receive_context_interrupt;
8981			thread = receive_context_thread;
8982			arg = rcd;
8983			snprintf(me->name, sizeof(me->name),
8984				DRIVER_NAME"_%d kctxt%d", dd->unit, idx);
8985			err_info = "receive context";
8986			remap_receive_available_interrupt(dd, idx, i);
8987		} else {
8988			/* not in our expected range - complain, then
8989			   ignore it */
8990			dd_dev_err(dd,
8991				"Unexpected extra MSI-X interrupt %d\n", i);
8992			continue;
8993		}
8994		/* no argument, no interrupt */
8995		if (arg == NULL)
8996			continue;
8997		/* make sure the name is terminated */
8998		me->name[sizeof(me->name)-1] = 0;
8999
9000		ret = request_threaded_irq(me->msix.vector, handler, thread, 0,
9001						me->name, arg);
9002		if (ret) {
9003			dd_dev_err(dd,
9004				"unable to allocate %s interrupt, vector %d, index %d, err %d\n",
9005				 err_info, me->msix.vector, idx, ret);
9006			return ret;
9007		}
9008		/*
9009		 * assign arg after request_irq call, so it will be
9010		 * cleaned up
9011		 */
9012		me->arg = arg;
9013
9014		if (!zalloc_cpumask_var(
9015			&dd->msix_entries[i].mask,
9016			GFP_KERNEL))
9017			goto bail;
9018		if (handler == sdma_interrupt) {
9019			dd_dev_info(dd, "sdma engine %d cpu %d\n",
9020				sde->this_idx, sdma_cpu);
9021			cpumask_set_cpu(sdma_cpu, dd->msix_entries[i].mask);
9022			sdma_cpu = cpumask_next(sdma_cpu, def);
9023			if (sdma_cpu >= nr_cpu_ids)
9024				sdma_cpu = cpumask_first(def);
9025		} else if (handler == receive_context_interrupt) {
9026			dd_dev_info(dd, "rcv ctxt %d cpu %d\n",
9027				rcd->ctxt, rcv_cpu);
9028			cpumask_set_cpu(rcv_cpu, dd->msix_entries[i].mask);
9029			rcv_cpu = cpumask_next(rcv_cpu, rcv);
9030			if (rcv_cpu >= nr_cpu_ids)
9031				rcv_cpu = cpumask_first(rcv);
9032		} else {
9033			/* otherwise first def */
9034			dd_dev_info(dd, "%s cpu %d\n",
9035				err_info, cpumask_first(def));
9036			cpumask_set_cpu(
9037				cpumask_first(def), dd->msix_entries[i].mask);
9038		}
9039		irq_set_affinity_hint(
9040			dd->msix_entries[i].msix.vector,
9041			dd->msix_entries[i].mask);
9042	}
9043
9044out:
9045	free_cpumask_var(def);
9046	free_cpumask_var(rcv);
9047	return ret;
9048bail:
9049	ret = -ENOMEM;
9050	goto  out;
9051}
9052
9053/*
9054 * Set the general handler to accept all interrupts, remap all
9055 * chip interrupts back to MSI-X 0.
9056 */
9057static void reset_interrupts(struct hfi1_devdata *dd)
9058{
9059	int i;
9060
9061	/* all interrupts handled by the general handler */
9062	for (i = 0; i < CCE_NUM_INT_CSRS; i++)
9063		dd->gi_mask[i] = ~(u64)0;
9064
9065	/* all chip interrupts map to MSI-X 0 */
9066	for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9067		write_csr(dd, CCE_INT_MAP + (8*i), 0);
9068}
9069
9070static int set_up_interrupts(struct hfi1_devdata *dd)
9071{
9072	struct hfi1_msix_entry *entries;
9073	u32 total, request;
9074	int i, ret;
9075	int single_interrupt = 0; /* we expect to have all the interrupts */
9076
9077	/*
9078	 * Interrupt count:
9079	 *	1 general, "slow path" interrupt (includes the SDMA engines
9080	 *		slow source, SDMACleanupDone)
9081	 *	N interrupts - one per used SDMA engine
9082	 *	M interrupt - one per kernel receive context
9083	 */
9084	total = 1 + dd->num_sdma + dd->n_krcv_queues;
9085
9086	entries = kcalloc(total, sizeof(*entries), GFP_KERNEL);
9087	if (!entries) {
9088		ret = -ENOMEM;
9089		goto fail;
9090	}
9091	/* 1-1 MSI-X entry assignment */
9092	for (i = 0; i < total; i++)
9093		entries[i].msix.entry = i;
9094
9095	/* ask for MSI-X interrupts */
9096	request = total;
9097	request_msix(dd, &request, entries);
9098
9099	if (request == 0) {
9100		/* using INTx */
9101		/* dd->num_msix_entries already zero */
9102		kfree(entries);
9103		single_interrupt = 1;
9104		dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n");
9105	} else {
9106		/* using MSI-X */
9107		dd->num_msix_entries = request;
9108		dd->msix_entries = entries;
9109
9110		if (request != total) {
9111			/* using MSI-X, with reduced interrupts */
9112			dd_dev_err(
9113				dd,
9114				"cannot handle reduced interrupt case, want %u, got %u\n",
9115				total, request);
9116			ret = -EINVAL;
9117			goto fail;
9118		}
9119		dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
9120	}
9121
9122	/* mask all interrupts */
9123	set_intr_state(dd, 0);
9124	/* clear all pending interrupts */
9125	clear_all_interrupts(dd);
9126
9127	/* reset general handler mask, chip MSI-X mappings */
9128	reset_interrupts(dd);
9129
9130	if (single_interrupt)
9131		ret = request_intx_irq(dd);
9132	else
9133		ret = request_msix_irqs(dd);
9134	if (ret)
9135		goto fail;
9136
9137	return 0;
9138
9139fail:
9140	clean_up_interrupts(dd);
9141	return ret;
9142}
9143
9144/*
9145 * Set up context values in dd.  Sets:
9146 *
9147 *	num_rcv_contexts - number of contexts being used
9148 *	n_krcv_queues - number of kernel contexts
9149 *	first_user_ctxt - first non-kernel context in array of contexts
9150 *	freectxts  - number of free user contexts
9151 *	num_send_contexts - number of PIO send contexts being used
9152 */
9153static int set_up_context_variables(struct hfi1_devdata *dd)
9154{
9155	int num_kernel_contexts;
9156	int num_user_contexts;
9157	int total_contexts;
9158	int ret;
9159	unsigned ngroups;
9160
9161	/*
9162	 * Kernel contexts: (to be fixed later):
9163	 * - min or 2 or 1 context/numa
9164	 * - Context 0 - default/errors
9165	 * - Context 1 - VL15
9166	 */
9167	if (n_krcvqs)
9168		num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS;
9169	else
9170		num_kernel_contexts = num_online_nodes();
9171	num_kernel_contexts =
9172		max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts);
9173	/*
9174	 * Every kernel receive context needs an ACK send context.
9175	 * one send context is allocated for each VL{0-7} and VL15
9176	 */
9177	if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) {
9178		dd_dev_err(dd,
9179			   "Reducing # kernel rcv contexts to: %d, from %d\n",
9180			   (int)(dd->chip_send_contexts - num_vls - 1),
9181			   (int)num_kernel_contexts);
9182		num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
9183	}
9184	/*
9185	 * User contexts: (to be fixed later)
9186	 *	- set to num_rcv_contexts if non-zero
9187	 *	- default to 1 user context per CPU
9188	 */
9189	if (num_rcv_contexts)
9190		num_user_contexts = num_rcv_contexts;
9191	else
9192		num_user_contexts = num_online_cpus();
9193
9194	total_contexts = num_kernel_contexts + num_user_contexts;
9195
9196	/*
9197	 * Adjust the counts given a global max.
9198	 */
9199	if (total_contexts > dd->chip_rcv_contexts) {
9200		dd_dev_err(dd,
9201			   "Reducing # user receive contexts to: %d, from %d\n",
9202			   (int)(dd->chip_rcv_contexts - num_kernel_contexts),
9203			   (int)num_user_contexts);
9204		num_user_contexts = dd->chip_rcv_contexts - num_kernel_contexts;
9205		/* recalculate */
9206		total_contexts = num_kernel_contexts + num_user_contexts;
9207	}
9208
9209	/* the first N are kernel contexts, the rest are user contexts */
9210	dd->num_rcv_contexts = total_contexts;
9211	dd->n_krcv_queues = num_kernel_contexts;
9212	dd->first_user_ctxt = num_kernel_contexts;
9213	dd->freectxts = num_user_contexts;
9214	dd_dev_info(dd,
9215		"rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
9216		(int)dd->chip_rcv_contexts,
9217		(int)dd->num_rcv_contexts,
9218		(int)dd->n_krcv_queues,
9219		(int)dd->num_rcv_contexts - dd->n_krcv_queues);
9220
9221	/*
9222	 * Receive array allocation:
9223	 *   All RcvArray entries are divided into groups of 8. This
9224	 *   is required by the hardware and will speed up writes to
9225	 *   consecutive entries by using write-combining of the entire
9226	 *   cacheline.
9227	 *
9228	 *   The number of groups are evenly divided among all contexts.
9229	 *   any left over groups will be given to the first N user
9230	 *   contexts.
9231	 */
9232	dd->rcv_entries.group_size = RCV_INCREMENT;
9233	ngroups = dd->chip_rcv_array_count / dd->rcv_entries.group_size;
9234	dd->rcv_entries.ngroups = ngroups / dd->num_rcv_contexts;
9235	dd->rcv_entries.nctxt_extra = ngroups -
9236		(dd->num_rcv_contexts * dd->rcv_entries.ngroups);
9237	dd_dev_info(dd, "RcvArray groups %u, ctxts extra %u\n",
9238		    dd->rcv_entries.ngroups,
9239		    dd->rcv_entries.nctxt_extra);
9240	if (dd->rcv_entries.ngroups * dd->rcv_entries.group_size >
9241	    MAX_EAGER_ENTRIES * 2) {
9242		dd->rcv_entries.ngroups = (MAX_EAGER_ENTRIES * 2) /
9243			dd->rcv_entries.group_size;
9244		dd_dev_info(dd,
9245		   "RcvArray group count too high, change to %u\n",
9246		   dd->rcv_entries.ngroups);
9247		dd->rcv_entries.nctxt_extra = 0;
9248	}
9249	/*
9250	 * PIO send contexts
9251	 */
9252	ret = init_sc_pools_and_sizes(dd);
9253	if (ret >= 0) {	/* success */
9254		dd->num_send_contexts = ret;
9255		dd_dev_info(
9256			dd,
9257			"send contexts: chip %d, used %d (kernel %d, ack %d, user %d)\n",
9258			dd->chip_send_contexts,
9259			dd->num_send_contexts,
9260			dd->sc_sizes[SC_KERNEL].count,
9261			dd->sc_sizes[SC_ACK].count,
9262			dd->sc_sizes[SC_USER].count);
9263		ret = 0;	/* success */
9264	}
9265
9266	return ret;
9267}
9268
9269/*
9270 * Set the device/port partition key table. The MAD code
9271 * will ensure that, at least, the partial management
9272 * partition key is present in the table.
9273 */
9274static void set_partition_keys(struct hfi1_pportdata *ppd)
9275{
9276	struct hfi1_devdata *dd = ppd->dd;
9277	u64 reg = 0;
9278	int i;
9279
9280	dd_dev_info(dd, "Setting partition keys\n");
9281	for (i = 0; i < hfi1_get_npkeys(dd); i++) {
9282		reg |= (ppd->pkeys[i] &
9283			RCV_PARTITION_KEY_PARTITION_KEY_A_MASK) <<
9284			((i % 4) *
9285			 RCV_PARTITION_KEY_PARTITION_KEY_B_SHIFT);
9286		/* Each register holds 4 PKey values. */
9287		if ((i % 4) == 3) {
9288			write_csr(dd, RCV_PARTITION_KEY +
9289				  ((i - 3) * 2), reg);
9290			reg = 0;
9291		}
9292	}
9293
9294	/* Always enable HW pkeys check when pkeys table is set */
9295	add_rcvctrl(dd, RCV_CTRL_RCV_PARTITION_KEY_ENABLE_SMASK);
9296}
9297
9298/*
9299 * These CSRs and memories are uninitialized on reset and must be
9300 * written before reading to set the ECC/parity bits.
9301 *
9302 * NOTE: All user context CSRs that are not mmaped write-only
9303 * (e.g. the TID flows) must be initialized even if the driver never
9304 * reads them.
9305 */
9306static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
9307{
9308	int i, j;
9309
9310	/* CceIntMap */
9311	for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9312		write_csr(dd, CCE_INT_MAP+(8*i), 0);
9313
9314	/* SendCtxtCreditReturnAddr */
9315	for (i = 0; i < dd->chip_send_contexts; i++)
9316		write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
9317
9318	/* PIO Send buffers */
9319	/* SDMA Send buffers */
9320	/* These are not normally read, and (presently) have no method
9321	   to be read, so are not pre-initialized */
9322
9323	/* RcvHdrAddr */
9324	/* RcvHdrTailAddr */
9325	/* RcvTidFlowTable */
9326	for (i = 0; i < dd->chip_rcv_contexts; i++) {
9327		write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
9328		write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
9329		for (j = 0; j < RXE_NUM_TID_FLOWS; j++)
9330			write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE+(8*j), 0);
9331	}
9332
9333	/* RcvArray */
9334	for (i = 0; i < dd->chip_rcv_array_count; i++)
9335		write_csr(dd, RCV_ARRAY + (8*i),
9336					RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
9337
9338	/* RcvQPMapTable */
9339	for (i = 0; i < 32; i++)
9340		write_csr(dd, RCV_QP_MAP_TABLE + (8 * i), 0);
9341}
9342
9343/*
9344 * Use the ctrl_bits in CceCtrl to clear the status_bits in CceStatus.
9345 */
9346static void clear_cce_status(struct hfi1_devdata *dd, u64 status_bits,
9347			     u64 ctrl_bits)
9348{
9349	unsigned long timeout;
9350	u64 reg;
9351
9352	/* is the condition present? */
9353	reg = read_csr(dd, CCE_STATUS);
9354	if ((reg & status_bits) == 0)
9355		return;
9356
9357	/* clear the condition */
9358	write_csr(dd, CCE_CTRL, ctrl_bits);
9359
9360	/* wait for the condition to clear */
9361	timeout = jiffies + msecs_to_jiffies(CCE_STATUS_TIMEOUT);
9362	while (1) {
9363		reg = read_csr(dd, CCE_STATUS);
9364		if ((reg & status_bits) == 0)
9365			return;
9366		if (time_after(jiffies, timeout)) {
9367			dd_dev_err(dd,
9368				"Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n",
9369				status_bits, reg & status_bits);
9370			return;
9371		}
9372		udelay(1);
9373	}
9374}
9375
9376/* set CCE CSRs to chip reset defaults */
9377static void reset_cce_csrs(struct hfi1_devdata *dd)
9378{
9379	int i;
9380
9381	/* CCE_REVISION read-only */
9382	/* CCE_REVISION2 read-only */
9383	/* CCE_CTRL - bits clear automatically */
9384	/* CCE_STATUS read-only, use CceCtrl to clear */
9385	clear_cce_status(dd, ALL_FROZE, CCE_CTRL_SPC_UNFREEZE_SMASK);
9386	clear_cce_status(dd, ALL_TXE_PAUSE, CCE_CTRL_TXE_RESUME_SMASK);
9387	clear_cce_status(dd, ALL_RXE_PAUSE, CCE_CTRL_RXE_RESUME_SMASK);
9388	for (i = 0; i < CCE_NUM_SCRATCH; i++)
9389		write_csr(dd, CCE_SCRATCH + (8 * i), 0);
9390	/* CCE_ERR_STATUS read-only */
9391	write_csr(dd, CCE_ERR_MASK, 0);
9392	write_csr(dd, CCE_ERR_CLEAR, ~0ull);
9393	/* CCE_ERR_FORCE leave alone */
9394	for (i = 0; i < CCE_NUM_32_BIT_COUNTERS; i++)
9395		write_csr(dd, CCE_COUNTER_ARRAY32 + (8 * i), 0);
9396	write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_RESETCSR);
9397	/* CCE_PCIE_CTRL leave alone */
9398	for (i = 0; i < CCE_NUM_MSIX_VECTORS; i++) {
9399		write_csr(dd, CCE_MSIX_TABLE_LOWER + (8 * i), 0);
9400		write_csr(dd, CCE_MSIX_TABLE_UPPER + (8 * i),
9401					CCE_MSIX_TABLE_UPPER_RESETCSR);
9402	}
9403	for (i = 0; i < CCE_NUM_MSIX_PBAS; i++) {
9404		/* CCE_MSIX_PBA read-only */
9405		write_csr(dd, CCE_MSIX_INT_GRANTED, ~0ull);
9406		write_csr(dd, CCE_MSIX_VEC_CLR_WITHOUT_INT, ~0ull);
9407	}
9408	for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9409		write_csr(dd, CCE_INT_MAP, 0);
9410	for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
9411		/* CCE_INT_STATUS read-only */
9412		write_csr(dd, CCE_INT_MASK + (8 * i), 0);
9413		write_csr(dd, CCE_INT_CLEAR + (8 * i), ~0ull);
9414		/* CCE_INT_FORCE leave alone */
9415		/* CCE_INT_BLOCKED read-only */
9416	}
9417	for (i = 0; i < CCE_NUM_32_BIT_INT_COUNTERS; i++)
9418		write_csr(dd, CCE_INT_COUNTER_ARRAY32 + (8 * i), 0);
9419}
9420
9421/* set ASIC CSRs to chip reset defaults */
9422static void reset_asic_csrs(struct hfi1_devdata *dd)
9423{
9424	int i;
9425
9426	/*
9427	 * If the HFIs are shared between separate nodes or VMs,
9428	 * then more will need to be done here.  One idea is a module
9429	 * parameter that returns early, letting the first power-on or
9430	 * a known first load do the reset and blocking all others.
9431	 */
9432
9433	if (!(dd->flags & HFI1_DO_INIT_ASIC))
9434		return;
9435
9436	if (dd->icode != ICODE_FPGA_EMULATION) {
9437		/* emulation does not have an SBus - leave these alone */
9438		/*
9439		 * All writes to ASIC_CFG_SBUS_REQUEST do something.
9440		 * Notes:
9441		 * o The reset is not zero if aimed at the core.  See the
9442		 *   SBus documentation for details.
9443		 * o If the SBus firmware has been updated (e.g. by the BIOS),
9444		 *   will the reset revert that?
9445		 */
9446		/* ASIC_CFG_SBUS_REQUEST leave alone */
9447		write_csr(dd, ASIC_CFG_SBUS_EXECUTE, 0);
9448	}
9449	/* ASIC_SBUS_RESULT read-only */
9450	write_csr(dd, ASIC_STS_SBUS_COUNTERS, 0);
9451	for (i = 0; i < ASIC_NUM_SCRATCH; i++)
9452		write_csr(dd, ASIC_CFG_SCRATCH + (8 * i), 0);
9453	write_csr(dd, ASIC_CFG_MUTEX, 0);	/* this will clear it */
9454
9455	/* We might want to retain this state across FLR if we ever use it */
9456	write_csr(dd, ASIC_CFG_DRV_STR, 0);
9457
9458	write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0);
9459	/* ASIC_STS_THERM read-only */
9460	/* ASIC_CFG_RESET leave alone */
9461
9462	write_csr(dd, ASIC_PCIE_SD_HOST_CMD, 0);
9463	/* ASIC_PCIE_SD_HOST_STATUS read-only */
9464	write_csr(dd, ASIC_PCIE_SD_INTRPT_DATA_CODE, 0);
9465	write_csr(dd, ASIC_PCIE_SD_INTRPT_ENABLE, 0);
9466	/* ASIC_PCIE_SD_INTRPT_PROGRESS read-only */
9467	write_csr(dd, ASIC_PCIE_SD_INTRPT_STATUS, ~0ull); /* clear */
9468	/* ASIC_HFI0_PCIE_SD_INTRPT_RSPD_DATA read-only */
9469	/* ASIC_HFI1_PCIE_SD_INTRPT_RSPD_DATA read-only */
9470	for (i = 0; i < 16; i++)
9471		write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (8 * i), 0);
9472
9473	/* ASIC_GPIO_IN read-only */
9474	write_csr(dd, ASIC_GPIO_OE, 0);
9475	write_csr(dd, ASIC_GPIO_INVERT, 0);
9476	write_csr(dd, ASIC_GPIO_OUT, 0);
9477	write_csr(dd, ASIC_GPIO_MASK, 0);
9478	/* ASIC_GPIO_STATUS read-only */
9479	write_csr(dd, ASIC_GPIO_CLEAR, ~0ull);
9480	/* ASIC_GPIO_FORCE leave alone */
9481
9482	/* ASIC_QSFP1_IN read-only */
9483	write_csr(dd, ASIC_QSFP1_OE, 0);
9484	write_csr(dd, ASIC_QSFP1_INVERT, 0);
9485	write_csr(dd, ASIC_QSFP1_OUT, 0);
9486	write_csr(dd, ASIC_QSFP1_MASK, 0);
9487	/* ASIC_QSFP1_STATUS read-only */
9488	write_csr(dd, ASIC_QSFP1_CLEAR, ~0ull);
9489	/* ASIC_QSFP1_FORCE leave alone */
9490
9491	/* ASIC_QSFP2_IN read-only */
9492	write_csr(dd, ASIC_QSFP2_OE, 0);
9493	write_csr(dd, ASIC_QSFP2_INVERT, 0);
9494	write_csr(dd, ASIC_QSFP2_OUT, 0);
9495	write_csr(dd, ASIC_QSFP2_MASK, 0);
9496	/* ASIC_QSFP2_STATUS read-only */
9497	write_csr(dd, ASIC_QSFP2_CLEAR, ~0ull);
9498	/* ASIC_QSFP2_FORCE leave alone */
9499
9500	write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_RESETCSR);
9501	/* this also writes a NOP command, clearing paging mode */
9502	write_csr(dd, ASIC_EEP_ADDR_CMD, 0);
9503	write_csr(dd, ASIC_EEP_DATA, 0);
9504}
9505
9506/* set MISC CSRs to chip reset defaults */
9507static void reset_misc_csrs(struct hfi1_devdata *dd)
9508{
9509	int i;
9510
9511	for (i = 0; i < 32; i++) {
9512		write_csr(dd, MISC_CFG_RSA_R2 + (8 * i), 0);
9513		write_csr(dd, MISC_CFG_RSA_SIGNATURE + (8 * i), 0);
9514		write_csr(dd, MISC_CFG_RSA_MODULUS + (8 * i), 0);
9515	}
9516	/* MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can
9517	   only be written 128-byte chunks */
9518	/* init RSA engine to clear lingering errors */
9519	write_csr(dd, MISC_CFG_RSA_CMD, 1);
9520	write_csr(dd, MISC_CFG_RSA_MU, 0);
9521	write_csr(dd, MISC_CFG_FW_CTRL, 0);
9522	/* MISC_STS_8051_DIGEST read-only */
9523	/* MISC_STS_SBM_DIGEST read-only */
9524	/* MISC_STS_PCIE_DIGEST read-only */
9525	/* MISC_STS_FAB_DIGEST read-only */
9526	/* MISC_ERR_STATUS read-only */
9527	write_csr(dd, MISC_ERR_MASK, 0);
9528	write_csr(dd, MISC_ERR_CLEAR, ~0ull);
9529	/* MISC_ERR_FORCE leave alone */
9530}
9531
9532/* set TXE CSRs to chip reset defaults */
9533static void reset_txe_csrs(struct hfi1_devdata *dd)
9534{
9535	int i;
9536
9537	/*
9538	 * TXE Kernel CSRs
9539	 */
9540	write_csr(dd, SEND_CTRL, 0);
9541	__cm_reset(dd, 0);	/* reset CM internal state */
9542	/* SEND_CONTEXTS read-only */
9543	/* SEND_DMA_ENGINES read-only */
9544	/* SEND_PIO_MEM_SIZE read-only */
9545	/* SEND_DMA_MEM_SIZE read-only */
9546	write_csr(dd, SEND_HIGH_PRIORITY_LIMIT, 0);
9547	pio_reset_all(dd);	/* SEND_PIO_INIT_CTXT */
9548	/* SEND_PIO_ERR_STATUS read-only */
9549	write_csr(dd, SEND_PIO_ERR_MASK, 0);
9550	write_csr(dd, SEND_PIO_ERR_CLEAR, ~0ull);
9551	/* SEND_PIO_ERR_FORCE leave alone */
9552	/* SEND_DMA_ERR_STATUS read-only */
9553	write_csr(dd, SEND_DMA_ERR_MASK, 0);
9554	write_csr(dd, SEND_DMA_ERR_CLEAR, ~0ull);
9555	/* SEND_DMA_ERR_FORCE leave alone */
9556	/* SEND_EGRESS_ERR_STATUS read-only */
9557	write_csr(dd, SEND_EGRESS_ERR_MASK, 0);
9558	write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~0ull);
9559	/* SEND_EGRESS_ERR_FORCE leave alone */
9560	write_csr(dd, SEND_BTH_QP, 0);
9561	write_csr(dd, SEND_STATIC_RATE_CONTROL, 0);
9562	write_csr(dd, SEND_SC2VLT0, 0);
9563	write_csr(dd, SEND_SC2VLT1, 0);
9564	write_csr(dd, SEND_SC2VLT2, 0);
9565	write_csr(dd, SEND_SC2VLT3, 0);
9566	write_csr(dd, SEND_LEN_CHECK0, 0);
9567	write_csr(dd, SEND_LEN_CHECK1, 0);
9568	/* SEND_ERR_STATUS read-only */
9569	write_csr(dd, SEND_ERR_MASK, 0);
9570	write_csr(dd, SEND_ERR_CLEAR, ~0ull);
9571	/* SEND_ERR_FORCE read-only */
9572	for (i = 0; i < VL_ARB_LOW_PRIO_TABLE_SIZE; i++)
9573		write_csr(dd, SEND_LOW_PRIORITY_LIST + (8*i), 0);
9574	for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++)
9575		write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8*i), 0);
9576	for (i = 0; i < dd->chip_send_contexts/NUM_CONTEXTS_PER_SET; i++)
9577		write_csr(dd, SEND_CONTEXT_SET_CTRL + (8*i), 0);
9578	for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++)
9579		write_csr(dd, SEND_COUNTER_ARRAY32 + (8*i), 0);
9580	for (i = 0; i < TXE_NUM_64_BIT_COUNTER; i++)
9581		write_csr(dd, SEND_COUNTER_ARRAY64 + (8*i), 0);
9582	write_csr(dd, SEND_CM_CTRL, SEND_CM_CTRL_RESETCSR);
9583	write_csr(dd, SEND_CM_GLOBAL_CREDIT,
9584					SEND_CM_GLOBAL_CREDIT_RESETCSR);
9585	/* SEND_CM_CREDIT_USED_STATUS read-only */
9586	write_csr(dd, SEND_CM_TIMER_CTRL, 0);
9587	write_csr(dd, SEND_CM_LOCAL_AU_TABLE0_TO3, 0);
9588	write_csr(dd, SEND_CM_LOCAL_AU_TABLE4_TO7, 0);
9589	write_csr(dd, SEND_CM_REMOTE_AU_TABLE0_TO3, 0);
9590	write_csr(dd, SEND_CM_REMOTE_AU_TABLE4_TO7, 0);
9591	for (i = 0; i < TXE_NUM_DATA_VL; i++)
9592		write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
9593	write_csr(dd, SEND_CM_CREDIT_VL15, 0);
9594	/* SEND_CM_CREDIT_USED_VL read-only */
9595	/* SEND_CM_CREDIT_USED_VL15 read-only */
9596	/* SEND_EGRESS_CTXT_STATUS read-only */
9597	/* SEND_EGRESS_SEND_DMA_STATUS read-only */
9598	write_csr(dd, SEND_EGRESS_ERR_INFO, ~0ull);
9599	/* SEND_EGRESS_ERR_INFO read-only */
9600	/* SEND_EGRESS_ERR_SOURCE read-only */
9601
9602	/*
9603	 * TXE Per-Context CSRs
9604	 */
9605	for (i = 0; i < dd->chip_send_contexts; i++) {
9606		write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
9607		write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_CTRL, 0);
9608		write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
9609		write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_FORCE, 0);
9610		write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, 0);
9611		write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~0ull);
9612		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_ENABLE, 0);
9613		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_VL, 0);
9614		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_JOB_KEY, 0);
9615		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_PARTITION_KEY, 0);
9616		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, 0);
9617		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_OPCODE, 0);
9618	}
9619
9620	/*
9621	 * TXE Per-SDMA CSRs
9622	 */
9623	for (i = 0; i < dd->chip_sdma_engines; i++) {
9624		write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
9625		/* SEND_DMA_STATUS read-only */
9626		write_kctxt_csr(dd, i, SEND_DMA_BASE_ADDR, 0);
9627		write_kctxt_csr(dd, i, SEND_DMA_LEN_GEN, 0);
9628		write_kctxt_csr(dd, i, SEND_DMA_TAIL, 0);
9629		/* SEND_DMA_HEAD read-only */
9630		write_kctxt_csr(dd, i, SEND_DMA_HEAD_ADDR, 0);
9631		write_kctxt_csr(dd, i, SEND_DMA_PRIORITY_THLD, 0);
9632		/* SEND_DMA_IDLE_CNT read-only */
9633		write_kctxt_csr(dd, i, SEND_DMA_RELOAD_CNT, 0);
9634		write_kctxt_csr(dd, i, SEND_DMA_DESC_CNT, 0);
9635		/* SEND_DMA_DESC_FETCHED_CNT read-only */
9636		/* SEND_DMA_ENG_ERR_STATUS read-only */
9637		write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, 0);
9638		write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~0ull);
9639		/* SEND_DMA_ENG_ERR_FORCE leave alone */
9640		write_kctxt_csr(dd, i, SEND_DMA_CHECK_ENABLE, 0);
9641		write_kctxt_csr(dd, i, SEND_DMA_CHECK_VL, 0);
9642		write_kctxt_csr(dd, i, SEND_DMA_CHECK_JOB_KEY, 0);
9643		write_kctxt_csr(dd, i, SEND_DMA_CHECK_PARTITION_KEY, 0);
9644		write_kctxt_csr(dd, i, SEND_DMA_CHECK_SLID, 0);
9645		write_kctxt_csr(dd, i, SEND_DMA_CHECK_OPCODE, 0);
9646		write_kctxt_csr(dd, i, SEND_DMA_MEMORY, 0);
9647	}
9648}
9649
9650/*
9651 * Expect on entry:
9652 * o Packet ingress is disabled, i.e. RcvCtrl.RcvPortEnable == 0
9653 */
9654static void init_rbufs(struct hfi1_devdata *dd)
9655{
9656	u64 reg;
9657	int count;
9658
9659	/*
9660	 * Wait for DMA to stop: RxRbufPktPending and RxPktInProgress are
9661	 * clear.
9662	 */
9663	count = 0;
9664	while (1) {
9665		reg = read_csr(dd, RCV_STATUS);
9666		if ((reg & (RCV_STATUS_RX_RBUF_PKT_PENDING_SMASK
9667			    | RCV_STATUS_RX_PKT_IN_PROGRESS_SMASK)) == 0)
9668			break;
9669		/*
9670		 * Give up after 1ms - maximum wait time.
9671		 *
9672		 * RBuf size is 148KiB.  Slowest possible is PCIe Gen1 x1 at
9673		 * 250MB/s bandwidth.  Lower rate to 66% for overhead to get:
9674		 *	148 KB / (66% * 250MB/s) = 920us
9675		 */
9676		if (count++ > 500) {
9677			dd_dev_err(dd,
9678				"%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n",
9679				__func__, reg);
9680			break;
9681		}
9682		udelay(2); /* do not busy-wait the CSR */
9683	}
9684
9685	/* start the init - expect RcvCtrl to be 0 */
9686	write_csr(dd, RCV_CTRL, RCV_CTRL_RX_RBUF_INIT_SMASK);
9687
9688	/*
9689	 * Read to force the write of Rcvtrl.RxRbufInit.  There is a brief
9690	 * period after the write before RcvStatus.RxRbufInitDone is valid.
9691	 * The delay in the first run through the loop below is sufficient and
9692	 * required before the first read of RcvStatus.RxRbufInintDone.
9693	 */
9694	read_csr(dd, RCV_CTRL);
9695
9696	/* wait for the init to finish */
9697	count = 0;
9698	while (1) {
9699		/* delay is required first time through - see above */
9700		udelay(2); /* do not busy-wait the CSR */
9701		reg = read_csr(dd, RCV_STATUS);
9702		if (reg & (RCV_STATUS_RX_RBUF_INIT_DONE_SMASK))
9703			break;
9704
9705		/* give up after 100us - slowest possible at 33MHz is 73us */
9706		if (count++ > 50) {
9707			dd_dev_err(dd,
9708				"%s: RcvStatus.RxRbufInit not set, continuing\n",
9709				__func__);
9710			break;
9711		}
9712	}
9713}
9714
9715/* set RXE CSRs to chip reset defaults */
9716static void reset_rxe_csrs(struct hfi1_devdata *dd)
9717{
9718	int i, j;
9719
9720	/*
9721	 * RXE Kernel CSRs
9722	 */
9723	write_csr(dd, RCV_CTRL, 0);
9724	init_rbufs(dd);
9725	/* RCV_STATUS read-only */
9726	/* RCV_CONTEXTS read-only */
9727	/* RCV_ARRAY_CNT read-only */
9728	/* RCV_BUF_SIZE read-only */
9729	write_csr(dd, RCV_BTH_QP, 0);
9730	write_csr(dd, RCV_MULTICAST, 0);
9731	write_csr(dd, RCV_BYPASS, 0);
9732	write_csr(dd, RCV_VL15, 0);
9733	/* this is a clear-down */
9734	write_csr(dd, RCV_ERR_INFO,
9735			RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
9736	/* RCV_ERR_STATUS read-only */
9737	write_csr(dd, RCV_ERR_MASK, 0);
9738	write_csr(dd, RCV_ERR_CLEAR, ~0ull);
9739	/* RCV_ERR_FORCE leave alone */
9740	for (i = 0; i < 32; i++)
9741		write_csr(dd, RCV_QP_MAP_TABLE + (8 * i), 0);
9742	for (i = 0; i < 4; i++)
9743		write_csr(dd, RCV_PARTITION_KEY + (8 * i), 0);
9744	for (i = 0; i < RXE_NUM_32_BIT_COUNTERS; i++)
9745		write_csr(dd, RCV_COUNTER_ARRAY32 + (8 * i), 0);
9746	for (i = 0; i < RXE_NUM_64_BIT_COUNTERS; i++)
9747		write_csr(dd, RCV_COUNTER_ARRAY64 + (8 * i), 0);
9748	for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) {
9749		write_csr(dd, RCV_RSM_CFG + (8 * i), 0);
9750		write_csr(dd, RCV_RSM_SELECT + (8 * i), 0);
9751		write_csr(dd, RCV_RSM_MATCH + (8 * i), 0);
9752	}
9753	for (i = 0; i < 32; i++)
9754		write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), 0);
9755
9756	/*
9757	 * RXE Kernel and User Per-Context CSRs
9758	 */
9759	for (i = 0; i < dd->chip_rcv_contexts; i++) {
9760		/* kernel */
9761		write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0);
9762		/* RCV_CTXT_STATUS read-only */
9763		write_kctxt_csr(dd, i, RCV_EGR_CTRL, 0);
9764		write_kctxt_csr(dd, i, RCV_TID_CTRL, 0);
9765		write_kctxt_csr(dd, i, RCV_KEY_CTRL, 0);
9766		write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
9767		write_kctxt_csr(dd, i, RCV_HDR_CNT, 0);
9768		write_kctxt_csr(dd, i, RCV_HDR_ENT_SIZE, 0);
9769		write_kctxt_csr(dd, i, RCV_HDR_SIZE, 0);
9770		write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
9771		write_kctxt_csr(dd, i, RCV_AVAIL_TIME_OUT, 0);
9772		write_kctxt_csr(dd, i, RCV_HDR_OVFL_CNT, 0);
9773
9774		/* user */
9775		/* RCV_HDR_TAIL read-only */
9776		write_uctxt_csr(dd, i, RCV_HDR_HEAD, 0);
9777		/* RCV_EGR_INDEX_TAIL read-only */
9778		write_uctxt_csr(dd, i, RCV_EGR_INDEX_HEAD, 0);
9779		/* RCV_EGR_OFFSET_TAIL read-only */
9780		for (j = 0; j < RXE_NUM_TID_FLOWS; j++) {
9781			write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j),
9782				0);
9783		}
9784	}
9785}
9786
9787/*
9788 * Set sc2vl tables.
9789 *
9790 * They power on to zeros, so to avoid send context errors
9791 * they need to be set:
9792 *
9793 * SC 0-7 -> VL 0-7 (respectively)
9794 * SC 15  -> VL 15
9795 * otherwise
9796 *        -> VL 0
9797 */
9798static void init_sc2vl_tables(struct hfi1_devdata *dd)
9799{
9800	int i;
9801	/* init per architecture spec, constrained by hardware capability */
9802
9803	/* HFI maps sent packets */
9804	write_csr(dd, SEND_SC2VLT0, SC2VL_VAL(
9805		0,
9806		0, 0, 1, 1,
9807		2, 2, 3, 3,
9808		4, 4, 5, 5,
9809		6, 6, 7, 7));
9810	write_csr(dd, SEND_SC2VLT1, SC2VL_VAL(
9811		1,
9812		8, 0, 9, 0,
9813		10, 0, 11, 0,
9814		12, 0, 13, 0,
9815		14, 0, 15, 15));
9816	write_csr(dd, SEND_SC2VLT2, SC2VL_VAL(
9817		2,
9818		16, 0, 17, 0,
9819		18, 0, 19, 0,
9820		20, 0, 21, 0,
9821		22, 0, 23, 0));
9822	write_csr(dd, SEND_SC2VLT3, SC2VL_VAL(
9823		3,
9824		24, 0, 25, 0,
9825		26, 0, 27, 0,
9826		28, 0, 29, 0,
9827		30, 0, 31, 0));
9828
9829	/* DC maps received packets */
9830	write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0, DC_SC_VL_VAL(
9831		15_0,
9832		0, 0, 1, 1,  2, 2,  3, 3,  4, 4,  5, 5,  6, 6,  7,  7,
9833		8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15, 15));
9834	write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16, DC_SC_VL_VAL(
9835		31_16,
9836		16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23, 0,
9837		24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31, 0));
9838
9839	/* initialize the cached sc2vl values consistently with h/w */
9840	for (i = 0; i < 32; i++) {
9841		if (i < 8 || i == 15)
9842			*((u8 *)(dd->sc2vl) + i) = (u8)i;
9843		else
9844			*((u8 *)(dd->sc2vl) + i) = 0;
9845	}
9846}
9847
9848/*
9849 * Read chip sizes and then reset parts to sane, disabled, values.  We cannot
9850 * depend on the chip going through a power-on reset - a driver may be loaded
9851 * and unloaded many times.
9852 *
9853 * Do not write any CSR values to the chip in this routine - there may be
9854 * a reset following the (possible) FLR in this routine.
9855 *
9856 */
9857static void init_chip(struct hfi1_devdata *dd)
9858{
9859	int i;
9860
9861	/*
9862	 * Put the HFI CSRs in a known state.
9863	 * Combine this with a DC reset.
9864	 *
9865	 * Stop the device from doing anything while we do a
9866	 * reset.  We know there are no other active users of
9867	 * the device since we are now in charge.  Turn off
9868	 * off all outbound and inbound traffic and make sure
9869	 * the device does not generate any interrupts.
9870	 */
9871
9872	/* disable send contexts and SDMA engines */
9873	write_csr(dd, SEND_CTRL, 0);
9874	for (i = 0; i < dd->chip_send_contexts; i++)
9875		write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
9876	for (i = 0; i < dd->chip_sdma_engines; i++)
9877		write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
9878	/* disable port (turn off RXE inbound traffic) and contexts */
9879	write_csr(dd, RCV_CTRL, 0);
9880	for (i = 0; i < dd->chip_rcv_contexts; i++)
9881		write_csr(dd, RCV_CTXT_CTRL, 0);
9882	/* mask all interrupt sources */
9883	for (i = 0; i < CCE_NUM_INT_CSRS; i++)
9884		write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
9885
9886	/*
9887	 * DC Reset: do a full DC reset before the register clear.
9888	 * A recommended length of time to hold is one CSR read,
9889	 * so reread the CceDcCtrl.  Then, hold the DC in reset
9890	 * across the clear.
9891	 */
9892	write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK);
9893	(void) read_csr(dd, CCE_DC_CTRL);
9894
9895	if (use_flr) {
9896		/*
9897		 * A FLR will reset the SPC core and part of the PCIe.
9898		 * The parts that need to be restored have already been
9899		 * saved.
9900		 */
9901		dd_dev_info(dd, "Resetting CSRs with FLR\n");
9902
9903		/* do the FLR, the DC reset will remain */
9904		hfi1_pcie_flr(dd);
9905
9906		/* restore command and BARs */
9907		restore_pci_variables(dd);
9908
9909		if (is_a0(dd)) {
9910			dd_dev_info(dd, "Resetting CSRs with FLR\n");
9911			hfi1_pcie_flr(dd);
9912			restore_pci_variables(dd);
9913		}
9914
9915		reset_asic_csrs(dd);
9916	} else {
9917		dd_dev_info(dd, "Resetting CSRs with writes\n");
9918		reset_cce_csrs(dd);
9919		reset_txe_csrs(dd);
9920		reset_rxe_csrs(dd);
9921		reset_asic_csrs(dd);
9922		reset_misc_csrs(dd);
9923	}
9924	/* clear the DC reset */
9925	write_csr(dd, CCE_DC_CTRL, 0);
9926
9927	/* Set the LED off */
9928	if (is_a0(dd))
9929		setextled(dd, 0);
9930	/*
9931	 * Clear the QSFP reset.
9932	 * A0 leaves the out lines floating on power on, then on an FLR
9933	 * enforces a 0 on all out pins.  The driver does not touch
9934	 * ASIC_QSFPn_OUT otherwise.  This leaves RESET_N low and
9935	 * anything  plugged constantly in reset, if it pays attention
9936	 * to RESET_N.
9937	 * A prime example of this is SiPh. For now, set all pins high.
9938	 * I2CCLK and I2CDAT will change per direction, and INT_N and
9939	 * MODPRS_N are input only and their value is ignored.
9940	 */
9941	if (is_a0(dd)) {
9942		write_csr(dd, ASIC_QSFP1_OUT, 0x1f);
9943		write_csr(dd, ASIC_QSFP2_OUT, 0x1f);
9944	}
9945}
9946
9947static void init_early_variables(struct hfi1_devdata *dd)
9948{
9949	int i;
9950
9951	/* assign link credit variables */
9952	dd->vau = CM_VAU;
9953	dd->link_credits = CM_GLOBAL_CREDITS;
9954	if (is_a0(dd))
9955		dd->link_credits--;
9956	dd->vcu = cu_to_vcu(hfi1_cu);
9957	/* enough room for 8 MAD packets plus header - 17K */
9958	dd->vl15_init = (8 * (2048 + 128)) / vau_to_au(dd->vau);
9959	if (dd->vl15_init > dd->link_credits)
9960		dd->vl15_init = dd->link_credits;
9961
9962	write_uninitialized_csrs_and_memories(dd);
9963
9964	if (HFI1_CAP_IS_KSET(PKEY_CHECK))
9965		for (i = 0; i < dd->num_pports; i++) {
9966			struct hfi1_pportdata *ppd = &dd->pport[i];
9967
9968			set_partition_keys(ppd);
9969		}
9970	init_sc2vl_tables(dd);
9971}
9972
9973static void init_kdeth_qp(struct hfi1_devdata *dd)
9974{
9975	/* user changed the KDETH_QP */
9976	if (kdeth_qp != 0 && kdeth_qp >= 0xff) {
9977		/* out of range or illegal value */
9978		dd_dev_err(dd, "Invalid KDETH queue pair prefix, ignoring");
9979		kdeth_qp = 0;
9980	}
9981	if (kdeth_qp == 0)	/* not set, or failed range check */
9982		kdeth_qp = DEFAULT_KDETH_QP;
9983
9984	write_csr(dd, SEND_BTH_QP,
9985			(kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK)
9986				<< SEND_BTH_QP_KDETH_QP_SHIFT);
9987
9988	write_csr(dd, RCV_BTH_QP,
9989			(kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK)
9990				<< RCV_BTH_QP_KDETH_QP_SHIFT);
9991}
9992
9993/**
9994 * init_qpmap_table
9995 * @dd - device data
9996 * @first_ctxt - first context
9997 * @last_ctxt - first context
9998 *
9999 * This return sets the qpn mapping table that
10000 * is indexed by qpn[8:1].
10001 *
10002 * The routine will round robin the 256 settings
10003 * from first_ctxt to last_ctxt.
10004 *
10005 * The first/last looks ahead to having specialized
10006 * receive contexts for mgmt and bypass.  Normal
10007 * verbs traffic will assumed to be on a range
10008 * of receive contexts.
10009 */
10010static void init_qpmap_table(struct hfi1_devdata *dd,
10011			     u32 first_ctxt,
10012			     u32 last_ctxt)
10013{
10014	u64 reg = 0;
10015	u64 regno = RCV_QP_MAP_TABLE;
10016	int i;
10017	u64 ctxt = first_ctxt;
10018
10019	for (i = 0; i < 256;) {
10020		if (ctxt == VL15CTXT) {
10021			ctxt++;
10022			if (ctxt > last_ctxt)
10023				ctxt = first_ctxt;
10024			continue;
10025		}
10026		reg |= ctxt << (8 * (i % 8));
10027		i++;
10028		ctxt++;
10029		if (ctxt > last_ctxt)
10030			ctxt = first_ctxt;
10031		if (i % 8 == 0) {
10032			write_csr(dd, regno, reg);
10033			reg = 0;
10034			regno += 8;
10035		}
10036	}
10037	if (i % 8)
10038		write_csr(dd, regno, reg);
10039
10040	add_rcvctrl(dd, RCV_CTRL_RCV_QP_MAP_ENABLE_SMASK
10041			| RCV_CTRL_RCV_BYPASS_ENABLE_SMASK);
10042}
10043
10044/**
10045 * init_qos - init RX qos
10046 * @dd - device data
10047 * @first_context
10048 *
10049 * This routine initializes Rule 0 and the
10050 * RSM map table to implement qos.
10051 *
10052 * If all of the limit tests succeed,
10053 * qos is applied based on the array
10054 * interpretation of krcvqs where
10055 * entry 0 is VL0.
10056 *
10057 * The number of vl bits (n) and the number of qpn
10058 * bits (m) are computed to feed both the RSM map table
10059 * and the single rule.
10060 *
10061 */
10062static void init_qos(struct hfi1_devdata *dd, u32 first_ctxt)
10063{
10064	u8 max_by_vl = 0;
10065	unsigned qpns_per_vl, ctxt, i, qpn, n = 1, m;
10066	u64 *rsmmap;
10067	u64 reg;
10068	u8  rxcontext = is_a0(dd) ? 0 : 0xff;  /* 0 is default if a0 ver. */
10069
10070	/* validate */
10071	if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS ||
10072	    num_vls == 1 ||
10073	    krcvqsset <= 1)
10074		goto bail;
10075	for (i = 0; i < min_t(unsigned, num_vls, krcvqsset); i++)
10076		if (krcvqs[i] > max_by_vl)
10077			max_by_vl = krcvqs[i];
10078	if (max_by_vl > 32)
10079		goto bail;
10080	qpns_per_vl = __roundup_pow_of_two(max_by_vl);
10081	/* determine bits vl */
10082	n = ilog2(num_vls);
10083	/* determine bits for qpn */
10084	m = ilog2(qpns_per_vl);
10085	if ((m + n) > 7)
10086		goto bail;
10087	if (num_vls * qpns_per_vl > dd->chip_rcv_contexts)
10088		goto bail;
10089	rsmmap = kmalloc_array(NUM_MAP_REGS, sizeof(u64), GFP_KERNEL);
10090	memset(rsmmap, rxcontext, NUM_MAP_REGS * sizeof(u64));
10091	/* init the local copy of the table */
10092	for (i = 0, ctxt = first_ctxt; i < num_vls; i++) {
10093		unsigned tctxt;
10094
10095		for (qpn = 0, tctxt = ctxt;
10096		     krcvqs[i] && qpn < qpns_per_vl; qpn++) {
10097			unsigned idx, regoff, regidx;
10098
10099			/* generate index <= 128 */
10100			idx = (qpn << n) ^ i;
10101			regoff = (idx % 8) * 8;
10102			regidx = idx / 8;
10103			reg = rsmmap[regidx];
10104			/* replace 0xff with context number */
10105			reg &= ~(RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK
10106				<< regoff);
10107			reg |= (u64)(tctxt++) << regoff;
10108			rsmmap[regidx] = reg;
10109			if (tctxt == ctxt + krcvqs[i])
10110				tctxt = ctxt;
10111		}
10112		ctxt += krcvqs[i];
10113	}
10114	/* flush cached copies to chip */
10115	for (i = 0; i < NUM_MAP_REGS; i++)
10116		write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rsmmap[i]);
10117	/* add rule0 */
10118	write_csr(dd, RCV_RSM_CFG /* + (8 * 0) */,
10119		RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK
10120			<< RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT |
10121		2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
10122	write_csr(dd, RCV_RSM_SELECT /* + (8 * 0) */,
10123		LRH_BTH_MATCH_OFFSET
10124			<< RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
10125		LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
10126		LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
10127		((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
10128		QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
10129		((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
10130	write_csr(dd, RCV_RSM_MATCH /* + (8 * 0) */,
10131		LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT |
10132		LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT |
10133		LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT |
10134		LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT);
10135	/* Enable RSM */
10136	add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
10137	kfree(rsmmap);
10138	/* map everything else (non-VL15) to context 0 */
10139	init_qpmap_table(
10140		dd,
10141		0,
10142		0);
10143	dd->qos_shift = n + 1;
10144	return;
10145bail:
10146	dd->qos_shift = 1;
10147	init_qpmap_table(
10148		dd,
10149		dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0,
10150		dd->n_krcv_queues - 1);
10151}
10152
10153static void init_rxe(struct hfi1_devdata *dd)
10154{
10155	/* enable all receive errors */
10156	write_csr(dd, RCV_ERR_MASK, ~0ull);
10157	/* setup QPN map table - start where VL15 context leaves off */
10158	init_qos(
10159		dd,
10160		dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0);
10161	/*
10162	 * make sure RcvCtrl.RcvWcb <= PCIe Device Control
10163	 * Register Max_Payload_Size (PCI_EXP_DEVCTL in Linux PCIe config
10164	 * space, PciCfgCap2.MaxPayloadSize in HFI).  There is only one
10165	 * invalid configuration: RcvCtrl.RcvWcb set to its max of 256 and
10166	 * Max_PayLoad_Size set to its minimum of 128.
10167	 *
10168	 * Presently, RcvCtrl.RcvWcb is not modified from its default of 0
10169	 * (64 bytes).  Max_Payload_Size is possibly modified upward in
10170	 * tune_pcie_caps() which is called after this routine.
10171	 */
10172}
10173
10174static void init_other(struct hfi1_devdata *dd)
10175{
10176	/* enable all CCE errors */
10177	write_csr(dd, CCE_ERR_MASK, ~0ull);
10178	/* enable *some* Misc errors */
10179	write_csr(dd, MISC_ERR_MASK, DRIVER_MISC_MASK);
10180	/* enable all DC errors, except LCB */
10181	write_csr(dd, DCC_ERR_FLG_EN, ~0ull);
10182	write_csr(dd, DC_DC8051_ERR_EN, ~0ull);
10183}
10184
10185/*
10186 * Fill out the given AU table using the given CU.  A CU is defined in terms
10187 * AUs.  The table is a an encoding: given the index, how many AUs does that
10188 * represent?
10189 *
10190 * NOTE: Assumes that the register layout is the same for the
10191 * local and remote tables.
10192 */
10193static void assign_cm_au_table(struct hfi1_devdata *dd, u32 cu,
10194			       u32 csr0to3, u32 csr4to7)
10195{
10196	write_csr(dd, csr0to3,
10197		   0ull <<
10198			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT
10199		|  1ull <<
10200			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT
10201		|  2ull * cu <<
10202			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT
10203		|  4ull * cu <<
10204			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT);
10205	write_csr(dd, csr4to7,
10206		   8ull * cu <<
10207			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT
10208		| 16ull * cu <<
10209			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT
10210		| 32ull * cu <<
10211			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT
10212		| 64ull * cu <<
10213			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT);
10214
10215}
10216
10217static void assign_local_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
10218{
10219	assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_LOCAL_AU_TABLE0_TO3,
10220					SEND_CM_LOCAL_AU_TABLE4_TO7);
10221}
10222
10223void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
10224{
10225	assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_REMOTE_AU_TABLE0_TO3,
10226					SEND_CM_REMOTE_AU_TABLE4_TO7);
10227}
10228
10229static void init_txe(struct hfi1_devdata *dd)
10230{
10231	int i;
10232
10233	/* enable all PIO, SDMA, general, and Egress errors */
10234	write_csr(dd, SEND_PIO_ERR_MASK, ~0ull);
10235	write_csr(dd, SEND_DMA_ERR_MASK, ~0ull);
10236	write_csr(dd, SEND_ERR_MASK, ~0ull);
10237	write_csr(dd, SEND_EGRESS_ERR_MASK, ~0ull);
10238
10239	/* enable all per-context and per-SDMA engine errors */
10240	for (i = 0; i < dd->chip_send_contexts; i++)
10241		write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, ~0ull);
10242	for (i = 0; i < dd->chip_sdma_engines; i++)
10243		write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, ~0ull);
10244
10245	/* set the local CU to AU mapping */
10246	assign_local_cm_au_table(dd, dd->vcu);
10247
10248	/*
10249	 * Set reasonable default for Credit Return Timer
10250	 * Don't set on Simulator - causes it to choke.
10251	 */
10252	if (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)
10253		write_csr(dd, SEND_CM_TIMER_CTRL, HFI1_CREDIT_RETURN_RATE);
10254}
10255
10256int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey)
10257{
10258	struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
10259	unsigned sctxt;
10260	int ret = 0;
10261	u64 reg;
10262
10263	if (!rcd || !rcd->sc) {
10264		ret = -EINVAL;
10265		goto done;
10266	}
10267	sctxt = rcd->sc->hw_context;
10268	reg = SEND_CTXT_CHECK_JOB_KEY_MASK_SMASK | /* mask is always 1's */
10269		((jkey & SEND_CTXT_CHECK_JOB_KEY_VALUE_MASK) <<
10270		 SEND_CTXT_CHECK_JOB_KEY_VALUE_SHIFT);
10271	/* JOB_KEY_ALLOW_PERMISSIVE is not allowed by default */
10272	if (HFI1_CAP_KGET_MASK(rcd->flags, ALLOW_PERM_JKEY))
10273		reg |= SEND_CTXT_CHECK_JOB_KEY_ALLOW_PERMISSIVE_SMASK;
10274	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, reg);
10275	/*
10276	 * Enable send-side J_KEY integrity check, unless this is A0 h/w
10277	 * (due to A0 erratum).
10278	 */
10279	if (!is_a0(dd)) {
10280		reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10281		reg |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
10282		write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10283	}
10284
10285	/* Enable J_KEY check on receive context. */
10286	reg = RCV_KEY_CTRL_JOB_KEY_ENABLE_SMASK |
10287		((jkey & RCV_KEY_CTRL_JOB_KEY_VALUE_MASK) <<
10288		 RCV_KEY_CTRL_JOB_KEY_VALUE_SHIFT);
10289	write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, reg);
10290done:
10291	return ret;
10292}
10293
10294int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt)
10295{
10296	struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
10297	unsigned sctxt;
10298	int ret = 0;
10299	u64 reg;
10300
10301	if (!rcd || !rcd->sc) {
10302		ret = -EINVAL;
10303		goto done;
10304	}
10305	sctxt = rcd->sc->hw_context;
10306	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, 0);
10307	/*
10308	 * Disable send-side J_KEY integrity check, unless this is A0 h/w.
10309	 * This check would not have been enabled for A0 h/w, see
10310	 * set_ctxt_jkey().
10311	 */
10312	if (!is_a0(dd)) {
10313		reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10314		reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
10315		write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10316	}
10317	/* Turn off the J_KEY on the receive side */
10318	write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, 0);
10319done:
10320	return ret;
10321}
10322
10323int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey)
10324{
10325	struct hfi1_ctxtdata *rcd;
10326	unsigned sctxt;
10327	int ret = 0;
10328	u64 reg;
10329
10330	if (ctxt < dd->num_rcv_contexts)
10331		rcd = dd->rcd[ctxt];
10332	else {
10333		ret = -EINVAL;
10334		goto done;
10335	}
10336	if (!rcd || !rcd->sc) {
10337		ret = -EINVAL;
10338		goto done;
10339	}
10340	sctxt = rcd->sc->hw_context;
10341	reg = ((u64)pkey & SEND_CTXT_CHECK_PARTITION_KEY_VALUE_MASK) <<
10342		SEND_CTXT_CHECK_PARTITION_KEY_VALUE_SHIFT;
10343	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg);
10344	reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10345	reg |= SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
10346	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10347done:
10348	return ret;
10349}
10350
10351int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt)
10352{
10353	struct hfi1_ctxtdata *rcd;
10354	unsigned sctxt;
10355	int ret = 0;
10356	u64 reg;
10357
10358	if (ctxt < dd->num_rcv_contexts)
10359		rcd = dd->rcd[ctxt];
10360	else {
10361		ret = -EINVAL;
10362		goto done;
10363	}
10364	if (!rcd || !rcd->sc) {
10365		ret = -EINVAL;
10366		goto done;
10367	}
10368	sctxt = rcd->sc->hw_context;
10369	reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10370	reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
10371	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10372	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0);
10373done:
10374	return ret;
10375}
10376
10377/*
10378 * Start doing the clean up the the chip. Our clean up happens in multiple
10379 * stages and this is just the first.
10380 */
10381void hfi1_start_cleanup(struct hfi1_devdata *dd)
10382{
10383	free_cntrs(dd);
10384	free_rcverr(dd);
10385	clean_up_interrupts(dd);
10386}
10387
10388#define HFI_BASE_GUID(dev) \
10389	((dev)->base_guid & ~(1ULL << GUID_HFI_INDEX_SHIFT))
10390
10391/*
10392 * Certain chip functions need to be initialized only once per asic
10393 * instead of per-device. This function finds the peer device and
10394 * checks whether that chip initialization needs to be done by this
10395 * device.
10396 */
10397static void asic_should_init(struct hfi1_devdata *dd)
10398{
10399	unsigned long flags;
10400	struct hfi1_devdata *tmp, *peer = NULL;
10401
10402	spin_lock_irqsave(&hfi1_devs_lock, flags);
10403	/* Find our peer device */
10404	list_for_each_entry(tmp, &hfi1_dev_list, list) {
10405		if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(tmp)) &&
10406		    dd->unit != tmp->unit) {
10407			peer = tmp;
10408			break;
10409		}
10410	}
10411
10412	/*
10413	 * "Claim" the ASIC for initialization if it hasn't been
10414	 " "claimed" yet.
10415	 */
10416	if (!peer || !(peer->flags & HFI1_DO_INIT_ASIC))
10417		dd->flags |= HFI1_DO_INIT_ASIC;
10418	spin_unlock_irqrestore(&hfi1_devs_lock, flags);
10419}
10420
10421/**
10422 * Allocate and initialize the device structure for the hfi.
10423 * @dev: the pci_dev for hfi1_ib device
10424 * @ent: pci_device_id struct for this dev
10425 *
10426 * Also allocates, initializes, and returns the devdata struct for this
10427 * device instance
10428 *
10429 * This is global, and is called directly at init to set up the
10430 * chip-specific function pointers for later use.
10431 */
10432struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
10433				  const struct pci_device_id *ent)
10434{
10435	struct hfi1_devdata *dd;
10436	struct hfi1_pportdata *ppd;
10437	u64 reg;
10438	int i, ret;
10439	static const char * const inames[] = { /* implementation names */
10440		"RTL silicon",
10441		"RTL VCS simulation",
10442		"RTL FPGA emulation",
10443		"Functional simulator"
10444	};
10445
10446	dd = hfi1_alloc_devdata(pdev,
10447		NUM_IB_PORTS * sizeof(struct hfi1_pportdata));
10448	if (IS_ERR(dd))
10449		goto bail;
10450	ppd = dd->pport;
10451	for (i = 0; i < dd->num_pports; i++, ppd++) {
10452		int vl;
10453		/* init common fields */
10454		hfi1_init_pportdata(pdev, ppd, dd, 0, 1);
10455		/* DC supports 4 link widths */
10456		ppd->link_width_supported =
10457			OPA_LINK_WIDTH_1X | OPA_LINK_WIDTH_2X |
10458			OPA_LINK_WIDTH_3X | OPA_LINK_WIDTH_4X;
10459		ppd->link_width_downgrade_supported =
10460			ppd->link_width_supported;
10461		/* start out enabling only 4X */
10462		ppd->link_width_enabled = OPA_LINK_WIDTH_4X;
10463		ppd->link_width_downgrade_enabled =
10464					ppd->link_width_downgrade_supported;
10465		/* link width active is 0 when link is down */
10466		/* link width downgrade active is 0 when link is down */
10467
10468		if (num_vls < HFI1_MIN_VLS_SUPPORTED
10469			|| num_vls > HFI1_MAX_VLS_SUPPORTED) {
10470			hfi1_early_err(&pdev->dev,
10471				       "Invalid num_vls %u, using %u VLs\n",
10472				    num_vls, HFI1_MAX_VLS_SUPPORTED);
10473			num_vls = HFI1_MAX_VLS_SUPPORTED;
10474		}
10475		ppd->vls_supported = num_vls;
10476		ppd->vls_operational = ppd->vls_supported;
10477		/* Set the default MTU. */
10478		for (vl = 0; vl < num_vls; vl++)
10479			dd->vld[vl].mtu = hfi1_max_mtu;
10480		dd->vld[15].mtu = MAX_MAD_PACKET;
10481		/*
10482		 * Set the initial values to reasonable default, will be set
10483		 * for real when link is up.
10484		 */
10485		ppd->lstate = IB_PORT_DOWN;
10486		ppd->overrun_threshold = 0x4;
10487		ppd->phy_error_threshold = 0xf;
10488		ppd->port_crc_mode_enabled = link_crc_mask;
10489		/* initialize supported LTP CRC mode */
10490		ppd->port_ltp_crc_mode = cap_to_port_ltp(link_crc_mask) << 8;
10491		/* initialize enabled LTP CRC mode */
10492		ppd->port_ltp_crc_mode |= cap_to_port_ltp(link_crc_mask) << 4;
10493		/* start in offline */
10494		ppd->host_link_state = HLS_DN_OFFLINE;
10495		init_vl_arb_caches(ppd);
10496	}
10497
10498	dd->link_default = HLS_DN_POLL;
10499
10500	/*
10501	 * Do remaining PCIe setup and save PCIe values in dd.
10502	 * Any error printing is already done by the init code.
10503	 * On return, we have the chip mapped.
10504	 */
10505	ret = hfi1_pcie_ddinit(dd, pdev, ent);
10506	if (ret < 0)
10507		goto bail_free;
10508
10509	/* verify that reads actually work, save revision for reset check */
10510	dd->revision = read_csr(dd, CCE_REVISION);
10511	if (dd->revision == ~(u64)0) {
10512		dd_dev_err(dd, "cannot read chip CSRs\n");
10513		ret = -EINVAL;
10514		goto bail_cleanup;
10515	}
10516	dd->majrev = (dd->revision >> CCE_REVISION_CHIP_REV_MAJOR_SHIFT)
10517			& CCE_REVISION_CHIP_REV_MAJOR_MASK;
10518	dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
10519			& CCE_REVISION_CHIP_REV_MINOR_MASK;
10520
10521	/* obtain the hardware ID - NOT related to unit, which is a
10522	   software enumeration */
10523	reg = read_csr(dd, CCE_REVISION2);
10524	dd->hfi1_id = (reg >> CCE_REVISION2_HFI_ID_SHIFT)
10525					& CCE_REVISION2_HFI_ID_MASK;
10526	/* the variable size will remove unwanted bits */
10527	dd->icode = reg >> CCE_REVISION2_IMPL_CODE_SHIFT;
10528	dd->irev = reg >> CCE_REVISION2_IMPL_REVISION_SHIFT;
10529	dd_dev_info(dd, "Implementation: %s, revision 0x%x\n",
10530		dd->icode < ARRAY_SIZE(inames) ? inames[dd->icode] : "unknown",
10531		(int)dd->irev);
10532
10533	/* speeds the hardware can support */
10534	dd->pport->link_speed_supported = OPA_LINK_SPEED_25G;
10535	/* speeds allowed to run at */
10536	dd->pport->link_speed_enabled = dd->pport->link_speed_supported;
10537	/* give a reasonable active value, will be set on link up */
10538	dd->pport->link_speed_active = OPA_LINK_SPEED_25G;
10539
10540	dd->chip_rcv_contexts = read_csr(dd, RCV_CONTEXTS);
10541	dd->chip_send_contexts = read_csr(dd, SEND_CONTEXTS);
10542	dd->chip_sdma_engines = read_csr(dd, SEND_DMA_ENGINES);
10543	dd->chip_pio_mem_size = read_csr(dd, SEND_PIO_MEM_SIZE);
10544	dd->chip_sdma_mem_size = read_csr(dd, SEND_DMA_MEM_SIZE);
10545	/* fix up link widths for emulation _p */
10546	ppd = dd->pport;
10547	if (dd->icode == ICODE_FPGA_EMULATION && is_emulator_p(dd)) {
10548		ppd->link_width_supported =
10549			ppd->link_width_enabled =
10550			ppd->link_width_downgrade_supported =
10551			ppd->link_width_downgrade_enabled =
10552				OPA_LINK_WIDTH_1X;
10553	}
10554	/* insure num_vls isn't larger than number of sdma engines */
10555	if (HFI1_CAP_IS_KSET(SDMA) && num_vls > dd->chip_sdma_engines) {
10556		dd_dev_err(dd, "num_vls %u too large, using %u VLs\n",
10557				num_vls, HFI1_MAX_VLS_SUPPORTED);
10558		ppd->vls_supported = num_vls = HFI1_MAX_VLS_SUPPORTED;
10559		ppd->vls_operational = ppd->vls_supported;
10560	}
10561
10562	/*
10563	 * Convert the ns parameter to the 64 * cclocks used in the CSR.
10564	 * Limit the max if larger than the field holds.  If timeout is
10565	 * non-zero, then the calculated field will be at least 1.
10566	 *
10567	 * Must be after icode is set up - the cclock rate depends
10568	 * on knowing the hardware being used.
10569	 */
10570	dd->rcv_intr_timeout_csr = ns_to_cclock(dd, rcv_intr_timeout) / 64;
10571	if (dd->rcv_intr_timeout_csr >
10572			RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_MASK)
10573		dd->rcv_intr_timeout_csr =
10574			RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_MASK;
10575	else if (dd->rcv_intr_timeout_csr == 0 && rcv_intr_timeout)
10576		dd->rcv_intr_timeout_csr = 1;
10577
10578	/* needs to be done before we look for the peer device */
10579	read_guid(dd);
10580
10581	/* should this device init the ASIC block? */
10582	asic_should_init(dd);
10583
10584	/* obtain chip sizes, reset chip CSRs */
10585	init_chip(dd);
10586
10587	/* read in the PCIe link speed information */
10588	ret = pcie_speeds(dd);
10589	if (ret)
10590		goto bail_cleanup;
10591
10592	/* read in firmware */
10593	ret = hfi1_firmware_init(dd);
10594	if (ret)
10595		goto bail_cleanup;
10596
10597	/*
10598	 * In general, the PCIe Gen3 transition must occur after the
10599	 * chip has been idled (so it won't initiate any PCIe transactions
10600	 * e.g. an interrupt) and before the driver changes any registers
10601	 * (the transition will reset the registers).
10602	 *
10603	 * In particular, place this call after:
10604	 * - init_chip()     - the chip will not initiate any PCIe transactions
10605	 * - pcie_speeds()   - reads the current link speed
10606	 * - hfi1_firmware_init() - the needed firmware is ready to be
10607	 *			    downloaded
10608	 */
10609	ret = do_pcie_gen3_transition(dd);
10610	if (ret)
10611		goto bail_cleanup;
10612
10613	/* start setting dd values and adjusting CSRs */
10614	init_early_variables(dd);
10615
10616	parse_platform_config(dd);
10617
10618	/* add board names as they are defined */
10619	dd->boardname = kmalloc(64, GFP_KERNEL);
10620	if (!dd->boardname)
10621		goto bail_cleanup;
10622	snprintf(dd->boardname, 64, "Board ID 0x%llx",
10623		 dd->revision >> CCE_REVISION_BOARD_ID_LOWER_NIBBLE_SHIFT
10624		    & CCE_REVISION_BOARD_ID_LOWER_NIBBLE_MASK);
10625
10626	snprintf(dd->boardversion, BOARD_VERS_MAX,
10627		 "ChipABI %u.%u, %s, ChipRev %u.%u, SW Compat %llu\n",
10628		 HFI1_CHIP_VERS_MAJ, HFI1_CHIP_VERS_MIN,
10629		 dd->boardname,
10630		 (u32)dd->majrev,
10631		 (u32)dd->minrev,
10632		 (dd->revision >> CCE_REVISION_SW_SHIFT)
10633		    & CCE_REVISION_SW_MASK);
10634
10635	ret = set_up_context_variables(dd);
10636	if (ret)
10637		goto bail_cleanup;
10638
10639	/* set initial RXE CSRs */
10640	init_rxe(dd);
10641	/* set initial TXE CSRs */
10642	init_txe(dd);
10643	/* set initial non-RXE, non-TXE CSRs */
10644	init_other(dd);
10645	/* set up KDETH QP prefix in both RX and TX CSRs */
10646	init_kdeth_qp(dd);
10647
10648	/* send contexts must be set up before receive contexts */
10649	ret = init_send_contexts(dd);
10650	if (ret)
10651		goto bail_cleanup;
10652
10653	ret = hfi1_create_ctxts(dd);
10654	if (ret)
10655		goto bail_cleanup;
10656
10657	dd->rcvhdrsize = DEFAULT_RCVHDRSIZE;
10658	/*
10659	 * rcd[0] is guaranteed to be valid by this point. Also, all
10660	 * context are using the same value, as per the module parameter.
10661	 */
10662	dd->rhf_offset = dd->rcd[0]->rcvhdrqentsize - sizeof(u64) / sizeof(u32);
10663
10664	ret = init_pervl_scs(dd);
10665	if (ret)
10666		goto bail_cleanup;
10667
10668	/* sdma init */
10669	for (i = 0; i < dd->num_pports; ++i) {
10670		ret = sdma_init(dd, i);
10671		if (ret)
10672			goto bail_cleanup;
10673	}
10674
10675	/* use contexts created by hfi1_create_ctxts */
10676	ret = set_up_interrupts(dd);
10677	if (ret)
10678		goto bail_cleanup;
10679
10680	/* set up LCB access - must be after set_up_interrupts() */
10681	init_lcb_access(dd);
10682
10683	snprintf(dd->serial, SERIAL_MAX, "0x%08llx\n",
10684		 dd->base_guid & 0xFFFFFF);
10685
10686	dd->oui1 = dd->base_guid >> 56 & 0xFF;
10687	dd->oui2 = dd->base_guid >> 48 & 0xFF;
10688	dd->oui3 = dd->base_guid >> 40 & 0xFF;
10689
10690	ret = load_firmware(dd); /* asymmetric with dispose_firmware() */
10691	if (ret)
10692		goto bail_clear_intr;
10693	check_fabric_firmware_versions(dd);
10694
10695	thermal_init(dd);
10696
10697	ret = init_cntrs(dd);
10698	if (ret)
10699		goto bail_clear_intr;
10700
10701	ret = init_rcverr(dd);
10702	if (ret)
10703		goto bail_free_cntrs;
10704
10705	ret = eprom_init(dd);
10706	if (ret)
10707		goto bail_free_rcverr;
10708
10709	goto bail;
10710
10711bail_free_rcverr:
10712	free_rcverr(dd);
10713bail_free_cntrs:
10714	free_cntrs(dd);
10715bail_clear_intr:
10716	clean_up_interrupts(dd);
10717bail_cleanup:
10718	hfi1_pcie_ddcleanup(dd);
10719bail_free:
10720	hfi1_free_devdata(dd);
10721	dd = ERR_PTR(ret);
10722bail:
10723	return dd;
10724}
10725
10726static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
10727			u32 dw_len)
10728{
10729	u32 delta_cycles;
10730	u32 current_egress_rate = ppd->current_egress_rate;
10731	/* rates here are in units of 10^6 bits/sec */
10732
10733	if (desired_egress_rate == -1)
10734		return 0; /* shouldn't happen */
10735
10736	if (desired_egress_rate >= current_egress_rate)
10737		return 0; /* we can't help go faster, only slower */
10738
10739	delta_cycles = egress_cycles(dw_len * 4, desired_egress_rate) -
10740			egress_cycles(dw_len * 4, current_egress_rate);
10741
10742	return (u16)delta_cycles;
10743}
10744
10745
10746/**
10747 * create_pbc - build a pbc for transmission
10748 * @flags: special case flags or-ed in built pbc
10749 * @srate: static rate
10750 * @vl: vl
10751 * @dwlen: dword length (header words + data words + pbc words)
10752 *
10753 * Create a PBC with the given flags, rate, VL, and length.
10754 *
10755 * NOTE: The PBC created will not insert any HCRC - all callers but one are
10756 * for verbs, which does not use this PSM feature.  The lone other caller
10757 * is for the diagnostic interface which calls this if the user does not
10758 * supply their own PBC.
10759 */
10760u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
10761	       u32 dw_len)
10762{
10763	u64 pbc, delay = 0;
10764
10765	if (unlikely(srate_mbs))
10766		delay = delay_cycles(ppd, srate_mbs, dw_len);
10767
10768	pbc = flags
10769		| (delay << PBC_STATIC_RATE_CONTROL_COUNT_SHIFT)
10770		| ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
10771		| (vl & PBC_VL_MASK) << PBC_VL_SHIFT
10772		| (dw_len & PBC_LENGTH_DWS_MASK)
10773			<< PBC_LENGTH_DWS_SHIFT;
10774
10775	return pbc;
10776}
10777
10778#define SBUS_THERMAL    0x4f
10779#define SBUS_THERM_MONITOR_MODE 0x1
10780
10781#define THERM_FAILURE(dev, ret, reason) \
10782	dd_dev_err((dd),						\
10783		   "Thermal sensor initialization failed: %s (%d)\n",	\
10784		   (reason), (ret))
10785
10786/*
10787 * Initialize the Avago Thermal sensor.
10788 *
10789 * After initialization, enable polling of thermal sensor through
10790 * SBus interface. In order for this to work, the SBus Master
10791 * firmware has to be loaded due to the fact that the HW polling
10792 * logic uses SBus interrupts, which are not supported with
10793 * default firmware. Otherwise, no data will be returned through
10794 * the ASIC_STS_THERM CSR.
10795 */
10796static int thermal_init(struct hfi1_devdata *dd)
10797{
10798	int ret = 0;
10799
10800	if (dd->icode != ICODE_RTL_SILICON ||
10801	    !(dd->flags & HFI1_DO_INIT_ASIC))
10802		return ret;
10803
10804	acquire_hw_mutex(dd);
10805	dd_dev_info(dd, "Initializing thermal sensor\n");
10806
10807	/* Thermal Sensor Initialization */
10808	/*    Step 1: Reset the Thermal SBus Receiver */
10809	ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10810				RESET_SBUS_RECEIVER, 0);
10811	if (ret) {
10812		THERM_FAILURE(dd, ret, "Bus Reset");
10813		goto done;
10814	}
10815	/*    Step 2: Set Reset bit in Thermal block */
10816	ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10817				WRITE_SBUS_RECEIVER, 0x1);
10818	if (ret) {
10819		THERM_FAILURE(dd, ret, "Therm Block Reset");
10820		goto done;
10821	}
10822	/*    Step 3: Write clock divider value (100MHz -> 2MHz) */
10823	ret = sbus_request_slow(dd, SBUS_THERMAL, 0x1,
10824				WRITE_SBUS_RECEIVER, 0x32);
10825	if (ret) {
10826		THERM_FAILURE(dd, ret, "Write Clock Div");
10827		goto done;
10828	}
10829	/*    Step 4: Select temperature mode */
10830	ret = sbus_request_slow(dd, SBUS_THERMAL, 0x3,
10831				WRITE_SBUS_RECEIVER,
10832				SBUS_THERM_MONITOR_MODE);
10833	if (ret) {
10834		THERM_FAILURE(dd, ret, "Write Mode Sel");
10835		goto done;
10836	}
10837	/*    Step 5: De-assert block reset and start conversion */
10838	ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10839				WRITE_SBUS_RECEIVER, 0x2);
10840	if (ret) {
10841		THERM_FAILURE(dd, ret, "Write Reset Deassert");
10842		goto done;
10843	}
10844	/*    Step 5.1: Wait for first conversion (21.5ms per spec) */
10845	msleep(22);
10846
10847	/* Enable polling of thermal readings */
10848	write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x1);
10849done:
10850	release_hw_mutex(dd);
10851	return ret;
10852}
10853
10854static void handle_temp_err(struct hfi1_devdata *dd)
10855{
10856	struct hfi1_pportdata *ppd = &dd->pport[0];
10857	/*
10858	 * Thermal Critical Interrupt
10859	 * Put the device into forced freeze mode, take link down to
10860	 * offline, and put DC into reset.
10861	 */
10862	dd_dev_emerg(dd,
10863		     "Critical temperature reached! Forcing device into freeze mode!\n");
10864	dd->flags |= HFI1_FORCED_FREEZE;
10865	start_freeze_handling(ppd, FREEZE_SELF|FREEZE_ABORT);
10866	/*
10867	 * Shut DC down as much and as quickly as possible.
10868	 *
10869	 * Step 1: Take the link down to OFFLINE. This will cause the
10870	 *         8051 to put the Serdes in reset. However, we don't want to
10871	 *         go through the entire link state machine since we want to
10872	 *         shutdown ASAP. Furthermore, this is not a graceful shutdown
10873	 *         but rather an attempt to save the chip.
10874	 *         Code below is almost the same as quiet_serdes() but avoids
10875	 *         all the extra work and the sleeps.
10876	 */
10877	ppd->driver_link_ready = 0;
10878	ppd->link_enabled = 0;
10879	set_physical_link_state(dd, PLS_OFFLINE |
10880				(OPA_LINKDOWN_REASON_SMA_DISABLED << 8));
10881	/*
10882	 * Step 2: Shutdown LCB and 8051
10883	 *         After shutdown, do not restore DC_CFG_RESET value.
10884	 */
10885	dc_shutdown(dd);
10886}
10887