1 /*
2  *
3  * This file is provided under a dual BSD/GPLv2 license.  When using or
4  * redistributing this file, you may do so under either license.
5  *
6  * GPL LICENSE SUMMARY
7  *
8  * Copyright(c) 2015 Intel Corporation.
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of version 2 of the GNU General Public License as
12  * published by the Free Software Foundation.
13  *
14  * This program is distributed in the hope that it will be useful, but
15  * WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * General Public License for more details.
18  *
19  * BSD LICENSE
20  *
21  * Copyright(c) 2015 Intel Corporation.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  *
27  *  - Redistributions of source code must retain the above copyright
28  *    notice, this list of conditions and the following disclaimer.
29  *  - Redistributions in binary form must reproduce the above copyright
30  *    notice, this list of conditions and the following disclaimer in
31  *    the documentation and/or other materials provided with the
32  *    distribution.
33  *  - Neither the name of Intel Corporation nor the names of its
34  *    contributors may be used to endorse or promote products derived
35  *    from this software without specific prior written permission.
36  *
37  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48  *
49  */
50 
51 /*
52  * This file contains all of the code that is specific to the HFI chip
53  */
54 
55 #include <linux/pci.h>
56 #include <linux/delay.h>
57 #include <linux/interrupt.h>
58 #include <linux/module.h>
59 
60 #include "hfi.h"
61 #include "trace.h"
62 #include "mad.h"
63 #include "pio.h"
64 #include "sdma.h"
65 #include "eprom.h"
66 
67 #define NUM_IB_PORTS 1
68 
69 uint kdeth_qp;
70 module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
71 MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
72 
73 uint num_vls = HFI1_MAX_VLS_SUPPORTED;
74 module_param(num_vls, uint, S_IRUGO);
75 MODULE_PARM_DESC(num_vls, "Set number of Virtual Lanes to use (1-8)");
76 
77 /*
78  * Default time to aggregate two 10K packets from the idle state
79  * (timer not running). The timer starts at the end of the first packet,
80  * so only the time for one 10K packet and header plus a bit extra is needed.
81  * 10 * 1024 + 64 header bytes = 10304 bytes
82  * 10304 bytes / 12.5 GB/s = 824.32 ns
83  */
84 uint rcv_intr_timeout = (824 + 16); /* 16 is for coalescing interrupt */
85 module_param(rcv_intr_timeout, uint, S_IRUGO);
86 MODULE_PARM_DESC(rcv_intr_timeout, "Receive interrupt mitigation timeout in ns");
87 
88 uint rcv_intr_count = 16; /* same as qib */
89 module_param(rcv_intr_count, uint, S_IRUGO);
90 MODULE_PARM_DESC(rcv_intr_count, "Receive interrupt mitigation count");
91 
92 ushort link_crc_mask = SUPPORTED_CRCS;
93 module_param(link_crc_mask, ushort, S_IRUGO);
94 MODULE_PARM_DESC(link_crc_mask, "CRCs to use on the link");
95 
96 uint loopback;
97 module_param_named(loopback, loopback, uint, S_IRUGO);
98 MODULE_PARM_DESC(loopback, "Put into loopback mode (1 = serdes, 3 = external cable)");
99 
100 /* Other driver tunables */
101 uint rcv_intr_dynamic = 1; /* enable dynamic mode for rcv int mitigation */
102 static ushort crc_14b_sideband = 1;
103 static uint use_flr = 1;
104 uint quick_linkup; /* skip LNI */
105 
106 struct flag_table {
107 	u64 flag;	/* the flag */
108 	char *str;	/* description string */
109 	u16 extra;	/* extra information */
110 	u16 unused0;
111 	u32 unused1;
112 };
113 
114 /* str must be a string constant */
115 #define FLAG_ENTRY(str, extra, flag) {flag, str, extra}
116 #define FLAG_ENTRY0(str, flag) {flag, str, 0}
117 
118 /* Send Error Consequences */
119 #define SEC_WRITE_DROPPED	0x1
120 #define SEC_PACKET_DROPPED	0x2
121 #define SEC_SC_HALTED		0x4	/* per-context only */
122 #define SEC_SPC_FREEZE		0x8	/* per-HFI only */
123 
124 #define VL15CTXT                  1
125 #define MIN_KERNEL_KCTXTS         2
126 #define NUM_MAP_REGS             32
127 
128 /* Bit offset into the GUID which carries HFI id information */
129 #define GUID_HFI_INDEX_SHIFT     39
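
/*
 * Illustrative sketch only: with the shift above, the HFI id carried in
 * a node GUID would be recovered as, e.g.,
 *
 *	hfi_id = (guid >> GUID_HFI_INDEX_SHIFT) & 0x1;
 *
 * The one-bit field width used here is an assumption for the example;
 * this file only defines the bit offset.
 */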
130 
131 /* extract the emulation revision */
132 #define emulator_rev(dd) ((dd)->irev >> 8)
133 /* parallel and serial emulation versions are 3 and 4 respectively */
134 #define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3)
135 #define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4)
136 
137 /* RSM fields */
138 
139 /* packet type */
140 #define IB_PACKET_TYPE         2ull
141 #define QW_SHIFT               6ull
142 /* QPN[7..1] */
143 #define QPN_WIDTH              7ull
144 
145 /* LRH.BTH: QW 0, OFFSET 48 - for match */
146 #define LRH_BTH_QW             0ull
147 #define LRH_BTH_BIT_OFFSET     48ull
148 #define LRH_BTH_OFFSET(off)    ((LRH_BTH_QW << QW_SHIFT) | (off))
149 #define LRH_BTH_MATCH_OFFSET   LRH_BTH_OFFSET(LRH_BTH_BIT_OFFSET)
150 #define LRH_BTH_SELECT
151 #define LRH_BTH_MASK           3ull
152 #define LRH_BTH_VALUE          2ull
153 
154 /* LRH.SC[3..0] QW 0, OFFSET 56 - for match */
155 #define LRH_SC_QW              0ull
156 #define LRH_SC_BIT_OFFSET      56ull
157 #define LRH_SC_OFFSET(off)     ((LRH_SC_QW << QW_SHIFT) | (off))
158 #define LRH_SC_MATCH_OFFSET    LRH_SC_OFFSET(LRH_SC_BIT_OFFSET)
159 #define LRH_SC_MASK            128ull
160 #define LRH_SC_VALUE           0ull
161 
162 /* SC[n..0] QW 0, OFFSET 60 - for select */
163 #define LRH_SC_SELECT_OFFSET  ((LRH_SC_QW << QW_SHIFT) | (60ull))
164 
165 /* QPN[m+n:1] QW 1, OFFSET 1 */
166 #define QPN_SELECT_OFFSET      ((1ull << QW_SHIFT) | (1ull))
167 
168 /* defines to build power on SC2VL table */
169 #define SC2VL_VAL( \
170 	num, \
171 	sc0, sc0val, \
172 	sc1, sc1val, \
173 	sc2, sc2val, \
174 	sc3, sc3val, \
175 	sc4, sc4val, \
176 	sc5, sc5val, \
177 	sc6, sc6val, \
178 	sc7, sc7val) \
179 ( \
180 	((u64)(sc0val) << SEND_SC2VLT##num##_SC##sc0##_SHIFT) | \
181 	((u64)(sc1val) << SEND_SC2VLT##num##_SC##sc1##_SHIFT) | \
182 	((u64)(sc2val) << SEND_SC2VLT##num##_SC##sc2##_SHIFT) | \
183 	((u64)(sc3val) << SEND_SC2VLT##num##_SC##sc3##_SHIFT) | \
184 	((u64)(sc4val) << SEND_SC2VLT##num##_SC##sc4##_SHIFT) | \
185 	((u64)(sc5val) << SEND_SC2VLT##num##_SC##sc5##_SHIFT) | \
186 	((u64)(sc6val) << SEND_SC2VLT##num##_SC##sc6##_SHIFT) | \
187 	((u64)(sc7val) << SEND_SC2VLT##num##_SC##sc7##_SHIFT)   \
188 )
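
/*
 * Example use of SC2VL_VAL() (illustrative only): a power-on value for
 * SendSC2VLt table 0 mapping SCs 0-7 one-to-one onto VLs 0-7 would be
 * built as
 *
 *	SC2VL_VAL(0,
 *		  0, 0, 1, 1, 2, 2, 3, 3,
 *		  4, 4, 5, 5, 6, 6, 7, 7)
 *
 * Both the table number and the SC indices are pasted into CSR field
 * names, so all arguments must be literal constants.
 */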
189 
190 #define DC_SC_VL_VAL( \
191 	range, \
192 	e0, e0val, \
193 	e1, e1val, \
194 	e2, e2val, \
195 	e3, e3val, \
196 	e4, e4val, \
197 	e5, e5val, \
198 	e6, e6val, \
199 	e7, e7val, \
200 	e8, e8val, \
201 	e9, e9val, \
202 	e10, e10val, \
203 	e11, e11val, \
204 	e12, e12val, \
205 	e13, e13val, \
206 	e14, e14val, \
207 	e15, e15val) \
208 ( \
209 	((u64)(e0val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e0##_SHIFT) | \
210 	((u64)(e1val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e1##_SHIFT) | \
211 	((u64)(e2val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e2##_SHIFT) | \
212 	((u64)(e3val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e3##_SHIFT) | \
213 	((u64)(e4val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e4##_SHIFT) | \
214 	((u64)(e5val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e5##_SHIFT) | \
215 	((u64)(e6val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e6##_SHIFT) | \
216 	((u64)(e7val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e7##_SHIFT) | \
217 	((u64)(e8val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e8##_SHIFT) | \
218 	((u64)(e9val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e9##_SHIFT) | \
219 	((u64)(e10val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e10##_SHIFT) | \
220 	((u64)(e11val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e11##_SHIFT) | \
221 	((u64)(e12val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e12##_SHIFT) | \
222 	((u64)(e13val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e13##_SHIFT) | \
223 	((u64)(e14val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e14##_SHIFT) | \
224 	((u64)(e15val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e15##_SHIFT) \
225 )
226 
227 /* all CceStatus sub-block freeze bits */
228 #define ALL_FROZE (CCE_STATUS_SDMA_FROZE_SMASK \
229 			| CCE_STATUS_RXE_FROZE_SMASK \
230 			| CCE_STATUS_TXE_FROZE_SMASK \
231 			| CCE_STATUS_TXE_PIO_FROZE_SMASK)
232 /* all CceStatus sub-block TXE pause bits */
233 #define ALL_TXE_PAUSE (CCE_STATUS_TXE_PIO_PAUSED_SMASK \
234 			| CCE_STATUS_TXE_PAUSED_SMASK \
235 			| CCE_STATUS_SDMA_PAUSED_SMASK)
236 /* all CceStatus sub-block RXE pause bits */
237 #define ALL_RXE_PAUSE CCE_STATUS_RXE_PAUSED_SMASK
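
/*
 * Illustrative sketch only (not part of the driver logic in this
 * excerpt): waiting for an SPC freeze to take effect amounts to polling
 * CceStatus until all sub-block freeze bits are set, e.g.
 *
 *	while ((read_csr(dd, CCE_STATUS) & ALL_FROZE) != ALL_FROZE)
 *		udelay(2);
 *
 * A real wait would also bound the loop with a timeout.
 */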
238 
239 /*
240  * CCE Error flags.
241  */
242 static struct flag_table cce_err_status_flags[] = {
243 /* 0*/	FLAG_ENTRY0("CceCsrParityErr",
244 		CCE_ERR_STATUS_CCE_CSR_PARITY_ERR_SMASK),
245 /* 1*/	FLAG_ENTRY0("CceCsrReadBadAddrErr",
246 		CCE_ERR_STATUS_CCE_CSR_READ_BAD_ADDR_ERR_SMASK),
247 /* 2*/	FLAG_ENTRY0("CceCsrWriteBadAddrErr",
248 		CCE_ERR_STATUS_CCE_CSR_WRITE_BAD_ADDR_ERR_SMASK),
249 /* 3*/	FLAG_ENTRY0("CceTrgtAsyncFifoParityErr",
250 		CCE_ERR_STATUS_CCE_TRGT_ASYNC_FIFO_PARITY_ERR_SMASK),
251 /* 4*/	FLAG_ENTRY0("CceTrgtAccessErr",
252 		CCE_ERR_STATUS_CCE_TRGT_ACCESS_ERR_SMASK),
253 /* 5*/	FLAG_ENTRY0("CceRspdDataParityErr",
254 		CCE_ERR_STATUS_CCE_RSPD_DATA_PARITY_ERR_SMASK),
255 /* 6*/	FLAG_ENTRY0("CceCli0AsyncFifoParityErr",
256 		CCE_ERR_STATUS_CCE_CLI0_ASYNC_FIFO_PARITY_ERR_SMASK),
257 /* 7*/	FLAG_ENTRY0("CceCsrCfgBusParityErr",
258 		CCE_ERR_STATUS_CCE_CSR_CFG_BUS_PARITY_ERR_SMASK),
259 /* 8*/	FLAG_ENTRY0("CceCli2AsyncFifoParityErr",
260 		CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK),
261 /* 9*/	FLAG_ENTRY0("CceCli1AsyncFifoPioCrdtParityErr",
262 	    CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_PIO_CRDT_PARITY_ERR_SMASK),
263 /*10*/	FLAG_ENTRY0("CceCli1AsyncFifoSdmaHdParityErr",
264 	    CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_SDMA_HD_PARITY_ERR_SMASK),
265 /*11*/	FLAG_ENTRY0("CceCli1AsyncFifoRxdmaParityError",
266 	    CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_RXDMA_PARITY_ERROR_SMASK),
267 /*12*/	FLAG_ENTRY0("CceCli1AsyncFifoDbgParityError",
268 		CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_DBG_PARITY_ERROR_SMASK),
269 /*13*/	FLAG_ENTRY0("PcicRetryMemCorErr",
270 		CCE_ERR_STATUS_PCIC_RETRY_MEM_COR_ERR_SMASK),
271 /*14*/	FLAG_ENTRY0("PcicRetrySotMemCorErr",
272 		CCE_ERR_STATUS_PCIC_RETRY_SOT_MEM_COR_ERR_SMASK),
273 /*15*/	FLAG_ENTRY0("PcicPostHdQCorErr",
274 		CCE_ERR_STATUS_PCIC_POST_HD_QCOR_ERR_SMASK),
275 /*16*/	FLAG_ENTRY0("PcicPostDatQCorErr",
276 		CCE_ERR_STATUS_PCIC_POST_DAT_QCOR_ERR_SMASK),
277 /*17*/	FLAG_ENTRY0("PcicCplHdQCorErr",
278 		CCE_ERR_STATUS_PCIC_CPL_HD_QCOR_ERR_SMASK),
279 /*18*/	FLAG_ENTRY0("PcicCplDatQCorErr",
280 		CCE_ERR_STATUS_PCIC_CPL_DAT_QCOR_ERR_SMASK),
281 /*19*/	FLAG_ENTRY0("PcicNPostHQParityErr",
282 		CCE_ERR_STATUS_PCIC_NPOST_HQ_PARITY_ERR_SMASK),
283 /*20*/	FLAG_ENTRY0("PcicNPostDatQParityErr",
284 		CCE_ERR_STATUS_PCIC_NPOST_DAT_QPARITY_ERR_SMASK),
285 /*21*/	FLAG_ENTRY0("PcicRetryMemUncErr",
286 		CCE_ERR_STATUS_PCIC_RETRY_MEM_UNC_ERR_SMASK),
287 /*22*/	FLAG_ENTRY0("PcicRetrySotMemUncErr",
288 		CCE_ERR_STATUS_PCIC_RETRY_SOT_MEM_UNC_ERR_SMASK),
289 /*23*/	FLAG_ENTRY0("PcicPostHdQUncErr",
290 		CCE_ERR_STATUS_PCIC_POST_HD_QUNC_ERR_SMASK),
291 /*24*/	FLAG_ENTRY0("PcicPostDatQUncErr",
292 		CCE_ERR_STATUS_PCIC_POST_DAT_QUNC_ERR_SMASK),
293 /*25*/	FLAG_ENTRY0("PcicCplHdQUncErr",
294 		CCE_ERR_STATUS_PCIC_CPL_HD_QUNC_ERR_SMASK),
295 /*26*/	FLAG_ENTRY0("PcicCplDatQUncErr",
296 		CCE_ERR_STATUS_PCIC_CPL_DAT_QUNC_ERR_SMASK),
297 /*27*/	FLAG_ENTRY0("PcicTransmitFrontParityErr",
298 		CCE_ERR_STATUS_PCIC_TRANSMIT_FRONT_PARITY_ERR_SMASK),
299 /*28*/	FLAG_ENTRY0("PcicTransmitBackParityErr",
300 		CCE_ERR_STATUS_PCIC_TRANSMIT_BACK_PARITY_ERR_SMASK),
301 /*29*/	FLAG_ENTRY0("PcicReceiveParityErr",
302 		CCE_ERR_STATUS_PCIC_RECEIVE_PARITY_ERR_SMASK),
303 /*30*/	FLAG_ENTRY0("CceTrgtCplTimeoutErr",
304 		CCE_ERR_STATUS_CCE_TRGT_CPL_TIMEOUT_ERR_SMASK),
305 /*31*/	FLAG_ENTRY0("LATriggered",
306 		CCE_ERR_STATUS_LA_TRIGGERED_SMASK),
307 /*32*/	FLAG_ENTRY0("CceSegReadBadAddrErr",
308 		CCE_ERR_STATUS_CCE_SEG_READ_BAD_ADDR_ERR_SMASK),
309 /*33*/	FLAG_ENTRY0("CceSegWriteBadAddrErr",
310 		CCE_ERR_STATUS_CCE_SEG_WRITE_BAD_ADDR_ERR_SMASK),
311 /*34*/	FLAG_ENTRY0("CceRcplAsyncFifoParityErr",
312 		CCE_ERR_STATUS_CCE_RCPL_ASYNC_FIFO_PARITY_ERR_SMASK),
313 /*35*/	FLAG_ENTRY0("CceRxdmaConvFifoParityErr",
314 		CCE_ERR_STATUS_CCE_RXDMA_CONV_FIFO_PARITY_ERR_SMASK),
315 /*36*/	FLAG_ENTRY0("CceMsixTableCorErr",
316 		CCE_ERR_STATUS_CCE_MSIX_TABLE_COR_ERR_SMASK),
317 /*37*/	FLAG_ENTRY0("CceMsixTableUncErr",
318 		CCE_ERR_STATUS_CCE_MSIX_TABLE_UNC_ERR_SMASK),
319 /*38*/	FLAG_ENTRY0("CceIntMapCorErr",
320 		CCE_ERR_STATUS_CCE_INT_MAP_COR_ERR_SMASK),
321 /*39*/	FLAG_ENTRY0("CceIntMapUncErr",
322 		CCE_ERR_STATUS_CCE_INT_MAP_UNC_ERR_SMASK),
323 /*40*/	FLAG_ENTRY0("CceMsixCsrParityErr",
324 		CCE_ERR_STATUS_CCE_MSIX_CSR_PARITY_ERR_SMASK),
325 /*41-63 reserved*/
326 };
327 
328 /*
329  * Misc Error flags
330  */
331 #define MES(text) MISC_ERR_STATUS_MISC_##text##_ERR_SMASK
332 static struct flag_table misc_err_status_flags[] = {
333 /* 0*/	FLAG_ENTRY0("CSR_PARITY", MES(CSR_PARITY)),
334 /* 1*/	FLAG_ENTRY0("CSR_READ_BAD_ADDR", MES(CSR_READ_BAD_ADDR)),
335 /* 2*/	FLAG_ENTRY0("CSR_WRITE_BAD_ADDR", MES(CSR_WRITE_BAD_ADDR)),
336 /* 3*/	FLAG_ENTRY0("SBUS_WRITE_FAILED", MES(SBUS_WRITE_FAILED)),
337 /* 4*/	FLAG_ENTRY0("KEY_MISMATCH", MES(KEY_MISMATCH)),
338 /* 5*/	FLAG_ENTRY0("FW_AUTH_FAILED", MES(FW_AUTH_FAILED)),
339 /* 6*/	FLAG_ENTRY0("EFUSE_CSR_PARITY", MES(EFUSE_CSR_PARITY)),
340 /* 7*/	FLAG_ENTRY0("EFUSE_READ_BAD_ADDR", MES(EFUSE_READ_BAD_ADDR)),
341 /* 8*/	FLAG_ENTRY0("EFUSE_WRITE", MES(EFUSE_WRITE)),
342 /* 9*/	FLAG_ENTRY0("EFUSE_DONE_PARITY", MES(EFUSE_DONE_PARITY)),
343 /*10*/	FLAG_ENTRY0("INVALID_EEP_CMD", MES(INVALID_EEP_CMD)),
344 /*11*/	FLAG_ENTRY0("MBIST_FAIL", MES(MBIST_FAIL)),
345 /*12*/	FLAG_ENTRY0("PLL_LOCK_FAIL", MES(PLL_LOCK_FAIL))
346 };
347 
348 /*
349  * TXE PIO Error flags and consequences
350  */
351 static struct flag_table pio_err_status_flags[] = {
352 /* 0*/	FLAG_ENTRY("PioWriteBadCtxt",
353 	SEC_WRITE_DROPPED,
354 	SEND_PIO_ERR_STATUS_PIO_WRITE_BAD_CTXT_ERR_SMASK),
355 /* 1*/	FLAG_ENTRY("PioWriteAddrParity",
356 	SEC_SPC_FREEZE,
357 	SEND_PIO_ERR_STATUS_PIO_WRITE_ADDR_PARITY_ERR_SMASK),
358 /* 2*/	FLAG_ENTRY("PioCsrParity",
359 	SEC_SPC_FREEZE,
360 	SEND_PIO_ERR_STATUS_PIO_CSR_PARITY_ERR_SMASK),
361 /* 3*/	FLAG_ENTRY("PioSbMemFifo0",
362 	SEC_SPC_FREEZE,
363 	SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO0_ERR_SMASK),
364 /* 4*/	FLAG_ENTRY("PioSbMemFifo1",
365 	SEC_SPC_FREEZE,
366 	SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO1_ERR_SMASK),
367 /* 5*/	FLAG_ENTRY("PioPccFifoParity",
368 	SEC_SPC_FREEZE,
369 	SEND_PIO_ERR_STATUS_PIO_PCC_FIFO_PARITY_ERR_SMASK),
370 /* 6*/	FLAG_ENTRY("PioPecFifoParity",
371 	SEC_SPC_FREEZE,
372 	SEND_PIO_ERR_STATUS_PIO_PEC_FIFO_PARITY_ERR_SMASK),
373 /* 7*/	FLAG_ENTRY("PioSbrdctlCrrelParity",
374 	SEC_SPC_FREEZE,
375 	SEND_PIO_ERR_STATUS_PIO_SBRDCTL_CRREL_PARITY_ERR_SMASK),
376 /* 8*/	FLAG_ENTRY("PioSbrdctrlCrrelFifoParity",
377 	SEC_SPC_FREEZE,
378 	SEND_PIO_ERR_STATUS_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR_SMASK),
379 /* 9*/	FLAG_ENTRY("PioPktEvictFifoParityErr",
380 	SEC_SPC_FREEZE,
381 	SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_FIFO_PARITY_ERR_SMASK),
382 /*10*/	FLAG_ENTRY("PioSmPktResetParity",
383 	SEC_SPC_FREEZE,
384 	SEND_PIO_ERR_STATUS_PIO_SM_PKT_RESET_PARITY_ERR_SMASK),
385 /*11*/	FLAG_ENTRY("PioVlLenMemBank0Unc",
386 	SEC_SPC_FREEZE,
387 	SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_UNC_ERR_SMASK),
388 /*12*/	FLAG_ENTRY("PioVlLenMemBank1Unc",
389 	SEC_SPC_FREEZE,
390 	SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_UNC_ERR_SMASK),
391 /*13*/	FLAG_ENTRY("PioVlLenMemBank0Cor",
392 	0,
393 	SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_COR_ERR_SMASK),
394 /*14*/	FLAG_ENTRY("PioVlLenMemBank1Cor",
395 	0,
396 	SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_COR_ERR_SMASK),
397 /*15*/	FLAG_ENTRY("PioCreditRetFifoParity",
398 	SEC_SPC_FREEZE,
399 	SEND_PIO_ERR_STATUS_PIO_CREDIT_RET_FIFO_PARITY_ERR_SMASK),
400 /*16*/	FLAG_ENTRY("PioPpmcPblFifo",
401 	SEC_SPC_FREEZE,
402 	SEND_PIO_ERR_STATUS_PIO_PPMC_PBL_FIFO_ERR_SMASK),
403 /*17*/	FLAG_ENTRY("PioInitSmIn",
404 	0,
405 	SEND_PIO_ERR_STATUS_PIO_INIT_SM_IN_ERR_SMASK),
406 /*18*/	FLAG_ENTRY("PioPktEvictSmOrArbSm",
407 	SEC_SPC_FREEZE,
408 	SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_SM_OR_ARB_SM_ERR_SMASK),
409 /*19*/	FLAG_ENTRY("PioHostAddrMemUnc",
410 	SEC_SPC_FREEZE,
411 	SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_UNC_ERR_SMASK),
412 /*20*/	FLAG_ENTRY("PioHostAddrMemCor",
413 	0,
414 	SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_COR_ERR_SMASK),
415 /*21*/	FLAG_ENTRY("PioWriteDataParity",
416 	SEC_SPC_FREEZE,
417 	SEND_PIO_ERR_STATUS_PIO_WRITE_DATA_PARITY_ERR_SMASK),
418 /*22*/	FLAG_ENTRY("PioStateMachine",
419 	SEC_SPC_FREEZE,
420 	SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK),
421 /*23*/	FLAG_ENTRY("PioWriteQwValidParity",
422 	SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
423 	SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK),
424 /*24*/	FLAG_ENTRY("PioBlockQwCountParity",
425 	SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
426 	SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK),
427 /*25*/	FLAG_ENTRY("PioVlfVlLenParity",
428 	SEC_SPC_FREEZE,
429 	SEND_PIO_ERR_STATUS_PIO_VLF_VL_LEN_PARITY_ERR_SMASK),
430 /*26*/	FLAG_ENTRY("PioVlfSopParity",
431 	SEC_SPC_FREEZE,
432 	SEND_PIO_ERR_STATUS_PIO_VLF_SOP_PARITY_ERR_SMASK),
433 /*27*/	FLAG_ENTRY("PioVlFifoParity",
434 	SEC_SPC_FREEZE,
435 	SEND_PIO_ERR_STATUS_PIO_VL_FIFO_PARITY_ERR_SMASK),
436 /*28*/	FLAG_ENTRY("PioPpmcBqcMemParity",
437 	SEC_SPC_FREEZE,
438 	SEND_PIO_ERR_STATUS_PIO_PPMC_BQC_MEM_PARITY_ERR_SMASK),
439 /*29*/	FLAG_ENTRY("PioPpmcSopLen",
440 	SEC_SPC_FREEZE,
441 	SEND_PIO_ERR_STATUS_PIO_PPMC_SOP_LEN_ERR_SMASK),
442 /*30-31 reserved*/
443 /*32*/	FLAG_ENTRY("PioCurrentFreeCntParity",
444 	SEC_SPC_FREEZE,
445 	SEND_PIO_ERR_STATUS_PIO_CURRENT_FREE_CNT_PARITY_ERR_SMASK),
446 /*33*/	FLAG_ENTRY("PioLastReturnedCntParity",
447 	SEC_SPC_FREEZE,
448 	SEND_PIO_ERR_STATUS_PIO_LAST_RETURNED_CNT_PARITY_ERR_SMASK),
449 /*34*/	FLAG_ENTRY("PioPccSopHeadParity",
450 	SEC_SPC_FREEZE,
451 	SEND_PIO_ERR_STATUS_PIO_PCC_SOP_HEAD_PARITY_ERR_SMASK),
452 /*35*/	FLAG_ENTRY("PioPecSopHeadParityErr",
453 	SEC_SPC_FREEZE,
454 	SEND_PIO_ERR_STATUS_PIO_PEC_SOP_HEAD_PARITY_ERR_SMASK),
455 /*36-63 reserved*/
456 };
457 
458 /* TXE PIO errors that cause an SPC freeze */
459 #define ALL_PIO_FREEZE_ERR \
460 	(SEND_PIO_ERR_STATUS_PIO_WRITE_ADDR_PARITY_ERR_SMASK \
461 	| SEND_PIO_ERR_STATUS_PIO_CSR_PARITY_ERR_SMASK \
462 	| SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO0_ERR_SMASK \
463 	| SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO1_ERR_SMASK \
464 	| SEND_PIO_ERR_STATUS_PIO_PCC_FIFO_PARITY_ERR_SMASK \
465 	| SEND_PIO_ERR_STATUS_PIO_PEC_FIFO_PARITY_ERR_SMASK \
466 	| SEND_PIO_ERR_STATUS_PIO_SBRDCTL_CRREL_PARITY_ERR_SMASK \
467 	| SEND_PIO_ERR_STATUS_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR_SMASK \
468 	| SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_FIFO_PARITY_ERR_SMASK \
469 	| SEND_PIO_ERR_STATUS_PIO_SM_PKT_RESET_PARITY_ERR_SMASK \
470 	| SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_UNC_ERR_SMASK \
471 	| SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_UNC_ERR_SMASK \
472 	| SEND_PIO_ERR_STATUS_PIO_CREDIT_RET_FIFO_PARITY_ERR_SMASK \
473 	| SEND_PIO_ERR_STATUS_PIO_PPMC_PBL_FIFO_ERR_SMASK \
474 	| SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_SM_OR_ARB_SM_ERR_SMASK \
475 	| SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_UNC_ERR_SMASK \
476 	| SEND_PIO_ERR_STATUS_PIO_WRITE_DATA_PARITY_ERR_SMASK \
477 	| SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK \
478 	| SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK \
479 	| SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK \
480 	| SEND_PIO_ERR_STATUS_PIO_VLF_VL_LEN_PARITY_ERR_SMASK \
481 	| SEND_PIO_ERR_STATUS_PIO_VLF_SOP_PARITY_ERR_SMASK \
482 	| SEND_PIO_ERR_STATUS_PIO_VL_FIFO_PARITY_ERR_SMASK \
483 	| SEND_PIO_ERR_STATUS_PIO_PPMC_BQC_MEM_PARITY_ERR_SMASK \
484 	| SEND_PIO_ERR_STATUS_PIO_PPMC_SOP_LEN_ERR_SMASK \
485 	| SEND_PIO_ERR_STATUS_PIO_CURRENT_FREE_CNT_PARITY_ERR_SMASK \
486 	| SEND_PIO_ERR_STATUS_PIO_LAST_RETURNED_CNT_PARITY_ERR_SMASK \
487 	| SEND_PIO_ERR_STATUS_PIO_PCC_SOP_HEAD_PARITY_ERR_SMASK \
488 	| SEND_PIO_ERR_STATUS_PIO_PEC_SOP_HEAD_PARITY_ERR_SMASK)
489 
490 /*
491  * TXE SDMA Error flags
492  */
493 static struct flag_table sdma_err_status_flags[] = {
494 /* 0*/	FLAG_ENTRY0("SDmaRpyTagErr",
495 		SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK),
496 /* 1*/	FLAG_ENTRY0("SDmaCsrParityErr",
497 		SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK),
498 /* 2*/	FLAG_ENTRY0("SDmaPcieReqTrackingUncErr",
499 		SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK),
500 /* 3*/	FLAG_ENTRY0("SDmaPcieReqTrackingCorErr",
501 		SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_COR_ERR_SMASK),
502 /*04-63 reserved*/
503 };
504 
505 /* TXE SDMA errors that cause an SPC freeze */
506 #define ALL_SDMA_FREEZE_ERR  \
507 		(SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK \
508 		| SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK \
509 		| SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK)
510 
511 /*
512  * TXE Egress Error flags
513  */
514 #define SEES(text) SEND_EGRESS_ERR_STATUS_##text##_ERR_SMASK
515 static struct flag_table egress_err_status_flags[] = {
516 /* 0*/	FLAG_ENTRY0("TxPktIntegrityMemCorErr", SEES(TX_PKT_INTEGRITY_MEM_COR)),
517 /* 1*/	FLAG_ENTRY0("TxPktIntegrityMemUncErr", SEES(TX_PKT_INTEGRITY_MEM_UNC)),
518 /* 2 reserved */
519 /* 3*/	FLAG_ENTRY0("TxEgressFifoUnderrunOrParityErr",
520 		SEES(TX_EGRESS_FIFO_UNDERRUN_OR_PARITY)),
521 /* 4*/	FLAG_ENTRY0("TxLinkdownErr", SEES(TX_LINKDOWN)),
522 /* 5*/	FLAG_ENTRY0("TxIncorrectLinkStateErr", SEES(TX_INCORRECT_LINK_STATE)),
523 /* 6 reserved */
524 /* 7*/	FLAG_ENTRY0("TxPioLaunchIntfParityErr",
525 		SEES(TX_PIO_LAUNCH_INTF_PARITY)),
526 /* 8*/	FLAG_ENTRY0("TxSdmaLaunchIntfParityErr",
527 		SEES(TX_SDMA_LAUNCH_INTF_PARITY)),
528 /* 9-10 reserved */
529 /*11*/	FLAG_ENTRY0("TxSbrdCtlStateMachineParityErr",
530 		SEES(TX_SBRD_CTL_STATE_MACHINE_PARITY)),
531 /*12*/	FLAG_ENTRY0("TxIllegalVLErr", SEES(TX_ILLEGAL_VL)),
532 /*13*/	FLAG_ENTRY0("TxLaunchCsrParityErr", SEES(TX_LAUNCH_CSR_PARITY)),
533 /*14*/	FLAG_ENTRY0("TxSbrdCtlCsrParityErr", SEES(TX_SBRD_CTL_CSR_PARITY)),
534 /*15*/	FLAG_ENTRY0("TxConfigParityErr", SEES(TX_CONFIG_PARITY)),
535 /*16*/	FLAG_ENTRY0("TxSdma0DisallowedPacketErr",
536 		SEES(TX_SDMA0_DISALLOWED_PACKET)),
537 /*17*/	FLAG_ENTRY0("TxSdma1DisallowedPacketErr",
538 		SEES(TX_SDMA1_DISALLOWED_PACKET)),
539 /*18*/	FLAG_ENTRY0("TxSdma2DisallowedPacketErr",
540 		SEES(TX_SDMA2_DISALLOWED_PACKET)),
541 /*19*/	FLAG_ENTRY0("TxSdma3DisallowedPacketErr",
542 		SEES(TX_SDMA3_DISALLOWED_PACKET)),
543 /*20*/	FLAG_ENTRY0("TxSdma4DisallowedPacketErr",
544 		SEES(TX_SDMA4_DISALLOWED_PACKET)),
545 /*21*/	FLAG_ENTRY0("TxSdma5DisallowedPacketErr",
546 		SEES(TX_SDMA5_DISALLOWED_PACKET)),
547 /*22*/	FLAG_ENTRY0("TxSdma6DisallowedPacketErr",
548 		SEES(TX_SDMA6_DISALLOWED_PACKET)),
549 /*23*/	FLAG_ENTRY0("TxSdma7DisallowedPacketErr",
550 		SEES(TX_SDMA7_DISALLOWED_PACKET)),
551 /*24*/	FLAG_ENTRY0("TxSdma8DisallowedPacketErr",
552 		SEES(TX_SDMA8_DISALLOWED_PACKET)),
553 /*25*/	FLAG_ENTRY0("TxSdma9DisallowedPacketErr",
554 		SEES(TX_SDMA9_DISALLOWED_PACKET)),
555 /*26*/	FLAG_ENTRY0("TxSdma10DisallowedPacketErr",
556 		SEES(TX_SDMA10_DISALLOWED_PACKET)),
557 /*27*/	FLAG_ENTRY0("TxSdma11DisallowedPacketErr",
558 		SEES(TX_SDMA11_DISALLOWED_PACKET)),
559 /*28*/	FLAG_ENTRY0("TxSdma12DisallowedPacketErr",
560 		SEES(TX_SDMA12_DISALLOWED_PACKET)),
561 /*29*/	FLAG_ENTRY0("TxSdma13DisallowedPacketErr",
562 		SEES(TX_SDMA13_DISALLOWED_PACKET)),
563 /*30*/	FLAG_ENTRY0("TxSdma14DisallowedPacketErr",
564 		SEES(TX_SDMA14_DISALLOWED_PACKET)),
565 /*31*/	FLAG_ENTRY0("TxSdma15DisallowedPacketErr",
566 		SEES(TX_SDMA15_DISALLOWED_PACKET)),
567 /*32*/	FLAG_ENTRY0("TxLaunchFifo0UncOrParityErr",
568 		SEES(TX_LAUNCH_FIFO0_UNC_OR_PARITY)),
569 /*33*/	FLAG_ENTRY0("TxLaunchFifo1UncOrParityErr",
570 		SEES(TX_LAUNCH_FIFO1_UNC_OR_PARITY)),
571 /*34*/	FLAG_ENTRY0("TxLaunchFifo2UncOrParityErr",
572 		SEES(TX_LAUNCH_FIFO2_UNC_OR_PARITY)),
573 /*35*/	FLAG_ENTRY0("TxLaunchFifo3UncOrParityErr",
574 		SEES(TX_LAUNCH_FIFO3_UNC_OR_PARITY)),
575 /*36*/	FLAG_ENTRY0("TxLaunchFifo4UncOrParityErr",
576 		SEES(TX_LAUNCH_FIFO4_UNC_OR_PARITY)),
577 /*37*/	FLAG_ENTRY0("TxLaunchFifo5UncOrParityErr",
578 		SEES(TX_LAUNCH_FIFO5_UNC_OR_PARITY)),
579 /*38*/	FLAG_ENTRY0("TxLaunchFifo6UncOrParityErr",
580 		SEES(TX_LAUNCH_FIFO6_UNC_OR_PARITY)),
581 /*39*/	FLAG_ENTRY0("TxLaunchFifo7UncOrParityErr",
582 		SEES(TX_LAUNCH_FIFO7_UNC_OR_PARITY)),
583 /*40*/	FLAG_ENTRY0("TxLaunchFifo8UncOrParityErr",
584 		SEES(TX_LAUNCH_FIFO8_UNC_OR_PARITY)),
585 /*41*/	FLAG_ENTRY0("TxCreditReturnParityErr", SEES(TX_CREDIT_RETURN_PARITY)),
586 /*42*/	FLAG_ENTRY0("TxSbHdrUncErr", SEES(TX_SB_HDR_UNC)),
587 /*43*/	FLAG_ENTRY0("TxReadSdmaMemoryUncErr", SEES(TX_READ_SDMA_MEMORY_UNC)),
588 /*44*/	FLAG_ENTRY0("TxReadPioMemoryUncErr", SEES(TX_READ_PIO_MEMORY_UNC)),
589 /*45*/	FLAG_ENTRY0("TxEgressFifoUncErr", SEES(TX_EGRESS_FIFO_UNC)),
590 /*46*/	FLAG_ENTRY0("TxHcrcInsertionErr", SEES(TX_HCRC_INSERTION)),
591 /*47*/	FLAG_ENTRY0("TxCreditReturnVLErr", SEES(TX_CREDIT_RETURN_VL)),
592 /*48*/	FLAG_ENTRY0("TxLaunchFifo0CorErr", SEES(TX_LAUNCH_FIFO0_COR)),
593 /*49*/	FLAG_ENTRY0("TxLaunchFifo1CorErr", SEES(TX_LAUNCH_FIFO1_COR)),
594 /*50*/	FLAG_ENTRY0("TxLaunchFifo2CorErr", SEES(TX_LAUNCH_FIFO2_COR)),
595 /*51*/	FLAG_ENTRY0("TxLaunchFifo3CorErr", SEES(TX_LAUNCH_FIFO3_COR)),
596 /*52*/	FLAG_ENTRY0("TxLaunchFifo4CorErr", SEES(TX_LAUNCH_FIFO4_COR)),
597 /*53*/	FLAG_ENTRY0("TxLaunchFifo5CorErr", SEES(TX_LAUNCH_FIFO5_COR)),
598 /*54*/	FLAG_ENTRY0("TxLaunchFifo6CorErr", SEES(TX_LAUNCH_FIFO6_COR)),
599 /*55*/	FLAG_ENTRY0("TxLaunchFifo7CorErr", SEES(TX_LAUNCH_FIFO7_COR)),
600 /*56*/	FLAG_ENTRY0("TxLaunchFifo8CorErr", SEES(TX_LAUNCH_FIFO8_COR)),
601 /*57*/	FLAG_ENTRY0("TxCreditOverrunErr", SEES(TX_CREDIT_OVERRUN)),
602 /*58*/	FLAG_ENTRY0("TxSbHdrCorErr", SEES(TX_SB_HDR_COR)),
603 /*59*/	FLAG_ENTRY0("TxReadSdmaMemoryCorErr", SEES(TX_READ_SDMA_MEMORY_COR)),
604 /*60*/	FLAG_ENTRY0("TxReadPioMemoryCorErr", SEES(TX_READ_PIO_MEMORY_COR)),
605 /*61*/	FLAG_ENTRY0("TxEgressFifoCorErr", SEES(TX_EGRESS_FIFO_COR)),
606 /*62*/	FLAG_ENTRY0("TxReadSdmaMemoryCsrUncErr",
607 		SEES(TX_READ_SDMA_MEMORY_CSR_UNC)),
608 /*63*/	FLAG_ENTRY0("TxReadPioMemoryCsrUncErr",
609 		SEES(TX_READ_PIO_MEMORY_CSR_UNC)),
610 };
611 
612 /*
613  * TXE Egress Error Info flags
614  */
615 #define SEEI(text) SEND_EGRESS_ERR_INFO_##text##_ERR_SMASK
616 static struct flag_table egress_err_info_flags[] = {
617 /* 0*/	FLAG_ENTRY0("Reserved", 0ull),
618 /* 1*/	FLAG_ENTRY0("VLErr", SEEI(VL)),
619 /* 2*/	FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
620 /* 3*/	FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
621 /* 4*/	FLAG_ENTRY0("PartitionKeyErr", SEEI(PARTITION_KEY)),
622 /* 5*/	FLAG_ENTRY0("SLIDErr", SEEI(SLID)),
623 /* 6*/	FLAG_ENTRY0("OpcodeErr", SEEI(OPCODE)),
624 /* 7*/	FLAG_ENTRY0("VLMappingErr", SEEI(VL_MAPPING)),
625 /* 8*/	FLAG_ENTRY0("RawErr", SEEI(RAW)),
626 /* 9*/	FLAG_ENTRY0("RawIPv6Err", SEEI(RAW_IPV6)),
627 /*10*/	FLAG_ENTRY0("GRHErr", SEEI(GRH)),
628 /*11*/	FLAG_ENTRY0("BypassErr", SEEI(BYPASS)),
629 /*12*/	FLAG_ENTRY0("KDETHPacketsErr", SEEI(KDETH_PACKETS)),
630 /*13*/	FLAG_ENTRY0("NonKDETHPacketsErr", SEEI(NON_KDETH_PACKETS)),
631 /*14*/	FLAG_ENTRY0("TooSmallIBPacketsErr", SEEI(TOO_SMALL_IB_PACKETS)),
632 /*15*/	FLAG_ENTRY0("TooSmallBypassPacketsErr", SEEI(TOO_SMALL_BYPASS_PACKETS)),
633 /*16*/	FLAG_ENTRY0("PbcTestErr", SEEI(PBC_TEST)),
634 /*17*/	FLAG_ENTRY0("BadPktLenErr", SEEI(BAD_PKT_LEN)),
635 /*18*/	FLAG_ENTRY0("TooLongIBPacketErr", SEEI(TOO_LONG_IB_PACKET)),
636 /*19*/	FLAG_ENTRY0("TooLongBypassPacketsErr", SEEI(TOO_LONG_BYPASS_PACKETS)),
637 /*20*/	FLAG_ENTRY0("PbcStaticRateControlErr", SEEI(PBC_STATIC_RATE_CONTROL)),
638 /*21*/	FLAG_ENTRY0("BypassBadPktLenErr", SEEI(BAD_PKT_LEN)),
639 };
640 
641 /* TXE Egress errors that cause an SPC freeze */
642 #define ALL_TXE_EGRESS_FREEZE_ERR \
643 	(SEES(TX_EGRESS_FIFO_UNDERRUN_OR_PARITY) \
644 	| SEES(TX_PIO_LAUNCH_INTF_PARITY) \
645 	| SEES(TX_SDMA_LAUNCH_INTF_PARITY) \
646 	| SEES(TX_SBRD_CTL_STATE_MACHINE_PARITY) \
647 	| SEES(TX_LAUNCH_CSR_PARITY) \
648 	| SEES(TX_SBRD_CTL_CSR_PARITY) \
649 	| SEES(TX_CONFIG_PARITY) \
650 	| SEES(TX_LAUNCH_FIFO0_UNC_OR_PARITY) \
651 	| SEES(TX_LAUNCH_FIFO1_UNC_OR_PARITY) \
652 	| SEES(TX_LAUNCH_FIFO2_UNC_OR_PARITY) \
653 	| SEES(TX_LAUNCH_FIFO3_UNC_OR_PARITY) \
654 	| SEES(TX_LAUNCH_FIFO4_UNC_OR_PARITY) \
655 	| SEES(TX_LAUNCH_FIFO5_UNC_OR_PARITY) \
656 	| SEES(TX_LAUNCH_FIFO6_UNC_OR_PARITY) \
657 	| SEES(TX_LAUNCH_FIFO7_UNC_OR_PARITY) \
658 	| SEES(TX_LAUNCH_FIFO8_UNC_OR_PARITY) \
659 	| SEES(TX_CREDIT_RETURN_PARITY))
660 
661 /*
662  * TXE Send error flags
663  */
664 #define SES(name) SEND_ERR_STATUS_SEND_##name##_ERR_SMASK
665 static struct flag_table send_err_status_flags[] = {
666 /* 0*/	FLAG_ENTRY0("SendCsrParityErr", SES(CSR_PARITY)),
667 /* 1*/	FLAG_ENTRY0("SendCsrReadBadAddrErr", SES(CSR_READ_BAD_ADDR)),
668 /* 2*/	FLAG_ENTRY0("SendCsrWriteBadAddrErr", SES(CSR_WRITE_BAD_ADDR))
669 };
670 
671 /*
672  * TXE Send Context Error flags and consequences
673  */
674 static struct flag_table sc_err_status_flags[] = {
675 /* 0*/	FLAG_ENTRY("InconsistentSop",
676 		SEC_PACKET_DROPPED | SEC_SC_HALTED,
677 		SEND_CTXT_ERR_STATUS_PIO_INCONSISTENT_SOP_ERR_SMASK),
678 /* 1*/	FLAG_ENTRY("DisallowedPacket",
679 		SEC_PACKET_DROPPED | SEC_SC_HALTED,
680 		SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK),
681 /* 2*/	FLAG_ENTRY("WriteCrossesBoundary",
682 		SEC_WRITE_DROPPED | SEC_SC_HALTED,
683 		SEND_CTXT_ERR_STATUS_PIO_WRITE_CROSSES_BOUNDARY_ERR_SMASK),
684 /* 3*/	FLAG_ENTRY("WriteOverflow",
685 		SEC_WRITE_DROPPED | SEC_SC_HALTED,
686 		SEND_CTXT_ERR_STATUS_PIO_WRITE_OVERFLOW_ERR_SMASK),
687 /* 4*/	FLAG_ENTRY("WriteOutOfBounds",
688 		SEC_WRITE_DROPPED | SEC_SC_HALTED,
689 		SEND_CTXT_ERR_STATUS_PIO_WRITE_OUT_OF_BOUNDS_ERR_SMASK),
690 /* 5-63 reserved*/
691 };
692 
693 /*
694  * RXE Receive Error flags
695  */
696 #define RXES(name) RCV_ERR_STATUS_RX_##name##_ERR_SMASK
697 static struct flag_table rxe_err_status_flags[] = {
698 /* 0*/	FLAG_ENTRY0("RxDmaCsrCorErr", RXES(DMA_CSR_COR)),
699 /* 1*/	FLAG_ENTRY0("RxDcIntfParityErr", RXES(DC_INTF_PARITY)),
700 /* 2*/	FLAG_ENTRY0("RxRcvHdrUncErr", RXES(RCV_HDR_UNC)),
701 /* 3*/	FLAG_ENTRY0("RxRcvHdrCorErr", RXES(RCV_HDR_COR)),
702 /* 4*/	FLAG_ENTRY0("RxRcvDataUncErr", RXES(RCV_DATA_UNC)),
703 /* 5*/	FLAG_ENTRY0("RxRcvDataCorErr", RXES(RCV_DATA_COR)),
704 /* 6*/	FLAG_ENTRY0("RxRcvQpMapTableUncErr", RXES(RCV_QP_MAP_TABLE_UNC)),
705 /* 7*/	FLAG_ENTRY0("RxRcvQpMapTableCorErr", RXES(RCV_QP_MAP_TABLE_COR)),
706 /* 8*/	FLAG_ENTRY0("RxRcvCsrParityErr", RXES(RCV_CSR_PARITY)),
707 /* 9*/	FLAG_ENTRY0("RxDcSopEopParityErr", RXES(DC_SOP_EOP_PARITY)),
708 /*10*/	FLAG_ENTRY0("RxDmaFlagUncErr", RXES(DMA_FLAG_UNC)),
709 /*11*/	FLAG_ENTRY0("RxDmaFlagCorErr", RXES(DMA_FLAG_COR)),
710 /*12*/	FLAG_ENTRY0("RxRcvFsmEncodingErr", RXES(RCV_FSM_ENCODING)),
711 /*13*/	FLAG_ENTRY0("RxRbufFreeListUncErr", RXES(RBUF_FREE_LIST_UNC)),
712 /*14*/	FLAG_ENTRY0("RxRbufFreeListCorErr", RXES(RBUF_FREE_LIST_COR)),
713 /*15*/	FLAG_ENTRY0("RxRbufLookupDesRegUncErr", RXES(RBUF_LOOKUP_DES_REG_UNC)),
714 /*16*/	FLAG_ENTRY0("RxRbufLookupDesRegUncCorErr",
715 		RXES(RBUF_LOOKUP_DES_REG_UNC_COR)),
716 /*17*/	FLAG_ENTRY0("RxRbufLookupDesUncErr", RXES(RBUF_LOOKUP_DES_UNC)),
717 /*18*/	FLAG_ENTRY0("RxRbufLookupDesCorErr", RXES(RBUF_LOOKUP_DES_COR)),
718 /*19*/	FLAG_ENTRY0("RxRbufBlockListReadUncErr",
719 		RXES(RBUF_BLOCK_LIST_READ_UNC)),
720 /*20*/	FLAG_ENTRY0("RxRbufBlockListReadCorErr",
721 		RXES(RBUF_BLOCK_LIST_READ_COR)),
722 /*21*/	FLAG_ENTRY0("RxRbufCsrQHeadBufNumParityErr",
723 		RXES(RBUF_CSR_QHEAD_BUF_NUM_PARITY)),
724 /*22*/	FLAG_ENTRY0("RxRbufCsrQEntCntParityErr",
725 		RXES(RBUF_CSR_QENT_CNT_PARITY)),
726 /*23*/	FLAG_ENTRY0("RxRbufCsrQNextBufParityErr",
727 		RXES(RBUF_CSR_QNEXT_BUF_PARITY)),
728 /*24*/	FLAG_ENTRY0("RxRbufCsrQVldBitParityErr",
729 		RXES(RBUF_CSR_QVLD_BIT_PARITY)),
730 /*25*/	FLAG_ENTRY0("RxRbufCsrQHdPtrParityErr", RXES(RBUF_CSR_QHD_PTR_PARITY)),
731 /*26*/	FLAG_ENTRY0("RxRbufCsrQTlPtrParityErr", RXES(RBUF_CSR_QTL_PTR_PARITY)),
732 /*27*/	FLAG_ENTRY0("RxRbufCsrQNumOfPktParityErr",
733 		RXES(RBUF_CSR_QNUM_OF_PKT_PARITY)),
734 /*28*/	FLAG_ENTRY0("RxRbufCsrQEOPDWParityErr", RXES(RBUF_CSR_QEOPDW_PARITY)),
735 /*29*/	FLAG_ENTRY0("RxRbufCtxIdParityErr", RXES(RBUF_CTX_ID_PARITY)),
736 /*30*/	FLAG_ENTRY0("RxRBufBadLookupErr", RXES(RBUF_BAD_LOOKUP)),
737 /*31*/	FLAG_ENTRY0("RxRbufFullErr", RXES(RBUF_FULL)),
738 /*32*/	FLAG_ENTRY0("RxRbufEmptyErr", RXES(RBUF_EMPTY)),
739 /*33*/	FLAG_ENTRY0("RxRbufFlRdAddrParityErr", RXES(RBUF_FL_RD_ADDR_PARITY)),
740 /*34*/	FLAG_ENTRY0("RxRbufFlWrAddrParityErr", RXES(RBUF_FL_WR_ADDR_PARITY)),
741 /*35*/	FLAG_ENTRY0("RxRbufFlInitdoneParityErr",
742 		RXES(RBUF_FL_INITDONE_PARITY)),
743 /*36*/	FLAG_ENTRY0("RxRbufFlInitWrAddrParityErr",
744 		RXES(RBUF_FL_INIT_WR_ADDR_PARITY)),
745 /*37*/	FLAG_ENTRY0("RxRbufNextFreeBufUncErr", RXES(RBUF_NEXT_FREE_BUF_UNC)),
746 /*38*/	FLAG_ENTRY0("RxRbufNextFreeBufCorErr", RXES(RBUF_NEXT_FREE_BUF_COR)),
747 /*39*/	FLAG_ENTRY0("RxLookupDesPart1UncErr", RXES(LOOKUP_DES_PART1_UNC)),
748 /*40*/	FLAG_ENTRY0("RxLookupDesPart1UncCorErr",
749 		RXES(LOOKUP_DES_PART1_UNC_COR)),
750 /*41*/	FLAG_ENTRY0("RxLookupDesPart2ParityErr",
751 		RXES(LOOKUP_DES_PART2_PARITY)),
752 /*42*/	FLAG_ENTRY0("RxLookupRcvArrayUncErr", RXES(LOOKUP_RCV_ARRAY_UNC)),
753 /*43*/	FLAG_ENTRY0("RxLookupRcvArrayCorErr", RXES(LOOKUP_RCV_ARRAY_COR)),
754 /*44*/	FLAG_ENTRY0("RxLookupCsrParityErr", RXES(LOOKUP_CSR_PARITY)),
755 /*45*/	FLAG_ENTRY0("RxHqIntrCsrParityErr", RXES(HQ_INTR_CSR_PARITY)),
756 /*46*/	FLAG_ENTRY0("RxHqIntrFsmErr", RXES(HQ_INTR_FSM)),
757 /*47*/	FLAG_ENTRY0("RxRbufDescPart1UncErr", RXES(RBUF_DESC_PART1_UNC)),
758 /*48*/	FLAG_ENTRY0("RxRbufDescPart1CorErr", RXES(RBUF_DESC_PART1_COR)),
759 /*49*/	FLAG_ENTRY0("RxRbufDescPart2UncErr", RXES(RBUF_DESC_PART2_UNC)),
760 /*50*/	FLAG_ENTRY0("RxRbufDescPart2CorErr", RXES(RBUF_DESC_PART2_COR)),
761 /*51*/	FLAG_ENTRY0("RxDmaHdrFifoRdUncErr", RXES(DMA_HDR_FIFO_RD_UNC)),
762 /*52*/	FLAG_ENTRY0("RxDmaHdrFifoRdCorErr", RXES(DMA_HDR_FIFO_RD_COR)),
763 /*53*/	FLAG_ENTRY0("RxDmaDataFifoRdUncErr", RXES(DMA_DATA_FIFO_RD_UNC)),
764 /*54*/	FLAG_ENTRY0("RxDmaDataFifoRdCorErr", RXES(DMA_DATA_FIFO_RD_COR)),
765 /*55*/	FLAG_ENTRY0("RxRbufDataUncErr", RXES(RBUF_DATA_UNC)),
766 /*56*/	FLAG_ENTRY0("RxRbufDataCorErr", RXES(RBUF_DATA_COR)),
767 /*57*/	FLAG_ENTRY0("RxDmaCsrParityErr", RXES(DMA_CSR_PARITY)),
768 /*58*/	FLAG_ENTRY0("RxDmaEqFsmEncodingErr", RXES(DMA_EQ_FSM_ENCODING)),
769 /*59*/	FLAG_ENTRY0("RxDmaDqFsmEncodingErr", RXES(DMA_DQ_FSM_ENCODING)),
770 /*60*/	FLAG_ENTRY0("RxDmaCsrUncErr", RXES(DMA_CSR_UNC)),
771 /*61*/	FLAG_ENTRY0("RxCsrReadBadAddrErr", RXES(CSR_READ_BAD_ADDR)),
772 /*62*/	FLAG_ENTRY0("RxCsrWriteBadAddrErr", RXES(CSR_WRITE_BAD_ADDR)),
773 /*63*/	FLAG_ENTRY0("RxCsrParityErr", RXES(CSR_PARITY))
774 };
775 
776 /* RXE errors that will trigger an SPC freeze */
777 #define ALL_RXE_FREEZE_ERR  \
778 	(RCV_ERR_STATUS_RX_RCV_QP_MAP_TABLE_UNC_ERR_SMASK \
779 	| RCV_ERR_STATUS_RX_RCV_CSR_PARITY_ERR_SMASK \
780 	| RCV_ERR_STATUS_RX_DMA_FLAG_UNC_ERR_SMASK \
781 	| RCV_ERR_STATUS_RX_RCV_FSM_ENCODING_ERR_SMASK \
782 	| RCV_ERR_STATUS_RX_RBUF_FREE_LIST_UNC_ERR_SMASK \
783 	| RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_REG_UNC_ERR_SMASK \
784 	| RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_REG_UNC_COR_ERR_SMASK \
785 	| RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_UNC_ERR_SMASK \
786 	| RCV_ERR_STATUS_RX_RBUF_BLOCK_LIST_READ_UNC_ERR_SMASK \
787 	| RCV_ERR_STATUS_RX_RBUF_CSR_QHEAD_BUF_NUM_PARITY_ERR_SMASK \
788 	| RCV_ERR_STATUS_RX_RBUF_CSR_QENT_CNT_PARITY_ERR_SMASK \
789 	| RCV_ERR_STATUS_RX_RBUF_CSR_QNEXT_BUF_PARITY_ERR_SMASK \
790 	| RCV_ERR_STATUS_RX_RBUF_CSR_QVLD_BIT_PARITY_ERR_SMASK \
791 	| RCV_ERR_STATUS_RX_RBUF_CSR_QHD_PTR_PARITY_ERR_SMASK \
792 	| RCV_ERR_STATUS_RX_RBUF_CSR_QTL_PTR_PARITY_ERR_SMASK \
793 	| RCV_ERR_STATUS_RX_RBUF_CSR_QNUM_OF_PKT_PARITY_ERR_SMASK \
794 	| RCV_ERR_STATUS_RX_RBUF_CSR_QEOPDW_PARITY_ERR_SMASK \
795 	| RCV_ERR_STATUS_RX_RBUF_CTX_ID_PARITY_ERR_SMASK \
796 	| RCV_ERR_STATUS_RX_RBUF_BAD_LOOKUP_ERR_SMASK \
797 	| RCV_ERR_STATUS_RX_RBUF_FULL_ERR_SMASK \
798 	| RCV_ERR_STATUS_RX_RBUF_EMPTY_ERR_SMASK \
799 	| RCV_ERR_STATUS_RX_RBUF_FL_RD_ADDR_PARITY_ERR_SMASK \
800 	| RCV_ERR_STATUS_RX_RBUF_FL_WR_ADDR_PARITY_ERR_SMASK \
801 	| RCV_ERR_STATUS_RX_RBUF_FL_INITDONE_PARITY_ERR_SMASK \
802 	| RCV_ERR_STATUS_RX_RBUF_FL_INIT_WR_ADDR_PARITY_ERR_SMASK \
803 	| RCV_ERR_STATUS_RX_RBUF_NEXT_FREE_BUF_UNC_ERR_SMASK \
804 	| RCV_ERR_STATUS_RX_LOOKUP_DES_PART1_UNC_ERR_SMASK \
805 	| RCV_ERR_STATUS_RX_LOOKUP_DES_PART1_UNC_COR_ERR_SMASK \
806 	| RCV_ERR_STATUS_RX_LOOKUP_DES_PART2_PARITY_ERR_SMASK \
807 	| RCV_ERR_STATUS_RX_LOOKUP_RCV_ARRAY_UNC_ERR_SMASK \
808 	| RCV_ERR_STATUS_RX_LOOKUP_CSR_PARITY_ERR_SMASK \
809 	| RCV_ERR_STATUS_RX_HQ_INTR_CSR_PARITY_ERR_SMASK \
810 	| RCV_ERR_STATUS_RX_HQ_INTR_FSM_ERR_SMASK \
811 	| RCV_ERR_STATUS_RX_RBUF_DESC_PART1_UNC_ERR_SMASK \
812 	| RCV_ERR_STATUS_RX_RBUF_DESC_PART1_COR_ERR_SMASK \
813 	| RCV_ERR_STATUS_RX_RBUF_DESC_PART2_UNC_ERR_SMASK \
814 	| RCV_ERR_STATUS_RX_DMA_HDR_FIFO_RD_UNC_ERR_SMASK \
815 	| RCV_ERR_STATUS_RX_DMA_DATA_FIFO_RD_UNC_ERR_SMASK \
816 	| RCV_ERR_STATUS_RX_RBUF_DATA_UNC_ERR_SMASK \
817 	| RCV_ERR_STATUS_RX_DMA_CSR_PARITY_ERR_SMASK \
818 	| RCV_ERR_STATUS_RX_DMA_EQ_FSM_ENCODING_ERR_SMASK \
819 	| RCV_ERR_STATUS_RX_DMA_DQ_FSM_ENCODING_ERR_SMASK \
820 	| RCV_ERR_STATUS_RX_DMA_CSR_UNC_ERR_SMASK \
821 	| RCV_ERR_STATUS_RX_CSR_PARITY_ERR_SMASK)
822 
823 #define RXE_FREEZE_ABORT_MASK \
824 	(RCV_ERR_STATUS_RX_DMA_CSR_UNC_ERR_SMASK | \
825 	RCV_ERR_STATUS_RX_DMA_HDR_FIFO_RD_UNC_ERR_SMASK | \
826 	RCV_ERR_STATUS_RX_DMA_DATA_FIFO_RD_UNC_ERR_SMASK)
827 
828 /*
829  * DCC Error Flags
830  */
831 #define DCCE(name) DCC_ERR_FLG_##name##_SMASK
832 static struct flag_table dcc_err_flags[] = {
833 	FLAG_ENTRY0("bad_l2_err", DCCE(BAD_L2_ERR)),
834 	FLAG_ENTRY0("bad_sc_err", DCCE(BAD_SC_ERR)),
835 	FLAG_ENTRY0("bad_mid_tail_err", DCCE(BAD_MID_TAIL_ERR)),
836 	FLAG_ENTRY0("bad_preemption_err", DCCE(BAD_PREEMPTION_ERR)),
837 	FLAG_ENTRY0("preemption_err", DCCE(PREEMPTION_ERR)),
838 	FLAG_ENTRY0("preemptionvl15_err", DCCE(PREEMPTIONVL15_ERR)),
839 	FLAG_ENTRY0("bad_vl_marker_err", DCCE(BAD_VL_MARKER_ERR)),
840 	FLAG_ENTRY0("bad_dlid_target_err", DCCE(BAD_DLID_TARGET_ERR)),
841 	FLAG_ENTRY0("bad_lver_err", DCCE(BAD_LVER_ERR)),
842 	FLAG_ENTRY0("uncorrectable_err", DCCE(UNCORRECTABLE_ERR)),
843 	FLAG_ENTRY0("bad_crdt_ack_err", DCCE(BAD_CRDT_ACK_ERR)),
844 	FLAG_ENTRY0("unsup_pkt_type", DCCE(UNSUP_PKT_TYPE)),
845 	FLAG_ENTRY0("bad_ctrl_flit_err", DCCE(BAD_CTRL_FLIT_ERR)),
846 	FLAG_ENTRY0("event_cntr_parity_err", DCCE(EVENT_CNTR_PARITY_ERR)),
847 	FLAG_ENTRY0("event_cntr_rollover_err", DCCE(EVENT_CNTR_ROLLOVER_ERR)),
848 	FLAG_ENTRY0("link_err", DCCE(LINK_ERR)),
849 	FLAG_ENTRY0("misc_cntr_rollover_err", DCCE(MISC_CNTR_ROLLOVER_ERR)),
850 	FLAG_ENTRY0("bad_ctrl_dist_err", DCCE(BAD_CTRL_DIST_ERR)),
851 	FLAG_ENTRY0("bad_tail_dist_err", DCCE(BAD_TAIL_DIST_ERR)),
852 	FLAG_ENTRY0("bad_head_dist_err", DCCE(BAD_HEAD_DIST_ERR)),
853 	FLAG_ENTRY0("nonvl15_state_err", DCCE(NONVL15_STATE_ERR)),
854 	FLAG_ENTRY0("vl15_multi_err", DCCE(VL15_MULTI_ERR)),
855 	FLAG_ENTRY0("bad_pkt_length_err", DCCE(BAD_PKT_LENGTH_ERR)),
856 	FLAG_ENTRY0("unsup_vl_err", DCCE(UNSUP_VL_ERR)),
857 	FLAG_ENTRY0("perm_nvl15_err", DCCE(PERM_NVL15_ERR)),
858 	FLAG_ENTRY0("slid_zero_err", DCCE(SLID_ZERO_ERR)),
859 	FLAG_ENTRY0("dlid_zero_err", DCCE(DLID_ZERO_ERR)),
860 	FLAG_ENTRY0("length_mtu_err", DCCE(LENGTH_MTU_ERR)),
861 	FLAG_ENTRY0("rx_early_drop_err", DCCE(RX_EARLY_DROP_ERR)),
862 	FLAG_ENTRY0("late_short_err", DCCE(LATE_SHORT_ERR)),
863 	FLAG_ENTRY0("late_long_err", DCCE(LATE_LONG_ERR)),
864 	FLAG_ENTRY0("late_ebp_err", DCCE(LATE_EBP_ERR)),
865 	FLAG_ENTRY0("fpe_tx_fifo_ovflw_err", DCCE(FPE_TX_FIFO_OVFLW_ERR)),
866 	FLAG_ENTRY0("fpe_tx_fifo_unflw_err", DCCE(FPE_TX_FIFO_UNFLW_ERR)),
867 	FLAG_ENTRY0("csr_access_blocked_host", DCCE(CSR_ACCESS_BLOCKED_HOST)),
868 	FLAG_ENTRY0("csr_access_blocked_uc", DCCE(CSR_ACCESS_BLOCKED_UC)),
869 	FLAG_ENTRY0("tx_ctrl_parity_err", DCCE(TX_CTRL_PARITY_ERR)),
870 	FLAG_ENTRY0("tx_ctrl_parity_mbe_err", DCCE(TX_CTRL_PARITY_MBE_ERR)),
871 	FLAG_ENTRY0("tx_sc_parity_err", DCCE(TX_SC_PARITY_ERR)),
872 	FLAG_ENTRY0("rx_ctrl_parity_mbe_err", DCCE(RX_CTRL_PARITY_MBE_ERR)),
873 	FLAG_ENTRY0("csr_parity_err", DCCE(CSR_PARITY_ERR)),
874 	FLAG_ENTRY0("csr_inval_addr", DCCE(CSR_INVAL_ADDR)),
875 	FLAG_ENTRY0("tx_byte_shft_parity_err", DCCE(TX_BYTE_SHFT_PARITY_ERR)),
876 	FLAG_ENTRY0("rx_byte_shft_parity_err", DCCE(RX_BYTE_SHFT_PARITY_ERR)),
877 	FLAG_ENTRY0("fmconfig_err", DCCE(FMCONFIG_ERR)),
878 	FLAG_ENTRY0("rcvport_err", DCCE(RCVPORT_ERR)),
879 };
880 
881 /*
882  * LCB error flags
883  */
884 #define LCBE(name) DC_LCB_ERR_FLG_##name##_SMASK
885 static struct flag_table lcb_err_flags[] = {
886 /* 0*/	FLAG_ENTRY0("CSR_PARITY_ERR", LCBE(CSR_PARITY_ERR)),
887 /* 1*/	FLAG_ENTRY0("INVALID_CSR_ADDR", LCBE(INVALID_CSR_ADDR)),
888 /* 2*/	FLAG_ENTRY0("RST_FOR_FAILED_DESKEW", LCBE(RST_FOR_FAILED_DESKEW)),
889 /* 3*/	FLAG_ENTRY0("ALL_LNS_FAILED_REINIT_TEST",
890 		LCBE(ALL_LNS_FAILED_REINIT_TEST)),
891 /* 4*/	FLAG_ENTRY0("LOST_REINIT_STALL_OR_TOS", LCBE(LOST_REINIT_STALL_OR_TOS)),
892 /* 5*/	FLAG_ENTRY0("TX_LESS_THAN_FOUR_LNS", LCBE(TX_LESS_THAN_FOUR_LNS)),
893 /* 6*/	FLAG_ENTRY0("RX_LESS_THAN_FOUR_LNS", LCBE(RX_LESS_THAN_FOUR_LNS)),
894 /* 7*/	FLAG_ENTRY0("SEQ_CRC_ERR", LCBE(SEQ_CRC_ERR)),
895 /* 8*/	FLAG_ENTRY0("REINIT_FROM_PEER", LCBE(REINIT_FROM_PEER)),
896 /* 9*/	FLAG_ENTRY0("REINIT_FOR_LN_DEGRADE", LCBE(REINIT_FOR_LN_DEGRADE)),
897 /*10*/	FLAG_ENTRY0("CRC_ERR_CNT_HIT_LIMIT", LCBE(CRC_ERR_CNT_HIT_LIMIT)),
898 /*11*/	FLAG_ENTRY0("RCLK_STOPPED", LCBE(RCLK_STOPPED)),
899 /*12*/	FLAG_ENTRY0("UNEXPECTED_REPLAY_MARKER", LCBE(UNEXPECTED_REPLAY_MARKER)),
900 /*13*/	FLAG_ENTRY0("UNEXPECTED_ROUND_TRIP_MARKER",
901 		LCBE(UNEXPECTED_ROUND_TRIP_MARKER)),
902 /*14*/	FLAG_ENTRY0("ILLEGAL_NULL_LTP", LCBE(ILLEGAL_NULL_LTP)),
903 /*15*/	FLAG_ENTRY0("ILLEGAL_FLIT_ENCODING", LCBE(ILLEGAL_FLIT_ENCODING)),
904 /*16*/	FLAG_ENTRY0("FLIT_INPUT_BUF_OFLW", LCBE(FLIT_INPUT_BUF_OFLW)),
905 /*17*/	FLAG_ENTRY0("VL_ACK_INPUT_BUF_OFLW", LCBE(VL_ACK_INPUT_BUF_OFLW)),
906 /*18*/	FLAG_ENTRY0("VL_ACK_INPUT_PARITY_ERR", LCBE(VL_ACK_INPUT_PARITY_ERR)),
907 /*19*/	FLAG_ENTRY0("VL_ACK_INPUT_WRONG_CRC_MODE",
908 		LCBE(VL_ACK_INPUT_WRONG_CRC_MODE)),
909 /*20*/	FLAG_ENTRY0("FLIT_INPUT_BUF_MBE", LCBE(FLIT_INPUT_BUF_MBE)),
910 /*21*/	FLAG_ENTRY0("FLIT_INPUT_BUF_SBE", LCBE(FLIT_INPUT_BUF_SBE)),
911 /*22*/	FLAG_ENTRY0("REPLAY_BUF_MBE", LCBE(REPLAY_BUF_MBE)),
912 /*23*/	FLAG_ENTRY0("REPLAY_BUF_SBE", LCBE(REPLAY_BUF_SBE)),
913 /*24*/	FLAG_ENTRY0("CREDIT_RETURN_FLIT_MBE", LCBE(CREDIT_RETURN_FLIT_MBE)),
914 /*25*/	FLAG_ENTRY0("RST_FOR_LINK_TIMEOUT", LCBE(RST_FOR_LINK_TIMEOUT)),
915 /*26*/	FLAG_ENTRY0("RST_FOR_INCOMPLT_RND_TRIP",
916 		LCBE(RST_FOR_INCOMPLT_RND_TRIP)),
917 /*27*/	FLAG_ENTRY0("HOLD_REINIT", LCBE(HOLD_REINIT)),
918 /*28*/	FLAG_ENTRY0("NEG_EDGE_LINK_TRANSFER_ACTIVE",
919 		LCBE(NEG_EDGE_LINK_TRANSFER_ACTIVE)),
920 /*29*/	FLAG_ENTRY0("REDUNDANT_FLIT_PARITY_ERR",
921 		LCBE(REDUNDANT_FLIT_PARITY_ERR))
922 };
923 
924 /*
925  * DC8051 Error Flags
926  */
927 #define D8E(name) DC_DC8051_ERR_FLG_##name##_SMASK
928 static struct flag_table dc8051_err_flags[] = {
929 	FLAG_ENTRY0("SET_BY_8051", D8E(SET_BY_8051)),
930 	FLAG_ENTRY0("LOST_8051_HEART_BEAT", D8E(LOST_8051_HEART_BEAT)),
931 	FLAG_ENTRY0("CRAM_MBE", D8E(CRAM_MBE)),
932 	FLAG_ENTRY0("CRAM_SBE", D8E(CRAM_SBE)),
933 	FLAG_ENTRY0("DRAM_MBE", D8E(DRAM_MBE)),
934 	FLAG_ENTRY0("DRAM_SBE", D8E(DRAM_SBE)),
935 	FLAG_ENTRY0("IRAM_MBE", D8E(IRAM_MBE)),
936 	FLAG_ENTRY0("IRAM_SBE", D8E(IRAM_SBE)),
937 	FLAG_ENTRY0("UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES",
938 		D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)),
939 	FLAG_ENTRY0("INVALID_CSR_ADDR", D8E(INVALID_CSR_ADDR)),
940 };
941 
942 /*
943  * DC8051 Information Error flags
944  *
945  * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.ERROR field.
946  */
947 static struct flag_table dc8051_info_err_flags[] = {
948 	FLAG_ENTRY0("Spico ROM check failed",  SPICO_ROM_FAILED),
949 	FLAG_ENTRY0("Unknown frame received",  UNKNOWN_FRAME),
950 	FLAG_ENTRY0("Target BER not met",      TARGET_BER_NOT_MET),
951 	FLAG_ENTRY0("Serdes internal loopback failure",
952 					FAILED_SERDES_INTERNAL_LOOPBACK),
953 	FLAG_ENTRY0("Failed SerDes init",      FAILED_SERDES_INIT),
954 	FLAG_ENTRY0("Failed LNI(Polling)",     FAILED_LNI_POLLING),
955 	FLAG_ENTRY0("Failed LNI(Debounce)",    FAILED_LNI_DEBOUNCE),
956 	FLAG_ENTRY0("Failed LNI(EstbComm)",    FAILED_LNI_ESTBCOMM),
957 	FLAG_ENTRY0("Failed LNI(OptEq)",       FAILED_LNI_OPTEQ),
958 	FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1),
959 	FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2),
960 	FLAG_ENTRY0("Failed LNI(ConfigLT)",    FAILED_LNI_CONFIGLT)
961 };
962 
963 /*
964  * DC8051 Information Host Information flags
965  *
966  * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG field.
967  */
968 static struct flag_table dc8051_info_host_msg_flags[] = {
969 	FLAG_ENTRY0("Host request done", 0x0001),
970 	FLAG_ENTRY0("BC SMA message", 0x0002),
971 	FLAG_ENTRY0("BC PWR_MGM message", 0x0004),
972 	FLAG_ENTRY0("BC Unknown message (BCC)", 0x0008),
973 	FLAG_ENTRY0("BC Unknown message (LCB)", 0x0010),
974 	FLAG_ENTRY0("External device config request", 0x0020),
975 	FLAG_ENTRY0("VerifyCap all frames received", 0x0040),
976 	FLAG_ENTRY0("LinkUp achieved", 0x0080),
977 	FLAG_ENTRY0("Link going down", 0x0100),
978 };
979 
980 
981 static u32 encoded_size(u32 size);
982 static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate);
983 static int set_physical_link_state(struct hfi1_devdata *dd, u64 state);
984 static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
985 			       u8 *continuous);
986 static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
987 				  u8 *vcu, u16 *vl15buf, u8 *crc_sizes);
988 static void read_vc_remote_link_width(struct hfi1_devdata *dd,
989 				      u8 *remote_tx_rate, u16 *link_widths);
990 static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
991 				     u8 *flag_bits, u16 *link_widths);
992 static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
993 				  u8 *device_rev);
994 static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed);
995 static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx);
996 static int read_tx_settings(struct hfi1_devdata *dd, u8 *enable_lane_tx,
997 			    u8 *tx_polarity_inversion,
998 			    u8 *rx_polarity_inversion, u8 *max_rate);
999 static void handle_sdma_eng_err(struct hfi1_devdata *dd,
1000 				unsigned int context, u64 err_status);
1001 static void handle_qsfp_int(struct hfi1_devdata *dd, u32 source, u64 reg);
1002 static void handle_dcc_err(struct hfi1_devdata *dd,
1003 			   unsigned int context, u64 err_status);
1004 static void handle_lcb_err(struct hfi1_devdata *dd,
1005 			   unsigned int context, u64 err_status);
1006 static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg);
1007 static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1008 static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1009 static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1010 static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1011 static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1012 static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1013 static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1014 static void set_partition_keys(struct hfi1_pportdata *);
1015 static const char *link_state_name(u32 state);
1016 static const char *link_state_reason_name(struct hfi1_pportdata *ppd,
1017 					  u32 state);
1018 static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
1019 			   u64 *out_data);
1020 static int read_idle_sma(struct hfi1_devdata *dd, u64 *data);
1021 static int thermal_init(struct hfi1_devdata *dd);
1022 
1023 static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
1024 				  int msecs);
1025 static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
1026 static void handle_temp_err(struct hfi1_devdata *);
1027 static void dc_shutdown(struct hfi1_devdata *);
1028 static void dc_start(struct hfi1_devdata *);
1029 
1030 /*
1031  * Error interrupt table entry.  This is used as input to the interrupt
1032  * "clear down" routine used for all second tier error interrupt register.
1033  * Second tier interrupt registers have a single bit representing them
1034  * in the top-level CceIntStatus.
1035  */
1036 struct err_reg_info {
1037 	u32 status;		/* status CSR offset */
1038 	u32 clear;		/* clear CSR offset */
1039 	u32 mask;		/* mask CSR offset */
1040 	void (*handler)(struct hfi1_devdata *dd, u32 source, u64 reg);
1041 	const char *desc;
1042 };
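
/*
 * Illustrative sketch of the "clear down" flow described above.  The
 * real routine is not shown in this excerpt and the function name below
 * is hypothetical; reserved entries have a NULL handler, hence the
 * check.
 */
#if 0	/* example only, not compiled */
static void example_clear_down(struct hfi1_devdata *dd,
			       const struct err_reg_info *eri, u32 source)
{
	/* read the second tier error status register */
	u64 reg = read_csr(dd, eri->status);

	/* write the set bits back to the clear CSR to acknowledge them */
	write_csr(dd, eri->clear, reg);

	/* let the specific handler decode the individual error bits */
	if (reg && eri->handler)
		eri->handler(dd, source, reg);
}
#endif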
1043 
1044 #define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START)
1045 #define NUM_DC_ERRS (IS_DC_END - IS_DC_START)
1046 #define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START)
1047 
1048 /*
1049  * Helpers for building HFI and DC error interrupt table entries.  Different
1050  * helpers are needed because of inconsistent register names.
1051  */
1052 #define EE(reg, handler, desc) \
1053 	{ reg##_STATUS, reg##_CLEAR, reg##_MASK, \
1054 		handler, desc }
1055 #define DC_EE1(reg, handler, desc) \
1056 	{ reg##_FLG, reg##_FLG_CLR, reg##_FLG_EN, handler, desc }
1057 #define DC_EE2(reg, handler, desc) \
1058 	{ reg##_FLG, reg##_CLR, reg##_EN, handler, desc }
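
/*
 * For reference, EE(CCE_ERR, handle_cce_err, "CceErr") expands to
 *
 *	{ CCE_ERR_STATUS, CCE_ERR_CLEAR, CCE_ERR_MASK,
 *		handle_cce_err, "CceErr" }
 *
 * while DC_EE1 pastes the _FLG/_FLG_CLR/_FLG_EN register name variants
 * and DC_EE2 pastes _FLG/_CLR/_EN.
 */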
1059 
1060 /*
1061  * Table of the "misc" grouping of error interrupts.  Each entry refers to
1062  * another register containing more information.
1063  */
1064 static const struct err_reg_info misc_errs[NUM_MISC_ERRS] = {
1065 /* 0*/	EE(CCE_ERR,		handle_cce_err,    "CceErr"),
1066 /* 1*/	EE(RCV_ERR,		handle_rxe_err,    "RxeErr"),
1067 /* 2*/	EE(MISC_ERR,	handle_misc_err,   "MiscErr"),
1068 /* 3*/	{ 0, 0, 0, NULL }, /* reserved */
1069 /* 4*/	EE(SEND_PIO_ERR,    handle_pio_err,    "PioErr"),
1070 /* 5*/	EE(SEND_DMA_ERR,    handle_sdma_err,   "SDmaErr"),
1071 /* 6*/	EE(SEND_EGRESS_ERR, handle_egress_err, "EgressErr"),
1072 /* 7*/	EE(SEND_ERR,	handle_txe_err,    "TxeErr")
1073 	/* the rest are reserved */
1074 };
1075 
1076 /*
1077  * Index into the Various section of the interrupt sources
1078  * corresponding to the Critical Temperature interrupt.
1079  */
1080 #define TCRIT_INT_SOURCE 4
1081 
1082 /*
1083  * SDMA error interrupt entry - refers to another register containing more
1084  * information.
1085  */
1086 static const struct err_reg_info sdma_eng_err =
1087 	EE(SEND_DMA_ENG_ERR, handle_sdma_eng_err, "SDmaEngErr");
1088 
1089 static const struct err_reg_info various_err[NUM_VARIOUS] = {
1090 /* 0*/	{ 0, 0, 0, NULL }, /* PbcInt */
1091 /* 1*/	{ 0, 0, 0, NULL }, /* GpioAssertInt */
1092 /* 2*/	EE(ASIC_QSFP1,	handle_qsfp_int,	"QSFP1"),
1093 /* 3*/	EE(ASIC_QSFP2,	handle_qsfp_int,	"QSFP2"),
1094 /* 4*/	{ 0, 0, 0, NULL }, /* TCritInt */
1095 	/* rest are reserved */
1096 };
1097 
1098 /*
1099  * The DC encoding of mtu_cap for 10K MTU in the DCC_CFG_PORT_CONFIG
1100  * register cannot be derived from the MTU value because 10K is not
1101  * a power of 2. Therefore, we need a constant. Everything else can
1102  * be calculated.
1103  */
1104 #define DCC_CFG_PORT_MTU_CAP_10240 7
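
/*
 * For the power-of-2 MTUs the encoding can be computed; a sketch of the
 * assumed relationship (illustration only, not used in this excerpt):
 *
 *	mtu_cap = ilog2(mtu) - 7;	e.g. 2048 -> 4, 4096 -> 5, 8192 -> 6
 *
 * 10240 does not fit this pattern, hence the constant above.
 */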
1105 
1106 /*
1107  * Table of the DC grouping of error interrupts.  Each entry refers to
1108  * another register containing more information.
1109  */
1110 static const struct err_reg_info dc_errs[NUM_DC_ERRS] = {
1111 /* 0*/	DC_EE1(DCC_ERR,		handle_dcc_err,	       "DCC Err"),
1112 /* 1*/	DC_EE2(DC_LCB_ERR,	handle_lcb_err,	       "LCB Err"),
1113 /* 2*/	DC_EE2(DC_DC8051_ERR,	handle_8051_interrupt, "DC8051 Interrupt"),
1114 /* 3*/	/* dc_lbm_int - special, see is_dc_int() */
1115 	/* the rest are reserved */
1116 };
1117 
1118 struct cntr_entry {
1119 	/*
1120 	 * counter name
1121 	 */
1122 	char *name;
1123 
1124 	/*
1125 	 * csr to read for name (if applicable)
1126 	 */
1127 	u64 csr;
1128 
1129 	/*
1130 	 * offset into dd or ppd to store the counter's value
1131 	 */
1132 	int offset;
1133 
1134 	/*
1135 	 * flags
1136 	 */
1137 	u8 flags;
1138 
1139 	/*
1140 	 * accessor for stat element, context either dd or ppd
1141 	 */
1142 	u64 (*rw_cntr)(const struct cntr_entry *,
1143 			       void *context,
1144 			       int vl,
1145 			       int mode,
1146 			       u64 data);
1147 };
1148 
1149 #define C_RCV_HDR_OVF_FIRST C_RCV_HDR_OVF_0
1150 #define C_RCV_HDR_OVF_LAST C_RCV_HDR_OVF_159
1151 
1152 #define CNTR_ELEM(name, csr, offset, flags, accessor) \
1153 { \
1154 	name, \
1155 	csr, \
1156 	offset, \
1157 	flags, \
1158 	accessor \
1159 }
1160 
1161 /* 32bit RXE */
1162 #define RXE32_PORT_CNTR_ELEM(name, counter, flags) \
1163 CNTR_ELEM(#name, \
1164 	  (counter * 8 + RCV_COUNTER_ARRAY32), \
1165 	  0, flags | CNTR_32BIT, \
1166 	  port_access_u32_csr)
1167 
1168 #define RXE32_DEV_CNTR_ELEM(name, counter, flags) \
1169 CNTR_ELEM(#name, \
1170 	  (counter * 8 + RCV_COUNTER_ARRAY32), \
1171 	  0, flags | CNTR_32BIT, \
1172 	  dev_access_u32_csr)
1173 
1174 /* 64bit RXE */
1175 #define RXE64_PORT_CNTR_ELEM(name, counter, flags) \
1176 CNTR_ELEM(#name, \
1177 	  (counter * 8 + RCV_COUNTER_ARRAY64), \
1178 	  0, flags, \
1179 	  port_access_u64_csr)
1180 
1181 #define RXE64_DEV_CNTR_ELEM(name, counter, flags) \
1182 CNTR_ELEM(#name, \
1183 	  (counter * 8 + RCV_COUNTER_ARRAY64), \
1184 	  0, flags, \
1185 	  dev_access_u64_csr)
1186 
1187 #define OVR_LBL(ctx) C_RCV_HDR_OVF_ ## ctx
1188 #define OVR_ELM(ctx) \
1189 CNTR_ELEM("RcvHdrOvr" #ctx, \
1190 	  (RCV_HDR_OVFL_CNT + ctx*0x100), \
1191 	  0, CNTR_NORMAL, port_access_u64_csr)
1192 
1193 /* 32bit TXE */
1194 #define TXE32_PORT_CNTR_ELEM(name, counter, flags) \
1195 CNTR_ELEM(#name, \
1196 	  (counter * 8 + SEND_COUNTER_ARRAY32), \
1197 	  0, flags | CNTR_32BIT, \
1198 	  port_access_u32_csr)
1199 
1200 /* 64bit TXE */
1201 #define TXE64_PORT_CNTR_ELEM(name, counter, flags) \
1202 CNTR_ELEM(#name, \
1203 	  (counter * 8 + SEND_COUNTER_ARRAY64), \
1204 	  0, flags, \
1205 	  port_access_u64_csr)
1206 
1207 #define TX64_DEV_CNTR_ELEM(name, counter, flags) \
1208 CNTR_ELEM(#name, \
1209 	  counter * 8 + SEND_COUNTER_ARRAY64, \
1210 	  0, \
1211 	  flags, \
1212 	  dev_access_u64_csr)
1213 
1214 /* CCE */
1215 #define CCE_PERF_DEV_CNTR_ELEM(name, counter, flags) \
1216 CNTR_ELEM(#name, \
1217 	  (counter * 8 + CCE_COUNTER_ARRAY32), \
1218 	  0, flags | CNTR_32BIT, \
1219 	  dev_access_u32_csr)
1220 
1221 #define CCE_INT_DEV_CNTR_ELEM(name, counter, flags) \
1222 CNTR_ELEM(#name, \
1223 	  (counter * 8 + CCE_INT_COUNTER_ARRAY32), \
1224 	  0, flags | CNTR_32BIT, \
1225 	  dev_access_u32_csr)
1226 
1227 /* DC */
1228 #define DC_PERF_CNTR(name, counter, flags) \
1229 CNTR_ELEM(#name, \
1230 	  counter, \
1231 	  0, \
1232 	  flags, \
1233 	  dev_access_u64_csr)
1234 
1235 #define DC_PERF_CNTR_LCB(name, counter, flags) \
1236 CNTR_ELEM(#name, \
1237 	  counter, \
1238 	  0, \
1239 	  flags, \
1240 	  dc_access_lcb_cntr)
1241 
1242 /* ibp counters */
1243 #define SW_IBP_CNTR(name, cntr) \
1244 CNTR_ELEM(#name, \
1245 	  0, \
1246 	  0, \
1247 	  CNTR_SYNTH, \
1248 	  access_ibp_##cntr)
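
/*
 * Example (illustrative only) of how the helpers above would populate a
 * counter table entry; the enum index and counter name are hypothetical:
 *
 *	[C_EXAMPLE_CNTR] = RXE32_DEV_CNTR_ELEM(RxExample, 0, CNTR_NORMAL),
 *
 * which expands to a struct cntr_entry named "RxExample" that reads the
 * 32-bit CSR at RCV_COUNTER_ARRAY32 + 0 * 8 through dev_access_u32_csr().
 */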
1249 
1250 u64 read_csr(const struct hfi1_devdata *dd, u32 offset)
1251 {
1252 	u64 val;
1253 
1254 	if (dd->flags & HFI1_PRESENT) {
1255 		val = readq((void __iomem *)dd->kregbase + offset);
1256 		return val;
1257 	}
1258 	return -1;
1259 }
1260 
1261 void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value)
1262 {
1263 	if (dd->flags & HFI1_PRESENT)
1264 		writeq(value, (void __iomem *)dd->kregbase + offset);
1265 }
1266 
1267 void __iomem *get_csr_addr(
1268 	struct hfi1_devdata *dd,
1269 	u32 offset)
1270 {
1271 	return (void __iomem *)dd->kregbase + offset;
1272 }
1273 
1274 static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr,
1275 				 int mode, u64 value)
1276 {
1277 	u64 ret;
1278 
1279 
1280 	if (mode == CNTR_MODE_R) {
1281 		ret = read_csr(dd, csr);
1282 	} else if (mode == CNTR_MODE_W) {
1283 		write_csr(dd, csr, value);
1284 		ret = value;
1285 	} else {
1286 		dd_dev_err(dd, "Invalid cntr register access mode");
1287 		return 0;
1288 	}
1289 
1290 	hfi1_cdbg(CNTR, "csr 0x%x val 0x%llx mode %d", csr, ret, mode);
1291 	return ret;
1292 }
1293 
1294 /* Dev Access */
1295 static u64 dev_access_u32_csr(const struct cntr_entry *entry,
1296 			    void *context, int vl, int mode, u64 data)
1297 {
1298 	struct hfi1_devdata *dd = context;
1299 
1300 	if (vl != CNTR_INVALID_VL)
1301 		return 0;
1302 	return read_write_csr(dd, entry->csr, mode, data);
1303 }
1304 
1305 static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context,
1306 			    int vl, int mode, u64 data)
1307 {
1308 	struct hfi1_devdata *dd = context;
1309 
1310 	u64 val = 0;
1311 	u64 csr = entry->csr;
1312 
1313 	if (entry->flags & CNTR_VL) {
1314 		if (vl == CNTR_INVALID_VL)
1315 			return 0;
1316 		csr += 8 * vl;
1317 	} else {
1318 		if (vl != CNTR_INVALID_VL)
1319 			return 0;
1320 	}
1321 
1322 	val = read_write_csr(dd, csr, mode, data);
1323 	return val;
1324 }
1325 
1326 static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context,
1327 			    int vl, int mode, u64 data)
1328 {
1329 	struct hfi1_devdata *dd = context;
1330 	u32 csr = entry->csr;
1331 	int ret = 0;
1332 
1333 	if (vl != CNTR_INVALID_VL)
1334 		return 0;
1335 	if (mode == CNTR_MODE_R)
1336 		ret = read_lcb_csr(dd, csr, &data);
1337 	else if (mode == CNTR_MODE_W)
1338 		ret = write_lcb_csr(dd, csr, data);
1339 
1340 	if (ret) {
1341 		dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr);
1342 		return 0;
1343 	}
1344 
1345 	hfi1_cdbg(CNTR, "csr 0x%x val 0x%llx mode %d", csr, data, mode);
1346 	return data;
1347 }
1348 
1349 /* Port Access */
1350 static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context,
1351 			     int vl, int mode, u64 data)
1352 {
1353 	struct hfi1_pportdata *ppd = context;
1354 
1355 	if (vl != CNTR_INVALID_VL)
1356 		return 0;
1357 	return read_write_csr(ppd->dd, entry->csr, mode, data);
1358 }
1359 
1360 static u64 port_access_u64_csr(const struct cntr_entry *entry,
1361 			     void *context, int vl, int mode, u64 data)
1362 {
1363 	struct hfi1_pportdata *ppd = context;
1364 	u64 val;
1365 	u64 csr = entry->csr;
1366 
1367 	if (entry->flags & CNTR_VL) {
1368 		if (vl == CNTR_INVALID_VL)
1369 			return 0;
1370 		csr += 8 * vl;
1371 	} else {
1372 		if (vl != CNTR_INVALID_VL)
1373 			return 0;
1374 	}
1375 	val = read_write_csr(ppd->dd, csr, mode, data);
1376 	return val;
1377 }
1378 
1379 /* Software defined */
1380 static inline u64 read_write_sw(struct hfi1_devdata *dd, u64 *cntr, int mode,
1381 				u64 data)
1382 {
1383 	u64 ret;
1384 
1385 	if (mode == CNTR_MODE_R) {
1386 		ret = *cntr;
1387 	} else if (mode == CNTR_MODE_W) {
1388 		*cntr = data;
1389 		ret = data;
1390 	} else {
1391 		dd_dev_err(dd, "Invalid cntr sw access mode");
1392 		return 0;
1393 	}
1394 
1395 	hfi1_cdbg(CNTR, "val 0x%llx mode %d", ret, mode);
1396 
1397 	return ret;
1398 }
1399 
1400 static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context,
1401 			       int vl, int mode, u64 data)
1402 {
1403 	struct hfi1_pportdata *ppd = context;
1404 
1405 	if (vl != CNTR_INVALID_VL)
1406 		return 0;
1407 	return read_write_sw(ppd->dd, &ppd->link_downed, mode, data);
1408 }
1409 
1410 static u64 access_sw_link_up_cnt(const struct cntr_entry *entry, void *context,
1411 			       int vl, int mode, u64 data)
1412 {
1413 	struct hfi1_pportdata *ppd = context;
1414 
1415 	if (vl != CNTR_INVALID_VL)
1416 		return 0;
1417 	return read_write_sw(ppd->dd, &ppd->link_up, mode, data);
1418 }
1419 
1420 static u64 access_sw_xmit_discards(const struct cntr_entry *entry,
1421 				    void *context, int vl, int mode, u64 data)
1422 {
1423 	struct hfi1_pportdata *ppd = context;
1424 
1425 	if (vl != CNTR_INVALID_VL)
1426 		return 0;
1427 
1428 	return read_write_sw(ppd->dd, &ppd->port_xmit_discards, mode, data);
1429 }
1430 
1431 static u64 access_xmit_constraint_errs(const struct cntr_entry *entry,
1432 				     void *context, int vl, int mode, u64 data)
1433 {
1434 	struct hfi1_pportdata *ppd = context;
1435 
1436 	if (vl != CNTR_INVALID_VL)
1437 		return 0;
1438 
1439 	return read_write_sw(ppd->dd, &ppd->port_xmit_constraint_errors,
1440 			     mode, data);
1441 }
1442 
1443 static u64 access_rcv_constraint_errs(const struct cntr_entry *entry,
1444 				     void *context, int vl, int mode, u64 data)
1445 {
1446 	struct hfi1_pportdata *ppd = context;
1447 
1448 	if (vl != CNTR_INVALID_VL)
1449 		return 0;
1450 
1451 	return read_write_sw(ppd->dd, &ppd->port_rcv_constraint_errors,
1452 			     mode, data);
1453 }
1454 
1455 u64 get_all_cpu_total(u64 __percpu *cntr)
1456 {
1457 	int cpu;
1458 	u64 counter = 0;
1459 
1460 	for_each_possible_cpu(cpu)
1461 		counter += *per_cpu_ptr(cntr, cpu);
1462 	return counter;
1463 }
1464 
1465 static u64 read_write_cpu(struct hfi1_devdata *dd, u64 *z_val,
1466 			  u64 __percpu *cntr,
1467 			  int vl, int mode, u64 data)
1468 {
1469 
1470 	u64 ret = 0;
1471 
1472 	if (vl != CNTR_INVALID_VL)
1473 		return 0;
1474 
1475 	if (mode == CNTR_MODE_R) {
1476 		ret = get_all_cpu_total(cntr) - *z_val;
1477 	} else if (mode == CNTR_MODE_W) {
1478 		/* A write can only zero the counter */
1479 		if (data == 0)
1480 			*z_val = get_all_cpu_total(cntr);
1481 		else
1482 			dd_dev_err(dd, "Per CPU cntrs can only be zeroed");
1483 	} else {
1484 		dd_dev_err(dd, "Invalid cntr sw cpu access mode");
1485 		return 0;
1486 	}
1487 
1488 	return ret;
1489 }
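
/*
 * Illustrative numbers for the zero-snapshot scheme above: if the per-CPU
 * values sum to 1000 and *z_val is 0, a read returns 1000.  Writing 0 then
 * records *z_val = 1000 instead of touching the per-CPU data, so a later
 * read after 50 more events returns 1050 - 1000 = 50.  Only a write of 0
 * is meaningful; any other value is rejected with an error message.
 */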
1490 
1491 static u64 access_sw_cpu_intr(const struct cntr_entry *entry,
1492 			      void *context, int vl, int mode, u64 data)
1493 {
1494 	struct hfi1_devdata *dd = context;
1495 
1496 	return read_write_cpu(dd, &dd->z_int_counter, dd->int_counter, vl,
1497 			      mode, data);
1498 }
1499 
1500 static u64 access_sw_cpu_rcv_limit(const struct cntr_entry *entry,
1501 			      void *context, int vl, int mode, u64 data)
1502 {
1503 	struct hfi1_devdata *dd = context;
1504 
1505 	return read_write_cpu(dd, &dd->z_rcv_limit, dd->rcv_limit, vl,
1506 			      mode, data);
1507 }
1508 
1509 static u64 access_sw_pio_wait(const struct cntr_entry *entry,
1510 			      void *context, int vl, int mode, u64 data)
1511 {
1512 	struct hfi1_devdata *dd = context;
1513 
1514 	return dd->verbs_dev.n_piowait;
1515 }
1516 
1517 static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
1518 			      void *context, int vl, int mode, u64 data)
1519 {
1520 	struct hfi1_devdata *dd = context;
1521 
1522 	return dd->verbs_dev.n_txwait;
1523 }
1524 
1525 static u64 access_sw_kmem_wait(const struct cntr_entry *entry,
1526 			       void *context, int vl, int mode, u64 data)
1527 {
1528 	struct hfi1_devdata *dd = context;
1529 
1530 	return dd->verbs_dev.n_kmem_wait;
1531 }
1532 
1533 static u64 access_sw_send_schedule(const struct cntr_entry *entry,
1534 			       void *context, int vl, int mode, u64 data)
1535 {
1536 	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1537 
1538 	return dd->verbs_dev.n_send_schedule;
1539 }
1540 
1541 #define def_access_sw_cpu(cntr) \
1542 static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry,		      \
1543 			      void *context, int vl, int mode, u64 data)      \
1544 {									      \
1545 	struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;	      \
1546 	return read_write_cpu(ppd->dd, &ppd->ibport_data.z_ ##cntr,	      \
1547 			      ppd->ibport_data.cntr, vl,		      \
1548 			      mode, data);				      \
1549 }
1550 
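/*
 * For reference (illustrative expansion): def_access_sw_cpu(rc_acks) below
 * generates
 *
 *	static u64 access_sw_cpu_rc_acks(const struct cntr_entry *entry,
 *					 void *context, int vl, int mode,
 *					 u64 data)
 *
 * which forwards to read_write_cpu() with &ppd->ibport_data.z_rc_acks as
 * the zero snapshot and ppd->ibport_data.rc_acks as the per-CPU counter.
 */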
1551 def_access_sw_cpu(rc_acks);
1552 def_access_sw_cpu(rc_qacks);
1553 def_access_sw_cpu(rc_delayed_comp);
1554 
1555 #define def_access_ibp_counter(cntr) \
1556 static u64 access_ibp_##cntr(const struct cntr_entry *entry,		      \
1557 				void *context, int vl, int mode, u64 data)    \
1558 {									      \
1559 	struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;	      \
1560 									      \
1561 	if (vl != CNTR_INVALID_VL)					      \
1562 		return 0;						      \
1563 									      \
1564 	return read_write_sw(ppd->dd, &ppd->ibport_data.n_ ##cntr,	      \
1565 			     mode, data);				      \
1566 }
1567 
1568 def_access_ibp_counter(loop_pkts);
1569 def_access_ibp_counter(rc_resends);
1570 def_access_ibp_counter(rnr_naks);
1571 def_access_ibp_counter(other_naks);
1572 def_access_ibp_counter(rc_timeouts);
1573 def_access_ibp_counter(pkt_drops);
1574 def_access_ibp_counter(dmawait);
1575 def_access_ibp_counter(rc_seqnak);
1576 def_access_ibp_counter(rc_dupreq);
1577 def_access_ibp_counter(rdma_seq);
1578 def_access_ibp_counter(unaligned);
1579 def_access_ibp_counter(seq_naks);
1580 
1581 static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
1582 [C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH),
1583 [C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT,
1584 			CNTR_NORMAL),
1585 [C_RX_TID_INVALID] = RXE32_DEV_CNTR_ELEM(RxTIDInvalid, RCV_TID_VALID_ERR_CNT,
1586 			CNTR_NORMAL),
1587 [C_RX_TID_FLGMS] = RXE32_DEV_CNTR_ELEM(RxTidFLGMs,
1588 			RCV_TID_FLOW_GEN_MISMATCH_CNT,
1589 			CNTR_NORMAL),
1590 [C_RX_CTX_RHQS] = RXE32_DEV_CNTR_ELEM(RxCtxRHQS, RCV_CONTEXT_RHQ_STALL,
1591 			CNTR_NORMAL),
1592 [C_RX_CTX_EGRS] = RXE32_DEV_CNTR_ELEM(RxCtxEgrS, RCV_CONTEXT_EGR_STALL,
1593 			CNTR_NORMAL),
1594 [C_RCV_TID_FLSMS] = RXE32_DEV_CNTR_ELEM(RxTidFLSMs,
1595 			RCV_TID_FLOW_SEQ_MISMATCH_CNT, CNTR_NORMAL),
1596 [C_CCE_PCI_CR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePciCrSt,
1597 			CCE_PCIE_POSTED_CRDT_STALL_CNT, CNTR_NORMAL),
1598 [C_CCE_PCI_TR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePciTrSt, CCE_PCIE_TRGT_STALL_CNT,
1599 			CNTR_NORMAL),
1600 [C_CCE_PIO_WR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePioWrSt, CCE_PIO_WR_STALL_CNT,
1601 			CNTR_NORMAL),
1602 [C_CCE_ERR_INT] = CCE_INT_DEV_CNTR_ELEM(CceErrInt, CCE_ERR_INT_CNT,
1603 			CNTR_NORMAL),
1604 [C_CCE_SDMA_INT] = CCE_INT_DEV_CNTR_ELEM(CceSdmaInt, CCE_SDMA_INT_CNT,
1605 			CNTR_NORMAL),
1606 [C_CCE_MISC_INT] = CCE_INT_DEV_CNTR_ELEM(CceMiscInt, CCE_MISC_INT_CNT,
1607 			CNTR_NORMAL),
1608 [C_CCE_RCV_AV_INT] = CCE_INT_DEV_CNTR_ELEM(CceRcvAvInt, CCE_RCV_AVAIL_INT_CNT,
1609 			CNTR_NORMAL),
1610 [C_CCE_RCV_URG_INT] = CCE_INT_DEV_CNTR_ELEM(CceRcvUrgInt,
1611 			CCE_RCV_URGENT_INT_CNT,	CNTR_NORMAL),
1612 [C_CCE_SEND_CR_INT] = CCE_INT_DEV_CNTR_ELEM(CceSndCrInt,
1613 			CCE_SEND_CREDIT_INT_CNT, CNTR_NORMAL),
1614 [C_DC_UNC_ERR] = DC_PERF_CNTR(DcUnctblErr, DCC_ERR_UNCORRECTABLE_CNT,
1615 			      CNTR_SYNTH),
1616 [C_DC_RCV_ERR] = DC_PERF_CNTR(DcRecvErr, DCC_ERR_PORTRCV_ERR_CNT, CNTR_SYNTH),
1617 [C_DC_FM_CFG_ERR] = DC_PERF_CNTR(DcFmCfgErr, DCC_ERR_FMCONFIG_ERR_CNT,
1618 				 CNTR_SYNTH),
1619 [C_DC_RMT_PHY_ERR] = DC_PERF_CNTR(DcRmtPhyErr, DCC_ERR_RCVREMOTE_PHY_ERR_CNT,
1620 				  CNTR_SYNTH),
1621 [C_DC_DROPPED_PKT] = DC_PERF_CNTR(DcDroppedPkt, DCC_ERR_DROPPED_PKT_CNT,
1622 				  CNTR_SYNTH),
1623 [C_DC_MC_XMIT_PKTS] = DC_PERF_CNTR(DcMcXmitPkts,
1624 				   DCC_PRF_PORT_XMIT_MULTICAST_CNT, CNTR_SYNTH),
1625 [C_DC_MC_RCV_PKTS] = DC_PERF_CNTR(DcMcRcvPkts,
1626 				  DCC_PRF_PORT_RCV_MULTICAST_PKT_CNT,
1627 				  CNTR_SYNTH),
1628 [C_DC_XMIT_CERR] = DC_PERF_CNTR(DcXmitCorr,
1629 				DCC_PRF_PORT_XMIT_CORRECTABLE_CNT, CNTR_SYNTH),
1630 [C_DC_RCV_CERR] = DC_PERF_CNTR(DcRcvCorrCnt, DCC_PRF_PORT_RCV_CORRECTABLE_CNT,
1631 			       CNTR_SYNTH),
1632 [C_DC_RCV_FCC] = DC_PERF_CNTR(DcRxFCntl, DCC_PRF_RX_FLOW_CRTL_CNT,
1633 			      CNTR_SYNTH),
1634 [C_DC_XMIT_FCC] = DC_PERF_CNTR(DcXmitFCntl, DCC_PRF_TX_FLOW_CRTL_CNT,
1635 			       CNTR_SYNTH),
1636 [C_DC_XMIT_FLITS] = DC_PERF_CNTR(DcXmitFlits, DCC_PRF_PORT_XMIT_DATA_CNT,
1637 				 CNTR_SYNTH),
1638 [C_DC_RCV_FLITS] = DC_PERF_CNTR(DcRcvFlits, DCC_PRF_PORT_RCV_DATA_CNT,
1639 				CNTR_SYNTH),
1640 [C_DC_XMIT_PKTS] = DC_PERF_CNTR(DcXmitPkts, DCC_PRF_PORT_XMIT_PKTS_CNT,
1641 				CNTR_SYNTH),
1642 [C_DC_RCV_PKTS] = DC_PERF_CNTR(DcRcvPkts, DCC_PRF_PORT_RCV_PKTS_CNT,
1643 			       CNTR_SYNTH),
1644 [C_DC_RX_FLIT_VL] = DC_PERF_CNTR(DcRxFlitVl, DCC_PRF_PORT_VL_RCV_DATA_CNT,
1645 				 CNTR_SYNTH | CNTR_VL),
1646 [C_DC_RX_PKT_VL] = DC_PERF_CNTR(DcRxPktVl, DCC_PRF_PORT_VL_RCV_PKTS_CNT,
1647 				CNTR_SYNTH | CNTR_VL),
1648 [C_DC_RCV_FCN] = DC_PERF_CNTR(DcRcvFcn, DCC_PRF_PORT_RCV_FECN_CNT, CNTR_SYNTH),
1649 [C_DC_RCV_FCN_VL] = DC_PERF_CNTR(DcRcvFcnVl, DCC_PRF_PORT_VL_RCV_FECN_CNT,
1650 				 CNTR_SYNTH | CNTR_VL),
1651 [C_DC_RCV_BCN] = DC_PERF_CNTR(DcRcvBcn, DCC_PRF_PORT_RCV_BECN_CNT, CNTR_SYNTH),
1652 [C_DC_RCV_BCN_VL] = DC_PERF_CNTR(DcRcvBcnVl, DCC_PRF_PORT_VL_RCV_BECN_CNT,
1653 				 CNTR_SYNTH | CNTR_VL),
1654 [C_DC_RCV_BBL] = DC_PERF_CNTR(DcRcvBbl, DCC_PRF_PORT_RCV_BUBBLE_CNT,
1655 			      CNTR_SYNTH),
1656 [C_DC_RCV_BBL_VL] = DC_PERF_CNTR(DcRcvBblVl, DCC_PRF_PORT_VL_RCV_BUBBLE_CNT,
1657 				 CNTR_SYNTH | CNTR_VL),
1658 [C_DC_MARK_FECN] = DC_PERF_CNTR(DcMarkFcn, DCC_PRF_PORT_MARK_FECN_CNT,
1659 				CNTR_SYNTH),
1660 [C_DC_MARK_FECN_VL] = DC_PERF_CNTR(DcMarkFcnVl, DCC_PRF_PORT_VL_MARK_FECN_CNT,
1661 				   CNTR_SYNTH | CNTR_VL),
1662 [C_DC_TOTAL_CRC] =
1663 	DC_PERF_CNTR_LCB(DcTotCrc, DC_LCB_ERR_INFO_TOTAL_CRC_ERR,
1664 			 CNTR_SYNTH),
1665 [C_DC_CRC_LN0] = DC_PERF_CNTR_LCB(DcCrcLn0, DC_LCB_ERR_INFO_CRC_ERR_LN0,
1666 				  CNTR_SYNTH),
1667 [C_DC_CRC_LN1] = DC_PERF_CNTR_LCB(DcCrcLn1, DC_LCB_ERR_INFO_CRC_ERR_LN1,
1668 				  CNTR_SYNTH),
1669 [C_DC_CRC_LN2] = DC_PERF_CNTR_LCB(DcCrcLn2, DC_LCB_ERR_INFO_CRC_ERR_LN2,
1670 				  CNTR_SYNTH),
1671 [C_DC_CRC_LN3] = DC_PERF_CNTR_LCB(DcCrcLn3, DC_LCB_ERR_INFO_CRC_ERR_LN3,
1672 				  CNTR_SYNTH),
1673 [C_DC_CRC_MULT_LN] =
1674 	DC_PERF_CNTR_LCB(DcMultLn, DC_LCB_ERR_INFO_CRC_ERR_MULTI_LN,
1675 			 CNTR_SYNTH),
1676 [C_DC_TX_REPLAY] = DC_PERF_CNTR_LCB(DcTxReplay, DC_LCB_ERR_INFO_TX_REPLAY_CNT,
1677 				    CNTR_SYNTH),
1678 [C_DC_RX_REPLAY] = DC_PERF_CNTR_LCB(DcRxReplay, DC_LCB_ERR_INFO_RX_REPLAY_CNT,
1679 				    CNTR_SYNTH),
1680 [C_DC_SEQ_CRC_CNT] =
1681 	DC_PERF_CNTR_LCB(DcLinkSeqCrc, DC_LCB_ERR_INFO_SEQ_CRC_CNT,
1682 			 CNTR_SYNTH),
1683 [C_DC_ESC0_ONLY_CNT] =
1684 	DC_PERF_CNTR_LCB(DcEsc0, DC_LCB_ERR_INFO_ESCAPE_0_ONLY_CNT,
1685 			 CNTR_SYNTH),
1686 [C_DC_ESC0_PLUS1_CNT] =
1687 	DC_PERF_CNTR_LCB(DcEsc1, DC_LCB_ERR_INFO_ESCAPE_0_PLUS1_CNT,
1688 			 CNTR_SYNTH),
1689 [C_DC_ESC0_PLUS2_CNT] =
1690 	DC_PERF_CNTR_LCB(DcEsc0Plus2, DC_LCB_ERR_INFO_ESCAPE_0_PLUS2_CNT,
1691 			 CNTR_SYNTH),
1692 [C_DC_REINIT_FROM_PEER_CNT] =
1693 	DC_PERF_CNTR_LCB(DcReinitPeer, DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT,
1694 			 CNTR_SYNTH),
1695 [C_DC_SBE_CNT] = DC_PERF_CNTR_LCB(DcSbe, DC_LCB_ERR_INFO_SBE_CNT,
1696 				  CNTR_SYNTH),
1697 [C_DC_MISC_FLG_CNT] =
1698 	DC_PERF_CNTR_LCB(DcMiscFlg, DC_LCB_ERR_INFO_MISC_FLG_CNT,
1699 			 CNTR_SYNTH),
1700 [C_DC_PRF_GOOD_LTP_CNT] =
1701 	DC_PERF_CNTR_LCB(DcGoodLTP, DC_LCB_PRF_GOOD_LTP_CNT, CNTR_SYNTH),
1702 [C_DC_PRF_ACCEPTED_LTP_CNT] =
1703 	DC_PERF_CNTR_LCB(DcAccLTP, DC_LCB_PRF_ACCEPTED_LTP_CNT,
1704 			 CNTR_SYNTH),
1705 [C_DC_PRF_RX_FLIT_CNT] =
1706 	DC_PERF_CNTR_LCB(DcPrfRxFlit, DC_LCB_PRF_RX_FLIT_CNT, CNTR_SYNTH),
1707 [C_DC_PRF_TX_FLIT_CNT] =
1708 	DC_PERF_CNTR_LCB(DcPrfTxFlit, DC_LCB_PRF_TX_FLIT_CNT, CNTR_SYNTH),
1709 [C_DC_PRF_CLK_CNTR] =
1710 	DC_PERF_CNTR_LCB(DcPrfClk, DC_LCB_PRF_CLK_CNTR, CNTR_SYNTH),
1711 [C_DC_PG_DBG_FLIT_CRDTS_CNT] =
1712 	DC_PERF_CNTR_LCB(DcFltCrdts, DC_LCB_PG_DBG_FLIT_CRDTS_CNT, CNTR_SYNTH),
1713 [C_DC_PG_STS_PAUSE_COMPLETE_CNT] =
1714 	DC_PERF_CNTR_LCB(DcPauseComp, DC_LCB_PG_STS_PAUSE_COMPLETE_CNT,
1715 			 CNTR_SYNTH),
1716 [C_DC_PG_STS_TX_SBE_CNT] =
1717 	DC_PERF_CNTR_LCB(DcStsTxSbe, DC_LCB_PG_STS_TX_SBE_CNT, CNTR_SYNTH),
1718 [C_DC_PG_STS_TX_MBE_CNT] =
1719 	DC_PERF_CNTR_LCB(DcStsTxMbe, DC_LCB_PG_STS_TX_MBE_CNT,
1720 			 CNTR_SYNTH),
1721 [C_SW_CPU_INTR] = CNTR_ELEM("Intr", 0, 0, CNTR_NORMAL,
1722 			    access_sw_cpu_intr),
1723 [C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL,
1724 			    access_sw_cpu_rcv_limit),
1725 [C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL,
1726 			    access_sw_vtx_wait),
1727 [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
1728 			    access_sw_pio_wait),
1729 [C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
1730 			    access_sw_kmem_wait),
1731 [C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
1732 			    access_sw_send_schedule),
1733 };
1734 
1735 static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
1736 [C_TX_UNSUP_VL] = TXE32_PORT_CNTR_ELEM(TxUnVLErr, SEND_UNSUP_VL_ERR_CNT,
1737 			CNTR_NORMAL),
1738 [C_TX_INVAL_LEN] = TXE32_PORT_CNTR_ELEM(TxInvalLen, SEND_LEN_ERR_CNT,
1739 			CNTR_NORMAL),
1740 [C_TX_MM_LEN_ERR] = TXE32_PORT_CNTR_ELEM(TxMMLenErr, SEND_MAX_MIN_LEN_ERR_CNT,
1741 			CNTR_NORMAL),
1742 [C_TX_UNDERRUN] = TXE32_PORT_CNTR_ELEM(TxUnderrun, SEND_UNDERRUN_CNT,
1743 			CNTR_NORMAL),
1744 [C_TX_FLOW_STALL] = TXE32_PORT_CNTR_ELEM(TxFlowStall, SEND_FLOW_STALL_CNT,
1745 			CNTR_NORMAL),
1746 [C_TX_DROPPED] = TXE32_PORT_CNTR_ELEM(TxDropped, SEND_DROPPED_PKT_CNT,
1747 			CNTR_NORMAL),
1748 [C_TX_HDR_ERR] = TXE32_PORT_CNTR_ELEM(TxHdrErr, SEND_HEADERS_ERR_CNT,
1749 			CNTR_NORMAL),
1750 [C_TX_PKT] = TXE64_PORT_CNTR_ELEM(TxPkt, SEND_DATA_PKT_CNT, CNTR_NORMAL),
1751 [C_TX_WORDS] = TXE64_PORT_CNTR_ELEM(TxWords, SEND_DWORD_CNT, CNTR_NORMAL),
1752 [C_TX_WAIT] = TXE64_PORT_CNTR_ELEM(TxWait, SEND_WAIT_CNT, CNTR_SYNTH),
1753 [C_TX_FLIT_VL] = TXE64_PORT_CNTR_ELEM(TxFlitVL, SEND_DATA_VL0_CNT,
1754 			CNTR_SYNTH | CNTR_VL),
1755 [C_TX_PKT_VL] = TXE64_PORT_CNTR_ELEM(TxPktVL, SEND_DATA_PKT_VL0_CNT,
1756 			CNTR_SYNTH | CNTR_VL),
1757 [C_TX_WAIT_VL] = TXE64_PORT_CNTR_ELEM(TxWaitVL, SEND_WAIT_VL0_CNT,
1758 			CNTR_SYNTH | CNTR_VL),
1759 [C_RX_PKT] = RXE64_PORT_CNTR_ELEM(RxPkt, RCV_DATA_PKT_CNT, CNTR_NORMAL),
1760 [C_RX_WORDS] = RXE64_PORT_CNTR_ELEM(RxWords, RCV_DWORD_CNT, CNTR_NORMAL),
1761 [C_SW_LINK_DOWN] = CNTR_ELEM("SwLinkDown", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1762 			access_sw_link_dn_cnt),
1763 [C_SW_LINK_UP] = CNTR_ELEM("SwLinkUp", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1764 			access_sw_link_up_cnt),
1765 [C_SW_XMIT_DSCD] = CNTR_ELEM("XmitDscd", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1766 			access_sw_xmit_discards),
1767 [C_SW_XMIT_DSCD_VL] = CNTR_ELEM("XmitDscdVl", 0, 0,
1768 			CNTR_SYNTH | CNTR_32BIT | CNTR_VL,
1769 			access_sw_xmit_discards),
1770 [C_SW_XMIT_CSTR_ERR] = CNTR_ELEM("XmitCstrErr", 0, 0, CNTR_SYNTH,
1771 			access_xmit_constraint_errs),
1772 [C_SW_RCV_CSTR_ERR] = CNTR_ELEM("RcvCstrErr", 0, 0, CNTR_SYNTH,
1773 			access_rcv_constraint_errs),
1774 [C_SW_IBP_LOOP_PKTS] = SW_IBP_CNTR(LoopPkts, loop_pkts),
1775 [C_SW_IBP_RC_RESENDS] = SW_IBP_CNTR(RcResend, rc_resends),
1776 [C_SW_IBP_RNR_NAKS] = SW_IBP_CNTR(RnrNak, rnr_naks),
1777 [C_SW_IBP_OTHER_NAKS] = SW_IBP_CNTR(OtherNak, other_naks),
1778 [C_SW_IBP_RC_TIMEOUTS] = SW_IBP_CNTR(RcTimeOut, rc_timeouts),
1779 [C_SW_IBP_PKT_DROPS] = SW_IBP_CNTR(PktDrop, pkt_drops),
1780 [C_SW_IBP_DMA_WAIT] = SW_IBP_CNTR(DmaWait, dmawait),
1781 [C_SW_IBP_RC_SEQNAK] = SW_IBP_CNTR(RcSeqNak, rc_seqnak),
1782 [C_SW_IBP_RC_DUPREQ] = SW_IBP_CNTR(RcDupRew, rc_dupreq),
1783 [C_SW_IBP_RDMA_SEQ] = SW_IBP_CNTR(RdmaSeq, rdma_seq),
1784 [C_SW_IBP_UNALIGNED] = SW_IBP_CNTR(Unaligned, unaligned),
1785 [C_SW_IBP_SEQ_NAK] = SW_IBP_CNTR(SeqNak, seq_naks),
1786 [C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
1787 			       access_sw_cpu_rc_acks),
1788 [C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
1789 			       access_sw_cpu_rc_qacks),
1790 [C_SW_CPU_RC_DELAYED_COMP] = CNTR_ELEM("RcDelayComp", 0, 0, CNTR_NORMAL,
1791 			       access_sw_cpu_rc_delayed_comp),
1792 [OVR_LBL(0)] = OVR_ELM(0), [OVR_LBL(1)] = OVR_ELM(1),
1793 [OVR_LBL(2)] = OVR_ELM(2), [OVR_LBL(3)] = OVR_ELM(3),
1794 [OVR_LBL(4)] = OVR_ELM(4), [OVR_LBL(5)] = OVR_ELM(5),
1795 [OVR_LBL(6)] = OVR_ELM(6), [OVR_LBL(7)] = OVR_ELM(7),
1796 [OVR_LBL(8)] = OVR_ELM(8), [OVR_LBL(9)] = OVR_ELM(9),
1797 [OVR_LBL(10)] = OVR_ELM(10), [OVR_LBL(11)] = OVR_ELM(11),
1798 [OVR_LBL(12)] = OVR_ELM(12), [OVR_LBL(13)] = OVR_ELM(13),
1799 [OVR_LBL(14)] = OVR_ELM(14), [OVR_LBL(15)] = OVR_ELM(15),
1800 [OVR_LBL(16)] = OVR_ELM(16), [OVR_LBL(17)] = OVR_ELM(17),
1801 [OVR_LBL(18)] = OVR_ELM(18), [OVR_LBL(19)] = OVR_ELM(19),
1802 [OVR_LBL(20)] = OVR_ELM(20), [OVR_LBL(21)] = OVR_ELM(21),
1803 [OVR_LBL(22)] = OVR_ELM(22), [OVR_LBL(23)] = OVR_ELM(23),
1804 [OVR_LBL(24)] = OVR_ELM(24), [OVR_LBL(25)] = OVR_ELM(25),
1805 [OVR_LBL(26)] = OVR_ELM(26), [OVR_LBL(27)] = OVR_ELM(27),
1806 [OVR_LBL(28)] = OVR_ELM(28), [OVR_LBL(29)] = OVR_ELM(29),
1807 [OVR_LBL(30)] = OVR_ELM(30), [OVR_LBL(31)] = OVR_ELM(31),
1808 [OVR_LBL(32)] = OVR_ELM(32), [OVR_LBL(33)] = OVR_ELM(33),
1809 [OVR_LBL(34)] = OVR_ELM(34), [OVR_LBL(35)] = OVR_ELM(35),
1810 [OVR_LBL(36)] = OVR_ELM(36), [OVR_LBL(37)] = OVR_ELM(37),
1811 [OVR_LBL(38)] = OVR_ELM(38), [OVR_LBL(39)] = OVR_ELM(39),
1812 [OVR_LBL(40)] = OVR_ELM(40), [OVR_LBL(41)] = OVR_ELM(41),
1813 [OVR_LBL(42)] = OVR_ELM(42), [OVR_LBL(43)] = OVR_ELM(43),
1814 [OVR_LBL(44)] = OVR_ELM(44), [OVR_LBL(45)] = OVR_ELM(45),
1815 [OVR_LBL(46)] = OVR_ELM(46), [OVR_LBL(47)] = OVR_ELM(47),
1816 [OVR_LBL(48)] = OVR_ELM(48), [OVR_LBL(49)] = OVR_ELM(49),
1817 [OVR_LBL(50)] = OVR_ELM(50), [OVR_LBL(51)] = OVR_ELM(51),
1818 [OVR_LBL(52)] = OVR_ELM(52), [OVR_LBL(53)] = OVR_ELM(53),
1819 [OVR_LBL(54)] = OVR_ELM(54), [OVR_LBL(55)] = OVR_ELM(55),
1820 [OVR_LBL(56)] = OVR_ELM(56), [OVR_LBL(57)] = OVR_ELM(57),
1821 [OVR_LBL(58)] = OVR_ELM(58), [OVR_LBL(59)] = OVR_ELM(59),
1822 [OVR_LBL(60)] = OVR_ELM(60), [OVR_LBL(61)] = OVR_ELM(61),
1823 [OVR_LBL(62)] = OVR_ELM(62), [OVR_LBL(63)] = OVR_ELM(63),
1824 [OVR_LBL(64)] = OVR_ELM(64), [OVR_LBL(65)] = OVR_ELM(65),
1825 [OVR_LBL(66)] = OVR_ELM(66), [OVR_LBL(67)] = OVR_ELM(67),
1826 [OVR_LBL(68)] = OVR_ELM(68), [OVR_LBL(69)] = OVR_ELM(69),
1827 [OVR_LBL(70)] = OVR_ELM(70), [OVR_LBL(71)] = OVR_ELM(71),
1828 [OVR_LBL(72)] = OVR_ELM(72), [OVR_LBL(73)] = OVR_ELM(73),
1829 [OVR_LBL(74)] = OVR_ELM(74), [OVR_LBL(75)] = OVR_ELM(75),
1830 [OVR_LBL(76)] = OVR_ELM(76), [OVR_LBL(77)] = OVR_ELM(77),
1831 [OVR_LBL(78)] = OVR_ELM(78), [OVR_LBL(79)] = OVR_ELM(79),
1832 [OVR_LBL(80)] = OVR_ELM(80), [OVR_LBL(81)] = OVR_ELM(81),
1833 [OVR_LBL(82)] = OVR_ELM(82), [OVR_LBL(83)] = OVR_ELM(83),
1834 [OVR_LBL(84)] = OVR_ELM(84), [OVR_LBL(85)] = OVR_ELM(85),
1835 [OVR_LBL(86)] = OVR_ELM(86), [OVR_LBL(87)] = OVR_ELM(87),
1836 [OVR_LBL(88)] = OVR_ELM(88), [OVR_LBL(89)] = OVR_ELM(89),
1837 [OVR_LBL(90)] = OVR_ELM(90), [OVR_LBL(91)] = OVR_ELM(91),
1838 [OVR_LBL(92)] = OVR_ELM(92), [OVR_LBL(93)] = OVR_ELM(93),
1839 [OVR_LBL(94)] = OVR_ELM(94), [OVR_LBL(95)] = OVR_ELM(95),
1840 [OVR_LBL(96)] = OVR_ELM(96), [OVR_LBL(97)] = OVR_ELM(97),
1841 [OVR_LBL(98)] = OVR_ELM(98), [OVR_LBL(99)] = OVR_ELM(99),
1842 [OVR_LBL(100)] = OVR_ELM(100), [OVR_LBL(101)] = OVR_ELM(101),
1843 [OVR_LBL(102)] = OVR_ELM(102), [OVR_LBL(103)] = OVR_ELM(103),
1844 [OVR_LBL(104)] = OVR_ELM(104), [OVR_LBL(105)] = OVR_ELM(105),
1845 [OVR_LBL(106)] = OVR_ELM(106), [OVR_LBL(107)] = OVR_ELM(107),
1846 [OVR_LBL(108)] = OVR_ELM(108), [OVR_LBL(109)] = OVR_ELM(109),
1847 [OVR_LBL(110)] = OVR_ELM(110), [OVR_LBL(111)] = OVR_ELM(111),
1848 [OVR_LBL(112)] = OVR_ELM(112), [OVR_LBL(113)] = OVR_ELM(113),
1849 [OVR_LBL(114)] = OVR_ELM(114), [OVR_LBL(115)] = OVR_ELM(115),
1850 [OVR_LBL(116)] = OVR_ELM(116), [OVR_LBL(117)] = OVR_ELM(117),
1851 [OVR_LBL(118)] = OVR_ELM(118), [OVR_LBL(119)] = OVR_ELM(119),
1852 [OVR_LBL(120)] = OVR_ELM(120), [OVR_LBL(121)] = OVR_ELM(121),
1853 [OVR_LBL(122)] = OVR_ELM(122), [OVR_LBL(123)] = OVR_ELM(123),
1854 [OVR_LBL(124)] = OVR_ELM(124), [OVR_LBL(125)] = OVR_ELM(125),
1855 [OVR_LBL(126)] = OVR_ELM(126), [OVR_LBL(127)] = OVR_ELM(127),
1856 [OVR_LBL(128)] = OVR_ELM(128), [OVR_LBL(129)] = OVR_ELM(129),
1857 [OVR_LBL(130)] = OVR_ELM(130), [OVR_LBL(131)] = OVR_ELM(131),
1858 [OVR_LBL(132)] = OVR_ELM(132), [OVR_LBL(133)] = OVR_ELM(133),
1859 [OVR_LBL(134)] = OVR_ELM(134), [OVR_LBL(135)] = OVR_ELM(135),
1860 [OVR_LBL(136)] = OVR_ELM(136), [OVR_LBL(137)] = OVR_ELM(137),
1861 [OVR_LBL(138)] = OVR_ELM(138), [OVR_LBL(139)] = OVR_ELM(139),
1862 [OVR_LBL(140)] = OVR_ELM(140), [OVR_LBL(141)] = OVR_ELM(141),
1863 [OVR_LBL(142)] = OVR_ELM(142), [OVR_LBL(143)] = OVR_ELM(143),
1864 [OVR_LBL(144)] = OVR_ELM(144), [OVR_LBL(145)] = OVR_ELM(145),
1865 [OVR_LBL(146)] = OVR_ELM(146), [OVR_LBL(147)] = OVR_ELM(147),
1866 [OVR_LBL(148)] = OVR_ELM(148), [OVR_LBL(149)] = OVR_ELM(149),
1867 [OVR_LBL(150)] = OVR_ELM(150), [OVR_LBL(151)] = OVR_ELM(151),
1868 [OVR_LBL(152)] = OVR_ELM(152), [OVR_LBL(153)] = OVR_ELM(153),
1869 [OVR_LBL(154)] = OVR_ELM(154), [OVR_LBL(155)] = OVR_ELM(155),
1870 [OVR_LBL(156)] = OVR_ELM(156), [OVR_LBL(157)] = OVR_ELM(157),
1871 [OVR_LBL(158)] = OVR_ELM(158), [OVR_LBL(159)] = OVR_ELM(159),
1872 };
1873 
1874 /* ======================================================================== */
1875 
1876 /* return true if this is chip revision a0 */
1877 int is_a0(struct hfi1_devdata *dd)
1878 {
1879 	return ((dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
1880 			& CCE_REVISION_CHIP_REV_MINOR_MASK) == 0;
1881 }
1882 
1883 /* return true if this is chip revision a */
1884 int is_ax(struct hfi1_devdata *dd)
1885 {
1886 	u8 chip_rev_minor =
1887 		dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT
1888 			& CCE_REVISION_CHIP_REV_MINOR_MASK;
1889 	return (chip_rev_minor & 0xf0) == 0;
1890 }
1891 
1892 /* return true if this is chip revision b */
1893 int is_bx(struct hfi1_devdata *dd)
1894 {
1895 	u8 chip_rev_minor =
1896 		dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT
1897 			& CCE_REVISION_CHIP_REV_MINOR_MASK;
1898 	return !!(chip_rev_minor & 0x10);
1899 }
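
/*
 * Example of the revision decoding above (illustrative): with a minor
 * revision field of 0x00 the part is A0 (is_a0() and is_ax() both true);
 * 0x01 is A1 (is_ax() true, is_a0() false); anything with bit 4 set,
 * e.g. 0x10, is a B-step part and is_bx() returns true.
 */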
1900 
1901 /*
1902  * Append string s to buffer buf.  Arguments curp and len are the current
1903  * position and remaining length, respectively.
1904  *
1905  * return 0 on success, 1 on out of room
1906  */
1907 static int append_str(char *buf, char **curp, int *lenp, const char *s)
1908 {
1909 	char *p = *curp;
1910 	int len = *lenp;
1911 	int result = 0; /* success */
1912 	char c;
1913 
1914 	/* add a comma, if this is not the first string in the buffer */
1915 	if (p != buf) {
1916 		if (len == 0) {
1917 			result = 1; /* out of room */
1918 			goto done;
1919 		}
1920 		*p++ = ',';
1921 		len--;
1922 	}
1923 
1924 	/* copy the string */
1925 	while ((c = *s++) != 0) {
1926 		if (len == 0) {
1927 			result = 1; /* out of room */
1928 			goto done;
1929 		}
1930 		*p++ = c;
1931 		len--;
1932 	}
1933 
1934 done:
1935 	/* write return values */
1936 	*curp = p;
1937 	*lenp = len;
1938 
1939 	return result;
1940 }
1941 
1942 /*
1943  * Using the given flag table, print a comma separated string into
1944  * the buffer.  End in '*' if the buffer is too short.
1945  */
1946 static char *flag_string(char *buf, int buf_len, u64 flags,
1947 				struct flag_table *table, int table_size)
1948 {
1949 	char extra[32];
1950 	char *p = buf;
1951 	int len = buf_len;
1952 	int no_room = 0;
1953 	int i;
1954 
1955 	/* make sure there are at least 2 bytes so we can form "*" */
1956 	if (len < 2)
1957 		return "";
1958 
1959 	len--;	/* leave room for a nul */
1960 	for (i = 0; i < table_size; i++) {
1961 		if (flags & table[i].flag) {
1962 			no_room = append_str(buf, &p, &len, table[i].str);
1963 			if (no_room)
1964 				break;
1965 			flags &= ~table[i].flag;
1966 		}
1967 	}
1968 
1969 	/* any undocumented bits left? */
1970 	if (!no_room && flags) {
1971 		snprintf(extra, sizeof(extra), "bits 0x%llx", flags);
1972 		no_room = append_str(buf, &p, &len, extra);
1973 	}
1974 
1975 	/* add * if ran out of room */
1976 	if (no_room) {
1977 		/* may need to back up to add space for a '*' */
1978 		if (len == 0)
1979 			--p;
1980 		*p++ = '*';
1981 	}
1982 
1983 	/* add final nul - space already allocated above */
1984 	*p = 0;
1985 	return buf;
1986 }
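
/*
 * Usage sketch (illustrative): callers pass a stack buffer and the raw
 * status register, e.g.
 *
 *	char buf[96];
 *
 *	dd_dev_info(dd, "CCE Error: %s\n",
 *		    cce_err_status_string(buf, sizeof(buf), reg));
 *
 * Known bits come out as comma-separated names, any leftover bits are
 * reported as "bits 0x...", and a trailing '*' marks a truncated result.
 */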
1987 
1988 /* first 8 CCE error interrupt source names */
1989 static const char * const cce_misc_names[] = {
1990 	"CceErrInt",		/* 0 */
1991 	"RxeErrInt",		/* 1 */
1992 	"MiscErrInt",		/* 2 */
1993 	"Reserved3",		/* 3 */
1994 	"PioErrInt",		/* 4 */
1995 	"SDmaErrInt",		/* 5 */
1996 	"EgressErrInt",		/* 6 */
1997 	"TxeErrInt"		/* 7 */
1998 };
1999 
2000 /*
2001  * Return the miscellaneous error interrupt name.
2002  */
2003 static char *is_misc_err_name(char *buf, size_t bsize, unsigned int source)
2004 {
2005 	if (source < ARRAY_SIZE(cce_misc_names))
2006 		strncpy(buf, cce_misc_names[source], bsize);
2007 	else
2008 		snprintf(buf,
2009 			bsize,
2010 			"Reserved%u",
2011 			source + IS_GENERAL_ERR_START);
2012 
2013 	return buf;
2014 }
2015 
2016 /*
2017  * Return the SDMA engine error interrupt name.
2018  */
2019 static char *is_sdma_eng_err_name(char *buf, size_t bsize, unsigned int source)
2020 {
2021 	snprintf(buf, bsize, "SDmaEngErrInt%u", source);
2022 	return buf;
2023 }
2024 
2025 /*
2026  * Return the send context error interrupt name.
2027  */
2028 static char *is_sendctxt_err_name(char *buf, size_t bsize, unsigned int source)
2029 {
2030 	snprintf(buf, bsize, "SendCtxtErrInt%u", source);
2031 	return buf;
2032 }
2033 
2034 static const char * const various_names[] = {
2035 	"PbcInt",
2036 	"GpioAssertInt",
2037 	"Qsfp1Int",
2038 	"Qsfp2Int",
2039 	"TCritInt"
2040 };
2041 
2042 /*
2043  * Return the various interrupt name.
2044  */
2045 static char *is_various_name(char *buf, size_t bsize, unsigned int source)
2046 {
2047 	if (source < ARRAY_SIZE(various_names))
2048 		strncpy(buf, various_names[source], bsize);
2049 	else
2050 		snprintf(buf, bsize, "Reserved%u", source+IS_VARIOUS_START);
2051 	return buf;
2052 }
2053 
2054 /*
2055  * Return the DC interrupt name.
2056  */
2057 static char *is_dc_name(char *buf, size_t bsize, unsigned int source)
2058 {
2059 	static const char * const dc_int_names[] = {
2060 		"common",
2061 		"lcb",
2062 		"8051",
2063 		"lbm"	/* local block merge */
2064 	};
2065 
2066 	if (source < ARRAY_SIZE(dc_int_names))
2067 		snprintf(buf, bsize, "dc_%s_int", dc_int_names[source]);
2068 	else
2069 		snprintf(buf, bsize, "DCInt%u", source);
2070 	return buf;
2071 }
2072 
2073 static const char * const sdma_int_names[] = {
2074 	"SDmaInt",
2075 	"SdmaIdleInt",
2076 	"SdmaProgressInt",
2077 };
2078 
2079 /*
2080  * Return the SDMA engine interrupt name.
2081  */
2082 static char *is_sdma_eng_name(char *buf, size_t bsize, unsigned int source)
2083 {
2084 	/* what interrupt */
2085 	unsigned int what  = source / TXE_NUM_SDMA_ENGINES;
2086 	/* which engine */
2087 	unsigned int which = source % TXE_NUM_SDMA_ENGINES;
2088 
2089 	if (likely(what < 3))
2090 		snprintf(buf, bsize, "%s%u", sdma_int_names[what], which);
2091 	else
2092 		snprintf(buf, bsize, "Invalid SDMA interrupt %u", source);
2093 	return buf;
2094 }
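
/*
 * Worked example (assuming TXE_NUM_SDMA_ENGINES is 16): source 37 gives
 * what = 37 / 16 = 2 and which = 37 % 16 = 5, so the name printed is
 * "SdmaProgressInt5".  A source of 48 or more falls outside the three
 * defined interrupt kinds and is reported as invalid.
 */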
2095 
2096 /*
2097  * Return the receive available interrupt name.
2098  */
2099 static char *is_rcv_avail_name(char *buf, size_t bsize, unsigned int source)
2100 {
2101 	snprintf(buf, bsize, "RcvAvailInt%u", source);
2102 	return buf;
2103 }
2104 
2105 /*
2106  * Return the receive urgent interrupt name.
2107  */
2108 static char *is_rcv_urgent_name(char *buf, size_t bsize, unsigned int source)
2109 {
2110 	snprintf(buf, bsize, "RcvUrgentInt%u", source);
2111 	return buf;
2112 }
2113 
2114 /*
2115  * Return the send credit interrupt name.
2116  */
2117 static char *is_send_credit_name(char *buf, size_t bsize, unsigned int source)
2118 {
2119 	snprintf(buf, bsize, "SendCreditInt%u", source);
2120 	return buf;
2121 }
2122 
2123 /*
2124  * Return the reserved interrupt name.
2125  */
2126 static char *is_reserved_name(char *buf, size_t bsize, unsigned int source)
2127 {
2128 	snprintf(buf, bsize, "Reserved%u", source + IS_RESERVED_START);
2129 	return buf;
2130 }
2131 
2132 static char *cce_err_status_string(char *buf, int buf_len, u64 flags)
2133 {
2134 	return flag_string(buf, buf_len, flags,
2135 			cce_err_status_flags, ARRAY_SIZE(cce_err_status_flags));
2136 }
2137 
2138 static char *rxe_err_status_string(char *buf, int buf_len, u64 flags)
2139 {
2140 	return flag_string(buf, buf_len, flags,
2141 			rxe_err_status_flags, ARRAY_SIZE(rxe_err_status_flags));
2142 }
2143 
2144 static char *misc_err_status_string(char *buf, int buf_len, u64 flags)
2145 {
2146 	return flag_string(buf, buf_len, flags, misc_err_status_flags,
2147 			ARRAY_SIZE(misc_err_status_flags));
2148 }
2149 
2150 static char *pio_err_status_string(char *buf, int buf_len, u64 flags)
2151 {
2152 	return flag_string(buf, buf_len, flags,
2153 			pio_err_status_flags, ARRAY_SIZE(pio_err_status_flags));
2154 }
2155 
2156 static char *sdma_err_status_string(char *buf, int buf_len, u64 flags)
2157 {
2158 	return flag_string(buf, buf_len, flags,
2159 			sdma_err_status_flags,
2160 			ARRAY_SIZE(sdma_err_status_flags));
2161 }
2162 
2163 static char *egress_err_status_string(char *buf, int buf_len, u64 flags)
2164 {
2165 	return flag_string(buf, buf_len, flags,
2166 		egress_err_status_flags, ARRAY_SIZE(egress_err_status_flags));
2167 }
2168 
2169 static char *egress_err_info_string(char *buf, int buf_len, u64 flags)
2170 {
2171 	return flag_string(buf, buf_len, flags,
2172 		egress_err_info_flags, ARRAY_SIZE(egress_err_info_flags));
2173 }
2174 
2175 static char *send_err_status_string(char *buf, int buf_len, u64 flags)
2176 {
2177 	return flag_string(buf, buf_len, flags,
2178 			send_err_status_flags,
2179 			ARRAY_SIZE(send_err_status_flags));
2180 }
2181 
2182 static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2183 {
2184 	char buf[96];
2185 
2186 	/*
2187 	 * For most of these errors, there is nothing that can be done except
2188 	 * report or record it.
2189 	 */
2190 	dd_dev_info(dd, "CCE Error: %s\n",
2191 		cce_err_status_string(buf, sizeof(buf), reg));
2192 
2193 	if ((reg & CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK)
2194 			&& is_a0(dd)
2195 			&& (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) {
2196 		/* this error requires a manual drop into SPC freeze mode */
2197 		/* then a fix up */
2198 		start_freeze_handling(dd->pport, FREEZE_SELF);
2199 	}
2200 }
2201 
2202 /*
2203  * Check counters for receive errors that do not have an interrupt
2204  * associated with them.
2205  */
2206 #define RCVERR_CHECK_TIME 10
2207 static void update_rcverr_timer(unsigned long opaque)
2208 {
2209 	struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
2210 	struct hfi1_pportdata *ppd = dd->pport;
2211 	u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2212 
2213 	if (dd->rcv_ovfl_cnt < cur_ovfl_cnt &&
2214 		ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) {
2215 		dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
2216 		set_link_down_reason(ppd,
2217 		  OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
2218 			OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
2219 		queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
2220 	}
2221 	dd->rcv_ovfl_cnt = (u32) cur_ovfl_cnt;
2222 
2223 	mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
2224 }
2225 
2226 static int init_rcverr(struct hfi1_devdata *dd)
2227 {
2228 	setup_timer(&dd->rcverr_timer, update_rcverr_timer, (unsigned long)dd);
2229 	/* Assume the hardware counter has been reset */
2230 	dd->rcv_ovfl_cnt = 0;
2231 	return mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
2232 }
2233 
2234 static void free_rcverr(struct hfi1_devdata *dd)
2235 {
2236 	if (dd->rcverr_timer.data)
2237 		del_timer_sync(&dd->rcverr_timer);
2238 	dd->rcverr_timer.data = 0;
2239 }
2240 
2241 static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2242 {
2243 	char buf[96];
2244 
2245 	dd_dev_info(dd, "Receive Error: %s\n",
2246 		rxe_err_status_string(buf, sizeof(buf), reg));
2247 
2248 	if (reg & ALL_RXE_FREEZE_ERR) {
2249 		int flags = 0;
2250 
2251 		/*
2252 		 * Freeze mode recovery is disabled for the errors
2253 		 * in RXE_FREEZE_ABORT_MASK
2254 		 */
2255 		if (is_a0(dd) && (reg & RXE_FREEZE_ABORT_MASK))
2256 			flags = FREEZE_ABORT;
2257 
2258 		start_freeze_handling(dd->pport, flags);
2259 	}
2260 }
2261 
2262 static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2263 {
2264 	char buf[96];
2265 
2266 	dd_dev_info(dd, "Misc Error: %s",
2267 		misc_err_status_string(buf, sizeof(buf), reg));
2268 }
2269 
2270 static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2271 {
2272 	char buf[96];
2273 
2274 	dd_dev_info(dd, "PIO Error: %s\n",
2275 		pio_err_status_string(buf, sizeof(buf), reg));
2276 
2277 	if (reg & ALL_PIO_FREEZE_ERR)
2278 		start_freeze_handling(dd->pport, 0);
2279 }
2280 
2281 static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2282 {
2283 	char buf[96];
2284 
2285 	dd_dev_info(dd, "SDMA Error: %s\n",
2286 		sdma_err_status_string(buf, sizeof(buf), reg));
2287 
2288 	if (reg & ALL_SDMA_FREEZE_ERR)
2289 		start_freeze_handling(dd->pport, 0);
2290 }
2291 
2292 static void count_port_inactive(struct hfi1_devdata *dd)
2293 {
2294 	struct hfi1_pportdata *ppd = dd->pport;
2295 
2296 	if (ppd->port_xmit_discards < ~(u64)0)
2297 		ppd->port_xmit_discards++;
2298 }
2299 
2300 /*
2301  * We have had a "disallowed packet" error during egress. Determine the
2302  * integrity check which failed, and update relevant error counter, etc.
2303  *
2304  * Note that the SEND_EGRESS_ERR_INFO register has only a single
2305  * bit of state per integrity check, and so we can miss the reason for an
2306  * egress error if more than one packet fails the same integrity check
2307  * since we cleared the corresponding bit in SEND_EGRESS_ERR_INFO.
2308  */
2309 static void handle_send_egress_err_info(struct hfi1_devdata *dd)
2310 {
2311 	struct hfi1_pportdata *ppd = dd->pport;
2312 	u64 src = read_csr(dd, SEND_EGRESS_ERR_SOURCE); /* read first */
2313 	u64 info = read_csr(dd, SEND_EGRESS_ERR_INFO);
2314 	char buf[96];
2315 
2316 	/* clear down all observed info as quickly as possible after read */
2317 	write_csr(dd, SEND_EGRESS_ERR_INFO, info);
2318 
2319 	dd_dev_info(dd,
2320 		"Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n",
2321 		info, egress_err_info_string(buf, sizeof(buf), info), src);
2322 
2323 	/* Eventually add other counters for each bit */
2324 
2325 	if (info & SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK) {
2326 		if (ppd->port_xmit_discards < ~(u64)0)
2327 			ppd->port_xmit_discards++;
2328 	}
2329 }
2330 
2331 /*
2332  * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
2333  * register. Does it represent a 'port inactive' error?
2334  */
2335 static inline int port_inactive_err(u64 posn)
2336 {
2337 	return (posn >= SEES(TX_LINKDOWN) &&
2338 		posn <= SEES(TX_INCORRECT_LINK_STATE));
2339 }
2340 
2341 /*
2342  * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
2343  * register. Does it represent a 'disallowed packet' error?
2344  */
2345 static inline int disallowed_pkt_err(u64 posn)
2346 {
2347 	return (posn >= SEES(TX_SDMA0_DISALLOWED_PACKET) &&
2348 		posn <= SEES(TX_SDMA15_DISALLOWED_PACKET));
2349 }
2350 
2351 static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2352 {
2353 	u64 reg_copy = reg, handled = 0;
2354 	char buf[96];
2355 
2356 	if (reg & ALL_TXE_EGRESS_FREEZE_ERR)
2357 		start_freeze_handling(dd->pport, 0);
2358 	if (is_a0(dd) && (reg &
2359 		    SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK)
2360 		    && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR))
2361 		start_freeze_handling(dd->pport, 0);
2362 
2363 	while (reg_copy) {
2364 		int posn = fls64(reg_copy);
2365 		/*
2366 		 * fls64() returns a 1-based offset, but we generally
2367 		 * want 0-based offsets.
2368 		 */
2369 		int shift = posn - 1;
2370 
2371 		if (port_inactive_err(shift)) {
2372 			count_port_inactive(dd);
2373 			handled |= (1ULL << shift);
2374 		} else if (disallowed_pkt_err(shift)) {
2375 			handle_send_egress_err_info(dd);
2376 			handled |= (1ULL << shift);
2377 		}
2378 		clear_bit(shift, (unsigned long *)&reg_copy);
2379 	}
2380 
2381 	reg &= ~handled;
2382 
2383 	if (reg)
2384 		dd_dev_info(dd, "Egress Error: %s\n",
2385 			egress_err_status_string(buf, sizeof(buf), reg));
2386 }
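
/*
 * Bit-walk example for the loop above (illustrative): if reg has bits 3
 * and 40 set, fls64() first returns 41, giving shift 40; that bit is
 * tested against the two ranges and then cleared from reg_copy, so the
 * next pass sees bit 3 (fls64() returns 4, shift 3).  Bits that matched
 * neither predicate remain set in reg after "reg &= ~handled" and show up
 * only in the log message.
 */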
2387 
2388 static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2389 {
2390 	char buf[96];
2391 
2392 	dd_dev_info(dd, "Send Error: %s\n",
2393 		send_err_status_string(buf, sizeof(buf), reg));
2394 
2395 }
2396 
2397 /*
2398  * The maximum number of times the error clear down will loop before
2399  * blocking a repeating error.  This value is arbitrary.
2400  */
2401 #define MAX_CLEAR_COUNT 20
2402 
2403 /*
2404  * Clear and handle an error register.  All error interrupts are funneled
2405  * through here to have a central location to correctly handle single-
2406  * or multi-shot errors.
2407  *
2408  * For non per-context registers, call this routine with a context value
2409  * of 0 so the per-context offset is zero.
2410  *
2411  * If the handler loops too many times, assume that something is wrong
2412  * and can't be fixed, so mask the error bits.
2413  */
2414 static void interrupt_clear_down(struct hfi1_devdata *dd,
2415 				 u32 context,
2416 				 const struct err_reg_info *eri)
2417 {
2418 	u64 reg;
2419 	u32 count;
2420 
2421 	/* read in a loop until no more errors are seen */
2422 	count = 0;
2423 	while (1) {
2424 		reg = read_kctxt_csr(dd, context, eri->status);
2425 		if (reg == 0)
2426 			break;
2427 		write_kctxt_csr(dd, context, eri->clear, reg);
2428 		if (likely(eri->handler))
2429 			eri->handler(dd, context, reg);
2430 		count++;
2431 		if (count > MAX_CLEAR_COUNT) {
2432 			u64 mask;
2433 
2434 			dd_dev_err(dd, "Repeating %s bits 0x%llx - masking\n",
2435 				eri->desc, reg);
2436 			/*
2437 			 * Read-modify-write so any other masked bits
2438 			 * remain masked.
2439 			 */
2440 			mask = read_kctxt_csr(dd, context, eri->mask);
2441 			mask &= ~reg;
2442 			write_kctxt_csr(dd, context, eri->mask, mask);
2443 			break;
2444 		}
2445 	}
2446 }
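
/*
 * Sketch of the table entry interrupt_clear_down() consumes (field roles
 * inferred from the uses above; the EXAMPLE_* names and the handler are
 * placeholders, not real CSRs or functions):
 *
 *	static const struct err_reg_info example_eri = {
 *		.status  = EXAMPLE_ERR_STATUS,
 *		.clear   = EXAMPLE_ERR_CLEAR,
 *		.mask    = EXAMPLE_ERR_MASK,
 *		.handler = handle_example_err,
 *		.desc    = "Example",
 *	};
 *
 * status is read until it is zero, clear is written with each batch of
 * bits seen, handler is invoked per batch, and mask is read-modify-written
 * to shut off bits that repeat more than MAX_CLEAR_COUNT times.
 */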
2447 
2448 /*
2449  * CCE block "misc" interrupt.  Source is < 16.
2450  */
2451 static void is_misc_err_int(struct hfi1_devdata *dd, unsigned int source)
2452 {
2453 	const struct err_reg_info *eri = &misc_errs[source];
2454 
2455 	if (eri->handler) {
2456 		interrupt_clear_down(dd, 0, eri);
2457 	} else {
2458 		dd_dev_err(dd, "Unexpected misc interrupt (%u) - reserved\n",
2459 			source);
2460 	}
2461 }
2462 
2463 static char *send_context_err_status_string(char *buf, int buf_len, u64 flags)
2464 {
2465 	return flag_string(buf, buf_len, flags,
2466 			sc_err_status_flags, ARRAY_SIZE(sc_err_status_flags));
2467 }
2468 
2469 /*
2470  * Send context error interrupt.  Source (hw_context) is < 160.
2471  *
2472  * All send context errors cause the send context to halt.  The normal
2473  * clear-down mechanism cannot be used because we cannot clear the
2474  * error bits until several other long-running items are done first.
2475  * This is OK because with the context halted, nothing else is going
2476  * to happen on it anyway.
2477  */
2478 static void is_sendctxt_err_int(struct hfi1_devdata *dd,
2479 				unsigned int hw_context)
2480 {
2481 	struct send_context_info *sci;
2482 	struct send_context *sc;
2483 	char flags[96];
2484 	u64 status;
2485 	u32 sw_index;
2486 
2487 	sw_index = dd->hw_to_sw[hw_context];
2488 	if (sw_index >= dd->num_send_contexts) {
2489 		dd_dev_err(dd,
2490 			"out of range sw index %u for send context %u\n",
2491 			sw_index, hw_context);
2492 		return;
2493 	}
2494 	sci = &dd->send_contexts[sw_index];
2495 	sc = sci->sc;
2496 	if (!sc) {
2497 		dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__,
2498 			sw_index, hw_context);
2499 		return;
2500 	}
2501 
2502 	/* tell the software that a halt has begun */
2503 	sc_stop(sc, SCF_HALTED);
2504 
2505 	status = read_kctxt_csr(dd, hw_context, SEND_CTXT_ERR_STATUS);
2506 
2507 	dd_dev_info(dd, "Send Context %u(%u) Error: %s\n", sw_index, hw_context,
2508 		send_context_err_status_string(flags, sizeof(flags), status));
2509 
2510 	if (status & SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK)
2511 		handle_send_egress_err_info(dd);
2512 
2513 	/*
2514 	 * Automatically restart halted kernel contexts out of interrupt
2515 	 * context.  User contexts must ask the driver to restart the context.
2516 	 */
2517 	if (sc->type != SC_USER)
2518 		queue_work(dd->pport->hfi1_wq, &sc->halt_work);
2519 }
2520 
2521 static void handle_sdma_eng_err(struct hfi1_devdata *dd,
2522 				unsigned int source, u64 status)
2523 {
2524 	struct sdma_engine *sde;
2525 
2526 	sde = &dd->per_sdma[source];
2527 #ifdef CONFIG_SDMA_VERBOSITY
2528 	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
2529 		   slashstrip(__FILE__), __LINE__, __func__);
2530 	dd_dev_err(sde->dd, "CONFIG SDMA(%u) source: %u status 0x%llx\n",
2531 		   sde->this_idx, source, (unsigned long long)status);
2532 #endif
2533 	sdma_engine_error(sde, status);
2534 }
2535 
2536 /*
2537  * CCE block SDMA error interrupt.  Source is < 16.
2538  */
2539 static void is_sdma_eng_err_int(struct hfi1_devdata *dd, unsigned int source)
2540 {
2541 #ifdef CONFIG_SDMA_VERBOSITY
2542 	struct sdma_engine *sde = &dd->per_sdma[source];
2543 
2544 	dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
2545 		   slashstrip(__FILE__), __LINE__, __func__);
2546 	dd_dev_err(dd, "CONFIG SDMA(%u) source: %u\n", sde->this_idx,
2547 		   source);
2548 	sdma_dumpstate(sde);
2549 #endif
2550 	interrupt_clear_down(dd, source, &sdma_eng_err);
2551 }
2552 
2553 /*
2554  * CCE block "various" interrupt.  Source is < 8.
2555  */
2556 static void is_various_int(struct hfi1_devdata *dd, unsigned int source)
2557 {
2558 	const struct err_reg_info *eri = &various_err[source];
2559 
2560 	/*
2561 	 * TCritInt cannot go through interrupt_clear_down()
2562 	 * because it is not a second tier interrupt. The handler
2563 	 * should be called directly.
2564 	 */
2565 	if (source == TCRIT_INT_SOURCE)
2566 		handle_temp_err(dd);
2567 	else if (eri->handler)
2568 		interrupt_clear_down(dd, 0, eri);
2569 	else
2570 		dd_dev_info(dd,
2571 			"%s: Unimplemented/reserved interrupt %d\n",
2572 			__func__, source);
2573 }
2574 
2575 static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
2576 {
2577 	/* source is always zero */
2578 	struct hfi1_pportdata *ppd = dd->pport;
2579 	unsigned long flags;
2580 	u64 qsfp_int_mgmt = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
2581 
2582 	if (reg & QSFP_HFI0_MODPRST_N) {
2583 
2584 		dd_dev_info(dd, "%s: ModPresent triggered QSFP interrupt\n",
2585 				__func__);
2586 
2587 		if (!qsfp_mod_present(ppd)) {
2588 			ppd->driver_link_ready = 0;
2589 			/*
2590 			 * Cable removed, reset all our information about the
2591 			 * cache and cable capabilities
2592 			 */
2593 
2594 			spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2595 			/*
2596 			 * We don't set cache_refresh_required here as we expect
2597 			 * an interrupt when a cable is inserted
2598 			 */
2599 			ppd->qsfp_info.cache_valid = 0;
2600 			ppd->qsfp_info.qsfp_interrupt_functional = 0;
2601 			spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
2602 						flags);
2603 			write_csr(dd,
2604 					dd->hfi1_id ?
2605 						ASIC_QSFP2_INVERT :
2606 						ASIC_QSFP1_INVERT,
2607 				qsfp_int_mgmt);
2608 			if (ppd->host_link_state == HLS_DN_POLL) {
2609 				/*
2610 				 * The link is still in POLL. This means
2611 				 * that the normal link down processing
2612 				 * will not happen. We have to do it here
2613 				 * before turning the DC off.
2614 				 */
2615 				queue_work(ppd->hfi1_wq, &ppd->link_down_work);
2616 			}
2617 		} else {
2618 			spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2619 			ppd->qsfp_info.cache_valid = 0;
2620 			ppd->qsfp_info.cache_refresh_required = 1;
2621 			spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
2622 						flags);
2623 
2624 			qsfp_int_mgmt &= ~(u64)QSFP_HFI0_MODPRST_N;
2625 			write_csr(dd,
2626 					dd->hfi1_id ?
2627 						ASIC_QSFP2_INVERT :
2628 						ASIC_QSFP1_INVERT,
2629 				qsfp_int_mgmt);
2630 		}
2631 	}
2632 
2633 	if (reg & QSFP_HFI0_INT_N) {
2634 
2635 		dd_dev_info(dd, "%s: IntN triggered QSFP interrupt\n",
2636 				__func__);
2637 		spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2638 		ppd->qsfp_info.check_interrupt_flags = 1;
2639 		ppd->qsfp_info.qsfp_interrupt_functional = 1;
2640 		spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags);
2641 	}
2642 
2643 	/* Schedule the QSFP work only if there is a cable attached. */
2644 	if (qsfp_mod_present(ppd))
2645 		queue_work(ppd->hfi1_wq, &ppd->qsfp_info.qsfp_work);
2646 }
2647 
2648 static int request_host_lcb_access(struct hfi1_devdata *dd)
2649 {
2650 	int ret;
2651 
2652 	ret = do_8051_command(dd, HCMD_MISC,
2653 		(u64)HCMD_MISC_REQUEST_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
2654 		NULL);
2655 	if (ret != HCMD_SUCCESS) {
2656 		dd_dev_err(dd, "%s: command failed with error %d\n",
2657 			__func__, ret);
2658 	}
2659 	return ret == HCMD_SUCCESS ? 0 : -EBUSY;
2660 }
2661 
2662 static int request_8051_lcb_access(struct hfi1_devdata *dd)
2663 {
2664 	int ret;
2665 
2666 	ret = do_8051_command(dd, HCMD_MISC,
2667 		(u64)HCMD_MISC_GRANT_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
2668 		NULL);
2669 	if (ret != HCMD_SUCCESS) {
2670 		dd_dev_err(dd, "%s: command failed with error %d\n",
2671 			__func__, ret);
2672 	}
2673 	return ret == HCMD_SUCCESS ? 0 : -EBUSY;
2674 }
2675 
2676 /*
2677  * Set the LCB selector - allow host access.  The DCC selector always
2678  * points to the host.
2679  */
2680 static inline void set_host_lcb_access(struct hfi1_devdata *dd)
2681 {
2682 	write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
2683 				DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK
2684 				| DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK);
2685 }
2686 
2687 /*
2688  * Clear the LCB selector - allow 8051 access.  The DCC selector always
2689  * points to the host.
2690  */
2691 static inline void set_8051_lcb_access(struct hfi1_devdata *dd)
2692 {
2693 	write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
2694 				DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK);
2695 }
2696 
2697 /*
2698  * Acquire LCB access from the 8051.  If the host already has access,
2699  * just increment a counter.  Otherwise, inform the 8051 that the
2700  * host is taking access.
2701  *
2702  * Returns:
2703  *	0 on success
2704  *	-EBUSY if the 8051 has control and cannot be disturbed
2705  *	-errno if unable to acquire access from the 8051
2706  */
2707 int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
2708 {
2709 	struct hfi1_pportdata *ppd = dd->pport;
2710 	int ret = 0;
2711 
2712 	/*
2713 	 * Use the host link state lock so the operation of this routine
2714 	 * { link state check, selector change, count increment } can occur
2715 	 * as a unit against a link state change.  Otherwise there is a
2716 	 * race between the state change and the count increment.
2717 	 */
2718 	if (sleep_ok) {
2719 		mutex_lock(&ppd->hls_lock);
2720 	} else {
2721 		while (!mutex_trylock(&ppd->hls_lock))
2722 			udelay(1);
2723 	}
2724 
2725 	/* this access is valid only when the link is up */
2726 	if ((ppd->host_link_state & HLS_UP) == 0) {
2727 		dd_dev_info(dd, "%s: link state %s not up\n",
2728 			__func__, link_state_name(ppd->host_link_state));
2729 		ret = -EBUSY;
2730 		goto done;
2731 	}
2732 
2733 	if (dd->lcb_access_count == 0) {
2734 		ret = request_host_lcb_access(dd);
2735 		if (ret) {
2736 			dd_dev_err(dd,
2737 				"%s: unable to acquire LCB access, err %d\n",
2738 				__func__, ret);
2739 			goto done;
2740 		}
2741 		set_host_lcb_access(dd);
2742 	}
2743 	dd->lcb_access_count++;
2744 done:
2745 	mutex_unlock(&ppd->hls_lock);
2746 	return ret;
2747 }
2748 
2749 /*
2750  * Release LCB access by decrementing the use count.  If the count is moving
2751  * from 1 to 0, inform the 8051 that it has control back.
2752  *
2753  * Returns:
2754  *	0 on success
2755  *	-errno if unable to release access to the 8051
2756  */
2757 int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
2758 {
2759 	int ret = 0;
2760 
2761 	/*
2762 	 * Use the host link state lock because the acquire needed it.
2763 	 * Here, we only need to keep { selector change, count decrement }
2764 	 * as a unit.
2765 	 */
2766 	if (sleep_ok) {
2767 		mutex_lock(&dd->pport->hls_lock);
2768 	} else {
2769 		while (!mutex_trylock(&dd->pport->hls_lock))
2770 			udelay(1);
2771 	}
2772 
2773 	if (dd->lcb_access_count == 0) {
2774 		dd_dev_err(dd, "%s: LCB access count is zero.  Skipping.\n",
2775 			__func__);
2776 		goto done;
2777 	}
2778 
2779 	if (dd->lcb_access_count == 1) {
2780 		set_8051_lcb_access(dd);
2781 		ret = request_8051_lcb_access(dd);
2782 		if (ret) {
2783 			dd_dev_err(dd,
2784 				"%s: unable to release LCB access, err %d\n",
2785 				__func__, ret);
2786 			/* restore host access if the grant didn't work */
2787 			set_host_lcb_access(dd);
2788 			goto done;
2789 		}
2790 	}
2791 	dd->lcb_access_count--;
2792 done:
2793 	mutex_unlock(&dd->pport->hls_lock);
2794 	return ret;
2795 }
2796 
2797 /*
2798  * Initialize LCB access variables and state.  Called during driver load,
2799  * after most of the initialization is finished.
2800  *
2801  * The DC default is LCB access on for the host.  The driver defaults to
2802  * leaving access to the 8051.  Assign access now - this constrains the call
2803  * to this routine to be after all LCB set-up is done.  In particular, after
2804  * hfi1_init_dd() -> set_up_interrupts() -> clear_all_interrupts()
2805  */
2806 static void init_lcb_access(struct hfi1_devdata *dd)
2807 {
2808 	dd->lcb_access_count = 0;
2809 }
2810 
2811 /*
2812  * Write a response back to an 8051 request.
2813  */
2814 static void hreq_response(struct hfi1_devdata *dd, u8 return_code, u16 rsp_data)
2815 {
2816 	write_csr(dd, DC_DC8051_CFG_EXT_DEV_0,
2817 		DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK
2818 		| (u64)return_code << DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT
2819 		| (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
2820 }
2821 
2822 /*
2823  * Handle requests from the 8051.
2824  */
2825 static void handle_8051_request(struct hfi1_devdata *dd)
2826 {
2827 	u64 reg;
2828 	u16 data;
2829 	u8 type;
2830 
2831 	reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_1);
2832 	if ((reg & DC_DC8051_CFG_EXT_DEV_1_REQ_NEW_SMASK) == 0)
2833 		return;	/* no request */
2834 
2835 	/* zero out COMPLETED so the response is seen */
2836 	write_csr(dd, DC_DC8051_CFG_EXT_DEV_0, 0);
2837 
2838 	/* extract request details */
2839 	type = (reg >> DC_DC8051_CFG_EXT_DEV_1_REQ_TYPE_SHIFT)
2840 			& DC_DC8051_CFG_EXT_DEV_1_REQ_TYPE_MASK;
2841 	data = (reg >> DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SHIFT)
2842 			& DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_MASK;
2843 
2844 	switch (type) {
2845 	case HREQ_LOAD_CONFIG:
2846 	case HREQ_SAVE_CONFIG:
2847 	case HREQ_READ_CONFIG:
2848 	case HREQ_SET_TX_EQ_ABS:
2849 	case HREQ_SET_TX_EQ_REL:
2850 	case HREQ_ENABLE:
2851 		dd_dev_info(dd, "8051 request: request 0x%x not supported\n",
2852 			type);
2853 		hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
2854 		break;
2855 
2856 	case HREQ_CONFIG_DONE:
2857 		hreq_response(dd, HREQ_SUCCESS, 0);
2858 		break;
2859 
2860 	case HREQ_INTERFACE_TEST:
2861 		hreq_response(dd, HREQ_SUCCESS, data);
2862 		break;
2863 
2864 	default:
2865 		dd_dev_err(dd, "8051 request: unknown request 0x%x\n", type);
2866 		hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
2867 		break;
2868 	}
2869 }
2870 
2871 static void write_global_credit(struct hfi1_devdata *dd,
2872 				u8 vau, u16 total, u16 shared)
2873 {
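	/* pack the total limit, shared limit, and AU into a single CSR write */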
2874 	write_csr(dd, SEND_CM_GLOBAL_CREDIT,
2875 		((u64)total
2876 			<< SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
2877 		| ((u64)shared
2878 			<< SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
2879 		| ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
2880 }
2881 
2882 /*
2883  * Set up initial VL15 credits of the remote.  Assumes the rest of
2884  * the CM credit registers are zero from a previous global or credit reset.
2885  */
2886 void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf)
2887 {
2888 	/* leave shared count at zero for both global and VL15 */
2889 	write_global_credit(dd, vau, vl15buf, 0);
2890 
2891 	/* We may need some credits for another VL when sending packets
2892 	 * with the snoop interface. Dividing it down the middle for VL15
2893 	 * and VL0 should suffice.
2894 	 */
2895 	if (unlikely(dd->hfi1_snoop.mode_flag == HFI1_PORT_SNOOP_MODE)) {
2896 		write_csr(dd, SEND_CM_CREDIT_VL15, (u64)(vl15buf >> 1)
2897 		    << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
2898 		write_csr(dd, SEND_CM_CREDIT_VL, (u64)(vl15buf >> 1)
2899 		    << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT);
2900 	} else {
2901 		write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
2902 			<< SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
2903 	}
2904 }
2905 
2906 /*
2907  * Zero all credit details from the previous connection and
2908  * reset the CM manager's internal counters.
2909  */
2910 void reset_link_credits(struct hfi1_devdata *dd)
2911 {
2912 	int i;
2913 
2914 	/* remove all previous VL credit limits */
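	/* the per-VL credit CSRs are 64-bit registers spaced 8 bytes apart */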
2915 	for (i = 0; i < TXE_NUM_DATA_VL; i++)
2916 		write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
2917 	write_csr(dd, SEND_CM_CREDIT_VL15, 0);
2918 	write_global_credit(dd, 0, 0, 0);
2919 	/* reset the CM block */
2920 	pio_send_control(dd, PSC_CM_RESET);
2921 }
2922 
2923 /* convert a vCU to a CU */
2924 static u32 vcu_to_cu(u8 vcu)
2925 {
2926 	return 1 << vcu;
2927 }
2928 
2929 /* convert a CU to a vCU */
2930 static u8 cu_to_vcu(u32 cu)
2931 {
2932 	return ilog2(cu);
2933 }
2934 
2935 /* convert a vAU to an AU */
2936 static u32 vau_to_au(u8 vau)
2937 {
2938 	return 8 * (1 << vau);
2939 }
2940 
2941 static void set_linkup_defaults(struct hfi1_pportdata *ppd)
2942 {
2943 	ppd->sm_trap_qp = 0x0;
2944 	ppd->sa_qp = 0x1;
2945 }
2946 
2947 /*
2948  * Graceful LCB shutdown.  This leaves the LCB FIFOs in reset.
2949  */
2950 static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
2951 {
2952 	u64 reg;
2953 
2954 	/* clear lcb run: LCB_CFG_RUN.EN = 0 */
2955 	write_csr(dd, DC_LCB_CFG_RUN, 0);
2956 	/* set tx fifo reset: LCB_CFG_TX_FIFOS_RESET.VAL = 1 */
2957 	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET,
2958 		1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT);
2959 	/* set dcc reset csr: DCC_CFG_RESET.{reset_lcb,reset_rx_fpe} = 1 */
2960 	dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN);
2961 	reg = read_csr(dd, DCC_CFG_RESET);
2962 	write_csr(dd, DCC_CFG_RESET,
2963 		reg
2964 		| (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT)
2965 		| (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
2966 	(void) read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
2967 	if (!abort) {
2968 		udelay(1);    /* must hold for the longer of 16cclks or 20ns */
2969 		write_csr(dd, DCC_CFG_RESET, reg);
2970 		write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
2971 	}
2972 }
2973 
2974 /*
2975  * This routine should be called after the link has been transitioned to
2976  * OFFLINE (OFFLINE state has the side effect of putting the SerDes into
2977  * reset).
2978  *
2979  * The expectation is that the caller of this routine would have taken
2980  * care of properly transitioning the link into the correct state.
2981  */
2982 static void dc_shutdown(struct hfi1_devdata *dd)
2983 {
2984 	unsigned long flags;
2985 
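	/* only the first caller performs the shutdown; mark it under the lock */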
2986 	spin_lock_irqsave(&dd->dc8051_lock, flags);
2987 	if (dd->dc_shutdown) {
2988 		spin_unlock_irqrestore(&dd->dc8051_lock, flags);
2989 		return;
2990 	}
2991 	dd->dc_shutdown = 1;
2992 	spin_unlock_irqrestore(&dd->dc8051_lock, flags);
2993 	/* Shutdown the LCB */
2994 	lcb_shutdown(dd, 1);
2995 	/* Going to OFFLINE would have caused the 8051 to put the
2996 	 * SerDes into reset already. Just need to shut down the 8051
2997 	 * itself. */
2998 	write_csr(dd, DC_DC8051_CFG_RST, 0x1);
2999 }
3000 
3001 /* Calling this after the DC has been brought out of reset should not
3002  * do any damage. */
3003 static void dc_start(struct hfi1_devdata *dd)
3004 {
3005 	unsigned long flags;
3006 	int ret;
3007 
3008 	spin_lock_irqsave(&dd->dc8051_lock, flags);
3009 	if (!dd->dc_shutdown)
3010 		goto done;
3011 	spin_unlock_irqrestore(&dd->dc8051_lock, flags);
3012 	/* Take the 8051 out of reset */
3013 	write_csr(dd, DC_DC8051_CFG_RST, 0ull);
3014 	/* Wait until 8051 is ready */
3015 	ret = wait_fm_ready(dd, TIMEOUT_8051_START);
3016 	if (ret) {
3017 		dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
3018 			__func__);
3019 	}
3020 	/* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
3021 	write_csr(dd, DCC_CFG_RESET, 0x10);
3022 	/* lcb_shutdown() with abort=1 does not restore these */
3023 	write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
3024 	spin_lock_irqsave(&dd->dc8051_lock, flags);
3025 	dd->dc_shutdown = 0;
3026 done:
3027 	spin_unlock_irqrestore(&dd->dc8051_lock, flags);
3028 }
3029 
3030 /*
3031  * These LCB adjustments are for the Aurora SerDes core in the FPGA.
3032  */
3033 static void adjust_lcb_for_fpga_serdes(struct hfi1_devdata *dd)
3034 {
3035 	u64 rx_radr, tx_radr;
3036 	u32 version;
3037 
3038 	if (dd->icode != ICODE_FPGA_EMULATION)
3039 		return;
3040 
3041 	/*
3042 	 * These LCB defaults on emulator _s are good, nothing to do here:
3043 	 *	LCB_CFG_TX_FIFOS_RADR
3044 	 *	LCB_CFG_RX_FIFOS_RADR
3045 	 *	LCB_CFG_LN_DCLK
3046 	 *	LCB_CFG_IGNORE_LOST_RCLK
3047 	 */
3048 	if (is_emulator_s(dd))
3049 		return;
3050 	/* else this is _p */
3051 
3052 	version = emulator_rev(dd);
3053 	if (!is_a0(dd))
3054 		version = 0x2d;	/* all B0 use 0x2d or higher settings */
3055 
3056 	if (version <= 0x12) {
3057 		/* release 0x12 and below */
3058 
3059 		/*
3060 		 * LCB_CFG_RX_FIFOS_RADR.RST_VAL = 0x9
3061 		 * LCB_CFG_RX_FIFOS_RADR.OK_TO_JUMP_VAL = 0x9
3062 		 * LCB_CFG_RX_FIFOS_RADR.DO_NOT_JUMP_VAL = 0xa
3063 		 */
3064 		rx_radr =
3065 		      0xaull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3066 		    | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3067 		    | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3068 		/*
3069 		 * LCB_CFG_TX_FIFOS_RADR.ON_REINIT = 0 (default)
3070 		 * LCB_CFG_TX_FIFOS_RADR.RST_VAL = 6
3071 		 */
3072 		tx_radr = 6ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3073 	} else if (version <= 0x18) {
3074 		/* release 0x13 up to 0x18 */
3075 		/* LCB_CFG_RX_FIFOS_RADR = 0x988 */
3076 		rx_radr =
3077 		      0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3078 		    | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3079 		    | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3080 		tx_radr = 7ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3081 	} else if (version == 0x19) {
3082 		/* release 0x19 */
3083 		/* LCB_CFG_RX_FIFOS_RADR = 0xa99 */
3084 		rx_radr =
3085 		      0xAull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3086 		    | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3087 		    | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3088 		tx_radr = 3ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3089 	} else if (version == 0x1a) {
3090 		/* release 0x1a */
3091 		/* LCB_CFG_RX_FIFOS_RADR = 0x988 */
3092 		rx_radr =
3093 		      0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3094 		    | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3095 		    | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3096 		tx_radr = 7ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3097 		write_csr(dd, DC_LCB_CFG_LN_DCLK, 1ull);
3098 	} else {
3099 		/* release 0x1b and higher */
3100 		/* LCB_CFG_RX_FIFOS_RADR = 0x877 */
3101 		rx_radr =
3102 		      0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3103 		    | 0x7ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3104 		    | 0x7ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3105 		tx_radr = 3ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3106 	}
3107 
3108 	write_csr(dd, DC_LCB_CFG_RX_FIFOS_RADR, rx_radr);
3109 	/* LCB_CFG_IGNORE_LOST_RCLK.EN = 1 */
3110 	write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
3111 		DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
3112 	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RADR, tx_radr);
3113 }
3114 
3115 /*
3116  * Handle a SMA idle message
3117  *
3118  * This is a work-queue function outside of the interrupt.
3119  */
3120 void handle_sma_message(struct work_struct *work)
3121 {
3122 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3123 							sma_message_work);
3124 	struct hfi1_devdata *dd = ppd->dd;
3125 	u64 msg;
3126 	int ret;
3127 
3128 	/* msg is bytes 1-4 of the 40-bit idle message - the command code
3129 	   is stripped off */
3130 	ret = read_idle_sma(dd, &msg);
3131 	if (ret)
3132 		return;
3133 	dd_dev_info(dd, "%s: SMA message 0x%llx\n", __func__, msg);
3134 	/*
3135 	 * React to the SMA message.  Byte[1] (0 for us) is the command.
3136 	 */
3137 	switch (msg & 0xff) {
3138 	case SMA_IDLE_ARM:
3139 		/*
3140 		 * See OPAv1 table 9-14 - HFI and External Switch Ports Key
3141 		 * State Transitions
3142 		 *
3143 		 * Only expected in INIT or ARMED, discard otherwise.
3144 		 */
3145 		if (ppd->host_link_state & (HLS_UP_INIT | HLS_UP_ARMED))
3146 			ppd->neighbor_normal = 1;
3147 		break;
3148 	case SMA_IDLE_ACTIVE:
3149 		/*
3150 		 * See OPAv1 table 9-14 - HFI and External Switch Ports Key
3151 		 * State Transitions
3152 		 *
3153 		 * Can activate the node.  Discard otherwise.
3154 		 */
3155 		if (ppd->host_link_state == HLS_UP_ARMED
3156 					&& ppd->is_active_optimize_enabled) {
3157 			ppd->neighbor_normal = 1;
3158 			ret = set_link_state(ppd, HLS_UP_ACTIVE);
3159 			if (ret)
3160 				dd_dev_err(
3161 					dd,
3162 					"%s: received Active SMA idle message, couldn't set link to Active\n",
3163 					__func__);
3164 		}
3165 		break;
3166 	default:
3167 		dd_dev_err(dd,
3168 			"%s: received unexpected SMA idle message 0x%llx\n",
3169 			__func__, msg);
3170 		break;
3171 	}
3172 }
3173 
3174 static void adjust_rcvctrl(struct hfi1_devdata *dd, u64 add, u64 clear)
3175 {
3176 	u64 rcvctrl;
3177 	unsigned long flags;
3178 
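	/* read-modify-write RCV_CTRL under the rcvctrl lock */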
3179 	spin_lock_irqsave(&dd->rcvctrl_lock, flags);
3180 	rcvctrl = read_csr(dd, RCV_CTRL);
3181 	rcvctrl |= add;
3182 	rcvctrl &= ~clear;
3183 	write_csr(dd, RCV_CTRL, rcvctrl);
3184 	spin_unlock_irqrestore(&dd->rcvctrl_lock, flags);
3185 }
3186 
3187 static inline void add_rcvctrl(struct hfi1_devdata *dd, u64 add)
3188 {
3189 	adjust_rcvctrl(dd, add, 0);
3190 }
3191 
3192 static inline void clear_rcvctrl(struct hfi1_devdata *dd, u64 clear)
3193 {
3194 	adjust_rcvctrl(dd, 0, clear);
3195 }
3196 
3197 /*
3198  * Called from all interrupt handlers to start handling an SPC freeze.
3199  */
3200 void start_freeze_handling(struct hfi1_pportdata *ppd, int flags)
3201 {
3202 	struct hfi1_devdata *dd = ppd->dd;
3203 	struct send_context *sc;
3204 	int i;
3205 
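	/* if asked, trigger the SPC freeze ourselves */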
3206 	if (flags & FREEZE_SELF)
3207 		write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
3208 
3209 	/* enter frozen mode */
3210 	dd->flags |= HFI1_FROZEN;
3211 
3212 	/* notify all SDMA engines that they are going into a freeze */
3213 	sdma_freeze_notify(dd, !!(flags & FREEZE_LINK_DOWN));
3214 
3215 	/* do halt pre-handling on all enabled send contexts */
3216 	for (i = 0; i < dd->num_send_contexts; i++) {
3217 		sc = dd->send_contexts[i].sc;
3218 		if (sc && (sc->flags & SCF_ENABLED))
3219 			sc_stop(sc, SCF_FROZEN | SCF_HALTED);
3220 	}
3221 
3222 	/* Send contexts are frozen. Notify user space */
3223 	hfi1_set_uevent_bits(ppd, _HFI1_EVENT_FROZEN_BIT);
3224 
3225 	if (flags & FREEZE_ABORT) {
3226 		dd_dev_err(dd,
3227 			   "Aborted freeze recovery. Please REBOOT system\n");
3228 		return;
3229 	}
3230 	/* queue non-interrupt handler */
3231 	queue_work(ppd->hfi1_wq, &ppd->freeze_work);
3232 }
3233 
3234 /*
3235  * Wait until all 4 sub-blocks indicate that they have frozen or unfrozen,
3236  * depending on the "freeze" parameter.
3237  *
3238  * No need to return an error if it times out, our only option
3239  * is to proceed anyway.
3240  */
3241 static void wait_for_freeze_status(struct hfi1_devdata *dd, int freeze)
3242 {
3243 	unsigned long timeout;
3244 	u64 reg;
3245 
3246 	timeout = jiffies + msecs_to_jiffies(FREEZE_STATUS_TIMEOUT);
3247 	while (1) {
3248 		reg = read_csr(dd, CCE_STATUS);
3249 		if (freeze) {
3250 			/* waiting until all indicators are set */
3251 			if ((reg & ALL_FROZE) == ALL_FROZE)
3252 				return;	/* all done */
3253 		} else {
3254 			/* waiting until all indicators are clear */
3255 			if ((reg & ALL_FROZE) == 0)
3256 				return; /* all done */
3257 		}
3258 
3259 		if (time_after(jiffies, timeout)) {
3260 			dd_dev_err(dd,
3261 				"Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing",
3262 				freeze ? "" : "un",
3263 				reg & ALL_FROZE,
3264 				freeze ? ALL_FROZE : 0ull);
3265 			return;
3266 		}
3267 		usleep_range(80, 120);
3268 	}
3269 }
3270 
3271 /*
3272  * Do all freeze handling for the RXE block.
3273  */
3274 static void rxe_freeze(struct hfi1_devdata *dd)
3275 {
3276 	int i;
3277 
3278 	/* disable port */
3279 	clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3280 
3281 	/* disable all receive contexts */
3282 	for (i = 0; i < dd->num_rcv_contexts; i++)
3283 		hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, i);
3284 }
3285 
3286 /*
3287  * Unfreeze handling for the RXE block - kernel contexts only.
3288  * This will also enable the port.  User contexts will do unfreeze
3289  * handling on a per-context basis as they call into the driver.
3290  *
3291  */
3292 static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
3293 {
3294 	int i;
3295 
3296 	/* enable all kernel contexts */
3297 	for (i = 0; i < dd->n_krcv_queues; i++)
3298 		hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, i);
3299 
3300 	/* enable port */
3301 	add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3302 }
3303 
3304 /*
3305  * Non-interrupt SPC freeze handling.
3306  *
3307  * This is a work-queue function outside of the triggering interrupt.
3308  */
3309 void handle_freeze(struct work_struct *work)
3310 {
3311 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3312 								freeze_work);
3313 	struct hfi1_devdata *dd = ppd->dd;
3314 
3315 	/* wait for freeze indicators on all affected blocks */
3316 	dd_dev_info(dd, "Entering SPC freeze\n");
3317 	wait_for_freeze_status(dd, 1);
3318 
3319 	/* SPC is now frozen */
3320 
3321 	/* do send PIO freeze steps */
3322 	pio_freeze(dd);
3323 
3324 	/* do send DMA freeze steps */
3325 	sdma_freeze(dd);
3326 
3327 	/* do send egress freeze steps - nothing to do */
3328 
3329 	/* do receive freeze steps */
3330 	rxe_freeze(dd);
3331 
3332 	/*
3333 	 * Unfreeze the hardware - clear the freeze, wait for each
3334 	 * block's frozen bit to clear, then clear the frozen flag.
3335 	 */
3336 	write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_UNFREEZE_SMASK);
3337 	wait_for_freeze_status(dd, 0);
3338 
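	/* on A0 hardware, run a second freeze/unfreeze cycle */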
3339 	if (is_a0(dd)) {
3340 		write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
3341 		wait_for_freeze_status(dd, 1);
3342 		write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_UNFREEZE_SMASK);
3343 		wait_for_freeze_status(dd, 0);
3344 	}
3345 
3346 	/* do send PIO unfreeze steps for kernel contexts */
3347 	pio_kernel_unfreeze(dd);
3348 
3349 	/* do send DMA unfreeze steps */
3350 	sdma_unfreeze(dd);
3351 
3352 	/* do send egress unfreeze steps - nothing to do */
3353 
3354 	/* do receive unfreeze steps for kernel contexts */
3355 	rxe_kernel_unfreeze(dd);
3356 
3357 	/*
3358 	 * The unfreeze procedure touches global device registers when
3359 	 * it disables and re-enables RXE. Mark the device unfrozen
3360 	 * after all that is done so other parts of the driver waiting
3361 	 * for the device to unfreeze don't do things out of order.
3362 	 *
3363 	 * The above implies that the meaning of HFI1_FROZEN flag is
3364 	 * "Device has gone into freeze mode and freeze mode handling
3365 	 * is still in progress."
3366 	 *
3367 	 * The flag will be removed when freeze mode processing has
3368 	 * completed.
3369 	 */
3370 	dd->flags &= ~HFI1_FROZEN;
3371 	wake_up(&dd->event_queue);
3372 
3373 	/* no longer frozen */
3374 	dd_dev_err(dd, "Exiting SPC freeze\n");
3375 }
3376 
3377 /*
3378  * Handle a link up interrupt from the 8051.
3379  *
3380  * This is a work-queue function outside of the interrupt.
3381  */
3382 void handle_link_up(struct work_struct *work)
3383 {
3384 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3385 								link_up_work);
3386 	set_link_state(ppd, HLS_UP_INIT);
3387 
3388 	/* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
3389 	read_ltp_rtt(ppd->dd);
3390 	/*
3391 	 * OPA specifies that certain counters are cleared on a transition
3392 	 * to link up, so do that.
3393 	 */
3394 	clear_linkup_counters(ppd->dd);
3395 	/*
3396 	 * And (re)set link up default values.
3397 	 */
3398 	set_linkup_defaults(ppd);
3399 
3400 	/* enforce link speed enabled */
3401 	if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) {
3402 		/* oops - current speed is not enabled, bounce */
3403 		dd_dev_err(ppd->dd,
3404 			"Link speed active 0x%x is outside enabled 0x%x, downing link\n",
3405 			ppd->link_speed_active, ppd->link_speed_enabled);
3406 		set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
3407 			OPA_LINKDOWN_REASON_SPEED_POLICY);
3408 		set_link_state(ppd, HLS_DN_OFFLINE);
3409 		start_link(ppd);
3410 	}
3411 }
3412 
3413 /* Several pieces of LNI information were cached for SMA in ppd.
3414  * Reset these on link down */
3415 static void reset_neighbor_info(struct hfi1_pportdata *ppd)
3416 {
3417 	ppd->neighbor_guid = 0;
3418 	ppd->neighbor_port_number = 0;
3419 	ppd->neighbor_type = 0;
3420 	ppd->neighbor_fm_security = 0;
3421 }
3422 
3423 /*
3424  * Handle a link down interrupt from the 8051.
3425  *
3426  * This is a work-queue function outside of the interrupt.
3427  */
3428 void handle_link_down(struct work_struct *work)
3429 {
3430 	u8 lcl_reason, neigh_reason = 0;
3431 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3432 								link_down_work);
3433 
3434 	/* go offline first, then deal with reasons */
3435 	set_link_state(ppd, HLS_DN_OFFLINE);
3436 
3437 	lcl_reason = 0;
3438 	read_planned_down_reason_code(ppd->dd, &neigh_reason);
3439 
3440 	/*
3441 	 * If no reason, assume peer-initiated but missed
3442 	 * LinkGoingDown idle flits.
3443 	 */
3444 	if (neigh_reason == 0)
3445 		lcl_reason = OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN;
3446 
3447 	set_link_down_reason(ppd, lcl_reason, neigh_reason, 0);
3448 
3449 	reset_neighbor_info(ppd);
3450 
3451 	/* disable the port */
3452 	clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3453 
3454 	/* If there is no cable attached, turn the DC off. Otherwise,
3455 	 * start the link bring up. */
3456 	if (!qsfp_mod_present(ppd))
3457 		dc_shutdown(ppd->dd);
3458 	else
3459 		start_link(ppd);
3460 }
3461 
3462 void handle_link_bounce(struct work_struct *work)
3463 {
3464 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3465 							link_bounce_work);
3466 
3467 	/*
3468 	 * Only do something if the link is currently up.
3469 	 */
3470 	if (ppd->host_link_state & HLS_UP) {
3471 		set_link_state(ppd, HLS_DN_OFFLINE);
3472 		start_link(ppd);
3473 	} else {
3474 		dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n",
3475 			__func__, link_state_name(ppd->host_link_state));
3476 	}
3477 }
3478 
3479 /*
3480  * Mask conversion: Capability exchange to Port LTP.  The capability
3481  * exchange has an implicit 16b CRC that is mandatory.
3482  */
3483 static int cap_to_port_ltp(int cap)
3484 {
3485 	int port_ltp = PORT_LTP_CRC_MODE_16; /* this mode is mandatory */
3486 
3487 	if (cap & CAP_CRC_14B)
3488 		port_ltp |= PORT_LTP_CRC_MODE_14;
3489 	if (cap & CAP_CRC_48B)
3490 		port_ltp |= PORT_LTP_CRC_MODE_48;
3491 	if (cap & CAP_CRC_12B_16B_PER_LANE)
3492 		port_ltp |= PORT_LTP_CRC_MODE_PER_LANE;
3493 
3494 	return port_ltp;
3495 }
3496 
3497 /*
3498  * Convert an OPA Port LTP mask to capability mask
3499  */
3500 int port_ltp_to_cap(int port_ltp)
3501 {
3502 	int cap_mask = 0;
3503 
3504 	if (port_ltp & PORT_LTP_CRC_MODE_14)
3505 		cap_mask |= CAP_CRC_14B;
3506 	if (port_ltp & PORT_LTP_CRC_MODE_48)
3507 		cap_mask |= CAP_CRC_48B;
3508 	if (port_ltp & PORT_LTP_CRC_MODE_PER_LANE)
3509 		cap_mask |= CAP_CRC_12B_16B_PER_LANE;
3510 
3511 	return cap_mask;
3512 }
3513 
3514 /*
3515  * Convert a single DC LCB CRC mode to an OPA Port LTP mask.
3516  */
3517 static int lcb_to_port_ltp(int lcb_crc)
3518 {
3519 	int port_ltp = 0;
3520 
3521 	if (lcb_crc == LCB_CRC_12B_16B_PER_LANE)
3522 		port_ltp = PORT_LTP_CRC_MODE_PER_LANE;
3523 	else if (lcb_crc == LCB_CRC_48B)
3524 		port_ltp = PORT_LTP_CRC_MODE_48;
3525 	else if (lcb_crc == LCB_CRC_14B)
3526 		port_ltp = PORT_LTP_CRC_MODE_14;
3527 	else
3528 		port_ltp = PORT_LTP_CRC_MODE_16;
3529 
3530 	return port_ltp;
3531 }
3532 
3533 /*
3534  * Our neighbor has indicated that we are allowed to act as a fabric
3535  * manager, so place the full management partition key in the second
3536  * (0-based) pkey array position (see OPAv1, section 20.2.2.6.8). Note
3537  * that we should already have the limited management partition key in
3538  * array element 1, and also that the port is not yet up when
3539  * add_full_mgmt_pkey() is invoked.
3540  */
3541 static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
3542 {
3543 	struct hfi1_devdata *dd = ppd->dd;
3544 
3545 	/* Sanity check - ppd->pkeys[2] should be 0 */
3546 	if (ppd->pkeys[2] != 0)
3547 		dd_dev_err(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n",
3548 			   __func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
3549 	ppd->pkeys[2] = FULL_MGMT_P_KEY;
3550 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
3551 }
3552 
3553 /*
3554  * Convert the given link width to the OPA link width bitmask.
3555  */
3556 static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width)
3557 {
3558 	switch (width) {
3559 	case 0:
3560 		/*
3561 		 * Simulator and quick linkup do not set the width.
3562 		 * Just set it to 4x without complaint.
3563 		 */
3564 		if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR || quick_linkup)
3565 			return OPA_LINK_WIDTH_4X;
3566 		return 0; /* no lanes up */
3567 	case 1: return OPA_LINK_WIDTH_1X;
3568 	case 2: return OPA_LINK_WIDTH_2X;
3569 	case 3: return OPA_LINK_WIDTH_3X;
3570 	default:
3571 		dd_dev_info(dd, "%s: invalid width %d, using 4\n",
3572 			__func__, width);
3573 		/* fall through */
3574 	case 4: return OPA_LINK_WIDTH_4X;
3575 	}
3576 }
3577 
3578 /*
3579  * Do a population count on the bottom nibble.
3580  */
3581 static const u8 bit_counts[16] = {
3582 	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
3583 };
3584 static inline u8 nibble_to_count(u8 nibble)
3585 {
3586 	return bit_counts[nibble & 0xf];
3587 }
3588 
3589 /*
3590  * Read the active lane information from the 8051 registers and return
3591  * their widths.
3592  *
3593  * Active lane information is found in these 8051 registers:
3594  *	enable_lane_tx
3595  *	enable_lane_rx
3596  */
3597 static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
3598 			    u16 *rx_width)
3599 {
3600 	u16 tx, rx;
3601 	u8 enable_lane_rx;
3602 	u8 enable_lane_tx;
3603 	u8 tx_polarity_inversion;
3604 	u8 rx_polarity_inversion;
3605 	u8 max_rate;
3606 
3607 	/* read the active lanes */
3608 	read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
3609 				&rx_polarity_inversion, &max_rate);
3610 	read_local_lni(dd, &enable_lane_rx);
3611 
3612 	/* convert to counts */
3613 	tx = nibble_to_count(enable_lane_tx);
3614 	rx = nibble_to_count(enable_lane_rx);
3615 
3616 	/*
3617 	 * Set link_speed_active here, overriding what was set in
3618 	 * handle_verify_cap().  The ASIC 8051 firmware does not correctly
3619 	 * set the max_rate field in handle_verify_cap until v0.19.
3620 	 */
3621 	if ((dd->icode == ICODE_RTL_SILICON)
3622 				&& (dd->dc8051_ver < dc8051_ver(0, 19))) {
3623 		/* max_rate: 0 = 12.5G, 1 = 25G */
3624 		switch (max_rate) {
3625 		case 0:
3626 			dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G;
3627 			break;
3628 		default:
3629 			dd_dev_err(dd,
3630 				"%s: unexpected max rate %d, using 25Gb\n",
3631 				__func__, (int)max_rate);
3632 			/* fall through */
3633 		case 1:
3634 			dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
3635 			break;
3636 		}
3637 	}
3638 
3639 	dd_dev_info(dd,
3640 		"Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n",
3641 		enable_lane_tx, tx, enable_lane_rx, rx);
3642 	*tx_width = link_width_to_bits(dd, tx);
3643 	*rx_width = link_width_to_bits(dd, rx);
3644 }
3645 
3646 /*
3647  * Read verify_cap_local_fm_link_width[1] to obtain the link widths.
3648  * Valid after the end of VerifyCap and during LinkUp.  Does not change
3649  * after link up.  I.e. look elsewhere for downgrade information.
3650  *
3651  * Bits are:
3652  *	+ bits [7:4] contain the number of active transmitters
3653  *	+ bits [3:0] contain the number of active receivers
3654  * These are numbers 1 through 4 and can be different values if the
3655  * link is asymmetric.
3656  *
3657  * verify_cap_local_fm_link_width[0] retains its original value.
3658  */
3659 static void get_linkup_widths(struct hfi1_devdata *dd, u16 *tx_width,
3660 			      u16 *rx_width)
3661 {
3662 	u16 widths, tx, rx;
3663 	u8 misc_bits, local_flags;
3664 	u16 active_tx, active_rx;
3665 
3666 	read_vc_local_link_width(dd, &misc_bits, &local_flags, &widths);
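	/* widths[15:12] = active tx count, widths[11:8] = active rx count */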
3667 	tx = widths >> 12;
3668 	rx = (widths >> 8) & 0xf;
3669 
3670 	*tx_width = link_width_to_bits(dd, tx);
3671 	*rx_width = link_width_to_bits(dd, rx);
3672 
3673 	/* print the active widths */
3674 	get_link_widths(dd, &active_tx, &active_rx);
3675 }
3676 
3677 /*
3678  * Set ppd->link_width_active and ppd->link_width_downgrade_active using
3679  * hardware information when the link first comes up.
3680  *
3681  * The link width is not available until after VerifyCap.AllFramesReceived
3682  * (the trigger for handle_verify_cap), so this is outside that routine
3683  * and should be called when the 8051 signals linkup.
3684  */
3685 void get_linkup_link_widths(struct hfi1_pportdata *ppd)
3686 {
3687 	u16 tx_width, rx_width;
3688 
3689 	/* get end-of-LNI link widths */
3690 	get_linkup_widths(ppd->dd, &tx_width, &rx_width);
3691 
3692 	/* use tx_width as the link is supposed to be symmetric on link up */
3693 	ppd->link_width_active = tx_width;
3694 	/* link width downgrade active (LWD.A) starts out matching LW.A */
3695 	ppd->link_width_downgrade_tx_active = ppd->link_width_active;
3696 	ppd->link_width_downgrade_rx_active = ppd->link_width_active;
3697 	/* per OPA spec, on link up LWD.E resets to LWD.S */
3698 	ppd->link_width_downgrade_enabled = ppd->link_width_downgrade_supported;
3699 	/* cache the active egress rate (units of 10^6 bits/sec) */
3700 	ppd->current_egress_rate = active_egress_rate(ppd);
3701 }
3702 
3703 /*
3704  * Handle a verify capabilities interrupt from the 8051.
3705  *
3706  * This is a work-queue function outside of the interrupt.
3707  */
3708 void handle_verify_cap(struct work_struct *work)
3709 {
3710 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3711 								link_vc_work);
3712 	struct hfi1_devdata *dd = ppd->dd;
3713 	u64 reg;
3714 	u8 power_management;
3715 	u8 continuous;
3716 	u8 vcu;
3717 	u8 vau;
3718 	u8 z;
3719 	u16 vl15buf;
3720 	u16 link_widths;
3721 	u16 crc_mask;
3722 	u16 crc_val;
3723 	u16 device_id;
3724 	u16 active_tx, active_rx;
3725 	u8 partner_supported_crc;
3726 	u8 remote_tx_rate;
3727 	u8 device_rev;
3728 
3729 	set_link_state(ppd, HLS_VERIFY_CAP);
3730 
3731 	lcb_shutdown(dd, 0);
3732 	adjust_lcb_for_fpga_serdes(dd);
3733 
3734 	/*
3735 	 * These are now valid:
3736 	 *	remote VerifyCap fields in the general LNI config
3737 	 *	CSR DC8051_STS_REMOTE_GUID
3738 	 *	CSR DC8051_STS_REMOTE_NODE_TYPE
3739 	 *	CSR DC8051_STS_REMOTE_FM_SECURITY
3740 	 *	CSR DC8051_STS_REMOTE_PORT_NO
3741 	 */
3742 
3743 	read_vc_remote_phy(dd, &power_management, &continuous);
3744 	read_vc_remote_fabric(
3745 		dd,
3746 		&vau,
3747 		&z,
3748 		&vcu,
3749 		&vl15buf,
3750 		&partner_supported_crc);
3751 	read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths);
3752 	read_remote_device_id(dd, &device_id, &device_rev);
3753 	/*
3754 	 * And the 'MgmtAllowed' information, which is exchanged during
3755 	 * LNI, is also available at this point.
3756 	 */
3757 	read_mgmt_allowed(dd, &ppd->mgmt_allowed);
3758 	/* print the active widths */
3759 	get_link_widths(dd, &active_tx, &active_rx);
3760 	dd_dev_info(dd,
3761 		"Peer PHY: power management 0x%x, continuous updates 0x%x\n",
3762 		(int)power_management, (int)continuous);
3763 	dd_dev_info(dd,
3764 		"Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
3765 		(int)vau,
3766 		(int)z,
3767 		(int)vcu,
3768 		(int)vl15buf,
3769 		(int)partner_supported_crc);
3770 	dd_dev_info(dd, "Peer Link Width: tx rate 0x%x, widths 0x%x\n",
3771 		(u32)remote_tx_rate, (u32)link_widths);
3772 	dd_dev_info(dd, "Peer Device ID: 0x%04x, Revision 0x%02x\n",
3773 		(u32)device_id, (u32)device_rev);
3774 	/*
3775 	 * The peer vAU value just read is the peer receiver value.  HFI does
3776 	 * not support a transmit vAU of 0 (AU == 8).  We advertised that
3777 	 * with Z=1 in the fabric capabilities sent to the peer.  The peer
3778 	 * will see our Z=1, and, if it advertised a vAU of 0, will move its
3779 	 * receive to vAU of 1 (AU == 16).  Do the same here.  We do not care
3780 	 * about the peer Z value - our sent vAU is 3 (hardwired) and is not
3781 	 * subject to the Z value exception.
3782 	 */
3783 	if (vau == 0)
3784 		vau = 1;
3785 	set_up_vl15(dd, vau, vl15buf);
3786 
3787 	/* set up the LCB CRC mode */
3788 	crc_mask = ppd->port_crc_mode_enabled & partner_supported_crc;
3789 
3790 	/* order is important: use the lowest bit in common */
3791 	if (crc_mask & CAP_CRC_14B)
3792 		crc_val = LCB_CRC_14B;
3793 	else if (crc_mask & CAP_CRC_48B)
3794 		crc_val = LCB_CRC_48B;
3795 	else if (crc_mask & CAP_CRC_12B_16B_PER_LANE)
3796 		crc_val = LCB_CRC_12B_16B_PER_LANE;
3797 	else
3798 		crc_val = LCB_CRC_16B;
3799 
3800 	dd_dev_info(dd, "Final LCB CRC mode: %d\n", (int)crc_val);
3801 	write_csr(dd, DC_LCB_CFG_CRC_MODE,
3802 		  (u64)crc_val << DC_LCB_CFG_CRC_MODE_TX_VAL_SHIFT);
3803 
3804 	/* set (14b only) or clear sideband credit */
3805 	reg = read_csr(dd, SEND_CM_CTRL);
3806 	if (crc_val == LCB_CRC_14B && crc_14b_sideband) {
3807 		write_csr(dd, SEND_CM_CTRL,
3808 			reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
3809 	} else {
3810 		write_csr(dd, SEND_CM_CTRL,
3811 			reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
3812 	}
3813 
3814 	ppd->link_speed_active = 0;	/* invalid value */
3815 	if (dd->dc8051_ver < dc8051_ver(0, 20)) {
3816 		/* remote_tx_rate: 0 = 12.5G, 1 = 25G */
3817 		switch (remote_tx_rate) {
3818 		case 0:
3819 			ppd->link_speed_active = OPA_LINK_SPEED_12_5G;
3820 			break;
3821 		case 1:
3822 			ppd->link_speed_active = OPA_LINK_SPEED_25G;
3823 			break;
3824 		}
3825 	} else {
3826 		/* actual rate is highest bit of the ANDed rates */
3827 		u8 rate = remote_tx_rate & ppd->local_tx_rate;
3828 
3829 		if (rate & 2)
3830 			ppd->link_speed_active = OPA_LINK_SPEED_25G;
3831 		else if (rate & 1)
3832 			ppd->link_speed_active = OPA_LINK_SPEED_12_5G;
3833 	}
3834 	if (ppd->link_speed_active == 0) {
3835 		dd_dev_err(dd, "%s: unexpected remote tx rate %d, using 25Gb\n",
3836 			__func__, (int)remote_tx_rate);
3837 		ppd->link_speed_active = OPA_LINK_SPEED_25G;
3838 	}
3839 
3840 	/*
3841 	 * Cache the values of the supported, enabled, and active
3842 	 * LTP CRC modes to return in 'portinfo' queries. But the bit
3843 	 * flags that are returned in the portinfo query differ from
3844 	 * what's in the link_crc_mask, crc_sizes, and crc_val
3845 	 * variables. Convert these here.
3846 	 */
3847 	ppd->port_ltp_crc_mode = cap_to_port_ltp(link_crc_mask) << 8;
3848 		/* supported crc modes */
3849 	ppd->port_ltp_crc_mode |=
3850 		cap_to_port_ltp(ppd->port_crc_mode_enabled) << 4;
3851 		/* enabled crc modes */
3852 	ppd->port_ltp_crc_mode |= lcb_to_port_ltp(crc_val);
3853 		/* active crc mode */
3854 
3855 	/* set up the remote credit return table */
3856 	assign_remote_cm_au_table(dd, vcu);
3857 
3858 	/*
3859 	 * The LCB is reset on entry to handle_verify_cap(), so this must
3860 	 * be applied on every link up.
3861 	 *
3862 	 * Adjust LCB error kill enable to kill the link if
3863 	 * these RBUF errors are seen:
3864 	 *	REPLAY_BUF_MBE_SMASK
3865 	 *	FLIT_INPUT_BUF_MBE_SMASK
3866 	 */
3867 	if (is_a0(dd)) {			/* fixed in B0 */
3868 		reg = read_csr(dd, DC_LCB_CFG_LINK_KILL_EN);
3869 		reg |= DC_LCB_CFG_LINK_KILL_EN_REPLAY_BUF_MBE_SMASK
3870 			| DC_LCB_CFG_LINK_KILL_EN_FLIT_INPUT_BUF_MBE_SMASK;
3871 		write_csr(dd, DC_LCB_CFG_LINK_KILL_EN, reg);
3872 	}
3873 
3874 	/* pull LCB fifos out of reset - all fifo clocks must be stable */
3875 	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
3876 
3877 	/* give 8051 access to the LCB CSRs */
3878 	write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
3879 	set_8051_lcb_access(dd);
3880 
3881 	ppd->neighbor_guid =
3882 		read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
3883 	ppd->neighbor_port_number = read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
3884 					DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
3885 	ppd->neighbor_type =
3886 		read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
3887 		DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
3888 	ppd->neighbor_fm_security =
3889 		read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
3890 		DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
3891 	dd_dev_info(dd,
3892 		"Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
3893 		ppd->neighbor_guid, ppd->neighbor_type,
3894 		ppd->mgmt_allowed, ppd->neighbor_fm_security);
3895 	if (ppd->mgmt_allowed)
3896 		add_full_mgmt_pkey(ppd);
3897 
3898 	/* tell the 8051 to go to LinkUp */
3899 	set_link_state(ppd, HLS_GOING_UP);
3900 }
3901 
3902 /*
3903  * Apply the link width downgrade enabled policy against the current active
3904  * link widths.
3905  *
3906  * Called when the enabled policy changes or the active link widths change.
3907  */
3908 void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths)
3909 {
3910 	int skip = 1;
3911 	int do_bounce = 0;
3912 	u16 lwde = ppd->link_width_downgrade_enabled;
3913 	u16 tx, rx;
3914 
3915 	mutex_lock(&ppd->hls_lock);
3916 	/* only apply if the link is up */
3917 	if (ppd->host_link_state & HLS_UP)
3918 		skip = 0;
3919 	mutex_unlock(&ppd->hls_lock);
3920 	if (skip)
3921 		return;
3922 
3923 	if (refresh_widths) {
3924 		get_link_widths(ppd->dd, &tx, &rx);
3925 		ppd->link_width_downgrade_tx_active = tx;
3926 		ppd->link_width_downgrade_rx_active = rx;
3927 	}
3928 
3929 	if (lwde == 0) {
3930 		/* downgrade is disabled */
3931 
3932 		/* bounce if not at starting active width */
3933 		if ((ppd->link_width_active !=
3934 					ppd->link_width_downgrade_tx_active)
3935 				|| (ppd->link_width_active !=
3936 					ppd->link_width_downgrade_rx_active)) {
3937 			dd_dev_err(ppd->dd,
3938 				"Link downgrade is disabled and link has downgraded, downing link\n");
3939 			dd_dev_err(ppd->dd,
3940 				"  original 0x%x, tx active 0x%x, rx active 0x%x\n",
3941 				ppd->link_width_active,
3942 				ppd->link_width_downgrade_tx_active,
3943 				ppd->link_width_downgrade_rx_active);
3944 			do_bounce = 1;
3945 		}
3946 	} else if ((lwde & ppd->link_width_downgrade_tx_active) == 0
3947 		|| (lwde & ppd->link_width_downgrade_rx_active) == 0) {
3948 		/* Tx or Rx is outside the enabled policy */
3949 		dd_dev_err(ppd->dd,
3950 			"Link is outside of downgrade allowed, downing link\n");
3951 		dd_dev_err(ppd->dd,
3952 			"  enabled 0x%x, tx active 0x%x, rx active 0x%x\n",
3953 			lwde,
3954 			ppd->link_width_downgrade_tx_active,
3955 			ppd->link_width_downgrade_rx_active);
3956 		do_bounce = 1;
3957 	}
3958 
3959 	if (do_bounce) {
3960 		set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0,
3961 		  OPA_LINKDOWN_REASON_WIDTH_POLICY);
3962 		set_link_state(ppd, HLS_DN_OFFLINE);
3963 		start_link(ppd);
3964 	}
3965 }
3966 
3967 /*
3968  * Handle a link downgrade interrupt from the 8051.
3969  *
3970  * This is a work-queue function outside of the interrupt.
3971  */
3972 void handle_link_downgrade(struct work_struct *work)
3973 {
3974 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3975 							link_downgrade_work);
3976 
3977 	dd_dev_info(ppd->dd, "8051: Link width downgrade\n");
3978 	apply_link_downgrade_policy(ppd, 1);
3979 }
3980 
3981 static char *dcc_err_string(char *buf, int buf_len, u64 flags)
3982 {
3983 	return flag_string(buf, buf_len, flags, dcc_err_flags,
3984 		ARRAY_SIZE(dcc_err_flags));
3985 }
3986 
3987 static char *lcb_err_string(char *buf, int buf_len, u64 flags)
3988 {
3989 	return flag_string(buf, buf_len, flags, lcb_err_flags,
3990 		ARRAY_SIZE(lcb_err_flags));
3991 }
3992 
3993 static char *dc8051_err_string(char *buf, int buf_len, u64 flags)
3994 {
3995 	return flag_string(buf, buf_len, flags, dc8051_err_flags,
3996 		ARRAY_SIZE(dc8051_err_flags));
3997 }
3998 
3999 static char *dc8051_info_err_string(char *buf, int buf_len, u64 flags)
4000 {
4001 	return flag_string(buf, buf_len, flags, dc8051_info_err_flags,
4002 		ARRAY_SIZE(dc8051_info_err_flags));
4003 }
4004 
4005 static char *dc8051_info_host_msg_string(char *buf, int buf_len, u64 flags)
4006 {
4007 	return flag_string(buf, buf_len, flags, dc8051_info_host_msg_flags,
4008 		ARRAY_SIZE(dc8051_info_host_msg_flags));
4009 }
4010 
4011 static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
4012 {
4013 	struct hfi1_pportdata *ppd = dd->pport;
4014 	u64 info, err, host_msg;
4015 	int queue_link_down = 0;
4016 	char buf[96];
4017 
4018 	/* look at the flags */
4019 	if (reg & DC_DC8051_ERR_FLG_SET_BY_8051_SMASK) {
4020 		/* 8051 information set by firmware */
4021 		/* read DC8051_DBG_ERR_INFO_SET_BY_8051 for details */
4022 		info = read_csr(dd, DC_DC8051_DBG_ERR_INFO_SET_BY_8051);
4023 		err = (info >> DC_DC8051_DBG_ERR_INFO_SET_BY_8051_ERROR_SHIFT)
4024 			& DC_DC8051_DBG_ERR_INFO_SET_BY_8051_ERROR_MASK;
4025 		host_msg = (info >>
4026 			DC_DC8051_DBG_ERR_INFO_SET_BY_8051_HOST_MSG_SHIFT)
4027 			& DC_DC8051_DBG_ERR_INFO_SET_BY_8051_HOST_MSG_MASK;
4028 
4029 		/*
4030 		 * Handle error flags.
4031 		 */
4032 		if (err & FAILED_LNI) {
4033 			/*
4034 			 * LNI error indications are cleared by the 8051
4035 			 * only when starting polling.  Only pay attention
4036 			 * to them when in the states that occur during
4037 			 * LNI.
4038 			 */
4039 			if (ppd->host_link_state
4040 			    & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
4041 				queue_link_down = 1;
4042 				dd_dev_info(dd, "Link error: %s\n",
4043 					dc8051_info_err_string(buf,
4044 						sizeof(buf),
4045 						err & FAILED_LNI));
4046 			}
4047 			err &= ~(u64)FAILED_LNI;
4048 		}
4049 		if (err) {
4050 			/* report remaining errors, but do not do anything */
4051 			dd_dev_err(dd, "8051 info error: %s\n",
4052 				dc8051_info_err_string(buf, sizeof(buf), err));
4053 		}
4054 
4055 		/*
4056 		 * Handle host message flags.
4057 		 */
4058 		if (host_msg & HOST_REQ_DONE) {
4059 			/*
4060 			 * Presently, the driver does a busy wait for
4061 			 * host requests to complete.  This is only an
4062 			 * informational message.
4063 			 * NOTE: The 8051 clears the host message
4064 			 * information *on the next 8051 command*.
4065 			 * Therefore, when linkup is achieved,
4066 			 * this flag will still be set.
4067 			 */
4068 			host_msg &= ~(u64)HOST_REQ_DONE;
4069 		}
4070 		if (host_msg & BC_SMA_MSG) {
4071 			queue_work(ppd->hfi1_wq, &ppd->sma_message_work);
4072 			host_msg &= ~(u64)BC_SMA_MSG;
4073 		}
4074 		if (host_msg & LINKUP_ACHIEVED) {
4075 			dd_dev_info(dd, "8051: Link up\n");
4076 			queue_work(ppd->hfi1_wq, &ppd->link_up_work);
4077 			host_msg &= ~(u64)LINKUP_ACHIEVED;
4078 		}
4079 		if (host_msg & EXT_DEVICE_CFG_REQ) {
4080 			handle_8051_request(dd);
4081 			host_msg &= ~(u64)EXT_DEVICE_CFG_REQ;
4082 		}
4083 		if (host_msg & VERIFY_CAP_FRAME) {
4084 			queue_work(ppd->hfi1_wq, &ppd->link_vc_work);
4085 			host_msg &= ~(u64)VERIFY_CAP_FRAME;
4086 		}
4087 		if (host_msg & LINK_GOING_DOWN) {
4088 			const char *extra = "";
4089 			/* no downgrade action needed if going down */
4090 			if (host_msg & LINK_WIDTH_DOWNGRADED) {
4091 				host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
4092 				extra = " (ignoring downgrade)";
4093 			}
4094 			dd_dev_info(dd, "8051: Link down%s\n", extra);
4095 			queue_link_down = 1;
4096 			host_msg &= ~(u64)LINK_GOING_DOWN;
4097 		}
4098 		if (host_msg & LINK_WIDTH_DOWNGRADED) {
4099 			queue_work(ppd->hfi1_wq, &ppd->link_downgrade_work);
4100 			host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
4101 		}
4102 		if (host_msg) {
4103 			/* report remaining messages, but do not do anything */
4104 			dd_dev_info(dd, "8051 info host message: %s\n",
4105 				dc8051_info_host_msg_string(buf, sizeof(buf),
4106 					host_msg));
4107 		}
4108 
4109 		reg &= ~DC_DC8051_ERR_FLG_SET_BY_8051_SMASK;
4110 	}
4111 	if (reg & DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK) {
4112 		/*
4113 		 * Lost the 8051 heartbeat.  If this happens, we
4114 		 * receive constant interrupts about it.  Disable
4115 		 * the interrupt after the first.
4116 		 */
4117 		dd_dev_err(dd, "Lost 8051 heartbeat\n");
4118 		write_csr(dd, DC_DC8051_ERR_EN,
4119 			read_csr(dd, DC_DC8051_ERR_EN)
4120 			  & ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK);
4121 
4122 		reg &= ~DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK;
4123 	}
4124 	if (reg) {
4125 		/* report the error, but do not do anything */
4126 		dd_dev_err(dd, "8051 error: %s\n",
4127 			dc8051_err_string(buf, sizeof(buf), reg));
4128 	}
4129 
4130 	if (queue_link_down) {
4131 		/* if the link is already going down or disabled, do not
4132 		 * queue another */
4133 		if ((ppd->host_link_state
4134 				    & (HLS_GOING_OFFLINE|HLS_LINK_COOLDOWN))
4135 				|| ppd->link_enabled == 0) {
4136 			dd_dev_info(dd, "%s: not queuing link down\n",
4137 				__func__);
4138 		} else {
4139 			queue_work(ppd->hfi1_wq, &ppd->link_down_work);
4140 		}
4141 	}
4142 }
4143 
4144 static const char * const fm_config_txt[] = {
4145 [0] =
4146 	"BadHeadDist: Distance violation between two head flits",
4147 [1] =
4148 	"BadTailDist: Distance violation between two tail flits",
4149 [2] =
4150 	"BadCtrlDist: Distance violation between two credit control flits",
4151 [3] =
4152 	"BadCrdAck: Credits return for unsupported VL",
4153 [4] =
4154 	"UnsupportedVLMarker: Received VL Marker",
4155 [5] =
4156 	"BadPreempt: Exceeded the preemption nesting level",
4157 [6] =
4158 	"BadControlFlit: Received unsupported control flit",
4159 /* no 7 */
4160 [8] =
4161 	"UnsupportedVLMarker: Received VL Marker for unconfigured or disabled VL",
4162 };
4163 
4164 static const char * const port_rcv_txt[] = {
4165 [1] =
4166 	"BadPktLen: Illegal PktLen",
4167 [2] =
4168 	"PktLenTooLong: Packet longer than PktLen",
4169 [3] =
4170 	"PktLenTooShort: Packet shorter than PktLen",
4171 [4] =
4172 	"BadSLID: Illegal SLID (0, using multicast as SLID, does not include security validation of SLID)",
4173 [5] =
4174 	"BadDLID: Illegal DLID (0, doesn't match HFI)",
4175 [6] =
4176 	"BadL2: Illegal L2 opcode",
4177 [7] =
4178 	"BadSC: Unsupported SC",
4179 [9] =
4180 	"BadRC: Illegal RC",
4181 [11] =
4182 	"PreemptError: Preempting with same VL",
4183 [12] =
4184 	"PreemptVL15: Preempting a VL15 packet",
4185 };
4186 
4187 #define OPA_LDR_FMCONFIG_OFFSET 16
4188 #define OPA_LDR_PORTRCV_OFFSET 0
4189 static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
4190 {
4191 	u64 info, hdr0, hdr1;
4192 	const char *extra;
4193 	char buf[96];
4194 	struct hfi1_pportdata *ppd = dd->pport;
4195 	u8 lcl_reason = 0;
4196 	int do_bounce = 0;
4197 
4198 	if (reg & DCC_ERR_FLG_UNCORRECTABLE_ERR_SMASK) {
4199 		if (!(dd->err_info_uncorrectable & OPA_EI_STATUS_SMASK)) {
4200 			info = read_csr(dd, DCC_ERR_INFO_UNCORRECTABLE);
4201 			dd->err_info_uncorrectable = info & OPA_EI_CODE_SMASK;
4202 			/* set status bit */
4203 			dd->err_info_uncorrectable |= OPA_EI_STATUS_SMASK;
4204 		}
4205 		reg &= ~DCC_ERR_FLG_UNCORRECTABLE_ERR_SMASK;
4206 	}
4207 
4208 	if (reg & DCC_ERR_FLG_LINK_ERR_SMASK) {
4209 		struct hfi1_pportdata *ppd = dd->pport;
4210 		/* this counter saturates at (2^32) - 1 */
4211 		if (ppd->link_downed < (u32)UINT_MAX)
4212 			ppd->link_downed++;
4213 		reg &= ~DCC_ERR_FLG_LINK_ERR_SMASK;
4214 	}
4215 
4216 	if (reg & DCC_ERR_FLG_FMCONFIG_ERR_SMASK) {
4217 		u8 reason_valid = 1;
4218 
4219 		info = read_csr(dd, DCC_ERR_INFO_FMCONFIG);
4220 		if (!(dd->err_info_fmconfig & OPA_EI_STATUS_SMASK)) {
4221 			dd->err_info_fmconfig = info & OPA_EI_CODE_SMASK;
4222 			/* set status bit */
4223 			dd->err_info_fmconfig |= OPA_EI_STATUS_SMASK;
4224 		}
4225 		switch (info) {
4226 		case 0:
4227 		case 1:
4228 		case 2:
4229 		case 3:
4230 		case 4:
4231 		case 5:
4232 		case 6:
4233 			extra = fm_config_txt[info];
4234 			break;
4235 		case 8:
4236 			extra = fm_config_txt[info];
4237 			if (ppd->port_error_action &
4238 			    OPA_PI_MASK_FM_CFG_UNSUPPORTED_VL_MARKER) {
4239 				do_bounce = 1;
4240 				/*
4241 				 * lcl_reason cannot be derived from info
4242 				 * for this error
4243 				 */
4244 				lcl_reason =
4245 				  OPA_LINKDOWN_REASON_UNSUPPORTED_VL_MARKER;
4246 			}
4247 			break;
4248 		default:
4249 			reason_valid = 0;
4250 			snprintf(buf, sizeof(buf), "reserved%lld", info);
4251 			extra = buf;
4252 			break;
4253 		}
4254 
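		/* bounce the link if PortErrorAction has the bit set for this error */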
4255 		if (reason_valid && !do_bounce) {
4256 			do_bounce = ppd->port_error_action &
4257 					(1 << (OPA_LDR_FMCONFIG_OFFSET + info));
4258 			lcl_reason = info + OPA_LINKDOWN_REASON_BAD_HEAD_DIST;
4259 		}
4260 
4261 		/* just report this */
4262 		dd_dev_info(dd, "DCC Error: fmconfig error: %s\n", extra);
4263 		reg &= ~DCC_ERR_FLG_FMCONFIG_ERR_SMASK;
4264 	}
4265 
4266 	if (reg & DCC_ERR_FLG_RCVPORT_ERR_SMASK) {
4267 		u8 reason_valid = 1;
4268 
4269 		info = read_csr(dd, DCC_ERR_INFO_PORTRCV);
4270 		hdr0 = read_csr(dd, DCC_ERR_INFO_PORTRCV_HDR0);
4271 		hdr1 = read_csr(dd, DCC_ERR_INFO_PORTRCV_HDR1);
4272 		if (!(dd->err_info_rcvport.status_and_code &
4273 		      OPA_EI_STATUS_SMASK)) {
4274 			dd->err_info_rcvport.status_and_code =
4275 				info & OPA_EI_CODE_SMASK;
4276 			/* set status bit */
4277 			dd->err_info_rcvport.status_and_code |=
4278 				OPA_EI_STATUS_SMASK;
4279 			/* save first 2 flits in the packet that caused
4280 			 * the error */
4281 			 dd->err_info_rcvport.packet_flit1 = hdr0;
4282 			 dd->err_info_rcvport.packet_flit2 = hdr1;
4283 		}
4284 		switch (info) {
4285 		case 1:
4286 		case 2:
4287 		case 3:
4288 		case 4:
4289 		case 5:
4290 		case 6:
4291 		case 7:
4292 		case 9:
4293 		case 11:
4294 		case 12:
4295 			extra = port_rcv_txt[info];
4296 			break;
4297 		default:
4298 			reason_valid = 0;
4299 			snprintf(buf, sizeof(buf), "reserved%lld", info);
4300 			extra = buf;
4301 			break;
4302 		}
4303 
4304 		if (reason_valid && !do_bounce) {
4305 			do_bounce = ppd->port_error_action &
4306 					(1 << (OPA_LDR_PORTRCV_OFFSET + info));
4307 			lcl_reason = info + OPA_LINKDOWN_REASON_RCV_ERROR_0;
4308 		}
4309 
4310 		/* just report this */
4311 		dd_dev_info(dd, "DCC Error: PortRcv error: %s\n", extra);
4312 		dd_dev_info(dd, "           hdr0 0x%llx, hdr1 0x%llx\n",
4313 			hdr0, hdr1);
4314 
4315 		reg &= ~DCC_ERR_FLG_RCVPORT_ERR_SMASK;
4316 	}
4317 
4318 	if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK) {
4319 		/* informative only */
4320 		dd_dev_info(dd, "8051 access to LCB blocked\n");
4321 		reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK;
4322 	}
4323 	if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK) {
4324 		/* informative only */
4325 		dd_dev_info(dd, "host access to LCB blocked\n");
4326 		reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
4327 	}
4328 
4329 	/* report any remaining errors */
4330 	if (reg)
4331 		dd_dev_info(dd, "DCC Error: %s\n",
4332 			dcc_err_string(buf, sizeof(buf), reg));
4333 
4334 	if (lcl_reason == 0)
4335 		lcl_reason = OPA_LINKDOWN_REASON_UNKNOWN;
4336 
4337 	if (do_bounce) {
4338 		dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
4339 		set_link_down_reason(ppd, lcl_reason, 0, lcl_reason);
4340 		queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
4341 	}
4342 }
4343 
4344 static void handle_lcb_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
4345 {
4346 	char buf[96];
4347 
4348 	dd_dev_info(dd, "LCB Error: %s\n",
4349 		lcb_err_string(buf, sizeof(buf), reg));
4350 }
4351 
4352 /*
4353  * CCE block DC interrupt.  Source is < 8.
4354  */
4355 static void is_dc_int(struct hfi1_devdata *dd, unsigned int source)
4356 {
4357 	const struct err_reg_info *eri = &dc_errs[source];
4358 
4359 	if (eri->handler) {
4360 		interrupt_clear_down(dd, 0, eri);
4361 	} else if (source == 3 /* dc_lbm_int */) {
4362 		/*
4363 		 * This indicates that a parity error has occurred on the
4364 		 * address/control lines presented to the LBM.  The error
4365 		 * is a single pulse, there is no associated error flag,
4366 		 * and it is non-maskable.  This is because if a parity
4367 		 * error occurs on the request the request is dropped.
4368 		 * This should never occur, but it is nice to know if it
4369 		 * ever does.
4370 		 */
4371 		dd_dev_err(dd, "Parity error in DC LBM block\n");
4372 	} else {
4373 		dd_dev_err(dd, "Invalid DC interrupt %u\n", source);
4374 	}
4375 }
4376 
4377 /*
4378  * TX block send credit interrupt.  Source is < 160.
4379  */
4380 static void is_send_credit_int(struct hfi1_devdata *dd, unsigned int source)
4381 {
4382 	sc_group_release_update(dd, source);
4383 }
4384 
4385 /*
4386  * TX block SDMA interrupt.  Source is < 48.
4387  *
4388  * SDMA interrupts are grouped by type:
4389  *
4390  *	 0 -  N-1 = SDma
4391  *	 N - 2N-1 = SDmaProgress
4392  *	2N - 3N-1 = SDmaIdle
4393  */
4394 static void is_sdma_eng_int(struct hfi1_devdata *dd, unsigned int source)
4395 {
4396 	/* what interrupt */
4397 	unsigned int what  = source / TXE_NUM_SDMA_ENGINES;
4398 	/* which engine */
4399 	unsigned int which = source % TXE_NUM_SDMA_ENGINES;
4400 
4401 #ifdef CONFIG_SDMA_VERBOSITY
4402 	dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", which,
4403 		   slashstrip(__FILE__), __LINE__, __func__);
4404 	sdma_dumpstate(&dd->per_sdma[which]);
4405 #endif
4406 
4407 	if (likely(what < 3 && which < dd->num_sdma)) {
4408 		sdma_engine_interrupt(&dd->per_sdma[which], 1ull << source);
4409 	} else {
4410 		/* should not happen */
4411 		dd_dev_err(dd, "Invalid SDMA interrupt 0x%x\n", source);
4412 	}
4413 }
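
/*
 * Worked example of the decode above, assuming TXE_NUM_SDMA_ENGINES is 16
 * (check the actual definition): source 35 gives what = 35 / 16 = 2
 * (SDmaIdle) and which = 35 % 16 = 3, so engine 3 is handed the status
 * bit 1ull << 35.
 */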
4414 
4415 /*
4416  * RX block receive available interrupt.  Source is < 160.
4417  */
4418 static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
4419 {
4420 	struct hfi1_ctxtdata *rcd;
4421 	char *err_detail;
4422 
4423 	if (likely(source < dd->num_rcv_contexts)) {
4424 		rcd = dd->rcd[source];
4425 		if (rcd) {
4426 			if (source < dd->first_user_ctxt)
4427 				rcd->do_interrupt(rcd, 0);
4428 			else
4429 				handle_user_interrupt(rcd);
4430 			return;	/* OK */
4431 		}
4432 		/* received an interrupt, but no rcd */
4433 		err_detail = "dataless";
4434 	} else {
4435 		/* received an interrupt, but are not using that context */
4436 		err_detail = "out of range";
4437 	}
4438 	dd_dev_err(dd, "unexpected %s receive available context interrupt %u\n",
4439 		err_detail, source);
4440 }
4441 
4442 /*
4443  * RX block receive urgent interrupt.  Source is < 160.
4444  */
4445 static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
4446 {
4447 	struct hfi1_ctxtdata *rcd;
4448 	char *err_detail;
4449 
4450 	if (likely(source < dd->num_rcv_contexts)) {
4451 		rcd = dd->rcd[source];
4452 		if (rcd) {
4453 			/* only pay attention to user urgent interrupts */
4454 			if (source >= dd->first_user_ctxt)
4455 				handle_user_interrupt(rcd);
4456 			return;	/* OK */
4457 		}
4458 		/* received an interrupt, but no rcd */
4459 		err_detail = "dataless";
4460 	} else {
4461 		/* received an interrupt, but are not using that context */
4462 		err_detail = "out of range";
4463 	}
4464 	dd_dev_err(dd, "unexpected %s receive urgent context interrupt %u\n",
4465 		err_detail, source);
4466 }
4467 
4468 /*
4469  * Reserved range interrupt.  Should not be called in normal operation.
4470  */
4471 static void is_reserved_int(struct hfi1_devdata *dd, unsigned int source)
4472 {
4473 	char name[64];
4474 
4475 	dd_dev_err(dd, "unexpected %s interrupt\n",
4476 				is_reserved_name(name, sizeof(name), source));
4477 }
4478 
4479 static const struct is_table is_table[] = {
4480 /* start		     end
4481 				name func		interrupt func */
4482 { IS_GENERAL_ERR_START,  IS_GENERAL_ERR_END,
4483 				is_misc_err_name,	is_misc_err_int },
4484 { IS_SDMAENG_ERR_START,  IS_SDMAENG_ERR_END,
4485 				is_sdma_eng_err_name,	is_sdma_eng_err_int },
4486 { IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END,
4487 				is_sendctxt_err_name,	is_sendctxt_err_int },
4488 { IS_SDMA_START,	     IS_SDMA_END,
4489 				is_sdma_eng_name,	is_sdma_eng_int },
4490 { IS_VARIOUS_START,	     IS_VARIOUS_END,
4491 				is_various_name,	is_various_int },
4492 { IS_DC_START,	     IS_DC_END,
4493 				is_dc_name,		is_dc_int },
4494 { IS_RCVAVAIL_START,     IS_RCVAVAIL_END,
4495 				is_rcv_avail_name,	is_rcv_avail_int },
4496 { IS_RCVURGENT_START,    IS_RCVURGENT_END,
4497 				is_rcv_urgent_name,	is_rcv_urgent_int },
4498 { IS_SENDCREDIT_START,   IS_SENDCREDIT_END,
4499 				is_send_credit_name,	is_send_credit_int},
4500 { IS_RESERVED_START,     IS_RESERVED_END,
4501 				is_reserved_name,	is_reserved_int},
4502 };
4503 
4504 /*
4505  * Interrupt source interrupt - called when the given source has an interrupt.
4506  * Source is a bit index into an array of 64-bit integers.
4507  */
4508 static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
4509 {
4510 	const struct is_table *entry;
4511 
4512 	/* avoids a double compare by walking the table in-order */
4513 	for (entry = &is_table[0]; entry->is_name; entry++) {
4514 		if (source < entry->end) {
4515 			trace_hfi1_interrupt(dd, entry, source);
4516 			entry->is_int(dd, source - entry->start);
4517 			return;
4518 		}
4519 	}
4520 	/* fell off the end */
4521 	dd_dev_err(dd, "invalid interrupt source %u\n", source);
4522 }
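
/*
 * For example, a source that falls in [IS_SDMA_START, IS_SDMA_END) matches
 * the IS_SDMA table entry above, so is_sdma_eng_int() is called with the
 * zero-based offset source - IS_SDMA_START.
 */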
4523 
4524 /*
4525  * General interrupt handler.  This is able to correctly handle
4526  * all interrupts in case INTx is used.
4527  */
4528 static irqreturn_t general_interrupt(int irq, void *data)
4529 {
4530 	struct hfi1_devdata *dd = data;
4531 	u64 regs[CCE_NUM_INT_CSRS];
4532 	u32 bit;
4533 	int i;
4534 
4535 	this_cpu_inc(*dd->int_counter);
4536 
4537 	/* phase 1: scan and clear all handled interrupts */
4538 	for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
4539 		if (dd->gi_mask[i] == 0) {
4540 			regs[i] = 0;	/* used later */
4541 			continue;
4542 		}
4543 		regs[i] = read_csr(dd, CCE_INT_STATUS + (8 * i)) &
4544 				dd->gi_mask[i];
4545 		/* only clear if anything is set */
4546 		if (regs[i])
4547 			write_csr(dd, CCE_INT_CLEAR + (8 * i), regs[i]);
4548 	}
4549 
4550 	/* phase 2: call the appropriate handler */
4551 	for_each_set_bit(bit, (unsigned long *)&regs[0],
4552 						CCE_NUM_INT_CSRS*64) {
4553 		is_interrupt(dd, bit);
4554 	}
4555 
4556 	return IRQ_HANDLED;
4557 }
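
/*
 * The bit number handed to is_interrupt() above is the global source
 * index: CSR i contributes bits i * 64 .. i * 64 + 63.  For example, bit 3
 * set in CCE_INT_STATUS CSR 1 (and enabled in gi_mask[1]) is reported as
 * source 67.
 */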
4558 
4559 static irqreturn_t sdma_interrupt(int irq, void *data)
4560 {
4561 	struct sdma_engine *sde = data;
4562 	struct hfi1_devdata *dd = sde->dd;
4563 	u64 status;
4564 
4565 #ifdef CONFIG_SDMA_VERBOSITY
4566 	dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
4567 		   slashstrip(__FILE__), __LINE__, __func__);
4568 	sdma_dumpstate(sde);
4569 #endif
4570 
4571 	this_cpu_inc(*dd->int_counter);
4572 
4573 	/* This read_csr is really bad in the hot path */
4574 	status = read_csr(dd,
4575 			CCE_INT_STATUS + (8*(IS_SDMA_START/64)))
4576 			& sde->imask;
4577 	if (likely(status)) {
4578 		/* clear the interrupt(s) */
4579 		write_csr(dd,
4580 			CCE_INT_CLEAR + (8*(IS_SDMA_START/64)),
4581 			status);
4582 
4583 		/* handle the interrupt(s) */
4584 		sdma_engine_interrupt(sde, status);
4585 	} else
4586 		dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
4587 			sde->this_idx);
4588 
4589 	return IRQ_HANDLED;
4590 }
4591 
4592 /*
4593  * Clear the receive interrupt, forcing the write and making sure
4594  * we have data from the chip, pushing everything in front of it
4595  * back to the host.
4596  */
4597 static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd)
4598 {
4599 	struct hfi1_devdata *dd = rcd->dd;
4600 	u32 addr = CCE_INT_CLEAR + (8 * rcd->ireg);
4601 
4602 	mmiowb();	/* make sure everything before is written */
4603 	write_csr(dd, addr, rcd->imask);
4604 	/* force the above write on the chip and get a value back */
4605 	(void)read_csr(dd, addr);
4606 }
4607 
4608 /* force the receive interrupt */
4609 static inline void force_recv_intr(struct hfi1_ctxtdata *rcd)
4610 {
4611 	write_csr(rcd->dd, CCE_INT_FORCE + (8 * rcd->ireg), rcd->imask);
4612 }
4613 
4614 /* return non-zero if a packet is present */
4615 static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
4616 {
4617 	if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
4618 		return (rcd->seq_cnt ==
4619 				rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
4620 
4621 	/* else is DMA rtail */
4622 	return (rcd->head != get_rcvhdrtail(rcd));
4623 }
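
/*
 * In other words: without DMA_RTAIL the sequence number in the next RHF is
 * compared against the expected rcd->seq_cnt, while with DMA_RTAIL the
 * software head is compared against the DMA'd tail index.
 */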
4624 
4625 /*
4626  * Receive packet IRQ handler.  This routine expects to be on its own IRQ.
4627  * This routine will try to handle packets immediately (latency), but if
4628  * it finds too many, it will invoke the thread handler (bandwidth).  The
4629  * chip receive interrupt is *not* cleared down until this or the thread (if
4630  * invoked) is finished.  The intent is to avoid extra interrupts while we
4631  * are processing packets anyway.
4632  */
4633 static irqreturn_t receive_context_interrupt(int irq, void *data)
4634 {
4635 	struct hfi1_ctxtdata *rcd = data;
4636 	struct hfi1_devdata *dd = rcd->dd;
4637 	int disposition;
4638 	int present;
4639 
4640 	trace_hfi1_receive_interrupt(dd, rcd->ctxt);
4641 	this_cpu_inc(*dd->int_counter);
4642 
4643 	/* receive interrupt remains blocked while processing packets */
4644 	disposition = rcd->do_interrupt(rcd, 0);
4645 
4646 	/*
4647 	 * Too many packets were seen while processing packets in this
4648 	 * IRQ handler.  Invoke the handler thread.  The receive interrupt
4649 	 * remains blocked.
4650 	 */
4651 	if (disposition == RCV_PKT_LIMIT)
4652 		return IRQ_WAKE_THREAD;
4653 
4654 	/*
4655 	 * The packet processor detected no more packets.  Clear the receive
4656 	 * interrupt and recheck for a packet that may have arrived
4657 	 * after the previous check and interrupt clear.  If a packet arrived,
4658 	 * force another interrupt.
4659 	 */
4660 	clear_recv_intr(rcd);
4661 	present = check_packet_present(rcd);
4662 	if (present)
4663 		force_recv_intr(rcd);
4664 
4665 	return IRQ_HANDLED;
4666 }
4667 
4668 /*
4669  * Receive packet thread handler.  This expects to be invoked with the
4670  * receive interrupt still blocked.
4671  */
4672 static irqreturn_t receive_context_thread(int irq, void *data)
4673 {
4674 	struct hfi1_ctxtdata *rcd = data;
4675 	int present;
4676 
4677 	/* receive interrupt is still blocked from the IRQ handler */
4678 	(void)rcd->do_interrupt(rcd, 1);
4679 
4680 	/*
4681 	 * The packet processor will only return if it detected no more
4682 	 * packets.  Hold IRQs here so we can safely clear the interrupt and
4683 	 * recheck for a packet that may have arrived after the previous
4684 	 * check and the interrupt clear.  If a packet arrived, force another
4685 	 * interrupt.
4686 	 */
4687 	local_irq_disable();
4688 	clear_recv_intr(rcd);
4689 	present = check_packet_present(rcd);
4690 	if (present)
4691 		force_recv_intr(rcd);
4692 	local_irq_enable();
4693 
4694 	return IRQ_HANDLED;
4695 }
4696 
4697 /* ========================================================================= */
4698 
4699 u32 read_physical_state(struct hfi1_devdata *dd)
4700 {
4701 	u64 reg;
4702 
4703 	reg = read_csr(dd, DC_DC8051_STS_CUR_STATE);
4704 	return (reg >> DC_DC8051_STS_CUR_STATE_PORT_SHIFT)
4705 				& DC_DC8051_STS_CUR_STATE_PORT_MASK;
4706 }
4707 
4708 static u32 read_logical_state(struct hfi1_devdata *dd)
4709 {
4710 	u64 reg;
4711 
4712 	reg = read_csr(dd, DCC_CFG_PORT_CONFIG);
4713 	return (reg >> DCC_CFG_PORT_CONFIG_LINK_STATE_SHIFT)
4714 				& DCC_CFG_PORT_CONFIG_LINK_STATE_MASK;
4715 }
4716 
4717 static void set_logical_state(struct hfi1_devdata *dd, u32 chip_lstate)
4718 {
4719 	u64 reg;
4720 
4721 	reg = read_csr(dd, DCC_CFG_PORT_CONFIG);
4722 	/* clear current state, set new state */
4723 	reg &= ~DCC_CFG_PORT_CONFIG_LINK_STATE_SMASK;
4724 	reg |= (u64)chip_lstate << DCC_CFG_PORT_CONFIG_LINK_STATE_SHIFT;
4725 	write_csr(dd, DCC_CFG_PORT_CONFIG, reg);
4726 }
4727 
4728 /*
4729  * Use the 8051 to read a LCB CSR.
4730  */
4731 static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data)
4732 {
4733 	u32 regno;
4734 	int ret;
4735 
4736 	if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
4737 		if (acquire_lcb_access(dd, 0) == 0) {
4738 			*data = read_csr(dd, addr);
4739 			release_lcb_access(dd, 0);
4740 			return 0;
4741 		}
4742 		return -EBUSY;
4743 	}
4744 
4745 	/* register is an index of LCB registers: (offset - base) / 8 */
4746 	regno = (addr - DC_LCB_CFG_RUN) >> 3;
4747 	ret = do_8051_command(dd, HCMD_READ_LCB_CSR, regno, data);
4748 	if (ret != HCMD_SUCCESS)
4749 		return -EBUSY;
4750 	return 0;
4751 }
4752 
4753 /*
4754  * Read an LCB CSR.  Access may not be in host control, so check.
4755  * Return 0 on success, -EBUSY on failure.
4756  */
4757 int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data)
4758 {
4759 	struct hfi1_pportdata *ppd = dd->pport;
4760 
4761 	/* if up, go through the 8051 for the value */
4762 	if (ppd->host_link_state & HLS_UP)
4763 		return read_lcb_via_8051(dd, addr, data);
4764 	/* if going up or down, no access */
4765 	if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
4766 		return -EBUSY;
4767 	/* otherwise, host has access */
4768 	*data = read_csr(dd, addr);
4769 	return 0;
4770 }
4771 
4772 /*
4773  * Use the 8051 to write a LCB CSR.
4774  */
4775 static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data)
4776 {
4777 
4778 	if (acquire_lcb_access(dd, 0) == 0) {
4779 		write_csr(dd, addr, data);
4780 		release_lcb_access(dd, 0);
4781 		return 0;
4782 	}
4783 	return -EBUSY;
4784 }
4785 
4786 /*
4787  * Write an LCB CSR.  Access may not be in host control, so check.
4788  * Return 0 on success, -EBUSY on failure.
4789  */
4790 int write_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 data)
4791 {
4792 	struct hfi1_pportdata *ppd = dd->pport;
4793 
4794 	/* if up, go through the 8051 for the value */
4795 	if (ppd->host_link_state & HLS_UP)
4796 		return write_lcb_via_8051(dd, addr, data);
4797 	/* if going up or down, no access */
4798 	if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
4799 		return -EBUSY;
4800 	/* otherwise, host has access */
4801 	write_csr(dd, addr, data);
4802 	return 0;
4803 }
4804 
4805 /*
4806  * Returns:
4807  *	< 0 = Linux error, not able to get access
4808  *	> 0 = 8051 command RETURN_CODE
4809  */
4810 static int do_8051_command(
4811 	struct hfi1_devdata *dd,
4812 	u32 type,
4813 	u64 in_data,
4814 	u64 *out_data)
4815 {
4816 	u64 reg, completed;
4817 	int return_code;
4818 	unsigned long flags;
4819 	unsigned long timeout;
4820 
4821 	hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
4822 
4823 	/*
4824 	 * Alternative to holding the lock for a long time:
4825 	 * - keep busy wait - have other users bounce off
4826 	 */
4827 	spin_lock_irqsave(&dd->dc8051_lock, flags);
4828 
4829 	/* We can't send any commands to the 8051 if it's in reset */
4830 	if (dd->dc_shutdown) {
4831 		return_code = -ENODEV;
4832 		goto fail;
4833 	}
4834 
4835 	/*
4836 	 * If an 8051 host command timed out previously, then the 8051 is
4837 	 * stuck.
4838 	 *
4839 	 * On first timeout, attempt to reset and restart the entire DC
4840 	 * block (including 8051). (Is this too big of a hammer?)
4841 	 *
4842 	 * If the 8051 times out a second time, the reset did not bring it
4843 	 * back to healthy life. In that case, fail any subsequent commands.
4844 	 */
4845 	if (dd->dc8051_timed_out) {
4846 		if (dd->dc8051_timed_out > 1) {
4847 			dd_dev_err(dd,
4848 				   "Previous 8051 host command timed out, skipping command %u\n",
4849 				   type);
4850 			return_code = -ENXIO;
4851 			goto fail;
4852 		}
4853 		spin_unlock_irqrestore(&dd->dc8051_lock, flags);
4854 		dc_shutdown(dd);
4855 		dc_start(dd);
4856 		spin_lock_irqsave(&dd->dc8051_lock, flags);
4857 	}
4858 
4859 	/*
4860 	 * If there is no timeout, then the 8051 command interface is
4861 	 * waiting for a command.
4862 	 */
4863 
4864 	/*
4865 	 * Do two writes: the first to stabilize the type and req_data, the
4866 	 * second to activate.
4867 	 */
4868 	reg = ((u64)type & DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_MASK)
4869 			<< DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_SHIFT
4870 		| (in_data & DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_MASK)
4871 			<< DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_SHIFT;
4872 	write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, reg);
4873 	reg |= DC_DC8051_CFG_HOST_CMD_0_REQ_NEW_SMASK;
4874 	write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, reg);
4875 
4876 	/* wait for completion, alternate: interrupt */
4877 	timeout = jiffies + msecs_to_jiffies(DC8051_COMMAND_TIMEOUT);
4878 	while (1) {
4879 		reg = read_csr(dd, DC_DC8051_CFG_HOST_CMD_1);
4880 		completed = reg & DC_DC8051_CFG_HOST_CMD_1_COMPLETED_SMASK;
4881 		if (completed)
4882 			break;
4883 		if (time_after(jiffies, timeout)) {
4884 			dd->dc8051_timed_out++;
4885 			dd_dev_err(dd, "8051 host command %u timeout\n", type);
4886 			if (out_data)
4887 				*out_data = 0;
4888 			return_code = -ETIMEDOUT;
4889 			goto fail;
4890 		}
4891 		udelay(2);
4892 	}
4893 
4894 	if (out_data) {
4895 		*out_data = (reg >> DC_DC8051_CFG_HOST_CMD_1_RSP_DATA_SHIFT)
4896 				& DC_DC8051_CFG_HOST_CMD_1_RSP_DATA_MASK;
4897 		if (type == HCMD_READ_LCB_CSR) {
4898 			/* top 16 bits are in a different register */
4899 			*out_data |= (read_csr(dd, DC_DC8051_CFG_EXT_DEV_1)
4900 				& DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SMASK)
4901 				<< (48
4902 				    - DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SHIFT);
4903 		}
4904 	}
4905 	return_code = (reg >> DC_DC8051_CFG_HOST_CMD_1_RETURN_CODE_SHIFT)
4906 				& DC_DC8051_CFG_HOST_CMD_1_RETURN_CODE_MASK;
4907 	dd->dc8051_timed_out = 0;
4908 	/*
4909 	 * Clear command for next user.
4910 	 */
4911 	write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
4912 
4913 fail:
4914 	spin_unlock_irqrestore(&dd->dc8051_lock, flags);
4915 
4916 	return return_code;
4917 }
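
/*
 * Command sequence used above: write type/data to HOST_CMD_0, write again
 * with REQ_NEW set, poll HOST_CMD_1 for COMPLETED, extract RSP_DATA and
 * RETURN_CODE, then clear HOST_CMD_0 for the next user.
 * set_physical_link_state() below is a minimal caller.
 */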
4918 
4919 static int set_physical_link_state(struct hfi1_devdata *dd, u64 state)
4920 {
4921 	return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL);
4922 }
4923 
4924 static int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
4925 			    u8 lane_id, u32 config_data)
4926 {
4927 	u64 data;
4928 	int ret;
4929 
4930 	data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT
4931 		| (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT
4932 		| (u64)config_data << LOAD_DATA_DATA_SHIFT;
4933 	ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
4934 	if (ret != HCMD_SUCCESS) {
4935 		dd_dev_err(dd,
4936 			"load 8051 config: field id %d, lane %d, err %d\n",
4937 			(int)field_id, (int)lane_id, ret);
4938 	}
4939 	return ret;
4940 }
4941 
4942 /*
4943  * Read the 8051 firmware "registers".  Use the RAM directly.  Always
4944  * set the result, even on error.
4945  * Return 0 on success, -errno on failure
4946  */
4947 static int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
4948 			    u32 *result)
4949 {
4950 	u64 big_data;
4951 	u32 addr;
4952 	int ret;
4953 
4954 	/* address start depends on the lane_id */
4955 	if (lane_id < 4)
4956 		addr = (4 * NUM_GENERAL_FIELDS)
4957 			+ (lane_id * 4 * NUM_LANE_FIELDS);
4958 	else
4959 		addr = 0;
4960 	addr += field_id * 4;
4961 
4962 	/* read is in 8-byte chunks, hardware will truncate the address down */
4963 	ret = read_8051_data(dd, addr, 8, &big_data);
4964 
4965 	if (ret == 0) {
4966 		/* extract the 4 bytes we want */
4967 		if (addr & 0x4)
4968 			*result = (u32)(big_data >> 32);
4969 		else
4970 			*result = (u32)big_data;
4971 	} else {
4972 		*result = 0;
4973 		dd_dev_err(dd, "%s: direct read failed, lane %d, field %d!\n",
4974 			__func__, lane_id, field_id);
4975 	}
4976 
4977 	return ret;
4978 }
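
/*
 * Addressing example (illustrative only; NUM_GENERAL_FIELDS and
 * NUM_LANE_FIELDS are taken here as 16 and 8): lane_id 1, field_id 2 gives
 * addr = 4 * 16 + 1 * 4 * 8 + 2 * 4 = 104 = 0x68.  The 8-byte read starts
 * at 0x68 and, since bit 2 of the address is clear, the low 32 bits of
 * big_data are returned.
 */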
4979 
4980 static int write_vc_local_phy(struct hfi1_devdata *dd, u8 power_management,
4981 			      u8 continuous)
4982 {
4983 	u32 frame;
4984 
4985 	frame = continuous << CONTINIOUS_REMOTE_UPDATE_SUPPORT_SHIFT
4986 		| power_management << POWER_MANAGEMENT_SHIFT;
4987 	return load_8051_config(dd, VERIFY_CAP_LOCAL_PHY,
4988 				GENERAL_CONFIG, frame);
4989 }
4990 
4991 static int write_vc_local_fabric(struct hfi1_devdata *dd, u8 vau, u8 z, u8 vcu,
4992 				 u16 vl15buf, u8 crc_sizes)
4993 {
4994 	u32 frame;
4995 
4996 	frame = (u32)vau << VAU_SHIFT
4997 		| (u32)z << Z_SHIFT
4998 		| (u32)vcu << VCU_SHIFT
4999 		| (u32)vl15buf << VL15BUF_SHIFT
5000 		| (u32)crc_sizes << CRC_SIZES_SHIFT;
5001 	return load_8051_config(dd, VERIFY_CAP_LOCAL_FABRIC,
5002 				GENERAL_CONFIG, frame);
5003 }
5004 
5005 static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
5006 				     u8 *flag_bits, u16 *link_widths)
5007 {
5008 	u32 frame;
5009 
5010 	read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
5011 				&frame);
5012 	*misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK;
5013 	*flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK;
5014 	*link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
5015 }
5016 
5017 static int write_vc_local_link_width(struct hfi1_devdata *dd,
5018 				     u8 misc_bits,
5019 				     u8 flag_bits,
5020 				     u16 link_widths)
5021 {
5022 	u32 frame;
5023 
5024 	frame = (u32)misc_bits << MISC_CONFIG_BITS_SHIFT
5025 		| (u32)flag_bits << LOCAL_FLAG_BITS_SHIFT
5026 		| (u32)link_widths << LINK_WIDTH_SHIFT;
5027 	return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
5028 		     frame);
5029 }
5030 
5031 static int write_local_device_id(struct hfi1_devdata *dd, u16 device_id,
5032 				 u8 device_rev)
5033 {
5034 	u32 frame;
5035 
5036 	frame = ((u32)device_id << LOCAL_DEVICE_ID_SHIFT)
5037 		| ((u32)device_rev << LOCAL_DEVICE_REV_SHIFT);
5038 	return load_8051_config(dd, LOCAL_DEVICE_ID, GENERAL_CONFIG, frame);
5039 }
5040 
5041 static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
5042 				  u8 *device_rev)
5043 {
5044 	u32 frame;
5045 
5046 	read_8051_config(dd, REMOTE_DEVICE_ID, GENERAL_CONFIG, &frame);
5047 	*device_id = (frame >> REMOTE_DEVICE_ID_SHIFT) & REMOTE_DEVICE_ID_MASK;
5048 	*device_rev = (frame >> REMOTE_DEVICE_REV_SHIFT)
5049 			& REMOTE_DEVICE_REV_MASK;
5050 }
5051 
5052 void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b)
5053 {
5054 	u32 frame;
5055 
5056 	read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame);
5057 	*ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK;
5058 	*ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK;
5059 }
5060 
5061 static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
5062 			       u8 *continuous)
5063 {
5064 	u32 frame;
5065 
5066 	read_8051_config(dd, VERIFY_CAP_REMOTE_PHY, GENERAL_CONFIG, &frame);
5067 	*power_management = (frame >> POWER_MANAGEMENT_SHIFT)
5068 					& POWER_MANAGEMENT_MASK;
5069 	*continuous = (frame >> CONTINIOUS_REMOTE_UPDATE_SUPPORT_SHIFT)
5070 					& CONTINIOUS_REMOTE_UPDATE_SUPPORT_MASK;
5071 }
5072 
5073 static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
5074 				  u8 *vcu, u16 *vl15buf, u8 *crc_sizes)
5075 {
5076 	u32 frame;
5077 
5078 	read_8051_config(dd, VERIFY_CAP_REMOTE_FABRIC, GENERAL_CONFIG, &frame);
5079 	*vau = (frame >> VAU_SHIFT) & VAU_MASK;
5080 	*z = (frame >> Z_SHIFT) & Z_MASK;
5081 	*vcu = (frame >> VCU_SHIFT) & VCU_MASK;
5082 	*vl15buf = (frame >> VL15BUF_SHIFT) & VL15BUF_MASK;
5083 	*crc_sizes = (frame >> CRC_SIZES_SHIFT) & CRC_SIZES_MASK;
5084 }
5085 
5086 static void read_vc_remote_link_width(struct hfi1_devdata *dd,
5087 				      u8 *remote_tx_rate,
5088 				      u16 *link_widths)
5089 {
5090 	u32 frame;
5091 
5092 	read_8051_config(dd, VERIFY_CAP_REMOTE_LINK_WIDTH, GENERAL_CONFIG,
5093 				&frame);
5094 	*remote_tx_rate = (frame >> REMOTE_TX_RATE_SHIFT)
5095 				& REMOTE_TX_RATE_MASK;
5096 	*link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
5097 }
5098 
5099 static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx)
5100 {
5101 	u32 frame;
5102 
5103 	read_8051_config(dd, LOCAL_LNI_INFO, GENERAL_CONFIG, &frame);
5104 	*enable_lane_rx = (frame >> ENABLE_LANE_RX_SHIFT) & ENABLE_LANE_RX_MASK;
5105 }
5106 
5107 static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed)
5108 {
5109 	u32 frame;
5110 
5111 	read_8051_config(dd, REMOTE_LNI_INFO, GENERAL_CONFIG, &frame);
5112 	*mgmt_allowed = (frame >> MGMT_ALLOWED_SHIFT) & MGMT_ALLOWED_MASK;
5113 }
5114 
5115 static void read_last_local_state(struct hfi1_devdata *dd, u32 *lls)
5116 {
5117 	read_8051_config(dd, LAST_LOCAL_STATE_COMPLETE, GENERAL_CONFIG, lls);
5118 }
5119 
5120 static void read_last_remote_state(struct hfi1_devdata *dd, u32 *lrs)
5121 {
5122 	read_8051_config(dd, LAST_REMOTE_STATE_COMPLETE, GENERAL_CONFIG, lrs);
5123 }
5124 
5125 void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality)
5126 {
5127 	u32 frame;
5128 	int ret;
5129 
5130 	*link_quality = 0;
5131 	if (dd->pport->host_link_state & HLS_UP) {
5132 		ret = read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG,
5133 					&frame);
5134 		if (ret == 0)
5135 			*link_quality = (frame >> LINK_QUALITY_SHIFT)
5136 						& LINK_QUALITY_MASK;
5137 	}
5138 }
5139 
5140 static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc)
5141 {
5142 	u32 frame;
5143 
5144 	read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG, &frame);
5145 	*pdrrc = (frame >> DOWN_REMOTE_REASON_SHIFT) & DOWN_REMOTE_REASON_MASK;
5146 }
5147 
5148 static int read_tx_settings(struct hfi1_devdata *dd,
5149 			    u8 *enable_lane_tx,
5150 			    u8 *tx_polarity_inversion,
5151 			    u8 *rx_polarity_inversion,
5152 			    u8 *max_rate)
5153 {
5154 	u32 frame;
5155 	int ret;
5156 
5157 	ret = read_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, &frame);
5158 	*enable_lane_tx = (frame >> ENABLE_LANE_TX_SHIFT)
5159 				& ENABLE_LANE_TX_MASK;
5160 	*tx_polarity_inversion = (frame >> TX_POLARITY_INVERSION_SHIFT)
5161 				& TX_POLARITY_INVERSION_MASK;
5162 	*rx_polarity_inversion = (frame >> RX_POLARITY_INVERSION_SHIFT)
5163 				& RX_POLARITY_INVERSION_MASK;
5164 	*max_rate = (frame >> MAX_RATE_SHIFT) & MAX_RATE_MASK;
5165 	return ret;
5166 }
5167 
5168 static int write_tx_settings(struct hfi1_devdata *dd,
5169 			     u8 enable_lane_tx,
5170 			     u8 tx_polarity_inversion,
5171 			     u8 rx_polarity_inversion,
5172 			     u8 max_rate)
5173 {
5174 	u32 frame;
5175 
5176 	/* no need to mask, all variable sizes match field widths */
5177 	frame = enable_lane_tx << ENABLE_LANE_TX_SHIFT
5178 		| tx_polarity_inversion << TX_POLARITY_INVERSION_SHIFT
5179 		| rx_polarity_inversion << RX_POLARITY_INVERSION_SHIFT
5180 		| max_rate << MAX_RATE_SHIFT;
5181 	return load_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, frame);
5182 }
5183 
5184 static void check_fabric_firmware_versions(struct hfi1_devdata *dd)
5185 {
5186 	u32 frame, version, prod_id;
5187 	int ret, lane;
5188 
5189 	/* 4 lanes */
5190 	for (lane = 0; lane < 4; lane++) {
5191 		ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame);
5192 		if (ret) {
5193 			dd_dev_err(
5194 				dd,
5195 				"Unable to read lane %d firmware details\n",
5196 				lane);
5197 			continue;
5198 		}
5199 		version = (frame >> SPICO_ROM_VERSION_SHIFT)
5200 					& SPICO_ROM_VERSION_MASK;
5201 		prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT)
5202 					& SPICO_ROM_PROD_ID_MASK;
5203 		dd_dev_info(dd,
5204 			"Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
5205 			lane, version, prod_id);
5206 	}
5207 }
5208 
5209 /*
5210  * Read an idle LCB message.
5211  *
5212  * Returns 0 on success, -EINVAL on error
5213  */
5214 static int read_idle_message(struct hfi1_devdata *dd, u64 type, u64 *data_out)
5215 {
5216 	int ret;
5217 
5218 	ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG,
5219 		type, data_out);
5220 	if (ret != HCMD_SUCCESS) {
5221 		dd_dev_err(dd, "read idle message: type %d, err %d\n",
5222 			(u32)type, ret);
5223 		return -EINVAL;
5224 	}
5225 	dd_dev_info(dd, "%s: read idle message 0x%llx\n", __func__, *data_out);
5226 	/* return only the payload as we already know the type */
5227 	*data_out >>= IDLE_PAYLOAD_SHIFT;
5228 	return 0;
5229 }
5230 
5231 /*
5232  * Read an idle SMA message.  To be done in response to a notification from
5233  * the 8051.
5234  *
5235  * Returns 0 on success, -EINVAL on error
5236  */
5237 static int read_idle_sma(struct hfi1_devdata *dd, u64 *data)
5238 {
5239 	return read_idle_message(dd,
5240 			(u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT, data);
5241 }
5242 
5243 /*
5244  * Send an idle LCB message.
5245  *
5246  * Returns 0 on success, -EINVAL on error
5247  */
5248 static int send_idle_message(struct hfi1_devdata *dd, u64 data)
5249 {
5250 	int ret;
5251 
5252 	dd_dev_info(dd, "%s: sending idle message 0x%llx\n", __func__, data);
5253 	ret = do_8051_command(dd, HCMD_SEND_LCB_IDLE_MSG, data, NULL);
5254 	if (ret != HCMD_SUCCESS) {
5255 		dd_dev_err(dd, "send idle message: data 0x%llx, err %d\n",
5256 			data, ret);
5257 		return -EINVAL;
5258 	}
5259 	return 0;
5260 }
5261 
5262 /*
5263  * Send an idle SMA message.
5264  *
5265  * Returns 0 on success, -EINVAL on error
5266  */
5267 int send_idle_sma(struct hfi1_devdata *dd, u64 message)
5268 {
5269 	u64 data;
5270 
5271 	data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT)
5272 		| ((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT);
5273 	return send_idle_message(dd, data);
5274 }
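
/*
 * Idle message layout as used here: the message type occupies the bits
 * below IDLE_PAYLOAD_SHIFT and the payload the bits above it, which is
 * why read_idle_message() shifts the type away before returning only the
 * payload.
 */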
5275 
5276 /*
5277  * Initialize the LCB then do a quick link up.  This may or may not be
5278  * in loopback.
5279  *
5280  * return 0 on success, -errno on error
5281  */
5282 static int do_quick_linkup(struct hfi1_devdata *dd)
5283 {
5284 	u64 reg;
5285 	unsigned long timeout;
5286 	int ret;
5287 
5288 	lcb_shutdown(dd, 0);
5289 
5290 	if (loopback) {
5291 		/* LCB_CFG_LOOPBACK.VAL = 2 */
5292 		/* LCB_CFG_LANE_WIDTH.VAL = 0 */
5293 		write_csr(dd, DC_LCB_CFG_LOOPBACK,
5294 			IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT);
5295 		write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
5296 	}
5297 
5298 	/* start the LCBs */
5299 	/* LCB_CFG_TX_FIFOS_RESET.VAL = 0 */
5300 	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
5301 
5302 	/* simulator only loopback steps */
5303 	if (loopback && dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
5304 		/* LCB_CFG_RUN.EN = 1 */
5305 		write_csr(dd, DC_LCB_CFG_RUN,
5306 			1ull << DC_LCB_CFG_RUN_EN_SHIFT);
5307 
5308 		/* watch LCB_STS_LINK_TRANSFER_ACTIVE */
5309 		timeout = jiffies + msecs_to_jiffies(10);
5310 		while (1) {
5311 			reg = read_csr(dd,
5312 				DC_LCB_STS_LINK_TRANSFER_ACTIVE);
5313 			if (reg)
5314 				break;
5315 			if (time_after(jiffies, timeout)) {
5316 				dd_dev_err(dd,
5317 					"timeout waiting for LINK_TRANSFER_ACTIVE\n");
5318 				return -ETIMEDOUT;
5319 			}
5320 			udelay(2);
5321 		}
5322 
5323 		write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
5324 			1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
5325 	}
5326 
5327 	if (!loopback) {
5328 		/*
5329 		 * When doing quick linkup and not in loopback, both
5330 		 * sides must be done with LCB set-up before either
5331 		 * starts the quick linkup.  Put a delay here so that
5332 		 * both sides can be started and have a chance to be
5333 		 * done with LCB set up before resuming.
5334 		 */
5335 		dd_dev_err(dd,
5336 			"Pausing for peer to be finished with LCB set up\n");
5337 		msleep(5000);
5338 		dd_dev_err(dd,
5339 			"Continuing with quick linkup\n");
5340 	}
5341 
5342 	write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
5343 	set_8051_lcb_access(dd);
5344 
5345 	/*
5346 	 * State "quick" LinkUp request sets the physical link state to
5347 	 * LinkUp without a verify capability sequence.
5348 	 * This state is in simulator v37 and later.
5349 	 */
5350 	ret = set_physical_link_state(dd, PLS_QUICK_LINKUP);
5351 	if (ret != HCMD_SUCCESS) {
5352 		dd_dev_err(dd,
5353 			"%s: set physical link state to quick LinkUp failed with return %d\n",
5354 			__func__, ret);
5355 
5356 		set_host_lcb_access(dd);
5357 		write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
5358 
5359 		if (ret >= 0)
5360 			ret = -EINVAL;
5361 		return ret;
5362 	}
5363 
5364 	return 0; /* success */
5365 }
5366 
5367 /*
5368  * Set the SerDes to internal loopback mode.
5369  * Returns 0 on success, -errno on error.
5370  */
5371 static int set_serdes_loopback_mode(struct hfi1_devdata *dd)
5372 {
5373 	int ret;
5374 
5375 	ret = set_physical_link_state(dd, PLS_INTERNAL_SERDES_LOOPBACK);
5376 	if (ret == HCMD_SUCCESS)
5377 		return 0;
5378 	dd_dev_err(dd,
5379 		"Set physical link state to SerDes Loopback failed with return %d\n",
5380 		ret);
5381 	if (ret >= 0)
5382 		ret = -EINVAL;
5383 	return ret;
5384 }
5385 
5386 /*
5387  * Do all special steps to set up loopback.
5388  */
5389 static int init_loopback(struct hfi1_devdata *dd)
5390 {
5391 	dd_dev_info(dd, "Entering loopback mode\n");
5392 
5393 	/* all loopbacks should disable self GUID check */
5394 	write_csr(dd, DC_DC8051_CFG_MODE,
5395 		(read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK));
5396 
5397 	/*
5398 	 * The simulator has only one loopback option - LCB.  Switch
5399 	 * to that option, which includes quick link up.
5400 	 *
5401 	 * Accept all valid loopback values.
5402 	 */
5403 	if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
5404 		&& (loopback == LOOPBACK_SERDES
5405 			|| loopback == LOOPBACK_LCB
5406 			|| loopback == LOOPBACK_CABLE)) {
5407 		loopback = LOOPBACK_LCB;
5408 		quick_linkup = 1;
5409 		return 0;
5410 	}
5411 
5412 	/* handle serdes loopback */
5413 	if (loopback == LOOPBACK_SERDES) {
5414 		/* internal serdes loopback needs quick linkup on RTL */
5415 		if (dd->icode == ICODE_RTL_SILICON)
5416 			quick_linkup = 1;
5417 		return set_serdes_loopback_mode(dd);
5418 	}
5419 
5420 	/* LCB loopback - handled at poll time */
5421 	if (loopback == LOOPBACK_LCB) {
5422 		quick_linkup = 1; /* LCB is always quick linkup */
5423 
5424 		/* not supported in emulation due to emulation RTL changes */
5425 		if (dd->icode == ICODE_FPGA_EMULATION) {
5426 			dd_dev_err(dd,
5427 				"LCB loopback not supported in emulation\n");
5428 			return -EINVAL;
5429 		}
5430 		return 0;
5431 	}
5432 
5433 	/* external cable loopback requires no extra steps */
5434 	if (loopback == LOOPBACK_CABLE)
5435 		return 0;
5436 
5437 	dd_dev_err(dd, "Invalid loopback mode %d\n", loopback);
5438 	return -EINVAL;
5439 }
5440 
5441 /*
5442  * Translate from the OPA_LINK_WIDTH handed to us by the FM to bits
5443  * used in the Verify Capability link width attribute.
5444  */
5445 static u16 opa_to_vc_link_widths(u16 opa_widths)
5446 {
5447 	int i;
5448 	u16 result = 0;
5449 
5450 	static const struct link_bits {
5451 		u16 from;
5452 		u16 to;
5453 	} opa_link_xlate[] = {
5454 		{ OPA_LINK_WIDTH_1X, 1 << (1-1)  },
5455 		{ OPA_LINK_WIDTH_2X, 1 << (2-1)  },
5456 		{ OPA_LINK_WIDTH_3X, 1 << (3-1)  },
5457 		{ OPA_LINK_WIDTH_4X, 1 << (4-1)  },
5458 	};
5459 
5460 	for (i = 0; i < ARRAY_SIZE(opa_link_xlate); i++) {
5461 		if (opa_widths & opa_link_xlate[i].from)
5462 			result |= opa_link_xlate[i].to;
5463 	}
5464 	return result;
5465 }
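
/*
 * Example: an enabled width set of OPA_LINK_WIDTH_1X | OPA_LINK_WIDTH_4X
 * translates to (1 << 0) | (1 << 3) = 0x9 in the Verify Capability
 * attribute.
 */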
5466 
5467 /*
5468  * Set link attributes before moving to polling.
5469  */
5470 static int set_local_link_attributes(struct hfi1_pportdata *ppd)
5471 {
5472 	struct hfi1_devdata *dd = ppd->dd;
5473 	u8 enable_lane_tx;
5474 	u8 tx_polarity_inversion;
5475 	u8 rx_polarity_inversion;
5476 	int ret;
5477 
5478 	/* reset our fabric serdes to clear any lingering problems */
5479 	fabric_serdes_reset(dd);
5480 
5481 	/* set the local tx rate - need to read-modify-write */
5482 	ret = read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
5483 		&rx_polarity_inversion, &ppd->local_tx_rate);
5484 	if (ret)
5485 		goto set_local_link_attributes_fail;
5486 
5487 	if (dd->dc8051_ver < dc8051_ver(0, 20)) {
5488 		/* set the tx rate to the fastest enabled */
5489 		if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
5490 			ppd->local_tx_rate = 1;
5491 		else
5492 			ppd->local_tx_rate = 0;
5493 	} else {
5494 		/* set the tx rate to all enabled */
5495 		ppd->local_tx_rate = 0;
5496 		if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
5497 			ppd->local_tx_rate |= 2;
5498 		if (ppd->link_speed_enabled & OPA_LINK_SPEED_12_5G)
5499 			ppd->local_tx_rate |= 1;
5500 	}
5501 
5502 	enable_lane_tx = 0xF; /* enable all four lanes */
5503 	ret = write_tx_settings(dd, enable_lane_tx, tx_polarity_inversion,
5504 		     rx_polarity_inversion, ppd->local_tx_rate);
5505 	if (ret != HCMD_SUCCESS)
5506 		goto set_local_link_attributes_fail;
5507 
5508 	/*
5509 	 * DC supports continuous updates.
5510 	 */
5511 	ret = write_vc_local_phy(dd, 0 /* no power management */,
5512 				     1 /* continuous updates */);
5513 	if (ret != HCMD_SUCCESS)
5514 		goto set_local_link_attributes_fail;
5515 
5516 	/* z=1 in the next call: AU of 0 is not supported by the hardware */
5517 	ret = write_vc_local_fabric(dd, dd->vau, 1, dd->vcu, dd->vl15_init,
5518 				    ppd->port_crc_mode_enabled);
5519 	if (ret != HCMD_SUCCESS)
5520 		goto set_local_link_attributes_fail;
5521 
5522 	ret = write_vc_local_link_width(dd, 0, 0,
5523 		     opa_to_vc_link_widths(ppd->link_width_enabled));
5524 	if (ret != HCMD_SUCCESS)
5525 		goto set_local_link_attributes_fail;
5526 
5527 	/* let peer know who we are */
5528 	ret = write_local_device_id(dd, dd->pcidev->device, dd->minrev);
5529 	if (ret == HCMD_SUCCESS)
5530 		return 0;
5531 
5532 set_local_link_attributes_fail:
5533 	dd_dev_err(dd,
5534 		"Failed to set local link attributes, return 0x%x\n",
5535 		ret);
5536 	return ret;
5537 }
5538 
5539 /*
5540  * Call this to start the link.  Schedule a retry if the cable is not
5541  * present or if unable to start polling.  Do not do anything if the
5542  * link is disabled.  Returns 0 if link is disabled or moved to polling
5543  */
5544 int start_link(struct hfi1_pportdata *ppd)
5545 {
5546 	if (!ppd->link_enabled) {
5547 		dd_dev_info(ppd->dd,
5548 			"%s: stopping link start because link is disabled\n",
5549 			__func__);
5550 		return 0;
5551 	}
5552 	if (!ppd->driver_link_ready) {
5553 		dd_dev_info(ppd->dd,
5554 			"%s: stopping link start because driver is not ready\n",
5555 			__func__);
5556 		return 0;
5557 	}
5558 
5559 	if (qsfp_mod_present(ppd) || loopback == LOOPBACK_SERDES ||
5560 			loopback == LOOPBACK_LCB ||
5561 			ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
5562 		return set_link_state(ppd, HLS_DN_POLL);
5563 
5564 	dd_dev_info(ppd->dd,
5565 		"%s: stopping link start because no cable is present\n",
5566 		__func__);
5567 	return -EAGAIN;
5568 }
5569 
5570 static void reset_qsfp(struct hfi1_pportdata *ppd)
5571 {
5572 	struct hfi1_devdata *dd = ppd->dd;
5573 	u64 mask, qsfp_mask;
5574 
5575 	mask = (u64)QSFP_HFI0_RESET_N;
5576 	qsfp_mask = read_csr(dd,
5577 		dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
5578 	qsfp_mask |= mask;
5579 	write_csr(dd,
5580 		dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE,
5581 		qsfp_mask);
5582 
5583 	qsfp_mask = read_csr(dd,
5584 		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
5585 	qsfp_mask &= ~mask;
5586 	write_csr(dd,
5587 		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
5588 		qsfp_mask);
5589 
5590 	udelay(10);
5591 
5592 	qsfp_mask |= mask;
5593 	write_csr(dd,
5594 		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
5595 		qsfp_mask);
5596 }
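
/*
 * Sequence above: enable the RESET_N output, drive it low (the pin is
 * active low per its _N suffix, so this asserts reset), hold for 10 us,
 * then drive it high again to release the module from reset.
 */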
5597 
5598 static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
5599 					u8 *qsfp_interrupt_status)
5600 {
5601 	struct hfi1_devdata *dd = ppd->dd;
5602 
5603 	if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
5604 		(qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
5605 		dd_dev_info(dd,
5606 			"%s: QSFP cable temperature too high\n",
5607 			__func__);
5608 
5609 	if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
5610 		(qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
5611 		dd_dev_info(dd,
5612 			"%s: QSFP cable temperature too low\n",
5613 			__func__);
5614 
5615 	if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
5616 		(qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
5617 		dd_dev_info(dd,
5618 			"%s: QSFP supply voltage too high\n",
5619 			__func__);
5620 
5621 	if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) ||
5622 		(qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
5623 		dd_dev_info(dd,
5624 			"%s: QSFP supply voltage too low\n",
5625 			__func__);
5626 
5627 	/* Byte 2 is vendor specific */
5628 
5629 	if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) ||
5630 		(qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
5631 		dd_dev_info(dd,
5632 			"%s: Cable RX channel 1/2 power too high\n",
5633 			__func__);
5634 
5635 	if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) ||
5636 		(qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
5637 		dd_dev_info(dd,
5638 			"%s: Cable RX channel 1/2 power too low\n",
5639 			__func__);
5640 
5641 	if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) ||
5642 		(qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
5643 		dd_dev_info(dd,
5644 			"%s: Cable RX channel 3/4 power too high\n",
5645 			__func__);
5646 
5647 	if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) ||
5648 		(qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
5649 		dd_dev_info(dd,
5650 			"%s: Cable RX channel 3/4 power too low\n",
5651 			__func__);
5652 
5653 	if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) ||
5654 		(qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
5655 		dd_dev_info(dd,
5656 			"%s: Cable TX channel 1/2 bias too high\n",
5657 			__func__);
5658 
5659 	if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) ||
5660 		(qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
5661 		dd_dev_info(dd,
5662 			"%s: Cable TX channel 1/2 bias too low\n",
5663 			__func__);
5664 
5665 	if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) ||
5666 		(qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
5667 		dd_dev_info(dd,
5668 			"%s: Cable TX channel 3/4 bias too high\n",
5669 			__func__);
5670 
5671 	if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) ||
5672 		(qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
5673 		dd_dev_info(dd,
5674 			"%s: Cable TX channel 3/4 bias too low\n",
5675 			__func__);
5676 
5677 	if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) ||
5678 		(qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
5679 		dd_dev_info(dd,
5680 			"%s: Cable TX channel 1/2 power too high\n",
5681 			__func__);
5682 
5683 	if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) ||
5684 		(qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
5685 		dd_dev_info(dd,
5686 			"%s: Cable TX channel 1/2 power too low\n",
5687 			__func__);
5688 
5689 	if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) ||
5690 		(qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
5691 		dd_dev_info(dd,
5692 			"%s: Cable TX channel 3/4 power too high\n",
5693 			__func__);
5694 
5695 	if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) ||
5696 		(qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
5697 		dd_dev_info(dd,
5698 			"%s: Cable TX channel 3/4 power too low\n",
5699 			__func__);
5700 
5701 	/* Bytes 9-10 and 11-12 are reserved */
5702 	/* Bytes 13-15 are vendor specific */
5703 
5704 	return 0;
5705 }
5706 
5707 static int do_pre_lni_host_behaviors(struct hfi1_pportdata *ppd)
5708 {
5709 	refresh_qsfp_cache(ppd, &ppd->qsfp_info);
5710 
5711 	return 0;
5712 }
5713 
5714 static int do_qsfp_intr_fallback(struct hfi1_pportdata *ppd)
5715 {
5716 	struct hfi1_devdata *dd = ppd->dd;
5717 	u8 qsfp_interrupt_status = 0;
5718 
5719 	if (qsfp_read(ppd, dd->hfi1_id, 2, &qsfp_interrupt_status, 1)
5720 		!= 1) {
5721 		dd_dev_info(dd,
5722 			"%s: Failed to read status of QSFP module\n",
5723 			__func__);
5724 		return -EIO;
5725 	}
5726 
5727 	/* We don't care about alarms & warnings with a non-functional INT_N */
5728 	if (!(qsfp_interrupt_status & QSFP_DATA_NOT_READY))
5729 		do_pre_lni_host_behaviors(ppd);
5730 
5731 	return 0;
5732 }
5733 
5734 /* This routine will only be scheduled if the QSFP module is present */
5735 static void qsfp_event(struct work_struct *work)
5736 {
5737 	struct qsfp_data *qd;
5738 	struct hfi1_pportdata *ppd;
5739 	struct hfi1_devdata *dd;
5740 
5741 	qd = container_of(work, struct qsfp_data, qsfp_work);
5742 	ppd = qd->ppd;
5743 	dd = ppd->dd;
5744 
5745 	/* Sanity check */
5746 	if (!qsfp_mod_present(ppd))
5747 		return;
5748 
5749 	/*
5750 	 * Turn DC back on after the cable has been
5751 	 * re-inserted. Up until now, the DC has been in
5752 	 * reset to save power.
5753 	 */
5754 	dc_start(dd);
5755 
5756 	if (qd->cache_refresh_required) {
5757 		msleep(3000);
5758 		reset_qsfp(ppd);
5759 
5760 		/* Check for QSFP interrupt after t_init (SFF 8679)
5761 		 * + extra
5762 		 */
5763 		msleep(3000);
5764 		if (!qd->qsfp_interrupt_functional) {
5765 			if (do_qsfp_intr_fallback(ppd) < 0)
5766 				dd_dev_info(dd, "%s: QSFP fallback failed\n",
5767 					__func__);
5768 			ppd->driver_link_ready = 1;
5769 			start_link(ppd);
5770 		}
5771 	}
5772 
5773 	if (qd->check_interrupt_flags) {
5774 		u8 qsfp_interrupt_status[16] = {0,};
5775 
5776 		if (qsfp_read(ppd, dd->hfi1_id, 6,
5777 			      &qsfp_interrupt_status[0], 16) != 16) {
5778 			dd_dev_info(dd,
5779 				"%s: Failed to read status of QSFP module\n",
5780 				__func__);
5781 		} else {
5782 			unsigned long flags;
5783 			u8 data_status;
5784 
5785 			spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
5786 			ppd->qsfp_info.check_interrupt_flags = 0;
5787 			spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
5788 								flags);
5789 
5790 			if (qsfp_read(ppd, dd->hfi1_id, 2, &data_status, 1)
5791 				 != 1) {
5792 				dd_dev_info(dd,
5793 				"%s: Failed to read status of QSFP module\n",
5794 					__func__);
5795 			}
5796 			if (!(data_status & QSFP_DATA_NOT_READY)) {
5797 				do_pre_lni_host_behaviors(ppd);
5798 				start_link(ppd);
5799 			} else
5800 				handle_qsfp_error_conditions(ppd,
5801 						qsfp_interrupt_status);
5802 		}
5803 	}
5804 }
5805 
5806 void init_qsfp(struct hfi1_pportdata *ppd)
5807 {
5808 	struct hfi1_devdata *dd = ppd->dd;
5809 	u64 qsfp_mask;
5810 
5811 	if (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB ||
5812 			ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
5813 		ppd->driver_link_ready = 1;
5814 		return;
5815 	}
5816 
5817 	ppd->qsfp_info.ppd = ppd;
5818 	INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
5819 
5820 	qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
5821 	/* Clear current status to avoid spurious interrupts */
5822 	write_csr(dd,
5823 			dd->hfi1_id ?
5824 				ASIC_QSFP2_CLEAR :
5825 				ASIC_QSFP1_CLEAR,
5826 		qsfp_mask);
5827 
5828 	/* Handle active low nature of INT_N and MODPRST_N pins */
5829 	if (qsfp_mod_present(ppd))
5830 		qsfp_mask &= ~(u64)QSFP_HFI0_MODPRST_N;
5831 	write_csr(dd,
5832 		  dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
5833 		  qsfp_mask);
5834 
5835 	/* Allow only INT_N and MODPRST_N to trigger QSFP interrupts */
5836 	qsfp_mask |= (u64)QSFP_HFI0_MODPRST_N;
5837 	write_csr(dd,
5838 		dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK,
5839 		qsfp_mask);
5840 
5841 	if (qsfp_mod_present(ppd)) {
5842 		msleep(3000);
5843 		reset_qsfp(ppd);
5844 
5845 		/* Check for QSFP interrupt after t_init (SFF 8679)
5846 		 * + extra
5847 		 */
5848 		msleep(3000);
5849 		if (!ppd->qsfp_info.qsfp_interrupt_functional) {
5850 			if (do_qsfp_intr_fallback(ppd) < 0)
5851 				dd_dev_info(dd,
5852 					"%s: QSFP fallback failed\n",
5853 					__func__);
5854 			ppd->driver_link_ready = 1;
5855 		}
5856 	}
5857 }
5858 
5859 int bringup_serdes(struct hfi1_pportdata *ppd)
5860 {
5861 	struct hfi1_devdata *dd = ppd->dd;
5862 	u64 guid;
5863 	int ret;
5864 
5865 	if (HFI1_CAP_IS_KSET(EXTENDED_PSN))
5866 		add_rcvctrl(dd, RCV_CTRL_RCV_EXTENDED_PSN_ENABLE_SMASK);
5867 
5868 	guid = ppd->guid;
5869 	if (!guid) {
5870 		if (dd->base_guid)
5871 			guid = dd->base_guid + ppd->port - 1;
5872 		ppd->guid = guid;
5873 	}
5874 
5875 	/* the link defaults to enabled */
5876 	ppd->link_enabled = 1;
5877 	/* Set linkinit_reason on power up per OPA spec */
5878 	ppd->linkinit_reason = OPA_LINKINIT_REASON_LINKUP;
5879 
5880 	if (loopback) {
5881 		ret = init_loopback(dd);
5882 		if (ret < 0)
5883 			return ret;
5884 	}
5885 
5886 	return start_link(ppd);
5887 }
5888 
5889 void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
5890 {
5891 	struct hfi1_devdata *dd = ppd->dd;
5892 
5893 	/*
5894 	 * Shut down the link and keep it down.  First clear the flag
5895 	 * indicating that the driver is ready to bring the link up (driver_link_ready).
5896 	 * Then make sure the link is not automatically restarted
5897 	 * (link_enabled).  Cancel any pending restart.  And finally
5898 	 * go offline.
5899 	 */
5900 	ppd->driver_link_ready = 0;
5901 	ppd->link_enabled = 0;
5902 
5903 	set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
5904 	  OPA_LINKDOWN_REASON_SMA_DISABLED);
5905 	set_link_state(ppd, HLS_DN_OFFLINE);
5906 
5907 	/* disable the port */
5908 	clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
5909 }
5910 
5911 static inline int init_cpu_counters(struct hfi1_devdata *dd)
5912 {
5913 	struct hfi1_pportdata *ppd;
5914 	int i;
5915 
5916 	ppd = (struct hfi1_pportdata *)(dd + 1);
5917 	for (i = 0; i < dd->num_pports; i++, ppd++) {
5918 		ppd->ibport_data.rc_acks = NULL;
5919 		ppd->ibport_data.rc_qacks = NULL;
5920 		ppd->ibport_data.rc_acks = alloc_percpu(u64);
5921 		ppd->ibport_data.rc_qacks = alloc_percpu(u64);
5922 		ppd->ibport_data.rc_delayed_comp = alloc_percpu(u64);
5923 		if ((ppd->ibport_data.rc_acks == NULL) ||
5924 		    (ppd->ibport_data.rc_delayed_comp == NULL) ||
5925 		    (ppd->ibport_data.rc_qacks == NULL))
5926 			return -ENOMEM;
5927 	}
5928 
5929 	return 0;
5930 }
5931 
5932 static const char * const pt_names[] = {
5933 	"expected",
5934 	"eager",
5935 	"invalid"
5936 };
5937 
5938 static const char *pt_name(u32 type)
5939 {
5940 	return type >= ARRAY_SIZE(pt_names) ? "unknown" : pt_names[type];
5941 }
5942 
5943 /*
5944  * index is the index into the receive array
5945  */
5946 void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
5947 		  u32 type, unsigned long pa, u16 order)
5948 {
5949 	u64 reg;
5950 	void __iomem *base = (dd->rcvarray_wc ? dd->rcvarray_wc :
5951 			      (dd->kregbase + RCV_ARRAY));
5952 
5953 	if (!(dd->flags & HFI1_PRESENT))
5954 		goto done;
5955 
5956 	if (type == PT_INVALID) {
5957 		pa = 0;
5958 	} else if (type > PT_INVALID) {
5959 		dd_dev_err(dd,
5960 			"unexpected receive array type %u for index %u, not handled\n",
5961 			type, index);
5962 		goto done;
5963 	}
5964 
5965 	hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx",
5966 		  pt_name(type), index, pa, (unsigned long)order);
5967 
5968 #define RT_ADDR_SHIFT 12	/* 4KB kernel address boundary */
5969 	reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK
5970 		| (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT
5971 		| ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK)
5972 					<< RCV_ARRAY_RT_ADDR_SHIFT;
5973 	writeq(reg, base + (index * 8));
5974 
5975 	if (type == PT_EAGER)
5976 		/*
5977 		 * Eager entries are written one-by-one so we have to push them
5978 		 * after we write the entry.
5979 		 */
5980 		flush_wc();
5981 done:
5982 	return;
5983 }
5984 
5985 void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
5986 {
5987 	struct hfi1_devdata *dd = rcd->dd;
5988 	u32 i;
5989 
5990 	/* this could be optimized */
5991 	for (i = rcd->eager_base; i < rcd->eager_base +
5992 		     rcd->egrbufs.alloced; i++)
5993 		hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
5994 
5995 	for (i = rcd->expected_base;
5996 			i < rcd->expected_base + rcd->expected_count; i++)
5997 		hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
5998 }
5999 
6000 int hfi1_get_base_kinfo(struct hfi1_ctxtdata *rcd,
6001 			struct hfi1_ctxt_info *kinfo)
6002 {
6003 	kinfo->runtime_flags = (HFI1_MISC_GET() << HFI1_CAP_USER_SHIFT) |
6004 		HFI1_CAP_UGET(MASK) | HFI1_CAP_KGET(K2U);
6005 	return 0;
6006 }
6007 
6008 struct hfi1_message_header *hfi1_get_msgheader(
6009 				struct hfi1_devdata *dd, __le32 *rhf_addr)
6010 {
6011 	u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
6012 
6013 	return (struct hfi1_message_header *)
6014 		(rhf_addr - dd->rhf_offset + offset);
6015 }
6016 
6017 static const char * const ib_cfg_name_strings[] = {
6018 	"HFI1_IB_CFG_LIDLMC",
6019 	"HFI1_IB_CFG_LWID_DG_ENB",
6020 	"HFI1_IB_CFG_LWID_ENB",
6021 	"HFI1_IB_CFG_LWID",
6022 	"HFI1_IB_CFG_SPD_ENB",
6023 	"HFI1_IB_CFG_SPD",
6024 	"HFI1_IB_CFG_RXPOL_ENB",
6025 	"HFI1_IB_CFG_LREV_ENB",
6026 	"HFI1_IB_CFG_LINKLATENCY",
6027 	"HFI1_IB_CFG_HRTBT",
6028 	"HFI1_IB_CFG_OP_VLS",
6029 	"HFI1_IB_CFG_VL_HIGH_CAP",
6030 	"HFI1_IB_CFG_VL_LOW_CAP",
6031 	"HFI1_IB_CFG_OVERRUN_THRESH",
6032 	"HFI1_IB_CFG_PHYERR_THRESH",
6033 	"HFI1_IB_CFG_LINKDEFAULT",
6034 	"HFI1_IB_CFG_PKEYS",
6035 	"HFI1_IB_CFG_MTU",
6036 	"HFI1_IB_CFG_LSTATE",
6037 	"HFI1_IB_CFG_VL_HIGH_LIMIT",
6038 	"HFI1_IB_CFG_PMA_TICKS",
6039 	"HFI1_IB_CFG_PORT"
6040 };
6041 
6042 static const char *ib_cfg_name(int which)
6043 {
6044 	if (which < 0 || which >= ARRAY_SIZE(ib_cfg_name_strings))
6045 		return "invalid";
6046 	return ib_cfg_name_strings[which];
6047 }
6048 
6049 int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which)
6050 {
6051 	struct hfi1_devdata *dd = ppd->dd;
6052 	int val = 0;
6053 
6054 	switch (which) {
6055 	case HFI1_IB_CFG_LWID_ENB: /* allowed Link-width */
6056 		val = ppd->link_width_enabled;
6057 		break;
6058 	case HFI1_IB_CFG_LWID: /* currently active Link-width */
6059 		val = ppd->link_width_active;
6060 		break;
6061 	case HFI1_IB_CFG_SPD_ENB: /* allowed Link speeds */
6062 		val = ppd->link_speed_enabled;
6063 		break;
6064 	case HFI1_IB_CFG_SPD: /* current Link speed */
6065 		val = ppd->link_speed_active;
6066 		break;
6067 
6068 	case HFI1_IB_CFG_RXPOL_ENB: /* Auto-RX-polarity enable */
6069 	case HFI1_IB_CFG_LREV_ENB: /* Auto-Lane-reversal enable */
6070 	case HFI1_IB_CFG_LINKLATENCY:
6071 		goto unimplemented;
6072 
6073 	case HFI1_IB_CFG_OP_VLS:
6074 		val = ppd->vls_operational;
6075 		break;
6076 	case HFI1_IB_CFG_VL_HIGH_CAP: /* VL arb high priority table size */
6077 		val = VL_ARB_HIGH_PRIO_TABLE_SIZE;
6078 		break;
6079 	case HFI1_IB_CFG_VL_LOW_CAP: /* VL arb low priority table size */
6080 		val = VL_ARB_LOW_PRIO_TABLE_SIZE;
6081 		break;
6082 	case HFI1_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
6083 		val = ppd->overrun_threshold;
6084 		break;
6085 	case HFI1_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
6086 		val = ppd->phy_error_threshold;
6087 		break;
6088 	case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
6089 		val = dd->link_default;
6090 		break;
6091 
6092 	case HFI1_IB_CFG_HRTBT: /* Heartbeat off/enable/auto */
6093 	case HFI1_IB_CFG_PMA_TICKS:
6094 	default:
6095 unimplemented:
6096 		if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
6097 			dd_dev_info(
6098 				dd,
6099 				"%s: which %s: not implemented\n",
6100 				__func__,
6101 				ib_cfg_name(which));
6102 		break;
6103 	}
6104 
6105 	return val;
6106 }
6107 
6108 /*
6109  * The largest MAD packet size.
6110  */
6111 #define MAX_MAD_PACKET 2048
6112 
6113 /*
6114  * Return the maximum header bytes that can go on the _wire_
6115  * for this device. This count includes the ICRC, which is
6116  * not part of the packet held in memory but is appended
6117  * by the HW.
6118  * This is dependent on the device's receive header entry size.
6119  * HFI allows this to be set per-receive context, but the
6120  * driver presently enforces a global value.
6121  */
6122 u32 lrh_max_header_bytes(struct hfi1_devdata *dd)
6123 {
6124 	/*
6125 	 * The maximum non-payload (MTU) bytes in LRH.PktLen are
6126 	 * the Receive Header Entry Size minus the PBC (or RHF) size
6127 	 * plus one DW for the ICRC appended by HW.
6128 	 *
6129 	 * dd->rcd[0].rcvhdrqentsize is in DW.
6130 	 * We use rcd[0] as all contexts will have the same value. Also,
6131 	 * the first kernel context would have been allocated by now so
6132 	 * we are guaranteed a valid value.
6133 	 */
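	/*
	 * Illustrative example (hypothetical entry size, not a hardware
	 * default): with rcvhdrqentsize of 32 DW, the result is
	 * (32 - 2 + 1) << 2 = 124 bytes.
	 */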
6134 	return (dd->rcd[0]->rcvhdrqentsize - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
6135 }
6136 
6137 /*
6138  * Set Send Length
6139  * @ppd - per port data
6140  *
6141  * Set the MTU by limiting how many DWs may be sent.  The SendLenCheck*
6142  * registers compare against LRH.PktLen, so use the max bytes included
6143  * in the LRH.
6144  *
6145  * This routine changes all VL values except VL15, which it maintains at
6146  * the same value.
6147  */
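/*
 * Illustrative example (hypothetical numbers): with a 4096-byte VL MTU
 * and 128 bytes of maximum LRH header, the per-VL limit programmed below
 * is (4096 + 128) >> 2 = 1056 DW.  Limits for VL0-3 are packed into
 * SendLenCheck0; VL4-7 and VL15 go into SendLenCheck1.
 */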
6148 static void set_send_length(struct hfi1_pportdata *ppd)
6149 {
6150 	struct hfi1_devdata *dd = ppd->dd;
6151 	u32 max_hb = lrh_max_header_bytes(dd), maxvlmtu = 0, dcmtu;
6152 	u64 len1 = 0, len2 = (((dd->vld[15].mtu + max_hb) >> 2)
6153 			      & SEND_LEN_CHECK1_LEN_VL15_MASK) <<
6154 		SEND_LEN_CHECK1_LEN_VL15_SHIFT;
6155 	int i;
6156 
6157 	for (i = 0; i < ppd->vls_supported; i++) {
6158 		if (dd->vld[i].mtu > maxvlmtu)
6159 			maxvlmtu = dd->vld[i].mtu;
6160 		if (i <= 3)
6161 			len1 |= (((dd->vld[i].mtu + max_hb) >> 2)
6162 				 & SEND_LEN_CHECK0_LEN_VL0_MASK) <<
6163 				((i % 4) * SEND_LEN_CHECK0_LEN_VL1_SHIFT);
6164 		else
6165 			len2 |= (((dd->vld[i].mtu + max_hb) >> 2)
6166 				 & SEND_LEN_CHECK1_LEN_VL4_MASK) <<
6167 				((i % 4) * SEND_LEN_CHECK1_LEN_VL5_SHIFT);
6168 	}
6169 	write_csr(dd, SEND_LEN_CHECK0, len1);
6170 	write_csr(dd, SEND_LEN_CHECK1, len2);
6171 	/* adjust kernel credit return thresholds based on new MTUs */
6172 	/* all kernel receive contexts have the same hdrqentsize */
6173 	for (i = 0; i < ppd->vls_supported; i++) {
6174 		sc_set_cr_threshold(dd->vld[i].sc,
6175 			sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu,
6176 				dd->rcd[0]->rcvhdrqentsize));
6177 	}
6178 	sc_set_cr_threshold(dd->vld[15].sc,
6179 		sc_mtu_to_threshold(dd->vld[15].sc, dd->vld[15].mtu,
6180 			dd->rcd[0]->rcvhdrqentsize));
6181 
6182 	/* Adjust maximum MTU for the port in DC */
6183 	dcmtu = maxvlmtu == 10240 ? DCC_CFG_PORT_MTU_CAP_10240 :
6184 		(ilog2(maxvlmtu >> 8) + 1);
6185 	len1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG);
6186 	len1 &= ~DCC_CFG_PORT_CONFIG_MTU_CAP_SMASK;
6187 	len1 |= ((u64)dcmtu & DCC_CFG_PORT_CONFIG_MTU_CAP_MASK) <<
6188 		DCC_CFG_PORT_CONFIG_MTU_CAP_SHIFT;
6189 	write_csr(ppd->dd, DCC_CFG_PORT_CONFIG, len1);
6190 }
6191 
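/*
 * set_lidlmc - program the LID/LMC-derived checks: set the DCC target
 * DLID and DLID mask, then propagate the corresponding SLID mask/value
 * to every send context and to the SDMA engines.
 */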
6192 static void set_lidlmc(struct hfi1_pportdata *ppd)
6193 {
6194 	int i;
6195 	u64 sreg = 0;
6196 	struct hfi1_devdata *dd = ppd->dd;
6197 	u32 mask = ~((1U << ppd->lmc) - 1);
6198 	u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1);
6199 
6200 	if (dd->hfi1_snoop.mode_flag)
6201 		dd_dev_info(dd, "Set lid/lmc while snooping");
6202 
6203 	c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
6204 		| DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
6205 	c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
6206 			<< DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT)|
6207 	      ((mask & DCC_CFG_PORT_CONFIG1_DLID_MASK_MASK)
6208 			<< DCC_CFG_PORT_CONFIG1_DLID_MASK_SHIFT);
6209 	write_csr(ppd->dd, DCC_CFG_PORT_CONFIG1, c1);
6210 
6211 	/*
6212 	 * Iterate over all the send contexts and set their SLID check
6213 	 */
6214 	sreg = ((mask & SEND_CTXT_CHECK_SLID_MASK_MASK) <<
6215 			SEND_CTXT_CHECK_SLID_MASK_SHIFT) |
6216 	       (((ppd->lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) <<
6217 			SEND_CTXT_CHECK_SLID_VALUE_SHIFT);
6218 
6219 	for (i = 0; i < dd->chip_send_contexts; i++) {
6220 		hfi1_cdbg(LINKVERB, "SendContext[%d].SLID_CHECK = 0x%x",
6221 			  i, (u32)sreg);
6222 		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, sreg);
6223 	}
6224 
6225 	/* Now we have to do the same thing for the sdma engines */
6226 	sdma_update_lmc(dd, mask, ppd->lid);
6227 }
6228 
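/*
 * Poll the physical link state until it equals 'state' or 'msecs'
 * milliseconds have elapsed.  Returns 0 on success, -ETIMEDOUT on
 * timeout.
 */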
6229 static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs)
6230 {
6231 	unsigned long timeout;
6232 	u32 curr_state;
6233 
6234 	timeout = jiffies + msecs_to_jiffies(msecs);
6235 	while (1) {
6236 		curr_state = read_physical_state(dd);
6237 		if (curr_state == state)
6238 			break;
6239 		if (time_after(jiffies, timeout)) {
6240 			dd_dev_err(dd,
6241 				"timeout waiting for phy link state 0x%x, current state is 0x%x\n",
6242 				state, curr_state);
6243 			return -ETIMEDOUT;
6244 		}
6245 		usleep_range(1950, 2050); /* sleep 2ms-ish */
6246 	}
6247 
6248 	return 0;
6249 }
6250 
6251 /*
6252  * Helper for set_link_state().  Do not call except from that routine.
6253  * Expects ppd->hls_mutex to be held.
6254  *
6255  * @rem_reason value to be sent to the neighbor
6256  *
6257  * LinkDownReasons only set if transition succeeds.
6258  */
6259 static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
6260 {
6261 	struct hfi1_devdata *dd = ppd->dd;
6262 	u32 pstate, previous_state;
6263 	u32 last_local_state;
6264 	u32 last_remote_state;
6265 	int ret;
6266 	int do_transition;
6267 	int do_wait;
6268 
6269 	previous_state = ppd->host_link_state;
6270 	ppd->host_link_state = HLS_GOING_OFFLINE;
6271 	pstate = read_physical_state(dd);
6272 	if (pstate == PLS_OFFLINE) {
6273 		do_transition = 0;	/* in right state */
6274 		do_wait = 0;		/* ...no need to wait */
6275 	} else if ((pstate & 0xff) == PLS_OFFLINE) {
6276 		do_transition = 0;	/* in an offline transient state */
6277 		do_wait = 1;		/* ...wait for it to settle */
6278 	} else {
6279 		do_transition = 1;	/* need to move to offline */
6280 		do_wait = 1;		/* ...will need to wait */
6281 	}
6282 
6283 	if (do_transition) {
6284 		ret = set_physical_link_state(dd,
6285 			PLS_OFFLINE | (rem_reason << 8));
6286 
6287 		if (ret != HCMD_SUCCESS) {
6288 			dd_dev_err(dd,
6289 				"Failed to transition to Offline link state, return %d\n",
6290 				ret);
6291 			return -EINVAL;
6292 		}
6293 		if (ppd->offline_disabled_reason == OPA_LINKDOWN_REASON_NONE)
6294 			ppd->offline_disabled_reason =
6295 			OPA_LINKDOWN_REASON_TRANSIENT;
6296 	}
6297 
6298 	if (do_wait) {
6299 		/* it can take a while for the link to go down */
6300 		ret = wait_phy_linkstate(dd, PLS_OFFLINE, 10000);
6301 		if (ret < 0)
6302 			return ret;
6303 	}
6304 
6305 	/* make sure the logical state is also down */
6306 	wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
6307 
6308 	/*
6309 	 * Now in charge of LCB - must be after the physical state is
6310 	 * offline.quiet and before host_link_state is changed.
6311 	 */
6312 	set_host_lcb_access(dd);
6313 	write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
6314 	ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
6315 
6316 	/*
6317 	 * The LNI has a mandatory wait time after the physical state
6318 	 * moves to Offline.Quiet.  The wait time may be different
6319 	 * depending on how the link went down.  The 8051 firmware
6320 	 * will observe the needed wait time and only move to ready
6321 	 * when that is completed.  The largest of the quiet timeouts
6322 	 * is 2.5s, so wait that long and then a bit more.
6323 	 */
6324 	ret = wait_fm_ready(dd, 3000);
6325 	if (ret) {
6326 		dd_dev_err(dd,
6327 			"After going offline, timed out waiting for the 8051 to become ready to accept host requests\n");
6328 		/* state is really offline, so make it so */
6329 		ppd->host_link_state = HLS_DN_OFFLINE;
6330 		return ret;
6331 	}
6332 
6333 	/*
6334 	 * The state is now offline and the 8051 is ready to accept host
6335 	 * requests.
6336 	 *	- change our state
6337 	 *	- notify others if we were previously in a linkup state
6338 	 */
6339 	ppd->host_link_state = HLS_DN_OFFLINE;
6340 	if (previous_state & HLS_UP) {
6341 		/* went down while link was up */
6342 		handle_linkup_change(dd, 0);
6343 	} else if (previous_state
6344 			& (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
6345 		/* went down while attempting link up */
6346 		/* byte 1 of last_*_state is the failure reason */
6347 		read_last_local_state(dd, &last_local_state);
6348 		read_last_remote_state(dd, &last_remote_state);
6349 		dd_dev_err(dd,
6350 			"LNI failure last states: local 0x%08x, remote 0x%08x\n",
6351 			last_local_state, last_remote_state);
6352 	}
6353 
6354 	/* the active link width (downgrade) is 0 on link down */
6355 	ppd->link_width_active = 0;
6356 	ppd->link_width_downgrade_tx_active = 0;
6357 	ppd->link_width_downgrade_rx_active = 0;
6358 	ppd->current_egress_rate = 0;
6359 	return 0;
6360 }
6361 
6362 /* return the link state name */
6363 static const char *link_state_name(u32 state)
6364 {
6365 	const char *name;
6366 	int n = ilog2(state);
6367 	static const char * const names[] = {
6368 		[__HLS_UP_INIT_BP]	 = "INIT",
6369 		[__HLS_UP_ARMED_BP]	 = "ARMED",
6370 		[__HLS_UP_ACTIVE_BP]	 = "ACTIVE",
6371 		[__HLS_DN_DOWNDEF_BP]	 = "DOWNDEF",
6372 		[__HLS_DN_POLL_BP]	 = "POLL",
6373 		[__HLS_DN_DISABLE_BP]	 = "DISABLE",
6374 		[__HLS_DN_OFFLINE_BP]	 = "OFFLINE",
6375 		[__HLS_VERIFY_CAP_BP]	 = "VERIFY_CAP",
6376 		[__HLS_GOING_UP_BP]	 = "GOING_UP",
6377 		[__HLS_GOING_OFFLINE_BP] = "GOING_OFFLINE",
6378 		[__HLS_LINK_COOLDOWN_BP] = "LINK_COOLDOWN"
6379 	};
6380 
6381 	name = n < ARRAY_SIZE(names) ? names[n] : NULL;
6382 	return name ? name : "unknown";
6383 }
6384 
6385 /* return the link state reason name */
6386 static const char *link_state_reason_name(struct hfi1_pportdata *ppd, u32 state)
6387 {
6388 	if (state == HLS_UP_INIT) {
6389 		switch (ppd->linkinit_reason) {
6390 		case OPA_LINKINIT_REASON_LINKUP:
6391 			return "(LINKUP)";
6392 		case OPA_LINKINIT_REASON_FLAPPING:
6393 			return "(FLAPPING)";
6394 		case OPA_LINKINIT_OUTSIDE_POLICY:
6395 			return "(OUTSIDE_POLICY)";
6396 		case OPA_LINKINIT_QUARANTINED:
6397 			return "(QUARANTINED)";
6398 		case OPA_LINKINIT_INSUFIC_CAPABILITY:
6399 			return "(INSUFIC_CAPABILITY)";
6400 		default:
6401 			break;
6402 		}
6403 	}
6404 	return "";
6405 }
6406 
6407 /*
6408  * driver_physical_state - convert the driver's notion of a port's
6409  * state (an HLS_*) into a physical state (a {IB,OPA}_PORTPHYSSTATE_*).
6410  * Return -1 (converted to a u32) to indicate error.
6411  */
6412 u32 driver_physical_state(struct hfi1_pportdata *ppd)
6413 {
6414 	switch (ppd->host_link_state) {
6415 	case HLS_UP_INIT:
6416 	case HLS_UP_ARMED:
6417 	case HLS_UP_ACTIVE:
6418 		return IB_PORTPHYSSTATE_LINKUP;
6419 	case HLS_DN_POLL:
6420 		return IB_PORTPHYSSTATE_POLLING;
6421 	case HLS_DN_DISABLE:
6422 		return IB_PORTPHYSSTATE_DISABLED;
6423 	case HLS_DN_OFFLINE:
6424 		return OPA_PORTPHYSSTATE_OFFLINE;
6425 	case HLS_VERIFY_CAP:
6426 		return IB_PORTPHYSSTATE_POLLING;
6427 	case HLS_GOING_UP:
6428 		return IB_PORTPHYSSTATE_POLLING;
6429 	case HLS_GOING_OFFLINE:
6430 		return OPA_PORTPHYSSTATE_OFFLINE;
6431 	case HLS_LINK_COOLDOWN:
6432 		return OPA_PORTPHYSSTATE_OFFLINE;
6433 	case HLS_DN_DOWNDEF:
6434 	default:
6435 		dd_dev_err(ppd->dd, "invalid host_link_state 0x%x\n",
6436 			   ppd->host_link_state);
6437 		return -1;
6438 	}
6439 }
6440 
6441 /*
6442  * driver_logical_state - convert the driver's notion of a port's
6443  * state (an HLS_*) into a logical state (a IB_PORT_*). Return -1
6444  * (converted to a u32) to indicate error.
6445  */
6446 u32 driver_logical_state(struct hfi1_pportdata *ppd)
6447 {
6448 	if (ppd->host_link_state && !(ppd->host_link_state & HLS_UP))
6449 		return IB_PORT_DOWN;
6450 
6451 	switch (ppd->host_link_state & HLS_UP) {
6452 	case HLS_UP_INIT:
6453 		return IB_PORT_INIT;
6454 	case HLS_UP_ARMED:
6455 		return IB_PORT_ARMED;
6456 	case HLS_UP_ACTIVE:
6457 		return IB_PORT_ACTIVE;
6458 	default:
6459 		dd_dev_err(ppd->dd, "invalid host_link_state 0x%x\n",
6460 			   ppd->host_link_state);
6461 		return -1;
6462 	}
6463 }
6464 
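/*
 * Record the reasons the link went down, for later reporting.  Only the
 * first set of reasons is kept: if either the local or the neighbor
 * "latest" reason has already been recorded, the new values are ignored.
 */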
6465 void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
6466 			  u8 neigh_reason, u8 rem_reason)
6467 {
6468 	if (ppd->local_link_down_reason.latest == 0 &&
6469 	    ppd->neigh_link_down_reason.latest == 0) {
6470 		ppd->local_link_down_reason.latest = lcl_reason;
6471 		ppd->neigh_link_down_reason.latest = neigh_reason;
6472 		ppd->remote_link_down_reason = rem_reason;
6473 	}
6474 }
6475 
6476 /*
6477  * Change the physical and/or logical link state.
6478  *
6479  * Do not call this routine while inside an interrupt.  It contains
6480  * calls to routines that can take multiple seconds to finish.
6481  *
6482  * Returns 0 on success, -errno on failure.
6483  */
6484 int set_link_state(struct hfi1_pportdata *ppd, u32 state)
6485 {
6486 	struct hfi1_devdata *dd = ppd->dd;
6487 	struct ib_event event = {.device = NULL};
6488 	int ret1, ret = 0;
6489 	int was_up, is_down;
6490 	int orig_new_state, poll_bounce;
6491 
6492 	mutex_lock(&ppd->hls_lock);
6493 
6494 	orig_new_state = state;
6495 	if (state == HLS_DN_DOWNDEF)
6496 		state = dd->link_default;
6497 
6498 	/* interpret poll -> poll as a link bounce */
6499 	poll_bounce = ppd->host_link_state == HLS_DN_POLL
6500 				&& state == HLS_DN_POLL;
6501 
6502 	dd_dev_info(dd, "%s: current %s, new %s %s%s\n", __func__,
6503 		link_state_name(ppd->host_link_state),
6504 		link_state_name(orig_new_state),
6505 		poll_bounce ? "(bounce) " : "",
6506 		link_state_reason_name(ppd, state));
6507 
6508 	was_up = !!(ppd->host_link_state & HLS_UP);
6509 
6510 	/*
6511 	 * If we're going to a (HLS_*) link state that implies the logical
6512 	 * link state is neither of (IB_PORT_ARMED, IB_PORT_ACTIVE), then
6513 	 * reset is_sm_config_started to 0.
6514 	 */
6515 	if (!(state & (HLS_UP_ARMED | HLS_UP_ACTIVE)))
6516 		ppd->is_sm_config_started = 0;
6517 
6518 	/*
6519 	 * Do nothing if the states match.  Let a poll to poll link bounce
6520 	 * go through.
6521 	 */
6522 	if (ppd->host_link_state == state && !poll_bounce)
6523 		goto done;
6524 
6525 	switch (state) {
6526 	case HLS_UP_INIT:
6527 		if (ppd->host_link_state == HLS_DN_POLL && (quick_linkup
6528 			    || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) {
6529 			/*
6530 			 * Quick link up jumps from polling to here.
6531 			 *
6532 			 * Whether in normal or loopback mode, the
6533 			 * simulator jumps from polling to link up.
6534 			 * Accept that here.
6535 			 */
6536 			/* OK */;
6537 		} else if (ppd->host_link_state != HLS_GOING_UP) {
6538 			goto unexpected;
6539 		}
6540 
6541 		ppd->host_link_state = HLS_UP_INIT;
6542 		ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
6543 		if (ret) {
6544 			/* logical state didn't change, stay at going_up */
6545 			ppd->host_link_state = HLS_GOING_UP;
6546 			dd_dev_err(dd,
6547 				"%s: logical state did not change to INIT\n",
6548 				__func__);
6549 		} else {
6550 			/* clear old transient LINKINIT_REASON code */
6551 			if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
6552 				ppd->linkinit_reason =
6553 					OPA_LINKINIT_REASON_LINKUP;
6554 
6555 			/* enable the port */
6556 			add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
6557 
6558 			handle_linkup_change(dd, 1);
6559 		}
6560 		break;
6561 	case HLS_UP_ARMED:
6562 		if (ppd->host_link_state != HLS_UP_INIT)
6563 			goto unexpected;
6564 
6565 		ppd->host_link_state = HLS_UP_ARMED;
6566 		set_logical_state(dd, LSTATE_ARMED);
6567 		ret = wait_logical_linkstate(ppd, IB_PORT_ARMED, 1000);
6568 		if (ret) {
6569 			/* logical state didn't change, stay at init */
6570 			ppd->host_link_state = HLS_UP_INIT;
6571 			dd_dev_err(dd,
6572 				"%s: logical state did not change to ARMED\n",
6573 				__func__);
6574 		}
6575 		/*
6576 		 * The simulator does not currently implement SMA messages,
6577 		 * so neighbor_normal is not set.  Set it here when we first
6578 		 * move to Armed.
6579 		 */
6580 		if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
6581 			ppd->neighbor_normal = 1;
6582 		break;
6583 	case HLS_UP_ACTIVE:
6584 		if (ppd->host_link_state != HLS_UP_ARMED)
6585 			goto unexpected;
6586 
6587 		ppd->host_link_state = HLS_UP_ACTIVE;
6588 		set_logical_state(dd, LSTATE_ACTIVE);
6589 		ret = wait_logical_linkstate(ppd, IB_PORT_ACTIVE, 1000);
6590 		if (ret) {
6591 			/* logical state didn't change, stay at armed */
6592 			ppd->host_link_state = HLS_UP_ARMED;
6593 			dd_dev_err(dd,
6594 				"%s: logical state did not change to ACTIVE\n",
6595 				__func__);
6596 		} else {
6597 
6598 			/* tell all engines to go running */
6599 			sdma_all_running(dd);
6600 
6601 			/* Signal the IB layer that the port has gone active */
6602 			event.device = &dd->verbs_dev.ibdev;
6603 			event.element.port_num = ppd->port;
6604 			event.event = IB_EVENT_PORT_ACTIVE;
6605 		}
6606 		break;
6607 	case HLS_DN_POLL:
6608 		if ((ppd->host_link_state == HLS_DN_DISABLE ||
6609 		     ppd->host_link_state == HLS_DN_OFFLINE) &&
6610 		    dd->dc_shutdown)
6611 			dc_start(dd);
6612 		/* Hand LED control to the DC */
6613 		write_csr(dd, DCC_CFG_LED_CNTRL, 0);
6614 
6615 		if (ppd->host_link_state != HLS_DN_OFFLINE) {
6616 			u8 tmp = ppd->link_enabled;
6617 
6618 			ret = goto_offline(ppd, ppd->remote_link_down_reason);
6619 			if (ret) {
6620 				ppd->link_enabled = tmp;
6621 				break;
6622 			}
6623 			ppd->remote_link_down_reason = 0;
6624 
6625 			if (ppd->driver_link_ready)
6626 				ppd->link_enabled = 1;
6627 		}
6628 
6629 		ret = set_local_link_attributes(ppd);
6630 		if (ret)
6631 			break;
6632 
6633 		ppd->port_error_action = 0;
6634 		ppd->host_link_state = HLS_DN_POLL;
6635 
6636 		if (quick_linkup) {
6637 			/* quick linkup does not go into polling */
6638 			ret = do_quick_linkup(dd);
6639 		} else {
6640 			ret1 = set_physical_link_state(dd, PLS_POLLING);
6641 			if (ret1 != HCMD_SUCCESS) {
6642 				dd_dev_err(dd,
6643 					"Failed to transition to Polling link state, return 0x%x\n",
6644 					ret1);
6645 				ret = -EINVAL;
6646 			}
6647 		}
6648 		ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE;
6649 		/*
6650 		 * If an error occurred above, go back to offline.  The
6651 		 * caller may reschedule another attempt.
6652 		 */
6653 		if (ret)
6654 			goto_offline(ppd, 0);
6655 		break;
6656 	case HLS_DN_DISABLE:
6657 		/* link is disabled */
6658 		ppd->link_enabled = 0;
6659 
6660 		/* allow any state to transition to disabled */
6661 
6662 		/* must transition to offline first */
6663 		if (ppd->host_link_state != HLS_DN_OFFLINE) {
6664 			ret = goto_offline(ppd, ppd->remote_link_down_reason);
6665 			if (ret)
6666 				break;
6667 			ppd->remote_link_down_reason = 0;
6668 		}
6669 
6670 		ret1 = set_physical_link_state(dd, PLS_DISABLED);
6671 		if (ret1 != HCMD_SUCCESS) {
6672 			dd_dev_err(dd,
6673 				"Failed to transition to Disabled link state, return 0x%x\n",
6674 				ret1);
6675 			ret = -EINVAL;
6676 			break;
6677 		}
6678 		ppd->host_link_state = HLS_DN_DISABLE;
6679 		dc_shutdown(dd);
6680 		break;
6681 	case HLS_DN_OFFLINE:
6682 		if (ppd->host_link_state == HLS_DN_DISABLE)
6683 			dc_start(dd);
6684 
6685 		/* allow any state to transition to offline */
6686 		ret = goto_offline(ppd, ppd->remote_link_down_reason);
6687 		if (!ret)
6688 			ppd->remote_link_down_reason = 0;
6689 		break;
6690 	case HLS_VERIFY_CAP:
6691 		if (ppd->host_link_state != HLS_DN_POLL)
6692 			goto unexpected;
6693 		ppd->host_link_state = HLS_VERIFY_CAP;
6694 		break;
6695 	case HLS_GOING_UP:
6696 		if (ppd->host_link_state != HLS_VERIFY_CAP)
6697 			goto unexpected;
6698 
6699 		ret1 = set_physical_link_state(dd, PLS_LINKUP);
6700 		if (ret1 != HCMD_SUCCESS) {
6701 			dd_dev_err(dd,
6702 				"Failed to transition to link up state, return 0x%x\n",
6703 				ret1);
6704 			ret = -EINVAL;
6705 			break;
6706 		}
6707 		ppd->host_link_state = HLS_GOING_UP;
6708 		break;
6709 
6710 	case HLS_GOING_OFFLINE:		/* transient within goto_offline() */
6711 	case HLS_LINK_COOLDOWN:		/* transient within goto_offline() */
6712 	default:
6713 		dd_dev_info(dd, "%s: state 0x%x: not supported\n",
6714 			__func__, state);
6715 		ret = -EINVAL;
6716 		break;
6717 	}
6718 
6719 	is_down = !!(ppd->host_link_state & (HLS_DN_POLL |
6720 			HLS_DN_DISABLE | HLS_DN_OFFLINE));
6721 
6722 	if (was_up && is_down && ppd->local_link_down_reason.sma == 0 &&
6723 	    ppd->neigh_link_down_reason.sma == 0) {
6724 		ppd->local_link_down_reason.sma =
6725 		  ppd->local_link_down_reason.latest;
6726 		ppd->neigh_link_down_reason.sma =
6727 		  ppd->neigh_link_down_reason.latest;
6728 	}
6729 
6730 	goto done;
6731 
6732 unexpected:
6733 	dd_dev_err(dd, "%s: unexpected state transition from %s to %s\n",
6734 		__func__, link_state_name(ppd->host_link_state),
6735 		link_state_name(state));
6736 	ret = -EINVAL;
6737 
6738 done:
6739 	mutex_unlock(&ppd->hls_lock);
6740 
6741 	if (event.device)
6742 		ib_dispatch_event(&event);
6743 
6744 	return ret;
6745 }
6746 
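/*
 * Set the indicated IB configuration parameter ('which') to 'val'.
 * Returns 0 on success, -errno on failure; unimplemented parameters are
 * reported if PRINT_UNIMPL is set and otherwise ignored.
 */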
6747 int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val)
6748 {
6749 	u64 reg;
6750 	int ret = 0;
6751 
6752 	switch (which) {
6753 	case HFI1_IB_CFG_LIDLMC:
6754 		set_lidlmc(ppd);
6755 		break;
6756 	case HFI1_IB_CFG_VL_HIGH_LIMIT:
6757 		/*
6758 		 * The VL Arbitrator high limit is sent in units of 4k
6759 		 * bytes, while HFI stores it in units of 64 bytes.
6760 		 */
6761 		val *= 4096/64;
6762 		reg = ((u64)val & SEND_HIGH_PRIORITY_LIMIT_LIMIT_MASK)
6763 			<< SEND_HIGH_PRIORITY_LIMIT_LIMIT_SHIFT;
6764 		write_csr(ppd->dd, SEND_HIGH_PRIORITY_LIMIT, reg);
6765 		break;
6766 	case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
6767 		/* HFI only supports POLL as the default link down state */
6768 		if (val != HLS_DN_POLL)
6769 			ret = -EINVAL;
6770 		break;
6771 	case HFI1_IB_CFG_OP_VLS:
6772 		if (ppd->vls_operational != val) {
6773 			ppd->vls_operational = val;
6774 			if (!ppd->port)
6775 				ret = -EINVAL;
6776 			else
6777 				ret = sdma_map_init(
6778 					ppd->dd,
6779 					ppd->port - 1,
6780 					val,
6781 					NULL);
6782 		}
6783 		break;
6784 	/*
6785 	 * For link width, link width downgrade, and speed enable, always AND
6786 	 * the setting with what is actually supported.  This has two benefits.
6787 	 * First, enabled can't have unsupported values, no matter what the
6788 	 * SM or FM might want.  Second, the ALL_SUPPORTED wildcards that mean
6789 	 * "fill in with your supported value" have all the bits in the
6790 	 * field set, so simply ANDing with supported has the desired result.
6791 	 */
6792 	case HFI1_IB_CFG_LWID_ENB: /* set allowed Link-width */
6793 		ppd->link_width_enabled = val & ppd->link_width_supported;
6794 		break;
6795 	case HFI1_IB_CFG_LWID_DG_ENB: /* set allowed link width downgrade */
6796 		ppd->link_width_downgrade_enabled =
6797 				val & ppd->link_width_downgrade_supported;
6798 		break;
6799 	case HFI1_IB_CFG_SPD_ENB: /* allowed Link speeds */
6800 		ppd->link_speed_enabled = val & ppd->link_speed_supported;
6801 		break;
6802 	case HFI1_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
6803 		/*
6804 		 * HFI does not follow IB specs; save this value
6805 		 * so we can report it, if asked.
6806 		 */
6807 		ppd->overrun_threshold = val;
6808 		break;
6809 	case HFI1_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
6810 		/*
6811 		 * HFI does not follow IB specs; save this value
6812 		 * so we can report it, if asked.
6813 		 */
6814 		ppd->phy_error_threshold = val;
6815 		break;
6816 
6817 	case HFI1_IB_CFG_MTU:
6818 		set_send_length(ppd);
6819 		break;
6820 
6821 	case HFI1_IB_CFG_PKEYS:
6822 		if (HFI1_CAP_IS_KSET(PKEY_CHECK))
6823 			set_partition_keys(ppd);
6824 		break;
6825 
6826 	default:
6827 		if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
6828 			dd_dev_info(ppd->dd,
6829 			  "%s: which %s, val 0x%x: not implemented\n",
6830 			  __func__, ib_cfg_name(which), val);
6831 		break;
6832 	}
6833 	return ret;
6834 }
6835 
6836 /* begin functions related to vl arbitration table caching */
6837 static void init_vl_arb_caches(struct hfi1_pportdata *ppd)
6838 {
6839 	int i;
6840 
6841 	BUILD_BUG_ON(VL_ARB_TABLE_SIZE !=
6842 			VL_ARB_LOW_PRIO_TABLE_SIZE);
6843 	BUILD_BUG_ON(VL_ARB_TABLE_SIZE !=
6844 			VL_ARB_HIGH_PRIO_TABLE_SIZE);
6845 
6846 	/*
6847 	 * Note that we always return values directly from the
6848 	 * 'vl_arb_cache' (and do no CSR reads) in response to a
6849 	 * 'Get(VLArbTable)'. This is obviously correct after a
6850 	 * 'Set(VLArbTable)', since the cache will then be up to
6851 	 * date. But it's also correct prior to any 'Set(VLArbTable)'
6852 	 * since then both the cache, and the relevant h/w registers
6853 	 * will be zeroed.
6854 	 */
6855 
6856 	for (i = 0; i < MAX_PRIO_TABLE; i++)
6857 		spin_lock_init(&ppd->vl_arb_cache[i].lock);
6858 }
6859 
6860 /*
6861  * vl_arb_lock_cache
6862  *
6863  * All other vl_arb_* functions should be called only after locking
6864  * the cache.
6865  */
6866 static inline struct vl_arb_cache *
6867 vl_arb_lock_cache(struct hfi1_pportdata *ppd, int idx)
6868 {
6869 	if (idx != LO_PRIO_TABLE && idx != HI_PRIO_TABLE)
6870 		return NULL;
6871 	spin_lock(&ppd->vl_arb_cache[idx].lock);
6872 	return &ppd->vl_arb_cache[idx];
6873 }
6874 
6875 static inline void vl_arb_unlock_cache(struct hfi1_pportdata *ppd, int idx)
6876 {
6877 	spin_unlock(&ppd->vl_arb_cache[idx].lock);
6878 }
6879 
6880 static void vl_arb_get_cache(struct vl_arb_cache *cache,
6881 			     struct ib_vl_weight_elem *vl)
6882 {
6883 	memcpy(vl, cache->table, VL_ARB_TABLE_SIZE * sizeof(*vl));
6884 }
6885 
6886 static void vl_arb_set_cache(struct vl_arb_cache *cache,
6887 			     struct ib_vl_weight_elem *vl)
6888 {
6889 	memcpy(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
6890 }
6891 
6892 static int vl_arb_match_cache(struct vl_arb_cache *cache,
6893 			      struct ib_vl_weight_elem *vl)
6894 {
6895 	return !memcmp(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
6896 }
6897 /* end functions related to vl arbitration table caching */
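/*
 * Typical use of the cache, as in fm_get_table()/fm_set_table() below
 * (sketch only):
 *
 *	vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
 *	vl_arb_get_cache(vlc, t);
 *	vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
 */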
6898 
6899 static int set_vl_weights(struct hfi1_pportdata *ppd, u32 target,
6900 			  u32 size, struct ib_vl_weight_elem *vl)
6901 {
6902 	struct hfi1_devdata *dd = ppd->dd;
6903 	u64 reg;
6904 	unsigned int i, is_up = 0;
6905 	int drain, ret = 0;
6906 
6907 	mutex_lock(&ppd->hls_lock);
6908 
6909 	if (ppd->host_link_state & HLS_UP)
6910 		is_up = 1;
6911 
6912 	drain = !is_ax(dd) && is_up;
6913 
6914 	if (drain)
6915 		/*
6916 		 * Before adjusting VL arbitration weights, empty per-VL
6917 		 * FIFOs, otherwise a packet whose VL weight is being
6918 		 * set to 0 could get stuck in a FIFO with no chance to
6919 		 * egress.
6920 		 */
6921 		ret = stop_drain_data_vls(dd);
6922 
6923 	if (ret) {
6924 		dd_dev_err(
6925 			dd,
6926 			"%s: cannot stop/drain VLs - refusing to change VL arbitration weights\n",
6927 			__func__);
6928 		goto err;
6929 	}
6930 
6931 	for (i = 0; i < size; i++, vl++) {
6932 		/*
6933 		 * NOTE: The low priority shift and mask are used here, but
6934 		 * they are the same for both the low and high registers.
6935 		 */
6936 		reg = (((u64)vl->vl & SEND_LOW_PRIORITY_LIST_VL_MASK)
6937 				<< SEND_LOW_PRIORITY_LIST_VL_SHIFT)
6938 		      | (((u64)vl->weight
6939 				& SEND_LOW_PRIORITY_LIST_WEIGHT_MASK)
6940 				<< SEND_LOW_PRIORITY_LIST_WEIGHT_SHIFT);
6941 		write_csr(dd, target + (i * 8), reg);
6942 	}
6943 	pio_send_control(dd, PSC_GLOBAL_VLARB_ENABLE);
6944 
6945 	if (drain)
6946 		open_fill_data_vls(dd); /* reopen all VLs */
6947 
6948 err:
6949 	mutex_unlock(&ppd->hls_lock);
6950 
6951 	return ret;
6952 }
6953 
6954 /*
6955  * Read one credit merge VL register.
6956  */
6957 static void read_one_cm_vl(struct hfi1_devdata *dd, u32 csr,
6958 			   struct vl_limit *vll)
6959 {
6960 	u64 reg = read_csr(dd, csr);
6961 
6962 	vll->dedicated = cpu_to_be16(
6963 		(reg >> SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT)
6964 		& SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_MASK);
6965 	vll->shared = cpu_to_be16(
6966 		(reg >> SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SHIFT)
6967 		& SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_MASK);
6968 }
6969 
6970 /*
6971  * Read the current credit merge limits.
6972  */
6973 static int get_buffer_control(struct hfi1_devdata *dd,
6974 			      struct buffer_control *bc, u16 *overall_limit)
6975 {
6976 	u64 reg;
6977 	int i;
6978 
6979 	/* not all entries are filled in */
6980 	memset(bc, 0, sizeof(*bc));
6981 
6982 	/* OPA and HFI have a 1-1 mapping */
6983 	for (i = 0; i < TXE_NUM_DATA_VL; i++)
6984 		read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8*i), &bc->vl[i]);
6985 
6986 	/* NOTE: assumes that VL* and VL15 CSRs are bit-wise identical */
6987 	read_one_cm_vl(dd, SEND_CM_CREDIT_VL15, &bc->vl[15]);
6988 
6989 	reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
6990 	bc->overall_shared_limit = cpu_to_be16(
6991 		(reg >> SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
6992 		& SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_MASK);
6993 	if (overall_limit)
6994 		*overall_limit = (reg
6995 			>> SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
6996 			& SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_MASK;
6997 	return sizeof(struct buffer_control);
6998 }
6999 
7000 static int get_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
7001 {
7002 	u64 reg;
7003 	int i;
7004 
7005 	/* each register contains 16 SC->VLnt mappings, 4 bits each */
7006 	reg = read_csr(dd, DCC_CFG_SC_VL_TABLE_15_0);
7007 	for (i = 0; i < sizeof(u64); i++) {
7008 		u8 byte = *(((u8 *)&reg) + i);
7009 
7010 		dp->vlnt[2 * i] = byte & 0xf;
7011 		dp->vlnt[(2 * i) + 1] = (byte & 0xf0) >> 4;
7012 	}
7013 
7014 	reg = read_csr(dd, DCC_CFG_SC_VL_TABLE_31_16);
7015 	for (i = 0; i < sizeof(u64); i++) {
7016 		u8 byte = *(((u8 *)&reg) + i);
7017 
7018 		dp->vlnt[16 + (2 * i)] = byte & 0xf;
7019 		dp->vlnt[16 + (2 * i) + 1] = (byte & 0xf0) >> 4;
7020 	}
7021 	return sizeof(struct sc2vlnt);
7022 }
7023 
7024 static void get_vlarb_preempt(struct hfi1_devdata *dd, u32 nelems,
7025 			      struct ib_vl_weight_elem *vl)
7026 {
7027 	unsigned int i;
7028 
7029 	for (i = 0; i < nelems; i++, vl++) {
7030 		vl->vl = 0xf;
7031 		vl->weight = 0;
7032 	}
7033 }
7034 
7035 static void set_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
7036 {
7037 	write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0,
7038 		DC_SC_VL_VAL(15_0,
7039 		0, dp->vlnt[0] & 0xf,
7040 		1, dp->vlnt[1] & 0xf,
7041 		2, dp->vlnt[2] & 0xf,
7042 		3, dp->vlnt[3] & 0xf,
7043 		4, dp->vlnt[4] & 0xf,
7044 		5, dp->vlnt[5] & 0xf,
7045 		6, dp->vlnt[6] & 0xf,
7046 		7, dp->vlnt[7] & 0xf,
7047 		8, dp->vlnt[8] & 0xf,
7048 		9, dp->vlnt[9] & 0xf,
7049 		10, dp->vlnt[10] & 0xf,
7050 		11, dp->vlnt[11] & 0xf,
7051 		12, dp->vlnt[12] & 0xf,
7052 		13, dp->vlnt[13] & 0xf,
7053 		14, dp->vlnt[14] & 0xf,
7054 		15, dp->vlnt[15] & 0xf));
7055 	write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16,
7056 		DC_SC_VL_VAL(31_16,
7057 		16, dp->vlnt[16] & 0xf,
7058 		17, dp->vlnt[17] & 0xf,
7059 		18, dp->vlnt[18] & 0xf,
7060 		19, dp->vlnt[19] & 0xf,
7061 		20, dp->vlnt[20] & 0xf,
7062 		21, dp->vlnt[21] & 0xf,
7063 		22, dp->vlnt[22] & 0xf,
7064 		23, dp->vlnt[23] & 0xf,
7065 		24, dp->vlnt[24] & 0xf,
7066 		25, dp->vlnt[25] & 0xf,
7067 		26, dp->vlnt[26] & 0xf,
7068 		27, dp->vlnt[27] & 0xf,
7069 		28, dp->vlnt[28] & 0xf,
7070 		29, dp->vlnt[29] & 0xf,
7071 		30, dp->vlnt[30] & 0xf,
7072 		31, dp->vlnt[31] & 0xf));
7073 }
7074 
7075 static void nonzero_msg(struct hfi1_devdata *dd, int idx, const char *what,
7076 			u16 limit)
7077 {
7078 	if (limit != 0)
7079 		dd_dev_info(dd, "Invalid %s limit %d on VL %d, ignoring\n",
7080 			what, (int)limit, idx);
7081 }
7082 
7083 /* change only the shared limit portion of SendCmGlobalCredit */
7084 static void set_global_shared(struct hfi1_devdata *dd, u16 limit)
7085 {
7086 	u64 reg;
7087 
7088 	reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
7089 	reg &= ~SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SMASK;
7090 	reg |= (u64)limit << SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT;
7091 	write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
7092 }
7093 
7094 /* change only the total credit limit portion of SendCmGlobalCredit */
7095 static void set_global_limit(struct hfi1_devdata *dd, u16 limit)
7096 {
7097 	u64 reg;
7098 
7099 	reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
7100 	reg &= ~SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SMASK;
7101 	reg |= (u64)limit << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT;
7102 	write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
7103 }
7104 
7105 /* set the given per-VL shared limit */
7106 static void set_vl_shared(struct hfi1_devdata *dd, int vl, u16 limit)
7107 {
7108 	u64 reg;
7109 	u32 addr;
7110 
7111 	if (vl < TXE_NUM_DATA_VL)
7112 		addr = SEND_CM_CREDIT_VL + (8 * vl);
7113 	else
7114 		addr = SEND_CM_CREDIT_VL15;
7115 
7116 	reg = read_csr(dd, addr);
7117 	reg &= ~SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SMASK;
7118 	reg |= (u64)limit << SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SHIFT;
7119 	write_csr(dd, addr, reg);
7120 }
7121 
7122 /* set the given per-VL dedicated limit */
7123 static void set_vl_dedicated(struct hfi1_devdata *dd, int vl, u16 limit)
7124 {
7125 	u64 reg;
7126 	u32 addr;
7127 
7128 	if (vl < TXE_NUM_DATA_VL)
7129 		addr = SEND_CM_CREDIT_VL + (8 * vl);
7130 	else
7131 		addr = SEND_CM_CREDIT_VL15;
7132 
7133 	reg = read_csr(dd, addr);
7134 	reg &= ~SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SMASK;
7135 	reg |= (u64)limit << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT;
7136 	write_csr(dd, addr, reg);
7137 }
7138 
7139 /* spin until the given per-VL status mask bits clear */
7140 static void wait_for_vl_status_clear(struct hfi1_devdata *dd, u64 mask,
7141 				     const char *which)
7142 {
7143 	unsigned long timeout;
7144 	u64 reg;
7145 
7146 	timeout = jiffies + msecs_to_jiffies(VL_STATUS_CLEAR_TIMEOUT);
7147 	while (1) {
7148 		reg = read_csr(dd, SEND_CM_CREDIT_USED_STATUS) & mask;
7149 
7150 		if (reg == 0)
7151 			return;	/* success */
7152 		if (time_after(jiffies, timeout))
7153 			break;		/* timed out */
7154 		udelay(1);
7155 	}
7156 
7157 	dd_dev_err(dd,
7158 		"%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n",
7159 		which, VL_STATUS_CLEAR_TIMEOUT, mask, reg);
7160 	/*
7161 	 * If this occurs, it is likely there was a credit loss on the link.
7162 	 * The only recovery from that is a link bounce.
7163 	 */
7164 	dd_dev_err(dd,
7165 		"Continuing anyway.  A credit loss may occur.  Suggest a link bounce\n");
7166 }
7167 
7168 /*
7169  * The number of credits on the VLs may be changed while everything
7170  * is "live", but the following algorithm must be followed due to
7171  * how the hardware is actually implemented.  In particular,
7172  * Return_Credit_Status[] is the only correct status check.
7173  *
7174  * if (reducing Global_Shared_Credit_Limit or any shared limit changing)
7175  *     set Global_Shared_Credit_Limit = 0
7176  *     use_all_vl = 1
7177  * mask0 = all VLs that are changing either dedicated or shared limits
7178  * set Shared_Limit[mask0] = 0
7179  * spin until Return_Credit_Status[use_all_vl ? all VL : mask0] == 0
7180  * if (changing any dedicated limit)
7181  *     mask1 = all VLs that are lowering dedicated limits
7182  *     lower Dedicated_Limit[mask1]
7183  *     spin until Return_Credit_Status[mask1] == 0
7184  *     raise Dedicated_Limits
7185  * raise Shared_Limits
7186  * raise Global_Shared_Credit_Limit
7187  *
7188  * lower = if the new limit is lower, set the limit to the new value
7189  * raise = if the new limit is higher than the current value (may be changed
7190  *	earlier in the algorithm), set the new limit to the new value
7191  */
7192 static int set_buffer_control(struct hfi1_devdata *dd,
7193 			      struct buffer_control *new_bc)
7194 {
7195 	u64 changing_mask, ld_mask, stat_mask;
7196 	int change_count;
7197 	int i, use_all_mask;
7198 	int this_shared_changing;
7199 	/*
7200 	 * A0 adds the variable any_shared_limit_changing below and in the
7201 	 * algorithm above.  If A0 support is removed, the variable can be removed.
7202 	 */
7203 	int any_shared_limit_changing;
7204 	struct buffer_control cur_bc;
7205 	u8 changing[OPA_MAX_VLS];
7206 	u8 lowering_dedicated[OPA_MAX_VLS];
7207 	u16 cur_total;
7208 	u32 new_total = 0;
7209 	const u64 all_mask =
7210 	SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK
7211 	 | SEND_CM_CREDIT_USED_STATUS_VL1_RETURN_CREDIT_STATUS_SMASK
7212 	 | SEND_CM_CREDIT_USED_STATUS_VL2_RETURN_CREDIT_STATUS_SMASK
7213 	 | SEND_CM_CREDIT_USED_STATUS_VL3_RETURN_CREDIT_STATUS_SMASK
7214 	 | SEND_CM_CREDIT_USED_STATUS_VL4_RETURN_CREDIT_STATUS_SMASK
7215 	 | SEND_CM_CREDIT_USED_STATUS_VL5_RETURN_CREDIT_STATUS_SMASK
7216 	 | SEND_CM_CREDIT_USED_STATUS_VL6_RETURN_CREDIT_STATUS_SMASK
7217 	 | SEND_CM_CREDIT_USED_STATUS_VL7_RETURN_CREDIT_STATUS_SMASK
7218 	 | SEND_CM_CREDIT_USED_STATUS_VL15_RETURN_CREDIT_STATUS_SMASK;
7219 
7220 #define valid_vl(idx) ((idx) < TXE_NUM_DATA_VL || (idx) == 15)
7221 #define NUM_USABLE_VLS 16	/* look at VL15 and less */
7222 
7224 	/* find the new total credits, do sanity check on unused VLs */
7225 	for (i = 0; i < OPA_MAX_VLS; i++) {
7226 		if (valid_vl(i)) {
7227 			new_total += be16_to_cpu(new_bc->vl[i].dedicated);
7228 			continue;
7229 		}
7230 		nonzero_msg(dd, i, "dedicated",
7231 			be16_to_cpu(new_bc->vl[i].dedicated));
7232 		nonzero_msg(dd, i, "shared",
7233 			be16_to_cpu(new_bc->vl[i].shared));
7234 		new_bc->vl[i].dedicated = 0;
7235 		new_bc->vl[i].shared = 0;
7236 	}
7237 	new_total += be16_to_cpu(new_bc->overall_shared_limit);
7238 	if (new_total > (u32)dd->link_credits)
7239 		return -EINVAL;
7240 	/* fetch the current values */
7241 	get_buffer_control(dd, &cur_bc, &cur_total);
7242 
7243 	/*
7244 	 * Create the masks we will use.
7245 	 */
7246 	memset(changing, 0, sizeof(changing));
7247 	memset(lowering_dedicated, 0, sizeof(lowering_dedicated));
7248 	/* NOTE: Assumes that the individual VL bits are adjacent and in
7249 	   increasing order */
7250 	stat_mask =
7251 		SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK;
7252 	changing_mask = 0;
7253 	ld_mask = 0;
7254 	change_count = 0;
7255 	any_shared_limit_changing = 0;
7256 	for (i = 0; i < NUM_USABLE_VLS; i++, stat_mask <<= 1) {
7257 		if (!valid_vl(i))
7258 			continue;
7259 		this_shared_changing = new_bc->vl[i].shared
7260 						!= cur_bc.vl[i].shared;
7261 		if (this_shared_changing)
7262 			any_shared_limit_changing = 1;
7263 		if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated
7264 				|| this_shared_changing) {
7265 			changing[i] = 1;
7266 			changing_mask |= stat_mask;
7267 			change_count++;
7268 		}
7269 		if (be16_to_cpu(new_bc->vl[i].dedicated) <
7270 					be16_to_cpu(cur_bc.vl[i].dedicated)) {
7271 			lowering_dedicated[i] = 1;
7272 			ld_mask |= stat_mask;
7273 		}
7274 	}
7275 
7276 	/* bracket the credit change with a total adjustment */
7277 	if (new_total > cur_total)
7278 		set_global_limit(dd, new_total);
7279 
7280 	/*
7281 	 * Start the credit change algorithm.
7282 	 */
7283 	use_all_mask = 0;
7284 	if ((be16_to_cpu(new_bc->overall_shared_limit) <
7285 				be16_to_cpu(cur_bc.overall_shared_limit))
7286 			|| (is_a0(dd) && any_shared_limit_changing)) {
7287 		set_global_shared(dd, 0);
7288 		cur_bc.overall_shared_limit = 0;
7289 		use_all_mask = 1;
7290 	}
7291 
7292 	for (i = 0; i < NUM_USABLE_VLS; i++) {
7293 		if (!valid_vl(i))
7294 			continue;
7295 
7296 		if (changing[i]) {
7297 			set_vl_shared(dd, i, 0);
7298 			cur_bc.vl[i].shared = 0;
7299 		}
7300 	}
7301 
7302 	wait_for_vl_status_clear(dd, use_all_mask ? all_mask : changing_mask,
7303 		"shared");
7304 
7305 	if (change_count > 0) {
7306 		for (i = 0; i < NUM_USABLE_VLS; i++) {
7307 			if (!valid_vl(i))
7308 				continue;
7309 
7310 			if (lowering_dedicated[i]) {
7311 				set_vl_dedicated(dd, i,
7312 					be16_to_cpu(new_bc->vl[i].dedicated));
7313 				cur_bc.vl[i].dedicated =
7314 						new_bc->vl[i].dedicated;
7315 			}
7316 		}
7317 
7318 		wait_for_vl_status_clear(dd, ld_mask, "dedicated");
7319 
7320 		/* now raise all dedicated that are going up */
7321 		for (i = 0; i < NUM_USABLE_VLS; i++) {
7322 			if (!valid_vl(i))
7323 				continue;
7324 
7325 			if (be16_to_cpu(new_bc->vl[i].dedicated) >
7326 					be16_to_cpu(cur_bc.vl[i].dedicated))
7327 				set_vl_dedicated(dd, i,
7328 					be16_to_cpu(new_bc->vl[i].dedicated));
7329 		}
7330 	}
7331 
7332 	/* next raise all shared that are going up */
7333 	for (i = 0; i < NUM_USABLE_VLS; i++) {
7334 		if (!valid_vl(i))
7335 			continue;
7336 
7337 		if (be16_to_cpu(new_bc->vl[i].shared) >
7338 				be16_to_cpu(cur_bc.vl[i].shared))
7339 			set_vl_shared(dd, i, be16_to_cpu(new_bc->vl[i].shared));
7340 	}
7341 
7342 	/* finally raise the global shared */
7343 	if (be16_to_cpu(new_bc->overall_shared_limit) >
7344 			be16_to_cpu(cur_bc.overall_shared_limit))
7345 		set_global_shared(dd,
7346 			be16_to_cpu(new_bc->overall_shared_limit));
7347 
7348 	/* bracket the credit change with a total adjustment */
7349 	if (new_total < cur_total)
7350 		set_global_limit(dd, new_total);
7351 	return 0;
7352 }
7353 
7354 /*
7355  * Read the given fabric manager table. Return the size of the
7356  * table (in bytes) on success, and a negative error code on
7357  * failure.
7358  */
7359 int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t)
7361 {
7362 	int size;
7363 	struct vl_arb_cache *vlc;
7364 
7365 	switch (which) {
7366 	case FM_TBL_VL_HIGH_ARB:
7367 		size = 256;
7368 		/*
7369 		 * OPA specifies 128 elements (of 2 bytes each), though
7370 		 * HFI supports only 16 elements in h/w.
7371 		 */
7372 		vlc = vl_arb_lock_cache(ppd, HI_PRIO_TABLE);
7373 		vl_arb_get_cache(vlc, t);
7374 		vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7375 		break;
7376 	case FM_TBL_VL_LOW_ARB:
7377 		size = 256;
7378 		/*
7379 		 * OPA specifies 128 elements (of 2 bytes each), though
7380 		 * HFI supports only 16 elements in h/w.
7381 		 */
7382 		vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
7383 		vl_arb_get_cache(vlc, t);
7384 		vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7385 		break;
7386 	case FM_TBL_BUFFER_CONTROL:
7387 		size = get_buffer_control(ppd->dd, t, NULL);
7388 		break;
7389 	case FM_TBL_SC2VLNT:
7390 		size = get_sc2vlnt(ppd->dd, t);
7391 		break;
7392 	case FM_TBL_VL_PREEMPT_ELEMS:
7393 		size = 256;
7394 		/* OPA specifies 128 elements, of 2 bytes each */
7395 		get_vlarb_preempt(ppd->dd, OPA_MAX_VLS, t);
7396 		break;
7397 	case FM_TBL_VL_PREEMPT_MATRIX:
7398 		size = 256;
7399 		/*
7400 		 * OPA specifies that this is the same size as the VL
7401 		 * arbitration tables (i.e., 256 bytes).
7402 		 */
7403 		break;
7404 	default:
7405 		return -EINVAL;
7406 	}
7407 	return size;
7408 }
7409 
7410 /*
7411  * Write the given fabric manager table.
7412  */
7413 int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t)
7414 {
7415 	int ret = 0;
7416 	struct vl_arb_cache *vlc;
7417 
7418 	switch (which) {
7419 	case FM_TBL_VL_HIGH_ARB:
7420 		vlc = vl_arb_lock_cache(ppd, HI_PRIO_TABLE);
7421 		if (vl_arb_match_cache(vlc, t)) {
7422 			vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7423 			break;
7424 		}
7425 		vl_arb_set_cache(vlc, t);
7426 		vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7427 		ret = set_vl_weights(ppd, SEND_HIGH_PRIORITY_LIST,
7428 				     VL_ARB_HIGH_PRIO_TABLE_SIZE, t);
7429 		break;
7430 	case FM_TBL_VL_LOW_ARB:
7431 		vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
7432 		if (vl_arb_match_cache(vlc, t)) {
7433 			vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7434 			break;
7435 		}
7436 		vl_arb_set_cache(vlc, t);
7437 		vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7438 		ret = set_vl_weights(ppd, SEND_LOW_PRIORITY_LIST,
7439 				     VL_ARB_LOW_PRIO_TABLE_SIZE, t);
7440 		break;
7441 	case FM_TBL_BUFFER_CONTROL:
7442 		ret = set_buffer_control(ppd->dd, t);
7443 		break;
7444 	case FM_TBL_SC2VLNT:
7445 		set_sc2vlnt(ppd->dd, t);
7446 		break;
7447 	default:
7448 		ret = -EINVAL;
7449 	}
7450 	return ret;
7451 }
7452 
7453 /*
7454  * Disable all data VLs.
7455  *
7456  * Return 0 if disabled, non-zero if the VLs cannot be disabled.
7457  */
7458 static int disable_data_vls(struct hfi1_devdata *dd)
7459 {
7460 	if (is_a0(dd))
7461 		return 1;
7462 
7463 	pio_send_control(dd, PSC_DATA_VL_DISABLE);
7464 
7465 	return 0;
7466 }
7467 
7468 /*
7469  * open_fill_data_vls() - the counterpart to stop_drain_data_vls().
7470  * Just re-enables all data VLs (the "fill" part happens
7471  * automatically - the name was chosen for symmetry with
7472  * stop_drain_data_vls()).
7473  *
7474  * Return 0 if successful, non-zero if the VLs cannot be enabled.
7475  */
7476 int open_fill_data_vls(struct hfi1_devdata *dd)
7477 {
7478 	if (is_a0(dd))
7479 		return 1;
7480 
7481 	pio_send_control(dd, PSC_DATA_VL_ENABLE);
7482 
7483 	return 0;
7484 }
7485 
7486 /*
7487  * drain_data_vls() - assumes that disable_data_vls() has been called,
7488  * then waits for the occupancy of the per-VL FIFOs (for all contexts)
7489  * and of the SDMA engines to drop to 0.
7490  */
7491 static void drain_data_vls(struct hfi1_devdata *dd)
7492 {
7493 	sc_wait(dd);
7494 	sdma_wait(dd);
7495 	pause_for_credit_return(dd);
7496 }
7497 
7498 /*
7499  * stop_drain_data_vls() - disable, then drain all per-VL fifos.
7500  *
7501  * Use open_fill_data_vls() to resume using data VLs.  This pair is
7502  * meant to be used like this:
7503  *
7504  * stop_drain_data_vls(dd);
7505  * // do things with per-VL resources
7506  * open_fill_data_vls(dd);
7507  */
7508 int stop_drain_data_vls(struct hfi1_devdata *dd)
7509 {
7510 	int ret;
7511 
7512 	ret = disable_data_vls(dd);
7513 	if (ret == 0)
7514 		drain_data_vls(dd);
7515 
7516 	return ret;
7517 }
7518 
7519 /*
7520  * Convert a nanosecond time to a cclock count.  No matter how slow
7521  * the cclock, a non-zero ns will always have a non-zero result.
7522  */
7523 u32 ns_to_cclock(struct hfi1_devdata *dd, u32 ns)
7524 {
7525 	u32 cclocks;
7526 
7527 	if (dd->icode == ICODE_FPGA_EMULATION)
7528 		cclocks = (ns * 1000) / FPGA_CCLOCK_PS;
7529 	else  /* simulation pretends to be ASIC */
7530 		cclocks = (ns * 1000) / ASIC_CCLOCK_PS;
7531 	if (ns && !cclocks)	/* if ns nonzero, must be at least 1 */
7532 		cclocks = 1;
7533 	return cclocks;
7534 }
7535 
7536 /*
7537  * Convert a cclock count to nanoseconds. No matter how slow
7538  * the cclock, a non-zero cclock count will always have a non-zero result.
7539  */
7540 u32 cclock_to_ns(struct hfi1_devdata *dd, u32 cclocks)
7541 {
7542 	u32 ns;
7543 
7544 	if (dd->icode == ICODE_FPGA_EMULATION)
7545 		ns = (cclocks * FPGA_CCLOCK_PS) / 1000;
7546 	else  /* simulation pretends to be ASIC */
7547 		ns = (cclocks * ASIC_CCLOCK_PS) / 1000;
7548 	if (cclocks && !ns)
7549 		ns = 1;
7550 	return ns;
7551 }
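/*
 * Illustrative example (hypothetical cclock period of 800 ps):
 * ns_to_cclock(dd, 1000) = (1000 * 1000) / 800 = 1250 cclocks, and
 * cclock_to_ns(dd, 1250) = (1250 * 800) / 1000 = 1000 ns.
 */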
7552 
7553 /*
7554  * Dynamically adjust the receive interrupt timeout for a context based on
7555  * incoming packet rate.
7556  *
7557  * NOTE: Dynamic adjustment does not allow rcv_intr_count to be zero.
7558  */
7559 static void adjust_rcv_timeout(struct hfi1_ctxtdata *rcd, u32 npkts)
7560 {
7561 	struct hfi1_devdata *dd = rcd->dd;
7562 	u32 timeout = rcd->rcvavail_timeout;
7563 
7564 	/*
7565 	 * This algorithm doubles or halves the timeout depending on whether
7566 	 * the number of packets received in this interrupt was less than or
7567 	 * greater than or equal to the interrupt count.
7568 	 *
7569 	 * The calculations below do not allow a steady state to be achieved.
7570 	 * Only at the endpoints is it possible to have an unchanging
7571 	 * timeout.
7572 	 */
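	/*
	 * Illustrative example (hypothetical values): with rcv_intr_count
	 * of 16, an interrupt that handled 10 packets halves the timeout,
	 * while one that handled 20 packets doubles it, capped at
	 * dd->rcv_intr_timeout_csr.
	 */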
7573 	if (npkts < rcv_intr_count) {
7574 		/*
7575 		 * Not enough packets arrived before the timeout, adjust
7576 		 * timeout downward.
7577 		 */
7578 		if (timeout < 2) /* already at minimum? */
7579 			return;
7580 		timeout >>= 1;
7581 	} else {
7582 		/*
7583 		 * More than enough packets arrived before the timeout, adjust
7584 		 * timeout upward.
7585 		 */
7586 		if (timeout >= dd->rcv_intr_timeout_csr) /* already at max? */
7587 			return;
7588 		timeout = min(timeout << 1, dd->rcv_intr_timeout_csr);
7589 	}
7590 
7591 	rcd->rcvavail_timeout = timeout;
7592 	/* timeout cannot be larger than rcv_intr_timeout_csr which has already
7593 	   been verified to be in range */
7594 	write_kctxt_csr(dd, rcd->ctxt, RCV_AVAIL_TIME_OUT,
7595 		(u64)timeout << RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
7596 }
7597 
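/*
 * update_usrhead - write the new receive header queue head (and, if
 * updegr is set, the eager index head) for the context.  If intr_adjust
 * is set, re-tune the receive interrupt timeout first, since the timeout
 * register must be written before RcvHdrHead for the new value to take
 * effect.
 */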
7598 void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
7599 		    u32 intr_adjust, u32 npkts)
7600 {
7601 	struct hfi1_devdata *dd = rcd->dd;
7602 	u64 reg;
7603 	u32 ctxt = rcd->ctxt;
7604 
7605 	/*
7606 	 * Need to write timeout register before updating RcvHdrHead to ensure
7607 	 * that a new value is used when the HW decides to restart counting.
7608 	 */
7609 	if (intr_adjust)
7610 		adjust_rcv_timeout(rcd, npkts);
7611 	if (updegr) {
7612 		reg = (egrhd & RCV_EGR_INDEX_HEAD_HEAD_MASK)
7613 			<< RCV_EGR_INDEX_HEAD_HEAD_SHIFT;
7614 		write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, reg);
7615 	}
7616 	mmiowb();
7617 	reg = ((u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT) |
7618 		(((u64)hd & RCV_HDR_HEAD_HEAD_MASK)
7619 			<< RCV_HDR_HEAD_HEAD_SHIFT);
7620 	write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
7621 	mmiowb();
7622 }
7623 
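/*
 * Return non-zero if the receive header queue for this context is empty:
 * the head equals the tail (read from the DMA'd tail copy when available,
 * otherwise from the RcvHdrTail CSR).
 */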
7624 u32 hdrqempty(struct hfi1_ctxtdata *rcd)
7625 {
7626 	u32 head, tail;
7627 
7628 	head = (read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD)
7629 		& RCV_HDR_HEAD_HEAD_SMASK) >> RCV_HDR_HEAD_HEAD_SHIFT;
7630 
7631 	if (rcd->rcvhdrtail_kvaddr)
7632 		tail = get_rcvhdrtail(rcd);
7633 	else
7634 		tail = read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
7635 
7636 	return head == tail;
7637 }
7638 
7639 /*
7640  * Context Control and Receive Array encoding for buffer size:
7641  *	0x0 invalid
7642  *	0x1   4 KB
7643  *	0x2   8 KB
7644  *	0x3  16 KB
7645  *	0x4  32 KB
7646  *	0x5  64 KB
7647  *	0x6 128 KB
7648  *	0x7 256 KB
7649  *	0x8 512 KB (Receive Array only)
7650  *	0x9   1 MB (Receive Array only)
7651  *	0xa   2 MB (Receive Array only)
7652  *
7653  *	0xB-0xF - reserved (Receive Array only)
7654  *
7655  *
7656  * This routine assumes that the value has already been sanity checked.
7657  */
7658 static u32 encoded_size(u32 size)
7659 {
7660 	switch (size) {
7661 	case   4*1024: return 0x1;
7662 	case   8*1024: return 0x2;
7663 	case  16*1024: return 0x3;
7664 	case  32*1024: return 0x4;
7665 	case  64*1024: return 0x5;
7666 	case 128*1024: return 0x6;
7667 	case 256*1024: return 0x7;
7668 	case 512*1024: return 0x8;
7669 	case   1*1024*1024: return 0x9;
7670 	case   2*1024*1024: return 0xa;
7671 	}
7672 	return 0x1;	/* if invalid, go with the minimum size */
7673 }
7674 
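/*
 * hfi1_rcvctrl - apply the HFI1_RCVCTRL_* operations in 'op' to receive
 * context 'ctxt': enable/disable the context, interrupt availability,
 * tail updates, TID flows, and the "don't drop" / one-packet-per-eager-
 * buffer modes, then write the resulting RcvCtxtCtrl value.
 */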
7675 void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
7676 {
7677 	struct hfi1_ctxtdata *rcd;
7678 	u64 rcvctrl, reg;
7679 	int did_enable = 0;
7680 
7681 	rcd = dd->rcd[ctxt];
7682 	if (!rcd)
7683 		return;
7684 
7685 	hfi1_cdbg(RCVCTRL, "ctxt %d op 0x%x", ctxt, op);
7686 
7687 	rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL);
7688 	/* if the context is already enabled, don't do the extra steps */
7689 	if ((op & HFI1_RCVCTRL_CTXT_ENB)
7690 			&& !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
7691 		/* reset the tail and hdr addresses, and sequence count */
7692 		write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
7693 				rcd->rcvhdrq_phys);
7694 		if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
7695 			write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
7696 					rcd->rcvhdrqtailaddr_phys);
7697 		rcd->seq_cnt = 1;
7698 
7699 		/* reset the cached receive header queue head value */
7700 		rcd->head = 0;
7701 
7702 		/*
7703 		 * Zero the receive header queue so we don't get false
7704 		 * positives when checking the sequence number.  The
7705 		 * sequence numbers could land exactly on the same spot.
7706 		 * E.g. a rcd restart before the receive header queue wrapped.
7707 		 */
7708 		memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size);
7709 
7710 		/* starting timeout */
7711 		rcd->rcvavail_timeout = dd->rcv_intr_timeout_csr;
7712 
7713 		/* enable the context */
7714 		rcvctrl |= RCV_CTXT_CTRL_ENABLE_SMASK;
7715 
7716 		/* clean the egr buffer size first */
7717 		rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
7718 		rcvctrl |= ((u64)encoded_size(rcd->egrbufs.rcvtid_size)
7719 				& RCV_CTXT_CTRL_EGR_BUF_SIZE_MASK)
7720 					<< RCV_CTXT_CTRL_EGR_BUF_SIZE_SHIFT;
7721 
7722 		/* zero RcvHdrHead - set RcvHdrHead.Counter after enable */
7723 		write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0);
7724 		did_enable = 1;
7725 
7726 		/* zero RcvEgrIndexHead */
7727 		write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, 0);
7728 
7729 		/* set eager count and base index */
7730 		reg = (((u64)(rcd->egrbufs.alloced >> RCV_SHIFT)
7731 			& RCV_EGR_CTRL_EGR_CNT_MASK)
7732 		       << RCV_EGR_CTRL_EGR_CNT_SHIFT) |
7733 			(((rcd->eager_base >> RCV_SHIFT)
7734 			  & RCV_EGR_CTRL_EGR_BASE_INDEX_MASK)
7735 			 << RCV_EGR_CTRL_EGR_BASE_INDEX_SHIFT);
7736 		write_kctxt_csr(dd, ctxt, RCV_EGR_CTRL, reg);
7737 
7738 		/*
7739 		 * Set TID (expected) count and base index.
7740 		 * rcd->expected_count is set to individual RcvArray entries,
7741 		 * not pairs, and the CSR takes a pair-count in groups of
7742 		 * four, so divide by 8.
7743 		 */
7744 		reg = (((rcd->expected_count >> RCV_SHIFT)
7745 					& RCV_TID_CTRL_TID_PAIR_CNT_MASK)
7746 				<< RCV_TID_CTRL_TID_PAIR_CNT_SHIFT) |
7747 		      (((rcd->expected_base >> RCV_SHIFT)
7748 					& RCV_TID_CTRL_TID_BASE_INDEX_MASK)
7749 				<< RCV_TID_CTRL_TID_BASE_INDEX_SHIFT);
7750 		write_kctxt_csr(dd, ctxt, RCV_TID_CTRL, reg);
7751 		if (ctxt == VL15CTXT)
7752 			write_csr(dd, RCV_VL15, VL15CTXT);
7753 	}
7754 	if (op & HFI1_RCVCTRL_CTXT_DIS) {
7755 		write_csr(dd, RCV_VL15, 0);
7756 		rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
7757 	}
7758 	if (op & HFI1_RCVCTRL_INTRAVAIL_ENB)
7759 		rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
7760 	if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
7761 		rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
7762 	if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys)
7763 		rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
7764 	if (op & HFI1_RCVCTRL_TAILUPD_DIS)
7765 		rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK;
7766 	if (op & HFI1_RCVCTRL_TIDFLOW_ENB)
7767 		rcvctrl |= RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
7768 	if (op & HFI1_RCVCTRL_TIDFLOW_DIS)
7769 		rcvctrl &= ~RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
7770 	if (op & HFI1_RCVCTRL_ONE_PKT_EGR_ENB) {
7771 		/* In one-packet-per-eager mode, the size comes from
7772 		   the RcvArray entry. */
7773 		rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
7774 		rcvctrl |= RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
7775 	}
7776 	if (op & HFI1_RCVCTRL_ONE_PKT_EGR_DIS)
7777 		rcvctrl &= ~RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
7778 	if (op & HFI1_RCVCTRL_NO_RHQ_DROP_ENB)
7779 		rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK;
7780 	if (op & HFI1_RCVCTRL_NO_RHQ_DROP_DIS)
7781 		rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK;
7782 	if (op & HFI1_RCVCTRL_NO_EGR_DROP_ENB)
7783 		rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
7784 	if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
7785 		rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
7786 	rcd->rcvctrl = rcvctrl;
7787 	hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
7788 	write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl);
7789 
7790 	/* work around sticky RcvCtxtStatus.BlockedRHQFull */
7791 	if (did_enable
7792 	    && (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) {
7793 		reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
7794 		if (reg != 0) {
7795 			dd_dev_info(dd, "ctxt %d status %lld (blocked)\n",
7796 				ctxt, reg);
7797 			read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
7798 			write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x10);
7799 			write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x00);
7800 			read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
7801 			reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
7802 			dd_dev_info(dd, "ctxt %d status %lld (%s blocked)\n",
7803 				ctxt, reg, reg == 0 ? "not" : "still");
7804 		}
7805 	}
7806 
7807 	if (did_enable) {
7808 		/*
7809 		 * The interrupt timeout and count must be set after
7810 		 * the context is enabled to take effect.
7811 		 */
7812 		/* set interrupt timeout */
7813 		write_kctxt_csr(dd, ctxt, RCV_AVAIL_TIME_OUT,
7814 			(u64)rcd->rcvavail_timeout <<
7815 				RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
7816 
7817 		/* set RcvHdrHead.Counter, zero RcvHdrHead.Head (again) */
7818 		reg = (u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT;
7819 		write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
7820 	}
7821 
7822 	if (op & (HFI1_RCVCTRL_TAILUPD_DIS | HFI1_RCVCTRL_CTXT_DIS))
7823 		/*
7824 		 * If the context has been disabled and the Tail Update has
7825 		 * been cleared, clear the RCV_HDR_TAIL_ADDR CSR so
7826 		 * it doesn't contain an address that is invalid.
7827 		 */
7828 		write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, 0);
7829 }
7830 
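/*
 * Counterpart of hfi1_read_portcntrs() for the per-device counters: with a
 * non-NULL namep return the counter name block, otherwise fill in the
 * current counter values.  The return value is the length of the data
 * in bytes.
 */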
7831 u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
7832 		    u64 **cntrp)
7833 {
7834 	int ret;
7835 	u64 val = 0;
7836 
7837 	if (namep) {
7838 		ret = dd->cntrnameslen;
7839 		if (pos != 0) {
7840 			dd_dev_err(dd, "read_cntrs does not support indexing");
7841 			return 0;
7842 		}
7843 		*namep = dd->cntrnames;
7844 	} else {
7845 		const struct cntr_entry *entry;
7846 		int i, j;
7847 
7848 		ret = (dd->ndevcntrs) * sizeof(u64);
7849 		if (pos != 0) {
7850 			dd_dev_err(dd, "read_cntrs does not support indexing");
7851 			return 0;
7852 		}
7853 
7854 		/* Get the start of the block of counters */
7855 		*cntrp = dd->cntrs;
7856 
7857 		/*
7858 		 * Now go and fill in each counter in the block.
7859 		 */
7860 		for (i = 0; i < DEV_CNTR_LAST; i++) {
7861 			entry = &dev_cntrs[i];
7862 			hfi1_cdbg(CNTR, "reading %s", entry->name);
7863 			if (entry->flags & CNTR_DISABLED) {
7864 				/* Nothing */
7865 				hfi1_cdbg(CNTR, "\tDisabled\n");
7866 			} else {
7867 				if (entry->flags & CNTR_VL) {
7868 					hfi1_cdbg(CNTR, "\tPer VL\n");
7869 					for (j = 0; j < C_VL_COUNT; j++) {
7870 						val = entry->rw_cntr(entry,
7871 								  dd, j,
7872 								  CNTR_MODE_R,
7873 								  0);
7874 						hfi1_cdbg(
7875 						   CNTR,
7876 						   "\t\tRead 0x%llx for %d\n",
7877 						   val, j);
7878 						dd->cntrs[entry->offset + j] =
7879 									    val;
7880 					}
7881 				} else {
7882 					val = entry->rw_cntr(entry, dd,
7883 							CNTR_INVALID_VL,
7884 							CNTR_MODE_R, 0);
7885 					dd->cntrs[entry->offset] = val;
7886 					hfi1_cdbg(CNTR, "\tRead 0x%llx", val);
7887 				}
7888 			}
7889 		}
7890 	}
7891 	return ret;
7892 }
7893 
7894 /*
7895  * Used by sysfs to create files for hfi stats to read
7896  */
7897 u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port,
7898 			char **namep, u64 **cntrp)
7899 {
7900 	int ret;
7901 	u64 val = 0;
7902 
7903 	if (namep) {
7904 		ret = dd->portcntrnameslen;
7905 		if (pos != 0) {
7906 			dd_dev_err(dd, "index not supported");
7907 			return 0;
7908 		}
7909 		*namep = dd->portcntrnames;
7910 	} else {
7911 		const struct cntr_entry *entry;
7912 		struct hfi1_pportdata *ppd;
7913 		int i, j;
7914 
7915 		ret = (dd->nportcntrs) * sizeof(u64);
7916 		if (pos != 0) {
7917 			dd_dev_err(dd, "indexing not supported");
7918 			return 0;
7919 		}
7920 		ppd = (struct hfi1_pportdata *)(dd + 1 + port);
7921 		*cntrp = ppd->cntrs;
7922 
7923 		for (i = 0; i < PORT_CNTR_LAST; i++) {
7924 			entry = &port_cntrs[i];
7925 			hfi1_cdbg(CNTR, "reading %s", entry->name);
7926 			if (entry->flags & CNTR_DISABLED) {
7927 				/* Nothing */
7928 				hfi1_cdbg(CNTR, "\tDisabled\n");
7929 				continue;
7930 			}
7931 
7932 			if (entry->flags & CNTR_VL) {
7933 				hfi1_cdbg(CNTR, "\tPer VL");
7934 				for (j = 0; j < C_VL_COUNT; j++) {
7935 					val = entry->rw_cntr(entry, ppd, j,
7936 							       CNTR_MODE_R,
7937 							       0);
7938 					hfi1_cdbg(
7939 					   CNTR,
7940 					   "\t\tRead 0x%llx for %d",
7941 					   val, j);
7942 					ppd->cntrs[entry->offset + j] = val;
7943 				}
7944 			} else {
7945 				val = entry->rw_cntr(entry, ppd,
7946 						       CNTR_INVALID_VL,
7947 						       CNTR_MODE_R,
7948 						       0);
7949 				ppd->cntrs[entry->offset] = val;
7950 				hfi1_cdbg(CNTR, "\tRead 0x%llx", val);
7951 			}
7952 		}
7953 	}
7954 	return ret;
7955 }
7956 
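/* free all counter memory and stop the synthetic-counter timer */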
7957 static void free_cntrs(struct hfi1_devdata *dd)
7958 {
7959 	struct hfi1_pportdata *ppd;
7960 	int i;
7961 
7962 	if (dd->synth_stats_timer.data)
7963 		del_timer_sync(&dd->synth_stats_timer);
7964 	dd->synth_stats_timer.data = 0;
7965 	ppd = (struct hfi1_pportdata *)(dd + 1);
7966 	for (i = 0; i < dd->num_pports; i++, ppd++) {
7967 		kfree(ppd->cntrs);
7968 		kfree(ppd->scntrs);
7969 		free_percpu(ppd->ibport_data.rc_acks);
7970 		free_percpu(ppd->ibport_data.rc_qacks);
7971 		free_percpu(ppd->ibport_data.rc_delayed_comp);
7972 		ppd->cntrs = NULL;
7973 		ppd->scntrs = NULL;
7974 		ppd->ibport_data.rc_acks = NULL;
7975 		ppd->ibport_data.rc_qacks = NULL;
7976 		ppd->ibport_data.rc_delayed_comp = NULL;
7977 	}
7978 	kfree(dd->portcntrnames);
7979 	dd->portcntrnames = NULL;
7980 	kfree(dd->cntrs);
7981 	dd->cntrs = NULL;
7982 	kfree(dd->scntrs);
7983 	dd->scntrs = NULL;
7984 	kfree(dd->cntrnames);
7985 	dd->cntrnames = NULL;
7986 }
7987 
7988 #define CNTR_MAX 0xFFFFFFFFFFFFFFFFULL
7989 #define CNTR_32BIT_MAX 0x00000000FFFFFFFF
7990 
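/*
 * Read a device or port counter via its rw_cntr() accessor.  For
 * synthetic counters the saved value in *psval is used to extend
 * 32-bit hardware counts to 64 bits and to saturate at CNTR_MAX
 * once a rollover has been seen.
 */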
7991 static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry,
7992 			      u64 *psval, void *context, int vl)
7993 {
7994 	u64 val;
7995 	u64 sval = *psval;
7996 
7997 	if (entry->flags & CNTR_DISABLED) {
7998 		dd_dev_err(dd, "Counter %s not enabled", entry->name);
7999 		return 0;
8000 	}
8001 
8002 	hfi1_cdbg(CNTR, "cntr: %s vl %d psval 0x%llx", entry->name, vl, *psval);
8003 
8004 	val = entry->rw_cntr(entry, context, vl, CNTR_MODE_R, 0);
8005 
8006 	/* If it's a synthetic counter there is more work we need to do */
8007 	if (entry->flags & CNTR_SYNTH) {
8008 		if (sval == CNTR_MAX) {
8009 			/* No need to read already saturated */
8010 			return CNTR_MAX;
8011 		}
8012 
8013 		if (entry->flags & CNTR_32BIT) {
8014 			/* 32bit counters can wrap multiple times */
8015 			u64 upper = sval >> 32;
8016 			u64 lower = (sval << 32) >> 32;
8017 
8018 			if (lower > val) { /* hw wrapped */
8019 				if (upper == CNTR_32BIT_MAX)
8020 					val = CNTR_MAX;
8021 				else
8022 					upper++;
8023 			}
8024 
8025 			if (val != CNTR_MAX)
8026 				val = (upper << 32) | val;
8027 
8028 		} else {
8029 			/* If we rolled we are saturated */
8030 			if ((val < sval) || (val > CNTR_MAX))
8031 				val = CNTR_MAX;
8032 		}
8033 	}
8034 
8035 	*psval = val;
8036 
8037 	hfi1_cdbg(CNTR, "\tNew val=0x%llx", val);
8038 
8039 	return val;
8040 }
8041 
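/*
 * Write a device or port counter via its rw_cntr() accessor and keep
 * the saved value in *psval in sync with what was written.
 */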
8042 static u64 write_dev_port_cntr(struct hfi1_devdata *dd,
8043 			       struct cntr_entry *entry,
8044 			       u64 *psval, void *context, int vl, u64 data)
8045 {
8046 	u64 val;
8047 
8048 	if (entry->flags & CNTR_DISABLED) {
8049 		dd_dev_err(dd, "Counter %s not enabled", entry->name);
8050 		return 0;
8051 	}
8052 
8053 	hfi1_cdbg(CNTR, "cntr: %s vl %d psval 0x%llx", entry->name, vl, *psval);
8054 
8055 	if (entry->flags & CNTR_SYNTH) {
8056 		*psval = data;
8057 		if (entry->flags & CNTR_32BIT) {
8058 			val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W,
8059 					     (data << 32) >> 32);
8060 			val = data; /* return the full 64bit value */
8061 		} else {
8062 			val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W,
8063 					     data);
8064 		}
8065 	} else {
8066 		val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W, data);
8067 	}
8068 
8069 	*psval = val;
8070 
8071 	hfi1_cdbg(CNTR, "\tNew val=0x%llx", val);
8072 
8073 	return val;
8074 }
8075 
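/*
 * Device- and port-level wrappers: look up the counter entry and its
 * saved-value slot (offset by VL where applicable), then defer to
 * read_dev_port_cntr()/write_dev_port_cntr().
 */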
8076 u64 read_dev_cntr(struct hfi1_devdata *dd, int index, int vl)
8077 {
8078 	struct cntr_entry *entry;
8079 	u64 *sval;
8080 
8081 	entry = &dev_cntrs[index];
8082 	sval = dd->scntrs + entry->offset;
8083 
8084 	if (vl != CNTR_INVALID_VL)
8085 		sval += vl;
8086 
8087 	return read_dev_port_cntr(dd, entry, sval, dd, vl);
8088 }
8089 
8090 u64 write_dev_cntr(struct hfi1_devdata *dd, int index, int vl, u64 data)
8091 {
8092 	struct cntr_entry *entry;
8093 	u64 *sval;
8094 
8095 	entry = &dev_cntrs[index];
8096 	sval = dd->scntrs + entry->offset;
8097 
8098 	if (vl != CNTR_INVALID_VL)
8099 		sval += vl;
8100 
8101 	return write_dev_port_cntr(dd, entry, sval, dd, vl, data);
8102 }
8103 
8104 u64 read_port_cntr(struct hfi1_pportdata *ppd, int index, int vl)
8105 {
8106 	struct cntr_entry *entry;
8107 	u64 *sval;
8108 
8109 	entry = &port_cntrs[index];
8110 	sval = ppd->scntrs + entry->offset;
8111 
8112 	if (vl != CNTR_INVALID_VL)
8113 		sval += vl;
8114 
8115 	if ((index >= C_RCV_HDR_OVF_FIRST + ppd->dd->num_rcv_contexts) &&
8116 	    (index <= C_RCV_HDR_OVF_LAST)) {
8117 		/* We do not want to bother for disabled contexts */
8118 		return 0;
8119 	}
8120 
8121 	return read_dev_port_cntr(ppd->dd, entry, sval, ppd, vl);
8122 }
8123 
8124 u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data)
8125 {
8126 	struct cntr_entry *entry;
8127 	u64 *sval;
8128 
8129 	entry = &port_cntrs[index];
8130 	sval = ppd->scntrs + entry->offset;
8131 
8132 	if (vl != CNTR_INVALID_VL)
8133 		sval += vl;
8134 
8135 	if ((index >= C_RCV_HDR_OVF_FIRST + ppd->dd->num_rcv_contexts) &&
8136 	    (index <= C_RCV_HDR_OVF_LAST)) {
8137 		/* We do not want to bother for disabled contexts */
8138 		return 0;
8139 	}
8140 
8141 	return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data);
8142 }
8143 
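/*
 * Timer callback that decides whether the synthetic 64-bit counters
 * need refreshing.  The transmit and receive flit counters act as
 * tripwires: if either rolled, or the flits moved since the last run
 * reach the 32-bit limit, every device and port counter is re-read.
 */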
8144 static void update_synth_timer(unsigned long opaque)
8145 {
8146 	u64 cur_tx;
8147 	u64 cur_rx;
8148 	u64 total_flits;
8149 	u8 update = 0;
8150 	int i, j, vl;
8151 	struct hfi1_pportdata *ppd;
8152 	struct cntr_entry *entry;
8153 
8154 	struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
8155 
8156 	/*
8157 	 * Rather than keep beating on the CSRs, pick a minimal set that we can
8158 	 * check to watch for a potential rollover. We do this by looking at
8159 	 * the number of flits sent/received. If the total flits reach the
8160 	 * 32-bit limit, we have to iterate over all the counters and update.
8161 	 */
8162 	entry = &dev_cntrs[C_DC_RCV_FLITS];
8163 	cur_rx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
8164 
8165 	entry = &dev_cntrs[C_DC_XMIT_FLITS];
8166 	cur_tx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
8167 
8168 	hfi1_cdbg(
8169 	    CNTR,
8170 	    "[%d] curr tx=0x%llx rx=0x%llx :: last tx=0x%llx rx=0x%llx\n",
8171 	    dd->unit, cur_tx, cur_rx, dd->last_tx, dd->last_rx);
8172 
8173 	if ((cur_tx < dd->last_tx) || (cur_rx < dd->last_rx)) {
8174 		/*
8175 		 * May not be strictly necessary to update but it won't hurt and
8176 		 * simplifies the logic here.
8177 		 */
8178 		update = 1;
8179 		hfi1_cdbg(CNTR, "[%d] Tripwire counter rolled, updating",
8180 			  dd->unit);
8181 	} else {
8182 		total_flits = (cur_tx - dd->last_tx) + (cur_rx - dd->last_rx);
8183 		hfi1_cdbg(CNTR,
8184 			  "[%d] total flits 0x%llx limit 0x%llx\n", dd->unit,
8185 			  total_flits, (u64)CNTR_32BIT_MAX);
8186 		if (total_flits >= CNTR_32BIT_MAX) {
8187 			hfi1_cdbg(CNTR, "[%d] 32bit limit hit, updating",
8188 				  dd->unit);
8189 			update = 1;
8190 		}
8191 	}
8192 
8193 	if (update) {
8194 		hfi1_cdbg(CNTR, "[%d] Updating dd and ppd counters", dd->unit);
8195 		for (i = 0; i < DEV_CNTR_LAST; i++) {
8196 			entry = &dev_cntrs[i];
8197 			if (entry->flags & CNTR_VL) {
8198 				for (vl = 0; vl < C_VL_COUNT; vl++)
8199 					read_dev_cntr(dd, i, vl);
8200 			} else {
8201 				read_dev_cntr(dd, i, CNTR_INVALID_VL);
8202 			}
8203 		}
8204 		ppd = (struct hfi1_pportdata *)(dd + 1);
8205 		for (i = 0; i < dd->num_pports; i++, ppd++) {
8206 			for (j = 0; j < PORT_CNTR_LAST; j++) {
8207 				entry = &port_cntrs[j];
8208 				if (entry->flags & CNTR_VL) {
8209 					for (vl = 0; vl < C_VL_COUNT; vl++)
8210 						read_port_cntr(ppd, j, vl);
8211 				} else {
8212 					read_port_cntr(ppd, j, CNTR_INVALID_VL);
8213 				}
8214 			}
8215 		}
8216 
8217 		/*
8218 		 * We want the value in the register. The goal is to keep track
8219 		 * of the number of "ticks" not the counter value. In other
8220 		 * words if the register rolls we want to notice it and go ahead
8221 		 * and force an update.
8222 		 */
8223 		entry = &dev_cntrs[C_DC_XMIT_FLITS];
8224 		dd->last_tx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL,
8225 						CNTR_MODE_R, 0);
8226 
8227 		entry = &dev_cntrs[C_DC_RCV_FLITS];
8228 		dd->last_rx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL,
8229 						CNTR_MODE_R, 0);
8230 
8231 		hfi1_cdbg(CNTR, "[%d] setting last tx/rx to 0x%llx 0x%llx",
8232 			  dd->unit, dd->last_tx, dd->last_rx);
8233 
8234 	} else {
8235 		hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit);
8236 	}
8237 
8238 	mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
8239 }
8240 
8241 #define C_MAX_NAME 13 /* 12 chars + one for \0 */
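/*
 * Allocate and name the device and port counters, disable the
 * receive-header-overflow counters for contexts that are not in use,
 * and start the synthetic-counter timer.  Returns 0 or -ENOMEM.
 */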
8242 static int init_cntrs(struct hfi1_devdata *dd)
8243 {
8244 	int i, rcv_ctxts, index, j;
8245 	size_t sz;
8246 	char *p;
8247 	char name[C_MAX_NAME];
8248 	struct hfi1_pportdata *ppd;
8249 
8250 	/* set up the stats timer; the add_timer is done at the end */
8251 	setup_timer(&dd->synth_stats_timer, update_synth_timer,
8252 		    (unsigned long)dd);
8253 
8254 	/***********************/
8255 	/* per device counters */
8256 	/***********************/
8257 
8258 	/* size names and determine how many we have */
8259 	dd->ndevcntrs = 0;
8260 	sz = 0;
8261 	index = 0;
8262 
8263 	for (i = 0; i < DEV_CNTR_LAST; i++) {
8264 		hfi1_dbg_early("Init cntr %s\n", dev_cntrs[i].name);
8265 		if (dev_cntrs[i].flags & CNTR_DISABLED) {
8266 			hfi1_dbg_early("\tSkipping %s\n", dev_cntrs[i].name);
8267 			continue;
8268 		}
8269 
8270 		if (dev_cntrs[i].flags & CNTR_VL) {
8271 			hfi1_dbg_early("\tProcessing VL cntr\n");
8272 			dev_cntrs[i].offset = index;
8273 			for (j = 0; j < C_VL_COUNT; j++) {
8274 				memset(name, '\0', C_MAX_NAME);
8275 				snprintf(name, C_MAX_NAME, "%s%d",
8276 					dev_cntrs[i].name,
8277 					vl_from_idx(j));
8278 				sz += strlen(name);
8279 				sz++;
8280 				hfi1_dbg_early("\t\t%s\n", name);
8281 				dd->ndevcntrs++;
8282 				index++;
8283 			}
8284 		} else {
8285 			/* +1 for newline  */
8286 			sz += strlen(dev_cntrs[i].name) + 1;
8287 			dd->ndevcntrs++;
8288 			dev_cntrs[i].offset = index;
8289 			index++;
8290 			hfi1_dbg_early("\tAdding %s\n", dev_cntrs[i].name);
8291 		}
8292 	}
8293 
8294 	/* allocate space for the counter values */
8295 	dd->cntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
8296 	if (!dd->cntrs)
8297 		goto bail;
8298 
8299 	dd->scntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
8300 	if (!dd->scntrs)
8301 		goto bail;
8302 
8303 
8304 	/* allocate space for the counter names */
8305 	dd->cntrnameslen = sz;
8306 	dd->cntrnames = kmalloc(sz, GFP_KERNEL);
8307 	if (!dd->cntrnames)
8308 		goto bail;
8309 
8310 	/* fill in the names */
8311 	for (p = dd->cntrnames, i = 0, index = 0; i < DEV_CNTR_LAST; i++) {
8312 		if (dev_cntrs[i].flags & CNTR_DISABLED) {
8313 			/* Nothing */
8314 		} else {
8315 			if (dev_cntrs[i].flags & CNTR_VL) {
8316 				for (j = 0; j < C_VL_COUNT; j++) {
8317 					memset(name, '\0', C_MAX_NAME);
8318 					snprintf(name, C_MAX_NAME, "%s%d",
8319 						dev_cntrs[i].name,
8320 						vl_from_idx(j));
8321 					memcpy(p, name, strlen(name));
8322 					p += strlen(name);
8323 					*p++ = '\n';
8324 				}
8325 			} else {
8326 				memcpy(p, dev_cntrs[i].name,
8327 				       strlen(dev_cntrs[i].name));
8328 				p += strlen(dev_cntrs[i].name);
8329 				*p++ = '\n';
8330 			}
8331 			index++;
8332 		}
8333 	}
8334 
8335 	/*********************/
8336 	/* per port counters */
8337 	/*********************/
8338 
8339 	/*
8340 	 * Go through the counters for the overflows and disable the ones we
8341 	 * don't need. This varies based on platform so we need to do it
8342 	 * dynamically here.
8343 	 */
8344 	rcv_ctxts = dd->num_rcv_contexts;
8345 	for (i = C_RCV_HDR_OVF_FIRST + rcv_ctxts;
8346 	     i <= C_RCV_HDR_OVF_LAST; i++) {
8347 		port_cntrs[i].flags |= CNTR_DISABLED;
8348 	}
8349 
8350 	/* size port counter names and determine how many we have */
8351 	sz = 0;
8352 	dd->nportcntrs = 0;
8353 	for (i = 0; i < PORT_CNTR_LAST; i++) {
8354 		hfi1_dbg_early("Init pcntr %s\n", port_cntrs[i].name);
8355 		if (port_cntrs[i].flags & CNTR_DISABLED) {
8356 			hfi1_dbg_early("\tSkipping %s\n", port_cntrs[i].name);
8357 			continue;
8358 		}
8359 
8360 		if (port_cntrs[i].flags & CNTR_VL) {
8361 			hfi1_dbg_early("\tProcessing VL cntr\n");
8362 			port_cntrs[i].offset = dd->nportcntrs;
8363 			for (j = 0; j < C_VL_COUNT; j++) {
8364 				memset(name, '\0', C_MAX_NAME);
8365 				snprintf(name, C_MAX_NAME, "%s%d",
8366 					port_cntrs[i].name,
8367 					vl_from_idx(j));
8368 				sz += strlen(name);
8369 				sz++;
8370 				hfi1_dbg_early("\t\t%s\n", name);
8371 				dd->nportcntrs++;
8372 			}
8373 		} else {
8374 			/* +1 for newline  */
8375 			sz += strlen(port_cntrs[i].name) + 1;
8376 			port_cntrs[i].offset = dd->nportcntrs;
8377 			dd->nportcntrs++;
8378 			hfi1_dbg_early("\tAdding %s\n", port_cntrs[i].name);
8379 		}
8380 	}
8381 
8382 	/* allocate space for the counter names */
8383 	dd->portcntrnameslen = sz;
8384 	dd->portcntrnames = kmalloc(sz, GFP_KERNEL);
8385 	if (!dd->portcntrnames)
8386 		goto bail;
8387 
8388 	/* fill in port cntr names */
8389 	for (p = dd->portcntrnames, i = 0; i < PORT_CNTR_LAST; i++) {
8390 		if (port_cntrs[i].flags & CNTR_DISABLED)
8391 			continue;
8392 
8393 		if (port_cntrs[i].flags & CNTR_VL) {
8394 			for (j = 0; j < C_VL_COUNT; j++) {
8395 				memset(name, '\0', C_MAX_NAME);
8396 				snprintf(name, C_MAX_NAME, "%s%d",
8397 					port_cntrs[i].name,
8398 					vl_from_idx(j));
8399 				memcpy(p, name, strlen(name));
8400 				p += strlen(name);
8401 				*p++ = '\n';
8402 			}
8403 		} else {
8404 			memcpy(p, port_cntrs[i].name,
8405 			       strlen(port_cntrs[i].name));
8406 			p += strlen(port_cntrs[i].name);
8407 			*p++ = '\n';
8408 		}
8409 	}
8410 
8411 	/* allocate per port storage for counter values */
8412 	ppd = (struct hfi1_pportdata *)(dd + 1);
8413 	for (i = 0; i < dd->num_pports; i++, ppd++) {
8414 		ppd->cntrs = kcalloc(dd->nportcntrs, sizeof(u64), GFP_KERNEL);
8415 		if (!ppd->cntrs)
8416 			goto bail;
8417 
8418 		ppd->scntrs = kcalloc(dd->nportcntrs, sizeof(u64), GFP_KERNEL);
8419 		if (!ppd->scntrs)
8420 			goto bail;
8421 	}
8422 
8423 	/* CPU counters need to be allocated and zeroed */
8424 	if (init_cpu_counters(dd))
8425 		goto bail;
8426 
8427 	mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
8428 	return 0;
8429 bail:
8430 	free_cntrs(dd);
8431 	return -ENOMEM;
8432 }
8433 
8434 
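/* map a chip logical link state to an IB port logical state */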
8435 static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
8436 {
8437 	switch (chip_lstate) {
8438 	default:
8439 		dd_dev_err(dd,
8440 			 "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
8441 			 chip_lstate);
8442 		/* fall through */
8443 	case LSTATE_DOWN:
8444 		return IB_PORT_DOWN;
8445 	case LSTATE_INIT:
8446 		return IB_PORT_INIT;
8447 	case LSTATE_ARMED:
8448 		return IB_PORT_ARMED;
8449 	case LSTATE_ACTIVE:
8450 		return IB_PORT_ACTIVE;
8451 	}
8452 }
8453 
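/* map a chip physical link state to an OPA/IB physical port state */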
8454 u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
8455 {
8456 	/* look at the HFI meta-states only */
8457 	switch (chip_pstate & 0xf0) {
8458 	default:
8459 		dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
8460 			chip_pstate);
8461 		/* fall through */
8462 	case PLS_DISABLED:
8463 		return IB_PORTPHYSSTATE_DISABLED;
8464 	case PLS_OFFLINE:
8465 		return OPA_PORTPHYSSTATE_OFFLINE;
8466 	case PLS_POLLING:
8467 		return IB_PORTPHYSSTATE_POLLING;
8468 	case PLS_CONFIGPHY:
8469 		return IB_PORTPHYSSTATE_TRAINING;
8470 	case PLS_LINKUP:
8471 		return IB_PORTPHYSSTATE_LINKUP;
8472 	case PLS_PHYTEST:
8473 		return IB_PORTPHYSSTATE_PHY_TEST;
8474 	}
8475 }
8476 
8477 /* return the OPA port logical state name */
8478 const char *opa_lstate_name(u32 lstate)
8479 {
8480 	static const char * const port_logical_names[] = {
8481 		"PORT_NOP",
8482 		"PORT_DOWN",
8483 		"PORT_INIT",
8484 		"PORT_ARMED",
8485 		"PORT_ACTIVE",
8486 		"PORT_ACTIVE_DEFER",
8487 	};
8488 	if (lstate < ARRAY_SIZE(port_logical_names))
8489 		return port_logical_names[lstate];
8490 	return "unknown";
8491 }
8492 
8493 /* return the OPA port physical state name */
8494 const char *opa_pstate_name(u32 pstate)
8495 {
8496 	static const char * const port_physical_names[] = {
8497 		"PHYS_NOP",
8498 		"reserved1",
8499 		"PHYS_POLL",
8500 		"PHYS_DISABLED",
8501 		"PHYS_TRAINING",
8502 		"PHYS_LINKUP",
8503 		"PHYS_LINK_ERR_RECOVER",
8504 		"PHYS_PHY_TEST",
8505 		"reserved8",
8506 		"PHYS_OFFLINE",
8507 		"PHYS_GANGED",
8508 		"PHYS_TEST",
8509 	};
8510 	if (pstate < ARRAY_SIZE(port_physical_names))
8511 		return port_physical_names[pstate];
8512 	return "unknown";
8513 }
8514 
8515 /*
8516  * Read the hardware link state and set the driver's cached value of it.
8517  * Return the (new) current value.
8518  */
8519 u32 get_logical_state(struct hfi1_pportdata *ppd)
8520 {
8521 	u32 new_state;
8522 
8523 	new_state = chip_to_opa_lstate(ppd->dd, read_logical_state(ppd->dd));
8524 	if (new_state != ppd->lstate) {
8525 		dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
8526 			opa_lstate_name(new_state), new_state);
8527 		ppd->lstate = new_state;
8528 	}
8529 	/*
8530 	 * Set port status flags in the page mapped into userspace
8531 	 * memory. Do it here to ensure a reliable state - this is
8532 	 * the only function called by all state handling code.
8533 	 * Always set the flags because the cached value might have
8534 	 * been changed explicitly outside of this function.
8536 	 */
8537 	if (ppd->statusp) {
8538 		switch (ppd->lstate) {
8539 		case IB_PORT_DOWN:
8540 		case IB_PORT_INIT:
8541 			*ppd->statusp &= ~(HFI1_STATUS_IB_CONF |
8542 					   HFI1_STATUS_IB_READY);
8543 			break;
8544 		case IB_PORT_ARMED:
8545 			*ppd->statusp |= HFI1_STATUS_IB_CONF;
8546 			break;
8547 		case IB_PORT_ACTIVE:
8548 			*ppd->statusp |= HFI1_STATUS_IB_READY;
8549 			break;
8550 		}
8551 	}
8552 	return ppd->lstate;
8553 }
8554 
8555 /**
8556  * wait_logical_linkstate - wait for an IB link state change to occur
8557  * @ppd: port device
8558  * @state: the state to wait for
8559  * @msecs: the number of milliseconds to wait
8560  *
8561  * Wait up to msecs milliseconds for IB link state change to occur.
8562  * For now, take the easy polling route.
8563  * Returns 0 if state reached, otherwise -ETIMEDOUT.
8564  */
8565 static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
8566 				  int msecs)
8567 {
8568 	unsigned long timeout;
8569 
8570 	timeout = jiffies + msecs_to_jiffies(msecs);
8571 	while (1) {
8572 		if (get_logical_state(ppd) == state)
8573 			return 0;
8574 		if (time_after(jiffies, timeout))
8575 			break;
8576 		msleep(20);
8577 	}
8578 	dd_dev_err(ppd->dd, "timeout waiting for link state 0x%x\n", state);
8579 
8580 	return -ETIMEDOUT;
8581 }
8582 
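/*
 * Read the chip physical link state and return it as an OPA physical
 * port state, logging a message whenever the state changes between
 * calls.
 */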
8583 u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
8584 {
8585 	static u32 remembered_state = 0xff;
8586 	u32 pstate;
8587 	u32 ib_pstate;
8588 
8589 	pstate = read_physical_state(ppd->dd);
8590 	ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
8591 	if (remembered_state != ib_pstate) {
8592 		dd_dev_info(ppd->dd,
8593 			"%s: physical state changed to %s (0x%x), phy 0x%x\n",
8594 			__func__, opa_pstate_name(ib_pstate), ib_pstate,
8595 			pstate);
8596 		remembered_state = ib_pstate;
8597 	}
8598 	return ib_pstate;
8599 }
8600 
8601 /*
8602  * Read/modify/write ASIC_QSFP register bits as selected by mask
8603  * data: 0 or 1 in the positions depending on what needs to be written
8604  * dir: 0 for read, 1 for write
8605  * mask: select by setting
8606  *      I2CCLK  (bit 0)
8607  *      I2CDATA (bit 1)
8608  */
8609 u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir,
8610 		  u32 mask)
8611 {
8612 	u64 qsfp_oe, target_oe;
8613 
8614 	target_oe = target ? ASIC_QSFP2_OE : ASIC_QSFP1_OE;
8615 	if (mask) {
8616 		/* We are writing register bits, so lock access */
8617 		dir &= mask;
8618 		data &= mask;
8619 
8620 		qsfp_oe = read_csr(dd, target_oe);
8621 		qsfp_oe = (qsfp_oe & ~(u64)mask) | (u64)dir;
8622 		write_csr(dd, target_oe, qsfp_oe);
8623 	}
8624 	/* We are exclusively reading bits here, but it is unlikely
8625 	 * we'll get valid data when we set the direction of the pin
8626 	 * in the same call, so callers should call this function again
8627 	 * to get valid data.
8628 	 */
8629 	return read_csr(dd, target ? ASIC_QSFP2_IN : ASIC_QSFP1_IN);
8630 }
8631 
8632 #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
8633 (r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
8634 
8635 #define SET_STATIC_RATE_CONTROL_SMASK(r) \
8636 (r |= SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
8637 
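/*
 * Program the send context check-enable CSR for a new send context:
 * clear the "disallow PBC static rate control" bit when the matching
 * STATIC_RATE_CTRL capability is set, otherwise set it.
 */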
8638 int hfi1_init_ctxt(struct send_context *sc)
8639 {
8640 	if (sc != NULL) {
8641 		struct hfi1_devdata *dd = sc->dd;
8642 		u64 reg;
8643 		u8 set = (sc->type == SC_USER ?
8644 			  HFI1_CAP_IS_USET(STATIC_RATE_CTRL) :
8645 			  HFI1_CAP_IS_KSET(STATIC_RATE_CTRL));
8646 		reg = read_kctxt_csr(dd, sc->hw_context,
8647 				     SEND_CTXT_CHECK_ENABLE);
8648 		if (set)
8649 			CLEAR_STATIC_RATE_CONTROL_SMASK(reg);
8650 		else
8651 			SET_STATIC_RATE_CONTROL_SMASK(reg);
8652 		write_kctxt_csr(dd, sc->hw_context,
8653 				SEND_CTXT_CHECK_ENABLE, reg);
8654 	}
8655 	return 0;
8656 }
8657 
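/*
 * Read the on-chip thermal sensor: current temperature, the low/high/
 * critical limits, and the trigger bits.  Only available on RTL silicon.
 */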
8658 int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
8659 {
8660 	int ret = 0;
8661 	u64 reg;
8662 
8663 	if (dd->icode != ICODE_RTL_SILICON) {
8664 		if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
8665 			dd_dev_info(dd, "%s: tempsense not supported by HW\n",
8666 				    __func__);
8667 		return -EINVAL;
8668 	}
8669 	reg = read_csr(dd, ASIC_STS_THERM);
8670 	temp->curr = ((reg >> ASIC_STS_THERM_CURR_TEMP_SHIFT) &
8671 		      ASIC_STS_THERM_CURR_TEMP_MASK);
8672 	temp->lo_lim = ((reg >> ASIC_STS_THERM_LO_TEMP_SHIFT) &
8673 			ASIC_STS_THERM_LO_TEMP_MASK);
8674 	temp->hi_lim = ((reg >> ASIC_STS_THERM_HI_TEMP_SHIFT) &
8675 			ASIC_STS_THERM_HI_TEMP_MASK);
8676 	temp->crit_lim = ((reg >> ASIC_STS_THERM_CRIT_TEMP_SHIFT) &
8677 			  ASIC_STS_THERM_CRIT_TEMP_MASK);
8678 	/* triggers is a 3-bit value - 1 bit per trigger. */
8679 	temp->triggers = (u8)((reg >> ASIC_STS_THERM_LOW_SHIFT) & 0x7);
8680 
8681 	return ret;
8682 }
8683 
8684 /* ========================================================================= */
8685 
8686 /*
8687  * Enable/disable chip from delivering interrupts.
8688  */
8689 void set_intr_state(struct hfi1_devdata *dd, u32 enable)
8690 {
8691 	int i;
8692 
8693 	/*
8694 	 * In HFI, the mask needs to be 1 to allow interrupts.
8695 	 */
8696 	if (enable) {
8697 		u64 cce_int_mask;
8698 		const int qsfp1_int_smask = QSFP1_INT % 64;
8699 		const int qsfp2_int_smask = QSFP2_INT % 64;
8700 
8701 		/* enable all interrupts */
8702 		for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8703 			write_csr(dd, CCE_INT_MASK + (8*i), ~(u64)0);
8704 
8705 		/*
8706 		 * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0.
8707 		 * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
8708 		 * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
8709 		 * the index of the appropriate CSR in the CCEIntMask CSR array
8710 		 */
8711 		cce_int_mask = read_csr(dd, CCE_INT_MASK +
8712 						(8*(QSFP1_INT/64)));
8713 		if (dd->hfi1_id) {
8714 			cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
8715 			write_csr(dd, CCE_INT_MASK + (8*(QSFP1_INT/64)),
8716 					cce_int_mask);
8717 		} else {
8718 			cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
8719 			write_csr(dd, CCE_INT_MASK + (8*(QSFP2_INT/64)),
8720 					cce_int_mask);
8721 		}
8722 	} else {
8723 		for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8724 			write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
8725 	}
8726 }
8727 
8728 /*
8729  * Clear all interrupt sources on the chip.
8730  */
8731 static void clear_all_interrupts(struct hfi1_devdata *dd)
8732 {
8733 	int i;
8734 
8735 	for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8736 		write_csr(dd, CCE_INT_CLEAR + (8*i), ~(u64)0);
8737 
8738 	write_csr(dd, CCE_ERR_CLEAR, ~(u64)0);
8739 	write_csr(dd, MISC_ERR_CLEAR, ~(u64)0);
8740 	write_csr(dd, RCV_ERR_CLEAR, ~(u64)0);
8741 	write_csr(dd, SEND_ERR_CLEAR, ~(u64)0);
8742 	write_csr(dd, SEND_PIO_ERR_CLEAR, ~(u64)0);
8743 	write_csr(dd, SEND_DMA_ERR_CLEAR, ~(u64)0);
8744 	write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~(u64)0);
8745 	for (i = 0; i < dd->chip_send_contexts; i++)
8746 		write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~(u64)0);
8747 	for (i = 0; i < dd->chip_sdma_engines; i++)
8748 		write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~(u64)0);
8749 
8750 	write_csr(dd, DCC_ERR_FLG_CLR, ~(u64)0);
8751 	write_csr(dd, DC_LCB_ERR_CLR, ~(u64)0);
8752 	write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
8753 }
8754 
8755 /* Move to pcie.c? */
8756 static void disable_intx(struct pci_dev *pdev)
8757 {
8758 	pci_intx(pdev, 0);
8759 }
8760 
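/*
 * Release all requested IRQs (MSI-X or INTx), turn interrupts off at
 * the PCI level, and free the MSI-X bookkeeping structures.
 */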
8761 static void clean_up_interrupts(struct hfi1_devdata *dd)
8762 {
8763 	int i;
8764 
8765 	/* remove irqs - must happen before disabling/turning off */
8766 	if (dd->num_msix_entries) {
8767 		/* MSI-X */
8768 		struct hfi1_msix_entry *me = dd->msix_entries;
8769 
8770 		for (i = 0; i < dd->num_msix_entries; i++, me++) {
8771 			if (me->arg == NULL) /* => no irq, no affinity */
8772 				break;
8773 			irq_set_affinity_hint(dd->msix_entries[i].msix.vector,
8774 					NULL);
8775 			free_irq(me->msix.vector, me->arg);
8776 		}
8777 	} else {
8778 		/* INTx */
8779 		if (dd->requested_intx_irq) {
8780 			free_irq(dd->pcidev->irq, dd);
8781 			dd->requested_intx_irq = 0;
8782 		}
8783 	}
8784 
8785 	/* turn off interrupts */
8786 	if (dd->num_msix_entries) {
8787 		/* MSI-X */
8788 		hfi1_nomsix(dd);
8789 	} else {
8790 		/* INTx */
8791 		disable_intx(dd->pcidev);
8792 	}
8793 
8794 	/* clean structures */
8795 	for (i = 0; i < dd->num_msix_entries; i++)
8796 		free_cpumask_var(dd->msix_entries[i].mask);
8797 	kfree(dd->msix_entries);
8798 	dd->msix_entries = NULL;
8799 	dd->num_msix_entries = 0;
8800 }
8801 
8802 /*
8803  * Remap the interrupt source from the general handler to the given MSI-X
8804  * interrupt.
8805  */
8806 static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
8807 {
8808 	u64 reg;
8809 	int m, n;
8810 
8811 	/* clear from the handled mask of the general interrupt */
8812 	m = isrc / 64;
8813 	n = isrc % 64;
8814 	dd->gi_mask[m] &= ~((u64)1 << n);
8815 
8816 	/* direct the chip source to the given MSI-X interrupt */
8817 	m = isrc / 8;
8818 	n = isrc % 8;
8819 	reg = read_csr(dd, CCE_INT_MAP + (8*m));
8820 	reg &= ~((u64)0xff << (8*n));
8821 	reg |= ((u64)msix_intr & 0xff) << (8*n);
8822 	write_csr(dd, CCE_INT_MAP + (8*m), reg);
8823 }
8824 
8825 static void remap_sdma_interrupts(struct hfi1_devdata *dd,
8826 				  int engine, int msix_intr)
8827 {
8828 	/*
8829 	 * SDMA engine interrupt sources grouped by type, rather than
8830 	 * engine.  Per-engine interrupts are as follows:
8831 	 *	SDMA
8832 	 *	SDMAProgress
8833 	 *	SDMAIdle
8834 	 */
8835 	remap_intr(dd, IS_SDMA_START + 0*TXE_NUM_SDMA_ENGINES + engine,
8836 		msix_intr);
8837 	remap_intr(dd, IS_SDMA_START + 1*TXE_NUM_SDMA_ENGINES + engine,
8838 		msix_intr);
8839 	remap_intr(dd, IS_SDMA_START + 2*TXE_NUM_SDMA_ENGINES + engine,
8840 		msix_intr);
8841 }
8842 
8843 static void remap_receive_available_interrupt(struct hfi1_devdata *dd,
8844 					      int rx, int msix_intr)
8845 {
8846 	remap_intr(dd, IS_RCVAVAIL_START + rx, msix_intr);
8847 }
8848 
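/* request the single shared INTx interrupt used when MSI-X is not available */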
8849 static int request_intx_irq(struct hfi1_devdata *dd)
8850 {
8851 	int ret;
8852 
8853 	snprintf(dd->intx_name, sizeof(dd->intx_name), DRIVER_NAME"_%d",
8854 		dd->unit);
8855 	ret = request_irq(dd->pcidev->irq, general_interrupt,
8856 				  IRQF_SHARED, dd->intx_name, dd);
8857 	if (ret)
8858 		dd_dev_err(dd, "unable to request INTx interrupt, err %d\n",
8859 				ret);
8860 	else
8861 		dd->requested_intx_irq = 1;
8862 	return ret;
8863 }
8864 
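/*
 * Request one MSI-X vector for the general interrupt, one per SDMA
 * engine, and one per kernel receive context, remapping the chip
 * interrupt sources and setting CPU affinity hints as each is wired up.
 */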
8865 static int request_msix_irqs(struct hfi1_devdata *dd)
8866 {
8867 	const struct cpumask *local_mask;
8868 	cpumask_var_t def, rcv;
8869 	bool def_ret, rcv_ret;
8870 	int first_general, last_general;
8871 	int first_sdma, last_sdma;
8872 	int first_rx, last_rx;
8873 	int first_cpu, restart_cpu, curr_cpu;
8874 	int rcv_cpu, sdma_cpu;
8875 	int i, ret = 0, possible;
8876 	int ht;
8877 
8878 	/* calculate the ranges we are going to use */
8879 	first_general = 0;
8880 	first_sdma = last_general = first_general + 1;
8881 	first_rx = last_sdma = first_sdma + dd->num_sdma;
8882 	last_rx = first_rx + dd->n_krcv_queues;
8883 
8884 	/*
8885 	 * Interrupt affinity.
8886 	 *
8887 	 * Non-receive-available interrupts get a default mask built from
8888 	 * the possible CPUs with hyperthread siblings removed and the
8889 	 * receive-available CPUs removed.
8890 	 *
8891 	 * Receive-available interrupts are assigned starting at node-relative
8892 	 * CPU 1, wrapping back to node-relative CPU 1 as necessary.
8893 	 */
8895 	local_mask = cpumask_of_pcibus(dd->pcidev->bus);
8896 	/* if first cpu is invalid, use NUMA 0 */
8897 	if (cpumask_first(local_mask) >= nr_cpu_ids)
8898 		local_mask = topology_core_cpumask(0);
8899 
8900 	def_ret = zalloc_cpumask_var(&def, GFP_KERNEL);
8901 	rcv_ret = zalloc_cpumask_var(&rcv, GFP_KERNEL);
8902 	if (!def_ret || !rcv_ret)
8903 		goto bail;
8904 	/* use local mask as default */
8905 	cpumask_copy(def, local_mask);
8906 	possible = cpumask_weight(def);
8907 	/* disarm threads from default */
8908 	ht = cpumask_weight(
8909 			topology_sibling_cpumask(cpumask_first(local_mask)));
8910 	for (i = possible/ht; i < possible; i++)
8911 		cpumask_clear_cpu(i, def);
8912 	/* reset possible */
8913 	possible = cpumask_weight(def);
8914 	/* def now has full cores on the chosen node */
8915 	first_cpu = cpumask_first(def);
8916 	if (nr_cpu_ids >= first_cpu)
8917 		first_cpu++;
8918 	restart_cpu = first_cpu;
8919 	curr_cpu = restart_cpu;
8920 
8921 	for (i = first_cpu; i < dd->n_krcv_queues + first_cpu; i++) {
8922 		cpumask_clear_cpu(curr_cpu, def);
8923 		cpumask_set_cpu(curr_cpu, rcv);
8924 		if (curr_cpu >= possible)
8925 			curr_cpu = restart_cpu;
8926 		else
8927 			curr_cpu++;
8928 	}
8929 	/* def mask has non-rcv, rcv has recv mask */
8930 	rcv_cpu = cpumask_first(rcv);
8931 	sdma_cpu = cpumask_first(def);
8932 
8933 	/*
8934 	 * Sanity check - the code expects all SDMA chip source
8935 	 * interrupts to be in the same CSR, starting at bit 0.  Verify
8936 	 * that this is true by checking the bit location of the start.
8937 	 */
8938 	BUILD_BUG_ON(IS_SDMA_START % 64);
8939 
8940 	for (i = 0; i < dd->num_msix_entries; i++) {
8941 		struct hfi1_msix_entry *me = &dd->msix_entries[i];
8942 		const char *err_info;
8943 		irq_handler_t handler;
8944 		irq_handler_t thread = NULL;
8945 		void *arg;
8946 		int idx;
8947 		struct hfi1_ctxtdata *rcd = NULL;
8948 		struct sdma_engine *sde = NULL;
8949 
8950 		/* obtain the arguments to request_irq */
8951 		if (first_general <= i && i < last_general) {
8952 			idx = i - first_general;
8953 			handler = general_interrupt;
8954 			arg = dd;
8955 			snprintf(me->name, sizeof(me->name),
8956 				DRIVER_NAME"_%d", dd->unit);
8957 			err_info = "general";
8958 		} else if (first_sdma <= i && i < last_sdma) {
8959 			idx = i - first_sdma;
8960 			sde = &dd->per_sdma[idx];
8961 			handler = sdma_interrupt;
8962 			arg = sde;
8963 			snprintf(me->name, sizeof(me->name),
8964 				DRIVER_NAME"_%d sdma%d", dd->unit, idx);
8965 			err_info = "sdma";
8966 			remap_sdma_interrupts(dd, idx, i);
8967 		} else if (first_rx <= i && i < last_rx) {
8968 			idx = i - first_rx;
8969 			rcd = dd->rcd[idx];
8970 			/* no interrupt if no rcd */
8971 			if (!rcd)
8972 				continue;
8973 			/*
8974 			 * Set the interrupt register and mask for this
8975 			 * context's interrupt.
8976 			 */
8977 			rcd->ireg = (IS_RCVAVAIL_START+idx) / 64;
8978 			rcd->imask = ((u64)1) <<
8979 					((IS_RCVAVAIL_START+idx) % 64);
8980 			handler = receive_context_interrupt;
8981 			thread = receive_context_thread;
8982 			arg = rcd;
8983 			snprintf(me->name, sizeof(me->name),
8984 				DRIVER_NAME"_%d kctxt%d", dd->unit, idx);
8985 			err_info = "receive context";
8986 			remap_receive_available_interrupt(dd, idx, i);
8987 		} else {
8988 			/* not in our expected range - complain, then
8989 			   ignore it */
8990 			dd_dev_err(dd,
8991 				"Unexpected extra MSI-X interrupt %d\n", i);
8992 			continue;
8993 		}
8994 		/* no argument, no interrupt */
8995 		if (arg == NULL)
8996 			continue;
8997 		/* make sure the name is terminated */
8998 		me->name[sizeof(me->name)-1] = 0;
8999 
9000 		ret = request_threaded_irq(me->msix.vector, handler, thread, 0,
9001 						me->name, arg);
9002 		if (ret) {
9003 			dd_dev_err(dd,
9004 				"unable to allocate %s interrupt, vector %d, index %d, err %d\n",
9005 				 err_info, me->msix.vector, idx, ret);
9006 			return ret;
9007 		}
9008 		/*
9009 		 * assign arg after request_irq call, so it will be
9010 		 * cleaned up
9011 		 */
9012 		me->arg = arg;
9013 
9014 		if (!zalloc_cpumask_var(
9015 			&dd->msix_entries[i].mask,
9016 			GFP_KERNEL))
9017 			goto bail;
9018 		if (handler == sdma_interrupt) {
9019 			dd_dev_info(dd, "sdma engine %d cpu %d\n",
9020 				sde->this_idx, sdma_cpu);
9021 			cpumask_set_cpu(sdma_cpu, dd->msix_entries[i].mask);
9022 			sdma_cpu = cpumask_next(sdma_cpu, def);
9023 			if (sdma_cpu >= nr_cpu_ids)
9024 				sdma_cpu = cpumask_first(def);
9025 		} else if (handler == receive_context_interrupt) {
9026 			dd_dev_info(dd, "rcv ctxt %d cpu %d\n",
9027 				rcd->ctxt, rcv_cpu);
9028 			cpumask_set_cpu(rcv_cpu, dd->msix_entries[i].mask);
9029 			rcv_cpu = cpumask_next(rcv_cpu, rcv);
9030 			if (rcv_cpu >= nr_cpu_ids)
9031 				rcv_cpu = cpumask_first(rcv);
9032 		} else {
9033 			/* otherwise first def */
9034 			dd_dev_info(dd, "%s cpu %d\n",
9035 				err_info, cpumask_first(def));
9036 			cpumask_set_cpu(
9037 				cpumask_first(def), dd->msix_entries[i].mask);
9038 		}
9039 		irq_set_affinity_hint(
9040 			dd->msix_entries[i].msix.vector,
9041 			dd->msix_entries[i].mask);
9042 	}
9043 
9044 out:
9045 	free_cpumask_var(def);
9046 	free_cpumask_var(rcv);
9047 	return ret;
9048 bail:
9049 	ret = -ENOMEM;
9050 	goto  out;
9051 }
9052 
9053 /*
9054  * Set the general handler to accept all interrupts, remap all
9055  * chip interrupts back to MSI-X 0.
9056  */
9057 static void reset_interrupts(struct hfi1_devdata *dd)
9058 {
9059 	int i;
9060 
9061 	/* all interrupts handled by the general handler */
9062 	for (i = 0; i < CCE_NUM_INT_CSRS; i++)
9063 		dd->gi_mask[i] = ~(u64)0;
9064 
9065 	/* all chip interrupts map to MSI-X 0 */
9066 	for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9067 		write_csr(dd, CCE_INT_MAP + (8*i), 0);
9068 }
9069 
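/*
 * Allocate MSI-X vectors (falling back to INTx), mask and clear all
 * interrupt sources, reset the interrupt mappings, then request the
 * IRQ handlers.
 */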
9070 static int set_up_interrupts(struct hfi1_devdata *dd)
9071 {
9072 	struct hfi1_msix_entry *entries;
9073 	u32 total, request;
9074 	int i, ret;
9075 	int single_interrupt = 0; /* we expect to have all the interrupts */
9076 
9077 	/*
9078 	 * Interrupt count:
9079 	 *	1 general, "slow path" interrupt (includes the SDMA engines
9080 	 *		slow source, SDMACleanupDone)
9081 	 *	N interrupts - one per used SDMA engine
9082 	 *	M interrupts - one per kernel receive context
9083 	 */
9084 	total = 1 + dd->num_sdma + dd->n_krcv_queues;
9085 
9086 	entries = kcalloc(total, sizeof(*entries), GFP_KERNEL);
9087 	if (!entries) {
9088 		ret = -ENOMEM;
9089 		goto fail;
9090 	}
9091 	/* 1-1 MSI-X entry assignment */
9092 	for (i = 0; i < total; i++)
9093 		entries[i].msix.entry = i;
9094 
9095 	/* ask for MSI-X interrupts */
9096 	request = total;
9097 	request_msix(dd, &request, entries);
9098 
9099 	if (request == 0) {
9100 		/* using INTx */
9101 		/* dd->num_msix_entries already zero */
9102 		kfree(entries);
9103 		single_interrupt = 1;
9104 		dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n");
9105 	} else {
9106 		/* using MSI-X */
9107 		dd->num_msix_entries = request;
9108 		dd->msix_entries = entries;
9109 
9110 		if (request != total) {
9111 			/* using MSI-X, with reduced interrupts */
9112 			dd_dev_err(
9113 				dd,
9114 				"cannot handle reduced interrupt case, want %u, got %u\n",
9115 				total, request);
9116 			ret = -EINVAL;
9117 			goto fail;
9118 		}
9119 		dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
9120 	}
9121 
9122 	/* mask all interrupts */
9123 	set_intr_state(dd, 0);
9124 	/* clear all pending interrupts */
9125 	clear_all_interrupts(dd);
9126 
9127 	/* reset general handler mask, chip MSI-X mappings */
9128 	reset_interrupts(dd);
9129 
9130 	if (single_interrupt)
9131 		ret = request_intx_irq(dd);
9132 	else
9133 		ret = request_msix_irqs(dd);
9134 	if (ret)
9135 		goto fail;
9136 
9137 	return 0;
9138 
9139 fail:
9140 	clean_up_interrupts(dd);
9141 	return ret;
9142 }
9143 
9144 /*
9145  * Set up context values in dd.  Sets:
9146  *
9147  *	num_rcv_contexts - number of contexts being used
9148  *	n_krcv_queues - number of kernel contexts
9149  *	first_user_ctxt - first non-kernel context in array of contexts
9150  *	freectxts  - number of free user contexts
9151  *	num_send_contexts - number of PIO send contexts being used
9152  */
9153 static int set_up_context_variables(struct hfi1_devdata *dd)
9154 {
9155 	int num_kernel_contexts;
9156 	int num_user_contexts;
9157 	int total_contexts;
9158 	int ret;
9159 	unsigned ngroups;
9160 
9161 	/*
9162 	 * Kernel contexts: (to be fixed later):
9163 	 * - minimum of 2, or 1 context per NUMA node
9164 	 * - Context 0 - default/errors
9165 	 * - Context 1 - VL15
9166 	 */
9167 	if (n_krcvqs)
9168 		num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS;
9169 	else
9170 		num_kernel_contexts = num_online_nodes();
9171 	num_kernel_contexts =
9172 		max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts);
9173 	/*
9174 	 * Every kernel receive context needs an ACK send context.
9175 	 * One send context is allocated for each VL{0-7} and VL15.
9176 	 */
9177 	if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) {
9178 		dd_dev_err(dd,
9179 			   "Reducing # kernel rcv contexts to: %d, from %d\n",
9180 			   (int)(dd->chip_send_contexts - num_vls - 1),
9181 			   (int)num_kernel_contexts);
9182 		num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
9183 	}
9184 	/*
9185 	 * User contexts: (to be fixed later)
9186 	 *	- set to num_rcv_contexts if non-zero
9187 	 *	- default to 1 user context per CPU
9188 	 */
9189 	if (num_rcv_contexts)
9190 		num_user_contexts = num_rcv_contexts;
9191 	else
9192 		num_user_contexts = num_online_cpus();
9193 
9194 	total_contexts = num_kernel_contexts + num_user_contexts;
9195 
9196 	/*
9197 	 * Adjust the counts given a global max.
9198 	 */
9199 	if (total_contexts > dd->chip_rcv_contexts) {
9200 		dd_dev_err(dd,
9201 			   "Reducing # user receive contexts to: %d, from %d\n",
9202 			   (int)(dd->chip_rcv_contexts - num_kernel_contexts),
9203 			   (int)num_user_contexts);
9204 		num_user_contexts = dd->chip_rcv_contexts - num_kernel_contexts;
9205 		/* recalculate */
9206 		total_contexts = num_kernel_contexts + num_user_contexts;
9207 	}
9208 
9209 	/* the first N are kernel contexts, the rest are user contexts */
9210 	dd->num_rcv_contexts = total_contexts;
9211 	dd->n_krcv_queues = num_kernel_contexts;
9212 	dd->first_user_ctxt = num_kernel_contexts;
9213 	dd->freectxts = num_user_contexts;
9214 	dd_dev_info(dd,
9215 		"rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
9216 		(int)dd->chip_rcv_contexts,
9217 		(int)dd->num_rcv_contexts,
9218 		(int)dd->n_krcv_queues,
9219 		(int)dd->num_rcv_contexts - dd->n_krcv_queues);
9220 
9221 	/*
9222 	 * Receive array allocation:
9223 	 *   All RcvArray entries are divided into groups of 8. This
9224 	 *   is required by the hardware and will speed up writes to
9225 	 *   consecutive entries by using write-combining of the entire
9226 	 *   cacheline.
9227 	 *
9228 	 *   The groups are evenly divided among all contexts;
9229 	 *   any leftover groups are given to the first N user
9230 	 *   contexts.
9231 	 */
9232 	dd->rcv_entries.group_size = RCV_INCREMENT;
9233 	ngroups = dd->chip_rcv_array_count / dd->rcv_entries.group_size;
9234 	dd->rcv_entries.ngroups = ngroups / dd->num_rcv_contexts;
9235 	dd->rcv_entries.nctxt_extra = ngroups -
9236 		(dd->num_rcv_contexts * dd->rcv_entries.ngroups);
9237 	dd_dev_info(dd, "RcvArray groups %u, ctxts extra %u\n",
9238 		    dd->rcv_entries.ngroups,
9239 		    dd->rcv_entries.nctxt_extra);
9240 	if (dd->rcv_entries.ngroups * dd->rcv_entries.group_size >
9241 	    MAX_EAGER_ENTRIES * 2) {
9242 		dd->rcv_entries.ngroups = (MAX_EAGER_ENTRIES * 2) /
9243 			dd->rcv_entries.group_size;
9244 		dd_dev_info(dd,
9245 		   "RcvArray group count too high, change to %u\n",
9246 		   dd->rcv_entries.ngroups);
9247 		dd->rcv_entries.nctxt_extra = 0;
9248 	}
9249 	/*
9250 	 * PIO send contexts
9251 	 */
9252 	ret = init_sc_pools_and_sizes(dd);
9253 	if (ret >= 0) {	/* success */
9254 		dd->num_send_contexts = ret;
9255 		dd_dev_info(
9256 			dd,
9257 			"send contexts: chip %d, used %d (kernel %d, ack %d, user %d)\n",
9258 			dd->chip_send_contexts,
9259 			dd->num_send_contexts,
9260 			dd->sc_sizes[SC_KERNEL].count,
9261 			dd->sc_sizes[SC_ACK].count,
9262 			dd->sc_sizes[SC_USER].count);
9263 		ret = 0;	/* success */
9264 	}
9265 
9266 	return ret;
9267 }
9268 
9269 /*
9270  * Set the device/port partition key table. The MAD code
9271  * will ensure that, at least, the partial management
9272  * partition key is present in the table.
9273  */
9274 static void set_partition_keys(struct hfi1_pportdata *ppd)
9275 {
9276 	struct hfi1_devdata *dd = ppd->dd;
9277 	u64 reg = 0;
9278 	int i;
9279 
9280 	dd_dev_info(dd, "Setting partition keys\n");
9281 	for (i = 0; i < hfi1_get_npkeys(dd); i++) {
9282 		reg |= (ppd->pkeys[i] &
9283 			RCV_PARTITION_KEY_PARTITION_KEY_A_MASK) <<
9284 			((i % 4) *
9285 			 RCV_PARTITION_KEY_PARTITION_KEY_B_SHIFT);
9286 		/* Each register holds 4 PKey values. */
9287 		if ((i % 4) == 3) {
9288 			write_csr(dd, RCV_PARTITION_KEY +
9289 				  ((i - 3) * 2), reg);
9290 			reg = 0;
9291 		}
9292 	}
9293 
9294 	/* Always enable HW pkeys check when pkeys table is set */
9295 	add_rcvctrl(dd, RCV_CTRL_RCV_PARTITION_KEY_ENABLE_SMASK);
9296 }
9297 
9298 /*
9299  * These CSRs and memories are uninitialized on reset and must be
9300  * written before reading to set the ECC/parity bits.
9301  *
9302  * NOTE: All user context CSRs that are not mmaped write-only
9303  * (e.g. the TID flows) must be initialized even if the driver never
9304  * reads them.
9305  */
9306 static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
9307 {
9308 	int i, j;
9309 
9310 	/* CceIntMap */
9311 	for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9312 		write_csr(dd, CCE_INT_MAP+(8*i), 0);
9313 
9314 	/* SendCtxtCreditReturnAddr */
9315 	for (i = 0; i < dd->chip_send_contexts; i++)
9316 		write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
9317 
9318 	/* PIO Send buffers */
9319 	/* SDMA Send buffers */
9320 	/* These are not normally read, and (presently) have no method
9321 	   to be read, so are not pre-initialized */
9322 
9323 	/* RcvHdrAddr */
9324 	/* RcvHdrTailAddr */
9325 	/* RcvTidFlowTable */
9326 	for (i = 0; i < dd->chip_rcv_contexts; i++) {
9327 		write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
9328 		write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
9329 		for (j = 0; j < RXE_NUM_TID_FLOWS; j++)
9330 			write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE+(8*j), 0);
9331 	}
9332 
9333 	/* RcvArray */
9334 	for (i = 0; i < dd->chip_rcv_array_count; i++)
9335 		write_csr(dd, RCV_ARRAY + (8*i),
9336 					RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
9337 
9338 	/* RcvQPMapTable */
9339 	for (i = 0; i < 32; i++)
9340 		write_csr(dd, RCV_QP_MAP_TABLE + (8 * i), 0);
9341 }
9342 
9343 /*
9344  * Use the ctrl_bits in CceCtrl to clear the status_bits in CceStatus.
9345  */
9346 static void clear_cce_status(struct hfi1_devdata *dd, u64 status_bits,
9347 			     u64 ctrl_bits)
9348 {
9349 	unsigned long timeout;
9350 	u64 reg;
9351 
9352 	/* is the condition present? */
9353 	reg = read_csr(dd, CCE_STATUS);
9354 	if ((reg & status_bits) == 0)
9355 		return;
9356 
9357 	/* clear the condition */
9358 	write_csr(dd, CCE_CTRL, ctrl_bits);
9359 
9360 	/* wait for the condition to clear */
9361 	timeout = jiffies + msecs_to_jiffies(CCE_STATUS_TIMEOUT);
9362 	while (1) {
9363 		reg = read_csr(dd, CCE_STATUS);
9364 		if ((reg & status_bits) == 0)
9365 			return;
9366 		if (time_after(jiffies, timeout)) {
9367 			dd_dev_err(dd,
9368 				"Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n",
9369 				status_bits, reg & status_bits);
9370 			return;
9371 		}
9372 		udelay(1);
9373 	}
9374 }
9375 
9376 /* set CCE CSRs to chip reset defaults */
9377 static void reset_cce_csrs(struct hfi1_devdata *dd)
9378 {
9379 	int i;
9380 
9381 	/* CCE_REVISION read-only */
9382 	/* CCE_REVISION2 read-only */
9383 	/* CCE_CTRL - bits clear automatically */
9384 	/* CCE_STATUS read-only, use CceCtrl to clear */
9385 	clear_cce_status(dd, ALL_FROZE, CCE_CTRL_SPC_UNFREEZE_SMASK);
9386 	clear_cce_status(dd, ALL_TXE_PAUSE, CCE_CTRL_TXE_RESUME_SMASK);
9387 	clear_cce_status(dd, ALL_RXE_PAUSE, CCE_CTRL_RXE_RESUME_SMASK);
9388 	for (i = 0; i < CCE_NUM_SCRATCH; i++)
9389 		write_csr(dd, CCE_SCRATCH + (8 * i), 0);
9390 	/* CCE_ERR_STATUS read-only */
9391 	write_csr(dd, CCE_ERR_MASK, 0);
9392 	write_csr(dd, CCE_ERR_CLEAR, ~0ull);
9393 	/* CCE_ERR_FORCE leave alone */
9394 	for (i = 0; i < CCE_NUM_32_BIT_COUNTERS; i++)
9395 		write_csr(dd, CCE_COUNTER_ARRAY32 + (8 * i), 0);
9396 	write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_RESETCSR);
9397 	/* CCE_PCIE_CTRL leave alone */
9398 	for (i = 0; i < CCE_NUM_MSIX_VECTORS; i++) {
9399 		write_csr(dd, CCE_MSIX_TABLE_LOWER + (8 * i), 0);
9400 		write_csr(dd, CCE_MSIX_TABLE_UPPER + (8 * i),
9401 					CCE_MSIX_TABLE_UPPER_RESETCSR);
9402 	}
9403 	for (i = 0; i < CCE_NUM_MSIX_PBAS; i++) {
9404 		/* CCE_MSIX_PBA read-only */
9405 		write_csr(dd, CCE_MSIX_INT_GRANTED, ~0ull);
9406 		write_csr(dd, CCE_MSIX_VEC_CLR_WITHOUT_INT, ~0ull);
9407 	}
9408 	for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9409 		write_csr(dd, CCE_INT_MAP, 0);
9410 	for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
9411 		/* CCE_INT_STATUS read-only */
9412 		write_csr(dd, CCE_INT_MASK + (8 * i), 0);
9413 		write_csr(dd, CCE_INT_CLEAR + (8 * i), ~0ull);
9414 		/* CCE_INT_FORCE leave alone */
9415 		/* CCE_INT_BLOCKED read-only */
9416 	}
9417 	for (i = 0; i < CCE_NUM_32_BIT_INT_COUNTERS; i++)
9418 		write_csr(dd, CCE_INT_COUNTER_ARRAY32 + (8 * i), 0);
9419 }
9420 
9421 /* set ASIC CSRs to chip reset defaults */
9422 static void reset_asic_csrs(struct hfi1_devdata *dd)
9423 {
9424 	int i;
9425 
9426 	/*
9427 	 * If the HFIs are shared between separate nodes or VMs,
9428 	 * then more will need to be done here.  One idea is a module
9429 	 * parameter that returns early, letting the first power-on or
9430 	 * a known first load do the reset and blocking all others.
9431 	 */
9432 
9433 	if (!(dd->flags & HFI1_DO_INIT_ASIC))
9434 		return;
9435 
9436 	if (dd->icode != ICODE_FPGA_EMULATION) {
9437 		/* emulation does not have an SBus - leave these alone */
9438 		/*
9439 		 * All writes to ASIC_CFG_SBUS_REQUEST do something.
9440 		 * Notes:
9441 		 * o The reset is not zero if aimed at the core.  See the
9442 		 *   SBus documentation for details.
9443 		 * o If the SBus firmware has been updated (e.g. by the BIOS),
9444 		 *   will the reset revert that?
9445 		 */
9446 		/* ASIC_CFG_SBUS_REQUEST leave alone */
9447 		write_csr(dd, ASIC_CFG_SBUS_EXECUTE, 0);
9448 	}
9449 	/* ASIC_SBUS_RESULT read-only */
9450 	write_csr(dd, ASIC_STS_SBUS_COUNTERS, 0);
9451 	for (i = 0; i < ASIC_NUM_SCRATCH; i++)
9452 		write_csr(dd, ASIC_CFG_SCRATCH + (8 * i), 0);
9453 	write_csr(dd, ASIC_CFG_MUTEX, 0);	/* this will clear it */
9454 
9455 	/* We might want to retain this state across FLR if we ever use it */
9456 	write_csr(dd, ASIC_CFG_DRV_STR, 0);
9457 
9458 	write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0);
9459 	/* ASIC_STS_THERM read-only */
9460 	/* ASIC_CFG_RESET leave alone */
9461 
9462 	write_csr(dd, ASIC_PCIE_SD_HOST_CMD, 0);
9463 	/* ASIC_PCIE_SD_HOST_STATUS read-only */
9464 	write_csr(dd, ASIC_PCIE_SD_INTRPT_DATA_CODE, 0);
9465 	write_csr(dd, ASIC_PCIE_SD_INTRPT_ENABLE, 0);
9466 	/* ASIC_PCIE_SD_INTRPT_PROGRESS read-only */
9467 	write_csr(dd, ASIC_PCIE_SD_INTRPT_STATUS, ~0ull); /* clear */
9468 	/* ASIC_HFI0_PCIE_SD_INTRPT_RSPD_DATA read-only */
9469 	/* ASIC_HFI1_PCIE_SD_INTRPT_RSPD_DATA read-only */
9470 	for (i = 0; i < 16; i++)
9471 		write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (8 * i), 0);
9472 
9473 	/* ASIC_GPIO_IN read-only */
9474 	write_csr(dd, ASIC_GPIO_OE, 0);
9475 	write_csr(dd, ASIC_GPIO_INVERT, 0);
9476 	write_csr(dd, ASIC_GPIO_OUT, 0);
9477 	write_csr(dd, ASIC_GPIO_MASK, 0);
9478 	/* ASIC_GPIO_STATUS read-only */
9479 	write_csr(dd, ASIC_GPIO_CLEAR, ~0ull);
9480 	/* ASIC_GPIO_FORCE leave alone */
9481 
9482 	/* ASIC_QSFP1_IN read-only */
9483 	write_csr(dd, ASIC_QSFP1_OE, 0);
9484 	write_csr(dd, ASIC_QSFP1_INVERT, 0);
9485 	write_csr(dd, ASIC_QSFP1_OUT, 0);
9486 	write_csr(dd, ASIC_QSFP1_MASK, 0);
9487 	/* ASIC_QSFP1_STATUS read-only */
9488 	write_csr(dd, ASIC_QSFP1_CLEAR, ~0ull);
9489 	/* ASIC_QSFP1_FORCE leave alone */
9490 
9491 	/* ASIC_QSFP2_IN read-only */
9492 	write_csr(dd, ASIC_QSFP2_OE, 0);
9493 	write_csr(dd, ASIC_QSFP2_INVERT, 0);
9494 	write_csr(dd, ASIC_QSFP2_OUT, 0);
9495 	write_csr(dd, ASIC_QSFP2_MASK, 0);
9496 	/* ASIC_QSFP2_STATUS read-only */
9497 	write_csr(dd, ASIC_QSFP2_CLEAR, ~0ull);
9498 	/* ASIC_QSFP2_FORCE leave alone */
9499 
9500 	write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_RESETCSR);
9501 	/* this also writes a NOP command, clearing paging mode */
9502 	write_csr(dd, ASIC_EEP_ADDR_CMD, 0);
9503 	write_csr(dd, ASIC_EEP_DATA, 0);
9504 }
9505 
9506 /* set MISC CSRs to chip reset defaults */
9507 static void reset_misc_csrs(struct hfi1_devdata *dd)
9508 {
9509 	int i;
9510 
9511 	for (i = 0; i < 32; i++) {
9512 		write_csr(dd, MISC_CFG_RSA_R2 + (8 * i), 0);
9513 		write_csr(dd, MISC_CFG_RSA_SIGNATURE + (8 * i), 0);
9514 		write_csr(dd, MISC_CFG_RSA_MODULUS + (8 * i), 0);
9515 	}
9516 	/* MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can
9517 	   only be written in 128-byte chunks */
9518 	/* init RSA engine to clear lingering errors */
9519 	write_csr(dd, MISC_CFG_RSA_CMD, 1);
9520 	write_csr(dd, MISC_CFG_RSA_MU, 0);
9521 	write_csr(dd, MISC_CFG_FW_CTRL, 0);
9522 	/* MISC_STS_8051_DIGEST read-only */
9523 	/* MISC_STS_SBM_DIGEST read-only */
9524 	/* MISC_STS_PCIE_DIGEST read-only */
9525 	/* MISC_STS_FAB_DIGEST read-only */
9526 	/* MISC_ERR_STATUS read-only */
9527 	write_csr(dd, MISC_ERR_MASK, 0);
9528 	write_csr(dd, MISC_ERR_CLEAR, ~0ull);
9529 	/* MISC_ERR_FORCE leave alone */
9530 }
9531 
9532 /* set TXE CSRs to chip reset defaults */
9533 static void reset_txe_csrs(struct hfi1_devdata *dd)
9534 {
9535 	int i;
9536 
9537 	/*
9538 	 * TXE Kernel CSRs
9539 	 */
9540 	write_csr(dd, SEND_CTRL, 0);
9541 	__cm_reset(dd, 0);	/* reset CM internal state */
9542 	/* SEND_CONTEXTS read-only */
9543 	/* SEND_DMA_ENGINES read-only */
9544 	/* SEND_PIO_MEM_SIZE read-only */
9545 	/* SEND_DMA_MEM_SIZE read-only */
9546 	write_csr(dd, SEND_HIGH_PRIORITY_LIMIT, 0);
9547 	pio_reset_all(dd);	/* SEND_PIO_INIT_CTXT */
9548 	/* SEND_PIO_ERR_STATUS read-only */
9549 	write_csr(dd, SEND_PIO_ERR_MASK, 0);
9550 	write_csr(dd, SEND_PIO_ERR_CLEAR, ~0ull);
9551 	/* SEND_PIO_ERR_FORCE leave alone */
9552 	/* SEND_DMA_ERR_STATUS read-only */
9553 	write_csr(dd, SEND_DMA_ERR_MASK, 0);
9554 	write_csr(dd, SEND_DMA_ERR_CLEAR, ~0ull);
9555 	/* SEND_DMA_ERR_FORCE leave alone */
9556 	/* SEND_EGRESS_ERR_STATUS read-only */
9557 	write_csr(dd, SEND_EGRESS_ERR_MASK, 0);
9558 	write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~0ull);
9559 	/* SEND_EGRESS_ERR_FORCE leave alone */
9560 	write_csr(dd, SEND_BTH_QP, 0);
9561 	write_csr(dd, SEND_STATIC_RATE_CONTROL, 0);
9562 	write_csr(dd, SEND_SC2VLT0, 0);
9563 	write_csr(dd, SEND_SC2VLT1, 0);
9564 	write_csr(dd, SEND_SC2VLT2, 0);
9565 	write_csr(dd, SEND_SC2VLT3, 0);
9566 	write_csr(dd, SEND_LEN_CHECK0, 0);
9567 	write_csr(dd, SEND_LEN_CHECK1, 0);
9568 	/* SEND_ERR_STATUS read-only */
9569 	write_csr(dd, SEND_ERR_MASK, 0);
9570 	write_csr(dd, SEND_ERR_CLEAR, ~0ull);
9571 	/* SEND_ERR_FORCE read-only */
9572 	for (i = 0; i < VL_ARB_LOW_PRIO_TABLE_SIZE; i++)
9573 		write_csr(dd, SEND_LOW_PRIORITY_LIST + (8*i), 0);
9574 	for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++)
9575 		write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8*i), 0);
9576 	for (i = 0; i < dd->chip_send_contexts/NUM_CONTEXTS_PER_SET; i++)
9577 		write_csr(dd, SEND_CONTEXT_SET_CTRL + (8*i), 0);
9578 	for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++)
9579 		write_csr(dd, SEND_COUNTER_ARRAY32 + (8*i), 0);
9580 	for (i = 0; i < TXE_NUM_64_BIT_COUNTER; i++)
9581 		write_csr(dd, SEND_COUNTER_ARRAY64 + (8*i), 0);
9582 	write_csr(dd, SEND_CM_CTRL, SEND_CM_CTRL_RESETCSR);
9583 	write_csr(dd, SEND_CM_GLOBAL_CREDIT,
9584 					SEND_CM_GLOBAL_CREDIT_RESETCSR);
9585 	/* SEND_CM_CREDIT_USED_STATUS read-only */
9586 	write_csr(dd, SEND_CM_TIMER_CTRL, 0);
9587 	write_csr(dd, SEND_CM_LOCAL_AU_TABLE0_TO3, 0);
9588 	write_csr(dd, SEND_CM_LOCAL_AU_TABLE4_TO7, 0);
9589 	write_csr(dd, SEND_CM_REMOTE_AU_TABLE0_TO3, 0);
9590 	write_csr(dd, SEND_CM_REMOTE_AU_TABLE4_TO7, 0);
9591 	for (i = 0; i < TXE_NUM_DATA_VL; i++)
9592 		write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
9593 	write_csr(dd, SEND_CM_CREDIT_VL15, 0);
9594 	/* SEND_CM_CREDIT_USED_VL read-only */
9595 	/* SEND_CM_CREDIT_USED_VL15 read-only */
9596 	/* SEND_EGRESS_CTXT_STATUS read-only */
9597 	/* SEND_EGRESS_SEND_DMA_STATUS read-only */
9598 	write_csr(dd, SEND_EGRESS_ERR_INFO, ~0ull);
9599 	/* SEND_EGRESS_ERR_INFO read-only */
9600 	/* SEND_EGRESS_ERR_SOURCE read-only */
9601 
9602 	/*
9603 	 * TXE Per-Context CSRs
9604 	 */
9605 	for (i = 0; i < dd->chip_send_contexts; i++) {
9606 		write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
9607 		write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_CTRL, 0);
9608 		write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
9609 		write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_FORCE, 0);
9610 		write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, 0);
9611 		write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~0ull);
9612 		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_ENABLE, 0);
9613 		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_VL, 0);
9614 		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_JOB_KEY, 0);
9615 		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_PARTITION_KEY, 0);
9616 		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, 0);
9617 		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_OPCODE, 0);
9618 	}
9619 
9620 	/*
9621 	 * TXE Per-SDMA CSRs
9622 	 */
9623 	for (i = 0; i < dd->chip_sdma_engines; i++) {
9624 		write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
9625 		/* SEND_DMA_STATUS read-only */
9626 		write_kctxt_csr(dd, i, SEND_DMA_BASE_ADDR, 0);
9627 		write_kctxt_csr(dd, i, SEND_DMA_LEN_GEN, 0);
9628 		write_kctxt_csr(dd, i, SEND_DMA_TAIL, 0);
9629 		/* SEND_DMA_HEAD read-only */
9630 		write_kctxt_csr(dd, i, SEND_DMA_HEAD_ADDR, 0);
9631 		write_kctxt_csr(dd, i, SEND_DMA_PRIORITY_THLD, 0);
9632 		/* SEND_DMA_IDLE_CNT read-only */
9633 		write_kctxt_csr(dd, i, SEND_DMA_RELOAD_CNT, 0);
9634 		write_kctxt_csr(dd, i, SEND_DMA_DESC_CNT, 0);
9635 		/* SEND_DMA_DESC_FETCHED_CNT read-only */
9636 		/* SEND_DMA_ENG_ERR_STATUS read-only */
9637 		write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, 0);
9638 		write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~0ull);
9639 		/* SEND_DMA_ENG_ERR_FORCE leave alone */
9640 		write_kctxt_csr(dd, i, SEND_DMA_CHECK_ENABLE, 0);
9641 		write_kctxt_csr(dd, i, SEND_DMA_CHECK_VL, 0);
9642 		write_kctxt_csr(dd, i, SEND_DMA_CHECK_JOB_KEY, 0);
9643 		write_kctxt_csr(dd, i, SEND_DMA_CHECK_PARTITION_KEY, 0);
9644 		write_kctxt_csr(dd, i, SEND_DMA_CHECK_SLID, 0);
9645 		write_kctxt_csr(dd, i, SEND_DMA_CHECK_OPCODE, 0);
9646 		write_kctxt_csr(dd, i, SEND_DMA_MEMORY, 0);
9647 	}
9648 }
9649 
9650 /*
9651  * Expect on entry:
9652  * o Packet ingress is disabled, i.e. RcvCtrl.RcvPortEnable == 0
9653  */
9654 static void init_rbufs(struct hfi1_devdata *dd)
9655 {
9656 	u64 reg;
9657 	int count;
9658 
9659 	/*
9660 	 * Wait for DMA to stop: RxRbufPktPending and RxPktInProgress are
9661 	 * clear.
9662 	 */
9663 	count = 0;
9664 	while (1) {
9665 		reg = read_csr(dd, RCV_STATUS);
9666 		if ((reg & (RCV_STATUS_RX_RBUF_PKT_PENDING_SMASK
9667 			    | RCV_STATUS_RX_PKT_IN_PROGRESS_SMASK)) == 0)
9668 			break;
9669 		/*
9670 		 * Give up after 1ms - maximum wait time.
9671 		 *
9672 		 * RBuf size is 148KiB.  Slowest possible is PCIe Gen1 x1 at
9673 		 * 250MB/s bandwidth.  Lower rate to 66% for overhead to get:
9674 		 *	148 KB / (66% * 250MB/s) = 920us
9675 		 */
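		/*
		 * Sanity check on the budget (illustrative note, not from the
		 * HW spec): the loop polls every 2us and gives up after 500
		 * iterations, i.e. roughly 1ms, which comfortably covers the
		 * ~920us worst case computed above.
		 */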
9676 		if (count++ > 500) {
9677 			dd_dev_err(dd,
9678 				"%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n",
9679 				__func__, reg);
9680 			break;
9681 		}
9682 		udelay(2); /* do not busy-wait the CSR */
9683 	}
9684 
9685 	/* start the init - expect RcvCtrl to be 0 */
9686 	write_csr(dd, RCV_CTRL, RCV_CTRL_RX_RBUF_INIT_SMASK);
9687 
9688 	/*
9689 	 * Read to force the write of RcvCtrl.RxRbufInit.  There is a brief
9690 	 * period after the write before RcvStatus.RxRbufInitDone is valid.
9691 	 * The delay in the first run through the loop below is sufficient and
9692 	 * required before the first read of RcvStatus.RxRbufInitDone.
9693 	 */
9694 	read_csr(dd, RCV_CTRL);
9695 
9696 	/* wait for the init to finish */
9697 	count = 0;
9698 	while (1) {
9699 		/* delay is required first time through - see above */
9700 		udelay(2); /* do not busy-wait the CSR */
9701 		reg = read_csr(dd, RCV_STATUS);
9702 		if (reg & (RCV_STATUS_RX_RBUF_INIT_DONE_SMASK))
9703 			break;
9704 
9705 		/* give up after 100us - slowest possible at 33MHz is 73us */
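		/* illustrative: 50 iterations x 2us udelay ~= the 100us cap */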
9706 		if (count++ > 50) {
9707 			dd_dev_err(dd,
9708 				"%s: RcvStatus.RxRbufInit not set, continuing\n",
9709 				__func__);
9710 			break;
9711 		}
9712 	}
9713 }
9714 
9715 /* set RXE CSRs to chip reset defaults */
9716 static void reset_rxe_csrs(struct hfi1_devdata *dd)
9717 {
9718 	int i, j;
9719 
9720 	/*
9721 	 * RXE Kernel CSRs
9722 	 */
9723 	write_csr(dd, RCV_CTRL, 0);
9724 	init_rbufs(dd);
9725 	/* RCV_STATUS read-only */
9726 	/* RCV_CONTEXTS read-only */
9727 	/* RCV_ARRAY_CNT read-only */
9728 	/* RCV_BUF_SIZE read-only */
9729 	write_csr(dd, RCV_BTH_QP, 0);
9730 	write_csr(dd, RCV_MULTICAST, 0);
9731 	write_csr(dd, RCV_BYPASS, 0);
9732 	write_csr(dd, RCV_VL15, 0);
9733 	/* this is a clear-down */
9734 	write_csr(dd, RCV_ERR_INFO,
9735 			RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
9736 	/* RCV_ERR_STATUS read-only */
9737 	write_csr(dd, RCV_ERR_MASK, 0);
9738 	write_csr(dd, RCV_ERR_CLEAR, ~0ull);
9739 	/* RCV_ERR_FORCE leave alone */
9740 	for (i = 0; i < 32; i++)
9741 		write_csr(dd, RCV_QP_MAP_TABLE + (8 * i), 0);
9742 	for (i = 0; i < 4; i++)
9743 		write_csr(dd, RCV_PARTITION_KEY + (8 * i), 0);
9744 	for (i = 0; i < RXE_NUM_32_BIT_COUNTERS; i++)
9745 		write_csr(dd, RCV_COUNTER_ARRAY32 + (8 * i), 0);
9746 	for (i = 0; i < RXE_NUM_64_BIT_COUNTERS; i++)
9747 		write_csr(dd, RCV_COUNTER_ARRAY64 + (8 * i), 0);
9748 	for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) {
9749 		write_csr(dd, RCV_RSM_CFG + (8 * i), 0);
9750 		write_csr(dd, RCV_RSM_SELECT + (8 * i), 0);
9751 		write_csr(dd, RCV_RSM_MATCH + (8 * i), 0);
9752 	}
9753 	for (i = 0; i < 32; i++)
9754 		write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), 0);
9755 
9756 	/*
9757 	 * RXE Kernel and User Per-Context CSRs
9758 	 */
9759 	for (i = 0; i < dd->chip_rcv_contexts; i++) {
9760 		/* kernel */
9761 		write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0);
9762 		/* RCV_CTXT_STATUS read-only */
9763 		write_kctxt_csr(dd, i, RCV_EGR_CTRL, 0);
9764 		write_kctxt_csr(dd, i, RCV_TID_CTRL, 0);
9765 		write_kctxt_csr(dd, i, RCV_KEY_CTRL, 0);
9766 		write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
9767 		write_kctxt_csr(dd, i, RCV_HDR_CNT, 0);
9768 		write_kctxt_csr(dd, i, RCV_HDR_ENT_SIZE, 0);
9769 		write_kctxt_csr(dd, i, RCV_HDR_SIZE, 0);
9770 		write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
9771 		write_kctxt_csr(dd, i, RCV_AVAIL_TIME_OUT, 0);
9772 		write_kctxt_csr(dd, i, RCV_HDR_OVFL_CNT, 0);
9773 
9774 		/* user */
9775 		/* RCV_HDR_TAIL read-only */
9776 		write_uctxt_csr(dd, i, RCV_HDR_HEAD, 0);
9777 		/* RCV_EGR_INDEX_TAIL read-only */
9778 		write_uctxt_csr(dd, i, RCV_EGR_INDEX_HEAD, 0);
9779 		/* RCV_EGR_OFFSET_TAIL read-only */
9780 		for (j = 0; j < RXE_NUM_TID_FLOWS; j++) {
9781 			write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j),
9782 				0);
9783 		}
9784 	}
9785 }
9786 
9787 /*
9788  * Set sc2vl tables.
9789  *
9790  * They power on to zeros, so to avoid send context errors
9791  * they need to be set:
9792  *
9793  * SC 0-7 -> VL 0-7 (respectively)
9794  * SC 15  -> VL 15
9795  * otherwise
9796  *        -> VL 0
9797  */
9798 static void init_sc2vl_tables(struct hfi1_devdata *dd)
9799 {
9800 	int i;
9801 	/* init per architecture spec, constrained by hardware capability */
9802 
9803 	/* HFI maps sent packets */
9804 	write_csr(dd, SEND_SC2VLT0, SC2VL_VAL(
9805 		0,
9806 		0, 0, 1, 1,
9807 		2, 2, 3, 3,
9808 		4, 4, 5, 5,
9809 		6, 6, 7, 7));
9810 	write_csr(dd, SEND_SC2VLT1, SC2VL_VAL(
9811 		1,
9812 		8, 0, 9, 0,
9813 		10, 0, 11, 0,
9814 		12, 0, 13, 0,
9815 		14, 0, 15, 15));
9816 	write_csr(dd, SEND_SC2VLT2, SC2VL_VAL(
9817 		2,
9818 		16, 0, 17, 0,
9819 		18, 0, 19, 0,
9820 		20, 0, 21, 0,
9821 		22, 0, 23, 0));
9822 	write_csr(dd, SEND_SC2VLT3, SC2VL_VAL(
9823 		3,
9824 		24, 0, 25, 0,
9825 		26, 0, 27, 0,
9826 		28, 0, 29, 0,
9827 		30, 0, 31, 0));
9828 
9829 	/* DC maps received packets */
9830 	write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0, DC_SC_VL_VAL(
9831 		15_0,
9832 		0, 0, 1, 1,  2, 2,  3, 3,  4, 4,  5, 5,  6, 6,  7,  7,
9833 		8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15, 15));
9834 	write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16, DC_SC_VL_VAL(
9835 		31_16,
9836 		16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23, 0,
9837 		24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31, 0));
9838 
9839 	/* initialize the cached sc2vl values consistently with h/w */
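	/*
	 * Illustrative result of the loop below: sc2vl[0..7] = 0..7,
	 * sc2vl[15] = 15, and every other entry is 0, matching the
	 * mapping described above.
	 */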
9840 	for (i = 0; i < 32; i++) {
9841 		if (i < 8 || i == 15)
9842 			*((u8 *)(dd->sc2vl) + i) = (u8)i;
9843 		else
9844 			*((u8 *)(dd->sc2vl) + i) = 0;
9845 	}
9846 }
9847 
9848 /*
9849  * Read chip sizes and then reset parts to sane, disabled, values.  We cannot
9850  * depend on the chip going through a power-on reset - a driver may be loaded
9851  * and unloaded many times.
9852  *
9853  * Do not write any CSR values to the chip in this routine - there may be
9854  * a reset following the (possible) FLR in this routine.
9855  *
9856  */
9857 static void init_chip(struct hfi1_devdata *dd)
9858 {
9859 	int i;
9860 
9861 	/*
9862 	 * Put the HFI CSRs in a known state.
9863 	 * Combine this with a DC reset.
9864 	 *
9865 	 * Stop the device from doing anything while we do a
9866 	 * reset.  We know there are no other active users of
9867 	 * the device since we are now in charge.  Turn off
9868 	 * all outbound and inbound traffic and make sure
9869 	 * the device does not generate any interrupts.
9870 	 */
9871 
9872 	/* disable send contexts and SDMA engines */
9873 	write_csr(dd, SEND_CTRL, 0);
9874 	for (i = 0; i < dd->chip_send_contexts; i++)
9875 		write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
9876 	for (i = 0; i < dd->chip_sdma_engines; i++)
9877 		write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
9878 	/* disable port (turn off RXE inbound traffic) and contexts */
9879 	write_csr(dd, RCV_CTRL, 0);
9880 	for (i = 0; i < dd->chip_rcv_contexts; i++)
9881 		write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0);
9882 	/* mask all interrupt sources */
9883 	for (i = 0; i < CCE_NUM_INT_CSRS; i++)
9884 		write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
9885 
9886 	/*
9887 	 * DC Reset: do a full DC reset before the register clear.
9888 	 * A recommended length of time to hold is one CSR read,
9889 	 * so reread the CceDcCtrl.  Then, hold the DC in reset
9890 	 * across the clear.
9891 	 */
9892 	write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK);
9893 	(void) read_csr(dd, CCE_DC_CTRL);
9894 
9895 	if (use_flr) {
9896 		/*
9897 		 * A FLR will reset the SPC core and part of the PCIe.
9898 		 * The parts that need to be restored have already been
9899 		 * saved.
9900 		 */
9901 		dd_dev_info(dd, "Resetting CSRs with FLR\n");
9902 
9903 		/* do the FLR, the DC reset will remain */
9904 		hfi1_pcie_flr(dd);
9905 
9906 		/* restore command and BARs */
9907 		restore_pci_variables(dd);
9908 
9909 		if (is_a0(dd)) {
9910 			dd_dev_info(dd, "Resetting CSRs with FLR\n");
9911 			hfi1_pcie_flr(dd);
9912 			restore_pci_variables(dd);
9913 		}
9914 
9915 		reset_asic_csrs(dd);
9916 	} else {
9917 		dd_dev_info(dd, "Resetting CSRs with writes\n");
9918 		reset_cce_csrs(dd);
9919 		reset_txe_csrs(dd);
9920 		reset_rxe_csrs(dd);
9921 		reset_asic_csrs(dd);
9922 		reset_misc_csrs(dd);
9923 	}
9924 	/* clear the DC reset */
9925 	write_csr(dd, CCE_DC_CTRL, 0);
9926 
9927 	/* Set the LED off */
9928 	if (is_a0(dd))
9929 		setextled(dd, 0);
9930 	/*
9931 	 * Clear the QSFP reset.
9932 	 * A0 leaves the out lines floating on power on, then on an FLR
9933 	 * enforces a 0 on all out pins.  The driver does not touch
9934 	 * ASIC_QSFPn_OUT otherwise.  This leaves RESET_N low and
9935 	 * anything plugged in constantly in reset, if it pays attention
9936 	 * to RESET_N.
9937 	 * A prime example of this is SiPh. For now, set all pins high.
9938 	 * I2CCLK and I2CDAT will change per direction, and INT_N and
9939 	 * MODPRS_N are input only and their value is ignored.
9940 	 */
9941 	if (is_a0(dd)) {
9942 		write_csr(dd, ASIC_QSFP1_OUT, 0x1f);
9943 		write_csr(dd, ASIC_QSFP2_OUT, 0x1f);
9944 	}
9945 }
9946 
9947 static void init_early_variables(struct hfi1_devdata *dd)
9948 {
9949 	int i;
9950 
9951 	/* assign link credit variables */
9952 	dd->vau = CM_VAU;
9953 	dd->link_credits = CM_GLOBAL_CREDITS;
9954 	if (is_a0(dd))
9955 		dd->link_credits--;
9956 	dd->vcu = cu_to_vcu(hfi1_cu);
9957 	/* enough room for 8 MAD packets plus header - 17K */
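	/* illustrative arithmetic: 8 * (2048 + 128) = 17408 bytes, ~17K */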
9958 	dd->vl15_init = (8 * (2048 + 128)) / vau_to_au(dd->vau);
9959 	if (dd->vl15_init > dd->link_credits)
9960 		dd->vl15_init = dd->link_credits;
9961 
9962 	write_uninitialized_csrs_and_memories(dd);
9963 
9964 	if (HFI1_CAP_IS_KSET(PKEY_CHECK))
9965 		for (i = 0; i < dd->num_pports; i++) {
9966 			struct hfi1_pportdata *ppd = &dd->pport[i];
9967 
9968 			set_partition_keys(ppd);
9969 		}
9970 	init_sc2vl_tables(dd);
9971 }
9972 
9973 static void init_kdeth_qp(struct hfi1_devdata *dd)
9974 {
9975 	/* user changed the KDETH_QP */
9976 	if (kdeth_qp != 0 && kdeth_qp >= 0xff) {
9977 		/* out of range or illegal value */
9978 		dd_dev_err(dd, "Invalid KDETH queue pair prefix, ignoring");
9979 		kdeth_qp = 0;
9980 	}
9981 	if (kdeth_qp == 0)	/* not set, or failed range check */
9982 		kdeth_qp = DEFAULT_KDETH_QP;
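	/*
	 * Illustrative note (assumption, not from the HW spec): the prefix
	 * occupies the upper 8 bits of the 24-bit QPN space, so e.g. a
	 * prefix of 0x80 would make QPNs 0x800000 through 0x80ffff KDETH QPs.
	 */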
9983 
9984 	write_csr(dd, SEND_BTH_QP,
9985 			(kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK)
9986 				<< SEND_BTH_QP_KDETH_QP_SHIFT);
9987 
9988 	write_csr(dd, RCV_BTH_QP,
9989 			(kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK)
9990 				<< RCV_BTH_QP_KDETH_QP_SHIFT);
9991 }
9992 
9993 /**
9994  * init_qpmap_table
9995  * @dd - device data
9996  * @first_ctxt - first context
9997  * @last_ctxt - last context
9998  *
9999  * This routine sets the qpn mapping table that
10000  * is indexed by qpn[8:1].
10001  *
10002  * The routine will round robin the 256 settings
10003  * from first_ctxt to last_ctxt.
10004  *
10005  * The first/last looks ahead to having specialized
10006  * receive contexts for mgmt and bypass.  Normal
10007  * verbs traffic is assumed to be on a range
10008  * of receive contexts.
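 *
 * For example (purely illustrative): with first_ctxt = 1 and
 * last_ctxt = 3, the 256 entries are filled 1, 2, 3, 1, 2, 3, ...
 * (skipping VL15CTXT if it falls in the range), and a packed
 * register is written out after every 8 entries.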
10009  */
10010 static void init_qpmap_table(struct hfi1_devdata *dd,
10011 			     u32 first_ctxt,
10012 			     u32 last_ctxt)
10013 {
10014 	u64 reg = 0;
10015 	u64 regno = RCV_QP_MAP_TABLE;
10016 	int i;
10017 	u64 ctxt = first_ctxt;
10018 
10019 	for (i = 0; i < 256;) {
10020 		if (ctxt == VL15CTXT) {
10021 			ctxt++;
10022 			if (ctxt > last_ctxt)
10023 				ctxt = first_ctxt;
10024 			continue;
10025 		}
10026 		reg |= ctxt << (8 * (i % 8));
10027 		i++;
10028 		ctxt++;
10029 		if (ctxt > last_ctxt)
10030 			ctxt = first_ctxt;
10031 		if (i % 8 == 0) {
10032 			write_csr(dd, regno, reg);
10033 			reg = 0;
10034 			regno += 8;
10035 		}
10036 	}
10037 	if (i % 8)
10038 		write_csr(dd, regno, reg);
10039 
10040 	add_rcvctrl(dd, RCV_CTRL_RCV_QP_MAP_ENABLE_SMASK
10041 			| RCV_CTRL_RCV_BYPASS_ENABLE_SMASK);
10042 }
10043 
10044 /**
10045  * init_qos - init RX qos
10046  * @dd - device data
10047  * @first_ctxt - first context
10048  *
10049  * This routine initializes Rule 0 and the
10050  * RSM map table to implement qos.
10051  *
10052  * If all of the limit tests succeed,
10053  * qos is applied based on the array
10054  * interpretation of krcvqs where
10055  * entry 0 is VL0.
10056  *
10057  * The number of vl bits (n) and the number of qpn
10058  * bits (m) are computed to feed both the RSM map table
10059  * and the single rule.
10060  *
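 * As an illustrative example (assuming num_vls = 8 and at most 4
 * kernel receive queues on any single VL): qpns_per_vl rounds up
 * to 4, giving m = 2 and n = 3; m + n = 5 <= 7 passes the limit
 * test, and the RSM map table is indexed with a 5-bit value (the
 * 2 QPN bits above the 3 VL bits).
 *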
10061  */
10062 static void init_qos(struct hfi1_devdata *dd, u32 first_ctxt)
10063 {
10064 	u8 max_by_vl = 0;
10065 	unsigned qpns_per_vl, ctxt, i, qpn, n = 1, m;
10066 	u64 *rsmmap;
10067 	u64 reg;
10068 	u8  rxcontext = is_a0(dd) ? 0 : 0xff;  /* 0 is default if a0 ver. */
10069 
10070 	/* validate */
10071 	if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS ||
10072 	    num_vls == 1 ||
10073 	    krcvqsset <= 1)
10074 		goto bail;
10075 	for (i = 0; i < min_t(unsigned, num_vls, krcvqsset); i++)
10076 		if (krcvqs[i] > max_by_vl)
10077 			max_by_vl = krcvqs[i];
10078 	if (max_by_vl > 32)
10079 		goto bail;
10080 	qpns_per_vl = __roundup_pow_of_two(max_by_vl);
10081 	/* determine bits vl */
10082 	n = ilog2(num_vls);
10083 	/* determine bits for qpn */
10084 	m = ilog2(qpns_per_vl);
10085 	if ((m + n) > 7)
10086 		goto bail;
10087 	if (num_vls * qpns_per_vl > dd->chip_rcv_contexts)
10088 		goto bail;
10089 	rsmmap = kmalloc_array(NUM_MAP_REGS, sizeof(u64), GFP_KERNEL);
	if (!rsmmap)
		goto bail;
10090 	memset(rsmmap, rxcontext, NUM_MAP_REGS * sizeof(u64));
10091 	/* init the local copy of the table */
10092 	for (i = 0, ctxt = first_ctxt; i < num_vls; i++) {
10093 		unsigned tctxt;
10094 
10095 		for (qpn = 0, tctxt = ctxt;
10096 		     krcvqs[i] && qpn < qpns_per_vl; qpn++) {
10097 			unsigned idx, regoff, regidx;
10098 
10099 			/* generate index <= 128 */
10100 			idx = (qpn << n) ^ i;
10101 			regoff = (idx % 8) * 8;
10102 			regidx = idx / 8;
10103 			reg = rsmmap[regidx];
10104 			/* replace 0xff with context number */
10105 			reg &= ~(RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK
10106 				<< regoff);
10107 			reg |= (u64)(tctxt++) << regoff;
10108 			rsmmap[regidx] = reg;
10109 			if (tctxt == ctxt + krcvqs[i])
10110 				tctxt = ctxt;
10111 		}
10112 		ctxt += krcvqs[i];
10113 	}
10114 	/* flush cached copies to chip */
10115 	for (i = 0; i < NUM_MAP_REGS; i++)
10116 		write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rsmmap[i]);
10117 	/* add rule0 */
10118 	write_csr(dd, RCV_RSM_CFG /* + (8 * 0) */,
10119 		RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK
10120 			<< RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT |
10121 		2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
10122 	write_csr(dd, RCV_RSM_SELECT /* + (8 * 0) */,
10123 		LRH_BTH_MATCH_OFFSET
10124 			<< RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
10125 		LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
10126 		LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
10127 		((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
10128 		QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
10129 		((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
10130 	write_csr(dd, RCV_RSM_MATCH /* + (8 * 0) */,
10131 		LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT |
10132 		LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT |
10133 		LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT |
10134 		LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT);
10135 	/* Enable RSM */
10136 	add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
10137 	kfree(rsmmap);
10138 	/* map everything else (non-VL15) to context 0 */
10139 	init_qpmap_table(
10140 		dd,
10141 		0,
10142 		0);
10143 	dd->qos_shift = n + 1;
10144 	return;
10145 bail:
10146 	dd->qos_shift = 1;
10147 	init_qpmap_table(
10148 		dd,
10149 		dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0,
10150 		dd->n_krcv_queues - 1);
10151 }
10152 
10153 static void init_rxe(struct hfi1_devdata *dd)
10154 {
10155 	/* enable all receive errors */
10156 	write_csr(dd, RCV_ERR_MASK, ~0ull);
10157 	/* setup QPN map table - start where VL15 context leaves off */
10158 	init_qos(
10159 		dd,
10160 		dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0);
10161 	/*
10162 	 * make sure RcvCtrl.RcvWcb <= PCIe Device Control
10163 	 * Register Max_Payload_Size (PCI_EXP_DEVCTL in Linux PCIe config
10164 	 * space, PciCfgCap2.MaxPayloadSize in HFI).  There is only one
10165 	 * invalid configuration: RcvCtrl.RcvWcb set to its max of 256 and
10166 	 * Max_PayLoad_Size set to its minimum of 128.
10167 	 *
10168 	 * Presently, RcvCtrl.RcvWcb is not modified from its default of 0
10169 	 * (64 bytes).  Max_Payload_Size is possibly modified upward in
10170 	 * tune_pcie_caps() which is called after this routine.
10171 	 */
10172 }
10173 
10174 static void init_other(struct hfi1_devdata *dd)
10175 {
10176 	/* enable all CCE errors */
10177 	write_csr(dd, CCE_ERR_MASK, ~0ull);
10178 	/* enable *some* Misc errors */
10179 	write_csr(dd, MISC_ERR_MASK, DRIVER_MISC_MASK);
10180 	/* enable all DC errors, except LCB */
10181 	write_csr(dd, DCC_ERR_FLG_EN, ~0ull);
10182 	write_csr(dd, DC_DC8051_ERR_EN, ~0ull);
10183 }
10184 
10185 /*
10186  * Fill out the given AU table using the given CU.  A CU is defined in terms
10187  * of AUs.  The table is an encoding: given the index, how many AUs does that
10188  * represent?
10189  *
10190  * NOTE: Assumes that the register layout is the same for the
10191  * local and remote tables.
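 *
 * For example (illustrative): with cu = 1 the eight entries encode
 * 0, 1, 2, 4, 8, 16, 32 and 64 AUs; a larger CU simply scales
 * entries 2 through 7.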
10192  */
10193 static void assign_cm_au_table(struct hfi1_devdata *dd, u32 cu,
10194 			       u32 csr0to3, u32 csr4to7)
10195 {
10196 	write_csr(dd, csr0to3,
10197 		   0ull <<
10198 			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT
10199 		|  1ull <<
10200 			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT
10201 		|  2ull * cu <<
10202 			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT
10203 		|  4ull * cu <<
10204 			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT);
10205 	write_csr(dd, csr4to7,
10206 		   8ull * cu <<
10207 			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT
10208 		| 16ull * cu <<
10209 			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT
10210 		| 32ull * cu <<
10211 			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT
10212 		| 64ull * cu <<
10213 			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT);
10214 
10215 }
10216 
10217 static void assign_local_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
10218 {
10219 	assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_LOCAL_AU_TABLE0_TO3,
10220 					SEND_CM_LOCAL_AU_TABLE4_TO7);
10221 }
10222 
10223 void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
10224 {
10225 	assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_REMOTE_AU_TABLE0_TO3,
10226 					SEND_CM_REMOTE_AU_TABLE4_TO7);
10227 }
10228 
10229 static void init_txe(struct hfi1_devdata *dd)
10230 {
10231 	int i;
10232 
10233 	/* enable all PIO, SDMA, general, and Egress errors */
10234 	write_csr(dd, SEND_PIO_ERR_MASK, ~0ull);
10235 	write_csr(dd, SEND_DMA_ERR_MASK, ~0ull);
10236 	write_csr(dd, SEND_ERR_MASK, ~0ull);
10237 	write_csr(dd, SEND_EGRESS_ERR_MASK, ~0ull);
10238 
10239 	/* enable all per-context and per-SDMA engine errors */
10240 	for (i = 0; i < dd->chip_send_contexts; i++)
10241 		write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, ~0ull);
10242 	for (i = 0; i < dd->chip_sdma_engines; i++)
10243 		write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, ~0ull);
10244 
10245 	/* set the local CU to AU mapping */
10246 	assign_local_cm_au_table(dd, dd->vcu);
10247 
10248 	/*
10249 	 * Set reasonable default for Credit Return Timer
10250 	 * Don't set on Simulator - causes it to choke.
10251 	 */
10252 	if (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)
10253 		write_csr(dd, SEND_CM_TIMER_CTRL, HFI1_CREDIT_RETURN_RATE);
10254 }
10255 
10256 int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey)
10257 {
10258 	struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
10259 	unsigned sctxt;
10260 	int ret = 0;
10261 	u64 reg;
10262 
10263 	if (!rcd || !rcd->sc) {
10264 		ret = -EINVAL;
10265 		goto done;
10266 	}
10267 	sctxt = rcd->sc->hw_context;
10268 	reg = SEND_CTXT_CHECK_JOB_KEY_MASK_SMASK | /* mask is always 1's */
10269 		((jkey & SEND_CTXT_CHECK_JOB_KEY_VALUE_MASK) <<
10270 		 SEND_CTXT_CHECK_JOB_KEY_VALUE_SHIFT);
10271 	/* JOB_KEY_ALLOW_PERMISSIVE is not allowed by default */
10272 	if (HFI1_CAP_KGET_MASK(rcd->flags, ALLOW_PERM_JKEY))
10273 		reg |= SEND_CTXT_CHECK_JOB_KEY_ALLOW_PERMISSIVE_SMASK;
10274 	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, reg);
10275 	/*
10276 	 * Enable send-side J_KEY integrity check, unless this is A0 h/w
10277 	 * (due to A0 erratum).
10278 	 */
10279 	if (!is_a0(dd)) {
10280 		reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10281 		reg |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
10282 		write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10283 	}
10284 
10285 	/* Enable J_KEY check on receive context. */
10286 	reg = RCV_KEY_CTRL_JOB_KEY_ENABLE_SMASK |
10287 		((jkey & RCV_KEY_CTRL_JOB_KEY_VALUE_MASK) <<
10288 		 RCV_KEY_CTRL_JOB_KEY_VALUE_SHIFT);
10289 	write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, reg);
10290 done:
10291 	return ret;
10292 }
10293 
10294 int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt)
10295 {
10296 	struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
10297 	unsigned sctxt;
10298 	int ret = 0;
10299 	u64 reg;
10300 
10301 	if (!rcd || !rcd->sc) {
10302 		ret = -EINVAL;
10303 		goto done;
10304 	}
10305 	sctxt = rcd->sc->hw_context;
10306 	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, 0);
10307 	/*
10308 	 * Disable send-side J_KEY integrity check, unless this is A0 h/w.
10309 	 * This check would not have been enabled for A0 h/w, see
10310 	 * set_ctxt_jkey().
10311 	 */
10312 	if (!is_a0(dd)) {
10313 		reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10314 		reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
10315 		write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10316 	}
10317 	/* Turn off the J_KEY on the receive side */
10318 	write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, 0);
10319 done:
10320 	return ret;
10321 }
10322 
10323 int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey)
10324 {
10325 	struct hfi1_ctxtdata *rcd;
10326 	unsigned sctxt;
10327 	int ret = 0;
10328 	u64 reg;
10329 
10330 	if (ctxt < dd->num_rcv_contexts)
10331 		rcd = dd->rcd[ctxt];
10332 	else {
10333 		ret = -EINVAL;
10334 		goto done;
10335 	}
10336 	if (!rcd || !rcd->sc) {
10337 		ret = -EINVAL;
10338 		goto done;
10339 	}
10340 	sctxt = rcd->sc->hw_context;
10341 	reg = ((u64)pkey & SEND_CTXT_CHECK_PARTITION_KEY_VALUE_MASK) <<
10342 		SEND_CTXT_CHECK_PARTITION_KEY_VALUE_SHIFT;
10343 	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg);
10344 	reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10345 	reg |= SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
10346 	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10347 done:
10348 	return ret;
10349 }
10350 
10351 int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt)
10352 {
10353 	struct hfi1_ctxtdata *rcd;
10354 	unsigned sctxt;
10355 	int ret = 0;
10356 	u64 reg;
10357 
10358 	if (ctxt < dd->num_rcv_contexts)
10359 		rcd = dd->rcd[ctxt];
10360 	else {
10361 		ret = -EINVAL;
10362 		goto done;
10363 	}
10364 	if (!rcd || !rcd->sc) {
10365 		ret = -EINVAL;
10366 		goto done;
10367 	}
10368 	sctxt = rcd->sc->hw_context;
10369 	reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10370 	reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
10371 	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10372 	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0);
10373 done:
10374 	return ret;
10375 }
10376 
10377 /*
10378  * Start doing the clean up of the chip. Our clean up happens in multiple
10379  * stages and this is just the first.
10380  */
10381 void hfi1_start_cleanup(struct hfi1_devdata *dd)
10382 {
10383 	free_cntrs(dd);
10384 	free_rcverr(dd);
10385 	clean_up_interrupts(dd);
10386 }
10387 
10388 #define HFI_BASE_GUID(dev) \
10389 	((dev)->base_guid & ~(1ULL << GUID_HFI_INDEX_SHIFT))
10390 
10391 /*
10392  * Certain chip functions need to be initialized only once per asic
10393  * instead of per-device. This function finds the peer device and
10394  * checks whether that chip initialization needs to be done by this
10395  * device.
10396  */
10397 static void asic_should_init(struct hfi1_devdata *dd)
10398 {
10399 	unsigned long flags;
10400 	struct hfi1_devdata *tmp, *peer = NULL;
10401 
10402 	spin_lock_irqsave(&hfi1_devs_lock, flags);
10403 	/* Find our peer device */
10404 	list_for_each_entry(tmp, &hfi1_dev_list, list) {
10405 		if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(tmp)) &&
10406 		    dd->unit != tmp->unit) {
10407 			peer = tmp;
10408 			break;
10409 		}
10410 	}
10411 
10412 	/*
10413 	 * "Claim" the ASIC for initialization if it hasn't been
10414 	 * "claimed" yet.
10415 	 */
10416 	if (!peer || !(peer->flags & HFI1_DO_INIT_ASIC))
10417 		dd->flags |= HFI1_DO_INIT_ASIC;
10418 	spin_unlock_irqrestore(&hfi1_devs_lock, flags);
10419 }
10420 
10421 /**
10422  * Allocate and initialize the device structure for the hfi.
10423  * @dev: the pci_dev for hfi1_ib device
10424  * @ent: pci_device_id struct for this dev
10425  *
10426  * Also allocates, initializes, and returns the devdata struct for this
10427  * device instance
10428  *
10429  * This is global, and is called directly at init to set up the
10430  * chip-specific function pointers for later use.
10431  */
10432 struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
10433 				  const struct pci_device_id *ent)
10434 {
10435 	struct hfi1_devdata *dd;
10436 	struct hfi1_pportdata *ppd;
10437 	u64 reg;
10438 	int i, ret;
10439 	static const char * const inames[] = { /* implementation names */
10440 		"RTL silicon",
10441 		"RTL VCS simulation",
10442 		"RTL FPGA emulation",
10443 		"Functional simulator"
10444 	};
10445 
10446 	dd = hfi1_alloc_devdata(pdev,
10447 		NUM_IB_PORTS * sizeof(struct hfi1_pportdata));
10448 	if (IS_ERR(dd))
10449 		goto bail;
10450 	ppd = dd->pport;
10451 	for (i = 0; i < dd->num_pports; i++, ppd++) {
10452 		int vl;
10453 		/* init common fields */
10454 		hfi1_init_pportdata(pdev, ppd, dd, 0, 1);
10455 		/* DC supports 4 link widths */
10456 		ppd->link_width_supported =
10457 			OPA_LINK_WIDTH_1X | OPA_LINK_WIDTH_2X |
10458 			OPA_LINK_WIDTH_3X | OPA_LINK_WIDTH_4X;
10459 		ppd->link_width_downgrade_supported =
10460 			ppd->link_width_supported;
10461 		/* start out enabling only 4X */
10462 		ppd->link_width_enabled = OPA_LINK_WIDTH_4X;
10463 		ppd->link_width_downgrade_enabled =
10464 					ppd->link_width_downgrade_supported;
10465 		/* link width active is 0 when link is down */
10466 		/* link width downgrade active is 0 when link is down */
10467 
10468 		if (num_vls < HFI1_MIN_VLS_SUPPORTED
10469 			|| num_vls > HFI1_MAX_VLS_SUPPORTED) {
10470 			hfi1_early_err(&pdev->dev,
10471 				       "Invalid num_vls %u, using %u VLs\n",
10472 				    num_vls, HFI1_MAX_VLS_SUPPORTED);
10473 			num_vls = HFI1_MAX_VLS_SUPPORTED;
10474 		}
10475 		ppd->vls_supported = num_vls;
10476 		ppd->vls_operational = ppd->vls_supported;
10477 		/* Set the default MTU. */
10478 		for (vl = 0; vl < num_vls; vl++)
10479 			dd->vld[vl].mtu = hfi1_max_mtu;
10480 		dd->vld[15].mtu = MAX_MAD_PACKET;
10481 		/*
10482 		 * Set the initial values to reasonable default, will be set
10483 		 * for real when link is up.
10484 		 */
10485 		ppd->lstate = IB_PORT_DOWN;
10486 		ppd->overrun_threshold = 0x4;
10487 		ppd->phy_error_threshold = 0xf;
10488 		ppd->port_crc_mode_enabled = link_crc_mask;
10489 		/* initialize supported LTP CRC mode */
10490 		ppd->port_ltp_crc_mode = cap_to_port_ltp(link_crc_mask) << 8;
10491 		/* initialize enabled LTP CRC mode */
10492 		ppd->port_ltp_crc_mode |= cap_to_port_ltp(link_crc_mask) << 4;
10493 		/* start in offline */
10494 		ppd->host_link_state = HLS_DN_OFFLINE;
10495 		init_vl_arb_caches(ppd);
10496 	}
10497 
10498 	dd->link_default = HLS_DN_POLL;
10499 
10500 	/*
10501 	 * Do remaining PCIe setup and save PCIe values in dd.
10502 	 * Any error printing is already done by the init code.
10503 	 * On return, we have the chip mapped.
10504 	 */
10505 	ret = hfi1_pcie_ddinit(dd, pdev, ent);
10506 	if (ret < 0)
10507 		goto bail_free;
10508 
10509 	/* verify that reads actually work, save revision for reset check */
10510 	dd->revision = read_csr(dd, CCE_REVISION);
10511 	if (dd->revision == ~(u64)0) {
10512 		dd_dev_err(dd, "cannot read chip CSRs\n");
10513 		ret = -EINVAL;
10514 		goto bail_cleanup;
10515 	}
10516 	dd->majrev = (dd->revision >> CCE_REVISION_CHIP_REV_MAJOR_SHIFT)
10517 			& CCE_REVISION_CHIP_REV_MAJOR_MASK;
10518 	dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
10519 			& CCE_REVISION_CHIP_REV_MINOR_MASK;
10520 
10521 	/* obtain the hardware ID - NOT related to unit, which is a
10522 	   software enumeration */
10523 	reg = read_csr(dd, CCE_REVISION2);
10524 	dd->hfi1_id = (reg >> CCE_REVISION2_HFI_ID_SHIFT)
10525 					& CCE_REVISION2_HFI_ID_MASK;
10526 	/* the variable size will remove unwanted bits */
10527 	dd->icode = reg >> CCE_REVISION2_IMPL_CODE_SHIFT;
10528 	dd->irev = reg >> CCE_REVISION2_IMPL_REVISION_SHIFT;
10529 	dd_dev_info(dd, "Implementation: %s, revision 0x%x\n",
10530 		dd->icode < ARRAY_SIZE(inames) ? inames[dd->icode] : "unknown",
10531 		(int)dd->irev);
10532 
10533 	/* speeds the hardware can support */
10534 	dd->pport->link_speed_supported = OPA_LINK_SPEED_25G;
10535 	/* speeds allowed to run at */
10536 	dd->pport->link_speed_enabled = dd->pport->link_speed_supported;
10537 	/* give a reasonable active value, will be set on link up */
10538 	dd->pport->link_speed_active = OPA_LINK_SPEED_25G;
10539 
10540 	dd->chip_rcv_contexts = read_csr(dd, RCV_CONTEXTS);
10541 	dd->chip_send_contexts = read_csr(dd, SEND_CONTEXTS);
10542 	dd->chip_sdma_engines = read_csr(dd, SEND_DMA_ENGINES);
10543 	dd->chip_pio_mem_size = read_csr(dd, SEND_PIO_MEM_SIZE);
10544 	dd->chip_sdma_mem_size = read_csr(dd, SEND_DMA_MEM_SIZE);
10545 	/* fix up link widths for emulation _p */
10546 	ppd = dd->pport;
10547 	if (dd->icode == ICODE_FPGA_EMULATION && is_emulator_p(dd)) {
10548 		ppd->link_width_supported =
10549 			ppd->link_width_enabled =
10550 			ppd->link_width_downgrade_supported =
10551 			ppd->link_width_downgrade_enabled =
10552 				OPA_LINK_WIDTH_1X;
10553 	}
10554 	/* ensure num_vls isn't larger than the number of SDMA engines */
10555 	if (HFI1_CAP_IS_KSET(SDMA) && num_vls > dd->chip_sdma_engines) {
10556 		dd_dev_err(dd, "num_vls %u too large, using %u VLs\n",
10557 				num_vls, HFI1_MAX_VLS_SUPPORTED);
10558 		ppd->vls_supported = num_vls = HFI1_MAX_VLS_SUPPORTED;
10559 		ppd->vls_operational = ppd->vls_supported;
10560 	}
10561 
10562 	/*
10563 	 * Convert the ns parameter to the 64 * cclocks used in the CSR.
10564 	 * Limit the max if larger than the field holds.  If timeout is
10565 	 * non-zero, then the calculated field will be at least 1.
10566 	 *
10567 	 * Must be after icode is set up - the cclock rate depends
10568 	 * on knowing the hardware being used.
10569 	 */
10570 	dd->rcv_intr_timeout_csr = ns_to_cclock(dd, rcv_intr_timeout) / 64;
10571 	if (dd->rcv_intr_timeout_csr >
10572 			RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_MASK)
10573 		dd->rcv_intr_timeout_csr =
10574 			RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_MASK;
10575 	else if (dd->rcv_intr_timeout_csr == 0 && rcv_intr_timeout)
10576 		dd->rcv_intr_timeout_csr = 1;
10577 
10578 	/* needs to be done before we look for the peer device */
10579 	read_guid(dd);
10580 
10581 	/* should this device init the ASIC block? */
10582 	asic_should_init(dd);
10583 
10584 	/* obtain chip sizes, reset chip CSRs */
10585 	init_chip(dd);
10586 
10587 	/* read in the PCIe link speed information */
10588 	ret = pcie_speeds(dd);
10589 	if (ret)
10590 		goto bail_cleanup;
10591 
10592 	/* read in firmware */
10593 	ret = hfi1_firmware_init(dd);
10594 	if (ret)
10595 		goto bail_cleanup;
10596 
10597 	/*
10598 	 * In general, the PCIe Gen3 transition must occur after the
10599 	 * chip has been idled (so it won't initiate any PCIe transactions
10600 	 * e.g. an interrupt) and before the driver changes any registers
10601 	 * (the transition will reset the registers).
10602 	 *
10603 	 * In particular, place this call after:
10604 	 * - init_chip()     - the chip will not initiate any PCIe transactions
10605 	 * - pcie_speeds()   - reads the current link speed
10606 	 * - hfi1_firmware_init() - the needed firmware is ready to be
10607 	 *			    downloaded
10608 	 */
10609 	ret = do_pcie_gen3_transition(dd);
10610 	if (ret)
10611 		goto bail_cleanup;
10612 
10613 	/* start setting dd values and adjusting CSRs */
10614 	init_early_variables(dd);
10615 
10616 	parse_platform_config(dd);
10617 
10618 	/* add board names as they are defined */
10619 	dd->boardname = kmalloc(64, GFP_KERNEL);
10620 	if (!dd->boardname) {
		ret = -ENOMEM;
		goto bail_cleanup;
	}
10622 	snprintf(dd->boardname, 64, "Board ID 0x%llx",
10623 		 dd->revision >> CCE_REVISION_BOARD_ID_LOWER_NIBBLE_SHIFT
10624 		    & CCE_REVISION_BOARD_ID_LOWER_NIBBLE_MASK);
10625 
10626 	snprintf(dd->boardversion, BOARD_VERS_MAX,
10627 		 "ChipABI %u.%u, %s, ChipRev %u.%u, SW Compat %llu\n",
10628 		 HFI1_CHIP_VERS_MAJ, HFI1_CHIP_VERS_MIN,
10629 		 dd->boardname,
10630 		 (u32)dd->majrev,
10631 		 (u32)dd->minrev,
10632 		 (dd->revision >> CCE_REVISION_SW_SHIFT)
10633 		    & CCE_REVISION_SW_MASK);
10634 
10635 	ret = set_up_context_variables(dd);
10636 	if (ret)
10637 		goto bail_cleanup;
10638 
10639 	/* set initial RXE CSRs */
10640 	init_rxe(dd);
10641 	/* set initial TXE CSRs */
10642 	init_txe(dd);
10643 	/* set initial non-RXE, non-TXE CSRs */
10644 	init_other(dd);
10645 	/* set up KDETH QP prefix in both RX and TX CSRs */
10646 	init_kdeth_qp(dd);
10647 
10648 	/* send contexts must be set up before receive contexts */
10649 	ret = init_send_contexts(dd);
10650 	if (ret)
10651 		goto bail_cleanup;
10652 
10653 	ret = hfi1_create_ctxts(dd);
10654 	if (ret)
10655 		goto bail_cleanup;
10656 
10657 	dd->rcvhdrsize = DEFAULT_RCVHDRSIZE;
10658 	/*
10659 	 * rcd[0] is guaranteed to be valid by this point. Also, all
10660 	 * contexts are using the same value, as per the module parameter.
10661 	 */
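	/*
	 * sizeof(u64) / sizeof(u32) == 2 dwords: the 8-byte RHF is assumed
	 * (for this note) to sit at the end of each header queue entry.
	 */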
10662 	dd->rhf_offset = dd->rcd[0]->rcvhdrqentsize - sizeof(u64) / sizeof(u32);
10663 
10664 	ret = init_pervl_scs(dd);
10665 	if (ret)
10666 		goto bail_cleanup;
10667 
10668 	/* sdma init */
10669 	for (i = 0; i < dd->num_pports; ++i) {
10670 		ret = sdma_init(dd, i);
10671 		if (ret)
10672 			goto bail_cleanup;
10673 	}
10674 
10675 	/* use contexts created by hfi1_create_ctxts */
10676 	ret = set_up_interrupts(dd);
10677 	if (ret)
10678 		goto bail_cleanup;
10679 
10680 	/* set up LCB access - must be after set_up_interrupts() */
10681 	init_lcb_access(dd);
10682 
10683 	snprintf(dd->serial, SERIAL_MAX, "0x%08llx\n",
10684 		 dd->base_guid & 0xFFFFFF);
10685 
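	/*
	 * Illustrative note: the OUI is taken from the top three bytes of
	 * the base GUID - oui1 from bits 63:56, oui2 from 55:48 and oui3
	 * from 47:40, as the shifts below show.
	 */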
10686 	dd->oui1 = dd->base_guid >> 56 & 0xFF;
10687 	dd->oui2 = dd->base_guid >> 48 & 0xFF;
10688 	dd->oui3 = dd->base_guid >> 40 & 0xFF;
10689 
10690 	ret = load_firmware(dd); /* asymmetric with dispose_firmware() */
10691 	if (ret)
10692 		goto bail_clear_intr;
10693 	check_fabric_firmware_versions(dd);
10694 
10695 	thermal_init(dd);
10696 
10697 	ret = init_cntrs(dd);
10698 	if (ret)
10699 		goto bail_clear_intr;
10700 
10701 	ret = init_rcverr(dd);
10702 	if (ret)
10703 		goto bail_free_cntrs;
10704 
10705 	ret = eprom_init(dd);
10706 	if (ret)
10707 		goto bail_free_rcverr;
10708 
10709 	goto bail;
10710 
10711 bail_free_rcverr:
10712 	free_rcverr(dd);
10713 bail_free_cntrs:
10714 	free_cntrs(dd);
10715 bail_clear_intr:
10716 	clean_up_interrupts(dd);
10717 bail_cleanup:
10718 	hfi1_pcie_ddcleanup(dd);
10719 bail_free:
10720 	hfi1_free_devdata(dd);
10721 	dd = ERR_PTR(ret);
10722 bail:
10723 	return dd;
10724 }
10725 
10726 static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
10727 			u32 dw_len)
10728 {
10729 	u32 delta_cycles;
10730 	u32 current_egress_rate = ppd->current_egress_rate;
10731 	/* rates here are in units of 10^6 bits/sec */
10732 
10733 	if (desired_egress_rate == -1)
10734 		return 0; /* shouldn't happen */
10735 
10736 	if (desired_egress_rate >= current_egress_rate)
10737 		return 0; /* we can't help go faster, only slower */
10738 
10739 	delta_cycles = egress_cycles(dw_len * 4, desired_egress_rate) -
10740 			egress_cycles(dw_len * 4, current_egress_rate);
10741 
10742 	return (u16)delta_cycles;
10743 }
10744 
10745 
10746 /**
10747  * create_pbc - build a pbc for transmission
10748  * @flags: special case flags or-ed in built pbc
10749  * @srate: static rate
10750  * @vl: vl
10751  * @dwlen: dword length (header words + data words + pbc words)
10752  *
10753  * Create a PBC with the given flags, rate, VL, and length.
10754  *
10755  * NOTE: The PBC created will not insert any HCRC - all callers but one are
10756  * for verbs, which does not use this PSM feature.  The lone other caller
10757  * is for the diagnostic interface which calls this if the user does not
10758  * supply their own PBC.
10759  */
10760 u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
10761 	       u32 dw_len)
10762 {
10763 	u64 pbc, delay = 0;
10764 
10765 	if (unlikely(srate_mbs))
10766 		delay = delay_cycles(ppd, srate_mbs, dw_len);
10767 
10768 	pbc = flags
10769 		| (delay << PBC_STATIC_RATE_CONTROL_COUNT_SHIFT)
10770 		| ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
10771 		| (vl & PBC_VL_MASK) << PBC_VL_SHIFT
10772 		| (dw_len & PBC_LENGTH_DWS_MASK)
10773 			<< PBC_LENGTH_DWS_SHIFT;
10774 
10775 	return pbc;
10776 }
10777 
10778 #define SBUS_THERMAL    0x4f
10779 #define SBUS_THERM_MONITOR_MODE 0x1
10780 
10781 #define THERM_FAILURE(dev, ret, reason) \
10782 	dd_dev_err((dd),						\
10783 		   "Thermal sensor initialization failed: %s (%d)\n",	\
10784 		   (reason), (ret))
10785 
10786 /*
10787  * Initialize the Avago Thermal sensor.
10788  *
10789  * After initialization, enable polling of thermal sensor through
10790  * SBus interface. For this to work, the SBus Master
10791  * firmware has to be loaded, because the HW polling
10792  * logic uses SBus interrupts, which are not supported with
10793  * the default firmware. Otherwise, no data will be returned through
10794  * the ASIC_STS_THERM CSR.
10795  */
10796 static int thermal_init(struct hfi1_devdata *dd)
10797 {
10798 	int ret = 0;
10799 
10800 	if (dd->icode != ICODE_RTL_SILICON ||
10801 	    !(dd->flags & HFI1_DO_INIT_ASIC))
10802 		return ret;
10803 
10804 	acquire_hw_mutex(dd);
10805 	dd_dev_info(dd, "Initializing thermal sensor\n");
10806 
10807 	/* Thermal Sensor Initialization */
10808 	/*    Step 1: Reset the Thermal SBus Receiver */
10809 	ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10810 				RESET_SBUS_RECEIVER, 0);
10811 	if (ret) {
10812 		THERM_FAILURE(dd, ret, "Bus Reset");
10813 		goto done;
10814 	}
10815 	/*    Step 2: Set Reset bit in Thermal block */
10816 	ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10817 				WRITE_SBUS_RECEIVER, 0x1);
10818 	if (ret) {
10819 		THERM_FAILURE(dd, ret, "Therm Block Reset");
10820 		goto done;
10821 	}
10822 	/*    Step 3: Write clock divider value (100MHz -> 2MHz) */
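	/* illustrative: 0x32 == 50 decimal, and 100MHz / 50 = 2MHz */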
10823 	ret = sbus_request_slow(dd, SBUS_THERMAL, 0x1,
10824 				WRITE_SBUS_RECEIVER, 0x32);
10825 	if (ret) {
10826 		THERM_FAILURE(dd, ret, "Write Clock Div");
10827 		goto done;
10828 	}
10829 	/*    Step 4: Select temperature mode */
10830 	ret = sbus_request_slow(dd, SBUS_THERMAL, 0x3,
10831 				WRITE_SBUS_RECEIVER,
10832 				SBUS_THERM_MONITOR_MODE);
10833 	if (ret) {
10834 		THERM_FAILURE(dd, ret, "Write Mode Sel");
10835 		goto done;
10836 	}
10837 	/*    Step 5: De-assert block reset and start conversion */
10838 	ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10839 				WRITE_SBUS_RECEIVER, 0x2);
10840 	if (ret) {
10841 		THERM_FAILURE(dd, ret, "Write Reset Deassert");
10842 		goto done;
10843 	}
10844 	/*    Step 5.1: Wait for first conversion (21.5ms per spec) */
10845 	msleep(22);
10846 
10847 	/* Enable polling of thermal readings */
10848 	write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x1);
10849 done:
10850 	release_hw_mutex(dd);
10851 	return ret;
10852 }
10853 
10854 static void handle_temp_err(struct hfi1_devdata *dd)
10855 {
10856 	struct hfi1_pportdata *ppd = &dd->pport[0];
10857 	/*
10858 	 * Thermal Critical Interrupt
10859 	 * Put the device into forced freeze mode, take link down to
10860 	 * offline, and put DC into reset.
10861 	 */
10862 	dd_dev_emerg(dd,
10863 		     "Critical temperature reached! Forcing device into freeze mode!\n");
10864 	dd->flags |= HFI1_FORCED_FREEZE;
10865 	start_freeze_handling(ppd, FREEZE_SELF|FREEZE_ABORT);
10866 	/*
10867 	 * Shut DC down as much and as quickly as possible.
10868 	 *
10869 	 * Step 1: Take the link down to OFFLINE. This will cause the
10870 	 *         8051 to put the Serdes in reset. However, we don't want to
10871 	 *         go through the entire link state machine since we want to
10872 	 *         shutdown ASAP. Furthermore, this is not a graceful shutdown
10873 	 *         but rather an attempt to save the chip.
10874 	 *         Code below is almost the same as quiet_serdes() but avoids
10875 	 *         all the extra work and the sleeps.
10876 	 */
10877 	ppd->driver_link_ready = 0;
10878 	ppd->link_enabled = 0;
10879 	set_physical_link_state(dd, PLS_OFFLINE |
10880 				(OPA_LINKDOWN_REASON_SMA_DISABLED << 8));
10881 	/*
10882 	 * Step 2: Shutdown LCB and 8051
10883 	 *         After shutdown, do not restore DC_CFG_RESET value.
10884 	 */
10885 	dc_shutdown(dd);
10886 }
10887