This source file includes the following definitions.
- mlx5_fpga_device_alloc
- mlx5_fpga_image_name
- mlx5_fpga_device_name
- mlx5_fpga_device_load_check
- mlx5_fpga_device_brb
- fpga_err_event
- fpga_qp_err_event
- mlx5_fpga_device_start
- mlx5_fpga_init
- mlx5_fpga_device_stop
- mlx5_fpga_cleanup
- mlx5_fpga_syndrome_to_string
- mlx5_fpga_qp_syndrome_to_string
- mlx5_fpga_event
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33 #include <linux/module.h>
34 #include <linux/etherdevice.h>
35 #include <linux/mlx5/driver.h>
36
37 #include "mlx5_core.h"
38 #include "lib/mlx5.h"
39 #include "lib/eq.h"
40 #include "fpga/core.h"
41 #include "fpga/conn.h"
42
/*
 * Text for each FPGA error-event syndrome. The array is indexed directly by
 * the syndrome value (see mlx5_fpga_syndrome_to_string()).
 */
static const char *const mlx5_fpga_error_strings[] = {
	[0] = "Null Syndrome",
	[1] = "Corrupted DDR",
	[2] = "Flash Timeout",
	[3] = "Internal Link Error",
	[4] = "Watchdog HW Failure",
	[5] = "I2C Failure",
	[6] = "Image Changed",
	[7] = "Temperature Critical",
};
53
/*
 * Text for each FPGA QP error-event syndrome. The array is indexed directly
 * by the syndrome value (see mlx5_fpga_qp_syndrome_to_string()).
 */
static const char *const mlx5_fpga_qp_error_strings[] = {
	[0] = "Null Syndrome",
	[1] = "Retry Counter Expired",
	[2] = "RNR Expired",
};
59 static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void)
60 {
61 struct mlx5_fpga_device *fdev = NULL;
62
63 fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
64 if (!fdev)
65 return NULL;
66
67 spin_lock_init(&fdev->state_lock);
68 fdev->state = MLX5_FPGA_STATUS_NONE;
69 return fdev;
70 }
71
72 static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image)
73 {
74 switch (image) {
75 case MLX5_FPGA_IMAGE_USER:
76 return "user";
77 case MLX5_FPGA_IMAGE_FACTORY:
78 return "factory";
79 default:
80 return "unknown";
81 }
82 }
83
84 static const char *mlx5_fpga_device_name(u32 device)
85 {
86 switch (device) {
87 case MLX5_FPGA_DEVICE_KU040:
88 return "ku040";
89 case MLX5_FPGA_DEVICE_KU060:
90 return "ku060";
91 case MLX5_FPGA_DEVICE_KU060_2:
92 return "ku060_2";
93 case MLX5_FPGA_DEVICE_UNKNOWN:
94 default:
95 return "unknown";
96 }
97 }
98
99 static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
100 {
101 struct mlx5_fpga_query query;
102 int err;
103
104 err = mlx5_fpga_query(fdev->mdev, &query);
105 if (err) {
106 mlx5_fpga_err(fdev, "Failed to query status: %d\n", err);
107 return err;
108 }
109
110 fdev->last_admin_image = query.admin_image;
111 fdev->last_oper_image = query.oper_image;
112
113 mlx5_fpga_dbg(fdev, "Status %u; Admin image %u; Oper image %u\n",
114 query.status, query.admin_image, query.oper_image);
115
116 if (query.status != MLX5_FPGA_STATUS_SUCCESS) {
117 mlx5_fpga_err(fdev, "%s image failed to load; status %u\n",
118 mlx5_fpga_image_name(fdev->last_oper_image),
119 query.status);
120 return -EIO;
121 }
122
123 return 0;
124 }
125
126 static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
127 {
128 int err;
129 struct mlx5_core_dev *mdev = fdev->mdev;
130
131 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
132 if (err) {
133 mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err);
134 return err;
135 }
136 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX);
137 if (err) {
138 mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err);
139 return err;
140 }
141 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF);
142 if (err) {
143 mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err);
144 return err;
145 }
146 return 0;
147 }
148
/* Forward declaration: the notifier callbacks below dispatch to this handler. */
static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
			   unsigned long event, void *eqe);
150
151 static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
152 {
153 struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb);
154
155 return mlx5_fpga_event(fdev, event, eqe);
156 }
157
158 static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
159 {
160 struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb);
161
162 return mlx5_fpga_event(fdev, event, eqe);
163 }
164
165 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
166 {
167 struct mlx5_fpga_device *fdev = mdev->fpga;
168 unsigned int max_num_qps;
169 unsigned long flags;
170 u32 fpga_device_id;
171 int err;
172
173 if (!fdev)
174 return 0;
175
176 err = mlx5_fpga_device_load_check(fdev);
177 if (err)
178 goto out;
179
180 err = mlx5_fpga_caps(fdev->mdev);
181 if (err)
182 goto out;
183
184 fpga_device_id = MLX5_CAP_FPGA(fdev->mdev, fpga_device);
185 mlx5_fpga_info(fdev, "%s:%u; %s image, version %u; SBU %06x:%04x version %d\n",
186 mlx5_fpga_device_name(fpga_device_id),
187 fpga_device_id,
188 mlx5_fpga_image_name(fdev->last_oper_image),
189 MLX5_CAP_FPGA(fdev->mdev, image_version),
190 MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id),
191 MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id),
192 MLX5_CAP_FPGA(fdev->mdev, sandbox_product_version));
193
194 max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
195 if (!max_num_qps) {
196 mlx5_fpga_err(fdev, "FPGA reports 0 QPs in SHELL_CAPS\n");
197 err = -ENOTSUPP;
198 goto out;
199 }
200
201 err = mlx5_core_reserve_gids(mdev, max_num_qps);
202 if (err)
203 goto out;
204
205 MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR);
206 MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR);
207 mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb);
208 mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb);
209
210 err = mlx5_fpga_conn_device_init(fdev);
211 if (err)
212 goto err_rsvd_gid;
213
214 if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
215 err = mlx5_fpga_device_brb(fdev);
216 if (err)
217 goto err_conn_init;
218 }
219
220 goto out;
221
222 err_conn_init:
223 mlx5_fpga_conn_device_cleanup(fdev);
224
225 err_rsvd_gid:
226 mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
227 mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
228 mlx5_core_unreserve_gids(mdev, max_num_qps);
229 out:
230 spin_lock_irqsave(&fdev->state_lock, flags);
231 fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS;
232 spin_unlock_irqrestore(&fdev->state_lock, flags);
233 return err;
234 }
235
236 int mlx5_fpga_init(struct mlx5_core_dev *mdev)
237 {
238 struct mlx5_fpga_device *fdev = NULL;
239
240 if (!MLX5_CAP_GEN(mdev, fpga)) {
241 mlx5_core_dbg(mdev, "FPGA capability not present\n");
242 return 0;
243 }
244
245 mlx5_core_dbg(mdev, "Initializing FPGA\n");
246
247 fdev = mlx5_fpga_device_alloc();
248 if (!fdev)
249 return -ENOMEM;
250
251 fdev->mdev = mdev;
252 mdev->fpga = fdev;
253
254 return 0;
255 }
256
257 void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
258 {
259 struct mlx5_fpga_device *fdev = mdev->fpga;
260 unsigned int max_num_qps;
261 unsigned long flags;
262 int err;
263
264 if (!fdev)
265 return;
266
267 spin_lock_irqsave(&fdev->state_lock, flags);
268 if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) {
269 spin_unlock_irqrestore(&fdev->state_lock, flags);
270 return;
271 }
272 fdev->state = MLX5_FPGA_STATUS_NONE;
273 spin_unlock_irqrestore(&fdev->state_lock, flags);
274
275 if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
276 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
277 if (err)
278 mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n",
279 err);
280 }
281
282 mlx5_fpga_conn_device_cleanup(fdev);
283 mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
284 mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
285
286 max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
287 mlx5_core_unreserve_gids(mdev, max_num_qps);
288 }
289
290 void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
291 {
292 struct mlx5_fpga_device *fdev = mdev->fpga;
293
294 mlx5_fpga_device_stop(mdev);
295 kfree(fdev);
296 mdev->fpga = NULL;
297 }
298
299 static const char *mlx5_fpga_syndrome_to_string(u8 syndrome)
300 {
301 if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings))
302 return mlx5_fpga_error_strings[syndrome];
303 return "Unknown";
304 }
305
306 static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
307 {
308 if (syndrome < ARRAY_SIZE(mlx5_fpga_qp_error_strings))
309 return mlx5_fpga_qp_error_strings[syndrome];
310 return "Unknown";
311 }
312
313 static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
314 unsigned long event, void *eqe)
315 {
316 void *data = ((struct mlx5_eqe *)eqe)->data.raw;
317 const char *event_name;
318 bool teardown = false;
319 unsigned long flags;
320 u8 syndrome;
321
322 switch (event) {
323 case MLX5_EVENT_TYPE_FPGA_ERROR:
324 syndrome = MLX5_GET(fpga_error_event, data, syndrome);
325 event_name = mlx5_fpga_syndrome_to_string(syndrome);
326 break;
327 case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
328 syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome);
329 event_name = mlx5_fpga_qp_syndrome_to_string(syndrome);
330 break;
331 default:
332 return NOTIFY_DONE;
333 }
334
335 spin_lock_irqsave(&fdev->state_lock, flags);
336 switch (fdev->state) {
337 case MLX5_FPGA_STATUS_SUCCESS:
338 mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name);
339 teardown = true;
340 break;
341 default:
342 mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n",
343 syndrome, event_name);
344 }
345 spin_unlock_irqrestore(&fdev->state_lock, flags);
346
347
348
349
350
351 if (teardown)
352 mlx5_trigger_health_work(fdev->mdev);
353
354 return NOTIFY_OK;
355 }