1 /*
2  * This file is provided under a dual BSD/GPLv2 license.  When using or
3  * redistributing this file, you may do so under either license.
4  *
5  * GPL LICENSE SUMMARY
6  *
7  * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
8  *
9  * This program is free software; you can redistribute it and/or modify it
10  * under the terms and conditions of the GNU General Public License,
11  * version 2, as published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful, but WITHOUT
14  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
16  * more details.
17  *
18  * The full GNU General Public License is included in this distribution in
19  * the file called "COPYING".
20  *
21  * BSD LICENSE
22  *
23  * Copyright(c) 2004-2009 Intel Corporation. All rights reserved.
24  *
25  * Redistribution and use in source and binary forms, with or without
26  * modification, are permitted provided that the following conditions are met:
27  *
28  *   * Redistributions of source code must retain the above copyright
29  *     notice, this list of conditions and the following disclaimer.
30  *   * Redistributions in binary form must reproduce the above copyright
31  *     notice, this list of conditions and the following disclaimer in
32  *     the documentation and/or other materials provided with the
33  *     distribution.
34  *   * Neither the name of Intel Corporation nor the names of its
35  *     contributors may be used to endorse or promote products derived
36  *     from this software without specific prior written permission.
37  *
38  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
39  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
41  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
42  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
43  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
44  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
45  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
46  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
47  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
48  * POSSIBILITY OF SUCH DAMAGE.
49  */
50 
51 /*
52  * Support routines for v3+ hardware
53  */
54 #include <linux/module.h>
55 #include <linux/pci.h>
56 #include <linux/gfp.h>
57 #include <linux/dmaengine.h>
58 #include <linux/dma-mapping.h>
59 #include <linux/prefetch.h>
60 #include "../dmaengine.h"
61 #include "registers.h"
62 #include "hw.h"
63 #include "dma.h"
64 #include "dma_v2.h"
65 
66 extern struct kmem_cache *ioat3_sed_cache;
67 
68 /* ioat hardware assumes at least two sources for raid operations */
69 #define src_cnt_to_sw(x) ((x) + 2)
70 #define src_cnt_to_hw(x) ((x) - 2)
71 #define ndest_to_sw(x) ((x) + 1)
72 #define ndest_to_hw(x) ((x) - 1)
73 #define src16_cnt_to_sw(x) ((x) + 9)
74 #define src16_cnt_to_hw(x) ((x) - 9)
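/* the 16-source pq descriptors encode the source count relative to a
 * 9-source minimum, so hardware values 0-7 cover 9-16 sources
 */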
75 
76 /* provide a lookup table for setting the source address in the base or
77  * extended descriptor of an xor or pq descriptor
78  */
79 static const u8 xor_idx_to_desc = 0xe0;
80 static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
81 static const u8 pq_idx_to_desc = 0xf8;
82 static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1,
83 				       2, 2, 2, 2, 2, 2, 2 };
84 static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
85 static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
86 					0, 1, 2, 3, 4, 5, 6 };
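/* each bit of xor_idx_to_desc/pq_idx_to_desc selects whether source 'idx'
 * lands in the base (0) or extended (1) descriptor, and the matching
 * *_idx_to_field entry gives the address slot within that descriptor;
 * pq16_idx_to_desc likewise steers sources 2 and up into the two 64-byte
 * halves of the super extended (sed) block
 */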
87 
88 static void ioat3_eh(struct ioat2_dma_chan *ioat);
89 
90 static void xor_set_src(struct ioat_raw_descriptor *descs[2],
91 			dma_addr_t addr, u32 offset, int idx)
92 {
93 	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
94 
95 	raw->field[xor_idx_to_field[idx]] = addr + offset;
96 }
97 
98 static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
99 {
100 	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
101 
102 	return raw->field[pq_idx_to_field[idx]];
103 }
104 
105 static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx)
106 {
107 	struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];
108 
109 	return raw->field[pq16_idx_to_field[idx]];
110 }
111 
112 static void pq_set_src(struct ioat_raw_descriptor *descs[2],
113 		       dma_addr_t addr, u32 offset, u8 coef, int idx)
114 {
115 	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
116 	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
117 
118 	raw->field[pq_idx_to_field[idx]] = addr + offset;
119 	pq->coef[idx] = coef;
120 }
121 
122 static bool is_jf_ioat(struct pci_dev *pdev)
123 {
124 	switch (pdev->device) {
125 	case PCI_DEVICE_ID_INTEL_IOAT_JSF0:
126 	case PCI_DEVICE_ID_INTEL_IOAT_JSF1:
127 	case PCI_DEVICE_ID_INTEL_IOAT_JSF2:
128 	case PCI_DEVICE_ID_INTEL_IOAT_JSF3:
129 	case PCI_DEVICE_ID_INTEL_IOAT_JSF4:
130 	case PCI_DEVICE_ID_INTEL_IOAT_JSF5:
131 	case PCI_DEVICE_ID_INTEL_IOAT_JSF6:
132 	case PCI_DEVICE_ID_INTEL_IOAT_JSF7:
133 	case PCI_DEVICE_ID_INTEL_IOAT_JSF8:
134 	case PCI_DEVICE_ID_INTEL_IOAT_JSF9:
135 		return true;
136 	default:
137 		return false;
138 	}
139 }
140 
141 static bool is_snb_ioat(struct pci_dev *pdev)
142 {
143 	switch (pdev->device) {
144 	case PCI_DEVICE_ID_INTEL_IOAT_SNB0:
145 	case PCI_DEVICE_ID_INTEL_IOAT_SNB1:
146 	case PCI_DEVICE_ID_INTEL_IOAT_SNB2:
147 	case PCI_DEVICE_ID_INTEL_IOAT_SNB3:
148 	case PCI_DEVICE_ID_INTEL_IOAT_SNB4:
149 	case PCI_DEVICE_ID_INTEL_IOAT_SNB5:
150 	case PCI_DEVICE_ID_INTEL_IOAT_SNB6:
151 	case PCI_DEVICE_ID_INTEL_IOAT_SNB7:
152 	case PCI_DEVICE_ID_INTEL_IOAT_SNB8:
153 	case PCI_DEVICE_ID_INTEL_IOAT_SNB9:
154 		return true;
155 	default:
156 		return false;
157 	}
158 }
159 
160 static bool is_ivb_ioat(struct pci_dev *pdev)
161 {
162 	switch (pdev->device) {
163 	case PCI_DEVICE_ID_INTEL_IOAT_IVB0:
164 	case PCI_DEVICE_ID_INTEL_IOAT_IVB1:
165 	case PCI_DEVICE_ID_INTEL_IOAT_IVB2:
166 	case PCI_DEVICE_ID_INTEL_IOAT_IVB3:
167 	case PCI_DEVICE_ID_INTEL_IOAT_IVB4:
168 	case PCI_DEVICE_ID_INTEL_IOAT_IVB5:
169 	case PCI_DEVICE_ID_INTEL_IOAT_IVB6:
170 	case PCI_DEVICE_ID_INTEL_IOAT_IVB7:
171 	case PCI_DEVICE_ID_INTEL_IOAT_IVB8:
172 	case PCI_DEVICE_ID_INTEL_IOAT_IVB9:
173 		return true;
174 	default:
175 		return false;
176 	}
177 
178 }
179 
180 static bool is_hsw_ioat(struct pci_dev *pdev)
181 {
182 	switch (pdev->device) {
183 	case PCI_DEVICE_ID_INTEL_IOAT_HSW0:
184 	case PCI_DEVICE_ID_INTEL_IOAT_HSW1:
185 	case PCI_DEVICE_ID_INTEL_IOAT_HSW2:
186 	case PCI_DEVICE_ID_INTEL_IOAT_HSW3:
187 	case PCI_DEVICE_ID_INTEL_IOAT_HSW4:
188 	case PCI_DEVICE_ID_INTEL_IOAT_HSW5:
189 	case PCI_DEVICE_ID_INTEL_IOAT_HSW6:
190 	case PCI_DEVICE_ID_INTEL_IOAT_HSW7:
191 	case PCI_DEVICE_ID_INTEL_IOAT_HSW8:
192 	case PCI_DEVICE_ID_INTEL_IOAT_HSW9:
193 		return true;
194 	default:
195 		return false;
196 	}
197 
198 }
199 
200 static bool is_xeon_cb32(struct pci_dev *pdev)
201 {
202 	return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) ||
203 		is_hsw_ioat(pdev);
204 }
205 
206 static bool is_bwd_ioat(struct pci_dev *pdev)
207 {
208 	switch (pdev->device) {
209 	case PCI_DEVICE_ID_INTEL_IOAT_BWD0:
210 	case PCI_DEVICE_ID_INTEL_IOAT_BWD1:
211 	case PCI_DEVICE_ID_INTEL_IOAT_BWD2:
212 	case PCI_DEVICE_ID_INTEL_IOAT_BWD3:
213 	/* even though not Atom, BDX-DE has the same DMA silicon */
214 	case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0:
215 	case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1:
216 	case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2:
217 	case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3:
218 		return true;
219 	default:
220 		return false;
221 	}
222 }
223 
224 static bool is_bwd_noraid(struct pci_dev *pdev)
225 {
226 	switch (pdev->device) {
227 	case PCI_DEVICE_ID_INTEL_IOAT_BWD2:
228 	case PCI_DEVICE_ID_INTEL_IOAT_BWD3:
229 	case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0:
230 	case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1:
231 	case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2:
232 	case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3:
233 		return true;
234 	default:
235 		return false;
236 	}
237 
238 }
239 
240 static void pq16_set_src(struct ioat_raw_descriptor *desc[3],
241 			dma_addr_t addr, u32 offset, u8 coef, unsigned idx)
242 {
243 	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0];
244 	struct ioat_pq16a_descriptor *pq16 =
245 		(struct ioat_pq16a_descriptor *)desc[1];
246 	struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];
247 
248 	raw->field[pq16_idx_to_field[idx]] = addr + offset;
249 
250 	if (idx < 8)
251 		pq->coef[idx] = coef;
252 	else
253 		pq16->coef[idx - 8] = coef;
254 }
255 
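/* super extended descriptor (sed) blocks come from the per-size dma pools
 * created in ioat3_dma_probe(); hw_pool picks the pool whose block size
 * (SED_SIZE * (hw_pool + 1)) fits the extra source addresses
 */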
256 static struct ioat_sed_ent *
257 ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool)
258 {
259 	struct ioat_sed_ent *sed;
260 	gfp_t flags = __GFP_ZERO | GFP_ATOMIC;
261 
262 	sed = kmem_cache_alloc(ioat3_sed_cache, flags);
263 	if (!sed)
264 		return NULL;
265 
266 	sed->hw_pool = hw_pool;
267 	sed->hw = dma_pool_alloc(device->sed_hw_pool[hw_pool],
268 				 flags, &sed->dma);
269 	if (!sed->hw) {
270 		kmem_cache_free(ioat3_sed_cache, sed);
271 		return NULL;
272 	}
273 
274 	return sed;
275 }
276 
277 static void ioat3_free_sed(struct ioatdma_device *device, struct ioat_sed_ent *sed)
278 {
279 	if (!sed)
280 		return;
281 
282 	dma_pool_free(device->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma);
283 	kmem_cache_free(ioat3_sed_cache, sed);
284 }
285 
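/* true when the descriptor consumed a second ring slot for an extended
 * descriptor (xor with more than 5 sources, pq with more than 3)
 */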
286 static bool desc_has_ext(struct ioat_ring_ent *desc)
287 {
288 	struct ioat_dma_descriptor *hw = desc->hw;
289 
290 	if (hw->ctl_f.op == IOAT_OP_XOR ||
291 	    hw->ctl_f.op == IOAT_OP_XOR_VAL) {
292 		struct ioat_xor_descriptor *xor = desc->xor;
293 
294 		if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
295 			return true;
296 	} else if (hw->ctl_f.op == IOAT_OP_PQ ||
297 		   hw->ctl_f.op == IOAT_OP_PQ_VAL) {
298 		struct ioat_pq_descriptor *pq = desc->pq;
299 
300 		if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
301 			return true;
302 	}
303 
304 	return false;
305 }
306 
307 static u64 ioat3_get_current_completion(struct ioat_chan_common *chan)
308 {
309 	u64 phys_complete;
310 	u64 completion;
311 
312 	completion = *chan->completion;
313 	phys_complete = ioat_chansts_to_addr(completion);
314 
315 	dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__,
316 		(unsigned long long) phys_complete);
317 
318 	return phys_complete;
319 }
320 
321 static bool ioat3_cleanup_preamble(struct ioat_chan_common *chan,
322 				   u64 *phys_complete)
323 {
324 	*phys_complete = ioat3_get_current_completion(chan);
325 	if (*phys_complete == chan->last_completion)
326 		return false;
327 
328 	clear_bit(IOAT_COMPLETION_ACK, &chan->state);
329 	mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
330 
331 	return true;
332 }
333 
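/* when descriptor write back error status (DWBES) is supported the
 * hardware reports pq validate failures in the descriptor itself, so
 * harvest them here during normal cleanup
 */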
334 static void
335 desc_get_errstat(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc)
336 {
337 	struct ioat_dma_descriptor *hw = desc->hw;
338 
339 	switch (hw->ctl_f.op) {
340 	case IOAT_OP_PQ_VAL:
341 	case IOAT_OP_PQ_VAL_16S:
342 	{
343 		struct ioat_pq_descriptor *pq = desc->pq;
344 
345 		/* check if an error status was written back */
346 		if (!pq->dwbes_f.wbes)
347 			return;
348 
349 		/* need to set a chanerr var for checking to clear later */
350 
351 		if (pq->dwbes_f.p_val_err)
352 			*desc->result |= SUM_CHECK_P_RESULT;
353 
354 		if (pq->dwbes_f.q_val_err)
355 			*desc->result |= SUM_CHECK_Q_RESULT;
356 
357 		return;
358 	}
359 	default:
360 		return;
361 	}
362 }
363 
364 /**
365  * __cleanup - reclaim used descriptors
366  * @ioat: channel (ring) to clean
367  *
368  * The difference from the dma_v2.c __cleanup() is that this routine
369  * handles extended descriptors and dma-unmapping raid operations.
370  */
371 static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
372 {
373 	struct ioat_chan_common *chan = &ioat->base;
374 	struct ioatdma_device *device = chan->device;
375 	struct ioat_ring_ent *desc;
376 	bool seen_current = false;
377 	int idx = ioat->tail, i;
378 	u16 active;
379 
380 	dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
381 		__func__, ioat->head, ioat->tail, ioat->issued);
382 
383 	/*
384 	 * At restart of the channel, the completion address and the
385 	 * channel status will be 0 due to starting a new chain. Since
386 	 * it's a new chain and the first descriptor "fails", there is
387 	 * nothing to clean up. We do not want to reap the entire submitted
388 	 * chain due to this 0 address value and then BUG.
389 	 */
390 	if (!phys_complete)
391 		return;
392 
393 	active = ioat2_ring_active(ioat);
394 	for (i = 0; i < active && !seen_current; i++) {
395 		struct dma_async_tx_descriptor *tx;
396 
397 		smp_read_barrier_depends();
398 		prefetch(ioat2_get_ring_ent(ioat, idx + i + 1));
399 		desc = ioat2_get_ring_ent(ioat, idx + i);
400 		dump_desc_dbg(ioat, desc);
401 
402 		/* set err stat if we are using dwbes */
403 		if (device->cap & IOAT_CAP_DWBES)
404 			desc_get_errstat(ioat, desc);
405 
406 		tx = &desc->txd;
407 		if (tx->cookie) {
408 			dma_cookie_complete(tx);
409 			dma_descriptor_unmap(tx);
410 			if (tx->callback) {
411 				tx->callback(tx->callback_param);
412 				tx->callback = NULL;
413 			}
414 		}
415 
416 		if (tx->phys == phys_complete)
417 			seen_current = true;
418 
419 		/* skip extended descriptors */
420 		if (desc_has_ext(desc)) {
421 			BUG_ON(i + 1 >= active);
422 			i++;
423 		}
424 
425 		/* cleanup super extended descriptors */
426 		if (desc->sed) {
427 			ioat3_free_sed(device, desc->sed);
428 			desc->sed = NULL;
429 		}
430 	}
431 	smp_mb(); /* finish all descriptor reads before incrementing tail */
432 	ioat->tail = idx + i;
433 	BUG_ON(active && !seen_current); /* no active descs have written a completion? */
434 	chan->last_completion = phys_complete;
435 
436 	if (active - i == 0) {
437 		dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
438 			__func__);
439 		clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
440 		mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
441 	}
442 	/* 5 microsecond interrupt delay per descriptor still pending */
443 	writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK),
444 	       chan->device->reg_base + IOAT_INTRDELAY_OFFSET);
445 }
446 
447 static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
448 {
449 	struct ioat_chan_common *chan = &ioat->base;
450 	u64 phys_complete;
451 
452 	spin_lock_bh(&chan->cleanup_lock);
453 
454 	if (ioat3_cleanup_preamble(chan, &phys_complete))
455 		__cleanup(ioat, phys_complete);
456 
457 	if (is_ioat_halted(*chan->completion)) {
458 		u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
459 
460 		if (chanerr & IOAT_CHANERR_HANDLE_MASK) {
461 			mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
462 			ioat3_eh(ioat);
463 		}
464 	}
465 
466 	spin_unlock_bh(&chan->cleanup_lock);
467 }
468 
469 static void ioat3_cleanup_event(unsigned long data)
470 {
471 	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
472 	struct ioat_chan_common *chan = &ioat->base;
473 
474 	ioat3_cleanup(ioat);
475 	if (!test_bit(IOAT_RUN, &chan->state))
476 		return;
477 	writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
478 }
479 
480 static void ioat3_restart_channel(struct ioat2_dma_chan *ioat)
481 {
482 	struct ioat_chan_common *chan = &ioat->base;
483 	u64 phys_complete;
484 
485 	ioat2_quiesce(chan, 0);
486 	if (ioat3_cleanup_preamble(chan, &phys_complete))
487 		__cleanup(ioat, phys_complete);
488 
489 	__ioat2_restart_chan(ioat);
490 }
491 
492 static void ioat3_eh(struct ioat2_dma_chan *ioat)
493 {
494 	struct ioat_chan_common *chan = &ioat->base;
495 	struct pci_dev *pdev = to_pdev(chan);
496 	struct ioat_dma_descriptor *hw;
497 	struct dma_async_tx_descriptor *tx;
498 	u64 phys_complete;
499 	struct ioat_ring_ent *desc;
500 	u32 err_handled = 0;
501 	u32 chanerr_int;
502 	u32 chanerr;
503 
504 	/* cleanup so tail points to descriptor that caused the error */
505 	if (ioat3_cleanup_preamble(chan, &phys_complete))
506 		__cleanup(ioat, phys_complete);
507 
508 	chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
509 	pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int);
510 
511 	dev_dbg(to_dev(chan), "%s: error = %x:%x\n",
512 		__func__, chanerr, chanerr_int);
513 
514 	desc = ioat2_get_ring_ent(ioat, ioat->tail);
515 	hw = desc->hw;
516 	dump_desc_dbg(ioat, desc);
517 
518 	switch (hw->ctl_f.op) {
519 	case IOAT_OP_XOR_VAL:
520 		if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) {
521 			*desc->result |= SUM_CHECK_P_RESULT;
522 			err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR;
523 		}
524 		break;
525 	case IOAT_OP_PQ_VAL:
526 	case IOAT_OP_PQ_VAL_16S:
527 		if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) {
528 			*desc->result |= SUM_CHECK_P_RESULT;
529 			err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR;
530 		}
531 		if (chanerr & IOAT_CHANERR_XOR_Q_ERR) {
532 			*desc->result |= SUM_CHECK_Q_RESULT;
533 			err_handled |= IOAT_CHANERR_XOR_Q_ERR;
534 		}
535 		break;
536 	}
537 
538 	/* fault on unhandled error or spurious halt */
539 	if (chanerr ^ err_handled || chanerr == 0) {
540 		dev_err(to_dev(chan), "%s: fatal error (%x:%x)\n",
541 			__func__, chanerr, err_handled);
542 		BUG();
543 	} else { /* cleanup the faulty descriptor */
544 		tx = &desc->txd;
545 		if (tx->cookie) {
546 			dma_cookie_complete(tx);
547 			dma_descriptor_unmap(tx);
548 			if (tx->callback) {
549 				tx->callback(tx->callback_param);
550 				tx->callback = NULL;
551 			}
552 		}
553 	}
554 
555 	writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
556 	pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int);
557 
558 	/* mark faulting descriptor as complete */
559 	*chan->completion = desc->txd.phys;
560 
561 	spin_lock_bh(&ioat->prep_lock);
562 	ioat3_restart_channel(ioat);
563 	spin_unlock_bh(&ioat->prep_lock);
564 }
565 
566 static void check_active(struct ioat2_dma_chan *ioat)
567 {
568 	struct ioat_chan_common *chan = &ioat->base;
569 
570 	if (ioat2_ring_active(ioat)) {
571 		mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
572 		return;
573 	}
574 
575 	if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &chan->state))
576 		mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
577 	else if (ioat->alloc_order > ioat_get_alloc_order()) {
578 		/* if the ring is idle, empty, and oversized, try to step
579 		 * down the size
580 		 */
581 		reshape_ring(ioat, ioat->alloc_order - 1);
582 
583 		/* keep shrinking until we get back to our minimum
584 		 * default size
585 		 */
586 		if (ioat->alloc_order > ioat_get_alloc_order())
587 			mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
588 	}
589 
590 }
591 
592 static void ioat3_timer_event(unsigned long data)
593 {
594 	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
595 	struct ioat_chan_common *chan = &ioat->base;
596 	dma_addr_t phys_complete;
597 	u64 status;
598 
599 	status = ioat_chansts(chan);
600 
601 	/* when halted due to errors, check for channel
602 	 * programming errors before advancing the completion state
603 	 */
604 	if (is_ioat_halted(status)) {
605 		u32 chanerr;
606 
607 		chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
608 		dev_err(to_dev(chan), "%s: Channel halted (%x)\n",
609 			__func__, chanerr);
610 		if (test_bit(IOAT_RUN, &chan->state))
611 			BUG_ON(is_ioat_bug(chanerr));
612 		else /* we never got off the ground */
613 			return;
614 	}
615 
616 	/* if we haven't made progress and we have already
617 	 * acknowledged a pending completion once, then be more
618 	 * forceful with a restart
619 	 */
620 	spin_lock_bh(&chan->cleanup_lock);
621 	if (ioat_cleanup_preamble(chan, &phys_complete))
622 		__cleanup(ioat, phys_complete);
623 	else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) {
624 		spin_lock_bh(&ioat->prep_lock);
625 		ioat3_restart_channel(ioat);
626 		spin_unlock_bh(&ioat->prep_lock);
627 		spin_unlock_bh(&chan->cleanup_lock);
628 		return;
629 	} else {
630 		set_bit(IOAT_COMPLETION_ACK, &chan->state);
631 		mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
632 	}
633 
634 
635 	if (ioat2_ring_active(ioat))
636 		mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
637 	else {
638 		spin_lock_bh(&ioat->prep_lock);
639 		check_active(ioat);
640 		spin_unlock_bh(&ioat->prep_lock);
641 	}
642 	spin_unlock_bh(&chan->cleanup_lock);
643 }
644 
645 static enum dma_status
646 ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie,
647 		struct dma_tx_state *txstate)
648 {
649 	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
650 	enum dma_status ret;
651 
652 	ret = dma_cookie_status(c, cookie, txstate);
653 	if (ret == DMA_COMPLETE)
654 		return ret;
655 
656 	ioat3_cleanup(ioat);
657 
658 	return dma_cookie_status(c, cookie, txstate);
659 }
660 
661 static struct dma_async_tx_descriptor *
662 __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
663 		      dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
664 		      size_t len, unsigned long flags)
665 {
666 	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
667 	struct ioat_ring_ent *compl_desc;
668 	struct ioat_ring_ent *desc;
669 	struct ioat_ring_ent *ext;
670 	size_t total_len = len;
671 	struct ioat_xor_descriptor *xor;
672 	struct ioat_xor_ext_descriptor *xor_ex = NULL;
673 	struct ioat_dma_descriptor *hw;
674 	int num_descs, with_ext, idx, i;
675 	u32 offset = 0;
676 	u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
677 
678 	BUG_ON(src_cnt < 2);
679 
680 	num_descs = ioat2_xferlen_to_descs(ioat, len);
681 	/* we need 2x the number of descriptors to cover greater than 5
682 	 * sources
683 	 */
684 	if (src_cnt > 5) {
685 		with_ext = 1;
686 		num_descs *= 2;
687 	} else
688 		with_ext = 0;
689 
690 	/* completion writes from the raid engine may pass completion
691 	 * writes from the legacy engine, so we need one extra null
692 	 * (legacy) descriptor to ensure all completion writes arrive in
693 	 * order.
694 	 */
695 	if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs+1) == 0)
696 		idx = ioat->head;
697 	else
698 		return NULL;
699 	i = 0;
700 	do {
701 		struct ioat_raw_descriptor *descs[2];
702 		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
703 		int s;
704 
705 		desc = ioat2_get_ring_ent(ioat, idx + i);
706 		xor = desc->xor;
707 
708 		/* save a branch by unconditionally retrieving the
709 		 * extended descriptor; xor_set_src() knows not to write
710 		 * to it in the single descriptor case
711 		 */
712 		ext = ioat2_get_ring_ent(ioat, idx + i + 1);
713 		xor_ex = ext->xor_ex;
714 
715 		descs[0] = (struct ioat_raw_descriptor *) xor;
716 		descs[1] = (struct ioat_raw_descriptor *) xor_ex;
717 		for (s = 0; s < src_cnt; s++)
718 			xor_set_src(descs, src[s], offset, s);
719 		xor->size = xfer_size;
720 		xor->dst_addr = dest + offset;
721 		xor->ctl = 0;
722 		xor->ctl_f.op = op;
723 		xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);
724 
725 		len -= xfer_size;
726 		offset += xfer_size;
727 		dump_desc_dbg(ioat, desc);
728 	} while ((i += 1 + with_ext) < num_descs);
729 
730 	/* last xor descriptor carries the unmap parameters and fence bit */
731 	desc->txd.flags = flags;
732 	desc->len = total_len;
733 	if (result)
734 		desc->result = result;
735 	xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
736 
737 	/* completion descriptor carries interrupt bit */
738 	compl_desc = ioat2_get_ring_ent(ioat, idx + i);
739 	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
740 	hw = compl_desc->hw;
741 	hw->ctl = 0;
742 	hw->ctl_f.null = 1;
743 	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
744 	hw->ctl_f.compl_write = 1;
745 	hw->size = NULL_DESC_BUFFER_SIZE;
746 	dump_desc_dbg(ioat, compl_desc);
747 
748 	/* we leave the channel locked to ensure in order submission */
749 	return &compl_desc->txd;
750 }
751 
752 static struct dma_async_tx_descriptor *
753 ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
754 	       unsigned int src_cnt, size_t len, unsigned long flags)
755 {
756 	return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
757 }
758 
759 static struct dma_async_tx_descriptor *
760 ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
761 		    unsigned int src_cnt, size_t len,
762 		    enum sum_check_flags *result, unsigned long flags)
763 {
764 	/* the cleanup routine only sets bits on validate failure; it
765 	 * does not clear bits on validate success... so clear it here
766 	 */
767 	*result = 0;
768 
769 	return __ioat3_prep_xor_lock(chan, result, src[0], &src[1],
770 				     src_cnt - 1, len, flags);
771 }
772 
773 static void
774 dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext)
775 {
776 	struct device *dev = to_dev(&ioat->base);
777 	struct ioat_pq_descriptor *pq = desc->pq;
778 	struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
779 	struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
780 	int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
781 	int i;
782 
783 	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
784 		" sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
785 		" src_cnt: %d)\n",
786 		desc_id(desc), (unsigned long long) desc->txd.phys,
787 		(unsigned long long) (pq_ex ? pq_ex->next : pq->next),
788 		desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
789 		pq->ctl_f.compl_write,
790 		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
791 		pq->ctl_f.src_cnt);
792 	for (i = 0; i < src_cnt; i++)
793 		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
794 			(unsigned long long) pq_get_src(descs, i), pq->coef[i]);
795 	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
796 	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
797 	dev_dbg(dev, "\tNEXT: %#llx\n", pq->next);
798 }
799 
800 static void dump_pq16_desc_dbg(struct ioat2_dma_chan *ioat,
801 			       struct ioat_ring_ent *desc)
802 {
803 	struct device *dev = to_dev(&ioat->base);
804 	struct ioat_pq_descriptor *pq = desc->pq;
805 	struct ioat_raw_descriptor *descs[] = { (void *)pq,
806 						(void *)pq,
807 						(void *)pq };
808 	int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
809 	int i;
810 
811 	if (desc->sed) {
812 		descs[1] = (void *)desc->sed->hw;
813 		descs[2] = (void *)desc->sed->hw + 64;
814 	}
815 
816 	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
817 		" sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
818 		" src_cnt: %d)\n",
819 		desc_id(desc), (unsigned long long) desc->txd.phys,
820 		(unsigned long long) pq->next,
821 		desc->txd.flags, pq->size, pq->ctl,
822 		pq->ctl_f.op, pq->ctl_f.int_en,
823 		pq->ctl_f.compl_write,
824 		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
825 		pq->ctl_f.src_cnt);
826 	for (i = 0; i < src_cnt; i++) {
827 		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
828 			(unsigned long long) pq16_get_src(descs, i),
829 			pq->coef[i]);
830 	}
831 	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
832 	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
833 }
834 
835 static struct dma_async_tx_descriptor *
836 __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
837 		     const dma_addr_t *dst, const dma_addr_t *src,
838 		     unsigned int src_cnt, const unsigned char *scf,
839 		     size_t len, unsigned long flags)
840 {
841 	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
842 	struct ioat_chan_common *chan = &ioat->base;
843 	struct ioatdma_device *device = chan->device;
844 	struct ioat_ring_ent *compl_desc;
845 	struct ioat_ring_ent *desc;
846 	struct ioat_ring_ent *ext;
847 	size_t total_len = len;
848 	struct ioat_pq_descriptor *pq;
849 	struct ioat_pq_ext_descriptor *pq_ex = NULL;
850 	struct ioat_dma_descriptor *hw;
851 	u32 offset = 0;
852 	u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
853 	int i, s, idx, with_ext, num_descs;
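	/* pre-CB3.3 parts need an extra null descriptor for the completion
	 * write; see the raid engine comment below
	 */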
854 	int cb32 = (device->version < IOAT_VER_3_3) ? 1 : 0;
855 
856 	dev_dbg(to_dev(chan), "%s\n", __func__);
857 	/* the engine requires at least two sources (we provide
858 	 * at least 1 implied source in the DMA_PREP_CONTINUE case)
859 	 */
860 	BUG_ON(src_cnt + dmaf_continue(flags) < 2);
861 
862 	num_descs = ioat2_xferlen_to_descs(ioat, len);
863 	/* we need 2x the number of descriptors to cover greater than 3
864 	 * sources (we need 1 extra source in the q-only continuation
865 	 * case and 3 extra sources in the p+q continuation case).
866 	 */
867 	if (src_cnt + dmaf_p_disabled_continue(flags) > 3 ||
868 	    (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) {
869 		with_ext = 1;
870 		num_descs *= 2;
871 	} else
872 		with_ext = 0;
873 
874 	/* completion writes from the raid engine may pass completion
875 	 * writes from the legacy engine, so we need one extra null
876 	 * (legacy) descriptor to ensure all completion writes arrive in
877 	 * order.
878 	 */
879 	if (likely(num_descs) &&
880 	    ioat2_check_space_lock(ioat, num_descs + cb32) == 0)
881 		idx = ioat->head;
882 	else
883 		return NULL;
884 	i = 0;
885 	do {
886 		struct ioat_raw_descriptor *descs[2];
887 		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
888 
889 		desc = ioat2_get_ring_ent(ioat, idx + i);
890 		pq = desc->pq;
891 
892 		/* save a branch by unconditionally retrieving the
893 		 * extended descriptor; pq_set_src() knows not to write
894 		 * to it in the single descriptor case
895 		 */
896 		ext = ioat2_get_ring_ent(ioat, idx + i + with_ext);
897 		pq_ex = ext->pq_ex;
898 
899 		descs[0] = (struct ioat_raw_descriptor *) pq;
900 		descs[1] = (struct ioat_raw_descriptor *) pq_ex;
901 
902 		for (s = 0; s < src_cnt; s++)
903 			pq_set_src(descs, src[s], offset, scf[s], s);
904 
905 		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
906 		if (dmaf_p_disabled_continue(flags))
907 			pq_set_src(descs, dst[1], offset, 1, s++);
908 		else if (dmaf_continue(flags)) {
909 			pq_set_src(descs, dst[0], offset, 0, s++);
910 			pq_set_src(descs, dst[1], offset, 1, s++);
911 			pq_set_src(descs, dst[1], offset, 0, s++);
912 		}
913 		pq->size = xfer_size;
914 		pq->p_addr = dst[0] + offset;
915 		pq->q_addr = dst[1] + offset;
916 		pq->ctl = 0;
917 		pq->ctl_f.op = op;
918 		/* we turn on descriptor write back error status */
919 		if (device->cap & IOAT_CAP_DWBES)
920 			pq->ctl_f.wb_en = result ? 1 : 0;
921 		pq->ctl_f.src_cnt = src_cnt_to_hw(s);
922 		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
923 		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
924 
925 		len -= xfer_size;
926 		offset += xfer_size;
927 	} while ((i += 1 + with_ext) < num_descs);
928 
929 	/* last pq descriptor carries the unmap parameters and fence bit */
930 	desc->txd.flags = flags;
931 	desc->len = total_len;
932 	if (result)
933 		desc->result = result;
934 	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
935 	dump_pq_desc_dbg(ioat, desc, ext);
936 
937 	if (!cb32) {
938 		pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
939 		pq->ctl_f.compl_write = 1;
940 		compl_desc = desc;
941 	} else {
942 		/* completion descriptor carries interrupt bit */
943 		compl_desc = ioat2_get_ring_ent(ioat, idx + i);
944 		compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
945 		hw = compl_desc->hw;
946 		hw->ctl = 0;
947 		hw->ctl_f.null = 1;
948 		hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
949 		hw->ctl_f.compl_write = 1;
950 		hw->size = NULL_DESC_BUFFER_SIZE;
951 		dump_desc_dbg(ioat, compl_desc);
952 	}
953 
954 
955 	/* we leave the channel locked to ensure in order submission */
956 	return &compl_desc->txd;
957 }
958 
959 static struct dma_async_tx_descriptor *
960 __ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
961 		       const dma_addr_t *dst, const dma_addr_t *src,
962 		       unsigned int src_cnt, const unsigned char *scf,
963 		       size_t len, unsigned long flags)
964 {
965 	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
966 	struct ioat_chan_common *chan = &ioat->base;
967 	struct ioatdma_device *device = chan->device;
968 	struct ioat_ring_ent *desc;
969 	size_t total_len = len;
970 	struct ioat_pq_descriptor *pq;
971 	u32 offset = 0;
972 	u8 op;
973 	int i, s, idx, num_descs;
974 
975 	/* this function is only called with 9-16 sources */
976 	op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S;
977 
978 	dev_dbg(to_dev(chan), "%s\n", __func__);
979 
980 	num_descs = ioat2_xferlen_to_descs(ioat, len);
981 
982 	/*
983 	 * 16 source pq is only available on cb3.3 and is not affected by the
984 	 * completion write hw bug, so no extra null descriptor is needed.
985 	 */
986 	if (num_descs && ioat2_check_space_lock(ioat, num_descs) == 0)
987 		idx = ioat->head;
988 	else
989 		return NULL;
990 
991 	i = 0;
992 
993 	do {
994 		struct ioat_raw_descriptor *descs[4];
995 		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
996 
997 		desc = ioat2_get_ring_ent(ioat, idx + i);
998 		pq = desc->pq;
999 
1000 		descs[0] = (struct ioat_raw_descriptor *) pq;
1001 
1002 		desc->sed = ioat3_alloc_sed(device, (src_cnt-2) >> 3);
1003 		if (!desc->sed) {
1004 			dev_err(to_dev(chan),
1005 				"%s: no free sed entries\n", __func__);
1006 			return NULL;
1007 		}
1008 
1009 		pq->sed_addr = desc->sed->dma;
1010 		desc->sed->parent = desc;
1011 
1012 		descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw;
1013 		descs[2] = (void *)descs[1] + 64;
1014 
1015 		for (s = 0; s < src_cnt; s++)
1016 			pq16_set_src(descs, src[s], offset, scf[s], s);
1017 
1018 		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
1019 		if (dmaf_p_disabled_continue(flags))
1020 			pq16_set_src(descs, dst[1], offset, 1, s++);
1021 		else if (dmaf_continue(flags)) {
1022 			pq16_set_src(descs, dst[0], offset, 0, s++);
1023 			pq16_set_src(descs, dst[1], offset, 1, s++);
1024 			pq16_set_src(descs, dst[1], offset, 0, s++);
1025 		}
1026 
1027 		pq->size = xfer_size;
1028 		pq->p_addr = dst[0] + offset;
1029 		pq->q_addr = dst[1] + offset;
1030 		pq->ctl = 0;
1031 		pq->ctl_f.op = op;
1032 		pq->ctl_f.src_cnt = src16_cnt_to_hw(s);
1033 		/* we turn on descriptor write back error status */
1034 		if (device->cap & IOAT_CAP_DWBES)
1035 			pq->ctl_f.wb_en = result ? 1 : 0;
1036 		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
1037 		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
1038 
1039 		len -= xfer_size;
1040 		offset += xfer_size;
1041 	} while (++i < num_descs);
1042 
1043 	/* last pq descriptor carries the unmap parameters and fence bit */
1044 	desc->txd.flags = flags;
1045 	desc->len = total_len;
1046 	if (result)
1047 		desc->result = result;
1048 	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
1049 
1050 	/* with cb3.3 we should be able to do completion w/o a null desc */
1051 	pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
1052 	pq->ctl_f.compl_write = 1;
1053 
1054 	dump_pq16_desc_dbg(ioat, desc);
1055 
1056 	/* we leave the channel locked to ensure in order submission */
1057 	return &desc->txd;
1058 }
1059 
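/* total source count including the sources implied by the RAID6
 * continuation flags (see dma_maxpq() in include/linux/dmaengine.h):
 * q-only continuation adds one implied source, p+q continuation adds three
 */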
1060 static int src_cnt_flags(unsigned int src_cnt, unsigned long flags)
1061 {
1062 	if (dmaf_p_disabled_continue(flags))
1063 		return src_cnt + 1;
1064 	else if (dmaf_continue(flags))
1065 		return src_cnt + 3;
1066 	else
1067 		return src_cnt;
1068 }
1069 
1070 static struct dma_async_tx_descriptor *
1071 ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
1072 	      unsigned int src_cnt, const unsigned char *scf, size_t len,
1073 	      unsigned long flags)
1074 {
1075 	/* specify valid address for disabled result */
1076 	if (flags & DMA_PREP_PQ_DISABLE_P)
1077 		dst[0] = dst[1];
1078 	if (flags & DMA_PREP_PQ_DISABLE_Q)
1079 		dst[1] = dst[0];
1080 
1081 	/* handle the single source multiply case from the raid6
1082 	 * recovery path
1083 	 */
1084 	if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) {
1085 		dma_addr_t single_source[2];
1086 		unsigned char single_source_coef[2];
1087 
1088 		BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
1089 		single_source[0] = src[0];
1090 		single_source[1] = src[0];
1091 		single_source_coef[0] = scf[0];
1092 		single_source_coef[1] = 0;
1093 
1094 		return src_cnt_flags(src_cnt, flags) > 8 ?
1095 			__ioat3_prep_pq16_lock(chan, NULL, dst, single_source,
1096 					       2, single_source_coef, len,
1097 					       flags) :
1098 			__ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
1099 					     single_source_coef, len, flags);
1100 
1101 	} else {
1102 		return src_cnt_flags(src_cnt, flags) > 8 ?
1103 			__ioat3_prep_pq16_lock(chan, NULL, dst, src, src_cnt,
1104 					       scf, len, flags) :
1105 			__ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt,
1106 					     scf, len, flags);
1107 	}
1108 }
1109 
1110 static struct dma_async_tx_descriptor *
1111 ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
1112 		  unsigned int src_cnt, const unsigned char *scf, size_t len,
1113 		  enum sum_check_flags *pqres, unsigned long flags)
1114 {
1115 	/* specify valid address for disabled result */
1116 	if (flags & DMA_PREP_PQ_DISABLE_P)
1117 		pq[0] = pq[1];
1118 	if (flags & DMA_PREP_PQ_DISABLE_Q)
1119 		pq[1] = pq[0];
1120 
1121 	/* the cleanup routine only sets bits on validate failure; it
1122 	 * does not clear bits on validate success... so clear it here
1123 	 */
1124 	*pqres = 0;
1125 
1126 	return src_cnt_flags(src_cnt, flags) > 8 ?
1127 		__ioat3_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len,
1128 				       flags) :
1129 		__ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
1130 				     flags);
1131 }
1132 
1133 static struct dma_async_tx_descriptor *
1134 ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
1135 		 unsigned int src_cnt, size_t len, unsigned long flags)
1136 {
1137 	unsigned char scf[src_cnt];
1138 	dma_addr_t pq[2];
1139 
1140 	memset(scf, 0, src_cnt);
1141 	pq[0] = dst;
1142 	flags |= DMA_PREP_PQ_DISABLE_Q;
1143 	pq[1] = dst; /* specify valid address for disabled result */
1144 
1145 	return src_cnt_flags(src_cnt, flags) > 8 ?
1146 		__ioat3_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len,
1147 				       flags) :
1148 		__ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
1149 				     flags);
1150 }
1151 
1152 static struct dma_async_tx_descriptor *
1153 ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
1154 		     unsigned int src_cnt, size_t len,
1155 		     enum sum_check_flags *result, unsigned long flags)
1156 {
1157 	unsigned char scf[src_cnt];
1158 	dma_addr_t pq[2];
1159 
1160 	/* the cleanup routine only sets bits on validate failure; it
1161 	 * does not clear bits on validate success... so clear it here
1162 	 */
1163 	*result = 0;
1164 
1165 	memset(scf, 0, src_cnt);
1166 	pq[0] = src[0];
1167 	flags |= DMA_PREP_PQ_DISABLE_Q;
1168 	pq[1] = pq[0]; /* specify valid address for disabled result */
1169 
1170 	return src_cnt_flags(src_cnt, flags) > 8 ?
1171 		__ioat3_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1,
1172 				       scf, len, flags) :
1173 		__ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1,
1174 				     scf, len, flags);
1175 }
1176 
1177 static struct dma_async_tx_descriptor *
1178 ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
1179 {
1180 	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
1181 	struct ioat_ring_ent *desc;
1182 	struct ioat_dma_descriptor *hw;
1183 
1184 	if (ioat2_check_space_lock(ioat, 1) == 0)
1185 		desc = ioat2_get_ring_ent(ioat, ioat->head);
1186 	else
1187 		return NULL;
1188 
1189 	hw = desc->hw;
1190 	hw->ctl = 0;
1191 	hw->ctl_f.null = 1;
1192 	hw->ctl_f.int_en = 1;
1193 	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
1194 	hw->ctl_f.compl_write = 1;
1195 	hw->size = NULL_DESC_BUFFER_SIZE;
1196 	hw->src_addr = 0;
1197 	hw->dst_addr = 0;
1198 
1199 	desc->txd.flags = flags;
1200 	desc->len = 1;
1201 
1202 	dump_desc_dbg(ioat, desc);
1203 
1204 	/* we leave the channel locked to ensure in order submission */
1205 	return &desc->txd;
1206 }
1207 
1208 static void ioat3_dma_test_callback(void *dma_async_param)
1209 {
1210 	struct completion *cmp = dma_async_param;
1211 
1212 	complete(cmp);
1213 }
1214 
1215 #define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */
1216 static int ioat_xor_val_self_test(struct ioatdma_device *device)
1217 {
1218 	int i, src_idx;
1219 	struct page *dest;
1220 	struct page *xor_srcs[IOAT_NUM_SRC_TEST];
1221 	struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
1222 	dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
1223 	dma_addr_t dest_dma;
1224 	struct dma_async_tx_descriptor *tx;
1225 	struct dma_chan *dma_chan;
1226 	dma_cookie_t cookie;
1227 	u8 cmp_byte = 0;
1228 	u32 cmp_word;
1229 	u32 xor_val_result;
1230 	int err = 0;
1231 	struct completion cmp;
1232 	unsigned long tmo;
1233 	struct device *dev = &device->pdev->dev;
1234 	struct dma_device *dma = &device->common;
1235 	u8 op = 0;
1236 
1237 	dev_dbg(dev, "%s\n", __func__);
1238 
1239 	if (!dma_has_cap(DMA_XOR, dma->cap_mask))
1240 		return 0;
1241 
1242 	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
1243 		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
1244 		if (!xor_srcs[src_idx]) {
1245 			while (src_idx--)
1246 				__free_page(xor_srcs[src_idx]);
1247 			return -ENOMEM;
1248 		}
1249 	}
1250 
1251 	dest = alloc_page(GFP_KERNEL);
1252 	if (!dest) {
1253 		while (src_idx--)
1254 			__free_page(xor_srcs[src_idx]);
1255 		return -ENOMEM;
1256 	}
1257 
1258 	/* Fill in src buffers */
1259 	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
1260 		u8 *ptr = page_address(xor_srcs[src_idx]);
1261 		for (i = 0; i < PAGE_SIZE; i++)
1262 			ptr[i] = (1 << src_idx);
1263 	}
1264 
1265 	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
1266 		cmp_byte ^= (u8) (1 << src_idx);
1267 
1268 	cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
1269 			(cmp_byte << 8) | cmp_byte;
1270 
1271 	memset(page_address(dest), 0, PAGE_SIZE);
1272 
1273 	dma_chan = container_of(dma->channels.next, struct dma_chan,
1274 				device_node);
1275 	if (dma->device_alloc_chan_resources(dma_chan) < 1) {
1276 		err = -ENODEV;
1277 		goto out;
1278 	}
1279 
1280 	/* test xor */
1281 	op = IOAT_OP_XOR;
1282 
1283 	dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
1284 	if (dma_mapping_error(dev, dest_dma))
1285 		goto dma_unmap;
1286 
1287 	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
1288 		dma_srcs[i] = DMA_ERROR_CODE;
1289 	for (i = 0; i < IOAT_NUM_SRC_TEST; i++) {
1290 		dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
1291 					   DMA_TO_DEVICE);
1292 		if (dma_mapping_error(dev, dma_srcs[i]))
1293 			goto dma_unmap;
1294 	}
1295 	tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
1296 				      IOAT_NUM_SRC_TEST, PAGE_SIZE,
1297 				      DMA_PREP_INTERRUPT);
1298 
1299 	if (!tx) {
1300 		dev_err(dev, "Self-test xor prep failed\n");
1301 		err = -ENODEV;
1302 		goto dma_unmap;
1303 	}
1304 
1305 	async_tx_ack(tx);
1306 	init_completion(&cmp);
1307 	tx->callback = ioat3_dma_test_callback;
1308 	tx->callback_param = &cmp;
1309 	cookie = tx->tx_submit(tx);
1310 	if (cookie < 0) {
1311 		dev_err(dev, "Self-test xor setup failed\n");
1312 		err = -ENODEV;
1313 		goto dma_unmap;
1314 	}
1315 	dma->device_issue_pending(dma_chan);
1316 
1317 	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1318 
1319 	if (tmo == 0 ||
1320 	    dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
1321 		dev_err(dev, "Self-test xor timed out\n");
1322 		err = -ENODEV;
1323 		goto dma_unmap;
1324 	}
1325 
1326 	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
1327 		dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
1328 
1329 	dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
1330 	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
1331 		u32 *ptr = page_address(dest);
1332 		if (ptr[i] != cmp_word) {
1333 			dev_err(dev, "Self-test xor failed compare\n");
1334 			err = -ENODEV;
1335 			goto free_resources;
1336 		}
1337 	}
1338 	dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
1339 
1340 	dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
1341 
1342 	/* skip validate if the capability is not present */
1343 	if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
1344 		goto free_resources;
1345 
1346 	op = IOAT_OP_XOR_VAL;
1347 
1348 	/* validate the sources with the destination page */
1349 	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
1350 		xor_val_srcs[i] = xor_srcs[i];
1351 	xor_val_srcs[i] = dest;
1352 
1353 	xor_val_result = 1;
1354 
1355 	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
1356 		dma_srcs[i] = DMA_ERROR_CODE;
1357 	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) {
1358 		dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
1359 					   DMA_TO_DEVICE);
1360 		if (dma_mapping_error(dev, dma_srcs[i]))
1361 			goto dma_unmap;
1362 	}
1363 	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
1364 					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
1365 					  &xor_val_result, DMA_PREP_INTERRUPT);
1366 	if (!tx) {
1367 		dev_err(dev, "Self-test zero prep failed\n");
1368 		err = -ENODEV;
1369 		goto dma_unmap;
1370 	}
1371 
1372 	async_tx_ack(tx);
1373 	init_completion(&cmp);
1374 	tx->callback = ioat3_dma_test_callback;
1375 	tx->callback_param = &cmp;
1376 	cookie = tx->tx_submit(tx);
1377 	if (cookie < 0) {
1378 		dev_err(dev, "Self-test zero setup failed\n");
1379 		err = -ENODEV;
1380 		goto dma_unmap;
1381 	}
1382 	dma->device_issue_pending(dma_chan);
1383 
1384 	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1385 
1386 	if (tmo == 0 ||
1387 	    dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
1388 		dev_err(dev, "Self-test validate timed out\n");
1389 		err = -ENODEV;
1390 		goto dma_unmap;
1391 	}
1392 
1393 	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
1394 		dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
1395 
1396 	if (xor_val_result != 0) {
1397 		dev_err(dev, "Self-test validate failed compare\n");
1398 		err = -ENODEV;
1399 		goto free_resources;
1400 	}
1401 
1402 	memset(page_address(dest), 0, PAGE_SIZE);
1403 
1404 	/* test for non-zero parity sum */
1405 	op = IOAT_OP_XOR_VAL;
1406 
1407 	xor_val_result = 0;
1408 	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
1409 		dma_srcs[i] = DMA_ERROR_CODE;
1410 	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) {
1411 		dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
1412 					   DMA_TO_DEVICE);
1413 		if (dma_mapping_error(dev, dma_srcs[i]))
1414 			goto dma_unmap;
1415 	}
1416 	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
1417 					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
1418 					  &xor_val_result, DMA_PREP_INTERRUPT);
1419 	if (!tx) {
1420 		dev_err(dev, "Self-test 2nd zero prep failed\n");
1421 		err = -ENODEV;
1422 		goto dma_unmap;
1423 	}
1424 
1425 	async_tx_ack(tx);
1426 	init_completion(&cmp);
1427 	tx->callback = ioat3_dma_test_callback;
1428 	tx->callback_param = &cmp;
1429 	cookie = tx->tx_submit(tx);
1430 	if (cookie < 0) {
1431 		dev_err(dev, "Self-test 2nd zero setup failed\n");
1432 		err = -ENODEV;
1433 		goto dma_unmap;
1434 	}
1435 	dma->device_issue_pending(dma_chan);
1436 
1437 	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1438 
1439 	if (tmo == 0 ||
1440 	    dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
1441 		dev_err(dev, "Self-test 2nd validate timed out\n");
1442 		err = -ENODEV;
1443 		goto dma_unmap;
1444 	}
1445 
1446 	if (xor_val_result != SUM_CHECK_P_RESULT) {
1447 		dev_err(dev, "Self-test validate failed compare\n");
1448 		err = -ENODEV;
1449 		goto dma_unmap;
1450 	}
1451 
1452 	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
1453 		dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
1454 
1455 	goto free_resources;
1456 dma_unmap:
1457 	if (op == IOAT_OP_XOR) {
1458 		if (dest_dma != DMA_ERROR_CODE)
1459 			dma_unmap_page(dev, dest_dma, PAGE_SIZE,
1460 				       DMA_FROM_DEVICE);
1461 		for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
1462 			if (dma_srcs[i] != DMA_ERROR_CODE)
1463 				dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
1464 					       DMA_TO_DEVICE);
1465 	} else if (op == IOAT_OP_XOR_VAL) {
1466 		for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
1467 			if (dma_srcs[i] != DMA_ERROR_CODE)
1468 				dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
1469 					       DMA_TO_DEVICE);
1470 	}
1471 free_resources:
1472 	dma->device_free_chan_resources(dma_chan);
1473 out:
1474 	src_idx = IOAT_NUM_SRC_TEST;
1475 	while (src_idx--)
1476 		__free_page(xor_srcs[src_idx]);
1477 	__free_page(dest);
1478 	return err;
1479 }
1480 
1481 static int ioat3_dma_self_test(struct ioatdma_device *device)
1482 {
1483 	int rc = ioat_dma_self_test(device);
1484 
1485 	if (rc)
1486 		return rc;
1487 
1488 	rc = ioat_xor_val_self_test(device);
1489 	if (rc)
1490 		return rc;
1491 
1492 	return 0;
1493 }
1494 
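/* only the BWD/BDX-DE parts need their interrupts torn down and
 * re-registered after a reset; all other parts return immediately
 */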
1495 static int ioat3_irq_reinit(struct ioatdma_device *device)
1496 {
1497 	struct pci_dev *pdev = device->pdev;
1498 	int irq = pdev->irq, i;
1499 
1500 	if (!is_bwd_ioat(pdev))
1501 		return 0;
1502 
1503 	switch (device->irq_mode) {
1504 	case IOAT_MSIX:
1505 		for (i = 0; i < device->common.chancnt; i++) {
1506 			struct msix_entry *msix = &device->msix_entries[i];
1507 			struct ioat_chan_common *chan;
1508 
1509 			chan = ioat_chan_by_index(device, i);
1510 			devm_free_irq(&pdev->dev, msix->vector, chan);
1511 		}
1512 
1513 		pci_disable_msix(pdev);
1514 		break;
1515 	case IOAT_MSI:
1516 		pci_disable_msi(pdev);
1517 		/* fall through */
1518 	case IOAT_INTX:
1519 		devm_free_irq(&pdev->dev, irq, device);
1520 		break;
1521 	default:
1522 		return 0;
1523 	}
1524 	device->irq_mode = IOAT_NOIRQ;
1525 
1526 	return ioat_dma_setup_interrupts(device);
1527 }
1528 
1529 static int ioat3_reset_hw(struct ioat_chan_common *chan)
1530 {
1531 	/* throw away whatever the channel was doing and get it
1532 	 * initialized, with ioat3 specific workarounds
1533 	 */
1534 	struct ioatdma_device *device = chan->device;
1535 	struct pci_dev *pdev = device->pdev;
1536 	u32 chanerr;
1537 	u16 dev_id;
1538 	int err;
1539 
1540 	ioat2_quiesce(chan, msecs_to_jiffies(100));
1541 
1542 	chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
1543 	writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
1544 
1545 	if (device->version < IOAT_VER_3_3) {
1546 		/* clear any pending errors */
1547 		err = pci_read_config_dword(pdev,
1548 				IOAT_PCI_CHANERR_INT_OFFSET, &chanerr);
1549 		if (err) {
1550 			dev_err(&pdev->dev,
1551 				"channel error register unreachable\n");
1552 			return err;
1553 		}
1554 		pci_write_config_dword(pdev,
1555 				IOAT_PCI_CHANERR_INT_OFFSET, chanerr);
1556 
1557 		/* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
1558 		 * (workaround for spurious config parity error after restart)
1559 		 */
1560 		pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
1561 		if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
1562 			pci_write_config_dword(pdev,
1563 					       IOAT_PCI_DMAUNCERRSTS_OFFSET,
1564 					       0x10);
1565 		}
1566 	}
1567 
1568 	err = ioat2_reset_sync(chan, msecs_to_jiffies(200));
1569 	if (!err)
1570 		err = ioat3_irq_reinit(device);
1571 
1572 	if (err)
1573 		dev_err(&pdev->dev, "Failed to reset: %d\n", err);
1574 
1575 	return err;
1576 }
1577 
1578 static void ioat3_intr_quirk(struct ioatdma_device *device)
1579 {
1580 	struct dma_device *dma;
1581 	struct dma_chan *c;
1582 	struct ioat_chan_common *chan;
1583 	u32 errmask;
1584 
1585 	dma = &device->common;
1586 
1587 	/*
1588 	 * if we have descriptor write back error status, we mask the
1589 	 * error interrupts
1590 	 */
1591 	if (device->cap & IOAT_CAP_DWBES) {
1592 		list_for_each_entry(c, &dma->channels, device_node) {
1593 			chan = to_chan_common(c);
1594 			errmask = readl(chan->reg_base +
1595 					IOAT_CHANERR_MASK_OFFSET);
1596 			errmask |= IOAT_CHANERR_XOR_P_OR_CRC_ERR |
1597 				   IOAT_CHANERR_XOR_Q_ERR;
1598 			writel(errmask, chan->reg_base +
1599 					IOAT_CHANERR_MASK_OFFSET);
1600 		}
1601 	}
1602 }
1603 
1604 int ioat3_dma_probe(struct ioatdma_device *device, int dca)
1605 {
1606 	struct pci_dev *pdev = device->pdev;
1607 	int dca_en = system_has_dca_enabled(pdev);
1608 	struct dma_device *dma;
1609 	struct dma_chan *c;
1610 	struct ioat_chan_common *chan;
1611 	bool is_raid_device = false;
1612 	int err;
1613 
1614 	device->enumerate_channels = ioat2_enumerate_channels;
1615 	device->reset_hw = ioat3_reset_hw;
1616 	device->self_test = ioat3_dma_self_test;
1617 	device->intr_quirk = ioat3_intr_quirk;
1618 	dma = &device->common;
1619 	dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
1620 	dma->device_issue_pending = ioat2_issue_pending;
1621 	dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
1622 	dma->device_free_chan_resources = ioat2_free_chan_resources;
1623 
1624 	dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
1625 	dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;
1626 
1627 	device->cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);
1628 
1629 	if (is_xeon_cb32(pdev) || is_bwd_noraid(pdev))
1630 		device->cap &= ~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS);
1631 
1632 	/* dca is incompatible with raid operations */
1633 	if (dca_en && (device->cap & (IOAT_CAP_XOR|IOAT_CAP_PQ)))
1634 		device->cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ);
1635 
1636 	if (device->cap & IOAT_CAP_XOR) {
1637 		is_raid_device = true;
1638 		dma->max_xor = 8;
1639 
1640 		dma_cap_set(DMA_XOR, dma->cap_mask);
1641 		dma->device_prep_dma_xor = ioat3_prep_xor;
1642 
1643 		dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
1644 		dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
1645 	}
1646 
1647 	if (device->cap & IOAT_CAP_PQ) {
1648 		is_raid_device = true;
1649 
1650 		dma->device_prep_dma_pq = ioat3_prep_pq;
1651 		dma->device_prep_dma_pq_val = ioat3_prep_pq_val;
1652 		dma_cap_set(DMA_PQ, dma->cap_mask);
1653 		dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
1654 
1655 		if (device->cap & IOAT_CAP_RAID16SS) {
1656 			dma_set_maxpq(dma, 16, 0);
1657 		} else {
1658 			dma_set_maxpq(dma, 8, 0);
1659 		}
1660 
1661 		if (!(device->cap & IOAT_CAP_XOR)) {
1662 			dma->device_prep_dma_xor = ioat3_prep_pqxor;
1663 			dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val;
1664 			dma_cap_set(DMA_XOR, dma->cap_mask);
1665 			dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
1666 
1667 			if (device->cap & IOAT_CAP_RAID16SS) {
1668 				dma->max_xor = 16;
1669 			} else {
1670 				dma->max_xor = 8;
1671 			}
1672 		}
1673 	}
1674 
1675 	dma->device_tx_status = ioat3_tx_status;
1676 	device->cleanup_fn = ioat3_cleanup_event;
1677 	device->timer_fn = ioat3_timer_event;
1678 
1679 	/* starting with CB3.3 super extended descriptors are supported */
1680 	if (device->cap & IOAT_CAP_RAID16SS) {
1681 		char pool_name[14];
1682 		int i;
1683 
1684 		for (i = 0; i < MAX_SED_POOLS; i++) {
1685 			snprintf(pool_name, 14, "ioat_hw%d_sed", i);
1686 
1687 			/* allocate SED DMA pool */
1688 			device->sed_hw_pool[i] = dmam_pool_create(pool_name,
1689 					&pdev->dev,
1690 					SED_SIZE * (i + 1), 64, 0);
1691 			if (!device->sed_hw_pool[i])
1692 				return -ENOMEM;
1693 
1694 		}
1695 	}
1696 
1697 	err = ioat_probe(device);
1698 	if (err)
1699 		return err;
1700 
1701 	list_for_each_entry(c, &dma->channels, device_node) {
1702 		chan = to_chan_common(c);
1703 		writel(IOAT_DMA_DCA_ANY_CPU,
1704 		       chan->reg_base + IOAT_DCACTRL_OFFSET);
1705 	}
1706 
1707 	err = ioat_register(device);
1708 	if (err)
1709 		return err;
1710 
1711 	ioat_kobject_add(device, &ioat2_ktype);
1712 
1713 	if (dca)
1714 		device->dca = ioat3_dca_init(pdev, device->reg_base);
1715 
1716 	return 0;
1717 }
1718