1 /*
2  *
3  * This file is provided under a dual BSD/GPLv2 license.  When using or
4  * redistributing this file, you may do so under either license.
5  *
6  * GPL LICENSE SUMMARY
7  *
8  * Copyright(c) 2015 Intel Corporation.
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of version 2 of the GNU General Public License as
12  * published by the Free Software Foundation.
13  *
14  * This program is distributed in the hope that it will be useful, but
15  * WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * General Public License for more details.
18  *
19  * BSD LICENSE
20  *
21  * Copyright(c) 2015 Intel Corporation.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  *
27  *  - Redistributions of source code must retain the above copyright
28  *    notice, this list of conditions and the following disclaimer.
29  *  - Redistributions in binary form must reproduce the above copyright
30  *    notice, this list of conditions and the following disclaimer in
31  *    the documentation and/or other materials provided with the
32  *    distribution.
33  *  - Neither the name of Intel Corporation nor the names of its
34  *    contributors may be used to endorse or promote products derived
35  *    from this software without specific prior written permission.
36  *
37  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48  *
49  */
50 
51 #include "hfi.h"
52 
/* additive distance between non-SOP and SOP space: half of TXE_PIO_SIZE */
#define SOP_DISTANCE (TXE_PIO_SIZE / 2)
/*
 * mask selecting the byte offset of an address within one PIO block
 * (presumes PIO_BLOCK_SIZE is a power of two)
 */
#define PIO_BLOCK_MASK (PIO_BLOCK_SIZE - 1)
/* number of QUADWORDs (u64) in a block */
#define PIO_BLOCK_QWS (PIO_BLOCK_SIZE / sizeof(u64))
58 
59 /**
60  * pio_copy - copy data block to MMIO space
61  * @pbuf: a number of blocks allocated within a PIO send context
62  * @pbc: PBC to send
63  * @from: source, must be 8 byte aligned
64  * @count: number of DWORD (32-bit) quantities to copy from source
65  *
66  * Copy data from source to PIO Send Buffer memory, 8 bytes at a time.
67  * Must always write full BLOCK_SIZE bytes blocks.  The first block must
68  * be written to the corresponding SOP=1 address.
69  *
70  * Known:
71  * o pbuf->start always starts on a block boundary
72  * o pbuf can wrap only at a block boundary
73  */
pio_copy(struct hfi1_devdata * dd,struct pio_buf * pbuf,u64 pbc,const void * from,size_t count)74 void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
75 	      const void *from, size_t count)
76 {
77 	void __iomem *dest = pbuf->start + SOP_DISTANCE;
78 	void __iomem *send = dest + PIO_BLOCK_SIZE;
79 	void __iomem *dend;			/* 8-byte data end */
80 
81 	/* write the PBC */
82 	writeq(pbc, dest);
83 	dest += sizeof(u64);
84 
85 	/* calculate where the QWORD data ends - in SOP=1 space */
86 	dend = dest + ((count>>1) * sizeof(u64));
87 
88 	if (dend < send) {
89 		/* all QWORD data is within the SOP block, does *not*
90 		   reach the end of the SOP block */
91 
92 		while (dest < dend) {
93 			writeq(*(u64 *)from, dest);
94 			from += sizeof(u64);
95 			dest += sizeof(u64);
96 		}
97 		/*
98 		 * No boundary checks are needed here:
99 		 * 0. We're not on the SOP block boundary
100 		 * 1. The possible DWORD dangle will still be within
101 		 *    the SOP block
102 		 * 2. We cannot wrap except on a block boundary.
103 		 */
104 	} else {
105 		/* QWORD data extends _to_ or beyond the SOP block */
106 
107 		/* write 8-byte SOP chunk data */
108 		while (dest < send) {
109 			writeq(*(u64 *)from, dest);
110 			from += sizeof(u64);
111 			dest += sizeof(u64);
112 		}
113 		/* drop out of the SOP range */
114 		dest -= SOP_DISTANCE;
115 		dend -= SOP_DISTANCE;
116 
117 		/*
118 		 * If the wrap comes before or matches the data end,
119 		 * copy until until the wrap, then wrap.
120 		 *
121 		 * If the data ends at the end of the SOP above and
122 		 * the buffer wraps, then pbuf->end == dend == dest
123 		 * and nothing will get written, but we will wrap in
124 		 * case there is a dangling DWORD.
125 		 */
126 		if (pbuf->end <= dend) {
127 			while (dest < pbuf->end) {
128 				writeq(*(u64 *)from, dest);
129 				from += sizeof(u64);
130 				dest += sizeof(u64);
131 			}
132 
133 			dest -= pbuf->size;
134 			dend -= pbuf->size;
135 		}
136 
137 		/* write 8-byte non-SOP, non-wrap chunk data */
138 		while (dest < dend) {
139 			writeq(*(u64 *)from, dest);
140 			from += sizeof(u64);
141 			dest += sizeof(u64);
142 		}
143 	}
144 	/* at this point we have wrapped if we are going to wrap */
145 
146 	/* write dangling u32, if any */
147 	if (count & 1) {
148 		union mix val;
149 
150 		val.val64 = 0;
151 		val.val32[0] = *(u32 *)from;
152 		writeq(val.val64, dest);
153 		dest += sizeof(u64);
154 	}
155 	/* fill in rest of block, no need to check pbuf->end
156 	   as we only wrap on a block boundary */
157 	while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
158 		writeq(0, dest);
159 		dest += sizeof(u64);
160 	}
161 
162 	/* finished with this buffer */
163 	atomic_dec(&pbuf->sc->buffers_allocated);
164 }
165 
166 /* USE_SHIFTS is faster in user-space tests on a Xeon X5570 @ 2.93GHz */
167 #define USE_SHIFTS 1
168 #ifdef USE_SHIFTS
169 /*
170  * Handle carry bytes using shifts and masks.
171  *
 * NOTE: the value of the unused portion of carry is expected to always
 * be zero.
173  */
174 
/*
 * "zero" shift - bit count used to shift the upper bytes of a 64-bit
 * value out (and back) so that they become zero.  Input is the number
 * of LSB bytes to preserve; e.g. zshift(3) is 40 bits.
 *
 * NOTE: zshift(0) evaluates to 64, which is not a valid shift count
 * for a 64-bit value.
 */
#define zshift(x) (8 * (8 - (x)))

/*
 * "merge" shift - bit count used to move new bytes above the bytes
 * already held in carry.  Input is the LSB byte count to move beyond;
 * e.g. mshift(3) is 24 bits.
 */
#define mshift(x) (8 * (x))
186 
187 /*
188  * Read nbytes bytes from "from" and return them in the LSB bytes
189  * of pbuf->carry.  Other bytes are zeroed.  Any previous value
190  * pbuf->carry is lost.
191  *
192  * NOTES:
193  * o do not read from from if nbytes is zero
194  * o from may _not_ be u64 aligned
195  * o nbytes must not span a QW boundary
196  */
read_low_bytes(struct pio_buf * pbuf,const void * from,unsigned int nbytes)197 static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
198 							unsigned int nbytes)
199 {
200 	unsigned long off;
201 
202 	if (nbytes == 0) {
203 		pbuf->carry.val64 = 0;
204 	} else {
205 		/* align our pointer */
206 		off = (unsigned long)from & 0x7;
207 		from = (void *)((unsigned long)from & ~0x7l);
208 		pbuf->carry.val64 = ((*(u64 *)from)
209 				<< zshift(nbytes + off))/* zero upper bytes */
210 				>> zshift(nbytes);	/* place at bottom */
211 	}
212 	pbuf->carry_bytes = nbytes;
213 }
214 
215 /*
216  * Read nbytes bytes from "from" and put them at the next significant bytes
217  * of pbuf->carry.  Unused bytes are zeroed.  It is expected that the extra
218  * read does not overfill carry.
219  *
220  * NOTES:
221  * o from may _not_ be u64 aligned
222  * o nbytes may span a QW boundary
223  */
read_extra_bytes(struct pio_buf * pbuf,const void * from,unsigned int nbytes)224 static inline void read_extra_bytes(struct pio_buf *pbuf,
225 					const void *from, unsigned int nbytes)
226 {
227 	unsigned long off = (unsigned long)from & 0x7;
228 	unsigned int room, xbytes;
229 
230 	/* align our pointer */
231 	from = (void *)((unsigned long)from & ~0x7l);
232 
233 	/* check count first - don't read anything if count is zero */
234 	while (nbytes) {
235 		/* find the number of bytes in this u64 */
236 		room = 8 - off;	/* this u64 has room for this many bytes */
237 		xbytes = nbytes > room ? room : nbytes;
238 
239 		/*
240 		 * shift down to zero lower bytes, shift up to zero upper
241 		 * bytes, shift back down to move into place
242 		 */
243 		pbuf->carry.val64 |= (((*(u64 *)from)
244 					>> mshift(off))
245 					<< zshift(xbytes))
246 					>> zshift(xbytes+pbuf->carry_bytes);
247 		off = 0;
248 		pbuf->carry_bytes += xbytes;
249 		nbytes -= xbytes;
250 		from += sizeof(u64);
251 	}
252 }
253 
254 /*
255  * Zero extra bytes from the end of pbuf->carry.
256  *
257  * NOTES:
258  * o zbytes <= old_bytes
259  */
zero_extra_bytes(struct pio_buf * pbuf,unsigned int zbytes)260 static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
261 {
262 	unsigned int remaining;
263 
264 	if (zbytes == 0)	/* nothing to do */
265 		return;
266 
267 	remaining = pbuf->carry_bytes - zbytes;	/* remaining bytes */
268 
269 	/* NOTE: zshift only guaranteed to work if remaining != 0 */
270 	if (remaining)
271 		pbuf->carry.val64 = (pbuf->carry.val64 << zshift(remaining))
272 					>> zshift(remaining);
273 	else
274 		pbuf->carry.val64 = 0;
275 	pbuf->carry_bytes = remaining;
276 }
277 
278 /*
279  * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
280  * Put the unused part of the next 8 bytes of src into the LSB bytes of
281  * pbuf->carry with the upper bytes zeroed..
282  *
283  * NOTES:
284  * o result must keep unused bytes zeroed
285  * o src must be u64 aligned
286  */
merge_write8(struct pio_buf * pbuf,void __iomem * dest,const void * src)287 static inline void merge_write8(
288 	struct pio_buf *pbuf,
289 	void __iomem *dest,
290 	const void *src)
291 {
292 	u64 new, temp;
293 
294 	new = *(u64 *)src;
295 	temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
296 	writeq(temp, dest);
297 	pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
298 }
299 
300 /*
301  * Write a quad word using all bytes of carry.
302  */
carry8_write8(union mix carry,void __iomem * dest)303 static inline void carry8_write8(union mix carry, void __iomem *dest)
304 {
305 	writeq(carry.val64, dest);
306 }
307 
308 /*
309  * Write a quad word using all the valid bytes of carry.  If carry
310  * has zero valid bytes, nothing is written.
311  * Returns 0 on nothing written, non-zero on quad word written.
312  */
carry_write8(struct pio_buf * pbuf,void __iomem * dest)313 static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest)
314 {
315 	if (pbuf->carry_bytes) {
316 		/* unused bytes are always kept zeroed, so just write */
317 		writeq(pbuf->carry.val64, dest);
318 		return 1;
319 	}
320 
321 	return 0;
322 }
323 
324 #else /* USE_SHIFTS */
325 /*
326  * Handle carry bytes using byte copies.
327  *
 * NOTE: the value of the unused portion of carry is left uninitialized.
329  */
330 
331 /*
332  * Jump copy - no-loop copy for < 8 bytes.
333  */
jcopy(u8 * dest,const u8 * src,u32 n)334 static inline void jcopy(u8 *dest, const u8 *src, u32 n)
335 {
336 	switch (n) {
337 	case 7:
338 		*dest++ = *src++;
339 	case 6:
340 		*dest++ = *src++;
341 	case 5:
342 		*dest++ = *src++;
343 	case 4:
344 		*dest++ = *src++;
345 	case 3:
346 		*dest++ = *src++;
347 	case 2:
348 		*dest++ = *src++;
349 	case 1:
350 		*dest++ = *src++;
351 	}
352 }
353 
354 /*
355  * Read nbytes from "from" and and place them in the low bytes
356  * of pbuf->carry.  Other bytes are left as-is.  Any previous
357  * value in pbuf->carry is lost.
358  *
359  * NOTES:
360  * o do not read from from if nbytes is zero
361  * o from may _not_ be u64 aligned.
362  */
read_low_bytes(struct pio_buf * pbuf,const void * from,unsigned int nbytes)363 static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
364 							unsigned int nbytes)
365 {
366 	jcopy(&pbuf->carry.val8[0], from, nbytes);
367 	pbuf->carry_bytes = nbytes;
368 }
369 
370 /*
371  * Read nbytes bytes from "from" and put them at the end of pbuf->carry.
372  * It is expected that the extra read does not overfill carry.
373  *
374  * NOTES:
375  * o from may _not_ be u64 aligned
376  * o nbytes may span a QW boundary
377  */
read_extra_bytes(struct pio_buf * pbuf,const void * from,unsigned int nbytes)378 static inline void read_extra_bytes(struct pio_buf *pbuf,
379 					const void *from, unsigned int nbytes)
380 {
381 	jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes);
382 	pbuf->carry_bytes += nbytes;
383 }
384 
385 /*
386  * Zero extra bytes from the end of pbuf->carry.
387  *
388  * We do not care about the value of unused bytes in carry, so just
389  * reduce the byte count.
390  *
391  * NOTES:
392  * o zbytes <= old_bytes
393  */
zero_extra_bytes(struct pio_buf * pbuf,unsigned int zbytes)394 static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
395 {
396 	pbuf->carry_bytes -= zbytes;
397 }
398 
399 /*
400  * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
401  * Put the unused part of the next 8 bytes of src into the low bytes of
402  * pbuf->carry.
403  */
merge_write8(struct pio_buf * pbuf,void * dest,const void * src)404 static inline void merge_write8(
405 	struct pio_buf *pbuf,
406 	void *dest,
407 	const void *src)
408 {
409 	u32 remainder = 8 - pbuf->carry_bytes;
410 
411 	jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder);
412 	writeq(pbuf->carry.val64, dest);
413 	jcopy(&pbuf->carry.val8[0], src+remainder, pbuf->carry_bytes);
414 }
415 
416 /*
417  * Write a quad word using all bytes of carry.
418  */
carry8_write8(union mix carry,void * dest)419 static inline void carry8_write8(union mix carry, void *dest)
420 {
421 	writeq(carry.val64, dest);
422 }
423 
424 /*
425  * Write a quad word using all the valid bytes of carry.  If carry
426  * has zero valid bytes, nothing is written.
427  * Returns 0 on nothing written, non-zero on quad word written.
428  */
carry_write8(struct pio_buf * pbuf,void * dest)429 static inline int carry_write8(struct pio_buf *pbuf, void *dest)
430 {
431 	if (pbuf->carry_bytes) {
432 		u64 zero = 0;
433 
434 		jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero,
435 						8 - pbuf->carry_bytes);
436 		writeq(pbuf->carry.val64, dest);
437 		return 1;
438 	}
439 
440 	return 0;
441 }
442 #endif /* USE_SHIFTS */
443 
444 /*
445  * Segmented PIO Copy - start
446  *
447  * Start a PIO copy.
448  *
449  * @pbuf: destination buffer
450  * @pbc: the PBC for the PIO buffer
451  * @from: data source, QWORD aligned
452  * @nbytes: bytes to copy
453  */
seg_pio_copy_start(struct pio_buf * pbuf,u64 pbc,const void * from,size_t nbytes)454 void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
455 				const void *from, size_t nbytes)
456 {
457 	void __iomem *dest = pbuf->start + SOP_DISTANCE;
458 	void __iomem *send = dest + PIO_BLOCK_SIZE;
459 	void __iomem *dend;			/* 8-byte data end */
460 
461 	writeq(pbc, dest);
462 	dest += sizeof(u64);
463 
464 	/* calculate where the QWORD data ends - in SOP=1 space */
465 	dend = dest + ((nbytes>>3) * sizeof(u64));
466 
467 	if (dend < send) {
468 		/* all QWORD data is within the SOP block, does *not*
469 		   reach the end of the SOP block */
470 
471 		while (dest < dend) {
472 			writeq(*(u64 *)from, dest);
473 			from += sizeof(u64);
474 			dest += sizeof(u64);
475 		}
476 		/*
477 		 * No boundary checks are needed here:
478 		 * 0. We're not on the SOP block boundary
479 		 * 1. The possible DWORD dangle will still be within
480 		 *    the SOP block
481 		 * 2. We cannot wrap except on a block boundary.
482 		 */
483 	} else {
484 		/* QWORD data extends _to_ or beyond the SOP block */
485 
486 		/* write 8-byte SOP chunk data */
487 		while (dest < send) {
488 			writeq(*(u64 *)from, dest);
489 			from += sizeof(u64);
490 			dest += sizeof(u64);
491 		}
492 		/* drop out of the SOP range */
493 		dest -= SOP_DISTANCE;
494 		dend -= SOP_DISTANCE;
495 
496 		/*
497 		 * If the wrap comes before or matches the data end,
498 		 * copy until until the wrap, then wrap.
499 		 *
500 		 * If the data ends at the end of the SOP above and
501 		 * the buffer wraps, then pbuf->end == dend == dest
502 		 * and nothing will get written, but we will wrap in
503 		 * case there is a dangling DWORD.
504 		 */
505 		if (pbuf->end <= dend) {
506 			while (dest < pbuf->end) {
507 				writeq(*(u64 *)from, dest);
508 				from += sizeof(u64);
509 				dest += sizeof(u64);
510 			}
511 
512 			dest -= pbuf->size;
513 			dend -= pbuf->size;
514 		}
515 
516 		/* write 8-byte non-SOP, non-wrap chunk data */
517 		while (dest < dend) {
518 			writeq(*(u64 *)from, dest);
519 			from += sizeof(u64);
520 			dest += sizeof(u64);
521 		}
522 	}
523 	/* at this point we have wrapped if we are going to wrap */
524 
525 	/* ...but it doesn't matter as we're done writing */
526 
527 	/* save dangling bytes, if any */
528 	read_low_bytes(pbuf, from, nbytes & 0x7);
529 
530 	pbuf->qw_written = 1 /*PBC*/ + (nbytes >> 3);
531 }
532 
533 /*
534  * Mid copy helper, "mixed case" - source is 64-bit aligned but carry
535  * bytes are non-zero.
536  *
537  * Whole u64s must be written to the chip, so bytes must be manually merged.
538  *
539  * @pbuf: destination buffer
540  * @from: data source, is QWORD aligned.
541  * @nbytes: bytes to copy
542  *
543  * Must handle nbytes < 8.
544  */
mid_copy_mix(struct pio_buf * pbuf,const void * from,size_t nbytes)545 static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
546 {
547 	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
548 	void __iomem *dend;			/* 8-byte data end */
549 	unsigned long qw_to_write = (pbuf->carry_bytes + nbytes) >> 3;
550 	unsigned long bytes_left = (pbuf->carry_bytes + nbytes) & 0x7;
551 
552 	/* calculate 8-byte data end */
553 	dend = dest + (qw_to_write * sizeof(u64));
554 
555 	if (pbuf->qw_written < PIO_BLOCK_QWS) {
556 		/*
557 		 * Still within SOP block.  We don't need to check for
558 		 * wrap because we are still in the first block and
559 		 * can only wrap on block boundaries.
560 		 */
561 		void __iomem *send;		/* SOP end */
562 		void __iomem *xend;
563 
564 		/* calculate the end of data or end of block, whichever
565 		   comes first */
566 		send = pbuf->start + PIO_BLOCK_SIZE;
567 		xend = send < dend ? send : dend;
568 
569 		/* shift up to SOP=1 space */
570 		dest += SOP_DISTANCE;
571 		xend += SOP_DISTANCE;
572 
573 		/* write 8-byte chunk data */
574 		while (dest < xend) {
575 			merge_write8(pbuf, dest, from);
576 			from += sizeof(u64);
577 			dest += sizeof(u64);
578 		}
579 
580 		/* shift down to SOP=0 space */
581 		dest -= SOP_DISTANCE;
582 	}
583 	/*
584 	 * At this point dest could be (either, both, or neither):
585 	 * - at dend
586 	 * - at the wrap
587 	 */
588 
589 	/*
590 	 * If the wrap comes before or matches the data end,
591 	 * copy until until the wrap, then wrap.
592 	 *
593 	 * If dest is at the wrap, we will fall into the if,
594 	 * not do the loop, when wrap.
595 	 *
596 	 * If the data ends at the end of the SOP above and
597 	 * the buffer wraps, then pbuf->end == dend == dest
598 	 * and nothing will get written.
599 	 */
600 	if (pbuf->end <= dend) {
601 		while (dest < pbuf->end) {
602 			merge_write8(pbuf, dest, from);
603 			from += sizeof(u64);
604 			dest += sizeof(u64);
605 		}
606 
607 		dest -= pbuf->size;
608 		dend -= pbuf->size;
609 	}
610 
611 	/* write 8-byte non-SOP, non-wrap chunk data */
612 	while (dest < dend) {
613 		merge_write8(pbuf, dest, from);
614 		from += sizeof(u64);
615 		dest += sizeof(u64);
616 	}
617 
618 	/* adjust carry */
619 	if (pbuf->carry_bytes < bytes_left) {
620 		/* need to read more */
621 		read_extra_bytes(pbuf, from, bytes_left - pbuf->carry_bytes);
622 	} else {
623 		/* remove invalid bytes */
624 		zero_extra_bytes(pbuf, pbuf->carry_bytes - bytes_left);
625 	}
626 
627 	pbuf->qw_written += qw_to_write;
628 }
629 
630 /*
631  * Mid copy helper, "straight case" - source pointer is 64-bit aligned
632  * with no carry bytes.
633  *
634  * @pbuf: destination buffer
635  * @from: data source, is QWORD aligned
636  * @nbytes: bytes to copy
637  *
638  * Must handle nbytes < 8.
639  */
mid_copy_straight(struct pio_buf * pbuf,const void * from,size_t nbytes)640 static void mid_copy_straight(struct pio_buf *pbuf,
641 						const void *from, size_t nbytes)
642 {
643 	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
644 	void __iomem *dend;			/* 8-byte data end */
645 
646 	/* calculate 8-byte data end */
647 	dend = dest + ((nbytes>>3) * sizeof(u64));
648 
649 	if (pbuf->qw_written < PIO_BLOCK_QWS) {
650 		/*
651 		 * Still within SOP block.  We don't need to check for
652 		 * wrap because we are still in the first block and
653 		 * can only wrap on block boundaries.
654 		 */
655 		void __iomem *send;		/* SOP end */
656 		void __iomem *xend;
657 
658 		/* calculate the end of data or end of block, whichever
659 		   comes first */
660 		send = pbuf->start + PIO_BLOCK_SIZE;
661 		xend = send < dend ? send : dend;
662 
663 		/* shift up to SOP=1 space */
664 		dest += SOP_DISTANCE;
665 		xend += SOP_DISTANCE;
666 
667 		/* write 8-byte chunk data */
668 		while (dest < xend) {
669 			writeq(*(u64 *)from, dest);
670 			from += sizeof(u64);
671 			dest += sizeof(u64);
672 		}
673 
674 		/* shift down to SOP=0 space */
675 		dest -= SOP_DISTANCE;
676 	}
677 	/*
678 	 * At this point dest could be (either, both, or neither):
679 	 * - at dend
680 	 * - at the wrap
681 	 */
682 
683 	/*
684 	 * If the wrap comes before or matches the data end,
685 	 * copy until until the wrap, then wrap.
686 	 *
687 	 * If dest is at the wrap, we will fall into the if,
688 	 * not do the loop, when wrap.
689 	 *
690 	 * If the data ends at the end of the SOP above and
691 	 * the buffer wraps, then pbuf->end == dend == dest
692 	 * and nothing will get written.
693 	 */
694 	if (pbuf->end <= dend) {
695 		while (dest < pbuf->end) {
696 			writeq(*(u64 *)from, dest);
697 			from += sizeof(u64);
698 			dest += sizeof(u64);
699 		}
700 
701 		dest -= pbuf->size;
702 		dend -= pbuf->size;
703 	}
704 
705 	/* write 8-byte non-SOP, non-wrap chunk data */
706 	while (dest < dend) {
707 		writeq(*(u64 *)from, dest);
708 		from += sizeof(u64);
709 		dest += sizeof(u64);
710 	}
711 
712 	/* we know carry_bytes was zero on entry to this routine */
713 	read_low_bytes(pbuf, from, nbytes & 0x7);
714 
715 	pbuf->qw_written += nbytes>>3;
716 }
717 
718 /*
719  * Segmented PIO Copy - middle
720  *
721  * Must handle any aligned tail and any aligned source with any byte count.
722  *
723  * @pbuf: a number of blocks allocated within a PIO send context
724  * @from: data source
725  * @nbytes: number of bytes to copy
726  */
seg_pio_copy_mid(struct pio_buf * pbuf,const void * from,size_t nbytes)727 void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
728 {
729 	unsigned long from_align = (unsigned long)from & 0x7;
730 
731 	if (pbuf->carry_bytes + nbytes < 8) {
732 		/* not enough bytes to fill a QW */
733 		read_extra_bytes(pbuf, from, nbytes);
734 		return;
735 	}
736 
737 	if (from_align) {
738 		/* misaligned source pointer - align it */
739 		unsigned long to_align;
740 
741 		/* bytes to read to align "from" */
742 		to_align = 8 - from_align;
743 
744 		/*
745 		 * In the advance-to-alignment logic below, we do not need
746 		 * to check if we are using more than nbytes.  This is because
747 		 * if we are here, we already know that carry+nbytes will
748 		 * fill at least one QW.
749 		 */
750 		if (pbuf->carry_bytes + to_align < 8) {
751 			/* not enough align bytes to fill a QW */
752 			read_extra_bytes(pbuf, from, to_align);
753 			from += to_align;
754 			nbytes -= to_align;
755 		} else {
756 			/* bytes to fill carry */
757 			unsigned long to_fill = 8 - pbuf->carry_bytes;
758 			/* bytes left over to be read */
759 			unsigned long extra = to_align - to_fill;
760 			void __iomem *dest;
761 
762 			/* fill carry... */
763 			read_extra_bytes(pbuf, from, to_fill);
764 			from += to_fill;
765 			nbytes -= to_fill;
766 
767 			/* ...now write carry */
768 			dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
769 
770 			/*
771 			 * The two checks immediately below cannot both be
772 			 * true, hence the else.  If we have wrapped, we
773 			 * cannot still be within the first block.
774 			 * Conversely, if we are still in the first block, we
775 			 * cannot have wrapped.  We do the wrap check first
776 			 * as that is more likely.
777 			 */
778 			/* adjust if we've wrapped */
779 			if (dest >= pbuf->end)
780 				dest -= pbuf->size;
781 			/* jump to SOP range if within the first block */
782 			else if (pbuf->qw_written < PIO_BLOCK_QWS)
783 				dest += SOP_DISTANCE;
784 
785 			carry8_write8(pbuf->carry, dest);
786 			pbuf->qw_written++;
787 
788 			/* read any extra bytes to do final alignment */
789 			/* this will overwrite anything in pbuf->carry */
790 			read_low_bytes(pbuf, from, extra);
791 			from += extra;
792 			nbytes -= extra;
793 		}
794 
795 		/* at this point, from is QW aligned */
796 	}
797 
798 	if (pbuf->carry_bytes)
799 		mid_copy_mix(pbuf, from, nbytes);
800 	else
801 		mid_copy_straight(pbuf, from, nbytes);
802 }
803 
804 /*
805  * Segmented PIO Copy - end
806  *
807  * Write any remainder (in pbuf->carry) and finish writing the whole block.
808  *
809  * @pbuf: a number of blocks allocated within a PIO send context
810  */
seg_pio_copy_end(struct pio_buf * pbuf)811 void seg_pio_copy_end(struct pio_buf *pbuf)
812 {
813 	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
814 
815 	/*
816 	 * The two checks immediately below cannot both be true, hence the
817 	 * else.  If we have wrapped, we cannot still be within the first
818 	 * block.  Conversely, if we are still in the first block, we
819 	 * cannot have wrapped.  We do the wrap check first as that is
820 	 * more likely.
821 	 */
822 	/* adjust if we have wrapped */
823 	if (dest >= pbuf->end)
824 		dest -= pbuf->size;
825 	/* jump to the SOP range if within the first block */
826 	else if (pbuf->qw_written < PIO_BLOCK_QWS)
827 		dest += SOP_DISTANCE;
828 
829 	/* write final bytes, if any */
830 	if (carry_write8(pbuf, dest)) {
831 		dest += sizeof(u64);
832 		/*
833 		 * NOTE: We do not need to recalculate whether dest needs
834 		 * SOP_DISTANCE or not.
835 		 *
836 		 * If we are in the first block and the dangle write
837 		 * keeps us in the same block, dest will need
838 		 * to retain SOP_DISTANCE in the loop below.
839 		 *
840 		 * If we are in the first block and the dangle write pushes
841 		 * us to the next block, then loop below will not run
842 		 * and dest is not used.  Hence we do not need to update
843 		 * it.
844 		 *
845 		 * If we are past the first block, then SOP_DISTANCE
846 		 * was never added, so there is nothing to do.
847 		 */
848 	}
849 
850 	/* fill in rest of block */
851 	while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
852 		writeq(0, dest);
853 		dest += sizeof(u64);
854 	}
855 
856 	/* finished with this buffer */
857 	atomic_dec(&pbuf->sc->buffers_allocated);
858 }
859