/*
 * Copyright (c) 2005-2011 Atheros Communications Inc.
 * Copyright (c) 2011-2013 Qualcomm Atheros, Inc.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "hif.h"
#include "pci.h"
#include "ce.h"
#include "debug.h"

/*
 * Support for Copy Engine hardware, which is mainly used for
 * communication between Host and Target over a PCIe interconnect.
 */

28/*
29 * A single CopyEngine (CE) comprises two "rings":
30 *   a source ring
31 *   a destination ring
32 *
33 * Each ring consists of a number of descriptors which specify
34 * an address, length, and meta-data.
35 *
36 * Typically, one side of the PCIe interconnect (Host or Target)
37 * controls one ring and the other side controls the other ring.
38 * The source side chooses when to initiate a transfer and it
39 * chooses what to send (buffer address, length). The destination
40 * side keeps a supply of "anonymous receive buffers" available and
41 * it handles incoming data as it arrives (when the destination
42 * recieves an interrupt).
43 *
44 * The sender may send a simple buffer (address/length) or it may
45 * send a small list of buffers.  When a small list is sent, hardware
46 * "gathers" these and they end up in a single destination buffer
47 * with a single interrupt.
48 *
49 * There are several "contexts" managed by this layer -- more, it
50 * may seem -- than should be needed. These are provided mainly for
51 * maximum flexibility and especially to facilitate a simpler HIF
52 * implementation. There are per-CopyEngine recv, send, and watermark
53 * contexts. These are supplied by the caller when a recv, send,
54 * or watermark handler is established and they are echoed back to
55 * the caller when the respective callbacks are invoked. There is
56 * also a per-transfer context supplied by the caller when a buffer
57 * (or sendlist) is sent and when a buffer is enqueued for recv.
58 * These per-transfer contexts are echoed back to the caller when
59 * the buffer is sent/received.
60 */
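
/*
 * Illustrative sketch (not compiled as part of this file) of how the
 * per-transfer context is echoed back on the send path; "my_ctx", "paddr",
 * "nbytes" and "transfer_id" are hypothetical caller-owned values:
 *
 *	ath10k_ce_send(pipe, my_ctx, paddr, nbytes, transfer_id, 0);
 *	...
 *	ath10k_ce_completed_send_next(pipe, &ctx, &paddr, &nbytes,
 *				      &transfer_id);
 *	At this point ctx == my_ctx for the completed transfer.
 */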

static inline void ath10k_ce_dest_ring_write_index_set(struct ath10k *ar,
						       u32 ce_ctrl_addr,
						       unsigned int n)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + DST_WR_INDEX_ADDRESS, n);
}

static inline u32 ath10k_ce_dest_ring_write_index_get(struct ath10k *ar,
						      u32 ce_ctrl_addr)
{
	return ath10k_pci_read32(ar, ce_ctrl_addr + DST_WR_INDEX_ADDRESS);
}

static inline void ath10k_ce_src_ring_write_index_set(struct ath10k *ar,
						      u32 ce_ctrl_addr,
						      unsigned int n)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + SR_WR_INDEX_ADDRESS, n);
}

static inline u32 ath10k_ce_src_ring_write_index_get(struct ath10k *ar,
						     u32 ce_ctrl_addr)
{
	return ath10k_pci_read32(ar, ce_ctrl_addr + SR_WR_INDEX_ADDRESS);
}

static inline u32 ath10k_ce_src_ring_read_index_get(struct ath10k *ar,
						    u32 ce_ctrl_addr)
{
	return ath10k_pci_read32(ar, ce_ctrl_addr + CURRENT_SRRI_ADDRESS);
}

static inline void ath10k_ce_src_ring_base_addr_set(struct ath10k *ar,
						    u32 ce_ctrl_addr,
						    unsigned int addr)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + SR_BA_ADDRESS, addr);
}

static inline void ath10k_ce_src_ring_size_set(struct ath10k *ar,
					       u32 ce_ctrl_addr,
					       unsigned int n)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + SR_SIZE_ADDRESS, n);
}

static inline void ath10k_ce_src_ring_dmax_set(struct ath10k *ar,
					       u32 ce_ctrl_addr,
					       unsigned int n)
{
	u32 ctrl1_addr = ath10k_pci_read32(ar,
					   ce_ctrl_addr + CE_CTRL1_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS,
			   (ctrl1_addr & ~CE_CTRL1_DMAX_LENGTH_MASK) |
			   CE_CTRL1_DMAX_LENGTH_SET(n));
}

static inline void ath10k_ce_src_ring_byte_swap_set(struct ath10k *ar,
						    u32 ce_ctrl_addr,
						    unsigned int n)
{
	u32 ctrl1_addr = ath10k_pci_read32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS,
			   (ctrl1_addr & ~CE_CTRL1_SRC_RING_BYTE_SWAP_EN_MASK) |
			   CE_CTRL1_SRC_RING_BYTE_SWAP_EN_SET(n));
}

static inline void ath10k_ce_dest_ring_byte_swap_set(struct ath10k *ar,
						     u32 ce_ctrl_addr,
						     unsigned int n)
{
	u32 ctrl1_addr = ath10k_pci_read32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS,
			   (ctrl1_addr & ~CE_CTRL1_DST_RING_BYTE_SWAP_EN_MASK) |
			   CE_CTRL1_DST_RING_BYTE_SWAP_EN_SET(n));
}

static inline u32 ath10k_ce_dest_ring_read_index_get(struct ath10k *ar,
						     u32 ce_ctrl_addr)
{
	return ath10k_pci_read32(ar, ce_ctrl_addr + CURRENT_DRRI_ADDRESS);
}

static inline void ath10k_ce_dest_ring_base_addr_set(struct ath10k *ar,
						     u32 ce_ctrl_addr,
						     u32 addr)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + DR_BA_ADDRESS, addr);
}

static inline void ath10k_ce_dest_ring_size_set(struct ath10k *ar,
						u32 ce_ctrl_addr,
						unsigned int n)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + DR_SIZE_ADDRESS, n);
}

static inline void ath10k_ce_src_ring_highmark_set(struct ath10k *ar,
						   u32 ce_ctrl_addr,
						   unsigned int n)
{
	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS,
			   (addr & ~SRC_WATERMARK_HIGH_MASK) |
			   SRC_WATERMARK_HIGH_SET(n));
}

static inline void ath10k_ce_src_ring_lowmark_set(struct ath10k *ar,
						  u32 ce_ctrl_addr,
						  unsigned int n)
{
	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS,
			   (addr & ~SRC_WATERMARK_LOW_MASK) |
			   SRC_WATERMARK_LOW_SET(n));
}

static inline void ath10k_ce_dest_ring_highmark_set(struct ath10k *ar,
						    u32 ce_ctrl_addr,
						    unsigned int n)
{
	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS,
			   (addr & ~DST_WATERMARK_HIGH_MASK) |
			   DST_WATERMARK_HIGH_SET(n));
}

static inline void ath10k_ce_dest_ring_lowmark_set(struct ath10k *ar,
						   u32 ce_ctrl_addr,
						   unsigned int n)
{
	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS,
			   (addr & ~DST_WATERMARK_LOW_MASK) |
			   DST_WATERMARK_LOW_SET(n));
}

static inline void ath10k_ce_copy_complete_inter_enable(struct ath10k *ar,
							u32 ce_ctrl_addr)
{
	u32 host_ie_addr = ath10k_pci_read32(ar,
					     ce_ctrl_addr + HOST_IE_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS,
			   host_ie_addr | HOST_IE_COPY_COMPLETE_MASK);
}

static inline void ath10k_ce_copy_complete_intr_disable(struct ath10k *ar,
							u32 ce_ctrl_addr)
{
	u32 host_ie_addr = ath10k_pci_read32(ar,
					     ce_ctrl_addr + HOST_IE_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS,
			   host_ie_addr & ~HOST_IE_COPY_COMPLETE_MASK);
}

static inline void ath10k_ce_watermark_intr_disable(struct ath10k *ar,
						    u32 ce_ctrl_addr)
{
	u32 host_ie_addr = ath10k_pci_read32(ar,
					     ce_ctrl_addr + HOST_IE_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS,
			   host_ie_addr & ~CE_WATERMARK_MASK);
}

static inline void ath10k_ce_error_intr_enable(struct ath10k *ar,
					       u32 ce_ctrl_addr)
{
	u32 misc_ie_addr = ath10k_pci_read32(ar,
					     ce_ctrl_addr + MISC_IE_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + MISC_IE_ADDRESS,
			   misc_ie_addr | CE_ERROR_MASK);
}

static inline void ath10k_ce_error_intr_disable(struct ath10k *ar,
						u32 ce_ctrl_addr)
{
	u32 misc_ie_addr = ath10k_pci_read32(ar,
					     ce_ctrl_addr + MISC_IE_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + MISC_IE_ADDRESS,
			   misc_ie_addr & ~CE_ERROR_MASK);
}

static inline void ath10k_ce_engine_int_status_clear(struct ath10k *ar,
						     u32 ce_ctrl_addr,
						     unsigned int mask)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IS_ADDRESS, mask);
}

/*
 * Guts of ath10k_ce_send, used by both ath10k_ce_send and
 * ath10k_ce_sendlist_send.
 * The caller takes responsibility for any needed locking.
 */
int ath10k_ce_send_nolock(struct ath10k_ce_pipe *ce_state,
			  void *per_transfer_context,
			  u32 buffer,
			  unsigned int nbytes,
			  unsigned int transfer_id,
			  unsigned int flags)
{
	struct ath10k *ar = ce_state->ar;
	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
	struct ce_desc *desc, *sdesc;
	unsigned int nentries_mask = src_ring->nentries_mask;
	unsigned int sw_index = src_ring->sw_index;
	unsigned int write_index = src_ring->write_index;
	u32 ctrl_addr = ce_state->ctrl_addr;
	u32 desc_flags = 0;
	int ret = 0;

	if (nbytes > ce_state->src_sz_max)
		ath10k_warn(ar, "%s: sending more than we can (nbytes: %d, max: %d)\n",
			    __func__, nbytes, ce_state->src_sz_max);

	if (unlikely(CE_RING_DELTA(nentries_mask,
				   write_index, sw_index - 1) <= 0)) {
		ret = -ENOSR;
		goto exit;
	}

	desc = CE_SRC_RING_TO_DESC(src_ring->base_addr_owner_space,
				   write_index);
	sdesc = CE_SRC_RING_TO_DESC(src_ring->shadow_base, write_index);

	desc_flags |= SM(transfer_id, CE_DESC_FLAGS_META_DATA);

	if (flags & CE_SEND_FLAG_GATHER)
		desc_flags |= CE_DESC_FLAGS_GATHER;
	if (flags & CE_SEND_FLAG_BYTE_SWAP)
		desc_flags |= CE_DESC_FLAGS_BYTE_SWAP;

	sdesc->addr   = __cpu_to_le32(buffer);
	sdesc->nbytes = __cpu_to_le16(nbytes);
	sdesc->flags  = __cpu_to_le16(desc_flags);

	*desc = *sdesc;

	src_ring->per_transfer_context[write_index] = per_transfer_context;

	/* Update Source Ring Write Index */
	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);

	/* WORKAROUND */
	if (!(flags & CE_SEND_FLAG_GATHER))
		ath10k_ce_src_ring_write_index_set(ar, ctrl_addr, write_index);

	src_ring->write_index = write_index;
exit:
	return ret;
}

void __ath10k_ce_send_revert(struct ath10k_ce_pipe *pipe)
{
	struct ath10k *ar = pipe->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_ring *src_ring = pipe->src_ring;
	u32 ctrl_addr = pipe->ctrl_addr;

	lockdep_assert_held(&ar_pci->ce_lock);

	/*
	 * This function must be called only if there is an incomplete
	 * scatter-gather transfer (before index register is updated)
	 * that needs to be cleaned up.
	 */
	if (WARN_ON_ONCE(src_ring->write_index == src_ring->sw_index))
		return;

	if (WARN_ON_ONCE(src_ring->write_index ==
			 ath10k_ce_src_ring_write_index_get(ar, ctrl_addr)))
		return;

	src_ring->write_index--;
	src_ring->write_index &= src_ring->nentries_mask;

	src_ring->per_transfer_context[src_ring->write_index] = NULL;
}

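/* Lock-protected wrapper around ath10k_ce_send_nolock(). */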
int ath10k_ce_send(struct ath10k_ce_pipe *ce_state,
		   void *per_transfer_context,
		   u32 buffer,
		   unsigned int nbytes,
		   unsigned int transfer_id,
		   unsigned int flags)
{
	struct ath10k *ar = ce_state->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	int ret;

	spin_lock_bh(&ar_pci->ce_lock);
	ret = ath10k_ce_send_nolock(ce_state, per_transfer_context,
				    buffer, nbytes, transfer_id, flags);
	spin_unlock_bh(&ar_pci->ce_lock);

	return ret;
}

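/* Number of source ring slots currently available for new sends. */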
int ath10k_ce_num_free_src_entries(struct ath10k_ce_pipe *pipe)
{
	struct ath10k *ar = pipe->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	int delta;

	spin_lock_bh(&ar_pci->ce_lock);
	delta = CE_RING_DELTA(pipe->src_ring->nentries_mask,
			      pipe->src_ring->write_index,
			      pipe->src_ring->sw_index - 1);
	spin_unlock_bh(&ar_pci->ce_lock);

	return delta;
}

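/* Number of free destination ring slots; the caller must hold ce_lock. */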
int __ath10k_ce_rx_num_free_bufs(struct ath10k_ce_pipe *pipe)
{
	struct ath10k *ar = pipe->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
	unsigned int nentries_mask = dest_ring->nentries_mask;
	unsigned int write_index = dest_ring->write_index;
	unsigned int sw_index = dest_ring->sw_index;

	lockdep_assert_held(&ar_pci->ce_lock);

	return CE_RING_DELTA(nentries_mask, write_index, sw_index - 1);
}

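/*
 * Post a receive buffer at physical address paddr to the destination ring
 * and publish the new write index to the hardware. The caller must hold
 * ce_lock; a lock-protected wrapper follows.
 */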
int __ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx, u32 paddr)
{
	struct ath10k *ar = pipe->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
	unsigned int nentries_mask = dest_ring->nentries_mask;
	unsigned int write_index = dest_ring->write_index;
	unsigned int sw_index = dest_ring->sw_index;
	struct ce_desc *base = dest_ring->base_addr_owner_space;
	struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, write_index);
	u32 ctrl_addr = pipe->ctrl_addr;

	lockdep_assert_held(&ar_pci->ce_lock);

	if (CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) == 0)
		return -EIO;

	desc->addr = __cpu_to_le32(paddr);
	desc->nbytes = 0;

	dest_ring->per_transfer_context[write_index] = ctx;
	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
	ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
	dest_ring->write_index = write_index;

	return 0;
}

int ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx, u32 paddr)
{
	struct ath10k *ar = pipe->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	int ret;

	spin_lock_bh(&ar_pci->ce_lock);
	ret = __ath10k_ce_rx_post_buf(pipe, ctx, paddr);
	spin_unlock_bh(&ar_pci->ce_lock);

	return ret;
}

/*
 * Guts of ath10k_ce_completed_recv_next.
 * The caller takes responsibility for any necessary locking.
 */
int ath10k_ce_completed_recv_next_nolock(struct ath10k_ce_pipe *ce_state,
					 void **per_transfer_contextp,
					 u32 *bufferp,
					 unsigned int *nbytesp,
					 unsigned int *transfer_idp,
					 unsigned int *flagsp)
{
	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
	unsigned int nentries_mask = dest_ring->nentries_mask;
	unsigned int sw_index = dest_ring->sw_index;

	struct ce_desc *base = dest_ring->base_addr_owner_space;
	struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index);
	struct ce_desc sdesc;
	u16 nbytes;

	/* Copy in one go for performance reasons */
	sdesc = *desc;

	nbytes = __le16_to_cpu(sdesc.nbytes);
	if (nbytes == 0) {
		/*
		 * This closes a relatively unusual race where the Host
		 * sees the updated DRRI before the update to the
		 * corresponding descriptor has completed. We treat this
		 * as a descriptor that is not yet done.
		 */
		return -EIO;
	}

	desc->nbytes = 0;

	/* Return data from completed destination descriptor */
	*bufferp = __le32_to_cpu(sdesc.addr);
	*nbytesp = nbytes;
	*transfer_idp = MS(__le16_to_cpu(sdesc.flags), CE_DESC_FLAGS_META_DATA);

	if (__le16_to_cpu(sdesc.flags) & CE_DESC_FLAGS_BYTE_SWAP)
		*flagsp = CE_RECV_FLAG_SWAPPED;
	else
		*flagsp = 0;

	if (per_transfer_contextp)
		*per_transfer_contextp =
			dest_ring->per_transfer_context[sw_index];

	/* sanity */
	dest_ring->per_transfer_context[sw_index] = NULL;

	/* Update sw_index */
	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
	dest_ring->sw_index = sw_index;

	return 0;
}

int ath10k_ce_completed_recv_next(struct ath10k_ce_pipe *ce_state,
				  void **per_transfer_contextp,
				  u32 *bufferp,
				  unsigned int *nbytesp,
				  unsigned int *transfer_idp,
				  unsigned int *flagsp)
{
	struct ath10k *ar = ce_state->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	int ret;

	spin_lock_bh(&ar_pci->ce_lock);
	ret = ath10k_ce_completed_recv_next_nolock(ce_state,
						   per_transfer_contextp,
						   bufferp, nbytesp,
						   transfer_idp, flagsp);
	spin_unlock_bh(&ar_pci->ce_lock);

	return ret;
}

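/*
 * Reclaim a receive buffer that was posted but never completed, e.g. when
 * a pipe is being torn down. Returns -EIO once the ring is empty.
 */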
int ath10k_ce_revoke_recv_next(struct ath10k_ce_pipe *ce_state,
			       void **per_transfer_contextp,
			       u32 *bufferp)
{
	struct ath10k_ce_ring *dest_ring;
	unsigned int nentries_mask;
	unsigned int sw_index;
	unsigned int write_index;
	int ret;
	struct ath10k *ar;
	struct ath10k_pci *ar_pci;

	dest_ring = ce_state->dest_ring;

	if (!dest_ring)
		return -EIO;

	ar = ce_state->ar;
	ar_pci = ath10k_pci_priv(ar);

	spin_lock_bh(&ar_pci->ce_lock);

	nentries_mask = dest_ring->nentries_mask;
	sw_index = dest_ring->sw_index;
	write_index = dest_ring->write_index;
	if (write_index != sw_index) {
		struct ce_desc *base = dest_ring->base_addr_owner_space;
		struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index);

		/* Return data from completed destination descriptor */
		*bufferp = __le32_to_cpu(desc->addr);

		if (per_transfer_contextp)
			*per_transfer_contextp =
				dest_ring->per_transfer_context[sw_index];

		/* sanity */
		dest_ring->per_transfer_context[sw_index] = NULL;
		desc->nbytes = 0;

		/* Update sw_index */
		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
		dest_ring->sw_index = sw_index;
		ret = 0;
	} else {
		ret = -EIO;
	}

	spin_unlock_bh(&ar_pci->ce_lock);

	return ret;
}

/*
 * Guts of ath10k_ce_completed_send_next.
 * The caller takes responsibility for any necessary locking.
 */
int ath10k_ce_completed_send_next_nolock(struct ath10k_ce_pipe *ce_state,
					 void **per_transfer_contextp,
					 u32 *bufferp,
					 unsigned int *nbytesp,
					 unsigned int *transfer_idp)
{
	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
	u32 ctrl_addr = ce_state->ctrl_addr;
	struct ath10k *ar = ce_state->ar;
	unsigned int nentries_mask = src_ring->nentries_mask;
	unsigned int sw_index = src_ring->sw_index;
	struct ce_desc *sdesc, *sbase;
	unsigned int read_index;

	if (src_ring->hw_index == sw_index) {
		/*
		 * The SW completion index has caught up with the cached
		 * version of the HW completion index.
		 * Update the cached HW completion index to see whether
		 * the SW has really caught up to the HW, or if the cached
		 * value of the HW index has become stale.
		 */

		read_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
		if (read_index == 0xffffffff)
			return -ENODEV;

		read_index &= nentries_mask;
		src_ring->hw_index = read_index;
	}

	read_index = src_ring->hw_index;

	if (read_index == sw_index)
		return -EIO;

	sbase = src_ring->shadow_base;
	sdesc = CE_SRC_RING_TO_DESC(sbase, sw_index);

	/* Return data from completed source descriptor */
	*bufferp = __le32_to_cpu(sdesc->addr);
	*nbytesp = __le16_to_cpu(sdesc->nbytes);
	*transfer_idp = MS(__le16_to_cpu(sdesc->flags),
			   CE_DESC_FLAGS_META_DATA);

	if (per_transfer_contextp)
		*per_transfer_contextp =
			src_ring->per_transfer_context[sw_index];

	/* sanity */
	src_ring->per_transfer_context[sw_index] = NULL;

	/* Update sw_index */
	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
	src_ring->sw_index = sw_index;

	return 0;
}

/* NB: Modeled after ath10k_ce_completed_send_next */
int ath10k_ce_cancel_send_next(struct ath10k_ce_pipe *ce_state,
			       void **per_transfer_contextp,
			       u32 *bufferp,
			       unsigned int *nbytesp,
			       unsigned int *transfer_idp)
{
	struct ath10k_ce_ring *src_ring;
	unsigned int nentries_mask;
	unsigned int sw_index;
	unsigned int write_index;
	int ret;
	struct ath10k *ar;
	struct ath10k_pci *ar_pci;

	src_ring = ce_state->src_ring;

	if (!src_ring)
		return -EIO;

	ar = ce_state->ar;
	ar_pci = ath10k_pci_priv(ar);

	spin_lock_bh(&ar_pci->ce_lock);

	nentries_mask = src_ring->nentries_mask;
	sw_index = src_ring->sw_index;
	write_index = src_ring->write_index;

	if (write_index != sw_index) {
		struct ce_desc *base = src_ring->base_addr_owner_space;
		struct ce_desc *desc = CE_SRC_RING_TO_DESC(base, sw_index);

		/* Return data from completed source descriptor */
		*bufferp = __le32_to_cpu(desc->addr);
		*nbytesp = __le16_to_cpu(desc->nbytes);
		*transfer_idp = MS(__le16_to_cpu(desc->flags),
				   CE_DESC_FLAGS_META_DATA);

		if (per_transfer_contextp)
			*per_transfer_contextp =
				src_ring->per_transfer_context[sw_index];

		/* sanity */
		src_ring->per_transfer_context[sw_index] = NULL;

		/* Update sw_index */
		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
		src_ring->sw_index = sw_index;
		ret = 0;
	} else {
		ret = -EIO;
	}

	spin_unlock_bh(&ar_pci->ce_lock);

	return ret;
}

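/* Lock-protected wrapper around ath10k_ce_completed_send_next_nolock(). */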
int ath10k_ce_completed_send_next(struct ath10k_ce_pipe *ce_state,
				  void **per_transfer_contextp,
				  u32 *bufferp,
				  unsigned int *nbytesp,
				  unsigned int *transfer_idp)
{
	struct ath10k *ar = ce_state->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	int ret;

	spin_lock_bh(&ar_pci->ce_lock);
	ret = ath10k_ce_completed_send_next_nolock(ce_state,
						   per_transfer_contextp,
						   bufferp, nbytesp,
						   transfer_idp);
	spin_unlock_bh(&ar_pci->ce_lock);

	return ret;
}

/*
 * Guts of interrupt handler for per-engine interrupts on a particular CE.
 *
 * Invokes registered callbacks for recv_complete,
 * send_complete, and watermarks.
 */
void ath10k_ce_per_engine_service(struct ath10k *ar, unsigned int ce_id)
{
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
	u32 ctrl_addr = ce_state->ctrl_addr;

	spin_lock_bh(&ar_pci->ce_lock);

	/* Clear the copy-complete interrupts that will be handled here. */
	ath10k_ce_engine_int_status_clear(ar, ctrl_addr,
					  HOST_IS_COPY_COMPLETE_MASK);

	spin_unlock_bh(&ar_pci->ce_lock);

	if (ce_state->recv_cb)
		ce_state->recv_cb(ce_state);

	if (ce_state->send_cb)
		ce_state->send_cb(ce_state);

	spin_lock_bh(&ar_pci->ce_lock);

	/*
	 * Misc CE interrupts are not being handled, but still need
	 * to be cleared.
	 */
	ath10k_ce_engine_int_status_clear(ar, ctrl_addr, CE_WATERMARK_MASK);

	spin_unlock_bh(&ar_pci->ce_lock);
}

/*
 * Handler for per-engine interrupts on ALL active CEs.
 * This is used in cases where the system is sharing a
 * single interrupt for all CEs.
 */

void ath10k_ce_per_engine_service_any(struct ath10k *ar)
{
	int ce_id;
	u32 intr_summary;

	intr_summary = CE_INTERRUPT_SUMMARY(ar);

	for (ce_id = 0; intr_summary && (ce_id < CE_COUNT); ce_id++) {
		if (intr_summary & (1 << ce_id))
			intr_summary &= ~(1 << ce_id);
		else
			/* no intr pending on this CE */
			continue;

		ath10k_ce_per_engine_service(ar, ce_id);
	}
}

/*
 * Adjust interrupts for the copy complete handler.
 * If it's needed for either send or recv, then unmask
 * this interrupt; otherwise, mask it.
 *
 * Called with ce_lock held.
 */
static void ath10k_ce_per_engine_handler_adjust(struct ath10k_ce_pipe *ce_state)
{
	u32 ctrl_addr = ce_state->ctrl_addr;
	struct ath10k *ar = ce_state->ar;
	bool disable_copy_compl_intr = ce_state->attr_flags & CE_ATTR_DIS_INTR;

	if ((!disable_copy_compl_intr) &&
	    (ce_state->send_cb || ce_state->recv_cb))
		ath10k_ce_copy_complete_inter_enable(ar, ctrl_addr);
	else
		ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr);

	ath10k_ce_watermark_intr_disable(ar, ctrl_addr);
}

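/* Mask copy-complete, error and watermark interrupts on every CE. */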
int ath10k_ce_disable_interrupts(struct ath10k *ar)
{
	int ce_id;

	for (ce_id = 0; ce_id < CE_COUNT; ce_id++) {
		u32 ctrl_addr = ath10k_ce_base_address(ar, ce_id);

		ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr);
		ath10k_ce_error_intr_disable(ar, ctrl_addr);
		ath10k_ce_watermark_intr_disable(ar, ctrl_addr);
	}

	return 0;
}

void ath10k_ce_enable_interrupts(struct ath10k *ar)
{
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	int ce_id;

	/* Skip the last copy engine, CE7, which is used for the diagnostic
	 * window: it relies on polling and isn't initialized for interrupts.
	 */
	for (ce_id = 0; ce_id < CE_COUNT - 1; ce_id++)
		ath10k_ce_per_engine_handler_adjust(&ar_pci->ce_states[ce_id]);
}

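/*
 * Program the source ring registers for one CE (base address, size,
 * per-descriptor max length, watermarks) and resynchronize the cached
 * sw/hw/write indices with the hardware.
 */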
static int ath10k_ce_init_src_ring(struct ath10k *ar,
				   unsigned int ce_id,
				   const struct ce_attr *attr)
{
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
	u32 nentries, ctrl_addr = ath10k_ce_base_address(ar, ce_id);

	nentries = roundup_pow_of_two(attr->src_nentries);

	memset(src_ring->base_addr_owner_space, 0,
	       nentries * sizeof(struct ce_desc));

	src_ring->sw_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
	src_ring->sw_index &= src_ring->nentries_mask;
	src_ring->hw_index = src_ring->sw_index;

	src_ring->write_index =
		ath10k_ce_src_ring_write_index_get(ar, ctrl_addr);
	src_ring->write_index &= src_ring->nentries_mask;

	ath10k_ce_src_ring_base_addr_set(ar, ctrl_addr,
					 src_ring->base_addr_ce_space);
	ath10k_ce_src_ring_size_set(ar, ctrl_addr, nentries);
	ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, attr->src_sz_max);
	ath10k_ce_src_ring_byte_swap_set(ar, ctrl_addr, 0);
	ath10k_ce_src_ring_lowmark_set(ar, ctrl_addr, 0);
	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, nentries);

	ath10k_dbg(ar, ATH10K_DBG_BOOT,
		   "boot init ce src ring id %d entries %d base_addr %p\n",
		   ce_id, nentries, src_ring->base_addr_owner_space);

	return 0;
}

static int ath10k_ce_init_dest_ring(struct ath10k *ar,
				    unsigned int ce_id,
				    const struct ce_attr *attr)
{
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
	u32 nentries, ctrl_addr = ath10k_ce_base_address(ar, ce_id);

	nentries = roundup_pow_of_two(attr->dest_nentries);

	memset(dest_ring->base_addr_owner_space, 0,
	       nentries * sizeof(struct ce_desc));

	dest_ring->sw_index = ath10k_ce_dest_ring_read_index_get(ar, ctrl_addr);
	dest_ring->sw_index &= dest_ring->nentries_mask;
	dest_ring->write_index =
		ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr);
	dest_ring->write_index &= dest_ring->nentries_mask;

	ath10k_ce_dest_ring_base_addr_set(ar, ctrl_addr,
					  dest_ring->base_addr_ce_space);
	ath10k_ce_dest_ring_size_set(ar, ctrl_addr, nentries);
	ath10k_ce_dest_ring_byte_swap_set(ar, ctrl_addr, 0);
	ath10k_ce_dest_ring_lowmark_set(ar, ctrl_addr, 0);
	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, nentries);

	ath10k_dbg(ar, ATH10K_DBG_BOOT,
		   "boot ce dest ring id %d entries %d base_addr %p\n",
		   ce_id, nentries, dest_ring->base_addr_owner_space);

	return 0;
}

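/*
 * Allocate a source ring. The descriptor array itself lives in DMA-coherent
 * memory shared with the target; a shadow copy is kept in regular memory
 * for faster host access.
 */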
static struct ath10k_ce_ring *
ath10k_ce_alloc_src_ring(struct ath10k *ar, unsigned int ce_id,
			 const struct ce_attr *attr)
{
	struct ath10k_ce_ring *src_ring;
	u32 nentries = attr->src_nentries;
	dma_addr_t base_addr;

	nentries = roundup_pow_of_two(nentries);

	src_ring = kzalloc(sizeof(*src_ring) +
			   (nentries *
			    sizeof(*src_ring->per_transfer_context)),
			   GFP_KERNEL);
	if (src_ring == NULL)
		return ERR_PTR(-ENOMEM);

	src_ring->nentries = nentries;
	src_ring->nentries_mask = nentries - 1;

	/*
	 * Legacy platforms that do not support cache
	 * coherent DMA are unsupported
	 */
	src_ring->base_addr_owner_space_unaligned =
		dma_alloc_coherent(ar->dev,
				   (nentries * sizeof(struct ce_desc) +
				    CE_DESC_RING_ALIGN),
				   &base_addr, GFP_KERNEL);
	if (!src_ring->base_addr_owner_space_unaligned) {
		kfree(src_ring);
		return ERR_PTR(-ENOMEM);
	}

	src_ring->base_addr_ce_space_unaligned = base_addr;

	src_ring->base_addr_owner_space = PTR_ALIGN(
			src_ring->base_addr_owner_space_unaligned,
			CE_DESC_RING_ALIGN);
	src_ring->base_addr_ce_space = ALIGN(
			src_ring->base_addr_ce_space_unaligned,
			CE_DESC_RING_ALIGN);

	/*
	 * Also allocate a shadow src ring in regular
	 * mem to use for faster access.
	 */
	src_ring->shadow_base_unaligned =
		kmalloc((nentries * sizeof(struct ce_desc) +
			 CE_DESC_RING_ALIGN), GFP_KERNEL);
	if (!src_ring->shadow_base_unaligned) {
		dma_free_coherent(ar->dev,
				  (nentries * sizeof(struct ce_desc) +
				   CE_DESC_RING_ALIGN),
				  src_ring->base_addr_owner_space,
				  src_ring->base_addr_ce_space);
		kfree(src_ring);
		return ERR_PTR(-ENOMEM);
	}

	src_ring->shadow_base = PTR_ALIGN(
			src_ring->shadow_base_unaligned,
			CE_DESC_RING_ALIGN);

	return src_ring;
}

static struct ath10k_ce_ring *
ath10k_ce_alloc_dest_ring(struct ath10k *ar, unsigned int ce_id,
			  const struct ce_attr *attr)
{
	struct ath10k_ce_ring *dest_ring;
	u32 nentries;
	dma_addr_t base_addr;

	nentries = roundup_pow_of_two(attr->dest_nentries);

	dest_ring = kzalloc(sizeof(*dest_ring) +
			    (nentries *
			     sizeof(*dest_ring->per_transfer_context)),
			    GFP_KERNEL);
	if (dest_ring == NULL)
		return ERR_PTR(-ENOMEM);

	dest_ring->nentries = nentries;
	dest_ring->nentries_mask = nentries - 1;

	/*
	 * Legacy platforms that do not support cache
	 * coherent DMA are unsupported
	 */
	dest_ring->base_addr_owner_space_unaligned =
		dma_alloc_coherent(ar->dev,
				   (nentries * sizeof(struct ce_desc) +
				    CE_DESC_RING_ALIGN),
				   &base_addr, GFP_KERNEL);
	if (!dest_ring->base_addr_owner_space_unaligned) {
		kfree(dest_ring);
		return ERR_PTR(-ENOMEM);
	}

	dest_ring->base_addr_ce_space_unaligned = base_addr;

	/*
	 * Initialize the memory to 0 to prevent garbage data from
	 * crashing the system when firmware is being downloaded.
	 */
	memset(dest_ring->base_addr_owner_space_unaligned, 0,
	       nentries * sizeof(struct ce_desc) + CE_DESC_RING_ALIGN);

	dest_ring->base_addr_owner_space = PTR_ALIGN(
			dest_ring->base_addr_owner_space_unaligned,
			CE_DESC_RING_ALIGN);
	dest_ring->base_addr_ce_space = ALIGN(
			dest_ring->base_addr_ce_space_unaligned,
			CE_DESC_RING_ALIGN);

	return dest_ring;
}

/*
 * Initialize a Copy Engine based on caller-supplied attributes.
 * This may be called once to initialize both source and destination
 * rings or it may be called twice for separate source and destination
 * initialization. It may be that only one side or the other is
 * initialized by software/firmware.
 */
int ath10k_ce_init_pipe(struct ath10k *ar, unsigned int ce_id,
			const struct ce_attr *attr)
{
	int ret;

	if (attr->src_nentries) {
		ret = ath10k_ce_init_src_ring(ar, ce_id, attr);
		if (ret) {
			ath10k_err(ar, "Failed to initialize CE src ring for ID: %d (%d)\n",
				   ce_id, ret);
			return ret;
		}
	}

	if (attr->dest_nentries) {
		ret = ath10k_ce_init_dest_ring(ar, ce_id, attr);
		if (ret) {
			ath10k_err(ar, "Failed to initialize CE dest ring for ID: %d (%d)\n",
				   ce_id, ret);
			return ret;
		}
	}

	return 0;
}

static void ath10k_ce_deinit_src_ring(struct ath10k *ar, unsigned int ce_id)
{
	u32 ctrl_addr = ath10k_ce_base_address(ar, ce_id);

	ath10k_ce_src_ring_base_addr_set(ar, ctrl_addr, 0);
	ath10k_ce_src_ring_size_set(ar, ctrl_addr, 0);
	ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, 0);
	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, 0);
}

static void ath10k_ce_deinit_dest_ring(struct ath10k *ar, unsigned int ce_id)
{
	u32 ctrl_addr = ath10k_ce_base_address(ar, ce_id);

	ath10k_ce_dest_ring_base_addr_set(ar, ctrl_addr, 0);
	ath10k_ce_dest_ring_size_set(ar, ctrl_addr, 0);
	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, 0);
}

void ath10k_ce_deinit_pipe(struct ath10k *ar, unsigned int ce_id)
{
	ath10k_ce_deinit_src_ring(ar, ce_id);
	ath10k_ce_deinit_dest_ring(ar, ce_id);
}

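/*
 * Allocate host-side state for one copy engine pipe and register its
 * send/recv callbacks. Ring sizes are rounded up to a power of two by
 * the ring allocators above.
 */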
int ath10k_ce_alloc_pipe(struct ath10k *ar, int ce_id,
			 const struct ce_attr *attr,
			 void (*send_cb)(struct ath10k_ce_pipe *),
			 void (*recv_cb)(struct ath10k_ce_pipe *))
{
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
	int ret;

	/*
	 * Make sure there's enough CE ringbuffer entries for HTT TX to avoid
	 * additional TX locking checks.
	 *
	 * For lack of a better place, do the check here.
	 */
	BUILD_BUG_ON(2*TARGET_NUM_MSDU_DESC >
		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
	BUILD_BUG_ON(2*TARGET_10X_NUM_MSDU_DESC >
		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
	BUILD_BUG_ON(2*TARGET_TLV_NUM_MSDU_DESC >
		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));

	ce_state->ar = ar;
	ce_state->id = ce_id;
	ce_state->ctrl_addr = ath10k_ce_base_address(ar, ce_id);
	ce_state->attr_flags = attr->flags;
	ce_state->src_sz_max = attr->src_sz_max;

	if (attr->src_nentries)
		ce_state->send_cb = send_cb;

	if (attr->dest_nentries)
		ce_state->recv_cb = recv_cb;

	if (attr->src_nentries) {
		ce_state->src_ring = ath10k_ce_alloc_src_ring(ar, ce_id, attr);
		if (IS_ERR(ce_state->src_ring)) {
			ret = PTR_ERR(ce_state->src_ring);
			ath10k_err(ar, "failed to allocate copy engine source ring %d: %d\n",
				   ce_id, ret);
			ce_state->src_ring = NULL;
			return ret;
		}
	}

	if (attr->dest_nentries) {
		ce_state->dest_ring = ath10k_ce_alloc_dest_ring(ar, ce_id,
								attr);
		if (IS_ERR(ce_state->dest_ring)) {
			ret = PTR_ERR(ce_state->dest_ring);
			ath10k_err(ar, "failed to allocate copy engine destination ring %d: %d\n",
				   ce_id, ret);
			ce_state->dest_ring = NULL;
			return ret;
		}
	}

	return 0;
}

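/* Release the rings allocated by ath10k_ce_alloc_pipe(). */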
void ath10k_ce_free_pipe(struct ath10k *ar, int ce_id)
{
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];

	if (ce_state->src_ring) {
		kfree(ce_state->src_ring->shadow_base_unaligned);
		dma_free_coherent(ar->dev,
				  (ce_state->src_ring->nentries *
				   sizeof(struct ce_desc) +
				   CE_DESC_RING_ALIGN),
				  ce_state->src_ring->base_addr_owner_space,
				  ce_state->src_ring->base_addr_ce_space);
		kfree(ce_state->src_ring);
	}

	if (ce_state->dest_ring) {
		dma_free_coherent(ar->dev,
				  (ce_state->dest_ring->nentries *
				   sizeof(struct ce_desc) +
				   CE_DESC_RING_ALIGN),
				  ce_state->dest_ring->base_addr_owner_space,
				  ce_state->dest_ring->base_addr_ce_space);
		kfree(ce_state->dest_ring);
	}

	ce_state->src_ring = NULL;
	ce_state->dest_ring = NULL;
}