1/*
2 *
3 * This file is provided under a dual BSD/GPLv2 license.  When using or
4 * redistributing this file, you may do so under either license.
5 *
6 * GPL LICENSE SUMMARY
7 *
8 * Copyright(c) 2015 Intel Corporation.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of version 2 of the GNU General Public License as
12 * published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 * General Public License for more details.
18 *
19 * BSD LICENSE
20 *
21 * Copyright(c) 2015 Intel Corporation.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 *
27 *  - Redistributions of source code must retain the above copyright
28 *    notice, this list of conditions and the following disclaimer.
29 *  - Redistributions in binary form must reproduce the above copyright
30 *    notice, this list of conditions and the following disclaimer in
31 *    the documentation and/or other materials provided with the
32 *    distribution.
33 *  - Neither the name of Intel Corporation nor the names of its
34 *    contributors may be used to endorse or promote products derived
35 *    from this software without specific prior written permission.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48 *
49 */
50
51/*
52 * This file contains support for diagnostic functions.  It is accessed by
53 * opening the hfi1_diag device, normally minor number 129.  Diagnostic use
54 * of the chip may render the chip or board unusable until the driver
55 * is unloaded, or in some cases, until the system is rebooted.
56 *
57 * Accesses to the chip through this interface are not similar to going
58 * through the /sys/bus/pci resource mmap interface.
59 */
60
61#include <linux/io.h>
62#include <linux/pci.h>
63#include <linux/poll.h>
64#include <linux/vmalloc.h>
65#include <linux/export.h>
66#include <linux/fs.h>
67#include <linux/uaccess.h>
68#include <linux/module.h>
69#include <rdma/ib_smi.h>
70#include "hfi.h"
71#include "device.h"
72#include "common.h"
73#include "trace.h"
74
75#undef pr_fmt
76#define pr_fmt(fmt) DRIVER_NAME ": " fmt
77#define snoop_dbg(fmt, ...) \
78	hfi1_cdbg(SNOOP, fmt, ##__VA_ARGS__)
79
80/* Snoop option mask */
81#define SNOOP_DROP_SEND	(1 << 0)
82#define SNOOP_USE_METADATA	(1 << 1)
83
84static u8 snoop_flags;
85
86/*
87 * Extract packet length from LRH header.
88 * Why & 0x7FF? Because len is only 11 bits in case it wasn't 0'd we throw the
89 * bogus bits away. This is in Dwords so multiply by 4 to get size in bytes
90 */
91#define HFI1_GET_PKT_LEN(x)      (((be16_to_cpu((x)->lrh[2]) & 0x7FF)) << 2)
92
/* Result of applying a snoop filter callback to a packet. */
enum hfi1_filter_status {
	HFI1_FILTER_HIT,	/* packet matches the filter */
	HFI1_FILTER_ERR,	/* filter could not be evaluated */
	HFI1_FILTER_MISS	/* packet does not match */
};
98
/* snoop processing functions */
/*
 * Receive dispatch table installed in place of the normal RHF handlers
 * while snoop/capture is active (see hfi1_snoop_open()): every valid
 * receive type funnels into snoop_recv_handler, invalid types keep the
 * stock invalid-type handler.
 */
rhf_rcv_function_ptr snoop_rhf_rcv_functions[8] = {
	[RHF_RCV_TYPE_EXPECTED] = snoop_recv_handler,
	[RHF_RCV_TYPE_EAGER]    = snoop_recv_handler,
	[RHF_RCV_TYPE_IB]       = snoop_recv_handler,
	[RHF_RCV_TYPE_ERROR]    = snoop_recv_handler,
	[RHF_RCV_TYPE_BYPASS]   = snoop_recv_handler,
	[RHF_RCV_TYPE_INVALID5] = process_receive_invalid,
	[RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
	[RHF_RCV_TYPE_INVALID7] = process_receive_invalid
};
110
/* Snoop packet structure */
struct snoop_packet {
	struct list_head list;	/* entry on dd->hfi1_snoop.queue */
	u32 total_len;		/* number of valid bytes in data[] */
	u8 data[];		/* captured packet bytes (flexible array) */
};
117
118/* Do not make these an enum or it will blow up the capture_md */
119#define PKT_DIR_EGRESS 0x0
120#define PKT_DIR_INGRESS 0x1
121
/* Packet capture metadata returned to the user with the packet. */
struct capture_md {
	u8 port;		/* port number the packet belongs to */
	u8 dir;			/* PKT_DIR_EGRESS or PKT_DIR_INGRESS */
	u8 reserved[6];		/* pad so the union below is 8-byte aligned */
	union {
		u64 pbc;	/* presumably valid for egress packets */
		u64 rhf;	/* presumably valid for ingress packets */
	} u;
};
132
133static atomic_t diagpkt_count = ATOMIC_INIT(0);
134static struct cdev diagpkt_cdev;
135static struct device *diagpkt_device;
136
137static ssize_t diagpkt_write(struct file *fp, const char __user *data,
138				 size_t count, loff_t *off);
139
/* Diagpkt device is write-only: packets are injected via write(2). */
static const struct file_operations diagpkt_file_ops = {
	.owner = THIS_MODULE,
	.write = diagpkt_write,
	.llseek = noop_llseek,
};
145
146/*
147 * This is used for communication with user space for snoop extended IOCTLs
148 */
149struct hfi1_link_info {
150	__be64 node_guid;
151	u8 port_mode;
152	u8 port_state;
153	u16 link_speed_active;
154	u16 link_width_active;
155	u16 vl15_init;
156	u8 port_number;
157	/*
158	 * Add padding to make this a full IB SMP payload. Note: changing the
159	 * size of this structure will make the IOCTLs created with _IOWR
160	 * change.
161	 * Be sure to run tests on all IOCTLs when making changes to this
162	 * structure.
163	 */
164	u8 res[47];
165};
166
167/*
168 * This starts our ioctl sequence numbers *way* off from the ones
169 * defined in ib_core.
170 */
171#define SNOOP_CAPTURE_VERSION 0x1
172
173#define IB_IOCTL_MAGIC          0x1b /* See Documentation/ioctl-number.txt */
174#define HFI1_SNOOP_IOC_MAGIC IB_IOCTL_MAGIC
175#define HFI1_SNOOP_IOC_BASE_SEQ 0x80
176
177#define HFI1_SNOOP_IOCGETLINKSTATE \
178	_IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ)
179#define HFI1_SNOOP_IOCSETLINKSTATE \
180	_IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ+1)
181#define HFI1_SNOOP_IOCCLEARQUEUE \
182	_IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ+2)
183#define HFI1_SNOOP_IOCCLEARFILTER \
184	_IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ+3)
185#define HFI1_SNOOP_IOCSETFILTER \
186	_IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ+4)
187#define HFI1_SNOOP_IOCGETVERSION \
188	_IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ+5)
189#define HFI1_SNOOP_IOCSET_OPTS \
190	_IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ+6)
191
192/*
193 * These offsets +6/+7 could change, but these are already known and used
194 * IOCTL numbers so don't change them without a good reason.
195 */
196#define HFI1_SNOOP_IOCGETLINKSTATE_EXTRA \
197	_IOWR(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ+6, \
198		struct hfi1_link_info)
199#define HFI1_SNOOP_IOCSETLINKSTATE_EXTRA \
200	_IOWR(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ+7, \
201		struct hfi1_link_info)
202
203static int hfi1_snoop_open(struct inode *in, struct file *fp);
204static ssize_t hfi1_snoop_read(struct file *fp, char __user *data,
205				size_t pkt_len, loff_t *off);
206static ssize_t hfi1_snoop_write(struct file *fp, const char __user *data,
207				 size_t count, loff_t *off);
208static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg);
209static unsigned int hfi1_snoop_poll(struct file *fp,
210					struct poll_table_struct *wait);
211static int hfi1_snoop_release(struct inode *in, struct file *fp);
212
/* User-supplied filter description for HFI1_SNOOP_IOCSETFILTER. */
struct hfi1_packet_filter_command {
	int opcode;		/* one of enum hfi1_packet_filter_opcodes */
	int length;		/* size of the value data -- presumably bytes; per-opcode */
	void *value_ptr;	/* user pointer to the value to match against */
};
218
219/* Can't re-use PKT_DIR_*GRESS here because 0 means no packets for this */
220#define HFI1_SNOOP_INGRESS 0x1
221#define HFI1_SNOOP_EGRESS  0x2
222
/*
 * Filter opcodes: each value indexes hfi1_filters[], so the order here
 * must stay in sync with that table.
 */
enum hfi1_packet_filter_opcodes {
	FILTER_BY_LID,
	FILTER_BY_DLID,
	FILTER_BY_MAD_MGMT_CLASS,
	FILTER_BY_QP_NUMBER,
	FILTER_BY_PKT_TYPE,
	FILTER_BY_SERVICE_LEVEL,
	FILTER_BY_PKEY,
	FILTER_BY_DIRECTION,
};
233
/* File operations for the per-device snoop/capture character device. */
static const struct file_operations snoop_file_ops = {
	.owner = THIS_MODULE,
	.open = hfi1_snoop_open,
	.read = hfi1_snoop_read,
	.unlocked_ioctl = hfi1_ioctl,
	.poll = hfi1_snoop_poll,
	.write = hfi1_snoop_write,
	.release = hfi1_snoop_release
};
243
244struct hfi1_filter_array {
245	int (*filter)(void *, void *, void *);
246};
247
248static int hfi1_filter_lid(void *ibhdr, void *packet_data, void *value);
249static int hfi1_filter_dlid(void *ibhdr, void *packet_data, void *value);
250static int hfi1_filter_mad_mgmt_class(void *ibhdr, void *packet_data,
251				      void *value);
252static int hfi1_filter_qp_number(void *ibhdr, void *packet_data, void *value);
253static int hfi1_filter_ibpacket_type(void *ibhdr, void *packet_data,
254				     void *value);
255static int hfi1_filter_ib_service_level(void *ibhdr, void *packet_data,
256					void *value);
257static int hfi1_filter_ib_pkey(void *ibhdr, void *packet_data, void *value);
258static int hfi1_filter_direction(void *ibhdr, void *packet_data, void *value);
259
/*
 * Filter dispatch table, indexed by enum hfi1_packet_filter_opcodes;
 * entry order must match the enum order exactly.
 */
static struct hfi1_filter_array hfi1_filters[] = {
	{ hfi1_filter_lid },
	{ hfi1_filter_dlid },
	{ hfi1_filter_mad_mgmt_class },
	{ hfi1_filter_qp_number },
	{ hfi1_filter_ibpacket_type },
	{ hfi1_filter_ib_service_level },
	{ hfi1_filter_ib_pkey },
	{ hfi1_filter_direction },
};
270
271#define HFI1_MAX_FILTERS	ARRAY_SIZE(hfi1_filters)
272#define HFI1_DIAG_MINOR_BASE	129
273
274static int hfi1_snoop_add(struct hfi1_devdata *dd, const char *name);
275
276int hfi1_diag_add(struct hfi1_devdata *dd)
277{
278	char name[16];
279	int ret = 0;
280
281	snprintf(name, sizeof(name), "%s_diagpkt%d", class_name(),
282		 dd->unit);
283	/*
284	 * Do this for each device as opposed to the normal diagpkt
285	 * interface which is one per host
286	 */
287	ret = hfi1_snoop_add(dd, name);
288	if (ret)
289		dd_dev_err(dd, "Unable to init snoop/capture device");
290
291	snprintf(name, sizeof(name), "%s_diagpkt", class_name());
292	if (atomic_inc_return(&diagpkt_count) == 1) {
293		ret = hfi1_cdev_init(HFI1_DIAGPKT_MINOR, name,
294				     &diagpkt_file_ops, &diagpkt_cdev,
295				     &diagpkt_device, false);
296	}
297
298	return ret;
299}
300
301/* this must be called w/ dd->snoop_in_lock held */
302static void drain_snoop_list(struct list_head *queue)
303{
304	struct list_head *pos, *q;
305	struct snoop_packet *packet;
306
307	list_for_each_safe(pos, q, queue) {
308		packet = list_entry(pos, struct snoop_packet, list);
309		list_del(pos);
310		kfree(packet);
311	}
312}
313
314static void hfi1_snoop_remove(struct hfi1_devdata *dd)
315{
316	unsigned long flags = 0;
317
318	spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
319	drain_snoop_list(&dd->hfi1_snoop.queue);
320	hfi1_cdev_cleanup(&dd->hfi1_snoop.cdev, &dd->hfi1_snoop.class_dev);
321	spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
322}
323
324void hfi1_diag_remove(struct hfi1_devdata *dd)
325{
326
327	hfi1_snoop_remove(dd);
328	if (atomic_dec_and_test(&diagpkt_count))
329		hfi1_cdev_cleanup(&diagpkt_cdev, &diagpkt_device);
330	hfi1_cdev_cleanup(&dd->diag_cdev, &dd->diag_device);
331}
332
333
334/*
335 * Allocated structure shared between the credit return mechanism and
336 * diagpkt_send().
337 */
338struct diagpkt_wait {
339	struct completion credits_returned;
340	int code;
341	atomic_t count;
342};
343
344/*
345 * When each side is finished with the structure, they call this.
346 * The last user frees the structure.
347 */
348static void put_diagpkt_wait(struct diagpkt_wait *wait)
349{
350	if (atomic_dec_and_test(&wait->count))
351		kfree(wait);
352}
353
354/*
355 * Callback from the credit return code.  Set the complete, which
356 * will let diapkt_send() continue.
357 */
358static void diagpkt_complete(void *arg, int code)
359{
360	struct diagpkt_wait *wait = (struct diagpkt_wait *)arg;
361
362	wait->code = code;
363	complete(&wait->credits_returned);
364	put_diagpkt_wait(wait);	/* finished with the structure */
365}
366
367/**
368 * diagpkt_send - send a packet
369 * @dp: diag packet descriptor
370 */
371static ssize_t diagpkt_send(struct diag_pkt *dp)
372{
373	struct hfi1_devdata *dd;
374	struct send_context *sc;
375	struct pio_buf *pbuf;
376	u32 *tmpbuf = NULL;
377	ssize_t ret = 0;
378	u32 pkt_len, total_len;
379	pio_release_cb credit_cb = NULL;
380	void *credit_arg = NULL;
381	struct diagpkt_wait *wait = NULL;
382
383	dd = hfi1_lookup(dp->unit);
384	if (!dd || !(dd->flags & HFI1_PRESENT) || !dd->kregbase) {
385		ret = -ENODEV;
386		goto bail;
387	}
388	if (!(dd->flags & HFI1_INITTED)) {
389		/* no hardware, freeze, etc. */
390		ret = -ENODEV;
391		goto bail;
392	}
393
394	if (dp->version != _DIAG_PKT_VERS) {
395		dd_dev_err(dd, "Invalid version %u for diagpkt_write\n",
396			    dp->version);
397		ret = -EINVAL;
398		goto bail;
399	}
400
401	/* send count must be an exact number of dwords */
402	if (dp->len & 3) {
403		ret = -EINVAL;
404		goto bail;
405	}
406
407	/* there is only port 1 */
408	if (dp->port != 1) {
409		ret = -EINVAL;
410		goto bail;
411	}
412
413	/* need a valid context */
414	if (dp->sw_index >= dd->num_send_contexts) {
415		ret = -EINVAL;
416		goto bail;
417	}
418	/* can only use kernel contexts */
419	if (dd->send_contexts[dp->sw_index].type != SC_KERNEL) {
420		ret = -EINVAL;
421		goto bail;
422	}
423	/* must be allocated */
424	sc = dd->send_contexts[dp->sw_index].sc;
425	if (!sc) {
426		ret = -EINVAL;
427		goto bail;
428	}
429	/* must be enabled */
430	if (!(sc->flags & SCF_ENABLED)) {
431		ret = -EINVAL;
432		goto bail;
433	}
434
435	/* allocate a buffer and copy the data in */
436	tmpbuf = vmalloc(dp->len);
437	if (!tmpbuf) {
438		ret = -ENOMEM;
439		goto bail;
440	}
441
442	if (copy_from_user(tmpbuf,
443			   (const void __user *) (unsigned long) dp->data,
444			   dp->len)) {
445		ret = -EFAULT;
446		goto bail;
447	}
448
449	/*
450	 * pkt_len is how much data we have to write, includes header and data.
451	 * total_len is length of the packet in Dwords plus the PBC should not
452	 * include the CRC.
453	 */
454	pkt_len = dp->len >> 2;
455	total_len = pkt_len + 2; /* PBC + packet */
456
457	/* if 0, fill in a default */
458	if (dp->pbc == 0) {
459		struct hfi1_pportdata *ppd = dd->pport;
460
461		hfi1_cdbg(PKT, "Generating PBC");
462		dp->pbc = create_pbc(ppd, 0, 0, 0, total_len);
463	} else {
464		hfi1_cdbg(PKT, "Using passed in PBC");
465	}
466
467	hfi1_cdbg(PKT, "Egress PBC content is 0x%llx", dp->pbc);
468
469	/*
470	 * The caller wants to wait until the packet is sent and to
471	 * check for errors.  The best we can do is wait until
472	 * the buffer credits are returned and check if any packet
473	 * error has occurred.  If there are any late errors, this
474	 * could miss it.  If there are other senders who generate
475	 * an error, this may find it.  However, in general, it
476	 * should catch most.
477	 */
478	if (dp->flags & F_DIAGPKT_WAIT) {
479		/* always force a credit return */
480		dp->pbc |= PBC_CREDIT_RETURN;
481		/* turn on credit return interrupts */
482		sc_add_credit_return_intr(sc);
483		wait = kmalloc(sizeof(*wait), GFP_KERNEL);
484		if (!wait) {
485			ret = -ENOMEM;
486			goto bail;
487		}
488		init_completion(&wait->credits_returned);
489		atomic_set(&wait->count, 2);
490		wait->code = PRC_OK;
491
492		credit_cb = diagpkt_complete;
493		credit_arg = wait;
494	}
495
496	pbuf = sc_buffer_alloc(sc, total_len, credit_cb, credit_arg);
497	if (!pbuf) {
498		/*
499		 * No send buffer means no credit callback.  Undo
500		 * the wait set-up that was done above.  We free wait
501		 * because the callback will never be called.
502		 */
503		if (dp->flags & F_DIAGPKT_WAIT) {
504			sc_del_credit_return_intr(sc);
505			kfree(wait);
506			wait = NULL;
507		}
508		ret = -ENOSPC;
509		goto bail;
510	}
511
512	pio_copy(dd, pbuf, dp->pbc, tmpbuf, pkt_len);
513	/* no flush needed as the HW knows the packet size */
514
515	ret = sizeof(*dp);
516
517	if (dp->flags & F_DIAGPKT_WAIT) {
518		/* wait for credit return */
519		ret = wait_for_completion_interruptible(
520						&wait->credits_returned);
521		/*
522		 * If the wait returns an error, the wait was interrupted,
523		 * e.g. with a ^C in the user program.  The callback is
524		 * still pending.  This is OK as the wait structure is
525		 * kmalloc'ed and the structure will free itself when
526		 * all users are done with it.
527		 *
528		 * A context disable occurs on a send context restart, so
529		 * include that in the list of errors below to check for.
530		 * NOTE: PRC_FILL_ERR is at best informational and cannot
531		 * be depended on.
532		 */
533		if (!ret && (((wait->code & PRC_STATUS_ERR)
534				|| (wait->code & PRC_FILL_ERR)
535				|| (wait->code & PRC_SC_DISABLE))))
536			ret = -EIO;
537
538		put_diagpkt_wait(wait);	/* finished with the structure */
539		sc_del_credit_return_intr(sc);
540	}
541
542bail:
543	vfree(tmpbuf);
544	return ret;
545}
546
547static ssize_t diagpkt_write(struct file *fp, const char __user *data,
548				 size_t count, loff_t *off)
549{
550	struct hfi1_devdata *dd;
551	struct send_context *sc;
552	u8 vl;
553
554	struct diag_pkt dp;
555
556	if (count != sizeof(dp))
557		return -EINVAL;
558
559	if (copy_from_user(&dp, data, sizeof(dp)))
560		return -EFAULT;
561
562	/*
563	* The Send Context is derived from the PbcVL value
564	* if PBC is populated
565	*/
566	if (dp.pbc) {
567		dd = hfi1_lookup(dp.unit);
568		if (dd == NULL)
569			return -ENODEV;
570		vl = (dp.pbc >> PBC_VL_SHIFT) & PBC_VL_MASK;
571		sc = dd->vld[vl].sc;
572		if (sc) {
573			dp.sw_index = sc->sw_index;
574			hfi1_cdbg(
575			       PKT,
576			       "Packet sent over VL %d via Send Context %u(%u)",
577			       vl, sc->sw_index, sc->hw_context);
578		}
579	}
580
581	return diagpkt_send(&dp);
582}
583
584static int hfi1_snoop_add(struct hfi1_devdata *dd, const char *name)
585{
586	int ret = 0;
587
588	dd->hfi1_snoop.mode_flag = 0;
589	spin_lock_init(&dd->hfi1_snoop.snoop_lock);
590	INIT_LIST_HEAD(&dd->hfi1_snoop.queue);
591	init_waitqueue_head(&dd->hfi1_snoop.waitq);
592
593	ret = hfi1_cdev_init(HFI1_SNOOP_CAPTURE_BASE + dd->unit, name,
594			     &snoop_file_ops,
595			     &dd->hfi1_snoop.cdev, &dd->hfi1_snoop.class_dev,
596			     false);
597
598	if (ret) {
599		dd_dev_err(dd, "Couldn't create %s device: %d", name, ret);
600		hfi1_cdev_cleanup(&dd->hfi1_snoop.cdev,
601				 &dd->hfi1_snoop.class_dev);
602	}
603
604	return ret;
605}
606
607static struct hfi1_devdata *hfi1_dd_from_sc_inode(struct inode *in)
608{
609	int unit = iminor(in) - HFI1_SNOOP_CAPTURE_BASE;
610	struct hfi1_devdata *dd;
611
612	dd = hfi1_lookup(unit);
613	return dd;
614
615}
616
/* clear or restore send context integrity checks */
/*
 * Walk every allocated send context and re-apply its PIO integrity
 * settings; set_pio_integrity() picks clear vs. restore based on the
 * current snoop mode.  Called on snoop open (clear) and release
 * (restore).
 */
static void adjust_integrity_checks(struct hfi1_devdata *dd)
{
	struct send_context *sc;
	unsigned long sc_flags;
	int i;

	spin_lock_irqsave(&dd->sc_lock, sc_flags);
	for (i = 0; i < dd->num_send_contexts; i++) {
		int enable;

		sc = dd->send_contexts[i].sc;

		if (!sc)
			continue;	/* not allocated */

		/*
		 * Re-run the context capability setup only when integrity
		 * is meant to be on: NO_INTEGRITY not set and the port is
		 * not currently in snoop mode.
		 */
		enable = likely(!HFI1_CAP_IS_KSET(NO_INTEGRITY)) &&
			 dd->hfi1_snoop.mode_flag != HFI1_PORT_SNOOP_MODE;

		set_pio_integrity(sc);

		if (enable) /* take HFI_CAP_* flags into account */
			hfi1_init_ctxt(sc);
	}
	spin_unlock_irqrestore(&dd->sc_lock, sc_flags);
}
643
/*
 * open(2) handler for the snoop/capture device.
 *
 * The open mode selects the function: O_RDONLY enables capture,
 * O_RDWR enables snoop (other modes are rejected).  Only one opener is
 * allowed at a time -- a non-zero mode_flag means the device is busy.
 * On success the driver's receive and send handlers are redirected to
 * the snoop versions, and for snoop mode the send-side integrity
 * checks and the DLID check are disabled until release.
 */
static int hfi1_snoop_open(struct inode *in, struct file *fp)
{
	int ret;
	int mode_flag = 0;
	unsigned long flags = 0;
	struct hfi1_devdata *dd;
	struct list_head *queue;

	mutex_lock(&hfi1_mutex);

	dd = hfi1_dd_from_sc_inode(in);
	if (dd == NULL) {
		ret = -ENODEV;
		goto bail;
	}

	/*
	 * File mode determines snoop or capture. Some existing user
	 * applications expect the capture device to be able to be opened RDWR
	 * because they expect a dedicated capture device. For this reason we
	 * support a module param to force capture mode even if the file open
	 * mode matches snoop.
	 */
	if ((fp->f_flags & O_ACCMODE) == O_RDONLY) {
		snoop_dbg("Capture Enabled");
		mode_flag = HFI1_PORT_CAPTURE_MODE;
	} else if ((fp->f_flags & O_ACCMODE) == O_RDWR) {
		snoop_dbg("Snoop Enabled");
		mode_flag = HFI1_PORT_SNOOP_MODE;
	} else {
		snoop_dbg("Invalid");
		ret =  -EINVAL;
		goto bail;
	}
	queue = &dd->hfi1_snoop.queue;

	/*
	 * We are not supporting snoop and capture at the same time.
	 */
	spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
	if (dd->hfi1_snoop.mode_flag) {
		ret = -EBUSY;
		spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
		goto bail;
	}

	dd->hfi1_snoop.mode_flag = mode_flag;
	/* start with an empty queue and no filter installed */
	drain_snoop_list(queue);

	dd->hfi1_snoop.filter_callback = NULL;
	dd->hfi1_snoop.filter_value = NULL;

	/*
	 * Send side packet integrity checks are not helpful when snooping so
	 * disable and re-enable when we stop snooping.
	 */
	if (mode_flag == HFI1_PORT_SNOOP_MODE) {
		/* clear after snoop mode is on */
		adjust_integrity_checks(dd); /* clear */

		/*
		 * We also do not want to be doing the DLID LMC check for
		 * ingressed packets.
		 */
		/* save the current CSR so release can restore it */
		dd->hfi1_snoop.dcc_cfg = read_csr(dd, DCC_CFG_PORT_CONFIG1);
		write_csr(dd, DCC_CFG_PORT_CONFIG1,
			  (dd->hfi1_snoop.dcc_cfg >> 32) << 32);
	}

	/*
	 * As soon as we set these function pointers the recv and send handlers
	 * are active. This is a race condition so we must make sure to drain
	 * the queue and init filter values above. Technically we should add
	 * locking here but all that will happen is on recv a packet will get
	 * allocated and get stuck on the snoop_lock before getting added to the
	 * queue. Same goes for send.
	 */
	dd->rhf_rcv_function_map = snoop_rhf_rcv_functions;
	dd->process_pio_send = snoop_send_pio_handler;
	dd->process_dma_send = snoop_send_pio_handler;
	dd->pio_inline_send = snoop_inline_pio_send;

	spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
	ret = 0;

bail:
	mutex_unlock(&hfi1_mutex);

	return ret;
}
734
/*
 * release(2) handler for the snoop/capture device.
 *
 * Clears snoop/capture mode, drains any queued packets, restores the
 * integrity checks and DLID-check CSR changed at open time (snoop mode
 * only), drops the installed filter, and points the receive/send
 * handlers back at the normal implementations.
 */
static int hfi1_snoop_release(struct inode *in, struct file *fp)
{
	unsigned long flags = 0;
	struct hfi1_devdata *dd;
	int mode_flag;

	dd = hfi1_dd_from_sc_inode(in);
	if (dd == NULL)
		return -ENODEV;

	spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);

	/* clear the snoop mode before re-adjusting send context CSRs */
	mode_flag = dd->hfi1_snoop.mode_flag;
	dd->hfi1_snoop.mode_flag = 0;

	/*
	 * Drain the queue and clear the filters we are done with it. Don't
	 * forget to restore the packet integrity checks
	 */
	drain_snoop_list(&dd->hfi1_snoop.queue);
	if (mode_flag == HFI1_PORT_SNOOP_MODE) {
		/* restore after snoop mode is clear */
		adjust_integrity_checks(dd); /* restore */

		/*
		 * Also should probably reset the DCC_CONFIG1 register for DLID
		 * checking on incoming packets again. Use the value saved when
		 * opening the snoop device.
		 */
		write_csr(dd, DCC_CFG_PORT_CONFIG1, dd->hfi1_snoop.dcc_cfg);
	}

	dd->hfi1_snoop.filter_callback = NULL;
	kfree(dd->hfi1_snoop.filter_value);
	dd->hfi1_snoop.filter_value = NULL;

	/*
	 * User is done snooping and capturing, return control to the normal
	 * handler. Re-enable SDMA handling.
	 */
	dd->rhf_rcv_function_map = dd->normal_rhf_rcv_functions;
	dd->process_pio_send = hfi1_verbs_send_pio;
	dd->process_dma_send = hfi1_verbs_send_dma;
	dd->pio_inline_send = pio_copy;

	spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);

	snoop_dbg("snoop/capture device released");

	return 0;
}
787
788static unsigned int hfi1_snoop_poll(struct file *fp,
789				    struct poll_table_struct *wait)
790{
791	int ret = 0;
792	unsigned long flags = 0;
793
794	struct hfi1_devdata *dd;
795
796	dd = hfi1_dd_from_sc_inode(fp->f_inode);
797	if (dd == NULL)
798		return -ENODEV;
799
800	spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
801
802	poll_wait(fp, &dd->hfi1_snoop.waitq, wait);
803	if (!list_empty(&dd->hfi1_snoop.queue))
804		ret |= POLLIN | POLLRDNORM;
805
806	spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
807	return ret;
808
809}
810
/*
 * write(2) handler for the snoop/capture device: inject a raw packet.
 *
 * Unless SNOOP_USE_METADATA is set, the PBC is generated here from the
 * SC/SL found in the first two bytes of the packet; otherwise the user
 * supplies the PBC as an 8-byte prefix ahead of the packet data.
 * Returns @count on success, negative errno on failure.
 *
 * NOTE(review): the failed copy_from_user() calls below return -EINVAL
 * where kernel convention is -EFAULT; left as-is since existing user
 * tools may depend on the current error code -- confirm before
 * changing.
 */
static ssize_t hfi1_snoop_write(struct file *fp, const char __user *data,
				size_t count, loff_t *off)
{
	struct diag_pkt dpkt;
	struct hfi1_devdata *dd;
	size_t ret;
	u8 byte_two, sl, sc5, sc4, vl, byte_one;
	struct send_context *sc;
	u32 len;
	u64 pbc;
	struct hfi1_ibport *ibp;
	struct hfi1_pportdata *ppd;

	dd = hfi1_dd_from_sc_inode(fp->f_inode);
	if (dd == NULL)
		return -ENODEV;

	ppd = dd->pport;
	snoop_dbg("received %lu bytes from user", count);

	/* build a diag_pkt descriptor and hand off to diagpkt_send() */
	memset(&dpkt, 0, sizeof(struct diag_pkt));
	dpkt.version = _DIAG_PKT_VERS;
	dpkt.unit = dd->unit;
	dpkt.port = 1;

	if (likely(!(snoop_flags & SNOOP_USE_METADATA))) {
		/*
		* We need to generate the PBC and not let diagpkt_send do it,
		* to do this we need the VL and the length in dwords.
		* The VL can be determined by using the SL and looking up the
		* SC. Then the SC can be converted into VL. The exception to
		* this is those packets which are from an SMI queue pair.
		* Since we can't detect anything about the QP here we have to
		* rely on the SC. If its 0xF then we assume its SMI and
		* do not look at the SL.
		*/
		if (copy_from_user(&byte_one, data, 1))
			return -EINVAL;

		if (copy_from_user(&byte_two, data+1, 1))
			return -EINVAL;

		/* upper nibble of byte 0 carries the SC[3:0] bits */
		sc4 = (byte_one >> 4) & 0xf;
		if (sc4 == 0xF) {
			snoop_dbg("Detected VL15 packet ignoring SL in packet");
			vl = sc4;
		} else {
			/* SL -> SC -> VL, then cross-check against SC4 */
			sl = (byte_two >> 4) & 0xf;
			ibp = to_iport(&dd->verbs_dev.ibdev, 1);
			sc5 = ibp->sl_to_sc[sl];
			vl = sc_to_vlt(dd, sc5);
			if (vl != sc4) {
				snoop_dbg("VL %d does not match SC %d of packet",
					  vl, sc4);
				return -EINVAL;
			}
		}

		sc = dd->vld[vl].sc; /* Look up the context based on VL */
		if (sc) {
			dpkt.sw_index = sc->sw_index;
			snoop_dbg("Sending on context %u(%u)", sc->sw_index,
				  sc->hw_context);
		} else {
			snoop_dbg("Could not find context for vl %d", vl);
			return -EINVAL;
		}

		len = (count >> 2) + 2; /* Add in PBC */
		pbc = create_pbc(ppd, 0, 0, vl, len);
	} else {
		/* metadata mode: PBC comes first in the user buffer */
		if (copy_from_user(&pbc, data, sizeof(pbc)))
			return -EINVAL;
		vl = (pbc >> PBC_VL_SHIFT) & PBC_VL_MASK;
		sc = dd->vld[vl].sc; /* Look up the context based on VL */
		if (sc) {
			dpkt.sw_index = sc->sw_index;
		} else {
			snoop_dbg("Could not find context for vl %d", vl);
			return -EINVAL;
		}
		/* skip the PBC prefix; the rest is the packet proper */
		data += sizeof(pbc);
		count -= sizeof(pbc);
	}
	dpkt.len = count;
	dpkt.data = (unsigned long)data;

	snoop_dbg("PBC: vl=0x%llx Length=0x%llx",
		  (pbc >> 12) & 0xf,
		  (pbc & 0xfff));

	dpkt.pbc = pbc;
	ret = diagpkt_send(&dpkt);
	/*
	 * diagpkt_send only returns number of bytes in the diagpkt so patch
	 * that up here before returning.
	 */
	if (ret == sizeof(dpkt))
		return count;

	return ret;
}
913
/*
 * read(2) handler for the snoop/capture device: hand one queued packet
 * to the user.  Blocks (unless O_NONBLOCK) until a packet is queued.
 * A read buffer smaller than the packet fails with -EINVAL rather than
 * truncating; the packet is dropped in that case since it was already
 * dequeued.
 */
static ssize_t hfi1_snoop_read(struct file *fp, char __user *data,
			       size_t pkt_len, loff_t *off)
{
	ssize_t ret = 0;
	unsigned long flags = 0;
	struct snoop_packet *packet = NULL;
	struct hfi1_devdata *dd;

	dd = hfi1_dd_from_sc_inode(fp->f_inode);
	if (dd == NULL)
		return -ENODEV;

	spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);

	/* sleep outside the lock, then re-check emptiness under it */
	while (list_empty(&dd->hfi1_snoop.queue)) {
		spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);

		if (fp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(
				dd->hfi1_snoop.waitq,
				!list_empty(&dd->hfi1_snoop.queue)))
			return -EINTR;

		spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
	}

	if (!list_empty(&dd->hfi1_snoop.queue)) {
		/* detach the head packet, then copy it out unlocked */
		packet = list_entry(dd->hfi1_snoop.queue.next,
				    struct snoop_packet, list);
		list_del(&packet->list);
		spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
		if (pkt_len >= packet->total_len) {
			if (copy_to_user(data, packet->data,
				packet->total_len))
				ret = -EFAULT;
			else
				ret = packet->total_len;
		} else
			ret = -EINVAL;

		kfree(packet);
	} else
		spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);

	return ret;
}
962
963static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
964{
965	struct hfi1_devdata *dd;
966	void *filter_value = NULL;
967	long ret = 0;
968	int value = 0;
969	u8 physState = 0;
970	u8 linkState = 0;
971	u16 devState = 0;
972	unsigned long flags = 0;
973	unsigned long *argp = NULL;
974	struct hfi1_packet_filter_command filter_cmd = {0};
975	int mode_flag = 0;
976	struct hfi1_pportdata *ppd = NULL;
977	unsigned int index;
978	struct hfi1_link_info link_info;
979
980	dd = hfi1_dd_from_sc_inode(fp->f_inode);
981	if (dd == NULL)
982		return -ENODEV;
983
984	spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
985
986	mode_flag = dd->hfi1_snoop.mode_flag;
987
988	if (((_IOC_DIR(cmd) & _IOC_READ)
989	    && !access_ok(VERIFY_WRITE, (void __user *)arg, _IOC_SIZE(cmd)))
990	    || ((_IOC_DIR(cmd) & _IOC_WRITE)
991	    && !access_ok(VERIFY_READ, (void __user *)arg, _IOC_SIZE(cmd)))) {
992		ret = -EFAULT;
993	} else if (!capable(CAP_SYS_ADMIN)) {
994		ret = -EPERM;
995	} else if ((mode_flag & HFI1_PORT_CAPTURE_MODE) &&
996		   (cmd != HFI1_SNOOP_IOCCLEARQUEUE) &&
997		   (cmd != HFI1_SNOOP_IOCCLEARFILTER) &&
998		   (cmd != HFI1_SNOOP_IOCSETFILTER)) {
999		/* Capture devices are allowed only 3 operations
1000		 * 1.Clear capture queue
1001		 * 2.Clear capture filter
1002		 * 3.Set capture filter
1003		 * Other are invalid.
1004		 */
1005		ret = -EINVAL;
1006	} else {
1007		switch (cmd) {
1008		case HFI1_SNOOP_IOCSETLINKSTATE:
1009			snoop_dbg("HFI1_SNOOP_IOCSETLINKSTATE is not valid");
1010			ret = -EINVAL;
1011			break;
1012
1013		case HFI1_SNOOP_IOCSETLINKSTATE_EXTRA:
1014			memset(&link_info, 0, sizeof(link_info));
1015
1016			if (copy_from_user(&link_info,
1017				(struct hfi1_link_info __user *)arg,
1018				sizeof(link_info)))
1019				ret = -EFAULT;
1020
1021			value = link_info.port_state;
1022			index = link_info.port_number;
1023			if (index > dd->num_pports - 1) {
1024				ret = -EINVAL;
1025				break;
1026			}
1027
1028			ppd = &dd->pport[index];
1029			if (!ppd) {
1030				ret = -EINVAL;
1031				break;
1032			}
1033
1034			/* What we want to transition to */
1035			physState = (value >> 4) & 0xF;
1036			linkState = value & 0xF;
1037			snoop_dbg("Setting link state 0x%x", value);
1038
1039			switch (linkState) {
1040			case IB_PORT_NOP:
1041				if (physState == 0)
1042					break;
1043					/* fall through */
1044			case IB_PORT_DOWN:
1045				switch (physState) {
1046				case 0:
1047					devState = HLS_DN_DOWNDEF;
1048					break;
1049				case 2:
1050					devState = HLS_DN_POLL;
1051					break;
1052				case 3:
1053					devState = HLS_DN_DISABLE;
1054					break;
1055				default:
1056					ret = -EINVAL;
1057					goto done;
1058				}
1059				ret = set_link_state(ppd, devState);
1060				break;
1061			case IB_PORT_ARMED:
1062				ret = set_link_state(ppd, HLS_UP_ARMED);
1063				if (!ret)
1064					send_idle_sma(dd, SMA_IDLE_ARM);
1065				break;
1066			case IB_PORT_ACTIVE:
1067				ret = set_link_state(ppd, HLS_UP_ACTIVE);
1068				if (!ret)
1069					send_idle_sma(dd, SMA_IDLE_ACTIVE);
1070				break;
1071			default:
1072				ret = -EINVAL;
1073				break;
1074			}
1075
1076			if (ret)
1077				break;
1078			/* fall through */
1079		case HFI1_SNOOP_IOCGETLINKSTATE:
1080		case HFI1_SNOOP_IOCGETLINKSTATE_EXTRA:
1081			if (cmd == HFI1_SNOOP_IOCGETLINKSTATE_EXTRA) {
1082				memset(&link_info, 0, sizeof(link_info));
1083				if (copy_from_user(&link_info,
1084					(struct hfi1_link_info __user *)arg,
1085					sizeof(link_info)))
1086					ret = -EFAULT;
1087				index = link_info.port_number;
1088			} else {
1089				ret = __get_user(index, (int __user *) arg);
1090				if (ret !=  0)
1091					break;
1092			}
1093
1094			if (index > dd->num_pports - 1) {
1095				ret = -EINVAL;
1096				break;
1097			}
1098
1099			ppd = &dd->pport[index];
1100			if (!ppd) {
1101				ret = -EINVAL;
1102				break;
1103			}
1104			value = hfi1_ibphys_portstate(ppd);
1105			value <<= 4;
1106			value |= driver_lstate(ppd);
1107
1108			snoop_dbg("Link port | Link State: %d", value);
1109
1110			if ((cmd == HFI1_SNOOP_IOCGETLINKSTATE_EXTRA) ||
1111			    (cmd == HFI1_SNOOP_IOCSETLINKSTATE_EXTRA)) {
1112				link_info.port_state = value;
1113				link_info.node_guid = cpu_to_be64(ppd->guid);
1114				link_info.link_speed_active =
1115							ppd->link_speed_active;
1116				link_info.link_width_active =
1117							ppd->link_width_active;
1118				if (copy_to_user(
1119					(struct hfi1_link_info __user *)arg,
1120					&link_info, sizeof(link_info)))
1121					ret = -EFAULT;
1122			} else {
1123				ret = __put_user(value, (int __user *)arg);
1124			}
1125			break;
1126
1127		case HFI1_SNOOP_IOCCLEARQUEUE:
1128			snoop_dbg("Clearing snoop queue");
1129			drain_snoop_list(&dd->hfi1_snoop.queue);
1130			break;
1131
1132		case HFI1_SNOOP_IOCCLEARFILTER:
1133			snoop_dbg("Clearing filter");
1134			if (dd->hfi1_snoop.filter_callback) {
1135				/* Drain packets first */
1136				drain_snoop_list(&dd->hfi1_snoop.queue);
1137				dd->hfi1_snoop.filter_callback = NULL;
1138			}
1139			kfree(dd->hfi1_snoop.filter_value);
1140			dd->hfi1_snoop.filter_value = NULL;
1141			break;
1142
1143		case HFI1_SNOOP_IOCSETFILTER:
1144			snoop_dbg("Setting filter");
1145			/* just copy command structure */
1146			argp = (unsigned long *)arg;
1147			if (copy_from_user(&filter_cmd, (void __user *)argp,
1148					     sizeof(filter_cmd))) {
1149				ret = -EFAULT;
1150				break;
1151			}
1152			if (filter_cmd.opcode >= HFI1_MAX_FILTERS) {
1153				pr_alert("Invalid opcode in request\n");
1154				ret = -EINVAL;
1155				break;
1156			}
1157
1158			snoop_dbg("Opcode %d Len %d Ptr %p",
1159				   filter_cmd.opcode, filter_cmd.length,
1160				   filter_cmd.value_ptr);
1161
1162			filter_value = kcalloc(filter_cmd.length, sizeof(u8),
1163					       GFP_KERNEL);
1164			if (!filter_value) {
1165				pr_alert("Not enough memory\n");
1166				ret = -ENOMEM;
1167				break;
1168			}
1169			/* copy remaining data from userspace */
1170			if (copy_from_user((u8 *)filter_value,
1171					(void __user *)filter_cmd.value_ptr,
1172					filter_cmd.length)) {
1173				kfree(filter_value);
1174				ret = -EFAULT;
1175				break;
1176			}
1177			/* Drain packets first */
1178			drain_snoop_list(&dd->hfi1_snoop.queue);
1179			dd->hfi1_snoop.filter_callback =
1180				hfi1_filters[filter_cmd.opcode].filter;
1181			/* just in case we see back to back sets */
1182			kfree(dd->hfi1_snoop.filter_value);
1183			dd->hfi1_snoop.filter_value = filter_value;
1184
1185			break;
1186		case HFI1_SNOOP_IOCGETVERSION:
1187			value = SNOOP_CAPTURE_VERSION;
1188			snoop_dbg("Getting version: %d", value);
1189			ret = __put_user(value, (int __user *)arg);
1190			break;
1191		case HFI1_SNOOP_IOCSET_OPTS:
1192			snoop_flags = 0;
1193			ret = __get_user(value, (int __user *) arg);
1194			if (ret != 0)
1195				break;
1196
1197			snoop_dbg("Setting snoop option %d", value);
1198			if (value & SNOOP_DROP_SEND)
1199				snoop_flags |= SNOOP_DROP_SEND;
1200			if (value & SNOOP_USE_METADATA)
1201				snoop_flags |= SNOOP_USE_METADATA;
1202			break;
1203		default:
1204			ret = -ENOTTY;
1205			break;
1206		}
1207	}
1208done:
1209	spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
1210	return ret;
1211}
1212
1213static void snoop_list_add_tail(struct snoop_packet *packet,
1214				struct hfi1_devdata *dd)
1215{
1216	unsigned long flags = 0;
1217
1218	spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
1219	if (likely((dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) ||
1220		   (dd->hfi1_snoop.mode_flag & HFI1_PORT_CAPTURE_MODE))) {
1221		list_add_tail(&packet->list, &dd->hfi1_snoop.queue);
1222		snoop_dbg("Added packet to list");
1223	}
1224
1225	/*
1226	 * Technically we can could have closed the snoop device while waiting
1227	 * on the above lock and it is gone now. The snoop mode_flag will
1228	 * prevent us from adding the packet to the queue though.
1229	 */
1230
1231	spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
1232	wake_up_interruptible(&dd->hfi1_snoop.waitq);
1233}
1234
1235static inline int hfi1_filter_check(void *val, const char *msg)
1236{
1237	if (!val) {
1238		snoop_dbg("Error invalid %s value for filter", msg);
1239		return HFI1_FILTER_ERR;
1240	}
1241	return 0;
1242}
1243
1244static int hfi1_filter_lid(void *ibhdr, void *packet_data, void *value)
1245{
1246	struct hfi1_ib_header *hdr;
1247	int ret;
1248
1249	ret = hfi1_filter_check(ibhdr, "header");
1250	if (ret)
1251		return ret;
1252	ret = hfi1_filter_check(value, "user");
1253	if (ret)
1254		return ret;
1255	hdr = (struct hfi1_ib_header *)ibhdr;
1256
1257	if (*((u16 *)value) == be16_to_cpu(hdr->lrh[3])) /* matches slid */
1258		return HFI1_FILTER_HIT; /* matched */
1259
1260	return HFI1_FILTER_MISS; /* Not matched */
1261}
1262
1263static int hfi1_filter_dlid(void *ibhdr, void *packet_data, void *value)
1264{
1265	struct hfi1_ib_header *hdr;
1266	int ret;
1267
1268	ret = hfi1_filter_check(ibhdr, "header");
1269	if (ret)
1270		return ret;
1271	ret = hfi1_filter_check(value, "user");
1272	if (ret)
1273		return ret;
1274
1275	hdr = (struct hfi1_ib_header *)ibhdr;
1276
1277	if (*((u16 *)value) == be16_to_cpu(hdr->lrh[1]))
1278		return HFI1_FILTER_HIT;
1279
1280	return HFI1_FILTER_MISS;
1281}
1282
1283/* Not valid for outgoing packets, send handler passes null for data*/
1284static int hfi1_filter_mad_mgmt_class(void *ibhdr, void *packet_data,
1285				      void *value)
1286{
1287	struct hfi1_ib_header *hdr;
1288	struct hfi1_other_headers *ohdr = NULL;
1289	struct ib_smp *smp = NULL;
1290	u32 qpn = 0;
1291	int ret;
1292
1293	ret = hfi1_filter_check(ibhdr, "header");
1294	if (ret)
1295		return ret;
1296	ret = hfi1_filter_check(packet_data, "packet_data");
1297	if (ret)
1298		return ret;
1299	ret = hfi1_filter_check(value, "user");
1300	if (ret)
1301		return ret;
1302
1303	hdr = (struct hfi1_ib_header *)ibhdr;
1304
1305	/* Check for GRH */
1306	if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH)
1307		ohdr = &hdr->u.oth; /* LRH + BTH + DETH */
1308	else
1309		ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */
1310
1311	qpn = be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF;
1312	if (qpn <= 1) {
1313		smp = (struct ib_smp *)packet_data;
1314		if (*((u8 *)value) == smp->mgmt_class)
1315			return HFI1_FILTER_HIT;
1316		else
1317			return HFI1_FILTER_MISS;
1318	}
1319	return HFI1_FILTER_ERR;
1320}
1321
1322static int hfi1_filter_qp_number(void *ibhdr, void *packet_data, void *value)
1323{
1324
1325	struct hfi1_ib_header *hdr;
1326	struct hfi1_other_headers *ohdr = NULL;
1327	int ret;
1328
1329	ret = hfi1_filter_check(ibhdr, "header");
1330	if (ret)
1331		return ret;
1332	ret = hfi1_filter_check(value, "user");
1333	if (ret)
1334		return ret;
1335
1336	hdr = (struct hfi1_ib_header *)ibhdr;
1337
1338	/* Check for GRH */
1339	if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH)
1340		ohdr = &hdr->u.oth; /* LRH + BTH + DETH */
1341	else
1342		ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */
1343	if (*((u32 *)value) == (be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF))
1344		return HFI1_FILTER_HIT;
1345
1346	return HFI1_FILTER_MISS;
1347}
1348
1349static int hfi1_filter_ibpacket_type(void *ibhdr, void *packet_data,
1350				     void *value)
1351{
1352	u32 lnh = 0;
1353	u8 opcode = 0;
1354	struct hfi1_ib_header *hdr;
1355	struct hfi1_other_headers *ohdr = NULL;
1356	int ret;
1357
1358	ret = hfi1_filter_check(ibhdr, "header");
1359	if (ret)
1360		return ret;
1361	ret = hfi1_filter_check(value, "user");
1362	if (ret)
1363		return ret;
1364
1365	hdr = (struct hfi1_ib_header *)ibhdr;
1366
1367	lnh = (be16_to_cpu(hdr->lrh[0]) & 3);
1368
1369	if (lnh == HFI1_LRH_BTH)
1370		ohdr = &hdr->u.oth;
1371	else if (lnh == HFI1_LRH_GRH)
1372		ohdr = &hdr->u.l.oth;
1373	else
1374		return HFI1_FILTER_ERR;
1375
1376	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
1377
1378	if (*((u8 *)value) == ((opcode >> 5) & 0x7))
1379		return HFI1_FILTER_HIT;
1380
1381	return HFI1_FILTER_MISS;
1382}
1383
1384static int hfi1_filter_ib_service_level(void *ibhdr, void *packet_data,
1385					void *value)
1386{
1387	struct hfi1_ib_header *hdr;
1388	int ret;
1389
1390	ret = hfi1_filter_check(ibhdr, "header");
1391	if (ret)
1392		return ret;
1393	ret = hfi1_filter_check(value, "user");
1394	if (ret)
1395		return ret;
1396
1397	hdr = (struct hfi1_ib_header *)ibhdr;
1398
1399	if ((*((u8 *)value)) == ((be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF))
1400		return HFI1_FILTER_HIT;
1401
1402	return HFI1_FILTER_MISS;
1403}
1404
1405static int hfi1_filter_ib_pkey(void *ibhdr, void *packet_data, void *value)
1406{
1407
1408	u32 lnh = 0;
1409	struct hfi1_ib_header *hdr;
1410	struct hfi1_other_headers *ohdr = NULL;
1411	int ret;
1412
1413	ret = hfi1_filter_check(ibhdr, "header");
1414	if (ret)
1415		return ret;
1416	ret = hfi1_filter_check(value, "user");
1417	if (ret)
1418		return ret;
1419
1420	hdr = (struct hfi1_ib_header *)ibhdr;
1421
1422	lnh = (be16_to_cpu(hdr->lrh[0]) & 3);
1423	if (lnh == HFI1_LRH_BTH)
1424		ohdr = &hdr->u.oth;
1425	else if (lnh == HFI1_LRH_GRH)
1426		ohdr = &hdr->u.l.oth;
1427	else
1428		return HFI1_FILTER_ERR;
1429
1430	/* P_key is 16-bit entity, however top most bit indicates
1431	 * type of membership. 0 for limited and 1 for Full.
1432	 * Limited members cannot accept information from other
1433	 * Limited members, but communication is allowed between
1434	 * every other combination of membership.
1435	 * Hence we'll omit comparing top-most bit while filtering
1436	 */
1437
1438	if ((*(u16 *)value & 0x7FFF) ==
1439		((be32_to_cpu(ohdr->bth[0])) & 0x7FFF))
1440		return HFI1_FILTER_HIT;
1441
1442	return HFI1_FILTER_MISS;
1443}
1444
1445/*
1446 * If packet_data is NULL then this is coming from one of the send functions.
1447 * Thus we know if its an ingressed or egressed packet.
1448 */
1449static int hfi1_filter_direction(void *ibhdr, void *packet_data, void *value)
1450{
1451	u8 user_dir = *(u8 *)value;
1452	int ret;
1453
1454	ret = hfi1_filter_check(value, "user");
1455	if (ret)
1456		return ret;
1457
1458	if (packet_data) {
1459		/* Incoming packet */
1460		if (user_dir & HFI1_SNOOP_INGRESS)
1461			return HFI1_FILTER_HIT;
1462	} else {
1463		/* Outgoing packet */
1464		if (user_dir & HFI1_SNOOP_EGRESS)
1465			return HFI1_FILTER_HIT;
1466	}
1467
1468	return HFI1_FILTER_MISS;
1469}
1470
1471/*
1472 * Allocate a snoop packet. The structure that is stored in the ring buffer, not
1473 * to be confused with an hfi packet type.
1474 */
1475static struct snoop_packet *allocate_snoop_packet(u32 hdr_len,
1476						  u32 data_len,
1477						  u32 md_len)
1478{
1479
1480	struct snoop_packet *packet;
1481
1482	packet = kzalloc(sizeof(struct snoop_packet) + hdr_len + data_len
1483			 + md_len,
1484			 GFP_ATOMIC | __GFP_NOWARN);
1485	if (likely(packet))
1486		INIT_LIST_HEAD(&packet->list);
1487
1488
1489	return packet;
1490}
1491
1492/*
1493 * Instead of having snoop and capture code intermixed with the recv functions,
1494 * both the interrupt handler and hfi1_ib_rcv() we are going to hijack the call
1495 * and land in here for snoop/capture but if not enabled the call will go
1496 * through as before. This gives us a single point to constrain all of the snoop
1497 * snoop recv logic. There is nothing special that needs to happen for bypass
1498 * packets. This routine should not try to look into the packet. It just copied
1499 * it. There is no guarantee for filters when it comes to bypass packets as
1500 * there is no specific support. Bottom line is this routine does now even know
1501 * what a bypass packet is.
1502 */
int snoop_recv_handler(struct hfi1_packet *packet)
{
	struct hfi1_pportdata *ppd = packet->rcd->ppd;
	struct hfi1_ib_header *hdr = packet->hdr;
	int header_size = packet->hlen;
	void *data = packet->ebuf;
	u32 tlen = packet->tlen;
	struct snoop_packet *s_packet = NULL;
	int ret;
	int snoop_mode = 0;
	u32 md_len = 0;
	struct capture_md md;

	snoop_dbg("PACKET IN: hdr size %d tlen %d data %p", header_size, tlen,
		  data);

	trace_snoop_capture(ppd->dd, header_size, hdr, tlen - header_size,
			    data);

	/* No filter installed means every packet counts as a hit */
	if (!ppd->dd->hfi1_snoop.filter_callback) {
		snoop_dbg("filter not set");
		ret = HFI1_FILTER_HIT;
	} else {
		ret = ppd->dd->hfi1_snoop.filter_callback(hdr, data,
					ppd->dd->hfi1_snoop.filter_value);
	}

	switch (ret) {
	case HFI1_FILTER_ERR:
		/* Treat filter errors like a miss: fall through to the
		 * normal receive path below without capturing.
		 */
		snoop_dbg("Error in filter call");
		break;
	case HFI1_FILTER_MISS:
		snoop_dbg("Filter Miss");
		break;
	case HFI1_FILTER_HIT:

		if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE)
			snoop_mode = 1;
		/* Capture mode always prepends metadata; snoop mode only
		 * does so when userspace requested SNOOP_USE_METADATA.
		 */
		if ((snoop_mode == 0) ||
		    unlikely(snoop_flags & SNOOP_USE_METADATA))
			md_len = sizeof(struct capture_md);


		s_packet = allocate_snoop_packet(header_size,
						 tlen - header_size,
						 md_len);

		if (unlikely(s_packet == NULL)) {
			/* Allocation failure: drop the capture copy but keep
			 * delivering the packet through the normal path.
			 */
			dd_dev_warn_ratelimited(ppd->dd, "Unable to allocate snoop/capture packet\n");
			break;
		}

		/* Buffer layout: [metadata][header][payload] */
		if (md_len > 0) {
			memset(&md, 0, sizeof(struct capture_md));
			md.port = 1;
			md.dir = PKT_DIR_INGRESS;
			md.u.rhf = packet->rhf;
			memcpy(s_packet->data, &md, md_len);
		}

		/* We should always have a header */
		if (hdr) {
			memcpy(s_packet->data + md_len, hdr, header_size);
		} else {
			dd_dev_err(ppd->dd, "Unable to copy header to snoop/capture packet\n");
			kfree(s_packet);
			break;
		}

		/*
		 * Packets with no data are possible. If there is no data needed
		 * to take care of the last 4 bytes which are normally included
		 * with data buffers and are included in tlen.  Since we kzalloc
		 * the buffer we do not need to set any values but if we decide
		 * not to use kzalloc we should zero them.
		 */
		if (data)
			memcpy(s_packet->data + header_size + md_len, data,
			       tlen - header_size);

		/* Ownership of s_packet passes to the snoop queue here */
		s_packet->total_len = tlen + md_len;
		snoop_list_add_tail(s_packet, ppd->dd);

		/*
		 * If we are snooping the packet not capturing then throw away
		 * after adding to the list.
		 */
		snoop_dbg("Capturing packet");
		if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) {
			snoop_dbg("Throwing packet away");
			/*
			 * If we are dropping the packet we still may need to
			 * handle the case where error flags are set, this is
			 * normally done by the type specific handler but that
			 * won't be called in this case.
			 */
			if (unlikely(rhf_err_flags(packet->rhf)))
				handle_eflags(packet);

			/* throw the packet on the floor */
			return RHF_RCV_CONTINUE;
		}
		break;
	default:
		break;
	}

	/*
	 * We do not care what type of packet came in here - just pass it off
	 * to the normal handler.
	 */
	return ppd->dd->normal_rhf_rcv_functions[rhf_rcv_type(packet->rhf)]
			(packet);
}
1617
1618/*
1619 * Handle snooping and capturing packets when sdma is being used.
1620 */
int snoop_send_dma_handler(struct hfi1_qp *qp, struct ahg_ib_header *ibhdr,
			   u32 hdrwords, struct hfi1_sge_state *ss, u32 len,
			   u32 plen, u32 dwords, u64 pbc)
{
	/*
	 * Snoop/capture of SDMA sends is not implemented; warn and hand the
	 * packet straight to the normal SDMA send path.
	 * NOTE(review): the caller-supplied pbc is not forwarded -- 0 is
	 * passed instead. Confirm this is intentional for this fallback.
	 */
	pr_alert("Snooping/Capture of  Send DMA Packets Is Not Supported!\n");
	snoop_dbg("Unsupported Operation");
	return hfi1_verbs_send_dma(qp, ibhdr, hdrwords, ss, len, plen, dwords,
				  0);
}
1630
1631/*
1632 * Handle snooping and capturing packets when pio is being used. Does not handle
1633 * bypass packets. The only way to send a bypass packet currently is to use the
1634 * diagpkt interface. When that interface is enable snoop/capture is not.
1635 */
int snoop_send_pio_handler(struct hfi1_qp *qp, struct ahg_ib_header *ahdr,
			   u32 hdrwords, struct hfi1_sge_state *ss, u32 len,
			   u32 plen, u32 dwords, u64 pbc)
{
	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	struct snoop_packet *s_packet = NULL;
	u32 *hdr = (u32 *)&ahdr->ibh;
	u32 length = 0;
	struct hfi1_sge_state temp_ss;
	void *data = NULL;
	void *data_start = NULL;
	int ret;
	int snoop_mode = 0;
	int md_len = 0;
	struct capture_md md;
	u32 vl;
	u32 hdr_len = hdrwords << 2;	/* header words -> bytes */
	u32 tlen = HFI1_GET_PKT_LEN(&ahdr->ibh);

	/*
	 * md.u.pbc is what ultimately gets passed to hfi1_verbs_send_pio()
	 * at "out:"; initialize it in case we bail out before the metadata
	 * section below assigns it.
	 * NOTE(review): when metadata is in use and the caller passed a
	 * non-zero pbc, md.u.pbc is set to 0 and that 0 is what reaches
	 * hfi1_verbs_send_pio() -- confirm the caller's pbc is meant to be
	 * discarded in that case.
	 */
	md.u.pbc = 0;

	snoop_dbg("PACKET OUT: hdrword %u len %u plen %u dwords %u tlen %u",
		  hdrwords, len, plen, dwords, tlen);
	if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE)
		snoop_mode = 1;
	/* Capture mode always prepends metadata; snoop mode only on request */
	if ((snoop_mode == 0) ||
	    unlikely(snoop_flags & SNOOP_USE_METADATA))
		md_len = sizeof(struct capture_md);

	/* not using ss->total_len as arg 2 b/c that does not count CRC */
	s_packet = allocate_snoop_packet(hdr_len, tlen - hdr_len, md_len);

	if (unlikely(s_packet == NULL)) {
		/* Capture copy is dropped; the packet is still sent below */
		dd_dev_warn_ratelimited(ppd->dd, "Unable to allocate snoop/capture packet\n");
		goto out;
	}

	s_packet->total_len = tlen + md_len;

	/* Fill in metadata, reconstructing the PBC if the caller gave none */
	if (md_len > 0) {
		memset(&md, 0, sizeof(struct capture_md));
		md.port = 1;
		md.dir = PKT_DIR_EGRESS;
		if (likely(pbc == 0)) {
			vl = be16_to_cpu(ahdr->ibh.lrh[0]) >> 12;
			md.u.pbc = create_pbc(ppd, 0, qp->s_srate, vl, plen);
		} else {
			md.u.pbc = 0;
		}
		memcpy(s_packet->data, &md, md_len);
	} else {
		md.u.pbc = pbc;
	}

	/* Copy header */
	if (likely(hdr)) {
		memcpy(s_packet->data + md_len, hdr, hdr_len);
	} else {
		dd_dev_err(ppd->dd,
			   "Unable to copy header to snoop/capture packet\n");
		kfree(s_packet);
		goto out;
	}

	if (ss) {
		data = s_packet->data + hdr_len + md_len;
		data_start = data;

		/*
		 * Copy SGE State
		 * The update_sge() function below will not modify the
		 * individual SGEs in the array. It will make a copy each time
		 * and operate on that. So we only need to copy this instance
		 * and it won't impact PIO.
		 */
		temp_ss = *ss;
		length = len;

		/* Walk the SGE list, flattening the payload into s_packet */
		snoop_dbg("Need to copy %d bytes", length);
		while (length) {
			void *addr = temp_ss.sge.vaddr;
			u32 slen = temp_ss.sge.length;

			if (slen > length) {
				slen = length;
				snoop_dbg("slen %d > len %d", slen, length);
			}
			snoop_dbg("copy %d to %p", slen, addr);
			memcpy(data, addr, slen);
			update_sge(&temp_ss, slen);
			length -= slen;
			data += slen;
			snoop_dbg("data is now %p bytes left %d", data, length);
		}
		snoop_dbg("Completed SGE copy");
	}

	/*
	 * Why do the filter check down here? Because the event tracing has its
	 * own filtering and we need to have the walked the SGE list.
	 */
	if (!ppd->dd->hfi1_snoop.filter_callback) {
		snoop_dbg("filter not set\n");
		ret = HFI1_FILTER_HIT;
	} else {
		ret = ppd->dd->hfi1_snoop.filter_callback(
					&ahdr->ibh,
					NULL,
					ppd->dd->hfi1_snoop.filter_value);
	}

	switch (ret) {
	case HFI1_FILTER_ERR:
		snoop_dbg("Error in filter call");
		/* fall through */
	case HFI1_FILTER_MISS:
		/* Not captured: we still own s_packet, so free it */
		snoop_dbg("Filter Miss");
		kfree(s_packet);
		break;
	case HFI1_FILTER_HIT:
		/* Ownership of s_packet passes to the snoop queue */
		snoop_dbg("Capturing packet");
		snoop_list_add_tail(s_packet, ppd->dd);

		/*
		 * In snoop mode with SNOOP_DROP_SEND set the packet is
		 * swallowed here: complete the WQE as a success so the ULP
		 * makes progress, but never hand it to the wire.
		 */
		if (unlikely((snoop_flags & SNOOP_DROP_SEND) &&
			     (ppd->dd->hfi1_snoop.mode_flag &
			      HFI1_PORT_SNOOP_MODE))) {
			unsigned long flags;

			snoop_dbg("Dropping packet");
			if (qp->s_wqe) {
				spin_lock_irqsave(&qp->s_lock, flags);
				hfi1_send_complete(
					qp,
					qp->s_wqe,
					IB_WC_SUCCESS);
				spin_unlock_irqrestore(&qp->s_lock, flags);
			} else if (qp->ibqp.qp_type == IB_QPT_RC) {
				spin_lock_irqsave(&qp->s_lock, flags);
				hfi1_rc_send_complete(qp, &ahdr->ibh);
				spin_unlock_irqrestore(&qp->s_lock, flags);
			}
			return 0;
		}
		break;
	default:
		kfree(s_packet);
		break;
	}
out:
	return hfi1_verbs_send_pio(qp, ahdr, hdrwords, ss, len, plen, dwords,
				  md.u.pbc);
}
1789
1790/*
1791 * Callers of this must pass a hfi1_ib_header type for the from ptr. Currently
1792 * this can be used anywhere, but the intention is for inline ACKs for RC and
1793 * CCA packets. We don't restrict this usage though.
1794 */
void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf,
			   u64 pbc, const void *from, size_t count)
{
	int snoop_mode = 0;
	int md_len = 0;
	struct capture_md md;
	struct snoop_packet *s_packet = NULL;

	/*
	 * count is in dwords so we need to convert to bytes.
	 * We also need to account for CRC which would be tacked on by hardware.
	 */
	int packet_len = (count << 2) + 4;
	int ret;

	snoop_dbg("ACK OUT: len %d", packet_len);

	/* No filter installed means every packet counts as a hit */
	if (!dd->hfi1_snoop.filter_callback) {
		snoop_dbg("filter not set");
		ret = HFI1_FILTER_HIT;
	} else {
		/* Caller contract: 'from' points at a hfi1_ib_header */
		ret = dd->hfi1_snoop.filter_callback(
				(struct hfi1_ib_header *)from,
				NULL,
				dd->hfi1_snoop.filter_value);
	}

	switch (ret) {
	case HFI1_FILTER_ERR:
		snoop_dbg("Error in filter call");
		/* fall through */
	case HFI1_FILTER_MISS:
		/* Not captured; just send through the normal PIO copy below */
		snoop_dbg("Filter Miss");
		break;
	case HFI1_FILTER_HIT:
		snoop_dbg("Capturing packet");
		if (dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE)
			snoop_mode = 1;
		/* Capture mode always prepends metadata; snoop on request */
		if ((snoop_mode == 0) ||
		    unlikely(snoop_flags & SNOOP_USE_METADATA))
			md_len = sizeof(struct capture_md);

		/* Whole packet is one inline buffer, so no data section */
		s_packet = allocate_snoop_packet(packet_len, 0, md_len);

		if (unlikely(s_packet == NULL)) {
			/* Drop the capture copy but still send the packet */
			dd_dev_warn_ratelimited(dd, "Unable to allocate snoop/capture packet\n");
			goto inline_pio_out;
		}

		s_packet->total_len = packet_len + md_len;

		/* Fill in the metadata for the packet */
		if (md_len > 0) {
			memset(&md, 0, sizeof(struct capture_md));
			md.port = 1;
			md.dir = PKT_DIR_EGRESS;
			md.u.pbc = pbc;
			memcpy(s_packet->data, &md, md_len);
		}

		/* Add the packet data which is a single buffer */
		memcpy(s_packet->data + md_len, from, packet_len);

		/* Ownership of s_packet passes to the snoop queue here */
		snoop_list_add_tail(s_packet, dd);

		/* In snoop mode with SNOOP_DROP_SEND, swallow the packet */
		if (unlikely((snoop_flags & SNOOP_DROP_SEND) && snoop_mode)) {
			snoop_dbg("Dropping packet");
			return;
		}
		break;
	default:
		break;
	}

inline_pio_out:
	pio_copy(dd, pbuf, pbc, from, count);

}
1873