1 /* Copyright (c) 2012 - 2015 UNISYS CORPORATION
2  * All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11  * NON INFRINGEMENT.  See the GNU General Public License for more
12  * details.
13  */
14 
15 /* This driver lives in a s-Par partition, and registers to ethernet IO
16  * channels from the visorbus driver. It creates netdev devices and
17  * forwards transmits to the IO Partition and accepts receives from the IO
18  * Partition via the IO channel.
19  */
20 
21 #include <linux/debugfs.h>
22 #include <linux/etherdevice.h>
23 #include <linux/netdevice.h>
24 #include <linux/kthread.h>
25 #include <linux/skbuff.h>
26 #include <linux/rtnetlink.h>
27 
28 #include "visorbus.h"
29 #include "iochannel.h"
30 
31 #define VISORNIC_INFINITE_RSP_WAIT 0
32 #define VISORNICSOPENMAX 32
33 #define MAXDEVICES     16384
34 
35 /* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
36  *         = 163840 bytes
37  */
38 #define MAX_BUF 163840
39 
40 static int visornic_probe(struct visor_device *dev);
41 static void visornic_remove(struct visor_device *dev);
42 static int visornic_pause(struct visor_device *dev,
43 			  visorbus_state_complete_func complete_func);
44 static int visornic_resume(struct visor_device *dev,
45 			   visorbus_state_complete_func complete_func);
46 
47 /* DEBUGFS declarations */
48 static ssize_t info_debugfs_read(struct file *file, char __user *buf,
49 				 size_t len, loff_t *offset);
50 static ssize_t enable_ints_write(struct file *file, const char __user *buf,
51 				 size_t len, loff_t *ppos);
52 static struct dentry *visornic_debugfs_dir;
53 static const struct file_operations debugfs_info_fops = {
54 	.read = info_debugfs_read,
55 };
56 
57 static const struct file_operations debugfs_enable_ints_fops = {
58 	.write = enable_ints_write,
59 };
60 
61 static struct workqueue_struct *visornic_timeout_reset_workqueue;
62 
63 /* GUIDs for the channel types supported by this driver.  */
64 static struct visor_channeltype_descriptor visornic_channel_types[] = {
65 	/* Note that the only channel type we expect to be reported by the
66 	 * bus driver is the SPAR_VNIC channel.
67 	 */
68 	{ SPAR_VNIC_CHANNEL_PROTOCOL_UUID, "ultravnic" },
69 	{ NULL_UUID_LE, NULL }
70 };
71 MODULE_DEVICE_TABLE(visorbus, visornic_channel_types);
72 /*
73  * FIXME XXX: This next line of code must be fixed and removed before
74  * acceptance into the 'normal' part of the kernel.  It is only here as a place
75  * holder to get module autoloading functionality working for visorbus.  Code
76  * must be added to scripts/mod/file2alias.c, etc., to get this working
77  * properly.
78  */
79 MODULE_ALIAS("visorbus:" SPAR_VNIC_CHANNEL_PROTOCOL_UUID_STR);
80 
81 /* This is used to tell the visor bus driver which types of visor devices
82  * we support, and what functions to call when a visor device that we support
83  * is attached or removed.
84  */
85 static struct visor_driver visornic_driver = {
86 	.name = "visornic",
87 	.version = "1.0.0.0",
88 	.vertag = NULL,
89 	.owner = THIS_MODULE,
90 	.channel_types = visornic_channel_types,
91 	.probe = visornic_probe,
92 	.remove = visornic_remove,
93 	.pause = visornic_pause,
94 	.resume = visornic_resume,
95 	.channel_interrupt = NULL,
96 };
97 
98 struct chanstat {
99 	unsigned long got_rcv;
100 	unsigned long got_enbdisack;
101 	unsigned long got_xmit_done;
102 	unsigned long xmit_fail;
103 	unsigned long sent_enbdis;
104 	unsigned long sent_promisc;
105 	unsigned long sent_post;
106 	unsigned long sent_post_failed;
107 	unsigned long sent_xmit;
108 	unsigned long reject_count;
109 	unsigned long extra_rcvbufs_sent;
110 };
111 
112 struct visornic_devdata {
113 	unsigned short enabled;		/* 0 disabled 1 enabled to receive */
114 	unsigned short enab_dis_acked;	/* NET_RCV_ENABLE/DISABLE acked by
115 					 * IOPART
116 					 */
117 	struct visor_device *dev;
118 	struct net_device *netdev;
119 	struct net_device_stats net_stats;
120 	atomic_t interrupt_rcvd;
121 	wait_queue_head_t rsp_queue;
122 	struct sk_buff **rcvbuf;
123 	u64 incarnation_id;		/* lets IOPART know about re-birth */
124 	unsigned short old_flags;	/* flags as they were prior to
125 					 * set_multicast_list
126 					 */
127 	atomic_t usage;			/* count of users */
128 	int num_rcv_bufs;		/* indicates how many rcv buffers
129 					 * the vnic will post
130 					 */
131 	int num_rcv_bufs_could_not_alloc;
132 	atomic_t num_rcvbuf_in_iovm;
133 	unsigned long alloc_failed_in_if_needed_cnt;
134 	unsigned long alloc_failed_in_repost_rtn_cnt;
135 	unsigned long max_outstanding_net_xmits; /* absolute max number of
136 						  * outstanding xmits - should
137 						  * never hit this
138 						  */
139 	unsigned long upper_threshold_net_xmits;  /* high water mark for
140 						   * calling netif_stop_queue()
141 						   */
142 	unsigned long lower_threshold_net_xmits; /* low water mark for calling
143 						  * netif_wake_queue()
144 						  */
145 	struct sk_buff_head xmitbufhead; /* xmitbufhead is the head of the
146 					  * xmit buffer list that have been
147 					  * sent to the IOPART end
148 					  */
149 	visorbus_state_complete_func server_down_complete_func;
150 	struct work_struct timeout_reset;
151 	struct uiscmdrsp *cmdrsp_rcv;	 /* cmdrsp_rcv is used for
152 					  * posting/unposting rcv buffers
153 					  */
154 	struct uiscmdrsp *xmit_cmdrsp;	 /* used to issue NET_XMIT - there is
155 					  * never more than one xmit in
156 					  * progress at a time
157 					  */
158 	bool server_down;		 /* IOPART is down */
159 	bool server_change_state;	 /* Processing SERVER_CHANGESTATE msg */
160 	bool going_away;		 /* device is being torn down */
161 	struct dentry *eth_debugfs_dir;
162 	u64 interrupts_rcvd;
163 	u64 interrupts_notme;
164 	u64 interrupts_disabled;
165 	u64 busy_cnt;
166 	spinlock_t priv_lock;  /* spinlock to access devdata structures */
167 
168 	/* flow control counter */
169 	u64 flow_control_upper_hits;
170 	u64 flow_control_lower_hits;
171 
172 	/* debug counters */
173 	unsigned long n_rcv0;			/* # rcvs of 0 buffers */
174 	unsigned long n_rcv1;			/* # rcvs of 1 buffers */
175 	unsigned long n_rcv2;			/* # rcvs of 2 buffers */
176 	unsigned long n_rcvx;			/* # rcvs of >2 buffers */
177 	unsigned long found_repost_rcvbuf_cnt;	/* # times we called
178 						 *   repost_rcvbuf_cnt
179 						 */
180 	unsigned long repost_found_skb_cnt;	/* # times found the skb */
181 	unsigned long n_repost_deficit;		/* # times we couldn't find
182 						 *   all of the rcv buffers
183 						 */
184 	unsigned long bad_rcv_buf;		/* # times we neglected to
185 						 * free the rcv skb because
186 						 * we didn't know where it
187 						 * came from
188 						 */
189 	unsigned long n_rcv_packets_not_accepted;/* # bogus rcv packets */
190 
191 	int queuefullmsg_logged;
192 	struct chanstat chstat;
193 	struct timer_list irq_poll_timer;
194 	struct napi_struct napi;
195 	struct uiscmdrsp cmdrsp[SIZEOF_CMDRSP];
196 };
197 
198 static int visornic_poll(struct napi_struct *napi, int budget);
199 static void poll_for_irq(unsigned long v);
200 
201 /**
202  *	visor_copy_fragsinfo_from_skb - copy skb frags to a phys_info array
203  *	@skb: skbuff that we are pulling the frags from
204  *	@firstfraglen: length of first fragment in skb
205  *	@frags_max: max len of frags array
206  *	@frags: frags array filled in on output
207  *
208  *	Copy the fragment list in the SKB to a phys_info
209  *	array that the IOPART understands.
210  *	Return value indicates number of entries filled in frags
211  *	Negative values indicate an error.
212  */
213 static int
214 visor_copy_fragsinfo_from_skb(struct sk_buff *skb, unsigned int firstfraglen,
215 			      unsigned int frags_max,
216 			      struct phys_info frags[])
217 {
218 	unsigned int count = 0, ii, size, offset = 0, numfrags;
219 	unsigned int total_count;
220 
221 	numfrags = skb_shinfo(skb)->nr_frags;
222 
223 	/*
224 	 * Compute the number of fragments this skb has, and if it's more than
225 	 * frag array can hold, linearize the skb
226 	 */
227 	total_count = numfrags + (firstfraglen / PI_PAGE_SIZE);
228 	if (firstfraglen % PI_PAGE_SIZE)
229 		total_count++;
230 
231 	if (total_count > frags_max) {
232 		if (skb_linearize(skb))
233 			return -EINVAL;
234 		numfrags = skb_shinfo(skb)->nr_frags;
235 		firstfraglen = 0;
236 	}
237 
238 	while (firstfraglen) {
239 		if (count == frags_max)
240 			return -EINVAL;
241 
242 		frags[count].pi_pfn =
243 			page_to_pfn(virt_to_page(skb->data + offset));
244 		frags[count].pi_off =
245 			(unsigned long)(skb->data + offset) & PI_PAGE_MASK;
246 		size = min_t(unsigned int, firstfraglen,
247 			     PI_PAGE_SIZE - frags[count].pi_off);
248 
249 		/* can take smallest of firstfraglen (what's left) OR
250 		 * bytes left in the page
251 		 */
252 		frags[count].pi_len = size;
253 		firstfraglen -= size;
254 		offset += size;
255 		count++;
256 	}
257 	if (numfrags) {
258 		if ((count + numfrags) > frags_max)
259 			return -EINVAL;
260 
261 		for (ii = 0; ii < numfrags; ii++) {
262 			count = add_physinfo_entries(page_to_pfn(
263 				skb_frag_page(&skb_shinfo(skb)->frags[ii])),
264 					      skb_shinfo(skb)->frags[ii].
265 					      page_offset,
266 					      skb_shinfo(skb)->frags[ii].
267 					      size, count, frags_max, frags);
268 			/*
269 			 * add_physinfo_entries only returns
270 			 * zero if the frags array is out of room
271 			 * That should never happen because we
272 			 * fail above, if count+numfrags > frags_max.
273 			 * Given that there's no recovery mechanism from putting
274 			 * half a packet in the I/O channel, panic here as this
275 			 * should never happen
276 			 */
277 			BUG_ON(!count);
278 		}
279 	}
280 	if (skb_shinfo(skb)->frag_list) {
281 		struct sk_buff *skbinlist;
282 		int c;
283 
284 		for (skbinlist = skb_shinfo(skb)->frag_list; skbinlist;
285 		     skbinlist = skbinlist->next) {
286 			c = visor_copy_fragsinfo_from_skb(skbinlist,
287 							  skbinlist->len -
288 							  skbinlist->data_len,
289 							  frags_max - count,
290 							  &frags[count]);
291 			if (c < 0)
292 				return c;
293 			count += c;
294 		}
295 	}
296 	return count;
297 }
298 
299 static ssize_t enable_ints_write(struct file *file,
300 				 const char __user *buffer,
301 				 size_t count, loff_t *ppos)
302 {
303 	/*
304 	 * Don't want to break the ABI here by removing this debugfs
305 	 * file or making it non-writable, so
306 	 * let's just make this a vestigial function
307 	 */
308 	return count;
309 }
310 
311 /**
312  *	visornic_serverdown_complete - IOPART went down, need to pause
313  *				       device
314  *	@work: Work queue it was scheduled on
315  *
316  *	The IO partition has gone down and we need to do some cleanup
317  *	for when it comes back. Treat the IO partition as the link
318  *	being down.
319  *	Returns void.
320  */
321 static void
322 visornic_serverdown_complete(struct visornic_devdata *devdata)
323 {
324 	struct net_device *netdev;
325 
326 	netdev = devdata->netdev;
327 
328 	/* Stop polling for interrupts */
329 	del_timer_sync(&devdata->irq_poll_timer);
330 
331 	rtnl_lock();
332 	dev_close(netdev);
333 	rtnl_unlock();
334 
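	/* The rcv buffers and xmits that were outstanding in the (now gone)
	 * IO Partition will never complete, so reset the bookkeeping for
	 * them here.
	 */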
335 	atomic_set(&devdata->num_rcvbuf_in_iovm, 0);
336 	devdata->chstat.sent_xmit = 0;
337 	devdata->chstat.got_xmit_done = 0;
338 
339 	if (devdata->server_down_complete_func)
340 		(*devdata->server_down_complete_func)(devdata->dev, 0);
341 
342 	devdata->server_down = true;
343 	devdata->server_change_state = false;
344 	devdata->server_down_complete_func = NULL;
345 }
346 
347 /**
348  *	visornic_serverdown - Command has notified us that IOPART is down
349  *	@devdata: device that is being managed by IOPART
350  *
351  *	Schedule the work needed to handle the server down request. Make
352  *	sure we haven't already handled the server change state event.
353  *	Returns 0 if we scheduled the work, negative errno on error.
354  */
355 static int
356 visornic_serverdown(struct visornic_devdata *devdata,
357 		    visorbus_state_complete_func complete_func)
358 {
359 	unsigned long flags;
360 
361 	spin_lock_irqsave(&devdata->priv_lock, flags);
362 	if (!devdata->server_down && !devdata->server_change_state) {
363 		if (devdata->going_away) {
364 			spin_unlock_irqrestore(&devdata->priv_lock, flags);
365 			dev_dbg(&devdata->dev->device,
366 				"%s aborting because device removal pending\n",
367 				__func__);
368 			return -ENODEV;
369 		}
370 		devdata->server_change_state = true;
371 		devdata->server_down_complete_func = complete_func;
372 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
373 		visornic_serverdown_complete(devdata);
374 	} else if (devdata->server_change_state) {
375 		dev_dbg(&devdata->dev->device, "%s changing state\n",
376 			__func__);
377 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
378 		return -EINVAL;
379 	} else
380 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
381 	return 0;
382 }
383 
384 /**
385  *	alloc_rcv_buf	- alloc rcv buffer to be given to the IO Partition.
386  *	@netdev: network adapter the rcv bufs are attached to.
387  *
388  *	Create an sk_buff (rcv_buf) that will be passed to the IO Partition
389  *	so that it can write rcv data into our memory space.
390  *	Return pointer to sk_buff
391  */
392 static struct sk_buff *
393 alloc_rcv_buf(struct net_device *netdev)
394 {
395 	struct sk_buff *skb;
396 
397 	/* NOTE: the first fragment in each rcv buffer is pointed to by
398 	 * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
399 	 * in length, so the firstfrag is large enough to hold 1514.
400 	 */
401 	skb = alloc_skb(RCVPOST_BUF_SIZE, GFP_ATOMIC);
402 	if (!skb)
403 		return NULL;
404 	skb->dev = netdev;
405 	skb->len = RCVPOST_BUF_SIZE;
406 	/* current value of mtu doesn't come into play here; large
407 	 * packets will just end up using multiple rcv buffers all of
408 	 * same size
409 	 */
410 	skb->data_len = 0;      /* alloc_skb already zeroes it out;
411 				 * set it explicitly here for clarity.
412 				 */
413 	return skb;
414 }
415 
416 /**
417  *	post_skb	- post a skb to the IO Partition.
418  *	@cmdrsp: cmdrsp packet to be sent to the IO Partition
419  *	@devdata: visornic_devdata to post the skb to
420  *	@skb: skb to give to the IO partition
421  *
422  *	Send the skb to the IO Partition.
423  *	Returns void
424  */
425 static inline void
426 post_skb(struct uiscmdrsp *cmdrsp,
427 	 struct visornic_devdata *devdata, struct sk_buff *skb)
428 {
429 	cmdrsp->net.buf = skb;
430 	cmdrsp->net.rcvpost.frag.pi_pfn = page_to_pfn(virt_to_page(skb->data));
431 	cmdrsp->net.rcvpost.frag.pi_off =
432 		(unsigned long)skb->data & PI_PAGE_MASK;
433 	cmdrsp->net.rcvpost.frag.pi_len = skb->len;
434 	cmdrsp->net.rcvpost.unique_num = devdata->incarnation_id;
435 
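	/* Only post the buffer if its first fragment fits entirely within a
	 * single page; otherwise the post is silently skipped.
	 */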
436 	if ((cmdrsp->net.rcvpost.frag.pi_off + skb->len) <= PI_PAGE_SIZE) {
437 		cmdrsp->net.type = NET_RCV_POST;
438 		cmdrsp->cmdtype = CMD_NET_TYPE;
439 		if (visorchannel_signalinsert(devdata->dev->visorchannel,
440 					  IOCHAN_TO_IOPART,
441 					  cmdrsp)) {
442 			atomic_inc(&devdata->num_rcvbuf_in_iovm);
443 			devdata->chstat.sent_post++;
444 		} else {
445 			devdata->chstat.sent_post_failed++;
446 		}
447 	}
448 }
449 
450 /**
451  *	send_enbdis	- send NET_RCV_ENBDIS to IO Partition
452  *	@netdev: netdevice we are enabling/disabling, used as the context
453  *		 echoed back in the IO Partition's response
454  *	@state: enable = 1/disable = 0
455  *	@devdata: visornic device we are enabling/disabling
456  *
457  *	Send the enable/disable message to the IO Partition.
458  *	Returns void
459  */
460 static void
461 send_enbdis(struct net_device *netdev, int state,
462 	    struct visornic_devdata *devdata)
463 {
464 	devdata->cmdrsp_rcv->net.enbdis.enable = state;
465 	devdata->cmdrsp_rcv->net.enbdis.context = netdev;
466 	devdata->cmdrsp_rcv->net.type = NET_RCV_ENBDIS;
467 	devdata->cmdrsp_rcv->cmdtype = CMD_NET_TYPE;
468 	if (visorchannel_signalinsert(devdata->dev->visorchannel,
469 				  IOCHAN_TO_IOPART,
470 				  devdata->cmdrsp_rcv))
471 		devdata->chstat.sent_enbdis++;
472 }
473 
474 /**
475  *	visornic_disable_with_timeout - Disable network adapter
476  *	@netdev: netdevice to disable
477  *	@timeout: timeout to wait for disable
478  *
479  *	Disable the network adapter and inform the IO Partition that we
480  *	are disabled, reclaim memory from rcv bufs.
481  *	Returns 0 on success, negative if the IO Partition fails to
482  *	respond.
483  *
484  */
485 static int
486 visornic_disable_with_timeout(struct net_device *netdev, const int timeout)
487 {
488 	struct visornic_devdata *devdata = netdev_priv(netdev);
489 	int i;
490 	unsigned long flags;
491 	int wait = 0;
492 
493 	/* send a msg telling the other end we are stopping incoming pkts */
494 	spin_lock_irqsave(&devdata->priv_lock, flags);
495 	devdata->enabled = 0;
496 	devdata->enab_dis_acked = 0; /* must wait for ack */
497 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
498 
499 	/* send disable and wait for ack -- don't hold lock when sending
500 	 * disable because if the queue is full, insert might sleep.
501 	 */
502 	send_enbdis(netdev, 0, devdata);
503 
504 	/* wait for ack to arrive before we try to free rcv buffers
505 	 * NOTE: the other end automatically unposts the rcv buffers
506 	 * when it gets a disable.
507 	 */
508 	spin_lock_irqsave(&devdata->priv_lock, flags);
509 	while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
510 	       (wait < timeout)) {
511 		if (devdata->enab_dis_acked)
512 			break;
513 		if (devdata->server_down || devdata->server_change_state) {
514 			spin_unlock_irqrestore(&devdata->priv_lock, flags);
515 			dev_dbg(&netdev->dev, "%s server went away\n",
516 				__func__);
517 			return -EIO;
518 		}
519 		set_current_state(TASK_INTERRUPTIBLE);
520 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
521 		wait += schedule_timeout(msecs_to_jiffies(10));
522 		spin_lock_irqsave(&devdata->priv_lock, flags);
523 	}
524 
525 	/* Wait for usage to go to 1 (no other users) before freeing
526 	 * rcv buffers
527 	 */
528 	if (atomic_read(&devdata->usage) > 1) {
529 		while (1) {
530 			set_current_state(TASK_INTERRUPTIBLE);
531 			spin_unlock_irqrestore(&devdata->priv_lock, flags);
532 			schedule_timeout(msecs_to_jiffies(10));
533 			spin_lock_irqsave(&devdata->priv_lock, flags);
534 			if (atomic_read(&devdata->usage))
535 				break;
536 		}
537 	}
538 	/* we've set enabled to 0, so we can give up the lock. */
539 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
540 
541 	/* stop the transmit queue so nothing more can be transmitted */
542 	netif_stop_queue(netdev);
543 
544 	napi_disable(&devdata->napi);
545 
546 	skb_queue_purge(&devdata->xmitbufhead);
547 
548 	/* Free rcv buffers - other end has automatically unposted them on
549 	 * disable
550 	 */
551 	for (i = 0; i < devdata->num_rcv_bufs; i++) {
552 		if (devdata->rcvbuf[i]) {
553 			kfree_skb(devdata->rcvbuf[i]);
554 			devdata->rcvbuf[i] = NULL;
555 		}
556 	}
557 
558 	return 0;
559 }
560 
561 /**
562  *	init_rcv_bufs  -- initialize receive bufs and send them to the IO Part
563  *	@netdev: struct netdevice
564  *	@devdata: visornic_devdata
565  *
566  *	Allocate rcv buffers and post them to the IO Partition.
567  *	Return 0 for success, and negative for failure.
568  */
569 static int
570 init_rcv_bufs(struct net_device *netdev, struct visornic_devdata *devdata)
571 {
572 	int i, count;
573 
574 	/* allocate fixed number of receive buffers to post to uisnic
575 	 * post receive buffers after we've allocated a required amount
576 	 */
577 	for (i = 0; i < devdata->num_rcv_bufs; i++) {
578 		devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
579 		if (!devdata->rcvbuf[i])
580 			break; /* if we failed to allocate one let us stop */
581 	}
582 	if (i == 0) /* couldn't even allocate one -- bail out */
583 		return -ENOMEM;
584 	count = i;
585 
586 	/* Ensure we can alloc 2/3 of the requested number of buffers.
587 	 * 2/3 is an arbitrary choice; used also in ndis init.c
588 	 */
589 	if (count < ((2 * devdata->num_rcv_bufs) / 3)) {
590 		/* free receive buffers we did alloc and then bail out */
591 		for (i = 0; i < count; i++) {
592 			kfree_skb(devdata->rcvbuf[i]);
593 			devdata->rcvbuf[i] = NULL;
594 		}
595 		return -ENOMEM;
596 	}
597 
598 	/* post receive buffers to receive incoming input - without holding
599 	 * lock - we've not enabled nor started the queue so there shouldn't
600 	 * be any rcv or xmit activity
601 	 */
602 	for (i = 0; i < count; i++)
603 		post_skb(devdata->cmdrsp_rcv, devdata, devdata->rcvbuf[i]);
604 
605 	return 0;
606 }
607 
608 /**
609  *	visornic_enable_with_timeout	- send enable to IO Part
610  *	@netdev: struct net_device
611  *	@timeout: Time to wait for the ACK from the enable
612  *
613  *	Sends enable to IOVM, inits and posts receive buffers to IOVM;
614  *	timeout is defined in msecs (timeout of 0 specifies infinite wait).
615  *	Return 0 for success, negative for failure.
616  */
617 static int
618 visornic_enable_with_timeout(struct net_device *netdev, const int timeout)
619 {
620 	int i;
621 	struct visornic_devdata *devdata = netdev_priv(netdev);
622 	unsigned long flags;
623 	int wait = 0;
624 
625 	/* NOTE: the other end automatically unposts the rcv buffers when it
626 	 * gets a disable.
627 	 */
628 	i = init_rcv_bufs(netdev, devdata);
629 	if (i < 0) {
630 		dev_err(&netdev->dev,
631 			"%s failed to init rcv bufs (%d)\n", __func__, i);
632 		return i;
633 	}
634 
635 	spin_lock_irqsave(&devdata->priv_lock, flags);
636 	devdata->enabled = 1;
637 	devdata->enab_dis_acked = 0;
638 
639 	/* now we're ready, let's send an ENB to uisnic but until we get
640 	 * an ACK back from uisnic, we'll drop the packets
641 	 */
642 	devdata->n_rcv_packets_not_accepted = 0;
643 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
644 
645 	/* send enable and wait for ack -- don't hold lock when sending enable
646 	 * because if the queue is full, insert might sleep.
647 	 */
648 	napi_enable(&devdata->napi);
649 	send_enbdis(netdev, 1, devdata);
650 
651 	spin_lock_irqsave(&devdata->priv_lock, flags);
652 	while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
653 	       (wait < timeout)) {
654 		if (devdata->enab_dis_acked)
655 			break;
656 		if (devdata->server_down || devdata->server_change_state) {
657 			spin_unlock_irqrestore(&devdata->priv_lock, flags);
658 			dev_dbg(&netdev->dev, "%s server went away\n",
659 				__func__);
660 			return -EIO;
661 		}
662 		set_current_state(TASK_INTERRUPTIBLE);
663 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
664 		wait += schedule_timeout(msecs_to_jiffies(10));
665 		spin_lock_irqsave(&devdata->priv_lock, flags);
666 	}
667 
668 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
669 
670 	if (!devdata->enab_dis_acked) {
671 		dev_err(&netdev->dev, "%s missing ACK\n", __func__);
672 		return -EIO;
673 	}
674 
675 	netif_start_queue(netdev);
676 
677 	return 0;
678 }
679 
680 /**
681  *	visornic_timeout_reset	- handle xmit timeout resets
682  *	@work: work item that scheduled the work
683  *
684  *	Transmit timeouts are typically handled by resetting the
685  *	device; for our virtual NIC we will send a disable and enable
686  *	to the IOVM. If it doesn't respond we will trigger a serverdown.
687  */
688 static void
689 visornic_timeout_reset(struct work_struct *work)
690 {
691 	struct visornic_devdata *devdata;
692 	struct net_device *netdev;
693 	int response = 0;
694 
695 	devdata = container_of(work, struct visornic_devdata, timeout_reset);
696 	netdev = devdata->netdev;
697 
698 	rtnl_lock();
699 	if (!netif_running(netdev)) {
700 		rtnl_unlock();
701 		return;
702 	}
703 
704 	response = visornic_disable_with_timeout(netdev,
705 						 VISORNIC_INFINITE_RSP_WAIT);
706 	if (response)
707 		goto call_serverdown;
708 
709 	response = visornic_enable_with_timeout(netdev,
710 						VISORNIC_INFINITE_RSP_WAIT);
711 	if (response)
712 		goto call_serverdown;
713 
714 	rtnl_unlock();
715 
716 	return;
717 
718 call_serverdown:
719 	visornic_serverdown(devdata, NULL);
720 	rtnl_unlock();
721 }
722 
723 /**
724  *	visornic_open - Enable the visornic device and mark the queue started
725  *	@netdev: netdevice to start
726  *
727  *      Enable the device and start the transmit queue.
728  *      Return 0 for success
729  */
730 static int
731 visornic_open(struct net_device *netdev)
732 {
733 	visornic_enable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
734 
735 	return 0;
736 }
737 
738 /**
739  *	visornic_close - Disables the visornic device and stops the queues
740  *	@netdev: netdevice to stop
741  *
742  *      Disable the device and stop the transmit queue.
743  *      Return 0 for success
744  */
745 static int
746 visornic_close(struct net_device *netdev)
747 {
748 	visornic_disable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
749 
750 	return 0;
751 }
752 
753 /**
754  *	devdata_xmits_outstanding - compute outstanding xmits
755  *	@devdata: visornic_devdata for device
756  *
757  *	Return value is the number of outstanding xmits.
758  */
759 static unsigned long devdata_xmits_outstanding(struct visornic_devdata *devdata)
760 {
761 	if (devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done)
762 		return devdata->chstat.sent_xmit -
763 			devdata->chstat.got_xmit_done;
764 	else
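		/* sent_xmit has wrapped around ULONG_MAX; account for the
		 * wrap when computing the difference.
		 */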
765 		return (ULONG_MAX - devdata->chstat.got_xmit_done
766 			+ devdata->chstat.sent_xmit + 1);
767 }
768 
769 /**
770  *	vnic_hit_high_watermark
771  *	@devdata: indicates visornic device we are checking
772  *	@high_watermark: max num of unacked xmits we will tolerate,
773  *                       before we will start throttling
774  *
775  *      Returns true iff the number of unacked xmits sent to
776  *      the IO partition is >= high_watermark.
777  */
778 static inline bool vnic_hit_high_watermark(struct visornic_devdata *devdata,
779 					   ulong high_watermark)
780 {
781 	return (devdata_xmits_outstanding(devdata) >= high_watermark);
782 }
783 
784 /**
785  *	vnic_hit_low_watermark
786  *	@devdata: indicates visornic device we are checking
787  *	@low_watermark: we will wait until the num of unacked xmits
788  *                      drops to this value or lower before we start
789  *                      transmitting again
790  *
791  *      Returns true iff the number of unacked xmits sent to
792  *      the IO partition is <= low_watermark.
793  */
794 static inline bool vnic_hit_low_watermark(struct visornic_devdata *devdata,
795 					  ulong low_watermark)
796 {
797 	return (devdata_xmits_outstanding(devdata) <= low_watermark);
798 }
799 
800 /**
801  *	visornic_xmit - send a packet to the IO Partition
802  *	@skb: Packet to be sent
803  *	@netdev: net device the packet is being sent from
804  *
805  *	Convert the skb to a cmdrsp so the IO Partition can understand it.
806  *	Send the XMIT command to the IO Partition for processing. This
807  *	function is protected from concurrent calls by a spinlock xmit_lock
808  *	in the net_device struct, but as soon as the function returns it
809  *	can be called again.
810  *	Returns NETDEV_TX_OK.
811  */
812 static int
813 visornic_xmit(struct sk_buff *skb, struct net_device *netdev)
814 {
815 	struct visornic_devdata *devdata;
816 	int len, firstfraglen, padlen;
817 	struct uiscmdrsp *cmdrsp = NULL;
818 	unsigned long flags;
819 
820 	devdata = netdev_priv(netdev);
821 	spin_lock_irqsave(&devdata->priv_lock, flags);
822 
823 	if (netif_queue_stopped(netdev) || devdata->server_down ||
824 	    devdata->server_change_state) {
825 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
826 		devdata->busy_cnt++;
827 		dev_dbg(&netdev->dev,
828 			"%s busy - queue stopped\n", __func__);
829 		kfree_skb(skb);
830 		return NETDEV_TX_OK;
831 	}
832 
833 	/* sk_buff struct is used to host network data throughout all the
834 	 * linux network subsystems
835 	 */
836 	len = skb->len;
837 
838 	/* skb->len is the FULL length of data (including fragmentary portion)
839 	 * skb->data_len is the length of the fragment portion in frags
840 	 * skb->len - skb->data_len is size of the 1st fragment in skb->data
841 	 * calculate the length of the first fragment that skb->data is
842 	 * pointing to
843 	 */
844 	firstfraglen = skb->len - skb->data_len;
845 	if (firstfraglen < ETH_HEADER_SIZE) {
846 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
847 		devdata->busy_cnt++;
848 		dev_err(&netdev->dev,
849 			"%s busy - first frag too small (%d)\n",
850 			__func__, firstfraglen);
851 		kfree_skb(skb);
852 		return NETDEV_TX_OK;
853 	}
854 
855 	if ((len < ETH_MIN_PACKET_SIZE) &&
856 	    ((skb_end_pointer(skb) - skb->data) >= ETH_MIN_PACKET_SIZE)) {
857 		/* pad the packet out to minimum size */
858 		padlen = ETH_MIN_PACKET_SIZE - len;
859 		memset(&skb->data[len], 0, padlen);
860 		skb->tail += padlen;
861 		skb->len += padlen;
862 		len += padlen;
863 		firstfraglen += padlen;
864 	}
865 
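	/* Reuse the single preallocated xmit cmdrsp; this is safe because
	 * priv_lock is held from here until the signal insert completes,
	 * so only one xmit is ever built at a time.
	 */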
866 	cmdrsp = devdata->xmit_cmdrsp;
867 	/* clear cmdrsp */
868 	memset(cmdrsp, 0, SIZEOF_CMDRSP);
869 	cmdrsp->net.type = NET_XMIT;
870 	cmdrsp->cmdtype = CMD_NET_TYPE;
871 
872 	/* save the pointer to skb -- we'll need it for completion */
873 	cmdrsp->net.buf = skb;
874 
875 	if (vnic_hit_high_watermark(devdata,
876 				    devdata->max_outstanding_net_xmits)) {
877 		/* too many NET_XMITs queued over to IOVM - need to wait
878 		 */
879 		devdata->chstat.reject_count++;
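		/* record the queue-full condition at most once per 1024
		 * rejects; the flag is cleared again below once xmits are
		 * flowing.
		 */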
880 		if (!devdata->queuefullmsg_logged &&
881 		    ((devdata->chstat.reject_count & 0x3ff) == 1))
882 			devdata->queuefullmsg_logged = 1;
883 		netif_stop_queue(netdev);
884 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
885 		devdata->busy_cnt++;
886 		dev_dbg(&netdev->dev,
887 			"%s busy - waiting for iovm to catch up\n",
888 			__func__);
889 		kfree_skb(skb);
890 		return NETDEV_TX_OK;
891 	}
892 	if (devdata->queuefullmsg_logged)
893 		devdata->queuefullmsg_logged = 0;
894 
895 	if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
896 		cmdrsp->net.xmt.lincsum.valid = 1;
897 		cmdrsp->net.xmt.lincsum.protocol = skb->protocol;
898 		if (skb_transport_header(skb) > skb->data) {
899 			cmdrsp->net.xmt.lincsum.hrawoff =
900 				skb_transport_header(skb) - skb->data;
901 			cmdrsp->net.xmt.lincsum.hrawoffv = 1;
902 		}
903 		if (skb_network_header(skb) > skb->data) {
904 			cmdrsp->net.xmt.lincsum.nhrawoff =
905 				skb_network_header(skb) - skb->data;
906 			cmdrsp->net.xmt.lincsum.nhrawoffv = 1;
907 		}
908 		cmdrsp->net.xmt.lincsum.csum = skb->csum;
909 	} else {
910 		cmdrsp->net.xmt.lincsum.valid = 0;
911 	}
912 
913 	/* save off the length of the entire data packet */
914 	cmdrsp->net.xmt.len = len;
915 
916 	/* copy ethernet header from first frag into cmdrsp
917 	 * - everything else will be passed in frags & DMA'ed
918 	 */
919 	memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HEADER_SIZE);
920 	/* copy frags info - from skb->data we need to only provide access
921 	 * beyond eth header
922 	 */
923 	cmdrsp->net.xmt.num_frags =
924 		visor_copy_fragsinfo_from_skb(skb, firstfraglen,
925 					      MAX_PHYS_INFO,
926 					      cmdrsp->net.xmt.frags);
927 	if (cmdrsp->net.xmt.num_frags < 0) {
928 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
929 		devdata->busy_cnt++;
930 		dev_err(&netdev->dev,
931 			"%s busy - copy frags failed\n", __func__);
932 		kfree_skb(skb);
933 		return NETDEV_TX_OK;
934 	}
935 
936 	if (!visorchannel_signalinsert(devdata->dev->visorchannel,
937 				       IOCHAN_TO_IOPART, cmdrsp)) {
938 		netif_stop_queue(netdev);
939 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
940 		devdata->busy_cnt++;
941 		dev_dbg(&netdev->dev,
942 			"%s busy - signalinsert failed\n", __func__);
943 		kfree_skb(skb);
944 		return NETDEV_TX_OK;
945 	}
946 
947 	/* Track the skbs that have been sent to the IOVM for XMIT */
948 	skb_queue_head(&devdata->xmitbufhead, skb);
949 
950 	/* update xmt stats */
951 	devdata->net_stats.tx_packets++;
952 	devdata->net_stats.tx_bytes += skb->len;
953 	devdata->chstat.sent_xmit++;
954 
955 	/* check to see if we have hit the high watermark for
956 	 * netif_stop_queue()
957 	 */
958 	if (vnic_hit_high_watermark(devdata,
959 				    devdata->upper_threshold_net_xmits)) {
960 		/* too many NET_XMITs queued over to IOVM - need to wait */
961 		netif_stop_queue(netdev); /* calling stop queue - call
962 					   * netif_wake_queue() after lower
963 					   * threshold
964 					   */
965 		dev_dbg(&netdev->dev,
966 			"%s busy - invoking iovm flow control\n",
967 			__func__);
968 		devdata->flow_control_upper_hits++;
969 	}
970 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
971 
972 	/* skb will be freed when we get back NET_XMIT_DONE */
973 	return NETDEV_TX_OK;
974 }
975 
976 /**
977  *	visornic_get_stats - returns net_stats of the visornic device
978  *	@netdev: netdevice
979  *
980  *	Returns the net_device_stats for the device
981  */
982 static struct net_device_stats *
983 visornic_get_stats(struct net_device *netdev)
984 {
985 	struct visornic_devdata *devdata = netdev_priv(netdev);
986 
987 	return &devdata->net_stats;
988 }
989 
990 /**
991  *	visornic_change_mtu - changes mtu of device.
992  *	@netdev: netdevice
993  *	@new_mtu: value of new mtu
994  *
995  *	MTU cannot be changed by system, must be changed via
996  *	CONTROLVM message. All vnics and pnics in a switch have
997  *	to have the same MTU for everything to work.
998  *	Currently not supported.
999  *	Returns -EINVAL
1000  */
1001 static int
1002 visornic_change_mtu(struct net_device *netdev, int new_mtu)
1003 {
1004 	return -EINVAL;
1005 }
1006 
1007 /**
1008  *	visornic_set_multi - changes the multicast/promiscuous mode of device.
1009  *	@netdev: netdevice
1010  *
1011  *	Only flag we support currently is IFF_PROMISC
1012  *	Returns void
1013  */
1014 static void
1015 visornic_set_multi(struct net_device *netdev)
1016 {
1017 	struct uiscmdrsp *cmdrsp;
1018 	struct visornic_devdata *devdata = netdev_priv(netdev);
1019 
1020 	/* any filtering changes */
1021 	if (devdata->old_flags != netdev->flags) {
1022 		if ((netdev->flags & IFF_PROMISC) !=
1023 		    (devdata->old_flags & IFF_PROMISC)) {
1024 			cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1025 			if (!cmdrsp)
1026 				return;
1027 			cmdrsp->cmdtype = CMD_NET_TYPE;
1028 			cmdrsp->net.type = NET_RCV_PROMISC;
1029 			cmdrsp->net.enbdis.context = netdev;
1030 			cmdrsp->net.enbdis.enable =
1031 				(netdev->flags & IFF_PROMISC);
1032 			visorchannel_signalinsert(devdata->dev->visorchannel,
1033 						  IOCHAN_TO_IOPART,
1034 						  cmdrsp);
1035 			kfree(cmdrsp);
1036 		}
1037 		devdata->old_flags = netdev->flags;
1038 	}
1039 }
1040 
1041 /**
1042  *	visornic_xmit_timeout - request to timeout the xmit
1043  *	@netdev: netdevice whose transmit timed out
1044  *
1045  *	Queue the work and return. Make sure we have not already
1046  *	been informed that the IO Partition is gone; if it is gone
1047  *	we will already have timed out the xmits.
1048  */
1049 static void
1050 visornic_xmit_timeout(struct net_device *netdev)
1051 {
1052 	struct visornic_devdata *devdata = netdev_priv(netdev);
1053 	unsigned long flags;
1054 
1055 	spin_lock_irqsave(&devdata->priv_lock, flags);
1056 	if (devdata->going_away) {
1057 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
1058 		dev_dbg(&devdata->dev->device,
1059 			"%s aborting because device removal pending\n",
1060 			__func__);
1061 		return;
1062 	}
1063 
1064 	/* Ensure that a ServerDown message hasn't been received */
1065 	if (!devdata->enabled ||
1066 	    (devdata->server_down && !devdata->server_change_state)) {
1067 		dev_dbg(&netdev->dev, "%s no processing\n",
1068 			__func__);
1069 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
1070 		return;
1071 	}
1072 	queue_work(visornic_timeout_reset_workqueue, &devdata->timeout_reset);
1073 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
1074 }
1075 
1076 /**
1077  *	repost_return	- repost rcv bufs that have come back
1078  *	@cmdrsp: io channel command struct to post
1079  *	@devdata: visornic devdata for the device
1080  *	@skb: skb
1081  *	@netdev: netdevice
1082  *
1083  *	Repost rcv buffers that have been returned to us when
1084  *	we are finished with them.
1085  *	Returns 0 for success, negative errno on error.
1086  */
1087 static inline int
1088 repost_return(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
1089 	      struct sk_buff *skb, struct net_device *netdev)
1090 {
1091 	struct net_pkt_rcv copy;
1092 	int i = 0, cc, numreposted;
1093 	int found_skb = 0;
1094 	int status = 0;
1095 
1096 	copy = cmdrsp->net.rcv;
1097 	switch (copy.numrcvbufs) {
1098 	case 0:
1099 		devdata->n_rcv0++;
1100 		break;
1101 	case 1:
1102 		devdata->n_rcv1++;
1103 		break;
1104 	case 2:
1105 		devdata->n_rcv2++;
1106 		break;
1107 	default:
1108 		devdata->n_rcvx++;
1109 		break;
1110 	}
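	/* For each rcv buffer the IO Partition returned, find the matching
	 * entry in our rcvbuf array and replace it with a freshly allocated
	 * and freshly posted buffer.
	 */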
1111 	for (cc = 0, numreposted = 0; cc < copy.numrcvbufs; cc++) {
1112 		for (i = 0; i < devdata->num_rcv_bufs; i++) {
1113 			if (devdata->rcvbuf[i] != copy.rcvbuf[cc])
1114 				continue;
1115 
1116 			if ((skb) && devdata->rcvbuf[i] == skb) {
1117 				devdata->found_repost_rcvbuf_cnt++;
1118 				found_skb = 1;
1119 				devdata->repost_found_skb_cnt++;
1120 			}
1121 			devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1122 			if (!devdata->rcvbuf[i]) {
1123 				devdata->num_rcv_bufs_could_not_alloc++;
1124 				devdata->alloc_failed_in_repost_rtn_cnt++;
1125 				status = -ENOMEM;
1126 				break;
1127 			}
1128 			post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1129 			numreposted++;
1130 			break;
1131 		}
1132 	}
1133 	if (numreposted != copy.numrcvbufs) {
1134 		devdata->n_repost_deficit++;
1135 		status = -EINVAL;
1136 	}
1137 	if (skb) {
1138 		if (found_skb) {
1139 			kfree_skb(skb);
1140 		} else {
1141 			status = -EINVAL;
1142 			devdata->bad_rcv_buf++;
1143 		}
1144 	}
1145 	return status;
1146 }
1147 
1148 /**
1149  *	visornic_rx - Handle receive packets coming back from IO Part
1150  *	@cmdrsp: Receive packet returned from IO Part
1151  *
1152  *	Got a receive packet back from the IO Part, handle it and send
1153  *	it up the stack.
1154  *	Returns the number of packets delivered up the stack (0 or 1).
1155  */
1156 static int
1157 visornic_rx(struct uiscmdrsp *cmdrsp)
1158 {
1159 	struct visornic_devdata *devdata;
1160 	struct sk_buff *skb, *prev, *curr;
1161 	struct net_device *netdev;
1162 	int cc, currsize, off;
1163 	struct ethhdr *eth;
1164 	unsigned long flags;
1165 	int rx_count = 0;
1166 
1167 	/* post new rcv buf to the other end using the cmdrsp we have at hand
1168 	 * post it without holding lock - but we'll use the signal lock to
1169 	 * synchronize the queue insert the cmdrsp that contains the net.rcv
1170 	 * is the one we are using to repost, so copy the info we need from it.
1171 	 */
1172 	skb = cmdrsp->net.buf;
1173 	netdev = skb->dev;
1174 
1175 	devdata = netdev_priv(netdev);
1176 
1177 	spin_lock_irqsave(&devdata->priv_lock, flags);
1178 	atomic_dec(&devdata->num_rcvbuf_in_iovm);
1179 
1180 	/* set length to how much was ACTUALLY received -
1181 	 * NOTE: rcv_done_len includes actual length of data rcvd
1182 	 * including ethhdr
1183 	 */
1184 	skb->len = cmdrsp->net.rcv.rcv_done_len;
1185 
1186 	/* update rcv stats - call it with priv_lock held */
1187 	devdata->net_stats.rx_packets++;
1188 	devdata->net_stats.rx_bytes += skb->len;
1189 
1190 	/* test enabled while holding lock */
1191 	if (!(devdata->enabled && devdata->enab_dis_acked)) {
1192 		/* don't process it unless we're in enable mode and until
1193 		 * we've gotten an ACK saying the other end got our RCV enable
1194 		 */
1195 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
1196 		repost_return(cmdrsp, devdata, skb, netdev);
1197 		return rx_count;
1198 	}
1199 
1200 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
1201 
1202 	/* when skb was allocated, skb->dev, skb->data, skb->len and
1203 	 * skb->data_len were setup. AND, data has already put into the
1204 	 * skb (both first frag and in frags pages)
1205 	 * NOTE: firstfragslen is the amount of data in skb->data and that
1206 	 * which is not in nr_frags or frag_list. This is now simply
1207 	 * RCVPOST_BUF_SIZE. bump tail to show how much data is in
1208 	 * firstfrag & set data_len to show rest see if we have to chain
1209 	 * frag_list.
1210 	 */
1211 	if (skb->len > RCVPOST_BUF_SIZE) {	/* do PRECAUTIONARY check */
1212 		if (cmdrsp->net.rcv.numrcvbufs < 2) {
1213 			if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1214 				dev_err(&devdata->netdev->dev,
1215 					"repost_return failed");
1216 			return rx_count;
1217 		}
1218 		/* length rcvd is greater than firstfrag in this skb rcv buf  */
1219 		skb->tail += RCVPOST_BUF_SIZE;	/* amount in skb->data */
1220 		skb->data_len = skb->len - RCVPOST_BUF_SIZE;	/* amount that
1221 								   will be in
1222 								   frag_list */
1223 	} else {
1224 		/* data fits in this skb - no chaining - do
1225 		 * PRECAUTIONARY check
1226 		 */
1227 		if (cmdrsp->net.rcv.numrcvbufs != 1) {	/* should be 1 */
1228 			if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1229 				dev_err(&devdata->netdev->dev,
1230 					"repost_return failed");
1231 			return rx_count;
1232 		}
1233 		skb->tail += skb->len;
1234 		skb->data_len = 0;	/* nothing rcvd in frag_list */
1235 	}
1236 	off = skb_tail_pointer(skb) - skb->data;
1237 
1238 	/* amount we bumped tail by in the head skb
1239 	 * it is used to calculate the size of each chained skb below
1240 	 * it is also used to index into bufline to continue the copy
1241 	 * (for chansocktwopc)
1242 	 * if necessary chain the rcv skbs together.
1243 	 * NOTE: index 0 has the same as cmdrsp->net.rcv.skb; we need to
1244 	 * chain the rest to that one.
1245 	 * - do PRECAUTIONARY check
1246 	 */
1247 	if (cmdrsp->net.rcv.rcvbuf[0] != skb) {
1248 		if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1249 			dev_err(&devdata->netdev->dev, "repost_return failed");
1250 		return rx_count;
1251 	}
1252 
1253 	if (cmdrsp->net.rcv.numrcvbufs > 1) {
1254 		/* chain the various rcv buffers into the skb's frag_list. */
1255 		/* Note: off was initialized above  */
1256 		for (cc = 1, prev = NULL;
1257 		     cc < cmdrsp->net.rcv.numrcvbufs; cc++) {
1258 			curr = (struct sk_buff *)cmdrsp->net.rcv.rcvbuf[cc];
1259 			curr->next = NULL;
1260 			if (!prev)	/* start of list- set head */
1261 				skb_shinfo(skb)->frag_list = curr;
1262 			else
1263 				prev->next = curr;
1264 			prev = curr;
1265 
1266 			/* should we set skb->len and skb->data_len for each
1267 			 * buffer being chained??? can't hurt!
1268 			 */
1269 			currsize = min(skb->len - off,
1270 				       (unsigned int)RCVPOST_BUF_SIZE);
1271 			curr->len = currsize;
1272 			curr->tail += currsize;
1273 			curr->data_len = 0;
1274 			off += currsize;
1275 		}
1276 		/* assert skb->len == off */
1277 		if (skb->len != off) {
1278 			netdev_err(devdata->netdev,
1279 				   "something wrong; skb->len:%d != off:%d\n",
1280 				   skb->len, off);
1281 		}
1282 	}
1283 
1284 	/* set up packet's protocol type using ethernet header - this
1285 	 * sets up skb->pkt_type & it also PULLS out the eth header
1286 	 */
1287 	skb->protocol = eth_type_trans(skb, netdev);
1288 
1289 	eth = eth_hdr(skb);
1290 
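	/* no checksum information accompanies the receive, so mark the skb
	 * CHECKSUM_NONE and let the stack verify checksums itself.
	 */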
1291 	skb->csum = 0;
1292 	skb->ip_summed = CHECKSUM_NONE;
1293 
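	/* Filter the packet in software, the way a NIC's receive filter
	 * would: accept it if we are promiscuous, if it is an allowed
	 * broadcast/multicast, or if it is addressed to this interface;
	 * otherwise drop it and just repost the rcv buffer.
	 */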
1294 	do {
1295 		if (netdev->flags & IFF_PROMISC)
1296 			break;	/* accept all packets */
1297 		if (skb->pkt_type == PACKET_BROADCAST) {
1298 			if (netdev->flags & IFF_BROADCAST)
1299 				break;	/* accept all broadcast packets */
1300 		} else if (skb->pkt_type == PACKET_MULTICAST) {
1301 			if ((netdev->flags & IFF_MULTICAST) &&
1302 			    (netdev_mc_count(netdev))) {
1303 				struct netdev_hw_addr *ha;
1304 				int found_mc = 0;
1305 
1306 				/* only accept multicast packets that we can
1307 				 * find in our multicast address list
1308 				 */
1309 				netdev_for_each_mc_addr(ha, netdev) {
1310 					if (ether_addr_equal(eth->h_dest,
1311 							     ha->addr)) {
1312 						found_mc = 1;
1313 						break;
1314 					}
1315 				}
1316 				if (found_mc)
1317 					break;	/* accept packet, dest
1318 						   matches a multicast
1319 						   address */
1320 			}
1321 		} else if (skb->pkt_type == PACKET_HOST) {
1322 			break;	/* accept packet, h_dest must match vnic
1323 				   mac address */
1324 		} else if (skb->pkt_type == PACKET_OTHERHOST) {
1325 			/* something is not right */
1326 			dev_err(&devdata->netdev->dev,
1327 				"**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
1328 				netdev->name, eth->h_dest, netdev->dev_addr);
1329 		}
1330 		/* drop packet - don't forward it up to OS */
1331 		devdata->n_rcv_packets_not_accepted++;
1332 		repost_return(cmdrsp, devdata, skb, netdev);
1333 		return rx_count;
1334 	} while (0);
1335 
1336 	rx_count++;
1337 	netif_receive_skb(skb);
1338 	/* netif_receive_skb returns a value, but in practice most drivers
1339 	 * ignore the return value
1340 	 */
1341 
1342 	skb = NULL;
1343 	/*
1344 	 * whether the packet got dropped or handled, the skb is freed by
1345 	 * kernel code, so we shouldn't free it. but we should repost a
1346 	 * new rcv buffer.
1347 	 */
1348 	repost_return(cmdrsp, devdata, skb, netdev);
1349 	return rx_count;
1350 }
1351 
1352 /**
1353  *	devdata_initialize	- Initialize devdata structure
1354  *	@devdata: visornic_devdata structure to initialize
1355  *	@dev: visor_device it belongs to
1356  *
1357  *	Setup initial values for the visornic based on channel and default
1358  *	values.
1359  *	Returns a pointer to the devdata if successful, else NULL
1360  */
1361 static struct visornic_devdata *
1362 devdata_initialize(struct visornic_devdata *devdata, struct visor_device *dev)
1363 {
1364 	if (!devdata)
1365 		return NULL;
1366 	memset(devdata, '\0', sizeof(struct visornic_devdata));
1367 	devdata->dev = dev;
1368 	devdata->incarnation_id = get_jiffies_64();
1369 	return devdata;
1370 }
1371 
1372 /**
1373  *	devdata_release	- Frees up references in devdata
1374  *	@devdata: struct to clean up
1375  *
1376  *	Frees up references in devdata.
1377  *	Returns void
1378  */
1379 static void devdata_release(struct visornic_devdata *devdata)
1380 {
1381 	kfree(devdata->rcvbuf);
1382 	kfree(devdata->cmdrsp_rcv);
1383 	kfree(devdata->xmit_cmdrsp);
1384 }
1385 
1386 static const struct net_device_ops visornic_dev_ops = {
1387 	.ndo_open = visornic_open,
1388 	.ndo_stop = visornic_close,
1389 	.ndo_start_xmit = visornic_xmit,
1390 	.ndo_get_stats = visornic_get_stats,
1391 	.ndo_change_mtu = visornic_change_mtu,
1392 	.ndo_tx_timeout = visornic_xmit_timeout,
1393 	.ndo_set_rx_mode = visornic_set_multi,
1394 };
1395 
1396 /* DebugFS code */
1397 static ssize_t info_debugfs_read(struct file *file, char __user *buf,
1398 				 size_t len, loff_t *offset)
1399 {
1400 	ssize_t bytes_read = 0;
1401 	int str_pos = 0;
1402 	struct visornic_devdata *devdata;
1403 	struct net_device *dev;
1404 	char *vbuf;
1405 
1406 	if (len > MAX_BUF)
1407 		len = MAX_BUF;
1408 	vbuf = kzalloc(len, GFP_KERNEL);
1409 	if (!vbuf)
1410 		return -ENOMEM;
1411 
1412 	/* for each vnic channel
1413 	 * dump out channel specific data
1414 	 */
1415 	rcu_read_lock();
1416 	for_each_netdev_rcu(current->nsproxy->net_ns, dev) {
1417 		/*
1418 		 * Only consider netdevs that are visornic and whose xmit
1419 		 * queue is currently stopped
1419 		 */
1420 		if ((dev->netdev_ops != &visornic_dev_ops) ||
1421 		    (!netif_queue_stopped(dev)))
1422 			continue;
1423 
1424 		devdata = netdev_priv(dev);
1425 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1426 				     "netdev = %s (0x%p), MAC Addr %pM\n",
1427 				     dev->name,
1428 				     dev,
1429 				     dev->dev_addr);
1430 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1431 				     "VisorNic Dev Info = 0x%p\n", devdata);
1432 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1433 				     " num_rcv_bufs = %d\n",
1434 				     devdata->num_rcv_bufs);
1435 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1436 				     " max_outstanding_net_xmits = %lu\n",
1437 				     devdata->max_outstanding_net_xmits);
1438 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1439 				     " upper_threshold_net_xmits = %lu\n",
1440 				     devdata->upper_threshold_net_xmits);
1441 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1442 				     " lower_threshold_net_xmits = %lu\n",
1443 				     devdata->lower_threshold_net_xmits);
1444 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1445 				     " queuefullmsg_logged = %d\n",
1446 				     devdata->queuefullmsg_logged);
1447 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1448 				     " chstat.got_rcv = %lu\n",
1449 				     devdata->chstat.got_rcv);
1450 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1451 				     " chstat.got_enbdisack = %lu\n",
1452 				     devdata->chstat.got_enbdisack);
1453 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1454 				     " chstat.got_xmit_done = %lu\n",
1455 				     devdata->chstat.got_xmit_done);
1456 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1457 				     " chstat.xmit_fail = %lu\n",
1458 				     devdata->chstat.xmit_fail);
1459 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1460 				     " chstat.sent_enbdis = %lu\n",
1461 				     devdata->chstat.sent_enbdis);
1462 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1463 				     " chstat.sent_promisc = %lu\n",
1464 				     devdata->chstat.sent_promisc);
1465 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1466 				     " chstat.sent_post = %lu\n",
1467 				     devdata->chstat.sent_post);
1468 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1469 				     " chstat.sent_post_failed = %lu\n",
1470 				     devdata->chstat.sent_post_failed);
1471 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1472 				     " chstat.sent_xmit = %lu\n",
1473 				     devdata->chstat.sent_xmit);
1474 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1475 				     " chstat.reject_count = %lu\n",
1476 				     devdata->chstat.reject_count);
1477 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1478 				     " chstat.extra_rcvbufs_sent = %lu\n",
1479 				     devdata->chstat.extra_rcvbufs_sent);
1480 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1481 				     " n_rcv0 = %lu\n", devdata->n_rcv0);
1482 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1483 				     " n_rcv1 = %lu\n", devdata->n_rcv1);
1484 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1485 				     " n_rcv2 = %lu\n", devdata->n_rcv2);
1486 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1487 				     " n_rcvx = %lu\n", devdata->n_rcvx);
1488 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1489 				     " num_rcvbuf_in_iovm = %d\n",
1490 				     atomic_read(&devdata->num_rcvbuf_in_iovm));
1491 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1492 				     " alloc_failed_in_if_needed_cnt = %lu\n",
1493 				     devdata->alloc_failed_in_if_needed_cnt);
1494 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1495 				     " alloc_failed_in_repost_rtn_cnt = %lu\n",
1496 				     devdata->alloc_failed_in_repost_rtn_cnt);
1497 		/* str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1498 		 *		     " inner_loop_limit_reached_cnt = %lu\n",
1499 		 *		     devdata->inner_loop_limit_reached_cnt);
1500 		 */
1501 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1502 				     " found_repost_rcvbuf_cnt = %lu\n",
1503 				     devdata->found_repost_rcvbuf_cnt);
1504 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1505 				     " repost_found_skb_cnt = %lu\n",
1506 				     devdata->repost_found_skb_cnt);
1507 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1508 				     " n_repost_deficit = %lu\n",
1509 				     devdata->n_repost_deficit);
1510 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1511 				     " bad_rcv_buf = %lu\n",
1512 				     devdata->bad_rcv_buf);
1513 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1514 				     " n_rcv_packets_not_accepted = %lu\n",
1515 				     devdata->n_rcv_packets_not_accepted);
1516 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1517 				     " interrupts_rcvd = %llu\n",
1518 				     devdata->interrupts_rcvd);
1519 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1520 				     " interrupts_notme = %llu\n",
1521 				     devdata->interrupts_notme);
1522 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1523 				     " interrupts_disabled = %llu\n",
1524 				     devdata->interrupts_disabled);
1525 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1526 				     " busy_cnt = %llu\n",
1527 				     devdata->busy_cnt);
1528 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1529 				     " flow_control_upper_hits = %llu\n",
1530 				     devdata->flow_control_upper_hits);
1531 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1532 				     " flow_control_lower_hits = %llu\n",
1533 				     devdata->flow_control_lower_hits);
1534 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1535 				     " netif_queue = %s\n",
1536 				     netif_queue_stopped(devdata->netdev) ?
1537 				     "stopped" : "running");
1538 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1539 				     " xmits_outstanding = %lu\n",
1540 				     devdata_xmits_outstanding(devdata));
1541 	}
1542 	rcu_read_unlock();
1543 	bytes_read = simple_read_from_buffer(buf, len, offset, vbuf, str_pos);
1544 	kfree(vbuf);
1545 	return bytes_read;
1546 }
1547 
1548 /**
1549  *	send_rcv_posts_if_needed
1550  *	@devdata: visornic device
1551  *
1552  *	Send receive buffers to the IO Partition.
1553  *	Returns void
1554  */
1555 static void
1556 send_rcv_posts_if_needed(struct visornic_devdata *devdata)
1557 {
1558 	int i;
1559 	struct net_device *netdev;
1560 	struct uiscmdrsp *cmdrsp = devdata->cmdrsp_rcv;
1561 	int cur_num_rcv_bufs_to_alloc, rcv_bufs_allocated;
1562 
1563 	/* don't do this until vnic is marked ready */
1564 	if (!(devdata->enabled && devdata->enab_dis_acked))
1565 		return;
1566 
1567 	netdev = devdata->netdev;
1568 	rcv_bufs_allocated = 0;
1569 	/* this code is trying to prevent getting stuck here forever,
1570 	 * but still retry it if you can't allocate them all this time.
1571 	 */
1572 	cur_num_rcv_bufs_to_alloc = devdata->num_rcv_bufs_could_not_alloc;
1573 	while (cur_num_rcv_bufs_to_alloc > 0) {
1574 		cur_num_rcv_bufs_to_alloc--;
1575 		for (i = 0; i < devdata->num_rcv_bufs; i++) {
1576 			if (devdata->rcvbuf[i])
1577 				continue;
1578 			devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1579 			if (!devdata->rcvbuf[i]) {
1580 				devdata->alloc_failed_in_if_needed_cnt++;
1581 				break;
1582 			}
1583 			rcv_bufs_allocated++;
1584 			post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1585 			devdata->chstat.extra_rcvbufs_sent++;
1586 		}
1587 	}
1588 	devdata->num_rcv_bufs_could_not_alloc -= rcv_bufs_allocated;
1589 }
1590 
1591 /**
1592  *	drain_resp_queue  - drains and ignores all messages from the resp queue
1593  *	@cmdrsp: io channel command response message
1594  *	@devdata: visornic device to drain
1595  */
1596 static void
1597 drain_resp_queue(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata)
1598 {
1599 	while (visorchannel_signalremove(devdata->dev->visorchannel,
1600 					 IOCHAN_FROM_IOPART,
1601 					 cmdrsp))
1602 		;
1603 }
1604 
1605 /**
1606  *	service_resp_queue	- drains the response queue
1607  *	@cmdrsp: io channel command response message
1608  *	@devdata: visornic device to drain
1609  *
1610  *	Drain the response queue of any responses from the IO partition.
1611  *	Process the responses as we get them.
1612  *	Returns when the response queue is empty.
1613  */
1614 static void
1615 service_resp_queue(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
1616 		   int *rx_work_done)
1617 {
1618 	unsigned long flags;
1619 	struct net_device *netdev;
1620 
1621 	/* TODO: CLIENT ACQUIRE -- Don't really need this at the
1622 	 * moment */
1623 	for (;;) {
1624 		if (!visorchannel_signalremove(devdata->dev->visorchannel,
1625 					       IOCHAN_FROM_IOPART,
1626 					       cmdrsp))
1627 			break; /* queue empty */
1628 
1629 		switch (cmdrsp->net.type) {
1630 		case NET_RCV:
1631 			devdata->chstat.got_rcv++;
1632 			/* process incoming packet */
1633 			*rx_work_done += visornic_rx(cmdrsp);
1634 			break;
1635 		case NET_XMIT_DONE:
1636 			spin_lock_irqsave(&devdata->priv_lock, flags);
1637 			devdata->chstat.got_xmit_done++;
1638 			if (cmdrsp->net.xmtdone.xmt_done_result)
1639 				devdata->chstat.xmit_fail++;
1640 			/* only call queue wake if we stopped it */
1641 			netdev = ((struct sk_buff *)cmdrsp->net.buf)->dev;
1642 			/* ASSERT netdev == vnicinfo->netdev; */
1643 			if ((netdev == devdata->netdev) &&
1644 			    netif_queue_stopped(netdev)) {
1645 				/* check to see if we have crossed
1646 				 * the lower watermark for
1647 				 * netif_wake_queue()
1648 				 */
1649 				if (vnic_hit_low_watermark(devdata,
1650 					devdata->lower_threshold_net_xmits)) {
1651 					/* enough NET_XMITs completed
1652 					 * so can restart netif queue
1653 					 */
1654 					netif_wake_queue(netdev);
1655 					devdata->flow_control_lower_hits++;
1656 				}
1657 			}
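			/* The transmit is complete; pull the skb off the
			 * outstanding-xmit list before freeing it.
			 */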
1658 			skb_unlink(cmdrsp->net.buf, &devdata->xmitbufhead);
1659 			spin_unlock_irqrestore(&devdata->priv_lock, flags);
1660 			kfree_skb(cmdrsp->net.buf);
1661 			break;
1662 		case NET_RCV_ENBDIS_ACK:
1663 			devdata->chstat.got_enbdisack++;
1664 			netdev = (struct net_device *)
1665 			cmdrsp->net.enbdis.context;
1666 			spin_lock_irqsave(&devdata->priv_lock, flags);
1667 			devdata->enab_dis_acked = 1;
1668 			spin_unlock_irqrestore(&devdata->priv_lock, flags);
1669 
1670 			if (devdata->server_down &&
1671 			    devdata->server_change_state) {
1672 				/* Inform Linux that the link is up */
1673 				devdata->server_down = false;
1674 				devdata->server_change_state = false;
1675 				netif_wake_queue(netdev);
1676 				netif_carrier_on(netdev);
1677 			}
1678 			break;
1679 		case NET_CONNECT_STATUS:
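			/* The IO partition is reporting a link state change;
			 * mirror it into the netdev queue and carrier state.
			 */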
1680 			netdev = devdata->netdev;
1681 			if (cmdrsp->net.enbdis.enable == 1) {
1682 				spin_lock_irqsave(&devdata->priv_lock, flags);
1683 				devdata->enabled = cmdrsp->net.enbdis.enable;
1684 				spin_unlock_irqrestore(&devdata->priv_lock,
1685 						       flags);
1686 				netif_wake_queue(netdev);
1687 				netif_carrier_on(netdev);
1688 			} else {
1689 				netif_stop_queue(netdev);
1690 				netif_carrier_off(netdev);
1691 				spin_lock_irqsave(&devdata->priv_lock, flags);
1692 				devdata->enabled = cmdrsp->net.enbdis.enable;
1693 				spin_unlock_irqrestore(&devdata->priv_lock,
1694 						       flags);
1695 			}
1696 			break;
1697 		default:
1698 			break;
1699 		}
1700 		/* cmdrsp is now available for reuse  */
1701 	}
1702 }
1703 
1704 static int visornic_poll(struct napi_struct *napi, int budget)
1705 {
1706 	struct visornic_devdata *devdata = container_of(napi,
1707 							struct visornic_devdata,
1708 							napi);
1709 	int rx_count = 0;
1710 
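	/* Replenish any receive buffers we still owe the IO partition,
	 * then process its queued responses.
	 */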
1711 	send_rcv_posts_if_needed(devdata);
1712 	service_resp_queue(devdata->cmdrsp, devdata, &rx_count);
1713 
1714 	/*
1715 	 * If there aren't any more packets to receive, complete the poll;
1716 	 * the irq_poll_timer will reschedule NAPI when more work arrives.
1717 	 */
1718 	if (rx_count < budget)
1719 		napi_complete(napi);
1720 
1721 	return rx_count;
1722 }
1723 
1724 /**
1725  *	poll_for_irq	- Checks the status of the response queue.
1726  *	@v: void pointer to the visornic devdata
1727  *
1728  *	Timer function that periodically checks the response queue and
1729  *	schedules NAPI to drain it if needed.
1730  *	Returns void.
1731  */
1732 static void
1733 poll_for_irq(unsigned long v)
1734 {
1735 	struct visornic_devdata *devdata = (struct visornic_devdata *)v;
1736 
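	/* If the IO partition has posted responses, hand the work off
	 * to the NAPI poll routine.
	 */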
1737 	if (!visorchannel_signalempty(
1738 				   devdata->dev->visorchannel,
1739 				   IOCHAN_FROM_IOPART))
1740 		napi_schedule(&devdata->napi);
1741 
1742 	atomic_set(&devdata->interrupt_rcvd, 0);
1743 
1744 	mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
1745 
1746 }
1747 
1748 /**
1749  *	visornic_probe	- probe function for visornic devices
1750  *	@dev: The visor device discovered
1751  *
1752  *	Called when visorbus discovers a visornic device on its
1753  *	bus. It creates a new visornic ethernet adapter.
1754  *	Returns 0 or negative for error.
1755  */
1756 static int visornic_probe(struct visor_device *dev)
1757 {
1758 	struct visornic_devdata *devdata = NULL;
1759 	struct net_device *netdev = NULL;
1760 	int err;
1761 	int channel_offset = 0;
1762 	u64 features;
1763 
1764 	netdev = alloc_etherdev(sizeof(struct visornic_devdata));
1765 	if (!netdev) {
1766 		dev_err(&dev->device,
1767 			"%s alloc_etherdev failed\n", __func__);
1768 		return -ENOMEM;
1769 	}
1770 
1771 	netdev->netdev_ops = &visornic_dev_ops;
1772 	netdev->watchdog_timeo = (5 * HZ);
1773 	SET_NETDEV_DEV(netdev, &dev->device);
1774 
1775 	/* Get MAC address from channel and read it into the device. */
1776 	netdev->addr_len = ETH_ALEN;
1777 	channel_offset = offsetof(struct spar_io_channel_protocol,
1778 				  vnic.macaddr);
1779 	err = visorbus_read_channel(dev, channel_offset, netdev->dev_addr,
1780 				    ETH_ALEN);
1781 	if (err < 0) {
1782 		dev_err(&dev->device,
1783 			"%s failed to get mac addr from chan (%d)\n",
1784 			__func__, err);
1785 		goto cleanup_netdev;
1786 	}
1787 
1788 	devdata = devdata_initialize(netdev_priv(netdev), dev);
1789 	if (!devdata) {
1790 		dev_err(&dev->device,
1791 			"%s devdata_initialize failed\n", __func__);
1792 		err = -ENOMEM;
1793 		goto cleanup_netdev;
1794 	}
1795 	/* don't trust messages lying around in the channel */
1796 	drain_resp_queue(devdata->cmdrsp, devdata);
1797 
1798 	devdata->netdev = netdev;
1799 	dev_set_drvdata(&dev->device, devdata);
1800 	init_waitqueue_head(&devdata->rsp_queue);
1801 	spin_lock_init(&devdata->priv_lock);
1802 	devdata->enabled = 0; /* not yet */
1803 	atomic_set(&devdata->usage, 1);
1804 
1805 	/* Setup rcv bufs */
1806 	channel_offset = offsetof(struct spar_io_channel_protocol,
1807 				  vnic.num_rcv_bufs);
1808 	err = visorbus_read_channel(dev, channel_offset,
1809 				    &devdata->num_rcv_bufs, 4);
1810 	if (err) {
1811 		dev_err(&dev->device,
1812 			"%s failed to get #rcv bufs from chan (%d)\n",
1813 			__func__, err);
1814 		goto cleanup_netdev;
1815 	}
1816 
1817 	devdata->rcvbuf = kcalloc(devdata->num_rcv_bufs,
1818 				  sizeof(struct sk_buff *), GFP_KERNEL);
1819 	if (!devdata->rcvbuf) {
1820 		err = -ENOMEM;
1821 		goto cleanup_rcvbuf;
1822 	}
1823 
1824 	/* set the net_xmit outstanding threshold */
1825 	/* always leave two slots open but you should have 3 at a minimum */
1826 	/* note that max_outstanding_net_xmits must be > 0 */
1827 	devdata->max_outstanding_net_xmits =
1828 		max_t(unsigned long, 3, ((devdata->num_rcv_bufs / 3) - 2));
1829 	devdata->upper_threshold_net_xmits =
1830 		max_t(unsigned long,
1831 		      2, (devdata->max_outstanding_net_xmits - 1));
1832 	devdata->lower_threshold_net_xmits =
1833 		max_t(unsigned long,
1834 		      1, (devdata->max_outstanding_net_xmits / 2));
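	/* For example, with a hypothetical num_rcv_bufs of 64 this gives
	 * max_outstanding_net_xmits = 19, upper threshold = 18 and
	 * lower threshold = 9.
	 */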
1835 
1836 	skb_queue_head_init(&devdata->xmitbufhead);
1837 
1838 	/* create a cmdrsp we can use to post and unpost rcv buffers */
1839 	devdata->cmdrsp_rcv = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1840 	if (!devdata->cmdrsp_rcv) {
1841 		err = -ENOMEM;
1842 		goto cleanup_cmdrsp_rcv;
1843 	}
1844 	devdata->xmit_cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1845 	if (!devdata->xmit_cmdrsp) {
1846 		err = -ENOMEM;
1847 		goto cleanup_xmit_cmdrsp;
1848 	}
1849 	INIT_WORK(&devdata->timeout_reset, visornic_timeout_reset);
1850 	devdata->server_down = false;
1851 	devdata->server_change_state = false;
1852 
1853 	/* set the default mtu */
1854 	channel_offset = offsetof(struct spar_io_channel_protocol,
1855 				  vnic.mtu);
1856 	err = visorbus_read_channel(dev, channel_offset, &netdev->mtu, 4);
1857 	if (err) {
1858 		dev_err(&dev->device,
1859 			"%s failed to get mtu from chan (%d)\n",
1860 			__func__, err);
1861 		goto cleanup_xmit_cmdrsp;
1862 	}
1863 
1864 	/* TODO: Setup Interrupt information */
1865 	/* Set up NAPI and the polling timer to get responses */
1866 	netif_napi_add(netdev, &devdata->napi, visornic_poll, 64);
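	/* 64 is the conventional NAPI poll weight (NAPI_POLL_WEIGHT) */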
1867 
1868 	setup_timer(&devdata->irq_poll_timer, poll_for_irq,
1869 		    (unsigned long)devdata);
1870 	/*
1871 	 * Note: This timer has to start running before we attempt to
1872 	 * enable the device, because the napi poll routine (via
1873 	 * service_resp_queue) is responsible for setting enab_dis_acked.
1874 	 */
1875 	mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
1876 
1877 	channel_offset = offsetof(struct spar_io_channel_protocol,
1878 				  channel_header.features);
1879 	err = visorbus_read_channel(dev, channel_offset, &features, 8);
1880 	if (err) {
1881 		dev_err(&dev->device,
1882 			"%s failed to get features from chan (%d)\n",
1883 			__func__, err);
1884 		goto cleanup_napi_add;
1885 	}
1886 
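	/* Tell the IO partition that this driver polls the channel rather
	 * than taking interrupts, and that it supports enhanced receive
	 * buffer checking.
	 */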
1887 	features |= ULTRA_IO_CHANNEL_IS_POLLING;
1888 	features |= ULTRA_IO_DRIVER_SUPPORTS_ENHANCED_RCVBUF_CHECKING;
1889 	err = visorbus_write_channel(dev, channel_offset, &features, 8);
1890 	if (err) {
1891 		dev_err(&dev->device,
1892 			"%s failed to set features in chan (%d)\n",
1893 			__func__, err);
1894 		goto cleanup_napi_add;
1895 	}
1896 
1897 	err = register_netdev(netdev);
1898 	if (err) {
1899 		dev_err(&dev->device,
1900 			"%s register_netdev failed (%d)\n", __func__, err);
1901 		goto cleanup_napi_add;
1902 	}
1903 
1904 	/* create a debugfs directory for this interface */
1905 	devdata->eth_debugfs_dir = debugfs_create_dir(netdev->name,
1906 						      visornic_debugfs_dir);
1907 	if (!devdata->eth_debugfs_dir) {
1908 		dev_err(&dev->device,
1909 			"%s debugfs_create_dir %s failed\n",
1910 			__func__, netdev->name);
1911 		err = -ENOMEM;
1912 		goto cleanup_register_netdev;
1913 	}
1914 
1915 	dev_info(&dev->device, "%s success netdev=%s\n",
1916 		 __func__, netdev->name);
1917 	return 0;
1918 
1919 cleanup_register_netdev:
1920 	unregister_netdev(netdev);
1921 
1922 cleanup_napi_add:
1923 	del_timer_sync(&devdata->irq_poll_timer);
1924 	netif_napi_del(&devdata->napi);
1925 
1926 cleanup_xmit_cmdrsp:
1927 	kfree(devdata->xmit_cmdrsp);
1928 
1929 cleanup_cmdrsp_rcv:
1930 	kfree(devdata->cmdrsp_rcv);
1931 
1932 cleanup_rcvbuf:
1933 	kfree(devdata->rcvbuf);
1934 
1935 cleanup_netdev:
1936 	free_netdev(netdev);
1937 	return err;
1938 }
1939 
1940 /**
1941  *	host_side_disappeared	- IO part is gone.
1942  *	@devdata: device object
1943  *
1944  *	IO partition servicing this device is gone, do cleanup
1945  *	Returns void.
1946  */
1947 static void host_side_disappeared(struct visornic_devdata *devdata)
1948 {
1949 	unsigned long flags;
1950 
1951 	spin_lock_irqsave(&devdata->priv_lock, flags);
1952 	devdata->dev = NULL;   /* indicate device destroyed */
1953 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
1954 }
1955 
1956 /**
1957  *	visornic_remove		- Called when visornic dev goes away
1958  *	@dev: visornic device that is being removed
1959  *
1960  *	Called when DEVICE_DESTROY gets called to remove device.
1961  *	Returns void
1962  */
1963 static void visornic_remove(struct visor_device *dev)
1964 {
1965 	struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
1966 	struct net_device *netdev;
1967 	unsigned long flags;
1968 
1969 	if (!devdata) {
1970 		dev_err(&dev->device, "%s no devdata\n", __func__);
1971 		return;
1972 	}
1973 	spin_lock_irqsave(&devdata->priv_lock, flags);
1974 	if (devdata->going_away) {
1975 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
1976 		dev_err(&dev->device, "%s already being removed\n", __func__);
1977 		return;
1978 	}
1979 	devdata->going_away = true;
1980 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
1981 	netdev = devdata->netdev;
1982 	if (!netdev) {
1983 		dev_err(&dev->device, "%s no net device\n", __func__);
1984 		return;
1985 	}
1986 
1987 	/* going_away prevents new items being added to the workqueues */
1988 	flush_workqueue(visornic_timeout_reset_workqueue);
1989 
1990 	debugfs_remove_recursive(devdata->eth_debugfs_dir);
1991 
1992 	unregister_netdev(netdev);  /* this will call visornic_close() */
1993 
1994 	del_timer_sync(&devdata->irq_poll_timer);
1995 	netif_napi_del(&devdata->napi);
1996 
1997 	dev_set_drvdata(&dev->device, NULL);
1998 	host_side_disappeared(devdata);
1999 	devdata_release(devdata);
2000 	free_netdev(netdev);
2001 }
2002 
2003 /**
2004  *	visornic_pause		- Called when IO Part disappears
2005  *	@dev: visornic device that is being serviced
2006  *	@complete_func: call when finished.
2007  *
2008  *	Called when the IO Partition has gone down. Need to free
2009  *	up resources and wait for IO partition to come back. Mark
2010  *	link as down and don't attempt any DMA. When we have freed
2011  *	memory call the complete_func so that Command knows we are
2012  *	done. If we don't call complete_func, IO part will never
2013  *	come back.
2014  *	Returns 0 for success.
2015  */
2016 static int visornic_pause(struct visor_device *dev,
2017 			  visorbus_state_complete_func complete_func)
2018 {
2019 	struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
2020 
2021 	visornic_serverdown(devdata, complete_func);
2022 	return 0;
2023 }
2024 
2025 /**
2026  *	visornic_resume		- Called when IO part has recovered
2027  *	@dev: visornic device that is being serviced
2028  *	@complete_func: call when finished
2029  *
2030  *	Called when the IO partition has recovered. Reestablish
2031  *	connection to the IO part and set the link up. Okay to do
2032  *	DMA again.
2033  *	Returns 0 for success.
2034  */
2035 static int visornic_resume(struct visor_device *dev,
2036 			   visorbus_state_complete_func complete_func)
2037 {
2038 	struct visornic_devdata *devdata;
2039 	struct net_device *netdev;
2040 	unsigned long flags;
2041 
2042 	devdata = dev_get_drvdata(&dev->device);
2043 	if (!devdata) {
2044 		dev_err(&dev->device, "%s no devdata\n", __func__);
2045 		return -EINVAL;
2046 	}
2047 
2048 	netdev = devdata->netdev;
2049 
2050 	spin_lock_irqsave(&devdata->priv_lock, flags);
2051 	if (devdata->server_change_state) {
2052 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
2053 		dev_err(&dev->device, "%s server already changing state\n",
2054 			__func__);
2055 		return -EINVAL;
2056 	}
2057 	if (!devdata->server_down) {
2058 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
2059 		dev_err(&dev->device, "%s server not down\n", __func__);
2060 		complete_func(dev, 0);
2061 		return 0;
2062 	}
2063 	devdata->server_change_state = true;
2064 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
2065 
2066 	/* Must transition channel to ATTACHED state BEFORE
2067 	 * we can start using the device again.
2068 	 * TODO: State transitions
2069 	 */
2070 	mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
2071 
2072 	init_rcv_bufs(netdev, devdata);
2073 
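	/* dev_open() must be called with the RTNL held */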
2074 	rtnl_lock();
2075 	dev_open(netdev);
2076 	rtnl_unlock();
2077 
2078 	complete_func(dev, 0);
2079 	return 0;
2080 }
2081 
2082 /**
2083  *	visornic_init	- Init function
2084  *
2085  *	Init function for the visornic driver. Do initial driver setup
2086  *	and wait for devices.
2087  *	Returns 0 for success, negative for error.
2088  */
2089 static int visornic_init(void)
2090 {
2091 	struct dentry *ret;
2092 	int err = -ENOMEM;
2093 
2094 	visornic_debugfs_dir = debugfs_create_dir("visornic", NULL);
2095 	if (!visornic_debugfs_dir)
2096 		return err;
2097 
2098 	ret = debugfs_create_file("info", S_IRUSR, visornic_debugfs_dir, NULL,
2099 				  &debugfs_info_fops);
2100 	if (!ret)
2101 		goto cleanup_debugfs;
2102 	ret = debugfs_create_file("enable_ints", S_IWUSR, visornic_debugfs_dir,
2103 				  NULL, &debugfs_enable_ints_fops);
2104 	if (!ret)
2105 		goto cleanup_debugfs;
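	/* With debugfs mounted at its usual location, these files appear as
	 * /sys/kernel/debug/visornic/info and
	 * /sys/kernel/debug/visornic/enable_ints.
	 */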
2106 
2107 	/* create workqueue for tx timeout reset */
2108 	visornic_timeout_reset_workqueue =
2109 		create_singlethread_workqueue("visornic_timeout_reset");
2110 	if (!visornic_timeout_reset_workqueue)
2111 		goto cleanup_workqueue;
2112 
2113 	err = visorbus_register_visor_driver(&visornic_driver);
2114 	if (!err)
2115 		return 0;
2116 
2117 cleanup_workqueue:
2118 	if (visornic_timeout_reset_workqueue) {
2119 		flush_workqueue(visornic_timeout_reset_workqueue);
2120 		destroy_workqueue(visornic_timeout_reset_workqueue);
2121 	}
2122 cleanup_debugfs:
2123 	debugfs_remove_recursive(visornic_debugfs_dir);
2124 
2125 	return err;
2126 }
2127 
2128 /**
2129  *	visornic_cleanup	- driver exit routine
2130  *
2131  *	Unregister driver from the bus and free up memory.
2132  */
2133 static void visornic_cleanup(void)
2134 {
2135 	visorbus_unregister_visor_driver(&visornic_driver);
2136 
2137 	if (visornic_timeout_reset_workqueue) {
2138 		flush_workqueue(visornic_timeout_reset_workqueue);
2139 		destroy_workqueue(visornic_timeout_reset_workqueue);
2140 	}
2141 	debugfs_remove_recursive(visornic_debugfs_dir);
2142 }
2143 
2144 module_init(visornic_init);
2145 module_exit(visornic_cleanup);
2146 
2147 MODULE_AUTHOR("Unisys");
2148 MODULE_LICENSE("GPL");
2149 MODULE_DESCRIPTION("sPAR nic driver for sparlinux: ver 1.0.0.0");
2150 MODULE_VERSION("1.0.0.0");
2151