root/drivers/staging/unisys/visornic/visornic_main.c

DEFINITIONS

This source file includes the following definitions.
  1. add_physinfo_entries
  2. visor_copy_fragsinfo_from_skb
  3. enable_ints_write
  4. visornic_serverdown_complete
  5. visornic_serverdown
  6. alloc_rcv_buf
  7. post_skb
  8. send_enbdis
  9. visornic_disable_with_timeout
  10. init_rcv_bufs
  11. visornic_enable_with_timeout
  12. visornic_timeout_reset
  13. visornic_open
  14. visornic_close
  15. devdata_xmits_outstanding
  16. vnic_hit_high_watermark
  17. vnic_hit_low_watermark
  18. visornic_xmit
  19. visornic_get_stats
  20. visornic_change_mtu
  21. visornic_set_multi
  22. visornic_xmit_timeout
  23. repost_return
  24. visornic_rx
  25. devdata_initialize
  26. devdata_release
  27. info_debugfs_read
  28. send_rcv_posts_if_needed
  29. drain_resp_queue
  30. service_resp_queue
  31. visornic_poll
  32. poll_for_irq
  33. visornic_probe
  34. host_side_disappeared
  35. visornic_remove
  36. visornic_pause
  37. visornic_resume
  38. visornic_init
  39. visornic_cleanup

   1 // SPDX-License-Identifier: GPL-2.0
   2 /* Copyright (c) 2012 - 2015 UNISYS CORPORATION
   3  * All rights reserved.
   4  */
   5 
   6 /* This driver lives in a spar partition, and registers to ethernet io
   7  * channels from the visorbus driver. It creates netdev devices and
   8  * forwards transmit to the IO channel and accepts rcvs from the IO
   9  * Partition via the IO channel.
  10  */
  11 
  12 #include <linux/debugfs.h>
  13 #include <linux/etherdevice.h>
  14 #include <linux/module.h>
  15 #include <linux/netdevice.h>
  16 #include <linux/kthread.h>
  17 #include <linux/skbuff.h>
  18 #include <linux/rtnetlink.h>
  19 #include <linux/visorbus.h>
  20 
  21 #include "iochannel.h"
  22 
  23 #define VISORNIC_INFINITE_RSP_WAIT 0
  24 
  25 /* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
  26  *         = 163840 bytes
  27  */
  28 #define MAX_BUF 163840
  29 #define NAPI_WEIGHT 64
  30 
  31 /* GUIDs for the vnic channel type supported by this driver. */
  32 /* {8cd5994d-c58e-11da-95a9-00e08161165f} */
  33 #define VISOR_VNIC_CHANNEL_GUID \
  34         GUID_INIT(0x8cd5994d, 0xc58e, 0x11da, \
  35                 0x95, 0xa9, 0x0, 0xe0, 0x81, 0x61, 0x16, 0x5f)
  36 #define VISOR_VNIC_CHANNEL_GUID_STR \
  37         "8cd5994d-c58e-11da-95a9-00e08161165f"
  38 
  39 static struct visor_channeltype_descriptor visornic_channel_types[] = {
  40         /* Note that the only channel type we expect to be reported by the
  41          * bus driver is the VISOR_VNIC channel.
  42          */
  43         { VISOR_VNIC_CHANNEL_GUID, "ultravnic", sizeof(struct channel_header),
  44           VISOR_VNIC_CHANNEL_VERSIONID },
  45         {}
  46 };
  47 MODULE_DEVICE_TABLE(visorbus, visornic_channel_types);
  48 /* FIXME XXX: This next line of code must be fixed and removed before
  49  * acceptance into the 'normal' part of the kernel.  It is only here as a place
  50  * holder to get module autoloading functionality working for visorbus.  Code
  51  * must be added to scripts/mod/file2alias.c, etc., to get this working
  52  * properly.
  53  */
  54 MODULE_ALIAS("visorbus:" VISOR_VNIC_CHANNEL_GUID_STR);
  55 
  56 struct chanstat {
  57         unsigned long got_rcv;
  58         unsigned long got_enbdisack;
  59         unsigned long got_xmit_done;
  60         unsigned long xmit_fail;
  61         unsigned long sent_enbdis;
  62         unsigned long sent_promisc;
  63         unsigned long sent_post;
  64         unsigned long sent_post_failed;
  65         unsigned long sent_xmit;
  66         unsigned long reject_count;
  67         unsigned long extra_rcvbufs_sent;
  68 };
  69 
  70 /* struct visornic_devdata
  71  * @enabled:                        0 = disabled, 1 = enabled to receive.
  72  * @enab_dis_acked:                 NET_RCV_ENABLE/DISABLE acked by IOPART.
  73  * @struct *dev:
  74  * @struct *netdev:
  75  * @struct net_stats:
  76  * @interrupt_rcvd:
  77  * @rsp_queue:
  78  * @struct **rcvbuf:
  79  * @incarnation_id:                 incarnation_id lets IOPART know about
  80  *                                  re-birth.
  81  * @old_flags:                      flags as they were prior to
  82  *                                  set_multicast_list.
  83  * @usage:                          count of users.
  84  * @num_rcv_bufs:                   number of rcv buffers the vnic will post.
  85  * @num_rcv_bufs_could_not_alloc:
  86  * @num_rcvbuf_in_iovm:
  87  * @alloc_failed_in_if_needed_cnt:
  88  * @alloc_failed_in_repost_rtn_cnt:
  89  * @max_outstanding_net_xmits:      absolute max number of outstanding xmits
  90  *                                  - should never hit this.
  91  * @upper_threshold_net_xmits:      high water mark for calling
  92  *                                  netif_stop_queue().
  93  * @lower_threshold_net_xmits:      low water mark for calling
  94  *                                  netif_wake_queue().
  95  * @struct xmitbufhead:             xmitbufhead - head of the xmit buffer list
  96  *                                  sent to the IOPART end.
  97  * @server_down_complete_func:
  98  * @struct timeout_reset:
  99  * @struct *cmdrsp_rcv:             cmdrsp_rcv is used for posting/unposting rcv
 100  *                                  buffers.
 101  * @struct *xmit_cmdrsp:            xmit_cmdrsp - issues NET_XMIT - only one
 102  *                                  active xmit at a time.
 103  * @server_down:                    IOPART is down.
 104  * @server_change_state:            Processing SERVER_CHANGESTATE msg.
 105  * @going_away:                     device is being torn down.
 106  * @struct *eth_debugfs_dir:
 107  * @interrupts_rcvd:
 108  * @interrupts_notme:
 109  * @interrupts_disabled:
 110  * @busy_cnt:
 111  * @priv_lock:                      spinlock to access devdata structures.
 112  * @flow_control_upper_hits:
 113  * @flow_control_lower_hits:
 114  * @n_rcv0:                         # rcvs of 0 buffers.
 115  * @n_rcv1:                         # rcvs of 1 buffers.
 116  * @n_rcv2:                         # rcvs of 2 buffers.
 117  * @n_rcvx:                         # rcvs of >2 buffers.
 118  * @found_repost_rcvbuf_cnt:        # repost_rcvbuf_cnt.
 119  * @repost_found_skb_cnt:           # of found the skb.
 120  * @n_repost_deficit:               # of lost rcv buffers.
 121  * @bad_rcv_buf:                    # of unknown rcv skb not freed.
 122  * @n_rcv_packets_not_accepted:     # bogus rcv packets.
 123  * @queuefullmsg_logged:
 124  * @struct chstat:
 125  * @struct irq_poll_timer:
 126  * @struct napi:
 127  * @struct cmdrsp:
 128  */
 129 struct visornic_devdata {
 130         unsigned short enabled;
 131         unsigned short enab_dis_acked;
 132 
 133         struct visor_device *dev;
 134         struct net_device *netdev;
 135         struct net_device_stats net_stats;
 136         atomic_t interrupt_rcvd;
 137         wait_queue_head_t rsp_queue;
 138         struct sk_buff **rcvbuf;
 139         u64 incarnation_id;
 140         unsigned short old_flags;
 141         atomic_t usage;
 142 
 143         int num_rcv_bufs;
 144         int num_rcv_bufs_could_not_alloc;
 145         atomic_t num_rcvbuf_in_iovm;
 146         unsigned long alloc_failed_in_if_needed_cnt;
 147         unsigned long alloc_failed_in_repost_rtn_cnt;
 148 
 149         unsigned long max_outstanding_net_xmits;
 150         unsigned long upper_threshold_net_xmits;
 151         unsigned long lower_threshold_net_xmits;
 152         struct sk_buff_head xmitbufhead;
 153 
 154         visorbus_state_complete_func server_down_complete_func;
 155         struct work_struct timeout_reset;
 156         struct uiscmdrsp *cmdrsp_rcv;
 157         struct uiscmdrsp *xmit_cmdrsp;
 158         bool server_down;
 159         bool server_change_state;
 160         bool going_away;
 161         struct dentry *eth_debugfs_dir;
 162         u64 interrupts_rcvd;
 163         u64 interrupts_notme;
 164         u64 interrupts_disabled;
 165         u64 busy_cnt;
 166         /* spinlock to access devdata structures. */
 167         spinlock_t priv_lock;
 168 
 169         /* flow control counter */
 170         u64 flow_control_upper_hits;
 171         u64 flow_control_lower_hits;
 172 
 173         /* debug counters */
 174         unsigned long n_rcv0;
 175         unsigned long n_rcv1;
 176         unsigned long n_rcv2;
 177         unsigned long n_rcvx;
 178         unsigned long found_repost_rcvbuf_cnt;
 179         unsigned long repost_found_skb_cnt;
 180         unsigned long n_repost_deficit;
 181         unsigned long bad_rcv_buf;
 182         unsigned long n_rcv_packets_not_accepted;
 183 
 184         int queuefullmsg_logged;
 185         struct chanstat chstat;
 186         struct timer_list irq_poll_timer;
 187         struct napi_struct napi;
 188         struct uiscmdrsp cmdrsp[SIZEOF_CMDRSP];
 189 };
 190 
 191 /* Returns next non-zero index on success or 0 on failure (i.e. out of room). */
 192 static u16 add_physinfo_entries(u64 inp_pfn, u16 inp_off, u16 inp_len,
 193                                 u16 index, u16 max_pi_arr_entries,
 194                                 struct phys_info pi_arr[])
 195 {
 196         u16 i, len, firstlen;
 197 
 198         firstlen = PI_PAGE_SIZE - inp_off;
 199         if (inp_len <= firstlen) {
 200                 /* The input entry spans only one page - add as is. */
 201                 if (index >= max_pi_arr_entries)
 202                         return 0;
 203                 pi_arr[index].pi_pfn = inp_pfn;
 204                 pi_arr[index].pi_off = (u16)inp_off;
 205                 pi_arr[index].pi_len = (u16)inp_len;
 206                 return index + 1;
 207         }
 208 
 209         /* This entry spans multiple pages. */
 210         for (len = inp_len, i = 0; len;
 211                 len -= pi_arr[index + i].pi_len, i++) {
 212                 if (index + i >= max_pi_arr_entries)
 213                         return 0;
 214                 pi_arr[index + i].pi_pfn = inp_pfn + i;
 215                 if (i == 0) {
 216                         pi_arr[index].pi_off = inp_off;
 217                         pi_arr[index].pi_len = firstlen;
 218                 } else {
 219                         pi_arr[index + i].pi_off = 0;
 220                         pi_arr[index + i].pi_len = min_t(u16, len,
 221                                                          PI_PAGE_SIZE);
 222                 }
 223         }
 224         return index + i;
 225 }
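
     /* A worked example of the multi-page split above, assuming PI_PAGE_SIZE
      * is 4096: inp_pfn = P, inp_off = 0x800, inp_len = 0x2800 produces three
      * entries,
      *   pi_arr[index + 0] = { P,     0x800, 0x800  }  (firstlen = 4096 - 0x800)
      *   pi_arr[index + 1] = { P + 1, 0x0,   0x1000 }
      *   pi_arr[index + 2] = { P + 2, 0x0,   0x1000 }
      * and the function returns index + 3.
      */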
 226 
 227 /* visor_copy_fragsinfo_from_skb - copy fragment list in the SKB to a phys_info
 228  *                                 array that the IOPART understands
 229  * @skb:          Skbuff that we are pulling the frags from.
 230  * @firstfraglen: Length of first fragment in skb.
 231  * @frags_max:    Max len of frags array.
 232  * @frags:        Frags array filled in on output.
 233  *
 234  * Return: Positive integer indicating number of entries filled in frags on
 235  *         success, negative integer on error.
 236  */
 237 static int visor_copy_fragsinfo_from_skb(struct sk_buff *skb,
 238                                          unsigned int firstfraglen,
 239                                          unsigned int frags_max,
 240                                          struct phys_info frags[])
 241 {
 242         unsigned int count = 0, frag, size, offset = 0, numfrags;
 243         unsigned int total_count;
 244 
 245         numfrags = skb_shinfo(skb)->nr_frags;
 246 
 247         /* Compute the number of fragments this skb has, and if it's more
 248          * than the frag array can hold, linearize the skb.
 249          */
 250         total_count = numfrags + (firstfraglen / PI_PAGE_SIZE);
 251         if (firstfraglen % PI_PAGE_SIZE)
 252                 total_count++;
 253 
 254         if (total_count > frags_max) {
 255                 if (skb_linearize(skb))
 256                         return -EINVAL;
 257                 numfrags = skb_shinfo(skb)->nr_frags;
 258                 firstfraglen = 0;
 259         }
 260 
 261         while (firstfraglen) {
 262                 if (count == frags_max)
 263                         return -EINVAL;
 264 
 265                 frags[count].pi_pfn =
 266                         page_to_pfn(virt_to_page(skb->data + offset));
 267                 frags[count].pi_off =
 268                         (unsigned long)(skb->data + offset) & PI_PAGE_MASK;
 269                 size = min_t(unsigned int, firstfraglen,
 270                              PI_PAGE_SIZE - frags[count].pi_off);
 271 
 272                 /* take the smaller of firstfraglen (what's left) and the
 273                  * bytes left in the page
 274                  */
 275                 frags[count].pi_len = size;
 276                 firstfraglen -= size;
 277                 offset += size;
 278                 count++;
 279         }
 280         if (numfrags) {
 281                 if ((count + numfrags) > frags_max)
 282                         return -EINVAL;
 283 
 284                 for (frag = 0; frag < numfrags; frag++) {
 285                         count = add_physinfo_entries(page_to_pfn(
 286                                   skb_frag_page(&skb_shinfo(skb)->frags[frag])),
 287                                   skb_frag_off(&skb_shinfo(skb)->frags[frag]),
 288                                   skb_frag_size(&skb_shinfo(skb)->frags[frag]),
 289                                   count, frags_max, frags);
 290                         /* add_physinfo_entries only returns
 291                          * zero if the frags array is out of room
 292                          * That should never happen because we
 293                          * fail above, if count+numfrags > frags_max.
 294                          */
 295                         if (!count)
 296                                 return -EINVAL;
 297                 }
 298         }
 299         if (skb_shinfo(skb)->frag_list) {
 300                 struct sk_buff *skbinlist;
 301                 int c;
 302 
 303                 for (skbinlist = skb_shinfo(skb)->frag_list; skbinlist;
 304                      skbinlist = skbinlist->next) {
 305                         c = visor_copy_fragsinfo_from_skb(skbinlist,
 306                                                           skbinlist->len -
 307                                                           skbinlist->data_len,
 308                                                           frags_max - count,
 309                                                           &frags[count]);
 310                         if (c < 0)
 311                                 return c;
 312                         count += c;
 313                 }
 314         }
 315         return count;
 316 }
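
     /* For illustration, assuming PI_PAGE_SIZE is 4096: an skb whose linear
      * area (firstfraglen) is 6000 bytes needs 6000 / 4096 = 1 entry plus one
      * more for the remainder, so total_count = nr_frags + 2. Each page
      * fragment then contributes its own entries via add_physinfo_entries()
      * above.
      */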
 317 
 318 static ssize_t enable_ints_write(struct file *file,
 319                                  const char __user *buffer,
 320                                  size_t count, loff_t *ppos)
 321 {
 322         /* Don't want to break the ABI here by having a debugfs
 323          * file that no longer exists or is no longer writable, so
 324          * let's just make this a vestigial function.
 325          */
 326         return count;
 327 }
 328 
 329 static const struct file_operations debugfs_enable_ints_fops = {
 330         .write = enable_ints_write,
 331 };
 332 
 333 /* visornic_serverdown_complete - pause device following IOPART going down
 334  * @devdata: Device managed by IOPART.
 335  *
 336  * The IO partition has gone down, and we need to do some cleanup for when it
 337  * comes back. Treat the IO partition as the link being down.
 338  */
 339 static void visornic_serverdown_complete(struct visornic_devdata *devdata)
 340 {
 341         struct net_device *netdev = devdata->netdev;
 342 
 343         /* Stop polling for interrupts */
 344         del_timer_sync(&devdata->irq_poll_timer);
 345 
 346         rtnl_lock();
 347         dev_close(netdev);
 348         rtnl_unlock();
 349 
 350         atomic_set(&devdata->num_rcvbuf_in_iovm, 0);
 351         devdata->chstat.sent_xmit = 0;
 352         devdata->chstat.got_xmit_done = 0;
 353 
 354         if (devdata->server_down_complete_func)
 355                 (*devdata->server_down_complete_func)(devdata->dev, 0);
 356 
 357         devdata->server_down = true;
 358         devdata->server_change_state = false;
 359         devdata->server_down_complete_func = NULL;
 360 }
 361 
 362 /* visornic_serverdown - Command has notified us that IOPART is down
 363  * @devdata:       Device managed by IOPART.
 364  * @complete_func: Function to call when finished.
 365  *
 366  * Schedule the work needed to handle the server down request. Make sure we
 367  * haven't already handled the server change state event.
 368  *
 369  * Return: 0 if we scheduled the work, negative integer on error.
 370  */
 371 static int visornic_serverdown(struct visornic_devdata *devdata,
 372                                visorbus_state_complete_func complete_func)
 373 {
 374         unsigned long flags;
 375         int err;
 376 
 377         spin_lock_irqsave(&devdata->priv_lock, flags);
 378         if (devdata->server_change_state) {
 379                 dev_dbg(&devdata->dev->device, "%s changing state\n",
 380                         __func__);
 381                 err = -EINVAL;
 382                 goto err_unlock;
 383         }
 384         if (devdata->server_down) {
 385                 dev_dbg(&devdata->dev->device, "%s already down\n",
 386                         __func__);
 387                 err = -EINVAL;
 388                 goto err_unlock;
 389         }
 390         if (devdata->going_away) {
 391                 dev_dbg(&devdata->dev->device,
 392                         "%s aborting because device removal pending\n",
 393                         __func__);
 394                 err = -ENODEV;
 395                 goto err_unlock;
 396         }
 397         devdata->server_change_state = true;
 398         devdata->server_down_complete_func = complete_func;
 399         spin_unlock_irqrestore(&devdata->priv_lock, flags);
 400 
 401         visornic_serverdown_complete(devdata);
 402         return 0;
 403 
 404 err_unlock:
 405         spin_unlock_irqrestore(&devdata->priv_lock, flags);
 406         return err;
 407 }
 408 
 409 /* alloc_rcv_buf - alloc rcv buffer to be given to the IO Partition
 410  * @netdev: Network adapter the rcv bufs are attached to.
 411  *
 412  * Create an sk_buff (rcv_buf) that will be passed to the IO Partition
 413  * so that it can write rcv data into our memory space.
 414  *
 415  * Return: Pointer to sk_buff.
 416  */
 417 static struct sk_buff *alloc_rcv_buf(struct net_device *netdev)
 418 {
 419         struct sk_buff *skb;
 420 
 421         /* NOTE: the first fragment in each rcv buffer is pointed to by
 422          * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
 423          * in length, so the first frag is large enough to hold 1514.
 424          */
 425         skb = alloc_skb(RCVPOST_BUF_SIZE, GFP_ATOMIC);
 426         if (!skb)
 427                 return NULL;
 428         skb->dev = netdev;
 429         /* current value of mtu doesn't come into play here; large
 430          * packets will just end up using multiple rcv buffers all of
 431          * same size.
 432          */
 433         skb->len = RCVPOST_BUF_SIZE;
 434         /* alloc_skb already zeroes it out; set it again here for clarity. */
 435         skb->data_len = 0;
 436         return skb;
 437 }
 438 
 439 /* post_skb - post a skb to the IO Partition
 440  * @cmdrsp:  Cmdrsp packet to be sent to the IO Partition.
 441  * @devdata: visornic_devdata to post the skb to.
 442  * @skb:     Skb to give to the IO partition.
 443  *
 444  * Return: 0 on success, negative integer on error.
 445  */
 446 static int post_skb(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
 447                     struct sk_buff *skb)
 448 {
 449         int err;
 450 
 451         cmdrsp->net.buf = skb;
 452         cmdrsp->net.rcvpost.frag.pi_pfn = page_to_pfn(virt_to_page(skb->data));
 453         cmdrsp->net.rcvpost.frag.pi_off =
 454                 (unsigned long)skb->data & PI_PAGE_MASK;
 455         cmdrsp->net.rcvpost.frag.pi_len = skb->len;
 456         cmdrsp->net.rcvpost.unique_num = devdata->incarnation_id;
 457 
 458         if ((cmdrsp->net.rcvpost.frag.pi_off + skb->len) > PI_PAGE_SIZE)
 459                 return -EINVAL;
 460 
 461         cmdrsp->net.type = NET_RCV_POST;
 462         cmdrsp->cmdtype = CMD_NET_TYPE;
 463         err = visorchannel_signalinsert(devdata->dev->visorchannel,
 464                                         IOCHAN_TO_IOPART,
 465                                         cmdrsp);
 466         if (err) {
 467                 devdata->chstat.sent_post_failed++;
 468                 return err;
 469         }
 470 
 471         atomic_inc(&devdata->num_rcvbuf_in_iovm);
 472         devdata->chstat.sent_post++;
 473         return 0;
 474 }
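
     /* Note on the pi_off + skb->len check above: the posted rcv buffer is
      * described to the IO Partition as a single pfn/offset/length triple, so
      * any buffer whose data area happens to straddle a page boundary is
      * rejected here rather than handed over partially described.
      */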
 475 
 476 /* send_enbdis - Send NET_RCV_ENBDIS to IO Partition
 477  * @netdev:  Netdevice we are enabling/disabling, used as context return value.
 478  * @state:   Enable = 1/disable = 0.
 479  * @devdata: Visornic device we are enabling/disabling.
 480  *
 481  * Send the enable/disable message to the IO Partition.
 482  *
 483  * Return: 0 on success, negative integer on error.
 484  */
 485 static int send_enbdis(struct net_device *netdev, int state,
 486                        struct visornic_devdata *devdata)
 487 {
 488         int err;
 489 
 490         devdata->cmdrsp_rcv->net.enbdis.enable = state;
 491         devdata->cmdrsp_rcv->net.enbdis.context = netdev;
 492         devdata->cmdrsp_rcv->net.type = NET_RCV_ENBDIS;
 493         devdata->cmdrsp_rcv->cmdtype = CMD_NET_TYPE;
 494         err = visorchannel_signalinsert(devdata->dev->visorchannel,
 495                                         IOCHAN_TO_IOPART,
 496                                         devdata->cmdrsp_rcv);
 497         if (err)
 498                 return err;
 499         devdata->chstat.sent_enbdis++;
 500         return 0;
 501 }
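
     /* The enable/disable exchange is a handshake: send_enbdis() queues a
      * NET_RCV_ENBDIS message to the IO Partition, and the callers below
      * (visornic_enable_with_timeout()/visornic_disable_with_timeout()) then
      * poll enab_dis_acked, which is set once the IO Partition acknowledges
      * the request.
      */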
 502 
 503 /* visornic_disable_with_timeout - disable network adapter
 504  * @netdev:  netdevice to disable.
 505  * @timeout: Timeout to wait for disable.
 506  *
 507  * Disable the network adapter and inform the IO Partition that we are disabled.
 508  * Reclaim memory from rcv bufs.
 509  *
 510  * Return: 0 on success, negative integer if the IO Partition fails to respond.
 511  */
 512 static int visornic_disable_with_timeout(struct net_device *netdev,
 513                                          const int timeout)
 514 {
 515         struct visornic_devdata *devdata = netdev_priv(netdev);
 516         int i;
 517         unsigned long flags;
 518         int wait = 0;
 519         int err;
 520 
 521         /* send a msg telling the other end we are stopping incoming pkts */
 522         spin_lock_irqsave(&devdata->priv_lock, flags);
 523         devdata->enabled = 0;
 524         /* must wait for ack */
 525         devdata->enab_dis_acked = 0;
 526         spin_unlock_irqrestore(&devdata->priv_lock, flags);
 527 
 528         /* send disable and wait for ack -- don't hold lock when sending
 529          * disable because if the queue is full, insert might sleep.
 530          * If an error occurs, don't wait for the timeout.
 531          */
 532         err = send_enbdis(netdev, 0, devdata);
 533         if (err)
 534                 return err;
 535 
 536         /* wait for ack to arrive before we try to free rcv buffers
 537          * NOTE: the other end automatically unposts the rcv buffers
 538          * when it gets a disable.
 539          */
 540         spin_lock_irqsave(&devdata->priv_lock, flags);
 541         while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
 542                (wait < timeout)) {
 543                 if (devdata->enab_dis_acked)
 544                         break;
 545                 if (devdata->server_down || devdata->server_change_state) {
 546                         dev_dbg(&netdev->dev, "%s server went away\n",
 547                                 __func__);
 548                         break;
 549                 }
 550                 set_current_state(TASK_INTERRUPTIBLE);
 551                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
 552                 wait += schedule_timeout(msecs_to_jiffies(10));
 553                 spin_lock_irqsave(&devdata->priv_lock, flags);
 554         }
 555 
 556         /* Wait for usage to go to 1 (no other users) before freeing
 557          * rcv buffers
 558          */
 559         if (atomic_read(&devdata->usage) > 1) {
 560                 while (1) {
 561                         set_current_state(TASK_INTERRUPTIBLE);
 562                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
 563                         schedule_timeout(msecs_to_jiffies(10));
 564                         spin_lock_irqsave(&devdata->priv_lock, flags);
 565                         if (atomic_read(&devdata->usage) <= 1)
 566                                 break;
 567                 }
 568         }
 569         /* we've set enabled to 0, so we can give up the lock. */
 570         spin_unlock_irqrestore(&devdata->priv_lock, flags);
 571 
 572         /* stop the transmit queue so nothing more can be transmitted */
 573         netif_stop_queue(netdev);
 574 
 575         napi_disable(&devdata->napi);
 576 
 577         skb_queue_purge(&devdata->xmitbufhead);
 578 
 579         /* Free rcv buffers - other end has automatically unposted them on
 580          * disable
 581          */
 582         for (i = 0; i < devdata->num_rcv_bufs; i++) {
 583                 if (devdata->rcvbuf[i]) {
 584                         kfree_skb(devdata->rcvbuf[i]);
 585                         devdata->rcvbuf[i] = NULL;
 586                 }
 587         }
 588 
 589         return 0;
 590 }
 591 
 592 /* init_rcv_bufs - initialize receive buffs and send them to the IO Partition
 593  * @netdev:  struct netdevice.
 594  * @devdata: visornic_devdata.
 595  *
 596  * Allocate rcv buffers and post them to the IO Partition.
 597  *
 598  * Return: 0 on success, negative integer on failure.
 599  */
 600 static int init_rcv_bufs(struct net_device *netdev,
 601                          struct visornic_devdata *devdata)
 602 {
 603         int i, j, count, err;
 604 
 605         /* allocate a fixed number of receive buffers to post to uisnic;
 606          * post receive buffers after we've allocated the required amount
 607          */
 608         for (i = 0; i < devdata->num_rcv_bufs; i++) {
 609                 devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
 610                 /* if we failed to allocate one let us stop */
 611                 if (!devdata->rcvbuf[i])
 612                         break;
 613         }
 614         /* couldn't even allocate one -- bail out */
 615         if (i == 0)
 616                 return -ENOMEM;
 617         count = i;
 618 
 619         /* Ensure we could alloc at least 2/3 of the requested number of
 620          * buffers; 2/3 is an arbitrary choice, also used in ndis init.c.
 621          */
 622         if (count < ((2 * devdata->num_rcv_bufs) / 3)) {
 623                 /* free receive buffers we did alloc and then bail out */
 624                 for (i = 0; i < count; i++) {
 625                         kfree_skb(devdata->rcvbuf[i]);
 626                         devdata->rcvbuf[i] = NULL;
 627                 }
 628                 return -ENOMEM;
 629         }
 630 
 631         /* post receive buffers to receive incoming input - without holding
 632          * lock - we've not enabled nor started the queue so there shouldn't
 633          * be any rcv or xmit activity
 634          */
 635         for (i = 0; i < count; i++) {
 636                 err = post_skb(devdata->cmdrsp_rcv, devdata,
 637                                devdata->rcvbuf[i]);
 638                 if (!err)
 639                         continue;
 640 
 641                 /* Error handling -
 642                  * If we posted at least one skb, we should return success,
 643                  * but need to free the resources that we have not successfully
 644                  * posted.
 645                  */
 646                 for (j = i; j < count; j++) {
 647                         kfree_skb(devdata->rcvbuf[j]);
 648                         devdata->rcvbuf[j] = NULL;
 649                 }
 650                 if (i == 0)
 651                         return err;
 652                 break;
 653         }
 654 
 655         return 0;
 656 }
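
     /* Example of the 2/3 rule above: if num_rcv_bufs were 64 (a hypothetical
      * value; the real count is established at probe time), at least
      * (2 * 64) / 3 = 42 buffers must be allocated, otherwise init_rcv_bufs()
      * frees whatever it did allocate and returns -ENOMEM.
      */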
 657 
 658 /* visornic_enable_with_timeout - send enable to IO Partition
 659  * @netdev:  struct net_device.
 660  * @timeout: Time to wait for the ACK from the enable.
 661  *
 662  * Sends enable to the IOVM, then initializes and posts receive buffers to the
 663  * IOVM. Timeout is in msecs (a timeout of 0 specifies an infinite wait).
 664  *
 665  * Return: 0 on success, negative integer on failure.
 666  */
 667 static int visornic_enable_with_timeout(struct net_device *netdev,
 668                                         const int timeout)
 669 {
 670         int err = 0;
 671         struct visornic_devdata *devdata = netdev_priv(netdev);
 672         unsigned long flags;
 673         int wait = 0;
 674 
 675         napi_enable(&devdata->napi);
 676 
 677         /* NOTE: the other end automatically unposts the rcv buffers when it
 678          * gets a disable.
 679          */
 680         err = init_rcv_bufs(netdev, devdata);
 681         if (err < 0) {
 682                 dev_err(&netdev->dev,
 683                         "%s failed to init rcv bufs\n", __func__);
 684                 return err;
 685         }
 686 
 687         spin_lock_irqsave(&devdata->priv_lock, flags);
 688         devdata->enabled = 1;
 689         devdata->enab_dis_acked = 0;
 690 
 691         /* now we're ready, let's send an ENB to uisnic but until we get
 692          * an ACK back from uisnic, we'll drop the packets
 693          */
 694         devdata->n_rcv_packets_not_accepted = 0;
 695         spin_unlock_irqrestore(&devdata->priv_lock, flags);
 696 
 697         /* send enable and wait for ack -- don't hold lock when sending enable
 698          * because if the queue is full, insert might sleep. If an error
 699          * occurs error out.
 700          */
 701         err = send_enbdis(netdev, 1, devdata);
 702         if (err)
 703                 return err;
 704 
 705         spin_lock_irqsave(&devdata->priv_lock, flags);
 706         while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
 707                (wait < timeout)) {
 708                 if (devdata->enab_dis_acked)
 709                         break;
 710                 if (devdata->server_down || devdata->server_change_state) {
 711                         dev_dbg(&netdev->dev, "%s server went away\n",
 712                                 __func__);
 713                         break;
 714                 }
 715                 set_current_state(TASK_INTERRUPTIBLE);
 716                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
 717                 wait += schedule_timeout(msecs_to_jiffies(10));
 718                 spin_lock_irqsave(&devdata->priv_lock, flags);
 719         }
 720 
 721         spin_unlock_irqrestore(&devdata->priv_lock, flags);
 722 
 723         if (!devdata->enab_dis_acked) {
 724                 dev_err(&netdev->dev, "%s missing ACK\n", __func__);
 725                 return -EIO;
 726         }
 727 
 728         netif_start_queue(netdev);
 729         return 0;
 730 }
 731 
 732 /* visornic_timeout_reset - handle xmit timeout resets
 733  * @work: Work item that scheduled the work.
 734  *
 735  * Transmit timeouts are typically handled by resetting the device for our
 736  * virtual NIC; we will send a disable and enable to the IOVM. If it doesn't
 737  * respond, we will trigger a serverdown.
 738  */
 739 static void visornic_timeout_reset(struct work_struct *work)
 740 {
 741         struct visornic_devdata *devdata;
 742         struct net_device *netdev;
 743         int response = 0;
 744 
 745         devdata = container_of(work, struct visornic_devdata, timeout_reset);
 746         netdev = devdata->netdev;
 747 
 748         rtnl_lock();
 749         if (!netif_running(netdev)) {
 750                 rtnl_unlock();
 751                 return;
 752         }
 753 
 754         response = visornic_disable_with_timeout(netdev,
 755                                                  VISORNIC_INFINITE_RSP_WAIT);
 756         if (response)
 757                 goto call_serverdown;
 758 
 759         response = visornic_enable_with_timeout(netdev,
 760                                                 VISORNIC_INFINITE_RSP_WAIT);
 761         if (response)
 762                 goto call_serverdown;
 763 
 764         rtnl_unlock();
 765 
 766         return;
 767 
 768 call_serverdown:
 769         visornic_serverdown(devdata, NULL);
 770         rtnl_unlock();
 771 }
 772 
 773 /* visornic_open - enable the visornic device and mark the queue started
 774  * @netdev: netdevice to start.
 775  *
 776  * Enable the device and start the transmit queue.
 777  *
 778  * Return: 0 on success.
 779  */
 780 static int visornic_open(struct net_device *netdev)
 781 {
 782         visornic_enable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
 783         return 0;
 784 }
 785 
 786 /* visornic_close - disables the visornic device and stops the queues
 787  * @netdev: netdevice to stop.
 788  *
 789  * Disable the device and stop the transmit queue.
 790  *
 791  * Return: 0 on success.
 792  */
 793 static int visornic_close(struct net_device *netdev)
 794 {
 795         visornic_disable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
 796         return 0;
 797 }
 798 
 799 /* devdata_xmits_outstanding - compute outstanding xmits
 800  * @devdata: visornic_devdata for device
 801  *
 802  * Return: Long integer representing the number of outstanding xmits.
 803  */
 804 static unsigned long devdata_xmits_outstanding(struct visornic_devdata *devdata)
 805 {
 806         if (devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done)
 807                 return devdata->chstat.sent_xmit -
 808                         devdata->chstat.got_xmit_done;
 809         return (ULONG_MAX - devdata->chstat.got_xmit_done
 810                 + devdata->chstat.sent_xmit + 1);
 811 }
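
     /* Worked example of the wrap-around branch above: if got_xmit_done sits
      * at ULONG_MAX - 1 and sent_xmit has since wrapped around to 3, then
      * ULONG_MAX - (ULONG_MAX - 1) + 3 + 1 = 5 xmits are still outstanding.
      */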
 812 
 813 /* vnic_hit_high_watermark
 814  * @devdata:        Indicates visornic device we are checking.
 815  * @high_watermark: Max num of unacked xmits we will tolerate before we will
 816  *                  start throttling.
 817  *
 818  * Return: True iff the number of unacked xmits sent to the IO Partition is >=
 819  *         high_watermark. False otherwise.
 820  */
 821 static bool vnic_hit_high_watermark(struct visornic_devdata *devdata,
 822                                     ulong high_watermark)
 823 {
 824         return (devdata_xmits_outstanding(devdata) >= high_watermark);
 825 }
 826 
 827 /* vnic_hit_low_watermark
 828  * @devdata:       Indicates visornic device we are checking.
 829  * @low_watermark: We will wait until the num of unacked xmits drops to this
 830  *                 value or lower before we start transmitting again.
 831  *
 832  * Return: True iff the number of unacked xmits sent to the IO Partition is <=
 833  *         low_watermark.
 834  */
 835 static bool vnic_hit_low_watermark(struct visornic_devdata *devdata,
 836                                    ulong low_watermark)
 837 {
 838         return (devdata_xmits_outstanding(devdata) <= low_watermark);
 839 }
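
     /* Together these helpers give the transmit and completion paths a simple
      * hysteresis: visornic_xmit() stops the queue once outstanding xmits
      * reach upper_threshold_net_xmits, and (per the field documentation
      * above) netif_wake_queue() is only called again after they drain down
      * to lower_threshold_net_xmits, so the queue is not toggled on every
      * completed transmit.
      */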
 840 
 841 /* visornic_xmit - send a packet to the IO Partition
 842  * @skb:    Packet to be sent.
 843  * @netdev: Net device the packet is being sent from.
 844  *
 845  * Convert the skb to a cmdrsp so the IO Partition can understand it, and send
 846  * the XMIT command to the IO Partition for processing. This function is
 847  * protected from concurrent calls by a spinlock xmit_lock in the net_device
 848  * struct. As soon as the function returns, it can be called again.
 849  *
 850  * Return: NETDEV_TX_OK.
 851  */
 852 static netdev_tx_t visornic_xmit(struct sk_buff *skb, struct net_device *netdev)
 853 {
 854         struct visornic_devdata *devdata;
 855         int len, firstfraglen, padlen;
 856         struct uiscmdrsp *cmdrsp = NULL;
 857         unsigned long flags;
 858         int err;
 859 
 860         devdata = netdev_priv(netdev);
 861         spin_lock_irqsave(&devdata->priv_lock, flags);
 862 
 863         if (netif_queue_stopped(netdev) || devdata->server_down ||
 864             devdata->server_change_state) {
 865                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
 866                 devdata->busy_cnt++;
 867                 dev_dbg(&netdev->dev,
 868                         "%s busy - queue stopped\n", __func__);
 869                 kfree_skb(skb);
 870                 return NETDEV_TX_OK;
 871         }
 872 
 873         /* sk_buff struct is used to host network data throughout all the
 874          * linux network subsystems
 875          */
 876         len = skb->len;
 877 
 878         /* skb->len is the FULL length of data (including fragmentary portion)
 879          * skb->data_len is the length of the fragment portion in frags
 880          * skb->len - skb->data_len is size of the 1st fragment in skb->data
 881          * calculate the length of the first fragment that skb->data is
 882          * pointing to
 883          */
 884         firstfraglen = skb->len - skb->data_len;
 885         if (firstfraglen < ETH_HLEN) {
 886                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
 887                 devdata->busy_cnt++;
 888                 dev_err(&netdev->dev,
 889                         "%s busy - first frag too small (%d)\n",
 890                         __func__, firstfraglen);
 891                 kfree_skb(skb);
 892                 return NETDEV_TX_OK;
 893         }
 894 
 895         if (len < ETH_MIN_PACKET_SIZE &&
 896             ((skb_end_pointer(skb) - skb->data) >= ETH_MIN_PACKET_SIZE)) {
 897                 /* pad the packet out to minimum size */
 898                 padlen = ETH_MIN_PACKET_SIZE - len;
 899                 skb_put_zero(skb, padlen);
 900                 len += padlen;
 901                 firstfraglen += padlen;
 902         }
 903 
 904         cmdrsp = devdata->xmit_cmdrsp;
 905         /* clear cmdrsp */
 906         memset(cmdrsp, 0, SIZEOF_CMDRSP);
 907         cmdrsp->net.type = NET_XMIT;
 908         cmdrsp->cmdtype = CMD_NET_TYPE;
 909 
 910         /* save the pointer to skb -- we'll need it for completion */
 911         cmdrsp->net.buf = skb;
 912 
 913         if (vnic_hit_high_watermark(devdata,
 914                                     devdata->max_outstanding_net_xmits)) {
 915                 /* extra NET_XMITs queued over to IOVM - need to wait */
 916                 devdata->chstat.reject_count++;
 917                 if (!devdata->queuefullmsg_logged &&
 918                     ((devdata->chstat.reject_count & 0x3ff) == 1))
 919                         devdata->queuefullmsg_logged = 1;
 920                 netif_stop_queue(netdev);
 921                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
 922                 devdata->busy_cnt++;
 923                 dev_dbg(&netdev->dev,
 924                         "%s busy - waiting for iovm to catch up\n",
 925                         __func__);
 926                 kfree_skb(skb);
 927                 return NETDEV_TX_OK;
 928         }
 929         if (devdata->queuefullmsg_logged)
 930                 devdata->queuefullmsg_logged = 0;
 931 
 932         if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
 933                 cmdrsp->net.xmt.lincsum.valid = 1;
 934                 cmdrsp->net.xmt.lincsum.protocol = skb->protocol;
 935                 if (skb_transport_header(skb) > skb->data) {
 936                         cmdrsp->net.xmt.lincsum.hrawoff =
 937                                 skb_transport_header(skb) - skb->data;
 938                         cmdrsp->net.xmt.lincsum.hrawoffv = 1;
 939                 }
 940                 if (skb_network_header(skb) > skb->data) {
 941                         cmdrsp->net.xmt.lincsum.nhrawoff =
 942                                 skb_network_header(skb) - skb->data;
 943                         cmdrsp->net.xmt.lincsum.nhrawoffv = 1;
 944                 }
 945                 cmdrsp->net.xmt.lincsum.csum = skb->csum;
 946         } else {
 947                 cmdrsp->net.xmt.lincsum.valid = 0;
 948         }
 949 
 950         /* save off the length of the entire data packet */
 951         cmdrsp->net.xmt.len = len;
 952 
 953         /* copy ethernet header from first frag into cmdrsp
 954          * - everything else will be passed in frags & DMA'ed
 955          */
 956         memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HLEN);
 957 
 958         /* copy frags info - from skb->data we need to only provide access
 959          * beyond eth header
 960          */
 961         cmdrsp->net.xmt.num_frags =
 962                 visor_copy_fragsinfo_from_skb(skb, firstfraglen,
 963                                               MAX_PHYS_INFO,
 964                                               cmdrsp->net.xmt.frags);
 965         if (cmdrsp->net.xmt.num_frags < 0) {
 966                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
 967                 devdata->busy_cnt++;
 968                 dev_err(&netdev->dev,
 969                         "%s busy - copy frags failed\n", __func__);
 970                 kfree_skb(skb);
 971                 return NETDEV_TX_OK;
 972         }
 973 
 974         err = visorchannel_signalinsert(devdata->dev->visorchannel,
 975                                         IOCHAN_TO_IOPART, cmdrsp);
 976         if (err) {
 977                 netif_stop_queue(netdev);
 978                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
 979                 devdata->busy_cnt++;
 980                 dev_dbg(&netdev->dev,
 981                         "%s busy - signalinsert failed\n", __func__);
 982                 kfree_skb(skb);
 983                 return NETDEV_TX_OK;
 984         }
 985 
 986         /* Track the skbs that have been sent to the IOVM for XMIT */
 987         skb_queue_head(&devdata->xmitbufhead, skb);
 988 
 989         /* update xmt stats */
 990         devdata->net_stats.tx_packets++;
 991         devdata->net_stats.tx_bytes += skb->len;
 992         devdata->chstat.sent_xmit++;
 993 
 994         /* check if we have hit the high watermark for netif_stop_queue() */
 995         if (vnic_hit_high_watermark(devdata,
 996                                     devdata->upper_threshold_net_xmits)) {
 997                 /* extra NET_XMITs queued over to IOVM - need to wait */
 998                 /* stop queue - call netif_wake_queue() after lower threshold */
 999                 netif_stop_queue(netdev);
1000                 dev_dbg(&netdev->dev,
1001                         "%s busy - invoking iovm flow control\n",
1002                         __func__);
1003                 devdata->flow_control_upper_hits++;
1004         }
1005         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1006 
1007         /* skb will be freed when we get back NET_XMIT_DONE */
1008         return NETDEV_TX_OK;
1009 }
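
     /* Transmit completion note: the skb queued on xmitbufhead above stays
      * there until the IO Partition returns the corresponding NET_XMIT_DONE
      * response (handled when the response queue is serviced later in this
      * file); only then is the skb freed and got_xmit_done bumped, which in
      * turn lets the watermark helpers re-open the queue.
      */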
1010 
1011 /* visornic_get_stats - returns net_stats of the visornic device
1012  * @netdev: netdevice.
1013  *
1014  * Return: Pointer to the net_device_stats struct for the device.
1015  */
1016 static struct net_device_stats *visornic_get_stats(struct net_device *netdev)
1017 {
1018         struct visornic_devdata *devdata = netdev_priv(netdev);
1019 
1020         return &devdata->net_stats;
1021 }
1022 
1023 /* visornic_change_mtu - changes mtu of device
1024  * @netdev: netdevice.
1025  * @new_mtu: Value of new mtu.
1026  *
1027  * The device's MTU cannot be changed by the system; it must be changed via a
1028  * CONTROLVM message. All vnics and pnics in a switch have to have the same MTU
1029  * for everything to work. Currently not supported.
1030  *
1031  * Return: -EINVAL.
1032  */
1033 static int visornic_change_mtu(struct net_device *netdev, int new_mtu)
1034 {
1035         return -EINVAL;
1036 }
1037 
1038 /* visornic_set_multi - set visornic device flags
1039  * @netdev: netdevice.
1040  *
1041  * The only flag we currently support is IFF_PROMISC.
1042  */
1043 static void visornic_set_multi(struct net_device *netdev)
1044 {
1045         struct uiscmdrsp *cmdrsp;
1046         struct visornic_devdata *devdata = netdev_priv(netdev);
1047         int err = 0;
1048 
1049         if (devdata->old_flags == netdev->flags)
1050                 return;
1051 
1052         if ((netdev->flags & IFF_PROMISC) ==
1053             (devdata->old_flags & IFF_PROMISC))
1054                 goto out_save_flags;
1055 
1056         cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1057         if (!cmdrsp)
1058                 return;
1059         cmdrsp->cmdtype = CMD_NET_TYPE;
1060         cmdrsp->net.type = NET_RCV_PROMISC;
1061         cmdrsp->net.enbdis.context = netdev;
1062         cmdrsp->net.enbdis.enable =
1063                 netdev->flags & IFF_PROMISC;
1064         err = visorchannel_signalinsert(devdata->dev->visorchannel,
1065                                         IOCHAN_TO_IOPART,
1066                                         cmdrsp);
1067         kfree(cmdrsp);
1068         if (err)
1069                 return;
1070 
1071 out_save_flags:
1072         devdata->old_flags = netdev->flags;
1073 }
1074 
1075 /* visornic_xmit_timeout - request to timeout the xmit
1076  * @netdev: netdevice.
1077  *
1078  * Queue the work and return. Make sure we have not already been informed that
1079  * the IO Partition is gone; if so, we will have already timed-out the xmits.
1080  */
1081 static void visornic_xmit_timeout(struct net_device *netdev)
1082 {
1083         struct visornic_devdata *devdata = netdev_priv(netdev);
1084         unsigned long flags;
1085 
1086         spin_lock_irqsave(&devdata->priv_lock, flags);
1087         if (devdata->going_away) {
1088                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1089                 dev_dbg(&devdata->dev->device,
1090                         "%s aborting because device removal pending\n",
1091                         __func__);
1092                 return;
1093         }
1094 
1095         /* Ensure that a ServerDown message hasn't been received */
1096         if (!devdata->enabled ||
1097             (devdata->server_down && !devdata->server_change_state)) {
1098                 dev_dbg(&netdev->dev, "%s no processing\n",
1099                         __func__);
1100                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1101                 return;
1102         }
1103         schedule_work(&devdata->timeout_reset);
1104         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1105 }
1106 
1107 /* repost_return - repost rcv bufs that have come back
1108  * @cmdrsp: IO channel command struct to post.
1109  * @devdata: Visornic devdata for the device.
1110  * @skb: Socket buffer.
1111  * @netdev: netdevice.
1112  *
1113  * Repost rcv buffers that have been returned to us when we are finished
1114  * with them.
1115  *
1116  * Return: 0 for success, negative integer on error.
1117  */
1118 static int repost_return(struct uiscmdrsp *cmdrsp,
1119                          struct visornic_devdata *devdata,
1120                          struct sk_buff *skb, struct net_device *netdev)
1121 {
1122         struct net_pkt_rcv copy;
1123         int i = 0, cc, numreposted;
1124         int found_skb = 0;
1125         int status = 0;
1126 
1127         copy = cmdrsp->net.rcv;
1128         switch (copy.numrcvbufs) {
1129         case 0:
1130                 devdata->n_rcv0++;
1131                 break;
1132         case 1:
1133                 devdata->n_rcv1++;
1134                 break;
1135         case 2:
1136                 devdata->n_rcv2++;
1137                 break;
1138         default:
1139                 devdata->n_rcvx++;
1140                 break;
1141         }
1142         for (cc = 0, numreposted = 0; cc < copy.numrcvbufs; cc++) {
1143                 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1144                         if (devdata->rcvbuf[i] != copy.rcvbuf[cc])
1145                                 continue;
1146 
1147                         if ((skb) && devdata->rcvbuf[i] == skb) {
1148                                 devdata->found_repost_rcvbuf_cnt++;
1149                                 found_skb = 1;
1150                                 devdata->repost_found_skb_cnt++;
1151                         }
1152                         devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1153                         if (!devdata->rcvbuf[i]) {
1154                                 devdata->num_rcv_bufs_could_not_alloc++;
1155                                 devdata->alloc_failed_in_repost_rtn_cnt++;
1156                                 status = -ENOMEM;
1157                                 break;
1158                         }
1159                         status = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1160                         if (status) {
1161                                 kfree_skb(devdata->rcvbuf[i]);
1162                                 devdata->rcvbuf[i] = NULL;
1163                                 break;
1164                         }
1165                         numreposted++;
1166                         break;
1167                 }
1168         }
1169         if (numreposted != copy.numrcvbufs) {
1170                 devdata->n_repost_deficit++;
1171                 status = -EINVAL;
1172         }
1173         if (skb) {
1174                 if (found_skb) {
1175                         kfree_skb(skb);
1176                 } else {
1177                         status = -EINVAL;
1178                         devdata->bad_rcv_buf++;
1179                 }
1180         }
1181         return status;
1182 }
1183 
1184 /* visornic_rx - handle receive packets coming back from IO Partition
1185  * @cmdrsp: Receive packet returned from IO Partition.
1186  *
1187  * Got a receive packet back from the IO Partition; handle it and send it up
1188  * the stack.
1189  *
1190  * Return: 1 iff an skb was received, otherwise 0.
1191  */
1192 static int visornic_rx(struct uiscmdrsp *cmdrsp)
1193 {
1194         struct visornic_devdata *devdata;
1195         struct sk_buff *skb, *prev, *curr;
1196         struct net_device *netdev;
1197         int cc, currsize, off;
1198         struct ethhdr *eth;
1199         unsigned long flags;
1200 
1201         /* post new rcv buf to the other end using the cmdrsp we have at hand;
1202          * post it without holding lock - but we'll use the signal lock to
1203          * synchronize the queue insert. The cmdrsp that contains the net.rcv
1204          * is the one we are using to repost, so copy the info we need from it.
1205          */
1206         skb = cmdrsp->net.buf;
1207         netdev = skb->dev;
1208 
1209         devdata = netdev_priv(netdev);
1210 
1211         spin_lock_irqsave(&devdata->priv_lock, flags);
1212         atomic_dec(&devdata->num_rcvbuf_in_iovm);
1213 
1214         /* set length to how much was ACTUALLY received -
1215          * NOTE: rcv_done_len includes actual length of data rcvd
1216          * including ethhdr
1217          */
1218         skb->len = cmdrsp->net.rcv.rcv_done_len;
1219 
1220         /* update rcv stats - call it with priv_lock held */
1221         devdata->net_stats.rx_packets++;
1222         devdata->net_stats.rx_bytes += skb->len;
1223 
1224         /* test enabled while holding lock */
1225         if (!(devdata->enabled && devdata->enab_dis_acked)) {
1226                 /* don't process it unless we're in enable mode and until
1227                  * we've gotten an ACK saying the other end got our RCV enable
1228                  */
1229                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1230                 repost_return(cmdrsp, devdata, skb, netdev);
1231                 return 0;
1232         }
1233 
1234         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1235 
1236         /* when skb was allocated, skb->dev, skb->data, skb->len and
1237          * skb->data_len were set up. AND, data has already been put into
1238          * the skb (both first frag and in frags pages)
1239          * NOTE: firstfraglen is the amount of data in skb->data and that
1240          * which is not in nr_frags or frag_list. This is now simply
1241          * RCVPOST_BUF_SIZE. bump tail to show how much data is in
1242          * firstfrag & set data_len to show the rest; then see if we have
1243          * to chain frag_list.
1244          */
1245         /* do PRECAUTIONARY check */
1246         if (skb->len > RCVPOST_BUF_SIZE) {
1247                 if (cmdrsp->net.rcv.numrcvbufs < 2) {
1248                         if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1249                                 dev_err(&devdata->netdev->dev,
1250                                         "repost_return failed");
1251                         return 0;
1252                 }
1253                 /* length rcvd is greater than firstfrag in this skb rcv buf  */
1254                 /* amount in skb->data */
1255                 skb->tail += RCVPOST_BUF_SIZE;
1256                 /* amount that will be in frag_list */
1257                 skb->data_len = skb->len - RCVPOST_BUF_SIZE;
1258         } else {
1259                 /* data fits in this skb - no chaining - do
1260                  * PRECAUTIONARY check
1261                  */
1262                 /* should be 1 */
1263                 if (cmdrsp->net.rcv.numrcvbufs != 1) {
1264                         if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1265                                 dev_err(&devdata->netdev->dev,
1266                                         "repost_return failed");
1267                         return 0;
1268                 }
1269                 skb->tail += skb->len;
1270                 /* nothing rcvd in frag_list */
1271                 skb->data_len = 0;
1272         }
1273         off = skb_tail_pointer(skb) - skb->data;
1274 
1275         /* amount we bumped tail by in the head skb
1276          * it is used to calculate the size of each chained skb below
1277          * it is also used to index into bufline to continue the copy
1278          * (for chansocktwopc)
1279          * if necessary chain the rcv skbs together.
1280          * NOTE: index 0 has the same as cmdrsp->net.rcv.skb; we need to
1281          * chain the rest to that one.
1282          * - do PRECAUTIONARY check
1283          */
1284         if (cmdrsp->net.rcv.rcvbuf[0] != skb) {
1285                 if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1286                         dev_err(&devdata->netdev->dev, "repost_return failed");
1287                 return 0;
1288         }
1289 
1290         if (cmdrsp->net.rcv.numrcvbufs > 1) {
1291                 /* chain the various rcv buffers into the skb's frag_list. */
1292                 /* Note: off was initialized above  */
1293                 for (cc = 1, prev = NULL;
1294                      cc < cmdrsp->net.rcv.numrcvbufs; cc++) {
1295                         curr = (struct sk_buff *)cmdrsp->net.rcv.rcvbuf[cc];
1296                         curr->next = NULL;
1297                         /* start of list- set head */
1298                         if (!prev)
1299                                 skb_shinfo(skb)->frag_list = curr;
1300                         else
1301                                 prev->next = curr;
1302                         prev = curr;
1303 
1304                         /* should we set skb->len and skb->data_len for each
1305                          * buffer being chained??? can't hurt!
1306                          */
1307                         currsize = min(skb->len - off,
1308                                        (unsigned int)RCVPOST_BUF_SIZE);
1309                         curr->len = currsize;
1310                         curr->tail += currsize;
1311                         curr->data_len = 0;
1312                         off += currsize;
1313                 }
1314                 /* assert skb->len == off */
1315                 if (skb->len != off) {
1316                         netdev_err(devdata->netdev,
1317                                    "something wrong; skb->len:%d != off:%d\n",
1318                                    skb->len, off);
1319                 }
1320         }
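             /* Sketch of the resulting skb geometry for a packet larger than
              * one rcv buffer (illustrative; assumes numrcvbufs > 1):
              *   skb->len      = total packet length
              *   skb->data     = first RCVPOST_BUF_SIZE bytes (the head skb)
              *   skb->data_len = skb->len - RCVPOST_BUF_SIZE (in frag_list)
              *   frag_list     = rcvbuf[1] -> rcvbuf[2] -> ..., each holding
              *                   at most RCVPOST_BUF_SIZE bytes (curr->len)
              */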
1321 
1322         /* set up packet's protocol type using ethernet header - this
1323          * sets up skb->pkt_type & it also PULLS out the eth header
1324          */
1325         skb->protocol = eth_type_trans(skb, netdev);
1326         eth = eth_hdr(skb);
1327         skb->csum = 0;
1328         skb->ip_summed = CHECKSUM_NONE;
1329 
1330         do {
1331                 /* accept all packets */
1332                 if (netdev->flags & IFF_PROMISC)
1333                         break;
1334                 if (skb->pkt_type == PACKET_BROADCAST) {
1335                         /* accept all broadcast packets */
1336                         if (netdev->flags & IFF_BROADCAST)
1337                                 break;
1338                 } else if (skb->pkt_type == PACKET_MULTICAST) {
1339                         if ((netdev->flags & IFF_MULTICAST) &&
1340                             (netdev_mc_count(netdev))) {
1341                                 struct netdev_hw_addr *ha;
1342                                 int found_mc = 0;
1343 
1344                                 /* only accept multicast packets that we can
1345                                  * find in our multicast address list
1346                                  */
1347                                 netdev_for_each_mc_addr(ha, netdev) {
1348                                         if (ether_addr_equal(eth->h_dest,
1349                                                              ha->addr)) {
1350                                                 found_mc = 1;
1351                                                 break;
1352                                         }
1353                                 }
1354                                 /* accept pkt, dest matches a multicast addr */
1355                                 if (found_mc)
1356                                         break;
1357                         }
1358                 /* accept packet, h_dest must match vnic mac address */
1359                 } else if (skb->pkt_type == PACKET_HOST) {
1360                         break;
1361                 } else if (skb->pkt_type == PACKET_OTHERHOST) {
1362                         /* something is not right */
1363                         dev_err(&devdata->netdev->dev,
1364                                 "**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
1365                                 netdev->name, eth->h_dest, netdev->dev_addr);
1366                 }
1367                 /* drop packet - don't forward it up to OS */
1368                 devdata->n_rcv_packets_not_accepted++;
1369                 repost_return(cmdrsp, devdata, skb, netdev);
1370                 return 0;
1371         } while (0);
1372 
1373         netif_receive_skb(skb);
1374         /* netif_receive_skb() returns various values, but in practice
1375          * most drivers ignore the return value
1376          */
1377 
1378         skb = NULL;
1379         /* whether the packet got dropped or handled, the skb is freed by
1380          * kernel code, so we shouldn't free it. but we should repost a
1381          * new rcv buffer.
1382          */
1383         repost_return(cmdrsp, devdata, skb, netdev);
1384         return 1;
1385 }
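     /* Note: visornic_rx() returns 1 when the packet was handed to the stack
      * and 0 when it was dropped and its buffers reposted; service_resp_queue()
      * adds that value to the NAPI rx_work_done count.
      */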
1386 
1387 /* devdata_initialize - initialize devdata structure
1388  * @devdata: visornic_devdata structure to initialize.
1389  * @dev:     visor_device it belongs to.
1390  *
1391  * Set up initial values for the visornic, based on channel and default values.
1392  *
1393  * Return: A pointer to the devdata structure.
1394  */
1395 static struct visornic_devdata *devdata_initialize(
1396                                         struct visornic_devdata *devdata,
1397                                         struct visor_device *dev)
1398 {
1399         devdata->dev = dev;
1400         devdata->incarnation_id = get_jiffies_64();
1401         return devdata;
1402 }
1403 
1404 /* devdata_release - free up references in devdata
1405  * @devdata: Struct to clean up.
1406  */
1407 static void devdata_release(struct visornic_devdata *devdata)
1408 {
1409         kfree(devdata->rcvbuf);
1410         kfree(devdata->cmdrsp_rcv);
1411         kfree(devdata->xmit_cmdrsp);
1412 }
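     /* Note: devdata itself lives in the net_device private area (allocated by
      * alloc_etherdev() in visornic_probe()), so it is released by free_netdev();
      * devdata_release() only frees the buffers that devdata points at.
      */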
1413 
1414 static const struct net_device_ops visornic_dev_ops = {
1415         .ndo_open = visornic_open,
1416         .ndo_stop = visornic_close,
1417         .ndo_start_xmit = visornic_xmit,
1418         .ndo_get_stats = visornic_get_stats,
1419         .ndo_change_mtu = visornic_change_mtu,
1420         .ndo_tx_timeout = visornic_xmit_timeout,
1421         .ndo_set_rx_mode = visornic_set_multi,
1422 };
1423 
1424 /* DebugFS code */
1425 static ssize_t info_debugfs_read(struct file *file, char __user *buf,
1426                                  size_t len, loff_t *offset)
1427 {
1428         ssize_t bytes_read = 0;
1429         int str_pos = 0;
1430         struct visornic_devdata *devdata;
1431         struct net_device *dev;
1432         char *vbuf;
1433 
1434         if (len > MAX_BUF)
1435                 len = MAX_BUF;
1436         vbuf = kzalloc(len, GFP_KERNEL);
1437         if (!vbuf)
1438                 return -ENOMEM;
1439 
1440         /* for each vnic channel dump out channel specific data */
1441         rcu_read_lock();
1442         for_each_netdev_rcu(current->nsproxy->net_ns, dev) {
1443                 /* Only consider visornic netdevs whose xmit queue is stopped */
1444                 if (dev->netdev_ops != &visornic_dev_ops ||
1445                     (!netif_queue_stopped(dev)))
1446                         continue;
1447 
1448                 devdata = netdev_priv(dev);
1449                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1450                                      "netdev = %s (0x%p), MAC Addr %pM\n",
1451                                      dev->name,
1452                                      dev,
1453                                      dev->dev_addr);
1454                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1455                                      "VisorNic Dev Info = 0x%p\n", devdata);
1456                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1457                                      " num_rcv_bufs = %d\n",
1458                                      devdata->num_rcv_bufs);
1459                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1460                      " max_outstanding_net_xmits = %lu\n",
1461                      devdata->max_outstanding_net_xmits);
1462                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1463                                      " upper_threshold_net_xmits = %lu\n",
1464                                      devdata->upper_threshold_net_xmits);
1465                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1466                                      " lower_threshold_net_xmits = %lu\n",
1467                                      devdata->lower_threshold_net_xmits);
1468                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1469                                      " queuefullmsg_logged = %d\n",
1470                                      devdata->queuefullmsg_logged);
1471                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1472                                      " chstat.got_rcv = %lu\n",
1473                                      devdata->chstat.got_rcv);
1474                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1475                                      " chstat.got_enbdisack = %lu\n",
1476                                      devdata->chstat.got_enbdisack);
1477                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1478                                      " chstat.got_xmit_done = %lu\n",
1479                                      devdata->chstat.got_xmit_done);
1480                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1481                                      " chstat.xmit_fail = %lu\n",
1482                                      devdata->chstat.xmit_fail);
1483                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1484                                      " chstat.sent_enbdis = %lu\n",
1485                                      devdata->chstat.sent_enbdis);
1486                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1487                                      " chstat.sent_promisc = %lu\n",
1488                                      devdata->chstat.sent_promisc);
1489                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1490                                      " chstat.sent_post = %lu\n",
1491                                      devdata->chstat.sent_post);
1492                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1493                                      " chstat.sent_post_failed = %lu\n",
1494                                      devdata->chstat.sent_post_failed);
1495                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1496                                      " chstat.sent_xmit = %lu\n",
1497                                      devdata->chstat.sent_xmit);
1498                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1499                                      " chstat.reject_count = %lu\n",
1500                                      devdata->chstat.reject_count);
1501                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1502                                      " chstat.extra_rcvbufs_sent = %lu\n",
1503                                      devdata->chstat.extra_rcvbufs_sent);
1504                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1505                                      " n_rcv0 = %lu\n", devdata->n_rcv0);
1506                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1507                                      " n_rcv1 = %lu\n", devdata->n_rcv1);
1508                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1509                                      " n_rcv2 = %lu\n", devdata->n_rcv2);
1510                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1511                                      " n_rcvx = %lu\n", devdata->n_rcvx);
1512                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1513                                      " num_rcvbuf_in_iovm = %d\n",
1514                                      atomic_read(&devdata->num_rcvbuf_in_iovm));
1515                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1516                                      " alloc_failed_in_if_needed_cnt = %lu\n",
1517                                      devdata->alloc_failed_in_if_needed_cnt);
1518                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1519                                      " alloc_failed_in_repost_rtn_cnt = %lu\n",
1520                                      devdata->alloc_failed_in_repost_rtn_cnt);
1521                 /* str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1522                  *                   " inner_loop_limit_reached_cnt = %lu\n",
1523                  *                   devdata->inner_loop_limit_reached_cnt);
1524                  */
1525                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1526                                      " found_repost_rcvbuf_cnt = %lu\n",
1527                                      devdata->found_repost_rcvbuf_cnt);
1528                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1529                                      " repost_found_skb_cnt = %lu\n",
1530                                      devdata->repost_found_skb_cnt);
1531                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1532                                      " n_repost_deficit = %lu\n",
1533                                      devdata->n_repost_deficit);
1534                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1535                                      " bad_rcv_buf = %lu\n",
1536                                      devdata->bad_rcv_buf);
1537                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1538                                      " n_rcv_packets_not_accepted = %lu\n",
1539                                      devdata->n_rcv_packets_not_accepted);
1540                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1541                                      " interrupts_rcvd = %llu\n",
1542                                      devdata->interrupts_rcvd);
1543                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1544                                      " interrupts_notme = %llu\n",
1545                                      devdata->interrupts_notme);
1546                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1547                                      " interrupts_disabled = %llu\n",
1548                                      devdata->interrupts_disabled);
1549                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1550                                      " busy_cnt = %llu\n",
1551                                      devdata->busy_cnt);
1552                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1553                                      " flow_control_upper_hits = %llu\n",
1554                                      devdata->flow_control_upper_hits);
1555                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1556                                      " flow_control_lower_hits = %llu\n",
1557                                      devdata->flow_control_lower_hits);
1558                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1559                                      " netif_queue = %s\n",
1560                                      netif_queue_stopped(devdata->netdev) ?
1561                                      "stopped" : "running");
1562                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1563                                      " xmits_outstanding = %lu\n",
1564                                      devdata_xmits_outstanding(devdata));
1565         }
1566         rcu_read_unlock();
1567         bytes_read = simple_read_from_buffer(buf, len, offset, vbuf, str_pos);
1568         kfree(vbuf);
1569         return bytes_read;
1570 }
1571 
1572 static struct dentry *visornic_debugfs_dir;
1573 static const struct file_operations debugfs_info_fops = {
1574         .read = info_debugfs_read,
1575 };
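     /* The "info" file built from these fops in visornic_init() can be read
      * from user space to dump per-device statistics, e.g. (assuming debugfs is
      * mounted at its usual location):
      *
      *   cat /sys/kernel/debug/visornic/info
      */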
1576 
1577 /* send_rcv_posts_if_needed - send receive buffers to the IO Partition.
1578  * @devdata: Visornic device.
1579  */
1580 static void send_rcv_posts_if_needed(struct visornic_devdata *devdata)
1581 {
1582         int i;
1583         struct net_device *netdev;
1584         struct uiscmdrsp *cmdrsp = devdata->cmdrsp_rcv;
1585         int cur_num_rcv_bufs_to_alloc, rcv_bufs_allocated;
1586         int err;
1587 
1588         /* don't do this until vnic is marked ready */
1589         if (!(devdata->enabled && devdata->enab_dis_acked))
1590                 return;
1591 
1592         netdev = devdata->netdev;
1593         rcv_bufs_allocated = 0;
1594         /* this code is trying to prevent getting stuck here forever,
1595          * but still retries if it can't allocate them all this time.
1596          */
1597         cur_num_rcv_bufs_to_alloc = devdata->num_rcv_bufs_could_not_alloc;
1598         while (cur_num_rcv_bufs_to_alloc > 0) {
1599                 cur_num_rcv_bufs_to_alloc--;
1600                 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1601                         if (devdata->rcvbuf[i])
1602                                 continue;
1603                         devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1604                         if (!devdata->rcvbuf[i]) {
1605                                 devdata->alloc_failed_in_if_needed_cnt++;
1606                                 break;
1607                         }
1608                         rcv_bufs_allocated++;
1609                         err = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1610                         if (err) {
1611                                 kfree_skb(devdata->rcvbuf[i]);
1612                                 devdata->rcvbuf[i] = NULL;
1613                                 break;
1614                         }
1615                         devdata->chstat.extra_rcvbufs_sent++;
1616                 }
1617         }
1618         devdata->num_rcv_bufs_could_not_alloc -= rcv_bufs_allocated;
1619 }
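     /* Note: num_rcv_bufs_could_not_alloc is the running deficit of receive
      * buffers; each buffer successfully allocated and posted above reduces it,
      * and the outer loop bounds the retries so the poll routine cannot spin
      * here indefinitely.
      */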
1620 
1621 /* drain_resp_queue - drains and ignores all messages from the resp queue
1622  * @cmdrsp:  IO channel command response message.
1623  * @devdata: Visornic device to drain.
1624  */
1625 static void drain_resp_queue(struct uiscmdrsp *cmdrsp,
1626                              struct visornic_devdata *devdata)
1627 {
1628         while (!visorchannel_signalremove(devdata->dev->visorchannel,
1629                                           IOCHAN_FROM_IOPART,
1630                                           cmdrsp))
1631                 ;
1632 }
1633 
1634 /* service_resp_queue - drain the response queue
1635  * @cmdrsp:  IO channel command response message.
1636  * @devdata: Visornic device to drain.
1637  * @rx_work_done: Running count of receive packets processed; updated here.
1638  * @budget:       Maximum number of receive packets to process (NAPI budget).
1639  *
1640  * Drain the response queue of any responses from the IO Partition. Process the
1641  * responses as we get them.
1642  */
1643 static void service_resp_queue(struct uiscmdrsp *cmdrsp,
1644                                struct visornic_devdata *devdata,
1645                                int *rx_work_done, int budget)
1646 {
1647         unsigned long flags;
1648         struct net_device *netdev;
1649 
1650         while (*rx_work_done < budget) {
1651                 /* TODO: CLIENT ACQUIRE -- Don't really need this at the
1652                  * moment
1653                  */
1654                 /* queue empty */
1655                 if (visorchannel_signalremove(devdata->dev->visorchannel,
1656                                               IOCHAN_FROM_IOPART,
1657                                               cmdrsp))
1658                         break;
1659 
1660                 switch (cmdrsp->net.type) {
1661                 case NET_RCV:
1662                         devdata->chstat.got_rcv++;
1663                         /* process incoming packet */
1664                         *rx_work_done += visornic_rx(cmdrsp);
1665                         break;
1666                 case NET_XMIT_DONE:
1667                         spin_lock_irqsave(&devdata->priv_lock, flags);
1668                         devdata->chstat.got_xmit_done++;
1669                         if (cmdrsp->net.xmtdone.xmt_done_result)
1670                                 devdata->chstat.xmit_fail++;
1671                         /* only call queue wake if we stopped it */
1672                         netdev = ((struct sk_buff *)cmdrsp->net.buf)->dev;
1673                         /* ASSERT netdev == vnicinfo->netdev; */
1674                         if (netdev == devdata->netdev &&
1675                             netif_queue_stopped(netdev)) {
1676                                 /* check if we have crossed the lower watermark
1677                                  * for netif_wake_queue()
1678                                  */
1679                                 if (vnic_hit_low_watermark
1680                                     (devdata,
1681                                      devdata->lower_threshold_net_xmits)) {
1682                                         /* enough NET_XMITs completed
1683                                          * so can restart netif queue
1684                                          */
1685                                         netif_wake_queue(netdev);
1686                                         devdata->flow_control_lower_hits++;
1687                                 }
1688                         }
1689                         skb_unlink(cmdrsp->net.buf, &devdata->xmitbufhead);
1690                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1691                         kfree_skb(cmdrsp->net.buf);
1692                         break;
1693                 case NET_RCV_ENBDIS_ACK:
1694                         devdata->chstat.got_enbdisack++;
1695                         netdev = (struct net_device *)
1696                                  cmdrsp->net.enbdis.context;
1697                         spin_lock_irqsave(&devdata->priv_lock, flags);
1698                         devdata->enab_dis_acked = 1;
1699                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1700 
1701                         if (devdata->server_down &&
1702                             devdata->server_change_state) {
1703                                 /* Inform Linux that the link is up */
1704                                 devdata->server_down = false;
1705                                 devdata->server_change_state = false;
1706                                 netif_wake_queue(netdev);
1707                                 netif_carrier_on(netdev);
1708                         }
1709                         break;
1710                 case NET_CONNECT_STATUS:
1711                         netdev = devdata->netdev;
1712                         if (cmdrsp->net.enbdis.enable == 1) {
1713                                 spin_lock_irqsave(&devdata->priv_lock, flags);
1714                                 devdata->enabled = cmdrsp->net.enbdis.enable;
1715                                 spin_unlock_irqrestore(&devdata->priv_lock,
1716                                                        flags);
1717                                 netif_wake_queue(netdev);
1718                                 netif_carrier_on(netdev);
1719                         } else {
1720                                 netif_stop_queue(netdev);
1721                                 netif_carrier_off(netdev);
1722                                 spin_lock_irqsave(&devdata->priv_lock, flags);
1723                                 devdata->enabled = cmdrsp->net.enbdis.enable;
1724                                 spin_unlock_irqrestore(&devdata->priv_lock,
1725                                                        flags);
1726                         }
1727                         break;
1728                 default:
1729                         break;
1730                 }
1731                 /* cmdrsp is now available for reuse  */
1732         }
1733 }
1734 
1735 static int visornic_poll(struct napi_struct *napi, int budget)
1736 {
1737         struct visornic_devdata *devdata = container_of(napi,
1738                                                         struct visornic_devdata,
1739                                                         napi);
1740         int rx_count = 0;
1741 
1742         send_rcv_posts_if_needed(devdata);
1743         service_resp_queue(devdata->cmdrsp, devdata, &rx_count, budget);
1744 
1745         /* If there aren't any more packets to receive, stop the poll */
1746         if (rx_count < budget)
1747                 napi_complete_done(napi, rx_count);
1748 
1749         return rx_count;
1750 }
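     /* NAPI contract as used above: visornic_poll() consumes at most @budget
      * receive responses per call; when fewer than @budget are consumed it
      * calls napi_complete_done(), and the irq-poll timer re-schedules NAPI
      * when more responses appear in the channel.
      */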
1751 
1752 /* poll_for_irq - checks the status of the response queue
1753  * @t: pointer to the 'struct timer_list' from which we can retrieve
1754  *     the visornic devdata struct.
1755  *
1756  * Timer callback that periodically checks the response queue and schedules
1757  * NAPI processing to drain it if needed.
1758  */
1759 static void poll_for_irq(struct timer_list *t)
1760 {
1761         struct visornic_devdata *devdata = from_timer(devdata, t,
1762                                                       irq_poll_timer);
1763 
1764         if (!visorchannel_signalempty(
1765                                    devdata->dev->visorchannel,
1766                                    IOCHAN_FROM_IOPART))
1767                 napi_schedule(&devdata->napi);
1768 
1769         atomic_set(&devdata->interrupt_rcvd, 0);
1770 
1771         mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
1772 }
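     /* Design note: the driver advertises VISOR_CHANNEL_IS_POLLING to the IO
      * Partition (see visornic_probe()) and relies on this periodically
      * re-armed timer to notice new responses and schedule NAPI processing.
      */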
1773 
1774 /* visornic_probe - probe function for visornic devices
1775  * @dev: The visor device discovered.
1776  *
1777  * Called when visorbus discovers a visornic device on its bus. It creates a new
1778  * visornic ethernet adapter.
1779  *
1780  * Return: 0 on success, or negative integer on error.
1781  */
1782 static int visornic_probe(struct visor_device *dev)
1783 {
1784         struct visornic_devdata *devdata = NULL;
1785         struct net_device *netdev = NULL;
1786         int err;
1787         int channel_offset = 0;
1788         u64 features;
1789 
1790         netdev = alloc_etherdev(sizeof(struct visornic_devdata));
1791         if (!netdev) {
1792                 dev_err(&dev->device,
1793                         "%s alloc_etherdev failed\n", __func__);
1794                 return -ENOMEM;
1795         }
1796 
1797         netdev->netdev_ops = &visornic_dev_ops;
1798         netdev->watchdog_timeo = 5 * HZ;
1799         SET_NETDEV_DEV(netdev, &dev->device);
1800 
1801         /* Get MAC address from channel and read it into the device. */
1802         netdev->addr_len = ETH_ALEN;
1803         channel_offset = offsetof(struct visor_io_channel, vnic.macaddr);
1804         err = visorbus_read_channel(dev, channel_offset, netdev->dev_addr,
1805                                     ETH_ALEN);
1806         if (err < 0) {
1807                 dev_err(&dev->device,
1808                         "%s failed to get mac addr from chan (%d)\n",
1809                         __func__, err);
1810                 goto cleanup_netdev;
1811         }
1812 
1813         devdata = devdata_initialize(netdev_priv(netdev), dev);
1814         if (!devdata) {
1815                 dev_err(&dev->device,
1816                         "%s devdata_initialize failed\n", __func__);
1817                 err = -ENOMEM;
1818                 goto cleanup_netdev;
1819         }
1820         /* don't trust messages lying around in the channel */
1821         drain_resp_queue(devdata->cmdrsp, devdata);
1822 
1823         devdata->netdev = netdev;
1824         dev_set_drvdata(&dev->device, devdata);
1825         init_waitqueue_head(&devdata->rsp_queue);
1826         spin_lock_init(&devdata->priv_lock);
1827         /* not enabled yet */
1828         devdata->enabled = 0;
1829         atomic_set(&devdata->usage, 1);
1830 
1831         /* Setup rcv bufs */
1832         channel_offset = offsetof(struct visor_io_channel, vnic.num_rcv_bufs);
1833         err = visorbus_read_channel(dev, channel_offset,
1834                                     &devdata->num_rcv_bufs, 4);
1835         if (err) {
1836                 dev_err(&dev->device,
1837                         "%s failed to get #rcv bufs from chan (%d)\n",
1838                         __func__, err);
1839                 goto cleanup_netdev;
1840         }
1841 
1842         devdata->rcvbuf = kcalloc(devdata->num_rcv_bufs,
1843                                   sizeof(struct sk_buff *), GFP_KERNEL);
1844         if (!devdata->rcvbuf) {
1845                 err = -ENOMEM;
1846                 goto cleanup_netdev;
1847         }
1848 
1849         /* set the net_xmit outstanding thresholds (see the worked example
1850          * below): always leave two slots open, but keep at least 3
1851          * outstanding; note that max_outstanding_net_xmits must be > 0
1852          */
1853         devdata->max_outstanding_net_xmits =
1854                 max_t(unsigned long, 3, ((devdata->num_rcv_bufs / 3) - 2));
1855         devdata->upper_threshold_net_xmits =
1856                 max_t(unsigned long,
1857                       2, (devdata->max_outstanding_net_xmits - 1));
1858         devdata->lower_threshold_net_xmits =
1859                 max_t(unsigned long,
1860                       1, (devdata->max_outstanding_net_xmits / 2));
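             /* Worked example (illustrative only; num_rcv_bufs actually comes
              * from the channel): with num_rcv_bufs = 64,
              *   max_outstanding_net_xmits = max(3, 64 / 3 - 2) = 19
              *   upper_threshold_net_xmits = max(2, 19 - 1)     = 18
              *   lower_threshold_net_xmits = max(1, 19 / 2)     = 9
              */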
1861 
1862         skb_queue_head_init(&devdata->xmitbufhead);
1863 
1864         /* create a cmdrsp we can use to post and unpost rcv buffers */
1865         devdata->cmdrsp_rcv = kmalloc(SIZEOF_CMDRSP, GFP_KERNEL);
1866         if (!devdata->cmdrsp_rcv) {
1867                 err = -ENOMEM;
1868                 goto cleanup_rcvbuf;
1869         }
1870         devdata->xmit_cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_KERNEL);
1871         if (!devdata->xmit_cmdrsp) {
1872                 err = -ENOMEM;
1873                 goto cleanup_cmdrsp_rcv;
1874         }
1875         INIT_WORK(&devdata->timeout_reset, visornic_timeout_reset);
1876         devdata->server_down = false;
1877         devdata->server_change_state = false;
1878 
1879         /* set the default mtu */
1880         channel_offset = offsetof(struct visor_io_channel, vnic.mtu);
1881         err = visorbus_read_channel(dev, channel_offset, &netdev->mtu, 4);
1882         if (err) {
1883                 dev_err(&dev->device,
1884                         "%s failed to get mtu from chan (%d)\n",
1885                         __func__, err);
1886                 goto cleanup_xmit_cmdrsp;
1887         }
1888 
1889         /* TODO: Setup Interrupt information */
1890         /* start NAPI and the poll timer so we can get responses */
1891         netif_napi_add(netdev, &devdata->napi, visornic_poll, NAPI_WEIGHT);
1892 
1893         timer_setup(&devdata->irq_poll_timer, poll_for_irq, 0);
1894         /* Note: This timer has to start running before the netdev can
1895          * be opened below, because the napi routine is responsible for
1896          * setting enab_dis_acked
1897          */
1898         mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
1899 
1900         channel_offset = offsetof(struct visor_io_channel,
1901                                   channel_header.features);
1902         err = visorbus_read_channel(dev, channel_offset, &features, 8);
1903         if (err) {
1904                 dev_err(&dev->device,
1905                         "%s failed to get features from chan (%d)\n",
1906                         __func__, err);
1907                 goto cleanup_napi_add;
1908         }
1909 
1910         features |= VISOR_CHANNEL_IS_POLLING;
1911         features |= VISOR_DRIVER_ENHANCED_RCVBUF_CHECKING;
1912         err = visorbus_write_channel(dev, channel_offset, &features, 8);
1913         if (err) {
1914                 dev_err(&dev->device,
1915                         "%s failed to set features in chan (%d)\n",
1916                         __func__, err);
1917                 goto cleanup_napi_add;
1918         }
1919 
1920         /* Note: Interrupts have to be enabled before the netdev can
1921          * be opened below, because the napi routine is responsible for
1922          * setting enab_dis_acked
1923          */
1924         visorbus_enable_channel_interrupts(dev);
1925 
1926         err = register_netdev(netdev);
1927         if (err) {
1928                 dev_err(&dev->device,
1929                         "%s register_netdev failed (%d)\n", __func__, err);
1930                 goto cleanup_napi_add;
1931         }
1932 
1933         /* create debugfs directory for this device */
1934         devdata->eth_debugfs_dir = debugfs_create_dir(netdev->name,
1935                                                       visornic_debugfs_dir);
1936         if (!devdata->eth_debugfs_dir) {
1937                 dev_err(&dev->device,
1938                         "%s debugfs_create_dir %s failed\n",
1939                         __func__, netdev->name);
1940                 err = -ENOMEM;
1941                 goto cleanup_register_netdev;
1942         }
1943 
1944         dev_info(&dev->device, "%s success netdev=%s\n",
1945                  __func__, netdev->name);
1946         return 0;
1947 
1948 cleanup_register_netdev:
1949         unregister_netdev(netdev);
1950 
1951 cleanup_napi_add:
1952         del_timer_sync(&devdata->irq_poll_timer);
1953         netif_napi_del(&devdata->napi);
1954 
1955 cleanup_xmit_cmdrsp:
1956         kfree(devdata->xmit_cmdrsp);
1957 
1958 cleanup_cmdrsp_rcv:
1959         kfree(devdata->cmdrsp_rcv);
1960 
1961 cleanup_rcvbuf:
1962         kfree(devdata->rcvbuf);
1963 
1964 cleanup_netdev:
1965         free_netdev(netdev);
1966         return err;
1967 }
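     /* Note: the cleanup_* labels above unwind in the reverse order of the
      * allocations, so a failure at any step releases only what was already set
      * up before the error is returned to visorbus.
      */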
1968 
1969 /* host_side_disappeared - IO Partition is gone
1970  * @devdata: Device object.
1971  *
1972  * IO partition servicing this device is gone; do cleanup.
1973  */
1974 static void host_side_disappeared(struct visornic_devdata *devdata)
1975 {
1976         unsigned long flags;
1977 
1978         spin_lock_irqsave(&devdata->priv_lock, flags);
1979         /* indicate device destroyed */
1980         devdata->dev = NULL;
1981         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1982 }
1983 
1984 /* visornic_remove - called when visornic dev goes away
1985  * @dev: Visornic device that is being removed.
1986  *
1987  * Called when DEVICE_DESTROY gets called to remove device.
1988  */
1989 static void visornic_remove(struct visor_device *dev)
1990 {
1991         struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
1992         struct net_device *netdev;
1993         unsigned long flags;
1994 
1995         if (!devdata) {
1996                 dev_err(&dev->device, "%s no devdata\n", __func__);
1997                 return;
1998         }
1999         spin_lock_irqsave(&devdata->priv_lock, flags);
2000         if (devdata->going_away) {
2001                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2002                 dev_err(&dev->device, "%s already being removed\n", __func__);
2003                 return;
2004         }
2005         devdata->going_away = true;
2006         spin_unlock_irqrestore(&devdata->priv_lock, flags);
2007         netdev = devdata->netdev;
2008         if (!netdev) {
2012                 dev_err(&dev->device, "%s no net device\n", __func__);
2010                 return;
2011         }
2012 
2013         /* going_away prevents new items being added to the workqueues */
2014         cancel_work_sync(&devdata->timeout_reset);
2015 
2016         debugfs_remove_recursive(devdata->eth_debugfs_dir);
2017         /* this will call visornic_close() */
2018         unregister_netdev(netdev);
2019 
2020         del_timer_sync(&devdata->irq_poll_timer);
2021         netif_napi_del(&devdata->napi);
2022 
2023         dev_set_drvdata(&dev->device, NULL);
2024         host_side_disappeared(devdata);
2025         devdata_release(devdata);
2026         free_netdev(netdev);
2027 }
2028 
2029 /* visornic_pause - called when IO Part disappears
2030  * @dev:           Visornic device that is being serviced.
2031  * @complete_func: Call when finished.
2032  *
2033  * Called when the IO Partition has gone down. Need to free up resources and
2034  * wait for IO partition to come back. Mark link as down and don't attempt any
2035  * DMA. When we have freed memory, call the complete_func so that Command knows
2036  * we are done. If we don't call complete_func, the IO Partition will never
2037  * come back.
2038  *
2039  * Return: 0 on success.
2040  */
2041 static int visornic_pause(struct visor_device *dev,
2042                           visorbus_state_complete_func complete_func)
2043 {
2044         struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
2045 
2046         visornic_serverdown(devdata, complete_func);
2047         return 0;
2048 }
2049 
2050 /* visornic_resume - called when IO Partition has recovered
2051  * @dev:           Visornic device that is being serviced.
2052  * @complete_func: Call when finished.
2053  *
2054  * Called when the IO partition has recovered. Re-establish connection to the IO
2055  * Partition and set the link up. Okay to do DMA again.
2056  *
2057  * Return: 0 on success, negative integer on error.
2058  */
2059 static int visornic_resume(struct visor_device *dev,
2060                            visorbus_state_complete_func complete_func)
2061 {
2062         struct visornic_devdata *devdata;
2063         struct net_device *netdev;
2064         unsigned long flags;
2065 
2066         devdata = dev_get_drvdata(&dev->device);
2067         if (!devdata) {
2068                 dev_err(&dev->device, "%s no devdata\n", __func__);
2069                 return -EINVAL;
2070         }
2071 
2072         netdev = devdata->netdev;
2073 
2074         spin_lock_irqsave(&devdata->priv_lock, flags);
2075         if (devdata->server_change_state) {
2076                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2077                 dev_err(&dev->device, "%s server already changing state\n",
2078                         __func__);
2079                 return -EINVAL;
2080         }
2081         if (!devdata->server_down) {
2082                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2083                 dev_err(&dev->device, "%s server not down\n", __func__);
2084                 complete_func(dev, 0);
2085                 return 0;
2086         }
2087         devdata->server_change_state = true;
2088         spin_unlock_irqrestore(&devdata->priv_lock, flags);
2089 
2090         /* Must transition channel to ATTACHED state BEFORE
2091          * we can start using the device again.
2092          * TODO: State transitions
2093          */
2094         mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
2095 
2096         rtnl_lock();
2097         dev_open(netdev, NULL);
2098         rtnl_unlock();
2099 
2100         complete_func(dev, 0);
2101         return 0;
2102 }
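     /* Together, visornic_pause() and visornic_resume() bracket an IO Partition
      * outage: pause hands the device to visornic_serverdown() to mark the link
      * down, and resume restarts the poll timer and re-opens the netdev once
      * the partition is back.
      */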
2103 
2104 /* This is used to tell the visorbus driver which types of visor devices
2105  * we support, and what functions to call when a visor device that we support
2106  * is attached or removed.
2107  */
2108 static struct visor_driver visornic_driver = {
2109         .name = "visornic",
2110         .owner = THIS_MODULE,
2111         .channel_types = visornic_channel_types,
2112         .probe = visornic_probe,
2113         .remove = visornic_remove,
2114         .pause = visornic_pause,
2115         .resume = visornic_resume,
2116         .channel_interrupt = NULL,
2117 };
2118 
2119 /* visornic_init - init function
2120  *
2121  * Init function for the visornic driver. Do initial driver setup and wait
2122  * for devices.
2123  *
2124  * Return: 0 on success, negative integer on error.
2125  */
2126 static int visornic_init(void)
2127 {
2128         int err;
2129 
2130         visornic_debugfs_dir = debugfs_create_dir("visornic", NULL);
2131 
2132         debugfs_create_file("info", 0400, visornic_debugfs_dir, NULL,
2133                             &debugfs_info_fops);
2134         debugfs_create_file("enable_ints", 0200, visornic_debugfs_dir, NULL,
2135                             &debugfs_enable_ints_fops);
2136 
2137         err = visorbus_register_visor_driver(&visornic_driver);
2138         if (err)
2139                 debugfs_remove_recursive(visornic_debugfs_dir);
2140 
2141         return err;
2142 }
2143 
2144 /* visornic_cleanup - driver exit routine
2145  *
2146  * Unregister driver from the bus and free up memory.
2147  */
2148 static void visornic_cleanup(void)
2149 {
2150         visorbus_unregister_visor_driver(&visornic_driver);
2151         debugfs_remove_recursive(visornic_debugfs_dir);
2152 }
2153 
2154 module_init(visornic_init);
2155 module_exit(visornic_cleanup);
2156 
2157 MODULE_AUTHOR("Unisys");
2158 MODULE_LICENSE("GPL");
2159 MODULE_DESCRIPTION("s-Par NIC driver for virtual network devices");
