root/arch/sparc/kernel/ldc.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. state_to_str
  2. __advance
  3. rx_advance
  4. tx_advance
  5. handshake_get_tx_packet
  6. head_for_data
  7. tx_has_space_for
  8. data_get_tx_packet
  9. set_tx_tail
  10. __set_rx_head
  11. send_tx_packet
  12. handshake_compose_ctrl
  13. start_handshake
  14. send_version_nack
  15. send_version_ack
  16. send_rts
  17. send_rtr
  18. send_rdx
  19. send_data_nack
  20. ldc_abort
  21. find_by_major
  22. process_ver_info
  23. process_ver_ack
  24. process_ver_nack
  25. process_version
  26. process_rts
  27. process_rtr
  28. rx_seq_ok
  29. process_rdx
  30. process_control_frame
  31. process_error_frame
  32. process_data_ack
  33. send_events
  34. ldc_rx
  35. ldc_tx
  36. __ldc_channel_exists
  37. alloc_queue
  38. free_queue
  39. ldc_cookie_to_index
  40. ldc_demap
  41. ldc_iommu_init
  42. ldc_iommu_release
  43. ldc_alloc
  44. ldc_unbind
  45. ldc_free
  46. ldc_bind
  47. ldc_connect
  48. ldc_disconnect
  49. ldc_state
  50. ldc_set_state
  51. ldc_mode
  52. ldc_rx_reset
  53. __ldc_print
  54. write_raw
  55. read_raw
  56. write_nonraw
  57. rx_bad_seq
  58. data_ack_nack
  59. rx_data_wait
  60. rx_set_head
  61. send_data_ack
  62. read_nonraw
  63. write_stream
  64. read_stream
  65. ldc_write
  66. ldc_read
  67. pagesize_code
  68. make_cookie
  69. alloc_npages
  70. perm_to_mte
  71. pages_in_region
  72. fill_cookies
  73. sg_count_one
  74. sg_count_pages
  75. ldc_map_sg
  76. ldc_map_single
  77. free_npages
  78. ldc_unmap
  79. ldc_copy
  80. ldc_alloc_exp_dring
  81. ldc_free_exp_dring
  82. ldc_init

   1 // SPDX-License-Identifier: GPL-2.0
   2 /* ldc.c: Logical Domain Channel link-layer protocol driver.
   3  *
   4  * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
   5  */
   6 
   7 #include <linux/kernel.h>
   8 #include <linux/export.h>
   9 #include <linux/slab.h>
  10 #include <linux/spinlock.h>
  11 #include <linux/delay.h>
  12 #include <linux/errno.h>
  13 #include <linux/string.h>
  14 #include <linux/scatterlist.h>
  15 #include <linux/interrupt.h>
  16 #include <linux/list.h>
  17 #include <linux/init.h>
  18 #include <linux/bitmap.h>
  19 #include <asm/iommu-common.h>
  20 
  21 #include <asm/hypervisor.h>
  22 #include <asm/iommu.h>
  23 #include <asm/page.h>
  24 #include <asm/ldc.h>
  25 #include <asm/mdesc.h>
  26 
  27 #define DRV_MODULE_NAME         "ldc"	/* driver name; also the base of PFX and the banner */
  28 #define PFX DRV_MODULE_NAME     ": "	/* prefix used by this file's printk() messages */
  29 #define DRV_MODULE_VERSION      "1.1"
  30 #define DRV_MODULE_RELDATE      "July 22, 2008"
  31 
     /* Mask and shift covering bits 63:60 of a 64-bit value.  By name this is
      * the page-size code field of a cookie; the users are outside this excerpt.
      */
  32 #define COOKIE_PGSZ_CODE        0xf000000000000000ULL
  33 #define COOKIE_PGSZ_CODE_SHIFT  60ULL
  34 
  35 
     /* Banner string assembled from the identification macros above. */
  36 static char version[] =
  37         DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
  38 
  39 /* Packet header layout for unreliable and reliable mode frames.
  40  * When in RAW mode, packets are simply straight 64-byte payloads
  41  * with no headers.
  42  */
  43 struct ldc_packet {
             /* Frame class.  ldc_rx() dispatches on this value: control,
              * data, or error frame.
              */
  44         u8                      type;
  45 #define LDC_CTRL                0x01
  46 #define LDC_DATA                0x02
  47 #define LDC_ERR                 0x10
  48 
             /* Sub-type: informational, positive ack, or negative ack. */
  49         u8                      stype;
  50 #define LDC_INFO                0x01
  51 #define LDC_ACK                 0x02
  52 #define LDC_NACK                0x04
  53 
             /* Control message id; process_control_frame() switches on it. */
  54         u8                      ctrl;
  55 #define LDC_VERS                0x01 /* Link Version            */
  56 #define LDC_RTS                 0x02 /* Request To Send         */
  57 #define LDC_RTR                 0x03 /* Ready To Receive        */
  58 #define LDC_RDX                 0x04 /* Ready for Data eXchange */
  59 #define LDC_CTRL_MSK            0x0f
  60 
             /* Envelope byte.  In RTS/RTR frames it carries the channel
              * mode (see send_rts()/send_rtr()).  The LDC_LEN/LDC_FRAG_MASK
              * bits presumably describe data-frame length and fragmentation;
              * their users are outside this excerpt -- TODO confirm.
              */
  61         u8                      env;
  62 #define LDC_LEN                 0x3f
  63 #define LDC_FRAG_MASK           0xc0
  64 #define LDC_START               0x40
  65 #define LDC_STOP                0x80
  66 
             /* Sequence id of this frame. */
  67         u32                     seqid;
  68 
             /* Payload area: the 8 header bytes above plus this union add
              * up to LDC_PACKET_SIZE.  The 'r' view reserves a further 8
              * bytes for pad + ackid ahead of the data.
              */
  69         union {
  70                 u8              u_data[LDC_PACKET_SIZE - 8];
  71                 struct {
  72                         u32     pad;
  73                         u32     ackid;
  74                         u8      r_data[LDC_PACKET_SIZE - 8 - 8];
  75                 } r;
  76         } u;
  77 };
  78 
  79 struct ldc_version {
             /* Protocol version pair.  It is copied verbatim into the
              * payload of LDC_VERS control frames (see
              * handshake_compose_ctrl() callers).
              */
  80         u16 major;
  81         u16 minor;
  82 };
  83 
  84 /* Ordered from largest major to lowest.  */
  85 static struct ldc_version ver_arr[] = {
             /* Versions this driver speaks.  Entry 0 is what
              * start_handshake() offers; find_by_major() scans in array
              * order, so the largest-major-first ordering matters.
              */
  86         { .major = 1, .minor = 0 },
  87 };
  88 
  89 #define LDC_DEFAULT_MTU                 (4 * LDC_PACKET_SIZE)	/* four packets' worth of bytes */
  90 #define LDC_DEFAULT_NUM_ENTRIES         (PAGE_SIZE / LDC_PACKET_SIZE)	/* packets that fit in one page */
  91 
  92 struct ldc_channel;
  93 
  94 struct ldc_mode_ops {
             /* Per-mode transfer hooks, reached through
              * ldc_channel.mops.  Arguments are (channel, buffer, size).
              * The return convention is defined by the implementations,
              * which are outside this excerpt.
              */
  95         int (*write)(struct ldc_channel *, const void *, unsigned int);
  96         int (*read)(struct ldc_channel *, void *, unsigned int);
  97 };
  98 
  99 static const struct ldc_mode_ops raw_ops;	/* tentative declarations; the initialised tables are outside this excerpt */
 100 static const struct ldc_mode_ops nonraw_ops;
 101 static const struct ldc_mode_ops stream_ops;
 102 
     /* Non-static flag.  Nothing in this excerpt reads or writes it;
      * presumably it records whether logical domaining is active --
      * TODO confirm.
      */
 103 int ldom_domaining_enabled;
 104 
 105 struct ldc_iommu {
             /* Per-channel mapping state, embedded in struct ldc_channel
              * as 'iommu'.  The code that uses it is outside this excerpt.
              */
 106         /* Protects ldc_unmap.  */
 107         spinlock_t                      lock;
 108         struct ldc_mtable_entry         *page_table;
 109         struct iommu_map_table          iommu_map_table;
 110 };
 111 
 112 struct ldc_channel {
 113         /* Protects all operations that depend upon channel state.  */
 114         spinlock_t                      lock;
 115 
             /* Channel id handed to every sun4v_ldc_*() hypervisor call. */
 116         unsigned long                   id;
 117 
             /* Staging buffer with its length and offset.  Its users are
              * outside this excerpt.
              */
 118         u8                              *mssbuf;
 119         u32                             mssbuf_len;
 120         u32                             mssbuf_off;
 121 
             /* TX queue.  tx_head/tx_tail are byte offsets into the ring
              * (divided by LDC_PACKET_SIZE to index tx_base).
              * tx_num_entries is the ring size in packets.  tx_ra is the
              * queue address passed to sun4v_ldc_tx_qconf().
              */
 122         struct ldc_packet               *tx_base;
 123         unsigned long                   tx_head;
 124         unsigned long                   tx_tail;
 125         unsigned long                   tx_num_entries;
 126         unsigned long                   tx_ra;
 127 
             /* Byte offset of the oldest TX packet not yet ACKed.  It is
              * used as the ring limit in stream mode (head_for_data()) and
              * advanced by process_data_ack().
              */
 128         unsigned long                   tx_acked;
 129 
             /* RX queue; same layout conventions as the TX fields above. */
 130         struct ldc_packet               *rx_base;
 131         unsigned long                   rx_head;
 132         unsigned long                   rx_tail;
 133         unsigned long                   rx_num_entries;
 134         unsigned long                   rx_ra;
 135 
             /* rcv_nxt: last sequence id accepted from the peer
              * (rx_seq_ok() expects rcv_nxt + 1 next).
              * snd_nxt: last sequence id we have sent.
              */
 136         u32                             rcv_nxt;
 137         u32                             snd_nxt;
 138 
             /* Link state as last reported by
              * sun4v_ldc_{rx,tx}_get_state().
              */
 139         unsigned long                   chan_state;
 140 
             /* Caller-supplied configuration (mode, debug mask, event
              * callback) and the opaque argument passed to cfg.event().
              */
 141         struct ldc_channel_config       cfg;
 142         void                            *event_arg;
 143 
             /* Read/write implementation table for the channel. */
 144         const struct ldc_mode_ops       *mops;
 145 
 146         struct ldc_iommu                iommu;
 147 
             /* Version agreed (or being agreed) during the handshake. */
 148         struct ldc_version              ver;
 149 
             /* Handshake progress; LDC_HS_COMPLETE once RTR/RDX has been
              * processed.
              */
 150         u8                              hs_state;
 151 #define LDC_HS_CLOSED                   0x00
 152 #define LDC_HS_OPEN                     0x01
 153 #define LDC_HS_GOTVERS                  0x02
 154 #define LDC_HS_SENTRTR                  0x03
 155 #define LDC_HS_GOTRTR                   0x04
 156 #define LDC_HS_COMPLETE                 0x10
 157 
             /* Resource/reset bookkeeping bits.  LDC_FLAG_RESET is cleared
              * by a successful start_handshake() and makes ldc_rx() flush
              * the RX queue.
              */
 158         u8                              flags;
 159 #define LDC_FLAG_ALLOCED_QUEUES         0x01
 160 #define LDC_FLAG_REGISTERED_QUEUES      0x02
 161 #define LDC_FLAG_REGISTERED_IRQS        0x04
 162 #define LDC_FLAG_RESET                  0x10
 163 
             /* mss: payload bytes counted per packet by
              * tx_has_space_for().
              * state: LDC_STATE_* value (see state_to_str()).
              */
 164         u8                              mss;
 165         u8                              state;
 166 
 167 #define LDC_IRQ_NAME_MAX                32
 168         char                            rx_irq_name[LDC_IRQ_NAME_MAX];
 169         char                            tx_irq_name[LDC_IRQ_NAME_MAX];
 170 
             /* List linkage; the lists themselves are managed outside this
              * excerpt.
              */
 171         struct hlist_head               mh_list;
 172 
 173         struct hlist_node               list;
 174 };
 175 
 176 #define ldcdbg(TYPE, f, a...) \
 177 do {    if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
 178                 printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
 179 } while (0)
 180 
     /* ldcdbg() above: conditional debug print.  It requires a variable
      * named 'lp' (struct ldc_channel *) in the calling scope and emits
      * the message only when the LDC_DEBUG_<TYPE> bit is set in
      * lp->cfg.debug.
      *
      * LDC_ABORT(): calls ldc_abort(), tagging it with the name of the
      * function that invoked the macro.
      */
 181 #define LDC_ABORT(lp)   ldc_abort((lp), __func__)
 182 
 183 static const char *state_to_str(u8 state)
 184 {
 185         switch (state) {
 186         case LDC_STATE_INVALID:
 187                 return "INVALID";
 188         case LDC_STATE_INIT:
 189                 return "INIT";
 190         case LDC_STATE_BOUND:
 191                 return "BOUND";
 192         case LDC_STATE_READY:
 193                 return "READY";
 194         case LDC_STATE_CONNECTED:
 195                 return "CONNECTED";
 196         default:
 197                 return "<UNKNOWN>";
 198         }
 199 }
 200 
 201 static unsigned long __advance(unsigned long off, unsigned long num_entries)
 202 {
 203         off += LDC_PACKET_SIZE;
 204         if (off == (num_entries * LDC_PACKET_SIZE))
 205                 off = 0;
 206 
 207         return off;
 208 }
 209 
 210 static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
 211 {
 212         return __advance(off, lp->rx_num_entries);
 213 }
 214 
 215 static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
 216 {
 217         return __advance(off, lp->tx_num_entries);
 218 }
 219 
 220 static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
 221                                                   unsigned long *new_tail)
 222 {
 223         struct ldc_packet *p;
 224         unsigned long t;
 225 
 226         t = tx_advance(lp, lp->tx_tail);
 227         if (t == lp->tx_head)
 228                 return NULL;
 229 
 230         *new_tail = t;
 231 
 232         p = lp->tx_base;
 233         return p + (lp->tx_tail / LDC_PACKET_SIZE);
 234 }
 235 
 236 /* When we are in reliable or stream mode, have to track the next packet
 237  * we haven't gotten an ACK for in the TX queue using tx_acked.  We have
 238  * to be careful not to stomp over the queue past that point.  During
 239  * the handshake, we don't have TX data packets pending in the queue
 240  * and that's why handshake_get_tx_packet() need not be mindful of
 241  * lp->tx_acked.
 242  */
 243 static unsigned long head_for_data(struct ldc_channel *lp)
 244 {
 245         if (lp->cfg.mode == LDC_MODE_STREAM)
 246                 return lp->tx_acked;
 247         return lp->tx_head;
 248 }
 249 
 250 static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
 251 {
 252         unsigned long limit, tail, new_tail, diff;
 253         unsigned int mss;
 254 
 255         limit = head_for_data(lp);
 256         tail = lp->tx_tail;
 257         new_tail = tx_advance(lp, tail);
 258         if (new_tail == limit)
 259                 return 0;
 260 
 261         if (limit > new_tail)
 262                 diff = limit - new_tail;
 263         else
 264                 diff = (limit +
 265                         ((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
 266         diff /= LDC_PACKET_SIZE;
 267         mss = lp->mss;
 268 
 269         if (diff * mss < size)
 270                 return 0;
 271 
 272         return 1;
 273 }
 274 
 275 static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
 276                                              unsigned long *new_tail)
 277 {
 278         struct ldc_packet *p;
 279         unsigned long h, t;
 280 
 281         h = head_for_data(lp);
 282         t = tx_advance(lp, lp->tx_tail);
 283         if (t == h)
 284                 return NULL;
 285 
 286         *new_tail = t;
 287 
 288         p = lp->tx_base;
 289         return p + (lp->tx_tail / LDC_PACKET_SIZE);
 290 }
 291 
 292 static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
 293 {
 294         unsigned long orig_tail = lp->tx_tail;
 295         int limit = 1000;
 296 
 297         lp->tx_tail = tail;
 298         while (limit-- > 0) {
 299                 unsigned long err;
 300 
 301                 err = sun4v_ldc_tx_set_qtail(lp->id, tail);
 302                 if (!err)
 303                         return 0;
 304 
 305                 if (err != HV_EWOULDBLOCK) {
 306                         lp->tx_tail = orig_tail;
 307                         return -EINVAL;
 308                 }
 309                 udelay(1);
 310         }
 311 
 312         lp->tx_tail = orig_tail;
 313         return -EBUSY;
 314 }
 315 
 316 /* This just updates the head value in the hypervisor using
 317  * a polling loop with a timeout.  The caller takes care of
 318  * updating software state representing the head change, if any.
 319  */
 320 static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
 321 {
 322         int limit = 1000;
 323 
 324         while (limit-- > 0) {
 325                 unsigned long err;
 326 
 327                 err = sun4v_ldc_rx_set_qhead(lp->id, head);
 328                 if (!err)
 329                         return 0;
 330 
 331                 if (err != HV_EWOULDBLOCK)
 332                         return -EINVAL;
 333 
 334                 udelay(1);
 335         }
 336 
 337         return -EBUSY;
 338 }
 339 
 340 static int send_tx_packet(struct ldc_channel *lp,
 341                           struct ldc_packet *p,
 342                           unsigned long new_tail)
 343 {
 344         BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
 345 
 346         return set_tx_tail(lp, new_tail);
 347 }
 348 
 349 static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
 350                                                  u8 stype, u8 ctrl,
 351                                                  void *data, int dlen,
 352                                                  unsigned long *new_tail)
 353 {
 354         struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
 355 
 356         if (p) {
 357                 memset(p, 0, sizeof(*p));
 358                 p->type = LDC_CTRL;
 359                 p->stype = stype;
 360                 p->ctrl = ctrl;
 361                 if (data)
 362                         memcpy(p->u.u_data, data, dlen);
 363         }
 364         return p;
 365 }
 366 
 367 static int start_handshake(struct ldc_channel *lp)
 368 {
 369         struct ldc_packet *p;
 370         struct ldc_version *ver;
 371         unsigned long new_tail;
 372 
 373         ver = &ver_arr[0];
 374 
 375         ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
 376                ver->major, ver->minor);
 377 
 378         p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
 379                                    ver, sizeof(*ver), &new_tail);
 380         if (p) {
 381                 int err = send_tx_packet(lp, p, new_tail);
 382                 if (!err)
 383                         lp->flags &= ~LDC_FLAG_RESET;
 384                 return err;
 385         }
 386         return -EBUSY;
 387 }
 388 
 389 static int send_version_nack(struct ldc_channel *lp,
 390                              u16 major, u16 minor)
 391 {
 392         struct ldc_packet *p;
 393         struct ldc_version ver;
 394         unsigned long new_tail;
 395 
 396         ver.major = major;
 397         ver.minor = minor;
 398 
 399         p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
 400                                    &ver, sizeof(ver), &new_tail);
 401         if (p) {
 402                 ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
 403                        ver.major, ver.minor);
 404 
 405                 return send_tx_packet(lp, p, new_tail);
 406         }
 407         return -EBUSY;
 408 }
 409 
 410 static int send_version_ack(struct ldc_channel *lp,
 411                             struct ldc_version *vp)
 412 {
 413         struct ldc_packet *p;
 414         unsigned long new_tail;
 415 
 416         p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
 417                                    vp, sizeof(*vp), &new_tail);
 418         if (p) {
 419                 ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
 420                        vp->major, vp->minor);
 421 
 422                 return send_tx_packet(lp, p, new_tail);
 423         }
 424         return -EBUSY;
 425 }
 426 
 427 static int send_rts(struct ldc_channel *lp)
 428 {
 429         struct ldc_packet *p;
 430         unsigned long new_tail;
 431 
 432         p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
 433                                    &new_tail);
 434         if (p) {
 435                 p->env = lp->cfg.mode;
 436                 p->seqid = 0;
 437                 lp->rcv_nxt = 0;
 438 
 439                 ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
 440                        p->env, p->seqid);
 441 
 442                 return send_tx_packet(lp, p, new_tail);
 443         }
 444         return -EBUSY;
 445 }
 446 
 447 static int send_rtr(struct ldc_channel *lp)
 448 {
 449         struct ldc_packet *p;
 450         unsigned long new_tail;
 451 
 452         p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
 453                                    &new_tail);
 454         if (p) {
 455                 p->env = lp->cfg.mode;
 456                 p->seqid = 0;
 457 
 458                 ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
 459                        p->env, p->seqid);
 460 
 461                 return send_tx_packet(lp, p, new_tail);
 462         }
 463         return -EBUSY;
 464 }
 465 
 466 static int send_rdx(struct ldc_channel *lp)
 467 {
 468         struct ldc_packet *p;
 469         unsigned long new_tail;
 470 
 471         p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
 472                                    &new_tail);
 473         if (p) {
 474                 p->env = 0;
 475                 p->seqid = ++lp->snd_nxt;
 476                 p->u.r.ackid = lp->rcv_nxt;
 477 
 478                 ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
 479                        p->env, p->seqid, p->u.r.ackid);
 480 
 481                 return send_tx_packet(lp, p, new_tail);
 482         }
 483         return -EBUSY;
 484 }
 485 
 486 static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
 487 {
 488         struct ldc_packet *p;
 489         unsigned long new_tail;
 490         int err;
 491 
 492         p = data_get_tx_packet(lp, &new_tail);
 493         if (!p)
 494                 return -EBUSY;
 495         memset(p, 0, sizeof(*p));
 496         p->type = data_pkt->type;
 497         p->stype = LDC_NACK;
 498         p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
 499         p->seqid = lp->snd_nxt + 1;
 500         p->u.r.ackid = lp->rcv_nxt;
 501 
 502         ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
 503                p->type, p->ctrl, p->seqid, p->u.r.ackid);
 504 
 505         err = send_tx_packet(lp, p, new_tail);
 506         if (!err)
 507                 lp->snd_nxt++;
 508 
 509         return err;
 510 }
 511 
 512 static int ldc_abort(struct ldc_channel *lp, const char *msg)
 513 {
             /* Abort path, normally reached through LDC_ABORT(), which
              * passes the calling function's name as @msg.  Logs the
              * abort, dumps the channel with ldc_print(), re-issues the
              * TX and RX queue configuration with the existing
              * addresses and sizes, and refetches head/tail/state for
              * both queues.  Always returns -ECONNRESET, so callers can
              * simply "return LDC_ABORT(lp);".
              */
 514         unsigned long hv_err;
 515 
 516         ldcdbg(STATE, "ABORT[%s]\n", msg);
 517         ldc_print(lp);
 518 
 519         /* We report but do not act upon the hypervisor errors because
 520          * there really isn't much we can do if they fail at this point.
 521          */
 522         hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
 523         if (hv_err)
 524                 printk(KERN_ERR PFX "ldc_abort: "
 525                        "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
 526                        lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
 527 
             /* Resynchronise the cached TX head/tail and channel state
              * after the reconfiguration.
              */
 528         hv_err = sun4v_ldc_tx_get_state(lp->id,
 529                                         &lp->tx_head,
 530                                         &lp->tx_tail,
 531                                         &lp->chan_state);
 532         if (hv_err)
 533                 printk(KERN_ERR PFX "ldc_abort: "
 534                        "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
 535                        lp->id, hv_err);
 536 
 537         hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
 538         if (hv_err)
 539                 printk(KERN_ERR PFX "ldc_abort: "
 540                        "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
 541                        lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
 542 
 543         /* Refetch the RX queue state as well, because we could be invoked
 544          * here in the queue processing context.
 545          */
 546         hv_err = sun4v_ldc_rx_get_state(lp->id,
 547                                         &lp->rx_head,
 548                                         &lp->rx_tail,
 549                                         &lp->chan_state);
 550         if (hv_err)
 551                 printk(KERN_ERR PFX "ldc_abort: "
 552                        "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
 553                        lp->id, hv_err);
 554 
 555         return -ECONNRESET;
 556 }
 557 
 558 static struct ldc_version *find_by_major(u16 major)
 559 {
 560         struct ldc_version *ret = NULL;
 561         int i;
 562 
 563         for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
 564                 struct ldc_version *v = &ver_arr[i];
 565                 if (v->major <= major) {
 566                         ret = v;
 567                         break;
 568                 }
 569         }
 570         return ret;
 571 }
 572 
 573 static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
 574 {
 575         struct ldc_version *vap;
 576         int err;
 577 
 578         ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
 579                vp->major, vp->minor);
 580 
 581         if (lp->hs_state == LDC_HS_GOTVERS) {
 582                 lp->hs_state = LDC_HS_OPEN;
 583                 memset(&lp->ver, 0, sizeof(lp->ver));
 584         }
 585 
 586         vap = find_by_major(vp->major);
 587         if (!vap) {
 588                 err = send_version_nack(lp, 0, 0);
 589         } else if (vap->major != vp->major) {
 590                 err = send_version_nack(lp, vap->major, vap->minor);
 591         } else {
 592                 struct ldc_version ver = *vp;
 593                 if (ver.minor > vap->minor)
 594                         ver.minor = vap->minor;
 595                 err = send_version_ack(lp, &ver);
 596                 if (!err) {
 597                         lp->ver = ver;
 598                         lp->hs_state = LDC_HS_GOTVERS;
 599                 }
 600         }
 601         if (err)
 602                 return LDC_ABORT(lp);
 603 
 604         return 0;
 605 }
 606 
 607 static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
 608 {
 609         ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
 610                vp->major, vp->minor);
 611 
 612         if (lp->hs_state == LDC_HS_GOTVERS) {
 613                 if (lp->ver.major != vp->major ||
 614                     lp->ver.minor != vp->minor)
 615                         return LDC_ABORT(lp);
 616         } else {
 617                 lp->ver = *vp;
 618                 lp->hs_state = LDC_HS_GOTVERS;
 619         }
 620         if (send_rts(lp))
 621                 return LDC_ABORT(lp);
 622         return 0;
 623 }
 624 
 625 static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
 626 {
 627         struct ldc_version *vap;
 628         struct ldc_packet *p;
 629         unsigned long new_tail;
 630 
 631         if (vp->major == 0 && vp->minor == 0)
 632                 return LDC_ABORT(lp);
 633 
 634         vap = find_by_major(vp->major);
 635         if (!vap)
 636                 return LDC_ABORT(lp);
 637 
 638         p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
 639                                            vap, sizeof(*vap),
 640                                            &new_tail);
 641         if (!p)
 642                 return LDC_ABORT(lp);
 643 
 644         return send_tx_packet(lp, p, new_tail);
 645 }
 646 
 647 static int process_version(struct ldc_channel *lp,
 648                            struct ldc_packet *p)
 649 {
 650         struct ldc_version *vp;
 651 
 652         vp = (struct ldc_version *) p->u.u_data;
 653 
 654         switch (p->stype) {
 655         case LDC_INFO:
 656                 return process_ver_info(lp, vp);
 657 
 658         case LDC_ACK:
 659                 return process_ver_ack(lp, vp);
 660 
 661         case LDC_NACK:
 662                 return process_ver_nack(lp, vp);
 663 
 664         default:
 665                 return LDC_ABORT(lp);
 666         }
 667 }
 668 
 669 static int process_rts(struct ldc_channel *lp,
 670                        struct ldc_packet *p)
 671 {
 672         ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
 673                p->stype, p->seqid, p->env);
 674 
 675         if (p->stype     != LDC_INFO       ||
 676             lp->hs_state != LDC_HS_GOTVERS ||
 677             p->env       != lp->cfg.mode)
 678                 return LDC_ABORT(lp);
 679 
 680         lp->snd_nxt = p->seqid;
 681         lp->rcv_nxt = p->seqid;
 682         lp->hs_state = LDC_HS_SENTRTR;
 683         if (send_rtr(lp))
 684                 return LDC_ABORT(lp);
 685 
 686         return 0;
 687 }
 688 
 689 static int process_rtr(struct ldc_channel *lp,
 690                        struct ldc_packet *p)
 691 {
 692         ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
 693                p->stype, p->seqid, p->env);
 694 
 695         if (p->stype     != LDC_INFO ||
 696             p->env       != lp->cfg.mode)
 697                 return LDC_ABORT(lp);
 698 
 699         lp->snd_nxt = p->seqid;
 700         lp->hs_state = LDC_HS_COMPLETE;
 701         ldc_set_state(lp, LDC_STATE_CONNECTED);
 702         send_rdx(lp);
 703 
 704         return LDC_EVENT_UP;
 705 }
 706 
 707 static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
 708 {
 709         return lp->rcv_nxt + 1 == seqid;
 710 }
 711 
 712 static int process_rdx(struct ldc_channel *lp,
 713                        struct ldc_packet *p)
 714 {
 715         ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
 716                p->stype, p->seqid, p->env, p->u.r.ackid);
 717 
 718         if (p->stype != LDC_INFO ||
 719             !(rx_seq_ok(lp, p->seqid)))
 720                 return LDC_ABORT(lp);
 721 
 722         lp->rcv_nxt = p->seqid;
 723 
 724         lp->hs_state = LDC_HS_COMPLETE;
 725         ldc_set_state(lp, LDC_STATE_CONNECTED);
 726 
 727         return LDC_EVENT_UP;
 728 }
 729 
 730 static int process_control_frame(struct ldc_channel *lp,
 731                                  struct ldc_packet *p)
 732 {
 733         switch (p->ctrl) {
 734         case LDC_VERS:
 735                 return process_version(lp, p);
 736 
 737         case LDC_RTS:
 738                 return process_rts(lp, p);
 739 
 740         case LDC_RTR:
 741                 return process_rtr(lp, p);
 742 
 743         case LDC_RDX:
 744                 return process_rdx(lp, p);
 745 
 746         default:
 747                 return LDC_ABORT(lp);
 748         }
 749 }
 750 
 751 static int process_error_frame(struct ldc_channel *lp,
 752                                struct ldc_packet *p)
 753 {
 754         return LDC_ABORT(lp);
 755 }
 756 
 757 static int process_data_ack(struct ldc_channel *lp,
 758                             struct ldc_packet *ack)
 759 {
 760         unsigned long head = lp->tx_acked;
 761         u32 ackid = ack->u.r.ackid;
 762 
 763         while (1) {
 764                 struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
 765 
 766                 head = tx_advance(lp, head);
 767 
 768                 if (p->seqid == ackid) {
 769                         lp->tx_acked = head;
 770                         return 0;
 771                 }
 772                 if (head == lp->tx_tail)
 773                         return LDC_ABORT(lp);
 774         }
 775 
 776         return 0;
 777 }
 778 
 779 static void send_events(struct ldc_channel *lp, unsigned int event_mask)
 780 {
 781         if (event_mask & LDC_EVENT_RESET)
 782                 lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
 783         if (event_mask & LDC_EVENT_UP)
 784                 lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
 785         if (event_mask & LDC_EVENT_DATA_READY)
 786                 lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
 787 }
 788 
 789 static irqreturn_t ldc_rx(int irq, void *dev_id)
 790 {
 791         struct ldc_channel *lp = dev_id;
 792         unsigned long orig_state, flags;
 793         unsigned int event_mask;
 794 
 795         spin_lock_irqsave(&lp->lock, flags);
 796 
 797         orig_state = lp->chan_state;
 798 
 799         /* We should probably check for hypervisor errors here and
 800          * reset the LDC channel if we get one.
 801          */
 802         sun4v_ldc_rx_get_state(lp->id,
 803                                &lp->rx_head,
 804                                &lp->rx_tail,
 805                                &lp->chan_state);
 806 
 807         ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
 808                orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
 809 
 810         event_mask = 0;
 811 
 812         if (lp->cfg.mode == LDC_MODE_RAW &&
 813             lp->chan_state == LDC_CHANNEL_UP) {
 814                 lp->hs_state = LDC_HS_COMPLETE;
 815                 ldc_set_state(lp, LDC_STATE_CONNECTED);
 816 
 817                 /*
 818                  * Generate an LDC_EVENT_UP event if the channel
 819                  * was not already up.
 820                  */
 821                 if (orig_state != LDC_CHANNEL_UP) {
 822                         event_mask |= LDC_EVENT_UP;
 823                         orig_state = lp->chan_state;
 824                 }
 825         }
 826 
 827         /* If we are in reset state, flush the RX queue and ignore
 828          * everything.
 829          */
 830         if (lp->flags & LDC_FLAG_RESET) {
 831                 (void) ldc_rx_reset(lp);
 832                 goto out;
 833         }
 834 
 835         /* Once we finish the handshake, we let the ldc_read()
 836          * paths do all of the control frame and state management.
 837          * Just trigger the callback.
 838          */
 839         if (lp->hs_state == LDC_HS_COMPLETE) {
 840 handshake_complete:
 841                 if (lp->chan_state != orig_state) {
 842                         unsigned int event = LDC_EVENT_RESET;
 843 
 844                         if (lp->chan_state == LDC_CHANNEL_UP)
 845                                 event = LDC_EVENT_UP;
 846 
 847                         event_mask |= event;
 848                 }
 849                 if (lp->rx_head != lp->rx_tail)
 850                         event_mask |= LDC_EVENT_DATA_READY;
 851 
 852                 goto out;
 853         }
 854 
 855         if (lp->chan_state != orig_state)
 856                 goto out;
 857 
 858         while (lp->rx_head != lp->rx_tail) {
 859                 struct ldc_packet *p;
 860                 unsigned long new;
 861                 int err;
 862 
 863                 p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
 864 
 865                 switch (p->type) {
 866                 case LDC_CTRL:
 867                         err = process_control_frame(lp, p);
 868                         if (err > 0)
 869                                 event_mask |= err;
 870                         break;
 871 
 872                 case LDC_DATA:
 873                         event_mask |= LDC_EVENT_DATA_READY;
 874                         err = 0;
 875                         break;
 876 
 877                 case LDC_ERR:
 878                         err = process_error_frame(lp, p);
 879                         break;
 880 
 881                 default:
 882                         err = LDC_ABORT(lp);
 883                         break;
 884                 }
 885 
 886                 if (err < 0)
 887                         break;
 888 
 889                 new = lp->rx_head;
 890                 new += LDC_PACKET_SIZE;
 891                 if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
 892                         new = 0;
 893                 lp->rx_head = new;
 894 
 895                 err = __set_rx_head(lp, new);
 896                 if (err < 0) {
 897                         (void) LDC_ABORT(lp);
 898                         break;
 899                 }
 900                 if (lp->hs_state == LDC_HS_COMPLETE)
 901                         goto handshake_complete;
 902         }
 903 
 904 out:
 905         spin_unlock_irqrestore(&lp->lock, flags);
 906 
 907         send_events(lp, event_mask);
 908 
 909         return IRQ_HANDLED;
 910 }
 911 
 912 static irqreturn_t ldc_tx(int irq, void *dev_id)
 913 {
 914         struct ldc_channel *lp = dev_id;
 915         unsigned long flags, orig_state;
 916         unsigned int event_mask = 0;
 917 
 918         spin_lock_irqsave(&lp->lock, flags);
 919 
 920         orig_state = lp->chan_state;
 921 
 922         /* We should probably check for hypervisor errors here and
 923          * reset the LDC channel if we get one.
 924          */
 925         sun4v_ldc_tx_get_state(lp->id,
 926                                &lp->tx_head,
 927                                &lp->tx_tail,
 928                                &lp->chan_state);
 929 
 930         ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
 931                orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
 932 
 933         if (lp->cfg.mode == LDC_MODE_RAW &&
 934             lp->chan_state == LDC_CHANNEL_UP) {
 935                 lp->hs_state = LDC_HS_COMPLETE;
 936                 ldc_set_state(lp, LDC_STATE_CONNECTED);
 937 
 938                 /*
 939                  * Generate an LDC_EVENT_UP event if the channel
 940                  * was not already up.
 941                  */
 942                 if (orig_state != LDC_CHANNEL_UP) {
 943                         event_mask |= LDC_EVENT_UP;
 944                         orig_state = lp->chan_state;
 945                 }
 946         }
 947 
 948         spin_unlock_irqrestore(&lp->lock, flags);
 949 
 950         send_events(lp, event_mask);
 951 
 952         return IRQ_HANDLED;
 953 }
 954 
/* List of all channels created by ldc_alloc().
 *
 * XXX ldc_alloc() and ldc_free() need to run under a mutex so
 * XXX that addition to and removal from ldc_channel_list are
 * XXX atomic; otherwise the __ldc_channel_exists() check is
 * XXX pointless, as another thread can slip into ldc_alloc()
 * XXX and add a channel with the same ID.  There also needs to be
 * XXX a spinlock for ldc_channel_list.
 */
static HLIST_HEAD(ldc_channel_list);
 963 
 964 static int __ldc_channel_exists(unsigned long id)
 965 {
 966         struct ldc_channel *lp;
 967 
 968         hlist_for_each_entry(lp, &ldc_channel_list, list) {
 969                 if (lp->id == id)
 970                         return 1;
 971         }
 972         return 0;
 973 }
 974 
 975 static int alloc_queue(const char *name, unsigned long num_entries,
 976                        struct ldc_packet **base, unsigned long *ra)
 977 {
 978         unsigned long size, order;
 979         void *q;
 980 
 981         size = num_entries * LDC_PACKET_SIZE;
 982         order = get_order(size);
 983 
 984         q = (void *) __get_free_pages(GFP_KERNEL, order);
 985         if (!q) {
 986                 printk(KERN_ERR PFX "Alloc of %s queue failed with "
 987                        "size=%lu order=%lu\n", name, size, order);
 988                 return -ENOMEM;
 989         }
 990 
 991         memset(q, 0, PAGE_SIZE << order);
 992 
 993         *base = q;
 994         *ra = __pa(q);
 995 
 996         return 0;
 997 }
 998 
 999 static void free_queue(unsigned long num_entries, struct ldc_packet *q)
1000 {
1001         unsigned long size, order;
1002 
1003         if (!q)
1004                 return;
1005 
1006         size = num_entries * LDC_PACKET_SIZE;
1007         order = get_order(size);
1008 
1009         free_pages((unsigned long)q, order);
1010 }
1011 
1012 static unsigned long ldc_cookie_to_index(u64 cookie, void *arg)
1013 {
1014         u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
1015         /* struct ldc_iommu *ldc_iommu = (struct ldc_iommu *)arg; */
1016 
1017         cookie &= ~COOKIE_PGSZ_CODE;
1018 
1019         return (cookie >> (13ULL + (szcode * 3ULL)));
1020 }
1021 
1022 static void ldc_demap(struct ldc_iommu *iommu, unsigned long id, u64 cookie,
1023                       unsigned long entry, unsigned long npages)
1024 {
1025         struct ldc_mtable_entry *base;
1026         unsigned long i, shift;
1027 
1028         shift = (cookie >> COOKIE_PGSZ_CODE_SHIFT) * 3;
1029         base = iommu->page_table + entry;
1030         for (i = 0; i < npages; i++) {
1031                 if (base->cookie)
1032                         sun4v_ldc_revoke(id, cookie + (i << shift),
1033                                          base->cookie);
1034                 base->mte = 0;
1035         }
1036 }
1037 
/* Number of entries in each channel's LDC map table; used by
 * ldc_iommu_init() to size both the allocation bitmap and the table.
 *
 * XXX Make this configurable... XXX
 */
#define LDC_IOTABLE_SIZE        (8 * 1024)
1040 
1041 static int ldc_iommu_init(const char *name, struct ldc_channel *lp)
1042 {
1043         unsigned long sz, num_tsb_entries, tsbsize, order;
1044         struct ldc_iommu *ldc_iommu = &lp->iommu;
1045         struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
1046         struct ldc_mtable_entry *table;
1047         unsigned long hv_err;
1048         int err;
1049 
1050         num_tsb_entries = LDC_IOTABLE_SIZE;
1051         tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1052         spin_lock_init(&ldc_iommu->lock);
1053 
1054         sz = num_tsb_entries / 8;
1055         sz = (sz + 7UL) & ~7UL;
1056         iommu->map = kzalloc(sz, GFP_KERNEL);
1057         if (!iommu->map) {
1058                 printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
1059                 return -ENOMEM;
1060         }
1061         iommu_tbl_pool_init(iommu, num_tsb_entries, PAGE_SHIFT,
1062                             NULL, false /* no large pool */,
1063                             1 /* npools */,
1064                             true /* skip span boundary check */);
1065 
1066         order = get_order(tsbsize);
1067 
1068         table = (struct ldc_mtable_entry *)
1069                 __get_free_pages(GFP_KERNEL, order);
1070         err = -ENOMEM;
1071         if (!table) {
1072                 printk(KERN_ERR PFX "Alloc of MTE table failed, "
1073                        "size=%lu order=%lu\n", tsbsize, order);
1074                 goto out_free_map;
1075         }
1076 
1077         memset(table, 0, PAGE_SIZE << order);
1078 
1079         ldc_iommu->page_table = table;
1080 
1081         hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
1082                                          num_tsb_entries);
1083         err = -EINVAL;
1084         if (hv_err)
1085                 goto out_free_table;
1086 
1087         return 0;
1088 
1089 out_free_table:
1090         free_pages((unsigned long) table, order);
1091         ldc_iommu->page_table = NULL;
1092 
1093 out_free_map:
1094         kfree(iommu->map);
1095         iommu->map = NULL;
1096 
1097         return err;
1098 }
1099 
1100 static void ldc_iommu_release(struct ldc_channel *lp)
1101 {
1102         struct ldc_iommu *ldc_iommu = &lp->iommu;
1103         struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
1104         unsigned long num_tsb_entries, tsbsize, order;
1105 
1106         (void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1107 
1108         num_tsb_entries = iommu->poolsize * iommu->nr_pools;
1109         tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1110         order = get_order(tsbsize);
1111 
1112         free_pages((unsigned long) ldc_iommu->page_table, order);
1113         ldc_iommu->page_table = NULL;
1114 
1115         kfree(iommu->map);
1116         iommu->map = NULL;
1117 }
1118 
1119 struct ldc_channel *ldc_alloc(unsigned long id,
1120                               const struct ldc_channel_config *cfgp,
1121                               void *event_arg,
1122                               const char *name)
1123 {
1124         struct ldc_channel *lp;
1125         const struct ldc_mode_ops *mops;
1126         unsigned long dummy1, dummy2, hv_err;
1127         u8 mss, *mssbuf;
1128         int err;
1129 
1130         err = -ENODEV;
1131         if (!ldom_domaining_enabled)
1132                 goto out_err;
1133 
1134         err = -EINVAL;
1135         if (!cfgp)
1136                 goto out_err;
1137         if (!name)
1138                 goto out_err;
1139 
1140         switch (cfgp->mode) {
1141         case LDC_MODE_RAW:
1142                 mops = &raw_ops;
1143                 mss = LDC_PACKET_SIZE;
1144                 break;
1145 
1146         case LDC_MODE_UNRELIABLE:
1147                 mops = &nonraw_ops;
1148                 mss = LDC_PACKET_SIZE - 8;
1149                 break;
1150 
1151         case LDC_MODE_STREAM:
1152                 mops = &stream_ops;
1153                 mss = LDC_PACKET_SIZE - 8 - 8;
1154                 break;
1155 
1156         default:
1157                 goto out_err;
1158         }
1159 
1160         if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1161                 goto out_err;
1162 
1163         hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1164         err = -ENODEV;
1165         if (hv_err == HV_ECHANNEL)
1166                 goto out_err;
1167 
1168         err = -EEXIST;
1169         if (__ldc_channel_exists(id))
1170                 goto out_err;
1171 
1172         mssbuf = NULL;
1173 
1174         lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1175         err = -ENOMEM;
1176         if (!lp)
1177                 goto out_err;
1178 
1179         spin_lock_init(&lp->lock);
1180 
1181         lp->id = id;
1182 
1183         err = ldc_iommu_init(name, lp);
1184         if (err)
1185                 goto out_free_ldc;
1186 
1187         lp->mops = mops;
1188         lp->mss = mss;
1189 
1190         lp->cfg = *cfgp;
1191         if (!lp->cfg.mtu)
1192                 lp->cfg.mtu = LDC_DEFAULT_MTU;
1193 
1194         if (lp->cfg.mode == LDC_MODE_STREAM) {
1195                 mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1196                 if (!mssbuf) {
1197                         err = -ENOMEM;
1198                         goto out_free_iommu;
1199                 }
1200                 lp->mssbuf = mssbuf;
1201         }
1202 
1203         lp->event_arg = event_arg;
1204 
1205         /* XXX allow setting via ldc_channel_config to override defaults
1206          * XXX or use some formula based upon mtu
1207          */
1208         lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1209         lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1210 
1211         err = alloc_queue("TX", lp->tx_num_entries,
1212                           &lp->tx_base, &lp->tx_ra);
1213         if (err)
1214                 goto out_free_mssbuf;
1215 
1216         err = alloc_queue("RX", lp->rx_num_entries,
1217                           &lp->rx_base, &lp->rx_ra);
1218         if (err)
1219                 goto out_free_txq;
1220 
1221         lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1222 
1223         lp->hs_state = LDC_HS_CLOSED;
1224         ldc_set_state(lp, LDC_STATE_INIT);
1225 
1226         INIT_HLIST_NODE(&lp->list);
1227         hlist_add_head(&lp->list, &ldc_channel_list);
1228 
1229         INIT_HLIST_HEAD(&lp->mh_list);
1230 
1231         snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
1232         snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
1233 
1234         err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
1235                           lp->rx_irq_name, lp);
1236         if (err)
1237                 goto out_free_txq;
1238 
1239         err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
1240                           lp->tx_irq_name, lp);
1241         if (err) {
1242                 free_irq(lp->cfg.rx_irq, lp);
1243                 goto out_free_txq;
1244         }
1245 
1246         return lp;
1247 
1248 out_free_txq:
1249         free_queue(lp->tx_num_entries, lp->tx_base);
1250 
1251 out_free_mssbuf:
1252         kfree(mssbuf);
1253 
1254 out_free_iommu:
1255         ldc_iommu_release(lp);
1256 
1257 out_free_ldc:
1258         kfree(lp);
1259 
1260 out_err:
1261         return ERR_PTR(err);
1262 }
1263 EXPORT_SYMBOL(ldc_alloc);
1264 
/* Undo ldc_bind() and release the channel's queues.
 *
 * Each teardown step is guarded by the flag recording that the matching
 * setup happened, and the flag is cleared afterwards, so the steps are
 * skipped if they were already undone.  The channel always ends up in
 * LDC_STATE_INIT.
 */
void ldc_unbind(struct ldc_channel *lp)
{
        /* Release both interrupt handlers if they are still registered. */
        if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
                free_irq(lp->cfg.rx_irq, lp);
                free_irq(lp->cfg.tx_irq, lp);
                lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
        }

        /* Unconfigure the TX/RX queues in the hypervisor (base 0, 0 entries). */
        if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
                sun4v_ldc_tx_qconf(lp->id, 0, 0);
                sun4v_ldc_rx_qconf(lp->id, 0, 0);
                lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
        }
        /* Free the queue memory allocated in ldc_alloc(). */
        if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
                free_queue(lp->tx_num_entries, lp->tx_base);
                free_queue(lp->rx_num_entries, lp->rx_base);
                lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
        }

        ldc_set_state(lp, LDC_STATE_INIT);
}
EXPORT_SYMBOL(ldc_unbind);
1287 
/* Destroy a channel created by ldc_alloc().
 *
 * Unbinds the channel, removes it from ldc_channel_list, frees the
 * stream-mode mssbuf (NULL for other modes, which kfree() accepts),
 * releases the map table and finally frees @lp itself.  @lp must not be
 * used afterwards.
 */
void ldc_free(struct ldc_channel *lp)
{
        ldc_unbind(lp);
        hlist_del(&lp->list);
        kfree(lp->mssbuf);
        ldc_iommu_release(lp);

        kfree(lp);
}
EXPORT_SYMBOL(ldc_free);
1298 
/* Bind the channel.  This registers the LDC queues with
 * the hypervisor and puts the channel into a pseudo-listening
 * state.  This does not initiate a handshake, ldc_connect() does
 * that.
 *
 * The channel must be in LDC_STATE_INIT, otherwise -EINVAL is returned.
 * On success the channel is in LDC_STATE_BOUND with hs_state LDC_HS_OPEN
 * (LDC_HS_COMPLETE for RAW mode) and 0 is returned.
 *
 * Returns -ENODEV if configuring a queue fails and -EBUSY if the TX
 * state cannot be read; in both cases the queues configured so far are
 * unconfigured again and both IRQs are freed.
 */
int ldc_bind(struct ldc_channel *lp)
{
        unsigned long hv_err, flags;
        int err = -EINVAL;

        if (lp->state != LDC_STATE_INIT)
                return -EINVAL;

        spin_lock_irqsave(&lp->lock, flags);

        enable_irq(lp->cfg.rx_irq);
        enable_irq(lp->cfg.tx_irq);

        lp->flags |= LDC_FLAG_REGISTERED_IRQS;

        /* Each queue is first unconfigured (0, 0) and then configured
         * with its real address and size.
         */
        err = -ENODEV;
        hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
        if (hv_err)
                goto out_free_irqs;

        hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
        if (hv_err)
                goto out_free_irqs;

        hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
        if (hv_err)
                goto out_unmap_tx;

        hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
        if (hv_err)
                goto out_unmap_tx;

        lp->flags |= LDC_FLAG_REGISTERED_QUEUES;

        hv_err = sun4v_ldc_tx_get_state(lp->id,
                                        &lp->tx_head,
                                        &lp->tx_tail,
                                        &lp->chan_state);
        err = -EBUSY;
        if (hv_err)
                goto out_unmap_rx;

        /* Nothing is outstanding yet: acked position starts at the head. */
        lp->tx_acked = lp->tx_head;

        lp->hs_state = LDC_HS_OPEN;
        ldc_set_state(lp, LDC_STATE_BOUND);

        if (lp->cfg.mode == LDC_MODE_RAW) {
                /*
                 * There is no handshake in RAW mode, so handshake
                 * is completed.
                 */
                lp->hs_state = LDC_HS_COMPLETE;
        }

        spin_unlock_irqrestore(&lp->lock, flags);

        return 0;

out_unmap_rx:
        lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
        sun4v_ldc_rx_qconf(lp->id, 0, 0);

out_unmap_tx:
        sun4v_ldc_tx_qconf(lp->id, 0, 0);

out_free_irqs:
        /* NOTE(review): free_irq() is called here with lp->lock held and
         * interrupts disabled; free_irq() is documented as not callable
         * from atomic context -- confirm and consider unlocking first.
         */
        lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
        free_irq(lp->cfg.tx_irq, lp);
        free_irq(lp->cfg.rx_irq, lp);

        spin_unlock_irqrestore(&lp->lock, flags);

        return err;
}
EXPORT_SYMBOL(ldc_bind);
1380 
1381 int ldc_connect(struct ldc_channel *lp)
1382 {
1383         unsigned long flags;
1384         int err;
1385 
1386         if (lp->cfg.mode == LDC_MODE_RAW)
1387                 return -EINVAL;
1388 
1389         spin_lock_irqsave(&lp->lock, flags);
1390 
1391         if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1392             !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1393             lp->hs_state != LDC_HS_OPEN)
1394                 err = ((lp->hs_state > LDC_HS_OPEN) ? 0 : -EINVAL);
1395         else
1396                 err = start_handshake(lp);
1397 
1398         spin_unlock_irqrestore(&lp->lock, flags);
1399 
1400         return err;
1401 }
1402 EXPORT_SYMBOL(ldc_connect);
1403 
/* Drop the connection on a non-RAW channel and return it to the bound,
 * handshake-open state.
 *
 * Returns -EINVAL for RAW mode channels or when the queues are not both
 * allocated and registered.  Otherwise both queues are unconfigured and
 * reconfigured in the hypervisor; on success the channel is set to
 * LDC_STATE_BOUND / LDC_HS_OPEN, LDC_FLAG_RESET is set, and 0 is
 * returned.
 *
 * If any queue configuration call fails, the queues are unconfigured,
 * both IRQs are freed, the channel drops to LDC_STATE_INIT and -ENODEV
 * is returned.
 */
int ldc_disconnect(struct ldc_channel *lp)
{
        unsigned long hv_err, flags;
        int err;

        if (lp->cfg.mode == LDC_MODE_RAW)
                return -EINVAL;

        if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
            !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
                return -EINVAL;

        spin_lock_irqsave(&lp->lock, flags);

        /* Unconfigure, then reconfigure, each queue. */
        err = -ENODEV;
        hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
        if (hv_err)
                goto out_err;

        hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
        if (hv_err)
                goto out_err;

        hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
        if (hv_err)
                goto out_err;

        hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
        if (hv_err)
                goto out_err;

        ldc_set_state(lp, LDC_STATE_BOUND);
        lp->hs_state = LDC_HS_OPEN;
        lp->flags |= LDC_FLAG_RESET;

        spin_unlock_irqrestore(&lp->lock, flags);

        return 0;

out_err:
        /* NOTE(review): free_irq() is called below with lp->lock held and
         * interrupts disabled; free_irq() is documented as not callable
         * from atomic context -- confirm.
         */
        sun4v_ldc_tx_qconf(lp->id, 0, 0);
        sun4v_ldc_rx_qconf(lp->id, 0, 0);
        free_irq(lp->cfg.tx_irq, lp);
        free_irq(lp->cfg.rx_irq, lp);
        lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
                       LDC_FLAG_REGISTERED_QUEUES);
        ldc_set_state(lp, LDC_STATE_INIT);

        spin_unlock_irqrestore(&lp->lock, flags);

        return err;
}
EXPORT_SYMBOL(ldc_disconnect);
1457 
/* Return the channel's current LDC_STATE_* value (lp->state).
 * The value is read without taking lp->lock.
 */
int ldc_state(struct ldc_channel *lp)
{
        return lp->state;
}
EXPORT_SYMBOL(ldc_state);
1463 
1464 void ldc_set_state(struct ldc_channel *lp, u8 state)
1465 {
1466         ldcdbg(STATE, "STATE (%s) --> (%s)\n",
1467                state_to_str(lp->state),
1468                state_to_str(state));
1469 
1470         lp->state = state;
1471 }
1472 EXPORT_SYMBOL(ldc_set_state);
1473 
/* Return the channel's configured mode (lp->cfg.mode), i.e. the
 * LDC_MODE_* value copied from the config passed to ldc_alloc().
 */
int ldc_mode(struct ldc_channel *lp)
{
        return lp->cfg.mode;
}
EXPORT_SYMBOL(ldc_mode);
1479 
/* Flush the RX queue by moving the RX head up to the cached RX tail.
 * Returns the result of __set_rx_head().  lp->rx_head itself is not
 * updated here.
 */
int ldc_rx_reset(struct ldc_channel *lp)
{
        return __set_rx_head(lp, lp->rx_tail);
}
EXPORT_SYMBOL(ldc_rx_reset);
1485 
/* Log a snapshot of the channel via pr_info(): id, flags, link state,
 * hypervisor channel state, handshake state, the cached RX/TX head,
 * tail and queue sizes, and the receive/send sequence numbers.
 * @caller is printed as a prefix to identify the call site.
 */
void __ldc_print(struct ldc_channel *lp, const char *caller)
{
        pr_info("%s: id=0x%lx flags=0x%x state=%s cstate=0x%lx hsstate=0x%x\n"
                "\trx_h=0x%lx rx_t=0x%lx rx_n=%ld\n"
                "\ttx_h=0x%lx tx_t=0x%lx tx_n=%ld\n"
                "\trcv_nxt=%u snd_nxt=%u\n",
                caller, lp->id, lp->flags, state_to_str(lp->state),
                lp->chan_state, lp->hs_state,
                lp->rx_head, lp->rx_tail, lp->rx_num_entries,
                lp->tx_head, lp->tx_tail, lp->tx_num_entries,
                lp->rcv_nxt, lp->snd_nxt);
}
EXPORT_SYMBOL(__ldc_print);
1499 
1500 static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
1501 {
1502         struct ldc_packet *p;
1503         unsigned long new_tail, hv_err;
1504         int err;
1505 
1506         hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1507                                         &lp->chan_state);
1508         if (unlikely(hv_err))
1509                 return -EBUSY;
1510 
1511         if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1512                 return LDC_ABORT(lp);
1513 
1514         if (size > LDC_PACKET_SIZE)
1515                 return -EMSGSIZE;
1516 
1517         p = data_get_tx_packet(lp, &new_tail);
1518         if (!p)
1519                 return -EAGAIN;
1520 
1521         memcpy(p, buf, size);
1522 
1523         err = send_tx_packet(lp, p, new_tail);
1524         if (!err)
1525                 err = size;
1526 
1527         return err;
1528 }
1529 
1530 static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
1531 {
1532         struct ldc_packet *p;
1533         unsigned long hv_err, new;
1534         int err;
1535 
1536         if (size < LDC_PACKET_SIZE)
1537                 return -EINVAL;
1538 
1539         hv_err = sun4v_ldc_rx_get_state(lp->id,
1540                                         &lp->rx_head,
1541                                         &lp->rx_tail,
1542                                         &lp->chan_state);
1543         if (hv_err)
1544                 return LDC_ABORT(lp);
1545 
1546         if (lp->chan_state == LDC_CHANNEL_DOWN ||
1547             lp->chan_state == LDC_CHANNEL_RESETTING)
1548                 return -ECONNRESET;
1549 
1550         if (lp->rx_head == lp->rx_tail)
1551                 return 0;
1552 
1553         p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
1554         memcpy(buf, p, LDC_PACKET_SIZE);
1555 
1556         new = rx_advance(lp, lp->rx_head);
1557         lp->rx_head = new;
1558 
1559         err = __set_rx_head(lp, new);
1560         if (err < 0)
1561                 err = -ECONNRESET;
1562         else
1563                 err = LDC_PACKET_SIZE;
1564 
1565         return err;
1566 }
1567 
1568 static const struct ldc_mode_ops raw_ops = {
1569         .write          =       write_raw,
1570         .read           =       read_raw,
1571 };
1572 
1573 static int write_nonraw(struct ldc_channel *lp, const void *buf,
1574                         unsigned int size)
1575 {
1576         unsigned long hv_err, tail;
1577         unsigned int copied;
1578         u32 seq;
1579         int err;
1580 
1581         hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1582                                         &lp->chan_state);
1583         if (unlikely(hv_err))
1584                 return -EBUSY;
1585 
1586         if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1587                 return LDC_ABORT(lp);
1588 
1589         if (!tx_has_space_for(lp, size))
1590                 return -EAGAIN;
1591 
1592         seq = lp->snd_nxt;
1593         copied = 0;
1594         tail = lp->tx_tail;
1595         while (copied < size) {
1596                 struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
1597                 u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
1598                             p->u.u_data :
1599                             p->u.r.r_data);
1600                 int data_len;
1601 
1602                 p->type = LDC_DATA;
1603                 p->stype = LDC_INFO;
1604                 p->ctrl = 0;
1605 
1606                 data_len = size - copied;
1607                 if (data_len > lp->mss)
1608                         data_len = lp->mss;
1609 
1610                 BUG_ON(data_len > LDC_LEN);
1611 
1612                 p->env = (data_len |
1613                           (copied == 0 ? LDC_START : 0) |
1614                           (data_len == size - copied ? LDC_STOP : 0));
1615 
1616                 p->seqid = ++seq;
1617 
1618                 ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
1619                        p->type,
1620                        p->stype,
1621                        p->ctrl,
1622                        p->env,
1623                        p->seqid);
1624 
1625                 memcpy(data, buf, data_len);
1626                 buf += data_len;
1627                 copied += data_len;
1628 
1629                 tail = tx_advance(lp, tail);
1630         }
1631 
1632         err = set_tx_tail(lp, tail);
1633         if (!err) {
1634                 lp->snd_nxt = seq;
1635                 err = size;
1636         }
1637 
1638         return err;
1639 }
1640 
1641 static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
1642                       struct ldc_packet *first_frag)
1643 {
1644         int err;
1645 
1646         if (first_frag)
1647                 lp->rcv_nxt = first_frag->seqid - 1;
1648 
1649         err = send_data_nack(lp, p);
1650         if (err)
1651                 return err;
1652 
1653         err = ldc_rx_reset(lp);
1654         if (err < 0)
1655                 return LDC_ABORT(lp);
1656 
1657         return 0;
1658 }
1659 
1660 static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1661 {
1662         if (p->stype & LDC_ACK) {
1663                 int err = process_data_ack(lp, p);
1664                 if (err)
1665                         return err;
1666         }
1667         if (p->stype & LDC_NACK)
1668                 return LDC_ABORT(lp);
1669 
1670         return 0;
1671 }
1672 
1673 static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
1674 {
1675         unsigned long dummy;
1676         int limit = 1000;
1677 
1678         ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
1679                cur_head, lp->rx_head, lp->rx_tail);
1680         while (limit-- > 0) {
1681                 unsigned long hv_err;
1682 
1683                 hv_err = sun4v_ldc_rx_get_state(lp->id,
1684                                                 &dummy,
1685                                                 &lp->rx_tail,
1686                                                 &lp->chan_state);
1687                 if (hv_err)
1688                         return LDC_ABORT(lp);
1689 
1690                 if (lp->chan_state == LDC_CHANNEL_DOWN ||
1691                     lp->chan_state == LDC_CHANNEL_RESETTING)
1692                         return -ECONNRESET;
1693 
1694                 if (cur_head != lp->rx_tail) {
1695                         ldcdbg(DATA, "DATA WAIT DONE "
1696                                "head[%lx] tail[%lx] chan_state[%lx]\n",
1697                                dummy, lp->rx_tail, lp->chan_state);
1698                         return 0;
1699                 }
1700 
1701                 udelay(1);
1702         }
1703         return -EAGAIN;
1704 }
1705 
1706 static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1707 {
1708         int err = __set_rx_head(lp, head);
1709 
1710         if (err < 0)
1711                 return LDC_ABORT(lp);
1712 
1713         lp->rx_head = head;
1714         return 0;
1715 }
1716 
1717 static void send_data_ack(struct ldc_channel *lp)
1718 {
1719         unsigned long new_tail;
1720         struct ldc_packet *p;
1721 
1722         p = data_get_tx_packet(lp, &new_tail);
1723         if (likely(p)) {
1724                 int err;
1725 
1726                 memset(p, 0, sizeof(*p));
1727                 p->type = LDC_DATA;
1728                 p->stype = LDC_ACK;
1729                 p->ctrl = 0;
1730                 p->seqid = lp->snd_nxt + 1;
1731                 p->u.r.ackid = lp->rcv_nxt;
1732 
1733                 err = send_tx_packet(lp, p, new_tail);
1734                 if (!err)
1735                         lp->snd_nxt++;
1736         }
1737 }
1738 
1739 static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
1740 {
1741         struct ldc_packet *first_frag;
1742         unsigned long hv_err, new;
1743         int err, copied;
1744 
1745         hv_err = sun4v_ldc_rx_get_state(lp->id,
1746                                         &lp->rx_head,
1747                                         &lp->rx_tail,
1748                                         &lp->chan_state);
1749         if (hv_err)
1750                 return LDC_ABORT(lp);
1751 
1752         if (lp->chan_state == LDC_CHANNEL_DOWN ||
1753             lp->chan_state == LDC_CHANNEL_RESETTING)
1754                 return -ECONNRESET;
1755 
1756         if (lp->rx_head == lp->rx_tail)
1757                 return 0;
1758 
1759         first_frag = NULL;
1760         copied = err = 0;
1761         new = lp->rx_head;
1762         while (1) {
1763                 struct ldc_packet *p;
1764                 int pkt_len;
1765 
1766                 BUG_ON(new == lp->rx_tail);
1767                 p = lp->rx_base + (new / LDC_PACKET_SIZE);
1768 
1769                 ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
1770                        "rcv_nxt[%08x]\n",
1771                        p->type,
1772                        p->stype,
1773                        p->ctrl,
1774                        p->env,
1775                        p->seqid,
1776                        p->u.r.ackid,
1777                        lp->rcv_nxt);
1778 
1779                 if (unlikely(!rx_seq_ok(lp, p->seqid))) {
1780                         err = rx_bad_seq(lp, p, first_frag);
1781                         copied = 0;
1782                         break;
1783                 }
1784 
1785                 if (p->type & LDC_CTRL) {
1786                         err = process_control_frame(lp, p);
1787                         if (err < 0)
1788                                 break;
1789                         err = 0;
1790                 }
1791 
1792                 lp->rcv_nxt = p->seqid;
1793 
1794                 /*
1795                  * If this is a control-only packet, there is nothing
1796                  * else to do but advance the rx queue since the packet
1797                  * was already processed above.
1798                  */
1799                 if (!(p->type & LDC_DATA)) {
1800                         new = rx_advance(lp, new);
1801                         break;
1802                 }
1803                 if (p->stype & (LDC_ACK | LDC_NACK)) {
1804                         err = data_ack_nack(lp, p);
1805                         if (err)
1806                                 break;
1807                 }
1808                 if (!(p->stype & LDC_INFO)) {
1809                         new = rx_advance(lp, new);
1810                         err = rx_set_head(lp, new);
1811                         if (err)
1812                                 break;
1813                         goto no_data;
1814                 }
1815 
1816                 pkt_len = p->env & LDC_LEN;
1817 
1818                 /* Every initial packet starts with the START bit set.
1819                  *
1820                  * Singleton packets will have both START+STOP set.
1821                  *
1822                  * Fragments will have START set in the first frame, STOP
1823                  * set in the last frame, and neither bit set in middle
1824                  * frames of the packet.
1825                  *
1826                  * Therefore if we are at the beginning of a packet and
1827                  * we don't see START, or we are in the middle of a fragmented
1828                  * packet and do see START, we are unsynchronized and should
1829                  * flush the RX queue.
1830                  */
1831                 if ((first_frag == NULL && !(p->env & LDC_START)) ||
1832                     (first_frag != NULL &&  (p->env & LDC_START))) {
1833                         if (!first_frag)
1834                                 new = rx_advance(lp, new);
1835 
1836                         err = rx_set_head(lp, new);
1837                         if (err)
1838                                 break;
1839 
1840                         if (!first_frag)
1841                                 goto no_data;
1842                 }
1843                 if (!first_frag)
1844                         first_frag = p;
1845 
1846                 if (pkt_len > size - copied) {
1847                         /* User didn't give us a big enough buffer,
1848                          * what to do?  This is a pretty serious error.
1849                          *
1850                          * Since we haven't updated the RX ring head to
1851                          * consume any of the packets, signal the error
1852                          * to the user and just leave the RX ring alone.
1853                          *
1854                          * This seems the best behavior because this allows
1855                          * a user of the LDC layer to start with a small
1856                          * RX buffer for ldc_read() calls and use -EMSGSIZE
1857                          * as a cue to enlarge its read buffer.
1858                          */
1859                         err = -EMSGSIZE;
1860                         break;
1861                 }
1862 
1863                 /* Ok, we are gonna eat this one.  */
1864                 new = rx_advance(lp, new);
1865 
1866                 memcpy(buf,
1867                        (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
1868                         p->u.u_data : p->u.r.r_data), pkt_len);
1869                 buf += pkt_len;
1870                 copied += pkt_len;
1871 
1872                 if (p->env & LDC_STOP)
1873                         break;
1874 
1875 no_data:
1876                 if (new == lp->rx_tail) {
1877                         err = rx_data_wait(lp, new);
1878                         if (err)
1879                                 break;
1880                 }
1881         }
1882 
1883         if (!err)
1884                 err = rx_set_head(lp, new);
1885 
1886         if (err && first_frag)
1887                 lp->rcv_nxt = first_frag->seqid - 1;
1888 
1889         if (!err) {
1890                 err = copied;
1891                 if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
1892                         send_data_ack(lp);
1893         }
1894 
1895         return err;
1896 }
1897 
1898 static const struct ldc_mode_ops nonraw_ops = {
1899         .write          =       write_nonraw,
1900         .read           =       read_nonraw,
1901 };
1902 
1903 static int write_stream(struct ldc_channel *lp, const void *buf,
1904                         unsigned int size)
1905 {
1906         if (size > lp->cfg.mtu)
1907                 size = lp->cfg.mtu;
1908         return write_nonraw(lp, buf, size);
1909 }
1910 
1911 static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
1912 {
1913         if (!lp->mssbuf_len) {
1914                 int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
1915                 if (err < 0)
1916                         return err;
1917 
1918                 lp->mssbuf_len = err;
1919                 lp->mssbuf_off = 0;
1920         }
1921 
1922         if (size > lp->mssbuf_len)
1923                 size = lp->mssbuf_len;
1924         memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
1925 
1926         lp->mssbuf_off += size;
1927         lp->mssbuf_len -= size;
1928 
1929         return size;
1930 }
1931 
1932 static const struct ldc_mode_ops stream_ops = {
1933         .write          =       write_stream,
1934         .read           =       read_stream,
1935 };
1936 
1937 int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1938 {
1939         unsigned long flags;
1940         int err;
1941 
1942         if (!buf)
1943                 return -EINVAL;
1944 
1945         if (!size)
1946                 return 0;
1947 
1948         spin_lock_irqsave(&lp->lock, flags);
1949 
1950         if (lp->hs_state != LDC_HS_COMPLETE)
1951                 err = -ENOTCONN;
1952         else
1953                 err = lp->mops->write(lp, buf, size);
1954 
1955         spin_unlock_irqrestore(&lp->lock, flags);
1956 
1957         return err;
1958 }
1959 EXPORT_SYMBOL(ldc_write);
1960 
1961 int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1962 {
1963         unsigned long flags;
1964         int err;
1965 
1966         ldcdbg(RX, "%s: entered size=%d\n", __func__, size);
1967 
1968         if (!buf)
1969                 return -EINVAL;
1970 
1971         if (!size)
1972                 return 0;
1973 
1974         spin_lock_irqsave(&lp->lock, flags);
1975 
1976         if (lp->hs_state != LDC_HS_COMPLETE)
1977                 err = -ENOTCONN;
1978         else
1979                 err = lp->mops->read(lp, buf, size);
1980 
1981         spin_unlock_irqrestore(&lp->lock, flags);
1982 
1983         ldcdbg(RX, "%s: mode=%d, head=%lu, tail=%lu rv=%d\n", __func__,
1984                lp->cfg.mode, lp->rx_head, lp->rx_tail, err);
1985 
1986         return err;
1987 }
1988 EXPORT_SYMBOL(ldc_read);
1989 
1990 static u64 pagesize_code(void)
1991 {
1992         switch (PAGE_SIZE) {
1993         default:
1994         case (8ULL * 1024ULL):
1995                 return 0;
1996         case (64ULL * 1024ULL):
1997                 return 1;
1998         case (512ULL * 1024ULL):
1999                 return 2;
2000         case (4ULL * 1024ULL * 1024ULL):
2001                 return 3;
2002         case (32ULL * 1024ULL * 1024ULL):
2003                 return 4;
2004         case (256ULL * 1024ULL * 1024ULL):
2005                 return 5;
2006         }
2007 }
2008 
2009 static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
2010 {
2011         return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
2012                 (index << PAGE_SHIFT) |
2013                 page_offset);
2014 }
2015 
2016 
2017 static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
2018                                              unsigned long npages)
2019 {
2020         long entry;
2021 
2022         entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_map_table,
2023                                       npages, NULL, (unsigned long)-1, 0);
2024         if (unlikely(entry == IOMMU_ERROR_CODE))
2025                 return NULL;
2026 
2027         return iommu->page_table + entry;
2028 }
2029 
2030 static u64 perm_to_mte(unsigned int map_perm)
2031 {
2032         u64 mte_base;
2033 
2034         mte_base = pagesize_code();
2035 
2036         if (map_perm & LDC_MAP_SHADOW) {
2037                 if (map_perm & LDC_MAP_R)
2038                         mte_base |= LDC_MTE_COPY_R;
2039                 if (map_perm & LDC_MAP_W)
2040                         mte_base |= LDC_MTE_COPY_W;
2041         }
2042         if (map_perm & LDC_MAP_DIRECT) {
2043                 if (map_perm & LDC_MAP_R)
2044                         mte_base |= LDC_MTE_READ;
2045                 if (map_perm & LDC_MAP_W)
2046                         mte_base |= LDC_MTE_WRITE;
2047                 if (map_perm & LDC_MAP_X)
2048                         mte_base |= LDC_MTE_EXEC;
2049         }
2050         if (map_perm & LDC_MAP_IO) {
2051                 if (map_perm & LDC_MAP_R)
2052                         mte_base |= LDC_MTE_IOMMU_R;
2053                 if (map_perm & LDC_MAP_W)
2054                         mte_base |= LDC_MTE_IOMMU_W;
2055         }
2056 
2057         return mte_base;
2058 }
2059 
2060 static int pages_in_region(unsigned long base, long len)
2061 {
2062         int count = 0;
2063 
2064         do {
2065                 unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
2066 
2067                 len -= (new - base);
2068                 base = new;
2069                 count++;
2070         } while (len > 0);
2071 
2072         return count;
2073 }
2074 
2075 struct cookie_state {
2076         struct ldc_mtable_entry         *page_table;
2077         struct ldc_trans_cookie         *cookies;
2078         u64                             mte_base;
2079         u64                             prev_cookie;
2080         u32                             pte_idx;
2081         u32                             nc;
2082 };
2083 
2084 static void fill_cookies(struct cookie_state *sp, unsigned long pa,
2085                          unsigned long off, unsigned long len)
2086 {
2087         do {
2088                 unsigned long tlen, new = pa + PAGE_SIZE;
2089                 u64 this_cookie;
2090 
2091                 sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
2092 
2093                 tlen = PAGE_SIZE;
2094                 if (off)
2095                         tlen = PAGE_SIZE - off;
2096                 if (tlen > len)
2097                         tlen = len;
2098 
2099                 this_cookie = make_cookie(sp->pte_idx,
2100                                           pagesize_code(), off);
2101 
2102                 off = 0;
2103 
2104                 if (this_cookie == sp->prev_cookie) {
2105                         sp->cookies[sp->nc - 1].cookie_size += tlen;
2106                 } else {
2107                         sp->cookies[sp->nc].cookie_addr = this_cookie;
2108                         sp->cookies[sp->nc].cookie_size = tlen;
2109                         sp->nc++;
2110                 }
2111                 sp->prev_cookie = this_cookie + tlen;
2112 
2113                 sp->pte_idx++;
2114 
2115                 len -= tlen;
2116                 pa = new;
2117         } while (len > 0);
2118 }
2119 
2120 static int sg_count_one(struct scatterlist *sg)
2121 {
2122         unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
2123         long len = sg->length;
2124 
2125         if ((sg->offset | len) & (8UL - 1))
2126                 return -EFAULT;
2127 
2128         return pages_in_region(base + sg->offset, len);
2129 }
2130 
2131 static int sg_count_pages(struct scatterlist *sg, int num_sg)
2132 {
2133         int count;
2134         int i;
2135 
2136         count = 0;
2137         for (i = 0; i < num_sg; i++) {
2138                 int err = sg_count_one(sg + i);
2139                 if (err < 0)
2140                         return err;
2141                 count += err;
2142         }
2143 
2144         return count;
2145 }
2146 
2147 int ldc_map_sg(struct ldc_channel *lp,
2148                struct scatterlist *sg, int num_sg,
2149                struct ldc_trans_cookie *cookies, int ncookies,
2150                unsigned int map_perm)
2151 {
2152         unsigned long i, npages;
2153         struct ldc_mtable_entry *base;
2154         struct cookie_state state;
2155         struct ldc_iommu *iommu;
2156         int err;
2157         struct scatterlist *s;
2158 
2159         if (map_perm & ~LDC_MAP_ALL)
2160                 return -EINVAL;
2161 
2162         err = sg_count_pages(sg, num_sg);
2163         if (err < 0)
2164                 return err;
2165 
2166         npages = err;
2167         if (err > ncookies)
2168                 return -EMSGSIZE;
2169 
2170         iommu = &lp->iommu;
2171 
2172         base = alloc_npages(iommu, npages);
2173 
2174         if (!base)
2175                 return -ENOMEM;
2176 
2177         state.page_table = iommu->page_table;
2178         state.cookies = cookies;
2179         state.mte_base = perm_to_mte(map_perm);
2180         state.prev_cookie = ~(u64)0;
2181         state.pte_idx = (base - iommu->page_table);
2182         state.nc = 0;
2183 
2184         for_each_sg(sg, s, num_sg, i) {
2185                 fill_cookies(&state, page_to_pfn(sg_page(s)) << PAGE_SHIFT,
2186                              s->offset, s->length);
2187         }
2188 
2189         return state.nc;
2190 }
2191 EXPORT_SYMBOL(ldc_map_sg);
2192 
2193 int ldc_map_single(struct ldc_channel *lp,
2194                    void *buf, unsigned int len,
2195                    struct ldc_trans_cookie *cookies, int ncookies,
2196                    unsigned int map_perm)
2197 {
2198         unsigned long npages, pa;
2199         struct ldc_mtable_entry *base;
2200         struct cookie_state state;
2201         struct ldc_iommu *iommu;
2202 
2203         if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2204                 return -EINVAL;
2205 
2206         pa = __pa(buf);
2207         if ((pa | len) & (8UL - 1))
2208                 return -EFAULT;
2209 
2210         npages = pages_in_region(pa, len);
2211 
2212         iommu = &lp->iommu;
2213 
2214         base = alloc_npages(iommu, npages);
2215 
2216         if (!base)
2217                 return -ENOMEM;
2218 
2219         state.page_table = iommu->page_table;
2220         state.cookies = cookies;
2221         state.mte_base = perm_to_mte(map_perm);
2222         state.prev_cookie = ~(u64)0;
2223         state.pte_idx = (base - iommu->page_table);
2224         state.nc = 0;
2225         fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2226         BUG_ON(state.nc > ncookies);
2227 
2228         return state.nc;
2229 }
2230 EXPORT_SYMBOL(ldc_map_single);
2231 
2232 
2233 static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2234                         u64 cookie, u64 size)
2235 {
2236         unsigned long npages, entry;
2237 
2238         npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2239 
2240         entry = ldc_cookie_to_index(cookie, iommu);
2241         ldc_demap(iommu, id, cookie, entry, npages);
2242         iommu_tbl_range_free(&iommu->iommu_map_table, cookie, npages, entry);
2243 }
2244 
2245 void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2246                int ncookies)
2247 {
2248         struct ldc_iommu *iommu = &lp->iommu;
2249         int i;
2250         unsigned long flags;
2251 
2252         spin_lock_irqsave(&iommu->lock, flags);
2253         for (i = 0; i < ncookies; i++) {
2254                 u64 addr = cookies[i].cookie_addr;
2255                 u64 size = cookies[i].cookie_size;
2256 
2257                 free_npages(lp->id, iommu, addr, size);
2258         }
2259         spin_unlock_irqrestore(&iommu->lock, flags);
2260 }
2261 EXPORT_SYMBOL(ldc_unmap);
2262 
2263 int ldc_copy(struct ldc_channel *lp, int copy_dir,
2264              void *buf, unsigned int len, unsigned long offset,
2265              struct ldc_trans_cookie *cookies, int ncookies)
2266 {
2267         unsigned int orig_len;
2268         unsigned long ra;
2269         int i;
2270 
2271         if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
2272                 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
2273                        lp->id, copy_dir);
2274                 return -EINVAL;
2275         }
2276 
2277         ra = __pa(buf);
2278         if ((ra | len | offset) & (8UL - 1)) {
2279                 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
2280                        "ra[%lx] len[%x] offset[%lx]\n",
2281                        lp->id, ra, len, offset);
2282                 return -EFAULT;
2283         }
2284 
2285         if (lp->hs_state != LDC_HS_COMPLETE ||
2286             (lp->flags & LDC_FLAG_RESET)) {
2287                 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
2288                        "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
2289                 return -ECONNRESET;
2290         }
2291 
2292         orig_len = len;
2293         for (i = 0; i < ncookies; i++) {
2294                 unsigned long cookie_raddr = cookies[i].cookie_addr;
2295                 unsigned long this_len = cookies[i].cookie_size;
2296                 unsigned long actual_len;
2297 
2298                 if (unlikely(offset)) {
2299                         unsigned long this_off = offset;
2300 
2301                         if (this_off > this_len)
2302                                 this_off = this_len;
2303 
2304                         offset -= this_off;
2305                         this_len -= this_off;
2306                         if (!this_len)
2307                                 continue;
2308                         cookie_raddr += this_off;
2309                 }
2310 
2311                 if (this_len > len)
2312                         this_len = len;
2313 
2314                 while (1) {
2315                         unsigned long hv_err;
2316 
2317                         hv_err = sun4v_ldc_copy(lp->id, copy_dir,
2318                                                 cookie_raddr, ra,
2319                                                 this_len, &actual_len);
2320                         if (unlikely(hv_err)) {
2321                                 printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
2322                                        "HV error %lu\n",
2323                                        lp->id, hv_err);
2324                                 if (lp->hs_state != LDC_HS_COMPLETE ||
2325                                     (lp->flags & LDC_FLAG_RESET))
2326                                         return -ECONNRESET;
2327                                 else
2328                                         return -EFAULT;
2329                         }
2330 
2331                         cookie_raddr += actual_len;
2332                         ra += actual_len;
2333                         len -= actual_len;
2334                         if (actual_len == this_len)
2335                                 break;
2336 
2337                         this_len -= actual_len;
2338                 }
2339 
2340                 if (!len)
2341                         break;
2342         }
2343 
2344         /* It is caller policy what to do about short copies.
2345          * For example, a networking driver can declare the
2346          * packet a runt and drop it.
2347          */
2348 
2349         return orig_len - len;
2350 }
2351 EXPORT_SYMBOL(ldc_copy);
2352 
2353 void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2354                           struct ldc_trans_cookie *cookies, int *ncookies,
2355                           unsigned int map_perm)
2356 {
2357         void *buf;
2358         int err;
2359 
2360         if (len & (8UL - 1))
2361                 return ERR_PTR(-EINVAL);
2362 
2363         buf = kzalloc(len, GFP_ATOMIC);
2364         if (!buf)
2365                 return ERR_PTR(-ENOMEM);
2366 
2367         err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2368         if (err < 0) {
2369                 kfree(buf);
2370                 return ERR_PTR(err);
2371         }
2372         *ncookies = err;
2373 
2374         return buf;
2375 }
2376 EXPORT_SYMBOL(ldc_alloc_exp_dring);
2377 
/* Undo the mapping described by @cookies with ldc_unmap(), then free
 * @buf with kfree().  @len is accepted but not used here.
 */
void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
                        struct ldc_trans_cookie *cookies, int ncookies)
{
        /* Drop the map table entries before releasing the memory. */
        ldc_unmap(lp, cookies, ncookies);

        kfree(buf);
}
EXPORT_SYMBOL(ldc_free_exp_dring);
2385 
2386 static int __init ldc_init(void)
2387 {
2388         unsigned long major, minor;
2389         struct mdesc_handle *hp;
2390         const u64 *v;
2391         int err;
2392         u64 mp;
2393 
2394         hp = mdesc_grab();
2395         if (!hp)
2396                 return -ENODEV;
2397 
2398         mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
2399         err = -ENODEV;
2400         if (mp == MDESC_NODE_NULL)
2401                 goto out;
2402 
2403         v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
2404         if (!v)
2405                 goto out;
2406 
2407         major = 1;
2408         minor = 0;
2409         if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2410                 printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2411                 goto out;
2412         }
2413 
2414         printk(KERN_INFO "%s", version);
2415 
2416         if (!*v) {
2417                 printk(KERN_INFO PFX "Domaining disabled.\n");
2418                 goto out;
2419         }
2420         ldom_domaining_enabled = 1;
2421         err = 0;
2422 
2423 out:
2424         mdesc_release(hp);
2425         return err;
2426 }
2427 
2428 core_initcall(ldc_init);

/* [<][>][^][v][top][bottom][index][help] */