root/drivers/infiniband/sw/siw/siw_qp_rx.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. siw_rx_umem
  2. siw_rx_kva
  3. siw_rx_pbl
  4. siw_rresp_check_ntoh
  5. siw_write_check_ntoh
  6. siw_send_check_ntoh
  7. siw_rqe_get
  8. siw_proc_send
  9. siw_proc_write
  10. siw_proc_rreq
  11. siw_init_rresp
  12. siw_orqe_start_rx
  13. siw_proc_rresp
  14. siw_proc_terminate
  15. siw_get_trailer
  16. siw_check_tx_fence
  17. siw_rdmap_complete
  18. siw_tcp_rx_data

   1 // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
   2 
   3 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
   4 /* Copyright (c) 2008-2019, IBM Corporation */
   5 
   6 #include <linux/errno.h>
   7 #include <linux/types.h>
   8 #include <linux/net.h>
   9 #include <linux/scatterlist.h>
  10 #include <linux/highmem.h>
  11 
  12 #include <rdma/iw_cm.h>
  13 #include <rdma/ib_verbs.h>
  14 
  15 #include "siw.h"
  16 #include "siw_verbs.h"
  17 #include "siw_mem.h"
  18 
  19 /*
  20  * siw_rx_umem()
  21  *
  22  * Receive data of @len into target referenced by @dest_addr.
      * The payload is copied out of srx->skb, starting at srx->skb_offset,
      * one target page at a time; a chunk never crosses a page boundary.
      * If srx->mpa_crc_hd is set, the placed bytes are also fed into the CRC.
  23  *
  24  * @srx:        Receive Context
  25  * @umem:       siw representation of target memory
  26  * @dest_addr:  user virtual address
  27  * @len:        number of bytes to place
      *
      * Returns the number of bytes placed (@len) on success, or -EFAULT if
      * siw_get_upage() yields no page for the address or skb_copy_bits()
      * fails. On every exit path srx->skb_copied and srx->skb_new are
      * adjusted by the number of bytes placed so far.
  28  */
  29 static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem,
  30                        u64 dest_addr, int len)
  31 {
  32         int copied = 0;
  33 
  34         while (len) {
  35                 struct page *p;
  36                 int pg_off, bytes, rv;
  37                 void *dest;
  38 
  39                 p = siw_get_upage(umem, dest_addr);
  40                 if (unlikely(!p)) {
  41                         pr_warn("siw: %s: [QP %u]: bogus addr: %pK, %pK\n",
  42                                 __func__, qp_id(rx_qp(srx)),
  43                                 (void *)(uintptr_t)dest_addr,
  44                                 (void *)(uintptr_t)umem->fp_addr);
  45                         /* siw internal error */
                             /* account for what was placed before bailing out */
  46                         srx->skb_copied += copied;
  47                         srx->skb_new -= copied;
  48 
  49                         return -EFAULT;
  50                 }
                     /* offset inside the page; limit the chunk to the page end */
  51                 pg_off = dest_addr & ~PAGE_MASK;
  52                 bytes = min(len, (int)PAGE_SIZE - pg_off);
  53 
  54                 siw_dbg_qp(rx_qp(srx), "page %pK, bytes=%u\n", p, bytes);
  55 
                     /* every exit below must pair this with kunmap_atomic() */
  56                 dest = kmap_atomic(p);
  57                 rv = skb_copy_bits(srx->skb, srx->skb_offset, dest + pg_off,
  58                                    bytes);
  59 
  60                 if (unlikely(rv)) {
  61                         kunmap_atomic(dest);
  62                         srx->skb_copied += copied;
  63                         srx->skb_new -= copied;
  64 
  65                         pr_warn("siw: [QP %u]: %s, len %d, page %p, rv %d\n",
  66                                 qp_id(rx_qp(srx)), __func__, len, p, rv);
  67 
  68                         return -EFAULT;
  69                 }
  70                 if (srx->mpa_crc_hd) {
  71                         if (rx_qp(srx)->kernel_verbs) {
                                     /*
                                      * kernel_verbs QP: CRC is computed over
                                      * the target buffer while still mapped.
                                      */
  72                                 crypto_shash_update(srx->mpa_crc_hd,
  73                                         (u8 *)(dest + pg_off), bytes);
  74                                 kunmap_atomic(dest);
  75                         } else {
  76                                 kunmap_atomic(dest);
  77                                 /*
  78                                  * Do CRC on original, not target buffer.
  79                                  * Some user land applications may
  80                                  * concurrently write the target buffer,
  81                                  * which would yield a broken CRC.
  82                                  * Walking the skb twice is very inefficient.
  83                                  * Folding the CRC into skb_copy_bits()
  84                                  * would be much better, but is currently
  85                                  * not supported.
  86                                  */
  87                                 siw_crc_skb(srx, bytes);
  88                         }
  89                 } else {
  90                         kunmap_atomic(dest);
  91                 }
                     /* advance source offset, byte counters and target address */
  92                 srx->skb_offset += bytes;
  93                 copied += bytes;
  94                 len -= bytes;
  95                 dest_addr += bytes;
                     /*
                      * NOTE(review): this store looks redundant; pg_off is
                      * recomputed from dest_addr at the top of each iteration.
                      */
  96                 pg_off = 0;
  97         }
  98         srx->skb_copied += copied;
  99         srx->skb_new -= copied;
 100 
 101         return copied;
 102 }
 103 
 104 static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len)
 105 {
 106         int rv;
 107 
 108         siw_dbg_qp(rx_qp(srx), "kva: 0x%pK, len: %u\n", kva, len);
 109 
 110         rv = skb_copy_bits(srx->skb, srx->skb_offset, kva, len);
 111         if (unlikely(rv)) {
 112                 pr_warn("siw: [QP %u]: %s, len %d, kva 0x%pK, rv %d\n",
 113                         qp_id(rx_qp(srx)), __func__, len, kva, rv);
 114 
 115                 return rv;
 116         }
 117         if (srx->mpa_crc_hd)
 118                 crypto_shash_update(srx->mpa_crc_hd, (u8 *)kva, len);
 119 
 120         srx->skb_offset += len;
 121         srx->skb_copied += len;
 122         srx->skb_new -= len;
 123 
 124         return len;
 125 }
 126 
 127 static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx,
 128                       struct siw_mem *mem, u64 addr, int len)
 129 {
 130         struct siw_pbl *pbl = mem->pbl;
 131         u64 offset = addr - mem->va;
 132         int copied = 0;
 133 
 134         while (len) {
 135                 int bytes;
 136                 dma_addr_t buf_addr =
 137                         siw_pbl_get_buffer(pbl, offset, &bytes, pbl_idx);
 138                 if (!buf_addr)
 139                         break;
 140 
 141                 bytes = min(bytes, len);
 142                 if (siw_rx_kva(srx, (void *)buf_addr, bytes) == bytes) {
 143                         copied += bytes;
 144                         offset += bytes;
 145                         len -= bytes;
 146                 } else {
 147                         break;
 148                 }
 149         }
 150         return copied;
 151 }
 152 
 153 /*
 154  * siw_rresp_check_ntoh()
 155  *
 156  * Check incoming RRESP fragment header against expected
 157  * header values and update expected values for potential next
 158  * fragment.
 159  *
 160  * NOTE: This function must be called only if a RRESP DDP segment
 161  *       starts but not for fragmented consecutive pieces of an
 162  *       already started DDP segment.
 163  */
 164 static int siw_rresp_check_ntoh(struct siw_rx_stream *srx,
 165                                 struct siw_rx_fpdu *frx)
 166 {
 167         struct iwarp_rdma_rresp *rresp = &srx->hdr.rresp;
 168         struct siw_wqe *wqe = &frx->wqe_active;
 169         enum ddp_ecode ecode;
 170 
 171         u32 sink_stag = be32_to_cpu(rresp->sink_stag);
 172         u64 sink_to = be64_to_cpu(rresp->sink_to);
 173 
 174         if (frx->first_ddp_seg) {
 175                 srx->ddp_stag = wqe->sqe.sge[0].lkey;
 176                 srx->ddp_to = wqe->sqe.sge[0].laddr;
 177                 frx->pbl_idx = 0;
 178         }
 179         /* Below checks extend beyond the semantics of DDP, and
 180          * into RDMAP:
 181          * We check if the read response matches exactly the
 182          * read request which was send to the remote peer to
 183          * trigger this read response. RFC5040/5041 do not
 184          * always have a proper error code for the detected
 185          * error cases. We choose 'base or bounds error' for
 186          * cases where the inbound STag is valid, but offset
 187          * or length do not match our response receive state.
 188          */
 189         if (unlikely(srx->ddp_stag != sink_stag)) {
 190                 pr_warn("siw: [QP %u]: rresp stag: %08x != %08x\n",
 191                         qp_id(rx_qp(srx)), sink_stag, srx->ddp_stag);
 192                 ecode = DDP_ECODE_T_INVALID_STAG;
 193                 goto error;
 194         }
 195         if (unlikely(srx->ddp_to != sink_to)) {
 196                 pr_warn("siw: [QP %u]: rresp off: %016llx != %016llx\n",
 197                         qp_id(rx_qp(srx)), (unsigned long long)sink_to,
 198                         (unsigned long long)srx->ddp_to);
 199                 ecode = DDP_ECODE_T_BASE_BOUNDS;
 200                 goto error;
 201         }
 202         if (unlikely(!frx->more_ddp_segs &&
 203                      (wqe->processed + srx->fpdu_part_rem != wqe->bytes))) {
 204                 pr_warn("siw: [QP %u]: rresp len: %d != %d\n",
 205                         qp_id(rx_qp(srx)),
 206                         wqe->processed + srx->fpdu_part_rem, wqe->bytes);
 207                 ecode = DDP_ECODE_T_BASE_BOUNDS;
 208                 goto error;
 209         }
 210         return 0;
 211 error:
 212         siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
 213                            DDP_ETYPE_TAGGED_BUF, ecode, 0);
 214         return -EINVAL;
 215 }
 216 
 217 /*
 218  * siw_write_check_ntoh()
 219  *
 220  * Check incoming WRITE fragment header against expected
 221  * header values and update expected values for potential next
 222  * fragment
 223  *
 224  * NOTE: This function must be called only if a WRITE DDP segment
 225  *       starts but not for fragmented consecutive pieces of an
 226  *       already started DDP segment.
 227  */
 228 static int siw_write_check_ntoh(struct siw_rx_stream *srx,
 229                                 struct siw_rx_fpdu *frx)
 230 {
 231         struct iwarp_rdma_write *write = &srx->hdr.rwrite;
 232         enum ddp_ecode ecode;
 233 
 234         u32 sink_stag = be32_to_cpu(write->sink_stag);
 235         u64 sink_to = be64_to_cpu(write->sink_to);
 236 
 237         if (frx->first_ddp_seg) {
 238                 srx->ddp_stag = sink_stag;
 239                 srx->ddp_to = sink_to;
 240                 frx->pbl_idx = 0;
 241         } else {
 242                 if (unlikely(srx->ddp_stag != sink_stag)) {
 243                         pr_warn("siw: [QP %u]: write stag: %08x != %08x\n",
 244                                 qp_id(rx_qp(srx)), sink_stag,
 245                                 srx->ddp_stag);
 246                         ecode = DDP_ECODE_T_INVALID_STAG;
 247                         goto error;
 248                 }
 249                 if (unlikely(srx->ddp_to != sink_to)) {
 250                         pr_warn("siw: [QP %u]: write off: %016llx != %016llx\n",
 251                                 qp_id(rx_qp(srx)),
 252                                 (unsigned long long)sink_to,
 253                                 (unsigned long long)srx->ddp_to);
 254                         ecode = DDP_ECODE_T_BASE_BOUNDS;
 255                         goto error;
 256                 }
 257         }
 258         return 0;
 259 error:
 260         siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
 261                            DDP_ETYPE_TAGGED_BUF, ecode, 0);
 262         return -EINVAL;
 263 }
 264 
 265 /*
 266  * siw_send_check_ntoh()
 267  *
 268  * Check incoming SEND fragment header against expected
 269  * header values and update expected MSN if no next
 270  * fragment expected
 271  *
 272  * NOTE: This function must be called only if a SEND DDP segment
 273  *       starts but not for fragmented consecutive pieces of an
 274  *       already started DDP segment.
 275  */
 276 static int siw_send_check_ntoh(struct siw_rx_stream *srx,
 277                                struct siw_rx_fpdu *frx)
 278 {
 279         struct iwarp_send_inv *send = &srx->hdr.send_inv;
 280         struct siw_wqe *wqe = &frx->wqe_active;
 281         enum ddp_ecode ecode;
 282 
 283         u32 ddp_msn = be32_to_cpu(send->ddp_msn);
 284         u32 ddp_mo = be32_to_cpu(send->ddp_mo);
 285         u32 ddp_qn = be32_to_cpu(send->ddp_qn);
 286 
 287         if (unlikely(ddp_qn != RDMAP_UNTAGGED_QN_SEND)) {
 288                 pr_warn("siw: [QP %u]: invalid ddp qn %d for send\n",
 289                         qp_id(rx_qp(srx)), ddp_qn);
 290                 ecode = DDP_ECODE_UT_INVALID_QN;
 291                 goto error;
 292         }
 293         if (unlikely(ddp_msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND])) {
 294                 pr_warn("siw: [QP %u]: send msn: %u != %u\n",
 295                         qp_id(rx_qp(srx)), ddp_msn,
 296                         srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]);
 297                 ecode = DDP_ECODE_UT_INVALID_MSN_RANGE;
 298                 goto error;
 299         }
 300         if (unlikely(ddp_mo != wqe->processed)) {
 301                 pr_warn("siw: [QP %u], send mo: %u != %u\n",
 302                         qp_id(rx_qp(srx)), ddp_mo, wqe->processed);
 303                 ecode = DDP_ECODE_UT_INVALID_MO;
 304                 goto error;
 305         }
 306         if (frx->first_ddp_seg) {
 307                 /* initialize user memory write position */
 308                 frx->sge_idx = 0;
 309                 frx->sge_off = 0;
 310                 frx->pbl_idx = 0;
 311 
 312                 /* only valid for SEND_INV and SEND_SE_INV operations */
 313                 srx->inval_stag = be32_to_cpu(send->inval_stag);
 314         }
 315         if (unlikely(wqe->bytes < wqe->processed + srx->fpdu_part_rem)) {
 316                 siw_dbg_qp(rx_qp(srx), "receive space short: %d - %d < %d\n",
 317                            wqe->bytes, wqe->processed, srx->fpdu_part_rem);
 318                 wqe->wc_status = SIW_WC_LOC_LEN_ERR;
 319                 ecode = DDP_ECODE_UT_INVALID_MSN_NOBUF;
 320                 goto error;
 321         }
 322         return 0;
 323 error:
 324         siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
 325                            DDP_ETYPE_UNTAGGED_BUF, ecode, 0);
 326         return -EINVAL;
 327 }
 328 
     /*
      * siw_rqe_get()
      *
      * Fetch the next receive queue element for @qp and load it into the
      * WQE of qp->rx_untagged. The element is taken from the SRQ (under
      * srq->lock) if qp->srq is set, otherwise from the QP's own receive
      * queue.
      *
      * Returns the initialized WQE, or NULL if there is no queue, the
      * element at the get index is not valid, or it carries more than
      * SIW_MAX_SGE SGEs. For an armed SRQ, IB_EVENT_SRQ_LIMIT_REACHED is
      * signalled (after dropping the lock) if the element srq->limit
      * entries ahead of the get index is not valid.
      */
 329 static struct siw_wqe *siw_rqe_get(struct siw_qp *qp)
 330 {
 331         struct siw_rqe *rqe;
 332         struct siw_srq *srq;
 333         struct siw_wqe *wqe = NULL;
 334         bool srq_event = false;
 335         unsigned long flags;
 336 
 337         srq = qp->srq;
 338         if (srq) {
 339                 spin_lock_irqsave(&srq->lock, flags);
                     /* also guards the modulo below against a zero divisor */
 340                 if (unlikely(!srq->num_rqe))
 341                         goto out;
 342 
 343                 rqe = &srq->recvq[srq->rq_get % srq->num_rqe];
 344         } else {
 345                 if (unlikely(!qp->recvq))
 346                         goto out;
 347 
 348                 rqe = &qp->recvq[qp->rq_get % qp->attrs.rq_size];
 349         }
 350         if (likely(rqe->flags == SIW_WQE_VALID)) {
 351                 int num_sge = rqe->num_sge;
 352 
 353                 if (likely(num_sge <= SIW_MAX_SGE)) {
 354                         int i = 0;
 355 
 356                         wqe = rx_wqe(&qp->rx_untagged);
 357                         rx_type(wqe) = SIW_OP_RECEIVE;
 358                         wqe->wr_status = SIW_WR_INPROGRESS;
 359                         wqe->bytes = 0;
 360                         wqe->processed = 0;
 361 
 362                         wqe->rqe.id = rqe->id;
 363                         wqe->rqe.num_sge = num_sge;
 364 
                             /* copy the SGEs; wqe->bytes sums their lengths */
 365                         while (i < num_sge) {
 366                                 wqe->rqe.sge[i].laddr = rqe->sge[i].laddr;
 367                                 wqe->rqe.sge[i].lkey = rqe->sge[i].lkey;
 368                                 wqe->rqe.sge[i].length = rqe->sge[i].length;
 369                                 wqe->bytes += wqe->rqe.sge[i].length;
 370                                 wqe->mem[i] = NULL;
 371                                 i++;
 372                         }
 373                         /* can be re-used by appl */
 374                         smp_store_mb(rqe->flags, 0);
 375                 } else {
                             /*
                              * Bail out without consuming the element:
                              * rqe->flags and the get index stay untouched.
                              */
 376                         siw_dbg_qp(qp, "too many sge's: %d\n", rqe->num_sge);
 377                         if (srq)
 378                                 spin_unlock_irqrestore(&srq->lock, flags);
 379                         return NULL;
 380                 }
 381                 if (!srq) {
 382                         qp->rq_get++;
 383                 } else {
 384                         if (srq->armed) {
 385                                 /* Test SRQ limit */
 386                                 u32 off = (srq->rq_get + srq->limit) %
 387                                           srq->num_rqe;
 388                                 struct siw_rqe *rqe2 = &srq->recvq[off];
 389 
                                     /* disarm and remember to raise the event */
 390                                 if (!(rqe2->flags & SIW_WQE_VALID)) {
 391                                         srq->armed = 0;
 392                                         srq_event = true;
 393                                 }
 394                         }
 395                         srq->rq_get++;
 396                 }
 397         }
 398 out:
 399         if (srq) {
 400                 spin_unlock_irqrestore(&srq->lock, flags);
                     /* event is raised only after the lock has been released */
 401                 if (srq_event)
 402                         siw_srq_event(srq, IB_EVENT_SRQ_LIMIT_REACHED);
 403         }
 404         return wqe;
 405 }
 406 
 407 /*
 408  * siw_proc_send:
 409  *
 410  * Process one incoming SEND and place data into memory referenced by
 411  * receive wqe.
 412  *
 413  * Function supports partially received sends (suspending/resuming
 414  * current receive wqe processing)
 415  *
 416  * return value:
 417  *      0:       reached the end of a DDP segment
 418  *      -EAGAIN: to be called again to finish the DDP segment
      *      -ENOENT: no receive wqe available for a starting SEND
      *      -EINVAL: data placement failed
      *      other:   failure code of siw_send_check_ntoh(), or a negative
      *               failure code of siw_check_sge()
 419  */
 420 int siw_proc_send(struct siw_qp *qp)
 421 {
 422         struct siw_rx_stream *srx = &qp->rx_stream;
 423         struct siw_rx_fpdu *frx = &qp->rx_untagged;
 424         struct siw_wqe *wqe;
 425         u32 data_bytes; /* all data bytes available */
 426         u32 rcvd_bytes; /* sum of data bytes rcvd */
 427         int rv = 0;
 428 
             /* a new message needs a fresh receive wqe; otherwise resume */
 429         if (frx->first_ddp_seg) {
 430                 wqe = siw_rqe_get(qp);
 431                 if (unlikely(!wqe)) {
 432                         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
 433                                            DDP_ETYPE_UNTAGGED_BUF,
 434                                            DDP_ECODE_UT_INVALID_MSN_NOBUF, 0);
 435                         return -ENOENT;
 436                 }
 437         } else {
 438                 wqe = rx_wqe(frx);
 439         }
             /* header is validated only when a DDP segment starts */
 440         if (srx->state == SIW_GET_DATA_START) {
 441                 rv = siw_send_check_ntoh(srx, frx);
 442                 if (unlikely(rv)) {
 443                         siw_qp_event(qp, IB_EVENT_QP_FATAL);
 444                         return rv;
 445                 }
 446                 if (!srx->fpdu_part_rem) /* zero length SEND */
 447                         return 0;
 448         }
             /* place no more than this FPDU still carries and the skb holds */
 449         data_bytes = min(srx->fpdu_part_rem, srx->skb_new);
 450         rcvd_bytes = 0;
 451 
 452         /* A zero length SEND will skip below loop */
 453         while (data_bytes) {
 454                 struct ib_pd *pd;
 455                 struct siw_mem **mem, *mem_p;
 456                 struct siw_sge *sge;
 457                 u32 sge_bytes; /* data bytes avail for SGE */
 458 
 459                 sge = &wqe->rqe.sge[frx->sge_idx];
 460 
 461                 if (!sge->length) {
 462                         /* just skip empty sge's */
 463                         frx->sge_idx++;
 464                         frx->sge_off = 0;
 465                         frx->pbl_idx = 0;
 466                         continue;
 467                 }
 468                 sge_bytes = min(data_bytes, sge->length - frx->sge_off);
 469                 mem = &wqe->mem[frx->sge_idx];
 470 
 471                 /*
 472                  * check with QP's PD if no SRQ present, SRQ's PD otherwise
 473                  */
 474                 pd = qp->srq == NULL ? qp->pd : qp->srq->base_srq.pd;
 475 
 476                 rv = siw_check_sge(pd, sge, mem, IB_ACCESS_LOCAL_WRITE,
 477                                    frx->sge_off, sge_bytes);
 478                 if (unlikely(rv)) {
 479                         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
 480                                            DDP_ETYPE_CATASTROPHIC,
 481                                            DDP_ECODE_CATASTROPHIC, 0);
 482 
 483                         siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
                             /* rv is evaluated by the final return statement */
 484                         break;
 485                 }
                     /* choose the placement routine by the kind of memory */
 486                 mem_p = *mem;
 487                 if (mem_p->mem_obj == NULL)
 488                         rv = siw_rx_kva(srx,
 489                                 (void *)(uintptr_t)(sge->laddr + frx->sge_off),
 490                                 sge_bytes);
 491                 else if (!mem_p->is_pbl)
 492                         rv = siw_rx_umem(srx, mem_p->umem,
 493                                          sge->laddr + frx->sge_off, sge_bytes);
 494                 else
 495                         rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p,
 496                                         sge->laddr + frx->sge_off, sge_bytes);
 497 
                     /* short or failed placement ends processing */
 498                 if (unlikely(rv != sge_bytes)) {
 499                         wqe->processed += rcvd_bytes;
 500 
 501                         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
 502                                            DDP_ETYPE_CATASTROPHIC,
 503                                            DDP_ECODE_CATASTROPHIC, 0);
 504                         return -EINVAL;
 505                 }
 506                 frx->sge_off += rv;
 507 
                     /* current SGE is full: continue with the next one */
 508                 if (frx->sge_off == sge->length) {
 509                         frx->sge_idx++;
 510                         frx->sge_off = 0;
 511                         frx->pbl_idx = 0;
 512                 }
 513                 data_bytes -= rv;
 514                 rcvd_bytes += rv;
 515 
 516                 srx->fpdu_part_rem -= rv;
 517                 srx->fpdu_part_rcvd += rv;
 518         }
 519         wqe->processed += rcvd_bytes;
 520 
 521         if (!srx->fpdu_part_rem)
 522                 return 0;
 523 
             /* data still outstanding: report an error if one was recorded */
 524         return (rv < 0) ? rv : -EAGAIN;
 525 }
 526 
 527 /*
 528  * siw_proc_write:
 529  *
 530  * Place incoming WRITE after referencing and checking target buffer
 531 
 532  * Function supports partially received WRITEs (suspending/resuming
 533  * current receive processing)
 534  *
 535  * return value:
 536  *      0:       reached the end of a DDP segment
 537  *      -EAGAIN: to be called again to finish the DDP segment
      *      -EINVAL: STag lookup/match, memory check or placement failed
      *      other:   failure code of siw_write_check_ntoh()
 538  */
 539 int siw_proc_write(struct siw_qp *qp)
 540 {
 541         struct siw_rx_stream *srx = &qp->rx_stream;
 542         struct siw_rx_fpdu *frx = &qp->rx_tagged;
 543         struct siw_mem *mem;
 544         int bytes, rv;
 545 
 546         if (srx->state == SIW_GET_DATA_START) {
                     /* returns before the header check is run */
 547                 if (!srx->fpdu_part_rem) /* zero length WRITE */
 548                         return 0;
 549 
 550                 rv = siw_write_check_ntoh(srx, frx);
 551                 if (unlikely(rv)) {
 552                         siw_qp_event(qp, IB_EVENT_QP_FATAL);
 553                         return rv;
 554                 }
 555         }
             /* place no more than this FPDU still carries and the skb holds */
 556         bytes = min(srx->fpdu_part_rem, srx->skb_new);
 557 
 558         if (frx->first_ddp_seg) {
 559                 struct siw_wqe *wqe = rx_wqe(frx);
 560 
                     /*
                      * Look up the target memory object once per message;
                      * the lookup uses the STag without its low 8 bits.
                      */
 561                 rx_mem(frx) = siw_mem_id2obj(qp->sdev, srx->ddp_stag >> 8);
 562                 if (unlikely(!rx_mem(frx))) {
 563                         siw_dbg_qp(qp,
 564                                    "sink stag not found/invalid, stag 0x%08x\n",
 565                                    srx->ddp_stag);
 566 
 567                         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
 568                                            DDP_ETYPE_TAGGED_BUF,
 569                                            DDP_ECODE_T_INVALID_STAG, 0);
 570                         return -EINVAL;
 571                 }
 572                 wqe->rqe.num_sge = 1;
 573                 rx_type(wqe) = SIW_OP_WRITE;
 574                 wqe->wr_status = SIW_WR_INPROGRESS;
 575         }
 576         mem = rx_mem(frx);
 577 
 578         /*
 579          * Check if application re-registered memory with different
 580          * key field of STag.
 581          */
 582         if (unlikely(mem->stag != srx->ddp_stag)) {
 583                 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
 584                                    DDP_ETYPE_TAGGED_BUF,
 585                                    DDP_ECODE_T_INVALID_STAG, 0);
 586                 return -EINVAL;
 587         }
             /* the range to be written must allow remote write access */
 588         rv = siw_check_mem(qp->pd, mem, srx->ddp_to + srx->fpdu_part_rcvd,
 589                            IB_ACCESS_REMOTE_WRITE, bytes);
 590         if (unlikely(rv)) {
 591                 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
 592                                    DDP_ETYPE_TAGGED_BUF, siw_tagged_error(-rv),
 593                                    0);
 594 
 595                 siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
 596 
 597                 return -EINVAL;
 598         }
 599 
             /* choose the placement routine by the kind of memory */
 600         if (mem->mem_obj == NULL)
 601                 rv = siw_rx_kva(srx,
 602                         (void *)(uintptr_t)(srx->ddp_to + srx->fpdu_part_rcvd),
 603                         bytes);
 604         else if (!mem->is_pbl)
 605                 rv = siw_rx_umem(srx, mem->umem,
 606                                  srx->ddp_to + srx->fpdu_part_rcvd, bytes);
 607         else
 608                 rv = siw_rx_pbl(srx, &frx->pbl_idx, mem,
 609                                 srx->ddp_to + srx->fpdu_part_rcvd, bytes);
 610 
 611         if (unlikely(rv != bytes)) {
 612                 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
 613                                    DDP_ETYPE_CATASTROPHIC,
 614                                    DDP_ECODE_CATASTROPHIC, 0);
 615                 return -EINVAL;
 616         }
 617         srx->fpdu_part_rem -= rv;
 618         srx->fpdu_part_rcvd += rv;
 619 
 620         if (!srx->fpdu_part_rem) {
                     /* segment done: advance the expected target offset */
 621                 srx->ddp_to += srx->fpdu_part_rcvd;
 622                 return 0;
 623         }
 624         return -EAGAIN;
 625 }
 626 
 627 /*
 628  * Inbound RREQ's cannot carry user data.
 629  */
 630 int siw_proc_rreq(struct siw_qp *qp)
 631 {
 632         struct siw_rx_stream *srx = &qp->rx_stream;
 633 
 634         if (!srx->fpdu_part_rem)
 635                 return 0;
 636 
 637         pr_warn("siw: [QP %u]: rreq with mpa len %d\n", qp_id(qp),
 638                 be16_to_cpu(srx->hdr.ctrl.mpa_len));
 639 
 640         return -EPROTO;
 641 }
 642 
 643 /*
 644  * siw_init_rresp:
 645  *
 646  * Process inbound RDMA READ REQ. Produce a pseudo READ RESPONSE WQE.
 647  * Put it at the tail of the IRQ, if there is another WQE currently in
 648  * transmit processing. If not, make it the current WQE to be processed
 649  * and schedule transmit processing.
 650  *
 651  * Can be called from softirq context and from process
 652  * context (RREAD socket loopback case!)
 653  *
 654  * return value:
 655  *      0:      success,
 656  *              failure code otherwise
      *              (-EPROTO on MSN mismatch or if no IRQ entry is free,
      *              or the return value of siw_sq_start())
 657  */
 658 
 659 static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx)
 660 {
 661         struct siw_wqe *tx_work = tx_wqe(qp);
 662         struct siw_sqe *resp;
 663 
             /* RREQ header fields, converted from network byte order */
 664         uint64_t raddr = be64_to_cpu(srx->hdr.rreq.sink_to),
 665                  laddr = be64_to_cpu(srx->hdr.rreq.source_to);
 666         uint32_t length = be32_to_cpu(srx->hdr.rreq.read_size),
 667                  lkey = be32_to_cpu(srx->hdr.rreq.source_stag),
 668                  rkey = be32_to_cpu(srx->hdr.rreq.sink_stag),
 669                  msn = be32_to_cpu(srx->hdr.rreq.ddp_msn);
 670 
 671         int run_sq = 1, rv = 0;
 672         unsigned long flags;
 673 
             /* the request must carry the MSN expected on the read queue */
 674         if (unlikely(msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ])) {
 675                 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
 676                                    DDP_ETYPE_UNTAGGED_BUF,
 677                                    DDP_ECODE_UT_INVALID_MSN_RANGE, 0);
 678                 return -EPROTO;
 679         }
 680         spin_lock_irqsave(&qp->sq_lock, flags);
 681 
 682         if (tx_work->wr_status == SIW_WR_IDLE) {
 683                 /*
 684                  * immediately schedule READ response w/o
 685                  * consuming IRQ entry: IRQ must be empty.
 686                  */
 687                 tx_work->processed = 0;
 688                 tx_work->mem[0] = NULL;
 689                 tx_work->wr_status = SIW_WR_QUEUED;
 690                 resp = &tx_work->sqe;
 691         } else {
                     /* transmit is busy: queue into the IRQ, do not start SQ */
 692                 resp = irq_alloc_free(qp);
 693                 run_sq = 0;
 694         }
 695         if (likely(resp)) {
 696                 resp->opcode = SIW_OP_READ_RESPONSE;
 697 
 698                 resp->sge[0].length = length;
 699                 resp->sge[0].laddr = laddr;
 700                 resp->sge[0].lkey = lkey;
 701 
 702                 /* Keep aside message sequence number for potential
 703                  * error reporting during Read Response generation.
 704                  */
 705                 resp->sge[1].length = msn;
 706 
 707                 resp->raddr = raddr;
 708                 resp->rkey = rkey;
                     /* a zero length read response carries no SGE */
 709                 resp->num_sge = length ? 1 : 0;
 710 
 711                 /* RRESP now valid as current TX wqe or placed into IRQ */
 712                 smp_store_mb(resp->flags, SIW_WQE_VALID);
 713         } else {
                     /*
                      * Only reachable via the IRQ branch above, where
                      * run_sq was cleared, so rv is not overwritten below.
                      */
 714                 pr_warn("siw: [QP %u]: irq %d exceeded %d\n", qp_id(qp),
 715                         qp->irq_put % qp->attrs.irq_size, qp->attrs.irq_size);
 716 
 717                 siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
 718                                    RDMAP_ETYPE_REMOTE_OPERATION,
 719                                    RDMAP_ECODE_CATASTROPHIC_STREAM, 0);
 720                 rv = -EPROTO;
 721         }
 722 
 723         spin_unlock_irqrestore(&qp->sq_lock, flags);
 724 
             /* start transmit only after sq_lock has been released */
 725         if (run_sq)
 726                 rv = siw_sq_start(qp);
 727 
 728         return rv;
 729 }
 730 
 731 /*
 732  * Only called at start of Read.Resonse processing.
 733  * Transfer pending Read from tip of ORQ into currrent rx wqe,
 734  * but keep ORQ entry valid until Read.Response processing done.
 735  * No Queue locking needed.
 736  */
 737 static int siw_orqe_start_rx(struct siw_qp *qp)
 738 {
 739         struct siw_sqe *orqe;
 740         struct siw_wqe *wqe = NULL;
 741 
 742         /* make sure ORQ indices are current */
 743         smp_mb();
 744 
 745         orqe = orq_get_current(qp);
 746         if (READ_ONCE(orqe->flags) & SIW_WQE_VALID) {
 747                 /* RRESP is a TAGGED RDMAP operation */
 748                 wqe = rx_wqe(&qp->rx_tagged);
 749                 wqe->sqe.id = orqe->id;
 750                 wqe->sqe.opcode = orqe->opcode;
 751                 wqe->sqe.sge[0].laddr = orqe->sge[0].laddr;
 752                 wqe->sqe.sge[0].lkey = orqe->sge[0].lkey;
 753                 wqe->sqe.sge[0].length = orqe->sge[0].length;
 754                 wqe->sqe.flags = orqe->flags;
 755                 wqe->sqe.num_sge = 1;
 756                 wqe->bytes = orqe->sge[0].length;
 757                 wqe->processed = 0;
 758                 wqe->mem[0] = NULL;
 759                 /* make sure WQE is completely written before valid */
 760                 smp_wmb();
 761                 wqe->wr_status = SIW_WR_INPROGRESS;
 762 
 763                 return 0;
 764         }
 765         return -EPROTO;
 766 }
 767 
 768 /*
 769  * siw_proc_rresp:
 770  *
 771  * Place incoming RRESP data into memory referenced by RREQ WQE
 772  * which is at the tip of the ORQ
 773  *
 774  * Function supports partially received RRESP's (suspending/resuming
 775  * current receive processing)
 776  */
 777 int siw_proc_rresp(struct siw_qp *qp)
 778 {
 779         struct siw_rx_stream *srx = &qp->rx_stream;
 780         struct siw_rx_fpdu *frx = &qp->rx_tagged;
 781         struct siw_wqe *wqe = rx_wqe(frx);
 782         struct siw_mem **mem, *mem_p;
 783         struct siw_sge *sge;
 784         int bytes, rv;
 785 
 786         if (frx->first_ddp_seg) {
 787                 if (unlikely(wqe->wr_status != SIW_WR_IDLE)) {
 788                         pr_warn("siw: [QP %u]: proc RRESP: status %d, op %d\n",
 789                                 qp_id(qp), wqe->wr_status, wqe->sqe.opcode);
 790                         rv = -EPROTO;
 791                         goto error_term;
 792                 }
 793                 /*
 794                  * fetch pending RREQ from orq
 795                  */
 796                 rv = siw_orqe_start_rx(qp);
 797                 if (rv) {
 798                         pr_warn("siw: [QP %u]: ORQ empty at idx %d\n",
 799                                 qp_id(qp), qp->orq_get % qp->attrs.orq_size);
 800                         goto error_term;
 801                 }
 802                 rv = siw_rresp_check_ntoh(srx, frx);
 803                 if (unlikely(rv)) {
 804                         siw_qp_event(qp, IB_EVENT_QP_FATAL);
 805                         return rv;
 806                 }
 807         } else {
 808                 if (unlikely(wqe->wr_status != SIW_WR_INPROGRESS)) {
 809                         pr_warn("siw: [QP %u]: resume RRESP: status %d\n",
 810                                 qp_id(qp), wqe->wr_status);
 811                         rv = -EPROTO;
 812                         goto error_term;
 813                 }
 814         }
 815         if (!srx->fpdu_part_rem) /* zero length RRESPONSE */
 816                 return 0;
 817 
 818         sge = wqe->sqe.sge; /* there is only one */
 819         mem = &wqe->mem[0];
 820 
 821         if (!(*mem)) {
 822                 /*
 823                  * check target memory which resolves memory on first fragment
 824                  */
 825                 rv = siw_check_sge(qp->pd, sge, mem, IB_ACCESS_LOCAL_WRITE, 0,
 826                                    wqe->bytes);
 827                 if (unlikely(rv)) {
 828                         siw_dbg_qp(qp, "target mem check: %d\n", rv);
 829                         wqe->wc_status = SIW_WC_LOC_PROT_ERR;
 830 
 831                         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
 832                                            DDP_ETYPE_TAGGED_BUF,
 833                                            siw_tagged_error(-rv), 0);
 834 
 835                         siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
 836 
 837                         return -EINVAL;
 838                 }
 839         }
 840         mem_p = *mem;
 841 
 842         bytes = min(srx->fpdu_part_rem, srx->skb_new);
 843 
 844         if (mem_p->mem_obj == NULL)
 845                 rv = siw_rx_kva(srx,
 846                         (void *)(uintptr_t)(sge->laddr + wqe->processed),
 847                         bytes);
 848         else if (!mem_p->is_pbl)
 849                 rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + wqe->processed,
 850                                  bytes);
 851         else
 852                 rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p,
 853                                 sge->laddr + wqe->processed, bytes);
 854         if (rv != bytes) {
 855                 wqe->wc_status = SIW_WC_GENERAL_ERR;
 856                 rv = -EINVAL;
 857                 goto error_term;
 858         }
 859         srx->fpdu_part_rem -= rv;
 860         srx->fpdu_part_rcvd += rv;
 861         wqe->processed += rv;
 862 
 863         if (!srx->fpdu_part_rem) {
 864                 srx->ddp_to += srx->fpdu_part_rcvd;
 865                 return 0;
 866         }
 867         return -EAGAIN;
 868 
 869 error_term:
 870         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, DDP_ETYPE_CATASTROPHIC,
 871                            DDP_ECODE_CATASTROPHIC, 0);
 872         return rv;
 873 }
 874 
 875 int siw_proc_terminate(struct siw_qp *qp)
 876 {
 877         struct siw_rx_stream *srx = &qp->rx_stream;
 878         struct sk_buff *skb = srx->skb;
 879         struct iwarp_terminate *term = &srx->hdr.terminate;
 880         union iwarp_hdr term_info;
 881         u8 *infop = (u8 *)&term_info;
 882         enum rdma_opcode op;
 883         u16 to_copy = sizeof(struct iwarp_ctrl);
 884 
 885         pr_warn("siw: got TERMINATE. layer %d, type %d, code %d\n",
 886                 __rdmap_term_layer(term), __rdmap_term_etype(term),
 887                 __rdmap_term_ecode(term));
 888 
 889         if (be32_to_cpu(term->ddp_qn) != RDMAP_UNTAGGED_QN_TERMINATE ||
 890             be32_to_cpu(term->ddp_msn) !=
 891                     qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] ||
 892             be32_to_cpu(term->ddp_mo) != 0) {
 893                 pr_warn("siw: rx bogus TERM [QN x%08x, MSN x%08x, MO x%08x]\n",
 894                         be32_to_cpu(term->ddp_qn), be32_to_cpu(term->ddp_msn),
 895                         be32_to_cpu(term->ddp_mo));
 896                 return -ECONNRESET;
 897         }
 898         /*
 899          * Receive remaining pieces of TERM if indicated
 900          */
 901         if (!term->flag_m)
 902                 return -ECONNRESET;
 903 
 904         /* Do not take the effort to reassemble a network fragmented
 905          * TERM message
 906          */
 907         if (srx->skb_new < sizeof(struct iwarp_ctrl_tagged))
 908                 return -ECONNRESET;
 909 
 910         memset(infop, 0, sizeof(term_info));
 911 
 912         skb_copy_bits(skb, srx->skb_offset, infop, to_copy);
 913 
 914         op = __rdmap_get_opcode(&term_info.ctrl);
 915         if (op >= RDMAP_TERMINATE)
 916                 goto out;
 917 
 918         infop += to_copy;
 919         srx->skb_offset += to_copy;
 920         srx->skb_new -= to_copy;
 921         srx->skb_copied += to_copy;
 922         srx->fpdu_part_rcvd += to_copy;
 923         srx->fpdu_part_rem -= to_copy;
 924 
 925         to_copy = iwarp_pktinfo[op].hdr_len - to_copy;
 926 
 927         /* Again, no network fragmented TERM's */
 928         if (to_copy + MPA_CRC_SIZE > srx->skb_new)
 929                 return -ECONNRESET;
 930 
 931         skb_copy_bits(skb, srx->skb_offset, infop, to_copy);
 932 
 933         if (term->flag_r) {
 934                 siw_dbg_qp(qp, "TERM reports RDMAP hdr type %u, len %u (%s)\n",
 935                            op, be16_to_cpu(term_info.ctrl.mpa_len),
 936                            term->flag_m ? "valid" : "invalid");
 937         } else if (term->flag_d) {
 938                 siw_dbg_qp(qp, "TERM reports DDP hdr type %u, len %u (%s)\n",
 939                            op, be16_to_cpu(term_info.ctrl.mpa_len),
 940                            term->flag_m ? "valid" : "invalid");
 941         }
 942 out:
 943         srx->skb_new -= to_copy;
 944         srx->skb_offset += to_copy;
 945         srx->skb_copied += to_copy;
 946         srx->fpdu_part_rcvd += to_copy;
 947         srx->fpdu_part_rem -= to_copy;
 948 
 949         return -ECONNRESET;
 950 }
 951 
 952 static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx)
 953 {
 954         struct sk_buff *skb = srx->skb;
 955         u8 *tbuf = (u8 *)&srx->trailer.crc - srx->pad;
 956         __wsum crc_in, crc_own = 0;
 957 
 958         siw_dbg_qp(qp, "expected %d, available %d, pad %u\n",
 959                    srx->fpdu_part_rem, srx->skb_new, srx->pad);
 960 
 961         if (srx->skb_new < srx->fpdu_part_rem)
 962                 return -EAGAIN;
 963 
 964         skb_copy_bits(skb, srx->skb_offset, tbuf, srx->fpdu_part_rem);
 965 
 966         if (srx->mpa_crc_hd && srx->pad)
 967                 crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad);
 968 
 969         srx->skb_new -= srx->fpdu_part_rem;
 970         srx->skb_offset += srx->fpdu_part_rem;
 971         srx->skb_copied += srx->fpdu_part_rem;
 972 
 973         if (!srx->mpa_crc_hd)
 974                 return 0;
 975 
 976         /*
 977          * CRC32 is computed, transmitted and received directly in NBO,
 978          * so there's never a reason to convert byte order.
 979          */
 980         crypto_shash_final(srx->mpa_crc_hd, (u8 *)&crc_own);
 981         crc_in = (__force __wsum)srx->trailer.crc;
 982 
 983         if (unlikely(crc_in != crc_own)) {
 984                 pr_warn("siw: crc error. in: %08x, own %08x, op %u\n",
 985                         crc_in, crc_own, qp->rx_stream.rdmap_op);
 986 
 987                 siw_init_terminate(qp, TERM_ERROR_LAYER_LLP,
 988                                    LLP_ETYPE_MPA,
 989                                    LLP_ECODE_RECEIVED_CRC, 0);
 990                 return -EINVAL;
 991         }
 992         return 0;
 993 }
 994 
 995 #define MIN_DDP_HDR sizeof(struct iwarp_ctrl_tagged)
 996 
 997 static int siw_get_hdr(struct siw_rx_stream *srx)
 998 {
 999         struct sk_buff *skb = srx->skb;
1000         struct siw_qp *qp = rx_qp(srx);
1001         struct iwarp_ctrl *c_hdr = &srx->hdr.ctrl;
1002         struct siw_rx_fpdu *frx;
1003         u8 opcode;
1004         int bytes;
1005 
1006         if (srx->fpdu_part_rcvd < MIN_DDP_HDR) {
1007                 /*
1008                  * copy a mimimum sized (tagged) DDP frame control part
1009                  */
1010                 bytes = min_t(int, srx->skb_new,
1011                               MIN_DDP_HDR - srx->fpdu_part_rcvd);
1012 
1013                 skb_copy_bits(skb, srx->skb_offset,
1014                               (char *)c_hdr + srx->fpdu_part_rcvd, bytes);
1015 
1016                 srx->fpdu_part_rcvd += bytes;
1017 
1018                 srx->skb_new -= bytes;
1019                 srx->skb_offset += bytes;
1020                 srx->skb_copied += bytes;
1021 
1022                 if (srx->fpdu_part_rcvd < MIN_DDP_HDR)
1023                         return -EAGAIN;
1024 
1025                 if (unlikely(__ddp_get_version(c_hdr) != DDP_VERSION)) {
1026                         enum ddp_etype etype;
1027                         enum ddp_ecode ecode;
1028 
1029                         pr_warn("siw: received ddp version unsupported %d\n",
1030                                 __ddp_get_version(c_hdr));
1031 
1032                         if (c_hdr->ddp_rdmap_ctrl & DDP_FLAG_TAGGED) {
1033                                 etype = DDP_ETYPE_TAGGED_BUF;
1034                                 ecode = DDP_ECODE_T_VERSION;
1035                         } else {
1036                                 etype = DDP_ETYPE_UNTAGGED_BUF;
1037                                 ecode = DDP_ECODE_UT_VERSION;
1038                         }
1039                         siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
1040                                            etype, ecode, 0);
1041                         return -EINVAL;
1042                 }
1043                 if (unlikely(__rdmap_get_version(c_hdr) != RDMAP_VERSION)) {
1044                         pr_warn("siw: received rdmap version unsupported %d\n",
1045                                 __rdmap_get_version(c_hdr));
1046 
1047                         siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP,
1048                                            RDMAP_ETYPE_REMOTE_OPERATION,
1049                                            RDMAP_ECODE_VERSION, 0);
1050                         return -EINVAL;
1051                 }
1052                 opcode = __rdmap_get_opcode(c_hdr);
1053 
1054                 if (opcode > RDMAP_TERMINATE) {
1055                         pr_warn("siw: received unknown packet type %u\n",
1056                                 opcode);
1057 
1058                         siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP,
1059                                            RDMAP_ETYPE_REMOTE_OPERATION,
1060                                            RDMAP_ECODE_OPCODE, 0);
1061                         return -EINVAL;
1062                 }
1063                 siw_dbg_qp(rx_qp(srx), "new header, opcode %u\n", opcode);
1064         } else {
1065                 opcode = __rdmap_get_opcode(c_hdr);
1066         }
1067         set_rx_fpdu_context(qp, opcode);
1068         frx = qp->rx_fpdu;
1069 
1070         /*
1071          * Figure out len of current hdr: variable length of
1072          * iwarp hdr may force us to copy hdr information in
1073          * two steps. Only tagged DDP messages are already
1074          * completely received.
1075          */
1076         if (iwarp_pktinfo[opcode].hdr_len > sizeof(struct iwarp_ctrl_tagged)) {
1077                 bytes = iwarp_pktinfo[opcode].hdr_len - MIN_DDP_HDR;
1078 
1079                 if (srx->skb_new < bytes)
1080                         return -EAGAIN;
1081 
1082                 skb_copy_bits(skb, srx->skb_offset,
1083                               (char *)c_hdr + srx->fpdu_part_rcvd, bytes);
1084 
1085                 srx->fpdu_part_rcvd += bytes;
1086 
1087                 srx->skb_new -= bytes;
1088                 srx->skb_offset += bytes;
1089                 srx->skb_copied += bytes;
1090         }
1091 
1092         /*
1093          * DDP/RDMAP header receive completed. Check if the current
1094          * DDP segment starts a new RDMAP message or continues a previously
1095          * started RDMAP message.
1096          *
1097          * Alternating reception of DDP segments (or FPDUs) from incomplete
1098          * tagged and untagged RDMAP messages is supported, as long as
1099          * the current tagged or untagged message gets eventually completed
1100          * w/o intersection from another message of the same type
1101          * (tagged/untagged). E.g., a WRITE can get intersected by a SEND,
1102          * but not by a READ RESPONSE etc.
1103          */
1104         if (srx->mpa_crc_hd) {
1105                 /*
1106                  * Restart CRC computation
1107                  */
1108                 crypto_shash_init(srx->mpa_crc_hd);
1109                 crypto_shash_update(srx->mpa_crc_hd, (u8 *)c_hdr,
1110                                     srx->fpdu_part_rcvd);
1111         }
1112         if (frx->more_ddp_segs) {
1113                 frx->first_ddp_seg = 0;
1114                 if (frx->prev_rdmap_op != opcode) {
1115                         pr_warn("siw: packet intersection: %u : %u\n",
1116                                 frx->prev_rdmap_op, opcode);
1117                         /*
1118                          * The last inbound RDMA operation of same type
1119                          * (tagged or untagged) is left unfinished.
1120                          * To complete it in error, make it the current
1121                          * operation again, even with the header already
1122                          * overwritten. For error handling, only the opcode
1123                          * and current rx context are relevant.
1124                          */
1125                         set_rx_fpdu_context(qp, frx->prev_rdmap_op);
1126                         __rdmap_set_opcode(c_hdr, frx->prev_rdmap_op);
1127                         return -EPROTO;
1128                 }
1129         } else {
1130                 frx->prev_rdmap_op = opcode;
1131                 frx->first_ddp_seg = 1;
1132         }
1133         frx->more_ddp_segs = c_hdr->ddp_rdmap_ctrl & DDP_FLAG_LAST ? 0 : 1;
1134 
1135         return 0;
1136 }
1137 
1138 static int siw_check_tx_fence(struct siw_qp *qp)
1139 {
1140         struct siw_wqe *tx_waiting = tx_wqe(qp);
1141         struct siw_sqe *rreq;
1142         int resume_tx = 0, rv = 0;
1143         unsigned long flags;
1144 
1145         spin_lock_irqsave(&qp->orq_lock, flags);
1146 
1147         rreq = orq_get_current(qp);
1148 
1149         /* free current orq entry */
1150         WRITE_ONCE(rreq->flags, 0);
1151 
1152         if (qp->tx_ctx.orq_fence) {
1153                 if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) {
1154                         pr_warn("siw: [QP %u]: fence resume: bad status %d\n",
1155                                 qp_id(qp), tx_waiting->wr_status);
1156                         rv = -EPROTO;
1157                         goto out;
1158                 }
1159                 /* resume SQ processing */
1160                 if (tx_waiting->sqe.opcode == SIW_OP_READ ||
1161                     tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
1162                         rreq = orq_get_tail(qp);
1163                         if (unlikely(!rreq)) {
1164                                 pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp));
1165                                 rv = -EPROTO;
1166                                 goto out;
1167                         }
1168                         siw_read_to_orq(rreq, &tx_waiting->sqe);
1169 
1170                         qp->orq_put++;
1171                         qp->tx_ctx.orq_fence = 0;
1172                         resume_tx = 1;
1173 
1174                 } else if (siw_orq_empty(qp)) {
1175                         qp->tx_ctx.orq_fence = 0;
1176                         resume_tx = 1;
1177                 } else {
1178                         pr_warn("siw: [QP %u]: fence resume: orq idx: %d:%d\n",
1179                                 qp_id(qp), qp->orq_get, qp->orq_put);
1180                         rv = -EPROTO;
1181                 }
1182         }
1183         qp->orq_get++;
1184 out:
1185         spin_unlock_irqrestore(&qp->orq_lock, flags);
1186 
1187         if (resume_tx)
1188                 rv = siw_sq_start(qp);
1189 
1190         return rv;
1191 }
1192 
1193 /*
1194  * siw_rdmap_complete()
1195  *
1196  * Complete processing of an RDMA message after receiving all
1197  * DDP segments, or abort processing after encountering an error.
1198  *
1199  *   o SENDs + RRESPs need a work completion,
1200  *   o RREQs need READ RESPONSE initialization,
1201  *   o WRITEs need memory dereferencing.
1202  *
      * @qp:    QP whose current rx context (qp->rx_fpdu) gets completed
      * @error: 0 for regular completion, otherwise the error which
      *         stopped receive processing
      *
      * The opcode is taken from the header currently held in the rx
      * stream. Returns the result of the opcode specific completion
      * step, or 0 if there was nothing to do. The rx WQE is set to
      * SIW_WR_IDLE in any case.
      *
1203  * TODO: Failed WRITEs need local error to be surfaced.
1204  */
1205 static int siw_rdmap_complete(struct siw_qp *qp, int error)
1206 {
1207         struct siw_rx_stream *srx = &qp->rx_stream;
1208         struct siw_wqe *wqe = rx_wqe(qp->rx_fpdu);
1209         enum siw_wc_status wc_status = wqe->wc_status;
1210         u8 opcode = __rdmap_get_opcode(&srx->hdr.ctrl);
1211         int rv = 0;
1212 
1213         switch (opcode) {
1214         case RDMAP_SEND_SE:
1215         case RDMAP_SEND_SE_INVAL:
1216                 wqe->rqe.flags |= SIW_WQE_SOLICITED;
1217                 /* Fall through */
1218 
1219         case RDMAP_SEND:
1220         case RDMAP_SEND_INVAL:
                     /* rx WQE not active: nothing to complete */
1221                 if (wqe->wr_status == SIW_WR_IDLE)
1222                         break;
1223 
                     /* advance the MSN kept for the SEND queue */
1224                 srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]++;
1225 
                     /* an rx error without specific status becomes a general error */
1226                 if (error != 0 && wc_status == SIW_WC_SUCCESS)
1227                         wc_status = SIW_WC_GENERAL_ERR;
1228                 /*
1229                  * Handle STag invalidation request
1230                  */
1231                 if (wc_status == SIW_WC_SUCCESS &&
1232                     (opcode == RDMAP_SEND_INVAL ||
1233                      opcode == RDMAP_SEND_SE_INVAL)) {
1234                         rv = siw_invalidate_stag(qp->pd, srx->inval_stag);
1235                         if (rv) {
1236                                 siw_init_terminate(
1237                                         qp, TERM_ERROR_LAYER_RDMAP,
1238                                         rv == -EACCES ?
1239                                                 RDMAP_ETYPE_REMOTE_PROTECTION :
1240                                                 RDMAP_ETYPE_REMOTE_OPERATION,
1241                                         RDMAP_ECODE_CANNOT_INVALIDATE, 0);
1242 
1243                                 wc_status = SIW_WC_REM_INV_REQ_ERR;
1244                         }
                             /* pass the STag only if invalidation succeeded */
1245                         rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed,
1246                                               rv ? 0 : srx->inval_stag,
1247                                               wc_status);
1248                 } else {
1249                         rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed,
1250                                               0, wc_status);
1251                 }
1252                 siw_wqe_put_mem(wqe, SIW_OP_RECEIVE);
1253                 break;
1254 
1255         case RDMAP_RDMA_READ_RESP:
                     /* rx WQE not active: nothing to complete */
1256                 if (wqe->wr_status == SIW_WR_IDLE)
1257                         break;
1258 
1259                 if (error != 0) {
1260                         if ((srx->state == SIW_GET_HDR &&
1261                              qp->rx_fpdu->first_ddp_seg) || error == -ENODATA)
1262                                 /* possible RREQ in ORQ left untouched */
1263                                 break;
1264 
1265                         if (wc_status == SIW_WC_SUCCESS)
1266                                 wc_status = SIW_WC_GENERAL_ERR;
1267                 } else if (qp->kernel_verbs &&
1268                            rx_type(wqe) == SIW_OP_READ_LOCAL_INV) {
1269                         /*
1270                          * Handle any STag invalidation request
1271                          */
1272                         rv = siw_invalidate_stag(qp->pd, wqe->sqe.sge[0].lkey);
1273                         if (rv) {
1274                                 siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
1275                                                    RDMAP_ETYPE_CATASTROPHIC,
1276                                                    RDMAP_ECODE_UNSPECIFIED, 0);
1277 
1278                                 if (wc_status == SIW_WC_SUCCESS) {
1279                                         wc_status = SIW_WC_GENERAL_ERR;
1280                                         error = rv;
1281                                 }
1282                         }
1283                 }
1284                 /*
1285                  * All errors turn the wqe into signalled.
1286                  */
1287                 if ((wqe->sqe.flags & SIW_WQE_SIGNALLED) || error != 0)
1288                         rv = siw_sqe_complete(qp, &wqe->sqe, wqe->processed,
1289                                               wc_status);
1290                 siw_wqe_put_mem(wqe, SIW_OP_READ);
1291 
                     /*
                      * Without error, the return value becomes the result
                      * of siw_check_tx_fence(), replacing the result of
                      * siw_sqe_complete() above.
                      */
1292                 if (!error)
1293                         rv = siw_check_tx_fence(qp);
1294                 else
1295                         /* Disable current ORQ element */
1296                         WRITE_ONCE(orq_get_current(qp)->flags, 0);
1297                 break;
1298 
1299         case RDMAP_RDMA_READ_REQ:
                     /* only a correctly received RREQ starts a Read Response */
1300                 if (!error) {
1301                         rv = siw_init_rresp(qp, srx);
1302                         srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]++;
1303                 }
1304                 break;
1305 
1306         case RDMAP_RDMA_WRITE:
1307                 if (wqe->wr_status == SIW_WR_IDLE)
1308                         break;
1309 
1310                 /*
1311                  * Free References from memory object if
1312                  * attached to receive context (inbound WRITE).
1313                  * While a zero-length WRITE is allowed,
1314                  * no memory reference got created.
1315                  */
1316                 if (rx_mem(&qp->rx_tagged)) {
1317                         siw_mem_put(rx_mem(&qp->rx_tagged));
1318                         rx_mem(&qp->rx_tagged) = NULL;
1319                 }
1320                 break;
1321 
1322         default:
1323                 break;
1324         }
             /* the rx WQE is free for the next message in all cases */
1325         wqe->wr_status = SIW_WR_IDLE;
1326 
1327         return rv;
1328 }
1329 
1330 /*
1331  * siw_tcp_rx_data()
1332  *
1333  * Main routine to consume inbound TCP payload
1334  *
      * The payload is processed by a state machine kept in srx->state:
      * SIW_GET_HDR receives the FPDU header, SIW_GET_DATA_START and
      * SIW_GET_DATA_MORE hand the payload to the opcode specific receive
      * function, SIW_GET_TRAILER receives padding and CRC. After the
      * trailer of the last DDP segment, the RDMAP message is completed.
      * Processing stops when the skb is consumed, when an FPDU part is
      * incomplete (-EAGAIN), or on error.
      *
1335  * @rd_desc:    read descriptor
1336  * @skb:        socket buffer
1337  * @off:        offset in skb
1338  * @len:        skb->len - offset : payload in skb
      *              (not referenced; the length is taken from @skb and @off)
      *
      * Returns the number of bytes consumed from @skb.
1339  */
1340 int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb,
1341                     unsigned int off, size_t len)
1342 {
1343         struct siw_qp *qp = rd_desc->arg.data;
1344         struct siw_rx_stream *srx = &qp->rx_stream;
1345         int rv;
1346 
             /* attach the new skb to the stream context */
1347         srx->skb = skb;
1348         srx->skb_new = skb->len - off;
1349         srx->skb_offset = off;
1350         srx->skb_copied = 0;
1351 
1352         siw_dbg_qp(qp, "new data, len %d\n", srx->skb_new);
1353 
1354         while (srx->skb_new) {
                     /* cleared where the error path must not run completion */
1355                 int run_completion = 1;
1356 
1357                 if (unlikely(srx->rx_suspend)) {
1358                         /* Do not process any more data */
                             /* remaining bytes are reported as consumed */
1359                         srx->skb_copied += srx->skb_new;
1360                         break;
1361                 }
1362                 switch (srx->state) {
1363                 case SIW_GET_HDR:
1364                         rv = siw_get_hdr(srx);
1365                         if (!rv) {
                                     /*
                                      * Header complete: what remains of the
                                      * FPDU, as announced by mpa_len, is
                                      * payload.
                                      */
1366                                 srx->fpdu_part_rem =
1367                                         be16_to_cpu(srx->hdr.ctrl.mpa_len) -
1368                                         srx->fpdu_part_rcvd + MPA_HDR_SIZE;
1369 
                                     /* bytes missing to the next multiple of 4 */
1370                                 if (srx->fpdu_part_rem)
1371                                         srx->pad = -srx->fpdu_part_rem & 0x3;
1372                                 else
1373                                         srx->pad = 0;
1374 
1375                                 srx->state = SIW_GET_DATA_START;
1376                                 srx->fpdu_part_rcvd = 0;
1377                         }
1378                         break;
1379 
1380                 case SIW_GET_DATA_MORE:
1381                         /*
1382                          * Another data fragment of the same DDP segment.
1383                          * Setting first_ddp_seg = 0 avoids repeating
1384                          * initializations that shall occur only once per
1385                          * DDP segment.
1386                          */
1387                         qp->rx_fpdu->first_ddp_seg = 0;
1388                         /* Fall through */
1389 
1390                 case SIW_GET_DATA_START:
1391                         /*
1392                          * Headers will be checked by the opcode-specific
1393                          * data receive function below.
1394                          */
1395                         rv = iwarp_pktinfo[qp->rx_stream.rdmap_op].rx_data(qp);
1396                         if (!rv) {
                                     /* payload complete: set up trailer receive */
1397                                 int mpa_len =
1398                                         be16_to_cpu(srx->hdr.ctrl.mpa_len)
1399                                         + MPA_HDR_SIZE;
1400 
                                     /* trailer is padding to 4 bytes plus CRC */
1401                                 srx->fpdu_part_rem = (-mpa_len & 0x3)
1402                                                       + MPA_CRC_SIZE;
1403                                 srx->fpdu_part_rcvd = 0;
1404                                 srx->state = SIW_GET_TRAILER;
1405                         } else {
                                     /*
                                      * -ECONNRESET skips completion in the
                                      * error path below; any other non-zero
                                      * result (including -EAGAIN) moves to
                                      * SIW_GET_DATA_MORE.
                                      */
1406                                 if (unlikely(rv == -ECONNRESET))
1407                                         run_completion = 0;
1408                                 else
1409                                         srx->state = SIW_GET_DATA_MORE;
1410                         }
1411                         break;
1412 
1413                 case SIW_GET_TRAILER:
1414                         /*
1415                          * read CRC + any padding
1416                          */
1417                         rv = siw_get_trailer(qp, srx);
1418                         if (likely(!rv)) {
1419                                 /*
1420                                  * FPDU completed.
1421                                  * complete RDMAP message if last fragment
1422                                  */
1423                                 srx->state = SIW_GET_HDR;
1424                                 srx->fpdu_part_rcvd = 0;
1425 
1426                                 if (!(srx->hdr.ctrl.ddp_rdmap_ctrl &
1427                                       DDP_FLAG_LAST))
1428                                         /* more frags */
1429                                         break;
1430 
1431                                 rv = siw_rdmap_complete(qp, 0);
                                     /* completion done; do not repeat it on error */
1432                                 run_completion = 0;
1433                         }
1434                         break;
1435 
1436                 default:
1437                         pr_warn("QP[%u]: RX out of state\n", qp_id(qp));
1438                         rv = -EPROTO;
1439                         run_completion = 0;
1440                 }
                     /* any result other than success or -EAGAIN ends reception */
1441                 if (unlikely(rv != 0 && rv != -EAGAIN)) {
                             /*
                              * Complete the current message in error if one
                              * is in progress and completion did not run yet.
                              */
1442                         if ((srx->state > SIW_GET_HDR ||
1443                              qp->rx_fpdu->more_ddp_segs) && run_completion)
1444                                 siw_rdmap_complete(qp, rv);
1445 
1446                         siw_dbg_qp(qp, "rx error %d, rx state %d\n", rv,
1447                                    srx->state);
1448 
1449                         siw_qp_cm_drop(qp, 1);
1450 
1451                         break;
1452                 }
                     /* -EAGAIN: current FPDU part incomplete, wait for more data */
1453                 if (rv) {
1454                         siw_dbg_qp(qp, "fpdu fragment, state %d, missing %d\n",
1455                                    srx->state, srx->fpdu_part_rem);
1456                         break;
1457                 }
1458         }
1459         return srx->skb_copied;
1460 }

/* [<][>][^][v][top][bottom][index][help] */