/*
 * Copyright (c) 2006, 2007, 2009 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "qib.h"

/**
 * qib_alloc_lkey - allocate an lkey
 * @mr: memory region that this lkey protects
 * @dma_region: 0->normal key, 1->restricted DMA key
 *
 * Returns 0 if successful, otherwise returns -errno.
 *
 * Increments the mr reference count as required.
 *
 * Sets the lkey field of mr for non-dma regions.
 *
 */

int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
{
        unsigned long flags;
        u32 r;
        u32 n;
        int ret = 0;
        struct qib_ibdev *dev = to_idev(mr->pd->device);
        struct qib_lkey_table *rkt = &dev->lk_table;

        spin_lock_irqsave(&rkt->lock, flags);

        /* special case for dma_mr lkey == 0 */
        if (dma_region) {
                struct qib_mregion *tmr;

                tmr = rcu_access_pointer(dev->dma_mr);
                if (!tmr) {
                        qib_get_mr(mr);
                        rcu_assign_pointer(dev->dma_mr, mr);
                        mr->lkey_published = 1;
                }
                goto success;
        }

        /* Find the next available LKEY */
        r = rkt->next;
        n = r;
        for (;;) {
                if (rkt->table[r] == NULL)
                        break;
                r = (r + 1) & (rkt->max - 1);
                if (r == n)
                        goto bail;
        }
        rkt->next = (r + 1) & (rkt->max - 1);
        /*
         * Make sure lkey is never zero, which is reserved to indicate an
         * unrestricted LKEY.
         */
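        /*
         * LKEY layout, as implied by the shifts below: the table index r
         * occupies the top ib_qib_lkey_table_size bits, the generation
         * counter sits in the bits above bit 8, and the low 8 bits are
         * left clear.
         */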
        rkt->gen++;
        /*
         * bits are capped in qib_verbs.c to ensure enough bits
         * for the generation number
         */
        mr->lkey = (r << (32 - ib_qib_lkey_table_size)) |
                ((((1 << (24 - ib_qib_lkey_table_size)) - 1) & rkt->gen)
                 << 8);
        if (mr->lkey == 0) {
                mr->lkey |= 1 << 8;
                rkt->gen++;
        }
        qib_get_mr(mr);
        rcu_assign_pointer(rkt->table[r], mr);
        mr->lkey_published = 1;
success:
        spin_unlock_irqrestore(&rkt->lock, flags);
out:
        return ret;
bail:
        spin_unlock_irqrestore(&rkt->lock, flags);
        ret = -ENOMEM;
        goto out;
}

/**
 * qib_free_lkey - free an lkey
 * @mr: mr to free from tables
 */
void qib_free_lkey(struct qib_mregion *mr)
{
        unsigned long flags;
        u32 lkey = mr->lkey;
        u32 r;
        struct qib_ibdev *dev = to_idev(mr->pd->device);
        struct qib_lkey_table *rkt = &dev->lk_table;

        spin_lock_irqsave(&rkt->lock, flags);
        if (!mr->lkey_published)
                goto out;
        if (lkey == 0)
                RCU_INIT_POINTER(dev->dma_mr, NULL);
        else {
                r = lkey >> (32 - ib_qib_lkey_table_size);
                RCU_INIT_POINTER(rkt->table[r], NULL);
        }
        qib_put_mr(mr);
        mr->lkey_published = 0;
out:
        spin_unlock_irqrestore(&rkt->lock, flags);
}

/**
 * qib_lkey_ok - check IB SGE for validity and initialize
 * @rkt: table containing lkey to check SGE against
 * @pd: protection domain
 * @isge: outgoing internal SGE
 * @sge: SGE to check
 * @acc: access flags
 *
 * Returns 1 if valid and successful, otherwise returns 0.
 *
 * Increments the reference count upon success.
 *
 * Check the IB SGE for validity and initialize our internal version
 * of it.
 */
int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
                struct qib_sge *isge, struct ib_sge *sge, int acc)
{
        struct qib_mregion *mr;
        unsigned n, m;
        size_t off;

        /*
         * We use LKEY == zero for kernel virtual addresses
         * (see qib_get_dma_mr and qib_dma.c).
         */
        rcu_read_lock();
        if (sge->lkey == 0) {
                struct qib_ibdev *dev = to_idev(pd->ibpd.device);

                if (pd->user)
                        goto bail;
                mr = rcu_dereference(dev->dma_mr);
                if (!mr)
                        goto bail;
                if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
                        goto bail;
                rcu_read_unlock();

                isge->mr = mr;
                isge->vaddr = (void *) sge->addr;
                isge->length = sge->length;
                isge->sge_length = sge->length;
                isge->m = 0;
                isge->n = 0;
                goto ok;
        }
        mr = rcu_dereference(
                rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]);
        if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
                goto bail;

        off = sge->addr - mr->user_base;
        if (unlikely(sge->addr < mr->user_base ||
                     off + sge->length > mr->length ||
                     (mr->access_flags & acc) != acc))
                goto bail;
        if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
                goto bail;
        rcu_read_unlock();

        off += mr->offset;
        if (mr->page_shift) {
                /*
                 * page sizes are a uniform power of 2, so no loop is
                 * necessary; entries_spanned_by_off is the number of times
                 * the loop below would have executed.
                 */
                size_t entries_spanned_by_off;

                entries_spanned_by_off = off >> mr->page_shift;
                off -= (entries_spanned_by_off << mr->page_shift);
                m = entries_spanned_by_off / QIB_SEGSZ;
                n = entries_spanned_by_off % QIB_SEGSZ;
        } else {
                m = 0;
                n = 0;
                while (off >= mr->map[m]->segs[n].length) {
                        off -= mr->map[m]->segs[n].length;
                        n++;
                        if (n >= QIB_SEGSZ) {
                                m++;
                                n = 0;
                        }
                }
        }
        isge->mr = mr;
        isge->vaddr = mr->map[m]->segs[n].vaddr + off;
        isge->length = mr->map[m]->segs[n].length - off;
        isge->sge_length = sge->length;
        isge->m = m;
        isge->n = n;
ok:
        return 1;
bail:
        rcu_read_unlock();
        return 0;
}

/**
 * qib_rkey_ok - check the IB virtual address, length, and RKEY
 * @qp: qp for validation
 * @sge: SGE state
 * @len: length of data
 * @vaddr: virtual address to place data
 * @rkey: rkey to check
 * @acc: access flags
 *
 * Returns 1 if successful, otherwise returns 0.
 *
 * Increments the reference count upon success.
 */
int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
                u32 len, u64 vaddr, u32 rkey, int acc)
{
        struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
        struct qib_mregion *mr;
        unsigned n, m;
        size_t off;

        /*
         * We use RKEY == zero for kernel virtual addresses
         * (see qib_get_dma_mr and qib_dma.c).
         */
        rcu_read_lock();
        if (rkey == 0) {
                struct qib_pd *pd = to_ipd(qp->ibqp.pd);
                struct qib_ibdev *dev = to_idev(pd->ibpd.device);

                if (pd->user)
                        goto bail;
                mr = rcu_dereference(dev->dma_mr);
                if (!mr)
                        goto bail;
                if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
                        goto bail;
                rcu_read_unlock();

                sge->mr = mr;
                sge->vaddr = (void *) vaddr;
                sge->length = len;
                sge->sge_length = len;
                sge->m = 0;
                sge->n = 0;
                goto ok;
        }

        mr = rcu_dereference(
                rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]);
        if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
                goto bail;

        off = vaddr - mr->iova;
        if (unlikely(vaddr < mr->iova || off + len > mr->length ||
                     (mr->access_flags & acc) == 0))
                goto bail;
        if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
                goto bail;
        rcu_read_unlock();

        off += mr->offset;
        if (mr->page_shift) {
                /*
                 * page sizes are a uniform power of 2, so no loop is
                 * necessary; entries_spanned_by_off is the number of times
                 * the loop below would have executed.
                 */
                size_t entries_spanned_by_off;

                entries_spanned_by_off = off >> mr->page_shift;
                off -= (entries_spanned_by_off << mr->page_shift);
                m = entries_spanned_by_off / QIB_SEGSZ;
                n = entries_spanned_by_off % QIB_SEGSZ;
        } else {
                m = 0;
                n = 0;
                while (off >= mr->map[m]->segs[n].length) {
                        off -= mr->map[m]->segs[n].length;
                        n++;
                        if (n >= QIB_SEGSZ) {
                                m++;
                                n = 0;
                        }
                }
        }
        sge->mr = mr;
        sge->vaddr = mr->map[m]->segs[n].vaddr + off;
        sge->length = mr->map[m]->segs[n].length - off;
        sge->sge_length = len;
        sge->m = m;
        sge->n = n;
ok:
        return 1;
bail:
        rcu_read_unlock();
        return 0;
}

/*
 * Initialize the memory region specified by the work request.
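 *
 * Checks that the rkey is non-zero, that the QP uses a kernel PD which
 * owns the MR, and that the work request's page list fits within the
 * region, then rewrites the region's bounds and segment map from the
 * fast-register request.  Returns 0 on success, -EINVAL otherwise.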
 */
int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr)
{
        struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
        struct qib_pd *pd = to_ipd(qp->ibqp.pd);
        struct qib_mregion *mr;
        u32 rkey = wr->wr.fast_reg.rkey;
        unsigned i, n, m;
        int ret = -EINVAL;
        unsigned long flags;
        u64 *page_list;
        size_t ps;

        spin_lock_irqsave(&rkt->lock, flags);
        if (pd->user || rkey == 0)
                goto bail;

        mr = rcu_dereference_protected(
                rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))],
                lockdep_is_held(&rkt->lock));
        if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd))
                goto bail;

        if (wr->wr.fast_reg.page_list_len > mr->max_segs)
                goto bail;

        ps = 1UL << wr->wr.fast_reg.page_shift;
        if (wr->wr.fast_reg.length > ps * wr->wr.fast_reg.page_list_len)
                goto bail;

        mr->user_base = wr->wr.fast_reg.iova_start;
        mr->iova = wr->wr.fast_reg.iova_start;
        mr->lkey = rkey;
        mr->length = wr->wr.fast_reg.length;
        mr->access_flags = wr->wr.fast_reg.access_flags;
        page_list = wr->wr.fast_reg.page_list->page_list;
        m = 0;
        n = 0;
        for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
                mr->map[m]->segs[n].vaddr = (void *) page_list[i];
                mr->map[m]->segs[n].length = ps;
                if (++n == QIB_SEGSZ) {
                        m++;
                        n = 0;
                }
        }

        ret = 0;
bail:
        spin_unlock_irqrestore(&rkt->lock, flags);
        return ret;
}