This source file includes the following definitions:
- siw_mem_add
- siw_mem_id2obj
- siw_free_plist
- siw_umem_release
- siw_mr_add_mem
- siw_mr_drop_mem
- siw_free_mem
- siw_check_mem
- siw_check_sge
- siw_wqe_put_mem
- siw_invalidate_stag
- siw_pbl_get_buffer
- siw_pbl_alloc
- siw_umem_get
#include <linux/gfp.h>
#include <rdma/ib_verbs.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/resource.h>

#include "siw.h"
#include "siw_mem.h"
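
/*
 * siw_mem_add()
 *
 * Insert a new memory object into the device's xarray, starting the
 * cyclic ID search at a random position within the 24-bit index
 * space. The STag is formed by shifting the index into the upper
 * 24 bits; the low 8 bits (the STag key) remain zero.
 */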
int siw_mem_add(struct siw_device *sdev, struct siw_mem *m)
{
	struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
	u32 id, next;

	get_random_bytes(&next, 4);
	next &= 0x00ffffff;

	if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next,
			    GFP_KERNEL) < 0)
		return -ENOMEM;

	/* Set the STag index part */
	m->stag = id << 8;

	siw_dbg_mem(m, "new MEM object\n");

	return 0;
}
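
/*
 * siw_mem_id2obj()
 *
 * Resolve a memory object from its STag index. The lookup and the
 * reference count increment happen under RCU protection, so the
 * object cannot disappear during the lookup. Returns NULL if the
 * index is unknown or the object is already being destroyed.
 * Callers drop the reference again with siw_mem_put().
 */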
struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
{
	struct siw_mem *mem;

	rcu_read_lock();
	mem = xa_load(&sdev->mem_xa, stag_index);
	if (likely(mem && kref_get_unless_zero(&mem->ref))) {
		rcu_read_unlock();
		return mem;
	}
	rcu_read_unlock();

	return NULL;
}
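
/* Unpin the pages of one chunk, optionally marking them dirty. */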
static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
			   bool dirty)
{
	put_user_pages_dirty_lock(chunk->plist, num_pages, dirty);
}
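
/*
 * siw_umem_release()
 *
 * Unpin all pages of a user memory object, subtract them from the
 * owning mm's pinned page count, and free all associated memory.
 */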
void siw_umem_release(struct siw_umem *umem, bool dirty)
{
	struct mm_struct *mm_s = umem->owning_mm;
	int i, num_pages = umem->num_pages;

	for (i = 0; num_pages; i++) {
		int to_free = min_t(int, PAGES_PER_CHUNK, num_pages);

		siw_free_plist(&umem->page_chunk[i], to_free,
			       umem->writable && dirty);
		kfree(umem->page_chunk[i].plist);
		num_pages -= to_free;
	}
	atomic64_sub(umem->num_pages, &mm_s->pinned_vm);

	mmdrop(mm_s);
	kfree(umem->page_chunk);
	kfree(umem);
}
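
/*
 * siw_mr_add_mem()
 *
 * Create the memory object backing a memory region and insert it
 * into the device's xarray. lkey and rkey are derived from the
 * allocated 24-bit STag index. The STag starts out invalid
 * (stag_valid == 0) and must be validated by the caller once
 * registration is complete.
 */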
int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
		   u64 start, u64 len, int rights)
{
	struct siw_device *sdev = to_siw_dev(pd->device);
	struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
	u32 id, next;

	if (!mem)
		return -ENOMEM;

	mem->mem_obj = mem_obj;
	mem->stag_valid = 0;
	mem->sdev = sdev;
	mem->va = start;
	mem->len = len;
	mem->pd = pd;
	mem->perms = rights & IWARP_ACCESS_MASK;
	kref_init(&mem->ref);

	get_random_bytes(&next, 4);
	next &= 0x00ffffff;

	if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next,
			    GFP_KERNEL) < 0) {
		kfree(mem);
		return -ENOMEM;
	}
	/* Set the STag index part */
	mem->stag = id << 8;
	mr->base_mr.lkey = mr->base_mr.rkey = mem->stag;
	/* Publish the object on the MR only after it is fully set up */
	mr->mem = mem;

	return 0;
}
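
/*
 * siw_mr_drop_mem()
 *
 * Invalidate the STag of a memory region, remove its memory object
 * from the device's xarray, and drop the reference held by the MR.
 */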
void siw_mr_drop_mem(struct siw_mr *mr)
{
	struct siw_mem *mem = mr->mem, *found;

	mem->stag_valid = 0;

	/* make STag invalidation visible to all cores */
	smp_mb();

	found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8);
	WARN_ON(found != mem);
	siw_mem_put(mem);
}
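
/*
 * siw_free_mem()
 *
 * kref release function: frees the memory object together with its
 * backing user memory or physical buffer list.
 */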
void siw_free_mem(struct kref *ref)
{
	struct siw_mem *mem = container_of(ref, struct siw_mem, ref);

	siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n");

	if (!mem->is_mw && mem->mem_obj) {
		if (!mem->is_pbl)
			siw_umem_release(mem->umem, true);
		else
			kfree(mem->pbl);
	}
	kfree(mem);
}
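
/*
 * siw_check_mem()
 *
 * Check protection domain, STag state, access permissions and
 * address range for a memory object.
 *
 * @pd:    protection domain the memory must belong to
 * @mem:   memory object to be checked
 * @addr:  starting address of the access
 * @perms: requested access permissions
 * @len:   length of the memory interval to be checked
 */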
int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
		  enum ib_access_flags perms, int len)
{
	if (!mem->stag_valid) {
		siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag);
		return -E_STAG_INVALID;
	}
	if (mem->pd != pd) {
		siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag);
		return -E_PD_MISMATCH;
	}
	/*
	 * check access permissions
	 */
	if ((mem->perms & perms) < perms) {
		siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n",
			   mem->perms, perms);
		return -E_ACCESS_PERM;
	}
	/*
	 * Check if access falls into valid memory interval.
	 */
	if (addr < mem->va || addr + len > mem->va + mem->len) {
		siw_dbg_pd(pd, "MEM interval len %d\n", len);
		siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n",
			   (void *)(uintptr_t)addr,
			   (void *)(uintptr_t)(addr + len));
		siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n",
			   (void *)(uintptr_t)mem->va,
			   (void *)(uintptr_t)(mem->va + mem->len),
			   mem->stag);

		return -E_BASE_BOUNDS;
	}
	return E_ACCESS_OK;
}
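
/*
 * siw_check_sge()
 *
 * Check an SGE for access rights within a given interval.
 *
 * @pd:    protection domain the memory must belong to
 * @sge:   SGE to be checked
 * @mem:   location of the memory reference within the caller's array
 * @perms: requested access permissions
 * @off:   starting offset within the SGE
 * @len:   length of the memory interval to be checked
 *
 * If *mem is not yet set, the SGE's memory object is resolved and
 * referenced here. The new reference is kept on success and dropped
 * again if any check fails.
 */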
int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[],
		  enum ib_access_flags perms, u32 off, int len)
{
	struct siw_device *sdev = to_siw_dev(pd->device);
	struct siw_mem *new = NULL;
	int rv = E_ACCESS_OK;

	if (len + off > sge->length) {
		rv = -E_BASE_BOUNDS;
		goto fail;
	}
	if (*mem == NULL) {
		new = siw_mem_id2obj(sdev, sge->lkey >> 8);
		if (unlikely(!new)) {
			siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey);
			rv = -E_STAG_INVALID;
			goto fail;
		}
		*mem = new;
	}
	/* The object was found by index; the full STag must match, too */
	if (unlikely((*mem)->stag != sge->lkey)) {
		siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey);
		rv = -E_STAG_INVALID;
		goto fail;
	}
	rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len);
	if (unlikely(rv))
		goto fail;

	return 0;

fail:
	if (new) {
		*mem = NULL;
		siw_mem_put(new);
	}
	return rv;
}
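
/*
 * siw_wqe_put_mem()
 *
 * Drop the memory references held by a work queue element, based on
 * the operation type.
 */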
void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op)
{
	switch (op) {
	case SIW_OP_SEND:
	case SIW_OP_WRITE:
	case SIW_OP_SEND_WITH_IMM:
	case SIW_OP_SEND_REMOTE_INV:
	case SIW_OP_READ:
	case SIW_OP_READ_LOCAL_INV:
		if (!(wqe->sqe.flags & SIW_WQE_INLINE))
			siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge);
		break;

	case SIW_OP_RECEIVE:
		siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge);
		break;

	case SIW_OP_READ_RESPONSE:
		siw_unref_mem_sgl(wqe->mem, 1);
		break;

	default:
		/*
		 * SIW_OP_INVAL_STAG and SIW_OP_REG_MR
		 * do not hold memory references
		 */
		break;
	}
}
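
/*
 * siw_invalidate_stag()
 *
 * Mark the memory object behind the given STag invalid, after
 * verifying that it belongs to the given protection domain.
 */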
int siw_invalidate_stag(struct ib_pd *pd, u32 stag)
{
	struct siw_device *sdev = to_siw_dev(pd->device);
	struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8);
	int rv = 0;

	if (unlikely(!mem)) {
		siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag);
		return -EINVAL;
	}
	if (unlikely(mem->pd != pd)) {
		siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag);
		rv = -EACCES;
		goto out;
	}
	/*
	 * Per RDMA verbs definition, an STag may already be in invalid
	 * state if invalidation is requested. So no state check here.
	 */
	mem->stag_valid = 0;

	siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag);
out:
	siw_mem_put(mem);
	return rv;
}
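
/*
 * siw_pbl_get_buffer()
 *
 * Return the physical address backing a PBL element, addressed by a
 * linear byte offset into the list of variably sized buffer
 * elements. Optionally reports the length remaining in the current
 * element via @len and the current list position via @idx, so a
 * later call can resume the scan at the same element, e.g.:
 *
 *	int idx = 0, len;
 *	dma_addr_t pa = siw_pbl_get_buffer(pbl, off, &len, &idx);
 */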
dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
{
	int i = idx ? *idx : 0;

	while (i < pbl->num_buf) {
		struct siw_pble *pble = &pbl->pbe[i];

		if (pble->pbl_off + pble->size > off) {
			u64 pble_off = off - pble->pbl_off;

			if (len)
				*len = pble->size - pble_off;
			if (idx)
				*idx = i;

			return pble->addr + pble_off;
		}
		i++;
	}
	if (len)
		*len = 0;
	return 0;
}
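
/*
 * siw_pbl_alloc()
 *
 * Allocate a physical buffer list with room for @num_buf entries.
 * struct siw_pbl already provides space for the first siw_pble,
 * hence the (num_buf - 1) in the size calculation.
 */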
struct siw_pbl *siw_pbl_alloc(u32 num_buf)
{
	struct siw_pbl *pbl;
	int buf_size = sizeof(*pbl);

	if (num_buf == 0)
		return ERR_PTR(-EINVAL);

	buf_size += ((num_buf - 1) * sizeof(struct siw_pble));

	pbl = kzalloc(buf_size, GFP_KERNEL);
	if (!pbl)
		return ERR_PTR(-ENOMEM);

	pbl->max_buf = num_buf;

	return pbl;
}
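
/*
 * siw_umem_get()
 *
 * Pin all user pages covering the range [start, start + len), while
 * enforcing the process's RLIMIT_MEMLOCK limit. Page pointers are
 * stored in chunks of up to PAGES_PER_CHUNK entries. On failure, any
 * partially pinned pages are released via siw_umem_release().
 */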
struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
{
	struct siw_umem *umem;
	struct mm_struct *mm_s;
	u64 first_page_va;
	unsigned long mlock_limit;
	unsigned int foll_flags = FOLL_WRITE;
	int num_pages, num_chunks, i, rv = 0;

	if (!can_do_mlock())
		return ERR_PTR(-EPERM);

	if (!len)
		return ERR_PTR(-EINVAL);

	first_page_va = start & PAGE_MASK;
	num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
	num_chunks = (num_pages >> CHUNK_SHIFT) + 1;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	mm_s = current->mm;
	umem->owning_mm = mm_s;
	umem->writable = writable;

	mmgrab(mm_s);

	if (!writable)
		foll_flags |= FOLL_FORCE;

	down_read(&mm_s->mmap_sem);

	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	/* Check against the MEMLOCK limit before pinning any pages */
	if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
		rv = -ENOMEM;
		goto out_sem_up;
	}
	umem->fp_addr = first_page_va;

	umem->page_chunk =
		kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
	if (!umem->page_chunk) {
		rv = -ENOMEM;
		goto out_sem_up;
	}
	for (i = 0; num_pages; i++) {
		int got, nents = min_t(int, num_pages, PAGES_PER_CHUNK);

		umem->page_chunk[i].plist =
			kcalloc(nents, sizeof(struct page *), GFP_KERNEL);
		if (!umem->page_chunk[i].plist) {
			rv = -ENOMEM;
			goto out_sem_up;
		}
		got = 0;
		while (nents) {
			struct page **plist = &umem->page_chunk[i].plist[got];

			rv = get_user_pages(first_page_va, nents,
					    foll_flags | FOLL_LONGTERM,
					    plist, NULL);
			if (rv < 0)
				goto out_sem_up;

			umem->num_pages += rv;
			atomic64_add(rv, &mm_s->pinned_vm);
			first_page_va += rv * PAGE_SIZE;
			nents -= rv;
			got += rv;
		}
		num_pages -= got;
	}
out_sem_up:
	up_read(&mm_s->mmap_sem);

	/* rv carries the result of the last get_user_pages() call */
	if (rv > 0)
		return umem;

	siw_umem_release(umem, false);

	return ERR_PTR(rv);
}