root/drivers/infiniband/core/umem.c


DEFINITIONS

This source file includes the following definitions.
  1. __ib_umem_release
  2. ib_umem_add_sg_table
  3. ib_umem_find_best_pgsz
  4. ib_umem_get
  5. ib_umem_release
  6. ib_umem_page_count
  7. ib_umem_copy_from

/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <rdma/ib_umem_odp.h>

#include "uverbs.h"

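/*
 * __ib_umem_release - undo the pinning and mapping done by ib_umem_get()
 *
 * Unmaps the scatterlist if it was DMA mapped, unpins every page (marking
 * the pages dirty first when @dirty is set and the umem was writable) and
 * finally frees the scatter table itself.
 */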
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
        struct sg_page_iter sg_iter;
        struct page *page;

        if (umem->nmap > 0)
                ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents,
                                DMA_BIDIRECTIONAL);

        for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
                page = sg_page_iter_page(&sg_iter);
                put_user_pages_dirty_lock(&page, 1, umem->writable && dirty);
        }

        sg_free_table(&umem->sg_head);
}

/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
 *
 * sg: current scatterlist entry
 * page_list: array of npages struct page pointers
 * npages: number of pages in page_list
 * max_seg_sz: maximum segment size in bytes
 * nents: [out] number of entries in the scatterlist
 *
 * Return new end of scatterlist
 */
static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
                                                struct page **page_list,
                                                unsigned long npages,
                                                unsigned int max_seg_sz,
                                                int *nents)
{
        unsigned long first_pfn;
        unsigned long i = 0;
        bool update_cur_sg = false;
        bool first = !sg_page(sg);

        /* Check if new page_list is contiguous with end of previous page_list.
         * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0.
         */
        if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
                       page_to_pfn(page_list[0])))
                update_cur_sg = true;

        while (i != npages) {
                unsigned long len;
                struct page *first_page = page_list[i];

                first_pfn = page_to_pfn(first_page);

                /* Compute the number of contiguous pages we have starting
                 * at i
                 */
                for (len = 0; i != npages &&
                              first_pfn + len == page_to_pfn(page_list[i]) &&
                              len < (max_seg_sz >> PAGE_SHIFT);
                     len++)
                        i++;

                /* Squash N contiguous pages from page_list into current sge */
                if (update_cur_sg) {
                        if ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT)) {
                                sg_set_page(sg, sg_page(sg),
                                            sg->length + (len << PAGE_SHIFT),
                                            0);
                                update_cur_sg = false;
                                continue;
                        }
                        update_cur_sg = false;
                }

                /* Squash N contiguous pages into next sge or first sge */
                if (!first)
                        sg = sg_next(sg);

                (*nents)++;
                sg_set_page(sg, first_page, len << PAGE_SHIFT, 0);
                first = false;
        }

        return sg;
}

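/*
 * Worked example (illustration only, not part of the original sources),
 * assuming 4 KiB pages and a 64 KiB DMA segment limit: a page_list whose
 * PFNs are {100, 101, 102, 103, 200, 300, 301, 302} is folded by
 * ib_umem_add_sg_table() into three scatterlist entries:
 *
 *      sge[0]: page at PFN 100, length 4 * PAGE_SIZE   (PFNs 100-103)
 *      sge[1]: page at PFN 200, length 1 * PAGE_SIZE
 *      sge[2]: page at PFN 300, length 3 * PAGE_SIZE   (PFNs 300-302)
 *
 * If the next call's page_list starts at PFN 303, the contiguity check at
 * the top of the function extends sge[2] in place rather than starting a
 * new entry, as long as max_seg_sz is not exceeded.
 */
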
/**
 * ib_umem_find_best_pgsz - Find best HW page size to use for this MR
 *
 * @umem: umem struct
 * @pgsz_bitmap: bitmap of HW supported page sizes
 * @virt: IOVA
 *
 * This helper is intended for HW that supports multiple page
 * sizes but can do only a single page size in an MR.
 *
 * Returns 0 if the umem requires page sizes not supported by
 * the driver to be mapped. Drivers always supporting PAGE_SIZE
 * or smaller will never see a 0 result.
 */
unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
                                     unsigned long pgsz_bitmap,
                                     unsigned long virt)
{
        struct scatterlist *sg;
        unsigned int best_pg_bit;
        unsigned long va, pgoff;
        dma_addr_t mask;
        int i;

        /* At minimum, drivers must support PAGE_SIZE or smaller */
        if (WARN_ON(!(pgsz_bitmap & GENMASK(PAGE_SHIFT, 0))))
                return 0;

        va = virt;
        /* max page size not to exceed MR length */
        mask = roundup_pow_of_two(umem->length);
        /* offset into first SGL */
        pgoff = umem->address & ~PAGE_MASK;

        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
                /* Walk SGL and reduce max page size if VA/PA bits differ
                 * for any address.
                 */
                mask |= (sg_dma_address(sg) + pgoff) ^ va;
                va += sg_dma_len(sg) - pgoff;
                /* Except for the last entry, the ending iova alignment sets
                 * the maximum possible page size as the low bits of the iova
                 * must be zero when starting the next chunk.
                 */
                if (i != (umem->nmap - 1))
                        mask |= va;
                pgoff = 0;
        }
        best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap);

        return BIT_ULL(best_pg_bit);
}
EXPORT_SYMBOL(ib_umem_find_best_pgsz);

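/*
 * Usage sketch (illustration only; not taken from an in-tree driver): a
 * device that can back an MR with 4 KiB, 64 KiB or 2 MiB pages would pass
 * the OR of those sizes as the bitmap and treat a zero return as "no
 * supported size fits":
 *
 *      unsigned long pg_sz;
 *
 *      pg_sz = ib_umem_find_best_pgsz(umem, SZ_4K | SZ_64K | SZ_2M,
 *                                     virt_addr);
 *      if (!pg_sz)
 *              return -EINVAL;
 *
 * The helper returns the largest size in the bitmap whose alignment is
 * respected by every DMA-mapped chunk of the umem, so drivers that always
 * include PAGE_SIZE (or smaller) in the bitmap never see 0.
 */
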
/**
 * ib_umem_get - Pin and DMA map userspace memory.
 *
 * @udata: userspace context to pin memory for
 * @addr: userspace virtual address to start at
 * @size: length of region to pin
 * @access: IB_ACCESS_xxx flags for memory being pinned
 * @dmasync: flush in-flight DMA when the memory region is written
 */
struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
                            size_t size, int access, int dmasync)
{
        struct ib_ucontext *context;
        struct ib_umem *umem;
        struct page **page_list;
        unsigned long lock_limit;
        unsigned long new_pinned;
        unsigned long cur_base;
        struct mm_struct *mm;
        unsigned long npages;
        int ret;
        unsigned long dma_attrs = 0;
        struct scatterlist *sg;
        unsigned int gup_flags = FOLL_WRITE;

        if (!udata)
                return ERR_PTR(-EIO);

        context = container_of(udata, struct uverbs_attr_bundle, driver_udata)
                          ->context;
        if (!context)
                return ERR_PTR(-EIO);

        if (dmasync)
                dma_attrs |= DMA_ATTR_WRITE_BARRIER;

        /*
         * If the combination of the addr and size requested for this memory
         * region causes an integer overflow, return error.
         */
        if (((addr + size) < addr) ||
            PAGE_ALIGN(addr + size) < (addr + size))
                return ERR_PTR(-EINVAL);

        if (!can_do_mlock())
                return ERR_PTR(-EPERM);

        if (access & IB_ACCESS_ON_DEMAND)
                return ERR_PTR(-EOPNOTSUPP);

        umem = kzalloc(sizeof(*umem), GFP_KERNEL);
        if (!umem)
                return ERR_PTR(-ENOMEM);
        umem->ibdev = context->device;
        umem->length     = size;
        umem->address    = addr;
        umem->writable   = ib_access_writable(access);
        umem->owning_mm = mm = current->mm;
        mmgrab(mm);

        page_list = (struct page **) __get_free_page(GFP_KERNEL);
        if (!page_list) {
                ret = -ENOMEM;
                goto umem_kfree;
        }

        npages = ib_umem_num_pages(umem);
        if (npages == 0 || npages > UINT_MAX) {
                ret = -EINVAL;
                goto out;
        }

        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

        new_pinned = atomic64_add_return(npages, &mm->pinned_vm);
        if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
                atomic64_sub(npages, &mm->pinned_vm);
                ret = -ENOMEM;
                goto out;
        }

        cur_base = addr & PAGE_MASK;

        ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
        if (ret)
                goto vma;

        if (!umem->writable)
                gup_flags |= FOLL_FORCE;

        sg = umem->sg_head.sgl;

        while (npages) {
                down_read(&mm->mmap_sem);
                ret = get_user_pages(cur_base,
                                     min_t(unsigned long, npages,
                                           PAGE_SIZE / sizeof (struct page *)),
                                     gup_flags | FOLL_LONGTERM,
                                     page_list, NULL);
                if (ret < 0) {
                        up_read(&mm->mmap_sem);
                        goto umem_release;
                }

                cur_base += ret * PAGE_SIZE;
                npages   -= ret;

                sg = ib_umem_add_sg_table(sg, page_list, ret,
                        dma_get_max_seg_size(context->device->dma_device),
                        &umem->sg_nents);

                up_read(&mm->mmap_sem);
        }

        sg_mark_end(sg);

        umem->nmap = ib_dma_map_sg_attrs(context->device,
                                  umem->sg_head.sgl,
                                  umem->sg_nents,
                                  DMA_BIDIRECTIONAL,
                                  dma_attrs);

        if (!umem->nmap) {
                ret = -ENOMEM;
                goto umem_release;
        }

        ret = 0;
        goto out;

umem_release:
        __ib_umem_release(context->device, umem, 0);
vma:
        atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
out:
        free_page((unsigned long) page_list);
umem_kfree:
        if (ret) {
                mmdrop(umem->owning_mm);
                kfree(umem);
        }
        return ret ? ERR_PTR(ret) : umem;
}
EXPORT_SYMBOL(ib_umem_get);

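/*
 * Usage sketch (illustration only; error handling trimmed and
 * my_driver_write_mtt() is a hypothetical helper): a driver's
 * reg_user_mr() path typically pins the region, programs its hardware
 * translation table from the resulting scatterlist, and releases the
 * umem on any failure:
 *
 *      umem = ib_umem_get(udata, start, length, access_flags, 0);
 *      if (IS_ERR(umem))
 *              return ERR_CAST(umem);
 *
 *      err = my_driver_write_mtt(dev, umem);
 *      if (err) {
 *              ib_umem_release(umem);
 *              return ERR_PTR(err);
 *      }
 *
 * ib_umem_get() never returns NULL, so callers must test with IS_ERR()
 * and must balance every successful call with ib_umem_release().
 */
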
/**
 * ib_umem_release - release memory pinned with ib_umem_get
 * @umem: umem struct to release
 */
void ib_umem_release(struct ib_umem *umem)
{
        if (!umem)
                return;
        if (umem->is_odp)
                return ib_umem_odp_release(to_ib_umem_odp(umem));

        __ib_umem_release(umem->ibdev, umem, 1);

        atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
        mmdrop(umem->owning_mm);
        kfree(umem);
}
EXPORT_SYMBOL(ib_umem_release);

int ib_umem_page_count(struct ib_umem *umem)
{
        int i, n = 0;
        struct scatterlist *sg;

        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
                n += sg_dma_len(sg) >> PAGE_SHIFT;

        return n;
}
EXPORT_SYMBOL(ib_umem_page_count);

/*
 * Copy from the given ib_umem's pages to the given buffer.
 *
 * umem - the umem to copy from
 * offset - offset to start copying from
 * dst - destination buffer
 * length - buffer length
 *
 * Returns 0 on success, or an error code.
 */
int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
                      size_t length)
{
        size_t end = offset + length;
        int ret;

        if (offset > umem->length || length > umem->length - offset) {
                pr_err("ib_umem_copy_from not in range. offset: %zd umem length: %zd end: %zd\n",
                       offset, umem->length, end);
                return -EINVAL;
        }

        ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->sg_nents, dst, length,
                                 offset + ib_umem_offset(umem));

        if (ret < 0)
                return ret;
        else if (ret != length)
                return -EINVAL;
        else
                return 0;
}
EXPORT_SYMBOL(ib_umem_copy_from);
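
/*
 * Usage sketch (illustration only; the buffer and offsets are
 * hypothetical): a driver that needs to read back the first bytes of a
 * registered region, e.g. a context structure written by userspace, can
 * copy them out of the pinned pages without mapping anything itself:
 *
 *      u8 hdr[64];
 *      int err;
 *
 *      err = ib_umem_copy_from(hdr, umem, 0, sizeof(hdr));
 *      if (err)
 *              return err;
 *
 * The offset is relative to the start of the umem (umem->address), and
 * offset + length must not exceed umem->length.
 */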
