net/xdp/xdp_umem.c


DEFINITIONS

This source file includes the following definitions:
  1. xdp_add_sk_umem
  2. xdp_del_sk_umem
  3. xdp_reg_umem_at_qid
  4. xdp_get_umem_from_qid
  5. xdp_clear_umem_at_qid
  6. xdp_umem_assign_dev
  7. xdp_umem_clear_dev
  8. xdp_umem_unmap_pages
  9. xdp_umem_map_pages
  10. xdp_umem_unpin_pages
  11. xdp_umem_unaccount_pages
  12. xdp_umem_release
  13. xdp_umem_release_deferred
  14. xdp_get_umem
  15. xdp_put_umem
  16. xdp_umem_pin_pages
  17. xdp_umem_account_pages
  18. xdp_umem_reg
  19. xdp_umem_create
  20. xdp_umem_validate_queues

// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/idr.h>
#include <linux/vmalloc.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

#define XDP_UMEM_MIN_CHUNK_SIZE 2048

static DEFINE_IDA(umem_ida);

void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
        unsigned long flags;

        if (!xs->tx)
                return;

        spin_lock_irqsave(&umem->xsk_list_lock, flags);
        list_add_rcu(&xs->list, &umem->xsk_list);
        spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
}

void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
        unsigned long flags;

        if (!xs->tx)
                return;

        spin_lock_irqsave(&umem->xsk_list_lock, flags);
        list_del_rcu(&xs->list);
        spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
}
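
xdp_add_sk_umem() and xdp_del_sk_umem() protect writers with xsk_list_lock while readers traverse the list under RCU, which is why list_add_rcu()/list_del_rcu() are used. A minimal sketch of the reader side, modelled on how the Tx path in net/xdp/xsk.c walks the sockets bound to a umem (example_walk_tx_sockets is a hypothetical helper and the loop body is illustrative):

static void example_walk_tx_sockets(struct xdp_umem *umem)
{
        struct xdp_sock *xs;

        rcu_read_lock();
        list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
                /* inspect or wake each Tx socket bound to this umem */
        }
        rcu_read_unlock();
}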

/* The umem is stored both in the _rx struct and the _tx struct as we do
 * not know if the device has more tx queues than rx, or the opposite.
 * This might also change during run time.
 */
static int xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem,
                               u16 queue_id)
{
        if (queue_id >= max_t(unsigned int,
                              dev->real_num_rx_queues,
                              dev->real_num_tx_queues))
                return -EINVAL;

        if (queue_id < dev->real_num_rx_queues)
                dev->_rx[queue_id].umem = umem;
        if (queue_id < dev->real_num_tx_queues)
                dev->_tx[queue_id].umem = umem;

        return 0;
}

struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
                                       u16 queue_id)
{
        if (queue_id < dev->real_num_rx_queues)
                return dev->_rx[queue_id].umem;
        if (queue_id < dev->real_num_tx_queues)
                return dev->_tx[queue_id].umem;

        return NULL;
}
EXPORT_SYMBOL(xdp_get_umem_from_qid);
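
xdp_get_umem_from_qid() is exported so drivers can look up whether an AF_XDP umem is attached to a given queue while (re)configuring it. A hedged sketch of that pattern follows; my_driver_setup_zc_ring() and my_driver_setup_normal_ring() are hypothetical placeholders for a driver's own ring setup (drivers such as i40e wrap the lookup in a similar helper):

static int my_driver_setup_rx_ring(struct net_device *netdev, u16 qid)
{
        struct xdp_umem *umem = xdp_get_umem_from_qid(netdev, qid);

        /* NULL means no umem is registered on this queue id */
        if (umem)
                return my_driver_setup_zc_ring(netdev, umem, qid);   /* hypothetical */

        return my_driver_setup_normal_ring(netdev, qid);             /* hypothetical */
}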

static void xdp_clear_umem_at_qid(struct net_device *dev, u16 queue_id)
{
        if (queue_id < dev->real_num_rx_queues)
                dev->_rx[queue_id].umem = NULL;
        if (queue_id < dev->real_num_tx_queues)
                dev->_tx[queue_id].umem = NULL;
}

int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
                        u16 queue_id, u16 flags)
{
        bool force_zc, force_copy;
        struct netdev_bpf bpf;
        int err = 0;

        ASSERT_RTNL();

        force_zc = flags & XDP_ZEROCOPY;
        force_copy = flags & XDP_COPY;

        if (force_zc && force_copy)
                return -EINVAL;

        if (xdp_get_umem_from_qid(dev, queue_id))
                return -EBUSY;

        err = xdp_reg_umem_at_qid(dev, umem, queue_id);
        if (err)
                return err;

        umem->dev = dev;
        umem->queue_id = queue_id;

        if (flags & XDP_USE_NEED_WAKEUP) {
                umem->flags |= XDP_UMEM_USES_NEED_WAKEUP;
                /* Tx needs to be explicitly woken up the first time.
                 * Also for supporting drivers that do not implement this
                 * feature. They will always have to call sendto().
                 */
                xsk_set_tx_need_wakeup(umem);
        }

        dev_hold(dev);

        if (force_copy)
                /* For copy-mode, we are done. */
                return 0;

        if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_wakeup) {
                err = -EOPNOTSUPP;
                goto err_unreg_umem;
        }

        bpf.command = XDP_SETUP_XSK_UMEM;
        bpf.xsk.umem = umem;
        bpf.xsk.queue_id = queue_id;

        err = dev->netdev_ops->ndo_bpf(dev, &bpf);
        if (err)
                goto err_unreg_umem;

        umem->zc = true;
        return 0;

err_unreg_umem:
        if (!force_zc)
                err = 0; /* fallback to copy mode */
        if (err)
                xdp_clear_umem_at_qid(dev, queue_id);
        return err;
}
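
The flags interpreted here (XDP_COPY, XDP_ZEROCOPY, XDP_USE_NEED_WAKEUP) come from user space via bind() on the AF_XDP socket. A minimal user-space sketch, assuming an already created AF_XDP socket fd and that AF_XDP plus the UAPI definitions from <linux/if_xdp.h> are known to your toolchain headers; the interface name and queue id are caller-supplied:

#include <linux/if_xdp.h>
#include <net/if.h>
#include <string.h>
#include <sys/socket.h>

static int bind_xsk(int xsk_fd, const char *ifname, __u32 queue_id)
{
        struct sockaddr_xdp sxdp;

        memset(&sxdp, 0, sizeof(sxdp));
        sxdp.sxdp_family = AF_XDP;
        sxdp.sxdp_ifindex = if_nametoindex(ifname);
        sxdp.sxdp_queue_id = queue_id;
        /* Leave the copy/zero-copy decision to the kernel (it tries
         * zero-copy and falls back to copy mode unless XDP_ZEROCOPY or
         * XDP_COPY is forced) and opt in to the need_wakeup scheme.
         */
        sxdp.sxdp_flags = XDP_USE_NEED_WAKEUP;

        return bind(xsk_fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
}

With XDP_USE_NEED_WAKEUP set, the application is expected to kick Tx with sendto() whenever the ring's XDP_RING_NEED_WAKEUP flag is set, which is why xdp_umem_assign_dev() marks Tx as needing a wakeup up front.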

void xdp_umem_clear_dev(struct xdp_umem *umem)
{
        struct netdev_bpf bpf;
        int err;

        ASSERT_RTNL();

        if (!umem->dev)
                return;

        if (umem->zc) {
                bpf.command = XDP_SETUP_XSK_UMEM;
                bpf.xsk.umem = NULL;
                bpf.xsk.queue_id = umem->queue_id;

                err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);

                if (err)
                        WARN(1, "failed to disable umem!\n");
        }

        xdp_clear_umem_at_qid(umem->dev, umem->queue_id);

        dev_put(umem->dev);
        umem->dev = NULL;
        umem->zc = false;
}
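
Both xdp_umem_assign_dev() and xdp_umem_clear_dev() reach the driver through ndo_bpf with the XDP_SETUP_XSK_UMEM command, passing either the umem to enable or NULL to disable zero-copy on that queue. A hedged sketch of the driver-side handler; my_driver_enable_xsk() and my_driver_disable_xsk() are hypothetical stand-ins for the driver's own queue reconfiguration:

static int my_driver_ndo_bpf(struct net_device *dev, struct netdev_bpf *bpf)
{
        switch (bpf->command) {
        case XDP_SETUP_XSK_UMEM:
                if (bpf->xsk.umem)
                        /* enable zero-copy on this queue */
                        return my_driver_enable_xsk(dev, bpf->xsk.umem,
                                                    bpf->xsk.queue_id);
                /* NULL umem: tear zero-copy down on this queue */
                return my_driver_disable_xsk(dev, bpf->xsk.queue_id);
        default:
                return -EINVAL;
        }
}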

static void xdp_umem_unmap_pages(struct xdp_umem *umem)
{
        unsigned int i;

        for (i = 0; i < umem->npgs; i++)
                if (PageHighMem(umem->pgs[i]))
                        vunmap(umem->pages[i].addr);
}

static int xdp_umem_map_pages(struct xdp_umem *umem)
{
        unsigned int i;
        void *addr;

        for (i = 0; i < umem->npgs; i++) {
                if (PageHighMem(umem->pgs[i]))
                        addr = vmap(&umem->pgs[i], 1, VM_MAP, PAGE_KERNEL);
                else
                        addr = page_address(umem->pgs[i]);

                if (!addr) {
                        xdp_umem_unmap_pages(umem);
                        return -ENOMEM;
                }

                umem->pages[i].addr = addr;
        }

        return 0;
}
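
The kernel addresses collected in umem->pages[] are what later make it cheap to translate a umem offset into a pointer without touching the struct page array again. A sketch of that translation for aligned chunks, mirroring the xdp_umem_get_data() helper of this kernel generation (illustrative, aligned mode only):

static char *example_umem_get_data(struct xdp_umem *umem, u64 addr)
{
        /* the page index selects the mapping created above; the low
         * bits are the offset within that page
         */
        return (char *)umem->pages[addr >> PAGE_SHIFT].addr +
               (addr & (PAGE_SIZE - 1));
}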

static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
        put_user_pages_dirty_lock(umem->pgs, umem->npgs, true);

        kfree(umem->pgs);
        umem->pgs = NULL;
}

static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
        if (umem->user) {
                atomic_long_sub(umem->npgs, &umem->user->locked_vm);
                free_uid(umem->user);
        }
}

static void xdp_umem_release(struct xdp_umem *umem)
{
        rtnl_lock();
        xdp_umem_clear_dev(umem);
        rtnl_unlock();

        ida_simple_remove(&umem_ida, umem->id);

        if (umem->fq) {
                xskq_destroy(umem->fq);
                umem->fq = NULL;
        }

        if (umem->cq) {
                xskq_destroy(umem->cq);
                umem->cq = NULL;
        }

        xsk_reuseq_destroy(umem);

        xdp_umem_unmap_pages(umem);
        xdp_umem_unpin_pages(umem);

        kfree(umem->pages);
        umem->pages = NULL;

        xdp_umem_unaccount_pages(umem);
        kfree(umem);
}

static void xdp_umem_release_deferred(struct work_struct *work)
{
        struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

        xdp_umem_release(umem);
}

void xdp_get_umem(struct xdp_umem *umem)
{
        refcount_inc(&umem->users);
}

void xdp_put_umem(struct xdp_umem *umem)
{
        if (!umem)
                return;

        if (refcount_dec_and_test(&umem->users)) {
                INIT_WORK(&umem->work, xdp_umem_release_deferred);
                schedule_work(&umem->work);
        }
}
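
The users refcount allows several XDP sockets to share one umem, and the final xdp_put_umem() defers the actual teardown to a workqueue because xdp_umem_release() needs to take rtnl_lock. A rough sketch of how a second socket takes its reference, modelled on the XDP_SHARED_UMEM path in net/xdp/xsk.c (error handling omitted; umem_xs is the already-bound socket being shared from):

static void example_share_umem(struct xdp_sock *xs, struct xdp_sock *umem_xs)
{
        xdp_get_umem(umem_xs->umem);    /* hold a reference for the new socket */
        xs->umem = umem_xs->umem;       /* dropped later through xdp_put_umem() */
}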

static int xdp_umem_pin_pages(struct xdp_umem *umem)
{
        unsigned int gup_flags = FOLL_WRITE;
        long npgs;
        int err;

        umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
                            GFP_KERNEL | __GFP_NOWARN);
        if (!umem->pgs)
                return -ENOMEM;

        down_read(&current->mm->mmap_sem);
        npgs = get_user_pages(umem->address, umem->npgs,
                              gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL);
        up_read(&current->mm->mmap_sem);

        if (npgs != umem->npgs) {
                if (npgs >= 0) {
                        umem->npgs = npgs;
                        err = -ENOMEM;
                        goto out_pin;
                }
                err = npgs;
                goto out_pgs;
        }
        return 0;

out_pin:
        xdp_umem_unpin_pages(umem);
out_pgs:
        kfree(umem->pgs);
        umem->pgs = NULL;
        return err;
}

static int xdp_umem_account_pages(struct xdp_umem *umem)
{
        unsigned long lock_limit, new_npgs, old_npgs;

        if (capable(CAP_IPC_LOCK))
                return 0;

        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        umem->user = get_uid(current_user());

        do {
                old_npgs = atomic_long_read(&umem->user->locked_vm);
                new_npgs = old_npgs + umem->npgs;
                if (new_npgs > lock_limit) {
                        free_uid(umem->user);
                        umem->user = NULL;
                        return -ENOBUFS;
                }
        } while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
                                     new_npgs) != old_npgs);
        return 0;
}
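
Because the pinned pages are charged against the caller's RLIMIT_MEMLOCK unless it holds CAP_IPC_LOCK, applications that register large umems commonly raise that limit before calling setsockopt(XDP_UMEM_REG). A user-space sketch, assuming the process is privileged enough to raise the limit:

#include <sys/resource.h>

static int allow_large_umem(void)
{
        /* RLIM_INFINITY is the blunt choice; a real application might
         * instead size the limit (in bytes) to cover its umem.
         */
        struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY };

        return setrlimit(RLIMIT_MEMLOCK, &r);
}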

static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
        bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
        u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
        u64 npgs, addr = mr->addr, size = mr->len;
        unsigned int chunks, chunks_per_page;
        int err;

        if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
                /* Strictly speaking we could support this, if:
                 * - huge pages, or
                 * - using an IOMMU, or
                 * - making sure the memory area is consecutive
                 * but for now, we simply say "computer says no".
                 */
                return -EINVAL;
        }

        if (mr->flags & ~(XDP_UMEM_UNALIGNED_CHUNK_FLAG |
                        XDP_UMEM_USES_NEED_WAKEUP))
                return -EINVAL;

        if (!unaligned_chunks && !is_power_of_2(chunk_size))
                return -EINVAL;

        if (!PAGE_ALIGNED(addr)) {
                /* Memory area has to be page size aligned. For
                 * simplicity, this might change.
                 */
                return -EINVAL;
        }

        if ((addr + size) < addr)
                return -EINVAL;

        npgs = div_u64(size, PAGE_SIZE);
        if (npgs > U32_MAX)
                return -EINVAL;

        chunks = (unsigned int)div_u64(size, chunk_size);
        if (chunks == 0)
                return -EINVAL;

        if (!unaligned_chunks) {
                chunks_per_page = PAGE_SIZE / chunk_size;
                if (chunks < chunks_per_page || chunks % chunks_per_page)
                        return -EINVAL;
        }

        if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
                return -EINVAL;

        umem->address = (unsigned long)addr;
        umem->chunk_mask = unaligned_chunks ? XSK_UNALIGNED_BUF_ADDR_MASK
                                            : ~((u64)chunk_size - 1);
        umem->size = size;
        umem->headroom = headroom;
        umem->chunk_size_nohr = chunk_size - headroom;
        umem->npgs = (u32)npgs;
        umem->pgs = NULL;
        umem->user = NULL;
        umem->flags = mr->flags;
        INIT_LIST_HEAD(&umem->xsk_list);
        spin_lock_init(&umem->xsk_list_lock);

        refcount_set(&umem->users, 1);

        err = xdp_umem_account_pages(umem);
        if (err)
                return err;

        err = xdp_umem_pin_pages(umem);
        if (err)
                goto out_account;

        umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL);
        if (!umem->pages) {
                err = -ENOMEM;
                goto out_pin;
        }

        err = xdp_umem_map_pages(umem);
        if (!err)
                return 0;

        kfree(umem->pages);

out_pin:
        xdp_umem_unpin_pages(umem);
out_account:
        xdp_umem_unaccount_pages(umem);
        return err;
}
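
From user space, xdp_umem_reg() is reached through setsockopt(SOL_XDP, XDP_UMEM_REG) on an AF_XDP socket, with a page-aligned buffer whose size, chunk size and headroom satisfy the checks above. A minimal sketch using the UAPI struct from <linux/if_xdp.h>, assuming SOL_XDP is known to your headers; the frame count and size are one arbitrary valid choice:

#include <linux/if_xdp.h>
#include <sys/mman.h>
#include <sys/socket.h>

#define NUM_FRAMES 4096
#define FRAME_SIZE 2048 /* >= XDP_UMEM_MIN_CHUNK_SIZE and a power of two */

static int register_umem(int xsk_fd)
{
        struct xdp_umem_reg mr = {};
        void *buf;

        /* mmap() returns page-aligned memory, which xdp_umem_reg() demands */
        buf = mmap(NULL, (size_t)NUM_FRAMES * FRAME_SIZE,
                   PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (buf == MAP_FAILED)
                return -1;

        mr.addr = (__u64)(unsigned long)buf;
        mr.len = (__u64)NUM_FRAMES * FRAME_SIZE;
        mr.chunk_size = FRAME_SIZE;
        mr.headroom = 0;
        mr.flags = 0;

        return setsockopt(xsk_fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
}

An EINVAL from this call usually means one of the alignment, chunk-size or headroom checks in xdp_umem_reg() failed.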

struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
        struct xdp_umem *umem;
        int err;

        umem = kzalloc(sizeof(*umem), GFP_KERNEL);
        if (!umem)
                return ERR_PTR(-ENOMEM);

        err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL);
        if (err < 0) {
                kfree(umem);
                return ERR_PTR(err);
        }
        umem->id = err;

        err = xdp_umem_reg(umem, mr);
        if (err) {
                ida_simple_remove(&umem_ida, umem->id);
                kfree(umem);
                return ERR_PTR(err);
        }

        return umem;
}

bool xdp_umem_validate_queues(struct xdp_umem *umem)
{
        return umem->fq && umem->cq;
}
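
xdp_umem_validate_queues() only succeeds once both the FILL ring (umem->fq) and the COMPLETION ring (umem->cq) exist; user space creates them with two further setsockopt() calls before bind(). A sketch of that step; the ring sizes are illustrative but must be powers of two:

#include <linux/if_xdp.h>
#include <sys/socket.h>

static int create_umem_rings(int xsk_fd)
{
        int fill_sz = 2048, comp_sz = 2048;

        if (setsockopt(xsk_fd, SOL_XDP, XDP_UMEM_FILL_RING,
                       &fill_sz, sizeof(fill_sz)))
                return -1;

        return setsockopt(xsk_fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
                          &comp_sz, sizeof(comp_sz));
}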
