1/* 2 * Percpu IDA library 3 * 4 * Copyright (C) 2013 Datera, Inc. Kent Overstreet 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License as 8 * published by the Free Software Foundation; either version 2, or (at 9 * your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, but 12 * WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * General Public License for more details. 15 */ 16 17#include <linux/bitmap.h> 18#include <linux/bitops.h> 19#include <linux/bug.h> 20#include <linux/err.h> 21#include <linux/export.h> 22#include <linux/init.h> 23#include <linux/kernel.h> 24#include <linux/percpu.h> 25#include <linux/sched.h> 26#include <linux/string.h> 27#include <linux/spinlock.h> 28#include <linux/percpu_ida.h> 29 30struct percpu_ida_cpu { 31 /* 32 * Even though this is percpu, we need a lock for tag stealing by remote 33 * CPUs: 34 */ 35 spinlock_t lock; 36 37 /* nr_free/freelist form a stack of free IDs */ 38 unsigned nr_free; 39 unsigned freelist[]; 40}; 41 42static inline void move_tags(unsigned *dst, unsigned *dst_nr, 43 unsigned *src, unsigned *src_nr, 44 unsigned nr) 45{ 46 *src_nr -= nr; 47 memcpy(dst + *dst_nr, src + *src_nr, sizeof(unsigned) * nr); 48 *dst_nr += nr; 49} 50 51/* 52 * Try to steal tags from a remote cpu's percpu freelist. 53 * 54 * We first check how many percpu freelists have tags 55 * 56 * Then we iterate through the cpus until we find some tags - we don't attempt 57 * to find the "best" cpu to steal from, to keep cacheline bouncing to a 58 * minimum. 59 */ 60static inline void steal_tags(struct percpu_ida *pool, 61 struct percpu_ida_cpu *tags) 62{ 63 unsigned cpus_have_tags, cpu = pool->cpu_last_stolen; 64 struct percpu_ida_cpu *remote; 65 66 for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); 67 cpus_have_tags; cpus_have_tags--) { 68 cpu = cpumask_next(cpu, &pool->cpus_have_tags); 69 70 if (cpu >= nr_cpu_ids) { 71 cpu = cpumask_first(&pool->cpus_have_tags); 72 if (cpu >= nr_cpu_ids) 73 BUG(); 74 } 75 76 pool->cpu_last_stolen = cpu; 77 remote = per_cpu_ptr(pool->tag_cpu, cpu); 78 79 cpumask_clear_cpu(cpu, &pool->cpus_have_tags); 80 81 if (remote == tags) 82 continue; 83 84 spin_lock(&remote->lock); 85 86 if (remote->nr_free) { 87 memcpy(tags->freelist, 88 remote->freelist, 89 sizeof(unsigned) * remote->nr_free); 90 91 tags->nr_free = remote->nr_free; 92 remote->nr_free = 0; 93 } 94 95 spin_unlock(&remote->lock); 96 97 if (tags->nr_free) 98 break; 99 } 100} 101 102/* 103 * Pop up to IDA_PCPU_BATCH_MOVE IDs off the global freelist, and push them onto 104 * our percpu freelist: 105 */ 106static inline void alloc_global_tags(struct percpu_ida *pool, 107 struct percpu_ida_cpu *tags) 108{ 109 move_tags(tags->freelist, &tags->nr_free, 110 pool->freelist, &pool->nr_free, 111 min(pool->nr_free, pool->percpu_batch_size)); 112} 113 114static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags) 115{ 116 int tag = -ENOSPC; 117 118 spin_lock(&tags->lock); 119 if (tags->nr_free) 120 tag = tags->freelist[--tags->nr_free]; 121 spin_unlock(&tags->lock); 122 123 return tag; 124} 125 126/** 127 * percpu_ida_alloc - allocate a tag 128 * @pool: pool to allocate from 129 * @state: task state for prepare_to_wait 130 * 131 * Returns a tag - an integer in the range [0..nr_tags) (passed to 132 * tag_pool_init()), or otherwise -ENOSPC on allocation failure. 133 * 134 * Safe to be called from interrupt context (assuming it isn't passed 135 * TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, of course). 136 * 137 * @gfp indicates whether or not to wait until a free id is available (it's not 138 * used for internal memory allocations); thus if passed __GFP_WAIT we may sleep 139 * however long it takes until another thread frees an id (same semantics as a 140 * mempool). 141 * 142 * Will not fail if passed TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE. 143 */ 144int percpu_ida_alloc(struct percpu_ida *pool, int state) 145{ 146 DEFINE_WAIT(wait); 147 struct percpu_ida_cpu *tags; 148 unsigned long flags; 149 int tag; 150 151 local_irq_save(flags); 152 tags = this_cpu_ptr(pool->tag_cpu); 153 154 /* Fastpath */ 155 tag = alloc_local_tag(tags); 156 if (likely(tag >= 0)) { 157 local_irq_restore(flags); 158 return tag; 159 } 160 161 while (1) { 162 spin_lock(&pool->lock); 163 164 /* 165 * prepare_to_wait() must come before steal_tags(), in case 166 * percpu_ida_free() on another cpu flips a bit in 167 * cpus_have_tags 168 * 169 * global lock held and irqs disabled, don't need percpu lock 170 */ 171 if (state != TASK_RUNNING) 172 prepare_to_wait(&pool->wait, &wait, state); 173 174 if (!tags->nr_free) 175 alloc_global_tags(pool, tags); 176 if (!tags->nr_free) 177 steal_tags(pool, tags); 178 179 if (tags->nr_free) { 180 tag = tags->freelist[--tags->nr_free]; 181 if (tags->nr_free) 182 cpumask_set_cpu(smp_processor_id(), 183 &pool->cpus_have_tags); 184 } 185 186 spin_unlock(&pool->lock); 187 local_irq_restore(flags); 188 189 if (tag >= 0 || state == TASK_RUNNING) 190 break; 191 192 if (signal_pending_state(state, current)) { 193 tag = -ERESTARTSYS; 194 break; 195 } 196 197 schedule(); 198 199 local_irq_save(flags); 200 tags = this_cpu_ptr(pool->tag_cpu); 201 } 202 if (state != TASK_RUNNING) 203 finish_wait(&pool->wait, &wait); 204 205 return tag; 206} 207EXPORT_SYMBOL_GPL(percpu_ida_alloc); 208 209/** 210 * percpu_ida_free - free a tag 211 * @pool: pool @tag was allocated from 212 * @tag: a tag previously allocated with percpu_ida_alloc() 213 * 214 * Safe to be called from interrupt context. 215 */ 216void percpu_ida_free(struct percpu_ida *pool, unsigned tag) 217{ 218 struct percpu_ida_cpu *tags; 219 unsigned long flags; 220 unsigned nr_free; 221 222 BUG_ON(tag >= pool->nr_tags); 223 224 local_irq_save(flags); 225 tags = this_cpu_ptr(pool->tag_cpu); 226 227 spin_lock(&tags->lock); 228 tags->freelist[tags->nr_free++] = tag; 229 230 nr_free = tags->nr_free; 231 spin_unlock(&tags->lock); 232 233 if (nr_free == 1) { 234 cpumask_set_cpu(smp_processor_id(), 235 &pool->cpus_have_tags); 236 wake_up(&pool->wait); 237 } 238 239 if (nr_free == pool->percpu_max_size) { 240 spin_lock(&pool->lock); 241 242 /* 243 * Global lock held and irqs disabled, don't need percpu 244 * lock 245 */ 246 if (tags->nr_free == pool->percpu_max_size) { 247 move_tags(pool->freelist, &pool->nr_free, 248 tags->freelist, &tags->nr_free, 249 pool->percpu_batch_size); 250 251 wake_up(&pool->wait); 252 } 253 spin_unlock(&pool->lock); 254 } 255 256 local_irq_restore(flags); 257} 258EXPORT_SYMBOL_GPL(percpu_ida_free); 259 260/** 261 * percpu_ida_destroy - release a tag pool's resources 262 * @pool: pool to free 263 * 264 * Frees the resources allocated by percpu_ida_init(). 265 */ 266void percpu_ida_destroy(struct percpu_ida *pool) 267{ 268 free_percpu(pool->tag_cpu); 269 free_pages((unsigned long) pool->freelist, 270 get_order(pool->nr_tags * sizeof(unsigned))); 271} 272EXPORT_SYMBOL_GPL(percpu_ida_destroy); 273 274/** 275 * percpu_ida_init - initialize a percpu tag pool 276 * @pool: pool to initialize 277 * @nr_tags: number of tags that will be available for allocation 278 * 279 * Initializes @pool so that it can be used to allocate tags - integers in the 280 * range [0, nr_tags). Typically, they'll be used by driver code to refer to a 281 * preallocated array of tag structures. 282 * 283 * Allocation is percpu, but sharding is limited by nr_tags - for best 284 * performance, the workload should not span more cpus than nr_tags / 128. 285 */ 286int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags, 287 unsigned long max_size, unsigned long batch_size) 288{ 289 unsigned i, cpu, order; 290 291 memset(pool, 0, sizeof(*pool)); 292 293 init_waitqueue_head(&pool->wait); 294 spin_lock_init(&pool->lock); 295 pool->nr_tags = nr_tags; 296 pool->percpu_max_size = max_size; 297 pool->percpu_batch_size = batch_size; 298 299 /* Guard against overflow */ 300 if (nr_tags > (unsigned) INT_MAX + 1) { 301 pr_err("percpu_ida_init(): nr_tags too large\n"); 302 return -EINVAL; 303 } 304 305 order = get_order(nr_tags * sizeof(unsigned)); 306 pool->freelist = (void *) __get_free_pages(GFP_KERNEL, order); 307 if (!pool->freelist) 308 return -ENOMEM; 309 310 for (i = 0; i < nr_tags; i++) 311 pool->freelist[i] = i; 312 313 pool->nr_free = nr_tags; 314 315 pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) + 316 pool->percpu_max_size * sizeof(unsigned), 317 sizeof(unsigned)); 318 if (!pool->tag_cpu) 319 goto err; 320 321 for_each_possible_cpu(cpu) 322 spin_lock_init(&per_cpu_ptr(pool->tag_cpu, cpu)->lock); 323 324 return 0; 325err: 326 percpu_ida_destroy(pool); 327 return -ENOMEM; 328} 329EXPORT_SYMBOL_GPL(__percpu_ida_init); 330 331/** 332 * percpu_ida_for_each_free - iterate free ids of a pool 333 * @pool: pool to iterate 334 * @fn: interate callback function 335 * @data: parameter for @fn 336 * 337 * Note, this doesn't guarantee to iterate all free ids restrictly. Some free 338 * ids might be missed, some might be iterated duplicated, and some might 339 * be iterated and not free soon. 340 */ 341int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, 342 void *data) 343{ 344 unsigned long flags; 345 struct percpu_ida_cpu *remote; 346 unsigned cpu, i, err = 0; 347 348 local_irq_save(flags); 349 for_each_possible_cpu(cpu) { 350 remote = per_cpu_ptr(pool->tag_cpu, cpu); 351 spin_lock(&remote->lock); 352 for (i = 0; i < remote->nr_free; i++) { 353 err = fn(remote->freelist[i], data); 354 if (err) 355 break; 356 } 357 spin_unlock(&remote->lock); 358 if (err) 359 goto out; 360 } 361 362 spin_lock(&pool->lock); 363 for (i = 0; i < pool->nr_free; i++) { 364 err = fn(pool->freelist[i], data); 365 if (err) 366 break; 367 } 368 spin_unlock(&pool->lock); 369out: 370 local_irq_restore(flags); 371 return err; 372} 373EXPORT_SYMBOL_GPL(percpu_ida_for_each_free); 374 375/** 376 * percpu_ida_free_tags - return free tags number of a specific cpu or global pool 377 * @pool: pool related 378 * @cpu: specific cpu or global pool if @cpu == nr_cpu_ids 379 * 380 * Note: this just returns a snapshot of free tags number. 381 */ 382unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu) 383{ 384 struct percpu_ida_cpu *remote; 385 if (cpu == nr_cpu_ids) 386 return pool->nr_free; 387 remote = per_cpu_ptr(pool->tag_cpu, cpu); 388 return remote->nr_free; 389} 390EXPORT_SYMBOL_GPL(percpu_ida_free_tags); 391