/*
 * cpu_rmap.c: CPU affinity reverse-map support
 * Copyright 2011 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/cpu_rmap.h>
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/slab.h>

/*
 * These functions maintain a mapping from CPUs to some ordered set of
 * objects with CPU affinities. This can be seen as a reverse-map of
 * CPU affinity. However, we do not assume that the object affinities
 * cover all CPUs in the system. For those CPUs not directly covered
 * by object affinities, we attempt to find a nearest object based on
 * CPU topology.
 */

/**
 * alloc_cpu_rmap - allocate CPU affinity reverse-map
 * @size: Number of objects to be mapped
 * @flags: Allocation flags e.g. %GFP_KERNEL
 *
 * Returns the new reverse-map, or %NULL on failure.
 */
struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
{
	struct cpu_rmap *rmap;
	unsigned int cpu;
	size_t obj_offset;

	/* This is a silly number of objects, and we use u16 indices. */
	if (size > 0xffff)
		return NULL;

	/* Offset of object pointer array from base structure */
	obj_offset = ALIGN(offsetof(struct cpu_rmap, near[nr_cpu_ids]),
			   sizeof(void *));

	rmap = kzalloc(obj_offset + size * sizeof(rmap->obj[0]), flags);
	if (!rmap)
		return NULL;

	kref_init(&rmap->refcount);
	rmap->obj = (void **)((char *)rmap + obj_offset);

	/* Initially assign CPUs to objects on a rota, since we have
	 * no idea where the objects are. Use infinite distance, so
	 * any object with known distance is preferable. Include the
	 * CPUs that are not present/online, since we definitely want
	 * any newly-hotplugged CPUs to have some object assigned.
	 */
	for_each_possible_cpu(cpu) {
		rmap->near[cpu].index = cpu % size;
		rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
	}

	rmap->size = size;
	return rmap;
}
EXPORT_SYMBOL(alloc_cpu_rmap);

/**
 * cpu_rmap_release - internal reclaiming helper called from kref_put
 * @ref: kref to struct cpu_rmap
 */
static void cpu_rmap_release(struct kref *ref)
{
	struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount);

	kfree(rmap);
}

/**
 * cpu_rmap_get - internal helper to get new ref on a cpu_rmap
 * @rmap: reverse-map allocated with alloc_cpu_rmap()
 */
static inline void cpu_rmap_get(struct cpu_rmap *rmap)
{
	kref_get(&rmap->refcount);
}

/**
 * cpu_rmap_put - release ref on a cpu_rmap
 * @rmap: reverse-map allocated with alloc_cpu_rmap()
 */
int cpu_rmap_put(struct cpu_rmap *rmap)
{
	return kref_put(&rmap->refcount, cpu_rmap_release);
}
EXPORT_SYMBOL(cpu_rmap_put);
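/*
 * Usage sketch (illustrative only, not part of this file): a consumer
 * allocates a map sized to its object count, registers each object,
 * and later resolves the nearest object for the current CPU with the
 * lookup helpers from <linux/cpu_rmap.h>. The guard macro and the
 * names "example_objs" and "example_cpu_rmap_usage" are hypothetical.
 */
#ifdef CPU_RMAP_EXAMPLE
static void *example_objs[4];

static void *example_cpu_rmap_usage(void)
{
	struct cpu_rmap *rmap;
	unsigned int i;
	void *obj;

	rmap = alloc_cpu_rmap(ARRAY_SIZE(example_objs), GFP_KERNEL);
	if (!rmap)
		return NULL;

	for (i = 0; i < ARRAY_SIZE(example_objs); i++)
		cpu_rmap_add(rmap, example_objs[i]);

	/* Before any affinity update, CPU n maps to object n % 4. */
	obj = cpu_rmap_lookup_obj(rmap, raw_smp_processor_id());

	cpu_rmap_put(rmap);	/* drop our reference; last put frees */
	return obj;
}
#endif /* CPU_RMAP_EXAMPLE */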
/* Reevaluate nearest object for given CPU, comparing with the given
 * neighbours at the given distance.
 */
static bool cpu_rmap_copy_neigh(struct cpu_rmap *rmap, unsigned int cpu,
				const struct cpumask *mask, u16 dist)
{
	int neigh;

	for_each_cpu(neigh, mask) {
		if (rmap->near[cpu].dist > dist &&
		    rmap->near[neigh].dist <= dist) {
			rmap->near[cpu].index = rmap->near[neigh].index;
			rmap->near[cpu].dist = dist;
			return true;
		}
	}
	return false;
}

#ifdef DEBUG
static void debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
	unsigned int index;
	unsigned int cpu;

	pr_info("cpu_rmap %p, %s:\n", rmap, prefix);

	for_each_possible_cpu(cpu) {
		index = rmap->near[cpu].index;
		pr_info("cpu %u -> obj %u (distance %u)\n",
			cpu, index, rmap->near[cpu].dist);
	}
}
#else
static inline void
debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
}
#endif

/**
 * cpu_rmap_add - add object to a rmap
 * @rmap: CPU rmap allocated with alloc_cpu_rmap()
 * @obj: Object to add to rmap
 *
 * Returns the index of the object.
 */
int cpu_rmap_add(struct cpu_rmap *rmap, void *obj)
{
	u16 index;

	BUG_ON(rmap->used >= rmap->size);
	index = rmap->used++;
	rmap->obj[index] = obj;
	return index;
}
EXPORT_SYMBOL(cpu_rmap_add);

/**
 * cpu_rmap_update - update CPU rmap following a change of object affinity
 * @rmap: CPU rmap to update
 * @index: Index of object whose affinity changed
 * @affinity: New CPU affinity of object
 *
 * Returns 0 on success, or -ENOMEM if a temporary cpumask could not be
 * allocated.
 */
int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
		    const struct cpumask *affinity)
{
	cpumask_var_t update_mask;
	unsigned int cpu;

	if (unlikely(!zalloc_cpumask_var(&update_mask, GFP_KERNEL)))
		return -ENOMEM;

	/* Invalidate distance for all CPUs for which this used to be
	 * the nearest object. Mark those CPUs for update.
	 */
	for_each_online_cpu(cpu) {
		if (rmap->near[cpu].index == index) {
			rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
			cpumask_set_cpu(cpu, update_mask);
		}
	}

	debug_print_rmap(rmap, "after invalidating old distances");

	/* Set distance to 0 for all CPUs in the new affinity mask.
	 * Mark all CPUs within their NUMA nodes for update.
	 */
	for_each_cpu(cpu, affinity) {
		rmap->near[cpu].index = index;
		rmap->near[cpu].dist = 0;
		cpumask_or(update_mask, update_mask,
			   cpumask_of_node(cpu_to_node(cpu)));
	}

	debug_print_rmap(rmap, "after updating neighbours");

	/* Update distances based on topology */
	for_each_cpu(cpu, update_mask) {
		if (cpu_rmap_copy_neigh(rmap, cpu,
					topology_thread_cpumask(cpu), 1))
			continue;
		if (cpu_rmap_copy_neigh(rmap, cpu,
					topology_core_cpumask(cpu), 2))
			continue;
		if (cpu_rmap_copy_neigh(rmap, cpu,
					cpumask_of_node(cpu_to_node(cpu)), 3))
			continue;
		/* We could continue into NUMA node distances, but for now
		 * we give up.
		 */
	}

	debug_print_rmap(rmap, "after copying neighbours");

	free_cpumask_var(update_mask);
	return 0;
}
EXPORT_SYMBOL(cpu_rmap_update);
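/*
 * Sketch of how the distances play out (hypothetical example, not part
 * of this file): steering object @index to @cpu gives that CPU distance
 * 0; CPUs in its topology_thread_cpumask() then resolve to the object
 * at distance 1, CPUs in its topology_core_cpumask() at distance 2, and
 * the rest of its NUMA node at distance 3. "example_steer_obj" is an
 * illustrative name only.
 */
#ifdef CPU_RMAP_EXAMPLE
static int example_steer_obj(struct cpu_rmap *rmap, u16 index,
			     unsigned int cpu)
{
	cpumask_var_t mask;
	int rc;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_set_cpu(cpu, mask);
	rc = cpu_rmap_update(rmap, index, mask);	/* cpu now at dist 0 */
	free_cpumask_var(mask);
	return rc;
}
#endif /* CPU_RMAP_EXAMPLE */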
/* Glue between IRQ affinity notifiers and CPU rmaps */

struct irq_glue {
	struct irq_affinity_notify notify;
	struct cpu_rmap *rmap;
	u16 index;
};

/**
 * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
 * @rmap: Reverse-map allocated with alloc_irq_cpu_rmap(), or %NULL
 *
 * Must be called in process context, before freeing the IRQs.
 */
void free_irq_cpu_rmap(struct cpu_rmap *rmap)
{
	struct irq_glue *glue;
	u16 index;

	if (!rmap)
		return;

	for (index = 0; index < rmap->used; index++) {
		glue = rmap->obj[index];
		if (glue)
			irq_set_affinity_notifier(glue->notify.irq, NULL);
	}

	cpu_rmap_put(rmap);
}
EXPORT_SYMBOL(free_irq_cpu_rmap);

/**
 * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated
 * @notify: struct irq_affinity_notify passed by irq/manage.c
 * @mask: cpu mask for new SMP affinity
 *
 * This is executed in workqueue context.
 */
static void
irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
{
	struct irq_glue *glue =
		container_of(notify, struct irq_glue, notify);
	int rc;

	rc = cpu_rmap_update(glue->rmap, glue->index, mask);
	if (rc)
		pr_warn("irq_cpu_rmap_notify: update failed: %d\n", rc);
}

/**
 * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem
 * @ref: kref to struct irq_affinity_notify passed by irq/manage.c
 */
static void irq_cpu_rmap_release(struct kref *ref)
{
	struct irq_glue *glue =
		container_of(ref, struct irq_glue, notify.kref);

	cpu_rmap_put(glue->rmap);
	kfree(glue);
}

/**
 * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map
 * @rmap: The reverse-map
 * @irq: The IRQ number
 *
 * This adds an IRQ affinity notifier that will update the reverse-map
 * automatically.
 *
 * Must be called in process context, after the IRQ is allocated but
 * before it is bound with request_irq().
 */
int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
{
	struct irq_glue *glue = kzalloc(sizeof(*glue), GFP_KERNEL);
	int rc;

	if (!glue)
		return -ENOMEM;
	glue->notify.notify = irq_cpu_rmap_notify;
	glue->notify.release = irq_cpu_rmap_release;
	glue->rmap = rmap;
	cpu_rmap_get(rmap);
	glue->index = cpu_rmap_add(rmap, glue);
	rc = irq_set_affinity_notifier(irq, &glue->notify);
	if (rc) {
		/* Clear the stale pointer stored by cpu_rmap_add() above,
		 * so that free_irq_cpu_rmap() cannot dereference the glue
		 * structure after we free it.
		 */
		rmap->obj[glue->index] = NULL;
		cpu_rmap_put(glue->rmap);
		kfree(glue);
	}
	return rc;
}
EXPORT_SYMBOL(irq_cpu_rmap_add);
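/*
 * End-to-end usage sketch (hypothetical, not part of this file): a
 * multiqueue driver maps each of its IRQs into a reverse-map, so that
 * later lookups can find the queue nearest to a given CPU. The names
 * "example_map_irqs" and "irqs" are illustrative; real callers such as
 * network drivers typically use the alloc_irq_cpu_rmap() wrapper from
 * <linux/cpu_rmap.h>.
 */
#ifdef CPU_RMAP_EXAMPLE
static struct cpu_rmap *example_map_irqs(const int *irqs,
					 unsigned int nr_irqs)
{
	struct cpu_rmap *rmap;
	unsigned int i;

	rmap = alloc_cpu_rmap(nr_irqs, GFP_KERNEL);
	if (!rmap)
		return NULL;

	for (i = 0; i < nr_irqs; i++) {
		if (irq_cpu_rmap_add(rmap, irqs[i])) {
			/* Unregisters the notifiers added so far and
			 * drops our reference.
			 */
			free_irq_cpu_rmap(rmap);
			return NULL;
		}
	}
	return rmap;	/* caller frees with free_irq_cpu_rmap() */
}
#endif /* CPU_RMAP_EXAMPLE */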