/*
 * mm/percpu-vm.c - vmalloc area based chunk allocation
 *
 * Copyright (C) 2010		SUSE Linux Products GmbH
 * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2.
 *
 * Chunks are mapped into vmalloc areas and populated page by page.
 * This is the default chunk allocator.
 */

static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
				    unsigned int cpu, int page_idx)
{
	/* must not be used on pre-mapped chunk */
	WARN_ON(chunk->immutable);

	return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx));
}

/**
 * pcpu_get_pages - get temp pages array
 * @chunk: chunk of interest
 *
 * Returns pointer to array of pointers to struct page which can be indexed
 * with pcpu_page_idx().  Note that there is only one array and accesses
 * should be serialized by pcpu_alloc_mutex.
 *
 * RETURNS:
 * Pointer to temp pages array on success.
 */
static struct page **pcpu_get_pages(struct pcpu_chunk *chunk)
{
	static struct page **pages;
	size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);

	lockdep_assert_held(&pcpu_alloc_mutex);

	if (!pages)
		pages = pcpu_mem_zalloc(pages_size);
	return pages;
}

/**
 * pcpu_free_pages - free pages which were allocated for @chunk
 * @chunk: chunk pages were allocated for
 * @pages: array of pages to be freed, indexed by pcpu_page_idx()
 * @page_start: page index of the first page to be freed
 * @page_end: page index of the last page to be freed + 1
 *
 * Free pages [@page_start,@page_end) in @pages for all units.
 * The pages were allocated for @chunk.
 */
static void pcpu_free_pages(struct pcpu_chunk *chunk,
			    struct page **pages, int page_start, int page_end)
{
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page *page = pages[pcpu_page_idx(cpu, i)];

			if (page)
				__free_page(page);
		}
	}
}
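
/*
 * Illustrative sketch, not part of the original file: the temp array
 * returned by pcpu_get_pages() has one slot per (unit, page) pair and
 * is meant to be indexed through pcpu_page_idx().  Assuming the usual
 * layout of one contiguous run of pcpu_unit_pages slots per unit, a
 * walk over the slots backing pages [page_start, page_end) of one cpu
 * would look roughly like:
 *
 *	struct page **pages = pcpu_get_pages(chunk);
 *	int i;
 *
 *	if (pages)
 *		for (i = page_start; i < page_end; i++)
 *			do_something(pages[pcpu_page_idx(cpu, i)]);
 *
 * do_something() is a placeholder.  The array is a single static
 * buffer, so any such walk must happen with pcpu_alloc_mutex held,
 * as noted above.
 */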

/**
 * pcpu_alloc_pages - allocates pages for @chunk
 * @chunk: target chunk
 * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
 * @page_start: page index of the first page to be allocated
 * @page_end: page index of the last page to be allocated + 1
 *
 * Allocate pages [@page_start,@page_end) into @pages for all units.
 * The allocation is for @chunk.  Percpu core doesn't care about the
 * content of @pages and will pass it verbatim to pcpu_map_pages().
 */
static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
			    struct page **pages, int page_start, int page_end)
{
	const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD;
	unsigned int cpu, tcpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page **pagep = &pages[pcpu_page_idx(cpu, i)];

			*pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0);
			if (!*pagep)
				goto err;
		}
	}
	return 0;

err:
	while (--i >= page_start)
		__free_page(pages[pcpu_page_idx(cpu, i)]);

	for_each_possible_cpu(tcpu) {
		if (tcpu == cpu)
			break;
		for (i = page_start; i < page_end; i++)
			__free_page(pages[pcpu_page_idx(tcpu, i)]);
	}
	return -ENOMEM;
}

/**
 * pcpu_pre_unmap_flush - flush cache prior to unmapping
 * @chunk: chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages in [@page_start,@page_end) of @chunk are about to be
 * unmapped.  Flush cache.  As each flushing trial can be very
 * expensive, issue flush on the whole region at once rather than
 * doing it for each cpu.  This could be overkill but is more
 * scalable.
 */
static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
				 int page_start, int page_end)
{
	flush_cache_vunmap(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
{
	unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT);
}

/**
 * pcpu_unmap_pages - unmap pages out of a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array which can be used to pass information to free
 * @page_start: page index of the first page to unmap
 * @page_end: page index of the last page to unmap + 1
 *
 * For each cpu, unmap pages [@page_start,@page_end) out of @chunk.
 * Corresponding elements in @pages were cleared by the caller and can
 * be used to carry information to pcpu_free_pages() which will be
 * called after all unmaps are finished.  The caller should call
 * proper pre/post flush functions.
 */
static void pcpu_unmap_pages(struct pcpu_chunk *chunk,
			     struct page **pages, int page_start, int page_end)
{
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page *page;

			page = pcpu_chunk_page(chunk, cpu, i);
			WARN_ON(!page);
			pages[pcpu_page_idx(cpu, i)] = page;
		}
		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start),
				   page_end - page_start);
	}
}
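
/*
 * Illustrative note, not part of the original file: the unwind in
 * pcpu_alloc_pages() has to cover two partial states - the pages
 * already allocated for the failing cpu (indices page_start..i-1) and
 * the full [page_start, page_end) range of every cpu visited before
 * it.  A rough equivalent of the same rollback, written as one loop
 * over the units completed so far, would be:
 *
 *	for_each_possible_cpu(tcpu) {
 *		int end = (tcpu == cpu) ? i : page_end;
 *
 *		for (j = page_start; j < end; j++)
 *			__free_page(pages[pcpu_page_idx(tcpu, j)]);
 *		if (tcpu == cpu)
 *			break;
 *	}
 *
 * j is a hypothetical local; the in-tree version above keeps the two
 * cases in separate loops, which frees the same set of pages.
 */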

/**
 * pcpu_post_unmap_tlb_flush - flush TLB after unmapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start,@page_end) of @chunk have been unmapped.  Flush
 * TLB for the regions.  This can be skipped if the area is to be
 * returned to vmalloc as vmalloc will handle TLB flushing lazily.
 *
 * As with pcpu_pre_unmap_flush(), TLB flushing is also done at once
 * for the whole region.
 */
static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
				      int page_start, int page_end)
{
	flush_tlb_kernel_range(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

static int __pcpu_map_pages(unsigned long addr, struct page **pages,
			    int nr_pages)
{
	return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT,
					PAGE_KERNEL, pages);
}

/**
 * pcpu_map_pages - map pages into a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array containing pages to be mapped
 * @page_start: page index of the first page to map
 * @page_end: page index of the last page to map + 1
 *
 * For each cpu, map pages [@page_start,@page_end) into @chunk.  The
 * caller is responsible for calling pcpu_post_map_flush() after all
 * mappings are complete.
 *
 * This function is responsible for setting up whatever is necessary for
 * reverse lookup (addr -> chunk).
 */
static int pcpu_map_pages(struct pcpu_chunk *chunk,
			  struct page **pages, int page_start, int page_end)
{
	unsigned int cpu, tcpu;
	int i, err;

	for_each_possible_cpu(cpu) {
		err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start),
				       &pages[pcpu_page_idx(cpu, page_start)],
				       page_end - page_start);
		if (err < 0)
			goto err;

		for (i = page_start; i < page_end; i++)
			pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)],
					    chunk);
	}
	return 0;
err:
	for_each_possible_cpu(tcpu) {
		if (tcpu == cpu)
			break;
		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start),
				   page_end - page_start);
	}
	pcpu_post_unmap_tlb_flush(chunk, page_start, page_end);
	return err;
}

/**
 * pcpu_post_map_flush - flush cache after mapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start,@page_end) of @chunk have been mapped.  Flush
 * cache.
 *
 * As with pcpu_pre_unmap_flush(), cache flushing is also done at once
 * for the whole region.
 */
static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
				int page_start, int page_end)
{
	flush_cache_vmap(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}
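
/*
 * Illustrative note, not part of the original file: the flush helpers
 * above deliberately take one span covering every unit instead of
 * flushing per cpu.  Assuming pcpu_chunk_addr() resolves to something
 * like
 *
 *	chunk->base_addr + unit_offset(cpu) + (page_idx << PAGE_SHIFT)
 *
 * the flushed range
 *
 *	[pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
 *	 pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end))
 *
 * runs from the lowest-addressed unit's first affected page to the
 * highest-addressed unit's last affected page, including the unrelated
 * gaps between units.  That is the "could be overkill but is more
 * scalable" trade-off mentioned in pcpu_pre_unmap_flush().
 */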

/**
 * pcpu_populate_chunk - populate and map an area of a pcpu_chunk
 * @chunk: chunk of interest
 * @page_start: the start page
 * @page_end: the end page
 *
 * For each cpu, populate and map pages [@page_start,@page_end) into
 * @chunk.
 *
 * CONTEXT:
 * pcpu_alloc_mutex, does GFP_KERNEL allocation.
 */
static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
			       int page_start, int page_end)
{
	struct page **pages;

	pages = pcpu_get_pages(chunk);
	if (!pages)
		return -ENOMEM;

	if (pcpu_alloc_pages(chunk, pages, page_start, page_end))
		return -ENOMEM;

	if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
		pcpu_free_pages(chunk, pages, page_start, page_end);
		return -ENOMEM;
	}
	pcpu_post_map_flush(chunk, page_start, page_end);

	return 0;
}

/**
 * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk
 * @chunk: chunk to depopulate
 * @page_start: the start page
 * @page_end: the end page
 *
 * For each cpu, depopulate and unmap pages [@page_start,@page_end)
 * from @chunk.
 *
 * CONTEXT:
 * pcpu_alloc_mutex.
 */
static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
				  int page_start, int page_end)
{
	struct page **pages;

	/*
	 * If control reaches here, there must have been at least one
	 * successful population attempt so the temp pages array must
	 * be available now.
	 */
	pages = pcpu_get_pages(chunk);
	BUG_ON(!pages);

	/* unmap and free */
	pcpu_pre_unmap_flush(chunk, page_start, page_end);

	pcpu_unmap_pages(chunk, pages, page_start, page_end);

	/* no need to flush tlb, vmalloc will handle it lazily */

	pcpu_free_pages(chunk, pages, page_start, page_end);
}

static struct pcpu_chunk *pcpu_create_chunk(void)
{
	struct pcpu_chunk *chunk;
	struct vm_struct **vms;

	chunk = pcpu_alloc_chunk();
	if (!chunk)
		return NULL;

	vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes,
				pcpu_nr_groups, pcpu_atom_size);
	if (!vms) {
		pcpu_free_chunk(chunk);
		return NULL;
	}

	chunk->data = vms;
	chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0];
	return chunk;
}

static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
{
	if (chunk && chunk->data)
		pcpu_free_vm_areas(chunk->data, pcpu_nr_groups);
	pcpu_free_chunk(chunk);
}

static struct page *pcpu_addr_to_page(void *addr)
{
	return vmalloc_to_page(addr);
}

static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai)
{
	/* no extra restriction */
	return 0;
}
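
/*
 * Illustrative sketch, not part of the original file: the functions
 * above form the vmalloc-backed chunk operations that mm/percpu.c is
 * expected to drive.  Ignoring locking and error handling, a chunk's
 * lifetime as seen from this file looks roughly like:
 *
 *	chunk = pcpu_create_chunk();		reserve vmalloc areas
 *	pcpu_populate_chunk(chunk, s, e);	back pages [s, e) and map
 *	...					allocations served from chunk
 *	pcpu_depopulate_chunk(chunk, s, e);	unmap and free [s, e)
 *	pcpu_destroy_chunk(chunk);		release the vmalloc areas
 *
 * s and e are hypothetical page indices within a unit.  The actual
 * call sites and ordering live in mm/percpu.c, not here.
 */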