This source file includes the following definitions:
- free_work
- vunmap_pte_range
- vunmap_pmd_range
- vunmap_pud_range
- vunmap_p4d_range
- vunmap_page_range
- vmap_pte_range
- vmap_pmd_range
- vmap_pud_range
- vmap_p4d_range
- vmap_page_range_noflush
- vmap_page_range
- is_vmalloc_or_module_addr
- vmalloc_to_page
- vmalloc_to_pfn
- va_size
- get_subtree_max_size
- compute_subtree_max_size
- RB_DECLARE_CALLBACKS_MAX
- __find_vmap_area
- find_va_links
- get_va_next_sibling
- link_va
- unlink_va
- augment_tree_propagate_check
- augment_tree_propagate_from
- insert_vmap_area
- insert_vmap_area_augment
- merge_or_add_vmap_area
- is_within_this_va
- find_vmap_lowest_match
- find_vmap_lowest_linear_match
- find_vmap_lowest_match_check
- classify_va_fit_type
- adjust_va_to_fit_type
- __alloc_vmap_area
- alloc_vmap_area
- register_vmap_purge_notifier
- unregister_vmap_purge_notifier
- __free_vmap_area
- free_vmap_area
- unmap_vmap_area
- lazy_max_pages
- set_iounmap_nonlazy
- __purge_vmap_area_lazy
- try_purge_vmap_area_lazy
- purge_vmap_area_lazy
- free_vmap_area_noflush
- free_unmap_vmap_area
- find_vmap_area
- addr_to_vb_idx
- vmap_block_vaddr
- new_vmap_block
- free_vmap_block
- purge_fragmented_blocks
- purge_fragmented_blocks_allcpus
- vb_alloc
- vb_free
- _vm_unmap_aliases
- vm_unmap_aliases
- vm_unmap_ram
- vm_map_ram
- vm_area_add_early
- vm_area_register_early
- vmap_init_free_space
- vmalloc_init
- map_kernel_range_noflush
- unmap_kernel_range_noflush
- unmap_kernel_range
- map_vm_area
- setup_vmalloc_vm
- clear_vm_uninitialized_flag
- __get_vm_area_node
- __get_vm_area
- __get_vm_area_caller
- get_vm_area
- get_vm_area_caller
- find_vm_area
- remove_vm_area
- set_area_direct_map
- vm_remove_mappings
- __vunmap
- __vfree_deferred
- vfree_atomic
- __vfree
- vfree
- vunmap
- vmap
- __vmalloc_area_node
- __vmalloc_node_range
- __vmalloc_node
- __vmalloc
- __vmalloc_node_flags
- __vmalloc_node_flags_caller
- vmalloc
- vzalloc
- vmalloc_user
- vmalloc_node
- vzalloc_node
- vmalloc_exec
- vmalloc_32
- vmalloc_32_user
- aligned_vread
- aligned_vwrite
- vread
- vwrite
- remap_vmalloc_range_partial
- remap_vmalloc_range
- vmalloc_sync_mappings
- vmalloc_sync_unmappings
- f
- alloc_vm_area
- free_vm_area
- node_to_va
- pvm_find_va_enclose_addr
- pvm_determine_end_from_reverse
- pcpu_get_vm_areas
- pcpu_free_vm_areas
- s_start
- s_next
- s_stop
- show_numa_info
- show_purge_info
- s_show
- proc_vmalloc_init
1 /*
2  *  linux/mm/vmalloc.c
3  *
4  *  (license and attribution header omitted from this listing)
5  */
6
7
8
9
10
11
12 #include <linux/vmalloc.h>
13 #include <linux/mm.h>
14 #include <linux/module.h>
15 #include <linux/highmem.h>
16 #include <linux/sched/signal.h>
17 #include <linux/slab.h>
18 #include <linux/spinlock.h>
19 #include <linux/interrupt.h>
20 #include <linux/proc_fs.h>
21 #include <linux/seq_file.h>
22 #include <linux/set_memory.h>
23 #include <linux/debugobjects.h>
24 #include <linux/kallsyms.h>
25 #include <linux/list.h>
26 #include <linux/notifier.h>
27 #include <linux/rbtree.h>
28 #include <linux/radix-tree.h>
29 #include <linux/rcupdate.h>
30 #include <linux/pfn.h>
31 #include <linux/kmemleak.h>
32 #include <linux/atomic.h>
33 #include <linux/compiler.h>
34 #include <linux/llist.h>
35 #include <linux/bitops.h>
36 #include <linux/rbtree_augmented.h>
37 #include <linux/overflow.h>
38
39 #include <linux/uaccess.h>
40 #include <asm/tlbflush.h>
41 #include <asm/shmparam.h>
42
43 #include "internal.h"
44
45 struct vfree_deferred {
46 struct llist_head list;
47 struct work_struct wq;
48 };
49 static DEFINE_PER_CPU(struct vfree_deferred, vfree_deferred);
50
51 static void __vunmap(const void *, int);
52
53 static void free_work(struct work_struct *w)
54 {
55 struct vfree_deferred *p = container_of(w, struct vfree_deferred, wq);
56 struct llist_node *t, *llnode;
57
58 llist_for_each_safe(llnode, t, llist_del_all(&p->list))
59 __vunmap((void *)llnode, 1);
60 }
61
62
63
64 static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
65 {
66 pte_t *pte;
67
68 pte = pte_offset_kernel(pmd, addr);
69 do {
70 pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
71 WARN_ON(!pte_none(ptent) && !pte_present(ptent));
72 } while (pte++, addr += PAGE_SIZE, addr != end);
73 }
74
75 static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
76 {
77 pmd_t *pmd;
78 unsigned long next;
79
80 pmd = pmd_offset(pud, addr);
81 do {
82 next = pmd_addr_end(addr, end);
83 if (pmd_clear_huge(pmd))
84 continue;
85 if (pmd_none_or_clear_bad(pmd))
86 continue;
87 vunmap_pte_range(pmd, addr, next);
88 } while (pmd++, addr = next, addr != end);
89 }
90
91 static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end)
92 {
93 pud_t *pud;
94 unsigned long next;
95
96 pud = pud_offset(p4d, addr);
97 do {
98 next = pud_addr_end(addr, end);
99 if (pud_clear_huge(pud))
100 continue;
101 if (pud_none_or_clear_bad(pud))
102 continue;
103 vunmap_pmd_range(pud, addr, next);
104 } while (pud++, addr = next, addr != end);
105 }
106
107 static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end)
108 {
109 p4d_t *p4d;
110 unsigned long next;
111
112 p4d = p4d_offset(pgd, addr);
113 do {
114 next = p4d_addr_end(addr, end);
115 if (p4d_clear_huge(p4d))
116 continue;
117 if (p4d_none_or_clear_bad(p4d))
118 continue;
119 vunmap_pud_range(p4d, addr, next);
120 } while (p4d++, addr = next, addr != end);
121 }
122
123 static void vunmap_page_range(unsigned long addr, unsigned long end)
124 {
125 pgd_t *pgd;
126 unsigned long next;
127
128 BUG_ON(addr >= end);
129 pgd = pgd_offset_k(addr);
130 do {
131 next = pgd_addr_end(addr, end);
132 if (pgd_none_or_clear_bad(pgd))
133 continue;
134 vunmap_p4d_range(pgd, addr, next);
135 } while (pgd++, addr = next, addr != end);
136 }
137
138 static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
139 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
140 {
141 pte_t *pte;
142
143
144
145
146
147
148 pte = pte_alloc_kernel(pmd, addr);
149 if (!pte)
150 return -ENOMEM;
151 do {
152 struct page *page = pages[*nr];
153
154 if (WARN_ON(!pte_none(*pte)))
155 return -EBUSY;
156 if (WARN_ON(!page))
157 return -ENOMEM;
158 set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
159 (*nr)++;
160 } while (pte++, addr += PAGE_SIZE, addr != end);
161 return 0;
162 }
163
164 static int vmap_pmd_range(pud_t *pud, unsigned long addr,
165 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
166 {
167 pmd_t *pmd;
168 unsigned long next;
169
170 pmd = pmd_alloc(&init_mm, pud, addr);
171 if (!pmd)
172 return -ENOMEM;
173 do {
174 next = pmd_addr_end(addr, end);
175 if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
176 return -ENOMEM;
177 } while (pmd++, addr = next, addr != end);
178 return 0;
179 }
180
181 static int vmap_pud_range(p4d_t *p4d, unsigned long addr,
182 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
183 {
184 pud_t *pud;
185 unsigned long next;
186
187 pud = pud_alloc(&init_mm, p4d, addr);
188 if (!pud)
189 return -ENOMEM;
190 do {
191 next = pud_addr_end(addr, end);
192 if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
193 return -ENOMEM;
194 } while (pud++, addr = next, addr != end);
195 return 0;
196 }
197
198 static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
199 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
200 {
201 p4d_t *p4d;
202 unsigned long next;
203
204 p4d = p4d_alloc(&init_mm, pgd, addr);
205 if (!p4d)
206 return -ENOMEM;
207 do {
208 next = p4d_addr_end(addr, end);
209 if (vmap_pud_range(p4d, addr, next, prot, pages, nr))
210 return -ENOMEM;
211 } while (p4d++, addr = next, addr != end);
212 return 0;
213 }
214
215
216 /*
217  * Set up kernel page table entries so that [start, end) maps the given
218  * pages array with protection 'prot'. No cache or TLB flushing is done
219  * here; on success the number of pages actually mapped is returned.
220  */
221 static int vmap_page_range_noflush(unsigned long start, unsigned long end,
222 pgprot_t prot, struct page **pages)
223 {
224 pgd_t *pgd;
225 unsigned long next;
226 unsigned long addr = start;
227 int err = 0;
228 int nr = 0;
229
230 BUG_ON(addr >= end);
231 pgd = pgd_offset_k(addr);
232 do {
233 next = pgd_addr_end(addr, end);
234 err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr);
235 if (err)
236 return err;
237 } while (pgd++, addr = next, addr != end);
238
239 return nr;
240 }
241
242 static int vmap_page_range(unsigned long start, unsigned long end,
243 pgprot_t prot, struct page **pages)
244 {
245 int ret;
246
247 ret = vmap_page_range_noflush(start, end, prot, pages);
248 flush_cache_vmap(start, end);
249 return ret;
250 }
251
252 int is_vmalloc_or_module_addr(const void *x)
253 {
254
255
256
257
258
259 #if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
260 unsigned long addr = (unsigned long)x;
261 if (addr >= MODULES_VADDR && addr < MODULES_END)
262 return 1;
263 #endif
264 return is_vmalloc_addr(x);
265 }
266
267
268
269
270 struct page *vmalloc_to_page(const void *vmalloc_addr)
271 {
272 unsigned long addr = (unsigned long) vmalloc_addr;
273 struct page *page = NULL;
274 pgd_t *pgd = pgd_offset_k(addr);
275 p4d_t *p4d;
276 pud_t *pud;
277 pmd_t *pmd;
278 pte_t *ptep, pte;
279
280
281
282
283
284 VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
285
286 if (pgd_none(*pgd))
287 return NULL;
288 p4d = p4d_offset(pgd, addr);
289 if (p4d_none(*p4d))
290 return NULL;
291 pud = pud_offset(p4d, addr);
292
293
294
295
296
297
298
299
300
301 WARN_ON_ONCE(pud_bad(*pud));
302 if (pud_none(*pud) || pud_bad(*pud))
303 return NULL;
304 pmd = pmd_offset(pud, addr);
305 WARN_ON_ONCE(pmd_bad(*pmd));
306 if (pmd_none(*pmd) || pmd_bad(*pmd))
307 return NULL;
308
309 ptep = pte_offset_map(pmd, addr);
310 pte = *ptep;
311 if (pte_present(pte))
312 page = pte_page(pte);
313 pte_unmap(ptep);
314 return page;
315 }
316 EXPORT_SYMBOL(vmalloc_to_page);
317
318
319
320
321 unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
322 {
323 return page_to_pfn(vmalloc_to_page(vmalloc_addr));
324 }
325 EXPORT_SYMBOL(vmalloc_to_pfn);
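/*
 * Illustrative sketch, not part of this file: a caller that needs the
 * backing pages of a vmalloc'ed buffer (for instance to build a
 * scatterlist) can walk it page by page. The helper name is made up.
 *
 *	static struct page *nth_backing_page(void *vmalloc_buf, unsigned int n)
 *	{
 *		return vmalloc_to_page((char *)vmalloc_buf + n * PAGE_SIZE);
 *	}
 */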
326
327
328
329
330 #define DEBUG_AUGMENT_PROPAGATE_CHECK 0
331 #define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0
332
333
334 static DEFINE_SPINLOCK(vmap_area_lock);
335
336 LIST_HEAD(vmap_area_list);
337 static LLIST_HEAD(vmap_purge_list);
338 static struct rb_root vmap_area_root = RB_ROOT;
339 static bool vmap_initialized __read_mostly;
340
341
342
343
344
345
346
347 static struct kmem_cache *vmap_area_cachep;
348
349
350
351
352
353 static LIST_HEAD(free_vmap_area_list);
354
355
356
357
358
359
360
361
362
363
364
365 static struct rb_root free_vmap_area_root = RB_ROOT;
366
367
368
369
370
371
372 static DEFINE_PER_CPU(struct vmap_area *, ne_fit_preload_node);
373
374 static __always_inline unsigned long
375 va_size(struct vmap_area *va)
376 {
377 return (va->va_end - va->va_start);
378 }
379
380 static __always_inline unsigned long
381 get_subtree_max_size(struct rb_node *node)
382 {
383 struct vmap_area *va;
384
385 va = rb_entry_safe(node, struct vmap_area, rb_node);
386 return va ? va->subtree_max_size : 0;
387 }
388
389
390
391
392 static __always_inline unsigned long
393 compute_subtree_max_size(struct vmap_area *va)
394 {
395 return max3(va_size(va),
396 get_subtree_max_size(va->rb_node.rb_left),
397 get_subtree_max_size(va->rb_node.rb_right));
398 }
399
400 RB_DECLARE_CALLBACKS_MAX(static, free_vmap_area_rb_augment_cb,
401 struct vmap_area, rb_node, unsigned long, subtree_max_size, va_size)
402
403 static void purge_vmap_area_lazy(void);
404 static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
405 static unsigned long lazy_max_pages(void);
406
407 static atomic_long_t nr_vmalloc_pages;
408
409 unsigned long vmalloc_nr_pages(void)
410 {
411 return atomic_long_read(&nr_vmalloc_pages);
412 }
413
414 static struct vmap_area *__find_vmap_area(unsigned long addr)
415 {
416 struct rb_node *n = vmap_area_root.rb_node;
417
418 while (n) {
419 struct vmap_area *va;
420
421 va = rb_entry(n, struct vmap_area, rb_node);
422 if (addr < va->va_start)
423 n = n->rb_left;
424 else if (addr >= va->va_end)
425 n = n->rb_right;
426 else
427 return va;
428 }
429
430 return NULL;
431 }
432
433
434
435
436
437 static __always_inline struct rb_node **
438 find_va_links(struct vmap_area *va,
439 struct rb_root *root, struct rb_node *from,
440 struct rb_node **parent)
441 {
442 struct vmap_area *tmp_va;
443 struct rb_node **link;
444
445 if (root) {
446 link = &root->rb_node;
447 if (unlikely(!*link)) {
448 *parent = NULL;
449 return link;
450 }
451 } else {
452 link = &from;
453 }
454
455
456
457
458
459
460 do {
461 tmp_va = rb_entry(*link, struct vmap_area, rb_node);
462
463
464
465
466
467
468 if (va->va_start < tmp_va->va_end &&
469 va->va_end <= tmp_va->va_start)
470 link = &(*link)->rb_left;
471 else if (va->va_end > tmp_va->va_start &&
472 va->va_start >= tmp_va->va_end)
473 link = &(*link)->rb_right;
474 else
475 BUG();
476 } while (*link);
477
478 *parent = &tmp_va->rb_node;
479 return link;
480 }
481
482 static __always_inline struct list_head *
483 get_va_next_sibling(struct rb_node *parent, struct rb_node **link)
484 {
485 struct list_head *list;
486
487 if (unlikely(!parent))
488
489
490
491
492
493
494 return NULL;
495
496 list = &rb_entry(parent, struct vmap_area, rb_node)->list;
497 return (&parent->rb_right == link ? list->next : list);
498 }
499
500 static __always_inline void
501 link_va(struct vmap_area *va, struct rb_root *root,
502 struct rb_node *parent, struct rb_node **link, struct list_head *head)
503 {
504
505
506
507
508 if (likely(parent)) {
509 head = &rb_entry(parent, struct vmap_area, rb_node)->list;
510 if (&parent->rb_right != link)
511 head = head->prev;
512 }
513
514
515 rb_link_node(&va->rb_node, parent, link);
516 if (root == &free_vmap_area_root) {
517
518
519
520
521
522
523
524
525
526
527
528 rb_insert_augmented(&va->rb_node,
529 root, &free_vmap_area_rb_augment_cb);
530 va->subtree_max_size = 0;
531 } else {
532 rb_insert_color(&va->rb_node, root);
533 }
534
535
536 list_add(&va->list, head);
537 }
538
539 static __always_inline void
540 unlink_va(struct vmap_area *va, struct rb_root *root)
541 {
542 if (WARN_ON(RB_EMPTY_NODE(&va->rb_node)))
543 return;
544
545 if (root == &free_vmap_area_root)
546 rb_erase_augmented(&va->rb_node,
547 root, &free_vmap_area_rb_augment_cb);
548 else
549 rb_erase(&va->rb_node, root);
550
551 list_del(&va->list);
552 RB_CLEAR_NODE(&va->rb_node);
553 }
554
555 #if DEBUG_AUGMENT_PROPAGATE_CHECK
556 static void
557 augment_tree_propagate_check(struct rb_node *n)
558 {
559 struct vmap_area *va;
560 struct rb_node *node;
561 unsigned long size;
562 bool found = false;
563
564 if (n == NULL)
565 return;
566
567 va = rb_entry(n, struct vmap_area, rb_node);
568 size = va->subtree_max_size;
569 node = n;
570
571 while (node) {
572 va = rb_entry(node, struct vmap_area, rb_node);
573
574 if (get_subtree_max_size(node->rb_left) == size) {
575 node = node->rb_left;
576 } else {
577 if (va_size(va) == size) {
578 found = true;
579 break;
580 }
581
582 node = node->rb_right;
583 }
584 }
585
586 if (!found) {
587 va = rb_entry(n, struct vmap_area, rb_node);
588 pr_emerg("tree is corrupted: %lu, %lu\n",
589 va_size(va), va->subtree_max_size);
590 }
591
592 augment_tree_propagate_check(n->rb_left);
593 augment_tree_propagate_check(n->rb_right);
594 }
595 #endif
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611 /*
612  * free_vmap_area_root is an augmented red-black tree: each node caches
613  * in subtree_max_size the size of the largest free area anywhere in its
614  * subtree (see compute_subtree_max_size()), which is what lets
615  * find_vmap_lowest_match() skip whole subtrees that cannot satisfy a
616  * request.
617  *
618  * When the size of an area changes (insert, remove, split or merge),
619  * the cached values on the path from that node up to the root may go
620  * stale. This helper walks that path and recomputes them, stopping as
621  * soon as a node's value turns out to be unchanged, since then all of
622  * its ancestors are unchanged as well.
623  */
624 static __always_inline void
625 augment_tree_propagate_from(struct vmap_area *va)
626 {
627 struct rb_node *node = &va->rb_node;
628 unsigned long new_va_sub_max_size;
629
630 while (node) {
631 va = rb_entry(node, struct vmap_area, rb_node);
632 new_va_sub_max_size = compute_subtree_max_size(va);
633
634
635
636
637
638
639
640 if (va->subtree_max_size == new_va_sub_max_size)
641 break;
642
643 va->subtree_max_size = new_va_sub_max_size;
644 node = rb_parent(&va->rb_node);
645 }
646
647 #if DEBUG_AUGMENT_PROPAGATE_CHECK
648 augment_tree_propagate_check(free_vmap_area_root.rb_node);
649 #endif
650 }
651
652 static void
653 insert_vmap_area(struct vmap_area *va,
654 struct rb_root *root, struct list_head *head)
655 {
656 struct rb_node **link;
657 struct rb_node *parent;
658
659 link = find_va_links(va, root, NULL, &parent);
660 link_va(va, root, parent, link, head);
661 }
662
663 static void
664 insert_vmap_area_augment(struct vmap_area *va,
665 struct rb_node *from, struct rb_root *root,
666 struct list_head *head)
667 {
668 struct rb_node **link;
669 struct rb_node *parent;
670
671 if (from)
672 link = find_va_links(va, NULL, from, &parent);
673 else
674 link = find_va_links(va, root, NULL, &parent);
675
676 link_va(va, root, parent, link, head);
677 augment_tree_propagate_from(va);
678 }
679
680
681
682
683
684
685
686 static __always_inline void
687 merge_or_add_vmap_area(struct vmap_area *va,
688 struct rb_root *root, struct list_head *head)
689 {
690 struct vmap_area *sibling;
691 struct list_head *next;
692 struct rb_node **link;
693 struct rb_node *parent;
694 bool merged = false;
695
696
697
698
699
700 link = find_va_links(va, root, NULL, &parent);
701
702
703
704
705 next = get_va_next_sibling(parent, link);
706 if (unlikely(next == NULL))
707 goto insert;
708
709
710
711
712
713
714
715
716 if (next != head) {
717 sibling = list_entry(next, struct vmap_area, list);
718 if (sibling->va_start == va->va_end) {
719 sibling->va_start = va->va_start;
720
721
722 augment_tree_propagate_from(sibling);
723
724
725 kmem_cache_free(vmap_area_cachep, va);
726
727
728 va = sibling;
729 merged = true;
730 }
731 }
732
733
734
735
736
737
738
739
740 if (next->prev != head) {
741 sibling = list_entry(next->prev, struct vmap_area, list);
742 if (sibling->va_end == va->va_start) {
743 sibling->va_end = va->va_end;
744
745
746 augment_tree_propagate_from(sibling);
747
748 if (merged)
749 unlink_va(va, root);
750
751
752 kmem_cache_free(vmap_area_cachep, va);
753 return;
754 }
755 }
756
757 insert:
758 if (!merged) {
759 link_va(va, root, parent, link, head);
760 augment_tree_propagate_from(va);
761 }
762 }
763
764 static __always_inline bool
765 is_within_this_va(struct vmap_area *va, unsigned long size,
766 unsigned long align, unsigned long vstart)
767 {
768 unsigned long nva_start_addr;
769
770 if (va->va_start > vstart)
771 nva_start_addr = ALIGN(va->va_start, align);
772 else
773 nva_start_addr = ALIGN(vstart, align);
774
775
776 if (nva_start_addr + size < nva_start_addr ||
777 nva_start_addr < vstart)
778 return false;
779
780 return (nva_start_addr + size <= va->va_end);
781 }
782
783
784
785
786
787
788 static __always_inline struct vmap_area *
789 find_vmap_lowest_match(unsigned long size,
790 unsigned long align, unsigned long vstart)
791 {
792 struct vmap_area *va;
793 struct rb_node *node;
794 unsigned long length;
795
796
797 node = free_vmap_area_root.rb_node;
798
799
800 length = size + align - 1;
801
802 while (node) {
803 va = rb_entry(node, struct vmap_area, rb_node);
804
805 if (get_subtree_max_size(node->rb_left) >= length &&
806 vstart < va->va_start) {
807 node = node->rb_left;
808 } else {
809 if (is_within_this_va(va, size, align, vstart))
810 return va;
811
812
813
814
815
816
817 if (get_subtree_max_size(node->rb_right) >= length) {
818 node = node->rb_right;
819 continue;
820 }
821
822
823
824
825
826
827 while ((node = rb_parent(node))) {
828 va = rb_entry(node, struct vmap_area, rb_node);
829 if (is_within_this_va(va, size, align, vstart))
830 return va;
831
832 if (get_subtree_max_size(node->rb_right) >= length &&
833 vstart <= va->va_start) {
834 node = node->rb_right;
835 break;
836 }
837 }
838 }
839 }
840
841 return NULL;
842 }
843
844 #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
845 #include <linux/random.h>
846
847 static struct vmap_area *
848 find_vmap_lowest_linear_match(unsigned long size,
849 unsigned long align, unsigned long vstart)
850 {
851 struct vmap_area *va;
852
853 list_for_each_entry(va, &free_vmap_area_list, list) {
854 if (!is_within_this_va(va, size, align, vstart))
855 continue;
856
857 return va;
858 }
859
860 return NULL;
861 }
862
863 static void
864 find_vmap_lowest_match_check(unsigned long size)
865 {
866 struct vmap_area *va_1, *va_2;
867 unsigned long vstart;
868 unsigned int rnd;
869
870 get_random_bytes(&rnd, sizeof(rnd));
871 vstart = VMALLOC_START + rnd;
872
873 va_1 = find_vmap_lowest_match(size, 1, vstart);
874 va_2 = find_vmap_lowest_linear_match(size, 1, vstart);
875
876 if (va_1 != va_2)
877 pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n",
878 va_1, va_2, vstart);
879 }
880 #endif
881
882 enum fit_type {
883 NOTHING_FIT = 0,
884 FL_FIT_TYPE = 1,
885 LE_FIT_TYPE = 2,
886 RE_FIT_TYPE = 3,
887 NE_FIT_TYPE = 4
888 };
889
890 static __always_inline enum fit_type
891 classify_va_fit_type(struct vmap_area *va,
892 unsigned long nva_start_addr, unsigned long size)
893 {
894 enum fit_type type;
895
896
897 if (nva_start_addr < va->va_start ||
898 nva_start_addr + size > va->va_end)
899 return NOTHING_FIT;
900
901
902 if (va->va_start == nva_start_addr) {
903 if (va->va_end == nva_start_addr + size)
904 type = FL_FIT_TYPE;
905 else
906 type = LE_FIT_TYPE;
907 } else if (va->va_end == nva_start_addr + size) {
908 type = RE_FIT_TYPE;
909 } else {
910 type = NE_FIT_TYPE;
911 }
912
913 return type;
914 }
915
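/*
 * The fit types classified above, handled case by case by
 * adjust_va_to_fit_type() below (VA is the free area, NVA the requested
 * allocation inside it):
 *
 *	FL_FIT_TYPE:  |<---------- NVA ---------->|   NVA covers VA entirely
 *	LE_FIT_TYPE:  |<--- NVA --->|.............|   NVA at the left edge
 *	RE_FIT_TYPE:  |.............|<--- NVA --->|   NVA at the right edge
 *	NE_FIT_TYPE:  |....|<--- NVA --->|........|   NVA in the middle, VA
 *	                                              must be split in two
 */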
916 static __always_inline int
917 adjust_va_to_fit_type(struct vmap_area *va,
918 unsigned long nva_start_addr, unsigned long size,
919 enum fit_type type)
920 {
921 struct vmap_area *lva = NULL;
922
923 if (type == FL_FIT_TYPE) {
924
925
926
927
928
929
930
931 unlink_va(va, &free_vmap_area_root);
932 kmem_cache_free(vmap_area_cachep, va);
933 } else if (type == LE_FIT_TYPE) {
934
935
936
937
938
939
940
941 va->va_start += size;
942 } else if (type == RE_FIT_TYPE) {
943
944
945
946
947
948
949
950 va->va_end = nva_start_addr;
951 } else if (type == NE_FIT_TYPE) {
952
953
954
955
956
957
958
959 lva = __this_cpu_xchg(ne_fit_preload_node, NULL);
960 if (unlikely(!lva)) {
961
962
963
964
965
966
967
968
969
970
971
972
973 lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
974 if (!lva)
975 return -1;
976 }
977
978
979
980
981 lva->va_start = va->va_start;
982 lva->va_end = nva_start_addr;
983
984
985
986
987 va->va_start = nva_start_addr + size;
988 } else {
989 return -1;
990 }
991
992 if (type != FL_FIT_TYPE) {
993 augment_tree_propagate_from(va);
994
995 if (lva)
996 insert_vmap_area_augment(lva, &va->rb_node,
997 &free_vmap_area_root, &free_vmap_area_list);
998 }
999
1000 return 0;
1001 }
1002
1003
1004
1005
1006
1007 static __always_inline unsigned long
1008 __alloc_vmap_area(unsigned long size, unsigned long align,
1009 unsigned long vstart, unsigned long vend)
1010 {
1011 unsigned long nva_start_addr;
1012 struct vmap_area *va;
1013 enum fit_type type;
1014 int ret;
1015
1016 va = find_vmap_lowest_match(size, align, vstart);
1017 if (unlikely(!va))
1018 return vend;
1019
1020 if (va->va_start > vstart)
1021 nva_start_addr = ALIGN(va->va_start, align);
1022 else
1023 nva_start_addr = ALIGN(vstart, align);
1024
1025
1026 if (nva_start_addr + size > vend)
1027 return vend;
1028
1029
1030 type = classify_va_fit_type(va, nva_start_addr, size);
1031 if (WARN_ON_ONCE(type == NOTHING_FIT))
1032 return vend;
1033
1034
1035 ret = adjust_va_to_fit_type(va, nva_start_addr, size, type);
1036 if (ret)
1037 return vend;
1038
1039 #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
1040 find_vmap_lowest_match_check(size);
1041 #endif
1042
1043 return nva_start_addr;
1044 }
1045
1046
1047
1048
1049
1050 static struct vmap_area *alloc_vmap_area(unsigned long size,
1051 unsigned long align,
1052 unsigned long vstart, unsigned long vend,
1053 int node, gfp_t gfp_mask)
1054 {
1055 struct vmap_area *va, *pva;
1056 unsigned long addr;
1057 int purged = 0;
1058
1059 BUG_ON(!size);
1060 BUG_ON(offset_in_page(size));
1061 BUG_ON(!is_power_of_2(align));
1062
1063 if (unlikely(!vmap_initialized))
1064 return ERR_PTR(-EBUSY);
1065
1066 might_sleep();
1067
1068 va = kmem_cache_alloc_node(vmap_area_cachep,
1069 gfp_mask & GFP_RECLAIM_MASK, node);
1070 if (unlikely(!va))
1071 return ERR_PTR(-ENOMEM);
1072
1073
1074
1075
1076
1077 kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK);
1078
1079 retry:
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092 preempt_disable();
1093 if (!__this_cpu_read(ne_fit_preload_node)) {
1094 preempt_enable();
1095 pva = kmem_cache_alloc_node(vmap_area_cachep, GFP_KERNEL, node);
1096 preempt_disable();
1097
1098 if (__this_cpu_cmpxchg(ne_fit_preload_node, NULL, pva)) {
1099 if (pva)
1100 kmem_cache_free(vmap_area_cachep, pva);
1101 }
1102 }
1103
1104 spin_lock(&vmap_area_lock);
1105 preempt_enable();
1106
1107
1108
1109
1110
1111 addr = __alloc_vmap_area(size, align, vstart, vend);
1112 if (unlikely(addr == vend))
1113 goto overflow;
1114
1115 va->va_start = addr;
1116 va->va_end = addr + size;
1117 va->vm = NULL;
1118 insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
1119
1120 spin_unlock(&vmap_area_lock);
1121
1122 BUG_ON(!IS_ALIGNED(va->va_start, align));
1123 BUG_ON(va->va_start < vstart);
1124 BUG_ON(va->va_end > vend);
1125
1126 return va;
1127
1128 overflow:
1129 spin_unlock(&vmap_area_lock);
1130 if (!purged) {
1131 purge_vmap_area_lazy();
1132 purged = 1;
1133 goto retry;
1134 }
1135
1136 if (gfpflags_allow_blocking(gfp_mask)) {
1137 unsigned long freed = 0;
1138 blocking_notifier_call_chain(&vmap_notify_list, 0, &freed);
1139 if (freed > 0) {
1140 purged = 0;
1141 goto retry;
1142 }
1143 }
1144
1145 if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
1146 pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
1147 size);
1148
1149 kmem_cache_free(vmap_area_cachep, va);
1150 return ERR_PTR(-EBUSY);
1151 }
1152
1153 int register_vmap_purge_notifier(struct notifier_block *nb)
1154 {
1155 return blocking_notifier_chain_register(&vmap_notify_list, nb);
1156 }
1157 EXPORT_SYMBOL_GPL(register_vmap_purge_notifier);
1158
1159 int unregister_vmap_purge_notifier(struct notifier_block *nb)
1160 {
1161 return blocking_notifier_chain_unregister(&vmap_notify_list, nb);
1162 }
1163 EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier);
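/*
 * Illustrative sketch, not part of this file: a subsystem that caches
 * vmalloc/vmap mappings can register a purge notifier so it gets asked
 * to drop them when alloc_vmap_area() runs out of space. The callback
 * and my_drop_cached_mappings() are hypothetical; the caller only
 * checks that *freed becomes non-zero before retrying.
 *
 *	static int my_vmap_purge(struct notifier_block *nb,
 *				 unsigned long event, void *ptr)
 *	{
 *		unsigned long *freed = ptr;
 *
 *		*freed += my_drop_cached_mappings();
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_vmap_purge_nb = {
 *		.notifier_call = my_vmap_purge,
 *	};
 *
 *	// register_vmap_purge_notifier(&my_vmap_purge_nb) at init,
 *	// unregister_vmap_purge_notifier(&my_vmap_purge_nb) at teardown.
 */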
1164
1165 static void __free_vmap_area(struct vmap_area *va)
1166 {
1167
1168
1169
1170 unlink_va(va, &vmap_area_root);
1171
1172
1173
1174
1175 merge_or_add_vmap_area(va,
1176 &free_vmap_area_root, &free_vmap_area_list);
1177 }
1178
1179
1180
1181
1182 static void free_vmap_area(struct vmap_area *va)
1183 {
1184 spin_lock(&vmap_area_lock);
1185 __free_vmap_area(va);
1186 spin_unlock(&vmap_area_lock);
1187 }
1188
1189
1190
1191
1192 static void unmap_vmap_area(struct vmap_area *va)
1193 {
1194 vunmap_page_range(va->va_start, va->va_end);
1195 }
1196
1197
1198 /*
1199  * Lazy freeing of vmap areas.
1200  *
1201  * When a vmap area is freed its page table entries are cleared right
1202  * away, but the TLB flush and the return of the virtual range to the
1203  * free tree are deferred: the area is queued on vmap_purge_list and
1204  * the amount of outstanding lazily freed address space is tracked in
1205  * vmap_lazy_nr. Once that exceeds lazy_max_pages(), the queue is
1206  * drained and a single TLB flush covering all queued ranges is issued,
1207  * which amortises the cost of global TLB flushes over many unmap
1208  * operations.
1209  *
1210  * lazy_max_pages() grows with the number of online CPUs, trading a bit
1211  * more outstanding address space for fewer global flushes on big boxes.
1212  */
1213 static unsigned long lazy_max_pages(void)
1214 {
1215 unsigned int log;
1216
1217 log = fls(num_online_cpus());
1218
1219 return log * (32UL * 1024 * 1024 / PAGE_SIZE);
1220 }
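/*
 * Example: with 8 CPUs online, fls(8) = 4, so up to 4 * (32MB / PAGE_SIZE)
 * = 32768 pages (128MB with 4K pages) of lazily freed address space can
 * accumulate before a purge is forced.
 */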
1221
1222 static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0);
1223
1224
1225
1226
1227
1228
1229 static DEFINE_MUTEX(vmap_purge_lock);
1230
1231
1232 static void purge_fragmented_blocks_allcpus(void);
1233
1234
1235
1236
1237
1238 void set_iounmap_nonlazy(void)
1239 {
1240 atomic_long_set(&vmap_lazy_nr, lazy_max_pages()+1);
1241 }
1242
1243
1244
1245
1246 static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
1247 {
1248 unsigned long resched_threshold;
1249 struct llist_node *valist;
1250 struct vmap_area *va;
1251 struct vmap_area *n_va;
1252
1253 lockdep_assert_held(&vmap_purge_lock);
1254
1255 valist = llist_del_all(&vmap_purge_list);
1256 if (unlikely(valist == NULL))
1257 return false;
1258
1259
1260
1261
1262
1263 vmalloc_sync_unmappings();
1264
1265
1266
1267
1268
1269 llist_for_each_entry(va, valist, purge_list) {
1270 if (va->va_start < start)
1271 start = va->va_start;
1272 if (va->va_end > end)
1273 end = va->va_end;
1274 }
1275
1276 flush_tlb_kernel_range(start, end);
1277 resched_threshold = lazy_max_pages() << 1;
1278
1279 spin_lock(&vmap_area_lock);
1280 llist_for_each_entry_safe(va, n_va, valist, purge_list) {
1281 unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
1282
1283
1284
1285
1286
1287
1288 merge_or_add_vmap_area(va,
1289 &free_vmap_area_root, &free_vmap_area_list);
1290
1291 atomic_long_sub(nr, &vmap_lazy_nr);
1292
1293 if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
1294 cond_resched_lock(&vmap_area_lock);
1295 }
1296 spin_unlock(&vmap_area_lock);
1297 return true;
1298 }
1299
1300
1301
1302
1303
1304 static void try_purge_vmap_area_lazy(void)
1305 {
1306 if (mutex_trylock(&vmap_purge_lock)) {
1307 __purge_vmap_area_lazy(ULONG_MAX, 0);
1308 mutex_unlock(&vmap_purge_lock);
1309 }
1310 }
1311
1312
1313
1314
1315 static void purge_vmap_area_lazy(void)
1316 {
1317 mutex_lock(&vmap_purge_lock);
1318 purge_fragmented_blocks_allcpus();
1319 __purge_vmap_area_lazy(ULONG_MAX, 0);
1320 mutex_unlock(&vmap_purge_lock);
1321 }
1322
1323
1324
1325
1326
1327
1328 static void free_vmap_area_noflush(struct vmap_area *va)
1329 {
1330 unsigned long nr_lazy;
1331
1332 spin_lock(&vmap_area_lock);
1333 unlink_va(va, &vmap_area_root);
1334 spin_unlock(&vmap_area_lock);
1335
1336 nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >>
1337 PAGE_SHIFT, &vmap_lazy_nr);
1338
1339
1340 llist_add(&va->purge_list, &vmap_purge_list);
1341
1342 if (unlikely(nr_lazy > lazy_max_pages()))
1343 try_purge_vmap_area_lazy();
1344 }
1345
1346
1347
1348
1349 static void free_unmap_vmap_area(struct vmap_area *va)
1350 {
1351 flush_cache_vunmap(va->va_start, va->va_end);
1352 unmap_vmap_area(va);
1353 if (debug_pagealloc_enabled_static())
1354 flush_tlb_kernel_range(va->va_start, va->va_end);
1355
1356 free_vmap_area_noflush(va);
1357 }
1358
1359 static struct vmap_area *find_vmap_area(unsigned long addr)
1360 {
1361 struct vmap_area *va;
1362
1363 spin_lock(&vmap_area_lock);
1364 va = __find_vmap_area(addr);
1365 spin_unlock(&vmap_area_lock);
1366
1367 return va;
1368 }
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381 #if BITS_PER_LONG == 32
1382 #define VMALLOC_SPACE (128UL*1024*1024)
1383 #else
1384 #define VMALLOC_SPACE (128UL*1024*1024*1024)
1385 #endif
1386
1387 #define VMALLOC_PAGES (VMALLOC_SPACE / PAGE_SIZE)
1388 #define VMAP_MAX_ALLOC BITS_PER_LONG
1389 #define VMAP_BBMAP_BITS_MAX 1024
1390 #define VMAP_BBMAP_BITS_MIN (VMAP_MAX_ALLOC*2)
1391 #define VMAP_MIN(x, y) ((x) < (y) ? (x) : (y))
1392 #define VMAP_MAX(x, y) ((x) > (y) ? (x) : (y))
1393 #define VMAP_BBMAP_BITS \
1394 VMAP_MIN(VMAP_BBMAP_BITS_MAX, \
1395 VMAP_MAX(VMAP_BBMAP_BITS_MIN, \
1396 VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16))
1397
1398 #define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE)
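/*
 * Worked example, assuming a 64-bit kernel, 4K pages and NR_CPUS rounded
 * up to 64: VMALLOC_PAGES = 128G / 4K = 32M pages, so the per-CPU share
 * 32M / 64 / 16 = 32768 bits is clamped to VMAP_BBMAP_BITS_MAX = 1024,
 * giving VMAP_BLOCK_SIZE = 1024 * 4K = 4M. The largest request served
 * from a block is VMAP_MAX_ALLOC = BITS_PER_LONG = 64 pages (256K);
 * anything bigger goes through alloc_vmap_area() instead.
 */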
1399
1400 struct vmap_block_queue {
1401 spinlock_t lock;
1402 struct list_head free;
1403 };
1404
1405 struct vmap_block {
1406 spinlock_t lock;
1407 struct vmap_area *va;
1408 unsigned long free, dirty;
1409 unsigned long dirty_min, dirty_max;
1410 struct list_head free_list;
1411 struct rcu_head rcu_head;
1412 struct list_head purge;
1413 };
1414
1415
1416 static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
1417
1418
1419
1420
1421
1422
1423 static DEFINE_SPINLOCK(vmap_block_tree_lock);
1424 static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);
1425
1426
1427
1428
1429
1430
1431
1432
1433 static unsigned long addr_to_vb_idx(unsigned long addr)
1434 {
1435 addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
1436 addr /= VMAP_BLOCK_SIZE;
1437 return addr;
1438 }
1439
1440 static void *vmap_block_vaddr(unsigned long va_start, unsigned long pages_off)
1441 {
1442 unsigned long addr;
1443
1444 addr = va_start + (pages_off << PAGE_SHIFT);
1445 BUG_ON(addr_to_vb_idx(addr) != addr_to_vb_idx(va_start));
1446 return (void *)addr;
1447 }
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457 static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
1458 {
1459 struct vmap_block_queue *vbq;
1460 struct vmap_block *vb;
1461 struct vmap_area *va;
1462 unsigned long vb_idx;
1463 int node, err;
1464 void *vaddr;
1465
1466 node = numa_node_id();
1467
1468 vb = kmalloc_node(sizeof(struct vmap_block),
1469 gfp_mask & GFP_RECLAIM_MASK, node);
1470 if (unlikely(!vb))
1471 return ERR_PTR(-ENOMEM);
1472
1473 va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
1474 VMALLOC_START, VMALLOC_END,
1475 node, gfp_mask);
1476 if (IS_ERR(va)) {
1477 kfree(vb);
1478 return ERR_CAST(va);
1479 }
1480
1481 err = radix_tree_preload(gfp_mask);
1482 if (unlikely(err)) {
1483 kfree(vb);
1484 free_vmap_area(va);
1485 return ERR_PTR(err);
1486 }
1487
1488 vaddr = vmap_block_vaddr(va->va_start, 0);
1489 spin_lock_init(&vb->lock);
1490 vb->va = va;
1491
1492 BUG_ON(VMAP_BBMAP_BITS <= (1UL << order));
1493 vb->free = VMAP_BBMAP_BITS - (1UL << order);
1494 vb->dirty = 0;
1495 vb->dirty_min = VMAP_BBMAP_BITS;
1496 vb->dirty_max = 0;
1497 INIT_LIST_HEAD(&vb->free_list);
1498
1499 vb_idx = addr_to_vb_idx(va->va_start);
1500 spin_lock(&vmap_block_tree_lock);
1501 err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
1502 spin_unlock(&vmap_block_tree_lock);
1503 BUG_ON(err);
1504 radix_tree_preload_end();
1505
1506 vbq = &get_cpu_var(vmap_block_queue);
1507 spin_lock(&vbq->lock);
1508 list_add_tail_rcu(&vb->free_list, &vbq->free);
1509 spin_unlock(&vbq->lock);
1510 put_cpu_var(vmap_block_queue);
1511
1512 return vaddr;
1513 }
1514
1515 static void free_vmap_block(struct vmap_block *vb)
1516 {
1517 struct vmap_block *tmp;
1518 unsigned long vb_idx;
1519
1520 vb_idx = addr_to_vb_idx(vb->va->va_start);
1521 spin_lock(&vmap_block_tree_lock);
1522 tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
1523 spin_unlock(&vmap_block_tree_lock);
1524 BUG_ON(tmp != vb);
1525
1526 free_vmap_area_noflush(vb->va);
1527 kfree_rcu(vb, rcu_head);
1528 }
1529
1530 static void purge_fragmented_blocks(int cpu)
1531 {
1532 LIST_HEAD(purge);
1533 struct vmap_block *vb;
1534 struct vmap_block *n_vb;
1535 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
1536
1537 rcu_read_lock();
1538 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
1539
1540 if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
1541 continue;
1542
1543 spin_lock(&vb->lock);
1544 if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
1545 vb->free = 0;
1546 vb->dirty = VMAP_BBMAP_BITS;
1547 vb->dirty_min = 0;
1548 vb->dirty_max = VMAP_BBMAP_BITS;
1549 spin_lock(&vbq->lock);
1550 list_del_rcu(&vb->free_list);
1551 spin_unlock(&vbq->lock);
1552 spin_unlock(&vb->lock);
1553 list_add_tail(&vb->purge, &purge);
1554 } else
1555 spin_unlock(&vb->lock);
1556 }
1557 rcu_read_unlock();
1558
1559 list_for_each_entry_safe(vb, n_vb, &purge, purge) {
1560 list_del(&vb->purge);
1561 free_vmap_block(vb);
1562 }
1563 }
1564
1565 static void purge_fragmented_blocks_allcpus(void)
1566 {
1567 int cpu;
1568
1569 for_each_possible_cpu(cpu)
1570 purge_fragmented_blocks(cpu);
1571 }
1572
1573 static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
1574 {
1575 struct vmap_block_queue *vbq;
1576 struct vmap_block *vb;
1577 void *vaddr = NULL;
1578 unsigned int order;
1579
1580 BUG_ON(offset_in_page(size));
1581 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
1582 if (WARN_ON(size == 0)) {
1583
1584
1585
1586
1587
1588 return NULL;
1589 }
1590 order = get_order(size);
1591
1592 rcu_read_lock();
1593 vbq = &get_cpu_var(vmap_block_queue);
1594 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
1595 unsigned long pages_off;
1596
1597 spin_lock(&vb->lock);
1598 if (vb->free < (1UL << order)) {
1599 spin_unlock(&vb->lock);
1600 continue;
1601 }
1602
1603 pages_off = VMAP_BBMAP_BITS - vb->free;
1604 vaddr = vmap_block_vaddr(vb->va->va_start, pages_off);
1605 vb->free -= 1UL << order;
1606 if (vb->free == 0) {
1607 spin_lock(&vbq->lock);
1608 list_del_rcu(&vb->free_list);
1609 spin_unlock(&vbq->lock);
1610 }
1611
1612 spin_unlock(&vb->lock);
1613 break;
1614 }
1615
1616 put_cpu_var(vmap_block_queue);
1617 rcu_read_unlock();
1618
1619
1620 if (!vaddr)
1621 vaddr = new_vmap_block(order, gfp_mask);
1622
1623 return vaddr;
1624 }
1625
1626 static void vb_free(const void *addr, unsigned long size)
1627 {
1628 unsigned long offset;
1629 unsigned long vb_idx;
1630 unsigned int order;
1631 struct vmap_block *vb;
1632
1633 BUG_ON(offset_in_page(size));
1634 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
1635
1636 flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);
1637
1638 order = get_order(size);
1639
1640 offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
1641 offset >>= PAGE_SHIFT;
1642
1643 vb_idx = addr_to_vb_idx((unsigned long)addr);
1644 rcu_read_lock();
1645 vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
1646 rcu_read_unlock();
1647 BUG_ON(!vb);
1648
1649 vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
1650
1651 if (debug_pagealloc_enabled_static())
1652 flush_tlb_kernel_range((unsigned long)addr,
1653 (unsigned long)addr + size);
1654
1655 spin_lock(&vb->lock);
1656
1657
1658 vb->dirty_min = min(vb->dirty_min, offset);
1659 vb->dirty_max = max(vb->dirty_max, offset + (1UL << order));
1660
1661 vb->dirty += 1UL << order;
1662 if (vb->dirty == VMAP_BBMAP_BITS) {
1663 BUG_ON(vb->free);
1664 spin_unlock(&vb->lock);
1665 free_vmap_block(vb);
1666 } else
1667 spin_unlock(&vb->lock);
1668 }
1669
1670 static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush)
1671 {
1672 int cpu;
1673
1674 if (unlikely(!vmap_initialized))
1675 return;
1676
1677 might_sleep();
1678
1679 for_each_possible_cpu(cpu) {
1680 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
1681 struct vmap_block *vb;
1682
1683 rcu_read_lock();
1684 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
1685 spin_lock(&vb->lock);
1686 if (vb->dirty) {
1687 unsigned long va_start = vb->va->va_start;
1688 unsigned long s, e;
1689
1690 s = va_start + (vb->dirty_min << PAGE_SHIFT);
1691 e = va_start + (vb->dirty_max << PAGE_SHIFT);
1692
1693 start = min(s, start);
1694 end = max(e, end);
1695
1696 flush = 1;
1697 }
1698 spin_unlock(&vb->lock);
1699 }
1700 rcu_read_unlock();
1701 }
1702
1703 mutex_lock(&vmap_purge_lock);
1704 purge_fragmented_blocks_allcpus();
1705 if (!__purge_vmap_area_lazy(start, end) && flush)
1706 flush_tlb_kernel_range(start, end);
1707 mutex_unlock(&vmap_purge_lock);
1708 }
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723 void vm_unmap_aliases(void)
1724 {
1725 unsigned long start = ULONG_MAX, end = 0;
1726 int flush = 0;
1727
1728 _vm_unmap_aliases(start, end, flush);
1729 }
1730 EXPORT_SYMBOL_GPL(vm_unmap_aliases);
1731
1732
1733
1734
1735
1736
1737 void vm_unmap_ram(const void *mem, unsigned int count)
1738 {
1739 unsigned long size = (unsigned long)count << PAGE_SHIFT;
1740 unsigned long addr = (unsigned long)mem;
1741 struct vmap_area *va;
1742
1743 might_sleep();
1744 BUG_ON(!addr);
1745 BUG_ON(addr < VMALLOC_START);
1746 BUG_ON(addr > VMALLOC_END);
1747 BUG_ON(!PAGE_ALIGNED(addr));
1748
1749 if (likely(count <= VMAP_MAX_ALLOC)) {
1750 debug_check_no_locks_freed(mem, size);
1751 vb_free(mem, size);
1752 return;
1753 }
1754
1755 va = find_vmap_area(addr);
1756 BUG_ON(!va);
1757 debug_check_no_locks_freed((void *)va->va_start,
1758 (va->va_end - va->va_start));
1759 free_unmap_vmap_area(va);
1760 }
1761 EXPORT_SYMBOL(vm_unmap_ram);
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778 void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
1779 {
1780 unsigned long size = (unsigned long)count << PAGE_SHIFT;
1781 unsigned long addr;
1782 void *mem;
1783
1784 if (likely(count <= VMAP_MAX_ALLOC)) {
1785 mem = vb_alloc(size, GFP_KERNEL);
1786 if (IS_ERR(mem))
1787 return NULL;
1788 addr = (unsigned long)mem;
1789 } else {
1790 struct vmap_area *va;
1791 va = alloc_vmap_area(size, PAGE_SIZE,
1792 VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
1793 if (IS_ERR(va))
1794 return NULL;
1795
1796 addr = va->va_start;
1797 mem = (void *)addr;
1798 }
1799 if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
1800 vm_unmap_ram(mem, count);
1801 return NULL;
1802 }
1803 return mem;
1804 }
1805 EXPORT_SYMBOL(vm_map_ram);
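/*
 * Illustrative sketch, not part of this file: map a small, transient
 * set of pages, use the mapping, then drop it again. 'pages', 'nr' and
 * 'src' are assumed to be set up by the caller.
 *
 *	void *va = vm_map_ram(pages, nr, NUMA_NO_NODE, PAGE_KERNEL);
 *	if (!va)
 *		return -ENOMEM;
 *	memcpy(va, src, (size_t)nr * PAGE_SIZE);
 *	vm_unmap_ram(va, nr);
 */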
1806
1807 static struct vm_struct *vmlist __initdata;
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819 void __init vm_area_add_early(struct vm_struct *vm)
1820 {
1821 struct vm_struct *tmp, **p;
1822
1823 BUG_ON(vmap_initialized);
1824 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
1825 if (tmp->addr >= vm->addr) {
1826 BUG_ON(tmp->addr < vm->addr + vm->size);
1827 break;
1828 } else
1829 BUG_ON(tmp->addr + tmp->size > vm->addr);
1830 }
1831 vm->next = *p;
1832 *p = vm;
1833 }
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847 void __init vm_area_register_early(struct vm_struct *vm, size_t align)
1848 {
1849 static size_t vm_init_off __initdata;
1850 unsigned long addr;
1851
1852 addr = ALIGN(VMALLOC_START + vm_init_off, align);
1853 vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
1854
1855 vm->addr = (void *)addr;
1856
1857 vm_area_add_early(vm);
1858 }
1859
1860 static void vmap_init_free_space(void)
1861 {
1862 unsigned long vmap_start = 1;
1863 const unsigned long vmap_end = ULONG_MAX;
1864 struct vmap_area *busy, *free;
1865
1866
1867
1868
1869
1870
1871
1872 list_for_each_entry(busy, &vmap_area_list, list) {
1873 if (busy->va_start - vmap_start > 0) {
1874 free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
1875 if (!WARN_ON_ONCE(!free)) {
1876 free->va_start = vmap_start;
1877 free->va_end = busy->va_start;
1878
1879 insert_vmap_area_augment(free, NULL,
1880 &free_vmap_area_root,
1881 &free_vmap_area_list);
1882 }
1883 }
1884
1885 vmap_start = busy->va_end;
1886 }
1887
1888 if (vmap_end - vmap_start > 0) {
1889 free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
1890 if (!WARN_ON_ONCE(!free)) {
1891 free->va_start = vmap_start;
1892 free->va_end = vmap_end;
1893
1894 insert_vmap_area_augment(free, NULL,
1895 &free_vmap_area_root,
1896 &free_vmap_area_list);
1897 }
1898 }
1899 }
1900
1901 void __init vmalloc_init(void)
1902 {
1903 struct vmap_area *va;
1904 struct vm_struct *tmp;
1905 int i;
1906
1907
1908
1909
1910 vmap_area_cachep = KMEM_CACHE(vmap_area, SLAB_PANIC);
1911
1912 for_each_possible_cpu(i) {
1913 struct vmap_block_queue *vbq;
1914 struct vfree_deferred *p;
1915
1916 vbq = &per_cpu(vmap_block_queue, i);
1917 spin_lock_init(&vbq->lock);
1918 INIT_LIST_HEAD(&vbq->free);
1919 p = &per_cpu(vfree_deferred, i);
1920 init_llist_head(&p->list);
1921 INIT_WORK(&p->wq, free_work);
1922 }
1923
1924
1925 for (tmp = vmlist; tmp; tmp = tmp->next) {
1926 va = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
1927 if (WARN_ON_ONCE(!va))
1928 continue;
1929
1930 va->va_start = (unsigned long)tmp->addr;
1931 va->va_end = va->va_start + tmp->size;
1932 va->vm = tmp;
1933 insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
1934 }
1935
1936
1937
1938
1939 vmap_init_free_space();
1940 vmap_initialized = true;
1941 }
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962 int map_kernel_range_noflush(unsigned long addr, unsigned long size,
1963 pgprot_t prot, struct page **pages)
1964 {
1965 return vmap_page_range_noflush(addr, addr + size, prot, pages);
1966 }
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982 void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
1983 {
1984 vunmap_page_range(addr, addr + size);
1985 }
1986 EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996 void unmap_kernel_range(unsigned long addr, unsigned long size)
1997 {
1998 unsigned long end = addr + size;
1999
2000 flush_cache_vunmap(addr, end);
2001 vunmap_page_range(addr, end);
2002 flush_tlb_kernel_range(addr, end);
2003 }
2004 EXPORT_SYMBOL_GPL(unmap_kernel_range);
2005
2006 int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages)
2007 {
2008 unsigned long addr = (unsigned long)area->addr;
2009 unsigned long end = addr + get_vm_area_size(area);
2010 int err;
2011
2012 err = vmap_page_range(addr, end, prot, pages);
2013
2014 return err > 0 ? 0 : err;
2015 }
2016 EXPORT_SYMBOL_GPL(map_vm_area);
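/*
 * Illustrative sketch, not part of this file: the lower-level interface
 * lets a caller reserve a VA range first and populate it later. 'pages'
 * and 'nr' are the caller's; the error path mirrors what vmap() does.
 *
 *	struct vm_struct *area = get_vm_area((unsigned long)nr * PAGE_SIZE, VM_MAP);
 *	if (!area)
 *		return -ENOMEM;
 *	if (map_vm_area(area, PAGE_KERNEL, pages)) {
 *		vunmap(area->addr);
 *		return -ENOMEM;
 *	}
 */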
2017
2018 static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
2019 unsigned long flags, const void *caller)
2020 {
2021 spin_lock(&vmap_area_lock);
2022 vm->flags = flags;
2023 vm->addr = (void *)va->va_start;
2024 vm->size = va->va_end - va->va_start;
2025 vm->caller = caller;
2026 va->vm = vm;
2027 spin_unlock(&vmap_area_lock);
2028 }
2029
2030 static void clear_vm_uninitialized_flag(struct vm_struct *vm)
2031 {
2032
2033
2034
2035
2036
2037 smp_wmb();
2038 vm->flags &= ~VM_UNINITIALIZED;
2039 }
2040
2041 static struct vm_struct *__get_vm_area_node(unsigned long size,
2042 unsigned long align, unsigned long flags, unsigned long start,
2043 unsigned long end, int node, gfp_t gfp_mask, const void *caller)
2044 {
2045 struct vmap_area *va;
2046 struct vm_struct *area;
2047
2048 BUG_ON(in_interrupt());
2049 size = PAGE_ALIGN(size);
2050 if (unlikely(!size))
2051 return NULL;
2052
2053 if (flags & VM_IOREMAP)
2054 align = 1ul << clamp_t(int, get_count_order_long(size),
2055 PAGE_SHIFT, IOREMAP_MAX_ORDER);
2056
2057 area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
2058 if (unlikely(!area))
2059 return NULL;
2060
2061 if (!(flags & VM_NO_GUARD))
2062 size += PAGE_SIZE;
2063
2064 va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
2065 if (IS_ERR(va)) {
2066 kfree(area);
2067 return NULL;
2068 }
2069
2070 setup_vmalloc_vm(area, va, flags, caller);
2071
2072 return area;
2073 }
2074
2075 struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
2076 unsigned long start, unsigned long end)
2077 {
2078 return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
2079 GFP_KERNEL, __builtin_return_address(0));
2080 }
2081 EXPORT_SYMBOL_GPL(__get_vm_area);
2082
2083 struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
2084 unsigned long start, unsigned long end,
2085 const void *caller)
2086 {
2087 return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
2088 GFP_KERNEL, caller);
2089 }
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102 struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
2103 {
2104 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
2105 NUMA_NO_NODE, GFP_KERNEL,
2106 __builtin_return_address(0));
2107 }
2108
2109 struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
2110 const void *caller)
2111 {
2112 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
2113 NUMA_NO_NODE, GFP_KERNEL, caller);
2114 }
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126 struct vm_struct *find_vm_area(const void *addr)
2127 {
2128 struct vmap_area *va;
2129
2130 va = find_vmap_area((unsigned long)addr);
2131 if (!va)
2132 return NULL;
2133
2134 return va->vm;
2135 }
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147 struct vm_struct *remove_vm_area(const void *addr)
2148 {
2149 struct vmap_area *va;
2150
2151 might_sleep();
2152
2153 spin_lock(&vmap_area_lock);
2154 va = __find_vmap_area((unsigned long)addr);
2155 if (va && va->vm) {
2156 struct vm_struct *vm = va->vm;
2157
2158 va->vm = NULL;
2159 spin_unlock(&vmap_area_lock);
2160
2161 kasan_free_shadow(vm);
2162 free_unmap_vmap_area(va);
2163
2164 return vm;
2165 }
2166
2167 spin_unlock(&vmap_area_lock);
2168 return NULL;
2169 }
2170
2171 static inline void set_area_direct_map(const struct vm_struct *area,
2172 int (*set_direct_map)(struct page *page))
2173 {
2174 int i;
2175
2176 for (i = 0; i < area->nr_pages; i++)
2177 if (page_address(area->pages[i]))
2178 set_direct_map(area->pages[i]);
2179 }
2180
2181
2182 static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages)
2183 {
2184 unsigned long start = ULONG_MAX, end = 0;
2185 int flush_reset = area->flags & VM_FLUSH_RESET_PERMS;
2186 int flush_dmap = 0;
2187 int i;
2188
2189 remove_vm_area(area->addr);
2190
2191
2192 if (!flush_reset)
2193 return;
2194
2195
2196
2197
2198
2199 if (!deallocate_pages) {
2200 vm_unmap_aliases();
2201 return;
2202 }
2203
2204
2205
2206
2207
2208
2209 for (i = 0; i < area->nr_pages; i++) {
2210 unsigned long addr = (unsigned long)page_address(area->pages[i]);
2211 if (addr) {
2212 start = min(addr, start);
2213 end = max(addr + PAGE_SIZE, end);
2214 flush_dmap = 1;
2215 }
2216 }
2217
2218
2219
2220
2221
2222
2223 set_area_direct_map(area, set_direct_map_invalid_noflush);
2224 _vm_unmap_aliases(start, end, flush_dmap);
2225 set_area_direct_map(area, set_direct_map_default_noflush);
2226 }
2227
2228 static void __vunmap(const void *addr, int deallocate_pages)
2229 {
2230 struct vm_struct *area;
2231
2232 if (!addr)
2233 return;
2234
2235 if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)\n",
2236 addr))
2237 return;
2238
2239 area = find_vm_area(addr);
2240 if (unlikely(!area)) {
2241 WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
2242 addr);
2243 return;
2244 }
2245
2246 debug_check_no_locks_freed(area->addr, get_vm_area_size(area));
2247 debug_check_no_obj_freed(area->addr, get_vm_area_size(area));
2248
2249 vm_remove_mappings(area, deallocate_pages);
2250
2251 if (deallocate_pages) {
2252 int i;
2253
2254 for (i = 0; i < area->nr_pages; i++) {
2255 struct page *page = area->pages[i];
2256
2257 BUG_ON(!page);
2258 __free_pages(page, 0);
2259 }
2260 atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);
2261
2262 kvfree(area->pages);
2263 }
2264
2265 kfree(area);
2266 return;
2267 }
2268
2269 static inline void __vfree_deferred(const void *addr)
2270 {
2271
2272
2273
2274
2275
2276
2277 struct vfree_deferred *p = raw_cpu_ptr(&vfree_deferred);
2278
2279 if (llist_add((struct llist_node *)addr, &p->list))
2280 schedule_work(&p->wq);
2281 }
2282
2283
2284
2285
2286
2287
2288
2289
2290 void vfree_atomic(const void *addr)
2291 {
2292 BUG_ON(in_nmi());
2293
2294 kmemleak_free(addr);
2295
2296 if (!addr)
2297 return;
2298 __vfree_deferred(addr);
2299 }
2300
2301 static void __vfree(const void *addr)
2302 {
2303 if (unlikely(in_interrupt()))
2304 __vfree_deferred(addr);
2305 else
2306 __vunmap(addr, 1);
2307 }
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325 void vfree(const void *addr)
2326 {
2327 BUG_ON(in_nmi());
2328
2329 kmemleak_free(addr);
2330
2331 might_sleep_if(!in_interrupt());
2332
2333 if (!addr)
2334 return;
2335
2336 __vfree(addr);
2337 }
2338 EXPORT_SYMBOL(vfree);
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349 void vunmap(const void *addr)
2350 {
2351 BUG_ON(in_interrupt());
2352 might_sleep();
2353 if (addr)
2354 __vunmap(addr, 0);
2355 }
2356 EXPORT_SYMBOL(vunmap);
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370 void *vmap(struct page **pages, unsigned int count,
2371 unsigned long flags, pgprot_t prot)
2372 {
2373 struct vm_struct *area;
2374 unsigned long size;
2375
2376 might_sleep();
2377
2378 if (count > totalram_pages())
2379 return NULL;
2380
2381 size = (unsigned long)count << PAGE_SHIFT;
2382 area = get_vm_area_caller(size, flags, __builtin_return_address(0));
2383 if (!area)
2384 return NULL;
2385
2386 if (map_vm_area(area, prot, pages)) {
2387 vunmap(area->addr);
2388 return NULL;
2389 }
2390
2391 return area->addr;
2392 }
2393 EXPORT_SYMBOL(vmap);
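/*
 * Illustrative sketch, not part of this file: give a pile of separately
 * allocated pages one virtually contiguous view. VM_MAP is the
 * conventional flag for such caller-owned mappings; the pages remain
 * owned (and must later be freed) by the caller.
 *
 *	void *va = vmap(pages, nr, VM_MAP, PAGE_KERNEL);
 *	if (!va)
 *		return -ENOMEM;
 *	...
 *	vunmap(va);
 */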
2394
2395 static void *__vmalloc_node(unsigned long size, unsigned long align,
2396 gfp_t gfp_mask, pgprot_t prot,
2397 int node, const void *caller);
2398 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
2399 pgprot_t prot, int node)
2400 {
2401 struct page **pages;
2402 unsigned int nr_pages, array_size, i;
2403 const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
2404 const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
2405 const gfp_t highmem_mask = (gfp_mask & (GFP_DMA | GFP_DMA32)) ?
2406 0 :
2407 __GFP_HIGHMEM;
2408
2409 nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
2410 array_size = (nr_pages * sizeof(struct page *));
2411
2412
2413 if (array_size > PAGE_SIZE) {
2414 pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask,
2415 PAGE_KERNEL, node, area->caller);
2416 } else {
2417 pages = kmalloc_node(array_size, nested_gfp, node);
2418 }
2419
2420 if (!pages) {
2421 remove_vm_area(area->addr);
2422 kfree(area);
2423 return NULL;
2424 }
2425
2426 area->pages = pages;
2427 area->nr_pages = nr_pages;
2428
2429 for (i = 0; i < area->nr_pages; i++) {
2430 struct page *page;
2431
2432 if (node == NUMA_NO_NODE)
2433 page = alloc_page(alloc_mask|highmem_mask);
2434 else
2435 page = alloc_pages_node(node, alloc_mask|highmem_mask, 0);
2436
2437 if (unlikely(!page)) {
2438
2439 area->nr_pages = i;
2440 atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
2441 goto fail;
2442 }
2443 area->pages[i] = page;
2444 if (gfpflags_allow_blocking(gfp_mask|highmem_mask))
2445 cond_resched();
2446 }
2447 atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
2448
2449 if (map_vm_area(area, prot, pages))
2450 goto fail;
2451 return area->addr;
2452
2453 fail:
2454 warn_alloc(gfp_mask, NULL,
2455 "vmalloc: allocation failure, allocated %ld of %ld bytes",
2456 (area->nr_pages*PAGE_SIZE), area->size);
2457 __vfree(area->addr);
2458 return NULL;
2459 }
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479 void *__vmalloc_node_range(unsigned long size, unsigned long align,
2480 unsigned long start, unsigned long end, gfp_t gfp_mask,
2481 pgprot_t prot, unsigned long vm_flags, int node,
2482 const void *caller)
2483 {
2484 struct vm_struct *area;
2485 void *addr;
2486 unsigned long real_size = size;
2487
2488 size = PAGE_ALIGN(size);
2489 if (!size || (size >> PAGE_SHIFT) > totalram_pages())
2490 goto fail;
2491
2492 area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
2493 vm_flags, start, end, node, gfp_mask, caller);
2494 if (!area)
2495 goto fail;
2496
2497 addr = __vmalloc_area_node(area, gfp_mask, prot, node);
2498 if (!addr)
2499 return NULL;
2500
2501
2502
2503
2504
2505
2506 clear_vm_uninitialized_flag(area);
2507
2508 kmemleak_vmalloc(area, size, gfp_mask);
2509
2510 return addr;
2511
2512 fail:
2513 warn_alloc(gfp_mask, NULL,
2514 "vmalloc: allocation failure: %lu bytes", real_size);
2515 return NULL;
2516 }
2517
2518
2519
2520
2521
2522
2523 #ifdef CONFIG_TEST_VMALLOC_MODULE
2524 EXPORT_SYMBOL_GPL(__vmalloc_node_range);
2525 #endif
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548 static void *__vmalloc_node(unsigned long size, unsigned long align,
2549 gfp_t gfp_mask, pgprot_t prot,
2550 int node, const void *caller)
2551 {
2552 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
2553 gfp_mask, prot, 0, node, caller);
2554 }
2555
2556 void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
2557 {
2558 return __vmalloc_node(size, 1, gfp_mask, prot, NUMA_NO_NODE,
2559 __builtin_return_address(0));
2560 }
2561 EXPORT_SYMBOL(__vmalloc);
2562
2563 static inline void *__vmalloc_node_flags(unsigned long size,
2564 int node, gfp_t flags)
2565 {
2566 return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
2567 node, __builtin_return_address(0));
2568 }
2569
2570
2571 void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags,
2572 void *caller)
2573 {
2574 return __vmalloc_node(size, 1, flags, PAGE_KERNEL, node, caller);
2575 }
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589 void *vmalloc(unsigned long size)
2590 {
2591 return __vmalloc_node_flags(size, NUMA_NO_NODE,
2592 GFP_KERNEL);
2593 }
2594 EXPORT_SYMBOL(vmalloc);
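/*
 * Illustrative sketch, not part of this file: vmalloc() is the usual
 * choice for big, long-lived buffers that need to be virtually but not
 * physically contiguous. 'struct foo_entry' and 'nents' are made up;
 * array_size() comes from <linux/overflow.h>, already included above.
 *
 *	struct foo_entry *tbl = vmalloc(array_size(nents, sizeof(*tbl)));
 *	if (!tbl)
 *		return -ENOMEM;
 *	...
 *	vfree(tbl);
 */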
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609 void *vzalloc(unsigned long size)
2610 {
2611 return __vmalloc_node_flags(size, NUMA_NO_NODE,
2612 GFP_KERNEL | __GFP_ZERO);
2613 }
2614 EXPORT_SYMBOL(vzalloc);
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625 void *vmalloc_user(unsigned long size)
2626 {
2627 return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END,
2628 GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL,
2629 VM_USERMAP, NUMA_NO_NODE,
2630 __builtin_return_address(0));
2631 }
2632 EXPORT_SYMBOL(vmalloc_user);
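/*
 * Illustrative sketch, not part of this file: vmalloc_user() pairs with
 * remap_vmalloc_range() (further down in this file) to export a zeroed,
 * VM_USERMAP-tagged buffer to user space from an mmap handler. 'buf' is
 * assumed to hold the vmalloc_user() result.
 *
 *	static int my_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		return remap_vmalloc_range(vma, buf, 0);
 *	}
 */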
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647 void *vmalloc_node(unsigned long size, int node)
2648 {
2649 return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL,
2650 node, __builtin_return_address(0));
2651 }
2652 EXPORT_SYMBOL(vmalloc_node);
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668 void *vzalloc_node(unsigned long size, int node)
2669 {
2670 return __vmalloc_node_flags(size, node,
2671 GFP_KERNEL | __GFP_ZERO);
2672 }
2673 EXPORT_SYMBOL(vzalloc_node);
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688 void *vmalloc_exec(unsigned long size)
2689 {
2690 return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
2691 GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
2692 NUMA_NO_NODE, __builtin_return_address(0));
2693 }
2694
2695 #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
2696 #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
2697 #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
2698 #define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
2699 #else
2700 /*
2701 * 64b systems should always have either DMA or DMA32 zones. For others
2702 * GFP_DMA32 should do the right thing and use the normal zone.
2703 */
2704 #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
2705 #endif
2706
2707 /**
2708 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
2709 * @size:	allocation size
2710 *
2711 * Allocate enough 32bit PA addressable pages to cover @size from the
2712 * page level allocator and map them into contiguous kernel virtual space.
2713 *
2714 * Return: pointer to the allocated memory or %NULL on error
2715 */
2716 void *vmalloc_32(unsigned long size)
2717 {
2718 return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
2719 NUMA_NO_NODE, __builtin_return_address(0));
2720 }
2721 EXPORT_SYMBOL(vmalloc_32);
2722
2723 /**
2724 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
2725 * @size:	allocation size
2726 *
2727 * The resulting memory area is 32bit addressable and zeroed so it can be
2728 * mapped to userspace without leaking data.
2729 *
2730 * Return: pointer to the allocated memory or %NULL on error
2731 */
2732 void *vmalloc_32_user(unsigned long size)
2733 {
2734 return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END,
2735 GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
2736 VM_USERMAP, NUMA_NO_NODE,
2737 __builtin_return_address(0));
2738 }
2739 EXPORT_SYMBOL(vmalloc_32_user);
2740
2741
2742 /*
2743 * Small helper routine: copy contents from addr into buf.
2744 * If a page is not present, fill the corresponding bytes with zero.
2745 */
2746 static int aligned_vread(char *buf, char *addr, unsigned long count)
2747 {
2748 struct page *p;
2749 int copied = 0;
2750
2751 while (count) {
2752 unsigned long offset, length;
2753
2754 offset = offset_in_page(addr);
2755 length = PAGE_SIZE - offset;
2756 if (length > count)
2757 length = count;
2758 p = vmalloc_to_page(addr);
2759
2760 /*
2761 * The page backing this vmalloc address may not be present
2762 * (the mapping can be partial or already torn down).  Check
2763 * via vmalloc_to_page() and zero-fill the output buffer
2764 * instead of touching an unmapped address.
2765 */
2766 if (p) {
2767 /*
2768 * kmap_atomic() is used so that highmem pages are handled
2769 * correctly as well.
2770 */
2771 void *map = kmap_atomic(p);
2772 memcpy(buf, map + offset, length);
2773 kunmap_atomic(map);
2774 } else
2775 memset(buf, 0, length);
2776
2777 addr += length;
2778 buf += length;
2779 copied += length;
2780 count -= length;
2781 }
2782 return copied;
2783 }
2784
2785 static int aligned_vwrite(char *buf, char *addr, unsigned long count)
2786 {
2787 struct page *p;
2788 int copied = 0;
2789
2790 while (count) {
2791 unsigned long offset, length;
2792
2793 offset = offset_in_page(addr);
2794 length = PAGE_SIZE - offset;
2795 if (length > count)
2796 length = count;
2797 p = vmalloc_to_page(addr);
2798
2799 /*
2800 * As in aligned_vread(): if the page is not present, the
2801 * write to that range is silently skipped rather than
2802 * faulting on an unmapped address.
2803 */
2804
2805 if (p) {
2806 /*
2807 * kmap_atomic() handles highmem pages as well.
2808 */
2809
2810 void *map = kmap_atomic(p);
2811 memcpy(map + offset, buf, length);
2812 kunmap_atomic(map);
2813 }
2814 addr += length;
2815 buf += length;
2816 copied += length;
2817 count -= length;
2818 }
2819 return copied;
2820 }
2821
2822 /**
2823 * vread() - read vmalloc area in a safe way.
2824 * @buf:	buffer for reading data
2825 * @addr:	vm address.
2826 * @count:	number of bytes to be read.
2827 *
2828 * This function checks that addr is a valid vmalloc'ed area and
2829 * copies data from that area to the given buffer. If the memory
2830 * range [addr...addr+count) includes some valid address, data is
2831 * copied to the proper part of @buf. Memory holes are zero-filled;
2832 * IOREMAP areas are treated as holes and no copy is done.
2833 *
2834 * If [addr...addr+count) doesn't intersect any live vm_struct
2835 * area, 0 is returned. @buf should be a kernel buffer.
2836 *
2837 * Note: In usual ops, vread() is never necessary because the caller
2838 * should know the vmalloc() area is valid and can use memcpy().
2839 * This is for routines which have to access the vmalloc area without
2840 * any information, such as /dev/kmem.
2841 *
2842 * Return: number of bytes for which addr and buf should be increased
2843 * (same number as @count) or %0 if [addr...addr+count) doesn't
2844 * include any intersection with a valid vmalloc area
2845 */
2846 long vread(char *buf, char *addr, unsigned long count)
2847 {
2848 struct vmap_area *va;
2849 struct vm_struct *vm;
2850 char *vaddr, *buf_start = buf;
2851 unsigned long buflen = count;
2852 unsigned long n;
2853
2854 /* Don't allow overflow */
2855 if ((unsigned long) addr + count < count)
2856 count = -(unsigned long) addr;
2857
2858 spin_lock(&vmap_area_lock);
2859 list_for_each_entry(va, &vmap_area_list, list) {
2860 if (!count)
2861 break;
2862
2863 if (!va->vm)
2864 continue;
2865
2866 vm = va->vm;
2867 vaddr = (char *) vm->addr;
2868 if (addr >= vaddr + get_vm_area_size(vm))
2869 continue;
2870 while (addr < vaddr) {
2871 if (count == 0)
2872 goto finished;
2873 *buf = '\0';
2874 buf++;
2875 addr++;
2876 count--;
2877 }
2878 n = vaddr + get_vm_area_size(vm) - addr;
2879 if (n > count)
2880 n = count;
2881 if (!(vm->flags & VM_IOREMAP))
2882 aligned_vread(buf, addr, n);
2883 else
2884 memset(buf, 0, n);
2885 buf += n;
2886 addr += n;
2887 count -= n;
2888 }
2889 finished:
2890 spin_unlock(&vmap_area_lock);
2891
2892 if (buf == buf_start)
2893 return 0;
2894
2895 if (buf != buf_start + buflen)
2896 memset(buf, 0, buflen - (buf - buf_start));
2897
2898 return buflen;
2899 }
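
/*
 * Example (editor's sketch): how a debug interface might safely copy
 * an arbitrary vmalloc range out to userspace.  vread() never faults:
 * holes and ioremap ranges come back zero-filled, and 0 is returned if
 * the range hits no vmalloc area at all.  The function name and error
 * policy are invented for the illustration.
 */
#if 0	/* illustrative only */
static long example_read_vmalloc(char __user *ubuf, char *vaddr,
				 unsigned long len)
{
	char *kbuf;
	long n;

	kbuf = vzalloc(len);		/* bounce buffer */
	if (!kbuf)
		return -ENOMEM;

	n = vread(kbuf, vaddr, len);
	if (n && copy_to_user(ubuf, kbuf, n)) {
		vfree(kbuf);
		return -EFAULT;
	}

	vfree(kbuf);
	return n;
}
#endif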
2900
2901 /**
2902 * vwrite() - write vmalloc area in a safe way.
2903 * @buf:	buffer for source data
2904 * @addr:	vm address.
2905 * @count:	number of bytes to be written.
2906 *
2907 * This function checks that addr is a valid vmalloc'ed area and
2908 * copies data from the buffer to that area. If the memory range
2909 * [addr...addr+count) includes some valid address, data is copied
2910 * from the proper part of @buf. Memory holes are skipped; IOREMAP
2911 * areas are treated as holes and no copy is done.
2912 *
2913 * If [addr...addr+count) doesn't intersect any live vm_struct
2914 * area, 0 is returned. @buf should be a kernel buffer.
2915 *
2916 * Note: In usual ops, vwrite() is never necessary because the caller
2917 * should know the vmalloc() area is valid and can use memcpy().
2918 * This is for routines which have to access the vmalloc area without
2919 * any information, such as /dev/kmem.
2920 *
2921 * Return: number of bytes for which addr and buf should be increased
2922 * (same number as @count) or %0 if [addr...addr+count) doesn't
2923 * include any intersection with a valid vmalloc area
2924 */
2925 long vwrite(char *buf, char *addr, unsigned long count)
2926 {
2927 struct vmap_area *va;
2928 struct vm_struct *vm;
2929 char *vaddr;
2930 unsigned long n, buflen;
2931 int copied = 0;
2932
2933 /* Don't allow overflow */
2934 if ((unsigned long) addr + count < count)
2935 count = -(unsigned long) addr;
2936 buflen = count;
2937
2938 spin_lock(&vmap_area_lock);
2939 list_for_each_entry(va, &vmap_area_list, list) {
2940 if (!count)
2941 break;
2942
2943 if (!va->vm)
2944 continue;
2945
2946 vm = va->vm;
2947 vaddr = (char *) vm->addr;
2948 if (addr >= vaddr + get_vm_area_size(vm))
2949 continue;
2950 while (addr < vaddr) {
2951 if (count == 0)
2952 goto finished;
2953 buf++;
2954 addr++;
2955 count--;
2956 }
2957 n = vaddr + get_vm_area_size(vm) - addr;
2958 if (n > count)
2959 n = count;
2960 if (!(vm->flags & VM_IOREMAP)) {
2961 aligned_vwrite(buf, addr, n);
2962 copied++;
2963 }
2964 buf += n;
2965 addr += n;
2966 count -= n;
2967 }
2968 finished:
2969 spin_unlock(&vmap_area_lock);
2970 if (!copied)
2971 return 0;
2972 return buflen;
2973 }
2974
2975 /**
2976 * remap_vmalloc_range_partial - map vmalloc pages to userspace
2977 * @vma:	vma to cover
2978 * @uaddr:	target user address to start at
2979 * @kaddr:	virtual address of vmalloc kernel memory
2980 * @pgoff:	offset from @kaddr to start at
2981 * @size:	size of map area
2982 *
2983 * Returns:	0 for success, -Exxx on failure
2984 *
2985 * This function checks that @kaddr is a valid vmalloc'ed area,
2986 * and that it is big enough to cover the range starting at
2987 * @uaddr in @vma. It will return failure if that criteria isn't
2988 * met.
2989 *
2990 * Similar to remap_pfn_range() (see mm/memory.c)
2991 */
2992 int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
2993 void *kaddr, unsigned long pgoff,
2994 unsigned long size)
2995 {
2996 struct vm_struct *area;
2997 unsigned long off;
2998 unsigned long end_index;
2999
3000 if (check_shl_overflow(pgoff, PAGE_SHIFT, &off))
3001 return -EINVAL;
3002
3003 size = PAGE_ALIGN(size);
3004
3005 if (!PAGE_ALIGNED(uaddr) || !PAGE_ALIGNED(kaddr))
3006 return -EINVAL;
3007
3008 area = find_vm_area(kaddr);
3009 if (!area)
3010 return -EINVAL;
3011
3012 if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT)))
3013 return -EINVAL;
3014
3015 if (check_add_overflow(size, off, &end_index) ||
3016 end_index > get_vm_area_size(area))
3017 return -EINVAL;
3018 kaddr += off;
3019
3020 do {
3021 struct page *page = vmalloc_to_page(kaddr);
3022 int ret;
3023
3024 ret = vm_insert_page(vma, uaddr, page);
3025 if (ret)
3026 return ret;
3027
3028 uaddr += PAGE_SIZE;
3029 kaddr += PAGE_SIZE;
3030 size -= PAGE_SIZE;
3031 } while (size > 0);
3032
3033 vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
3034
3035 return 0;
3036 }
3037 EXPORT_SYMBOL(remap_vmalloc_range_partial);
3038
3039 /**
3040 * remap_vmalloc_range - map vmalloc pages to userspace
3041 * @vma:	vma to cover (map full range of vma)
3042 * @addr:	vmalloc memory
3043 * @pgoff:	number of pages into addr before first page to map
3044 *
3045 * Returns:	0 for success, -Exxx on failure
3046 *
3047 * This function checks that addr is a valid vmalloc'ed area, and
3048 * that it is big enough to cover the vma. It will return failure if
3049 * that criteria isn't met.
3050 *
3051 * Similar to remap_pfn_range() (see mm/memory.c)
3052 */
3053 int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
3054 unsigned long pgoff)
3055 {
3056 return remap_vmalloc_range_partial(vma, vma->vm_start,
3057 addr, pgoff,
3058 vma->vm_end - vma->vm_start);
3059 }
3060 EXPORT_SYMBOL(remap_vmalloc_range);
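
/*
 * Example (editor's sketch): the usual pairing of vmalloc_user() with
 * remap_vmalloc_range() in a character device's ->mmap() handler.  The
 * "example_dev" structure and its buf/buf_size fields are invented for
 * the illustration; only the two vmalloc APIs are real, and the usual
 * <linux/fs.h> file_operations context is assumed.
 */
#if 0	/* illustrative only */
struct example_dev {
	void *buf;		/* from vmalloc_user(), so VM_USERMAP is set */
	size_t buf_size;
};

static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct example_dev *dev = file->private_data;
	unsigned long len = vma->vm_end - vma->vm_start;

	if (len > dev->buf_size)
		return -EINVAL;

	/* maps the whole vma starting at page offset vm_pgoff into buf */
	return remap_vmalloc_range(vma, dev->buf, vma->vm_pgoff);
}
#endif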
3061
3062
3063
3064 /*
3065 * Implement stubs for vmalloc_sync_[un]mappings() if the architecture
3066 * chose not to provide its own versions.
3067 */
3068
3069 void __weak vmalloc_sync_mappings(void)
3070 {
3071 }
3072
3073 void __weak vmalloc_sync_unmappings(void)
3074 {
3075 }
3076
3077 static int f(pte_t *pte, unsigned long addr, void *data)
3078 {
3079 pte_t ***p = data;
3080
3081 if (p) {
3082 *(*p) = pte;
3083 (*p)++;
3084 }
3085 return 0;
3086 }
3087
3088 /**
3089 * alloc_vm_area - allocate a range of kernel address space
3090 * @size:	size of the area
3091 * @ptes:	returns the PTEs for the address space
3092 *
3093 * Returns:	NULL on failure, vm_struct on success
3094 *
3095 * This function reserves a range of kernel address space, and
3096 * allocates pagetables to map that range.  No actual mappings
3097 * are created.
3098 *
3099 * If @ptes is non-NULL, pointers to the PTEs (in init_mm)
3100 * allocated for the VM area are returned.
3101 */
3102 struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
3103 {
3104 struct vm_struct *area;
3105
3106 area = get_vm_area_caller(size, VM_IOREMAP,
3107 __builtin_return_address(0));
3108 if (area == NULL)
3109 return NULL;
3110
3111 /*
3112 * This ensures that page tables are constructed for this region
3113 * of kernel virtual address space and mapped into init_mm.
3114 */
3115 if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
3116 size, f, ptes ? &ptes : NULL)) {
3117 free_vm_area(area);
3118 return NULL;
3119 }
3120
3121 return area;
3122 }
3123 EXPORT_SYMBOL_GPL(alloc_vm_area);
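
/*
 * Example (editor's sketch): reserving kernel virtual space with
 * alloc_vm_area() so the returned PTEs can later be pointed at foreign
 * or device-provided pages (the Xen grant-table style of use).  The
 * names below are invented; only alloc_vm_area()/free_vm_area() are
 * real APIs, and @ptes must have room for one entry per page.
 */
#if 0	/* illustrative only */
static struct vm_struct *example_reserve(unsigned long nr_pages,
					 pte_t **ptes)
{
	/*
	 * Page tables for the range are allocated and ptes[] is filled
	 * with pointers to them, but nothing is mapped yet.
	 */
	return alloc_vm_area(nr_pages * PAGE_SIZE, ptes);
}

static void example_release(struct vm_struct *area)
{
	free_vm_area(area);
}
#endif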
3124
3125 void free_vm_area(struct vm_struct *area)
3126 {
3127 struct vm_struct *ret;
3128 ret = remove_vm_area(area->addr);
3129 BUG_ON(ret != area);
3130 kfree(area);
3131 }
3132 EXPORT_SYMBOL_GPL(free_vm_area);
3133
3134 #ifdef CONFIG_SMP
3135 static struct vmap_area *node_to_va(struct rb_node *n)
3136 {
3137 return rb_entry_safe(n, struct vmap_area, rb_node);
3138 }
3139
3140 /**
3141 * pvm_find_va_enclose_addr - find the vmap_area @addr belongs to
3142 * @addr: target address
3143 *
3144 * Returns: the vmap_area if it is found. If there is no such area,
3145 * the closest lower (reverse order) vmap_area is returned, i.e.
3146 * one with va->va_start <= addr && va->va_end < addr, or NULL
3147 * if there are no areas before @addr.
3148 */
3149 static struct vmap_area *
3150 pvm_find_va_enclose_addr(unsigned long addr)
3151 {
3152 struct vmap_area *va, *tmp;
3153 struct rb_node *n;
3154
3155 n = free_vmap_area_root.rb_node;
3156 va = NULL;
3157
3158 while (n) {
3159 tmp = rb_entry(n, struct vmap_area, rb_node);
3160 if (tmp->va_start <= addr) {
3161 va = tmp;
3162 if (tmp->va_end >= addr)
3163 break;
3164
3165 n = n->rb_right;
3166 } else {
3167 n = n->rb_left;
3168 }
3169 }
3170
3171 return va;
3172 }
3173
3174 /**
3175 * pvm_determine_end_from_reverse - find the highest aligned end
3176 * address of a free block below VMALLOC_END
3177 * @va: in - the VA to start the reverse search from;
3178 *      out - the VA whose end address was used.
3179 * @align: alignment to round the end address down to
3180 *
3181 * Returns: the determined aligned end address, or 0 if none is found.
3182 */
3183 static unsigned long
3184 pvm_determine_end_from_reverse(struct vmap_area **va, unsigned long align)
3185 {
3186 unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
3187 unsigned long addr;
3188
3189 if (likely(*va)) {
3190 list_for_each_entry_from_reverse((*va),
3191 &free_vmap_area_list, list) {
3192 addr = min((*va)->va_end & ~(align - 1), vmalloc_end);
3193 if ((*va)->va_start < addr)
3194 return addr;
3195 }
3196 }
3197
3198 return 0;
3199 }
3200
3201 /**
3202 * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
3203 * @offsets: array containing offset of each area
3204 * @sizes: array containing size of each area
3205 * @nr_vms: the number of areas to allocate
3206 * @align: alignment, all entries in @offsets and @sizes must be aligned to this
3207 *
3208 * Returns: kmalloc'd vm_struct pointer array pointing to allocated
3209 * vm_structs on success, %NULL on failure
3210 *
3211 * Percpu allocator wants to use congruent vm areas so that it can
3212 * maintain the offsets among percpu areas.  This function allocates
3213 * congruent vmalloc areas for it with GFP_KERNEL.  These areas tend to
3214 * be scattered pretty far, distance between two areas easily going up
3215 * to gigabytes.  To avoid interacting with regular vmallocs, these
3216 * areas are allocated from the top of the vmalloc space.
3217 *
3218 * Despite its complicated look, this allocator is rather simple. It
3219 * does everything top-down and scans free blocks from the end looking
3220 * for a matching base. While scanning, if any of the areas do not fit,
3221 * the base address is pulled down to fit the area. Scanning is repeated
3222 * till all the areas fit and then all necessary data structures are
3223 * inserted and the result is returned.
3224 */
3225 struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
3226 const size_t *sizes, int nr_vms,
3227 size_t align)
3228 {
3229 const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
3230 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
3231 struct vmap_area **vas, *va;
3232 struct vm_struct **vms;
3233 int area, area2, last_area, term_area;
3234 unsigned long base, start, size, end, last_end;
3235 bool purged = false;
3236 enum fit_type type;
3237
3238 /* verify parameters and allocate data structures */
3239 BUG_ON(offset_in_page(align) || !is_power_of_2(align));
3240 for (last_area = 0, area = 0; area < nr_vms; area++) {
3241 start = offsets[area];
3242 end = start + sizes[area];
3243
3244 /* is everything aligned properly? */
3245 BUG_ON(!IS_ALIGNED(offsets[area], align));
3246 BUG_ON(!IS_ALIGNED(sizes[area], align));
3247
3248 /* detect the area with the highest address */
3249 if (start > offsets[last_area])
3250 last_area = area;
3251
3252 for (area2 = area + 1; area2 < nr_vms; area2++) {
3253 unsigned long start2 = offsets[area2];
3254 unsigned long end2 = start2 + sizes[area2];
3255
3256 BUG_ON(start2 < end && start < end2);
3257 }
3258 }
3259 last_end = offsets[last_area] + sizes[last_area];
3260
3261 if (vmalloc_end - vmalloc_start < last_end) {
3262 WARN_ON(true);
3263 return NULL;
3264 }
3265
3266 vms = kcalloc(nr_vms, sizeof(vms[0]), GFP_KERNEL);
3267 vas = kcalloc(nr_vms, sizeof(vas[0]), GFP_KERNEL);
3268 if (!vas || !vms)
3269 goto err_free2;
3270
3271 for (area = 0; area < nr_vms; area++) {
3272 vas[area] = kmem_cache_zalloc(vmap_area_cachep, GFP_KERNEL);
3273 vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
3274 if (!vas[area] || !vms[area])
3275 goto err_free;
3276 }
3277 retry:
3278 spin_lock(&vmap_area_lock);
3279
3280 /* start scanning - we scan from the top, begin with the last area */
3281 area = term_area = last_area;
3282 start = offsets[area];
3283 end = start + sizes[area];
3284
3285 va = pvm_find_va_enclose_addr(vmalloc_end);
3286 base = pvm_determine_end_from_reverse(&va, align) - end;
3287
3288 while (true) {
3289 /*
3290 * base might have underflowed, add last_end before
3291 * comparing.
3292 */
3293 if (base + last_end < vmalloc_start + last_end)
3294 goto overflow;
3295
3296 /*
3297 * Fitting base has not been found.
3298 */
3299 if (va == NULL)
3300 goto overflow;
3301
3302 /*
3303 * If required width exceeds current VA block, move
3304 * base downwards and then recheck.
3305 */
3306 if (base + end > va->va_end) {
3307 base = pvm_determine_end_from_reverse(&va, align) - end;
3308 term_area = area;
3309 continue;
3310 }
3311
3312 /*
3313 * If this VA does not fit, move base downwards and recheck.
3314 */
3315 if (base + start < va->va_start) {
3316 va = node_to_va(rb_prev(&va->rb_node));
3317 base = pvm_determine_end_from_reverse(&va, align) - end;
3318 term_area = area;
3319 continue;
3320 }
3321
3322 /*
3323 * This area fits, move on to the previous one.  If
3324 * the previous one is the terminal one, we're done.
3325 */
3326 area = (area + nr_vms - 1) % nr_vms;
3327 if (area == term_area)
3328 break;
3329
3330 start = offsets[area];
3331 end = start + sizes[area];
3332 va = pvm_find_va_enclose_addr(base + end);
3333 }
3334
3335 /* we've found a fitting base, insert all va's */
3336 for (area = 0; area < nr_vms; area++) {
3337 int ret;
3338
3339 start = base + offsets[area];
3340 size = sizes[area];
3341
3342 va = pvm_find_va_enclose_addr(start);
3343 if (WARN_ON_ONCE(va == NULL))
3344 /* It is a BUG(), but trigger recovery instead. */
3345 goto recovery;
3346
3347 type = classify_va_fit_type(va, start, size);
3348 if (WARN_ON_ONCE(type == NOTHING_FIT))
3349 /* It is a BUG(), but trigger recovery instead. */
3350 goto recovery;
3351
3352 ret = adjust_va_to_fit_type(va, start, size, type);
3353 if (unlikely(ret))
3354 goto recovery;
3355
3356 /* Allocated area. */
3357 va = vas[area];
3358 va->va_start = start;
3359 va->va_end = start + size;
3360
3361 insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
3362 }
3363
3364 spin_unlock(&vmap_area_lock);
3365
3366 /* insert all vm's */
3367 for (area = 0; area < nr_vms; area++)
3368 setup_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
3369 pcpu_get_vm_areas);
3370
3371 kfree(vas);
3372 return vms;
3373
3374 recovery:
3375 /* Remove previously allocated areas. */
3376 while (area--) {
3377 __free_vmap_area(vas[area]);
3378 vas[area] = NULL;
3379 }
3380
3381 overflow:
3382 spin_unlock(&vmap_area_lock);
3383 if (!purged) {
3384 purge_vmap_area_lazy();
3385 purged = true;
3386
3387 /* Before "retry", check if we recover. */
3388 for (area = 0; area < nr_vms; area++) {
3389 if (vas[area])
3390 continue;
3391
3392 vas[area] = kmem_cache_zalloc(
3393 vmap_area_cachep, GFP_KERNEL);
3394 if (!vas[area])
3395 goto err_free;
3396 }
3397
3398 goto retry;
3399 }
3400
3401 err_free:
3402 for (area = 0; area < nr_vms; area++) {
3403 if (vas[area])
3404 kmem_cache_free(vmap_area_cachep, vas[area]);
3405
3406 kfree(vms[area]);
3407 }
3408 err_free2:
3409 kfree(vas);
3410 kfree(vms);
3411 return NULL;
3412 }
3413
3414 /**
3415 * pcpu_free_vm_areas - free vmalloc areas for percpu allocator
3416 * @vms: vm_struct pointer array returned by pcpu_get_vm_areas()
3417 * @nr_vms: the number of allocated areas
3418 *
3419 * Free vm_structs and the array allocated by pcpu_get_vm_areas().
3420 */
3421 void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
3422 {
3423 int i;
3424
3425 for (i = 0; i < nr_vms; i++)
3426 free_vm_area(vms[i]);
3427 kfree(vms);
3428 }
3429 #endif
3430
3431 #ifdef CONFIG_PROC_FS
3432 static void *s_start(struct seq_file *m, loff_t *pos)
3433 __acquires(&vmap_area_lock)
3434 {
3435 spin_lock(&vmap_area_lock);
3436 return seq_list_start(&vmap_area_list, *pos);
3437 }
3438
3439 static void *s_next(struct seq_file *m, void *p, loff_t *pos)
3440 {
3441 return seq_list_next(p, &vmap_area_list, pos);
3442 }
3443
3444 static void s_stop(struct seq_file *m, void *p)
3445 __releases(&vmap_area_lock)
3446 {
3447 spin_unlock(&vmap_area_lock);
3448 }
3449
3450 static void show_numa_info(struct seq_file *m, struct vm_struct *v)
3451 {
3452 if (IS_ENABLED(CONFIG_NUMA)) {
3453 unsigned int nr, *counters = m->private;
3454
3455 if (!counters)
3456 return;
3457
3458 if (v->flags & VM_UNINITIALIZED)
3459 return;
3460
3461 smp_rmb();
3462
3463 memset(counters, 0, nr_node_ids * sizeof(unsigned int));
3464
3465 for (nr = 0; nr < v->nr_pages; nr++)
3466 counters[page_to_nid(v->pages[nr])]++;
3467
3468 for_each_node_state(nr, N_HIGH_MEMORY)
3469 if (counters[nr])
3470 seq_printf(m, " N%u=%u", nr, counters[nr]);
3471 }
3472 }
3473
3474 static void show_purge_info(struct seq_file *m)
3475 {
3476 struct llist_node *head;
3477 struct vmap_area *va;
3478
3479 head = READ_ONCE(vmap_purge_list.first);
3480 if (head == NULL)
3481 return;
3482
3483 llist_for_each_entry(va, head, purge_list) {
3484 seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
3485 (void *)va->va_start, (void *)va->va_end,
3486 va->va_end - va->va_start);
3487 }
3488 }
3489
3490 static int s_show(struct seq_file *m, void *p)
3491 {
3492 struct vmap_area *va;
3493 struct vm_struct *v;
3494
3495 va = list_entry(p, struct vmap_area, list);
3496
3497 /*
3498 * A vmap_area without ->vm is either a vm_map_ram area or one
3499 * racing with remove_vm_area(); only the range is dumped.
3500 */
3501 if (!va->vm) {
3502 seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
3503 (void *)va->va_start, (void *)va->va_end,
3504 va->va_end - va->va_start);
3505
3506 return 0;
3507 }
3508
3509 v = va->vm;
3510
3511 seq_printf(m, "0x%pK-0x%pK %7ld",
3512 v->addr, v->addr + v->size, v->size);
3513
3514 if (v->caller)
3515 seq_printf(m, " %pS", v->caller);
3516
3517 if (v->nr_pages)
3518 seq_printf(m, " pages=%d", v->nr_pages);
3519
3520 if (v->phys_addr)
3521 seq_printf(m, " phys=%pa", &v->phys_addr);
3522
3523 if (v->flags & VM_IOREMAP)
3524 seq_puts(m, " ioremap");
3525
3526 if (v->flags & VM_ALLOC)
3527 seq_puts(m, " vmalloc");
3528
3529 if (v->flags & VM_MAP)
3530 seq_puts(m, " vmap");
3531
3532 if (v->flags & VM_USERMAP)
3533 seq_puts(m, " user");
3534
3535 if (v->flags & VM_DMA_COHERENT)
3536 seq_puts(m, " dma-coherent");
3537
3538 if (is_vmalloc_addr(v->pages))
3539 seq_puts(m, " vpages");
3540
3541 show_numa_info(m, v);
3542 seq_putc(m, '\n');
3543
3544
3545 /*
3546 * As a final step, dump "unpurged" areas.  Note that the
3547 * whole /proc/vmallocinfo output is then not address-sorted,
3548 * because the purge list is not sorted.
3549 */
3550 if (list_is_last(&va->list, &vmap_area_list))
3551 show_purge_info(m);
3552
3553 return 0;
3554 }
3555
3556 static const struct seq_operations vmalloc_op = {
3557 .start = s_start,
3558 .next = s_next,
3559 .stop = s_stop,
3560 .show = s_show,
3561 };
3562
3563 static int __init proc_vmalloc_init(void)
3564 {
3565 if (IS_ENABLED(CONFIG_NUMA))
3566 proc_create_seq_private("vmallocinfo", 0400, NULL,
3567 &vmalloc_op,
3568 nr_node_ids * sizeof(unsigned int), NULL);
3569 else
3570 proc_create_seq("vmallocinfo", 0400, NULL, &vmalloc_op);
3571 return 0;
3572 }
3573 module_init(proc_vmalloc_init);
3574
3575 #endif