This source file includes the following definitions:
- arch_filter_pgprot
- vm_get_page_prot
- vm_pgprot_modify
- vma_set_page_prot
- __remove_shared_vm_struct
- unlink_file_vma
- remove_vma
- SYSCALL_DEFINE1
- vma_compute_gap
- vma_compute_subtree_gap
- browse_rb
- validate_mm_rb
- validate_mm
- RB_DECLARE_CALLBACKS_MAX
- vma_rb_insert
- __vma_rb_erase
- vma_rb_erase_ignore
- vma_rb_erase
- anon_vma_interval_tree_pre_update_vma
- anon_vma_interval_tree_post_update_vma
- find_vma_links
- count_vma_pages_range
- __vma_link_rb
- __vma_link_file
- __vma_link
- vma_link
- __insert_vm_struct
- __vma_unlink_common
- __vma_unlink_prev
- __vma_adjust
- is_mergeable_vma
- is_mergeable_anon_vma
- can_vma_merge_before
- can_vma_merge_after
- vma_merge
- anon_vma_compatible
- reusable_anon_vma
- find_mergeable_anon_vma
- round_hint_to_min
- mlock_future_check
- file_mmap_size_max
- file_mmap_ok
- do_mmap
- ksys_mmap_pgoff
- SYSCALL_DEFINE6
- SYSCALL_DEFINE1
- vma_wants_writenotify
- accountable_mapping
- mmap_region
- unmapped_area
- unmapped_area_topdown
- arch_get_unmapped_area
- arch_get_unmapped_area_topdown
- get_unmapped_area
- find_vma
- find_vma_prev
- acct_stack_growth
- expand_upwards
- expand_downwards
- cmdline_parse_stack_guard_gap
- expand_stack
- find_extend_vma
- expand_stack
- find_extend_vma
- remove_vma_list
- unmap_region
- detach_vmas_to_be_unmapped
- __split_vma
- split_vma
- __do_munmap
- do_munmap
- __vm_munmap
- vm_munmap
- SYSCALL_DEFINE2
- SYSCALL_DEFINE5
- do_brk_flags
- vm_brk_flags
- vm_brk
- exit_mmap
- insert_vm_struct
- copy_vma
- may_expand_vm
- vm_stat_account
- special_mapping_close
- special_mapping_name
- special_mapping_mremap
- special_mapping_fault
- __install_special_mapping
- vma_is_special_mapping
- _install_special_mapping
- install_special_mapping
- vm_lock_anon_vma
- vm_lock_mapping
- mm_take_all_locks
- vm_unlock_anon_vma
- vm_unlock_mapping
- mm_drop_all_locks
- mmap_init
- init_user_reserve
- init_admin_reserve
- reserve_mem_notifier
- init_reserve_notifier
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * mm/mmap.c
4  *
5  * Written by obz.
6  *
7  * Address space accounting code	<alan@lxorguk.ukuu.org.uk>
8  */
9
10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11
12 #include <linux/kernel.h>
13 #include <linux/slab.h>
14 #include <linux/backing-dev.h>
15 #include <linux/mm.h>
16 #include <linux/vmacache.h>
17 #include <linux/shm.h>
18 #include <linux/mman.h>
19 #include <linux/pagemap.h>
20 #include <linux/swap.h>
21 #include <linux/syscalls.h>
22 #include <linux/capability.h>
23 #include <linux/init.h>
24 #include <linux/file.h>
25 #include <linux/fs.h>
26 #include <linux/personality.h>
27 #include <linux/security.h>
28 #include <linux/hugetlb.h>
29 #include <linux/shmem_fs.h>
30 #include <linux/profile.h>
31 #include <linux/export.h>
32 #include <linux/mount.h>
33 #include <linux/mempolicy.h>
34 #include <linux/rmap.h>
35 #include <linux/mmu_notifier.h>
36 #include <linux/mmdebug.h>
37 #include <linux/perf_event.h>
38 #include <linux/audit.h>
39 #include <linux/khugepaged.h>
40 #include <linux/uprobes.h>
41 #include <linux/rbtree_augmented.h>
42 #include <linux/notifier.h>
43 #include <linux/memory.h>
44 #include <linux/printk.h>
45 #include <linux/userfaultfd_k.h>
46 #include <linux/moduleparam.h>
47 #include <linux/pkeys.h>
48 #include <linux/oom.h>
49 #include <linux/sched/mm.h>
50
51 #include <linux/uaccess.h>
52 #include <asm/cacheflush.h>
53 #include <asm/tlb.h>
54 #include <asm/mmu_context.h>
55
56 #include "internal.h"
57
58 #ifndef arch_mmap_check
59 #define arch_mmap_check(addr, len, flags) (0)
60 #endif
61
62 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
63 const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
64 const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX;
65 int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS;
66 #endif
67 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
68 const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN;
69 const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
70 int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
71 #endif
72
73 static bool ignore_rlimit_data;
74 core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
75
76 static void unmap_region(struct mm_struct *mm,
77 struct vm_area_struct *vma, struct vm_area_struct *prev,
78 unsigned long start, unsigned long end);
79
80 /*
81  * protection_map[] gives the baseline page protections for each
82  * combination of the VM_READ, VM_WRITE, VM_EXEC and VM_SHARED bits of
83  * vm_flags, which together form a four-bit index:
84  *
85  *   __P000..__P111  private (copy-on-write) mappings, VM_SHARED clear
86  *   __S000..__S111  shared mappings, VM_SHARED set
87  *
88  * The exact protections are architecture defined; historical x86
89  * protection hardware could not express every combination exactly
90  * (for instance, write access without read), so some entries grant
91  * more than the prot bits strictly request.
92  */
93
94 pgprot_t protection_map[16] __ro_after_init = {
95 __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
96 __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
97 };
98
99 #ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
100 static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
101 {
102 return prot;
103 }
104 #endif
105
106 pgprot_t vm_get_page_prot(unsigned long vm_flags)
107 {
108 pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
109 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
110 pgprot_val(arch_vm_get_page_prot(vm_flags)));
111
112 return arch_filter_pgprot(ret);
113 }
114 EXPORT_SYMBOL(vm_get_page_prot);
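/*
 * Worked example (illustrative values): a PROT_READ|PROT_WRITE private
 * mapping has VM_READ|VM_WRITE set and VM_SHARED clear, so the index into
 * protection_map[] is 0x3 and the baseline protection is __P011, i.e. a
 * readable, copy-on-write page protection, before arch_vm_get_page_prot()
 * and arch_filter_pgprot() apply any architecture-specific adjustments.
 */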
115
116 static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
117 {
118 return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
119 }
120
121
122 void vma_set_page_prot(struct vm_area_struct *vma)
123 {
124 unsigned long vm_flags = vma->vm_flags;
125 pgprot_t vm_page_prot;
126
127 vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
128 if (vma_wants_writenotify(vma, vm_page_prot)) {
129 vm_flags &= ~VM_SHARED;
130 vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags);
131 }
132
133 WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
134 }
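/*
 * The trick above: when vma_wants_writenotify() says the mapping needs
 * write notifications (e.g. for dirty tracking of a shared file mapping),
 * VM_SHARED is cleared only for the protection computation, so the PTEs
 * are installed write-protected while vma->vm_flags still advertises a
 * writable shared mapping; the first write then faults and can be noticed
 * (for example via ->page_mkwrite()).
 */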
135
136
137
138
139 static void __remove_shared_vm_struct(struct vm_area_struct *vma,
140 struct file *file, struct address_space *mapping)
141 {
142 if (vma->vm_flags & VM_DENYWRITE)
143 atomic_inc(&file_inode(file)->i_writecount);
144 if (vma->vm_flags & VM_SHARED)
145 mapping_unmap_writable(mapping);
146
147 flush_dcache_mmap_lock(mapping);
148 vma_interval_tree_remove(vma, &mapping->i_mmap);
149 flush_dcache_mmap_unlock(mapping);
150 }
151
152 /*
153  * Unlink a file-based vm structure from its interval tree, to hide
154  * vma from rmap and vmtruncate before freeing its page tables.
155  */
156 void unlink_file_vma(struct vm_area_struct *vma)
157 {
158 struct file *file = vma->vm_file;
159
160 if (file) {
161 struct address_space *mapping = file->f_mapping;
162 i_mmap_lock_write(mapping);
163 __remove_shared_vm_struct(vma, file, mapping);
164 i_mmap_unlock_write(mapping);
165 }
166 }
167
168 /*
169  * Close a vm structure and free it, returning the next.
170  */
171 static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
172 {
173 struct vm_area_struct *next = vma->vm_next;
174
175 might_sleep();
176 if (vma->vm_ops && vma->vm_ops->close)
177 vma->vm_ops->close(vma);
178 if (vma->vm_file)
179 fput(vma->vm_file);
180 mpol_put(vma_policy(vma));
181 vm_area_free(vma);
182 return next;
183 }
184
185 static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags,
186 struct list_head *uf);
187 SYSCALL_DEFINE1(brk, unsigned long, brk)
188 {
189 unsigned long retval;
190 unsigned long newbrk, oldbrk, origbrk;
191 struct mm_struct *mm = current->mm;
192 struct vm_area_struct *next;
193 unsigned long min_brk;
194 bool populate;
195 bool downgraded = false;
196 LIST_HEAD(uf);
197
198 if (down_write_killable(&mm->mmap_sem))
199 return -EINTR;
200
201 origbrk = mm->brk;
202
203 #ifdef CONFIG_COMPAT_BRK
204 /*
205  * CONFIG_COMPAT_BRK can still be overridden by setting
206  * randomize_va_space to 2, which will still cause mm->start_brk
207  * to be arbitrarily shifted.
208  */
209 if (current->brk_randomized)
210 min_brk = mm->start_brk;
211 else
212 min_brk = mm->end_data;
213 #else
214 min_brk = mm->start_brk;
215 #endif
216 if (brk < min_brk)
217 goto out;
218
219
220
221
222
223
224
225 if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
226 mm->end_data, mm->start_data))
227 goto out;
228
229 newbrk = PAGE_ALIGN(brk);
230 oldbrk = PAGE_ALIGN(mm->brk);
231 if (oldbrk == newbrk) {
232 mm->brk = brk;
233 goto success;
234 }
235
236 /*
237  * Always allow shrinking brk.
238  * __do_munmap() may downgrade mmap_sem to read.
239  */
240 if (brk <= mm->brk) {
241 int ret;
242
243 /*
244  * mm->brk must be protected by write mmap_sem, so update it
245  * before downgrading mmap_sem. When __do_munmap() fails,
246  * mm->brk will be restored from origbrk.
247  */
248 mm->brk = brk;
249 ret = __do_munmap(mm, newbrk, oldbrk-newbrk, &uf, true);
250 if (ret < 0) {
251 mm->brk = origbrk;
252 goto out;
253 } else if (ret == 1) {
254 downgraded = true;
255 }
256 goto success;
257 }
258
259
260 next = find_vma(mm, oldbrk);
261 if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
262 goto out;
263
264
265 if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0)
266 goto out;
267 mm->brk = brk;
268
269 success:
270 populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
271 if (downgraded)
272 up_read(&mm->mmap_sem);
273 else
274 up_write(&mm->mmap_sem);
275 userfaultfd_unmap_complete(mm, &uf);
276 if (populate)
277 mm_populate(oldbrk, newbrk - oldbrk);
278 return brk;
279
280 out:
281 retval = origbrk;
282 up_write(&mm->mmap_sem);
283 return retval;
284 }
285
286 static inline unsigned long vma_compute_gap(struct vm_area_struct *vma)
287 {
288 unsigned long gap, prev_end;
289
290
291
292
293
294
295
296 gap = vm_start_gap(vma);
297 if (vma->vm_prev) {
298 prev_end = vm_end_gap(vma->vm_prev);
299 if (gap > prev_end)
300 gap -= prev_end;
301 else
302 gap = 0;
303 }
304 return gap;
305 }
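/*
 * Example with made-up addresses: if the previous vma, including its
 * guard gap (vm_end_gap()), ends at 0x7f0000200000 and this vma, less
 * its own guard gap (vm_start_gap()), starts at 0x7f0000300000, the gap
 * recorded for this node is 0x100000 bytes; if the two ranges overlap,
 * the gap is clamped to zero.
 */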
306
307 #ifdef CONFIG_DEBUG_VM_RB
308 static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma)
309 {
310 unsigned long max = vma_compute_gap(vma), subtree_gap;
311 if (vma->vm_rb.rb_left) {
312 subtree_gap = rb_entry(vma->vm_rb.rb_left,
313 struct vm_area_struct, vm_rb)->rb_subtree_gap;
314 if (subtree_gap > max)
315 max = subtree_gap;
316 }
317 if (vma->vm_rb.rb_right) {
318 subtree_gap = rb_entry(vma->vm_rb.rb_right,
319 struct vm_area_struct, vm_rb)->rb_subtree_gap;
320 if (subtree_gap > max)
321 max = subtree_gap;
322 }
323 return max;
324 }
325
326 static int browse_rb(struct mm_struct *mm)
327 {
328 struct rb_root *root = &mm->mm_rb;
329 int i = 0, j, bug = 0;
330 struct rb_node *nd, *pn = NULL;
331 unsigned long prev = 0, pend = 0;
332
333 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
334 struct vm_area_struct *vma;
335 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
336 if (vma->vm_start < prev) {
337 pr_emerg("vm_start %lx < prev %lx\n",
338 vma->vm_start, prev);
339 bug = 1;
340 }
341 if (vma->vm_start < pend) {
342 pr_emerg("vm_start %lx < pend %lx\n",
343 vma->vm_start, pend);
344 bug = 1;
345 }
346 if (vma->vm_start > vma->vm_end) {
347 pr_emerg("vm_start %lx > vm_end %lx\n",
348 vma->vm_start, vma->vm_end);
349 bug = 1;
350 }
351 spin_lock(&mm->page_table_lock);
352 if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
353 pr_emerg("free gap %lx, correct %lx\n",
354 vma->rb_subtree_gap,
355 vma_compute_subtree_gap(vma));
356 bug = 1;
357 }
358 spin_unlock(&mm->page_table_lock);
359 i++;
360 pn = nd;
361 prev = vma->vm_start;
362 pend = vma->vm_end;
363 }
364 j = 0;
365 for (nd = pn; nd; nd = rb_prev(nd))
366 j++;
367 if (i != j) {
368 pr_emerg("backwards %d, forwards %d\n", j, i);
369 bug = 1;
370 }
371 return bug ? -1 : i;
372 }
373
374 static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
375 {
376 struct rb_node *nd;
377
378 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
379 struct vm_area_struct *vma;
380 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
381 VM_BUG_ON_VMA(vma != ignore &&
382 vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
383 vma);
384 }
385 }
386
387 static void validate_mm(struct mm_struct *mm)
388 {
389 int bug = 0;
390 int i = 0;
391 unsigned long highest_address = 0;
392 struct vm_area_struct *vma = mm->mmap;
393
394 while (vma) {
395 struct anon_vma *anon_vma = vma->anon_vma;
396 struct anon_vma_chain *avc;
397
398 if (anon_vma) {
399 anon_vma_lock_read(anon_vma);
400 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
401 anon_vma_interval_tree_verify(avc);
402 anon_vma_unlock_read(anon_vma);
403 }
404
405 highest_address = vm_end_gap(vma);
406 vma = vma->vm_next;
407 i++;
408 }
409 if (i != mm->map_count) {
410 pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
411 bug = 1;
412 }
413 if (highest_address != mm->highest_vm_end) {
414 pr_emerg("mm->highest_vm_end %lx, found %lx\n",
415 mm->highest_vm_end, highest_address);
416 bug = 1;
417 }
418 i = browse_rb(mm);
419 if (i != mm->map_count) {
420 if (i != -1)
421 pr_emerg("map_count %d rb %d\n", mm->map_count, i);
422 bug = 1;
423 }
424 VM_BUG_ON_MM(bug, mm);
425 }
426 #else
427 #define validate_mm_rb(root, ignore) do { } while (0)
428 #define validate_mm(mm) do { } while (0)
429 #endif
430
431 RB_DECLARE_CALLBACKS_MAX(static, vma_gap_callbacks,
432 struct vm_area_struct, vm_rb,
433 unsigned long, rb_subtree_gap, vma_compute_gap)
434
435
436
437
438
439
440 static void vma_gap_update(struct vm_area_struct *vma)
441 {
442
443
444
445
446 vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
447 }
448
449 static inline void vma_rb_insert(struct vm_area_struct *vma,
450 struct rb_root *root)
451 {
452
453 validate_mm_rb(root, NULL);
454
455 rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
456 }
457
458 static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
459 {
460
461
462
463
464
465 rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
466 }
467
468 static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
469 struct rb_root *root,
470 struct vm_area_struct *ignore)
471 {
472
473
474
475
476
477 validate_mm_rb(root, ignore);
478
479 __vma_rb_erase(vma, root);
480 }
481
482 static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
483 struct rb_root *root)
484 {
485
486
487
488
489 validate_mm_rb(root, vma);
490
491 __vma_rb_erase(vma, root);
492 }
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508 static inline void
509 anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
510 {
511 struct anon_vma_chain *avc;
512
513 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
514 anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
515 }
516
517 static inline void
518 anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
519 {
520 struct anon_vma_chain *avc;
521
522 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
523 anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
524 }
525
526 static int find_vma_links(struct mm_struct *mm, unsigned long addr,
527 unsigned long end, struct vm_area_struct **pprev,
528 struct rb_node ***rb_link, struct rb_node **rb_parent)
529 {
530 struct rb_node **__rb_link, *__rb_parent, *rb_prev;
531
532 __rb_link = &mm->mm_rb.rb_node;
533 rb_prev = __rb_parent = NULL;
534
535 while (*__rb_link) {
536 struct vm_area_struct *vma_tmp;
537
538 __rb_parent = *__rb_link;
539 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
540
541 if (vma_tmp->vm_end > addr) {
542
543 if (vma_tmp->vm_start < end)
544 return -ENOMEM;
545 __rb_link = &__rb_parent->rb_left;
546 } else {
547 rb_prev = __rb_parent;
548 __rb_link = &__rb_parent->rb_right;
549 }
550 }
551
552 *pprev = NULL;
553 if (rb_prev)
554 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
555 *rb_link = __rb_link;
556 *rb_parent = __rb_parent;
557 return 0;
558 }
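/*
 * In short: walk down the rbtree looking at [addr, end).  If an existing
 * vma overlaps the range, the walk bails out with -ENOMEM; otherwise it
 * reports where a new vma would go: *rb_link is the empty child slot,
 * *rb_parent its parent node, and *pprev the vma immediately preceding
 * addr (or NULL), ready for __vma_link().
 */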
559
560 static unsigned long count_vma_pages_range(struct mm_struct *mm,
561 unsigned long addr, unsigned long end)
562 {
563 unsigned long nr_pages = 0;
564 struct vm_area_struct *vma;
565
566
567 vma = find_vma_intersection(mm, addr, end);
568 if (!vma)
569 return 0;
570
571 nr_pages = (min(end, vma->vm_end) -
572 max(addr, vma->vm_start)) >> PAGE_SHIFT;
573
574
575 for (vma = vma->vm_next; vma; vma = vma->vm_next) {
576 unsigned long overlap_len;
577
578 if (vma->vm_start > end)
579 break;
580
581 overlap_len = min(end, vma->vm_end) - vma->vm_start;
582 nr_pages += overlap_len >> PAGE_SHIFT;
583 }
584
585 return nr_pages;
586 }
587
588 void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
589 struct rb_node **rb_link, struct rb_node *rb_parent)
590 {
591
592 if (vma->vm_next)
593 vma_gap_update(vma->vm_next);
594 else
595 mm->highest_vm_end = vm_end_gap(vma);
596
597
598
599
600
601
602
603
604
605
606 rb_link_node(&vma->vm_rb, rb_parent, rb_link);
607 vma->rb_subtree_gap = 0;
608 vma_gap_update(vma);
609 vma_rb_insert(vma, &mm->mm_rb);
610 }
611
612 static void __vma_link_file(struct vm_area_struct *vma)
613 {
614 struct file *file;
615
616 file = vma->vm_file;
617 if (file) {
618 struct address_space *mapping = file->f_mapping;
619
620 if (vma->vm_flags & VM_DENYWRITE)
621 atomic_dec(&file_inode(file)->i_writecount);
622 if (vma->vm_flags & VM_SHARED)
623 atomic_inc(&mapping->i_mmap_writable);
624
625 flush_dcache_mmap_lock(mapping);
626 vma_interval_tree_insert(vma, &mapping->i_mmap);
627 flush_dcache_mmap_unlock(mapping);
628 }
629 }
630
631 static void
632 __vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
633 struct vm_area_struct *prev, struct rb_node **rb_link,
634 struct rb_node *rb_parent)
635 {
636 __vma_link_list(mm, vma, prev, rb_parent);
637 __vma_link_rb(mm, vma, rb_link, rb_parent);
638 }
639
640 static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
641 struct vm_area_struct *prev, struct rb_node **rb_link,
642 struct rb_node *rb_parent)
643 {
644 struct address_space *mapping = NULL;
645
646 if (vma->vm_file) {
647 mapping = vma->vm_file->f_mapping;
648 i_mmap_lock_write(mapping);
649 }
650
651 __vma_link(mm, vma, prev, rb_link, rb_parent);
652 __vma_link_file(vma);
653
654 if (mapping)
655 i_mmap_unlock_write(mapping);
656
657 mm->map_count++;
658 validate_mm(mm);
659 }
660
661
662
663
664
665 static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
666 {
667 struct vm_area_struct *prev;
668 struct rb_node **rb_link, *rb_parent;
669
670 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
671 &prev, &rb_link, &rb_parent))
672 BUG();
673 __vma_link(mm, vma, prev, rb_link, rb_parent);
674 mm->map_count++;
675 }
676
677 static __always_inline void __vma_unlink_common(struct mm_struct *mm,
678 struct vm_area_struct *vma,
679 struct vm_area_struct *prev,
680 bool has_prev,
681 struct vm_area_struct *ignore)
682 {
683 struct vm_area_struct *next;
684
685 vma_rb_erase_ignore(vma, &mm->mm_rb, ignore);
686 next = vma->vm_next;
687 if (has_prev)
688 prev->vm_next = next;
689 else {
690 prev = vma->vm_prev;
691 if (prev)
692 prev->vm_next = next;
693 else
694 mm->mmap = next;
695 }
696 if (next)
697 next->vm_prev = prev;
698
699
700 vmacache_invalidate(mm);
701 }
702
703 static inline void __vma_unlink_prev(struct mm_struct *mm,
704 struct vm_area_struct *vma,
705 struct vm_area_struct *prev)
706 {
707 __vma_unlink_common(mm, vma, prev, true, vma);
708 }
709
710
711
712
713
714
715
716
717 int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
718 unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
719 struct vm_area_struct *expand)
720 {
721 struct mm_struct *mm = vma->vm_mm;
722 struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
723 struct address_space *mapping = NULL;
724 struct rb_root_cached *root = NULL;
725 struct anon_vma *anon_vma = NULL;
726 struct file *file = vma->vm_file;
727 bool start_changed = false, end_changed = false;
728 long adjust_next = 0;
729 int remove_next = 0;
730
731 if (next && !insert) {
732 struct vm_area_struct *exporter = NULL, *importer = NULL;
733
734 if (end >= next->vm_end) {
735
736
737
738
739
740
741 if (next == expand) {
742
743
744
745
746 VM_WARN_ON(end != next->vm_end);
747
748
749
750
751
752 remove_next = 3;
753 VM_WARN_ON(file != next->vm_file);
754 swap(vma, next);
755 } else {
756 VM_WARN_ON(expand != vma);
757
758
759
760
761 remove_next = 1 + (end > next->vm_end);
762 VM_WARN_ON(remove_next == 2 &&
763 end != next->vm_next->vm_end);
764 VM_WARN_ON(remove_next == 1 &&
765 end != next->vm_end);
766
767 end = next->vm_end;
768 }
769
770 exporter = next;
771 importer = vma;
772
773
774
775
776
777 if (remove_next == 2 && !next->anon_vma)
778 exporter = next->vm_next;
779
780 } else if (end > next->vm_start) {
781
782
783
784
785 adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
786 exporter = next;
787 importer = vma;
788 VM_WARN_ON(expand != importer);
789 } else if (end < vma->vm_end) {
790
791
792
793
794
795 adjust_next = -((vma->vm_end - end) >> PAGE_SHIFT);
796 exporter = vma;
797 importer = next;
798 VM_WARN_ON(expand != importer);
799 }
800
801
802
803
804
805
806 if (exporter && exporter->anon_vma && !importer->anon_vma) {
807 int error;
808
809 importer->anon_vma = exporter->anon_vma;
810 error = anon_vma_clone(importer, exporter);
811 if (error)
812 return error;
813 }
814 }
815 again:
816 vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
817
818 if (file) {
819 mapping = file->f_mapping;
820 root = &mapping->i_mmap;
821 uprobe_munmap(vma, vma->vm_start, vma->vm_end);
822
823 if (adjust_next)
824 uprobe_munmap(next, next->vm_start, next->vm_end);
825
826 i_mmap_lock_write(mapping);
827 if (insert) {
828
829
830
831
832
833
834 __vma_link_file(insert);
835 }
836 }
837
838 anon_vma = vma->anon_vma;
839 if (!anon_vma && adjust_next)
840 anon_vma = next->anon_vma;
841 if (anon_vma) {
842 VM_WARN_ON(adjust_next && next->anon_vma &&
843 anon_vma != next->anon_vma);
844 anon_vma_lock_write(anon_vma);
845 anon_vma_interval_tree_pre_update_vma(vma);
846 if (adjust_next)
847 anon_vma_interval_tree_pre_update_vma(next);
848 }
849
850 if (root) {
851 flush_dcache_mmap_lock(mapping);
852 vma_interval_tree_remove(vma, root);
853 if (adjust_next)
854 vma_interval_tree_remove(next, root);
855 }
856
857 if (start != vma->vm_start) {
858 vma->vm_start = start;
859 start_changed = true;
860 }
861 if (end != vma->vm_end) {
862 vma->vm_end = end;
863 end_changed = true;
864 }
865 vma->vm_pgoff = pgoff;
866 if (adjust_next) {
867 next->vm_start += adjust_next << PAGE_SHIFT;
868 next->vm_pgoff += adjust_next;
869 }
870
871 if (root) {
872 if (adjust_next)
873 vma_interval_tree_insert(next, root);
874 vma_interval_tree_insert(vma, root);
875 flush_dcache_mmap_unlock(mapping);
876 }
877
878 if (remove_next) {
879
880
881
882
883 if (remove_next != 3)
884 __vma_unlink_prev(mm, next, vma);
885 else
886
887
888
889
890
891
892
893
894
895 __vma_unlink_common(mm, next, NULL, false, vma);
896 if (file)
897 __remove_shared_vm_struct(next, file, mapping);
898 } else if (insert) {
899
900
901
902
903
904 __insert_vm_struct(mm, insert);
905 } else {
906 if (start_changed)
907 vma_gap_update(vma);
908 if (end_changed) {
909 if (!next)
910 mm->highest_vm_end = vm_end_gap(vma);
911 else if (!adjust_next)
912 vma_gap_update(next);
913 }
914 }
915
916 if (anon_vma) {
917 anon_vma_interval_tree_post_update_vma(vma);
918 if (adjust_next)
919 anon_vma_interval_tree_post_update_vma(next);
920 anon_vma_unlock_write(anon_vma);
921 }
922 if (mapping)
923 i_mmap_unlock_write(mapping);
924
925 if (root) {
926 uprobe_mmap(vma);
927
928 if (adjust_next)
929 uprobe_mmap(next);
930 }
931
932 if (remove_next) {
933 if (file) {
934 uprobe_munmap(next, next->vm_start, next->vm_end);
935 fput(file);
936 }
937 if (next->anon_vma)
938 anon_vma_merge(vma, next);
939 mm->map_count--;
940 mpol_put(vma_policy(next));
941 vm_area_free(next);
942
943
944
945
946
947 if (remove_next != 3) {
948
949
950
951
952
953
954 next = vma->vm_next;
955 } else {
956
957
958
959
960
961
962
963
964
965
966 next = vma;
967 }
968 if (remove_next == 2) {
969 remove_next = 1;
970 end = next->vm_end;
971 goto again;
972 }
973 else if (next)
974 vma_gap_update(next);
975 else {
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995 VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
996 }
997 }
998 if (insert && file)
999 uprobe_mmap(insert);
1000
1001 validate_mm(mm);
1002
1003 return 0;
1004 }
1005
1006 /*
1007  * If the vma has a ->close operation then the driver probably needs to
1008  * release per-vma resources, so we don't attempt to merge those.
1009  */
1010 static inline int is_mergeable_vma(struct vm_area_struct *vma,
1011 struct file *file, unsigned long vm_flags,
1012 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1013 {
1014
1015
1016
1017
1018
1019
1020
1021
1022 if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY)
1023 return 0;
1024 if (vma->vm_file != file)
1025 return 0;
1026 if (vma->vm_ops && vma->vm_ops->close)
1027 return 0;
1028 if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
1029 return 0;
1030 return 1;
1031 }
1032
1033 static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
1034 struct anon_vma *anon_vma2,
1035 struct vm_area_struct *vma)
1036 {
1037
1038
1039
1040
1041 if ((!anon_vma1 || !anon_vma2) && (!vma ||
1042 list_is_singular(&vma->anon_vma_chain)))
1043 return 1;
1044 return anon_vma1 == anon_vma2;
1045 }
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058 static int
1059 can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
1060 struct anon_vma *anon_vma, struct file *file,
1061 pgoff_t vm_pgoff,
1062 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1063 {
1064 if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
1065 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
1066 if (vma->vm_pgoff == vm_pgoff)
1067 return 1;
1068 }
1069 return 0;
1070 }
1071
1072
1073
1074
1075
1076
1077
1078
1079 static int
1080 can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
1081 struct anon_vma *anon_vma, struct file *file,
1082 pgoff_t vm_pgoff,
1083 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1084 {
1085 if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
1086 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
1087 pgoff_t vm_pglen;
1088 vm_pglen = vma_pages(vma);
1089 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
1090 return 1;
1091 }
1092 return 0;
1093 }
1094
1095 /*
1096  * Given a mapping request (addr, end, vm_flags, file, pgoff), figure out
1097  * whether it can be merged with its predecessor and/or its successor, or
1098  * with both when it exactly fills the hole between them.
1099  *
1100  * Called with prev set to the vma preceding addr (or NULL), under the
1101  * mmap_sem held for write.  On success the merged vma is returned after
1102  * __vma_adjust() has extended it over [addr, end); on failure, or when
1103  * no merge is possible (for example for VM_SPECIAL mappings, or when
1104  * flags, files, offsets, anon_vmas or mempolicies do not match), NULL is
1105  * returned and the caller must insert a new vma instead.
1106  */
1135 struct vm_area_struct *vma_merge(struct mm_struct *mm,
1136 struct vm_area_struct *prev, unsigned long addr,
1137 unsigned long end, unsigned long vm_flags,
1138 struct anon_vma *anon_vma, struct file *file,
1139 pgoff_t pgoff, struct mempolicy *policy,
1140 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1141 {
1142 pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
1143 struct vm_area_struct *area, *next;
1144 int err;
1145
1146
1147
1148
1149
1150 if (vm_flags & VM_SPECIAL)
1151 return NULL;
1152
1153 if (prev)
1154 next = prev->vm_next;
1155 else
1156 next = mm->mmap;
1157 area = next;
1158 if (area && area->vm_end == end)
1159 next = next->vm_next;
1160
1161
1162 VM_WARN_ON(prev && addr <= prev->vm_start);
1163 VM_WARN_ON(area && end > area->vm_end);
1164 VM_WARN_ON(addr >= end);
1165
1166
1167
1168
1169 if (prev && prev->vm_end == addr &&
1170 mpol_equal(vma_policy(prev), policy) &&
1171 can_vma_merge_after(prev, vm_flags,
1172 anon_vma, file, pgoff,
1173 vm_userfaultfd_ctx)) {
1174
1175
1176
1177 if (next && end == next->vm_start &&
1178 mpol_equal(policy, vma_policy(next)) &&
1179 can_vma_merge_before(next, vm_flags,
1180 anon_vma, file,
1181 pgoff+pglen,
1182 vm_userfaultfd_ctx) &&
1183 is_mergeable_anon_vma(prev->anon_vma,
1184 next->anon_vma, NULL)) {
1185
1186 err = __vma_adjust(prev, prev->vm_start,
1187 next->vm_end, prev->vm_pgoff, NULL,
1188 prev);
1189 } else
1190 err = __vma_adjust(prev, prev->vm_start,
1191 end, prev->vm_pgoff, NULL, prev);
1192 if (err)
1193 return NULL;
1194 khugepaged_enter_vma_merge(prev, vm_flags);
1195 return prev;
1196 }
1197
1198
1199
1200
1201 if (next && end == next->vm_start &&
1202 mpol_equal(policy, vma_policy(next)) &&
1203 can_vma_merge_before(next, vm_flags,
1204 anon_vma, file, pgoff+pglen,
1205 vm_userfaultfd_ctx)) {
1206 if (prev && addr < prev->vm_end)
1207 err = __vma_adjust(prev, prev->vm_start,
1208 addr, prev->vm_pgoff, NULL, next);
1209 else {
1210 err = __vma_adjust(area, addr, next->vm_end,
1211 next->vm_pgoff - pglen, NULL, next);
1212
1213
1214
1215
1216
1217 area = next;
1218 }
1219 if (err)
1220 return NULL;
1221 khugepaged_enter_vma_merge(area, vm_flags);
1222 return area;
1223 }
1224
1225 return NULL;
1226 }
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241 static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
1242 {
1243 return a->vm_end == b->vm_start &&
1244 mpol_equal(vma_policy(a), vma_policy(b)) &&
1245 a->vm_file == b->vm_file &&
1246 !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC|VM_SOFTDIRTY)) &&
1247 b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
1248 }
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272 static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
1273 {
1274 if (anon_vma_compatible(a, b)) {
1275 struct anon_vma *anon_vma = READ_ONCE(old->anon_vma);
1276
1277 if (anon_vma && list_is_singular(&old->anon_vma_chain))
1278 return anon_vma;
1279 }
1280 return NULL;
1281 }
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291 struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
1292 {
1293 struct anon_vma *anon_vma;
1294 struct vm_area_struct *near;
1295
1296 near = vma->vm_next;
1297 if (!near)
1298 goto try_prev;
1299
1300 anon_vma = reusable_anon_vma(near, vma, near);
1301 if (anon_vma)
1302 return anon_vma;
1303 try_prev:
1304 near = vma->vm_prev;
1305 if (!near)
1306 goto none;
1307
1308 anon_vma = reusable_anon_vma(near, near, vma);
1309 if (anon_vma)
1310 return anon_vma;
1311 none:
1312
1313
1314
1315
1316
1317
1318
1319
1320 return NULL;
1321 }
1322
1323
1324
1325
1326
1327 static inline unsigned long round_hint_to_min(unsigned long hint)
1328 {
1329 hint &= PAGE_MASK;
1330 if (((void *)hint != NULL) &&
1331 (hint < mmap_min_addr))
1332 return PAGE_ALIGN(mmap_min_addr);
1333 return hint;
1334 }
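/*
 * Example, assuming 4KiB pages and an mmap_min_addr of 64KiB: a non-NULL
 * hint of 0x2345 is first masked down to 0x2000, falls below
 * mmap_min_addr, and is therefore bumped up to 0x10000; a hint of 0
 * (no preference) is left untouched.
 */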
1335
1336 static inline int mlock_future_check(struct mm_struct *mm,
1337 unsigned long flags,
1338 unsigned long len)
1339 {
1340 unsigned long locked, lock_limit;
1341
1342
1343 if (flags & VM_LOCKED) {
1344 locked = len >> PAGE_SHIFT;
1345 locked += mm->locked_vm;
1346 lock_limit = rlimit(RLIMIT_MEMLOCK);
1347 lock_limit >>= PAGE_SHIFT;
1348 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
1349 return -EAGAIN;
1350 }
1351 return 0;
1352 }
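/*
 * Example, assuming a 64MiB RLIMIT_MEMLOCK: with 60MiB already counted in
 * mm->locked_vm, a new 8MiB VM_LOCKED request would take the total to
 * 68MiB > 64MiB, so the call fails with -EAGAIN unless the task has
 * CAP_IPC_LOCK.  Without VM_LOCKED the check is skipped entirely.
 */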
1353
1354 static inline u64 file_mmap_size_max(struct file *file, struct inode *inode)
1355 {
1356 if (S_ISREG(inode->i_mode))
1357 return MAX_LFS_FILESIZE;
1358
1359 if (S_ISBLK(inode->i_mode))
1360 return MAX_LFS_FILESIZE;
1361
1362 if (S_ISSOCK(inode->i_mode))
1363 return MAX_LFS_FILESIZE;
1364
1365
1366 if (file->f_mode & FMODE_UNSIGNED_OFFSET)
1367 return 0;
1368
1369
1370 return ULONG_MAX;
1371 }
1372
1373 static inline bool file_mmap_ok(struct file *file, struct inode *inode,
1374 unsigned long pgoff, unsigned long len)
1375 {
1376 u64 maxsize = file_mmap_size_max(file, inode);
1377
1378 if (maxsize && len > maxsize)
1379 return false;
1380 maxsize -= len;
1381 if (pgoff > maxsize >> PAGE_SHIFT)
1382 return false;
1383 return true;
1384 }
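/*
 * The checks above use u64 arithmetic on purpose: len is compared against
 * the maximum byte size the file type supports, and pgoff (a page index)
 * is compared against the remaining headroom in pages, so a huge pgoff
 * cannot wrap the offset-plus-length computation past the file size limit.
 */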
1385
1386 /*
1387  * The caller must hold down_write(&current->mm->mmap_sem).
1388  */
1389 unsigned long do_mmap(struct file *file, unsigned long addr,
1390 unsigned long len, unsigned long prot,
1391 unsigned long flags, vm_flags_t vm_flags,
1392 unsigned long pgoff, unsigned long *populate,
1393 struct list_head *uf)
1394 {
1395 struct mm_struct *mm = current->mm;
1396 int pkey = 0;
1397
1398 *populate = 0;
1399
1400 if (!len)
1401 return -EINVAL;
1402
1403
1404
1405
1406
1407
1408
1409 if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
1410 if (!(file && path_noexec(&file->f_path)))
1411 prot |= PROT_EXEC;
1412
1413
1414 if (flags & MAP_FIXED_NOREPLACE)
1415 flags |= MAP_FIXED;
1416
1417 if (!(flags & MAP_FIXED))
1418 addr = round_hint_to_min(addr);
1419
1420
1421 len = PAGE_ALIGN(len);
1422 if (!len)
1423 return -ENOMEM;
1424
1425
1426 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
1427 return -EOVERFLOW;
1428
1429
1430 if (mm->map_count > sysctl_max_map_count)
1431 return -ENOMEM;
1432
1433
1434
1435
1436 addr = get_unmapped_area(file, addr, len, pgoff, flags);
1437 if (offset_in_page(addr))
1438 return addr;
1439
1440 if (flags & MAP_FIXED_NOREPLACE) {
1441 struct vm_area_struct *vma = find_vma(mm, addr);
1442
1443 if (vma && vma->vm_start < addr + len)
1444 return -EEXIST;
1445 }
1446
1447 if (prot == PROT_EXEC) {
1448 pkey = execute_only_pkey(mm);
1449 if (pkey < 0)
1450 pkey = 0;
1451 }
1452
1453
1454
1455
1456
1457 vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
1458 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
1459
1460 if (flags & MAP_LOCKED)
1461 if (!can_do_mlock())
1462 return -EPERM;
1463
1464 if (mlock_future_check(mm, vm_flags, len))
1465 return -EAGAIN;
1466
1467 if (file) {
1468 struct inode *inode = file_inode(file);
1469 unsigned long flags_mask;
1470
1471 if (!file_mmap_ok(file, inode, pgoff, len))
1472 return -EOVERFLOW;
1473
1474 flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags;
1475
1476 switch (flags & MAP_TYPE) {
1477 case MAP_SHARED:
1478
1479
1480
1481
1482
1483
1484
1485 flags &= LEGACY_MAP_MASK;
1486 /* fall through */
1487 case MAP_SHARED_VALIDATE:
1488 if (flags & ~flags_mask)
1489 return -EOPNOTSUPP;
1490 if (prot & PROT_WRITE) {
1491 if (!(file->f_mode & FMODE_WRITE))
1492 return -EACCES;
1493 if (IS_SWAPFILE(file->f_mapping->host))
1494 return -ETXTBSY;
1495 }
1496
1497
1498
1499
1500
1501 if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
1502 return -EACCES;
1503
1504
1505
1506
1507 if (locks_verify_locked(file))
1508 return -EAGAIN;
1509
1510 vm_flags |= VM_SHARED | VM_MAYSHARE;
1511 if (!(file->f_mode & FMODE_WRITE))
1512 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
1513
1514 /* fall through */
1515 case MAP_PRIVATE:
1516 if (!(file->f_mode & FMODE_READ))
1517 return -EACCES;
1518 if (path_noexec(&file->f_path)) {
1519 if (vm_flags & VM_EXEC)
1520 return -EPERM;
1521 vm_flags &= ~VM_MAYEXEC;
1522 }
1523
1524 if (!file->f_op->mmap)
1525 return -ENODEV;
1526 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1527 return -EINVAL;
1528 break;
1529
1530 default:
1531 return -EINVAL;
1532 }
1533 } else {
1534 switch (flags & MAP_TYPE) {
1535 case MAP_SHARED:
1536 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1537 return -EINVAL;
1538
1539
1540
1541 pgoff = 0;
1542 vm_flags |= VM_SHARED | VM_MAYSHARE;
1543 break;
1544 case MAP_PRIVATE:
1545
1546
1547
1548 pgoff = addr >> PAGE_SHIFT;
1549 break;
1550 default:
1551 return -EINVAL;
1552 }
1553 }
1554
1555
1556
1557
1558
1559 if (flags & MAP_NORESERVE) {
1560
1561 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1562 vm_flags |= VM_NORESERVE;
1563
1564
1565 if (file && is_file_hugepages(file))
1566 vm_flags |= VM_NORESERVE;
1567 }
1568
1569 addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
1570 if (!IS_ERR_VALUE(addr) &&
1571 ((vm_flags & VM_LOCKED) ||
1572 (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
1573 *populate = len;
1574 return addr;
1575 }
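/*
 * For orientation, a userspace call such as
 *
 *	mmap(NULL, 8192, PROT_READ | PROT_WRITE,
 *	     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 * reaches this function through ksys_mmap_pgoff() and vm_mmap_pgoff()
 * with file == NULL and pgoff == 0, takes the anonymous MAP_PRIVATE
 * branch above, and finally lets mmap_region() create the vma.
 */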
1576
1577 unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
1578 unsigned long prot, unsigned long flags,
1579 unsigned long fd, unsigned long pgoff)
1580 {
1581 struct file *file = NULL;
1582 unsigned long retval;
1583
1584 if (!(flags & MAP_ANONYMOUS)) {
1585 audit_mmap_fd(fd, flags);
1586 file = fget(fd);
1587 if (!file)
1588 return -EBADF;
1589 if (is_file_hugepages(file))
1590 len = ALIGN(len, huge_page_size(hstate_file(file)));
1591 retval = -EINVAL;
1592 if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file)))
1593 goto out_fput;
1594 } else if (flags & MAP_HUGETLB) {
1595 struct user_struct *user = NULL;
1596 struct hstate *hs;
1597
1598 hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1599 if (!hs)
1600 return -EINVAL;
1601
1602 len = ALIGN(len, huge_page_size(hs));
1603
1604
1605
1606
1607
1608
1609 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
1610 VM_NORESERVE,
1611 &user, HUGETLB_ANONHUGE_INODE,
1612 (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1613 if (IS_ERR(file))
1614 return PTR_ERR(file);
1615 }
1616
1617 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
1618
1619 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
1620 out_fput:
1621 if (file)
1622 fput(file);
1623 return retval;
1624 }
1625
1626 SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
1627 unsigned long, prot, unsigned long, flags,
1628 unsigned long, fd, unsigned long, pgoff)
1629 {
1630 return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
1631 }
1632
1633 #ifdef __ARCH_WANT_SYS_OLD_MMAP
1634 struct mmap_arg_struct {
1635 unsigned long addr;
1636 unsigned long len;
1637 unsigned long prot;
1638 unsigned long flags;
1639 unsigned long fd;
1640 unsigned long offset;
1641 };
1642
1643 SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
1644 {
1645 struct mmap_arg_struct a;
1646
1647 if (copy_from_user(&a, arg, sizeof(a)))
1648 return -EFAULT;
1649 if (offset_in_page(a.offset))
1650 return -EINVAL;
1651
1652 return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
1653 a.offset >> PAGE_SHIFT);
1654 }
1655 #endif
1656
1657 /*
1658  * Some shared mappings will want the pages marked read-only
1659  * to track write events. If so, we'll downgrade vm_page_prot
1660  * to the private version (using protection_map[] without the
1661  * extra VM_SHARED bit).
1662  */
1663 int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
1664 {
1665 vm_flags_t vm_flags = vma->vm_flags;
1666 const struct vm_operations_struct *vm_ops = vma->vm_ops;
1667
1668
1669 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1670 return 0;
1671
1672
1673 if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
1674 return 1;
1675
1676
1677
1678 if (pgprot_val(vm_page_prot) !=
1679 pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
1680 return 0;
1681
1682
1683 if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
1684 return 1;
1685
1686
1687 if (vm_flags & VM_PFNMAP)
1688 return 0;
1689
1690
1691 return vma->vm_file && vma->vm_file->f_mapping &&
1692 mapping_cap_account_dirty(vma->vm_file->f_mapping);
1693 }
1694
1695
1696
1697
1698
1699 static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
1700 {
1701
1702
1703
1704
1705 if (file && is_file_hugepages(file))
1706 return 0;
1707
1708 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1709 }
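/*
 * Put differently: only private, writable mappings without VM_NORESERVE
 * (and not backed by hugetlbfs, which handles its accounting separately)
 * are charged against the overcommit accounting; shared or read-only
 * mappings are not.
 */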
1710
1711 unsigned long mmap_region(struct file *file, unsigned long addr,
1712 unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
1713 struct list_head *uf)
1714 {
1715 struct mm_struct *mm = current->mm;
1716 struct vm_area_struct *vma, *prev;
1717 int error;
1718 struct rb_node **rb_link, *rb_parent;
1719 unsigned long charged = 0;
1720
1721
1722 if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
1723 unsigned long nr_pages;
1724
1725
1726
1727
1728
1729 nr_pages = count_vma_pages_range(mm, addr, addr + len);
1730
1731 if (!may_expand_vm(mm, vm_flags,
1732 (len >> PAGE_SHIFT) - nr_pages))
1733 return -ENOMEM;
1734 }
1735
1736
1737 while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
1738 &rb_parent)) {
1739 if (do_munmap(mm, addr, len, uf))
1740 return -ENOMEM;
1741 }
1742
1743
1744
1745
1746 if (accountable_mapping(file, vm_flags)) {
1747 charged = len >> PAGE_SHIFT;
1748 if (security_vm_enough_memory_mm(mm, charged))
1749 return -ENOMEM;
1750 vm_flags |= VM_ACCOUNT;
1751 }
1752
1753
1754
1755
1756 vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
1757 NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX);
1758 if (vma)
1759 goto out;
1760
1761
1762
1763
1764
1765
1766 vma = vm_area_alloc(mm);
1767 if (!vma) {
1768 error = -ENOMEM;
1769 goto unacct_error;
1770 }
1771
1772 vma->vm_start = addr;
1773 vma->vm_end = addr + len;
1774 vma->vm_flags = vm_flags;
1775 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1776 vma->vm_pgoff = pgoff;
1777
1778 if (file) {
1779 if (vm_flags & VM_DENYWRITE) {
1780 error = deny_write_access(file);
1781 if (error)
1782 goto free_vma;
1783 }
1784 if (vm_flags & VM_SHARED) {
1785 error = mapping_map_writable(file->f_mapping);
1786 if (error)
1787 goto allow_write_and_free_vma;
1788 }
1789
1790
1791
1792
1793
1794
1795 vma->vm_file = get_file(file);
1796 error = call_mmap(file, vma);
1797 if (error)
1798 goto unmap_and_free_vma;
1799
1800
1801
1802
1803
1804
1805
1806
1807 WARN_ON_ONCE(addr != vma->vm_start);
1808
1809 addr = vma->vm_start;
1810 vm_flags = vma->vm_flags;
1811 } else if (vm_flags & VM_SHARED) {
1812 error = shmem_zero_setup(vma);
1813 if (error)
1814 goto free_vma;
1815 } else {
1816 vma_set_anonymous(vma);
1817 }
1818
1819 vma_link(mm, vma, prev, rb_link, rb_parent);
1820
1821 if (file) {
1822 if (vm_flags & VM_SHARED)
1823 mapping_unmap_writable(file->f_mapping);
1824 if (vm_flags & VM_DENYWRITE)
1825 allow_write_access(file);
1826 }
1827 file = vma->vm_file;
1828 out:
1829 perf_event_mmap(vma);
1830
1831 vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
1832 if (vm_flags & VM_LOCKED) {
1833 if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
1834 is_vm_hugetlb_page(vma) ||
1835 vma == get_gate_vma(current->mm))
1836 vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
1837 else
1838 mm->locked_vm += (len >> PAGE_SHIFT);
1839 }
1840
1841 if (file)
1842 uprobe_mmap(vma);
1843
1844
1845
1846
1847
1848
1849
1850
1851 vma->vm_flags |= VM_SOFTDIRTY;
1852
1853 vma_set_page_prot(vma);
1854
1855 return addr;
1856
1857 unmap_and_free_vma:
1858 vma->vm_file = NULL;
1859 fput(file);
1860
1861
1862 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
1863 charged = 0;
1864 if (vm_flags & VM_SHARED)
1865 mapping_unmap_writable(file->f_mapping);
1866 allow_write_and_free_vma:
1867 if (vm_flags & VM_DENYWRITE)
1868 allow_write_access(file);
1869 free_vma:
1870 vm_area_free(vma);
1871 unacct_error:
1872 if (charged)
1873 vm_unacct_memory(charged);
1874 return error;
1875 }
1876
1877 unsigned long unmapped_area(struct vm_unmapped_area_info *info)
1878 {
1879
1880
1881
1882
1883
1884
1885
1886
1887 struct mm_struct *mm = current->mm;
1888 struct vm_area_struct *vma;
1889 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1890
1891
1892 length = info->length + info->align_mask;
1893 if (length < info->length)
1894 return -ENOMEM;
1895
1896
1897 if (info->high_limit < length)
1898 return -ENOMEM;
1899 high_limit = info->high_limit - length;
1900
1901 if (info->low_limit > high_limit)
1902 return -ENOMEM;
1903 low_limit = info->low_limit + length;
1904
1905
1906 if (RB_EMPTY_ROOT(&mm->mm_rb))
1907 goto check_highest;
1908 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
1909 if (vma->rb_subtree_gap < length)
1910 goto check_highest;
1911
1912 while (true) {
1913
1914 gap_end = vm_start_gap(vma);
1915 if (gap_end >= low_limit && vma->vm_rb.rb_left) {
1916 struct vm_area_struct *left =
1917 rb_entry(vma->vm_rb.rb_left,
1918 struct vm_area_struct, vm_rb);
1919 if (left->rb_subtree_gap >= length) {
1920 vma = left;
1921 continue;
1922 }
1923 }
1924
1925 gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
1926 check_current:
1927
1928 if (gap_start > high_limit)
1929 return -ENOMEM;
1930 if (gap_end >= low_limit &&
1931 gap_end > gap_start && gap_end - gap_start >= length)
1932 goto found;
1933
1934
1935 if (vma->vm_rb.rb_right) {
1936 struct vm_area_struct *right =
1937 rb_entry(vma->vm_rb.rb_right,
1938 struct vm_area_struct, vm_rb);
1939 if (right->rb_subtree_gap >= length) {
1940 vma = right;
1941 continue;
1942 }
1943 }
1944
1945
1946 while (true) {
1947 struct rb_node *prev = &vma->vm_rb;
1948 if (!rb_parent(prev))
1949 goto check_highest;
1950 vma = rb_entry(rb_parent(prev),
1951 struct vm_area_struct, vm_rb);
1952 if (prev == vma->vm_rb.rb_left) {
1953 gap_start = vm_end_gap(vma->vm_prev);
1954 gap_end = vm_start_gap(vma);
1955 goto check_current;
1956 }
1957 }
1958 }
1959
1960 check_highest:
1961
1962 gap_start = mm->highest_vm_end;
1963 gap_end = ULONG_MAX;
1964 if (gap_start > high_limit)
1965 return -ENOMEM;
1966
1967 found:
1968
1969 if (gap_start < info->low_limit)
1970 gap_start = info->low_limit;
1971
1972
1973 gap_start += (info->align_offset - gap_start) & info->align_mask;
1974
1975 VM_BUG_ON(gap_start + info->length > info->high_limit);
1976 VM_BUG_ON(gap_start + info->length > gap_end);
1977 return gap_start;
1978 }
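/*
 * Why this is fast: rb_subtree_gap in each node caches the largest free
 * gap anywhere in that subtree, so whole subtrees whose best gap is
 * smaller than the request are skipped, and a suitable low address is
 * found in O(log N) steps rather than by scanning every vma.
 */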
1979
1980 unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
1981 {
1982 struct mm_struct *mm = current->mm;
1983 struct vm_area_struct *vma;
1984 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1985
1986
1987 length = info->length + info->align_mask;
1988 if (length < info->length)
1989 return -ENOMEM;
1990
1991
1992
1993
1994
1995 gap_end = info->high_limit;
1996 if (gap_end < length)
1997 return -ENOMEM;
1998 high_limit = gap_end - length;
1999
2000 if (info->low_limit > high_limit)
2001 return -ENOMEM;
2002 low_limit = info->low_limit + length;
2003
2004
2005 gap_start = mm->highest_vm_end;
2006 if (gap_start <= high_limit)
2007 goto found_highest;
2008
2009
2010 if (RB_EMPTY_ROOT(&mm->mm_rb))
2011 return -ENOMEM;
2012 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
2013 if (vma->rb_subtree_gap < length)
2014 return -ENOMEM;
2015
2016 while (true) {
2017
2018 gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
2019 if (gap_start <= high_limit && vma->vm_rb.rb_right) {
2020 struct vm_area_struct *right =
2021 rb_entry(vma->vm_rb.rb_right,
2022 struct vm_area_struct, vm_rb);
2023 if (right->rb_subtree_gap >= length) {
2024 vma = right;
2025 continue;
2026 }
2027 }
2028
2029 check_current:
2030
2031 gap_end = vm_start_gap(vma);
2032 if (gap_end < low_limit)
2033 return -ENOMEM;
2034 if (gap_start <= high_limit &&
2035 gap_end > gap_start && gap_end - gap_start >= length)
2036 goto found;
2037
2038
2039 if (vma->vm_rb.rb_left) {
2040 struct vm_area_struct *left =
2041 rb_entry(vma->vm_rb.rb_left,
2042 struct vm_area_struct, vm_rb);
2043 if (left->rb_subtree_gap >= length) {
2044 vma = left;
2045 continue;
2046 }
2047 }
2048
2049
2050 while (true) {
2051 struct rb_node *prev = &vma->vm_rb;
2052 if (!rb_parent(prev))
2053 return -ENOMEM;
2054 vma = rb_entry(rb_parent(prev),
2055 struct vm_area_struct, vm_rb);
2056 if (prev == vma->vm_rb.rb_right) {
2057 gap_start = vma->vm_prev ?
2058 vm_end_gap(vma->vm_prev) : 0;
2059 goto check_current;
2060 }
2061 }
2062 }
2063
2064 found:
2065
2066 if (gap_end > info->high_limit)
2067 gap_end = info->high_limit;
2068
2069 found_highest:
2070
2071 gap_end -= info->length;
2072 gap_end -= (gap_end - info->align_offset) & info->align_mask;
2073
2074 VM_BUG_ON(gap_end < info->low_limit);
2075 VM_BUG_ON(gap_end < gap_start);
2076 return gap_end;
2077 }
2078
2079
2080 #ifndef arch_get_mmap_end
2081 #define arch_get_mmap_end(addr) (TASK_SIZE)
2082 #endif
2083
2084 #ifndef arch_get_mmap_base
2085 #define arch_get_mmap_base(addr, base) (base)
2086 #endif
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099 #ifndef HAVE_ARCH_UNMAPPED_AREA
2100 unsigned long
2101 arch_get_unmapped_area(struct file *filp, unsigned long addr,
2102 unsigned long len, unsigned long pgoff, unsigned long flags)
2103 {
2104 struct mm_struct *mm = current->mm;
2105 struct vm_area_struct *vma, *prev;
2106 struct vm_unmapped_area_info info;
2107 const unsigned long mmap_end = arch_get_mmap_end(addr);
2108
2109 if (len > mmap_end - mmap_min_addr)
2110 return -ENOMEM;
2111
2112 if (flags & MAP_FIXED)
2113 return addr;
2114
2115 if (addr) {
2116 addr = PAGE_ALIGN(addr);
2117 vma = find_vma_prev(mm, addr, &prev);
2118 if (mmap_end - len >= addr && addr >= mmap_min_addr &&
2119 (!vma || addr + len <= vm_start_gap(vma)) &&
2120 (!prev || addr >= vm_end_gap(prev)))
2121 return addr;
2122 }
2123
2124 info.flags = 0;
2125 info.length = len;
2126 info.low_limit = mm->mmap_base;
2127 info.high_limit = mmap_end;
2128 info.align_mask = 0;
2129 return vm_unmapped_area(&info);
2130 }
2131 #endif
2132
2133
2134
2135
2136
2137 #ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
2138 unsigned long
2139 arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
2140 unsigned long len, unsigned long pgoff,
2141 unsigned long flags)
2142 {
2143 struct vm_area_struct *vma, *prev;
2144 struct mm_struct *mm = current->mm;
2145 struct vm_unmapped_area_info info;
2146 const unsigned long mmap_end = arch_get_mmap_end(addr);
2147
2148
2149 if (len > mmap_end - mmap_min_addr)
2150 return -ENOMEM;
2151
2152 if (flags & MAP_FIXED)
2153 return addr;
2154
2155
2156 if (addr) {
2157 addr = PAGE_ALIGN(addr);
2158 vma = find_vma_prev(mm, addr, &prev);
2159 if (mmap_end - len >= addr && addr >= mmap_min_addr &&
2160 (!vma || addr + len <= vm_start_gap(vma)) &&
2161 (!prev || addr >= vm_end_gap(prev)))
2162 return addr;
2163 }
2164
2165 info.flags = VM_UNMAPPED_AREA_TOPDOWN;
2166 info.length = len;
2167 info.low_limit = max(PAGE_SIZE, mmap_min_addr);
2168 info.high_limit = arch_get_mmap_base(addr, mm->mmap_base);
2169 info.align_mask = 0;
2170 addr = vm_unmapped_area(&info);
2171
2172
2173
2174
2175
2176
2177
2178 if (offset_in_page(addr)) {
2179 VM_BUG_ON(addr != -ENOMEM);
2180 info.flags = 0;
2181 info.low_limit = TASK_UNMAPPED_BASE;
2182 info.high_limit = mmap_end;
2183 addr = vm_unmapped_area(&info);
2184 }
2185
2186 return addr;
2187 }
2188 #endif
2189
2190 unsigned long
2191 get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
2192 unsigned long pgoff, unsigned long flags)
2193 {
2194 unsigned long (*get_area)(struct file *, unsigned long,
2195 unsigned long, unsigned long, unsigned long);
2196
2197 unsigned long error = arch_mmap_check(addr, len, flags);
2198 if (error)
2199 return error;
2200
2201
2202 if (len > TASK_SIZE)
2203 return -ENOMEM;
2204
2205 get_area = current->mm->get_unmapped_area;
2206 if (file) {
2207 if (file->f_op->get_unmapped_area)
2208 get_area = file->f_op->get_unmapped_area;
2209 } else if (flags & MAP_SHARED) {
2210
2211
2212
2213
2214
2215 pgoff = 0;
2216 get_area = shmem_get_unmapped_area;
2217 }
2218
2219 addr = get_area(file, addr, len, pgoff, flags);
2220 if (IS_ERR_VALUE(addr))
2221 return addr;
2222
2223 if (addr > TASK_SIZE - len)
2224 return -ENOMEM;
2225 if (offset_in_page(addr))
2226 return -EINVAL;
2227
2228 error = security_mmap_addr(addr);
2229 return error ? error : addr;
2230 }
2231
2232 EXPORT_SYMBOL(get_unmapped_area);
2233
2234
2235 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
2236 {
2237 struct rb_node *rb_node;
2238 struct vm_area_struct *vma;
2239
2240
2241 vma = vmacache_find(mm, addr);
2242 if (likely(vma))
2243 return vma;
2244
2245 rb_node = mm->mm_rb.rb_node;
2246
2247 while (rb_node) {
2248 struct vm_area_struct *tmp;
2249
2250 tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
2251
2252 if (tmp->vm_end > addr) {
2253 vma = tmp;
2254 if (tmp->vm_start <= addr)
2255 break;
2256 rb_node = rb_node->rb_left;
2257 } else
2258 rb_node = rb_node->rb_right;
2259 }
2260
2261 if (vma)
2262 vmacache_update(addr, vma);
2263 return vma;
2264 }
2265
2266 EXPORT_SYMBOL(find_vma);
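/*
 * Note the contract: find_vma() returns the first vma with vm_end > addr,
 * which may start above addr.  Callers that need addr to actually lie
 * inside the returned mapping must additionally check vm_start <= addr,
 * as find_extend_vma() below does.
 */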
2267
2268
2269
2270
2271 struct vm_area_struct *
2272 find_vma_prev(struct mm_struct *mm, unsigned long addr,
2273 struct vm_area_struct **pprev)
2274 {
2275 struct vm_area_struct *vma;
2276
2277 vma = find_vma(mm, addr);
2278 if (vma) {
2279 *pprev = vma->vm_prev;
2280 } else {
2281 struct rb_node *rb_node = rb_last(&mm->mm_rb);
2282
2283 *pprev = rb_node ? rb_entry(rb_node, struct vm_area_struct, vm_rb) : NULL;
2284 }
2285 return vma;
2286 }
2287
2288
2289
2290
2291
2292
2293 static int acct_stack_growth(struct vm_area_struct *vma,
2294 unsigned long size, unsigned long grow)
2295 {
2296 struct mm_struct *mm = vma->vm_mm;
2297 unsigned long new_start;
2298
2299
2300 if (!may_expand_vm(mm, vma->vm_flags, grow))
2301 return -ENOMEM;
2302
2303
2304 if (size > rlimit(RLIMIT_STACK))
2305 return -ENOMEM;
2306
2307
2308 if (vma->vm_flags & VM_LOCKED) {
2309 unsigned long locked;
2310 unsigned long limit;
2311 locked = mm->locked_vm + grow;
2312 limit = rlimit(RLIMIT_MEMLOCK);
2313 limit >>= PAGE_SHIFT;
2314 if (locked > limit && !capable(CAP_IPC_LOCK))
2315 return -ENOMEM;
2316 }
2317
2318
2319 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
2320 vma->vm_end - size;
2321 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
2322 return -EFAULT;
2323
2324
2325
2326
2327
2328 if (security_vm_enough_memory_mm(mm, grow))
2329 return -ENOMEM;
2330
2331 return 0;
2332 }
2333
2334 #if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
2335 /*
2336  * PA-RISC uses this for its stack; IA64 for its Register Backing Store.
2337  * vma is the last one with address > vma->vm_end.  Have to extend vma.
2338  */
2339 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
2340 {
2341 struct mm_struct *mm = vma->vm_mm;
2342 struct vm_area_struct *next;
2343 unsigned long gap_addr;
2344 int error = 0;
2345
2346 if (!(vma->vm_flags & VM_GROWSUP))
2347 return -EFAULT;
2348
2349
2350 address &= PAGE_MASK;
2351 if (address >= (TASK_SIZE & PAGE_MASK))
2352 return -ENOMEM;
2353 address += PAGE_SIZE;
2354
2355
2356 gap_addr = address + stack_guard_gap;
2357
2358
2359 if (gap_addr < address || gap_addr > TASK_SIZE)
2360 gap_addr = TASK_SIZE;
2361
2362 next = vma->vm_next;
2363 if (next && next->vm_start < gap_addr &&
2364 (next->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
2365 if (!(next->vm_flags & VM_GROWSUP))
2366 return -ENOMEM;
2367
2368 }
2369
2370
2371 if (unlikely(anon_vma_prepare(vma)))
2372 return -ENOMEM;
2373
2374
2375
2376
2377
2378
2379 anon_vma_lock_write(vma->anon_vma);
2380
2381
2382 if (address > vma->vm_end) {
2383 unsigned long size, grow;
2384
2385 size = address - vma->vm_start;
2386 grow = (address - vma->vm_end) >> PAGE_SHIFT;
2387
2388 error = -ENOMEM;
2389 if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
2390 error = acct_stack_growth(vma, size, grow);
2391 if (!error) {
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403 spin_lock(&mm->page_table_lock);
2404 if (vma->vm_flags & VM_LOCKED)
2405 mm->locked_vm += grow;
2406 vm_stat_account(mm, vma->vm_flags, grow);
2407 anon_vma_interval_tree_pre_update_vma(vma);
2408 vma->vm_end = address;
2409 anon_vma_interval_tree_post_update_vma(vma);
2410 if (vma->vm_next)
2411 vma_gap_update(vma->vm_next);
2412 else
2413 mm->highest_vm_end = vm_end_gap(vma);
2414 spin_unlock(&mm->page_table_lock);
2415
2416 perf_event_mmap(vma);
2417 }
2418 }
2419 }
2420 anon_vma_unlock_write(vma->anon_vma);
2421 khugepaged_enter_vma_merge(vma, vma->vm_flags);
2422 validate_mm(mm);
2423 return error;
2424 }
2425 #endif
2426
2427
2428
2429
2430 int expand_downwards(struct vm_area_struct *vma,
2431 unsigned long address)
2432 {
2433 struct mm_struct *mm = vma->vm_mm;
2434 struct vm_area_struct *prev;
2435 int error = 0;
2436
2437 address &= PAGE_MASK;
2438 if (address < mmap_min_addr)
2439 return -EPERM;
2440
2441
2442 prev = vma->vm_prev;
2443
2444 if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
2445 (prev->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
2446 if (address - prev->vm_end < stack_guard_gap)
2447 return -ENOMEM;
2448 }
2449
2450
2451 if (unlikely(anon_vma_prepare(vma)))
2452 return -ENOMEM;
2453
2454
2455
2456
2457
2458
2459 anon_vma_lock_write(vma->anon_vma);
2460
2461
2462 if (address < vma->vm_start) {
2463 unsigned long size, grow;
2464
2465 size = vma->vm_end - address;
2466 grow = (vma->vm_start - address) >> PAGE_SHIFT;
2467
2468 error = -ENOMEM;
2469 if (grow <= vma->vm_pgoff) {
2470 error = acct_stack_growth(vma, size, grow);
2471 if (!error) {
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483 spin_lock(&mm->page_table_lock);
2484 if (vma->vm_flags & VM_LOCKED)
2485 mm->locked_vm += grow;
2486 vm_stat_account(mm, vma->vm_flags, grow);
2487 anon_vma_interval_tree_pre_update_vma(vma);
2488 vma->vm_start = address;
2489 vma->vm_pgoff -= grow;
2490 anon_vma_interval_tree_post_update_vma(vma);
2491 vma_gap_update(vma);
2492 spin_unlock(&mm->page_table_lock);
2493
2494 perf_event_mmap(vma);
2495 }
2496 }
2497 }
2498 anon_vma_unlock_write(vma->anon_vma);
2499 khugepaged_enter_vma_merge(vma, vma->vm_flags);
2500 validate_mm(mm);
2501 return error;
2502 }
2503
2504
2505 unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
2506
2507 static int __init cmdline_parse_stack_guard_gap(char *p)
2508 {
2509 unsigned long val;
2510 char *endptr;
2511
2512 val = simple_strtoul(p, &endptr, 10);
2513 if (!*endptr)
2514 stack_guard_gap = val << PAGE_SHIFT;
2515
2516 return 0;
2517 }
2518 __setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
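/*
 * The boot parameter is expressed in pages: the default of 256 pages is
 * 1MiB with 4KiB pages, and booting with e.g. stack_guard_gap=512 doubles
 * it.  A value of 0 effectively disables the gap.
 */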
2519
2520 #ifdef CONFIG_STACK_GROWSUP
2521 int expand_stack(struct vm_area_struct *vma, unsigned long address)
2522 {
2523 return expand_upwards(vma, address);
2524 }
2525
2526 struct vm_area_struct *
2527 find_extend_vma(struct mm_struct *mm, unsigned long addr)
2528 {
2529 struct vm_area_struct *vma, *prev;
2530
2531 addr &= PAGE_MASK;
2532 vma = find_vma_prev(mm, addr, &prev);
2533 if (vma && (vma->vm_start <= addr))
2534 return vma;
2535
2536 if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr))
2537 return NULL;
2538 if (prev->vm_flags & VM_LOCKED)
2539 populate_vma_page_range(prev, addr, prev->vm_end, NULL);
2540 return prev;
2541 }
2542 #else
2543 int expand_stack(struct vm_area_struct *vma, unsigned long address)
2544 {
2545 return expand_downwards(vma, address);
2546 }
2547
2548 struct vm_area_struct *
2549 find_extend_vma(struct mm_struct *mm, unsigned long addr)
2550 {
2551 struct vm_area_struct *vma;
2552 unsigned long start;
2553
2554 addr &= PAGE_MASK;
2555 vma = find_vma(mm, addr);
2556 if (!vma)
2557 return NULL;
2558 if (vma->vm_start <= addr)
2559 return vma;
2560 if (!(vma->vm_flags & VM_GROWSDOWN))
2561 return NULL;
2562
2563 if (!mmget_still_valid(mm))
2564 return NULL;
2565 start = vma->vm_start;
2566 if (expand_stack(vma, addr))
2567 return NULL;
2568 if (vma->vm_flags & VM_LOCKED)
2569 populate_vma_page_range(vma, addr, start, NULL);
2570 return vma;
2571 }
2572 #endif
2573
2574 EXPORT_SYMBOL_GPL(find_extend_vma);
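/*
 * Illustrative sketch (editor's addition, not in the original source):
 * a typical caller resolves a user address to a vma, growing a stack
 * vma on demand, while holding mmap_sem for read.  The helper name
 * below is hypothetical.
 *
 *	static int example_resolve_addr(struct mm_struct *mm, unsigned long addr)
 *	{
 *		struct vm_area_struct *vma;
 *		int ret = 0;
 *
 *		down_read(&mm->mmap_sem);
 *		vma = find_extend_vma(mm, addr);	// may call expand_stack()
 *		if (!vma || !(vma->vm_flags & VM_READ))
 *			ret = -EFAULT;
 *		up_read(&mm->mmap_sem);
 *		return ret;
 *	}
 */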
2575
2576 /*
2577  * Ok - we have the memory areas we should free on the vma list,
2578  * so release them, and do the vma updates.
2579  *
2580  * Called with the mm semaphore held.
2581  */
2582 static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
2583 {
2584 unsigned long nr_accounted = 0;
2585
2586
2587 update_hiwater_vm(mm);
2588 do {
2589 long nrpages = vma_pages(vma);
2590
2591 if (vma->vm_flags & VM_ACCOUNT)
2592 nr_accounted += nrpages;
2593 vm_stat_account(mm, vma->vm_flags, -nrpages);
2594 vma = remove_vma(vma);
2595 } while (vma);
2596 vm_unacct_memory(nr_accounted);
2597 validate_mm(mm);
2598 }
2599
2600 /*
2601  * Get rid of page table information in the indicated region.
2602  *
2603  * Called with the mm semaphore held.
2604  */
2605 static void unmap_region(struct mm_struct *mm,
2606 struct vm_area_struct *vma, struct vm_area_struct *prev,
2607 unsigned long start, unsigned long end)
2608 {
2609 struct vm_area_struct *next = prev ? prev->vm_next : mm->mmap;
2610 struct mmu_gather tlb;
2611
2612 lru_add_drain();
2613 tlb_gather_mmu(&tlb, mm, start, end);
2614 update_hiwater_rss(mm);
2615 unmap_vmas(&tlb, vma, start, end);
2616 free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
2617 next ? next->vm_start : USER_PGTABLES_CEILING);
2618 tlb_finish_mmu(&tlb, start, end);
2619 }
2620
2621 /*
2622  * Create a list of vma's touched by the unmap, removing them from the mm's
2623  * vma list as we go..
2624  */
2625 static void
2626 detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
2627 struct vm_area_struct *prev, unsigned long end)
2628 {
2629 struct vm_area_struct **insertion_point;
2630 struct vm_area_struct *tail_vma = NULL;
2631
2632 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
2633 vma->vm_prev = NULL;
2634 do {
2635 vma_rb_erase(vma, &mm->mm_rb);
2636 mm->map_count--;
2637 tail_vma = vma;
2638 vma = vma->vm_next;
2639 } while (vma && vma->vm_start < end);
2640 *insertion_point = vma;
2641 if (vma) {
2642 vma->vm_prev = prev;
2643 vma_gap_update(vma);
2644 } else
2645 mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
2646 tail_vma->vm_next = NULL;
2647
2648
2649 vmacache_invalidate(mm);
2650 }
2651
2652 /*
2653  * __split_vma() bypasses sysctl_max_map_count checking.  We use this where it
2654  * has already been checked or doesn't make sense to fail.
2655  */
2656 int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2657 unsigned long addr, int new_below)
2658 {
2659 struct vm_area_struct *new;
2660 int err;
2661
2662 if (vma->vm_ops && vma->vm_ops->split) {
2663 err = vma->vm_ops->split(vma, addr);
2664 if (err)
2665 return err;
2666 }
2667
2668 new = vm_area_dup(vma);
2669 if (!new)
2670 return -ENOMEM;
2671
2672 if (new_below)
2673 new->vm_end = addr;
2674 else {
2675 new->vm_start = addr;
2676 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
2677 }
2678
2679 err = vma_dup_policy(vma, new);
2680 if (err)
2681 goto out_free_vma;
2682
2683 err = anon_vma_clone(new, vma);
2684 if (err)
2685 goto out_free_mpol;
2686
2687 if (new->vm_file)
2688 get_file(new->vm_file);
2689
2690 if (new->vm_ops && new->vm_ops->open)
2691 new->vm_ops->open(new);
2692
2693 if (new_below)
2694 err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
2695 ((addr - new->vm_start) >> PAGE_SHIFT), new);
2696 else
2697 err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
2698
2699
2700 if (!err)
2701 return 0;
2702
2703
2704 if (new->vm_ops && new->vm_ops->close)
2705 new->vm_ops->close(new);
2706 if (new->vm_file)
2707 fput(new->vm_file);
2708 unlink_anon_vmas(new);
2709 out_free_mpol:
2710 mpol_put(vma_policy(new));
2711 out_free_vma:
2712 vm_area_free(new);
2713 return err;
2714 }
2715
2716 /*
2717  * Split a vma into two pieces at address 'addr', a new vma is allocated
2718  * either for the first part or the tail.
2719  */
2720 int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2721 unsigned long addr, int new_below)
2722 {
2723 if (mm->map_count >= sysctl_max_map_count)
2724 return -ENOMEM;
2725
2726 return __split_vma(mm, vma, addr, new_below);
2727 }
2728
2729 /* Munmap is split into 2 main parts -- this part which finds
2730  * what needs doing, and the areas themselves, which do the
2731  * work.  This now handles partial unmappings.
2732  * Jeremy Fitzhardinge <jeremy@goop.org>
2733  */
2734 int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
2735 struct list_head *uf, bool downgrade)
2736 {
2737 unsigned long end;
2738 struct vm_area_struct *vma, *prev, *last;
2739
2740 if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
2741 return -EINVAL;
2742
2743 len = PAGE_ALIGN(len);
2744 end = start + len;
2745 if (len == 0)
2746 return -EINVAL;
2747
2748 /*
2749  * arch_unmap() might do unmaps itself.  It must be called
2750  * and finish any rbtree manipulation before this code
2751  * runs and also starts to manipulate the rbtree.
2752  */
2753 arch_unmap(mm, start, end);
2754
2755 /* Find the first overlapping VMA */
2756 vma = find_vma(mm, start);
2757 if (!vma)
2758 return 0;
2759 prev = vma->vm_prev;
2760 /* we have  start < vma->vm_end  */
2761 
2762 /* if it doesn't overlap, we have nothing.. */
2763 if (vma->vm_start >= end)
2764 return 0;
2765
2766 /*
2767  * If we need to split any vma, do it now to save pain later.
2768  *
2769  * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially
2770  * unmapped vm_area_struct will remain in use: so lower split_vma
2771  * places tmp vma above, and higher split_vma places tmp vma below.
2772  */
2773 if (start > vma->vm_start) {
2774 int error;
2775
2776 /*
2777  * Make sure that map_count on return from munmap() will
2778  * not exceed its limit; otherwise the extra split here
2779  * could push map_count over sysctl_max_map_count.
2780  */
2781 if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
2782 return -ENOMEM;
2783
2784 error = __split_vma(mm, vma, start, 0);
2785 if (error)
2786 return error;
2787 prev = vma;
2788 }
2789
2790 /* Does it split the last one? */
2791 last = find_vma(mm, end);
2792 if (last && end > last->vm_start) {
2793 int error = __split_vma(mm, last, end, 1);
2794 if (error)
2795 return error;
2796 }
2797 vma = prev ? prev->vm_next : mm->mmap;
2798
2799 if (unlikely(uf)) {
2800 /*
2801  * If userfaultfd_unmap_prep returns an error the vmas
2802  * will remain split, but userland will get a
2803  * highly unexpected error anyway. This is no
2804  * different than the case where the first of the two
2805  * __split_vma fails, but we don't undo the first
2806  * split, despite we could. This is unlikely enough
2807  * failure that it's not worth optimizing it for.
2808  */
2809 int error = userfaultfd_unmap_prep(vma, start, end, uf);
2810 if (error)
2811 return error;
2812 }
2813
2814 /*
2815  * unlock any mlock()ed ranges before detaching vmas
2816  */
2817 if (mm->locked_vm) {
2818 struct vm_area_struct *tmp = vma;
2819 while (tmp && tmp->vm_start < end) {
2820 if (tmp->vm_flags & VM_LOCKED) {
2821 mm->locked_vm -= vma_pages(tmp);
2822 munlock_vma_pages_all(tmp);
2823 }
2824
2825 tmp = tmp->vm_next;
2826 }
2827 }
2828
2829 /* Detach vmas from rbtree */
2830 detach_vmas_to_be_unmapped(mm, vma, prev, end);
2831
2832 if (downgrade)
2833 downgrade_write(&mm->mmap_sem);
2834
2835 unmap_region(mm, vma, prev, start, end);
2836
2837 /* Fix up all other VM information */
2838 remove_vma_list(mm, vma);
2839
2840 return downgrade ? 1 : 0;
2841 }
2842
2843 int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
2844 struct list_head *uf)
2845 {
2846 return __do_munmap(mm, start, len, uf, false);
2847 }
2848
2849 static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
2850 {
2851 int ret;
2852 struct mm_struct *mm = current->mm;
2853 LIST_HEAD(uf);
2854
2855 if (down_write_killable(&mm->mmap_sem))
2856 return -EINTR;
2857
2858 ret = __do_munmap(mm, start, len, &uf, downgrade);
2859 /*
2860  * Returning 1 indicates mmap_sem is downgraded.
2861  * But 1 is not a legal return value of vm_munmap() and munmap(),
2862  * so reset it to 0 before returning.
2863  */
2864 if (ret == 1) {
2865 up_read(&mm->mmap_sem);
2866 ret = 0;
2867 } else
2868 up_write(&mm->mmap_sem);
2869
2870 userfaultfd_unmap_complete(mm, &uf);
2871 return ret;
2872 }
2873
2874 int vm_munmap(unsigned long start, size_t len)
2875 {
2876 return __vm_munmap(start, len, false);
2877 }
2878 EXPORT_SYMBOL(vm_munmap);
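/*
 * Illustrative sketch (editor's addition): vm_munmap() is the in-kernel
 * counterpart of munmap(2) for tearing down a mapping in current->mm,
 * e.g. one a driver created earlier with vm_mmap() from linux/mm.h.
 * Addresses and lengths are byte values; the length is page-aligned
 * internally.
 *
 *	unsigned long uaddr = vm_mmap(file, 0, SZ_1M, PROT_READ,
 *				      MAP_SHARED, 0);
 *	if (!IS_ERR_VALUE(uaddr)) {
 *		...
 *		vm_munmap(uaddr, SZ_1M);
 *	}
 */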
2879
2880 SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
2881 {
2882 addr = untagged_addr(addr);
2883 profile_munmap(addr);
2884 return __vm_munmap(addr, len, true);
2885 }
2886
2887 
2888 /*
2889  * Emulation of deprecated remap_file_pages() syscall.
2890  */
2891 SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
2892 unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
2893 {
2894
2895 struct mm_struct *mm = current->mm;
2896 struct vm_area_struct *vma;
2897 unsigned long populate = 0;
2898 unsigned long ret = -EINVAL;
2899 struct file *file;
2900
2901 pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.rst.\n",
2902 current->comm, current->pid);
2903
2904 if (prot)
2905 return ret;
2906 start = start & PAGE_MASK;
2907 size = size & PAGE_MASK;
2908
2909 if (start + size <= start)
2910 return ret;
2911
2912 /* Does pgoff wrap? */
2913 if (pgoff + (size >> PAGE_SHIFT) < pgoff)
2914 return ret;
2915
2916 if (down_write_killable(&mm->mmap_sem))
2917 return -EINTR;
2918
2919 vma = find_vma(mm, start);
2920
2921 if (!vma || !(vma->vm_flags & VM_SHARED))
2922 goto out;
2923
2924 if (start < vma->vm_start)
2925 goto out;
2926
2927 if (start + size > vma->vm_end) {
2928 struct vm_area_struct *next;
2929
2930 for (next = vma->vm_next; next; next = next->vm_next) {
2931 /* hole between vmas ? */
2932 if (next->vm_start != next->vm_prev->vm_end)
2933 goto out;
2934
2935 if (next->vm_file != vma->vm_file)
2936 goto out;
2937
2938 if (next->vm_flags != vma->vm_flags)
2939 goto out;
2940
2941 if (start + size <= next->vm_end)
2942 break;
2943 }
2944
2945 if (!next)
2946 goto out;
2947 }
2948
2949 prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
2950 prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
2951 prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
2952
2953 flags &= MAP_NONBLOCK;
2954 flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
2955 if (vma->vm_flags & VM_LOCKED) {
2956 struct vm_area_struct *tmp;
2957 flags |= MAP_LOCKED;
2958
2959 /* drop PG_Mlocked flag for over-mapped range */
2960 for (tmp = vma; tmp->vm_start >= start + size;
2961 tmp = tmp->vm_next) {
2962 /*
2963  * Split pmd and munlock page on the border
2964  * of the range.
2965  */
2966 vma_adjust_trans_huge(tmp, start, start + size, 0);
2967
2968 munlock_vma_pages_range(tmp,
2969 max(tmp->vm_start, start),
2970 min(tmp->vm_end, start + size));
2971 }
2972 }
2973
2974 file = get_file(vma->vm_file);
2975 ret = do_mmap_pgoff(vma->vm_file, start, size,
2976 prot, flags, pgoff, &populate, NULL);
2977 fput(file);
2978 out:
2979 up_write(&mm->mmap_sem);
2980 if (populate)
2981 mm_populate(ret, populate);
2982 if (!IS_ERR_VALUE(ret))
2983 ret = 0;
2984 return ret;
2985 }
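/*
 * Illustrative userspace sketch (editor's addition): the deprecation
 * warning above points callers at plain mmap().  Rearranging pages of a
 * shared file mapping is done today with additional MAP_FIXED mappings
 * of the same file at different offsets, e.g. (fd and page are assumed
 * to be a file descriptor and the page size):
 *
 *	char *base = mmap(NULL, 2 * page, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 *	// map file page 3 over the second window page
 *	mmap(base + page, page, PROT_READ | PROT_WRITE,
 *	     MAP_SHARED | MAP_FIXED, fd, 3 * page);
 */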
2986
2987 /*
2988  *  this is really a simplified "do_mmap".  it only handles
2989  *  anonymous maps.  eventually we may be able to do some
2990  *  brk-specific accounting here.
2991  */
2992 static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long flags, struct list_head *uf)
2993 {
2994 struct mm_struct *mm = current->mm;
2995 struct vm_area_struct *vma, *prev;
2996 struct rb_node **rb_link, *rb_parent;
2997 pgoff_t pgoff = addr >> PAGE_SHIFT;
2998 int error;
2999
3000 /* Until we need other flags, refuse anything except VM_EXEC. */
3001 if ((flags & (~VM_EXEC)) != 0)
3002 return -EINVAL;
3003 flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
3004
3005 error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
3006 if (offset_in_page(error))
3007 return error;
3008
3009 error = mlock_future_check(mm, mm->def_flags, len);
3010 if (error)
3011 return error;
3012
3013 /*
3014  * Clear old maps.  this also does some error checking for us
3015  */
3016 while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
3017 &rb_parent)) {
3018 if (do_munmap(mm, addr, len, uf))
3019 return -ENOMEM;
3020 }
3021
3022 /* Check against address space limits *after* clearing old maps... */
3023 if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
3024 return -ENOMEM;
3025
3026 if (mm->map_count > sysctl_max_map_count)
3027 return -ENOMEM;
3028
3029 if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
3030 return -ENOMEM;
3031
3032 /* Can we just expand an old private anonymous mapping? */
3033 vma = vma_merge(mm, prev, addr, addr + len, flags,
3034 NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX);
3035 if (vma)
3036 goto out;
3037
3038 /*
3039  * create a vma struct for an anonymous mapping
3040  */
3041 vma = vm_area_alloc(mm);
3042 if (!vma) {
3043 vm_unacct_memory(len >> PAGE_SHIFT);
3044 return -ENOMEM;
3045 }
3046
3047 vma_set_anonymous(vma);
3048 vma->vm_start = addr;
3049 vma->vm_end = addr + len;
3050 vma->vm_pgoff = pgoff;
3051 vma->vm_flags = flags;
3052 vma->vm_page_prot = vm_get_page_prot(flags);
3053 vma_link(mm, vma, prev, rb_link, rb_parent);
3054 out:
3055 perf_event_mmap(vma);
3056 mm->total_vm += len >> PAGE_SHIFT;
3057 mm->data_vm += len >> PAGE_SHIFT;
3058 if (flags & VM_LOCKED)
3059 mm->locked_vm += (len >> PAGE_SHIFT);
3060 vma->vm_flags |= VM_SOFTDIRTY;
3061 return 0;
3062 }
3063
3064 int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
3065 {
3066 struct mm_struct *mm = current->mm;
3067 unsigned long len;
3068 int ret;
3069 bool populate;
3070 LIST_HEAD(uf);
3071
3072 len = PAGE_ALIGN(request);
3073 if (len < request)
3074 return -ENOMEM;
3075 if (!len)
3076 return 0;
3077
3078 if (down_write_killable(&mm->mmap_sem))
3079 return -EINTR;
3080
3081 ret = do_brk_flags(addr, len, flags, &uf);
3082 populate = ((mm->def_flags & VM_LOCKED) != 0);
3083 up_write(&mm->mmap_sem);
3084 userfaultfd_unmap_complete(mm, &uf);
3085 if (populate && !ret)
3086 mm_populate(addr, len);
3087 return ret;
3088 }
3089 EXPORT_SYMBOL(vm_brk_flags);
3090
3091 int vm_brk(unsigned long addr, unsigned long len)
3092 {
3093 return vm_brk_flags(addr, len, 0);
3094 }
3095 EXPORT_SYMBOL(vm_brk);
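/*
 * Illustrative sketch (editor's addition): vm_brk()/vm_brk_flags() give
 * in-kernel users a brk-style anonymous mapping at a fixed address, as
 * the ELF loader does for the zero-filled tail of the bss.  The variable
 * names below are hypothetical:
 *
 *	if (vm_brk_flags(elf_bss_start, elf_bss_len, 0))
 *		return -ENOMEM;
 */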
3096
3097 /* Release all mmaps. */
3098 void exit_mmap(struct mm_struct *mm)
3099 {
3100 struct mmu_gather tlb;
3101 struct vm_area_struct *vma;
3102 unsigned long nr_accounted = 0;
3103
3104 /* mm's last user has gone, and its about to be pulled down */
3105 mmu_notifier_release(mm);
3106
3107 if (unlikely(mm_is_oom_victim(mm))) {
3108 /*
3109  * Manually reap the mm to free as much memory as possible.
3110  * Then, as the oom reaper does, set MMF_OOM_SKIP to
3111  * disregard this mm from further consideration.  Taking
3112  * mm->mmap_sem for write after setting MMF_OOM_SKIP will
3113  * guarantee that the oom reaper will not run on this mm
3114  * again after mmap_sem is dropped.
3115  *
3116  * Nothing can be holding mm->mmap_sem here and the above call
3117  * to mmu_notifier_release(mm) ensures mmu notifier callbacks in
3118  * __oom_reap_task_mm() will not block.
3119  *
3120  * This needs to be done before calling munlock_vma_pages_all(),
3121  * which clears VM_LOCKED, otherwise the oom reaper cannot
3122  * reliably test it.
3123  */
3124 (void)__oom_reap_task_mm(mm);
3125
3126 set_bit(MMF_OOM_SKIP, &mm->flags);
3127 down_write(&mm->mmap_sem);
3128 up_write(&mm->mmap_sem);
3129 }
3130
3131 if (mm->locked_vm) {
3132 vma = mm->mmap;
3133 while (vma) {
3134 if (vma->vm_flags & VM_LOCKED)
3135 munlock_vma_pages_all(vma);
3136 vma = vma->vm_next;
3137 }
3138 }
3139
3140 arch_exit_mmap(mm);
3141
3142 vma = mm->mmap;
3143 if (!vma)
3144 return;
3145
3146 lru_add_drain();
3147 flush_cache_mm(mm);
3148 tlb_gather_mmu(&tlb, mm, 0, -1);
3149 /* update_hiwater_rss(mm) here? but nobody should be looking */
3150 /* Use -1 here to ensure all VMAs in the mm are unmapped */
3151 unmap_vmas(&tlb, vma, 0, -1);
3152 free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
3153 tlb_finish_mmu(&tlb, 0, -1);
3154
3155 /*
3156  * Walk the list again, actually closing and freeing it,
3157  * with preemption enabled, without holding any MM locks.
3158  */
3159 while (vma) {
3160 if (vma->vm_flags & VM_ACCOUNT)
3161 nr_accounted += vma_pages(vma);
3162 vma = remove_vma(vma);
3163 }
3164 vm_unacct_memory(nr_accounted);
3165 }
3166
3167 /* Insert vm structure into process list sorted by address
3168  * and into the inode's i_mmap tree.  If vm_file is non-NULL
3169  * then i_mmap_rwsem is taken here.
3170  */
3171 int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
3172 {
3173 struct vm_area_struct *prev;
3174 struct rb_node **rb_link, *rb_parent;
3175
3176 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
3177 &prev, &rb_link, &rb_parent))
3178 return -ENOMEM;
3179 if ((vma->vm_flags & VM_ACCOUNT) &&
3180 security_vm_enough_memory_mm(mm, vma_pages(vma)))
3181 return -ENOMEM;
3182
3183 /*
3184  * The vm_pgoff of a purely anonymous vma should be irrelevant
3185  * until its first write fault, when page's anon_vma and index
3186  * are set.  But now set the vm_pgoff it will almost certainly
3187  * end up with (unless mremap moves it elsewhere before that
3188  * first fault), so /proc/pid/maps tells a consistent story.
3189  *
3190  * By setting it to reflect the virtual start address of the
3191  * vma, merges and splits can happen in a seamless way, just
3192  * using the existing file pgoff checks and manipulations.
3193  * Similarly in do_mmap_pgoff and in do_brk.
3194  */
3195 if (vma_is_anonymous(vma)) {
3196 BUG_ON(vma->anon_vma);
3197 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
3198 }
3199
3200 vma_link(mm, vma, prev, rb_link, rb_parent);
3201 return 0;
3202 }
3203
3204 /*
3205  * Copy the vma structure to a new location in the same mm,
3206  * prior to moving page table entries, to effect an mremap move.
3207  */
3208 struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
3209 unsigned long addr, unsigned long len, pgoff_t pgoff,
3210 bool *need_rmap_locks)
3211 {
3212 struct vm_area_struct *vma = *vmap;
3213 unsigned long vma_start = vma->vm_start;
3214 struct mm_struct *mm = vma->vm_mm;
3215 struct vm_area_struct *new_vma, *prev;
3216 struct rb_node **rb_link, *rb_parent;
3217 bool faulted_in_anon_vma = true;
3218
3219 /*
3220  * If anonymous vma has not yet been faulted, update new pgoff
3221  * to match new location, to increase its chance of merging.
3222  */
3223 if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) {
3224 pgoff = addr >> PAGE_SHIFT;
3225 faulted_in_anon_vma = false;
3226 }
3227
3228 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
3229 return NULL;
3230 new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
3231 vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
3232 vma->vm_userfaultfd_ctx);
3233 if (new_vma) {
3234 /*
3235  * Source vma may have been merged into new_vma
3236  */
3237 if (unlikely(vma_start >= new_vma->vm_start &&
3238 vma_start < new_vma->vm_end)) {
3239 /*
3240  * The only way we can get a vma_merge with
3241  * self during an mremap is if the vma hasn't
3242  * been faulted in yet and we were allowed to
3243  * reset the dst vma->vm_pgoff to the
3244  * destination address of the mremap to allow
3245  * the merge to happen. mremap must change the
3246  * vm_pgoff linearity between src and dst vmas
3247  * (in turn preventing a vma_merge) to be
3248  * safe. It is only safe to keep the vm_pgoff
3249  * valid so the faulted-in page is not recreated.
3250  */
3251 VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
3252 *vmap = vma = new_vma;
3253 }
3254 *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
3255 } else {
3256 new_vma = vm_area_dup(vma);
3257 if (!new_vma)
3258 goto out;
3259 new_vma->vm_start = addr;
3260 new_vma->vm_end = addr + len;
3261 new_vma->vm_pgoff = pgoff;
3262 if (vma_dup_policy(vma, new_vma))
3263 goto out_free_vma;
3264 if (anon_vma_clone(new_vma, vma))
3265 goto out_free_mempol;
3266 if (new_vma->vm_file)
3267 get_file(new_vma->vm_file);
3268 if (new_vma->vm_ops && new_vma->vm_ops->open)
3269 new_vma->vm_ops->open(new_vma);
3270 vma_link(mm, new_vma, prev, rb_link, rb_parent);
3271 *need_rmap_locks = false;
3272 }
3273 return new_vma;
3274
3275 out_free_mempol:
3276 mpol_put(vma_policy(new_vma));
3277 out_free_vma:
3278 vm_area_free(new_vma);
3279 out:
3280 return NULL;
3281 }
3282
3283 /*
3284  * Return true if the calling process may expand its vm space by the passed
3285  * number of pages
3286  */
3287 bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
3288 {
3289 if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
3290 return false;
3291
3292 if (is_data_mapping(flags) &&
3293 mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
3294 /* Workaround for Valgrind */
3295 if (rlimit(RLIMIT_DATA) == 0 &&
3296 mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT)
3297 return true;
3298
3299 pr_warn_once("%s (%d): VmData %lu exceed data ulimit %lu. Update limits%s.\n",
3300 current->comm, current->pid,
3301 (mm->data_vm + npages) << PAGE_SHIFT,
3302 rlimit(RLIMIT_DATA),
3303 ignore_rlimit_data ? "" : " or use boot option ignore_rlimit_data");
3304
3305 if (!ignore_rlimit_data)
3306 return false;
3307 }
3308
3309 return true;
3310 }
3311
3312 void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
3313 {
3314 mm->total_vm += npages;
3315
3316 if (is_exec_mapping(flags))
3317 mm->exec_vm += npages;
3318 else if (is_stack_mapping(flags))
3319 mm->stack_vm += npages;
3320 else if (is_data_mapping(flags))
3321 mm->data_vm += npages;
3322 }
3323
3324 static vm_fault_t special_mapping_fault(struct vm_fault *vmf);
3325
3326 /*
3327  * Having a close hook prevents vma merging regardless of flags.
3328  */
3329 static void special_mapping_close(struct vm_area_struct *vma)
3330 {
3331 }
3332
3333 static const char *special_mapping_name(struct vm_area_struct *vma)
3334 {
3335 return ((struct vm_special_mapping *)vma->vm_private_data)->name;
3336 }
3337
3338 static int special_mapping_mremap(struct vm_area_struct *new_vma)
3339 {
3340 struct vm_special_mapping *sm = new_vma->vm_private_data;
3341
3342 if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
3343 return -EFAULT;
3344
3345 if (sm->mremap)
3346 return sm->mremap(sm, new_vma);
3347
3348 return 0;
3349 }
3350
3351 static const struct vm_operations_struct special_mapping_vmops = {
3352 .close = special_mapping_close,
3353 .fault = special_mapping_fault,
3354 .mremap = special_mapping_mremap,
3355 .name = special_mapping_name,
3356 };
3357
3358 static const struct vm_operations_struct legacy_special_mapping_vmops = {
3359 .close = special_mapping_close,
3360 .fault = special_mapping_fault,
3361 };
3362
3363 static vm_fault_t special_mapping_fault(struct vm_fault *vmf)
3364 {
3365 struct vm_area_struct *vma = vmf->vma;
3366 pgoff_t pgoff;
3367 struct page **pages;
3368
3369 if (vma->vm_ops == &legacy_special_mapping_vmops) {
3370 pages = vma->vm_private_data;
3371 } else {
3372 struct vm_special_mapping *sm = vma->vm_private_data;
3373
3374 if (sm->fault)
3375 return sm->fault(sm, vmf->vma, vmf);
3376
3377 pages = sm->pages;
3378 }
3379
3380 for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
3381 pgoff--;
3382
3383 if (*pages) {
3384 struct page *page = *pages;
3385 get_page(page);
3386 vmf->page = page;
3387 return 0;
3388 }
3389
3390 return VM_FAULT_SIGBUS;
3391 }
3392
3393 static struct vm_area_struct *__install_special_mapping(
3394 struct mm_struct *mm,
3395 unsigned long addr, unsigned long len,
3396 unsigned long vm_flags, void *priv,
3397 const struct vm_operations_struct *ops)
3398 {
3399 int ret;
3400 struct vm_area_struct *vma;
3401
3402 vma = vm_area_alloc(mm);
3403 if (unlikely(vma == NULL))
3404 return ERR_PTR(-ENOMEM);
3405
3406 vma->vm_start = addr;
3407 vma->vm_end = addr + len;
3408
3409 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
3410 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
3411
3412 vma->vm_ops = ops;
3413 vma->vm_private_data = priv;
3414
3415 ret = insert_vm_struct(mm, vma);
3416 if (ret)
3417 goto out;
3418
3419 vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT);
3420
3421 perf_event_mmap(vma);
3422
3423 return vma;
3424
3425 out:
3426 vm_area_free(vma);
3427 return ERR_PTR(ret);
3428 }
3429
3430 bool vma_is_special_mapping(const struct vm_area_struct *vma,
3431 const struct vm_special_mapping *sm)
3432 {
3433 return vma->vm_private_data == sm &&
3434 (vma->vm_ops == &special_mapping_vmops ||
3435 vma->vm_ops == &legacy_special_mapping_vmops);
3436 }
3437
3438 /*
3439  * Called with mm->mmap_sem held for writing.
3440  * Insert a new vma covering the given region, with the given flags.
3441  * Its pages are supplied by the given array of struct page *.
3442  * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
3443  * The region past the last page supplied will always produce SIGBUS.
3444  * The array pointer and the pages it points to are assumed to stay alive
3445  * for as long as this mapping might exist.
3446  */
3447 struct vm_area_struct *_install_special_mapping(
3448 struct mm_struct *mm,
3449 unsigned long addr, unsigned long len,
3450 unsigned long vm_flags, const struct vm_special_mapping *spec)
3451 {
3452 return __install_special_mapping(mm, addr, len, vm_flags, (void *)spec,
3453 &special_mapping_vmops);
3454 }
3455
3456 int install_special_mapping(struct mm_struct *mm,
3457 unsigned long addr, unsigned long len,
3458 unsigned long vm_flags, struct page **pages)
3459 {
3460 struct vm_area_struct *vma = __install_special_mapping(
3461 mm, addr, len, vm_flags, (void *)pages,
3462 &legacy_special_mapping_vmops);
3463
3464 return PTR_ERR_OR_ZERO(vma);
3465 }
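/*
 * Illustrative sketch (editor's addition): a vDSO-style user of the
 * special-mapping API above pins kernel pages into a process mm.  The
 * names below are hypothetical; real callers include the x86 vdso and
 * uprobes.  The pages array is NULL-terminated, matching
 * special_mapping_fault().
 *
 *	static struct page *example_pages[2];	// {page, NULL}
 *	static const struct vm_special_mapping example_spec = {
 *		.name  = "[example]",
 *		.pages = example_pages,
 *	};
 *
 *	vma = _install_special_mapping(mm, addr, PAGE_SIZE,
 *				       VM_READ | VM_MAYREAD, &example_spec);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 */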
3466
3467 static DEFINE_MUTEX(mm_all_locks_mutex);
3468
3469 static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
3470 {
3471 if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
3472 /*
3473  * The LSB of head.next can't change from under us
3474  * because we hold the mm_all_locks_mutex.
3475  */
3476 down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_sem);
3477
3478 /*
3479  * We can safely modify head.next after taking the
3480  * anon_vma->root->rwsem. If some other vma in this mm shares
3481  * the same anon_vma we won't take it again.
3482  *
3483  * No need of atomic instructions here, head.next can't change
3484  * from under us thanks to the anon_vma->root->rwsem.
3485  */
3486 if (__test_and_set_bit(0, (unsigned long *)
3487 &anon_vma->root->rb_root.rb_root.rb_node))
3488 BUG();
3489 }
3490 }
3491
3492 static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
3493 {
3494 if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
3495 /*
3496  * AS_MM_ALL_LOCKS can't change from under us because
3497  * we hold the mm_all_locks_mutex.
3498  *
3499  * Operations on ->flags have to be atomic because
3500  * even if AS_MM_ALL_LOCKS is stable thanks to the
3501  * mm_all_locks_mutex, there may be other cpus
3502  * changing other bitflags in parallel to us.
3503  */
3504 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
3505 BUG();
3506 down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_sem);
3507 }
3508 }
3509
3510 /*
3511  * This operation locks against the VM for all pte/vma/mm related
3512  * operations that could ever happen on a certain mm. This includes
3513  * vmtruncate, try_to_unmap, and all page faults.
3514  *
3515  * The caller must take the mmap_sem in write mode before calling
3516  * mm_take_all_locks(). The caller isn't allowed to release the
3517  * mmap_sem until mm_drop_all_locks() returns.
3518  *
3519  * mmap_sem in write mode is required in order to block all operations
3520  * that could modify pagetables and free pages without need of
3521  * altering the vma layout. It's also needed in write mode to avoid new
3522  * anon_vmas being associated with existing vmas.
3523  *
3524  * A single task can't take more than one mm_take_all_locks() in a row
3525  * or it would deadlock.
3526  *
3527  * The LSB in anon_vma->rb_root.rb_node and the AS_MM_ALL_LOCKS bitflag
3528  * in mapping->flags avoid taking the same lock twice, if more than one
3529  * vma in this mm is backed by the same anon_vma or address_space.
3530  *
3531  * We take locks in the following order, according to the comment at
3532  * the beginning of mm/rmap.c:
3533  *   - all hugetlbfs_i_mmap_rwsem_key locks (aka mapping->i_mmap_rwsem
3534  *     for hugetlb mappings);
3535  *   - all i_mmap_rwsem locks;
3536  *   - all anon_vma->rwsem
3537  *
3538  * We can take all locks within these types in any order because the VM
3539  * code doesn't nest them and we are protected from parallel
3540  * mm_take_all_locks() by mm_all_locks_mutex.
3541  *
3542  * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
3543  * that may have to take thousands of locks.
3544  *
3545  * mm_take_all_locks() can fail if it's interrupted by signals.
3546  */
3547 int mm_take_all_locks(struct mm_struct *mm)
3548 {
3549 struct vm_area_struct *vma;
3550 struct anon_vma_chain *avc;
3551
3552 BUG_ON(down_read_trylock(&mm->mmap_sem));
3553
3554 mutex_lock(&mm_all_locks_mutex);
3555
3556 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3557 if (signal_pending(current))
3558 goto out_unlock;
3559 if (vma->vm_file && vma->vm_file->f_mapping &&
3560 is_vm_hugetlb_page(vma))
3561 vm_lock_mapping(mm, vma->vm_file->f_mapping);
3562 }
3563
3564 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3565 if (signal_pending(current))
3566 goto out_unlock;
3567 if (vma->vm_file && vma->vm_file->f_mapping &&
3568 !is_vm_hugetlb_page(vma))
3569 vm_lock_mapping(mm, vma->vm_file->f_mapping);
3570 }
3571
3572 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3573 if (signal_pending(current))
3574 goto out_unlock;
3575 if (vma->anon_vma)
3576 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
3577 vm_lock_anon_vma(mm, avc->anon_vma);
3578 }
3579
3580 return 0;
3581
3582 out_unlock:
3583 mm_drop_all_locks(mm);
3584 return -EINTR;
3585 }
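/*
 * Illustrative sketch (editor's addition): the intended calling pattern,
 * as used by mmu-notifier registration, brackets the critical section
 * with mmap_sem held for write the whole time:
 *
 *	down_write(&mm->mmap_sem);
 *	ret = mm_take_all_locks(mm);
 *	if (!ret) {
 *		...	// all i_mmap and anon_vma locks are held here
 *		mm_drop_all_locks(mm);
 *	}
 *	up_write(&mm->mmap_sem);
 */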
3586
3587 static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
3588 {
3589 if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
3590 /*
3591  * The LSB of head.next can't change to 0 from under
3592  * us because we hold the mm_all_locks_mutex.
3593  *
3594  * We must however clear the bitflag before unlocking
3595  * the vma so the users using the anon_vma->rb_root will
3596  * never see our bitflag.
3597  *
3598  * No need of atomic instructions here, head.next
3599  * can't change from under us until we release the
3600  * anon_vma->root->rwsem.
3601  */
3602 if (!__test_and_clear_bit(0, (unsigned long *)
3603 &anon_vma->root->rb_root.rb_root.rb_node))
3604 BUG();
3605 anon_vma_unlock_write(anon_vma);
3606 }
3607 }
3608
3609 static void vm_unlock_mapping(struct address_space *mapping)
3610 {
3611 if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
3612 /*
3613  * AS_MM_ALL_LOCKS can't change to 0 from under us
3614  * because we hold the mm_all_locks_mutex.
3615  */
3616 i_mmap_unlock_write(mapping);
3617 if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
3618 &mapping->flags))
3619 BUG();
3620 }
3621 }
3622
3623 /*
3624  * The mmap_sem cannot be released by the caller until
3625  * mm_drop_all_locks() returns.
3626  */
3627 void mm_drop_all_locks(struct mm_struct *mm)
3628 {
3629 struct vm_area_struct *vma;
3630 struct anon_vma_chain *avc;
3631
3632 BUG_ON(down_read_trylock(&mm->mmap_sem));
3633 BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
3634
3635 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3636 if (vma->anon_vma)
3637 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
3638 vm_unlock_anon_vma(avc->anon_vma);
3639 if (vma->vm_file && vma->vm_file->f_mapping)
3640 vm_unlock_mapping(vma->vm_file->f_mapping);
3641 }
3642
3643 mutex_unlock(&mm_all_locks_mutex);
3644 }
3645
3646 /*
3647  * initialise the percpu counter for VM
3648  */
3649 void __init mmap_init(void)
3650 {
3651 int ret;
3652
3653 ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
3654 VM_BUG_ON(ret);
3655 }
3656
3657 /*
3658  * Initialise sysctl_user_reserve_kbytes.
3659  *
3660  * This is intended to prevent a user from starting a single memory hogging
3661  * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER
3662  * mode.
3663  *
3664  * The default value is min(3% of free memory, 128MB).
3665  * 128MB is enough to recover with sshd/login, bash, and top/kill.
3666  */
3667 static int init_user_reserve(void)
3668 {
3669 unsigned long free_kbytes;
3670
3671 free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3672
3673 sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
3674 return 0;
3675 }
3676 subsys_initcall(init_user_reserve);
3677
3678 /*
3679  * Initialise sysctl_admin_reserve_kbytes.
3680  *
3681  * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin
3682  * to log in and kill a memory hogging process.
3683  *
3684  * Systems with more than 256MB will reserve 8MB, enough to recover
3685  * with sshd, bash, and top in OVERCOMMIT_GUESS mode. Smaller systems will
3686  * only reserve 3% of free pages by default.
3687  */
3688 static int init_admin_reserve(void)
3689 {
3690 unsigned long free_kbytes;
3691
3692 free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3693
3694 sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
3695 return 0;
3696 }
3697 subsys_initcall(init_admin_reserve);
3698
3699 /*
3700  * Reinitialise user and admin reserves if memory is added or removed.
3701  *
3702  * The default user reserve max is 128MB, and the default max for the
3703  * admin reserve is 8MB. These are usually, but not always, enough to
3704  * enable recovery from a memory hogging process using login/sshd, a shell,
3705  * and tools like top. It may make sense to increase or even disable the
3706  * reserve depending on the existence of swap or variations in the recovery
3707  * tools. So, the admin may have changed them.
3708  *
3709  * If memory is added and the reserves have been eliminated or increased above
3710  * the default max, then we'll trust the admin.
3711  *
3712  * If memory is removed and there isn't enough free memory, then we
3713  * need to reset the reserves.
3714  *
3715  * Otherwise keep the reserve set by the admin.
3716  */
3717 static int reserve_mem_notifier(struct notifier_block *nb,
3718 unsigned long action, void *data)
3719 {
3720 unsigned long tmp, free_kbytes;
3721
3722 switch (action) {
3723 case MEM_ONLINE:
3724 /* Default max is 128MB. Leave alone if modified by operator. */
3725 tmp = sysctl_user_reserve_kbytes;
3726 if (0 < tmp && tmp < (1UL << 17))
3727 init_user_reserve();
3728
3729 /* Default max is 8MB.  Leave alone if modified by operator. */
3730 tmp = sysctl_admin_reserve_kbytes;
3731 if (0 < tmp && tmp < (1UL << 13))
3732 init_admin_reserve();
3733
3734 break;
3735 case MEM_OFFLINE:
3736 free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3737
3738 if (sysctl_user_reserve_kbytes > free_kbytes) {
3739 init_user_reserve();
3740 pr_info("vm.user_reserve_kbytes reset to %lu\n",
3741 sysctl_user_reserve_kbytes);
3742 }
3743
3744 if (sysctl_admin_reserve_kbytes > free_kbytes) {
3745 init_admin_reserve();
3746 pr_info("vm.admin_reserve_kbytes reset to %lu\n",
3747 sysctl_admin_reserve_kbytes);
3748 }
3749 break;
3750 default:
3751 break;
3752 }
3753 return NOTIFY_OK;
3754 }
3755
3756 static struct notifier_block reserve_mem_nb = {
3757 .notifier_call = reserve_mem_notifier,
3758 };
3759
3760 static int __meminit init_reserve_notifier(void)
3761 {
3762 if (register_hotmemory_notifier(&reserve_mem_nb))
3763 pr_err("Failed registering memory add/remove notifier for admin reserve\n");
3764
3765 return 0;
3766 }
3767 subsys_initcall(init_reserve_notifier);