#ifndef _LINUX_MMU_NOTIFIER_H
#define _LINUX_MMU_NOTIFIER_H

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>
#include <linux/srcu.h>

struct mmu_notifier;
struct mmu_notifier_ops;

#ifdef CONFIG_MMU_NOTIFIER

/*
 * The mmu_notifier_mm structure is allocated and installed in
 * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
 * critical section and it's released only when mm_count reaches zero
 * in mmdrop().
 */
struct mmu_notifier_mm {
	/* all mmu notifiers registered in this mm are queued in this list */
	struct hlist_head list;
	/* to serialize the list modifications and hlist_unhashed */
	spinlock_t lock;
};

struct mmu_notifier_ops {
	/*
	 * Called either by mmu_notifier_unregister or when the mm is
	 * being destroyed by exit_mmap, always before all pages are
	 * freed. This can run concurrently with other mmu notifier
	 * methods (the ones invoked outside the mm context) and it
	 * should tear down all secondary mmu mappings and freeze the
	 * secondary mmu. If this method isn't implemented you have to
	 * be sure that nothing could possibly write to the pages
	 * through the secondary mmu by the time the last thread with
	 * tsk->mm == mm exits.
	 *
	 * As a side note: the pages freed after ->release returns could
	 * be immediately reallocated by the gart at an alias physical
	 * address with a different cache model, so if ->release isn't
	 * implemented because all _software_ driven memory accesses
	 * through the secondary mmu are terminated by the time the
	 * last thread of this mm quits, you also have to be sure that
	 * speculative _hardware_ operations can't allocate dirty
	 * cachelines in the cpu that could not be snooped and made
	 * coherent with the other read and write operations happening
	 * through the gart alias address, which would lead to memory
	 * corruption.
	 */
	void (*release)(struct mmu_notifier *mn,
			struct mm_struct *mm);

	/*
	 * clear_flush_young is called after the VM has
	 * test-and-cleared the young/accessed bitflag in the
	 * pte. This way the VM will provide proper aging for
	 * accesses to the page through the secondary MMUs and not
	 * only for the ones through the Linux pte.
	 * Start-end is necessary in case the secondary MMU is mapping the page
	 * at a smaller granularity than the primary MMU.
	 */
	int (*clear_flush_young)(struct mmu_notifier *mn,
				 struct mm_struct *mm,
				 unsigned long start,
				 unsigned long end);

	/*
	 * test_young is called to check the young/accessed bitflag in
	 * the secondary pte. This is used to determine whether the
	 * page is frequently used without actually clearing the flag
	 * or tearing down the secondary mapping on the page.
	 */
	int (*test_young)(struct mmu_notifier *mn,
			  struct mm_struct *mm,
			  unsigned long address);

	/*
	 * change_pte is called when a pte mapping a page is changed:
	 * for example, when ksm remaps a pte to point to a new shared
	 * page.
	 */
	void (*change_pte)(struct mmu_notifier *mn,
			   struct mm_struct *mm,
			   unsigned long address,
			   pte_t pte);
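
	/*
	 * Illustrative sketch (not part of this header; my_mirror and
	 * my_dev_test_and_clear_young() are hypothetical): a driver
	 * mirroring the primary MMU into a device MMU might implement
	 * the aging method above roughly like this:
	 *
	 *	static int my_clear_flush_young(struct mmu_notifier *mn,
	 *					struct mm_struct *mm,
	 *					unsigned long start,
	 *					unsigned long end)
	 *	{
	 *		struct my_mirror *m;
	 *
	 *		m = container_of(mn, struct my_mirror, mn);
	 *		// clear the device-side accessed bits and flush
	 *		// the device TLB for the range; report whether
	 *		// any spte in [start, end) was young
	 *		return my_dev_test_and_clear_young(m, start, end);
	 *	}
	 */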

	/*
	 * Before this is invoked any secondary MMU is still ok to
	 * read/write to the page previously pointed to by the Linux
	 * pte because the page hasn't been freed yet and it won't be
	 * freed until this returns. If required, set_page_dirty() has
	 * to be called internally to this method.
	 */
	void (*invalidate_page)(struct mmu_notifier *mn,
				struct mm_struct *mm,
				unsigned long address);

	/*
	 * invalidate_range_start() and invalidate_range_end() must be
	 * paired and are called only when the mmap_sem and/or the
	 * locks protecting the reverse maps are held. If the subsystem
	 * can't guarantee that no additional references are taken to
	 * the pages in the range, it has to implement the
	 * invalidate_range() notifier to remove any references taken
	 * after invalidate_range_start().
	 *
	 * Invalidation of multiple concurrent ranges may be
	 * optionally permitted by the driver. Either way the
	 * establishment of sptes is forbidden in the range passed to
	 * invalidate_range_start/end for the whole duration of the
	 * invalidate_range_start/end critical section.
	 *
	 * invalidate_range_start() is called when all pages in the
	 * range are still mapped and have at least a refcount of one.
	 *
	 * invalidate_range_end() is called when all pages in the
	 * range have been unmapped and the pages have been freed by
	 * the VM.
	 *
	 * The VM will remove the page table entries and potentially
	 * the page between invalidate_range_start() and
	 * invalidate_range_end(). If the page must not be freed
	 * because of pending I/O or other circumstances then the
	 * invalidate_range_start() callback (or the initial mapping
	 * by the driver) must make sure that the refcount is kept
	 * elevated.
	 *
	 * If the driver increases the refcount when the pages are
	 * initially mapped into an address space then either
	 * invalidate_range_start() or invalidate_range_end() may
	 * decrease the refcount. If the refcount is decreased on
	 * invalidate_range_start() then the VM can free pages as page
	 * table entries are removed. If the refcount is only
	 * dropped on invalidate_range_end() then the driver itself
	 * will drop the last refcount but it must take care to flush
	 * any secondary tlb before doing the final free on the
	 * page. Pages will no longer be referenced by the Linux
	 * address space but may still be referenced by sptes until
	 * the last refcount is dropped.
	 */
	void (*invalidate_range_start)(struct mmu_notifier *mn,
				       struct mm_struct *mm,
				       unsigned long start, unsigned long end);
	void (*invalidate_range_end)(struct mmu_notifier *mn,
				     struct mm_struct *mm,
				     unsigned long start, unsigned long end);

	/*
	 * invalidate_range() is either called between
	 * invalidate_range_start() and invalidate_range_end() when the
	 * VM has to free pages that were unmapped, but before the
	 * pages are actually freed, or outside of _start()/_end() when
	 * a (remote) TLB flush is otherwise necessary.
	 *
	 * If invalidate_range() is used to manage a non-CPU TLB with
	 * shared page-tables, it is not necessary to implement the
	 * invalidate_range_start()/end() notifiers, as
	 * invalidate_range() already catches the points in time when
	 * an external TLB range needs to be flushed.
	 *
	 * The invalidate_range() function is called under the ptl
	 * spin-lock and is not allowed to sleep.
	 *
	 * Note that this function might be called with just a sub-range
	 * of what was passed to invalidate_range_start()/end(), if
	 * called between those functions.
	 */
	void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm,
				 unsigned long start, unsigned long end);
};
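
/*
 * Illustrative sketch (not part of this header; my_mirror, my_ops and
 * the my_* callbacks are hypothetical): a typical user embeds the
 * notifier in its own per-mm state, points ->ops at a static ops
 * table and registers it while mm_users is still elevated:
 *
 *	struct my_mirror {
 *		struct mmu_notifier mn;
 *		spinlock_t lock;	// serializes device spte updates
 *		int invalidate_count;
 *	};
 *
 *	static const struct mmu_notifier_ops my_ops = {
 *		.release		= my_release,
 *		.invalidate_range_start	= my_invalidate_range_start,
 *		.invalidate_range_end	= my_invalidate_range_end,
 *	};
 *
 *	static int my_mirror_register(struct my_mirror *m,
 *				      struct mm_struct *mm)
 *	{
 *		m->mn.ops = &my_ops;
 *		return mmu_notifier_register(&m->mn, mm);
 *	}
 */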

/*
 * The notifier chains are protected by mmap_sem and/or the reverse map
 * semaphores. Notifier chains are only changed when all reverse maps and
 * the mmap_sem locks are taken.
 *
 * Therefore notifier chains can only be traversed when either
 *
 * 1. mmap_sem is held.
 * 2. One of the reverse map locks is held (i_mmap_rwsem or anon_vma->rwsem).
 * 3. No other concurrent thread can access the list (release)
 */
struct mmu_notifier {
	struct hlist_node hlist;
	const struct mmu_notifier_ops *ops;
};

static inline int mm_has_notifiers(struct mm_struct *mm)
{
	return unlikely(mm->mmu_notifier_mm);
}

extern int mmu_notifier_register(struct mmu_notifier *mn,
				 struct mm_struct *mm);
extern int __mmu_notifier_register(struct mmu_notifier *mn,
				   struct mm_struct *mm);
extern void mmu_notifier_unregister(struct mmu_notifier *mn,
				    struct mm_struct *mm);
extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
					       struct mm_struct *mm);
extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
extern void __mmu_notifier_release(struct mm_struct *mm);
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
					    unsigned long start,
					    unsigned long end);
extern int __mmu_notifier_test_young(struct mm_struct *mm,
				     unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
				      unsigned long address, pte_t pte);
extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
					   unsigned long address);
extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
						  unsigned long start, unsigned long end);
extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
						unsigned long start, unsigned long end);
extern void __mmu_notifier_invalidate_range(struct mm_struct *mm,
					    unsigned long start, unsigned long end);

static inline void mmu_notifier_release(struct mm_struct *mm)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_release(mm);
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
						 unsigned long start,
						 unsigned long end)
{
	if (mm_has_notifiers(mm))
		return __mmu_notifier_clear_flush_young(mm, start, end);
	return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
					  unsigned long address)
{
	if (mm_has_notifiers(mm))
		return __mmu_notifier_test_young(mm, address);
	return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
					   unsigned long address, pte_t pte)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_change_pte(mm, address, pte);
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
						unsigned long address)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_page(mm, address);
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
						       unsigned long start, unsigned long end)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_range_start(mm, start, end);
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
						     unsigned long start, unsigned long end)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_range_end(mm, start, end);
}

static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
						 unsigned long start, unsigned long end)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_range(mm, start, end);
}
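
/*
 * Illustrative sketch (not part of this header; my_mirror and
 * my_dev_flush_sptes() are hypothetical): a common driver pattern for
 * the invalidate_range_start()/end() pair is to forbid establishment
 * of new sptes in the range while an invalidation is in flight:
 *
 *	static void my_invalidate_range_start(struct mmu_notifier *mn,
 *					      struct mm_struct *mm,
 *					      unsigned long start,
 *					      unsigned long end)
 *	{
 *		struct my_mirror *m = container_of(mn, struct my_mirror, mn);
 *
 *		spin_lock(&m->lock);
 *		m->invalidate_count++;	// faults must not map [start, end)
 *		my_dev_flush_sptes(m, start, end);
 *		spin_unlock(&m->lock);
 *	}
 *
 *	static void my_invalidate_range_end(struct mmu_notifier *mn,
 *					    struct mm_struct *mm,
 *					    unsigned long start,
 *					    unsigned long end)
 *	{
 *		struct my_mirror *m = container_of(mn, struct my_mirror, mn);
 *
 *		spin_lock(&m->lock);
 *		m->invalidate_count--;	// faults may establish sptes again
 *		spin_unlock(&m->lock);
 *	}
 */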

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
	mm->mmu_notifier_mm = NULL;
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_mm_destroy(mm);
}

#define ptep_clear_flush_young_notify(__vma, __address, __ptep)	\
({									\
	int __young;							\
	struct vm_area_struct *___vma = __vma;				\
	unsigned long ___address = __address;				\
	__young = ptep_clear_flush_young(___vma, ___address, __ptep);	\
	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
						  ___address,		\
						  ___address +		\
							PAGE_SIZE);	\
	__young;							\
})

#define pmdp_clear_flush_young_notify(__vma, __address, __pmdp)	\
({									\
	int __young;							\
	struct vm_area_struct *___vma = __vma;				\
	unsigned long ___address = __address;				\
	__young = pmdp_clear_flush_young(___vma, ___address, __pmdp);	\
	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
						  ___address,		\
						  ___address +		\
							PMD_SIZE);	\
	__young;							\
})

#define ptep_clear_flush_notify(__vma, __address, __ptep)		\
({									\
	unsigned long ___addr = __address & PAGE_MASK;			\
	struct mm_struct *___mm = (__vma)->vm_mm;			\
	pte_t ___pte;							\
									\
	___pte = ptep_clear_flush(__vma, __address, __ptep);		\
	mmu_notifier_invalidate_range(___mm, ___addr,			\
				      ___addr + PAGE_SIZE);		\
									\
	___pte;								\
})

#define pmdp_clear_flush_notify(__vma, __haddr, __pmd)			\
({									\
	unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;		\
	struct mm_struct *___mm = (__vma)->vm_mm;			\
	pmd_t ___pmd;							\
									\
	___pmd = pmdp_clear_flush(__vma, __haddr, __pmd);		\
	mmu_notifier_invalidate_range(___mm, ___haddr,			\
				      ___haddr + HPAGE_PMD_SIZE);	\
									\
	___pmd;								\
})

#define pmdp_get_and_clear_notify(__mm, __haddr, __pmd)			\
({									\
	unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;		\
	pmd_t ___pmd;							\
									\
	___pmd = pmdp_get_and_clear(__mm, __haddr, __pmd);		\
	mmu_notifier_invalidate_range(__mm, ___haddr,			\
				      ___haddr + HPAGE_PMD_SIZE);	\
									\
	___pmd;								\
})

/*
 * set_pte_at_notify() sets the pte _after_ running the notifier.
 * It is safe to start by updating the secondary MMUs, because the
 * primary MMU pte invalidate must already have happened with a
 * ptep_clear_flush() before set_pte_at_notify() is invoked. Updating
 * the secondary MMUs first is required when we change both the
 * protection of the mapping from read-only to read-write and the pfn
 * (like during copy on write page faults). Otherwise the old page
 * would remain mapped readonly in the secondary MMUs after the new
 * page is already writable by some CPU through the primary MMU.
 */
#define set_pte_at_notify(__mm, __address, __ptep, __pte)		\
({									\
	struct mm_struct *___mm = __mm;					\
	unsigned long ___address = __address;				\
	pte_t ___pte = __pte;						\
									\
	mmu_notifier_change_pte(___mm, ___address, ___pte);		\
	set_pte_at(___mm, ___address, __ptep, ___pte);			\
})
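
/*
 * Illustrative sketch of the copy-on-write sequence described above
 * (heavily simplified; allocation, locking and rmap handling of
 * new_page are omitted): the primary pte is cleared and flushed
 * first, then the secondary MMUs are redirected via the change_pte
 * notifier before the new primary pte becomes visible:
 *
 *	ptep_clear_flush(vma, address, ptep);
 *	// between here and set_pte_at_notify() neither the primary
 *	// nor the secondary MMUs can write through the old pte
 *	entry = mk_pte(new_page, vma->vm_page_prot);
 *	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 *	set_pte_at_notify(mm, address, ptep, entry);
 */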

extern void mmu_notifier_call_srcu(struct rcu_head *rcu,
				   void (*func)(struct rcu_head *rcu));
extern void mmu_notifier_synchronize(void);

#else /* CONFIG_MMU_NOTIFIER */

static inline void mmu_notifier_release(struct mm_struct *mm)
{
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
						 unsigned long start,
						 unsigned long end)
{
	return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
					  unsigned long address)
{
	return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
					   unsigned long address, pte_t pte)
{
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
						unsigned long address)
{
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
						       unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
						     unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
						 unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
}

#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
#define ptep_clear_flush_notify ptep_clear_flush
#define pmdp_clear_flush_notify pmdp_clear_flush
#define pmdp_get_and_clear_notify pmdp_get_and_clear
#define set_pte_at_notify set_pte_at

#endif /* CONFIG_MMU_NOTIFIER */

#endif /* _LINUX_MMU_NOTIFIER_H */