/*
 * Lockless get_user_pages_fast for MIPS
 *
 * Copyright (C) 2008 Nick Piggin
 * Copyright (C) 2008 Novell Inc.
 * Copyright (C) 2011 Ralf Baechle
 */
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmstat.h>
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/hugetlb.h>

#include <asm/cpu-features.h>
#include <asm/pgtable.h>

static inline pte_t gup_get_pte(pte_t *ptep)
{
#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
	/*
	 * With 64-bit PTEs on a 32-bit CPU the two halves cannot be
	 * loaded atomically, so read low, then high, and retry until
	 * the low half is stable to get a consistent snapshot.
	 */
	pte_t pte;

retry:
	pte.pte_low = ptep->pte_low;
	smp_rmb();
	pte.pte_high = ptep->pte_high;
	smp_rmb();
	if (unlikely(pte.pte_low != ptep->pte_low))
		goto retry;

	return pte;
#else
	return READ_ONCE(*ptep);
#endif
}

static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
			int write, struct page **pages, int *nr)
{
	pte_t *ptep = pte_offset_map(&pmd, addr);
	do {
		pte_t pte = gup_get_pte(ptep);
		struct page *page;

		if (!pte_present(pte) ||
		    pte_special(pte) || (write && !pte_write(pte))) {
			pte_unmap(ptep);
			return 0;
		}
		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
		page = pte_page(pte);
		get_page(page);
		SetPageReferenced(page);
		pages[*nr] = page;
		(*nr)++;

	} while (ptep++, addr += PAGE_SIZE, addr != end);

	pte_unmap(ptep - 1);
	return 1;
}

static inline void get_head_page_multiple(struct page *page, int nr)
{
	VM_BUG_ON(page != compound_head(page));
	VM_BUG_ON(page_count(page) == 0);
	atomic_add(nr, &page->_count);
	SetPageReferenced(page);
}

static int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end,
			int write, struct page **pages, int *nr)
{
	pte_t pte = *(pte_t *)&pmd;
	struct page *head, *page;
	int refs;

	if (write && !pte_write(pte))
		return 0;
	/* hugepages are never "special" */
	VM_BUG_ON(pte_special(pte));
	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

	refs = 0;
	head = pte_page(pte);
	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
	do {
		VM_BUG_ON(compound_head(page) != head);
		pages[*nr] = page;
		if (PageTail(page))
			get_huge_page_tail(page);
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	get_head_page_multiple(head, refs);
	return 1;
}

static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
			int write, struct page **pages, int *nr)
{
	unsigned long next;
	pmd_t *pmdp;

	pmdp = pmd_offset(&pud, addr);
	do {
		pmd_t pmd = *pmdp;

		next = pmd_addr_end(addr, end);
		/*
		 * The pmd_trans_splitting() check below explains why
		 * pmdp_splitting_flush has to flush the tlb, to stop
		 * this gup-fast code from running while we set the
		 * splitting bit in the pmd. Returning zero will take
		 * the slow path that will call wait_split_huge_page()
		 * if the pmd is still in splitting state. gup-fast
		 * can't because it has irqs disabled and
		 * wait_split_huge_page() would never return as the
		 * tlb flush IPI wouldn't run.
		 */
		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
			return 0;
		if (unlikely(pmd_huge(pmd))) {
			if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
				return 0;
		} else {
			if (!gup_pte_range(pmd, addr, next, write, pages, nr))
				return 0;
		}
	} while (pmdp++, addr = next, addr != end);

	return 1;
}
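/*
 * The PUD-level helpers below mirror gup_huge_pmd()/gup_pmd_range() one
 * level up the page-table tree: a huge PUD entry is read through a
 * pte_t (the cast assumes huge entries share the PTE layout, just as
 * the PMD path above already relies on), each covered subpage is
 * recorded, and the references are taken in one go on the compound
 * head via get_head_page_multiple().
 */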
static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end,
			int write, struct page **pages, int *nr)
{
	pte_t pte = *(pte_t *)&pud;
	struct page *head, *page;
	int refs;

	if (write && !pte_write(pte))
		return 0;
	/* hugepages are never "special" */
	VM_BUG_ON(pte_special(pte));
	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

	refs = 0;
	head = pte_page(pte);
	page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
	do {
		VM_BUG_ON(compound_head(page) != head);
		pages[*nr] = page;
		if (PageTail(page))
			get_huge_page_tail(page);
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	get_head_page_multiple(head, refs);
	return 1;
}

static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
			int write, struct page **pages, int *nr)
{
	unsigned long next;
	pud_t *pudp;

	pudp = pud_offset(&pgd, addr);
	do {
		pud_t pud = *pudp;

		next = pud_addr_end(addr, end);
		if (pud_none(pud))
			return 0;
		if (unlikely(pud_huge(pud))) {
			if (!gup_huge_pud(pud, addr, next, write, pages, nr))
				return 0;
		} else {
			if (!gup_pmd_range(pud, addr, next, write, pages, nr))
				return 0;
		}
	} while (pudp++, addr = next, addr != end);

	return 1;
}

/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
 * back to the regular GUP.
 */
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
			  struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	unsigned long flags;
	pgd_t *pgdp;
	int nr = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;
	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
					(void __user *)start, len)))
		return 0;

	/*
	 * XXX: batch / limit 'nr', to avoid large irq off latency
	 * needs some instrumenting to determine the common sizes used by
	 * important workloads (eg. DB2), and whether limiting the batch
	 * size will decrease performance.
	 *
	 * It seems like we're in the clear for the moment. Direct-IO is
	 * the main guy that batches up lots of get_user_pages, and even
	 * they are limited to 64-at-a-time which is not so many.
	 */
	/*
	 * This doesn't prevent pagetable teardown, but does prevent
	 * the pagetables and pages from being freed.
	 *
	 * So long as we atomically load page table pointers versus teardown,
	 * we can follow the address down to the page and take a ref on it.
	 */
	local_irq_save(flags);
	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = *pgdp;

		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			break;
		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			break;
	} while (pgdp++, addr = next, addr != end);
	local_irq_restore(flags);

	return nr;
}
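/*
 * Minimal usage sketch for the atomic variant above (hypothetical
 * caller, not part of this file). __get_user_pages_fast() never sleeps
 * and never takes mmap_sem, so it may be called from contexts that
 * must not block; a short count simply means the caller has to drop
 * whatever was pinned and retry via a sleeping path:
 *
 *	int i, got;
 *
 *	got = __get_user_pages_fast(uaddr, nr_pages, 1, pages);
 *	if (got < nr_pages) {
 *		for (i = 0; i < got; i++)
 *			put_page(pages[i]);
 *		got = 0;	// fall back to get_user_pages_fast() etc.
 *	}
 */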
/**
 * get_user_pages_fast() - pin user pages in memory
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @write:	whether pages will be written to
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long.
 *
 * Attempt to pin user pages in memory without taking mm->mmap_sem.
 * If not successful, it will fall back to taking the lock and
 * calling get_user_pages().
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno.
 */
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
			struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	pgd_t *pgdp;
	int ret, nr = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;

	end = start + len;
	if (end < start || cpu_has_dc_aliases)
		goto slow_irqon;

	/* XXX: batch / limit 'nr' */
	local_irq_disable();
	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = *pgdp;

		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			goto slow;
		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			goto slow;
	} while (pgdp++, addr = next, addr != end);
	local_irq_enable();

	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
	return nr;

slow:
	local_irq_enable();

slow_irqon:
	/* Try to get the remaining pages with get_user_pages */
	start += nr << PAGE_SHIFT;
	pages += nr;

	ret = get_user_pages_unlocked(current, mm, start,
				      (end - start) >> PAGE_SHIFT,
				      write, 0, pages);

	/* Have to be a bit careful with return values */
	if (nr > 0) {
		if (ret < 0)
			ret = nr;
		else
			ret += nr;
	}
	return ret;
}
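/*
 * Usage sketch (hypothetical driver code, not built here): pin a user
 * buffer, use it, then drop the page references with put_page(). Note
 * the short-count handling that the return convention above requires:
 *
 *	struct page *pages[16];
 *	int i, got;
 *
 *	got = get_user_pages_fast(uaddr, 16, 1, pages);
 *	if (got < 0)
 *		return got;		// nothing was pinned
 *
 *	// ... access the got (possibly fewer than 16) pinned pages ...
 *
 *	for (i = 0; i < got; i++)
 *		put_page(pages[i]);
 */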