/*
 * Lockless get_user_pages_fast for s390
 *
 * Copyright IBM Corp. 2010
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/vmstat.h>
#include <linux/pagemap.h>
#include <linux/rwsem.h>
#include <asm/pgtable.h>

/*
 * The performance critical leaf functions are made noinline otherwise gcc
 * inlines everything into a single function which results in too much
 * register pressure.
 */
static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	unsigned long mask;
	pte_t *ptep, pte;
	struct page *page;

	mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL;

	ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr);
	do {
		pte = *ptep;
		barrier();
		if ((pte_val(pte) & mask) != 0)
			return 0;
		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
		page = pte_page(pte);
		if (!page_cache_get_speculative(page))
			return 0;
		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
			put_page(page);
			return 0;
		}
		pages[*nr] = page;
		(*nr)++;

	} while (ptep++, addr += PAGE_SIZE, addr != end);

	return 1;
}

static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	unsigned long mask, result;
	struct page *head, *page, *tail;
	int refs;

	result = write ? 0 : _SEGMENT_ENTRY_PROTECT;
	mask = result | _SEGMENT_ENTRY_INVALID;
	if ((pmd_val(pmd) & mask) != result)
		return 0;
	VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));

	refs = 0;
	head = pmd_page(pmd);
	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
	tail = page;
	do {
		VM_BUG_ON(compound_head(page) != head);
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	if (!page_cache_add_speculative(head, refs)) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
		*nr -= refs;
		while (refs--)
			put_page(head);
		return 0;
	}

	/*
	 * Any tail pages need their mapcount reference taken before we
	 * return.
	 */
	while (refs--) {
		if (PageTail(tail))
			get_huge_page_tail(tail);
		tail++;
	}

	return 1;
}

static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	unsigned long next;
	pmd_t *pmdp, pmd;

	pmdp = (pmd_t *) pudp;
	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		pmdp = (pmd_t *) pud_deref(pud);
	pmdp += pmd_index(addr);
	do {
		pmd = *pmdp;
		barrier();
		next = pmd_addr_end(addr, end);
		/*
		 * The pmd_trans_splitting() check below explains why
		 * pmdp_splitting_flush() has to serialize with
		 * smp_call_function() against our disabled IRQs, to stop
		 * this gup-fast code from running while we set the
		 * splitting bit in the pmd. Returning zero will take
		 * the slow path that will call wait_split_huge_page()
		 * if the pmd is still in splitting state.
		 */
		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
			return 0;
		if (unlikely(pmd_large(pmd))) {
			if (!gup_huge_pmd(pmdp, pmd, addr, next,
					  write, pages, nr))
				return 0;
		} else if (!gup_pte_range(pmdp, pmd, addr, next,
					  write, pages, nr))
			return 0;
	} while (pmdp++, addr = next, addr != end);

	return 1;
}

static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	unsigned long next;
	pud_t *pudp, pud;

	pudp = (pud_t *) pgdp;
	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
		pudp = (pud_t *) pgd_deref(pgd);
	pudp += pud_index(addr);
	do {
		pud = *pudp;
		barrier();
		next = pud_addr_end(addr, end);
		if (pud_none(pud))
			return 0;
		if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr))
			return 0;
	} while (pudp++, addr = next, addr != end);

	return 1;
}

/*
 * Like get_user_pages_fast() except it is IRQ-safe in that it won't fall
 * back to the regular GUP.
 */
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
			  struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next, flags;
	pgd_t *pgdp, pgd;
	int nr = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;
	if ((end <= start) || (end > TASK_SIZE))
		return 0;
	/*
	 * local_irq_save() doesn't prevent pagetable teardown, but does
	 * prevent the pagetables from being freed on s390.
	 *
	 * So long as we atomically load page table pointers versus teardown,
	 * we can follow the address down to the page and take a ref on it.
	 */
	local_irq_save(flags);
	pgdp = pgd_offset(mm, addr);
	do {
		pgd = *pgdp;
		barrier();
		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			break;
		if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr))
			break;
	} while (pgdp++, addr = next, addr != end);
	local_irq_restore(flags);

	return nr;
}

/**
 * get_user_pages_fast() - pin user pages in memory
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @write:	whether pages will be written to
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long.
 *
 * Attempt to pin user pages in memory without taking mm->mmap_sem.
 * If not successful, it will fall back to taking the lock and
 * calling get_user_pages().
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno.
 */
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
			struct page **pages)
{
	struct mm_struct *mm = current->mm;
	int nr, ret;

	start &= PAGE_MASK;
	nr = __get_user_pages_fast(start, nr_pages, write, pages);
	if (nr == nr_pages)
		return nr;

	/* Try to get the remaining pages with get_user_pages */
	start += nr << PAGE_SHIFT;
	pages += nr;
	ret = get_user_pages_unlocked(current, mm, start,
				      nr_pages - nr, write, 0, pages);
	/* Have to be a bit careful with return values */
	if (nr > 0)
		ret = (ret < 0) ? nr : ret + nr;
	return ret;
}
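
/*
 * Illustrative usage sketch: a hypothetical caller of the
 * get_user_pages_fast() interface documented above, not part of the s390
 * gup implementation itself. It pins a user buffer, leaves a placeholder
 * where the pinned pages would be accessed, and then drops every reference
 * with put_page(). The function name and its parameters are made up for
 * the example; the caller is assumed to supply a pages array with room
 * for nr_pages entries.
 */
static int __maybe_unused example_pin_user_buffer(unsigned long uaddr,
						  int nr_pages,
						  struct page **pages)
{
	int i, pinned;

	/* Pin for write access; the slow-path fallback may sleep. */
	pinned = get_user_pages_fast(uaddr, nr_pages, 1, pages);
	if (pinned <= 0)
		return pinned;	/* 0 for nr_pages <= 0, else -errno */

	/* ... access pages[0] .. pages[pinned - 1] here ... */

	/* Release the references taken by get_user_pages_fast(). */
	for (i = 0; i < pinned; i++)
		put_page(pages[i]);

	return pinned;
}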