/*
 * fs/logfs/dev_bdev.c - Device access methods for block devices
 *
 * As should be obvious for Linux kernel code, license is GPLv2
 *
 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
 */
#include "logfs.h"
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/gfp.h>
#include <linux/prefetch.h>

#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))

static int sync_request(struct page *page, struct block_device *bdev, int rw)
{
	struct bio bio;
	struct bio_vec bio_vec;

	bio_init(&bio);
	bio.bi_max_vecs = 1;
	bio.bi_io_vec = &bio_vec;
	bio_vec.bv_page = page;
	bio_vec.bv_len = PAGE_SIZE;
	bio_vec.bv_offset = 0;
	bio.bi_vcnt = 1;
	bio.bi_bdev = bdev;
	bio.bi_iter.bi_sector = page->index * (PAGE_SIZE >> 9);
	bio.bi_iter.bi_size = PAGE_SIZE;

	return submit_bio_wait(rw, &bio);
}

static int bdev_readpage(void *_sb, struct page *page)
{
	struct super_block *sb = _sb;
	struct block_device *bdev = logfs_super(sb)->s_bdev;
	int err;

	err = sync_request(page, bdev, READ);
	if (err) {
		ClearPageUptodate(page);
		SetPageError(page);
	} else {
		SetPageUptodate(page);
		ClearPageError(page);
	}
	unlock_page(page);
	return err;
}

static DECLARE_WAIT_QUEUE_HEAD(wq);

static void writeseg_end_io(struct bio *bio)
{
	struct bio_vec *bvec;
	int i;
	struct super_block *sb = bio->bi_private;
	struct logfs_super *super = logfs_super(sb);

	BUG_ON(bio->bi_error); /* FIXME: Retry io or write elsewhere */

	bio_for_each_segment_all(bvec, bio, i) {
		end_page_writeback(bvec->bv_page);
		page_cache_release(bvec->bv_page);
	}
	bio_put(bio);
	if (atomic_dec_and_test(&super->s_pending_writes))
		wake_up(&wq);
}
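/*
 * Write nr_pages pages from the mapping inode's page cache, starting at
 * index, to the device at byte offset ofs.  Bios are filled with at most
 * BIO_MAX_PAGES pages and submitted in chunks, since the block layer
 * cannot split an oversized bio for us.  Each submitted bio holds a
 * reference on its pages and bumps s_pending_writes; writeseg_end_io()
 * drops both and wakes any bdev_sync() waiters once the count hits zero.
 */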
static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
		size_t nr_pages)
{
	struct logfs_super *super = logfs_super(sb);
	struct address_space *mapping = super->s_mapping_inode->i_mapping;
	struct bio *bio;
	struct page *page;
	unsigned int max_pages;
	int i;

	max_pages = min_t(size_t, nr_pages, BIO_MAX_PAGES);

	bio = bio_alloc(GFP_NOFS, max_pages);
	BUG_ON(!bio);

	for (i = 0; i < nr_pages; i++) {
		if (i >= max_pages) {
			/* Block layer cannot split bios :( */
			bio->bi_vcnt = i;
			bio->bi_iter.bi_size = i * PAGE_SIZE;
			bio->bi_bdev = super->s_bdev;
			bio->bi_iter.bi_sector = ofs >> 9;
			bio->bi_private = sb;
			bio->bi_end_io = writeseg_end_io;
			atomic_inc(&super->s_pending_writes);
			submit_bio(WRITE, bio);

			ofs += i * PAGE_SIZE;
			index += i;
			nr_pages -= i;
			i = 0;

			bio = bio_alloc(GFP_NOFS, max_pages);
			BUG_ON(!bio);
		}
		page = find_lock_page(mapping, index + i);
		BUG_ON(!page);
		bio->bi_io_vec[i].bv_page = page;
		bio->bi_io_vec[i].bv_len = PAGE_SIZE;
		bio->bi_io_vec[i].bv_offset = 0;

		BUG_ON(PageWriteback(page));
		set_page_writeback(page);
		unlock_page(page);
	}
	bio->bi_vcnt = nr_pages;
	bio->bi_iter.bi_size = nr_pages * PAGE_SIZE;
	bio->bi_bdev = super->s_bdev;
	bio->bi_iter.bi_sector = ofs >> 9;
	bio->bi_private = sb;
	bio->bi_end_io = writeseg_end_io;
	atomic_inc(&super->s_pending_writes);
	submit_bio(WRITE, bio);
	return 0;
}

static void bdev_writeseg(struct super_block *sb, u64 ofs, size_t len)
{
	struct logfs_super *super = logfs_super(sb);
	int head;

	BUG_ON(super->s_flags & LOGFS_SB_FLAG_RO);

	if (len == 0) {
		/* This can happen when an object fits perfectly into a
		 * segment: the segment is written out on sync and
		 * subsequently closed.
		 */
		return;
	}
	head = ofs & (PAGE_SIZE - 1);
	if (head) {
		ofs -= head;
		len += head;
	}
	len = PAGE_ALIGN(len);
	__bdev_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT);
}

static void erase_end_io(struct bio *bio)
{
	struct super_block *sb = bio->bi_private;
	struct logfs_super *super = logfs_super(sb);

	BUG_ON(bio->bi_error); /* FIXME: Retry io or write elsewhere */
	BUG_ON(bio->bi_vcnt == 0);
	bio_put(bio);
	if (atomic_dec_and_test(&super->s_pending_writes))
		wake_up(&wq);
}

static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
		size_t nr_pages)
{
	struct logfs_super *super = logfs_super(sb);
	struct bio *bio;
	unsigned int max_pages;
	int i;

	max_pages = min_t(size_t, nr_pages, BIO_MAX_PAGES);

	bio = bio_alloc(GFP_NOFS, max_pages);
	BUG_ON(!bio);

	for (i = 0; i < nr_pages; i++) {
		if (i >= max_pages) {
			/* Block layer cannot split bios :( */
			bio->bi_vcnt = i;
			bio->bi_iter.bi_size = i * PAGE_SIZE;
			bio->bi_bdev = super->s_bdev;
			bio->bi_iter.bi_sector = ofs >> 9;
			bio->bi_private = sb;
			bio->bi_end_io = erase_end_io;
			atomic_inc(&super->s_pending_writes);
			submit_bio(WRITE, bio);

			ofs += i * PAGE_SIZE;
			index += i;
			nr_pages -= i;
			i = 0;

			bio = bio_alloc(GFP_NOFS, max_pages);
			BUG_ON(!bio);
		}
		bio->bi_io_vec[i].bv_page = super->s_erase_page;
		bio->bi_io_vec[i].bv_len = PAGE_SIZE;
		bio->bi_io_vec[i].bv_offset = 0;
	}
	bio->bi_vcnt = nr_pages;
	bio->bi_iter.bi_size = nr_pages * PAGE_SIZE;
	bio->bi_bdev = super->s_bdev;
	bio->bi_iter.bi_sector = ofs >> 9;
	bio->bi_private = sb;
	bio->bi_end_io = erase_end_io;
	atomic_inc(&super->s_pending_writes);
	submit_bio(WRITE, bio);
	return 0;
}
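/*
 * "Erase" a page-aligned region of the device.  A block device has no
 * real erase operation, so when ensure_write is set the region is
 * overwritten with copies of s_erase_page instead; the comment below
 * explains why the journal requires this while the object store does not.
 */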
static int bdev_erase(struct super_block *sb, loff_t to, size_t len,
		int ensure_write)
{
	struct logfs_super *super = logfs_super(sb);

	BUG_ON(to & (PAGE_SIZE - 1));
	BUG_ON(len & (PAGE_SIZE - 1));

	if (super->s_flags & LOGFS_SB_FLAG_RO)
		return -EROFS;

	if (ensure_write) {
		/*
		 * Object store doesn't care whether erases happen or not.
		 * But for the journal they are required.  Otherwise a scan
		 * can find an old commit entry and assume it is the current
		 * one, travelling back in time.
		 */
		do_erase(sb, to, to >> PAGE_SHIFT, len >> PAGE_SHIFT);
	}

	return 0;
}

static void bdev_sync(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);

	wait_event(wq, atomic_read(&super->s_pending_writes) == 0);
}

static struct page *bdev_find_first_sb(struct super_block *sb, u64 *ofs)
{
	struct logfs_super *super = logfs_super(sb);
	struct address_space *mapping = super->s_mapping_inode->i_mapping;
	filler_t *filler = bdev_readpage;

	*ofs = 0;
	return read_cache_page(mapping, 0, filler, sb);
}

static struct page *bdev_find_last_sb(struct super_block *sb, u64 *ofs)
{
	struct logfs_super *super = logfs_super(sb);
	struct address_space *mapping = super->s_mapping_inode->i_mapping;
	filler_t *filler = bdev_readpage;
	u64 pos = (super->s_bdev->bd_inode->i_size & ~0xfffULL) - 0x1000;
	pgoff_t index = pos >> PAGE_SHIFT;

	*ofs = pos;
	return read_cache_page(mapping, index, filler, sb);
}

static int bdev_write_sb(struct super_block *sb, struct page *page)
{
	struct block_device *bdev = logfs_super(sb)->s_bdev;

	/* Nothing special to do for block devices. */
	return sync_request(page, bdev, WRITE);
}

static void bdev_put_device(struct logfs_super *s)
{
	blkdev_put(s->s_bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}

static int bdev_can_write_buf(struct super_block *sb, u64 ofs)
{
	return 0;
}

static const struct logfs_device_ops bd_devops = {
	.find_first_sb = bdev_find_first_sb,
	.find_last_sb = bdev_find_last_sb,
	.write_sb = bdev_write_sb,
	.readpage = bdev_readpage,
	.writeseg = bdev_writeseg,
	.erase = bdev_erase,
	.can_write_buf = bdev_can_write_buf,
	.sync = bdev_sync,
	.put_device = bdev_put_device,
};

/*
 * Open the backing device by path and hook up the block-device ops.  If
 * the path turns out to name an mtdblock device, drop it again and let
 * the MTD backend drive the underlying device directly.
 */
int logfs_get_sb_bdev(struct logfs_super *p, struct file_system_type *type,
		const char *devname)
{
	struct block_device *bdev;

	bdev = blkdev_get_by_path(devname, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
			type);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
		int mtdnr = MINOR(bdev->bd_dev);
		blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
		return logfs_get_sb_mtd(p, mtdnr);
	}

	p->s_bdev = bdev;
	p->s_mtd = NULL;
	p->s_devops = &bd_devops;
	return 0;
}