1/* 2 * fs/logfs/dev_bdev.c - Device access methods for block devices 3 * 4 * As should be obvious for Linux kernel code, license is GPLv2 5 * 6 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org> 7 */ 8#include "logfs.h" 9#include <linux/bio.h> 10#include <linux/blkdev.h> 11#include <linux/buffer_head.h> 12#include <linux/gfp.h> 13#include <linux/prefetch.h> 14 15#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) 16 17static int sync_request(struct page *page, struct block_device *bdev, int rw) 18{ 19 struct bio bio; 20 struct bio_vec bio_vec; 21 22 bio_init(&bio); 23 bio.bi_max_vecs = 1; 24 bio.bi_io_vec = &bio_vec; 25 bio_vec.bv_page = page; 26 bio_vec.bv_len = PAGE_SIZE; 27 bio_vec.bv_offset = 0; 28 bio.bi_vcnt = 1; 29 bio.bi_bdev = bdev; 30 bio.bi_iter.bi_sector = page->index * (PAGE_SIZE >> 9); 31 bio.bi_iter.bi_size = PAGE_SIZE; 32 33 return submit_bio_wait(rw, &bio); 34} 35 36static int bdev_readpage(void *_sb, struct page *page) 37{ 38 struct super_block *sb = _sb; 39 struct block_device *bdev = logfs_super(sb)->s_bdev; 40 int err; 41 42 err = sync_request(page, bdev, READ); 43 if (err) { 44 ClearPageUptodate(page); 45 SetPageError(page); 46 } else { 47 SetPageUptodate(page); 48 ClearPageError(page); 49 } 50 unlock_page(page); 51 return err; 52} 53 54static DECLARE_WAIT_QUEUE_HEAD(wq); 55 56static void writeseg_end_io(struct bio *bio, int err) 57{ 58 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 59 struct bio_vec *bvec; 60 int i; 61 struct super_block *sb = bio->bi_private; 62 struct logfs_super *super = logfs_super(sb); 63 64 BUG_ON(!uptodate); /* FIXME: Retry io or write elsewhere */ 65 BUG_ON(err); 66 67 bio_for_each_segment_all(bvec, bio, i) { 68 end_page_writeback(bvec->bv_page); 69 page_cache_release(bvec->bv_page); 70 } 71 bio_put(bio); 72 if (atomic_dec_and_test(&super->s_pending_writes)) 73 wake_up(&wq); 74} 75 76static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, 77 size_t nr_pages) 78{ 79 struct logfs_super *super = logfs_super(sb); 80 struct address_space *mapping = super->s_mapping_inode->i_mapping; 81 struct bio *bio; 82 struct page *page; 83 unsigned int max_pages; 84 int i; 85 86 max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev)); 87 88 bio = bio_alloc(GFP_NOFS, max_pages); 89 BUG_ON(!bio); 90 91 for (i = 0; i < nr_pages; i++) { 92 if (i >= max_pages) { 93 /* Block layer cannot split bios :( */ 94 bio->bi_vcnt = i; 95 bio->bi_iter.bi_size = i * PAGE_SIZE; 96 bio->bi_bdev = super->s_bdev; 97 bio->bi_iter.bi_sector = ofs >> 9; 98 bio->bi_private = sb; 99 bio->bi_end_io = writeseg_end_io; 100 atomic_inc(&super->s_pending_writes); 101 submit_bio(WRITE, bio); 102 103 ofs += i * PAGE_SIZE; 104 index += i; 105 nr_pages -= i; 106 i = 0; 107 108 bio = bio_alloc(GFP_NOFS, max_pages); 109 BUG_ON(!bio); 110 } 111 page = find_lock_page(mapping, index + i); 112 BUG_ON(!page); 113 bio->bi_io_vec[i].bv_page = page; 114 bio->bi_io_vec[i].bv_len = PAGE_SIZE; 115 bio->bi_io_vec[i].bv_offset = 0; 116 117 BUG_ON(PageWriteback(page)); 118 set_page_writeback(page); 119 unlock_page(page); 120 } 121 bio->bi_vcnt = nr_pages; 122 bio->bi_iter.bi_size = nr_pages * PAGE_SIZE; 123 bio->bi_bdev = super->s_bdev; 124 bio->bi_iter.bi_sector = ofs >> 9; 125 bio->bi_private = sb; 126 bio->bi_end_io = writeseg_end_io; 127 atomic_inc(&super->s_pending_writes); 128 submit_bio(WRITE, bio); 129 return 0; 130} 131 132static void bdev_writeseg(struct super_block *sb, u64 ofs, size_t len) 133{ 134 struct logfs_super *super = logfs_super(sb); 135 int head; 136 137 BUG_ON(super->s_flags & LOGFS_SB_FLAG_RO); 138 139 if (len == 0) { 140 /* This can happen when the object fit perfectly into a 141 * segment, the segment gets written per sync and subsequently 142 * closed. 143 */ 144 return; 145 } 146 head = ofs & (PAGE_SIZE - 1); 147 if (head) { 148 ofs -= head; 149 len += head; 150 } 151 len = PAGE_ALIGN(len); 152 __bdev_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT); 153} 154 155 156static void erase_end_io(struct bio *bio, int err) 157{ 158 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 159 struct super_block *sb = bio->bi_private; 160 struct logfs_super *super = logfs_super(sb); 161 162 BUG_ON(!uptodate); /* FIXME: Retry io or write elsewhere */ 163 BUG_ON(err); 164 BUG_ON(bio->bi_vcnt == 0); 165 bio_put(bio); 166 if (atomic_dec_and_test(&super->s_pending_writes)) 167 wake_up(&wq); 168} 169 170static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index, 171 size_t nr_pages) 172{ 173 struct logfs_super *super = logfs_super(sb); 174 struct bio *bio; 175 unsigned int max_pages; 176 int i; 177 178 max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev)); 179 180 bio = bio_alloc(GFP_NOFS, max_pages); 181 BUG_ON(!bio); 182 183 for (i = 0; i < nr_pages; i++) { 184 if (i >= max_pages) { 185 /* Block layer cannot split bios :( */ 186 bio->bi_vcnt = i; 187 bio->bi_iter.bi_size = i * PAGE_SIZE; 188 bio->bi_bdev = super->s_bdev; 189 bio->bi_iter.bi_sector = ofs >> 9; 190 bio->bi_private = sb; 191 bio->bi_end_io = erase_end_io; 192 atomic_inc(&super->s_pending_writes); 193 submit_bio(WRITE, bio); 194 195 ofs += i * PAGE_SIZE; 196 index += i; 197 nr_pages -= i; 198 i = 0; 199 200 bio = bio_alloc(GFP_NOFS, max_pages); 201 BUG_ON(!bio); 202 } 203 bio->bi_io_vec[i].bv_page = super->s_erase_page; 204 bio->bi_io_vec[i].bv_len = PAGE_SIZE; 205 bio->bi_io_vec[i].bv_offset = 0; 206 } 207 bio->bi_vcnt = nr_pages; 208 bio->bi_iter.bi_size = nr_pages * PAGE_SIZE; 209 bio->bi_bdev = super->s_bdev; 210 bio->bi_iter.bi_sector = ofs >> 9; 211 bio->bi_private = sb; 212 bio->bi_end_io = erase_end_io; 213 atomic_inc(&super->s_pending_writes); 214 submit_bio(WRITE, bio); 215 return 0; 216} 217 218static int bdev_erase(struct super_block *sb, loff_t to, size_t len, 219 int ensure_write) 220{ 221 struct logfs_super *super = logfs_super(sb); 222 223 BUG_ON(to & (PAGE_SIZE - 1)); 224 BUG_ON(len & (PAGE_SIZE - 1)); 225 226 if (super->s_flags & LOGFS_SB_FLAG_RO) 227 return -EROFS; 228 229 if (ensure_write) { 230 /* 231 * Object store doesn't care whether erases happen or not. 232 * But for the journal they are required. Otherwise a scan 233 * can find an old commit entry and assume it is the current 234 * one, travelling back in time. 235 */ 236 do_erase(sb, to, to >> PAGE_SHIFT, len >> PAGE_SHIFT); 237 } 238 239 return 0; 240} 241 242static void bdev_sync(struct super_block *sb) 243{ 244 struct logfs_super *super = logfs_super(sb); 245 246 wait_event(wq, atomic_read(&super->s_pending_writes) == 0); 247} 248 249static struct page *bdev_find_first_sb(struct super_block *sb, u64 *ofs) 250{ 251 struct logfs_super *super = logfs_super(sb); 252 struct address_space *mapping = super->s_mapping_inode->i_mapping; 253 filler_t *filler = bdev_readpage; 254 255 *ofs = 0; 256 return read_cache_page(mapping, 0, filler, sb); 257} 258 259static struct page *bdev_find_last_sb(struct super_block *sb, u64 *ofs) 260{ 261 struct logfs_super *super = logfs_super(sb); 262 struct address_space *mapping = super->s_mapping_inode->i_mapping; 263 filler_t *filler = bdev_readpage; 264 u64 pos = (super->s_bdev->bd_inode->i_size & ~0xfffULL) - 0x1000; 265 pgoff_t index = pos >> PAGE_SHIFT; 266 267 *ofs = pos; 268 return read_cache_page(mapping, index, filler, sb); 269} 270 271static int bdev_write_sb(struct super_block *sb, struct page *page) 272{ 273 struct block_device *bdev = logfs_super(sb)->s_bdev; 274 275 /* Nothing special to do for block devices. */ 276 return sync_request(page, bdev, WRITE); 277} 278 279static void bdev_put_device(struct logfs_super *s) 280{ 281 blkdev_put(s->s_bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 282} 283 284static int bdev_can_write_buf(struct super_block *sb, u64 ofs) 285{ 286 return 0; 287} 288 289static const struct logfs_device_ops bd_devops = { 290 .find_first_sb = bdev_find_first_sb, 291 .find_last_sb = bdev_find_last_sb, 292 .write_sb = bdev_write_sb, 293 .readpage = bdev_readpage, 294 .writeseg = bdev_writeseg, 295 .erase = bdev_erase, 296 .can_write_buf = bdev_can_write_buf, 297 .sync = bdev_sync, 298 .put_device = bdev_put_device, 299}; 300 301int logfs_get_sb_bdev(struct logfs_super *p, struct file_system_type *type, 302 const char *devname) 303{ 304 struct block_device *bdev; 305 306 bdev = blkdev_get_by_path(devname, FMODE_READ|FMODE_WRITE|FMODE_EXCL, 307 type); 308 if (IS_ERR(bdev)) 309 return PTR_ERR(bdev); 310 311 if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) { 312 int mtdnr = MINOR(bdev->bd_dev); 313 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 314 return logfs_get_sb_mtd(p, mtdnr); 315 } 316 317 p->s_bdev = bdev; 318 p->s_mtd = NULL; 319 p->s_devops = &bd_devops; 320 return 0; 321} 322