This source file includes the following definitions:
- BDEV_I
- I_BDEV
- bdev_write_inode
- kill_bdev
- invalidate_bdev
- set_init_blocksize
- set_blocksize
- sb_set_blocksize
- sb_min_blocksize
- blkdev_get_block
- bdev_file_inode
- dio_bio_write_op
- blkdev_bio_end_io_simple
- __blkdev_direct_IO_simple
- blkdev_iopoll
- blkdev_bio_end_io
- __blkdev_direct_IO
- blkdev_direct_IO
- blkdev_init
- __sync_blockdev
- sync_blockdev
- fsync_bdev
- freeze_bdev
- thaw_bdev
- blkdev_writepage
- blkdev_readpage
- blkdev_readpages
- blkdev_write_begin
- blkdev_write_end
- block_llseek
- blkdev_fsync
- bdev_read_page
- bdev_write_page
- bdev_alloc_inode
- bdev_free_inode
- init_once
- bdev_evict_inode
- bd_init_fs_context
- bdev_cache_init
- hash
- bdev_test
- bdev_set
- bdev_unhash_inode
- bdget
- bdgrab
- nr_blockdev_pages
- bdput
- bd_acquire
- bd_forget
- bd_may_claim
- bd_prepare_to_claim
- bdev_get_gendisk
- bd_start_claiming
- bd_clear_claiming
- bd_finish_claiming
- bd_abort_claiming
- bd_find_holder_disk
- add_symlink
- del_symlink
- bd_link_disk_holder
- bd_unlink_disk_holder
- flush_disk
- check_disk_size_change
- revalidate_disk
- check_disk_change
- bd_set_size
- bdev_disk_changed
- __blkdev_get
- blkdev_get
- blkdev_get_by_path
- blkdev_get_by_dev
- blkdev_open
- __blkdev_put
- blkdev_put
- blkdev_close
- block_ioctl
- blkdev_write_iter
- blkdev_read_iter
- blkdev_releasepage
- blkdev_writepages
- blkdev_fallocate
- ioctl_by_bdev
- lookup_bdev
- __invalidate_device
- iterate_bdevs
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  linux/fs/block_dev.c
4  *
5  *  Copyright (C) 1991, 1992  Linus Torvalds
6  *  Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SMP
7  */
8
9 #include <linux/init.h>
10 #include <linux/mm.h>
11 #include <linux/fcntl.h>
12 #include <linux/slab.h>
13 #include <linux/kmod.h>
14 #include <linux/major.h>
15 #include <linux/device_cgroup.h>
16 #include <linux/highmem.h>
17 #include <linux/blkdev.h>
18 #include <linux/backing-dev.h>
19 #include <linux/module.h>
20 #include <linux/blkpg.h>
21 #include <linux/magic.h>
22 #include <linux/dax.h>
23 #include <linux/buffer_head.h>
24 #include <linux/swap.h>
25 #include <linux/pagevec.h>
26 #include <linux/writeback.h>
27 #include <linux/mpage.h>
28 #include <linux/mount.h>
29 #include <linux/pseudo_fs.h>
30 #include <linux/uio.h>
31 #include <linux/namei.h>
32 #include <linux/log2.h>
33 #include <linux/cleancache.h>
34 #include <linux/task_io_accounting_ops.h>
35 #include <linux/falloc.h>
36 #include <linux/uaccess.h>
37 #include <linux/suspend.h>
38 #include "internal.h"
39
40 struct bdev_inode {
41 struct block_device bdev;
42 struct inode vfs_inode;
43 };
44
45 static const struct address_space_operations def_blk_aops;
46
47 static inline struct bdev_inode *BDEV_I(struct inode *inode)
48 {
49 return container_of(inode, struct bdev_inode, vfs_inode);
50 }
51
52 struct block_device *I_BDEV(struct inode *inode)
53 {
54 return &BDEV_I(inode)->bdev;
55 }
56 EXPORT_SYMBOL(I_BDEV);
57
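/*
 * Write the bdev inode back and keep retrying until it is clean;
 * writeback failures are only logged (ratelimited), since there is
 * no caller to return them to.
 */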
58 static void bdev_write_inode(struct block_device *bdev)
59 {
60 struct inode *inode = bdev->bd_inode;
61 int ret;
62
63 spin_lock(&inode->i_lock);
64 while (inode->i_state & I_DIRTY) {
65 spin_unlock(&inode->i_lock);
66 ret = write_inode_now(inode, true);
67 if (ret) {
68 char name[BDEVNAME_SIZE];
69 pr_warn_ratelimited("VFS: Dirty inode writeback failed "
70 "for block device %s (err=%d).\n",
71 bdevname(bdev, name), ret);
72 }
73 spin_lock(&inode->i_lock);
74 }
75 spin_unlock(&inode->i_lock);
76 }
77
78 /* Kill _all_ buffers and pagecache, dirty or not. */
79 void kill_bdev(struct block_device *bdev)
80 {
81 struct address_space *mapping = bdev->bd_inode->i_mapping;
82
83 if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
84 return;
85
86 invalidate_bh_lrus();
87 truncate_inode_pages(mapping, 0);
88 }
89 EXPORT_SYMBOL(kill_bdev);
90
91 /* Invalidate clean unused buffers and pagecache. */
92 void invalidate_bdev(struct block_device *bdev)
93 {
94 struct address_space *mapping = bdev->bd_inode->i_mapping;
95
96 if (mapping->nrpages) {
97 invalidate_bh_lrus();
98 lru_add_drain_all(); /* make sure all lru add caches are flushed */
99 invalidate_mapping_pages(mapping, 0, -1);
100 }
101 /* 99% of the time we don't need to flush the cleancache on the bdev,
102  * but for the strange corners let's be cautious.
103  */
104 cleancache_invalidate_inode(mapping);
105 }
106 EXPORT_SYMBOL(invalidate_bdev);
107
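/*
 * Pick the largest power-of-two block size, up to PAGE_SIZE, that still
 * divides the device size evenly, starting from the logical block size.
 * Illustrative arithmetic (assuming 512-byte logical blocks and 4 KiB
 * pages): a 7 KiB device ends up with 1 KiB blocks, since
 * 7168 & 512 == 0 but 7168 & 1024 != 0, while any capacity that is a
 * multiple of 4 KiB climbs all the way to PAGE_SIZE blocks.
 */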
108 static void set_init_blocksize(struct block_device *bdev)
109 {
110 unsigned bsize = bdev_logical_block_size(bdev);
111 loff_t size = i_size_read(bdev->bd_inode);
112
113 while (bsize < PAGE_SIZE) {
114 if (size & bsize)
115 break;
116 bsize <<= 1;
117 }
118 bdev->bd_block_size = bsize;
119 bdev->bd_inode->i_blkbits = blksize_bits(bsize);
120 }
121
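/*
 * Set the block size used for buffered I/O on the device: it must be a
 * power of two between 512 and PAGE_SIZE and at least the logical block
 * size. Changing it syncs the device and then drops the old pagecache,
 * because the existing buffers have the wrong granularity.
 */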
122 int set_blocksize(struct block_device *bdev, int size)
123 {
124 /* Size must be a power of two, and between 512 and PAGE_SIZE */
125 if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
126 return -EINVAL;
127
128 /* Size cannot be smaller than the size supported by the device */
129 if (size < bdev_logical_block_size(bdev))
130 return -EINVAL;
131
132 /* Don't change the size if it is the same as the current one */
133 if (bdev->bd_block_size != size) {
134 sync_blockdev(bdev);
135 bdev->bd_block_size = size;
136 bdev->bd_inode->i_blkbits = blksize_bits(size);
137 kill_bdev(bdev);
138 }
139 return 0;
140 }
141
142 EXPORT_SYMBOL(set_blocksize);
143
144 int sb_set_blocksize(struct super_block *sb, int size)
145 {
146 if (set_blocksize(sb->s_bdev, size))
147 return 0;
148
149
150 sb->s_blocksize = size;
151 sb->s_blocksize_bits = blksize_bits(size);
152 return sb->s_blocksize;
153 }
154
155 EXPORT_SYMBOL(sb_set_blocksize);
156
157 int sb_min_blocksize(struct super_block *sb, int size)
158 {
159 int minsize = bdev_logical_block_size(sb->s_bdev);
160 if (size < minsize)
161 size = minsize;
162 return sb_set_blocksize(sb, size);
163 }
164
165 EXPORT_SYMBOL(sb_min_blocksize);
166
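/*
 * Trivial get_block for the whole device: block @iblock of the bdev is
 * block @iblock of the "file", so every block is always mapped and
 * @create is irrelevant.
 */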
167 static int
168 blkdev_get_block(struct inode *inode, sector_t iblock,
169 struct buffer_head *bh, int create)
170 {
171 bh->b_bdev = I_BDEV(inode);
172 bh->b_blocknr = iblock;
173 set_buffer_mapped(bh);
174 return 0;
175 }
176
177 static struct inode *bdev_file_inode(struct file *file)
178 {
179 return file->f_mapping->host;
180 }
181
182 static unsigned int dio_bio_write_op(struct kiocb *iocb)
183 {
184 unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
185
186 /* avoid the need for an I/O completion work item */
187 if (iocb->ki_flags & IOCB_DSYNC)
188 op |= REQ_FUA;
189 return op;
190 }
191
192 #define DIO_INLINE_BIO_VECS 4
193
194 static void blkdev_bio_end_io_simple(struct bio *bio)
195 {
196 struct task_struct *waiter = bio->bi_private;
197
198 WRITE_ONCE(bio->bi_private, NULL);
199 blk_wake_io_task(waiter);
200 }
201
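/*
 * Fast path for small synchronous direct I/O: a single bio (and, for
 * requests of up to DIO_INLINE_BIO_VECS segments, its bio_vecs) lives
 * on the stack, and the submitter sleeps or polls until it completes.
 */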
202 static ssize_t
203 __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
204 int nr_pages)
205 {
206 struct file *file = iocb->ki_filp;
207 struct block_device *bdev = I_BDEV(bdev_file_inode(file));
208 struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
209 loff_t pos = iocb->ki_pos;
210 bool should_dirty = false;
211 struct bio bio;
212 ssize_t ret;
213 blk_qc_t qc;
214
215 if ((pos | iov_iter_alignment(iter)) &
216 (bdev_logical_block_size(bdev) - 1))
217 return -EINVAL;
218
219 if (nr_pages <= DIO_INLINE_BIO_VECS)
220 vecs = inline_vecs;
221 else {
222 vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
223 GFP_KERNEL);
224 if (!vecs)
225 return -ENOMEM;
226 }
227
228 bio_init(&bio, vecs, nr_pages);
229 bio_set_dev(&bio, bdev);
230 bio.bi_iter.bi_sector = pos >> 9;
231 bio.bi_write_hint = iocb->ki_hint;
232 bio.bi_private = current;
233 bio.bi_end_io = blkdev_bio_end_io_simple;
234 bio.bi_ioprio = iocb->ki_ioprio;
235
236 ret = bio_iov_iter_get_pages(&bio, iter);
237 if (unlikely(ret))
238 goto out;
239 ret = bio.bi_iter.bi_size;
240
241 if (iov_iter_rw(iter) == READ) {
242 bio.bi_opf = REQ_OP_READ;
243 if (iter_is_iovec(iter))
244 should_dirty = true;
245 } else {
246 bio.bi_opf = dio_bio_write_op(iocb);
247 task_io_account_write(ret);
248 }
249 if (iocb->ki_flags & IOCB_HIPRI)
250 bio_set_polled(&bio, iocb);
251
252 qc = submit_bio(&bio);
253 for (;;) {
254 set_current_state(TASK_UNINTERRUPTIBLE);
255 if (!READ_ONCE(bio.bi_private))
256 break;
257 if (!(iocb->ki_flags & IOCB_HIPRI) ||
258 !blk_poll(bdev_get_queue(bdev), qc, true))
259 io_schedule();
260 }
261 __set_current_state(TASK_RUNNING);
262
263 bio_release_pages(&bio, should_dirty);
264 if (unlikely(bio.bi_status))
265 ret = blk_status_to_errno(bio.bi_status);
266
267 out:
268 if (vecs != inline_vecs)
269 kfree(vecs);
270
271 bio_uninit(&bio);
272
273 return ret;
274 }
275
276 struct blkdev_dio {
277 union {
278 struct kiocb *iocb;
279 struct task_struct *waiter;
280 };
281 size_t size;
282 atomic_t ref;
283 bool multi_bio : 1;
284 bool should_dirty : 1;
285 bool is_sync : 1;
286 struct bio bio;
287 };
288
289 static struct bio_set blkdev_dio_pool;
290
291 static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
292 {
293 struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
294 struct request_queue *q = bdev_get_queue(bdev);
295
296 return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait);
297 }
298
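/*
 * Completion handler for the multi-bio path: the first error seen is
 * latched in the parent dio, and only the final reference either
 * completes the aio iocb or wakes the sleeping synchronous submitter.
 */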
299 static void blkdev_bio_end_io(struct bio *bio)
300 {
301 struct blkdev_dio *dio = bio->bi_private;
302 bool should_dirty = dio->should_dirty;
303
304 if (bio->bi_status && !dio->bio.bi_status)
305 dio->bio.bi_status = bio->bi_status;
306
307 if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
308 if (!dio->is_sync) {
309 struct kiocb *iocb = dio->iocb;
310 ssize_t ret;
311
312 if (likely(!dio->bio.bi_status)) {
313 ret = dio->size;
314 iocb->ki_pos += ret;
315 } else {
316 ret = blk_status_to_errno(dio->bio.bi_status);
317 }
318
319 dio->iocb->ki_complete(iocb, ret, 0);
320 if (dio->multi_bio)
321 bio_put(&dio->bio);
322 } else {
323 struct task_struct *waiter = dio->waiter;
324
325 WRITE_ONCE(dio->waiter, NULL);
326 blk_wake_io_task(waiter);
327 }
328 }
329
330 if (should_dirty) {
331 bio_check_pages_dirty(bio);
332 } else {
333 bio_release_pages(bio, false);
334 bio_put(bio);
335 }
336 }
337
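/*
 * General direct I/O path: carve the iov_iter into as many bios as
 * needed, all sharing one blkdev_dio (embedded in the first bio, which
 * is allocated from blkdev_dio_pool) for completion accounting.
 */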
338 static ssize_t
339 __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
340 {
341 struct file *file = iocb->ki_filp;
342 struct inode *inode = bdev_file_inode(file);
343 struct block_device *bdev = I_BDEV(inode);
344 struct blk_plug plug;
345 struct blkdev_dio *dio;
346 struct bio *bio;
347 bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
348 bool is_read = (iov_iter_rw(iter) == READ), is_sync;
349 loff_t pos = iocb->ki_pos;
350 blk_qc_t qc = BLK_QC_T_NONE;
351 int ret = 0;
352
353 if ((pos | iov_iter_alignment(iter)) &
354 (bdev_logical_block_size(bdev) - 1))
355 return -EINVAL;
356
357 bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
358
359 dio = container_of(bio, struct blkdev_dio, bio);
360 dio->is_sync = is_sync = is_sync_kiocb(iocb);
361 if (dio->is_sync) {
362 dio->waiter = current;
363 bio_get(bio);
364 } else {
365 dio->iocb = iocb;
366 }
367
368 dio->size = 0;
369 dio->multi_bio = false;
370 dio->should_dirty = is_read && iter_is_iovec(iter);
371
372 /*
373  * Don't plug for HIPRI/polled IO, as those should go straight
374  * to issue
375  */
376 if (!is_poll)
377 blk_start_plug(&plug);
378
379 for (;;) {
380 bio_set_dev(bio, bdev);
381 bio->bi_iter.bi_sector = pos >> 9;
382 bio->bi_write_hint = iocb->ki_hint;
383 bio->bi_private = dio;
384 bio->bi_end_io = blkdev_bio_end_io;
385 bio->bi_ioprio = iocb->ki_ioprio;
386
387 ret = bio_iov_iter_get_pages(bio, iter);
388 if (unlikely(ret)) {
389 bio->bi_status = BLK_STS_IOERR;
390 bio_endio(bio);
391 break;
392 }
393
394 if (is_read) {
395 bio->bi_opf = REQ_OP_READ;
396 if (dio->should_dirty)
397 bio_set_pages_dirty(bio);
398 } else {
399 bio->bi_opf = dio_bio_write_op(iocb);
400 task_io_account_write(bio->bi_iter.bi_size);
401 }
402
403 dio->size += bio->bi_iter.bi_size;
404 pos += bio->bi_iter.bi_size;
405
406 nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES);
407 if (!nr_pages) {
408 bool polled = false;
409
410 if (iocb->ki_flags & IOCB_HIPRI) {
411 bio_set_polled(bio, iocb);
412 polled = true;
413 }
414
415 qc = submit_bio(bio);
416
417 if (polled)
418 WRITE_ONCE(iocb->ki_cookie, qc);
419 break;
420 }
421
422 if (!dio->multi_bio) {
423 /*
424  * AIO needs an extra reference to ensure the dio
425  * structure which is embedded into the first bio
426  * stays around.
427  */
428 if (!is_sync)
429 bio_get(bio);
430 dio->multi_bio = true;
431 atomic_set(&dio->ref, 2);
432 } else {
433 atomic_inc(&dio->ref);
434 }
435
436 submit_bio(bio);
437 bio = bio_alloc(GFP_KERNEL, nr_pages);
438 }
439
440 if (!is_poll)
441 blk_finish_plug(&plug);
442
443 if (!is_sync)
444 return -EIOCBQUEUED;
445
446 for (;;) {
447 set_current_state(TASK_UNINTERRUPTIBLE);
448 if (!READ_ONCE(dio->waiter))
449 break;
450
451 if (!(iocb->ki_flags & IOCB_HIPRI) ||
452 !blk_poll(bdev_get_queue(bdev), qc, true))
453 io_schedule();
454 }
455 __set_current_state(TASK_RUNNING);
456
457 if (!ret)
458 ret = blk_status_to_errno(dio->bio.bi_status);
459 if (likely(!ret))
460 ret = dio->size;
461
462 bio_put(&dio->bio);
463 return ret;
464 }
465
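/*
 * Requests from a synchronous iocb that fit in a single bio take the
 * on-stack fast path; everything else goes through __blkdev_direct_IO,
 * one bio of at most BIO_MAX_PAGES pages at a time. Asking
 * iov_iter_npages() for BIO_MAX_PAGES + 1 is what distinguishes
 * "fits in one bio" from "does not".
 */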
466 static ssize_t
467 blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
468 {
469 int nr_pages;
470
471 nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES + 1);
472 if (!nr_pages)
473 return 0;
474 if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_PAGES)
475 return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
476
477 return __blkdev_direct_IO(iocb, iter, min(nr_pages, BIO_MAX_PAGES));
478 }
479
480 static __init int blkdev_init(void)
481 {
482 return bioset_init(&blkdev_dio_pool, 4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
483 }
484 module_init(blkdev_init);
485
486 int __sync_blockdev(struct block_device *bdev, int wait)
487 {
488 if (!bdev)
489 return 0;
490 if (!wait)
491 return filemap_flush(bdev->bd_inode->i_mapping);
492 return filemap_write_and_wait(bdev->bd_inode->i_mapping);
493 }
494
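/*
 * Write out and wait on all dirty data in the block device's pagecache.
 */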
499 int sync_blockdev(struct block_device *bdev)
500 {
501 return __sync_blockdev(bdev, 1);
502 }
503 EXPORT_SYMBOL(sync_blockdev);
504
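/*
 * Write out and wait on all dirty data for the device: through the
 * filesystem's superblock when one is mounted on it, otherwise just
 * the bdev pagecache.
 */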
510 int fsync_bdev(struct block_device *bdev)
511 {
512 struct super_block *sb = get_super(bdev);
513 if (sb) {
514 int res = sync_filesystem(sb);
515 drop_super(sb);
516 return res;
517 }
518 return sync_blockdev(bdev);
519 }
520 EXPORT_SYMBOL(fsync_bdev);
521
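/*
 * freeze_bdev - force the filesystem on @bdev into a consistent state.
 * bd_fsfreeze_count makes freezes nest: only the first call actually
 * freezes the superblock, and only the matching last thaw_bdev()
 * unfreezes it. Returns the superblock (NULL when none is mounted) for
 * handing back to thaw_bdev(), or an ERR_PTR on failure. Illustrative
 * usage (take_snapshot() is a placeholder, not a kernel API):
 *
 *	sb = freeze_bdev(bdev);
 *	if (IS_ERR(sb))
 *		return PTR_ERR(sb);
 *	take_snapshot();
 *	thaw_bdev(bdev, sb);
 */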
534 struct super_block *freeze_bdev(struct block_device *bdev)
535 {
536 struct super_block *sb;
537 int error = 0;
538
539 mutex_lock(&bdev->bd_fsfreeze_mutex);
540 if (++bdev->bd_fsfreeze_count > 1) {
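/* Somebody else froze it already; just report its superblock, if any. */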
546 sb = get_super(bdev);
547 if (sb)
548 drop_super(sb);
549 mutex_unlock(&bdev->bd_fsfreeze_mutex);
550 return sb;
551 }
552
553 sb = get_active_super(bdev);
554 if (!sb)
555 goto out;
556 if (sb->s_op->freeze_super)
557 error = sb->s_op->freeze_super(sb);
558 else
559 error = freeze_super(sb);
560 if (error) {
561 deactivate_super(sb);
562 bdev->bd_fsfreeze_count--;
563 mutex_unlock(&bdev->bd_fsfreeze_mutex);
564 return ERR_PTR(error);
565 }
566 deactivate_super(sb);
567 out:
568 sync_blockdev(bdev);
569 mutex_unlock(&bdev->bd_fsfreeze_mutex);
570 return sb;
571 }
572 EXPORT_SYMBOL(freeze_bdev);
573
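/*
 * thaw_bdev - undo one freeze_bdev() call. The filesystem is actually
 * unfrozen only when bd_fsfreeze_count drops to zero; returns -EINVAL
 * if the device was not frozen at all.
 */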
581 int thaw_bdev(struct block_device *bdev, struct super_block *sb)
582 {
583 int error = -EINVAL;
584
585 mutex_lock(&bdev->bd_fsfreeze_mutex);
586 if (!bdev->bd_fsfreeze_count)
587 goto out;
588
589 error = 0;
590 if (--bdev->bd_fsfreeze_count > 0)
591 goto out;
592
593 if (!sb)
594 goto out;
595
596 if (sb->s_op->thaw_super)
597 error = sb->s_op->thaw_super(sb);
598 else
599 error = thaw_super(sb);
600 if (error)
601 bdev->bd_fsfreeze_count++;
602 out:
603 mutex_unlock(&bdev->bd_fsfreeze_mutex);
604 return error;
605 }
606 EXPORT_SYMBOL(thaw_bdev);
607
608 static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
609 {
610 return block_write_full_page(page, blkdev_get_block, wbc);
611 }
612
613 static int blkdev_readpage(struct file * file, struct page * page)
614 {
615 return block_read_full_page(page, blkdev_get_block);
616 }
617
618 static int blkdev_readpages(struct file *file, struct address_space *mapping,
619 struct list_head *pages, unsigned nr_pages)
620 {
621 return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block);
622 }
623
624 static int blkdev_write_begin(struct file *file, struct address_space *mapping,
625 loff_t pos, unsigned len, unsigned flags,
626 struct page **pagep, void **fsdata)
627 {
628 return block_write_begin(mapping, pos, len, flags, pagep,
629 blkdev_get_block);
630 }
631
632 static int blkdev_write_end(struct file *file, struct address_space *mapping,
633 loff_t pos, unsigned len, unsigned copied,
634 struct page *page, void *fsdata)
635 {
636 int ret;
637 ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
638
639 unlock_page(page);
640 put_page(page);
641
642 return ret;
643 }
644
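/*
 * Seek against the current device size, sampled under the bdev inode
 * lock.
 */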
650 static loff_t block_llseek(struct file *file, loff_t offset, int whence)
651 {
652 struct inode *bd_inode = bdev_file_inode(file);
653 loff_t retval;
654
655 inode_lock(bd_inode);
656 retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
657 inode_unlock(bd_inode);
658 return retval;
659 }
660
661 int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
662 {
663 struct inode *bd_inode = bdev_file_inode(filp);
664 struct block_device *bdev = I_BDEV(bd_inode);
665 int error;
666
667 error = file_write_and_wait_range(filp, start, end);
668 if (error)
669 return error;
670
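/*
 * The data has reached the device by now; also flush its volatile
 * write cache so that completed writes survive a power failure.
 */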
676 error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
677 if (error == -EOPNOTSUPP)
678 error = 0;
679
680 return error;
681 }
682 EXPORT_SYMBOL(blkdev_fsync);
683
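/*
 * bdev_read_page - read a page synchronously through the driver's
 * rw_page op, with no bio allocation at all. Returns -EOPNOTSUPP when
 * the driver has no rw_page (or integrity is in use); the caller is
 * expected to fall back to a normal bio in that case.
 */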
700 int bdev_read_page(struct block_device *bdev, sector_t sector,
701 struct page *page)
702 {
703 const struct block_device_operations *ops = bdev->bd_disk->fops;
704 int result = -EOPNOTSUPP;
705
706 if (!ops->rw_page || bdev_get_integrity(bdev))
707 return result;
708
709 result = blk_queue_enter(bdev->bd_queue, 0);
710 if (result)
711 return result;
712 result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
713 REQ_OP_READ);
714 blk_queue_exit(bdev->bd_queue);
715 return result;
716 }
717 EXPORT_SYMBOL_GPL(bdev_read_page);
718
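/*
 * bdev_write_page - the same rw_page shortcut for writeout. On success
 * the page is cleaned and unlocked here; on error its writeback state
 * is ended and the caller falls back to a regular bio.
 */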
738 int bdev_write_page(struct block_device *bdev, sector_t sector,
739 struct page *page, struct writeback_control *wbc)
740 {
741 int result;
742 const struct block_device_operations *ops = bdev->bd_disk->fops;
743
744 if (!ops->rw_page || bdev_get_integrity(bdev))
745 return -EOPNOTSUPP;
746 result = blk_queue_enter(bdev->bd_queue, 0);
747 if (result)
748 return result;
749
750 set_page_writeback(page);
751 result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
752 REQ_OP_WRITE);
753 if (result) {
754 end_page_writeback(page);
755 } else {
756 clean_page_buffers(page);
757 unlock_page(page);
758 }
759 blk_queue_exit(bdev->bd_queue);
760 return result;
761 }
762 EXPORT_SYMBOL_GPL(bdev_write_page);
763
764
765
766
767
768 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
769 static struct kmem_cache * bdev_cachep __read_mostly;
770
771 static struct inode *bdev_alloc_inode(struct super_block *sb)
772 {
773 struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
774 if (!ei)
775 return NULL;
776 return &ei->vfs_inode;
777 }
778
779 static void bdev_free_inode(struct inode *inode)
780 {
781 kmem_cache_free(bdev_cachep, BDEV_I(inode));
782 }
783
784 static void init_once(void *foo)
785 {
786 struct bdev_inode *ei = (struct bdev_inode *) foo;
787 struct block_device *bdev = &ei->bdev;
788
789 memset(bdev, 0, sizeof(*bdev));
790 mutex_init(&bdev->bd_mutex);
791 INIT_LIST_HEAD(&bdev->bd_list);
792 #ifdef CONFIG_SYSFS
793 INIT_LIST_HEAD(&bdev->bd_holder_disks);
794 #endif
795 bdev->bd_bdi = &noop_backing_dev_info;
796 inode_init_once(&ei->vfs_inode);
797
798 mutex_init(&bdev->bd_fsfreeze_mutex);
799 }
800
801 static void bdev_evict_inode(struct inode *inode)
802 {
803 struct block_device *bdev = &BDEV_I(inode)->bdev;
804 truncate_inode_pages_final(&inode->i_data);
805 invalidate_inode_buffers(inode);
806 clear_inode(inode);
807 spin_lock(&bdev_lock);
808 list_del_init(&bdev->bd_list);
809 spin_unlock(&bdev_lock);
810
811 inode_detach_wb(inode);
812 if (bdev->bd_bdi != &noop_backing_dev_info) {
813 bdi_put(bdev->bd_bdi);
814 bdev->bd_bdi = &noop_backing_dev_info;
815 }
816 }
817
818 static const struct super_operations bdev_sops = {
819 .statfs = simple_statfs,
820 .alloc_inode = bdev_alloc_inode,
821 .free_inode = bdev_free_inode,
822 .drop_inode = generic_delete_inode,
823 .evict_inode = bdev_evict_inode,
824 };
825
826 static int bd_init_fs_context(struct fs_context *fc)
827 {
828 struct pseudo_fs_context *ctx = init_pseudo(fc, BDEVFS_MAGIC);
829 if (!ctx)
830 return -ENOMEM;
831 fc->s_iflags |= SB_I_CGROUPWB;
832 ctx->ops = &bdev_sops;
833 return 0;
834 }
835
836 static struct file_system_type bd_type = {
837 .name = "bdev",
838 .init_fs_context = bd_init_fs_context,
839 .kill_sb = kill_anon_super,
840 };
841
842 struct super_block *blockdev_superblock __read_mostly;
843 EXPORT_SYMBOL_GPL(blockdev_superblock);
844
845 void __init bdev_cache_init(void)
846 {
847 int err;
848 static struct vfsmount *bd_mnt;
849
850 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
851 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
852 SLAB_MEM_SPREAD|SLAB_ACCOUNT|SLAB_PANIC),
853 init_once);
854 err = register_filesystem(&bd_type);
855 if (err)
856 panic("Cannot register bdev pseudo-fs");
857 bd_mnt = kern_mount(&bd_type);
858 if (IS_ERR(bd_mnt))
859 panic("Cannot create bdev pseudo-fs");
860 blockdev_superblock = bd_mnt->mnt_sb;
861 }
862
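/*
 * Inode hash for the bdev pseudo-fs. MAJOR+MINOR is a weak hash, but
 * the number of block devices on a system is small enough for it not
 * to matter.
 */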
868 static inline unsigned long hash(dev_t dev)
869 {
870 return MAJOR(dev)+MINOR(dev);
871 }
872
873 static int bdev_test(struct inode *inode, void *data)
874 {
875 return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
876 }
877
878 static int bdev_set(struct inode *inode, void *data)
879 {
880 BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
881 return 0;
882 }
883
884 static LIST_HEAD(all_bdevs);
885
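/*
 * bdev_unhash_inode - unhash the bdev inode for @dev so that a later
 * bdget() allocates a fresh one; used when the underlying device goes
 * away.
 */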
890 void bdev_unhash_inode(dev_t dev)
891 {
892 struct inode *inode;
893
894 inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev);
895 if (inode) {
896 remove_inode_hash(inode);
897 iput(inode);
898 }
899 }
900
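/*
 * bdget - return the block_device for @dev, creating and initializing
 * its pseudo-fs inode on first use and linking it into all_bdevs for
 * nr_blockdev_pages() accounting.
 */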
901 struct block_device *bdget(dev_t dev)
902 {
903 struct block_device *bdev;
904 struct inode *inode;
905
906 inode = iget5_locked(blockdev_superblock, hash(dev),
907 bdev_test, bdev_set, &dev);
908
909 if (!inode)
910 return NULL;
911
912 bdev = &BDEV_I(inode)->bdev;
913
914 if (inode->i_state & I_NEW) {
915 bdev->bd_contains = NULL;
916 bdev->bd_super = NULL;
917 bdev->bd_inode = inode;
918 bdev->bd_block_size = i_blocksize(inode);
919 bdev->bd_part_count = 0;
920 bdev->bd_invalidated = 0;
921 inode->i_mode = S_IFBLK;
922 inode->i_rdev = dev;
923 inode->i_bdev = bdev;
924 inode->i_data.a_ops = &def_blk_aops;
925 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
926 spin_lock(&bdev_lock);
927 list_add(&bdev->bd_list, &all_bdevs);
928 spin_unlock(&bdev_lock);
929 unlock_new_inode(inode);
930 }
931 return bdev;
932 }
933
934 EXPORT_SYMBOL(bdget);
935
936
937
938
939
940 struct block_device *bdgrab(struct block_device *bdev)
941 {
942 ihold(bdev->bd_inode);
943 return bdev;
944 }
945 EXPORT_SYMBOL(bdgrab);
946
947 long nr_blockdev_pages(void)
948 {
949 struct block_device *bdev;
950 long ret = 0;
951 spin_lock(&bdev_lock);
952 list_for_each_entry(bdev, &all_bdevs, bd_list) {
953 ret += bdev->bd_inode->i_mapping->nrpages;
954 }
955 spin_unlock(&bdev_lock);
956 return ret;
957 }
958
959 void bdput(struct block_device *bdev)
960 {
961 iput(bdev->bd_inode);
962 }
963
964 EXPORT_SYMBOL(bdput);
965
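/*
 * bd_acquire - resolve the block_device behind a device special inode,
 * caching it in inode->i_bdev and pointing i_mapping at the bdev's
 * mapping so that all openers share one pagecache.
 */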
966 static struct block_device *bd_acquire(struct inode *inode)
967 {
968 struct block_device *bdev;
969
970 spin_lock(&bdev_lock);
971 bdev = inode->i_bdev;
972 if (bdev && !inode_unhashed(bdev->bd_inode)) {
973 bdgrab(bdev);
974 spin_unlock(&bdev_lock);
975 return bdev;
976 }
977 spin_unlock(&bdev_lock);
978
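/*
 * A non-NULL but unhashed i_bdev refers to a device that has since
 * been removed; forget it and look the device up again by number.
 */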
985 if (bdev)
986 bd_forget(inode);
987
988 bdev = bdget(inode->i_rdev);
989 if (bdev) {
990 spin_lock(&bdev_lock);
991 if (!inode->i_bdev) {
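/*
 * Take an extra reference for the inode->i_bdev pointer itself; it is
 * dropped again in bd_forget().
 */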
998 bdgrab(bdev);
999 inode->i_bdev = bdev;
1000 inode->i_mapping = bdev->bd_inode->i_mapping;
1001 }
1002 spin_unlock(&bdev_lock);
1003 }
1004 return bdev;
1005 }
1006
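/* Undo bd_acquire()'s caching when the device inode goes away. */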
1009 void bd_forget(struct inode *inode)
1010 {
1011 struct block_device *bdev = NULL;
1012
1013 spin_lock(&bdev_lock);
1014 if (!sb_is_blkdev_sb(inode->i_sb))
1015 bdev = inode->i_bdev;
1016 inode->i_bdev = NULL;
1017 inode->i_mapping = &inode->i_data;
1018 spin_unlock(&bdev_lock);
1019
1020 if (bdev)
1021 bdput(bdev);
1022 }
1023
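/*
 * bd_may_claim - test whether @bdev may be claimed by @holder, with
 * bdev_lock held. Claiming a partition also requires the whole device
 * to be unclaimed or held by the claiming machinery itself;
 * whole->bd_holder == bd_may_claim is the sentinel for the latter.
 */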
1038 static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
1039 void *holder)
1040 {
1041 if (bdev->bd_holder == holder)
1042 return true; /* already a holder */
1043 else if (bdev->bd_holder != NULL)
1044 return false; /* held by someone else */
1045 else if (whole == bdev)
1046 return true; /* is a whole device which isn't held */
1047
1048 else if (whole->bd_holder == bd_may_claim)
1049 return true; /* is a partition of a device that is being partitioned */
1050 else if (whole->bd_holder != NULL)
1051 return false; /* is a partition of a held device */
1052 else
1053 return true; /* is a partition of an un-held device */
1054 }
1055
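/*
 * bd_prepare_to_claim - called (and returning) with bdev_lock held,
 * but dropping and retaking it while waiting for a concurrent claim on
 * the whole device to finish. Returns 0 when the claim may proceed,
 * -EBUSY when the device is held by somebody else.
 */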
1074 static int bd_prepare_to_claim(struct block_device *bdev,
1075 struct block_device *whole, void *holder)
1076 {
1077 retry:
1078
1079 if (!bd_may_claim(bdev, whole, holder))
1080 return -EBUSY;
1081
1082
1083 if (whole->bd_claiming) {
1084 wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
1085 DEFINE_WAIT(wait);
1086
1087 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
1088 spin_unlock(&bdev_lock);
1089 schedule();
1090 finish_wait(wq, &wait);
1091 spin_lock(&bdev_lock);
1092 goto retry;
1093 }
1094
1095
1096 return 0;
1097 }
1098
1099 static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
1100 {
1101 struct gendisk *disk = get_gendisk(bdev->bd_dev, partno);
1102
1103 if (!disk)
1104 return NULL;
1105
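/*
 * The bdev inode may have been unhashed because the device vanished
 * while we were looking up the gendisk; treat such a stale bdev as
 * nonexistent.
 */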
1113 if (inode_unhashed(bdev->bd_inode)) {
1114 put_disk_and_module(disk);
1115 return NULL;
1116 }
1117 return disk;
1118 }
1119
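/*
 * bd_start_claiming - begin an exclusive claim on @bdev for @holder:
 * pin the whole device and mark it with bd_claiming so that concurrent
 * claimers wait. Ends in bd_finish_claiming() on success or
 * bd_abort_claiming() on failure; returns the whole device or an
 * ERR_PTR.
 */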
1143 struct block_device *bd_start_claiming(struct block_device *bdev, void *holder)
1144 {
1145 struct gendisk *disk;
1146 struct block_device *whole;
1147 int partno, err;
1148
1149 might_sleep();
1150
1151
1152
1153
1154
1155 disk = bdev_get_gendisk(bdev, &partno);
1156 if (!disk)
1157 return ERR_PTR(-ENXIO);
1158
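/*
 * Claims nest on the whole device, so anchor on the partition-0 bdev
 * even when a partition is being claimed.
 */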
1167 if (partno)
1168 whole = bdget_disk(disk, 0);
1169 else
1170 whole = bdgrab(bdev);
1171
1172 put_disk_and_module(disk);
1173 if (!whole)
1174 return ERR_PTR(-ENOMEM);
1175
1176
1177 spin_lock(&bdev_lock);
1178
1179 err = bd_prepare_to_claim(bdev, whole, holder);
1180 if (err == 0) {
1181 whole->bd_claiming = holder;
1182 spin_unlock(&bdev_lock);
1183 return whole;
1184 } else {
1185 spin_unlock(&bdev_lock);
1186 bdput(whole);
1187 return ERR_PTR(err);
1188 }
1189 }
1190 EXPORT_SYMBOL(bd_start_claiming);
1191
1192 static void bd_clear_claiming(struct block_device *whole, void *holder)
1193 {
1194 lockdep_assert_held(&bdev_lock);
1195
1196 BUG_ON(whole->bd_claiming != holder);
1197 whole->bd_claiming = NULL;
1198 wake_up_bit(&whole->bd_claiming, 0);
1199 }
1200
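/*
 * bd_finish_claiming - make a prepared claim official: bump the holder
 * counts on both the partition and the whole device, then clear
 * bd_claiming and wake any waiters.
 */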
1210 void bd_finish_claiming(struct block_device *bdev, struct block_device *whole,
1211 void *holder)
1212 {
1213 spin_lock(&bdev_lock);
1214 BUG_ON(!bd_may_claim(bdev, whole, holder));
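/*
 * For a whole device, bd_holders is incremented twice and bd_holder is
 * set to bd_may_claim before being set to the real holder.
 */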
1219 whole->bd_holders++;
1220 whole->bd_holder = bd_may_claim;
1221 bdev->bd_holders++;
1222 bdev->bd_holder = holder;
1223 bd_clear_claiming(whole, holder);
1224 spin_unlock(&bdev_lock);
1225 }
1226 EXPORT_SYMBOL(bd_finish_claiming);
1227
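/*
 * bd_abort_claiming - give up a claim prepared by bd_start_claiming(),
 * waking anybody waiting in bd_prepare_to_claim().
 */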
1238 void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
1239 void *holder)
1240 {
1241 spin_lock(&bdev_lock);
1242 bd_clear_claiming(whole, holder);
1243 spin_unlock(&bdev_lock);
1244 }
1245 EXPORT_SYMBOL(bd_abort_claiming);
1246
1247 #ifdef CONFIG_SYSFS
1248 struct bd_holder_disk {
1249 struct list_head list;
1250 struct gendisk *disk;
1251 int refcnt;
1252 };
1253
1254 static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
1255 struct gendisk *disk)
1256 {
1257 struct bd_holder_disk *holder;
1258
1259 list_for_each_entry(holder, &bdev->bd_holder_disks, list)
1260 if (holder->disk == disk)
1261 return holder;
1262 return NULL;
1263 }
1264
1265 static int add_symlink(struct kobject *from, struct kobject *to)
1266 {
1267 return sysfs_create_link(from, to, kobject_name(to));
1268 }
1269
1270 static void del_symlink(struct kobject *from, struct kobject *to)
1271 {
1272 sysfs_remove_link(from, kobject_name(to));
1273 }
1274
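/*
 * bd_link_disk_holder - create the "slaves"/"holders" sysfs symlinks
 * that tie a stacking driver's disk (dm, md, ...) to the device it
 * sits on top of; repeated links from the same disk nest via refcnt.
 */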
1303 int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
1304 {
1305 struct bd_holder_disk *holder;
1306 int ret = 0;
1307
1308 mutex_lock(&bdev->bd_mutex);
1309
1310 WARN_ON_ONCE(!bdev->bd_holder);
1311
1312
1313 if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
1314 goto out_unlock;
1315
1316 holder = bd_find_holder_disk(bdev, disk);
1317 if (holder) {
1318 holder->refcnt++;
1319 goto out_unlock;
1320 }
1321
1322 holder = kzalloc(sizeof(*holder), GFP_KERNEL);
1323 if (!holder) {
1324 ret = -ENOMEM;
1325 goto out_unlock;
1326 }
1327
1328 INIT_LIST_HEAD(&holder->list);
1329 holder->disk = disk;
1330 holder->refcnt = 1;
1331
1332 ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
1333 if (ret)
1334 goto out_free;
1335
1336 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
1337 if (ret)
1338 goto out_del;
1339
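/*
 * The holder directory would be destroyed with the bdev underneath us;
 * pin it for as long as the link exists.
 */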
1343 kobject_get(bdev->bd_part->holder_dir);
1344
1345 list_add(&holder->list, &bdev->bd_holder_disks);
1346 goto out_unlock;
1347
1348 out_del:
1349 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
1350 out_free:
1351 kfree(holder);
1352 out_unlock:
1353 mutex_unlock(&bdev->bd_mutex);
1354 return ret;
1355 }
1356 EXPORT_SYMBOL_GPL(bd_link_disk_holder);
1357
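/*
 * bd_unlink_disk_holder - undo bd_link_disk_holder(); the symlinks are
 * removed when the last nested link from @disk is dropped.
 */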
1368 void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
1369 {
1370 struct bd_holder_disk *holder;
1371
1372 mutex_lock(&bdev->bd_mutex);
1373
1374 holder = bd_find_holder_disk(bdev, disk);
1375
1376 if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
1377 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
1378 del_symlink(bdev->bd_part->holder_dir,
1379 &disk_to_dev(disk)->kobj);
1380 kobject_put(bdev->bd_part->holder_dir);
1381 list_del_init(&holder->list);
1382 kfree(holder);
1383 }
1384
1385 mutex_unlock(&bdev->bd_mutex);
1386 }
1387 EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
1388 #endif
1389
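/*
 * flush_disk - throw away everything cached for a device whose media
 * or size changed under us, warning if live inodes were still using
 * it, and mark the device for a partition rescan on the next open.
 */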
1400 static void flush_disk(struct block_device *bdev, bool kill_dirty)
1401 {
1402 if (__invalidate_device(bdev, kill_dirty)) {
1403 printk(KERN_WARNING "VFS: busy inodes on changed media or "
1404 "resized disk %s\n",
1405 bdev->bd_disk ? bdev->bd_disk->disk_name : "");
1406 }
1407 bdev->bd_invalidated = 1;
1408 }
1409
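/*
 * check_disk_size_change - resync the bdev inode size with the gendisk
 * capacity; when the device shrank, also flush whatever we had cached
 * beyond the new end.
 */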
1420 void check_disk_size_change(struct gendisk *disk, struct block_device *bdev,
1421 bool verbose)
1422 {
1423 loff_t disk_size, bdev_size;
1424
1425 disk_size = (loff_t)get_capacity(disk) << 9;
1426 bdev_size = i_size_read(bdev->bd_inode);
1427 if (disk_size != bdev_size) {
1428 if (verbose) {
1429 printk(KERN_INFO
1430 "%s: detected capacity change from %lld to %lld\n",
1431 disk->disk_name, bdev_size, disk_size);
1432 }
1433 i_size_write(bdev->bd_inode, disk_size);
1434 if (bdev_size > disk_size)
1435 flush_disk(bdev, false);
1436 }
1437 }
1438
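/*
 * revalidate_disk - run the driver's revalidate hook after a media or
 * capacity change, then resync the cached size for non-hidden disks.
 */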
1447 int revalidate_disk(struct gendisk *disk)
1448 {
1449 int ret = 0;
1450
1451 if (disk->fops->revalidate_disk)
1452 ret = disk->fops->revalidate_disk(disk);
1453
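/* Hidden disks have no bdev, so there is no cached size to resync. */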
1458 if (!(disk->flags & GENHD_FL_HIDDEN)) {
1459 struct block_device *bdev = bdget_disk(disk, 0);
1460
1461 if (!bdev)
1462 return ret;
1463
1464 mutex_lock(&bdev->bd_mutex);
1465 check_disk_size_change(disk, bdev, ret == 0);
1466 bdev->bd_invalidated = 0;
1467 mutex_unlock(&bdev->bd_mutex);
1468 bdput(bdev);
1469 }
1470 return ret;
1471 }
1472 EXPORT_SYMBOL(revalidate_disk);
1473
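/*
 * check_disk_change - if a media change or eject request was
 * signalled, invalidate all cached state and revalidate. Returns 1
 * when the media did change, 0 otherwise.
 */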
1483 int check_disk_change(struct block_device *bdev)
1484 {
1485 struct gendisk *disk = bdev->bd_disk;
1486 const struct block_device_operations *bdops = disk->fops;
1487 unsigned int events;
1488
1489 events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
1490 DISK_EVENT_EJECT_REQUEST);
1491 if (!(events & DISK_EVENT_MEDIA_CHANGE))
1492 return 0;
1493
1494 flush_disk(bdev, true);
1495 if (bdops->revalidate_disk)
1496 bdops->revalidate_disk(bdev->bd_disk);
1497 return 1;
1498 }
1499
1500 EXPORT_SYMBOL(check_disk_change);
1501
1502 void bd_set_size(struct block_device *bdev, loff_t size)
1503 {
1504 inode_lock(bdev->bd_inode);
1505 i_size_write(bdev->bd_inode, size);
1506 inode_unlock(bdev->bd_inode);
1507 }
1508 EXPORT_SYMBOL(bd_set_size);
1509
1510 static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
1511
1512 static void bdev_disk_changed(struct block_device *bdev, bool invalidate)
1513 {
1514 if (disk_part_scan_enabled(bdev->bd_disk)) {
1515 if (invalidate)
1516 invalidate_partitions(bdev->bd_disk, bdev);
1517 else
1518 rescan_partitions(bdev->bd_disk, bdev);
1519 } else {
1520 check_disk_size_change(bdev->bd_disk, bdev, !invalidate);
1521 bdev->bd_invalidated = 0;
1522 }
1523 }
1524
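/*
 * __blkdev_get - core open path. The first opener binds the gendisk
 * and, for a partition, recursively opens the whole device first;
 * subsequent openers only call the driver's open. bd_mutex is taken
 * with lockdep subclass @for_part so the nested whole-device open does
 * not trip the lock validator.
 */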
1532 static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1533 {
1534 struct gendisk *disk;
1535 int ret;
1536 int partno;
1537 int perm = 0;
1538 bool first_open = false;
1539
1540 if (mode & FMODE_READ)
1541 perm |= MAY_READ;
1542 if (mode & FMODE_WRITE)
1543 perm |= MAY_WRITE;
1544
1545
1546
1547 if (!for_part) {
1548 ret = devcgroup_inode_permission(bdev->bd_inode, perm);
1549 if (ret != 0) {
1550 bdput(bdev);
1551 return ret;
1552 }
1553 }
1554
1555 restart:
1556
1557 ret = -ENXIO;
1558 disk = bdev_get_gendisk(bdev, &partno);
1559 if (!disk)
1560 goto out;
1561
1562 disk_block_events(disk);
1563 mutex_lock_nested(&bdev->bd_mutex, for_part);
1564 if (!bdev->bd_openers) {
1565 first_open = true;
1566 bdev->bd_disk = disk;
1567 bdev->bd_queue = disk->queue;
1568 bdev->bd_contains = bdev;
1569 bdev->bd_partno = partno;
1570
1571 if (!partno) {
1572 ret = -ENXIO;
1573 bdev->bd_part = disk_get_part(disk, partno);
1574 if (!bdev->bd_part)
1575 goto out_clear;
1576
1577 ret = 0;
1578 if (disk->fops->open) {
1579 ret = disk->fops->open(bdev, mode);
1580 if (ret == -ERESTARTSYS) {
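/*
 * The driver asked for a retry (e.g. it lost a race with the disk
 * being deleted); unwind completely and restart the open.
 */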
1585 disk_put_part(bdev->bd_part);
1586 bdev->bd_part = NULL;
1587 bdev->bd_disk = NULL;
1588 bdev->bd_queue = NULL;
1589 mutex_unlock(&bdev->bd_mutex);
1590 disk_unblock_events(disk);
1591 put_disk_and_module(disk);
1592 goto restart;
1593 }
1594 }
1595
1596 if (!ret) {
1597 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
1598 set_init_blocksize(bdev);
1599 }
1600
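/*
 * If the device was invalidated, rescan the partitions. This runs
 * both after a successful open and after -ENOMEDIUM, the latter so
 * that ghost partitions of removed media are cleared out.
 */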
1607 if (bdev->bd_invalidated &&
1608 (!ret || ret == -ENOMEDIUM))
1609 bdev_disk_changed(bdev, ret == -ENOMEDIUM);
1610
1611 if (ret)
1612 goto out_clear;
1613 } else {
1614 struct block_device *whole;
1615 whole = bdget_disk(disk, 0);
1616 ret = -ENOMEM;
1617 if (!whole)
1618 goto out_clear;
1619 BUG_ON(for_part);
1620 ret = __blkdev_get(whole, mode, 1);
1621 if (ret)
1622 goto out_clear;
1623 bdev->bd_contains = whole;
1624 bdev->bd_part = disk_get_part(disk, partno);
1625 if (!(disk->flags & GENHD_FL_UP) ||
1626 !bdev->bd_part || !bdev->bd_part->nr_sects) {
1627 ret = -ENXIO;
1628 goto out_clear;
1629 }
1630 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
1631 set_init_blocksize(bdev);
1632 }
1633
1634 if (bdev->bd_bdi == &noop_backing_dev_info)
1635 bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
1636 } else {
1637 if (bdev->bd_contains == bdev) {
1638 ret = 0;
1639 if (bdev->bd_disk->fops->open)
1640 ret = bdev->bd_disk->fops->open(bdev, mode);
1641
1642 if (bdev->bd_invalidated &&
1643 (!ret || ret == -ENOMEDIUM))
1644 bdev_disk_changed(bdev, ret == -ENOMEDIUM);
1645 if (ret)
1646 goto out_unlock_bdev;
1647 }
1648 }
1649 bdev->bd_openers++;
1650 if (for_part)
1651 bdev->bd_part_count++;
1652 mutex_unlock(&bdev->bd_mutex);
1653 disk_unblock_events(disk);
1654
1655 if (!first_open)
1656 put_disk_and_module(disk);
1657 return 0;
1658
1659 out_clear:
1660 disk_put_part(bdev->bd_part);
1661 bdev->bd_disk = NULL;
1662 bdev->bd_part = NULL;
1663 bdev->bd_queue = NULL;
1664 if (bdev != bdev->bd_contains)
1665 __blkdev_put(bdev->bd_contains, mode, 1);
1666 bdev->bd_contains = NULL;
1667 out_unlock_bdev:
1668 mutex_unlock(&bdev->bd_mutex);
1669 disk_unblock_events(disk);
1670 put_disk_and_module(disk);
1671 out:
1672 bdput(bdev);
1673
1674 return ret;
1675 }
1676
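/*
 * blkdev_get - open @bdev with @mode and, for FMODE_EXCL, claim it for
 * @holder. The bdev reference is consumed even on failure, so callers
 * never need bdput() on an error path.
 */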
1696 int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1697 {
1698 struct block_device *whole = NULL;
1699 int res;
1700
1701 WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);
1702
1703 if ((mode & FMODE_EXCL) && holder) {
1704 whole = bd_start_claiming(bdev, holder);
1705 if (IS_ERR(whole)) {
1706 bdput(bdev);
1707 return PTR_ERR(whole);
1708 }
1709 }
1710
1711 res = __blkdev_get(bdev, mode, 0);
1712
1713 if (whole) {
1714 struct gendisk *disk = whole->bd_disk;
1715
1716
1717 mutex_lock(&bdev->bd_mutex);
1718 if (!res)
1719 bd_finish_claiming(bdev, whole, holder);
1720 else
1721 bd_abort_claiming(bdev, whole, holder);
1722
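/*
 * Any write holder blocks disk event polling, and the write_holder
 * state sticks until all holders are released; tracking individual
 * writable references would be too fragile given how @mode is used.
 */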
1729 if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
1730 (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
1731 bdev->bd_write_holder = true;
1732 disk_block_events(disk);
1733 }
1734
1735 mutex_unlock(&bdev->bd_mutex);
1736 bdput(whole);
1737 }
1738
1739 return res;
1740 }
1741 EXPORT_SYMBOL(blkdev_get);
1742
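/*
 * blkdev_get_by_path - look the device up by path name and open it;
 * write opens of a read-only device fail with -EACCES. A typical
 * (illustrative) caller:
 *
 *	bdev = blkdev_get_by_path("/dev/vdb",
 *				  FMODE_READ | FMODE_WRITE | FMODE_EXCL, fs);
 *	if (IS_ERR(bdev))
 *		return PTR_ERR(bdev);
 *	...
 *	blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
 */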
1760 struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
1761 void *holder)
1762 {
1763 struct block_device *bdev;
1764 int err;
1765
1766 bdev = lookup_bdev(path);
1767 if (IS_ERR(bdev))
1768 return bdev;
1769
1770 err = blkdev_get(bdev, mode, holder);
1771 if (err)
1772 return ERR_PTR(err);
1773
1774 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1775 blkdev_put(bdev, mode);
1776 return ERR_PTR(-EACCES);
1777 }
1778
1779 return bdev;
1780 }
1781 EXPORT_SYMBOL(blkdev_get_by_path);
1782
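/*
 * blkdev_get_by_dev - like blkdev_get_by_path(), but starting from a
 * dev_t number rather than a path.
 */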
1805 struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
1806 {
1807 struct block_device *bdev;
1808 int err;
1809
1810 bdev = bdget(dev);
1811 if (!bdev)
1812 return ERR_PTR(-ENOMEM);
1813
1814 err = blkdev_get(bdev, mode, holder);
1815 if (err)
1816 return ERR_PTR(err);
1817
1818 return bdev;
1819 }
1820 EXPORT_SYMBOL(blkdev_get_by_dev);
1821
1822 static int blkdev_open(struct inode * inode, struct file * filp)
1823 {
1824 struct block_device *bdev;
1825
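/*
 * Block device opens are implicitly O_LARGEFILE: the device can exceed
 * MAX_NON_LFS whether or not userspace asked for large file support.
 */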
1832 filp->f_flags |= O_LARGEFILE;
1833
1834 filp->f_mode |= FMODE_NOWAIT;
1835
1836 if (filp->f_flags & O_NDELAY)
1837 filp->f_mode |= FMODE_NDELAY;
1838 if (filp->f_flags & O_EXCL)
1839 filp->f_mode |= FMODE_EXCL;
1840 if ((filp->f_flags & O_ACCMODE) == 3)
1841 filp->f_mode |= FMODE_WRITE_IOCTL;
1842
1843 bdev = bd_acquire(inode);
1844 if (bdev == NULL)
1845 return -ENOMEM;
1846
1847 filp->f_mapping = bdev->bd_inode->i_mapping;
1848 filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
1849
1850 return blkdev_get(bdev, filp->f_mode, filp);
1851 }
1852
1853 static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1854 {
1855 struct gendisk *disk = bdev->bd_disk;
1856 struct block_device *victim = NULL;
1857
1858 mutex_lock_nested(&bdev->bd_mutex, for_part);
1859 if (for_part)
1860 bdev->bd_part_count--;
1861
1862 if (!--bdev->bd_openers) {
1863 WARN_ON_ONCE(bdev->bd_holders);
1864 sync_blockdev(bdev);
1865 kill_bdev(bdev);
1866
1867 bdev_write_inode(bdev);
1868 }
1869 if (bdev->bd_contains == bdev) {
1870 if (disk->fops->release)
1871 disk->fops->release(disk, mode);
1872 }
1873 if (!bdev->bd_openers) {
1874 disk_put_part(bdev->bd_part);
1875 bdev->bd_part = NULL;
1876 bdev->bd_disk = NULL;
1877 if (bdev != bdev->bd_contains)
1878 victim = bdev->bd_contains;
1879 bdev->bd_contains = NULL;
1880
1881 put_disk_and_module(disk);
1882 }
1883 mutex_unlock(&bdev->bd_mutex);
1884 bdput(bdev);
1885 if (victim)
1886 __blkdev_put(victim, mode, 1);
1887 }
1888
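/*
 * blkdev_put - drop one open reference. For FMODE_EXCL opens this also
 * releases the holder claim on both the partition and the whole
 * device, re-enabling disk events once the last write holder is gone.
 */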
1889 void blkdev_put(struct block_device *bdev, fmode_t mode)
1890 {
1891 mutex_lock(&bdev->bd_mutex);
1892
1893 if (mode & FMODE_EXCL) {
1894 bool bdev_free;
1895
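/*
 * Release the claim. The holder fields are protected by bdev_lock;
 * bd_mutex (taken above) serializes against disk_holder unlinking.
 */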
1901 spin_lock(&bdev_lock);
1902
1903 WARN_ON_ONCE(--bdev->bd_holders < 0);
1904 WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);
1905
1906
1907 if ((bdev_free = !bdev->bd_holders))
1908 bdev->bd_holder = NULL;
1909 if (!bdev->bd_contains->bd_holders)
1910 bdev->bd_contains->bd_holder = NULL;
1911
1912 spin_unlock(&bdev_lock);
1913
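/* If this was the last claim, unblock event polling for write holders. */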
1918 if (bdev_free && bdev->bd_write_holder) {
1919 disk_unblock_events(bdev->bd_disk);
1920 bdev->bd_write_holder = false;
1921 }
1922 }
1923
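/*
 * Flush MEDIA_CHANGE events so that ejects commanded from userland
 * (e.g. eject(1)) are noticed promptly.
 */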
1929 disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);
1930
1931 mutex_unlock(&bdev->bd_mutex);
1932
1933 __blkdev_put(bdev, mode, 0);
1934 }
1935 EXPORT_SYMBOL(blkdev_put);
1936
1937 static int blkdev_close(struct inode * inode, struct file * filp)
1938 {
1939 struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
1940 blkdev_put(bdev, filp->f_mode);
1941 return 0;
1942 }
1943
1944 static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1945 {
1946 struct block_device *bdev = I_BDEV(bdev_file_inode(file));
1947 fmode_t mode = file->f_mode;
1948
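/*
 * O_NDELAY can be toggled with fcntl(F_SETFL) at any time, so the
 * FMODE_NDELAY bit is recomputed from f_flags on every ioctl.
 */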
1953 if (file->f_flags & O_NDELAY)
1954 mode |= FMODE_NDELAY;
1955 else
1956 mode &= ~FMODE_NDELAY;
1957
1958 return blkdev_ioctl(bdev, mode, cmd, arg);
1959 }
1960
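/*
 * Writes can never extend a block device: the iterator is truncated at
 * the current device size and a write starting at or past the end
 * fails with -ENOSPC. IOCB_NOWAIT is only honoured for direct I/O.
 */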
1968 ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
1969 {
1970 struct file *file = iocb->ki_filp;
1971 struct inode *bd_inode = bdev_file_inode(file);
1972 loff_t size = i_size_read(bd_inode);
1973 struct blk_plug plug;
1974 ssize_t ret;
1975
1976 if (bdev_read_only(I_BDEV(bd_inode)))
1977 return -EPERM;
1978
1979 /* uswsusp needs write permission to the swap */
1980 if (IS_SWAPFILE(bd_inode) && !hibernation_available())
1981 return -ETXTBSY;
1982
1983 if (!iov_iter_count(from))
1984 return 0;
1985
1986 if (iocb->ki_pos >= size)
1987 return -ENOSPC;
1988
1989 if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
1990 return -EOPNOTSUPP;
1991
1992 iov_iter_truncate(from, size - iocb->ki_pos);
1993
1994 blk_start_plug(&plug);
1995 ret = __generic_file_write_iter(iocb, from);
1996 if (ret > 0)
1997 ret = generic_write_sync(iocb, ret);
1998 blk_finish_plug(&plug);
1999 return ret;
2000 }
2001 EXPORT_SYMBOL_GPL(blkdev_write_iter);
2002
2003 ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
2004 {
2005 struct file *file = iocb->ki_filp;
2006 struct inode *bd_inode = bdev_file_inode(file);
2007 loff_t size = i_size_read(bd_inode);
2008 loff_t pos = iocb->ki_pos;
2009
2010 if (pos >= size)
2011 return 0;
2012
2013 size -= pos;
2014 iov_iter_truncate(to, size);
2015 return generic_file_read_iter(iocb, to);
2016 }
2017 EXPORT_SYMBOL_GPL(blkdev_read_iter);
2018
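/*
 * Give the filesystem mounted on this bdev (if any) first refusal via
 * its bdev_try_to_free_page hook before freeing buffer heads ourselves.
 */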
2023 static int blkdev_releasepage(struct page *page, gfp_t wait)
2024 {
2025 struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;
2026
2027 if (super && super->s_op->bdev_try_to_free_page)
2028 return super->s_op->bdev_try_to_free_page(super, page, wait);
2029
2030 return try_to_free_buffers(page);
2031 }
2032
2033 static int blkdev_writepages(struct address_space *mapping,
2034 struct writeback_control *wbc)
2035 {
2036 return generic_writepages(mapping, wbc);
2037 }
2038
2039 static const struct address_space_operations def_blk_aops = {
2040 .readpage = blkdev_readpage,
2041 .readpages = blkdev_readpages,
2042 .writepage = blkdev_writepage,
2043 .write_begin = blkdev_write_begin,
2044 .write_end = blkdev_write_end,
2045 .writepages = blkdev_writepages,
2046 .releasepage = blkdev_releasepage,
2047 .direct_IO = blkdev_direct_IO,
2048 .migratepage = buffer_migrate_page_norefs,
2049 .is_dirty_writeback = buffer_check_dirty_writeback,
2050 };
2051
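/*
 * fallocate() cannot allocate anything on a block device; the
 * supported modes map onto zeroing and discard instead: ZERO_RANGE
 * zeroes by writing, PUNCH_HOLE zeroes without a write fallback, and
 * PUNCH_HOLE with NO_HIDE_STALE discards (stale data may remain
 * readable).
 */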
2052 #define BLKDEV_FALLOC_FL_SUPPORTED \
2053 (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
2054 FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
2055
2056 static long blkdev_fallocate(struct file *file, int mode, loff_t start,
2057 loff_t len)
2058 {
2059 struct block_device *bdev = I_BDEV(bdev_file_inode(file));
2060 struct address_space *mapping;
2061 loff_t end = start + len - 1;
2062 loff_t isize;
2063 int error;
2064
2065 /* Fail if we don't recognize the flags. */
2066 if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
2067 return -EOPNOTSUPP;
2068
2069 /* Don't go off the end of the device. */
2070 isize = i_size_read(bdev->bd_inode);
2071 if (start >= isize)
2072 return -EINVAL;
2073 if (end >= isize) {
2074 if (mode & FALLOC_FL_KEEP_SIZE) {
2075 len = isize - start;
2076 end = start + len - 1;
2077 } else
2078 return -EINVAL;
2079 }
2080
2081 /*
2082  * Don't allow IO that isn't aligned to logical block size.
2083  */
2084 if ((start | len) & (bdev_logical_block_size(bdev) - 1))
2085 return -EINVAL;
2086
2087 /* Invalidate the page cache, including dirty pages. */
2088 mapping = bdev->bd_inode->i_mapping;
2089 truncate_inode_pages_range(mapping, start, end);
2090
2091 switch (mode) {
2092 case FALLOC_FL_ZERO_RANGE:
2093 case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
2094 error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
2095 GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
2096 break;
2097 case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
2098 error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
2099 GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
2100 break;
2101 case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
2102 error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
2103 GFP_KERNEL, 0);
2104 break;
2105 default:
2106 return -EOPNOTSUPP;
2107 }
2108 if (error)
2109 return error;
2110
2111 /*
2112  * Invalidate again; if someone wandered in and dirtied a page,
2113  * the caller will be given -EBUSY.  The third argument is
2114  * inclusive, so the rounding here is safe.
2115  */
2116 return invalidate_inode_pages2_range(mapping,
2117 start >> PAGE_SHIFT,
2118 end >> PAGE_SHIFT);
2119 }
2120
2121 const struct file_operations def_blk_fops = {
2122 .open = blkdev_open,
2123 .release = blkdev_close,
2124 .llseek = block_llseek,
2125 .read_iter = blkdev_read_iter,
2126 .write_iter = blkdev_write_iter,
2127 .iopoll = blkdev_iopoll,
2128 .mmap = generic_file_mmap,
2129 .fsync = blkdev_fsync,
2130 .unlocked_ioctl = block_ioctl,
2131 #ifdef CONFIG_COMPAT
2132 .compat_ioctl = compat_blkdev_ioctl,
2133 #endif
2134 .splice_read = generic_file_splice_read,
2135 .splice_write = iter_file_splice_write,
2136 .fallocate = blkdev_fallocate,
2137 };
2138
2139 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
2140 {
2141 int res;
2142 mm_segment_t old_fs = get_fs();
2143 set_fs(KERNEL_DS);
2144 res = blkdev_ioctl(bdev, 0, cmd, arg);
2145 set_fs(old_fs);
2146 return res;
2147 }
2148
2149 EXPORT_SYMBOL(ioctl_by_bdev);
2150
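/*
 * lookup_bdev - map a pathname to the block_device it names. Returns
 * the bdev with an inode reference held (released with bdput()), or an
 * ERR_PTR such as -ENOTBLK when the path is not a block special file.
 */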
2159 struct block_device *lookup_bdev(const char *pathname)
2160 {
2161 struct block_device *bdev;
2162 struct inode *inode;
2163 struct path path;
2164 int error;
2165
2166 if (!pathname || !*pathname)
2167 return ERR_PTR(-EINVAL);
2168
2169 error = kern_path(pathname, LOOKUP_FOLLOW, &path);
2170 if (error)
2171 return ERR_PTR(error);
2172
2173 inode = d_backing_inode(path.dentry);
2174 error = -ENOTBLK;
2175 if (!S_ISBLK(inode->i_mode))
2176 goto fail;
2177 error = -EACCES;
2178 if (!may_open_dev(&path))
2179 goto fail;
2180 error = -ENOMEM;
2181 bdev = bd_acquire(inode);
2182 if (!bdev)
2183 goto fail;
2184 out:
2185 path_put(&path);
2186 return bdev;
2187 fail:
2188 bdev = ERR_PTR(error);
2189 goto out;
2190 }
2191 EXPORT_SYMBOL(lookup_bdev);
2192
2193 int __invalidate_device(struct block_device *bdev, bool kill_dirty)
2194 {
2195 struct super_block *sb = get_super(bdev);
2196 int res = 0;
2197
2198 if (sb) {
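/*
 * No locking of the superblock is needed here: get_super() holds
 * s_umount, so the filesystem cannot go away under us.
 */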
2205 shrink_dcache_sb(sb);
2206 res = invalidate_inodes(sb, kill_dirty);
2207 drop_super(sb);
2208 }
2209 invalidate_bdev(bdev);
2210 return res;
2211 }
2212 EXPORT_SYMBOL(__invalidate_device);
2213
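/*
 * iterate_bdevs - call @func on every currently open block device,
 * dropping the inode list lock around the callback.
 */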
2214 void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
2215 {
2216 struct inode *inode, *old_inode = NULL;
2217
2218 spin_lock(&blockdev_superblock->s_inode_list_lock);
2219 list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
2220 struct address_space *mapping = inode->i_mapping;
2221 struct block_device *bdev;
2222
2223 spin_lock(&inode->i_lock);
2224 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
2225 mapping->nrpages == 0) {
2226 spin_unlock(&inode->i_lock);
2227 continue;
2228 }
2229 __iget(inode);
2230 spin_unlock(&inode->i_lock);
2231 spin_unlock(&blockdev_superblock->s_inode_list_lock);
2232
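/*
 * We still hold a reference, so the inode cannot have left s_inodes
 * while the lock was dropped; but iput() must not run under
 * s_inode_list_lock, so the previous iteration's inode is put here.
 */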
2240 iput(old_inode);
2241 old_inode = inode;
2242 bdev = I_BDEV(inode);
2243
2244 mutex_lock(&bdev->bd_mutex);
2245 if (bdev->bd_openers)
2246 func(bdev, arg);
2247 mutex_unlock(&bdev->bd_mutex);
2248
2249 spin_lock(&blockdev_superblock->s_inode_list_lock);
2250 }
2251 spin_unlock(&blockdev_superblock->s_inode_list_lock);
2252 iput(old_inode);
2253 }