1/* 2 * Code extracted from drivers/block/genhd.c 3 * Copyright (C) 1991-1998 Linus Torvalds 4 * Re-organised Feb 1998 Russell King 5 * 6 * We now have independent partition support from the 7 * block drivers, which allows all the partition code to 8 * be grouped in one location, and it to be mostly self 9 * contained. 10 */ 11 12#include <linux/init.h> 13#include <linux/module.h> 14#include <linux/fs.h> 15#include <linux/slab.h> 16#include <linux/kmod.h> 17#include <linux/ctype.h> 18#include <linux/genhd.h> 19#include <linux/blktrace_api.h> 20 21#include "partitions/check.h" 22 23#ifdef CONFIG_BLK_DEV_MD 24extern void md_autodetect_dev(dev_t dev); 25#endif 26 27/* 28 * disk_name() is used by partition check code and the genhd driver. 29 * It formats the devicename of the indicated disk into 30 * the supplied buffer (of size at least 32), and returns 31 * a pointer to that same buffer (for convenience). 32 */ 33 34char *disk_name(struct gendisk *hd, int partno, char *buf) 35{ 36 if (!partno) 37 snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); 38 else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) 39 snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno); 40 else 41 snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno); 42 43 return buf; 44} 45 46const char *bdevname(struct block_device *bdev, char *buf) 47{ 48 return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf); 49} 50 51EXPORT_SYMBOL(bdevname); 52 53/* 54 * There's very little reason to use this, you should really 55 * have a struct block_device just about everywhere and use 56 * bdevname() instead. 57 */ 58const char *__bdevname(dev_t dev, char *buffer) 59{ 60 scnprintf(buffer, BDEVNAME_SIZE, "unknown-block(%u,%u)", 61 MAJOR(dev), MINOR(dev)); 62 return buffer; 63} 64 65EXPORT_SYMBOL(__bdevname); 66 67static ssize_t part_partition_show(struct device *dev, 68 struct device_attribute *attr, char *buf) 69{ 70 struct hd_struct *p = dev_to_part(dev); 71 72 return sprintf(buf, "%d\n", p->partno); 73} 74 75static ssize_t part_start_show(struct device *dev, 76 struct device_attribute *attr, char *buf) 77{ 78 struct hd_struct *p = dev_to_part(dev); 79 80 return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); 81} 82 83ssize_t part_size_show(struct device *dev, 84 struct device_attribute *attr, char *buf) 85{ 86 struct hd_struct *p = dev_to_part(dev); 87 return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p)); 88} 89 90static ssize_t part_ro_show(struct device *dev, 91 struct device_attribute *attr, char *buf) 92{ 93 struct hd_struct *p = dev_to_part(dev); 94 return sprintf(buf, "%d\n", p->policy ? 1 : 0); 95} 96 97static ssize_t part_alignment_offset_show(struct device *dev, 98 struct device_attribute *attr, char *buf) 99{ 100 struct hd_struct *p = dev_to_part(dev); 101 return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset); 102} 103 104static ssize_t part_discard_alignment_show(struct device *dev, 105 struct device_attribute *attr, char *buf) 106{ 107 struct hd_struct *p = dev_to_part(dev); 108 return sprintf(buf, "%u\n", p->discard_alignment); 109} 110 111ssize_t part_stat_show(struct device *dev, 112 struct device_attribute *attr, char *buf) 113{ 114 struct hd_struct *p = dev_to_part(dev); 115 int cpu; 116 117 cpu = part_stat_lock(); 118 part_round_stats(cpu, p); 119 part_stat_unlock(); 120 return sprintf(buf, 121 "%8lu %8lu %8llu %8u " 122 "%8lu %8lu %8llu %8u " 123 "%8u %8u %8u" 124 "\n", 125 part_stat_read(p, ios[READ]), 126 part_stat_read(p, merges[READ]), 127 (unsigned long long)part_stat_read(p, sectors[READ]), 128 jiffies_to_msecs(part_stat_read(p, ticks[READ])), 129 part_stat_read(p, ios[WRITE]), 130 part_stat_read(p, merges[WRITE]), 131 (unsigned long long)part_stat_read(p, sectors[WRITE]), 132 jiffies_to_msecs(part_stat_read(p, ticks[WRITE])), 133 part_in_flight(p), 134 jiffies_to_msecs(part_stat_read(p, io_ticks)), 135 jiffies_to_msecs(part_stat_read(p, time_in_queue))); 136} 137 138ssize_t part_inflight_show(struct device *dev, 139 struct device_attribute *attr, char *buf) 140{ 141 struct hd_struct *p = dev_to_part(dev); 142 143 return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]), 144 atomic_read(&p->in_flight[1])); 145} 146 147#ifdef CONFIG_FAIL_MAKE_REQUEST 148ssize_t part_fail_show(struct device *dev, 149 struct device_attribute *attr, char *buf) 150{ 151 struct hd_struct *p = dev_to_part(dev); 152 153 return sprintf(buf, "%d\n", p->make_it_fail); 154} 155 156ssize_t part_fail_store(struct device *dev, 157 struct device_attribute *attr, 158 const char *buf, size_t count) 159{ 160 struct hd_struct *p = dev_to_part(dev); 161 int i; 162 163 if (count > 0 && sscanf(buf, "%d", &i) > 0) 164 p->make_it_fail = (i == 0) ? 0 : 1; 165 166 return count; 167} 168#endif 169 170static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); 171static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); 172static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); 173static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL); 174static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); 175static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show, 176 NULL); 177static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); 178static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); 179#ifdef CONFIG_FAIL_MAKE_REQUEST 180static struct device_attribute dev_attr_fail = 181 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); 182#endif 183 184static struct attribute *part_attrs[] = { 185 &dev_attr_partition.attr, 186 &dev_attr_start.attr, 187 &dev_attr_size.attr, 188 &dev_attr_ro.attr, 189 &dev_attr_alignment_offset.attr, 190 &dev_attr_discard_alignment.attr, 191 &dev_attr_stat.attr, 192 &dev_attr_inflight.attr, 193#ifdef CONFIG_FAIL_MAKE_REQUEST 194 &dev_attr_fail.attr, 195#endif 196 NULL 197}; 198 199static struct attribute_group part_attr_group = { 200 .attrs = part_attrs, 201}; 202 203static const struct attribute_group *part_attr_groups[] = { 204 &part_attr_group, 205#ifdef CONFIG_BLK_DEV_IO_TRACE 206 &blk_trace_attr_group, 207#endif 208 NULL 209}; 210 211static void part_release(struct device *dev) 212{ 213 struct hd_struct *p = dev_to_part(dev); 214 blk_free_devt(dev->devt); 215 free_part_stats(p); 216 free_part_info(p); 217 kfree(p); 218} 219 220struct device_type part_type = { 221 .name = "partition", 222 .groups = part_attr_groups, 223 .release = part_release, 224}; 225 226static void delete_partition_rcu_cb(struct rcu_head *head) 227{ 228 struct hd_struct *part = container_of(head, struct hd_struct, rcu_head); 229 230 part->start_sect = 0; 231 part->nr_sects = 0; 232 part_stat_set_all(part, 0); 233 put_device(part_to_dev(part)); 234} 235 236void __delete_partition(struct hd_struct *part) 237{ 238 call_rcu(&part->rcu_head, delete_partition_rcu_cb); 239} 240 241void delete_partition(struct gendisk *disk, int partno) 242{ 243 struct disk_part_tbl *ptbl = disk->part_tbl; 244 struct hd_struct *part; 245 246 if (partno >= ptbl->len) 247 return; 248 249 part = ptbl->part[partno]; 250 if (!part) 251 return; 252 253 rcu_assign_pointer(ptbl->part[partno], NULL); 254 rcu_assign_pointer(ptbl->last_lookup, NULL); 255 kobject_put(part->holder_dir); 256 device_del(part_to_dev(part)); 257 258 hd_struct_put(part); 259} 260 261static ssize_t whole_disk_show(struct device *dev, 262 struct device_attribute *attr, char *buf) 263{ 264 return 0; 265} 266static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, 267 whole_disk_show, NULL); 268 269struct hd_struct *add_partition(struct gendisk *disk, int partno, 270 sector_t start, sector_t len, int flags, 271 struct partition_meta_info *info) 272{ 273 struct hd_struct *p; 274 dev_t devt = MKDEV(0, 0); 275 struct device *ddev = disk_to_dev(disk); 276 struct device *pdev; 277 struct disk_part_tbl *ptbl; 278 const char *dname; 279 int err; 280 281 err = disk_expand_part_tbl(disk, partno); 282 if (err) 283 return ERR_PTR(err); 284 ptbl = disk->part_tbl; 285 286 if (ptbl->part[partno]) 287 return ERR_PTR(-EBUSY); 288 289 p = kzalloc(sizeof(*p), GFP_KERNEL); 290 if (!p) 291 return ERR_PTR(-EBUSY); 292 293 if (!init_part_stats(p)) { 294 err = -ENOMEM; 295 goto out_free; 296 } 297 298 seqcount_init(&p->nr_sects_seq); 299 pdev = part_to_dev(p); 300 301 p->start_sect = start; 302 p->alignment_offset = 303 queue_limit_alignment_offset(&disk->queue->limits, start); 304 p->discard_alignment = 305 queue_limit_discard_alignment(&disk->queue->limits, start); 306 p->nr_sects = len; 307 p->partno = partno; 308 p->policy = get_disk_ro(disk); 309 310 if (info) { 311 struct partition_meta_info *pinfo = alloc_part_info(disk); 312 if (!pinfo) 313 goto out_free_stats; 314 memcpy(pinfo, info, sizeof(*info)); 315 p->info = pinfo; 316 } 317 318 dname = dev_name(ddev); 319 if (isdigit(dname[strlen(dname) - 1])) 320 dev_set_name(pdev, "%sp%d", dname, partno); 321 else 322 dev_set_name(pdev, "%s%d", dname, partno); 323 324 device_initialize(pdev); 325 pdev->class = &block_class; 326 pdev->type = &part_type; 327 pdev->parent = ddev; 328 329 err = blk_alloc_devt(p, &devt); 330 if (err) 331 goto out_free_info; 332 pdev->devt = devt; 333 334 /* delay uevent until 'holders' subdir is created */ 335 dev_set_uevent_suppress(pdev, 1); 336 err = device_add(pdev); 337 if (err) 338 goto out_put; 339 340 err = -ENOMEM; 341 p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); 342 if (!p->holder_dir) 343 goto out_del; 344 345 dev_set_uevent_suppress(pdev, 0); 346 if (flags & ADDPART_FLAG_WHOLEDISK) { 347 err = device_create_file(pdev, &dev_attr_whole_disk); 348 if (err) 349 goto out_del; 350 } 351 352 /* everything is up and running, commence */ 353 rcu_assign_pointer(ptbl->part[partno], p); 354 355 /* suppress uevent if the disk suppresses it */ 356 if (!dev_get_uevent_suppress(ddev)) 357 kobject_uevent(&pdev->kobj, KOBJ_ADD); 358 359 hd_ref_init(p); 360 return p; 361 362out_free_info: 363 free_part_info(p); 364out_free_stats: 365 free_part_stats(p); 366out_free: 367 kfree(p); 368 return ERR_PTR(err); 369out_del: 370 kobject_put(p->holder_dir); 371 device_del(pdev); 372out_put: 373 put_device(pdev); 374 blk_free_devt(devt); 375 return ERR_PTR(err); 376} 377 378static bool disk_unlock_native_capacity(struct gendisk *disk) 379{ 380 const struct block_device_operations *bdops = disk->fops; 381 382 if (bdops->unlock_native_capacity && 383 !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) { 384 printk(KERN_CONT "enabling native capacity\n"); 385 bdops->unlock_native_capacity(disk); 386 disk->flags |= GENHD_FL_NATIVE_CAPACITY; 387 return true; 388 } else { 389 printk(KERN_CONT "truncated\n"); 390 return false; 391 } 392} 393 394static int drop_partitions(struct gendisk *disk, struct block_device *bdev) 395{ 396 struct disk_part_iter piter; 397 struct hd_struct *part; 398 int res; 399 400 if (bdev->bd_part_count) 401 return -EBUSY; 402 res = invalidate_partition(disk, 0); 403 if (res) 404 return res; 405 406 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); 407 while ((part = disk_part_iter_next(&piter))) 408 delete_partition(disk, part->partno); 409 disk_part_iter_exit(&piter); 410 411 return 0; 412} 413 414int rescan_partitions(struct gendisk *disk, struct block_device *bdev) 415{ 416 struct parsed_partitions *state = NULL; 417 struct hd_struct *part; 418 int p, highest, res; 419rescan: 420 if (state && !IS_ERR(state)) { 421 free_partitions(state); 422 state = NULL; 423 } 424 425 res = drop_partitions(disk, bdev); 426 if (res) 427 return res; 428 429 if (disk->fops->revalidate_disk) 430 disk->fops->revalidate_disk(disk); 431 check_disk_size_change(disk, bdev); 432 bdev->bd_invalidated = 0; 433 if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) 434 return 0; 435 if (IS_ERR(state)) { 436 /* 437 * I/O error reading the partition table. If any 438 * partition code tried to read beyond EOD, retry 439 * after unlocking native capacity. 440 */ 441 if (PTR_ERR(state) == -ENOSPC) { 442 printk(KERN_WARNING "%s: partition table beyond EOD, ", 443 disk->disk_name); 444 if (disk_unlock_native_capacity(disk)) 445 goto rescan; 446 } 447 return -EIO; 448 } 449 /* 450 * If any partition code tried to read beyond EOD, try 451 * unlocking native capacity even if partition table is 452 * successfully read as we could be missing some partitions. 453 */ 454 if (state->access_beyond_eod) { 455 printk(KERN_WARNING 456 "%s: partition table partially beyond EOD, ", 457 disk->disk_name); 458 if (disk_unlock_native_capacity(disk)) 459 goto rescan; 460 } 461 462 /* tell userspace that the media / partition table may have changed */ 463 kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); 464 465 /* Detect the highest partition number and preallocate 466 * disk->part_tbl. This is an optimization and not strictly 467 * necessary. 468 */ 469 for (p = 1, highest = 0; p < state->limit; p++) 470 if (state->parts[p].size) 471 highest = p; 472 473 disk_expand_part_tbl(disk, highest); 474 475 /* add partitions */ 476 for (p = 1; p < state->limit; p++) { 477 sector_t size, from; 478 struct partition_meta_info *info = NULL; 479 480 size = state->parts[p].size; 481 if (!size) 482 continue; 483 484 from = state->parts[p].from; 485 if (from >= get_capacity(disk)) { 486 printk(KERN_WARNING 487 "%s: p%d start %llu is beyond EOD, ", 488 disk->disk_name, p, (unsigned long long) from); 489 if (disk_unlock_native_capacity(disk)) 490 goto rescan; 491 continue; 492 } 493 494 if (from + size > get_capacity(disk)) { 495 printk(KERN_WARNING 496 "%s: p%d size %llu extends beyond EOD, ", 497 disk->disk_name, p, (unsigned long long) size); 498 499 if (disk_unlock_native_capacity(disk)) { 500 /* free state and restart */ 501 goto rescan; 502 } else { 503 /* 504 * we can not ignore partitions of broken tables 505 * created by for example camera firmware, but 506 * we limit them to the end of the disk to avoid 507 * creating invalid block devices 508 */ 509 size = get_capacity(disk) - from; 510 } 511 } 512 513 if (state->parts[p].has_info) 514 info = &state->parts[p].info; 515 part = add_partition(disk, p, from, size, 516 state->parts[p].flags, 517 &state->parts[p].info); 518 if (IS_ERR(part)) { 519 printk(KERN_ERR " %s: p%d could not be added: %ld\n", 520 disk->disk_name, p, -PTR_ERR(part)); 521 continue; 522 } 523#ifdef CONFIG_BLK_DEV_MD 524 if (state->parts[p].flags & ADDPART_FLAG_RAID) 525 md_autodetect_dev(part_to_dev(part)->devt); 526#endif 527 } 528 free_partitions(state); 529 return 0; 530} 531 532int invalidate_partitions(struct gendisk *disk, struct block_device *bdev) 533{ 534 int res; 535 536 if (!bdev->bd_invalidated) 537 return 0; 538 539 res = drop_partitions(disk, bdev); 540 if (res) 541 return res; 542 543 set_capacity(disk, 0); 544 check_disk_size_change(disk, bdev); 545 bdev->bd_invalidated = 0; 546 /* tell userspace that the media / partition table may have changed */ 547 kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); 548 549 return 0; 550} 551 552unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) 553{ 554 struct address_space *mapping = bdev->bd_inode->i_mapping; 555 struct page *page; 556 557 page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)), 558 NULL); 559 if (!IS_ERR(page)) { 560 if (PageError(page)) 561 goto fail; 562 p->v = page; 563 return (unsigned char *)page_address(page) + ((n & ((1 << (PAGE_CACHE_SHIFT - 9)) - 1)) << 9); 564fail: 565 page_cache_release(page); 566 } 567 p->v = NULL; 568 return NULL; 569} 570 571EXPORT_SYMBOL(read_dev_sector); 572