root/drivers/block/aoe/aoedev.c


DEFINITIONS

This source file includes the following definitions:
  1. minor_get_dyn
  2. minor_get_static
  3. minor_get
  4. minor_free
  5. aoedev_put
  6. dummy_timer
  7. aoe_failip
  8. downdev_frame
  9. aoedev_downdev
  10. user_req
  11. freedev
  12. flush
  13. aoedev_flush
  14. skbfree
  15. skbpoolfree
  16. aoedev_by_aoeaddr
  17. freetgt
  18. aoedev_exit
  19. aoedev_init

/* Copyright (c) 2013 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoedev.c
 * AoE device utility functions; maintains device list.
 */

#include <linux/hdreg.h>
#include <linux/blk-mq.h>
#include <linux/netdevice.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
#include <linux/kdev_t.h>
#include <linux/moduleparam.h>
#include <linux/string.h>
#include "aoe.h"

static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);

static int aoe_dyndevs = 1;
module_param(aoe_dyndevs, int, 0644);
MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");
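
/*
 * Usage sketch: the parameter can be set at module load time, e.g.
 * "modprobe aoe aoe_dyndevs=0" to fall back to the legacy static
 * e<shelf>.<slot> minor mapping, or flipped at runtime through
 * /sys/module/aoe/parameters/aoe_dyndevs (the 0644 above makes it
 * writable by root).
 */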

static struct aoedev *devlist;
static DEFINE_SPINLOCK(devlist_lock);

/* Because some systems will have one, many, or no
 *   - partitions,
 *   - slots per shelf,
 *   - or shelves,
 * we need some flexibility in the way the minor numbers
 * are allocated.  So they are dynamic.
 */
#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)
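
/*
 * Worked example, assuming the usual MINORBITS of 20 and the default
 * AOE_PARTITIONS of 16 from aoe.h: N_DEVS = (1 << 20) / 16 = 65536
 * whole devices, each one reserving AOE_PARTITIONS consecutive minor
 * numbers for its partitions.
 */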

static DEFINE_SPINLOCK(used_minors_lock);
static DECLARE_BITMAP(used_minors, N_DEVS);

static int
minor_get_dyn(ulong *sysminor)
{
        ulong flags;
        ulong n;
        int error = 0;

        spin_lock_irqsave(&used_minors_lock, flags);
        n = find_first_zero_bit(used_minors, N_DEVS);
        if (n < N_DEVS)
                set_bit(n, used_minors);
        else
                error = -1;
        spin_unlock_irqrestore(&used_minors_lock, flags);

        *sysminor = n * AOE_PARTITIONS;
        return error;
}

static int
minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
{
        ulong flags;
        ulong n;
        int error = 0;
        enum {
                /* for backwards compatibility when !aoe_dyndevs,
                 * a static number of supported slots per shelf */
                NPERSHELF = 16,
        };

        if (aoemin >= NPERSHELF) {
                pr_err("aoe: %s %d slots per shelf\n",
                        "static minor device numbers support only",
                        NPERSHELF);
                error = -1;
                goto out;
        }

        n = aoemaj * NPERSHELF + aoemin;
        if (n >= N_DEVS) {
                pr_err("aoe: %s with e%ld.%d\n",
                        "cannot use static minor device numbers",
                        aoemaj, aoemin);
                error = -1;
                goto out;
        }

        spin_lock_irqsave(&used_minors_lock, flags);
        if (test_bit(n, used_minors)) {
                pr_err("aoe: %s %lu\n",
                        "existing device already has static minor number",
                        n);
                error = -1;
        } else
                set_bit(n, used_minors);
        spin_unlock_irqrestore(&used_minors_lock, flags);
        *sysminor = n * AOE_PARTITIONS;
out:
        return error;
}
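
/*
 * Worked example for the static mapping above: AoE address e2.3 gives
 * n = 2 * NPERSHELF + 3 = 35, so with AOE_PARTITIONS == 16 the device
 * is assigned sysminor 35 * 16 = 560.  Under the same assumptions the
 * last representable address is e4095.15 (n == N_DEVS - 1).
 */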

static int
minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
{
        if (aoe_dyndevs)
                return minor_get_dyn(sysminor);
        else
                return minor_get_static(sysminor, aoemaj, aoemin);
}

static void
minor_free(ulong minor)
{
        ulong flags;

        minor /= AOE_PARTITIONS;
        BUG_ON(minor >= N_DEVS);

        spin_lock_irqsave(&used_minors_lock, flags);
        BUG_ON(!test_bit(minor, used_minors));
        clear_bit(minor, used_minors);
        spin_unlock_irqrestore(&used_minors_lock, flags);
}

/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically take a reference and are responsible for performing
 * an aoedev_put.  With the addition of async kthread processing
 * I'm no longer confident that we can guarantee consistency in the
 * face of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq.  When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */
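
/*
 * Usage sketch (a minimal illustration; the real lookups live in
 * aoecmd.c):
 *
 *      d = aoedev_by_aoeaddr(maj, min, 0);     // 0: do not allocate
 *      if (d == NULL)
 *              return;
 *      // ... use the device, taking d->lock around state changes ...
 *      aoedev_put(d);
 */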

void
aoedev_put(struct aoedev *d)
{
        ulong flags;

        spin_lock_irqsave(&devlist_lock, flags);
        d->ref--;
        spin_unlock_irqrestore(&devlist_lock, flags);
}

static void
dummy_timer(struct timer_list *t)
{
        struct aoedev *d;

        d = from_timer(d, t, timer);
        if (d->flags & DEVFL_TKILL)
                return;
        d->timer.expires = jiffies + HZ;
        add_timer(&d->timer);
}

static void
aoe_failip(struct aoedev *d)
{
        struct request *rq;
        struct aoe_req *req;
        struct bio *bio;

        aoe_failbuf(d, d->ip.buf);
        rq = d->ip.rq;
        if (rq == NULL)
                return;

        req = blk_mq_rq_to_pdu(rq);
        while ((bio = d->ip.nxbio)) {
                bio->bi_status = BLK_STS_IOERR;
                d->ip.nxbio = bio->bi_next;
                req->nr_bios--;
        }

        if (!req->nr_bios)
                aoe_end_request(d, rq, 0);
}

static void
downdev_frame(struct list_head *pos)
{
        struct frame *f;

        f = list_entry(pos, struct frame, head);
        list_del(pos);
        if (f->buf) {
                f->buf->nframesout--;
                aoe_failbuf(f->t->d, f->buf);
        }
        aoe_freetframe(f);
}

void
aoedev_downdev(struct aoedev *d)
{
        struct aoetgt *t, **tt, **te;
        struct list_head *head, *pos, *nx;
        int i;

        d->flags &= ~DEVFL_UP;

        /* clean out active and to-be-retransmitted buffers */
        for (i = 0; i < NFACTIVE; i++) {
                head = &d->factive[i];
                list_for_each_safe(pos, nx, head)
                        downdev_frame(pos);
        }
        head = &d->rexmitq;
        list_for_each_safe(pos, nx, head)
                downdev_frame(pos);

        /* reset window dressings */
        tt = d->targets;
        te = tt + d->ntargets;
        for (; tt < te && (t = *tt); tt++) {
                aoecmd_wreset(t);
                t->nout = 0;
        }

        /* clean out the in-process request (if any) */
        aoe_failip(d);

        /* fast fail all pending I/O */
        if (d->blkq) {
                /* UP is cleared, freeze+quiesce to ensure all are errored */
                blk_mq_freeze_queue(d->blkq);
                blk_mq_quiesce_queue(d->blkq);
                blk_mq_unquiesce_queue(d->blkq);
                blk_mq_unfreeze_queue(d->blkq);
        }

        if (d->gd)
                set_capacity(d->gd, 0);
}

/* return whether the user asked for this particular
 * device to be flushed
 */
static int
user_req(char *s, size_t slen, struct aoedev *d)
{
        const char *p;
        size_t lim;

        if (!d->gd)
                return 0;
        p = kbasename(d->gd->disk_name);
        lim = sizeof(d->gd->disk_name);
        lim -= p - d->gd->disk_name;
        if (slen < lim)
                lim = slen;

        return !strncmp(s, p, lim);
}
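
/*
 * Example: a device named "etherd/e1.2" has kbasename "e1.2", so a
 * user-space write of "e1.2" to the aoe flush character device asks
 * for this particular aoedev to be flushed.
 */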

static void
freedev(struct aoedev *d)
{
        struct aoetgt **t, **e;
        int freeing = 0;
        unsigned long flags;

        spin_lock_irqsave(&d->lock, flags);
        if (d->flags & DEVFL_TKILL
        && !(d->flags & DEVFL_FREEING)) {
                d->flags |= DEVFL_FREEING;
                freeing = 1;
        }
        spin_unlock_irqrestore(&d->lock, flags);
        if (!freeing)
                return;

        del_timer_sync(&d->timer);
        if (d->gd) {
                aoedisk_rm_debugfs(d);
                del_gendisk(d->gd);
                put_disk(d->gd);
                blk_mq_free_tag_set(&d->tag_set);
                blk_cleanup_queue(d->blkq);
        }
        t = d->targets;
        e = t + d->ntargets;
        for (; t < e && *t; t++)
                freetgt(d, *t);

        mempool_destroy(d->bufpool);
        skbpoolfree(d);
        minor_free(d->sysminor);

        spin_lock_irqsave(&d->lock, flags);
        d->flags |= DEVFL_FREED;
        spin_unlock_irqrestore(&d->lock, flags);
}

enum flush_parms {
        NOT_EXITING = 0,
        EXITING = 1,
};

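/*
 * A note on the locking pattern below: aoedev_downdev and freedev can
 * sleep, so neither may be called with devlist_lock or d->lock held.
 * Each pass therefore drops both locks before calling them and then
 * restarts its walk of devlist from the top; the DEVFL_TKILL,
 * DEVFL_FREEING, and DEVFL_FREED flags ensure that each device makes
 * every transition only once.
 */
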
static int
flush(const char __user *str, size_t cnt, int exiting)
{
        ulong flags;
        struct aoedev *d, **dd;
        char buf[16];
        int all = 0;
        int specified = 0;      /* flush a specific device */
        unsigned int skipflags;

        skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL;

        if (!exiting && cnt >= 3) {
                if (cnt > sizeof buf)
                        cnt = sizeof buf;
                if (copy_from_user(buf, str, cnt))
                        return -EFAULT;
                all = !strncmp(buf, "all", 3);
                if (!all)
                        specified = 1;
        }

        flush_scheduled_work();
        /* pass one: do aoedev_downdev, which might sleep */
restart1:
        spin_lock_irqsave(&devlist_lock, flags);
        for (d = devlist; d; d = d->next) {
                spin_lock(&d->lock);
                if (d->flags & DEVFL_TKILL)
                        goto cont;

                if (exiting) {
                        /* unconditionally take each device down */
                } else if (specified) {
                        if (!user_req(buf, cnt, d))
                                goto cont;
                } else if ((!all && (d->flags & DEVFL_UP))
                || d->flags & skipflags
                || d->nopen
                || d->ref)
                        goto cont;

                spin_unlock(&d->lock);
                spin_unlock_irqrestore(&devlist_lock, flags);
                aoedev_downdev(d);
                d->flags |= DEVFL_TKILL;
                goto restart1;
cont:
                spin_unlock(&d->lock);
        }
        spin_unlock_irqrestore(&devlist_lock, flags);

        /* pass two: call freedev, which might sleep,
         * for aoedevs marked with DEVFL_TKILL
         */
restart2:
        spin_lock_irqsave(&devlist_lock, flags);
        for (d = devlist; d; d = d->next) {
                spin_lock(&d->lock);
                if (d->flags & DEVFL_TKILL
                && !(d->flags & DEVFL_FREEING)) {
                        spin_unlock(&d->lock);
                        spin_unlock_irqrestore(&devlist_lock, flags);
                        freedev(d);
                        goto restart2;
                }
                spin_unlock(&d->lock);
        }

        /* pass three: remove aoedevs marked with DEVFL_FREED */
        for (dd = &devlist, d = *dd; d; d = *dd) {
                struct aoedev *doomed = NULL;

                spin_lock(&d->lock);
                if (d->flags & DEVFL_FREED) {
                        *dd = d->next;
                        doomed = d;
                } else {
                        dd = &d->next;
                }
                spin_unlock(&d->lock);
                if (doomed)
                        kfree(doomed->targets);
                kfree(doomed);
        }
        spin_unlock_irqrestore(&devlist_lock, flags);

        return 0;
}

int
aoedev_flush(const char __user *str, size_t cnt)
{
        return flush(str, cnt, NOT_EXITING);
}

/* This has been confirmed to occur once with Tms=3*1000 due to the
 * driver changing link and not processing its transmit ring.  The
 * problem is hard enough to solve by returning an error that I'm
 * still punting on "solving" this.
 */
static void
skbfree(struct sk_buff *skb)
{
        enum { Sms = 250, Tms = 30 * 1000};
        int i = Tms / Sms;

        if (skb == NULL)
                return;
        while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
                msleep(Sms);
        if (i < 0) {
                printk(KERN_ERR
                        "aoe: %s holds ref: %s\n",
                        skb->dev ? skb->dev->name : "netif",
                        "cannot free skb -- memory leaked.");
                return;
        }
        skb->truesize -= skb->data_len;
        skb_shinfo(skb)->nr_frags = skb->data_len = 0;
        skb_trim(skb, 0);
        dev_kfree_skb(skb);
}

static void
skbpoolfree(struct aoedev *d)
{
        struct sk_buff *skb, *tmp;

        skb_queue_walk_safe(&d->skbpool, skb, tmp)
                skbfree(skb);

        __skb_queue_head_init(&d->skbpool);
}

/* find it or allocate it */
struct aoedev *
aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
{
        struct aoedev *d;
        int i;
        ulong flags;
        ulong sysminor = 0;

        spin_lock_irqsave(&devlist_lock, flags);

        for (d=devlist; d; d=d->next)
                if (d->aoemajor == maj && d->aoeminor == min) {
                        spin_lock(&d->lock);
                        if (d->flags & DEVFL_TKILL) {
                                spin_unlock(&d->lock);
                                d = NULL;
                                goto out;
                        }
                        d->ref++;
                        spin_unlock(&d->lock);
                        break;
                }
        if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
                goto out;
        d = kcalloc(1, sizeof *d, GFP_ATOMIC);
        if (!d)
                goto out;
        d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC);
        if (!d->targets) {
                kfree(d);
                d = NULL;
                goto out;
        }
        d->ntargets = NTARGETS;
        INIT_WORK(&d->work, aoecmd_sleepwork);
        spin_lock_init(&d->lock);
        INIT_LIST_HEAD(&d->rq_list);
        skb_queue_head_init(&d->skbpool);
        timer_setup(&d->timer, dummy_timer, 0);
        d->timer.expires = jiffies + HZ;
        add_timer(&d->timer);
        d->bufpool = NULL;      /* defer to aoeblk_gdalloc */
        d->tgt = d->targets;
        d->ref = 1;
        for (i = 0; i < NFACTIVE; i++)
                INIT_LIST_HEAD(&d->factive[i]);
        INIT_LIST_HEAD(&d->rexmitq);
        d->sysminor = sysminor;
        d->aoemajor = maj;
        d->aoeminor = min;
        d->rttavg = RTTAVG_INIT;
        d->rttdev = RTTDEV_INIT;
        d->next = devlist;
        devlist = d;
 out:
        spin_unlock_irqrestore(&devlist_lock, flags);
        return d;
}

static void
freetgt(struct aoedev *d, struct aoetgt *t)
{
        struct frame *f;
        struct list_head *pos, *nx, *head;
        struct aoeif *ifp;

        for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
                if (!ifp->nd)
                        break;
                dev_put(ifp->nd);
        }

        head = &t->ffree;
        list_for_each_safe(pos, nx, head) {
                list_del(pos);
                f = list_entry(pos, struct frame, head);
                skbfree(f->skb);
                kfree(f);
        }
        kfree(t);
}

void
aoedev_exit(void)
{
        flush_scheduled_work();
        flush(NULL, 0, EXITING);
}

int __init
aoedev_init(void)
{
        return 0;
}
