1/* AFS volume location management 2 * 3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. 4 * Written by David Howells (dhowells@redhat.com) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12#include <linux/kernel.h> 13#include <linux/module.h> 14#include <linux/slab.h> 15#include <linux/init.h> 16#include <linux/sched.h> 17#include "internal.h" 18 19static unsigned afs_vlocation_timeout = 10; /* volume location timeout in seconds */ 20static unsigned afs_vlocation_update_timeout = 10 * 60; 21 22static void afs_vlocation_reaper(struct work_struct *); 23static void afs_vlocation_updater(struct work_struct *); 24 25static LIST_HEAD(afs_vlocation_updates); 26static LIST_HEAD(afs_vlocation_graveyard); 27static DEFINE_SPINLOCK(afs_vlocation_updates_lock); 28static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock); 29static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper); 30static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater); 31static struct workqueue_struct *afs_vlocation_update_worker; 32 33/* 34 * iterate through the VL servers in a cell until one of them admits knowing 35 * about the volume in question 36 */ 37static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl, 38 struct key *key, 39 struct afs_cache_vlocation *vldb) 40{ 41 struct afs_cell *cell = vl->cell; 42 struct in_addr addr; 43 int count, ret; 44 45 _enter("%s,%s", cell->name, vl->vldb.name); 46 47 down_write(&vl->cell->vl_sem); 48 ret = -ENOMEDIUM; 49 for (count = cell->vl_naddrs; count > 0; count--) { 50 addr = cell->vl_addrs[cell->vl_curr_svix]; 51 52 _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr); 53 54 /* attempt to access the VL server */ 55 ret = afs_vl_get_entry_by_name(&addr, key, vl->vldb.name, vldb, 56 &afs_sync_call); 57 switch (ret) { 58 case 0: 59 goto out; 60 case -ENOMEM: 61 case -ENONET: 62 case -ENETUNREACH: 63 case -EHOSTUNREACH: 64 case -ECONNREFUSED: 65 if (ret == -ENOMEM || ret == -ENONET) 66 goto out; 67 goto rotate; 68 case -ENOMEDIUM: 69 case -EKEYREJECTED: 70 case -EKEYEXPIRED: 71 goto out; 72 default: 73 ret = -EIO; 74 goto rotate; 75 } 76 77 /* rotate the server records upon lookup failure */ 78 rotate: 79 cell->vl_curr_svix++; 80 cell->vl_curr_svix %= cell->vl_naddrs; 81 } 82 83out: 84 up_write(&vl->cell->vl_sem); 85 _leave(" = %d", ret); 86 return ret; 87} 88 89/* 90 * iterate through the VL servers in a cell until one of them admits knowing 91 * about the volume in question 92 */ 93static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl, 94 struct key *key, 95 afs_volid_t volid, 96 afs_voltype_t voltype, 97 struct afs_cache_vlocation *vldb) 98{ 99 struct afs_cell *cell = vl->cell; 100 struct in_addr addr; 101 int count, ret; 102 103 _enter("%s,%x,%d,", cell->name, volid, voltype); 104 105 down_write(&vl->cell->vl_sem); 106 ret = -ENOMEDIUM; 107 for (count = cell->vl_naddrs; count > 0; count--) { 108 addr = cell->vl_addrs[cell->vl_curr_svix]; 109 110 _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr); 111 112 /* attempt to access the VL server */ 113 ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb, 114 &afs_sync_call); 115 switch (ret) { 116 case 0: 117 goto out; 118 case -ENOMEM: 119 case -ENONET: 120 case -ENETUNREACH: 121 case -EHOSTUNREACH: 122 case -ECONNREFUSED: 123 if (ret == -ENOMEM || ret == -ENONET) 124 goto out; 125 goto rotate; 126 case -EBUSY: 127 vl->upd_busy_cnt++; 128 if (vl->upd_busy_cnt <= 3) { 129 if (vl->upd_busy_cnt > 1) { 130 /* second+ BUSY - sleep a little bit */ 131 set_current_state(TASK_UNINTERRUPTIBLE); 132 schedule_timeout(1); 133 } 134 continue; 135 } 136 break; 137 case -ENOMEDIUM: 138 vl->upd_rej_cnt++; 139 goto rotate; 140 default: 141 ret = -EIO; 142 goto rotate; 143 } 144 145 /* rotate the server records upon lookup failure */ 146 rotate: 147 cell->vl_curr_svix++; 148 cell->vl_curr_svix %= cell->vl_naddrs; 149 vl->upd_busy_cnt = 0; 150 } 151 152out: 153 if (ret < 0 && vl->upd_rej_cnt > 0) { 154 printk(KERN_NOTICE "kAFS:" 155 " Active volume no longer valid '%s'\n", 156 vl->vldb.name); 157 vl->valid = 0; 158 ret = -ENOMEDIUM; 159 } 160 161 up_write(&vl->cell->vl_sem); 162 _leave(" = %d", ret); 163 return ret; 164} 165 166/* 167 * allocate a volume location record 168 */ 169static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell, 170 const char *name, 171 size_t namesz) 172{ 173 struct afs_vlocation *vl; 174 175 vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL); 176 if (vl) { 177 vl->cell = cell; 178 vl->state = AFS_VL_NEW; 179 atomic_set(&vl->usage, 1); 180 INIT_LIST_HEAD(&vl->link); 181 INIT_LIST_HEAD(&vl->grave); 182 INIT_LIST_HEAD(&vl->update); 183 init_waitqueue_head(&vl->waitq); 184 spin_lock_init(&vl->lock); 185 memcpy(vl->vldb.name, name, namesz); 186 } 187 188 _leave(" = %p", vl); 189 return vl; 190} 191 192/* 193 * update record if we found it in the cache 194 */ 195static int afs_vlocation_update_record(struct afs_vlocation *vl, 196 struct key *key, 197 struct afs_cache_vlocation *vldb) 198{ 199 afs_voltype_t voltype; 200 afs_volid_t vid; 201 int ret; 202 203 /* try to look up a cached volume in the cell VL databases by ID */ 204 _debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }", 205 vl->vldb.name, 206 vl->vldb.vidmask, 207 ntohl(vl->vldb.servers[0].s_addr), 208 vl->vldb.srvtmask[0], 209 ntohl(vl->vldb.servers[1].s_addr), 210 vl->vldb.srvtmask[1], 211 ntohl(vl->vldb.servers[2].s_addr), 212 vl->vldb.srvtmask[2]); 213 214 _debug("Vids: %08x %08x %08x", 215 vl->vldb.vid[0], 216 vl->vldb.vid[1], 217 vl->vldb.vid[2]); 218 219 if (vl->vldb.vidmask & AFS_VOL_VTM_RW) { 220 vid = vl->vldb.vid[0]; 221 voltype = AFSVL_RWVOL; 222 } else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) { 223 vid = vl->vldb.vid[1]; 224 voltype = AFSVL_ROVOL; 225 } else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) { 226 vid = vl->vldb.vid[2]; 227 voltype = AFSVL_BACKVOL; 228 } else { 229 BUG(); 230 vid = 0; 231 voltype = 0; 232 } 233 234 /* contact the server to make sure the volume is still available 235 * - TODO: need to handle disconnected operation here 236 */ 237 ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb); 238 switch (ret) { 239 /* net error */ 240 default: 241 printk(KERN_WARNING "kAFS:" 242 " failed to update volume '%s' (%x) up in '%s': %d\n", 243 vl->vldb.name, vid, vl->cell->name, ret); 244 _leave(" = %d", ret); 245 return ret; 246 247 /* pulled from local cache into memory */ 248 case 0: 249 _leave(" = 0"); 250 return 0; 251 252 /* uh oh... looks like the volume got deleted */ 253 case -ENOMEDIUM: 254 printk(KERN_ERR "kAFS:" 255 " volume '%s' (%x) does not exist '%s'\n", 256 vl->vldb.name, vid, vl->cell->name); 257 258 /* TODO: make existing record unavailable */ 259 _leave(" = %d", ret); 260 return ret; 261 } 262} 263 264/* 265 * apply the update to a VL record 266 */ 267static void afs_vlocation_apply_update(struct afs_vlocation *vl, 268 struct afs_cache_vlocation *vldb) 269{ 270 _debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }", 271 vldb->name, vldb->vidmask, 272 ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0], 273 ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1], 274 ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]); 275 276 _debug("Vids: %08x %08x %08x", 277 vldb->vid[0], vldb->vid[1], vldb->vid[2]); 278 279 if (strcmp(vldb->name, vl->vldb.name) != 0) 280 printk(KERN_NOTICE "kAFS:" 281 " name of volume '%s' changed to '%s' on server\n", 282 vl->vldb.name, vldb->name); 283 284 vl->vldb = *vldb; 285 286#ifdef CONFIG_AFS_FSCACHE 287 fscache_update_cookie(vl->cache); 288#endif 289} 290 291/* 292 * fill in a volume location record, consulting the cache and the VL server 293 * both 294 */ 295static int afs_vlocation_fill_in_record(struct afs_vlocation *vl, 296 struct key *key) 297{ 298 struct afs_cache_vlocation vldb; 299 int ret; 300 301 _enter(""); 302 303 ASSERTCMP(vl->valid, ==, 0); 304 305 memset(&vldb, 0, sizeof(vldb)); 306 307 /* see if we have an in-cache copy (will set vl->valid if there is) */ 308#ifdef CONFIG_AFS_FSCACHE 309 vl->cache = fscache_acquire_cookie(vl->cell->cache, 310 &afs_vlocation_cache_index_def, vl, 311 true); 312#endif 313 314 if (vl->valid) { 315 /* try to update a known volume in the cell VL databases by 316 * ID as the name may have changed */ 317 _debug("found in cache"); 318 ret = afs_vlocation_update_record(vl, key, &vldb); 319 } else { 320 /* try to look up an unknown volume in the cell VL databases by 321 * name */ 322 ret = afs_vlocation_access_vl_by_name(vl, key, &vldb); 323 if (ret < 0) { 324 printk("kAFS: failed to locate '%s' in cell '%s'\n", 325 vl->vldb.name, vl->cell->name); 326 return ret; 327 } 328 } 329 330 afs_vlocation_apply_update(vl, &vldb); 331 _leave(" = 0"); 332 return 0; 333} 334 335/* 336 * queue a vlocation record for updates 337 */ 338static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl) 339{ 340 struct afs_vlocation *xvl; 341 342 /* wait at least 10 minutes before updating... */ 343 vl->update_at = get_seconds() + afs_vlocation_update_timeout; 344 345 spin_lock(&afs_vlocation_updates_lock); 346 347 if (!list_empty(&afs_vlocation_updates)) { 348 /* ... but wait at least 1 second more than the newest record 349 * already queued so that we don't spam the VL server suddenly 350 * with lots of requests 351 */ 352 xvl = list_entry(afs_vlocation_updates.prev, 353 struct afs_vlocation, update); 354 if (vl->update_at <= xvl->update_at) 355 vl->update_at = xvl->update_at + 1; 356 } else { 357 queue_delayed_work(afs_vlocation_update_worker, 358 &afs_vlocation_update, 359 afs_vlocation_update_timeout * HZ); 360 } 361 362 list_add_tail(&vl->update, &afs_vlocation_updates); 363 spin_unlock(&afs_vlocation_updates_lock); 364} 365 366/* 367 * lookup volume location 368 * - iterate through the VL servers in a cell until one of them admits knowing 369 * about the volume in question 370 * - lookup in the local cache if not able to find on the VL server 371 * - insert/update in the local cache if did get a VL response 372 */ 373struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell, 374 struct key *key, 375 const char *name, 376 size_t namesz) 377{ 378 struct afs_vlocation *vl; 379 int ret; 380 381 _enter("{%s},{%x},%*.*s,%zu", 382 cell->name, key_serial(key), 383 (int) namesz, (int) namesz, name, namesz); 384 385 if (namesz >= sizeof(vl->vldb.name)) { 386 _leave(" = -ENAMETOOLONG"); 387 return ERR_PTR(-ENAMETOOLONG); 388 } 389 390 /* see if we have an in-memory copy first */ 391 down_write(&cell->vl_sem); 392 spin_lock(&cell->vl_lock); 393 list_for_each_entry(vl, &cell->vl_list, link) { 394 if (vl->vldb.name[namesz] != '\0') 395 continue; 396 if (memcmp(vl->vldb.name, name, namesz) == 0) 397 goto found_in_memory; 398 } 399 spin_unlock(&cell->vl_lock); 400 401 /* not in the cell's in-memory lists - create a new record */ 402 vl = afs_vlocation_alloc(cell, name, namesz); 403 if (!vl) { 404 up_write(&cell->vl_sem); 405 return ERR_PTR(-ENOMEM); 406 } 407 408 afs_get_cell(cell); 409 410 list_add_tail(&vl->link, &cell->vl_list); 411 vl->state = AFS_VL_CREATING; 412 up_write(&cell->vl_sem); 413 414fill_in_record: 415 ret = afs_vlocation_fill_in_record(vl, key); 416 if (ret < 0) 417 goto error_abandon; 418 spin_lock(&vl->lock); 419 vl->state = AFS_VL_VALID; 420 spin_unlock(&vl->lock); 421 wake_up(&vl->waitq); 422 423 /* update volume entry in local cache */ 424#ifdef CONFIG_AFS_FSCACHE 425 fscache_update_cookie(vl->cache); 426#endif 427 428 /* schedule for regular updates */ 429 afs_vlocation_queue_for_updates(vl); 430 goto success; 431 432found_in_memory: 433 /* found in memory */ 434 _debug("found in memory"); 435 atomic_inc(&vl->usage); 436 spin_unlock(&cell->vl_lock); 437 if (!list_empty(&vl->grave)) { 438 spin_lock(&afs_vlocation_graveyard_lock); 439 list_del_init(&vl->grave); 440 spin_unlock(&afs_vlocation_graveyard_lock); 441 } 442 up_write(&cell->vl_sem); 443 444 /* see if it was an abandoned record that we might try filling in */ 445 spin_lock(&vl->lock); 446 while (vl->state != AFS_VL_VALID) { 447 afs_vlocation_state_t state = vl->state; 448 449 _debug("invalid [state %d]", state); 450 451 if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) { 452 vl->state = AFS_VL_CREATING; 453 spin_unlock(&vl->lock); 454 goto fill_in_record; 455 } 456 457 /* must now wait for creation or update by someone else to 458 * complete */ 459 _debug("wait"); 460 461 spin_unlock(&vl->lock); 462 ret = wait_event_interruptible(vl->waitq, 463 vl->state == AFS_VL_NEW || 464 vl->state == AFS_VL_VALID || 465 vl->state == AFS_VL_NO_VOLUME); 466 if (ret < 0) 467 goto error; 468 spin_lock(&vl->lock); 469 } 470 spin_unlock(&vl->lock); 471 472success: 473 _leave(" = %p", vl); 474 return vl; 475 476error_abandon: 477 spin_lock(&vl->lock); 478 vl->state = AFS_VL_NEW; 479 spin_unlock(&vl->lock); 480 wake_up(&vl->waitq); 481error: 482 ASSERT(vl != NULL); 483 afs_put_vlocation(vl); 484 _leave(" = %d", ret); 485 return ERR_PTR(ret); 486} 487 488/* 489 * finish using a volume location record 490 */ 491void afs_put_vlocation(struct afs_vlocation *vl) 492{ 493 if (!vl) 494 return; 495 496 _enter("%s", vl->vldb.name); 497 498 ASSERTCMP(atomic_read(&vl->usage), >, 0); 499 500 if (likely(!atomic_dec_and_test(&vl->usage))) { 501 _leave(""); 502 return; 503 } 504 505 spin_lock(&afs_vlocation_graveyard_lock); 506 if (atomic_read(&vl->usage) == 0) { 507 _debug("buried"); 508 list_move_tail(&vl->grave, &afs_vlocation_graveyard); 509 vl->time_of_death = get_seconds(); 510 queue_delayed_work(afs_wq, &afs_vlocation_reap, 511 afs_vlocation_timeout * HZ); 512 513 /* suspend updates on this record */ 514 if (!list_empty(&vl->update)) { 515 spin_lock(&afs_vlocation_updates_lock); 516 list_del_init(&vl->update); 517 spin_unlock(&afs_vlocation_updates_lock); 518 } 519 } 520 spin_unlock(&afs_vlocation_graveyard_lock); 521 _leave(" [killed?]"); 522} 523 524/* 525 * destroy a dead volume location record 526 */ 527static void afs_vlocation_destroy(struct afs_vlocation *vl) 528{ 529 _enter("%p", vl); 530 531#ifdef CONFIG_AFS_FSCACHE 532 fscache_relinquish_cookie(vl->cache, 0); 533#endif 534 afs_put_cell(vl->cell); 535 kfree(vl); 536} 537 538/* 539 * reap dead volume location records 540 */ 541static void afs_vlocation_reaper(struct work_struct *work) 542{ 543 LIST_HEAD(corpses); 544 struct afs_vlocation *vl; 545 unsigned long delay, expiry; 546 time_t now; 547 548 _enter(""); 549 550 now = get_seconds(); 551 spin_lock(&afs_vlocation_graveyard_lock); 552 553 while (!list_empty(&afs_vlocation_graveyard)) { 554 vl = list_entry(afs_vlocation_graveyard.next, 555 struct afs_vlocation, grave); 556 557 _debug("check %p", vl); 558 559 /* the queue is ordered most dead first */ 560 expiry = vl->time_of_death + afs_vlocation_timeout; 561 if (expiry > now) { 562 delay = (expiry - now) * HZ; 563 _debug("delay %lu", delay); 564 mod_delayed_work(afs_wq, &afs_vlocation_reap, delay); 565 break; 566 } 567 568 spin_lock(&vl->cell->vl_lock); 569 if (atomic_read(&vl->usage) > 0) { 570 _debug("no reap"); 571 list_del_init(&vl->grave); 572 } else { 573 _debug("reap"); 574 list_move_tail(&vl->grave, &corpses); 575 list_del_init(&vl->link); 576 } 577 spin_unlock(&vl->cell->vl_lock); 578 } 579 580 spin_unlock(&afs_vlocation_graveyard_lock); 581 582 /* now reap the corpses we've extracted */ 583 while (!list_empty(&corpses)) { 584 vl = list_entry(corpses.next, struct afs_vlocation, grave); 585 list_del(&vl->grave); 586 afs_vlocation_destroy(vl); 587 } 588 589 _leave(""); 590} 591 592/* 593 * initialise the VL update process 594 */ 595int __init afs_vlocation_update_init(void) 596{ 597 afs_vlocation_update_worker = 598 create_singlethread_workqueue("kafs_vlupdated"); 599 return afs_vlocation_update_worker ? 0 : -ENOMEM; 600} 601 602/* 603 * discard all the volume location records for rmmod 604 */ 605void afs_vlocation_purge(void) 606{ 607 afs_vlocation_timeout = 0; 608 609 spin_lock(&afs_vlocation_updates_lock); 610 list_del_init(&afs_vlocation_updates); 611 spin_unlock(&afs_vlocation_updates_lock); 612 mod_delayed_work(afs_vlocation_update_worker, &afs_vlocation_update, 0); 613 destroy_workqueue(afs_vlocation_update_worker); 614 615 mod_delayed_work(afs_wq, &afs_vlocation_reap, 0); 616} 617 618/* 619 * update a volume location 620 */ 621static void afs_vlocation_updater(struct work_struct *work) 622{ 623 struct afs_cache_vlocation vldb; 624 struct afs_vlocation *vl, *xvl; 625 time_t now; 626 long timeout; 627 int ret; 628 629 _enter(""); 630 631 now = get_seconds(); 632 633 /* find a record to update */ 634 spin_lock(&afs_vlocation_updates_lock); 635 for (;;) { 636 if (list_empty(&afs_vlocation_updates)) { 637 spin_unlock(&afs_vlocation_updates_lock); 638 _leave(" [nothing]"); 639 return; 640 } 641 642 vl = list_entry(afs_vlocation_updates.next, 643 struct afs_vlocation, update); 644 if (atomic_read(&vl->usage) > 0) 645 break; 646 list_del_init(&vl->update); 647 } 648 649 timeout = vl->update_at - now; 650 if (timeout > 0) { 651 queue_delayed_work(afs_vlocation_update_worker, 652 &afs_vlocation_update, timeout * HZ); 653 spin_unlock(&afs_vlocation_updates_lock); 654 _leave(" [nothing]"); 655 return; 656 } 657 658 list_del_init(&vl->update); 659 atomic_inc(&vl->usage); 660 spin_unlock(&afs_vlocation_updates_lock); 661 662 /* we can now perform the update */ 663 _debug("update %s", vl->vldb.name); 664 vl->state = AFS_VL_UPDATING; 665 vl->upd_rej_cnt = 0; 666 vl->upd_busy_cnt = 0; 667 668 ret = afs_vlocation_update_record(vl, NULL, &vldb); 669 spin_lock(&vl->lock); 670 switch (ret) { 671 case 0: 672 afs_vlocation_apply_update(vl, &vldb); 673 vl->state = AFS_VL_VALID; 674 break; 675 case -ENOMEDIUM: 676 vl->state = AFS_VL_VOLUME_DELETED; 677 break; 678 default: 679 vl->state = AFS_VL_UNCERTAIN; 680 break; 681 } 682 spin_unlock(&vl->lock); 683 wake_up(&vl->waitq); 684 685 /* and then reschedule */ 686 _debug("reschedule"); 687 vl->update_at = get_seconds() + afs_vlocation_update_timeout; 688 689 spin_lock(&afs_vlocation_updates_lock); 690 691 if (!list_empty(&afs_vlocation_updates)) { 692 /* next update in 10 minutes, but wait at least 1 second more 693 * than the newest record already queued so that we don't spam 694 * the VL server suddenly with lots of requests 695 */ 696 xvl = list_entry(afs_vlocation_updates.prev, 697 struct afs_vlocation, update); 698 if (vl->update_at <= xvl->update_at) 699 vl->update_at = xvl->update_at + 1; 700 xvl = list_entry(afs_vlocation_updates.next, 701 struct afs_vlocation, update); 702 timeout = xvl->update_at - now; 703 if (timeout < 0) 704 timeout = 0; 705 } else { 706 timeout = afs_vlocation_update_timeout; 707 } 708 709 ASSERT(list_empty(&vl->update)); 710 711 list_add_tail(&vl->update, &afs_vlocation_updates); 712 713 _debug("timeout %ld", timeout); 714 queue_delayed_work(afs_vlocation_update_worker, 715 &afs_vlocation_update, timeout * HZ); 716 spin_unlock(&afs_vlocation_updates_lock); 717 afs_put_vlocation(vl); 718} 719