1/* AFS volume management 2 * 3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. 4 * Written by David Howells (dhowells@redhat.com) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12#include <linux/kernel.h> 13#include <linux/module.h> 14#include <linux/init.h> 15#include <linux/slab.h> 16#include <linux/fs.h> 17#include <linux/pagemap.h> 18#include <linux/sched.h> 19#include "internal.h" 20 21static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" }; 22 23/* 24 * lookup a volume by name 25 * - this can be one of the following: 26 * "%[cell:]volume[.]" R/W volume 27 * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0), 28 * or R/W (rwparent=1) volume 29 * "%[cell:]volume.readonly" R/O volume 30 * "#[cell:]volume.readonly" R/O volume 31 * "%[cell:]volume.backup" Backup volume 32 * "#[cell:]volume.backup" Backup volume 33 * 34 * The cell name is optional, and defaults to the current cell. 35 * 36 * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin 37 * Guide 38 * - Rule 1: Explicit type suffix forces access of that type or nothing 39 * (no suffix, then use Rule 2 & 3) 40 * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W 41 * if not available 42 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless 43 * explicitly told otherwise 44 */ 45struct afs_volume *afs_volume_lookup(struct afs_mount_params *params) 46{ 47 struct afs_vlocation *vlocation = NULL; 48 struct afs_volume *volume = NULL; 49 struct afs_server *server = NULL; 50 char srvtmask; 51 int ret, loop; 52 53 _enter("{%*.*s,%d}", 54 params->volnamesz, params->volnamesz, params->volname, params->rwpath); 55 56 /* lookup the volume location record */ 57 vlocation = afs_vlocation_lookup(params->cell, params->key, 58 params->volname, params->volnamesz); 59 if (IS_ERR(vlocation)) { 60 ret = PTR_ERR(vlocation); 61 vlocation = NULL; 62 goto error; 63 } 64 65 /* make the final decision on the type we want */ 66 ret = -ENOMEDIUM; 67 if (params->force && !(vlocation->vldb.vidmask & (1 << params->type))) 68 goto error; 69 70 srvtmask = 0; 71 for (loop = 0; loop < vlocation->vldb.nservers; loop++) 72 srvtmask |= vlocation->vldb.srvtmask[loop]; 73 74 if (params->force) { 75 if (!(srvtmask & (1 << params->type))) 76 goto error; 77 } else if (srvtmask & AFS_VOL_VTM_RO) { 78 params->type = AFSVL_ROVOL; 79 } else if (srvtmask & AFS_VOL_VTM_RW) { 80 params->type = AFSVL_RWVOL; 81 } else { 82 goto error; 83 } 84 85 down_write(¶ms->cell->vl_sem); 86 87 /* is the volume already active? */ 88 if (vlocation->vols[params->type]) { 89 /* yes - re-use it */ 90 volume = vlocation->vols[params->type]; 91 afs_get_volume(volume); 92 goto success; 93 } 94 95 /* create a new volume record */ 96 _debug("creating new volume record"); 97 98 ret = -ENOMEM; 99 volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL); 100 if (!volume) 101 goto error_up; 102 103 atomic_set(&volume->usage, 1); 104 volume->type = params->type; 105 volume->type_force = params->force; 106 volume->cell = params->cell; 107 volume->vid = vlocation->vldb.vid[params->type]; 108 109 ret = bdi_setup_and_register(&volume->bdi, "afs"); 110 if (ret) 111 goto error_bdi; 112 113 init_rwsem(&volume->server_sem); 114 115 /* look up all the applicable server records */ 116 for (loop = 0; loop < 8; loop++) { 117 if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) { 118 server = afs_lookup_server( 119 volume->cell, &vlocation->vldb.servers[loop]); 120 if (IS_ERR(server)) { 121 ret = PTR_ERR(server); 122 goto error_discard; 123 } 124 125 volume->servers[volume->nservers] = server; 126 volume->nservers++; 127 } 128 } 129 130 /* attach the cache and volume location */ 131#ifdef CONFIG_AFS_FSCACHE 132 volume->cache = fscache_acquire_cookie(vlocation->cache, 133 &afs_volume_cache_index_def, 134 volume, true); 135#endif 136 afs_get_vlocation(vlocation); 137 volume->vlocation = vlocation; 138 139 vlocation->vols[volume->type] = volume; 140 141success: 142 _debug("kAFS selected %s volume %08x", 143 afs_voltypes[volume->type], volume->vid); 144 up_write(¶ms->cell->vl_sem); 145 afs_put_vlocation(vlocation); 146 _leave(" = %p", volume); 147 return volume; 148 149 /* clean up */ 150error_up: 151 up_write(¶ms->cell->vl_sem); 152error: 153 afs_put_vlocation(vlocation); 154 _leave(" = %d", ret); 155 return ERR_PTR(ret); 156 157error_discard: 158 bdi_destroy(&volume->bdi); 159error_bdi: 160 up_write(¶ms->cell->vl_sem); 161 162 for (loop = volume->nservers - 1; loop >= 0; loop--) 163 afs_put_server(volume->servers[loop]); 164 165 kfree(volume); 166 goto error; 167} 168 169/* 170 * destroy a volume record 171 */ 172void afs_put_volume(struct afs_volume *volume) 173{ 174 struct afs_vlocation *vlocation; 175 int loop; 176 177 if (!volume) 178 return; 179 180 _enter("%p", volume); 181 182 ASSERTCMP(atomic_read(&volume->usage), >, 0); 183 184 vlocation = volume->vlocation; 185 186 /* to prevent a race, the decrement and the dequeue must be effectively 187 * atomic */ 188 down_write(&vlocation->cell->vl_sem); 189 190 if (likely(!atomic_dec_and_test(&volume->usage))) { 191 up_write(&vlocation->cell->vl_sem); 192 _leave(""); 193 return; 194 } 195 196 vlocation->vols[volume->type] = NULL; 197 198 up_write(&vlocation->cell->vl_sem); 199 200 /* finish cleaning up the volume */ 201#ifdef CONFIG_AFS_FSCACHE 202 fscache_relinquish_cookie(volume->cache, 0); 203#endif 204 afs_put_vlocation(vlocation); 205 206 for (loop = volume->nservers - 1; loop >= 0; loop--) 207 afs_put_server(volume->servers[loop]); 208 209 bdi_destroy(&volume->bdi); 210 kfree(volume); 211 212 _leave(" [destroyed]"); 213} 214 215/* 216 * pick a server to use to try accessing this volume 217 * - returns with an elevated usage count on the server chosen 218 */ 219struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode) 220{ 221 struct afs_volume *volume = vnode->volume; 222 struct afs_server *server; 223 int ret, state, loop; 224 225 _enter("%s", volume->vlocation->vldb.name); 226 227 /* stick with the server we're already using if we can */ 228 if (vnode->server && vnode->server->fs_state == 0) { 229 afs_get_server(vnode->server); 230 _leave(" = %p [current]", vnode->server); 231 return vnode->server; 232 } 233 234 down_read(&volume->server_sem); 235 236 /* handle the no-server case */ 237 if (volume->nservers == 0) { 238 ret = volume->rjservers ? -ENOMEDIUM : -ESTALE; 239 up_read(&volume->server_sem); 240 _leave(" = %d [no servers]", ret); 241 return ERR_PTR(ret); 242 } 243 244 /* basically, just search the list for the first live server and use 245 * that */ 246 ret = 0; 247 for (loop = 0; loop < volume->nservers; loop++) { 248 server = volume->servers[loop]; 249 state = server->fs_state; 250 251 _debug("consider %d [%d]", loop, state); 252 253 switch (state) { 254 /* found an apparently healthy server */ 255 case 0: 256 afs_get_server(server); 257 up_read(&volume->server_sem); 258 _leave(" = %p (picked %08x)", 259 server, ntohl(server->addr.s_addr)); 260 return server; 261 262 case -ENETUNREACH: 263 if (ret == 0) 264 ret = state; 265 break; 266 267 case -EHOSTUNREACH: 268 if (ret == 0 || 269 ret == -ENETUNREACH) 270 ret = state; 271 break; 272 273 case -ECONNREFUSED: 274 if (ret == 0 || 275 ret == -ENETUNREACH || 276 ret == -EHOSTUNREACH) 277 ret = state; 278 break; 279 280 default: 281 case -EREMOTEIO: 282 if (ret == 0 || 283 ret == -ENETUNREACH || 284 ret == -EHOSTUNREACH || 285 ret == -ECONNREFUSED) 286 ret = state; 287 break; 288 } 289 } 290 291 /* no available servers 292 * - TODO: handle the no active servers case better 293 */ 294 up_read(&volume->server_sem); 295 _leave(" = %d", ret); 296 return ERR_PTR(ret); 297} 298 299/* 300 * release a server after use 301 * - releases the ref on the server struct that was acquired by picking 302 * - records result of using a particular server to access a volume 303 * - return 0 to try again, 1 if okay or to issue error 304 * - the caller must release the server struct if result was 0 305 */ 306int afs_volume_release_fileserver(struct afs_vnode *vnode, 307 struct afs_server *server, 308 int result) 309{ 310 struct afs_volume *volume = vnode->volume; 311 unsigned loop; 312 313 _enter("%s,%08x,%d", 314 volume->vlocation->vldb.name, ntohl(server->addr.s_addr), 315 result); 316 317 switch (result) { 318 /* success */ 319 case 0: 320 server->fs_act_jif = jiffies; 321 server->fs_state = 0; 322 _leave(""); 323 return 1; 324 325 /* the fileserver denied all knowledge of the volume */ 326 case -ENOMEDIUM: 327 server->fs_act_jif = jiffies; 328 down_write(&volume->server_sem); 329 330 /* firstly, find where the server is in the active list (if it 331 * is) */ 332 for (loop = 0; loop < volume->nservers; loop++) 333 if (volume->servers[loop] == server) 334 goto present; 335 336 /* no longer there - may have been discarded by another op */ 337 goto try_next_server_upw; 338 339 present: 340 volume->nservers--; 341 memmove(&volume->servers[loop], 342 &volume->servers[loop + 1], 343 sizeof(volume->servers[loop]) * 344 (volume->nservers - loop)); 345 volume->servers[volume->nservers] = NULL; 346 afs_put_server(server); 347 volume->rjservers++; 348 349 if (volume->nservers > 0) 350 /* another server might acknowledge its existence */ 351 goto try_next_server_upw; 352 353 /* handle the case where all the fileservers have rejected the 354 * volume 355 * - TODO: try asking the fileservers for volume information 356 * - TODO: contact the VL server again to see if the volume is 357 * no longer registered 358 */ 359 up_write(&volume->server_sem); 360 afs_put_server(server); 361 _leave(" [completely rejected]"); 362 return 1; 363 364 /* problem reaching the server */ 365 case -ENETUNREACH: 366 case -EHOSTUNREACH: 367 case -ECONNREFUSED: 368 case -ETIME: 369 case -ETIMEDOUT: 370 case -EREMOTEIO: 371 /* mark the server as dead 372 * TODO: vary dead timeout depending on error 373 */ 374 spin_lock(&server->fs_lock); 375 if (!server->fs_state) { 376 server->fs_dead_jif = jiffies + HZ * 10; 377 server->fs_state = result; 378 printk("kAFS: SERVER DEAD state=%d\n", result); 379 } 380 spin_unlock(&server->fs_lock); 381 goto try_next_server; 382 383 /* miscellaneous error */ 384 default: 385 server->fs_act_jif = jiffies; 386 case -ENOMEM: 387 case -ENONET: 388 /* tell the caller to accept the result */ 389 afs_put_server(server); 390 _leave(" [local failure]"); 391 return 1; 392 } 393 394 /* tell the caller to loop around and try the next server */ 395try_next_server_upw: 396 up_write(&volume->server_sem); 397try_next_server: 398 afs_put_server(server); 399 _leave(" [try next server]"); 400 return 0; 401} 402