1/* 2 * 3 * Copyright (C) 2011 Novell Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 */ 9 10#include <linux/fs.h> 11#include <linux/slab.h> 12#include <linux/file.h> 13#include <linux/splice.h> 14#include <linux/xattr.h> 15#include <linux/security.h> 16#include <linux/uaccess.h> 17#include <linux/sched.h> 18#include <linux/namei.h> 19#include "overlayfs.h" 20 21#define OVL_COPY_UP_CHUNK_SIZE (1 << 20) 22 23int ovl_copy_xattr(struct dentry *old, struct dentry *new) 24{ 25 ssize_t list_size, size, value_size = 0; 26 char *buf, *name, *value = NULL; 27 int uninitialized_var(error); 28 29 if (!old->d_inode->i_op->getxattr || 30 !new->d_inode->i_op->getxattr) 31 return 0; 32 33 list_size = vfs_listxattr(old, NULL, 0); 34 if (list_size <= 0) { 35 if (list_size == -EOPNOTSUPP) 36 return 0; 37 return list_size; 38 } 39 40 buf = kzalloc(list_size, GFP_KERNEL); 41 if (!buf) 42 return -ENOMEM; 43 44 list_size = vfs_listxattr(old, buf, list_size); 45 if (list_size <= 0) { 46 error = list_size; 47 goto out; 48 } 49 50 for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { 51retry: 52 size = vfs_getxattr(old, name, value, value_size); 53 if (size == -ERANGE) 54 size = vfs_getxattr(old, name, NULL, 0); 55 56 if (size < 0) { 57 error = size; 58 break; 59 } 60 61 if (size > value_size) { 62 void *new; 63 64 new = krealloc(value, size, GFP_KERNEL); 65 if (!new) { 66 error = -ENOMEM; 67 break; 68 } 69 value = new; 70 value_size = size; 71 goto retry; 72 } 73 74 error = vfs_setxattr(new, name, value, size, 0); 75 if (error) 76 break; 77 } 78 kfree(value); 79out: 80 kfree(buf); 81 return error; 82} 83 84static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) 85{ 86 struct file *old_file; 87 struct file *new_file; 88 loff_t old_pos = 0; 89 loff_t new_pos = 0; 90 int error = 0; 91 92 if (len == 0) 93 return 0; 94 95 old_file = ovl_path_open(old, O_LARGEFILE | O_RDONLY); 96 if (IS_ERR(old_file)) 97 return PTR_ERR(old_file); 98 99 new_file = ovl_path_open(new, O_LARGEFILE | O_WRONLY); 100 if (IS_ERR(new_file)) { 101 error = PTR_ERR(new_file); 102 goto out_fput; 103 } 104 105 /* FIXME: copy up sparse files efficiently */ 106 while (len) { 107 size_t this_len = OVL_COPY_UP_CHUNK_SIZE; 108 long bytes; 109 110 if (len < this_len) 111 this_len = len; 112 113 if (signal_pending_state(TASK_KILLABLE, current)) { 114 error = -EINTR; 115 break; 116 } 117 118 bytes = do_splice_direct(old_file, &old_pos, 119 new_file, &new_pos, 120 this_len, SPLICE_F_MOVE); 121 if (bytes <= 0) { 122 error = bytes; 123 break; 124 } 125 WARN_ON(old_pos != new_pos); 126 127 len -= bytes; 128 } 129 130 fput(new_file); 131out_fput: 132 fput(old_file); 133 return error; 134} 135 136static char *ovl_read_symlink(struct dentry *realdentry) 137{ 138 int res; 139 char *buf; 140 struct inode *inode = realdentry->d_inode; 141 mm_segment_t old_fs; 142 143 res = -EINVAL; 144 if (!inode->i_op->readlink) 145 goto err; 146 147 res = -ENOMEM; 148 buf = (char *) __get_free_page(GFP_KERNEL); 149 if (!buf) 150 goto err; 151 152 old_fs = get_fs(); 153 set_fs(get_ds()); 154 /* The cast to a user pointer is valid due to the set_fs() */ 155 res = inode->i_op->readlink(realdentry, 156 (char __user *)buf, PAGE_SIZE - 1); 157 set_fs(old_fs); 158 if (res < 0) { 159 free_page((unsigned long) buf); 160 goto err; 161 } 162 buf[res] = '\0'; 163 164 return buf; 165 166err: 167 return ERR_PTR(res); 168} 169 170static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat) 171{ 172 struct iattr attr = { 173 .ia_valid = 174 ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET, 175 .ia_atime = stat->atime, 176 .ia_mtime = stat->mtime, 177 }; 178 179 return notify_change(upperdentry, &attr, NULL); 180} 181 182int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) 183{ 184 int err = 0; 185 186 if (!S_ISLNK(stat->mode)) { 187 struct iattr attr = { 188 .ia_valid = ATTR_MODE, 189 .ia_mode = stat->mode, 190 }; 191 err = notify_change(upperdentry, &attr, NULL); 192 } 193 if (!err) { 194 struct iattr attr = { 195 .ia_valid = ATTR_UID | ATTR_GID, 196 .ia_uid = stat->uid, 197 .ia_gid = stat->gid, 198 }; 199 err = notify_change(upperdentry, &attr, NULL); 200 } 201 if (!err) 202 ovl_set_timestamps(upperdentry, stat); 203 204 return err; 205} 206 207static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, 208 struct dentry *dentry, struct path *lowerpath, 209 struct kstat *stat, struct iattr *attr, 210 const char *link) 211{ 212 struct inode *wdir = workdir->d_inode; 213 struct inode *udir = upperdir->d_inode; 214 struct dentry *newdentry = NULL; 215 struct dentry *upper = NULL; 216 umode_t mode = stat->mode; 217 int err; 218 219 newdentry = ovl_lookup_temp(workdir, dentry); 220 err = PTR_ERR(newdentry); 221 if (IS_ERR(newdentry)) 222 goto out; 223 224 upper = lookup_one_len(dentry->d_name.name, upperdir, 225 dentry->d_name.len); 226 err = PTR_ERR(upper); 227 if (IS_ERR(upper)) 228 goto out1; 229 230 /* Can't properly set mode on creation because of the umask */ 231 stat->mode &= S_IFMT; 232 err = ovl_create_real(wdir, newdentry, stat, link, NULL, true); 233 stat->mode = mode; 234 if (err) 235 goto out2; 236 237 if (S_ISREG(stat->mode)) { 238 struct path upperpath; 239 ovl_path_upper(dentry, &upperpath); 240 BUG_ON(upperpath.dentry != NULL); 241 upperpath.dentry = newdentry; 242 243 err = ovl_copy_up_data(lowerpath, &upperpath, stat->size); 244 if (err) 245 goto out_cleanup; 246 } 247 248 err = ovl_copy_xattr(lowerpath->dentry, newdentry); 249 if (err) 250 goto out_cleanup; 251 252 mutex_lock(&newdentry->d_inode->i_mutex); 253 err = ovl_set_attr(newdentry, stat); 254 if (!err && attr) 255 err = notify_change(newdentry, attr, NULL); 256 mutex_unlock(&newdentry->d_inode->i_mutex); 257 if (err) 258 goto out_cleanup; 259 260 err = ovl_do_rename(wdir, newdentry, udir, upper, 0); 261 if (err) 262 goto out_cleanup; 263 264 ovl_dentry_update(dentry, newdentry); 265 newdentry = NULL; 266 267 /* 268 * Non-directores become opaque when copied up. 269 */ 270 if (!S_ISDIR(stat->mode)) 271 ovl_dentry_set_opaque(dentry, true); 272out2: 273 dput(upper); 274out1: 275 dput(newdentry); 276out: 277 return err; 278 279out_cleanup: 280 ovl_cleanup(wdir, newdentry); 281 goto out2; 282} 283 284/* 285 * Copy up a single dentry 286 * 287 * Directory renames only allowed on "pure upper" (already created on 288 * upper filesystem, never copied up). Directories which are on lower or 289 * are merged may not be renamed. For these -EXDEV is returned and 290 * userspace has to deal with it. This means, when copying up a 291 * directory we can rely on it and ancestors being stable. 292 * 293 * Non-directory renames start with copy up of source if necessary. The 294 * actual rename will only proceed once the copy up was successful. Copy 295 * up uses upper parent i_mutex for exclusion. Since rename can change 296 * d_parent it is possible that the copy up will lock the old parent. At 297 * that point the file will have already been copied up anyway. 298 */ 299int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, 300 struct path *lowerpath, struct kstat *stat, 301 struct iattr *attr) 302{ 303 struct dentry *workdir = ovl_workdir(dentry); 304 int err; 305 struct kstat pstat; 306 struct path parentpath; 307 struct dentry *upperdir; 308 struct dentry *upperdentry; 309 const struct cred *old_cred; 310 struct cred *override_cred; 311 char *link = NULL; 312 313 if (WARN_ON(!workdir)) 314 return -EROFS; 315 316 ovl_path_upper(parent, &parentpath); 317 upperdir = parentpath.dentry; 318 319 err = vfs_getattr(&parentpath, &pstat); 320 if (err) 321 return err; 322 323 if (S_ISLNK(stat->mode)) { 324 link = ovl_read_symlink(lowerpath->dentry); 325 if (IS_ERR(link)) 326 return PTR_ERR(link); 327 } 328 329 err = -ENOMEM; 330 override_cred = prepare_creds(); 331 if (!override_cred) 332 goto out_free_link; 333 334 override_cred->fsuid = stat->uid; 335 override_cred->fsgid = stat->gid; 336 /* 337 * CAP_SYS_ADMIN for copying up extended attributes 338 * CAP_DAC_OVERRIDE for create 339 * CAP_FOWNER for chmod, timestamp update 340 * CAP_FSETID for chmod 341 * CAP_CHOWN for chown 342 * CAP_MKNOD for mknod 343 */ 344 cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); 345 cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); 346 cap_raise(override_cred->cap_effective, CAP_FOWNER); 347 cap_raise(override_cred->cap_effective, CAP_FSETID); 348 cap_raise(override_cred->cap_effective, CAP_CHOWN); 349 cap_raise(override_cred->cap_effective, CAP_MKNOD); 350 old_cred = override_creds(override_cred); 351 352 err = -EIO; 353 if (lock_rename(workdir, upperdir) != NULL) { 354 pr_err("overlayfs: failed to lock workdir+upperdir\n"); 355 goto out_unlock; 356 } 357 upperdentry = ovl_dentry_upper(dentry); 358 if (upperdentry) { 359 unlock_rename(workdir, upperdir); 360 err = 0; 361 /* Raced with another copy-up? Do the setattr here */ 362 if (attr) { 363 mutex_lock(&upperdentry->d_inode->i_mutex); 364 err = notify_change(upperdentry, attr, NULL); 365 mutex_unlock(&upperdentry->d_inode->i_mutex); 366 } 367 goto out_put_cred; 368 } 369 370 err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, 371 stat, attr, link); 372 if (!err) { 373 /* Restore timestamps on parent (best effort) */ 374 ovl_set_timestamps(upperdir, &pstat); 375 } 376out_unlock: 377 unlock_rename(workdir, upperdir); 378out_put_cred: 379 revert_creds(old_cred); 380 put_cred(override_cred); 381 382out_free_link: 383 if (link) 384 free_page((unsigned long) link); 385 386 return err; 387} 388 389int ovl_copy_up(struct dentry *dentry) 390{ 391 int err; 392 393 err = 0; 394 while (!err) { 395 struct dentry *next; 396 struct dentry *parent; 397 struct path lowerpath; 398 struct kstat stat; 399 enum ovl_path_type type = ovl_path_type(dentry); 400 401 if (OVL_TYPE_UPPER(type)) 402 break; 403 404 next = dget(dentry); 405 /* find the topmost dentry not yet copied up */ 406 for (;;) { 407 parent = dget_parent(next); 408 409 type = ovl_path_type(parent); 410 if (OVL_TYPE_UPPER(type)) 411 break; 412 413 dput(next); 414 next = parent; 415 } 416 417 ovl_path_lower(next, &lowerpath); 418 err = vfs_getattr(&lowerpath, &stat); 419 if (!err) 420 err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL); 421 422 dput(parent); 423 dput(next); 424 } 425 426 return err; 427} 428