1 /*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19 *
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
22 * have any questions.
23 *
24 * GPL HEADER END
25 */
26 /*
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
29 *
30 * Copyright (c) 2011, 2012, Intel Corporation.
31 */
32 /*
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
35 *
36 * lustre/llite/file.c
37 *
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
41 */
42
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include "../include/lustre_dlm.h"
45 #include "../include/lustre_lite.h"
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include "../include/lustre/ll_fiemap.h"
50
51 #include "../include/cl_object.h"
52
53 static int
54 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
55
56 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
57 bool *lease_broken);
58
59 static enum llioc_iter
60 ll_iocontrol_call(struct inode *inode, struct file *file,
61 unsigned int cmd, unsigned long arg, int *rcp);
62
ll_file_data_get(void)63 static struct ll_file_data *ll_file_data_get(void)
64 {
65 struct ll_file_data *fd;
66
67 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, GFP_NOFS);
68 if (fd == NULL)
69 return NULL;
70 fd->fd_write_failed = false;
71 return fd;
72 }
73
ll_file_data_put(struct ll_file_data * fd)74 static void ll_file_data_put(struct ll_file_data *fd)
75 {
76 if (fd != NULL)
77 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
78 }
79
ll_pack_inode2opdata(struct inode * inode,struct md_op_data * op_data,struct lustre_handle * fh)80 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
81 struct lustre_handle *fh)
82 {
83 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
84 op_data->op_attr.ia_mode = inode->i_mode;
85 op_data->op_attr.ia_atime = inode->i_atime;
86 op_data->op_attr.ia_mtime = inode->i_mtime;
87 op_data->op_attr.ia_ctime = inode->i_ctime;
88 op_data->op_attr.ia_size = i_size_read(inode);
89 op_data->op_attr_blocks = inode->i_blocks;
90 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
91 ll_inode_to_ext_flags(inode->i_flags);
92 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
93 if (fh)
94 op_data->op_handle = *fh;
95 op_data->op_capa1 = ll_mdscapa_get(inode);
96
97 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
98 op_data->op_bias |= MDS_DATA_MODIFIED;
99 }
100
101 /**
102 * Closes the IO epoch and packs all the attributes into @op_data for
103 * the CLOSE rpc.
104 */
ll_prepare_close(struct inode * inode,struct md_op_data * op_data,struct obd_client_handle * och)105 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
106 struct obd_client_handle *och)
107 {
108 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
109 ATTR_MTIME | ATTR_MTIME_SET |
110 ATTR_CTIME | ATTR_CTIME_SET;
111
112 if (!(och->och_flags & FMODE_WRITE))
113 goto out;
114
115 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
116 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
117 else
118 ll_ioepoch_close(inode, op_data, &och, 0);
119
120 out:
121 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
122 ll_prep_md_op_data(op_data, inode, NULL, NULL,
123 0, 0, LUSTRE_OPC_ANY, NULL);
124 }
125
/*
 * Send a CLOSE RPC to the MDS for open handle @och on @inode.
 *
 * When @data_version is non-NULL the close carries MDS_HSM_RELEASE plus the
 * data version, and on success the reply is checked for OBD_MD_FLRELEASED.
 * On return the handle is either freed here or, when SOM epochs are in use
 * and the epoch is still open for a write handle, queued for DONE_WRITING.
 * Returns 0 or a negative errno.
 */
static int ll_close_inode_openhandle(struct obd_export *md_exp,
				     struct inode *inode,
				     struct obd_client_handle *och,
				     const __u64 *data_version)
{
	struct obd_export *exp = ll_i2mdexp(inode);
	struct md_op_data *op_data;
	struct ptlrpc_request *req = NULL;
	struct obd_device *obd = class_exp2obd(exp);
	int epoch_close = 1;
	int rc;

	if (obd == NULL) {
		/*
		 * XXX: in case of LMV, is this correct to access
		 * ->exp_handle?
		 */
		CERROR("Invalid MDC connection handle %#llx\n",
		       ll_i2mdexp(inode)->exp_handle.h_cookie);
		rc = 0;
		goto out;
	}

	op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
	if (!op_data) {
		/* XXX We leak openhandle and request here. */
		rc = -ENOMEM;
		goto out;
	}

	ll_prepare_close(inode, op_data, och);
	if (data_version != NULL) {
		/* Pass in data_version implies release. */
		op_data->op_bias |= MDS_HSM_RELEASE;
		op_data->op_data_version = *data_version;
		op_data->op_lease_handle = och->och_lease_handle;
		op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
	}
	epoch_close = op_data->op_flags & MF_EPOCH_CLOSE;
	rc = md_close(md_exp, op_data, och->och_mod, &req);
	if (rc == -EAGAIN) {
		/* This close must have the epoch closed. */
		LASSERT(epoch_close);
		/* MDS has instructed us to obtain Size-on-MDS attribute from
		 * OSTs and send setattr to back to MDS. */
		rc = ll_som_update(inode, op_data);
		if (rc) {
			CERROR("inode %lu mdc Size-on-MDS update failed: rc = %d\n",
			       inode->i_ino, rc);
			rc = 0;	/* best effort: the close itself succeeded */
		}
	} else if (rc) {
		CERROR("inode %lu mdc close failed: rc = %d\n",
		       inode->i_ino, rc);
	}

	/* DATA_MODIFIED flag was successfully sent on close, cancel data
	 * modification flag. */
	if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
		struct ll_inode_info *lli = ll_i2info(inode);

		spin_lock(&lli->lli_lock);
		lli->lli_flags &= ~LLIF_DATA_MODIFIED;
		spin_unlock(&lli->lli_lock);
	}

	if (rc == 0) {
		rc = ll_objects_destroy(req, inode);
		if (rc)
			CERROR("inode %lu ll_objects destroy: rc = %d\n",
			       inode->i_ino, rc);
	}
	/* For HSM release, verify the MDT actually released the file. */
	if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
		struct mdt_body *body;

		body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
		if (!(body->valid & OBD_MD_FLRELEASED))
			rc = -EBUSY;
	}

	ll_finish_md_op_data(op_data);

out:
	if (exp_connect_som(exp) && !epoch_close &&
	    S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
		/* Epoch still open: defer freeing @och until DONE_WRITING. */
		ll_queue_done_writing(inode, LLIF_DONE_WRITING);
	} else {
		md_clear_open_replay_data(md_exp, och);
		/* Free @och if it is not waiting for DONE_WRITING. */
		och->och_fh.cookie = DEAD_HANDLE_MAGIC;
		OBD_FREE_PTR(och);
	}
	if (req) /* This is close request */
		ptlrpc_req_finished(req);
	return rc;
}
222
/*
 * Close the shared per-inode MDS open handle matching @fmode, unless other
 * file descriptors of the same mode still reference it.  Returns 0 or the
 * error from ll_close_inode_openhandle().
 */
int ll_md_real_close(struct inode *inode, fmode_t fmode)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct obd_client_handle **slot;
	struct obd_client_handle *handle;
	__u64 *use_count;
	int rc = 0;

	/* Pick the handle slot and use counter for this open mode. */
	if (fmode & FMODE_WRITE) {
		slot = &lli->lli_mds_write_och;
		use_count = &lli->lli_open_fd_write_count;
	} else if (fmode & FMODE_EXEC) {
		slot = &lli->lli_mds_exec_och;
		use_count = &lli->lli_open_fd_exec_count;
	} else {
		LASSERT(fmode & FMODE_READ);
		slot = &lli->lli_mds_read_och;
		use_count = &lli->lli_open_fd_read_count;
	}

	mutex_lock(&lli->lli_och_mutex);
	if (*use_count > 0) {
		/* Handle still has users; leave it open. */
		mutex_unlock(&lli->lli_och_mutex);
		return 0;
	}

	handle = *slot;
	*slot = NULL;
	mutex_unlock(&lli->lli_och_mutex);

	/* A NULL handle means a racing close already took it. */
	if (handle != NULL)
		rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
					       inode, handle, NULL);

	return rc;
}
264
/*
 * Per-descriptor close: drop any group lock and lease held by @file,
 * decrement the open-handle use count for the descriptor's open mode, and
 * close the MDS open handle unless a granted OPEN DLM lock lets us skip
 * the RPC.  Frees the ll_file_data attached to @file in all paths.
 */
static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
		       struct file *file)
{
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ll_inode_info *lli = ll_i2info(inode);
	int lockmode;
	__u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
	struct lustre_handle lockh;
	ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_OPEN}};
	int rc = 0;

	/* clear group lock, if present */
	if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
		ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);

	if (fd->fd_lease_och != NULL) {
		bool lease_broken;

		/* Usually the lease is not released when the
		 * application crashed, we need to release here. */
		rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
		CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
		       PFID(&lli->lli_fid), rc, lease_broken);

		fd->fd_lease_och = NULL;
	}

	/* An och owned exclusively by this descriptor is closed directly;
	 * the shared per-inode handles are not touched in that case. */
	if (fd->fd_och != NULL) {
		rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
		fd->fd_och = NULL;
		goto out;
	}

	/* Let's see if we have good enough OPEN lock on the file and if
	   we can skip talking to MDS */

	mutex_lock(&lli->lli_och_mutex);
	if (fd->fd_omode & FMODE_WRITE) {
		lockmode = LCK_CW;
		LASSERT(lli->lli_open_fd_write_count);
		lli->lli_open_fd_write_count--;
	} else if (fd->fd_omode & FMODE_EXEC) {
		lockmode = LCK_PR;
		LASSERT(lli->lli_open_fd_exec_count);
		lli->lli_open_fd_exec_count--;
	} else {
		lockmode = LCK_CR;
		LASSERT(lli->lli_open_fd_read_count);
		lli->lli_open_fd_read_count--;
	}
	mutex_unlock(&lli->lli_och_mutex);

	/* No matching granted OPEN lock cached: really close on the MDS. */
	if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
			   LDLM_IBITS, &policy, lockmode, &lockh))
		rc = ll_md_real_close(inode, fd->fd_omode);

out:
	LUSTRE_FPRIVATE(file) = NULL;
	ll_file_data_put(fd);
	ll_capa_close(inode);

	return rc;
}
328
/* While this returns an error code, fput() the caller does not, so we need
 * to make every effort to clean up all of our state here.  Also, applications
 * rarely check close errors and even if an error is returned they will not
 * re-try the close call.
 */
int ll_file_release(struct inode *inode, struct file *file)
{
	struct ll_file_data *fd;
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ll_inode_info *lli = ll_i2info(inode);
	int rc;

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
	       inode->i_generation, inode);

#ifdef CONFIG_FS_POSIX_ACL
	/* Remote-client ACL state is keyed by pid; drop it when the root
	 * inode is released with LL_FILE_RMTACL still set. */
	if (sbi->ll_flags & LL_SBI_RMT_CLIENT && is_root_inode(inode)) {
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

		LASSERT(fd != NULL);
		if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
			fd->fd_flags &= ~LL_FILE_RMTACL;
			rct_del(&sbi->ll_rct, current_pid());
			et_search_free(&sbi->ll_et, current_pid());
		}
	}
#endif

	if (!is_root_inode(inode))
		ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
	fd = LUSTRE_FPRIVATE(file);
	LASSERT(fd != NULL);

	/* The last ref on @file, maybe not the owner pid of statahead.
	 * Different processes can open the same dir, "ll_opendir_key" means:
	 * it is me that should stop the statahead thread. */
	if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
	    lli->lli_opendir_pid != 0)
		ll_stop_statahead(inode, lli->lli_opendir_key);

	/* The root inode has no MDS open handle to close. */
	if (is_root_inode(inode)) {
		LUSTRE_FPRIVATE(file) = NULL;
		ll_file_data_put(fd);
		return 0;
	}

	if (!S_ISDIR(inode->i_mode)) {
		/* Pick up and reset any recorded async write error state. */
		lov_read_and_clear_async_rc(lli->lli_clob);
		lli->lli_async_rc = 0;
	}

	rc = ll_md_close(sbi->ll_md_exp, inode, file);

	/* Fault-injection hook: optionally dump the debug log on close. */
	if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
		libcfs_debug_dumplog();

	return rc;
}
387
/*
 * Open @dentry on the MDS via an IT_OPEN intent RPC and, on success,
 * update the inode from the reply and install the returned lock data.
 *
 * @lmm/@lmmsize carry striping info to set at open time; when both are
 * zero this is a plain open and an OPEN DLM lock is requested instead.
 *
 * NOTE(review): @req is not initialized to NULL; the ptlrpc_req_finished()
 * at "out:" relies on md_intent_lock() always storing into &req, even on
 * error -- confirm against md_intent_lock()'s contract.
 */
static int ll_intent_file_open(struct dentry *dentry, void *lmm,
			       int lmmsize, struct lookup_intent *itp)
{
	struct inode *inode = d_inode(dentry);
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct dentry *parent = dentry->d_parent;
	const char *name = dentry->d_name.name;
	const int len = dentry->d_name.len;
	struct md_op_data *op_data;
	struct ptlrpc_request *req;
	__u32 opc = LUSTRE_OPC_ANY;
	int rc;

	/* Usually we come here only for NFSD, and we want open lock.
	   But we can also get here with pre 2.6.15 patchless kernels, and in
	   that case that lock is also ok */
	/* We can also get here if there was cached open handle in revalidate_it
	 * but it disappeared while we were getting from there to ll_file_open.
	 * But this means this file was closed and immediately opened which
	 * makes a good candidate for using OPEN lock */
	/* If lmmsize & lmm are not 0, we are just setting stripe info
	 * parameters. No need for the open lock */
	if (lmm == NULL && lmmsize == 0) {
		itp->it_flags |= MDS_OPEN_LOCK;
		if (itp->it_flags & FMODE_WRITE)
			opc = LUSTRE_OPC_CREATE;
	}

	op_data = ll_prep_md_op_data(NULL, d_inode(parent),
				     inode, name, len,
				     O_RDWR, opc, NULL);
	if (IS_ERR(op_data))
		return PTR_ERR(op_data);

	itp->it_flags |= MDS_OPEN_BY_FID;
	rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
			    0 /*unused */, &req, ll_md_blocking_ast, 0);
	ll_finish_md_op_data(op_data);
	if (rc == -ESTALE) {
		/* reason for keep own exit path - don`t flood log
		 * with messages with -ESTALE errors.
		 */
		if (!it_disposition(itp, DISP_OPEN_OPEN) ||
		    it_open_error(DISP_OPEN_OPEN, itp))
			goto out;
		ll_release_openhandle(inode, itp);
		goto out;
	}

	if (it_disposition(itp, DISP_LOOKUP_NEG)) {
		rc = -ENOENT;
		goto out;
	}

	if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
		rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
		CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
		goto out;
	}

	rc = ll_prep_inode(&inode, req, NULL, itp);
	if (!rc && itp->d.lustre.it_lock_mode)
		/* A lock mode came back: attach the lock to the inode. */
		ll_set_lock_data(sbi->ll_md_exp, inode, itp, NULL);

out:
	ptlrpc_req_finished(req);
	ll_intent_drop_lock(itp);

	return rc;
}
458
459 /**
460 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
461 * not believe attributes if a few ioepoch holders exist. Attributes for
462 * previous ioepoch if new one is opened are also skipped by MDS.
463 */
ll_ioepoch_open(struct ll_inode_info * lli,__u64 ioepoch)464 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
465 {
466 if (ioepoch && lli->lli_ioepoch != ioepoch) {
467 lli->lli_ioepoch = ioepoch;
468 CDEBUG(D_INODE, "Epoch %llu opened on "DFID"\n",
469 ioepoch, PFID(&lli->lli_fid));
470 }
471 }
472
ll_och_fill(struct obd_export * md_exp,struct lookup_intent * it,struct obd_client_handle * och)473 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
474 struct obd_client_handle *och)
475 {
476 struct ptlrpc_request *req = it->d.lustre.it_data;
477 struct mdt_body *body;
478
479 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
480 och->och_fh = body->handle;
481 och->och_fid = body->fid1;
482 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
483 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
484 och->och_flags = it->it_flags;
485
486 return md_set_open_replay_data(md_exp, och, it);
487 }
488
ll_local_open(struct file * file,struct lookup_intent * it,struct ll_file_data * fd,struct obd_client_handle * och)489 static int ll_local_open(struct file *file, struct lookup_intent *it,
490 struct ll_file_data *fd, struct obd_client_handle *och)
491 {
492 struct inode *inode = file_inode(file);
493 struct ll_inode_info *lli = ll_i2info(inode);
494
495 LASSERT(!LUSTRE_FPRIVATE(file));
496
497 LASSERT(fd != NULL);
498
499 if (och) {
500 struct ptlrpc_request *req = it->d.lustre.it_data;
501 struct mdt_body *body;
502 int rc;
503
504 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
505 if (rc != 0)
506 return rc;
507
508 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
509 ll_ioepoch_open(lli, body->ioepoch);
510 }
511
512 LUSTRE_FPRIVATE(file) = fd;
513 ll_readahead_init(inode, &fd->fd_ras);
514 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
515 return 0;
516 }
517
/* Open a file, and (for the very first open) create objects on the OSTs at
 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
 * creation or open until ll_lov_setstripe() ioctl is called.
 *
 * If we already have the stripe MD locally then we don't request it in
 * md_open(), by passing a lmm_size = 0.
 *
 * It is up to the application to ensure no other processes open this file
 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
 * used.  We might be able to avoid races of that sort by getting lli_open_sem
 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
 */
int ll_file_open(struct inode *inode, struct file *file)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct lookup_intent *it, oit = { .it_op = IT_OPEN,
					  .it_flags = file->f_flags };
	struct obd_client_handle **och_p = NULL;
	__u64 *och_usecount = NULL;
	struct ll_file_data *fd;
	int rc = 0, opendir_set = 0;

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
	       inode->i_generation, inode, file->f_flags);

	it = file->private_data; /* XXX: compat macro */
	file->private_data = NULL; /* prevent ll_local_open assertion */

	fd = ll_file_data_get();
	if (fd == NULL) {
		rc = -ENOMEM;
		goto out_openerr;
	}

	fd->fd_file = file;
	if (S_ISDIR(inode->i_mode)) {
		/* Claim statahead ownership for this dir if nobody has it. */
		spin_lock(&lli->lli_sa_lock);
		if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
		    lli->lli_opendir_pid == 0) {
			lli->lli_opendir_key = fd;
			lli->lli_opendir_pid = current_pid();
			opendir_set = 1;
		}
		spin_unlock(&lli->lli_sa_lock);
	}

	/* The root inode needs no MDS open; just attach the file data. */
	if (is_root_inode(inode)) {
		LUSTRE_FPRIVATE(file) = fd;
		return 0;
	}

	if (!it || !it->d.lustre.it_disposition) {
		/* Convert f_flags into access mode. We cannot use file->f_mode,
		 * because everything but O_ACCMODE mask was stripped from
		 * there */
		if ((oit.it_flags + 1) & O_ACCMODE)
			oit.it_flags++;
		if (file->f_flags & O_TRUNC)
			oit.it_flags |= FMODE_WRITE;

		/* kernel only call f_op->open in dentry_open.  filp_open calls
		 * dentry_open after call to open_namei that checks permissions.
		 * Only nfsd_open call dentry_open directly without checking
		 * permissions and because of that this code below is safe. */
		if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
			oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;

		/* We do not want O_EXCL here, presumably we opened the file
		 * already? XXX - NFS implications? */
		oit.it_flags &= ~O_EXCL;

		/* bug20584, if "it_flags" contains O_CREAT, the file will be
		 * created if necessary, then "IT_CREAT" should be set to keep
		 * consistent with it */
		if (oit.it_flags & O_CREAT)
			oit.it_op |= IT_CREAT;

		it = &oit;
	}

restart:
	/* Let's see if we have file open on MDS already. */
	if (it->it_flags & FMODE_WRITE) {
		och_p = &lli->lli_mds_write_och;
		och_usecount = &lli->lli_open_fd_write_count;
	} else if (it->it_flags & FMODE_EXEC) {
		och_p = &lli->lli_mds_exec_och;
		och_usecount = &lli->lli_open_fd_exec_count;
	} else {
		och_p = &lli->lli_mds_read_och;
		och_usecount = &lli->lli_open_fd_read_count;
	}

	mutex_lock(&lli->lli_och_mutex);
	if (*och_p) { /* Open handle is present */
		if (it_disposition(it, DISP_OPEN_OPEN)) {
			/* Well, there's extra open request that we do not need,
			   let's close it somehow. This will decref request. */
			rc = it_open_error(DISP_OPEN_OPEN, it);
			if (rc) {
				mutex_unlock(&lli->lli_och_mutex);
				goto out_openerr;
			}

			ll_release_openhandle(inode, it);
		}
		(*och_usecount)++;

		rc = ll_local_open(file, it, fd, NULL);
		if (rc) {
			(*och_usecount)--;
			mutex_unlock(&lli->lli_och_mutex);
			goto out_openerr;
		}
	} else {
		LASSERT(*och_usecount == 0);
		if (!it->d.lustre.it_disposition) {
			/* We cannot just request lock handle now, new ELC code
			   means that one of other OPEN locks for this file
			   could be cancelled, and since blocking ast handler
			   would attempt to grab och_mutex as well, that would
			   result in a deadlock */
			mutex_unlock(&lli->lli_och_mutex);
			it->it_create_mode |= M_CHECK_STALE;
			rc = ll_intent_file_open(file->f_path.dentry, NULL, 0, it);
			it->it_create_mode &= ~M_CHECK_STALE;
			if (rc)
				goto out_openerr;

			/* Re-evaluate the handle slots with the intent done. */
			goto restart;
		}
		*och_p = kzalloc(sizeof(struct obd_client_handle), GFP_NOFS);
		if (!*och_p) {
			rc = -ENOMEM;
			goto out_och_free;
		}

		(*och_usecount)++;

		/* md_intent_lock() didn't get a request ref if there was an
		 * open error, so don't do cleanup on the request here
		 * (bug 3430) */
		/* XXX (green): Should not we bail out on any error here, not
		 * just open error? */
		rc = it_open_error(DISP_OPEN_OPEN, it);
		if (rc)
			goto out_och_free;

		LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));

		rc = ll_local_open(file, it, fd, *och_p);
		if (rc)
			goto out_och_free;
	}
	mutex_unlock(&lli->lli_och_mutex);
	fd = NULL; /* ownership transferred to LUSTRE_FPRIVATE(file) */

	/* Must do this outside lli_och_mutex lock to prevent deadlock where
	   different kind of OPEN lock for this same inode gets cancelled
	   by ldlm_cancel_lru */
	if (!S_ISREG(inode->i_mode))
		goto out_och_free;

	ll_capa_open(inode);

	if (!lli->lli_has_smd &&
	    (cl_is_lov_delay_create(file->f_flags) ||
	     (file->f_mode & FMODE_WRITE) == 0)) {
		CDEBUG(D_INODE, "object creation was delayed\n");
		goto out_och_free;
	}
	cl_lov_delay_create_clear(&file->f_flags);
	goto out_och_free;

out_och_free:
	if (rc) {
		if (och_p && *och_p) {
			OBD_FREE(*och_p, sizeof(struct obd_client_handle));
			*och_p = NULL; /* OBD_FREE writes some magic there */
			(*och_usecount)--;
		}
		mutex_unlock(&lli->lli_och_mutex);

out_openerr:
		if (opendir_set != 0)
			ll_stop_statahead(inode, lli->lli_opendir_key);
		if (fd != NULL)
			ll_file_data_put(fd);
	} else {
		ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
	}

	/* Drop the intent's request reference taken for the open. */
	if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
		ptlrpc_req_finished(it->d.lustre.it_data);
		it_clear_disposition(it, DISP_ENQ_OPEN_REF);
	}

	return rc;
}
718
ll_md_blocking_lease_ast(struct ldlm_lock * lock,struct ldlm_lock_desc * desc,void * data,int flag)719 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
720 struct ldlm_lock_desc *desc, void *data, int flag)
721 {
722 int rc;
723 struct lustre_handle lockh;
724
725 switch (flag) {
726 case LDLM_CB_BLOCKING:
727 ldlm_lock2handle(lock, &lockh);
728 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
729 if (rc < 0) {
730 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
731 return rc;
732 }
733 break;
734 case LDLM_CB_CANCELING:
735 /* do nothing */
736 break;
737 }
738 return 0;
739 }
740
/**
 * Acquire a lease and open the file.
 *
 * @fmode must be exactly FMODE_READ or FMODE_WRITE.  When @file is given,
 * the existing per-inode open handle is taken over (only possible when it
 * has a single user) so the MDT sees the lease request from the same owner.
 * Returns the new obd_client_handle or an ERR_PTR.
 */
static struct obd_client_handle *
ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
	      __u64 open_flags)
{
	struct lookup_intent it = { .it_op = IT_OPEN };
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct md_op_data *op_data;
	struct ptlrpc_request *req;
	struct lustre_handle old_handle = { 0 };
	struct obd_client_handle *och = NULL;
	int rc;
	int rc2;

	if (fmode != FMODE_WRITE && fmode != FMODE_READ)
		return ERR_PTR(-EINVAL);

	if (file != NULL) {
		struct ll_inode_info *lli = ll_i2info(inode);
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
		struct obd_client_handle **och_p;
		__u64 *och_usecount;

		/* The descriptor's mode must cover @fmode; exec is excluded. */
		if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
			return ERR_PTR(-EPERM);

		/* Get the openhandle of the file */
		rc = -EBUSY;
		mutex_lock(&lli->lli_och_mutex);
		if (fd->fd_lease_och != NULL) {
			/* This descriptor already holds a lease. */
			mutex_unlock(&lli->lli_och_mutex);
			return ERR_PTR(rc);
		}

		if (fd->fd_och == NULL) {
			if (file->f_mode & FMODE_WRITE) {
				LASSERT(lli->lli_mds_write_och != NULL);
				och_p = &lli->lli_mds_write_och;
				och_usecount = &lli->lli_open_fd_write_count;
			} else {
				LASSERT(lli->lli_mds_read_och != NULL);
				och_p = &lli->lli_mds_read_och;
				och_usecount = &lli->lli_open_fd_read_count;
			}
			/* Take over the shared handle only if we are its
			 * sole user. */
			if (*och_usecount == 1) {
				fd->fd_och = *och_p;
				*och_p = NULL;
				*och_usecount = 0;
				rc = 0;
			}
		}
		mutex_unlock(&lli->lli_och_mutex);
		if (rc < 0) /* more than 1 opener */
			return ERR_PTR(rc);

		LASSERT(fd->fd_och != NULL);
		old_handle = fd->fd_och->och_fh;
	}

	och = kzalloc(sizeof(*och), GFP_NOFS);
	if (!och)
		return ERR_PTR(-ENOMEM);

	op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
				     LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data)) {
		rc = PTR_ERR(op_data);
		goto out;
	}

	/* To tell the MDT this openhandle is from the same owner */
	op_data->op_handle = old_handle;

	it.it_flags = fmode | open_flags;
	it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
	rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
			    ll_md_blocking_lease_ast,
	/* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
	 * it can be cancelled which may mislead applications that the lease is
	 * broken;
	 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
	 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
	 * doesn't deal with openhandle, so normal openhandle will be leaked. */
			    LDLM_FL_NO_LRU | LDLM_FL_EXCL);
	ll_finish_md_op_data(op_data);
	ptlrpc_req_finished(req);
	if (rc < 0)
		goto out_release_it;

	if (it_disposition(&it, DISP_LOOKUP_NEG)) {
		rc = -ENOENT;
		goto out_release_it;
	}

	rc = it_open_error(DISP_OPEN_OPEN, &it);
	if (rc)
		goto out_release_it;

	LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
	ll_och_fill(sbi->ll_md_exp, &it, och);

	if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */ {
		rc = -EOPNOTSUPP;
		goto out_close;
	}

	/* already get lease, handle lease lock */
	ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
	if (it.d.lustre.it_lock_mode == 0 ||
	    it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
		/* open lock must return for lease */
		CERROR(DFID "lease granted but no open lock, %d/%llu.\n",
		       PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
		       it.d.lustre.it_lock_bits);
		rc = -EPROTO;
		goto out_close;
	}

	ll_intent_release(&it);
	return och;

out_close:
	rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
	if (rc2)
		CERROR("Close openhandle returned %d\n", rc2);

	/* cancel open lock */
	if (it.d.lustre.it_lock_mode != 0) {
		ldlm_lock_decref_and_cancel(&och->och_lease_handle,
					    it.d.lustre.it_lock_mode);
		it.d.lustre.it_lock_mode = 0;
	}
out_release_it:
	ll_intent_release(&it);
out:
	OBD_FREE_PTR(och);
	return ERR_PTR(rc);
}
881
882 /**
883 * Release lease and close the file.
884 * It will check if the lease has ever broken.
885 */
ll_lease_close(struct obd_client_handle * och,struct inode * inode,bool * lease_broken)886 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
887 bool *lease_broken)
888 {
889 struct ldlm_lock *lock;
890 bool cancelled = true;
891 int rc;
892
893 lock = ldlm_handle2lock(&och->och_lease_handle);
894 if (lock != NULL) {
895 lock_res_and_lock(lock);
896 cancelled = ldlm_is_cancel(lock);
897 unlock_res_and_lock(lock);
898 ldlm_lock_put(lock);
899 }
900
901 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
902 PFID(&ll_i2info(inode)->lli_fid), cancelled);
903
904 if (!cancelled)
905 ldlm_cli_cancel(&och->och_lease_handle, 0);
906 if (lease_broken != NULL)
907 *lease_broken = cancelled;
908
909 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
910 NULL);
911 return rc;
912 }
913
914 /* Fills the obdo with the attributes for the lsm */
ll_lsm_getattr(struct lov_stripe_md * lsm,struct obd_export * exp,struct obd_capa * capa,struct obdo * obdo,__u64 ioepoch,int sync)915 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
916 struct obd_capa *capa, struct obdo *obdo,
917 __u64 ioepoch, int sync)
918 {
919 struct ptlrpc_request_set *set;
920 struct obd_info oinfo = { { { 0 } } };
921 int rc;
922
923 LASSERT(lsm != NULL);
924
925 oinfo.oi_md = lsm;
926 oinfo.oi_oa = obdo;
927 oinfo.oi_oa->o_oi = lsm->lsm_oi;
928 oinfo.oi_oa->o_mode = S_IFREG;
929 oinfo.oi_oa->o_ioepoch = ioepoch;
930 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
931 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
932 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
933 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
934 OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
935 OBD_MD_FLDATAVERSION;
936 oinfo.oi_capa = capa;
937 if (sync) {
938 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
939 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
940 }
941
942 set = ptlrpc_prep_set();
943 if (set == NULL) {
944 CERROR("can't allocate ptlrpc set\n");
945 rc = -ENOMEM;
946 } else {
947 rc = obd_getattr_async(exp, &oinfo, set);
948 if (rc == 0)
949 rc = ptlrpc_set_wait(set);
950 ptlrpc_set_destroy(set);
951 }
952 if (rc == 0)
953 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
954 OBD_MD_FLATIME | OBD_MD_FLMTIME |
955 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
956 OBD_MD_FLDATAVERSION);
957 return rc;
958 }
959
960 /**
961 * Performs the getattr on the inode and updates its fields.
962 * If @sync != 0, perform the getattr under the server-side lock.
963 */
ll_inode_getattr(struct inode * inode,struct obdo * obdo,__u64 ioepoch,int sync)964 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
965 __u64 ioepoch, int sync)
966 {
967 struct obd_capa *capa = ll_mdscapa_get(inode);
968 struct lov_stripe_md *lsm;
969 int rc;
970
971 lsm = ccc_inode_lsm_get(inode);
972 rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
973 capa, obdo, ioepoch, sync);
974 capa_put(capa);
975 if (rc == 0) {
976 struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
977
978 obdo_refresh_inode(inode, obdo, obdo->o_valid);
979 CDEBUG(D_INODE, "objid " DOSTID " size %llu, blocks %llu, blksize %lu\n",
980 POSTID(oi), i_size_read(inode),
981 (unsigned long long)inode->i_blocks,
982 1UL << inode->i_blkbits);
983 }
984 ccc_inode_lsm_put(inode, lsm);
985 return rc;
986 }
987
/*
 * Merge MDS-cached timestamps (lli_lvb) with the size/blocks/timestamps
 * reported by the cl_object (OST side), taking the newer of each timestamp,
 * and write the result back into the VFS inode under the inode size lock.
 * Returns 0 or the error from cl_object_attr_get().
 */
int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct cl_object *obj = lli->lli_clob;
	struct cl_attr *attr = ccc_env_thread_attr(env);
	struct ost_lvb lvb;
	int rc = 0;

	ll_inode_size_lock(inode);
	/* merge timestamps the most recently obtained from mds with
	   timestamps obtained from osts */
	LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
	LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
	LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;

	/* snapshot the MDS view before consulting the cl_object */
	lvb.lvb_size = i_size_read(inode);
	lvb.lvb_blocks = inode->i_blocks;
	lvb.lvb_mtime = LTIME_S(inode->i_mtime);
	lvb.lvb_atime = LTIME_S(inode->i_atime);
	lvb.lvb_ctime = LTIME_S(inode->i_ctime);

	cl_object_attr_lock(obj);
	rc = cl_object_attr_get(env, obj, attr);
	cl_object_attr_unlock(obj);

	if (rc == 0) {
		/* keep the newer of the MDS and OST timestamps */
		if (lvb.lvb_atime < attr->cat_atime)
			lvb.lvb_atime = attr->cat_atime;
		if (lvb.lvb_ctime < attr->cat_ctime)
			lvb.lvb_ctime = attr->cat_ctime;
		if (lvb.lvb_mtime < attr->cat_mtime)
			lvb.lvb_mtime = attr->cat_mtime;

		CDEBUG(D_VFSTRACE, DFID" updating i_size %llu\n",
		       PFID(&lli->lli_fid), attr->cat_size);
		cl_isize_write_nolock(inode, attr->cat_size);

		inode->i_blocks = attr->cat_blocks;

		LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
		LTIME_S(inode->i_atime) = lvb.lvb_atime;
		LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
	}
	ll_inode_size_unlock(inode);

	return rc;
}
1035
/*
 * Fill an lstat_t with up-to-date size/blocks/timestamps for the given
 * layout by asking the OSTs (no server-side lock is taken).
 */
int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
		     lstat_t *st)
{
	struct obdo obdo = { 0 };
	int rc;

	rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
	if (rc != 0)
		return rc;

	st->st_size = obdo.o_size;
	st->st_blocks = obdo.o_blocks;
	st->st_mtime = obdo.o_mtime;
	st->st_atime = obdo.o_atime;
	st->st_ctime = obdo.o_ctime;

	return 0;
}
1052
file_is_noatime(const struct file * file)1053 static bool file_is_noatime(const struct file *file)
1054 {
1055 const struct vfsmount *mnt = file->f_path.mnt;
1056 const struct inode *inode = file_inode(file);
1057
1058 /* Adapted from file_accessed() and touch_atime().*/
1059 if (file->f_flags & O_NOATIME)
1060 return true;
1061
1062 if (inode->i_flags & S_NOATIME)
1063 return true;
1064
1065 if (IS_NOATIME(inode))
1066 return true;
1067
1068 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1069 return true;
1070
1071 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1072 return true;
1073
1074 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
1075 return true;
1076
1077 return false;
1078 }
1079
ll_io_init(struct cl_io * io,const struct file * file,int write)1080 void ll_io_init(struct cl_io *io, const struct file *file, int write)
1081 {
1082 struct inode *inode = file_inode(file);
1083
1084 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1085 if (write) {
1086 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1087 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1088 file->f_flags & O_DIRECT ||
1089 IS_SYNC(inode);
1090 }
1091 io->ci_obj = ll_i2info(inode)->lli_clob;
1092 io->ci_lockreq = CILR_MAYBE;
1093 if (ll_file_nolock(file)) {
1094 io->ci_lockreq = CILR_NEVER;
1095 io->ci_no_srvlock = 1;
1096 } else if (file->f_flags & O_APPEND) {
1097 io->ci_lockreq = CILR_MANDATORY;
1098 }
1099
1100 io->ci_noatime = file_is_noatime(file);
1101 }
1102
/*
 * Common engine behind read/write/splice: build a cl_io, take the locks
 * required by the IO subtype, run the cl_io loop and account the result.
 *
 * \param env	lu environment of the calling thread
 * \param args	IO subtype plus subtype-specific arguments (iter/iocb or pipe)
 * \param file	file being read or written
 * \param iot	CIT_READ or CIT_WRITE
 * \param ppos	in: start offset; out: updated position after the IO
 * \param count	number of bytes to transfer
 *
 * \retval number of bytes transferred, or negative errno
 */
static ssize_t
ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
		   struct file *file, enum cl_io_type iot,
		   loff_t *ppos, size_t count)
{
	struct ll_inode_info *lli = ll_i2info(file_inode(file));
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct cl_io *io;
	ssize_t result;

restart:
	io = ccc_env_thread_io(env);
	ll_io_init(io, file, iot == CIT_WRITE);

	if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
		struct vvp_io *vio = vvp_env_io(env);
		struct ccc_io *cio = ccc_env_io(env);
		int write_mutex_locked = 0;

		cio->cui_fd = LUSTRE_FPRIVATE(file);
		vio->cui_io_subtype = args->via_io_subtype;

		switch (vio->cui_io_subtype) {
		case IO_NORMAL:
			cio->cui_iter = args->u.normal.via_iter;
			cio->cui_iocb = args->u.normal.via_iocb;
			/* writes are serialized by lli_write_mutex unless the
			 * caller holds a group lock; reads only block against
			 * concurrent truncate via lli_trunc_sem */
			if ((iot == CIT_WRITE) &&
			    !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
				if (mutex_lock_interruptible(&lli->
							       lli_write_mutex)) {
					result = -ERESTARTSYS;
					goto out;
				}
				write_mutex_locked = 1;
			} else if (iot == CIT_READ) {
				down_read(&lli->lli_trunc_sem);
			}
			break;
		case IO_SPLICE:
			vio->u.splice.cui_pipe = args->u.splice.via_pipe;
			vio->u.splice.cui_flags = args->u.splice.via_flags;
			break;
		default:
			CERROR("Unknown IO type - %u\n", vio->cui_io_subtype);
			LBUG();
		}
		result = cl_io_loop(env, io);
		if (write_mutex_locked)
			mutex_unlock(&lli->lli_write_mutex);
		else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
			up_read(&lli->lli_trunc_sem);
	} else {
		/* cl_io_rw_init() handled IO */
		result = io->ci_result;
	}

	/* a partial transfer counts as success: report bytes moved and
	 * advance the file position */
	if (io->ci_nob > 0) {
		result = io->ci_nob;
		*ppos = io->u.ci_wr.wr.crw_pos;
	}
	goto out;
out:
	cl_io_fini(env, io);
	/* If any bit been read/written (result != 0), we just return
	 * short read/write instead of restart io. */
	if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
		CDEBUG(D_VFSTRACE, "Restart %s on %pD from %lld, count:%zd\n",
		       iot == CIT_READ ? "read" : "write",
		       file, *ppos, count);
		LASSERTF(io->ci_nob == 0, "%zd", io->ci_nob);
		goto restart;
	}

	/* statistics and write-failure tracking; -ERESTARTSYS is a signal,
	 * not a real write failure */
	if (iot == CIT_READ) {
		if (result >= 0)
			ll_stats_ops_tally(ll_i2sbi(file_inode(file)),
					   LPROC_LL_READ_BYTES, result);
	} else if (iot == CIT_WRITE) {
		if (result >= 0) {
			ll_stats_ops_tally(ll_i2sbi(file_inode(file)),
					   LPROC_LL_WRITE_BYTES, result);
			fd->fd_write_failed = false;
		} else if (result != -ERESTARTSYS) {
			fd->fd_write_failed = true;
		}
	}

	return result;
}
1192
ll_file_read_iter(struct kiocb * iocb,struct iov_iter * to)1193 static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
1194 {
1195 struct lu_env *env;
1196 struct vvp_io_args *args;
1197 ssize_t result;
1198 int refcheck;
1199
1200 env = cl_env_get(&refcheck);
1201 if (IS_ERR(env))
1202 return PTR_ERR(env);
1203
1204 args = vvp_env_args(env, IO_NORMAL);
1205 args->u.normal.via_iter = to;
1206 args->u.normal.via_iocb = iocb;
1207
1208 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1209 &iocb->ki_pos, iov_iter_count(to));
1210 cl_env_put(env, &refcheck);
1211 return result;
1212 }
1213
1214 /*
1215 * Write to a file (through the page cache).
1216 */
ll_file_write_iter(struct kiocb * iocb,struct iov_iter * from)1217 static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1218 {
1219 struct lu_env *env;
1220 struct vvp_io_args *args;
1221 ssize_t result;
1222 int refcheck;
1223
1224 env = cl_env_get(&refcheck);
1225 if (IS_ERR(env))
1226 return PTR_ERR(env);
1227
1228 args = vvp_env_args(env, IO_NORMAL);
1229 args->u.normal.via_iter = from;
1230 args->u.normal.via_iocb = iocb;
1231
1232 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1233 &iocb->ki_pos, iov_iter_count(from));
1234 cl_env_put(env, &refcheck);
1235 return result;
1236 }
1237
1238 /*
1239 * Send file content (through pagecache) somewhere with helper
1240 */
ll_file_splice_read(struct file * in_file,loff_t * ppos,struct pipe_inode_info * pipe,size_t count,unsigned int flags)1241 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1242 struct pipe_inode_info *pipe, size_t count,
1243 unsigned int flags)
1244 {
1245 struct lu_env *env;
1246 struct vvp_io_args *args;
1247 ssize_t result;
1248 int refcheck;
1249
1250 env = cl_env_get(&refcheck);
1251 if (IS_ERR(env))
1252 return PTR_ERR(env);
1253
1254 args = vvp_env_args(env, IO_SPLICE);
1255 args->u.splice.via_pipe = pipe;
1256 args->u.splice.via_flags = flags;
1257
1258 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1259 cl_env_put(env, &refcheck);
1260 return result;
1261 }
1262
/*
 * Ask the OST to recreate the object identified by @oi on stripe @ost_idx
 * for @inode, using a private copy of the inode's current layout.
 */
static int ll_lov_recreate(struct inode *inode, struct ost_id *oi, u32 ost_idx)
{
	struct obd_export *exp = ll_i2dtexp(inode);
	struct obd_trans_info oti = { 0 };
	struct lov_stripe_md *lsm;
	struct lov_stripe_md *lsm_copy = NULL;
	struct obdo *oa = NULL;
	int lsm_size;
	int rc = 0;

	OBDO_ALLOC(oa);
	if (oa == NULL)
		return -ENOMEM;

	lsm = ccc_inode_lsm_get(inode);
	if (!lsm_has_objects(lsm)) {
		rc = -ENOENT;
		goto out;
	}

	lsm_size = sizeof(*lsm) +
		   sizeof(struct lov_oinfo) * lsm->lsm_stripe_count;

	/* obd_create() may modify the layout, so work on a private copy */
	OBD_ALLOC_LARGE(lsm_copy, lsm_size);
	if (lsm_copy == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	oa->o_oi = *oi;
	oa->o_nlink = ost_idx;
	oa->o_flags |= OBD_FL_RECREATE_OBJS;
	oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
	obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
			OBD_MD_FLMTIME | OBD_MD_FLCTIME);
	obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
	memcpy(lsm_copy, lsm, lsm_size);

	ll_inode_size_lock(inode);
	rc = obd_create(NULL, exp, oa, &lsm_copy, &oti);
	ll_inode_size_unlock(inode);

	OBD_FREE_LARGE(lsm_copy, lsm_size);
out:
	ccc_inode_lsm_put(inode, lsm);
	OBDO_FREE(oa);
	return rc;
}
1310
ll_lov_recreate_obj(struct inode * inode,unsigned long arg)1311 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1312 {
1313 struct ll_recreate_obj ucreat;
1314 struct ost_id oi;
1315
1316 if (!capable(CFS_CAP_SYS_ADMIN))
1317 return -EPERM;
1318
1319 if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1320 sizeof(ucreat)))
1321 return -EFAULT;
1322
1323 ostid_set_seq_mdt0(&oi);
1324 ostid_set_id(&oi, ucreat.lrc_id);
1325 return ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx);
1326 }
1327
ll_lov_recreate_fid(struct inode * inode,unsigned long arg)1328 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1329 {
1330 struct lu_fid fid;
1331 struct ost_id oi;
1332 u32 ost_idx;
1333
1334 if (!capable(CFS_CAP_SYS_ADMIN))
1335 return -EPERM;
1336
1337 if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))
1338 return -EFAULT;
1339
1340 fid_to_ostid(&fid, &oi);
1341 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1342 return ll_lov_recreate(inode, &oi, ost_idx);
1343 }
1344
/*
 * Set the striping EA on @inode by re-opening it with the user-supplied
 * lov_user_md in the open intent.  Fails with -EEXIST if the file already
 * has a layout.
 *
 * \param inode	inode to stripe
 * \param dentry	dentry used for the intent open
 * \param flags	open flags for the intent
 * \param lum	user-supplied striping descriptor (already copied in)
 * \param lum_size	size of @lum in bytes
 *
 * \retval 0 on success, negative errno otherwise
 */
int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
			     int flags, struct lov_user_md *lum, int lum_size)
{
	struct lov_stripe_md *lsm = NULL;
	struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
	int rc = 0;

	/* a layout may only be set once per file */
	lsm = ccc_inode_lsm_get(inode);
	if (lsm != NULL) {
		ccc_inode_lsm_put(inode, lsm);
		CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
		       inode->i_ino);
		rc = -EEXIST;
		goto out;
	}

	/* size lock serializes against concurrent layout creation */
	ll_inode_size_lock(inode);
	rc = ll_intent_file_open(dentry, lum, lum_size, &oit);
	if (rc)
		goto out_unlock;
	rc = oit.d.lustre.it_status;
	if (rc < 0)
		goto out_req_free;

	/* the intent open created the layout; the handle itself is not kept */
	ll_release_openhandle(inode, &oit);

out_unlock:
	ll_inode_size_unlock(inode);
	ll_intent_release(&oit);
	/* NOTE(review): lsm is NULL on every path reaching here, so this put
	 * is presumably a no-op — confirm against ccc_inode_lsm_put() */
	ccc_inode_lsm_put(inode, lsm);
out:
	return rc;
out_req_free:
	ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
	goto out;
}
1381
/*
 * Fetch the LOV EA (striping information) of @filename (relative to
 * directory @inode) from the MDS.
 *
 * On success *lmmp points into the reply buffer of *request; the caller
 * owns *request and must ptlrpc_req_finished() it (which also releases
 * the lmm memory).  The EA is byte-swapped to host endianness on
 * big-endian machines.
 *
 * \param inode	parent directory
 * \param filename	name of the entry whose EA is requested
 * \param lmmp	out: pointer to the (possibly swabbed) lov_mds_md
 * \param lmm_size	out: size of the EA in bytes
 * \param request	out: the RPC holding the reply buffer
 *
 * \retval 0 on success, negative errno otherwise
 */
int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
			     struct lov_mds_md **lmmp, int *lmm_size,
			     struct ptlrpc_request **request)
{
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct mdt_body *body;
	struct lov_mds_md *lmm = NULL;
	struct ptlrpc_request *req = NULL;
	struct md_op_data *op_data;
	int rc, lmmsize;

	rc = ll_get_default_mdsize(sbi, &lmmsize);
	if (rc)
		return rc;

	op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
				     strlen(filename), lmmsize,
				     LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data))
		return PTR_ERR(op_data);

	op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
	rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
	ll_finish_md_op_data(op_data);
	if (rc < 0) {
		CDEBUG(D_INFO, "md_getattr_name failed on %s: rc %d\n",
		       filename, rc);
		goto out;
	}

	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
	LASSERT(body != NULL); /* checked by mdc_getattr_name */

	lmmsize = body->eadatasize;

	/* no EA present on the file/directory */
	if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
	    lmmsize == 0) {
		rc = -ENODATA;
		goto out;
	}

	lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
	LASSERT(lmm != NULL);

	/* only V1 and V3 layouts are understood here */
	if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
	    (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
		rc = -EPROTO;
		goto out;
	}

	/*
	 * This is coming from the MDS, so is probably in
	 * little endian. We convert it to host endian before
	 * passing it to userspace.
	 */
	/* the condition is only true on big-endian hosts, where the EA
	 * actually needs swabbing */
	if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
		int stripe_count;

		stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
		/* a released (HSM-archived) layout carries no objects */
		if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
			stripe_count = 0;

		/* if function called for directory - we should
		 * avoid swab not existent lsm objects */
		if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
			lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
			if (S_ISREG(body->mode))
				lustre_swab_lov_user_md_objects(
				 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
				 stripe_count);
		} else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
			lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
			if (S_ISREG(body->mode))
				lustre_swab_lov_user_md_objects(
				 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
				 stripe_count);
		}
	}

out:
	*lmmp = lmm;
	*lmm_size = lmmsize;
	*request = req;
	return rc;
}
1467
ll_lov_setea(struct inode * inode,struct file * file,unsigned long arg)1468 static int ll_lov_setea(struct inode *inode, struct file *file,
1469 unsigned long arg)
1470 {
1471 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1472 struct lov_user_md *lump;
1473 int lum_size = sizeof(struct lov_user_md) +
1474 sizeof(struct lov_user_ost_data);
1475 int rc;
1476
1477 if (!capable(CFS_CAP_SYS_ADMIN))
1478 return -EPERM;
1479
1480 OBD_ALLOC_LARGE(lump, lum_size);
1481 if (lump == NULL)
1482 return -ENOMEM;
1483
1484 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1485 OBD_FREE_LARGE(lump, lum_size);
1486 return -EFAULT;
1487 }
1488
1489 rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, lump,
1490 lum_size);
1491 cl_lov_delay_create_clear(&file->f_flags);
1492
1493 OBD_FREE_LARGE(lump, lum_size);
1494 return rc;
1495 }
1496
ll_lov_setstripe(struct inode * inode,struct file * file,unsigned long arg)1497 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1498 unsigned long arg)
1499 {
1500 struct lov_user_md_v3 lumv3;
1501 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1502 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1503 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1504 int lum_size, rc;
1505 int flags = FMODE_WRITE;
1506
1507 /* first try with v1 which is smaller than v3 */
1508 lum_size = sizeof(struct lov_user_md_v1);
1509 if (copy_from_user(lumv1, lumv1p, lum_size))
1510 return -EFAULT;
1511
1512 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1513 lum_size = sizeof(struct lov_user_md_v3);
1514 if (copy_from_user(&lumv3, lumv3p, lum_size))
1515 return -EFAULT;
1516 }
1517
1518 rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, lumv1,
1519 lum_size);
1520 cl_lov_delay_create_clear(&file->f_flags);
1521 if (rc == 0) {
1522 struct lov_stripe_md *lsm;
1523 __u32 gen;
1524
1525 put_user(0, &lumv1p->lmm_stripe_count);
1526
1527 ll_layout_refresh(inode, &gen);
1528 lsm = ccc_inode_lsm_get(inode);
1529 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1530 0, lsm, (void *)arg);
1531 ccc_inode_lsm_put(inode, lsm);
1532 }
1533 return rc;
1534 }
1535
ll_lov_getstripe(struct inode * inode,unsigned long arg)1536 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1537 {
1538 struct lov_stripe_md *lsm;
1539 int rc = -ENODATA;
1540
1541 lsm = ccc_inode_lsm_get(inode);
1542 if (lsm != NULL)
1543 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
1544 lsm, (void *)arg);
1545 ccc_inode_lsm_put(inode, lsm);
1546 return rc;
1547 }
1548
/*
 * Acquire a group lock (gid == @arg) on @inode for this open file.
 *
 * The check/acquire/recheck dance is needed because cl_get_grouplock() may
 * block and cannot be called under lli_lock: another thread may win the
 * race while the spinlock is dropped.
 *
 * \retval 0 on success, -EINVAL for gid 0 or an already-held/raced lock,
 *	   -EOPNOTSUPP when the file was opened with locking disabled
 */
static int
ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ccc_grouplock grouplock;
	int rc;

	if (arg == 0) {
		CWARN("group id for group lock must not be 0\n");
		return -EINVAL;
	}

	if (ll_file_nolock(file))
		return -EOPNOTSUPP;

	/* first check under the lock: at most one group lock per fd */
	spin_lock(&lli->lli_lock);
	if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
		CWARN("group lock already existed with gid %lu\n",
		      fd->fd_grouplock.cg_gid);
		spin_unlock(&lli->lli_lock);
		return -EINVAL;
	}
	LASSERT(fd->fd_grouplock.cg_lock == NULL);
	spin_unlock(&lli->lli_lock);

	/* may block; must not be called with lli_lock held */
	rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
			      arg, (file->f_flags & O_NONBLOCK), &grouplock);
	if (rc)
		return rc;

	/* recheck: another thread may have installed a lock meanwhile */
	spin_lock(&lli->lli_lock);
	if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
		spin_unlock(&lli->lli_lock);
		CERROR("another thread just won the race\n");
		cl_put_grouplock(&grouplock);
		return -EINVAL;
	}

	fd->fd_flags |= LL_FILE_GROUP_LOCKED;
	fd->fd_grouplock = grouplock;
	spin_unlock(&lli->lli_lock);

	CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
	return 0;
}
1595
/*
 * Release the group lock with gid == @arg held on this open file.
 *
 * The fd state is detached under lli_lock; the (potentially blocking)
 * cl_put_grouplock() is called only after the spinlock is dropped.
 *
 * \retval 0 on success, -EINVAL when no lock is held or the gid mismatches
 */
static int ll_put_grouplock(struct inode *inode, struct file *file,
			    unsigned long arg)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ccc_grouplock grouplock;

	spin_lock(&lli->lli_lock);
	if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
		spin_unlock(&lli->lli_lock);
		CWARN("no group lock held\n");
		return -EINVAL;
	}
	LASSERT(fd->fd_grouplock.cg_lock != NULL);

	if (fd->fd_grouplock.cg_gid != arg) {
		CWARN("group lock %lu doesn't match current id %lu\n",
		      arg, fd->fd_grouplock.cg_gid);
		spin_unlock(&lli->lli_lock);
		return -EINVAL;
	}

	/* take a local copy so the lock can be dropped before the
	 * potentially blocking cl_put_grouplock() call */
	grouplock = fd->fd_grouplock;
	memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
	fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
	spin_unlock(&lli->lli_lock);

	cl_put_grouplock(&grouplock);
	CDEBUG(D_INFO, "group lock %lu released\n", arg);
	return 0;
}
1627
1628 /**
1629 * Close inode open handle
1630 *
1631 * \param inode [in] inode in question
1632 * \param it [in,out] intent which contains open info and result
1633 *
1634 * \retval 0 success
1635 * \retval <0 failure
1636 */
ll_release_openhandle(struct inode * inode,struct lookup_intent * it)1637 int ll_release_openhandle(struct inode *inode, struct lookup_intent *it)
1638 {
1639 struct obd_client_handle *och;
1640 int rc;
1641
1642 LASSERT(inode);
1643
1644 /* Root ? Do nothing. */
1645 if (is_root_inode(inode))
1646 return 0;
1647
1648 /* No open handle to close? Move away */
1649 if (!it_disposition(it, DISP_OPEN_OPEN))
1650 return 0;
1651
1652 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1653
1654 och = kzalloc(sizeof(*och), GFP_NOFS);
1655 if (!och) {
1656 rc = -ENOMEM;
1657 goto out;
1658 }
1659
1660 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1661
1662 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1663 inode, och, NULL);
1664 out:
1665 /* this one is in place of ll_file_open */
1666 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1667 ptlrpc_req_finished(it->d.lustre.it_data);
1668 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1669 }
1670 return rc;
1671 }
1672
1673 /**
1674 * Get size for inode for which FIEMAP mapping is requested.
1675 * Make the FIEMAP get_info call and returns the result.
1676 */
ll_do_fiemap(struct inode * inode,struct ll_user_fiemap * fiemap,size_t num_bytes)1677 static int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1678 size_t num_bytes)
1679 {
1680 struct obd_export *exp = ll_i2dtexp(inode);
1681 struct lov_stripe_md *lsm = NULL;
1682 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1683 __u32 vallen = num_bytes;
1684 int rc;
1685
1686 /* Checks for fiemap flags */
1687 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1688 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1689 return -EBADR;
1690 }
1691
1692 /* Check for FIEMAP_FLAG_SYNC */
1693 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1694 rc = filemap_fdatawrite(inode->i_mapping);
1695 if (rc)
1696 return rc;
1697 }
1698
1699 lsm = ccc_inode_lsm_get(inode);
1700 if (lsm == NULL)
1701 return -ENOENT;
1702
1703 /* If the stripe_count > 1 and the application does not understand
1704 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1705 */
1706 if (lsm->lsm_stripe_count > 1 &&
1707 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER)) {
1708 rc = -EOPNOTSUPP;
1709 goto out;
1710 }
1711
1712 fm_key.oa.o_oi = lsm->lsm_oi;
1713 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1714
1715 if (i_size_read(inode) == 0) {
1716 rc = ll_glimpse_size(inode);
1717 if (rc)
1718 goto out;
1719 }
1720
1721 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
1722 obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
1723 /* If filesize is 0, then there would be no objects for mapping */
1724 if (fm_key.oa.o_size == 0) {
1725 fiemap->fm_mapped_extents = 0;
1726 rc = 0;
1727 goto out;
1728 }
1729
1730 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1731
1732 rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
1733 fiemap, lsm);
1734 if (rc)
1735 CERROR("obd_get_info failed: rc = %d\n", rc);
1736
1737 out:
1738 ccc_inode_lsm_put(inode, lsm);
1739 return rc;
1740 }
1741
/*
 * OBD_IOC_FID2PATH handler: resolve a FID to a path via the MDC.
 *
 * \param inode	any inode of the filesystem (used to reach the MD export)
 * \param arg	userspace pointer to a struct getinfo_fid2path followed by
 *		gf_pathlen bytes of path buffer
 *
 * \retval 0 on success, negative errno otherwise
 */
int ll_fid2path(struct inode *inode, void __user *arg)
{
	struct obd_export *exp = ll_i2mdexp(inode);
	const struct getinfo_fid2path __user *gfin = arg;
	struct getinfo_fid2path *gfout;
	u32 pathlen;
	size_t outsize;
	int rc;

	if (!capable(CFS_CAP_DAC_READ_SEARCH) &&
	    !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
		return -EPERM;

	/* Only need to get the buflen */
	if (get_user(pathlen, &gfin->gf_pathlen))
		return -EFAULT;

	if (pathlen > PATH_MAX)
		return -EINVAL;

	outsize = sizeof(*gfout) + pathlen;

	gfout = kzalloc(outsize, GFP_NOFS);
	if (!gfout)
		return -ENOMEM;

	if (copy_from_user(gfout, arg, sizeof(*gfout))) {
		rc = -EFAULT;
		goto gf_free;
	}

	/* Call mdc_iocontrol */
	rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
	if (rc != 0)
		goto gf_free;

	if (copy_to_user(arg, gfout, outsize))
		rc = -EFAULT;

gf_free:
	/* gfout came from kzalloc(), so release it with kfree(); using
	 * OBD_FREE() here would mispair the allocators and skew the OBD
	 * memory accounting */
	kfree(gfout);
	return rc;
}
1785
ll_ioctl_fiemap(struct inode * inode,unsigned long arg)1786 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1787 {
1788 struct ll_user_fiemap *fiemap_s;
1789 size_t num_bytes, ret_bytes;
1790 unsigned int extent_count;
1791 int rc = 0;
1792
1793 /* Get the extent count so we can calculate the size of
1794 * required fiemap buffer */
1795 if (get_user(extent_count,
1796 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1797 return -EFAULT;
1798
1799 if (extent_count >=
1800 (SIZE_MAX - sizeof(*fiemap_s)) / sizeof(struct ll_fiemap_extent))
1801 return -EINVAL;
1802 num_bytes = sizeof(*fiemap_s) + (extent_count *
1803 sizeof(struct ll_fiemap_extent));
1804
1805 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1806 if (fiemap_s == NULL)
1807 return -ENOMEM;
1808
1809 /* get the fiemap value */
1810 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
1811 sizeof(*fiemap_s))) {
1812 rc = -EFAULT;
1813 goto error;
1814 }
1815
1816 /* If fm_extent_count is non-zero, read the first extent since
1817 * it is used to calculate end_offset and device from previous
1818 * fiemap call. */
1819 if (extent_count) {
1820 if (copy_from_user(&fiemap_s->fm_extents[0],
1821 (char __user *)arg + sizeof(*fiemap_s),
1822 sizeof(struct ll_fiemap_extent))) {
1823 rc = -EFAULT;
1824 goto error;
1825 }
1826 }
1827
1828 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1829 if (rc)
1830 goto error;
1831
1832 ret_bytes = sizeof(struct ll_user_fiemap);
1833
1834 if (extent_count != 0)
1835 ret_bytes += (fiemap_s->fm_mapped_extents *
1836 sizeof(struct ll_fiemap_extent));
1837
1838 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1839 rc = -EFAULT;
1840
1841 error:
1842 OBD_FREE_LARGE(fiemap_s, num_bytes);
1843 return rc;
1844 }
1845
1846 /*
1847 * Read the data_version for inode.
1848 *
1849 * This value is computed using stripe object version on OST.
1850 * Version is computed using server side locking.
1851 *
1852 * @param extent_lock Take extent lock. Not needed if a process is already
1853 * holding the OST object group locks.
1854 */
ll_data_version(struct inode * inode,__u64 * data_version,int extent_lock)1855 int ll_data_version(struct inode *inode, __u64 *data_version,
1856 int extent_lock)
1857 {
1858 struct lov_stripe_md *lsm = NULL;
1859 struct ll_sb_info *sbi = ll_i2sbi(inode);
1860 struct obdo *obdo = NULL;
1861 int rc;
1862
1863 /* If no stripe, we consider version is 0. */
1864 lsm = ccc_inode_lsm_get(inode);
1865 if (!lsm_has_objects(lsm)) {
1866 *data_version = 0;
1867 CDEBUG(D_INODE, "No object for inode\n");
1868 rc = 0;
1869 goto out;
1870 }
1871
1872 obdo = kzalloc(sizeof(*obdo), GFP_NOFS);
1873 if (!obdo) {
1874 rc = -ENOMEM;
1875 goto out;
1876 }
1877
1878 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, extent_lock);
1879 if (rc == 0) {
1880 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
1881 rc = -EOPNOTSUPP;
1882 else
1883 *data_version = obdo->o_data_version;
1884 }
1885
1886 OBD_FREE_PTR(obdo);
1887 out:
1888 ccc_inode_lsm_put(inode, lsm);
1889 return rc;
1890 }
1891
1892 /*
1893 * Trigger a HSM release request for the provided inode.
1894 */
ll_hsm_release(struct inode * inode)1895 int ll_hsm_release(struct inode *inode)
1896 {
1897 struct cl_env_nest nest;
1898 struct lu_env *env;
1899 struct obd_client_handle *och = NULL;
1900 __u64 data_version = 0;
1901 int rc;
1902
1903
1904 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1905 ll_get_fsname(inode->i_sb, NULL, 0),
1906 PFID(&ll_i2info(inode)->lli_fid));
1907
1908 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
1909 if (IS_ERR(och)) {
1910 rc = PTR_ERR(och);
1911 goto out;
1912 }
1913
1914 /* Grab latest data_version and [am]time values */
1915 rc = ll_data_version(inode, &data_version, 1);
1916 if (rc != 0)
1917 goto out;
1918
1919 env = cl_env_nested_get(&nest);
1920 if (IS_ERR(env)) {
1921 rc = PTR_ERR(env);
1922 goto out;
1923 }
1924
1925 ll_merge_lvb(env, inode);
1926 cl_env_nested_put(&nest, env);
1927
1928 /* Release the file.
1929 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1930 * we still need it to pack l_remote_handle to MDT. */
1931 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
1932 &data_version);
1933 och = NULL;
1934
1935
1936 out:
1937 if (och != NULL && !IS_ERR(och)) /* close the file */
1938 ll_lease_close(och, inode, NULL);
1939
1940 return rc;
1941 }
1942
/* Per-call state for ll_swap_layouts(): the two inodes involved (kept in
 * a canonical order), the saved [am]time attributes to restore after the
 * swap, and the data versions to verify before swapping. */
struct ll_swap_stack {
	struct iattr ia1, ia2;		/* saved timestamps of inode1/inode2 */
	__u64 dv1, dv2;			/* expected data versions */
	struct inode *inode1, *inode2;
	bool check_dv1, check_dv2;	/* verify dv1/dv2 before swapping */
};
1949
ll_swap_layouts(struct file * file1,struct file * file2,struct lustre_swap_layouts * lsl)1950 static int ll_swap_layouts(struct file *file1, struct file *file2,
1951 struct lustre_swap_layouts *lsl)
1952 {
1953 struct mdc_swap_layouts msl;
1954 struct md_op_data *op_data;
1955 __u32 gid;
1956 __u64 dv;
1957 struct ll_swap_stack *llss = NULL;
1958 int rc;
1959
1960 llss = kzalloc(sizeof(*llss), GFP_NOFS);
1961 if (!llss)
1962 return -ENOMEM;
1963
1964 llss->inode1 = file_inode(file1);
1965 llss->inode2 = file_inode(file2);
1966
1967 if (!S_ISREG(llss->inode2->i_mode)) {
1968 rc = -EINVAL;
1969 goto free;
1970 }
1971
1972 if (inode_permission(llss->inode1, MAY_WRITE) ||
1973 inode_permission(llss->inode2, MAY_WRITE)) {
1974 rc = -EPERM;
1975 goto free;
1976 }
1977
1978 if (llss->inode2->i_sb != llss->inode1->i_sb) {
1979 rc = -EXDEV;
1980 goto free;
1981 }
1982
1983 /* we use 2 bool because it is easier to swap than 2 bits */
1984 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
1985 llss->check_dv1 = true;
1986
1987 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
1988 llss->check_dv2 = true;
1989
1990 /* we cannot use lsl->sl_dvX directly because we may swap them */
1991 llss->dv1 = lsl->sl_dv1;
1992 llss->dv2 = lsl->sl_dv2;
1993
1994 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
1995 if (rc == 0) /* same file, done! */ {
1996 rc = 0;
1997 goto free;
1998 }
1999
2000 if (rc < 0) { /* sequentialize it */
2001 swap(llss->inode1, llss->inode2);
2002 swap(file1, file2);
2003 swap(llss->dv1, llss->dv2);
2004 swap(llss->check_dv1, llss->check_dv2);
2005 }
2006
2007 gid = lsl->sl_gid;
2008 if (gid != 0) { /* application asks to flush dirty cache */
2009 rc = ll_get_grouplock(llss->inode1, file1, gid);
2010 if (rc < 0)
2011 goto free;
2012
2013 rc = ll_get_grouplock(llss->inode2, file2, gid);
2014 if (rc < 0) {
2015 ll_put_grouplock(llss->inode1, file1, gid);
2016 goto free;
2017 }
2018 }
2019
2020 /* to be able to restore mtime and atime after swap
2021 * we need to first save them */
2022 if (lsl->sl_flags &
2023 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
2024 llss->ia1.ia_mtime = llss->inode1->i_mtime;
2025 llss->ia1.ia_atime = llss->inode1->i_atime;
2026 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
2027 llss->ia2.ia_mtime = llss->inode2->i_mtime;
2028 llss->ia2.ia_atime = llss->inode2->i_atime;
2029 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
2030 }
2031
2032 /* ultimate check, before swapping the layouts we check if
2033 * dataversion has changed (if requested) */
2034 if (llss->check_dv1) {
2035 rc = ll_data_version(llss->inode1, &dv, 0);
2036 if (rc)
2037 goto putgl;
2038 if (dv != llss->dv1) {
2039 rc = -EAGAIN;
2040 goto putgl;
2041 }
2042 }
2043
2044 if (llss->check_dv2) {
2045 rc = ll_data_version(llss->inode2, &dv, 0);
2046 if (rc)
2047 goto putgl;
2048 if (dv != llss->dv2) {
2049 rc = -EAGAIN;
2050 goto putgl;
2051 }
2052 }
2053
2054 /* struct md_op_data is used to send the swap args to the mdt
2055 * only flags is missing, so we use struct mdc_swap_layouts
2056 * through the md_op_data->op_data */
2057 /* flags from user space have to be converted before they are send to
2058 * server, no flag is sent today, they are only used on the client */
2059 msl.msl_flags = 0;
2060 rc = -ENOMEM;
2061 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2062 0, LUSTRE_OPC_ANY, &msl);
2063 if (IS_ERR(op_data)) {
2064 rc = PTR_ERR(op_data);
2065 goto free;
2066 }
2067
2068 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2069 sizeof(*op_data), op_data, NULL);
2070 ll_finish_md_op_data(op_data);
2071
2072 putgl:
2073 if (gid != 0) {
2074 ll_put_grouplock(llss->inode2, file2, gid);
2075 ll_put_grouplock(llss->inode1, file1, gid);
2076 }
2077
2078 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2079 if (rc != 0)
2080 goto free;
2081
2082 /* clear useless flags */
2083 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
2084 llss->ia1.ia_valid &= ~ATTR_MTIME;
2085 llss->ia2.ia_valid &= ~ATTR_MTIME;
2086 }
2087
2088 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
2089 llss->ia1.ia_valid &= ~ATTR_ATIME;
2090 llss->ia2.ia_valid &= ~ATTR_ATIME;
2091 }
2092
2093 /* update time if requested */
2094 rc = 0;
2095 if (llss->ia2.ia_valid != 0) {
2096 mutex_lock(&llss->inode1->i_mutex);
2097 rc = ll_setattr(file1->f_path.dentry, &llss->ia2);
2098 mutex_unlock(&llss->inode1->i_mutex);
2099 }
2100
2101 if (llss->ia1.ia_valid != 0) {
2102 int rc1;
2103
2104 mutex_lock(&llss->inode2->i_mutex);
2105 rc1 = ll_setattr(file2->f_path.dentry, &llss->ia1);
2106 mutex_unlock(&llss->inode2->i_mutex);
2107 if (rc == 0)
2108 rc = rc1;
2109 }
2110
2111 free:
2112 if (llss != NULL)
2113 OBD_FREE_PTR(llss);
2114
2115 return rc;
2116 }
2117
ll_hsm_state_set(struct inode * inode,struct hsm_state_set * hss)2118 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2119 {
2120 struct md_op_data *op_data;
2121 int rc;
2122
2123 /* Non-root users are forbidden to set or clear flags which are
2124 * NOT defined in HSM_USER_MASK. */
2125 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2126 !capable(CFS_CAP_SYS_ADMIN))
2127 return -EPERM;
2128
2129 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2130 LUSTRE_OPC_ANY, hss);
2131 if (IS_ERR(op_data))
2132 return PTR_ERR(op_data);
2133
2134 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2135 sizeof(*op_data), op_data, NULL);
2136
2137 ll_finish_md_op_data(op_data);
2138
2139 return rc;
2140 }
2141
ll_hsm_import(struct inode * inode,struct file * file,struct hsm_user_import * hui)2142 static int ll_hsm_import(struct inode *inode, struct file *file,
2143 struct hsm_user_import *hui)
2144 {
2145 struct hsm_state_set *hss = NULL;
2146 struct iattr *attr = NULL;
2147 int rc;
2148
2149
2150 if (!S_ISREG(inode->i_mode))
2151 return -EINVAL;
2152
2153 /* set HSM flags */
2154 hss = kzalloc(sizeof(*hss), GFP_NOFS);
2155 if (!hss) {
2156 rc = -ENOMEM;
2157 goto out;
2158 }
2159
2160 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2161 hss->hss_archive_id = hui->hui_archive_id;
2162 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2163 rc = ll_hsm_state_set(inode, hss);
2164 if (rc != 0)
2165 goto out;
2166
2167 attr = kzalloc(sizeof(*attr), GFP_NOFS);
2168 if (!attr) {
2169 rc = -ENOMEM;
2170 goto out;
2171 }
2172
2173 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2174 attr->ia_mode |= S_IFREG;
2175 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
2176 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
2177 attr->ia_size = hui->hui_size;
2178 attr->ia_mtime.tv_sec = hui->hui_mtime;
2179 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2180 attr->ia_atime.tv_sec = hui->hui_atime;
2181 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2182
2183 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2184 ATTR_UID | ATTR_GID |
2185 ATTR_MTIME | ATTR_MTIME_SET |
2186 ATTR_ATIME | ATTR_ATIME_SET;
2187
2188 mutex_lock(&inode->i_mutex);
2189
2190 rc = ll_setattr_raw(file->f_path.dentry, attr, true);
2191 if (rc == -ENODATA)
2192 rc = 0;
2193
2194 mutex_unlock(&inode->i_mutex);
2195
2196 out:
2197 if (hss != NULL)
2198 OBD_FREE_PTR(hss);
2199
2200 if (attr != NULL)
2201 OBD_FREE_PTR(attr);
2202
2203 return rc;
2204 }
2205
2206 static long
ll_file_ioctl(struct file * file,unsigned int cmd,unsigned long arg)2207 ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2208 {
2209 struct inode *inode = file_inode(file);
2210 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2211 int flags, rc;
2212
2213 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
2214 inode->i_generation, inode, cmd);
2215 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2216
2217 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2218 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2219 return -ENOTTY;
2220
2221 switch (cmd) {
2222 case LL_IOC_GETFLAGS:
2223 /* Get the current value of the file flags */
2224 return put_user(fd->fd_flags, (int *)arg);
2225 case LL_IOC_SETFLAGS:
2226 case LL_IOC_CLRFLAGS:
2227 /* Set or clear specific file flags */
2228 /* XXX This probably needs checks to ensure the flags are
2229 * not abused, and to handle any flag side effects.
2230 */
2231 if (get_user(flags, (int *) arg))
2232 return -EFAULT;
2233
2234 if (cmd == LL_IOC_SETFLAGS) {
2235 if ((flags & LL_FILE_IGNORE_LOCK) &&
2236 !(file->f_flags & O_DIRECT)) {
2237 CERROR("%s: unable to disable locking on non-O_DIRECT file\n",
2238 current->comm);
2239 return -EINVAL;
2240 }
2241
2242 fd->fd_flags |= flags;
2243 } else {
2244 fd->fd_flags &= ~flags;
2245 }
2246 return 0;
2247 case LL_IOC_LOV_SETSTRIPE:
2248 return ll_lov_setstripe(inode, file, arg);
2249 case LL_IOC_LOV_SETEA:
2250 return ll_lov_setea(inode, file, arg);
2251 case LL_IOC_LOV_SWAP_LAYOUTS: {
2252 struct file *file2;
2253 struct lustre_swap_layouts lsl;
2254
2255 if (copy_from_user(&lsl, (char *)arg,
2256 sizeof(struct lustre_swap_layouts)))
2257 return -EFAULT;
2258
2259 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
2260 return -EPERM;
2261
2262 file2 = fget(lsl.sl_fd);
2263 if (file2 == NULL)
2264 return -EBADF;
2265
2266 rc = -EPERM;
2267 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
2268 rc = ll_swap_layouts(file, file2, &lsl);
2269 fput(file2);
2270 return rc;
2271 }
2272 case LL_IOC_LOV_GETSTRIPE:
2273 return ll_lov_getstripe(inode, arg);
2274 case LL_IOC_RECREATE_OBJ:
2275 return ll_lov_recreate_obj(inode, arg);
2276 case LL_IOC_RECREATE_FID:
2277 return ll_lov_recreate_fid(inode, arg);
2278 case FSFILT_IOC_FIEMAP:
2279 return ll_ioctl_fiemap(inode, arg);
2280 case FSFILT_IOC_GETFLAGS:
2281 case FSFILT_IOC_SETFLAGS:
2282 return ll_iocontrol(inode, file, cmd, arg);
2283 case FSFILT_IOC_GETVERSION_OLD:
2284 case FSFILT_IOC_GETVERSION:
2285 return put_user(inode->i_generation, (int *)arg);
2286 case LL_IOC_GROUP_LOCK:
2287 return ll_get_grouplock(inode, file, arg);
2288 case LL_IOC_GROUP_UNLOCK:
2289 return ll_put_grouplock(inode, file, arg);
2290 case IOC_OBD_STATFS:
2291 return ll_obd_statfs(inode, (void *)arg);
2292
2293 /* We need to special case any other ioctls we want to handle,
2294 * to send them to the MDS/OST as appropriate and to properly
2295 * network encode the arg field.
2296 case FSFILT_IOC_SETVERSION_OLD:
2297 case FSFILT_IOC_SETVERSION:
2298 */
2299 case LL_IOC_FLUSHCTX:
2300 return ll_flush_ctx(inode);
2301 case LL_IOC_PATH2FID: {
2302 if (copy_to_user((void *)arg, ll_inode2fid(inode),
2303 sizeof(struct lu_fid)))
2304 return -EFAULT;
2305
2306 return 0;
2307 }
2308 case OBD_IOC_FID2PATH:
2309 return ll_fid2path(inode, (void *)arg);
2310 case LL_IOC_DATA_VERSION: {
2311 struct ioc_data_version idv;
2312 int rc;
2313
2314 if (copy_from_user(&idv, (char *)arg, sizeof(idv)))
2315 return -EFAULT;
2316
2317 rc = ll_data_version(inode, &idv.idv_version,
2318 !(idv.idv_flags & LL_DV_NOFLUSH));
2319
2320 if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv)))
2321 return -EFAULT;
2322
2323 return rc;
2324 }
2325
2326 case LL_IOC_GET_MDTIDX: {
2327 int mdtidx;
2328
2329 mdtidx = ll_get_mdt_idx(inode);
2330 if (mdtidx < 0)
2331 return mdtidx;
2332
2333 if (put_user((int)mdtidx, (int *)arg))
2334 return -EFAULT;
2335
2336 return 0;
2337 }
2338 case OBD_IOC_GETDTNAME:
2339 case OBD_IOC_GETMDNAME:
2340 return ll_get_obd_name(inode, cmd, arg);
2341 case LL_IOC_HSM_STATE_GET: {
2342 struct md_op_data *op_data;
2343 struct hsm_user_state *hus;
2344 int rc;
2345
2346 hus = kzalloc(sizeof(*hus), GFP_NOFS);
2347 if (!hus)
2348 return -ENOMEM;
2349
2350 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2351 LUSTRE_OPC_ANY, hus);
2352 if (IS_ERR(op_data)) {
2353 OBD_FREE_PTR(hus);
2354 return PTR_ERR(op_data);
2355 }
2356
2357 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2358 op_data, NULL);
2359
2360 if (copy_to_user((void *)arg, hus, sizeof(*hus)))
2361 rc = -EFAULT;
2362
2363 ll_finish_md_op_data(op_data);
2364 OBD_FREE_PTR(hus);
2365 return rc;
2366 }
2367 case LL_IOC_HSM_STATE_SET: {
2368 struct hsm_state_set *hss;
2369 int rc;
2370
2371 hss = kzalloc(sizeof(*hss), GFP_NOFS);
2372 if (!hss)
2373 return -ENOMEM;
2374
2375 if (copy_from_user(hss, (char *)arg, sizeof(*hss))) {
2376 OBD_FREE_PTR(hss);
2377 return -EFAULT;
2378 }
2379
2380 rc = ll_hsm_state_set(inode, hss);
2381
2382 OBD_FREE_PTR(hss);
2383 return rc;
2384 }
2385 case LL_IOC_HSM_ACTION: {
2386 struct md_op_data *op_data;
2387 struct hsm_current_action *hca;
2388 int rc;
2389
2390 hca = kzalloc(sizeof(*hca), GFP_NOFS);
2391 if (!hca)
2392 return -ENOMEM;
2393
2394 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2395 LUSTRE_OPC_ANY, hca);
2396 if (IS_ERR(op_data)) {
2397 OBD_FREE_PTR(hca);
2398 return PTR_ERR(op_data);
2399 }
2400
2401 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2402 op_data, NULL);
2403
2404 if (copy_to_user((char *)arg, hca, sizeof(*hca)))
2405 rc = -EFAULT;
2406
2407 ll_finish_md_op_data(op_data);
2408 OBD_FREE_PTR(hca);
2409 return rc;
2410 }
2411 case LL_IOC_SET_LEASE: {
2412 struct ll_inode_info *lli = ll_i2info(inode);
2413 struct obd_client_handle *och = NULL;
2414 bool lease_broken;
2415 fmode_t mode = 0;
2416
2417 switch (arg) {
2418 case F_WRLCK:
2419 if (!(file->f_mode & FMODE_WRITE))
2420 return -EPERM;
2421 mode = FMODE_WRITE;
2422 break;
2423 case F_RDLCK:
2424 if (!(file->f_mode & FMODE_READ))
2425 return -EPERM;
2426 mode = FMODE_READ;
2427 break;
2428 case F_UNLCK:
2429 mutex_lock(&lli->lli_och_mutex);
2430 if (fd->fd_lease_och != NULL) {
2431 och = fd->fd_lease_och;
2432 fd->fd_lease_och = NULL;
2433 }
2434 mutex_unlock(&lli->lli_och_mutex);
2435
2436 if (och != NULL) {
2437 mode = och->och_flags &
2438 (FMODE_READ|FMODE_WRITE);
2439 rc = ll_lease_close(och, inode, &lease_broken);
2440 if (rc == 0 && lease_broken)
2441 mode = 0;
2442 } else {
2443 rc = -ENOLCK;
2444 }
2445
2446 /* return the type of lease or error */
2447 return rc < 0 ? rc : (int)mode;
2448 default:
2449 return -EINVAL;
2450 }
2451
2452 CDEBUG(D_INODE, "Set lease with mode %d\n", mode);
2453
2454 /* apply for lease */
2455 och = ll_lease_open(inode, file, mode, 0);
2456 if (IS_ERR(och))
2457 return PTR_ERR(och);
2458
2459 rc = 0;
2460 mutex_lock(&lli->lli_och_mutex);
2461 if (fd->fd_lease_och == NULL) {
2462 fd->fd_lease_och = och;
2463 och = NULL;
2464 }
2465 mutex_unlock(&lli->lli_och_mutex);
2466 if (och != NULL) {
2467 /* impossible now that only excl is supported for now */
2468 ll_lease_close(och, inode, &lease_broken);
2469 rc = -EBUSY;
2470 }
2471 return rc;
2472 }
2473 case LL_IOC_GET_LEASE: {
2474 struct ll_inode_info *lli = ll_i2info(inode);
2475 struct ldlm_lock *lock = NULL;
2476
2477 rc = 0;
2478 mutex_lock(&lli->lli_och_mutex);
2479 if (fd->fd_lease_och != NULL) {
2480 struct obd_client_handle *och = fd->fd_lease_och;
2481
2482 lock = ldlm_handle2lock(&och->och_lease_handle);
2483 if (lock != NULL) {
2484 lock_res_and_lock(lock);
2485 if (!ldlm_is_cancel(lock))
2486 rc = och->och_flags &
2487 (FMODE_READ | FMODE_WRITE);
2488 unlock_res_and_lock(lock);
2489 ldlm_lock_put(lock);
2490 }
2491 }
2492 mutex_unlock(&lli->lli_och_mutex);
2493 return rc;
2494 }
2495 case LL_IOC_HSM_IMPORT: {
2496 struct hsm_user_import *hui;
2497
2498 hui = kzalloc(sizeof(*hui), GFP_NOFS);
2499 if (!hui)
2500 return -ENOMEM;
2501
2502 if (copy_from_user(hui, (void *)arg, sizeof(*hui))) {
2503 OBD_FREE_PTR(hui);
2504 return -EFAULT;
2505 }
2506
2507 rc = ll_hsm_import(inode, file, hui);
2508
2509 OBD_FREE_PTR(hui);
2510 return rc;
2511 }
2512 default: {
2513 int err;
2514
2515 if (LLIOC_STOP ==
2516 ll_iocontrol_call(inode, file, cmd, arg, &err))
2517 return err;
2518
2519 return obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2520 (void *)arg);
2521 }
2522 }
2523 }
2524
2525
/*
 * llseek for Lustre files.  SEEK_END/SEEK_HOLE/SEEK_DATA need an
 * up-to-date file size, which requires a glimpse of the OST objects.
 */
static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
{
	struct inode *inode = file_inode(file);
	loff_t eof = 0;
	loff_t retval;

	retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
			   (origin == SEEK_CUR) ? file->f_pos : 0);
	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
	       inode->i_ino, inode->i_generation, inode, retval, retval,
	       origin);
	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);

	switch (origin) {
	case SEEK_END:
	case SEEK_HOLE:
	case SEEK_DATA:
		/* refresh i_size from the OSTs before using it */
		retval = ll_glimpse_size(inode);
		if (retval != 0)
			return retval;
		eof = i_size_read(inode);
		break;
	default:
		break;
	}

	return generic_file_llseek_size(file, offset, origin,
					ll_file_maxbytes(inode), eof);
}
2549
/*
 * Called on close(); reports (once) any async writeback error that was
 * recorded against this inode's mapping.
 */
static int ll_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file_inode(file);
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	int rc;
	int clob_rc;

	LASSERT(!S_ISDIR(inode->i_mode));

	/* Pick up and reset errors recorded when async writeback failed
	 * for pages in this mapping.  The cl-object rc must always be
	 * read-and-cleared, even if an inode-level error is pending. */
	rc = lli->lli_async_rc;
	lli->lli_async_rc = 0;
	clob_rc = lov_read_and_clear_async_rc(lli->lli_clob);
	if (rc == 0)
		rc = clob_rc;

	/* The application has already been told about the write failure;
	 * do not report it a second time. */
	if (fd->fd_write_failed)
		return 0;

	return rc != 0 ? -EIO : 0;
}
2573
2574 /**
2575 * Called to make sure a portion of file has been written out.
2576 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2577 *
2578 * Return how many pages have been written.
2579 */
int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
		       enum cl_fsync_mode mode, int ignore_layout)
{
	struct cl_env_nest nest;
	struct lu_env *env;
	struct cl_io *io;
	struct obd_capa *capa = NULL;
	struct cl_fsync_io *fio;
	int result;

	/* only the four known fsync modes are accepted */
	if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
	    mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
		return -EINVAL;

	/* nested env: this may be called from within another cl_io */
	env = cl_env_nested_get(&nest);
	if (IS_ERR(env))
		return PTR_ERR(env);

	/* OSS write capability accompanies the sync request */
	capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);

	io = ccc_env_thread_io(env);
	io->ci_obj = cl_i2info(inode)->lli_clob;
	io->ci_ignore_layout = ignore_layout;

	/* initialize parameters for sync */
	fio = &io->u.ci_fsync;
	fio->fi_capa = capa;
	fio->fi_start = start;
	fio->fi_end = end;
	fio->fi_fid = ll_inode2fid(inode);
	fio->fi_mode = mode;
	fio->fi_nr_written = 0;

	/* run the CIT_FSYNC io; on init failure take its stored result */
	if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
		result = cl_io_loop(env, io);
	else
		result = io->ci_result;
	/* success: report the number of pages written (see header comment) */
	if (result == 0)
		result = fio->fi_nr_written;
	cl_io_fini(env, io);
	cl_env_nested_put(&nest, env);

	capa_put(capa);

	return result;
}
2626
/*
 * fsync() entry point: flush the local page cache, fold in any recorded
 * async writeback errors, sync metadata on the MDS, then sync file data
 * on the OSTs.
 */
int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = file_inode(file);
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ptlrpc_request *req;
	struct obd_capa *oc;
	int rc, err;

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
	       inode->i_generation, inode);
	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);

	/* write out and wait for the requested range of the local cache */
	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
	mutex_lock(&inode->i_mutex);

	/* catch async errors that were recorded back when async writeback
	 * failed for pages in this mapping. */
	if (!S_ISDIR(inode->i_mode)) {
		err = lli->lli_async_rc;
		lli->lli_async_rc = 0;
		if (rc == 0)
			rc = err;
		err = lov_read_and_clear_async_rc(lli->lli_clob);
		if (rc == 0)
			rc = err;
	}

	/* sync metadata with the MDS; the request is consumed on success */
	oc = ll_mdscapa_get(inode);
	err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
		      &req);
	capa_put(oc);
	if (!rc)
		rc = err;
	if (!err)
		ptlrpc_req_finished(req);

	if (S_ISREG(inode->i_mode)) {
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

		/* sync file data on the OSTs; remember the outcome so
		 * ll_flush() does not report the same failure twice */
		err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
		if (rc == 0 && err < 0)
			rc = err;
		if (rc < 0)
			fd->fd_write_failed = true;
		else
			fd->fd_write_failed = false;
	}

	mutex_unlock(&inode->i_mutex);
	return rc;
}
2678
/*
 * Implement fcntl()/flock() locking by enqueuing an LDLM flock lock on
 * the MDS, then mirroring the result into the local VFS lock lists so
 * the kernel's bookkeeping (e.g. for close-time cleanup) stays correct.
 */
static int
ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
{
	struct inode *inode = file_inode(file);
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ldlm_enqueue_info einfo = {
		.ei_type	= LDLM_FLOCK,
		.ei_cb_cp	= ldlm_flock_completion_ast,
		.ei_cbdata	= file_lock,
	};
	struct md_op_data *op_data;
	struct lustre_handle lockh = {0};
	ldlm_policy_data_t flock = {{0}};
	__u64 flags = 0;
	int rc;
	int rc2 = 0;

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
	       inode->i_ino, file_lock);

	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);

	/* flock(2) only comes in as SETLK/SETLKW; anything else must be
	 * a POSIX (fcntl) lock */
	if (file_lock->fl_flags & FL_FLOCK)
		LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
	else if (!(file_lock->fl_flags & FL_POSIX))
		return -EINVAL;

	flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
	flock.l_flock.pid = file_lock->fl_pid;
	flock.l_flock.start = file_lock->fl_start;
	flock.l_flock.end = file_lock->fl_end;

	/* Somewhat ugly workaround for svc lockd.
	 * lockd installs custom fl_lmops->lm_compare_owner that checks
	 * for the fl_owner to be the same (which it always is on local node
	 * I guess between lockd processes) and then compares pid.
	 * As such we assign pid to the owner field to make it all work,
	 * conflict with normal locks is unlikely since pid space and
	 * pointer space for current->files are not intersecting */
	if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
		flock.l_flock.owner = (unsigned long)file_lock->fl_pid;

	/* map the VFS lock type to an LDLM lock mode */
	switch (file_lock->fl_type) {
	case F_RDLCK:
		einfo.ei_mode = LCK_PR;
		break;
	case F_UNLCK:
		/* An unlock request may or may not have any relation to
		 * existing locks so we may not be able to pass a lock handle
		 * via a normal ldlm_lock_cancel() request. The request may even
		 * unlock a byte range in the middle of an existing lock. In
		 * order to process an unlock request we need all of the same
		 * information that is given with a normal read or write record
		 * lock request. To avoid creating another ldlm unlock (cancel)
		 * message we'll treat a LCK_NL flock request as an unlock. */
		einfo.ei_mode = LCK_NL;
		break;
	case F_WRLCK:
		einfo.ei_mode = LCK_PW;
		break;
	default:
		CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
		       file_lock->fl_type);
		return -ENOTSUPP;
	}

	/* map the fcntl command to enqueue flags */
	switch (cmd) {
	case F_SETLKW:
#ifdef F_SETLKW64
	case F_SETLKW64:
#endif
		flags = 0;
		break;
	case F_SETLK:
#ifdef F_SETLK64
	case F_SETLK64:
#endif
		flags = LDLM_FL_BLOCK_NOWAIT;
		break;
	case F_GETLK:
#ifdef F_GETLK64
	case F_GETLK64:
#endif
		flags = LDLM_FL_TEST_LOCK;
		/* Save the old mode so that if the mode in the lock changes we
		 * can decrement the appropriate reader or writer refcount. */
		file_lock->fl_type = einfo.ei_mode;
		break;
	default:
		CERROR("unknown fcntl lock command: %d\n", cmd);
		return -EINVAL;
	}

	op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
				     LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data))
		return PTR_ERR(op_data);

	CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n",
	       inode->i_ino, flock.l_flock.pid, flags, einfo.ei_mode,
	       flock.l_flock.start, flock.l_flock.end);

	/* send the lock request to the MDS */
	rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
			op_data, &lockh, &flock, 0, NULL /* req */, flags);

	/* mirror a successful server-side lock (or any unlock) into the
	 * local VFS lock lists; test-only requests are never recorded */
	if ((file_lock->fl_flags & FL_FLOCK) &&
	    (rc == 0 || file_lock->fl_type == F_UNLCK))
		rc2  = flock_lock_file_wait(file, file_lock);
	if ((file_lock->fl_flags & FL_POSIX) &&
	    (rc == 0 || file_lock->fl_type == F_UNLCK) &&
	    !(flags & LDLM_FL_TEST_LOCK))
		rc2  = posix_lock_file_wait(file, file_lock);

	/* local bookkeeping failed after the server granted the lock:
	 * roll back by enqueuing an LCK_NL (unlock) on the server */
	if (rc2 && file_lock->fl_type != F_UNLCK) {
		einfo.ei_mode = LCK_NL;
		md_enqueue(sbi->ll_md_exp, &einfo, NULL,
			   op_data, &lockh, &flock, 0, NULL /* req */, flags);
		rc = rc2;
	}

	ll_finish_md_op_data(op_data);

	return rc;
}
2803
static int
ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
{
	/* Installed for "-o noflock" mounts: reject every flock/POSIX
	 * lock request so applications get ENOSYS instead of silently
	 * receiving only locally-consistent locks. */
	return -ENOSYS;
}
2809
2810 /**
2811 * test if some locks matching bits and l_req_mode are acquired
2812 * - bits can be in different locks
2813 * - if found clear the common lock bits in *bits
2814 * - the bits not found, are kept in *bits
2815 * \param inode [IN]
2816 * \param bits [IN] searched lock bits [IN]
2817 * \param l_req_mode [IN] searched lock mode
2818 * \retval boolean, true iff all bits are found
2819 */
int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
{
	struct lustre_handle lockh;
	ldlm_policy_data_t policy;
	/* LCK_MINMODE means "any mode": match against CR|CW|PR|PW */
	ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
			   (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
	struct lu_fid *fid;
	__u64 flags;
	int i;

	if (!inode)
		return 0;

	fid = &ll_i2info(inode)->lli_fid;
	CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
	       ldlm_lockname[mode]);

	/* TEST_LOCK: probe only, do not take a reference on the match */
	flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
	for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
		/* probe one inodebit at a time; different bits may be
		 * covered by different locks */
		policy.l_inodebits.bits = *bits & (1 << i);
		if (policy.l_inodebits.bits == 0)
			continue;

		if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
				  &policy, mode, &lockh)) {
			struct ldlm_lock *lock;

			lock = ldlm_handle2lock(&lockh);
			if (lock) {
				/* clear every bit the matched lock covers,
				 * not just the one probed */
				*bits &=
					~(lock->l_policy_data.l_inodebits.bits);
				LDLM_LOCK_PUT(lock);
			} else {
				/* lock vanished after the match; clear only
				 * the probed bit */
				*bits &= ~policy.l_inodebits.bits;
			}
		}
	}
	/* true iff every requested bit was found under some lock */
	return *bits == 0;
}
2859
/*
 * Try to match an already-granted MD inodebits lock covering @bits.
 * On success the matched lock handle is stored in @lockh and its mode
 * is returned; 0 means no suitable lock was found.
 */
ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
			    struct lustre_handle *lockh, __u64 flags,
			    ldlm_mode_t mode)
{
	ldlm_policy_data_t policy = { .l_inodebits = {bits} };
	struct lu_fid *fid = &ll_i2info(inode)->lli_fid;

	CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));

	return md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED | flags,
			     fid, LDLM_IBITS, &policy, mode, lockh);
}
2876
ll_inode_revalidate_fini(struct inode * inode,int rc)2877 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
2878 {
2879 /* Already unlinked. Just update nlink and return success */
2880 if (rc == -ENOENT) {
2881 clear_nlink(inode);
2882 /* This path cannot be hit for regular files unless in
2883 * case of obscure races, so no need to validate size.
2884 */
2885 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
2886 return 0;
2887 } else if (rc != 0) {
2888 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
2889 "%s: revalidate FID "DFID" error: rc = %d\n",
2890 ll_get_fsname(inode->i_sb, NULL, 0),
2891 PFID(ll_inode2fid(inode)), rc);
2892 }
2893
2894 return rc;
2895 }
2896
/*
 * Revalidate cached inode metadata against the MDS for the requested
 * ibits (LOOKUP/UPDATE/...).  If the server supports getattr-by-FID
 * (OBD_CONNECT_ATTRFID) an intent lock is used; otherwise a plain
 * getattr RPC is sent, but only when no matching MD lock already
 * guarantees the cached attributes are fresh.
 */
static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
{
	struct inode *inode = d_inode(dentry);
	struct ptlrpc_request *req = NULL;
	struct obd_export *exp;
	int rc = 0;

	LASSERT(inode != NULL);

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%pd\n",
	       inode->i_ino, inode->i_generation, inode, dentry);

	exp = ll_i2mdexp(inode);

	/* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
	 * But under CMD case, it caused some lock issues, should be fixed
	 * with new CMD ibits lock. See bug 12718 */
	if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
		struct lookup_intent oit = { .it_op = IT_GETATTR };
		struct md_op_data *op_data;

		/* a pure LOOKUP request only needs the cheaper intent */
		if (ibits == MDS_INODELOCK_LOOKUP)
			oit.it_op = IT_LOOKUP;

		/* Call getattr by fid, so do not provide name at all. */
		op_data = ll_prep_md_op_data(NULL, inode,
					     inode, NULL, 0, 0,
					     LUSTRE_OPC_ANY, NULL);
		if (IS_ERR(op_data))
			return PTR_ERR(op_data);

		oit.it_create_mode |= M_CHECK_STALE;
		rc = md_intent_lock(exp, op_data, NULL, 0,
				    /* we are not interested in name
				       based lookup */
				    &oit, 0, &req,
				    ll_md_blocking_ast, 0);
		ll_finish_md_op_data(op_data);
		oit.it_create_mode &= ~M_CHECK_STALE;
		if (rc < 0) {
			rc = ll_inode_revalidate_fini(inode, rc);
			goto out;
		}

		rc = ll_revalidate_it_finish(req, &oit, inode);
		if (rc != 0) {
			ll_intent_release(&oit);
			goto out;
		}

		/* Unlinked? Unhash dentry, so it is not picked up later by
		   do_lookup() -> ll_revalidate_it(). We cannot use d_drop
		   here to preserve get_cwd functionality on 2.6.
		   Bug 10503 */
		if (!d_inode(dentry)->i_nlink)
			d_lustre_invalidate(dentry, 0);

		ll_lookup_finish_locks(&oit, inode);
	} else if (!ll_have_md_lock(d_inode(dentry), &ibits, LCK_MINMODE)) {
		/* no cached MD lock covers the requested bits: fall back
		 * to a plain getattr RPC */
		struct ll_sb_info *sbi = ll_i2sbi(d_inode(dentry));
		u64 valid = OBD_MD_FLGETATTR;
		struct md_op_data *op_data;
		int ealen = 0;

		if (S_ISREG(inode->i_mode)) {
			/* also fetch the striping EA for regular files */
			rc = ll_get_default_mdsize(sbi, &ealen);
			if (rc)
				return rc;
			valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
		}

		op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
					     0, ealen, LUSTRE_OPC_ANY,
					     NULL);
		if (IS_ERR(op_data))
			return PTR_ERR(op_data);

		op_data->op_valid = valid;
		/* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
		 * capa for this inode. Because we only keep capas of dirs
		 * fresh. */
		rc = md_getattr(sbi->ll_md_exp, op_data, &req);
		ll_finish_md_op_data(op_data);
		if (rc) {
			rc = ll_inode_revalidate_fini(inode, rc);
			return rc;
		}

		/* refresh the in-core inode from the RPC reply */
		rc = ll_prep_inode(&inode, req, NULL, NULL);
	}
out:
	ptlrpc_req_finished(req);
	return rc;
}
2991
/*
 * Revalidate inode attributes; for regular files additionally refresh
 * the size from the OSTs (glimpse) unless an HSM restore is running.
 */
static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
{
	struct inode *inode = d_inode(dentry);
	int rc;

	rc = __ll_inode_revalidate(dentry, ibits);
	if (rc != 0)
		return rc;

	/* if object isn't regular file, don't validate size */
	if (!S_ISREG(inode->i_mode)) {
		/* times come from the cached lock value block */
		LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
		LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
		LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
	} else {
		/* In case of restore, the MDT has the right size and has
		 * already send it back without granting the layout lock,
		 * inode is up-to-date so glimpse is useless.
		 * Also to glimpse we need the layout, in case of a running
		 * restore the MDT holds the layout lock so the glimpse will
		 * block up to the end of restore (getattr will block)
		 */
		if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
			rc = ll_glimpse_size(inode);
	}
	return rc;
}
3019
ll_getattr(struct vfsmount * mnt,struct dentry * de,struct kstat * stat)3020 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3021 {
3022 struct inode *inode = d_inode(de);
3023 struct ll_sb_info *sbi = ll_i2sbi(inode);
3024 struct ll_inode_info *lli = ll_i2info(inode);
3025 int res = 0;
3026
3027 res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
3028 MDS_INODELOCK_LOOKUP);
3029 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3030
3031 if (res)
3032 return res;
3033
3034 stat->dev = inode->i_sb->s_dev;
3035 if (ll_need_32bit_api(sbi))
3036 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3037 else
3038 stat->ino = inode->i_ino;
3039 stat->mode = inode->i_mode;
3040 stat->nlink = inode->i_nlink;
3041 stat->uid = inode->i_uid;
3042 stat->gid = inode->i_gid;
3043 stat->rdev = inode->i_rdev;
3044 stat->atime = inode->i_atime;
3045 stat->mtime = inode->i_mtime;
3046 stat->ctime = inode->i_ctime;
3047 stat->blksize = 1 << inode->i_blkbits;
3048
3049 stat->size = i_size_read(inode);
3050 stat->blocks = inode->i_blocks;
3051
3052 return 0;
3053 }
3054
/*
 * ->fiemap handler: marshal the VFS fiemap_extent_info into a Lustre
 * ll_user_fiemap buffer, run the mapping, and copy results back.
 */
static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		     __u64 start, __u64 len)
{
	int rc;
	size_t num_bytes;
	struct ll_user_fiemap *fiemap;
	unsigned int extent_count = fieinfo->fi_extents_max;

	/* header plus room for every extent the caller asked for */
	num_bytes = sizeof(*fiemap) + (extent_count *
				       sizeof(struct ll_fiemap_extent));
	OBD_ALLOC_LARGE(fiemap, num_bytes);

	if (fiemap == NULL)
		return -ENOMEM;

	fiemap->fm_flags = fieinfo->fi_flags;
	fiemap->fm_extent_count = fieinfo->fi_extents_max;
	fiemap->fm_start = start;
	fiemap->fm_length = len;
	/* only the first extent carries input (continuation) data, so a
	 * single ll_fiemap_extent is copied in here */
	if (extent_count > 0)
		memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
		       sizeof(struct ll_fiemap_extent));

	rc = ll_do_fiemap(inode, fiemap, num_bytes);

	/* propagate result flags and all mapped extents back to the VFS */
	fieinfo->fi_flags = fiemap->fm_flags;
	fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
	if (extent_count > 0)
		memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
		       fiemap->fm_mapped_extents *
		       sizeof(struct ll_fiemap_extent));

	OBD_FREE_LARGE(fiemap, num_bytes);
	return rc;
}
3090
ll_get_acl(struct inode * inode,int type)3091 struct posix_acl *ll_get_acl(struct inode *inode, int type)
3092 {
3093 struct ll_inode_info *lli = ll_i2info(inode);
3094 struct posix_acl *acl = NULL;
3095
3096 spin_lock(&lli->lli_lock);
3097 /* VFS' acl_permission_check->check_acl will release the refcount */
3098 acl = posix_acl_dup(lli->lli_posix_acl);
3099 spin_unlock(&lli->lli_lock);
3100
3101 return acl;
3102 }
3103
3104
ll_inode_permission(struct inode * inode,int mask)3105 int ll_inode_permission(struct inode *inode, int mask)
3106 {
3107 int rc = 0;
3108
3109 #ifdef MAY_NOT_BLOCK
3110 if (mask & MAY_NOT_BLOCK)
3111 return -ECHILD;
3112 #endif
3113
3114 /* as root inode are NOT getting validated in lookup operation,
3115 * need to do it before permission check. */
3116
3117 if (is_root_inode(inode)) {
3118 rc = __ll_inode_revalidate(inode->i_sb->s_root,
3119 MDS_INODELOCK_LOOKUP);
3120 if (rc)
3121 return rc;
3122 }
3123
3124 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
3125 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
3126
3127 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
3128 return lustre_check_remote_perm(inode, mask);
3129
3130 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
3131 rc = generic_permission(inode, mask);
3132
3133 return rc;
3134 }
3135
/* -o localflock - only provides locally consistent flock locks */
/* Default file operations: no ->flock/->lock methods, so flock() falls
 * back to locally consistent semantics only. */
struct file_operations ll_file_operations = {
	.read_iter = ll_file_read_iter,
	.write_iter = ll_file_write_iter,
	.unlocked_ioctl = ll_file_ioctl,
	.open = ll_file_open,
	.release = ll_file_release,
	.mmap = ll_file_mmap,
	.llseek = ll_file_seek,
	.splice_read = ll_file_splice_read,
	.fsync = ll_fsync,
	.flush = ll_flush
};
3149
/* For -o flock: cluster-coherent flock()/POSIX locks via ll_file_flock. */
struct file_operations ll_file_operations_flock = {
	.read_iter = ll_file_read_iter,
	.write_iter = ll_file_write_iter,
	.unlocked_ioctl = ll_file_ioctl,
	.open = ll_file_open,
	.release = ll_file_release,
	.mmap = ll_file_mmap,
	.llseek = ll_file_seek,
	.splice_read = ll_file_splice_read,
	.fsync = ll_fsync,
	.flush = ll_flush,
	.flock = ll_file_flock,
	.lock = ll_file_flock
};
3164
/* These are for -o noflock - to return ENOSYS on flock calls */
struct file_operations ll_file_operations_noflock = {
	.read_iter = ll_file_read_iter,
	.write_iter = ll_file_write_iter,
	.unlocked_ioctl = ll_file_ioctl,
	.open = ll_file_open,
	.release = ll_file_release,
	.mmap = ll_file_mmap,
	.llseek = ll_file_seek,
	.splice_read = ll_file_splice_read,
	.fsync = ll_fsync,
	.flush = ll_flush,
	.flock = ll_file_noflock,
	.lock = ll_file_noflock
};
3180
/* Inode operations for regular files. */
struct inode_operations ll_file_inode_operations = {
	.setattr = ll_setattr,
	.getattr = ll_getattr,
	.permission = ll_inode_permission,
	.setxattr = ll_setxattr,
	.getxattr = ll_getxattr,
	.listxattr = ll_listxattr,
	.removexattr = ll_removexattr,
	.fiemap = ll_fiemap,
	.get_acl = ll_get_acl,
};
3192
/* dynamic ioctl number support routines */

/* Registry of dynamically registered ioctl handlers.  ioc_sem is taken
 * for read by the dispatcher (ll_iocontrol_call) and for write by
 * register/unregister. */
static struct llioc_ctl_data {
	struct rw_semaphore ioc_sem;	/* protects ioc_head */
	struct list_head ioc_head;	/* list of struct llioc_data */
} llioc = {
	__RWSEM_INITIALIZER(llioc.ioc_sem),
	LIST_HEAD_INIT(llioc.ioc_head)
};


/* One dynamic ioctl registration: a callback plus the command numbers
 * it handles. */
struct llioc_data {
	struct list_head iocd_list;	/* linkage on llioc.ioc_head */
	unsigned int iocd_size;		/* total allocation size in bytes */
	llioc_callback_t iocd_cb;	/* handler callback */
	unsigned int iocd_count;	/* number of entries in iocd_cmd[] */
	unsigned int iocd_cmd[0];	/* trailing array of ioctl commands */
};
3210
/**
 * Register a dynamic ioctl command table.
 *
 * \param cb	callback invoked when one of \a cmd matches an ioctl
 * \param count	number of entries in \a cmd (0..LLIOC_MAX_CMD)
 * \param cmd	array of ioctl command numbers handled by \a cb
 *
 * \return opaque cookie to pass to ll_iocontrol_unregister(), or NULL
 *	   on invalid arguments or allocation failure.
 */
void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
{
	unsigned int size;
	struct llioc_data *in_data = NULL;

	if (cb == NULL || cmd == NULL ||
	    count > LLIOC_MAX_CMD || count < 0)
		return NULL;

	size = sizeof(*in_data) + count * sizeof(unsigned int);
	in_data = kzalloc(size, GFP_NOFS);
	if (!in_data)
		return NULL;

	/* kzalloc() already zeroed the allocation, so no memset() here. */
	in_data->iocd_size = size;
	in_data->iocd_cb = cb;
	in_data->iocd_count = count;
	memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);

	down_write(&llioc.ioc_sem);
	list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
	up_write(&llioc.ioc_sem);

	return in_data;
}
EXPORT_SYMBOL(ll_iocontrol_register);
3238
ll_iocontrol_unregister(void * magic)3239 void ll_iocontrol_unregister(void *magic)
3240 {
3241 struct llioc_data *tmp;
3242
3243 if (magic == NULL)
3244 return;
3245
3246 down_write(&llioc.ioc_sem);
3247 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3248 if (tmp == magic) {
3249 unsigned int size = tmp->iocd_size;
3250
3251 list_del(&tmp->iocd_list);
3252 up_write(&llioc.ioc_sem);
3253
3254 OBD_FREE(tmp, size);
3255 return;
3256 }
3257 }
3258 up_write(&llioc.ioc_sem);
3259
3260 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3261 }
3262 EXPORT_SYMBOL(ll_iocontrol_unregister);
3263
/*
 * Dispatch an ioctl to the dynamically registered handlers: walk the
 * registry under a read lock and invoke the first callback whose command
 * table contains \a cmd.  The walk continues across registrations until
 * a callback returns LLIOC_STOP.
 *
 * *rcp (if non-NULL) receives the callback's result, or -EINVAL when no
 * callback claimed the command.
 */
static enum llioc_iter
ll_iocontrol_call(struct inode *inode, struct file *file,
		  unsigned int cmd, unsigned long arg, int *rcp)
{
	enum llioc_iter ret = LLIOC_CONT;
	struct llioc_data *data;
	int rc = -EINVAL, i;

	down_read(&llioc.ioc_sem);
	list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
		for (i = 0; i < data->iocd_count; i++) {
			if (cmd != data->iocd_cmd[i])
				continue;

			/* At most one callback per registration is run. */
			ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
			break;
		}

		if (ret == LLIOC_STOP)
			break;
	}
	up_read(&llioc.ioc_sem);

	if (rcp)
		*rcp = rc;
	return ret;
}
3291
/*
 * Push a layout configuration into the inode's cl_object stack.
 *
 * For OBJECT_CONF_SET, conf->coc_lock must be a layout lock; matching on
 * it is only enabled after the layout has been applied, so no thread can
 * observe a stale layout through a matchable lock.
 *
 * Returns 0 on success (or when the inode has no cl_object yet), the
 * error from cl_env_nested_get()/cl_conf_set() otherwise.
 */
int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct cl_env_nest nest;
	struct lu_env *env;
	int result;

	/* No cl_object attached: nothing to configure. */
	if (lli->lli_clob == NULL)
		return 0;

	env = cl_env_nested_get(&nest);
	if (IS_ERR(env))
		return PTR_ERR(env);

	result = cl_conf_set(env, lli->lli_clob, conf);
	cl_env_nested_put(&nest, env);

	if (conf->coc_opc == OBJECT_CONF_SET) {
		struct ldlm_lock *lock = conf->coc_lock;

		LASSERT(lock != NULL);
		LASSERT(ldlm_has_layout(lock));
		if (result == 0) {
			/* it can only be allowed to match after layout is
			 * applied to inode otherwise false layout would be
			 * seen. Applying layout should happen before dropping
			 * the intent lock. */
			ldlm_lock_allow_match(lock);
		}
	}
	return result;
}
3324
/* Fetch layout from MDT with getxattr request, if it's not ready yet */
static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)

{
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct obd_capa *oc;
	struct ptlrpc_request *req;
	struct mdt_body *body;
	void *lvbdata;
	void *lmm;
	int lmmsize;
	int rc;

	CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
	       PFID(ll_inode2fid(inode)), !!(lock->l_flags & LDLM_FL_LVB_READY),
	       lock->l_lvb_data, lock->l_lvb_len);

	/* Layout already attached to the lock and marked ready: nothing
	 * to fetch. */
	if ((lock->l_lvb_data != NULL) && (lock->l_flags & LDLM_FL_LVB_READY))
		return 0;

	/* if layout lock was granted right away, the layout is returned
	 * within DLM_LVB of dlm reply; otherwise if the lock was ever
	 * blocked and then granted via completion ast, we have to fetch
	 * layout here. Please note that we can't use the LVB buffer in
	 * completion AST because it doesn't have a large enough buffer */
	oc = ll_mdscapa_get(inode);
	rc = ll_get_default_mdsize(sbi, &lmmsize);
	if (rc == 0)
		rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
				 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
				 lmmsize, 0, &req);
	capa_put(oc);
	if (rc < 0)
		return rc;

	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
	if (body == NULL) {
		rc = -EPROTO;
		goto out;
	}

	/* eadatasize == 0 means the file currently has no striping. */
	lmmsize = body->eadatasize;
	if (lmmsize == 0) /* empty layout */ {
		rc = 0;
		goto out;
	}

	lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
	if (lmm == NULL) {
		rc = -EFAULT;
		goto out;
	}

	/* Copy the layout into a private buffer and install it as the
	 * lock's LVB, replacing any stale data under the resource lock. */
	OBD_ALLOC_LARGE(lvbdata, lmmsize);
	if (lvbdata == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	memcpy(lvbdata, lmm, lmmsize);
	lock_res_and_lock(lock);
	if (lock->l_lvb_data != NULL)
		OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);

	lock->l_lvb_data = lvbdata;
	lock->l_lvb_len = lmmsize;
	unlock_res_and_lock(lock);

out:
	ptlrpc_req_finished(req);
	return rc;
}
3397
/**
 * Apply the layout to the inode. Layout lock is held and will be released
 * in this function.
 *
 * \param lockh		handle of the held layout lock; always decref'd
 *			before return
 * \param mode		mode \a lockh is held in
 * \param inode		inode whose layout is being configured
 * \param gen		out: layout generation applied to the inode
 * \param reconf	if false, only report an already-applied layout
 *
 * \retval 0		layout applied (or already valid), *gen set
 * \retval -ENODATA	LVB not ready and \a reconf is false
 * \retval -EAGAIN	layout refresh raced with in-flight I/O; retry
 * \retval negative	other errors from fetch/unpack/configure
 */
static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
			      struct inode *inode, __u32 *gen, bool reconf)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ldlm_lock *lock;
	struct lustre_md md = { NULL };
	struct cl_object_conf conf;
	int rc = 0;
	bool lvb_ready;
	bool wait_layout = false;

	LASSERT(lustre_handle_is_used(lockh));

	lock = ldlm_handle2lock(lockh);
	LASSERT(lock != NULL);
	LASSERT(ldlm_has_layout(lock));

	LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d.\n",
		   inode, PFID(&lli->lli_fid), reconf);

	/* in case this is a caching lock and reinstate with new inode */
	md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);

	lock_res_and_lock(lock);
	lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY);
	unlock_res_and_lock(lock);
	/* checking lvb_ready is racy but this is okay. The worst case is
	 * that multi processes may configure the file on the same time. */
	if (lvb_ready || !reconf) {
		rc = -ENODATA;
		if (lvb_ready) {
			/* layout_gen must be valid if layout lock is not
			 * cancelled and stripe has already set */
			*gen = ll_layout_version_get(lli);
			rc = 0;
		}
		goto out;
	}

	rc = ll_layout_fetch(inode, lock);
	if (rc < 0)
		goto out;

	/* for layout lock, lmm is returned in lock's lvb.
	 * lvb_data is immutable if the lock is held so it's safe to access it
	 * without res lock. See the description in ldlm_lock_decref_internal()
	 * for the condition to free lvb_data of layout lock */
	if (lock->l_lvb_data != NULL) {
		rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
				  lock->l_lvb_data, lock->l_lvb_len);
		if (rc >= 0) {
			/* No lsm means an empty layout: report the
			 * sentinel generation. */
			*gen = LL_LAYOUT_GEN_EMPTY;
			if (md.lsm != NULL)
				*gen = md.lsm->lsm_layout_gen;
			rc = 0;
		} else {
			CERROR("%s: file "DFID" unpackmd error: %d\n",
			       ll_get_fsname(inode->i_sb, NULL, 0),
			       PFID(&lli->lli_fid), rc);
		}
	}
	if (rc < 0)
		goto out;

	/* set layout to file. Unlikely this will fail as old layout was
	 * surely eliminated */
	memset(&conf, 0, sizeof(conf));
	conf.coc_opc = OBJECT_CONF_SET;
	conf.coc_inode = inode;
	conf.coc_lock = lock;
	conf.u.coc_md = &md;
	rc = ll_layout_conf(inode, &conf);

	if (md.lsm != NULL)
		obd_free_memmd(sbi->ll_dt_exp, &md.lsm);

	/* refresh layout failed, need to wait */
	wait_layout = rc == -EBUSY;

out:
	LDLM_LOCK_PUT(lock);
	ldlm_lock_decref(lockh, mode);

	/* wait for IO to complete if it's still being used. */
	if (wait_layout) {
		CDEBUG(D_INODE, "%s: %p/"DFID" wait for layout reconf.\n",
		       ll_get_fsname(inode->i_sb, NULL, 0),
		       inode, PFID(&lli->lli_fid));

		memset(&conf, 0, sizeof(conf));
		conf.coc_opc = OBJECT_CONF_WAIT;
		conf.coc_inode = inode;
		rc = ll_layout_conf(inode, &conf);
		if (rc == 0)
			rc = -EAGAIN;

		CDEBUG(D_INODE, "file: "DFID" waiting layout return: %d.\n",
		       PFID(&lli->lli_fid), rc);
	}
	return rc;
}
3504
/**
 * This function checks if there exists a LAYOUT lock on the client side,
 * or enqueues it if it doesn't have one in cache.
 *
 * This function will not hold layout lock so it may be revoked any time after
 * this function returns. Any operations depend on layout should be redone
 * in that case.
 *
 * This function should be called before lov_io_init() to get an uptodate
 * layout version, the caller should save the version number and after IO
 * is finished, this function should be called again to verify that layout
 * is not changed during IO time.
 */
int ll_layout_refresh(struct inode *inode, __u32 *gen)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct md_op_data *op_data;
	struct lookup_intent it;
	struct lustre_handle lockh;
	ldlm_mode_t mode;
	struct ldlm_enqueue_info einfo = {
		.ei_type = LDLM_IBITS,
		.ei_mode = LCK_CR,
		.ei_cb_bl = ll_md_blocking_ast,
		.ei_cb_cp = ldlm_completion_ast,
	};
	int rc;

	/* Fast path: layout version already known, or layout locks are
	 * disabled for this mount. */
	*gen = ll_layout_version_get(lli);
	if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != LL_LAYOUT_GEN_NONE)
		return 0;

	/* sanity checks */
	LASSERT(fid_is_sane(ll_inode2fid(inode)));
	LASSERT(S_ISREG(inode->i_mode));

	/* take layout lock mutex to enqueue layout lock exclusively. */
	mutex_lock(&lli->lli_layout_mutex);

again:
	/* mostly layout lock is caching on the local side, so try to match
	 * it before grabbing layout lock mutex. */
	mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
			       LCK_CR | LCK_CW | LCK_PR | LCK_PW);
	if (mode != 0) { /* hit cached lock */
		rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
		if (rc == -EAGAIN)
			goto again;

		mutex_unlock(&lli->lli_layout_mutex);
		return rc;
	}

	op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
				     0, 0, LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data)) {
		mutex_unlock(&lli->lli_layout_mutex);
		return PTR_ERR(op_data);
	}

	/* have to enqueue one */
	memset(&it, 0, sizeof(it));
	it.it_op = IT_LAYOUT;
	lockh.cookie = 0ULL;

	LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/"DFID".\n",
			  ll_get_fsname(inode->i_sb, NULL, 0), inode,
			  PFID(&lli->lli_fid));

	rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
			NULL, 0, NULL, 0);
	/* Drop the intent's request and lock reference; we keep only the
	 * lock handle in lockh / mode below. */
	if (it.d.lustre.it_data != NULL)
		ptlrpc_req_finished(it.d.lustre.it_data);
	it.d.lustre.it_data = NULL;

	ll_finish_md_op_data(op_data);

	mode = it.d.lustre.it_lock_mode;
	it.d.lustre.it_lock_mode = 0;
	ll_intent_drop_lock(&it);

	if (rc == 0) {
		/* set lock data in case this is a new lock */
		ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
		rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
		if (rc == -EAGAIN)
			goto again;
	}
	mutex_unlock(&lli->lli_layout_mutex);

	return rc;
}
3598
3599 /**
3600 * This function send a restore request to the MDT
3601 */
ll_layout_restore(struct inode * inode)3602 int ll_layout_restore(struct inode *inode)
3603 {
3604 struct hsm_user_request *hur;
3605 int len, rc;
3606
3607 len = sizeof(struct hsm_user_request) +
3608 sizeof(struct hsm_user_item);
3609 hur = kzalloc(len, GFP_NOFS);
3610 if (!hur)
3611 return -ENOMEM;
3612
3613 hur->hur_request.hr_action = HUA_RESTORE;
3614 hur->hur_request.hr_archive_id = 0;
3615 hur->hur_request.hr_flags = 0;
3616 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3617 sizeof(hur->hur_user_item[0].hui_fid));
3618 hur->hur_user_item[0].hui_extent.length = -1;
3619 hur->hur_request.hr_itemcount = 1;
3620 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,
3621 len, hur, NULL);
3622 OBD_FREE(hur, len);
3623 return rc;
3624 }
3625