/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * Implementation of cl_page for VVP layer.
 *
 *   Author: Nikita Danilov <nikita.danilov@sun.com>
 *   Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
 */

#define DEBUG_SUBSYSTEM S_LLITE


#include "../include/obd.h"
#include "../include/lustre_lite.h"

#include "vvp_internal.h"

/*****************************************************************************
 *
 * Page operations.
 *
 */
static void vvp_page_fini_common(struct ccc_page *cp)
{
	struct page *vmpage = cp->cpg_page;

	LASSERT(vmpage != NULL);
	page_cache_release(vmpage);
}

static void vvp_page_fini(const struct lu_env *env,
			  struct cl_page_slice *slice)
{
	struct ccc_page *cp = cl2ccc_page(slice);
	struct page *vmpage = cp->cpg_page;

	/*
	 * vmpage->private was already cleared when page was moved into
	 * VPG_FREEING state.
	 */
	LASSERT((struct cl_page *)vmpage->private != slice->cpl_page);
	vvp_page_fini_common(cp);
}

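/**
 * Implements cl_page_operations::cpo_own() method.
 *
 * Ownership of a VVP page is taken by locking the underlying vmpage and
 * waiting for any writeback in progress. In non-blocking mode the page is
 * only try-locked, and -EAGAIN is returned if the lock cannot be taken or
 * the page is still under writeback.
 */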
static int vvp_page_own(const struct lu_env *env,
			const struct cl_page_slice *slice, struct cl_io *io,
			int nonblock)
{
	struct ccc_page *vpg    = cl2ccc_page(slice);
	struct page     *vmpage = vpg->cpg_page;

	LASSERT(vmpage != NULL);
	if (nonblock) {
		if (!trylock_page(vmpage))
			return -EAGAIN;

		if (unlikely(PageWriteback(vmpage))) {
			unlock_page(vmpage);
			return -EAGAIN;
		}

		return 0;
	}

	lock_page(vmpage);
	wait_on_page_writeback(vmpage);
	return 0;
}

static void vvp_page_assume(const struct lu_env *env,
			    const struct cl_page_slice *slice,
			    struct cl_io *unused)
{
	struct page *vmpage = cl2vm_page(slice);

	LASSERT(vmpage != NULL);
	LASSERT(PageLocked(vmpage));
	wait_on_page_writeback(vmpage);
}

static void vvp_page_unassume(const struct lu_env *env,
			      const struct cl_page_slice *slice,
			      struct cl_io *unused)
{
	struct page *vmpage = cl2vm_page(slice);

	LASSERT(vmpage != NULL);
	LASSERT(PageLocked(vmpage));
}

static void vvp_page_disown(const struct lu_env *env,
			    const struct cl_page_slice *slice, struct cl_io *io)
{
	struct page *vmpage = cl2vm_page(slice);

	LASSERT(vmpage != NULL);
	LASSERT(PageLocked(vmpage));

	unlock_page(cl2vm_page(slice));
}

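/**
 * Implements cl_page_operations::cpo_discard() method.
 *
 * Drops the page from the page cache. A page that was brought in by
 * read-ahead but never used is counted as a discarded read-ahead page
 * before truncate_complete_page() removes it.
 */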
static void vvp_page_discard(const struct lu_env *env,
			     const struct cl_page_slice *slice,
			     struct cl_io *unused)
{
	struct page	     *vmpage  = cl2vm_page(slice);
	struct address_space *mapping;
	struct ccc_page      *cpg     = cl2ccc_page(slice);

	LASSERT(vmpage != NULL);
	LASSERT(PageLocked(vmpage));

	mapping = vmpage->mapping;

	if (cpg->cpg_defer_uptodate && !cpg->cpg_ra_used)
		ll_ra_stats_inc(mapping, RA_STAT_DISCARDED);

	/*
	 * truncate_complete_page() calls
	 * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete().
	 */
	truncate_complete_page(mapping, vmpage);
}

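/**
 * Implements cl_page_operations::cpo_unmap() method.
 *
 * Tears down any user-space mappings covering the page via
 * ll_teardown_mmaps().
 */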
static int vvp_page_unmap(const struct lu_env *env,
			  const struct cl_page_slice *slice,
			  struct cl_io *unused)
{
	struct page *vmpage = cl2vm_page(slice);
	__u64 offset;

	LASSERT(vmpage != NULL);
	LASSERT(PageLocked(vmpage));

	/* Cast before shifting so the byte offset does not overflow on
	 * 32-bit systems for files larger than 4GB. */
	offset = (__u64)vmpage->index << PAGE_CACHE_SHIFT;

	/*
	 * XXX is it safe to call this with the page lock held?
	 */
	ll_teardown_mmaps(vmpage->mapping, offset, offset + PAGE_CACHE_SIZE);
	return 0;
}

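/**
 * Implements cl_page_operations::cpo_delete() method.
 *
 * Called when the cl_page is being destroyed: removes the page from the
 * write-pending accounting and severs the link from the vmpage back to
 * the cl_page.
 */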
static void vvp_page_delete(const struct lu_env *env,
			    const struct cl_page_slice *slice)
{
	struct page	 *vmpage = cl2vm_page(slice);
	struct inode	 *inode  = vmpage->mapping->host;
	struct cl_object *obj    = slice->cpl_obj;

	LASSERT(PageLocked(vmpage));
	LASSERT((struct cl_page *)vmpage->private == slice->cpl_page);
	LASSERT(inode == ccc_object_inode(obj));

	vvp_write_complete(cl2ccc(obj), cl2ccc_page(slice));
	ClearPagePrivate(vmpage);
	vmpage->private = 0;
	/*
	 * Reference from vmpage to cl_page is removed, but the reference back
	 * is still here. It is removed later in vvp_page_fini().
	 */
}

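/**
 * Implements cl_page_operations::cpo_export() method.
 *
 * Propagates the cl_page up-to-date state to the VM page flags.
 */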
static void vvp_page_export(const struct lu_env *env,
			    const struct cl_page_slice *slice,
			    int uptodate)
{
	struct page *vmpage = cl2vm_page(slice);

	LASSERT(vmpage != NULL);
	LASSERT(PageLocked(vmpage));
	if (uptodate)
		SetPageUptodate(vmpage);
	else
		ClearPageUptodate(vmpage);
}

static int vvp_page_is_vmlocked(const struct lu_env *env,
				const struct cl_page_slice *slice)
{
	return PageLocked(cl2vm_page(slice)) ? -EBUSY : -ENODATA;
}

static int vvp_page_prep_read(const struct lu_env *env,
			      const struct cl_page_slice *slice,
			      struct cl_io *unused)
{
	/* Skip the page already marked as PG_uptodate. */
	return PageUptodate(cl2vm_page(slice)) ? -EALREADY : 0;
}

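/**
 * Implements cl_page_operations::cpo_prep() method for writes.
 *
 * Marks the vmpage as under writeback and records the page as
 * write-pending on the object before the transfer is started.
 */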
static int vvp_page_prep_write(const struct lu_env *env,
			       const struct cl_page_slice *slice,
			       struct cl_io *unused)
{
	struct page *vmpage = cl2vm_page(slice);

	LASSERT(PageLocked(vmpage));
	LASSERT(!PageDirty(vmpage));

	set_page_writeback(vmpage);
	vvp_write_pending(cl2ccc(slice->cpl_obj), cl2ccc_page(slice));

	return 0;
}

/**
 * Handles page transfer errors at the VM level.
 *
 * This takes the inode as a separate argument because the inode on which
 * the error is to be set can be different from the \a vmpage inode in the
 * case of direct I/O.
 */
static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret)
{
	struct ccc_object *obj = cl_inode2ccc(inode);

	if (ioret == 0) {
		ClearPageError(vmpage);
		obj->cob_discard_page_warned = 0;
	} else {
		SetPageError(vmpage);
		if (ioret == -ENOSPC)
			set_bit(AS_ENOSPC, &inode->i_mapping->flags);
		else
			set_bit(AS_EIO, &inode->i_mapping->flags);

		if ((ioret == -ESHUTDOWN || ioret == -EINTR) &&
		     obj->cob_discard_page_warned == 0) {
			obj->cob_discard_page_warned = 1;
			ll_dirty_page_discard_warn(vmpage, ioret);
		}
	}
}

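/**
 * Implements cl_page_operations::cpo_completion() method for reads.
 *
 * Releases the read-ahead reservation taken for the page, if any. On
 * success the page is exported as up-to-date, unless that is deferred for
 * read-ahead pages; for asynchronous reads the vmpage is unlocked here.
 */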
static void vvp_page_completion_read(const struct lu_env *env,
				     const struct cl_page_slice *slice,
				     int ioret)
{
	struct ccc_page *cp     = cl2ccc_page(slice);
	struct page     *vmpage = cp->cpg_page;
	struct cl_page  *page   = cl_page_top(slice->cpl_page);
	struct inode    *inode  = ccc_object_inode(page->cp_obj);

	LASSERT(PageLocked(vmpage));
	CL_PAGE_HEADER(D_PAGE, env, page, "completing READ with %d\n", ioret);

	if (cp->cpg_defer_uptodate)
		ll_ra_count_put(ll_i2sbi(inode), 1);

	if (ioret == 0) {
		if (!cp->cpg_defer_uptodate)
			cl_page_export(env, page, 1);
	} else
		cp->cpg_defer_uptodate = 0;

	if (page->cp_sync_io == NULL)
		unlock_page(vmpage);
}

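/**
 * Implements cl_page_operations::cpo_completion() method for writes.
 *
 * Clears the page's write-queued state and removes it from the
 * write-pending accounting. For asynchronous writes any IO error is
 * recorded on the vmpage and the inode mapping; finally the writeback
 * state is ended.
 */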
static void vvp_page_completion_write(const struct lu_env *env,
				      const struct cl_page_slice *slice,
				      int ioret)
{
	struct ccc_page *cp     = cl2ccc_page(slice);
	struct cl_page  *pg     = slice->cpl_page;
	struct page     *vmpage = cp->cpg_page;

	LASSERT(ergo(pg->cp_sync_io != NULL, PageLocked(vmpage)));
	LASSERT(PageWriteback(vmpage));

	CL_PAGE_HEADER(D_PAGE, env, pg, "completing WRITE with %d\n", ioret);

	/*
	 * TODO: it would actually make sense to add the page back into the
	 * oap pending list, so that it would not have to be taken off the
	 * SoM write pending list when only a recoverable error (-ENOMEM,
	 * etc.) was hit. To implement this, ->cpo_completion() only needs
	 * to return a non-zero value; the underlying transfer would be
	 * notified and would then re-add the page to the pending transfer
	 * queue.  -jay
	 */

	cp->cpg_write_queued = 0;
	vvp_write_complete(cl2ccc(slice->cpl_obj), cp);

	/*
	 * Only mark the page in error when the write is asynchronous,
	 * because the application won't wait for the IO to finish.
	 */
	if (pg->cp_sync_io == NULL)
		vvp_vmpage_error(ccc_object_inode(pg->cp_obj), vmpage, ioret);

	end_page_writeback(vmpage);
}

/**
 * Implements cl_page_operations::cpo_make_ready() method.
 *
 * This is called to yank a page from the transfer cache and to send it out
 * as part of a transfer. This function try-locks the page. If the try-lock
 * fails, the page is owned by some concurrent IO and should be skipped
 * (this is bad, but hopefully a rare situation, as it usually results in
 * the transfer being shorter than possible).
 *
 * \retval 0       success, page can be placed into transfer
 *
 * \retval -EAGAIN page is either used by concurrent IO or has been
 * truncated. Skip it.
 */
static int vvp_page_make_ready(const struct lu_env *env,
			       const struct cl_page_slice *slice)
{
	struct page *vmpage = cl2vm_page(slice);
	struct cl_page *pg = slice->cpl_page;
	int result = 0;

	lock_page(vmpage);
	if (clear_page_dirty_for_io(vmpage)) {
		LASSERT(pg->cp_state == CPS_CACHED);
		/* This actually clears the dirty bit in the radix
		 * tree. */
		set_page_writeback(vmpage);
		vvp_write_pending(cl2ccc(slice->cpl_obj),
				cl2ccc_page(slice));
		CL_PAGE_HEADER(D_PAGE, env, pg, "readied\n");
	} else if (pg->cp_state == CPS_PAGEOUT) {
		/* is it possible for osc_flush_async_page() to already
		 * make it ready? */
		result = -EALREADY;
	} else {
		CL_PAGE_DEBUG(D_ERROR, env, pg, "Unexpected page state %d.\n",
			      pg->cp_state);
		LBUG();
	}
	unlock_page(vmpage);
	return result;
}

static int vvp_page_print(const struct lu_env *env,
			  const struct cl_page_slice *slice,
			  void *cookie, lu_printer_t printer)
{
	struct ccc_page *vp     = cl2ccc_page(slice);
	struct page     *vmpage = vp->cpg_page;

	(*printer)(env, cookie, LUSTRE_VVP_NAME "-page@%p(%d:%d:%d) vm@%p ",
		   vp, vp->cpg_defer_uptodate, vp->cpg_ra_used,
		   vp->cpg_write_queued, vmpage);
	if (vmpage != NULL) {
		(*printer)(env, cookie, "%lx %d:%d %lx %lu %slru",
			   (long)vmpage->flags, page_count(vmpage),
			   page_mapcount(vmpage), vmpage->private,
			   page_index(vmpage),
			   list_empty(&vmpage->lru) ? "not-" : "");
	}
	(*printer)(env, cookie, "\n");
	return 0;
}

static const struct cl_page_operations vvp_page_ops = {
	.cpo_own           = vvp_page_own,
	.cpo_assume        = vvp_page_assume,
	.cpo_unassume      = vvp_page_unassume,
	.cpo_disown        = vvp_page_disown,
	.cpo_vmpage        = ccc_page_vmpage,
	.cpo_discard       = vvp_page_discard,
	.cpo_delete        = vvp_page_delete,
	.cpo_unmap         = vvp_page_unmap,
	.cpo_export        = vvp_page_export,
	.cpo_is_vmlocked   = vvp_page_is_vmlocked,
	.cpo_fini          = vvp_page_fini,
	.cpo_print         = vvp_page_print,
	.cpo_is_under_lock = ccc_page_is_under_lock,
	.io = {
		[CRT_READ] = {
			.cpo_prep       = vvp_page_prep_read,
			.cpo_completion = vvp_page_completion_read,
			.cpo_make_ready = ccc_fail,
		},
		[CRT_WRITE] = {
			.cpo_prep       = vvp_page_prep_write,
			.cpo_completion = vvp_page_completion_write,
			.cpo_make_ready = vvp_page_make_ready,
		}
	}
};

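/*
 * Transient page operations. Transient pages are serialized by the inode
 * mutex rather than by the VM page lock; the helper below asserts that
 * the caller holds it.
 */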
static void vvp_transient_page_verify(const struct cl_page *page)
{
	struct inode *inode = ccc_object_inode(page->cp_obj);

	LASSERT(!mutex_trylock(&inode->i_mutex));
}

static int vvp_transient_page_own(const struct lu_env *env,
				  const struct cl_page_slice *slice,
				  struct cl_io *unused, int nonblock)
{
	vvp_transient_page_verify(slice->cpl_page);
	return 0;
}

static void vvp_transient_page_assume(const struct lu_env *env,
				      const struct cl_page_slice *slice,
				      struct cl_io *unused)
{
	vvp_transient_page_verify(slice->cpl_page);
}

static void vvp_transient_page_unassume(const struct lu_env *env,
					const struct cl_page_slice *slice,
					struct cl_io *unused)
{
	vvp_transient_page_verify(slice->cpl_page);
}

static void vvp_transient_page_disown(const struct lu_env *env,
				      const struct cl_page_slice *slice,
				      struct cl_io *unused)
{
	vvp_transient_page_verify(slice->cpl_page);
}

static void vvp_transient_page_discard(const struct lu_env *env,
				       const struct cl_page_slice *slice,
				       struct cl_io *unused)
{
	struct cl_page *page = slice->cpl_page;

	vvp_transient_page_verify(slice->cpl_page);

	/*
	 * For transient pages, just remove the page from the radix tree.
	 */
	cl_page_delete(env, page);
}

static int vvp_transient_page_is_vmlocked(const struct lu_env *env,
					  const struct cl_page_slice *slice)
{
	struct inode *inode = ccc_object_inode(slice->cpl_obj);
	int locked;

	locked = !mutex_trylock(&inode->i_mutex);
	if (!locked)
		mutex_unlock(&inode->i_mutex);
	return locked ? -EBUSY : -ENODATA;
}

static void
vvp_transient_page_completion(const struct lu_env *env,
			      const struct cl_page_slice *slice,
			      int ioret)
{
	vvp_transient_page_verify(slice->cpl_page);
}

static void vvp_transient_page_fini(const struct lu_env *env,
				    struct cl_page_slice *slice)
{
	struct ccc_page *cp = cl2ccc_page(slice);
	struct cl_page *clp = slice->cpl_page;
	struct ccc_object *clobj = cl2ccc(clp->cp_obj);

	vvp_page_fini_common(cp);
	LASSERT(!mutex_trylock(&clobj->cob_inode->i_mutex));
	clobj->cob_transient_pages--;
}

static const struct cl_page_operations vvp_transient_page_ops = {
	.cpo_own           = vvp_transient_page_own,
	.cpo_assume        = vvp_transient_page_assume,
	.cpo_unassume      = vvp_transient_page_unassume,
	.cpo_disown        = vvp_transient_page_disown,
	.cpo_discard       = vvp_transient_page_discard,
	.cpo_vmpage        = ccc_page_vmpage,
	.cpo_fini          = vvp_transient_page_fini,
	.cpo_is_vmlocked   = vvp_transient_page_is_vmlocked,
	.cpo_print         = vvp_page_print,
	.cpo_is_under_lock = ccc_page_is_under_lock,
	.io = {
		[CRT_READ] = {
			.cpo_prep       = ccc_transient_page_prep,
			.cpo_completion = vvp_transient_page_completion,
		},
		[CRT_WRITE] = {
			.cpo_prep       = ccc_transient_page_prep,
			.cpo_completion = vvp_transient_page_completion,
		}
	}
};

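/**
 * Initializes the VVP layer slice of a cl_page: pins the VM page and
 * registers either the cacheable or the transient page operations,
 * depending on the page type.
 */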
int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
		struct cl_page *page, struct page *vmpage)
{
	struct ccc_page *cpg = cl_object_page_slice(obj, page);

	CLOBINVRNT(env, obj, ccc_object_invariant(obj));

	cpg->cpg_page = vmpage;
	page_cache_get(vmpage);

	INIT_LIST_HEAD(&cpg->cpg_pending_linkage);
	if (page->cp_type == CPT_CACHEABLE) {
		SetPagePrivate(vmpage);
		vmpage->private = (unsigned long)page;
		cl_page_slice_add(page, &cpg->cpg_cl, obj,
				&vvp_page_ops);
	} else {
		struct ccc_object *clobj = cl2ccc(obj);

		LASSERT(!mutex_trylock(&clobj->cob_inode->i_mutex));
		cl_page_slice_add(page, &cpg->cpg_cl, obj,
				&vvp_transient_page_ops);
		clobj->cob_transient_pages++;
	}
	return 0;
}