/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2012, 2013, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/fld/fld_cache.c
 *
 * FLD (Fids Location Database)
 *
 * Author: Pravin Shelar <pravin.shelar@sun.com>
 * Author: Yury Umanets <umka@clusterfs.com>
 */

#define DEBUG_SUBSYSTEM S_FLD

#include "../../include/linux/libcfs/libcfs.h"
#include <linux/module.h>
#include <asm/div64.h>

#include "../include/obd.h"
#include "../include/obd_class.h"
#include "../include/lustre_ver.h"
#include "../include/obd_support.h"
#include "../include/lprocfs_status.h"

#include "../include/dt_object.h"
#include "../include/lustre_req_layout.h"
#include "../include/lustre_fld.h"
#include "fld_internal.h"

/**
 * create fld cache.
 */
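/*
 * Typical use (illustrative name and sizes, not taken from a real
 * call site):
 *
 *	cache = fld_cache_init("example-fld", 64, 8);
 *	if (IS_ERR(cache))
 *		return PTR_ERR(cache);
 *	...
 *	fld_cache_fini(cache);
 */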
struct fld_cache *fld_cache_init(const char *name,
				 int cache_size, int cache_threshold)
{
	struct fld_cache *cache;

	LASSERT(name != NULL);
	LASSERT(cache_threshold < cache_size);

	OBD_ALLOC_PTR(cache);
	if (cache == NULL)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&cache->fci_entries_head);
	INIT_LIST_HEAD(&cache->fci_lru);

	cache->fci_cache_count = 0;
	rwlock_init(&cache->fci_lock);

	strlcpy(cache->fci_name, name,
		sizeof(cache->fci_name));

	cache->fci_cache_size = cache_size;
	cache->fci_threshold = cache_threshold;

	/* Init fld cache info. */
	memset(&cache->fci_stat, 0, sizeof(cache->fci_stat));

	CDEBUG(D_INFO, "%s: FLD cache - Size: %d, Threshold: %d\n",
	       cache->fci_name, cache_size, cache_threshold);

	return cache;
}

/**
 * destroy fld cache.
 */
void fld_cache_fini(struct fld_cache *cache)
{
	__u64 pct;

	LASSERT(cache != NULL);
	fld_cache_flush(cache);

	if (cache->fci_stat.fst_count > 0) {
		pct = cache->fci_stat.fst_cache * 100;
		do_div(pct, cache->fci_stat.fst_count);
	} else {
		pct = 0;
	}

	CDEBUG(D_INFO, "FLD cache statistics (%s):\n", cache->fci_name);
	CDEBUG(D_INFO, "  Total reqs: %llu\n", cache->fci_stat.fst_count);
	CDEBUG(D_INFO, "  Cache reqs: %llu\n", cache->fci_stat.fst_cache);
	CDEBUG(D_INFO, "  Cache hits: %llu%%\n", pct);

	OBD_FREE_PTR(cache);
}

/**
 * delete given node from list.
 */
void fld_cache_entry_delete(struct fld_cache *cache,
			    struct fld_cache_entry *node)
{
	list_del(&node->fce_list);
	list_del(&node->fce_lru);
	cache->fci_cache_count--;
	OBD_FREE_PTR(node);
}

/**
 * Fix the list by checking each entry against the NEXT entry in order:
 * merge adjacent or overlapping ranges and drop duplicates.
 */
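/*
 * Example (hypothetical ranges on the same lsr_index):
 *   before: [0x200 .. 0x300) followed by [0x300 .. 0x400)
 *   after:  [0x200 .. 0x400)
 * The adjacent entries are merged into the NEXT entry and the current
 * one is deleted.
 */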
static void fld_fix_new_list(struct fld_cache *cache)
{
	struct fld_cache_entry *f_curr;
	struct fld_cache_entry *f_next;
	struct lu_seq_range *c_range;
	struct lu_seq_range *n_range;
	struct list_head *head = &cache->fci_entries_head;

restart_fixup:

	list_for_each_entry_safe(f_curr, f_next, head, fce_list) {
		c_range = &f_curr->fce_range;
		n_range = &f_next->fce_range;

		LASSERT(range_is_sane(c_range));
		if (&f_next->fce_list == head)
			break;

		if (c_range->lsr_flags != n_range->lsr_flags)
			continue;

		LASSERTF(c_range->lsr_start <= n_range->lsr_start,
			 "cur lsr_start "DRANGE" next lsr_start "DRANGE"\n",
			 PRANGE(c_range), PRANGE(n_range));

		/* check merge possibility with next range */
		if (c_range->lsr_end == n_range->lsr_start) {
			if (c_range->lsr_index != n_range->lsr_index)
				continue;
			n_range->lsr_start = c_range->lsr_start;
			fld_cache_entry_delete(cache, f_curr);
			continue;
		}

		/* check if current range overlaps with next range. */
		if (n_range->lsr_start < c_range->lsr_end) {
			if (c_range->lsr_index == n_range->lsr_index) {
				n_range->lsr_start = c_range->lsr_start;
				n_range->lsr_end = max(c_range->lsr_end,
						       n_range->lsr_end);
				fld_cache_entry_delete(cache, f_curr);
			} else {
				if (n_range->lsr_end <= c_range->lsr_end) {
					*n_range = *c_range;
					fld_cache_entry_delete(cache, f_curr);
				} else
					n_range->lsr_start = c_range->lsr_end;
			}

			/* the adjusted range could overlap the next
			 * one as well, so restart the scan. */
			goto restart_fixup;
		}

		/* kill duplicates */
		if (c_range->lsr_start == n_range->lsr_start &&
		    c_range->lsr_end == n_range->lsr_end)
			fld_cache_entry_delete(cache, f_curr);
	}
}

/**
 * add node to fld cache
 */
static inline void fld_cache_entry_add(struct fld_cache *cache,
				       struct fld_cache_entry *f_new,
				       struct list_head *pos)
{
	list_add(&f_new->fce_list, pos);
	list_add(&f_new->fce_lru, &cache->fci_lru);

	cache->fci_cache_count++;
	fld_fix_new_list(cache);
}

/**
 * Check if the cache needs to be shrunk. If so, evict entries from the
 * tail of the LRU list until the cache is small enough.
 */
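/*
 * Example (illustrative numbers): with fci_cache_size = 64 and
 * fci_threshold = 8, shrinking starts once the cache holds 64 entries
 * and evicts LRU-tail entries until at most 56 (64 - 8) remain.
 */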
static int fld_cache_shrink(struct fld_cache *cache)
{
	struct fld_cache_entry *flde;
	struct list_head *curr;
	int num = 0;

	LASSERT(cache != NULL);

	if (cache->fci_cache_count < cache->fci_cache_size)
		return 0;

	curr = cache->fci_lru.prev;

	while (cache->fci_cache_count + cache->fci_threshold >
	       cache->fci_cache_size && curr != &cache->fci_lru) {

		flde = list_entry(curr, struct fld_cache_entry, fce_lru);
		curr = curr->prev;
		fld_cache_entry_delete(cache, flde);
		num++;
	}

	CDEBUG(D_INFO, "%s: FLD cache - Shrunk by %d entries\n",
			cache->fci_name, num);

	return 0;
}

/**
 * kill all fld cache entries. Setting the cache size to zero forces
 * fld_cache_shrink() to evict every entry.
 */
void fld_cache_flush(struct fld_cache *cache)
{
	write_lock(&cache->fci_lock);
	cache->fci_cache_size = 0;
	fld_cache_shrink(cache);
	write_unlock(&cache->fci_lock);
}

/**
 * punch hole in existing range. divide this range and add new
 * entry accordingly.
 */

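/*
 * Example (hypothetical values):
 *   f_curr before the punch: [0x100 .. 0x500) on index 0
 *   f_new (the hole):        [0x200 .. 0x300) on index 1
 *   result, three entries:
 *     f_curr: [0x100 .. 0x200) index 0
 *     f_new:  [0x200 .. 0x300) index 1
 *     fldt:   [0x300 .. 0x500) index 0
 */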
static void fld_cache_punch_hole(struct fld_cache *cache,
				 struct fld_cache_entry *f_curr,
				 struct fld_cache_entry *f_new)
{
	const struct lu_seq_range *range = &f_new->fce_range;
	const u64 new_start  = range->lsr_start;
	const u64 new_end  = range->lsr_end;
	struct fld_cache_entry *fldt;

	OBD_ALLOC_GFP(fldt, sizeof(*fldt), GFP_ATOMIC);
	if (!fldt) {
		OBD_FREE_PTR(f_new);
		/* overlap is not allowed, so don't mess up list. */
		return;
	}
	/* break the f_curr range into three ranges:
	 *	f_curr, f_new, fldt
	 */

	/* f_new = *range */

	/* fldt */
	fldt->fce_range.lsr_start = new_end;
	fldt->fce_range.lsr_end = f_curr->fce_range.lsr_end;
	fldt->fce_range.lsr_index = f_curr->fce_range.lsr_index;

	/* f_curr */
	f_curr->fce_range.lsr_end = new_start;

	/* add these two entries to list */
	fld_cache_entry_add(cache, f_new, &f_curr->fce_list);
	fld_cache_entry_add(cache, fldt, &f_new->fce_list);

	/* no need to fixup */
}

/**
 * handle range overlap in fld cache.
 */
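/*
 * Overview of the cases handled below:
 *   - same lsr_index: extend f_curr to the union of both ranges
 *   - new range covers f_curr completely: f_curr takes the new range
 *   - new range strictly inside f_curr: punch a hole (split into three)
 *   - new range overlaps the front of f_curr: trim f_curr's start and
 *     insert f_new before it
 *   - new range overlaps the tail of f_curr: trim f_curr's end and
 *     insert f_new after it
 *   - anything else is unexpected and only logged
 */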
static void fld_cache_overlap_handle(struct fld_cache *cache,
				struct fld_cache_entry *f_curr,
				struct fld_cache_entry *f_new)
{
	const struct lu_seq_range *range = &f_new->fce_range;
	const u64 new_start  = range->lsr_start;
	const u64 new_end  = range->lsr_end;
	const u32 mdt = range->lsr_index;

	/* This is the overlap case. These cases only check overlap with the
	 * previous range; fld_fix_new_list() handles overlap with the next
	 * range. */

	if (f_curr->fce_range.lsr_index == mdt) {
		f_curr->fce_range.lsr_start = min(f_curr->fce_range.lsr_start,
						  new_start);

		f_curr->fce_range.lsr_end = max(f_curr->fce_range.lsr_end,
						new_end);

		OBD_FREE_PTR(f_new);
		fld_fix_new_list(cache);

	} else if (new_start <= f_curr->fce_range.lsr_start &&
			f_curr->fce_range.lsr_end <= new_end) {
		/* case 1: the new range completely covers the existing range,
		 *	 e.g. the whole range was migrated. update fld cache entry */

		f_curr->fce_range = *range;
		OBD_FREE_PTR(f_new);
		fld_fix_new_list(cache);

	} else if (f_curr->fce_range.lsr_start < new_start &&
			new_end < f_curr->fce_range.lsr_end) {
		/* case 2: the new range fits within the existing range. */

		fld_cache_punch_hole(cache, f_curr, f_new);

	} else if (new_end <= f_curr->fce_range.lsr_end) {
		/* case 3: overlap:
		 *	 [new_start [c_start  new_end)  c_end)
		 */

		LASSERT(new_start <= f_curr->fce_range.lsr_start);

		f_curr->fce_range.lsr_start = new_end;
		fld_cache_entry_add(cache, f_new, f_curr->fce_list.prev);

	} else if (f_curr->fce_range.lsr_start <= new_start) {
		/* case 4: overlap:
		 *	 [c_start [new_start c_end) new_end)
		 */

		LASSERT(f_curr->fce_range.lsr_end <= new_end);

		f_curr->fce_range.lsr_end = new_start;
		fld_cache_entry_add(cache, f_new, &f_curr->fce_list);
	} else
		CERROR("NEW range ="DRANGE" curr = "DRANGE"\n",
		       PRANGE(range), PRANGE(&f_curr->fce_range));
}

struct fld_cache_entry
*fld_cache_entry_create(const struct lu_seq_range *range)
{
	struct fld_cache_entry *f_new;

	LASSERT(range_is_sane(range));

	OBD_ALLOC_PTR(f_new);
	if (!f_new)
		return ERR_PTR(-ENOMEM);

	f_new->fce_range = *range;
	return f_new;
}

/**
 * Insert FLD entry in FLD cache.
 *
 * This function handles all cases of merging and breaking up of
 * ranges.
 */
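/*
 * Note: the entry list is kept sorted by lsr_start.  The loop below
 * walks it until it finds the first entry starting beyond the end of
 * the new range; any entry the new range overlaps on the way is handed
 * to fld_cache_overlap_handle(), otherwise the new entry is inserted
 * at the position found.
 */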
int fld_cache_insert_nolock(struct fld_cache *cache,
			    struct fld_cache_entry *f_new)
{
	struct fld_cache_entry *f_curr;
	struct fld_cache_entry *n;
	struct list_head *head;
	struct list_head *prev = NULL;
	const u64 new_start  = f_new->fce_range.lsr_start;
	const u64 new_end  = f_new->fce_range.lsr_end;
	__u32 new_flags  = f_new->fce_range.lsr_flags;

	/*
	 * Duplicate entries are eliminated in insert op.
	 * So we don't need to search new entry before starting
	 * insertion loop.
	 */

	if (!cache->fci_no_shrink)
		fld_cache_shrink(cache);

	head = &cache->fci_entries_head;

	list_for_each_entry_safe(f_curr, n, head, fce_list) {
		/* stop once the new range ends before this entry starts:
		 * the insertion point has been found */
		if (new_end < f_curr->fce_range.lsr_start ||
		   (new_end == f_curr->fce_range.lsr_start &&
		    new_flags != f_curr->fce_range.lsr_flags))
			break;

		prev = &f_curr->fce_list;
		/* check if the new range overlaps this entry */
		if (new_start < f_curr->fce_range.lsr_end &&
		    new_flags == f_curr->fce_range.lsr_flags) {
			fld_cache_overlap_handle(cache, f_curr, f_new);
			goto out;
		}
	}

	if (prev == NULL)
		prev = head;

	CDEBUG(D_INFO, "insert range "DRANGE"\n", PRANGE(&f_new->fce_range));
	/* Add new entry to cache and lru list. */
	fld_cache_entry_add(cache, f_new, prev);
out:
	return 0;
}

int fld_cache_insert(struct fld_cache *cache,
		     const struct lu_seq_range *range)
{
	struct fld_cache_entry	*flde;
	int rc;

	flde = fld_cache_entry_create(range);
	if (IS_ERR(flde))
		return PTR_ERR(flde);

	write_lock(&cache->fci_lock);
	rc = fld_cache_insert_nolock(cache, flde);
	write_unlock(&cache->fci_lock);
	if (rc)
		OBD_FREE_PTR(flde);

	return rc;
}

void fld_cache_delete_nolock(struct fld_cache *cache,
		      const struct lu_seq_range *range)
{
	struct fld_cache_entry *flde;
	struct fld_cache_entry *tmp;
	struct list_head *head;

	head = &cache->fci_entries_head;
	list_for_each_entry_safe(flde, tmp, head, fce_list) {
		/* delete the cache entry that matches the given range */
		if (range->lsr_start == flde->fce_range.lsr_start ||
		   (range->lsr_end == flde->fce_range.lsr_end &&
		    range->lsr_flags == flde->fce_range.lsr_flags)) {
			fld_cache_entry_delete(cache, flde);
			break;
		}
	}
}

/**
 * Delete FLD entry in FLD cache.
 *
 */
void fld_cache_delete(struct fld_cache *cache,
		      const struct lu_seq_range *range)
{
	write_lock(&cache->fci_lock);
	fld_cache_delete_nolock(cache, range);
	write_unlock(&cache->fci_lock);
}

struct fld_cache_entry
*fld_cache_entry_lookup_nolock(struct fld_cache *cache,
			      struct lu_seq_range *range)
{
	struct fld_cache_entry *flde;
	struct fld_cache_entry *got = NULL;
	struct list_head *head;

	head = &cache->fci_entries_head;
	list_for_each_entry(flde, head, fce_list) {
		if (range->lsr_start == flde->fce_range.lsr_start ||
		   (range->lsr_end == flde->fce_range.lsr_end &&
		    range->lsr_flags == flde->fce_range.lsr_flags)) {
			got = flde;
			break;
		}
	}

	return got;
}

/**
 * lookup the entry matching \a range in fld cache.
 */
struct fld_cache_entry
*fld_cache_entry_lookup(struct fld_cache *cache, struct lu_seq_range *range)
{
	struct fld_cache_entry *got = NULL;

	read_lock(&cache->fci_lock);
	got = fld_cache_entry_lookup_nolock(cache, range);
	read_unlock(&cache->fci_lock);
	return got;
}

/**
 * lookup \a seq sequence for range in fld cache.
 */
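/*
 * Since the entry list is sorted by lsr_start, the scan stops at the
 * first entry starting beyond \a seq.  On a miss the range of the
 * closest preceding entry is still copied to \a range before -ENOENT
 * is returned.
 */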
int fld_cache_lookup(struct fld_cache *cache,
		     const u64 seq, struct lu_seq_range *range)
{
	struct fld_cache_entry *flde;
	struct fld_cache_entry *prev = NULL;
	struct list_head *head;

	read_lock(&cache->fci_lock);
	head = &cache->fci_entries_head;

	cache->fci_stat.fst_count++;
	list_for_each_entry(flde, head, fce_list) {
		if (flde->fce_range.lsr_start > seq) {
			if (prev != NULL)
				*range = prev->fce_range;
			break;
		}

		prev = flde;
		if (range_within(&flde->fce_range, seq)) {
			*range = flde->fce_range;

			cache->fci_stat.fst_cache++;
			read_unlock(&cache->fci_lock);
			return 0;
		}
	}
	read_unlock(&cache->fci_lock);
	return -ENOENT;
}