1 /*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19 *
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
22 * have any questions.
23 *
24 * GPL HEADER END
25 */
26 /*
27 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
29 */
30 /*
31 * This file is part of Lustre, http://www.lustre.org/
32 * Lustre is a trademark of Sun Microsystems, Inc.
33 *
34 * lustre/ldlm/interval_tree.c
35 *
36 * Interval tree library used by ldlm extent lock code
37 *
38 * Author: Huang Wei <huangwei@clusterfs.com>
39 * Author: Jay Xiong <jinshan.xiong@sun.com>
40 */
41 #include "../include/lustre_dlm.h"
42 #include "../include/obd_support.h"
43 #include "../include/interval_tree.h"
44
45 enum {
46 INTERVAL_RED = 0,
47 INTERVAL_BLACK = 1
48 };
49
node_is_left_child(struct interval_node * node)50 static inline int node_is_left_child(struct interval_node *node)
51 {
52 LASSERT(node->in_parent != NULL);
53 return node == node->in_parent->in_left;
54 }
55
node_is_right_child(struct interval_node * node)56 static inline int node_is_right_child(struct interval_node *node)
57 {
58 LASSERT(node->in_parent != NULL);
59 return node == node->in_parent->in_right;
60 }
61
node_is_red(struct interval_node * node)62 static inline int node_is_red(struct interval_node *node)
63 {
64 return node->in_color == INTERVAL_RED;
65 }
66
node_is_black(struct interval_node * node)67 static inline int node_is_black(struct interval_node *node)
68 {
69 return node->in_color == INTERVAL_BLACK;
70 }
71
extent_compare(struct interval_node_extent * e1,struct interval_node_extent * e2)72 static inline int extent_compare(struct interval_node_extent *e1,
73 struct interval_node_extent *e2)
74 {
75 int rc;
76
77 if (e1->start == e2->start) {
78 if (e1->end < e2->end)
79 rc = -1;
80 else if (e1->end > e2->end)
81 rc = 1;
82 else
83 rc = 0;
84 } else {
85 if (e1->start < e2->start)
86 rc = -1;
87 else
88 rc = 1;
89 }
90 return rc;
91 }
92
extent_equal(struct interval_node_extent * e1,struct interval_node_extent * e2)93 static inline int extent_equal(struct interval_node_extent *e1,
94 struct interval_node_extent *e2)
95 {
96 return (e1->start == e2->start) && (e1->end == e2->end);
97 }
98
extent_overlapped(struct interval_node_extent * e1,struct interval_node_extent * e2)99 static inline int extent_overlapped(struct interval_node_extent *e1,
100 struct interval_node_extent *e2)
101 {
102 return (e1->start <= e2->end) && (e2->start <= e1->end);
103 }
104
node_compare(struct interval_node * n1,struct interval_node * n2)105 static inline int node_compare(struct interval_node *n1,
106 struct interval_node *n2)
107 {
108 return extent_compare(&n1->in_extent, &n2->in_extent);
109 }
110
node_equal(struct interval_node * n1,struct interval_node * n2)111 static inline int node_equal(struct interval_node *n1,
112 struct interval_node *n2)
113 {
114 return extent_equal(&n1->in_extent, &n2->in_extent);
115 }
116
max_u64(__u64 x,__u64 y)117 static inline __u64 max_u64(__u64 x, __u64 y)
118 {
119 return x > y ? x : y;
120 }
121
min_u64(__u64 x,__u64 y)122 static inline __u64 min_u64(__u64 x, __u64 y)
123 {
124 return x < y ? x : y;
125 }
126
127 #define interval_for_each(node, root) \
128 for (node = interval_first(root); node != NULL; \
129 node = interval_next(node))
130
131 #define interval_for_each_reverse(node, root) \
132 for (node = interval_last(root); node != NULL; \
133 node = interval_prev(node))
134
interval_first(struct interval_node * node)135 static struct interval_node *interval_first(struct interval_node *node)
136 {
137 if (!node)
138 return NULL;
139 while (node->in_left)
140 node = node->in_left;
141 return node;
142 }
143
interval_last(struct interval_node * node)144 static struct interval_node *interval_last(struct interval_node *node)
145 {
146 if (!node)
147 return NULL;
148 while (node->in_right)
149 node = node->in_right;
150 return node;
151 }
152
interval_next(struct interval_node * node)153 static struct interval_node *interval_next(struct interval_node *node)
154 {
155 if (!node)
156 return NULL;
157 if (node->in_right)
158 return interval_first(node->in_right);
159 while (node->in_parent && node_is_right_child(node))
160 node = node->in_parent;
161 return node->in_parent;
162 }
163
interval_prev(struct interval_node * node)164 static struct interval_node *interval_prev(struct interval_node *node)
165 {
166 if (!node)
167 return NULL;
168
169 if (node->in_left)
170 return interval_last(node->in_left);
171
172 while (node->in_parent && node_is_left_child(node))
173 node = node->in_parent;
174
175 return node->in_parent;
176 }
177
interval_iterate(struct interval_node * root,interval_callback_t func,void * data)178 enum interval_iter interval_iterate(struct interval_node *root,
179 interval_callback_t func,
180 void *data)
181 {
182 struct interval_node *node;
183 enum interval_iter rc = INTERVAL_ITER_CONT;
184
185 interval_for_each(node, root) {
186 rc = func(node, data);
187 if (rc == INTERVAL_ITER_STOP)
188 break;
189 }
190
191 return rc;
192 }
193 EXPORT_SYMBOL(interval_iterate);
194
interval_iterate_reverse(struct interval_node * root,interval_callback_t func,void * data)195 enum interval_iter interval_iterate_reverse(struct interval_node *root,
196 interval_callback_t func,
197 void *data)
198 {
199 struct interval_node *node;
200 enum interval_iter rc = INTERVAL_ITER_CONT;
201
202 interval_for_each_reverse(node, root) {
203 rc = func(node, data);
204 if (rc == INTERVAL_ITER_STOP)
205 break;
206 }
207
208 return rc;
209 }
210 EXPORT_SYMBOL(interval_iterate_reverse);
211
212 /* try to find a node with same interval in the tree,
213 * if found, return the pointer to the node, otherwise return NULL*/
interval_find(struct interval_node * root,struct interval_node_extent * ex)214 struct interval_node *interval_find(struct interval_node *root,
215 struct interval_node_extent *ex)
216 {
217 struct interval_node *walk = root;
218 int rc;
219
220 while (walk) {
221 rc = extent_compare(ex, &walk->in_extent);
222 if (rc == 0)
223 break;
224 else if (rc < 0)
225 walk = walk->in_left;
226 else
227 walk = walk->in_right;
228 }
229
230 return walk;
231 }
232 EXPORT_SYMBOL(interval_find);
233
__rotate_change_maxhigh(struct interval_node * node,struct interval_node * rotate)234 static void __rotate_change_maxhigh(struct interval_node *node,
235 struct interval_node *rotate)
236 {
237 __u64 left_max, right_max;
238
239 rotate->in_max_high = node->in_max_high;
240 left_max = node->in_left ? node->in_left->in_max_high : 0;
241 right_max = node->in_right ? node->in_right->in_max_high : 0;
242 node->in_max_high = max_u64(interval_high(node),
243 max_u64(left_max, right_max));
244 }
245
246 /* The left rotation "pivots" around the link from node to node->right, and
247 * - node will be linked to node->right's left child, and
248 * - node->right's left child will be linked to node's right child. */
__rotate_left(struct interval_node * node,struct interval_node ** root)249 static void __rotate_left(struct interval_node *node,
250 struct interval_node **root)
251 {
252 struct interval_node *right = node->in_right;
253 struct interval_node *parent = node->in_parent;
254
255 node->in_right = right->in_left;
256 if (node->in_right)
257 right->in_left->in_parent = node;
258
259 right->in_left = node;
260 right->in_parent = parent;
261 if (parent) {
262 if (node_is_left_child(node))
263 parent->in_left = right;
264 else
265 parent->in_right = right;
266 } else {
267 *root = right;
268 }
269 node->in_parent = right;
270
271 /* update max_high for node and right */
272 __rotate_change_maxhigh(node, right);
273 }
274
275 /* The right rotation "pivots" around the link from node to node->left, and
276 * - node will be linked to node->left's right child, and
277 * - node->left's right child will be linked to node's left child. */
__rotate_right(struct interval_node * node,struct interval_node ** root)278 static void __rotate_right(struct interval_node *node,
279 struct interval_node **root)
280 {
281 struct interval_node *left = node->in_left;
282 struct interval_node *parent = node->in_parent;
283
284 node->in_left = left->in_right;
285 if (node->in_left)
286 left->in_right->in_parent = node;
287 left->in_right = node;
288
289 left->in_parent = parent;
290 if (parent) {
291 if (node_is_right_child(node))
292 parent->in_right = left;
293 else
294 parent->in_left = left;
295 } else {
296 *root = left;
297 }
298 node->in_parent = left;
299
300 /* update max_high for node and left */
301 __rotate_change_maxhigh(node, left);
302 }
303
304 #define interval_swap(a, b) do { \
305 struct interval_node *c = a; a = b; b = c; \
306 } while (0)
307
308 /*
309 * Operations INSERT and DELETE, when run on a tree with n keys,
310 * take O(logN) time.Because they modify the tree, the result
311 * may violate the red-black properties.To restore these properties,
312 * we must change the colors of some of the nodes in the tree
313 * and also change the pointer structure.
314 */
interval_insert_color(struct interval_node * node,struct interval_node ** root)315 static void interval_insert_color(struct interval_node *node,
316 struct interval_node **root)
317 {
318 struct interval_node *parent, *gparent;
319
320 while ((parent = node->in_parent) && node_is_red(parent)) {
321 gparent = parent->in_parent;
322 /* Parent is RED, so gparent must not be NULL */
323 if (node_is_left_child(parent)) {
324 struct interval_node *uncle;
325
326 uncle = gparent->in_right;
327 if (uncle && node_is_red(uncle)) {
328 uncle->in_color = INTERVAL_BLACK;
329 parent->in_color = INTERVAL_BLACK;
330 gparent->in_color = INTERVAL_RED;
331 node = gparent;
332 continue;
333 }
334
335 if (parent->in_right == node) {
336 __rotate_left(parent, root);
337 interval_swap(node, parent);
338 }
339
340 parent->in_color = INTERVAL_BLACK;
341 gparent->in_color = INTERVAL_RED;
342 __rotate_right(gparent, root);
343 } else {
344 struct interval_node *uncle;
345
346 uncle = gparent->in_left;
347 if (uncle && node_is_red(uncle)) {
348 uncle->in_color = INTERVAL_BLACK;
349 parent->in_color = INTERVAL_BLACK;
350 gparent->in_color = INTERVAL_RED;
351 node = gparent;
352 continue;
353 }
354
355 if (node_is_left_child(node)) {
356 __rotate_right(parent, root);
357 interval_swap(node, parent);
358 }
359
360 parent->in_color = INTERVAL_BLACK;
361 gparent->in_color = INTERVAL_RED;
362 __rotate_left(gparent, root);
363 }
364 }
365
366 (*root)->in_color = INTERVAL_BLACK;
367 }
368
interval_insert(struct interval_node * node,struct interval_node ** root)369 struct interval_node *interval_insert(struct interval_node *node,
370 struct interval_node **root)
371
372 {
373 struct interval_node **p, *parent = NULL;
374
375 LASSERT(!interval_is_intree(node));
376 p = root;
377 while (*p) {
378 parent = *p;
379 if (node_equal(parent, node))
380 return parent;
381
382 /* max_high field must be updated after each iteration */
383 if (parent->in_max_high < interval_high(node))
384 parent->in_max_high = interval_high(node);
385
386 if (node_compare(node, parent) < 0)
387 p = &parent->in_left;
388 else
389 p = &parent->in_right;
390 }
391
392 /* link node into the tree */
393 node->in_parent = parent;
394 node->in_color = INTERVAL_RED;
395 node->in_left = node->in_right = NULL;
396 *p = node;
397
398 interval_insert_color(node, root);
399 node->in_intree = 1;
400
401 return NULL;
402 }
403 EXPORT_SYMBOL(interval_insert);
404
node_is_black_or_0(struct interval_node * node)405 static inline int node_is_black_or_0(struct interval_node *node)
406 {
407 return !node || node_is_black(node);
408 }
409
interval_erase_color(struct interval_node * node,struct interval_node * parent,struct interval_node ** root)410 static void interval_erase_color(struct interval_node *node,
411 struct interval_node *parent,
412 struct interval_node **root)
413 {
414 struct interval_node *tmp;
415
416 while (node_is_black_or_0(node) && node != *root) {
417 if (parent->in_left == node) {
418 tmp = parent->in_right;
419 if (node_is_red(tmp)) {
420 tmp->in_color = INTERVAL_BLACK;
421 parent->in_color = INTERVAL_RED;
422 __rotate_left(parent, root);
423 tmp = parent->in_right;
424 }
425 if (node_is_black_or_0(tmp->in_left) &&
426 node_is_black_or_0(tmp->in_right)) {
427 tmp->in_color = INTERVAL_RED;
428 node = parent;
429 parent = node->in_parent;
430 } else {
431 if (node_is_black_or_0(tmp->in_right)) {
432 struct interval_node *o_left;
433
434 o_left = tmp->in_left;
435 if (o_left)
436 o_left->in_color = INTERVAL_BLACK;
437 tmp->in_color = INTERVAL_RED;
438 __rotate_right(tmp, root);
439 tmp = parent->in_right;
440 }
441 tmp->in_color = parent->in_color;
442 parent->in_color = INTERVAL_BLACK;
443 if (tmp->in_right)
444 tmp->in_right->in_color = INTERVAL_BLACK;
445 __rotate_left(parent, root);
446 node = *root;
447 break;
448 }
449 } else {
450 tmp = parent->in_left;
451 if (node_is_red(tmp)) {
452 tmp->in_color = INTERVAL_BLACK;
453 parent->in_color = INTERVAL_RED;
454 __rotate_right(parent, root);
455 tmp = parent->in_left;
456 }
457 if (node_is_black_or_0(tmp->in_left) &&
458 node_is_black_or_0(tmp->in_right)) {
459 tmp->in_color = INTERVAL_RED;
460 node = parent;
461 parent = node->in_parent;
462 } else {
463 if (node_is_black_or_0(tmp->in_left)) {
464 struct interval_node *o_right;
465
466 o_right = tmp->in_right;
467 if (o_right)
468 o_right->in_color = INTERVAL_BLACK;
469 tmp->in_color = INTERVAL_RED;
470 __rotate_left(tmp, root);
471 tmp = parent->in_left;
472 }
473 tmp->in_color = parent->in_color;
474 parent->in_color = INTERVAL_BLACK;
475 if (tmp->in_left)
476 tmp->in_left->in_color = INTERVAL_BLACK;
477 __rotate_right(parent, root);
478 node = *root;
479 break;
480 }
481 }
482 }
483 if (node)
484 node->in_color = INTERVAL_BLACK;
485 }
486
487 /*
488 * if the @max_high value of @node is changed, this function traverse a path
489 * from node up to the root to update max_high for the whole tree.
490 */
update_maxhigh(struct interval_node * node,__u64 old_maxhigh)491 static void update_maxhigh(struct interval_node *node,
492 __u64 old_maxhigh)
493 {
494 __u64 left_max, right_max;
495
496 while (node) {
497 left_max = node->in_left ? node->in_left->in_max_high : 0;
498 right_max = node->in_right ? node->in_right->in_max_high : 0;
499 node->in_max_high = max_u64(interval_high(node),
500 max_u64(left_max, right_max));
501
502 if (node->in_max_high >= old_maxhigh)
503 break;
504 node = node->in_parent;
505 }
506 }
507
interval_erase(struct interval_node * node,struct interval_node ** root)508 void interval_erase(struct interval_node *node,
509 struct interval_node **root)
510 {
511 struct interval_node *child, *parent;
512 int color;
513
514 LASSERT(interval_is_intree(node));
515 node->in_intree = 0;
516 if (!node->in_left) {
517 child = node->in_right;
518 } else if (!node->in_right) {
519 child = node->in_left;
520 } else { /* Both left and right child are not NULL */
521 struct interval_node *old = node;
522
523 node = interval_next(node);
524 child = node->in_right;
525 parent = node->in_parent;
526 color = node->in_color;
527
528 if (child)
529 child->in_parent = parent;
530 if (parent == old)
531 parent->in_right = child;
532 else
533 parent->in_left = child;
534
535 node->in_color = old->in_color;
536 node->in_right = old->in_right;
537 node->in_left = old->in_left;
538 node->in_parent = old->in_parent;
539
540 if (old->in_parent) {
541 if (node_is_left_child(old))
542 old->in_parent->in_left = node;
543 else
544 old->in_parent->in_right = node;
545 } else {
546 *root = node;
547 }
548
549 old->in_left->in_parent = node;
550 if (old->in_right)
551 old->in_right->in_parent = node;
552 update_maxhigh(child ? : parent, node->in_max_high);
553 update_maxhigh(node, old->in_max_high);
554 if (parent == old)
555 parent = node;
556 goto color;
557 }
558 parent = node->in_parent;
559 color = node->in_color;
560
561 if (child)
562 child->in_parent = parent;
563 if (parent) {
564 if (node_is_left_child(node))
565 parent->in_left = child;
566 else
567 parent->in_right = child;
568 } else {
569 *root = child;
570 }
571
572 update_maxhigh(child ? : parent, node->in_max_high);
573
574 color:
575 if (color == INTERVAL_BLACK)
576 interval_erase_color(child, parent, root);
577 }
578 EXPORT_SYMBOL(interval_erase);
579
interval_may_overlap(struct interval_node * node,struct interval_node_extent * ext)580 static inline int interval_may_overlap(struct interval_node *node,
581 struct interval_node_extent *ext)
582 {
583 return (ext->start <= node->in_max_high &&
584 ext->end >= interval_low(node));
585 }
586
587 /*
588 * This function finds all intervals that overlap interval ext,
589 * and calls func to handle resulted intervals one by one.
590 * in lustre, this function will find all conflicting locks in
591 * the granted queue and add these locks to the ast work list.
592 *
593 * {
594 * if (node == NULL)
595 * return 0;
596 * if (ext->end < interval_low(node)) {
597 * interval_search(node->in_left, ext, func, data);
598 * } else if (interval_may_overlap(node, ext)) {
599 * if (extent_overlapped(ext, &node->in_extent))
600 * func(node, data);
601 * interval_search(node->in_left, ext, func, data);
602 * interval_search(node->in_right, ext, func, data);
603 * }
604 * return 0;
605 * }
606 *
607 */
interval_search(struct interval_node * node,struct interval_node_extent * ext,interval_callback_t func,void * data)608 enum interval_iter interval_search(struct interval_node *node,
609 struct interval_node_extent *ext,
610 interval_callback_t func,
611 void *data)
612 {
613 struct interval_node *parent;
614 enum interval_iter rc = INTERVAL_ITER_CONT;
615
616 LASSERT(ext != NULL);
617 LASSERT(func != NULL);
618
619 while (node) {
620 if (ext->end < interval_low(node)) {
621 if (node->in_left) {
622 node = node->in_left;
623 continue;
624 }
625 } else if (interval_may_overlap(node, ext)) {
626 if (extent_overlapped(ext, &node->in_extent)) {
627 rc = func(node, data);
628 if (rc == INTERVAL_ITER_STOP)
629 break;
630 }
631
632 if (node->in_left) {
633 node = node->in_left;
634 continue;
635 }
636 if (node->in_right) {
637 node = node->in_right;
638 continue;
639 }
640 }
641
642 parent = node->in_parent;
643 while (parent) {
644 if (node_is_left_child(node) &&
645 parent->in_right) {
646 /* If we ever got the left, it means that the
647 * parent met ext->end<interval_low(parent), or
648 * may_overlap(parent). If the former is true,
649 * we needn't go back. So stop early and check
650 * may_overlap(parent) after this loop. */
651 node = parent->in_right;
652 break;
653 }
654 node = parent;
655 parent = parent->in_parent;
656 }
657 if (parent == NULL || !interval_may_overlap(parent, ext))
658 break;
659 }
660
661 return rc;
662 }
663 EXPORT_SYMBOL(interval_search);
664
interval_overlap_cb(struct interval_node * n,void * args)665 static enum interval_iter interval_overlap_cb(struct interval_node *n,
666 void *args)
667 {
668 *(int *)args = 1;
669 return INTERVAL_ITER_STOP;
670 }
671
interval_is_overlapped(struct interval_node * root,struct interval_node_extent * ext)672 int interval_is_overlapped(struct interval_node *root,
673 struct interval_node_extent *ext)
674 {
675 int has = 0;
676 (void)interval_search(root, ext, interval_overlap_cb, &has);
677 return has;
678 }
679 EXPORT_SYMBOL(interval_is_overlapped);
680
681 /* Don't expand to low. Expanding downwards is expensive, and meaningless to
682 * some extents, because programs seldom do IO backward.
683 *
684 * The recursive algorithm of expanding low:
685 * expand_low {
686 * struct interval_node *tmp;
687 * static __u64 res = 0;
688 *
689 * if (root == NULL)
690 * return res;
691 * if (root->in_max_high < low) {
692 * res = max_u64(root->in_max_high + 1, res);
693 * return res;
694 * } else if (low < interval_low(root)) {
695 * interval_expand_low(root->in_left, low);
696 * return res;
697 * }
698 *
699 * if (interval_high(root) < low)
700 * res = max_u64(interval_high(root) + 1, res);
701 * interval_expand_low(root->in_left, low);
702 * interval_expand_low(root->in_right, low);
703 *
704 * return res;
705 * }
706 *
707 * It's much easy to eliminate the recursion, see interval_search for
708 * an example. -jay
709 */
interval_expand_low(struct interval_node * root,__u64 low)710 static inline __u64 interval_expand_low(struct interval_node *root, __u64 low)
711 {
712 /* we only concern the empty tree right now. */
713 if (root == NULL)
714 return 0;
715 return low;
716 }
717
interval_expand_high(struct interval_node * node,__u64 high)718 static inline __u64 interval_expand_high(struct interval_node *node, __u64 high)
719 {
720 __u64 result = ~0;
721
722 while (node != NULL) {
723 if (node->in_max_high < high)
724 break;
725
726 if (interval_low(node) > high) {
727 result = interval_low(node) - 1;
728 node = node->in_left;
729 } else {
730 node = node->in_right;
731 }
732 }
733
734 return result;
735 }
736
737 /* expanding the extent based on @ext. */
interval_expand(struct interval_node * root,struct interval_node_extent * ext,struct interval_node_extent * limiter)738 void interval_expand(struct interval_node *root,
739 struct interval_node_extent *ext,
740 struct interval_node_extent *limiter)
741 {
742 /* The assertion of interval_is_overlapped is expensive because we may
743 * travel many nodes to find the overlapped node. */
744 LASSERT(interval_is_overlapped(root, ext) == 0);
745 if (!limiter || limiter->start < ext->start)
746 ext->start = interval_expand_low(root, ext->start);
747 if (!limiter || limiter->end > ext->end)
748 ext->end = interval_expand_high(root, ext->end);
749 LASSERT(interval_is_overlapped(root, ext) == 0);
750 }
751 EXPORT_SYMBOL(interval_expand);
752