1/*
2 * Ceph - scalable distributed file system
3 *
4 * Copyright (C) 2015 Intel Corporation All Rights Reserved
5 *
6 * This is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License version 2.1, as published by the Free Software
9 * Foundation.  See file COPYING.
10 *
11 */
12
13#ifdef __KERNEL__
14# include <linux/string.h>
15# include <linux/slab.h>
16# include <linux/bug.h>
17# include <linux/kernel.h>
18# include <linux/crush/crush.h>
19# include <linux/crush/hash.h>
20#else
21# include "crush_compat.h"
22# include "crush.h"
23# include "hash.h"
24#endif
25#include "crush_ln_table.h"
26
27#define dprintk(args...) /* printf(args) */
28
29/*
30 * Implement the core CRUSH mapping algorithm.
31 */
32
33/**
34 * crush_find_rule - find a crush_rule id for a given ruleset, type, and size.
35 * @map: the crush_map
36 * @ruleset: the storage ruleset id (user defined)
37 * @type: storage ruleset type (user defined)
38 * @size: output set size
39 */
40int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size)
41{
42	__u32 i;
43
44	for (i = 0; i < map->max_rules; i++) {
45		if (map->rules[i] &&
46		    map->rules[i]->mask.ruleset == ruleset &&
47		    map->rules[i]->mask.type == type &&
48		    map->rules[i]->mask.min_size <= size &&
49		    map->rules[i]->mask.max_size >= size)
50			return i;
51	}
52	return -1;
53}
54
55
56/*
57 * bucket choose methods
58 *
59 * For each bucket algorithm, we have a "choose" method that, given a
60 * crush input @x and replica position (usually, position in output set) @r,
61 * will produce an item in the bucket.
62 */
63
64/*
65 * Choose based on a random permutation of the bucket.
66 *
67 * We used to use some prime number arithmetic to do this, but it
68 * wasn't very random, and had some other bad behaviors.  Instead, we
69 * calculate an actual random permutation of the bucket members.
70 * Since this is expensive, we optimize for the r=0 case, which
71 * captures the vast majority of calls.
72 */
73static int bucket_perm_choose(struct crush_bucket *bucket,
74			      int x, int r)
75{
76	unsigned int pr = r % bucket->size;
77	unsigned int i, s;
78
79	/* start a new permutation if @x has changed */
80	if (bucket->perm_x != (__u32)x || bucket->perm_n == 0) {
81		dprintk("bucket %d new x=%d\n", bucket->id, x);
82		bucket->perm_x = x;
83
84		/* optimize common r=0 case */
85		if (pr == 0) {
86			s = crush_hash32_3(bucket->hash, x, bucket->id, 0) %
87				bucket->size;
88			bucket->perm[0] = s;
89			bucket->perm_n = 0xffff;   /* magic value, see below */
90			goto out;
91		}
92
93		for (i = 0; i < bucket->size; i++)
94			bucket->perm[i] = i;
95		bucket->perm_n = 0;
96	} else if (bucket->perm_n == 0xffff) {
97		/* clean up after the r=0 case above */
98		for (i = 1; i < bucket->size; i++)
99			bucket->perm[i] = i;
100		bucket->perm[bucket->perm[0]] = 0;
101		bucket->perm_n = 1;
102	}
103
104	/* calculate permutation up to pr */
105	for (i = 0; i < bucket->perm_n; i++)
106		dprintk(" perm_choose have %d: %d\n", i, bucket->perm[i]);
107	while (bucket->perm_n <= pr) {
108		unsigned int p = bucket->perm_n;
109		/* no point in swapping the final entry */
110		if (p < bucket->size - 1) {
111			i = crush_hash32_3(bucket->hash, x, bucket->id, p) %
112				(bucket->size - p);
113			if (i) {
114				unsigned int t = bucket->perm[p + i];
115				bucket->perm[p + i] = bucket->perm[p];
116				bucket->perm[p] = t;
117			}
118			dprintk(" perm_choose swap %d with %d\n", p, p+i);
119		}
120		bucket->perm_n++;
121	}
122	for (i = 0; i < bucket->size; i++)
123		dprintk(" perm_choose  %d: %d\n", i, bucket->perm[i]);
124
125	s = bucket->perm[pr];
126out:
127	dprintk(" perm_choose %d sz=%d x=%d r=%d (%d) s=%d\n", bucket->id,
128		bucket->size, x, r, pr, s);
129	return bucket->items[s];
130}
131
132/* uniform */
133static int bucket_uniform_choose(struct crush_bucket_uniform *bucket,
134				 int x, int r)
135{
136	return bucket_perm_choose(&bucket->h, x, r);
137}
138
139/* list */
140static int bucket_list_choose(struct crush_bucket_list *bucket,
141			      int x, int r)
142{
143	int i;
144
145	for (i = bucket->h.size-1; i >= 0; i--) {
146		__u64 w = crush_hash32_4(bucket->h.hash, x, bucket->h.items[i],
147					 r, bucket->h.id);
148		w &= 0xffff;
149		dprintk("list_choose i=%d x=%d r=%d item %d weight %x "
150			"sw %x rand %llx",
151			i, x, r, bucket->h.items[i], bucket->item_weights[i],
152			bucket->sum_weights[i], w);
153		w *= bucket->sum_weights[i];
154		w = w >> 16;
155		/*dprintk(" scaled %llx\n", w);*/
156		if (w < bucket->item_weights[i])
157			return bucket->h.items[i];
158	}
159
160	dprintk("bad list sums for bucket %d\n", bucket->h.id);
161	return bucket->h.items[0];
162}
163
164
165/* (binary) tree */
166static int height(int n)
167{
168	int h = 0;
169	while ((n & 1) == 0) {
170		h++;
171		n = n >> 1;
172	}
173	return h;
174}
175
176static int left(int x)
177{
178	int h = height(x);
179	return x - (1 << (h-1));
180}
181
182static int right(int x)
183{
184	int h = height(x);
185	return x + (1 << (h-1));
186}
187
188static int terminal(int x)
189{
190	return x & 1;
191}
192
193static int bucket_tree_choose(struct crush_bucket_tree *bucket,
194			      int x, int r)
195{
196	int n;
197	__u32 w;
198	__u64 t;
199
200	/* start at root */
201	n = bucket->num_nodes >> 1;
202
203	while (!terminal(n)) {
204		int l;
205		/* pick point in [0, w) */
206		w = bucket->node_weights[n];
207		t = (__u64)crush_hash32_4(bucket->h.hash, x, n, r,
208					  bucket->h.id) * (__u64)w;
209		t = t >> 32;
210
211		/* descend to the left or right? */
212		l = left(n);
213		if (t < bucket->node_weights[l])
214			n = l;
215		else
216			n = right(n);
217	}
218
219	return bucket->h.items[n >> 1];
220}
221
222
223/* straw */
224
225static int bucket_straw_choose(struct crush_bucket_straw *bucket,
226			       int x, int r)
227{
228	__u32 i;
229	int high = 0;
230	__u64 high_draw = 0;
231	__u64 draw;
232
233	for (i = 0; i < bucket->h.size; i++) {
234		draw = crush_hash32_3(bucket->h.hash, x, bucket->h.items[i], r);
235		draw &= 0xffff;
236		draw *= bucket->straws[i];
237		if (i == 0 || draw > high_draw) {
238			high = i;
239			high_draw = draw;
240		}
241	}
242	return bucket->h.items[high];
243}
244
245/* compute 2^44*log2(input+1) */
246static __u64 crush_ln(unsigned int xin)
247{
248	unsigned int x = xin, x1;
249	int iexpon, index1, index2;
250	__u64 RH, LH, LL, xl64, result;
251
252	x++;
253
254	/* normalize input */
255	iexpon = 15;
256	while (!(x & 0x18000)) {
257		x <<= 1;
258		iexpon--;
259	}
260
261	index1 = (x >> 8) << 1;
262	/* RH ~ 2^56/index1 */
263	RH = __RH_LH_tbl[index1 - 256];
264	/* LH ~ 2^48 * log2(index1/256) */
265	LH = __RH_LH_tbl[index1 + 1 - 256];
266
267	/* RH*x ~ 2^48 * (2^15 + xf), xf<2^8 */
268	xl64 = (__s64)x * RH;
269	xl64 >>= 48;
270	x1 = xl64;
271
272	result = iexpon;
273	result <<= (12 + 32);
274
275	index2 = x1 & 0xff;
276	/* LL ~ 2^48*log2(1.0+index2/2^15) */
277	LL = __LL_tbl[index2];
278
279	LH = LH + LL;
280
281	LH >>= (48 - 12 - 32);
282	result += LH;
283
284	return result;
285}
286
287
288/*
289 * straw2
290 *
291 * for reference, see:
292 *
293 * http://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables
294 *
295 */
296
297static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
298				int x, int r)
299{
300	unsigned int i, high = 0;
301	unsigned int u;
302	unsigned int w;
303	__s64 ln, draw, high_draw = 0;
304
305	for (i = 0; i < bucket->h.size; i++) {
306		w = bucket->item_weights[i];
307		if (w) {
308			u = crush_hash32_3(bucket->h.hash, x,
309					   bucket->h.items[i], r);
310			u &= 0xffff;
311
312			/*
313			 * for some reason slightly less than 0x10000 produces
314			 * a slightly more accurate distribution... probably a
315			 * rounding effect.
316			 *
317			 * the natural log lookup table maps [0,0xffff]
318			 * (corresponding to real numbers [1/0x10000, 1] to
319			 * [0, 0xffffffffffff] (corresponding to real numbers
320			 * [-11.090355,0]).
321			 */
322			ln = crush_ln(u) - 0x1000000000000ll;
323
324			/*
325			 * divide by 16.16 fixed-point weight.  note
326			 * that the ln value is negative, so a larger
327			 * weight means a larger (less negative) value
328			 * for draw.
329			 */
330			draw = div64_s64(ln, w);
331		} else {
332			draw = S64_MIN;
333		}
334
335		if (i == 0 || draw > high_draw) {
336			high = i;
337			high_draw = draw;
338		}
339	}
340	return bucket->h.items[high];
341}
342
343
344static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
345{
346	dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
347	BUG_ON(in->size == 0);
348	switch (in->alg) {
349	case CRUSH_BUCKET_UNIFORM:
350		return bucket_uniform_choose((struct crush_bucket_uniform *)in,
351					  x, r);
352	case CRUSH_BUCKET_LIST:
353		return bucket_list_choose((struct crush_bucket_list *)in,
354					  x, r);
355	case CRUSH_BUCKET_TREE:
356		return bucket_tree_choose((struct crush_bucket_tree *)in,
357					  x, r);
358	case CRUSH_BUCKET_STRAW:
359		return bucket_straw_choose((struct crush_bucket_straw *)in,
360					   x, r);
361	case CRUSH_BUCKET_STRAW2:
362		return bucket_straw2_choose((struct crush_bucket_straw2 *)in,
363					    x, r);
364	default:
365		dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
366		return in->items[0];
367	}
368}
369
370
371/*
372 * true if device is marked "out" (failed, fully offloaded)
373 * of the cluster
374 */
375static int is_out(const struct crush_map *map,
376		  const __u32 *weight, int weight_max,
377		  int item, int x)
378{
379	if (item >= weight_max)
380		return 1;
381	if (weight[item] >= 0x10000)
382		return 0;
383	if (weight[item] == 0)
384		return 1;
385	if ((crush_hash32_2(CRUSH_HASH_RJENKINS1, x, item) & 0xffff)
386	    < weight[item])
387		return 0;
388	return 1;
389}
390
391/**
392 * crush_choose_firstn - choose numrep distinct items of given type
393 * @map: the crush_map
394 * @bucket: the bucket we are choose an item from
395 * @x: crush input value
396 * @numrep: the number of items to choose
397 * @type: the type of item to choose
398 * @out: pointer to output vector
399 * @outpos: our position in that vector
400 * @out_size: size of the out vector
401 * @tries: number of attempts to make
402 * @recurse_tries: number of attempts to have recursive chooseleaf make
403 * @local_retries: localized retries
404 * @local_fallback_retries: localized fallback retries
405 * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
406 * @vary_r: pass r to recursive calls
407 * @out2: second output vector for leaf items (if @recurse_to_leaf)
408 * @parent_r: r value passed from the parent
409 */
410static int crush_choose_firstn(const struct crush_map *map,
411			       struct crush_bucket *bucket,
412			       const __u32 *weight, int weight_max,
413			       int x, int numrep, int type,
414			       int *out, int outpos,
415			       int out_size,
416			       unsigned int tries,
417			       unsigned int recurse_tries,
418			       unsigned int local_retries,
419			       unsigned int local_fallback_retries,
420			       int recurse_to_leaf,
421			       unsigned int vary_r,
422			       int *out2,
423			       int parent_r)
424{
425	int rep;
426	unsigned int ftotal, flocal;
427	int retry_descent, retry_bucket, skip_rep;
428	struct crush_bucket *in = bucket;
429	int r;
430	int i;
431	int item = 0;
432	int itemtype;
433	int collide, reject;
434	int count = out_size;
435
436	dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
437		recurse_to_leaf ? "_LEAF" : "",
438		bucket->id, x, outpos, numrep,
439		tries, recurse_tries, local_retries, local_fallback_retries,
440		parent_r);
441
442	for (rep = outpos; rep < numrep && count > 0 ; rep++) {
443		/* keep trying until we get a non-out, non-colliding item */
444		ftotal = 0;
445		skip_rep = 0;
446		do {
447			retry_descent = 0;
448			in = bucket;               /* initial bucket */
449
450			/* choose through intervening buckets */
451			flocal = 0;
452			do {
453				collide = 0;
454				retry_bucket = 0;
455				r = rep + parent_r;
456				/* r' = r + f_total */
457				r += ftotal;
458
459				/* bucket choose */
460				if (in->size == 0) {
461					reject = 1;
462					goto reject;
463				}
464				if (local_fallback_retries > 0 &&
465				    flocal >= (in->size>>1) &&
466				    flocal > local_fallback_retries)
467					item = bucket_perm_choose(in, x, r);
468				else
469					item = crush_bucket_choose(in, x, r);
470				if (item >= map->max_devices) {
471					dprintk("   bad item %d\n", item);
472					skip_rep = 1;
473					break;
474				}
475
476				/* desired type? */
477				if (item < 0)
478					itemtype = map->buckets[-1-item]->type;
479				else
480					itemtype = 0;
481				dprintk("  item %d type %d\n", item, itemtype);
482
483				/* keep going? */
484				if (itemtype != type) {
485					if (item >= 0 ||
486					    (-1-item) >= map->max_buckets) {
487						dprintk("   bad item type %d\n", type);
488						skip_rep = 1;
489						break;
490					}
491					in = map->buckets[-1-item];
492					retry_bucket = 1;
493					continue;
494				}
495
496				/* collision? */
497				for (i = 0; i < outpos; i++) {
498					if (out[i] == item) {
499						collide = 1;
500						break;
501					}
502				}
503
504				reject = 0;
505				if (!collide && recurse_to_leaf) {
506					if (item < 0) {
507						int sub_r;
508						if (vary_r)
509							sub_r = r >> (vary_r-1);
510						else
511							sub_r = 0;
512						if (crush_choose_firstn(map,
513							 map->buckets[-1-item],
514							 weight, weight_max,
515							 x, outpos+1, 0,
516							 out2, outpos, count,
517							 recurse_tries, 0,
518							 local_retries,
519							 local_fallback_retries,
520							 0,
521							 vary_r,
522							 NULL,
523							 sub_r) <= outpos)
524							/* didn't get leaf */
525							reject = 1;
526					} else {
527						/* we already have a leaf! */
528						out2[outpos] = item;
529					}
530				}
531
532				if (!reject) {
533					/* out? */
534					if (itemtype == 0)
535						reject = is_out(map, weight,
536								weight_max,
537								item, x);
538					else
539						reject = 0;
540				}
541
542reject:
543				if (reject || collide) {
544					ftotal++;
545					flocal++;
546
547					if (collide && flocal <= local_retries)
548						/* retry locally a few times */
549						retry_bucket = 1;
550					else if (local_fallback_retries > 0 &&
551						 flocal <= in->size + local_fallback_retries)
552						/* exhaustive bucket search */
553						retry_bucket = 1;
554					else if (ftotal < tries)
555						/* then retry descent */
556						retry_descent = 1;
557					else
558						/* else give up */
559						skip_rep = 1;
560					dprintk("  reject %d  collide %d  "
561						"ftotal %u  flocal %u\n",
562						reject, collide, ftotal,
563						flocal);
564				}
565			} while (retry_bucket);
566		} while (retry_descent);
567
568		if (skip_rep) {
569			dprintk("skip rep\n");
570			continue;
571		}
572
573		dprintk("CHOOSE got %d\n", item);
574		out[outpos] = item;
575		outpos++;
576		count--;
577#ifndef __KERNEL__
578		if (map->choose_tries && ftotal <= map->choose_total_tries)
579			map->choose_tries[ftotal]++;
580#endif
581	}
582
583	dprintk("CHOOSE returns %d\n", outpos);
584	return outpos;
585}
586
587
588/**
589 * crush_choose_indep: alternative breadth-first positionally stable mapping
590 *
591 */
592static void crush_choose_indep(const struct crush_map *map,
593			       struct crush_bucket *bucket,
594			       const __u32 *weight, int weight_max,
595			       int x, int left, int numrep, int type,
596			       int *out, int outpos,
597			       unsigned int tries,
598			       unsigned int recurse_tries,
599			       int recurse_to_leaf,
600			       int *out2,
601			       int parent_r)
602{
603	struct crush_bucket *in = bucket;
604	int endpos = outpos + left;
605	int rep;
606	unsigned int ftotal;
607	int r;
608	int i;
609	int item = 0;
610	int itemtype;
611	int collide;
612
613	dprintk("CHOOSE%s INDEP bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
614		bucket->id, x, outpos, numrep);
615
616	/* initially my result is undefined */
617	for (rep = outpos; rep < endpos; rep++) {
618		out[rep] = CRUSH_ITEM_UNDEF;
619		if (out2)
620			out2[rep] = CRUSH_ITEM_UNDEF;
621	}
622
623	for (ftotal = 0; left > 0 && ftotal < tries; ftotal++) {
624#ifdef DEBUG_INDEP
625		if (out2 && ftotal) {
626			dprintk("%u %d a: ", ftotal, left);
627			for (rep = outpos; rep < endpos; rep++) {
628				dprintk(" %d", out[rep]);
629			}
630			dprintk("\n");
631			dprintk("%u %d b: ", ftotal, left);
632			for (rep = outpos; rep < endpos; rep++) {
633				dprintk(" %d", out2[rep]);
634			}
635			dprintk("\n");
636		}
637#endif
638		for (rep = outpos; rep < endpos; rep++) {
639			if (out[rep] != CRUSH_ITEM_UNDEF)
640				continue;
641
642			in = bucket;  /* initial bucket */
643
644			/* choose through intervening buckets */
645			for (;;) {
646				/* note: we base the choice on the position
647				 * even in the nested call.  that means that
648				 * if the first layer chooses the same bucket
649				 * in a different position, we will tend to
650				 * choose a different item in that bucket.
651				 * this will involve more devices in data
652				 * movement and tend to distribute the load.
653				 */
654				r = rep + parent_r;
655
656				/* be careful */
657				if (in->alg == CRUSH_BUCKET_UNIFORM &&
658				    in->size % numrep == 0)
659					/* r'=r+(n+1)*f_total */
660					r += (numrep+1) * ftotal;
661				else
662					/* r' = r + n*f_total */
663					r += numrep * ftotal;
664
665				/* bucket choose */
666				if (in->size == 0) {
667					dprintk("   empty bucket\n");
668					break;
669				}
670
671				item = crush_bucket_choose(in, x, r);
672				if (item >= map->max_devices) {
673					dprintk("   bad item %d\n", item);
674					out[rep] = CRUSH_ITEM_NONE;
675					if (out2)
676						out2[rep] = CRUSH_ITEM_NONE;
677					left--;
678					break;
679				}
680
681				/* desired type? */
682				if (item < 0)
683					itemtype = map->buckets[-1-item]->type;
684				else
685					itemtype = 0;
686				dprintk("  item %d type %d\n", item, itemtype);
687
688				/* keep going? */
689				if (itemtype != type) {
690					if (item >= 0 ||
691					    (-1-item) >= map->max_buckets) {
692						dprintk("   bad item type %d\n", type);
693						out[rep] = CRUSH_ITEM_NONE;
694						if (out2)
695							out2[rep] =
696								CRUSH_ITEM_NONE;
697						left--;
698						break;
699					}
700					in = map->buckets[-1-item];
701					continue;
702				}
703
704				/* collision? */
705				collide = 0;
706				for (i = outpos; i < endpos; i++) {
707					if (out[i] == item) {
708						collide = 1;
709						break;
710					}
711				}
712				if (collide)
713					break;
714
715				if (recurse_to_leaf) {
716					if (item < 0) {
717						crush_choose_indep(map,
718						   map->buckets[-1-item],
719						   weight, weight_max,
720						   x, 1, numrep, 0,
721						   out2, rep,
722						   recurse_tries, 0,
723						   0, NULL, r);
724						if (out2[rep] == CRUSH_ITEM_NONE) {
725							/* placed nothing; no leaf */
726							break;
727						}
728					} else {
729						/* we already have a leaf! */
730						out2[rep] = item;
731					}
732				}
733
734				/* out? */
735				if (itemtype == 0 &&
736				    is_out(map, weight, weight_max, item, x))
737					break;
738
739				/* yay! */
740				out[rep] = item;
741				left--;
742				break;
743			}
744		}
745	}
746	for (rep = outpos; rep < endpos; rep++) {
747		if (out[rep] == CRUSH_ITEM_UNDEF) {
748			out[rep] = CRUSH_ITEM_NONE;
749		}
750		if (out2 && out2[rep] == CRUSH_ITEM_UNDEF) {
751			out2[rep] = CRUSH_ITEM_NONE;
752		}
753	}
754#ifndef __KERNEL__
755	if (map->choose_tries && ftotal <= map->choose_total_tries)
756		map->choose_tries[ftotal]++;
757#endif
758#ifdef DEBUG_INDEP
759	if (out2) {
760		dprintk("%u %d a: ", ftotal, left);
761		for (rep = outpos; rep < endpos; rep++) {
762			dprintk(" %d", out[rep]);
763		}
764		dprintk("\n");
765		dprintk("%u %d b: ", ftotal, left);
766		for (rep = outpos; rep < endpos; rep++) {
767			dprintk(" %d", out2[rep]);
768		}
769		dprintk("\n");
770	}
771#endif
772}
773
774/**
775 * crush_do_rule - calculate a mapping with the given input and rule
776 * @map: the crush_map
777 * @ruleno: the rule id
778 * @x: hash input
779 * @result: pointer to result vector
780 * @result_max: maximum result size
781 * @weight: weight vector (for map leaves)
782 * @weight_max: size of weight vector
783 * @scratch: scratch vector for private use; must be >= 3 * result_max
784 */
785int crush_do_rule(const struct crush_map *map,
786		  int ruleno, int x, int *result, int result_max,
787		  const __u32 *weight, int weight_max,
788		  int *scratch)
789{
790	int result_len;
791	int *a = scratch;
792	int *b = scratch + result_max;
793	int *c = scratch + result_max*2;
794	int recurse_to_leaf;
795	int *w;
796	int wsize = 0;
797	int *o;
798	int osize;
799	int *tmp;
800	struct crush_rule *rule;
801	__u32 step;
802	int i, j;
803	int numrep;
804	int out_size;
805	/*
806	 * the original choose_total_tries value was off by one (it
807	 * counted "retries" and not "tries").  add one.
808	 */
809	int choose_tries = map->choose_total_tries + 1;
810	int choose_leaf_tries = 0;
811	/*
812	 * the local tries values were counted as "retries", though,
813	 * and need no adjustment
814	 */
815	int choose_local_retries = map->choose_local_tries;
816	int choose_local_fallback_retries = map->choose_local_fallback_tries;
817
818	int vary_r = map->chooseleaf_vary_r;
819
820	if ((__u32)ruleno >= map->max_rules) {
821		dprintk(" bad ruleno %d\n", ruleno);
822		return 0;
823	}
824
825	rule = map->rules[ruleno];
826	result_len = 0;
827	w = a;
828	o = b;
829
830	for (step = 0; step < rule->len; step++) {
831		int firstn = 0;
832		struct crush_rule_step *curstep = &rule->steps[step];
833
834		switch (curstep->op) {
835		case CRUSH_RULE_TAKE:
836			if ((curstep->arg1 >= 0 &&
837			     curstep->arg1 < map->max_devices) ||
838			    (-1-curstep->arg1 < map->max_buckets &&
839			     map->buckets[-1-curstep->arg1])) {
840				w[0] = curstep->arg1;
841				wsize = 1;
842			} else {
843				dprintk(" bad take value %d\n", curstep->arg1);
844			}
845			break;
846
847		case CRUSH_RULE_SET_CHOOSE_TRIES:
848			if (curstep->arg1 > 0)
849				choose_tries = curstep->arg1;
850			break;
851
852		case CRUSH_RULE_SET_CHOOSELEAF_TRIES:
853			if (curstep->arg1 > 0)
854				choose_leaf_tries = curstep->arg1;
855			break;
856
857		case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES:
858			if (curstep->arg1 >= 0)
859				choose_local_retries = curstep->arg1;
860			break;
861
862		case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES:
863			if (curstep->arg1 >= 0)
864				choose_local_fallback_retries = curstep->arg1;
865			break;
866
867		case CRUSH_RULE_SET_CHOOSELEAF_VARY_R:
868			if (curstep->arg1 >= 0)
869				vary_r = curstep->arg1;
870			break;
871
872		case CRUSH_RULE_CHOOSELEAF_FIRSTN:
873		case CRUSH_RULE_CHOOSE_FIRSTN:
874			firstn = 1;
875			/* fall through */
876		case CRUSH_RULE_CHOOSELEAF_INDEP:
877		case CRUSH_RULE_CHOOSE_INDEP:
878			if (wsize == 0)
879				break;
880
881			recurse_to_leaf =
882				curstep->op ==
883				 CRUSH_RULE_CHOOSELEAF_FIRSTN ||
884				curstep->op ==
885				CRUSH_RULE_CHOOSELEAF_INDEP;
886
887			/* reset output */
888			osize = 0;
889
890			for (i = 0; i < wsize; i++) {
891				/*
892				 * see CRUSH_N, CRUSH_N_MINUS macros.
893				 * basically, numrep <= 0 means relative to
894				 * the provided result_max
895				 */
896				numrep = curstep->arg1;
897				if (numrep <= 0) {
898					numrep += result_max;
899					if (numrep <= 0)
900						continue;
901				}
902				j = 0;
903				if (firstn) {
904					int recurse_tries;
905					if (choose_leaf_tries)
906						recurse_tries =
907							choose_leaf_tries;
908					else if (map->chooseleaf_descend_once)
909						recurse_tries = 1;
910					else
911						recurse_tries = choose_tries;
912					osize += crush_choose_firstn(
913						map,
914						map->buckets[-1-w[i]],
915						weight, weight_max,
916						x, numrep,
917						curstep->arg2,
918						o+osize, j,
919						result_max-osize,
920						choose_tries,
921						recurse_tries,
922						choose_local_retries,
923						choose_local_fallback_retries,
924						recurse_to_leaf,
925						vary_r,
926						c+osize,
927						0);
928				} else {
929					out_size = ((numrep < (result_max-osize)) ?
930						    numrep : (result_max-osize));
931					crush_choose_indep(
932						map,
933						map->buckets[-1-w[i]],
934						weight, weight_max,
935						x, out_size, numrep,
936						curstep->arg2,
937						o+osize, j,
938						choose_tries,
939						choose_leaf_tries ?
940						   choose_leaf_tries : 1,
941						recurse_to_leaf,
942						c+osize,
943						0);
944					osize += out_size;
945				}
946			}
947
948			if (recurse_to_leaf)
949				/* copy final _leaf_ values to output set */
950				memcpy(o, c, osize*sizeof(*o));
951
952			/* swap o and w arrays */
953			tmp = o;
954			o = w;
955			w = tmp;
956			wsize = osize;
957			break;
958
959
960		case CRUSH_RULE_EMIT:
961			for (i = 0; i < wsize && result_len < result_max; i++) {
962				result[result_len] = w[i];
963				result_len++;
964			}
965			wsize = 0;
966			break;
967
968		default:
969			dprintk(" unknown op %d at step %d\n",
970				curstep->op, step);
971			break;
972		}
973	}
974	return result_len;
975}
976