1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9 
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15 
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2011 Intel Corporation
24  *
25  * Copyright 2012 Xyratex Technology Limited
26  */
27 /*
28  * lustre/ptlrpc/nrs.c
29  *
30  * Network Request Scheduler (NRS)
31  *
32  * Allows to reorder the handling of RPCs at servers.
33  *
34  * Author: Liang Zhen <liang@whamcloud.com>
35  * Author: Nikitas Angelinas <nikitas_angelinas@xyratex.com>
36  */
37 /**
 * \addtogroup nrs
39  * @{
40  */
41 
42 #define DEBUG_SUBSYSTEM S_RPC
43 #include "../include/obd_support.h"
44 #include "../include/obd_class.h"
45 #include "../include/lustre_net.h"
46 #include "../include/lprocfs_status.h"
47 #include "../../include/linux/libcfs/libcfs.h"
48 #include "ptlrpc_internal.h"
49 
50 /* XXX: This is just for liblustre. Remove the #if defined directive when the
51  * "cfs_" prefix is dropped from cfs_list_head. */
52 extern struct list_head ptlrpc_all_services;
53 
54 /**
55  * NRS core object.
56  */
57 struct nrs_core nrs_core;
58 
nrs_policy_init(struct ptlrpc_nrs_policy * policy)59 static int nrs_policy_init(struct ptlrpc_nrs_policy *policy)
60 {
61 	return policy->pol_desc->pd_ops->op_policy_init != NULL ?
62 	       policy->pol_desc->pd_ops->op_policy_init(policy) : 0;
63 }
64 
nrs_policy_fini(struct ptlrpc_nrs_policy * policy)65 static void nrs_policy_fini(struct ptlrpc_nrs_policy *policy)
66 {
67 	LASSERT(policy->pol_ref == 0);
68 	LASSERT(policy->pol_req_queued == 0);
69 
70 	if (policy->pol_desc->pd_ops->op_policy_fini != NULL)
71 		policy->pol_desc->pd_ops->op_policy_fini(policy);
72 }
73 
nrs_policy_ctl_locked(struct ptlrpc_nrs_policy * policy,enum ptlrpc_nrs_ctl opc,void * arg)74 static int nrs_policy_ctl_locked(struct ptlrpc_nrs_policy *policy,
75 				 enum ptlrpc_nrs_ctl opc, void *arg)
76 {
77 	/**
78 	 * The policy may be stopped, but the lprocfs files and
79 	 * ptlrpc_nrs_policy instances remain present until unregistration time.
80 	 * Do not perform the ctl operation if the policy is stopped, as
81 	 * policy->pol_private will be NULL in such a case.
82 	 */
83 	if (policy->pol_state == NRS_POL_STATE_STOPPED)
84 		return -ENODEV;
85 
86 	return policy->pol_desc->pd_ops->op_policy_ctl != NULL ?
87 	       policy->pol_desc->pd_ops->op_policy_ctl(policy, opc, arg) :
88 	       -ENOSYS;
89 }
90 
nrs_policy_stop0(struct ptlrpc_nrs_policy * policy)91 static void nrs_policy_stop0(struct ptlrpc_nrs_policy *policy)
92 {
93 	struct ptlrpc_nrs *nrs = policy->pol_nrs;
94 
95 	if (policy->pol_desc->pd_ops->op_policy_stop != NULL) {
96 		spin_unlock(&nrs->nrs_lock);
97 
98 		policy->pol_desc->pd_ops->op_policy_stop(policy);
99 
100 		spin_lock(&nrs->nrs_lock);
101 	}
102 
103 	LASSERT(list_empty(&policy->pol_list_queued));
104 	LASSERT(policy->pol_req_queued == 0 &&
105 		policy->pol_req_started == 0);
106 
107 	policy->pol_private = NULL;
108 
109 	policy->pol_state = NRS_POL_STATE_STOPPED;
110 
111 	if (atomic_dec_and_test(&policy->pol_desc->pd_refs))
112 		module_put(policy->pol_desc->pd_owner);
113 }
114 
nrs_policy_stop_locked(struct ptlrpc_nrs_policy * policy)115 static int nrs_policy_stop_locked(struct ptlrpc_nrs_policy *policy)
116 {
117 	struct ptlrpc_nrs *nrs = policy->pol_nrs;
118 
119 	if (nrs->nrs_policy_fallback == policy && !nrs->nrs_stopping)
120 		return -EPERM;
121 
122 	if (policy->pol_state == NRS_POL_STATE_STARTING)
123 		return -EAGAIN;
124 
125 	/* In progress or already stopped */
126 	if (policy->pol_state != NRS_POL_STATE_STARTED)
127 		return 0;
128 
129 	policy->pol_state = NRS_POL_STATE_STOPPING;
130 
131 	/* Immediately make it invisible */
132 	if (nrs->nrs_policy_primary == policy) {
133 		nrs->nrs_policy_primary = NULL;
134 
135 	} else {
136 		LASSERT(nrs->nrs_policy_fallback == policy);
137 		nrs->nrs_policy_fallback = NULL;
138 	}
139 
140 	/* I have the only refcount */
141 	if (policy->pol_ref == 1)
142 		nrs_policy_stop0(policy);
143 
144 	return 0;
145 }
146 
147 /**
148  * Transitions the \a nrs NRS head's primary policy to
149  * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING and if the policy has no
150  * pending usage references, to ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED.
151  *
152  * \param[in] nrs the NRS head to carry out this operation on
153  */
nrs_policy_stop_primary(struct ptlrpc_nrs * nrs)154 static void nrs_policy_stop_primary(struct ptlrpc_nrs *nrs)
155 {
156 	struct ptlrpc_nrs_policy *tmp = nrs->nrs_policy_primary;
157 
158 	if (tmp == NULL)
159 		return;
160 
161 	nrs->nrs_policy_primary = NULL;
162 
163 	LASSERT(tmp->pol_state == NRS_POL_STATE_STARTED);
164 	tmp->pol_state = NRS_POL_STATE_STOPPING;
165 
166 	if (tmp->pol_ref == 0)
167 		nrs_policy_stop0(tmp);
168 }
169 
170 /**
171  * Transitions a policy across the ptlrpc_nrs_pol_state range of values, in
172  * response to an lprocfs command to start a policy.
173  *
174  * If a primary policy different to the current one is specified, this function
175  * will transition the new policy to the
176  * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTING and then to
177  * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED, and will then transition
178  * the old primary policy (if there is one) to
179  * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding
 * references on the policy to ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED.
181  *
182  * If the fallback policy is specified, this is taken to indicate an instruction
183  * to stop the current primary policy, without substituting it with another
184  * primary policy, so the primary policy (if any) is transitioned to
185  * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding
 * references on the policy to ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED. In
187  * this case, the fallback policy is only left active in the NRS head.
188  */
nrs_policy_start_locked(struct ptlrpc_nrs_policy * policy)189 static int nrs_policy_start_locked(struct ptlrpc_nrs_policy *policy)
190 {
191 	struct ptlrpc_nrs      *nrs = policy->pol_nrs;
192 	int			rc = 0;
193 
194 	/**
195 	 * Don't allow multiple starting which is too complex, and has no real
196 	 * benefit.
197 	 */
198 	if (nrs->nrs_policy_starting)
199 		return -EAGAIN;
200 
201 	LASSERT(policy->pol_state != NRS_POL_STATE_STARTING);
202 
203 	if (policy->pol_state == NRS_POL_STATE_STOPPING)
204 		return -EAGAIN;
205 
206 	if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) {
207 		/**
208 		 * This is for cases in which the user sets the policy to the
209 		 * fallback policy (currently fifo for all services); i.e. the
210 		 * user is resetting the policy to the default; so we stop the
211 		 * primary policy, if any.
212 		 */
213 		if (policy == nrs->nrs_policy_fallback) {
214 			nrs_policy_stop_primary(nrs);
215 			return 0;
216 		}
217 
218 		/**
219 		 * If we reach here, we must be setting up the fallback policy
220 		 * at service startup time, and only a single policy with the
221 		 * nrs_policy_flags::PTLRPC_NRS_FL_FALLBACK flag set can
222 		 * register with NRS core.
223 		 */
224 		LASSERT(nrs->nrs_policy_fallback == NULL);
225 	} else {
226 		/**
227 		 * Shouldn't start primary policy if w/o fallback policy.
228 		 */
229 		if (nrs->nrs_policy_fallback == NULL)
230 			return -EPERM;
231 
232 		if (policy->pol_state == NRS_POL_STATE_STARTED)
233 			return 0;
234 	}
235 
236 	/**
237 	 * Increase the module usage count for policies registering from other
238 	 * modules.
239 	 */
240 	if (atomic_inc_return(&policy->pol_desc->pd_refs) == 1 &&
241 	    !try_module_get(policy->pol_desc->pd_owner)) {
242 		atomic_dec(&policy->pol_desc->pd_refs);
243 		CERROR("NRS: cannot get module for policy %s; is it alive?\n",
244 		       policy->pol_desc->pd_name);
245 		return -ENODEV;
246 	}
247 
248 	/**
249 	 * Serialize policy starting across the NRS head
250 	 */
251 	nrs->nrs_policy_starting = 1;
252 
253 	policy->pol_state = NRS_POL_STATE_STARTING;
254 
255 	if (policy->pol_desc->pd_ops->op_policy_start) {
256 		spin_unlock(&nrs->nrs_lock);
257 
258 		rc = policy->pol_desc->pd_ops->op_policy_start(policy);
259 
260 		spin_lock(&nrs->nrs_lock);
261 		if (rc != 0) {
262 			if (atomic_dec_and_test(&policy->pol_desc->pd_refs))
263 				module_put(policy->pol_desc->pd_owner);
264 
265 			policy->pol_state = NRS_POL_STATE_STOPPED;
266 			goto out;
267 		}
268 	}
269 
270 	policy->pol_state = NRS_POL_STATE_STARTED;
271 
272 	if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) {
273 		/**
274 		 * This path is only used at PTLRPC service setup time.
275 		 */
276 		nrs->nrs_policy_fallback = policy;
277 	} else {
278 		/*
279 		 * Try to stop the current primary policy if there is one.
280 		 */
281 		nrs_policy_stop_primary(nrs);
282 
283 		/**
284 		 * And set the newly-started policy as the primary one.
285 		 */
286 		nrs->nrs_policy_primary = policy;
287 	}
288 
289 out:
290 	nrs->nrs_policy_starting = 0;
291 
292 	return rc;
293 }
294 
295 /**
296  * Increases the policy's usage reference count.
297  */
nrs_policy_get_locked(struct ptlrpc_nrs_policy * policy)298 static inline void nrs_policy_get_locked(struct ptlrpc_nrs_policy *policy)
299 {
300 	policy->pol_ref++;
301 }
302 
303 /**
 * Decreases the policy's usage reference count, and stops the policy in case it
 * was already stopping and has no more outstanding usage references (which
306  * indicates it has no more queued or started requests, and can be safely
307  * stopped).
308  */
nrs_policy_put_locked(struct ptlrpc_nrs_policy * policy)309 static void nrs_policy_put_locked(struct ptlrpc_nrs_policy *policy)
310 {
311 	LASSERT(policy->pol_ref > 0);
312 
313 	policy->pol_ref--;
314 	if (unlikely(policy->pol_ref == 0 &&
315 	    policy->pol_state == NRS_POL_STATE_STOPPING))
316 		nrs_policy_stop0(policy);
317 }
318 
nrs_policy_put(struct ptlrpc_nrs_policy * policy)319 static void nrs_policy_put(struct ptlrpc_nrs_policy *policy)
320 {
321 	spin_lock(&policy->pol_nrs->nrs_lock);
322 	nrs_policy_put_locked(policy);
323 	spin_unlock(&policy->pol_nrs->nrs_lock);
324 }
325 
326 /**
327  * Find and return a policy by name.
328  */
nrs_policy_find_locked(struct ptlrpc_nrs * nrs,char * name)329 static struct ptlrpc_nrs_policy *nrs_policy_find_locked(struct ptlrpc_nrs *nrs,
330 							 char *name)
331 {
332 	struct ptlrpc_nrs_policy *tmp;
333 
334 	list_for_each_entry(tmp, &nrs->nrs_policy_list, pol_list) {
335 		if (strncmp(tmp->pol_desc->pd_name, name,
336 			    NRS_POL_NAME_MAX) == 0) {
337 			nrs_policy_get_locked(tmp);
338 			return tmp;
339 		}
340 	}
341 	return NULL;
342 }
343 
344 /**
345  * Release references for the resource hierarchy moving upwards towards the
346  * policy instance resource.
347  */
nrs_resource_put(struct ptlrpc_nrs_resource * res)348 static void nrs_resource_put(struct ptlrpc_nrs_resource *res)
349 {
350 	struct ptlrpc_nrs_policy *policy = res->res_policy;
351 
352 	if (policy->pol_desc->pd_ops->op_res_put != NULL) {
353 		struct ptlrpc_nrs_resource *parent;
354 
355 		for (; res != NULL; res = parent) {
356 			parent = res->res_parent;
357 			policy->pol_desc->pd_ops->op_res_put(policy, res);
358 		}
359 	}
360 }
361 
362 /**
363  * Obtains references for each resource in the resource hierarchy for request
364  * \a nrq if it is to be handled by \a policy.
365  *
366  * \param[in] policy	  the policy
367  * \param[in] nrq	  the request
368  * \param[in] moving_req  denotes whether this is a call to the function by
369  *			  ldlm_lock_reorder_req(), in order to move \a nrq to
370  *			  the high-priority NRS head; we should not sleep when
371  *			  set.
372  *
373  * \retval NULL		  resource hierarchy references not obtained
374  * \retval valid-pointer  the bottom level of the resource hierarchy
375  *
376  * \see ptlrpc_nrs_pol_ops::op_res_get()
377  */
378 static
nrs_resource_get(struct ptlrpc_nrs_policy * policy,struct ptlrpc_nrs_request * nrq,bool moving_req)379 struct ptlrpc_nrs_resource *nrs_resource_get(struct ptlrpc_nrs_policy *policy,
380 					      struct ptlrpc_nrs_request *nrq,
381 					      bool moving_req)
382 {
383 	/**
384 	 * Set to NULL to traverse the resource hierarchy from the top.
385 	 */
386 	struct ptlrpc_nrs_resource *res = NULL;
387 	struct ptlrpc_nrs_resource *tmp = NULL;
388 	int			    rc;
389 
390 	while (1) {
391 		rc = policy->pol_desc->pd_ops->op_res_get(policy, nrq, res,
392 							  &tmp, moving_req);
393 		if (rc < 0) {
394 			if (res != NULL)
395 				nrs_resource_put(res);
396 			return NULL;
397 		}
398 
399 		LASSERT(tmp != NULL);
400 		tmp->res_parent = res;
401 		tmp->res_policy = policy;
402 		res = tmp;
403 		tmp = NULL;
404 		/**
405 		 * Return once we have obtained a reference to the bottom level
406 		 * of the resource hierarchy.
407 		 */
408 		if (rc > 0)
409 			return res;
410 	}
411 }
412 
413 /**
414  * Obtains resources for the resource hierarchies and policy references for
415  * the fallback and current primary policy (if any), that will later be used
416  * to handle request \a nrq.
417  *
418  * \param[in]  nrs  the NRS head instance that will be handling request \a nrq.
419  * \param[in]  nrq  the request that is being handled.
420  * \param[out] resp the array where references to the resource hierarchy are
421  *		    stored.
422  * \param[in]  moving_req  is set when obtaining resources while moving a
423  *			   request from a policy on the regular NRS head to a
424  *			   policy on the HP NRS head (via
425  *			   ldlm_lock_reorder_req()). It signifies that
426  *			   allocations to get resources should be atomic; for
427  *			   a full explanation, see comment in
428  *			   ptlrpc_nrs_pol_ops::op_res_get().
429  */
static void nrs_resource_get_safe(struct ptlrpc_nrs *nrs,
				  struct ptlrpc_nrs_request *nrq,
				  struct ptlrpc_nrs_resource **resp,
				  bool moving_req)
{
	struct ptlrpc_nrs_policy   *fallback;
	struct ptlrpc_nrs_policy   *primary;

	memset(resp, 0, NRS_RES_MAX * sizeof(resp[0]));

	/**
	 * Take the policy references under nrs_lock.
	 */
	spin_lock(&nrs->nrs_lock);

	fallback = nrs->nrs_policy_fallback;
	nrs_policy_get_locked(fallback);

	primary = nrs->nrs_policy_primary;
	if (primary != NULL)
		nrs_policy_get_locked(primary);

	spin_unlock(&nrs->nrs_lock);

	/**
	 * Take the resource hierarchy references, with the lock released.
	 */
	resp[NRS_RES_FALLBACK] = nrs_resource_get(fallback, nrq, moving_req);
	LASSERT(resp[NRS_RES_FALLBACK] != NULL);

	if (primary == NULL)
		return;

	resp[NRS_RES_PRIMARY] = nrs_resource_get(primary, nrq, moving_req);

	/**
	 * The primary policy may decline to serve a particular request; in
	 * that case its policy reference will not be used for this request
	 * and is released here.
	 */
	if (resp[NRS_RES_PRIMARY] == NULL)
		nrs_policy_put(primary);
}
473 
474 /**
475  * Releases references to resource hierarchies and policies, because they are no
476  * longer required; used when request handling has been completed, or the
477  * request is moving to the high priority NRS head.
478  *
479  * \param resp	the resource hierarchy that is being released
480  *
 * \see ptlrpc_nrs_req_hp_move()
482  * \see ptlrpc_nrs_req_finalize()
483  */
nrs_resource_put_safe(struct ptlrpc_nrs_resource ** resp)484 static void nrs_resource_put_safe(struct ptlrpc_nrs_resource **resp)
485 {
486 	struct ptlrpc_nrs_policy *pols[NRS_RES_MAX];
487 	struct ptlrpc_nrs	 *nrs = NULL;
488 	int			  i;
489 
490 	for (i = 0; i < NRS_RES_MAX; i++) {
491 		if (resp[i] != NULL) {
492 			pols[i] = resp[i]->res_policy;
493 			nrs_resource_put(resp[i]);
494 			resp[i] = NULL;
495 		} else {
496 			pols[i] = NULL;
497 		}
498 	}
499 
500 	for (i = 0; i < NRS_RES_MAX; i++) {
501 		if (pols[i] == NULL)
502 			continue;
503 
504 		if (nrs == NULL) {
505 			nrs = pols[i]->pol_nrs;
506 			spin_lock(&nrs->nrs_lock);
507 		}
508 		nrs_policy_put_locked(pols[i]);
509 	}
510 
511 	if (nrs != NULL)
512 		spin_unlock(&nrs->nrs_lock);
513 }
514 
515 /**
516  * Obtains an NRS request from \a policy for handling or examination; the
517  * request should be removed in the 'handling' case.
518  *
519  * Calling into this function implies we already know the policy has a request
520  * waiting to be handled.
521  *
522  * \param[in] policy the policy from which a request
523  * \param[in] peek   when set, signifies that we just want to examine the
524  *		     request, and not handle it, so the request is not removed
525  *		     from the policy.
526  * \param[in] force  when set, it will force a policy to return a request if it
527  *		     has one pending
528  *
529  * \retval the NRS request to be handled
530  */
531 static inline
nrs_request_get(struct ptlrpc_nrs_policy * policy,bool peek,bool force)532 struct ptlrpc_nrs_request *nrs_request_get(struct ptlrpc_nrs_policy *policy,
533 					    bool peek, bool force)
534 {
535 	struct ptlrpc_nrs_request *nrq;
536 
537 	LASSERT(policy->pol_req_queued > 0);
538 
539 	nrq = policy->pol_desc->pd_ops->op_req_get(policy, peek, force);
540 
541 	LASSERT(ergo(nrq != NULL, nrs_request_policy(nrq) == policy));
542 
543 	return nrq;
544 }
545 
546 /**
 * Enqueues request \a nrq for later handling, via one of the policies for
 * which resources were earlier obtained via nrs_resource_get_safe(). The
549  * function attempts to enqueue the request first on the primary policy
550  * (if any), since this is the preferred choice.
551  *
552  * \param nrq the request being enqueued
553  *
554  * \see nrs_resource_get_safe()
555  */
nrs_request_enqueue(struct ptlrpc_nrs_request * nrq)556 static inline void nrs_request_enqueue(struct ptlrpc_nrs_request *nrq)
557 {
558 	struct ptlrpc_nrs_policy *policy;
559 	int			  rc;
560 	int			  i;
561 
562 	/**
563 	 * Try in descending order, because the primary policy (if any) is
564 	 * the preferred choice.
565 	 */
566 	for (i = NRS_RES_MAX - 1; i >= 0; i--) {
567 		if (nrq->nr_res_ptrs[i] == NULL)
568 			continue;
569 
570 		nrq->nr_res_idx = i;
571 		policy = nrq->nr_res_ptrs[i]->res_policy;
572 
573 		rc = policy->pol_desc->pd_ops->op_req_enqueue(policy, nrq);
574 		if (rc == 0) {
575 			policy->pol_nrs->nrs_req_queued++;
576 			policy->pol_req_queued++;
577 			return;
578 		}
579 	}
580 	/**
581 	 * Should never get here, as at least the primary policy's
582 	 * ptlrpc_nrs_pol_ops::op_req_enqueue() implementation should always
583 	 * succeed.
584 	 */
585 	LBUG();
586 }
587 
588 /**
589  * Called when a request has been handled
590  *
 * \param[in] nrq the request that has been handled; can be used for
592  *		  job/resource control.
593  *
594  * \see ptlrpc_nrs_req_stop_nolock()
595  */
nrs_request_stop(struct ptlrpc_nrs_request * nrq)596 static inline void nrs_request_stop(struct ptlrpc_nrs_request *nrq)
597 {
598 	struct ptlrpc_nrs_policy *policy = nrs_request_policy(nrq);
599 
600 	if (policy->pol_desc->pd_ops->op_req_stop)
601 		policy->pol_desc->pd_ops->op_req_stop(policy, nrq);
602 
603 	LASSERT(policy->pol_nrs->nrs_req_started > 0);
604 	LASSERT(policy->pol_req_started > 0);
605 
606 	policy->pol_nrs->nrs_req_started--;
607 	policy->pol_req_started--;
608 }
609 
610 /**
611  * Handler for operations that can be carried out on policies.
612  *
613  * Handles opcodes that are common to all policy types within NRS core, and
614  * passes any unknown opcodes to the policy-specific control function.
615  *
616  * \param[in]	  nrs  the NRS head this policy belongs to.
617  * \param[in]	  name the human-readable policy name; should be the same as
618  *		       ptlrpc_nrs_pol_desc::pd_name.
619  * \param[in]	  opc  the opcode of the operation being carried out.
620  * \param[in,out] arg  can be used to pass information in and out between when
621  *		       carrying an operation; usually data that is private to
622  *		       the policy at some level, or generic policy status
623  *		       information.
624  *
625  * \retval -ve error condition
626  * \retval   0 operation was carried out successfully
627  */
nrs_policy_ctl(struct ptlrpc_nrs * nrs,char * name,enum ptlrpc_nrs_ctl opc,void * arg)628 static int nrs_policy_ctl(struct ptlrpc_nrs *nrs, char *name,
629 			  enum ptlrpc_nrs_ctl opc, void *arg)
630 {
631 	struct ptlrpc_nrs_policy       *policy;
632 	int				rc = 0;
633 
634 	spin_lock(&nrs->nrs_lock);
635 
636 	policy = nrs_policy_find_locked(nrs, name);
637 	if (policy == NULL) {
638 		rc = -ENOENT;
639 		goto out;
640 	}
641 
642 	switch (opc) {
643 		/**
644 		 * Unknown opcode, pass it down to the policy-specific control
645 		 * function for handling.
646 		 */
647 	default:
648 		rc = nrs_policy_ctl_locked(policy, opc, arg);
649 		break;
650 
651 		/**
652 		 * Start \e policy
653 		 */
654 	case PTLRPC_NRS_CTL_START:
655 		rc = nrs_policy_start_locked(policy);
656 		break;
657 	}
658 out:
659 	if (policy != NULL)
660 		nrs_policy_put_locked(policy);
661 
662 	spin_unlock(&nrs->nrs_lock);
663 
664 	return rc;
665 }
666 
667 /**
668  * Unregisters a policy by name.
669  *
670  * \param[in] nrs  the NRS head this policy belongs to.
671  * \param[in] name the human-readable policy name; should be the same as
672  *		   ptlrpc_nrs_pol_desc::pd_name
673  *
674  * \retval -ve error
675  * \retval   0 success
676  */
nrs_policy_unregister(struct ptlrpc_nrs * nrs,char * name)677 static int nrs_policy_unregister(struct ptlrpc_nrs *nrs, char *name)
678 {
679 	struct ptlrpc_nrs_policy *policy = NULL;
680 
681 	spin_lock(&nrs->nrs_lock);
682 
683 	policy = nrs_policy_find_locked(nrs, name);
684 	if (policy == NULL) {
685 		spin_unlock(&nrs->nrs_lock);
686 
687 		CERROR("Can't find NRS policy %s\n", name);
688 		return -ENOENT;
689 	}
690 
691 	if (policy->pol_ref > 1) {
692 		CERROR("Policy %s is busy with %d references\n", name,
693 		       (int)policy->pol_ref);
694 		nrs_policy_put_locked(policy);
695 
696 		spin_unlock(&nrs->nrs_lock);
697 		return -EBUSY;
698 	}
699 
700 	LASSERT(policy->pol_req_queued == 0);
701 	LASSERT(policy->pol_req_started == 0);
702 
703 	if (policy->pol_state != NRS_POL_STATE_STOPPED) {
704 		nrs_policy_stop_locked(policy);
705 		LASSERT(policy->pol_state == NRS_POL_STATE_STOPPED);
706 	}
707 
708 	list_del(&policy->pol_list);
709 	nrs->nrs_num_pols--;
710 
711 	nrs_policy_put_locked(policy);
712 
713 	spin_unlock(&nrs->nrs_lock);
714 
715 	nrs_policy_fini(policy);
716 
717 	LASSERT(policy->pol_private == NULL);
718 	OBD_FREE_PTR(policy);
719 
720 	return 0;
721 }
722 
723 /**
 * Register a policy from policy descriptor \a desc with NRS head \a nrs.
725  *
726  * \param[in] nrs   the NRS head on which the policy will be registered.
727  * \param[in] desc  the policy descriptor from which the information will be
728  *		    obtained to register the policy.
729  *
730  * \retval -ve error
731  * \retval   0 success
732  */
nrs_policy_register(struct ptlrpc_nrs * nrs,struct ptlrpc_nrs_pol_desc * desc)733 static int nrs_policy_register(struct ptlrpc_nrs *nrs,
734 			       struct ptlrpc_nrs_pol_desc *desc)
735 {
736 	struct ptlrpc_nrs_policy       *policy;
737 	struct ptlrpc_nrs_policy       *tmp;
738 	struct ptlrpc_service_part     *svcpt = nrs->nrs_svcpt;
739 	int				rc;
740 
741 	LASSERT(svcpt != NULL);
742 	LASSERT(desc->pd_ops != NULL);
743 	LASSERT(desc->pd_ops->op_res_get != NULL);
744 	LASSERT(desc->pd_ops->op_req_get != NULL);
745 	LASSERT(desc->pd_ops->op_req_enqueue != NULL);
746 	LASSERT(desc->pd_ops->op_req_dequeue != NULL);
747 	LASSERT(desc->pd_compat != NULL);
748 
749 	OBD_CPT_ALLOC_GFP(policy, svcpt->scp_service->srv_cptable,
750 			  svcpt->scp_cpt, sizeof(*policy), GFP_NOFS);
751 	if (policy == NULL)
752 		return -ENOMEM;
753 
754 	policy->pol_nrs     = nrs;
755 	policy->pol_desc    = desc;
756 	policy->pol_state   = NRS_POL_STATE_STOPPED;
757 	policy->pol_flags   = desc->pd_flags;
758 
759 	INIT_LIST_HEAD(&policy->pol_list);
760 	INIT_LIST_HEAD(&policy->pol_list_queued);
761 
762 	rc = nrs_policy_init(policy);
763 	if (rc != 0) {
764 		OBD_FREE_PTR(policy);
765 		return rc;
766 	}
767 
768 	spin_lock(&nrs->nrs_lock);
769 
770 	tmp = nrs_policy_find_locked(nrs, policy->pol_desc->pd_name);
771 	if (tmp != NULL) {
772 		CERROR("NRS policy %s has been registered, can't register it for %s\n",
773 		       policy->pol_desc->pd_name,
774 		       svcpt->scp_service->srv_name);
775 		nrs_policy_put_locked(tmp);
776 
777 		spin_unlock(&nrs->nrs_lock);
778 		nrs_policy_fini(policy);
779 		OBD_FREE_PTR(policy);
780 
781 		return -EEXIST;
782 	}
783 
784 	list_add_tail(&policy->pol_list, &nrs->nrs_policy_list);
785 	nrs->nrs_num_pols++;
786 
787 	if (policy->pol_flags & PTLRPC_NRS_FL_REG_START)
788 		rc = nrs_policy_start_locked(policy);
789 
790 	spin_unlock(&nrs->nrs_lock);
791 
792 	if (rc != 0)
793 		(void) nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
794 
795 	return rc;
796 }
797 
798 /**
799  * Enqueue request \a req using one of the policies its resources are referring
800  * to.
801  *
802  * \param[in] req the request to enqueue.
803  */
ptlrpc_nrs_req_add_nolock(struct ptlrpc_request * req)804 static void ptlrpc_nrs_req_add_nolock(struct ptlrpc_request *req)
805 {
806 	struct ptlrpc_nrs_policy       *policy;
807 
808 	LASSERT(req->rq_nrq.nr_initialized);
809 	LASSERT(!req->rq_nrq.nr_enqueued);
810 
811 	nrs_request_enqueue(&req->rq_nrq);
812 	req->rq_nrq.nr_enqueued = 1;
813 
814 	policy = nrs_request_policy(&req->rq_nrq);
815 	/**
816 	 * Add the policy to the NRS head's list of policies with enqueued
817 	 * requests, if it has not been added there.
818 	 */
819 	if (unlikely(list_empty(&policy->pol_list_queued)))
820 		list_add_tail(&policy->pol_list_queued,
821 				  &policy->pol_nrs->nrs_policy_queued);
822 }
823 
824 /**
825  * Enqueue a request on the high priority NRS head.
826  *
827  * \param req the request to enqueue.
828  */
ptlrpc_nrs_hpreq_add_nolock(struct ptlrpc_request * req)829 static void ptlrpc_nrs_hpreq_add_nolock(struct ptlrpc_request *req)
830 {
831 	int	opc = lustre_msg_get_opc(req->rq_reqmsg);
832 
833 	spin_lock(&req->rq_lock);
834 	req->rq_hp = 1;
835 	ptlrpc_nrs_req_add_nolock(req);
836 	if (opc != OBD_PING)
837 		DEBUG_REQ(D_NET, req, "high priority req");
838 	spin_unlock(&req->rq_lock);
839 }
840 
841 /**
842  * Returns a boolean predicate indicating whether the policy described by
843  * \a desc is adequate for use with service \a svc.
844  *
845  * \param[in] svc  the service
846  * \param[in] desc the policy descriptor
847  *
848  * \retval false the policy is not compatible with the service
849  * \retval true	 the policy is compatible with the service
850  */
nrs_policy_compatible(const struct ptlrpc_service * svc,const struct ptlrpc_nrs_pol_desc * desc)851 static inline bool nrs_policy_compatible(const struct ptlrpc_service *svc,
852 					 const struct ptlrpc_nrs_pol_desc *desc)
853 {
854 	return desc->pd_compat(svc, desc);
855 }
856 
857 /**
858  * Registers all compatible policies in nrs_core.nrs_policies, for NRS head
859  * \a nrs.
860  *
861  * \param[in] nrs the NRS head
862  *
863  * \retval -ve error
864  * \retval   0 success
865  *
866  * \pre mutex_is_locked(&nrs_core.nrs_mutex)
867  *
868  * \see ptlrpc_service_nrs_setup()
869  */
nrs_register_policies_locked(struct ptlrpc_nrs * nrs)870 static int nrs_register_policies_locked(struct ptlrpc_nrs *nrs)
871 {
872 	struct ptlrpc_nrs_pol_desc *desc;
873 	/* for convenience */
874 	struct ptlrpc_service_part	 *svcpt = nrs->nrs_svcpt;
875 	struct ptlrpc_service		 *svc = svcpt->scp_service;
876 	int				  rc = -EINVAL;
877 
878 	LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
879 
880 	list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
881 		if (nrs_policy_compatible(svc, desc)) {
882 			rc = nrs_policy_register(nrs, desc);
883 			if (rc != 0) {
884 				CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n",
885 				       desc->pd_name, svcpt->scp_cpt,
886 				       svc->srv_name, rc);
887 				/**
888 				 * Fail registration if any of the policies'
889 				 * registration fails.
890 				 */
891 				break;
892 			}
893 		}
894 	}
895 
896 	return rc;
897 }
898 
899 /**
900  * Initializes NRS head \a nrs of service partition \a svcpt, and registers all
901  * compatible policies in NRS core, with the NRS head.
902  *
903  * \param[in] nrs   the NRS head
904  * \param[in] svcpt the PTLRPC service partition to setup
905  *
906  * \retval -ve error
907  * \retval   0 success
908  *
909  * \pre mutex_is_locked(&nrs_core.nrs_mutex)
910  */
nrs_svcpt_setup_locked0(struct ptlrpc_nrs * nrs,struct ptlrpc_service_part * svcpt)911 static int nrs_svcpt_setup_locked0(struct ptlrpc_nrs *nrs,
912 				   struct ptlrpc_service_part *svcpt)
913 {
914 	enum ptlrpc_nrs_queue_type	queue;
915 
916 	LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
917 
918 	if (nrs == &svcpt->scp_nrs_reg)
919 		queue = PTLRPC_NRS_QUEUE_REG;
920 	else if (nrs == svcpt->scp_nrs_hp)
921 		queue = PTLRPC_NRS_QUEUE_HP;
922 	else
923 		LBUG();
924 
925 	nrs->nrs_svcpt = svcpt;
926 	nrs->nrs_queue_type = queue;
927 	spin_lock_init(&nrs->nrs_lock);
928 	INIT_LIST_HEAD(&nrs->nrs_policy_list);
929 	INIT_LIST_HEAD(&nrs->nrs_policy_queued);
930 
931 	return nrs_register_policies_locked(nrs);
932 }
933 
934 /**
935  * Allocates a regular and optionally a high-priority NRS head (if the service
936  * handles high-priority RPCs), and then registers all available compatible
937  * policies on those NRS heads.
938  *
939  * \param[in,out] svcpt the PTLRPC service partition to setup
940  *
941  * \pre mutex_is_locked(&nrs_core.nrs_mutex)
942  */
nrs_svcpt_setup_locked(struct ptlrpc_service_part * svcpt)943 static int nrs_svcpt_setup_locked(struct ptlrpc_service_part *svcpt)
944 {
945 	struct ptlrpc_nrs	       *nrs;
946 	int				rc;
947 
948 	LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
949 
950 	/**
951 	 * Initialize the regular NRS head.
952 	 */
953 	nrs = nrs_svcpt2nrs(svcpt, false);
954 	rc = nrs_svcpt_setup_locked0(nrs, svcpt);
955 	if (rc < 0)
956 		goto out;
957 
958 	/**
959 	 * Optionally allocate a high-priority NRS head.
960 	 */
961 	if (svcpt->scp_service->srv_ops.so_hpreq_handler == NULL)
962 		goto out;
963 
964 	OBD_CPT_ALLOC_PTR(svcpt->scp_nrs_hp,
965 			  svcpt->scp_service->srv_cptable,
966 			  svcpt->scp_cpt);
967 	if (svcpt->scp_nrs_hp == NULL) {
968 		rc = -ENOMEM;
969 		goto out;
970 	}
971 
972 	nrs = nrs_svcpt2nrs(svcpt, true);
973 	rc = nrs_svcpt_setup_locked0(nrs, svcpt);
974 
975 out:
976 	return rc;
977 }
978 
979 /**
980  * Unregisters all policies on all available NRS heads in a service partition;
981  * called at PTLRPC service unregistration time.
982  *
983  * \param[in] svcpt the PTLRPC service partition
984  *
985  * \pre mutex_is_locked(&nrs_core.nrs_mutex)
986  */
nrs_svcpt_cleanup_locked(struct ptlrpc_service_part * svcpt)987 static void nrs_svcpt_cleanup_locked(struct ptlrpc_service_part *svcpt)
988 {
989 	struct ptlrpc_nrs	       *nrs;
990 	struct ptlrpc_nrs_policy       *policy;
991 	struct ptlrpc_nrs_policy       *tmp;
992 	int				rc;
993 	bool				hp = false;
994 
995 	LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
996 
997 again:
998 	nrs = nrs_svcpt2nrs(svcpt, hp);
999 	nrs->nrs_stopping = 1;
1000 
1001 	list_for_each_entry_safe(policy, tmp, &nrs->nrs_policy_list,
1002 				     pol_list) {
1003 		rc = nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
1004 		LASSERT(rc == 0);
1005 	}
1006 
1007 	/**
1008 	 * If the service partition has an HP NRS head, clean that up as well.
1009 	 */
1010 	if (!hp && nrs_svcpt_has_hp(svcpt)) {
1011 		hp = true;
1012 		goto again;
1013 	}
1014 
1015 	if (hp)
1016 		OBD_FREE_PTR(nrs);
1017 }
1018 
1019 /**
1020  * Returns the descriptor for a policy as identified by by \a name.
1021  *
1022  * \param[in] name the policy name
1023  *
1024  * \retval the policy descriptor
1025  * \retval NULL
1026  */
nrs_policy_find_desc_locked(const char * name)1027 static struct ptlrpc_nrs_pol_desc *nrs_policy_find_desc_locked(const char *name)
1028 {
1029 	struct ptlrpc_nrs_pol_desc     *tmp;
1030 
1031 	list_for_each_entry(tmp, &nrs_core.nrs_policies, pd_list) {
1032 		if (strncmp(tmp->pd_name, name, NRS_POL_NAME_MAX) == 0)
1033 			return tmp;
1034 	}
1035 	return NULL;
1036 }
1037 
1038 /**
1039  * Removes the policy from all supported NRS heads of all partitions of all
1040  * PTLRPC services.
1041  *
1042  * \param[in] desc the policy descriptor to unregister
1043  *
1044  * \retval -ve error
1045  * \retval  0  successfully unregistered policy on all supported NRS heads
1046  *
1047  * \pre mutex_is_locked(&nrs_core.nrs_mutex)
1048  * \pre mutex_is_locked(&ptlrpc_all_services_mutex)
1049  */
nrs_policy_unregister_locked(struct ptlrpc_nrs_pol_desc * desc)1050 static int nrs_policy_unregister_locked(struct ptlrpc_nrs_pol_desc *desc)
1051 {
1052 	struct ptlrpc_nrs	       *nrs;
1053 	struct ptlrpc_service	       *svc;
1054 	struct ptlrpc_service_part     *svcpt;
1055 	int				i;
1056 	int				rc = 0;
1057 
1058 	LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
1059 	LASSERT(mutex_is_locked(&ptlrpc_all_services_mutex));
1060 
1061 	list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
1062 
1063 		if (!nrs_policy_compatible(svc, desc) ||
1064 		    unlikely(svc->srv_is_stopping))
1065 			continue;
1066 
1067 		ptlrpc_service_for_each_part(svcpt, i, svc) {
1068 			bool hp = false;
1069 
1070 again:
1071 			nrs = nrs_svcpt2nrs(svcpt, hp);
1072 			rc = nrs_policy_unregister(nrs, desc->pd_name);
1073 			/**
1074 			 * Ignore -ENOENT as the policy may not have registered
1075 			 * successfully on all service partitions.
1076 			 */
1077 			if (rc == -ENOENT) {
1078 				rc = 0;
1079 			} else if (rc != 0) {
1080 				CERROR("Failed to unregister NRS policy %s for partition %d of service %s: %d\n",
1081 				       desc->pd_name, svcpt->scp_cpt,
1082 				       svcpt->scp_service->srv_name, rc);
1083 				return rc;
1084 			}
1085 
1086 			if (!hp && nrs_svc_has_hp(svc)) {
1087 				hp = true;
1088 				goto again;
1089 			}
1090 		}
1091 
1092 		if (desc->pd_ops->op_lprocfs_fini != NULL)
1093 			desc->pd_ops->op_lprocfs_fini(svc);
1094 	}
1095 
1096 	return rc;
1097 }
1098 
1099 /**
1100  * Registers a new policy with NRS core.
1101  *
1102  * The function will only succeed if policy registration with all compatible
1103  * service partitions (if any) is successful.
1104  *
1105  * N.B. This function should be called either at ptlrpc module initialization
1106  *	time when registering a policy that ships with NRS core, or in a
1107  *	module's init() function for policies registering from other modules.
1108  *
1109  * \param[in] conf configuration information for the new policy to register
1110  *
1111  * \retval -ve error
1112  * \retval   0 success
1113  */
ptlrpc_nrs_policy_register(struct ptlrpc_nrs_pol_conf * conf)1114 int ptlrpc_nrs_policy_register(struct ptlrpc_nrs_pol_conf *conf)
1115 {
1116 	struct ptlrpc_service	       *svc;
1117 	struct ptlrpc_nrs_pol_desc     *desc;
1118 	int				rc = 0;
1119 
1120 	LASSERT(conf != NULL);
1121 	LASSERT(conf->nc_ops != NULL);
1122 	LASSERT(conf->nc_compat != NULL);
1123 	LASSERT(ergo(conf->nc_compat == nrs_policy_compat_one,
1124 		conf->nc_compat_svc_name != NULL));
1125 	LASSERT(ergo((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0,
1126 		     conf->nc_owner != NULL));
1127 
1128 	conf->nc_name[NRS_POL_NAME_MAX - 1] = '\0';
1129 
1130 	/**
1131 	 * External policies are not allowed to start immediately upon
1132 	 * registration, as there is a relatively higher chance that their
1133 	 * registration might fail. In such a case, some policy instances may
1134 	 * already have requests queued wen unregistration needs to happen as
1135 	 * part o cleanup; since there is currently no way to drain requests
1136 	 * from a policy unless the service is unregistering, we just disallow
1137 	 * this.
1138 	 */
1139 	if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) &&
1140 	    (conf->nc_flags & (PTLRPC_NRS_FL_FALLBACK |
1141 			       PTLRPC_NRS_FL_REG_START))) {
1142 		CERROR("NRS: failing to register policy %s. Please check policy flags; external policies cannot act as fallback policies, or be started immediately upon registration without interaction with lprocfs\n",
1143 		       conf->nc_name);
1144 		return -EINVAL;
1145 	}
1146 
1147 	mutex_lock(&nrs_core.nrs_mutex);
1148 
1149 	if (nrs_policy_find_desc_locked(conf->nc_name) != NULL) {
1150 		CERROR("NRS: failing to register policy %s which has already been registered with NRS core!\n",
1151 		       conf->nc_name);
1152 		rc = -EEXIST;
1153 		goto fail;
1154 	}
1155 
1156 	OBD_ALLOC_PTR(desc);
1157 	if (desc == NULL) {
1158 		rc = -ENOMEM;
1159 		goto fail;
1160 	}
1161 
1162 	strncpy(desc->pd_name, conf->nc_name, NRS_POL_NAME_MAX);
1163 	desc->pd_ops		 = conf->nc_ops;
1164 	desc->pd_compat		 = conf->nc_compat;
1165 	desc->pd_compat_svc_name = conf->nc_compat_svc_name;
1166 	if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0)
1167 		desc->pd_owner	 = conf->nc_owner;
1168 	desc->pd_flags		 = conf->nc_flags;
1169 	atomic_set(&desc->pd_refs, 0);
1170 
1171 	/**
1172 	 * For policies that are held in the same module as NRS (currently
1173 	 * ptlrpc), do not register the policy with all compatible services,
1174 	 * as the services will not have started at this point, since we are
1175 	 * calling from ptlrpc module initialization code. In such cases each
1176 	 * service will register all compatible policies later, via
1177 	 * ptlrpc_service_nrs_setup().
1178 	 */
1179 	if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) == 0)
1180 		goto internal;
1181 
1182 	/**
1183 	 * Register the new policy on all compatible services
1184 	 */
1185 	mutex_lock(&ptlrpc_all_services_mutex);
1186 
1187 	list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
1188 		struct ptlrpc_service_part     *svcpt;
1189 		int				i;
1190 		int				rc2;
1191 
1192 		if (!nrs_policy_compatible(svc, desc) ||
1193 		    unlikely(svc->srv_is_stopping))
1194 			continue;
1195 
1196 		ptlrpc_service_for_each_part(svcpt, i, svc) {
1197 			struct ptlrpc_nrs      *nrs;
1198 			bool			hp = false;
1199 again:
1200 			nrs = nrs_svcpt2nrs(svcpt, hp);
1201 			rc = nrs_policy_register(nrs, desc);
1202 			if (rc != 0) {
1203 				CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n",
1204 				       desc->pd_name, svcpt->scp_cpt,
1205 				       svcpt->scp_service->srv_name, rc);
1206 
1207 				rc2 = nrs_policy_unregister_locked(desc);
1208 				/**
1209 				 * Should not fail at this point
1210 				 */
1211 				LASSERT(rc2 == 0);
1212 				mutex_unlock(&ptlrpc_all_services_mutex);
1213 				OBD_FREE_PTR(desc);
1214 				goto fail;
1215 			}
1216 
1217 			if (!hp && nrs_svc_has_hp(svc)) {
1218 				hp = true;
1219 				goto again;
1220 			}
1221 		}
1222 
1223 		/**
1224 		 * No need to take a reference to other modules here, as we
1225 		 * will be calling from the module's init() function.
1226 		 */
1227 		if (desc->pd_ops->op_lprocfs_init != NULL) {
1228 			rc = desc->pd_ops->op_lprocfs_init(svc);
1229 			if (rc != 0) {
1230 				rc2 = nrs_policy_unregister_locked(desc);
1231 				/**
1232 				 * Should not fail at this point
1233 				 */
1234 				LASSERT(rc2 == 0);
1235 				mutex_unlock(&ptlrpc_all_services_mutex);
1236 				OBD_FREE_PTR(desc);
1237 				goto fail;
1238 			}
1239 		}
1240 	}
1241 
1242 	mutex_unlock(&ptlrpc_all_services_mutex);
1243 internal:
1244 	list_add_tail(&desc->pd_list, &nrs_core.nrs_policies);
1245 fail:
1246 	mutex_unlock(&nrs_core.nrs_mutex);
1247 
1248 	return rc;
1249 }
1250 EXPORT_SYMBOL(ptlrpc_nrs_policy_register);
1251 
1252 /**
1253  * Unregisters a previously registered policy with NRS core. All instances of
1254  * the policy on all NRS heads of all supported services are removed.
1255  *
1256  * N.B. This function should only be called from a module's exit() function.
1257  *	Although it can be used for policies that ship alongside NRS core, the
1258  *	function is primarily intended for policies that register externally,
1259  *	from other modules.
1260  *
1261  * \param[in] conf configuration information for the policy to unregister
1262  *
1263  * \retval -ve error
1264  * \retval   0 success
1265  */
ptlrpc_nrs_policy_unregister(struct ptlrpc_nrs_pol_conf * conf)1266 int ptlrpc_nrs_policy_unregister(struct ptlrpc_nrs_pol_conf *conf)
1267 {
1268 	struct ptlrpc_nrs_pol_desc	*desc;
1269 	int				 rc;
1270 
1271 	LASSERT(conf != NULL);
1272 
1273 	if (conf->nc_flags & PTLRPC_NRS_FL_FALLBACK) {
1274 		CERROR("Unable to unregister a fallback policy, unless the PTLRPC service is stopping.\n");
1275 		return -EPERM;
1276 	}
1277 
1278 	conf->nc_name[NRS_POL_NAME_MAX - 1] = '\0';
1279 
1280 	mutex_lock(&nrs_core.nrs_mutex);
1281 
1282 	desc = nrs_policy_find_desc_locked(conf->nc_name);
1283 	if (desc == NULL) {
1284 		CERROR("Failing to unregister NRS policy %s which has not been registered with NRS core!\n",
1285 		       conf->nc_name);
1286 		rc = -ENOENT;
1287 		goto not_exist;
1288 	}
1289 
1290 	mutex_lock(&ptlrpc_all_services_mutex);
1291 
1292 	rc = nrs_policy_unregister_locked(desc);
1293 	if (rc < 0) {
1294 		if (rc == -EBUSY)
1295 			CERROR("Please first stop policy %s on all service partitions and then retry to unregister the policy.\n",
1296 			       conf->nc_name);
1297 		goto fail;
1298 	}
1299 
1300 	CDEBUG(D_INFO, "Unregistering policy %s from NRS core.\n",
1301 	       conf->nc_name);
1302 
1303 	list_del(&desc->pd_list);
1304 	OBD_FREE_PTR(desc);
1305 
1306 fail:
1307 	mutex_unlock(&ptlrpc_all_services_mutex);
1308 
1309 not_exist:
1310 	mutex_unlock(&nrs_core.nrs_mutex);
1311 
1312 	return rc;
1313 }
1314 EXPORT_SYMBOL(ptlrpc_nrs_policy_unregister);
1315 
1316 /**
1317  * Setup NRS heads on all service partitions of service \a svc, and register
1318  * all compatible policies on those NRS heads.
1319  *
1320  * To be called from within ptl
1321  * \param[in] svc the service to setup
1322  *
1323  * \retval -ve error, the calling logic should eventually call
1324  *		      ptlrpc_service_nrs_cleanup() to undo any work performed
1325  *		      by this function.
1326  *
1327  * \see ptlrpc_register_service()
1328  * \see ptlrpc_service_nrs_cleanup()
1329  */
ptlrpc_service_nrs_setup(struct ptlrpc_service * svc)1330 int ptlrpc_service_nrs_setup(struct ptlrpc_service *svc)
1331 {
1332 	struct ptlrpc_service_part	       *svcpt;
1333 	const struct ptlrpc_nrs_pol_desc       *desc;
1334 	int					i;
1335 	int					rc = 0;
1336 
1337 	mutex_lock(&nrs_core.nrs_mutex);
1338 
1339 	/**
1340 	 * Initialize NRS heads on all service CPTs.
1341 	 */
1342 	ptlrpc_service_for_each_part(svcpt, i, svc) {
1343 		rc = nrs_svcpt_setup_locked(svcpt);
1344 		if (rc != 0)
1345 			goto failed;
1346 	}
1347 
1348 	/**
1349 	 * Set up lprocfs interfaces for all supported policies for the
1350 	 * service.
1351 	 */
1352 	list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
1353 		if (!nrs_policy_compatible(svc, desc))
1354 			continue;
1355 
1356 		if (desc->pd_ops->op_lprocfs_init != NULL) {
1357 			rc = desc->pd_ops->op_lprocfs_init(svc);
1358 			if (rc != 0)
1359 				goto failed;
1360 		}
1361 	}
1362 
1363 failed:
1364 
1365 	mutex_unlock(&nrs_core.nrs_mutex);
1366 
1367 	return rc;
1368 }
1369 
1370 /**
1371  * Unregisters all policies on all service partitions of service \a svc.
1372  *
1373  * \param[in] svc the PTLRPC service to unregister
1374  */
ptlrpc_service_nrs_cleanup(struct ptlrpc_service * svc)1375 void ptlrpc_service_nrs_cleanup(struct ptlrpc_service *svc)
1376 {
1377 	struct ptlrpc_service_part	     *svcpt;
1378 	const struct ptlrpc_nrs_pol_desc     *desc;
1379 	int				      i;
1380 
1381 	mutex_lock(&nrs_core.nrs_mutex);
1382 
1383 	/**
1384 	 * Clean up NRS heads on all service partitions
1385 	 */
1386 	ptlrpc_service_for_each_part(svcpt, i, svc)
1387 		nrs_svcpt_cleanup_locked(svcpt);
1388 
1389 	/**
1390 	 * Clean up lprocfs interfaces for all supported policies for the
1391 	 * service.
1392 	 */
1393 	list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
1394 		if (!nrs_policy_compatible(svc, desc))
1395 			continue;
1396 
1397 		if (desc->pd_ops->op_lprocfs_fini != NULL)
1398 			desc->pd_ops->op_lprocfs_fini(svc);
1399 	}
1400 
1401 	mutex_unlock(&nrs_core.nrs_mutex);
1402 }
1403 
1404 /**
1405  * Obtains NRS head resources for request \a req.
1406  *
1407  * These could be either on the regular or HP NRS head of \a svcpt; resources
1408  * taken on the regular head can later be swapped for HP head resources by
1409  * ldlm_lock_reorder_req().
1410  *
1411  * \param[in] svcpt the service partition
1412  * \param[in] req   the request
1413  * \param[in] hp    which NRS head of \a svcpt to use
1414  */
ptlrpc_nrs_req_initialize(struct ptlrpc_service_part * svcpt,struct ptlrpc_request * req,bool hp)1415 void ptlrpc_nrs_req_initialize(struct ptlrpc_service_part *svcpt,
1416 			       struct ptlrpc_request *req, bool hp)
1417 {
1418 	struct ptlrpc_nrs	*nrs = nrs_svcpt2nrs(svcpt, hp);
1419 
1420 	memset(&req->rq_nrq, 0, sizeof(req->rq_nrq));
1421 	nrs_resource_get_safe(nrs, &req->rq_nrq, req->rq_nrq.nr_res_ptrs,
1422 			      false);
1423 
1424 	/**
1425 	 * It is fine to access \e nr_initialized without locking as there is
1426 	 * no contention at this early stage.
1427 	 */
1428 	req->rq_nrq.nr_initialized = 1;
1429 }
1430 
1431 /**
1432  * Releases resources for a request; is called after the request has been
1433  * handled.
1434  *
1435  * \param[in] req the request
1436  *
1437  * \see ptlrpc_server_finish_request()
1438  */
ptlrpc_nrs_req_finalize(struct ptlrpc_request * req)1439 void ptlrpc_nrs_req_finalize(struct ptlrpc_request *req)
1440 {
1441 	if (req->rq_nrq.nr_initialized) {
1442 		nrs_resource_put_safe(req->rq_nrq.nr_res_ptrs);
1443 		/* no protection on bit nr_initialized because no
1444 		 * contention at this late stage */
1445 		req->rq_nrq.nr_finalized = 1;
1446 	}
1447 }
1448 
ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request * req)1449 void ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request *req)
1450 {
1451 	if (req->rq_nrq.nr_started)
1452 		nrs_request_stop(&req->rq_nrq);
1453 }
1454 
1455 /**
1456  * Enqueues request \a req on either the regular or high-priority NRS head
1457  * of service partition \a svcpt.
1458  *
1459  * \param[in] svcpt the service partition
1460  * \param[in] req   the request to be enqueued
1461  * \param[in] hp    whether to enqueue the request on the regular or
1462  *		    high-priority NRS head.
1463  */
ptlrpc_nrs_req_add(struct ptlrpc_service_part * svcpt,struct ptlrpc_request * req,bool hp)1464 void ptlrpc_nrs_req_add(struct ptlrpc_service_part *svcpt,
1465 			struct ptlrpc_request *req, bool hp)
1466 {
1467 	spin_lock(&svcpt->scp_req_lock);
1468 
1469 	if (hp)
1470 		ptlrpc_nrs_hpreq_add_nolock(req);
1471 	else
1472 		ptlrpc_nrs_req_add_nolock(req);
1473 
1474 	spin_unlock(&svcpt->scp_req_lock);
1475 }
1476 
nrs_request_removed(struct ptlrpc_nrs_policy * policy)1477 static void nrs_request_removed(struct ptlrpc_nrs_policy *policy)
1478 {
1479 	LASSERT(policy->pol_nrs->nrs_req_queued > 0);
1480 	LASSERT(policy->pol_req_queued > 0);
1481 
1482 	policy->pol_nrs->nrs_req_queued--;
1483 	policy->pol_req_queued--;
1484 
1485 	/**
1486 	 * If the policy has no more requests queued, remove it from
1487 	 * ptlrpc_nrs::nrs_policy_queued.
1488 	 */
1489 	if (unlikely(policy->pol_req_queued == 0)) {
1490 		list_del_init(&policy->pol_list_queued);
1491 
1492 		/**
1493 		 * If there are other policies with queued requests, move the
1494 		 * current policy to the end so that we can round robin over
1495 		 * all policies and drain the requests.
1496 		 */
1497 	} else if (policy->pol_req_queued != policy->pol_nrs->nrs_req_queued) {
1498 		LASSERT(policy->pol_req_queued <
1499 			policy->pol_nrs->nrs_req_queued);
1500 
1501 		list_move_tail(&policy->pol_list_queued,
1502 				   &policy->pol_nrs->nrs_policy_queued);
1503 	}
1504 }
1505 
1506 /**
1507  * Obtains a request for handling from an NRS head of service partition
1508  * \a svcpt.
1509  *
1510  * \param[in] svcpt the service partition
1511  * \param[in] hp    whether to obtain a request from the regular or
1512  *		    high-priority NRS head.
1513  * \param[in] peek  when set, signifies that we just want to examine the
1514  *		    request, and not handle it, so the request is not removed
1515  *		    from the policy.
1516  * \param[in] force when set, it will force a policy to return a request if it
1517  *		    has one pending
1518  *
1519  * \retval the	request to be handled
1520  * \retval NULL the head has no requests to serve
1521  */
1522 struct ptlrpc_request *
ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part * svcpt,bool hp,bool peek,bool force)1523 ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part *svcpt, bool hp,
1524 			   bool peek, bool force)
1525 {
1526 	struct ptlrpc_nrs	  *nrs = nrs_svcpt2nrs(svcpt, hp);
1527 	struct ptlrpc_nrs_policy  *policy;
1528 	struct ptlrpc_nrs_request *nrq;
1529 
1530 	/**
1531 	 * Always try to drain requests from all NRS polices even if they are
1532 	 * inactive, because the user can change policy status at runtime.
1533 	 */
1534 	list_for_each_entry(policy, &nrs->nrs_policy_queued,
1535 				pol_list_queued) {
1536 		nrq = nrs_request_get(policy, peek, force);
1537 		if (nrq != NULL) {
1538 			if (likely(!peek)) {
1539 				nrq->nr_started = 1;
1540 
1541 				policy->pol_req_started++;
1542 				policy->pol_nrs->nrs_req_started++;
1543 
1544 				nrs_request_removed(policy);
1545 			}
1546 
1547 			return container_of(nrq, struct ptlrpc_request, rq_nrq);
1548 		}
1549 	}
1550 
1551 	return NULL;
1552 }
1553 
1554 /**
1555  * Dequeues request \a req from the policy it has been enqueued on.
1556  *
1557  * \param[in] req the request
1558  */
ptlrpc_nrs_req_del_nolock(struct ptlrpc_request * req)1559 void ptlrpc_nrs_req_del_nolock(struct ptlrpc_request *req)
1560 {
1561 	struct ptlrpc_nrs_policy *policy = nrs_request_policy(&req->rq_nrq);
1562 
1563 	policy->pol_desc->pd_ops->op_req_dequeue(policy, &req->rq_nrq);
1564 
1565 	req->rq_nrq.nr_enqueued = 0;
1566 
1567 	nrs_request_removed(policy);
1568 }
1569 
1570 /**
1571  * Returns whether there are any requests currently enqueued on any of the
1572  * policies of service partition's \a svcpt NRS head specified by \a hp. Should
1573  * be called while holding ptlrpc_service_part::scp_req_lock to get a reliable
1574  * result.
1575  *
1576  * \param[in] svcpt the service partition to enquire.
1577  * \param[in] hp    whether the regular or high-priority NRS head is to be
1578  *		    enquired.
1579  *
1580  * \retval false the indicated NRS head has no enqueued requests.
1581  * \retval true	 the indicated NRS head has some enqueued requests.
1582  */
ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part * svcpt,bool hp)1583 bool ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part *svcpt, bool hp)
1584 {
1585 	struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
1586 
1587 	return nrs->nrs_req_queued > 0;
1588 };
1589 
1590 /**
1591  * Moves request \a req from the regular to the high-priority NRS head.
1592  *
1593  * \param[in] req the request to move
1594  */
ptlrpc_nrs_req_hp_move(struct ptlrpc_request * req)1595 void ptlrpc_nrs_req_hp_move(struct ptlrpc_request *req)
1596 {
1597 	struct ptlrpc_service_part	*svcpt = req->rq_rqbd->rqbd_svcpt;
1598 	struct ptlrpc_nrs_request	*nrq = &req->rq_nrq;
1599 	struct ptlrpc_nrs_resource	*res1[NRS_RES_MAX];
1600 	struct ptlrpc_nrs_resource	*res2[NRS_RES_MAX];
1601 
1602 	/**
1603 	 * Obtain the high-priority NRS head resources.
1604 	 */
1605 	nrs_resource_get_safe(nrs_svcpt2nrs(svcpt, true), nrq, res1, true);
1606 
1607 	spin_lock(&svcpt->scp_req_lock);
1608 
1609 	if (!ptlrpc_nrs_req_can_move(req))
1610 		goto out;
1611 
1612 	ptlrpc_nrs_req_del_nolock(req);
1613 
1614 	memcpy(res2, nrq->nr_res_ptrs, NRS_RES_MAX * sizeof(res2[0]));
1615 	memcpy(nrq->nr_res_ptrs, res1, NRS_RES_MAX * sizeof(res1[0]));
1616 
1617 	ptlrpc_nrs_hpreq_add_nolock(req);
1618 
1619 	memcpy(res1, res2, NRS_RES_MAX * sizeof(res1[0]));
1620 out:
1621 	spin_unlock(&svcpt->scp_req_lock);
1622 
1623 	/**
1624 	 * Release either the regular NRS head resources if we moved the
1625 	 * request, or the high-priority NRS head resources if we took a
1626 	 * reference earlier in this function and ptlrpc_nrs_req_can_move()
1627 	 * returned false.
1628 	 */
1629 	nrs_resource_put_safe(res1);
1630 }
1631 
1632 /**
1633  * Carries out a control operation \a opc on the policy identified by the
1634  * human-readable \a name, on either all partitions, or only on the first
1635  * partition of service \a svc.
1636  *
1637  * \param[in]	  svc	 the service the policy belongs to.
1638  * \param[in]	  queue  whether to carry out the command on the policy which
1639  *			 belongs to the regular, high-priority, or both NRS
1640  *			 heads of service partitions of \a svc.
1641  * \param[in]	  name   the policy to act upon, by human-readable name
1642  * \param[in]	  opc	 the opcode of the operation to carry out
1643  * \param[in]	  single when set, the operation will only be carried out on the
1644  *			 NRS heads of the first service partition of \a svc.
1645  *			 This is useful for some policies which e.g. share
1646  *			 identical values on the same parameters of different
1647  *			 service partitions; when reading these parameters via
1648  *			 lprocfs, these policies may just want to obtain and
1649  *			 print out the values from the first service partition.
1650  *			 Storing these values centrally elsewhere then could be
1651  *			 another solution for this.
1652  * \param[in,out] arg	 can be used as a generic in/out buffer between control
1653  *			 operations and the user environment.
1654  *
1655  *\retval -ve error condition
1656  *\retval   0 operation was carried out successfully
1657  */
ptlrpc_nrs_policy_control(const struct ptlrpc_service * svc,enum ptlrpc_nrs_queue_type queue,char * name,enum ptlrpc_nrs_ctl opc,bool single,void * arg)1658 int ptlrpc_nrs_policy_control(const struct ptlrpc_service *svc,
1659 			      enum ptlrpc_nrs_queue_type queue, char *name,
1660 			      enum ptlrpc_nrs_ctl opc, bool single, void *arg)
1661 {
1662 	struct ptlrpc_service_part     *svcpt;
1663 	int				i;
1664 	int				rc = 0;
1665 
1666 	LASSERT(opc != PTLRPC_NRS_CTL_INVALID);
1667 
1668 	if ((queue & PTLRPC_NRS_QUEUE_BOTH) == 0)
1669 		return -EINVAL;
1670 
1671 	ptlrpc_service_for_each_part(svcpt, i, svc) {
1672 		if ((queue & PTLRPC_NRS_QUEUE_REG) != 0) {
1673 			rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, false), name,
1674 					    opc, arg);
1675 			if (rc != 0 || (queue == PTLRPC_NRS_QUEUE_REG &&
1676 					single))
1677 				goto out;
1678 		}
1679 
1680 		if ((queue & PTLRPC_NRS_QUEUE_HP) != 0) {
1681 			/**
1682 			 * XXX: We could optionally check for
1683 			 * nrs_svc_has_hp(svc) here, and return an error if it
1684 			 * is false. Right now we rely on the policies' lprocfs
1685 			 * handlers that call the present function to make this
1686 			 * check; if they fail to do so, they might hit the
1687 			 * assertion inside nrs_svcpt2nrs() below.
1688 			 */
1689 			rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, true), name,
1690 					    opc, arg);
1691 			if (rc != 0 || single)
1692 				goto out;
1693 		}
1694 	}
1695 out:
1696 	return rc;
1697 }
1698 
1699 
1700 /* ptlrpc/nrs_fifo.c */
1701 extern struct ptlrpc_nrs_pol_conf nrs_conf_fifo;
1702 
1703 /**
1704  * Adds all policies that ship with the ptlrpc module, to NRS core's list of
1705  * policies \e nrs_core.nrs_policies.
1706  *
1707  * \retval 0 all policies have been registered successfully
1708  * \retval -ve error
1709  */
ptlrpc_nrs_init(void)1710 int ptlrpc_nrs_init(void)
1711 {
1712 	int	rc;
1713 
1714 	mutex_init(&nrs_core.nrs_mutex);
1715 	INIT_LIST_HEAD(&nrs_core.nrs_policies);
1716 
1717 	rc = ptlrpc_nrs_policy_register(&nrs_conf_fifo);
1718 	if (rc != 0)
1719 		goto fail;
1720 
1721 
1722 	return rc;
1723 fail:
1724 	/**
1725 	 * Since no PTLRPC services have been started at this point, all we need
1726 	 * to do for cleanup is to free the descriptors.
1727 	 */
1728 	ptlrpc_nrs_fini();
1729 
1730 	return rc;
1731 }
1732 
1733 /**
1734  * Removes all policy descriptors from nrs_core::nrs_policies, and frees the
1735  * policy descriptors.
1736  *
1737  * Since all PTLRPC services are stopped at this point, there are no more
1738  * instances of any policies, because each service will have stopped its policy
1739  * instances in ptlrpc_service_nrs_cleanup(), so we just need to free the
1740  * descriptors here.
1741  */
ptlrpc_nrs_fini(void)1742 void ptlrpc_nrs_fini(void)
1743 {
1744 	struct ptlrpc_nrs_pol_desc *desc;
1745 	struct ptlrpc_nrs_pol_desc *tmp;
1746 
1747 	list_for_each_entry_safe(desc, tmp, &nrs_core.nrs_policies,
1748 				     pd_list) {
1749 		list_del_init(&desc->pd_list);
1750 		OBD_FREE_PTR(desc);
1751 	}
1752 }
1753 
1754 /** @} nrs */
1755