1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9 
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15 
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2011 Intel Corporation
24  *
25  * Copyright 2012 Xyratex Technology Limited
26  */
27 /*
28  * lustre/ptlrpc/nrs.c
29  *
30  * Network Request Scheduler (NRS)
31  *
32  * Allows to reorder the handling of RPCs at servers.
33  *
34  * Author: Liang Zhen <liang@whamcloud.com>
35  * Author: Nikitas Angelinas <nikitas_angelinas@xyratex.com>
36  */
37 /**
 * \addtogroup nrs
39  * @{
40  */
41 
42 #define DEBUG_SUBSYSTEM S_RPC
43 #include "../include/obd_support.h"
44 #include "../include/obd_class.h"
45 #include "../include/lustre_net.h"
46 #include "../include/lprocfs_status.h"
47 #include "../../include/linux/libcfs/libcfs.h"
48 #include "ptlrpc_internal.h"
49 
50 /* XXX: This is just for liblustre. Remove the #if defined directive when the
51  * "cfs_" prefix is dropped from cfs_list_head. */
52 
/**
 * NRS core object.
 *
 * Single file-scope instance. Code in this file walks its nrs_policies list
 * of policy descriptors and asserts that its nrs_mutex is held while doing so.
 */
struct nrs_core nrs_core;
57 
nrs_policy_init(struct ptlrpc_nrs_policy * policy)58 static int nrs_policy_init(struct ptlrpc_nrs_policy *policy)
59 {
60 	return policy->pol_desc->pd_ops->op_policy_init != NULL ?
61 	       policy->pol_desc->pd_ops->op_policy_init(policy) : 0;
62 }
63 
nrs_policy_fini(struct ptlrpc_nrs_policy * policy)64 static void nrs_policy_fini(struct ptlrpc_nrs_policy *policy)
65 {
66 	LASSERT(policy->pol_ref == 0);
67 	LASSERT(policy->pol_req_queued == 0);
68 
69 	if (policy->pol_desc->pd_ops->op_policy_fini != NULL)
70 		policy->pol_desc->pd_ops->op_policy_fini(policy);
71 }
72 
nrs_policy_ctl_locked(struct ptlrpc_nrs_policy * policy,enum ptlrpc_nrs_ctl opc,void * arg)73 static int nrs_policy_ctl_locked(struct ptlrpc_nrs_policy *policy,
74 				 enum ptlrpc_nrs_ctl opc, void *arg)
75 {
76 	/**
77 	 * The policy may be stopped, but the lprocfs files and
78 	 * ptlrpc_nrs_policy instances remain present until unregistration time.
79 	 * Do not perform the ctl operation if the policy is stopped, as
80 	 * policy->pol_private will be NULL in such a case.
81 	 */
82 	if (policy->pol_state == NRS_POL_STATE_STOPPED)
83 		return -ENODEV;
84 
85 	return policy->pol_desc->pd_ops->op_policy_ctl != NULL ?
86 	       policy->pol_desc->pd_ops->op_policy_ctl(policy, opc, arg) :
87 	       -ENOSYS;
88 }
89 
nrs_policy_stop0(struct ptlrpc_nrs_policy * policy)90 static void nrs_policy_stop0(struct ptlrpc_nrs_policy *policy)
91 {
92 	struct ptlrpc_nrs *nrs = policy->pol_nrs;
93 
94 	if (policy->pol_desc->pd_ops->op_policy_stop != NULL) {
95 		spin_unlock(&nrs->nrs_lock);
96 
97 		policy->pol_desc->pd_ops->op_policy_stop(policy);
98 
99 		spin_lock(&nrs->nrs_lock);
100 	}
101 
102 	LASSERT(list_empty(&policy->pol_list_queued));
103 	LASSERT(policy->pol_req_queued == 0 &&
104 		policy->pol_req_started == 0);
105 
106 	policy->pol_private = NULL;
107 
108 	policy->pol_state = NRS_POL_STATE_STOPPED;
109 
110 	if (atomic_dec_and_test(&policy->pol_desc->pd_refs))
111 		module_put(policy->pol_desc->pd_owner);
112 }
113 
nrs_policy_stop_locked(struct ptlrpc_nrs_policy * policy)114 static int nrs_policy_stop_locked(struct ptlrpc_nrs_policy *policy)
115 {
116 	struct ptlrpc_nrs *nrs = policy->pol_nrs;
117 
118 	if (nrs->nrs_policy_fallback == policy && !nrs->nrs_stopping)
119 		return -EPERM;
120 
121 	if (policy->pol_state == NRS_POL_STATE_STARTING)
122 		return -EAGAIN;
123 
124 	/* In progress or already stopped */
125 	if (policy->pol_state != NRS_POL_STATE_STARTED)
126 		return 0;
127 
128 	policy->pol_state = NRS_POL_STATE_STOPPING;
129 
130 	/* Immediately make it invisible */
131 	if (nrs->nrs_policy_primary == policy) {
132 		nrs->nrs_policy_primary = NULL;
133 
134 	} else {
135 		LASSERT(nrs->nrs_policy_fallback == policy);
136 		nrs->nrs_policy_fallback = NULL;
137 	}
138 
139 	/* I have the only refcount */
140 	if (policy->pol_ref == 1)
141 		nrs_policy_stop0(policy);
142 
143 	return 0;
144 }
145 
146 /**
147  * Transitions the \a nrs NRS head's primary policy to
148  * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING and if the policy has no
149  * pending usage references, to ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED.
150  *
151  * \param[in] nrs the NRS head to carry out this operation on
152  */
nrs_policy_stop_primary(struct ptlrpc_nrs * nrs)153 static void nrs_policy_stop_primary(struct ptlrpc_nrs *nrs)
154 {
155 	struct ptlrpc_nrs_policy *tmp = nrs->nrs_policy_primary;
156 
157 	if (tmp == NULL)
158 		return;
159 
160 	nrs->nrs_policy_primary = NULL;
161 
162 	LASSERT(tmp->pol_state == NRS_POL_STATE_STARTED);
163 	tmp->pol_state = NRS_POL_STATE_STOPPING;
164 
165 	if (tmp->pol_ref == 0)
166 		nrs_policy_stop0(tmp);
167 }
168 
169 /**
170  * Transitions a policy across the ptlrpc_nrs_pol_state range of values, in
171  * response to an lprocfs command to start a policy.
172  *
173  * If a primary policy different to the current one is specified, this function
174  * will transition the new policy to the
175  * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTING and then to
176  * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED, and will then transition
177  * the old primary policy (if there is one) to
178  * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding
179  * references on the policy to ptlrpc_nrs_pol_stae::NRS_POL_STATE_STOPPED.
180  *
181  * If the fallback policy is specified, this is taken to indicate an instruction
182  * to stop the current primary policy, without substituting it with another
183  * primary policy, so the primary policy (if any) is transitioned to
184  * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding
185  * references on the policy to ptlrpc_nrs_pol_stae::NRS_POL_STATE_STOPPED. In
186  * this case, the fallback policy is only left active in the NRS head.
187  */
nrs_policy_start_locked(struct ptlrpc_nrs_policy * policy)188 static int nrs_policy_start_locked(struct ptlrpc_nrs_policy *policy)
189 {
190 	struct ptlrpc_nrs *nrs = policy->pol_nrs;
191 	int rc = 0;
192 
193 	/**
194 	 * Don't allow multiple starting which is too complex, and has no real
195 	 * benefit.
196 	 */
197 	if (nrs->nrs_policy_starting)
198 		return -EAGAIN;
199 
200 	LASSERT(policy->pol_state != NRS_POL_STATE_STARTING);
201 
202 	if (policy->pol_state == NRS_POL_STATE_STOPPING)
203 		return -EAGAIN;
204 
205 	if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) {
206 		/**
207 		 * This is for cases in which the user sets the policy to the
208 		 * fallback policy (currently fifo for all services); i.e. the
209 		 * user is resetting the policy to the default; so we stop the
210 		 * primary policy, if any.
211 		 */
212 		if (policy == nrs->nrs_policy_fallback) {
213 			nrs_policy_stop_primary(nrs);
214 			return 0;
215 		}
216 
217 		/**
218 		 * If we reach here, we must be setting up the fallback policy
219 		 * at service startup time, and only a single policy with the
220 		 * nrs_policy_flags::PTLRPC_NRS_FL_FALLBACK flag set can
221 		 * register with NRS core.
222 		 */
223 		LASSERT(nrs->nrs_policy_fallback == NULL);
224 	} else {
225 		/**
226 		 * Shouldn't start primary policy if w/o fallback policy.
227 		 */
228 		if (nrs->nrs_policy_fallback == NULL)
229 			return -EPERM;
230 
231 		if (policy->pol_state == NRS_POL_STATE_STARTED)
232 			return 0;
233 	}
234 
235 	/**
236 	 * Increase the module usage count for policies registering from other
237 	 * modules.
238 	 */
239 	if (atomic_inc_return(&policy->pol_desc->pd_refs) == 1 &&
240 	    !try_module_get(policy->pol_desc->pd_owner)) {
241 		atomic_dec(&policy->pol_desc->pd_refs);
242 		CERROR("NRS: cannot get module for policy %s; is it alive?\n",
243 		       policy->pol_desc->pd_name);
244 		return -ENODEV;
245 	}
246 
247 	/**
248 	 * Serialize policy starting across the NRS head
249 	 */
250 	nrs->nrs_policy_starting = 1;
251 
252 	policy->pol_state = NRS_POL_STATE_STARTING;
253 
254 	if (policy->pol_desc->pd_ops->op_policy_start) {
255 		spin_unlock(&nrs->nrs_lock);
256 
257 		rc = policy->pol_desc->pd_ops->op_policy_start(policy);
258 
259 		spin_lock(&nrs->nrs_lock);
260 		if (rc != 0) {
261 			if (atomic_dec_and_test(&policy->pol_desc->pd_refs))
262 				module_put(policy->pol_desc->pd_owner);
263 
264 			policy->pol_state = NRS_POL_STATE_STOPPED;
265 			goto out;
266 		}
267 	}
268 
269 	policy->pol_state = NRS_POL_STATE_STARTED;
270 
271 	if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) {
272 		/**
273 		 * This path is only used at PTLRPC service setup time.
274 		 */
275 		nrs->nrs_policy_fallback = policy;
276 	} else {
277 		/*
278 		 * Try to stop the current primary policy if there is one.
279 		 */
280 		nrs_policy_stop_primary(nrs);
281 
282 		/**
283 		 * And set the newly-started policy as the primary one.
284 		 */
285 		nrs->nrs_policy_primary = policy;
286 	}
287 
288 out:
289 	nrs->nrs_policy_starting = 0;
290 
291 	return rc;
292 }
293 
294 /**
295  * Increases the policy's usage reference count.
296  */
nrs_policy_get_locked(struct ptlrpc_nrs_policy * policy)297 static inline void nrs_policy_get_locked(struct ptlrpc_nrs_policy *policy)
298 {
299 	policy->pol_ref++;
300 }
301 
302 /**
303  * Decreases the policy's usage reference count, and stops the policy in case it
304  * was already stopping and have no more outstanding usage references (which
305  * indicates it has no more queued or started requests, and can be safely
306  * stopped).
307  */
nrs_policy_put_locked(struct ptlrpc_nrs_policy * policy)308 static void nrs_policy_put_locked(struct ptlrpc_nrs_policy *policy)
309 {
310 	LASSERT(policy->pol_ref > 0);
311 
312 	policy->pol_ref--;
313 	if (unlikely(policy->pol_ref == 0 &&
314 	    policy->pol_state == NRS_POL_STATE_STOPPING))
315 		nrs_policy_stop0(policy);
316 }
317 
nrs_policy_put(struct ptlrpc_nrs_policy * policy)318 static void nrs_policy_put(struct ptlrpc_nrs_policy *policy)
319 {
320 	spin_lock(&policy->pol_nrs->nrs_lock);
321 	nrs_policy_put_locked(policy);
322 	spin_unlock(&policy->pol_nrs->nrs_lock);
323 }
324 
325 /**
326  * Find and return a policy by name.
327  */
nrs_policy_find_locked(struct ptlrpc_nrs * nrs,char * name)328 static struct ptlrpc_nrs_policy *nrs_policy_find_locked(struct ptlrpc_nrs *nrs,
329 							 char *name)
330 {
331 	struct ptlrpc_nrs_policy *tmp;
332 
333 	list_for_each_entry(tmp, &nrs->nrs_policy_list, pol_list) {
334 		if (strncmp(tmp->pol_desc->pd_name, name,
335 			    NRS_POL_NAME_MAX) == 0) {
336 			nrs_policy_get_locked(tmp);
337 			return tmp;
338 		}
339 	}
340 	return NULL;
341 }
342 
343 /**
344  * Release references for the resource hierarchy moving upwards towards the
345  * policy instance resource.
346  */
nrs_resource_put(struct ptlrpc_nrs_resource * res)347 static void nrs_resource_put(struct ptlrpc_nrs_resource *res)
348 {
349 	struct ptlrpc_nrs_policy *policy = res->res_policy;
350 
351 	if (policy->pol_desc->pd_ops->op_res_put != NULL) {
352 		struct ptlrpc_nrs_resource *parent;
353 
354 		for (; res != NULL; res = parent) {
355 			parent = res->res_parent;
356 			policy->pol_desc->pd_ops->op_res_put(policy, res);
357 		}
358 	}
359 }
360 
361 /**
362  * Obtains references for each resource in the resource hierarchy for request
363  * \a nrq if it is to be handled by \a policy.
364  *
365  * \param[in] policy	  the policy
366  * \param[in] nrq	  the request
367  * \param[in] moving_req  denotes whether this is a call to the function by
368  *			  ldlm_lock_reorder_req(), in order to move \a nrq to
369  *			  the high-priority NRS head; we should not sleep when
370  *			  set.
371  *
372  * \retval NULL		  resource hierarchy references not obtained
373  * \retval valid-pointer  the bottom level of the resource hierarchy
374  *
375  * \see ptlrpc_nrs_pol_ops::op_res_get()
376  */
377 static
nrs_resource_get(struct ptlrpc_nrs_policy * policy,struct ptlrpc_nrs_request * nrq,bool moving_req)378 struct ptlrpc_nrs_resource *nrs_resource_get(struct ptlrpc_nrs_policy *policy,
379 					     struct ptlrpc_nrs_request *nrq,
380 					     bool moving_req)
381 {
382 	/**
383 	 * Set to NULL to traverse the resource hierarchy from the top.
384 	 */
385 	struct ptlrpc_nrs_resource *res = NULL;
386 	struct ptlrpc_nrs_resource *tmp = NULL;
387 	int rc;
388 
389 	while (1) {
390 		rc = policy->pol_desc->pd_ops->op_res_get(policy, nrq, res,
391 							  &tmp, moving_req);
392 		if (rc < 0) {
393 			if (res != NULL)
394 				nrs_resource_put(res);
395 			return NULL;
396 		}
397 
398 		LASSERT(tmp != NULL);
399 		tmp->res_parent = res;
400 		tmp->res_policy = policy;
401 		res = tmp;
402 		tmp = NULL;
403 		/**
404 		 * Return once we have obtained a reference to the bottom level
405 		 * of the resource hierarchy.
406 		 */
407 		if (rc > 0)
408 			return res;
409 	}
410 }
411 
412 /**
413  * Obtains resources for the resource hierarchies and policy references for
414  * the fallback and current primary policy (if any), that will later be used
415  * to handle request \a nrq.
416  *
417  * \param[in]  nrs  the NRS head instance that will be handling request \a nrq.
418  * \param[in]  nrq  the request that is being handled.
419  * \param[out] resp the array where references to the resource hierarchy are
420  *		    stored.
421  * \param[in]  moving_req  is set when obtaining resources while moving a
422  *			   request from a policy on the regular NRS head to a
423  *			   policy on the HP NRS head (via
424  *			   ldlm_lock_reorder_req()). It signifies that
425  *			   allocations to get resources should be atomic; for
426  *			   a full explanation, see comment in
427  *			   ptlrpc_nrs_pol_ops::op_res_get().
428  */
nrs_resource_get_safe(struct ptlrpc_nrs * nrs,struct ptlrpc_nrs_request * nrq,struct ptlrpc_nrs_resource ** resp,bool moving_req)429 static void nrs_resource_get_safe(struct ptlrpc_nrs *nrs,
430 				  struct ptlrpc_nrs_request *nrq,
431 				  struct ptlrpc_nrs_resource **resp,
432 				  bool moving_req)
433 {
434 	struct ptlrpc_nrs_policy *primary = NULL;
435 	struct ptlrpc_nrs_policy *fallback = NULL;
436 
437 	memset(resp, 0, sizeof(resp[0]) * NRS_RES_MAX);
438 
439 	/**
440 	 * Obtain policy references.
441 	 */
442 	spin_lock(&nrs->nrs_lock);
443 
444 	fallback = nrs->nrs_policy_fallback;
445 	nrs_policy_get_locked(fallback);
446 
447 	primary = nrs->nrs_policy_primary;
448 	if (primary != NULL)
449 		nrs_policy_get_locked(primary);
450 
451 	spin_unlock(&nrs->nrs_lock);
452 
453 	/**
454 	 * Obtain resource hierarchy references.
455 	 */
456 	resp[NRS_RES_FALLBACK] = nrs_resource_get(fallback, nrq, moving_req);
457 	LASSERT(resp[NRS_RES_FALLBACK] != NULL);
458 
459 	if (primary != NULL) {
460 		resp[NRS_RES_PRIMARY] = nrs_resource_get(primary, nrq,
461 							 moving_req);
462 		/**
463 		 * A primary policy may exist which may not wish to serve a
464 		 * particular request for different reasons; release the
465 		 * reference on the policy as it will not be used for this
466 		 * request.
467 		 */
468 		if (resp[NRS_RES_PRIMARY] == NULL)
469 			nrs_policy_put(primary);
470 	}
471 }
472 
473 /**
474  * Releases references to resource hierarchies and policies, because they are no
475  * longer required; used when request handling has been completed, or the
476  * request is moving to the high priority NRS head.
477  *
478  * \param resp	the resource hierarchy that is being released
479  *
480  * \see ptlrpc_nrs_req_finalize()
481  */
nrs_resource_put_safe(struct ptlrpc_nrs_resource ** resp)482 static void nrs_resource_put_safe(struct ptlrpc_nrs_resource **resp)
483 {
484 	struct ptlrpc_nrs_policy *pols[NRS_RES_MAX];
485 	struct ptlrpc_nrs *nrs = NULL;
486 	int i;
487 
488 	for (i = 0; i < NRS_RES_MAX; i++) {
489 		if (resp[i] != NULL) {
490 			pols[i] = resp[i]->res_policy;
491 			nrs_resource_put(resp[i]);
492 			resp[i] = NULL;
493 		} else {
494 			pols[i] = NULL;
495 		}
496 	}
497 
498 	for (i = 0; i < NRS_RES_MAX; i++) {
499 		if (pols[i] == NULL)
500 			continue;
501 
502 		if (nrs == NULL) {
503 			nrs = pols[i]->pol_nrs;
504 			spin_lock(&nrs->nrs_lock);
505 		}
506 		nrs_policy_put_locked(pols[i]);
507 	}
508 
509 	if (nrs != NULL)
510 		spin_unlock(&nrs->nrs_lock);
511 }
512 
513 /**
514  * Obtains an NRS request from \a policy for handling or examination; the
515  * request should be removed in the 'handling' case.
516  *
517  * Calling into this function implies we already know the policy has a request
518  * waiting to be handled.
519  *
520  * \param[in] policy the policy from which a request
521  * \param[in] peek   when set, signifies that we just want to examine the
522  *		     request, and not handle it, so the request is not removed
523  *		     from the policy.
524  * \param[in] force  when set, it will force a policy to return a request if it
525  *		     has one pending
526  *
527  * \retval the NRS request to be handled
528  */
529 static inline
nrs_request_get(struct ptlrpc_nrs_policy * policy,bool peek,bool force)530 struct ptlrpc_nrs_request *nrs_request_get(struct ptlrpc_nrs_policy *policy,
531 					   bool peek, bool force)
532 {
533 	struct ptlrpc_nrs_request *nrq;
534 
535 	LASSERT(policy->pol_req_queued > 0);
536 
537 	nrq = policy->pol_desc->pd_ops->op_req_get(policy, peek, force);
538 
539 	LASSERT(ergo(nrq != NULL, nrs_request_policy(nrq) == policy));
540 
541 	return nrq;
542 }
543 
544 /**
545  * Enqueues request \a nrq for later handling, via one one the policies for
546  * which resources where earlier obtained via nrs_resource_get_safe(). The
547  * function attempts to enqueue the request first on the primary policy
548  * (if any), since this is the preferred choice.
549  *
550  * \param nrq the request being enqueued
551  *
552  * \see nrs_resource_get_safe()
553  */
nrs_request_enqueue(struct ptlrpc_nrs_request * nrq)554 static inline void nrs_request_enqueue(struct ptlrpc_nrs_request *nrq)
555 {
556 	struct ptlrpc_nrs_policy *policy;
557 	int rc;
558 	int i;
559 
560 	/**
561 	 * Try in descending order, because the primary policy (if any) is
562 	 * the preferred choice.
563 	 */
564 	for (i = NRS_RES_MAX - 1; i >= 0; i--) {
565 		if (nrq->nr_res_ptrs[i] == NULL)
566 			continue;
567 
568 		nrq->nr_res_idx = i;
569 		policy = nrq->nr_res_ptrs[i]->res_policy;
570 
571 		rc = policy->pol_desc->pd_ops->op_req_enqueue(policy, nrq);
572 		if (rc == 0) {
573 			policy->pol_nrs->nrs_req_queued++;
574 			policy->pol_req_queued++;
575 			return;
576 		}
577 	}
578 	/**
579 	 * Should never get here, as at least the primary policy's
580 	 * ptlrpc_nrs_pol_ops::op_req_enqueue() implementation should always
581 	 * succeed.
582 	 */
583 	LBUG();
584 }
585 
586 /**
587  * Called when a request has been handled
588  *
589  * \param[in] nrs the request that has been handled; can be used for
590  *		  job/resource control.
591  *
592  * \see ptlrpc_nrs_req_stop_nolock()
593  */
nrs_request_stop(struct ptlrpc_nrs_request * nrq)594 static inline void nrs_request_stop(struct ptlrpc_nrs_request *nrq)
595 {
596 	struct ptlrpc_nrs_policy *policy = nrs_request_policy(nrq);
597 
598 	if (policy->pol_desc->pd_ops->op_req_stop)
599 		policy->pol_desc->pd_ops->op_req_stop(policy, nrq);
600 
601 	LASSERT(policy->pol_nrs->nrs_req_started > 0);
602 	LASSERT(policy->pol_req_started > 0);
603 
604 	policy->pol_nrs->nrs_req_started--;
605 	policy->pol_req_started--;
606 }
607 
608 /**
609  * Handler for operations that can be carried out on policies.
610  *
611  * Handles opcodes that are common to all policy types within NRS core, and
612  * passes any unknown opcodes to the policy-specific control function.
613  *
614  * \param[in]	  nrs  the NRS head this policy belongs to.
615  * \param[in]	  name the human-readable policy name; should be the same as
616  *		       ptlrpc_nrs_pol_desc::pd_name.
617  * \param[in]	  opc  the opcode of the operation being carried out.
618  * \param[in,out] arg  can be used to pass information in and out between when
619  *		       carrying an operation; usually data that is private to
620  *		       the policy at some level, or generic policy status
621  *		       information.
622  *
623  * \retval -ve error condition
624  * \retval   0 operation was carried out successfully
625  */
nrs_policy_ctl(struct ptlrpc_nrs * nrs,char * name,enum ptlrpc_nrs_ctl opc,void * arg)626 static int nrs_policy_ctl(struct ptlrpc_nrs *nrs, char *name,
627 			  enum ptlrpc_nrs_ctl opc, void *arg)
628 {
629 	struct ptlrpc_nrs_policy *policy;
630 	int rc = 0;
631 
632 	spin_lock(&nrs->nrs_lock);
633 
634 	policy = nrs_policy_find_locked(nrs, name);
635 	if (policy == NULL) {
636 		rc = -ENOENT;
637 		goto out;
638 	}
639 
640 	switch (opc) {
641 		/**
642 		 * Unknown opcode, pass it down to the policy-specific control
643 		 * function for handling.
644 		 */
645 	default:
646 		rc = nrs_policy_ctl_locked(policy, opc, arg);
647 		break;
648 
649 		/**
650 		 * Start \e policy
651 		 */
652 	case PTLRPC_NRS_CTL_START:
653 		rc = nrs_policy_start_locked(policy);
654 		break;
655 	}
656 out:
657 	if (policy != NULL)
658 		nrs_policy_put_locked(policy);
659 
660 	spin_unlock(&nrs->nrs_lock);
661 
662 	return rc;
663 }
664 
665 /**
666  * Unregisters a policy by name.
667  *
668  * \param[in] nrs  the NRS head this policy belongs to.
669  * \param[in] name the human-readable policy name; should be the same as
670  *		   ptlrpc_nrs_pol_desc::pd_name
671  *
672  * \retval -ve error
673  * \retval   0 success
674  */
nrs_policy_unregister(struct ptlrpc_nrs * nrs,char * name)675 static int nrs_policy_unregister(struct ptlrpc_nrs *nrs, char *name)
676 {
677 	struct ptlrpc_nrs_policy *policy = NULL;
678 
679 	spin_lock(&nrs->nrs_lock);
680 
681 	policy = nrs_policy_find_locked(nrs, name);
682 	if (policy == NULL) {
683 		spin_unlock(&nrs->nrs_lock);
684 
685 		CERROR("Can't find NRS policy %s\n", name);
686 		return -ENOENT;
687 	}
688 
689 	if (policy->pol_ref > 1) {
690 		CERROR("Policy %s is busy with %d references\n", name,
691 		       (int)policy->pol_ref);
692 		nrs_policy_put_locked(policy);
693 
694 		spin_unlock(&nrs->nrs_lock);
695 		return -EBUSY;
696 	}
697 
698 	LASSERT(policy->pol_req_queued == 0);
699 	LASSERT(policy->pol_req_started == 0);
700 
701 	if (policy->pol_state != NRS_POL_STATE_STOPPED) {
702 		nrs_policy_stop_locked(policy);
703 		LASSERT(policy->pol_state == NRS_POL_STATE_STOPPED);
704 	}
705 
706 	list_del(&policy->pol_list);
707 	nrs->nrs_num_pols--;
708 
709 	nrs_policy_put_locked(policy);
710 
711 	spin_unlock(&nrs->nrs_lock);
712 
713 	nrs_policy_fini(policy);
714 
715 	LASSERT(policy->pol_private == NULL);
716 	kfree(policy);
717 
718 	return 0;
719 }
720 
721 /**
722  * Register a policy from \policy descriptor \a desc with NRS head \a nrs.
723  *
724  * \param[in] nrs   the NRS head on which the policy will be registered.
725  * \param[in] desc  the policy descriptor from which the information will be
726  *		    obtained to register the policy.
727  *
728  * \retval -ve error
729  * \retval   0 success
730  */
nrs_policy_register(struct ptlrpc_nrs * nrs,struct ptlrpc_nrs_pol_desc * desc)731 static int nrs_policy_register(struct ptlrpc_nrs *nrs,
732 			       struct ptlrpc_nrs_pol_desc *desc)
733 {
734 	struct ptlrpc_nrs_policy *policy;
735 	struct ptlrpc_nrs_policy *tmp;
736 	struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt;
737 	int rc;
738 
739 	LASSERT(svcpt != NULL);
740 	LASSERT(desc->pd_ops != NULL);
741 	LASSERT(desc->pd_ops->op_res_get != NULL);
742 	LASSERT(desc->pd_ops->op_req_get != NULL);
743 	LASSERT(desc->pd_ops->op_req_enqueue != NULL);
744 	LASSERT(desc->pd_ops->op_req_dequeue != NULL);
745 	LASSERT(desc->pd_compat != NULL);
746 
747 	policy = kzalloc_node(sizeof(*policy), GFP_NOFS,
748 			cfs_cpt_spread_node(svcpt->scp_service->srv_cptable,
749 					    svcpt->scp_cpt));
750 	if (policy == NULL)
751 		return -ENOMEM;
752 
753 	policy->pol_nrs = nrs;
754 	policy->pol_desc = desc;
755 	policy->pol_state = NRS_POL_STATE_STOPPED;
756 	policy->pol_flags = desc->pd_flags;
757 
758 	INIT_LIST_HEAD(&policy->pol_list);
759 	INIT_LIST_HEAD(&policy->pol_list_queued);
760 
761 	rc = nrs_policy_init(policy);
762 	if (rc != 0) {
763 		kfree(policy);
764 		return rc;
765 	}
766 
767 	spin_lock(&nrs->nrs_lock);
768 
769 	tmp = nrs_policy_find_locked(nrs, policy->pol_desc->pd_name);
770 	if (tmp != NULL) {
771 		CERROR("NRS policy %s has been registered, can't register it for %s\n",
772 		       policy->pol_desc->pd_name,
773 		       svcpt->scp_service->srv_name);
774 		nrs_policy_put_locked(tmp);
775 
776 		spin_unlock(&nrs->nrs_lock);
777 		nrs_policy_fini(policy);
778 		kfree(policy);
779 
780 		return -EEXIST;
781 	}
782 
783 	list_add_tail(&policy->pol_list, &nrs->nrs_policy_list);
784 	nrs->nrs_num_pols++;
785 
786 	if (policy->pol_flags & PTLRPC_NRS_FL_REG_START)
787 		rc = nrs_policy_start_locked(policy);
788 
789 	spin_unlock(&nrs->nrs_lock);
790 
791 	if (rc != 0)
792 		(void) nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
793 
794 	return rc;
795 }
796 
797 /**
798  * Enqueue request \a req using one of the policies its resources are referring
799  * to.
800  *
801  * \param[in] req the request to enqueue.
802  */
ptlrpc_nrs_req_add_nolock(struct ptlrpc_request * req)803 static void ptlrpc_nrs_req_add_nolock(struct ptlrpc_request *req)
804 {
805 	struct ptlrpc_nrs_policy *policy;
806 
807 	LASSERT(req->rq_nrq.nr_initialized);
808 	LASSERT(!req->rq_nrq.nr_enqueued);
809 
810 	nrs_request_enqueue(&req->rq_nrq);
811 	req->rq_nrq.nr_enqueued = 1;
812 
813 	policy = nrs_request_policy(&req->rq_nrq);
814 	/**
815 	 * Add the policy to the NRS head's list of policies with enqueued
816 	 * requests, if it has not been added there.
817 	 */
818 	if (unlikely(list_empty(&policy->pol_list_queued)))
819 		list_add_tail(&policy->pol_list_queued,
820 				  &policy->pol_nrs->nrs_policy_queued);
821 }
822 
823 /**
824  * Enqueue a request on the high priority NRS head.
825  *
826  * \param req the request to enqueue.
827  */
ptlrpc_nrs_hpreq_add_nolock(struct ptlrpc_request * req)828 static void ptlrpc_nrs_hpreq_add_nolock(struct ptlrpc_request *req)
829 {
830 	int opc = lustre_msg_get_opc(req->rq_reqmsg);
831 
832 	spin_lock(&req->rq_lock);
833 	req->rq_hp = 1;
834 	ptlrpc_nrs_req_add_nolock(req);
835 	if (opc != OBD_PING)
836 		DEBUG_REQ(D_NET, req, "high priority req");
837 	spin_unlock(&req->rq_lock);
838 }
839 
840 /**
841  * Returns a boolean predicate indicating whether the policy described by
842  * \a desc is adequate for use with service \a svc.
843  *
844  * \param[in] svc  the service
845  * \param[in] desc the policy descriptor
846  *
847  * \retval false the policy is not compatible with the service
848  * \retval true	 the policy is compatible with the service
849  */
nrs_policy_compatible(const struct ptlrpc_service * svc,const struct ptlrpc_nrs_pol_desc * desc)850 static inline bool nrs_policy_compatible(const struct ptlrpc_service *svc,
851 					 const struct ptlrpc_nrs_pol_desc *desc)
852 {
853 	return desc->pd_compat(svc, desc);
854 }
855 
856 /**
857  * Registers all compatible policies in nrs_core.nrs_policies, for NRS head
858  * \a nrs.
859  *
860  * \param[in] nrs the NRS head
861  *
862  * \retval -ve error
863  * \retval   0 success
864  *
865  * \pre mutex_is_locked(&nrs_core.nrs_mutex)
866  *
867  * \see ptlrpc_service_nrs_setup()
868  */
nrs_register_policies_locked(struct ptlrpc_nrs * nrs)869 static int nrs_register_policies_locked(struct ptlrpc_nrs *nrs)
870 {
871 	struct ptlrpc_nrs_pol_desc *desc;
872 	/* for convenience */
873 	struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt;
874 	struct ptlrpc_service *svc = svcpt->scp_service;
875 	int rc = -EINVAL;
876 
877 	LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
878 
879 	list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
880 		if (nrs_policy_compatible(svc, desc)) {
881 			rc = nrs_policy_register(nrs, desc);
882 			if (rc != 0) {
883 				CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n",
884 				       desc->pd_name, svcpt->scp_cpt,
885 				       svc->srv_name, rc);
886 				/**
887 				 * Fail registration if any of the policies'
888 				 * registration fails.
889 				 */
890 				break;
891 			}
892 		}
893 	}
894 
895 	return rc;
896 }
897 
898 /**
899  * Initializes NRS head \a nrs of service partition \a svcpt, and registers all
900  * compatible policies in NRS core, with the NRS head.
901  *
902  * \param[in] nrs   the NRS head
903  * \param[in] svcpt the PTLRPC service partition to setup
904  *
905  * \retval -ve error
906  * \retval   0 success
907  *
908  * \pre mutex_is_locked(&nrs_core.nrs_mutex)
909  */
nrs_svcpt_setup_locked0(struct ptlrpc_nrs * nrs,struct ptlrpc_service_part * svcpt)910 static int nrs_svcpt_setup_locked0(struct ptlrpc_nrs *nrs,
911 				   struct ptlrpc_service_part *svcpt)
912 {
913 	enum ptlrpc_nrs_queue_type queue;
914 
915 	LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
916 
917 	if (nrs == &svcpt->scp_nrs_reg)
918 		queue = PTLRPC_NRS_QUEUE_REG;
919 	else if (nrs == svcpt->scp_nrs_hp)
920 		queue = PTLRPC_NRS_QUEUE_HP;
921 	else
922 		LBUG();
923 
924 	nrs->nrs_svcpt = svcpt;
925 	nrs->nrs_queue_type = queue;
926 	spin_lock_init(&nrs->nrs_lock);
927 	INIT_LIST_HEAD(&nrs->nrs_policy_list);
928 	INIT_LIST_HEAD(&nrs->nrs_policy_queued);
929 
930 	return nrs_register_policies_locked(nrs);
931 }
932 
933 /**
934  * Allocates a regular and optionally a high-priority NRS head (if the service
935  * handles high-priority RPCs), and then registers all available compatible
936  * policies on those NRS heads.
937  *
938  * \param[in,out] svcpt the PTLRPC service partition to setup
939  *
940  * \pre mutex_is_locked(&nrs_core.nrs_mutex)
941  */
nrs_svcpt_setup_locked(struct ptlrpc_service_part * svcpt)942 static int nrs_svcpt_setup_locked(struct ptlrpc_service_part *svcpt)
943 {
944 	struct ptlrpc_nrs *nrs;
945 	int rc;
946 
947 	LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
948 
949 	/**
950 	 * Initialize the regular NRS head.
951 	 */
952 	nrs = nrs_svcpt2nrs(svcpt, false);
953 	rc = nrs_svcpt_setup_locked0(nrs, svcpt);
954 	if (rc < 0)
955 		goto out;
956 
957 	/**
958 	 * Optionally allocate a high-priority NRS head.
959 	 */
960 	if (svcpt->scp_service->srv_ops.so_hpreq_handler == NULL)
961 		goto out;
962 
963 	svcpt->scp_nrs_hp =
964 		kzalloc_node(sizeof(*svcpt->scp_nrs_hp), GFP_NOFS,
965 			cfs_cpt_spread_node(svcpt->scp_service->srv_cptable,
966 					    svcpt->scp_cpt));
967 	if (svcpt->scp_nrs_hp == NULL) {
968 		rc = -ENOMEM;
969 		goto out;
970 	}
971 
972 	nrs = nrs_svcpt2nrs(svcpt, true);
973 	rc = nrs_svcpt_setup_locked0(nrs, svcpt);
974 
975 out:
976 	return rc;
977 }
978 
979 /**
980  * Unregisters all policies on all available NRS heads in a service partition;
981  * called at PTLRPC service unregistration time.
982  *
983  * \param[in] svcpt the PTLRPC service partition
984  *
985  * \pre mutex_is_locked(&nrs_core.nrs_mutex)
986  */
nrs_svcpt_cleanup_locked(struct ptlrpc_service_part * svcpt)987 static void nrs_svcpt_cleanup_locked(struct ptlrpc_service_part *svcpt)
988 {
989 	struct ptlrpc_nrs *nrs;
990 	struct ptlrpc_nrs_policy *policy;
991 	struct ptlrpc_nrs_policy *tmp;
992 	int rc;
993 	bool hp = false;
994 
995 	LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
996 
997 again:
998 	nrs = nrs_svcpt2nrs(svcpt, hp);
999 	nrs->nrs_stopping = 1;
1000 
1001 	list_for_each_entry_safe(policy, tmp, &nrs->nrs_policy_list,
1002 				     pol_list) {
1003 		rc = nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
1004 		LASSERT(rc == 0);
1005 	}
1006 
1007 	/**
1008 	 * If the service partition has an HP NRS head, clean that up as well.
1009 	 */
1010 	if (!hp && nrs_svcpt_has_hp(svcpt)) {
1011 		hp = true;
1012 		goto again;
1013 	}
1014 
1015 	if (hp)
1016 		kfree(nrs);
1017 }
1018 
1019 /**
1020  * Returns the descriptor for a policy as identified by by \a name.
1021  *
1022  * \param[in] name the policy name
1023  *
1024  * \retval the policy descriptor
1025  * \retval NULL
1026  */
nrs_policy_find_desc_locked(const char * name)1027 static struct ptlrpc_nrs_pol_desc *nrs_policy_find_desc_locked(const char *name)
1028 {
1029 	struct ptlrpc_nrs_pol_desc *tmp;
1030 
1031 	list_for_each_entry(tmp, &nrs_core.nrs_policies, pd_list) {
1032 		if (strncmp(tmp->pd_name, name, NRS_POL_NAME_MAX) == 0)
1033 			return tmp;
1034 	}
1035 	return NULL;
1036 }
1037 
1038 /**
1039  * Removes the policy from all supported NRS heads of all partitions of all
1040  * PTLRPC services.
1041  *
1042  * \param[in] desc the policy descriptor to unregister
1043  *
1044  * \retval -ve error
1045  * \retval  0  successfully unregistered policy on all supported NRS heads
1046  *
1047  * \pre mutex_is_locked(&nrs_core.nrs_mutex)
1048  * \pre mutex_is_locked(&ptlrpc_all_services_mutex)
1049  */
nrs_policy_unregister_locked(struct ptlrpc_nrs_pol_desc * desc)1050 static int nrs_policy_unregister_locked(struct ptlrpc_nrs_pol_desc *desc)
1051 {
1052 	struct ptlrpc_nrs *nrs;
1053 	struct ptlrpc_service *svc;
1054 	struct ptlrpc_service_part *svcpt;
1055 	int i;
1056 	int rc = 0;
1057 
1058 	LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
1059 	LASSERT(mutex_is_locked(&ptlrpc_all_services_mutex));
1060 
1061 	list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
1062 
1063 		if (!nrs_policy_compatible(svc, desc) ||
1064 		    unlikely(svc->srv_is_stopping))
1065 			continue;
1066 
1067 		ptlrpc_service_for_each_part(svcpt, i, svc) {
1068 			bool hp = false;
1069 
1070 again:
1071 			nrs = nrs_svcpt2nrs(svcpt, hp);
1072 			rc = nrs_policy_unregister(nrs, desc->pd_name);
1073 			/**
1074 			 * Ignore -ENOENT as the policy may not have registered
1075 			 * successfully on all service partitions.
1076 			 */
1077 			if (rc == -ENOENT) {
1078 				rc = 0;
1079 			} else if (rc != 0) {
1080 				CERROR("Failed to unregister NRS policy %s for partition %d of service %s: %d\n",
1081 				       desc->pd_name, svcpt->scp_cpt,
1082 				       svcpt->scp_service->srv_name, rc);
1083 				return rc;
1084 			}
1085 
1086 			if (!hp && nrs_svc_has_hp(svc)) {
1087 				hp = true;
1088 				goto again;
1089 			}
1090 		}
1091 
1092 		if (desc->pd_ops->op_lprocfs_fini != NULL)
1093 			desc->pd_ops->op_lprocfs_fini(svc);
1094 	}
1095 
1096 	return rc;
1097 }
1098 
1099 /**
1100  * Registers a new policy with NRS core.
1101  *
1102  * The function will only succeed if policy registration with all compatible
1103  * service partitions (if any) is successful.
1104  *
1105  * N.B. This function should be called either at ptlrpc module initialization
1106  *	time when registering a policy that ships with NRS core, or in a
1107  *	module's init() function for policies registering from other modules.
1108  *
1109  * \param[in] conf configuration information for the new policy to register
1110  *
1111  * \retval -ve error
1112  * \retval   0 success
1113  */
ptlrpc_nrs_policy_register(struct ptlrpc_nrs_pol_conf * conf)1114 static int ptlrpc_nrs_policy_register(struct ptlrpc_nrs_pol_conf *conf)
1115 {
1116 	struct ptlrpc_service *svc;
1117 	struct ptlrpc_nrs_pol_desc *desc;
1118 	int rc = 0;
1119 
1120 	LASSERT(conf != NULL);
1121 	LASSERT(conf->nc_ops != NULL);
1122 	LASSERT(conf->nc_compat != NULL);
1123 	LASSERT(ergo(conf->nc_compat == nrs_policy_compat_one,
1124 		conf->nc_compat_svc_name != NULL));
1125 	LASSERT(ergo((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0,
1126 		     conf->nc_owner != NULL));
1127 
1128 	conf->nc_name[NRS_POL_NAME_MAX - 1] = '\0';
1129 
1130 	/**
1131 	 * External policies are not allowed to start immediately upon
1132 	 * registration, as there is a relatively higher chance that their
1133 	 * registration might fail. In such a case, some policy instances may
1134 	 * already have requests queued wen unregistration needs to happen as
1135 	 * part o cleanup; since there is currently no way to drain requests
1136 	 * from a policy unless the service is unregistering, we just disallow
1137 	 * this.
1138 	 */
1139 	if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) &&
1140 	    (conf->nc_flags & (PTLRPC_NRS_FL_FALLBACK |
1141 			       PTLRPC_NRS_FL_REG_START))) {
1142 		CERROR("NRS: failing to register policy %s. Please check policy flags; external policies cannot act as fallback policies, or be started immediately upon registration without interaction with lprocfs\n",
1143 		       conf->nc_name);
1144 		return -EINVAL;
1145 	}
1146 
1147 	mutex_lock(&nrs_core.nrs_mutex);
1148 
1149 	if (nrs_policy_find_desc_locked(conf->nc_name) != NULL) {
1150 		CERROR("NRS: failing to register policy %s which has already been registered with NRS core!\n",
1151 		       conf->nc_name);
1152 		rc = -EEXIST;
1153 		goto fail;
1154 	}
1155 
1156 	desc = kzalloc(sizeof(*desc), GFP_NOFS);
1157 	if (!desc) {
1158 		rc = -ENOMEM;
1159 		goto fail;
1160 	}
1161 
1162 	strncpy(desc->pd_name, conf->nc_name, NRS_POL_NAME_MAX);
1163 	desc->pd_ops = conf->nc_ops;
1164 	desc->pd_compat = conf->nc_compat;
1165 	desc->pd_compat_svc_name = conf->nc_compat_svc_name;
1166 	if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0)
1167 		desc->pd_owner = conf->nc_owner;
1168 	desc->pd_flags = conf->nc_flags;
1169 	atomic_set(&desc->pd_refs, 0);
1170 
1171 	/**
1172 	 * For policies that are held in the same module as NRS (currently
1173 	 * ptlrpc), do not register the policy with all compatible services,
1174 	 * as the services will not have started at this point, since we are
1175 	 * calling from ptlrpc module initialization code. In such cases each
1176 	 * service will register all compatible policies later, via
1177 	 * ptlrpc_service_nrs_setup().
1178 	 */
1179 	if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) == 0)
1180 		goto internal;
1181 
1182 	/**
1183 	 * Register the new policy on all compatible services
1184 	 */
1185 	mutex_lock(&ptlrpc_all_services_mutex);
1186 
1187 	list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
1188 		struct ptlrpc_service_part *svcpt;
1189 		int i;
1190 		int rc2;
1191 
1192 		if (!nrs_policy_compatible(svc, desc) ||
1193 		    unlikely(svc->srv_is_stopping))
1194 			continue;
1195 
1196 		ptlrpc_service_for_each_part(svcpt, i, svc) {
1197 			struct ptlrpc_nrs *nrs;
1198 			bool hp = false;
1199 again:
1200 			nrs = nrs_svcpt2nrs(svcpt, hp);
1201 			rc = nrs_policy_register(nrs, desc);
1202 			if (rc != 0) {
1203 				CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n",
1204 				       desc->pd_name, svcpt->scp_cpt,
1205 				       svcpt->scp_service->srv_name, rc);
1206 
1207 				rc2 = nrs_policy_unregister_locked(desc);
1208 				/**
1209 				 * Should not fail at this point
1210 				 */
1211 				LASSERT(rc2 == 0);
1212 				mutex_unlock(&ptlrpc_all_services_mutex);
1213 				kfree(desc);
1214 				goto fail;
1215 			}
1216 
1217 			if (!hp && nrs_svc_has_hp(svc)) {
1218 				hp = true;
1219 				goto again;
1220 			}
1221 		}
1222 
1223 		/**
1224 		 * No need to take a reference to other modules here, as we
1225 		 * will be calling from the module's init() function.
1226 		 */
1227 		if (desc->pd_ops->op_lprocfs_init != NULL) {
1228 			rc = desc->pd_ops->op_lprocfs_init(svc);
1229 			if (rc != 0) {
1230 				rc2 = nrs_policy_unregister_locked(desc);
1231 				/**
1232 				 * Should not fail at this point
1233 				 */
1234 				LASSERT(rc2 == 0);
1235 				mutex_unlock(&ptlrpc_all_services_mutex);
1236 				kfree(desc);
1237 				goto fail;
1238 			}
1239 		}
1240 	}
1241 
1242 	mutex_unlock(&ptlrpc_all_services_mutex);
1243 internal:
1244 	list_add_tail(&desc->pd_list, &nrs_core.nrs_policies);
1245 fail:
1246 	mutex_unlock(&nrs_core.nrs_mutex);
1247 
1248 	return rc;
1249 }
1250 
1251 /**
1252  * Setup NRS heads on all service partitions of service \a svc, and register
1253  * all compatible policies on those NRS heads.
1254  *
1255  * To be called from within ptl
1256  * \param[in] svc the service to setup
1257  *
1258  * \retval -ve error, the calling logic should eventually call
1259  *		      ptlrpc_service_nrs_cleanup() to undo any work performed
1260  *		      by this function.
1261  *
1262  * \see ptlrpc_register_service()
1263  * \see ptlrpc_service_nrs_cleanup()
1264  */
ptlrpc_service_nrs_setup(struct ptlrpc_service * svc)1265 int ptlrpc_service_nrs_setup(struct ptlrpc_service *svc)
1266 {
1267 	struct ptlrpc_service_part *svcpt;
1268 	const struct ptlrpc_nrs_pol_desc *desc;
1269 	int i;
1270 	int rc = 0;
1271 
1272 	mutex_lock(&nrs_core.nrs_mutex);
1273 
1274 	/**
1275 	 * Initialize NRS heads on all service CPTs.
1276 	 */
1277 	ptlrpc_service_for_each_part(svcpt, i, svc) {
1278 		rc = nrs_svcpt_setup_locked(svcpt);
1279 		if (rc != 0)
1280 			goto failed;
1281 	}
1282 
1283 	/**
1284 	 * Set up lprocfs interfaces for all supported policies for the
1285 	 * service.
1286 	 */
1287 	list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
1288 		if (!nrs_policy_compatible(svc, desc))
1289 			continue;
1290 
1291 		if (desc->pd_ops->op_lprocfs_init != NULL) {
1292 			rc = desc->pd_ops->op_lprocfs_init(svc);
1293 			if (rc != 0)
1294 				goto failed;
1295 		}
1296 	}
1297 
1298 failed:
1299 
1300 	mutex_unlock(&nrs_core.nrs_mutex);
1301 
1302 	return rc;
1303 }
1304 
1305 /**
1306  * Unregisters all policies on all service partitions of service \a svc.
1307  *
1308  * \param[in] svc the PTLRPC service to unregister
1309  */
ptlrpc_service_nrs_cleanup(struct ptlrpc_service * svc)1310 void ptlrpc_service_nrs_cleanup(struct ptlrpc_service *svc)
1311 {
1312 	struct ptlrpc_service_part *svcpt;
1313 	const struct ptlrpc_nrs_pol_desc *desc;
1314 	int i;
1315 
1316 	mutex_lock(&nrs_core.nrs_mutex);
1317 
1318 	/**
1319 	 * Clean up NRS heads on all service partitions
1320 	 */
1321 	ptlrpc_service_for_each_part(svcpt, i, svc)
1322 		nrs_svcpt_cleanup_locked(svcpt);
1323 
1324 	/**
1325 	 * Clean up lprocfs interfaces for all supported policies for the
1326 	 * service.
1327 	 */
1328 	list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
1329 		if (!nrs_policy_compatible(svc, desc))
1330 			continue;
1331 
1332 		if (desc->pd_ops->op_lprocfs_fini != NULL)
1333 			desc->pd_ops->op_lprocfs_fini(svc);
1334 	}
1335 
1336 	mutex_unlock(&nrs_core.nrs_mutex);
1337 }
1338 
1339 /**
1340  * Obtains NRS head resources for request \a req.
1341  *
1342  * These could be either on the regular or HP NRS head of \a svcpt; resources
1343  * taken on the regular head can later be swapped for HP head resources by
1344  * ldlm_lock_reorder_req().
1345  *
1346  * \param[in] svcpt the service partition
1347  * \param[in] req   the request
1348  * \param[in] hp    which NRS head of \a svcpt to use
1349  */
ptlrpc_nrs_req_initialize(struct ptlrpc_service_part * svcpt,struct ptlrpc_request * req,bool hp)1350 void ptlrpc_nrs_req_initialize(struct ptlrpc_service_part *svcpt,
1351 			       struct ptlrpc_request *req, bool hp)
1352 {
1353 	struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
1354 
1355 	memset(&req->rq_nrq, 0, sizeof(req->rq_nrq));
1356 	nrs_resource_get_safe(nrs, &req->rq_nrq, req->rq_nrq.nr_res_ptrs,
1357 			      false);
1358 
1359 	/**
1360 	 * It is fine to access \e nr_initialized without locking as there is
1361 	 * no contention at this early stage.
1362 	 */
1363 	req->rq_nrq.nr_initialized = 1;
1364 }
1365 
1366 /**
1367  * Releases resources for a request; is called after the request has been
1368  * handled.
1369  *
1370  * \param[in] req the request
1371  *
1372  * \see ptlrpc_server_finish_request()
1373  */
ptlrpc_nrs_req_finalize(struct ptlrpc_request * req)1374 void ptlrpc_nrs_req_finalize(struct ptlrpc_request *req)
1375 {
1376 	if (req->rq_nrq.nr_initialized) {
1377 		nrs_resource_put_safe(req->rq_nrq.nr_res_ptrs);
1378 		/* no protection on bit nr_initialized because no
1379 		 * contention at this late stage */
1380 		req->rq_nrq.nr_finalized = 1;
1381 	}
1382 }
1383 
ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request * req)1384 void ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request *req)
1385 {
1386 	if (req->rq_nrq.nr_started)
1387 		nrs_request_stop(&req->rq_nrq);
1388 }
1389 
1390 /**
1391  * Enqueues request \a req on either the regular or high-priority NRS head
1392  * of service partition \a svcpt.
1393  *
1394  * \param[in] svcpt the service partition
1395  * \param[in] req   the request to be enqueued
1396  * \param[in] hp    whether to enqueue the request on the regular or
1397  *		    high-priority NRS head.
1398  */
ptlrpc_nrs_req_add(struct ptlrpc_service_part * svcpt,struct ptlrpc_request * req,bool hp)1399 void ptlrpc_nrs_req_add(struct ptlrpc_service_part *svcpt,
1400 			struct ptlrpc_request *req, bool hp)
1401 {
1402 	spin_lock(&svcpt->scp_req_lock);
1403 
1404 	if (hp)
1405 		ptlrpc_nrs_hpreq_add_nolock(req);
1406 	else
1407 		ptlrpc_nrs_req_add_nolock(req);
1408 
1409 	spin_unlock(&svcpt->scp_req_lock);
1410 }
1411 
nrs_request_removed(struct ptlrpc_nrs_policy * policy)1412 static void nrs_request_removed(struct ptlrpc_nrs_policy *policy)
1413 {
1414 	LASSERT(policy->pol_nrs->nrs_req_queued > 0);
1415 	LASSERT(policy->pol_req_queued > 0);
1416 
1417 	policy->pol_nrs->nrs_req_queued--;
1418 	policy->pol_req_queued--;
1419 
1420 	/**
1421 	 * If the policy has no more requests queued, remove it from
1422 	 * ptlrpc_nrs::nrs_policy_queued.
1423 	 */
1424 	if (unlikely(policy->pol_req_queued == 0)) {
1425 		list_del_init(&policy->pol_list_queued);
1426 
1427 		/**
1428 		 * If there are other policies with queued requests, move the
1429 		 * current policy to the end so that we can round robin over
1430 		 * all policies and drain the requests.
1431 		 */
1432 	} else if (policy->pol_req_queued != policy->pol_nrs->nrs_req_queued) {
1433 		LASSERT(policy->pol_req_queued <
1434 			policy->pol_nrs->nrs_req_queued);
1435 
1436 		list_move_tail(&policy->pol_list_queued,
1437 				   &policy->pol_nrs->nrs_policy_queued);
1438 	}
1439 }
1440 
1441 /**
1442  * Obtains a request for handling from an NRS head of service partition
1443  * \a svcpt.
1444  *
1445  * \param[in] svcpt the service partition
1446  * \param[in] hp    whether to obtain a request from the regular or
1447  *		    high-priority NRS head.
1448  * \param[in] peek  when set, signifies that we just want to examine the
1449  *		    request, and not handle it, so the request is not removed
1450  *		    from the policy.
1451  * \param[in] force when set, it will force a policy to return a request if it
1452  *		    has one pending
1453  *
1454  * \retval the	request to be handled
1455  * \retval NULL the head has no requests to serve
1456  */
1457 struct ptlrpc_request *
ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part * svcpt,bool hp,bool peek,bool force)1458 ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part *svcpt, bool hp,
1459 			   bool peek, bool force)
1460 {
1461 	struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
1462 	struct ptlrpc_nrs_policy *policy;
1463 	struct ptlrpc_nrs_request *nrq;
1464 
1465 	/**
1466 	 * Always try to drain requests from all NRS polices even if they are
1467 	 * inactive, because the user can change policy status at runtime.
1468 	 */
1469 	list_for_each_entry(policy, &nrs->nrs_policy_queued,
1470 				pol_list_queued) {
1471 		nrq = nrs_request_get(policy, peek, force);
1472 		if (nrq != NULL) {
1473 			if (likely(!peek)) {
1474 				nrq->nr_started = 1;
1475 
1476 				policy->pol_req_started++;
1477 				policy->pol_nrs->nrs_req_started++;
1478 
1479 				nrs_request_removed(policy);
1480 			}
1481 
1482 			return container_of(nrq, struct ptlrpc_request, rq_nrq);
1483 		}
1484 	}
1485 
1486 	return NULL;
1487 }
1488 
1489 /**
1490  * Returns whether there are any requests currently enqueued on any of the
1491  * policies of service partition's \a svcpt NRS head specified by \a hp. Should
1492  * be called while holding ptlrpc_service_part::scp_req_lock to get a reliable
1493  * result.
1494  *
1495  * \param[in] svcpt the service partition to enquire.
1496  * \param[in] hp    whether the regular or high-priority NRS head is to be
1497  *		    enquired.
1498  *
1499  * \retval false the indicated NRS head has no enqueued requests.
1500  * \retval true	 the indicated NRS head has some enqueued requests.
1501  */
ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part * svcpt,bool hp)1502 bool ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part *svcpt, bool hp)
1503 {
1504 	struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
1505 
1506 	return nrs->nrs_req_queued > 0;
1507 };
1508 
1509 /**
1510  * Carries out a control operation \a opc on the policy identified by the
1511  * human-readable \a name, on either all partitions, or only on the first
1512  * partition of service \a svc.
1513  *
1514  * \param[in]	  svc	 the service the policy belongs to.
1515  * \param[in]	  queue  whether to carry out the command on the policy which
1516  *			 belongs to the regular, high-priority, or both NRS
1517  *			 heads of service partitions of \a svc.
1518  * \param[in]	  name   the policy to act upon, by human-readable name
1519  * \param[in]	  opc	 the opcode of the operation to carry out
1520  * \param[in]	  single when set, the operation will only be carried out on the
1521  *			 NRS heads of the first service partition of \a svc.
1522  *			 This is useful for some policies which e.g. share
1523  *			 identical values on the same parameters of different
1524  *			 service partitions; when reading these parameters via
1525  *			 lprocfs, these policies may just want to obtain and
1526  *			 print out the values from the first service partition.
1527  *			 Storing these values centrally elsewhere then could be
1528  *			 another solution for this.
1529  * \param[in,out] arg	 can be used as a generic in/out buffer between control
1530  *			 operations and the user environment.
1531  *
1532  *\retval -ve error condition
1533  *\retval   0 operation was carried out successfully
1534  */
ptlrpc_nrs_policy_control(const struct ptlrpc_service * svc,enum ptlrpc_nrs_queue_type queue,char * name,enum ptlrpc_nrs_ctl opc,bool single,void * arg)1535 int ptlrpc_nrs_policy_control(const struct ptlrpc_service *svc,
1536 			      enum ptlrpc_nrs_queue_type queue, char *name,
1537 			      enum ptlrpc_nrs_ctl opc, bool single, void *arg)
1538 {
1539 	struct ptlrpc_service_part *svcpt;
1540 	int i;
1541 	int rc = 0;
1542 
1543 	LASSERT(opc != PTLRPC_NRS_CTL_INVALID);
1544 
1545 	if ((queue & PTLRPC_NRS_QUEUE_BOTH) == 0)
1546 		return -EINVAL;
1547 
1548 	ptlrpc_service_for_each_part(svcpt, i, svc) {
1549 		if ((queue & PTLRPC_NRS_QUEUE_REG) != 0) {
1550 			rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, false), name,
1551 					    opc, arg);
1552 			if (rc != 0 || (queue == PTLRPC_NRS_QUEUE_REG &&
1553 					single))
1554 				goto out;
1555 		}
1556 
1557 		if ((queue & PTLRPC_NRS_QUEUE_HP) != 0) {
1558 			/**
1559 			 * XXX: We could optionally check for
1560 			 * nrs_svc_has_hp(svc) here, and return an error if it
1561 			 * is false. Right now we rely on the policies' lprocfs
1562 			 * handlers that call the present function to make this
1563 			 * check; if they fail to do so, they might hit the
1564 			 * assertion inside nrs_svcpt2nrs() below.
1565 			 */
1566 			rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, true), name,
1567 					    opc, arg);
1568 			if (rc != 0 || single)
1569 				goto out;
1570 		}
1571 	}
1572 out:
1573 	return rc;
1574 }
1575 
1576 /* ptlrpc/nrs_fifo.c */
1577 extern struct ptlrpc_nrs_pol_conf nrs_conf_fifo;
1578 
1579 /**
1580  * Adds all policies that ship with the ptlrpc module, to NRS core's list of
1581  * policies \e nrs_core.nrs_policies.
1582  *
1583  * \retval 0 all policies have been registered successfully
1584  * \retval -ve error
1585  */
ptlrpc_nrs_init(void)1586 int ptlrpc_nrs_init(void)
1587 {
1588 	int rc;
1589 
1590 	mutex_init(&nrs_core.nrs_mutex);
1591 	INIT_LIST_HEAD(&nrs_core.nrs_policies);
1592 
1593 	rc = ptlrpc_nrs_policy_register(&nrs_conf_fifo);
1594 	if (rc != 0)
1595 		goto fail;
1596 
1597 	return rc;
1598 fail:
1599 	/**
1600 	 * Since no PTLRPC services have been started at this point, all we need
1601 	 * to do for cleanup is to free the descriptors.
1602 	 */
1603 	ptlrpc_nrs_fini();
1604 
1605 	return rc;
1606 }
1607 
1608 /**
1609  * Removes all policy descriptors from nrs_core::nrs_policies, and frees the
1610  * policy descriptors.
1611  *
1612  * Since all PTLRPC services are stopped at this point, there are no more
1613  * instances of any policies, because each service will have stopped its policy
1614  * instances in ptlrpc_service_nrs_cleanup(), so we just need to free the
1615  * descriptors here.
1616  */
ptlrpc_nrs_fini(void)1617 void ptlrpc_nrs_fini(void)
1618 {
1619 	struct ptlrpc_nrs_pol_desc *desc;
1620 	struct ptlrpc_nrs_pol_desc *tmp;
1621 
1622 	list_for_each_entry_safe(desc, tmp, &nrs_core.nrs_policies,
1623 				     pd_list) {
1624 		list_del_init(&desc->pd_list);
1625 		kfree(desc);
1626 	}
1627 }
1628 
1629 /** @} nrs */
1630