1 /*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
15
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 * GPL HEADER END
21 */
22 /*
23 * Copyright (c) 2011 Intel Corporation
24 *
25 * Copyright 2012 Xyratex Technology Limited
26 */
27 /*
28 * lustre/ptlrpc/nrs.c
29 *
30 * Network Request Scheduler (NRS)
31 *
32 * Allows to reorder the handling of RPCs at servers.
33 *
34 * Author: Liang Zhen <liang@whamcloud.com>
35 * Author: Nikitas Angelinas <nikitas_angelinas@xyratex.com>
36 */
/**
 * \addtogroup nrs
 * @{
 */
41
42 #define DEBUG_SUBSYSTEM S_RPC
43 #include "../include/obd_support.h"
44 #include "../include/obd_class.h"
45 #include "../include/lustre_net.h"
46 #include "../include/lprocfs_status.h"
47 #include "../../include/linux/libcfs/libcfs.h"
48 #include "ptlrpc_internal.h"
49
50 /* XXX: This is just for liblustre. Remove the #if defined directive when the
51 * "cfs_" prefix is dropped from cfs_list_head. */
52 extern struct list_head ptlrpc_all_services;
53
54 /**
55 * NRS core object.
56 */
57 struct nrs_core nrs_core;
58
nrs_policy_init(struct ptlrpc_nrs_policy * policy)59 static int nrs_policy_init(struct ptlrpc_nrs_policy *policy)
60 {
61 return policy->pol_desc->pd_ops->op_policy_init != NULL ?
62 policy->pol_desc->pd_ops->op_policy_init(policy) : 0;
63 }
64
nrs_policy_fini(struct ptlrpc_nrs_policy * policy)65 static void nrs_policy_fini(struct ptlrpc_nrs_policy *policy)
66 {
67 LASSERT(policy->pol_ref == 0);
68 LASSERT(policy->pol_req_queued == 0);
69
70 if (policy->pol_desc->pd_ops->op_policy_fini != NULL)
71 policy->pol_desc->pd_ops->op_policy_fini(policy);
72 }
73
nrs_policy_ctl_locked(struct ptlrpc_nrs_policy * policy,enum ptlrpc_nrs_ctl opc,void * arg)74 static int nrs_policy_ctl_locked(struct ptlrpc_nrs_policy *policy,
75 enum ptlrpc_nrs_ctl opc, void *arg)
76 {
77 /**
78 * The policy may be stopped, but the lprocfs files and
79 * ptlrpc_nrs_policy instances remain present until unregistration time.
80 * Do not perform the ctl operation if the policy is stopped, as
81 * policy->pol_private will be NULL in such a case.
82 */
83 if (policy->pol_state == NRS_POL_STATE_STOPPED)
84 return -ENODEV;
85
86 return policy->pol_desc->pd_ops->op_policy_ctl != NULL ?
87 policy->pol_desc->pd_ops->op_policy_ctl(policy, opc, arg) :
88 -ENOSYS;
89 }
90
nrs_policy_stop0(struct ptlrpc_nrs_policy * policy)91 static void nrs_policy_stop0(struct ptlrpc_nrs_policy *policy)
92 {
93 struct ptlrpc_nrs *nrs = policy->pol_nrs;
94
95 if (policy->pol_desc->pd_ops->op_policy_stop != NULL) {
96 spin_unlock(&nrs->nrs_lock);
97
98 policy->pol_desc->pd_ops->op_policy_stop(policy);
99
100 spin_lock(&nrs->nrs_lock);
101 }
102
103 LASSERT(list_empty(&policy->pol_list_queued));
104 LASSERT(policy->pol_req_queued == 0 &&
105 policy->pol_req_started == 0);
106
107 policy->pol_private = NULL;
108
109 policy->pol_state = NRS_POL_STATE_STOPPED;
110
111 if (atomic_dec_and_test(&policy->pol_desc->pd_refs))
112 module_put(policy->pol_desc->pd_owner);
113 }
114
nrs_policy_stop_locked(struct ptlrpc_nrs_policy * policy)115 static int nrs_policy_stop_locked(struct ptlrpc_nrs_policy *policy)
116 {
117 struct ptlrpc_nrs *nrs = policy->pol_nrs;
118
119 if (nrs->nrs_policy_fallback == policy && !nrs->nrs_stopping)
120 return -EPERM;
121
122 if (policy->pol_state == NRS_POL_STATE_STARTING)
123 return -EAGAIN;
124
125 /* In progress or already stopped */
126 if (policy->pol_state != NRS_POL_STATE_STARTED)
127 return 0;
128
129 policy->pol_state = NRS_POL_STATE_STOPPING;
130
131 /* Immediately make it invisible */
132 if (nrs->nrs_policy_primary == policy) {
133 nrs->nrs_policy_primary = NULL;
134
135 } else {
136 LASSERT(nrs->nrs_policy_fallback == policy);
137 nrs->nrs_policy_fallback = NULL;
138 }
139
140 /* I have the only refcount */
141 if (policy->pol_ref == 1)
142 nrs_policy_stop0(policy);
143
144 return 0;
145 }
146
147 /**
148 * Transitions the \a nrs NRS head's primary policy to
149 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING and if the policy has no
150 * pending usage references, to ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED.
151 *
152 * \param[in] nrs the NRS head to carry out this operation on
153 */
nrs_policy_stop_primary(struct ptlrpc_nrs * nrs)154 static void nrs_policy_stop_primary(struct ptlrpc_nrs *nrs)
155 {
156 struct ptlrpc_nrs_policy *tmp = nrs->nrs_policy_primary;
157
158 if (tmp == NULL)
159 return;
160
161 nrs->nrs_policy_primary = NULL;
162
163 LASSERT(tmp->pol_state == NRS_POL_STATE_STARTED);
164 tmp->pol_state = NRS_POL_STATE_STOPPING;
165
166 if (tmp->pol_ref == 0)
167 nrs_policy_stop0(tmp);
168 }
169
170 /**
171 * Transitions a policy across the ptlrpc_nrs_pol_state range of values, in
172 * response to an lprocfs command to start a policy.
173 *
174 * If a primary policy different to the current one is specified, this function
175 * will transition the new policy to the
176 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTING and then to
177 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED, and will then transition
178 * the old primary policy (if there is one) to
179 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding
180 * references on the policy to ptlrpc_nrs_pol_stae::NRS_POL_STATE_STOPPED.
181 *
182 * If the fallback policy is specified, this is taken to indicate an instruction
183 * to stop the current primary policy, without substituting it with another
184 * primary policy, so the primary policy (if any) is transitioned to
185 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding
186 * references on the policy to ptlrpc_nrs_pol_stae::NRS_POL_STATE_STOPPED. In
187 * this case, the fallback policy is only left active in the NRS head.
188 */
nrs_policy_start_locked(struct ptlrpc_nrs_policy * policy)189 static int nrs_policy_start_locked(struct ptlrpc_nrs_policy *policy)
190 {
191 struct ptlrpc_nrs *nrs = policy->pol_nrs;
192 int rc = 0;
193
194 /**
195 * Don't allow multiple starting which is too complex, and has no real
196 * benefit.
197 */
198 if (nrs->nrs_policy_starting)
199 return -EAGAIN;
200
201 LASSERT(policy->pol_state != NRS_POL_STATE_STARTING);
202
203 if (policy->pol_state == NRS_POL_STATE_STOPPING)
204 return -EAGAIN;
205
206 if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) {
207 /**
208 * This is for cases in which the user sets the policy to the
209 * fallback policy (currently fifo for all services); i.e. the
210 * user is resetting the policy to the default; so we stop the
211 * primary policy, if any.
212 */
213 if (policy == nrs->nrs_policy_fallback) {
214 nrs_policy_stop_primary(nrs);
215 return 0;
216 }
217
218 /**
219 * If we reach here, we must be setting up the fallback policy
220 * at service startup time, and only a single policy with the
221 * nrs_policy_flags::PTLRPC_NRS_FL_FALLBACK flag set can
222 * register with NRS core.
223 */
224 LASSERT(nrs->nrs_policy_fallback == NULL);
225 } else {
226 /**
227 * Shouldn't start primary policy if w/o fallback policy.
228 */
229 if (nrs->nrs_policy_fallback == NULL)
230 return -EPERM;
231
232 if (policy->pol_state == NRS_POL_STATE_STARTED)
233 return 0;
234 }
235
236 /**
237 * Increase the module usage count for policies registering from other
238 * modules.
239 */
240 if (atomic_inc_return(&policy->pol_desc->pd_refs) == 1 &&
241 !try_module_get(policy->pol_desc->pd_owner)) {
242 atomic_dec(&policy->pol_desc->pd_refs);
243 CERROR("NRS: cannot get module for policy %s; is it alive?\n",
244 policy->pol_desc->pd_name);
245 return -ENODEV;
246 }
247
248 /**
249 * Serialize policy starting across the NRS head
250 */
251 nrs->nrs_policy_starting = 1;
252
253 policy->pol_state = NRS_POL_STATE_STARTING;
254
255 if (policy->pol_desc->pd_ops->op_policy_start) {
256 spin_unlock(&nrs->nrs_lock);
257
258 rc = policy->pol_desc->pd_ops->op_policy_start(policy);
259
260 spin_lock(&nrs->nrs_lock);
261 if (rc != 0) {
262 if (atomic_dec_and_test(&policy->pol_desc->pd_refs))
263 module_put(policy->pol_desc->pd_owner);
264
265 policy->pol_state = NRS_POL_STATE_STOPPED;
266 goto out;
267 }
268 }
269
270 policy->pol_state = NRS_POL_STATE_STARTED;
271
272 if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) {
273 /**
274 * This path is only used at PTLRPC service setup time.
275 */
276 nrs->nrs_policy_fallback = policy;
277 } else {
278 /*
279 * Try to stop the current primary policy if there is one.
280 */
281 nrs_policy_stop_primary(nrs);
282
283 /**
284 * And set the newly-started policy as the primary one.
285 */
286 nrs->nrs_policy_primary = policy;
287 }
288
289 out:
290 nrs->nrs_policy_starting = 0;
291
292 return rc;
293 }
294
295 /**
296 * Increases the policy's usage reference count.
297 */
nrs_policy_get_locked(struct ptlrpc_nrs_policy * policy)298 static inline void nrs_policy_get_locked(struct ptlrpc_nrs_policy *policy)
299 {
300 policy->pol_ref++;
301 }
302
303 /**
304 * Decreases the policy's usage reference count, and stops the policy in case it
305 * was already stopping and have no more outstanding usage references (which
306 * indicates it has no more queued or started requests, and can be safely
307 * stopped).
308 */
nrs_policy_put_locked(struct ptlrpc_nrs_policy * policy)309 static void nrs_policy_put_locked(struct ptlrpc_nrs_policy *policy)
310 {
311 LASSERT(policy->pol_ref > 0);
312
313 policy->pol_ref--;
314 if (unlikely(policy->pol_ref == 0 &&
315 policy->pol_state == NRS_POL_STATE_STOPPING))
316 nrs_policy_stop0(policy);
317 }
318
nrs_policy_put(struct ptlrpc_nrs_policy * policy)319 static void nrs_policy_put(struct ptlrpc_nrs_policy *policy)
320 {
321 spin_lock(&policy->pol_nrs->nrs_lock);
322 nrs_policy_put_locked(policy);
323 spin_unlock(&policy->pol_nrs->nrs_lock);
324 }
325
326 /**
327 * Find and return a policy by name.
328 */
nrs_policy_find_locked(struct ptlrpc_nrs * nrs,char * name)329 static struct ptlrpc_nrs_policy *nrs_policy_find_locked(struct ptlrpc_nrs *nrs,
330 char *name)
331 {
332 struct ptlrpc_nrs_policy *tmp;
333
334 list_for_each_entry(tmp, &nrs->nrs_policy_list, pol_list) {
335 if (strncmp(tmp->pol_desc->pd_name, name,
336 NRS_POL_NAME_MAX) == 0) {
337 nrs_policy_get_locked(tmp);
338 return tmp;
339 }
340 }
341 return NULL;
342 }
343
344 /**
345 * Release references for the resource hierarchy moving upwards towards the
346 * policy instance resource.
347 */
nrs_resource_put(struct ptlrpc_nrs_resource * res)348 static void nrs_resource_put(struct ptlrpc_nrs_resource *res)
349 {
350 struct ptlrpc_nrs_policy *policy = res->res_policy;
351
352 if (policy->pol_desc->pd_ops->op_res_put != NULL) {
353 struct ptlrpc_nrs_resource *parent;
354
355 for (; res != NULL; res = parent) {
356 parent = res->res_parent;
357 policy->pol_desc->pd_ops->op_res_put(policy, res);
358 }
359 }
360 }
361
362 /**
363 * Obtains references for each resource in the resource hierarchy for request
364 * \a nrq if it is to be handled by \a policy.
365 *
366 * \param[in] policy the policy
367 * \param[in] nrq the request
368 * \param[in] moving_req denotes whether this is a call to the function by
369 * ldlm_lock_reorder_req(), in order to move \a nrq to
370 * the high-priority NRS head; we should not sleep when
371 * set.
372 *
373 * \retval NULL resource hierarchy references not obtained
374 * \retval valid-pointer the bottom level of the resource hierarchy
375 *
376 * \see ptlrpc_nrs_pol_ops::op_res_get()
377 */
378 static
nrs_resource_get(struct ptlrpc_nrs_policy * policy,struct ptlrpc_nrs_request * nrq,bool moving_req)379 struct ptlrpc_nrs_resource *nrs_resource_get(struct ptlrpc_nrs_policy *policy,
380 struct ptlrpc_nrs_request *nrq,
381 bool moving_req)
382 {
383 /**
384 * Set to NULL to traverse the resource hierarchy from the top.
385 */
386 struct ptlrpc_nrs_resource *res = NULL;
387 struct ptlrpc_nrs_resource *tmp = NULL;
388 int rc;
389
390 while (1) {
391 rc = policy->pol_desc->pd_ops->op_res_get(policy, nrq, res,
392 &tmp, moving_req);
393 if (rc < 0) {
394 if (res != NULL)
395 nrs_resource_put(res);
396 return NULL;
397 }
398
399 LASSERT(tmp != NULL);
400 tmp->res_parent = res;
401 tmp->res_policy = policy;
402 res = tmp;
403 tmp = NULL;
404 /**
405 * Return once we have obtained a reference to the bottom level
406 * of the resource hierarchy.
407 */
408 if (rc > 0)
409 return res;
410 }
411 }
412
413 /**
414 * Obtains resources for the resource hierarchies and policy references for
415 * the fallback and current primary policy (if any), that will later be used
416 * to handle request \a nrq.
417 *
418 * \param[in] nrs the NRS head instance that will be handling request \a nrq.
419 * \param[in] nrq the request that is being handled.
420 * \param[out] resp the array where references to the resource hierarchy are
421 * stored.
422 * \param[in] moving_req is set when obtaining resources while moving a
423 * request from a policy on the regular NRS head to a
424 * policy on the HP NRS head (via
425 * ldlm_lock_reorder_req()). It signifies that
426 * allocations to get resources should be atomic; for
427 * a full explanation, see comment in
428 * ptlrpc_nrs_pol_ops::op_res_get().
429 */
nrs_resource_get_safe(struct ptlrpc_nrs * nrs,struct ptlrpc_nrs_request * nrq,struct ptlrpc_nrs_resource ** resp,bool moving_req)430 static void nrs_resource_get_safe(struct ptlrpc_nrs *nrs,
431 struct ptlrpc_nrs_request *nrq,
432 struct ptlrpc_nrs_resource **resp,
433 bool moving_req)
434 {
435 struct ptlrpc_nrs_policy *primary = NULL;
436 struct ptlrpc_nrs_policy *fallback = NULL;
437
438 memset(resp, 0, sizeof(resp[0]) * NRS_RES_MAX);
439
440 /**
441 * Obtain policy references.
442 */
443 spin_lock(&nrs->nrs_lock);
444
445 fallback = nrs->nrs_policy_fallback;
446 nrs_policy_get_locked(fallback);
447
448 primary = nrs->nrs_policy_primary;
449 if (primary != NULL)
450 nrs_policy_get_locked(primary);
451
452 spin_unlock(&nrs->nrs_lock);
453
454 /**
455 * Obtain resource hierarchy references.
456 */
457 resp[NRS_RES_FALLBACK] = nrs_resource_get(fallback, nrq, moving_req);
458 LASSERT(resp[NRS_RES_FALLBACK] != NULL);
459
460 if (primary != NULL) {
461 resp[NRS_RES_PRIMARY] = nrs_resource_get(primary, nrq,
462 moving_req);
463 /**
464 * A primary policy may exist which may not wish to serve a
465 * particular request for different reasons; release the
466 * reference on the policy as it will not be used for this
467 * request.
468 */
469 if (resp[NRS_RES_PRIMARY] == NULL)
470 nrs_policy_put(primary);
471 }
472 }
473
474 /**
475 * Releases references to resource hierarchies and policies, because they are no
476 * longer required; used when request handling has been completed, or the
477 * request is moving to the high priority NRS head.
478 *
479 * \param resp the resource hierarchy that is being released
480 *
481 * \see ptlrpcnrs_req_hp_move()
482 * \see ptlrpc_nrs_req_finalize()
483 */
nrs_resource_put_safe(struct ptlrpc_nrs_resource ** resp)484 static void nrs_resource_put_safe(struct ptlrpc_nrs_resource **resp)
485 {
486 struct ptlrpc_nrs_policy *pols[NRS_RES_MAX];
487 struct ptlrpc_nrs *nrs = NULL;
488 int i;
489
490 for (i = 0; i < NRS_RES_MAX; i++) {
491 if (resp[i] != NULL) {
492 pols[i] = resp[i]->res_policy;
493 nrs_resource_put(resp[i]);
494 resp[i] = NULL;
495 } else {
496 pols[i] = NULL;
497 }
498 }
499
500 for (i = 0; i < NRS_RES_MAX; i++) {
501 if (pols[i] == NULL)
502 continue;
503
504 if (nrs == NULL) {
505 nrs = pols[i]->pol_nrs;
506 spin_lock(&nrs->nrs_lock);
507 }
508 nrs_policy_put_locked(pols[i]);
509 }
510
511 if (nrs != NULL)
512 spin_unlock(&nrs->nrs_lock);
513 }
514
515 /**
516 * Obtains an NRS request from \a policy for handling or examination; the
517 * request should be removed in the 'handling' case.
518 *
519 * Calling into this function implies we already know the policy has a request
520 * waiting to be handled.
521 *
522 * \param[in] policy the policy from which a request
523 * \param[in] peek when set, signifies that we just want to examine the
524 * request, and not handle it, so the request is not removed
525 * from the policy.
526 * \param[in] force when set, it will force a policy to return a request if it
527 * has one pending
528 *
529 * \retval the NRS request to be handled
530 */
531 static inline
nrs_request_get(struct ptlrpc_nrs_policy * policy,bool peek,bool force)532 struct ptlrpc_nrs_request *nrs_request_get(struct ptlrpc_nrs_policy *policy,
533 bool peek, bool force)
534 {
535 struct ptlrpc_nrs_request *nrq;
536
537 LASSERT(policy->pol_req_queued > 0);
538
539 nrq = policy->pol_desc->pd_ops->op_req_get(policy, peek, force);
540
541 LASSERT(ergo(nrq != NULL, nrs_request_policy(nrq) == policy));
542
543 return nrq;
544 }
545
546 /**
547 * Enqueues request \a nrq for later handling, via one one the policies for
548 * which resources where earlier obtained via nrs_resource_get_safe(). The
549 * function attempts to enqueue the request first on the primary policy
550 * (if any), since this is the preferred choice.
551 *
552 * \param nrq the request being enqueued
553 *
554 * \see nrs_resource_get_safe()
555 */
nrs_request_enqueue(struct ptlrpc_nrs_request * nrq)556 static inline void nrs_request_enqueue(struct ptlrpc_nrs_request *nrq)
557 {
558 struct ptlrpc_nrs_policy *policy;
559 int rc;
560 int i;
561
562 /**
563 * Try in descending order, because the primary policy (if any) is
564 * the preferred choice.
565 */
566 for (i = NRS_RES_MAX - 1; i >= 0; i--) {
567 if (nrq->nr_res_ptrs[i] == NULL)
568 continue;
569
570 nrq->nr_res_idx = i;
571 policy = nrq->nr_res_ptrs[i]->res_policy;
572
573 rc = policy->pol_desc->pd_ops->op_req_enqueue(policy, nrq);
574 if (rc == 0) {
575 policy->pol_nrs->nrs_req_queued++;
576 policy->pol_req_queued++;
577 return;
578 }
579 }
580 /**
581 * Should never get here, as at least the primary policy's
582 * ptlrpc_nrs_pol_ops::op_req_enqueue() implementation should always
583 * succeed.
584 */
585 LBUG();
586 }
587
588 /**
589 * Called when a request has been handled
590 *
591 * \param[in] nrs the request that has been handled; can be used for
592 * job/resource control.
593 *
594 * \see ptlrpc_nrs_req_stop_nolock()
595 */
nrs_request_stop(struct ptlrpc_nrs_request * nrq)596 static inline void nrs_request_stop(struct ptlrpc_nrs_request *nrq)
597 {
598 struct ptlrpc_nrs_policy *policy = nrs_request_policy(nrq);
599
600 if (policy->pol_desc->pd_ops->op_req_stop)
601 policy->pol_desc->pd_ops->op_req_stop(policy, nrq);
602
603 LASSERT(policy->pol_nrs->nrs_req_started > 0);
604 LASSERT(policy->pol_req_started > 0);
605
606 policy->pol_nrs->nrs_req_started--;
607 policy->pol_req_started--;
608 }
609
610 /**
611 * Handler for operations that can be carried out on policies.
612 *
613 * Handles opcodes that are common to all policy types within NRS core, and
614 * passes any unknown opcodes to the policy-specific control function.
615 *
616 * \param[in] nrs the NRS head this policy belongs to.
617 * \param[in] name the human-readable policy name; should be the same as
618 * ptlrpc_nrs_pol_desc::pd_name.
619 * \param[in] opc the opcode of the operation being carried out.
620 * \param[in,out] arg can be used to pass information in and out between when
621 * carrying an operation; usually data that is private to
622 * the policy at some level, or generic policy status
623 * information.
624 *
625 * \retval -ve error condition
626 * \retval 0 operation was carried out successfully
627 */
nrs_policy_ctl(struct ptlrpc_nrs * nrs,char * name,enum ptlrpc_nrs_ctl opc,void * arg)628 static int nrs_policy_ctl(struct ptlrpc_nrs *nrs, char *name,
629 enum ptlrpc_nrs_ctl opc, void *arg)
630 {
631 struct ptlrpc_nrs_policy *policy;
632 int rc = 0;
633
634 spin_lock(&nrs->nrs_lock);
635
636 policy = nrs_policy_find_locked(nrs, name);
637 if (policy == NULL) {
638 rc = -ENOENT;
639 goto out;
640 }
641
642 switch (opc) {
643 /**
644 * Unknown opcode, pass it down to the policy-specific control
645 * function for handling.
646 */
647 default:
648 rc = nrs_policy_ctl_locked(policy, opc, arg);
649 break;
650
651 /**
652 * Start \e policy
653 */
654 case PTLRPC_NRS_CTL_START:
655 rc = nrs_policy_start_locked(policy);
656 break;
657 }
658 out:
659 if (policy != NULL)
660 nrs_policy_put_locked(policy);
661
662 spin_unlock(&nrs->nrs_lock);
663
664 return rc;
665 }
666
667 /**
668 * Unregisters a policy by name.
669 *
670 * \param[in] nrs the NRS head this policy belongs to.
671 * \param[in] name the human-readable policy name; should be the same as
672 * ptlrpc_nrs_pol_desc::pd_name
673 *
674 * \retval -ve error
675 * \retval 0 success
676 */
nrs_policy_unregister(struct ptlrpc_nrs * nrs,char * name)677 static int nrs_policy_unregister(struct ptlrpc_nrs *nrs, char *name)
678 {
679 struct ptlrpc_nrs_policy *policy = NULL;
680
681 spin_lock(&nrs->nrs_lock);
682
683 policy = nrs_policy_find_locked(nrs, name);
684 if (policy == NULL) {
685 spin_unlock(&nrs->nrs_lock);
686
687 CERROR("Can't find NRS policy %s\n", name);
688 return -ENOENT;
689 }
690
691 if (policy->pol_ref > 1) {
692 CERROR("Policy %s is busy with %d references\n", name,
693 (int)policy->pol_ref);
694 nrs_policy_put_locked(policy);
695
696 spin_unlock(&nrs->nrs_lock);
697 return -EBUSY;
698 }
699
700 LASSERT(policy->pol_req_queued == 0);
701 LASSERT(policy->pol_req_started == 0);
702
703 if (policy->pol_state != NRS_POL_STATE_STOPPED) {
704 nrs_policy_stop_locked(policy);
705 LASSERT(policy->pol_state == NRS_POL_STATE_STOPPED);
706 }
707
708 list_del(&policy->pol_list);
709 nrs->nrs_num_pols--;
710
711 nrs_policy_put_locked(policy);
712
713 spin_unlock(&nrs->nrs_lock);
714
715 nrs_policy_fini(policy);
716
717 LASSERT(policy->pol_private == NULL);
718 OBD_FREE_PTR(policy);
719
720 return 0;
721 }
722
723 /**
724 * Register a policy from \policy descriptor \a desc with NRS head \a nrs.
725 *
726 * \param[in] nrs the NRS head on which the policy will be registered.
727 * \param[in] desc the policy descriptor from which the information will be
728 * obtained to register the policy.
729 *
730 * \retval -ve error
731 * \retval 0 success
732 */
nrs_policy_register(struct ptlrpc_nrs * nrs,struct ptlrpc_nrs_pol_desc * desc)733 static int nrs_policy_register(struct ptlrpc_nrs *nrs,
734 struct ptlrpc_nrs_pol_desc *desc)
735 {
736 struct ptlrpc_nrs_policy *policy;
737 struct ptlrpc_nrs_policy *tmp;
738 struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt;
739 int rc;
740
741 LASSERT(svcpt != NULL);
742 LASSERT(desc->pd_ops != NULL);
743 LASSERT(desc->pd_ops->op_res_get != NULL);
744 LASSERT(desc->pd_ops->op_req_get != NULL);
745 LASSERT(desc->pd_ops->op_req_enqueue != NULL);
746 LASSERT(desc->pd_ops->op_req_dequeue != NULL);
747 LASSERT(desc->pd_compat != NULL);
748
749 OBD_CPT_ALLOC_GFP(policy, svcpt->scp_service->srv_cptable,
750 svcpt->scp_cpt, sizeof(*policy), GFP_NOFS);
751 if (policy == NULL)
752 return -ENOMEM;
753
754 policy->pol_nrs = nrs;
755 policy->pol_desc = desc;
756 policy->pol_state = NRS_POL_STATE_STOPPED;
757 policy->pol_flags = desc->pd_flags;
758
759 INIT_LIST_HEAD(&policy->pol_list);
760 INIT_LIST_HEAD(&policy->pol_list_queued);
761
762 rc = nrs_policy_init(policy);
763 if (rc != 0) {
764 OBD_FREE_PTR(policy);
765 return rc;
766 }
767
768 spin_lock(&nrs->nrs_lock);
769
770 tmp = nrs_policy_find_locked(nrs, policy->pol_desc->pd_name);
771 if (tmp != NULL) {
772 CERROR("NRS policy %s has been registered, can't register it for %s\n",
773 policy->pol_desc->pd_name,
774 svcpt->scp_service->srv_name);
775 nrs_policy_put_locked(tmp);
776
777 spin_unlock(&nrs->nrs_lock);
778 nrs_policy_fini(policy);
779 OBD_FREE_PTR(policy);
780
781 return -EEXIST;
782 }
783
784 list_add_tail(&policy->pol_list, &nrs->nrs_policy_list);
785 nrs->nrs_num_pols++;
786
787 if (policy->pol_flags & PTLRPC_NRS_FL_REG_START)
788 rc = nrs_policy_start_locked(policy);
789
790 spin_unlock(&nrs->nrs_lock);
791
792 if (rc != 0)
793 (void) nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
794
795 return rc;
796 }
797
798 /**
799 * Enqueue request \a req using one of the policies its resources are referring
800 * to.
801 *
802 * \param[in] req the request to enqueue.
803 */
ptlrpc_nrs_req_add_nolock(struct ptlrpc_request * req)804 static void ptlrpc_nrs_req_add_nolock(struct ptlrpc_request *req)
805 {
806 struct ptlrpc_nrs_policy *policy;
807
808 LASSERT(req->rq_nrq.nr_initialized);
809 LASSERT(!req->rq_nrq.nr_enqueued);
810
811 nrs_request_enqueue(&req->rq_nrq);
812 req->rq_nrq.nr_enqueued = 1;
813
814 policy = nrs_request_policy(&req->rq_nrq);
815 /**
816 * Add the policy to the NRS head's list of policies with enqueued
817 * requests, if it has not been added there.
818 */
819 if (unlikely(list_empty(&policy->pol_list_queued)))
820 list_add_tail(&policy->pol_list_queued,
821 &policy->pol_nrs->nrs_policy_queued);
822 }
823
824 /**
825 * Enqueue a request on the high priority NRS head.
826 *
827 * \param req the request to enqueue.
828 */
ptlrpc_nrs_hpreq_add_nolock(struct ptlrpc_request * req)829 static void ptlrpc_nrs_hpreq_add_nolock(struct ptlrpc_request *req)
830 {
831 int opc = lustre_msg_get_opc(req->rq_reqmsg);
832
833 spin_lock(&req->rq_lock);
834 req->rq_hp = 1;
835 ptlrpc_nrs_req_add_nolock(req);
836 if (opc != OBD_PING)
837 DEBUG_REQ(D_NET, req, "high priority req");
838 spin_unlock(&req->rq_lock);
839 }
840
841 /**
842 * Returns a boolean predicate indicating whether the policy described by
843 * \a desc is adequate for use with service \a svc.
844 *
845 * \param[in] svc the service
846 * \param[in] desc the policy descriptor
847 *
848 * \retval false the policy is not compatible with the service
849 * \retval true the policy is compatible with the service
850 */
nrs_policy_compatible(const struct ptlrpc_service * svc,const struct ptlrpc_nrs_pol_desc * desc)851 static inline bool nrs_policy_compatible(const struct ptlrpc_service *svc,
852 const struct ptlrpc_nrs_pol_desc *desc)
853 {
854 return desc->pd_compat(svc, desc);
855 }
856
857 /**
858 * Registers all compatible policies in nrs_core.nrs_policies, for NRS head
859 * \a nrs.
860 *
861 * \param[in] nrs the NRS head
862 *
863 * \retval -ve error
864 * \retval 0 success
865 *
866 * \pre mutex_is_locked(&nrs_core.nrs_mutex)
867 *
868 * \see ptlrpc_service_nrs_setup()
869 */
nrs_register_policies_locked(struct ptlrpc_nrs * nrs)870 static int nrs_register_policies_locked(struct ptlrpc_nrs *nrs)
871 {
872 struct ptlrpc_nrs_pol_desc *desc;
873 /* for convenience */
874 struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt;
875 struct ptlrpc_service *svc = svcpt->scp_service;
876 int rc = -EINVAL;
877
878 LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
879
880 list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
881 if (nrs_policy_compatible(svc, desc)) {
882 rc = nrs_policy_register(nrs, desc);
883 if (rc != 0) {
884 CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n",
885 desc->pd_name, svcpt->scp_cpt,
886 svc->srv_name, rc);
887 /**
888 * Fail registration if any of the policies'
889 * registration fails.
890 */
891 break;
892 }
893 }
894 }
895
896 return rc;
897 }
898
899 /**
900 * Initializes NRS head \a nrs of service partition \a svcpt, and registers all
901 * compatible policies in NRS core, with the NRS head.
902 *
903 * \param[in] nrs the NRS head
904 * \param[in] svcpt the PTLRPC service partition to setup
905 *
906 * \retval -ve error
907 * \retval 0 success
908 *
909 * \pre mutex_is_locked(&nrs_core.nrs_mutex)
910 */
nrs_svcpt_setup_locked0(struct ptlrpc_nrs * nrs,struct ptlrpc_service_part * svcpt)911 static int nrs_svcpt_setup_locked0(struct ptlrpc_nrs *nrs,
912 struct ptlrpc_service_part *svcpt)
913 {
914 enum ptlrpc_nrs_queue_type queue;
915
916 LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
917
918 if (nrs == &svcpt->scp_nrs_reg)
919 queue = PTLRPC_NRS_QUEUE_REG;
920 else if (nrs == svcpt->scp_nrs_hp)
921 queue = PTLRPC_NRS_QUEUE_HP;
922 else
923 LBUG();
924
925 nrs->nrs_svcpt = svcpt;
926 nrs->nrs_queue_type = queue;
927 spin_lock_init(&nrs->nrs_lock);
928 INIT_LIST_HEAD(&nrs->nrs_policy_list);
929 INIT_LIST_HEAD(&nrs->nrs_policy_queued);
930
931 return nrs_register_policies_locked(nrs);
932 }
933
934 /**
935 * Allocates a regular and optionally a high-priority NRS head (if the service
936 * handles high-priority RPCs), and then registers all available compatible
937 * policies on those NRS heads.
938 *
939 * \param[in,out] svcpt the PTLRPC service partition to setup
940 *
941 * \pre mutex_is_locked(&nrs_core.nrs_mutex)
942 */
nrs_svcpt_setup_locked(struct ptlrpc_service_part * svcpt)943 static int nrs_svcpt_setup_locked(struct ptlrpc_service_part *svcpt)
944 {
945 struct ptlrpc_nrs *nrs;
946 int rc;
947
948 LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
949
950 /**
951 * Initialize the regular NRS head.
952 */
953 nrs = nrs_svcpt2nrs(svcpt, false);
954 rc = nrs_svcpt_setup_locked0(nrs, svcpt);
955 if (rc < 0)
956 goto out;
957
958 /**
959 * Optionally allocate a high-priority NRS head.
960 */
961 if (svcpt->scp_service->srv_ops.so_hpreq_handler == NULL)
962 goto out;
963
964 OBD_CPT_ALLOC_PTR(svcpt->scp_nrs_hp,
965 svcpt->scp_service->srv_cptable,
966 svcpt->scp_cpt);
967 if (svcpt->scp_nrs_hp == NULL) {
968 rc = -ENOMEM;
969 goto out;
970 }
971
972 nrs = nrs_svcpt2nrs(svcpt, true);
973 rc = nrs_svcpt_setup_locked0(nrs, svcpt);
974
975 out:
976 return rc;
977 }
978
979 /**
980 * Unregisters all policies on all available NRS heads in a service partition;
981 * called at PTLRPC service unregistration time.
982 *
983 * \param[in] svcpt the PTLRPC service partition
984 *
985 * \pre mutex_is_locked(&nrs_core.nrs_mutex)
986 */
nrs_svcpt_cleanup_locked(struct ptlrpc_service_part * svcpt)987 static void nrs_svcpt_cleanup_locked(struct ptlrpc_service_part *svcpt)
988 {
989 struct ptlrpc_nrs *nrs;
990 struct ptlrpc_nrs_policy *policy;
991 struct ptlrpc_nrs_policy *tmp;
992 int rc;
993 bool hp = false;
994
995 LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
996
997 again:
998 nrs = nrs_svcpt2nrs(svcpt, hp);
999 nrs->nrs_stopping = 1;
1000
1001 list_for_each_entry_safe(policy, tmp, &nrs->nrs_policy_list,
1002 pol_list) {
1003 rc = nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
1004 LASSERT(rc == 0);
1005 }
1006
1007 /**
1008 * If the service partition has an HP NRS head, clean that up as well.
1009 */
1010 if (!hp && nrs_svcpt_has_hp(svcpt)) {
1011 hp = true;
1012 goto again;
1013 }
1014
1015 if (hp)
1016 OBD_FREE_PTR(nrs);
1017 }
1018
1019 /**
1020 * Returns the descriptor for a policy as identified by by \a name.
1021 *
1022 * \param[in] name the policy name
1023 *
1024 * \retval the policy descriptor
1025 * \retval NULL
1026 */
nrs_policy_find_desc_locked(const char * name)1027 static struct ptlrpc_nrs_pol_desc *nrs_policy_find_desc_locked(const char *name)
1028 {
1029 struct ptlrpc_nrs_pol_desc *tmp;
1030
1031 list_for_each_entry(tmp, &nrs_core.nrs_policies, pd_list) {
1032 if (strncmp(tmp->pd_name, name, NRS_POL_NAME_MAX) == 0)
1033 return tmp;
1034 }
1035 return NULL;
1036 }
1037
1038 /**
1039 * Removes the policy from all supported NRS heads of all partitions of all
1040 * PTLRPC services.
1041 *
1042 * \param[in] desc the policy descriptor to unregister
1043 *
1044 * \retval -ve error
1045 * \retval 0 successfully unregistered policy on all supported NRS heads
1046 *
1047 * \pre mutex_is_locked(&nrs_core.nrs_mutex)
1048 * \pre mutex_is_locked(&ptlrpc_all_services_mutex)
1049 */
nrs_policy_unregister_locked(struct ptlrpc_nrs_pol_desc * desc)1050 static int nrs_policy_unregister_locked(struct ptlrpc_nrs_pol_desc *desc)
1051 {
1052 struct ptlrpc_nrs *nrs;
1053 struct ptlrpc_service *svc;
1054 struct ptlrpc_service_part *svcpt;
1055 int i;
1056 int rc = 0;
1057
1058 LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
1059 LASSERT(mutex_is_locked(&ptlrpc_all_services_mutex));
1060
1061 list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
1062
1063 if (!nrs_policy_compatible(svc, desc) ||
1064 unlikely(svc->srv_is_stopping))
1065 continue;
1066
1067 ptlrpc_service_for_each_part(svcpt, i, svc) {
1068 bool hp = false;
1069
1070 again:
1071 nrs = nrs_svcpt2nrs(svcpt, hp);
1072 rc = nrs_policy_unregister(nrs, desc->pd_name);
1073 /**
1074 * Ignore -ENOENT as the policy may not have registered
1075 * successfully on all service partitions.
1076 */
1077 if (rc == -ENOENT) {
1078 rc = 0;
1079 } else if (rc != 0) {
1080 CERROR("Failed to unregister NRS policy %s for partition %d of service %s: %d\n",
1081 desc->pd_name, svcpt->scp_cpt,
1082 svcpt->scp_service->srv_name, rc);
1083 return rc;
1084 }
1085
1086 if (!hp && nrs_svc_has_hp(svc)) {
1087 hp = true;
1088 goto again;
1089 }
1090 }
1091
1092 if (desc->pd_ops->op_lprocfs_fini != NULL)
1093 desc->pd_ops->op_lprocfs_fini(svc);
1094 }
1095
1096 return rc;
1097 }
1098
1099 /**
1100 * Registers a new policy with NRS core.
1101 *
1102 * The function will only succeed if policy registration with all compatible
1103 * service partitions (if any) is successful.
1104 *
1105 * N.B. This function should be called either at ptlrpc module initialization
1106 * time when registering a policy that ships with NRS core, or in a
1107 * module's init() function for policies registering from other modules.
1108 *
1109 * \param[in] conf configuration information for the new policy to register
1110 *
1111 * \retval -ve error
1112 * \retval 0 success
1113 */
ptlrpc_nrs_policy_register(struct ptlrpc_nrs_pol_conf * conf)1114 int ptlrpc_nrs_policy_register(struct ptlrpc_nrs_pol_conf *conf)
1115 {
1116 struct ptlrpc_service *svc;
1117 struct ptlrpc_nrs_pol_desc *desc;
1118 int rc = 0;
1119
1120 LASSERT(conf != NULL);
1121 LASSERT(conf->nc_ops != NULL);
1122 LASSERT(conf->nc_compat != NULL);
1123 LASSERT(ergo(conf->nc_compat == nrs_policy_compat_one,
1124 conf->nc_compat_svc_name != NULL));
1125 LASSERT(ergo((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0,
1126 conf->nc_owner != NULL));
1127
1128 conf->nc_name[NRS_POL_NAME_MAX - 1] = '\0';
1129
1130 /**
1131 * External policies are not allowed to start immediately upon
1132 * registration, as there is a relatively higher chance that their
1133 * registration might fail. In such a case, some policy instances may
1134 * already have requests queued wen unregistration needs to happen as
1135 * part o cleanup; since there is currently no way to drain requests
1136 * from a policy unless the service is unregistering, we just disallow
1137 * this.
1138 */
1139 if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) &&
1140 (conf->nc_flags & (PTLRPC_NRS_FL_FALLBACK |
1141 PTLRPC_NRS_FL_REG_START))) {
1142 CERROR("NRS: failing to register policy %s. Please check policy flags; external policies cannot act as fallback policies, or be started immediately upon registration without interaction with lprocfs\n",
1143 conf->nc_name);
1144 return -EINVAL;
1145 }
1146
1147 mutex_lock(&nrs_core.nrs_mutex);
1148
1149 if (nrs_policy_find_desc_locked(conf->nc_name) != NULL) {
1150 CERROR("NRS: failing to register policy %s which has already been registered with NRS core!\n",
1151 conf->nc_name);
1152 rc = -EEXIST;
1153 goto fail;
1154 }
1155
1156 OBD_ALLOC_PTR(desc);
1157 if (desc == NULL) {
1158 rc = -ENOMEM;
1159 goto fail;
1160 }
1161
1162 strncpy(desc->pd_name, conf->nc_name, NRS_POL_NAME_MAX);
1163 desc->pd_ops = conf->nc_ops;
1164 desc->pd_compat = conf->nc_compat;
1165 desc->pd_compat_svc_name = conf->nc_compat_svc_name;
1166 if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0)
1167 desc->pd_owner = conf->nc_owner;
1168 desc->pd_flags = conf->nc_flags;
1169 atomic_set(&desc->pd_refs, 0);
1170
1171 /**
1172 * For policies that are held in the same module as NRS (currently
1173 * ptlrpc), do not register the policy with all compatible services,
1174 * as the services will not have started at this point, since we are
1175 * calling from ptlrpc module initialization code. In such cases each
1176 * service will register all compatible policies later, via
1177 * ptlrpc_service_nrs_setup().
1178 */
1179 if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) == 0)
1180 goto internal;
1181
1182 /**
1183 * Register the new policy on all compatible services
1184 */
1185 mutex_lock(&ptlrpc_all_services_mutex);
1186
1187 list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
1188 struct ptlrpc_service_part *svcpt;
1189 int i;
1190 int rc2;
1191
1192 if (!nrs_policy_compatible(svc, desc) ||
1193 unlikely(svc->srv_is_stopping))
1194 continue;
1195
1196 ptlrpc_service_for_each_part(svcpt, i, svc) {
1197 struct ptlrpc_nrs *nrs;
1198 bool hp = false;
1199 again:
1200 nrs = nrs_svcpt2nrs(svcpt, hp);
1201 rc = nrs_policy_register(nrs, desc);
1202 if (rc != 0) {
1203 CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n",
1204 desc->pd_name, svcpt->scp_cpt,
1205 svcpt->scp_service->srv_name, rc);
1206
1207 rc2 = nrs_policy_unregister_locked(desc);
1208 /**
1209 * Should not fail at this point
1210 */
1211 LASSERT(rc2 == 0);
1212 mutex_unlock(&ptlrpc_all_services_mutex);
1213 OBD_FREE_PTR(desc);
1214 goto fail;
1215 }
1216
1217 if (!hp && nrs_svc_has_hp(svc)) {
1218 hp = true;
1219 goto again;
1220 }
1221 }
1222
1223 /**
1224 * No need to take a reference to other modules here, as we
1225 * will be calling from the module's init() function.
1226 */
1227 if (desc->pd_ops->op_lprocfs_init != NULL) {
1228 rc = desc->pd_ops->op_lprocfs_init(svc);
1229 if (rc != 0) {
1230 rc2 = nrs_policy_unregister_locked(desc);
1231 /**
1232 * Should not fail at this point
1233 */
1234 LASSERT(rc2 == 0);
1235 mutex_unlock(&ptlrpc_all_services_mutex);
1236 OBD_FREE_PTR(desc);
1237 goto fail;
1238 }
1239 }
1240 }
1241
1242 mutex_unlock(&ptlrpc_all_services_mutex);
1243 internal:
1244 list_add_tail(&desc->pd_list, &nrs_core.nrs_policies);
1245 fail:
1246 mutex_unlock(&nrs_core.nrs_mutex);
1247
1248 return rc;
1249 }
1250 EXPORT_SYMBOL(ptlrpc_nrs_policy_register);
1251
1252 /**
1253 * Unregisters a previously registered policy with NRS core. All instances of
1254 * the policy on all NRS heads of all supported services are removed.
1255 *
1256 * N.B. This function should only be called from a module's exit() function.
1257 * Although it can be used for policies that ship alongside NRS core, the
1258 * function is primarily intended for policies that register externally,
1259 * from other modules.
1260 *
1261 * \param[in] conf configuration information for the policy to unregister
1262 *
1263 * \retval -ve error
1264 * \retval 0 success
1265 */
ptlrpc_nrs_policy_unregister(struct ptlrpc_nrs_pol_conf * conf)1266 int ptlrpc_nrs_policy_unregister(struct ptlrpc_nrs_pol_conf *conf)
1267 {
1268 struct ptlrpc_nrs_pol_desc *desc;
1269 int rc;
1270
1271 LASSERT(conf != NULL);
1272
1273 if (conf->nc_flags & PTLRPC_NRS_FL_FALLBACK) {
1274 CERROR("Unable to unregister a fallback policy, unless the PTLRPC service is stopping.\n");
1275 return -EPERM;
1276 }
1277
1278 conf->nc_name[NRS_POL_NAME_MAX - 1] = '\0';
1279
1280 mutex_lock(&nrs_core.nrs_mutex);
1281
1282 desc = nrs_policy_find_desc_locked(conf->nc_name);
1283 if (desc == NULL) {
1284 CERROR("Failing to unregister NRS policy %s which has not been registered with NRS core!\n",
1285 conf->nc_name);
1286 rc = -ENOENT;
1287 goto not_exist;
1288 }
1289
1290 mutex_lock(&ptlrpc_all_services_mutex);
1291
1292 rc = nrs_policy_unregister_locked(desc);
1293 if (rc < 0) {
1294 if (rc == -EBUSY)
1295 CERROR("Please first stop policy %s on all service partitions and then retry to unregister the policy.\n",
1296 conf->nc_name);
1297 goto fail;
1298 }
1299
1300 CDEBUG(D_INFO, "Unregistering policy %s from NRS core.\n",
1301 conf->nc_name);
1302
1303 list_del(&desc->pd_list);
1304 OBD_FREE_PTR(desc);
1305
1306 fail:
1307 mutex_unlock(&ptlrpc_all_services_mutex);
1308
1309 not_exist:
1310 mutex_unlock(&nrs_core.nrs_mutex);
1311
1312 return rc;
1313 }
1314 EXPORT_SYMBOL(ptlrpc_nrs_policy_unregister);
1315
1316 /**
1317 * Setup NRS heads on all service partitions of service \a svc, and register
1318 * all compatible policies on those NRS heads.
1319 *
1320 * To be called from within ptl
1321 * \param[in] svc the service to setup
1322 *
1323 * \retval -ve error, the calling logic should eventually call
1324 * ptlrpc_service_nrs_cleanup() to undo any work performed
1325 * by this function.
1326 *
1327 * \see ptlrpc_register_service()
1328 * \see ptlrpc_service_nrs_cleanup()
1329 */
ptlrpc_service_nrs_setup(struct ptlrpc_service * svc)1330 int ptlrpc_service_nrs_setup(struct ptlrpc_service *svc)
1331 {
1332 struct ptlrpc_service_part *svcpt;
1333 const struct ptlrpc_nrs_pol_desc *desc;
1334 int i;
1335 int rc = 0;
1336
1337 mutex_lock(&nrs_core.nrs_mutex);
1338
1339 /**
1340 * Initialize NRS heads on all service CPTs.
1341 */
1342 ptlrpc_service_for_each_part(svcpt, i, svc) {
1343 rc = nrs_svcpt_setup_locked(svcpt);
1344 if (rc != 0)
1345 goto failed;
1346 }
1347
1348 /**
1349 * Set up lprocfs interfaces for all supported policies for the
1350 * service.
1351 */
1352 list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
1353 if (!nrs_policy_compatible(svc, desc))
1354 continue;
1355
1356 if (desc->pd_ops->op_lprocfs_init != NULL) {
1357 rc = desc->pd_ops->op_lprocfs_init(svc);
1358 if (rc != 0)
1359 goto failed;
1360 }
1361 }
1362
1363 failed:
1364
1365 mutex_unlock(&nrs_core.nrs_mutex);
1366
1367 return rc;
1368 }
1369
1370 /**
1371 * Unregisters all policies on all service partitions of service \a svc.
1372 *
1373 * \param[in] svc the PTLRPC service to unregister
1374 */
ptlrpc_service_nrs_cleanup(struct ptlrpc_service * svc)1375 void ptlrpc_service_nrs_cleanup(struct ptlrpc_service *svc)
1376 {
1377 struct ptlrpc_service_part *svcpt;
1378 const struct ptlrpc_nrs_pol_desc *desc;
1379 int i;
1380
1381 mutex_lock(&nrs_core.nrs_mutex);
1382
1383 /**
1384 * Clean up NRS heads on all service partitions
1385 */
1386 ptlrpc_service_for_each_part(svcpt, i, svc)
1387 nrs_svcpt_cleanup_locked(svcpt);
1388
1389 /**
1390 * Clean up lprocfs interfaces for all supported policies for the
1391 * service.
1392 */
1393 list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
1394 if (!nrs_policy_compatible(svc, desc))
1395 continue;
1396
1397 if (desc->pd_ops->op_lprocfs_fini != NULL)
1398 desc->pd_ops->op_lprocfs_fini(svc);
1399 }
1400
1401 mutex_unlock(&nrs_core.nrs_mutex);
1402 }
1403
1404 /**
1405 * Obtains NRS head resources for request \a req.
1406 *
1407 * These could be either on the regular or HP NRS head of \a svcpt; resources
1408 * taken on the regular head can later be swapped for HP head resources by
1409 * ldlm_lock_reorder_req().
1410 *
1411 * \param[in] svcpt the service partition
1412 * \param[in] req the request
1413 * \param[in] hp which NRS head of \a svcpt to use
1414 */
ptlrpc_nrs_req_initialize(struct ptlrpc_service_part * svcpt,struct ptlrpc_request * req,bool hp)1415 void ptlrpc_nrs_req_initialize(struct ptlrpc_service_part *svcpt,
1416 struct ptlrpc_request *req, bool hp)
1417 {
1418 struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
1419
1420 memset(&req->rq_nrq, 0, sizeof(req->rq_nrq));
1421 nrs_resource_get_safe(nrs, &req->rq_nrq, req->rq_nrq.nr_res_ptrs,
1422 false);
1423
1424 /**
1425 * It is fine to access \e nr_initialized without locking as there is
1426 * no contention at this early stage.
1427 */
1428 req->rq_nrq.nr_initialized = 1;
1429 }
1430
1431 /**
1432 * Releases resources for a request; is called after the request has been
1433 * handled.
1434 *
1435 * \param[in] req the request
1436 *
1437 * \see ptlrpc_server_finish_request()
1438 */
ptlrpc_nrs_req_finalize(struct ptlrpc_request * req)1439 void ptlrpc_nrs_req_finalize(struct ptlrpc_request *req)
1440 {
1441 if (req->rq_nrq.nr_initialized) {
1442 nrs_resource_put_safe(req->rq_nrq.nr_res_ptrs);
1443 /* no protection on bit nr_initialized because no
1444 * contention at this late stage */
1445 req->rq_nrq.nr_finalized = 1;
1446 }
1447 }
1448
ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request * req)1449 void ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request *req)
1450 {
1451 if (req->rq_nrq.nr_started)
1452 nrs_request_stop(&req->rq_nrq);
1453 }
1454
1455 /**
1456 * Enqueues request \a req on either the regular or high-priority NRS head
1457 * of service partition \a svcpt.
1458 *
1459 * \param[in] svcpt the service partition
1460 * \param[in] req the request to be enqueued
1461 * \param[in] hp whether to enqueue the request on the regular or
1462 * high-priority NRS head.
1463 */
ptlrpc_nrs_req_add(struct ptlrpc_service_part * svcpt,struct ptlrpc_request * req,bool hp)1464 void ptlrpc_nrs_req_add(struct ptlrpc_service_part *svcpt,
1465 struct ptlrpc_request *req, bool hp)
1466 {
1467 spin_lock(&svcpt->scp_req_lock);
1468
1469 if (hp)
1470 ptlrpc_nrs_hpreq_add_nolock(req);
1471 else
1472 ptlrpc_nrs_req_add_nolock(req);
1473
1474 spin_unlock(&svcpt->scp_req_lock);
1475 }
1476
nrs_request_removed(struct ptlrpc_nrs_policy * policy)1477 static void nrs_request_removed(struct ptlrpc_nrs_policy *policy)
1478 {
1479 LASSERT(policy->pol_nrs->nrs_req_queued > 0);
1480 LASSERT(policy->pol_req_queued > 0);
1481
1482 policy->pol_nrs->nrs_req_queued--;
1483 policy->pol_req_queued--;
1484
1485 /**
1486 * If the policy has no more requests queued, remove it from
1487 * ptlrpc_nrs::nrs_policy_queued.
1488 */
1489 if (unlikely(policy->pol_req_queued == 0)) {
1490 list_del_init(&policy->pol_list_queued);
1491
1492 /**
1493 * If there are other policies with queued requests, move the
1494 * current policy to the end so that we can round robin over
1495 * all policies and drain the requests.
1496 */
1497 } else if (policy->pol_req_queued != policy->pol_nrs->nrs_req_queued) {
1498 LASSERT(policy->pol_req_queued <
1499 policy->pol_nrs->nrs_req_queued);
1500
1501 list_move_tail(&policy->pol_list_queued,
1502 &policy->pol_nrs->nrs_policy_queued);
1503 }
1504 }
1505
1506 /**
1507 * Obtains a request for handling from an NRS head of service partition
1508 * \a svcpt.
1509 *
1510 * \param[in] svcpt the service partition
1511 * \param[in] hp whether to obtain a request from the regular or
1512 * high-priority NRS head.
1513 * \param[in] peek when set, signifies that we just want to examine the
1514 * request, and not handle it, so the request is not removed
1515 * from the policy.
1516 * \param[in] force when set, it will force a policy to return a request if it
1517 * has one pending
1518 *
1519 * \retval the request to be handled
1520 * \retval NULL the head has no requests to serve
1521 */
1522 struct ptlrpc_request *
ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part * svcpt,bool hp,bool peek,bool force)1523 ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part *svcpt, bool hp,
1524 bool peek, bool force)
1525 {
1526 struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
1527 struct ptlrpc_nrs_policy *policy;
1528 struct ptlrpc_nrs_request *nrq;
1529
1530 /**
1531 * Always try to drain requests from all NRS polices even if they are
1532 * inactive, because the user can change policy status at runtime.
1533 */
1534 list_for_each_entry(policy, &nrs->nrs_policy_queued,
1535 pol_list_queued) {
1536 nrq = nrs_request_get(policy, peek, force);
1537 if (nrq != NULL) {
1538 if (likely(!peek)) {
1539 nrq->nr_started = 1;
1540
1541 policy->pol_req_started++;
1542 policy->pol_nrs->nrs_req_started++;
1543
1544 nrs_request_removed(policy);
1545 }
1546
1547 return container_of(nrq, struct ptlrpc_request, rq_nrq);
1548 }
1549 }
1550
1551 return NULL;
1552 }
1553
1554 /**
1555 * Dequeues request \a req from the policy it has been enqueued on.
1556 *
1557 * \param[in] req the request
1558 */
ptlrpc_nrs_req_del_nolock(struct ptlrpc_request * req)1559 void ptlrpc_nrs_req_del_nolock(struct ptlrpc_request *req)
1560 {
1561 struct ptlrpc_nrs_policy *policy = nrs_request_policy(&req->rq_nrq);
1562
1563 policy->pol_desc->pd_ops->op_req_dequeue(policy, &req->rq_nrq);
1564
1565 req->rq_nrq.nr_enqueued = 0;
1566
1567 nrs_request_removed(policy);
1568 }
1569
1570 /**
1571 * Returns whether there are any requests currently enqueued on any of the
1572 * policies of service partition's \a svcpt NRS head specified by \a hp. Should
1573 * be called while holding ptlrpc_service_part::scp_req_lock to get a reliable
1574 * result.
1575 *
1576 * \param[in] svcpt the service partition to enquire.
1577 * \param[in] hp whether the regular or high-priority NRS head is to be
1578 * enquired.
1579 *
1580 * \retval false the indicated NRS head has no enqueued requests.
1581 * \retval true the indicated NRS head has some enqueued requests.
1582 */
ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part * svcpt,bool hp)1583 bool ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part *svcpt, bool hp)
1584 {
1585 struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
1586
1587 return nrs->nrs_req_queued > 0;
1588 };
1589
1590 /**
1591 * Moves request \a req from the regular to the high-priority NRS head.
1592 *
1593 * \param[in] req the request to move
1594 */
ptlrpc_nrs_req_hp_move(struct ptlrpc_request * req)1595 void ptlrpc_nrs_req_hp_move(struct ptlrpc_request *req)
1596 {
1597 struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
1598 struct ptlrpc_nrs_request *nrq = &req->rq_nrq;
1599 struct ptlrpc_nrs_resource *res1[NRS_RES_MAX];
1600 struct ptlrpc_nrs_resource *res2[NRS_RES_MAX];
1601
1602 /**
1603 * Obtain the high-priority NRS head resources.
1604 */
1605 nrs_resource_get_safe(nrs_svcpt2nrs(svcpt, true), nrq, res1, true);
1606
1607 spin_lock(&svcpt->scp_req_lock);
1608
1609 if (!ptlrpc_nrs_req_can_move(req))
1610 goto out;
1611
1612 ptlrpc_nrs_req_del_nolock(req);
1613
1614 memcpy(res2, nrq->nr_res_ptrs, NRS_RES_MAX * sizeof(res2[0]));
1615 memcpy(nrq->nr_res_ptrs, res1, NRS_RES_MAX * sizeof(res1[0]));
1616
1617 ptlrpc_nrs_hpreq_add_nolock(req);
1618
1619 memcpy(res1, res2, NRS_RES_MAX * sizeof(res1[0]));
1620 out:
1621 spin_unlock(&svcpt->scp_req_lock);
1622
1623 /**
1624 * Release either the regular NRS head resources if we moved the
1625 * request, or the high-priority NRS head resources if we took a
1626 * reference earlier in this function and ptlrpc_nrs_req_can_move()
1627 * returned false.
1628 */
1629 nrs_resource_put_safe(res1);
1630 }
1631
1632 /**
1633 * Carries out a control operation \a opc on the policy identified by the
1634 * human-readable \a name, on either all partitions, or only on the first
1635 * partition of service \a svc.
1636 *
1637 * \param[in] svc the service the policy belongs to.
1638 * \param[in] queue whether to carry out the command on the policy which
1639 * belongs to the regular, high-priority, or both NRS
1640 * heads of service partitions of \a svc.
1641 * \param[in] name the policy to act upon, by human-readable name
1642 * \param[in] opc the opcode of the operation to carry out
1643 * \param[in] single when set, the operation will only be carried out on the
1644 * NRS heads of the first service partition of \a svc.
1645 * This is useful for some policies which e.g. share
1646 * identical values on the same parameters of different
1647 * service partitions; when reading these parameters via
1648 * lprocfs, these policies may just want to obtain and
1649 * print out the values from the first service partition.
1650 * Storing these values centrally elsewhere then could be
1651 * another solution for this.
1652 * \param[in,out] arg can be used as a generic in/out buffer between control
1653 * operations and the user environment.
1654 *
1655 *\retval -ve error condition
1656 *\retval 0 operation was carried out successfully
1657 */
ptlrpc_nrs_policy_control(const struct ptlrpc_service * svc,enum ptlrpc_nrs_queue_type queue,char * name,enum ptlrpc_nrs_ctl opc,bool single,void * arg)1658 int ptlrpc_nrs_policy_control(const struct ptlrpc_service *svc,
1659 enum ptlrpc_nrs_queue_type queue, char *name,
1660 enum ptlrpc_nrs_ctl opc, bool single, void *arg)
1661 {
1662 struct ptlrpc_service_part *svcpt;
1663 int i;
1664 int rc = 0;
1665
1666 LASSERT(opc != PTLRPC_NRS_CTL_INVALID);
1667
1668 if ((queue & PTLRPC_NRS_QUEUE_BOTH) == 0)
1669 return -EINVAL;
1670
1671 ptlrpc_service_for_each_part(svcpt, i, svc) {
1672 if ((queue & PTLRPC_NRS_QUEUE_REG) != 0) {
1673 rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, false), name,
1674 opc, arg);
1675 if (rc != 0 || (queue == PTLRPC_NRS_QUEUE_REG &&
1676 single))
1677 goto out;
1678 }
1679
1680 if ((queue & PTLRPC_NRS_QUEUE_HP) != 0) {
1681 /**
1682 * XXX: We could optionally check for
1683 * nrs_svc_has_hp(svc) here, and return an error if it
1684 * is false. Right now we rely on the policies' lprocfs
1685 * handlers that call the present function to make this
1686 * check; if they fail to do so, they might hit the
1687 * assertion inside nrs_svcpt2nrs() below.
1688 */
1689 rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, true), name,
1690 opc, arg);
1691 if (rc != 0 || single)
1692 goto out;
1693 }
1694 }
1695 out:
1696 return rc;
1697 }
1698
1699
1700 /* ptlrpc/nrs_fifo.c */
1701 extern struct ptlrpc_nrs_pol_conf nrs_conf_fifo;
1702
1703 /**
1704 * Adds all policies that ship with the ptlrpc module, to NRS core's list of
1705 * policies \e nrs_core.nrs_policies.
1706 *
1707 * \retval 0 all policies have been registered successfully
1708 * \retval -ve error
1709 */
ptlrpc_nrs_init(void)1710 int ptlrpc_nrs_init(void)
1711 {
1712 int rc;
1713
1714 mutex_init(&nrs_core.nrs_mutex);
1715 INIT_LIST_HEAD(&nrs_core.nrs_policies);
1716
1717 rc = ptlrpc_nrs_policy_register(&nrs_conf_fifo);
1718 if (rc != 0)
1719 goto fail;
1720
1721
1722 return rc;
1723 fail:
1724 /**
1725 * Since no PTLRPC services have been started at this point, all we need
1726 * to do for cleanup is to free the descriptors.
1727 */
1728 ptlrpc_nrs_fini();
1729
1730 return rc;
1731 }
1732
1733 /**
1734 * Removes all policy descriptors from nrs_core::nrs_policies, and frees the
1735 * policy descriptors.
1736 *
1737 * Since all PTLRPC services are stopped at this point, there are no more
1738 * instances of any policies, because each service will have stopped its policy
1739 * instances in ptlrpc_service_nrs_cleanup(), so we just need to free the
1740 * descriptors here.
1741 */
ptlrpc_nrs_fini(void)1742 void ptlrpc_nrs_fini(void)
1743 {
1744 struct ptlrpc_nrs_pol_desc *desc;
1745 struct ptlrpc_nrs_pol_desc *tmp;
1746
1747 list_for_each_entry_safe(desc, tmp, &nrs_core.nrs_policies,
1748 pd_list) {
1749 list_del_init(&desc->pd_list);
1750 OBD_FREE_PTR(desc);
1751 }
1752 }
1753
1754 /** @} nrs */
1755