root/drivers/infiniband/hw/hfi1/affinity.c


DEFINITIONS

This source file includes the following definitions.
  1. init_cpu_mask_set
  2. _cpu_mask_set_gen_inc
  3. _cpu_mask_set_gen_dec
  4. cpu_mask_set_get_first
  5. cpu_mask_set_put
  6. init_real_cpu_mask
  7. node_affinity_init
  8. node_affinity_destroy
  9. node_affinity_destroy_all
  10. node_affinity_allocate
  11. node_affinity_add_tail
  12. node_affinity_lookup
  13. per_cpu_affinity_get
  14. per_cpu_affinity_put_max
  15. _dev_comp_vect_cpu_get
  16. _dev_comp_vect_cpu_put
  17. _dev_comp_vect_mappings_destroy
  18. _dev_comp_vect_mappings_create
  19. hfi1_comp_vectors_set_up
  20. hfi1_comp_vectors_clean_up
  21. hfi1_comp_vect_mappings_lookup
  22. _dev_comp_vect_cpu_mask_init
  23. _dev_comp_vect_cpu_mask_clean_up
  24. hfi1_dev_affinity_init
  25. hfi1_dev_affinity_clean_up
  26. hfi1_update_sdma_affinity
  27. hfi1_irq_notifier_notify
  28. hfi1_irq_notifier_release
  29. hfi1_setup_sdma_notifier
  30. hfi1_cleanup_sdma_notifier
  31. get_irq_affinity
  32. hfi1_get_irq_affinity
  33. hfi1_put_irq_affinity
  34. find_hw_thread_mask
  35. hfi1_get_proc_affinity
  36. hfi1_put_proc_affinity

   1 /*
   2  * Copyright(c) 2015 - 2018 Intel Corporation.
   3  *
   4  * This file is provided under a dual BSD/GPLv2 license.  When using or
   5  * redistributing this file, you may do so under either license.
   6  *
   7  * GPL LICENSE SUMMARY
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of version 2 of the GNU General Public License as
  11  * published by the Free Software Foundation.
  12  *
  13  * This program is distributed in the hope that it will be useful, but
  14  * WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * General Public License for more details.
  17  *
  18  * BSD LICENSE
  19  *
  20  * Redistribution and use in source and binary forms, with or without
  21  * modification, are permitted provided that the following conditions
  22  * are met:
  23  *
  24  *  - Redistributions of source code must retain the above copyright
  25  *    notice, this list of conditions and the following disclaimer.
  26  *  - Redistributions in binary form must reproduce the above copyright
  27  *    notice, this list of conditions and the following disclaimer in
  28  *    the documentation and/or other materials provided with the
  29  *    distribution.
  30  *  - Neither the name of Intel Corporation nor the names of its
  31  *    contributors may be used to endorse or promote products derived
  32  *    from this software without specific prior written permission.
  33  *
  34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  38  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45  *
  46  */
  47 #include <linux/topology.h>
  48 #include <linux/cpumask.h>
  49 #include <linux/module.h>
  50 #include <linux/interrupt.h>
  51 #include <linux/numa.h>
  52 
  53 #include "hfi.h"
  54 #include "affinity.h"
  55 #include "sdma.h"
  56 #include "trace.h"
  57 
  58 struct hfi1_affinity_node_list node_affinity = {
  59         .list = LIST_HEAD_INIT(node_affinity.list),
  60         .lock = __MUTEX_INITIALIZER(node_affinity.lock)
  61 };
  62 
  63 /* Names of IRQ types, indexed by enum irq_type */
  64 static const char * const irq_type_names[] = {
  65         "SDMA",
  66         "RCVCTXT",
  67         "GENERAL",
  68         "OTHER",
  69 };
  70 
  71 /* Per NUMA node count of HFI devices */
  72 static unsigned int *hfi1_per_node_cntr;
  73 
  74 static inline void init_cpu_mask_set(struct cpu_mask_set *set)
  75 {
  76         cpumask_clear(&set->mask);
  77         cpumask_clear(&set->used);
  78         set->gen = 0;
  79 }
  80 
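     /*
      * A cpu_mask_set hands out CPUs from 'mask' and records them in
      * 'used'.  Once every CPU in 'mask' has been handed out, the
      * generation counter 'gen' is bumped and 'used' is cleared so CPUs
      * can be reused (overloaded); releasing CPUs unwinds this again in
      * _cpu_mask_set_gen_dec().
      */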
  81 /* Increment generation of CPU set if needed */
  82 static void _cpu_mask_set_gen_inc(struct cpu_mask_set *set)
  83 {
  84         if (cpumask_equal(&set->mask, &set->used)) {
  85                 /*
  86                  * We've used up all the CPUs, bump up the generation
  87                  * and reset the 'used' map
  88                  */
  89                 set->gen++;
  90                 cpumask_clear(&set->used);
  91         }
  92 }
  93 
  94 static void _cpu_mask_set_gen_dec(struct cpu_mask_set *set)
  95 {
  96         if (cpumask_empty(&set->used) && set->gen) {
  97                 set->gen--;
  98                 cpumask_copy(&set->used, &set->mask);
  99         }
 100 }
 101 
 102 /* Get the first CPU from the list of unused CPUs in a CPU set data structure */
 103 static int cpu_mask_set_get_first(struct cpu_mask_set *set, cpumask_var_t diff)
 104 {
 105         int cpu;
 106 
 107         if (!diff || !set)
 108                 return -EINVAL;
 109 
 110         _cpu_mask_set_gen_inc(set);
 111 
 112         /* Find out which CPUs are left in the CPU mask */
 113         cpumask_andnot(diff, &set->mask, &set->used);
 114 
 115         cpu = cpumask_first(diff);
 116         if (cpu >= nr_cpu_ids) /* empty */
 117                 cpu = -EINVAL;
 118         else
 119                 cpumask_set_cpu(cpu, &set->used);
 120 
 121         return cpu;
 122 }
 123 
 124 static void cpu_mask_set_put(struct cpu_mask_set *set, int cpu)
 125 {
 126         if (!set)
 127                 return;
 128 
 129         cpumask_clear_cpu(cpu, &set->used);
 130         _cpu_mask_set_gen_dec(set);
 131 }
 132 
 133 /* Initialize non-HT cpu cores mask */
 134 void init_real_cpu_mask(void)
 135 {
 136         int possible, curr_cpu, i, ht;
 137 
 138         cpumask_clear(&node_affinity.real_cpu_mask);
 139 
 140         /* Start with cpu online mask as the real cpu mask */
 141         cpumask_copy(&node_affinity.real_cpu_mask, cpu_online_mask);
 142 
 143         /*
 144          * Remove HT cores from the real cpu mask.  Do this in two steps below.
 145          */
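             /*
              * Illustrative example (hypothetical topology, assuming HT
              * siblings are enumerated after all physical cores): with
              * CPUs 0-7 online and 2 HT siblings per core, possible = 8
              * and ht = 2, so CPUs 0-3 remain in real_cpu_mask and CPUs
              * 4-7 are cleared by the two steps below.
              */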
 146         possible = cpumask_weight(&node_affinity.real_cpu_mask);
 147         ht = cpumask_weight(topology_sibling_cpumask(
 148                                 cpumask_first(&node_affinity.real_cpu_mask)));
 149         /*
 150          * Step 1.  Skip over the first N HT siblings and use them as the
 151          * "real" cores.  Assumes that HT cores are not enumerated in
 152          * succession (except in the single core case).
 153          */
 154         curr_cpu = cpumask_first(&node_affinity.real_cpu_mask);
 155         for (i = 0; i < possible / ht; i++)
 156                 curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
 157         /*
 158          * Step 2.  Remove the remaining HT siblings.  Use cpumask_next() to
 159          * skip any gaps.
 160          */
 161         for (; i < possible; i++) {
 162                 cpumask_clear_cpu(curr_cpu, &node_affinity.real_cpu_mask);
 163                 curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
 164         }
 165 }
 166 
 167 int node_affinity_init(void)
 168 {
 169         int node;
 170         struct pci_dev *dev = NULL;
 171         const struct pci_device_id *ids = hfi1_pci_tbl;
 172 
 173         cpumask_clear(&node_affinity.proc.used);
 174         cpumask_copy(&node_affinity.proc.mask, cpu_online_mask);
 175 
 176         node_affinity.proc.gen = 0;
 177         node_affinity.num_core_siblings =
 178                                 cpumask_weight(topology_sibling_cpumask(
 179                                         cpumask_first(&node_affinity.proc.mask)
 180                                         ));
 181         node_affinity.num_possible_nodes = num_possible_nodes();
 182         node_affinity.num_online_nodes = num_online_nodes();
 183         node_affinity.num_online_cpus = num_online_cpus();
 184 
 185         /*
 186          * The real cpu mask is part of the affinity struct but it has to be
 187          * initialized early. It is needed to calculate the number of user
 188          * contexts in set_up_context_variables().
 189          */
 190         init_real_cpu_mask();
 191 
 192         hfi1_per_node_cntr = kcalloc(node_affinity.num_possible_nodes,
 193                                      sizeof(*hfi1_per_node_cntr), GFP_KERNEL);
 194         if (!hfi1_per_node_cntr)
 195                 return -ENOMEM;
 196 
 197         while (ids->vendor) {
 198                 dev = NULL;
 199                 while ((dev = pci_get_device(ids->vendor, ids->device, dev))) {
 200                         node = pcibus_to_node(dev->bus);
 201                         if (node < 0)
 202                                 goto out;
 203 
 204                         hfi1_per_node_cntr[node]++;
 205                 }
 206                 ids++;
 207         }
 208 
 209         return 0;
 210 
 211 out:
 212         /*
 213          * Invalid PCI NUMA node information found, note it, and populate
 214          * our database 1:1.
 215          */
 216         pr_err("HFI: Invalid PCI NUMA node. Performance may be affected\n");
 217         pr_err("HFI: System BIOS may need to be upgraded\n");
 218         for (node = 0; node < node_affinity.num_possible_nodes; node++)
 219                 hfi1_per_node_cntr[node] = 1;
 220 
 221         return 0;
 222 }
 223 
 224 static void node_affinity_destroy(struct hfi1_affinity_node *entry)
 225 {
 226         free_percpu(entry->comp_vect_affinity);
 227         kfree(entry);
 228 }
 229 
 230 void node_affinity_destroy_all(void)
 231 {
 232         struct list_head *pos, *q;
 233         struct hfi1_affinity_node *entry;
 234 
 235         mutex_lock(&node_affinity.lock);
 236         list_for_each_safe(pos, q, &node_affinity.list) {
 237                 entry = list_entry(pos, struct hfi1_affinity_node,
 238                                    list);
 239                 list_del(pos);
 240                 node_affinity_destroy(entry);
 241         }
 242         mutex_unlock(&node_affinity.lock);
 243         kfree(hfi1_per_node_cntr);
 244 }
 245 
 246 static struct hfi1_affinity_node *node_affinity_allocate(int node)
 247 {
 248         struct hfi1_affinity_node *entry;
 249 
 250         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 251         if (!entry)
 252                 return NULL;
 253         entry->node = node;
 254         entry->comp_vect_affinity = alloc_percpu(u16);
 255         INIT_LIST_HEAD(&entry->list);
 256 
 257         return entry;
 258 }
 259 
 260 /*
 261  * It appends an entry to the list.
 262  * It *must* be called with node_affinity.lock held.
 263  */
 264 static void node_affinity_add_tail(struct hfi1_affinity_node *entry)
 265 {
 266         list_add_tail(&entry->list, &node_affinity.list);
 267 }
 268 
 269 /* It must be called with node_affinity.lock held */
 270 static struct hfi1_affinity_node *node_affinity_lookup(int node)
 271 {
 272         struct list_head *pos;
 273         struct hfi1_affinity_node *entry;
 274 
 275         list_for_each(pos, &node_affinity.list) {
 276                 entry = list_entry(pos, struct hfi1_affinity_node, list);
 277                 if (entry->node == node)
 278                         return entry;
 279         }
 280 
 281         return NULL;
 282 }
 283 
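     /*
      * Pick the CPU in possible_cpumask with the lowest completion-vector
      * reference count in comp_vect_affinity and increment that count.
      * Returns the chosen CPU, or -EINVAL on invalid arguments or an
      * empty mask.
      */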
 284 static int per_cpu_affinity_get(cpumask_var_t possible_cpumask,
 285                                 u16 __percpu *comp_vect_affinity)
 286 {
 287         int curr_cpu;
 288         u16 cntr;
 289         u16 prev_cntr;
 290         int ret_cpu;
 291 
 292         if (!possible_cpumask) {
 293                 ret_cpu = -EINVAL;
 294                 goto fail;
 295         }
 296 
 297         if (!comp_vect_affinity) {
 298                 ret_cpu = -EINVAL;
 299                 goto fail;
 300         }
 301 
 302         ret_cpu = cpumask_first(possible_cpumask);
 303         if (ret_cpu >= nr_cpu_ids) {
 304                 ret_cpu = -EINVAL;
 305                 goto fail;
 306         }
 307 
 308         prev_cntr = *per_cpu_ptr(comp_vect_affinity, ret_cpu);
 309         for_each_cpu(curr_cpu, possible_cpumask) {
 310                 cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);
 311 
 312                 if (cntr < prev_cntr) {
 313                         ret_cpu = curr_cpu;
 314                         prev_cntr = cntr;
 315                 }
 316         }
 317 
 318         *per_cpu_ptr(comp_vect_affinity, ret_cpu) += 1;
 319 
 320 fail:
 321         return ret_cpu;
 322 }
 323 
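     /*
      * Find the CPU in possible_cpumask with the highest completion-vector
      * reference count in comp_vect_affinity and decrement that count.
      * Returns the chosen CPU, or -EINVAL on invalid arguments or an
      * empty mask.
      */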
 324 static int per_cpu_affinity_put_max(cpumask_var_t possible_cpumask,
 325                                     u16 __percpu *comp_vect_affinity)
 326 {
 327         int curr_cpu;
 328         int max_cpu;
 329         u16 cntr;
 330         u16 prev_cntr;
 331 
 332         if (!possible_cpumask)
 333                 return -EINVAL;
 334 
 335         if (!comp_vect_affinity)
 336                 return -EINVAL;
 337 
 338         max_cpu = cpumask_first(possible_cpumask);
 339         if (max_cpu >= nr_cpu_ids)
 340                 return -EINVAL;
 341 
 342         prev_cntr = *per_cpu_ptr(comp_vect_affinity, max_cpu);
 343         for_each_cpu(curr_cpu, possible_cpumask) {
 344                 cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);
 345 
 346                 if (cntr > prev_cntr) {
 347                         max_cpu = curr_cpu;
 348                         prev_cntr = cntr;
 349                 }
 350         }
 351 
 352         *per_cpu_ptr(comp_vect_affinity, max_cpu) -= 1;
 353 
 354         return max_cpu;
 355 }
 356 
 357 /*
 358  * Non-interrupt CPUs are used first, then interrupt CPUs.
 359  * Two already allocated cpu masks must be passed.
 360  */
 361 static int _dev_comp_vect_cpu_get(struct hfi1_devdata *dd,
 362                                   struct hfi1_affinity_node *entry,
 363                                   cpumask_var_t non_intr_cpus,
 364                                   cpumask_var_t available_cpus)
 365         __must_hold(&node_affinity.lock)
 366 {
 367         int cpu;
 368         struct cpu_mask_set *set = dd->comp_vect;
 369 
 370         lockdep_assert_held(&node_affinity.lock);
 371         if (!non_intr_cpus) {
 372                 cpu = -1;
 373                 goto fail;
 374         }
 375 
 376         if (!available_cpus) {
 377                 cpu = -1;
 378                 goto fail;
 379         }
 380 
 381         /* Available CPUs for pinning completion vectors */
 382         _cpu_mask_set_gen_inc(set);
 383         cpumask_andnot(available_cpus, &set->mask, &set->used);
 384 
 385         /* Available CPUs without SDMA engine interrupts */
 386         cpumask_andnot(non_intr_cpus, available_cpus,
 387                        &entry->def_intr.used);
 388 
 389         /* If there are non-interrupt CPUs available, use them first */
 390         if (!cpumask_empty(non_intr_cpus))
 391                 cpu = cpumask_first(non_intr_cpus);
 392         else /* Otherwise, use interrupt CPUs */
 393                 cpu = cpumask_first(available_cpus);
 394 
 395         if (cpu >= nr_cpu_ids) { /* empty */
 396                 cpu = -1;
 397                 goto fail;
 398         }
 399         cpumask_set_cpu(cpu, &set->used);
 400 
 401 fail:
 402         return cpu;
 403 }
 404 
 405 static void _dev_comp_vect_cpu_put(struct hfi1_devdata *dd, int cpu)
 406 {
 407         struct cpu_mask_set *set = dd->comp_vect;
 408 
 409         if (cpu < 0)
 410                 return;
 411 
 412         cpu_mask_set_put(set, cpu);
 413 }
 414 
 415 /* _dev_comp_vect_mappings_destroy() is reentrant */
 416 static void _dev_comp_vect_mappings_destroy(struct hfi1_devdata *dd)
 417 {
 418         int i, cpu;
 419 
 420         if (!dd->comp_vect_mappings)
 421                 return;
 422 
 423         for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
 424                 cpu = dd->comp_vect_mappings[i];
 425                 _dev_comp_vect_cpu_put(dd, cpu);
 426                 dd->comp_vect_mappings[i] = -1;
 427                 hfi1_cdbg(AFFINITY,
 428                           "[%s] Release CPU %d from completion vector %d",
 429                           rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), cpu, i);
 430         }
 431 
 432         kfree(dd->comp_vect_mappings);
 433         dd->comp_vect_mappings = NULL;
 434 }
 435 
 436 /*
 437  * This function creates the table for looking up CPUs for completion vectors.
 438  * num_comp_vectors needs to have been initialized before calling this function.
 439  */
 440 static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd,
 441                                           struct hfi1_affinity_node *entry)
 442         __must_hold(&node_affinity.lock)
 443 {
 444         int i, cpu, ret;
 445         cpumask_var_t non_intr_cpus;
 446         cpumask_var_t available_cpus;
 447 
 448         lockdep_assert_held(&node_affinity.lock);
 449 
 450         if (!zalloc_cpumask_var(&non_intr_cpus, GFP_KERNEL))
 451                 return -ENOMEM;
 452 
 453         if (!zalloc_cpumask_var(&available_cpus, GFP_KERNEL)) {
 454                 free_cpumask_var(non_intr_cpus);
 455                 return -ENOMEM;
 456         }
 457 
 458         dd->comp_vect_mappings = kcalloc(dd->comp_vect_possible_cpus,
 459                                          sizeof(*dd->comp_vect_mappings),
 460                                          GFP_KERNEL);
 461         if (!dd->comp_vect_mappings) {
 462                 ret = -ENOMEM;
 463                 goto fail;
 464         }
 465         for (i = 0; i < dd->comp_vect_possible_cpus; i++)
 466                 dd->comp_vect_mappings[i] = -1;
 467 
 468         for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
 469                 cpu = _dev_comp_vect_cpu_get(dd, entry, non_intr_cpus,
 470                                              available_cpus);
 471                 if (cpu < 0) {
 472                         ret = -EINVAL;
 473                         goto fail;
 474                 }
 475 
 476                 dd->comp_vect_mappings[i] = cpu;
 477                 hfi1_cdbg(AFFINITY,
 478                           "[%s] Completion Vector %d -> CPU %d",
 479                           rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu);
 480         }
 481 
 482         free_cpumask_var(available_cpus);
 483         free_cpumask_var(non_intr_cpus);
 484         return 0;
 485 
 486 fail:
 487         free_cpumask_var(available_cpus);
 488         free_cpumask_var(non_intr_cpus);
 489         _dev_comp_vect_mappings_destroy(dd);
 490 
 491         return ret;
 492 }
 493 
 494 int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd)
 495 {
 496         int ret;
 497         struct hfi1_affinity_node *entry;
 498 
 499         mutex_lock(&node_affinity.lock);
 500         entry = node_affinity_lookup(dd->node);
 501         if (!entry) {
 502                 ret = -EINVAL;
 503                 goto unlock;
 504         }
 505         ret = _dev_comp_vect_mappings_create(dd, entry);
 506 unlock:
 507         mutex_unlock(&node_affinity.lock);
 508 
 509         return ret;
 510 }
 511 
 512 void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd)
 513 {
 514         _dev_comp_vect_mappings_destroy(dd);
 515 }
 516 
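     /*
      * Look up the CPU assigned to completion vector comp_vect for the
      * device behind rdi.  Returns the CPU number, or -EINVAL if the
      * mapping table does not exist or comp_vect is out of range.
      */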
 517 int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect)
 518 {
 519         struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
 520         struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
 521 
 522         if (!dd->comp_vect_mappings)
 523                 return -EINVAL;
 524         if (comp_vect >= dd->comp_vect_possible_cpus)
 525                 return -EINVAL;
 526 
 527         return dd->comp_vect_mappings[comp_vect];
 528 }
 529 
 530 /*
 531  * It assumes dd->comp_vect_possible_cpus is available.
 532  */
 533 static int _dev_comp_vect_cpu_mask_init(struct hfi1_devdata *dd,
 534                                         struct hfi1_affinity_node *entry,
 535                                         bool first_dev_init)
 536         __must_hold(&node_affinity.lock)
 537 {
 538         int i, j, curr_cpu;
 539         int possible_cpus_comp_vect = 0;
 540         struct cpumask *dev_comp_vect_mask = &dd->comp_vect->mask;
 541 
 542         lockdep_assert_held(&node_affinity.lock);
 543         /*
 544          * If there's only one CPU available for completion vectors, then
 545          * there will only be one completion vector available. Otherwise,
 546          * the number of completion vectors available will be the number of
 547          * available CPUs divided by the number of devices in the
 548          * local NUMA node.
 549          */
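             /*
              * Illustrative example (hypothetical numbers): with 12 CPUs in
              * entry->comp_vect_mask and 2 HFI devices on this NUMA node,
              * each device is given 12 / 2 = 6 completion vector CPUs; if
              * the division left a remainder, the first device initialized
              * would get one extra CPU.
              */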
 550         if (cpumask_weight(&entry->comp_vect_mask) == 1) {
 551                 possible_cpus_comp_vect = 1;
 552                 dd_dev_warn(dd,
 553                             "Number of kernel receive queues is too large for completion vector affinity to be effective\n");
 554         } else {
 555                 possible_cpus_comp_vect +=
 556                         cpumask_weight(&entry->comp_vect_mask) /
 557                                        hfi1_per_node_cntr[dd->node];
 558 
 559                 /*
 560                  * If the available completion vector CPUs don't divide
 561                  * evenly among devices, then the first device to be
 562                  * initialized gets an extra CPU.
 563                  */
 564                 if (first_dev_init &&
 565                     cpumask_weight(&entry->comp_vect_mask) %
 566                     hfi1_per_node_cntr[dd->node] != 0)
 567                         possible_cpus_comp_vect++;
 568         }
 569 
 570         dd->comp_vect_possible_cpus = possible_cpus_comp_vect;
 571 
 572         /* Reserving CPUs for device completion vector */
 573         for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
 574                 curr_cpu = per_cpu_affinity_get(&entry->comp_vect_mask,
 575                                                 entry->comp_vect_affinity);
 576                 if (curr_cpu < 0)
 577                         goto fail;
 578 
 579                 cpumask_set_cpu(curr_cpu, dev_comp_vect_mask);
 580         }
 581 
 582         hfi1_cdbg(AFFINITY,
 583                   "[%s] Completion vector affinity CPU set(s) %*pbl",
 584                   rvt_get_ibdev_name(&(dd)->verbs_dev.rdi),
 585                   cpumask_pr_args(dev_comp_vect_mask));
 586 
 587         return 0;
 588 
 589 fail:
 590         for (j = 0; j < i; j++)
 591                 per_cpu_affinity_put_max(&entry->comp_vect_mask,
 592                                          entry->comp_vect_affinity);
 593 
 594         return curr_cpu;
 595 }
 596 
 597 /*
 598  * It assumes dd->comp_vect_possible_cpus is available.
 599  */
 600 static void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd,
 601                                              struct hfi1_affinity_node *entry)
 602         __must_hold(&node_affinity.lock)
 603 {
 604         int i, cpu;
 605 
 606         lockdep_assert_held(&node_affinity.lock);
 607         if (!dd->comp_vect_possible_cpus)
 608                 return;
 609 
 610         for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
 611                 cpu = per_cpu_affinity_put_max(&dd->comp_vect->mask,
 612                                                entry->comp_vect_affinity);
 613                 /* Clearing CPU in device completion vector cpu mask */
 614                 if (cpu >= 0)
 615                         cpumask_clear_cpu(cpu, &dd->comp_vect->mask);
 616         }
 617 
 618         dd->comp_vect_possible_cpus = 0;
 619 }
 620 
 621 /*
 622  * Interrupt affinity.
 623  *
 624  * Non-receive (default) interrupts get a mask that starts as the
 625  * node's non-HT CPUs and then has each receive-interrupt CPU
 626  * removed from it.
 627  *
 628  * Receive interrupts get node-relative CPU 1 onward, wrapping back
 629  * to node-relative CPU 1 as necessary.
 630  *
 631  */
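     /*
      * Illustrative layout (hypothetical, assuming one device on a node
      * with non-HT CPUs 0-7 and n_krcv_queues == 3): CPU 0 goes to the
      * general/control context, CPUs 1-2 to the two remaining kernel
      * receive contexts, and CPUs 3-7 stay in the default (SDMA)
      * interrupt mask.
      */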
 632 int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
 633 {
 634         int node = pcibus_to_node(dd->pcidev->bus);
 635         struct hfi1_affinity_node *entry;
 636         const struct cpumask *local_mask;
 637         int curr_cpu, possible, i, ret;
 638         bool new_entry = false;
 639 
 640         /*
 641          * If the BIOS does not have the NUMA node information set, select
 642          * NUMA 0 so we get consistent performance.
 643          */
 644         if (node < 0) {
 645                 dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
 646                 node = 0;
 647         }
 648         dd->node = node;
 649 
 650         local_mask = cpumask_of_node(dd->node);
 651         if (cpumask_first(local_mask) >= nr_cpu_ids)
 652                 local_mask = topology_core_cpumask(0);
 653 
 654         mutex_lock(&node_affinity.lock);
 655         entry = node_affinity_lookup(dd->node);
 656 
 657         /*
 658          * If this is the first time this NUMA node's affinity is used,
 659          * create an entry in the global affinity structure and initialize it.
 660          */
 661         if (!entry) {
 662                 entry = node_affinity_allocate(node);
 663                 if (!entry) {
 664                         dd_dev_err(dd,
 665                                    "Unable to allocate global affinity node\n");
 666                         ret = -ENOMEM;
 667                         goto fail;
 668                 }
 669                 new_entry = true;
 670 
 671                 init_cpu_mask_set(&entry->def_intr);
 672                 init_cpu_mask_set(&entry->rcv_intr);
 673                 cpumask_clear(&entry->comp_vect_mask);
 674                 cpumask_clear(&entry->general_intr_mask);
 675                 /* Use the "real" cpu mask of this node as the default */
 676                 cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask,
 677                             local_mask);
 678 
 679                 /* fill in the receive list */
 680                 possible = cpumask_weight(&entry->def_intr.mask);
 681                 curr_cpu = cpumask_first(&entry->def_intr.mask);
 682 
 683                 if (possible == 1) {
 684                         /* only one CPU, everyone will use it */
 685                         cpumask_set_cpu(curr_cpu, &entry->rcv_intr.mask);
 686                         cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
 687                 } else {
 688                         /*
 689                          * The general/control context will be the first CPU in
 690                          * the default list, so it is removed from the default
 691                          * list and added to the general interrupt list.
 692                          */
 693                         cpumask_clear_cpu(curr_cpu, &entry->def_intr.mask);
 694                         cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
 695                         curr_cpu = cpumask_next(curr_cpu,
 696                                                 &entry->def_intr.mask);
 697 
 698                         /*
 699                          * Remove the remaining kernel receive queues from
 700                          * the default list and add them to the receive list.
 701                          */
 702                         for (i = 0;
 703                              i < (dd->n_krcv_queues - 1) *
 704                                   hfi1_per_node_cntr[dd->node];
 705                              i++) {
 706                                 cpumask_clear_cpu(curr_cpu,
 707                                                   &entry->def_intr.mask);
 708                                 cpumask_set_cpu(curr_cpu,
 709                                                 &entry->rcv_intr.mask);
 710                                 curr_cpu = cpumask_next(curr_cpu,
 711                                                         &entry->def_intr.mask);
 712                                 if (curr_cpu >= nr_cpu_ids)
 713                                         break;
 714                         }
 715 
 716                         /*
 717                          * If there ends up being 0 CPU cores leftover for SDMA
 718                          * engines, use the same CPU cores as general/control
 719                          * context.
 720                          */
 721                         if (cpumask_weight(&entry->def_intr.mask) == 0)
 722                                 cpumask_copy(&entry->def_intr.mask,
 723                                              &entry->general_intr_mask);
 724                 }
 725 
 726                 /* Determine completion vector CPUs for the entire node */
 727                 cpumask_and(&entry->comp_vect_mask,
 728                             &node_affinity.real_cpu_mask, local_mask);
 729                 cpumask_andnot(&entry->comp_vect_mask,
 730                                &entry->comp_vect_mask,
 731                                &entry->rcv_intr.mask);
 732                 cpumask_andnot(&entry->comp_vect_mask,
 733                                &entry->comp_vect_mask,
 734                                &entry->general_intr_mask);
 735 
 736                 /*
 737                  * If there ends up being 0 CPU cores leftover for completion
 738                  * vectors, use the same CPU core as the general/control
 739                  * context.
 740                  */
 741                 if (cpumask_weight(&entry->comp_vect_mask) == 0)
 742                         cpumask_copy(&entry->comp_vect_mask,
 743                                      &entry->general_intr_mask);
 744         }
 745 
 746         ret = _dev_comp_vect_cpu_mask_init(dd, entry, new_entry);
 747         if (ret < 0)
 748                 goto fail;
 749 
 750         if (new_entry)
 751                 node_affinity_add_tail(entry);
 752 
 753         mutex_unlock(&node_affinity.lock);
 754 
 755         return 0;
 756 
 757 fail:
 758         if (new_entry)
 759                 node_affinity_destroy(entry);
 760         mutex_unlock(&node_affinity.lock);
 761         return ret;
 762 }
 763 
 764 void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
 765 {
 766         struct hfi1_affinity_node *entry;
 767 
 768         if (dd->node < 0)
 769                 return;
 770 
 771         mutex_lock(&node_affinity.lock);
 772         entry = node_affinity_lookup(dd->node);
 773         if (!entry)
 774                 goto unlock;
 775 
 776         /*
 777          * Free device completion vector CPUs to be used by future
 778          * completion vectors
 779          */
 780         _dev_comp_vect_cpu_mask_clean_up(dd, entry);
 781 unlock:
 782         mutex_unlock(&node_affinity.lock);
 783         dd->node = NUMA_NO_NODE;
 784 }
 785 
 786 /*
 787  * Function updates the irq affinity hint for msix after it has been changed
 788  * by the user using the /proc/irq interface. This function only accepts
 789  * one cpu in the mask.
 790  */
 791 static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
 792 {
 793         struct sdma_engine *sde = msix->arg;
 794         struct hfi1_devdata *dd = sde->dd;
 795         struct hfi1_affinity_node *entry;
 796         struct cpu_mask_set *set;
 797         int i, old_cpu;
 798 
 799         if (cpu > num_online_cpus() || cpu == sde->cpu)
 800                 return;
 801 
 802         mutex_lock(&node_affinity.lock);
 803         entry = node_affinity_lookup(dd->node);
 804         if (!entry)
 805                 goto unlock;
 806 
 807         old_cpu = sde->cpu;
 808         sde->cpu = cpu;
 809         cpumask_clear(&msix->mask);
 810         cpumask_set_cpu(cpu, &msix->mask);
 811         dd_dev_dbg(dd, "IRQ: %u, type %s engine %u -> cpu: %d\n",
 812                    msix->irq, irq_type_names[msix->type],
 813                    sde->this_idx, cpu);
 814         irq_set_affinity_hint(msix->irq, &msix->mask);
 815 
 816         /*
 817          * Set the new cpu in the hfi1_affinity_node and clean
 818          * the old cpu if it is not used by any other IRQ
 819          */
 820         set = &entry->def_intr;
 821         cpumask_set_cpu(cpu, &set->mask);
 822         cpumask_set_cpu(cpu, &set->used);
 823         for (i = 0; i < dd->msix_info.max_requested; i++) {
 824                 struct hfi1_msix_entry *other_msix;
 825 
 826                 other_msix = &dd->msix_info.msix_entries[i];
 827                 if (other_msix->type != IRQ_SDMA || other_msix == msix)
 828                         continue;
 829 
 830                 if (cpumask_test_cpu(old_cpu, &other_msix->mask))
 831                         goto unlock;
 832         }
 833         cpumask_clear_cpu(old_cpu, &set->mask);
 834         cpumask_clear_cpu(old_cpu, &set->used);
 835 unlock:
 836         mutex_unlock(&node_affinity.lock);
 837 }
 838 
 839 static void hfi1_irq_notifier_notify(struct irq_affinity_notify *notify,
 840                                      const cpumask_t *mask)
 841 {
 842         int cpu = cpumask_first(mask);
 843         struct hfi1_msix_entry *msix = container_of(notify,
 844                                                     struct hfi1_msix_entry,
 845                                                     notify);
 846 
 847         /* Only one CPU configuration supported currently */
 848         hfi1_update_sdma_affinity(msix, cpu);
 849 }
 850 
 851 static void hfi1_irq_notifier_release(struct kref *ref)
 852 {
 853         /*
 854          * This is required by affinity notifier. We don't have anything to
 855          * free here.
 856          */
 857 }
 858 
 859 static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix)
 860 {
 861         struct irq_affinity_notify *notify = &msix->notify;
 862 
 863         notify->irq = msix->irq;
 864         notify->notify = hfi1_irq_notifier_notify;
 865         notify->release = hfi1_irq_notifier_release;
 866 
 867         if (irq_set_affinity_notifier(notify->irq, notify))
 868                 pr_err("Failed to register sdma irq affinity notifier for irq %d\n",
 869                        notify->irq);
 870 }
 871 
 872 static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix)
 873 {
 874         struct irq_affinity_notify *notify = &msix->notify;
 875 
 876         if (irq_set_affinity_notifier(notify->irq, NULL))
 877                 pr_err("Failed to cleanup sdma irq affinity notifier for irq %d\n",
 878                        notify->irq);
 879 }
 880 
 881 /*
 882  * Function sets the irq affinity for msix.
 883  * It *must* be called with node_affinity.lock held.
 884  */
 885 static int get_irq_affinity(struct hfi1_devdata *dd,
 886                             struct hfi1_msix_entry *msix)
 887 {
 888         cpumask_var_t diff;
 889         struct hfi1_affinity_node *entry;
 890         struct cpu_mask_set *set = NULL;
 891         struct sdma_engine *sde = NULL;
 892         struct hfi1_ctxtdata *rcd = NULL;
 893         char extra[64];
 894         int cpu = -1;
 895 
 896         extra[0] = '\0';
 897         cpumask_clear(&msix->mask);
 898 
 899         entry = node_affinity_lookup(dd->node);
 900 
 901         switch (msix->type) {
 902         case IRQ_SDMA:
 903                 sde = (struct sdma_engine *)msix->arg;
 904                 scnprintf(extra, 64, "engine %u", sde->this_idx);
 905                 set = &entry->def_intr;
 906                 break;
 907         case IRQ_GENERAL:
 908                 cpu = cpumask_first(&entry->general_intr_mask);
 909                 break;
 910         case IRQ_RCVCTXT:
 911                 rcd = (struct hfi1_ctxtdata *)msix->arg;
 912                 if (rcd->ctxt == HFI1_CTRL_CTXT)
 913                         cpu = cpumask_first(&entry->general_intr_mask);
 914                 else
 915                         set = &entry->rcv_intr;
 916                 scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
 917                 break;
 918         default:
 919                 dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
 920                 return -EINVAL;
 921         }
 922 
 923         /*
 924          * The general and control contexts are placed on a particular
 925          * CPU, which is set above. Skip accounting for it. Everything else
 926          * finds its CPU here.
 927          */
 928         if (cpu == -1 && set) {
 929                 if (!zalloc_cpumask_var(&diff, GFP_KERNEL))
 930                         return -ENOMEM;
 931 
 932                 cpu = cpu_mask_set_get_first(set, diff);
 933                 if (cpu < 0) {
 934                         free_cpumask_var(diff);
 935                         dd_dev_err(dd, "Failure to obtain CPU for IRQ\n");
 936                         return cpu;
 937                 }
 938 
 939                 free_cpumask_var(diff);
 940         }
 941 
 942         cpumask_set_cpu(cpu, &msix->mask);
 943         dd_dev_info(dd, "IRQ: %u, type %s %s -> cpu: %d\n",
 944                     msix->irq, irq_type_names[msix->type],
 945                     extra, cpu);
 946         irq_set_affinity_hint(msix->irq, &msix->mask);
 947 
 948         if (msix->type == IRQ_SDMA) {
 949                 sde->cpu = cpu;
 950                 hfi1_setup_sdma_notifier(msix);
 951         }
 952 
 953         return 0;
 954 }
 955 
 956 int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
 957 {
 958         int ret;
 959 
 960         mutex_lock(&node_affinity.lock);
 961         ret = get_irq_affinity(dd, msix);
 962         mutex_unlock(&node_affinity.lock);
 963         return ret;
 964 }
 965 
 966 void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
 967                            struct hfi1_msix_entry *msix)
 968 {
 969         struct cpu_mask_set *set = NULL;
 970         struct hfi1_ctxtdata *rcd;
 971         struct hfi1_affinity_node *entry;
 972 
 973         mutex_lock(&node_affinity.lock);
 974         entry = node_affinity_lookup(dd->node);
 975 
 976         switch (msix->type) {
 977         case IRQ_SDMA:
 978                 set = &entry->def_intr;
 979                 hfi1_cleanup_sdma_notifier(msix);
 980                 break;
 981         case IRQ_GENERAL:
 982                 /* Don't do accounting for general contexts */
 983                 break;
 984         case IRQ_RCVCTXT:
 985                 rcd = (struct hfi1_ctxtdata *)msix->arg;
 986                 /* Don't do accounting for control contexts */
 987                 if (rcd->ctxt != HFI1_CTRL_CTXT)
 988                         set = &entry->rcv_intr;
 989                 break;
 990         default:
 991                 mutex_unlock(&node_affinity.lock);
 992                 return;
 993         }
 994 
 995         if (set) {
 996                 cpumask_andnot(&set->used, &set->used, &msix->mask);
 997                 _cpu_mask_set_gen_dec(set);
 998         }
 999 
1000         irq_set_affinity_hint(msix->irq, NULL);
1001         cpumask_clear(&msix->mask);
1002         mutex_unlock(&node_affinity.lock);
1003 }
1004 
1005 /* This should be called with node_affinity.lock held */
1006 static void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask,
1007                                 struct hfi1_affinity_node_list *affinity)
1008 {
1009         int possible, curr_cpu, i;
1010         uint num_cores_per_socket = node_affinity.num_online_cpus /
1011                                         affinity->num_core_siblings /
1012                                                 node_affinity.num_online_nodes;
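             /*
              * Illustrative example (hypothetical topology, assuming HT
              * siblings are enumerated after all physical cores): with 32
              * online CPUs, 2 siblings per core and 2 online NUMA nodes,
              * num_cores_per_socket is 32 / 2 / 2 = 8, and hw_thread_no 1
              * selects the second HW thread of every core by shifting the
              * mask left by 8 * 2 * 1 = 16 below.
              */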
1013 
1014         cpumask_copy(hw_thread_mask, &affinity->proc.mask);
1015         if (affinity->num_core_siblings > 0) {
1016                 /* Remove the HT siblings that are not needed for now */
1017                 possible = cpumask_weight(hw_thread_mask);
1018                 curr_cpu = cpumask_first(hw_thread_mask);
1019                 for (i = 0;
1020                      i < num_cores_per_socket * node_affinity.num_online_nodes;
1021                      i++)
1022                         curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
1023 
1024                 for (; i < possible; i++) {
1025                         cpumask_clear_cpu(curr_cpu, hw_thread_mask);
1026                         curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
1027                 }
1028 
1029                 /* Identifying correct HW threads within physical cores */
1030                 cpumask_shift_left(hw_thread_mask, hw_thread_mask,
1031                                    num_cores_per_socket *
1032                                    node_affinity.num_online_nodes *
1033                                    hw_thread_no);
1034         }
1035 }
1036 
1037 int hfi1_get_proc_affinity(int node)
1038 {
1039         int cpu = -1, ret, i;
1040         struct hfi1_affinity_node *entry;
1041         cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
1042         const struct cpumask *node_mask,
1043                 *proc_mask = current->cpus_ptr;
1044         struct hfi1_affinity_node_list *affinity = &node_affinity;
1045         struct cpu_mask_set *set = &affinity->proc;
1046 
1047         /*
1048          * check whether process/context affinity has already
1049          * been set
1050          */
1051         if (current->nr_cpus_allowed == 1) {
1052                 hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
1053                           current->pid, current->comm,
1054                           cpumask_pr_args(proc_mask));
1055                 /*
1056                  * Mark the pre-set CPU as used. This is atomic so we don't
1057                  * need the lock
1058                  */
1059                 cpu = cpumask_first(proc_mask);
1060                 cpumask_set_cpu(cpu, &set->used);
1061                 goto done;
1062         } else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) {
1063                 hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
1064                           current->pid, current->comm,
1065                           cpumask_pr_args(proc_mask));
1066                 goto done;
1067         }
1068 
1069         /*
1070          * The process does not have a preset CPU affinity so find one to
1071          * recommend using the following algorithm:
1072          *
1073          * For each user process that is opening a context on HFI Y:
1074          *  a) If all cores are filled, reinitialize the bitmask
1075          *  b) Fill real cores first, then HT cores (First set of HT
1076          *     cores on all physical cores, then the second set of HT cores,
1077          *     and so on) in the following order:
1078          *
1079          *     1. Same NUMA node as HFI Y and not running an IRQ
1080          *        handler
1081          *     2. Same NUMA node as HFI Y and running an IRQ handler
1082          *     3. Different NUMA node to HFI Y and not running an IRQ
1083          *        handler
1084          *     4. Different NUMA node to HFI Y and running an IRQ
1085          *        handler
1086          *  c) Mark core as filled in the bitmask. As user processes are
1087          *     done, clear cores from the bitmask.
1088          */
1089 
1090         ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
1091         if (!ret)
1092                 goto done;
1093         ret = zalloc_cpumask_var(&hw_thread_mask, GFP_KERNEL);
1094         if (!ret)
1095                 goto free_diff;
1096         ret = zalloc_cpumask_var(&available_mask, GFP_KERNEL);
1097         if (!ret)
1098                 goto free_hw_thread_mask;
1099         ret = zalloc_cpumask_var(&intrs_mask, GFP_KERNEL);
1100         if (!ret)
1101                 goto free_available_mask;
1102 
1103         mutex_lock(&affinity->lock);
1104         /*
1105          * If we've used all available HW threads, clear the mask and start
1106          * overloading.
1107          */
1108         _cpu_mask_set_gen_inc(set);
1109 
1110         /*
1111          * If NUMA node has CPUs used by interrupt handlers, include them in the
1112          * interrupt handler mask.
1113          */
1114         entry = node_affinity_lookup(node);
1115         if (entry) {
1116                 cpumask_copy(intrs_mask, (entry->def_intr.gen ?
1117                                           &entry->def_intr.mask :
1118                                           &entry->def_intr.used));
1119                 cpumask_or(intrs_mask, intrs_mask, (entry->rcv_intr.gen ?
1120                                                     &entry->rcv_intr.mask :
1121                                                     &entry->rcv_intr.used));
1122                 cpumask_or(intrs_mask, intrs_mask, &entry->general_intr_mask);
1123         }
1124         hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl",
1125                   cpumask_pr_args(intrs_mask));
1126 
1127         cpumask_copy(hw_thread_mask, &set->mask);
1128 
1129         /*
1130          * If HT cores are enabled, identify which HW threads within the
1131          * physical cores should be used.
1132          */
1133         if (affinity->num_core_siblings > 0) {
1134                 for (i = 0; i < affinity->num_core_siblings; i++) {
1135                         find_hw_thread_mask(i, hw_thread_mask, affinity);
1136 
1137                         /*
1138                          * If there's at least one available core for this HW
1139                          * thread number, stop looking for a core.
1140                          *
1141                          * diff will always be not empty at least once in this
1142                          * loop as the used mask gets reset when
1143                          * (set->mask == set->used) before this loop.
1144                          */
1145                         cpumask_andnot(diff, hw_thread_mask, &set->used);
1146                         if (!cpumask_empty(diff))
1147                                 break;
1148                 }
1149         }
1150         hfi1_cdbg(PROC, "Same available HW thread on all physical CPUs: %*pbl",
1151                   cpumask_pr_args(hw_thread_mask));
1152 
1153         node_mask = cpumask_of_node(node);
1154         hfi1_cdbg(PROC, "Device on NUMA %u, CPUs %*pbl", node,
1155                   cpumask_pr_args(node_mask));
1156 
1157         /* Get cpumask of available CPUs on preferred NUMA */
1158         cpumask_and(available_mask, hw_thread_mask, node_mask);
1159         cpumask_andnot(available_mask, available_mask, &set->used);
1160         hfi1_cdbg(PROC, "Available CPUs on NUMA %u: %*pbl", node,
1161                   cpumask_pr_args(available_mask));
1162 
1163         /*
1164          * At first, we don't want to place processes on the same
1165          * CPUs as interrupt handlers. Then, CPUs running interrupt
1166          * handlers are used.
1167          *
1168          * 1) If diff is not empty, then there are CPUs not running
1169          *    non-interrupt handlers available, so diff gets copied
1170          *    over to available_mask.
1171          * 2) If diff is empty, then all CPUs not running interrupt
1172          *    handlers are taken, so available_mask contains all
1173          *    available CPUs running interrupt handlers.
1174          * 3) If available_mask is empty, then all CPUs on the
1175          *    preferred NUMA node are taken, so other NUMA nodes are
1176          *    used for process assignments using the same method as
1177          *    the preferred NUMA node.
1178          */
1179         cpumask_andnot(diff, available_mask, intrs_mask);
1180         if (!cpumask_empty(diff))
1181                 cpumask_copy(available_mask, diff);
1182 
1183         /* If we don't have CPUs on the preferred node, use other NUMA nodes */
1184         if (cpumask_empty(available_mask)) {
1185                 cpumask_andnot(available_mask, hw_thread_mask, &set->used);
1186                 /* Excluding preferred NUMA cores */
1187                 cpumask_andnot(available_mask, available_mask, node_mask);
1188                 hfi1_cdbg(PROC,
1189                           "Preferred NUMA node cores are taken, cores available in other NUMA nodes: %*pbl",
1190                           cpumask_pr_args(available_mask));
1191 
1192                 /*
1193                  * At first, we don't want to place processes on the same
1194                  * CPUs as interrupt handlers.
1195                  */
1196                 cpumask_andnot(diff, available_mask, intrs_mask);
1197                 if (!cpumask_empty(diff))
1198                         cpumask_copy(available_mask, diff);
1199         }
1200         hfi1_cdbg(PROC, "Possible CPUs for process: %*pbl",
1201                   cpumask_pr_args(available_mask));
1202 
1203         cpu = cpumask_first(available_mask);
1204         if (cpu >= nr_cpu_ids) /* empty */
1205                 cpu = -1;
1206         else
1207                 cpumask_set_cpu(cpu, &set->used);
1208 
1209         mutex_unlock(&affinity->lock);
1210         hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu);
1211 
1212         free_cpumask_var(intrs_mask);
1213 free_available_mask:
1214         free_cpumask_var(available_mask);
1215 free_hw_thread_mask:
1216         free_cpumask_var(hw_thread_mask);
1217 free_diff:
1218         free_cpumask_var(diff);
1219 done:
1220         return cpu;
1221 }
1222 
1223 void hfi1_put_proc_affinity(int cpu)
1224 {
1225         struct hfi1_affinity_node_list *affinity = &node_affinity;
1226         struct cpu_mask_set *set = &affinity->proc;
1227 
1228         if (cpu < 0)
1229                 return;
1230 
1231         mutex_lock(&affinity->lock);
1232         cpu_mask_set_put(set, cpu);
1233         hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu);
1234         mutex_unlock(&affinity->lock);
1235 }
