root/drivers/misc/habanalabs/habanalabs_drv.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. get_asic_type
  2. hl_device_open
  3. hl_device_open_ctrl
  4. set_driver_behavior_per_device
  5. create_hdev
  6. destroy_hdev
  7. hl_pmops_suspend
  8. hl_pmops_resume
  9. hl_pci_probe
  10. hl_pci_remove
  11. hl_init
  12. hl_exit

   1 // SPDX-License-Identifier: GPL-2.0
   2 
   3 /*
   4  * Copyright 2016-2019 HabanaLabs, Ltd.
   5  * All Rights Reserved.
   6  *
   7  */
   8 
   9 #define pr_fmt(fmt)             "habanalabs: " fmt
  10 
  11 #include "habanalabs.h"
  12 
  13 #include <linux/pci.h>
  14 #include <linux/module.h>
  15 
  16 #define HL_DRIVER_AUTHOR        "HabanaLabs Kernel Driver Team"
  17 
  18 #define HL_DRIVER_DESC          "Driver for HabanaLabs's AI Accelerators"
  19 
  20 MODULE_AUTHOR(HL_DRIVER_AUTHOR);
  21 MODULE_DESCRIPTION(HL_DRIVER_DESC);
  22 MODULE_LICENSE("GPL v2");
  23 
  24 static int hl_major;
  25 static struct class *hl_class;
  26 static DEFINE_IDR(hl_devs_idr);
  27 static DEFINE_MUTEX(hl_devs_idr_lock);
  28 
  29 static int timeout_locked = 5;
  30 static int reset_on_lockup = 1;
  31 
  32 module_param(timeout_locked, int, 0444);
  33 MODULE_PARM_DESC(timeout_locked,
  34         "Device lockup timeout in seconds (0 = disabled, default 5s)");
  35 
  36 module_param(reset_on_lockup, int, 0444);
  37 MODULE_PARM_DESC(reset_on_lockup,
  38         "Do device reset on lockup (0 = no, 1 = yes, default yes)");
  39 
  40 #define PCI_VENDOR_ID_HABANALABS        0x1da3
  41 
  42 #define PCI_IDS_GOYA                    0x0001
  43 
  44 static const struct pci_device_id ids[] = {
  45         { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
  46         { 0, }
  47 };
  48 MODULE_DEVICE_TABLE(pci, ids);
  49 
  50 /*
  51  * get_asic_type - translate device id to asic type
  52  *
  53  * @device: id of the PCI device
  54  *
  55  * Translate device id to asic type.
  56  * In case of unidentified device, return -1
  57  */
  58 static enum hl_asic_type get_asic_type(u16 device)
  59 {
  60         enum hl_asic_type asic_type;
  61 
  62         switch (device) {
  63         case PCI_IDS_GOYA:
  64                 asic_type = ASIC_GOYA;
  65                 break;
  66         default:
  67                 asic_type = ASIC_INVALID;
  68                 break;
  69         }
  70 
  71         return asic_type;
  72 }
  73 
  74 /*
  75  * hl_device_open - open function for habanalabs device
  76  *
  77  * @inode: pointer to inode structure
  78  * @filp: pointer to file structure
  79  *
  80  * Called when process opens an habanalabs device.
  81  */
  82 int hl_device_open(struct inode *inode, struct file *filp)
  83 {
  84         struct hl_device *hdev;
  85         struct hl_fpriv *hpriv;
  86         int rc;
  87 
  88         mutex_lock(&hl_devs_idr_lock);
  89         hdev = idr_find(&hl_devs_idr, iminor(inode));
  90         mutex_unlock(&hl_devs_idr_lock);
  91 
  92         if (!hdev) {
  93                 pr_err("Couldn't find device %d:%d\n",
  94                         imajor(inode), iminor(inode));
  95                 return -ENXIO;
  96         }
  97 
  98         hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
  99         if (!hpriv)
 100                 return -ENOMEM;
 101 
 102         hpriv->hdev = hdev;
 103         filp->private_data = hpriv;
 104         hpriv->filp = filp;
 105         mutex_init(&hpriv->restore_phase_mutex);
 106         kref_init(&hpriv->refcount);
 107         nonseekable_open(inode, filp);
 108 
 109         hl_cb_mgr_init(&hpriv->cb_mgr);
 110         hl_ctx_mgr_init(&hpriv->ctx_mgr);
 111 
 112         hpriv->taskpid = find_get_pid(current->pid);
 113 
 114         mutex_lock(&hdev->fpriv_list_lock);
 115 
 116         if (hl_device_disabled_or_in_reset(hdev)) {
 117                 dev_err_ratelimited(hdev->dev,
 118                         "Can't open %s because it is disabled or in reset\n",
 119                         dev_name(hdev->dev));
 120                 rc = -EPERM;
 121                 goto out_err;
 122         }
 123 
 124         if (hdev->in_debug) {
 125                 dev_err_ratelimited(hdev->dev,
 126                         "Can't open %s because it is being debugged by another user\n",
 127                         dev_name(hdev->dev));
 128                 rc = -EPERM;
 129                 goto out_err;
 130         }
 131 
 132         if (hdev->compute_ctx) {
 133                 dev_dbg_ratelimited(hdev->dev,
 134                         "Can't open %s because another user is working on it\n",
 135                         dev_name(hdev->dev));
 136                 rc = -EBUSY;
 137                 goto out_err;
 138         }
 139 
 140         rc = hl_ctx_create(hdev, hpriv);
 141         if (rc) {
 142                 dev_err(hdev->dev, "Failed to create context %d\n", rc);
 143                 goto out_err;
 144         }
 145 
 146         /* Device is IDLE at this point so it is legal to change PLLs.
 147          * There is no need to check anything because if the PLL is
 148          * already HIGH, the set function will return without doing
 149          * anything
 150          */
 151         hl_device_set_frequency(hdev, PLL_HIGH);
 152 
 153         list_add(&hpriv->dev_node, &hdev->fpriv_list);
 154         mutex_unlock(&hdev->fpriv_list_lock);
 155 
 156         hl_debugfs_add_file(hpriv);
 157 
 158         return 0;
 159 
 160 out_err:
 161         mutex_unlock(&hdev->fpriv_list_lock);
 162 
 163         hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
 164         hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
 165         filp->private_data = NULL;
 166         mutex_destroy(&hpriv->restore_phase_mutex);
 167         put_pid(hpriv->taskpid);
 168 
 169         kfree(hpriv);
 170         return rc;
 171 }
 172 
 173 int hl_device_open_ctrl(struct inode *inode, struct file *filp)
 174 {
 175         struct hl_device *hdev;
 176         struct hl_fpriv *hpriv;
 177         int rc;
 178 
 179         mutex_lock(&hl_devs_idr_lock);
 180         hdev = idr_find(&hl_devs_idr, iminor(inode));
 181         mutex_unlock(&hl_devs_idr_lock);
 182 
 183         if (!hdev) {
 184                 pr_err("Couldn't find device %d:%d\n",
 185                         imajor(inode), iminor(inode));
 186                 return -ENXIO;
 187         }
 188 
 189         hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
 190         if (!hpriv)
 191                 return -ENOMEM;
 192 
 193         mutex_lock(&hdev->fpriv_list_lock);
 194 
 195         if (hl_device_disabled_or_in_reset(hdev)) {
 196                 dev_err_ratelimited(hdev->dev_ctrl,
 197                         "Can't open %s because it is disabled or in reset\n",
 198                         dev_name(hdev->dev_ctrl));
 199                 rc = -EPERM;
 200                 goto out_err;
 201         }
 202 
 203         list_add(&hpriv->dev_node, &hdev->fpriv_list);
 204         mutex_unlock(&hdev->fpriv_list_lock);
 205 
 206         hpriv->hdev = hdev;
 207         filp->private_data = hpriv;
 208         hpriv->filp = filp;
 209         hpriv->is_control = true;
 210         nonseekable_open(inode, filp);
 211 
 212         hpriv->taskpid = find_get_pid(current->pid);
 213 
 214         return 0;
 215 
 216 out_err:
 217         mutex_unlock(&hdev->fpriv_list_lock);
 218         kfree(hpriv);
 219         return rc;
 220 }
 221 
 222 static void set_driver_behavior_per_device(struct hl_device *hdev)
 223 {
 224         hdev->mmu_enable = 1;
 225         hdev->cpu_enable = 1;
 226         hdev->fw_loading = 1;
 227         hdev->cpu_queues_enable = 1;
 228         hdev->heartbeat = 1;
 229 
 230         hdev->reset_pcilink = 0;
 231 }
 232 
 233 /*
 234  * create_hdev - create habanalabs device instance
 235  *
 236  * @dev: will hold the pointer to the new habanalabs device structure
 237  * @pdev: pointer to the pci device
 238  * @asic_type: in case of simulator device, which device is it
 239  * @minor: in case of simulator device, the minor of the device
 240  *
 241  * Allocate memory for habanalabs device and initialize basic fields
 242  * Identify the ASIC type
 243  * Allocate ID (minor) for the device (only for real devices)
 244  */
 245 int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 246                 enum hl_asic_type asic_type, int minor)
 247 {
 248         struct hl_device *hdev;
 249         int rc, main_id, ctrl_id = 0;
 250 
 251         *dev = NULL;
 252 
 253         hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
 254         if (!hdev)
 255                 return -ENOMEM;
 256 
 257         /* First, we must find out which ASIC are we handling. This is needed
 258          * to configure the behavior of the driver (kernel parameters)
 259          */
 260         if (pdev) {
 261                 hdev->asic_type = get_asic_type(pdev->device);
 262                 if (hdev->asic_type == ASIC_INVALID) {
 263                         dev_err(&pdev->dev, "Unsupported ASIC\n");
 264                         rc = -ENODEV;
 265                         goto free_hdev;
 266                 }
 267         } else {
 268                 hdev->asic_type = asic_type;
 269         }
 270 
 271         hdev->major = hl_major;
 272         hdev->reset_on_lockup = reset_on_lockup;
 273         hdev->pldm = 0;
 274 
 275         set_driver_behavior_per_device(hdev);
 276 
 277         if (timeout_locked)
 278                 hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
 279         else
 280                 hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
 281 
 282         hdev->disabled = true;
 283         hdev->pdev = pdev; /* can be NULL in case of simulator device */
 284 
 285         /* Set default DMA mask to 32 bits */
 286         hdev->dma_mask = 32;
 287 
 288         mutex_lock(&hl_devs_idr_lock);
 289 
 290         /* Always save 2 numbers, 1 for main device and 1 for control.
 291          * They must be consecutive
 292          */
 293         main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS,
 294                                 GFP_KERNEL);
 295 
 296         if (main_id >= 0)
 297                 ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
 298                                         main_id + 2, GFP_KERNEL);
 299 
 300         mutex_unlock(&hl_devs_idr_lock);
 301 
 302         if ((main_id < 0) || (ctrl_id < 0)) {
 303                 if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
 304                         pr_err("too many devices in the system\n");
 305 
 306                 if (main_id >= 0) {
 307                         mutex_lock(&hl_devs_idr_lock);
 308                         idr_remove(&hl_devs_idr, main_id);
 309                         mutex_unlock(&hl_devs_idr_lock);
 310                 }
 311 
 312                 rc = -EBUSY;
 313                 goto free_hdev;
 314         }
 315 
 316         hdev->id = main_id;
 317         hdev->id_control = ctrl_id;
 318 
 319         *dev = hdev;
 320 
 321         return 0;
 322 
 323 free_hdev:
 324         kfree(hdev);
 325         return rc;
 326 }
 327 
 328 /*
 329  * destroy_hdev - destroy habanalabs device instance
 330  *
 331  * @dev: pointer to the habanalabs device structure
 332  *
 333  */
 334 void destroy_hdev(struct hl_device *hdev)
 335 {
 336         /* Remove device from the device list */
 337         mutex_lock(&hl_devs_idr_lock);
 338         idr_remove(&hl_devs_idr, hdev->id);
 339         idr_remove(&hl_devs_idr, hdev->id_control);
 340         mutex_unlock(&hl_devs_idr_lock);
 341 
 342         kfree(hdev);
 343 }
 344 
 345 static int hl_pmops_suspend(struct device *dev)
 346 {
 347         struct hl_device *hdev = dev_get_drvdata(dev);
 348 
 349         pr_debug("Going to suspend PCI device\n");
 350 
 351         if (!hdev) {
 352                 pr_err("device pointer is NULL in suspend\n");
 353                 return 0;
 354         }
 355 
 356         return hl_device_suspend(hdev);
 357 }
 358 
 359 static int hl_pmops_resume(struct device *dev)
 360 {
 361         struct hl_device *hdev = dev_get_drvdata(dev);
 362 
 363         pr_debug("Going to resume PCI device\n");
 364 
 365         if (!hdev) {
 366                 pr_err("device pointer is NULL in resume\n");
 367                 return 0;
 368         }
 369 
 370         return hl_device_resume(hdev);
 371 }
 372 
 373 /*
 374  * hl_pci_probe - probe PCI habanalabs devices
 375  *
 376  * @pdev: pointer to pci device
 377  * @id: pointer to pci device id structure
 378  *
 379  * Standard PCI probe function for habanalabs device.
 380  * Create a new habanalabs device and initialize it according to the
 381  * device's type
 382  */
 383 static int hl_pci_probe(struct pci_dev *pdev,
 384                                 const struct pci_device_id *id)
 385 {
 386         struct hl_device *hdev;
 387         int rc;
 388 
 389         dev_info(&pdev->dev, HL_NAME
 390                  " device found [%04x:%04x] (rev %x)\n",
 391                  (int)pdev->vendor, (int)pdev->device, (int)pdev->revision);
 392 
 393         rc = create_hdev(&hdev, pdev, ASIC_INVALID, -1);
 394         if (rc)
 395                 return rc;
 396 
 397         pci_set_drvdata(pdev, hdev);
 398 
 399         rc = hl_device_init(hdev, hl_class);
 400         if (rc) {
 401                 dev_err(&pdev->dev, "Fatal error during habanalabs device init\n");
 402                 rc = -ENODEV;
 403                 goto disable_device;
 404         }
 405 
 406         return 0;
 407 
 408 disable_device:
 409         pci_set_drvdata(pdev, NULL);
 410         destroy_hdev(hdev);
 411 
 412         return rc;
 413 }
 414 
 415 /*
 416  * hl_pci_remove - remove PCI habanalabs devices
 417  *
 418  * @pdev: pointer to pci device
 419  *
 420  * Standard PCI remove function for habanalabs device
 421  */
 422 static void hl_pci_remove(struct pci_dev *pdev)
 423 {
 424         struct hl_device *hdev;
 425 
 426         hdev = pci_get_drvdata(pdev);
 427         if (!hdev)
 428                 return;
 429 
 430         hl_device_fini(hdev);
 431         pci_set_drvdata(pdev, NULL);
 432 
 433         destroy_hdev(hdev);
 434 }
 435 
 436 static const struct dev_pm_ops hl_pm_ops = {
 437         .suspend = hl_pmops_suspend,
 438         .resume = hl_pmops_resume,
 439 };
 440 
 441 static struct pci_driver hl_pci_driver = {
 442         .name = HL_NAME,
 443         .id_table = ids,
 444         .probe = hl_pci_probe,
 445         .remove = hl_pci_remove,
 446         .driver.pm = &hl_pm_ops,
 447 };
 448 
 449 /*
 450  * hl_init - Initialize the habanalabs kernel driver
 451  */
 452 static int __init hl_init(void)
 453 {
 454         int rc;
 455         dev_t dev;
 456 
 457         pr_info("loading driver\n");
 458 
 459         rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME);
 460         if (rc < 0) {
 461                 pr_err("unable to get major\n");
 462                 return rc;
 463         }
 464 
 465         hl_major = MAJOR(dev);
 466 
 467         hl_class = class_create(THIS_MODULE, HL_NAME);
 468         if (IS_ERR(hl_class)) {
 469                 pr_err("failed to allocate class\n");
 470                 rc = PTR_ERR(hl_class);
 471                 goto remove_major;
 472         }
 473 
 474         hl_debugfs_init();
 475 
 476         rc = pci_register_driver(&hl_pci_driver);
 477         if (rc) {
 478                 pr_err("failed to register pci device\n");
 479                 goto remove_debugfs;
 480         }
 481 
 482         pr_debug("driver loaded\n");
 483 
 484         return 0;
 485 
 486 remove_debugfs:
 487         hl_debugfs_fini();
 488         class_destroy(hl_class);
 489 remove_major:
 490         unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
 491         return rc;
 492 }
 493 
 494 /*
 495  * hl_exit - Release all resources of the habanalabs kernel driver
 496  */
 497 static void __exit hl_exit(void)
 498 {
 499         pci_unregister_driver(&hl_pci_driver);
 500 
 501         /*
 502          * Removing debugfs must be after all devices or simulator devices
 503          * have been removed because otherwise we get a bug in the
 504          * debugfs module for referencing NULL objects
 505          */
 506         hl_debugfs_fini();
 507 
 508         class_destroy(hl_class);
 509         unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
 510 
 511         idr_destroy(&hl_devs_idr);
 512 
 513         pr_debug("driver removed\n");
 514 }
 515 
 516 module_init(hl_init);
 517 module_exit(hl_exit);

/* [<][>][^][v][top][bottom][index][help] */