drivers/misc/habanalabs/command_submission.c

DEFINITIONS

This source file includes the following definitions:
  1. hl_fence_get_driver_name
  2. hl_fence_get_timeline_name
  3. hl_fence_enable_signaling
  4. hl_fence_release
  5. cs_get
  6. cs_get_unless_zero
  7. cs_put
  8. cs_parser
  9. free_job
  10. cs_do_release
  11. cs_timedout
  12. allocate_cs
  13. cs_rollback
  14. hl_cs_rollback_all
  15. job_wq_completion
  16. validate_queue_index
  17. hl_cs_allocate_job
  18. _hl_cs_ioctl
  19. hl_cs_ioctl
  20. _hl_cs_wait_ioctl
  21. hl_cs_wait_ioctl

   1 // SPDX-License-Identifier: GPL-2.0
   2 
   3 /*
   4  * Copyright 2016-2019 HabanaLabs, Ltd.
   5  * All Rights Reserved.
   6  */
   7 
   8 #include <uapi/misc/habanalabs.h>
   9 #include "habanalabs.h"
  10 
  11 #include <linux/uaccess.h>
  12 #include <linux/slab.h>
  13 
  14 static void job_wq_completion(struct work_struct *work);
  15 static long _hl_cs_wait_ioctl(struct hl_device *hdev,
  16                 struct hl_ctx *ctx, u64 timeout_us, u64 seq);
  17 static void cs_do_release(struct kref *ref);
  18 
  19 static const char *hl_fence_get_driver_name(struct dma_fence *fence)
  20 {
  21         return "HabanaLabs";
  22 }
  23 
  24 static const char *hl_fence_get_timeline_name(struct dma_fence *fence)
  25 {
  26         struct hl_dma_fence *hl_fence =
  27                 container_of(fence, struct hl_dma_fence, base_fence);
  28 
  29         return dev_name(hl_fence->hdev->dev);
  30 }
  31 
  32 static bool hl_fence_enable_signaling(struct dma_fence *fence)
  33 {
  34         return true;
  35 }
  36 
  37 static void hl_fence_release(struct dma_fence *fence)
  38 {
  39         struct hl_dma_fence *hl_fence =
  40                 container_of(fence, struct hl_dma_fence, base_fence);
  41 
  42         kfree_rcu(hl_fence, base_fence.rcu);
  43 }
  44 
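     /*
      * Each CS completion is represented by a dma_fence embedded in struct
      * hl_dma_fence. The ops below are deliberately minimal: no extra work is
      * needed to enable signaling, waiting uses the default implementation,
      * and release defers the actual free with kfree_rcu() because dma_fence
      * objects may still be accessed under RCU.
      */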
  45 static const struct dma_fence_ops hl_fence_ops = {
  46         .get_driver_name = hl_fence_get_driver_name,
  47         .get_timeline_name = hl_fence_get_timeline_name,
  48         .enable_signaling = hl_fence_enable_signaling,
  49         .wait = dma_fence_default_wait,
  50         .release = hl_fence_release
  51 };
  52 
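     /*
      * CS life-cycle helpers: a CS starts with a refcount of 1 (kref_init in
      * allocate_cs()), each external-queue job takes an additional reference,
      * and cs_do_release() runs once the last reference is dropped.
      */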
  53 static void cs_get(struct hl_cs *cs)
  54 {
  55         kref_get(&cs->refcount);
  56 }
  57 
  58 static int cs_get_unless_zero(struct hl_cs *cs)
  59 {
  60         return kref_get_unless_zero(&cs->refcount);
  61 }
  62 
  63 static void cs_put(struct hl_cs *cs)
  64 {
  65         kref_put(&cs->refcount, cs_do_release);
  66 }
  67 
  68 /*
  69  * cs_parser - parse the user command submission
  70  *
  71  * @hpriv : pointer to the private data of the fd
  72  * @job   : pointer to the job that holds the command submission info
  73  *
  74  * The function parses the user's command submission. It calls the
  75  * ASIC-specific parser, which returns a list of memory blocks to send
  76  * to the device as different command buffers.
  77  *
  78  */
  79 static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
  80 {
  81         struct hl_device *hdev = hpriv->hdev;
  82         struct hl_cs_parser parser;
  83         int rc;
  84 
  85         parser.ctx_id = job->cs->ctx->asid;
  86         parser.cs_sequence = job->cs->sequence;
  87         parser.job_id = job->id;
  88 
  89         parser.hw_queue_id = job->hw_queue_id;
  90         parser.job_userptr_list = &job->userptr_list;
  91         parser.patched_cb = NULL;
  92         parser.user_cb = job->user_cb;
  93         parser.user_cb_size = job->user_cb_size;
  94         parser.ext_queue = job->ext_queue;
  95         job->patched_cb = NULL;
  96 
  97         rc = hdev->asic_funcs->cs_parser(hdev, &parser);
  98         if (job->ext_queue) {
  99                 if (!rc) {
 100                         job->patched_cb = parser.patched_cb;
 101                         job->job_cb_size = parser.patched_cb_size;
 102 
 103                         spin_lock(&job->patched_cb->lock);
 104                         job->patched_cb->cs_cnt++;
 105                         spin_unlock(&job->patched_cb->lock);
 106                 }
 107 
 108                 /*
 109                  * Whether the parsing worked or not, we don't need the
 110                  * original CB anymore because it was already parsed and
 111                  * won't be accessed again for this CS
 112                  */
 113                 spin_lock(&job->user_cb->lock);
 114                 job->user_cb->cs_cnt--;
 115                 spin_unlock(&job->user_cb->lock);
 116                 hl_cb_put(job->user_cb);
 117                 job->user_cb = NULL;
 118         }
 119 
 120         return rc;
 121 }
 122 
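     /*
      * free_job - release a single job of a CS
      *
      * For an external-queue job, the userptr list and the reference on the
      * patched CB (if it was created) are released, and the CS reference that
      * was taken for the job is dropped, possibly triggering cs_do_release().
      * The job is always unlinked from the CS job list under job_lock.
      */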
 123 static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
 124 {
 125         struct hl_cs *cs = job->cs;
 126 
 127         if (job->ext_queue) {
 128                 hl_userptr_delete_list(hdev, &job->userptr_list);
 129 
 130                 /*
 131                  * We might arrive here from a rollback, in which case the
 132                  * patched CB was never created, so check that it's not NULL
 133                  */
 134                 if (job->patched_cb) {
 135                         spin_lock(&job->patched_cb->lock);
 136                         job->patched_cb->cs_cnt--;
 137                         spin_unlock(&job->patched_cb->lock);
 138 
 139                         hl_cb_put(job->patched_cb);
 140                 }
 141         }
 142 
 143         /*
 144          * This is the only place where there can be multiple threads
 145          * modifying the list at the same time
 146          */
 147         spin_lock(&cs->job_lock);
 148         list_del(&job->cs_node);
 149         spin_unlock(&cs->job_lock);
 150 
 151         hl_debugfs_remove_job(hdev, job);
 152 
 153         if (job->ext_queue)
 154                 cs_put(cs);
 155 
 156         kfree(job);
 157 }
 158 
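     /*
      * cs_do_release - final release of a CS, called when its refcount drops
      * to zero
      *
      * Frees the remaining (internal-queue) jobs, updates the CI of the
      * internal queues, removes the CS from the H/W queues mirror list,
      * re-arms the TDR for the next pending CS, puts the context and finally
      * sets any error on the CS fence, signals it and puts it.
      */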
 159 static void cs_do_release(struct kref *ref)
 160 {
 161         struct hl_cs *cs = container_of(ref, struct hl_cs,
 162                                                 refcount);
 163         struct hl_device *hdev = cs->ctx->hdev;
 164         struct hl_cs_job *job, *tmp;
 165 
 166         cs->completed = true;
 167 
 168         /*
 169          * Reaching here means that all external jobs have finished,
 170          * because each of them took a reference on the CS. We still need
 171          * to go over the internal jobs and free them; otherwise we would
 172          * leak memory and, worse, the CS object (and potentially the CTX
 173          * object) could be released while a JOB still holds a pointer to
 174          * them (but no reference).
 175          */
 176         list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
 177                 free_job(hdev, job);
 178 
 179         /* We also need to update CI for internal queues */
 180         if (cs->submitted) {
 181                 hdev->asic_funcs->hw_queues_lock(hdev);
 182 
 183                 hdev->cs_active_cnt--;
 184                 if (!hdev->cs_active_cnt) {
 185                         struct hl_device_idle_busy_ts *ts;
 186 
 187                         ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
 188                         ts->busy_to_idle_ts = ktime_get();
 189 
 190                         if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
 191                                 hdev->idle_busy_ts_idx = 0;
 192                 } else if (hdev->cs_active_cnt < 0) {
 193                         dev_crit(hdev->dev, "CS active cnt %d is negative\n",
 194                                 hdev->cs_active_cnt);
 195                 }
 196 
 197                 hdev->asic_funcs->hw_queues_unlock(hdev);
 198 
 199                 hl_int_hw_queue_update_ci(cs);
 200 
 201                 spin_lock(&hdev->hw_queues_mirror_lock);
 202                 /* remove CS from hw_queues mirror list */
 203                 list_del_init(&cs->mirror_node);
 204                 spin_unlock(&hdev->hw_queues_mirror_lock);
 205 
 206                 /*
 207                  * Don't cancel the TDR if this CS has timed out, because we
 208                  * might be running from the TDR context itself
 209                  */
 210                 if ((!cs->timedout) &&
 211                         (hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT)) {
 212                         struct hl_cs *next;
 213 
 214                         if (cs->tdr_active)
 215                                 cancel_delayed_work_sync(&cs->work_tdr);
 216 
 217                         spin_lock(&hdev->hw_queues_mirror_lock);
 218 
 219                         /* queue TDR for next CS */
 220                         next = list_first_entry_or_null(
 221                                         &hdev->hw_queues_mirror_list,
 222                                         struct hl_cs, mirror_node);
 223 
 224                         if ((next) && (!next->tdr_active)) {
 225                                 next->tdr_active = true;
 226                                 schedule_delayed_work(&next->work_tdr,
 227                                                         hdev->timeout_jiffies);
 228                         }
 229 
 230                         spin_unlock(&hdev->hw_queues_mirror_lock);
 231                 }
 232         }
 233 
 234         /*
 235          * Must be called before hl_ctx_put because inside we use ctx to get
 236          * the device
 237          */
 238         hl_debugfs_remove_cs(cs);
 239 
 240         hl_ctx_put(cs->ctx);
 241 
 242         if (cs->timedout)
 243                 dma_fence_set_error(cs->fence, -ETIMEDOUT);
 244         else if (cs->aborted)
 245                 dma_fence_set_error(cs->fence, -EIO);
 246 
 247         dma_fence_signal(cs->fence);
 248         dma_fence_put(cs->fence);
 249 
 250         kfree(cs);
 251 }
 252 
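     /*
      * cs_timedout - TDR (timeout detection and recovery) work handler
      *
      * If the CS is still in flight when the delayed work fires, it is marked
      * as timed out (so cs_do_release() won't try to cancel its own TDR), an
      * error is printed and, if reset_on_lockup is set, the device is reset.
      */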
 253 static void cs_timedout(struct work_struct *work)
 254 {
 255         struct hl_device *hdev;
 256         int ctx_asid, rc;
 257         struct hl_cs *cs = container_of(work, struct hl_cs,
 258                                                  work_tdr.work);
 259         rc = cs_get_unless_zero(cs);
 260         if (!rc)
 261                 return;
 262 
 263         if ((!cs->submitted) || (cs->completed)) {
 264                 cs_put(cs);
 265                 return;
 266         }
 267 
 268         /* Mark the CS as timed out so we won't try to cancel its TDR */
 269         cs->timedout = true;
 270 
 271         hdev = cs->ctx->hdev;
 272         ctx_asid = cs->ctx->asid;
 273 
 274         /* TODO: add information about last signaled seq and last emitted seq */
 275         dev_err(hdev->dev, "User %d command submission %llu got stuck!\n",
 276                 ctx_asid, cs->sequence);
 277 
 278         cs_put(cs);
 279 
 280         if (hdev->reset_on_lockup)
 281                 hl_device_reset(hdev, false, false);
 282 }
 283 
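     /*
      * allocate_cs - allocate a CS object and its fence
      *
      * The CS sequence number is taken from ctx->cs_sequence and the fence is
      * stored in the ctx->cs_pending ring at slot (seq & (HL_MAX_PENDING_CS - 1)),
      * which relies on HL_MAX_PENDING_CS being a power of two. If the fence
      * already occupying that slot has not signaled yet, there are too many
      * in-flight CSs and the allocation fails with -EAGAIN so the caller can
      * retry later.
      */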
 284 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 285                         struct hl_cs **cs_new)
 286 {
 287         struct hl_dma_fence *fence;
 288         struct dma_fence *other = NULL;
 289         struct hl_cs *cs;
 290         int rc;
 291 
 292         cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
 293         if (!cs)
 294                 return -ENOMEM;
 295 
 296         cs->ctx = ctx;
 297         cs->submitted = false;
 298         cs->completed = false;
 299         INIT_LIST_HEAD(&cs->job_list);
 300         INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
 301         kref_init(&cs->refcount);
 302         spin_lock_init(&cs->job_lock);
 303 
 304         fence = kmalloc(sizeof(*fence), GFP_ATOMIC);
 305         if (!fence) {
 306                 rc = -ENOMEM;
 307                 goto free_cs;
 308         }
 309 
 310         fence->hdev = hdev;
 311         spin_lock_init(&fence->lock);
 312         cs->fence = &fence->base_fence;
 313 
 314         spin_lock(&ctx->cs_lock);
 315 
 316         fence->cs_seq = ctx->cs_sequence;
 317         other = ctx->cs_pending[fence->cs_seq & (HL_MAX_PENDING_CS - 1)];
 318         if ((other) && (!dma_fence_is_signaled(other))) {
 319                 spin_unlock(&ctx->cs_lock);
 320                 dev_dbg(hdev->dev,
 321                         "Rejecting CS because of too many in-flight CSs\n");
 322                 rc = -EAGAIN;
 323                 goto free_fence;
 324         }
 325 
 326         dma_fence_init(&fence->base_fence, &hl_fence_ops, &fence->lock,
 327                         ctx->asid, ctx->cs_sequence);
 328 
 329         cs->sequence = fence->cs_seq;
 330 
 331         ctx->cs_pending[fence->cs_seq & (HL_MAX_PENDING_CS - 1)] =
 332                                                         &fence->base_fence;
 333         ctx->cs_sequence++;
 334 
 335         dma_fence_get(&fence->base_fence);
 336 
 337         dma_fence_put(other);
 338 
 339         spin_unlock(&ctx->cs_lock);
 340 
 341         *cs_new = cs;
 342 
 343         return 0;
 344 
 345 free_fence:
 346         kfree(fence);
 347 free_cs:
 348         kfree(cs);
 349         return rc;
 350 }
 351 
 352 static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
 353 {
 354         struct hl_cs_job *job, *tmp;
 355 
 356         list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
 357                 free_job(hdev, job);
 358 }
 359 
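     /*
      * hl_cs_rollback_all - abort every CS still on the H/W queues mirror list
      *
      * Typically called on the device reset path, after all completions have
      * been flushed. Each CS is marked as aborted, so waiters will see -EIO,
      * and its jobs are rolled back.
      */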
 360 void hl_cs_rollback_all(struct hl_device *hdev)
 361 {
 362         struct hl_cs *cs, *tmp;
 363 
 364         /* flush all completions */
 365         flush_workqueue(hdev->cq_wq);
 366 
 367         /* Make sure we don't have leftovers in the H/W queues mirror list */
 368         list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
 369                                 mirror_node) {
 370                 cs_get(cs);
 371                 cs->aborted = true;
 372                 dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
 373                                         cs->ctx->asid, cs->sequence);
 374                 cs_rollback(hdev, cs);
 375                 cs_put(cs);
 376         }
 377 }
 378 
 379 static void job_wq_completion(struct work_struct *work)
 380 {
 381         struct hl_cs_job *job = container_of(work, struct hl_cs_job,
 382                                                 finish_work);
 383         struct hl_cs *cs = job->cs;
 384         struct hl_device *hdev = cs->ctx->hdev;
 385 
 386         /* job is no longer needed */
 387         free_job(hdev, job);
 388 }
 389 
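     /*
      * validate_queue_index - validate a chunk's queue index and resolve its CB
      *
      * For an external queue, the CB handle is looked up in the CB manager, a
      * reference is taken and its cs_cnt is incremented before the CB pointer
      * is returned. For an internal queue, *ext_queue is cleared and the
      * chunk's cb_handle is returned as-is, cast to a CB pointer, to be
      * interpreted by the ASIC-specific code. NULL is returned on any
      * validation failure.
      */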
 390 static struct hl_cb *validate_queue_index(struct hl_device *hdev,
 391                                         struct hl_cb_mgr *cb_mgr,
 392                                         struct hl_cs_chunk *chunk,
 393                                         bool *ext_queue)
 394 {
 395         struct asic_fixed_properties *asic = &hdev->asic_prop;
 396         struct hw_queue_properties *hw_queue_prop;
 397         u32 cb_handle;
 398         struct hl_cb *cb;
 399 
 400         /* Assume external queue */
 401         *ext_queue = true;
 402 
 403         if ((chunk->queue_index >= HL_MAX_QUEUES) ||
 404             (asic->hw_queues_props[chunk->queue_index].type == QUEUE_TYPE_NA)) {
 405                 dev_err(hdev->dev, "Queue index %d is invalid\n",
 406                         chunk->queue_index);
 407                 return NULL;
 408         }
 409 
 410         hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
 411 
 412         if (hw_queue_prop->driver_only) {
 413                 dev_err(hdev->dev,
 414                         "Queue index %d is restricted for the kernel driver\n",
 415                         chunk->queue_index);
 416                 return NULL;
 417         } else if (hw_queue_prop->type == QUEUE_TYPE_INT) {
 418                 *ext_queue = false;
 419                 return (struct hl_cb *) (uintptr_t) chunk->cb_handle;
 420         }
 421 
 422         /* Retrieve CB object */
 423         cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);
 424 
 425         cb = hl_cb_get(hdev, cb_mgr, cb_handle);
 426         if (!cb) {
 427                 dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
 428                 return NULL;
 429         }
 430 
 431         if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
 432                 dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
 433                 goto release_cb;
 434         }
 435 
 436         spin_lock(&cb->lock);
 437         cb->cs_cnt++;
 438         spin_unlock(&cb->lock);
 439 
 440         return cb;
 441 
 442 release_cb:
 443         hl_cb_put(cb);
 444         return NULL;
 445 }
 446 
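     /*
      * hl_cs_allocate_job - allocate a job object for a CS
      *
      * Only external-queue jobs get a userptr list and a completion work item,
      * because only those jobs receive a completion from the device;
      * internal-queue jobs are freed when the CS itself is released.
      */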
 447 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue)
 448 {
 449         struct hl_cs_job *job;
 450 
 451         job = kzalloc(sizeof(*job), GFP_ATOMIC);
 452         if (!job)
 453                 return NULL;
 454 
 455         job->ext_queue = ext_queue;
 456 
 457         if (job->ext_queue) {
 458                 INIT_LIST_HEAD(&job->userptr_list);
 459                 INIT_WORK(&job->finish_work, job_wq_completion);
 460         }
 461 
 462         return job;
 463 }
 464 
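     /*
      * _hl_cs_ioctl - build and submit a single CS from a user chunk array
      *
      * Copies the chunk array from user space, allocates a CS, creates one job
      * per chunk, runs the ASIC parser on every job and finally schedules the
      * CS on the H/W queues. A CS that contains no external-queue job is
      * rejected, because only external queues generate the completions that
      * eventually release the CS.
      */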
 465 static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
 466                         u32 num_chunks, u64 *cs_seq)
 467 {
 468         struct hl_device *hdev = hpriv->hdev;
 469         struct hl_cs_chunk *cs_chunk_array;
 470         struct hl_cs_job *job;
 471         struct hl_cs *cs;
 472         struct hl_cb *cb;
 473         bool ext_queue_present = false;
 474         u32 size_to_copy;
 475         int rc, i, parse_cnt;
 476 
 477         *cs_seq = ULLONG_MAX;
 478 
 479         if (num_chunks > HL_MAX_JOBS_PER_CS) {
 480                 dev_err(hdev->dev,
 481                         "Number of chunks can NOT be larger than %d\n",
 482                         HL_MAX_JOBS_PER_CS);
 483                 rc = -EINVAL;
 484                 goto out;
 485         }
 486 
 487         cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
 488                                         GFP_ATOMIC);
 489         if (!cs_chunk_array) {
 490                 rc = -ENOMEM;
 491                 goto out;
 492         }
 493 
 494         size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
 495         if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
 496                 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
 497                 rc = -EFAULT;
 498                 goto free_cs_chunk_array;
 499         }
 500 
 501         /* increment refcnt for context */
 502         hl_ctx_get(hdev, hpriv->ctx);
 503 
 504         rc = allocate_cs(hdev, hpriv->ctx, &cs);
 505         if (rc) {
 506                 hl_ctx_put(hpriv->ctx);
 507                 goto free_cs_chunk_array;
 508         }
 509 
 510         *cs_seq = cs->sequence;
 511 
 512         hl_debugfs_add_cs(cs);
 513 
 514         /* Validate ALL the CS chunks before submitting the CS */
 515         for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) {
 516                 struct hl_cs_chunk *chunk = &cs_chunk_array[i];
 517                 bool ext_queue;
 518 
 519                 cb = validate_queue_index(hdev, &hpriv->cb_mgr, chunk,
 520                                         &ext_queue);
 521                 if (ext_queue) {
 522                         ext_queue_present = true;
 523                         if (!cb) {
 524                                 rc = -EINVAL;
 525                                 goto free_cs_object;
 526                         }
 527                 }
 528 
 529                 job = hl_cs_allocate_job(hdev, ext_queue);
 530                 if (!job) {
 531                         dev_err(hdev->dev, "Failed to allocate a new job\n");
 532                         rc = -ENOMEM;
 533                         if (ext_queue)
 534                                 goto release_cb;
 535                         else
 536                                 goto free_cs_object;
 537                 }
 538 
 539                 job->id = i + 1;
 540                 job->cs = cs;
 541                 job->user_cb = cb;
 542                 job->user_cb_size = chunk->cb_size;
 543                 if (job->ext_queue)
 544                         job->job_cb_size = cb->size;
 545                 else
 546                         job->job_cb_size = chunk->cb_size;
 547                 job->hw_queue_id = chunk->queue_index;
 548 
 549                 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
 550 
 551                 list_add_tail(&job->cs_node, &cs->job_list);
 552 
 553                 /*
 554                  * Increment the CS reference. When the CS refcount reaches 0,
 555                  * the CS is done and can be signaled to the user and all its
 556                  * resources freed. Only increment for jobs on external queues,
 557                  * because only those jobs get a completion
 558                  */
 559                 if (job->ext_queue)
 560                         cs_get(cs);
 561 
 562                 hl_debugfs_add_job(hdev, job);
 563 
 564                 rc = cs_parser(hpriv, job);
 565                 if (rc) {
 566                         dev_err(hdev->dev,
 567                                 "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
 568                                 cs->ctx->asid, cs->sequence, job->id, rc);
 569                         goto free_cs_object;
 570                 }
 571         }
 572 
 573         if (!ext_queue_present) {
 574                 dev_err(hdev->dev,
 575                         "Reject CS %d.%llu because it has no external queue jobs\n",
 576                         cs->ctx->asid, cs->sequence);
 577                 rc = -EINVAL;
 578                 goto free_cs_object;
 579         }
 580 
 581         rc = hl_hw_queue_schedule_cs(cs);
 582         if (rc) {
 583                 dev_err(hdev->dev,
 584                         "Failed to submit CS %d.%llu to H/W queues, error %d\n",
 585                         cs->ctx->asid, cs->sequence, rc);
 586                 goto free_cs_object;
 587         }
 588 
 589         rc = HL_CS_STATUS_SUCCESS;
 590         goto put_cs;
 591 
 592 release_cb:
 593         spin_lock(&cb->lock);
 594         cb->cs_cnt--;
 595         spin_unlock(&cb->lock);
 596         hl_cb_put(cb);
 597 free_cs_object:
 598         cs_rollback(hdev, cs);
 599         *cs_seq = ULLONG_MAX;
 600         /* The path below is taken on both successful and erroneous exits */
 601 put_cs:
 602         /* We finished with the CS in this function, so put the ref */
 603         cs_put(cs);
 604 free_cs_chunk_array:
 605         kfree(cs_chunk_array);
 606 out:
 607         return rc;
 608 }
 609 
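     /*
      * hl_cs_ioctl - IOCTL handler for command submission
      *
      * On the first submission of a context (or when HL_CS_FLAGS_FORCE_RESTORE
      * is set), a restore/context-switch CS described by chunks_restore is
      * submitted and waited for before the execute CS described by
      * chunks_execute is submitted. Other threads of the same context poll
      * thread_ctx_switch_wait_token until the switch is done.
      *
      * A rough user-space sketch, assuming the uapi names used in this file
      * and HL_IOCTL_CS from uapi/misc/habanalabs.h (error handling omitted,
      * cb_handle obtained from the CB-create IOCTL beforehand):
      *
      *    union hl_cs_args args = {};
      *    struct hl_cs_chunk chunk = {
      *            .cb_handle = cb_handle,
      *            .queue_index = queue_index,
      *            .cb_size = cb_size,
      *    };
      *
      *    args.in.chunks_execute = (__u64) (uintptr_t) &chunk;
      *    args.in.num_chunks_execute = 1;
      *    ioctl(fd, HL_IOCTL_CS, &args);
      *
      * args.out.seq can then be passed to the wait-for-CS IOCTL.
      */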
 610 int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 611 {
 612         struct hl_device *hdev = hpriv->hdev;
 613         union hl_cs_args *args = data;
 614         struct hl_ctx *ctx = hpriv->ctx;
 615         void __user *chunks;
 616         u32 num_chunks;
 617         u64 cs_seq = ULLONG_MAX;
 618         int rc, do_ctx_switch;
 619         bool need_soft_reset = false;
 620 
 621         if (hl_device_disabled_or_in_reset(hdev)) {
 622                 dev_warn_ratelimited(hdev->dev,
 623                         "Device is %s. Can't submit new CS\n",
 624                         atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
 625                 rc = -EBUSY;
 626                 goto out;
 627         }
 628 
 629         do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
 630 
 631         if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
 632                 long ret;
 633 
 634                 chunks = (void __user *)(uintptr_t)args->in.chunks_restore;
 635                 num_chunks = args->in.num_chunks_restore;
 636 
 637                 mutex_lock(&hpriv->restore_phase_mutex);
 638 
 639                 if (do_ctx_switch) {
 640                         rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
 641                         if (rc) {
 642                                 dev_err_ratelimited(hdev->dev,
 643                                         "Failed to switch to context %d, rejecting CS! %d\n",
 644                                         ctx->asid, rc);
 645                                 /*
 646                                  * If we timed out, or if the device is not
 647                                  * idle while we want to do a context switch
 648                                  * (-EBUSY), we need to soft-reset because
 649                                  * the QMAN is probably stuck. However, we
 650                                  * can't call the reset directly from here
 651                                  * because of a potential deadlock, so we do
 652                                  * it at the very end of this function
 653                                  */
 654                                 if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
 655                                         need_soft_reset = true;
 656                                 mutex_unlock(&hpriv->restore_phase_mutex);
 657                                 goto out;
 658                         }
 659                 }
 660 
 661                 hdev->asic_funcs->restore_phase_topology(hdev);
 662 
 663                 if (num_chunks == 0) {
 664                         dev_dbg(hdev->dev,
 665                         "Need to run restore phase but restore CS is empty\n");
 666                         rc = 0;
 667                 } else {
 668                         rc = _hl_cs_ioctl(hpriv, chunks, num_chunks,
 669                                                 &cs_seq);
 670                 }
 671 
 672                 mutex_unlock(&hpriv->restore_phase_mutex);
 673 
 674                 if (rc) {
 675                         dev_err(hdev->dev,
 676                                 "Failed to submit restore CS for context %d (%d)\n",
 677                                 ctx->asid, rc);
 678                         goto out;
 679                 }
 680 
 681                 /* Need to wait for restore completion before execution phase */
 682                 if (num_chunks > 0) {
 683                         ret = _hl_cs_wait_ioctl(hdev, ctx,
 684                                         jiffies_to_usecs(hdev->timeout_jiffies),
 685                                         cs_seq);
 686                         if (ret <= 0) {
 687                                 dev_err(hdev->dev,
 688                                         "Restore CS for context %d failed to complete %ld\n",
 689                                         ctx->asid, ret);
 690                                 rc = -ENOEXEC;
 691                                 goto out;
 692                         }
 693                 }
 694 
 695                 ctx->thread_ctx_switch_wait_token = 1;
 696         } else if (!ctx->thread_ctx_switch_wait_token) {
 697                 u32 tmp;
 698 
 699                 rc = hl_poll_timeout_memory(hdev,
 700                         &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
 701                         100, jiffies_to_usecs(hdev->timeout_jiffies), false);
 702 
 703                 if (rc == -ETIMEDOUT) {
 704                         dev_err(hdev->dev,
 705                                 "context switch phase timeout (%d)\n", tmp);
 706                         goto out;
 707                 }
 708         }
 709 
 710         chunks = (void __user *)(uintptr_t)args->in.chunks_execute;
 711         num_chunks = args->in.num_chunks_execute;
 712 
 713         if (num_chunks == 0) {
 714                 dev_err(hdev->dev,
 715                         "Got execute CS with 0 chunks, context %d\n",
 716                         ctx->asid);
 717                 rc = -EINVAL;
 718                 goto out;
 719         }
 720 
 721         rc = _hl_cs_ioctl(hpriv, chunks, num_chunks, &cs_seq);
 722 
 723 out:
 724         if (rc != -EAGAIN) {
 725                 memset(args, 0, sizeof(*args));
 726                 args->out.status = rc;
 727                 args->out.seq = cs_seq;
 728         }
 729 
 730         if (((rc == -ETIMEDOUT) || (rc == -EBUSY)) && (need_soft_reset))
 731                 hl_device_reset(hdev, false, false);
 732 
 733         return rc;
 734 }
 735 
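     /*
      * _hl_cs_wait_ioctl - wait for a CS to complete
      *
      * Return: follows dma_fence_wait_timeout(): a positive value if the fence
      * signaled, 1 if no fence exists anymore for the sequence (the CS already
      * completed), 0 if the wait timed out, and a negative error code
      * otherwise. -ETIMEDOUT or -EIO is returned if the CS itself timed out or
      * was aborted.
      */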
 736 static long _hl_cs_wait_ioctl(struct hl_device *hdev,
 737                 struct hl_ctx *ctx, u64 timeout_us, u64 seq)
 738 {
 739         struct dma_fence *fence;
 740         unsigned long timeout;
 741         long rc;
 742 
 743         if (timeout_us == MAX_SCHEDULE_TIMEOUT)
 744                 timeout = timeout_us;
 745         else
 746                 timeout = usecs_to_jiffies(timeout_us);
 747 
 748         hl_ctx_get(hdev, ctx);
 749 
 750         fence = hl_ctx_get_fence(ctx, seq);
 751         if (IS_ERR(fence)) {
 752                 rc = PTR_ERR(fence);
 753         } else if (fence) {
 754                 rc = dma_fence_wait_timeout(fence, true, timeout);
 755                 if (fence->error == -ETIMEDOUT)
 756                         rc = -ETIMEDOUT;
 757                 else if (fence->error == -EIO)
 758                         rc = -EIO;
 759                 dma_fence_put(fence);
 760         } else
 761                 rc = 1;
 762 
 763         hl_ctx_put(ctx);
 764 
 765         return rc;
 766 }
 767 
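     /*
      * hl_cs_wait_ioctl - IOCTL handler for waiting on a CS
      *
      * Maps the result of _hl_cs_wait_ioctl() to a HL_WAIT_CS_STATUS_* value:
      * COMPLETED on success, BUSY if the wait timed out, TIMEDOUT or ABORTED
      * if the CS itself timed out or was aborted, and INTERRUPTED (with
      * -EINTR) if the wait was interrupted by a signal.
      */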
 768 int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 769 {
 770         struct hl_device *hdev = hpriv->hdev;
 771         union hl_wait_cs_args *args = data;
 772         u64 seq = args->in.seq;
 773         long rc;
 774 
 775         rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq);
 776 
 777         memset(args, 0, sizeof(*args));
 778 
 779         if (rc < 0) {
 780                 dev_err_ratelimited(hdev->dev,
 781                                 "Error %ld on waiting for CS handle %llu\n",
 782                                 rc, seq);
 783                 if (rc == -ERESTARTSYS) {
 784                         args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
 785                         rc = -EINTR;
 786                 } else if (rc == -ETIMEDOUT) {
 787                         args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
 788                 } else if (rc == -EIO) {
 789                         args->out.status = HL_WAIT_CS_STATUS_ABORTED;
 790                 }
 791                 return rc;
 792         }
 793 
 794         if (rc == 0)
 795                 args->out.status = HL_WAIT_CS_STATUS_BUSY;
 796         else
 797                 args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
 798 
 799         return 0;
 800 }
