root/drivers/gpu/drm/i915/gt/selftest_timeline.c


DEFINITIONS

This source file includes the following definitions.
  1. hwsp_page
  2. hwsp_cacheline
  3. __mock_hwsp_record
  4. __mock_hwsp_timeline
  5. mock_hwsp_freelist
  6. __igt_sync
  7. igt_sync
  8. random_engine
  9. bench_sync
  10. intel_timeline_mock_selftests
  11. emit_ggtt_store_dw
  12. tl_write
  13. checked_intel_timeline_create
  14. live_hwsp_engine
  15. live_hwsp_alternate
  16. live_hwsp_wrap
  17. live_hwsp_recycle
  18. intel_timeline_live_selftests

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "intel_gt.h"

#include "../selftests/i915_random.h"
#include "../i915_selftest.h"

#include "../selftests/igt_flush_test.h"
#include "../selftests/mock_gem_device.h"
#include "selftests/mock_timeline.h"

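/*
 * Each timeline claims one cacheline within a page of HWSP (hardware status
 * page) storage for its breadcrumb. hwsp_cacheline() turns that slot into a
 * stable id by combining the CPU address of the backing page with the
 * timeline's offset: e.g. with 64-byte cachelines, a 4KiB page provides
 * PAGE_SIZE / 64 = 64 distinct slots, and no two live timelines should ever
 * report the same id.
 */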
static struct page *hwsp_page(struct intel_timeline *tl)
{
        struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;

        GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
        return sg_page(obj->mm.pages->sgl);
}

static unsigned long hwsp_cacheline(struct intel_timeline *tl)
{
        unsigned long address = (unsigned long)page_address(hwsp_page(tl));

        return (address + tl->hwsp_offset) / CACHELINE_BYTES;
}

#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)

struct mock_hwsp_freelist {
        struct drm_i915_private *i915;
        struct radix_tree_root cachelines;
        struct intel_timeline **history;
        unsigned long count, max;
        struct rnd_state prng;
};

enum {
        SHUFFLE = BIT(0),
};

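/*
 * Swap @tl into the history slot at @idx; whatever previously occupied the
 * slot is untracked from the cacheline radix tree and released, returning
 * its HWSP cacheline to the freelist. Passing tl == NULL simply evicts.
 */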
static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
                               unsigned int idx,
                               struct intel_timeline *tl)
{
        tl = xchg(&state->history[idx], tl);
        if (tl) {
                radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
                intel_timeline_put(tl);
        }
}

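/*
 * Create @count timelines, checking via the radix tree that no two of them
 * are handed the same HWSP cacheline, then release a random number of them
 * so that the next pass exercises reuse of the freed slots.
 */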
static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
                                unsigned int count,
                                unsigned int flags)
{
        struct intel_timeline *tl;
        unsigned int idx;

        while (count--) {
                unsigned long cacheline;
                int err;

                tl = intel_timeline_create(&state->i915->gt, NULL);
                if (IS_ERR(tl))
                        return PTR_ERR(tl);

                cacheline = hwsp_cacheline(tl);
                err = radix_tree_insert(&state->cachelines, cacheline, tl);
                if (err) {
                        if (err == -EEXIST) {
                                pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
                                       cacheline);
                        }
                        intel_timeline_put(tl);
                        return err;
                }

                idx = state->count++ % state->max;
                __mock_hwsp_record(state, idx, tl);
        }

        if (flags & SHUFFLE)
                i915_prandom_shuffle(state->history,
                                     sizeof(*state->history),
                                     min(state->count, state->max),
                                     &state->prng);

        count = i915_prandom_u32_max_state(min(state->count, state->max),
                                           &state->prng);
        while (count--) {
                idx = --state->count % state->max;
                __mock_hwsp_record(state, idx, NULL);
        }

        return 0;
}

static int mock_hwsp_freelist(void *arg)
{
        struct mock_hwsp_freelist state;
        const struct {
                const char *name;
                unsigned int flags;
        } phases[] = {
                { "linear", 0 },
                { "shuffled", SHUFFLE },
                { },
        }, *p;
        unsigned int na;
        int err = 0;

        INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
        state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);

        state.i915 = mock_gem_device();
        if (!state.i915)
                return -ENOMEM;

        /*
         * Create a bunch of timelines and check that their HWSP do not overlap.
         * Free some, and try again.
         */

        state.max = PAGE_SIZE / sizeof(*state.history);
        state.count = 0;
        state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
        if (!state.history) {
                err = -ENOMEM;
                goto err_put;
        }

        mutex_lock(&state.i915->drm.struct_mutex);
        for (p = phases; p->name; p++) {
                pr_debug("%s(%s)\n", __func__, p->name);
                for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
                        err = __mock_hwsp_timeline(&state, na, p->flags);
                        if (err)
                                goto out;
                }
        }

out:
        for (na = 0; na < state.max; na++)
                __mock_hwsp_record(&state, na, NULL);
        mutex_unlock(&state.i915->drm.struct_mutex);
        kfree(state.history);
err_put:
        drm_dev_put(&state.i915->drm);
        return err;
}

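/*
 * Each test vector names a u32 seqno, the result expected from
 * __intel_timeline_sync_is_later() at that point, and whether to record the
 * seqno afterwards with __intel_timeline_sync_set(). The comparison is
 * modulo-2^32 with a signed delta, so values more than 2^31 apart read as
 * "earlier" again - hence the INT_MAX/UINT_MAX wrap entries below.
 */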
struct __igt_sync {
        const char *name;
        u32 seqno;
        bool expected;
        bool set;
};

static int __igt_sync(struct intel_timeline *tl,
                      u64 ctx,
                      const struct __igt_sync *p,
                      const char *name)
{
        int ret;

        if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
                pr_err("%s: %s(ctx=%llu, seqno=%u) expected sync_is_later to report %s\n",
                       name, p->name, ctx, p->seqno, yesno(p->expected));
                return -EINVAL;
        }

        if (p->set) {
                ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
                if (ret)
                        return ret;
        }

        return 0;
}

static int igt_sync(void *arg)
{
        const struct __igt_sync pass[] = {
                { "unset", 0, false, false },
                { "new", 0, false, true },
                { "0a", 0, true, true },
                { "1a", 1, false, true },
                { "1b", 1, true, true },
                { "0b", 0, true, false },
                { "2a", 2, false, true },
                { "4", 4, false, true },
                { "INT_MAX", INT_MAX, false, true },
                { "INT_MAX-1", INT_MAX-1, true, false },
                { "INT_MAX+1", (u32)INT_MAX+1, false, true },
                { "INT_MAX", INT_MAX, true, false },
                { "UINT_MAX", UINT_MAX, false, true },
                { "wrap", 0, false, true },
                { "unwrap", UINT_MAX, true, false },
                {},
        }, *p;
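        /*
         * Sweep the context id across every bit order (2^order with offsets
         * -1, 0, +1) so that each level of the underlying syncmap radix
         * layout should be exercised at least once.
         */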
        struct intel_timeline tl;
        int order, offset;
        int ret = -ENODEV;

        mock_timeline_init(&tl, 0);
        for (p = pass; p->name; p++) {
                for (order = 1; order < 64; order++) {
                        for (offset = -1; offset <= (order > 1); offset++) {
                                u64 ctx = BIT_ULL(order) + offset;

                                ret = __igt_sync(&tl, ctx, p, "1");
                                if (ret)
                                        goto out;
                        }
                }
        }
        mock_timeline_fini(&tl);

        mock_timeline_init(&tl, 0);
        for (order = 1; order < 64; order++) {
                for (offset = -1; offset <= (order > 1); offset++) {
                        u64 ctx = BIT_ULL(order) + offset;

                        for (p = pass; p->name; p++) {
                                ret = __igt_sync(&tl, ctx, p, "2");
                                if (ret)
                                        goto out;
                        }
                }
        }

out:
        mock_timeline_fini(&tl);
        return ret;
}

static unsigned int random_engine(struct rnd_state *rnd)
{
        return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}

static int bench_sync(void *arg)
{
        struct rnd_state prng;
        struct intel_timeline tl;
        unsigned long end_time, count;
        u64 prng32_1M;
        ktime_t kt;
        int order, last_order;

        mock_timeline_init(&tl, 0);

        /*
         * Lookups from cache are very fast, so the random number generation
         * and the loop itself become a significant factor in the
         * per-iteration timings. We compensate by measuring the overhead of
         * the prng and subtracting it from the reported results.
         */
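        /*
         * prng32_1M holds the cost of 2^20 prng32 calls in ns, kept in fixed
         * point so that "(count * prng32_1M * 2) >> 20" below subtracts the
         * cost of the two u32 draws behind each 64-bit random id.
         */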
        prandom_seed_state(&prng, i915_selftest.random_seed);
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                u32 x;

                /* Make sure the compiler doesn't optimise away the prng call */
                WRITE_ONCE(x, prandom_u32_state(&prng));

                count++;
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        pr_debug("%s: %lu random evaluations, %lluns/prng\n",
                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
        prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

        /* Benchmark (only) setting random context ids */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                u64 id = i915_prandom_u64_state(&prng);

                __intel_timeline_sync_set(&tl, id, 0);
                count++;
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
        pr_info("%s: %lu random insertions, %lluns/insert\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        /* Benchmark looking up the exact same context ids as we just set */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        end_time = count;
        kt = ktime_get();
        while (end_time--) {
                u64 id = i915_prandom_u64_state(&prng);

                if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
                        mock_timeline_fini(&tl);
                        pr_err("Lookup of %llu failed\n", id);
                        return -EINVAL;
                }
        }
        kt = ktime_sub(ktime_get(), kt);
        kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
        pr_info("%s: %lu random lookups, %lluns/lookup\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        mock_timeline_fini(&tl);
        cond_resched();

        mock_timeline_init(&tl, 0);

        /* Benchmark setting the first N (in order) contexts */
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                __intel_timeline_sync_set(&tl, count++, 0);
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        pr_info("%s: %lu in-order insertions, %lluns/insert\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        /* Benchmark looking up the exact same context ids as we just set */
        end_time = count;
        kt = ktime_get();
        while (end_time--) {
                if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
                        pr_err("Lookup of %lu failed\n", end_time);
                        mock_timeline_fini(&tl);
                        return -EINVAL;
                }
        }
        kt = ktime_sub(ktime_get(), kt);
        pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        mock_timeline_fini(&tl);
        cond_resched();

        mock_timeline_init(&tl, 0);

        /* Benchmark searching for a random context id and maybe changing it */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                u32 id = random_engine(&prng);
                u32 seqno = prandom_u32_state(&prng);

                if (!__intel_timeline_sync_is_later(&tl, id, seqno))
                        __intel_timeline_sync_set(&tl, id, seqno);

                count++;
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
        pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
        mock_timeline_fini(&tl);
        cond_resched();

        /* Benchmark searching for a known context id and changing the seqno */
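        /*
         * The order steps through a Fibonacci progression (1, 2, 3, 5, 8,
         * ...), sampling a spread of map sizes without sweeping every order.
         */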
        for (last_order = 1, order = 1; order < 32;
             ({ int tmp = last_order; last_order = order; order += tmp; })) {
                unsigned int mask = BIT(order) - 1;

                mock_timeline_init(&tl, 0);

                count = 0;
                kt = ktime_get();
                end_time = jiffies + HZ/10;
                do {
                        /* Without assuming too many details of the underlying
                         * implementation, try to identify its phase-changes
                         * (if any)!
                         */
                        u64 id = (u64)(count & mask) << order;

                        __intel_timeline_sync_is_later(&tl, id, 0);
                        __intel_timeline_sync_set(&tl, id, 0);

                        count++;
                } while (!time_after(jiffies, end_time));
                kt = ktime_sub(ktime_get(), kt);
                pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
                        __func__, count, order,
                        (long long)div64_ul(ktime_to_ns(kt), count));
                mock_timeline_fini(&tl);
                cond_resched();
        }

        return 0;
}

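/*
 * The mock subtests run against a mock device at module load; with
 * CONFIG_DRM_I915_SELFTEST enabled they can typically be invoked with, e.g.:
 *
 *      # modprobe i915 mock_selftests=-1
 */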
int intel_timeline_mock_selftests(void)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(mock_hwsp_freelist),
                SUBTEST(igt_sync),
                SUBTEST(bench_sync),
        };

        return i915_subtests(tests, NULL);
}

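/*
 * Emit an MI_STORE_DWORD_IMM into @rq to write @value at the GGTT address
 * @addr. The command layout varies by generation: gen8+ takes a 64-bit
 * address (low, high), gen4-gen7 expects a padding dword before the address,
 * and earlier parts use the virtual-address flavour padded with MI_NOOP to
 * fill the fixed 4-dword allocation.
 */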
static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
{
        u32 *cs;

        cs = intel_ring_begin(rq, 4);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        if (INTEL_GEN(rq->i915) >= 8) {
                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
                *cs++ = addr;
                *cs++ = 0;
                *cs++ = value;
        } else if (INTEL_GEN(rq->i915) >= 4) {
                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
                *cs++ = 0;
                *cs++ = addr;
                *cs++ = value;
        } else {
                *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
                *cs++ = addr;
                *cs++ = value;
                *cs++ = MI_NOOP;
        }

        intel_ring_advance(rq, cs);

        return 0;
}

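/*
 * Submit a request on @engine's kernel context that stores @value into @tl's
 * breadcrumb slot in the GGTT, pinning the timeline around the emission so
 * that tl->hwsp_offset is valid while the commands are written.
 */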
static struct i915_request *
tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
{
        struct i915_request *rq;
        int err;

        lockdep_assert_held(&tl->gt->i915->drm.struct_mutex); /* lazy rq refs */

        err = intel_timeline_pin(tl);
        if (err) {
                rq = ERR_PTR(err);
                goto out;
        }

        rq = i915_request_create(engine->kernel_context);
        if (IS_ERR(rq))
                goto out_unpin;

        err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
        i915_request_add(rq);
        if (err)
                rq = ERR_PTR(err);

out_unpin:
        intel_timeline_unpin(tl);
out:
        if (IS_ERR(rq))
                pr_err("Failed to write to timeline!\n");
        return rq;
}

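/*
 * As intel_timeline_create(), but also verify that the breadcrumb slot of
 * the new timeline was initialised to the timeline's current seqno; a stale
 * value here would make new requests appear already completed.
 */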
static struct intel_timeline *
checked_intel_timeline_create(struct drm_i915_private *i915)
{
        struct intel_timeline *tl;

        tl = intel_timeline_create(&i915->gt, NULL);
        if (IS_ERR(tl))
                return tl;

        if (*tl->hwsp_seqno != tl->seqno) {
                pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
                       *tl->hwsp_seqno, tl->seqno);
                intel_timeline_put(tl);
                return ERR_PTR(-EINVAL);
        }

        return tl;
}

static int live_hwsp_engine(void *arg)
{
#define NUM_TIMELINES 4096
        struct drm_i915_private *i915 = arg;
        struct intel_timeline **timelines;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        intel_wakeref_t wakeref;
        unsigned long count, n;
        int err = 0;

        /*
         * Create a bunch of timelines and check we can write
         * independently to each of their breadcrumb slots.
         */

        timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
                                   sizeof(*timelines),
                                   GFP_KERNEL);
        if (!timelines)
                return -ENOMEM;

        mutex_lock(&i915->drm.struct_mutex);
        wakeref = intel_runtime_pm_get(&i915->runtime_pm);

        count = 0;
        for_each_engine(engine, i915, id) {
                if (!intel_engine_can_store_dword(engine))
                        continue;

                for (n = 0; n < NUM_TIMELINES; n++) {
                        struct intel_timeline *tl;
                        struct i915_request *rq;

                        tl = checked_intel_timeline_create(i915);
                        if (IS_ERR(tl)) {
                                err = PTR_ERR(tl);
                                goto out;
                        }

                        rq = tl_write(tl, engine, count);
                        if (IS_ERR(rq)) {
                                intel_timeline_put(tl);
                                err = PTR_ERR(rq);
                                goto out;
                        }

                        timelines[count++] = tl;
                }
        }

out:
        if (igt_flush_test(i915, I915_WAIT_LOCKED))
                err = -EIO;

        for (n = 0; n < count; n++) {
                struct intel_timeline *tl = timelines[n];

                if (!err && *tl->hwsp_seqno != n) {
                        pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
                               n, *tl->hwsp_seqno);
                        err = -EINVAL;
                }
                intel_timeline_put(tl);
        }

        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
        mutex_unlock(&i915->drm.struct_mutex);

        kvfree(timelines);

        return err;
#undef NUM_TIMELINES
}

static int live_hwsp_alternate(void *arg)
{
#define NUM_TIMELINES 4096
        struct drm_i915_private *i915 = arg;
        struct intel_timeline **timelines;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        intel_wakeref_t wakeref;
        unsigned long count, n;
        int err = 0;

        /*
         * Create a bunch of timelines and check we can write
         * independently to each of their breadcrumb slots with adjacent
         * engines.
         */

        timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
                                   sizeof(*timelines),
                                   GFP_KERNEL);
        if (!timelines)
                return -ENOMEM;

        mutex_lock(&i915->drm.struct_mutex);
        wakeref = intel_runtime_pm_get(&i915->runtime_pm);

        count = 0;
        for (n = 0; n < NUM_TIMELINES; n++) {
                for_each_engine(engine, i915, id) {
                        struct intel_timeline *tl;
                        struct i915_request *rq;

                        if (!intel_engine_can_store_dword(engine))
                                continue;

                        tl = checked_intel_timeline_create(i915);
                        if (IS_ERR(tl)) {
                                err = PTR_ERR(tl);
                                goto out;
                        }

                        rq = tl_write(tl, engine, count);
                        if (IS_ERR(rq)) {
                                intel_timeline_put(tl);
                                err = PTR_ERR(rq);
                                goto out;
                        }

                        timelines[count++] = tl;
                }
        }

out:
        if (igt_flush_test(i915, I915_WAIT_LOCKED))
                err = -EIO;

        for (n = 0; n < count; n++) {
                struct intel_timeline *tl = timelines[n];

                if (!err && *tl->hwsp_seqno != n) {
                        pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
                               n, *tl->hwsp_seqno);
                        err = -EINVAL;
                }
                intel_timeline_put(tl);
        }

        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
        mutex_unlock(&i915->drm.struct_mutex);

        kvfree(timelines);

        return err;
#undef NUM_TIMELINES
}

static int live_hwsp_wrap(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct intel_engine_cs *engine;
        struct intel_timeline *tl;
        enum intel_engine_id id;
        intel_wakeref_t wakeref;
        int err = 0;

        /*
         * Across a seqno wrap, we need to keep the old cacheline alive for
         * foreign GPU references.
         */
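        /*
         * Note: seqnos are u32, so priming tl->seqno to -4u below guarantees
         * that the second intel_timeline_get_seqno() crosses zero (the
         * timeline advances by two per request when it carries an initial
         * breadcrumb), forcing a fresh cacheline to be allocated while the
         * old one must remain valid for the store already emitted against it.
         */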

        mutex_lock(&i915->drm.struct_mutex);
        wakeref = intel_runtime_pm_get(&i915->runtime_pm);

        tl = intel_timeline_create(&i915->gt, NULL);
        if (IS_ERR(tl)) {
                err = PTR_ERR(tl);
                goto out_rpm;
        }
        if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
                goto out_free;

        err = intel_timeline_pin(tl);
        if (err)
                goto out_free;

        for_each_engine(engine, i915, id) {
                const u32 *hwsp_seqno[2];
                struct i915_request *rq;
                u32 seqno[2];

                if (!intel_engine_can_store_dword(engine))
                        continue;

                rq = i915_request_create(engine->kernel_context);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
                        goto out;
                }

                tl->seqno = -4u;

                mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
                err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
                mutex_unlock(&tl->mutex);
                if (err) {
                        i915_request_add(rq);
                        goto out;
                }
                pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
                         seqno[0], tl->hwsp_offset);

                err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
                if (err) {
                        i915_request_add(rq);
                        goto out;
                }
                hwsp_seqno[0] = tl->hwsp_seqno;

                mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
                err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
                mutex_unlock(&tl->mutex);
                if (err) {
                        i915_request_add(rq);
                        goto out;
                }
                pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
                         seqno[1], tl->hwsp_offset);

                err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
                if (err) {
                        i915_request_add(rq);
                        goto out;
                }
                hwsp_seqno[1] = tl->hwsp_seqno;

                /* With wrap should come a new hwsp */
                GEM_BUG_ON(seqno[1] >= seqno[0]);
                GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);

                i915_request_add(rq);

                if (i915_request_wait(rq, 0, HZ / 5) < 0) {
                        pr_err("Wait for timeline writes timed out!\n");
                        err = -EIO;
                        goto out;
                }

                if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) {
                        pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
                               *hwsp_seqno[0], *hwsp_seqno[1],
                               seqno[0], seqno[1]);
                        err = -EINVAL;
                        goto out;
                }

                i915_retire_requests(i915); /* recycle HWSP */
        }

out:
        if (igt_flush_test(i915, I915_WAIT_LOCKED))
                err = -EIO;

        intel_timeline_unpin(tl);
out_free:
        intel_timeline_put(tl);
out_rpm:
        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
        mutex_unlock(&i915->drm.struct_mutex);

        return err;
}

static int live_hwsp_recycle(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        intel_wakeref_t wakeref;
        unsigned long count;
        int err = 0;

        /*
         * Check seqno writes into one timeline at a time. We expect to
         * recycle the breadcrumb slot between iterations and neither
         * want to confuse ourselves or the GPU.
         */

        mutex_lock(&i915->drm.struct_mutex);
        wakeref = intel_runtime_pm_get(&i915->runtime_pm);

        count = 0;
        for_each_engine(engine, i915, id) {
                IGT_TIMEOUT(end_time);

                if (!intel_engine_can_store_dword(engine))
                        continue;

                do {
                        struct intel_timeline *tl;
                        struct i915_request *rq;

                        tl = checked_intel_timeline_create(i915);
                        if (IS_ERR(tl)) {
                                err = PTR_ERR(tl);
                                goto out;
                        }

                        rq = tl_write(tl, engine, count);
                        if (IS_ERR(rq)) {
                                intel_timeline_put(tl);
                                err = PTR_ERR(rq);
                                goto out;
                        }

                        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
                                pr_err("Wait for timeline writes timed out!\n");
                                intel_timeline_put(tl);
                                err = -EIO;
                                goto out;
                        }

                        if (*tl->hwsp_seqno != count) {
                                pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
                                       count, *tl->hwsp_seqno);
                                err = -EINVAL;
                        }

                        intel_timeline_put(tl);
                        count++;

                        if (err)
                                goto out;
                } while (!__igt_timeout(end_time, NULL));
        }

out:
        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
        mutex_unlock(&i915->drm.struct_mutex);

        return err;
}

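/*
 * The live subtests exercise the real GPU; with CONFIG_DRM_I915_SELFTEST
 * enabled they can typically be selected at probe with, e.g.:
 *
 *      # modprobe i915 live_selftests=-1
 */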
int intel_timeline_live_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(live_hwsp_recycle),
                SUBTEST(live_hwsp_engine),
                SUBTEST(live_hwsp_alternate),
                SUBTEST(live_hwsp_wrap),
        };

        if (intel_gt_is_wedged(&i915->gt))
                return 0;

        return i915_live_subtests(tests, i915);
}
