root/drivers/gpu/drm/i915/gt/intel_engine.h


DEFINITIONS

This source file includes the following definitions:
  1. hangcheck_action_to_str
  2. execlists_num_ports
  3. execlists_active
  4. execlists_active_lock_bh
  5. execlists_active_unlock_bh
  6. intel_read_status_page
  7. intel_write_status_page
  8. intel_ring_get
  9. intel_ring_put
  10. intel_ring_advance
  11. intel_ring_wrap
  12. intel_ring_direction
  13. intel_ring_offset_valid
  14. intel_ring_offset
  15. assert_ring_tail_valid
  16. intel_ring_set_tail
  17. __intel_ring_space
  18. intel_engine_queue_breadcrumbs
  19. gen8_emit_pipe_control
  20. gen8_emit_ggtt_write_rcs
  21. gen8_emit_ggtt_write
  22. __intel_engine_reset
  23. intel_engine_context_in
  24. intel_engine_context_out
  25. inject_preempt_hang
  26. inject_preempt_hang

   1 /* SPDX-License-Identifier: MIT */
   2 #ifndef _INTEL_RINGBUFFER_H_
   3 #define _INTEL_RINGBUFFER_H_
   4 
   5 #include <drm/drm_util.h>
   6 
   7 #include <linux/hashtable.h>
   8 #include <linux/irq_work.h>
   9 #include <linux/random.h>
  10 #include <linux/seqlock.h>
  11 
  12 #include "i915_pmu.h"
  13 #include "i915_reg.h"
  14 #include "i915_request.h"
  15 #include "i915_selftest.h"
  16 #include "gt/intel_timeline.h"
  17 #include "intel_engine_types.h"
  18 #include "intel_gpu_commands.h"
  19 #include "intel_workarounds.h"
  20 
  21 struct drm_printer;
  22 
  23 /* Early gen2 devices have a cacheline of just 32 bytes; using 64 is overkill
  24  * but keeps the logic simple. Indeed, the whole purpose of this macro is just
  25  * to give some indication as to some of the magic values used in the various
  26  * workarounds!
  27  */
  28 #define CACHELINE_BYTES 64
  29 #define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))
  30 
  31 /*
  32  * The register defines to be used with the following macros need to accept a
  33  * base param, e.g:
  34  *
  35  * REG_FOO(base) _MMIO((base) + <relative offset>)
  36  * ENGINE_READ(engine, REG_FOO);
  37  *
  38  * register arrays are to be defined and accessed as follows:
  39  *
  40  * REG_BAR(base, i) _MMIO((base) + <relative offset> + (i) * <shift>)
  41  * ENGINE_READ_IDX(engine, REG_BAR, i)
  42  */
  43 
  44 #define __ENGINE_REG_OP(op__, engine__, ...) \
  45         intel_uncore_##op__((engine__)->uncore, __VA_ARGS__)
  46 
  47 #define __ENGINE_READ_OP(op__, engine__, reg__) \
  48         __ENGINE_REG_OP(op__, (engine__), reg__((engine__)->mmio_base))
  49 
  50 #define ENGINE_READ16(...)      __ENGINE_READ_OP(read16, __VA_ARGS__)
  51 #define ENGINE_READ(...)        __ENGINE_READ_OP(read, __VA_ARGS__)
  52 #define ENGINE_READ_FW(...)     __ENGINE_READ_OP(read_fw, __VA_ARGS__)
  53 #define ENGINE_POSTING_READ(...) __ENGINE_READ_OP(posting_read_fw, __VA_ARGS__)
  54 #define ENGINE_POSTING_READ16(...) __ENGINE_READ_OP(posting_read16, __VA_ARGS__)
  55 
  56 #define ENGINE_READ64(engine__, lower_reg__, upper_reg__) \
  57         __ENGINE_REG_OP(read64_2x32, (engine__), \
  58                         lower_reg__((engine__)->mmio_base), \
  59                         upper_reg__((engine__)->mmio_base))
  60 
  61 #define ENGINE_READ_IDX(engine__, reg__, idx__) \
  62         __ENGINE_REG_OP(read, (engine__), reg__((engine__)->mmio_base, (idx__)))
  63 
  64 #define __ENGINE_WRITE_OP(op__, engine__, reg__, val__) \
  65         __ENGINE_REG_OP(op__, (engine__), reg__((engine__)->mmio_base), (val__))
  66 
  67 #define ENGINE_WRITE16(...)     __ENGINE_WRITE_OP(write16, __VA_ARGS__)
  68 #define ENGINE_WRITE(...)       __ENGINE_WRITE_OP(write, __VA_ARGS__)
  69 #define ENGINE_WRITE_FW(...)    __ENGINE_WRITE_OP(write_fw, __VA_ARGS__)
  70 
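/*
 * Usage sketch (assumes RING_TAIL(base) from i915_reg.h, which follows the
 * REG_FOO(base) pattern described above):
 *
 *	u32 tail = ENGINE_READ(engine, RING_TAIL);
 *	ENGINE_WRITE(engine, RING_TAIL, new_tail);
 *
 * new_tail here is a hypothetical value computed by the caller.
 */
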
  71 #define GEN6_RING_FAULT_REG_READ(engine__) \
  72         intel_uncore_read((engine__)->uncore, RING_FAULT_REG(engine__))
  73 
  74 #define GEN6_RING_FAULT_REG_POSTING_READ(engine__) \
  75         intel_uncore_posting_read((engine__)->uncore, RING_FAULT_REG(engine__))
  76 
  77 #define GEN6_RING_FAULT_REG_RMW(engine__, clear__, set__) \
  78 ({ \
  79         u32 __val; \
  80 \
  81         __val = intel_uncore_read((engine__)->uncore, \
  82                                   RING_FAULT_REG(engine__)); \
  83         __val &= ~(clear__); \
  84         __val |= (set__); \
  85         intel_uncore_write((engine__)->uncore, RING_FAULT_REG(engine__), \
  86                            __val); \
  87 })
  88 
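/*
 * Usage sketch (assumes RING_FAULT_VALID from i915_reg.h): acknowledge a
 * pending fault by clearing the valid bit and posting the write:
 *
 *	GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0);
 *	GEN6_RING_FAULT_REG_POSTING_READ(engine);
 */
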
  89 /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
  90  * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
  91  */
  92 enum intel_engine_hangcheck_action {
  93         ENGINE_IDLE = 0,
  94         ENGINE_WAIT,
  95         ENGINE_ACTIVE_SEQNO,
  96         ENGINE_ACTIVE_HEAD,
  97         ENGINE_ACTIVE_SUBUNITS,
  98         ENGINE_WAIT_KICK,
  99         ENGINE_DEAD,
 100 };
 101 
 102 static inline const char *
 103 hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
 104 {
 105         switch (a) {
 106         case ENGINE_IDLE:
 107                 return "idle";
 108         case ENGINE_WAIT:
 109                 return "wait";
 110         case ENGINE_ACTIVE_SEQNO:
 111                 return "active seqno";
 112         case ENGINE_ACTIVE_HEAD:
 113                 return "active head";
 114         case ENGINE_ACTIVE_SUBUNITS:
 115                 return "active subunits";
 116         case ENGINE_WAIT_KICK:
 117                 return "wait kick";
 118         case ENGINE_DEAD:
 119                 return "dead";
 120         }
 121 
 122         return "unknown";
 123 }
 124 
 125 static inline unsigned int
 126 execlists_num_ports(const struct intel_engine_execlists * const execlists)
 127 {
 128         return execlists->port_mask + 1;
 129 }
 130 
 131 static inline struct i915_request *
 132 execlists_active(const struct intel_engine_execlists *execlists)
 133 {
 134         GEM_BUG_ON(execlists->active - execlists->inflight >
 135                    execlists_num_ports(execlists));
 136         return READ_ONCE(*execlists->active);
 137 }
 138 
 139 static inline void
 140 execlists_active_lock_bh(struct intel_engine_execlists *execlists)
 141 {
 142         local_bh_disable(); /* prevent local softirq and lock recursion */
 143         tasklet_lock(&execlists->tasklet);
 144 }
 145 
 146 static inline void
 147 execlists_active_unlock_bh(struct intel_engine_execlists *execlists)
 148 {
 149         tasklet_unlock(&execlists->tasklet);
 150         local_bh_enable(); /* restore softirq, and kick ksoftirqd! */
 151 }
 152 
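/*
 * A minimal sketch (hypothetical helper, not declared elsewhere): peek at
 * the request currently executing on the HW while holding off the
 * submission tasklet via the lock/unlock helpers above.
 */
static inline struct i915_request *
example_peek_active_request(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists *el = &engine->execlists;
	struct i915_request *rq;

	execlists_active_lock_bh(el);
	rq = execlists_active(el); /* first inflight request, may be NULL */
	/* a real caller must take a reference on rq before unlocking */
	execlists_active_unlock_bh(el);

	return rq;
}
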
 153 struct i915_request *
 154 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
 155 
 156 static inline u32
 157 intel_read_status_page(const struct intel_engine_cs *engine, int reg)
 158 {
 159         /* Ensure that the compiler doesn't optimize away the load. */
 160         return READ_ONCE(engine->status_page.addr[reg]);
 161 }
 162 
 163 static inline void
 164 intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 165 {
 166         /* Writing into the status page should be done sparingly. Since
 167          * we do so when we are uncertain of the device state, we take a bit
 168          * of extra paranoia to try to ensure that the HWS takes the value
 169          * we give and that it doesn't end up trapped inside the CPU!
 170          */
 171         if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
 172                 mb();
 173                 clflush(&engine->status_page.addr[reg]);
 174                 engine->status_page.addr[reg] = value;
 175                 clflush(&engine->status_page.addr[reg]);
 176                 mb();
 177         } else {
 178                 WRITE_ONCE(engine->status_page.addr[reg], value);
 179         }
 180 }
 181 
 182 /*
 183  * Reads a dword out of the status page, which is written to from the command
 184  * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 185  * MI_STORE_DATA_IMM.
 186  *
 187  * The following dwords have a reserved meaning:
 188  * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 189  * 0x04: ring 0 head pointer
 190  * 0x05: ring 1 head pointer (915-class)
 191  * 0x06: ring 2 head pointer (915-class)
 192  * 0x10-0x1b: Context status DWords (GM45)
 193  * 0x1f: Last written status offset. (GM45)
 194  * 0x20-0x2f: Reserved (Gen6+)
 195  *
 196  * The area from dword 0x30 to 0x3ff is available for driver usage.
 197  */
 198 #define I915_GEM_HWS_PREEMPT            0x32
 199 #define I915_GEM_HWS_PREEMPT_ADDR       (I915_GEM_HWS_PREEMPT * sizeof(u32))
 200 #define I915_GEM_HWS_SEQNO              0x40
 201 #define I915_GEM_HWS_SEQNO_ADDR         (I915_GEM_HWS_SEQNO * sizeof(u32))
 202 #define I915_GEM_HWS_SCRATCH            0x80
 203 #define I915_GEM_HWS_SCRATCH_ADDR       (I915_GEM_HWS_SCRATCH * sizeof(u32))
 204 
 205 #define I915_HWS_CSB_BUF0_INDEX         0x10
 206 #define I915_HWS_CSB_WRITE_INDEX        0x1f
 207 #define CNL_HWS_CSB_WRITE_INDEX         0x2f
 208 
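/*
 * A minimal sketch (hypothetical helper): the dword indices above are in
 * u32 units, while the *_ADDR variants are the same offsets in bytes.
 * Reading the dword reserved at I915_GEM_HWS_SEQNO through the
 * status-page helper defined earlier looks like:
 */
static inline u32 example_read_hws_seqno(const struct intel_engine_cs *engine)
{
	return intel_read_status_page(engine, I915_GEM_HWS_SEQNO);
}
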
 209 struct intel_ring *
 210 intel_engine_create_ring(struct intel_engine_cs *engine, int size);
 211 int intel_ring_pin(struct intel_ring *ring);
 212 void intel_ring_reset(struct intel_ring *ring, u32 tail);
 213 unsigned int intel_ring_update_space(struct intel_ring *ring);
 214 void intel_ring_unpin(struct intel_ring *ring);
 215 void intel_ring_free(struct kref *ref);
 216 
 217 static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
 218 {
 219         kref_get(&ring->ref);
 220         return ring;
 221 }
 222 
 223 static inline void intel_ring_put(struct intel_ring *ring)
 224 {
 225         kref_put(&ring->ref, intel_ring_free);
 226 }
 227 
 228 void intel_engine_stop(struct intel_engine_cs *engine);
 229 void intel_engine_cleanup(struct intel_engine_cs *engine);
 230 
 231 int __must_check intel_ring_cacheline_align(struct i915_request *rq);
 232 
 233 u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
 234 
 235 static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
 236 {
 237         /* Dummy function.
 238          *
 239          * This serves as a placeholder in the code so that the reader
 240          * can compare against the preceding intel_ring_begin() and
 241          * check that the number of dwords emitted matches the space
 242          * reserved for the command packet (i.e. the value passed to
 243          * intel_ring_begin()).
 244          */
 245         GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
 246 }
 247 
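/*
 * A minimal sketch of the begin/emit/advance pattern described above
 * (hypothetical emitter; MI_NOOP comes from intel_gpu_commands.h and the
 * IS_ERR()/PTR_ERR() helpers are assumed to be visible via the existing
 * includes):
 */
static inline int example_emit_two_noops(struct i915_request *rq)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2); /* reserve space for 2 dwords */
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_NOOP;
	*cs++ = MI_NOOP;

	/* must match the count passed to intel_ring_begin() */
	intel_ring_advance(rq, cs);
	return 0;
}
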
 248 static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
 249 {
 250         return pos & (ring->size - 1);
 251 }
 252 
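/*
 * Note on intel_ring_direction() below (a reading of the helper, assuming
 * ring->wrap is initialised where the ring is created so that the low
 * ilog2(ring->size) bits of the offset difference are shifted into the
 * top of the word): interpreted as a signed int, the result is positive
 * when @next lies ahead of @prev within the ring, negative when it lies
 * behind, and zero when they are equal.
 */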
 253 static inline int intel_ring_direction(const struct intel_ring *ring,
 254                                        u32 next, u32 prev)
 255 {
 256         typecheck(typeof(ring->size), next);
 257         typecheck(typeof(ring->size), prev);
 258         return (next - prev) << ring->wrap;
 259 }
 260 
 261 static inline bool
 262 intel_ring_offset_valid(const struct intel_ring *ring,
 263                         unsigned int pos)
 264 {
 265         if (pos & -ring->size) /* must be strictly within the ring */
 266                 return false;
 267 
 268         if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
 269                 return false;
 270 
 271         return true;
 272 }
 273 
 274 static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
 275 {
 276         /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
 277         u32 offset = addr - rq->ring->vaddr;
 278         GEM_BUG_ON(offset > rq->ring->size);
 279         return intel_ring_wrap(rq->ring, offset);
 280 }
 281 
 282 static inline void
 283 assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
 284 {
 285         GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
 286 
 287         /*
 288          * "Ring Buffer Use"
 289          *      Gen2 BSpec "1. Programming Environment" / 1.4.4.6
 290          *      Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
 291          *      Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
 292          * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
 293          * same cacheline, the Head Pointer must not be greater than the Tail
 294          * Pointer."
 295          *
 296          * We use ring->head as the last known location of the actual RING_HEAD,
 297          * it may have advanced but in the worst case it is equally the same
 298          * as ring->head and so we should never program RING_TAIL to advance
 299          * into the same cacheline as ring->head.
 300          */
 301 #define cacheline(a) round_down(a, CACHELINE_BYTES)
 302         GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
 303                    tail < ring->head);
 304 #undef cacheline
 305 }
 306 
 307 static inline unsigned int
 308 intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
 309 {
 310         /* Whilst writes to the tail are strictly ordered, there is no
 311          * serialisation between readers and the writers. The tail may be
 312          * read by i915_request_retire() just as it is being updated
 313          * by execlists, as although the breadcrumb is complete, the context
 314          * switch hasn't been seen.
 315          */
 316         assert_ring_tail_valid(ring, tail);
 317         ring->tail = tail;
 318         return tail;
 319 }
 320 
 321 static inline unsigned int
 322 __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
 323 {
 324         /*
 325          * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
 326          * same cacheline, the Head Pointer must not be greater than the Tail
 327          * Pointer."
 328          */
 329         GEM_BUG_ON(!is_power_of_2(size));
 330         return (head - tail - CACHELINE_BYTES) & (size - 1);
 331 }
 332 
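/*
 * Worked example for __intel_ring_space(): with a 4096 byte ring and
 * head == tail, the space reported is (0 - 0 - 64) & 4095 = 4032 bytes,
 * i.e. a cacheline is always held back so the ring never fills completely
 * and RING_TAIL is never advanced into the same cacheline as RING_HEAD.
 */
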
 333 int intel_engines_init_mmio(struct drm_i915_private *i915);
 334 int intel_engines_setup(struct drm_i915_private *i915);
 335 int intel_engines_init(struct drm_i915_private *i915);
 336 void intel_engines_cleanup(struct drm_i915_private *i915);
 337 
 338 int intel_engine_init_common(struct intel_engine_cs *engine);
 339 void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 340 
 341 int intel_ring_submission_setup(struct intel_engine_cs *engine);
 342 int intel_ring_submission_init(struct intel_engine_cs *engine);
 343 
 344 int intel_engine_stop_cs(struct intel_engine_cs *engine);
 345 void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);
 346 
 347 void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);
 348 
 349 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
 350 u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);
 351 
 352 void intel_engine_get_instdone(struct intel_engine_cs *engine,
 353                                struct intel_instdone *instdone);
 354 
 355 void intel_engine_init_execlists(struct intel_engine_cs *engine);
 356 
 357 void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
 358 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 359 
 360 void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine);
 361 void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
 362 
 363 static inline void
 364 intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine)
 365 {
 366         irq_work_queue(&engine->breadcrumbs.irq_work);
 367 }
 368 
 369 void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine);
 370 
 371 void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
 372 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 373 
 374 void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
 375                                     struct drm_printer *p);
 376 
 377 static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
 378 {
 379         memset(batch, 0, 6 * sizeof(u32));
 380 
 381         batch[0] = GFX_OP_PIPE_CONTROL(6);
 382         batch[1] = flags;
 383         batch[2] = offset;
 384 
 385         return batch + 6;
 386 }
 387 
 388 static inline u32 *
 389 gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
 390 {
 391         /* We're using qword write, offset should be aligned to 8 bytes. */
 392         GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
 393 
 394         /* w/a: for post sync ops following a GPGPU operation we
 395          * need a prior CS_STALL, which is emitted by the flush
 396          * following the batch.
 397          */
 398         *cs++ = GFX_OP_PIPE_CONTROL(6);
 399         *cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
 400         *cs++ = gtt_offset;
 401         *cs++ = 0;
 402         *cs++ = value;
 403         /* We're thrashing one dword of HWS. */
 404         *cs++ = 0;
 405 
 406         return cs;
 407 }
 408 
 409 static inline u32 *
 410 gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
 411 {
 412         /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
 413         GEM_BUG_ON(gtt_offset & (1 << 5));
 414         /* Offset should be aligned to 8 bytes for both (QW/DW) write types */
 415         GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
 416 
 417         *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW | flags;
 418         *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
 419         *cs++ = 0;
 420         *cs++ = value;
 421 
 422         return cs;
 423 }
 424 
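/*
 * A minimal sketch (hypothetical helper): posting a value into the scratch
 * dword of the engine's status page with the helper above. The use of
 * i915_ggtt_offset() on engine->status_page.vma is an assumption about the
 * surrounding driver code, shown only to make the offset rules concrete:
 * I915_GEM_HWS_SCRATCH_ADDR (0x200) is qword aligned and has bit 5 clear.
 */
static inline u32 *
example_emit_hws_scratch_write(struct intel_engine_cs *engine, u32 *cs, u32 value)
{
	u32 addr = i915_ggtt_offset(engine->status_page.vma) +
		   I915_GEM_HWS_SCRATCH_ADDR;

	return gen8_emit_ggtt_write(cs, value, addr, 0);
}
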
 425 static inline void __intel_engine_reset(struct intel_engine_cs *engine,
 426                                         bool stalled)
 427 {
 428         if (engine->reset.reset)
 429                 engine->reset.reset(engine, stalled);
 430         engine->serial++; /* contexts lost */
 431 }
 432 
 433 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 434 bool intel_engines_are_idle(struct intel_gt *gt);
 435 
 436 void intel_engines_reset_default_submission(struct intel_gt *gt);
 437 
 438 bool intel_engine_can_store_dword(struct intel_engine_cs *engine);
 439 
 440 __printf(3, 4)
 441 void intel_engine_dump(struct intel_engine_cs *engine,
 442                        struct drm_printer *m,
 443                        const char *header, ...);
 444 
 445 static inline void intel_engine_context_in(struct intel_engine_cs *engine)
 446 {
 447         unsigned long flags;
 448 
 449         if (READ_ONCE(engine->stats.enabled) == 0)
 450                 return;
 451 
 452         write_seqlock_irqsave(&engine->stats.lock, flags);
 453 
 454         if (engine->stats.enabled > 0) {
 455                 if (engine->stats.active++ == 0)
 456                         engine->stats.start = ktime_get();
 457                 GEM_BUG_ON(engine->stats.active == 0);
 458         }
 459 
 460         write_sequnlock_irqrestore(&engine->stats.lock, flags);
 461 }
 462 
 463 static inline void intel_engine_context_out(struct intel_engine_cs *engine)
 464 {
 465         unsigned long flags;
 466 
 467         if (READ_ONCE(engine->stats.enabled) == 0)
 468                 return;
 469 
 470         write_seqlock_irqsave(&engine->stats.lock, flags);
 471 
 472         if (engine->stats.enabled > 0) {
 473                 ktime_t last;
 474 
 475                 if (engine->stats.active && --engine->stats.active == 0) {
 476                         /*
 477                          * The active count has dropped to zero: the engine
 478                          * is now idle, so add the elapsed time to the total.
 479                          */
 480                         last = ktime_sub(ktime_get(), engine->stats.start);
 481 
 482                         engine->stats.total = ktime_add(engine->stats.total,
 483                                                         last);
 484                 } else if (engine->stats.active == 0) {
 485                         /*
 486                          * After turning on engine stats, context out might be
 487                          * the first event in which case we account from the
 488                          * time stats gathering was turned on.
 489                          */
 490                         last = ktime_sub(ktime_get(), engine->stats.enabled_at);
 491 
 492                         engine->stats.total = ktime_add(engine->stats.total,
 493                                                         last);
 494                 }
 495         }
 496 
 497         write_sequnlock_irqrestore(&engine->stats.lock, flags);
 498 }
 499 
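/*
 * intel_engine_context_in/out() above are expected to be called by the
 * submission backend around context switch events; once stats have been
 * enabled with intel_enable_engine_stats(), the accumulated busy time is
 * reported via intel_engine_get_busy_time() below.
 */
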
 500 int intel_enable_engine_stats(struct intel_engine_cs *engine);
 501 void intel_disable_engine_stats(struct intel_engine_cs *engine);
 502 
 503 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
 504 
 505 struct i915_request *
 506 intel_engine_find_active_request(struct intel_engine_cs *engine);
 507 
 508 u32 intel_engine_context_size(struct drm_i915_private *i915, u8 class);
 509 
 510 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 511 
 512 static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
 513 {
 514         if (!execlists->preempt_hang.inject_hang)
 515                 return false;
 516 
 517         complete(&execlists->preempt_hang.completion);
 518         return true;
 519 }
 520 
 521 #else
 522 
 523 static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
 524 {
 525         return false;
 526 }
 527 
 528 #endif
 529 
 530 void intel_engine_init_active(struct intel_engine_cs *engine,
 531                               unsigned int subclass);
 532 #define ENGINE_PHYSICAL 0
 533 #define ENGINE_MOCK     1
 534 #define ENGINE_VIRTUAL  2
 535 
 536 #endif /* _INTEL_RINGBUFFER_H_ */
