/*
 * Copyright © 2008-2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#ifndef I915_REQUEST_H
#define I915_REQUEST_H

#include <linux/dma-fence.h>
#include <linux/irq_work.h>
#include <linux/lockdep.h>

#include "gt/intel_context_types.h"
#include "gt/intel_engine_types.h"

#include "i915_gem.h"
#include "i915_scheduler.h"
#include "i915_selftest.h"
#include "i915_sw_fence.h"

#include <uapi/drm/i915_drm.h>

struct drm_file;
struct drm_i915_gem_object;
struct i915_request;
struct intel_timeline;
struct intel_timeline_cacheline;

struct i915_capture_list {
	struct i915_capture_list *next;
	struct i915_vma *vma;
};

enum {
	/*
	 * I915_FENCE_FLAG_ACTIVE - this request is currently submitted to HW.
	 *
	 * Set by __i915_request_submit() on handing over to HW, and cleared
	 * by __i915_request_unsubmit() if we preempt this request.
	 *
	 * Finally cleared for consistency on retiring the request, when
	 * we know the HW is no longer running this request.
	 *
	 * See i915_request_is_active()
	 */
	I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS,

	/*
	 * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list
	 *
	 * Internal bookkeeping used by the breadcrumb code to track when
	 * a request is on the various signal_list.
	 */
	I915_FENCE_FLAG_SIGNAL,
};
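
/*
 * Illustrative note (not part of the API above): the values in this enum are
 * bit numbers within rq->fence.flags. They start at DMA_FENCE_FLAG_USER_BITS
 * so they never collide with the core dma-fence bits, and they are queried
 * with the ordinary bitops, e.g.
 *
 *	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
 *		...;	// the request has been handed over to the hardware
 */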

/**
 * Request queue structure.
 *
 * The request queue allows us to note sequence numbers that have been emitted
 * and may be associated with active buffers to be retired.
 *
 * By keeping this list, we can avoid having to do questionable sequence
 * number comparisons on buffer last_read|write_seqno. It also allows an
 * emission time to be associated with the request for tracking how far ahead
 * of the GPU the submission is.
 *
 * When modifying this structure be very aware that we perform a lockless
 * RCU lookup of it that may race against reallocation of the struct
 * from the slab freelist. We intentionally do not zero the structure on
 * allocation so that the lookup can use the dangling pointers (and is
 * cognisant that those pointers may be wrong). Instead, everything that
 * needs to be initialised must be done so explicitly.
 *
 * The requests are reference counted.
 */
struct i915_request {
	struct dma_fence fence;
	spinlock_t lock;

	/** The i915 device on which this request was generated */
	struct drm_i915_private *i915;

	/**
	 * Context and ring buffer related to this request
	 * Contexts are refcounted, so when this request is associated with a
	 * context, we must increment the context's refcount, to guarantee that
	 * it persists while any request is linked to it. Requests themselves
	 * are also refcounted, so the request will only be freed when the last
	 * reference to it is dismissed, and the code in
	 * i915_request_free() will then decrement the refcount on the
	 * context.
	 */
	struct i915_gem_context *gem_context;
	struct intel_engine_cs *engine;
	struct intel_context *hw_context;
	struct intel_ring *ring;
	struct intel_timeline *timeline;
	struct list_head signal_link;

	/*
	 * The rcu epoch of when this request was allocated. Used to
	 * judiciously apply backpressure on future allocations to ensure that
	 * under mempressure there are sufficient RCU ticks for us to reclaim
	 * our RCU protected slabs.
	 */
	unsigned long rcustate;

	/*
	 * We pin the timeline->mutex while constructing the request to
	 * ensure that no caller accidentally drops it during construction.
	 * The timeline->mutex must be held to ensure that only this caller
	 * can use the ring and manipulate the associated timeline during
	 * construction.
	 */
	struct pin_cookie cookie;

	/*
	 * Fences for the various phases in the request's lifetime.
	 *
	 * The submit fence is used to await upon all of the request's
	 * dependencies. When it is signaled, the request is ready to run.
	 * It is used by the driver to then queue the request for execution.
	 */
	struct i915_sw_fence submit;
	union {
		wait_queue_entry_t submitq;
		struct i915_sw_dma_fence_cb dmaq;
	};
	struct list_head execute_cb;
	struct i915_sw_fence semaphore;
	struct irq_work semaphore_work;

	/*
	 * A list of everyone we wait upon, and everyone who waits upon us.
	 * Even though we will not be submitted to the hardware before the
	 * submit fence is signaled (it waits for all external events as well
	 * as our own requests), the scheduler still needs to know the
	 * dependency tree for the lifetime of the request (from execbuf
	 * to retirement), i.e. bidirectional dependency information for the
	 * request not tied to individual fences.
	 */
	struct i915_sched_node sched;
	struct i915_dependency dep;
	intel_engine_mask_t execution_mask;

	/*
	 * A convenience pointer to the current breadcrumb value stored in
	 * the HW status page (or our timeline's local equivalent). The full
	 * path would be rq->hw_context->ring->timeline->hwsp_seqno.
	 */
	const u32 *hwsp_seqno;

	/*
	 * If we need to access the timeline's seqno for this request in
	 * another request, we need to keep a read reference to this associated
	 * cacheline, so that we do not free and recycle it before the foreign
	 * observers have completed. Hence, we keep a pointer to the cacheline
	 * inside the timeline's HWSP vma, but it is only valid while this
	 * request has not completed, and is guarded by the timeline mutex.
	 */
	struct intel_timeline_cacheline *hwsp_cacheline;

	/** Position in the ring of the start of the request */
	u32 head;

	/** Position in the ring of the start of the user packets */
	u32 infix;

	/**
	 * Position in the ring of the start of the postfix.
	 * This is required to calculate the maximum available ring space
	 * without overwriting the postfix.
	 */
	u32 postfix;

	/** Position in the ring of the end of the whole request */
	u32 tail;

	/** Position in the ring of the end of any workarounds after the tail */
	u32 wa_tail;

	/** Preallocated space in the ring for emitting the request */
	u32 reserved_space;

	/**
	 * Batch buffer related to this request, if any (used for
	 * error state dump only).
	 */
	struct i915_vma *batch;
	/**
	 * Additional buffers requested by userspace to be captured upon
	 * a GPU hang. The vma/obj on this list are protected by their
	 * active reference - all objects on this list must also be
	 * on the active_list (of their final request).
	 */
	struct i915_capture_list *capture_list;
	struct list_head active_list;

	/** Time at which this request was emitted, in jiffies. */
	unsigned long emitted_jiffies;

	unsigned long flags;
#define I915_REQUEST_WAITBOOST	BIT(0)
#define I915_REQUEST_NOPREEMPT	BIT(1)

	/** timeline->request entry for this request */
	struct list_head link;

	struct drm_i915_file_private *file_priv;
	/** file_priv list entry for this request */
	struct list_head client_link;

	I915_SELFTEST_DECLARE(struct {
		struct list_head link;
		unsigned long delay;
	} mock;)
};
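
/*
 * A minimal sketch (illustrative only, not part of this header) of the
 * lockless RCU lookup the comment above warns about: because the slab may
 * recycle a request while we are peeking at it, a reference is taken with
 * dma_fence_get_rcu() and the result is re-validated before use. 'slot'
 * here is a hypothetical RCU-protected pointer to a request.
 *
 *	struct i915_request *rq;
 *
 *	rcu_read_lock();
 *	rq = rcu_dereference(*slot);
 *	if (rq && !dma_fence_get_rcu(&rq->fence))
 *		rq = NULL;	// lost the race with the final put
 *	rcu_read_unlock();
 *
 *	if (rq && rq != READ_ONCE(*slot)) {
 *		i915_request_put(rq);	// struct was recycled for a new request
 *		rq = NULL;
 *	}
 */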

#define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)

extern const struct dma_fence_ops i915_fence_ops;

static inline bool dma_fence_is_i915(const struct dma_fence *fence)
{
	return fence->ops == &i915_fence_ops;
}

struct i915_request * __must_check
__i915_request_create(struct intel_context *ce, gfp_t gfp);
struct i915_request * __must_check
i915_request_create(struct intel_context *ce);

struct i915_request *__i915_request_commit(struct i915_request *request);
void __i915_request_queue(struct i915_request *rq,
			  const struct i915_sched_attr *attr);

void i915_request_retire_upto(struct i915_request *rq);

static inline struct i915_request *
to_request(struct dma_fence *fence)
{
	/* We assume that NULL fence/request are interoperable */
	BUILD_BUG_ON(offsetof(struct i915_request, fence) != 0);
	GEM_BUG_ON(fence && !dma_fence_is_i915(fence));
	return container_of(fence, struct i915_request, fence);
}

static inline struct i915_request *
i915_request_get(struct i915_request *rq)
{
	return to_request(dma_fence_get(&rq->fence));
}

static inline struct i915_request *
i915_request_get_rcu(struct i915_request *rq)
{
	return to_request(dma_fence_get_rcu(&rq->fence));
}

static inline void
i915_request_put(struct i915_request *rq)
{
	dma_fence_put(&rq->fence);
}

int i915_request_await_object(struct i915_request *to,
			      struct drm_i915_gem_object *obj,
			      bool write);
int i915_request_await_dma_fence(struct i915_request *rq,
				 struct dma_fence *fence);
int i915_request_await_execution(struct i915_request *rq,
				 struct dma_fence *fence,
				 void (*hook)(struct i915_request *rq,
					      struct dma_fence *signal));
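
/*
 * A minimal usage sketch (illustrative only, error handling trimmed):
 * constructing a request on an intel_context, serialising it after an
 * external dma_fence and then committing it for execution. Callers that
 * want to keep using the request after i915_request_add() are expected to
 * hold their own reference, taken here with i915_request_get().
 *
 *	struct i915_request *rq;
 *	int err;
 *
 *	rq = i915_request_create(ce);
 *	if (IS_ERR(rq))
 *		return PTR_ERR(rq);
 *
 *	err = i915_request_await_dma_fence(rq, fence);
 *
 *	i915_request_get(rq);
 *	i915_request_add(rq);
 *	...
 *	i915_request_put(rq);
 */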

void i915_request_add(struct i915_request *rq);

bool __i915_request_submit(struct i915_request *request);
void i915_request_submit(struct i915_request *request);

void i915_request_skip(struct i915_request *request, int error);

void __i915_request_unsubmit(struct i915_request *request);
void i915_request_unsubmit(struct i915_request *request);

/* Note: part of the intel_breadcrumbs family */
bool i915_request_enable_breadcrumb(struct i915_request *request);
void i915_request_cancel_breadcrumb(struct i915_request *request);

long i915_request_wait(struct i915_request *rq,
		       unsigned int flags,
		       long timeout)
	__attribute__((nonnull(1)));
#define I915_WAIT_INTERRUPTIBLE	BIT(0)
#define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
#define I915_WAIT_PRIORITY	BIT(2) /* small priority bump for the request */
#define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
#define I915_WAIT_FOR_IDLE_BOOST BIT(4)

static inline bool i915_request_signaled(const struct i915_request *rq)
{
	/* The request may live longer than its HWSP, so check flags first! */
	return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags);
}

static inline bool i915_request_is_active(const struct i915_request *rq)
{
	return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
}

/**
 * Returns true if seq1 is at or after seq2, using wrap-safe arithmetic.
 */
static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
{
	return (s32)(seq1 - seq2) >= 0;
}
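
/*
 * Illustrative note (not part of the API): the signed subtraction above makes
 * the comparison safe across u32 wraparound. For example, with
 * seq1 = 0x00000003 and seq2 = 0xfffffffd, seq1 - seq2 = 6, so (s32)6 >= 0
 * and seq1 is correctly treated as being after seq2 even though it is
 * numerically smaller. The scheme assumes the two seqnos are never more than
 * 2^31 apart.
 */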

static inline u32 __hwsp_seqno(const struct i915_request *rq)
{
	return READ_ONCE(*rq->hwsp_seqno);
}

/**
 * hwsp_seqno - the current breadcrumb value in the HW status page
 * @rq: the request, to chase the relevant HW status page
 *
 * The emphasis in naming here is that hwsp_seqno() is not a property of the
 * request, but an indication of the current HW state (associated with this
 * request). Its value will change as the GPU executes more requests.
 *
 * Returns the current breadcrumb value in the associated HW status page (or
 * the local timeline's equivalent) for this request. The request itself
 * has the associated breadcrumb value of rq->fence.seqno; when the HW
 * status page reports that breadcrumb or later, this request is complete.
 */
static inline u32 hwsp_seqno(const struct i915_request *rq)
{
	u32 seqno;

	rcu_read_lock(); /* the HWSP may be freed at runtime */
	seqno = __hwsp_seqno(rq);
	rcu_read_unlock();

	return seqno;
}

static inline bool __i915_request_has_started(const struct i915_request *rq)
{
	return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1);
}

/**
 * i915_request_started - check if the request has begun being executed
 * @rq: the request
 *
 * If the timeline is not using initial breadcrumbs, a request is
 * considered started if the previous request on its timeline (i.e.
 * context) has been signaled.
 *
 * If the timeline is using semaphores, it will also be emitting an
 * "initial breadcrumb" after the semaphores are complete and just before
 * it begins executing the user payload. A request can therefore be active
 * on the HW and not yet started as it is still busywaiting on its
 * dependencies (via HW semaphores).
 *
 * If the request has started, its dependencies will have been signaled
 * (either by fences or by semaphores) and it will have begun processing
 * the user payload.
 *
 * However, even if a request has started, it may have been preempted and
 * so no longer active, or it may have already completed.
 *
 * See also i915_request_is_active().
 *
 * Returns true if the request has begun executing the user payload, or
 * has completed.
 */
static inline bool i915_request_started(const struct i915_request *rq)
{
	if (i915_request_signaled(rq))
		return true;

	/* Remember: started but may have since been preempted! */
	return __i915_request_has_started(rq);
}

/**
 * i915_request_is_running - check if the request may actually be executing
 * @rq: the request
 *
 * Returns true if the request is currently submitted to hardware and has
 * passed its start point (i.e. the context is set up and it is not
 * busywaiting). Note that it may no longer be running by the time the
 * function returns!
 */
static inline bool i915_request_is_running(const struct i915_request *rq)
{
	if (!i915_request_is_active(rq))
		return false;

	return __i915_request_has_started(rq);
}

static inline bool i915_request_completed(const struct i915_request *rq)
{
	if (i915_request_signaled(rq))
		return true;

	return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno);
}

static inline void i915_request_mark_complete(struct i915_request *rq)
{
	rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */
}

static inline bool i915_request_has_waitboost(const struct i915_request *rq)
{
	return rq->flags & I915_REQUEST_WAITBOOST;
}

static inline bool i915_request_has_nopreempt(const struct i915_request *rq)
{
	/* Preemption should only be disabled very rarely */
	return unlikely(rq->flags & I915_REQUEST_NOPREEMPT);
}

bool i915_retire_requests(struct drm_i915_private *i915);

#endif /* I915_REQUEST_H */