/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#ifndef _I915_ACTIVE_H_
#define _I915_ACTIVE_H_

#include <linux/lockdep.h>

#include "i915_active_types.h"
#include "i915_request.h"

/*
 * We treat requests as fences. This is not to be confused with our
 * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
 * We use the fences to synchronize access from the CPU with activity on the
 * GPU, for example, we should not rewrite an object's PTEs whilst the GPU
 * is reading them. We also track fences at a higher level to provide
 * implicit synchronisation around GEM objects, e.g. set-domain will wait
 * for outstanding GPU rendering before marking the object ready for CPU
 * access, or a pageflip will wait until the GPU is complete before showing
 * the frame on the scanout.
 *
 * In order to use a fence, the object must track the fence it needs to
 * serialise with. For example, GEM objects want to track both read and
 * write access so that we can perform concurrent read operations between
 * the CPU and GPU engines, as well as waiting for all rendering to
 * complete, or waiting for the last GPU user of a "fence register". The
 * object then embeds a #i915_active_request to track the most recent (in
 * retirement order) request relevant for the desired mode of access.
 * The #i915_active_request is updated with i915_active_request_set() to
 * track the most recent fence request, typically this is done as part of
 * i915_vma_move_to_active().
 *
 * When the #i915_active_request completes (is retired), it will
 * signal its completion to the owner through a callback as well as mark
 * itself as idle (i915_active_request.request == NULL). The owner
 * can then perform any action, such as delayed freeing of an active
 * resource including itself.
42 */ 43 44 void i915_active_retire_noop(struct i915_active_request *active, 45 struct i915_request *request); 46 47 /** 48 * i915_active_request_init - prepares the activity tracker for use 49 * @active - the active tracker 50 * @rq - initial request to track, can be NULL 51 * @func - a callback when then the tracker is retired (becomes idle), 52 * can be NULL 53 * 54 * i915_active_request_init() prepares the embedded @active struct for use as 55 * an activity tracker, that is for tracking the last known active request 56 * associated with it. When the last request becomes idle, when it is retired 57 * after completion, the optional callback @func is invoked. 58 */ 59 static inline void 60 i915_active_request_init(struct i915_active_request *active, 61 struct mutex *lock, 62 struct i915_request *rq, 63 i915_active_retire_fn retire) 64 { 65 RCU_INIT_POINTER(active->request, rq); 66 INIT_LIST_HEAD(&active->link); 67 active->retire = retire ?: i915_active_retire_noop; 68 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) 69 active->lock = lock; 70 #endif 71 } 72 73 #define INIT_ACTIVE_REQUEST(name, lock) \ 74 i915_active_request_init((name), (lock), NULL, NULL) 75 76 /** 77 * i915_active_request_set - updates the tracker to watch the current request 78 * @active - the active tracker 79 * @request - the request to watch 80 * 81 * __i915_active_request_set() watches the given @request for completion. Whilst 82 * that @request is busy, the @active reports busy. When that @request is 83 * retired, the @active tracker is updated to report idle. 
84 */ 85 static inline void 86 __i915_active_request_set(struct i915_active_request *active, 87 struct i915_request *request) 88 { 89 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) 90 lockdep_assert_held(active->lock); 91 #endif 92 list_move(&active->link, &request->active_list); 93 rcu_assign_pointer(active->request, request); 94 } 95 96 int __must_check 97 i915_active_request_set(struct i915_active_request *active, 98 struct i915_request *rq); 99 100 /** 101 * i915_active_request_raw - return the active request 102 * @active - the active tracker 103 * 104 * i915_active_request_raw() returns the current request being tracked, or NULL. 105 * It does not obtain a reference on the request for the caller, so the caller 106 * must hold struct_mutex. 107 */ 108 static inline struct i915_request * 109 i915_active_request_raw(const struct i915_active_request *active, 110 struct mutex *mutex) 111 { 112 return rcu_dereference_protected(active->request, 113 lockdep_is_held(mutex)); 114 } 115 116 /** 117 * i915_active_request_peek - report the active request being monitored 118 * @active - the active tracker 119 * 120 * i915_active_request_peek() returns the current request being tracked if 121 * still active, or NULL. It does not obtain a reference on the request 122 * for the caller, so the caller must hold struct_mutex. 123 */ 124 static inline struct i915_request * 125 i915_active_request_peek(const struct i915_active_request *active, 126 struct mutex *mutex) 127 { 128 struct i915_request *request; 129 130 request = i915_active_request_raw(active, mutex); 131 if (!request || i915_request_completed(request)) 132 return NULL; 133 134 return request; 135 } 136 137 /** 138 * i915_active_request_get - return a reference to the active request 139 * @active - the active tracker 140 * 141 * i915_active_request_get() returns a reference to the active request, or NULL 142 * if the active tracker is idle. The caller must hold struct_mutex. 
143 */ 144 static inline struct i915_request * 145 i915_active_request_get(const struct i915_active_request *active, 146 struct mutex *mutex) 147 { 148 return i915_request_get(i915_active_request_peek(active, mutex)); 149 } 150 151 /** 152 * __i915_active_request_get_rcu - return a reference to the active request 153 * @active - the active tracker 154 * 155 * __i915_active_request_get() returns a reference to the active request, 156 * or NULL if the active tracker is idle. The caller must hold the RCU read 157 * lock, but the returned pointer is safe to use outside of RCU. 158 */ 159 static inline struct i915_request * 160 __i915_active_request_get_rcu(const struct i915_active_request *active) 161 { 162 /* 163 * Performing a lockless retrieval of the active request is super 164 * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing 165 * slab of request objects will not be freed whilst we hold the 166 * RCU read lock. It does not guarantee that the request itself 167 * will not be freed and then *reused*. Viz, 168 * 169 * Thread A Thread B 170 * 171 * rq = active.request 172 * retire(rq) -> free(rq); 173 * (rq is now first on the slab freelist) 174 * active.request = NULL 175 * 176 * rq = new submission on a new object 177 * ref(rq) 178 * 179 * To prevent the request from being reused whilst the caller 180 * uses it, we take a reference like normal. Whilst acquiring 181 * the reference we check that it is not in a destroyed state 182 * (refcnt == 0). That prevents the request being reallocated 183 * whilst the caller holds on to it. To check that the request 184 * was not reallocated as we acquired the reference we have to 185 * check that our request remains the active request across 186 * the lookup, in the same manner as a seqlock. The visibility 187 * of the pointer versus the reference counting is controlled 188 * by using RCU barriers (rcu_dereference and rcu_assign_pointer). 
189 * 190 * In the middle of all that, we inspect whether the request is 191 * complete. Retiring is lazy so the request may be completed long 192 * before the active tracker is updated. Querying whether the 193 * request is complete is far cheaper (as it involves no locked 194 * instructions setting cachelines to exclusive) than acquiring 195 * the reference, so we do it first. The RCU read lock ensures the 196 * pointer dereference is valid, but does not ensure that the 197 * seqno nor HWS is the right one! However, if the request was 198 * reallocated, that means the active tracker's request was complete. 199 * If the new request is also complete, then both are and we can 200 * just report the active tracker is idle. If the new request is 201 * incomplete, then we acquire a reference on it and check that 202 * it remained the active request. 203 * 204 * It is then imperative that we do not zero the request on 205 * reallocation, so that we can chase the dangling pointers! 206 * See i915_request_alloc(). 207 */ 208 do { 209 struct i915_request *request; 210 211 request = rcu_dereference(active->request); 212 if (!request || i915_request_completed(request)) 213 return NULL; 214 215 /* 216 * An especially silly compiler could decide to recompute the 217 * result of i915_request_completed, more specifically 218 * re-emit the load for request->fence.seqno. A race would catch 219 * a later seqno value, which could flip the result from true to 220 * false. Which means part of the instructions below might not 221 * be executed, while later on instructions are executed. Due to 222 * barriers within the refcounting the inconsistency can't reach 223 * past the call to i915_request_get_rcu, but not executing 224 * that while still executing i915_request_put() creates 225 * havoc enough. Prevent this with a compiler barrier. 
226 */ 227 barrier(); 228 229 request = i915_request_get_rcu(request); 230 231 /* 232 * What stops the following rcu_access_pointer() from occurring 233 * before the above i915_request_get_rcu()? If we were 234 * to read the value before pausing to get the reference to 235 * the request, we may not notice a change in the active 236 * tracker. 237 * 238 * The rcu_access_pointer() is a mere compiler barrier, which 239 * means both the CPU and compiler are free to perform the 240 * memory read without constraint. The compiler only has to 241 * ensure that any operations after the rcu_access_pointer() 242 * occur afterwards in program order. This means the read may 243 * be performed earlier by an out-of-order CPU, or adventurous 244 * compiler. 245 * 246 * The atomic operation at the heart of 247 * i915_request_get_rcu(), see dma_fence_get_rcu(), is 248 * atomic_inc_not_zero() which is only a full memory barrier 249 * when successful. That is, if i915_request_get_rcu() 250 * returns the request (and so with the reference counted 251 * incremented) then the following read for rcu_access_pointer() 252 * must occur after the atomic operation and so confirm 253 * that this request is the one currently being tracked. 254 * 255 * The corresponding write barrier is part of 256 * rcu_assign_pointer(). 257 */ 258 if (!request || request == rcu_access_pointer(active->request)) 259 return rcu_pointer_handoff(request); 260 261 i915_request_put(request); 262 } while (1); 263 } 264 265 /** 266 * i915_active_request_get_unlocked - return a reference to the active request 267 * @active - the active tracker 268 * 269 * i915_active_request_get_unlocked() returns a reference to the active request, 270 * or NULL if the active tracker is idle. The reference is obtained under RCU, 271 * so no locking is required by the caller. 272 * 273 * The reference should be freed with i915_request_put(). 
274 */ 275 static inline struct i915_request * 276 i915_active_request_get_unlocked(const struct i915_active_request *active) 277 { 278 struct i915_request *request; 279 280 rcu_read_lock(); 281 request = __i915_active_request_get_rcu(active); 282 rcu_read_unlock(); 283 284 return request; 285 } 286 287 /** 288 * i915_active_request_isset - report whether the active tracker is assigned 289 * @active - the active tracker 290 * 291 * i915_active_request_isset() returns true if the active tracker is currently 292 * assigned to a request. Due to the lazy retiring, that request may be idle 293 * and this may report stale information. 294 */ 295 static inline bool 296 i915_active_request_isset(const struct i915_active_request *active) 297 { 298 return rcu_access_pointer(active->request); 299 } 300 301 /** 302 * i915_active_request_retire - waits until the request is retired 303 * @active - the active request on which to wait 304 * 305 * i915_active_request_retire() waits until the request is completed, 306 * and then ensures that at least the retirement handler for this 307 * @active tracker is called before returning. If the @active 308 * tracker is idle, the function returns immediately. 309 */ 310 static inline int __must_check 311 i915_active_request_retire(struct i915_active_request *active, 312 struct mutex *mutex, i915_active_retire_fn retire) 313 { 314 struct i915_request *request; 315 long ret; 316 317 request = i915_active_request_raw(active, mutex); 318 if (!request) 319 return 0; 320 321 ret = i915_request_wait(request, 322 I915_WAIT_INTERRUPTIBLE, 323 MAX_SCHEDULE_TIMEOUT); 324 if (ret < 0) 325 return ret; 326 327 list_del_init(&active->link); 328 RCU_INIT_POINTER(active->request, NULL); 329 330 retire(active, request); 331 332 return 0; 333 } 334 335 /* 336 * GPU activity tracking 337 * 338 * Each set of commands submitted to the GPU compromises a single request that 339 * signals a fence upon completion. 
 * struct i915_request combines the
 * command submission, scheduling and fence signaling roles. If we want to see
 * if a particular task is complete, we need to grab the fence (struct
 * i915_request) for that task and check or wait for it to be signaled. More
 * often though we want to track the status of a bunch of tasks, for example
 * to wait for the GPU to finish accessing some memory across a variety of
 * different command pipelines from different clients. We could choose to
 * track every single request associated with the task, but knowing that
 * each request belongs to an ordered timeline (later requests within a
 * timeline must wait for earlier requests), we need only track the
 * latest request in each timeline to determine the overall status of the
 * task.
 *
 * struct i915_active provides this tracking across timelines. It builds a
 * composite shared-fence, and is updated as new work is submitted to the task,
 * forming a snapshot of the current status. It should be embedded into the
 * different resources that need to track their associated GPU activity to
 * provide a callback when that GPU activity has ceased, or otherwise to
 * provide a serialisation point either for request submission or for CPU
 * synchronisation.
359 */ 360 361 void __i915_active_init(struct drm_i915_private *i915, 362 struct i915_active *ref, 363 int (*active)(struct i915_active *ref), 364 void (*retire)(struct i915_active *ref), 365 struct lock_class_key *key); 366 #define i915_active_init(i915, ref, active, retire) do { \ 367 static struct lock_class_key __key; \ 368 \ 369 __i915_active_init(i915, ref, active, retire, &__key); \ 370 } while (0) 371 372 int i915_active_ref(struct i915_active *ref, 373 struct intel_timeline *tl, 374 struct i915_request *rq); 375 376 int i915_active_wait(struct i915_active *ref); 377 378 int i915_request_await_active(struct i915_request *rq, 379 struct i915_active *ref); 380 int i915_request_await_active_request(struct i915_request *rq, 381 struct i915_active_request *active); 382 383 int i915_active_acquire(struct i915_active *ref); 384 void i915_active_release(struct i915_active *ref); 385 void __i915_active_release_nested(struct i915_active *ref, int subclass); 386 387 bool i915_active_trygrab(struct i915_active *ref); 388 void i915_active_ungrab(struct i915_active *ref); 389 390 static inline bool 391 i915_active_is_idle(const struct i915_active *ref) 392 { 393 return !atomic_read(&ref->count); 394 } 395 396 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) 397 void i915_active_fini(struct i915_active *ref); 398 #else 399 static inline void i915_active_fini(struct i915_active *ref) { } 400 #endif 401 402 int i915_active_acquire_preallocate_barrier(struct i915_active *ref, 403 struct intel_engine_cs *engine); 404 void i915_active_acquire_barrier(struct i915_active *ref); 405 void i915_request_add_active_barriers(struct i915_request *rq); 406 407 #endif /* _I915_ACTIVE_H_ */