/*
 * Copyright 2009 VMware, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Michel Dänzer
 */
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_uvd.h"
#include "amdgpu_vce.h"

/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */
static void amdgpu_do_test_moves(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	struct amdgpu_bo *vram_obj = NULL;
	struct amdgpu_bo **gtt_obj = NULL;
	uint64_t gtt_addr, vram_addr;
	unsigned n, size;
	int i, r;

	size = 1024 * 1024;

	/* Number of tests =
	 * (Total GTT - IB pool - writeback page - ring buffers) / test size
	 */
	n = adev->mc.gtt_size - AMDGPU_IB_POOL_SIZE*64*1024;
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
		if (adev->rings[i])
			n -= adev->rings[i]->ring_size;
	if (adev->wb.wb_obj)
		n -= AMDGPU_GPU_PAGE_SIZE;
	if (adev->irq.ih.ring_obj)
		n -= adev->irq.ih.ring_size;
	n /= size;
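	/*
	 * Illustrative budget (numbers are examples, not hardware facts):
	 * with a 512 MiB GTT, the IB pool (AMDGPU_IB_POOL_SIZE slots of
	 * 64 KiB each), one GPU page of writeback space and a few ring
	 * buffers only eat a handful of MiB, leaving room for roughly 500
	 * tests of 1 MiB each.
	 */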

	gtt_obj = kcalloc(n, sizeof(*gtt_obj), GFP_KERNEL);
	if (!gtt_obj) {
		DRM_ERROR("Failed to allocate %u pointers\n", n);
		r = 1;
		goto out_cleanup;
	}

	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
			     AMDGPU_GEM_DOMAIN_VRAM, 0,
			     NULL, NULL, &vram_obj);
	if (r) {
		DRM_ERROR("Failed to create VRAM object\n");
		goto out_cleanup;
	}
	r = amdgpu_bo_reserve(vram_obj, false);
	if (unlikely(r != 0))
		goto out_unref;
	r = amdgpu_bo_pin(vram_obj, AMDGPU_GEM_DOMAIN_VRAM, &vram_addr);
	if (r) {
		DRM_ERROR("Failed to pin VRAM object\n");
		goto out_unres;
	}
	for (i = 0; i < n; i++) {
		void *gtt_map, *vram_map;
		void **gtt_start, **gtt_end;
		void **vram_start, **vram_end;
		struct fence *fence = NULL;

		r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
				     NULL, gtt_obj + i);
		if (r) {
			DRM_ERROR("Failed to create GTT object %d\n", i);
			goto out_lclean;
		}

		r = amdgpu_bo_reserve(gtt_obj[i], false);
		if (unlikely(r != 0))
			goto out_lclean_unref;
		r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT, &gtt_addr);
		if (r) {
			DRM_ERROR("Failed to pin GTT object %d\n", i);
			goto out_lclean_unres;
		}

		r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
		if (r) {
			DRM_ERROR("Failed to map GTT object %d\n", i);
			goto out_lclean_unpin;
		}

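		/*
		 * Seed every pointer-sized slot of the GTT buffer with its
		 * own kernel mapping address, so a correct copy can be
		 * verified later without keeping a separate reference
		 * buffer around.
		 */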
		for (gtt_start = gtt_map, gtt_end = gtt_map + size;
		     gtt_start < gtt_end;
		     gtt_start++)
			*gtt_start = gtt_start;

		amdgpu_bo_kunmap(gtt_obj[i]);

		r = amdgpu_copy_buffer(ring, gtt_addr, vram_addr,
				       size, NULL, &fence);

		if (r) {
			DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
			goto out_lclean_unpin;
		}

		r = fence_wait(fence, false);
		if (r) {
			DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i);
			goto out_lclean_unpin;
		}

		fence_put(fence);
		fence = NULL; /* avoid a double put on the error paths below */

		r = amdgpu_bo_kmap(vram_obj, &vram_map);
		if (r) {
			DRM_ERROR("Failed to map VRAM object after copy %d\n", i);
			goto out_lclean_unpin;
		}

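		/*
		 * Each VRAM slot must now hold the address of the GTT slot
		 * it was copied from.  While scanning, re-seed VRAM with its
		 * own mapping addresses for the return trip below.
		 */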
		for (gtt_start = gtt_map, gtt_end = gtt_map + size,
		     vram_start = vram_map, vram_end = vram_map + size;
		     vram_start < vram_end;
		     gtt_start++, vram_start++) {
			if (*vram_start != gtt_start) {
				DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, "
					  "expected 0x%p (GTT/VRAM offset "
					  "0x%16llx/0x%16llx)\n",
					  i, *vram_start, gtt_start,
					  (unsigned long long)
					  (gtt_addr - adev->mc.gtt_start +
					   (void *)gtt_start - gtt_map),
					  (unsigned long long)
					  (vram_addr - adev->mc.vram_start +
					   (void *)gtt_start - gtt_map));
				amdgpu_bo_kunmap(vram_obj);
				goto out_lclean_unpin;
			}
			*vram_start = vram_start;
		}

		amdgpu_bo_kunmap(vram_obj);

		r = amdgpu_copy_buffer(ring, vram_addr, gtt_addr,
				       size, NULL, &fence);

		if (r) {
			DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
			goto out_lclean_unpin;
		}

		r = fence_wait(fence, false);
		if (r) {
			DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i);
			goto out_lclean_unpin;
		}

		fence_put(fence);
		fence = NULL; /* avoid a double put on the error paths below */

		r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
		if (r) {
			DRM_ERROR("Failed to map GTT object after copy %d\n", i);
			goto out_lclean_unpin;
		}

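		/*
		 * After the copy back, every GTT slot must hold the VRAM
		 * mapping address that was written into VRAM above.
		 */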
		for (gtt_start = gtt_map, gtt_end = gtt_map + size,
		     vram_start = vram_map, vram_end = vram_map + size;
		     gtt_start < gtt_end;
		     gtt_start++, vram_start++) {
			if (*gtt_start != vram_start) {
				DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, "
					  "expected 0x%p (VRAM/GTT offset "
					  "0x%16llx/0x%16llx)\n",
					  i, *gtt_start, vram_start,
					  (unsigned long long)
					  (vram_addr - adev->mc.vram_start +
					   (void *)vram_start - vram_map),
					  (unsigned long long)
					  (gtt_addr - adev->mc.gtt_start +
					   (void *)vram_start - vram_map));
				amdgpu_bo_kunmap(gtt_obj[i]);
				goto out_lclean_unpin;
			}
		}

		amdgpu_bo_kunmap(gtt_obj[i]);

		DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n",
			 gtt_addr - adev->mc.gtt_start);
		continue;

out_lclean_unpin:
		amdgpu_bo_unpin(gtt_obj[i]);
out_lclean_unres:
		amdgpu_bo_unreserve(gtt_obj[i]);
out_lclean_unref:
		amdgpu_bo_unref(&gtt_obj[i]);
out_lclean:
		for (--i; i >= 0; --i) {
			amdgpu_bo_unpin(gtt_obj[i]);
			amdgpu_bo_unreserve(gtt_obj[i]);
			amdgpu_bo_unref(&gtt_obj[i]);
		}
		if (fence)
			fence_put(fence);
		break;
	}

	amdgpu_bo_unpin(vram_obj);
out_unres:
	amdgpu_bo_unreserve(vram_obj);
out_unref:
	amdgpu_bo_unref(&vram_obj);
out_cleanup:
	kfree(gtt_obj);
	if (r)
		printk(KERN_WARNING "Error while testing BO move.\n");
}

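/* Only engines that expose a buffer copy callback can run the move test. */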
void amdgpu_test_moves(struct amdgpu_device *adev)
{
	if (adev->mman.buffer_funcs)
		amdgpu_do_test_moves(adev);
}

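/*
 * Emit a fence on @ring.  UVD and VCE cannot emit a bare fence; for those
 * rings a dummy create/destroy session message pair is submitted instead,
 * and the fence attached to the destroy message is returned.
 */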
static int amdgpu_test_create_and_emit_fence(struct amdgpu_device *adev,
					     struct amdgpu_ring *ring,
					     struct fence **fence)
{
	uint32_t handle = ring->idx ^ 0xdeafbeef;
	int r;

	if (ring == &adev->uvd.ring) {
		r = amdgpu_uvd_get_create_msg(ring, handle, NULL);
		if (r) {
			DRM_ERROR("Failed to get dummy create msg\n");
			return r;
		}

		r = amdgpu_uvd_get_destroy_msg(ring, handle, fence);
		if (r) {
			DRM_ERROR("Failed to get dummy destroy msg\n");
			return r;
		}

	} else if (ring == &adev->vce.ring[0] ||
		   ring == &adev->vce.ring[1]) {
		r = amdgpu_vce_get_create_msg(ring, handle, NULL);
		if (r) {
			DRM_ERROR("Failed to get dummy create msg\n");
			return r;
		}

		r = amdgpu_vce_get_destroy_msg(ring, handle, fence);
		if (r) {
			DRM_ERROR("Failed to get dummy destroy msg\n");
			return r;
		}
	} else {
		struct amdgpu_fence *a_fence = NULL;
		r = amdgpu_ring_lock(ring, 64);
		if (r) {
			DRM_ERROR("Failed to lock ring %d\n", ring->idx);
			return r;
		}
		r = amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_UNDEFINED, &a_fence);
		if (r) {
			DRM_ERROR("Failed to emit fence on ring %d\n", ring->idx);
			amdgpu_ring_unlock_undo(ring);
			return r;
		}
		amdgpu_ring_unlock_commit(ring);
		*fence = &a_fence->base;
	}
	return 0;
}

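/*
 * Basic two-ring semaphore test: ringA blocks twice on a semaphore and
 * emits a fence after each wait; ringB releases the waits one at a time.
 * Each fence must signal only after the matching semaphore signal, never
 * before it.
 */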
void amdgpu_test_ring_sync(struct amdgpu_device *adev,
			   struct amdgpu_ring *ringA,
			   struct amdgpu_ring *ringB)
{
	struct fence *fence1 = NULL, *fence2 = NULL;
	struct amdgpu_semaphore *semaphore = NULL;
	int r;

	r = amdgpu_semaphore_create(adev, &semaphore);
	if (r) {
		DRM_ERROR("Failed to create semaphore\n");
		goto out_cleanup;
	}

	r = amdgpu_ring_lock(ringA, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_wait(ringA, semaphore);
	amdgpu_ring_unlock_commit(ringA);

	r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence1);
	if (r)
		goto out_cleanup;

	r = amdgpu_ring_lock(ringA, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_wait(ringA, semaphore);
	amdgpu_ring_unlock_commit(ringA);

	r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence2);
	if (r)
		goto out_cleanup;

	mdelay(1000);

	if (fence_is_signaled(fence1)) {
		DRM_ERROR("Fence 1 signaled without waiting for semaphore.\n");
		goto out_cleanup;
	}

	r = amdgpu_ring_lock(ringB, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring B %p\n", ringB);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_signal(ringB, semaphore);
	amdgpu_ring_unlock_commit(ringB);

	r = fence_wait(fence1, false);
	if (r) {
		DRM_ERROR("Failed to wait for sync fence 1\n");
		goto out_cleanup;
	}

	mdelay(1000);

	if (fence_is_signaled(fence2)) {
		DRM_ERROR("Fence 2 signaled without waiting for semaphore.\n");
		goto out_cleanup;
	}

	r = amdgpu_ring_lock(ringB, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring B %p\n", ringB);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_signal(ringB, semaphore);
	amdgpu_ring_unlock_commit(ringB);

	r = fence_wait(fence2, false);
	if (r) {
		DRM_ERROR("Failed to wait for sync fence 2\n");
		goto out_cleanup;
	}

out_cleanup:
	amdgpu_semaphore_free(adev, &semaphore, NULL);

	if (fence1)
		fence_put(fence1);

	if (fence2)
		fence_put(fence2);

	if (r)
		printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
}

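/*
 * Three-ring variant: ringA and ringB both block on the same semaphore,
 * and ringC signals it twice.  Each signal must release exactly one
 * waiter, so exactly one of the two fences may be signaled after the
 * first release.
 */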
static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
				   struct amdgpu_ring *ringA,
				   struct amdgpu_ring *ringB,
				   struct amdgpu_ring *ringC)
{
	struct fence *fenceA = NULL, *fenceB = NULL;
	struct amdgpu_semaphore *semaphore = NULL;
	bool sigA, sigB;
	int i, r;

	r = amdgpu_semaphore_create(adev, &semaphore);
	if (r) {
		DRM_ERROR("Failed to create semaphore\n");
		goto out_cleanup;
	}

	r = amdgpu_ring_lock(ringA, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_wait(ringA, semaphore);
	amdgpu_ring_unlock_commit(ringA);

	r = amdgpu_test_create_and_emit_fence(adev, ringA, &fenceA);
	if (r)
		goto out_cleanup;

	r = amdgpu_ring_lock(ringB, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_wait(ringB, semaphore);
	amdgpu_ring_unlock_commit(ringB);
	r = amdgpu_test_create_and_emit_fence(adev, ringB, &fenceB);
	if (r)
		goto out_cleanup;

	mdelay(1000);

	if (fence_is_signaled(fenceA)) {
		DRM_ERROR("Fence A signaled without waiting for semaphore.\n");
		goto out_cleanup;
	}
	if (fence_is_signaled(fenceB)) {
		DRM_ERROR("Fence B signaled without waiting for semaphore.\n");
		goto out_cleanup;
	}

	r = amdgpu_ring_lock(ringC, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring C %p\n", ringC);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_signal(ringC, semaphore);
	amdgpu_ring_unlock_commit(ringC);

	for (i = 0; i < 30; ++i) {
		mdelay(100);
		sigA = fence_is_signaled(fenceA);
		sigB = fence_is_signaled(fenceB);
		if (sigA || sigB)
			break;
	}

	if (!sigA && !sigB) {
		DRM_ERROR("Neither fence A nor B has been signaled\n");
		goto out_cleanup;
	} else if (sigA && sigB) {
		DRM_ERROR("Both fences A and B have been signaled\n");
		goto out_cleanup;
	}

	DRM_INFO("Fence %c was first signaled\n", sigA ? 'A' : 'B');

	r = amdgpu_ring_lock(ringC, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring C %p\n", ringC);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_signal(ringC, semaphore);
	amdgpu_ring_unlock_commit(ringC);

	mdelay(1000);

	r = fence_wait(fenceA, false);
	if (r) {
		DRM_ERROR("Failed to wait for sync fence A\n");
		goto out_cleanup;
	}
	r = fence_wait(fenceB, false);
	if (r) {
		DRM_ERROR("Failed to wait for sync fence B\n");
		goto out_cleanup;
	}

out_cleanup:
	amdgpu_semaphore_free(adev, &semaphore, NULL);

	if (fenceA)
		fence_put(fenceA);

	if (fenceB)
		fence_put(fenceB);

	if (r)
		printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
}

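/*
 * Both VCE rings are served by the same VCE block, so a semaphore
 * handshake between them is not expected to make progress; skip that
 * pairing.
 */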
static bool amdgpu_test_sync_possible(struct amdgpu_ring *ringA,
				      struct amdgpu_ring *ringB)
{
	if (ringA == &ringA->adev->vce.ring[0] &&
	    ringB == &ringB->adev->vce.ring[1])
		return false;

	return true;
}

void amdgpu_test_syncing(struct amdgpu_device *adev)
{
	int i, j, k;

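	/*
	 * Walk every unordered ring pair once, testing both signal
	 * directions, and every unordered triple once, testing all six
	 * orderings with the three-ring variant.  Rings that are not
	 * ready are skipped.
	 */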
	for (i = 1; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ringA = adev->rings[i];
		if (!ringA || !ringA->ready)
			continue;

		for (j = 0; j < i; ++j) {
			struct amdgpu_ring *ringB = adev->rings[j];
			if (!ringB || !ringB->ready)
				continue;

			if (!amdgpu_test_sync_possible(ringA, ringB))
				continue;

			DRM_INFO("Testing syncing between rings %d and %d...\n", i, j);
			amdgpu_test_ring_sync(adev, ringA, ringB);

			DRM_INFO("Testing syncing between rings %d and %d...\n", j, i);
			amdgpu_test_ring_sync(adev, ringB, ringA);

			for (k = 0; k < j; ++k) {
				struct amdgpu_ring *ringC = adev->rings[k];
				if (!ringC || !ringC->ready)
					continue;

				if (!amdgpu_test_sync_possible(ringA, ringC))
					continue;

				if (!amdgpu_test_sync_possible(ringB, ringC))
					continue;

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, j, k);
				amdgpu_test_ring_sync2(adev, ringA, ringB, ringC);

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, k, j);
				amdgpu_test_ring_sync2(adev, ringA, ringC, ringB);

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", j, i, k);
				amdgpu_test_ring_sync2(adev, ringB, ringA, ringC);

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", j, k, i);
				amdgpu_test_ring_sync2(adev, ringB, ringC, ringA);

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", k, i, j);
				amdgpu_test_ring_sync2(adev, ringC, ringA, ringB);

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", k, j, i);
				amdgpu_test_ring_sync2(adev, ringC, ringB, ringA);
			}
		}
	}
}