/* savage_state.c -- State and drawing support for Savage
 *
 * Copyright 2004  Felix Kuehling
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <drm/drmP.h>
#include <drm/savage_drm.h>
#include "savage_drv.h"

void savage_emit_clip_rect_s3d(drm_savage_private_t *dev_priv,
			       const struct drm_clip_rect *pbox)
{
	uint32_t scstart = dev_priv->state.s3d.new_scstart;
	uint32_t scend = dev_priv->state.s3d.new_scend;
	scstart = (scstart & ~SAVAGE_SCISSOR_MASK_S3D) |
		((uint32_t) pbox->x1 & 0x000007ff) |
		(((uint32_t) pbox->y1 << 16) & 0x07ff0000);
	scend = (scend & ~SAVAGE_SCISSOR_MASK_S3D) |
		(((uint32_t) pbox->x2 - 1) & 0x000007ff) |
		((((uint32_t) pbox->y2 - 1) << 16) & 0x07ff0000);
	if (scstart != dev_priv->state.s3d.scstart ||
	    scend != dev_priv->state.s3d.scend) {
		DMA_LOCALS;
		BEGIN_DMA(4);
		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
		DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2);
		DMA_WRITE(scstart);
		DMA_WRITE(scend);
		dev_priv->state.s3d.scstart = scstart;
		dev_priv->state.s3d.scend = scend;
		dev_priv->waiting = 1;
		DMA_COMMIT();
	}
}

void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv,
			      const struct drm_clip_rect *pbox)
{
	uint32_t drawctrl0 = dev_priv->state.s4.new_drawctrl0;
	uint32_t drawctrl1 = dev_priv->state.s4.new_drawctrl1;
	drawctrl0 = (drawctrl0 & ~SAVAGE_SCISSOR_MASK_S4) |
		((uint32_t) pbox->x1 & 0x000007ff) |
		(((uint32_t) pbox->y1 << 12) & 0x00fff000);
	drawctrl1 = (drawctrl1 & ~SAVAGE_SCISSOR_MASK_S4) |
		(((uint32_t) pbox->x2 - 1) & 0x000007ff) |
		((((uint32_t) pbox->y2 - 1) << 12) & 0x00fff000);
	if (drawctrl0 != dev_priv->state.s4.drawctrl0 ||
	    drawctrl1 != dev_priv->state.s4.drawctrl1) {
		DMA_LOCALS;
		BEGIN_DMA(4);
		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
		DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2);
		DMA_WRITE(drawctrl0);
		DMA_WRITE(drawctrl1);
		dev_priv->state.s4.drawctrl0 = drawctrl0;
		dev_priv->state.s4.drawctrl1 = drawctrl1;
		dev_priv->waiting = 1;
		DMA_COMMIT();
	}
}

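/*
 * Descriptive note (derived from the checks below): texture addresses
 * passed down from user space encode their location in the low bits.
 * Bit 0 selects AGP (1) or local video memory (0); bit 1 must be set and
 * bit 2 clear, anything else is rejected as reserved.  The remaining
 * bits are then checked against the local or AGP texture range set up
 * by the driver.
 */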
static int savage_verify_texaddr(drm_savage_private_t *dev_priv, int unit,
				 uint32_t addr)
{
	if ((addr & 6) != 2) {	/* reserved bits */
		DRM_ERROR("bad texAddr%d %08x (reserved bits)\n", unit, addr);
		return -EINVAL;
	}
	if (!(addr & 1)) {	/* local */
		addr &= ~7;
		if (addr < dev_priv->texture_offset ||
		    addr >= dev_priv->texture_offset + dev_priv->texture_size) {
			DRM_ERROR
			    ("bad texAddr%d %08x (local addr out of range)\n",
			     unit, addr);
			return -EINVAL;
		}
	} else {		/* AGP */
		if (!dev_priv->agp_textures) {
			DRM_ERROR("bad texAddr%d %08x (AGP not available)\n",
				  unit, addr);
			return -EINVAL;
		}
		addr &= ~7;
		if (addr < dev_priv->agp_textures->offset ||
		    addr >= (dev_priv->agp_textures->offset +
			     dev_priv->agp_textures->size)) {
			DRM_ERROR
			    ("bad texAddr%d %08x (AGP addr out of range)\n",
			     unit, addr);
			return -EINVAL;
		}
	}
	return 0;
}

#define SAVE_STATE(reg,where)			\
	if(start <= reg && start+count > reg)	\
		dev_priv->state.where = regs[reg - start]
#define SAVE_STATE_MASK(reg,where,mask) do {			\
	if(start <= reg && start+count > reg) {			\
		uint32_t tmp;					\
		tmp = regs[reg - start];			\
		dev_priv->state.where = (tmp & (mask)) |	\
			(dev_priv->state.where & ~(mask));	\
	}							\
} while (0)

static int savage_verify_state_s3d(drm_savage_private_t *dev_priv,
				   unsigned int start, unsigned int count,
				   const uint32_t *regs)
{
	if (start < SAVAGE_TEXPALADDR_S3D ||
	    start + count - 1 > SAVAGE_DESTTEXRWWATERMARK_S3D) {
		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
			  start, start + count - 1);
		return -EINVAL;
	}

	SAVE_STATE_MASK(SAVAGE_SCSTART_S3D, s3d.new_scstart,
			~SAVAGE_SCISSOR_MASK_S3D);
	SAVE_STATE_MASK(SAVAGE_SCEND_S3D, s3d.new_scend,
			~SAVAGE_SCISSOR_MASK_S3D);

	/* if any texture regs were changed ... */
	if (start <= SAVAGE_TEXCTRL_S3D &&
	    start + count > SAVAGE_TEXPALADDR_S3D) {
		/* ... check texture state */
		SAVE_STATE(SAVAGE_TEXCTRL_S3D, s3d.texctrl);
		SAVE_STATE(SAVAGE_TEXADDR_S3D, s3d.texaddr);
		if (dev_priv->state.s3d.texctrl & SAVAGE_TEXCTRL_TEXEN_MASK)
			return savage_verify_texaddr(dev_priv, 0,
					dev_priv->state.s3d.texaddr);
	}

	return 0;
}

static int savage_verify_state_s4(drm_savage_private_t *dev_priv,
				  unsigned int start, unsigned int count,
				  const uint32_t *regs)
{
	int ret = 0;

	if (start < SAVAGE_DRAWLOCALCTRL_S4 ||
	    start + count - 1 > SAVAGE_TEXBLENDCOLOR_S4) {
		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
			  start, start + count - 1);
		return -EINVAL;
	}

	SAVE_STATE_MASK(SAVAGE_DRAWCTRL0_S4, s4.new_drawctrl0,
			~SAVAGE_SCISSOR_MASK_S4);
	SAVE_STATE_MASK(SAVAGE_DRAWCTRL1_S4, s4.new_drawctrl1,
			~SAVAGE_SCISSOR_MASK_S4);

	/* if any texture regs were changed ... */
	if (start <= SAVAGE_TEXDESCR_S4 &&
	    start + count > SAVAGE_TEXPALADDR_S4) {
		/* ... check texture state */
		SAVE_STATE(SAVAGE_TEXDESCR_S4, s4.texdescr);
		SAVE_STATE(SAVAGE_TEXADDR0_S4, s4.texaddr0);
		SAVE_STATE(SAVAGE_TEXADDR1_S4, s4.texaddr1);
		if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX0EN_MASK)
			ret |= savage_verify_texaddr(dev_priv, 0,
					dev_priv->state.s4.texaddr0);
		if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX1EN_MASK)
			ret |= savage_verify_texaddr(dev_priv, 1,
					dev_priv->state.s4.texaddr1);
	}

	return ret;
}

#undef SAVE_STATE
#undef SAVE_STATE_MASK

static int savage_dispatch_state(drm_savage_private_t *dev_priv,
				 const drm_savage_cmd_header_t *cmd_header,
				 const uint32_t *regs)
{
	unsigned int count = cmd_header->state.count;
	unsigned int start = cmd_header->state.start;
	unsigned int count2 = 0;
	unsigned int bci_size;
	int ret;
	DMA_LOCALS;

	if (!count)
		return 0;

	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
		ret = savage_verify_state_s3d(dev_priv, start, count, regs);
		if (ret != 0)
			return ret;
		/* scissor regs are emitted in savage_dispatch_draw */
		if (start < SAVAGE_SCSTART_S3D) {
			if (start + count > SAVAGE_SCEND_S3D + 1)
				count2 = count - (SAVAGE_SCEND_S3D + 1 - start);
			if (start + count > SAVAGE_SCSTART_S3D)
				count = SAVAGE_SCSTART_S3D - start;
		} else if (start <= SAVAGE_SCEND_S3D) {
			if (start + count > SAVAGE_SCEND_S3D + 1) {
				count -= SAVAGE_SCEND_S3D + 1 - start;
				start = SAVAGE_SCEND_S3D + 1;
			} else
				return 0;
		}
	} else {
		ret = savage_verify_state_s4(dev_priv, start, count, regs);
		if (ret != 0)
			return ret;
		/* scissor regs are emitted in savage_dispatch_draw */
		if (start < SAVAGE_DRAWCTRL0_S4) {
			if (start + count > SAVAGE_DRAWCTRL1_S4 + 1)
				count2 = count -
					 (SAVAGE_DRAWCTRL1_S4 + 1 - start);
			if (start + count > SAVAGE_DRAWCTRL0_S4)
				count = SAVAGE_DRAWCTRL0_S4 - start;
		} else if (start <= SAVAGE_DRAWCTRL1_S4) {
			if (start + count > SAVAGE_DRAWCTRL1_S4 + 1) {
				count -= SAVAGE_DRAWCTRL1_S4 + 1 - start;
				start = SAVAGE_DRAWCTRL1_S4 + 1;
			} else
				return 0;
		}
	}

	bci_size = count + (count + 254) / 255 + count2 + (count2 + 254) / 255;

	if (cmd_header->state.global) {
		BEGIN_DMA(bci_size + 1);
		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
		dev_priv->waiting = 1;
	} else {
		BEGIN_DMA(bci_size);
	}

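	/* Descriptive note: the verified range may have been split around
	 * the scissor registers above into "count" registers starting at
	 * "start" plus "count2" registers following the scissor pair.  A
	 * single set-register command addresses at most 255 registers,
	 * which is why bci_size reserves one extra command word per 255
	 * registers in each range. */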
	do {
		while (count > 0) {
			unsigned int n = count < 255 ? count : 255;
			DMA_SET_REGISTERS(start, n);
			DMA_COPY(regs, n);
			count -= n;
			start += n;
			regs += n;
		}
		start += 2;
		regs += 2;
		count = count2;
		count2 = 0;
	} while (count);

	DMA_COMMIT();

	return 0;
}

static int savage_dispatch_dma_prim(drm_savage_private_t *dev_priv,
				    const drm_savage_cmd_header_t *cmd_header,
				    const struct drm_buf *dmabuf)
{
	unsigned char reorder = 0;
	unsigned int prim = cmd_header->prim.prim;
	unsigned int skip = cmd_header->prim.skip;
	unsigned int n = cmd_header->prim.count;
	unsigned int start = cmd_header->prim.start;
	unsigned int i;
	BCI_LOCALS;

	if (!dmabuf) {
		DRM_ERROR("called without dma buffers!\n");
		return -EINVAL;
	}

	if (!n)
		return 0;

	switch (prim) {
	case SAVAGE_PRIM_TRILIST_201:
		reorder = 1;
		prim = SAVAGE_PRIM_TRILIST;
		/* fall through */
	case SAVAGE_PRIM_TRILIST:
		if (n % 3 != 0) {
			DRM_ERROR("wrong number of vertices %u in TRILIST\n",
				  n);
			return -EINVAL;
		}
		break;
	case SAVAGE_PRIM_TRISTRIP:
	case SAVAGE_PRIM_TRIFAN:
		if (n < 3) {
			DRM_ERROR
			    ("wrong number of vertices %u in TRIFAN/STRIP\n",
			     n);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("invalid primitive type %u\n", prim);
		return -EINVAL;
	}

	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
		if (skip != 0) {
			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
			return -EINVAL;
		}
	} else {
		unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
			(skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
			(skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
		if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
			return -EINVAL;
		}
		if (reorder) {
			DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
			return -EINVAL;
		}
	}

	if (start + n > dmabuf->total / 32) {
		DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
			  start, start + n - 1, dmabuf->total / 32);
		return -EINVAL;
	}

	/* Vertex DMA doesn't work with command DMA at the same time,
	 * so we use BCI_... to submit commands here. Flush buffered
	 * faked DMA first. */
	DMA_FLUSH();

	if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
		BEGIN_BCI(2);
		BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
		BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
		dev_priv->state.common.vbaddr = dmabuf->bus_address;
	}
	if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
		/* Workaround for what looks like a hardware bug. If a
		 * WAIT_3D_IDLE was emitted some time before the
		 * indexed drawing command then the engine will lock
		 * up. There are two known workarounds:
		 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
		BEGIN_BCI(63);
		for (i = 0; i < 63; ++i)
			BCI_WRITE(BCI_CMD_WAIT);
		dev_priv->waiting = 0;
	}

	prim <<= 25;
	while (n != 0) {
		/* Can emit up to 255 indices (85 triangles) at once. */
		unsigned int count = n > 255 ? 255 : n;
		if (reorder) {
			/* Need to reorder indices for correct flat
			 * shading while preserving the clock sense
			 * for correct culling. Only on Savage3D. */
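			/* Descriptive note: the "201" reorder rotates each
			 * triangle (v0,v1,v2) into (v2,v0,v1).  The first
			 * slot of every triangle gets the index two places
			 * ahead and the remaining slots get the index one
			 * place behind, which moves the provoking vertex
			 * while keeping the winding order intact. */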
			int reorder[3] = { -1, -1, -1 };
			reorder[start % 3] = 2;

			BEGIN_BCI((count + 1 + 1) / 2);
			BCI_DRAW_INDICES_S3D(count, prim, start + 2);

			for (i = start + 1; i + 1 < start + count; i += 2)
				BCI_WRITE((i + reorder[i % 3]) |
					  ((i + 1 +
					    reorder[(i + 1) % 3]) << 16));
			if (i < start + count)
				BCI_WRITE(i + reorder[i % 3]);
		} else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
			BEGIN_BCI((count + 1 + 1) / 2);
			BCI_DRAW_INDICES_S3D(count, prim, start);

			for (i = start + 1; i + 1 < start + count; i += 2)
				BCI_WRITE(i | ((i + 1) << 16));
			if (i < start + count)
				BCI_WRITE(i);
		} else {
			BEGIN_BCI((count + 2 + 1) / 2);
			BCI_DRAW_INDICES_S4(count, prim, skip);

			for (i = start; i + 1 < start + count; i += 2)
				BCI_WRITE(i | ((i + 1) << 16));
			if (i < start + count)
				BCI_WRITE(i);
		}

		start += count;
		n -= count;

		prim |= BCI_CMD_DRAW_CONT;
	}

	return 0;
}

static int savage_dispatch_vb_prim(drm_savage_private_t *dev_priv,
				   const drm_savage_cmd_header_t *cmd_header,
				   const uint32_t *vtxbuf, unsigned int vb_size,
				   unsigned int vb_stride)
{
	unsigned char reorder = 0;
	unsigned int prim = cmd_header->prim.prim;
	unsigned int skip = cmd_header->prim.skip;
	unsigned int n = cmd_header->prim.count;
	unsigned int start = cmd_header->prim.start;
	unsigned int vtx_size;
	unsigned int i;
	DMA_LOCALS;

	if (!n)
		return 0;

	switch (prim) {
	case SAVAGE_PRIM_TRILIST_201:
		reorder = 1;
		prim = SAVAGE_PRIM_TRILIST;
		/* fall through */
	case SAVAGE_PRIM_TRILIST:
		if (n % 3 != 0) {
			DRM_ERROR("wrong number of vertices %u in TRILIST\n",
				  n);
			return -EINVAL;
		}
		break;
	case SAVAGE_PRIM_TRISTRIP:
	case SAVAGE_PRIM_TRIFAN:
		if (n < 3) {
			DRM_ERROR
			    ("wrong number of vertices %u in TRIFAN/STRIP\n",
			     n);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("invalid primitive type %u\n", prim);
		return -EINVAL;
	}

	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
		if (skip > SAVAGE_SKIP_ALL_S3D) {
			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
			return -EINVAL;
		}
		vtx_size = 8;	/* full vertex */
	} else {
		if (skip > SAVAGE_SKIP_ALL_S4) {
			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
			return -EINVAL;
		}
		vtx_size = 10;	/* full vertex */
	}

	vtx_size -= (skip & 1) + (skip >> 1 & 1) +
		(skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
		(skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);

	if (vtx_size > vb_stride) {
		DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
			  vtx_size, vb_stride);
		return -EINVAL;
	}

	if (start + n > vb_size / (vb_stride * 4)) {
		DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
			  start, start + n - 1, vb_size / (vb_stride * 4));
		return -EINVAL;
	}

	prim <<= 25;
	while (n != 0) {
		/* Can emit up to 255 vertices (85 triangles) at once. */
		unsigned int count = n > 255 ? 255 : n;
		if (reorder) {
			/* Need to reorder vertices for correct flat
			 * shading while preserving the clock sense
			 * for correct culling. Only on Savage3D. */
			int reorder[3] = { -1, -1, -1 };
			reorder[start % 3] = 2;

			BEGIN_DMA(count * vtx_size + 1);
			DMA_DRAW_PRIMITIVE(count, prim, skip);

			for (i = start; i < start + count; ++i) {
				unsigned int j = i + reorder[i % 3];
				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
			}

			DMA_COMMIT();
		} else {
			BEGIN_DMA(count * vtx_size + 1);
			DMA_DRAW_PRIMITIVE(count, prim, skip);

			if (vb_stride == vtx_size) {
				DMA_COPY(&vtxbuf[vb_stride * start],
					 vtx_size * count);
			} else {
				for (i = start; i < start + count; ++i) {
					DMA_COPY(&vtxbuf[vb_stride * i],
						 vtx_size);
				}
			}

			DMA_COMMIT();
		}

		start += count;
		n -= count;

		prim |= BCI_CMD_DRAW_CONT;
	}

	return 0;
}

static int savage_dispatch_dma_idx(drm_savage_private_t *dev_priv,
				   const drm_savage_cmd_header_t *cmd_header,
				   const uint16_t *idx,
				   const struct drm_buf *dmabuf)
{
	unsigned char reorder = 0;
	unsigned int prim = cmd_header->idx.prim;
	unsigned int skip = cmd_header->idx.skip;
	unsigned int n = cmd_header->idx.count;
	unsigned int i;
	BCI_LOCALS;

	if (!dmabuf) {
		DRM_ERROR("called without dma buffers!\n");
		return -EINVAL;
	}

	if (!n)
		return 0;

	switch (prim) {
	case SAVAGE_PRIM_TRILIST_201:
		reorder = 1;
		prim = SAVAGE_PRIM_TRILIST;
		/* fall through */
	case SAVAGE_PRIM_TRILIST:
		if (n % 3 != 0) {
			DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
			return -EINVAL;
		}
		break;
	case SAVAGE_PRIM_TRISTRIP:
	case SAVAGE_PRIM_TRIFAN:
		if (n < 3) {
			DRM_ERROR
			    ("wrong number of indices %u in TRIFAN/STRIP\n", n);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("invalid primitive type %u\n", prim);
		return -EINVAL;
	}

	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
		if (skip != 0) {
			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
			return -EINVAL;
		}
	} else {
		unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
			(skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
			(skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
		if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
			return -EINVAL;
		}
		if (reorder) {
			DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
			return -EINVAL;
		}
	}

	/* Vertex DMA doesn't work with command DMA at the same time,
	 * so we use BCI_... to submit commands here. Flush buffered
	 * faked DMA first. */
	DMA_FLUSH();

	if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
		BEGIN_BCI(2);
		BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
		BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
		dev_priv->state.common.vbaddr = dmabuf->bus_address;
	}
	if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
		/* Workaround for what looks like a hardware bug. If a
		 * WAIT_3D_IDLE was emitted some time before the
		 * indexed drawing command then the engine will lock
		 * up. There are two known workarounds:
		 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
		BEGIN_BCI(63);
		for (i = 0; i < 63; ++i)
			BCI_WRITE(BCI_CMD_WAIT);
		dev_priv->waiting = 0;
	}

	prim <<= 25;
	while (n != 0) {
		/* Can emit up to 255 indices (85 triangles) at once. */
		unsigned int count = n > 255 ? 255 : n;

		/* check indices */
		for (i = 0; i < count; ++i) {
			if (idx[i] > dmabuf->total / 32) {
				DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
					  i, idx[i], dmabuf->total / 32);
				return -EINVAL;
			}
		}

		if (reorder) {
			/* Need to reorder indices for correct flat
			 * shading while preserving the clock sense
			 * for correct culling. Only on Savage3D. */
			int reorder[3] = { 2, -1, -1 };

			BEGIN_BCI((count + 1 + 1) / 2);
			BCI_DRAW_INDICES_S3D(count, prim, idx[2]);

			for (i = 1; i + 1 < count; i += 2)
				BCI_WRITE(idx[i + reorder[i % 3]] |
					  (idx[i + 1 +
					       reorder[(i + 1) % 3]] << 16));
			if (i < count)
				BCI_WRITE(idx[i + reorder[i % 3]]);
		} else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
			BEGIN_BCI((count + 1 + 1) / 2);
			BCI_DRAW_INDICES_S3D(count, prim, idx[0]);

			for (i = 1; i + 1 < count; i += 2)
				BCI_WRITE(idx[i] | (idx[i + 1] << 16));
			if (i < count)
				BCI_WRITE(idx[i]);
		} else {
			BEGIN_BCI((count + 2 + 1) / 2);
			BCI_DRAW_INDICES_S4(count, prim, skip);

			for (i = 0; i + 1 < count; i += 2)
				BCI_WRITE(idx[i] | (idx[i + 1] << 16));
			if (i < count)
				BCI_WRITE(idx[i]);
		}

		idx += count;
		n -= count;

		prim |= BCI_CMD_DRAW_CONT;
	}

	return 0;
}

static int savage_dispatch_vb_idx(drm_savage_private_t *dev_priv,
				  const drm_savage_cmd_header_t *cmd_header,
				  const uint16_t *idx,
				  const uint32_t *vtxbuf,
				  unsigned int vb_size, unsigned int vb_stride)
{
	unsigned char reorder = 0;
	unsigned int prim = cmd_header->idx.prim;
	unsigned int skip = cmd_header->idx.skip;
	unsigned int n = cmd_header->idx.count;
	unsigned int vtx_size;
	unsigned int i;
	DMA_LOCALS;

	if (!n)
		return 0;

	switch (prim) {
	case SAVAGE_PRIM_TRILIST_201:
		reorder = 1;
		prim = SAVAGE_PRIM_TRILIST;
		/* fall through */
	case SAVAGE_PRIM_TRILIST:
		if (n % 3 != 0) {
			DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
			return -EINVAL;
		}
		break;
	case SAVAGE_PRIM_TRISTRIP:
	case SAVAGE_PRIM_TRIFAN:
		if (n < 3) {
			DRM_ERROR
			    ("wrong number of indices %u in TRIFAN/STRIP\n", n);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("invalid primitive type %u\n", prim);
		return -EINVAL;
	}

	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
		if (skip > SAVAGE_SKIP_ALL_S3D) {
			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
			return -EINVAL;
		}
		vtx_size = 8;	/* full vertex */
	} else {
		if (skip > SAVAGE_SKIP_ALL_S4) {
			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
			return -EINVAL;
		}
		vtx_size = 10;	/* full vertex */
	}

	vtx_size -= (skip & 1) + (skip >> 1 & 1) +
		(skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
		(skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);

	if (vtx_size > vb_stride) {
		DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
			  vtx_size, vb_stride);
		return -EINVAL;
	}

	prim <<= 25;
	while (n != 0) {
		/* Can emit up to 255 vertices (85 triangles) at once. */
		unsigned int count = n > 255 ? 255 : n;

		/* Check indices */
		for (i = 0; i < count; ++i) {
			if (idx[i] > vb_size / (vb_stride * 4)) {
				DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
					  i, idx[i], vb_size / (vb_stride * 4));
				return -EINVAL;
			}
		}

		if (reorder) {
			/* Need to reorder vertices for correct flat
			 * shading while preserving the clock sense
			 * for correct culling. Only on Savage3D. */
			int reorder[3] = { 2, -1, -1 };

			BEGIN_DMA(count * vtx_size + 1);
			DMA_DRAW_PRIMITIVE(count, prim, skip);

			for (i = 0; i < count; ++i) {
				unsigned int j = idx[i + reorder[i % 3]];
				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
			}

			DMA_COMMIT();
		} else {
			BEGIN_DMA(count * vtx_size + 1);
			DMA_DRAW_PRIMITIVE(count, prim, skip);

			for (i = 0; i < count; ++i) {
				unsigned int j = idx[i];
				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
			}

			DMA_COMMIT();
		}

		idx += count;
		n -= count;

		prim |= BCI_CMD_DRAW_CONT;
	}

	return 0;
}

static int savage_dispatch_clear(drm_savage_private_t *dev_priv,
				 const drm_savage_cmd_header_t *cmd_header,
				 const drm_savage_cmd_header_t *data,
				 unsigned int nbox,
				 const struct drm_clip_rect *boxes)
{
	unsigned int flags = cmd_header->clear0.flags;
	unsigned int clear_cmd;
	unsigned int i, nbufs;
	DMA_LOCALS;

	if (nbox == 0)
		return 0;

	clear_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
	    BCI_CMD_SEND_COLOR | BCI_CMD_DEST_PBD_NEW;
	BCI_CMD_SET_ROP(clear_cmd, 0xCC);

	nbufs = ((flags & SAVAGE_FRONT) ? 1 : 0) +
	    ((flags & SAVAGE_BACK) ? 1 : 0) + ((flags & SAVAGE_DEPTH) ? 1 : 0);
	if (nbufs == 0)
		return 0;

	if (data->clear1.mask != 0xffffffff) {
		/* set mask */
		BEGIN_DMA(2);
		DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
		DMA_WRITE(data->clear1.mask);
		DMA_COMMIT();
	}
	for (i = 0; i < nbox; ++i) {
		unsigned int x, y, w, h;
		unsigned int buf;
		x = boxes[i].x1, y = boxes[i].y1;
		w = boxes[i].x2 - boxes[i].x1;
		h = boxes[i].y2 - boxes[i].y1;
		BEGIN_DMA(nbufs * 6);
		for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) {
			if (!(flags & buf))
				continue;
			DMA_WRITE(clear_cmd);
			switch (buf) {
			case SAVAGE_FRONT:
				DMA_WRITE(dev_priv->front_offset);
				DMA_WRITE(dev_priv->front_bd);
				break;
			case SAVAGE_BACK:
				DMA_WRITE(dev_priv->back_offset);
				DMA_WRITE(dev_priv->back_bd);
				break;
			case SAVAGE_DEPTH:
				DMA_WRITE(dev_priv->depth_offset);
				DMA_WRITE(dev_priv->depth_bd);
				break;
			}
			DMA_WRITE(data->clear1.value);
			DMA_WRITE(BCI_X_Y(x, y));
			DMA_WRITE(BCI_W_H(w, h));
		}
		DMA_COMMIT();
	}
	if (data->clear1.mask != 0xffffffff) {
		/* reset mask */
		BEGIN_DMA(2);
		DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
		DMA_WRITE(0xffffffff);
		DMA_COMMIT();
	}

	return 0;
}

static int savage_dispatch_swap(drm_savage_private_t *dev_priv,
				unsigned int nbox,
				const struct drm_clip_rect *boxes)
{
	unsigned int swap_cmd;
	unsigned int i;
	DMA_LOCALS;

	if (nbox == 0)
		return 0;

	swap_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
	    BCI_CMD_SRC_PBD_COLOR_NEW | BCI_CMD_DEST_GBD;
	BCI_CMD_SET_ROP(swap_cmd, 0xCC);

	for (i = 0; i < nbox; ++i) {
		BEGIN_DMA(6);
		DMA_WRITE(swap_cmd);
		DMA_WRITE(dev_priv->back_offset);
		DMA_WRITE(dev_priv->back_bd);
		DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
		DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
		DMA_WRITE(BCI_W_H(boxes[i].x2 - boxes[i].x1,
				  boxes[i].y2 - boxes[i].y1));
		DMA_COMMIT();
	}

	return 0;
}

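/*
 * Descriptive note: replay a batch of consecutive drawing commands once
 * per clip rectangle.  For each box the scissor state is emitted through
 * dev_priv->emit_clip_rect (presumably one of the savage_emit_clip_rect_*
 * helpers above), then every command between "start" and "end" is
 * dispatched again for that box.
 */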
static int savage_dispatch_draw(drm_savage_private_t *dev_priv,
				const drm_savage_cmd_header_t *start,
				const drm_savage_cmd_header_t *end,
				const struct drm_buf *dmabuf,
				const unsigned int *vtxbuf,
				unsigned int vb_size, unsigned int vb_stride,
				unsigned int nbox,
				const struct drm_clip_rect *boxes)
{
	unsigned int i, j;
	int ret;

	for (i = 0; i < nbox; ++i) {
		const drm_savage_cmd_header_t *cmdbuf;
		dev_priv->emit_clip_rect(dev_priv, &boxes[i]);

		cmdbuf = start;
		while (cmdbuf < end) {
			drm_savage_cmd_header_t cmd_header;
			cmd_header = *cmdbuf;
			cmdbuf++;
			switch (cmd_header.cmd.cmd) {
			case SAVAGE_CMD_DMA_PRIM:
				ret = savage_dispatch_dma_prim(
					dev_priv, &cmd_header, dmabuf);
				break;
			case SAVAGE_CMD_VB_PRIM:
				ret = savage_dispatch_vb_prim(
					dev_priv, &cmd_header,
					vtxbuf, vb_size, vb_stride);
				break;
			case SAVAGE_CMD_DMA_IDX:
				j = (cmd_header.idx.count + 3) / 4;
				/* j was checked in savage_bci_cmdbuf */
				ret = savage_dispatch_dma_idx(dev_priv,
					&cmd_header, (const uint16_t *)cmdbuf,
					dmabuf);
				cmdbuf += j;
				break;
			case SAVAGE_CMD_VB_IDX:
				j = (cmd_header.idx.count + 3) / 4;
				/* j was checked in savage_bci_cmdbuf */
				ret = savage_dispatch_vb_idx(dev_priv,
					&cmd_header, (const uint16_t *)cmdbuf,
					(const uint32_t *)vtxbuf, vb_size,
					vb_stride);
				cmdbuf += j;
				break;
			default:
				/* What's the best return code? EFAULT? */
				DRM_ERROR("IMPLEMENTATION ERROR: "
					  "non-drawing-command %d\n",
					  cmd_header.cmd.cmd);
				return -EINVAL;
			}

			if (ret != 0)
				return ret;
		}
	}

	return 0;
}

int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_savage_private_t *dev_priv = dev->dev_private;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *dmabuf;
	drm_savage_cmdbuf_t *cmdbuf = data;
	drm_savage_cmd_header_t *kcmd_addr = NULL;
	drm_savage_cmd_header_t *first_draw_cmd;
	unsigned int *kvb_addr = NULL;
	struct drm_clip_rect *kbox_addr = NULL;
	unsigned int i, j;
	int ret = 0;

	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (dma && dma->buflist) {
		if (cmdbuf->dma_idx >= dma->buf_count) {
			DRM_ERROR
			    ("vertex buffer index %u out of range (0-%u)\n",
			     cmdbuf->dma_idx, dma->buf_count - 1);
			return -EINVAL;
		}
		dmabuf = dma->buflist[cmdbuf->dma_idx];
	} else {
		dmabuf = NULL;
	}

	/* Copy the user buffers into kernel temporary areas.  This hasn't been
	 * a performance loss compared to VERIFYAREA_READ/
	 * COPY_FROM_USER_UNCHECKED when done in other drivers, and is correct
	 * for locking on FreeBSD.
	 */
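	/* Descriptive note: cmdbuf->size is measured in 64-bit command
	 * headers (hence the factor of 8 below), vb_size is in bytes, and
	 * nbox counts struct drm_clip_rect entries. */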
	if (cmdbuf->size) {
		kcmd_addr = kmalloc_array(cmdbuf->size, 8, GFP_KERNEL);
		if (kcmd_addr == NULL)
			return -ENOMEM;

		if (copy_from_user(kcmd_addr, cmdbuf->cmd_addr,
				   cmdbuf->size * 8)) {
			kfree(kcmd_addr);
			return -EFAULT;
		}
		cmdbuf->cmd_addr = kcmd_addr;
	}
	if (cmdbuf->vb_size) {
		kvb_addr = kmalloc(cmdbuf->vb_size, GFP_KERNEL);
		if (kvb_addr == NULL) {
			ret = -ENOMEM;
			goto done;
		}

		if (copy_from_user(kvb_addr, cmdbuf->vb_addr,
				   cmdbuf->vb_size)) {
			ret = -EFAULT;
			goto done;
		}
		cmdbuf->vb_addr = kvb_addr;
	}
	if (cmdbuf->nbox) {
		kbox_addr = kmalloc_array(cmdbuf->nbox,
					  sizeof(struct drm_clip_rect),
					  GFP_KERNEL);
		if (kbox_addr == NULL) {
			ret = -ENOMEM;
			goto done;
		}

		if (copy_from_user(kbox_addr, cmdbuf->box_addr,
				   cmdbuf->nbox * sizeof(struct drm_clip_rect))) {
			ret = -EFAULT;
			goto done;
		}
		cmdbuf->box_addr = kbox_addr;
	}

	/* Make sure writes to DMA buffers are finished before sending
	 * DMA commands to the graphics hardware. */
	mb();

	/* Coming from user space. Don't know if the Xserver has
	 * emitted wait commands. Assuming the worst. */
	dev_priv->waiting = 1;

	i = 0;
	first_draw_cmd = NULL;
	while (i < cmdbuf->size) {
		drm_savage_cmd_header_t cmd_header;
		cmd_header = *(drm_savage_cmd_header_t *)cmdbuf->cmd_addr;
		cmdbuf->cmd_addr++;
		i++;

		/* Group drawing commands with same state to minimize
		 * iterations over clip rects. */
		j = 0;
		switch (cmd_header.cmd.cmd) {
		case SAVAGE_CMD_DMA_IDX:
		case SAVAGE_CMD_VB_IDX:
			j = (cmd_header.idx.count + 3) / 4;
			if (i + j > cmdbuf->size) {
				DRM_ERROR("indexed drawing command extends "
					  "beyond end of command buffer\n");
				DMA_FLUSH();
				ret = -EINVAL;
				goto done;
			}
			/* fall through */
		case SAVAGE_CMD_DMA_PRIM:
		case SAVAGE_CMD_VB_PRIM:
			if (!first_draw_cmd)
				first_draw_cmd = cmdbuf->cmd_addr - 1;
			cmdbuf->cmd_addr += j;
			i += j;
			break;
		default:
			if (first_draw_cmd) {
				ret = savage_dispatch_draw(
					dev_priv, first_draw_cmd,
					cmdbuf->cmd_addr - 1,
					dmabuf, cmdbuf->vb_addr,
					cmdbuf->vb_size, cmdbuf->vb_stride,
					cmdbuf->nbox, cmdbuf->box_addr);
				if (ret != 0)
					goto done;
				first_draw_cmd = NULL;
			}
		}
		if (first_draw_cmd)
			continue;

		switch (cmd_header.cmd.cmd) {
		case SAVAGE_CMD_STATE:
			j = (cmd_header.state.count + 1) / 2;
			if (i + j > cmdbuf->size) {
				DRM_ERROR("command SAVAGE_CMD_STATE extends "
					  "beyond end of command buffer\n");
				DMA_FLUSH();
				ret = -EINVAL;
				goto done;
			}
			ret = savage_dispatch_state(dev_priv, &cmd_header,
				(const uint32_t *)cmdbuf->cmd_addr);
			cmdbuf->cmd_addr += j;
			i += j;
			break;
		case SAVAGE_CMD_CLEAR:
			if (i + 1 > cmdbuf->size) {
				DRM_ERROR("command SAVAGE_CMD_CLEAR extends "
					  "beyond end of command buffer\n");
				DMA_FLUSH();
				ret = -EINVAL;
				goto done;
			}
			ret = savage_dispatch_clear(dev_priv, &cmd_header,
						    cmdbuf->cmd_addr,
						    cmdbuf->nbox,
						    cmdbuf->box_addr);
			cmdbuf->cmd_addr++;
			i++;
			break;
		case SAVAGE_CMD_SWAP:
			ret = savage_dispatch_swap(dev_priv, cmdbuf->nbox,
						   cmdbuf->box_addr);
			break;
		default:
			DRM_ERROR("invalid command 0x%x\n",
				  cmd_header.cmd.cmd);
			DMA_FLUSH();
			ret = -EINVAL;
			goto done;
		}

		if (ret != 0) {
			DMA_FLUSH();
			goto done;
		}
	}

	if (first_draw_cmd) {
		ret = savage_dispatch_draw(
			dev_priv, first_draw_cmd, cmdbuf->cmd_addr, dmabuf,
			cmdbuf->vb_addr, cmdbuf->vb_size, cmdbuf->vb_stride,
			cmdbuf->nbox, cmdbuf->box_addr);
		if (ret != 0) {
			DMA_FLUSH();
			goto done;
		}
	}

	DMA_FLUSH();

	if (dmabuf && cmdbuf->discard) {
		drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private;
		uint16_t event;
		event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D);
		SET_AGE(&buf_priv->age, event, dev_priv->event_wrap);
		savage_freelist_put(dev, dmabuf);
	}

done:
	/* If we didn't need to allocate them, these'll be NULL */
	kfree(kcmd_addr);
	kfree(kvb_addr);
	kfree(kbox_addr);

	return ret;
}