root/drivers/gpu/drm/msm/adreno/a5xx_gpu.c


DEFINITIONS

This source file includes the following definitions.
  1. a5xx_flush
  2. a5xx_submit_in_rb
  3. a5xx_submit
  4. a5xx_set_hwcg
  5. a5xx_me_init
  6. a5xx_preempt_start
  7. a5xx_ucode_init
  8. a5xx_zap_shader_resume
  9. a5xx_zap_shader_init
  10. a5xx_hw_init
  11. a5xx_recover
  12. a5xx_destroy
  13. _a5xx_check_idle
  14. a5xx_idle
  15. a5xx_fault_handler
  16. a5xx_cp_err_irq
  17. a5xx_rbbm_err_irq
  18. a5xx_uche_err_irq
  19. a5xx_gpmu_err_irq
  20. a5xx_fault_detect_irq
  21. a5xx_irq
  22. a5xx_dump
  23. a5xx_pm_resume
  24. a5xx_pm_suspend
  25. a5xx_get_timestamp
  26. a5xx_crashdumper_init
  27. a5xx_crashdumper_run
  28. a5xx_gpu_state_get_hlsq_regs
  29. a5xx_gpu_state_get
  30. a5xx_gpu_state_destroy
  31. a5xx_gpu_state_put
  32. a5xx_show
  33. a5xx_active_ring
  34. a5xx_gpu_busy
  35. check_speed_bin
  36. a5xx_gpu_init

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
   3  */
   4 
   5 #include <linux/kernel.h>
   6 #include <linux/types.h>
   7 #include <linux/cpumask.h>
   8 #include <linux/qcom_scm.h>
   9 #include <linux/pm_opp.h>
  10 #include <linux/nvmem-consumer.h>
  11 #include <linux/slab.h>
  12 #include "msm_gem.h"
  13 #include "msm_mmu.h"
  14 #include "a5xx_gpu.h"
  15 
  16 extern bool hang_debug;
  17 static void a5xx_dump(struct msm_gpu *gpu);
  18 
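/*
 * Peripheral Authentication Service (PAS) id used for the SCM calls that
 * load and resume the GPU zap shader in the secure world.
 */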
  19 #define GPU_PAS_ID 13
  20 
  21 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  22 {
  23         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  24         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  25         uint32_t wptr;
  26         unsigned long flags;
  27 
  28         spin_lock_irqsave(&ring->lock, flags);
  29 
  30         /* Copy the shadow to the actual register */
  31         ring->cur = ring->next;
  32 
  33         /* Make sure to wrap wptr if we need to */
  34         wptr = get_wptr(ring);
  35 
  36         spin_unlock_irqrestore(&ring->lock, flags);
  37 
  38         /* Make sure everything is posted before making a decision */
  39         mb();
  40 
  41         /* Update HW if this is the current ring and we are not in preempt */
  42         if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
  43                 gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
  44 }
  45 
  46 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
  47         struct msm_file_private *ctx)
  48 {
  49         struct msm_drm_private *priv = gpu->dev->dev_private;
  50         struct msm_ringbuffer *ring = submit->ring;
  51         struct msm_gem_object *obj;
  52         uint32_t *ptr, dwords;
  53         unsigned int i, j;
  54 
  55         for (i = 0; i < submit->nr_cmds; i++) {
  56                 switch (submit->cmd[i].type) {
  57                 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
  58                         break;
  59                 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
  60                         if (priv->lastctx == ctx)
  61                                 break;
  62                         /* fall-thru */
  63                 case MSM_SUBMIT_CMD_BUF:
  64                         /* copy commands into RB: */
  65                         obj = submit->bos[submit->cmd[i].idx].obj;
  66                         dwords = submit->cmd[i].size;
  67 
  68                         ptr = msm_gem_get_vaddr(&obj->base);
  69 
  70                         /* _get_vaddr() shouldn't fail at this point,
  71                          * since we've already mapped it once in
  72                          * submit_reloc()
  73                          */
  74                         if (WARN_ON(!ptr))
  75                                 return;
  76 
  77                         for (j = 0; j < dwords; j++) {
  78                                 /* normally the OUT_PKTn() would wait
  79                                  * for space for the packet.  But since
  80                                  * we just OUT_RING() the whole thing,
  81                                  * need to call adreno_wait_ring()
  82                                  * ourself:
  83                                  */
  84                                 adreno_wait_ring(ring, 1);
  85                                 OUT_RING(ring, ptr[j]);
  86                         }
  87 
  88                         msm_gem_put_vaddr(&obj->base);
  89 
  90                         break;
  91                 }
  92         }
  93 
  94         a5xx_flush(gpu, ring);
  95         a5xx_preempt_trigger(gpu);
  96 
  97         /* we might not necessarily have a cmd from userspace to
  98          * trigger an event to know that submit has completed, so
  99          * do this manually:
 100          */
 101         a5xx_idle(gpu, ring);
 102         ring->memptrs->fence = submit->seqno;
 103         msm_gpu_retire(gpu);
 104 }
 105 
 106 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 107         struct msm_file_private *ctx)
 108 {
 109         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 110         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 111         struct msm_drm_private *priv = gpu->dev->dev_private;
 112         struct msm_ringbuffer *ring = submit->ring;
 113         unsigned int i, ibs = 0;
 114 
 115         if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
 116                 priv->lastctx = NULL;
 117                 a5xx_submit_in_rb(gpu, submit, ctx);
 118                 return;
 119         }
 120 
 121         OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
 122         OUT_RING(ring, 0x02);
 123 
 124         /* Turn off protected mode to write to special registers */
 125         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 126         OUT_RING(ring, 0);
 127 
 128         /* Set the save preemption record for the ring/command */
 129         OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
 130         OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
 131         OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
 132 
 133         /* Turn back on protected mode */
 134         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 135         OUT_RING(ring, 1);
 136 
 137         /* Enable local preemption for finegrain preemption */
 138         OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
 139         OUT_RING(ring, 0x02);
 140 
 141         /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
 142         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 143         OUT_RING(ring, 0x02);
 144 
 145         /* Submit the commands */
 146         for (i = 0; i < submit->nr_cmds; i++) {
 147                 switch (submit->cmd[i].type) {
 148                 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
 149                         break;
 150                 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
 151                         if (priv->lastctx == ctx)
 152                                 break;
 153                         /* fall-thru */
 154                 case MSM_SUBMIT_CMD_BUF:
 155                         OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
 156                         OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
 157                         OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
 158                         OUT_RING(ring, submit->cmd[i].size);
 159                         ibs++;
 160                         break;
 161                 }
 162         }
 163 
 164         /*
 165          * Write the render mode to NULL (0) to indicate to the CP that the IBs
 166          * are done rendering - otherwise a lucky preemption would start
 167          * replaying from the last checkpoint
 168          */
 169         OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
 170         OUT_RING(ring, 0);
 171         OUT_RING(ring, 0);
 172         OUT_RING(ring, 0);
 173         OUT_RING(ring, 0);
 174         OUT_RING(ring, 0);
 175 
 176         /* Turn off IB level preemptions */
 177         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 178         OUT_RING(ring, 0x01);
 179 
 180         /* Write the fence to the scratch register */
 181         OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
 182         OUT_RING(ring, submit->seqno);
 183 
 184         /*
 185          * Execute a CACHE_FLUSH_TS event. This will ensure that the
 186          * timestamp is written to the memory and then triggers the interrupt
 187          */
 188         OUT_PKT7(ring, CP_EVENT_WRITE, 4);
 189         OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
 190         OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
 191         OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
 192         OUT_RING(ring, submit->seqno);
 193 
 194         /* Yield the floor on command completion */
 195         OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
 196         /*
 197          * If dword[2:1] are non zero, they specify an address for the CP to
 198          * write the value of dword[3] to on preemption complete. Write 0 to
 199          * skip the write
 200          */
 201         OUT_RING(ring, 0x00);
 202         OUT_RING(ring, 0x00);
 203         /* Data value - not used if the address above is 0 */
 204         OUT_RING(ring, 0x01);
 205         /* Set bit 0 to trigger an interrupt on preempt complete */
 206         OUT_RING(ring, 0x01);
 207 
 208         a5xx_flush(gpu, ring);
 209 
 210         /* Check to see if we need to start preemption */
 211         a5xx_preempt_trigger(gpu);
 212 }
 213 
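/*
 * Per-block hardware clock gating (HWCG) settings.  a5xx_set_hwcg() writes
 * these register/value pairs to enable clock gating, or zeroes them all to
 * disable it (for example around GPU state capture).
 */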
 214 static const struct {
 215         u32 offset;
 216         u32 value;
 217 } a5xx_hwcg[] = {
 218         {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
 219         {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
 220         {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
 221         {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
 222         {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
 223         {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
 224         {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
 225         {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
 226         {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
 227         {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
 228         {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
 229         {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
 230         {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
 231         {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
 232         {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
 233         {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
 234         {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
 235         {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
 236         {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
 237         {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
 238         {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
 239         {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
 240         {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
 241         {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
 242         {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
 243         {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
 244         {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
 245         {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
 246         {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
 247         {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
 248         {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
 249         {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
 250         {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
 251         {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
 252         {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
 253         {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
 254         {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
 255         {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
 256         {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
 257         {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
 258         {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
 259         {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
 260         {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
 261         {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
 262         {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
 263         {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
 264         {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
 265         {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
 266         {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
 267         {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
 268         {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
 269         {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
 270         {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
 271         {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
 272         {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
 273         {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
 274         {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
 275         {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
 276         {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
 277         {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
 278         {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
 279         {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
 280         {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
 281         {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
 282         {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
 283         {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
 284         {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
 285         {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
 286         {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
 287         {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
 288         {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
 289         {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
 290         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
 291         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
 292         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
 293         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
 294         {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
 295         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
 296         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
 297         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
 298         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
 299         {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
 300         {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
 301         {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
 302         {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
 303         {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
 304         {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
 305         {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
 306         {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
 307         {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
 308         {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
 309         {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
 310 };
 311 
 312 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
 313 {
 314         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 315         unsigned int i;
 316 
 317         for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
 318                 gpu_write(gpu, a5xx_hwcg[i].offset,
 319                         state ? a5xx_hwcg[i].value : 0);
 320 
 321         if (adreno_is_a540(adreno_gpu)) {
 322                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
 323                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
 324         }
 325 
 326         gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
 327         gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
 328 }
 329 
 330 static int a5xx_me_init(struct msm_gpu *gpu)
 331 {
 332         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 333         struct msm_ringbuffer *ring = gpu->rb[0];
 334 
 335         OUT_PKT7(ring, CP_ME_INIT, 8);
 336 
 337         OUT_RING(ring, 0x0000002F);
 338 
 339         /* Enable multiple hardware contexts */
 340         OUT_RING(ring, 0x00000003);
 341 
 342         /* Enable error detection */
 343         OUT_RING(ring, 0x20000000);
 344 
 345         /* Don't enable header dump */
 346         OUT_RING(ring, 0x00000000);
 347         OUT_RING(ring, 0x00000000);
 348 
 349         /* Specify workarounds for various microcode issues */
 350         if (adreno_is_a530(adreno_gpu)) {
 351                 /* Workaround for token end syncs
 352                  * Force a WFI after every direct-render 3D mode draw and every
 353                  * 2D mode 3 draw
 354                  */
 355                 OUT_RING(ring, 0x0000000B);
 356         } else {
 357                 /* No workarounds enabled */
 358                 OUT_RING(ring, 0x00000000);
 359         }
 360 
 361         OUT_RING(ring, 0x00000000);
 362         OUT_RING(ring, 0x00000000);
 363 
 364         gpu->funcs->flush(gpu, ring);
 365         return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
 366 }
 367 
 368 static int a5xx_preempt_start(struct msm_gpu *gpu)
 369 {
 370         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 371         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 372         struct msm_ringbuffer *ring = gpu->rb[0];
 373 
 374         if (gpu->nr_rings == 1)
 375                 return 0;
 376 
 377         /* Turn off protected mode to write to special registers */
 378         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 379         OUT_RING(ring, 0);
 380 
 381         /* Set the save preemption record for the ring/command */
 382         OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
 383         OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
 384         OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
 385 
 386         /* Turn back on protected mode */
 387         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 388         OUT_RING(ring, 1);
 389 
 390         OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
 391         OUT_RING(ring, 0x00);
 392 
 393         OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
 394         OUT_RING(ring, 0x01);
 395 
 396         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 397         OUT_RING(ring, 0x01);
 398 
 399         /* Yield the floor on command completion */
 400         OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
 401         OUT_RING(ring, 0x00);
 402         OUT_RING(ring, 0x00);
 403         OUT_RING(ring, 0x01);
 404         OUT_RING(ring, 0x01);
 405 
 406         gpu->funcs->flush(gpu, ring);
 407 
 408         return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
 409 }
 410 
 411 static int a5xx_ucode_init(struct msm_gpu *gpu)
 412 {
 413         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 414         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 415         int ret;
 416 
 417         if (!a5xx_gpu->pm4_bo) {
 418                 a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
 419                         adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
 420 
 421 
 422                 if (IS_ERR(a5xx_gpu->pm4_bo)) {
 423                         ret = PTR_ERR(a5xx_gpu->pm4_bo);
 424                         a5xx_gpu->pm4_bo = NULL;
 425                         DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
 426                                 ret);
 427                         return ret;
 428                 }
 429 
 430                 msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
 431         }
 432 
 433         if (!a5xx_gpu->pfp_bo) {
 434                 a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
 435                         adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
 436 
 437                 if (IS_ERR(a5xx_gpu->pfp_bo)) {
 438                         ret = PTR_ERR(a5xx_gpu->pfp_bo);
 439                         a5xx_gpu->pfp_bo = NULL;
 440                         DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
 441                                 ret);
 442                         return ret;
 443                 }
 444 
 445                 msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
 446         }
 447 
 448         gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
 449                 REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
 450 
 451         gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
 452                 REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
 453 
 454         return 0;
 455 }
 456 
 457 #define SCM_GPU_ZAP_SHADER_RESUME 0
 458 
 459 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
 460 {
 461         int ret;
 462 
 463         ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
 464         if (ret)
 465                 DRM_ERROR("%s: zap-shader resume failed: %d\n",
 466                         gpu->name, ret);
 467 
 468         return ret;
 469 }
 470 
 471 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
 472 {
 473         static bool loaded;
 474         int ret;
 475 
 476         /*
 477          * If the zap shader is already loaded into memory we just need to kick
 478          * the remote processor to reinitialize it
 479          */
 480         if (loaded)
 481                 return a5xx_zap_shader_resume(gpu);
 482 
 483         ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
 484 
 485         loaded = !ret;
 486         return ret;
 487 }
 488 
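/*
 * RBBM interrupts unmasked at hw_init time.  Note that RBBM_AHB_ERROR gets
 * special treatment in a5xx_irq(): it is only cleared after the error source
 * itself has been cleared, to avoid an interrupt storm.
 */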
 489 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
 490           A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
 491           A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
 492           A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
 493           A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
 494           A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
 495           A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
 496           A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
 497           A5XX_RBBM_INT_0_MASK_CP_SW | \
 498           A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
 499           A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
 500           A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
 501 
 502 static int a5xx_hw_init(struct msm_gpu *gpu)
 503 {
 504         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 505         int ret;
 506 
 507         gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
 508 
 509         if (adreno_is_a540(adreno_gpu))
 510                 gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
 511 
 512         /* Make all blocks contribute to the GPU BUSY perf counter */
 513         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
 514 
 515         /* Enable RBBM error reporting bits */
 516         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
 517 
 518         if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
 519                 /*
 520                  * Mask out the activity signals from RB1-3 to avoid false
 521                  * positives
 522                  */
 523 
 524                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
 525                         0xF0000000);
 526                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
 527                         0xFFFFFFFF);
 528                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
 529                         0xFFFFFFFF);
 530                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
 531                         0xFFFFFFFF);
 532                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
 533                         0xFFFFFFFF);
 534                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
 535                         0xFFFFFFFF);
 536                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
 537                         0xFFFFFFFF);
 538                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
 539                         0xFFFFFFFF);
 540         }
 541 
 542         /* Enable fault detection */
 543         gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
 544                 (1 << 30) | 0xFFFF);
 545 
 546         /* Turn on performance counters */
 547         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
 548 
 549         /* Select CP0 to always count cycles */
 550         gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
 551 
 552         /* Point RBBM perfcounter 0 at countable 6 to get the busy status for devfreq */
 553         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
 554 
 555         /* Increase VFD cache access so LRZ and other data gets evicted less */
 556         gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
 557 
 558         /* Disable L2 bypass in the UCHE */
 559         gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
 560         gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
 561         gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
 562         gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
 563 
 564         /* Set the GMEM VA range (0 to gpu->gmem) */
 565         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
 566         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
 567         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
 568                 0x00100000 + adreno_gpu->gmem - 1);
 569         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
 570 
 571         gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
 572         if (adreno_is_a530(adreno_gpu))
 573                 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
 574         if (adreno_is_a540(adreno_gpu))
 575                 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
 576         gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
 577         gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
 578 
 579         gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));
 580 
 581         if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
 582                 gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
 583 
 584         gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
 585 
 586         /* Enable USE_RETENTION_FLOPS */
 587         gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
 588 
 589         /* Enable ME/PFP split notification */
 590         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
 591 
 592         /* Enable HWCG */
 593         a5xx_set_hwcg(gpu, true);
 594 
 595         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
 596 
 597         /* Set the highest bank bit */
 598         gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
 599         gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
 600         if (adreno_is_a540(adreno_gpu))
 601                 gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
 602 
 603         /* Protect registers from the CP */
 604         gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
 605 
 606         /* RBBM */
 607         gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
 608         gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
 609         gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
 610         gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
 611         gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
 612         gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
 613 
 614         /* Content protect */
 615         gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
 616                 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
 617                         16));
 618         gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
 619                 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
 620 
 621         /* CP */
 622         gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
 623         gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
 624         gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
 625         gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
 626 
 627         /* RB */
 628         gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
 629         gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
 630 
 631         /* VPC */
 632         gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
 633         gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
 634 
 635         /* UCHE */
 636         gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
 637 
 638         if (adreno_is_a530(adreno_gpu))
 639                 gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
 640                         ADRENO_PROTECT_RW(0x10000, 0x8000));
 641 
 642         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
 643         /*
 644          * Disable the trusted memory range - we don't actually support secure
 645          * memory rendering at this point in time and we don't want to block off
 646          * part of the virtual memory space.
 647          */
 648         gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
 649                 REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
 650         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
 651 
 652         /* Put the GPU into 64 bit by default */
 653         gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
 654         gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
 655         gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
 656         gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
 657         gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
 658         gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
 659         gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
 660         gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
 661         gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
 662         gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
 663         gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
 664         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
 665 
 666         /*
 667          * A VPC corner case with local memory load/kill can corrupt internal
 668          * state. The normal disable does not work for all A5xx chips, so use
 669          * the following settings to disable it.
 670          */
 671         if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
 672                 gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
 673                 gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
 674         }
 675 
 676         ret = adreno_hw_init(gpu);
 677         if (ret)
 678                 return ret;
 679 
 680         a5xx_preempt_hw_init(gpu);
 681 
 682         a5xx_gpmu_ucode_init(gpu);
 683 
 684         ret = a5xx_ucode_init(gpu);
 685         if (ret)
 686                 return ret;
 687 
 688         /* Interrupts were masked through early bringup; enable the ones we handle */
 689         gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
 690 
 691         /* Clear ME_HALT to start the micro engine */
 692         gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
 693         ret = a5xx_me_init(gpu);
 694         if (ret)
 695                 return ret;
 696 
 697         ret = a5xx_power_init(gpu);
 698         if (ret)
 699                 return ret;
 700 
 701         /*
 702          * Send a pipeline event stat to get misbehaving counters to start
 703          * ticking correctly
 704          */
 705         if (adreno_is_a530(adreno_gpu)) {
 706                 OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
 707                 OUT_RING(gpu->rb[0], 0x0F);
 708 
 709                 gpu->funcs->flush(gpu, gpu->rb[0]);
 710                 if (!a5xx_idle(gpu, gpu->rb[0]))
 711                         return -EINVAL;
 712         }
 713 
 714         /*
 715          * Try to load a zap shader into the secure world. If successful
 716          * we can use the CP to switch out of secure mode. If not then we
 717          * have no recourse but to try to switch ourselves out manually. If we
 718          * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
 719          * be blocked and a permissions violation will soon follow.
 720          */
 721         ret = a5xx_zap_shader_init(gpu);
 722         if (!ret) {
 723                 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
 724                 OUT_RING(gpu->rb[0], 0x00000000);
 725 
 726                 gpu->funcs->flush(gpu, gpu->rb[0]);
 727                 if (!a5xx_idle(gpu, gpu->rb[0]))
 728                         return -EINVAL;
 729         } else if (ret == -ENODEV) {
 730                 /*
 731                  * This device does not use a zap shader (but print a warning
 732                  * just in case someone got their dt wrong.. hopefully they
 733                  * have a debug UART to realize the error of their ways...
 734                  * if you mess this up you are about to crash horribly)
 735                  */
 736                 dev_warn_once(gpu->dev->dev,
 737                         "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
 738                 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
 739         } else {
 740                 return ret;
 741         }
 742 
 743         /* Last step - yield the ringbuffer */
 744         a5xx_preempt_start(gpu);
 745 
 746         return 0;
 747 }
 748 
 749 static void a5xx_recover(struct msm_gpu *gpu)
 750 {
 751         int i;
 752 
 753         adreno_dump_info(gpu);
 754 
 755         for (i = 0; i < 8; i++) {
 756                 printk("CP_SCRATCH_REG%d: %u\n", i,
 757                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
 758         }
 759 
 760         if (hang_debug)
 761                 a5xx_dump(gpu);
 762 
 763         gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
 764         gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
 765         gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
 766         adreno_recover(gpu);
 767 }
 768 
 769 static void a5xx_destroy(struct msm_gpu *gpu)
 770 {
 771         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 772         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 773 
 774         DBG("%s", gpu->name);
 775 
 776         a5xx_preempt_fini(gpu);
 777 
 778         if (a5xx_gpu->pm4_bo) {
 779                 msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
 780                 drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
 781         }
 782 
 783         if (a5xx_gpu->pfp_bo) {
 784                 msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
 785                 drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
 786         }
 787 
 788         if (a5xx_gpu->gpmu_bo) {
 789                 msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
 790                 drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
 791         }
 792 
 793         adreno_gpu_cleanup(adreno_gpu);
 794         kfree(a5xx_gpu);
 795 }
 796 
 797 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
 798 {
 799         if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
 800                 return false;
 801 
 802         /*
 803          * Nearly every abnormality ends up pausing the GPU and triggering a
 804          * fault so we can safely just watch for this one interrupt to fire
 805          */
 806         return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
 807                 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
 808 }
 809 
 810 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 811 {
 812         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 813         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 814 
 815         if (ring != a5xx_gpu->cur_ring) {
 816                 WARN(1, "Tried to idle a non-current ringbuffer\n");
 817                 return false;
 818         }
 819 
 820         /* wait for CP to drain ringbuffer: */
 821         if (!adreno_idle(gpu, ring))
 822                 return false;
 823 
 824         if (spin_until(_a5xx_check_idle(gpu))) {
 825                 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
 826                         gpu->name, __builtin_return_address(0),
 827                         gpu_read(gpu, REG_A5XX_RBBM_STATUS),
 828                         gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
 829                         gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
 830                         gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
 831                 return false;
 832         }
 833 
 834         return true;
 835 }
 836 
 837 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
 838 {
 839         struct msm_gpu *gpu = arg;
 840         pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
 841                         iova, flags,
 842                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
 843                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
 844                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
 845                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
 846 
 847         return -EFAULT;
 848 }
 849 
 850 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
 851 {
 852         u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
 853 
 854         if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
 855                 u32 val;
 856 
 857                 gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
 858 
 859                 /*
 860                  * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
 861                  * read it twice
 862                  */
 863 
 864                 gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
 865                 val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
 866 
 867                 dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
 868                         val);
 869         }
 870 
 871         if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
 872                 dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
 873                         gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
 874 
 875         if (status & A5XX_CP_INT_CP_DMA_ERROR)
 876                 dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
 877 
 878         if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
 879                 u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
 880 
 881                 dev_err_ratelimited(gpu->dev->dev,
 882                         "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
 883                         val & (1 << 24) ? "WRITE" : "READ",
 884                         (val & 0xFFFFF) >> 2, val);
 885         }
 886 
 887         if (status & A5XX_CP_INT_CP_AHB_ERROR) {
 888                 u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
 889                 const char *access[16] = { "reserved", "reserved",
 890                         "timestamp lo", "timestamp hi", "pfp read", "pfp write",
 891                         "", "", "me read", "me write", "", "", "crashdump read",
 892                         "crashdump write" };
 893 
 894                 dev_err_ratelimited(gpu->dev->dev,
 895                         "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
 896                         status & 0xFFFFF, access[(status >> 24) & 0xF],
 897                         (status & (1 << 31)), status);
 898         }
 899 }
 900 
 901 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
 902 {
 903         if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
 904                 u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
 905 
 906                 dev_err_ratelimited(gpu->dev->dev,
 907                         "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
 908                         val & (1 << 28) ? "WRITE" : "READ",
 909                         (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
 910                         (val >> 24) & 0xF);
 911 
 912                 /* Clear the error */
 913                 gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
 914 
 915                 /* Clear the interrupt */
 916                 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
 917                         A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
 918         }
 919 
 920         if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
 921                 dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
 922 
 923         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
 924                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
 925                         gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
 926 
 927         if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
 928                 dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
 929                         gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
 930 
 931         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
 932                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
 933                         gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
 934 
 935         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
 936                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
 937 
 938         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
 939                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
 940 }
 941 
 942 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
 943 {
 944         uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
 945 
 946         addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
 947 
 948         dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
 949                 addr);
 950 }
 951 
 952 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
 953 {
 954         dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
 955 }
 956 
 957 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
 958 {
 959         struct drm_device *dev = gpu->dev;
 960         struct msm_drm_private *priv = dev->dev_private;
 961         struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
 962 
 963         DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
 964                 ring ? ring->id : -1, ring ? ring->seqno : 0,
 965                 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
 966                 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
 967                 gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
 968                 gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
 969                 gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
 970                 gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
 971                 gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
 972 
 973         /* Turn off the hangcheck timer to keep it from bothering us */
 974         del_timer(&gpu->hangcheck_timer);
 975 
 976         queue_work(priv->wq, &gpu->recover_work);
 977 }
 978 
 979 #define RBBM_ERROR_MASK \
 980         (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
 981         A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
 982         A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
 983         A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
 984         A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
 985         A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
 986 
 987 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
 988 {
 989         u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
 990 
 991         /*
 992          * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
 993          * before the source is cleared the interrupt will storm.
 994          */
 995         gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
 996                 status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
 997 
 998         /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
 999         if (status & RBBM_ERROR_MASK)
1000                 a5xx_rbbm_err_irq(gpu, status);
1001 
1002         if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1003                 a5xx_cp_err_irq(gpu);
1004 
1005         if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1006                 a5xx_fault_detect_irq(gpu);
1007 
1008         if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1009                 a5xx_uche_err_irq(gpu);
1010 
1011         if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1012                 a5xx_gpmu_err_irq(gpu);
1013 
1014         if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1015                 a5xx_preempt_trigger(gpu);
1016                 msm_gpu_retire(gpu);
1017         }
1018 
1019         if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1020                 a5xx_preempt_irq(gpu);
1021 
1022         return IRQ_HANDLED;
1023 }
1024 
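/*
 * Map the generic adreno register indices used by the common adreno/msm code
 * onto their A5XX offsets (the CP ringbuffer registers).
 */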
1025 static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
1026         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
1027         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
1028         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
1029         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
1030                 REG_A5XX_CP_RB_RPTR_ADDR_HI),
1031         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
1032         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
1033         REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
1034 };
1035 
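/*
 * Register ranges dumped for debug output and GPU state capture: inclusive
 * {start, end} pairs, terminated with ~0.
 */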
1036 static const u32 a5xx_registers[] = {
1037         0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1038         0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1039         0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1040         0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1041         0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1042         0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1043         0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1044         0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1045         0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1046         0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1047         0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1048         0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1049         0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1050         0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1051         0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1052         0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1053         0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1054         0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1055         0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1056         0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1057         0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1058         0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1059         0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1060         0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1061         0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1062         0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1063         0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1064         0xAC60, 0xAC60, ~0,
1065 };
1066 
1067 static void a5xx_dump(struct msm_gpu *gpu)
1068 {
1069         DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1070                 gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1071         adreno_dump(gpu);
1072 }
1073 
1074 static int a5xx_pm_resume(struct msm_gpu *gpu)
1075 {
1076         int ret;
1077 
1078         /* Turn on the core power */
1079         ret = msm_gpu_pm_resume(gpu);
1080         if (ret)
1081                 return ret;
1082 
1083         /* Turn on the RBCCU domain first to limit the chances of voltage droop */
1084         gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1085 
1086         /* Wait 3 usecs before polling */
1087         udelay(3);
1088 
1089         ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1090                 (1 << 20), (1 << 20));
1091         if (ret) {
1092                 DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1093                         gpu->name,
1094                         gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1095                 return ret;
1096         }
1097 
1098         /* Turn on the SP domain */
1099         gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1100         ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1101                 (1 << 20), (1 << 20));
1102         if (ret)
1103                 DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1104                         gpu->name);
1105 
1106         return ret;
1107 }
1108 
1109 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1110 {
1111         /* Clear the VBIF pipe before shutting down */
1112         gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
1113         spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);
1114 
1115         gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1116 
1117         /*
1118          * Reset the VBIF before power collapse to avoid issues with FIFO
1119          * entries
1120          */
1121         gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1122         gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1123 
1124         return msm_gpu_pm_suspend(gpu);
1125 }
1126 
1127 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1128 {
1129         *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1130                 REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1131 
1132         return 0;
1133 }
1134 
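/*
 * Scratch buffer used to drive the CP crashdumper: a 1MB GEM buffer with the
 * dump script written at offset 0 and the captured register data placed at
 * offset 256K (see a5xx_gpu_state_get_hlsq_regs()).
 */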
1135 struct a5xx_crashdumper {
1136         void *ptr;
1137         struct drm_gem_object *bo;
1138         u64 iova;
1139 };
1140 
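/*
 * A5XX GPU state: the generic adreno state plus a copy of the registers that
 * can only be read through the HLSQ aperture via the crashdumper.
 */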
1141 struct a5xx_gpu_state {
1142         struct msm_gpu_state base;
1143         u32 *hlsqregs;
1144 };
1145 
1146 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1147                 struct a5xx_crashdumper *dumper)
1148 {
1149         dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1150                 SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1151                 &dumper->bo, &dumper->iova);
1152 
1153         if (!IS_ERR(dumper->ptr))
1154                 msm_gem_object_set_name(dumper->bo, "crashdump");
1155 
1156         return PTR_ERR_OR_ZERO(dumper->ptr);
1157 }
1158 
1159 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1160                 struct a5xx_crashdumper *dumper)
1161 {
1162         u32 val;
1163 
1164         if (IS_ERR_OR_NULL(dumper->ptr))
1165                 return -EINVAL;
1166 
1167         gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1168                 REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1169 
1170         gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1171 
1172         return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1173                 val & 0x04, 100, 10000);
1174 }
1175 
1176 /*
1177  * List of the registers that need to be read through the HLSQ aperture by
1178  * the crashdumper.  These are not normally accessible from the CPU on a
1179  * secure platform.
1180  */
1181 static const struct {
1182         u32 type;
1183         u32 regoffset;
1184         u32 count;
1185 } a5xx_hlsq_aperture_regs[] = {
1186         { 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1187         { 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1188         { 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1189         { 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1190         { 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1191         { 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1192         { 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1193         { 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1194         { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1195         { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1196         { 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1197         { 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1198         { 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1199         { 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1200         { 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1201 };
1202 
1203 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1204                 struct a5xx_gpu_state *a5xx_state)
1205 {
1206         struct a5xx_crashdumper dumper = { 0 };
1207         u32 offset, count = 0;
1208         u64 *ptr;
1209         int i;
1210 
1211         if (a5xx_crashdumper_init(gpu, &dumper))
1212                 return;
1213 
1214         /* The script will be written at offset 0 */
1215         ptr = dumper.ptr;
1216 
1217         /* Start writing the data at offset 256k */
1218         offset = dumper.iova + (256 * SZ_1K);
1219 
1220         /* Count how many additional registers to get from the HLSQ aperture */
1221         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1222                 count += a5xx_hlsq_aperture_regs[i].count;
1223 
1224         a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1225         if (!a5xx_state->hlsqregs)
1226                 return;
1227 
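        /*
         * Each script entry is a pair of 64-bit words.  The first word holds
         * either a value to write or a target buffer address; the second word
         * packs the register offset into bits 63:44 along with a dword count,
         * with bit 21 apparently selecting a register write rather than a
         * copy into the buffer.  (Format inferred from the writes below.)
         */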
1228         /* Build the crashdump script */
1229         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1230                 u32 type = a5xx_hlsq_aperture_regs[i].type;
1231                 u32 c = a5xx_hlsq_aperture_regs[i].count;
1232 
1233                 /* Write the register to select the desired bank */
1234                 *ptr++ = ((u64) type << 8);
1235                 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1236                         (1 << 21) | 1;
1237 
1238                 *ptr++ = offset;
1239                 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1240                         | c;
1241 
1242                 offset += c * sizeof(u32);
1243         }
1244 
1245         /* Write two zeros to close off the script */
1246         *ptr++ = 0;
1247         *ptr++ = 0;
1248 
1249         if (a5xx_crashdumper_run(gpu, &dumper)) {
1250                 kfree(a5xx_state->hlsqregs);
1251                 msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1252                 return;
1253         }
1254 
1255         /* Copy the data from the crashdumper to the state */
1256         memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1257                 count * sizeof(u32));
1258 
1259         msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1260 }
1261 
1262 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1263 {
1264         struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1265                         GFP_KERNEL);
1266 
1267         if (!a5xx_state)
1268                 return ERR_PTR(-ENOMEM);
1269 
1270         /* Temporarily disable hardware clock gating before reading the hw */
1271         a5xx_set_hwcg(gpu, false);
1272 
1273         /* First get the generic state from the adreno core */
1274         adreno_gpu_state_get(gpu, &(a5xx_state->base));
1275 
1276         a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1277 
1278         /* Get the HLSQ regs with the help of the crashdumper */
1279         a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1280 
1281         a5xx_set_hwcg(gpu, true);
1282 
1283         return &a5xx_state->base;
1284 }
1285 
1286 static void a5xx_gpu_state_destroy(struct kref *kref)
1287 {
1288         struct msm_gpu_state *state = container_of(kref,
1289                 struct msm_gpu_state, ref);
1290         struct a5xx_gpu_state *a5xx_state = container_of(state,
1291                 struct a5xx_gpu_state, base);
1292 
1293         kfree(a5xx_state->hlsqregs);
1294 
1295         adreno_gpu_state_destroy(state);
1296         kfree(a5xx_state);
1297 }
1298 
1299 int a5xx_gpu_state_put(struct msm_gpu_state *state)
1300 {
1301         if (IS_ERR_OR_NULL(state))
1302                 return 1;
1303 
1304         return kref_put(&state->ref, a5xx_gpu_state_destroy);
1305 }
1306 
1307 
1308 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1309 void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1310                 struct drm_printer *p)
1311 {
1312         int i, j;
1313         u32 pos = 0;
1314         struct a5xx_gpu_state *a5xx_state = container_of(state,
1315                 struct a5xx_gpu_state, base);
1316 
1317         if (IS_ERR_OR_NULL(state))
1318                 return;
1319 
1320         adreno_show(gpu, state, p);
1321 
1322         /* Dump the additional a5xx HLSQ registers */
1323         if (!a5xx_state->hlsqregs)
1324                 return;
1325 
1326         drm_printf(p, "registers-hlsq:\n");
1327 
1328         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1329                 u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1330                 u32 c = a5xx_hlsq_aperture_regs[i].count;
1331 
1332                 for (j = 0; j < c; j++, pos++, o++) {
1333                         /*
1334                          * To keep the crashdump simple we pull the entire range
1335                          * for each register type but not all of the registers
1336                          * in the range are valid. Fortunately invalid registers
1337                          * stick out like a sore thumb with a value of
1338                          * 0xdeadbeef
1339                          */
1340                         if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1341                                 continue;
1342 
1343                         drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1344                                 o << 2, a5xx_state->hlsqregs[pos]);
1345                 }
1346         }
1347 }
1348 #endif
1349 
1350 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1351 {
1352         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1353         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1354 
1355         return a5xx_gpu->cur_ring;
1356 }
1357 
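/*
 * Return the approximate number of microseconds the GPU was busy since the
 * previous call, derived from the RBBM perfcounter that a5xx_hw_init()
 * points at countable 6 (the busy status used by devfreq).
 */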
1358 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1359 {
1360         u64 busy_cycles, busy_time;
1361 
1362         busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1363                         REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1364 
1365         busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1366         do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1367 
1368         gpu->devfreq.busy_cycles = busy_cycles;
1369 
1370         if (WARN_ON(busy_time > ~0LU))
1371                 return ~0LU;
1372 
1373         return (unsigned long)busy_time;
1374 }
1375 
1376 static const struct adreno_gpu_funcs funcs = {
1377         .base = {
1378                 .get_param = adreno_get_param,
1379                 .hw_init = a5xx_hw_init,
1380                 .pm_suspend = a5xx_pm_suspend,
1381                 .pm_resume = a5xx_pm_resume,
1382                 .recover = a5xx_recover,
1383                 .submit = a5xx_submit,
1384                 .flush = a5xx_flush,
1385                 .active_ring = a5xx_active_ring,
1386                 .irq = a5xx_irq,
1387                 .destroy = a5xx_destroy,
1388 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1389                 .show = a5xx_show,
1390 #endif
1391 #if defined(CONFIG_DEBUG_FS)
1392                 .debugfs_init = a5xx_debugfs_init,
1393 #endif
1394                 .gpu_busy = a5xx_gpu_busy,
1395                 .gpu_state_get = a5xx_gpu_state_get,
1396                 .gpu_state_put = a5xx_gpu_state_put,
1397         },
1398         .get_timestamp = a5xx_get_timestamp,
1399 };
1400 
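/*
 * Read the GPU speed bin fuse (if the DT provides a "speed_bin" nvmem cell)
 * and tell the OPP layer which table entries apply, by matching the bit
 * (1 << bin) against each OPP's opp-supported-hw value.  Illustrative DT
 * fragment only - names and values are an assumption, check the platform
 * bindings:
 *
 *     gpu_opp_table {
 *             opp-430000000 {
 *                     opp-hz = /bits/ 64 <430000000>;
 *                     opp-supported-hw = <0xff>;
 *             };
 *     };
 */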
1401 static void check_speed_bin(struct device *dev)
1402 {
1403         struct nvmem_cell *cell;
1404         u32 bin, val;
1405 
1406         cell = nvmem_cell_get(dev, "speed_bin");
1407 
1408         /* If an nvmem cell isn't defined, nothing to do */
1409         if (IS_ERR(cell))
1410                 return;
1411 
1412         bin = *((u32 *) nvmem_cell_read(cell, NULL));
1413         nvmem_cell_put(cell);
1414 
1415         val = (1 << bin);
1416 
1417         dev_pm_opp_set_supported_hw(dev, &val, 1);
1418 }
1419 
1420 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1421 {
1422         struct msm_drm_private *priv = dev->dev_private;
1423         struct platform_device *pdev = priv->gpu_pdev;
1424         struct a5xx_gpu *a5xx_gpu = NULL;
1425         struct adreno_gpu *adreno_gpu;
1426         struct msm_gpu *gpu;
1427         int ret;
1428 
1429         if (!pdev) {
1430                 DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1431                 return ERR_PTR(-ENXIO);
1432         }
1433 
1434         a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1435         if (!a5xx_gpu)
1436                 return ERR_PTR(-ENOMEM);
1437 
1438         adreno_gpu = &a5xx_gpu->base;
1439         gpu = &adreno_gpu->base;
1440 
1441         adreno_gpu->registers = a5xx_registers;
1442         adreno_gpu->reg_offsets = a5xx_register_offsets;
1443 
1444         a5xx_gpu->lm_leakage = 0x4E001A;
1445 
1446         check_speed_bin(&pdev->dev);
1447 
1448         ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1449         if (ret) {
1450                 a5xx_destroy(&(a5xx_gpu->base.base));
1451                 return ERR_PTR(ret);
1452         }
1453 
1454         if (gpu->aspace)
1455                 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1456 
1457         /* Set up the preemption specific bits and pieces for each ringbuffer */
1458         a5xx_preempt_init(gpu);
1459 
1460         return gpu;
1461 }
